Files
scylladb/test/boost/commitlog_test.cc
Benny Halevy 3feb759943 everywhere: use utils::chunked_vector for list of mutations
Currently, we use std::vector<*mutation> to keep
a list of mutations for processing.
This can lead to large allocations, e.g. when the vector
size is a function of the number of tables.

Use a chunked vector instead to prevent oversized allocations.

`perf-simple-query --smp 1` results obtained for fixed 400MHz frequency
and PGO disabled:

Before (read path):
```
enable-cache=1
Running test with config: {partitions=10000, concurrency=100, mode=read, query_single_key=no, counters=no}
Disabling auto compaction
Creating 10000 partitions...

89055.97 tps ( 66.1 allocs/op,   0.0 logallocs/op,  14.2 tasks/op,   39417 insns/op,   18003 cycles/op,        0 errors)
103372.72 tps ( 66.1 allocs/op,   0.0 logallocs/op,  14.2 tasks/op,   39380 insns/op,   17300 cycles/op,        0 errors)
98942.27 tps ( 66.1 allocs/op,   0.0 logallocs/op,  14.2 tasks/op,   39413 insns/op,   17336 cycles/op,        0 errors)
103752.93 tps ( 66.1 allocs/op,   0.0 logallocs/op,  14.2 tasks/op,   39407 insns/op,   17252 cycles/op,        0 errors)
102516.77 tps ( 66.1 allocs/op,   0.0 logallocs/op,  14.2 tasks/op,   39403 insns/op,   17288 cycles/op,        0 errors)
throughput:
	mean=   99528.13 standard-deviation=6155.71
	median= 102516.77 median-absolute-deviation=3844.59
	maximum=103752.93 minimum=89055.97
instructions_per_op:
	mean=   39403.99 standard-deviation=14.25
	median= 39406.75 median-absolute-deviation=9.30
	maximum=39416.63 minimum=39380.39
cpu_cycles_per_op:
	mean=   17435.81 standard-deviation=318.24
	median= 17300.40 median-absolute-deviation=147.59
	maximum=18002.53 minimum=17251.75
```

After (read path)
```
enable-cache=1
Running test with config: {partitions=10000, concurrency=100, mode=read, query_single_key=no, counters=no}
Disabling auto compaction
Creating 10000 partitions...
59755.04 tps ( 66.2 allocs/op,   0.0 logallocs/op,  14.2 tasks/op,   39466 insns/op,   22834 cycles/op,        0 errors)
71854.16 tps ( 66.1 allocs/op,   0.0 logallocs/op,  14.2 tasks/op,   39417 insns/op,   17883 cycles/op,        0 errors)
82149.45 tps ( 66.1 allocs/op,   0.0 logallocs/op,  14.2 tasks/op,   39411 insns/op,   17409 cycles/op,        0 errors)
49640.04 tps ( 66.1 allocs/op,   0.0 logallocs/op,  14.3 tasks/op,   39474 insns/op,   19975 cycles/op,        0 errors)
54963.22 tps ( 66.1 allocs/op,   0.0 logallocs/op,  14.3 tasks/op,   39474 insns/op,   18235 cycles/op,        0 errors)
throughput:
	mean=   63672.38 standard-deviation=13195.12
	median= 59755.04 median-absolute-deviation=8709.16
	maximum=82149.45 minimum=49640.04
instructions_per_op:
	mean=   39448.38 standard-deviation=31.60
	median= 39466.17 median-absolute-deviation=25.75
	maximum=39474.12 minimum=39411.42
cpu_cycles_per_op:
	mean=   19267.01 standard-deviation=2217.03
	median= 18234.80 median-absolute-deviation=1384.25
	maximum=22834.26 minimum=17408.67
```

`perf-simple-query --smp 1 --write` results obtained for fixed 400MHz frequency
and PGO disabled:

Before (write path):
```
enable-cache=1
Running test with config: {partitions=10000, concurrency=100, mode=write, query_single_key=no, counters=no}
Disabling auto compaction
63736.96 tps ( 59.4 allocs/op,  16.4 logallocs/op,  14.3 tasks/op,   49667 insns/op,   19924 cycles/op,        0 errors)
64109.41 tps ( 59.3 allocs/op,  16.0 logallocs/op,  14.3 tasks/op,   49992 insns/op,   20084 cycles/op,        0 errors)
56950.47 tps ( 59.3 allocs/op,  16.0 logallocs/op,  14.3 tasks/op,   50005 insns/op,   20501 cycles/op,        0 errors)
44858.42 tps ( 59.3 allocs/op,  16.0 logallocs/op,  14.3 tasks/op,   50014 insns/op,   21947 cycles/op,        0 errors)
28592.87 tps ( 59.3 allocs/op,  16.0 logallocs/op,  14.3 tasks/op,   50027 insns/op,   27659 cycles/op,        0 errors)
throughput:
	mean=   51649.63 standard-deviation=15059.74
	median= 56950.47 median-absolute-deviation=12087.33
	maximum=64109.41 minimum=28592.87
instructions_per_op:
	mean=   49941.18 standard-deviation=153.76
	median= 50005.24 median-absolute-deviation=73.01
	maximum=50027.07 minimum=49667.05
cpu_cycles_per_op:
	mean=   22023.01 standard-deviation=3249.92
	median= 20500.74 median-absolute-deviation=1938.76
	maximum=27658.75 minimum=19924.32
```

After (write path)
```
enable-cache=1
Running test with config: {partitions=10000, concurrency=100, mode=write, query_single_key=no, counters=no}
Disabling auto compaction
53395.93 tps ( 59.4 allocs/op,  16.5 logallocs/op,  14.3 tasks/op,   50326 insns/op,   21252 cycles/op,        0 errors)
46527.83 tps ( 59.3 allocs/op,  16.0 logallocs/op,  14.3 tasks/op,   50704 insns/op,   21555 cycles/op,        0 errors)
55846.30 tps ( 59.3 allocs/op,  16.0 logallocs/op,  14.3 tasks/op,   50731 insns/op,   21060 cycles/op,        0 errors)
55669.30 tps ( 59.3 allocs/op,  16.0 logallocs/op,  14.3 tasks/op,   50735 insns/op,   21521 cycles/op,        0 errors)
52130.17 tps ( 59.3 allocs/op,  16.0 logallocs/op,  14.3 tasks/op,   50757 insns/op,   21334 cycles/op,        0 errors)
throughput:
	mean=   52713.91 standard-deviation=3795.38
	median= 53395.93 median-absolute-deviation=2955.40
	maximum=55846.30 minimum=46527.83
instructions_per_op:
	mean=   50650.57 standard-deviation=182.46
	median= 50731.38 median-absolute-deviation=84.09
	maximum=50756.62 minimum=50325.87
cpu_cycles_per_op:
	mean=   21344.42 standard-deviation=202.86
	median= 21334.00 median-absolute-deviation=176.37
	maximum=21554.61 minimum=21060.24
```

Fixes #24815

Improvement for rare corner cases. No backport required

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>

Closes scylladb/scylladb#24919
2025-07-13 19:13:11 +03:00

2106 lines
78 KiB
C++

/*
* Copyright (C) 2015-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#include <boost/test/unit_test.hpp>
#include <stdlib.h>
#include <iostream>
#include <unordered_map>
#include <unordered_set>
#include <set>
#include <deque>
#include <fmt/ranges.h>
#undef SEASTAR_TESTING_MAIN
#include <seastar/testing/test_case.hh>
#include <seastar/core/coroutine.hh>
#include <seastar/core/future-util.hh>
#include <seastar/core/do_with.hh>
#include <seastar/core/scollectd_api.hh>
#include <seastar/core/file.hh>
#include <seastar/core/seastar.hh>
#include <seastar/util/noncopyable_function.hh>
#include <seastar/util/closeable.hh>
#include "utils/assert.hh"
#include "utils/UUID_gen.hh"
#include "test/lib/tmpdir.hh"
#include "db/commitlog/commitlog.hh"
#include "db/commitlog/commitlog_replayer.hh"
#include "db/commitlog/commitlog_extensions.hh"
#include "db/commitlog/rp_set.hh"
#include "db/extensions.hh"
#include "readers/combined.hh"
#include "utils/log.hh"
#include "test/lib/exception_utils.hh"
#include "test/lib/cql_test_env.hh"
#include "test/lib/data_model.hh"
#include "test/lib/sstable_utils.hh"
#include "test/lib/mutation_source_test.hh"
#include "test/lib/key_utils.hh"
#include "test/lib/test_utils.hh"
BOOST_AUTO_TEST_SUITE(commitlog_test)
using namespace db;
/// Runs `f` against a freshly created commitlog located in a temporary directory.
///
/// `cfg.commit_log_location` is overwritten with the temporary directory's path.
/// Whether `f` succeeds or fails, the log is shut down and then cleared afterwards.
/// The temporary directory object is kept alive until the whole chain has resolved.
static future<> cl_test(commitlog::config cfg, noncopyable_function<future<> (commitlog&)> f) {
    // enable as needed.
    // moved from static init because static init fiasco.
#if 0
    logging::logger_registry().set_logger_level("commitlog", logging::log_level::trace);
#endif
    tmpdir scratch;
    cfg.commit_log_location = scratch.path().string();

    // Takes ownership of the created log, parks it in do_with() storage and
    // runs the test body followed by the unconditional teardown.
    auto run_body = [body = std::move(f)](commitlog created) mutable {
        return do_with(std::move(created), [body = std::move(body)](commitlog& cl) {
            auto teardown = [&cl] {
                return cl.shutdown().then([&cl] {
                    return cl.clear();
                });
            };
            return futurize_invoke(body, cl).finally(std::move(teardown));
        });
    };

    return commitlog::create_commitlog(cfg)
        .then(std::move(run_body))
        .finally([scratch = std::move(scratch)] {
        });
}
/// Convenience overload: runs `f` with a default-constructed configuration whose
/// metrics category name is set to "commitlog".
static future<> cl_test(noncopyable_function<future<> (commitlog&)> f) {
    commitlog::config default_cfg;
    default_cfg.metrics_category_name = "commitlog";
    auto body = std::move(f);
    return cl_test(default_cfg, std::move(body));
}
/// Produces a table id wrapping a newly generated time-based UUID.
static table_id make_table_id() {
    auto fresh_uuid = utils::UUID_gen::get_time_UUID();
    return table_id(fresh_uuid);
}
// Smoke test: just write in-memory and check that the write yields a
// non-default replay position.
SEASTAR_TEST_CASE(test_create_commitlog){
    return cl_test([](commitlog& log) -> future<> {
        const sstring payload = "hej bubba cow";
        const db::replay_position rp = co_await log.add_mutation(make_table_id(), payload.size(), db::commitlog::force_sync::no, [&payload](db::commitlog::output& dst) {
            dst.write(payload.data(), payload.size());
        });
        BOOST_CHECK_NE(rp, db::replay_position());
    });
}
// In BATCH sync mode the flush counter must already be non-zero by the time
// add_mutation() resolves.
SEASTAR_TEST_CASE(test_commitlog_written_to_disk_batch){
    commitlog::config cfg;
    cfg.mode = commitlog::sync_mode::BATCH;
    return cl_test(cfg, [](commitlog& log) -> future<> {
        const sstring payload = "hej bubba cow";
        const db::replay_position rp = co_await log.add_mutation(make_table_id(), payload.size(), db::commitlog::force_sync::no, [&payload](db::commitlog::output& dst) {
            dst.write(payload.data(), payload.size());
        });
        BOOST_CHECK_NE(rp, db::replay_position());
        const auto flushes = log.get_flush_count();
        BOOST_REQUIRE(flushes > 0);
    });
}
// An entry written with force_sync::yes must have been flushed (flush counter
// non-zero) by the time add_mutation() resolves, using the default config.
SEASTAR_TEST_CASE(test_commitlog_written_to_disk_sync){
    commitlog::config cfg;
    return cl_test(cfg, [](commitlog& log) -> future<> {
        const sstring payload = "hej bubba cow";
        const db::replay_position rp = co_await log.add_mutation(make_table_id(), payload.size(), db::commitlog::force_sync::yes, [&payload](db::commitlog::output& dst) {
            dst.write(payload.data(), payload.size());
        });
        BOOST_CHECK_NE(rp, db::replay_position());
        const auto flushes = log.get_flush_count();
        BOOST_REQUIRE(flushes > 0);
    });
}
// Counterpart of the force_sync::yes test: with a very long sync period, an
// entry written with force_sync::no must NOT have triggered any flush by the
// time add_mutation() resolves.
SEASTAR_TEST_CASE(test_commitlog_written_to_disk_no_sync){
    commitlog::config cfg;
    cfg.commitlog_sync_period_in_ms = 10000000000;
    return cl_test(cfg, [](commitlog& log) -> future<> {
        const sstring payload = "hej bubba cow";
        const db::replay_position rp = co_await log.add_mutation(make_table_id(), payload.size(), db::commitlog::force_sync::no, [&payload](db::commitlog::output& dst) {
            dst.write(payload.data(), payload.size());
        });
        BOOST_CHECK_NE(rp, db::replay_position());
        const auto flushes = log.get_flush_count();
        BOOST_REQUIRE(flushes == 0);
    });
}
// With the default configuration, keeps writing entries to one table until the
// log reports at least one flush.
SEASTAR_TEST_CASE(test_commitlog_written_to_disk_periodic){
    return cl_test([](commitlog& log) -> future<> {
        const auto uuid = make_table_id();
        bool flushed = false;
        while (!flushed) {
            const sstring payload = "hej bubba cow";
            const db::replay_position rp = co_await log.add_mutation(uuid, payload.size(), db::commitlog::force_sync::no, [&payload](db::commitlog::output& dst) {
                dst.write(payload.data(), payload.size());
            });
            BOOST_CHECK_NE(rp, db::replay_position());
            flushed = log.get_flush_count() > 0;
        }
    });
}
// With 1MB segments, writes until the collected rp_set reports a size above one
// and then requires more than one active segment.
SEASTAR_TEST_CASE(test_commitlog_new_segment){
    commitlog::config cfg;
    cfg.commitlog_segment_size_in_mb = 1;
    return cl_test(cfg, [](commitlog& log) -> future<> {
        {
            // Scoped so the set is gone before the segment count is inspected.
            rp_set held;
            const auto uuid = make_table_id();
            while (!(held.size() > 1)) {
                const sstring payload = "hej bubba cow";
                auto h = co_await log.add_mutation(uuid, payload.size(), db::commitlog::force_sync::no, [&payload](db::commitlog::output& dst) {
                    dst.write(payload.data(), payload.size());
                });
                BOOST_CHECK_NE(h.rp(), db::replay_position());
                held.put(std::move(h));
            }
        }
        const auto active = log.get_active_segment_names().size();
        BOOST_REQUIRE(active > 1);
    });
}
typedef std::vector<sstring> segment_names;
static segment_names segment_diff(commitlog& log, segment_names prev = {}) {
segment_names now = log.get_active_segment_names();
segment_names diff;
// safety fix. We should always get segment names in alphabetical order, but
// we're not explicitly guaranteed it. Lets sort the sets just to be sure.
std::sort(now.begin(), now.end());
std::sort(prev.begin(), prev.end());
std::set_difference(prev.begin(), prev.end(), now.begin(), now.end(), std::back_inserter(diff));
return diff;
}
// Writes entries round-robin to ten table ids (1MB segments) until one table's
// rp_set reports a size above one, then syncs, discards completed segments for
// every table and checks that at least one previously active segment name is gone.
// Finally shuts the log down and checks that no existing segments are listed.
SEASTAR_TEST_CASE(test_commitlog_discard_completed_segments){
//logging::logger_registry().set_logger_level("commitlog", logging::log_level::trace);
commitlog::config cfg;
cfg.commitlog_segment_size_in_mb = 1;
return cl_test(cfg, [](commitlog& log) {
// Bookkeeping shared between the continuations below.
struct state_type {
// The ten table ids that are written to in turn.
std::vector<table_id> uuids;
// Replay positions collected per table id.
std::unordered_map<table_id, db::rp_set> rps;
// Round-robin cursor; mutable so next_uuid() can stay const.
mutable size_t index = 0;
state_type() {
for (int i = 0; i < 10; ++i) {
uuids.push_back(make_table_id());
}
}
// Returns the next table id, cycling through `uuids`.
const table_id& next_uuid() const {
return uuids[index++ % uuids.size()];
}
// True once any table's rp_set reports a size above one.
bool done() const {
return std::any_of(rps.begin(), rps.end(), [](auto& rps) {
return rps.second.size() > 1;
});
}
};
auto state = make_lw_shared<state_type>();
return do_until([state]() { return state->done(); },
[&log, state]() {
sstring tmp = "hej bubba cow";
auto uuid = state->next_uuid();
return log.add_mutation(uuid, tmp.size(), db::commitlog::force_sync::no, [tmp](db::commitlog::output& dst) {
dst.write(tmp.data(), tmp.size());
}).then([state, uuid](db::rp_handle h) {
// Hand the handle over to the per-table set.
state->rps[uuid].put(std::move(h));
});
}).then([&log, state]() {
// Snapshot of the active segment names before discarding anything.
auto names = log.get_active_segment_names();
BOOST_REQUIRE(names.size() > 1);
// sync all so we have no outstanding async sync ops that
// might prevent discard_completed_segments to actually dispose
// of clean segments (shared_ptr in task)
return log.sync_all_segments().then([&log, state, names] {
for (auto & p : state->rps) {
log.discard_completed_segments(p.first, p.second);
}
// nn: names from the snapshot that are no longer active;
// dn: number of destroyed segments as reported by the log.
auto diff = segment_diff(log, names);
auto nn = diff.size();
auto dn = log.get_num_segments_destroyed();
BOOST_REQUIRE(nn > 0);
BOOST_REQUIRE(nn <= names.size());
BOOST_REQUIRE(dn <= nn);
});
}).then([&log] {
// After shutdown the list of existing segments must compare equal to an empty one.
return log.shutdown().then([&log] {
return log.list_existing_segments().then([] (auto descs) {
BOOST_CHECK_EQUAL(descs, decltype(descs){});
});
});
});
});
}
// A record of exactly max_record_size() bytes must be accepted.
SEASTAR_TEST_CASE(test_equal_record_limit){
    return cl_test([](commitlog& log) -> future<> {
        const auto size = log.max_record_size();
        const db::replay_position rp = co_await log.add_mutation(make_table_id(), size, db::commitlog::force_sync::no, [size](db::commitlog::output& dst) {
            dst.fill(char(1), size);
        });
        BOOST_CHECK_NE(rp, db::replay_position());
    });
}
// A record one byte larger than max_record_size() must be rejected with an
// exception (of any type); succeeding is a test failure.
SEASTAR_TEST_CASE(test_exceed_record_limit){
    return cl_test([](commitlog& log) -> future<> {
        const auto size = log.max_record_size() + 1;
        bool rejected = false;
        try {
            co_await log.add_mutation(make_table_id(), size, db::commitlog::force_sync::no, [size](db::commitlog::output& dst) {
                dst.fill(char(1), size);
            });
        } catch (...) {
            // ok.
            rejected = true;
        }
        if (!rejected) {
            throw std::runtime_error("Did not get expected exception from writing too large record");
        }
    });
}
// Writing to a commitlog that has already been shut down must fail with
// gate_closed_exception whose message contains "gate closed".
SEASTAR_TEST_CASE(test_commitlog_closed) {
    commitlog::config cfg;
    return cl_test(cfg, [](commitlog& log) -> future<> {
        co_await log.shutdown();

        const sstring payload = "test321";
        const auto uuid = make_table_id();
        co_await log.add_mutation(uuid, payload.size(), db::commitlog::force_sync::no, [&payload](db::commitlog::output& dst) {
            dst.write(payload.data(), payload.size());
        }).then_wrapped([] (future<db::rp_handle> f) {
            BOOST_REQUIRE_EXCEPTION(f.get(), gate_closed_exception, exception_predicate::message_contains("gate closed"));
        });
    });
}
// Configures a total space limit (1MB) below the size of a single segment (2MB)
// while allowing the log to go over the limit, and registers a flush handler that
// discards completed segments. Writes until more than two distinct segment ids were
// seen and the handler has signalled, then checks that some segment names recorded
// by the handler are no longer active.
SEASTAR_TEST_CASE(test_commitlog_delete_when_over_disk_limit) {
commitlog::config cfg;
constexpr auto max_size_mb = 2;
cfg.commitlog_segment_size_in_mb = max_size_mb;
cfg.commitlog_total_space_in_mb = 1;
cfg.commitlog_sync_period_in_ms = 1;
cfg.allow_going_over_size_limit = true;
return cl_test(cfg, [](commitlog& log) {
// Signalled once per flush-handler invocation.
auto sem = make_lw_shared<semaphore>(0);
// Every segment name that was active during some flush-handler invocation.
auto segments = make_lw_shared<std::set<sstring>>();
// add a flush handler that simply says we're done with the range.
auto r = log.add_flush_handler([&log, sem, segments](cf_id_type id, replay_position pos) {
auto active_segments = log.get_active_segment_names();
for (auto&& s : active_segments) {
segments->insert(s);
}
// Verify #5899 - file size should not exceed the config max.
return parallel_for_each(active_segments, [](sstring filename) {
return file_size(filename).then([](uint64_t size) {
BOOST_REQUIRE_LE(size, max_size_mb * 1024 * 1024);
});
}).then([&log, sem, id] {
log.discard_completed_segments(id);
sem->signal();
});
});
// Distinct segment ids of the entries written so far.
auto set = make_lw_shared<std::set<segment_id_type>>();
auto uuid = make_table_id();
// Note: try_wait() consumes one semaphore unit when it succeeds, and is only
// evaluated once more than two segment ids have been collected.
return do_until([set, sem]() {return set->size() > 2 && sem->try_wait();},
[&log, set, uuid]() {
sstring tmp = "hej bubba cow";
return log.add_mutation(uuid, tmp.size(), db::commitlog::force_sync::no, [tmp](db::commitlog::output& dst) {
dst.write(tmp.data(), tmp.size());
}).then([set](rp_handle h) {
BOOST_CHECK_NE(h.rp(), db::replay_position());
// release() gives up the handle and returns its replay position; only the segment id is kept.
set->insert(h.release().id);
});
}).then([&log, segments]() {
// nn: recorded names that are no longer active;
// dn: number of destroyed segments as reported by the log.
segment_names names(segments->begin(), segments->end());
auto diff = segment_diff(log, names);
auto nn = diff.size();
auto dn = log.get_num_segments_destroyed();
BOOST_REQUIRE(nn > 0);
BOOST_REQUIRE(nn <= segments->size());
BOOST_REQUIRE(dn <= nn);
// `r` (the value returned by add_flush_handler) is kept alive until the chain completes.
}).finally([r = std::move(r)] {
});
});
}
// Writes identical entries until the rp_set reports a size of two, then reads back
// the segment file with the lowest id, both before and after syncing, and compares
// the number of entries read with the number written while the set size was one.
SEASTAR_TEST_CASE(test_commitlog_reader){
// Reads the segment file at `path`, checks every entry's payload and returns the
// number of entries seen. A segment_truncation error ends the count early
// instead of failing.
static auto count_mutations_in_segment = [] (sstring path) -> future<size_t> {
size_t count = 0;
try {
co_await db::commitlog::read_log_file(path, db::commitlog::descriptor::FILENAME_PREFIX, [&count](db::commitlog::buffer_and_replay_position buf_rp) -> future<> {
auto&& [buf, rp] = buf_rp;
// Linearize the (possibly fragmented) buffer so it can be compared as one string.
auto linearization_buffer = bytes_ostream();
auto in = buf.get_istream();
auto str = to_string_view(in.read_bytes_view(buf.size_bytes(), linearization_buffer));
BOOST_CHECK_EQUAL(str, "hej bubba cow");
count++;
co_return;
});
} catch (commitlog::segment_truncation&) {
// ok. this does not ensure to have fully synced segments
// before reading them in all code paths. We can end up
// hitting (premature) eof or even half-written page.
}
co_return count;
};
commitlog::config cfg;
cfg.commitlog_segment_size_in_mb = 1;
return cl_test(cfg, [](commitlog& log) -> future<> {
rp_set set;
// Number of entries written while `set` still reported a size of one.
size_t count = 0;
auto uuid = make_table_id();
sstring tmp = "hej bubba cow";
while (set.size() < 2) {
auto h = co_await log.add_mutation(uuid, tmp.size(), db::commitlog::force_sync::no, [&tmp](db::commitlog::output& dst) {
dst.write(tmp.data(), tmp.size());
});
BOOST_CHECK_NE(db::replay_position(), h.rp());
set.put(std::move(h));
if (set.size() == 1) {
++count;
}
}
auto segments = log.get_active_segment_names();
BOOST_REQUIRE(segments.size() > 1);
// Pick the smallest id recorded in the set and locate the segment file carrying it.
auto ids = set.usage() | std::views::keys | std::ranges::to<std::vector>();
std::sort(ids.begin(), ids.end());
auto id = ids.front();
auto i = std::find_if(segments.begin(), segments.end(), [id](sstring filename) {
commitlog::descriptor desc(filename, db::commitlog::descriptor::FILENAME_PREFIX);
return desc.id == id;
});
if (i == segments.end()) {
throw std::runtime_error("Did not find expected log file");
}
sstring segment_path = *i;
// Check reading from an unsynced segment
// (may yield fewer entries than written, hence the >= comparison)
auto replay_count = co_await count_mutations_in_segment(segment_path);
BOOST_CHECK_GE(count, replay_count);
co_await log.sync_all_segments();
// Check reading from a synced segment
auto replay_count2 = co_await count_mutations_in_segment(segment_path);
BOOST_CHECK_EQUAL(count, replay_count2);
});
}
/// Overwrites the `sizeof(value)` bytes at byte offset `off` of file `seg` with
/// the bytes of `value`.
///
/// The file is opened via open_file_dma(), and the patch is applied by reading the
/// file prefix [0, size) into a buffer, modifying it in memory and writing the whole
/// prefix back. `size` is the end of the patched range rounded up to 4096 bytes.
/// The file is closed whether or not the read/write succeeded.
///
/// @param seg   path of the segment file to patch
/// @param off   byte offset of the first byte to overwrite
/// @param value bytes written at `off` (in host byte order, as laid out in memory)
static future<> corrupt_segment(sstring seg, uint64_t off, uint32_t value) {
    return open_file_dma(seg, open_flags::rw).then([off, value](file f) {
        // Round up the END of the patched range, not its start: rounding up `off`
        // alone yields a buffer that is too short whenever `off` lies within
        // sizeof(value) bytes of a 4096-byte boundary (including exactly on it),
        // and the copy below would then write past the end of the buffer.
        size_t size = align_up<size_t>(off + sizeof(value), 4096);
        return do_with(std::move(f), [size, off, value](file& f) {
            return f.dma_read_exactly<char>(0, size).then([&f, off, value](auto buf) {
                std::copy_n(reinterpret_cast<const char*>(&value), sizeof(value), buf.get_write() + off);
                auto dst = buf.get();
                auto size = buf.size();
                // Keep the buffer alive until the write has completed.
                return f.dma_write(0, dst, size).then([buf = std::move(buf)](size_t) {});
            }).finally([&f] {
                return f.close();
            });
        });
    });
}
// Writes two entries, syncs, patches four bytes at offset +4 of the second entry's
// position and checks that reading the segment hands out nothing but the first
// entry before failing with segment_data_corruption_error.
SEASTAR_TEST_CASE(test_commitlog_entry_corruption){
    commitlog::config cfg;
    cfg.commitlog_segment_size_in_mb = 1;
    return cl_test(cfg, [](commitlog& log) -> future<> {
        std::vector<db::replay_position> positions;
        while (!(positions.size() > 1)) {
            const auto uuid = make_table_id();
            const sstring payload = "hej bubba cow";
            auto h = co_await log.add_mutation(uuid, payload.size(), db::commitlog::force_sync::no, [&payload](db::commitlog::output& dst) {
                dst.write(payload.data(), payload.size());
            });
            BOOST_CHECK_NE(h.rp(), db::replay_position());
            positions.push_back(h.release());
        }
        co_await log.sync_all_segments();

        auto segments = log.get_active_segment_names();
        BOOST_REQUIRE(!segments.empty());
        const auto seg = segments[0];
        co_await corrupt_segment(seg, positions.at(1).pos + 4, 0x451234ab);

        try {
            co_await db::commitlog::read_log_file(seg, db::commitlog::descriptor::FILENAME_PREFIX, [&positions](db::commitlog::buffer_and_replay_position buf_rp) {
                auto&& [buf, rp] = buf_rp;
                BOOST_CHECK_EQUAL(rp, positions.at(0));
                return make_ready_future<>();
            });
            BOOST_FAIL("Expected exception");
        } catch (commitlog::segment_data_corruption_error& e) {
            // ok.
            BOOST_REQUIRE(e.bytes() > 0);
        }
    });
}
// Writes two entries, syncs, patches the four bytes immediately preceding the first
// entry's position and checks that reading the segment delivers no entry at all
// and fails with segment_data_corruption_error.
SEASTAR_TEST_CASE(test_commitlog_chunk_corruption){
    commitlog::config cfg;
    cfg.commitlog_segment_size_in_mb = 1;
    return cl_test(cfg, [](commitlog& log) -> future<> {
        std::vector<db::replay_position> positions;
        while (!(positions.size() > 1)) {
            const auto uuid = make_table_id();
            const sstring payload = "hej bubba cow";
            auto h = co_await log.add_mutation(uuid, payload.size(), db::commitlog::force_sync::no, [&payload](db::commitlog::output& dst) {
                dst.write(payload.data(), payload.size());
            });
            BOOST_CHECK_NE(h.rp(), db::replay_position());
            positions.push_back(h.release());
        }
        co_await log.sync_all_segments();

        auto segments = log.get_active_segment_names();
        BOOST_REQUIRE(!segments.empty());
        const auto seg = segments[0];
        const auto cpos = positions.at(0).pos - 4;
        co_await corrupt_segment(seg, cpos, 0x451234ab);

        try {
            co_await db::commitlog::read_log_file(seg, db::commitlog::descriptor::FILENAME_PREFIX, [](db::commitlog::buffer_and_replay_position buf_rp) {
                BOOST_FAIL("Should not reach");
                return make_ready_future<>();
            });
            BOOST_FAIL("Expected exception");
        } catch (commitlog::segment_data_corruption_error& e) {
            // ok.
            BOOST_REQUIRE(e.bytes() > 0);
        }
    });
}
// Writes more than 128*1024/8 entries, syncs, then patches four bytes at the
// position of the entry halfway through those belonging to the first active
// segment. Reading that segment must never hand out the patched position and must
// fail with segment_data_corruption_error.
SEASTAR_TEST_CASE(test_commitlog_chunk_corruption2){
    commitlog::config cfg;
    cfg.commitlog_segment_size_in_mb = 1;
    return cl_test(cfg, [](commitlog& log) -> future<> {
        std::vector<db::replay_position> positions;
        // write enough entries to fill more than one chunk
        while (!(positions.size() > (128*1024/8))) {
            const auto uuid = make_table_id();
            const sstring payload = "hej bubba cow";
            auto h = co_await log.add_mutation(uuid, payload.size(), db::commitlog::force_sync::no, [&payload](db::commitlog::output& dst) {
                dst.write(payload.data(), payload.size());
            });
            BOOST_CHECK_NE(h.rp(), db::replay_position());
            positions.push_back(h.release());
        }
        co_await log.sync_all_segments();

        auto segments = log.get_active_segment_names();
        BOOST_REQUIRE(!segments.empty());
        const auto seg = segments[0];
        const auto desc = commitlog::descriptor(seg);
        // First recorded position that lies outside the chosen segment.
        const auto first_foreign = std::find_if(positions.begin(), positions.end(), [&desc](auto& rp) {
            return rp.id != desc.id;
        });
        const auto idx = std::distance(positions.begin(), first_foreign);
        const auto cpos = positions.at(idx/2).pos;
        co_await corrupt_segment(seg, cpos, 0x451234ab);

        try {
            co_await db::commitlog::read_log_file(seg, db::commitlog::descriptor::FILENAME_PREFIX, [cpos](db::commitlog::buffer_and_replay_position buf_rp) {
                BOOST_CHECK_NE(buf_rp.position.pos, cpos);
                return make_ready_future<>();
            });
            BOOST_FAIL("Expected exception");
        } catch (commitlog::segment_data_corruption_error& e) {
            // ok.
            BOOST_REQUIRE(e.bytes() > 0);
        }
    });
}
// Writes entries (dropping each handle right away) with a hard total-space limit
// until the segment id has changed more than three times, then reads the segment
// holding the last entry and expects the read to end in segment_truncation after
// delivering only positions up to that last entry.
SEASTAR_TEST_CASE(test_commitlog_chunk_corruption3){
commitlog::config cfg;
cfg.commitlog_segment_size_in_mb = 1;
cfg.commitlog_total_space_in_mb = 2 * smp::count;
cfg.allow_going_over_size_limit = false;
return cl_test(cfg, [](commitlog& log) -> future<> {
auto uuid = make_table_id();
sstring tmp = "hej bubba cow";
// Segment id of the previous entry, if any.
std::optional<db::segment_id_type> last;
// Replay position of the final entry written.
db::replay_position last_rp;
// Number of times the segment id differed from the previous entry's.
int n = 0;
for (;;) {
auto h = co_await log.add_mutation(uuid, tmp.size(), db::commitlog::force_sync::no, [&](db::commitlog::output& dst) {
dst.write(tmp.data(), tmp.size());
});
BOOST_CHECK_NE(h.rp(), db::replay_position());
// release data immediately
// (the handle `h` goes out of scope at the end of each iteration)
auto rp = h.rp();
auto id = rp.id;
if (last && last != id) {
// this should mean we are in a recycled segment.
if (++n > 3) {
last_rp = h.release(); // prevent removal for now.
break;
}
}
last = id;
}
co_await log.sync_all_segments();
auto segments = log.get_active_segment_names();
BOOST_REQUIRE(!segments.empty());
// Locate the segment file whose id matches the last entry.
for (auto& seg : segments) {
auto desc = commitlog::descriptor(seg);
if (desc.id == last_rp.id) {
try {
co_await db::commitlog::read_log_file(seg, db::commitlog::descriptor::FILENAME_PREFIX, [&](db::commitlog::buffer_and_replay_position buf_rp) {
// Nothing past the last written entry may be handed out.
BOOST_CHECK_LE(buf_rp.position, last_rp);
return make_ready_future<>();
});
BOOST_FAIL("Expected exception");
} catch (commitlog::segment_truncation& e) {
// ok.
}
co_return;
}
}
BOOST_FAIL("Did not find segment");
});
}
// Writes one record of exactly max_record_size() random bytes, syncs, and checks
// that reading the owning segment yields the same replay position and, byte for
// byte, the same payload.
SEASTAR_TEST_CASE(test_commitlog_replay_single_large_mutation){
    commitlog::config cfg;
    cfg.commitlog_segment_size_in_mb = 4;
    cfg.commitlog_total_space_in_mb = 2 * cfg.commitlog_segment_size_in_mb * smp::count;
    cfg.allow_going_over_size_limit = false;

    return cl_test(cfg, [](commitlog& log) -> future<> {
        auto uuid = make_table_id();
        auto size = log.max_record_size();

        auto buf = fragmented_temporary_buffer::allocate_to_fit(size);
        std::random_device rd;
        std::mt19937 gen(rd());
        // std::uniform_int_distribution is only specified for short/int/long/long long
        // and their unsigned counterparts; instantiating it with char is undefined.
        // Draw ints covering every byte value and narrow them instead.
        std::uniform_int_distribution<int> dist(0, 255);

        auto out = buf.get_ostream();
        for (size_t i = 0; i < size; ++i) {
            auto c = static_cast<char>(dist(gen));
            out.write(&c, 1);
        }

        auto h = co_await log.add_mutation(uuid, size, db::commitlog::force_sync::no, [&](db::commitlog::output& dst) {
            for (auto& tmp : buf) {
                dst.write(tmp.get(), tmp.size());
            }
        });

        auto rp = h.release();

        co_await log.sync_all_segments();

        auto segments = log.get_active_segment_names();
        BOOST_REQUIRE(!segments.empty());

        // Find the segment file that holds the record and compare its content.
        for (auto& seg : segments) {
            auto desc = commitlog::descriptor(seg);
            if (desc.id == rp.id) {
                co_await db::commitlog::read_log_file(seg, db::commitlog::descriptor::FILENAME_PREFIX, [&](db::commitlog::buffer_and_replay_position buf_rp) {
                    BOOST_CHECK_EQUAL(buf_rp.position, rp);
                    auto& rp_buf = buf_rp.buffer;
                    auto in1 = buf.get_istream();
                    auto in2 = rp_buf.get_istream();
                    for (size_t i = 0; i < size; ++i) {
                        auto c1 = in1.read<char>();
                        auto c2 = in2.read<char>();
                        BOOST_CHECK_EQUAL(c1, c2);
                    }
                    return make_ready_future<>();
                });
                co_return;
            }
        }
        BOOST_FAIL("Did not find segment");
    });
}
/**
 * Same check as test_commitlog_replay_single_large_mutation, but with four
 * records of max_record_size() / 2 random bytes each, which will also ensure
 * the seek mechanism in replayer is working, since we will span multiple chunks.
 */
SEASTAR_TEST_CASE(test_commitlog_replay_large_mutations){
    commitlog::config cfg;
    cfg.commitlog_segment_size_in_mb = 14;
    cfg.commitlog_total_space_in_mb = 2 * cfg.commitlog_segment_size_in_mb * smp::count;
    cfg.allow_going_over_size_limit = false;

    return cl_test(cfg, [](commitlog& log) -> future<> {
        auto uuid = make_table_id();
        auto size = log.max_record_size() / 2;

        // Payload written for each replay position, used to verify the read-back.
        std::unordered_map<replay_position, fragmented_temporary_buffer> buffers;

        // One generator for all records; there is no need to reseed per record.
        std::random_device rd;
        std::mt19937 gen(rd());
        // std::uniform_int_distribution is only specified for short/int/long/long long
        // and their unsigned counterparts; instantiating it with char is undefined.
        // Draw ints covering every byte value and narrow them instead.
        std::uniform_int_distribution<int> dist(0, 255);

        for (size_t record = 0; record < 4; ++record) {
            auto buf = fragmented_temporary_buffer::allocate_to_fit(size);
            auto out = buf.get_ostream();
            for (size_t i = 0; i < size; ++i) {
                auto c = static_cast<char>(dist(gen));
                out.write(&c, 1);
            }

            auto h = co_await log.add_mutation(uuid, size, db::commitlog::force_sync::no, [&](db::commitlog::output& dst) {
                for (auto& tmp : buf) {
                    dst.write(tmp.get(), tmp.size());
                }
            });

            auto rp = h.release();
            buffers.emplace(rp, std::move(buf));
        }

        co_await log.sync_all_segments();

        auto segments = log.get_active_segment_names();
        BOOST_REQUIRE(!segments.empty());

        // Number of records read back and verified.
        size_t n = 0;

        for (auto& seg : segments) {
            auto desc = commitlog::descriptor(seg);
            // Only read segments that hold at least one of the written records.
            if (std::any_of(buffers.begin(), buffers.end(), [&](auto& p) { return p.first.id == desc.id; })) {
                co_await db::commitlog::read_log_file(seg, db::commitlog::descriptor::FILENAME_PREFIX, [&](db::commitlog::buffer_and_replay_position buf_rp) {
                    auto& buf = buffers.at(buf_rp.position);
                    auto& rp_buf = buf_rp.buffer;
                    auto in1 = buf.get_istream();
                    auto in2 = rp_buf.get_istream();
                    for (size_t i = 0; i < size; ++i) {
                        auto c1 = in1.read<char>();
                        auto c2 = in2.read<char>();
                        BOOST_CHECK_EQUAL(c1, c2);
                    }
                    ++n;
                    return make_ready_future<>();
                });
            }
        }
        BOOST_CHECK_EQUAL(n, buffers.size());
    });
}
// Tests #15269 - skipping past EOF
// Fills one segment, corrupts an entry located between 128k and 192k, truncates
// the file at 192k and checks that reading it stops inside that window instead of
// trying to read beyond the end of the file.
SEASTAR_TEST_CASE(test_commitlog_chunk_truncation) {
commitlog::config cfg;
cfg.commitlog_segment_size_in_mb = 1;
return cl_test(cfg, [](commitlog& log) -> future<> {
auto uuid = make_table_id();
sstring tmp = "hej bubba cow";
// rp: last position written into the first segment; corrupt: position of the entry to damage.
db::replay_position rp, corrupt;
// we want to corrupt somewhere in second chunk, before
// middle of it. I.e. between 128k and 192k
constexpr auto min_corrupt_pos = 128*1024;
constexpr auto max_corrupt_pos = 128*1024 + 64 * 1024;
// fill one full segment.
for (;;) {
auto h = co_await log.add_mutation(uuid, tmp.size(), db::commitlog::force_sync::no, [tmp](db::commitlog::output& dst) {
dst.write(tmp.data(), tmp.size());
});
auto nrp = h.release();
// Stop at the first entry whose base id differs from the previous one.
if (rp != db::replay_position() && nrp.base_id() != rp.base_id()) {
break;
}
rp = nrp;
// Remember the latest entry that lies inside the target window.
// NOTE(review): the extra 12 bytes presumably account for per-entry overhead - confirm.
if (rp.pos > min_corrupt_pos && (rp.pos + tmp.size() + 12) < max_corrupt_pos) {
corrupt = rp;
}
}
co_await log.sync_all_segments();
auto segments = log.get_active_segment_names();
BOOST_REQUIRE(!segments.empty());
BOOST_CHECK_NE(rp, db::replay_position());
BOOST_CHECK_NE(corrupt, db::replay_position());
BOOST_CHECK_EQUAL(rp.base_id(), corrupt.base_id());
for (auto& seg : segments) {
commitlog::descriptor d(seg);
if (d.id == rp.base_id()) {
// Corrupt the entry so we skip the rest of the chunk.
// ('bose' is a multi-character literal, so its numeric value is
// implementation-defined; presumably any value differing from the
// real content would do here.)
co_await corrupt_segment(seg, corrupt.pos + 4, 'bose');
auto f = co_await open_file_dma(seg, open_flags::rw);
// then truncate the file so skipping the chunk will
// cause EOF.
co_await f.truncate(max_corrupt_pos);
co_await f.close();
// Reading this segment will now get corruption at the above position,
// right before where we have truncated the file. It will try to skip
// to next chunk, which is past actual EOF. If #15269 is broken, this
// will SCYLLA_ASSERT and crash in file_data_source_impl. If not, we should
// get a corruption exception and no more entries past the corrupt one.
db::position_type pos = 0;
try {
co_await db::commitlog::read_log_file(seg, db::commitlog::descriptor::FILENAME_PREFIX, [&](db::commitlog::buffer_and_replay_position buf_rp) {
pos = buf_rp.position.pos; // keep track of how far we reach in the segment.
return make_ready_future<>();
});
} catch (commitlog::segment_data_corruption_error& e) {
// ok.
BOOST_CHECK_GT(e.bytes(), 0);
}
// The last entry delivered must lie inside the corruption window.
BOOST_CHECK_GT(pos, min_corrupt_pos);
BOOST_CHECK_LT(pos, max_corrupt_pos);
co_return;
}
}
BOOST_FAIL("Should not reach");
});
}
// An exception produced by the read callback must surface from read_log_file()
// as that same exception type (std::runtime_error).
SEASTAR_TEST_CASE(test_commitlog_reader_produce_exception){
    commitlog::config cfg;
    cfg.commitlog_segment_size_in_mb = 1;
    return cl_test(cfg, [](commitlog& log) -> future<> {
        std::vector<db::replay_position> positions;
        while (!(positions.size() > 1)) {
            const auto uuid = make_table_id();
            const sstring payload = "hej bubba cow";
            auto h = co_await log.add_mutation(uuid, payload.size(), db::commitlog::force_sync::no, [&payload](db::commitlog::output& dst) {
                dst.write(payload.data(), payload.size());
            });
            BOOST_CHECK_NE(h.rp(), db::replay_position());
            positions.push_back(h.release());
        }
        co_await log.sync_all_segments();

        auto segments = log.get_active_segment_names();
        BOOST_REQUIRE(!segments.empty());
        const auto seg = segments[0];

        try {
            co_await db::commitlog::read_log_file(seg, db::commitlog::descriptor::FILENAME_PREFIX, [](db::commitlog::buffer_and_replay_position buf_rp) {
                return make_exception_future(std::runtime_error("I am in a throwing mode"));
            });
            BOOST_FAIL("Expected exception");
        } catch (std::runtime_error&) {
            // Ok
        } catch (...) {
            // Anything other than std::runtime_error is a failure.
            BOOST_FAIL("Wrong exception");
        }
    });
}
// Collectd ids with plugin "commitlog" must exist only while a commitlog is alive:
// none before it is created, some while the test body runs, none after teardown.
SEASTAR_TEST_CASE(test_commitlog_counters) {
    auto commitlog_id_count = []() -> size_t {
        size_t matches = 0;
        for (const scollectd::type_instance_id& id : scollectd::get_collectd_ids()) {
            if (id.plugin() == "commitlog") {
                ++matches;
            }
        }
        return matches;
    };

    BOOST_CHECK_EQUAL(commitlog_id_count(), 0);

    auto while_alive = [commitlog_id_count](commitlog& log) {
        BOOST_CHECK_GT(commitlog_id_count(), 0);
        return make_ready_future<>();
    };
    auto after_teardown = [commitlog_id_count] {
        BOOST_CHECK_EQUAL(commitlog_id_count(), 0);
    };
    return cl_test(std::move(while_alive)).finally(std::move(after_teardown));
}
#ifndef SEASTAR_DEFAULT_ALLOCATOR
// Exhausts memory, frees a single fragment-sized chunk, and then expects
// add_mutation() of a record one byte below the maximum record size to fail
// with std::bad_alloc. Any other outcome (success or a different exception)
// fails the test.
SEASTAR_TEST_CASE(test_allocation_failure){
return cl_test([](commitlog& log) {
auto size = log.max_record_size() - 1;
auto junk = make_lw_shared<std::list<std::unique_ptr<char[]>>>();
// Use up loads of memory so we can OOM at the appropriate place
try {
// The record must be bigger than the one chunk released below.
SCYLLA_ASSERT(fragmented_temporary_buffer::default_fragment_size < size);
for (;;) {
junk->emplace_back(new char[fragmented_temporary_buffer::default_fragment_size]);
}
} catch (std::bad_alloc&) {
// Memory is now exhausted; this is the state the test needs.
}
// Give back exactly one chunk (the last one allocated).
auto last = junk->end();
junk->erase(--last);
return log.add_mutation(make_table_id(), size, db::commitlog::force_sync::no, [size](db::commitlog::output& dst) {
dst.fill(char(1), size);
}).then_wrapped([junk, size](future<db::rp_handle> f) {
std::exception_ptr ep;
try {
f.get();
// Thrown inside the try on purpose: it is picked up by the catch-all
// below and reported through the final throw.
throw std::runtime_error(format("Adding mutation of size {} succeeded unexpectedly", size));
} catch (std::bad_alloc&) {
// ok. this is what we expected
// Release the hoarded memory before finishing.
junk->clear();
return make_ready_future();
} catch (...) {
ep = std::current_exception();
}
throw std::runtime_error(format("Got an unexpected exception from writing too large record: {}", ep));
});
});
}
#endif
// Writes two entries straight into the table's commitlog: one with an empty
// partition key (the "invalid key" of the test name) and one with a generated
// key for the same shard. After replaying the active segments, exactly one
// mutation must be readable from the table's memtables.
SEASTAR_TEST_CASE(test_commitlog_replay_invalid_key){
    return do_with_cql_env_thread([] (cql_test_env& env) {
        env.execute_cql("create table t (pk text primary key, v text)").get();

        auto& db = env.local_db();
        auto& table = db.find_column_family("ks", "t");
        auto& cl = *table.commitlog();
        auto s = table.schema();
        auto& sharder = table.get_effective_replication_map()->get_sharder(*table.schema());
        auto memtables = active_memtables(table);

        // Adds a commitlog entry for `key` (bypassing the memtable) and returns
        // the shard that owns the resulting mutation's token.
        auto add_entry = [&cl, s, &sharder] (const partition_key& key) mutable {
            auto description = tests::data_model::mutation_description(key.explode());
            description.add_clustered_cell({}, "v", to_bytes("val"));
            auto built = description.build(s);
            auto frozen = freeze(built);
            commitlog_entry_writer writer(s, frozen, db::commitlog::force_sync::yes);
            cl.add_entry(built.column_family_id(), writer, db::no_timeout).get();
            return sharder.shard_for_reads(built.token());
        };

        const auto shard = add_entry(partition_key::make_empty());
        auto dk = tests::generate_partition_key(s, shard);
        add_entry(std::move(dk.key()));

        // Nothing went through the memtables so far.
        BOOST_REQUIRE(std::ranges::all_of(memtables, [] (const auto& mt) {
            return mt->empty();
        }));

        {
            auto paths = cl.get_active_segment_names();
            BOOST_REQUIRE(!paths.empty());
            auto replayer = db::commitlog_replayer::create_replayer(env.db(), env.get_system_keyspace()).get();
            replayer.recover(paths, db::commitlog::descriptor::FILENAME_PREFIX).get();
        }

        {
            auto permit = db.get_reader_concurrency_semaphore().make_tracking_only_permit(s, "test", db::no_timeout, {});
            std::vector<mutation_reader> readers;
            readers.reserve(memtables.size());
            for (auto mt : memtables) {
                readers.push_back(mt->make_mutation_reader(s, permit));
            }
            auto combined = make_combined_reader(s, permit, std::move(readers));
            auto close_combined = deferred_close(combined);

            // First read yields a mutation, the second one must hit end of stream.
            auto mopt = read_mutation_from_mutation_reader(combined).get();
            BOOST_REQUIRE(mopt);
            mopt = {};
            mopt = read_mutation_from_mutation_reader(combined).get();
            BOOST_REQUIRE(!mopt);
        }
    });
}
using namespace std::chrono_literals;
// Writes ten random mutations one at a time through add_entry(), once per
// force_sync setting, then reads the active segments back and checks that
// every written entry is found and unfreezes to the same mutation.
SEASTAR_TEST_CASE(test_commitlog_add_entry) {
    return cl_test([](commitlog& log) {
        return seastar::async([&] {
            using force_sync = commitlog_entry_writer::force_sync;
            constexpr auto n = 10;

            for (auto sync_mode : { force_sync(false), force_sync(true) }) {
                std::vector<commitlog_entry_writer> writers;
                utils::chunked_vector<frozen_mutation> mutations;
                std::vector<replay_position> written;
                writers.reserve(n);
                mutations.reserve(n);

                for (auto k = 0; k < n; ++k) {
                    random_mutation_generator gen(random_mutation_generator::generate_counters(false));
                    mutations.emplace_back(gen(1).front());
                    writers.emplace_back(gen.schema(), mutations.back(), sync_mode);
                }

                // All entries are expected to land in a single segment.
                std::set<segment_id_type> segment_ids;
                for (auto& writer : writers) {
                    auto handle = log.add_entry(writer.schema()->id(), writer, db::timeout_clock::now() + 60s).get();
                    segment_ids.emplace(handle.rp().id);
                    written.emplace_back(handle.rp());
                }
                BOOST_CHECK_EQUAL(segment_ids.size(), 1);

                log.sync_all_segments().get();

                auto segment_names = log.get_active_segment_names();
                BOOST_REQUIRE(!segment_names.empty());

                std::unordered_set<replay_position> seen;
                for (auto& name : segment_names) {
                    db::commitlog::read_log_file(name, db::commitlog::descriptor::FILENAME_PREFIX, [&](db::commitlog::buffer_and_replay_position buf_rp) {
                        commitlog_entry_reader reader(buf_rp.buffer);
                        auto& position = buf_rp.position;
                        auto hit = std::find(written.begin(), written.end(), position);
                        // Since we are looping, a segment may also hold entries from
                        // the previous force_sync permutation; skip those.
                        if (hit == written.end()) {
                            return make_ready_future<>();
                        }
                        auto idx = std::distance(written.begin(), hit);
                        auto schema = writers.at(idx).schema();
                        auto& expected_frozen = mutations.at(idx);
                        auto& actual_frozen = reader.mutation();
                        auto m1 = expected_frozen.unfreeze(schema);
                        auto m2 = actual_frozen.unfreeze(schema);
                        BOOST_CHECK_EQUAL(m1, m2);
                        seen.emplace(position);
                        return make_ready_future<>();
                    }).get();
                }
                BOOST_CHECK_EQUAL(seen.size(), written.size());
            }
        });
    });
}
// Same round-trip check as the single-entry variant, but all ten writers are
// submitted together through one add_entries() call per force_sync setting.
SEASTAR_TEST_CASE(test_commitlog_add_entries) {
    return cl_test([](commitlog& log) {
        return seastar::async([&] {
            using force_sync = commitlog_entry_writer::force_sync;
            constexpr auto n = 10;

            for (auto sync_mode : { force_sync(false), force_sync(true) }) {
                utils::chunked_vector<commitlog_entry_writer> writers;
                utils::chunked_vector<frozen_mutation> mutations;
                std::vector<replay_position> written;
                writers.reserve(n);
                mutations.reserve(n);

                for (auto k = 0; k < n; ++k) {
                    random_mutation_generator gen(random_mutation_generator::generate_counters(false));
                    mutations.emplace_back(gen(1).front());
                    writers.emplace_back(gen.schema(), mutations.back(), sync_mode);
                }

                auto handles = log.add_entries(writers, db::timeout_clock::now() + 60s).get();

                // The whole batch is expected to land in a single segment.
                std::set<segment_id_type> segment_ids;
                for (auto& handle : handles) {
                    segment_ids.emplace(handle.rp().id);
                    written.emplace_back(handle.rp());
                }
                BOOST_CHECK_EQUAL(segment_ids.size(), 1);

                log.sync_all_segments().get();

                auto segment_names = log.get_active_segment_names();
                BOOST_REQUIRE(!segment_names.empty());

                std::unordered_set<replay_position> seen;
                for (auto& name : segment_names) {
                    db::commitlog::read_log_file(name, db::commitlog::descriptor::FILENAME_PREFIX, [&](db::commitlog::buffer_and_replay_position buf_rp) {
                        commitlog_entry_reader reader(buf_rp.buffer);
                        auto& position = buf_rp.position;
                        auto hit = std::find(written.begin(), written.end(), position);
                        // Since we are looping, a segment may also hold entries from
                        // the previous force_sync permutation; skip those.
                        if (hit == written.end()) {
                            return make_ready_future<>();
                        }
                        auto idx = std::distance(written.begin(), hit);
                        auto schema = writers.at(idx).schema();
                        auto& expected_frozen = mutations.at(idx);
                        auto& actual_frozen = reader.mutation();
                        auto m1 = expected_frozen.unfreeze(schema);
                        auto m2 = actual_frozen.unfreeze(schema);
                        BOOST_CHECK_EQUAL(m1, m2);
                        seen.emplace(position);
                        return make_ready_future<>();
                    }).get();
                }
                BOOST_CHECK_EQUAL(seen.size(), written.size());
            }
        });
    });
}
// #16298 - the replay position handed out when an entry is written must be the
// same position that is reported for that entry when the segment is read back.
SEASTAR_TEST_CASE(test_commitlog_entry_offsets) {
    commitlog::config cfg;
    cfg.commitlog_segment_size_in_mb = 1;
    cfg.commitlog_total_space_in_mb = 2 * smp::count;
    cfg.allow_going_over_size_limit = false;

    return cl_test(cfg, [](commitlog& log) -> future<> {
        sstring payload = "hej bubba cow";
        auto table = make_table_id();

        auto handle = co_await log.add_mutation(table, payload.size(), db::commitlog::force_sync::no, [&](db::commitlog::output& out) {
            out.write(payload.data(), payload.size());
        });
        auto written_at = handle.release();

        // The first entry sits at file offset 32 (file header + chunk header).
        BOOST_CHECK_EQUAL(written_at.pos, 24 + 8); // TODO: export these sizes for tests.

        co_await log.sync_all_segments();

        auto names = log.get_active_segment_names();
        BOOST_REQUIRE(!names.empty());

        for (auto& name : names) {
            auto desc = commitlog::descriptor(name);
            if (desc.id != written_at.id) {
                continue;
            }
            // This is the segment we wrote to: every entry read from it must
            // carry the position we were given, and at least one must be read.
            bool seen = false;
            co_await db::commitlog::read_log_file(name, db::commitlog::descriptor::FILENAME_PREFIX, [&](db::commitlog::buffer_and_replay_position buf_rp) {
                BOOST_CHECK_EQUAL(buf_rp.position, written_at);
                seen = true;
                return make_ready_future<>();
            });
            BOOST_REQUIRE(seen);
            co_return;
        }
        BOOST_FAIL("Did not find segment");
    });
}
// Writes small entries until the commitlog hands out a replay position in a
// different segment, then checks that the file of the segment written before
// the switch is no larger than the configured segment size.
SEASTAR_TEST_CASE(test_commitlog_max_segment_size) {
    commitlog::config cfg;
    cfg.commitlog_segment_size_in_mb = 1;
    cfg.commitlog_total_space_in_mb = 2 * smp::count;
    cfg.allow_going_over_size_limit = false;
    return cl_test(cfg, [max_size = cfg.commitlog_segment_size_in_mb](commitlog& log) -> future<> {
        auto uuid = make_table_id();
        sstring tmp = "hej bubba cow";
        // Id of the segment the previous write went to. It is deliberately not
        // updated on the iteration that detects the switch, so after the loop it
        // still names the segment that was filled.
        std::optional<db::segment_id_type> last;
        auto max_size_bytes = max_size * 1024 * 1024;
        for (;;) {
            auto h = co_await log.add_mutation(uuid, tmp.size(), db::commitlog::force_sync::no, [&](db::commitlog::output& dst) {
                dst.write(tmp.data(), tmp.size());
            });
            auto rp = h.release();
            if (last && last != rp.id) {
                break;
            }
            last = rp.id;
        }
        co_await log.sync_all_segments();
        auto segments = log.get_active_segment_names();
        BOOST_REQUIRE(!segments.empty());
        for (auto& seg : segments) {
            auto desc = commitlog::descriptor(seg);
            if (last == desc.id) {
                auto size = co_await file_size(seg);
                BOOST_REQUIRE_LE(size, max_size_bytes);
                co_return;
            }
        }
        BOOST_FAIL("Did not find segment");
    });
}
// With use_o_dsync enabled, every active segment file is expected to already
// have the full configured size (1MB here), i.e. to be pre-allocated.
SEASTAR_TEST_CASE(test_commitlog_new_segment_odsync){
    commitlog::config cfg;
    cfg.commitlog_segment_size_in_mb = 1;
    cfg.use_o_dsync = true;

    return cl_test(cfg, [](commitlog& log) -> future<> {
        auto table = make_table_id();
        rp_set held;

        // Keep writing (and holding on to the handles) until the rp_set reports
        // more than one element.
        while (held.size() < 2) {
            sstring payload = "hej bubba cow";
            rp_handle handle = co_await log.add_mutation(table, payload.size(), db::commitlog::force_sync::no, [payload](db::commitlog::output& out) {
                out.write(payload.data(), payload.size());
            });
            held.put(std::move(handle));
        }

        auto names = log.get_active_segment_names();
        BOOST_REQUIRE(names.size() > 1);

        // check that all the segments are pre-allocated.
        for (auto& name : names) {
            auto file = co_await seastar::open_file_dma(name, seastar::open_flags::ro);
            auto bytes = co_await file.size();
            co_await file.close();
            BOOST_CHECK_EQUAL(bytes, 1024u*1024u);
        }
    });
}
// Test for #8363
// Try to provoke the edge case where segment deletion races with
// waiting for the pool of recycled segments to be replenished.
//
// The test keeps writing half-max-size records. The flush handler parks the
// current rp_set in a queue and, once the disk footprint has reached the disk
// limit, arms a 5s timer that discards the queued sets. Writing continues until
// at least 10 sets were discarded and both sampled counters are non-zero.
SEASTAR_TEST_CASE(test_commitlog_deadlock_in_recycle) {
commitlog::config cfg;
constexpr auto max_size_mb = 2;
cfg.commitlog_segment_size_in_mb = max_size_mb;
// ensure total size per shard is not multiple of segment size.
cfg.commitlog_total_space_in_mb = 5 * smp::count;
cfg.commitlog_sync_period_in_ms = 10;
cfg.allow_going_over_size_limit = false;
cfg.use_o_dsync = true; // make sure we pre-allocate.
// not using cl_test, because we need to be able to abandon
// the log.
tmpdir tmp;
cfg.commit_log_location = tmp.path().string();
auto log = co_await commitlog::create_commitlog(cfg);
// Handles written since the last flush request.
rp_set rps;
// Sets handed over by the flush handler, waiting to be discarded by the timer.
std::deque<rp_set> queue;
// Number of rp_sets discarded so far.
size_t n = 0;
// uncomment for verbosity
// logging::logger_registry().set_logger_level("commitlog", logging::log_level::debug);
auto uuid = make_table_id();
auto size = log.max_record_size() / 2;
timer<> t;
// Delayed release: discards everything queued up so far.
t.set_callback([&] {
while (!queue.empty()) {
auto flush = std::move(queue.front());
queue.pop_front();
log.discard_completed_segments(uuid, flush);
++n;
};
});
// Accumulated samples of the commitlog's counters, taken in the flush handler
// whenever the disk limit has been reached.
uint64_t num_active_allocations = 0, num_blocked_on_new_segment = 0;
// add a flush handler that delays releasing things until disk threshold is reached.
auto r = log.add_flush_handler([&](cf_id_type, replay_position pos) {
auto old = std::exchange(rps, rp_set{});
queue.emplace_back(std::move(old));
if (log.disk_footprint() >= log.disk_limit()) {
num_active_allocations += log.get_num_active_allocations();
num_blocked_on_new_segment += log.get_num_blocked_on_new_segment();
if (!t.armed()) {
t.arm(5s);
}
}
});
try {
// Each write gets 30s; a timeout is taken as a sign of a deadlock.
while (n < 10 || !num_active_allocations || !num_blocked_on_new_segment) {
auto now = timeout_clock::now();
rp_handle h = co_await with_timeout(now + 30s, log.add_mutation(uuid, size, db::commitlog::force_sync::no, [&](db::commitlog::output& dst) {
dst.fill('1', size);
}));
rps.put(std::move(h));
}
} catch (timed_out_error&) {
BOOST_ERROR("log write timed out. maybe it is deadlocked... Will not free log. ASAN errors and leaks will follow...");
abort();
}
co_await log.shutdown();
co_await log.clear();
BOOST_REQUIRE_GT(num_active_allocations, 0);
BOOST_REQUIRE_GT(num_blocked_on_new_segment, 0);
}
// Test for #8438 - ensure we can shut down (in orderly fashion)
// even if CL write is stuck waiting for segment recycle/alloc
//
// Writes until a write times out while the disk footprint is at or above the
// disk limit, then discards everything that was queued and requires
// shutdown() to finish within 30s.
SEASTAR_TEST_CASE(test_commitlog_shutdown_during_wait) {
commitlog::config cfg;
constexpr auto max_size_mb = 2;
cfg.commitlog_segment_size_in_mb = max_size_mb;
// ensure total size per shard is not multiple of segment size.
cfg.commitlog_total_space_in_mb = 5 * smp::count;
cfg.commitlog_sync_period_in_ms = 10;
cfg.allow_going_over_size_limit = false;
cfg.use_o_dsync = true; // make sure we pre-allocate.
// not using cl_test, because we need to be able to abandon
// the log.
tmpdir tmp;
cfg.commit_log_location = tmp.path().string();
auto log = co_await commitlog::create_commitlog(cfg);
// Handles written since the last flush request.
rp_set rps;
// Sets handed over by the flush handler; only discarded after the write loop.
std::deque<rp_set> queue;
// uncomment for verbosity
//logging::logger_registry().set_logger_level("commitlog", logging::log_level::debug);
auto uuid = make_table_id();
auto size = log.max_record_size() / 2;
// add a flush handler that does not discard anything: it only moves the
// current rp_set into the queue.
auto r = log.add_flush_handler([&](cf_id_type, replay_position pos) {
auto old = std::exchange(rps, rp_set{});
queue.emplace_back(std::move(old));
});
for (;;) {
try {
auto now = timeout_clock::now();
rp_handle h = co_await with_timeout(now + 10s, log.add_mutation(uuid, size, db::commitlog::force_sync::no, [&](db::commitlog::output& dst) {
dst.fill('1', size);
}));
rps.put(std::move(h));
} catch (timed_out_error&) {
// A timeout below the disk limit is ignored and the write is retried.
if (log.disk_footprint() >= log.disk_limit()) {
// now a segment alloc is waiting.
break;
}
}
}
// shut down is assumed to
// a.) stop allocating
// b.) ensure all segments get freed
while (!queue.empty()) {
auto flush = std::move(queue.front());
queue.pop_front();
log.discard_completed_segments(uuid, flush);
}
try {
auto now = timeout_clock::now();
// Only shutdown() is bounded by the timeout; clear() runs afterwards.
co_await with_timeout(now + 30s, log.shutdown());
co_await log.clear();
} catch (timed_out_error&) {
BOOST_ERROR("log shutdown timed out. maybe it is deadlocked... Will not free log. ASAN errors and leaks will follow...");
abort();
}
}
// Writes max-size records, each bounded by a 30s timeout, until the flush
// handler has been invoked once. A timed-out write is treated as a probable
// deadlock: it is reported and the process is aborted.
SEASTAR_TEST_CASE(test_commitlog_deadlock_with_flush_threshold) {
    constexpr auto max_size_mb = 1;

    commitlog::config cfg;
    cfg.commitlog_segment_size_in_mb = max_size_mb;
    cfg.commitlog_total_space_in_mb = 2 * max_size_mb * smp::count;
    cfg.commitlog_sync_period_in_ms = 10;
    cfg.allow_going_over_size_limit = false;
    cfg.use_o_dsync = true; // make sure we pre-allocate.

    // not using cl_test, because we need to be able to abandon
    // the log.
    tmpdir tmp;
    cfg.commit_log_location = tmp.path().string();
    auto log = co_await commitlog::create_commitlog(cfg);

    // uncomment for verbosity
    // logging::logger_registry().set_logger_level("commitlog", logging::log_level::debug);

    rp_set held;
    auto table = make_table_id();
    auto record_size = log.max_record_size();

    // Set by the flush handler, which also releases everything held so far.
    bool flush_requested = false;
    auto registration = log.add_flush_handler([&](cf_id_type id, replay_position pos) {
        log.discard_completed_segments(id, held);
        flush_requested = true;
    });

    try {
        while (!flush_requested) {
            const auto deadline = timeout_clock::now() + 30s;
            rp_handle handle = co_await with_timeout(deadline, log.add_mutation(table, record_size, db::commitlog::force_sync::no, [&](db::commitlog::output& out) {
                out.fill('1', record_size);
            }));
            held.put(std::move(handle));
        }
    } catch (timed_out_error&) {
        BOOST_ERROR("log write timed out. maybe it is deadlocked... Will not free log. ASAN errors and leaks will follow...");
        abort();
    }

    co_await log.shutdown();
    co_await log.clear();
}
// Shared body of the "exception in segment file allocation" tests.
//
// Installs a commitlog file extension whose wrap_file() throws exactly once,
// starting from the first flush request, and keeps writing max-size records
// until that exception has been thrown. No exception is expected to escape
// add_mutation(); if one does, it is reported and the process is aborted.
//
// do_file_delete: when true, the extension closes and removes the file before
//                 throwing.
static future<> do_test_exception_in_allocate_ex(bool do_file_delete) {
    commitlog::config cfg;
    constexpr auto max_size_mb = 1;
    cfg.commitlog_segment_size_in_mb = max_size_mb;
    cfg.commitlog_total_space_in_mb = 2 * max_size_mb * smp::count;
    cfg.commitlog_sync_period_in_ms = 10;
    cfg.allow_going_over_size_limit = false; // #9348 - now can enforce size limit always
    cfg.use_o_dsync = true; // make sure we pre-allocate.
    // not using cl_test, because we need to be able to abandon
    // the log.
    tmpdir tmp;
    cfg.commit_log_location = tmp.path().string();

    // Exception type injected by the extension below.
    class myfail : public std::exception {
    public:
        using std::exception::exception;
    };

    struct myext: public db::commitlog_file_extension {
    public:
        bool fail = false;      // armed by the flush handler
        bool thrown = false;    // set once the injected exception has been thrown
        bool do_file_delete;

        myext(bool dd)
            : do_file_delete(dd)
        {}

        seastar::future<seastar::file> wrap_file(const seastar::sstring& filename, seastar::file f, seastar::open_flags flags) override {
            if (fail && !thrown) {
                thrown = true;
                if (do_file_delete) {
                    co_await f.close();
                    co_await seastar::remove_file(filename);
                }
                throw myfail{};
            }
            co_return f;
        }
        seastar::future<> before_delete(const seastar::sstring&) override {
            co_return;
        }
    };

    auto ep = std::make_unique<myext>(do_file_delete);
    auto& mx = *ep;
    db::extensions myexts;
    myexts.add_commitlog_file_extension("hufflepuff", std::move(ep));
    cfg.extensions = &myexts;

    auto log = co_await commitlog::create_commitlog(cfg);
    rp_set rps;
    // uncomment for verbosity
    // logging::logger_registry().set_logger_level("commitlog", logging::log_level::debug);
    auto uuid = make_table_id();
    auto size = log.max_record_size();

    // On flush request: release what we hold and arm the failure injection.
    auto r = log.add_flush_handler([&](cf_id_type id, replay_position pos) {
        log.discard_completed_segments(id, rps);
        mx.fail = true;
    });

    try {
        while (!mx.thrown) {
            rp_handle h = co_await log.add_mutation(uuid, size, db::commitlog::force_sync::no, [&](db::commitlog::output& dst) {
                dst.fill('1', size);
            });
            rps.put(std::move(h));
        }
    } catch (...) {
        // There is no timeout in this loop, so report what was actually thrown
        // instead of claiming the write timed out.
        auto err = std::current_exception();
        BOOST_ERROR(format("log write failed with an unexpected exception: {}. Will not free log. ASAN errors and leaks will follow...", err));
        abort();
    }
    co_await log.shutdown();
    co_await log.clear();
}
/**
 * Segment file allocation throws once; the segment file itself is left
 * in place.
 */
SEASTAR_TEST_CASE(test_commitlog_exceptions_in_allocate_ex) {
    constexpr bool delete_segment_file = false;
    return do_test_exception_in_allocate_ex(delete_segment_file);
}
/**
 * Segment file allocation throws once and the file is deleted as well,
 * which in turn should cause follow-up exceptions in the cleanup delete.
 * The commitlog is expected to handle those.
 */
SEASTAR_TEST_CASE(test_commitlog_exceptions_in_allocate_ex_deleted_file_no_recycle) {
    constexpr bool delete_segment_file = true;
    return do_test_exception_in_allocate_ex(delete_segment_file);
}
using namespace std::string_literals;
// Checks commitlog::descriptor filename parsing: a name generated for one prefix
// (with or without a "Recycled-" marker) must be rejected for any other prefix,
// accepted for its own, and names lacking an id or a version must be rejected.
BOOST_AUTO_TEST_CASE(test_commitlog_segment_descriptor) {
    // Constructing a descriptor from `path` with `pfx` must throw std::domain_error.
    auto expect_rejected = [](const std::string& path, const std::string& pfx) {
        try {
            commitlog::descriptor rejected(path, pfx);
        } catch (std::domain_error&) {
            // ok
            return;
        }
        BOOST_FAIL("Should not reach");
    };

    for (auto& prefix : { "tuta"s, "ninja"s, "Commitlog"s, "Schemalog"s, "bamboo"s }) {
        // create a descriptor without given filename
        commitlog::descriptor generated(db::replay_position(), prefix + commitlog::descriptor::SEPARATOR);

        for (auto& marker : { ""s, "Recycled-"s }) {
            auto filename = "/some/path/we/dont/open/"s + marker + std::string(generated.filename());

            // ensure we only allow same prefix
            for (auto& wrong_prefix : { "fisk"s, "notter"s, "blazer"s }) {
                expect_rejected(filename, wrong_prefix + commitlog::descriptor::SEPARATOR);
            }

            // the matching prefix must parse without throwing
            commitlog::descriptor accepted(filename, prefix + commitlog::descriptor::SEPARATOR);

            // check we require id
            expect_rejected("/tmp/" + marker + prefix + commitlog::descriptor::SEPARATOR + ".log", prefix);
            // check we require ver
            expect_rejected("/tmp/" + marker + prefix + commitlog::descriptor::SEPARATOR + "12.log", prefix);
        }
    }
}
// Test for #16207 - files deleted after recycling did not have their size
// accounted, so deleting them did not actually decrease the disk footprint.
SEASTAR_TEST_CASE(test_delete_recycled_segment_removes_size) {
    constexpr auto max_size_mb = 1;

    commitlog::config cfg;
    cfg.commitlog_segment_size_in_mb = max_size_mb;
    cfg.commitlog_total_space_in_mb = 2 * max_size_mb * smp::count;
    cfg.allow_going_over_size_limit = false; // #9348 - now can enforce size limit always
    cfg.use_o_dsync = true; // make sure we pre-allocate.

    // not using cl_test, because we need to be able to abandon
    // the log.
    tmpdir tmp;
    cfg.commit_log_location = tmp.path().string();

    std::vector<sstring> fakes;

    // Extension that leaves files untouched and records every name passed to
    // before_delete().
    struct delete_recorder: public db::commitlog_file_extension {
    public:
        std::vector<std::string> deleted;

        seastar::future<seastar::file> wrap_file(const seastar::sstring& filename, seastar::file f, seastar::open_flags flags) override {
            co_return f;
        }
        seastar::future<> before_delete(const seastar::sstring& filename) override {
            deleted.emplace_back(filename);
            co_return;
        }
    };

    auto owned_recorder = std::make_unique<delete_recorder>();
    auto& recorder = *owned_recorder;
    db::extensions myexts;
    myexts.add_commitlog_file_extension("hufflepuff", std::move(owned_recorder));
    cfg.extensions = &myexts;

    auto log = co_await commitlog::create_commitlog(cfg);

    auto max_file_size_bytes = (max_size_mb * 1024 * 1024);
    // we might be one segment over disk threshold.
    auto max_shard_size_bytes = max_file_size_bytes + (cfg.commitlog_total_space_in_mb * 1024 * 1024) / smp::count;

    // Add a bunch of fake segments (pretending replayed). The total footprint will be much
    // more than above limit.
    for (int i = 0; i < 20; ++i) {
        fakes.emplace_back(cfg.commit_log_location + "/fake" + std::to_string(i) + ".log");
        auto file = co_await open_file_dma(fakes.back(), open_flags::wo|open_flags::create);
        co_await file.truncate(max_file_size_bytes);
        auto size = co_await file.size();
        co_await file.close();
        BOOST_CHECK_EQUAL(size, max_file_size_bytes);
    }

    // this will add the size of all the files, and if the bug is fixed
    // also ensure actual delete _drops_ footprint.
    co_await log.delete_segments(fakes);

    // Must have deleted.
    BOOST_REQUIRE_GT(recorder.deleted.size(), 0);
    // Must have footprint less than effective limit (limit + crossover segment size)
    BOOST_REQUIRE_LE(log.disk_footprint(), max_shard_size_bytes);

    // All our fake files should have been either deleted or recycled, in any case
    // they should pass through the above extension.
    auto passed_through_extension = [&](const std::string& name) {
        auto last = recorder.deleted.end();
        return std::find(recorder.deleted.begin(), last, name) != last;
    };
    BOOST_REQUIRE(std::all_of(fakes.begin(), fakes.end(), passed_through_extension));

    co_await log.shutdown();
    co_await log.clear();
}
// Checks wait_for_pending_deletes(): a future obtained from it while a file is
// about to be deleted (inside the extension's before_delete hook) must not be
// available yet, and all such futures must resolve by the time the test
// awaits them.
SEASTAR_TEST_CASE(test_wait_for_delete) {
commitlog::config cfg;
constexpr auto max_size_mb = 1;
cfg.commitlog_segment_size_in_mb = max_size_mb;
cfg.commitlog_total_space_in_mb = 8 * max_size_mb * smp::count;
cfg.allow_going_over_size_limit = false; // #9348 - now can enforce size limit always
cfg.use_o_dsync = true; // make sure we pre-allocate.
// not using cl_test, because we need to be able to abandon
// the log.
tmpdir tmp;
cfg.commit_log_location = tmp.path().string();
std::vector<sstring> fakes;
// Extension that, on every before_delete() call until `done` is set, asks the
// commitlog for a pending-deletes future and stores it.
struct myext: public db::commitlog_file_extension {
public:
// Set by the test once the commitlog has been created.
commitlog* log = nullptr;
std::vector<future<>> waiters;
bool done = false;
seastar::future<seastar::file> wrap_file(const seastar::sstring& filename, seastar::file f, seastar::open_flags flags) override {
co_return f;
}
seastar::future<> before_delete(const seastar::sstring& filename) override {
if (!done) {
auto f = log->wait_for_pending_deletes();
// The delete has not happened yet, so the future must still be pending.
BOOST_REQUIRE(!f.available());
waiters.emplace_back(std::move(f));
}
co_return;
}
};
auto ep = std::make_unique<myext>();
auto& ex = *ep;
db::extensions myexts;
myexts.add_commitlog_file_extension("hufflepuff", std::move(ep));
cfg.extensions = &myexts;
auto log = co_await commitlog::create_commitlog(cfg);
ex.log = &log;
// Release everything for the table whenever a flush is requested.
auto r = log.add_flush_handler([&](cf_id_type id, replay_position pos) {
log.discard_completed_segments(id);
});
// uncomment for verbosity
// logging::logger_registry().set_logger_level("commitlog", logging::log_level::debug);
auto uuid = make_table_id();
auto size = log.max_record_size();
// Write max-size records until five delete hooks have been observed.
while (ex.waiters.size() < 5) {
rp_handle h = co_await log.add_mutation(uuid, size, db::commitlog::force_sync::no, [&](db::commitlog::output& dst) {
dst.fill('1', size);
});
h.release();
}
ex.done = true; // stop adding futures
for (auto&& f : ex.waiters) {
co_await std::move(f);
}
co_await log.shutdown();
co_await log.clear();
}
// With commitlog_data_max_lifetime_in_seconds set to 2, a single written record
// must be followed by a flush request for its table without any further writes.
SEASTAR_TEST_CASE(test_commitlog_max_data_lifetime) {
    constexpr auto max_size_mb = 1;

    commitlog::config cfg;
    cfg.commitlog_segment_size_in_mb = max_size_mb;
    cfg.commitlog_total_space_in_mb = 2 * max_size_mb * smp::count;
    cfg.commitlog_sync_period_in_ms = 10;
    cfg.commitlog_data_max_lifetime_in_seconds = 2;
    cfg.allow_going_over_size_limit = false;
    cfg.use_o_dsync = true; // make sure we pre-allocate.

    tmpdir tmp;
    cfg.commit_log_location = tmp.path().string();
    auto log = co_await commitlog::create_commitlog(cfg);

    // uncomment for verbosity
    // logging::logger_registry().set_logger_level("commitlog", logging::log_level::debug);

    rp_set held;
    auto table = make_table_id();
    auto record_size = log.max_record_size();

    // Tables for which a flush has been requested so far.
    std::unordered_set<cf_id_type> flushed_tables;
    condition_variable flush_seen;
    auto registration = log.add_flush_handler([&](cf_id_type id, replay_position pos) {
        log.discard_completed_segments(id, held);
        flushed_tables.insert(id);
        flush_seen.signal();
    });

    rp_handle handle = co_await log.add_mutation(table, record_size, db::commitlog::force_sync::no, [&](db::commitlog::output& out) {
        out.fill('1', record_size);
    });
    handle.release();

    // should not be signaled yet.
    BOOST_REQUIRE(!flushed_tables.contains(table));

    for (int attempt = 0; !flushed_tables.contains(table) && attempt < 3; ++attempt) {
        // way long, but lets give it some leeway on slow test cluster.
        co_await flush_seen.wait(20s);
    }

    // but now it must
    BOOST_REQUIRE(flushed_tables.contains(table));

    co_await log.shutdown();
    co_await log.clear();
}
// Starts without a max data lifetime and verifies no flush is requested within
// 10s of a write; then sets the lifetime to 2 via update_max_data_lifetime()
// and expects the flush request for the table to arrive.
SEASTAR_TEST_CASE(test_commitlog_update_max_data_lifetime) {
    constexpr auto max_size_mb = 1;

    commitlog::config cfg;
    cfg.commitlog_segment_size_in_mb = max_size_mb;
    cfg.commitlog_total_space_in_mb = 2 * max_size_mb * smp::count;
    cfg.commitlog_sync_period_in_ms = 10;
    cfg.commitlog_data_max_lifetime_in_seconds = std::nullopt;
    cfg.allow_going_over_size_limit = false;
    cfg.use_o_dsync = true; // make sure we pre-allocate.

    tmpdir tmp;
    cfg.commit_log_location = tmp.path().string();
    auto log = co_await commitlog::create_commitlog(cfg);

    // uncomment for verbosity
    // logging::logger_registry().set_logger_level("commitlog", logging::log_level::debug);

    rp_set held;
    auto table = make_table_id();
    auto record_size = log.max_record_size();

    // Tables for which a flush has been requested so far.
    std::unordered_set<cf_id_type> flushed_tables;
    condition_variable flush_seen;
    auto registration = log.add_flush_handler([&](cf_id_type id, replay_position pos) {
        log.discard_completed_segments(id, held);
        flushed_tables.insert(id);
        flush_seen.signal();
    });

    rp_handle handle = co_await log.add_mutation(table, record_size, db::commitlog::force_sync::no, [&](db::commitlog::output& out) {
        out.fill('1', record_size);
    });
    handle.release();

    // Without a lifetime configured the wait is expected to time out.
    try {
        co_await flush_seen.wait(10s);
        BOOST_FAIL("should not reach");
    } catch (condition_variable_timed_out&) {
    }

    // should not be signaled yet.
    BOOST_REQUIRE(!flushed_tables.contains(table));

    log.update_max_data_lifetime(2);

    for (int attempt = 0; !flushed_tables.contains(table) && attempt < 3; ++attempt) {
        // way long, but lets give it some leeway on slow test cluster.
        co_await flush_seen.wait(10s);
    }

    // but now it must
    BOOST_REQUIRE(flushed_tables.contains(table));

    co_await log.shutdown();
    co_await log.clear();
}
/**
 * Test allocating an oversized multi-entry.
 *
 * Submits, in one add_entries() call, enough random mutations that their
 * combined size exceeds twice the max record size, abandons the log as if we
 * crashed, then replays the segments through a fresh commitlog and checks that
 * every entry is read back and unfreezes to the mutation that was written.
 */
static future<> do_test_oversized_entry(size_t max_size_mb) {
    commitlog::config cfg;
    cfg.commitlog_segment_size_in_mb = max_size_mb;
    cfg.commitlog_total_space_in_mb = 8 * max_size_mb * smp::count;
    cfg.allow_going_over_size_limit = false;
    cfg.allow_fragmented_entries = true;
    cfg.use_o_dsync = false;

    // not using cl_test, because we need to be able to abandon
    // the log.
    tmpdir tmp;
    cfg.commit_log_location = tmp.path().string();

    // What was written, keyed by the replay position it was written at.
    std::unordered_map<replay_position, frozen_mutation> rp2mut;
    random_mutation_generator gen(random_mutation_generator::generate_counters(false));

    {
        auto log = co_await commitlog::create_commitlog(cfg);
        auto threshold = log.max_record_size() * 2;

        utils::chunked_vector<commitlog_entry_writer> writers;
        utils::chunked_vector<frozen_mutation> mutations;

        // generate a bunch of mutation until we have more data than allowed.
        for (size_t total = 0; total <= threshold; total += mutations.back().representation().size()) {
            mutations.emplace_back(gen(1).front());
        }
        for (auto& frozen : mutations) {
            writers.emplace_back(gen.schema(), frozen, commitlog::force_sync::no);
        }

        // this will create an oversized entry set.
        auto handles = co_await log.add_entries(writers, db::timeout_clock::now() + 200s);

        auto source = mutations.begin();
        for (auto& handle : handles) {
            rp2mut.emplace(handle.release(), *source);
            ++source;
        }

        co_await log.sync_all_segments();
        // as if we crashed -> segment left on disk
        co_await log.release();
        co_await log.shutdown();
    }

    // new log, for replay.
    auto log = co_await commitlog::create_commitlog(cfg);
    auto replay_set = co_await log.get_segments_to_replay();

    size_t replayed = 0;
    std::exception_ptr truncation;
    commitlog::replay_state state;

    // Now replay the old commitlog and ensure we match all data.
    for (auto& segment : replay_set) {
        try {
            co_await commitlog::read_log_file(state, segment, cfg.fname_prefix, [&](commitlog::buffer_and_replay_position buf_rp) -> future<> {
                auto&& buffer = buf_rp.buffer;
                auto&& position = buf_rp.position;
                BOOST_CHECK(rp2mut.count(position));
                commitlog_entry_reader entry(buffer);
                auto& frozen = entry.mutation();
                auto m1 = frozen.unfreeze(gen.schema());
                auto m2 = rp2mut.at(position).unfreeze(gen.schema());
                BOOST_CHECK_EQUAL(m1, m2);
                ++replayed;
                co_return;
            });
        } catch (commitlog::segment_truncation&) {
            // Remembered and only rethrown if entries turn out to be missing.
            truncation = std::current_exception();
        }
    }

    BOOST_CHECK_EQUAL(replayed, rp2mut.size());

    co_await log.shutdown();
    co_await log.clear();

    if (replayed != rp2mut.size() && truncation) {
        std::rethrow_exception(truncation);
    }
}
// Oversized multi-entry round trip with small (1MB) segments.
SEASTAR_TEST_CASE(test_oversized_entry_small) {
    constexpr size_t segment_size_mb = 1;
    return do_test_oversized_entry(segment_size_mb);
}
// Oversized multi-entry round trip with normal (32MB) segments.
SEASTAR_TEST_CASE(test_oversized_entry_normal) {
    constexpr size_t segment_size_mb = 32;
    return do_test_oversized_entry(segment_size_mb);
}
// Oversized multi-entry round trip with bigger (3 x 32MB) segments.
SEASTAR_TEST_CASE(test_oversized_entry_large) {
    constexpr size_t segment_size_mb = 32 * 3;
    return do_test_oversized_entry(segment_size_mb);
}
// Writes n_entries records of random bytes, each at least twice the max record
// size, abandons the log as if we crashed, then replays the segments through a
// fresh commitlog and checks every record is read back with identical content.
static future<> test_oversized(size_t n_entries, size_t max_size_mb) {
    commitlog::config cfg;
    cfg.commitlog_segment_size_in_mb = max_size_mb;
    cfg.commitlog_total_space_in_mb = 8 * n_entries * max_size_mb * smp::count;
    cfg.allow_going_over_size_limit = false;
    cfg.allow_fragmented_entries = true;
    cfg.use_o_dsync = false;

    // not using cl_test, because we need to be able to abandon
    // the log.
    tmpdir tmp;
    cfg.commit_log_location = tmp.path().string();

    auto table = make_table_id();
    // What was written, keyed by the replay position it was written at.
    std::unordered_map<replay_position, fragmented_temporary_buffer> rp2buf;

    {
        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_int_distribution<unsigned> dist(0u, 255u);

        auto log = co_await commitlog::create_commitlog(cfg);
        auto size = log.max_record_size() * 2 + dist(gen) * 1024 + dist(gen) * 64;

        // TODO: we can't create multi-entries using current API.
        for (size_t entry = 0; entry < n_entries; ++entry) {
            auto payload = fragmented_temporary_buffer::allocate_to_fit(size);
            auto out = payload.get_ostream();
            for (size_t filled = 0; filled < size; ++filled) {
                auto byte = static_cast<char>(dist(gen));
                out.write(&byte, 1);
            }

            auto handle = co_await log.add_mutation(table, size, db::commitlog::force_sync::no, [&](db::commitlog::output& dst) {
                for (auto& fragment : payload) {
                    dst.write(fragment.get(), fragment.size());
                }
            });
            rp2buf.emplace(handle.release(), std::move(payload)); // no freeing for you
        }

        co_await log.sync_all_segments();
        // as if we crashed -> segment left on disk
        co_await log.release();
        co_await log.shutdown();
    }

    // new log, for replay.
    auto log = co_await commitlog::create_commitlog(cfg);
    auto replay_set = co_await log.get_segments_to_replay();

    size_t matched = 0;
    std::exception_ptr truncation;
    commitlog::replay_state state;

    // Now replay the old commitlog and ensure we match all data.
    for (auto& segment : replay_set) {
        try {
            co_await commitlog::read_log_file(state, segment, cfg.fname_prefix, [&](commitlog::buffer_and_replay_position buf_rp) -> future<> {
                auto&& read_back = buf_rp.buffer;
                auto&& position = buf_rp.position;
                auto& expected = rp2buf.at(position);
                BOOST_CHECK_EQUAL(expected.size_bytes(), read_back.size_bytes());
                fragmented_temporary_buffer::view v1(expected);
                fragmented_temporary_buffer::view v2(read_back);
                BOOST_CHECK_EQUAL(v1, v2);
                ++matched;
                co_return;
            });
        } catch (commitlog::segment_truncation&) {
            // Remembered and only rethrown if records turn out to be missing.
            truncation = std::current_exception();
        }
    }

    BOOST_CHECK_EQUAL(matched, rp2buf.size());

    co_await log.shutdown();
    co_await log.clear();

    if (matched != rp2buf.size() && truncation) {
        std::rethrow_exception(truncation);
    }
}
// One oversized record, 1MB segments.
SEASTAR_TEST_CASE(test_oversized_single_entry) {
    constexpr size_t entries = 1;
    constexpr size_t segment_size_mb = 1;
    return test_oversized(entries, segment_size_mb);
}
// Eight oversized records, 1MB segments.
SEASTAR_TEST_CASE(test_oversized_several_small) {
    constexpr size_t entries = 8;
    constexpr size_t segment_size_mb = 1;
    return test_oversized(entries, segment_size_mb);
}
// Eight oversized records, 8MB segments.
SEASTAR_TEST_CASE(test_oversized_several_medium) {
    constexpr size_t entries = 8;
    constexpr size_t segment_size_mb = 8;
    return test_oversized(entries, segment_size_mb);
}
// Eight oversized records, 32MB segments.
SEASTAR_TEST_CASE(test_oversized_several_large) {
    constexpr size_t entries = 8;
    constexpr size_t segment_size_mb = 32;
    return test_oversized(entries, segment_size_mb);
}
// Regression test for #20862: the commitlog buffer usage counter was not
// being updated correctly.
//
// Samples log.get_buffer_size() at three points: before writing a single
// 1024-byte mutation, right after the write, and after sync_all_segments().
// It then checks that the counter did not shrink when the mutation was
// added, and that after the sync it is no larger than where it started.
SEASTAR_TEST_CASE(test_commitlog_buffer_size_counter) {
    commitlog::config cfg;
    tmpdir tmp;
    cfg.commit_log_location = tmp.path().string();
    auto log = co_await commitlog::create_commitlog(cfg);

    // uncomment for verbosity
    // logging::logger_registry().set_logger_level("commitlog", logging::log_level::debug);

    auto uuid = make_table_id();
    auto size = 1024;

    auto size_before_alloc = log.get_buffer_size();

    // Write one entry consisting of `size` bytes of '1'.
    rp_handle h = co_await log.add_mutation(uuid, size, db::commitlog::force_sync::no, [&](db::commitlog::output& dst) {
        dst.fill('1', size);
    });
    // Detach the handle; the returned position is not needed here.
    // NOTE(review): presumably this keeps the handle's destructor from
    // releasing the entry; confirm against rp_handle.
    h.release();

    auto size_after_alloc = log.get_buffer_size();

    co_await log.sync_all_segments();

    auto size_after_sync = log.get_buffer_size();

    // Adding data must never make the counter go down...
    BOOST_CHECK_LE(size_before_alloc, size_after_alloc);
    // ...and once synced, the counter must not exceed its initial value.
    BOOST_CHECK_LE(size_after_sync, size_before_alloc);

    co_await log.shutdown();
    co_await log.clear();
}
// Repeats the following scenario 100 times, each in a fresh temp directory:
// create a commitlog limited to 8 GiB per shard (going over the limit is
// disallowed), pre-allocate more segment-sized files than fit under that
// limit, pass them all to delete_segments(), and require the reported disk
// footprint to end up within [shard_size, shard_size + seg_size].
SEASTAR_TEST_CASE(test_commitlog_handle_replayed_segments) {
    constexpr uint64_t max_size_mb = 8 * 1024;
    constexpr size_t rounds = 100;

    commitlog::config cfg;
    cfg.commitlog_total_space_in_mb = max_size_mb * smp::count;
    cfg.allow_going_over_size_limit = false;
    cfg.commitlog_sync_period_in_ms = 1;

    // These depend only on the settings above, not on the per-round directory.
    const auto shard_size = max_size_mb * 1024 * 1024;
    const auto seg_size = cfg.commitlog_segment_size_in_mb * 1024 * 1024;
    // Four more segments than the per-shard limit can hold.
    const auto n_files = 4 + (max_size_mb / cfg.commitlog_segment_size_in_mb);

    for (size_t round = 0; round != rounds; ++round) {
        tmpdir tmp;
        cfg.commit_log_location = tmp.path().string();
        auto log = co_await commitlog::create_commitlog(cfg);

        // Create n_files files of exactly seg_size bytes inside the log directory.
        std::vector<sstring> files;
        for (size_t idx = 0; idx < n_files; ++idx) {
            auto path = (tmp.path() / ("tuta" + std::to_string(idx) + ".log")).string();
            auto file = co_await open_file_dma(path, open_flags::create|open_flags::wo, {});
            co_await file.allocate(0, seg_size);
            co_await file.truncate(seg_size);
            co_await file.close();
            files.emplace_back(std::move(path));
        }

        // Sanity: what we created exceeds the limit by at least one segment.
        BOOST_REQUIRE_GE(files.size() * seg_size, shard_size + seg_size);

        co_await log.delete_segments(std::move(files));

        auto footprint = log.disk_footprint();
        BOOST_REQUIRE_LE(footprint, shard_size + seg_size);
        BOOST_REQUIRE_GE(footprint, shard_size);

        co_await log.shutdown();
        co_await log.clear();
    }
}
BOOST_AUTO_TEST_SUITE_END()