We don't need the database to determine the shard of the mutation, only its schema. So move the implementation to the respecive definitions of mutation and frozen_mutation. Signed-off-by: Benny Halevy <bhalevy@scylladb.com> Closes #10430
1038 lines
42 KiB
C++
1038 lines
42 KiB
C++
/*
|
|
* Copyright (C) 2015-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
|
*/
|
|
|
|
|
|
#include <boost/test/unit_test.hpp>
|
|
#include <boost/range/adaptor/map.hpp>
|
|
|
|
#include <stdlib.h>
|
|
#include <iostream>
|
|
#include <unordered_map>
|
|
#include <unordered_set>
|
|
#include <set>
|
|
#include <deque>
|
|
|
|
#include <seastar/testing/test_case.hh>
|
|
#include <seastar/core/coroutine.hh>
|
|
#include <seastar/core/future-util.hh>
|
|
#include <seastar/core/do_with.hh>
|
|
#include <seastar/core/scollectd_api.hh>
|
|
#include <seastar/core/file.hh>
|
|
#include <seastar/core/seastar.hh>
|
|
#include <seastar/util/noncopyable_function.hh>
|
|
#include <seastar/util/closeable.hh>
|
|
|
|
#include "utils/UUID_gen.hh"
|
|
#include "test/lib/tmpdir.hh"
|
|
#include "db/commitlog/commitlog.hh"
|
|
#include "db/commitlog/commitlog_replayer.hh"
|
|
#include "db/commitlog/commitlog_extensions.hh"
|
|
#include "db/commitlog/rp_set.hh"
|
|
#include "db/extensions.hh"
|
|
#include "log.hh"
|
|
#include "service/priority_manager.hh"
|
|
#include "test/lib/exception_utils.hh"
|
|
#include "test/lib/cql_test_env.hh"
|
|
#include "test/lib/data_model.hh"
|
|
#include "test/lib/sstable_utils.hh"
|
|
#include "test/lib/mutation_source_test.hh"
|
|
|
|
using namespace db;
|
|
|
|
static future<> cl_test(commitlog::config cfg, noncopyable_function<future<> (commitlog&)> f) {
|
|
// enable as needed.
|
|
// moved from static init because static init fiasco.
|
|
#if 0
|
|
logging::logger_registry().set_logger_level("commitlog", logging::log_level::trace);
|
|
#endif
|
|
tmpdir tmp;
|
|
cfg.commit_log_location = tmp.path().string();
|
|
return commitlog::create_commitlog(cfg).then([f = std::move(f)](commitlog log) mutable {
|
|
return do_with(std::move(log), [f = std::move(f)](commitlog& log) {
|
|
return futurize_invoke(f, log).finally([&log] {
|
|
return log.shutdown().then([&log] {
|
|
return log.clear();
|
|
});
|
|
});
|
|
});
|
|
}).finally([tmp = std::move(tmp)] {
|
|
});
|
|
}
|
|
|
|
static future<> cl_test(noncopyable_function<future<> (commitlog&)> f) {
|
|
commitlog::config cfg;
|
|
cfg.metrics_category_name = "commitlog";
|
|
return cl_test(cfg, std::move(f));
|
|
}
|
|
|
|
// just write in-memory...
|
|
SEASTAR_TEST_CASE(test_create_commitlog){
|
|
return cl_test([](commitlog& log) {
|
|
sstring tmp = "hej bubba cow";
|
|
return log.add_mutation(utils::UUID_gen::get_time_UUID(), tmp.size(), db::commitlog::force_sync::no, [tmp](db::commitlog::output& dst) {
|
|
dst.write(tmp.data(), tmp.size());
|
|
}).then([](db::replay_position rp) {
|
|
BOOST_CHECK_NE(rp, db::replay_position());
|
|
});
|
|
});
|
|
}
|
|
|
|
// check we
|
|
SEASTAR_TEST_CASE(test_commitlog_written_to_disk_batch){
|
|
commitlog::config cfg;
|
|
cfg.mode = commitlog::sync_mode::BATCH;
|
|
return cl_test(cfg, [](commitlog& log) {
|
|
sstring tmp = "hej bubba cow";
|
|
return log.add_mutation(utils::UUID_gen::get_time_UUID(), tmp.size(), db::commitlog::force_sync::no, [tmp](db::commitlog::output& dst) {
|
|
dst.write(tmp.data(), tmp.size());
|
|
}).then([&log](replay_position rp) {
|
|
BOOST_CHECK_NE(rp, db::replay_position());
|
|
auto n = log.get_flush_count();
|
|
BOOST_REQUIRE(n > 0);
|
|
});
|
|
});
|
|
}
|
|
|
|
// check that an entry marked as sync is immediately flushed to a storage
|
|
SEASTAR_TEST_CASE(test_commitlog_written_to_disk_sync){
|
|
commitlog::config cfg;
|
|
return cl_test(cfg, [](commitlog& log) {
|
|
sstring tmp = "hej bubba cow";
|
|
return log.add_mutation(utils::UUID_gen::get_time_UUID(), tmp.size(), db::commitlog::force_sync::yes, [tmp](db::commitlog::output& dst) {
|
|
dst.write(tmp.data(), tmp.size());
|
|
}).then([&log](replay_position rp) {
|
|
BOOST_CHECK_NE(rp, db::replay_position());
|
|
auto n = log.get_flush_count();
|
|
BOOST_REQUIRE(n > 0);
|
|
});
|
|
});
|
|
}
|
|
|
|
// check that an entry marked as sync is immediately flushed to a storage
|
|
SEASTAR_TEST_CASE(test_commitlog_written_to_disk_no_sync){
|
|
commitlog::config cfg;
|
|
cfg.commitlog_sync_period_in_ms = 10000000000;
|
|
return cl_test(cfg, [](commitlog& log) {
|
|
sstring tmp = "hej bubba cow";
|
|
return log.add_mutation(utils::UUID_gen::get_time_UUID(), tmp.size(), db::commitlog::force_sync::no, [tmp](db::commitlog::output& dst) {
|
|
dst.write(tmp.data(), tmp.size());
|
|
}).then([&log](replay_position rp) {
|
|
BOOST_CHECK_NE(rp, db::replay_position());
|
|
auto n = log.get_flush_count();
|
|
BOOST_REQUIRE(n == 0);
|
|
});
|
|
});
|
|
}
|
|
|
|
SEASTAR_TEST_CASE(test_commitlog_written_to_disk_periodic){
|
|
return cl_test([](commitlog& log) {
|
|
auto state = make_lw_shared<bool>(false);
|
|
auto uuid = utils::UUID_gen::get_time_UUID();
|
|
return do_until([state]() {return *state;},
|
|
[&log, state, uuid]() {
|
|
sstring tmp = "hej bubba cow";
|
|
return log.add_mutation(uuid, tmp.size(), db::commitlog::force_sync::no, [tmp](db::commitlog::output& dst) {
|
|
dst.write(tmp.data(), tmp.size());
|
|
}).then([&log, state](replay_position rp) {
|
|
BOOST_CHECK_NE(rp, db::replay_position());
|
|
auto n = log.get_flush_count();
|
|
*state = n > 0;
|
|
});
|
|
|
|
});
|
|
});
|
|
}
|
|
|
|
SEASTAR_TEST_CASE(test_commitlog_new_segment){
|
|
commitlog::config cfg;
|
|
cfg.commitlog_segment_size_in_mb = 1;
|
|
return cl_test(cfg, [](commitlog& log) {
|
|
return do_with(rp_set(), [&log](auto& set) {
|
|
auto uuid = utils::UUID_gen::get_time_UUID();
|
|
return do_until([&set]() { return set.size() > 1; }, [&log, &set, uuid]() {
|
|
sstring tmp = "hej bubba cow";
|
|
return log.add_mutation(uuid, tmp.size(), db::commitlog::force_sync::no, [tmp](db::commitlog::output& dst) {
|
|
dst.write(tmp.data(), tmp.size());
|
|
}).then([&set](rp_handle h) {
|
|
BOOST_CHECK_NE(h.rp(), db::replay_position());
|
|
set.put(std::move(h));
|
|
});
|
|
});
|
|
}).then([&log] {
|
|
auto n = log.get_active_segment_names().size();
|
|
BOOST_REQUIRE(n > 1);
|
|
});
|
|
});
|
|
}
|
|
|
|
typedef std::vector<sstring> segment_names;
|
|
|
|
static segment_names segment_diff(commitlog& log, segment_names prev = {}) {
|
|
segment_names now = log.get_active_segment_names();
|
|
segment_names diff;
|
|
// safety fix. We should always get segment names in alphabetical order, but
|
|
// we're not explicitly guaranteed it. Lets sort the sets just to be sure.
|
|
std::sort(now.begin(), now.end());
|
|
std::sort(prev.begin(), prev.end());
|
|
std::set_difference(prev.begin(), prev.end(), now.begin(), now.end(), std::back_inserter(diff));
|
|
return diff;
|
|
}
|
|
|
|
SEASTAR_TEST_CASE(test_commitlog_discard_completed_segments){
|
|
//logging::logger_registry().set_logger_level("commitlog", logging::log_level::trace);
|
|
commitlog::config cfg;
|
|
cfg.commitlog_segment_size_in_mb = 1;
|
|
return cl_test(cfg, [](commitlog& log) {
|
|
struct state_type {
|
|
std::vector<utils::UUID> uuids;
|
|
std::unordered_map<utils::UUID, db::rp_set> rps;
|
|
|
|
mutable size_t index = 0;
|
|
|
|
state_type() {
|
|
for (int i = 0; i < 10; ++i) {
|
|
uuids.push_back(utils::UUID_gen::get_time_UUID());
|
|
}
|
|
}
|
|
const utils::UUID & next_uuid() const {
|
|
return uuids[index++ % uuids.size()];
|
|
}
|
|
bool done() const {
|
|
return std::any_of(rps.begin(), rps.end(), [](auto& rps) {
|
|
return rps.second.size() > 1;
|
|
});
|
|
}
|
|
};
|
|
|
|
auto state = make_lw_shared<state_type>();
|
|
return do_until([state]() { return state->done(); },
|
|
[&log, state]() {
|
|
sstring tmp = "hej bubba cow";
|
|
auto uuid = state->next_uuid();
|
|
return log.add_mutation(uuid, tmp.size(), db::commitlog::force_sync::no, [tmp](db::commitlog::output& dst) {
|
|
dst.write(tmp.data(), tmp.size());
|
|
}).then([state, uuid](db::rp_handle h) {
|
|
state->rps[uuid].put(std::move(h));
|
|
});
|
|
}).then([&log, state]() {
|
|
auto names = log.get_active_segment_names();
|
|
BOOST_REQUIRE(names.size() > 1);
|
|
// sync all so we have no outstanding async sync ops that
|
|
// might prevent discard_completed_segments to actually dispose
|
|
// of clean segments (shared_ptr in task)
|
|
return log.sync_all_segments().then([&log, state, names] {
|
|
for (auto & p : state->rps) {
|
|
log.discard_completed_segments(p.first, p.second);
|
|
}
|
|
auto diff = segment_diff(log, names);
|
|
auto nn = diff.size();
|
|
auto dn = log.get_num_segments_destroyed();
|
|
|
|
BOOST_REQUIRE(nn > 0);
|
|
BOOST_REQUIRE(nn <= names.size());
|
|
BOOST_REQUIRE(dn <= nn);
|
|
});
|
|
}).then([&log] {
|
|
return log.shutdown().then([&log] {
|
|
return log.list_existing_segments().then([] (auto descs) {
|
|
BOOST_CHECK_EQUAL(descs, decltype(descs){});
|
|
});
|
|
});
|
|
});
|
|
});
|
|
}
|
|
|
|
SEASTAR_TEST_CASE(test_equal_record_limit){
|
|
return cl_test([](commitlog& log) {
|
|
auto size = log.max_record_size();
|
|
return log.add_mutation(utils::UUID_gen::get_time_UUID(), size, db::commitlog::force_sync::no, [size](db::commitlog::output& dst) {
|
|
dst.fill(char(1), size);
|
|
}).then([](db::replay_position rp) {
|
|
BOOST_CHECK_NE(rp, db::replay_position());
|
|
});
|
|
});
|
|
}
|
|
|
|
SEASTAR_TEST_CASE(test_exceed_record_limit){
|
|
return cl_test([](commitlog& log) {
|
|
auto size = log.max_record_size() + 1;
|
|
return log.add_mutation(utils::UUID_gen::get_time_UUID(), size, db::commitlog::force_sync::no, [size](db::commitlog::output& dst) {
|
|
dst.fill(char(1), size);
|
|
}).then_wrapped([](future<db::rp_handle> f) {
|
|
try {
|
|
f.get();
|
|
} catch (...) {
|
|
// ok.
|
|
return make_ready_future();
|
|
}
|
|
throw std::runtime_error("Did not get expected exception from writing too large record");
|
|
});
|
|
});
|
|
}
|
|
|
|
SEASTAR_TEST_CASE(test_commitlog_closed) {
|
|
commitlog::config cfg;
|
|
return cl_test(cfg, [](commitlog& log) {
|
|
return log.shutdown().then([&log] {
|
|
sstring tmp = "test321";
|
|
auto uuid = utils::UUID_gen::get_time_UUID();
|
|
return log.add_mutation(uuid, tmp.size(), db::commitlog::force_sync::no, [tmp](db::commitlog::output& dst) {
|
|
dst.write(tmp.data(), tmp.size());
|
|
}).then_wrapped([] (future<db::rp_handle> f) {
|
|
BOOST_REQUIRE_EXCEPTION(f.get(), gate_closed_exception, exception_predicate::message_equals("gate closed"));
|
|
});
|
|
});
|
|
});
|
|
}
|
|
|
|
SEASTAR_TEST_CASE(test_commitlog_delete_when_over_disk_limit) {
|
|
commitlog::config cfg;
|
|
|
|
constexpr auto max_size_mb = 2;
|
|
cfg.commitlog_segment_size_in_mb = max_size_mb;
|
|
cfg.commitlog_total_space_in_mb = 1;
|
|
cfg.commitlog_sync_period_in_ms = 1;
|
|
return cl_test(cfg, [](commitlog& log) {
|
|
auto sem = make_lw_shared<semaphore>(0);
|
|
auto segments = make_lw_shared<segment_names>();
|
|
|
|
// add a flush handler that simply says we're done with the range.
|
|
auto r = log.add_flush_handler([&log, sem, segments](cf_id_type id, replay_position pos) {
|
|
auto f = make_ready_future<>();
|
|
// #6195 only get segment list at first callback. We can (not often)
|
|
// be called again, but reading segment list at that point might (will)
|
|
// render same list as in the diff check below.
|
|
if (segments->empty()) {
|
|
*segments = log.get_active_segment_names();
|
|
// Verify #5899 - file size should not exceed the config max.
|
|
f = parallel_for_each(*segments, [](sstring filename) {
|
|
return file_size(filename).then([](uint64_t size) {
|
|
BOOST_REQUIRE_LE(size, max_size_mb * 1024 * 1024);
|
|
});
|
|
});
|
|
}
|
|
return f.then([&log, sem, id] {
|
|
log.discard_completed_segments(id);
|
|
sem->signal();
|
|
});
|
|
});
|
|
|
|
auto set = make_lw_shared<std::set<segment_id_type>>();
|
|
auto uuid = utils::UUID_gen::get_time_UUID();
|
|
return do_until([set, sem]() {return set->size() > 2 && sem->try_wait();},
|
|
[&log, set, uuid]() {
|
|
sstring tmp = "hej bubba cow";
|
|
return log.add_mutation(uuid, tmp.size(), db::commitlog::force_sync::no, [tmp](db::commitlog::output& dst) {
|
|
dst.write(tmp.data(), tmp.size());
|
|
}).then([set](rp_handle h) {
|
|
BOOST_CHECK_NE(h.rp(), db::replay_position());
|
|
set->insert(h.release().id);
|
|
});
|
|
}).then([&log, segments]() {
|
|
auto diff = segment_diff(log, *segments);
|
|
auto nn = diff.size();
|
|
auto dn = log.get_num_segments_destroyed();
|
|
|
|
BOOST_REQUIRE(nn > 0);
|
|
BOOST_REQUIRE(nn <= segments->size());
|
|
BOOST_REQUIRE(dn <= nn);
|
|
}).finally([r = std::move(r)] {
|
|
});
|
|
});
|
|
}
|
|
|
|
SEASTAR_TEST_CASE(test_commitlog_reader){
|
|
static auto count_mutations_in_segment = [] (sstring path) -> future<size_t> {
|
|
auto count = make_lw_shared<size_t>(0);
|
|
return db::commitlog::read_log_file(path, db::commitlog::descriptor::FILENAME_PREFIX, service::get_local_commitlog_priority(), [count](db::commitlog::buffer_and_replay_position buf_rp) {
|
|
auto&& [buf, rp] = buf_rp;
|
|
auto linearization_buffer = bytes_ostream();
|
|
auto in = buf.get_istream();
|
|
auto str = to_sstring_view(in.read_bytes_view(buf.size_bytes(), linearization_buffer));
|
|
BOOST_CHECK_EQUAL(str, "hej bubba cow");
|
|
(*count)++;
|
|
return make_ready_future<>();
|
|
}).then([count] {
|
|
return *count;
|
|
});
|
|
};
|
|
commitlog::config cfg;
|
|
cfg.commitlog_segment_size_in_mb = 1;
|
|
return cl_test(cfg, [](commitlog& log) {
|
|
auto set = make_lw_shared<rp_set>();
|
|
auto count = make_lw_shared<size_t>(0);
|
|
auto count2 = make_lw_shared<size_t>(0);
|
|
auto uuid = utils::UUID_gen::get_time_UUID();
|
|
return do_until([count, set]() {return set->size() > 1;},
|
|
[&log, uuid, count, set]() {
|
|
sstring tmp = "hej bubba cow";
|
|
return log.add_mutation(uuid, tmp.size(), db::commitlog::force_sync::no, [tmp](db::commitlog::output& dst) {
|
|
dst.write(tmp.data(), tmp.size());
|
|
}).then([&log, set, count](auto h) {
|
|
BOOST_CHECK_NE(db::replay_position(), h.rp());
|
|
set->put(std::move(h));
|
|
if (set->size() == 1) {
|
|
++(*count);
|
|
}
|
|
});
|
|
|
|
}).then([&log, set, count2]() {
|
|
auto segments = log.get_active_segment_names();
|
|
BOOST_REQUIRE(segments.size() > 1);
|
|
|
|
auto ids = boost::copy_range<std::vector<segment_id_type>>(set->usage() | boost::adaptors::map_keys);
|
|
std::sort(ids.begin(), ids.end());
|
|
auto id = ids.front();
|
|
auto i = std::find_if(segments.begin(), segments.end(), [id](sstring filename) {
|
|
commitlog::descriptor desc(filename, db::commitlog::descriptor::FILENAME_PREFIX);
|
|
return desc.id == id;
|
|
});
|
|
if (i == segments.end()) {
|
|
throw std::runtime_error("Did not find expected log file");
|
|
}
|
|
return *i;
|
|
}).then([&log, count] (sstring segment_path) {
|
|
// Check reading from an unsynced segment
|
|
return count_mutations_in_segment(segment_path).then([count] (size_t replay_count) {
|
|
BOOST_CHECK_GE(*count, replay_count);
|
|
}).then([&log, count, segment_path] {
|
|
return log.sync_all_segments().then([count, segment_path] {
|
|
// Check reading from a synced segment
|
|
return count_mutations_in_segment(segment_path).then([count] (size_t replay_count) {
|
|
BOOST_CHECK_EQUAL(*count, replay_count);
|
|
});
|
|
});
|
|
});
|
|
});
|
|
});
|
|
}
|
|
|
|
static future<> corrupt_segment(sstring seg, uint64_t off, uint32_t value) {
|
|
return open_file_dma(seg, open_flags::rw).then([off, value](file f) {
|
|
size_t size = align_up<size_t>(off, 4096);
|
|
return do_with(std::move(f), [size, off, value](file& f) {
|
|
return f.dma_read_exactly<char>(0, size).then([&f, off, value](auto buf) {
|
|
std::copy_n(reinterpret_cast<const char*>(&value), sizeof(value), buf.get_write() + off);
|
|
auto dst = buf.get();
|
|
auto size = buf.size();
|
|
return f.dma_write(0, dst, size).then([buf = std::move(buf)](size_t) {});
|
|
}).finally([&f] {
|
|
return f.close();
|
|
});
|
|
});
|
|
});
|
|
}
|
|
|
|
SEASTAR_TEST_CASE(test_commitlog_entry_corruption){
|
|
commitlog::config cfg;
|
|
cfg.commitlog_segment_size_in_mb = 1;
|
|
return cl_test(cfg, [](commitlog& log) {
|
|
auto rps = make_lw_shared<std::vector<db::replay_position>>();
|
|
return do_until([rps]() {return rps->size() > 1;},
|
|
[&log, rps]() {
|
|
auto uuid = utils::UUID_gen::get_time_UUID();
|
|
sstring tmp = "hej bubba cow";
|
|
return log.add_mutation(uuid, tmp.size(), db::commitlog::force_sync::no, [tmp](db::commitlog::output& dst) {
|
|
dst.write(tmp.data(), tmp.size());
|
|
}).then([&log, rps](rp_handle h) {
|
|
BOOST_CHECK_NE(h.rp(), db::replay_position());
|
|
rps->push_back(h.release());
|
|
});
|
|
}).then([&log]() {
|
|
return log.sync_all_segments();
|
|
}).then([&log, rps] {
|
|
auto segments = log.get_active_segment_names();
|
|
BOOST_REQUIRE(!segments.empty());
|
|
auto seg = segments[0];
|
|
return corrupt_segment(seg, rps->at(1).pos + 4, 0x451234ab).then([seg, rps, &log] {
|
|
return db::commitlog::read_log_file(seg, db::commitlog::descriptor::FILENAME_PREFIX, service::get_local_commitlog_priority(), [rps](db::commitlog::buffer_and_replay_position buf_rp) {
|
|
auto&& [buf, rp] = buf_rp;
|
|
BOOST_CHECK_EQUAL(rp, rps->at(0));
|
|
return make_ready_future<>();
|
|
}).then_wrapped([](auto&& f) {
|
|
try {
|
|
f.get();
|
|
BOOST_FAIL("Expected exception");
|
|
} catch (commitlog::segment_data_corruption_error& e) {
|
|
// ok.
|
|
BOOST_REQUIRE(e.bytes() > 0);
|
|
}
|
|
});
|
|
});
|
|
});
|
|
});
|
|
}
|
|
|
|
SEASTAR_TEST_CASE(test_commitlog_chunk_corruption){
|
|
commitlog::config cfg;
|
|
cfg.commitlog_segment_size_in_mb = 1;
|
|
return cl_test(cfg, [](commitlog& log) {
|
|
auto rps = make_lw_shared<std::vector<db::replay_position>>();
|
|
return do_until([rps]() {return rps->size() > 1;},
|
|
[&log, rps]() {
|
|
auto uuid = utils::UUID_gen::get_time_UUID();
|
|
sstring tmp = "hej bubba cow";
|
|
return log.add_mutation(uuid, tmp.size(), db::commitlog::force_sync::no, [tmp](db::commitlog::output& dst) {
|
|
dst.write(tmp.data(), tmp.size());
|
|
}).then([&log, rps](rp_handle h) {
|
|
BOOST_CHECK_NE(h.rp(), db::replay_position());
|
|
rps->push_back(h.release());
|
|
});
|
|
}).then([&log]() {
|
|
return log.sync_all_segments();
|
|
}).then([&log, rps] {
|
|
auto segments = log.get_active_segment_names();
|
|
BOOST_REQUIRE(!segments.empty());
|
|
auto seg = segments[0];
|
|
return corrupt_segment(seg, rps->at(0).pos - 4, 0x451234ab).then([seg, rps, &log] {
|
|
return db::commitlog::read_log_file(seg, db::commitlog::descriptor::FILENAME_PREFIX, service::get_local_commitlog_priority(), [rps](db::commitlog::buffer_and_replay_position buf_rp) {
|
|
BOOST_FAIL("Should not reach");
|
|
return make_ready_future<>();
|
|
}).then_wrapped([](auto&& f) {
|
|
try {
|
|
f.get();
|
|
BOOST_FAIL("Expected exception");
|
|
} catch (commitlog::segment_data_corruption_error& e) {
|
|
// ok.
|
|
BOOST_REQUIRE(e.bytes() > 0);
|
|
}
|
|
});
|
|
});
|
|
});
|
|
});
|
|
}
|
|
|
|
SEASTAR_TEST_CASE(test_commitlog_reader_produce_exception){
|
|
commitlog::config cfg;
|
|
cfg.commitlog_segment_size_in_mb = 1;
|
|
return cl_test(cfg, [](commitlog& log) {
|
|
auto rps = make_lw_shared<std::vector<db::replay_position>>();
|
|
return do_until([rps]() {return rps->size() > 1;},
|
|
[&log, rps]() {
|
|
auto uuid = utils::UUID_gen::get_time_UUID();
|
|
sstring tmp = "hej bubba cow";
|
|
return log.add_mutation(uuid, tmp.size(), db::commitlog::force_sync::no, [tmp](db::commitlog::output& dst) {
|
|
dst.write(tmp.data(), tmp.size());
|
|
}).then([&log, rps](rp_handle h) {
|
|
BOOST_CHECK_NE(h.rp(), db::replay_position());
|
|
rps->push_back(h.release());
|
|
});
|
|
}).then([&log]() {
|
|
return log.sync_all_segments();
|
|
}).then([&log] {
|
|
auto segments = log.get_active_segment_names();
|
|
BOOST_REQUIRE(!segments.empty());
|
|
auto seg = segments[0];
|
|
return db::commitlog::read_log_file(seg, db::commitlog::descriptor::FILENAME_PREFIX, service::get_local_commitlog_priority(), [](db::commitlog::buffer_and_replay_position buf_rp) {
|
|
return make_exception_future(std::runtime_error("I am in a throwing mode"));
|
|
}).then_wrapped([](auto&& f) {
|
|
try {
|
|
f.get();
|
|
BOOST_FAIL("Expected exception");
|
|
} catch (std::runtime_error&) {
|
|
// Ok
|
|
} catch (...) {
|
|
// ok.
|
|
BOOST_FAIL("Wrong exception");
|
|
}
|
|
});
|
|
});
|
|
});
|
|
}
|
|
|
|
SEASTAR_TEST_CASE(test_commitlog_counters) {
|
|
auto count_cl_counters = []() -> size_t {
|
|
auto ids = scollectd::get_collectd_ids();
|
|
return std::count_if(ids.begin(), ids.end(), [](const scollectd::type_instance_id& id) {
|
|
return id.plugin() == "commitlog";
|
|
});
|
|
};
|
|
BOOST_CHECK_EQUAL(count_cl_counters(), 0);
|
|
return cl_test([count_cl_counters](commitlog& log) {
|
|
BOOST_CHECK_GT(count_cl_counters(), 0);
|
|
return make_ready_future<>();
|
|
}).finally([count_cl_counters] {
|
|
BOOST_CHECK_EQUAL(count_cl_counters(), 0);
|
|
});
|
|
}
|
|
|
|
#ifndef SEASTAR_DEFAULT_ALLOCATOR
|
|
|
|
SEASTAR_TEST_CASE(test_allocation_failure){
|
|
return cl_test([](commitlog& log) {
|
|
auto size = log.max_record_size() - 1;
|
|
|
|
auto junk = make_lw_shared<std::list<std::unique_ptr<char[]>>>();
|
|
|
|
// Use us loads of memory so we can OOM at the appropriate place
|
|
try {
|
|
assert(fragmented_temporary_buffer::default_fragment_size < size);
|
|
for (;;) {
|
|
junk->emplace_back(new char[fragmented_temporary_buffer::default_fragment_size]);
|
|
}
|
|
} catch (std::bad_alloc&) {
|
|
}
|
|
auto last = junk->end();
|
|
junk->erase(--last);
|
|
return log.add_mutation(utils::UUID_gen::get_time_UUID(), size, db::commitlog::force_sync::no, [size](db::commitlog::output& dst) {
|
|
dst.fill(char(1), size);
|
|
}).then_wrapped([junk, size](future<db::rp_handle> f) {
|
|
std::exception_ptr ep;
|
|
try {
|
|
f.get();
|
|
throw std::runtime_error(format("Adding mutation of size {} succeeded unexpectedly", size));
|
|
} catch (std::bad_alloc&) {
|
|
// ok. this is what we expected
|
|
junk->clear();
|
|
return make_ready_future();
|
|
} catch (...) {
|
|
ep = std::current_exception();
|
|
}
|
|
throw std::runtime_error(format("Got an unexpected exception from writing too large record: {}", ep));
|
|
});
|
|
});
|
|
}
|
|
|
|
#endif
|
|
|
|
SEASTAR_TEST_CASE(test_commitlog_replay_invalid_key){
|
|
return do_with_cql_env_thread([] (cql_test_env& env) {
|
|
env.execute_cql("create table t (pk text primary key, v text)").get();
|
|
|
|
auto& db = env.local_db();
|
|
auto& table = db.find_column_family("ks", "t");
|
|
auto& cl = *table.commitlog();
|
|
auto s = table.schema();
|
|
auto& mt = table.active_memtable();
|
|
|
|
auto add_entry = [&db, &cl, s] (bytes key) mutable {
|
|
auto md = tests::data_model::mutation_description({ key });
|
|
md.add_clustered_cell({}, "v", to_bytes("val"));
|
|
auto m = md.build(s);
|
|
|
|
auto fm = freeze(m);
|
|
commitlog_entry_writer cew(s, fm, db::commitlog::force_sync::yes);
|
|
cl.add_entry(m.column_family_id(), cew, db::no_timeout).get();
|
|
return m.shard_of();
|
|
};
|
|
|
|
const auto shard = add_entry(bytes{});
|
|
auto pk1_raw = make_key_for_shard(shard, s);
|
|
|
|
add_entry(to_bytes(pk1_raw));
|
|
|
|
BOOST_REQUIRE(mt.empty());
|
|
|
|
{
|
|
auto paths = cl.get_active_segment_names();
|
|
BOOST_REQUIRE(!paths.empty());
|
|
auto rp = db::commitlog_replayer::create_replayer(env.db()).get0();
|
|
rp.recover(paths, db::commitlog::descriptor::FILENAME_PREFIX).get();
|
|
}
|
|
|
|
{
|
|
auto rd = mt.make_flat_reader(s, db.get_reader_concurrency_semaphore().make_tracking_only_permit(s.get(), "test", db::no_timeout));
|
|
auto close_rd = deferred_close(rd);
|
|
auto mopt = read_mutation_from_flat_mutation_reader(rd).get0();
|
|
BOOST_REQUIRE(mopt);
|
|
|
|
mopt = {};
|
|
mopt = read_mutation_from_flat_mutation_reader(rd).get0();
|
|
BOOST_REQUIRE(!mopt);
|
|
}
|
|
});
|
|
}
|
|
|
|
using namespace std::chrono_literals;
|
|
|
|
SEASTAR_TEST_CASE(test_commitlog_add_entries) {
|
|
return cl_test([](commitlog& log) {
|
|
return seastar::async([&] {
|
|
using force_sync = commitlog_entry_writer::force_sync;
|
|
|
|
constexpr auto n = 10;
|
|
for (auto fs : { force_sync(false), force_sync(true) }) {
|
|
std::vector<commitlog_entry_writer> writers;
|
|
std::vector<frozen_mutation> mutations;
|
|
std::vector<replay_position> rps;
|
|
|
|
writers.reserve(n);
|
|
mutations.reserve(n);
|
|
|
|
for (auto i = 0; i < n; ++i) {
|
|
random_mutation_generator gen(random_mutation_generator::generate_counters(false));
|
|
mutations.emplace_back(gen(1).front());
|
|
writers.emplace_back(gen.schema(), mutations.back(), fs);
|
|
}
|
|
|
|
auto res = log.add_entries(writers, db::timeout_clock::now() + 60s).get0();
|
|
|
|
std::set<segment_id_type> ids;
|
|
for (auto& h : res) {
|
|
ids.emplace(h.rp().id);
|
|
rps.emplace_back(h.rp());
|
|
}
|
|
BOOST_CHECK_EQUAL(ids.size(), 1);
|
|
|
|
log.sync_all_segments().get();
|
|
auto segments = log.get_active_segment_names();
|
|
BOOST_REQUIRE(!segments.empty());
|
|
|
|
std::unordered_set<replay_position> result;
|
|
|
|
for (auto& seg : segments) {
|
|
db::commitlog::read_log_file(seg, db::commitlog::descriptor::FILENAME_PREFIX, service::get_local_commitlog_priority(), [&](db::commitlog::buffer_and_replay_position buf_rp) {
|
|
commitlog_entry_reader r(buf_rp.buffer);
|
|
auto& rp = buf_rp.position;
|
|
auto i = std::find(rps.begin(), rps.end(), rp);
|
|
// since we are looping, we can be reading last test cases
|
|
// segment (force_sync permutations)
|
|
if (i != rps.end()) {
|
|
auto n = std::distance(rps.begin(), i);
|
|
auto& fm1 = mutations.at(n);
|
|
auto& fm2 = r.mutation();
|
|
auto s = writers.at(n).schema();
|
|
auto m1 = fm1.unfreeze(s);
|
|
auto m2 = fm2.unfreeze(s);
|
|
BOOST_CHECK_EQUAL(m1, m2);
|
|
result.emplace(rp);
|
|
}
|
|
return make_ready_future<>();
|
|
}).get();
|
|
}
|
|
|
|
BOOST_CHECK_EQUAL(result.size(), rps.size());
|
|
}
|
|
});
|
|
});
|
|
}
|
|
|
|
SEASTAR_TEST_CASE(test_commitlog_new_segment_odsync){
|
|
commitlog::config cfg;
|
|
cfg.commitlog_segment_size_in_mb = 1;
|
|
cfg.use_o_dsync = true;
|
|
|
|
return cl_test(cfg, [](commitlog& log) -> future<> {
|
|
auto uuid = utils::UUID_gen::get_time_UUID();
|
|
rp_set set;
|
|
while (set.size() <= 1) {
|
|
sstring tmp = "hej bubba cow";
|
|
rp_handle h = co_await log.add_mutation(uuid, tmp.size(), db::commitlog::force_sync::no, [tmp](db::commitlog::output& dst) {
|
|
dst.write(tmp.data(), tmp.size());
|
|
});
|
|
set.put(std::move(h));
|
|
}
|
|
|
|
auto names = log.get_active_segment_names();
|
|
BOOST_REQUIRE(names.size() > 1);
|
|
|
|
// check that all the segments are pre-allocated.
|
|
for (auto& name : names) {
|
|
auto f = co_await seastar::open_file_dma(name, seastar::open_flags::ro);
|
|
auto s = co_await f.size();
|
|
co_await f.close();
|
|
BOOST_CHECK_EQUAL(s, 1024u*1024u);
|
|
}
|
|
});
|
|
}
|
|
|
|
// Test for #8363
|
|
// try to provoke edge case where we race segment deletion
|
|
// and waiting for recycled to be replenished.
|
|
SEASTAR_TEST_CASE(test_commitlog_deadlock_in_recycle) {
|
|
commitlog::config cfg;
|
|
|
|
constexpr auto max_size_mb = 2;
|
|
cfg.commitlog_segment_size_in_mb = max_size_mb;
|
|
// ensure total size per shard is not multiple of segment size.
|
|
cfg.commitlog_total_space_in_mb = 5 * smp::count;
|
|
cfg.commitlog_sync_period_in_ms = 10;
|
|
cfg.allow_going_over_size_limit = false;
|
|
cfg.use_o_dsync = true; // make sure we pre-allocate.
|
|
|
|
// not using cl_test, because we need to be able to abandon
|
|
// the log.
|
|
|
|
tmpdir tmp;
|
|
cfg.commit_log_location = tmp.path().string();
|
|
auto log = co_await commitlog::create_commitlog(cfg);
|
|
|
|
rp_set rps;
|
|
std::deque<rp_set> queue;
|
|
size_t n = 0;
|
|
|
|
// uncomment for verbosity
|
|
// logging::logger_registry().set_logger_level("commitlog", logging::log_level::debug);
|
|
|
|
auto uuid = utils::UUID_gen::get_time_UUID();
|
|
auto size = log.max_record_size() / 2;
|
|
|
|
timer<> t;
|
|
t.set_callback([&] {
|
|
while (!queue.empty()) {
|
|
auto flush = std::move(queue.front());
|
|
queue.pop_front();
|
|
log.discard_completed_segments(uuid, flush);
|
|
++n;
|
|
};
|
|
});
|
|
|
|
// add a flush handler that delays releasing things until disk threshold is reached.
|
|
auto r = log.add_flush_handler([&](cf_id_type, replay_position pos) {
|
|
auto old = std::exchange(rps, rp_set{});
|
|
queue.emplace_back(std::move(old));
|
|
if (log.disk_footprint() >= log.disk_limit() && !t.armed()) {
|
|
t.arm(5s);
|
|
}
|
|
});
|
|
|
|
bool release = true;
|
|
|
|
try {
|
|
while (n < 10) {
|
|
auto now = timeout_clock::now();
|
|
rp_handle h = co_await with_timeout(now + 30s, log.add_mutation(uuid, size, db::commitlog::force_sync::no, [&](db::commitlog::output& dst) {
|
|
dst.fill('1', size);
|
|
}));
|
|
rps.put(std::move(h));
|
|
}
|
|
} catch (timed_out_error&) {
|
|
BOOST_FAIL("log write timed out. maybe it is deadlocked... Will not free log. ASAN errors and leaks will follow...");
|
|
release = false;
|
|
}
|
|
|
|
if (release) {
|
|
co_await log.shutdown();
|
|
co_await log.clear();
|
|
}
|
|
}
|
|
|
|
// Test for #8438 - ensure we can shut down (in orderly fashion)
|
|
// even if CL write is stuck waiting for segment recycle/alloc
|
|
SEASTAR_TEST_CASE(test_commitlog_shutdown_during_wait) {
|
|
commitlog::config cfg;
|
|
|
|
constexpr auto max_size_mb = 2;
|
|
cfg.commitlog_segment_size_in_mb = max_size_mb;
|
|
// ensure total size per shard is not multiple of segment size.
|
|
cfg.commitlog_total_space_in_mb = 5 * smp::count;
|
|
cfg.commitlog_sync_period_in_ms = 10;
|
|
cfg.allow_going_over_size_limit = false;
|
|
cfg.use_o_dsync = true; // make sure we pre-allocate.
|
|
|
|
// not using cl_test, because we need to be able to abandon
|
|
// the log.
|
|
|
|
tmpdir tmp;
|
|
cfg.commit_log_location = tmp.path().string();
|
|
auto log = co_await commitlog::create_commitlog(cfg);
|
|
|
|
rp_set rps;
|
|
std::deque<rp_set> queue;
|
|
size_t n = 0;
|
|
|
|
// uncomment for verbosity
|
|
//logging::logger_registry().set_logger_level("commitlog", logging::log_level::debug);
|
|
|
|
auto uuid = utils::UUID_gen::get_time_UUID();
|
|
auto size = log.max_record_size() / 2;
|
|
|
|
// add a flush handler that does not.
|
|
auto r = log.add_flush_handler([&](cf_id_type, replay_position pos) {
|
|
auto old = std::exchange(rps, rp_set{});
|
|
queue.emplace_back(std::move(old));
|
|
});
|
|
|
|
for (;;) {
|
|
try {
|
|
auto now = timeout_clock::now();
|
|
rp_handle h = co_await with_timeout(now + 10s, log.add_mutation(uuid, size, db::commitlog::force_sync::no, [&](db::commitlog::output& dst) {
|
|
dst.fill('1', size);
|
|
}));
|
|
rps.put(std::move(h));
|
|
} catch (timed_out_error&) {
|
|
if (log.disk_footprint() >= log.disk_limit()) {
|
|
// now a segment alloc is waiting.
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// shut down is assumed to
|
|
// a.) stop allocating
|
|
// b.) ensure all segments get's free:d
|
|
while (!queue.empty()) {
|
|
auto flush = std::move(queue.front());
|
|
queue.pop_front();
|
|
log.discard_completed_segments(uuid, flush);
|
|
}
|
|
|
|
try {
|
|
auto now = timeout_clock::now();
|
|
co_await with_timeout(now + 30s, log.shutdown());
|
|
co_await log.clear();
|
|
} catch (timed_out_error&) {
|
|
BOOST_FAIL("log shutdown timed out. maybe it is deadlocked... Will not free log. ASAN errors and leaks will follow...");
|
|
}
|
|
}
|
|
|
|
SEASTAR_TEST_CASE(test_commitlog_deadlock_with_flush_threshold) {
|
|
commitlog::config cfg;
|
|
|
|
constexpr auto max_size_mb = 1;
|
|
|
|
cfg.commitlog_segment_size_in_mb = max_size_mb;
|
|
cfg.commitlog_total_space_in_mb = 2 * max_size_mb * smp::count;
|
|
cfg.commitlog_sync_period_in_ms = 10;
|
|
cfg.allow_going_over_size_limit = false;
|
|
cfg.use_o_dsync = true; // make sure we pre-allocate.
|
|
|
|
// not using cl_test, because we need to be able to abandon
|
|
// the log.
|
|
|
|
tmpdir tmp;
|
|
cfg.commit_log_location = tmp.path().string();
|
|
auto log = co_await commitlog::create_commitlog(cfg);
|
|
|
|
rp_set rps;
|
|
// uncomment for verbosity
|
|
// logging::logger_registry().set_logger_level("commitlog", logging::log_level::debug);
|
|
|
|
auto uuid = utils::UUID_gen::get_time_UUID();
|
|
auto size = log.max_record_size();
|
|
|
|
bool done = false;
|
|
|
|
auto r = log.add_flush_handler([&](cf_id_type id, replay_position pos) {
|
|
log.discard_completed_segments(id, rps);
|
|
done = true;
|
|
});
|
|
|
|
bool release = true;
|
|
|
|
try {
|
|
while (!done) {
|
|
auto now = timeout_clock::now();
|
|
rp_handle h = co_await with_timeout(now + 30s, log.add_mutation(uuid, size, db::commitlog::force_sync::no, [&](db::commitlog::output& dst) {
|
|
dst.fill('1', size);
|
|
}));
|
|
rps.put(std::move(h));
|
|
}
|
|
} catch (timed_out_error&) {
|
|
BOOST_FAIL("log write timed out. maybe it is deadlocked... Will not free log. ASAN errors and leaks will follow...");
|
|
release = false;
|
|
}
|
|
|
|
if (release) {
|
|
co_await log.shutdown();
|
|
co_await log.clear();
|
|
}
|
|
}
|
|
|
|
static future<> do_test_exception_in_allocate_ex(bool do_file_delete) {
|
|
commitlog::config cfg;
|
|
|
|
constexpr auto max_size_mb = 1;
|
|
|
|
cfg.commitlog_segment_size_in_mb = max_size_mb;
|
|
cfg.commitlog_total_space_in_mb = 2 * max_size_mb * smp::count;
|
|
cfg.commitlog_sync_period_in_ms = 10;
|
|
cfg.allow_going_over_size_limit = false; // #9348 - now can enforce size limit always
|
|
cfg.use_o_dsync = true; // make sure we pre-allocate.
|
|
|
|
// not using cl_test, because we need to be able to abandon
|
|
// the log.
|
|
|
|
tmpdir tmp;
|
|
cfg.commit_log_location = tmp.path().string();
|
|
|
|
class myfail : public std::exception {
|
|
public:
|
|
using std::exception::exception;
|
|
};
|
|
|
|
struct myext: public db::commitlog_file_extension {
|
|
public:
|
|
bool fail = false;
|
|
bool thrown = false;
|
|
bool do_file_delete;
|
|
|
|
myext(bool dd)
|
|
: do_file_delete(dd)
|
|
{}
|
|
|
|
seastar::future<seastar::file> wrap_file(const seastar::sstring& filename, seastar::file f, seastar::open_flags flags) override {
|
|
if (fail && !thrown) {
|
|
thrown = true;
|
|
if (do_file_delete) {
|
|
co_await f.close();
|
|
co_await seastar::remove_file(filename);
|
|
}
|
|
throw myfail{};
|
|
}
|
|
co_return f;
|
|
}
|
|
seastar::future<> before_delete(const seastar::sstring&) override {
|
|
co_return;
|
|
}
|
|
};
|
|
|
|
auto ep = std::make_unique<myext>(do_file_delete);
|
|
auto& mx = *ep;
|
|
|
|
db::extensions myexts;
|
|
myexts.add_commitlog_file_extension("hufflepuff", std::move(ep));
|
|
|
|
cfg.extensions = &myexts;
|
|
|
|
auto log = co_await commitlog::create_commitlog(cfg);
|
|
|
|
rp_set rps;
|
|
// uncomment for verbosity
|
|
// logging::logger_registry().set_logger_level("commitlog", logging::log_level::debug);
|
|
|
|
auto uuid = utils::UUID_gen::get_time_UUID();
|
|
auto size = log.max_record_size();
|
|
|
|
auto r = log.add_flush_handler([&](cf_id_type id, replay_position pos) {
|
|
log.discard_completed_segments(id, rps);
|
|
mx.fail = true;
|
|
});
|
|
|
|
try {
|
|
while (!mx.thrown) {
|
|
rp_handle h = co_await log.add_mutation(uuid, size, db::commitlog::force_sync::no, [&](db::commitlog::output& dst) {
|
|
dst.fill('1', size);
|
|
});
|
|
rps.put(std::move(h));
|
|
}
|
|
} catch (...) {
|
|
BOOST_FAIL("log write timed out. maybe it is deadlocked... Will not free log. ASAN errors and leaks will follow...");
|
|
}
|
|
|
|
co_await log.shutdown();
|
|
co_await log.clear();
|
|
}
|
|
|
|
/**
|
|
* Test generating an exception in segment file allocation
|
|
*/
|
|
SEASTAR_TEST_CASE(test_commitlog_exceptions_in_allocate_ex) {
|
|
co_await do_test_exception_in_allocate_ex(false);
|
|
}
|
|
|
|
/**
|
|
* Test generating an exception in segment file allocation, but also
|
|
* delete the file, which in turn should cause follow-up exceptions
|
|
* in cleanup delete. Which CL should handle
|
|
*/
|
|
|
|
SEASTAR_TEST_CASE(test_commitlog_exceptions_in_allocate_ex_deleted_file_no_recycle) {
|
|
co_await do_test_exception_in_allocate_ex(true);
|
|
}
|