mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-28 10:41:12 +00:00
Revert "Commitlog: Pre-allocate "reserve" segments"
This reverts commit cbf3b63853, due to
reports of increased latency (instead of the opposite).
This commit is contained in:
@@ -176,7 +176,7 @@ public:
|
||||
cfg.commit_log_location = "/var/lib/scylla/commitlog";
|
||||
}
|
||||
logger.trace("Commitlog maximum disk size: {} MB / cpu ({} cpus)",
|
||||
max_disk_size / (1024*1024), smp::count);
|
||||
max_disk_size / (1024*1024));
|
||||
_regs = create_counters();
|
||||
}
|
||||
|
||||
@@ -187,8 +187,6 @@ public:
|
||||
future<> init();
|
||||
future<sseg_ptr> new_segment();
|
||||
future<sseg_ptr> active_segment();
|
||||
future<sseg_ptr> allocate_segment(bool active);
|
||||
|
||||
future<> clear();
|
||||
future<> sync_all_segments();
|
||||
future<> shutdown();
|
||||
@@ -198,7 +196,6 @@ public:
|
||||
void discard_unused_segments();
|
||||
void discard_completed_segments(const cf_id_type& id,
|
||||
const replay_position& pos);
|
||||
void on_timer();
|
||||
void sync();
|
||||
void arm() {
|
||||
_timer.arm(std::chrono::milliseconds(cfg.commitlog_sync_period_in_ms));
|
||||
@@ -227,21 +224,11 @@ public:
|
||||
private:
|
||||
segment_id_type _ids = 0;
|
||||
std::vector<sseg_ptr> _segments;
|
||||
std::deque<sseg_ptr> _reserve_segments;
|
||||
std::vector<buffer_type> _temp_buffers;
|
||||
std::unordered_map<flush_handler_id, flush_handler> _flush_handlers;
|
||||
flush_handler_id _flush_ids = 0;
|
||||
replay_position _flush_position;
|
||||
timer<clock_type> _timer;
|
||||
size_t _reserve_allocating = 0;
|
||||
// # segments to try to keep available in reserve
|
||||
// i.e. the amount of segments we expect to consume inbetween timer
|
||||
// callbacks.
|
||||
// The idea is that since the files are 0 len at start, and thus cost little,
|
||||
// it is easier to adapt this value compared to timer freq.
|
||||
size_t _num_reserve_segments = 0;
|
||||
seastar::gate _gate;
|
||||
uint64_t _new_counter = 0;
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -292,12 +279,12 @@ public:
|
||||
// TODO : tune initial / default size
|
||||
static constexpr size_t default_size = align_up<size_t>(128 * 1024, alignment);
|
||||
|
||||
segment(segment_manager* m, const descriptor& d, file && f, bool active)
|
||||
segment(segment_manager* m, const descriptor& d, file && f)
|
||||
: _segment_manager(m), _desc(std::move(d)), _file(std::move(f)), _sync_time(
|
||||
clock_type::now())
|
||||
{
|
||||
++_segment_manager->totals.segments_created;
|
||||
logger.debug("Created new {} segment {}", active ? "active" : "reserve", *this);
|
||||
logger.debug("Created new segment {}", *this);
|
||||
}
|
||||
~segment() {
|
||||
if (is_clean()) {
|
||||
@@ -320,7 +307,7 @@ public:
|
||||
auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
now - _sync_time).count();
|
||||
if ((_segment_manager->cfg.commitlog_sync_period_in_ms * 2) < uint64_t(ms)) {
|
||||
logger.debug("{} needs sync. {} ms elapsed", *this, ms);
|
||||
logger.debug("Need sync. {} ms elapsed", ms);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@@ -338,7 +325,7 @@ public:
|
||||
// It is when it was initiated
|
||||
_sync_time = clock_type::now();
|
||||
if (position() <= _flush_pos) {
|
||||
logger.trace("Sync not needed {}: ({} / {})", *this, position(), _flush_pos);
|
||||
logger.trace("Sync not needed : ({} / {})", position(), _flush_pos);
|
||||
return make_ready_future<sseg_ptr>(shared_from_this());
|
||||
}
|
||||
return cycle().then([](auto seg) {
|
||||
@@ -354,7 +341,7 @@ public:
|
||||
pos = _file_pos;
|
||||
}
|
||||
if (pos != 0 && pos <= _flush_pos) {
|
||||
logger.trace("{} already synced! ({} < {})", *this, pos, _flush_pos);
|
||||
logger.trace("Already synced! ({} < {})", pos, _flush_pos);
|
||||
return make_ready_future<sseg_ptr>(std::move(me));
|
||||
}
|
||||
logger.trace("Syncing {} -> {}", _flush_pos, pos);
|
||||
@@ -366,7 +353,7 @@ public:
|
||||
_dwrite.write_unlock(); // release it already.
|
||||
pos = std::max(pos, _file_pos);
|
||||
if (pos <= _flush_pos) {
|
||||
logger.trace("{} already synced! ({} < {})", *this, pos, _flush_pos);
|
||||
logger.trace("Already synced! ({} < {})", pos, _flush_pos);
|
||||
return make_ready_future<sseg_ptr>(std::move(me));
|
||||
}
|
||||
++_segment_manager->totals.pending_operations;
|
||||
@@ -385,7 +372,7 @@ public:
|
||||
}).then([this, pos, me = std::move(me)]() {
|
||||
_flush_pos = std::max(pos, _flush_pos);
|
||||
++_segment_manager->totals.flush_count;
|
||||
logger.trace("{} synced to {}", *this, _flush_pos);
|
||||
logger.trace("Synced to {}", _flush_pos);
|
||||
return make_ready_future<sseg_ptr>(std::move(me));
|
||||
}).finally([this] {
|
||||
--_segment_manager->totals.pending_operations;
|
||||
@@ -484,13 +471,16 @@ public:
|
||||
}
|
||||
// gah, partial write. should always get here with dma chunk sized
|
||||
// "bytes", but lets make sure...
|
||||
logger.debug("Partial write {}: {}/{} bytes", *this, *written, size);
|
||||
logger.debug("Partial write: {}/{} bytes", *written, size);
|
||||
*written = align_down(*written, alignment);
|
||||
return make_ready_future<stop_iteration>(stop_iteration::no);
|
||||
// TODO: retry/ignore/fail/stop - optional behaviour in origin.
|
||||
// we fast-fail the whole commit.
|
||||
} catch (std::exception& e) {
|
||||
logger.error("Failed to persist commits to disk: {}", e.what());
|
||||
throw;
|
||||
} catch (...) {
|
||||
logger.error("Failed to persist commits to disk for {}: {}", *this, std::current_exception());
|
||||
logger.error("Failed to persist commits to disk.");
|
||||
throw;
|
||||
}
|
||||
});
|
||||
@@ -681,9 +671,11 @@ future<> db::commitlog::segment_manager::init() {
|
||||
|
||||
// base id counter is [ <shard> | <base> ]
|
||||
_ids = replay_position(engine().cpu_id(), id).id;
|
||||
// always run the timer now, since we need to handle segment pre-alloc etc as well.
|
||||
_timer.set_callback(std::bind(&segment_manager::on_timer, this));
|
||||
this->arm();
|
||||
|
||||
if (cfg.mode != sync_mode::BATCH) {
|
||||
_timer.set_callback(std::bind(&segment_manager::sync, this));
|
||||
this->arm();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -794,36 +786,22 @@ void db::commitlog::segment_manager::flush_segments(bool force) {
|
||||
}
|
||||
}
|
||||
|
||||
future<db::commitlog::segment_manager::sseg_ptr> db::commitlog::segment_manager::allocate_segment(bool active) {
|
||||
descriptor d(next_id());
|
||||
return engine().open_file_dma(cfg.commit_log_location + "/" + d.filename(), open_flags::wo | open_flags::create).then([this, d, active](file f) {
|
||||
auto s = make_lw_shared<segment>(this, d, std::move(f), active);
|
||||
return make_ready_future<sseg_ptr>(s);
|
||||
});
|
||||
}
|
||||
|
||||
future<db::commitlog::segment_manager::sseg_ptr> db::commitlog::segment_manager::new_segment() {
|
||||
if (_shutdown) {
|
||||
throw std::runtime_error("Commitlog has been shut down. Cannot add data");
|
||||
}
|
||||
|
||||
++_new_counter;
|
||||
|
||||
if (_reserve_segments.empty()) {
|
||||
if (_num_reserve_segments < cfg.max_reserve_segments) {
|
||||
++_num_reserve_segments;
|
||||
logger.trace("Increased segment reserve count to {}", _num_reserve_segments);
|
||||
descriptor d(next_id());
|
||||
return engine().open_file_dma(cfg.commit_log_location + "/" + d.filename(), open_flags::wo | open_flags::create).then([this, d](file f) {
|
||||
_segments.emplace_back(make_lw_shared<segment>(this, d, std::move(f)));
|
||||
auto max = max_disk_size;
|
||||
auto cur = totals.total_size_on_disk;
|
||||
if (max != 0 && cur >= max) {
|
||||
logger.debug("Size on disk {} MB exceeds local maximum {} MB", cur / (1024 * 1024), max / (1024 * 1024));
|
||||
flush_segments();
|
||||
}
|
||||
return allocate_segment(true).then([this](sseg_ptr s) {
|
||||
_segments.push_back(s);
|
||||
return make_ready_future<sseg_ptr>(s);
|
||||
});
|
||||
}
|
||||
|
||||
_segments.push_back(_reserve_segments.front());
|
||||
_reserve_segments.pop_front();
|
||||
logger.trace("Acquired segment {} from reserve", _segments.back());
|
||||
return make_ready_future<sseg_ptr>(_segments.back());
|
||||
}).then([this] {
|
||||
return make_ready_future<sseg_ptr>(_segments.back());
|
||||
});
|
||||
}
|
||||
|
||||
future<db::commitlog::segment_manager::sseg_ptr> db::commitlog::segment_manager::active_segment() {
|
||||
@@ -846,7 +824,7 @@ future<db::commitlog::segment_manager::sseg_ptr> db::commitlog::segment_manager:
|
||||
*/
|
||||
void db::commitlog::segment_manager::discard_completed_segments(
|
||||
const cf_id_type& id, const replay_position& pos) {
|
||||
logger.debug("Discard completed segments for {}, table {}", pos, id);
|
||||
logger.debug("discard completed log segments for {}, table {}", pos, id);
|
||||
for (auto&s : _segments) {
|
||||
s->mark_clean(id, pos);
|
||||
}
|
||||
@@ -854,7 +832,7 @@ void db::commitlog::segment_manager::discard_completed_segments(
|
||||
}
|
||||
|
||||
std::ostream& db::operator<<(std::ostream& out, const db::commitlog::segment& s) {
|
||||
return out << s._desc.filename();
|
||||
return out << "commit log segment (" << s._desc.filename() << ")";
|
||||
}
|
||||
|
||||
std::ostream& db::operator<<(std::ostream& out, const db::commitlog::segment::cf_mark& m) {
|
||||
@@ -868,10 +846,10 @@ std::ostream& db::operator<<(std::ostream& out, const db::replay_position& p) {
|
||||
void db::commitlog::segment_manager::discard_unused_segments() {
|
||||
auto i = std::remove_if(_segments.begin(), _segments.end(), [=](auto& s) {
|
||||
if (s->is_unused()) {
|
||||
logger.debug("Segment {} is unused", *s);
|
||||
logger.debug("{} is unused", *s);
|
||||
return true;
|
||||
}
|
||||
logger.debug("Not safe to delete segment {}; dirty is {}", s, segment::cf_mark {*s});
|
||||
logger.debug("Not safe to delete {}; dirty is {}", s, segment::cf_mark {*s});
|
||||
return false;
|
||||
});
|
||||
if (i != _segments.end()) {
|
||||
@@ -883,22 +861,16 @@ future<> db::commitlog::segment_manager::sync_all_segments() {
|
||||
logger.debug("Issuing sync for all segments");
|
||||
return parallel_for_each(_segments, [this](sseg_ptr s) {
|
||||
return s->sync().then([](sseg_ptr s) {
|
||||
logger.debug("Synced segment {}", *s);
|
||||
logger.debug("Synced {}", *s);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
future<> db::commitlog::segment_manager::shutdown() {
|
||||
if (!_shutdown) {
|
||||
_shutdown = true;
|
||||
_timer.cancel();
|
||||
return _gate.close().then([this] {
|
||||
return parallel_for_each(_segments, [this](sseg_ptr s) {
|
||||
return s->shutdown();
|
||||
});
|
||||
});
|
||||
}
|
||||
return make_ready_future<>();
|
||||
_shutdown = true;
|
||||
return parallel_for_each(_segments, [this](sseg_ptr s) {
|
||||
return s->shutdown();
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -909,8 +881,6 @@ future<> db::commitlog::segment_manager::shutdown() {
|
||||
*/
|
||||
future<> db::commitlog::segment_manager::clear() {
|
||||
logger.debug("Clearing all segments");
|
||||
_shutdown = true;
|
||||
_timer.cancel();
|
||||
flush_segments(true);
|
||||
return sync_all_segments().then([this] {
|
||||
for (auto& s : _segments) {
|
||||
@@ -926,51 +896,6 @@ void db::commitlog::segment_manager::sync() {
|
||||
for (auto& s : _segments) {
|
||||
s->sync(); // we do not care about waiting...
|
||||
}
|
||||
}
|
||||
|
||||
void db::commitlog::segment_manager::on_timer() {
|
||||
if (cfg.mode != sync_mode::BATCH) {
|
||||
sync();
|
||||
}
|
||||
// IFF a new segment was put in use since last we checked, and we're
|
||||
// above threshold, request flush.
|
||||
if (_new_counter > 0) {
|
||||
auto max = max_disk_size;
|
||||
auto cur = totals.total_size_on_disk;
|
||||
if (max != 0 && cur >= max) {
|
||||
_new_counter = 0;
|
||||
logger.debug("Size on disk {} MB exceeds local maximum {} MB", cur / (1024 * 1024), max / (1024 * 1024));
|
||||
flush_segments();
|
||||
}
|
||||
}
|
||||
// Gate, because we are starting potentially blocking ops
|
||||
// without waiting for them, so segement_manager could be shut down
|
||||
// while they are running.
|
||||
seastar::with_gate(_gate, [this] {
|
||||
// take outstanding allocations into regard. This is paranoid,
|
||||
// but if for some reason the file::open takes longer than timer period,
|
||||
// we could flood the reserve list with new segments
|
||||
auto n = _reserve_segments.size() + _reserve_allocating;
|
||||
return parallel_for_each(boost::irange(n, _num_reserve_segments), [this, n](auto i) {
|
||||
++_reserve_allocating;
|
||||
return this->allocate_segment(false).then([this](sseg_ptr s) {
|
||||
if (!_shutdown) {
|
||||
// insertion sort.
|
||||
auto i = std::upper_bound(_reserve_segments.begin(), _reserve_segments.end(), s, [](auto s1, auto s2) {
|
||||
const descriptor& d1 = s1->_desc;
|
||||
const descriptor& d2 = s2->_desc;
|
||||
return d1.id < d2.id;
|
||||
});
|
||||
i = _reserve_segments.emplace(i, std::move(s));
|
||||
logger.trace("Added reserve segment {}", *i);
|
||||
}
|
||||
}).finally([this] {
|
||||
--_reserve_allocating;
|
||||
});
|
||||
});
|
||||
}).handle_exception([](auto ep) {
|
||||
logger.warn("Exception in segment reservation: {}", ep);
|
||||
});
|
||||
arm();
|
||||
}
|
||||
|
||||
@@ -1294,14 +1219,6 @@ uint64_t db::commitlog::get_pending_tasks() const {
|
||||
return _segment_manager->totals.pending_operations;
|
||||
}
|
||||
|
||||
uint64_t db::commitlog::get_num_segments_created() const {
|
||||
return _segment_manager->totals.segments_created;
|
||||
}
|
||||
|
||||
uint64_t db::commitlog::get_num_segments_destroyed() const {
|
||||
return _segment_manager->totals.segments_destroyed;
|
||||
}
|
||||
|
||||
future<std::vector<db::commitlog::descriptor>> db::commitlog::list_existing_descriptors() const {
|
||||
return list_existing_descriptors(active_config().commit_log_location);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user