mirror of
https://github.com/scylladb/scylladb.git
synced 2026-06-08 16:03:20 +00:00
Merge 'Use commitlog cleanup table + repair history to reduce tombstone gc window' from Calle Wilund
Fixes: SCYLLADB-1416. Adds an optional replay_position to the (transient) repair history, and provides it (when available) to the tombstone_gc gc_min_time callback checks. When updating the repair history, we set a commitlog cleanup record that prevents replay of data below the current flush position, and add that flush position to the local repair history. The idea is that, when set (non-zero rp), this position can be used to limit the commitlog segments from which we check timestamps, potentially filtering out older ones. This allows the tombstone gc repair case to filter out some commitlog data, thus reducing the retention window for tombstones. Note: this does not affect the persisted repair history, only node-local, transient data. Feature improvement. No backport needed. Closes scylladb/scylladb#29854 * github.com:scylladb/scylladb: test_...data_resurrection: Add test case for repair CL truncation; test.py: Make --log-level work again; repair: Set commitlog cleanup record and store replay pos in history; tombstone_gc: Add optional replay_position to repair_history; main/cql_test_env: Make tombstone gc_time_min_source use just table CL; table: Add ready_for_writes attribute; db::commitlog: Add optional replay_position sieve to min_gc_time; db::replay_position: Add attribute valid()
This commit is contained in:
@@ -488,7 +488,7 @@ public:
|
||||
future<std::vector<descriptor>> list_descriptors(sstring dir) const;
|
||||
future<std::vector<sstring>> get_segments_to_replay() const;
|
||||
|
||||
gc_clock::time_point min_gc_time(const cf_id_type&) const;
|
||||
gc_clock::time_point min_gc_time(const cf_id_type&, const db::replay_position&) const;
|
||||
|
||||
flush_handler_id add_flush_handler(flush_handler h) {
|
||||
auto id = ++_flush_ids;
|
||||
@@ -2053,9 +2053,12 @@ future<std::vector<sstring>> db::commitlog::segment_manager::get_segments_to_rep
|
||||
co_return segments_to_replay;
|
||||
}
|
||||
|
||||
gc_clock::time_point db::commitlog::segment_manager::min_gc_time(const cf_id_type& id) const {
|
||||
gc_clock::time_point db::commitlog::segment_manager::min_gc_time(const cf_id_type& id, const db::replay_position& rp) const {
|
||||
auto res = gc_clock::time_point::max();
|
||||
for (auto& s : _segments) {
|
||||
if (rp.valid() && replay_position(s->_desc.id, s->position()) <= rp) {
|
||||
continue;
|
||||
}
|
||||
res = std::min(res, s->min_time(id));
|
||||
}
|
||||
return res;
|
||||
@@ -3956,8 +3959,8 @@ future<std::vector<sstring>> db::commitlog::list_existing_segments(const sstring
|
||||
});
|
||||
}
|
||||
|
||||
gc_clock::time_point db::commitlog::min_gc_time(const cf_id_type& id) const {
|
||||
return _segment_manager->min_gc_time(id);
|
||||
gc_clock::time_point db::commitlog::min_gc_time(const cf_id_type& id, const db::replay_position& rp) const {
|
||||
return _segment_manager->min_gc_time(id, rp);
|
||||
}
|
||||
|
||||
db::replay_position db::commitlog::min_position() const {
|
||||
|
||||
Reference in New Issue
Block a user