mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-12 19:02:12 +00:00
Merge 'range_tombstone_change_generator: fix an edge case in flush()' from Michał Chojnowski
range_tombstone_change_generator::flush() mishandles the case when two range tombstones are adjacent and flush(pos, end_of_range=true) is called with pos equal to the end bound of the lesser-position range tombstone. In such case, the start change of the greater-position rtc will be accidentally emitted, and there won't be an end change, which breaks reader assumptions by ending the stream with an unclosed range tombstone, triggering an assertion. This is due to a non-strict inequality used in a place where strict inequality should be used. The modified line was intended to close range tombstones which end exactly on the flush position, but this is unnecessary because such range tombstones are handled by the last `if` in the function anyway. Instead, this line caused range tombstones beginning right after the flush position to be emitted sometimes. Fixes https://github.com/scylladb/scylladb/issues/12462 Closes #13894 * github.com:scylladb/scylladb: tests: row_cache: Add reproducer for reader producing missing closing range tombstone range_tombstone_change_generator: fix an edge case in flush()
This commit is contained in:
@@ -83,7 +83,7 @@ public:
|
||||
|
||||
position_in_partition::tri_compare cmp(_schema);
|
||||
std::optional<range_tombstone> prev;
|
||||
const bool allow_eq = end_of_range || upper_bound.is_after_all_clustered_rows(_schema);
|
||||
const bool allow_eq = upper_bound.is_after_all_clustered_rows(_schema);
|
||||
const auto should_flush = [&] (position_in_partition_view pos) {
|
||||
const auto res = cmp(pos, upper_bound);
|
||||
if (allow_eq) {
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
#include "readers/from_mutations_v2.hh"
|
||||
#include "readers/delegating_v2.hh"
|
||||
#include "readers/empty_v2.hh"
|
||||
#include "seastar/testing/thread_test_case.hh"
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
@@ -2369,6 +2370,65 @@ SEASTAR_TEST_CASE(test_tombstone_and_row_with_small_buffer) {
|
||||
});
|
||||
}
|
||||
|
||||
// Reproducer for https://github.com/scylladb/scylladb/issues/12462
|
||||
SEASTAR_THREAD_TEST_CASE(test_range_tombstone_adjacent_to_slice_is_closed) {
|
||||
simple_schema s;
|
||||
tests::reader_concurrency_semaphore_wrapper semaphore;
|
||||
cache_tracker tracker;
|
||||
memtable_snapshot_source underlying(s.schema());
|
||||
|
||||
auto pk = s.make_pkey(0);
|
||||
auto pr = dht::partition_range::make_singular(pk);
|
||||
|
||||
mutation m1(s.schema(), pk);
|
||||
auto rt0 = s.make_range_tombstone(*position_range_to_clustering_range(position_range(
|
||||
position_in_partition::before_key(s.make_ckey(1)),
|
||||
position_in_partition::before_key(s.make_ckey(3))), *s.schema()));
|
||||
m1.partition().apply_delete(*s.schema(), rt0);
|
||||
s.add_row(m1, s.make_ckey(0), "v1");
|
||||
|
||||
underlying.apply(m1);
|
||||
|
||||
row_cache cache(s.schema(), snapshot_source([&] { return underlying(); }), tracker);
|
||||
populate_range(cache);
|
||||
|
||||
// Create a reader to pin the MVCC version
|
||||
auto rd0 = cache.make_reader(s.schema(), semaphore.make_permit(), pr);
|
||||
auto close_rd0 = deferred_close(rd0);
|
||||
rd0.set_max_buffer_size(1);
|
||||
rd0.fill_buffer().get();
|
||||
|
||||
mutation m2(s.schema(), pk);
|
||||
auto rt1 = s.make_range_tombstone(*position_range_to_clustering_range(position_range(
|
||||
position_in_partition::before_key(s.make_ckey(1)),
|
||||
position_in_partition::before_key(s.make_ckey(2))), *s.schema()));
|
||||
m2.partition().apply_delete(*s.schema(), rt1);
|
||||
apply(cache, underlying, m2);
|
||||
|
||||
// State of cache:
|
||||
// v2: ROW(0), RT(before(1), before(2))@t1
|
||||
// v1: RT(before(1), before(3))@t0
|
||||
|
||||
// range_tombstone_change_generator will work with the stream: RT(1, before(2))@t1, RT(before(2), before(3))@t0
|
||||
// It's important that there is an RT which starts exactly at the slice upper bound to trigger
|
||||
// the problem, and the RT will be in the stream only because it is a residual from RT(before(1), before(3)),
|
||||
// which overlaps with the slice in the older version. That's why we need two MVCC versions.
|
||||
|
||||
auto slice = partition_slice_builder(*s.schema())
|
||||
.with_range(*position_range_to_clustering_range(position_range(
|
||||
position_in_partition::before_key(s.make_ckey(0)),
|
||||
position_in_partition::before_key(s.make_ckey(2))), *s.schema()))
|
||||
.build();
|
||||
|
||||
assert_that(cache.make_reader(s.schema(), semaphore.make_permit(), pr, slice))
|
||||
.produces_partition_start(pk)
|
||||
.produces_row_with_key(s.make_ckey(0))
|
||||
.produces_range_tombstone_change(start_change(rt1))
|
||||
.produces_range_tombstone_change(end_change(rt1))
|
||||
.produces_partition_end()
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
|
||||
//
|
||||
// Tests the following case of eviction and re-population:
|
||||
//
|
||||
|
||||
Reference in New Issue
Block a user