Merge 'range_tombstone_change_generator: fix an edge case in flush()' from Michał Chojnowski

range_tombstone_change_generator::flush() mishandles the case when two range
tombstones are adjacent and flush(pos, end_of_range=true) is called with pos
equal to the end bound of the lesser-position range tombstone.

In such case, the start change of the greater-position rtc will be accidentally
emitted, and there won't be an end change, which breaks reader assumptions by
ending the stream with an unclosed range tombstone, triggering an assertion.

This is due to a non-strict inequality used in a place where strict inequality
should be used. The modified line was intended to close range tombstones
which end exactly on the flush position, but this is unnecessary because such
range tombstones are handled by the last `if` in the function anyway.
Instead, this line caused range tombstones beginning right after the flush
position to be emitted sometimes.

Fixes https://github.com/scylladb/scylladb/issues/12462

Closes #13894

* github.com:scylladb/scylladb:
  tests: row_cache: Add reproducer for reader producing missing closing range tombstone
  range_tombstone_change_generator: fix an edge case in flush()
This commit is contained in:
Tomasz Grabiec
2023-05-15 23:29:08 +02:00
2 changed files with 61 additions and 1 deletions

View File

@@ -83,7 +83,7 @@ public:
position_in_partition::tri_compare cmp(_schema);
std::optional<range_tombstone> prev;
const bool allow_eq = end_of_range || upper_bound.is_after_all_clustered_rows(_schema);
const bool allow_eq = upper_bound.is_after_all_clustered_rows(_schema);
const auto should_flush = [&] (position_in_partition_view pos) {
const auto res = cmp(pos, upper_bound);
if (allow_eq) {

View File

@@ -37,6 +37,7 @@
#include "readers/from_mutations_v2.hh"
#include "readers/delegating_v2.hh"
#include "readers/empty_v2.hh"
#include "seastar/testing/thread_test_case.hh"
using namespace std::chrono_literals;
@@ -2369,6 +2370,65 @@ SEASTAR_TEST_CASE(test_tombstone_and_row_with_small_buffer) {
});
}
// Reproducer for https://github.com/scylladb/scylladb/issues/12462
SEASTAR_THREAD_TEST_CASE(test_range_tombstone_adjacent_to_slice_is_closed) {
simple_schema s;
tests::reader_concurrency_semaphore_wrapper semaphore;
cache_tracker tracker;
memtable_snapshot_source underlying(s.schema());
auto pk = s.make_pkey(0);
auto pr = dht::partition_range::make_singular(pk);
mutation m1(s.schema(), pk);
auto rt0 = s.make_range_tombstone(*position_range_to_clustering_range(position_range(
position_in_partition::before_key(s.make_ckey(1)),
position_in_partition::before_key(s.make_ckey(3))), *s.schema()));
m1.partition().apply_delete(*s.schema(), rt0);
s.add_row(m1, s.make_ckey(0), "v1");
underlying.apply(m1);
row_cache cache(s.schema(), snapshot_source([&] { return underlying(); }), tracker);
populate_range(cache);
// Create a reader to pin the MVCC version
auto rd0 = cache.make_reader(s.schema(), semaphore.make_permit(), pr);
auto close_rd0 = deferred_close(rd0);
rd0.set_max_buffer_size(1);
rd0.fill_buffer().get();
mutation m2(s.schema(), pk);
auto rt1 = s.make_range_tombstone(*position_range_to_clustering_range(position_range(
position_in_partition::before_key(s.make_ckey(1)),
position_in_partition::before_key(s.make_ckey(2))), *s.schema()));
m2.partition().apply_delete(*s.schema(), rt1);
apply(cache, underlying, m2);
// State of cache:
// v2: ROW(0), RT(before(1), before(2))@t1
// v1: RT(before(1), before(3))@t0
// range_tombstone_change_generator will work with the stream: RT(1, before(2))@t1, RT(before(2), before(3))@t0
// It's important that there is an RT which starts exactly at the slice upper bound to trigger
// the problem, and the RT will be in the stream only because it is a residual from RT(before(1), before(3)),
// which overlaps with the slice in the older version. That's why we need two MVCC versions.
auto slice = partition_slice_builder(*s.schema())
.with_range(*position_range_to_clustering_range(position_range(
position_in_partition::before_key(s.make_ckey(0)),
position_in_partition::before_key(s.make_ckey(2))), *s.schema()))
.build();
assert_that(cache.make_reader(s.schema(), semaphore.make_permit(), pr, slice))
.produces_partition_start(pk)
.produces_row_with_key(s.make_ckey(0))
.produces_range_tombstone_change(start_change(rt1))
.produces_range_tombstone_change(end_change(rt1))
.produces_partition_end()
.produces_end_of_stream();
}
//
// Tests the following case of eviction and re-population:
//