Compare commits

...

5 Commits

Author SHA1 Message Date
Yaron Kaikov
d10aee15e7 release: prepare for 5.1.0-rc1 2022-09-02 06:15:05 +03:00
Avi Kivity
9e017cb1e6 Update seastar submodule (tls error handling)
* seastar f9f5228b74...3aa91b4d2d (1):
  > Merge 'tls: vec_push: handle async errors rather than throwing on_internal_error' from Benny Halevy

Fixes #11252
2022-09-01 13:10:13 +03:00
Avi Kivity
b8504cc9b2 .gitmodules: switch seastar to scylla-seastar.git
This allows us to backport seastar patches to branch-5.1 on
scylla-seastar.git.
2022-09-01 13:08:22 +03:00
Avi Kivity
856703a85e Merge 'row_cache: Fix missing row if upper bound of population range is evicted and has adjacent dummy' from Tomasz Grabiec
Scenario:

cache = [
    row(pos=2, continuous=false),
    row(pos=after(2), dummy=true)
]

Scanning read starts, starts populating [-inf, before(2)] from sstables.

row(pos=2) is evicted.

cache = [
    row(pos=after(2), dummy=true)
]

Scanning read finishes reading from sstables.

Refreshes cache cursor via
partition_snapshot_row_cursor::maybe_refresh(), which calls
partition_snapshot_row_cursor::advance_to() because iterators are
invalidated. This advances the cursor to
after(2). no_clustering_row_between(2, after(2)) returns true, so
advance_to() returns true, and maybe_refresh() returns true. This is
interpreted by the cache reader as "the cursor has not moved forward",
so it marks the range as complete, without emitting the row with
pos=2. Also, it marks row(pos=after(2)) as continuous, so later reads
will also miss the row.

The bug is in advance_to(): it uses
no_clustering_row_between(a, b) to determine its result, but that
predicate by definition excludes the starting key.

Discovered by row_cache_test.cc::test_concurrent_reads_and_eviction
with reduced key range in the random_mutation_generator (1024 -> 16).

Fixes #11239

Closes #11240

* github.com:scylladb/scylladb:
  test: mvcc: Fix illegal use of maybe_refresh()
  tests: row_cache_test: Add test_eviction_of_upper_bound_of_population_range()
  tests: row_cache_test: Introduce one_shot mode to throttle
  row_cache: Fix missing row if upper bound of population range is evicted and has adjacent dummy
2022-08-11 16:51:59 +02:00
Yaron Kaikov
86a6c1fb2b release: prepare for 5.1.0-rc0 2022-08-09 18:48:43 +03:00
7 changed files with 113 additions and 10 deletions

2
.gitmodules vendored
View File

@@ -1,6 +1,6 @@
[submodule "seastar"]
path = seastar
url = ../seastar
url = ../scylla-seastar
ignore = dirty
[submodule "swagger-ui"]
path = swagger-ui

View File

@@ -60,7 +60,7 @@ fi
# Default scylla product/version tags
PRODUCT=scylla
VERSION=5.1.0-dev
VERSION=5.1.0-rc1
if test -f version
then

View File

@@ -444,7 +444,7 @@ public:
// When throws, the cursor is invalidated and its position is not changed.
bool advance_to(position_in_partition_view lower_bound) {
maybe_advance_to(lower_bound);
return no_clustering_row_between(_schema, lower_bound, position());
return no_clustering_row_between_weak(_schema, lower_bound, position());
}
// Call only when valid.

View File

@@ -571,6 +571,20 @@ bool no_clustering_row_between(const schema& s, position_in_partition_view a, po
}
}
// Returns true if and only if there can't be any clustering_row with position >= a and < b.
// Unlike no_clustering_row_between(), the starting position "a" itself is included
// in the checked interval (hence "weak").
// It is assumed that a <= b.
inline
bool no_clustering_row_between_weak(const schema& s, position_in_partition_view a, position_in_partition_view b) {
    if (!a.has_key() || !b.has_key()) {
        // Without keys on both sides, emptiness is guaranteed only when
        // neither position carries a key.
        return !a.has_key() && !b.has_key();
    }
    clustering_key_prefix::equality eq(s);
    if (!eq(a.key(), b.key())) {
        // Different keys: a row may exist between them.
        return false;
    }
    // Same key: the interval is empty unless a admits the key itself
    // (weight below after_all_prefixed) while b extends past it.
    return a.get_bound_weight() == bound_weight::after_all_prefixed
        || b.get_bound_weight() != bound_weight::after_all_prefixed;
}
// Includes all position_in_partition objects "p" for which: start <= p < end
// And only those.
class position_range {

Submodule seastar updated: f9f5228b74...3aa91b4d2d

View File

@@ -641,7 +641,7 @@ SEASTAR_TEST_CASE(test_apply_to_incomplete_respects_continuity) {
static mutation_partition read_using_cursor(partition_snapshot& snap) {
tests::reader_concurrency_semaphore_wrapper semaphore;
partition_snapshot_row_cursor cur(*snap.schema(), snap);
cur.maybe_refresh();
cur.advance_to(position_in_partition::before_all_clustered_rows());
auto mp = read_partition_from(*snap.schema(), cur);
for (auto&& rt : snap.range_tombstones()) {
mp.apply_delete(*snap.schema(), rt);

View File

@@ -1235,9 +1235,13 @@ SEASTAR_TEST_CASE(test_update_failure) {
class throttle {
unsigned _block_counter = 0;
promise<> _p; // valid when _block_counter != 0, resolves when goes down to 0
std::optional<promise<>> _entered;
bool _one_shot;
public:
// one_shot means whether only the first enter() after block() will block.
throttle(bool one_shot = false) : _one_shot(one_shot) {}
future<> enter() {
if (_block_counter) {
if (_block_counter && (!_one_shot || _entered)) {
promise<> p1;
promise<> p2;
@@ -1249,16 +1253,21 @@ public:
p3.set_value();
});
_p = std::move(p2);
if (_entered) {
_entered->set_value();
_entered.reset();
}
return f1;
} else {
return make_ready_future<>();
}
}
void block() {
future<> block() {
++_block_counter;
_p = promise<>();
_entered = promise<>();
return _entered->get_future();
}
void unblock() {
@@ -1402,7 +1411,7 @@ SEASTAR_TEST_CASE(test_cache_population_and_update_race) {
mt2->apply(m);
}
thr.block();
auto f = thr.block();
auto m0_range = dht::partition_range::make_singular(ring[0].ring_position());
auto rd1 = cache.make_reader(s, semaphore.make_permit(), m0_range);
@@ -1413,6 +1422,7 @@ SEASTAR_TEST_CASE(test_cache_population_and_update_race) {
rd2.set_max_buffer_size(1);
auto rd2_fill_buffer = rd2.fill_buffer();
f.get();
sleep(10ms).get();
// This update should miss on all partitions
@@ -1540,12 +1550,13 @@ SEASTAR_TEST_CASE(test_cache_population_and_clear_race) {
mt2->apply(m);
}
thr.block();
auto f = thr.block();
auto rd1 = cache.make_reader(s, semaphore.make_permit());
rd1.set_max_buffer_size(1);
auto rd1_fill_buffer = rd1.fill_buffer();
f.get();
sleep(10ms).get();
// This update should miss on all partitions
@@ -3994,3 +4005,81 @@ SEASTAR_TEST_CASE(row_cache_is_populated_using_compacting_sstable_reader) {
BOOST_ASSERT(rt.calculate_size() == 1);
});
}
// Reproducer for #11239: a scanning cache read must not lose a row when the
// upper bound of its population range is evicted while an adjacent dummy
// entry remains in cache. Exercises advance_to()/maybe_refresh() behavior
// described in the commit message above.
SEASTAR_TEST_CASE(test_eviction_of_upper_bound_of_population_range) {
return seastar::async([] {
simple_schema s;
tests::reader_concurrency_semaphore_wrapper semaphore;
auto cache_mt = make_lw_shared<replica::memtable>(s.schema());
auto pkey = s.make_pkey("pk");
// Underlying data: a single partition with rows at ck(1) and ck(2).
mutation m1(s.schema(), pkey);
s.add_row(m1, s.make_ckey(1), "v1");
s.add_row(m1, s.make_ckey(2), "v2");
cache_mt->apply(m1);
cache_tracker tracker;
// one_shot throttle: only the first enter() after block() will block,
// letting us freeze the scanning read mid-population.
throttle thr(true);
auto cache_source = make_decorated_snapshot_source(snapshot_source([&] { return cache_mt->as_data_source(); }),
[&] (mutation_source src) {
return throttled_mutation_source(thr, std::move(src));
});
row_cache cache(s.schema(), cache_source, tracker);
auto pr = dht::partition_range::make_singular(pkey);
// Reads [start, end] from the cache and verifies the result matches a
// direct read of the same slice from the memtable.
auto read = [&] (int start, int end) {
auto slice = partition_slice_builder(*s.schema())
.with_range(query::clustering_range::make(s.make_ckey(start), s.make_ckey(end)))
.build();
auto rd = cache.make_reader(s.schema(), semaphore.make_permit(), pr, slice);
auto close_rd = deferred_close(rd);
auto m_cache = read_mutation_from_flat_mutation_reader(rd).get0();
close_rd.close_now();
rd = cache_mt->make_flat_reader(s.schema(), semaphore.make_permit(), pr, slice);
auto close_rd2 = deferred_close(rd);
auto m_mt = read_mutation_from_flat_mutation_reader(rd).get0();
BOOST_REQUIRE(m_mt);
assert_that(m_cache).has_mutation().is_equal_to(*m_mt);
};
// populate [2]
{
auto slice = partition_slice_builder(*s.schema())
.with_range(query::clustering_range::make_singular(s.make_ckey(2)))
.build();
assert_that(cache.make_reader(s.schema(), semaphore.make_permit(), pr, slice))
.has_monotonic_positions();
}
// Block the source; `arrived` resolves once the scanning read below
// reaches the (throttled) underlying reader.
auto arrived = thr.block();
// Read [0, 2]
auto f = seastar::async([&] {
read(0, 2);
});
arrived.get();
// populate (2, 3]
// Creates an entry adjacent to ck(2) (a dummy at after(2)) while the
// scanning read is parked inside the source.
{
auto slice = partition_slice_builder(*s.schema())
.with_range(query::clustering_range::make(query::clustering_range::bound(s.make_ckey(2), false),
query::clustering_range::bound(s.make_ckey(3), true)))
.build();
assert_that(cache.make_reader(s.schema(), semaphore.make_permit(), pr, slice))
.has_monotonic_positions();
}
testlog.trace("Evicting");
// Evict ck(2) — the upper bound of the in-flight population range —
// leaving only the adjacent dummy at after(2) in cache.
evict_one_row(tracker); // Evicts before(0)
evict_one_row(tracker); // Evicts ck(2)
testlog.trace("Unblocking");
thr.unblock();
// The resumed read must still return ck(1) and ck(2) (the buggy code
// dropped ck(2) and marked the range continuous).
f.get();
// A follow-up read verifies the cache was not left with a wrongly
// continuous range that would hide ck(2) from later reads.
read(0, 3);
});
}