Currently the timeout is opt-in: every method that has a timeout parameter at all defaults it to `db::no_timeout`. This means that ensuring the timeout is used where it should be is left entirely to the author and the reviewers of the code. As humans are notoriously prone to mistakes, this has resulted in very inconsistent usage: many clients of `flat_mutation_reader` pass the timeout only to some member functions and only at certain call sites. This is small wonder, considering that some core operations like `operator()()` only recently received a timeout parameter and others like `peek()` didn't even have one until this patch. Both of these methods call `fill_buffer()`, which potentially talks to the lower layers and is supposed to propagate the timeout. All this makes `flat_mutation_reader`'s timeout effectively useless.

To bring order to this chaos, make the timeout parameter mandatory on all `flat_mutation_reader` methods that need it. This way the compiler reminds developers when they forget to pass the timeout. Clients can still opt out by passing `db::no_timeout` (the previous default value), but this is now explicit and developers should think before typing it.

There were surprisingly few core call sites to fix up. Where a timeout was available nearby I propagated it so it could be passed to the reader; where it wasn't, I passed `db::no_timeout`. Authors of the latter kind of code (view, streaming and repair are notable examples) should consider propagating a timeout down if needed. In the test code (the vast majority of the changes) I simply used `db::no_timeout` everywhere.

Tests: unit(release, debug)

Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <1edc10802d5eb23de8af28c9f48b8d3be0f1a468.1536744563.git.bdenes@scylladb.com>
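
To illustrate the effect at a call site, here is a minimal sketch (not part of this patch). The `consume_one()` helper and its arguments are hypothetical; only `operator()`, `fill_buffer()` and `db::no_timeout` are the APIs discussed above.

// Hypothetical caller, shown only to illustrate the now-mandatory timeout argument.
future<> consume_one(flat_mutation_reader& reader, db::timeout_clock::time_point timeout) {
    // operator() no longer defaults its timeout; forgetting it is a compile error
    // instead of a silent db::no_timeout.
    return reader(timeout).then([] (mutation_fragment_opt mf) {
        // ... process *mf ...
    });
}

// Opting out is still possible, but now has to be spelled out at the call site:
//   reader.fill_buffer(db::no_timeout);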
/*
 * Copyright (C) 2017 ScyllaDB
 */

/*
 * This file is part of Scylla.
 *
 * Scylla is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Scylla is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
 */

#pragma once

#include <vector>
#include "row_cache.hh"
#include "mutation_reader.hh"
#include "mutation_fragment.hh"
#include "partition_version.hh"
#include "utils/logalloc.hh"
#include "query-request.hh"
#include "partition_snapshot_reader.hh"
#include "partition_snapshot_row_cursor.hh"
#include "read_context.hh"
#include "flat_mutation_reader.hh"

namespace cache {

extern logging::logger clogger;
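
// Reads a single partition from a cache partition_snapshot. Serves fragments
// from the snapshot for clustering ranges that are marked continuous, falls
// back to the underlying reader (through read_context) for the gaps, and
// populates cache entries and continuity information on the way back,
// provided can_populate() still holds.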
class cache_flat_mutation_reader final : public flat_mutation_reader::impl {
    enum class state {
        before_static_row,

        // Invariants:
        //  - position_range(_lower_bound, _upper_bound) covers all not yet emitted positions from current range
        //  - if _next_row has valid iterators:
        //     - _next_row points to the nearest row in cache >= _lower_bound
        //     - _next_row_in_range = _next_row.position() < _upper_bound
        //  - if _next_row doesn't have valid iterators, it has no meaning.
        reading_from_cache,

        // Starts reading from underlying reader.
        // The range to read is position_range(_lower_bound, min(_next_row.position(), _upper_bound)).
        // Invariants:
        //  - _next_row_in_range = _next_row.position() < _upper_bound
        move_to_underlying,

        // Invariants:
        //  - Upper bound of the read is min(_next_row.position(), _upper_bound)
        //  - _next_row_in_range = _next_row.position() < _upper_bound
        //  - _last_row points at a direct predecessor of the next row which is going to be read.
        //    Used for populating continuity.
        //  - _population_range_starts_before_all_rows is set accordingly
        reading_from_underlying,

        end_of_stream
    };
    partition_snapshot_ptr _snp;
    position_in_partition::tri_compare _position_cmp;

    query::clustering_key_filter_ranges _ck_ranges;
    query::clustering_row_ranges::const_iterator _ck_ranges_curr;
    query::clustering_row_ranges::const_iterator _ck_ranges_end;

    lsa_manager _lsa_manager;

    partition_snapshot_row_weakref _last_row;

    // Holds the lower bound of a position range which hasn't been processed yet.
    // Only rows with positions < _lower_bound have been emitted, and only
    // range_tombstones with positions <= _lower_bound.
    position_in_partition _lower_bound;
    position_in_partition_view _upper_bound;

    state _state = state::before_static_row;
    lw_shared_ptr<read_context> _read_context;
    partition_snapshot_row_cursor _next_row;
    bool _next_row_in_range = false;

    // True iff current population interval, since the previous clustering row, starts before all clustered rows.
    // We cannot just look at _lower_bound, because emission of range tombstones changes _lower_bound and
    // because we mark clustering intervals as continuous when consuming a clustering_row, it would prevent
    // us from marking the interval as continuous.
    // Valid when _state == reading_from_underlying.
    bool _population_range_starts_before_all_rows;

    // Whether _lower_bound was changed within current fill_buffer().
    // If it did not then we cannot break out of it (e.g. on preemption) because
    // forward progress is not guaranteed in case iterators are getting constantly invalidated.
    bool _lower_bound_changed = false;

    future<> do_fill_buffer(db::timeout_clock::time_point);
    void copy_from_cache_to_buffer();
    future<> process_static_row(db::timeout_clock::time_point);
    void move_to_end();
    void move_to_next_range();
    void move_to_range(query::clustering_row_ranges::const_iterator);
    void move_to_next_entry();
    void add_to_buffer(const partition_snapshot_row_cursor&);
    void add_clustering_row_to_buffer(mutation_fragment&&);
    void add_to_buffer(range_tombstone&&);
    void add_to_buffer(mutation_fragment&&);
    future<> read_from_underlying(db::timeout_clock::time_point);
    void start_reading_from_underlying();
    bool after_current_range(position_in_partition_view position);
    bool can_populate() const;
    // Marks the range between _last_row (exclusive) and _next_row (exclusive) as continuous,
    // provided that the underlying reader still matches the latest version of the partition.
    void maybe_update_continuity();
    // Tries to ensure that the lower bound of the current population range exists.
    // Returns false if it failed and range cannot be populated.
    // Assumes can_populate().
    bool ensure_population_lower_bound();
    void maybe_add_to_cache(const mutation_fragment& mf);
    void maybe_add_to_cache(const clustering_row& cr);
    void maybe_add_to_cache(const range_tombstone& rt);
    void maybe_add_to_cache(const static_row& sr);
    void maybe_set_static_row_continuous();
    void finish_reader() {
        push_mutation_fragment(partition_end());
        _end_of_stream = true;
        _state = state::end_of_stream;
    }
    void touch_partition();
public:
    cache_flat_mutation_reader(schema_ptr s,
            dht::decorated_key dk,
            query::clustering_key_filter_ranges&& crr,
            lw_shared_ptr<read_context> ctx,
            partition_snapshot_ptr snp,
            row_cache& cache)
        : flat_mutation_reader::impl(std::move(s))
        , _snp(std::move(snp))
        , _position_cmp(*_schema)
        , _ck_ranges(std::move(crr))
        , _ck_ranges_curr(_ck_ranges.begin())
        , _ck_ranges_end(_ck_ranges.end())
        , _lsa_manager(cache)
        , _lower_bound(position_in_partition::before_all_clustered_rows())
        , _upper_bound(position_in_partition_view::before_all_clustered_rows())
        , _read_context(std::move(ctx))
        , _next_row(*_schema, *_snp)
    {
        clogger.trace("csm {}: table={}.{}", this, _schema->ks_name(), _schema->cf_name());
        push_mutation_fragment(partition_start(std::move(dk), _snp->partition_tombstone()));
    }
    cache_flat_mutation_reader(const cache_flat_mutation_reader&) = delete;
    cache_flat_mutation_reader(cache_flat_mutation_reader&&) = delete;
    virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override;
    virtual void next_partition() override {
        clear_buffer_to_next_partition();
        if (is_buffer_empty()) {
            _end_of_stream = true;
        }
    }
    virtual future<> fast_forward_to(const dht::partition_range&, db::timeout_clock::time_point timeout) override {
        clear_buffer();
        _end_of_stream = true;
        return make_ready_future<>();
    }
    virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override {
        throw std::bad_function_call();
    }
};
inline
future<> cache_flat_mutation_reader::process_static_row(db::timeout_clock::time_point timeout) {
    if (_snp->static_row_continuous()) {
        _read_context->cache().on_row_hit();
        static_row sr = _lsa_manager.run_in_read_section([this] {
            return _snp->static_row(_read_context->digest_requested());
        });
        if (!sr.empty()) {
            push_mutation_fragment(mutation_fragment(std::move(sr)));
        }
        return make_ready_future<>();
    } else {
        _read_context->cache().on_row_miss();
        return _read_context->get_next_fragment(timeout).then([this] (mutation_fragment_opt&& sr) {
            if (sr) {
                assert(sr->is_static_row());
                maybe_add_to_cache(sr->as_static_row());
                push_mutation_fragment(std::move(*sr));
            }
            maybe_set_static_row_continuous();
        });
    }
}

inline
void cache_flat_mutation_reader::touch_partition() {
    if (_snp->at_latest_version()) {
        rows_entry& last_dummy = *_snp->version()->partition().clustered_rows().rbegin();
        _snp->tracker()->touch(last_dummy);
    }
}
inline
future<> cache_flat_mutation_reader::fill_buffer(db::timeout_clock::time_point timeout) {
    if (_state == state::before_static_row) {
        auto after_static_row = [this, timeout] {
            if (_ck_ranges_curr == _ck_ranges_end) {
                touch_partition();
                finish_reader();
                return make_ready_future<>();
            }
            _state = state::reading_from_cache;
            _lsa_manager.run_in_read_section([this] {
                move_to_range(_ck_ranges_curr);
            });
            return fill_buffer(timeout);
        };
        if (_schema->has_static_columns()) {
            return process_static_row(timeout).then(std::move(after_static_row));
        } else {
            return after_static_row();
        }
    }
    clogger.trace("csm {}: fill_buffer(), range={}, lb={}", this, *_ck_ranges_curr, _lower_bound);
    return do_until([this] { return _end_of_stream || is_buffer_full(); }, [this, timeout] {
        return do_fill_buffer(timeout);
    });
}
inline
future<> cache_flat_mutation_reader::do_fill_buffer(db::timeout_clock::time_point timeout) {
    if (_state == state::move_to_underlying) {
        _state = state::reading_from_underlying;
        _population_range_starts_before_all_rows = _lower_bound.is_before_all_clustered_rows(*_schema);
        auto end = _next_row_in_range ? position_in_partition(_next_row.position())
                                      : position_in_partition(_upper_bound);
        return _read_context->fast_forward_to(position_range{_lower_bound, std::move(end)}, timeout).then([this, timeout] {
            return read_from_underlying(timeout);
        });
    }
    if (_state == state::reading_from_underlying) {
        return read_from_underlying(timeout);
    }
    // assert(_state == state::reading_from_cache)
    return _lsa_manager.run_in_read_section([this] {
        auto next_valid = _next_row.iterators_valid();
        clogger.trace("csm {}: reading_from_cache, range=[{}, {}), next={}, valid={}", this, _lower_bound,
                      _upper_bound, _next_row.position(), next_valid);
        // We assume that if there was eviction, and thus the range may
        // no longer be continuous, the cursor was invalidated.
        if (!next_valid) {
            auto adjacent = _next_row.advance_to(_lower_bound);
            _next_row_in_range = !after_current_range(_next_row.position());
            if (!adjacent && !_next_row.continuous()) {
                _last_row = nullptr; // We could insert a dummy here, but this path is unlikely.
                start_reading_from_underlying();
                return make_ready_future<>();
            }
        }
        _next_row.maybe_refresh();
        clogger.trace("csm {}: next={}, cont={}", this, _next_row.position(), _next_row.continuous());
        _lower_bound_changed = false;
        while (_state == state::reading_from_cache) {
            copy_from_cache_to_buffer();
            // We need to check _lower_bound_changed even if is_buffer_full() because
            // we may have emitted only a range tombstone which overlapped with _lower_bound
            // and thus didn't cause _lower_bound to change.
            if ((need_preempt() || is_buffer_full()) && _lower_bound_changed) {
                break;
            }
        }
        return make_ready_future<>();
    });
}
inline
future<> cache_flat_mutation_reader::read_from_underlying(db::timeout_clock::time_point timeout) {
    return consume_mutation_fragments_until(_read_context->underlying().underlying(),
        [this] { return _state != state::reading_from_underlying || is_buffer_full(); },
        [this] (mutation_fragment mf) {
            _read_context->cache().on_row_miss();
            maybe_add_to_cache(mf);
            add_to_buffer(std::move(mf));
        },
        [this] {
            _state = state::reading_from_cache;
            _lsa_manager.run_in_update_section([this] {
                auto same_pos = _next_row.maybe_refresh();
                if (!same_pos) {
                    _read_context->cache().on_mispopulate(); // FIXME: Insert dummy entry at _upper_bound.
                    _next_row_in_range = !after_current_range(_next_row.position());
                    if (!_next_row.continuous()) {
                        start_reading_from_underlying();
                    }
                    return;
                }
                if (_next_row_in_range) {
                    maybe_update_continuity();
                    _last_row = _next_row;
                    add_to_buffer(_next_row);
                    try {
                        move_to_next_entry();
                    } catch (const std::bad_alloc&) {
                        // We cannot reenter the section, since we may have moved to the new range, and
                        // because add_to_buffer() should not be repeated.
                        _snp->region().allocator().invalidate_references(); // Invalidates _next_row
                    }
                } else {
                    if (no_clustering_row_between(*_schema, _upper_bound, _next_row.position())) {
                        this->maybe_update_continuity();
                    } else if (can_populate()) {
                        rows_entry::compare less(*_schema);
                        auto& rows = _snp->version()->partition().clustered_rows();
                        if (query::is_single_row(*_schema, *_ck_ranges_curr)) {
                            with_allocator(_snp->region().allocator(), [&] {
                                auto e = alloc_strategy_unique_ptr<rows_entry>(
                                    current_allocator().construct<rows_entry>(_ck_ranges_curr->start()->value()));
                                // Use _next_row iterator only as a hint, because there could be insertions after _upper_bound.
                                auto insert_result = rows.insert_check(_next_row.get_iterator_in_latest_version(), *e, less);
                                auto inserted = insert_result.second;
                                auto it = insert_result.first;
                                if (inserted) {
                                    _snp->tracker()->insert(*e);
                                    e.release();
                                    auto next = std::next(it);
                                    it->set_continuous(next->continuous());
                                    clogger.trace("csm {}: inserted dummy at {}, cont={}", this, it->position(), it->continuous());
                                }
                            });
                        } else if (ensure_population_lower_bound()) {
                            with_allocator(_snp->region().allocator(), [&] {
                                auto e = alloc_strategy_unique_ptr<rows_entry>(
                                    current_allocator().construct<rows_entry>(*_schema, _upper_bound, is_dummy::yes, is_continuous::yes));
                                // Use _next_row iterator only as a hint, because there could be insertions after _upper_bound.
                                auto insert_result = rows.insert_check(_next_row.get_iterator_in_latest_version(), *e, less);
                                auto inserted = insert_result.second;
                                if (inserted) {
                                    clogger.trace("csm {}: inserted dummy at {}", this, _upper_bound);
                                    _snp->tracker()->insert(*e);
                                    e.release();
                                } else {
                                    clogger.trace("csm {}: mark {} as continuous", this, insert_result.first->position());
                                    insert_result.first->set_continuous(true);
                                }
                            });
                        }
                    } else {
                        _read_context->cache().on_mispopulate();
                    }
                    try {
                        move_to_next_range();
                    } catch (const std::bad_alloc&) {
                        // We cannot reenter the section, since we may have moved to the new range
                        _snp->region().allocator().invalidate_references(); // Invalidates _next_row
                    }
                }
            });
            return make_ready_future<>();
        }, timeout);
}
inline
bool cache_flat_mutation_reader::ensure_population_lower_bound() {
    if (_population_range_starts_before_all_rows) {
        return true;
    }
    if (!_last_row.refresh(*_snp)) {
        return false;
    }
    // Continuity flag we will later set for the upper bound extends to the previous row in the same version,
    // so we need to ensure we have an entry in the latest version.
    if (!_last_row.is_in_latest_version()) {
        with_allocator(_snp->region().allocator(), [&] {
            auto& rows = _snp->version()->partition().clustered_rows();
            rows_entry::compare less(*_schema);
            // FIXME: Avoid the copy by inserting an incomplete clustering row
            auto e = alloc_strategy_unique_ptr<rows_entry>(
                current_allocator().construct<rows_entry>(*_schema, *_last_row));
            e->set_continuous(false);
            auto insert_result = rows.insert_check(rows.end(), *e, less);
            auto inserted = insert_result.second;
            if (inserted) {
                clogger.trace("csm {}: inserted lower bound dummy at {}", this, e->position());
                _snp->tracker()->insert(*e);
                e.release();
            }
        });
    }
    return true;
}
inline
void cache_flat_mutation_reader::maybe_update_continuity() {
    if (can_populate() && ensure_population_lower_bound()) {
        with_allocator(_snp->region().allocator(), [&] {
            rows_entry& e = _next_row.ensure_entry_in_latest().row;
            e.set_continuous(true);
        });
    } else {
        _read_context->cache().on_mispopulate();
    }
}

inline
void cache_flat_mutation_reader::maybe_add_to_cache(const mutation_fragment& mf) {
    if (mf.is_range_tombstone()) {
        maybe_add_to_cache(mf.as_range_tombstone());
    } else {
        assert(mf.is_clustering_row());
        const clustering_row& cr = mf.as_clustering_row();
        maybe_add_to_cache(cr);
    }
}

inline
void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) {
    if (!can_populate()) {
        _last_row = nullptr;
        _population_range_starts_before_all_rows = false;
        _read_context->cache().on_mispopulate();
        return;
    }
    clogger.trace("csm {}: populate({})", this, cr);
    _lsa_manager.run_in_update_section_with_allocator([this, &cr] {
        mutation_partition& mp = _snp->version()->partition();
        rows_entry::compare less(*_schema);

        if (_read_context->digest_requested()) {
            cr.cells().prepare_hash(*_schema, column_kind::regular_column);
        }
        auto new_entry = alloc_strategy_unique_ptr<rows_entry>(
            current_allocator().construct<rows_entry>(*_schema, cr.key(), cr.tomb(), cr.marker(), cr.cells()));
        new_entry->set_continuous(false);
        auto it = _next_row.iterators_valid() ? _next_row.get_iterator_in_latest_version()
                                              : mp.clustered_rows().lower_bound(cr.key(), less);
        auto insert_result = mp.clustered_rows().insert_check(it, *new_entry, less);
        if (insert_result.second) {
            _snp->tracker()->insert(*new_entry);
            new_entry.release();
        }
        it = insert_result.first;

        rows_entry& e = *it;
        if (ensure_population_lower_bound()) {
            clogger.trace("csm {}: set_continuous({})", this, e.position());
            e.set_continuous(true);
        } else {
            _read_context->cache().on_mispopulate();
        }
        with_allocator(standard_allocator(), [&] {
            _last_row = partition_snapshot_row_weakref(*_snp, it, true);
        });
        _population_range_starts_before_all_rows = false;
    });
}
inline
bool cache_flat_mutation_reader::after_current_range(position_in_partition_view p) {
    return _position_cmp(p, _upper_bound) >= 0;
}

inline
void cache_flat_mutation_reader::start_reading_from_underlying() {
    clogger.trace("csm {}: start_reading_from_underlying(), range=[{}, {})", this, _lower_bound, _next_row_in_range ? _next_row.position() : _upper_bound);
    _state = state::move_to_underlying;
    _next_row.touch();
}

inline
void cache_flat_mutation_reader::copy_from_cache_to_buffer() {
    clogger.trace("csm {}: copy_from_cache, next={}, next_row_in_range={}", this, _next_row.position(), _next_row_in_range);
    _next_row.touch();
    position_in_partition_view next_lower_bound = _next_row.dummy() ? _next_row.position() : position_in_partition_view::after_key(_next_row.key());
    for (auto&& rts : _snp->range_tombstones(_lower_bound, _next_row_in_range ? next_lower_bound : _upper_bound)) {
        position_in_partition::less_compare less(*_schema);
        // This guarantees that rts starts after any emitted clustering_row
        // and not before any emitted range tombstone.
        if (!less(_lower_bound, rts.position())) {
            rts.set_start(*_schema, _lower_bound);
        } else {
            _lower_bound = position_in_partition(rts.position());
            _lower_bound_changed = true;
            if (is_buffer_full()) {
                return;
            }
        }
        push_mutation_fragment(std::move(rts));
    }
    // We add the row to the buffer even when it's full.
    // This simplifies the code. For more info see #3139.
    if (_next_row_in_range) {
        _last_row = _next_row;
        add_to_buffer(_next_row);
        move_to_next_entry();
    } else {
        move_to_next_range();
    }
}

inline
void cache_flat_mutation_reader::move_to_end() {
    finish_reader();
    clogger.trace("csm {}: eos", this);
}
inline
void cache_flat_mutation_reader::move_to_next_range() {
    auto next_it = std::next(_ck_ranges_curr);
    if (next_it == _ck_ranges_end) {
        move_to_end();
        _ck_ranges_curr = next_it;
    } else {
        move_to_range(next_it);
    }
}

inline
void cache_flat_mutation_reader::move_to_range(query::clustering_row_ranges::const_iterator next_it) {
    auto lb = position_in_partition::for_range_start(*next_it);
    auto ub = position_in_partition_view::for_range_end(*next_it);
    _last_row = nullptr;
    _lower_bound = std::move(lb);
    _upper_bound = std::move(ub);
    _lower_bound_changed = true;
    _ck_ranges_curr = next_it;
    auto adjacent = _next_row.advance_to(_lower_bound);
    _next_row_in_range = !after_current_range(_next_row.position());
    clogger.trace("csm {}: move_to_range(), range={}, lb={}, ub={}, next={}", this, *_ck_ranges_curr, _lower_bound, _upper_bound, _next_row.position());
    if (!adjacent && !_next_row.continuous()) {
        // FIXME: We don't insert a dummy for singular range to avoid allocating 3 entries
        // for a hit (before, at and after). If we supported the concept of an incomplete row,
        // we could insert such a row for the lower bound if it's full instead, for both singular and
        // non-singular ranges.
        if (_ck_ranges_curr->start() && !query::is_single_row(*_schema, *_ck_ranges_curr)) {
            // Insert dummy for lower bound
            if (can_populate()) {
                // FIXME: _lower_bound could be adjacent to the previous row, in which case we could skip this
                clogger.trace("csm {}: insert dummy at {}", this, _lower_bound);
                auto it = with_allocator(_lsa_manager.region().allocator(), [&] {
                    auto& rows = _snp->version()->partition().clustered_rows();
                    auto new_entry = current_allocator().construct<rows_entry>(*_schema, _lower_bound, is_dummy::yes, is_continuous::no);
                    return rows.insert_before(_next_row.get_iterator_in_latest_version(), *new_entry);
                });
                _snp->tracker()->insert(*it);
                _last_row = partition_snapshot_row_weakref(*_snp, it, true);
            } else {
                _read_context->cache().on_mispopulate();
            }
        }
        start_reading_from_underlying();
    }
}
// _next_row must be inside the range.
inline
void cache_flat_mutation_reader::move_to_next_entry() {
    clogger.trace("csm {}: move_to_next_entry(), curr={}", this, _next_row.position());
    if (no_clustering_row_between(*_schema, _next_row.position(), _upper_bound)) {
        move_to_next_range();
    } else {
        if (!_next_row.next()) {
            move_to_end();
            return;
        }
        _next_row_in_range = !after_current_range(_next_row.position());
        clogger.trace("csm {}: next={}, cont={}, in_range={}", this, _next_row.position(), _next_row.continuous(), _next_row_in_range);
        if (!_next_row.continuous()) {
            start_reading_from_underlying();
        }
    }
}

inline
void cache_flat_mutation_reader::add_to_buffer(mutation_fragment&& mf) {
    clogger.trace("csm {}: add_to_buffer({})", this, mf);
    if (mf.is_clustering_row()) {
        add_clustering_row_to_buffer(std::move(mf));
    } else {
        assert(mf.is_range_tombstone());
        add_to_buffer(std::move(mf).as_range_tombstone());
    }
}

inline
void cache_flat_mutation_reader::add_to_buffer(const partition_snapshot_row_cursor& row) {
    if (!row.dummy()) {
        _read_context->cache().on_row_hit();
        add_clustering_row_to_buffer(row.row(_read_context->digest_requested()));
    }
}

// Maintains the following invariants, also in case of exception:
//  (1) no fragment with position >= _lower_bound was pushed yet
//  (2) If _lower_bound > mf.position(), mf was emitted
inline
void cache_flat_mutation_reader::add_clustering_row_to_buffer(mutation_fragment&& mf) {
    clogger.trace("csm {}: add_clustering_row_to_buffer({})", this, mf);
    auto& row = mf.as_clustering_row();
    auto new_lower_bound = position_in_partition::after_key(row.key());
    push_mutation_fragment(std::move(mf));
    _lower_bound = std::move(new_lower_bound);
    _lower_bound_changed = true;
}
inline
void cache_flat_mutation_reader::add_to_buffer(range_tombstone&& rt) {
    clogger.trace("csm {}: add_to_buffer({})", this, rt);
    // This guarantees that rt starts after any emitted clustering_row
    // and not before any emitted range tombstone.
    position_in_partition::less_compare less(*_schema);
    if (!less(_lower_bound, rt.end_position())) {
        return;
    }
    if (!less(_lower_bound, rt.position())) {
        rt.set_start(*_schema, _lower_bound);
    } else {
        _lower_bound = position_in_partition(rt.position());
        _lower_bound_changed = true;
    }
    push_mutation_fragment(std::move(rt));
}

inline
void cache_flat_mutation_reader::maybe_add_to_cache(const range_tombstone& rt) {
    if (can_populate()) {
        clogger.trace("csm {}: maybe_add_to_cache({})", this, rt);
        _lsa_manager.run_in_update_section_with_allocator([&] {
            _snp->version()->partition().row_tombstones().apply_monotonically(*_schema, rt);
        });
    } else {
        _read_context->cache().on_mispopulate();
    }
}

inline
void cache_flat_mutation_reader::maybe_add_to_cache(const static_row& sr) {
    if (can_populate()) {
        clogger.trace("csm {}: populate({})", this, sr);
        _read_context->cache().on_static_row_insert();
        _lsa_manager.run_in_update_section_with_allocator([&] {
            if (_read_context->digest_requested()) {
                sr.cells().prepare_hash(*_schema, column_kind::static_column);
            }
            _snp->version()->partition().static_row().apply(*_schema, column_kind::static_column, sr.cells());
        });
    } else {
        _read_context->cache().on_mispopulate();
    }
}

inline
void cache_flat_mutation_reader::maybe_set_static_row_continuous() {
    if (can_populate()) {
        clogger.trace("csm {}: set static row continuous", this);
        _snp->version()->partition().set_static_row_continuous(true);
    } else {
        _read_context->cache().on_mispopulate();
    }
}

inline
bool cache_flat_mutation_reader::can_populate() const {
    return _snp->at_latest_version() && _read_context->cache().phase_of(_read_context->key()) == _read_context->phase();
}
} // namespace cache

inline flat_mutation_reader make_cache_flat_mutation_reader(schema_ptr s,
        dht::decorated_key dk,
        query::clustering_key_filter_ranges crr,
        row_cache& cache,
        lw_shared_ptr<cache::read_context> ctx,
        partition_snapshot_ptr snp)
{
    return make_flat_mutation_reader<cache::cache_flat_mutation_reader>(
        std::move(s), std::move(dk), std::move(crr), std::move(ctx), std::move(snp), cache);
}