Files
scylladb/sstables/sstable_mutation_reader.hh
Botond Dénes 4b222e7f37 sstables: move mp_row_consumer_reader_k_l to kl/reader.cc
Its only user is in said file, so that is a better place for it.
2022-04-28 14:12:24 +03:00

203 lines
9.4 KiB
C++

/*
* Copyright (C) 2015-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#pragma once

#include "mutation.hh"
#include "sstables.hh"
#include "types.hh"
#include <seastar/core/future-util.hh>
#include <seastar/core/coroutine.hh>
#include "key.hh"
#include "keys.hh"
#include <seastar/core/do_with.hh>
#include "unimplemented.hh"
#include "dht/i_partitioner.hh"
#include <seastar/core/byteorder.hh>
#include "index_reader.hh"
#include "counters.hh"
#include "utils/data_input.hh"
#include "clustering_ranges_walker.hh"
#include "binary_search.hh"
#include "../dht/i_partitioner.hh"
#include "sstables/mx/partition_reversing_data_source.hh"
namespace sstables {
namespace kl {
class mp_row_consumer_k_l;
}
namespace mx {
class mp_row_consumer_m;
}
// Base class holding the state shared by the per-format sstable mutation
// reader implementations (the k/l and m/x row consumer readers). It tracks
// where the consumer currently stands relative to partition boundaries and
// whether the index reader's lower bound refers to the current partition.
class mp_row_consumer_reader_base {
protected:
    // The sstable being read; keeps it alive for the reader's lifetime.
    shared_sstable _sst;

    // Whether index lower bound is in current partition
    bool _index_in_current_partition = false;

    // True iff the consumer finished generating fragments for a partition and hasn't
    // entered the new partition yet.
    // Implies that partition_end was emitted for the last partition.
    // Will cause the reader to skip to the next partition if !_before_partition.
    bool _partition_finished = true;

    // When set, the consumer is positioned right before a partition or at end of the data file.
    // _index_in_current_partition applies to the partition which is about to be read.
    bool _before_partition = true;

    // Key of the partition currently being read, if any.
    std::optional<dht::decorated_key> _current_partition_key;
public:
    // explicit is safe to add: the class is abstract, so no implicit
    // conversion from shared_sstable could ever have compiled.
    explicit mp_row_consumer_reader_base(shared_sstable sst)
        : _sst(std::move(sst))
    { }

    // The class declares a pure virtual member and thus serves as a
    // polymorphic base; a virtual destructor makes deletion through a
    // base pointer well-defined.
    virtual ~mp_row_consumer_reader_base() = default;

    // Called when all fragments relevant to the query range or fast forwarding window
    // within the current partition have been pushed.
    // If no skipping is required, this method may not be called before transitioning
    // to the next partition.
    virtual void on_out_of_clustering_range() = 0;
};
// Builds a live atomic cell from a raw on-disk value. A zero TTL yields a
// regular live cell; a non-zero TTL yields an expiring cell carrying both
// the expiration point and the TTL.
inline atomic_cell make_atomic_cell(const abstract_type& type,
        api::timestamp_type timestamp,
        fragmented_temporary_buffer::view value,
        gc_clock::duration ttl,
        gc_clock::time_point expiration,
        atomic_cell::collection_member cm) {
    if (ttl == gc_clock::duration::zero()) {
        // No TTL: plain (non-expiring) live cell.
        return atomic_cell::make_live(type, timestamp, value, cm);
    }
    return atomic_cell::make_live(type, timestamp, value, expiration, ttl, cm);
}
atomic_cell make_counter_cell(api::timestamp_type timestamp, fragmented_temporary_buffer::view value);
position_in_partition_view get_slice_upper_bound(const schema& s, const query::partition_slice& slice, dht::ring_position_view key);
// data_consume_rows() iterates over rows in the data file from
// a particular range, feeding them into the consumer. The iteration is
// done as efficiently as possible - reading only the data file (not the
// summary or index files) and reading data in batches.
//
// The consumer object may request the iteration to stop before reaching
// the end of the requested data range (e.g. stop after each sstable row).
// A context object is returned which allows to resume this consumption:
// This context's read() method requests that consumption begins, and
// returns a future which will be resolved when it ends (because the
// consumer asked to stop, or the data range ended). Only after the
// returned future is resolved, may read() be called again to consume
// more.
// The caller must ensure (e.g., using do_with()) that the context object,
// as well as the sstable, remains alive as long as a read() is in
// progress (i.e., returned a future which hasn't completed yet).
//
// The "toread" range specifies the range we want to read initially.
// However, the object returned by the read, a data_consume_context, also
// provides a fast_forward_to(start,end) method which allows resetting
// the reader to a new range. To allow that, we also have a "last_end"
// byte which should be the last end to which fast_forward_to is
// eventually allowed. If last_end==end, fast_forward_to is not allowed
// at all, if last_end==file_size fast_forward_to is allowed until the
// end of the file, and it can be something in between if we know that we
// are planning to skip parts, but eventually read until last_end.
// When last_end==end, we guarantee that the read will only read the
// desired byte range from disk. However, when last_end > end, we may
// read beyond end in anticipation of a small skip via fast_forward_to.
// The amount of this excessive read is controlled by read ahead
// heuristics which learn from the usefulness of previous read aheads.
template <typename DataConsumeRowsContext>
inline std::unique_ptr<DataConsumeRowsContext> data_consume_rows(const schema& s, shared_sstable sst, typename DataConsumeRowsContext::consumer& consumer, sstable::disk_read_range toread, uint64_t last_end) {
    // Although we were only asked to read until toread.end, we'll not limit
    // the underlying file input stream to this end, but rather to last_end.
    // This potentially enables read-ahead beyond end, until last_end, which
    // can be beneficial if the user wants to fast_forward_to() on the
    // returned context, and may make small skips.
    const auto begin = toread.start;
    const auto stream_len = last_end - begin;   // bytes exposed to the stream (allows read-ahead)
    const auto consume_len = toread.end - begin; // bytes the context will actually consume now
    auto stream = sst->data_stream(begin, stream_len, consumer.io_priority(),
            consumer.permit(), consumer.trace_state(), sst->_partition_range_history);
    return std::make_unique<DataConsumeRowsContext>(s, std::move(sst), consumer, std::move(stream), begin, consume_len);
}
// Result of data_consume_reversed_partition(): the consuming context plus a
// view of its progress through the underlying file.
template <typename DataConsumeRowsContext>
struct reversed_context {
    // Context which emits the partition's content in reversed order.
    std::unique_ptr<DataConsumeRowsContext> the_context;

    // Underneath, the context is iterating over the sstable file in reverse order.
    // This points to the current position of the context over the underlying sstable file;
    // either the end of partition or the beginning of some row (never in the middle of a row).
    // The reference is valid as long as the context is alive.
    const uint64_t& current_position_in_sstable;
};
// See `sstables::mx::make_partition_reversing_data_source` for documentation.
template <typename DataConsumeRowsContext>
inline reversed_context<DataConsumeRowsContext> data_consume_reversed_partition(
        const schema& s, shared_sstable sst, index_reader& ir,
        typename DataConsumeRowsContext::consumer& consumer, sstable::disk_read_range toread) {
    const auto start = toread.start;
    const auto len = toread.end - start;
    // The data source walks the partition's file range backwards; the context
    // on top of it then produces fragments in reversed clustering order.
    auto source = sstables::mx::make_partition_reversing_data_source(
            s, sst, ir, start, len,
            consumer.permit(), consumer.io_priority(), consumer.trace_state());
    auto ctx = std::make_unique<DataConsumeRowsContext>(
            s, std::move(sst), consumer, input_stream<char>(std::move(source.the_source)),
            start, len);
    return reversed_context<DataConsumeRowsContext>{
        .the_context = std::move(ctx),
        .current_position_in_sstable = source.current_position_in_sstable,
    };
}
// Like data_consume_rows(), but limited to a single partition's byte range:
// the stream ends exactly at toread.end (no read-ahead past it), and the
// sstable's single-partition read history is used.
template <typename DataConsumeRowsContext>
inline std::unique_ptr<DataConsumeRowsContext> data_consume_single_partition(const schema& s, shared_sstable sst, typename DataConsumeRowsContext::consumer& consumer, sstable::disk_read_range toread) {
    const auto length = toread.end - toread.start;
    auto stream = sst->data_stream(toread.start, length, consumer.io_priority(),
            consumer.permit(), consumer.trace_state(), sst->_single_partition_history);
    return std::make_unique<DataConsumeRowsContext>(s, std::move(sst), consumer, std::move(stream), toread.start, length);
}
// Like data_consume_rows() with bounds, but iterates over the whole data file.
template <typename DataConsumeRowsContext>
inline std::unique_ptr<DataConsumeRowsContext> data_consume_rows(const schema& s, shared_sstable sst, typename DataConsumeRowsContext::consumer& consumer) {
    // Passing data_size as last_end means fast_forward_to() is allowed
    // anywhere within the file.
    const auto file_len = sst->data_size();
    return data_consume_rows<DataConsumeRowsContext>(s, std::move(sst), consumer, {0, file_len}, file_len);
}
// Interface a consumer must satisfy to be driven by the data-consuming
// contexts declared in this file.
template<typename T>
concept RowConsumer =
    requires(T t,
            const partition_key& pk,
            position_range cr) {
        // I/O priority under which reads for this consumer are issued.
        { t.io_priority() } -> std::convertible_to<const io_priority_class&>;
        // True when the consumer has finished the current partition.
        { t.is_mutation_end() } -> std::same_as<bool>;
        // Prepare to consume a new partition with the given key.
        { t.setup_for_partition(pk) } -> std::same_as<void>;
        // Emit any fragments that are ready to be pushed downstream.
        { t.push_ready_fragments() } -> std::same_as<void>;
        // If skipping is needed, returns the position to skip to.
        { t.maybe_skip() } -> std::same_as<std::optional<position_in_partition_view>>;
        // Restrict consumption to the given position range; may return a
        // position to forward the underlying stream to.
        { t.fast_forward_to(std::move(cr)) } -> std::same_as<std::optional<position_in_partition_view>>;
    };
/*
 * Helper which sets the consumer's range tombstone start bound from the
 * end open marker of a promoted index block, when one is present.
 *
 * Only applies to consumers that have the following methods:
 *     void reset_range_tombstone_start();
 *     void set_range_tombstone_start(clustering_key_prefix, bound_kind, tombstone);
 *
 * For other consumers, it is a no-op.
 */
template <typename Consumer>
void set_range_tombstone_start_from_end_open_marker(Consumer& c, const schema& s, const index_reader& idx) {
    if constexpr (Consumer::is_setting_range_tombstone_start_supported) {
        if (auto marker = idx.end_open_marker()) {
            auto [pos, tomb] = *marker;
            (void)pos; // only the tombstone part of the marker is needed here
            c.set_range_tombstone_start(tomb);
        }
    }
}
}