Files
scylladb/sstables/data_consume_context.hh
Pavel Emelyanov 812eed27fe code: Force formatting of pointer in .debug and .trace
... and tests. Printin a pointer in logs is considered to be a bad practice,
so the proposal is to keep this explicit (with fmt::ptr) and allow it for
.debug and .trace cases.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-08-26 20:44:11 +03:00

199 lines
9.0 KiB
C++

/*
* Copyright (C) 2018 ScyllaDB
*
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <memory>
#include <seastar/core/future.hh>
#include <seastar/util/optimized_optional.hh>
#include "shared_sstable.hh"
#include "row.hh"
#include "sstables.hh"
namespace sstables {
class reader_position_tracker;
template <typename DataConsumeRowsContext>
data_consume_context<DataConsumeRowsContext>
data_consume_rows(const schema&, shared_sstable, typename DataConsumeRowsContext::consumer&, sstable::disk_read_range, uint64_t);
template <typename DataConsumeRowsContext>
data_consume_context<DataConsumeRowsContext>
data_consume_single_partition(const schema&, shared_sstable, typename DataConsumeRowsContext::consumer&, sstable::disk_read_range);
template <typename DataConsumeRowsContext>
data_consume_context<DataConsumeRowsContext>
data_consume_rows(const schema&, shared_sstable, typename DataConsumeRowsContext::consumer&);
// data_consume_context is an object returned by sstable::data_consume_rows()
// which allows knowing when the consumer stops reading, and starting it again
// (e.g., when the consumer wants to stop after every sstable row).
//
// The read() method initiates reading into the consumer, and continues to
// read and feed data into the consumer until one of the consumer's callbacks
// requests to stop, or until we reach the end of the data range originally
// requested. read() returns a future which completes when reading stopped.
// If we're at the end-of-file, the read may complete without reading anything
// so it's the consumer class's task to check if anything was consumed.
// Note:
// The caller MUST ensure that between calling read() on this object,
// and the time the returned future is completed, the object lives on.
// Moreover, the sstable object used for the sstable::data_consume_rows()
// call which created this data_consume_context, must also be kept alive.
template <typename DataConsumeRowsContext>
requires ConsumeRowsContext<DataConsumeRowsContext>
class data_consume_context {
shared_sstable _sst;
std::unique_ptr<DataConsumeRowsContext> _ctx;
template <typename Consumer>
data_consume_context(const schema& s, shared_sstable sst, Consumer &consumer, input_stream<char> &&input, uint64_t start, uint64_t maxlen)
: _sst(std::move(sst))
, _ctx(std::make_unique<DataConsumeRowsContext>(s, _sst, consumer, std::move(input), start, maxlen))
{ }
friend class sstable;
friend data_consume_context<DataConsumeRowsContext>
data_consume_rows<DataConsumeRowsContext>(const schema&, shared_sstable, typename DataConsumeRowsContext::consumer&, sstable::disk_read_range, uint64_t);
friend data_consume_context<DataConsumeRowsContext>
data_consume_single_partition<DataConsumeRowsContext>(const schema&, shared_sstable, typename DataConsumeRowsContext::consumer&, sstable::disk_read_range);
friend data_consume_context<DataConsumeRowsContext>
data_consume_rows<DataConsumeRowsContext>(const schema&, shared_sstable, typename DataConsumeRowsContext::consumer&);
data_consume_context() = default;
explicit operator bool() const noexcept {
return bool(_ctx);
}
friend class optimized_optional<data_consume_context<DataConsumeRowsContext>>;
public:
future<> read() {
return _ctx->consume_input();
}
future<> fast_forward_to(uint64_t begin, uint64_t end) {
_ctx->reset(indexable_element::partition);
return _ctx->fast_forward_to(begin, end);
}
bool need_skip(uint64_t pos) const {
return pos > _ctx->position();
}
future<> skip_to(indexable_element el, uint64_t begin) {
sstlog.trace("data_consume_rows_context {}: skip_to({} -> {}, el={})", fmt::ptr(_ctx.get()), _ctx->position(), begin, static_cast<int>(el));
if (begin <= _ctx->position()) {
return make_ready_future<>();
}
_ctx->reset(el);
return _ctx->skip_to(begin);
}
const reader_position_tracker &reader_position() const {
return _ctx->reader_position();
}
bool eof() const {
return _ctx->eof();
}
~data_consume_context() {
if (_ctx) {
auto f = _ctx->close();
//FIXME: discarded future.
(void)f.handle_exception([ctx = std::move(_ctx), sst = std::move(_sst)](auto) {});
}
}
data_consume_context(data_consume_context &&) noexcept = default;
data_consume_context &operator=(data_consume_context &&) noexcept = default;
};
template <typename DataConsumeRowsContext>
using data_consume_context_opt = optimized_optional<data_consume_context<DataConsumeRowsContext>>;
// data_consume_rows() iterates over rows in the data file from
// a particular range, feeding them into the consumer. The iteration is
// done as efficiently as possible - reading only the data file (not the
// summary or index files) and reading data in batches.
//
// The consumer object may request the iteration to stop before reaching
// the end of the requested data range (e.g. stop after each sstable row).
// A context object is returned which allows to resume this consumption:
// This context's read() method requests that consumption begins, and
// returns a future which will be resolved when it ends (because the
// consumer asked to stop, or the data range ended). Only after the
// returned future is resolved, may read() be called again to consume
// more.
// The caller must ensure (e.g., using do_with()) that the context object,
// as well as the sstable, remains alive as long as a read() is in
// progress (i.e., returned a future which hasn't completed yet).
//
// The "toread" range specifies the range we want to read initially.
// However, the object returned by the read, a data_consume_context, also
// provides a fast_forward_to(start,end) method which allows resetting
// the reader to a new range. To allow that, we also have a "last_end"
// byte which should be the last end to which fast_forward_to is
// eventually allowed. If last_end==end, fast_forward_to is not allowed
// at all, if last_end==file_size fast_forward_to is allowed until the
// end of the file, and it can be something in between if we know that we
// are planning to skip parts, but eventually read until last_end.
// When last_end==end, we guarantee that the read will only read the
// desired byte range from disk. However, when last_end > end, we may
// read beyond end in anticipation of a small skip via fast_foward_to.
// The amount of this excessive read is controlled by read ahead
// hueristics which learn from the usefulness of previous read aheads.
template <typename DataConsumeRowsContext>
inline data_consume_context<DataConsumeRowsContext> data_consume_rows(const schema& s, shared_sstable sst, typename DataConsumeRowsContext::consumer& consumer, sstable::disk_read_range toread, uint64_t last_end) {
// Although we were only asked to read until toread.end, we'll not limit
// the underlying file input stream to this end, but rather to last_end.
// This potentially enables read-ahead beyond end, until last_end, which
// can be beneficial if the user wants to fast_forward_to() on the
// returned context, and may make small skips.
auto input = sst->data_stream(toread.start, last_end - toread.start, consumer.io_priority(),
consumer.permit(), consumer.trace_state(), sst->_partition_range_history);
return {s, std::move(sst), consumer, std::move(input), toread.start, toread.end - toread.start };
}
template <typename DataConsumeRowsContext>
inline data_consume_context<DataConsumeRowsContext> data_consume_single_partition(const schema& s, shared_sstable sst, typename DataConsumeRowsContext::consumer& consumer, sstable::disk_read_range toread) {
auto input = sst->data_stream(toread.start, toread.end - toread.start, consumer.io_priority(),
consumer.permit(), consumer.trace_state(), sst->_single_partition_history);
return {s, std::move(sst), consumer, std::move(input), toread.start, toread.end - toread.start };
}
// Like data_consume_rows() with bounds, but iterates over whole range
template <typename DataConsumeRowsContext>
inline data_consume_context<DataConsumeRowsContext> data_consume_rows(const schema& s, shared_sstable sst, typename DataConsumeRowsContext::consumer& consumer) {
auto data_size = sst->data_size();
return data_consume_rows<DataConsumeRowsContext>(s, std::move(sst), consumer, {0, data_size}, data_size);
}
}