Currently, it may happen that the last promoted index block includes the partition_end marker. That's because we first write the partition end marker and only then emit the unclosed block. This behavior matches Cassandra (checked in 3.x and 5.0.1). This is problematic for ruling out data file reads based on the index. The width field is currently unused, but later it will be used to compute the skip position past the last block, for lookups which land after all keys in the partition. If the width includes the marker, then such a skip would land in the next partition, which is incorrect, as the reader context expects a cell element. Even if that were recognized, it would still be wrong: if this is not a single-partition read (so the upper bound is not also at the next partition), then we would read from the wrong (next) partition. We want to be able to make such skips in order to avoid unnecessary data file IO for reads of missing rows. Currently, we would always read the last block even if the key is past its "end" position. Another way to solve this would be to propagate the "past the last block" condition from the index cursor to the reader and let the reader deal with it, but the logic for that would be complicated. With this fix, no special logic is required.
54 lines
1.7 KiB
C++
54 lines
1.7 KiB
C++
/*
|
|
* Copyright (C) 2018-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "sstable_writer.hh"
|
|
#include "sstables_manager.hh"
|
|
#include "schema/schema_fwd.hh"
|
|
#include "mutation/mutation_fragment.hh"
|
|
#include "metadata_collector.hh"
|
|
#include "mutation/mutation_fragment_stream_validator.hh"
|
|
|
|
namespace sstables {
|
|
|
|
/// Abstract implementation interface nested in sstable_writer.
///
/// Holds the state shared by writer implementations (target sstable, schema,
/// a private copy of the writer configuration, statistics collectors, a
/// fragment-stream validator and the enabled feature set) and declares the
/// pure virtual consume hooks that a concrete writer has to provide.
struct sstable_writer::writer_impl {
    // Target sstable; held by reference.
    sstable& _sst;
    // Schema passed to the constructor; held by reference.
    const schema& _schema;
    // Writer configuration, copied at construction time.
    const sstable_writer_config _cfg;
    // NOTE: _collector and _c_stats are used to generate the statistics file
    // when writing a new sstable.
    metadata_collector _collector;
    column_stats _c_stats;
    // Validating filter for the consumed fragment stream; its level is taken
    // from _cfg.validation_level.
    mutation_fragment_stream_validating_filter _validator;
    // Starts with every feature enabled; the constructor may switch off
    // CorrectLastPiBlockWidth depending on the configuration.
    sstable_enabled_features _features = sstable_enabled_features::all();

    /// Binds the writer state to `sst` and `schema` and stores a copy of `cfg`.
    ///
    /// Members are initialized in declaration order, so the later initializers
    /// may safely read `_sst`, `_schema` and `_cfg`.
    writer_impl(sstable& sst, const schema& schema, const sstable_writer_config& cfg)
        : _sst(sst)
        , _schema(schema)
        , _cfg(cfg)
        , _collector(_schema, _sst.get_filename(), _sst.manager().get_local_host_id())
        , _validator(format("sstable writer {}", _sst.get_filename()), _schema, _cfg.validation_level)
    {
        // The corrected last promoted-index block width is opt-out: drop the
        // feature only when the configuration asks for it.
        if (!_cfg.correct_pi_block_width) {
            _features.disable(CorrectLastPiBlockWidth);
        }
    }

    // Consumer hooks, one per kind of event in the stream being written.
    virtual void consume_new_partition(const dht::decorated_key& dk) = 0;
    virtual void consume(tombstone t) = 0;
    virtual stop_iteration consume(static_row&& sr) = 0;
    virtual stop_iteration consume(clustering_row&& cr) = 0;
    virtual stop_iteration consume(range_tombstone_change&& rtc) = 0;
    virtual stop_iteration consume_end_of_partition() = 0;
    virtual void consume_end_of_stream() = 0;

    // Virtual so that implementations can be destroyed through a base pointer.
    virtual ~writer_impl() = default;
};
|
|
|
|
}
|