mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-12 19:02:12 +00:00
Merge "Ensure correct EOC for PI block cell names" from Duarte
"This series ensures the always write correct cell names to promoted index cell blocks, taking into account the eoc of range tombstones. Fixes #2333" * 'pi-cell-name/v1' of github.com:duarten/scylla: tests/sstable_mutation_test: Test promoted index blocks are monotonic sstables: Consider eoc when flushing pi block sstables: Extract out converting bound_kind to eoc
This commit is contained in:
@@ -1334,6 +1334,18 @@ future<foreign_sstable_open_info> sstable::get_open_info() & {
|
||||
});
|
||||
}
|
||||
|
||||
static composite::eoc bound_kind_to_start_marker(bound_kind start_kind) {
|
||||
return start_kind == bound_kind::excl_start
|
||||
? composite::eoc::end
|
||||
: composite::eoc::start;
|
||||
}
|
||||
|
||||
static composite::eoc bound_kind_to_end_marker(bound_kind end_kind) {
|
||||
return end_kind == bound_kind::excl_end
|
||||
? composite::eoc::start
|
||||
: composite::eoc::end;
|
||||
}
|
||||
|
||||
static void output_promoted_index_entry(bytes_ostream& promoted_index,
|
||||
const bytes& first_col,
|
||||
const bytes& last_col,
|
||||
@@ -1390,8 +1402,9 @@ static bytes serialize_colname(const composite& clustering_key,
|
||||
// (which might be gone later).
|
||||
void sstable::maybe_flush_pi_block(file_writer& out,
|
||||
const composite& clustering_key,
|
||||
const std::vector<bytes_view>& column_names) {
|
||||
bytes colname = serialize_colname(clustering_key, column_names, composite::eoc::none);
|
||||
const std::vector<bytes_view>& column_names,
|
||||
composite::eoc marker) {
|
||||
bytes colname = serialize_colname(clustering_key, column_names, marker);
|
||||
if (_pi_write.block_first_colname.empty()) {
|
||||
// This is the first column in the partition, or first column since we
|
||||
// closed a promoted-index block. Remember its name and position -
|
||||
@@ -1423,7 +1436,9 @@ void sstable::maybe_flush_pi_block(file_writer& out,
|
||||
auto start = composite::from_clustering_element(*_pi_write.schemap, rt.start);
|
||||
auto end = composite::from_clustering_element(*_pi_write.schemap, rt.end);
|
||||
write_range_tombstone(out,
|
||||
start, rt.start_kind, end, rt.end_kind, {}, rt.tomb);
|
||||
start, bound_kind_to_start_marker(rt.start_kind),
|
||||
end, bound_kind_to_end_marker(rt.end_kind),
|
||||
{}, rt.tomb);
|
||||
}
|
||||
}
|
||||
_pi_write.block_next_start_offset = out.offset() + _pi_write.desired_block_size;
|
||||
@@ -1605,24 +1620,18 @@ void sstable::write_row_tombstone(file_writer& out, const composite& key, const
|
||||
|
||||
void sstable::write_range_tombstone(file_writer& out,
|
||||
const composite& start,
|
||||
bound_kind start_kind,
|
||||
composite::eoc start_marker,
|
||||
const composite& end,
|
||||
bound_kind end_kind,
|
||||
composite::eoc end_marker,
|
||||
std::vector<bytes_view> suffix,
|
||||
const tombstone t) {
|
||||
if (!t) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto start_marker = start_kind == bound_kind::excl_start
|
||||
? composite::eoc::end
|
||||
: composite::eoc::start;
|
||||
write_column_name(out, start, suffix, start_marker);
|
||||
column_mask mask = column_mask::range_tombstone;
|
||||
write(out, mask);
|
||||
auto end_marker = end_kind == bound_kind::excl_end
|
||||
? composite::eoc::start
|
||||
: composite::eoc::end;
|
||||
write_column_name(out, end, suffix, end_marker);
|
||||
write_deletion_time(out, t);
|
||||
}
|
||||
@@ -1987,9 +1996,11 @@ stop_iteration components_writer::consume(range_tombstone&& rt) {
|
||||
// already closed by rt.start, so the accumulator doesn't grow boundless.
|
||||
_sst._pi_write.tombstone_accumulator->apply(rt);
|
||||
auto start = composite::from_clustering_element(_schema, std::move(rt.start));
|
||||
auto start_marker = bound_kind_to_start_marker(rt.start_kind);
|
||||
auto end = composite::from_clustering_element(_schema, std::move(rt.end));
|
||||
_sst.maybe_flush_pi_block(_out, start, {});
|
||||
_sst.write_range_tombstone(_out, std::move(start), rt.start_kind, std::move(end), rt.end_kind, {}, rt.tomb);
|
||||
auto end_marker = bound_kind_to_end_marker(rt.end_kind);
|
||||
_sst.maybe_flush_pi_block(_out, start, {}, start_marker);
|
||||
_sst.write_range_tombstone(_out, std::move(start), start_marker, std::move(end), end_marker, {}, rt.tomb);
|
||||
return stop_iteration::no;
|
||||
}
|
||||
|
||||
|
||||
@@ -512,7 +512,8 @@ private:
|
||||
|
||||
void maybe_flush_pi_block(file_writer& out,
|
||||
const composite& clustering_key,
|
||||
const std::vector<bytes_view>& column_names);
|
||||
const std::vector<bytes_view>& column_names,
|
||||
composite::eoc marker = composite::eoc::none);
|
||||
|
||||
schema_ptr _schema;
|
||||
sstring _dir;
|
||||
@@ -617,9 +618,9 @@ private:
|
||||
void write_cell(file_writer& out, atomic_cell_view cell, const column_definition& cdef);
|
||||
void write_column_name(file_writer& out, const composite& clustering_key, const std::vector<bytes_view>& column_names, composite::eoc marker = composite::eoc::none);
|
||||
void write_column_name(file_writer& out, bytes_view column_names);
|
||||
void write_range_tombstone(file_writer& out, const composite& start, bound_kind start_kind, const composite& end, bound_kind stop_kind, std::vector<bytes_view> suffix, const tombstone t);
|
||||
void write_range_tombstone(file_writer& out, const composite& start, composite::eoc start_marker, const composite& end, composite::eoc end_marker, std::vector<bytes_view> suffix, const tombstone t);
|
||||
void write_range_tombstone(file_writer& out, const composite& start, const composite& end, std::vector<bytes_view> suffix, const tombstone t) {
|
||||
write_range_tombstone(out, start, bound_kind::incl_start, end, bound_kind::incl_end, std::move(suffix), std::move(t));
|
||||
write_range_tombstone(out, start, composite::eoc::start, end, composite::eoc::end, std::move(suffix), std::move(t));
|
||||
}
|
||||
void write_collection(file_writer& out, const composite& clustering_key, const column_definition& cdef, collection_mutation_view collection);
|
||||
void write_row_tombstone(file_writer& out, const composite& key, const row_tombstone t);
|
||||
|
||||
74
tests/sstable_assertions.hh
Normal file
74
tests/sstable_assertions.hh
Normal file
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Copyright (C) 2017 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "dht/i_partitioner.hh"
|
||||
#include "schema.hh"
|
||||
#include "sstables/index_reader.hh"
|
||||
|
||||
class index_reader_assertions {
|
||||
std::unique_ptr<sstables::index_reader> _r;
|
||||
public:
|
||||
index_reader_assertions(std::unique_ptr<sstables::index_reader> r)
|
||||
: _r(std::move(r))
|
||||
{ }
|
||||
|
||||
index_reader_assertions& has_monotonic_positions(const schema& s) {
|
||||
auto pos_cmp = position_in_partition::composite_less_compare(s);
|
||||
auto rp_cmp = dht::ring_position_comparator(s);
|
||||
auto prev = dht::ring_position::min();
|
||||
_r->read_partition_data().get();
|
||||
while (!_r->eof()) {
|
||||
auto k = _r->current_partition_entry().get_decorated_key();
|
||||
auto rp = dht::ring_position(k.token(), k.key().to_partition_key(s));
|
||||
|
||||
if (!rp_cmp(prev, rp)) {
|
||||
BOOST_FAIL(sprint("Partitions have invalid order: %s >= %s", prev, rp));
|
||||
}
|
||||
|
||||
prev = rp;
|
||||
|
||||
auto* pi = _r->current_partition_entry().get_promoted_index(s);
|
||||
if (!pi->entries.empty()) {
|
||||
auto& prev = pi->entries[0];
|
||||
for (size_t i = 1; i < pi->entries.size(); ++i) {
|
||||
auto& cur = pi->entries[i];
|
||||
if (!pos_cmp(prev.end, cur.start)) {
|
||||
std::cout << "promoted index:\n";
|
||||
for (auto& e : pi->entries) {
|
||||
std::cout << " " << e.start << "-" << e.end << ": +" << e.offset << " len=" << e.width << std::endl;
|
||||
}
|
||||
BOOST_FAIL(sprint("Index blocks are not monotonic: %s >= %s", prev.end, cur.start));
|
||||
}
|
||||
cur = prev;
|
||||
}
|
||||
}
|
||||
_r->advance_to_next_partition().get();
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
inline
|
||||
index_reader_assertions assert_that(std::unique_ptr<sstables::index_reader> r) {
|
||||
return { std::move(r) };
|
||||
}
|
||||
@@ -35,6 +35,7 @@
|
||||
#include "tmpdir.hh"
|
||||
#include "memtable-sstable.hh"
|
||||
#include "disk-error-handler.hh"
|
||||
#include "tests/sstable_assertions.hh"
|
||||
|
||||
thread_local disk_error_signal_type commit_error;
|
||||
thread_local disk_error_signal_type general_disk_error;
|
||||
@@ -801,3 +802,52 @@ SEASTAR_TEST_CASE(test_non_compound_table_row_is_not_marked_as_static) {
|
||||
BOOST_REQUIRE(bool(mut));
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_promoted_index_blocks_are_monotonic) {
|
||||
return seastar::async([] {
|
||||
auto dir = make_lw_shared<tmpdir>();
|
||||
schema_builder builder("ks", "cf");
|
||||
builder.with_column("p", utf8_type, column_kind::partition_key);
|
||||
builder.with_column("c1", int32_type, column_kind::clustering_key);
|
||||
builder.with_column("c2", int32_type, column_kind::clustering_key);
|
||||
builder.with_column("v", int32_type);
|
||||
auto s = builder.build();
|
||||
|
||||
auto k = partition_key::from_exploded(*s, {to_bytes("key1")});
|
||||
auto cell = atomic_cell::make_live(1, int32_type->decompose(88), { });
|
||||
mutation m(k, s);
|
||||
|
||||
auto ck = clustering_key::from_exploded(*s, {int32_type->decompose(1), int32_type->decompose(2)});
|
||||
m.set_clustered_cell(ck, *s->get_column_definition("v"), cell);
|
||||
|
||||
ck = clustering_key::from_exploded(*s, {int32_type->decompose(1), int32_type->decompose(4)});
|
||||
m.set_clustered_cell(ck, *s->get_column_definition("v"), std::move(cell));
|
||||
|
||||
ck = clustering_key::from_exploded(*s, {int32_type->decompose(1), int32_type->decompose(6)});
|
||||
m.set_clustered_cell(ck, *s->get_column_definition("v"), std::move(cell));
|
||||
|
||||
ck = clustering_key::from_exploded(*s, {int32_type->decompose(3), int32_type->decompose(9)});
|
||||
m.set_clustered_cell(ck, *s->get_column_definition("v"), std::move(cell));
|
||||
|
||||
m.partition().apply_row_tombstone(*s, range_tombstone(
|
||||
clustering_key_prefix::from_exploded(*s, {int32_type->decompose(1)}),
|
||||
bound_kind::excl_start,
|
||||
clustering_key_prefix::from_exploded(*s, {int32_type->decompose(2)}),
|
||||
bound_kind::incl_end,
|
||||
{1, gc_clock::now()}));
|
||||
|
||||
auto mt = make_lw_shared<memtable>(s);
|
||||
mt->apply(std::move(m));
|
||||
|
||||
auto sst = make_lw_shared<sstables::sstable>(s,
|
||||
dir->path,
|
||||
1 /* generation */,
|
||||
sstables::sstable::version_types::ka,
|
||||
sstables::sstable::format_types::big);
|
||||
sstable_writer_config cfg;
|
||||
cfg.promoted_index_block_size = 1;
|
||||
sst->write_components(mt->make_reader(s), 1, s, cfg).get();
|
||||
sst->load().get();
|
||||
assert_that(sst->get_index_reader(default_priority_class())).has_monotonic_positions(*s);
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user