sstables: Generate sharding metadata using sharder from erm when writing

We need to keep sharding metadata consistent with the tablet-to-shard
mapping so that, on node restart, those sstables are detected as
belonging to a single shard and resharding is not triggered. Resharding
of sstables based on tablet metadata is not implemented yet; after this
series, any attempt to reshard such sstables will abort.

Keeping sharding metadata accurate for tablets is only necessary until
compaction group integration is finished. After that, we can use the
sstable token range to determine the owning tablet and thus the owning
shard. Until then we cannot, because a single sstable may contain
keys from different tablets, and the sstable's overall key range may
overlap with keys that belong to other shards.
This commit is contained in:
Tomasz Grabiec
2023-05-11 13:43:48 +02:00
parent 36e12020b9
commit 17d6163548
5 changed files with 22 additions and 8 deletions

View File

@@ -873,6 +873,7 @@ table::try_flush_memtable_to_sstable(compaction_group& cg, lw_shared_ptr<memtabl
try {
sstables::sstable_writer_config cfg = get_sstables_manager().configure_writer("memtable");
cfg.backup = incremental_backups_enabled();
cfg.erm = _erm;
auto newtab = make_sstable();
newtabs.push_back(newtab);
@@ -2782,7 +2783,9 @@ public:
return _t.make_sstable();
}
sstables::sstable_writer_config configure_writer(sstring origin) const override {
return _t.get_sstables_manager().configure_writer(std::move(origin));
auto cfg = _t.get_sstables_manager().configure_writer(std::move(origin));
cfg.erm = _t.get_effective_replication_map();
return cfg;
}
api::timestamp_type min_memtable_timestamp() const override {
return _cg.min_memtable_timestamp();

View File

@@ -1471,7 +1471,9 @@ void writer::consume_end_of_stream() {
{ large_data_type::elements_in_collection, std::move(_elements_in_collection_entry) },
}
});
_sst.write_scylla_metadata(_shard, std::move(features), std::move(identifier), std::move(ld_stats), _cfg.origin);
const dht::sharder& sharder = _cfg.erm ? _cfg.erm->get_sharder(_schema)
: _schema.get_sharder(); // Used in tests
_sst.write_scylla_metadata(_shard, sharder, std::move(features), std::move(identifier), std::move(ld_stats), _cfg.origin);
_sst.seal_sstable(_cfg.backup).get();
}

View File

@@ -1624,11 +1624,12 @@ sstable::read_scylla_metadata() noexcept {
}
void
sstable::write_scylla_metadata(shard_id shard, sstable_enabled_features features, struct run_identifier identifier,
sstable::write_scylla_metadata(shard_id shard, const dht::sharder& sharder, sstable_enabled_features features, struct run_identifier identifier,
std::optional<scylla_metadata::large_data_stats> ld_stats, sstring origin) {
auto&& first_key = get_first_decorated_key();
auto&& last_key = get_last_decorated_key();
auto sm = create_sharding_metadata(_schema, _schema->get_sharder(), first_key, last_key, shard);
auto sm = create_sharding_metadata(_schema, sharder, first_key, last_key, shard);
// sstable write may fail to generate empty metadata if mutation source has only data from other shard.
// see https://github.com/scylladb/scylla/issues/2932 for details on how it can happen.

View File

@@ -39,6 +39,7 @@
#include "readers/flat_mutation_reader_fwd.hh"
#include "tracing/trace_state.hh"
#include "utils/updateable_value.hh"
#include "locator/abstract_replication_strategy.hh"
#include <seastar/util/optimized_optional.hh>
@@ -110,6 +111,7 @@ struct sstable_writer_config {
run_id run_identifier = run_id::create_random_id();
size_t summary_byte_cost;
sstring origin;
locator::effective_replication_map_ptr erm;
private:
explicit sstable_writer_config() {}
@@ -579,8 +581,13 @@ private:
void write_compression();
future<> read_scylla_metadata() noexcept;
void write_scylla_metadata(shard_id shard, sstable_enabled_features features, run_identifier identifier,
std::optional<scylla_metadata::large_data_stats> ld_stats, sstring origin);
void write_scylla_metadata(shard_id shard,
const dht::sharder& sharder,
sstable_enabled_features features,
run_identifier identifier,
std::optional<scylla_metadata::large_data_stats> ld_stats,
sstring origin);
future<> read_filter(sstable_open_config cfg = {});

View File

@@ -54,9 +54,10 @@ std::function<future<> (flat_mutation_reader_v2)> make_streaming_consumer(sstrin
}
schema_ptr s = reader.schema();
auto cfg = cf->get_sstables_manager().configure_writer(origin);
cfg.erm = cf->get_effective_replication_map();
return sst->write_components(std::move(reader), adjusted_estimated_partitions, s,
cf->get_sstables_manager().configure_writer(origin),
encoding_stats{}).then([sst] {
cfg, encoding_stats{}).then([sst] {
return sst->open_data();
}).then([cf, sst, offstrategy, origin] {
if (offstrategy && sstables::repair_origin == origin) {