Files
scylladb/streaming/consumer.cc
Raphael S. Carvalho b551f4abd2 streaming: Improve partition estimation with TWCS
When off-strategy is disabled, data segregation is not postponed,
meaning that getting the partition estimate right is important to
reduce the filter's false positives. With streaming, we don't
have the min and max timestamps at the destination. We could have
extended the RPC verb to send them, but it turns out we can easily
deduce the number of windows from the default TTL. Given the
partitioner's random nature, it's not absurd to assume that a given
range being streamed may overlap with all windows, meaning that each
range will yield one sstable per window when segregating incoming
data. Today, we assume the worst case of 100 windows (which is the
max number of sstables the input data can be segregated into)
due to the lack of metadata for estimating the window count.
But given that users are recommended to target a max of ~20
windows, it means the partition estimate is being downsized 5x more
than needed. Let's improve it by using the default TTL when
estimating the window count, so that even in the absence of timestamp
metadata, the partition estimation won't be way off.

Fixes #15704.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
2023-11-08 12:10:03 +02:00

89 lines
4.2 KiB
C++

/*
* Copyright (C) 2021-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#include <seastar/core/coroutine.hh>
#include "consumer.hh"
#include "replica/database.hh"
#include "mutation/mutation_source_metadata.hh"
#include "db/view/view_update_generator.hh"
#include "db/view/view_update_checks.hh"
#include "sstables/sstables.hh"
#include "sstables/sstables_manager.hh"
namespace streaming {
/// Builds the callable that turns an incoming mutation stream into sstables of the
/// table identified by the reader's schema.
///
/// For every reader handed to the returned function it:
///   1. looks up the table and asks check_needs_view_update_path() whether the
///      data has to go through the staging (view update) path;
///   2. unless off-strategy is enabled, lets the table's compaction strategy adjust
///      the partition estimate and wrap the writer in its interposer consumer;
///   3. writes the stream into a new sstable, opens it, adds it to the table and,
///      on the view update path, registers it with the view update generator.
/// Any exception closes the reader and is then propagated through the returned future.
///
/// \param origin               passed to the sstable writer configuration; when equal to
///                             sstables::repair_origin (and off-strategy is enabled) the
///                             table's automatic off-strategy trigger is enabled
/// \param db                   captured by reference; must outlive the returned callable
///                             and every future it produced
/// \param sys_dist_ks          captured by reference; same lifetime requirement as \p db
/// \param vug                  captured by reference; same lifetime requirement as \p db
/// \param estimated_partitions partition count hint for the sstable writer
/// \param reason               forwarded to check_needs_view_update_path()
/// \param offstrategy          when enabled, the partition estimate is used as is and no
///                             interposer consumer is installed
/// \return a function consuming one reader and resolving once that reader's data has
///         been written and registered
std::function<future<> (flat_mutation_reader_v2)> make_streaming_consumer(sstring origin,
        sharded<replica::database>& db,
        sharded<db::system_distributed_keyspace>& sys_dist_ks,
        sharded<db::view::view_update_generator>& vug,
        uint64_t estimated_partitions,
        stream_reason reason,
        sstables::offstrategy offstrategy) {
    return [&db, &sys_dist_ks, &vug, estimated_partitions, reason, offstrategy, origin = std::move(origin)] (flat_mutation_reader_v2 reader) -> future<> {
        std::exception_ptr ex;
        try {
            // Lambda captures are stored in the closure object, not in the coroutine
            // frame. Copy everything that is needed after the first co_await into
            // locals (which do live in the frame), so that the coroutine never reads
            // the closure again once it has suspended.
            auto& vug_ref = vug;
            const auto partitions_hint = estimated_partitions;
            const auto use_offstrategy = offstrategy;
            auto origin_copy = origin;

            auto cf = db.local().find_column_family(reader.schema()).shared_from_this();
            auto use_view_update_path = co_await db::view::check_needs_view_update_path(sys_dist_ks.local(), db.local().get_token_metadata(), *cf, reason);
            // FIXME: for better estimations this should be transmitted from remote
            auto metadata = mutation_source_metadata{};
            // Obtained before cf is moved into the writer below.
            auto& cs = cf->get_compaction_strategy();
            // Data segregation is postponed to happen during off-strategy if latter is enabled, which
            // means partition estimation shouldn't be adjusted.
            const auto adjusted_estimated_partitions = use_offstrategy
                    ? partitions_hint
                    : cs.adjust_partition_estimate(metadata, partitions_hint, cf->schema());

            // Final stage: writes one reader into one sstable and publishes it.
            reader_consumer_v2 consumer =
                    [cf = std::move(cf), adjusted_estimated_partitions, use_view_update_path, &vug_ref, origin = std::move(origin_copy), use_offstrategy] (flat_mutation_reader_v2 reader) {
                sstables::shared_sstable sst;
                try {
                    sst = use_view_update_path ? cf->make_streaming_staging_sstable() : cf->make_streaming_sstable_for_write();
                } catch (...) {
                    // The reader has not been handed to a writer yet, so close it here
                    // before reporting the failure.
                    return current_exception_as_future().finally([reader = std::move(reader)] () mutable {
                        return reader.close();
                    });
                }
                schema_ptr s = reader.schema();
                auto cfg = cf->get_sstables_manager().configure_writer(origin);
                cfg.erm = cf->get_effective_replication_map();
                return sst->write_components(std::move(reader), adjusted_estimated_partitions, s,
                        cfg, encoding_stats{}).then([sst] {
                    return sst->open_data();
                }).then([cf, sst, use_offstrategy, origin] {
                    if (use_offstrategy && sstables::repair_origin == origin) {
                        sstables::sstlog.debug("Enabled automatic off-strategy trigger for table {}.{}",
                                cf->schema()->ks_name(), cf->schema()->cf_name());
                        cf->enable_off_strategy_trigger();
                    }
                    return cf->add_sstable_and_update_cache(sst, use_offstrategy);
                }).then([cf, sst, use_view_update_path, &vug_ref] () mutable -> future<> {
                    if (!use_view_update_path) {
                        return make_ready_future<>();
                    }
                    return vug_ref.local().register_staging_sstable(sst, std::move(cf));
                });
            };

            // Without off-strategy the compaction strategy gets to wrap the writer in
            // its interposer consumer.
            if (!use_offstrategy) {
                consumer = cs.make_interposer_consumer(metadata, std::move(consumer));
            }
            co_return co_await consumer(std::move(reader));
        } catch (...) {
            ex = std::current_exception();
        }
        if (ex) {
            // NOTE(review): the reader may already have been moved into the consumer at
            // this point; this relies on close() being valid on a moved-from reader, as
            // the original code did - confirm.
            co_await reader.close();
            std::rethrow_exception(std::move(ex));
        }
    };
}
}