mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-25 09:11:10 +00:00
212 lines
11 KiB
C++
212 lines
11 KiB
C++
/*
|
|
* Copyright (C) 2025-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
|
|
*/
|
|
|
|
#include "sstable_dict_autotrainer.hh"
|
|
#include "gms/feature_service.hh"
|
|
#include "service/storage_service.hh"
|
|
#include <seastar/core/sleep.hh>
|
|
#include "schema/schema_builder.hh"
|
|
#include "sstables/sstables_manager.hh"
|
|
#include <seastar/coroutine/maybe_yield.hh>
|
|
|
|
static seastar::logger alogger("sstable_dict_autotrainer");
|
|
|
|
// Constructs the autotrainer.
//
// Stores references to the storage service and the group0 client, takes
// ownership of the configuration, and starts the background fiber right away
// by calling run(). The future returned by run() is kept in `_fiber`; stop()
// hands it back to the caller.
//
// NOTE(review): run() is invoked from the member-initializer list and reads
// `_cfg` and `_as` before its first suspension point. This presumably relies
// on those members being declared before `_fiber` in the class -- confirm
// against the header.
sstable_dict_autotrainer::sstable_dict_autotrainer(
        service::storage_service& ss,
        service::raft_group0_client& rgc,
        config cfg)
    : _ss(ss),
      _group0_client(rgc),
      _cfg(std::move(cfg)),
      _fiber(run()) {
}
|
|
|
|
// Performs a single pass of the autotrainer.
//
// The pass:
//  1. Collects the schemas of all non-system tables whose compression
//     algorithm is one of the dictionary-aware ones.
//  2. For each such table, in order:
//     - skips it if its current dictionary is younger than
//       `_cfg.retrain_period_in_seconds()`,
//     - skips it if the estimated total sstable volume is smaller than
//       `_cfg.min_dataset_bytes()`,
//     - samples chunks from the table's sstables and trains a dictionary,
//     - samples a second, independent set of chunks for validation,
//     - compares the compression ratio (compressed size / raw size, so lower
//       is better) obtained with the current compressor factory against the
//       ratio obtained with the freshly trained dictionary,
//     - publishes the new dictionary only if the new ratio does not exceed
//       `_cfg.min_improvement_factor() * ratio_before`.
//
// Exceptions thrown by any awaited operation propagate to the caller.
future<> sstable_dict_autotrainer::tick() {
    alogger.debug("sstable_dict_autotrainer::tick(): entering");

    // Gather the candidates first, so that the per-table work below (which suspends)
    // operates on a stable list of schema pointers.
    auto dict_aware_tables = std::vector<schema_ptr>();
    _ss.get_database().get_tables_metadata().for_each_table([&dict_aware_tables] (table_id id, lw_shared_ptr<replica::table> table) {
        const auto& schema = table->schema();
        if (is_system_keyspace(schema->ks_name())) {
            return;
        }
        auto params = schema->get_compressor_params();
        auto compression_algo = params.get_algorithm();
        if (compression_algo == compressor::algorithm::lz4_with_dicts || compression_algo == compressor::algorithm::zstd_with_dicts) {
            alogger.debug("sstable_dict_autotrainer::tick(): {}.{} is dict-aware", schema->ks_name(), schema->cf_name());
            dict_aware_tables.push_back(schema);
        } else {
            alogger.debug("sstable_dict_autotrainer::tick(): {}.{} is not dict-aware", schema->ks_name(), schema->cf_name());
        }
    });

    for (const auto& s : dict_aware_tables) {
        // Skip tables whose dictionary was refreshed recently enough.
        auto dict_name = fmt::format("sstables/{}", s->id());
        auto current_dict_timestamp = co_await _ss.get_system_keyspace().query_dict_timestamp(dict_name);
        if (current_dict_timestamp) {
            alogger.debug("sstable_dict_autotrainer::tick(): {}.{} has timestamp {}", s->ks_name(), s->cf_name(), current_dict_timestamp);
            auto delta = (db_clock::now() - *current_dict_timestamp);
            auto min_delta = std::chrono::duration<float>(_cfg.retrain_period_in_seconds());
            if (delta < min_delta) {
                alogger.debug("sstable_dict_autotrainer::tick(): {}.{} has dict younger than {}, not updating", s->ks_name(), s->cf_name(), min_delta);
                continue;
            }
        } else {
            alogger.debug("sstable_dict_autotrainer::tick(): {}.{} has no dict", s->ks_name(), s->cf_name());
        }

        // Skip tables which don't hold enough data yet.
        alogger.debug("sstable_dict_autotrainer::tick(): attempting to update the dict for {}.{}", s->ks_name(), s->cf_name());
        auto estimated_size = co_await _ss.estimate_total_sstable_volume(s->id());
        alogger.debug("sstable_dict_autotrainer::tick(): {}.{}: estimated_size={}", s->ks_name(), s->cf_name(), estimated_size);
        auto min_dataset_size = _cfg.min_dataset_bytes();
        if (estimated_size < min_dataset_size) {
            alogger.debug("sstable_dict_autotrainer::tick(): {}.{}: dataset not big enough yet (< {}), giving up on the update", s->ks_name(), s->cf_name(), min_dataset_size);
            continue;
        }

        auto params = s->get_compressor_params();
        // Held until the end of this loop iteration (or until an early `continue`).
        auto ticket = co_await get_units(_ss.get_do_sample_sstables_concurrency_limiter(), 1);

        // When we sample a block from a set of files, we can think of this block's
        // compression ratio as a random variable, with value in [0;1] and
        // some underlying distribution.
        //
        // By sampling, we are trying to estimate the mean of this distribution.
        //
        // By Hoeffding's inequality,
        // for random variables with values in [0;1],
        // with n samples,
        // the probability that the sample mean will be off by more than `t`
        // (w.r.t. the real mean) is smaller than `2*e**(-2*n*t**2)`.
        //
        // From this, we can pick 4096 as a reasonable number of samples.
        // It should be small enough not to cause performance problems,
        // and it should give a good confidence interval
        // (probability of 0.01 error: 50%)
        // (probability of 0.02 error: 4%)
        // (probability of 0.03 error: 0.1%)
        // even in the worst possible distributions.
        uint64_t target_number_of_samples = 4096;

        // Arbitrary memory consumption limit. Goals:
        // - Avoid straining the node too much.
        // - In realistic setups, get enough samples.
        //
        // For the second goal, it should be enough if:
        // - With the minimum recommended shard memory (1 GiB) and default `chunk_len` (4 kiB),
        //   we can collect 4096 samples.
        // - With the usual shard memory (AWS i4i instances have 8 GiB per shard)
        //   and big `chunk_len` (32 kiB), we can collect 4096 samples.
        //
        // And 0.02 is big enough for those.
        //
        // In practice, even 0.02 is probably overkill. 4096 should be good enough even
        // for the highest possible compression ratio variance across chunks.
        // In practice, the compression ratio is probably normally distributed,
        // and a much, much smaller number of samples would be enough.
        uint64_t max_number_of_samples = 0.02 * seastar::memory::stats().total_memory() / params.chunk_length();
        auto n_chunks = std::min<uint64_t>(target_number_of_samples, max_number_of_samples);

        // Train on one sample...
        alogger.debug("sstable_dict_autotrainer::tick(): {}.{}: sampling for training with chunk_length={}, n_chunks={}", s->ks_name(), s->cf_name(), params.chunk_length(), n_chunks);
        auto training_sample = co_await _ss.do_sample_sstables(s->id(), params.chunk_length(), n_chunks);
        alogger.debug("sstable_dict_autotrainer::tick(): {}.{}: got training sample with {} chunks", s->ks_name(), s->cf_name(), training_sample.size());
        if (training_sample.size() < n_chunks) {
            alogger.debug("sstable_dict_autotrainer::tick(): {}.{}: not enough chunks, giving up on the update", s->ks_name(), s->cf_name());
            continue;
        }
        auto dict = co_await _ss.train_dict(std::move(training_sample));
        alogger.debug("sstable_dict_autotrainer::tick(): {}.{}: trained dict of size {}", s->ks_name(), s->cf_name(), dict.size());

        // ...and validate on another one, drawn separately.
        alogger.debug("sstable_dict_autotrainer::tick(): {}.{}: sampling for validation with chunk_length={}, n_chunks={}", s->ks_name(), s->cf_name(), params.chunk_length(), n_chunks);
        auto validation_sample = co_await _ss.do_sample_sstables(s->id(), params.chunk_length(), n_chunks);
        alogger.debug("sstable_dict_autotrainer::tick(): {}.{}: got validation sample with {} chunks", s->ks_name(), s->cf_name(), validation_sample.size());
        if (validation_sample.size() < n_chunks) {
            alogger.debug("sstable_dict_autotrainer::tick(): {}.{}: not enough chunks, giving up on the update", s->ks_name(), s->cf_name());
            continue;
        }

        // Ratios are compressed_size / raw_size, so a smaller value means better compression.
        // The new dict is accepted only if its ratio is at most `threshold`.
        auto ratio_before = co_await try_one_compression_config(_ss.get_database().get_user_sstables_manager().get_compressor_factory(), s, params, validation_sample);
        auto ratio_after = co_await try_one_compression_config(dict, s, params, validation_sample);
        float threshold = _cfg.min_improvement_factor() * ratio_before;
        alogger.debug("sstable_dict_autotrainer::tick(): {}.{}: ratio_before={}, ratio_after={}, update_threshold={}", s->ks_name(), s->cf_name(), ratio_before, ratio_after, threshold);
        if (ratio_after > threshold) {
            alogger.debug("sstable_dict_autotrainer::tick(): {}.{}: ratio_after above threshold. Discarding the new dict.", s->ks_name(), s->cf_name());
            continue;
        }
        alogger.debug("sstable_dict_autotrainer::tick(): {}.{}: Publishing the new dict.", s->ks_name(), s->cf_name());
        co_await _ss.publish_new_sstable_dict(s->id(), dict, _group0_client);
    }
}
|
|
|
|
// Body of the background fiber. Must run on shard 0.
//
// Until an abort is requested through `_as`, repeatedly:
//  - sleeps for `_cfg.tick_period_in_seconds()` (re-read on every iteration,
//    and converted to whole milliseconds), with the sleep being abortable via `_as`,
//  - calls tick() if this node is the raft leader and the
//    `sstable_compression_dicts` feature is enabled.
//
// No exception escapes this function: every failure is logged at debug level
// and swallowed. Note that none of the handlers leaves the loop by itself;
// the loop ends only once `_as.abort_requested()` becomes true.
future<> sstable_dict_autotrainer::run() {
    SCYLLA_ASSERT(this_shard_id() == 0);
    alogger.debug("sstable_dict_autotrainer::run(): starting");
    while (!_as.abort_requested()) {
        try {
            auto tick_period = std::chrono::duration<float>(_cfg.tick_period_in_seconds());
            auto tick_period_ms = std::chrono::duration_cast<std::chrono::milliseconds>(tick_period);
            co_await seastar::sleep_abortable(tick_period_ms, _as);
            if (_ss.is_raft_leader() && _ss.get_feature_service().sstable_compression_dicts) {
                co_await tick();
            }
        } catch (const abort_requested_exception&) {
            alogger.debug("sstable_dict_autotrainer::run(): exiting via abort_requested_exception");
        } catch (const gate_closed_exception&) {
            alogger.debug("sstable_dict_autotrainer::run(): exiting via gate_closed_exception");
        } catch (const raft::stopped_error&) {
            alogger.debug("sstable_dict_autotrainer::run(): exiting via raft::stopped_error");
        } catch (...) {
            // Best-effort: a failed tick is retried after the next sleep.
            alogger.debug("sstable_dict_autotrainer::run(): tick() failed with: {}", std::current_exception());
        }
    }
}
|
|
|
|
// Requests the background fiber to stop and returns the fiber's future.
//
// The abort is signalled through `_as`, which run() polls and uses for its
// abortable sleep. The stored future is moved out of `_fiber` and returned.
future<> sstable_dict_autotrainer::stop() {
    _as.request_abort();
    auto fiber_done = std::move(_fiber);
    return fiber_done;
}
|
|
|
|
// Measures how well the compressor produced by `factory` compresses the given samples.
//
// A copy of `initial_schema` with its compressor parameters replaced by `params`
// is built, and a compressor for writing is requested from `factory` for that schema.
// Every sample is then cut into pieces of at most `params.chunk_length()` bytes,
// and each piece is compressed separately.
//
// Returns total compressed size divided by total raw size
// (or 0 if the samples contain no bytes at all).
future<float> try_one_compression_config(
    sstable_compressor_factory& factory,
    schema_ptr initial_schema,
    const compression_parameters& params,
    const utils::chunked_vector<temporary_buffer<char>>& validation_samples
) {
    auto schema_with_params = schema_builder(initial_schema).set_compressor_params(params).build();
    auto compressor = co_await factory.make_compressor_for_writing(schema_with_params);

    size_t total_raw = 0;
    size_t total_compressed = 0;
    // Output scratch buffer, reused for all pieces.
    std::vector<char> scratch;

    for (const auto& sample : validation_samples) {
        const auto chunk_len = params.chunk_length();
        const auto whole = std::string_view(sample.get(), sample.size());
        size_t offset = 0;
        while (offset < whole.size()) {
            // substr() clamps the length, so the last piece may be shorter than chunk_len.
            const auto piece = whole.substr(offset, chunk_len);
            total_raw += piece.size();
            scratch.resize(compressor->compress_max_size(piece.size()));
            total_compressed += compressor->compress(piece.data(), piece.size(), scratch.data(), scratch.size());
            offset += chunk_len;
        }
        // Give other tasks a chance to run between samples.
        co_await coroutine::maybe_yield();
    }

    co_return total_raw == 0 ? 0.0f : float(total_compressed) / total_raw;
}
|
|
|
|
// Measures how well `dict` compresses the given samples.
//
// A temporary `sharded<default_sstable_compressor_factory>` is started, `dict` is
// set as the recommended dictionary for the table of `initial_schema` on the local
// instance, and the measurement is delegated to the factory-taking overload.
//
// The temporary factory is always stopped before returning, also when the
// measurement fails; in that case the captured exception is returned as the
// result of the future.
future<float> try_one_compression_config(
    std::span<std::byte> dict,
    schema_ptr initial_schema,
    const compression_parameters& params,
    const utils::chunked_vector<temporary_buffer<char>>& validation_samples
) {
    sharded<default_sstable_compressor_factory> dict_factory;
    co_await dict_factory.start();

    // The failure (if any) is stashed, because stop() must be awaited outside of the catch block.
    std::exception_ptr failure;
    float ratio = 0.0f;
    try {
        auto& local_factory = dict_factory.local();
        co_await local_factory.set_recommended_dict(initial_schema->id(), dict);
        ratio = co_await try_one_compression_config(local_factory, initial_schema, params, validation_samples);
    } catch (...) {
        failure = std::current_exception();
    }

    co_await dict_factory.stop();

    if (failure) {
        co_return coroutine::exception(failure);
    }
    co_return ratio;
}
|