Files
scylladb/test/perf/perf_commitlog.cc
Avi Kivity f3eade2f62 treewide: relicense to ScyllaDB-Source-Available-1.0
Drop the AGPL license in favor of a source-available license.
See the blog post [1] for details.

[1] https://www.scylladb.com/2024/12/18/why-were-moving-to-a-source-available-license/
2024-12-18 17:45:13 +02:00

281 lines
12 KiB
C++

/*
* Copyright (C) 2015-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#include <fstream>
#include <fmt/ranges.h>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/classification.hpp>
#include <json/json.h>
#include <seastar/core/app-template.hh>
#include <seastar/core/coroutine.hh>
#include <seastar/core/reactor.hh>
#include <seastar/core/on_internal_error.hh>
#include <seastar/testing/test_runner.hh>
#include "test/lib/tmpdir.hh"
#include "test/perf/perf.hh"
#include "test/lib/random_utils.hh"
#include "db/config.hh"
#include "schema/schema_builder.hh"
#include "release.hh"
#include "db/config.hh"
#include "db/extensions.hh"
#include "db/commitlog/commitlog.hh"
#include "utils/assert.hh"
#include "utils/UUID_gen.hh"
struct test_config {
unsigned concurrency;
unsigned duration_in_seconds;
unsigned operations_per_shard = 0;
size_t min_data_size;
size_t max_data_size;
uint64_t min_flush_delay_in_ms;
uint64_t max_flush_delay_in_ms;
};
using clperf_result = perf_result_with_aio_writes;
static void write_json_result(std::string result_file, const test_config& cfg, clperf_result median, double mad, double max, double min) {
Json::Value results;
Json::Value params;
params["concurrency"] = cfg.concurrency;
params["cpus"] = smp::count;
params["duration"] = cfg.duration_in_seconds;
params["min-data-size"] = cfg.min_data_size;
params["max-data-size"] = cfg.max_data_size;
params["min-flush-delay-in-ms"] = cfg.min_flush_delay_in_ms;
params["max-flush-delay-in-ms"] = cfg.max_flush_delay_in_ms;
params["concurrency,cpus,duration"] = fmt::format("{},{},{}", cfg.concurrency, smp::count, cfg.duration_in_seconds);
results["parameters"] = std::move(params);
Json::Value stats;
stats["median tps"] = median.throughput;
stats["allocs_per_op"] = median.mallocs_per_op;
stats["logallocs_per_op"] = median.logallocs_per_op;
stats["tasks_per_op"] = median.tasks_per_op;
stats["instructions_per_op"] = median.instructions_per_op;
stats["cpu_cycles_per_op"] = median.cpu_cycles_per_op;
stats["aio_writes"] = median.aio_writes;
stats["aio_write_bytes"] = median.aio_write_bytes;
stats["mad tps"] = mad;
stats["max tps"] = max;
stats["min tps"] = min;
results["stats"] = std::move(stats);
std::string test_type = "commitlog_write";
results["test_properties"]["type"] = test_type;
// <version>-<release>
auto version_components = std::vector<std::string>{};
auto sver = scylla_version();
boost::algorithm::split(version_components, sver, boost::is_any_of("-"));
// <scylla-build>.<date>.<git-hash>
auto release_components = std::vector<std::string>{};
boost::algorithm::split(release_components, version_components[1], boost::is_any_of("."));
Json::Value version;
version["commit_id"] = release_components[2];
version["date"] = release_components[1];
version["version"] = version_components[0];
// It'd be nice to have std::chrono::format(), wouldn't it?
auto current_time = std::time(nullptr);
char time_str[100];
::tm time_buf;
std::strftime(time_str, sizeof(time_str), "%Y-%m-%d %H:%M:%S", ::localtime_r(&current_time, &time_buf));
version["run_date_time"] = time_str;
results["versions"]["scylla-server"] = std::move(version);
auto out = std::ofstream(result_file);
out << results;
}
struct commitlog_service {
test_config cfg;
std::uniform_int_distribution<unsigned> delay_dist;
std::uniform_int_distribution<size_t> size_dist;
std::optional<db::commitlog> log;
std::optional<db::commitlog::flush_handler_anchor> fa;
timer<> flush_timer;
commitlog_service(const test_config& c)
: cfg(c)
, delay_dist(cfg.min_flush_delay_in_ms, cfg.max_flush_delay_in_ms)
, size_dist(cfg.min_data_size, cfg.max_data_size)
{}
future<> init(const db::commitlog::config& cfg) {
SCYLLA_ASSERT(!log);
log.emplace(co_await db::commitlog::create_commitlog(cfg));
fa.emplace(log->add_flush_handler(std::bind(&commitlog_service::flush_handler, this, std::placeholders::_1, std::placeholders::_2)));
}
future<> stop() {
if (log) {
co_await log->shutdown();
co_await log->clear();
}
}
void flush_handler(db::cf_id_type id, db::replay_position pos) {
if (!flush_timer.armed()) {
flush_timer.set_callback([id, this] { log->discard_completed_segments(id); });
flush_timer.arm(std::chrono::milliseconds(delay_dist(tests::random::gen())));
}
}
};
static std::vector<clperf_result> do_commitlog_test(distributed<commitlog_service>& cls, test_config& cfg) {
auto uuid = table_id(utils::UUID_gen::get_time_UUID());
return time_parallel_ex<clperf_result>([&] {
auto& log = cls.local();
size_t size = log.size_dist(tests::random::gen());
return log.log->add_mutation(uuid, size, db::commitlog::force_sync::no, [size](db::commitlog::output& dst) {
dst.fill('1', size);
}).then([](db::rp_handle h) {
h.release();
});
}, cfg.concurrency, cfg.duration_in_seconds, cfg.operations_per_shard, true, &clperf_result::update);
}
int main(int argc, char** argv) {
namespace bpo = boost::program_options;
app_template app;
app.add_options()
("random-seed", boost::program_options::value<unsigned>(), "Random number generator seed")
("duration", bpo::value<unsigned>()->default_value(5), "test duration in seconds")
("concurrency", bpo::value<unsigned>()->default_value(100), "workers per core")
("operations-per-shard", bpo::value<unsigned>(), "run this many operations per shard (overrides duration)")
("commitlog-sync", bpo::value<sstring>(), "commitlog sync method (pediodic/batch)")
("commitlog-segment-size-in-mb", bpo::value<unsigned>(), "commitlog segment size")
("commitlog-total-space-in-mb", bpo::value<unsigned>(), "total commitlog size")
("commitlog-sync-period-in-ms", bpo::value<unsigned>(), "how long the system waits for other writes before performing a sync in \"periodic\" mode")
("commitlog-use-o-dsync", bpo::value<bool>()->default_value(true), "whether or not to use O_DSYNC mode for commitlog segments io")
("commitlog-use-hard-size-limit", bpo::value<bool>()->default_value(true), "whether or not to use a hard size limit for commitlog disk usage")
("min-data-size", bpo::value<size_t>()->default_value(200), "minimum size of data element added")
("max-data-size", bpo::value<size_t>()->default_value(32/2 * 1024 * 1024 - 1), "maximum size of data element added")
("min-flush-delay-in-ms", bpo::value<uint64_t>()->default_value(10), "minimum flush response delay")
("max-flush-delay-in-ms", bpo::value<uint64_t>()->default_value(800), "maximum flush response delay")
("json-result", bpo::value<std::string>(), "name of the json result file")
;
set_abort_on_internal_error(true);
return app.run(argc, argv, [&app_in = app]() -> future<> {
auto& app = app_in;
auto conf_seed = app.configuration()["random-seed"];
auto seed = conf_seed.empty() ? std::random_device()() : conf_seed.as<unsigned>();
std::cout << "random-seed=" << seed << '\n';
co_await smp::invoke_on_all([seed] {
seastar::testing::local_random_engine.seed(seed + this_shard_id());
});
auto ext = std::make_shared<db::extensions>(); // TODO: maybe add commitlog file extension + delays or errors.
auto db_cfg = ::make_shared<db::config>(ext);
if (app.configuration().contains("commitlog-sync")) {
db_cfg->commitlog_sync(app.configuration()["commitlog-sync"].as<sstring>());
}
if (app.configuration().contains("commitlog-segment-size-in-mb")) {
db_cfg->commitlog_segment_size_in_mb(app.configuration()["commitlog-segment-size-in-mb"].as<unsigned>());
}
if (app.configuration().contains("commitlog-total-space-in-mb")) {
db_cfg->commitlog_total_space_in_mb(app.configuration()["commitlog-total-space-in-mb"].as<unsigned>());
}
if (app.configuration().contains("commitlog-sync-period-in-ms")) {
db_cfg->commitlog_sync_period_in_ms(app.configuration()["commitlog-sync-period-in-ms"].as<unsigned>());
}
if (app.configuration().contains("commitlog-use-o-dsync")) {
db_cfg->commitlog_use_o_dsync(app.configuration()["commitlog-use-o-dsync"].as<bool>());
}
if (app.configuration().contains("commitlog-use-hard-size-limit")) {
db_cfg->commitlog_use_hard_size_limit(app.configuration()["commitlog-use-hard-size-limit"].as<bool>());
}
auto cfg = test_config();
cfg.duration_in_seconds = app.configuration()["duration"].as<unsigned>();
cfg.concurrency = app.configuration()["concurrency"].as<unsigned>();
if (app.configuration().contains("operations-per-shard")) {
cfg.operations_per_shard = app.configuration()["operations-per-shard"].as<unsigned>();
}
cfg.min_data_size = app.configuration()["min-data-size"].as<size_t>();
cfg.max_data_size = app.configuration()["max-data-size"].as<size_t>();
cfg.min_flush_delay_in_ms = app.configuration()["min-flush-delay-in-ms"].as<uint64_t>();
cfg.max_flush_delay_in_ms = app.configuration()["min-flush-delay-in-ms"].as<uint64_t>();
if (cfg.min_data_size > cfg.max_data_size) {
cfg.max_data_size = cfg.min_data_size;
}
if (cfg.min_flush_delay_in_ms > cfg.max_flush_delay_in_ms) {
cfg.max_flush_delay_in_ms = cfg.min_flush_delay_in_ms;
}
db::commitlog::config cl_cfg = db::commitlog::config::from_db_config(*db_cfg, current_scheduling_group(), memory::stats().total_memory());
tmpdir tmp;
cl_cfg.commit_log_location = tmp.path().string();
distributed<commitlog_service> test_commitlog;
//logging::logger_registry().set_logger_level("commitlog", logging::log_level::debug);
co_await test_commitlog.start(cfg);
co_await test_commitlog.invoke_on_all(std::mem_fn(&commitlog_service::init), cl_cfg);
std::exception_ptr ex;
try {
if (cfg.max_data_size > test_commitlog.local().log->max_record_size()) {
throw std::invalid_argument(sstring("Too large max data size: ") + std::to_string(cfg.max_data_size));
}
// test "framework" expects seastar thread
auto results = co_await seastar::async([&] {
return do_commitlog_test(test_commitlog, cfg);
});
auto compare_throughput = [] (perf_result a, perf_result b) { return a.throughput < b.throughput; };
std::sort(results.begin(), results.end(), compare_throughput);
auto median_result = results[results.size() / 2];
auto median = median_result.throughput;
auto min = results[0].throughput;
auto max = results[results.size() - 1].throughput;
auto absolute_deviations = results
| std::views::transform(std::mem_fn(&perf_result::throughput))
| std::views::transform([&] (double r) { return abs(r - median); })
| std::ranges::to<std::vector<double>>();
std::sort(absolute_deviations.begin(), absolute_deviations.end());
auto mad = absolute_deviations[results.size() / 2];
std::cout << format("\nmedian {}\nmedian absolute deviation: {:.2f}\nmaximum: {:.2f}\nminimum: {:.2f}\n", median_result, mad, max, min);
if (app.configuration().contains("json-result")) {
write_json_result(app.configuration()["json-result"].as<std::string>(), cfg, median_result, mad, max, min);
}
} catch (...) {
ex = std::current_exception();
}
co_await test_commitlog.stop();
if (ex) {
std::rethrow_exception(ex);
}
});
}