mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-29 20:57:00 +00:00
Currently, we use std::vector<*mutation> to keep
a list of mutations for processing.
This can lead to large allocations, e.g. when the vector
size is a function of the number of tables.
Use a chunked vector instead to prevent oversized allocations.
`perf-simple-query --smp 1` results obtained for fixed 400MHz frequency
and PGO disabled:
Before (read path):
```
enable-cache=1
Running test with config: {partitions=10000, concurrency=100, mode=read, query_single_key=no, counters=no}
Disabling auto compaction
Creating 10000 partitions...
89055.97 tps ( 66.1 allocs/op, 0.0 logallocs/op, 14.2 tasks/op, 39417 insns/op, 18003 cycles/op, 0 errors)
103372.72 tps ( 66.1 allocs/op, 0.0 logallocs/op, 14.2 tasks/op, 39380 insns/op, 17300 cycles/op, 0 errors)
98942.27 tps ( 66.1 allocs/op, 0.0 logallocs/op, 14.2 tasks/op, 39413 insns/op, 17336 cycles/op, 0 errors)
103752.93 tps ( 66.1 allocs/op, 0.0 logallocs/op, 14.2 tasks/op, 39407 insns/op, 17252 cycles/op, 0 errors)
102516.77 tps ( 66.1 allocs/op, 0.0 logallocs/op, 14.2 tasks/op, 39403 insns/op, 17288 cycles/op, 0 errors)
throughput:
mean= 99528.13 standard-deviation=6155.71
median= 102516.77 median-absolute-deviation=3844.59
maximum=103752.93 minimum=89055.97
instructions_per_op:
mean= 39403.99 standard-deviation=14.25
median= 39406.75 median-absolute-deviation=9.30
maximum=39416.63 minimum=39380.39
cpu_cycles_per_op:
mean= 17435.81 standard-deviation=318.24
median= 17300.40 median-absolute-deviation=147.59
maximum=18002.53 minimum=17251.75
```
After (read path):
```
enable-cache=1
Running test with config: {partitions=10000, concurrency=100, mode=read, query_single_key=no, counters=no}
Disabling auto compaction
Creating 10000 partitions...
59755.04 tps ( 66.2 allocs/op, 0.0 logallocs/op, 14.2 tasks/op, 39466 insns/op, 22834 cycles/op, 0 errors)
71854.16 tps ( 66.1 allocs/op, 0.0 logallocs/op, 14.2 tasks/op, 39417 insns/op, 17883 cycles/op, 0 errors)
82149.45 tps ( 66.1 allocs/op, 0.0 logallocs/op, 14.2 tasks/op, 39411 insns/op, 17409 cycles/op, 0 errors)
49640.04 tps ( 66.1 allocs/op, 0.0 logallocs/op, 14.3 tasks/op, 39474 insns/op, 19975 cycles/op, 0 errors)
54963.22 tps ( 66.1 allocs/op, 0.0 logallocs/op, 14.3 tasks/op, 39474 insns/op, 18235 cycles/op, 0 errors)
throughput:
mean= 63672.38 standard-deviation=13195.12
median= 59755.04 median-absolute-deviation=8709.16
maximum=82149.45 minimum=49640.04
instructions_per_op:
mean= 39448.38 standard-deviation=31.60
median= 39466.17 median-absolute-deviation=25.75
maximum=39474.12 minimum=39411.42
cpu_cycles_per_op:
mean= 19267.01 standard-deviation=2217.03
median= 18234.80 median-absolute-deviation=1384.25
maximum=22834.26 minimum=17408.67
```
`perf-simple-query --smp 1 --write` results obtained for fixed 400MHz frequency
and PGO disabled:
Before (write path):
```
enable-cache=1
Running test with config: {partitions=10000, concurrency=100, mode=write, query_single_key=no, counters=no}
Disabling auto compaction
63736.96 tps ( 59.4 allocs/op, 16.4 logallocs/op, 14.3 tasks/op, 49667 insns/op, 19924 cycles/op, 0 errors)
64109.41 tps ( 59.3 allocs/op, 16.0 logallocs/op, 14.3 tasks/op, 49992 insns/op, 20084 cycles/op, 0 errors)
56950.47 tps ( 59.3 allocs/op, 16.0 logallocs/op, 14.3 tasks/op, 50005 insns/op, 20501 cycles/op, 0 errors)
44858.42 tps ( 59.3 allocs/op, 16.0 logallocs/op, 14.3 tasks/op, 50014 insns/op, 21947 cycles/op, 0 errors)
28592.87 tps ( 59.3 allocs/op, 16.0 logallocs/op, 14.3 tasks/op, 50027 insns/op, 27659 cycles/op, 0 errors)
throughput:
mean= 51649.63 standard-deviation=15059.74
median= 56950.47 median-absolute-deviation=12087.33
maximum=64109.41 minimum=28592.87
instructions_per_op:
mean= 49941.18 standard-deviation=153.76
median= 50005.24 median-absolute-deviation=73.01
maximum=50027.07 minimum=49667.05
cpu_cycles_per_op:
mean= 22023.01 standard-deviation=3249.92
median= 20500.74 median-absolute-deviation=1938.76
maximum=27658.75 minimum=19924.32
```
After (write path):
```
enable-cache=1
Running test with config: {partitions=10000, concurrency=100, mode=write, query_single_key=no, counters=no}
Disabling auto compaction
53395.93 tps ( 59.4 allocs/op, 16.5 logallocs/op, 14.3 tasks/op, 50326 insns/op, 21252 cycles/op, 0 errors)
46527.83 tps ( 59.3 allocs/op, 16.0 logallocs/op, 14.3 tasks/op, 50704 insns/op, 21555 cycles/op, 0 errors)
55846.30 tps ( 59.3 allocs/op, 16.0 logallocs/op, 14.3 tasks/op, 50731 insns/op, 21060 cycles/op, 0 errors)
55669.30 tps ( 59.3 allocs/op, 16.0 logallocs/op, 14.3 tasks/op, 50735 insns/op, 21521 cycles/op, 0 errors)
52130.17 tps ( 59.3 allocs/op, 16.0 logallocs/op, 14.3 tasks/op, 50757 insns/op, 21334 cycles/op, 0 errors)
throughput:
mean= 52713.91 standard-deviation=3795.38
median= 53395.93 median-absolute-deviation=2955.40
maximum=55846.30 minimum=46527.83
instructions_per_op:
mean= 50650.57 standard-deviation=182.46
median= 50731.38 median-absolute-deviation=84.09
maximum=50756.62 minimum=50325.87
cpu_cycles_per_op:
mean= 21344.42 standard-deviation=202.86
median= 21334.00 median-absolute-deviation=176.37
maximum=21554.61 minimum=21060.24
```
Fixes #24815
Improvement for rare corner cases. No backport required
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes scylladb/scylladb#24919
228 lines
8.0 KiB
C++
228 lines
8.0 KiB
C++
/*
|
|
* Copyright (C) 2023-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
|
*/
|
|
|
|
#include "utils/assert.hh"
|
|
#include <fmt/ranges.h>
|
|
|
|
#include <seastar/core/distributed.hh>
|
|
#include <seastar/core/app-template.hh>
|
|
#include <seastar/core/sstring.hh>
|
|
#include <seastar/core/thread.hh>
|
|
#include <seastar/core/reactor.hh>
|
|
#include <seastar/util/defer.hh>
|
|
|
|
#include "locator/tablets.hh"
|
|
#include "replica/tablet_mutation_builder.hh"
|
|
#include "replica/tablets.hh"
|
|
#include "locator/tablet_replication_strategy.hh"
|
|
#include "db/config.hh"
|
|
#include "schema/schema_builder.hh"
|
|
#include "service/storage_proxy.hh"
|
|
#include "db/system_keyspace.hh"
|
|
|
|
#include "test/perf/perf.hh"
|
|
#include "test/lib/log.hh"
|
|
#include "test/lib/cql_test_env.hh"
|
|
|
|
using namespace locator;
|
|
using namespace replica;
|
|
|
|
// Abort source shared by the whole run: scylla_tablets_main() requests an abort
// when its test body exits, and the tablet metadata generation loop in
// test_basic_operations() polls it with check().
seastar::abort_source aborted;
|
|
|
|
// Number of bytes in one mebibyte (2^20); used as the divisor when logging sizes in MiB.
static constexpr size_t MiB = size_t{1} << 20;
|
|
|
|
static
|
|
cql_test_config tablet_cql_test_config() {
|
|
cql_test_config c;
|
|
c.db_config->tablets_mode_for_new_keyspaces.set(db::tablets_mode_t::mode::enabled);
|
|
return c;
|
|
}
|
|
|
|
static
|
|
future<table_id> add_table(cql_test_env& e) {
|
|
auto id = table_id(utils::UUID_gen::get_time_UUID());
|
|
co_await e.create_table([id] (std::string_view ks_name) {
|
|
return *schema_builder(ks_name, id.to_sstring(), id)
|
|
.with_column("p1", utf8_type, column_kind::partition_key)
|
|
.with_column("r1", int32_type)
|
|
.build();
|
|
});
|
|
co_return id;
|
|
}
|
|
|
|
// Benchmarks basic tablet metadata operations inside a CQL test environment.
//
// Reads "tables", "tablets-per-table" and "rf" from the application
// configuration, creates that many tables, builds a tablet_metadata with the
// requested number of tablets and replicas, and then times and logs, in order:
// copying, clearing, saving, reading back, reading the tablet mutations and
// reading the required hosts. Finally it writes a transition for one tablet and
// compares a full metadata reload against a hint-driven partial update.
//
// Returns the future produced by do_with_cql_env_thread().
static future<> test_basic_operations(app_template& app) {
    return do_with_cql_env_thread([&] (cql_test_env& e) {
        tablet_metadata tm;

        auto h1 = host_id(utils::UUID_gen::get_time_UUID());
        auto h2 = host_id(utils::UUID_gen::get_time_UUID());

        int nr_tables = app.configuration()["tables"].as<int>();
        int tablets_per_table = app.configuration()["tablets-per-table"].as<int>();
        int rf = app.configuration()["rf"].as<int>();

        size_t total_tablets = 0;

        std::vector<table_id> ids;
        ids.resize(nr_tables);
        for (int i = 0; i < nr_tables; ++i) {
            ids[i] = add_table(e).get();
        }

        testlog.info("Generating tablet metadata");

        for (int i = 0; i < nr_tables; ++i) {
            tablet_map tmap(tablets_per_table);

            for (tablet_id j : tmap.tablet_ids()) {
                // Stop early if the run has been aborted.
                aborted.check();
                thread::maybe_yield();
                // Every replica entry is {h1, 0}; only the replica count varies with rf.
                tablet_replica_set replicas;
                for (int k = 0; k < rf; ++k) {
                    replicas.push_back({h1, 0});
                }
                SCYLLA_ASSERT(std::cmp_equal(replicas.size(), rf));
                tmap.set_tablet(j, tablet_info{std::move(replicas)});
                ++total_tablets;
            }

            tm.set_tablet_map(ids[i], std::move(tmap));
        }

        testlog.info("Total tablet count: {}", total_tablets);

        testlog.info("Size of tablet_metadata in memory: {} KiB",
                     (tm.external_memory_usage() + sizeof(tablet_metadata)) / 1024);

        tablet_metadata tm2;
        auto time_to_copy = duration_in_seconds([&] {
            tm2 = tm.copy().get();
        });

        testlog.info("Copied in {:.6f} [ms]", time_to_copy.count() * 1000);

        auto time_to_clear = duration_in_seconds([&] {
            tm2.clear_gently().get();
        });

        testlog.info("Cleared in {:.6f} [ms]", time_to_clear.count() * 1000);

        auto time_to_save = duration_in_seconds([&] {
            save_tablet_metadata(e.local_db(), tm, api::new_timestamp()).get();
        });

        testlog.info("Saved in {:.6f} [ms]", time_to_save.count() * 1000);

        auto time_to_read = duration_in_seconds([&] {
            tm2 = read_tablet_metadata(e.local_qp()).get();
        });
        // What was read back must equal what was saved.
        SCYLLA_ASSERT(tm == tm2);

        testlog.info("Read in {:.6f} [ms]", time_to_read.count() * 1000);

        utils::chunked_vector<canonical_mutation> muts;
        auto time_to_read_muts = duration_in_seconds([&] {
            muts = replica::read_tablet_mutations(e.local_qp().proxy().get_db()).get();
        });

        testlog.info("Read mutations in {:.6f} [ms]", time_to_read_muts.count() * 1000);

        auto time_to_read_hosts = duration_in_seconds([&] {
            replica::read_required_hosts(e.local_qp()).get();
        });

        testlog.info("Read required hosts in {:.6f} [ms]", time_to_read_hosts.count() * 1000);

        // Accumulate in size_t: `auto cm_size = 0` would deduce int, so summing the
        // per-mutation byte counts would narrow and could overflow for large metadata.
        size_t cm_size = 0;
        for (const auto& cm : muts) {
            cm_size += cm.representation().size();
        }

        testlog.info("Size of canonical mutations: {:.6f} [MiB]", double(cm_size) / MiB);

        auto&& tablets_table = e.local_db().find_column_family(db::system_keyspace::tablets());
        testlog.info("Disk space used by system.tablets: {:.6f} [MiB]", double(tablets_table.get_stats().live_disk_space_used) / MiB);

        locator::tablet_metadata_change_hint hint;

        // Migrate one tablet to h2
        {
            const auto last_table_id = ids.back();
            const auto& tmap = tm.get_tablet_map(last_table_id);

            // Timestamp one past the one derived from the last group0 state id.
            const auto ts = utils::UUID_gen::micros_timestamp(e.get_system_keyspace().local().get_last_group0_state_id().get()) + 1;

            const auto tb = tmap.first_tablet();
            replica::tablet_mutation_builder builder(ts, last_table_id);
            const auto token = tmap.get_last_token(tb);

            builder.set_new_replicas(token,
                tablet_replica_set {
                    tablet_replica {h2, 0},
                }
            );
            builder.set_stage(token, tablet_transition_stage::streaming);
            builder.set_transition(token, tablet_transition_kind::migration);

            // Named distinctly from the canonical-mutation vector `muts` above to avoid shadowing it.
            utils::chunked_vector<mutation> transition_muts;
            transition_muts.push_back(builder.build());
            e.local_db().apply(freeze(transition_muts), db::no_timeout).get();
            // Record the change in the hint consumed by the partial update below.
            replica::update_tablet_metadata_change_hint(hint, transition_muts.front());
        }

        using clk = std::chrono::high_resolution_clock;

        const auto start_full_reload = clk::now();
        const auto tm_full_reload = read_tablet_metadata(e.local_qp()).get();
        const auto end_full_reload = clk::now();
        const auto full_reload_duration = std::chrono::duration<double, std::milli>(end_full_reload - start_full_reload);

        const auto start_partial_reload = clk::now();
        update_tablet_metadata(e.local_db(), e.local_qp(), tm, hint).get();
        const auto end_partial_reload = clk::now();
        const auto partial_reload_duration = std::chrono::duration<double, std::milli>(end_partial_reload - start_partial_reload);

        // The partially updated metadata must match the full reload. Use SCYLLA_ASSERT
        // like the other checks in this function; plain assert() is compiled out when
        // NDEBUG is defined.
        SCYLLA_ASSERT(tm == tm_full_reload);

        testlog.info("Tablet metadata reload:\nfull {:>8.2f}ms\npartial {:>8.2f}ms", full_reload_duration.count(), partial_reload_duration.count());
    }, tablet_cql_test_config());
}
|
|
|
|
namespace perf {
|
|
|
|
int scylla_tablets_main(int argc, char** argv) {
|
|
namespace bpo = boost::program_options;
|
|
app_template app;
|
|
app.add_options()
|
|
("tables", bpo::value<int>()->default_value(100), "Number of tables to create.")
|
|
("tablets-per-table", bpo::value<int>()->default_value(2048), "Number of tablets per table.")
|
|
("rf", bpo::value<int>()->default_value(3), "Number of replicas per tablet.")
|
|
("verbose", "Enables standard logging")
|
|
;
|
|
return app.run(argc, argv, [&] {
|
|
return seastar::async([&] {
|
|
if (!app.configuration().contains("verbose")) {
|
|
auto testlog_level = logging::logger_registry().get_logger_level("testlog");
|
|
logging::logger_registry().set_all_loggers_level(seastar::log_level::warn);
|
|
logging::logger_registry().set_logger_level("testlog", testlog_level);
|
|
}
|
|
auto stop_test = defer([] {
|
|
aborted.request_abort();
|
|
});
|
|
logalloc::prime_segment_pool(memory::stats().total_memory(), memory::min_free_memory()).get();
|
|
try {
|
|
test_basic_operations(app).get();
|
|
} catch (seastar::abort_requested_exception&) {
|
|
// Ignore
|
|
}
|
|
});
|
|
});
|
|
}
|
|
|
|
} // namespace perf
|