Files
scylladb/test/perf/memory_footprint_test.cc
Avi Kivity 0ae22a09d4 LICENSE: Update to version 1.1
Updated terms of non-commercial use (must be a never-customer).
2026-04-12 19:46:33 +03:00

277 lines
11 KiB
C++

/*
* Copyright (C) 2015-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
*/
#include "utils/assert.hh"
#include <seastar/util/defer.hh>
#include <seastar/core/app-template.hh>
#include <seastar/core/thread.hh>
#include "partition_slice_builder.hh"
#include "schema/schema_builder.hh"
#include "replica/memtable.hh"
#include "db/row_cache.hh"
#include "mutation/frozen_mutation.hh"
#include "test/lib/tmpdir.hh"
#include "sstables/sstables.hh"
#include "mutation/canonical_mutation.hh"
#include "test/lib/sstable_utils.hh"
#include "test/lib/test_services.hh"
#include "test/lib/sstable_test_env.hh"
#include "test/lib/cql_test_env.hh"
class size_calculator {
using cells_type = row::sparse_array_type;
class nest {
public:
static thread_local int level;
nest() { ++level; }
~nest() { --level; }
};
static std::string prefix() {
std::string s(" ");
for (int i = 0; i < nest::level; ++i) {
s += "-- ";
}
return s;
}
public:
static void print_cache_entry_size() {
std::cout << prefix() << "sizeof(cache_entry) = " << sizeof(cache_entry) << "\n";
std::cout << prefix() << "sizeof(memtable_entry) = " << sizeof(replica::memtable_entry) << "\n";
std::cout << prefix() << "sizeof(bptree::node) = " << sizeof(row_cache::partitions_type::outer_tree::node) << "\n";
std::cout << prefix() << "sizeof(bptree::data) = " << sizeof(row_cache::partitions_type::outer_tree::data) << "\n";
{
nest n;
std::cout << prefix() << "sizeof(decorated_key) = " << sizeof(dht::decorated_key) << "\n";
print_mutation_partition_size();
}
std::cout << "\n";
std::cout << prefix() << "sizeof(rows_entry) = " << sizeof(rows_entry) << "\n";
std::cout << prefix() << "sizeof(evictable) = " << sizeof(evictable) << "\n";
std::cout << prefix() << "sizeof(deletable_row) = " << sizeof(deletable_row) << "\n";
std::cout << prefix() << "sizeof(row) = " << sizeof(row) << "\n";
std::cout << prefix() << "radix_tree::inner_node::node_sizes = ";
for (int i = 4; i <= 128; i *= 2) {
std::cout << " " << cells_type::inner_node::node_type::node_size(cells_type::layout::direct_dynamic, i);
}
std::cout << "\n";
std::cout << prefix() << "radix_tree::leaf_node::node_sizes = ";
std::cout << " " << cells_type::leaf_node::node_type::node_size(cells_type::layout::indirect_tiny, 0);
std::cout << " " << cells_type::leaf_node::node_type::node_size(cells_type::layout::indirect_small, 0);
std::cout << " " << cells_type::leaf_node::node_type::node_size(cells_type::layout::indirect_medium, 0);
std::cout << " " << cells_type::leaf_node::node_type::node_size(cells_type::layout::indirect_large, 0);
std::cout << " " << cells_type::leaf_node::node_type::node_size(cells_type::layout::direct_static, 0);
std::cout << "\n";
std::cout << prefix() << "sizeof(atomic_cell_or_collection) = " << sizeof(atomic_cell_or_collection) << "\n";
std::cout << prefix() << "btree::linear_node_size(1) = " << mutation_partition::rows_type::node::linear_node_size(1) << "\n";
std::cout << prefix() << "btree::inner_node_size = " << mutation_partition::rows_type::node::inner_node_size << "\n";
std::cout << prefix() << "btree::leaf_node_size = " << mutation_partition::rows_type::node::leaf_node_size << "\n";
}
static void print_mutation_partition_size() {
std::cout << prefix() << "sizeof(mutation_partition) = " << sizeof(mutation_partition) << "\n";
{
nest n;
std::cout << prefix() << "sizeof(_static_row) = " << sizeof(mutation_partition::_static_row) << "\n";
std::cout << prefix() << "sizeof(_rows) = " << sizeof(mutation_partition::_rows) << "\n";
std::cout << prefix() << "sizeof(_row_tombstones) = " << sizeof(mutation_partition::_row_tombstones) <<
"\n";
}
}
};
thread_local int size_calculator::nest::level = 0;
// Builds the schema "ks"."cf" with a bytes partition key column "KEY" and
// five regular bytes columns "C0" through "C4".
static schema_ptr cassandra_stress_schema() {
    static const char* const regular_names[] = {"C0", "C1", "C2", "C3", "C4"};

    schema_builder builder("ks", "cf");
    builder.with_column("KEY", bytes_type, column_kind::partition_key);
    for (const char* column_name : regular_names) {
        builder.with_column(column_name, bytes_type);
    }
    return builder.build();
}
// Builds one mutation against cassandra_stress_schema(): a fixed partition
// key, and a live cell with timestamp 1 and a fixed value in every regular
// column of the row with the empty clustering key.
// Marked unused because nothing in this file calls it.
[[gnu::unused]]
static mutation make_cs_mutation() {
    static const char* const key_text = "4b343050393536353531";
    static const char* const cell_text = "8f75da6b3dcec90c8a404fb9a5f6b0621e62d39c69ba5758e5f41b78311fbb26cc7a";

    auto schema = cassandra_stress_schema();
    mutation result(schema, partition_key::from_single_value(*schema, bytes_type->from_string(key_text)));
    for (auto&& column : schema->regular_columns()) {
        result.set_clustered_cell(clustering_key::make_empty(), column,
                atomic_cell::make_live(*bytes_type, 1, bytes_type->from_string(cell_text)));
    }
    return result;
}
// Returns `size` bytes, each drawn independently from std::rand().
//
// Every one of the 256 possible byte values can be produced. The modulus is
// max() + 1 because reducing modulo max() itself would yield only 0..254 and
// never emit 0xFF.
bytes random_bytes(size_t size) {
    constexpr int byte_value_count = std::numeric_limits<uint8_t>::max() + 1;
    bytes result(bytes::initialized_later(), size);
    for (size_t i = 0; i < size; ++i) {
        result[i] = std::rand() % byte_value_count;
    }
    return result;
}
// Returns a string of `size` characters, each drawn with std::rand() from
// the ASCII letters A-Z and a-z.
sstring random_name(size_t size) {
    static const char chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    // sizeof(chars) also counts the literal's terminating NUL; leave it out
    // so that a '\0' can never end up inside a generated name.
    constexpr size_t alphabet_size = sizeof(chars) - 1;

    sstring result = uninitialized_string(size);
    for (size_t i = 0; i < size; ++i) {
        result[i] = chars[std::rand() % alphabet_size];
    }
    return result;
}
// Parameters describing the shape of the randomly generated data set.
// Filled from the command-line options in main() and consumed by
// make_schema(), make_mutation() and calculate_sizes().
struct mutation_settings {
// Number of regular columns make_schema() adds to the schema.
size_t column_count;
// Length of each randomly generated regular column name.
size_t column_name_size;
// Number of clustering rows make_mutation() writes into each mutation.
size_t row_count;
// Number of mutations calculate_sizes() generates and applies.
size_t partition_count;
// Length of the random value used as the partition key.
size_t partition_key_size;
// Length of the random value used as each clustering key.
size_t clustering_key_size;
// Length of the random value stored in each cell.
size_t data_size;
};
// Builds the schema "ks"."cf" with a bytes partition key "pk", a bytes
// clustering key "ck" and `settings.column_count` regular bytes columns whose
// names are random strings of `settings.column_name_size` characters.
static schema_ptr make_schema(const mutation_settings& settings) {
    schema_builder builder("ks", "cf");
    builder.with_column("pk", bytes_type, column_kind::partition_key);
    builder.with_column("ck", bytes_type, column_kind::clustering_key);

    for (size_t remaining = settings.column_count; remaining > 0; --remaining) {
        builder.with_column(to_bytes(random_name(settings.column_name_size)), bytes_type);
    }
    return builder.build();
}
// Builds one mutation for schema `s` with a random partition key and
// `settings.row_count` rows. Each row gets a random clustering key and, in
// every regular column, a live cell with timestamp 1 holding
// `settings.data_size` random bytes.
static mutation make_mutation(schema_ptr s, mutation_settings settings) {
    // Serialized form of `length` freshly drawn random bytes.
    const auto random_serialized = [] (size_t length) {
        return bytes_type->decompose(data_value(random_bytes(length)));
    };

    mutation m(s, partition_key::from_single_value(*s, random_serialized(settings.partition_key_size)));
    for (size_t row = 0; row != settings.row_count; ++row) {
        auto ck = clustering_key::from_single_value(*s, random_serialized(settings.clustering_key_size));
        for (auto&& column : s->regular_columns()) {
            m.set_clustered_cell(ck, column,
                    atomic_cell::make_live(*bytes_type, 1, random_serialized(settings.data_size)));
        }
    }
    return m;
}
// Footprint figures produced by calculate_sizes().
struct sizes {
// Used space reported by the memtable after all mutations were applied.
size_t memtable;
// Growth of the cache tracker region's used space after populating the cache.
size_t cache;
// Data size of the sstable written from the memtable, per sstable version.
std::map<sstables::sstable::version_types, size_t> sstable;
// Size of the frozen representation of the first generated mutation.
size_t frozen;
// Size of the canonical representation of the first generated mutation.
size_t canonical;
// Size of the buffer of a full-slice query result over the first generated mutation.
size_t query_result;
};
// Generates `settings.partition_count` random mutations for a freshly built
// schema, applies each of them to a new memtable and populates a row cache
// (registered with `tracker`) with them, then reports the resulting sizes.
//
// The memtable and cache figures cover all generated mutations, and so do the
// sstable figures: for every writable sstable version a copy of the memtable
// is written out and its data size recorded. The frozen, canonical and
// query-result figures are computed from the first generated mutation only.
//
// Throws std::runtime_error when `settings.partition_count` is zero, since
// there would be no first mutation to measure.
static sizes calculate_sizes(cache_tracker& tracker, const mutation_settings& settings) {
    if (settings.partition_count == 0) {
        throw std::runtime_error("partition-count must be at least 1");
    }

    sizes result;
    auto s = make_schema(settings);
    auto mt = make_lw_shared<replica::memtable>(s);
    row_cache cache(s, make_empty_snapshot_source(), tracker);

    // The tracker's region may already hold data, so measure the cache as a delta.
    auto cache_initial_occupancy = tracker.region().occupancy().used_space();
    SCYLLA_ASSERT(mt->occupancy().used_space() == 0);

    std::vector<mutation> muts;
    muts.reserve(settings.partition_count);
    for (size_t i = 0; i < settings.partition_count; ++i) {
        muts.emplace_back(make_mutation(s, settings));
        mt->apply(muts.back());
        cache.populate(muts.back());
    }

    // Non-empty thanks to the partition_count check above.
    const mutation& m = muts.front();
    result.memtable = mt->occupancy().used_space();
    result.cache = tracker.region().occupancy().used_space() - cache_initial_occupancy;
    result.frozen = freeze(m).representation().size();
    result.canonical = canonical_mutation(m).representation().size();
    result.query_result = query_mutation(mutation(m), partition_slice_builder(*s).build()).buf().size();

    sstables::test_env::do_with_async([&] (sstables::test_env& env) {
        for (auto v : sstables::writable_sstable_versions) {
            auto sst = env.make_sstable(s, v);
            // Write from a copy so the original memtable is left untouched.
            auto mt2 = make_lw_shared<replica::memtable>(s);
            mt2->apply(*mt, env.make_reader_permit()).get();
            write_memtable_to_sstable(*mt2, sst).get();
            sst->open_data().get();
            result.sstable[v] = sst->data_size();
        }
    }).get();
    return result;
}
int main(int argc, char** argv) {
namespace bpo = boost::program_options;
app_template app;
app.add_options()
("verbose", "Enable info-level logging")
("column-count", bpo::value<size_t>()->default_value(5), "column count")
("column-name-size", bpo::value<size_t>()->default_value(2), "column name size")
("row-count", bpo::value<size_t>()->default_value(1), "row count")
("partition-count", bpo::value<size_t>()->default_value(1), "partition count")
("partition-key-size", bpo::value<size_t>()->default_value(10), "partition key size")
("clustering-key-size", bpo::value<size_t>()->default_value(10), "clustering key size")
("data-size", bpo::value<size_t>()->default_value(32), "cell data size");
return app.run(argc, argv, [&] {
if (smp::count != 1) {
throw std::runtime_error("This test has to be run with -c1");
}
if (!app.configuration().contains("verbose")) {
logging::logger_registry().set_all_loggers_level(seastar::log_level::warn);
}
return do_with_cql_env_thread([&](cql_test_env& env) {
mutation_settings settings;
settings.column_count = app.configuration()["column-count"].as<size_t>();
settings.column_name_size = app.configuration()["column-name-size"].as<size_t>();
settings.row_count = app.configuration()["row-count"].as<size_t>();
settings.partition_count = app.configuration()["partition-count"].as<size_t>();
settings.partition_key_size = app.configuration()["partition-key-size"].as<size_t>();
settings.clustering_key_size = app.configuration()["clustering-key-size"].as<size_t>();
settings.data_size = app.configuration()["data-size"].as<size_t>();
auto& tracker = env.local_db().find_column_family("system", "local").get_row_cache().get_cache_tracker();
auto sizes = calculate_sizes(tracker, settings);
std::cout << "mutation footprint:" << "\n";
std::cout << " - in cache: " << sizes.cache << "\n";
std::cout << " - in memtable: " << sizes.memtable << "\n";
std::cout << " - in sstable:\n";
for (auto v : sizes.sstable) {
std::cout << " " << fmt::to_string(v.first) << ": " << v.second << "\n";
}
std::cout << " - frozen: " << sizes.frozen << "\n";
std::cout << " - canonical: " << sizes.canonical << "\n";
std::cout << " - query result: " << sizes.query_result << "\n";
std::cout << "\n";
size_calculator::print_cache_entry_size();
auto cache_st = tracker.region().collect_stats();
std::cout << "LSA stats:" << "\n";
for (auto [ name, size ] : cache_st) {
std::cout << " " << name << ": " << size << "\n";
}
});
});
}