database: hook-in to the seastar OOM diagnostics report generation
Use the mechanism provided by seastar to add Scylla-specific information
to the memory diagnostics report. The information added is mostly the
same as that contained in the output of `scylla memory` from `scylla-gdb.py`,
with the exception of the coordinator-specific metrics. The report is
generated in the database layer, where the storage-proxy is not
available and it is not worth pulling it in just for this purpose.
An example report:
INFO 2020-11-10 12:02:44,182 [shard 0] testlog - Dumping seastar memory diagnostics
Used memory: 2029M
Free memory: 19M
Total memory: 2G
LSA
allocated: 1770M
used: 1766M
free: 3M
Cache:
total: 1770M
used: 1716M
free: 54M
Memtables:
total: 0B
Regular:
real dirty: 0B
virt dirty: 0B
System:
real dirty: 0B
virt dirty: 0B
Replica:
Read Concurrency Semaphores:
user: 100/100, 33M/41M, queued: 477
streaming: 0/10, 0B/41M, queued: 0
system: 0/100, 0B/41M, queued: 0
compaction: 0/∞, 0B/∞
Execution Stages:
data query stage:
statement 987
Total: 987
mutation query stage:
Total: 0
apply stage:
Total: 0
Tables - Ongoing Operations:
Pending writes (top 10):
0 Total (all)
Pending reads (top 10):
1564 ks.test
1564 Total (all)
Pending streams (top 10):
0 Total (all)
Small pools:
objsz spansz usedobj memory unused wst%
8 4K 11k 88K 6K 6
10 4K 10 8K 8K 98
12 4K 2 8K 8K 99
14 4K 4 8K 8K 99
16 4K 15k 244K 5K 2
32 4K 2k 52K 3K 5
32 4K 20k 628K 2K 0
32 4K 528 20K 4K 17
32 4K 5k 144K 480B 0
48 4K 17k 780K 3K 0
48 4K 3k 140K 3K 2
64 4K 50k 3M 6K 0
64 4K 66k 4M 7K 0
80 4K 131k 10M 1K 0
96 4K 37k 3M 192B 0
112 4K 65k 7M 10K 0
128 4K 21k 3M 2K 0
160 4K 38k 6M 3K 0
192 4K 15k 3M 12K 0
224 4K 3k 720K 10K 1
256 4K 148 56K 19K 33
320 8K 13k 4M 14K 0
384 8K 3k 1M 20K 1
448 4K 11k 5M 5K 0
512 4K 2k 1M 39K 3
640 12K 163 144K 42K 29
768 12K 1k 832K 59K 7
896 8K 131 144K 29K 20
1024 4K 643 732K 89K 12
1280 20K 11k 13M 26K 0
1536 12K 12 128K 110K 85
1792 16K 12 144K 123K 85
2048 8K 601 1M 14K 1
2560 20K 70 224K 48K 21
3072 12K 13 240K 201K 83
3584 28K 6 288K 266K 92
4096 16K 10k 39M 88K 0
5120 20K 7 416K 380K 91
6144 24K 24 480K 336K 70
7168 28K 27 608K 413K 67
8192 32K 256 3M 736K 26
10240 40K 11k 105M 550K 0
12288 48K 21 960K 708K 73
14336 56K 59 1M 378K 31
16384 64K 8 1M 1M 89
Page spans:
index size free used spans
0 4K 48M 48M 12k
1 8K 6M 6M 822
2 16K 41M 41M 3k
3 32K 18M 18M 579
4 64K 108M 108M 2k
5 128K 1774M 2G 14k
6 256K 512K 0B 2
7 512K 2M 2M 4
8 1M 0B 0B 0
9 2M 2M 0B 1
10 4M 0B 0B 0
11 8M 0B 0B 0
12 16M 16M 0B 1
13 32M 32M 32M 1
14 64M 0B 0B 0
15 128M 0B 0B 0
16 256M 0B 0B 0
17 512M 0B 0B 0
18 1G 0B 0B 0
19 2G 0B 0B 0
20 4G 0B 0B 0
21 8G 0B 0B 0
22 16G 0B 0B 0
23 32G 0B 0B 0
24 64G 0B 0B 0
25 128G 0B 0B 0
26 256G 0B 0B 0
27 512G 0B 0B 0
28 1T 0B 0B 0
29 2T 0B 0B 0
30 4T 0B 0B 0
31 8T 0B 0B 0
This commit is contained in:
173
database.cc
173
database.cc
@@ -57,6 +57,7 @@
|
||||
#include <boost/range/algorithm/find_if.hpp>
|
||||
#include <boost/range/algorithm/sort.hpp>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include "frozen_mutation.hh"
|
||||
#include <seastar/core/do_with.hh>
|
||||
#include "service/migration_manager.hh"
|
||||
@@ -82,6 +83,7 @@
|
||||
|
||||
#include "checked-file-impl.hh"
|
||||
#include "utils/disk-error-handler.hh"
|
||||
#include "utils/human_readable.hh"
|
||||
|
||||
#include "db/timeout_clock.hh"
|
||||
#include "db/large_data_handler.hh"
|
||||
@@ -90,6 +92,7 @@
|
||||
|
||||
#include "user_types_metadata.hh"
|
||||
#include <seastar/core/shared_ptr_incomplete.hh>
|
||||
#include <seastar/util/memory_diagnostics.hh>
|
||||
|
||||
#include "schema_builder.hh"
|
||||
|
||||
@@ -165,6 +168,174 @@ bool string_pair_eq::operator()(spair lhs, spair rhs) const {
|
||||
|
||||
utils::UUID database::empty_version = utils::UUID_gen::get_name_UUID(bytes{});
|
||||
|
||||
namespace {
|
||||
|
||||
class memory_diagnostics_line_writer {
|
||||
std::array<char, 4096> _line_buf;
|
||||
memory::memory_diagnostics_writer _wr;
|
||||
|
||||
public:
|
||||
memory_diagnostics_line_writer(memory::memory_diagnostics_writer wr) : _wr(std::move(wr)) { }
|
||||
void operator() (const char* fmt) {
|
||||
_wr(fmt);
|
||||
}
|
||||
void operator() (const char* fmt, const auto& param1, const auto&... params) {
|
||||
const auto begin = _line_buf.begin();
|
||||
auto it = fmt::format_to(begin, fmt, param1, params...);
|
||||
_wr(std::string_view(begin, it - begin));
|
||||
}
|
||||
};
|
||||
|
||||
// Finds the (at most) 10 highest distinct non-zero operation counts among
// `tables`, together with the tables that have each of those counts.
//
// \param tables the tables to inspect.
// \param op_count_getter returns the operation count of a table; tables for
//        which it returns 0 are ignored.
// \returns (count, tables-with-that-count) pairs, sorted by count in
//          ascending order. Each count appears at most once. At most 16
//          tables are recorded per count; further tables sharing that count
//          are left out of the list.
//
// Only fixed-capacity containers are used for the result, so care is taken
// never to insert into a full one.
const boost::container::static_vector<std::pair<size_t, boost::container::static_vector<table*, 16>>, 10>
phased_barrier_top_10_counts(const std::unordered_map<utils::UUID, lw_shared_ptr<column_family>>& tables, std::function<size_t(table&)> op_count_getter) {
    using table_list = boost::container::static_vector<table*, 16>;
    using count_and_tables = std::pair<size_t, table_list>;
    const auto less = [] (const count_and_tables& a, const count_and_tables& b) {
        return a.first < b.first;
    };

    boost::container::static_vector<count_and_tables, 10> res;
    // Points at the entry with the smallest count; valid whenever res is
    // non-empty. static_vector stores its elements in-place, so the pointer
    // stays valid as more elements are appended.
    count_and_tables* min_element = nullptr;

    for (const auto& [tid, table] : tables) {
        const auto count = op_count_getter(*table);
        if (!count) {
            continue;
        }
        const bool full = res.size() == res.capacity();
        // Once all slots are taken, a count below the current minimum cannot
        // make it into the top 10.
        if (full && min_element->first > count) {
            continue;
        }

        // Merge into an existing entry with the same count, regardless of
        // whether the result is full yet, so that each count occupies a
        // single slot.
        auto it = boost::find_if(res, [count] (const count_and_tables& x) {
            return x.first == count;
        });
        if (it != res.end()) {
            // The table list has a fixed capacity; inserting into a full
            // static_vector is an error, so drop the tables that don't fit.
            if (it->second.size() < it->second.capacity()) {
                it->second.push_back(table.get());
            }
            continue;
        }

        if (!full) {
            auto& elem = res.emplace_back(count, table_list({table.get()}));
            if (!min_element || min_element->first > count) {
                min_element = &elem;
            }
            continue;
        }

        // If we are here, res is full and min_element->first < count:
        // evict the smallest entry in favor of the new, larger count.
        *min_element = {count, table_list({table.get()})};
        min_element = &*boost::min_element(res, less);
    }

    boost::sort(res, less);

    return res;
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
void database::setup_scylla_memory_diagnostics_producer() {
|
||||
memory::set_additional_diagnostics_producer([this] (memory::memory_diagnostics_writer wr) {
|
||||
auto writeln = memory_diagnostics_line_writer(std::move(wr));
|
||||
|
||||
const auto lsa_occupancy_stats = logalloc::lsa_global_occupancy_stats();
|
||||
writeln("LSA\n");
|
||||
writeln(" allocated: {}\n", utils::to_hr_size(lsa_occupancy_stats.total_space()));
|
||||
writeln(" used: {}\n", utils::to_hr_size(lsa_occupancy_stats.used_space()));
|
||||
writeln(" free: {}\n\n", utils::to_hr_size(lsa_occupancy_stats.free_space()));
|
||||
|
||||
const auto row_cache_occupancy_stats = _row_cache_tracker.region().occupancy();
|
||||
writeln("Cache:\n");
|
||||
writeln(" total: {}\n", utils::to_hr_size(row_cache_occupancy_stats.total_space()));
|
||||
writeln(" used: {}\n", utils::to_hr_size(row_cache_occupancy_stats.used_space()));
|
||||
writeln(" free: {}\n\n", utils::to_hr_size(row_cache_occupancy_stats.free_space()));
|
||||
|
||||
writeln("Memtables:\n");
|
||||
writeln(" total: {}\n", utils::to_hr_size(lsa_occupancy_stats.total_space() - row_cache_occupancy_stats.total_space()));
|
||||
|
||||
writeln(" Regular:\n");
|
||||
writeln(" real dirty: {}\n", utils::to_hr_size(_dirty_memory_manager.real_dirty_memory()));
|
||||
writeln(" virt dirty: {}\n", utils::to_hr_size(_dirty_memory_manager.virtual_dirty_memory()));
|
||||
writeln(" System:\n");
|
||||
writeln(" real dirty: {}\n", utils::to_hr_size(_system_dirty_memory_manager.real_dirty_memory()));
|
||||
writeln(" virt dirty: {}\n\n", utils::to_hr_size(_system_dirty_memory_manager.virtual_dirty_memory()));
|
||||
|
||||
writeln("Replica:\n");
|
||||
|
||||
writeln(" Read Concurrency Semaphores:\n");
|
||||
const std::pair<const char*, reader_concurrency_semaphore&> semaphores[] = {
|
||||
{"user", _read_concurrency_sem},
|
||||
{"streaming", _streaming_concurrency_sem},
|
||||
{"system", _system_read_concurrency_sem},
|
||||
{"compaction", _compaction_concurrency_sem},
|
||||
};
|
||||
for (const auto& [name, sem] : semaphores) {
|
||||
const auto initial_res = sem.initial_resources();
|
||||
const auto available_res = sem.available_resources();
|
||||
if (sem.is_unlimited()) {
|
||||
writeln(" {}: {}/∞, {}/∞\n",
|
||||
name,
|
||||
initial_res.count - available_res.count,
|
||||
utils::to_hr_size(initial_res.memory - available_res.memory),
|
||||
sem.waiters());
|
||||
} else {
|
||||
writeln(" {}: {}/{}, {}/{}, queued: {}\n",
|
||||
name,
|
||||
initial_res.count - available_res.count,
|
||||
initial_res.count,
|
||||
utils::to_hr_size(initial_res.memory - available_res.memory),
|
||||
utils::to_hr_size(initial_res.memory),
|
||||
sem.waiters());
|
||||
}
|
||||
}
|
||||
|
||||
writeln(" Execution Stages:\n");
|
||||
const std::pair<const char*, inheriting_execution_stage::stats> execution_stage_summaries[] = {
|
||||
{"data query stage", _data_query_stage.get_stats()},
|
||||
{"mutation query stage", _mutation_query_stage.get_stats()},
|
||||
{"apply stage", _apply_stage.get_stats()},
|
||||
};
|
||||
for (const auto& [name, exec_stage_summary] : execution_stage_summaries) {
|
||||
writeln(" {}:\n", name);
|
||||
size_t total = 0;
|
||||
for (const auto& [sg, stats ] : exec_stage_summary) {
|
||||
const auto count = stats.function_calls_enqueued - stats.function_calls_executed;
|
||||
if (!count) {
|
||||
continue;
|
||||
}
|
||||
writeln(" {}\t{}\n", sg.name(), count);
|
||||
total += count;
|
||||
}
|
||||
writeln(" Total: {}\n", total);
|
||||
}
|
||||
|
||||
writeln(" Tables - Ongoing Operations:\n");
|
||||
const std::pair<const char*, std::function<size_t(table&)>> phased_barriers[] = {
|
||||
{"Pending writes", std::mem_fn(&table::writes_in_progress)},
|
||||
{"Pending reads", std::mem_fn(&table::reads_in_progress)},
|
||||
{"Pending streams", std::mem_fn(&table::streams_in_progress)},
|
||||
};
|
||||
for (const auto& [name, op_count_getter] : phased_barriers) {
|
||||
writeln(" {} (top 10):\n", name);
|
||||
auto total = 0;
|
||||
for (const auto& [count, table_list] : phased_barrier_top_10_counts(_column_families, op_count_getter)) {
|
||||
total += count;
|
||||
writeln(" {}", count);
|
||||
if (table_list.empty()) {
|
||||
writeln("\n");
|
||||
continue;
|
||||
}
|
||||
auto it = table_list.begin();
|
||||
for (; it != table_list.end() - 1; ++it) {
|
||||
writeln(" {}.{},", (*it)->schema()->ks_name(), (*it)->schema()->cf_name());
|
||||
}
|
||||
writeln(" {}.{}\n", (*it)->schema()->ks_name(), (*it)->schema()->cf_name());
|
||||
}
|
||||
writeln(" {} Total (all)\n", total);
|
||||
}
|
||||
writeln("\n");
|
||||
});
|
||||
}
|
||||
|
||||
database::database(const db::config& cfg, database_config dbcfg, service::migration_notifier& mn, gms::feature_service& feat, const locator::shared_token_metadata& stm, abort_source& as, sharded<semaphore>& sst_dir_sem)
|
||||
: _stats(make_lw_shared<db_stats>())
|
||||
, _cl_stats(std::make_unique<cell_locker_stats>())
|
||||
@@ -232,6 +403,8 @@ database::database(const db::config& cfg, database_config dbcfg, service::migrat
|
||||
dblog.debug("Enabling infinite bound range deletions");
|
||||
_supports_infinite_bound_range_deletions = true;
|
||||
});
|
||||
|
||||
setup_scylla_memory_diagnostics_producer();
|
||||
}
|
||||
|
||||
const db::extensions& database::extensions() const {
|
||||
|
||||
@@ -1347,6 +1347,7 @@ private:
|
||||
void create_in_memory_keyspace(const lw_shared_ptr<keyspace_metadata>& ksm);
|
||||
friend void db::system_keyspace::make(database& db, bool durable, bool volatile_testing_only);
|
||||
void setup_metrics();
|
||||
void setup_scylla_memory_diagnostics_producer();
|
||||
|
||||
friend class db_apply_executor;
|
||||
future<> do_apply(schema_ptr, const frozen_mutation&, tracing::trace_state_ptr tr_state, db::timeout_clock::time_point timeout, db::commitlog::force_sync sync);
|
||||
|
||||
Reference in New Issue
Block a user