Merge 'Profile dumping rest api support' from Eliran Sinvani

This change is motivated by wanting to have code coverage reporting support.
Currently the only way to get a profile dump in ScyllaDB is to stop it with SIGTERM. However, this doesn't
suit all cases, more specifically:
1. In dtest, when some of the tests intentionally abruptly kill a node
2. In test.py, where we would like to distinguish (at least for now) between graceful-shutdown testing of ScyllaDB and
teardown procedures (which currently kill the nodes).

This mini series adds two changes:
1. It adds support for profile dumping in ScyllaDB via the REST API ('/system/dump_llvm_profile')
2. It adds support for this API in test.py and also adds a call to it as part of the node stop procedure, in a permissive way that will not fail the teardown or test if the call doesn't succeed for whatever reason - after this change, all current
test.py suites except for pylib_test (expected) dump profiles if instrumented and will be able to participate in coverage
reporting.

Refs #16323

Closes scylladb/scylladb#16557

* github.com:scylladb/scylladb:
  test.py: Dump coverage profile before killing a node
  rest api: Add an api for profile dumping
This commit is contained in:
Nadav Har'El
2023-12-27 12:06:39 +02:00
4 changed files with 53 additions and 1 deletions

View File

@@ -179,6 +179,21 @@
]
}
]
},
{
"path":"/system/dump_llvm_profile",
"operations":[
{
"method":"POST",
"summary":"Dump llvm profile data (raw profile data) that can later be used for coverage reporting or PGO (no-op if the current binary is not instrumented)",
"type":"void",
"nickname":"dump_profile",
"produces":[
"application/json"
],
"parameters":[]
}
]
}
]
}

View File

@@ -30,6 +30,10 @@ using namespace seastar::httpd;
namespace hs = httpd::system_json;
namespace hm = httpd::metrics_json;
// Hooks into the LLVM profiling runtime. They are declared weak so this file
// still links when the binary is not instrumented; in that case the symbols
// evaluate to null, which is why the dump_profile handler below tests each
// one before calling it.
// Writes the collected profile data out.
extern "C" void __attribute__((weak)) __llvm_profile_dump();
// Returns the name of the file the profile is written to (used only for logging here).
extern "C" const char * __attribute__((weak)) __llvm_profile_get_filename();
// Resets the profile counters so that a later dump can be performed.
extern "C" void __attribute__((weak)) __llvm_profile_reset_counters();
void set_system(http_context& ctx, routes& r) {
hm::get_metrics_config.set(r, [](const_req req) {
std::vector<hm::metrics_config> res;
@@ -158,6 +162,27 @@ void set_system(http_context& ctx, routes& r) {
return json::json_return_type(json::json_void());
});
});
// POST handler: writes the LLVM raw profile of the running process to disk.
// All three LLVM hooks are weak symbols, so each one is tested for presence
// before use. The request always completes with an empty (void) JSON reply,
// whether or not a profile was actually written.
hs::dump_profile.set(r, [](std::unique_ptr<request> req) {
    if (__llvm_profile_dump) {
        // Fall back to a generic description when the runtime cannot tell us
        // the output file name.
        const char* reported_name = "disk";
        if (__llvm_profile_get_filename) {
            reported_name = __llvm_profile_get_filename();
        }
        sstring target(reported_name);

        apilog.info("Dumping profile to {}", target);
        __llvm_profile_dump();

        if (!__llvm_profile_reset_counters) {
            apilog.warn("Could not reset profile counters, profile dumping will be skipped next time it is attempted");
        } else {
            // Without a reset, the profile dumping mechanism issues a warning
            // and exits the next time a dump is attempted. With a reset,
            // profiles can be accumulated (when %m is present in the
            // LLVM_PROFILE_FILE pattern), so dumping can be done in stages or
            // several times during runtime.
            __llvm_profile_reset_counters();
        }

        apilog.info("Profile dumped to {}", target);
    } else {
        // Binary was built without profile instrumentation: nothing to do.
        apilog.info("Profile will not be dumped, executable is not instrumented with profile dumping.");
    }
    return make_ready_future<json::json_return_type>(json::json_return_type(json::json_void()));
});
}
}

View File

@@ -271,7 +271,11 @@ class ScyllaRESTAPIClient():
if table is not None:
url += "?cf={table}"
await self.client.post(url, host=node_ip)
async def dump_llvm_profile(self, node_ip : str):
    """Ask the node at `node_ip` to write its llvm profile to disk.

    The resulting profile can later be used for coverage reporting or PGO.
    The request is a no-op on the server side when the scylla binary is
    not instrumented.
    """
    await self.client.post("/system/dump_llvm_profile", host=node_ip)
class ScyllaMetrics:
def __init__(self, lines: list[str]):

View File

@@ -503,6 +503,14 @@ class ScyllaServer:
if not self.cmd:
return
# Dump the profile if exists and supported by the API.
try:
api = ScyllaRESTAPIClient()
await api.dump_llvm_profile(self.ip_addr)
except:
# since it is not part of the test functionality, allow
# this step to fail unconditionally.
pass
await self.shutdown_control_connection()
try:
self.cmd.kill()