scylla-sstable: correctly dump sharding_metadata

This patch fixes 2 issues at one go:

First, Currently sstables::load clears the sharding metadata
(via open_data()), and so scylla-sstable always prints
an empty array for it.

Second, printing token values would generate invalid json
as they are currently printed as binary bytes, and they
should be printed simply as numbers, as we do elsewhere,
for example, for the first and last keys.

Fixes #26982

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>

Closes scylladb/scylladb#26991

(cherry picked from commit f9ce98384a)

Closes scylladb/scylladb#27030
This commit is contained in:
Benny Halevy
2025-11-11 11:17:48 +02:00
committed by Avi Kivity
parent 3818e15d91
commit 4dcb8c19bd
4 changed files with 20 additions and 5 deletions

View File

@@ -79,6 +79,8 @@ struct sstable_open_config {
// Mimics behavior when a SSTable is streamed to a given shard, where SSTable
// writer considers the shard that created the SSTable as its owner.
bool current_shard_as_sstable_owner = false;
// Do not move the sharding metadata to the sharder, keeping it in the scylla metadata..
bool keep_sharding_metadata = false;
};
}

View File

@@ -1347,7 +1347,7 @@ future<> sstable::open_data(sstable_open_config cfg) noexcept {
co_await update_info_for_opened_data(cfg);
parse_assert(!_shards.empty(), get_filename());
auto* sm = _components->scylla_metadata->data.get<scylla_metadata_type::Sharding, sharding_metadata>();
if (sm) {
if (sm && !cfg.keep_sharding_metadata) {
// Sharding information uses a lot of memory and once we're doing with this computation we will no longer use it.
co_await utils::clear_gently(sm->token_ranges.elements);
sm->token_ranges.elements = {};

View File

@@ -176,8 +176,14 @@ def test_scylla_sstable_dump_component(cql, test_keyspace, scylla_path, scylla_d
print(out)
assert out
assert json.loads(out)
json_out = json.loads(out)
assert json_out
if what == "scylla-metadata":
assert "sstables" in json_out, f"Expected 'sstables' in json output: {json_out}"
for sst_name, sst_metadata in json_out["sstables"].items():
assert "sharding" in sst_metadata, f"Expected 'sharding' metadata in sstable scylla-metadata: sstable={sst_name}: {sst_metadata}"
assert sst_metadata["sharding"] != [], f"Expected non-empty sharding metadata in sstable scylla-metadata: sstable={sst_name}: {sst_metadata}"
@pytest.mark.parametrize("table_factory", [
simple_no_clustering_table,

View File

@@ -350,7 +350,11 @@ const std::vector<sstables::shared_sstable> load_sstables(schema_ptr schema, sst
auto sst = sst_man.make_sstable(schema, local, ed.generation, sstables::sstable_state::normal, ed.version, ed.format);
try {
co_await sst->load(schema->get_sharder(), sstables::sstable_open_config{.load_first_and_last_position_metadata = false});
auto open_cfg = sstables::sstable_open_config{
.load_first_and_last_position_metadata = false,
.keep_sharding_metadata = true,
};
co_await sst->load(schema->get_sharder(), open_cfg);
} catch (...) {
// Print each individual error here since parallel_for_each
// will propagate only one of them up the stack.
@@ -1503,6 +1507,9 @@ const char* to_string(sstables::ext_timestamp_stats_type t) {
class scylla_metadata_visitor : public boost::static_visitor<> {
json_writer& _writer;
dht::token as_token(const sstables::disk_string<uint16_t>& ds) const {
return dht::token(dht::token::kind::key, bytes_view(ds));
}
public:
scylla_metadata_visitor(json_writer& writer) : _writer(writer) { }
@@ -1516,7 +1523,7 @@ public:
_writer.Key("exclusive");
_writer.Bool(e.left.exclusive);
_writer.Key("token");
_writer.String(disk_string_to_string(e.left.token));
_writer.AsString(as_token(e.left.token));
_writer.EndObject();
_writer.Key("right");
@@ -1524,7 +1531,7 @@ public:
_writer.Key("exclusive");
_writer.Bool(e.right.exclusive);
_writer.Key("token");
_writer.String(disk_string_to_string(e.right.token));
_writer.AsString(as_token(e.right.token));
_writer.EndObject();
_writer.EndObject();