From 4dcb8c19bd64afb49e313a773cb2968f4e99a0dc Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 11 Nov 2025 11:17:48 +0200 Subject: [PATCH] scylla-sstable: correctly dump sharding_metadata This patch fixes two issues in one go: First, sstables::load currently clears the sharding metadata (via open_data()), and so scylla-sstable always prints an empty array for it. Second, printing token values would generate invalid JSON as they are currently printed as binary bytes, and they should be printed simply as numbers, as we do elsewhere, for example, for the first and last keys. Fixes #26982 Signed-off-by: Benny Halevy Closes scylladb/scylladb#26991 (cherry picked from commit f9ce98384abdc9f5600aaef1fd991430844e579e) Closes scylladb/scylladb#27030 --- sstables/open_info.hh | 2 ++ sstables/sstables.cc | 2 +- test/cqlpy/test_tools.py | 8 +++++++- tools/scylla-sstable.cc | 13 ++++++++++--- 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/sstables/open_info.hh b/sstables/open_info.hh index 323e78490a..7a2dde6f62 100644 --- a/sstables/open_info.hh +++ b/sstables/open_info.hh @@ -79,6 +79,8 @@ struct sstable_open_config { // Mimics behavior when a SSTable is streamed to a given shard, where SSTable // writer considers the shard that created the SSTable as its owner. bool current_shard_as_sstable_owner = false; + // Do not move the sharding metadata to the sharder, keeping it in the scylla metadata.
+ bool keep_sharding_metadata = false; }; } diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 87a2a75005..13864b1fb7 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -1347,7 +1347,7 @@ future<> sstable::open_data(sstable_open_config cfg) noexcept { co_await update_info_for_opened_data(cfg); parse_assert(!_shards.empty(), get_filename()); auto* sm = _components->scylla_metadata->data.get(); - if (sm) { + if (sm && !cfg.keep_sharding_metadata) { // Sharding information uses a lot of memory and once we're doing with this computation we will no longer use it. co_await utils::clear_gently(sm->token_ranges.elements); sm->token_ranges.elements = {}; diff --git a/test/cqlpy/test_tools.py b/test/cqlpy/test_tools.py index cdad320845..3ccb042b76 100644 --- a/test/cqlpy/test_tools.py +++ b/test/cqlpy/test_tools.py @@ -176,8 +176,14 @@ def test_scylla_sstable_dump_component(cql, test_keyspace, scylla_path, scylla_d print(out) assert out - assert json.loads(out) + json_out = json.loads(out) + assert json_out + if what == "scylla-metadata": + assert "sstables" in json_out, f"Expected 'sstables' in json output: {json_out}" + for sst_name, sst_metadata in json_out["sstables"].items(): + assert "sharding" in sst_metadata, f"Expected 'sharding' metadata in sstable scylla-metadata: sstable={sst_name}: {sst_metadata}" + assert sst_metadata["sharding"] != [], f"Expected non-empty sharding metadata in sstable scylla-metadata: sstable={sst_name}: {sst_metadata}" @pytest.mark.parametrize("table_factory", [ simple_no_clustering_table, diff --git a/tools/scylla-sstable.cc b/tools/scylla-sstable.cc index 0a8fe9f88d..4fadd468fc 100644 --- a/tools/scylla-sstable.cc +++ b/tools/scylla-sstable.cc @@ -350,7 +350,11 @@ const std::vector load_sstables(schema_ptr schema, sst auto sst = sst_man.make_sstable(schema, local, ed.generation, sstables::sstable_state::normal, ed.version, ed.format); try { - co_await sst->load(schema->get_sharder(), 
sstables::sstable_open_config{.load_first_and_last_position_metadata = false}); + auto open_cfg = sstables::sstable_open_config{ + .load_first_and_last_position_metadata = false, + .keep_sharding_metadata = true, + }; + co_await sst->load(schema->get_sharder(), open_cfg); } catch (...) { // Print each individual error here since parallel_for_each // will propagate only one of them up the stack. @@ -1503,6 +1507,9 @@ const char* to_string(sstables::ext_timestamp_stats_type t) { class scylla_metadata_visitor : public boost::static_visitor<> { json_writer& _writer; + dht::token as_token(const sstables::disk_string& ds) const { + return dht::token(dht::token::kind::key, bytes_view(ds)); + } public: scylla_metadata_visitor(json_writer& writer) : _writer(writer) { } @@ -1516,7 +1523,7 @@ public: _writer.Key("exclusive"); _writer.Bool(e.left.exclusive); _writer.Key("token"); - _writer.String(disk_string_to_string(e.left.token)); + _writer.AsString(as_token(e.left.token)); _writer.EndObject(); _writer.Key("right"); @@ -1524,7 +1531,7 @@ public: _writer.Key("exclusive"); _writer.Bool(e.right.exclusive); _writer.Key("token"); - _writer.String(disk_string_to_string(e.right.token)); + _writer.AsString(as_token(e.right.token)); _writer.EndObject(); _writer.EndObject();