diff --git a/sstables/sstables.cc b/sstables/sstables.cc
index c4cc2ad9d7..01f1394951 100644
--- a/sstables/sstables.cc
+++ b/sstables/sstables.cc
@@ -1860,6 +1860,19 @@ sstable::read_scylla_metadata() noexcept {
     });
 }
 
+// Maps a schema column_kind to the sstable_column_kind enumerator of the same
+// name. Control only leaves the switch when no case matched, i.e. `k` holds an
+// unexpected value; that is reported through on_internal_error().
+static sstable_column_kind to_sstable_column_kind(column_kind k) {
+    switch (k) {
+    case column_kind::partition_key: return sstable_column_kind::partition_key;
+    case column_kind::clustering_key: return sstable_column_kind::clustering_key;
+    case column_kind::static_column: return sstable_column_kind::static_column;
+    case column_kind::regular_column: return sstable_column_kind::regular_column;
+    }
+    on_internal_error(sstlog, format("to_sstable_column_kind(): unknown column kind {}", static_cast<std::underlying_type_t<column_kind>>(k)));
+}
+
 void
 sstable::write_scylla_metadata(shard_id shard, struct run_identifier identifier,
         std::optional ld_stats, std::optional ts_stats) {
@@ -1926,6 +1939,17 @@ sstable::write_scylla_metadata(shard_id shard, struct run_identifier identifier,
     }
     _components->scylla_metadata->data.set(scylla_metadata::sstable_identifier{sid});
 
+    // Record the schema's id, version, keyspace/table names and each column's kind, name and type name.
+    sstable_schema_type sstable_schema;
+    sstable_schema.id = _schema->id();
+    sstable_schema.version = _schema->version();
+    sstable_schema.keyspace_name.value = to_bytes(_schema->ks_name());
+    sstable_schema.table_name.value = to_bytes(_schema->cf_name());
+    for (const auto& col : _schema->all_columns()) {
+        sstable_schema.columns.elements.push_back(sstable_column_description{to_sstable_column_kind(col.kind), {col.name()}, {to_bytes(col.type->name())}});
+    }
+    _components->scylla_metadata->data.set(std::move(sstable_schema));
+
     write_simple(*_components->scylla_metadata);
 }
 
diff --git a/sstables/types.hh b/sstables/types.hh
index d2f4fc8407..4fccf27c11 100644
--- a/sstables/types.hh
+++ b/sstables/types.hh
@@ -27,6 +27,7 @@
 #include "version.hh"
 #include "encoding_stats.hh"
 #include "types_fwd.hh"
+#include "schema/schema_fwd.hh"
 
 // While the sstable code works with char, bytes_view works with int8_t
 // (signed char).
Rather than change all the code, let's do a cast.
@@ -542,6 +543,7 @@ enum class scylla_metadata_type : uint32_t {
     ScyllaVersion = 8,
     ExtTimestampStats = 9,
     SSTableIdentifier = 10,
+    Schema = 11,
 };
 
 // UUID is used for uniqueness across nodes, such that an imported sstable
@@ -598,6 +600,41 @@ enum class ext_timestamp_stats_type : uint32_t {
     min_live_row_marker_timestamp = 2,
 };
 
+// Mirrors column_kind from schema.hh
+// Not reusing said enum because this enum is ABI, it must have a defined
+// integer storage type and defined values for each member. These kinds of
+// restrictions are hard to enforce on an enum in a seemingly unrelated part
+// of the code.
+enum class sstable_column_kind : uint8_t {
+    partition_key = 1,
+    clustering_key = 2,
+    static_column = 3,
+    regular_column = 4,
+};
+
+// Describes a single column: its kind, its name and the name of its type.
+struct sstable_column_description {
+    sstable_column_kind kind;
+    disk_string name;
+    disk_string type;
+
+    template <typename Describer>
+    auto describe_type(sstable_version_types v, Describer f) { return f(kind, name, type); }
+};
+
+// Schema information kept in scylla_metadata: table id, schema version,
+// keyspace and table names, and one description per column.
+struct sstable_schema_type {
+    table_id id;
+    table_schema_version version;
+    disk_string keyspace_name;
+    disk_string table_name;
+    disk_array columns;
+
+    template <typename Describer>
+    auto describe_type(sstable_version_types v, Describer f) { return f(id, version, keyspace_name, table_name, columns); }
+};
+
 struct scylla_metadata {
     using extension_attributes = disk_hash, disk_string>;
     using large_data_stats = disk_hash;
@@ -606,6 +643,7 @@ struct scylla_metadata {
     using scylla_version = disk_string;
     using ext_timestamp_stats = disk_hash;
     using sstable_identifier = sstable_identifier_type;
+    using sstable_schema = sstable_schema_type;
 
     disk_set_of_tagged_union,
@@ -617,7 +655,8 @@ struct scylla_metadata {
             disk_tagged_union_member,
             disk_tagged_union_member,
             disk_tagged_union_member,
-            disk_tagged_union_member
+            disk_tagged_union_member,
+            disk_tagged_union_member
             > data;
 
     sstable_enabled_features get_features() const {
diff --git a/tools/scylla-sstable.cc b/tools/scylla-sstable.cc
index
b99e912d4f..c6923f9a19 100644
--- a/tools/scylla-sstable.cc
+++ b/tools/scylla-sstable.cc
@@ -1320,6 +1320,7 @@ const char* to_string(sstables::scylla_metadata_type t) {
         case sstables::scylla_metadata_type::ScyllaBuildId: return "scylla_build_id";
         case sstables::scylla_metadata_type::ExtTimestampStats: return "ext_timestamp_stats";
         case sstables::scylla_metadata_type::SSTableIdentifier: return "sstable_identifier";
+        case sstables::scylla_metadata_type::Schema: return "schema";
     }
     std::abort();
 }
@@ -1442,9 +1443,57 @@ public:
         (*this)(m.value);
     }
 
+    // Emits the array's elements between StartArray()/EndArray(), visiting each one with this visitor.
+    template
+    void operator()(const sstables::disk_array& a) const {
+        _writer.StartArray();
+        for (const auto& element : a.elements) {
+            (*this)(element);
+        }
+        _writer.EndArray();
+    }
+
     void operator()(const sstables::scylla_metadata::sstable_identifier& sid) const {
         _writer.AsString(sid.value);
     }
+
+    // Emits a column description as an object with "kind" (numeric enum value), "name" and "type" keys.
+    void operator()(const sstables::sstable_column_description& cd) const {
+        _writer.StartObject();
+
+        _writer.Key("kind");
+        _writer.Int64(static_cast<int64_t>(cd.kind));
+
+        _writer.Key("name");
+        _writer.String(disk_string_to_string(cd.name));
+
+        _writer.Key("type");
+        _writer.String(disk_string_to_string(cd.type));
+
+        _writer.EndObject();
+    }
+
+    // Emits the sstable schema as an object: id, version, keyspace_name, table_name and the columns array.
+    void operator()(const sstables::scylla_metadata::sstable_schema& s) const {
+        _writer.StartObject();
+
+        _writer.Key("id");
+        _writer.String(fmt::to_string(s.id));
+
+        _writer.Key("version");
+        _writer.String(fmt::to_string(s.version));
+
+        _writer.Key("keyspace_name");
+        _writer.String(disk_string_to_string(s.keyspace_name));
+
+        _writer.Key("table_name");
+        _writer.String(disk_string_to_string(s.table_name));
+
+        _writer.Key("columns");
+        (*this)(s.columns);
+
+        _writer.EndObject();
+    }
 };
 
 void dump_scylla_metadata_operation(schema_ptr schema, reader_permit permit, const std::vector& sstables,