/*
 * Copyright (C) 2021-present ScyllaDB
 */

/*
 * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
 */

// NOTE(review): several #include directives below carry no header name, and a number of
// template argument lists in this file are absent (e.g. `std::vector`, `std::optional`,
// `lw_shared_ptr`). This looks like angle-bracket content lost in extraction -- confirm
// against the upstream file before building.
#include
#include "data_dictionary.hh"
#include "cql3/description.hh"
#include "data_dictionary/consistency_config_options.hh"
#include "impl.hh"
#include "user_types_metadata.hh"
#include "keyspace_metadata.hh"
#include "schema/schema.hh"
#include "cql3/util.hh"
#include "gms/feature_service.hh"
#include
#include
#include
#include
#include
#include
#include "replica/database.hh"
#include "utils/overloaded_functor.hh"

namespace data_dictionary {

//
// table
//
// Every accessor below forwards to the implementation object `_ops`, passing
// `*this` so the implementation can resolve the underlying table.
//

// Returns the schema reported by the implementation for this table.
schema_ptr
table::schema() const {
    return _ops->get_table_schema(*this);
}

// Returns a reference to the views the implementation reports for this table.
const std::vector&
table::views() const {
    return _ops->get_table_views(*this);
}

// Returns a reference to the secondary index manager the implementation reports for this table.
const secondary_index::secondary_index_manager&
table::get_index_manager() const {
    return _ops->get_index_manager(*this);
}

// Returns the truncation time the implementation reports for this table.
db_clock::time_point
table::get_truncation_time() const {
    return _ops->get_truncation_time(*this);
}

//
// keyspace
//

// Returns the keyspace metadata pointer obtained from the implementation.
lw_shared_ptr
keyspace::metadata() const {
    return _ops->get_keyspace_metadata(*this);
}

// Convenience accessor: the user types stored in this keyspace's metadata().
const user_types_metadata&
keyspace::user_types() const {
    return metadata()->user_types();
}

// Returns the implementation's answer to whether this keyspace is internal.
bool
keyspace::is_internal() const {
    return _ops->is_internal(*this);
}

// Convenience accessor: forwards to metadata()->uses_tablets().
bool
keyspace::uses_tablets() const {
    return metadata()->uses_tablets();
}

// Returns a reference to the replication strategy the implementation reports for this keyspace.
const locator::abstract_replication_strategy&
keyspace::get_replication_strategy() const {
    return _ops->get_replication_strategy(*this);
}

//
// database
//

// Returns the schema version reported by the implementation.
const table_schema_version&
database::get_version() const {
    return _ops->get_version(*this);
}

// Looks up a keyspace by name via the implementation. The result is an optional;
// callers in this file treat an empty result as "no such keyspace".
std::optional
database::try_find_keyspace(std::string_view name) const {
    return _ops->try_find_keyspace(*this, name);
}

// Returns true iff try_find_keyspace(name) yields a value.
bool
database::has_keyspace(std::string_view name) const {
    return bool(try_find_keyspace(name));
}

// Like try_find_keyspace(), but throws no_such_keyspace when the lookup is empty.
keyspace
database::find_keyspace(std::string_view name) const {
    auto ks = try_find_keyspace(name);
    if (!ks) {
        throw no_such_keyspace(name);
    }
    return *ks;
}

// Returns whatever the implementation's get_keyspaces() reports.
std::vector
database::get_keyspaces() const {
    return _ops->get_keyspaces(*this);
}

// Returns whatever the implementation's get_user_keyspaces() reports.
std::vector
database::get_user_keyspaces() const {
    return _ops->get_user_keyspaces(*this);
}

// Returns whatever the implementation's get_all_keyspaces() reports.
std::vector
database::get_all_keyspaces() const {
    return _ops->get_all_keyspaces(*this);
}

// Returns whatever the implementation's get_tables() reports.
std::vector
database::get_tables() const {
    return _ops->get_tables(*this);
}

// Looks up a table by keyspace name and table name via the implementation.
std::optional
database::try_find_table(std::string_view ks, std::string_view table) const {
    return _ops->try_find_table(*this, ks, table);
}

// Like try_find_table(ks, table), but throws no_such_column_family(ks, table)
// when the lookup is empty.
table
database::find_table(std::string_view ks, std::string_view table) const {
    auto t = try_find_table(ks, table);
    if (!t) {
        throw no_such_column_family(ks, table);
    }
    return *t;
}

// Looks up a table by its id via the implementation.
std::optional
database::try_find_table(table_id id) const {
    return _ops->try_find_table(*this, id);
}

// Like try_find_table(uuid), but throws no_such_column_family(uuid) when the
// lookup is empty.
table
database::find_column_family(table_id uuid) const {
    auto t = try_find_table(uuid);
    if (!t) {
        throw no_such_column_family(uuid);
    }
    return *t;
}

// Returns the schema of the table found by find_table(ks, table); propagates
// its no_such_column_family exception.
schema_ptr
database::find_schema(std::string_view ks, std::string_view table) const {
    return find_table(ks, table).schema();
}

// Returns the schema of the table found by find_column_family(uuid); propagates
// its no_such_column_family exception.
schema_ptr
database::find_schema(table_id uuid) const {
    return find_column_family(uuid).schema();
}

// Returns true iff try_find_table(ks_name, cf_name) yields a value.
bool
database::has_schema(std::string_view ks_name, std::string_view cf_name) const {
    return bool(try_find_table(ks_name, cf_name));
}

// Finds the table whose id equals s->id(); `s` is dereferenced unconditionally.
table
database::find_column_family(schema_ptr s) const {
    return find_column_family(s->id());
}

// Forwards to the implementation's find_indexed_table().
schema_ptr
database::find_indexed_table(std::string_view ks_name, std::string_view index_name) const {
    return _ops->find_indexed_table(*this, ks_name, index_name);
}

// Forwards to the implementation's get_available_index_name().
sstring
database::get_available_index_name(std::string_view ks_name, std::string_view table_name,
                                   std::optional index_name_root) const {
    return _ops->get_available_index_name(*this, ks_name, table_name, index_name_root);
}

// Forwards to the implementation's existing_index_names().
std::set
database::existing_index_names(std::string_view ks_name, std::string_view cf_to_exclude) const {
    return _ops->existing_index_names(*this, ks_name, cf_to_exclude);
}

// Resolves the table by name (throwing no_such_column_family if absent) and
// forwards its schema to the overload taking a `const schema&`.
schema_ptr
database::get_cdc_base_table(std::string_view ks_name, std::string_view table_name) const {
    return get_cdc_base_table(*find_table(ks_name, table_name).schema());
}

// Forwards to the implementation's get_cdc_base_table().
schema_ptr
database::get_cdc_base_table(const schema& s) const {
    return _ops->get_cdc_base_table(*this, s);
}

// Returns a reference to the extensions object reported by the implementation.
const db::extensions&
database::extensions() const {
    return _ops->get_extensions(*this);
}

// Returns a reference to the feature service reported by the implementation.
const gms::feature_service&
database::features() const {
    return _ops->get_features(*this);
}

// Returns a reference to the configuration reported by the implementation.
const db::config&
database::get_config() const {
    return _ops->get_config(*this);
}

// Returns a reference to the replica::database reported by the implementation.
replica::database&
database::real_database() const {
    return _ops->real_database(*this);
}

// Pointer-returning variant of real_database().
// NOTE(review): presumably may be null when no replica::database backs this facade -- confirm
// against the implementations of impl::real_database_ptr().
replica::database*
database::real_database_ptr() const {
    return _ops->real_database_ptr(*this);
}
impl::~impl() = default; keyspace_metadata::keyspace_metadata(std::string_view name, std::string_view strategy_name, locator::replication_strategy_config_options strategy_options, std::optional initial_tablets, std::optional consistency_option, bool durable_writes, std::vector cf_defs, user_types_metadata user_types, storage_options storage_opts) : _name{name} , _strategy_name{locator::abstract_replication_strategy::to_qualified_class_name(strategy_name.empty() ? "NetworkTopologyStrategy" : strategy_name)} , _strategy_options{std::move(strategy_options)} , _initial_tablets(initial_tablets) , _durable_writes{durable_writes} , _user_types{std::move(user_types)} , _storage_options(make_lw_shared(std::move(storage_opts))) , _consistency_option(consistency_option) { for (auto&& s : cf_defs) { _cf_meta_data.emplace(s->cf_name(), s); } } void keyspace_metadata::validate(const gms::feature_service& fs, const locator::topology& topology) const { using namespace locator; locator::replication_strategy_params params(strategy_options(), initial_tablets(), consistency_option()); auto strategy = locator::abstract_replication_strategy::create_replication_strategy(strategy_name(), params, topology); strategy->validate_options(fs, topology); if (!params.initial_tablets && params.consistency.value_or(data_dictionary::consistency_config_option::eventual) != data_dictionary::consistency_config_option::eventual) { throw exceptions::configuration_exception("Only eventual consistency is supported for non-tablet keyspaces"); } if (params.consistency && !fs.strongly_consistent_tables) { throw exceptions::configuration_exception("The strongly_consistent_tables feature must be enabled to use a consistency option"); } if (params.consistency && *params.consistency == data_dictionary::consistency_config_option::global) { throw exceptions::configuration_exception("Global consistency is not supported yet"); } } locator::replication_strategy_config_options keyspace_metadata::strategy_options_v1() 
const { auto opts = _strategy_options; for (auto& [key, value] : opts) { if (std::holds_alternative(value)) { opts[key] = to_sstring(std::get(value).size()); } } return opts; } lw_shared_ptr keyspace_metadata::new_keyspace(std::string_view name, std::string_view strategy_name, locator::replication_strategy_config_options options, std::optional initial_tablets, std::optional consistency_option, bool durables_writes, storage_options storage_opts, std::vector cf_defs) { return ::make_lw_shared(name, strategy_name, options, initial_tablets, consistency_option, durables_writes, cf_defs, user_types_metadata{}, storage_opts); } lw_shared_ptr keyspace_metadata::new_keyspace(const keyspace_metadata& ksm) { return new_keyspace(ksm.name(), ksm.strategy_name(), ksm.strategy_options(), ksm.initial_tablets(), ksm.consistency_option(), ksm.durable_writes(), ksm.get_storage_options()); } void keyspace_metadata::add_user_type(const user_type ut) { _user_types.add_type(ut); } void keyspace_metadata::remove_user_type(const user_type ut) { _user_types.remove_type(ut); } std::vector keyspace_metadata::tables() const { return _cf_meta_data | std::views::values | std::views::filter([] (const auto& s) { return !s->is_view(); }) | std::ranges::to>(); } std::vector keyspace_metadata::views() const { return _cf_meta_data | std::views::values | std::views::filter([] (const auto& s) { return s->is_view(); }) | std::views::transform([] (auto& s) { return view_ptr(s); }) | std::ranges::to>(); } static storage_options::local local_from_map(const std::map& values) { if (!values.empty()) { throw std::runtime_error("Local storage does not accept any custom options"); } return {}; } std::map storage_options::local::to_map() const { return {}; } std::string_view storage_options::local::name() const { return LOCAL_NAME; } static storage_options::object_storage object_storage_from_map(std::string_view type, const std::map& values) { storage_options::object_storage options; const std::array, 2> 
allowed_options { std::make_pair("bucket", &options.bucket), std::make_pair("endpoint", &options.endpoint), }; for (auto& option : allowed_options) { if (auto it = values.find(option.first); it != values.end()) { *option.second = it->second; } else { throw std::runtime_error(fmt::format("Missing {} option: {}", type, option.first)); } } if (values.size() > allowed_options.size()) { throw std::runtime_error(fmt::format("Extraneous options for {}: {}; allowed: {}", fmt::join(values | std::views::keys, ","), type, fmt::join(allowed_options | std::views::keys, ","))); } options.type = std::string(type); return options; } std::map storage_options::object_storage::to_map() const { return {{"bucket", bucket}, {"endpoint", endpoint}}; } std::string_view storage_options::object_storage::name() const { return type; } bool storage_options::object_storage::operator==(const object_storage&) const = default; bool storage_options::is_local_type() const noexcept { return std::holds_alternative(value); } bool storage_options::is_object_storage_type() const noexcept { return std::holds_alternative(value); } bool storage_options::is_s3_type() const noexcept { return is_object_storage_type() && type_string() == S3_NAME; } bool storage_options::is_gs_type() const noexcept { return is_object_storage_type() && type_string() == GS_NAME; } const std::string storage_options::LOCAL_NAME = "LOCAL"; const std::string storage_options::S3_NAME = "S3"; const std::string storage_options::GS_NAME = "GS"; storage_options::value_type storage_options::from_map(std::string_view type, const std::map& values) { if (type == LOCAL_NAME) { return local_from_map(values); } if (type == S3_NAME || type == GS_NAME) { return object_storage_from_map(type, values); } throw std::runtime_error(fmt::format("Unknown storage type: {}", type)); } std::string_view storage_options::type_string() const { return std::visit([] (auto& opt) { return opt.name(); }, value); } std::map storage_options::to_map() const { return 
std::visit([] (auto& opt) { return opt.to_map(); }, value); } bool storage_options::can_update_to(const storage_options& new_options) { return value == new_options.value; } storage_options storage_options::append_to_object_storage_prefix(const sstring& s) const { // when restoring from object storage, the API of /storage_service/restore // provides: // 1. a shared prefix // 2. a list of sstables, each of which has its own partial path // // for example, assuming we have following API call: // - shared prefix: /bucket/ks/cf // - sstables // - 3123/me-3gdq_0bki_2cvk01yl83nj0tp5gh-big-TOC.txt // - 3123/me-3gdq_0bki_2edkg2vx4xtksugjj5-big-TOC.txt // - 3245/me-3gdq_0bki_2cvk02wubgncy8qd41-big-TOC.txt // // note, this example shows three sstables from two different snapshot backups. // // we assume all sstables' locations share the same base prefix (storage_options::object_storage::prefix). // however, sstable in different backups have different prefixes. to handle this, we compose // a per-sstable prefix by concatenating the shared prefix and the "parent directory" of the // sstable's location. the resulting structure looks like: // // sstables: // - prefix: /bucket/ks/cf/3123 // desc: me-3gdq_0bki_2cvk01yl83nj0tp5gh-big // - prefix: /bucket/ks/cf/3123 // desc: me-3gdq_0bki_2edkg2vx4xtksugjj5-big // - prefix: /bucket/ks/cf/3145 // desc: me-3gdq_0bki_2cvk02wubgncy8qd41-big SCYLLA_ASSERT(!is_local_type()); storage_options ret = *this; if (s.empty()) { // scylla-manager should always pass sstables with non-empty dirname, // but still.. 
return ret; } object_storage options = std::get(value); SCYLLA_ASSERT(std::holds_alternative(options.location)); sstring prefix = std::get(options.location); options.location = seastar::format("{}/{}", prefix, s); ret.value = std::move(options); return ret; } storage_options make_local_options(std::filesystem::path dir) { storage_options so; so.value = data_dictionary::storage_options::local { .dir = std::move(dir) }; return so; } static std::string fqn_type(const std::string& fqn) { auto i = fqn.find_first_of(':'); return fqn.substr(0, i) | std::views::transform(&toupper) | std::ranges::to(); } storage_options make_object_storage_options(const std::string& endpoint, const std::string& fqn, abort_source* as) { std::string bucket; std::string object; auto type = fqn_type(fqn); object_storage_fqn_to_parts(fqn, type, bucket, object); object = std::filesystem::path(object).parent_path().string(); // remove the filename and trailing separator from the path return make_object_storage_options(endpoint, type, bucket, object, as); } storage_options make_object_storage_options(const std::string& endpoint, const std::string& type, const std::string& bucket, const std::string& prefix, abort_source* as) { storage_options so; storage_options::object_storage os{ .bucket = std::move(bucket), .endpoint = endpoint, .location = std::move(prefix), .abort_source = as, .type = type | std::views::transform(&toupper) | std::ranges::to() }; so.value = std::move(os); return so; } namespace fs = std::filesystem; using namespace std::string_literals; static fs::path object_store_canonicalize(const fs::path& path, std::string_view type) { if (!is_object_storage_fqn(path, type) || path.string().length() < (type.length() + 2)) { return path; } // Canonicalizing the original "://" changes it to ":/". Trim and re-add the "type://" prefix. 
auto canonical = path.lexically_normal().string().substr(type.length() + 2); return (type | std::views::transform(&tolower) | std::ranges::to()) + "://"s + canonical; } bool is_object_storage_fqn(const fs::path& fqn, std::string_view type) { if (fqn.empty()) { return false; } std::string tmp = *(fqn.begin()); return tmp.size() == (type.size() + 1) // additional ':' && tmp.back() == ':' // allow case insensitive checks, like type=S3 as well as type=s3. Only because ::name // members (history) are upper case. && std::equal(tmp.begin(), tmp.begin() + type.size(), type.begin(), [](char c1, char c2) { return ::tolower(c1) == ::tolower(c2); }) ; } bool object_storage_fqn_to_parts(const fs::path& fqn, std::string_view type, std::string& bucket_name, std::string& object_name) { if (!is_object_storage_fqn(fqn, type)) { return false; } const auto canonical = object_store_canonicalize(fqn, type); auto it = canonical.begin(); // Expect at least two components: the scheme (e.g., "s3:") and the bucket name. if (std::distance(it, canonical.end()) < 2) { return false; } // Skip the scheme component. ++it; // The next component is the bucket name. bucket_name = it->string(); // Advance to check for object parts. ++it; if (it == canonical.end()) { // No object parts – default to root. object_name = "/"; return true; } // Combine remaining parts into the object path. fs::path obj; for (; it != canonical.end(); ++it) { obj /= *it; } object_name = obj.string().empty() ? 
"/" : obj.string(); return true; } no_such_keyspace::no_such_keyspace(std::string_view ks_name) : runtime_error{fmt::format("Can't find a keyspace {}", ks_name)} { } no_such_column_family::no_such_column_family(const table_id& uuid) : runtime_error{fmt::format("Can't find a column family with UUID {}", uuid)} { } no_such_column_family::no_such_column_family(std::string_view ks_name, std::string_view cf_name) : runtime_error{fmt::format("Can't find a column family {} in keyspace {}", cf_name, ks_name)} { } no_such_column_family::no_such_column_family(std::string_view ks_name, const table_id& uuid) : runtime_error{fmt::format("Can't find a column family with UUID {} in keyspace {}", uuid, ks_name)} { } cql3::description keyspace_metadata::describe(const replica::database& db, cql3::with_create_statement with_create_statement) const { auto maybe_create_statement = std::invoke([&] -> std::optional { if (!with_create_statement) { return std::nullopt; } fragmented_ostringstream os; os << "CREATE KEYSPACE " << cql3::util::maybe_quote(_name) << " WITH replication = {'class': " << cql3::util::single_quote(_strategy_name); for (const auto& opt: _strategy_options) { os << ", " << cql3::util::single_quote(opt.first) << ": "; std::visit(overloaded_functor{ [&os] (const sstring& str) { os << cql3::util::single_quote(str); }, [&os] (const std::vector& vec) { os << "["; for (auto it = vec.begin(); it != vec.end(); ++it) { if (it != vec.begin()) { os << ", "; } os << cql3::util::single_quote(*it); } os << "]"; } }, opt.second); } if (!_storage_options->is_local_type()) { os << "} AND storage = {'type': " << cql3::util::single_quote(sstring(_storage_options->type_string())); for (const auto& e : _storage_options->to_map()) { os << ", " << cql3::util::single_quote(e.first) << ": " << cql3::util::single_quote(e.second); } } os << "} AND durable_writes = " << fmt::to_string(_durable_writes); if (db.features().tablets) { if (_consistency_option) { os << " AND consistency = " << 
cql3::util::single_quote(consistency_config_option_to_string(*_consistency_option)); } if (!_initial_tablets.has_value()) { os << " AND tablets = {'enabled': false}"; } else { os << format(" AND tablets = {{'enabled': true{}}}", _initial_tablets.value() ? format(", 'initial': {}", _initial_tablets.value()) : ""); } } os << ";"; return std::move(os).to_managed_string(); }); return cql3::description { .keyspace = name(), .type = "keyspace", .name = name(), .create_statement = std::move(maybe_create_statement) }; } consistency_config_option consistency_config_option_from_string(const seastar::sstring& str) { if (str == "eventual") { return consistency_config_option::eventual; } else if (str == "local") { return consistency_config_option::local; } else if (str == "global") { return consistency_config_option::global; } else { throw exceptions::configuration_exception(fmt::format("Consistency option must be one of 'eventual', 'local', or 'global'; found: {}", str)); } } seastar::sstring consistency_config_option_to_string(consistency_config_option option) { switch (option) { case consistency_config_option::eventual: return "eventual"; case consistency_config_option::local: return "local"; case consistency_config_option::global: return "global"; } } } // namespace data_dictionary template <> struct fmt::formatter { constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } auto format(const data_dictionary::user_types_metadata& m, fmt::format_context& ctx) const -> decltype(ctx.out()) { return fmt::format_to(ctx.out(), "org.apache.cassandra.config.UTMetaData@{}", fmt::ptr(&m)); } }; auto fmt::formatter::format(const data_dictionary::keyspace_metadata& m, fmt::format_context& ctx) const -> decltype(ctx.out()) { fmt::format_to(ctx.out(), "KSMetaData{{name={}, strategyClass={}, strategyOptions={}, cfMetaData={}, durable_writes={}, tablets=", m.name(), m.strategy_name(), m.strategy_options(), m.cf_meta_data(), m.durable_writes()); if (m.initial_tablets()) { if 
(auto initial_tablets = m.initial_tablets().value()) { fmt::format_to(ctx.out(), "{{\"initial\":{}}}", initial_tablets); } else { fmt::format_to(ctx.out(), "{{\"enabled\":true}}"); } } else { fmt::format_to(ctx.out(), "{{\"enabled\":false}}"); } return fmt::format_to(ctx.out(), ", userTypes={}}}", m.user_types()); } auto fmt::formatter::format(const data_dictionary::storage_options& so, fmt::format_context& ctx) const -> decltype(ctx.out()) { auto type = so.type_string() | std::views::transform(&tolower) | std::ranges::to(); return std::visit(overloaded_functor { [&ctx] (const data_dictionary::storage_options::local& so) -> decltype(ctx.out()) { return fmt::format_to(ctx.out(), "{}", so.dir); }, [&ctx, &type] (const data_dictionary::storage_options::object_storage& so) -> decltype(ctx.out()) { return std::visit(overloaded_functor { [&] (const sstring& prefix) -> decltype(ctx.out()) { return fmt::format_to(ctx.out(), "{}://{}/{}", type, so.bucket, prefix); }, [&] (const table_id& owner) -> decltype(ctx.out()) { return fmt::format_to(ctx.out(), "{}://{} (owner {})", type, so.bucket, owner); } }, so.location); } }, so.value); }