From fb14fae79bd0a66bf8645b1710ffb34ee2bac29d Mon Sep 17 00:00:00 2001 From: Piotr Sarna Date: Fri, 23 Oct 2020 17:18:02 +0200 Subject: [PATCH] Merge 'Backport PR #7469 to 4.2' from Eliran Sinvani This is a backport of PR #7469 that did not apply cleanly to 4.2 with a trivial conflict, another commit that touched one of the files but in a completely different region. Closes #7480 * github.com:scylladb/scylla: materialized views: add a base table reference if missing view info: support partial match between base and view for only reading from view. view info: guard against null dereference of the base info (cherry picked from commit c74ba1bc36844857e018c17ec80061312300adfc) --- db/view/view.cc | 86 +++++++++++++++++++++++++++++++----- db/view/view.hh | 25 +++++++++-- service/migration_manager.cc | 14 ++++++ 3 files changed, 111 insertions(+), 14 deletions(-) diff --git a/db/view/view.cc b/db/view/view.cc index 8051b360ca..3ad5eeab69 100644 --- a/db/view/view.cc +++ b/db/view/view.cc @@ -134,22 +134,86 @@ void view_info::set_base_info(db::view::base_info_ptr base_info) { _base_info = std::move(base_info); } +// A constructor for a base info that can facilitate reads and writes from the materialized view. +db::view::base_dependent_view_info::base_dependent_view_info(schema_ptr base_schema, std::vector&& base_non_pk_columns_in_view_pk) + : _base_schema{std::move(base_schema)} + , _base_non_pk_columns_in_view_pk{std::move(base_non_pk_columns_in_view_pk)} + , has_base_non_pk_columns_in_view_pk{!_base_non_pk_columns_in_view_pk.empty()} + , use_only_for_reads{false} { + +} + +// A constructor for a base info that can facilitate only reads from the materialized view. +db::view::base_dependent_view_info::base_dependent_view_info(bool has_base_non_pk_columns_in_view_pk) + : _base_schema{nullptr} + , has_base_non_pk_columns_in_view_pk{has_base_non_pk_columns_in_view_pk} + , use_only_for_reads{true} { +} + +const std::vector& db::view::base_dependent_view_info::base_non_pk_columns_in_view_pk() const { + if (use_only_for_reads) { + on_internal_error(vlogger, "base_non_pk_columns_in_view_pk(): operation unsupported when initialized only for view reads."); + } + return _base_non_pk_columns_in_view_pk; +} + +const schema_ptr& db::view::base_dependent_view_info::base_schema() const { + if (use_only_for_reads) { + on_internal_error(vlogger, "base_schema(): operation unsupported when initialized only for view reads."); + } + return _base_schema; +} + db::view::base_info_ptr view_info::make_base_dependent_view_info(const schema& base) const { std::vector base_non_pk_columns_in_view_pk; + bool has_base_non_pk_columns_in_view_pk = false; + bool can_only_read_from_view = false; + for (auto&& view_col : boost::range::join(_schema.partition_key_columns(), _schema.clustering_key_columns())) { + if (view_col.is_computed()) { + // we are not going to find it in the base table... + continue; + } auto* base_col = base.get_column_definition(view_col.name()); if (base_col && !base_col->is_primary_key()) { base_non_pk_columns_in_view_pk.push_back(base_col->id); + has_base_non_pk_columns_in_view_pk = true; + } else if (!base_col) { + // If we didn't find the column in the base column then it must have been deleted + // or not yet added (by alter command), this means it is for sure not a pk column + // in the base table. This can happen if the version of the base schema is not the + // one that the view was created with. Seting this schema as the base can't harm since + // if we got to such a situation then it means it is only going to be used for reading + // (computation of shadowable tombstones) and in that case the existence of such a column + // is the only thing that is of interest to us. + has_base_non_pk_columns_in_view_pk = true; + can_only_read_from_view = true; + + // We can break the loop here since we have the info we wanted and the list + // of columns is not going to be reliable anyhow. + break; } } - return make_lw_shared({ - .base_schema = base.shared_from_this(), - .base_non_pk_columns_in_view_pk = std::move(base_non_pk_columns_in_view_pk) - }); + + if (can_only_read_from_view) { + return make_lw_shared(has_base_non_pk_columns_in_view_pk); + } else { + return make_lw_shared(base.shared_from_this(), std::move(base_non_pk_columns_in_view_pk)); + } + } bool view_info::has_base_non_pk_columns_in_view_pk() const { - return !_base_info->base_non_pk_columns_in_view_pk.empty(); + // The base info is not always available, this is because + // the base info initialization is separate from the view + // info construction. If we are trying to get this info without + // initializing the base information it means that we have a + // schema integrity problem as the creator of owning view schema + // didn't make sure to initialize it with base information. + if (!_base_info) { + on_internal_error(vlogger, "Tried to perform a view query which is base info dependant without initializing it"); + } + return _base_info->has_base_non_pk_columns_in_view_pk; } namespace db { @@ -254,7 +318,7 @@ public: explicit view_updates(view_and_base vab) : _view(std::move(vab.view)) , _view_info(*_view->view_info()) - , _base(vab.base->base_schema) + , _base(vab.base->base_schema()) , _base_info(vab.base) , _updates(8, partition_key::hashing(*_view), partition_key::equality(*_view)) { } @@ -317,7 +381,7 @@ row_marker view_updates::compute_row_marker(const clustering_row& base_row) cons // they share liveness information. It's true especially in the only case currently allowed by CQL, // which assumes there's up to one non-pk column in the view key. It's also true in alternator, // which does not carry TTL information. - const auto& col_ids = _base_info->base_non_pk_columns_in_view_pk; + const auto& col_ids = _base_info->base_non_pk_columns_in_view_pk(); if (!col_ids.empty()) { auto& def = _base->regular_column_at(col_ids[0]); // Note: multi-cell columns can't be part of the primary key. @@ -548,7 +612,7 @@ void view_updates::delete_old_entry(const partition_key& base_key, const cluster void view_updates::do_delete_old_entry(const partition_key& base_key, const clustering_row& existing, const clustering_row& update, gc_clock::time_point now) { auto& r = get_view_row(base_key, existing); - const auto& col_ids = _base_info->base_non_pk_columns_in_view_pk; + const auto& col_ids = _base_info->base_non_pk_columns_in_view_pk(); if (!col_ids.empty()) { // We delete the old row using a shadowable row tombstone, making sure that // the tombstone deletes everything in the row (or it might still show up). @@ -689,7 +753,7 @@ void view_updates::generate_update( return; } - const auto& col_ids = _base_info->base_non_pk_columns_in_view_pk; + const auto& col_ids = _base_info->base_non_pk_columns_in_view_pk(); if (col_ids.empty()) { // The view key is necessarily the same pre and post update. if (existing && existing->is_live(*_base)) { @@ -947,10 +1011,10 @@ future> generate_view_updates( flat_mutation_reader&& updates, flat_mutation_reader_opt&& existings) { auto vs = boost::copy_range>(views_to_update | boost::adaptors::transformed([&] (view_and_base v) { - if (base->version() != v.base->base_schema->version()) { + if (base->version() != v.base->base_schema()->version()) { on_internal_error(vlogger, format("Schema version used for view updates ({}) does not match the current" " base schema version of the view ({}) for view {}.{} of {}.{}", - base->version(), v.base->base_schema->version(), v.view->ks_name(), v.view->cf_name(), base->ks_name(), base->cf_name())); + base->version(), v.base->base_schema()->version(), v.view->ks_name(), v.view->cf_name(), base->ks_name(), base->cf_name())); } return view_updates(std::move(v)); })); diff --git a/db/view/view.hh b/db/view/view.hh index 712cd8ed19..048589c7c7 100644 --- a/db/view/view.hh +++ b/db/view/view.hh @@ -48,11 +48,30 @@ namespace view { // This structure may change even though the view schema doesn't change, so // it needs to live outside view_ptr. struct base_dependent_view_info { - schema_ptr base_schema; - +private: + schema_ptr _base_schema; // Id of a regular base table column included in the view's PK, if any. // Scylla views only allow one such column, alternator can have up to two. - std::vector base_non_pk_columns_in_view_pk; + std::vector _base_non_pk_columns_in_view_pk; +public: + const std::vector& base_non_pk_columns_in_view_pk() const; + const schema_ptr& base_schema() const; + + // Indicates if the view hase pk columns which are not part of the base + // pk, it seems that !base_non_pk_columns_in_view_pk.empty() is the same, + // but actually there are cases where we can compute this boolean without + // succeeding to reliably build the former. + const bool has_base_non_pk_columns_in_view_pk; + + // If base_non_pk_columns_in_view_pk couldn't reliably be built, this base + // info can't be used for computing view updates, only for reading the materialized + // view. + const bool use_only_for_reads; + + // A constructor for a base info that can facilitate reads and writes from the materialized view. + base_dependent_view_info(schema_ptr base_schema, std::vector&& base_non_pk_columns_in_view_pk); + // A constructor for a base info that can facilitate only reads from the materialized view. + base_dependent_view_info(bool has_base_non_pk_columns_in_view_pk); }; // Immutable snapshot of view's base-schema-dependent part. diff --git a/service/migration_manager.cc b/service/migration_manager.cc index 138e720f37..bd5d7f3d18 100644 --- a/service/migration_manager.cc +++ b/service/migration_manager.cc @@ -1104,6 +1104,20 @@ future get_schema_definition(table_schema_version v, netw::messaging mlogger.debug("Requesting schema {} from {}", v, dst); auto& ms = netw::get_local_messaging_service(); return ms.send_get_schema_version(dst, v); + }).then([] (schema_ptr s) { + // If this is a view so this schema also needs a reference to the base + // table. + if (s->is_view()) { + if (!s->view_info()->base_info()) { + auto& db = service::get_local_storage_proxy().get_db().local(); + // This line might throw a no_such_column_family + // It should be fine since if we tried to register a view for which + // we don't know the base table, our registry is broken. + schema_ptr base_schema = db.find_schema(s->view_info()->base_id()); + s->view_info()->set_base_info(s->view_info()->make_base_dependent_view_info(*base_schema)); + } + } + return s; }); }