diff --git a/cql3/Cql.g b/cql3/Cql.g index 7cb7082738..f3911dc5f5 100644 --- a/cql3/Cql.g +++ b/cql3/Cql.g @@ -1531,12 +1531,22 @@ inMarkerForTuple returns [shared_ptr marker] | ':' name=ident { $marker = new_tuple_in_bind_variables(name); } ; -comparatorType returns [shared_ptr t] - : n=native_type { $t = cql3_type::raw::from(n); } - | c=collection_type { $t = c; } - | tt=tuple_type { $t = tt; } +// The comparator_type rule is used for users' queries (internal=false) +// and for internal calls from db::cql_type_parser::parse() (internal=true). +// The latter is used for reading schemas stored in the system tables, and +// may support additional column types that cannot be created through CQL, +// but only internally through code. Today the only such type is "empty": +// Scylla code internally creates columns with type "empty" or collections +// "empty" to represent unselected columns in materialized views. +// If a user (internal=false) tries to use "empty" as a type, it is treated - +// as do all unknown types - as an attempt to use a user-defined type, and +// we report this name is reserved (as for _reserved_type_names()). +comparator_type [bool internal] returns [shared_ptr t] + : n=native_or_internal_type[internal] { $t = cql3_type::raw::from(n); } + | c=collection_type[internal] { $t = c; } + | tt=tuple_type[internal] { $t = tt; } | id=userTypeName { $t = cql3::cql3_type::raw::user_type(id); } - | K_FROZEN '<' f=comparatorType '>' + | K_FROZEN '<' f=comparator_type[internal] '>' { try { $t = cql3::cql3_type::raw::frozen(f); @@ -1558,6 +1568,22 @@ comparatorType returns [shared_ptr t] #endif ; +native_or_internal_type [bool internal] returns [shared_ptr t] + : n=native_type { $t = n; } + // The "internal" types, only supported when internal==true: + | K_EMPTY { + if (internal) { + $t = cql3_type::empty; + } else { + add_recognition_error("Invalid (reserved) user type name empty"); + } + } + ; + +comparatorType returns [shared_ptr t] + : tt=comparator_type[false] { $t = tt; } + ; + native_type returns [shared_ptr t] : K_ASCII { $t = cql3_type::ascii; } | K_BIGINT { $t = cql3_type::bigint; } @@ -1582,24 +1608,24 @@ native_type returns [shared_ptr t] | K_TIME { $t = cql3_type::time; } ; -collection_type returns [shared_ptr pt] - : K_MAP '<' t1=comparatorType ',' t2=comparatorType '>' +collection_type [bool internal] returns [shared_ptr pt] + : K_MAP '<' t1=comparator_type[internal] ',' t2=comparator_type[internal] '>' { // if we can't parse either t1 or t2, antlr will "recover" and we may have t1 or t2 null. if (t1 && t2) { $pt = cql3::cql3_type::raw::map(t1, t2); } } - | K_LIST '<' t=comparatorType '>' + | K_LIST '<' t=comparator_type[internal] '>' { if (t) { $pt = cql3::cql3_type::raw::list(t); } } - | K_SET '<' t=comparatorType '>' + | K_SET '<' t=comparator_type[internal] '>' { if (t) { $pt = cql3::cql3_type::raw::set(t); } } ; -tuple_type returns [shared_ptr t] +tuple_type [bool internal] returns [shared_ptr t] @init{ std::vector> types; } : K_TUPLE '<' - t1=comparatorType { types.push_back(t1); } (',' tn=comparatorType { types.push_back(tn); })* + t1=comparator_type[internal] { types.push_back(t1); } (',' tn=comparator_type[internal] { types.push_back(tn); })* '>' { $t = cql3::cql3_type::raw::tuple(std::move(types)); } ; @@ -1625,7 +1651,7 @@ unreserved_keyword returns [sstring str] unreserved_function_keyword returns [sstring str] : u=basic_unreserved_keyword { $str = u; } - | t=native_type { $str = t->to_string(); } + | t=native_or_internal_type[true] { $str = t->to_string(); } ; basic_unreserved_keyword returns [sstring str] @@ -1810,6 +1836,8 @@ K_REPLACE: R E P L A C E; K_DETERMINISTIC: D E T E R M I N I S T I C; K_JSON: J S O N; +K_EMPTY: E M P T Y; + K_SCYLLA_TIMEUUID_LIST_INDEX: S C Y L L A '_' T I M E U U I D '_' L I S T '_' I N D E X; K_SCYLLA_COUNTER_SHARD_LIST: S C Y L L A '_' C O U N T E R '_' S H A R D '_' L I S T; diff --git a/cql3/column_identifier.cc b/cql3/column_identifier.cc index 8ebc0b5642..cffc3f18c3 100644 --- a/cql3/column_identifier.cc +++ b/cql3/column_identifier.cc @@ -127,7 +127,11 @@ column_identifier::new_selector_factory(database& db, schema_ptr schema, std::ve if (!def) { throw exceptions::invalid_request_exception(sprint("Undefined name %s in selection clause", _text)); } - + // Do not allow explicitly selecting hidden columns. We also skip them on + // "SELECT *" (see selection::wildcard()). + if (def->is_view_virtual()) { + throw exceptions::invalid_request_exception(sprint("Undefined name %s in selection clause", _text)); + } return selection::simple_selector::new_factory(def->name_as_text(), add_and_get_index(*def, defs), def->type); } diff --git a/cql3/selection/selection.cc b/cql3/selection/selection.cc index 69d884e0d7..92361f1c93 100644 --- a/cql3/selection/selection.cc +++ b/cql3/selection/selection.cc @@ -40,6 +40,7 @@ */ #include +#include #include "cql3/selection/selection.hh" #include "cql3/selection/selector_factories.hh" @@ -208,9 +209,17 @@ protected: ::shared_ptr selection::wildcard(schema_ptr schema) { auto columns = schema->all_columns_in_select_order(); - auto cds = boost::copy_range>(columns | boost::adaptors::transformed([](const column_definition& c) { - return &c; - })); + // filter out hidden columns, which should not be seen by the + // user when doing "SELECT *". We also disallow selecting them + // individually (see column_identifier::new_selector_factory()). + auto cds = boost::copy_range>( + columns | + boost::adaptors::filtered([](const column_definition& c) { + return !c.is_view_virtual(); + }) | + boost::adaptors::transformed([](const column_definition& c) { + return &c; + })); return simple_selection::make(schema, std::move(cds), true); } diff --git a/cql3/statements/alter_table_statement.cc b/cql3/statements/alter_table_statement.cc index 2f5a9fe4cd..34b088b0d5 100644 --- a/cql3/statements/alter_table_statement.cc +++ b/cql3/statements/alter_table_statement.cc @@ -246,18 +246,22 @@ future> alter_table_statement::a cfm.with_column(column_name->name(), type, _is_static ? column_kind::static_column : column_kind::regular_column); - // Adding a column to a table which has an include all view requires the column to be added to the view - // as well. If the view has a regular base column in its PK, then the column ID needs to be updated in - // view_info; for that, rebuild the schema. + // Adding a column to a base table always requires updating the view + // schemas: If the view includes all columns it should include the new + // column, but if it doesn't, it may need to include the new + // unselected column as a virtual column. The case when it we + // shouldn't add a virtual column is when the view has in its PK one + // of the base's regular columns - but even in this case we need to + // rebuild the view schema, to update the column ID. if (!_is_static) { for (auto&& view : cf.views()) { - if (view->view_info()->include_all_columns() || view->view_info()->base_non_pk_column_in_view_pk()) { - schema_builder builder(view); - if (view->view_info()->include_all_columns()) { - builder.with_column(column_name->name(), type); - } - view_updates.push_back(view_ptr(builder.build())); + schema_builder builder(view); + if (view->view_info()->include_all_columns()) { + builder.with_column(column_name->name(), type); + } else if (!view->view_info()->base_non_pk_column_in_view_pk()) { + db::view::create_virtual_column(builder, column_name->name(), type); } + view_updates.push_back(view_ptr(builder.build())); } } @@ -347,7 +351,8 @@ future> alter_table_statement::a validate_column_rename(db, *schema, *from, *to); cfm.with_column_rename(from->name(), to->name()); - // If the view includes a renamed column, it must be renamed in the view table and the definition. + // If the view includes a renamed column, it must be renamed in + // the view table and the definition. for (auto&& view : cf.views()) { if (view->get_column_definition(from->name())) { schema_builder builder(view); diff --git a/cql3/statements/create_view_statement.cc b/cql3/statements/create_view_statement.cc index 422a27b5ff..40c64e2944 100644 --- a/cql3/statements/create_view_statement.cc +++ b/cql3/statements/create_view_statement.cc @@ -275,6 +275,7 @@ future> create_view_statement::a std::vector missing_pk_columns; std::vector target_non_pk_columns; + std::vector unselected_columns; // We need to include all of the primary key columns from the base table in order to make sure that we do not // overwrite values in the view. We cannot support "collapsing" the base table into a smaller number of rows in @@ -292,6 +293,9 @@ future> create_view_statement::a if (included_def && !def_in_target_pk) { target_non_pk_columns.push_back(&def); } + if (!included_def && !def_in_target_pk && !def.is_static()) { + unselected_columns.push_back(&def); + } if (def.is_primary_key() && !def_in_target_pk) { missing_pk_columns.push_back(&def); } @@ -321,6 +325,19 @@ future> create_view_statement::a add_columns(target_partition_keys, column_kind::partition_key); add_columns(target_clustering_keys, column_kind::clustering_key); add_columns(target_non_pk_columns, column_kind::regular_column); + // Add all unselected columns (base-table columns which are not selected + // in the view) as "virtual columns" - columns which have timestamp and + // ttl information, but an empty value. These are needed to keep view + // rows alive when the base row is alive, even if the view row has no + // data, just a key (see issue #3362). The virtual columns are not needed + // when the view pk adds a regular base column (i.e., has_non_pk_column) + // because in that case, the liveness of that base column is what + // determines the liveness of the view row. + if (!has_non_pk_column) { + for (auto* def : unselected_columns) { + db::view::create_virtual_column(builder, def->name(), def->type); + } + } _properties.properties()->apply_to_builder(builder, proxy.get_db().local().get_config().extensions()); if (builder.default_time_to_live().count() > 0) { diff --git a/db/cql_type_parser.cc b/db/cql_type_parser.cc index 46dd4eeb17..2932274787 100644 --- a/db/cql_type_parser.cc +++ b/db/cql_type_parser.cc @@ -49,7 +49,10 @@ #include "types.hh" static ::shared_ptr parse_raw(const sstring& str) { - return cql3::util::do_with_parser(str, std::mem_fn(&cql3_parser::CqlParser::comparatorType)); + return cql3::util::do_with_parser(str, + [] (cql3_parser::CqlParser& parser) { + return parser.comparator_type(true); + }); } data_type db::cql_type_parser::parse(const sstring& keyspace, const sstring& str, lw_shared_ptr user_types) { diff --git a/db/schema_tables.cc b/db/schema_tables.cc index 5960dd2135..d7658b1295 100644 --- a/db/schema_tables.cc +++ b/db/schema_tables.cc @@ -160,7 +160,7 @@ static future<> do_merge_schema(distributed&, std::vecto static std::vector create_columns_from_column_rows( const query::result_set& rows, const sstring& keyspace, - const sstring& table, bool is_super); + const sstring& table, bool is_super, column_view_virtual is_view_virtual); static std::vector create_indices_from_index_rows(const query::result_set& rows, @@ -304,9 +304,20 @@ schema_ptr scylla_tables() { return schema; } -schema_ptr columns() { - static thread_local auto schema = [] { - schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, COLUMNS), NAME, COLUMNS, +// The "columns" table lists the definitions of all columns in all tables +// and views. Its schema needs to be identical to the one in Cassandra because +// it is the API through which drivers inspect the list of columns in a table +// (e.g., cqlsh's "DESCRIBE TABLE" and "DESCRIBE MATERIALIZED VIEW" get their +// information from the columns table). +// The "view_virtual_columns" table is an additional table with exactly the +// same schema (both are created by columns_schema()), but has a separate +// list of "virtual" columns. Those are used in materialized views for keeping +// rows without data alive (see issue #3362). These virtual columns cannot be +// listed in the regular "columns" table, otherwise the "DESCRIBE MATERIALIZED +// VIEW" would list them - while it should only list real, selected, columns. + +static schema_ptr columns_schema(const char* columns_table_name) { + schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, columns_table_name), NAME, columns_table_name, // partition key {{"keyspace_name", utf8_type}}, // clustering key @@ -326,10 +337,16 @@ schema_ptr columns() { // comment "column definitions" ))); - builder.set_gc_grace_seconds(schema_gc_grace); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(); - }(); + builder.set_gc_grace_seconds(schema_gc_grace); + builder.with_version(generate_schema_version(builder.uuid())); + return builder.build(); +} +schema_ptr columns() { + static thread_local auto schema = columns_schema(COLUMNS); + return schema; +} +schema_ptr view_virtual_columns() { + static thread_local auto schema = columns_schema(VIEW_VIRTUAL_COLUMNS); return schema; } @@ -1619,6 +1636,9 @@ static schema_mutations make_table_mutations(schema_ptr table, api::timestamp_ty if (with_columns_and_triggers) { for (auto&& column : table->v3().all_columns()) { + if (column.is_view_virtual()) { + throw std::logic_error("view_virtual column found in non-view table"); + } add_column_to_schema_mutation(table, column, timestamp, columns_mutation); } for (auto&& index : table->indices()) { @@ -1631,7 +1651,8 @@ static schema_mutations make_table_mutations(schema_ptr table, api::timestamp_ty } } - return schema_mutations{std::move(m), std::move(columns_mutation), std::move(indices_mutation), std::move(dropped_columns_mutation), + return schema_mutations{std::move(m), std::move(columns_mutation), stdx::nullopt, + std::move(indices_mutation), std::move(dropped_columns_mutation), std::move(scylla_tables_mutation)}; } @@ -1690,6 +1711,7 @@ static void make_update_columns_mutations(schema_ptr old_table, bool from_thrift, std::vector& mutations) { mutation columns_mutation(columns(), partition_key::from_singular(*columns(), old_table->ks_name())); + mutation view_virtual_columns_mutation(view_virtual_columns(), partition_key::from_singular(*columns(), old_table->ks_name())); auto diff = difference(old_table->v3().columns_by_name(), new_table->v3().columns_by_name()); @@ -1701,17 +1723,25 @@ static void make_update_columns_mutations(schema_ptr old_table, if (from_thrift && !column.is_regular()) { continue; } - - drop_column_from_schema_mutation(columns(), old_table, column.name_as_text(), timestamp, mutations); + if (column.is_view_virtual()) { + drop_column_from_schema_mutation(view_virtual_columns(), old_table, column.name_as_text(), timestamp, mutations); + } else { + drop_column_from_schema_mutation(columns(), old_table, column.name_as_text(), timestamp, mutations); + } } // newly added columns and old columns with updated attributes for (auto&& name : boost::range::join(diff.entries_differing, diff.entries_only_on_right)) { const column_definition& column = *new_table->v3().columns_by_name().at(name); - add_column_to_schema_mutation(new_table, column, timestamp, columns_mutation); + if (column.is_view_virtual()) { + add_column_to_schema_mutation(new_table, column, timestamp, view_virtual_columns_mutation); + } else { + add_column_to_schema_mutation(new_table, column, timestamp, columns_mutation); + } } mutations.emplace_back(std::move(columns_mutation)); + mutations.emplace_back(std::move(view_virtual_columns_mutation)); // dropped columns auto dc_diff = difference(old_table->dropped_columns(), new_table->dropped_columns()); @@ -1761,7 +1791,11 @@ static void make_drop_table_or_view_mutations(schema_ptr schema_table, m.partition().apply_delete(*schema_table, ckey, tombstone(timestamp, gc_clock::now())); mutations.emplace_back(m); for (auto& column : table_or_view->v3().all_columns()) { - drop_column_from_schema_mutation(columns(), table_or_view, column.name_as_text(), timestamp, mutations); + if (column.is_view_virtual()) { + drop_column_from_schema_mutation(view_virtual_columns(), table_or_view, column.name_as_text(), timestamp, mutations); + } else { + drop_column_from_schema_mutation(columns(), table_or_view, column.name_as_text(), timestamp, mutations); + } } for (auto& column : table_or_view->dropped_columns() | boost::adaptors::map_keys) { drop_column_from_schema_mutation(dropped_columns(), table_or_view, column, timestamp, mutations); @@ -1796,11 +1830,12 @@ static future read_table_mutations(distributed create_columns_from_column_rows(const quer const sstring& keyspace, const sstring& table, /*, AbstractType rawComparator, */ - bool is_super) + bool is_super, + column_view_virtual is_view_virtual) { std::vector columns; for (auto&& row : rows.rows()) { @@ -2204,7 +2241,7 @@ static std::vector create_columns_from_column_rows(const quer } } - columns.emplace_back(name_bytes, type, kind, position); + columns.emplace_back(name_bytes, type, kind, position, is_view_virtual); } return columns; } @@ -2247,10 +2284,16 @@ view_ptr create_view_from_mutations(const schema_ctxt& ctxt, schema_mutations sm schema_builder builder{ks_name, cf_name, id}; prepare_builder_from_table_row(ctxt, builder, row); - auto column_defs = create_columns_from_column_rows(query::result_set(sm.columns_mutation()), ks_name, cf_name, false); + auto column_defs = create_columns_from_column_rows(query::result_set(sm.columns_mutation()), ks_name, cf_name, false, column_view_virtual::no); for (auto&& cdef : column_defs) { builder.with_column(cdef); } + if (sm.view_virtual_columns_mutation()) { + column_defs = create_columns_from_column_rows(query::result_set(*sm.view_virtual_columns_mutation()), ks_name, cf_name, false, column_view_virtual::yes); + for (auto&& cdef : column_defs) { + builder.with_column(cdef); + } + } if (version) { builder.with_version(*version); @@ -2320,12 +2363,17 @@ static schema_mutations make_view_mutations(view_ptr view, api::timestamp_type t mutation columns_mutation(columns(), pkey); + mutation view_virtual_columns_mutation(view_virtual_columns(), pkey); mutation dropped_columns_mutation(dropped_columns(), pkey); mutation indices_mutation(indexes(), pkey); if (with_columns) { for (auto&& column : view->v3().all_columns()) { - add_column_to_schema_mutation(view, column, timestamp, columns_mutation); + if (column.is_view_virtual()) { + add_column_to_schema_mutation(view, column, timestamp, view_virtual_columns_mutation); + } else { + add_column_to_schema_mutation(view, column, timestamp, columns_mutation); + } } for (auto&& e : view->dropped_columns()) { @@ -2338,7 +2386,8 @@ static schema_mutations make_view_mutations(view_ptr view, api::timestamp_type t auto scylla_tables_mutation = make_scylla_tables_mutation(view, timestamp); - return schema_mutations{std::move(m), std::move(columns_mutation), std::move(indices_mutation), std::move(dropped_columns_mutation), + return schema_mutations{std::move(m), std::move(columns_mutation), std::move(view_virtual_columns_mutation), + std::move(indices_mutation), std::move(dropped_columns_mutation), std::move(scylla_tables_mutation)}; } @@ -2634,7 +2683,7 @@ data_type parse_type(sstring str) std::vector all_tables() { return { keyspaces(), tables(), scylla_tables(), columns(), dropped_columns(), triggers(), - views(), indexes(), types(), functions(), aggregates(), + views(), indexes(), types(), functions(), aggregates(), view_virtual_columns() }; } diff --git a/db/schema_tables.hh b/db/schema_tables.hh index 56cdbc24b1..7a3e8a1de5 100644 --- a/db/schema_tables.hh +++ b/db/schema_tables.hh @@ -91,8 +91,10 @@ static constexpr auto TYPES = "types"; static constexpr auto FUNCTIONS = "functions"; static constexpr auto AGGREGATES = "aggregates"; static constexpr auto INDEXES = "indexes"; +static constexpr auto VIEW_VIRTUAL_COLUMNS = "view_virtual_columns"; // Scylla specific schema_ptr columns(); +schema_ptr view_virtual_columns(); schema_ptr dropped_columns(); schema_ptr indexes(); schema_ptr tables(); diff --git a/db/view/view.cc b/db/view/view.cc index a508221bc6..f949a99cd1 100644 --- a/db/view/view.cc +++ b/db/view/view.cc @@ -288,50 +288,6 @@ row_marker view_updates::compute_row_marker(const clustering_row& base_row) cons return cell.is_live_and_has_ttl() ? row_marker(cell.timestamp(), cell.ttl(), cell.expiry()) : row_marker(cell.timestamp()); } - if (_view_info.include_all_columns()) { - return marker; - } - - auto timestamp = marker.timestamp(); - bool has_non_expiring_live_cell = false; - expiry_opt biggest_expiry; - gc_clock::duration ttl = gc_clock::duration::min(); - if (marker.is_expiring()) { - biggest_expiry = marker.expiry(); - ttl = marker.ttl(); - } - auto maybe_update_expiry_and_ttl = [&] (atomic_cell_view&& cell) { - timestamp = std::max(timestamp, cell.timestamp()); - if (cell.is_live_and_has_ttl()) { - if (cell.expiry() >= biggest_expiry.value_or(cell.expiry())) { - biggest_expiry = cell.expiry(); - ttl = cell.ttl(); - } - } else if (cell.is_live()) { - has_non_expiring_live_cell = true; - } - }; - - // Iterate over regular cells not in the view, as we already have the timestamps of the included columns. - base_row.cells().for_each_cell([&] (column_id id, const atomic_cell_or_collection& c) { - auto& def = _base->regular_column_at(id); - if (_view_info.view_column(def)) { - return; - } - if (def.is_atomic()) { - maybe_update_expiry_and_ttl(c.as_atomic_cell(def)); - } else { - auto ctype = static_pointer_cast(def.type); - ctype->for_each_cell(c.as_collection_mutation(), maybe_update_expiry_and_ttl); - } - }); - - if ((marker.is_live() && !marker.is_expiring()) || has_non_expiring_live_cell) { - return row_marker(timestamp); - } - if (biggest_expiry) { - return row_marker(timestamp, ttl, *biggest_expiry); - } return marker; } @@ -376,10 +332,134 @@ static const column_definition* view_column(const schema& base, const schema& vi return view.get_column_definition(base.regular_column_at(base_id).name()); } +// Utility function for taking an existing cell, and creating a copy with an +// empty value instead of the original value, but with the original liveness +// information (expiration and deletion time) unchanged. +static atomic_cell make_empty(const atomic_cell_view& ac) { + if (ac.is_live_and_has_ttl()) { + return atomic_cell::make_live(*empty_type, ac.timestamp(), bytes_view{}, ac.expiry(), ac.ttl()); + } else if (ac.is_live()) { + return atomic_cell::make_live(*empty_type, ac.timestamp(), bytes_view{}); + } else { + return atomic_cell::make_dead(ac.timestamp(), ac.deletion_time()); + } +} + +// Utility function for taking an existing collection which has both keys and +// values (i.e., either a list or map, but not a set), and creating a copy of +// this collection with all the values replaced by empty values. +// The make_empty() function above is used to ensure that liveness information +// is copied unchanged. +static collection_mutation make_empty( + const collection_mutation_view& cm, + const collection_type_impl& ctype) { + collection_type_impl::mutation n; + cm.data.with_linearized([&] (bytes_view bv) { + auto m_view = ctype.deserialize_mutation_form(bv); + n.tomb = m_view.tomb; + for (auto&& c : m_view.cells) { + n.cells.emplace_back(c.first, make_empty(c.second)); + } + }); + return ctype.serialize_mutation_form(n); +} + +// In some cases, we need to copy to a view table even columns which have not +// been SELECTed. For these columns we only need to save liveness information +// (timestamp, deletion, ttl), but not the value. We call these columns +// "virtual columns", and the reason why we need them is explained in +// issue #3362. The following function, maybe_make_virtual() takes a full +// value c (taken from the base table) for the given column col, and if that +// column is a virtual column it modifies c to remove the unwanted value. +// The function create_virtual_column(), below, creates the virtual column in +// the view schema, that maybe_make_virtual() will fill. +static void maybe_make_virtual(atomic_cell_or_collection& c, const column_definition* col) { + if (!col->is_view_virtual()) { + // This is a regular selected column. Leave c untouched. + return; + } + if (col->type->is_atomic()) { + // A virtual cell for an atomic value or frozen collection. Its + // value is empty (of type empty_type). + if (col->type != empty_type) { + throw std::logic_error("Virtual cell has wrong type"); + } + c = make_empty(c.as_atomic_cell(*col)); + } else { + if (!col->type->is_collection()) { + // TODO: when we support unfrozen UDT (#2201), we will need to + // supported it here too. + throw std::logic_error("Virtual cell is neither atomic nor collection"); + } + auto ctype = static_pointer_cast(col->type); + if (ctype->is_list()) { + // A list has integers as keys, and values (the list's items). + // We just need to build a list with the same keys (and liveness + // information), but empty values. + auto ltype = static_cast(col->type.get()); + if (ltype->get_elements_type() != empty_type) { + throw std::logic_error("Virtual cell has wrong list type"); + } + c = make_empty(c.as_collection_mutation(), *ctype); + } else if (ctype->is_map()) { + // A map has keys and values. We just need to build a map with + // the same keys (and liveness information), but empty values. + auto mtype = static_cast(col->type.get()); + if (mtype->get_values_type() != empty_type) { + throw std::logic_error("Virtual cell has wrong map type"); + } + c = make_empty(c.as_collection_mutation(), *ctype); + } else if (ctype->is_set()) { + // A set has just keys (and liveness information). We need + // all of it as a virtual column, unfortunately, so we + // leave c unmodified. + } else { + // A collection can't be anything but a list, map or set... + throw std::logic_error("Virtual cell has unexpected collection type"); + } + } +} + +void create_virtual_column(schema_builder& builder, const bytes& name, const data_type& type) { + if (type->is_atomic()) { + builder.with_column(name, empty_type, column_kind::regular_column, column_view_virtual::yes); + return; + } + // A multi-cell collection (a frozen collection is a single + // cell and handled handled in the is_atomic() case above). + // The virtual version can't be just one cell, it has to be + // itself a collection of cells. + auto ctype = dynamic_pointer_cast(type); + if (!ctype) { + // TODO: When #2201 is done, we also need to handle here + // unfrozen UDTs. + throw exceptions::invalid_request_exception(sprint("Unsupported unselected multi-cell non-collection column %s for Materialized View", name)); + } + if (ctype->is_list()) { + // A list has ints as keys, and values (the list's items). + // We just need these intss, i.e., a list of empty items. + builder.with_column(name, list_type_impl::get_instance(empty_type, true), column_kind::regular_column, column_view_virtual::yes); + } else if (ctype->is_map()) { + // A map has keys and values. We don't need these values, + // and can use empty values instead. + auto mtype = dynamic_pointer_cast(type); + builder.with_column(name, map_type_impl::get_instance(mtype->get_values_type(), empty_type, true), column_kind::regular_column, column_view_virtual::yes); + } else if (ctype->is_set()) { + // A set's cell has nothing beyond the keys, so the + // virtual version of a set is, unfortunately, a complete + // copy of the set. + builder.with_column(name, type, column_kind::regular_column, column_view_virtual::yes); + } else { + // A collection can't be anything but a list, map or set... + abort(); + } +} + static void add_cells_to_view(const schema& base, const schema& view, row base_cells, row& view_cells) { base_cells.for_each_cell([&] (column_id id, atomic_cell_or_collection& c) { auto* view_col = view_column(base, view, id); if (view_col && !view_col->is_primary_key()) { + maybe_make_virtual(c, view_col); view_cells.append_cell(view_col->id, std::move(c)); } }); diff --git a/db/view/view.hh b/db/view/view.hh index 0113b596db..f098da4a1a 100644 --- a/db/view/view.hh +++ b/db/view/view.hh @@ -104,6 +104,21 @@ query::clustering_row_ranges calculate_affected_clustering_ranges( future<> mutate_MV(const dht::token& base_token, std::vector mutations, db::view::stats& stats); +/** + * create_virtual_column() adds a "virtual column" to a schema builder. + * The definition of a "virtual column" is based on the given definition + * of a regular column, except that any value types are replaced by the + * empty type - and only the information needed to track column liveness + * is kept: timestamp, deletion, ttl, and keys in maps. + * In some cases we add such virtual columns for unselected columns in + * materialized views, for reasons explained in issue #3362. + * @param builder the schema_builder where we want to add the virtual column. + * @param name the name of the virtual column to be created. + * @param type of the base column for which we want to build a virtual column. + * When type is a multi-cell collection, so will be the virtual column. + */ + void create_virtual_column(schema_builder& builder, const bytes& name, const data_type& type); + } } diff --git a/idl/frozen_schema.idl.hh b/idl/frozen_schema.idl.hh index 01e6e8b892..eb0ec3f34c 100644 --- a/idl/frozen_schema.idl.hh +++ b/idl/frozen_schema.idl.hh @@ -30,6 +30,7 @@ class schema_mutations { std::experimental::optional indices_canonical_mutation()[[version 2.0]]; std::experimental::optional dropped_columns_canonical_mutation()[[version 2.0]]; std::experimental::optional scylla_tables_canonical_mutation()[[version 2.0]]; + std::experimental::optional view_virtual_columns_canonical_mutation()[[version 2.4]]; }; class schema stub [[writable]] { diff --git a/schema.cc b/schema.cc index 62297c847c..9c9ee67c22 100644 --- a/schema.cc +++ b/schema.cc @@ -489,11 +489,12 @@ sstring index_metadata::get_default_index_name(const sstring& cf_name, return cf_name + "_idx"; } -column_definition::column_definition(bytes name, data_type type, column_kind kind, column_id component_index, api::timestamp_type dropped_at) +column_definition::column_definition(bytes name, data_type type, column_kind kind, column_id component_index, column_view_virtual is_view_virtual, api::timestamp_type dropped_at) : _name(std::move(name)) , _dropped_at(dropped_at) , _is_atomic(type->is_atomic()) , _is_counter(type->is_counter()) + , _is_view_virtual(is_view_virtual) , type(std::move(type)) , id(component_index) , kind(kind) @@ -504,6 +505,9 @@ std::ostream& operator<<(std::ostream& os, const column_definition& cd) { os << "name=" << cd.name_as_text(); os << ", type=" << cd.type->name(); os << ", kind=" << to_sstring(cd.kind); + if (cd.is_view_virtual()) { + os << ", view_virtual"; + } os << ", componentIndex=" << (cd.has_component_index() ? std::to_string(cd.component_index()) : "null"); os << ", droppedAt=" << cd._dropped_at; os << "}"; @@ -689,16 +693,16 @@ column_definition& schema_builder::find_column(const cql3::column_identifier& c) } schema_builder& schema_builder::with_column(const column_definition& c) { - return with_column(bytes(c.name()), data_type(c.type), column_kind(c.kind), c.position()); + return with_column(bytes(c.name()), data_type(c.type), column_kind(c.kind), c.position(), c.view_virtual()); } -schema_builder& schema_builder::with_column(bytes name, data_type type, column_kind kind) { +schema_builder& schema_builder::with_column(bytes name, data_type type, column_kind kind, column_view_virtual is_view_virtual) { // component_index will be determined by schema cosntructor - return with_column(name, type, kind, 0); + return with_column(name, type, kind, 0, is_view_virtual); } -schema_builder& schema_builder::with_column(bytes name, data_type type, column_kind kind, column_id component_index) { - _raw._columns.emplace_back(name, type, kind, component_index); +schema_builder& schema_builder::with_column(bytes name, data_type type, column_kind kind, column_id component_index, column_view_virtual is_view_virtual) { + _raw._columns.emplace_back(name, type, kind, component_index, is_view_virtual); if (type->is_multi_cell()) { with_collection(name, type); } else if (type->is_counter()) { diff --git a/schema.hh b/schema.hh index 48d9ea3b4c..9c9e2612f4 100644 --- a/schema.hh +++ b/schema.hh @@ -75,6 +75,8 @@ class extensions; // make sure these match the order we like columns back from schema enum class column_kind { partition_key, clustering_key, static_column, regular_column }; +enum class column_view_virtual { no, yes }; + sstring to_sstring(column_kind k); bool is_compatible(column_kind k1, column_kind k2); @@ -207,6 +209,7 @@ private: api::timestamp_type _dropped_at; bool _is_atomic; bool _is_counter; + column_view_virtual _is_view_virtual; struct thrift_bits { thrift_bits() @@ -221,6 +224,7 @@ private: public: column_definition(bytes name, data_type type, column_kind kind, column_id component_index = 0, + column_view_virtual view_virtual = column_view_virtual::no, api::timestamp_type dropped_at = api::missing_timestamp); data_type type; @@ -241,6 +245,12 @@ public: bool is_atomic() const { return _is_atomic; } bool is_multi_cell() const { return !_is_atomic; } bool is_counter() const { return _is_counter; } + // "virtual columns" appear in a materialized view as placeholders for + // unselected columns, with liveness information but without data, and + // allow view rows to remain alive despite having no data (issue #3362). + // These columns should be hidden from the user's SELECT queries. + bool is_view_virtual() const { return _is_view_virtual == column_view_virtual::yes; } + column_view_virtual view_virtual() const { return _is_view_virtual; } const sstring& name_as_text() const; const bytes& name() const; sstring name_as_cql_string() const; diff --git a/schema_builder.hh b/schema_builder.hh index 8824f78fb3..6e34f2b8ff 100644 --- a/schema_builder.hh +++ b/schema_builder.hh @@ -238,8 +238,8 @@ public: column_definition& find_column(const cql3::column_identifier&); schema_builder& with_column(const column_definition& c); - schema_builder& with_column(bytes name, data_type type, column_kind kind = column_kind::regular_column); - schema_builder& with_column(bytes name, data_type type, column_kind kind, column_id component_index); + schema_builder& with_column(bytes name, data_type type, column_kind kind = column_kind::regular_column, column_view_virtual view_virtual = column_view_virtual::no); + schema_builder& with_column(bytes name, data_type type, column_kind kind, column_id component_index, column_view_virtual view_virtual = column_view_virtual::no); schema_builder& without_column(bytes name); schema_builder& without_column(sstring name, api::timestamp_type timestamp); schema_builder& without_column(sstring name, data_type, api::timestamp_type timestamp); diff --git a/schema_mutations.cc b/schema_mutations.cc index 898652cc7a..50026baf9c 100644 --- a/schema_mutations.cc +++ b/schema_mutations.cc @@ -29,9 +29,11 @@ schema_mutations::schema_mutations(canonical_mutation columnfamilies, bool is_view, stdx::optional indices, stdx::optional dropped_columns, - stdx::optional scylla_tables) + stdx::optional scylla_tables, + stdx::optional view_virtual_columns) : _columnfamilies(columnfamilies.to_mutation(is_view ? db::schema_tables::views() : db::schema_tables::tables())) , _columns(columns.to_mutation(db::schema_tables::columns())) + , _view_virtual_columns(view_virtual_columns ? mutation_opt{view_virtual_columns.value().to_mutation(db::schema_tables::view_virtual_columns())} : stdx::nullopt) , _indices(indices ? mutation_opt{indices.value().to_mutation(db::schema_tables::indexes())} : stdx::nullopt) , _dropped_columns(dropped_columns ? mutation_opt{dropped_columns.value().to_mutation(db::schema_tables::dropped_columns())} : stdx::nullopt) , _scylla_tables(scylla_tables ? mutation_opt{scylla_tables.value().to_mutation(db::schema_tables::scylla_tables())} : stdx::nullopt) @@ -40,6 +42,9 @@ schema_mutations::schema_mutations(canonical_mutation columnfamilies, void schema_mutations::copy_to(std::vector& dst) const { dst.push_back(_columnfamilies); dst.push_back(_columns); + if (_view_virtual_columns) { + dst.push_back(*_view_virtual_columns); + } if (_indices) { dst.push_back(*_indices); } @@ -68,6 +73,9 @@ table_schema_version schema_mutations::digest() const { md5_hasher h; db::schema_tables::feed_hash_for_schema_digest(h, _columnfamilies); db::schema_tables::feed_hash_for_schema_digest(h, _columns); + if (_view_virtual_columns && !_view_virtual_columns->partition().empty()) { + db::schema_tables::feed_hash_for_schema_digest(h, *_view_virtual_columns); + } if (_indices && !_indices->partition().empty()) { db::schema_tables::feed_hash_for_schema_digest(h, *_indices); } @@ -94,6 +102,7 @@ static mutation_opt compact(const mutation& m) { bool schema_mutations::operator==(const schema_mutations& other) const { return compact(_columnfamilies) == compact(other._columnfamilies) && compact(_columns) == compact(other._columns) + && compact(_view_virtual_columns) == compact(other._view_virtual_columns) && compact(_indices) == compact(other._indices) && compact(_dropped_columns) == compact(other._dropped_columns) && compact(_scylla_tables) == compact(other._scylla_tables) @@ -105,7 +114,8 @@ bool schema_mutations::operator!=(const schema_mutations& other) const { } bool schema_mutations::live() const { - return _columnfamilies.live_row_count() > 0 || _columns.live_row_count() > 0; + return _columnfamilies.live_row_count() > 0 || _columns.live_row_count() > 0 || + (_view_virtual_columns && _view_virtual_columns->live_row_count() > 0); } bool schema_mutations::is_view() const { diff --git a/schema_mutations.hh b/schema_mutations.hh index a9d38fdabe..e355c556d2 100644 --- a/schema_mutations.hh +++ b/schema_mutations.hh @@ -31,14 +31,16 @@ class schema_mutations { mutation _columnfamilies; mutation _columns; + mutation_opt _view_virtual_columns; mutation_opt _indices; mutation_opt _dropped_columns; mutation_opt _scylla_tables; public: - schema_mutations(mutation columnfamilies, mutation columns, mutation_opt indices, mutation_opt dropped_columns, + schema_mutations(mutation columnfamilies, mutation columns, mutation_opt view_virtual_columns, mutation_opt indices, mutation_opt dropped_columns, mutation_opt scylla_tables) : _columnfamilies(std::move(columnfamilies)) , _columns(std::move(columns)) + , _view_virtual_columns(std::move(view_virtual_columns)) , _indices(std::move(indices)) , _dropped_columns(std::move(dropped_columns)) , _scylla_tables(std::move(scylla_tables)) @@ -48,7 +50,8 @@ public: bool is_view, stdx::optional indices, stdx::optional dropped_columns, - stdx::optional scylla_tables); + stdx::optional scylla_tables, + stdx::optional view_virtual_columns); schema_mutations(schema_mutations&&) = default; schema_mutations& operator=(schema_mutations&&) = default; @@ -65,6 +68,10 @@ public: return _columns; } + const mutation_opt& view_virtual_columns_mutation() const { + return _view_virtual_columns; + } + const mutation_opt& scylla_tables() const { return _scylla_tables; } @@ -88,6 +95,13 @@ public: return canonical_mutation(_columns); } + stdx::optional view_virtual_columns_canonical_mutation() const { + if (_view_virtual_columns) { + return canonical_mutation(*_view_virtual_columns); + } + return {}; + } + stdx::optional indices_canonical_mutation() const { if (_indices) { return canonical_mutation(*_indices); diff --git a/tests/view_complex_test.cc b/tests/view_complex_test.cc index bd5b04204c..c3f570805a 100644 --- a/tests/view_complex_test.cc +++ b/tests/view_complex_test.cc @@ -1216,3 +1216,345 @@ SEASTAR_TEST_CASE(test_marker_timestamp_is_not_shadowed_by_previous_updatewith_f }); }, cfg); } + +// A reproducer for issue #3362, not involving TTLs. +// The test involves a view that selects no column except the base's primary +// key, so view rows contain no cells besides a row marker, so as a base +// row appears and disappears as we update and delete individual cells in +// that row, we need to insert and delete the row marker with varying +// timestamps to make sure the view row appears and disappears as needed. +// But as we shall see, after enough trickery, we run out of timestamps +// to use to revive the row marker, and fail to revive it. So to fix +// issue #3362, we needed to remember all cells separately ("virtual +// cells"). +SEASTAR_TEST_CASE(test_3362_no_ttls) { + return do_with_cql_env_thread([] (auto& e) { + e.execute_cql("create table cf (p int, c int, a int, b int, primary key (p, c))").get(); + e.execute_cql("create materialized view vcf as select p, c from cf " + "where p is not null and c is not null " + "primary key (p, c)").get(); + + // In row p=1 c=1, insert two cells - b=1 at timestamp 10, a=1 at timestamp 20: + BOOST_TEST_PASSPOINT(); + e.execute_cql("update cf using timestamp 10 set b = 1 where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + + BOOST_TEST_PASSPOINT(); + e.execute_cql("update cf using timestamp 20 set a = 1 where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + + + // Delete just a=1 (with timestamp 21). The base row will still exist (with b=1), + // and accordingly the view row too: + BOOST_TEST_PASSPOINT(); + e.execute_cql("delete a from cf using timestamp 21 where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + + // At this point, we still have the base row with b=1 at timestamp 10 + // (and a=1 was deleted at timestamp 21). If we delete the b=1 at + // timestamp 11, nothing will remain in the base row, and the view + // row should disappear as well: + BOOST_TEST_PASSPOINT(); + e.execute_cql("delete b from cf using timestamp 11 where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().is_empty(); + }); + + // Now we finally reproduce #3362: We now add b=1 again, at timestamp + // 12 (it was earlier deleted in timestamp 11). The base row is live + // again, and so should the view row. + // With issue #3362, the view row failed to become alive. The reason + // is that to make the above is_empty() succeed, the implementation + // deletes the row marker with timestamp 21 (the maximal timestamp + // seen in the row). But now, we add a row marker again with the same + // timestamp 21, but the deletion wins so the row marker is still + // missing. (note that had data won over deletions, the is_empty() + // test above would have failed instead). + BOOST_TEST_PASSPOINT(); + e.execute_cql("update cf using timestamp 12 set b = 1 where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + }); +} + +// This is another reproducer for issue #3362, using TTLs instead of +// numerous back-and-forth additions and deletions. +SEASTAR_TEST_CASE(test_3362_with_ttls) { + return do_with_cql_env_thread([] (auto& e) { + e.execute_cql("create table cf (p int, c int, a int, b int, primary key (p, c))").get(); + e.execute_cql("create materialized view vcf as select p, c from cf " + "where p is not null and c is not null " + "primary key (p, c)").get(); + + // In row p=1 c=1, insert two cells - a=1 with ttl, and b=1 without + // ttl. The ttl'ed cell is inserted first, with a newer timestamp. + // The problem is that the view row's marker gets, with a new + // timestamp, a ttl. Then, when we go to add another column with an + // older timestamp, and try to set the row marker without a + // ttl - the older timestamp of this update looses, and we wrongly + // remain with a ttl on the view row marker. + BOOST_TEST_PASSPOINT(); + e.execute_cql("update cf using timestamp 2 and ttl 5 set a = 1 where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + BOOST_TEST_PASSPOINT(); + e.execute_cql("update cf using timestamp 1 set b = 1 where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + // Pass the time 6 seconds forward. Cell 'a' will have expired, but + // cell 'b' will still exist, so the base row still exists and the + // corresponding view row should also exist too. + forward_jump_clocks(6s); + BOOST_TEST_PASSPOINT(); + // verify that the base row still exists (cell b didn't expire) + eventually([&] { + auto msg = e.execute_cql("select * from cf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)}, {}, {{int32_type->decompose(1)}} }}); + }); + BOOST_TEST_PASSPOINT(); + // verify that the view row still exists too. + // This check failing is issue #3362. + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + }); +} + +// ‎‎‎The following are more test for issue #3362, same as test_3362_not_ttls +// and test_3362_with_ttls, just with a collection with items "1" and "2" +// instead of separate columns a and b. For brevity, comments were removed, +// so refer to the comments in the original code above. +enum class collection_kind { set, list, map }; +void do_test_3362_no_ttls_with_collections(cql_test_env& e, collection_kind t) { + sstring type, pref, suf; + switch(t) { + case collection_kind::set: + type = "set"; + pref = "{"; + suf = "}"; + break; + case collection_kind::list: + type = "list"; + pref = "["; + suf = "]"; + break; + case collection_kind::map: + type = "map"; + pref = "{"; + suf = " : 17}"; + break; + } + e.execute_cql(sprint("create table cf (p int, c int, a %s, primary key (p, c))", type)).get(); + e.execute_cql("create materialized view vcf as select p, c from cf " + "where p is not null and c is not null " + "primary key (p, c)").get(); + e.execute_cql(sprint("update cf using timestamp 10 set a = a + %s2%s where p = 1 and c = 1", pref, suf)).get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + e.execute_cql(sprint("update cf using timestamp 20 set a = a + %s1%s where p = 1 and c = 1", pref, suf)).get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + if (t == collection_kind::map) { + e.execute_cql("delete a[1] from cf using timestamp 21 where p = 1 and c = 1").get(); + } else { + e.execute_cql(sprint("update cf using timestamp 21 set a = a - %s1%s where p = 1 and c = 1", pref, suf)).get(); + } + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + if (t == collection_kind::map) { + e.execute_cql("delete a[2] from cf using timestamp 11 where p = 1 and c = 1").get(); + } else { + e.execute_cql(sprint("update cf using timestamp 11 set a = a - %s2%s where p = 1 and c = 1", pref, suf)).get(); + } + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().is_empty(); + }); + e.execute_cql(sprint("update cf using timestamp 12 set a = a + %s2%s where p = 1 and c = 1", pref, suf)).get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); +} +SEASTAR_TEST_CASE(test_3362_no_ttls_with_set) { + return do_with_cql_env_thread([] (auto& e) { + do_test_3362_no_ttls_with_collections(e, collection_kind::set); + }); +} +SEASTAR_TEST_CASE(test_3362_no_ttls_with_list) { + return do_with_cql_env_thread([] (auto& e) { + do_test_3362_no_ttls_with_collections(e, collection_kind::list); + }); +} +SEASTAR_TEST_CASE(test_3362_no_ttls_with_map) { + return do_with_cql_env_thread([] (auto& e) { + do_test_3362_no_ttls_with_collections(e, collection_kind::map); + }); +} + +void do_test_3362_with_ttls_with_collections(cql_test_env& e, collection_kind t) { + sstring type, pref, suf; + switch(t) { + case collection_kind::set: + type = "set"; + pref = "{"; + suf = "}"; + break; + case collection_kind::list: + type = "list"; + pref = "["; + suf = "]"; + break; + case collection_kind::map: + type = "map"; + pref = "{"; + suf = " : 17}"; + break; + } + e.execute_cql(sprint("create table cf (p int, c int, a %s, primary key (p, c))", type)).get(); + e.execute_cql("create materialized view vcf as select p, c from cf " + "where p is not null and c is not null " + "primary key (p, c)").get(); + e.execute_cql(sprint("update cf using timestamp 2 and ttl 5 set a = a + %s1%s where p = 1 and c = 1", pref, suf)).get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + e.execute_cql(sprint("update cf using timestamp 1 set a = a + %s2%s where p = 1 and c = 1", pref, suf)).get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + forward_jump_clocks(6s); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); +} +SEASTAR_TEST_CASE(test_3362_with_ttls_with_set) { + return do_with_cql_env_thread([] (auto& e) { + do_test_3362_with_ttls_with_collections(e, collection_kind::set); + }); +} +SEASTAR_TEST_CASE(test_3362_with_ttls_with_list) { + return do_with_cql_env_thread([] (auto& e) { + do_test_3362_with_ttls_with_collections(e, collection_kind::list); + }); +} +SEASTAR_TEST_CASE(test_3362_with_ttls_with_map) { + return do_with_cql_env_thread([] (auto& e) { + do_test_3362_with_ttls_with_collections(e, collection_kind::map); + }); +} + +// This is a version of test_3362_with_ttls with frozen collection fields +// instead of integer fields in test_3362_with_ttls. The intention is to +// verify that we properly fixed #3362 in this case - by replacing the +// frozen collection by a single virtual cell, not a collection. +SEASTAR_TEST_CASE(test_3362_with_ttls_frozen) { + return do_with_cql_env_thread([] (auto& e) { + e.execute_cql("create table cf (p int, c int, a frozen>, b frozen>, primary key (p, c))").get(); + e.execute_cql("create materialized view vcf as select p, c from cf " + "where p is not null and c is not null " + "primary key (p, c)").get(); + BOOST_TEST_PASSPOINT(); + e.execute_cql("update cf using timestamp 2 and ttl 5 set a = {1,2} where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + BOOST_TEST_PASSPOINT(); + e.execute_cql("update cf using timestamp 1 set b = {3,4} where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + forward_jump_clocks(6s); + BOOST_TEST_PASSPOINT(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + }); +} + +// This is a version of test_3362_with_ttls with the added twist that the +// unselected column involved did not exist when the base table and view +// were originally created, but only added later with an "alter table". +// For this test to work, "alter table" will need to add the virtual +// columns in the view table for the newly created unselected column in +// the base table. +SEASTAR_TEST_CASE(test_3362_with_ttls_alter_add) { + return do_with_cql_env_thread([] (auto& e) { + e.execute_cql("create table cf (p int, c int, primary key (p, c))").get(); + e.execute_cql("create materialized view vcf as select p, c from cf " + "where p is not null and c is not null " + "primary key (p, c)").get(); + // Add with "alter table" two additional columns to the base table - + // a and b. These are not selected in the materialized view, and we + // want to check that they are treated like unselected columns + // (namely, virtual columns are added to the view). + e.execute_cql("alter table cf add a int").get(); + e.execute_cql("alter table cf add b int").get(); + BOOST_TEST_PASSPOINT(); + e.execute_cql("update cf using timestamp 2 and ttl 5 set a = 1 where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + BOOST_TEST_PASSPOINT(); + e.execute_cql("update cf using timestamp 1 set b = 1 where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + forward_jump_clocks(6s); + BOOST_TEST_PASSPOINT(); + eventually([&] { + auto msg = e.execute_cql("select * from cf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)}, {}, {{int32_type->decompose(1)}} }}); + }); + BOOST_TEST_PASSPOINT(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + }); +} + +// test_3362_with_ttls_alter_add() above is about handling changes to virtual +// columns as the base table columns change, but only for the "add" case. +// Theoretically we could have had problems in the "drop" and "rename" cases +// as well, but today, those are not supported: +// 1. Today we do not allow "alter table drop" to drop any column from a base +// table with views - even unselected columns. +// If we every do allow this, we need to also check that we drop the +// virtual column from the view. +// 2. Today we do not allow "alter table rename" to rename any non-pk +// column, so unselected columns also cannot be renamed. If this +// limitation is ever lifted, we will need to check that if we +// rename an unselected base column, the virtual column in the view is +// also renamed. diff --git a/tests/view_schema_test.cc b/tests/view_schema_test.cc index 5b79e704a8..2f1b81e200 100644 --- a/tests/view_schema_test.cc +++ b/tests/view_schema_test.cc @@ -3298,3 +3298,52 @@ SEASTAR_TEST_CASE(test_alter_table_with_updates) { }); }); } + +// Test that a regular column which we did not add to the view is really +// not in the view. Even if to fix issue #3362 we add "virtual cells" +// for the unselected columns, those should not be visible to the end-user +// of the view table. +SEASTAR_TEST_CASE(test_unselected_column) { + return do_with_cql_env_thread([] (auto& e) { + e.execute_cql("create table cf (p int, c int, x int, y list, z set, w map, primary key (p, c))").get(); + e.execute_cql("create materialized view vcf as select p, c from cf " + "where p is not null and c is not null " + "primary key (c, p)").get(); + e.execute_cql("insert into cf (p, c, x) values (1, 2, 3)").get(); + BOOST_TEST_PASSPOINT(); + auto msg = e.execute_cql("select * from cf").get0(); + assert_that(msg).is_rows().with_size(1) + .with_row({{int32_type->decompose(1)}, {int32_type->decompose(2)}, {}, {int32_type->decompose(3)}, {}, {}}); + BOOST_TEST_PASSPOINT(); + // Check that when we ask for all of vcf's columns, we only get the + // ones we actually selected - c and p, not x, y, z, or w: + eventually([&] { + auto msg = e.execute_cql("select * from vcf").get0(); + assert_that(msg).is_rows().with_size(1) + .with_row({{int32_type->decompose(2)}, {int32_type->decompose(1)}}); + }); + // Check that we cannot explicitly select the x, y, z or w columns in + // vcf as they are not one of the columns we selected for the view. + try { + e.execute_cql("select x from vcf").get(); + BOOST_ASSERT(false); + } catch (exceptions::invalid_request_exception&) { + // we expect: exceptions::invalid_request_exception: Undefined name x in selection clause + } + try { + e.execute_cql("select y from vcf").get(); + BOOST_ASSERT(false); + } catch (exceptions::invalid_request_exception&) { + } + try { + e.execute_cql("select z from vcf").get(); + BOOST_ASSERT(false); + } catch (exceptions::invalid_request_exception&) { + } + try { + e.execute_cql("select w from vcf").get(); + BOOST_ASSERT(false); + } catch (exceptions::invalid_request_exception&) { + } + }); +}