From b4fc711903e917a90a6a624c4f6f5b3bbf2e22cc Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Thu, 16 Aug 2018 15:12:27 +0300 Subject: [PATCH 1/9] Add "empty" type name to CQL parser, but only for internal parsing Even before this patch, Scylla supported the "empty" type (a column with no content) but only internally - i.e., in code but not in CQL syntax. The "empty" type was used in dense tables without regular columns, and a special optimization in db::cql_type_parser::parse() allowed this type name to be parsed when reading the schema tables, without allowing the "empty" type to be used by users in CQL statements. However, parse() only supported "empty" itself, and more complex types like list were not recognized by parse(). In the following patches, we plan to add to virtual columns to materialized views, with types empty, list or map. We need all these types to work, and before this patch, they don't. But we want all of these types to only work internally - when Scylla's code creates these hidden columns; we do not want to add the "empty" type to CQL's syntax. This is what we do in this patch: The CQL parser's comparator_type rule now has a parameter, "internal", used to differenciate internal calls via db::cql_type_parser::parse() from calls from CQL query parsing. If a user tries something like: CREATE TABLE e (pk empty PRIMARY KEY); He will get the error: Invalid (reserved) user type name empty Note that here, as usual, unknown types are treated as "user types", and "empty" is not allowed as a user type name - we "reserve" it in case one day in the future we will want to allow users a direct syntax to create empty columns. We already have, following Cassandra, a bunch of other names reserved from being user type names, including "byte", "complex", and others (see _reserved_type_names()), and using "empty" as a type name will result in a similar error message. Just like all other type names, the name "empty" is not a reserved keyword in other senses: a user can create a table or a column with the name "empty", just like he can create one with the name "int". Refs #3362. Signed-off-by: Nadav Har'El --- cql3/Cql.g | 52 +++++++++++++++++++++++++++++++++---------- db/cql_type_parser.cc | 5 ++++- 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/cql3/Cql.g b/cql3/Cql.g index 7cb7082738..f3911dc5f5 100644 --- a/cql3/Cql.g +++ b/cql3/Cql.g @@ -1531,12 +1531,22 @@ inMarkerForTuple returns [shared_ptr marker] | ':' name=ident { $marker = new_tuple_in_bind_variables(name); } ; -comparatorType returns [shared_ptr t] - : n=native_type { $t = cql3_type::raw::from(n); } - | c=collection_type { $t = c; } - | tt=tuple_type { $t = tt; } +// The comparator_type rule is used for users' queries (internal=false) +// and for internal calls from db::cql_type_parser::parse() (internal=true). +// The latter is used for reading schemas stored in the system tables, and +// may support additional column types that cannot be created through CQL, +// but only internally through code. Today the only such type is "empty": +// Scylla code internally creates columns with type "empty" or collections +// "empty" to represent unselected columns in materialized views. +// If a user (internal=false) tries to use "empty" as a type, it is treated - +// as do all unknown types - as an attempt to use a user-defined type, and +// we report this name is reserved (as for _reserved_type_names()). +comparator_type [bool internal] returns [shared_ptr t] + : n=native_or_internal_type[internal] { $t = cql3_type::raw::from(n); } + | c=collection_type[internal] { $t = c; } + | tt=tuple_type[internal] { $t = tt; } | id=userTypeName { $t = cql3::cql3_type::raw::user_type(id); } - | K_FROZEN '<' f=comparatorType '>' + | K_FROZEN '<' f=comparator_type[internal] '>' { try { $t = cql3::cql3_type::raw::frozen(f); @@ -1558,6 +1568,22 @@ comparatorType returns [shared_ptr t] #endif ; +native_or_internal_type [bool internal] returns [shared_ptr t] + : n=native_type { $t = n; } + // The "internal" types, only supported when internal==true: + | K_EMPTY { + if (internal) { + $t = cql3_type::empty; + } else { + add_recognition_error("Invalid (reserved) user type name empty"); + } + } + ; + +comparatorType returns [shared_ptr t] + : tt=comparator_type[false] { $t = tt; } + ; + native_type returns [shared_ptr t] : K_ASCII { $t = cql3_type::ascii; } | K_BIGINT { $t = cql3_type::bigint; } @@ -1582,24 +1608,24 @@ native_type returns [shared_ptr t] | K_TIME { $t = cql3_type::time; } ; -collection_type returns [shared_ptr pt] - : K_MAP '<' t1=comparatorType ',' t2=comparatorType '>' +collection_type [bool internal] returns [shared_ptr pt] + : K_MAP '<' t1=comparator_type[internal] ',' t2=comparator_type[internal] '>' { // if we can't parse either t1 or t2, antlr will "recover" and we may have t1 or t2 null. if (t1 && t2) { $pt = cql3::cql3_type::raw::map(t1, t2); } } - | K_LIST '<' t=comparatorType '>' + | K_LIST '<' t=comparator_type[internal] '>' { if (t) { $pt = cql3::cql3_type::raw::list(t); } } - | K_SET '<' t=comparatorType '>' + | K_SET '<' t=comparator_type[internal] '>' { if (t) { $pt = cql3::cql3_type::raw::set(t); } } ; -tuple_type returns [shared_ptr t] +tuple_type [bool internal] returns [shared_ptr t] @init{ std::vector> types; } : K_TUPLE '<' - t1=comparatorType { types.push_back(t1); } (',' tn=comparatorType { types.push_back(tn); })* + t1=comparator_type[internal] { types.push_back(t1); } (',' tn=comparator_type[internal] { types.push_back(tn); })* '>' { $t = cql3::cql3_type::raw::tuple(std::move(types)); } ; @@ -1625,7 +1651,7 @@ unreserved_keyword returns [sstring str] unreserved_function_keyword returns [sstring str] : u=basic_unreserved_keyword { $str = u; } - | t=native_type { $str = t->to_string(); } + | t=native_or_internal_type[true] { $str = t->to_string(); } ; basic_unreserved_keyword returns [sstring str] @@ -1810,6 +1836,8 @@ K_REPLACE: R E P L A C E; K_DETERMINISTIC: D E T E R M I N I S T I C; K_JSON: J S O N; +K_EMPTY: E M P T Y; + K_SCYLLA_TIMEUUID_LIST_INDEX: S C Y L L A '_' T I M E U U I D '_' L I S T '_' I N D E X; K_SCYLLA_COUNTER_SHARD_LIST: S C Y L L A '_' C O U N T E R '_' S H A R D '_' L I S T; diff --git a/db/cql_type_parser.cc b/db/cql_type_parser.cc index 46dd4eeb17..2932274787 100644 --- a/db/cql_type_parser.cc +++ b/db/cql_type_parser.cc @@ -49,7 +49,10 @@ #include "types.hh" static ::shared_ptr parse_raw(const sstring& str) { - return cql3::util::do_with_parser(str, std::mem_fn(&cql3_parser::CqlParser::comparatorType)); + return cql3::util::do_with_parser(str, + [] (cql3_parser::CqlParser& parser) { + return parser.comparator_type(true); + }); } data_type db::cql_type_parser::parse(const sstring& keyspace, const sstring& str, lw_shared_ptr user_types) { From 0a1d93138d6681b193279e4a8f45a6d20d4e849b Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Thu, 16 Aug 2018 15:23:09 +0300 Subject: [PATCH 2/9] schema: add "view virtual" flag to schema's column_definition In this patch we add a flag, "view virtual", that we can mark on on a column defined in a schema. In following patches, we will add such virtual columns to materialized views to allow view rows to remain alive despite having no data (refs #3362). After this patch, the "view virtual" flag exists in our in-memory representation of the schema, but not persisted to disk - we will fix this in the next patch. Signed-off-by: Nadav Har'El --- schema.cc | 16 ++++++++++------ schema.hh | 10 ++++++++++ schema_builder.hh | 4 ++-- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/schema.cc b/schema.cc index 62297c847c..9c9ee67c22 100644 --- a/schema.cc +++ b/schema.cc @@ -489,11 +489,12 @@ sstring index_metadata::get_default_index_name(const sstring& cf_name, return cf_name + "_idx"; } -column_definition::column_definition(bytes name, data_type type, column_kind kind, column_id component_index, api::timestamp_type dropped_at) +column_definition::column_definition(bytes name, data_type type, column_kind kind, column_id component_index, column_view_virtual is_view_virtual, api::timestamp_type dropped_at) : _name(std::move(name)) , _dropped_at(dropped_at) , _is_atomic(type->is_atomic()) , _is_counter(type->is_counter()) + , _is_view_virtual(is_view_virtual) , type(std::move(type)) , id(component_index) , kind(kind) @@ -504,6 +505,9 @@ std::ostream& operator<<(std::ostream& os, const column_definition& cd) { os << "name=" << cd.name_as_text(); os << ", type=" << cd.type->name(); os << ", kind=" << to_sstring(cd.kind); + if (cd.is_view_virtual()) { + os << ", view_virtual"; + } os << ", componentIndex=" << (cd.has_component_index() ? std::to_string(cd.component_index()) : "null"); os << ", droppedAt=" << cd._dropped_at; os << "}"; @@ -689,16 +693,16 @@ column_definition& schema_builder::find_column(const cql3::column_identifier& c) } schema_builder& schema_builder::with_column(const column_definition& c) { - return with_column(bytes(c.name()), data_type(c.type), column_kind(c.kind), c.position()); + return with_column(bytes(c.name()), data_type(c.type), column_kind(c.kind), c.position(), c.view_virtual()); } -schema_builder& schema_builder::with_column(bytes name, data_type type, column_kind kind) { +schema_builder& schema_builder::with_column(bytes name, data_type type, column_kind kind, column_view_virtual is_view_virtual) { // component_index will be determined by schema cosntructor - return with_column(name, type, kind, 0); + return with_column(name, type, kind, 0, is_view_virtual); } -schema_builder& schema_builder::with_column(bytes name, data_type type, column_kind kind, column_id component_index) { - _raw._columns.emplace_back(name, type, kind, component_index); +schema_builder& schema_builder::with_column(bytes name, data_type type, column_kind kind, column_id component_index, column_view_virtual is_view_virtual) { + _raw._columns.emplace_back(name, type, kind, component_index, is_view_virtual); if (type->is_multi_cell()) { with_collection(name, type); } else if (type->is_counter()) { diff --git a/schema.hh b/schema.hh index 48d9ea3b4c..9c9e2612f4 100644 --- a/schema.hh +++ b/schema.hh @@ -75,6 +75,8 @@ class extensions; // make sure these match the order we like columns back from schema enum class column_kind { partition_key, clustering_key, static_column, regular_column }; +enum class column_view_virtual { no, yes }; + sstring to_sstring(column_kind k); bool is_compatible(column_kind k1, column_kind k2); @@ -207,6 +209,7 @@ private: api::timestamp_type _dropped_at; bool _is_atomic; bool _is_counter; + column_view_virtual _is_view_virtual; struct thrift_bits { thrift_bits() @@ -221,6 +224,7 @@ private: public: column_definition(bytes name, data_type type, column_kind kind, column_id component_index = 0, + column_view_virtual view_virtual = column_view_virtual::no, api::timestamp_type dropped_at = api::missing_timestamp); data_type type; @@ -241,6 +245,12 @@ public: bool is_atomic() const { return _is_atomic; } bool is_multi_cell() const { return !_is_atomic; } bool is_counter() const { return _is_counter; } + // "virtual columns" appear in a materialized view as placeholders for + // unselected columns, with liveness information but without data, and + // allow view rows to remain alive despite having no data (issue #3362). + // These columns should be hidden from the user's SELECT queries. + bool is_view_virtual() const { return _is_view_virtual == column_view_virtual::yes; } + column_view_virtual view_virtual() const { return _is_view_virtual; } const sstring& name_as_text() const; const bytes& name() const; sstring name_as_cql_string() const; diff --git a/schema_builder.hh b/schema_builder.hh index 8824f78fb3..6e34f2b8ff 100644 --- a/schema_builder.hh +++ b/schema_builder.hh @@ -238,8 +238,8 @@ public: column_definition& find_column(const cql3::column_identifier&); schema_builder& with_column(const column_definition& c); - schema_builder& with_column(bytes name, data_type type, column_kind kind = column_kind::regular_column); - schema_builder& with_column(bytes name, data_type type, column_kind kind, column_id component_index); + schema_builder& with_column(bytes name, data_type type, column_kind kind = column_kind::regular_column, column_view_virtual view_virtual = column_view_virtual::no); + schema_builder& with_column(bytes name, data_type type, column_kind kind, column_id component_index, column_view_virtual view_virtual = column_view_virtual::no); schema_builder& without_column(bytes name); schema_builder& without_column(sstring name, api::timestamp_type timestamp); schema_builder& without_column(sstring name, data_type, api::timestamp_type timestamp); From 36a657fc1096af7c56c7fa2606238e6fe0e2cc25 Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Thu, 16 Aug 2018 15:30:06 +0300 Subject: [PATCH 3/9] schema: persist "view virtual" columns to a separate system table In the previous patch, we added a "view virtual" flag on columns. In this patch we add persistance to this flag: I.e., writing it to the on-disk schema table and reading it back on startup. But the implementation is not as simple as adding a flag: In the on-disk system tables, we have a "columns" table listing all the columns in the database and their types. Cqlsh's "DESCRIBE MATERIALIZED VIEW" works by reading this "columns" table, and listing all of the requested view's columns. Therefore, we cannot add "virtual columns" - which are columns not added by the user and not intended to be seen - to this list. We therefore need to create in this patch a separate list for virtual columns, in a new table "view_virtual_columns". This table is essentially identical to the existing "columns" table, just separate. We need to write each column to the appropriate table (columns with the view_virtual flag to "view_virtual_columns", columns without it to the old "columns"), read from both on startup, and remember to delete columns from both when a table is dropped. Signed-off-by: Nadav Har'El --- db/schema_tables.cc | 93 ++++++++++++++++++++++++++++++---------- db/schema_tables.hh | 2 + idl/frozen_schema.idl.hh | 1 + schema_mutations.cc | 14 +++++- schema_mutations.hh | 18 +++++++- 5 files changed, 102 insertions(+), 26 deletions(-) diff --git a/db/schema_tables.cc b/db/schema_tables.cc index 5960dd2135..d7658b1295 100644 --- a/db/schema_tables.cc +++ b/db/schema_tables.cc @@ -160,7 +160,7 @@ static future<> do_merge_schema(distributed&, std::vecto static std::vector create_columns_from_column_rows( const query::result_set& rows, const sstring& keyspace, - const sstring& table, bool is_super); + const sstring& table, bool is_super, column_view_virtual is_view_virtual); static std::vector create_indices_from_index_rows(const query::result_set& rows, @@ -304,9 +304,20 @@ schema_ptr scylla_tables() { return schema; } -schema_ptr columns() { - static thread_local auto schema = [] { - schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, COLUMNS), NAME, COLUMNS, +// The "columns" table lists the definitions of all columns in all tables +// and views. Its schema needs to be identical to the one in Cassandra because +// it is the API through which drivers inspect the list of columns in a table +// (e.g., cqlsh's "DESCRIBE TABLE" and "DESCRIBE MATERIALIZED VIEW" get their +// information from the columns table). +// The "view_virtual_columns" table is an additional table with exactly the +// same schema (both are created by columns_schema()), but has a separate +// list of "virtual" columns. Those are used in materialized views for keeping +// rows without data alive (see issue #3362). These virtual columns cannot be +// listed in the regular "columns" table, otherwise the "DESCRIBE MATERIALIZED +// VIEW" would list them - while it should only list real, selected, columns. + +static schema_ptr columns_schema(const char* columns_table_name) { + schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, columns_table_name), NAME, columns_table_name, // partition key {{"keyspace_name", utf8_type}}, // clustering key @@ -326,10 +337,16 @@ schema_ptr columns() { // comment "column definitions" ))); - builder.set_gc_grace_seconds(schema_gc_grace); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(); - }(); + builder.set_gc_grace_seconds(schema_gc_grace); + builder.with_version(generate_schema_version(builder.uuid())); + return builder.build(); +} +schema_ptr columns() { + static thread_local auto schema = columns_schema(COLUMNS); + return schema; +} +schema_ptr view_virtual_columns() { + static thread_local auto schema = columns_schema(VIEW_VIRTUAL_COLUMNS); return schema; } @@ -1619,6 +1636,9 @@ static schema_mutations make_table_mutations(schema_ptr table, api::timestamp_ty if (with_columns_and_triggers) { for (auto&& column : table->v3().all_columns()) { + if (column.is_view_virtual()) { + throw std::logic_error("view_virtual column found in non-view table"); + } add_column_to_schema_mutation(table, column, timestamp, columns_mutation); } for (auto&& index : table->indices()) { @@ -1631,7 +1651,8 @@ static schema_mutations make_table_mutations(schema_ptr table, api::timestamp_ty } } - return schema_mutations{std::move(m), std::move(columns_mutation), std::move(indices_mutation), std::move(dropped_columns_mutation), + return schema_mutations{std::move(m), std::move(columns_mutation), stdx::nullopt, + std::move(indices_mutation), std::move(dropped_columns_mutation), std::move(scylla_tables_mutation)}; } @@ -1690,6 +1711,7 @@ static void make_update_columns_mutations(schema_ptr old_table, bool from_thrift, std::vector& mutations) { mutation columns_mutation(columns(), partition_key::from_singular(*columns(), old_table->ks_name())); + mutation view_virtual_columns_mutation(view_virtual_columns(), partition_key::from_singular(*columns(), old_table->ks_name())); auto diff = difference(old_table->v3().columns_by_name(), new_table->v3().columns_by_name()); @@ -1701,17 +1723,25 @@ static void make_update_columns_mutations(schema_ptr old_table, if (from_thrift && !column.is_regular()) { continue; } - - drop_column_from_schema_mutation(columns(), old_table, column.name_as_text(), timestamp, mutations); + if (column.is_view_virtual()) { + drop_column_from_schema_mutation(view_virtual_columns(), old_table, column.name_as_text(), timestamp, mutations); + } else { + drop_column_from_schema_mutation(columns(), old_table, column.name_as_text(), timestamp, mutations); + } } // newly added columns and old columns with updated attributes for (auto&& name : boost::range::join(diff.entries_differing, diff.entries_only_on_right)) { const column_definition& column = *new_table->v3().columns_by_name().at(name); - add_column_to_schema_mutation(new_table, column, timestamp, columns_mutation); + if (column.is_view_virtual()) { + add_column_to_schema_mutation(new_table, column, timestamp, view_virtual_columns_mutation); + } else { + add_column_to_schema_mutation(new_table, column, timestamp, columns_mutation); + } } mutations.emplace_back(std::move(columns_mutation)); + mutations.emplace_back(std::move(view_virtual_columns_mutation)); // dropped columns auto dc_diff = difference(old_table->dropped_columns(), new_table->dropped_columns()); @@ -1761,7 +1791,11 @@ static void make_drop_table_or_view_mutations(schema_ptr schema_table, m.partition().apply_delete(*schema_table, ckey, tombstone(timestamp, gc_clock::now())); mutations.emplace_back(m); for (auto& column : table_or_view->v3().all_columns()) { - drop_column_from_schema_mutation(columns(), table_or_view, column.name_as_text(), timestamp, mutations); + if (column.is_view_virtual()) { + drop_column_from_schema_mutation(view_virtual_columns(), table_or_view, column.name_as_text(), timestamp, mutations); + } else { + drop_column_from_schema_mutation(columns(), table_or_view, column.name_as_text(), timestamp, mutations); + } } for (auto& column : table_or_view->dropped_columns() | boost::adaptors::map_keys) { drop_column_from_schema_mutation(dropped_columns(), table_or_view, column, timestamp, mutations); @@ -1796,11 +1830,12 @@ static future read_table_mutations(distributed create_columns_from_column_rows(const quer const sstring& keyspace, const sstring& table, /*, AbstractType rawComparator, */ - bool is_super) + bool is_super, + column_view_virtual is_view_virtual) { std::vector columns; for (auto&& row : rows.rows()) { @@ -2204,7 +2241,7 @@ static std::vector create_columns_from_column_rows(const quer } } - columns.emplace_back(name_bytes, type, kind, position); + columns.emplace_back(name_bytes, type, kind, position, is_view_virtual); } return columns; } @@ -2247,10 +2284,16 @@ view_ptr create_view_from_mutations(const schema_ctxt& ctxt, schema_mutations sm schema_builder builder{ks_name, cf_name, id}; prepare_builder_from_table_row(ctxt, builder, row); - auto column_defs = create_columns_from_column_rows(query::result_set(sm.columns_mutation()), ks_name, cf_name, false); + auto column_defs = create_columns_from_column_rows(query::result_set(sm.columns_mutation()), ks_name, cf_name, false, column_view_virtual::no); for (auto&& cdef : column_defs) { builder.with_column(cdef); } + if (sm.view_virtual_columns_mutation()) { + column_defs = create_columns_from_column_rows(query::result_set(*sm.view_virtual_columns_mutation()), ks_name, cf_name, false, column_view_virtual::yes); + for (auto&& cdef : column_defs) { + builder.with_column(cdef); + } + } if (version) { builder.with_version(*version); @@ -2320,12 +2363,17 @@ static schema_mutations make_view_mutations(view_ptr view, api::timestamp_type t mutation columns_mutation(columns(), pkey); + mutation view_virtual_columns_mutation(view_virtual_columns(), pkey); mutation dropped_columns_mutation(dropped_columns(), pkey); mutation indices_mutation(indexes(), pkey); if (with_columns) { for (auto&& column : view->v3().all_columns()) { - add_column_to_schema_mutation(view, column, timestamp, columns_mutation); + if (column.is_view_virtual()) { + add_column_to_schema_mutation(view, column, timestamp, view_virtual_columns_mutation); + } else { + add_column_to_schema_mutation(view, column, timestamp, columns_mutation); + } } for (auto&& e : view->dropped_columns()) { @@ -2338,7 +2386,8 @@ static schema_mutations make_view_mutations(view_ptr view, api::timestamp_type t auto scylla_tables_mutation = make_scylla_tables_mutation(view, timestamp); - return schema_mutations{std::move(m), std::move(columns_mutation), std::move(indices_mutation), std::move(dropped_columns_mutation), + return schema_mutations{std::move(m), std::move(columns_mutation), std::move(view_virtual_columns_mutation), + std::move(indices_mutation), std::move(dropped_columns_mutation), std::move(scylla_tables_mutation)}; } @@ -2634,7 +2683,7 @@ data_type parse_type(sstring str) std::vector all_tables() { return { keyspaces(), tables(), scylla_tables(), columns(), dropped_columns(), triggers(), - views(), indexes(), types(), functions(), aggregates(), + views(), indexes(), types(), functions(), aggregates(), view_virtual_columns() }; } diff --git a/db/schema_tables.hh b/db/schema_tables.hh index 56cdbc24b1..7a3e8a1de5 100644 --- a/db/schema_tables.hh +++ b/db/schema_tables.hh @@ -91,8 +91,10 @@ static constexpr auto TYPES = "types"; static constexpr auto FUNCTIONS = "functions"; static constexpr auto AGGREGATES = "aggregates"; static constexpr auto INDEXES = "indexes"; +static constexpr auto VIEW_VIRTUAL_COLUMNS = "view_virtual_columns"; // Scylla specific schema_ptr columns(); +schema_ptr view_virtual_columns(); schema_ptr dropped_columns(); schema_ptr indexes(); schema_ptr tables(); diff --git a/idl/frozen_schema.idl.hh b/idl/frozen_schema.idl.hh index 01e6e8b892..eb0ec3f34c 100644 --- a/idl/frozen_schema.idl.hh +++ b/idl/frozen_schema.idl.hh @@ -30,6 +30,7 @@ class schema_mutations { std::experimental::optional indices_canonical_mutation()[[version 2.0]]; std::experimental::optional dropped_columns_canonical_mutation()[[version 2.0]]; std::experimental::optional scylla_tables_canonical_mutation()[[version 2.0]]; + std::experimental::optional view_virtual_columns_canonical_mutation()[[version 2.4]]; }; class schema stub [[writable]] { diff --git a/schema_mutations.cc b/schema_mutations.cc index 898652cc7a..50026baf9c 100644 --- a/schema_mutations.cc +++ b/schema_mutations.cc @@ -29,9 +29,11 @@ schema_mutations::schema_mutations(canonical_mutation columnfamilies, bool is_view, stdx::optional indices, stdx::optional dropped_columns, - stdx::optional scylla_tables) + stdx::optional scylla_tables, + stdx::optional view_virtual_columns) : _columnfamilies(columnfamilies.to_mutation(is_view ? db::schema_tables::views() : db::schema_tables::tables())) , _columns(columns.to_mutation(db::schema_tables::columns())) + , _view_virtual_columns(view_virtual_columns ? mutation_opt{view_virtual_columns.value().to_mutation(db::schema_tables::view_virtual_columns())} : stdx::nullopt) , _indices(indices ? mutation_opt{indices.value().to_mutation(db::schema_tables::indexes())} : stdx::nullopt) , _dropped_columns(dropped_columns ? mutation_opt{dropped_columns.value().to_mutation(db::schema_tables::dropped_columns())} : stdx::nullopt) , _scylla_tables(scylla_tables ? mutation_opt{scylla_tables.value().to_mutation(db::schema_tables::scylla_tables())} : stdx::nullopt) @@ -40,6 +42,9 @@ schema_mutations::schema_mutations(canonical_mutation columnfamilies, void schema_mutations::copy_to(std::vector& dst) const { dst.push_back(_columnfamilies); dst.push_back(_columns); + if (_view_virtual_columns) { + dst.push_back(*_view_virtual_columns); + } if (_indices) { dst.push_back(*_indices); } @@ -68,6 +73,9 @@ table_schema_version schema_mutations::digest() const { md5_hasher h; db::schema_tables::feed_hash_for_schema_digest(h, _columnfamilies); db::schema_tables::feed_hash_for_schema_digest(h, _columns); + if (_view_virtual_columns && !_view_virtual_columns->partition().empty()) { + db::schema_tables::feed_hash_for_schema_digest(h, *_view_virtual_columns); + } if (_indices && !_indices->partition().empty()) { db::schema_tables::feed_hash_for_schema_digest(h, *_indices); } @@ -94,6 +102,7 @@ static mutation_opt compact(const mutation& m) { bool schema_mutations::operator==(const schema_mutations& other) const { return compact(_columnfamilies) == compact(other._columnfamilies) && compact(_columns) == compact(other._columns) + && compact(_view_virtual_columns) == compact(other._view_virtual_columns) && compact(_indices) == compact(other._indices) && compact(_dropped_columns) == compact(other._dropped_columns) && compact(_scylla_tables) == compact(other._scylla_tables) @@ -105,7 +114,8 @@ bool schema_mutations::operator!=(const schema_mutations& other) const { } bool schema_mutations::live() const { - return _columnfamilies.live_row_count() > 0 || _columns.live_row_count() > 0; + return _columnfamilies.live_row_count() > 0 || _columns.live_row_count() > 0 || + (_view_virtual_columns && _view_virtual_columns->live_row_count() > 0); } bool schema_mutations::is_view() const { diff --git a/schema_mutations.hh b/schema_mutations.hh index a9d38fdabe..e355c556d2 100644 --- a/schema_mutations.hh +++ b/schema_mutations.hh @@ -31,14 +31,16 @@ class schema_mutations { mutation _columnfamilies; mutation _columns; + mutation_opt _view_virtual_columns; mutation_opt _indices; mutation_opt _dropped_columns; mutation_opt _scylla_tables; public: - schema_mutations(mutation columnfamilies, mutation columns, mutation_opt indices, mutation_opt dropped_columns, + schema_mutations(mutation columnfamilies, mutation columns, mutation_opt view_virtual_columns, mutation_opt indices, mutation_opt dropped_columns, mutation_opt scylla_tables) : _columnfamilies(std::move(columnfamilies)) , _columns(std::move(columns)) + , _view_virtual_columns(std::move(view_virtual_columns)) , _indices(std::move(indices)) , _dropped_columns(std::move(dropped_columns)) , _scylla_tables(std::move(scylla_tables)) @@ -48,7 +50,8 @@ public: bool is_view, stdx::optional indices, stdx::optional dropped_columns, - stdx::optional scylla_tables); + stdx::optional scylla_tables, + stdx::optional view_virtual_columns); schema_mutations(schema_mutations&&) = default; schema_mutations& operator=(schema_mutations&&) = default; @@ -65,6 +68,10 @@ public: return _columns; } + const mutation_opt& view_virtual_columns_mutation() const { + return _view_virtual_columns; + } + const mutation_opt& scylla_tables() const { return _scylla_tables; } @@ -88,6 +95,13 @@ public: return canonical_mutation(_columns); } + stdx::optional view_virtual_columns_canonical_mutation() const { + if (_view_virtual_columns) { + return canonical_mutation(*_view_virtual_columns); + } + return {}; + } + stdx::optional indices_canonical_mutation() const { if (_indices) { return canonical_mutation(*_indices); From 3f3a76aa8f90a4921529f66386b2e6e5f928e523 Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Thu, 16 Aug 2018 15:34:22 +0300 Subject: [PATCH 4/9] Do not allow selecting a virtual column For issue #3362, we will need to add to a materialized view also unselected base-table columns as "virtual columns". We need these columns to exist to keep view rows alive, but we don't want the user to be able to see them. In this patch we prevent SELECTing the virtual columns of the view, and also exclude the virtual columns from a "SELECT *" on a view. Signed-off-by: Nadav Har'El --- cql3/column_identifier.cc | 6 +++++- cql3/selection/selection.cc | 15 ++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/cql3/column_identifier.cc b/cql3/column_identifier.cc index 8ebc0b5642..cffc3f18c3 100644 --- a/cql3/column_identifier.cc +++ b/cql3/column_identifier.cc @@ -127,7 +127,11 @@ column_identifier::new_selector_factory(database& db, schema_ptr schema, std::ve if (!def) { throw exceptions::invalid_request_exception(sprint("Undefined name %s in selection clause", _text)); } - + // Do not allow explicitly selecting hidden columns. We also skip them on + // "SELECT *" (see selection::wildcard()). + if (def->is_view_virtual()) { + throw exceptions::invalid_request_exception(sprint("Undefined name %s in selection clause", _text)); + } return selection::simple_selector::new_factory(def->name_as_text(), add_and_get_index(*def, defs), def->type); } diff --git a/cql3/selection/selection.cc b/cql3/selection/selection.cc index 69d884e0d7..92361f1c93 100644 --- a/cql3/selection/selection.cc +++ b/cql3/selection/selection.cc @@ -40,6 +40,7 @@ */ #include +#include #include "cql3/selection/selection.hh" #include "cql3/selection/selector_factories.hh" @@ -208,9 +209,17 @@ protected: ::shared_ptr selection::wildcard(schema_ptr schema) { auto columns = schema->all_columns_in_select_order(); - auto cds = boost::copy_range>(columns | boost::adaptors::transformed([](const column_definition& c) { - return &c; - })); + // filter out hidden columns, which should not be seen by the + // user when doing "SELECT *". We also disallow selecting them + // individually (see column_identifier::new_selector_factory()). + auto cds = boost::copy_range>( + columns | + boost::adaptors::filtered([](const column_definition& c) { + return !c.is_view_virtual(); + }) | + boost::adaptors::transformed([](const column_definition& c) { + return &c; + })); return simple_selection::make(schema, std::move(cds), true); } From 782baa44efe9cbd63e5b5a634a58c76afce8e799 Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Thu, 16 Aug 2018 15:38:27 +0300 Subject: [PATCH 5/9] Materialized Views: fill virtual columns The add_cells_to_view() function usually adds selected cells from the base table to the view mutation. For issue #3362, we sometimes want to also add unselected cells as "virtual" cells - truncated versions of the base-table cells just without the values. This patch contains the code to fill the virtual columns' data using the regular columns from the base table. This patch does not yet actually *add* any virtual columns to the schema, so until that is done (in the next patch), this patch will not yet cause any behavior change. This is important for bisectability. Refs #3362. Signed-off-by: Nadav Har'El --- db/view/view.cc | 89 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/db/view/view.cc b/db/view/view.cc index a508221bc6..31edb0882e 100644 --- a/db/view/view.cc +++ b/db/view/view.cc @@ -376,10 +376,99 @@ static const column_definition* view_column(const schema& base, const schema& vi return view.get_column_definition(base.regular_column_at(base_id).name()); } +// Utility function for taking an existing cell, and creating a copy with an +// empty value instead of the original value, but with the original liveness +// information (expiration and deletion time) unchanged. +static atomic_cell make_empty(const atomic_cell_view& ac) { + if (ac.is_live_and_has_ttl()) { + return atomic_cell::make_live(*empty_type, ac.timestamp(), bytes_view{}, ac.expiry(), ac.ttl()); + } else if (ac.is_live()) { + return atomic_cell::make_live(*empty_type, ac.timestamp(), bytes_view{}); + } else { + return atomic_cell::make_dead(ac.timestamp(), ac.deletion_time()); + } +} + +// Utility function for taking an existing collection which has both keys and +// values (i.e., either a list or map, but not a set), and creating a copy of +// this collection with all the values replaced by empty values. +// The make_empty() function above is used to ensure that liveness information +// is copied unchanged. +static collection_mutation make_empty( + const collection_mutation_view& cm, + const collection_type_impl& ctype) { + collection_type_impl::mutation n; + cm.data.with_linearized([&] (bytes_view bv) { + auto m_view = ctype.deserialize_mutation_form(bv); + n.tomb = m_view.tomb; + for (auto&& c : m_view.cells) { + n.cells.emplace_back(c.first, make_empty(c.second)); + } + }); + return ctype.serialize_mutation_form(n); +} + +// In some cases, we need to copy to a view table even columns which have not +// been SELECTed. For these columns we only need to save liveness information +// (timestamp, deletion, ttl), but not the value. We call these columns +// "virtual columns", and the reason why we need them is explained in +// issue #3362. The following function, maybe_make_virtual() takes a full +// value c (taken from the base table) for the given column col, and if that +// column is a virtual column it modifies c to remove the unwanted value. +// The function create_virtual_column(), below, creates the virtual column in +// the view schema, that maybe_make_virtual() will fill. +static void maybe_make_virtual(atomic_cell_or_collection& c, const column_definition* col) { + if (!col->is_view_virtual()) { + // This is a regular selected column. Leave c untouched. + return; + } + if (col->type->is_atomic()) { + // A virtual cell for an atomic value or frozen collection. Its + // value is empty (of type empty_type). + if (col->type != empty_type) { + throw std::logic_error("Virtual cell has wrong type"); + } + c = make_empty(c.as_atomic_cell(*col)); + } else { + if (!col->type->is_collection()) { + // TODO: when we support unfrozen UDT (#2201), we will need to + // supported it here too. + throw std::logic_error("Virtual cell is neither atomic nor collection"); + } + auto ctype = static_pointer_cast(col->type); + if (ctype->is_list()) { + // A list has integers as keys, and values (the list's items). + // We just need to build a list with the same keys (and liveness + // information), but empty values. + auto ltype = static_cast(col->type.get()); + if (ltype->get_elements_type() != empty_type) { + throw std::logic_error("Virtual cell has wrong list type"); + } + c = make_empty(c.as_collection_mutation(), *ctype); + } else if (ctype->is_map()) { + // A map has keys and values. We just need to build a map with + // the same keys (and liveness information), but empty values. + auto mtype = static_cast(col->type.get()); + if (mtype->get_values_type() != empty_type) { + throw std::logic_error("Virtual cell has wrong map type"); + } + c = make_empty(c.as_collection_mutation(), *ctype); + } else if (ctype->is_set()) { + // A set has just keys (and liveness information). We need + // all of it as a virtual column, unfortunately, so we + // leave c unmodified. + } else { + // A collection can't be anything but a list, map or set... + throw std::logic_error("Virtual cell has unexpected collection type"); + } + } +} + static void add_cells_to_view(const schema& base, const schema& view, row base_cells, row& view_cells) { base_cells.for_each_cell([&] (column_id id, atomic_cell_or_collection& c) { auto* view_col = view_column(base, view, id); if (view_col && !view_col->is_primary_key()) { + maybe_make_virtual(c, view_col); view_cells.append_cell(view_col->id, std::move(c)); } }); From 30f721afabb4c0f8bf7c3c32e0ab02bcb36b77e4 Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Thu, 16 Aug 2018 15:42:22 +0300 Subject: [PATCH 6/9] Materialized Views: add unselected columns as virtual columns When a view's partition key contains only columns from the base's partition key (and not an additional one), the liveness (existance or disappearance) of a view-table row is tied to the liveness of the base table row - and that depends not only on selected columns (base-table columns SELECTed to also appear in the view) but also on unselected columns. This means that we may need to keep a view row alive even without data, just because some unselected column is alive in the base table. Before this patch we tried to build a single "row marker" in the view column which summarizes the liveness information in all unselected columns, but this proved unworkable, as explained in issue #3362 and as will be demonstrated in unit tests in a later patch. Because we can't replace several unselected cells by one row marker, what we do in this patch is to add for each for the unselected cell a "virtual cell" which contains the cell's liveness information (timestamp, deletion, ttl) but not its value. For collections, we can't represent the entire collection by one virtual cell, and rather need a collection of virtual cells. This patch just adds the virtual columns to the view schema. Code in the previous patch, when it notices the virtual columns in the view's schema, added the appropriate content into these columns. We may need to add virtual columns to a view when first created, but also when an unselected column is added to the base table with "ALTER TABLE", so both are supported in this patch. Fixes #3362. Signed-off-by: Nadav Har'El --- cql3/statements/alter_table_statement.cc | 25 ++++++++++------- cql3/statements/create_view_statement.cc | 17 ++++++++++++ db/view/view.cc | 35 ++++++++++++++++++++++++ db/view/view.hh | 15 ++++++++++ 4 files changed, 82 insertions(+), 10 deletions(-) diff --git a/cql3/statements/alter_table_statement.cc b/cql3/statements/alter_table_statement.cc index 2f5a9fe4cd..34b088b0d5 100644 --- a/cql3/statements/alter_table_statement.cc +++ b/cql3/statements/alter_table_statement.cc @@ -246,18 +246,22 @@ future> alter_table_statement::a cfm.with_column(column_name->name(), type, _is_static ? column_kind::static_column : column_kind::regular_column); - // Adding a column to a table which has an include all view requires the column to be added to the view - // as well. If the view has a regular base column in its PK, then the column ID needs to be updated in - // view_info; for that, rebuild the schema. + // Adding a column to a base table always requires updating the view + // schemas: If the view includes all columns it should include the new + // column, but if it doesn't, it may need to include the new + // unselected column as a virtual column. The case when it we + // shouldn't add a virtual column is when the view has in its PK one + // of the base's regular columns - but even in this case we need to + // rebuild the view schema, to update the column ID. if (!_is_static) { for (auto&& view : cf.views()) { - if (view->view_info()->include_all_columns() || view->view_info()->base_non_pk_column_in_view_pk()) { - schema_builder builder(view); - if (view->view_info()->include_all_columns()) { - builder.with_column(column_name->name(), type); - } - view_updates.push_back(view_ptr(builder.build())); + schema_builder builder(view); + if (view->view_info()->include_all_columns()) { + builder.with_column(column_name->name(), type); + } else if (!view->view_info()->base_non_pk_column_in_view_pk()) { + db::view::create_virtual_column(builder, column_name->name(), type); } + view_updates.push_back(view_ptr(builder.build())); } } @@ -347,7 +351,8 @@ future> alter_table_statement::a validate_column_rename(db, *schema, *from, *to); cfm.with_column_rename(from->name(), to->name()); - // If the view includes a renamed column, it must be renamed in the view table and the definition. + // If the view includes a renamed column, it must be renamed in + // the view table and the definition. for (auto&& view : cf.views()) { if (view->get_column_definition(from->name())) { schema_builder builder(view); diff --git a/cql3/statements/create_view_statement.cc b/cql3/statements/create_view_statement.cc index 422a27b5ff..40c64e2944 100644 --- a/cql3/statements/create_view_statement.cc +++ b/cql3/statements/create_view_statement.cc @@ -275,6 +275,7 @@ future> create_view_statement::a std::vector missing_pk_columns; std::vector target_non_pk_columns; + std::vector unselected_columns; // We need to include all of the primary key columns from the base table in order to make sure that we do not // overwrite values in the view. We cannot support "collapsing" the base table into a smaller number of rows in @@ -292,6 +293,9 @@ future> create_view_statement::a if (included_def && !def_in_target_pk) { target_non_pk_columns.push_back(&def); } + if (!included_def && !def_in_target_pk && !def.is_static()) { + unselected_columns.push_back(&def); + } if (def.is_primary_key() && !def_in_target_pk) { missing_pk_columns.push_back(&def); } @@ -321,6 +325,19 @@ future> create_view_statement::a add_columns(target_partition_keys, column_kind::partition_key); add_columns(target_clustering_keys, column_kind::clustering_key); add_columns(target_non_pk_columns, column_kind::regular_column); + // Add all unselected columns (base-table columns which are not selected + // in the view) as "virtual columns" - columns which have timestamp and + // ttl information, but an empty value. These are needed to keep view + // rows alive when the base row is alive, even if the view row has no + // data, just a key (see issue #3362). The virtual columns are not needed + // when the view pk adds a regular base column (i.e., has_non_pk_column) + // because in that case, the liveness of that base column is what + // determines the liveness of the view row. + if (!has_non_pk_column) { + for (auto* def : unselected_columns) { + db::view::create_virtual_column(builder, def->name(), def->type); + } + } _properties.properties()->apply_to_builder(builder, proxy.get_db().local().get_config().extensions()); if (builder.default_time_to_live().count() > 0) { diff --git a/db/view/view.cc b/db/view/view.cc index 31edb0882e..3144814b45 100644 --- a/db/view/view.cc +++ b/db/view/view.cc @@ -464,6 +464,41 @@ static void maybe_make_virtual(atomic_cell_or_collection& c, const column_defini } } +void create_virtual_column(schema_builder& builder, const bytes& name, const data_type& type) { + if (type->is_atomic()) { + builder.with_column(name, empty_type, column_kind::regular_column, column_view_virtual::yes); + return; + } + // A multi-cell collection (a frozen collection is a single + // cell and handled handled in the is_atomic() case above). + // The virtual version can't be just one cell, it has to be + // itself a collection of cells. + auto ctype = dynamic_pointer_cast(type); + if (!ctype) { + // TODO: When #2201 is done, we also need to handle here + // unfrozen UDTs. + throw exceptions::invalid_request_exception(sprint("Unsupported unselected multi-cell non-collection column %s for Materialized View", name)); + } + if (ctype->is_list()) { + // A list has ints as keys, and values (the list's items). + // We just need these intss, i.e., a list of empty items. + builder.with_column(name, list_type_impl::get_instance(empty_type, true), column_kind::regular_column, column_view_virtual::yes); + } else if (ctype->is_map()) { + // A map has keys and values. We don't need these values, + // and can use empty values instead. + auto mtype = dynamic_pointer_cast(type); + builder.with_column(name, map_type_impl::get_instance(mtype->get_values_type(), empty_type, true), column_kind::regular_column, column_view_virtual::yes); + } else if (ctype->is_set()) { + // A set's cell has nothing beyond the keys, so the + // virtual version of a set is, unfortunately, a complete + // copy of the set. + builder.with_column(name, type, column_kind::regular_column, column_view_virtual::yes); + } else { + // A collection can't be anything but a list, map or set... + abort(); + } +} + static void add_cells_to_view(const schema& base, const schema& view, row base_cells, row& view_cells) { base_cells.for_each_cell([&] (column_id id, atomic_cell_or_collection& c) { auto* view_col = view_column(base, view, id); diff --git a/db/view/view.hh b/db/view/view.hh index 0113b596db..f098da4a1a 100644 --- a/db/view/view.hh +++ b/db/view/view.hh @@ -104,6 +104,21 @@ query::clustering_row_ranges calculate_affected_clustering_ranges( future<> mutate_MV(const dht::token& base_token, std::vector mutations, db::view::stats& stats); +/** + * create_virtual_column() adds a "virtual column" to a schema builder. + * The definition of a "virtual column" is based on the given definition + * of a regular column, except that any value types are replaced by the + * empty type - and only the information needed to track column liveness + * is kept: timestamp, deletion, ttl, and keys in maps. + * In some cases we add such virtual columns for unselected columns in + * materialized views, for reasons explained in issue #3362. + * @param builder the schema_builder where we want to add the virtual column. + * @param name the name of the virtual column to be created. + * @param type of the base column for which we want to build a virtual column. + * When type is a multi-cell collection, so will be the virtual column. + */ + void create_virtual_column(schema_builder& builder, const bytes& name, const data_type& type); + } } From 6c0034138383f4e596e83a661c22fe255fc395b5 Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Thu, 16 Aug 2018 15:45:41 +0300 Subject: [PATCH 7/9] Materialized Views: no need for elaborate row marker calculations Now that we have separate virtual cells to represent unselected columns in a materialized view, we no longer need the elaborate row-marker liveness calculations which aimed (but failed) to do the same thing. So that code can be removed. Signed-off-by: Nadav Har'El --- db/view/view.cc | 44 -------------------------------------------- 1 file changed, 44 deletions(-) diff --git a/db/view/view.cc b/db/view/view.cc index 3144814b45..f949a99cd1 100644 --- a/db/view/view.cc +++ b/db/view/view.cc @@ -288,50 +288,6 @@ row_marker view_updates::compute_row_marker(const clustering_row& base_row) cons return cell.is_live_and_has_ttl() ? row_marker(cell.timestamp(), cell.ttl(), cell.expiry()) : row_marker(cell.timestamp()); } - if (_view_info.include_all_columns()) { - return marker; - } - - auto timestamp = marker.timestamp(); - bool has_non_expiring_live_cell = false; - expiry_opt biggest_expiry; - gc_clock::duration ttl = gc_clock::duration::min(); - if (marker.is_expiring()) { - biggest_expiry = marker.expiry(); - ttl = marker.ttl(); - } - auto maybe_update_expiry_and_ttl = [&] (atomic_cell_view&& cell) { - timestamp = std::max(timestamp, cell.timestamp()); - if (cell.is_live_and_has_ttl()) { - if (cell.expiry() >= biggest_expiry.value_or(cell.expiry())) { - biggest_expiry = cell.expiry(); - ttl = cell.ttl(); - } - } else if (cell.is_live()) { - has_non_expiring_live_cell = true; - } - }; - - // Iterate over regular cells not in the view, as we already have the timestamps of the included columns. - base_row.cells().for_each_cell([&] (column_id id, const atomic_cell_or_collection& c) { - auto& def = _base->regular_column_at(id); - if (_view_info.view_column(def)) { - return; - } - if (def.is_atomic()) { - maybe_update_expiry_and_ttl(c.as_atomic_cell(def)); - } else { - auto ctype = static_pointer_cast(def.type); - ctype->for_each_cell(c.as_collection_mutation(), maybe_update_expiry_and_ttl); - } - }); - - if ((marker.is_live() && !marker.is_expiring()) || has_non_expiring_live_cell) { - return row_marker(timestamp); - } - if (biggest_expiry) { - return row_marker(timestamp, ttl, *biggest_expiry); - } return marker; } From 5ca974547afd33ac86875c6482460d2eaa654b8f Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Thu, 16 Aug 2018 15:48:07 +0300 Subject: [PATCH 8/9] Materialized Views: unit test reproducing fixed issue #3362 This patch includes several tests reproducing issue #3362 - the effect of unselected columns on view-table row liveness - and confirming that it was fixed. We found two example scenarios to demonstrate the bug. One scenario, test_3362_with_ttls(), involves an unselected column with a TTL. The other, test_3362_no_ttls() demonstrates the same bug without using TTL, and using explicit updates and deletions instead. These two tests are heavily commented, to explain what they test, and why. In addition to these two basic tests, we also include similar tests involving multiple items in a collection column, instead of multiple separate columns, which demonstrate the same problem exists there (and why, unfortunately, the "virtual columns" we add in that case need to be collections too). We also test that the virtual columns - and the problems they fix - work not only on columns originally created with the view, but also with unselected columns added later with ALTER TABLE on the base table. Refs #3362. Signed-off-by: Nadav Har'El --- tests/view_complex_test.cc | 342 +++++++++++++++++++++++++++++++++++++ 1 file changed, 342 insertions(+) diff --git a/tests/view_complex_test.cc b/tests/view_complex_test.cc index bd5b04204c..c3f570805a 100644 --- a/tests/view_complex_test.cc +++ b/tests/view_complex_test.cc @@ -1216,3 +1216,345 @@ SEASTAR_TEST_CASE(test_marker_timestamp_is_not_shadowed_by_previous_updatewith_f }); }, cfg); } + +// A reproducer for issue #3362, not involving TTLs. +// The test involves a view that selects no column except the base's primary +// key, so view rows contain no cells besides a row marker, so as a base +// row appears and disappears as we update and delete individual cells in +// that row, we need to insert and delete the row marker with varying +// timestamps to make sure the view row appears and disappears as needed. +// But as we shall see, after enough trickery, we run out of timestamps +// to use to revive the row marker, and fail to revive it. So to fix +// issue #3362, we needed to remember all cells separately ("virtual +// cells"). +SEASTAR_TEST_CASE(test_3362_no_ttls) { + return do_with_cql_env_thread([] (auto& e) { + e.execute_cql("create table cf (p int, c int, a int, b int, primary key (p, c))").get(); + e.execute_cql("create materialized view vcf as select p, c from cf " + "where p is not null and c is not null " + "primary key (p, c)").get(); + + // In row p=1 c=1, insert two cells - b=1 at timestamp 10, a=1 at timestamp 20: + BOOST_TEST_PASSPOINT(); + e.execute_cql("update cf using timestamp 10 set b = 1 where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + + BOOST_TEST_PASSPOINT(); + e.execute_cql("update cf using timestamp 20 set a = 1 where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + + + // Delete just a=1 (with timestamp 21). The base row will still exist (with b=1), + // and accordingly the view row too: + BOOST_TEST_PASSPOINT(); + e.execute_cql("delete a from cf using timestamp 21 where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + + // At this point, we still have the base row with b=1 at timestamp 10 + // (and a=1 was deleted at timestamp 21). If we delete the b=1 at + // timestamp 11, nothing will remain in the base row, and the view + // row should disappear as well: + BOOST_TEST_PASSPOINT(); + e.execute_cql("delete b from cf using timestamp 11 where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().is_empty(); + }); + + // Now we finally reproduce #3362: We now add b=1 again, at timestamp + // 12 (it was earlier deleted in timestamp 11). The base row is live + // again, and so should the view row. + // With issue #3362, the view row failed to become alive. The reason + // is that to make the above is_empty() succeed, the implementation + // deletes the row marker with timestamp 21 (the maximal timestamp + // seen in the row). But now, we add a row marker again with the same + // timestamp 21, but the deletion wins so the row marker is still + // missing. (note that had data won over deletions, the is_empty() + // test above would have failed instead). + BOOST_TEST_PASSPOINT(); + e.execute_cql("update cf using timestamp 12 set b = 1 where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + }); +} + +// This is another reproducer for issue #3362, using TTLs instead of +// numerous back-and-forth additions and deletions. +SEASTAR_TEST_CASE(test_3362_with_ttls) { + return do_with_cql_env_thread([] (auto& e) { + e.execute_cql("create table cf (p int, c int, a int, b int, primary key (p, c))").get(); + e.execute_cql("create materialized view vcf as select p, c from cf " + "where p is not null and c is not null " + "primary key (p, c)").get(); + + // In row p=1 c=1, insert two cells - a=1 with ttl, and b=1 without + // ttl. The ttl'ed cell is inserted first, with a newer timestamp. + // The problem is that the view row's marker gets, with a new + // timestamp, a ttl. Then, when we go to add another column with an + // older timestamp, and try to set the row marker without a + // ttl - the older timestamp of this update looses, and we wrongly + // remain with a ttl on the view row marker. + BOOST_TEST_PASSPOINT(); + e.execute_cql("update cf using timestamp 2 and ttl 5 set a = 1 where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + BOOST_TEST_PASSPOINT(); + e.execute_cql("update cf using timestamp 1 set b = 1 where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + // Pass the time 6 seconds forward. Cell 'a' will have expired, but + // cell 'b' will still exist, so the base row still exists and the + // corresponding view row should also exist too. + forward_jump_clocks(6s); + BOOST_TEST_PASSPOINT(); + // verify that the base row still exists (cell b didn't expire) + eventually([&] { + auto msg = e.execute_cql("select * from cf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)}, {}, {{int32_type->decompose(1)}} }}); + }); + BOOST_TEST_PASSPOINT(); + // verify that the view row still exists too. + // This check failing is issue #3362. + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + }); +} + +// ‎‎‎The following are more test for issue #3362, same as test_3362_not_ttls +// and test_3362_with_ttls, just with a collection with items "1" and "2" +// instead of separate columns a and b. For brevity, comments were removed, +// so refer to the comments in the original code above. +enum class collection_kind { set, list, map }; +void do_test_3362_no_ttls_with_collections(cql_test_env& e, collection_kind t) { + sstring type, pref, suf; + switch(t) { + case collection_kind::set: + type = "set"; + pref = "{"; + suf = "}"; + break; + case collection_kind::list: + type = "list"; + pref = "["; + suf = "]"; + break; + case collection_kind::map: + type = "map"; + pref = "{"; + suf = " : 17}"; + break; + } + e.execute_cql(sprint("create table cf (p int, c int, a %s, primary key (p, c))", type)).get(); + e.execute_cql("create materialized view vcf as select p, c from cf " + "where p is not null and c is not null " + "primary key (p, c)").get(); + e.execute_cql(sprint("update cf using timestamp 10 set a = a + %s2%s where p = 1 and c = 1", pref, suf)).get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + e.execute_cql(sprint("update cf using timestamp 20 set a = a + %s1%s where p = 1 and c = 1", pref, suf)).get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + if (t == collection_kind::map) { + e.execute_cql("delete a[1] from cf using timestamp 21 where p = 1 and c = 1").get(); + } else { + e.execute_cql(sprint("update cf using timestamp 21 set a = a - %s1%s where p = 1 and c = 1", pref, suf)).get(); + } + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + if (t == collection_kind::map) { + e.execute_cql("delete a[2] from cf using timestamp 11 where p = 1 and c = 1").get(); + } else { + e.execute_cql(sprint("update cf using timestamp 11 set a = a - %s2%s where p = 1 and c = 1", pref, suf)).get(); + } + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().is_empty(); + }); + e.execute_cql(sprint("update cf using timestamp 12 set a = a + %s2%s where p = 1 and c = 1", pref, suf)).get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); +} +SEASTAR_TEST_CASE(test_3362_no_ttls_with_set) { + return do_with_cql_env_thread([] (auto& e) { + do_test_3362_no_ttls_with_collections(e, collection_kind::set); + }); +} +SEASTAR_TEST_CASE(test_3362_no_ttls_with_list) { + return do_with_cql_env_thread([] (auto& e) { + do_test_3362_no_ttls_with_collections(e, collection_kind::list); + }); +} +SEASTAR_TEST_CASE(test_3362_no_ttls_with_map) { + return do_with_cql_env_thread([] (auto& e) { + do_test_3362_no_ttls_with_collections(e, collection_kind::map); + }); +} + +void do_test_3362_with_ttls_with_collections(cql_test_env& e, collection_kind t) { + sstring type, pref, suf; + switch(t) { + case collection_kind::set: + type = "set"; + pref = "{"; + suf = "}"; + break; + case collection_kind::list: + type = "list"; + pref = "["; + suf = "]"; + break; + case collection_kind::map: + type = "map"; + pref = "{"; + suf = " : 17}"; + break; + } + e.execute_cql(sprint("create table cf (p int, c int, a %s, primary key (p, c))", type)).get(); + e.execute_cql("create materialized view vcf as select p, c from cf " + "where p is not null and c is not null " + "primary key (p, c)").get(); + e.execute_cql(sprint("update cf using timestamp 2 and ttl 5 set a = a + %s1%s where p = 1 and c = 1", pref, suf)).get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + e.execute_cql(sprint("update cf using timestamp 1 set a = a + %s2%s where p = 1 and c = 1", pref, suf)).get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + forward_jump_clocks(6s); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); +} +SEASTAR_TEST_CASE(test_3362_with_ttls_with_set) { + return do_with_cql_env_thread([] (auto& e) { + do_test_3362_with_ttls_with_collections(e, collection_kind::set); + }); +} +SEASTAR_TEST_CASE(test_3362_with_ttls_with_list) { + return do_with_cql_env_thread([] (auto& e) { + do_test_3362_with_ttls_with_collections(e, collection_kind::list); + }); +} +SEASTAR_TEST_CASE(test_3362_with_ttls_with_map) { + return do_with_cql_env_thread([] (auto& e) { + do_test_3362_with_ttls_with_collections(e, collection_kind::map); + }); +} + +// This is a version of test_3362_with_ttls with frozen collection fields +// instead of integer fields in test_3362_with_ttls. The intention is to +// verify that we properly fixed #3362 in this case - by replacing the +// frozen collection by a single virtual cell, not a collection. +SEASTAR_TEST_CASE(test_3362_with_ttls_frozen) { + return do_with_cql_env_thread([] (auto& e) { + e.execute_cql("create table cf (p int, c int, a frozen>, b frozen>, primary key (p, c))").get(); + e.execute_cql("create materialized view vcf as select p, c from cf " + "where p is not null and c is not null " + "primary key (p, c)").get(); + BOOST_TEST_PASSPOINT(); + e.execute_cql("update cf using timestamp 2 and ttl 5 set a = {1,2} where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + BOOST_TEST_PASSPOINT(); + e.execute_cql("update cf using timestamp 1 set b = {3,4} where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + forward_jump_clocks(6s); + BOOST_TEST_PASSPOINT(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + }); +} + +// This is a version of test_3362_with_ttls with the added twist that the +// unselected column involved did not exist when the base table and view +// were originally created, but only added later with an "alter table". +// For this test to work, "alter table" will need to add the virtual +// columns in the view table for the newly created unselected column in +// the base table. +SEASTAR_TEST_CASE(test_3362_with_ttls_alter_add) { + return do_with_cql_env_thread([] (auto& e) { + e.execute_cql("create table cf (p int, c int, primary key (p, c))").get(); + e.execute_cql("create materialized view vcf as select p, c from cf " + "where p is not null and c is not null " + "primary key (p, c)").get(); + // Add with "alter table" two additional columns to the base table - + // a and b. These are not selected in the materialized view, and we + // want to check that they are treated like unselected columns + // (namely, virtual columns are added to the view). + e.execute_cql("alter table cf add a int").get(); + e.execute_cql("alter table cf add b int").get(); + BOOST_TEST_PASSPOINT(); + e.execute_cql("update cf using timestamp 2 and ttl 5 set a = 1 where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + BOOST_TEST_PASSPOINT(); + e.execute_cql("update cf using timestamp 1 set b = 1 where p = 1 and c = 1").get(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + forward_jump_clocks(6s); + BOOST_TEST_PASSPOINT(); + eventually([&] { + auto msg = e.execute_cql("select * from cf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)}, {}, {{int32_type->decompose(1)}} }}); + }); + BOOST_TEST_PASSPOINT(); + eventually([&] { + auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0(); + assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }}); + }); + }); +} + +// test_3362_with_ttls_alter_add() above is about handling changes to virtual +// columns as the base table columns change, but only for the "add" case. +// Theoretically we could have had problems in the "drop" and "rename" cases +// as well, but today, those are not supported: +// 1. Today we do not allow "alter table drop" to drop any column from a base +// table with views - even unselected columns. +// If we every do allow this, we need to also check that we drop the +// virtual column from the view. +// 2. Today we do not allow "alter table rename" to rename any non-pk +// column, so unselected columns also cannot be renamed. If this +// limitation is ever lifted, we will need to check that if we +// rename an unselected base column, the virtual column in the view is +// also renamed. From 8c604921ac16808f8e4abb62463e0cbf1cf38f37 Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Thu, 16 Aug 2018 15:51:46 +0300 Subject: [PATCH 9/9] Materialized Views: test that virtual columns are not visible In the previous patches, we added "virtual columns" to materialized views to solve row liveness issues (issue #3362). Here we add a test that confirms that although these virtual columns exist in the view, they should not be visible to the user. They cannot be explicitly SELECTed from the view table, and a "SELECT *" will skip them. Refs #3362. Signed-off-by: Nadav Har'El --- tests/view_schema_test.cc | 49 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tests/view_schema_test.cc b/tests/view_schema_test.cc index 5b79e704a8..2f1b81e200 100644 --- a/tests/view_schema_test.cc +++ b/tests/view_schema_test.cc @@ -3298,3 +3298,52 @@ SEASTAR_TEST_CASE(test_alter_table_with_updates) { }); }); } + +// Test that a regular column which we did not add to the view is really +// not in the view. Even if to fix issue #3362 we add "virtual cells" +// for the unselected columns, those should not be visible to the end-user +// of the view table. +SEASTAR_TEST_CASE(test_unselected_column) { + return do_with_cql_env_thread([] (auto& e) { + e.execute_cql("create table cf (p int, c int, x int, y list, z set, w map, primary key (p, c))").get(); + e.execute_cql("create materialized view vcf as select p, c from cf " + "where p is not null and c is not null " + "primary key (c, p)").get(); + e.execute_cql("insert into cf (p, c, x) values (1, 2, 3)").get(); + BOOST_TEST_PASSPOINT(); + auto msg = e.execute_cql("select * from cf").get0(); + assert_that(msg).is_rows().with_size(1) + .with_row({{int32_type->decompose(1)}, {int32_type->decompose(2)}, {}, {int32_type->decompose(3)}, {}, {}}); + BOOST_TEST_PASSPOINT(); + // Check that when we ask for all of vcf's columns, we only get the + // ones we actually selected - c and p, not x, y, z, or w: + eventually([&] { + auto msg = e.execute_cql("select * from vcf").get0(); + assert_that(msg).is_rows().with_size(1) + .with_row({{int32_type->decompose(2)}, {int32_type->decompose(1)}}); + }); + // Check that we cannot explicitly select the x, y, z or w columns in + // vcf as they are not one of the columns we selected for the view. + try { + e.execute_cql("select x from vcf").get(); + BOOST_ASSERT(false); + } catch (exceptions::invalid_request_exception&) { + // we expect: exceptions::invalid_request_exception: Undefined name x in selection clause + } + try { + e.execute_cql("select y from vcf").get(); + BOOST_ASSERT(false); + } catch (exceptions::invalid_request_exception&) { + } + try { + e.execute_cql("select z from vcf").get(); + BOOST_ASSERT(false); + } catch (exceptions::invalid_request_exception&) { + } + try { + e.execute_cql("select w from vcf").get(); + BOOST_ASSERT(false); + } catch (exceptions::invalid_request_exception&) { + } + }); +}