Merge 'Materialized Views: row liveness correction' from Nadav

"
When a view's partition key contains only columns from the base's partition
key (and not an additional one), the liveness - existance or disappearance -
of a view-table row is tied to the liveness of the base table row. And
that, in turn, depends not only on selected columns (base-table columns
SELECTed to also appear in the view) but also on unselected columns.

This means that we may need to keep a view row alive even without data,
just because some unselected column is alive in the base table. Before this
patch set we tried to build a single "row marker" in the view column which
tried to summarize the liveness information in all unselected columns.
But this proved unworkable, as explained in issue #3362 and as will be
demonstrated in unit tests at the end of this series.

Because we can't replace several unselected cells by one row marker, what
we do in this series is to add for each for the unselected cells a "virtual
cell" which contains the cell's liveness information (timestamp, deletion,
ttl) but not its value. For collections, we can't represent the entire
collection by one virtual cell, and rather need a collection of virtual
cells.

Fixes #3362
"

* 'virtual-cols-v3' of https://github.com/nyh/scylla:
  Materialized Views: test that virtual columns are not visible
  Materialized Views: unit test reproducing fixed issue #3362
  Materialized Views: no need for elaborate row marker calculations
  Materialized Views: add unselected columns as virtual columns
  Materialized Views: fill virtual columns
  Do not allow selecting a virtual column
  schema: persist "view virtual" columns to a separate system table
  schema: add "view virtual" flag to schema's column_definition
  Add "empty" type name to CQL parser, but only for internal parsing
This commit is contained in:
Duarte Nunes
2018-08-29 14:32:38 +01:00
18 changed files with 747 additions and 105 deletions

View File

@@ -1531,12 +1531,22 @@ inMarkerForTuple returns [shared_ptr<cql3::tuples::in_raw> marker]
| ':' name=ident { $marker = new_tuple_in_bind_variables(name); }
;
comparatorType returns [shared_ptr<cql3_type::raw> t]
: n=native_type { $t = cql3_type::raw::from(n); }
| c=collection_type { $t = c; }
| tt=tuple_type { $t = tt; }
// The comparator_type rule is used for users' queries (internal=false)
// and for internal calls from db::cql_type_parser::parse() (internal=true).
// The latter is used for reading schemas stored in the system tables, and
// may support additional column types that cannot be created through CQL,
// but only internally through code. Today the only such type is "empty":
// Scylla code internally creates columns with type "empty" or collections
// "empty" to represent unselected columns in materialized views.
// If a user (internal=false) tries to use "empty" as a type, it is treated -
// as do all unknown types - as an attempt to use a user-defined type, and
// we report this name is reserved (as for _reserved_type_names()).
comparator_type [bool internal] returns [shared_ptr<cql3_type::raw> t]
: n=native_or_internal_type[internal] { $t = cql3_type::raw::from(n); }
| c=collection_type[internal] { $t = c; }
| tt=tuple_type[internal] { $t = tt; }
| id=userTypeName { $t = cql3::cql3_type::raw::user_type(id); }
| K_FROZEN '<' f=comparatorType '>'
| K_FROZEN '<' f=comparator_type[internal] '>'
{
try {
$t = cql3::cql3_type::raw::frozen(f);
@@ -1558,6 +1568,22 @@ comparatorType returns [shared_ptr<cql3_type::raw> t]
#endif
;
native_or_internal_type [bool internal] returns [shared_ptr<cql3_type> t]
: n=native_type { $t = n; }
// The "internal" types, only supported when internal==true:
| K_EMPTY {
if (internal) {
$t = cql3_type::empty;
} else {
add_recognition_error("Invalid (reserved) user type name empty");
}
}
;
comparatorType returns [shared_ptr<cql3_type::raw> t]
: tt=comparator_type[false] { $t = tt; }
;
native_type returns [shared_ptr<cql3_type> t]
: K_ASCII { $t = cql3_type::ascii; }
| K_BIGINT { $t = cql3_type::bigint; }
@@ -1582,24 +1608,24 @@ native_type returns [shared_ptr<cql3_type> t]
| K_TIME { $t = cql3_type::time; }
;
collection_type returns [shared_ptr<cql3::cql3_type::raw> pt]
: K_MAP '<' t1=comparatorType ',' t2=comparatorType '>'
collection_type [bool internal] returns [shared_ptr<cql3::cql3_type::raw> pt]
: K_MAP '<' t1=comparator_type[internal] ',' t2=comparator_type[internal] '>'
{
// if we can't parse either t1 or t2, antlr will "recover" and we may have t1 or t2 null.
if (t1 && t2) {
$pt = cql3::cql3_type::raw::map(t1, t2);
}
}
| K_LIST '<' t=comparatorType '>'
| K_LIST '<' t=comparator_type[internal] '>'
{ if (t) { $pt = cql3::cql3_type::raw::list(t); } }
| K_SET '<' t=comparatorType '>'
| K_SET '<' t=comparator_type[internal] '>'
{ if (t) { $pt = cql3::cql3_type::raw::set(t); } }
;
tuple_type returns [shared_ptr<cql3::cql3_type::raw> t]
tuple_type [bool internal] returns [shared_ptr<cql3::cql3_type::raw> t]
@init{ std::vector<shared_ptr<cql3::cql3_type::raw>> types; }
: K_TUPLE '<'
t1=comparatorType { types.push_back(t1); } (',' tn=comparatorType { types.push_back(tn); })*
t1=comparator_type[internal] { types.push_back(t1); } (',' tn=comparator_type[internal] { types.push_back(tn); })*
'>' { $t = cql3::cql3_type::raw::tuple(std::move(types)); }
;
@@ -1625,7 +1651,7 @@ unreserved_keyword returns [sstring str]
unreserved_function_keyword returns [sstring str]
: u=basic_unreserved_keyword { $str = u; }
| t=native_type { $str = t->to_string(); }
| t=native_or_internal_type[true] { $str = t->to_string(); }
;
basic_unreserved_keyword returns [sstring str]
@@ -1810,6 +1836,8 @@ K_REPLACE: R E P L A C E;
K_DETERMINISTIC: D E T E R M I N I S T I C;
K_JSON: J S O N;
K_EMPTY: E M P T Y;
K_SCYLLA_TIMEUUID_LIST_INDEX: S C Y L L A '_' T I M E U U I D '_' L I S T '_' I N D E X;
K_SCYLLA_COUNTER_SHARD_LIST: S C Y L L A '_' C O U N T E R '_' S H A R D '_' L I S T;

View File

@@ -127,7 +127,11 @@ column_identifier::new_selector_factory(database& db, schema_ptr schema, std::ve
if (!def) {
throw exceptions::invalid_request_exception(sprint("Undefined name %s in selection clause", _text));
}
// Do not allow explicitly selecting hidden columns. We also skip them on
// "SELECT *" (see selection::wildcard()).
if (def->is_view_virtual()) {
throw exceptions::invalid_request_exception(sprint("Undefined name %s in selection clause", _text));
}
return selection::simple_selector::new_factory(def->name_as_text(), add_and_get_index(*def, defs), def->type);
}

View File

@@ -40,6 +40,7 @@
*/
#include <boost/range/adaptor/transformed.hpp>
#include <boost/range/adaptor/filtered.hpp>
#include "cql3/selection/selection.hh"
#include "cql3/selection/selector_factories.hh"
@@ -208,9 +209,17 @@ protected:
::shared_ptr<selection> selection::wildcard(schema_ptr schema) {
auto columns = schema->all_columns_in_select_order();
auto cds = boost::copy_range<std::vector<const column_definition*>>(columns | boost::adaptors::transformed([](const column_definition& c) {
return &c;
}));
// filter out hidden columns, which should not be seen by the
// user when doing "SELECT *". We also disallow selecting them
// individually (see column_identifier::new_selector_factory()).
auto cds = boost::copy_range<std::vector<const column_definition*>>(
columns |
boost::adaptors::filtered([](const column_definition& c) {
return !c.is_view_virtual();
}) |
boost::adaptors::transformed([](const column_definition& c) {
return &c;
}));
return simple_selection::make(schema, std::move(cds), true);
}

View File

@@ -246,18 +246,22 @@ future<shared_ptr<cql_transport::event::schema_change>> alter_table_statement::a
cfm.with_column(column_name->name(), type, _is_static ? column_kind::static_column : column_kind::regular_column);
// Adding a column to a table which has an include all view requires the column to be added to the view
// as well. If the view has a regular base column in its PK, then the column ID needs to be updated in
// view_info; for that, rebuild the schema.
// Adding a column to a base table always requires updating the view
// schemas: If the view includes all columns it should include the new
// column, but if it doesn't, it may need to include the new
// unselected column as a virtual column. The case when it we
// shouldn't add a virtual column is when the view has in its PK one
// of the base's regular columns - but even in this case we need to
// rebuild the view schema, to update the column ID.
if (!_is_static) {
for (auto&& view : cf.views()) {
if (view->view_info()->include_all_columns() || view->view_info()->base_non_pk_column_in_view_pk()) {
schema_builder builder(view);
if (view->view_info()->include_all_columns()) {
builder.with_column(column_name->name(), type);
}
view_updates.push_back(view_ptr(builder.build()));
schema_builder builder(view);
if (view->view_info()->include_all_columns()) {
builder.with_column(column_name->name(), type);
} else if (!view->view_info()->base_non_pk_column_in_view_pk()) {
db::view::create_virtual_column(builder, column_name->name(), type);
}
view_updates.push_back(view_ptr(builder.build()));
}
}
@@ -347,7 +351,8 @@ future<shared_ptr<cql_transport::event::schema_change>> alter_table_statement::a
validate_column_rename(db, *schema, *from, *to);
cfm.with_column_rename(from->name(), to->name());
// If the view includes a renamed column, it must be renamed in the view table and the definition.
// If the view includes a renamed column, it must be renamed in
// the view table and the definition.
for (auto&& view : cf.views()) {
if (view->get_column_definition(from->name())) {
schema_builder builder(view);

View File

@@ -275,6 +275,7 @@ future<shared_ptr<cql_transport::event::schema_change>> create_view_statement::a
std::vector<const column_definition*> missing_pk_columns;
std::vector<const column_definition*> target_non_pk_columns;
std::vector<const column_definition*> unselected_columns;
// We need to include all of the primary key columns from the base table in order to make sure that we do not
// overwrite values in the view. We cannot support "collapsing" the base table into a smaller number of rows in
@@ -292,6 +293,9 @@ future<shared_ptr<cql_transport::event::schema_change>> create_view_statement::a
if (included_def && !def_in_target_pk) {
target_non_pk_columns.push_back(&def);
}
if (!included_def && !def_in_target_pk && !def.is_static()) {
unselected_columns.push_back(&def);
}
if (def.is_primary_key() && !def_in_target_pk) {
missing_pk_columns.push_back(&def);
}
@@ -321,6 +325,19 @@ future<shared_ptr<cql_transport::event::schema_change>> create_view_statement::a
add_columns(target_partition_keys, column_kind::partition_key);
add_columns(target_clustering_keys, column_kind::clustering_key);
add_columns(target_non_pk_columns, column_kind::regular_column);
// Add all unselected columns (base-table columns which are not selected
// in the view) as "virtual columns" - columns which have timestamp and
// ttl information, but an empty value. These are needed to keep view
// rows alive when the base row is alive, even if the view row has no
// data, just a key (see issue #3362). The virtual columns are not needed
// when the view pk adds a regular base column (i.e., has_non_pk_column)
// because in that case, the liveness of that base column is what
// determines the liveness of the view row.
if (!has_non_pk_column) {
for (auto* def : unselected_columns) {
db::view::create_virtual_column(builder, def->name(), def->type);
}
}
_properties.properties()->apply_to_builder(builder, proxy.get_db().local().get_config().extensions());
if (builder.default_time_to_live().count() > 0) {

View File

@@ -49,7 +49,10 @@
#include "types.hh"
static ::shared_ptr<cql3::cql3_type::raw> parse_raw(const sstring& str) {
return cql3::util::do_with_parser(str, std::mem_fn(&cql3_parser::CqlParser::comparatorType));
return cql3::util::do_with_parser(str,
[] (cql3_parser::CqlParser& parser) {
return parser.comparator_type(true);
});
}
data_type db::cql_type_parser::parse(const sstring& keyspace, const sstring& str, lw_shared_ptr<user_types_metadata> user_types) {

View File

@@ -160,7 +160,7 @@ static future<> do_merge_schema(distributed<service::storage_proxy>&, std::vecto
static std::vector<column_definition> create_columns_from_column_rows(
const query::result_set& rows, const sstring& keyspace,
const sstring& table, bool is_super);
const sstring& table, bool is_super, column_view_virtual is_view_virtual);
static std::vector<index_metadata> create_indices_from_index_rows(const query::result_set& rows,
@@ -304,9 +304,20 @@ schema_ptr scylla_tables() {
return schema;
}
schema_ptr columns() {
static thread_local auto schema = [] {
schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, COLUMNS), NAME, COLUMNS,
// The "columns" table lists the definitions of all columns in all tables
// and views. Its schema needs to be identical to the one in Cassandra because
// it is the API through which drivers inspect the list of columns in a table
// (e.g., cqlsh's "DESCRIBE TABLE" and "DESCRIBE MATERIALIZED VIEW" get their
// information from the columns table).
// The "view_virtual_columns" table is an additional table with exactly the
// same schema (both are created by columns_schema()), but has a separate
// list of "virtual" columns. Those are used in materialized views for keeping
// rows without data alive (see issue #3362). These virtual columns cannot be
// listed in the regular "columns" table, otherwise the "DESCRIBE MATERIALIZED
// VIEW" would list them - while it should only list real, selected, columns.
static schema_ptr columns_schema(const char* columns_table_name) {
schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, columns_table_name), NAME, columns_table_name,
// partition key
{{"keyspace_name", utf8_type}},
// clustering key
@@ -326,10 +337,16 @@ schema_ptr columns() {
// comment
"column definitions"
)));
builder.set_gc_grace_seconds(schema_gc_grace);
builder.with_version(generate_schema_version(builder.uuid()));
return builder.build();
}();
builder.set_gc_grace_seconds(schema_gc_grace);
builder.with_version(generate_schema_version(builder.uuid()));
return builder.build();
}
schema_ptr columns() {
static thread_local auto schema = columns_schema(COLUMNS);
return schema;
}
schema_ptr view_virtual_columns() {
static thread_local auto schema = columns_schema(VIEW_VIRTUAL_COLUMNS);
return schema;
}
@@ -1619,6 +1636,9 @@ static schema_mutations make_table_mutations(schema_ptr table, api::timestamp_ty
if (with_columns_and_triggers) {
for (auto&& column : table->v3().all_columns()) {
if (column.is_view_virtual()) {
throw std::logic_error("view_virtual column found in non-view table");
}
add_column_to_schema_mutation(table, column, timestamp, columns_mutation);
}
for (auto&& index : table->indices()) {
@@ -1631,7 +1651,8 @@ static schema_mutations make_table_mutations(schema_ptr table, api::timestamp_ty
}
}
return schema_mutations{std::move(m), std::move(columns_mutation), std::move(indices_mutation), std::move(dropped_columns_mutation),
return schema_mutations{std::move(m), std::move(columns_mutation), stdx::nullopt,
std::move(indices_mutation), std::move(dropped_columns_mutation),
std::move(scylla_tables_mutation)};
}
@@ -1690,6 +1711,7 @@ static void make_update_columns_mutations(schema_ptr old_table,
bool from_thrift,
std::vector<mutation>& mutations) {
mutation columns_mutation(columns(), partition_key::from_singular(*columns(), old_table->ks_name()));
mutation view_virtual_columns_mutation(view_virtual_columns(), partition_key::from_singular(*columns(), old_table->ks_name()));
auto diff = difference(old_table->v3().columns_by_name(), new_table->v3().columns_by_name());
@@ -1701,17 +1723,25 @@ static void make_update_columns_mutations(schema_ptr old_table,
if (from_thrift && !column.is_regular()) {
continue;
}
drop_column_from_schema_mutation(columns(), old_table, column.name_as_text(), timestamp, mutations);
if (column.is_view_virtual()) {
drop_column_from_schema_mutation(view_virtual_columns(), old_table, column.name_as_text(), timestamp, mutations);
} else {
drop_column_from_schema_mutation(columns(), old_table, column.name_as_text(), timestamp, mutations);
}
}
// newly added columns and old columns with updated attributes
for (auto&& name : boost::range::join(diff.entries_differing, diff.entries_only_on_right)) {
const column_definition& column = *new_table->v3().columns_by_name().at(name);
add_column_to_schema_mutation(new_table, column, timestamp, columns_mutation);
if (column.is_view_virtual()) {
add_column_to_schema_mutation(new_table, column, timestamp, view_virtual_columns_mutation);
} else {
add_column_to_schema_mutation(new_table, column, timestamp, columns_mutation);
}
}
mutations.emplace_back(std::move(columns_mutation));
mutations.emplace_back(std::move(view_virtual_columns_mutation));
// dropped columns
auto dc_diff = difference(old_table->dropped_columns(), new_table->dropped_columns());
@@ -1761,7 +1791,11 @@ static void make_drop_table_or_view_mutations(schema_ptr schema_table,
m.partition().apply_delete(*schema_table, ckey, tombstone(timestamp, gc_clock::now()));
mutations.emplace_back(m);
for (auto& column : table_or_view->v3().all_columns()) {
drop_column_from_schema_mutation(columns(), table_or_view, column.name_as_text(), timestamp, mutations);
if (column.is_view_virtual()) {
drop_column_from_schema_mutation(view_virtual_columns(), table_or_view, column.name_as_text(), timestamp, mutations);
} else {
drop_column_from_schema_mutation(columns(), table_or_view, column.name_as_text(), timestamp, mutations);
}
}
for (auto& column : table_or_view->dropped_columns() | boost::adaptors::map_keys) {
drop_column_from_schema_mutation(dropped_columns(), table_or_view, column, timestamp, mutations);
@@ -1796,11 +1830,12 @@ static future<schema_mutations> read_table_mutations(distributed<service::storag
return when_all_succeed(
read_schema_partition_for_table(proxy, s, table.keyspace_name, table.table_name),
read_schema_partition_for_table(proxy, columns(), table.keyspace_name, table.table_name),
read_schema_partition_for_table(proxy, view_virtual_columns(), table.keyspace_name, table.table_name),
read_schema_partition_for_table(proxy, dropped_columns(), table.keyspace_name, table.table_name),
read_schema_partition_for_table(proxy, indexes(), table.keyspace_name, table.table_name),
read_schema_partition_for_table(proxy, scylla_tables(), table.keyspace_name, table.table_name)).then(
[] (mutation cf_m, mutation col_m, mutation dropped_m, mutation idx_m, mutation st_m) {
return schema_mutations{std::move(cf_m), std::move(col_m), std::move(idx_m), std::move(dropped_m), std::move(st_m)};
[] (mutation cf_m, mutation col_m, mutation vv_col_m, mutation dropped_m, mutation idx_m, mutation st_m) {
return schema_mutations{std::move(cf_m), std::move(col_m), std::move(vv_col_m), std::move(idx_m), std::move(dropped_m), std::move(st_m)};
});
#if 0
// FIXME:
@@ -2025,7 +2060,8 @@ schema_ptr create_table_from_mutations(const schema_ctxt& ctxt, schema_mutations
ks_name,
cf_name,/*,
fullRawComparator, */
cf == cf_type::super);
cf == cf_type::super,
column_view_virtual::no);
builder.set_is_dense(is_dense);
@@ -2187,7 +2223,8 @@ static std::vector<column_definition> create_columns_from_column_rows(const quer
const sstring& keyspace,
const sstring& table, /*,
AbstractType<?> rawComparator, */
bool is_super)
bool is_super,
column_view_virtual is_view_virtual)
{
std::vector<column_definition> columns;
for (auto&& row : rows.rows()) {
@@ -2204,7 +2241,7 @@ static std::vector<column_definition> create_columns_from_column_rows(const quer
}
}
columns.emplace_back(name_bytes, type, kind, position);
columns.emplace_back(name_bytes, type, kind, position, is_view_virtual);
}
return columns;
}
@@ -2247,10 +2284,16 @@ view_ptr create_view_from_mutations(const schema_ctxt& ctxt, schema_mutations sm
schema_builder builder{ks_name, cf_name, id};
prepare_builder_from_table_row(ctxt, builder, row);
auto column_defs = create_columns_from_column_rows(query::result_set(sm.columns_mutation()), ks_name, cf_name, false);
auto column_defs = create_columns_from_column_rows(query::result_set(sm.columns_mutation()), ks_name, cf_name, false, column_view_virtual::no);
for (auto&& cdef : column_defs) {
builder.with_column(cdef);
}
if (sm.view_virtual_columns_mutation()) {
column_defs = create_columns_from_column_rows(query::result_set(*sm.view_virtual_columns_mutation()), ks_name, cf_name, false, column_view_virtual::yes);
for (auto&& cdef : column_defs) {
builder.with_column(cdef);
}
}
if (version) {
builder.with_version(*version);
@@ -2320,12 +2363,17 @@ static schema_mutations make_view_mutations(view_ptr view, api::timestamp_type t
mutation columns_mutation(columns(), pkey);
mutation view_virtual_columns_mutation(view_virtual_columns(), pkey);
mutation dropped_columns_mutation(dropped_columns(), pkey);
mutation indices_mutation(indexes(), pkey);
if (with_columns) {
for (auto&& column : view->v3().all_columns()) {
add_column_to_schema_mutation(view, column, timestamp, columns_mutation);
if (column.is_view_virtual()) {
add_column_to_schema_mutation(view, column, timestamp, view_virtual_columns_mutation);
} else {
add_column_to_schema_mutation(view, column, timestamp, columns_mutation);
}
}
for (auto&& e : view->dropped_columns()) {
@@ -2338,7 +2386,8 @@ static schema_mutations make_view_mutations(view_ptr view, api::timestamp_type t
auto scylla_tables_mutation = make_scylla_tables_mutation(view, timestamp);
return schema_mutations{std::move(m), std::move(columns_mutation), std::move(indices_mutation), std::move(dropped_columns_mutation),
return schema_mutations{std::move(m), std::move(columns_mutation), std::move(view_virtual_columns_mutation),
std::move(indices_mutation), std::move(dropped_columns_mutation),
std::move(scylla_tables_mutation)};
}
@@ -2634,7 +2683,7 @@ data_type parse_type(sstring str)
std::vector<schema_ptr> all_tables() {
return {
keyspaces(), tables(), scylla_tables(), columns(), dropped_columns(), triggers(),
views(), indexes(), types(), functions(), aggregates(),
views(), indexes(), types(), functions(), aggregates(), view_virtual_columns()
};
}

View File

@@ -91,8 +91,10 @@ static constexpr auto TYPES = "types";
static constexpr auto FUNCTIONS = "functions";
static constexpr auto AGGREGATES = "aggregates";
static constexpr auto INDEXES = "indexes";
static constexpr auto VIEW_VIRTUAL_COLUMNS = "view_virtual_columns"; // Scylla specific
schema_ptr columns();
schema_ptr view_virtual_columns();
schema_ptr dropped_columns();
schema_ptr indexes();
schema_ptr tables();

View File

@@ -288,50 +288,6 @@ row_marker view_updates::compute_row_marker(const clustering_row& base_row) cons
return cell.is_live_and_has_ttl() ? row_marker(cell.timestamp(), cell.ttl(), cell.expiry()) : row_marker(cell.timestamp());
}
if (_view_info.include_all_columns()) {
return marker;
}
auto timestamp = marker.timestamp();
bool has_non_expiring_live_cell = false;
expiry_opt biggest_expiry;
gc_clock::duration ttl = gc_clock::duration::min();
if (marker.is_expiring()) {
biggest_expiry = marker.expiry();
ttl = marker.ttl();
}
auto maybe_update_expiry_and_ttl = [&] (atomic_cell_view&& cell) {
timestamp = std::max(timestamp, cell.timestamp());
if (cell.is_live_and_has_ttl()) {
if (cell.expiry() >= biggest_expiry.value_or(cell.expiry())) {
biggest_expiry = cell.expiry();
ttl = cell.ttl();
}
} else if (cell.is_live()) {
has_non_expiring_live_cell = true;
}
};
// Iterate over regular cells not in the view, as we already have the timestamps of the included columns.
base_row.cells().for_each_cell([&] (column_id id, const atomic_cell_or_collection& c) {
auto& def = _base->regular_column_at(id);
if (_view_info.view_column(def)) {
return;
}
if (def.is_atomic()) {
maybe_update_expiry_and_ttl(c.as_atomic_cell(def));
} else {
auto ctype = static_pointer_cast<const collection_type_impl>(def.type);
ctype->for_each_cell(c.as_collection_mutation(), maybe_update_expiry_and_ttl);
}
});
if ((marker.is_live() && !marker.is_expiring()) || has_non_expiring_live_cell) {
return row_marker(timestamp);
}
if (biggest_expiry) {
return row_marker(timestamp, ttl, *biggest_expiry);
}
return marker;
}
@@ -376,10 +332,134 @@ static const column_definition* view_column(const schema& base, const schema& vi
return view.get_column_definition(base.regular_column_at(base_id).name());
}
// Utility function for taking an existing cell, and creating a copy with an
// empty value instead of the original value, but with the original liveness
// information (expiration and deletion time) unchanged.
static atomic_cell make_empty(const atomic_cell_view& ac) {
if (ac.is_live_and_has_ttl()) {
return atomic_cell::make_live(*empty_type, ac.timestamp(), bytes_view{}, ac.expiry(), ac.ttl());
} else if (ac.is_live()) {
return atomic_cell::make_live(*empty_type, ac.timestamp(), bytes_view{});
} else {
return atomic_cell::make_dead(ac.timestamp(), ac.deletion_time());
}
}
// Utility function for taking an existing collection which has both keys and
// values (i.e., either a list or map, but not a set), and creating a copy of
// this collection with all the values replaced by empty values.
// The make_empty() function above is used to ensure that liveness information
// is copied unchanged.
static collection_mutation make_empty(
const collection_mutation_view& cm,
const collection_type_impl& ctype) {
collection_type_impl::mutation n;
cm.data.with_linearized([&] (bytes_view bv) {
auto m_view = ctype.deserialize_mutation_form(bv);
n.tomb = m_view.tomb;
for (auto&& c : m_view.cells) {
n.cells.emplace_back(c.first, make_empty(c.second));
}
});
return ctype.serialize_mutation_form(n);
}
// In some cases, we need to copy to a view table even columns which have not
// been SELECTed. For these columns we only need to save liveness information
// (timestamp, deletion, ttl), but not the value. We call these columns
// "virtual columns", and the reason why we need them is explained in
// issue #3362. The following function, maybe_make_virtual() takes a full
// value c (taken from the base table) for the given column col, and if that
// column is a virtual column it modifies c to remove the unwanted value.
// The function create_virtual_column(), below, creates the virtual column in
// the view schema, that maybe_make_virtual() will fill.
static void maybe_make_virtual(atomic_cell_or_collection& c, const column_definition* col) {
if (!col->is_view_virtual()) {
// This is a regular selected column. Leave c untouched.
return;
}
if (col->type->is_atomic()) {
// A virtual cell for an atomic value or frozen collection. Its
// value is empty (of type empty_type).
if (col->type != empty_type) {
throw std::logic_error("Virtual cell has wrong type");
}
c = make_empty(c.as_atomic_cell(*col));
} else {
if (!col->type->is_collection()) {
// TODO: when we support unfrozen UDT (#2201), we will need to
// supported it here too.
throw std::logic_error("Virtual cell is neither atomic nor collection");
}
auto ctype = static_pointer_cast<const collection_type_impl>(col->type);
if (ctype->is_list()) {
// A list has integers as keys, and values (the list's items).
// We just need to build a list with the same keys (and liveness
// information), but empty values.
auto ltype = static_cast<const list_type_impl*>(col->type.get());
if (ltype->get_elements_type() != empty_type) {
throw std::logic_error("Virtual cell has wrong list type");
}
c = make_empty(c.as_collection_mutation(), *ctype);
} else if (ctype->is_map()) {
// A map has keys and values. We just need to build a map with
// the same keys (and liveness information), but empty values.
auto mtype = static_cast<const map_type_impl*>(col->type.get());
if (mtype->get_values_type() != empty_type) {
throw std::logic_error("Virtual cell has wrong map type");
}
c = make_empty(c.as_collection_mutation(), *ctype);
} else if (ctype->is_set()) {
// A set has just keys (and liveness information). We need
// all of it as a virtual column, unfortunately, so we
// leave c unmodified.
} else {
// A collection can't be anything but a list, map or set...
throw std::logic_error("Virtual cell has unexpected collection type");
}
}
}
void create_virtual_column(schema_builder& builder, const bytes& name, const data_type& type) {
if (type->is_atomic()) {
builder.with_column(name, empty_type, column_kind::regular_column, column_view_virtual::yes);
return;
}
// A multi-cell collection (a frozen collection is a single
// cell and handled handled in the is_atomic() case above).
// The virtual version can't be just one cell, it has to be
// itself a collection of cells.
auto ctype = dynamic_pointer_cast<const collection_type_impl>(type);
if (!ctype) {
// TODO: When #2201 is done, we also need to handle here
// unfrozen UDTs.
throw exceptions::invalid_request_exception(sprint("Unsupported unselected multi-cell non-collection column %s for Materialized View", name));
}
if (ctype->is_list()) {
// A list has ints as keys, and values (the list's items).
// We just need these intss, i.e., a list of empty items.
builder.with_column(name, list_type_impl::get_instance(empty_type, true), column_kind::regular_column, column_view_virtual::yes);
} else if (ctype->is_map()) {
// A map has keys and values. We don't need these values,
// and can use empty values instead.
auto mtype = dynamic_pointer_cast<const map_type_impl>(type);
builder.with_column(name, map_type_impl::get_instance(mtype->get_values_type(), empty_type, true), column_kind::regular_column, column_view_virtual::yes);
} else if (ctype->is_set()) {
// A set's cell has nothing beyond the keys, so the
// virtual version of a set is, unfortunately, a complete
// copy of the set.
builder.with_column(name, type, column_kind::regular_column, column_view_virtual::yes);
} else {
// A collection can't be anything but a list, map or set...
abort();
}
}
static void add_cells_to_view(const schema& base, const schema& view, row base_cells, row& view_cells) {
base_cells.for_each_cell([&] (column_id id, atomic_cell_or_collection& c) {
auto* view_col = view_column(base, view, id);
if (view_col && !view_col->is_primary_key()) {
maybe_make_virtual(c, view_col);
view_cells.append_cell(view_col->id, std::move(c));
}
});

View File

@@ -104,6 +104,21 @@ query::clustering_row_ranges calculate_affected_clustering_ranges(
future<> mutate_MV(const dht::token& base_token, std::vector<mutation> mutations, db::view::stats& stats);
/**
* create_virtual_column() adds a "virtual column" to a schema builder.
* The definition of a "virtual column" is based on the given definition
* of a regular column, except that any value types are replaced by the
* empty type - and only the information needed to track column liveness
* is kept: timestamp, deletion, ttl, and keys in maps.
* In some cases we add such virtual columns for unselected columns in
* materialized views, for reasons explained in issue #3362.
* @param builder the schema_builder where we want to add the virtual column.
* @param name the name of the virtual column to be created.
* @param type of the base column for which we want to build a virtual column.
* When type is a multi-cell collection, so will be the virtual column.
*/
void create_virtual_column(schema_builder& builder, const bytes& name, const data_type& type);
}
}

View File

@@ -30,6 +30,7 @@ class schema_mutations {
std::experimental::optional<canonical_mutation> indices_canonical_mutation()[[version 2.0]];
std::experimental::optional<canonical_mutation> dropped_columns_canonical_mutation()[[version 2.0]];
std::experimental::optional<canonical_mutation> scylla_tables_canonical_mutation()[[version 2.0]];
std::experimental::optional<canonical_mutation> view_virtual_columns_canonical_mutation()[[version 2.4]];
};
class schema stub [[writable]] {

View File

@@ -489,11 +489,12 @@ sstring index_metadata::get_default_index_name(const sstring& cf_name,
return cf_name + "_idx";
}
column_definition::column_definition(bytes name, data_type type, column_kind kind, column_id component_index, api::timestamp_type dropped_at)
column_definition::column_definition(bytes name, data_type type, column_kind kind, column_id component_index, column_view_virtual is_view_virtual, api::timestamp_type dropped_at)
: _name(std::move(name))
, _dropped_at(dropped_at)
, _is_atomic(type->is_atomic())
, _is_counter(type->is_counter())
, _is_view_virtual(is_view_virtual)
, type(std::move(type))
, id(component_index)
, kind(kind)
@@ -504,6 +505,9 @@ std::ostream& operator<<(std::ostream& os, const column_definition& cd) {
os << "name=" << cd.name_as_text();
os << ", type=" << cd.type->name();
os << ", kind=" << to_sstring(cd.kind);
if (cd.is_view_virtual()) {
os << ", view_virtual";
}
os << ", componentIndex=" << (cd.has_component_index() ? std::to_string(cd.component_index()) : "null");
os << ", droppedAt=" << cd._dropped_at;
os << "}";
@@ -689,16 +693,16 @@ column_definition& schema_builder::find_column(const cql3::column_identifier& c)
}
schema_builder& schema_builder::with_column(const column_definition& c) {
return with_column(bytes(c.name()), data_type(c.type), column_kind(c.kind), c.position());
return with_column(bytes(c.name()), data_type(c.type), column_kind(c.kind), c.position(), c.view_virtual());
}
schema_builder& schema_builder::with_column(bytes name, data_type type, column_kind kind) {
schema_builder& schema_builder::with_column(bytes name, data_type type, column_kind kind, column_view_virtual is_view_virtual) {
// component_index will be determined by schema cosntructor
return with_column(name, type, kind, 0);
return with_column(name, type, kind, 0, is_view_virtual);
}
schema_builder& schema_builder::with_column(bytes name, data_type type, column_kind kind, column_id component_index) {
_raw._columns.emplace_back(name, type, kind, component_index);
schema_builder& schema_builder::with_column(bytes name, data_type type, column_kind kind, column_id component_index, column_view_virtual is_view_virtual) {
_raw._columns.emplace_back(name, type, kind, component_index, is_view_virtual);
if (type->is_multi_cell()) {
with_collection(name, type);
} else if (type->is_counter()) {

View File

@@ -75,6 +75,8 @@ class extensions;
// make sure these match the order we like columns back from schema
enum class column_kind { partition_key, clustering_key, static_column, regular_column };
enum class column_view_virtual { no, yes };
sstring to_sstring(column_kind k);
bool is_compatible(column_kind k1, column_kind k2);
@@ -207,6 +209,7 @@ private:
api::timestamp_type _dropped_at;
bool _is_atomic;
bool _is_counter;
column_view_virtual _is_view_virtual;
struct thrift_bits {
thrift_bits()
@@ -221,6 +224,7 @@ private:
public:
column_definition(bytes name, data_type type, column_kind kind,
column_id component_index = 0,
column_view_virtual view_virtual = column_view_virtual::no,
api::timestamp_type dropped_at = api::missing_timestamp);
data_type type;
@@ -241,6 +245,12 @@ public:
bool is_atomic() const { return _is_atomic; }
bool is_multi_cell() const { return !_is_atomic; }
bool is_counter() const { return _is_counter; }
// "virtual columns" appear in a materialized view as placeholders for
// unselected columns, with liveness information but without data, and
// allow view rows to remain alive despite having no data (issue #3362).
// These columns should be hidden from the user's SELECT queries.
bool is_view_virtual() const { return _is_view_virtual == column_view_virtual::yes; }
column_view_virtual view_virtual() const { return _is_view_virtual; }
const sstring& name_as_text() const;
const bytes& name() const;
sstring name_as_cql_string() const;

View File

@@ -238,8 +238,8 @@ public:
column_definition& find_column(const cql3::column_identifier&);
schema_builder& with_column(const column_definition& c);
schema_builder& with_column(bytes name, data_type type, column_kind kind = column_kind::regular_column);
schema_builder& with_column(bytes name, data_type type, column_kind kind, column_id component_index);
schema_builder& with_column(bytes name, data_type type, column_kind kind = column_kind::regular_column, column_view_virtual view_virtual = column_view_virtual::no);
schema_builder& with_column(bytes name, data_type type, column_kind kind, column_id component_index, column_view_virtual view_virtual = column_view_virtual::no);
schema_builder& without_column(bytes name);
schema_builder& without_column(sstring name, api::timestamp_type timestamp);
schema_builder& without_column(sstring name, data_type, api::timestamp_type timestamp);

View File

@@ -29,9 +29,11 @@ schema_mutations::schema_mutations(canonical_mutation columnfamilies,
bool is_view,
stdx::optional<canonical_mutation> indices,
stdx::optional<canonical_mutation> dropped_columns,
stdx::optional<canonical_mutation> scylla_tables)
stdx::optional<canonical_mutation> scylla_tables,
stdx::optional<canonical_mutation> view_virtual_columns)
: _columnfamilies(columnfamilies.to_mutation(is_view ? db::schema_tables::views() : db::schema_tables::tables()))
, _columns(columns.to_mutation(db::schema_tables::columns()))
, _view_virtual_columns(view_virtual_columns ? mutation_opt{view_virtual_columns.value().to_mutation(db::schema_tables::view_virtual_columns())} : stdx::nullopt)
, _indices(indices ? mutation_opt{indices.value().to_mutation(db::schema_tables::indexes())} : stdx::nullopt)
, _dropped_columns(dropped_columns ? mutation_opt{dropped_columns.value().to_mutation(db::schema_tables::dropped_columns())} : stdx::nullopt)
, _scylla_tables(scylla_tables ? mutation_opt{scylla_tables.value().to_mutation(db::schema_tables::scylla_tables())} : stdx::nullopt)
@@ -40,6 +42,9 @@ schema_mutations::schema_mutations(canonical_mutation columnfamilies,
void schema_mutations::copy_to(std::vector<mutation>& dst) const {
dst.push_back(_columnfamilies);
dst.push_back(_columns);
if (_view_virtual_columns) {
dst.push_back(*_view_virtual_columns);
}
if (_indices) {
dst.push_back(*_indices);
}
@@ -68,6 +73,9 @@ table_schema_version schema_mutations::digest() const {
md5_hasher h;
db::schema_tables::feed_hash_for_schema_digest(h, _columnfamilies);
db::schema_tables::feed_hash_for_schema_digest(h, _columns);
if (_view_virtual_columns && !_view_virtual_columns->partition().empty()) {
db::schema_tables::feed_hash_for_schema_digest(h, *_view_virtual_columns);
}
if (_indices && !_indices->partition().empty()) {
db::schema_tables::feed_hash_for_schema_digest(h, *_indices);
}
@@ -94,6 +102,7 @@ static mutation_opt compact(const mutation& m) {
bool schema_mutations::operator==(const schema_mutations& other) const {
return compact(_columnfamilies) == compact(other._columnfamilies)
&& compact(_columns) == compact(other._columns)
&& compact(_view_virtual_columns) == compact(other._view_virtual_columns)
&& compact(_indices) == compact(other._indices)
&& compact(_dropped_columns) == compact(other._dropped_columns)
&& compact(_scylla_tables) == compact(other._scylla_tables)
@@ -105,7 +114,8 @@ bool schema_mutations::operator!=(const schema_mutations& other) const {
}
bool schema_mutations::live() const {
return _columnfamilies.live_row_count() > 0 || _columns.live_row_count() > 0;
return _columnfamilies.live_row_count() > 0 || _columns.live_row_count() > 0 ||
(_view_virtual_columns && _view_virtual_columns->live_row_count() > 0);
}
bool schema_mutations::is_view() const {

View File

@@ -31,14 +31,16 @@
class schema_mutations {
mutation _columnfamilies;
mutation _columns;
mutation_opt _view_virtual_columns;
mutation_opt _indices;
mutation_opt _dropped_columns;
mutation_opt _scylla_tables;
public:
schema_mutations(mutation columnfamilies, mutation columns, mutation_opt indices, mutation_opt dropped_columns,
schema_mutations(mutation columnfamilies, mutation columns, mutation_opt view_virtual_columns, mutation_opt indices, mutation_opt dropped_columns,
mutation_opt scylla_tables)
: _columnfamilies(std::move(columnfamilies))
, _columns(std::move(columns))
, _view_virtual_columns(std::move(view_virtual_columns))
, _indices(std::move(indices))
, _dropped_columns(std::move(dropped_columns))
, _scylla_tables(std::move(scylla_tables))
@@ -48,7 +50,8 @@ public:
bool is_view,
stdx::optional<canonical_mutation> indices,
stdx::optional<canonical_mutation> dropped_columns,
stdx::optional<canonical_mutation> scylla_tables);
stdx::optional<canonical_mutation> scylla_tables,
stdx::optional<canonical_mutation> view_virtual_columns);
schema_mutations(schema_mutations&&) = default;
schema_mutations& operator=(schema_mutations&&) = default;
@@ -65,6 +68,10 @@ public:
return _columns;
}
const mutation_opt& view_virtual_columns_mutation() const {
return _view_virtual_columns;
}
const mutation_opt& scylla_tables() const {
return _scylla_tables;
}
@@ -88,6 +95,13 @@ public:
return canonical_mutation(_columns);
}
stdx::optional<canonical_mutation> view_virtual_columns_canonical_mutation() const {
if (_view_virtual_columns) {
return canonical_mutation(*_view_virtual_columns);
}
return {};
}
stdx::optional<canonical_mutation> indices_canonical_mutation() const {
if (_indices) {
return canonical_mutation(*_indices);

View File

@@ -1216,3 +1216,345 @@ SEASTAR_TEST_CASE(test_marker_timestamp_is_not_shadowed_by_previous_updatewith_f
});
}, cfg);
}
// A reproducer for issue #3362, not involving TTLs.
// The test involves a view that selects no column except the base's primary
// key, so view rows contain no cells besides a row marker, so as a base
// row appears and disappears as we update and delete individual cells in
// that row, we need to insert and delete the row marker with varying
// timestamps to make sure the view row appears and disappears as needed.
// But as we shall see, after enough trickery, we run out of timestamps
// to use to revive the row marker, and fail to revive it. So to fix
// issue #3362, we needed to remember all cells separately ("virtual
// cells").
SEASTAR_TEST_CASE(test_3362_no_ttls) {
return do_with_cql_env_thread([] (auto& e) {
e.execute_cql("create table cf (p int, c int, a int, b int, primary key (p, c))").get();
e.execute_cql("create materialized view vcf as select p, c from cf "
"where p is not null and c is not null "
"primary key (p, c)").get();
// In row p=1 c=1, insert two cells - b=1 at timestamp 10, a=1 at timestamp 20:
BOOST_TEST_PASSPOINT();
e.execute_cql("update cf using timestamp 10 set b = 1 where p = 1 and c = 1").get();
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
BOOST_TEST_PASSPOINT();
e.execute_cql("update cf using timestamp 20 set a = 1 where p = 1 and c = 1").get();
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
// Delete just a=1 (with timestamp 21). The base row will still exist (with b=1),
// and accordingly the view row too:
BOOST_TEST_PASSPOINT();
e.execute_cql("delete a from cf using timestamp 21 where p = 1 and c = 1").get();
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
// At this point, we still have the base row with b=1 at timestamp 10
// (and a=1 was deleted at timestamp 21). If we delete the b=1 at
// timestamp 11, nothing will remain in the base row, and the view
// row should disappear as well:
BOOST_TEST_PASSPOINT();
e.execute_cql("delete b from cf using timestamp 11 where p = 1 and c = 1").get();
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().is_empty();
});
// Now we finally reproduce #3362: We now add b=1 again, at timestamp
// 12 (it was earlier deleted in timestamp 11). The base row is live
// again, and so should the view row.
// With issue #3362, the view row failed to become alive. The reason
// is that to make the above is_empty() succeed, the implementation
// deletes the row marker with timestamp 21 (the maximal timestamp
// seen in the row). But now, we add a row marker again with the same
// timestamp 21, but the deletion wins so the row marker is still
// missing. (note that had data won over deletions, the is_empty()
// test above would have failed instead).
BOOST_TEST_PASSPOINT();
e.execute_cql("update cf using timestamp 12 set b = 1 where p = 1 and c = 1").get();
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
});
}
// This is another reproducer for issue #3362, using TTLs instead of
// numerous back-and-forth additions and deletions.
SEASTAR_TEST_CASE(test_3362_with_ttls) {
return do_with_cql_env_thread([] (auto& e) {
e.execute_cql("create table cf (p int, c int, a int, b int, primary key (p, c))").get();
e.execute_cql("create materialized view vcf as select p, c from cf "
"where p is not null and c is not null "
"primary key (p, c)").get();
// In row p=1 c=1, insert two cells - a=1 with ttl, and b=1 without
// ttl. The ttl'ed cell is inserted first, with a newer timestamp.
// The problem is that the view row's marker gets, with a new
// timestamp, a ttl. Then, when we go to add another column with an
// older timestamp, and try to set the row marker without a
// ttl - the older timestamp of this update looses, and we wrongly
// remain with a ttl on the view row marker.
BOOST_TEST_PASSPOINT();
e.execute_cql("update cf using timestamp 2 and ttl 5 set a = 1 where p = 1 and c = 1").get();
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
BOOST_TEST_PASSPOINT();
e.execute_cql("update cf using timestamp 1 set b = 1 where p = 1 and c = 1").get();
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
// Pass the time 6 seconds forward. Cell 'a' will have expired, but
// cell 'b' will still exist, so the base row still exists and the
// corresponding view row should also exist too.
forward_jump_clocks(6s);
BOOST_TEST_PASSPOINT();
// verify that the base row still exists (cell b didn't expire)
eventually([&] {
auto msg = e.execute_cql("select * from cf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)}, {}, {{int32_type->decompose(1)}} }});
});
BOOST_TEST_PASSPOINT();
// verify that the view row still exists too.
// This check failing is issue #3362.
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
});
}
// The following are more test for issue #3362, same as test_3362_not_ttls
// and test_3362_with_ttls, just with a collection with items "1" and "2"
// instead of separate columns a and b. For brevity, comments were removed,
// so refer to the comments in the original code above.
enum class collection_kind { set, list, map };
void do_test_3362_no_ttls_with_collections(cql_test_env& e, collection_kind t) {
sstring type, pref, suf;
switch(t) {
case collection_kind::set:
type = "set<int>";
pref = "{";
suf = "}";
break;
case collection_kind::list:
type = "list<int>";
pref = "[";
suf = "]";
break;
case collection_kind::map:
type = "map<int, int>";
pref = "{";
suf = " : 17}";
break;
}
e.execute_cql(sprint("create table cf (p int, c int, a %s, primary key (p, c))", type)).get();
e.execute_cql("create materialized view vcf as select p, c from cf "
"where p is not null and c is not null "
"primary key (p, c)").get();
e.execute_cql(sprint("update cf using timestamp 10 set a = a + %s2%s where p = 1 and c = 1", pref, suf)).get();
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
e.execute_cql(sprint("update cf using timestamp 20 set a = a + %s1%s where p = 1 and c = 1", pref, suf)).get();
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
if (t == collection_kind::map) {
e.execute_cql("delete a[1] from cf using timestamp 21 where p = 1 and c = 1").get();
} else {
e.execute_cql(sprint("update cf using timestamp 21 set a = a - %s1%s where p = 1 and c = 1", pref, suf)).get();
}
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
if (t == collection_kind::map) {
e.execute_cql("delete a[2] from cf using timestamp 11 where p = 1 and c = 1").get();
} else {
e.execute_cql(sprint("update cf using timestamp 11 set a = a - %s2%s where p = 1 and c = 1", pref, suf)).get();
}
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().is_empty();
});
e.execute_cql(sprint("update cf using timestamp 12 set a = a + %s2%s where p = 1 and c = 1", pref, suf)).get();
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
}
SEASTAR_TEST_CASE(test_3362_no_ttls_with_set) {
return do_with_cql_env_thread([] (auto& e) {
do_test_3362_no_ttls_with_collections(e, collection_kind::set);
});
}
SEASTAR_TEST_CASE(test_3362_no_ttls_with_list) {
return do_with_cql_env_thread([] (auto& e) {
do_test_3362_no_ttls_with_collections(e, collection_kind::list);
});
}
SEASTAR_TEST_CASE(test_3362_no_ttls_with_map) {
return do_with_cql_env_thread([] (auto& e) {
do_test_3362_no_ttls_with_collections(e, collection_kind::map);
});
}
void do_test_3362_with_ttls_with_collections(cql_test_env& e, collection_kind t) {
sstring type, pref, suf;
switch(t) {
case collection_kind::set:
type = "set<int>";
pref = "{";
suf = "}";
break;
case collection_kind::list:
type = "list<int>";
pref = "[";
suf = "]";
break;
case collection_kind::map:
type = "map<int, int>";
pref = "{";
suf = " : 17}";
break;
}
e.execute_cql(sprint("create table cf (p int, c int, a %s, primary key (p, c))", type)).get();
e.execute_cql("create materialized view vcf as select p, c from cf "
"where p is not null and c is not null "
"primary key (p, c)").get();
e.execute_cql(sprint("update cf using timestamp 2 and ttl 5 set a = a + %s1%s where p = 1 and c = 1", pref, suf)).get();
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
e.execute_cql(sprint("update cf using timestamp 1 set a = a + %s2%s where p = 1 and c = 1", pref, suf)).get();
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
forward_jump_clocks(6s);
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
}
SEASTAR_TEST_CASE(test_3362_with_ttls_with_set) {
return do_with_cql_env_thread([] (auto& e) {
do_test_3362_with_ttls_with_collections(e, collection_kind::set);
});
}
SEASTAR_TEST_CASE(test_3362_with_ttls_with_list) {
return do_with_cql_env_thread([] (auto& e) {
do_test_3362_with_ttls_with_collections(e, collection_kind::list);
});
}
SEASTAR_TEST_CASE(test_3362_with_ttls_with_map) {
return do_with_cql_env_thread([] (auto& e) {
do_test_3362_with_ttls_with_collections(e, collection_kind::map);
});
}
// This is a version of test_3362_with_ttls with frozen collection fields
// instead of integer fields in test_3362_with_ttls. The intention is to
// verify that we properly fixed #3362 in this case - by replacing the
// frozen collection by a single virtual cell, not a collection.
SEASTAR_TEST_CASE(test_3362_with_ttls_frozen) {
return do_with_cql_env_thread([] (auto& e) {
e.execute_cql("create table cf (p int, c int, a frozen<set<int>>, b frozen<set<int>>, primary key (p, c))").get();
e.execute_cql("create materialized view vcf as select p, c from cf "
"where p is not null and c is not null "
"primary key (p, c)").get();
BOOST_TEST_PASSPOINT();
e.execute_cql("update cf using timestamp 2 and ttl 5 set a = {1,2} where p = 1 and c = 1").get();
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
BOOST_TEST_PASSPOINT();
e.execute_cql("update cf using timestamp 1 set b = {3,4} where p = 1 and c = 1").get();
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
forward_jump_clocks(6s);
BOOST_TEST_PASSPOINT();
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
});
}
// This is a version of test_3362_with_ttls with the added twist that the
// unselected column involved did not exist when the base table and view
// were originally created, but only added later with an "alter table".
// For this test to work, "alter table" will need to add the virtual
// columns in the view table for the newly created unselected column in
// the base table.
SEASTAR_TEST_CASE(test_3362_with_ttls_alter_add) {
return do_with_cql_env_thread([] (auto& e) {
e.execute_cql("create table cf (p int, c int, primary key (p, c))").get();
e.execute_cql("create materialized view vcf as select p, c from cf "
"where p is not null and c is not null "
"primary key (p, c)").get();
// Add with "alter table" two additional columns to the base table -
// a and b. These are not selected in the materialized view, and we
// want to check that they are treated like unselected columns
// (namely, virtual columns are added to the view).
e.execute_cql("alter table cf add a int").get();
e.execute_cql("alter table cf add b int").get();
BOOST_TEST_PASSPOINT();
e.execute_cql("update cf using timestamp 2 and ttl 5 set a = 1 where p = 1 and c = 1").get();
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
BOOST_TEST_PASSPOINT();
e.execute_cql("update cf using timestamp 1 set b = 1 where p = 1 and c = 1").get();
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
forward_jump_clocks(6s);
BOOST_TEST_PASSPOINT();
eventually([&] {
auto msg = e.execute_cql("select * from cf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)}, {}, {{int32_type->decompose(1)}} }});
});
BOOST_TEST_PASSPOINT();
eventually([&] {
auto msg = e.execute_cql("select * from vcf where p = 1 and c = 1").get0();
assert_that(msg).is_rows().with_rows({{ {int32_type->decompose(1)}, {int32_type->decompose(1)} }});
});
});
}
// test_3362_with_ttls_alter_add() above is about handling changes to virtual
// columns as the base table columns change, but only for the "add" case.
// Theoretically we could have had problems in the "drop" and "rename" cases
// as well, but today, those are not supported:
// 1. Today we do not allow "alter table drop" to drop any column from a base
// table with views - even unselected columns.
// If we every do allow this, we need to also check that we drop the
// virtual column from the view.
// 2. Today we do not allow "alter table rename" to rename any non-pk
// column, so unselected columns also cannot be renamed. If this
// limitation is ever lifted, we will need to check that if we
// rename an unselected base column, the virtual column in the view is
// also renamed.

View File

@@ -3298,3 +3298,52 @@ SEASTAR_TEST_CASE(test_alter_table_with_updates) {
});
});
}
// Test that a regular column which we did not add to the view is really
// not in the view. Even if to fix issue #3362 we add "virtual cells"
// for the unselected columns, those should not be visible to the end-user
// of the view table.
SEASTAR_TEST_CASE(test_unselected_column) {
return do_with_cql_env_thread([] (auto& e) {
e.execute_cql("create table cf (p int, c int, x int, y list<int>, z set<int>, w map<int,int>, primary key (p, c))").get();
e.execute_cql("create materialized view vcf as select p, c from cf "
"where p is not null and c is not null "
"primary key (c, p)").get();
e.execute_cql("insert into cf (p, c, x) values (1, 2, 3)").get();
BOOST_TEST_PASSPOINT();
auto msg = e.execute_cql("select * from cf").get0();
assert_that(msg).is_rows().with_size(1)
.with_row({{int32_type->decompose(1)}, {int32_type->decompose(2)}, {}, {int32_type->decompose(3)}, {}, {}});
BOOST_TEST_PASSPOINT();
// Check that when we ask for all of vcf's columns, we only get the
// ones we actually selected - c and p, not x, y, z, or w:
eventually([&] {
auto msg = e.execute_cql("select * from vcf").get0();
assert_that(msg).is_rows().with_size(1)
.with_row({{int32_type->decompose(2)}, {int32_type->decompose(1)}});
});
// Check that we cannot explicitly select the x, y, z or w columns in
// vcf as they are not one of the columns we selected for the view.
try {
e.execute_cql("select x from vcf").get();
BOOST_ASSERT(false);
} catch (exceptions::invalid_request_exception&) {
// we expect: exceptions::invalid_request_exception: Undefined name x in selection clause
}
try {
e.execute_cql("select y from vcf").get();
BOOST_ASSERT(false);
} catch (exceptions::invalid_request_exception&) {
}
try {
e.execute_cql("select z from vcf").get();
BOOST_ASSERT(false);
} catch (exceptions::invalid_request_exception&) {
}
try {
e.execute_cql("select w from vcf").get();
BOOST_ASSERT(false);
} catch (exceptions::invalid_request_exception&) {
}
});
}