Files
scylladb/cdc/split.cc
Piotr Dulikowski 204e204586 cdc: do not attempt to log empty mutations
It is possible to produce an empty mutation using CQL. For example, the
following query:

DELETE FROM ks.tbl WHERE pk = 0 AND ck < 1 AND ck > 2;

will attempt to delete from an empty range of rows. This is translated
to the following mutation:

{ks.tbl {key: pk{000400000000}, token:-3485513579396041028}
 {mutation_partition:
  static: cont=1 {row: },
  clustered: {}}}

Such mutation does not contain any timestamp, therefore it is difficult
to determine what timestamp was used while making the query. This is
problematic for CDC, because an entry in CDC log should be written with
the same timestamp as a part of the mutation.

Because an empty mutation does not modify the table in any way, we can
safely skip logging such mutations in CDC and still preserve the
ability to reconstruct the current state of the base table from full
CDC log.

Tests: unit(dev)
2020-03-05 08:32:54 +01:00

453 lines
16 KiB
C++

/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include "mutation.hh"
#include "schema.hh"
#include "split.hh"
struct atomic_column_update {
column_id id;
atomic_cell cell;
};
// see the comment inside `clustered_row_insert` for motivation for separating
// nonatomic deletions from nonatomic updates
struct nonatomic_column_deletion {
column_id id;
tombstone t;
};
struct nonatomic_column_update {
column_id id;
utils::chunked_vector<std::pair<bytes, atomic_cell>> cells;
};
struct static_row_update {
gc_clock::duration ttl;
std::vector<atomic_column_update> atomic_entries;
std::vector<nonatomic_column_deletion> nonatomic_deletions;
std::vector<nonatomic_column_update> nonatomic_updates;
};
struct clustered_row_insert {
gc_clock::duration ttl;
clustering_key key;
row_marker marker;
std::vector<atomic_column_update> atomic_entries;
std::vector<nonatomic_column_deletion> nonatomic_deletions;
// INSERTs can't express updates of individual cells inside a non-atomic
// (without deleting the entire field first), so no `nonatomic_updates` field
// overwriting a nonatomic column inside an INSERT will be split into two changes:
// one with a nonatomic deletion, and one with a nonatomic update
};
struct clustered_row_update {
gc_clock::duration ttl;
clustering_key key;
std::vector<atomic_column_update> atomic_entries;
std::vector<nonatomic_column_deletion> nonatomic_deletions;
std::vector<nonatomic_column_update> nonatomic_updates;
};
struct clustered_row_deletion {
clustering_key key;
tombstone t;
};
struct clustered_range_deletion {
range_tombstone rt;
};
struct partition_deletion {
tombstone t;
};
struct batch {
std::vector<static_row_update> static_updates;
std::vector<clustered_row_insert> clustered_inserts;
std::vector<clustered_row_update> clustered_updates;
std::vector<clustered_row_deletion> clustered_row_deletions;
std::vector<clustered_range_deletion> clustered_range_deletions;
std::optional<partition_deletion> partition_deletions;
};
using set_of_changes = std::map<api::timestamp_type, batch>;
struct row_update {
std::vector<atomic_column_update> atomic_entries;
std::vector<nonatomic_column_deletion> nonatomic_deletions;
std::vector<nonatomic_column_update> nonatomic_updates;
};
static
std::map<std::pair<api::timestamp_type, gc_clock::duration>, row_update>
extract_row_updates(const row& r, column_kind ckind, const schema& schema) {
std::map<std::pair<api::timestamp_type, gc_clock::duration>, row_update> result;
r.for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
auto& cdef = schema.column_at(ckind, id);
if (cdef.is_atomic()) {
auto view = cell.as_atomic_cell(cdef);
auto timestamp_and_ttl = std::pair(
view.timestamp(),
view.is_live_and_has_ttl() ? view.ttl() : gc_clock::duration(0)
);
result[timestamp_and_ttl].atomic_entries.push_back({id, atomic_cell(*cdef.type, view)});
return;
}
cell.as_collection_mutation().with_deserialized(*cdef.type, [&] (collection_mutation_view_description mview) {
auto desc = mview.materialize(*cdef.type);
for (auto& [k, v]: desc.cells) {
auto timestamp_and_ttl = std::pair(
v.timestamp(),
v.is_live_and_has_ttl() ? v.ttl() : gc_clock::duration(0)
);
auto& updates = result[timestamp_and_ttl].nonatomic_updates;
if (updates.empty() || updates.back().id != id) {
updates.push_back({id, {}});
}
updates.back().cells.push_back({std::move(k), std::move(v)});
}
if (desc.tomb) {
auto timestamp_and_ttl = std::pair(desc.tomb.timestamp, gc_clock::duration(0));
result[timestamp_and_ttl].nonatomic_deletions.push_back({id, desc.tomb});
}
});
});
return result;
};
set_of_changes extract_changes(const mutation& base_mutation, const schema& base_schema) {
set_of_changes res;
auto& p = base_mutation.partition();
auto sr_updates = extract_row_updates(p.static_row().get(), column_kind::static_column, base_schema);
for (auto& [k, up]: sr_updates) {
auto [timestamp, ttl] = k;
res[timestamp].static_updates.push_back({
ttl,
std::move(up.atomic_entries),
std::move(up.nonatomic_deletions),
std::move(up.nonatomic_updates)
});
}
for (const rows_entry& cr : p.clustered_rows()) {
auto cr_updates = extract_row_updates(cr.row().cells(), column_kind::regular_column, base_schema);
const auto& marker = cr.row().marker();
auto marker_timestamp = marker.timestamp();
auto marker_ttl = marker.is_expiring() ? marker.ttl() : gc_clock::duration(0);
if (marker.is_live()) {
// make sure that an entry corresponding to the row marker's timestamp and ttl is in the map
(void)cr_updates[std::pair(marker_timestamp, marker_ttl)];
}
auto is_insert = [&] (api::timestamp_type timestamp, gc_clock::duration ttl) {
if (!marker.is_live()) {
return false;
}
return timestamp == marker_timestamp && ttl == marker_ttl;
};
for (auto& [k, up]: cr_updates) {
auto [timestamp, ttl] = k;
if (is_insert(timestamp, ttl)) {
res[timestamp].clustered_inserts.push_back({
ttl,
cr.key(),
marker,
std::move(up.atomic_entries),
std::move(up.nonatomic_deletions)
});
if (!up.nonatomic_updates.empty()) {
// nonatomic updates cannot be expressed with an INSERT.
res[timestamp].clustered_updates.push_back({
ttl,
cr.key(),
{},
{},
std::move(up.nonatomic_updates)
});
}
} else {
res[timestamp].clustered_updates.push_back({
ttl,
cr.key(),
std::move(up.atomic_entries),
std::move(up.nonatomic_deletions),
std::move(up.nonatomic_updates)
});
}
}
auto row_tomb = cr.row().deleted_at().regular();
if (row_tomb) {
res[row_tomb.timestamp].clustered_row_deletions.push_back({cr.key(), row_tomb});
}
}
for (const auto& rt: p.row_tombstones()) {
if (rt.tomb.timestamp != api::missing_timestamp) {
res[rt.tomb.timestamp].clustered_range_deletions.push_back({rt});
}
}
auto partition_tomb_timestamp = p.partition_tombstone().timestamp;
if (partition_tomb_timestamp != api::missing_timestamp) {
res[partition_tomb_timestamp].partition_deletions = {p.partition_tombstone()};
}
return res;
}
namespace cdc {
bool should_split(const mutation& base_mutation, const schema& base_schema) {
auto& p = base_mutation.partition();
api::timestamp_type found_ts = api::missing_timestamp;
std::optional<gc_clock::duration> found_ttl; // 0 = "no ttl"
auto check_or_set = [&] (api::timestamp_type ts, gc_clock::duration ttl) {
if (found_ts != api::missing_timestamp && found_ts != ts) {
return true;
}
found_ts = ts;
if (found_ttl && *found_ttl != ttl) {
return true;
}
found_ttl = ttl;
return false;
};
bool had_static_row = false;
bool should_split = false;
p.static_row().get().for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
had_static_row = true;
auto& cdef = base_schema.column_at(column_kind::static_column, id);
if (cdef.is_atomic()) {
auto view = cell.as_atomic_cell(cdef);
if (check_or_set(view.timestamp(), view.is_live_and_has_ttl() ? view.ttl() : gc_clock::duration(0))) {
should_split = true;
}
return;
}
cell.as_collection_mutation().with_deserialized(*cdef.type, [&] (collection_mutation_view_description mview) {
auto desc = mview.materialize(*cdef.type);
for (auto& [k, v]: desc.cells) {
if (check_or_set(v.timestamp(), v.is_live_and_has_ttl() ? v.ttl() : gc_clock::duration(0))) {
should_split = true;
return;
}
}
if (desc.tomb) {
if (check_or_set(desc.tomb.timestamp, gc_clock::duration(0))) {
should_split = true;
return;
}
}
});
});
if (should_split) {
return true;
}
bool had_clustered_row = false;
if (!p.clustered_rows().empty() && had_static_row) {
return true;
}
for (const rows_entry& cr : p.clustered_rows()) {
had_clustered_row = true;
const auto& marker = cr.row().marker();
if (marker.is_live() && check_or_set(marker.timestamp(), marker.is_expiring() ? marker.ttl() : gc_clock::duration(0))) {
return true;
}
bool is_insert = marker.is_live();
bool had_cells = false;
cr.row().cells().for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
had_cells = true;
auto& cdef = base_schema.column_at(column_kind::regular_column, id);
if (cdef.is_atomic()) {
auto view = cell.as_atomic_cell(cdef);
if (check_or_set(view.timestamp(), view.is_live_and_has_ttl() ? view.ttl() : gc_clock::duration(0))) {
should_split = true;
}
return;
}
cell.as_collection_mutation().with_deserialized(*cdef.type, [&] (collection_mutation_view_description mview) {
for (auto& [k, v]: mview.cells) {
if (check_or_set(v.timestamp(), v.is_live_and_has_ttl() ? v.ttl() : gc_clock::duration(0))) {
should_split = true;
return;
}
if (is_insert) {
// nonatomic updates cannot be expressed with an INSERT.
should_split = true;
return;
}
}
if (mview.tomb) {
if (check_or_set(mview.tomb.timestamp, gc_clock::duration(0))) {
should_split = true;
return;
}
}
});
});
if (should_split) {
return true;
}
auto row_tomb = cr.row().deleted_at().regular();
if (row_tomb) {
if (had_cells) {
return true;
}
// there were no cells, so no ttl
assert(!found_ttl);
if (found_ts != api::missing_timestamp && found_ts != row_tomb.timestamp) {
return true;
}
found_ts = row_tomb.timestamp;
}
}
if (!p.row_tombstones().empty() && (had_static_row || had_clustered_row)) {
return true;
}
for (const auto& rt: p.row_tombstones()) {
if (rt.tomb) {
if (found_ts != api::missing_timestamp && found_ts != rt.tomb.timestamp) {
return true;
}
found_ts = rt.tomb.timestamp;
}
}
if (p.partition_tombstone().timestamp != api::missing_timestamp
&& (!p.row_tombstones().empty() || had_static_row || had_clustered_row)) {
return true;
}
// A mutation with no timestamp will be split into 0 mutations
return found_ts == api::missing_timestamp;
}
std::vector<mutation> split(const mutation& base_mutation, const schema_ptr& base_schema) {
auto changes = extract_changes(base_mutation, *base_schema);
std::vector<mutation> result;
for (auto& [change_ts, btch] : changes) {
for (auto& sr_update : btch.static_updates) {
result.emplace_back(base_schema, base_mutation.key());
auto& m = result.back();
for (auto& atomic_update : sr_update.atomic_entries) {
auto& cdef = base_schema->column_at(column_kind::static_column, atomic_update.id);
m.set_static_cell(cdef, std::move(atomic_update.cell));
}
for (auto& nonatomic_delete : sr_update.nonatomic_deletions) {
auto& cdef = base_schema->column_at(column_kind::static_column, nonatomic_delete.id);
m.set_static_cell(cdef, collection_mutation_description{nonatomic_delete.t, {}}.serialize(*cdef.type));
}
for (auto& nonatomic_update : sr_update.nonatomic_updates) {
auto& cdef = base_schema->column_at(column_kind::static_column, nonatomic_update.id);
m.set_static_cell(cdef, collection_mutation_description{{}, std::move(nonatomic_update.cells)}.serialize(*cdef.type));
}
}
for (auto& cr_insert : btch.clustered_inserts) {
result.emplace_back(base_schema, base_mutation.key());
auto& row = result.back().partition().clustered_row(*base_schema, cr_insert.key);
for (auto& atomic_update : cr_insert.atomic_entries) {
auto& cdef = base_schema->column_at(column_kind::regular_column, atomic_update.id);
row.cells().apply(cdef, std::move(atomic_update.cell));
}
for (auto& nonatomic_delete : cr_insert.nonatomic_deletions) {
auto& cdef = base_schema->column_at(column_kind::regular_column, nonatomic_delete.id);
row.cells().apply(cdef, collection_mutation_description{nonatomic_delete.t, {}}.serialize(*cdef.type));
}
row.apply(cr_insert.marker);
}
for (auto& cr_update : btch.clustered_updates) {
result.emplace_back(base_schema, base_mutation.key());
auto& row = result.back().partition().clustered_row(*base_schema, cr_update.key).cells();
for (auto& atomic_update : cr_update.atomic_entries) {
auto& cdef = base_schema->column_at(column_kind::regular_column, atomic_update.id);
row.apply(cdef, std::move(atomic_update.cell));
}
for (auto& nonatomic_delete : cr_update.nonatomic_deletions) {
auto& cdef = base_schema->column_at(column_kind::regular_column, nonatomic_delete.id);
row.apply(cdef, collection_mutation_description{nonatomic_delete.t, {}}.serialize(*cdef.type));
}
for (auto& nonatomic_update : cr_update.nonatomic_updates) {
auto& cdef = base_schema->column_at(column_kind::static_column, nonatomic_update.id);
row.apply(cdef, collection_mutation_description{{}, std::move(nonatomic_update.cells)}.serialize(*cdef.type));
}
}
for (auto& cr_delete : btch.clustered_row_deletions) {
result.emplace_back(base_schema, base_mutation.key());
result.back().partition().apply_delete(*base_schema, cr_delete.key, cr_delete.t);
}
for (auto& crange_delete : btch.clustered_range_deletions) {
result.emplace_back(base_schema, base_mutation.key());
result.back().partition().apply_delete(*base_schema, crange_delete.rt);
}
if (btch.partition_deletions) {
result.emplace_back(base_schema, base_mutation.key());
result.back().partition().apply(btch.partition_deletions->t);
}
}
return result;
}
} // namespace cdc