/*
 * Copyright (C) 2020 ScyllaDB
 */

/*
 * This file is part of Scylla.
 *
 * Scylla is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Scylla is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "mutation.hh"
#include "schema.hh"

#include "split.hh"
#include "log.hh"

// A write (live or dead cell) to a single atomic column.
struct atomic_column_update {
    column_id id;
    atomic_cell cell;
};

// see the comment inside `clustered_row_insert` for motivation for separating
// nonatomic deletions from nonatomic updates

// A tombstone covering a whole non-atomic column.
struct nonatomic_column_deletion {
    column_id id;
    tombstone t;
};

// Writes to individual cells of a non-atomic column.
struct nonatomic_column_update {
    column_id id;
    utils::chunked_vector<std::pair<bytes, atomic_cell>> cells;
};

// All static-row changes of one mutation that share a timestamp and a ttl.
struct static_row_update {
    gc_clock::duration ttl;
    std::vector<atomic_column_update> atomic_entries;
    std::vector<nonatomic_column_deletion> nonatomic_deletions;
    std::vector<nonatomic_column_update> nonatomic_updates;
};

// Changes to one clustered row that share the live row marker's timestamp and ttl.
struct clustered_row_insert {
    gc_clock::duration ttl;
    clustering_key key;
    row_marker marker;
    std::vector<atomic_column_update> atomic_entries;
    std::vector<nonatomic_column_deletion> nonatomic_deletions;
    // INSERTs can't express updates of individual cells inside a non-atomic column
    // (without deleting the entire field first), so there is no `nonatomic_updates` field.
    // Overwriting a nonatomic column inside an INSERT will be split into two changes:
    // one with a nonatomic deletion, and one with a nonatomic update.
};

// Changes to one clustered row that share a timestamp and a ttl and are not an insert.
struct clustered_row_update {
    gc_clock::duration ttl;
    clustering_key key;
    std::vector<atomic_column_update> atomic_entries;
    std::vector<nonatomic_column_deletion> nonatomic_deletions;
    std::vector<nonatomic_column_update> nonatomic_updates;
};

// Deletion of a single clustered row.
struct clustered_row_deletion {
    clustering_key key;
    tombstone t;
};

// Deletion of a range of clustered rows.
struct clustered_range_deletion {
    range_tombstone rt;
};

// Deletion of the whole partition.
struct partition_deletion {
    tombstone t;
};

// Every change found in a mutation for one particular timestamp.
struct batch {
    std::vector<static_row_update> static_updates;
    std::vector<clustered_row_insert> clustered_inserts;
    std::vector<clustered_row_update> clustered_updates;
    std::vector<clustered_row_deletion> clustered_row_deletions;
    std::vector<clustered_range_deletion> clustered_range_deletions;
    std::optional<partition_deletion> partition_deletions;
};

// Changes of a mutation, keyed by write timestamp.
using set_of_changes = std::map<api::timestamp_type, batch>;

// Cell-level changes of a single row that share a (timestamp, ttl) pair.
struct row_update {
    std::vector<atomic_column_update> atomic_entries;
    std::vector<nonatomic_column_deletion> nonatomic_deletions;
    std::vector<nonatomic_column_update> nonatomic_updates;
};

// Groups the cells of `r` by their (timestamp, ttl) pair.
//
// `ckind` selects which column set of `schema` the column ids in `r` refer to.
// A cell that is not "live and has ttl" is filed under a ttl of 0.
// For a non-atomic column every contained cell is grouped by its own timestamp/ttl,
// and the column tombstone (if any) is recorded as a nonatomic deletion under
// (tombstone timestamp, 0).
static std::map<std::pair<api::timestamp_type, gc_clock::duration>, row_update>
extract_row_updates(const row& r, column_kind ckind, const schema& schema) {
    std::map<std::pair<api::timestamp_type, gc_clock::duration>, row_update> result;
    r.for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
        auto& cdef = schema.column_at(ckind, id);
        if (cdef.is_atomic()) {
            auto view = cell.as_atomic_cell(cdef);
            auto timestamp_and_ttl = std::pair(
                    view.timestamp(),
                    view.is_live_and_has_ttl() ? view.ttl() : gc_clock::duration(0)
            );
            result[timestamp_and_ttl].atomic_entries.push_back({id, atomic_cell(*cdef.type, view)});
            return;
        }

        cell.as_collection_mutation().with_deserialized(*cdef.type, [&] (collection_mutation_view_description mview) {
            auto desc = mview.materialize(*cdef.type);
            for (auto& [k, v]: desc.cells) {
                auto timestamp_and_ttl = std::pair(
                        v.timestamp(),
                        v.is_live_and_has_ttl() ? v.ttl() : gc_clock::duration(0)
                );
                auto& updates = result[timestamp_and_ttl].nonatomic_updates;
                // Cells of one column are visited consecutively, so it is enough to look at
                // the last entry to decide whether this column already has an update record.
                if (updates.empty() || updates.back().id != id) {
                    updates.push_back({id, {}});
                }
                updates.back().cells.push_back({std::move(k), std::move(v)});
            }

            if (desc.tomb) {
                auto timestamp_and_ttl = std::pair(desc.tomb.timestamp, gc_clock::duration(0));
                result[timestamp_and_ttl].nonatomic_deletions.push_back({id, desc.tomb});
            }
        });
    });
    return result;
}

// Decomposes `base_mutation` into per-timestamp batches of changes.
//
// Static row and clustered row cells are grouped by (timestamp, ttl) using
// `extract_row_updates`. For a clustered row with a live marker, the group matching the
// marker's timestamp and ttl becomes an insert; every other group becomes an update.
// Row, range and partition tombstones are filed under their own timestamps.
set_of_changes extract_changes(const mutation& base_mutation, const schema& base_schema) {
    set_of_changes res;
    auto& p = base_mutation.partition();

    auto sr_updates = extract_row_updates(p.static_row().get(), column_kind::static_column, base_schema);
    for (auto& [k, up]: sr_updates) {
        auto [timestamp, ttl] = k;
        res[timestamp].static_updates.push_back({
                ttl,
                std::move(up.atomic_entries),
                std::move(up.nonatomic_deletions),
                std::move(up.nonatomic_updates)
        });
    }

    for (const rows_entry& cr : p.clustered_rows()) {
        auto cr_updates = extract_row_updates(cr.row().cells(), column_kind::regular_column, base_schema);

        const auto& marker = cr.row().marker();
        auto marker_timestamp = marker.timestamp();
        auto marker_ttl = marker.is_expiring() ? marker.ttl() : gc_clock::duration(0);
        if (marker.is_live()) {
            // make sure that an entry corresponding to the row marker's timestamp and ttl is in the map
            (void)cr_updates[std::pair(marker_timestamp, marker_ttl)];
        }

        auto is_insert = [&] (api::timestamp_type timestamp, gc_clock::duration ttl) {
            if (!marker.is_live()) {
                return false;
            }

            return timestamp == marker_timestamp && ttl == marker_ttl;
        };

        for (auto& [k, up]: cr_updates) {
            auto [timestamp, ttl] = k;

            if (is_insert(timestamp, ttl)) {
                res[timestamp].clustered_inserts.push_back({
                        ttl,
                        cr.key(),
                        marker,
                        std::move(up.atomic_entries),
                        std::move(up.nonatomic_deletions)
                });
                if (!up.nonatomic_updates.empty()) {
                    // nonatomic updates cannot be expressed with an INSERT.
                    res[timestamp].clustered_updates.push_back({
                            ttl,
                            cr.key(),
                            {},
                            {},
                            std::move(up.nonatomic_updates)
                    });
                }
            } else {
                res[timestamp].clustered_updates.push_back({
                        ttl,
                        cr.key(),
                        std::move(up.atomic_entries),
                        std::move(up.nonatomic_deletions),
                        std::move(up.nonatomic_updates)
                });
            }
        }

        auto row_tomb = cr.row().deleted_at().regular();
        if (row_tomb) {
            res[row_tomb.timestamp].clustered_row_deletions.push_back({cr.key(), row_tomb});
        }
    }

    for (const auto& rt: p.row_tombstones()) {
        if (rt.tomb.timestamp != api::missing_timestamp) {
            res[rt.tomb.timestamp].clustered_range_deletions.push_back({rt});
        }
    }

    auto partition_tomb_timestamp = p.partition_tombstone().timestamp;
    if (partition_tomb_timestamp != api::missing_timestamp) {
        res[partition_tomb_timestamp].partition_deletions = {p.partition_tombstone()};
    }

    return res;
}

namespace cdc {

// Decides whether `base_mutation` has to be split into several mutations.
//
// Returns true as soon as any of the following is observed:
//  - two writes with different timestamps, or with different ttls;
//  - a static row together with clustered rows;
//  - a cell of a non-atomic column written in a row that has a live row marker;
//  - a row tombstone together with cells or any other ttl-carrying write;
//  - range tombstones together with a static row or clustered rows;
//  - a partition tombstone together with anything else.
// Also returns true when no timestamp was found at all.
bool should_split(const mutation& base_mutation, const schema& base_schema) {
    auto& p = base_mutation.partition();

    api::timestamp_type found_ts = api::missing_timestamp;
    std::optional<gc_clock::duration> found_ttl; // 0 = "no ttl"

    // Records the first (ts, ttl) seen; returns true if a later one differs from it.
    auto check_or_set = [&] (api::timestamp_type ts, gc_clock::duration ttl) {
        if (found_ts != api::missing_timestamp && found_ts != ts) {
            return true;
        }
        found_ts = ts;

        if (found_ttl && *found_ttl != ttl) {
            return true;
        }
        found_ttl = ttl;

        return false;
    };

    bool had_static_row = false;

    bool should_split = false;
    p.static_row().get().for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
        had_static_row = true;

        auto& cdef = base_schema.column_at(column_kind::static_column, id);
        if (cdef.is_atomic()) {
            auto view = cell.as_atomic_cell(cdef);
            if (check_or_set(view.timestamp(), view.is_live_and_has_ttl() ? view.ttl() : gc_clock::duration(0))) {
                should_split = true;
            }
            return;
        }

        cell.as_collection_mutation().with_deserialized(*cdef.type, [&] (collection_mutation_view_description mview) {
            auto desc = mview.materialize(*cdef.type);
            for (auto& [k, v]: desc.cells) {
                if (check_or_set(v.timestamp(), v.is_live_and_has_ttl() ? v.ttl() : gc_clock::duration(0))) {
                    should_split = true;
                    return;
                }
            }

            if (desc.tomb) {
                if (check_or_set(desc.tomb.timestamp, gc_clock::duration(0))) {
                    should_split = true;
                    return;
                }
            }
        });
    });

    if (should_split) {
        return true;
    }

    bool had_clustered_row = false;

    if (!p.clustered_rows().empty() && had_static_row) {
        return true;
    }
    for (const rows_entry& cr : p.clustered_rows()) {
        had_clustered_row = true;

        const auto& marker = cr.row().marker();
        if (marker.is_live() && check_or_set(marker.timestamp(), marker.is_expiring() ? marker.ttl() : gc_clock::duration(0))) {
            return true;
        }

        bool is_insert = marker.is_live();

        bool had_cells = false;
        cr.row().cells().for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
            had_cells = true;

            auto& cdef = base_schema.column_at(column_kind::regular_column, id);
            if (cdef.is_atomic()) {
                auto view = cell.as_atomic_cell(cdef);
                if (check_or_set(view.timestamp(), view.is_live_and_has_ttl() ? view.ttl() : gc_clock::duration(0))) {
                    should_split = true;
                }
                return;
            }

            cell.as_collection_mutation().with_deserialized(*cdef.type, [&] (collection_mutation_view_description mview) {
                for (auto& [k, v]: mview.cells) {
                    if (check_or_set(v.timestamp(), v.is_live_and_has_ttl() ? v.ttl() : gc_clock::duration(0))) {
                        should_split = true;
                        return;
                    }

                    if (is_insert) {
                        // nonatomic updates cannot be expressed with an INSERT.
                        should_split = true;
                        return;
                    }
                }

                if (mview.tomb) {
                    if (check_or_set(mview.tomb.timestamp, gc_clock::duration(0))) {
                        should_split = true;
                        return;
                    }
                }
            });
        });

        if (should_split) {
            return true;
        }

        auto row_tomb = cr.row().deleted_at().regular();
        if (row_tomb) {
            if (had_cells) {
                return true;
            }

            // This row has no cells, but `found_ttl` is shared by the whole mutation: it may
            // already have been set by this row's live marker or by the marker/cells of an
            // earlier clustered row (e.g. a batch that updates one row and deletes another).
            // A row deletion mixed with such a write is reported as "needs splitting" rather
            // than asserted away, so valid input can no longer abort the process.
            if (found_ttl) {
                return true;
            }

            if (found_ts != api::missing_timestamp && found_ts != row_tomb.timestamp) {
                return true;
            }

            found_ts = row_tomb.timestamp;
        }
    }

    if (!p.row_tombstones().empty() && (had_static_row || had_clustered_row)) {
        return true;
    }

    for (const auto& rt: p.row_tombstones()) {
        if (rt.tomb) {
            if (found_ts != api::missing_timestamp && found_ts != rt.tomb.timestamp) {
                return true;
            }

            found_ts = rt.tomb.timestamp;
        }
    }

    if (p.partition_tombstone().timestamp != api::missing_timestamp
            && (!p.row_tombstones().empty() || had_static_row || had_clustered_row)) {
        return true;
    }

    // A mutation with no timestamp will be split into 0 mutations
    return found_ts == api::missing_timestamp;
}

// Splits `base_mutation` into single-change mutations and passes each of them to `f`.
//
// For every timestamp found by `extract_changes`, one mutation is built per static row
// update, clustered row insert, clustered row update, row deletion, range deletion and
// partition deletion (in that order). `f` receives the mutation, the change timestamp,
// the serialized timeuuid generated from that timestamp, and `batch_no`.
// `batch_no` starts at 0 for each timestamp and is not modified by this function itself.
//
// NOTE(review): the callback signature below was reconstructed from the call sites in this
// function; the last parameter is presumably `int&` so that `f` can advance the batch
// number — confirm against the declaration in split.hh.
void for_each_change(const mutation& base_mutation, const schema_ptr& base_schema,
        seastar::noncopyable_function<void(mutation, api::timestamp_type, bytes, int&)> f) {
    auto changes = extract_changes(base_mutation, *base_schema);
    auto pk = base_mutation.key();

    for (auto& [change_ts, btch] : changes) {
        auto tuuid = timeuuid_type->decompose(generate_timeuuid(change_ts));
        int batch_no = 0;

        for (auto& sr_update : btch.static_updates) {
            mutation m(base_schema, pk);
            for (auto& atomic_update : sr_update.atomic_entries) {
                auto& cdef = base_schema->column_at(column_kind::static_column, atomic_update.id);
                m.set_static_cell(cdef, std::move(atomic_update.cell));
            }
            for (auto& nonatomic_delete : sr_update.nonatomic_deletions) {
                auto& cdef = base_schema->column_at(column_kind::static_column, nonatomic_delete.id);
                m.set_static_cell(cdef, collection_mutation_description{nonatomic_delete.t, {}}.serialize(*cdef.type));
            }
            for (auto& nonatomic_update : sr_update.nonatomic_updates) {
                auto& cdef = base_schema->column_at(column_kind::static_column, nonatomic_update.id);
                m.set_static_cell(cdef, collection_mutation_description{{}, std::move(nonatomic_update.cells)}.serialize(*cdef.type));
            }
            f(std::move(m), change_ts, tuuid, batch_no);
        }

        for (auto& cr_insert : btch.clustered_inserts) {
            mutation m(base_schema, pk);

            auto& row = m.partition().clustered_row(*base_schema, cr_insert.key);
            for (auto& atomic_update : cr_insert.atomic_entries) {
                auto& cdef = base_schema->column_at(column_kind::regular_column, atomic_update.id);
                row.cells().apply(cdef, std::move(atomic_update.cell));
            }
            for (auto& nonatomic_delete : cr_insert.nonatomic_deletions) {
                auto& cdef = base_schema->column_at(column_kind::regular_column, nonatomic_delete.id);
                row.cells().apply(cdef, collection_mutation_description{nonatomic_delete.t, {}}.serialize(*cdef.type));
            }
            row.apply(cr_insert.marker);

            f(std::move(m), change_ts, tuuid, batch_no);
        }

        for (auto& cr_update : btch.clustered_updates) {
            mutation m(base_schema, pk);

            auto& row = m.partition().clustered_row(*base_schema, cr_update.key).cells();
            for (auto& atomic_update : cr_update.atomic_entries) {
                auto& cdef = base_schema->column_at(column_kind::regular_column, atomic_update.id);
                row.apply(cdef, std::move(atomic_update.cell));
            }
            for (auto& nonatomic_delete : cr_update.nonatomic_deletions) {
                auto& cdef = base_schema->column_at(column_kind::regular_column, nonatomic_delete.id);
                row.apply(cdef, collection_mutation_description{nonatomic_delete.t, {}}.serialize(*cdef.type));
            }
            for (auto& nonatomic_update : cr_update.nonatomic_updates) {
                auto& cdef = base_schema->column_at(column_kind::regular_column, nonatomic_update.id);
                row.apply(cdef, collection_mutation_description{{}, std::move(nonatomic_update.cells)}.serialize(*cdef.type));
            }

            f(std::move(m), change_ts, tuuid, batch_no);
        }

        for (auto& cr_delete : btch.clustered_row_deletions) {
            mutation m(base_schema, pk);
            m.partition().apply_delete(*base_schema, cr_delete.key, cr_delete.t);
            f(std::move(m), change_ts, tuuid, batch_no);
        }

        for (auto& crange_delete : btch.clustered_range_deletions) {
            mutation m(base_schema, pk);
            m.partition().apply_delete(*base_schema, crange_delete.rt);
            f(std::move(m), change_ts, tuuid, batch_no);
        }

        if (btch.partition_deletions) {
            mutation m(base_schema, pk);
            m.partition().apply(btch.partition_deletions->t);
            f(std::move(m), change_ts, tuuid, batch_no);
        }
    }
}

} // namespace cdc