Commit aab6b0ee27 introduced the controversial new IMR format, which relied on a very template-heavy infrastructure to generate serialization and deserialization code via template meta-programming. The promise was that this new format, beyond solving the problems the previous open-coded representation had (working on linearized buffers), would speed up migrating other components to this IMR format, as the IMR infrastructure reduces code bloat, makes the code more readable via declarative type descriptions, and makes it safer. However, the results were almost the opposite. The template meta-programming used by the IMR infrastructure proved very hard to understand. Developers don't want to read or modify it. Maintainers don't want to see it being used anywhere else. In short, nobody wants to touch it. This commit does a conceptual revert of aab6b0ee27. A verbatim revert is not possible because related code evolved a lot since the merge. Also, going back to the previous code would mean we regress as we'd revert the move to fragmented buffers. So this revert is only conceptual: it changes the underlying infrastructure back to the previous open-coded one, but keeps the fragmented buffers, as well as the interface of the related components (to the extent possible). Fixes: #5578
271 lines
9.7 KiB
C++
271 lines
9.7 KiB
C++
/*
|
|
* Copyright (C) 2016 ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* This file is part of Scylla.
|
|
*
|
|
* Scylla is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Affero General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* Scylla is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "counters.hh"
|
|
#include "mutation.hh"
|
|
#include "combine.hh"
|
|
|
|
std::ostream& operator<<(std::ostream& os, const counter_id& id) {
|
|
return os << id.to_uuid();
|
|
}
|
|
|
|
std::ostream& operator<<(std::ostream& os, counter_shard_view csv) {
|
|
return os << "{global_shard id: " << csv.id() << " value: " << csv.value()
|
|
<< " clock: " << csv.logical_clock() << "}";
|
|
}
|
|
|
|
std::ostream& operator<<(std::ostream& os, counter_cell_view ccv) {
|
|
return os << "{counter_cell timestamp: " << ccv.timestamp() << " shards: {" << ::join(", ", ccv.shards()) << "}}";
|
|
}
|
|
|
|
void counter_cell_builder::do_sort_and_remove_duplicates()
|
|
{
|
|
boost::range::sort(_shards, [] (auto& a, auto& b) { return a.id() < b.id(); });
|
|
|
|
std::vector<counter_shard> new_shards;
|
|
new_shards.reserve(_shards.size());
|
|
for (auto& cs : _shards) {
|
|
if (new_shards.empty() || new_shards.back().id() != cs.id()) {
|
|
new_shards.emplace_back(cs);
|
|
} else {
|
|
new_shards.back().apply(cs);
|
|
}
|
|
}
|
|
_shards = std::move(new_shards);
|
|
_sorted = true;
|
|
}
|
|
|
|
static bool apply_in_place(const column_definition& cdef, atomic_cell_mutable_view dst, atomic_cell_mutable_view src)
|
|
{
|
|
auto dst_ccmv = counter_cell_mutable_view(dst);
|
|
auto src_ccmv = counter_cell_mutable_view(src);
|
|
auto dst_shards = dst_ccmv.shards();
|
|
auto src_shards = src_ccmv.shards();
|
|
|
|
auto dst_it = dst_shards.begin();
|
|
auto src_it = src_shards.begin();
|
|
|
|
while (src_it != src_shards.end()) {
|
|
while (dst_it != dst_shards.end() && dst_it->id() < src_it->id()) {
|
|
++dst_it;
|
|
}
|
|
if (dst_it == dst_shards.end() || dst_it->id() != src_it->id()) {
|
|
// Fast-path failed. Revert and fall back to the slow path.
|
|
if (dst_it == dst_shards.end()) {
|
|
--dst_it;
|
|
}
|
|
while (src_it != src_shards.begin()) {
|
|
--src_it;
|
|
while (dst_it->id() != src_it->id()) {
|
|
--dst_it;
|
|
}
|
|
src_it->swap_value_and_clock(*dst_it);
|
|
}
|
|
return false;
|
|
}
|
|
if (dst_it->logical_clock() < src_it->logical_clock()) {
|
|
dst_it->swap_value_and_clock(*src_it);
|
|
} else {
|
|
src_it->set_value_and_clock(*dst_it);
|
|
}
|
|
++src_it;
|
|
}
|
|
|
|
auto dst_ts = dst_ccmv.timestamp();
|
|
auto src_ts = src_ccmv.timestamp();
|
|
dst_ccmv.set_timestamp(std::max(dst_ts, src_ts));
|
|
src_ccmv.set_timestamp(dst_ts);
|
|
return true;
|
|
}
|
|
|
|
void counter_cell_view::apply(const column_definition& cdef, atomic_cell_or_collection& dst, atomic_cell_or_collection& src)
|
|
{
|
|
auto dst_ac = dst.as_atomic_cell(cdef);
|
|
auto src_ac = src.as_atomic_cell(cdef);
|
|
|
|
if (!dst_ac.is_live() || !src_ac.is_live()) {
|
|
if (dst_ac.is_live() || (!src_ac.is_live() && compare_atomic_cell_for_merge(dst_ac, src_ac) < 0)) {
|
|
std::swap(dst, src);
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (dst_ac.is_counter_update() && src_ac.is_counter_update()) {
|
|
auto src_v = src_ac.counter_update_value();
|
|
auto dst_v = dst_ac.counter_update_value();
|
|
dst = atomic_cell::make_live_counter_update(std::max(dst_ac.timestamp(), src_ac.timestamp()),
|
|
src_v + dst_v);
|
|
return;
|
|
}
|
|
|
|
assert(!dst_ac.is_counter_update());
|
|
assert(!src_ac.is_counter_update());
|
|
|
|
auto src_ccv = counter_cell_view(src_ac);
|
|
auto dst_ccv = counter_cell_view(dst_ac);
|
|
if (dst_ccv.shard_count() >= src_ccv.shard_count()) {
|
|
auto dst_amc = dst.as_mutable_atomic_cell(cdef);
|
|
auto src_amc = src.as_mutable_atomic_cell(cdef);
|
|
if (apply_in_place(cdef, dst_amc, src_amc)) {
|
|
return;
|
|
}
|
|
}
|
|
|
|
auto dst_shards = dst_ccv.shards();
|
|
auto src_shards = src_ccv.shards();
|
|
|
|
counter_cell_builder result;
|
|
combine(dst_shards.begin(), dst_shards.end(), src_shards.begin(), src_shards.end(),
|
|
result.inserter(), counter_shard_view::less_compare_by_id(), [] (auto& x, auto& y) {
|
|
return x.logical_clock() < y.logical_clock() ? y : x;
|
|
});
|
|
|
|
auto cell = result.build(std::max(dst_ac.timestamp(), src_ac.timestamp()));
|
|
src = std::exchange(dst, atomic_cell_or_collection(std::move(cell)));
|
|
}
|
|
|
|
std::optional<atomic_cell> counter_cell_view::difference(atomic_cell_view a, atomic_cell_view b)
|
|
{
|
|
assert(!a.is_counter_update());
|
|
assert(!b.is_counter_update());
|
|
|
|
if (!b.is_live() || !a.is_live()) {
|
|
if (b.is_live() || (!a.is_live() && compare_atomic_cell_for_merge(b, a) < 0)) {
|
|
return atomic_cell(*counter_type, a);
|
|
}
|
|
return { };
|
|
}
|
|
|
|
auto a_ccv = counter_cell_view(a);
|
|
auto b_ccv = counter_cell_view(b);
|
|
auto a_shards = a_ccv.shards();
|
|
auto b_shards = b_ccv.shards();
|
|
|
|
auto a_it = a_shards.begin();
|
|
auto a_end = a_shards.end();
|
|
auto b_it = b_shards.begin();
|
|
auto b_end = b_shards.end();
|
|
|
|
counter_cell_builder result;
|
|
while (a_it != a_end) {
|
|
while (b_it != b_end && (*b_it).id() < (*a_it).id()) {
|
|
++b_it;
|
|
}
|
|
if (b_it == b_end || (*a_it).id() != (*b_it).id() || (*a_it).logical_clock() > (*b_it).logical_clock()) {
|
|
result.add_shard(counter_shard(*a_it));
|
|
}
|
|
++a_it;
|
|
}
|
|
|
|
std::optional<atomic_cell> diff;
|
|
if (!result.empty()) {
|
|
diff = result.build(std::max(a.timestamp(), b.timestamp()));
|
|
} else if (a.timestamp() > b.timestamp()) {
|
|
diff = atomic_cell::make_live(*counter_type, a.timestamp(), bytes_view());
|
|
}
|
|
return diff;
|
|
}
|
|
|
|
|
|
void transform_counter_updates_to_shards(mutation& m, const mutation* current_state, uint64_t clock_offset, utils::UUID local_id) {
|
|
// FIXME: allow current_state to be frozen_mutation
|
|
|
|
auto transform_new_row_to_shards = [&s = *m.schema(), clock_offset, local_id] (column_kind kind, auto& cells) {
|
|
cells.for_each_cell([&] (column_id id, atomic_cell_or_collection& ac_o_c) {
|
|
auto& cdef = s.column_at(kind, id);
|
|
auto acv = ac_o_c.as_atomic_cell(cdef);
|
|
if (!acv.is_live()) {
|
|
return; // continue -- we are in lambda
|
|
}
|
|
auto delta = acv.counter_update_value();
|
|
auto cs = counter_shard(counter_id(local_id), delta, clock_offset + 1);
|
|
ac_o_c = counter_cell_builder::from_single_shard(acv.timestamp(), cs);
|
|
});
|
|
};
|
|
|
|
if (!current_state) {
|
|
transform_new_row_to_shards(column_kind::static_column, m.partition().static_row());
|
|
for (auto& cr : m.partition().clustered_rows()) {
|
|
transform_new_row_to_shards(column_kind::regular_column, cr.row().cells());
|
|
}
|
|
return;
|
|
}
|
|
|
|
clustering_key::less_compare cmp(*m.schema());
|
|
|
|
auto transform_row_to_shards = [&s = *m.schema(), clock_offset, local_id] (column_kind kind, auto& transformee, auto& state) {
|
|
std::deque<std::pair<column_id, counter_shard>> shards;
|
|
state.for_each_cell([&] (column_id id, const atomic_cell_or_collection& ac_o_c) {
|
|
auto& cdef = s.column_at(kind, id);
|
|
auto acv = ac_o_c.as_atomic_cell(cdef);
|
|
if (!acv.is_live()) {
|
|
return; // continue -- we are in lambda
|
|
}
|
|
auto ccv = counter_cell_view(acv);
|
|
auto cs = ccv.get_shard(counter_id(local_id));
|
|
if (!cs) {
|
|
return; // continue
|
|
}
|
|
shards.emplace_back(std::make_pair(id, counter_shard(*cs)));
|
|
});
|
|
|
|
transformee.for_each_cell([&] (column_id id, atomic_cell_or_collection& ac_o_c) {
|
|
auto& cdef = s.column_at(kind, id);
|
|
auto acv = ac_o_c.as_atomic_cell(cdef);
|
|
if (!acv.is_live()) {
|
|
return; // continue -- we are in lambda
|
|
}
|
|
while (!shards.empty() && shards.front().first < id) {
|
|
shards.pop_front();
|
|
}
|
|
|
|
auto delta = acv.counter_update_value();
|
|
|
|
if (shards.empty() || shards.front().first > id) {
|
|
auto cs = counter_shard(counter_id(local_id), delta, clock_offset + 1);
|
|
ac_o_c = counter_cell_builder::from_single_shard(acv.timestamp(), cs);
|
|
} else {
|
|
auto& cs = shards.front().second;
|
|
cs.update(delta, clock_offset + 1);
|
|
ac_o_c = counter_cell_builder::from_single_shard(acv.timestamp(), cs);
|
|
shards.pop_front();
|
|
}
|
|
});
|
|
};
|
|
|
|
transform_row_to_shards(column_kind::static_column, m.partition().static_row(), current_state->partition().static_row());
|
|
|
|
auto& cstate = current_state->partition();
|
|
auto it = cstate.clustered_rows().begin();
|
|
auto end = cstate.clustered_rows().end();
|
|
for (auto& cr : m.partition().clustered_rows()) {
|
|
while (it != end && cmp(it->key(), cr.key())) {
|
|
++it;
|
|
}
|
|
if (it == end || cmp(cr.key(), it->key())) {
|
|
transform_new_row_to_shards(column_kind::regular_column, cr.row().cells());
|
|
continue;
|
|
}
|
|
|
|
transform_row_to_shards(column_kind::regular_column, cr.row().cells(), it->row().cells());
|
|
}
|
|
}
|