row: Introduce radix tree storage type

Currently, class row keeps its cells in a union of a vector and a set
and switches between the two. Add a third storage type, backed by a
radix tree, but never switch to it; it is there only to show what the
operations will look like. Later on, the vector and the set will be
removed and the whole row will be switched to the radix tree storage at once.

NB: All the added code has deliberately broken indentation, so that
the next patch can simply remove the surrounding (old) code and
(most of) the new code will instantly fall into its place.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This commit is contained in:
Pavel Emelyanov
2020-10-23 19:07:27 +03:00
parent 5f276b279e
commit f006acc853
2 changed files with 132 additions and 0 deletions

View File

@@ -827,6 +827,14 @@ void appending_hash<row>::operator()<legacy_xx_hasher_without_null_digest>(legac
}
cell_hash_opt row::cell_hash_for(column_id id) const {
if (_type == storage_type::array) {
auto& _cells = _storage.array;
const cell_and_hash* cah = _cells.get(id);
return cah != nullptr ? cah->hash : cell_hash_opt();
}
if (_type == storage_type::vector) {
return id < max_vector_size && _storage.vector.present.test(id) ? _storage.vector.v[id].hash : cell_hash_opt();
}
@@ -926,6 +934,18 @@ static auto prefixed(const sstring& prefix, const RangeOfPrintable& r) {
std::ostream&
operator<<(std::ostream& os, const row::printer& p) {
if (p._row._type == row::storage_type::array) {
auto& cells = p._row._storage.array;
os << "{{row:";
cells.walk([&] (column_id id, const cell_and_hash& cah) {
auto& cdef = p._schema.column_at(p._kind, id);
os << "\n " << cdef.name_as_text() << atomic_cell_or_collection::printer(cdef, cah.cell);
return true;
});
return os << "}}";
}
auto add_printer = [&] (const auto& c) {
auto& column_def = p._schema.column_at(p._kind, c.first);
return std::pair<sstring, atomic_cell_or_collection::printer>(std::piecewise_construct,
@@ -942,6 +962,8 @@ operator<<(std::ostream& os, const row::printer& p) {
case row::storage_type::vector:
cells = ::join(",", prefixed("\n ", p._row.get_range_vector() | boost::adaptors::transformed(add_printer)));
break;
case row::storage_type::array:
break;
}
return fmt_print(os, "{{row: {}}}", cells);
}
@@ -1185,6 +1207,17 @@ row::apply(const column_definition& column, atomic_cell_or_collection&& value, c
template<typename Func>
void row::consume_with(Func&& func) {
if (_type == storage_type::array) {
auto& _cells = _storage.array;
_cells.weed([func, this] (column_id id, cell_and_hash& cah) {
_size--;
func(id, cah);
return true;
});
}
if (_type == storage_type::vector) {
unsigned i = 0;
for (; i < _storage.vector.v.size(); i++) {
@@ -1213,6 +1246,21 @@ row::apply_monotonically(const column_definition& column, atomic_cell_or_collect
// our mutations are not yet immutable
auto id = column.id;
if (_type == storage_type::array) {
auto& _cells = _storage.array;
cell_and_hash* cah = _cells.get(id);
if (cah == nullptr) {
// FIXME -- add .locate method to radix_tree to find or allocate a spot
_cells.emplace(id, std::move(value), std::move(hash));
_size++;
} else {
::apply_monotonically(column, *cah, value, std::move(hash));
}
}
if (_type == storage_type::vector && id < max_vector_size) {
if (id >= _storage.vector.v.size()) {
_storage.vector.v.resize(id);
@@ -1244,6 +1292,13 @@ row::apply_monotonically(const column_definition& column, atomic_cell_or_collect
void
row::append_cell(column_id id, atomic_cell_or_collection value) {
if (_type == storage_type::array) {
auto& _cells = _storage.array;
_cells.emplace(id, std::move(value), cell_hash_opt());
}
if (_type == storage_type::vector && id < max_vector_size) {
if (_storage.vector.v.size() > id) {
on_internal_error(mplog, format("Attempted to append cell#{} to row already having {} cells", id, _storage.vector.v.size()));
@@ -1263,6 +1318,13 @@ row::append_cell(column_id id, atomic_cell_or_collection value) {
const cell_and_hash*
row::find_cell_and_hash(column_id id) const {
if (_type == storage_type::array) {
auto& _cells = _storage.array;
return _cells.get(id);
}
if (_type == storage_type::vector) {
if (id >= _storage.vector.v.size() || !_storage.vector.present.test(id)) {
return nullptr;
@@ -1285,6 +1347,17 @@ row::find_cell(column_id id) const {
size_t row::external_memory_usage(const schema& s, column_kind kind) const {
size_t mem = 0;
if (_type == storage_type::array) {
auto& _cells = _storage.array;
return _cells.memory_usage([&] (column_id id, const cell_and_hash& cah) noexcept {
auto& cdef = s.column_at(kind, id);
return cah.cell.external_memory_usage(*cdef.type);
});
}
if (_type == storage_type::vector) {
mem += _storage.vector.v.used_space_external_memory_usage();
column_id id = 0;
@@ -1538,6 +1611,17 @@ row::row(const schema& s, column_kind kind, const row& o)
: _type(o._type)
, _size(o._size)
{
if (_type == storage_type::array) {
auto clone_cell_and_hash = [&s, &kind] (column_id id, const cell_and_hash& cah) {
auto& cdef = s.column_at(kind, id);
return cell_and_hash(cah.cell.copy(*cdef.type), cah.hash);
};
_storage.array.clone_from(o._storage.array, clone_cell_and_hash);
}
if (_type == storage_type::vector) {
auto& other_vec = o._storage.vector;
auto& vec = *new (&_storage.vector) vector_storage;

View File

@@ -47,6 +47,7 @@
#include "utils/intrusive_btree.hh"
#include "utils/preempt.hh"
#include "utils/managed_ref.hh"
#include "utils/compact-radix-tree.hh"
class mutation_fragment;
@@ -142,6 +143,7 @@ class row {
// Tag naming the storage representation a row is using; the row's code
// paths compare _type against these values before touching the matching
// member of _storage.
enum class storage_type {
vector,
set,
// Radix-tree-backed storage. Per the commit message it is introduced here
// but no row is switched to it yet.
array,
};
// Currently selected representation; a row starts out as vector.
storage_type _type = storage_type::vector;
// Cell counter: the array code paths in this change increment it after an
// emplace and decrement it for every cell weeded out of the tree.
size_type _size = 0;
@@ -168,11 +170,14 @@ private:
}
};
// Sparse cell container: a compact radix tree holding cell_and_hash entries,
// looked up by column_id.
using sparse_array_type = compact_radix_tree::tree<cell_and_hash, column_id>;
// Raw storage for the row's cells. The union itself does not record which
// member is live; callers test _type before accessing a member.
union storage {
// Empty bodies: no member is constructed or destroyed here, so the enclosing
// row has to do that itself (the vector member, for instance, is created
// with placement new in the copy constructor).
storage() { }
~storage() { }
map_type set;
vector_storage vector;
// NOTE(review): no placement-new of this member is visible in the array code
// paths (the copy constructor calls clone_from on it directly) -- confirm it
// is constructed before any row is actually switched to storage_type::array.
sparse_array_type array;
} _storage;
public:
row();
@@ -194,6 +199,20 @@ public:
template<typename Func>
void remove_if(Func&& func) {
if (_type == storage_type::array) {
auto& _cells = _storage.array;
_cells.weed([func, this] (column_id id, cell_and_hash& cah) {
if (!func(id, cah.cell)) {
return false;
}
_size--;
return true;
});
}
if (_type == storage_type::vector) {
for (unsigned i = 0; i < _storage.vector.v.size(); i++) {
if (!_storage.vector.present.test(i)) {
@@ -260,6 +279,16 @@ public:
// noexcept if Func doesn't throw.
template<typename Func>
void for_each_cell(Func&& func) {
if (_type == storage_type::array) {
auto& _cells = _storage.array;
_cells.walk([func] (column_id id, cell_and_hash& cah) {
maybe_invoke_with_hash(func, id, cah);
return true;
});
}
if (_type == storage_type::vector) {
for (auto i : bitsets::for_each_set(_storage.vector.present)) {
maybe_invoke_with_hash(func, i, _storage.vector.v[i]);
@@ -273,6 +302,16 @@ public:
template<typename Func>
void for_each_cell(Func&& func) const {
if (_type == storage_type::array) {
auto& _cells = _storage.array;
_cells.walk([func] (column_id id, const cell_and_hash& cah) {
maybe_invoke_with_hash(func, id, cah);
return true;
});
}
if (_type == storage_type::vector) {
for (auto i : bitsets::for_each_set(_storage.vector.present)) {
maybe_invoke_with_hash(func, i, _storage.vector.v[i]);
@@ -286,6 +325,15 @@ public:
template<typename Func>
void for_each_cell_until(Func&& func) const {
if (_type == storage_type::array) {
auto& _cells = _storage.array;
_cells.walk([func] (column_id id, const cell_and_hash& cah) {
return maybe_invoke_with_hash(func, id, cah) != stop_iteration::yes;
});
}
if (_type == storage_type::vector) {
for (auto i : bitsets::for_each_set(_storage.vector.present)) {
if (maybe_invoke_with_hash(func, i, _storage.vector.v[i]) == stop_iteration::yes) {