row: Introduce radix tree storage type
Currently, class row uses a union of a vector and a set to keep the cells and switches between them. Add a third storage type backed by the radix tree, but never switch to it; the purpose is only to show what the operations would look like. Later on, the vector and the set will be removed and the whole row will be switched to the radix tree storage at once. NB: All the added code has deliberately broken indentation, so that the next patch can simply remove the surrounding (old) code and (most of) the new code will fall into its place without further changes. Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This commit is contained in:
@@ -827,6 +827,14 @@ void appending_hash<row>::operator()<legacy_xx_hasher_without_null_digest>(legac
|
||||
}
|
||||
|
||||
cell_hash_opt row::cell_hash_for(column_id id) const {
|
||||
if (_type == storage_type::array) {
|
||||
auto& _cells = _storage.array;
|
||||
|
||||
const cell_and_hash* cah = _cells.get(id);
|
||||
return cah != nullptr ? cah->hash : cell_hash_opt();
|
||||
|
||||
}
|
||||
|
||||
if (_type == storage_type::vector) {
|
||||
return id < max_vector_size && _storage.vector.present.test(id) ? _storage.vector.v[id].hash : cell_hash_opt();
|
||||
}
|
||||
@@ -926,6 +934,18 @@ static auto prefixed(const sstring& prefix, const RangeOfPrintable& r) {
|
||||
|
||||
std::ostream&
|
||||
operator<<(std::ostream& os, const row::printer& p) {
|
||||
if (p._row._type == row::storage_type::array) {
|
||||
auto& cells = p._row._storage.array;
|
||||
|
||||
os << "{{row:";
|
||||
cells.walk([&] (column_id id, const cell_and_hash& cah) {
|
||||
auto& cdef = p._schema.column_at(p._kind, id);
|
||||
os << "\n " << cdef.name_as_text() << atomic_cell_or_collection::printer(cdef, cah.cell);
|
||||
return true;
|
||||
});
|
||||
return os << "}}";
|
||||
}
|
||||
|
||||
auto add_printer = [&] (const auto& c) {
|
||||
auto& column_def = p._schema.column_at(p._kind, c.first);
|
||||
return std::pair<sstring, atomic_cell_or_collection::printer>(std::piecewise_construct,
|
||||
@@ -942,6 +962,8 @@ operator<<(std::ostream& os, const row::printer& p) {
|
||||
case row::storage_type::vector:
|
||||
cells = ::join(",", prefixed("\n ", p._row.get_range_vector() | boost::adaptors::transformed(add_printer)));
|
||||
break;
|
||||
case row::storage_type::array:
|
||||
break;
|
||||
}
|
||||
return fmt_print(os, "{{row: {}}}", cells);
|
||||
}
|
||||
@@ -1185,6 +1207,17 @@ row::apply(const column_definition& column, atomic_cell_or_collection&& value, c
|
||||
|
||||
template<typename Func>
|
||||
void row::consume_with(Func&& func) {
|
||||
if (_type == storage_type::array) {
|
||||
auto& _cells = _storage.array;
|
||||
|
||||
_cells.weed([func, this] (column_id id, cell_and_hash& cah) {
|
||||
_size--;
|
||||
func(id, cah);
|
||||
return true;
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
if (_type == storage_type::vector) {
|
||||
unsigned i = 0;
|
||||
for (; i < _storage.vector.v.size(); i++) {
|
||||
@@ -1213,6 +1246,21 @@ row::apply_monotonically(const column_definition& column, atomic_cell_or_collect
|
||||
|
||||
// our mutations are not yet immutable
|
||||
auto id = column.id;
|
||||
|
||||
if (_type == storage_type::array) {
|
||||
auto& _cells = _storage.array;
|
||||
|
||||
cell_and_hash* cah = _cells.get(id);
|
||||
if (cah == nullptr) {
|
||||
// FIXME -- add .locate method to radix_tree to find or allocate a spot
|
||||
_cells.emplace(id, std::move(value), std::move(hash));
|
||||
_size++;
|
||||
} else {
|
||||
::apply_monotonically(column, *cah, value, std::move(hash));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (_type == storage_type::vector && id < max_vector_size) {
|
||||
if (id >= _storage.vector.v.size()) {
|
||||
_storage.vector.v.resize(id);
|
||||
@@ -1244,6 +1292,13 @@ row::apply_monotonically(const column_definition& column, atomic_cell_or_collect
|
||||
|
||||
void
|
||||
row::append_cell(column_id id, atomic_cell_or_collection value) {
|
||||
if (_type == storage_type::array) {
|
||||
auto& _cells = _storage.array;
|
||||
|
||||
_cells.emplace(id, std::move(value), cell_hash_opt());
|
||||
|
||||
}
|
||||
|
||||
if (_type == storage_type::vector && id < max_vector_size) {
|
||||
if (_storage.vector.v.size() > id) {
|
||||
on_internal_error(mplog, format("Attempted to append cell#{} to row already having {} cells", id, _storage.vector.v.size()));
|
||||
@@ -1263,6 +1318,13 @@ row::append_cell(column_id id, atomic_cell_or_collection value) {
|
||||
|
||||
const cell_and_hash*
|
||||
row::find_cell_and_hash(column_id id) const {
|
||||
if (_type == storage_type::array) {
|
||||
auto& _cells = _storage.array;
|
||||
|
||||
return _cells.get(id);
|
||||
|
||||
}
|
||||
|
||||
if (_type == storage_type::vector) {
|
||||
if (id >= _storage.vector.v.size() || !_storage.vector.present.test(id)) {
|
||||
return nullptr;
|
||||
@@ -1285,6 +1347,17 @@ row::find_cell(column_id id) const {
|
||||
|
||||
size_t row::external_memory_usage(const schema& s, column_kind kind) const {
|
||||
size_t mem = 0;
|
||||
|
||||
if (_type == storage_type::array) {
|
||||
auto& _cells = _storage.array;
|
||||
|
||||
return _cells.memory_usage([&] (column_id id, const cell_and_hash& cah) noexcept {
|
||||
auto& cdef = s.column_at(kind, id);
|
||||
return cah.cell.external_memory_usage(*cdef.type);
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
if (_type == storage_type::vector) {
|
||||
mem += _storage.vector.v.used_space_external_memory_usage();
|
||||
column_id id = 0;
|
||||
@@ -1538,6 +1611,17 @@ row::row(const schema& s, column_kind kind, const row& o)
|
||||
: _type(o._type)
|
||||
, _size(o._size)
|
||||
{
|
||||
if (_type == storage_type::array) {
|
||||
|
||||
auto clone_cell_and_hash = [&s, &kind] (column_id id, const cell_and_hash& cah) {
|
||||
auto& cdef = s.column_at(kind, id);
|
||||
return cell_and_hash(cah.cell.copy(*cdef.type), cah.hash);
|
||||
};
|
||||
|
||||
_storage.array.clone_from(o._storage.array, clone_cell_and_hash);
|
||||
|
||||
}
|
||||
|
||||
if (_type == storage_type::vector) {
|
||||
auto& other_vec = o._storage.vector;
|
||||
auto& vec = *new (&_storage.vector) vector_storage;
|
||||
|
||||
@@ -47,6 +47,7 @@
|
||||
#include "utils/intrusive_btree.hh"
|
||||
#include "utils/preempt.hh"
|
||||
#include "utils/managed_ref.hh"
|
||||
#include "utils/compact-radix-tree.hh"
|
||||
|
||||
class mutation_fragment;
|
||||
|
||||
@@ -142,6 +143,7 @@ class row {
|
||||
// Tag recording which member of the row's `_storage` union currently holds
// the cells; row operations branch on it before touching `_storage`.
enum class storage_type {
|
||||
vector,
|
||||
set,
|
||||
// Radix-tree (sparse array) storage. Per the commit message, the row is
// never switched to this type yet; it only demonstrates the new code paths.
array,
|
||||
};
|
||||
storage_type _type = storage_type::vector;
|
||||
size_type _size = 0;
|
||||
@@ -168,11 +170,14 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
// Radix tree of cell_and_hash entries indexed by column_id; this is the
// container type behind the `array` storage type.
using sparse_array_type = compact_radix_tree::tree<cell_and_hash, column_id>;
|
||||
|
||||
// Storage for the row's cells. Which member is in use is tracked separately
// by `_type` (storage_type); code checks `_type` before accessing a member.
union storage {
|
||||
// Empty on purpose: the union itself constructs no member.
// NOTE(review): presumably the owning row constructs the active member
// explicitly (the copy constructor placement-news `vector`) — confirm.
storage() { }
|
||||
// Empty on purpose: the union itself destroys no member.
// NOTE(review): presumably the owning row destroys the active member — confirm.
~storage() { }
|
||||
// Used when _type == storage_type::set.
map_type set;
|
||||
// Used when _type == storage_type::vector.
vector_storage vector;
|
||||
// Used when _type == storage_type::array (radix tree storage).
sparse_array_type array;
|
||||
} _storage;
|
||||
public:
|
||||
row();
|
||||
@@ -194,6 +199,20 @@ public:
|
||||
|
||||
template<typename Func>
|
||||
void remove_if(Func&& func) {
|
||||
if (_type == storage_type::array) {
|
||||
auto& _cells = _storage.array;
|
||||
|
||||
_cells.weed([func, this] (column_id id, cell_and_hash& cah) {
|
||||
if (!func(id, cah.cell)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
_size--;
|
||||
return true;
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
if (_type == storage_type::vector) {
|
||||
for (unsigned i = 0; i < _storage.vector.v.size(); i++) {
|
||||
if (!_storage.vector.present.test(i)) {
|
||||
@@ -260,6 +279,16 @@ public:
|
||||
// noexcept if Func doesn't throw.
|
||||
template<typename Func>
|
||||
void for_each_cell(Func&& func) {
|
||||
if (_type == storage_type::array) {
|
||||
auto& _cells = _storage.array;
|
||||
|
||||
_cells.walk([func] (column_id id, cell_and_hash& cah) {
|
||||
maybe_invoke_with_hash(func, id, cah);
|
||||
return true;
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
if (_type == storage_type::vector) {
|
||||
for (auto i : bitsets::for_each_set(_storage.vector.present)) {
|
||||
maybe_invoke_with_hash(func, i, _storage.vector.v[i]);
|
||||
@@ -273,6 +302,16 @@ public:
|
||||
|
||||
template<typename Func>
|
||||
void for_each_cell(Func&& func) const {
|
||||
if (_type == storage_type::array) {
|
||||
auto& _cells = _storage.array;
|
||||
|
||||
_cells.walk([func] (column_id id, const cell_and_hash& cah) {
|
||||
maybe_invoke_with_hash(func, id, cah);
|
||||
return true;
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
if (_type == storage_type::vector) {
|
||||
for (auto i : bitsets::for_each_set(_storage.vector.present)) {
|
||||
maybe_invoke_with_hash(func, i, _storage.vector.v[i]);
|
||||
@@ -286,6 +325,15 @@ public:
|
||||
|
||||
template<typename Func>
|
||||
void for_each_cell_until(Func&& func) const {
|
||||
if (_type == storage_type::array) {
|
||||
auto& _cells = _storage.array;
|
||||
|
||||
_cells.walk([func] (column_id id, const cell_and_hash& cah) {
|
||||
return maybe_invoke_with_hash(func, id, cah) != stop_iteration::yes;
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
if (_type == storage_type::vector) {
|
||||
for (auto i : bitsets::for_each_set(_storage.vector.present)) {
|
||||
if (maybe_invoke_with_hash(func, i, _storage.vector.v[i]) == stop_iteration::yes) {
|
||||
|
||||
Reference in New Issue
Block a user