Merge "Switch partitions cache from BST to B+tree & array" from Pavel E.

The data model is now

        bplus::tree<Key = int64_t, T = array<entry>>

where entry can be cache_entry or memtable_entry.

The whole thing is encapsulated in a collection called "double_decker"
(patch #3). The array<T> is an array of Ts with zero bytes of overhead, used
to resolve hash conflicts (patch #2).

branch:
tests: unit(debug)
tests before v7:
        unit(debug) for new collections, memtable and row_cache
        unit(dev) for the rest
        perf(dev)

* https://github.com/xemul/scylla/commits/row-cache-over-bptree-9:
  test: Print more sizes in memory_footprint_test
  memtable: Switch onto B+ rails
  row_cache: Switch partition tree onto B+ rails
  memtable: Count partitions separately
  token: Introduce raw() helper and raw comparator
  row-cache: Use ring_position_comparator in some places
  dht: Detach ring_position_comparator_for_sstables
  double-decker: A combination of B+tree with array
  intrusive-array: Array with trusted bounds
  utils: B+ tree implementation
  test: Move perf measurement helpers into header
This commit is contained in:
Tomasz Grabiec
2020-07-15 14:54:26 +02:00
26 changed files with 5179 additions and 259 deletions

View File

@@ -333,6 +333,7 @@ scylla_tests = set([
'test/boost/estimated_histogram_test',
'test/boost/logalloc_test',
'test/boost/managed_vector_test',
'test/boost/intrusive_array_test',
'test/boost/map_difference_test',
'test/boost/memtable_test',
'test/boost/meta_test',
@@ -388,6 +389,8 @@ scylla_tests = set([
'test/boost/view_schema_ckey_test',
'test/boost/vint_serialization_test',
'test/boost/virtual_reader_test',
'test/boost/bptree_test',
'test/boost/double_decker_test',
'test/manual/ec2_snitch_test',
'test/manual/gce_snitch_test',
'test/manual/gossip',
@@ -404,6 +407,7 @@ scylla_tests = set([
'test/perf/perf_fast_forward',
'test/perf/perf_hash',
'test/perf/perf_mutation',
'test/perf/perf_bptree',
'test/perf/perf_row_cache_update',
'test/perf/perf_simple_query',
'test/perf/perf_sstable',
@@ -411,6 +415,8 @@ scylla_tests = set([
'test/unit/lsa_sync_eviction_test',
'test/unit/row_cache_alloc_stress_test',
'test/unit/row_cache_stress_test',
'test/unit/bptree_stress_test',
'test/unit/bptree_compaction_test',
])
perf_tests = set([
@@ -958,6 +964,7 @@ pure_boost_tests = set([
'test/boost/small_vector_test',
'test/boost/top_k_test',
'test/boost/vint_serialization_test',
'test/boost/bptree_test',
'test/manual/streaming_histogram_test',
])
@@ -971,10 +978,13 @@ tests_not_using_seastar_test_framework = set([
'test/perf/perf_cql_parser',
'test/perf/perf_hash',
'test/perf/perf_mutation',
'test/perf/perf_bptree',
'test/perf/perf_row_cache_update',
'test/unit/lsa_async_eviction_test',
'test/unit/lsa_sync_eviction_test',
'test/unit/row_cache_alloc_stress_test',
'test/unit/bptree_stress_test',
'test/unit/bptree_compaction_test',
'test/manual/sstable_scan_footprint_test',
]) | pure_boost_tests

View File

@@ -316,11 +316,7 @@ int ring_position_tri_compare(const schema& s, ring_position_view lh, ring_posit
}
}
int ring_position_comparator::operator()(ring_position_view lh, ring_position_view rh) const {
return ring_position_tri_compare(s, lh, rh);
}
int ring_position_comparator::operator()(ring_position_view lh, sstables::decorated_key_view rh) const {
int ring_position_comparator_for_sstables::operator()(ring_position_view lh, sstables::decorated_key_view rh) const {
auto token_cmp = tri_compare(*lh._token, rh.token());
if (token_cmp) {
return token_cmp;
@@ -334,7 +330,7 @@ int ring_position_comparator::operator()(ring_position_view lh, sstables::decora
return lh._weight;
}
int ring_position_comparator::operator()(sstables::decorated_key_view a, ring_position_view b) const {
int ring_position_comparator_for_sstables::operator()(sstables::decorated_key_view a, ring_position_view b) const {
return -(*this)(b, a);
}

View File

@@ -330,6 +330,7 @@ public:
class ring_position_view {
friend int ring_position_tri_compare(const schema& s, ring_position_view lh, ring_position_view rh);
friend class ring_position_comparator;
friend class ring_position_comparator_for_sstables;
friend class ring_position_ext;
// Order is lexicographical on (_token, _key) tuples, where _key part may be missing, and
@@ -566,11 +567,40 @@ public:
int ring_position_tri_compare(const schema& s, ring_position_view lh, ring_position_view rh);
// Generic adapter used by ring_position_comparator: any type that is
// implicitly convertible to ring_position_view is compared through
// that conversion.
template <typename T>
requires std::is_convertible_v<T, ring_position_view>
ring_position_view ring_position_view_to_compare(const T& val) {
    ring_position_view view = val;
    return view;
}
// Trichotomic comparator for ring order
struct ring_position_comparator {
const schema& s;
ring_position_comparator(const schema& s_) : s(s_) {}
int operator()(ring_position_view, ring_position_view) const;
int operator()(ring_position_view lh, ring_position_view rh) const {
return ring_position_tri_compare(s, lh, rh);
}
template <typename T>
int operator()(const T& lh, ring_position_view rh) const {
return ring_position_tri_compare(s, ring_position_view_to_compare(lh), rh);
}
template <typename T>
int operator()(ring_position_view lh, const T& rh) const {
return ring_position_tri_compare(s, lh, ring_position_view_to_compare(rh));
}
template <typename T1, typename T2>
int operator()(const T1& lh, const T2& rh) const {
return ring_position_tri_compare(s, ring_position_view_to_compare(lh), ring_position_view_to_compare(rh));
}
};
// Three-way comparator between a ring_position_view and an
// sstables::decorated_key_view. Both argument orders are declared here;
// the definitions are provided out of line.
// The schema is held by reference, so it must outlive the comparator.
struct ring_position_comparator_for_sstables {
const schema& s;
ring_position_comparator_for_sstables(const schema& s_) : s(s_) {}
// Compares a ring position (left) against an sstable decorated key (right).
int operator()(ring_position_view, sstables::decorated_key_view) const;
// Same comparison with the argument order swapped.
int operator()(sstables::decorated_key_view, ring_position_view) const;
};

View File

@@ -59,13 +59,7 @@ int tri_compare(const token& t1, const token& t2) {
} else if (t1._kind > t2._kind) {
return 1;
} else if (t1._kind == token_kind::key) {
auto l1 = long_token(t1);
auto l2 = long_token(t2);
if (l1 == l2) {
return 0;
} else {
return l1 < l2 ? -1 : 1;
}
return tri_compare_raw(long_token(t1), long_token(t2));
}
return 0;
}

View File

@@ -160,6 +160,47 @@ public:
return 0; // hardcoded for now; unlikely to change
}
// Maps the token onto a plain int64_t: the minimum token yields the
// smallest int64_t, the maximum token yields the largest int64_t and
// any other token yields its stored _data.
int64_t raw() const noexcept {
    using limits = std::numeric_limits<int64_t>;
    return is_minimum() ? limits::min()
         : is_maximum() ? limits::max()
         : _data;
}
};
// Three-way comparison of two raw token values.
// Returns -1 when l1 < l2, 1 when l1 > l2 and 0 when they are equal.
static inline int tri_compare_raw(const int64_t l1, const int64_t l2) noexcept {
    if (l1 < l2) {
        return -1;
    }
    return l1 > l2 ? 1 : 0;
}
// Satisfied by types on which calling token() through a const reference
// yields exactly `const token&`.
template <typename T>
concept TokenCarrier = requires (const T& v) {
{ v.token() } -> std::same_as<const token&>;
};
// Strict "less than" ordering over raw int64_t token values.
// The mixed overloads accept any TokenCarrier on one side and compare
// the value returned by its token().raw().
struct raw_token_less_comparator {
    // Both sides are already raw values.
    bool operator()(const int64_t k1, const int64_t k2) const noexcept {
        const int order = dht::tri_compare_raw(k1, k2);
        return order < 0;
    }

    // Token carrier on the left, raw value on the right.
    template <typename Key>
    requires TokenCarrier<Key>
    bool operator()(const Key& k1, const int64_t k2) const noexcept {
        const int64_t lhs = k1.token().raw();
        return dht::tri_compare_raw(lhs, k2) < 0;
    }

    // Raw value on the left, token carrier on the right.
    template <typename Key>
    requires TokenCarrier<Key>
    bool operator()(const int64_t k1, const Key& k2) const noexcept {
        const int64_t rhs = k2.token().raw();
        return dht::tri_compare_raw(k1, rhs) < 0;
    }
};
const token& minimum_token() noexcept;

View File

@@ -117,7 +117,7 @@ memtable::memtable(schema_ptr schema, dirty_memory_manager& dmm, table_stats& ta
, _cleaner(*this, no_cache_tracker, table_stats.memtable_app_stats, compaction_scheduling_group)
, _memtable_list(memtable_list)
, _schema(std::move(schema))
, partitions(memtable_entry::compare(_schema))
, partitions(dht::raw_token_less_comparator{})
, _table_stats(table_stats) {
}
@@ -137,12 +137,16 @@ uint64_t memtable::dirty_size() const {
return occupancy().total_space();
}
// Evicts the entry's partition through the given cleaner and removes the
// entry from this memtable's partition count.
void memtable::evict_entry(memtable_entry& e, mutation_cleaner& cleaner) noexcept {
    auto&& pe = e.partition();
    pe.evict(cleaner);
    --nr_partitions;
}
void memtable::clear() noexcept {
auto dirty_before = dirty_size();
with_allocator(allocator(), [this] {
partitions.clear_and_dispose([this] (memtable_entry* e) {
e->partition().evict(_cleaner);
current_deleter<memtable_entry>()(e);
partitions.clear_and_dispose([this] (memtable_entry* e) noexcept {
evict_entry(*e, _cleaner);
});
});
remove_flushed_memory(dirty_before - dirty_size());
@@ -154,6 +158,7 @@ future<> memtable::clear_gently() noexcept {
auto& alloc = allocator();
auto p = std::move(partitions);
nr_partitions = 0;
while (!p.empty()) {
auto dirty_before = dirty_size();
with_allocator(alloc, [&] () noexcept {
@@ -161,9 +166,7 @@ future<> memtable::clear_gently() noexcept {
if (p.begin()->clear_gently() == stop_iteration::no) {
break;
}
p.erase_and_dispose(p.begin(), [&] (auto e) {
alloc.destroy(e);
});
p.begin().erase(dht::raw_token_less_comparator{});
if (need_preempt()) {
break;
}
@@ -172,6 +175,13 @@ future<> memtable::clear_gently() noexcept {
remove_flushed_memory(dirty_before - dirty_size());
seastar::thread::yield();
}
/*
* The collection is not guaranteed to free everything
* with the last erase. Anything freed later, in the destructor,
* would be unaccounted from the wrong allocator, so clear the
* collection here under the right one.
*/
with_allocator(alloc, [&p] { p.clear(); });
});
auto f = t->join();
return f.then([t = std::move(t)] {});
@@ -205,12 +215,17 @@ memtable::find_or_create_partition(const dht::decorated_key& key) {
assert(!reclaiming_enabled());
// call lower_bound so we have a hint for the insert, just in case.
auto i = partitions.lower_bound(key, memtable_entry::compare(_schema));
if (i == partitions.end() || !key.equal(*_schema, i->key())) {
memtable_entry* entry = current_allocator().construct<memtable_entry>(
_schema, dht::decorated_key(key), mutation_partition(_schema));
partitions.insert_before(i, *entry);
partitions_type::bound_hint hint;
auto i = partitions.lower_bound(key, dht::ring_position_comparator(*_schema), hint);
if (i == partitions.end() || !hint.match) {
partitions_type::iterator entry = partitions.emplace_before(i,
key.token().raw(), hint,
_schema, dht::decorated_key(key), mutation_partition(_schema));
++nr_partitions;
++_table_stats.memtable_partition_insertions;
if (!hint.emplace_keeps_iterators()) {
current_allocator().invalidate_references();
}
return entry->partition();
} else {
++_table_stats.memtable_partition_hits;
@@ -223,14 +238,14 @@ boost::iterator_range<memtable::partitions_type::const_iterator>
memtable::slice(const dht::partition_range& range) const {
if (query::is_single_partition(range)) {
const query::ring_position& pos = range.start()->value();
auto i = partitions.find(pos, memtable_entry::compare(_schema));
auto i = partitions.find(pos, dht::ring_position_comparator(*_schema));
if (i != partitions.end()) {
return boost::make_iterator_range(i, std::next(i));
} else {
return boost::make_iterator_range(i, i);
}
} else {
auto cmp = memtable_entry::compare(_schema);
auto cmp = dht::ring_position_comparator(*_schema);
auto i1 = range.start()
? (range.start()->is_inclusive()
@@ -259,7 +274,7 @@ class iterator_reader {
size_t _last_partition_count = 0;
memtable::partitions_type::iterator lookup_end() {
auto cmp = memtable_entry::compare(_memtable->_schema);
auto cmp = dht::ring_position_comparator(*_memtable->_schema);
return _range->end()
? (_range->end()->is_inclusive()
? _memtable->partitions.upper_bound(_range->end()->value(), cmp)
@@ -269,7 +284,7 @@ class iterator_reader {
void update_iterators() {
// We must be prepared that iterators may get invalidated during compaction.
auto current_reclaim_counter = _memtable->reclaim_counter();
auto cmp = memtable_entry::compare(_memtable->_schema);
auto cmp = dht::ring_position_comparator(*_memtable->_schema);
if (_last) {
if (current_reclaim_counter != _last_reclaim_counter ||
_last_partition_count != _memtable->partition_count()) {
@@ -652,7 +667,7 @@ memtable::make_flat_reader(schema_ptr s,
const query::ring_position& pos = range.start()->value();
auto snp = _read_section(*this, [&] () -> partition_snapshot_ptr {
managed_bytes::linearization_context_guard lcg;
auto i = partitions.find(pos, memtable_entry::compare(_schema));
auto i = partitions.find(pos, dht::ring_position_comparator(*_schema));
if (i != partitions.end()) {
upgrade_entry(*i);
return i->snapshot(*this);
@@ -759,20 +774,12 @@ mutation_source memtable::as_data_source() {
});
}
size_t memtable::partition_count() const {
return partitions.size();
}
memtable_entry::memtable_entry(memtable_entry&& o) noexcept
: _link()
, _schema(std::move(o._schema))
: _schema(std::move(o._schema))
, _key(std::move(o._key))
, _pe(std::move(o._pe))
{
using container_type = memtable::partitions_type;
container_type::node_algorithms::replace_node(o._link.this_ptr(), _link.this_ptr());
container_type::node_algorithms::init(o._link.this_ptr());
}
, _flags(o._flags)
{ }
stop_iteration memtable_entry::clear_gently() noexcept {
return _pe.clear_gently(no_cache_tracker);
@@ -808,6 +815,10 @@ void memtable::set_schema(schema_ptr new_schema) noexcept {
_schema = std::move(new_schema);
}
// Reports this entry's size by delegating to the partitions container's
// estimated_object_memory_size_in_allocator() for the given allocator.
size_t memtable_entry::object_memory_size(allocation_strategy& allocator) {
    using container = memtable::partitions_type;
    const size_t bytes = container::estimated_object_memory_size_in_allocator(allocator, this);
    return bytes;
}
std::ostream& operator<<(std::ostream& out, memtable& mt) {
logalloc::reclaim_lock rl(mt);
return out << "{memtable: [" << ::join(",\n", mt.partitions) << "]}";

View File

@@ -32,11 +32,11 @@
#include "db/commitlog/replay_position.hh"
#include "db/commitlog/rp_set.hh"
#include "utils/extremum_tracking.hh"
#include "utils/logalloc.hh"
#include "partition_version.hh"
#include "flat_mutation_reader.hh"
#include "mutation_cleaner.hh"
#include "sstables/types.hh"
#include "utils/double-decker.hh"
class frozen_mutation;
@@ -44,11 +44,22 @@ class frozen_mutation;
namespace bi = boost::intrusive;
class memtable_entry {
bi::set_member_hook<> _link;
schema_ptr _schema;
dht::decorated_key _key;
partition_entry _pe;
struct {
bool _head : 1;
bool _tail : 1;
bool _train : 1;
} _flags{};
public:
bool is_head() const noexcept { return _flags._head; }
void set_head(bool v) noexcept { _flags._head = v; }
bool is_tail() const noexcept { return _flags._tail; }
void set_tail(bool v) noexcept { _flags._tail = v; }
bool with_train() const noexcept { return _flags._train; }
void set_train(bool v) noexcept { _flags._train = v; }
friend class memtable;
memtable_entry(schema_ptr s, dht::decorated_key key, mutation_partition p)
@@ -77,8 +88,10 @@ public:
return _key.key().external_memory_usage();
}
size_t object_memory_size(allocation_strategy& allocator);
size_t size_in_allocator_without_rows(allocation_strategy& allocator) {
return allocator.object_memory_size_in_allocator(this) + external_memory_usage_without_rows();
return object_memory_size(allocator) + external_memory_usage_without_rows();
}
size_t size_in_allocator(allocation_strategy& allocator) {
@@ -89,34 +102,7 @@ public:
return size;
}
struct compare {
dht::decorated_key::less_comparator _c;
compare(schema_ptr s)
: _c(std::move(s))
{}
bool operator()(const dht::decorated_key& k1, const memtable_entry& k2) const {
return _c(k1, k2._key);
}
bool operator()(const memtable_entry& k1, const memtable_entry& k2) const {
return _c(k1._key, k2._key);
}
bool operator()(const memtable_entry& k1, const dht::decorated_key& k2) const {
return _c(k1._key, k2);
}
bool operator()(const memtable_entry& k1, const dht::ring_position& k2) const {
return _c(k1._key, k2);
}
bool operator()(const dht::ring_position& k1, const memtable_entry& k2) const {
return _c(k1, k2._key);
}
};
friend dht::ring_position_view ring_position_view_to_compare(const memtable_entry& mt) { return mt._key; }
friend std::ostream& operator<<(std::ostream&, const memtable_entry&);
};
@@ -126,9 +112,9 @@ struct table_stats;
// Managed by lw_shared_ptr<>.
class memtable final : public enable_lw_shared_from_this<memtable>, private logalloc::region {
public:
using partitions_type = bi::set<memtable_entry,
bi::member_hook<memtable_entry, bi::set_member_hook<>, &memtable_entry::_link>,
bi::compare<memtable_entry::compare>>;
using partitions_type = double_decker<int64_t, memtable_entry,
dht::raw_token_less_comparator, dht::ring_position_comparator,
16, bplus::key_search::linear>;
private:
dirty_memory_manager& _dirty_mgr;
mutation_cleaner _cleaner;
@@ -137,6 +123,7 @@ private:
logalloc::allocating_section _read_section;
logalloc::allocating_section _allocating_section;
partitions_type partitions;
size_t nr_partitions = 0;
db::replay_position _replay_position;
db::rp_set _rp_set;
// mutation source to which reads fall-back after mark_flushed()
@@ -203,6 +190,7 @@ public:
void apply(const mutation& m, db::rp_handle&& = {});
// The mutation is upgraded to current schema.
void apply(const frozen_mutation& m, const schema_ptr& m_schema, db::rp_handle&& = {});
void evict_entry(memtable_entry& e, mutation_cleaner& cleaner) noexcept;
static memtable& from_region(logalloc::region& r) {
return static_cast<memtable&>(r);
@@ -236,7 +224,7 @@ public:
return _memtable_list;
}
size_t partition_count() const;
size_t partition_count() const { return nr_partitions; }
logalloc::occupancy_stats occupancy() const;
// Creates a reader of data in this memtable for given partition range.

View File

@@ -283,13 +283,13 @@ public:
// Can be called on invalid cursor, in which case it brings it back to validity.
// Strong exception guarantees.
bool advance_to(dht::ring_position_view pos) {
auto cmp = cache_entry::compare(_cache.get()._schema);
if (cmp(_end_pos, pos)) { // next() may have moved _start_pos past the _end_pos.
dht::ring_position_comparator cmp(*_cache.get()._schema);
if (cmp(_end_pos, pos) < 0) { // next() may have moved _start_pos past the _end_pos.
_end_pos = pos;
}
_end = _cache.get()._partitions.lower_bound(_end_pos, cmp);
_it = _cache.get()._partitions.lower_bound(pos, cmp);
auto same = !cmp(pos, _it->position());
auto same = cmp(pos, _it->position()) >= 0;
set_position(*_it);
_last_reclaim_count = _cache.get().get_cache_tracker().allocator().invalidate_counter();
return same;
@@ -375,13 +375,14 @@ private:
_cache._read_section(_cache._tracker.region(), [this] {
with_allocator(_cache._tracker.allocator(), [this] {
dht::decorated_key dk = _read_context->range().start()->value().as_decorated_key();
_cache.do_find_or_create_entry(dk, nullptr, [&] (auto i) {
_cache.do_find_or_create_entry(dk, nullptr, [&] (auto i, const row_cache::partitions_type::bound_hint& hint) {
mutation_partition mp(_cache._schema);
cache_entry* entry = current_allocator().construct<cache_entry>(
bool cont = i->continuous();
row_cache::partitions_type::iterator entry = _cache._partitions.emplace_before(i, dk.token().raw(), hint,
_cache._schema, std::move(dk), std::move(mp));
_cache._tracker.insert(*entry);
entry->set_continuous(i->continuous());
return _cache._partitions.insert_before(i, *entry);
entry->set_continuous(cont);
return entry;
}, [&] (auto i) {
_cache._tracker.on_miss_already_populated();
});
@@ -496,7 +497,7 @@ private:
return;
}
if (!_reader.range().end() || !_reader.range().end()->is_inclusive()) {
cache_entry::compare cmp(_cache._schema);
dht::ring_position_comparator cmp(*_cache._schema);
auto it = _reader.range().end() ? _cache._partitions.find(_reader.range().end()->value(), cmp)
: std::prev(_cache._partitions.end());
if (it != _cache._partitions.end()) {
@@ -754,10 +755,10 @@ row_cache::make_reader(schema_ptr s,
if (!ctx->is_range_query() && !fwd_mr) {
auto mr = _read_section(_tracker.region(), [&] {
return with_linearized_managed_bytes([&] {
cache_entry::compare cmp(_schema);
dht::ring_position_comparator cmp(*_schema);
auto&& pos = ctx->range().start()->value();
auto i = _partitions.lower_bound(pos, cmp);
if (i != _partitions.end() && !cmp(pos, i->position())) {
if (i != _partitions.end() && cmp(pos, i->position()) >= 0) {
cache_entry& e = *i;
upgrade_entry(e);
on_partition_hit();
@@ -789,22 +790,20 @@ row_cache::make_reader(schema_ptr s,
row_cache::~row_cache() {
with_allocator(_tracker.allocator(), [this] {
_partitions.clear_and_dispose([this, deleter = current_deleter<cache_entry>()] (auto&& p) mutable {
_partitions.clear_and_dispose([this] (cache_entry* p) mutable noexcept {
if (!p->is_dummy_entry()) {
_tracker.on_partition_erase();
}
p->evict(_tracker);
deleter(p);
});
});
}
void row_cache::clear_now() noexcept {
with_allocator(_tracker.allocator(), [this] {
auto it = _partitions.erase_and_dispose(_partitions.begin(), partitions_end(), [this, deleter = current_deleter<cache_entry>()] (auto&& p) mutable {
auto it = _partitions.erase_and_dispose(_partitions.begin(), partitions_end(), [this] (cache_entry* p) noexcept {
_tracker.on_partition_erase();
p->evict(_tracker);
deleter(p);
});
_tracker.clear_continuity(*it);
});
@@ -820,9 +819,11 @@ cache_entry& row_cache::do_find_or_create_entry(const dht::decorated_key& key,
{
return with_allocator(_tracker.allocator(), [&] () -> cache_entry& {
return with_linearized_managed_bytes([&] () -> cache_entry& {
auto i = _partitions.lower_bound(key, cache_entry::compare(_schema));
if (i == _partitions.end() || !i->key().equal(*_schema, key)) {
i = create_entry(i);
partitions_type::bound_hint hint;
dht::ring_position_comparator cmp(*_schema);
auto i = _partitions.lower_bound(key, cmp, hint);
if (i == _partitions.end() || !hint.match) {
i = create_entry(i, hint);
} else {
visit_entry(i);
}
@@ -845,10 +846,11 @@ cache_entry& row_cache::do_find_or_create_entry(const dht::decorated_key& key,
}
cache_entry& row_cache::find_or_create(const dht::decorated_key& key, tombstone t, row_cache::phase_type phase, const previous_entry_pointer* previous) {
return do_find_or_create_entry(key, previous, [&] (auto i) { // create
auto entry = current_allocator().construct<cache_entry>(cache_entry::incomplete_tag{}, _schema, key, t);
return do_find_or_create_entry(key, previous, [&] (auto i, const partitions_type::bound_hint& hint) { // create
partitions_type::iterator entry = _partitions.emplace_before(i, key.token().raw(), hint,
cache_entry::incomplete_tag{}, _schema, key, t);
_tracker.insert(*entry);
return _partitions.insert_before(i, *entry);
return entry;
}, [&] (auto i) { // visit
_tracker.on_miss_already_populated();
cache_entry& e = *i;
@@ -859,14 +861,13 @@ cache_entry& row_cache::find_or_create(const dht::decorated_key& key, tombstone
void row_cache::populate(const mutation& m, const previous_entry_pointer* previous) {
_populate_section(_tracker.region(), [&] {
do_find_or_create_entry(m.decorated_key(), previous, [&] (auto i) {
cache_entry* entry = current_allocator().construct<cache_entry>(
do_find_or_create_entry(m.decorated_key(), previous, [&] (auto i, const partitions_type::bound_hint& hint) {
partitions_type::iterator entry = _partitions.emplace_before(i, m.decorated_key().token().raw(), hint,
m.schema(), m.decorated_key(), m.partition());
_tracker.insert(*entry);
entry->set_continuous(i->continuous());
i = _partitions.insert_before(i, *entry);
upgrade_entry(*i);
return i;
upgrade_entry(*entry);
return entry;
}, [&] (auto i) {
throw std::runtime_error(format("cache already contains entry for {}", m.key()));
});
@@ -898,15 +899,14 @@ void row_cache::invalidate_sync(memtable& m) noexcept {
bool blow_cache = false;
// Note: clear_and_dispose() ought not to look up any keys, so it doesn't require
// with_linearized_managed_bytes(), but invalidate() does.
m.partitions.clear_and_dispose([this, deleter = current_deleter<memtable_entry>(), &blow_cache] (memtable_entry* entry) {
m.partitions.clear_and_dispose([this, &m, &blow_cache] (memtable_entry* entry) noexcept {
with_linearized_managed_bytes([&] () noexcept {
try {
invalidate_locked(entry->key());
} catch (...) {
blow_cache = true;
}
entry->partition().evict(_tracker.memtable_cleaner());
deleter(entry);
m.evict_entry(*entry, _tracker.memtable_cleaner());
});
});
if (blow_cache) {
@@ -950,7 +950,7 @@ future<> row_cache::do_update(external_updater eu, memtable& m, Updater updater)
partition_presence_checker is_present = _prev_snapshot->make_partition_presence_checker();
while (!m.partitions.empty()) {
with_allocator(_tracker.allocator(), [&] () {
auto cmp = cache_entry::compare(_schema);
auto cmp = dht::ring_position_comparator(*_schema);
{
size_t partition_count = 0;
{
@@ -966,8 +966,9 @@ future<> row_cache::do_update(external_updater eu, memtable& m, Updater updater)
with_linearized_managed_bytes([&] {
memtable_entry& mem_e = *m.partitions.begin();
size_entry = mem_e.size_in_allocator_without_rows(_tracker.allocator());
auto cache_i = _partitions.lower_bound(mem_e.key(), cmp);
update = updater(_update_section, cache_i, mem_e, is_present, real_dirty_acc);
partitions_type::bound_hint hint;
auto cache_i = _partitions.lower_bound(mem_e.key(), cmp, hint);
update = updater(_update_section, cache_i, mem_e, is_present, real_dirty_acc, hint);
});
});
}
@@ -982,10 +983,9 @@ future<> row_cache::do_update(external_updater eu, memtable& m, Updater updater)
_update_section(_tracker.region(), [&] {
with_linearized_managed_bytes([&] {
auto i = m.partitions.begin();
memtable_entry& mem_e = *i;
m.partitions.erase(i);
mem_e.partition().evict(_tracker.memtable_cleaner());
current_allocator().destroy(&mem_e);
i.erase_and_dispose(dht::raw_token_less_comparator{}, [&] (memtable_entry* e) noexcept {
m.evict_entry(*e, _tracker.memtable_cleaner());
});
});
});
++partition_count;
@@ -1015,11 +1015,11 @@ future<> row_cache::do_update(external_updater eu, memtable& m, Updater updater)
future<> row_cache::update(external_updater eu, memtable& m) {
return do_update(std::move(eu), m, [this] (logalloc::allocating_section& alloc,
row_cache::partitions_type::iterator cache_i, memtable_entry& mem_e, partition_presence_checker& is_present,
real_dirty_memory_accounter& acc) mutable {
real_dirty_memory_accounter& acc, const partitions_type::bound_hint& hint) mutable {
// If cache doesn't contain the entry we cannot insert it because the mutation may be incomplete.
// FIXME: keep a bitmap indicating which sstables we do cover, so we don't have to
// search it.
if (cache_i != partitions_end() && cache_i->key().equal(*_schema, mem_e.key())) {
if (cache_i != partitions_end() && hint.match) {
cache_entry& entry = *cache_i;
upgrade_entry(entry);
assert(entry._schema == _schema);
@@ -1031,12 +1031,11 @@ future<> row_cache::update(external_updater eu, memtable& m) {
|| with_allocator(standard_allocator(), [&] { return is_present(mem_e.key()); })
== partition_presence_checker_result::definitely_doesnt_exist) {
// Partition is absent in underlying. First, insert a neutral partition entry.
cache_entry* entry = current_allocator().construct<cache_entry>(cache_entry::evictable_tag(),
_schema, dht::decorated_key(mem_e.key()),
partitions_type::iterator entry = _partitions.emplace_before(cache_i, mem_e.key().token().raw(), hint,
cache_entry::evictable_tag(), _schema, dht::decorated_key(mem_e.key()),
partition_entry::make_evictable(*_schema, mutation_partition(_schema)));
entry->set_continuous(cache_i->continuous());
_tracker.insert(*entry);
_partitions.insert_before(cache_i, *entry);
mem_e.upgrade_schema(_schema, _tracker.memtable_cleaner());
return entry->partition().apply_to_incomplete(*_schema, std::move(mem_e.partition()), _tracker.memtable_cleaner(),
alloc, _tracker.region(), _tracker, _underlying_phase, acc);
@@ -1049,7 +1048,7 @@ future<> row_cache::update(external_updater eu, memtable& m) {
future<> row_cache::update_invalidating(external_updater eu, memtable& m) {
return do_update(std::move(eu), m, [this] (logalloc::allocating_section& alloc,
row_cache::partitions_type::iterator cache_i, memtable_entry& mem_e, partition_presence_checker& is_present,
real_dirty_memory_accounter& acc)
real_dirty_memory_accounter& acc, const partitions_type::bound_hint&)
{
if (cache_i != partitions_end() && cache_i->key().equal(*_schema, mem_e.key())) {
// FIXME: Invalidate only affected row ranges.
@@ -1072,7 +1071,7 @@ void row_cache::refresh_snapshot() {
void row_cache::touch(const dht::decorated_key& dk) {
_read_section(_tracker.region(), [&] {
with_linearized_managed_bytes([&] {
auto i = _partitions.find(dk, cache_entry::compare(_schema));
auto i = _partitions.find(dk, dht::ring_position_comparator(*_schema));
if (i != _partitions.end()) {
for (partition_version& pv : i->partition().versions_from_oldest()) {
for (rows_entry& row : pv.partition().clustered_rows()) {
@@ -1087,7 +1086,7 @@ void row_cache::touch(const dht::decorated_key& dk) {
void row_cache::unlink_from_lru(const dht::decorated_key& dk) {
_read_section(_tracker.region(), [&] {
with_linearized_managed_bytes([&] {
auto i = _partitions.find(dk, cache_entry::compare(_schema));
auto i = _partitions.find(dk, dht::ring_position_comparator(*_schema));
if (i != _partitions.end()) {
for (partition_version& pv : i->partition().versions_from_oldest()) {
for (rows_entry& row : pv.partition().clustered_rows()) {
@@ -1100,15 +1099,14 @@ void row_cache::unlink_from_lru(const dht::decorated_key& dk) {
}
void row_cache::invalidate_locked(const dht::decorated_key& dk) {
auto pos = _partitions.lower_bound(dk, cache_entry::compare(_schema));
auto pos = _partitions.lower_bound(dk, dht::ring_position_comparator(*_schema));
if (pos == partitions_end() || !pos->key().equal(*_schema, dk)) {
_tracker.clear_continuity(*pos);
} else {
auto it = _partitions.erase_and_dispose(pos,
[this, &dk, deleter = current_deleter<cache_entry>()](auto&& p) mutable {
auto it = pos.erase_and_dispose(dht::raw_token_less_comparator{},
[this](cache_entry* p) mutable noexcept {
_tracker.on_partition_erase();
p->evict(_tracker);
deleter(p);
});
_tracker.clear_continuity(*it);
}
@@ -1138,17 +1136,16 @@ future<> row_cache::invalidate(external_updater eu, dht::partition_range_vector&
while (true) {
auto done = _update_section(_tracker.region(), [&] {
return with_linearized_managed_bytes([&] {
auto cmp = cache_entry::compare(_schema);
auto cmp = dht::ring_position_comparator(*_schema);
auto it = _partitions.lower_bound(*_prev_snapshot_pos, cmp);
auto end = _partitions.lower_bound(dht::ring_position_view::for_range_end(range), cmp);
return with_allocator(_tracker.allocator(), [&] {
auto deleter = current_deleter<cache_entry>();
while (it != end) {
it = _partitions.erase_and_dispose(it, [&] (cache_entry* p) mutable {
_tracker.on_partition_erase();
p->evict(_tracker);
deleter(p);
});
it = it.erase_and_dispose(dht::raw_token_less_comparator{},
[&] (cache_entry* p) mutable noexcept {
_tracker.on_partition_erase();
p->evict(_tracker);
});
// it != end is necessary for correctness. We cannot set _prev_snapshot_pos to end->position()
// because after resuming something may be inserted before "end" which falls into the next range.
if (need_preempt() && it != end) {
@@ -1185,14 +1182,14 @@ void row_cache::evict() {
row_cache::row_cache(schema_ptr s, snapshot_source src, cache_tracker& tracker, is_continuous cont)
: _tracker(tracker)
, _schema(std::move(s))
, _partitions(cache_entry::compare(_schema))
, _partitions(dht::raw_token_less_comparator{})
, _underlying(src())
, _snapshot_source(std::move(src))
{
with_allocator(_tracker.allocator(), [this, cont] {
cache_entry* entry = current_allocator().construct<cache_entry>(cache_entry::dummy_entry_tag());
_partitions.insert_before(_partitions.end(), *entry);
entry->set_continuous(bool(cont));
cache_entry entry(cache_entry::dummy_entry_tag{});
entry.set_continuous(bool(cont));
_partitions.insert(entry.position().token().raw(), std::move(entry), dht::ring_position_comparator{*_schema});
});
}
@@ -1201,13 +1198,7 @@ cache_entry::cache_entry(cache_entry&& o) noexcept
, _key(std::move(o._key))
, _pe(std::move(o._pe))
, _flags(o._flags)
, _cache_link()
{
{
using container_type = row_cache::partitions_type;
container_type::node_algorithms::replace_node(o._cache_link.this_ptr(), _cache_link.this_ptr());
container_type::node_algorithms::init(o._cache_link.this_ptr());
}
}
cache_entry::~cache_entry() {
@@ -1222,11 +1213,11 @@ void row_cache::set_schema(schema_ptr new_schema) noexcept {
}
void cache_entry::on_evicted(cache_tracker& tracker) noexcept {
auto it = row_cache::partitions_type::s_iterator_to(*this);
row_cache::partitions_type::iterator it(this);
std::next(it)->set_continuous(false);
evict(tracker);
current_deleter<cache_entry>()(this);
tracker.on_partition_eviction();
it.erase(dht::raw_token_less_comparator{});
}
void rows_entry::on_evicted(cache_tracker& tracker) noexcept {

View File

@@ -31,7 +31,6 @@
#include "mutation_reader.hh"
#include "mutation_partition.hh"
#include "utils/logalloc.hh"
#include "utils/phased_barrier.hh"
#include "utils/histogram.hh"
#include "partition_version.hh"
@@ -40,6 +39,7 @@
#include <seastar/core/metrics_registration.hh>
#include "flat_mutation_reader.hh"
#include "mutation_cleaner.hh"
#include "utils/double-decker.hh"
namespace bi = boost::intrusive;
@@ -61,11 +61,6 @@ class lsa_manager;
//
// TODO: Make memtables use this format too.
class cache_entry {
// We need auto_unlink<> option on the _cache_link because when entry is
// evicted from cache via LRU we don't have a reference to the container
// and don't want to store it with each entry.
using cache_link_type = bi::set_member_hook<bi::link_mode<bi::auto_unlink>>;
schema_ptr _schema;
dht::decorated_key _key;
partition_entry _pe;
@@ -73,8 +68,10 @@ class cache_entry {
struct {
bool _continuous : 1;
bool _dummy_entry : 1;
bool _head : 1;
bool _tail : 1;
bool _train : 1;
} _flags{};
cache_link_type _cache_link;
friend class size_calculator;
flat_mutation_reader do_read(row_cache&, cache::read_context& reader);
@@ -82,6 +79,13 @@ public:
friend class row_cache;
friend class cache_tracker;
bool is_head() const noexcept { return _flags._head; }
void set_head(bool v) noexcept { _flags._head = v; }
bool is_tail() const noexcept { return _flags._tail; }
void set_tail(bool v) noexcept { _flags._tail = v; }
bool with_train() const noexcept { return _flags._train; }
void set_train(bool v) noexcept { _flags._train = v; }
struct dummy_entry_tag{};
struct incomplete_tag{};
struct evictable_tag{};
@@ -137,6 +141,9 @@ public:
}
return _key;
}
friend dht::ring_position_view ring_position_view_to_compare(const cache_entry& ce) noexcept { return ce.position(); }
const partition_entry& partition() const noexcept { return _pe; }
partition_entry& partition() { return _pe; }
const schema_ptr& schema() const noexcept { return _schema; }
@@ -148,38 +155,6 @@ public:
bool is_dummy_entry() const noexcept { return _flags._dummy_entry; }
struct compare {
dht::ring_position_less_comparator _c;
compare(schema_ptr s)
: _c(*s)
{}
bool operator()(const dht::decorated_key& k1, const cache_entry& k2) const {
return _c(k1, k2.position());
}
bool operator()(dht::ring_position_view k1, const cache_entry& k2) const {
return _c(k1, k2.position());
}
bool operator()(const cache_entry& k1, const cache_entry& k2) const {
return _c(k1.position(), k2.position());
}
bool operator()(const cache_entry& k1, const dht::decorated_key& k2) const {
return _c(k1.position(), k2);
}
bool operator()(const cache_entry& k1, dht::ring_position_view k2) const {
return _c(k1.position(), k2);
}
bool operator()(dht::ring_position_view k1, dht::ring_position_view k2) const {
return _c(k1, k2);
}
};
friend std::ostream& operator<<(std::ostream&, cache_entry&);
};
@@ -315,10 +290,9 @@ void cache_tracker::insert(partition_entry& pe) noexcept {
class row_cache final {
public:
using phase_type = utils::phased_barrier::phase_type;
using partitions_type = bi::set<cache_entry,
bi::member_hook<cache_entry, cache_entry::cache_link_type, &cache_entry::_cache_link>,
bi::constant_time_size<false>, // we need this to have bi::auto_unlink on hooks
bi::compare<cache_entry::compare>>;
using partitions_type = double_decker<int64_t, cache_entry,
dht::raw_token_less_comparator, dht::ring_position_comparator,
16, bplus::key_search::linear>;
friend class cache::autoupdating_underlying_reader;
friend class single_partition_populating_reader;
friend class cache_entry;

View File

@@ -299,7 +299,7 @@ public:
// Less-comparator for lookups in the partition index.
class index_comparator {
dht::ring_position_comparator _tri_cmp;
dht::ring_position_comparator_for_sstables _tri_cmp;
public:
index_comparator(const schema& s) : _tri_cmp(s) {}

332
test/boost/bptree_test.cc Normal file
View File

@@ -0,0 +1,332 @@
/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#define BOOST_TEST_MODULE bptree
#include <boost/test/unit_test.hpp>
#include <fmt/core.h>
#include "utils/bptree.hh"
#include "test/unit/tree_test_key.hh"
/* "Less than" predicate on plain ints; used as the key comparator of the test trees. */
struct int_compare {
    bool operator()(const int& lhs, const int& rhs) const noexcept {
        return rhs > lhs;
    }
};
using namespace bplus;
using test_key = tree_test_key_base;
using test_tree = tree<int, unsigned long, int_compare, 4, key_search::both, with_debug::yes>;
BOOST_AUTO_TEST_CASE(test_ops_empty_tree) {
    /* Erase and find on a tree that holds nothing: must not dereference a nullptr */
    test_tree empty(int_compare{});

    empty.erase(1);
    empty.find(1);
}
BOOST_AUTO_TEST_CASE(test_double_insert) {
    /* Emplacing the same key twice: the second attempt must report "not inserted",
     * and no assertions should fire in ~tree */
    test_tree bpt(int_compare{});

    auto res = bpt.emplace(1, 1);
    BOOST_REQUIRE(res.second);

    res = bpt.emplace(1, 1);
    BOOST_REQUIRE(!res.second);

    bpt.erase(1);
}
BOOST_AUTO_TEST_CASE(test_cookie_find) {
    /* Comparator that can order test_key against a plain int, so that the
     * tree keyed by test_key can be searched with an int. */
    struct int_to_key_compare {
        bool operator()(const test_key& a, const int& b) const noexcept {
            return static_cast<int>(a) < b;
        }
        bool operator()(const int& a, const test_key& b) const noexcept {
            return a < static_cast<int>(b);
        }
        bool operator()(const test_key& a, const test_key& b) const noexcept {
            test_key_compare key_less;
            return key_less(a, b);
        }
    };

    using test_tree = tree<test_key, int, int_to_key_compare, 4, key_search::both, with_debug::yes>;

    test_tree keyed(int_to_key_compare{});
    keyed.emplace(test_key{1}, 132);

    auto found = keyed.find(1);
    BOOST_REQUIRE(*found == 132);
}
BOOST_AUTO_TEST_CASE(test_double_erase) {
    /* erase(key) returns the iterator following the erased element, or end()
     * when there is nothing after it or the key is not present */
    test_tree bpt(int_compare{});
    bpt.emplace(1, 1);
    bpt.emplace(2, 2);

    auto next = bpt.erase(1);
    BOOST_REQUIRE(*next == 2);

    next = bpt.erase(1);
    BOOST_REQUIRE(next == bpt.end());

    next = bpt.erase(2);
    BOOST_REQUIRE(next == bpt.end());

    bpt.erase(2);
}
BOOST_AUTO_TEST_CASE(test_remove_corner_case) {
    /* Erasing both neighbours must leave the middle element intact */
    test_tree bpt(int_compare{});
    bpt.emplace(1, 1);
    bpt.emplace(2, 123);
    bpt.emplace(3, 3);

    bpt.erase(1);
    bpt.erase(3);

    auto survivor = bpt.find(2);
    BOOST_REQUIRE(*survivor == 123);

    bpt.erase(2);
}
BOOST_AUTO_TEST_CASE(test_end_iterator) {
    /* Check std::prev(end()): it must land on the last (here: only) element */
    test_tree t(int_compare{});

    t.emplace(1, 123);
    auto i = std::prev(t.end());
    /* This must be a comparison. An assignment here would overwrite the
     * element with 123 and make the check pass unconditionally. */
    BOOST_REQUIRE(*i == 123);

    t.erase(1);
}
BOOST_AUTO_TEST_CASE(test_next_to_end_iterator) {
    /* Same as test_end_iterator, but the end iterator is obtained by
     * incrementing past the last element ("artificial" end) */
    test_tree t(int_compare{});

    auto i = t.emplace(1, 123).first;
    i++;
    BOOST_REQUIRE(i == t.end());

    i--;
    /* This must be a comparison. An assignment here would overwrite the
     * element with 123 and make the check pass unconditionally. */
    BOOST_REQUIRE(*i == 123);

    t.erase(1);
}
BOOST_AUTO_TEST_CASE(test_clear) {
    /* Fill the tree with 32 elements, then drop them all with tree::clear */
    test_tree bpt(int_compare{});

    int key = 0;
    while (key < 32) {
        bpt.emplace(key, key);
        key++;
    }

    bpt.clear();
}
BOOST_AUTO_TEST_CASE(test_post_clear) {
    /* The tree must remain usable (insert + erase) after it was cleared */
    test_tree bpt(int_compare{});

    bpt.emplace(1, 1);
    bpt.clear();

    bpt.emplace(2, 2);
    bpt.erase(2);
}
BOOST_AUTO_TEST_CASE(test_iterator_erase) {
    /* Erase via iterator::erase and make sure the other element survives */
    test_tree bpt(int_compare{});

    auto victim = bpt.emplace(2, 2);
    bpt.emplace(1, 321);

    victim.first.erase(int_compare{});
    BOOST_REQUIRE(*bpt.find(1) == 321);

    bpt.erase(1);
}
BOOST_AUTO_TEST_CASE(test_iterator_equal) {
    /* Iterators to the same element compare equal no matter how they were
     * obtained; iterators to different elements compare unequal */
    test_tree bpt(int_compare{});

    auto ins_one = bpt.emplace(1, 1);
    auto ins_two = bpt.emplace(2, 2);
    auto found_one = bpt.find(1);

    BOOST_REQUIRE(ins_one.first == found_one);
    BOOST_REQUIRE(ins_one.first != ins_two.first);
}
BOOST_AUTO_TEST_CASE(test_lower_bound) {
    /* Keys 1 and 3 are present; probe below, at, between and above them.
     * The bool out-argument reports whether the key was matched exactly. */
    test_tree bpt(int_compare{});
    bpt.emplace(1, 11);
    bpt.emplace(3, 13);

    bool exact;
    BOOST_REQUIRE(*bpt.lower_bound(0, exact) == 11 && !exact);
    BOOST_REQUIRE(*bpt.lower_bound(1, exact) == 11 && exact);
    BOOST_REQUIRE(*bpt.lower_bound(2, exact) == 13 && !exact);
    BOOST_REQUIRE(*bpt.lower_bound(3, exact) == 13 && exact);
    BOOST_REQUIRE(bpt.lower_bound(4, exact) == bpt.end() && !exact);
}
BOOST_AUTO_TEST_CASE(test_upper_bound) {
    /* Keys 1 and 3 are present; upper_bound must point strictly past the probe */
    test_tree bpt(int_compare{});
    bpt.emplace(1, 11);
    bpt.emplace(3, 13);

    BOOST_REQUIRE(*bpt.upper_bound(0) == 11);
    BOOST_REQUIRE(*bpt.upper_bound(1) == 13);
    BOOST_REQUIRE(*bpt.upper_bound(2) == 13);
    BOOST_REQUIRE(bpt.upper_bound(3) == bpt.end());
    BOOST_REQUIRE(bpt.upper_bound(4) == bpt.end());
}
BOOST_AUTO_TEST_CASE(test_insert_iterator_index) {
    /* The iterator returned by emplace must be incrementable to the next
     * element, both for a fresh insertion and for a duplicate key */
    test_tree bpt(int_compare{});
    bpt.emplace(1, 10);
    bpt.emplace(3, 13);

    auto fresh = bpt.emplace(2, 2).first;
    fresh++;
    BOOST_REQUIRE(*fresh == 13);

    auto dup = bpt.emplace(2, 2); /* 2nd insert finds the previous */
    BOOST_REQUIRE(!dup.second);
    dup.first++;
    BOOST_REQUIRE(*(dup.first) == 13);
}
BOOST_AUTO_TEST_CASE(test_insert_before) {
    /* iterator::emplace_before: the new element must become the immediate
     * predecessor of the iterator it was emplaced before */
    test_tree bpt(int_compare{});

    auto at_three = bpt.emplace(3, 13).first;
    auto at_two = at_three.emplace_before(2, int_compare{}, 12);

    BOOST_REQUIRE(++at_two == at_three);
    BOOST_REQUIRE(*at_three == 13);
    BOOST_REQUIRE(*--at_two == 12);
    BOOST_REQUIRE(*--at_three == 12);
}
BOOST_AUTO_TEST_CASE(test_insert_before_end) {
    /* emplace_before on the end() iterator appends after the last element */
    test_tree bpt(int_compare{});

    auto first = bpt.emplace(1, 1).first;
    auto appended = bpt.end().emplace_before(2, int_compare{}, 12);

    BOOST_REQUIRE(++first == appended);
    BOOST_REQUIRE(++appended == bpt.end());
}
BOOST_AUTO_TEST_CASE(test_insert_before_end_empty) {
    /* emplace_before on end() of an empty tree: the new element becomes begin() */
    test_tree bpt(int_compare{});

    auto only = bpt.end().emplace_before(42, int_compare{}, 142);
    BOOST_REQUIRE(only == bpt.begin());

    bpt.erase(42);
}
BOOST_AUTO_TEST_CASE(test_iterators) {
    test_tree bpt(int_compare{});

    /* On an empty tree neither the reverse nor the forward walk may enter the loop body */
    for (auto rit = bpt.rbegin(); rit != bpt.rend(); rit++) {
        BOOST_REQUIRE(false);
    }
    for (auto fit = bpt.begin(); fit != bpt.end(); fit++) {
        BOOST_REQUIRE(false);
    }

    bpt.emplace(1, 7);
    bpt.emplace(2, 9);

    {
        /* Forward walk yields values in key order */
        auto fit = bpt.begin();
        BOOST_REQUIRE(*(fit++) == 7);
        BOOST_REQUIRE(*(fit++) == 9);
        BOOST_REQUIRE(fit == bpt.end());
    }

    {
        /* Reverse walk yields values in reverse key order */
        auto rit = bpt.rbegin();
        BOOST_REQUIRE(*(rit++) == 9);
        BOOST_REQUIRE(*(rit++) == 7);
        BOOST_REQUIRE(rit == bpt.rend());
    }
}
/*
* Special test that makes sure "self-iterator" works OK.
* See comment near the bptree::iterator(T* d) constructor
* for details.
*/
/*
 * Payload for the self-iterator tests. It always carries a cookie and
 * optionally a key; key() asserts if the object was built without one.
 */
class tree_data {
    int _key = -1; /* -1 means "no key" */
    int _cookie;

public:
    /* Key-less object: only the cookie is meaningful */
    explicit tree_data(int cookie) : _cookie(cookie) {}

    tree_data(int key, int cookie) : _key(key), _cookie(cookie) {}

    int cookie() const {
        return _cookie;
    }

    int key() const {
        assert(_key != -1);
        return _key;
    }
};
BOOST_AUTO_TEST_CASE(test_data_self_iterator) {
    using test_tree = tree<int, tree_data, int_compare, 4, key_search::both, with_debug::yes>;

    test_tree bpt(int_compare{});

    auto ins = bpt.emplace(1, 42);
    BOOST_REQUIRE(ins.second);

    /* Take the raw address of the stored data ... */
    tree_data* raw = &(*ins.first);
    BOOST_REQUIRE(raw->cookie() == 42);

    /* ... and build an iterator back from it */
    test_tree::iterator self(raw);
    BOOST_REQUIRE(self->cookie() == 42);

    /* Erasing through the self-iterator must really remove the key */
    self.erase(int_compare{});
    BOOST_REQUIRE(bpt.find(1) == bpt.end());
}
BOOST_AUTO_TEST_CASE(test_insert_before_nokey) {
    using test_tree = tree<int, tree_data, int_compare, 4, key_search::both, with_debug::yes>;

    test_tree bpt(int_compare{});

    auto anchor = bpt.emplace(2, 52).first;

    /* emplace_before flavour called with the comparator first; presumably the
     * (1, 42) arguments construct tree_data(key = 1, cookie = 42) and the key
     * is taken from the data -- confirm against bptree.hh */
    auto before = anchor.emplace_before(int_compare{}, 1, 42);
    BOOST_REQUIRE(before->cookie() == 42);

    before++;
    BOOST_REQUIRE(before == anchor);
}
BOOST_AUTO_TEST_CASE(test_self_iterator_rover) {
    /* An iterator built from a raw data pointer must support emplace_before
     * and must be able to walk forward up to end() */
    test_tree bpt(int_compare{});

    auto walker = bpt.emplace(2, 42).first;
    unsigned long* raw = &(*walker);
    test_tree::iterator self(raw);

    walker = self.emplace_before(1, int_compare{}, 31);
    BOOST_REQUIRE(*walker == 31);
    BOOST_REQUIRE(*(++walker) == 42);
    BOOST_REQUIRE(++walker == bpt.end());
    BOOST_REQUIRE(++self == bpt.end());
}
BOOST_AUTO_TEST_CASE(test_erase_range) {
    /* tree::erase(from, to) removes the half-open range [from, to) */
    test_tree bpt(int_compare{});

    int key = 0;
    while (key < 32) {
        bpt.emplace(key, key);
        key++;
    }

    auto from = bpt.find(8);
    auto to = bpt.find(25);
    bpt.erase(from, to);

    /* The elements just outside the range survive, the boundary ones inside are gone */
    BOOST_REQUIRE(*bpt.find(7) == 7);
    BOOST_REQUIRE(bpt.find(8) == bpt.end());
    BOOST_REQUIRE(bpt.find(24) == bpt.end());
    BOOST_REQUIRE(*bpt.find(25) == 25);
}

View File

@@ -0,0 +1,397 @@
/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#define BOOST_TEST_MODULE double_decker
#include <seastar/core/print.hh>
#include <boost/test/unit_test.hpp>
#include <fmt/core.h>
#include <string>
#include "utils/double-decker.hh"
#include "test/lib/random_utils.hh"
/*
 * Two-part key used by the double_decker tests: an integer part (the tests
 * pass it as the collection key) and a string part that orders entries
 * sharing the same integer. Copying is disabled; only moves are allowed.
 */
class compound_key {
public:
    int key;
    std::string sub_key;

    /* sk is taken by value and moved into place: a string copy could throw,
     * which inside a noexcept constructor would terminate the program. */
    compound_key(int k, std::string sk) noexcept : key(k), sub_key(std::move(sk)) {}

    compound_key(const compound_key& other) = delete;
    compound_key(compound_key&& other) noexcept : key(other.key), sub_key(std::move(other.sub_key)) {}

    compound_key& operator=(const compound_key& other) = delete;
    compound_key& operator=(compound_key&& other) noexcept {
        key = other.key;
        sub_key = std::move(other.sub_key);
        return *this;
    }

    /* Renders the key as "<key>.<sub_key>" */
    std::string format() const {
        return seastar::format("{}.{}", key, sub_key);
    }

    bool operator==(const compound_key& other) const {
        return key == other.key && sub_key == other.sub_key;
    }

    bool operator!=(const compound_key& other) const { return !(*this == other); }

    /*
     * Three-way comparator: negative, zero or positive result.
     * Overloads taking an int compare against the integer part only.
     */
    struct compare {
        int operator()(const int& a, const int& b) const { return a - b; }
        int operator()(const int& a, const compound_key& b) const { return a - b.key; }
        int operator()(const compound_key& a, const int& b) const { return a.key - b; }
        int operator()(const compound_key& a, const compound_key& b) const {
            if (a.key != b.key) {
                return this->operator()(a.key, b.key);
            } else {
                /* Same integer part: the string part decides */
                return a.sub_key.compare(b.sub_key);
            }
        }
    };

    /* "Less" predicate derived from the three-way compare above */
    struct less_compare {
        compare cmp;

        template <typename A, typename B>
        bool operator()(const A& a, const B& b) const noexcept {
            return cmp(a, b) < 0;
        }
    };
};
/*
 * Element type stored in the double_decker under test. Besides the key it
 * carries the head/tail/train flags with their accessors and two
 * heap-allocated cookies: _cookie is handed over to the new object on move,
 * while _cookie2 is allocated afresh by every constructor.
 */
class test_data {
    compound_key _key;
    bool _head = false;
    bool _tail = false;
    bool _train = false;
    int *_cookie;
    int *_cookie2;

public:
    test_data(int key, std::string sub) : _key(key, sub), _cookie(new int(0)), _cookie2(new int(0)) {}

    test_data(const test_data& other) = delete;
    test_data& operator=(const test_data& other) = delete;
    test_data& operator=(test_data&& other) = delete;

    /* Steals _cookie from the source, keeps the flags, allocates its own _cookie2 */
    test_data(test_data&& other) noexcept
            : _key(std::move(other._key))
            , _head(other._head)
            , _tail(other._tail)
            , _train(other._train)
            , _cookie(other._cookie)
            , _cookie2(new int(0)) {
        other._cookie = nullptr;
    }

    ~test_data() {
        /* _cookie is nullptr on a moved-from object; deleting nullptr is a no-op */
        delete _cookie;
        delete _cookie2;
    }

    bool is_head() const noexcept { return _head; }
    void set_head(bool v) noexcept { _head = v; }

    bool is_tail() const noexcept { return _tail; }
    void set_tail(bool v) noexcept { _tail = v; }

    bool with_train() const noexcept { return _train; }
    void set_train(bool v) noexcept { _train = v; }

    bool operator==(const compound_key& k) { return _key == k; }

    std::string format() const { return _key.format(); }

    /* Three-way comparator over any mix of int, compound_key and test_data */
    struct compare {
        compound_key::compare kcmp;

        int operator()(const int& a, const int& b) { return kcmp(a, b); }
        int operator()(const compound_key& a, const int& b) { return kcmp(a.key, b); }
        int operator()(const int& a, const compound_key& b) { return kcmp(a, b.key); }
        int operator()(const compound_key& a, const compound_key& b) { return kcmp(a, b); }
        int operator()(const compound_key& a, const test_data& b) { return kcmp(a, b._key); }
        int operator()(const test_data& a, const compound_key& b) { return kcmp(a._key, b); }
        int operator()(const test_data& a, const test_data& b) { return kcmp(a._key, b._key); }
    };
};
using collection = double_decker<int, test_data, compound_key::less_compare, test_data::compare, 4,
bplus::key_search::both, bplus::with_debug::yes>;
using oracle = std::set<compound_key, compound_key::less_compare>;
BOOST_AUTO_TEST_CASE(test_lower_bound) {
    /* Contents: 3.e, 5.i, 5.o. For every probe check both the returned
     * position and the key_match / key_tail / match fields of the hint. */
    collection dd(compound_key::less_compare{});
    test_data::compare cmp;

    dd.insert(3, test_data(3, "e"), cmp);
    dd.insert(5, test_data(5, "i"), cmp);
    dd.insert(5, test_data(5, "o"), cmp);

    collection::bound_hint hint;

    BOOST_REQUIRE(*dd.lower_bound(compound_key(2, "a"), cmp, hint) == compound_key(3, "e") && !hint.key_match);
    BOOST_REQUIRE(*dd.lower_bound(compound_key(3, "a"), cmp, hint) == compound_key(3, "e") && hint.key_match && !hint.key_tail && !hint.match);
    BOOST_REQUIRE(*dd.lower_bound(compound_key(3, "e"), cmp, hint) == compound_key(3, "e") && hint.key_match && !hint.key_tail && hint.match);
    BOOST_REQUIRE(*dd.lower_bound(compound_key(3, "o"), cmp, hint) == compound_key(5, "i") && hint.key_match && hint.key_tail && !hint.match);
    BOOST_REQUIRE(*dd.lower_bound(compound_key(4, "i"), cmp, hint) == compound_key(5, "i") && !hint.key_match);
    BOOST_REQUIRE(*dd.lower_bound(compound_key(5, "a"), cmp, hint) == compound_key(5, "i") && hint.key_match && !hint.key_tail && !hint.match);
    BOOST_REQUIRE(*dd.lower_bound(compound_key(5, "i"), cmp, hint) == compound_key(5, "i") && hint.key_match && !hint.key_tail && hint.match);
    BOOST_REQUIRE(*dd.lower_bound(compound_key(5, "l"), cmp, hint) == compound_key(5, "o") && hint.key_match && !hint.key_tail && !hint.match);
    BOOST_REQUIRE(*dd.lower_bound(compound_key(5, "o"), cmp, hint) == compound_key(5, "o") && hint.key_match && !hint.key_tail && hint.match);
    BOOST_REQUIRE(dd.lower_bound(compound_key(5, "q"), cmp, hint) == dd.end() && hint.key_match && hint.key_tail);
    BOOST_REQUIRE(dd.lower_bound(compound_key(6, "q"), cmp, hint) == dd.end() && !hint.key_match);

    dd.clear();
}
BOOST_AUTO_TEST_CASE(test_upper_bound) {
    /* Contents: 3.e, 5.i, 5.o. upper_bound must return the first element
     * strictly greater than the probe, or end(). */
    collection dd(compound_key::less_compare{});
    test_data::compare cmp;

    dd.insert(3, test_data(3, "e"), cmp);
    dd.insert(5, test_data(5, "i"), cmp);
    dd.insert(5, test_data(5, "o"), cmp);

    BOOST_REQUIRE(*dd.upper_bound(compound_key(2, "a"), cmp) == compound_key(3, "e"));
    BOOST_REQUIRE(*dd.upper_bound(compound_key(3, "a"), cmp) == compound_key(3, "e"));
    BOOST_REQUIRE(*dd.upper_bound(compound_key(3, "e"), cmp) == compound_key(5, "i"));
    BOOST_REQUIRE(*dd.upper_bound(compound_key(3, "o"), cmp) == compound_key(5, "i"));
    BOOST_REQUIRE(*dd.upper_bound(compound_key(4, "i"), cmp) == compound_key(5, "i"));
    BOOST_REQUIRE(*dd.upper_bound(compound_key(5, "a"), cmp) == compound_key(5, "i"));
    BOOST_REQUIRE(*dd.upper_bound(compound_key(5, "i"), cmp) == compound_key(5, "o"));
    BOOST_REQUIRE(*dd.upper_bound(compound_key(5, "l"), cmp) == compound_key(5, "o"));
    BOOST_REQUIRE(dd.upper_bound(compound_key(5, "o"), cmp) == dd.end());
    BOOST_REQUIRE(dd.upper_bound(compound_key(5, "q"), cmp) == dd.end());
    BOOST_REQUIRE(dd.upper_bound(compound_key(6, "q"), cmp) == dd.end());

    dd.clear();
}
BOOST_AUTO_TEST_CASE(test_self_iterator) {
    /* Erase elements through iterators rebuilt from raw element pointers */
    collection dd(compound_key::less_compare{});
    test_data::compare cmp;

    dd.insert(1, std::move(test_data(1, "a")), cmp);
    dd.insert(1, std::move(test_data(1, "b")), cmp);
    dd.insert(2, std::move(test_data(2, "c")), cmp);
    dd.insert(3, std::move(test_data(3, "d")), cmp);
    dd.insert(3, std::move(test_data(3, "e")), cmp);

    /* Finds the element, takes its address and erases it via an iterator made from that address */
    auto erase_by_ptr = [&] (int key, std::string sub) {
        test_data* raw = &*dd.find(compound_key(key, sub), cmp);
        collection::iterator self(raw);
        self.erase(compound_key::less_compare{});
    };

    erase_by_ptr(1, "b");
    erase_by_ptr(2, "c");
    erase_by_ptr(3, "d");

    /* Only 1.a and 3.e are expected to remain, in that order */
    auto left = dd.begin();
    BOOST_REQUIRE(*left++ == compound_key(1, "a"));
    BOOST_REQUIRE(*left++ == compound_key(3, "e"));
    BOOST_REQUIRE(left == dd.end());

    dd.clear();
}
BOOST_AUTO_TEST_CASE(test_end_iterator) {
    /* std::prev(end()) must land on the last (here: only) element */
    collection dd(compound_key::less_compare{});
    test_data::compare cmp;

    dd.insert(1, std::move(test_data(1, "a")), cmp);

    auto last = std::prev(dd.end());
    BOOST_REQUIRE(*last == compound_key(1, "a"));

    dd.clear();
}
/* Walks the collection and requires every element to compare strictly less than its successor. */
void validate_sorted(collection& c) {
    auto walker = c.begin();
    if (walker == c.end()) {
        return; /* nothing to validate */
    }

    test_data::compare cmp;
    for (;;) {
        auto prev = walker;
        walker++;
        if (walker == c.end()) {
            return;
        }

        BOOST_REQUIRE(cmp(*prev, *walker) < 0);
    }
}
/*
 * Checks that the collection holds exactly the keys of the oracle set,
 * in the same order.
 */
void compare_with_set(collection& c, oracle& s) {
    test_data::compare cmp;

    /* All keys must be findable */
    for (auto i = s.begin(); i != s.end(); i++) {
        auto j = c.find(*i, cmp);
        BOOST_REQUIRE(j != c.end() && *j == *i);
    }

    /* Both iterators must coincide */
    auto i = c.begin();
    auto j = s.begin();
    while (i != c.end()) {
        /* Guard the oracle iterator: the collection must not be longer than the set */
        BOOST_REQUIRE(j != s.end());
        BOOST_REQUIRE(*i == *j);
        i++;
        j++;
    }

    /* ... and must not be shorter either */
    BOOST_REQUIRE(j == s.end());
}
BOOST_AUTO_TEST_CASE(test_insert_via_emplace) {
    /* Insert 4000 distinct random keys via lower_bound + emplace_before and
     * cross-check the result against the std::set oracle */
    collection dd(compound_key::less_compare{});
    test_data::compare cmp;
    oracle expected;

    for (int inserted = 0; inserted < 4000; ) {
        compound_key k(tests::random::get_int<int>(900), tests::random::get_sstring(4));

        collection::bound_hint hint;
        auto pos = dd.lower_bound(k, cmp, hint);

        /* Only emplace keys that are not there yet */
        if (pos == dd.end() || !hint.match) {
            auto it = dd.emplace_before(pos, k.key, hint, k.key, k.sub_key);
            BOOST_REQUIRE(*it == k);
            expected.insert(std::move(k));
            inserted++;
        }
    }

    compare_with_set(dd, expected);
    dd.clear();
}
BOOST_AUTO_TEST_CASE(test_insert_and_erase) {
    collection dd(compound_key::less_compare{});
    test_data::compare cmp;
    int count = 0;

    /* Phase 1: insert 500 distinct random keys */
    while (count < 500) {
        compound_key k(tests::random::get_int<int>(100), tests::random::get_sstring(3));

        if (dd.find(k, cmp) == dd.end()) {
            auto it = dd.insert(k.key, std::move(test_data(k.key, k.sub_key)), cmp);
            BOOST_REQUIRE(*it == k);
            count++;
        }
    }

    validate_sorted(dd);

    /* Phase 2: erase elements at random positions until nothing is left,
     * validating the ordering after every removal */
    while (count > 0) {
        int steps = tests::random::get_int<int>() % count;

        auto victim = dd.begin();
        for (; steps > 0; steps--) {
            victim++;
        }

        victim.erase(compound_key::less_compare{});
        count--;

        validate_sorted(dd);
    }
}
BOOST_AUTO_TEST_CASE(test_compaction) {
    /* Populate the collection inside a logalloc region, run a full compaction
     * of that region, then check the contents against the oracle */
    logalloc::region reg;

    with_allocator(reg.allocator(), [&] {
        collection dd(compound_key::less_compare{});
        test_data::compare cmp;
        oracle expected;

        {
            /* Held for the duration of the population phase */
            logalloc::reclaim_lock rl(reg);

            for (int inserted = 0; inserted < 1500; ) {
                compound_key k(tests::random::get_int<int>(400), tests::random::get_sstring(3));

                if (dd.find(k, cmp) == dd.end()) {
                    auto it = dd.insert(k.key, std::move(test_data(k.key, k.sub_key)), cmp);
                    BOOST_REQUIRE(*it == k);
                    expected.insert(std::move(k));
                    inserted++;
                }
            }
        }

        reg.full_compaction();

        compare_with_set(dd, expected);
        dd.clear();
    });
}
BOOST_AUTO_TEST_CASE(test_range_erase) {
    /* For every half-open index range [f, t) over 12 keys: build a fresh
     * collection, erase the range and check both the survivors and the
     * iterator returned by erase */
    std::vector<compound_key> keys;
    test_data::compare cmp;

    keys.emplace_back(1, "a");
    keys.emplace_back(1, "b");
    keys.emplace_back(1, "c");
    keys.emplace_back(1, "d");
    keys.emplace_back(2, "a");
    keys.emplace_back(2, "b");
    keys.emplace_back(2, "c");
    keys.emplace_back(2, "d");
    keys.emplace_back(2, "e");
    keys.emplace_back(3, "a");
    keys.emplace_back(3, "b");
    keys.emplace_back(3, "c");

    for (size_t f = 0; f < keys.size(); f++) {
        for (size_t t = f; t <= keys.size(); t++) {
            collection dd(compound_key::less_compare{});

            for (auto&& k : keys) {
                dd.insert(k.key, std::move(test_data(k.key, k.sub_key)), cmp);
            }

            /* Iterator to the element with the given ordinal position */
            auto iter_at = [&dd] (size_t at) -> collection::iterator {
                auto it = dd.begin();
                for (size_t step = 0; step < at; step++, it++) ;
                return it;
            };

            auto after = dd.erase(iter_at(f), iter_at(t));

            auto rest = dd.begin();
            for (size_t i = 0; i < keys.size(); i++) {
                if (i < f || i >= t) {
                    /* Survivor; the first one past the range is what erase must return */
                    if (i == t) {
                        BOOST_REQUIRE(*after == keys[i]);
                    }

                    BOOST_REQUIRE(*(rest++) == keys[i]);
                }
            }

            if (t == keys.size()) {
                BOOST_REQUIRE(after == dd.end());
            }

            BOOST_REQUIRE(rest == dd.end());
        }
    }
}

View File

@@ -0,0 +1,243 @@
/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include <boost/test/unit_test.hpp>
#include <seastar/testing/thread_test_case.hh>
#include <fmt/core.h>
#include "utils/intrusive-array.hh"
#include "utils/logalloc.hh"
/*
 * Array element used by the intrusive_array tests. It holds a long value,
 * the head/tail/train flags with their accessors, and two heap-allocated
 * cookies: _cookie is handed over to the new object on move, while _cookie2
 * is allocated afresh by every constructor.
 */
class element {
    bool _head = false;
    bool _tail = false;
    bool _train = false;
    long _data;
    int *_cookie;
    int *_cookie2;

public:
    explicit element(long val) : _data(val), _cookie(new int(0)), _cookie2(new int(0)) { }

    element(const element& other) = delete;

    /* Steals _cookie from the source, copies flags and value, allocates its own _cookie2 */
    element(element&& other) noexcept
            : _head(other._head)
            , _tail(other._tail)
            , _train(other._train)
            , _data(other._data)
            , _cookie(other._cookie)
            , _cookie2(new int(0)) {
        other._cookie = nullptr;
    }

    ~element() {
        /* _cookie is nullptr on a moved-from object; deleting nullptr is a no-op */
        delete _cookie;
        delete _cookie2;
    }

    bool is_head() const noexcept { return _head; }
    void set_head(bool v) noexcept { _head = v; }

    bool is_tail() const noexcept { return _tail; }
    void set_tail(bool v) noexcept { _tail = v; }

    bool with_train() const noexcept { return _train; }
    void set_train(bool v) noexcept { _train = v; }

    bool operator==(long v) const { return v == _data; }
    long operator*() const { return _data; }

    /* True when the head flag is set exactly for index 0 and the tail flag
     * exactly for index size - 1 */
    bool bound_check(int idx, int size) {
        const bool head_ok = (idx == 0) == is_head();
        const bool tail_ok = (idx == size - 1) == is_tail();
        return head_ok && tail_ok;
    }
};
using test_array = intrusive_array<element>;
/*
 * Verifies the array layout: the last element is flagged as tail, size()
 * agrees, the allocation-strategy size covers size + tlen elements, the
 * train flag on the first element is set iff tlen != 0 and, when there is a
 * train, the unsigned short stored right after the last element equals tlen.
 */
static bool size_check(test_array& a, size_t size, unsigned short tlen) {
    if (!a[size - 1].is_tail()) {
        return false;
    }
    if (!(a.size() == size)) {
        return false;
    }
    if (!(size_for_allocation_strategy(a) == (size + tlen) * sizeof(element))) {
        return false;
    }
    if (!((tlen != 0) == a[0].with_train())) {
        return false;
    }

    /* The train length is only read when a train is expected */
    return (tlen == 0) || *reinterpret_cast<unsigned short*>(&a[size]) == tlen;
}
/*
 * Debug helper: prints the prefix, then the first sz elements with 'H'/'T'
 * marks for head/tail and, if the first element has the train flag, the
 * unsigned short stored right after the printed elements.
 */
void show(const char *pfx, test_array& a, int sz) {
    fmt::print("{}", pfx);

    int pos = 0;
    while (pos < sz) {
        fmt::print("{}{}{}", a[pos].is_head() ? 'H' : ' ', *a[pos], a[pos].is_tail() ? 'T' : ' ');
        pos++;
    }

    if (a[0].with_train()) {
        fmt::print(" ~{}", *reinterpret_cast<unsigned short *>(&a[pos]));
    }

    fmt::print("\n");
}
SEASTAR_THREAD_TEST_CASE(test_basic_construct) {
    /* An array constructed from a single value: every element seen while iterating must hold it */
    test_array single(12);

    for (auto it = single.begin(); it != single.end(); it++) {
        BOOST_REQUIRE(*it == 12);
    }
}
/*
 * Allocates room for nsize elements from the current allocator and
 * constructs there a copy of `from` grown by one element: ndat at position
 * npos. Requires nsize == from.size() + 1.
 */
test_array* grow(test_array& from, size_t nsize, int npos, long ndat) {
    BOOST_REQUIRE(from.size() + 1 == nsize);

    const size_t bytes = sizeof(element) * nsize;
    auto mem = current_allocator().alloc(&get_standard_migrator<test_array>(), bytes, alignof(test_array));

    return new (mem) test_array(from, test_array::grow_tag{npos}, ndat);
}
/*
 * Allocates room for the given number of elements from the current allocator
 * and constructs there a copy of `from` with the element at position spos
 * left out. Requires the new size to be from.size() - 1.
 */
test_array* shrink(test_array& from, size_t nsize, int spos) {
    BOOST_REQUIRE(from.size() - 1 == nsize);

    const size_t bytes = sizeof(element) * nsize;
    auto mem = current_allocator().alloc(&get_standard_migrator<test_array>(), bytes, alignof(test_array));

    return new (mem) test_array(from, test_array::shrink_tag{spos});
}
/*
 * Exhaustively checks grow and shrink construction starting from `cur`,
 * which holds `size` elements.
 *
 * Grow: a new element is inserted at every position 0..size, the result is
 * verified and, while size < depth, the procedure recurses into it.
 * Shrink (only when size > 1): every position 0..size-1 is removed in turn
 * and the result is verified.
 */
void grow_shrink_and_check(test_array& cur, int size, int depth) {
    for (int i = 0; i <= size; i++) {
        long nel = size + 12;
        test_array* narr = grow(cur, size + 1, i, nel);
        int idx = 0;

        BOOST_REQUIRE(size_check(*narr, size + 1, 0));
        for (auto ni = narr->begin(); ni != narr->end(); ni++) {
            if (idx == i) {
                /* The freshly inserted element */
                BOOST_REQUIRE(*ni == nel);
            } else if (idx < i) {
                /* Elements before the insertion point keep their index */
                BOOST_REQUIRE(*ni == *cur[idx]);
            } else {
                /* Elements after it are shifted up by one */
                BOOST_REQUIRE(*ni == *cur[idx - 1]);
            }
            BOOST_REQUIRE(ni->bound_check(idx, size + 1));
            idx++;
        }

        if (size < depth) {
            grow_shrink_and_check(*narr, size + 1, depth);
        }

        current_allocator().destroy(narr);
    }

    if (size > 1) {
        for (int i = 0; i < size; i++) {
            test_array* narr = shrink(cur, size - 1, i);
            int idx = 0;

            BOOST_REQUIRE(size_check(*narr, size - 1, 0));
            for (auto ni = narr->begin(); ni != narr->end(); ni++) {
                /*
                 * idx indexes the shrunk array, which no longer has a slot
                 * for the removed element: entries before position i keep
                 * their index, entries from i on come from cur[idx + 1].
                 * Every element must be checked; skipping on idx == i
                 * without advancing idx would silently skip the whole tail.
                 */
                if (idx < i) {
                    BOOST_REQUIRE(*ni == *cur[idx]);
                } else {
                    BOOST_REQUIRE(*ni == *cur[idx + 1]);
                }
                BOOST_REQUIRE(ni->bound_check(idx, size - 1));
                idx++;
            }

            current_allocator().destroy(narr);
        }
    }
}
SEASTAR_THREAD_TEST_CASE(test_grow_shrink_construct) {
    /* Start from a one-element array and let the helper recurse while size < 5 */
    test_array seed(12);

    grow_shrink_and_check(seed, 1, 5);
}
SEASTAR_THREAD_TEST_CASE(test_erase) {
    /* Build {10, 20, 30}; a fourth element (40) is added per iteration below */
    test_array a1(10);
    test_array *a2 = grow(a1, 2, 1, 20);
    test_array *a3 = grow(*a2, 3, 2, 30);

    /* Try every combination of three consecutive erase positions (4 * 3 * 2) */
    for (int first = 0; first < 4; first++) {
        for (int second = 0; second < 3; second++) {
            for (int third = 0; third < 2; third++) {
                std::vector<int> expected({10, 20, 30, 40});
                test_array *a4 = grow(*a3, 4, 3, 40);

                /* Erase position idx from both the array and the reference
                 * vector, then compare them; sz is the size expected after the erase */
                auto erase_and_verify = [&] (int idx, int sz) {
                    a4->erase(idx);
                    expected.erase(expected.begin() + idx);

                    BOOST_REQUIRE(size_check(*a4, sz, 4 - sz));
                    for (int pos = 0; pos < sz; pos++) {
                        BOOST_REQUIRE(expected[pos] == *(*a4)[pos]);
                    }
                };

                erase_and_verify(first, 3);
                erase_and_verify(second, 2);
                erase_and_verify(third, 1);

                current_allocator().destroy(a4);
            }
        }
    }

    current_allocator().destroy(a3);
    current_allocator().destroy(a2);
}
SEASTAR_THREAD_TEST_CASE(test_lower_bound) {
    test_array a1(12);

    /* Three-way comparator on the element values */
    struct compare {
        int operator()(const element& a, const element& b) const { return *a - *b; }
    };

    /* Two elements {12, 14}: flavour without the match out-argument */
    test_array *a2 = grow(a1, 2, 1, 14);
    auto pos = a2->lower_bound(element(13), compare{});
    BOOST_REQUIRE(*pos == 14 && a2->index_of(pos) == 1);

    /* Three elements {12, 14, 17}: probe below, at, between and above them */
    test_array *a3 = grow(*a2, 3, 2, 17);
    bool exact;
    BOOST_REQUIRE(*a3->lower_bound(element(11), compare{}, exact) == 12 && !exact);
    BOOST_REQUIRE(*a3->lower_bound(element(12), compare{}, exact) == 12 && exact);
    BOOST_REQUIRE(*a3->lower_bound(element(13), compare{}, exact) == 14 && !exact);
    BOOST_REQUIRE(*a3->lower_bound(element(14), compare{}, exact) == 14 && exact);
    BOOST_REQUIRE(*a3->lower_bound(element(15), compare{}, exact) == 17 && !exact);
    BOOST_REQUIRE(*a3->lower_bound(element(16), compare{}, exact) == 17 && !exact);
    BOOST_REQUIRE(*a3->lower_bound(element(17), compare{}, exact) == 17 && exact);
    BOOST_REQUIRE(a3->lower_bound(element(18), compare{}, exact) == a3->end());

    current_allocator().destroy(a3);
    current_allocator().destroy(a2);
}
SEASTAR_THREAD_TEST_CASE(test_from_element) {
    /* Build {12, 14, 17} */
    test_array a1(12);
    test_array *a2 = grow(a1, 2, 1, 14);
    test_array *a3 = grow(*a2, 3, 2, 17);

    element* last = &((*a3)[2]);
    BOOST_REQUIRE(*last == 17);

    /* from_element must recover both the owning array and the element's index */
    int idx;
    test_array& owner = test_array::from_element(last, idx);
    BOOST_REQUIRE(&owner == a3 && idx == 2);

    current_allocator().destroy(a3);
    current_allocator().destroy(a2);
}

View File

@@ -57,11 +57,13 @@ class size_calculator {
public:
static void print_cache_entry_size() {
std::cout << prefix() << "sizeof(cache_entry) = " << sizeof(cache_entry) << "\n";
std::cout << prefix() << "sizeof(memtable_entry) = " << sizeof(memtable_entry) << "\n";
std::cout << prefix() << "sizeof(bptree::node) = " << sizeof(row_cache::partitions_type::outer_tree::node) << "\n";
std::cout << prefix() << "sizeof(bptree::data) = " << sizeof(row_cache::partitions_type::outer_tree::data) << "\n";
{
nest n;
std::cout << prefix() << "sizeof(decorated_key) = " << sizeof(dht::decorated_key) << "\n";
std::cout << prefix() << "sizeof(cache_link_type) = " << sizeof(cache_entry::cache_link_type) << "\n";
print_mutation_partition_size();
}

View File

@@ -24,7 +24,10 @@
#include <seastar/core/print.hh>
#include <seastar/core/future-util.hh>
#include <seastar/core/distributed.hh>
#include <seastar/core/weak_ptr.hh>
#include "seastarx.hh"
#include "utils/extremum_tracking.hh"
#include "utils/estimated_histogram.hh"
#include <chrono>
#include <iosfwd>
@@ -126,3 +129,71 @@ std::vector<double> time_parallel(Func func, unsigned concurrency_per_core, int
}
return results;
}
/*
 * Runs f once and returns how long the call took on std::chrono::steady_clock,
 * as a float-based duration in seconds.
 */
template<typename Func>
auto duration_in_seconds(Func&& f) {
    using steady = std::chrono::steady_clock;
    using float_seconds = std::chrono::duration<float>;

    const auto started = steady::now();
    f();
    const auto finished = steady::now();

    return std::chrono::duration_cast<float_seconds>(finished - started);
}
/*
 * Measures the time between consecutive runs of a task that keeps
 * re-scheduling itself. Every tick() records the time elapsed since the
 * previous one into a histogram and a min/max tracker, and schedules the
 * next tick unless stop() was called.
 */
class scheduling_latency_measurer : public weakly_referencable<scheduling_latency_measurer> {
    using clk = std::chrono::steady_clock;

    clk::time_point _last = clk::now();
    utils::estimated_histogram _hist{300};
    min_max_tracker<clk::duration> _minmax;
    bool _stop = false;

    void schedule_tick();

    /* Accounts the time passed since the previous tick and re-arms itself */
    void tick() {
        const auto previous = _last;
        _last = clk::now();

        const auto elapsed = _last - previous;
        _minmax.update(elapsed);
        _hist.add(elapsed.count());

        if (_stop) {
            return;
        }
        schedule_tick();
    }

public:
    /* Schedules the first tick */
    void start() {
        schedule_tick();
    }

    /* Requests the ticking to cease */
    void stop() {
        _stop = true;
        later().get(); // so that the last scheduled tick is counted
    }

    const utils::estimated_histogram& histogram() const {
        return _hist;
    }

    clk::duration min() const { return _minmax.min(); }
    clk::duration max() const { return _minmax.max(); }
};
/*
 * Schedules a task in the default scheduling group that calls tick().
 * The task holds only a weak reference, so it does nothing if the measurer
 * is gone by the time it runs.
 *
 * Declared inline: this definition appears to live in a header, and a
 * non-inline definition would violate the one-definition rule as soon as the
 * header is included from more than one translation unit.
 */
inline void scheduling_latency_measurer::schedule_tick() {
    seastar::schedule(make_task(default_scheduling_group(), [self = weak_from_this()] () mutable {
        if (self) {
            self->tick();
        }
    }));
}
/*
 * Prints the tick count, the 99th percentile and the maximum latency of the
 * measurer. The latter two are divided by 1e6 for printing in [ms], which
 * presumes the clock counts nanoseconds -- confirm for the target platform.
 *
 * Declared inline: this definition appears to live in a header, and a
 * non-inline definition would violate the one-definition rule as soon as the
 * header is included from more than one translation unit.
 */
inline std::ostream& operator<<(std::ostream& out, const scheduling_latency_measurer& slm) {
    auto to_ms = [] (int64_t nanos) {
        return float(nanos) / 1e6;
    };
    return out << sprint("{count: %d, "
                         //"min: %.6f [ms], "
                         //"50%%: %.6f [ms], "
                         //"90%%: %.6f [ms], "
                         "99%%: %.6f [ms], "
                         "max: %.6f [ms]}",
                         slm.histogram().count(),
                         //to_ms(slm.min().count()),
                         //to_ms(slm.histogram().percentile(0.5)),
                         //to_ms(slm.histogram().percentile(0.9)),
                         to_ms(slm.histogram().percentile(0.99)),
                         to_ms(slm.max().count()));
}

240
test/perf/perf_bptree.cc Normal file
View File

@@ -0,0 +1,240 @@
/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include <seastar/core/app-template.hh>
#include <seastar/core/thread.hh>
#include <algorithm>
#include <vector>
#include <random>
#include <fmt/core.h>
#include "perf.hh"
/* Key type stored in every benchmarked collection */
using per_key_t = int64_t;

/* Plain "less than" ordering over per_key_t, handed to the B+ tree */
struct key_compare {
    bool operator()(const per_key_t& lhs, const per_key_t& rhs) const noexcept {
        return lhs < rhs;
    }
};
#include "utils/bptree.hh"
using namespace bplus;
using namespace seastar;
constexpr int TEST_NODE_SIZE = 4;
/* On node size 32 linear search works better. NOTE(review): TEST_NODE_SIZE above is 4, not 32 -- confirm which value this remark refers to */
using test_tree = tree<per_key_t, unsigned long, key_compare, TEST_NODE_SIZE, key_search::linear>;
/*
 * Common interface of the benchmarked collections. The driver only talks
 * to this interface, so every collection is exercised the same way.
 */
class collection_tester {
public:
    virtual ~collection_tester() = default;

    /* Adds the key to the collection */
    virtual void insert(per_key_t k) = 0;
    /* Performs a lower-bound lookup of the key */
    virtual void lower_bound(per_key_t k) = 0;
    /* Removes the key from the collection */
    virtual void erase(per_key_t k) = 0;
    /* Removes all elements front to back, batch being the yield period */
    virtual void drain(int batch) = 0;
    /* Prints collection-specific statistics, if any */
    virtual void show_stats() = 0;
};
class bptree_tester : public collection_tester {
test_tree _t;
public:
bptree_tester() : _t(key_compare{}) {}
virtual void insert(per_key_t k) override { _t.emplace(k, 0); }
virtual void lower_bound(per_key_t k) override {
auto i = _t.lower_bound(k);
assert(i != _t.end());
}
virtual void erase(per_key_t k) override { _t.erase(k); }
virtual void drain(int batch) override {
int x = 0;
auto i = _t.begin();
while (i != _t.end()) {
i = i.erase(key_compare{});
if (++x % batch == 0) {
seastar::thread::yield();
}
}
}
virtual void show_stats() {
struct bplus::stats st = _t.get_stats();
fmt::print("nodes: {}\n", st.nodes);
for (int i = 0; i < (int)st.nodes_filled.size(); i++) {
fmt::print(" {}: {} ({}%)\n", i, st.nodes_filled[i], st.nodes_filled[i] * 100 / st.nodes);
}
fmt::print("leaves: {}\n", st.leaves);
for (int i = 0; i < (int)st.leaves_filled.size(); i++) {
fmt::print(" {}: {} ({}%)\n", i, st.leaves_filled[i], st.leaves_filled[i] * 100 / st.leaves);
}
fmt::print("datas: {}\n", st.datas);
}
virtual ~bptree_tester() {
_t.clear();
}
};
class set_tester : public collection_tester {
std::set<per_key_t> _s;
public:
virtual void insert(per_key_t k) override { _s.insert(k); }
virtual void lower_bound(per_key_t k) override {
auto i = _s.lower_bound(k);
assert(i != _s.end());
}
virtual void erase(per_key_t k) override { _s.erase(k); }
virtual void drain(int batch) override {
int x = 0;
auto i = _s.begin();
while (i != _s.end()) {
i = _s.erase(i);
if (++x % batch == 0) {
seastar::thread::yield();
}
}
}
virtual void show_stats() { }
virtual ~set_tester() = default;
};
class map_tester : public collection_tester {
std::map<per_key_t, unsigned long> _m;
public:
virtual void insert(per_key_t k) override { _m[k] = 0; }
virtual void lower_bound(per_key_t k) override {
auto i = _m.lower_bound(k);
assert(i != _m.end());
}
virtual void erase(per_key_t k) override { _m.erase(k); }
virtual void drain(int batch) override {
int x = 0;
auto i = _m.begin();
while (i != _m.end()) {
i = _m.erase(i);
if (++x % batch == 0) {
seastar::thread::yield();
}
}
}
virtual void show_stats() { }
virtual ~map_tester() = default;
};
int main(int argc, char **argv) {
namespace bpo = boost::program_options;
app_template app;
app.add_options()
("count", bpo::value<int>()->default_value(5000000), "number of keys to fill the tree with")
("batch", bpo::value<int>()->default_value(50), "number of operations between deferring points")
("iters", bpo::value<int>()->default_value(1), "number of iterations")
("col", bpo::value<std::string>()->default_value("bptree"), "collection to test")
("test", bpo::value<std::string>()->default_value("erase"), "what to test (erase, drain, find)")
("stats", bpo::value<bool>()->default_value(false), "show stats");
return app.run(argc, argv, [&app] {
auto count = app.configuration()["count"].as<int>();
auto iters = app.configuration()["iters"].as<int>();
auto batch = app.configuration()["batch"].as<int>();
auto col = app.configuration()["col"].as<std::string>();
auto tst = app.configuration()["test"].as<std::string>();
auto stats = app.configuration()["stats"].as<bool>();
return seastar::async([count, iters, batch, col, tst, stats] {
std::unique_ptr<collection_tester> c;
if (col == "bptree") {
c = std::make_unique<bptree_tester>();
} else if (col == "set") {
c = std::make_unique<set_tester>();
} else if (col == "map") {
c = std::make_unique<map_tester>();
} else {
fmt::print("Unknown collection\n");
return;
}
std::vector<per_key_t> keys;
for (per_key_t i = 0; i < count; i++) {
keys.push_back(i + 1);
}
std::random_device rd;
std::mt19937 g(rd());
fmt::print("Inserting {:d} k:v pairs into {} {:d} times\n", count, col, iters);
for (auto rep = 0; rep < iters; rep++) {
std::shuffle(keys.begin(), keys.end(), g);
seastar::thread::yield();
auto d = duration_in_seconds([&] {
for (int i = 0; i < count; i++) {
c->insert(keys[i]);
if ((i + 1) % batch == 0) {
seastar::thread::yield();
}
}
});
fmt::print("fill: {:.6f} ms\n", d.count() * 1000);
if (stats) {
c->show_stats();
}
if (tst == "erase") {
std::shuffle(keys.begin(), keys.end(), g);
seastar::thread::yield();
d = duration_in_seconds([&] {
for (int i = 0; i < count; i++) {
c->erase(keys[i]);
if ((i + 1) % batch == 0) {
seastar::thread::yield();
}
}
});
fmt::print("erase: {:.6f} ms\n", d.count() * 1000);
} else if (tst == "drain") {
d = duration_in_seconds([&] {
c->drain(batch);
});
fmt::print("drain: {:.6f} ms\n", d.count() * 1000);
} else if (tst == "find") {
std::shuffle(keys.begin(), keys.end(), g);
seastar::thread::yield();
d = duration_in_seconds([&] {
for (int i = 0; i < count; i++) {
c->lower_bound(keys[i]);
if ((i + 1) % batch == 0) {
seastar::thread::yield();
}
}
});
fmt::print("find: {:.6f} ms\n", d.count() * 1000);
}
}
});
});
}

View File

@@ -19,16 +19,13 @@
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include <chrono>
#include <seastar/core/distributed.hh>
#include <seastar/core/app-template.hh>
#include <seastar/core/sstring.hh>
#include <seastar/core/thread.hh>
#include <seastar/core/weak_ptr.hh>
#include <seastar/core/reactor.hh>
#include "utils/managed_bytes.hh"
#include "utils/extremum_tracking.hh"
#include "utils/logalloc.hh"
#include "row_cache.hh"
#include "log.hh"
@@ -41,74 +38,6 @@ static const int update_iterations = 16;
static const int cell_size = 128;
static bool cancelled = false;
/*
 * Invokes the callable exactly once and returns how long the call took,
 * expressed as a float-based std::chrono duration in seconds. Timing is
 * taken from std::chrono::steady_clock.
 */
template<typename Func>
auto duration_in_seconds(Func&& f) {
    using seconds_f = std::chrono::duration<float>;
    const auto t0 = std::chrono::steady_clock::now();
    f();
    const auto elapsed = std::chrono::steady_clock::now() - t0;
    return std::chrono::duration_cast<seconds_f>(elapsed);
}
/*
 * Records the time that passes between consecutive runs of a task that
 * keeps re-queueing itself. Every gap is fed into a histogram and into a
 * min/max tracker. Call start() to queue the first task and stop() to
 * end the chain.
 */
class scheduling_latency_measurer : public weakly_referencable<scheduling_latency_measurer> {
    using clk = std::chrono::steady_clock;

    clk::time_point _last = clk::now();
    utils::estimated_histogram _hist{300};
    min_max_tracker<clk::duration> _minmax;
    bool _stop = false;

private:
    /* Queues the next tick(); defined out of class */
    void schedule_tick();

    /* Accounts the time since the previous tick and re-arms unless stopped */
    void tick() {
        const auto now = clk::now();
        const auto latency = now - _last;
        _last = now;
        _minmax.update(latency);
        _hist.add(latency.count());
        if (_stop) {
            return;
        }
        schedule_tick();
    }

public:
    void start() {
        schedule_tick();
    }

    void stop() {
        _stop = true;
        later().get(); // so that the last scheduled tick is counted
    }

    const utils::estimated_histogram& histogram() const {
        return _hist;
    }

    clk::duration min() const {
        return _minmax.min();
    }

    clk::duration max() const {
        return _minmax.max();
    }
};
/*
 * Queues a task in the default scheduling group that runs one tick().
 * The task captures only a weak reference to the measurer and checks it
 * before use, so it does nothing if that reference is no longer valid.
 */
void scheduling_latency_measurer::schedule_tick() {
    auto run_tick = [self = weak_from_this()] () mutable {
        if (self) {
            self->tick();
        }
    };
    seastar::schedule(make_task(default_scheduling_group(), std::move(run_tick)));
}
/*
 * Writes a one-line summary of the measurer: the number of recorded
 * samples, the 99th percentile and the maximum. The latter two are
 * treated as nanoseconds and divided by 1e6 to print milliseconds.
 * The min / 50% / 90% columns are kept below but commented out.
 */
std::ostream& operator<<(std::ostream& out, const scheduling_latency_measurer& slm) {
    auto as_millis = [] (int64_t nanos) {
        return float(nanos) / 1e6;
    };

    const auto samples = slm.histogram().count();
    //const auto min_ms = as_millis(slm.min().count());
    //const auto p50_ms = as_millis(slm.histogram().percentile(0.5));
    //const auto p90_ms = as_millis(slm.histogram().percentile(0.9));
    const auto p99_ms = as_millis(slm.histogram().percentile(0.99));
    const auto max_ms = as_millis(slm.max().count());

    out << sprint("{count: %d, "
                  //"min: %.6f [ms], "
                  //"50%%: %.6f [ms], "
                  //"90%%: %.6f [ms], "
                  "99%%: %.6f [ms], "
                  "max: %.6f [ms]}",
                  samples, p99_ms, max_ms);
    return out;
}
template<typename MutationGenerator>
void run_test(const sstring& name, schema_ptr s, MutationGenerator&& gen) {
cache_tracker tracker;

View File

@@ -0,0 +1,207 @@
/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include <seastar/core/app-template.hh>
#include <seastar/core/thread.hh>
#include <map>
#include <vector>
#include <random>
#include <string>
#include <iostream>
#include <fmt/core.h>
#include "utils/logalloc.hh"
constexpr int TEST_NODE_SIZE = 7;
#include "tree_test_key.hh"
#include "utils/bptree.hh"
#include "bptree_validation.hh"
using namespace bplus;
using namespace seastar;
using test_key = tree_test_key_base;
/*
 * Value stored in the tree under test. A value built from a key holds
 * that key's integer value plus 10, which lets a validator check that a
 * value still sits next to the key it was created for.
 */
class test_data {
    int _value;

    /* The value a given key is expected to carry */
    static int value_for(const test_key& k) { return static_cast<int>(k) + 10; }
public:
    test_data() : _value(0) {}
    test_data(test_key& k) : _value(value_for(k)) {}

    operator unsigned long() const { return _value; }

    bool match_key(const test_key& k) const {
        return _value == value_for(k);
    }
};
using test_tree = tree<test_key, test_data, test_key_compare, TEST_NODE_SIZE, key_search::both, with_debug::yes>;
using test_validator = validator<test_key, test_data, test_key_compare, TEST_NODE_SIZE>;
/*
 * One end of a two-way link between two reference objects. Each end
 * stores the address of the other. Move-constructing an end re-points
 * the peer at the new object, and destroying an end clears the peer's
 * pointer, so a linked pair stays consistent when either end is moved.
 */
class reference {
    reference* _ref = nullptr;
public:
    reference() = default;
    reference(const reference& other) = delete;

    /* Takes over the link of `other`, leaving `other` unlinked */
    reference(reference&& other) noexcept {
        reference* peer = other._ref;
        other._ref = nullptr;
        _ref = peer;
        if (peer) {
            peer->_ref = this;
        }
    }

    ~reference() {
        if (_ref) {
            _ref->_ref = nullptr;
        }
    }

    /* Links this (still unlinked) end with `other` in both directions */
    void link(reference& other) {
        assert(_ref == nullptr);
        other._ref = this;
        _ref = &other;
    }

    /* Returns the peer end; the reference must be linked */
    reference* get() {
        assert(_ref != nullptr);
        return _ref;
    }
};
class tree_pointer {
reference _ref;
class tree_wrapper {
friend class tree_pointer;
test_tree _tree;
reference _ref;
public:
tree_wrapper() : _tree(test_key_compare{}) {}
};
tree_wrapper* get_wrapper() {
return boost::intrusive::get_parent_from_member(_ref.get(), &tree_wrapper::_ref);
}
public:
tree_pointer(const tree_pointer& other) = delete;
tree_pointer(tree_pointer&& other) = delete;
tree_pointer() {
tree_wrapper *t = current_allocator().construct<tree_wrapper>();
_ref.link(t->_ref);
}
test_tree* operator->() {
tree_wrapper *tw = get_wrapper();
return &tw->_tree;
}
test_tree& operator*() {
tree_wrapper *tw = get_wrapper();
return tw->_tree;
}
~tree_pointer() {
tree_wrapper *tw = get_wrapper();
current_allocator().destroy(tw);
}
};
/*
 * Compaction test driver.
 *
 * Allocates the tree inside a logalloc::region and, `iters` times, fills
 * it with the keys 1..count in shuffled order, runs a full compaction of
 * the region and validates the tree, then erases all keys in shuffled
 * order, compacts and validates again. With --verb the tree is printed
 * after each compaction.
 */
int main(int argc, char **argv) {
    namespace bpo = boost::program_options;
    app_template app;
    app.add_options()
        ("count", bpo::value<int>()->default_value(10000), "number of keys to fill the tree with")
        ("iters", bpo::value<int>()->default_value(13), "number of iterations")
        ("verb", bpo::value<bool>()->default_value(false), "be verbose");
    return app.run(argc, argv, [&app] {
        auto count = app.configuration()["count"].as<int>();
        auto iter = app.configuration()["iters"].as<int>();
        auto verb = app.configuration()["verb"].as<bool>();
        return seastar::async([count, iter, verb] {
            std::vector<int> keys;
            for (int i = 0; i < count; i++) {
                keys.push_back(i + 1);
            }
            std::random_device rd;
            std::mt19937 g(rd());
            fmt::print("Compacting {:d} k:v pairs {:d} times\n", count, iter);
            test_validator tv;
            logalloc::region mem;
            /* Everything the tree allocates below comes from the region's allocator */
            with_allocator(mem.allocator(), [&] {
                tree_pointer t;
                for (auto rep = 0; rep < iter; rep++) {
                    {
                        std::shuffle(keys.begin(), keys.end(), g);
                        /* The reclaim lock is held only for the duration of the fill loop */
                        logalloc::reclaim_lock rl(mem);
                        for (int i = 0; i < count; i++) {
                            test_key k(keys[i]);
                            auto ti = t->emplace(std::move(copy_key(k)), k);
                            /* Keys are unique, so every insertion must succeed */
                            assert(ti.second);
                            seastar::thread::maybe_yield();
                        }
                    }
                    /* Compact once the lock is released, then check the tree is still intact */
                    mem.full_compaction();
                    if (verb) {
                        fmt::print("After fill + compact\n");
                        tv.print_tree(*t, '|');
                    }
                    tv.validate(*t);
                    {
                        std::shuffle(keys.begin(), keys.end(), g);
                        /* Same pattern for erasing: lock held for the loop only */
                        logalloc::reclaim_lock rl(mem);
                        for (int i = 0; i < count; i++) {
                            test_key k(keys[i]);
                            t->erase(k);
                            seastar::thread::maybe_yield();
                        }
                    }
                    mem.full_compaction();
                    if (verb) {
                        fmt::print("After erase + compact\n");
                        tv.print_tree(*t, '|');
                    }
                    tv.validate(*t);
                }
            });
        });
    });
}

View File

@@ -0,0 +1,232 @@
/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include <seastar/core/app-template.hh>
#include <seastar/core/thread.hh>
#include <map>
#include <vector>
#include <random>
#include <string>
#include <iostream>
#include <fmt/core.h>
#include <fmt/ostream.h>
constexpr int TEST_NODE_SIZE = 16;
#include "tree_test_key.hh"
#include "utils/bptree.hh"
#include "bptree_validation.hh"
using namespace bplus;
using namespace seastar;
using test_key = tree_test_key_base;
/*
 * Value stored in the tree under test. A value built from a key holds
 * that key's integer value plus 10, which lets a validator check that a
 * value still sits next to the key it was created for.
 */
class test_data {
    int _value;

    /* The value a given key is expected to carry */
    static int value_for(const test_key& k) { return static_cast<int>(k) + 10; }
public:
    test_data() : _value(0) {}
    test_data(test_key& k) : _value(value_for(k)) {}

    operator unsigned long() const { return _value; }

    bool match_key(const test_key& k) const {
        return _value == value_for(k);
    }
};
/* Streams a test_data as the unsigned long it converts to */
std::ostream& operator<<(std::ostream& os, test_data d) {
    const unsigned long as_number = d;
    return os << as_number;
}
using test_tree = tree<test_key, test_data, test_key_compare, TEST_NODE_SIZE, key_search::both, with_debug::yes>;
using test_node = typename test_tree::node;
using test_validator = validator<test_key, test_data, test_key_compare, TEST_NODE_SIZE>;
using test_iterator_checker = iterator_checker<test_key, test_data, test_key_compare, TEST_NODE_SIZE>;
/*
 * Stress test driver.
 *
 * Repeats `iters` times: insert the keys 1..count (shuffled, ascending or
 * descending depending on --keys), compare the tree content against a
 * std::map oracle, then erase all keys again. Insertion alternates
 * between tree::emplace and iterator::emplace_before, erasure between
 * tree::erase and iterator::erase, depending on the iteration number.
 * The tree is validated periodically and an iterator checker is stepped
 * every few operations.
 *
 * The iterator checker is held in a std::unique_ptr so that it is
 * released on every exit path, including when a check throws.
 */
int main(int argc, char **argv) {
    namespace bpo = boost::program_options;
    app_template app;
    app.add_options()
        ("count", bpo::value<int>()->default_value(4132), "number of keys to fill the tree with")
        ("iters", bpo::value<int>()->default_value(9), "number of iterations")
        ("keys", bpo::value<std::string>()->default_value("rand"), "how to generate keys (rand, asc, desc)")
        ("verb", bpo::value<bool>()->default_value(false), "be verbose");

    return app.run(argc, argv, [&app] {
        auto count = app.configuration()["count"].as<int>();
        auto iters = app.configuration()["iters"].as<int>();
        auto ks = app.configuration()["keys"].as<std::string>();
        auto verb = app.configuration()["verb"].as<bool>();

        return seastar::async([count, iters, ks, verb] {
            auto t = std::make_unique<test_tree>(test_key_compare{});
            std::map<int, unsigned long> oracle;

            std::vector<int> keys;
            for (int i = 0; i < count; i++) {
                keys.push_back(i + 1);
            }

            std::random_device rd;
            std::mt19937 g(rd());

            fmt::print("Inserting {:d} k:v pairs {:d} times\n", count, iters);

            test_validator tv;

            if (ks == "desc") {
                fmt::print("Reversing keys vector\n");
                std::reverse(keys.begin(), keys.end());
            }

            bool shuffle = ks == "rand";
            if (shuffle) {
                fmt::print("Will shuffle keys each iteration\n");
            }

            for (auto rep = 0; rep < iters; rep++) {
                if (verb) {
                    fmt::print("Iteration {:d}\n", rep);
                }

                std::unique_ptr<test_iterator_checker> itc;
                /* Drops the current checker first, then starts a new one from the tree's beginning */
                auto restart_checker = [&] {
                    itc.reset();
                    itc = std::make_unique<test_iterator_checker>(tv, *t);
                };
                restart_checker();

                if (shuffle) {
                    std::shuffle(keys.begin(), keys.end(), g);
                }

                for (int i = 0; i < count; i++) {
                    test_key k(keys[i]);

                    if (verb) {
                        fmt::print("+++ {}\n", (int)k);
                    }

                    if (rep % 2 != 1) {
                        auto ir = t->emplace(std::move(copy_key(k)), k);
                        assert(ir.second);
                    } else {
                        auto ir = t->lower_bound(k);
                        ir.emplace_before(std::move(copy_key(k)), test_key_compare{}, k);
                    }
                    oracle[keys[i]] = keys[i] + 10;

                    if (verb) {
                        fmt::print("Validating\n");
                        tv.print_tree(*t, '|');
                    }

                    /* Limit validation rate for many keys */
                    if (i % (i/1000 + 1) == 0) {
                        tv.validate(*t);
                    }

                    if (i % 7 == 0) {
                        if (!itc->step()) {
                            restart_checker();
                        }
                    }

                    seastar::thread::maybe_yield();
                }

                auto sz = t->size_slow();
                if (sz != (size_t)count) {
                    fmt::print("Size {} != count {}\n", sz, count);
                    throw "size";
                }

                /* Both containers are ordered by key, so walk them in lockstep */
                auto ti = t->begin();
                for (const auto& oe : oracle) {
                    if (*ti != oe.second) {
                        fmt::print("Data mismatch {} vs {}\n", oe.second, *ti);
                        throw "oracle";
                    }
                    ti++;
                }

                if (shuffle) {
                    std::shuffle(keys.begin(), keys.end(), g);
                }

                for (int i = 0; i < count; i++) {
                    test_key k(keys[i]);

                    /*
                     * kill iterator if we're removing what it points to,
                     * otherwise it's not invalidated
                     */
                    if (itc->here(k)) {
                        itc.reset();
                    }

                    if (verb) {
                        fmt::print("--- {}\n", (int)k);
                    }

                    if (rep % 3 != 2) {
                        t->erase(k);
                    } else {
                        auto ri = t->find(k);
                        auto ni = ri;
                        ni++;
                        auto eni = ri.erase(test_key_compare{});
                        assert(ni == eni);
                    }

                    oracle.erase(keys[i]);

                    if (verb) {
                        fmt::print("Validating\n");
                        tv.print_tree(*t, '|');
                    }

                    if ((count-i) % ((count-i)/1000 + 1) == 0) {
                        tv.validate(*t);
                    }

                    if (itc == nullptr) {
                        restart_checker();
                    }

                    if (i % 5 == 0) {
                        if (!itc->step()) {
                            restart_checker();
                        }
                    }

                    seastar::thread::maybe_yield();
                }
                /* itc is released here, at the end of each iteration */
            }
        });
    });
}

View File

@@ -0,0 +1,318 @@
/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
namespace bplus {
/*
 * Debugging aid for the B+ tree: prints a tree and checks its internal
 * consistency. It works on the debug flavour of the tree (with_debug::yes)
 * and reads the tree's and nodes' internal members directly.
 */
template <typename K, typename T, typename Less, size_t NodeSize>
class validator {
    using tree = class tree<K, T, Less, NodeSize, key_search::both, with_debug::yes>;
    using node = typename tree::node;

    void validate_node(const tree& t, const node& n, int& prev, int& min, bool is_root);
    void validate_list(const tree& t);

public:
    /* Runs all checks; throws (after dumping the tree) if one fails */
    void validate(const tree& t);

    /* Prints the ids of the left/root/right nodes followed by the whole tree */
    void print_tree(const tree& t, char pfx) const {
        fmt::print("/ {} <- | {} | -> {}\n", t._left->id(), t._root->id(), t._right->id());
        print_node(*t._root, pfx, 2);
        fmt::print("\\\n");
    }

    /*
     * Prints one node and, for inner nodes, all of its kids recursively.
     * Each line starts with pfx padded to `indent` columns.
     */
    void print_node(const node& n, char pfx, int indent) const {
        const bool leaf = n.is_leaf();

        fmt::print("{:<{}c}{:s} {:d} ({:d} keys, {:x} flags):", pfx, indent,
                leaf ? "leaf" : "node", n.id(), n._num_keys, n._flags);

        if (!leaf) {
            fmt::print("\n");
            if (n._kids[0].n != nullptr) {
                print_node(*n._kids[0].n, pfx, indent + 2);
            }
            for (int pos = 0; pos < n._num_keys; pos++) {
                fmt::print("{:<{}c}---{}---\n", pfx, indent, (int)n._keys[pos].v);
                print_node(*n._kids[pos + 1].n, pfx, indent + 2);
            }
            return;
        }

        /* A leaf is printed on a single line: header followed by its keys */
        for (int pos = 0; pos < n._num_keys; pos++) {
            fmt::print(" {}", (int)n._keys[pos].v);
        }
        fmt::print("\n");
    }
};
/*
 * Recursively checks the subtree rooted at n.
 *
 *  t        - the tree the node belongs to (supplies the comparator)
 *  n        - the node to check
 *  prev_key - in/out: the last leaf key seen so far in key order
 *  min_key  - out: set to the first key of the left-most leaf visited
 *             below n that holds keys
 *  is_root  - whether n is expected to be flagged as the root
 *
 * Checks performed: the root flag matches is_root, all keys are alive,
 * leaf keys are not less than the previously seen key, every data entry
 * points back at its leaf and matches its key, kids point back at their
 * parent, and every separation key agrees with the minimum key of the
 * subtree to its right. Throws a string literal on the first failure.
 *
 * The mis-parent diagnostics print -1 when the kid has no parent at all
 * rather than dereferencing a null pointer.
 */
template <typename K, typename T, typename L, size_t NS>
void validator<K, T, L, NS>::validate_node(const tree& t, const node& n, int& prev_key, int& min_key, bool is_root) {
    int i;

    if (n.is_root() != is_root) {
        fmt::print("node {} needs to {} root, but {}\n", n.id(), is_root ? "be" : "be not", n._flags);
        throw "root broken";
    }

    for (i = 0; i < n._num_keys; i++) {
        if (!n._keys[i].v.is_alive()) {
            fmt::print("node {} key {} is not alive\n", n.id(), i);
            throw "key dead";
        }
    }

    if (n.is_leaf()) {
        for (i = 0; i < n._num_keys; i++) {
            if (t._less(n._keys[i].v, K(prev_key))) {
                fmt::print("node misordered @{} (prev {})\n", (int)n._keys[i].v, prev_key);
                throw "misorder";
            }
            /* Data for key i is kept in kid slot i + 1 */
            if (n._kids[i + 1].d->_leaf != &n) {
                fmt::print("data mispoint\n");
                throw "data backlink";
            }

            prev_key = n._keys[i].v;
            if (!n._kids[i + 1].d->value.match_key(n._keys[i].v)) {
                fmt::print("node value corrupted @{:d}.{:d}\n", n.id(), i);
                throw "data corruption";
            }
        }

        if (n._num_keys > 0) {
            min_key = (int)n._keys[0].v;
        }
    } else if (n._num_keys > 0) {
        /* The left-most kid has no separation key, check it first */
        node* k = n._kids[0].n;

        if (k->_parent != &n) {
            fmt::print("node {:d} -parent-> {:d}, expect {:d}\n",
                    k->id(), k->_parent ? k->_parent->id() : -1, n.id());
            throw "mis-parented node";
        }
        validate_node(t, *k, prev_key, min_key, false);

        for (i = 0; i < n._num_keys; i++) {
            k = n._kids[i + 1].n;
            if (k->_parent != &n) {
                fmt::print("node {:d} -parent-> {:d}, expect {:d}\n",
                        k->id(), k->_parent ? k->_parent->id() : -1, n.id());
                throw "mis-parented node";
            }
            if (t._less(k->_keys[0].v, n._keys[i].v)) {
                fmt::print("node {:d}.{:d}, separation key {}, kid has {}\n", n.id(), k->id(),
                        (int)n._keys[i].v, (int)k->_keys[0].v);
                throw "separation key mismatch";
            }

            int min = 0;
            validate_node(t, *k, prev_key, min, false);
            /*
             * A separation key differing from the subtree minimum is
             * always reported, but it is fatal only in strict mode or
             * when the minimum is less than the separation key.
             */
            if (t._less(n._keys[i].v, K(min)) || t._less(K(min), n._keys[i].v)) {
                fmt::print("node {:d}.[{:d}]{:d}, separation key {}, min {}\n",
                        n.id(), i, k->id(), (int)n._keys[i].v, min);
                if (strict_separation_key || t._less(K(min), n._keys[i].v)) {
                    throw "separation key screw";
                }
            }
        }
    }
}
/*
 * Checks the chain of leaves, walking it from the left-most leaf to the
 * right-most one.
 *
 * Verifies that the tree's cached left/right leaves equal the ones found
 * by the slow lookup and carry the LEFTMOST/RIGHTMOST flags, that next
 * and prev pointers of neighbouring leaves agree, that the edge leaves
 * point back to the tree, that keys do not go backwards from one leaf to
 * the next, that neighbours in the parent's kids array match the list
 * neighbours, and that no middle leaf is flagged as an edge one.
 * Throws "list broken" on the first failure.
 */
template <typename K, typename T, typename L, size_t NS>
void validator<K, T, L, NS>::validate_list(const tree& t) {
    /* Last key of the previously visited leaf */
    int prev = 0;
    node* lh = t.left_leaf_slow();
    node* rh = t.right_leaf_slow();
    if (lh != t._left) {
        fmt::print("left {:d}, slow {:d}\n", t._left->id(), lh->id());
        throw "list broken";
    }
    if (!(lh->_flags & node::NODE_LEFTMOST)) {
        fmt::print("left {:d} is not marked as such {}\n", t._left->id(), t._left->_flags);;
        throw "list broken";
    }
    if (rh != t._right) {
        fmt::print("right {:d}, slow {:d}\n", t._right->id(), rh->id());
        throw "list broken";
    }
    if (!(rh->_flags & node::NODE_RIGHTMOST)) {
        fmt::print("right {:d} is not marked as such {}\n", t._right->id(), t._right->_flags);;
        throw "list broken";
    }
    node* r = lh;
    while (1) {
        node *ln;
        /* Forward link: the next leaf must point back here; the last leaf points to the tree instead */
        if (!r->is_rightmost()) {
            ln = r->get_next();
            if (ln->get_prev() != r) {
                fmt::print("next leaf {:d} points to {:d}, expect {:d}\n", ln->id(), ln->get_prev()->id(), r->id());
                throw "list broken";
            }
        } else if (r->_rightmost_tree != &t) {
            fmt::print("right leaf doesn't point to tree\n");
            throw "list broken";
        }
        /* Backward link: same check in the other direction */
        if (!r->is_leftmost()) {
            ln = r->get_prev();
            if (ln->get_next() != r) {
                fmt::print("prev leaf {:d} points to {:d}, expect {:d}\n", ln->id(), ln->get_next()->id(), r->id());
                throw "list broken";
            }
        } else if (r->_kids[0]._leftmost_tree != &t) {
            fmt::print("left leaf doesn't point to tree\n");
            throw "list broken";
        }
        /* Keys must not go backwards when moving to the next leaf */
        if (r->_num_keys > 0 && t._less(r->_keys[0].v, K(prev))) {
            fmt::print("list misorder on element {:d}, keys {}..., prev {:d}\n", r->id(), (int)r->_keys[0].v, prev);
            throw "list broken";
        }
        /* Siblings in the parent's kids array must be the list neighbours */
        if (!r->is_root() && r->_parent != nullptr) {
            const auto p = r->_parent;
            int i = p->index_for(r->_keys[0].v, t._less);
            if (i > 0) {
                if (p->_kids[i - 1].n != r->get_prev()) {
                    fmt::print("list misorder on parent check: node {:d}.{:d}, parent prev {:d}, list prev {:d}\n",
                            p->id(), r->id(), p->_kids[i - 1].n->id(), r->get_prev()->id());
                    throw "list broken";
                }
            }
            if (i < p->_num_keys - 1) {
                if (p->_kids[i + 1].n != r->get_next()) {
                    fmt::print("list misorder on parent check: node {:d}.{:d}, parent next {:d}, list next {:d}\n",
                            p->id(), r->id(), p->_kids[i + 1].n->id(), r->get_next()->id());
                    throw "list broken";
                }
            }
        }
        if (r->_num_keys > 0) {
            prev = (int)r->_keys[r->_num_keys - 1].v;
        }
        /* Only the edge leaves may carry the LEFTMOST/RIGHTMOST flags */
        if (r != t._left && r != t._right && (r->_flags & (node::NODE_LEFTMOST | node::NODE_RIGHTMOST))) {
            fmt::print("middle {:d} is marked as left/right {}\n", r->id(), r->_flags);;
            throw "list broken";
        }
        if (r->is_rightmost()) {
            break;
        }
        r = r->get_next();
    }
}
/*
 * Full consistency check of the tree: first the list of leaves, then the
 * root back-pointer, then every node recursively. If any check throws,
 * the tree and the ids of all leaves in list order are printed and the
 * exception is re-thrown.
 */
template <typename K, typename T, typename L, size_t NS>
void validator<K, T, L, NS>::validate(const tree& t) {
    try {
        validate_list(t);

        if (t._root->_root_tree != &t) {
            fmt::print("root doesn't point to tree\n");
            throw "root broken";
        }

        int smallest = 0;
        int last_seen = 0;
        validate_node(t, *t._root, last_seen, smallest, true);
    } catch (...) {
        print_tree(t, '|');

        fmt::print("[ ");
        for (node* leaf = t._left; ; leaf = leaf->get_next()) {
            fmt::print(" {:d}", leaf->id());
            if (leaf->is_rightmost()) {
                break;
            }
        }
        fmt::print("]\n");

        throw;
    }
}
/*
 * Keeps an iterator into a tree that is being modified and checks, step
 * by step, that it keeps working: after stepping forward twice and back
 * once it must read the same value again, and values must not decrease
 * from one step to the next.
 */
template <typename K, typename T, typename Less, size_t NodeSize>
class iterator_checker {
    using tree = class tree<K, T, Less, NodeSize, key_search::both, with_debug::yes>;

    validator<K, T, Less, NodeSize>& _tv;
    tree& _t;
    typename tree::iterator _fwd, _fend;
    T _fprev;

public:
    iterator_checker(validator<K, T, Less, NodeSize>& tv, tree& t) : _tv(tv), _t(t),
            _fwd(t.begin()), _fend(t.end()) {
    }

    /*
     * Advances the check by one element. Returns false once the end of
     * the tree has been reached. On failure the tree is printed and the
     * exception is passed on.
     */
    bool step() {
        try {
            return forward_check();
        } catch(...) {
            _tv.print_tree(_t, ':');
            throw;
        }
    }

    /* Tells whether the iterator currently points at the value made for key k */
    bool here(const K& k) {
        if (_fwd == _fend) {
            return false;
        }
        return _fwd->match_key(k);
    }

private:
    /* Moves the iterator one element forward; true if it landed on the end */
    bool advance_hits_end() {
        _fwd++;
        return _fwd == _fend;
    }

    bool forward_check() {
        if (_fwd == _fend || advance_hits_end()) {
            return false;
        }

        T val = *_fwd;

        /* Peek one element ahead and come back: the value must not change */
        if (advance_hits_end()) {
            return false;
        }
        _fwd--;

        if (val != *_fwd) {
            fmt::print("Iterator broken, {:d} != {:d}\n", val, *_fwd);
            throw "iterator";
        }
        if (val < _fprev) {
            fmt::print("Iterator broken, {:d} < {:d}\n", val, _fprev);
            throw "iterator";
        }

        _fprev = val;
        return true;
    }
};
} // namespace

101
test/unit/tree_test_key.hh Normal file
View File

@@ -0,0 +1,101 @@
/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
/*
 * Helper class that helps to check that the tree
 * - works with keys that have no default constructor
 * - moves the keys around properly
 */
/*
 * Integer key with two heap-allocated "cookies" used to detect mishandled
 * keys. _cookie is handed over on move, so a moved-from key has none.
 * _p_cookie is allocated anew on every construction, including moves,
 * and the destructor asserts it is present. The key is neither
 * default-constructible nor assignable, and copying is possible only
 * through copy_key().
 */
class tree_test_key_base {
    int _val;
    int* _cookie;
    int* _p_cookie;

public:
    /* Reports (and prints why) a key that was moved from or got corrupted */
    bool is_alive() const {
        if (_val == -1) {
            fmt::print("key value is reset\n");
            return false;
        }

        if (_cookie == nullptr) {
            fmt::print("key cookie is reset\n");
            return false;
        }

        if (*_cookie != 0) {
            fmt::print("key cookie value is corrupted {}\n", *_cookie);
            return false;
        }

        return true;
    }

    bool less(const tree_test_key_base& o) const noexcept {
        return _val < o._val;
    }

    explicit tree_test_key_base(int nr, int cookie = 0)
            : _val(nr)
            , _cookie(new int(cookie))
            , _p_cookie(new int(1)) {
    }

    operator int() const noexcept { return _val; }

    tree_test_key_base& operator=(const tree_test_key_base& other) = delete;
    tree_test_key_base& operator=(tree_test_key_base&& other) = delete;

private:
    /*
     * Keep this private to make bptree.hh explicitly call the
     * copy_key in the places where the key is copied
     */
    tree_test_key_base(const tree_test_key_base& other)
            : _val(other._val)
            , _cookie(new int(*other._cookie))
            , _p_cookie(new int(*other._p_cookie)) {
    }

    friend tree_test_key_base copy_key(const tree_test_key_base&);

public:
    /* Steals the value and the cookie; the source is left marked as dead */
    tree_test_key_base(tree_test_key_base&& other) noexcept
            : _val(other._val)
            , _cookie(other._cookie)
            , _p_cookie(new int(*other._p_cookie)) {
        other._val = -1;
        other._cookie = nullptr;
    }

    ~tree_test_key_base() {
        /* _cookie is null for a moved-from key; deleting null is a no-op */
        delete _cookie;
        assert(_p_cookie != nullptr);
        delete _p_cookie;
    }
};
/*
 * The only way to copy a test key: a friend with access to the private
 * copy constructor. Marked inline because it is defined in a header; a
 * non-inline definition would be duplicated in every translation unit
 * that includes it.
 */
inline tree_test_key_base copy_key(const tree_test_key_base& other) { return tree_test_key_base(other); }
struct test_key_compare {
bool operator()(const tree_test_key_base& a, const tree_test_key_base& b) const noexcept { return a.less(b); }
};

1941
utils/bptree.hh Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,53 @@
/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <type_traits>
#include <seastar/util/concepts.hh>
/*
 * Disposer<Func, T> is satisfied when an object of type Func can be
 * called with a T* argument, the call is noexcept and returns void.
 */
SEASTAR_CONCEPT(
    template <typename Func, typename T>
    concept Disposer = requires (Func f, T* val) {
        { f(val) } noexcept -> std::same_as<void>;
    };
)
/*
 * LessComparable<Key1, Key2, Less> is satisfied when Less can be called
 * with the two keys in either order and returns exactly bool.
 *
 * LessNothrowComparable additionally requires that invoking Less with
 * (Key1, Key2) is known not to throw. Only that argument order is
 * covered by the nothrow check.
 */
SEASTAR_CONCEPT(
    template <typename Key1, typename Key2, typename Less>
    concept LessComparable = requires (const Key1& a, const Key2& b, Less less) {
        { less(a, b) } -> std::same_as<bool>;
        { less(b, a) } -> std::same_as<bool>;
    };
    template <typename Key1, typename Key2, typename Less>
    concept LessNothrowComparable = LessComparable<Key1, Key2, Less> && std::is_nothrow_invocable_v<Less, Key1, Key2>;
)
/*
 * Comparable<T1, T2, Compare> is satisfied when Compare can be called
 * with (const T1&, const T2&) and returns exactly int.
 */
SEASTAR_CONCEPT(
    template <typename T1, typename T2, typename Compare>
    concept Comparable = requires (const T1& a, const T2& b, Compare cmp) {
        // The Comparable is trichotomic comparator that should return
        // negative value when a < b
        // zero when a == b
        // positive value when a > b
        { cmp(a, b) } -> std::same_as<int>;
    };
)

412
utils/double-decker.hh Normal file
View File

@@ -0,0 +1,412 @@
/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <type_traits>
#include <seastar/util/concepts.hh>
#include "utils/bptree.hh"
#include "utils/intrusive-array.hh"
#include "utils/collection-concepts.hh"
#include <fmt/core.h>
/*
 * The double-decker is an ordered keeper of key:value pairs that has
 * the pairs sorted by both key and value (key first).
 *
 * Key collisions are expected to be rare enough to afford holding
 * the values in a sorted array with the help of linear algorithms.
 */
template <typename Key, typename T, typename Less, typename Compare, int NodeSize,
bplus::key_search Search = bplus::key_search::binary, bplus::with_debug Debug = bplus::with_debug::no>
SEASTAR_CONCEPT( requires Comparable<T, T, Compare> && std::is_nothrow_move_constructible_v<T> )
class double_decker {
public:
using inner_array = intrusive_array<T>;
using outer_tree = bplus::tree<Key, inner_array, Less, NodeSize, Search, Debug>;
using outer_iterator = typename outer_tree::iterator;
using outer_const_iterator = typename outer_tree::const_iterator;
private:
outer_tree _tree;
public:
template <bool Const>
class iterator_base {
friend class double_decker;
using outer_iterator = std::conditional_t<Const, typename double_decker::outer_const_iterator, typename double_decker::outer_iterator>;
protected:
outer_iterator _bucket;
int _idx;
public:
iterator_base() = default;
iterator_base(outer_iterator bkt, int idx) noexcept : _bucket(bkt), _idx(idx) {}
using iterator_category = std::bidirectional_iterator_tag;
using difference_type = ssize_t;
using value_type = std::conditional_t<Const, const T, T>;
using pointer = value_type*;
using reference = value_type&;
reference operator*() const noexcept { return (*_bucket)[_idx]; }
pointer operator->() const noexcept { return &((*_bucket)[_idx]); }
iterator_base& operator++() noexcept {
if ((*_bucket)[_idx++].is_tail()) {
_bucket++;
_idx = 0;
}
return *this;
}
iterator_base operator++(int) noexcept {
iterator_base cur = *this;
operator++();
return cur;
}
iterator_base& operator--() noexcept {
if (_idx-- == 0) {
_bucket--;
_idx = _bucket->index_of(_bucket->end()) - 1;
}
return *this;
}
iterator_base operator--(int) noexcept {
iterator_base cur = *this;
operator--();
return cur;
}
bool operator==(const iterator_base& o) const noexcept { return _bucket == o._bucket && _idx == o._idx; }
bool operator!=(const iterator_base& o) const noexcept { return !(*this == o); }
};
using const_iterator = iterator_base<true>;
class iterator final : public iterator_base<false> {
friend class double_decker;
using super = iterator_base<false>;
iterator(const const_iterator&& other) noexcept : super(std::move(other._bucket), other._idx) {}
public:
iterator() noexcept : super() {}
iterator(outer_iterator bkt, int idx) noexcept : super(bkt, idx) {}
iterator(T* ptr) noexcept {
inner_array& arr = inner_array::from_element(ptr, super::_idx);
super::_bucket = outer_iterator(&arr);
}
template <typename Func>
SEASTAR_CONCEPT(requires Disposer<Func, T>)
iterator erase_and_dispose(Less less, Func&& disp) noexcept {
disp(&**this); // * to deref this, * to call operator*, & to get addr from ref
if (super::_bucket->is_single_element()) {
outer_iterator bkt = super::_bucket.erase(less);
return iterator(bkt, 0);
}
bool tail = (*super::_bucket)[super::_idx].is_tail();
super::_bucket->erase(super::_idx);
if (tail) {
super::_bucket++;
super::_idx = 0;
}
return *this;
}
iterator erase(Less less) noexcept { return erase_and_dispose(less, bplus::default_dispose<T>); }
};
/*
* Structure that shed some more light on how the lower_bound
* actually found the bounding elements.
*/
struct bound_hint {
/*
* Set to true if the element fully matched to the key
* according to Compare
*/
bool match;
/*
* Set to true if the bucket for the given key exists
*/
bool key_match;
/*
* Set to true if the given key is more than anything
* on the bucket and iterator was switched to the next
* one (or when the key_match is false)
*/
bool key_tail;
/*
* This helper says whether the emplace will invalidate (some)
* iterators or not. Emplacing with !key_match will go and create
* new node in B+ which doesn't invalidate iterators. In another
* case some existing B+ data node will be reconstructed, so the
* iterators on those nodes will become invalid.
*/
bool emplace_keeps_iterators() const noexcept { return !key_match; }
};
iterator begin() noexcept { return iterator(_tree.begin(), 0); }
const_iterator begin() const noexcept { return const_iterator(_tree.begin(), 0); }
const_iterator cbegin() const noexcept { return const_iterator(_tree.begin(), 0); }
iterator end() noexcept { return iterator(_tree.end(), 0); }
const_iterator end() const noexcept { return const_iterator(_tree.end(), 0); }
const_iterator cend() const noexcept { return const_iterator(_tree.end(), 0); }
explicit double_decker(Less less) noexcept : _tree(less) { }
double_decker(const double_decker& other) = delete;
double_decker(double_decker&& other) noexcept : _tree(std::move(other._tree)) {}
iterator insert(Key k, T value, Compare cmp) {
std::pair<outer_iterator, bool> oip = _tree.emplace(std::move(k), std::move(value));
outer_iterator& bkt = oip.first;
int idx = 0;
if (!oip.second) {
/*
* Unlikely, but in this case we reconstruct the array. The value
* must not have been moved by emplace() above.
*/
idx = bkt->index_of(bkt->lower_bound(value, cmp));
size_t new_size = (bkt->size() + 1) * sizeof(T);
bkt.reconstruct(new_size, *bkt,
typename inner_array::grow_tag{idx}, std::move(value));
}
return iterator(bkt, idx);
}
template <typename... Args>
iterator emplace_before(iterator i, Key k, const bound_hint& hint, Args&&... args) {
assert(!hint.match);
outer_iterator& bucket = i._bucket;
if (!hint.key_match) {
/*
* The most expected case -- no key conflict, respectively the
* bucket is not found, and i points to the next one. Just go
* ahead and emplace the new bucket before the i and push the
* 0th element into it.
*/
outer_iterator nb = bucket.emplace_before(std::move(k), _tree.less(), std::forward<Args>(args)...);
return iterator(nb, 0);
}
/*
* Key conflict, need to expand some inner vector, but still there
* are two cases -- whether the bounding element is on k's bucket
* or the bound search overflew and switched to the next one.
*/
int idx = i._idx;
if (hint.key_tail) {
/*
* The latter case -- i points to the next one. Need to shift
* back and append the new element to its tail.
*/
bucket--;
idx = bucket->index_of(bucket->end());
}
size_t new_size = (bucket->size() + 1) * sizeof(T);
bucket.reconstruct(new_size, *bucket,
typename inner_array::grow_tag{idx}, std::forward<Args>(args)...);
return iterator(bucket, idx);
}
template <typename K = Key>
SEASTAR_CONCEPT( requires Comparable<K, T, Compare> )
const_iterator find(const K& key, Compare cmp) const {
outer_const_iterator bkt = _tree.find(key);
int idx = 0;
if (bkt != _tree.end()) {
bool match = false;
idx = bkt->index_of(bkt->lower_bound(key, cmp, match));
if (!match) {
bkt = _tree.end();
idx = 0;
}
}
return const_iterator(bkt, idx);
}
template <typename K = Key>
SEASTAR_CONCEPT( requires Comparable<K, T, Compare> )
iterator find(const K& k, Compare cmp) {
return iterator(const_cast<const double_decker*>(this)->find(k, std::move(cmp)));
}
template <typename K = Key>
SEASTAR_CONCEPT( requires Comparable<K, T, Compare> )
const_iterator lower_bound(const K& key, Compare cmp, bound_hint& hint) const {
outer_const_iterator bkt = _tree.lower_bound(key, hint.key_match);
hint.key_tail = false;
hint.match = false;
if (bkt == _tree.end() || !hint.key_match) {
return const_iterator(bkt, 0);
}
int i = bkt->index_of(bkt->lower_bound(key, cmp, hint.match));
if (i != 0 && (*bkt)[i - 1].is_tail()) {
/*
* The lower_bound is after the last element -- shift
* to the net bucket's 0'th one.
*/
bkt++;
i = 0;
hint.key_tail = true;
}
return const_iterator(bkt, i);
}
template <typename K = Key>
SEASTAR_CONCEPT( requires Comparable<K, T, Compare> )
iterator lower_bound(const K& key, Compare cmp, bound_hint& hint) {
return iterator(const_cast<const double_decker*>(this)->lower_bound(key, std::move(cmp), hint));
}
template <typename K = Key>
SEASTAR_CONCEPT( requires Comparable<K, T, Compare> )
const_iterator lower_bound(const K& key, Compare cmp) const {
bound_hint hint;
return lower_bound(key, cmp, hint);
}
template <typename K = Key>
SEASTAR_CONCEPT( requires Comparable<K, T, Compare> )
iterator lower_bound(const K& key, Compare cmp) {
return iterator(const_cast<const double_decker*>(this)->lower_bound(key, std::move(cmp)));
}
template <typename K = Key>
SEASTAR_CONCEPT( requires Comparable<K, T, Compare> )
const_iterator upper_bound(const K& key, Compare cmp) const {
bool key_match;
outer_const_iterator bkt = _tree.lower_bound(key, key_match);
if (bkt == _tree.end() || !key_match) {
return const_iterator(bkt, 0);
}
int i = bkt->index_of(bkt->upper_bound(key, cmp));
if (i != 0 && (*bkt)[i - 1].is_tail()) {
// Beyond the end() boundary
bkt++;
i = 0;
}
return const_iterator(bkt, i);
}
template <typename K = Key>
SEASTAR_CONCEPT( requires Comparable<K, T, Compare> )
iterator upper_bound(const K& key, Compare cmp) {
return iterator(const_cast<const double_decker*>(this)->upper_bound(key, std::move(cmp)));
}
template <typename Func>
SEASTAR_CONCEPT(requires Disposer<Func, T>)
void clear_and_dispose(Func&& disp) noexcept {
_tree.clear_and_dispose([&disp] (inner_array* arr) noexcept {
arr->for_each(disp);
});
}
void clear() noexcept { clear_and_dispose(bplus::default_dispose<T>); }
template <typename Func>
SEASTAR_CONCEPT(requires Disposer<Func, T>)
iterator erase_and_dispose(iterator begin, iterator end, Func&& disp) noexcept {
bool same_bucket = begin._bucket == end._bucket;
// Drop the tail of the starting bucket if it's not fully erased
while (begin._idx != 0) {
if (same_bucket) {
if (begin == end) {
return begin;
}
end._idx--;
}
begin = begin.erase_and_dispose(_tree.less(), disp);
}
// Drop all the buckets in between
outer_iterator nb = _tree.erase_and_dispose(begin._bucket, end._bucket, [&disp] (inner_array* arr) noexcept {
arr->for_each(disp);
});
assert(nb == end._bucket);
/*
* Drop the head of the ending bucket. Every erased element is the 0th
* one, when erased it will shift the rest left and reconstruct the array,
* thus we cannot rely on the end to keep neither _bucket not _idx.
*
* Said that -- just erase the required number of elements. A corner case
* when end points to the tree end is handled, _idx is 0 in this case.
*/
iterator next(nb, 0);
while (end._idx-- != 0) {
next = next.erase_and_dispose(_tree.less(), disp);
}
return next;
}
iterator erase(iterator begin, iterator end) noexcept {
return erase_and_dispose(begin, end, bplus::default_dispose<T>);
}
bool empty() const noexcept { return _tree.empty(); }
static size_t estimated_object_memory_size_in_allocator(allocation_strategy& allocator, const T* obj) noexcept {
/*
* The T-s are merged together in array, so getting any run-time
* value of a pointer would be wrong. So here's some guessing of
* how much memory would this thing occupy in memory
*/
return sizeof(typename outer_tree::data);
}
};

354
utils/intrusive-array.hh Normal file
View File

@@ -0,0 +1,354 @@
/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <array>
#include <cassert>
#include <seastar/util/concepts.hh>
#include "utils/allocation_strategy.hh"
#include "utils/collection-concepts.hh"
SEASTAR_CONCEPT(
template <typename T>
concept BoundsKeeper = requires (T obj, bool flag) {
    // The element itself carries the array bounds: three boolean marks,
    // each with a nothrow getter returning bool and a nothrow setter
    // returning void.

    // "head" mark
    { obj.is_head() } noexcept -> std::same_as<bool>;
    { obj.set_head(flag) } noexcept -> std::same_as<void>;

    // "tail" mark
    { obj.is_tail() } noexcept -> std::same_as<bool>;
    { obj.set_tail(flag) } noexcept -> std::same_as<void>;

    // "train" mark
    { obj.with_train() } noexcept -> std::same_as<bool>;
    { obj.set_train(flag) } noexcept -> std::same_as<void>;
};
)
/*
 * A plain array of T-s that grows and shrinks by constructing new
 * instances. Holds at least one element. Has facilities for sorting
 * the elements and for doing "container_of" by the given element
 * pointer. LSA-compactible.
 *
 * Important feature of the array is zero memory overhead -- it doesn't
 * keep its size/capacity onboard. The size is calculated each time by
 * walking the array of T-s and checking which one of them is the tail
 * element. Respectively, the T must keep head/tail flags on itself.
 *
 * The object declares storage for one element only. The memory for the
 * rest of them must be provided by whoever places the array (see the
 * constructors below).
 */
template <typename T>
SEASTAR_CONCEPT( requires BoundsKeeper<T> && std::is_nothrow_move_constructible_v<T> )
class intrusive_array {
    // Sanity constant to avoid infinite loops searching for tail
    static constexpr int max_len = std::numeric_limits<short int>::max();

    union maybe_constructed {
        maybe_constructed() { }
        ~maybe_constructed() { }

        T object;

        /*
         * Train is 1 or more allocated but unoccupied memory slots after
         * the tail one. Being unused, this memory keeps the train length.
         * An array with the train is marked with the respective flag on
         * the 0th element. Train is created by the erase() call, every
         * call makes it one slot longer. The erase() asserts that the
         * length it extends is below max_len, so the train never gets
         * longer than max_len slots.
         *
         * Train length is included into the storage_size() to make
         * allocator and compaction work correctly, but is not included
         * into the number_of_elements(), so the array behaves just like
         * there's no train
         *
         * Respectively both grow and shrink constructors do not carry
         * the train (and drop the bit from 0th element) and don't expect
         * the memory for the new array to include one
         */
        unsigned short train_len;
        static_assert(sizeof(T) >= sizeof(unsigned short));
    };

    maybe_constructed _data[1];

    // Walks the elements up to the tail one and returns their count.
    // Aborts if no tail is met within max_len elements.
    size_t number_of_elements() const noexcept {
        for (int i = 0; i < max_len; i++) {
            if (_data[i].object.is_tail()) {
                return i + 1;
            }
        }

        std::abort();
    }

    // Size in bytes of the occupied memory -- elements plus the train
    size_t storage_size() const noexcept {
        size_t nr = number_of_elements();
        if (_data[0].object.with_train()) {
            // The train length lives in the slot right after the tail
            nr += _data[nr].train_len;
        }
        return nr * sizeof(T);
    }

public:
    using iterator = T*;
    using const_iterator = const T*;

    /*
     * There are 3 constructing options for the array: initial, grow
     * and shrink.
     *
     * * initial just creates a 1-element array
     * * grow -- makes a new one moving all elements from the original
     *   array and inserting the one (only one) more element at the given
     *   position
     * * shrink -- also makes a new array skipping the not needed
     *   element while moving them from the original one
     *
     * In all cases the enough big memory chunk must be provided by the
     * caller!
     *
     * Note, that none of them calls destructors on T-s, unlike vector.
     * This is because when the older array is destroyed it has no idea
     * about whether or not it was grown/shrunk and thus it destroys
     * T-s itself.
     */

    // Initial
    template <typename... Args>
    intrusive_array(Args&&... args) {
        new (&_data[0].object) T(std::forward<Args>(args)...);
        // The only element is both the head and the tail
        _data[0].object.set_head(true);
        _data[0].object.set_tail(true);
    }

    // Growing
    struct grow_tag {
        int add_pos;
    };

    /*
     * Makes an array out of all the elements of @from plus one more
     * element constructed from @args at position grow.add_pos. The
     * position must be within [0, size of @from], which is asserted
     * before anything is written.
     *
     * The new element is constructed first. Its constructor is the only
     * thing here that may throw (moving T-s is nothrow), so if it does
     * throw the @from array is still fully intact and there's nothing
     * constructed in the new memory that would need destruction.
     */
    template <typename... Args>
    intrusive_array(intrusive_array& from, grow_tag grow, Args&&... args) {
        const int old_len = static_cast<int>(from.number_of_elements());
        assert(grow.add_pos >= 0 && grow.add_pos <= old_len && old_len < max_len);

        new (&_data[grow.add_pos].object) T(std::forward<Args>(args)...);

        // Elements before add_pos keep their index, the rest shift by one
        for (int src = 0; src < old_len; src++) {
            const int dst = src < grow.add_pos ? src : src + 1;
            new (&_data[dst].object) T(std::move(from._data[src].object));
        }

        _data[0].object.set_head(true);
        _data[0].object.set_train(false);
        if (grow.add_pos == 0) {
            // The former head was moved one position right
            _data[1].object.set_head(false);
        }

        _data[old_len].object.set_tail(true);
        if (grow.add_pos == old_len) {
            // Appended after the former tail
            _data[old_len - 1].object.set_tail(false);
        }
    }

    // Shrinking
    struct shrink_tag {
        int del_pos;
    };

    /*
     * Makes an array out of all the elements of @from except for the one
     * at shrink.del_pos, which is left where it is.
     */
    intrusive_array(intrusive_array& from, shrink_tag shrink) {
        int i, off = 0;
        bool tail = false;

        for (i = 0; !tail; i++) {
            // Sample the flag before the element is moved from
            tail = from._data[i].object.is_tail();
            if (i == shrink.del_pos) {
                off = 1;
            } else {
                new (&_data[i - off].object) T(std::move(from._data[i].object));
            }
        }

        _data[0].object.set_head(true);
        _data[0].object.set_train(false);
        _data[i - off - 1].object.set_tail(true);
    }

    intrusive_array(const intrusive_array& other) = delete;

    // Moves all the elements one by one and carries the train length,
    // if the train is there
    intrusive_array(intrusive_array&& other) noexcept {
        bool tail = false;
        int i;

        for (i = 0; !tail; i++) {
            tail = other._data[i].object.is_tail();
            new (&_data[i].object) T(std::move(other._data[i].object));
        }

        if (_data[0].object.with_train()) {
            _data[i].train_len = other._data[i].train_len;
        }
    }

    // Destroys all the elements up to and including the tail one
    ~intrusive_array() {
        bool tail = false;

        for (int i = 0; !tail; i++) {
            tail = _data[i].object.is_tail();
            _data[i].object.~T();
        }
    }

    /*
     * Drops the element in-place at position @pos and grows the
     * "train". To be used in places where reconstruction is not
     * welcome (e.g. because it throws)
     *
     * Single-elemented array cannot be erased from, just drop it
     * alltogether if needed
     */
    void erase(int pos) noexcept {
        assert(!is_single_element());
        assert(pos >= 0 && pos < max_len);

        // Both flags are to be sampled before the elements get shuffled
        bool with_train = _data[0].object.with_train();
        bool tail = _data[pos].object.is_tail();

        _data[pos].object.~T();
        if (tail) {
            assert(pos > 0);
            _data[pos - 1].object.set_tail(true);
        } else {
            // Shift the rest one position left. When done the pos is
            // the index of the slot that has just been freed.
            while (!tail) {
                new (&_data[pos].object) T(std::move(_data[pos + 1].object));
                _data[pos + 1].object.~T();
                tail = _data[pos++].object.is_tail();
            }
            _data[0].object.set_head(true);
        }

        // The freed slot becomes the first slot of the train, the old
        // train (if any) started one slot further
        _data[0].object.set_train(true);
        unsigned short train_len = with_train ? _data[pos + 1].train_len : 0;
        assert(train_len < max_len);
        _data[pos].train_len = train_len + 1;
    }

    T& operator[](int pos) noexcept { return _data[pos].object; }
    const T& operator[](int pos) const noexcept { return _data[pos].object; }

    iterator begin() noexcept { return &_data[0].object; }
    const_iterator begin() const noexcept { return &_data[0].object; }
    const_iterator cbegin() const noexcept { return &_data[0].object; }

    // Getting the end walks the array to find the tail
    iterator end() noexcept { return &_data[number_of_elements()].object; }
    const_iterator end() const noexcept { return &_data[number_of_elements()].object; }
    const_iterator cend() const noexcept { return &_data[number_of_elements()].object; }

    size_t index_of(iterator i) const noexcept { return i - &_data[0].object; }
    size_t index_of(const_iterator i) const noexcept { return i - &_data[0].object; }

    bool is_single_element() const noexcept { return _data[0].object.is_tail(); }

    /*
     * A helper for keeping the array sorted. Returns the first element
     * for which cmp(element, val) is not negative, or the end of the
     * array if there's no such.
     *
     * The @match is always written -- true if the found element compares
     * equal to the val, false otherwise (including the nothing found
     * case), so the caller doesn't have to pre-initialize it.
     */
    template <typename K, typename Compare>
    SEASTAR_CONCEPT( requires Comparable<K, T, Compare> )
    const_iterator lower_bound(const K& val, Compare cmp, bool& match) const {
        match = false;

        int i = 0;
        do {
            int x = cmp(_data[i].object, val);
            if (x >= 0) {
                match = (x == 0);
                break;
            }
        } while (!_data[i++].object.is_tail());

        return &_data[i].object;
    }

    template <typename K, typename Compare>
    SEASTAR_CONCEPT( requires Comparable<K, T, Compare> )
    iterator lower_bound(const K& val, Compare cmp, bool& match) {
        return const_cast<iterator>(const_cast<const intrusive_array*>(this)->lower_bound(val, std::move(cmp), match));
    }

    // Flavours for those not interested in whether the match was exact
    template <typename K, typename Compare>
    SEASTAR_CONCEPT( requires Comparable<K, T, Compare> )
    const_iterator lower_bound(const K& val, Compare cmp) const {
        bool match = false;
        return lower_bound(val, cmp, match);
    }

    template <typename K, typename Compare>
    SEASTAR_CONCEPT( requires Comparable<K, T, Compare> )
    iterator lower_bound(const K& val, Compare cmp) {
        return const_cast<iterator>(const_cast<const intrusive_array*>(this)->lower_bound(val, std::move(cmp)));
    }

    // And its peer ... just to be used. Returns the first element for
    // which cmp(element, val) is positive, or the end of the array.
    template <typename K, typename Compare>
    SEASTAR_CONCEPT( requires Comparable<K, T, Compare> )
    const_iterator upper_bound(const K& val, Compare cmp) const {
        int i = 0;
        do {
            if (cmp(_data[i].object, val) > 0) {
                break;
            }
        } while (!_data[i++].object.is_tail());

        return &_data[i].object;
    }

    template <typename K, typename Compare>
    SEASTAR_CONCEPT( requires Comparable<K, T, Compare> )
    iterator upper_bound(const K& val, Compare cmp) {
        return const_cast<iterator>(const_cast<const intrusive_array*>(this)->upper_bound(val, std::move(cmp)));
    }

    // Calls fn with the pointer to each element, head to tail. The tail
    // flag is sampled before the call.
    template <typename Func>
    SEASTAR_CONCEPT(requires Disposer<Func, T>)
    void for_each(Func&& fn) noexcept {
        bool tail = false;

        for (int i = 0; !tail; i++) {
            tail = _data[i].object.is_tail();
            fn(&_data[i].object);
        }
    }

    // Number of elements, the train is not counted
    size_t size() const noexcept { return number_of_elements(); }

    // Size in bytes, the train is counted
    friend size_t size_for_allocation_strategy(const intrusive_array& obj) noexcept {
        return obj.storage_size();
    }

    /*
     * The "container_of" -- finds the array by the pointer to one of its
     * elements. Walks back until the head element is met, the number of
     * steps made (i.e. the index of the original element) is put into
     * the @idx.
     */
    static intrusive_array& from_element(T* ptr, int& idx) noexcept {
        idx = 0;
        while (!ptr->is_head()) {
            assert(idx < max_len); // may the force be with us...
            idx++;
            ptr--;
        }

        // The head element sits right at the beginning of the array object
        static_assert(offsetof(intrusive_array, _data[0].object) == 0);
        return *reinterpret_cast<intrusive_array*>(ptr);
    }
};

53
utils/neat-object-id.hh Normal file
View File

@@ -0,0 +1,53 @@
/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <atomic>
#include <cstdint>
namespace utils {

/*
 * The neat_id class is purely a debugging thing -- when reading
 * the logs with object IDs in it it's more handy to look at those
 * consisting of 1-3 digits, rather than 16 hex-digits of a printed
 * pointer.
 *
 * Embed with [[no_unique_address]] tag for memory efficiency
 */

// Generic (non-debug) flavour. Keeps no state, the ID is made out of the
// object's own address.
template <bool Debug>
struct neat_id {
    unsigned int operator()() const noexcept {
        // The address is deliberately cut down to unsigned int, where
        // pointers are wider only the lower bits make it into the ID.
        return static_cast<unsigned int>(reinterpret_cast<std::uintptr_t>(this));
    }
};

// Debug flavour. Each object gets the next number from a counter that
// starts at 1 and is shared by all the neat_id<true> objects.
template <>
struct neat_id<true> {
    unsigned int _id;

    // Hands out the current value of the counter and advances it
    static unsigned int _next() noexcept {
        static std::atomic<unsigned int> rover {1};
        return rover.fetch_add(1);
    }

    neat_id() noexcept : _id(_next()) {}
    unsigned int operator()() const noexcept { return _id; }
};

} // namespace utils