partitioned_sstable_set: fix quadratic space complexity

Streaming generates lots of small sstables, each spanning a large token range,
which triggers O(N^2) space usage in the interval map.
Level 0 sstables will now be stored in a structure that has O(N)
space complexity and that will be included in every read.

Fixes #2287.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20170417185509.6633-1-raphaelsc@scylladb.com>
(cherry picked from commit 11b74050a1)
This commit is contained in:
Raphael S. Carvalho
2017-04-17 15:55:09 -03:00
committed by Avi Kivity
parent 2f0970e83c
commit bbbb4dffbd
2 changed files with 88 additions and 50 deletions

View File

@@ -191,7 +191,8 @@ class partitioned_sstable_set : public sstable_set_impl {
using map_iterator = interval_map_type::const_iterator;
private:
schema_ptr _schema;
interval_map_type _sstables;
std::vector<shared_sstable> _unleveled_sstables;
interval_map_type _leveled_sstables;
private:
static interval_type make_interval(const schema& s, const query::partition_range& range) {
return interval_type::closed(
@@ -207,16 +208,16 @@ private:
}
// Returns the [begin, end) pair of interval-map iterators whose entries match `range`.
// The four branches handle: both bounds present, only a start bound, only an end
// bound, and a fully open range (the whole map).
//
// NOTE(review): this is a rendered diff without +/- markers. Each `_sstables` line
// appears to be the removed pre-change line, and the `_leveled_sstables` line right
// after it its replacement.
//
// After the change this lookup consults only `_leveled_sstables`; anything kept in
// `_unleveled_sstables` is not covered here and has to be added by the caller.
std::pair<map_iterator, map_iterator> query(const query::partition_range& range) const {
if (range.start() && range.end()) {
// Bounded on both sides: every map entry overlapping the interval built from `range`.
return _sstables.equal_range(make_interval(range));
return _leveled_sstables.equal_range(make_interval(range));
}
else if (range.start() && !range.end()) {
// Only a start bound: from the first entry not ordered before `start` to the end of the map.
auto start = singular(range.start()->value());
return { _sstables.lower_bound(start), _sstables.end() };
return { _leveled_sstables.lower_bound(start), _leveled_sstables.end() };
} else if (!range.start() && range.end()) {
// Only an end bound: from the beginning of the map up to upper_bound(end).
auto end = singular(range.end()->value());
return { _sstables.begin(), _sstables.upper_bound(end) };
return { _leveled_sstables.begin(), _leveled_sstables.upper_bound(end) };
} else {
// Unbounded on both sides: the entire map.
return { _sstables.begin(), _sstables.end() };
return { _leveled_sstables.begin(), _leveled_sstables.end() };
}
}
public:
@@ -234,29 +235,39 @@ public:
while (b != e) {
boost::copy(b++->second, std::inserter(result, result.end()));
}
return std::vector<shared_sstable>(result.begin(), result.end());
auto r = _unleveled_sstables;
r.insert(r.end(), result.begin(), result.end());
return r;
}
// Adds `sst` to the set.
//
// NOTE(review): this is a rendered diff without +/- markers. The first group of
// statements (ending with `_sstables.add`) appears to be the removed pre-change
// body; the `if`/`else` that follows appears to be its replacement.
//
// New behaviour: an sstable whose get_sstable_level() is 0 is appended to the flat
// `_unleveled_sstables` vector; any other sstable is added to the
// `_leveled_sstables` interval map under the interval spanning its first and last
// tokens.
virtual void insert(shared_sstable sst) override {
auto first = sst->get_first_decorated_key().token();
auto last = sst->get_last_decorated_key().token();
using bound = query::partition_range::bound;
_sstables.add({
make_interval(
query::partition_range(
bound(dht::ring_position::starting_at(first)),
bound(dht::ring_position::ending_at(last)))),
value_set({sst})});
if (sst->get_sstable_level() == 0) {
// Level 0: stored in the plain vector, one element per sstable, not in the interval map.
_unleveled_sstables.push_back(std::move(sst));
} else {
// Level > 0: keyed by the token interval [first, last] of the sstable.
auto first = sst->get_first_decorated_key().token();
auto last = sst->get_last_decorated_key().token();
using bound = query::partition_range::bound;
_leveled_sstables.add({
make_interval(
query::partition_range(
bound(dht::ring_position::starting_at(first)),
bound(dht::ring_position::ending_at(last)))),
value_set({sst})});
}
}
// Removes `sst` from the set; mirrors insert().
//
// NOTE(review): this is a rendered diff without +/- markers. The first group of
// statements (ending with `_sstables.subtract`) appears to be the removed
// pre-change body; the `if`/`else` that follows appears to be its replacement.
//
// NOTE(review): the container is chosen from get_sstable_level() at erase time.
// This assumes the level is the same as when the sstable was inserted — confirm
// that an sstable's level cannot change while it is a member of the set.
virtual void erase(shared_sstable sst) override {
auto first = sst->get_first_decorated_key().token();
auto last = sst->get_last_decorated_key().token();
using bound = query::partition_range::bound;
_sstables.subtract({
make_interval(
query::partition_range(
bound(dht::ring_position::starting_at(first)),
bound(dht::ring_position::ending_at(last)))),
value_set({sst})});
if (sst->get_sstable_level() == 0) {
// Level 0: drop every vector element that compares equal to `sst` (linear scan).
_unleveled_sstables.erase(std::remove(_unleveled_sstables.begin(), _unleveled_sstables.end(), sst), _unleveled_sstables.end());
} else {
// Level > 0: subtract `sst` from the same token interval insert() used for it.
auto first = sst->get_first_decorated_key().token();
auto last = sst->get_last_decorated_key().token();
using bound = query::partition_range::bound;
_leveled_sstables.subtract({
make_interval(
query::partition_range(
bound(dht::ring_position::starting_at(first)),
bound(dht::ring_position::ending_at(last)))),
value_set({sst})});
}
}
virtual std::unique_ptr<incremental_selector_impl> make_incremental_selector() const override;
class incremental_selector;
@@ -264,6 +275,7 @@ public:
class partitioned_sstable_set::incremental_selector : public incremental_selector_impl {
schema_ptr _schema;
const std::vector<shared_sstable>& _unleveled_sstables;
map_iterator _it;
const map_iterator _end;
private:
@@ -272,32 +284,35 @@ private:
{i.upper().token(), boost::icl::is_right_closed(i.bounds())});
}
public:
// Builds a selector positioned at the first entry of the interval map.
//
// NOTE(review): this is a rendered diff without +/- markers. The one-map signature
// and the `sstables.begin()/end()` initialisers appear to be the removed lines; the
// two-container signature and the three initialisers after them are the replacement.
//
// In the new form `unleveled_sstables` is bound to a reference member, not copied,
// and `_it`/`_end` are iterators into `leveled_sstables`.
// NOTE(review): both containers presumably have to outlive the selector and stay
// unmodified while it is in use — confirm against callers.
incremental_selector(schema_ptr schema, const interval_map_type& sstables)
incremental_selector(schema_ptr schema, const std::vector<shared_sstable>& unleveled_sstables, const interval_map_type& leveled_sstables)
: _schema(std::move(schema))
, _it(sstables.begin())
, _end(sstables.end()) {
, _unleveled_sstables(unleveled_sstables)
, _it(leveled_sstables.begin())
, _end(leveled_sstables.end()) {
}
// Returns a token range together with the sstables selected for `token`.
//
// NOTE(review): this is a rendered diff without +/- markers. Where two consecutive
// `return`/argument lines differ only in the vector they return, the first appears
// to be the removed line and the second its replacement.
//
// In the new form the result vector starts as a copy of `_unleveled_sstables`, so
// those sstables are part of the result on every return path.
//
// `_it` only ever moves forward and is kept between calls.
// NOTE(review): this presumably requires tokens to be passed in non-decreasing
// order across calls — confirm against callers.
virtual std::pair<nonwrapping_range<dht::token>, std::vector<shared_sstable>> select(const dht::token& token) override {
// Interval covering exactly `token`, from starting_at(token) to ending_at(token).
auto pr = query::partition_range::make(dht::ring_position::starting_at(token), dht::ring_position::ending_at(token));
auto interval = make_interval(*_schema, std::move(pr));
auto ssts = _unleveled_sstables;
while (_it != _end) {
// Current map entry contains the token: return its range and add its sstables.
if (boost::icl::contains(_it->first, interval)) {
return std::make_pair(to_token_range(_it->first), std::vector<shared_sstable>(_it->second.begin(), _it->second.end()));
ssts.insert(ssts.end(), _it->second.begin(), _it->second.end());
return std::make_pair(to_token_range(_it->first), std::move(ssts));
}
// we don't want to skip current interval if token lies before it.
// The returned range runs from `token` to the lower bound of the current entry
// (the true/false flags presumably mean inclusive/exclusive — TODO confirm);
// no interval-map sstables are added on this path.
if (boost::icl::lower_less(interval, _it->first)) {
return std::make_pair(nonwrapping_range<dht::token>::make({token, true}, {_it->first.lower().token(), false}),
std::vector<shared_sstable>());
std::move(ssts));
}
_it++;
}
// Ran past the last map entry: report a range open on both sides.
return std::make_pair(nonwrapping_range<dht::token>::make_open_ended_both_sides(), std::vector<shared_sstable>());
return std::make_pair(nonwrapping_range<dht::token>::make_open_ended_both_sides(), std::move(ssts));
}
};
// Creates an incremental_selector over this set.
// NOTE(review): this is a rendered diff without +/- markers. The `_sstables` return
// appears to be the removed line; the replacement passes both the level-0 vector
// and the leveled interval map to the selector.
std::unique_ptr<incremental_selector_impl> partitioned_sstable_set::make_incremental_selector() const {
return std::make_unique<incremental_selector>(_schema, _sstables);
return std::make_unique<incremental_selector>(_schema, _unleveled_sstables, _leveled_sstables);
}
class compaction_strategy_impl {

View File

@@ -1702,9 +1702,9 @@ static lw_shared_ptr<sstable> add_sstable_for_overlapping_test(lw_shared_ptr<col
column_family_test(cf).add_sstable(sst);
return sst;
}
// Test helper: creates an sstable object with generation `gen` and sets its first/last
// keys, without adding it to any column family.
//
// NOTE(review): this is a rendered diff without +/- markers. The four-parameter
// signature and the `set_values` call appear to be the removed lines; the replacement
// adds a `level` parameter (default 0, so existing callers keep compiling) and
// forwards it through `set_values_for_leveled_strategy`.
// NOTE(review): the meaning of the two literal 0 arguments around `level` is not
// visible here — check the helper's declaration.
static lw_shared_ptr<sstable> sstable_for_overlapping_test(const schema_ptr& schema, int64_t gen, sstring first_key, sstring last_key) {
static lw_shared_ptr<sstable> sstable_for_overlapping_test(const schema_ptr& schema, int64_t gen, sstring first_key, sstring last_key, uint32_t level = 0) {
auto sst = make_lw_shared<sstable>(schema, "", gen, la, big);
sstables::test(sst).set_values(std::move(first_key), std::move(last_key), {});
sstables::test(sst).set_values_for_leveled_strategy(0, level, 0, std::move(first_key), std::move(last_key));
return sst;
}
@@ -3100,29 +3100,52 @@ SEASTAR_TEST_CASE(sstable_set_incremental_selector) {
auto cs = sstables::make_compaction_strategy(sstables::compaction_strategy_type::leveled, s->compaction_strategy_options());
auto key_and_token_pair = token_generation_for_current_shard(8);
sstable_set set = cs.make_sstable_set(s);
set.insert(sstable_for_overlapping_test(s, 1, key_and_token_pair[0].first, key_and_token_pair[1].first));
set.insert(sstable_for_overlapping_test(s, 2, key_and_token_pair[0].first, key_and_token_pair[1].first));
set.insert(sstable_for_overlapping_test(s, 3, key_and_token_pair[3].first, key_and_token_pair[4].first));
set.insert(sstable_for_overlapping_test(s, 4, key_and_token_pair[4].first, key_and_token_pair[4].first));
set.insert(sstable_for_overlapping_test(s, 5, key_and_token_pair[4].first, key_and_token_pair[5].first));
sstable_set::incremental_selector selector = set.make_incremental_selector();
auto check = [&selector] (const dht::token& token, std::unordered_set<int64_t> expected_gens) {
auto check = [] (sstable_set::incremental_selector& selector, const dht::token& token, std::unordered_set<int64_t> expected_gens) {
auto sstables = selector.select(token);
BOOST_REQUIRE(sstables.size() == expected_gens.size());
for (auto& sst : sstables) {
BOOST_REQUIRE(expected_gens.count(sst->generation()) == 1);
}
};
check(key_and_token_pair[0].second, {1, 2});
check(key_and_token_pair[1].second, {1, 2});
check(key_and_token_pair[2].second, {});
check(key_and_token_pair[3].second, {3});
check(key_and_token_pair[4].second, {3, 4, 5});
check(key_and_token_pair[5].second, {5});
check(key_and_token_pair[6].second, {});
check(key_and_token_pair[7].second, {});
{
sstable_set set = cs.make_sstable_set(s);
set.insert(sstable_for_overlapping_test(s, 1, key_and_token_pair[0].first, key_and_token_pair[1].first, 1));
set.insert(sstable_for_overlapping_test(s, 2, key_and_token_pair[0].first, key_and_token_pair[1].first, 1));
set.insert(sstable_for_overlapping_test(s, 3, key_and_token_pair[3].first, key_and_token_pair[4].first, 1));
set.insert(sstable_for_overlapping_test(s, 4, key_and_token_pair[4].first, key_and_token_pair[4].first, 1));
set.insert(sstable_for_overlapping_test(s, 5, key_and_token_pair[4].first, key_and_token_pair[5].first, 1));
sstable_set::incremental_selector sel = set.make_incremental_selector();
check(sel, key_and_token_pair[0].second, {1, 2});
check(sel, key_and_token_pair[1].second, {1, 2});
check(sel, key_and_token_pair[2].second, {});
check(sel, key_and_token_pair[3].second, {3});
check(sel, key_and_token_pair[4].second, {3, 4, 5});
check(sel, key_and_token_pair[5].second, {5});
check(sel, key_and_token_pair[6].second, {});
check(sel, key_and_token_pair[7].second, {});
}
{
sstable_set set = cs.make_sstable_set(s);
set.insert(sstable_for_overlapping_test(s, 0, key_and_token_pair[0].first, key_and_token_pair[1].first, 0));
set.insert(sstable_for_overlapping_test(s, 1, key_and_token_pair[0].first, key_and_token_pair[1].first, 1));
set.insert(sstable_for_overlapping_test(s, 2, key_and_token_pair[0].first, key_and_token_pair[1].first, 1));
set.insert(sstable_for_overlapping_test(s, 3, key_and_token_pair[3].first, key_and_token_pair[4].first, 1));
set.insert(sstable_for_overlapping_test(s, 4, key_and_token_pair[4].first, key_and_token_pair[4].first, 1));
set.insert(sstable_for_overlapping_test(s, 5, key_and_token_pair[4].first, key_and_token_pair[5].first, 1));
sstable_set::incremental_selector sel = set.make_incremental_selector();
check(sel, key_and_token_pair[0].second, {0, 1, 2});
check(sel, key_and_token_pair[1].second, {0, 1, 2});
check(sel, key_and_token_pair[2].second, {0});
check(sel, key_and_token_pair[3].second, {0, 3});
check(sel, key_and_token_pair[4].second, {0, 3, 4, 5});
check(sel, key_and_token_pair[5].second, {0, 5});
check(sel, key_and_token_pair[6].second, {0});
check(sel, key_and_token_pair[7].second, {0});
}
return make_ready_future<>();
}