Files
scylladb/tests/row_cache_stress_test.cc
Paweł Dziepak 96b0577343 row_cache: deglobalise row cache tracker
Row cache tracker has numerous implicit dependencies on ohter objects
(e.g. LSA migrators for data held by mutation_cleaner). The fact that
both cache tracker and some of those dependencies are thread local
objects makes it hard to guarantee correct destruction order.

Let's deglobalise cache tracker and put in in the database class.
2018-06-25 09:37:43 +01:00

368 lines
14 KiB
C++

/*
* Copyright (C) 2017 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include <boost/range/irange.hpp>
#include "seastarx.hh"
#include "tests/simple_schema.hh"
#include "core/app-template.hh"
#include "memtable.hh"
#include "row_cache.hh"
#include "partition_slice_builder.hh"
#include "utils/int_range.hh"
#include "utils/div_ceil.hh"
#include "tests/memtable_snapshot_source.hh"
#include <seastar/core/reactor.hh>
logging::logger test_log("test");
static thread_local bool cancelled = false;
using namespace std::chrono_literals;
namespace row_cache_stress_test {
struct table {
simple_schema s;
std::vector<dht::decorated_key> p_keys;
std::vector<api::timestamp_type> p_writetime; // committed writes
std::vector<clustering_key> c_keys;
uint64_t mutation_phase = 0;
uint64_t mutations = 0;
uint64_t reads_started = 0;
uint64_t scans_started = 0;
lw_shared_ptr<memtable> mt;
lw_shared_ptr<memtable> prev_mt;
memtable_snapshot_source underlying;
cache_tracker tracker;
row_cache cache;
table(unsigned partitions, unsigned rows)
: mt(make_lw_shared<memtable>(s.schema()))
, underlying(s.schema())
, cache(s.schema(), snapshot_source([this] { return underlying(); }), tracker)
{
p_keys = s.make_pkeys(partitions);
p_writetime.resize(p_keys.size());
c_keys = s.make_ckeys(rows);
}
size_t index_of_key(const dht::decorated_key& dk) {
for (auto i : boost::irange<size_t>(0, p_keys.size())) {
if (p_keys[i].equal(*s.schema(), dk)) {
return i;
}
}
throw std::runtime_error(sprint("key not found: %s", dk));
}
sstring value_tag(int key, uint64_t phase) {
return sprint("k_0x%x_p_0x%x", key, phase);
}
mutation get_mutation(int key, api::timestamp_type t, const sstring& tag) {
mutation m(s.schema(), p_keys[key]);
for (auto ck : c_keys) {
s.add_row(m, ck, tag, t);
}
return m;
}
// Must not be called concurrently
void flush() {
test_log.trace("flushing");
prev_mt = std::exchange(mt, make_lw_shared<memtable>(s.schema()));
auto flushed = make_lw_shared<memtable>(s.schema());
flushed->apply(*prev_mt).get();
prev_mt->mark_flushed(flushed->as_data_source());
test_log.trace("updating cache");
cache.update([&] {
underlying.apply(flushed);
}, *prev_mt).get();
test_log.trace("flush done");
prev_mt = {};
}
void mutate_next_phase() {
test_log.trace("mutating, phase={}", mutation_phase);
for (auto i : boost::irange<int>(0, p_keys.size())) {
auto t = s.new_timestamp();
auto tag = value_tag(i, mutation_phase);
auto m = get_mutation(i, t, tag);
mt->apply(std::move(m));
p_writetime[i] = t;
test_log.trace("updated key {}, {} @{}", i, tag, t);
++mutations;
later().get();
}
test_log.trace("mutated whole ring");
++mutation_phase;
// FIXME: mutate concurrently with flush
flush();
}
struct reader {
dht::partition_range pr;
query::partition_slice slice;
flat_mutation_reader rd;
};
std::unique_ptr<reader> make_reader(dht::partition_range pr, query::partition_slice slice) {
test_log.trace("making reader, pk={} ck={}", pr, slice);
auto r = std::make_unique<reader>(reader{std::move(pr), std::move(slice), make_empty_flat_reader(s.schema())});
std::vector<flat_mutation_reader> rd;
if (prev_mt) {
rd.push_back(prev_mt->make_flat_reader(s.schema(), r->pr, r->slice));
}
rd.push_back(mt->make_flat_reader(s.schema(), r->pr, r->slice));
rd.push_back(cache.make_reader(s.schema(), r->pr, r->slice));
r->rd = make_combined_reader(s.schema(), std::move(rd), streamed_mutation::forwarding::yes, mutation_reader::forwarding::no);
return r;
}
std::unique_ptr<reader> make_single_key_reader(int pk, int_range ck_range) {
++reads_started;
auto slice = partition_slice_builder(*s.schema())
.with_range(ck_range.transform([this] (int key) { return c_keys[key]; }))
.build();
auto pr = dht::partition_range::make_singular(p_keys[pk]);
return make_reader(std::move(pr), std::move(slice));
}
std::unique_ptr<reader> make_scanning_reader() {
++scans_started;
return make_reader(query::full_partition_range, s.schema()->full_slice());
}
};
struct reader_id {
sstring name;
friend std::ostream& operator<<(std::ostream& out, reader_id id) {
return out << id.name;
}
};
class validating_consumer {
table& _t;
reader_id _id;
stdx::optional<sstring> _value;
size_t _row_count = 0;
size_t _key = 0;
std::vector<api::timestamp_type> _writetimes;
public:
validating_consumer(table& t, reader_id id)
: _t(t)
, _id(id)
, _writetimes(t.p_writetime)
{ }
void consume_new_partition(const dht::decorated_key& key) {
test_log.trace("reader {}: enters partition {}", _id, key);
_value = {};
_key = _t.index_of_key(key);
}
stop_iteration consume_end_of_partition() { return stop_iteration::no; }
stop_iteration consume(tombstone) { return stop_iteration::no; }
stop_iteration consume(const static_row&) { return stop_iteration::no; }
stop_iteration consume(const range_tombstone&) { return stop_iteration::no; }
stop_iteration consume(const clustering_row& row) {
++_row_count;
sstring value;
api::timestamp_type t;
std::tie(value, t) = _t.s.get_value(row);
test_log.trace("reader {}: {} @{}, {}", _id, value, t, row);
if (_value && value != _value) {
throw std::runtime_error(sprint("Saw values from two different writes in partition %d: %s and %s", _key, _value, value));
}
auto lowest_timestamp = _writetimes[_key];
if (t < lowest_timestamp) {
throw std::runtime_error(sprint("Expected to see the write @%d, but saw @%d (%s), c_key=%s", lowest_timestamp, t, value, row.key()));
}
_value = std::move(value);
return stop_iteration::no;
}
size_t consume_end_of_stream() {
test_log.trace("reader {}: done, {} rows", _id, _row_count);
return _row_count;
}
};
template<typename T>
class monotonic_counter {
std::function<T()> _getter;
T _prev;
public:
monotonic_counter(std::function<T()> getter)
: _getter(std::move(getter)) {
_prev = _getter();
}
// Return change in value since the last call to change() or rate().
auto change() {
auto now = _getter();
return now - std::exchange(_prev, now);
}
};
}
using namespace row_cache_stress_test;
int main(int argc, char** argv) {
namespace bpo = boost::program_options;
app_template app;
app.add_options()
("trace", "Enables trace-level logging for the test actions")
("concurrency", bpo::value<unsigned>()->default_value(10), "Number of concurrent single partition readers")
("scan-concurrency", bpo::value<unsigned>()->default_value(2), "Number of concurrent ring scanners")
("partitions", bpo::value<unsigned>()->default_value(10), "Number of partitions")
("rows", bpo::value<unsigned>()->default_value(10000), "Number of rows in each partitions")
("seconds", bpo::value<unsigned>()->default_value(600), "Duration [s] after which the test terminates with a success")
;
return app.run(argc, argv, [&app] {
if (app.configuration().count("trace")) {
test_log.set_level(seastar::log_level::trace);
}
return seastar::async([&app] {
auto concurrency = app.configuration()["concurrency"].as<unsigned>();
auto scan_concurrency = app.configuration()["scan-concurrency"].as<unsigned>();
auto partitions = app.configuration()["partitions"].as<unsigned>();
auto rows = app.configuration()["rows"].as<unsigned>();
auto seconds = app.configuration()["seconds"].as<unsigned>();
row_cache_stress_test::table t(partitions, rows);
engine().at_exit([] {
cancelled = true;
return make_ready_future();
});
timer<> completion_timer;
completion_timer.set_callback([&] {
test_log.info("Test done.");
cancelled = true;
});
completion_timer.arm(std::chrono::seconds(seconds));
auto fail = [&] (sstring msg) {
test_log.error("{}", msg);
cancelled = true;
completion_timer.cancel();
};
// Stats printer
timer<> stats_printer;
monotonic_counter<uint64_t> reads([&] { return t.reads_started; });
monotonic_counter<uint64_t> scans([&] { return t.scans_started; });
monotonic_counter<uint64_t> mutations([&] { return t.mutations; });
monotonic_counter<uint64_t> flushes([&] { return t.mutation_phase; });
stats_printer.set_callback([&] {
auto MB = 1024 * 1024;
test_log.info("reads/s: {}, scans/s: {}, mutations/s: {}, flushes/s: {}, Cache: {}/{} [MB], LSA: {}/{} [MB], std free: {} [MB]",
reads.change(), scans.change(), mutations.change(), flushes.change(),
t.tracker.region().occupancy().used_space() / MB,
t.tracker.region().occupancy().total_space() / MB,
logalloc::shard_tracker().region_occupancy().used_space() / MB,
logalloc::shard_tracker().region_occupancy().total_space() / MB,
seastar::memory::stats().free_memory() / MB);
});
stats_printer.arm_periodic(1s);
auto single_partition_reader = [&] (int i, reader_id id) {
auto n_keys = t.c_keys.size();
// Assign ranges so that there is ~30% overlap between adjacent readers.
auto len = div_ceil(n_keys, concurrency);
len = std::min(n_keys, len + div_ceil(len, 3)); // so that read ranges overlap
auto start = (n_keys - len) * i / (std::max(concurrency - 1, 1u));
int_range ck_range = make_int_range(start, start + len);
int pk = t.p_keys.size() / 2; // FIXME: spread over 3 consecutive partitions
test_log.info("{} is using pk={} ck={}", id, pk, ck_range);
while (!cancelled) {
test_log.trace("{}: starting read", id);
auto rd = t.make_single_key_reader(pk, ck_range);
auto row_count = rd->rd.consume(validating_consumer(t, id)).get0();
if (row_count != len) {
throw std::runtime_error(sprint("Expected %d fragments, got %d", len, row_count));
}
}
};
auto scanning_reader = [&] (reader_id id) {
auto expected_row_count = t.p_keys.size() * t.c_keys.size();
while (!cancelled) {
test_log.trace("{}: starting read", id);
auto rd = t.make_scanning_reader();
auto row_count = rd->rd.consume(validating_consumer(t, id)).get0();
if (row_count != expected_row_count) {
throw std::runtime_error(sprint("Expected %d fragments, got %d", expected_row_count, row_count));
}
}
};
// populate the initial phase, readers expect constant fragment count.
t.mutate_next_phase();
auto readers = parallel_for_each(boost::irange(0u, concurrency), [&] (auto i) {
reader_id id{sprint("single-%d", i)};
return seastar::async([&, i, id] {
single_partition_reader(i, id);
}).handle_exception([&, id] (auto e) {
fail(sprint("%s failed: %s", id, e));
});
});
auto scanning_readers = parallel_for_each(boost::irange(0u, scan_concurrency), [&] (auto i) {
reader_id id{sprint("scan-%d", i)};
return seastar::async([&, id] {
scanning_reader(id);
}).handle_exception([&, id] (auto e) {
fail(sprint("%s failed: %s", id, e));
});
});
timer<> evictor;
evictor.set_callback([&] {
test_log.trace("evicting");
t.cache.evict();
});
evictor.arm_periodic(3s);
// Mutator
while (!cancelled) {
t.mutate_next_phase();
}
stats_printer.cancel();
completion_timer.cancel();
evictor.cancel();
readers.get();
scanning_readers.get();
});
});
}