Merge "New modes for sstable perf tests" from Glauber

"index_read, sequential_read, and write"
This commit is contained in:
Avi Kivity
2015-08-27 17:26:42 +03:00
4 changed files with 210 additions and 64 deletions

View File

@@ -122,7 +122,7 @@ urchin_tests = [
'tests/perf/perf_hash',
'tests/perf/perf_cql_parser',
'tests/perf/perf_simple_query',
'tests/perf/perf_sstable_index',
'tests/perf/perf_sstable',
'tests/cql_query_test',
'tests/storage_proxy_test',
'tests/mutation_reader_test',
@@ -398,7 +398,7 @@ deps = {
# Tests whose path contains one of these markers do not get
# urchin_tests_seastar_deps appended below.
# NOTE(review): presumably these tests do not need the extra seastar test
# dependencies -- confirm against the full configure script.
# 'perf_sstable' also matches the former 'perf_sstable_index' test name, so a
# separate check for the old name is not needed.
tests_without_seastar_deps = (
    'types_test',
    'keys_test',
    'partitioner_test',
    'map_difference_test',
    'frozen_mutation_test',
    'perf_mutation',
    'cartesian_product_test',
    'perf_hash',
    'perf_cql_parser',
    'message',
    'perf_simple_query',
    'serialization',
    'compound_test',
    'range_test',
    'crc_test',
    'perf_sstable',
)

for t in urchin_tests:
    deps[t] = urchin_tests_dependencies + [t + '.cc']
    # 'tests/gossip' is excluded by exact match, everything else by substring.
    if t != 'tests/gossip' and not any(marker in t for marker in tests_without_seastar_deps):
        deps[t] += urchin_tests_seastar_deps

deps['tests/sstable_test'] += ['tests/sstable_datafile_test.cc']

113
tests/perf/perf_sstable.cc Normal file
View File

@@ -0,0 +1,113 @@
/*
* Copyright 2015 Cloudius Systems
*/
#include <boost/test/unit_test.hpp>
#include <core/distributed.hh>
#include <core/app-template.hh>
#include <core/sstring.hh>
#include <random>
#include "perf_sstable.hh"
using namespace sstables;
static unsigned iterations = 30;
static unsigned parallelism = 1;
// Write benchmark: first populates the memtable on every instance of the
// distributed test_env, then hands test_env::flush_memtable to time_runs
// together with the file-level iterations/parallelism settings.
future<> test_write(distributed<test_env>& dt) {
    auto populated = dt.invoke_on_all([] (test_env& env) {
        env.fill_memtable();
    });
    return populated.then([&dt] {
        return time_runs(iterations, parallelism, dt, &test_env::flush_memtable);
    });
}
// Index-read benchmark: passes test_env::read_all_indexes to time_runs with
// the file-level iterations/parallelism settings.
future<> test_index_read(distributed<test_env>& dt) {
    auto timed_op = &test_env::read_all_indexes;
    return time_runs(iterations, parallelism, dt, timed_op);
}
// Sequential-read benchmark: passes test_env::read_sequential_partitions to
// time_runs with the file-level iterations/parallelism settings.
future<> test_sequential_read(distributed<test_env>& dt) {
    auto timed_op = &test_env::read_sequential_partitions;
    return time_runs(iterations, parallelism, dt, timed_op);
}
// Benchmark modes selectable through the --mode command line option.
// NOTE: sequential_read is the first (zero-valued) enumerator, so it is the
// value a value-initialized test_modes takes.
enum class test_modes {
// Timed through test_env::read_sequential_partitions on previously written sstables.
sequential_read,
// Timed through test_env::read_all_indexes; main() forces num_columns/column_size to 0.
index_read,
// Timed through test_write() with the configured num_columns/column_size.
write,
// Same timed path as write, but main() forces num_columns/column_size to 0.
index_write,
};
// Maps the textual value of the --mode option to its test_modes enumerator.
// NOTE: operator[] on this map default-inserts an entry for an unknown name,
// which yields test_modes::sequential_read (the zero-valued enumerator); use
// find() when the name comes from user input and must be validated.
static std::unordered_map<sstring, test_modes> test_mode = {
{"sequential_read", test_modes::sequential_read },
{"index_read", test_modes::index_read },
{"write", test_modes::write },
{"index_write", test_modes::index_write },
};
int main(int argc, char** argv) {
namespace bpo = boost::program_options;
app_template app;
app.add_options()
("parallelism", bpo::value<unsigned>()->default_value(1), "number parallel requests")
("iterations", bpo::value<unsigned>()->default_value(30), "number of iterations")
("partitions", bpo::value<unsigned>()->default_value(5000000), "number of partitions")
("buffer_size", bpo::value<unsigned>()->default_value(64), "sstable buffer size, in KB")
("key_size", bpo::value<unsigned>()->default_value(128), "size of partition key")
("num_columns", bpo::value<unsigned>()->default_value(5), "number of columns per row")
("column_size", bpo::value<unsigned>()->default_value(64), "size in bytes for each column")
("mode", bpo::value<sstring>()->default_value("index_write"), "one of: random_read, sequential_read, index_read, write, index_write (default)")
("testdir", bpo::value<sstring>()->default_value("/var/lib/cassandra/perf-tests"), "directory in which to store the sstables");
return app.run(argc, argv, [&app] {
auto test = make_lw_shared<distributed<test_env>>();
auto cfg = test_env::conf();
iterations = app.configuration()["iterations"].as<unsigned>();
parallelism = app.configuration()["parallelism"].as<unsigned>();
cfg.partitions = app.configuration()["partitions"].as<unsigned>();
cfg.key_size = app.configuration()["key_size"].as<unsigned>();
cfg.buffer_size = app.configuration()["buffer_size"].as<unsigned>() << 10;
sstring dir = app.configuration()["testdir"].as<sstring>();
cfg.dir = dir;
auto mode = test_mode[app.configuration()["mode"].as<sstring>()];
if ((mode == test_modes::index_read) || (mode == test_modes::index_write)) {
cfg.num_columns = 0;
cfg.column_size = 0;
} else {
cfg.num_columns = app.configuration()["num_columns"].as<unsigned>();
cfg.column_size = app.configuration()["column_size"].as<unsigned>();
}
return test->start(std::move(cfg)).then([mode, dir, test] {
engine().at_exit([test] { return test->stop(); });
if ((mode == test_modes::index_read) ||
(mode == test_modes::sequential_read)) {
return test->invoke_on_all([] (test_env &t) {
return t.load_sstables(iterations);
}).then_wrapped([] (future<> f) {
try {
f.get();
} catch (...) {
std::cerr << "An error occurred when trying to load test sstables. Did you run write mode yet?" << std::endl;
throw;
}
});
} else if ((mode == test_modes::index_write) || (mode == test_modes::write)) {
return test_setup::create_empty_test_dir(dir);
} else {
throw std::invalid_argument("Invalid mode");
}
}).then([test, mode] {
if (mode == test_modes::index_read) {
return test_index_read(*test).then([test] {});
} else if (mode == test_modes::sequential_read) {
return test_sequential_read(*test).then([test] {});
} else if ((mode == test_modes::index_write) || (mode == test_modes::write)) {
return test_write(*test).then([test] {});
} else {
throw std::invalid_argument("Invalid mode");
}
}).then([] {
return engine().exit(0);
}).or_terminate();
});
}

View File

@@ -5,6 +5,7 @@
#pragma once
#include "../sstable_test.hh"
#include "sstables/sstables.hh"
#include "mutation_reader.hh"
#include <boost/accumulators/accumulators.hpp>
#include <boost/accumulators/statistics.hpp>
#include <boost/range/irange.hpp>
@@ -16,6 +17,8 @@ public:
// Settings for one test_env; filled in from the command line by main().
struct conf {
// Number of partitions fill_memtable() generates.
unsigned partitions;
// Length, in characters, of each randomly generated partition key.
unsigned key_size;
// Number of regular columns in the generated schema (0 for the index-only modes).
unsigned num_columns;
// Length, in characters, of each randomly generated column value.
unsigned column_size;
// sstable buffer size; main() converts the KB command line value to bytes.
size_t buffer_size;
// Base directory for the sstables; dir() appends the cpu id to it.
sstring dir;
};
@@ -25,12 +28,20 @@ private:
return _cfg.dir + "/" + to_sstring(engine().cpu_id());
}
// Returns a string of `size` characters, each one drawn from _distribution
// using _generator. The storage is allocated uninitialized and then every
// character is overwritten.
sstring random_string(unsigned size) {
    sstring str(sstring::initialized_later{}, size_t(size));
    for (auto& b: str) {
        b = _distribution(_generator);
    }
    return str;
}
// Random partition key whose length is the configured key_size.
sstring random_key() {
    const unsigned length = _cfg.key_size;
    return random_string(length);
}
// Random column value whose length is the configured column_size.
sstring random_column() {
    const unsigned length = _cfg.column_size;
    return random_string(length);
}
conf _cfg;
@@ -40,9 +51,33 @@ private:
lw_shared_ptr<memtable> _mt;
std::vector<lw_shared_ptr<sstable>> _sst;
// Builds the schema used by the perf test: a single utf8 partition key column
// called "name", no clustering key, no static columns, and _cfg.num_columns
// utf8 regular columns named column0000, column0001, ...
schema_ptr create_schema() {
    std::vector<schema::column> columns;
    for (unsigned col = 0; col != _cfg.num_columns; ++col) {
        columns.push_back(schema::column{ to_bytes(sprint("column%04d", col)), utf8_type });
    }

    schema_builder builder(make_lw_shared(schema(
        generate_legacy_id("ks", "perf-test"), "ks", "perf-test",
        {{"name", utf8_type}},  // partition key
        {},                     // clustering key
        { columns },            // regular columns
        {},                     // static columns
        utf8_type,              // regular column name type
        "Perf tests"            // comment
    )));
    return builder.build(schema_builder::compact_storage::no);
}
public:
// Constructs the test environment from its configuration.
// The schema is generated by create_schema(), which reads _cfg.num_columns.
// NOTE(review): this relies on _cfg being declared (and thus initialized)
// before s -- confirm against the member declaration order.
// Random characters are drawn from the range '@'..'~'; the memtable is
// created for the generated schema.
test_env(conf cfg) : _cfg(std::move(cfg))
         , s(create_schema())
         , _distribution('@', '~')
         , _mt(make_lw_shared<memtable>(s))
{}
@@ -52,10 +87,19 @@ public:
// Fills the memtable with _cfg.partitions mutations. Every mutation gets a
// random partition key and, for each regular column of the schema, a live
// cell (timestamp 0) holding a random value of the configured column size.
// Each mutation is applied exactly once, after its cells have been set.
void fill_memtable() {
    for (unsigned i = 0; i < _cfg.partitions; i++) {
        auto key = partition_key::from_deeply_exploded(*s, { boost::any(random_key()) });
        auto mut = mutation(key, s);
        for (auto& cdef: s->regular_columns()) {
            mut.set_clustered_cell(clustering_key::make_empty(*s), cdef, atomic_cell::make_live(0, utf8_type->decompose(random_column())));
        }
        _mt->apply(std::move(mut));
    }
}
// Registers the sstable with generation 0 ("ks"/"cf", version ka, format big)
// from this shard's directory in _sst and starts loading it.
// The `iterations` argument is accepted but not used by this function.
future<> load_sstables(unsigned iterations) {
    auto sst = make_lw_shared<sstable>("ks", "cf", this->dir(), 0, sstable::version_types::ka, sstable::format_types::big);
    _sst.push_back(sst);
    return sst->load();
}
using clk = std::chrono::high_resolution_clock;
static auto now() {
return clk::now();
@@ -75,6 +119,51 @@ public:
return partitions / duration;
});
}
// Reads the index entries for every summary entry of the first loaded sstable
// and resolves to the number of index entries read per second.
// The `idx` argument is not used by the body.
future<double> read_all_indexes(int idx) {
    return do_with(test(_sst[0]), [] (auto& sst) {
        auto began = test_env::now();
        auto entries_read = make_lw_shared<size_t>(0);
        auto& summary = sst.get_summary();
        // One read_indexes() call per summary entry, issued one after another.
        auto summary_range = boost::irange(0, int(summary.header.size));
        return do_for_each(summary_range.begin(), summary_range.end(), [&sst, entries_read] (uint64_t entry) {
            return sst.read_indexes(entry).then([entries_read] (auto il) {
                *entries_read += il.size();
            });
        }).then([entries_read, began] {
            auto finished = test_env::now();
            auto seconds = std::chrono::duration<double>(finished - began).count();
            return *entries_read / seconds;
        });
    });
}
// Reads every partition of the first loaded sstable in order and resolves to
// the number of partitions read per second. Each partition must contain the
// row with the empty clustering key holding exactly _cfg.num_columns cells;
// otherwise std::invalid_argument is thrown.
// The `idx` argument is not used by the body.
future<double> read_sequential_partitions(int idx) {
    return do_with(_sst[0]->read_rows(s), [this] (sstables::mutation_reader& r) {
        auto began = test_env::now();
        auto partitions_read = make_lw_shared<size_t>(0);
        auto exhausted = make_lw_shared<bool>(false);
        return do_until([exhausted] { return *exhausted; }, [this, exhausted, partitions_read, &r] {
            return r.read().then([this, exhausted, partitions_read] (mutation_opt m) {
                // A disengaged mutation marks the end of the stream.
                if (!m) {
                    *exhausted = true;
                    return;
                }
                auto row = m->partition().find_row(clustering_key::make_empty(*s));
                if (!row || row->size() != _cfg.num_columns) {
                    throw std::invalid_argument("Invalid sstable found. Maybe you ran write mode with different num_columns settings?");
                }
                (*partitions_read)++;
            });
        }).then([partitions_read, began] {
            auto finished = test_env::now();
            auto seconds = std::chrono::duration<double>(finished - began).count();
            return *partitions_read / seconds;
        });
    });
}
};
// The function func should carry on with the test, and return the number of partitions processed.

View File

@@ -1,56 +0,0 @@
/*
* Copyright 2015 Cloudius Systems
*/
#include <boost/test/unit_test.hpp>
#include <core/distributed.hh>
#include <core/app-template.hh>
#include <core/sstring.hh>
#include <random>
#include "perf_sstable.hh"
using namespace sstables;
static unsigned iterations = 30;
static unsigned parallelism = 1;
// Write benchmark: first populates the memtable on every instance of the
// distributed test_env, then hands test_env::flush_memtable to time_runs
// together with the file-level iterations/parallelism settings.
future<> test_write(distributed<test_env>& dt) {
    auto populated = dt.invoke_on_all([] (test_env& env) {
        env.fill_memtable();
    });
    return populated.then([&dt] {
        return time_runs(iterations, parallelism, dt, &test_env::flush_memtable);
    });
}
int main(int argc, char** argv) {
namespace bpo = boost::program_options;
app_template app;
app.add_options()
("parallelism", bpo::value<unsigned>()->default_value(1), "number parallel requests")
("iterations", bpo::value<unsigned>()->default_value(30), "number of iterations")
("partitions", bpo::value<unsigned>()->default_value(5000000), "number of partitions")
("buffer_size", bpo::value<unsigned>()->default_value(64), "sstable buffer size, in KB")
("key_size", bpo::value<unsigned>()->default_value(128), "size of partition key")
("testdir", bpo::value<sstring>()->default_value("/var/lib/cassandra/perf-tests"), "directory in which to store the sstables");
return app.run(argc, argv, [&app] {
auto test = make_lw_shared<distributed<test_env>>();
auto cfg = test_env::conf();
iterations = app.configuration()["iterations"].as<unsigned>();
parallelism = app.configuration()["parallelism"].as<unsigned>();
cfg.partitions = app.configuration()["partitions"].as<unsigned>();
cfg.key_size = app.configuration()["key_size"].as<unsigned>();
cfg.buffer_size = app.configuration()["buffer_size"].as<unsigned>() << 10;
sstring dir = app.configuration()["testdir"].as<sstring>();
cfg.dir = dir;
return test->start(std::move(cfg)).then([dir, test] {
engine().at_exit([test] { return test->stop(); });
return test_setup::create_empty_test_dir(dir);
}).then([test] {
return test_write(*test).then([test] {});
}).then([] {
return engine().exit(0);
}).or_terminate();
});
}