mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-26 03:20:37 +00:00
Merge "New modes for sstable perf tests" from Glauber
"index_read, sequential_read, and write"
This commit is contained in:
@@ -122,7 +122,7 @@ urchin_tests = [
|
||||
'tests/perf/perf_hash',
|
||||
'tests/perf/perf_cql_parser',
|
||||
'tests/perf/perf_simple_query',
|
||||
'tests/perf/perf_sstable_index',
|
||||
'tests/perf/perf_sstable',
|
||||
'tests/cql_query_test',
|
||||
'tests/storage_proxy_test',
|
||||
'tests/mutation_reader_test',
|
||||
@@ -398,7 +398,7 @@ deps = {
|
||||
|
||||
# Tests whose path contains one of these fragments do not get
# urchin_tests_seastar_deps appended to their dependency list.
tests_without_seastar_deps = (
    'types_test', 'keys_test', 'partitioner_test', 'map_difference_test',
    'frozen_mutation_test', 'perf_mutation', 'cartesian_product_test',
    'perf_hash', 'perf_cql_parser', 'message', 'perf_simple_query',
    'serialization', 'compound_test', 'range_test', 'crc_test',
    'perf_sstable',
)

for t in urchin_tests:
    deps[t] = urchin_tests_dependencies + [t + '.cc']
    # 'tests/gossip' is excluded by exact match, everything else by substring.
    if t != 'tests/gossip' and not any(fragment in t for fragment in tests_without_seastar_deps):
        deps[t] += urchin_tests_seastar_deps
|
||||
|
||||
deps['tests/sstable_test'] += ['tests/sstable_datafile_test.cc']
|
||||
|
||||
113
tests/perf/perf_sstable.cc
Normal file
113
tests/perf/perf_sstable.cc
Normal file
@@ -0,0 +1,113 @@
|
||||
/*
|
||||
* Copyright 2015 Cloudius Systems
|
||||
*/
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <core/distributed.hh>
|
||||
#include <core/app-template.hh>
|
||||
#include <core/sstring.hh>
|
||||
#include <random>
|
||||
#include "perf_sstable.hh"
|
||||
|
||||
using namespace sstables;
|
||||
|
||||
static unsigned iterations = 30;
|
||||
static unsigned parallelism = 1;
|
||||
|
||||
// Write benchmark: calls fill_memtable() on every test_env instance, then
// hands test_env::flush_memtable to time_runs() together with the global
// iterations / parallelism settings.
future<> test_write(distributed<test_env>& dt) {
    auto populate = [] (test_env& env) {
        env.fill_memtable();
    };
    auto measure = [&dt] {
        return time_runs(iterations, parallelism, dt, &test_env::flush_memtable);
    };
    return dt.invoke_on_all(std::move(populate)).then(std::move(measure));
}
|
||||
|
||||
// Index read benchmark: passes test_env::read_all_indexes to time_runs()
// with the global iterations / parallelism settings.
future<> test_index_read(distributed<test_env>& dt) {
    auto measured_op = &test_env::read_all_indexes;
    return time_runs(iterations, parallelism, dt, measured_op);
}
|
||||
|
||||
// Sequential read benchmark: passes test_env::read_sequential_partitions to
// time_runs() with the global iterations / parallelism settings.
future<> test_sequential_read(distributed<test_env>& dt) {
    auto measured_op = &test_env::read_sequential_partitions;
    return time_runs(iterations, parallelism, dt, measured_op);
}
|
||||
|
||||
enum class test_modes {
|
||||
sequential_read,
|
||||
index_read,
|
||||
write,
|
||||
index_write,
|
||||
};
|
||||
|
||||
static std::unordered_map<sstring, test_modes> test_mode = {
|
||||
{"sequential_read", test_modes::sequential_read },
|
||||
{"index_read", test_modes::index_read },
|
||||
{"write", test_modes::write },
|
||||
{"index_write", test_modes::index_write },
|
||||
};
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
namespace bpo = boost::program_options;
|
||||
app_template app;
|
||||
app.add_options()
|
||||
("parallelism", bpo::value<unsigned>()->default_value(1), "number parallel requests")
|
||||
("iterations", bpo::value<unsigned>()->default_value(30), "number of iterations")
|
||||
("partitions", bpo::value<unsigned>()->default_value(5000000), "number of partitions")
|
||||
("buffer_size", bpo::value<unsigned>()->default_value(64), "sstable buffer size, in KB")
|
||||
("key_size", bpo::value<unsigned>()->default_value(128), "size of partition key")
|
||||
("num_columns", bpo::value<unsigned>()->default_value(5), "number of columns per row")
|
||||
("column_size", bpo::value<unsigned>()->default_value(64), "size in bytes for each column")
|
||||
("mode", bpo::value<sstring>()->default_value("index_write"), "one of: random_read, sequential_read, index_read, write, index_write (default)")
|
||||
("testdir", bpo::value<sstring>()->default_value("/var/lib/cassandra/perf-tests"), "directory in which to store the sstables");
|
||||
|
||||
return app.run(argc, argv, [&app] {
|
||||
auto test = make_lw_shared<distributed<test_env>>();
|
||||
|
||||
auto cfg = test_env::conf();
|
||||
iterations = app.configuration()["iterations"].as<unsigned>();
|
||||
parallelism = app.configuration()["parallelism"].as<unsigned>();
|
||||
cfg.partitions = app.configuration()["partitions"].as<unsigned>();
|
||||
cfg.key_size = app.configuration()["key_size"].as<unsigned>();
|
||||
cfg.buffer_size = app.configuration()["buffer_size"].as<unsigned>() << 10;
|
||||
sstring dir = app.configuration()["testdir"].as<sstring>();
|
||||
cfg.dir = dir;
|
||||
auto mode = test_mode[app.configuration()["mode"].as<sstring>()];
|
||||
if ((mode == test_modes::index_read) || (mode == test_modes::index_write)) {
|
||||
cfg.num_columns = 0;
|
||||
cfg.column_size = 0;
|
||||
} else {
|
||||
cfg.num_columns = app.configuration()["num_columns"].as<unsigned>();
|
||||
cfg.column_size = app.configuration()["column_size"].as<unsigned>();
|
||||
}
|
||||
return test->start(std::move(cfg)).then([mode, dir, test] {
|
||||
engine().at_exit([test] { return test->stop(); });
|
||||
if ((mode == test_modes::index_read) ||
|
||||
(mode == test_modes::sequential_read)) {
|
||||
return test->invoke_on_all([] (test_env &t) {
|
||||
return t.load_sstables(iterations);
|
||||
}).then_wrapped([] (future<> f) {
|
||||
try {
|
||||
f.get();
|
||||
} catch (...) {
|
||||
std::cerr << "An error occurred when trying to load test sstables. Did you run write mode yet?" << std::endl;
|
||||
throw;
|
||||
}
|
||||
});
|
||||
} else if ((mode == test_modes::index_write) || (mode == test_modes::write)) {
|
||||
return test_setup::create_empty_test_dir(dir);
|
||||
} else {
|
||||
throw std::invalid_argument("Invalid mode");
|
||||
}
|
||||
}).then([test, mode] {
|
||||
if (mode == test_modes::index_read) {
|
||||
return test_index_read(*test).then([test] {});
|
||||
} else if (mode == test_modes::sequential_read) {
|
||||
return test_sequential_read(*test).then([test] {});
|
||||
} else if ((mode == test_modes::index_write) || (mode == test_modes::write)) {
|
||||
return test_write(*test).then([test] {});
|
||||
} else {
|
||||
throw std::invalid_argument("Invalid mode");
|
||||
}
|
||||
}).then([] {
|
||||
return engine().exit(0);
|
||||
}).or_terminate();
|
||||
});
|
||||
}
|
||||
@@ -5,6 +5,7 @@
|
||||
#pragma once
|
||||
#include "../sstable_test.hh"
|
||||
#include "sstables/sstables.hh"
|
||||
#include "mutation_reader.hh"
|
||||
#include <boost/accumulators/accumulators.hpp>
|
||||
#include <boost/accumulators/statistics.hpp>
|
||||
#include <boost/range/irange.hpp>
|
||||
@@ -16,6 +17,8 @@ public:
|
||||
struct conf {
|
||||
unsigned partitions;
|
||||
unsigned key_size;
|
||||
unsigned num_columns;
|
||||
unsigned column_size;
|
||||
size_t buffer_size;
|
||||
sstring dir;
|
||||
};
|
||||
@@ -25,12 +28,20 @@ private:
|
||||
return _cfg.dir + "/" + to_sstring(engine().cpu_id());
|
||||
}
|
||||
|
||||
// Returns a string of `size` characters, each one drawn from _distribution
// using _generator.
sstring random_string(unsigned size) {
    sstring str(sstring::initialized_later{}, size_t(size));
    for (auto& b: str) {
        b = _distribution(_generator);
    }
    return str;
}

// Random partition key of _cfg.key_size characters.
sstring random_key() {
    return random_string(_cfg.key_size);
}

// Random column value of _cfg.column_size characters.
sstring random_column() {
    return random_string(_cfg.column_size);
}
|
||||
|
||||
conf _cfg;
|
||||
@@ -40,9 +51,33 @@ private:
|
||||
lw_shared_ptr<memtable> _mt;
|
||||
std::vector<lw_shared_ptr<sstable>> _sst;
|
||||
|
||||
// Builds the schema used by the test: keyspace "ks", table "perf-test", one
// utf8 partition key column called "name", no clustering or static columns,
// and _cfg.num_columns utf8 regular columns named column0000, column0001, ...
schema_ptr create_schema() {
    std::vector<schema::column> regular_cols;

    unsigned n = 0;
    while (n < _cfg.num_columns) {
        regular_cols.push_back(schema::column{ to_bytes(sprint("column%04d", n)), utf8_type });
        ++n;
    }

    schema_builder builder(make_lw_shared(schema(generate_legacy_id("ks", "perf-test"), "ks", "perf-test",
        {{"name", utf8_type}},   // partition key
        {},                      // clustering key
        { regular_cols },        // regular columns
        {},                      // static columns
        utf8_type,               // regular column name type
        "Perf tests"             // comment
    )));
    return builder.build(schema_builder::compact_storage::no);
}
|
||||
|
||||
public:
|
||||
// Builds the test environment from cfg: stores the configuration, creates the
// schema through create_schema(), restricts the random characters to the
// range '@'..'~' and creates a new memtable for that schema.
// NOTE(review): create_schema() reads _cfg, so this relies on _cfg being
// declared before s in the class - confirm against the member declarations.
test_env(conf cfg) : _cfg(std::move(cfg))
    , s(create_schema())
    , _distribution('@', '~')
    , _mt(make_lw_shared<memtable>(s))
{}
|
||||
@@ -52,10 +87,19 @@ public:
|
||||
void fill_memtable() {
|
||||
for (unsigned i = 0; i < _cfg.partitions; i++) {
|
||||
auto key = partition_key::from_deeply_exploded(*s, { boost::any(random_key()) });
|
||||
_mt->apply(mutation(key, s));
|
||||
auto mut = mutation(key, s);
|
||||
for (auto& cdef: s->regular_columns()) {
|
||||
mut.set_clustered_cell(clustering_key::make_empty(*s), cdef, atomic_cell::make_live(0, utf8_type->decompose(random_column())));
|
||||
}
|
||||
_mt->apply(std::move(mut));
|
||||
}
|
||||
}
|
||||
|
||||
// Creates an sstable object for generation 0 ("ks"/"cf", version ka, format
// big) located in dir(), appends it to _sst and starts loading it.
// The iterations argument is not used.
future<> load_sstables(unsigned iterations) {
    auto loaded = make_lw_shared<sstable>("ks", "cf", this->dir(), 0, sstable::version_types::ka, sstable::format_types::big);
    _sst.push_back(loaded);
    return loaded->load();
}
|
||||
|
||||
using clk = std::chrono::high_resolution_clock;
|
||||
static auto now() {
|
||||
return clk::now();
|
||||
@@ -75,6 +119,51 @@ public:
|
||||
return partitions / duration;
|
||||
});
|
||||
}
|
||||
|
||||
// Walks the summary of the first sstable in _sst, calls read_indexes() for
// every summary position in turn and sums the sizes of the returned lists.
// Resolves to that sum divided by the elapsed time in seconds.
// The idx argument is not used.
future<double> read_all_indexes(int idx) {
    return do_with(test(_sst[0]), [] (auto& sst) {
        auto started = test_env::now();
        auto entries_read = make_lw_shared<size_t>(0);
        auto& summary = sst.get_summary();
        auto positions = boost::irange(0, int(summary.header.size));

        auto read_one = [&sst, entries_read] (uint64_t entry) {
            return sst.read_indexes(entry).then([entries_read] (auto il) {
                *entries_read += il.size();
            });
        };

        return do_for_each(positions.begin(), positions.end(), std::move(read_one)).then([entries_read, started] {
            auto finished = test_env::now();
            auto seconds = std::chrono::duration<double>(finished - started).count();
            return *entries_read / seconds;
        });
    });
}
|
||||
|
||||
// Reads the first sstable in _sst mutation by mutation until the reader
// returns an empty mutation_opt. Every mutation must contain the row with the
// empty clustering key, holding exactly _cfg.num_columns cells; otherwise
// std::invalid_argument is thrown. Resolves to the number of mutations read
// divided by the elapsed time in seconds.
// The idx argument is not used.
future<double> read_sequential_partitions(int idx) {
    return do_with(_sst[0]->read_rows(s), [this] (sstables::mutation_reader& r) {
        auto started = test_env::now();
        auto partitions_read = make_lw_shared<size_t>(0);
        auto finished_reading = make_lw_shared<bool>(false);

        auto reached_end = [finished_reading] { return *finished_reading; };
        auto read_next = [this, finished_reading, partitions_read, &r] {
            return r.read().then([this, finished_reading, partitions_read] (mutation_opt m) {
                if (!m) {
                    *finished_reading = true;
                    return;
                }
                auto row = m->partition().find_row(clustering_key::make_empty(*s));
                if (!row || row->size() != _cfg.num_columns) {
                    throw std::invalid_argument("Invalid sstable found. Maybe you ran write mode with different num_columns settings?");
                }
                (*partitions_read)++;
            });
        };

        return do_until(std::move(reached_end), std::move(read_next)).then([partitions_read, started] {
            auto finished = test_env::now();
            auto seconds = std::chrono::duration<double>(finished - started).count();
            return *partitions_read / seconds;
        });
    });
}
|
||||
};
|
||||
|
||||
// The function func should carry on with the test, and return the number of partitions processed.
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
/*
|
||||
* Copyright 2015 Cloudius Systems
|
||||
*/
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <core/distributed.hh>
|
||||
#include <core/app-template.hh>
|
||||
#include <core/sstring.hh>
|
||||
#include <random>
|
||||
#include "perf_sstable.hh"
|
||||
|
||||
using namespace sstables;
|
||||
|
||||
static unsigned iterations = 30;
|
||||
static unsigned parallelism = 1;
|
||||
|
||||
// Write benchmark: calls fill_memtable() on every test_env instance, then
// hands test_env::flush_memtable to time_runs() together with the global
// iterations / parallelism settings.
future<> test_write(distributed<test_env>& dt) {
    auto populate = [] (test_env& env) {
        env.fill_memtable();
    };
    auto measure = [&dt] {
        return time_runs(iterations, parallelism, dt, &test_env::flush_memtable);
    };
    return dt.invoke_on_all(std::move(populate)).then(std::move(measure));
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
namespace bpo = boost::program_options;
|
||||
app_template app;
|
||||
app.add_options()
|
||||
("parallelism", bpo::value<unsigned>()->default_value(1), "number parallel requests")
|
||||
("iterations", bpo::value<unsigned>()->default_value(30), "number of iterations")
|
||||
("partitions", bpo::value<unsigned>()->default_value(5000000), "number of partitions")
|
||||
("buffer_size", bpo::value<unsigned>()->default_value(64), "sstable buffer size, in KB")
|
||||
("key_size", bpo::value<unsigned>()->default_value(128), "size of partition key")
|
||||
("testdir", bpo::value<sstring>()->default_value("/var/lib/cassandra/perf-tests"), "directory in which to store the sstables");
|
||||
|
||||
return app.run(argc, argv, [&app] {
|
||||
auto test = make_lw_shared<distributed<test_env>>();
|
||||
|
||||
auto cfg = test_env::conf();
|
||||
iterations = app.configuration()["iterations"].as<unsigned>();
|
||||
parallelism = app.configuration()["parallelism"].as<unsigned>();
|
||||
cfg.partitions = app.configuration()["partitions"].as<unsigned>();
|
||||
cfg.key_size = app.configuration()["key_size"].as<unsigned>();
|
||||
cfg.buffer_size = app.configuration()["buffer_size"].as<unsigned>() << 10;
|
||||
sstring dir = app.configuration()["testdir"].as<sstring>();
|
||||
cfg.dir = dir;
|
||||
return test->start(std::move(cfg)).then([dir, test] {
|
||||
engine().at_exit([test] { return test->stop(); });
|
||||
return test_setup::create_empty_test_dir(dir);
|
||||
}).then([test] {
|
||||
return test_write(*test).then([test] {});
|
||||
}).then([] {
|
||||
return engine().exit(0);
|
||||
}).or_terminate();
|
||||
});
|
||||
}
|
||||
Reference in New Issue
Block a user