diff --git a/configure.py b/configure.py index 2c4531b3f0..87a3687155 100755 --- a/configure.py +++ b/configure.py @@ -122,7 +122,7 @@ urchin_tests = [ 'tests/perf/perf_hash', 'tests/perf/perf_cql_parser', 'tests/perf/perf_simple_query', - 'tests/perf/perf_sstable_index', + 'tests/perf/perf_sstable', 'tests/cql_query_test', 'tests/storage_proxy_test', 'tests/mutation_reader_test', @@ -398,7 +398,7 @@ deps = { for t in urchin_tests: deps[t] = urchin_tests_dependencies + [t + '.cc'] - if 'types_test' not in t and 'keys_test' not in t and 'partitioner_test' not in t and 'map_difference_test' not in t and 'frozen_mutation_test' not in t and 'perf_mutation' not in t and 'cartesian_product_test' not in t and 'perf_hash' not in t and 'perf_cql_parser' not in t and 'message' not in t and 'perf_simple_query' not in t and 'serialization' not in t and t != 'tests/gossip' and 'compound_test' not in t and 'range_test' not in t and 'crc_test' not in t and 'perf_sstable_index' not in t: + if 'types_test' not in t and 'keys_test' not in t and 'partitioner_test' not in t and 'map_difference_test' not in t and 'frozen_mutation_test' not in t and 'perf_mutation' not in t and 'cartesian_product_test' not in t and 'perf_hash' not in t and 'perf_cql_parser' not in t and 'message' not in t and 'perf_simple_query' not in t and 'serialization' not in t and t != 'tests/gossip' and 'compound_test' not in t and 'range_test' not in t and 'crc_test' not in t and 'perf_sstable' not in t: deps[t] += urchin_tests_seastar_deps deps['tests/sstable_test'] += ['tests/sstable_datafile_test.cc'] diff --git a/tests/perf/perf_sstable.cc b/tests/perf/perf_sstable.cc new file mode 100644 index 0000000000..0d0609a34e --- /dev/null +++ b/tests/perf/perf_sstable.cc @@ -0,0 +1,113 @@ +/* + * Copyright 2015 Cloudius Systems + */ + +#include +#include +#include +#include +#include +#include "perf_sstable.hh" + +using namespace sstables; + +static unsigned iterations = 30; +static unsigned parallelism = 1; + +future<> test_write(distributed& dt) { + return dt.invoke_on_all([] (test_env &t) { + t.fill_memtable(); + }).then([&dt] { + return time_runs(iterations, parallelism, dt, &test_env::flush_memtable); + }); +} + +future<> test_index_read(distributed& dt) { + return time_runs(iterations, parallelism, dt, &test_env::read_all_indexes); +} + +future<> test_sequential_read(distributed& dt) { + return time_runs(iterations, parallelism, dt, &test_env::read_sequential_partitions); +} + +enum class test_modes { + sequential_read, + index_read, + write, + index_write, +}; + +static std::unordered_map test_mode = { + {"sequential_read", test_modes::sequential_read }, + {"index_read", test_modes::index_read }, + {"write", test_modes::write }, + {"index_write", test_modes::index_write }, +}; + +int main(int argc, char** argv) { + namespace bpo = boost::program_options; + app_template app; + app.add_options() + ("parallelism", bpo::value()->default_value(1), "number parallel requests") + ("iterations", bpo::value()->default_value(30), "number of iterations") + ("partitions", bpo::value()->default_value(5000000), "number of partitions") + ("buffer_size", bpo::value()->default_value(64), "sstable buffer size, in KB") + ("key_size", bpo::value()->default_value(128), "size of partition key") + ("num_columns", bpo::value()->default_value(5), "number of columns per row") + ("column_size", bpo::value()->default_value(64), "size in bytes for each column") + ("mode", bpo::value()->default_value("index_write"), "one of: random_read, sequential_read, index_read, write, index_write (default)") + ("testdir", bpo::value()->default_value("/var/lib/cassandra/perf-tests"), "directory in which to store the sstables"); + + return app.run(argc, argv, [&app] { + auto test = make_lw_shared>(); + + auto cfg = test_env::conf(); + iterations = app.configuration()["iterations"].as(); + parallelism = app.configuration()["parallelism"].as(); + cfg.partitions = app.configuration()["partitions"].as(); + cfg.key_size = app.configuration()["key_size"].as(); + cfg.buffer_size = app.configuration()["buffer_size"].as() << 10; + sstring dir = app.configuration()["testdir"].as(); + cfg.dir = dir; + auto mode = test_mode[app.configuration()["mode"].as()]; + if ((mode == test_modes::index_read) || (mode == test_modes::index_write)) { + cfg.num_columns = 0; + cfg.column_size = 0; + } else { + cfg.num_columns = app.configuration()["num_columns"].as(); + cfg.column_size = app.configuration()["column_size"].as(); + } + return test->start(std::move(cfg)).then([mode, dir, test] { + engine().at_exit([test] { return test->stop(); }); + if ((mode == test_modes::index_read) || + (mode == test_modes::sequential_read)) { + return test->invoke_on_all([] (test_env &t) { + return t.load_sstables(iterations); + }).then_wrapped([] (future<> f) { + try { + f.get(); + } catch (...) { + std::cerr << "An error occurred when trying to load test sstables. Did you run write mode yet?" << std::endl; + throw; + } + }); + } else if ((mode == test_modes::index_write) || (mode == test_modes::write)) { + return test_setup::create_empty_test_dir(dir); + } else { + throw std::invalid_argument("Invalid mode"); + } + }).then([test, mode] { + if (mode == test_modes::index_read) { + return test_index_read(*test).then([test] {}); + } else if (mode == test_modes::sequential_read) { + return test_sequential_read(*test).then([test] {}); + } else if ((mode == test_modes::index_write) || (mode == test_modes::write)) { + return test_write(*test).then([test] {}); + } else { + throw std::invalid_argument("Invalid mode"); + } + }).then([] { + return engine().exit(0); + }).or_terminate(); + }); +} diff --git a/tests/perf/perf_sstable.hh b/tests/perf/perf_sstable.hh index 0a30db7076..129cb2bb2e 100644 --- a/tests/perf/perf_sstable.hh +++ b/tests/perf/perf_sstable.hh @@ -5,6 +5,7 @@ #pragma once #include "../sstable_test.hh" #include "sstables/sstables.hh" +#include "mutation_reader.hh" #include #include #include @@ -16,6 +17,8 @@ public: struct conf { unsigned partitions; unsigned key_size; + unsigned num_columns; + unsigned column_size; size_t buffer_size; sstring dir; }; @@ -25,12 +28,20 @@ private: return _cfg.dir + "/" + to_sstring(engine().cpu_id()); } - sstring random_key() { - sstring key(sstring::initialized_later{}, size_t(_cfg.key_size)); - for (auto& b: key) { + sstring random_string(unsigned size) { + sstring str(sstring::initialized_later{}, size_t(size)); + for (auto& b: str) { b = _distribution(_generator); } - return key; + return str; + } + + sstring random_key() { + return random_string(_cfg.key_size); + } + + sstring random_column() { + return random_string(_cfg.column_size); } conf _cfg; @@ -40,9 +51,33 @@ private: lw_shared_ptr _mt; std::vector> _sst; + schema_ptr create_schema() { + std::vector columns; + + for (unsigned i = 0; i < _cfg.num_columns; ++i) { + columns.push_back(schema::column{ to_bytes(sprint("column%04d", i)), utf8_type }); + } + + schema_builder builder(make_lw_shared(schema(generate_legacy_id("ks", "perf-test"), "ks", "perf-test", + // partition key + {{"name", utf8_type}}, + // clustering key + {}, + // regular columns + { columns }, + // static columns + {}, + // regular column name type + utf8_type, + // comment + "Perf tests" + ))); + return builder.build(schema_builder::compact_storage::no); + } + public: test_env(conf cfg) : _cfg(std::move(cfg)) - , s(uncompressed_schema()) + , s(create_schema()) , _distribution('@', '~') , _mt(make_lw_shared(s)) {} @@ -52,10 +87,19 @@ public: void fill_memtable() { for (unsigned i = 0; i < _cfg.partitions; i++) { auto key = partition_key::from_deeply_exploded(*s, { boost::any(random_key()) }); - _mt->apply(mutation(key, s)); + auto mut = mutation(key, s); + for (auto& cdef: s->regular_columns()) { + mut.set_clustered_cell(clustering_key::make_empty(*s), cdef, atomic_cell::make_live(0, utf8_type->decompose(random_column()))); + } + _mt->apply(std::move(mut)); } } + future<> load_sstables(unsigned iterations) { + _sst.push_back(make_lw_shared("ks", "cf", this->dir(), 0, sstable::version_types::ka, sstable::format_types::big)); + return _sst.back()->load(); + } + using clk = std::chrono::high_resolution_clock; static auto now() { return clk::now(); @@ -75,6 +119,51 @@ public: return partitions / duration; }); } + + future read_all_indexes(int idx) { + return do_with(test(_sst[0]), [] (auto& sst) { + auto start = test_env::now(); + auto total = make_lw_shared(0); + auto& summary = sst.get_summary(); + auto idx = boost::irange(0, int(summary.header.size)); + + return do_for_each(idx.begin(), idx.end(), [&sst, total] (uint64_t entry) { + return sst.read_indexes(entry).then([total] (auto il) { + *total += il.size(); + }); + }).then([total, start] { + auto end = test_env::now(); + auto duration = std::chrono::duration(end - start).count(); + return *total / duration; + }); + }); + } + + future read_sequential_partitions(int idx) { + return do_with(_sst[0]->read_rows(s), [this] (sstables::mutation_reader& r) { + auto start = test_env::now(); + auto total = make_lw_shared(0); + auto done = make_lw_shared(false); + return do_until([done] { return *done; }, [this, done, total, &r] { + return r.read().then([this, done, total] (mutation_opt m) { + if (!m) { + *done = true; + } else { + auto row = m->partition().find_row(clustering_key::make_empty(*s)); + if (!row || row->size() != _cfg.num_columns) { + throw std::invalid_argument("Invalid sstable found. Maybe you ran write mode with different num_columns settings?"); + } else { + (*total)++; + } + } + }); + }).then([total, start] { + auto end = test_env::now(); + auto duration = std::chrono::duration(end - start).count(); + return *total / duration; + }); + }); + } }; // The function func should carry on with the test, and return the number of partitions processed. diff --git a/tests/perf/perf_sstable_index.cc b/tests/perf/perf_sstable_index.cc deleted file mode 100644 index 0c0c10c7e4..0000000000 --- a/tests/perf/perf_sstable_index.cc +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2015 Cloudius Systems - */ - -#include -#include -#include -#include -#include -#include "perf_sstable.hh" - -using namespace sstables; - -static unsigned iterations = 30; -static unsigned parallelism = 1; - -future<> test_write(distributed& dt) { - return dt.invoke_on_all([] (test_env &t) { - t.fill_memtable(); - }).then([&dt] { - return time_runs(iterations, parallelism, dt, &test_env::flush_memtable); - }); -} - -int main(int argc, char** argv) { - namespace bpo = boost::program_options; - app_template app; - app.add_options() - ("parallelism", bpo::value()->default_value(1), "number parallel requests") - ("iterations", bpo::value()->default_value(30), "number of iterations") - ("partitions", bpo::value()->default_value(5000000), "number of partitions") - ("buffer_size", bpo::value()->default_value(64), "sstable buffer size, in KB") - ("key_size", bpo::value()->default_value(128), "size of partition key") - ("testdir", bpo::value()->default_value("/var/lib/cassandra/perf-tests"), "directory in which to store the sstables"); - - return app.run(argc, argv, [&app] { - auto test = make_lw_shared>(); - - auto cfg = test_env::conf(); - iterations = app.configuration()["iterations"].as(); - parallelism = app.configuration()["parallelism"].as(); - cfg.partitions = app.configuration()["partitions"].as(); - cfg.key_size = app.configuration()["key_size"].as(); - cfg.buffer_size = app.configuration()["buffer_size"].as() << 10; - sstring dir = app.configuration()["testdir"].as(); - cfg.dir = dir; - return test->start(std::move(cfg)).then([dir, test] { - engine().at_exit([test] { return test->stop(); }); - return test_setup::create_empty_test_dir(dir); - }).then([test] { - return test_write(*test).then([test] {}); - }).then([] { - return engine().exit(0); - }).or_terminate(); - }); -}