From 79db076f202ee42f95ebf609c78f60dcaf009a39 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Thu, 27 Aug 2015 08:51:14 -0500 Subject: [PATCH 1/2] sstables: allow specification of buffer_size for data_stream For sstable reads, bigger buffers are not always better, because it can be the case that we want to read just a piece of data. However, as it so happens, we already have two variants for read: when we want to read a single key, we will use read_row(), which will try to bring all data in: so it will use a smaller buffer. For read_rows(), that will naturally span multiple buffers, we have end and start points: with that, we can have a good estimation of the expected buffer size, and we can have it go up until we reach the 128k limit we have for writes. Before: 209578.62 +- 135.73 partitions / sec (30 runs, 1 concurrent ops) After: 291703.98 +- 218.95 partitions / sec (30 runs, 1 concurrent ops) Gain: 39.19 % Signed-off-by: Glauber Costa --- sstables/row.cc | 3 ++- sstables/sstables.cc | 4 ++-- sstables/sstables.hh | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sstables/row.cc b/sstables/row.cc index e67efcd17a..8fcc378992 100644 --- a/sstables/row.cc +++ b/sstables/row.cc @@ -532,8 +532,9 @@ future<> data_consume_context::read() { data_consume_context sstable::data_consume_rows( row_consumer& consumer, uint64_t start, uint64_t end) { + auto estimated_size = std::min(uint64_t(sstable_buffer_size), align_up(end - start, uint64_t(8 << 10))); return std::make_unique( - consumer, data_stream_at(start), end - start); + consumer, data_stream_at(start, estimated_size), end - start); } data_consume_context sstable::data_consume_rows(row_consumer& consumer) { diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 8713239c8a..bd7652d665 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -1470,12 +1470,12 @@ sstable::component_type sstable::component_from_sstring(sstring &s) { return reverse_map(s, _component_map); }
-input_stream<char> sstable::data_stream_at(uint64_t pos) { +input_stream<char> sstable::data_stream_at(uint64_t pos, uint64_t buf_size) { if (_compression) { return make_compressed_file_input_stream( _data_file, &_compression, pos); } else { - return make_file_input_stream(_data_file, pos); + return make_file_input_stream(_data_file, pos, buf_size); } } diff --git a/sstables/sstables.hh b/sstables/sstables.hh index 27e4b96d86..69e53068cd 100644 --- a/sstables/sstables.hh +++ b/sstables/sstables.hh @@ -334,7 +334,7 @@ private: future<index_list> read_indexes(uint64_t summary_idx); - input_stream<char> data_stream_at(uint64_t pos); + input_stream<char> data_stream_at(uint64_t pos, uint64_t buf_size = 8192); // Read exactly the specific byte range from the data file (after // uncompression, if the file is compressed). This can be used to read From 74a4843d2ae88bda568c3c008e4ea16c0efa3576 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 26 Aug 2015 11:40:30 -0500 Subject: [PATCH 2/2] sstable index: try to read everything in a single run We know the correct boundaries now, so we can use that information to feed the default buffer size: if it is small enough (smaller than 128k), we can try to bring everything at once. For the default key size of 128 that we use in the index read perf: (smp == 1, partitions = 500000, concurrency == 1) Before: 423493.26 +- 811.03 partitions / sec (30 runs, 1 concurrent ops) After: 707311.86 +- 1865.47 partitions / sec (30 runs, 1 concurrent ops) For a gain of 67 %.
Signed-off-by: Glauber Costa --- sstables/sstables.cc | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sstables/sstables.cc b/sstables/sstables.cc index bd7652d665..59a33c0f21 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -21,6 +21,7 @@ #include "unimplemented.hh" #include #include +#include namespace sstables { @@ -752,14 +753,23 @@ future<index_list> sstable::read_indexes(uint64_t summary_idx) { uint64_t position = _summary.entries[summary_idx].position; uint64_t quantity = _summary.header.sampling_level; + uint64_t estimated_size; + if (++summary_idx >= _summary.header.size) { + estimated_size = index_size() - position; + } else { + estimated_size = _summary.entries[summary_idx].position - position; + } + + estimated_size = std::min(uint64_t(sstable_buffer_size), align_up(estimated_size, uint64_t(8 << 10))); + struct reader { uint64_t count = 0; std::vector<index_entry> indexes; shared_file_random_access_reader stream; - reader(file f, uint64_t quantity) : stream(f) { indexes.reserve(quantity); } + reader(file f, uint64_t quantity, uint64_t estimated_size) : stream(f, estimated_size) { indexes.reserve(quantity); } }; - auto r = make_lw_shared<reader>(_index_file, quantity); + auto r = make_lw_shared<reader>(_index_file, quantity, estimated_size); r->stream.seek(position);