Merge "Buffer related read performance improvement" from Glauber

"As we can see, the flame graphs show a lot of performance still left on the
table. However, from the I/O point of view, we have determined through our
write performance testing that 128k is the sweet spot for buffers. Worse yet:
reads are still trapped at 8k.

While it is true that when we want to read just a little data, smaller is
better, it is also true that reads (and now that includes the index), tend to
give hints about the size they want read.

So we can read the whole thing at once if it is smaller than 128k, or chop it
into 128k increments if it is not.

The performance gains coming from doing this are considerable: 39% for data,
67% for the index."
This commit is contained in:
Avi Kivity
2015-08-27 18:07:27 +03:00
3 changed files with 17 additions and 6 deletions

View File

@@ -532,8 +532,9 @@ future<> data_consume_context::read() {
data_consume_context sstable::data_consume_rows(
row_consumer& consumer, uint64_t start, uint64_t end) {
auto estimated_size = std::min(uint64_t(sstable_buffer_size), align_up(end - start, uint64_t(8 << 10)));
return std::make_unique<data_consume_context::impl>(
consumer, data_stream_at(start), end - start);
consumer, data_stream_at(start, estimated_size), end - start);
}
data_consume_context sstable::data_consume_rows(row_consumer& consumer) {

View File

@@ -21,6 +21,7 @@
#include "unimplemented.hh"
#include <boost/algorithm/string.hpp>
#include <regex>
#include <core/align.hh>
namespace sstables {
@@ -752,14 +753,23 @@ future<index_list> sstable::read_indexes(uint64_t summary_idx) {
uint64_t position = _summary.entries[summary_idx].position;
uint64_t quantity = _summary.header.sampling_level;
uint64_t estimated_size;
if (++summary_idx >= _summary.header.size) {
estimated_size = index_size() - position;
} else {
estimated_size = _summary.entries[summary_idx].position - position;
}
estimated_size = std::min(uint64_t(sstable_buffer_size), align_up(estimated_size, uint64_t(8 << 10)));
struct reader {
uint64_t count = 0;
std::vector<index_entry> indexes;
shared_file_random_access_reader stream;
reader(file f, uint64_t quantity) : stream(f) { indexes.reserve(quantity); }
reader(file f, uint64_t quantity, uint64_t estimated_size) : stream(f, estimated_size) { indexes.reserve(quantity); }
};
auto r = make_lw_shared<reader>(_index_file, quantity);
auto r = make_lw_shared<reader>(_index_file, quantity, estimated_size);
r->stream.seek(position);
@@ -1470,12 +1480,12 @@ sstable::component_type sstable::component_from_sstring(sstring &s) {
return reverse_map(s, _component_map);
}
input_stream<char> sstable::data_stream_at(uint64_t pos) {
input_stream<char> sstable::data_stream_at(uint64_t pos, uint64_t buf_size) {
if (_compression) {
return make_compressed_file_input_stream(
_data_file, &_compression, pos);
} else {
return make_file_input_stream(_data_file, pos);
return make_file_input_stream(_data_file, pos, buf_size);
}
}

View File

@@ -334,7 +334,7 @@ private:
future<index_list> read_indexes(uint64_t summary_idx);
input_stream<char> data_stream_at(uint64_t pos);
input_stream<char> data_stream_at(uint64_t pos, uint64_t buf_size = 8192);
// Read exactly the specific byte range from the data file (after
// uncompression, if the file is compressed). This can be used to read