sstables: allow specification of buffer_size for data_stream
For sstable reads, bigger buffers are not always better, because it can be the case that we want to read just a piece of data. However, as it so happens, we already have two variants for read: when we want to read a single key, we will use read_row(), which will try to bring all data in, so it will use a smaller buffer. For read_rows(), which will naturally span multiple buffers, we have end and start points: with that, we can have a good estimation of the expected buffer size, and we can have it go up until we reach the 128k limit we have for writes. Before: 209578.62 +- 135.73 partitions / sec (30 runs, 1 concurrent ops) After: 291703.98 +- 218.95 partitions / sec (30 runs, 1 concurrent ops) Gain: 39.19 % Signed-off-by: Glauber Costa <glommer@cloudius-systems.com>
This commit is contained in:
@@ -532,8 +532,9 @@ future<> data_consume_context::read() {
|
||||
|
||||
data_consume_context sstable::data_consume_rows(
|
||||
row_consumer& consumer, uint64_t start, uint64_t end) {
|
||||
auto estimated_size = std::min(uint64_t(sstable_buffer_size), align_up(end - start, uint64_t(8 << 10)));
|
||||
return std::make_unique<data_consume_context::impl>(
|
||||
consumer, data_stream_at(start), end - start);
|
||||
consumer, data_stream_at(start, estimated_size), end - start);
|
||||
}
|
||||
|
||||
data_consume_context sstable::data_consume_rows(row_consumer& consumer) {
|
||||
|
||||
@@ -1470,12 +1470,12 @@ sstable::component_type sstable::component_from_sstring(sstring &s) {
|
||||
return reverse_map(s, _component_map);
|
||||
}
|
||||
|
||||
input_stream<char> sstable::data_stream_at(uint64_t pos) {
|
||||
input_stream<char> sstable::data_stream_at(uint64_t pos, uint64_t buf_size) {
|
||||
if (_compression) {
|
||||
return make_compressed_file_input_stream(
|
||||
_data_file, &_compression, pos);
|
||||
} else {
|
||||
return make_file_input_stream(_data_file, pos);
|
||||
return make_file_input_stream(_data_file, pos, buf_size);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -334,7 +334,7 @@ private:
|
||||
|
||||
future<index_list> read_indexes(uint64_t summary_idx);
|
||||
|
||||
input_stream<char> data_stream_at(uint64_t pos);
|
||||
input_stream<char> data_stream_at(uint64_t pos, uint64_t buf_size = 8192);
|
||||
|
||||
// Read exactly the specific byte range from the data file (after
|
||||
// uncompression, if the file is compressed). This can be used to read
|
||||
|
||||
Reference in New Issue
Block a user