From 79db076f202ee42f95ebf609c78f60dcaf009a39 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Thu, 27 Aug 2015 08:51:14 -0500 Subject: [PATCH 1/2] sstables: allow specification of buffer_size for data_stream For sstable reads, bigger buffers are not always better, because it can be the case that we want to read just a piece of data. However, as it so happens, we already have two variants for read: when we want to read a single key, we will use read_row(), which will try to bring all data in: so it will use a smaller buffer. For read_rows(), that will naturally span multiple buffers, we have end and start points: with that, we can have a good estimation of the expected buffer size, and we can have it go up until we reach the 128k limit we have for writes. Before: 209578.62 +- 135.73 partitions / sec (30 runs, 1 concurrent ops) After: 291703.98 +- 218.95 partitions / sec (30 runs, 1 concurrent ops) Gain: 39.19 % Signed-off-by: Glauber Costa --- sstables/row.cc | 3 ++- sstables/sstables.cc | 4 ++-- sstables/sstables.hh | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sstables/row.cc b/sstables/row.cc index e67efcd17a..8fcc378992 100644 --- a/sstables/row.cc +++ b/sstables/row.cc @@ -532,8 +532,9 @@ future<> data_consume_context::read() { data_consume_context sstable::data_consume_rows( row_consumer& consumer, uint64_t start, uint64_t end) { + auto estimated_size = std::min(uint64_t(sstable_buffer_size), align_up(end - start, uint64_t(8 << 10))); return std::make_unique( - consumer, data_stream_at(start), end - start); + consumer, data_stream_at(start, estimated_size), end - start); } data_consume_context sstable::data_consume_rows(row_consumer& consumer) { diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 8713239c8a..bd7652d665 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -1470,12 +1470,12 @@ sstable::component_type sstable::component_from_sstring(sstring &s) { return reverse_map(s, _component_map); }
-input_stream<char> sstable::data_stream_at(uint64_t pos) { +input_stream<char> sstable::data_stream_at(uint64_t pos, uint64_t buf_size) { if (_compression) { return make_compressed_file_input_stream( _data_file, &_compression, pos); } else { - return make_file_input_stream(_data_file, pos); + return make_file_input_stream(_data_file, pos, buf_size); } } diff --git a/sstables/sstables.hh b/sstables/sstables.hh index 27e4b96d86..69e53068cd 100644 --- a/sstables/sstables.hh +++ b/sstables/sstables.hh @@ -334,7 +334,7 @@ private: future<index_list> read_indexes(uint64_t summary_idx); - input_stream<char> data_stream_at(uint64_t pos); + input_stream<char> data_stream_at(uint64_t pos, uint64_t buf_size = 8192); // Read exactly the specific byte range from the data file (after // uncompression, if the file is compressed). This can be used to read From 74a4843d2ae88bda568c3c008e4ea16c0efa3576 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 26 Aug 2015 11:40:30 -0500 Subject: [PATCH 2/2] sstable index: try to read everything in a single run We know the correct boundaries now, so we can use that information to feed the default buffer size: if it is small enough (smaller than 128k), we can try to bring everything at once. For the default key size of 128 that we use in the index read perf: (smp == 1, partitions = 500000, concurrency == 1) Before: 423493.26 +- 811.03 partitions / sec (30 runs, 1 concurrent ops) After: 707311.86 +- 1865.47 partitions / sec (30 runs, 1 concurrent ops) For a gain of 67 %.
Signed-off-by: Glauber Costa --- sstables/sstables.cc | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sstables/sstables.cc b/sstables/sstables.cc index bd7652d665..59a33c0f21 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -21,6 +21,7 @@ #include "unimplemented.hh" #include #include +#include namespace sstables { @@ -752,14 +753,23 @@ future<index_list> sstable::read_indexes(uint64_t summary_idx) { uint64_t position = _summary.entries[summary_idx].position; uint64_t quantity = _summary.header.sampling_level; + uint64_t estimated_size; + if (++summary_idx >= _summary.header.size) { + estimated_size = index_size() - position; + } else { + estimated_size = _summary.entries[summary_idx].position - position; + } + + estimated_size = std::min(uint64_t(sstable_buffer_size), align_up(estimated_size, uint64_t(8 << 10))); + struct reader { uint64_t count = 0; std::vector<index_entry> indexes; shared_file_random_access_reader stream; - reader(file f, uint64_t quantity) : stream(f) { indexes.reserve(quantity); } + reader(file f, uint64_t quantity, uint64_t estimated_size) : stream(f, estimated_size) { indexes.reserve(quantity); } }; - auto r = make_lw_shared<reader>(_index_file, quantity); + auto r = make_lw_shared<reader>(_index_file, quantity, estimated_size); r->stream.seek(position);