From d9700a2826b60cd35e0a7ce6447cbabb16be3692 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@scylladb.com>
Date: Sun, 20 Nov 2016 18:57:41 +0200
Subject: [PATCH] storage_proxy: don't query concurrently needlessly during
 range queries

storage_proxy has an optimization where it tries to query multiple token
ranges concurrently to satisfy very large requests (an optimization which is
likely meaningless when paging is enabled, as it always should be).  However,
the rows-per-range code severely underestimates the number of rows per range,
resulting in a large number of "read-ahead" internal queries being performed,
the results of most of which are discarded.

Fix by disabling this code. We should likely remove it completely, but let's
start with a band-aid that can be backported.

Fixes #1863.

Message-Id: <20161120165741.2488-1-avi@scylladb.com>
(cherry picked from commit 6bdb8ba31d7159f72e35a04548f01803bc38d4ca)
---
 service/storage_proxy.cc | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/service/storage_proxy.cc b/service/storage_proxy.cc
index 3e7cf55538..5a61f5eb7e 100644
--- a/service/storage_proxy.cc
+++ b/service/storage_proxy.cc
@@ -2643,12 +2643,19 @@ storage_proxy::query_partition_key_range(lw_shared_ptr<query::read_command> cmd,
         }
     }
 
+    // estimate_result_rows_per_range() is currently broken, and concurrent range
+    // queries are not needed when paging is available in any case
+#if 0
     // our estimate of how many result rows there will be per-range
     float result_rows_per_range = estimate_result_rows_per_range(cmd, ks);
     // underestimate how many rows we will get per-range in order to increase the likelihood that we'll
     // fetch enough rows in the first round
     result_rows_per_range -= result_rows_per_range * CONCURRENT_SUBREQUESTS_MARGIN;
     int concurrency_factor = result_rows_per_range == 0.0 ? 1 : std::max(1, std::min(int(ranges.size()), int(std::ceil(cmd->row_limit / result_rows_per_range))));
+#else
+    int result_rows_per_range = 0;
+    int concurrency_factor = 1;
+#endif
 
     std::vector<foreign_ptr<lw_shared_ptr<query::result>>> results;
     results.reserve(ranges.size()/concurrency_factor + 1);