From d9700a2826b60cd35e0a7ce6447cbabb16be3692 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@scylladb.com>
Date: Sun, 20 Nov 2016 18:57:41 +0200
Subject: [PATCH] storage_proxy: don't query concurrently needlessly during range queries

storage_proxy has an optimization where it tries to query multiple token
ranges concurrently to satisfy very large requests (an optimization which is
likely meaningless when paging is enabled, as it always should be).

However, the rows-per-range code severely underestimates the number of rows
per range, resulting in a large number of "read-ahead" internal queries being
performed, the results of most of which are discarded.

Fix by disabling this code. We should likely remove it completely, but let's
start with a band-aid that can be backported.

Fixes #1863.

Message-Id: <20161120165741.2488-1-avi@scylladb.com>
(cherry picked from commit 6bdb8ba31d7159f72e35a04548f01803bc38d4ca)
---
 service/storage_proxy.cc | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/service/storage_proxy.cc b/service/storage_proxy.cc
index 3e7cf55538..5a61f5eb7e 100644
--- a/service/storage_proxy.cc
+++ b/service/storage_proxy.cc
@@ -2643,12 +2643,19 @@ storage_proxy::query_partition_key_range(lw_shared_ptr<query::read_command> cmd,
         }
     }
 
+    // estimate_result_rows_per_range() is currently broken, and this is not needed
+    // when paging is available in any case
+#if 0
     // our estimate of how many result rows there will be per-range
     float result_rows_per_range = estimate_result_rows_per_range(cmd, ks);
     // underestimate how many rows we will get per-range in order to increase the likelihood that we'll
     // fetch enough rows in the first round
     result_rows_per_range -= result_rows_per_range * CONCURRENT_SUBREQUESTS_MARGIN;
     int concurrency_factor = result_rows_per_range == 0.0 ? 1 : std::max(1, std::min(int(ranges.size()), int(std::ceil(cmd->row_limit / result_rows_per_range))));
+#else
+    int result_rows_per_range = 0;
+    int concurrency_factor = 1;
+#endif
 
     std::vector<foreign_ptr<lw_shared_ptr<query::result>>> results;
     results.reserve(ranges.size()/concurrency_factor + 1);