From 703aed327751bcf048f7afd4268497639dab8bca Mon Sep 17 00:00:00 2001 From: Kamil Braun Date: Mon, 20 Sep 2021 14:39:45 +0200 Subject: [PATCH] table: add option to automatically bypass cache for reversed queries Currently the new reversing sstable algorithms do not support fast forwarding and the cache does not yet handle reversed results. This forced us to disable the cache for reversed queries if we want to guarantee bounded memory. We introduce an option that does this automatically (without specifying `bypass cache` in the query) and turn it on by default. If the user decides that they prefer to keep the cache at the cost of fetching entire partitions into memory (which may be viable if their partitions are small) during reversed queries, the option can be turned off. It is live-updateable. --- conf/scylla.yaml | 17 +++++++++++++++++ database.cc | 1 + database.hh | 3 +++ db/config.cc | 2 ++ db/config.hh | 1 + table.cc | 18 +++++++++++++++++- 6 files changed, 41 insertions(+), 1 deletion(-) diff --git a/conf/scylla.yaml b/conf/scylla.yaml index dfe4e3c320..97c3a3b006 100644 --- a/conf/scylla.yaml +++ b/conf/scylla.yaml @@ -510,3 +510,20 @@ commitlog_total_space_in_mb: -1 # # Keep at 12 for new clusters. murmur3_partitioner_ignore_msb_bits: 12 + +# Use a new implementation of reversed reads in sstables when performing +# reversed queries. Bypasses the cache for reversed reads even when +# `bypass cache` is not explicitly specified in the query. +# The new implementation does not require unbounded memory +# (compared to the old implementation which had to fetch entire partitions +# into memory) but does not work with the cache yet. Turn this option off +# if your partitions are small so the old implementation is good enough; +# your queries can then utilize the cache and potentially be faster +# (since they don't need to use sstables as much). You can also turn +# this option off and explicitly perform `bypass cache` queries to +# use the new method. +# This option is temporary and will be removed as soon as the cache +# and sstable reverse read algorithms are updated to handle reversed +# queries correctly. +# The option can be updated without restarting Scylla. +# reversed_reads_auto_bypass_cache: true diff --git a/database.cc b/database.cc index 06def25da6..514d4c8b15 100644 --- a/database.cc +++ b/database.cc @@ -1109,6 +1109,7 @@ keyspace::make_column_family_config(const schema& s, const database& db) const { cfg.streaming_scheduling_group = _config.streaming_scheduling_group; cfg.statement_scheduling_group = _config.statement_scheduling_group; cfg.enable_metrics_reporting = db_config.enable_keyspace_column_family_metrics(); + cfg.reversed_reads_auto_bypass_cache = db_config.reversed_reads_auto_bypass_cache; // avoid self-reporting if (is_system_table(s)) { diff --git a/database.hh b/database.hh index 230476a292..f39c7c5f43 100644 --- a/database.hh +++ b/database.hh @@ -380,6 +380,9 @@ public: db::timeout_semaphore* view_update_concurrency_semaphore; size_t view_update_concurrency_semaphore_limit; db::data_listeners* data_listeners = nullptr; + // Not really table-specific (it's a global configuration parameter), but stored here + // for easy access from `table` member functions: + utils::updateable_value reversed_reads_auto_bypass_cache{true}; }; struct no_commitlog {}; diff --git a/db/config.cc b/db/config.cc index 4c3fa4a3b9..9a267cada3 100644 --- a/db/config.cc +++ b/db/config.cc @@ -811,6 +811,8 @@ db::config::config(std::shared_ptr exts) , cdc_dont_rewrite_streams(this, "cdc_dont_rewrite_streams", value_status::Used, false, "Disable rewriting streams from cdc_streams_descriptions to cdc_streams_descriptions_v2. Should not be necessary, but the procedure is expensive and prone to failures; this config option is left as a backdoor in case some user requires manual intervention.") , strict_allow_filtering(this, "strict_allow_filtering", liveness::LiveUpdate, value_status::Used, strict_allow_filtering_default(), "Match Cassandra in requiring ALLOW FILTERING on slow queries. Can be true, false, or warn. When false, Scylla accepts some slow queries even without ALLOW FILTERING that Cassandra rejects. Warn is same as false, but with warning.") + , reversed_reads_auto_bypass_cache(this, "reversed_reads_auto_bypass_cache", liveness::LiveUpdate, value_status::Used, true, + "Use a new implementation of reversed reads in sstables when performing reversed queries. The new implementation does not require unbounded memory (compared to the old implementation which had to fetch entire partitions into memory) but disables the cache. Turn this option off if your partitions are small so the old implementation is good enough; your queries can then utilize the cache and potentially be faster (since they don't need to use sstables as much). This option is temporary and will be removed as soon as the cache and sstable reverse read algorithms are updated to handle reversed queries correctly.") , alternator_port(this, "alternator_port", value_status::Used, 0, "Alternator API port") , alternator_https_port(this, "alternator_https_port", value_status::Used, 0, "Alternator API HTTPS port") , alternator_address(this, "alternator_address", value_status::Used, "0.0.0.0", "Alternator API listening address") diff --git a/db/config.hh b/db/config.hh index a948a18475..7c64dac90b 100644 --- a/db/config.hh +++ b/db/config.hh @@ -346,6 +346,7 @@ public: named_value max_concurrent_requests_per_shard; named_value cdc_dont_rewrite_streams; named_value strict_allow_filtering; + named_value reversed_reads_auto_bypass_cache; named_value alternator_port; named_value alternator_https_port; diff --git a/table.cc b/table.cc index 7d9e81ad91..bef554d65a 100644 --- a/table.cc +++ b/table.cc @@ -179,7 +179,23 @@ table::make_reader(schema_ptr s, readers.emplace_back(mt->make_flat_reader(s, permit, range, slice, pc, trace_state, fwd, fwd_mr)); } - if (cache_enabled() && !slice.options.contains(query::partition_slice::option::bypass_cache)) { + const auto bypass_cache = slice.options.contains(query::partition_slice::option::bypass_cache); + const auto reversed = slice.options.contains(query::partition_slice::option::reversed); + if (cache_enabled() && !bypass_cache && !(reversed && _config.reversed_reads_auto_bypass_cache())) { + // There are two supported methods of performing reversed queries now. + // In the old 'inefficient' method the cache/sstable performs a forward query and we wrap the reader in + // `make_reversing_reader` which fetches entire partitions in memory and reverses them; this method + // uses unbounded memory. + // There's also a new method where the sstable performs the query directly in reverse, which has bounded + // memory usage. However, for this method the cache must currently be disabled since fast forwarding + // is not yet supported by sstables in reverse mode and the cache algorithms do not handle reverse results + // yet. + // When the user explicitly bypasses the cache in a query, the new method is automatically used. + // Otherwise, we will use the old method with cache enabled, unless the `reversed_reads_auto_bypass_cache` + // option is set - which will automatically bypass the cache for reversed queries. + // FIXME: remove this workaround (and the `reversed_reads_auto_bypass_cache` option) after: + // - support for reversed reads is implemented in the cache, + // - fast forwarding is implemented in reversed sstable readers. readers.emplace_back(_cache.make_reader(s, permit, range, slice, pc, std::move(trace_state), fwd, fwd_mr)); } else { readers.emplace_back(make_sstable_reader(s, permit, _sstables, range, slice, pc, std::move(trace_state), fwd, fwd_mr));