Compare commits
5 Commits
next
...
copilot/ad
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4c5a0b25e5 | ||
|
|
68f6e57e78 | ||
|
|
433f845d73 | ||
|
|
622aabdbd2 | ||
|
|
0346c6451d |
@@ -1489,6 +1489,8 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
, nodeops_heartbeat_interval_seconds(this, "nodeops_heartbeat_interval_seconds", liveness::LiveUpdate, value_status::Used, 10, "Period of heartbeat ticks in node operations.")
|
||||
, cache_index_pages(this, "cache_index_pages", liveness::LiveUpdate, value_status::Used, true,
|
||||
"Keep SSTable index pages in the global cache after a SSTable read. Expected to improve performance for workloads with big partitions, but may degrade performance for workloads with small partitions. The amount of memory usable by index cache is limited with ``index_cache_fraction``.")
|
||||
, partition_index_cache_enabled(this, "partition_index_cache_enabled", liveness::LiveUpdate, value_status::Used, true,
|
||||
"Enable partition_index_cache. When disabled, partition index entries are not cached in the global partition_index_cache, reducing memory usage but potentially degrading read performance. The ``cache_index_pages`` option controls caching of the index file pages themselves.")
|
||||
, index_cache_fraction(this, "index_cache_fraction", liveness::LiveUpdate, value_status::Used, 0.2,
|
||||
"The maximum fraction of cache memory permitted for use by index cache. Clamped to the [0.0; 1.0] range. Must be small enough to not deprive the row cache of memory, but should be big enough to fit a large fraction of the index. The default value 0.2 means that at least 80\% of cache memory is reserved for the row cache, while at most 20\% is usable by the index cache.")
|
||||
, consistent_cluster_management(this, "consistent_cluster_management", value_status::Deprecated, true, "Use RAFT for cluster management and DDL.")
|
||||
|
||||
@@ -496,6 +496,7 @@ public:
|
||||
named_value<uint32_t> nodeops_heartbeat_interval_seconds;
|
||||
|
||||
named_value<bool> cache_index_pages;
|
||||
named_value<bool> partition_index_cache_enabled;
|
||||
named_value<double> index_cache_fraction;
|
||||
|
||||
named_value<bool> consistent_cluster_management;
|
||||
|
||||
1
main.cc
1
main.cc
@@ -825,6 +825,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
||||
// See the comment at the definition of sstables::global_cache_index_pages.
|
||||
smp::invoke_on_all([&cfg] {
|
||||
sstables::global_cache_index_pages = cfg->cache_index_pages.operator utils::updateable_value<bool>();
|
||||
sstables::global_partition_index_cache_enabled = cfg->partition_index_cache_enabled.operator utils::updateable_value<bool>();
|
||||
}).get();
|
||||
|
||||
::sighup_handler sighup_handler(opts, *cfg);
|
||||
|
||||
@@ -779,15 +779,18 @@ public:
|
||||
index_reader(shared_sstable sst, reader_permit permit,
|
||||
tracing::trace_state_ptr trace_state = {},
|
||||
use_caching caching = use_caching::yes,
|
||||
bool single_partition_read = false)
|
||||
bool single_partition_read = false,
|
||||
use_caching use_partition_index_cache = use_caching::yes)
|
||||
: _sstable(std::move(sst))
|
||||
, _permit(std::move(permit))
|
||||
, _trace_state(std::move(trace_state))
|
||||
, _local_index_cache(caching ? nullptr
|
||||
// When use_partition_index_cache is true, use the global shared cache (_sstable->_index_cache).
|
||||
// When false, create a local non-shared cache that won't persist across reads.
|
||||
, _local_index_cache(use_partition_index_cache ? nullptr
|
||||
: std::make_unique<partition_index_cache>(_sstable->manager().get_cache_tracker().get_lru(),
|
||||
_sstable->manager().get_cache_tracker().region(),
|
||||
_sstable->manager().get_cache_tracker().get_partition_index_cache_stats()))
|
||||
, _index_cache(caching ? *_sstable->_index_cache : *_local_index_cache)
|
||||
, _index_cache(use_partition_index_cache ? *_sstable->_index_cache : *_local_index_cache)
|
||||
, _alloc_section(abstract_formatter([sst = _sstable] (fmt::format_context& ctx) {
|
||||
fmt::format_to(ctx.out(), "index_reader {}", sst->get_filename());
|
||||
}))
|
||||
|
||||
@@ -1191,9 +1191,11 @@ private:
|
||||
}
|
||||
index_reader& get_index_reader() {
|
||||
if (!_index_reader) {
|
||||
auto caching = use_caching(global_cache_index_pages && !_slice.options.contains(query::partition_slice::option::bypass_cache));
|
||||
auto bypass_cache = _slice.options.contains(query::partition_slice::option::bypass_cache);
|
||||
auto caching = use_caching(global_cache_index_pages && !bypass_cache);
|
||||
auto use_partition_index_cache = use_caching(global_partition_index_cache_enabled && !bypass_cache);
|
||||
_index_reader = std::make_unique<index_reader>(_sst, _consumer.permit(),
|
||||
_consumer.trace_state(), caching, _single_partition_read);
|
||||
_consumer.trace_state(), caching, _single_partition_read, use_partition_index_cache);
|
||||
}
|
||||
return *_index_reader;
|
||||
}
|
||||
|
||||
@@ -105,6 +105,17 @@ namespace sstables {
|
||||
//
|
||||
thread_local utils::updateable_value<bool> global_cache_index_pages(true);
|
||||
|
||||
// The below flag governs whether the partition_index_cache is enabled.
|
||||
//
|
||||
// If set to true, partition index entries are cached in a global partition_index_cache.
|
||||
// If false, each index_reader creates its own local partition_index_cache that is
|
||||
// not shared and not persistent across reads.
|
||||
//
|
||||
// This is independent from global_cache_index_pages, which controls caching of
|
||||
// the index file pages themselves (cached_file).
|
||||
//
|
||||
thread_local utils::updateable_value<bool> global_partition_index_cache_enabled(true);
|
||||
|
||||
logging::logger sstlog("sstable");
|
||||
|
||||
[[noreturn]] void on_parse_error(sstring message, std::optional<component_name> filename) {
|
||||
@@ -2528,8 +2539,10 @@ sstable::make_reader(
|
||||
) {
|
||||
const auto reversed = slice.is_reversed();
|
||||
|
||||
auto index_caching = use_caching(global_cache_index_pages && !slice.options.contains(query::partition_slice::option::bypass_cache));
|
||||
auto index_reader = make_index_reader(permit, trace_state, index_caching, range.is_singular());
|
||||
auto bypass_cache = slice.options.contains(query::partition_slice::option::bypass_cache);
|
||||
auto index_caching = use_caching(global_cache_index_pages && !bypass_cache);
|
||||
auto partition_index_caching = use_caching(global_partition_index_cache_enabled && !bypass_cache);
|
||||
auto index_reader = make_index_reader(permit, trace_state, index_caching, range.is_singular(), partition_index_caching);
|
||||
|
||||
if (_version >= version_types::mc && (!reversed || range.is_singular())) {
|
||||
return mx::make_reader(
|
||||
@@ -3701,7 +3714,8 @@ std::unique_ptr<abstract_index_reader> sstable::make_index_reader(
|
||||
reader_permit permit,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
use_caching caching,
|
||||
bool single_partition_read
|
||||
bool single_partition_read,
|
||||
use_caching use_partition_index_cache
|
||||
) {
|
||||
if (!_index_file) {
|
||||
if (!_partitions_db_footer) [[unlikely]] {
|
||||
@@ -3737,7 +3751,7 @@ std::unique_ptr<abstract_index_reader> sstable::make_index_reader(
|
||||
std::move(trace_state)
|
||||
);
|
||||
}
|
||||
return std::make_unique<index_reader>(shared_from_this(), std::move(permit), std::move(trace_state), caching, single_partition_read);
|
||||
return std::make_unique<index_reader>(shared_from_this(), std::move(permit), std::move(trace_state), caching, single_partition_read, use_partition_index_cache);
|
||||
}
|
||||
|
||||
// Returns error code, 0 is success
|
||||
|
||||
@@ -69,6 +69,7 @@ namespace sstables {
|
||||
struct abstract_index_reader;
|
||||
class sstable_directory;
|
||||
extern thread_local utils::updateable_value<bool> global_cache_index_pages;
|
||||
extern thread_local utils::updateable_value<bool> global_partition_index_cache_enabled;
|
||||
|
||||
namespace mc {
|
||||
class writer;
|
||||
@@ -1083,7 +1084,8 @@ public:
|
||||
reader_permit permit,
|
||||
tracing::trace_state_ptr trace_state = {},
|
||||
use_caching caching = use_caching::yes,
|
||||
bool single_partition_read = false);
|
||||
bool single_partition_read = false,
|
||||
use_caching use_partition_index_cache = use_caching::yes);
|
||||
|
||||
// Allow the test cases from sstable_test.cc to test private methods. We use
|
||||
// a placeholder to avoid cluttering this class too much. The sstable_test class
|
||||
|
||||
@@ -50,6 +50,7 @@ class TestBypassCache(Tester):
|
||||
insert_data=True,
|
||||
smp=1,
|
||||
cache_index_pages=None,
|
||||
partition_index_cache_enabled=None,
|
||||
):
|
||||
self.keyspace_name = keyspace_name
|
||||
self.table_name = table_name
|
||||
@@ -59,6 +60,8 @@ class TestBypassCache(Tester):
|
||||
jvm_args = ["--smp", str(smp)]
|
||||
if cache_index_pages is not None:
|
||||
jvm_args += ["--cache-index-pages", "1" if cache_index_pages else "0"]
|
||||
if partition_index_cache_enabled is not None:
|
||||
jvm_args += ["--partition-index-cache-enabled", "1" if partition_index_cache_enabled else "0"]
|
||||
cluster.populate(nodes).start(jvm_args=jvm_args)
|
||||
node1 = cluster.nodelist()[0]
|
||||
session = self.patient_cql_connection(node1)
|
||||
@@ -309,3 +312,68 @@ class TestBypassCache(Tester):
|
||||
alter_cmd = f"ALTER TABLE {self.keyspace_name}.{self.table_name} WITH CACHING = {{'enabled': 'false'}}"
|
||||
session.execute(alter_cmd)
|
||||
assert not self.verify_used_memory_grow(node=node, session=session), "expected to have writes without cache"
|
||||
|
||||
def test_partition_index_cache_disabled_independently(self):
|
||||
"""
|
||||
Test that partition_index_cache_enabled can be disabled independently of cache_index_pages.
|
||||
When partition_index_cache_enabled=False but cache_index_pages=True:
|
||||
- The cached_file cache (scylla_sstables_index_page_cache_*) should still be active
|
||||
- The partition_index_cache (scylla_sstables_index_page_*) should NOT be populated
|
||||
"""
|
||||
# Test with cache_index_pages=True but partition_index_cache_enabled=False
|
||||
session = self.prepare(insert_data=False, cache_index_pages=True, partition_index_cache_enabled=False)
|
||||
node = self.cluster.nodelist()[0]
|
||||
create_c1c2_table(session, cf=self.table_name)
|
||||
insert_c1c2(session, n=NUM_OF_QUERY_EXECUTIONS, cf=self.table_name, ks=self.keyspace_name)
|
||||
node.flush()
|
||||
query = f"select * from {self.table_name}"
|
||||
|
||||
# When partition_index_cache is disabled:
|
||||
# - For BIG-index sstables (me format), scylla_sstables_index_page_hits should NOT increase
|
||||
# because partition_index_cache is disabled (but cached_file cache should work)
|
||||
# - For BTI-index sstables (ms format), only cached_file cache is used anyway
|
||||
if self.sstable_format != "ms":
|
||||
# For BIG-index format, verify partition_index_cache is not used
|
||||
errors = self.run_query_and_check_metrics(
|
||||
node,
|
||||
session,
|
||||
query,
|
||||
metrics_validators={
|
||||
"scylla_sstables_index_page_hits": self.gen_less_than(self.cache_thresh()),
|
||||
"scylla_sstables_index_page_cache_hits": "increased_by_at_least_1", # cached_file should still work
|
||||
"scylla_sstables_index_page_cache_misses": "increased_by_at_least_1",
|
||||
"scylla_sstables_index_page_cache_populations": "increased_by_at_least_1",
|
||||
},
|
||||
)
|
||||
assert not errors, "partition_index_cache should be disabled while cached_file cache is enabled:\n" + "\n".join(errors)
|
||||
|
||||
def test_partition_index_cache_enabled_independently(self):
|
||||
"""
|
||||
Test that partition_index_cache_enabled can be enabled independently of cache_index_pages.
|
||||
When partition_index_cache_enabled=True but cache_index_pages=False:
|
||||
- The cached_file cache (scylla_sstables_index_page_cache_*) should NOT be populated
|
||||
- The partition_index_cache (scylla_sstables_index_page_*) should still be active (for BIG-index)
|
||||
"""
|
||||
# Test with cache_index_pages=False but partition_index_cache_enabled=True
|
||||
session = self.prepare(insert_data=False, cache_index_pages=False, partition_index_cache_enabled=True)
|
||||
node = self.cluster.nodelist()[0]
|
||||
create_c1c2_table(session, cf=self.table_name)
|
||||
insert_c1c2(session, n=NUM_OF_QUERY_EXECUTIONS, cf=self.table_name, ks=self.keyspace_name)
|
||||
node.flush()
|
||||
query = f"select * from {self.table_name}"
|
||||
|
||||
# When cache_index_pages is disabled but partition_index_cache is enabled:
|
||||
# - For BIG-index sstables (me format), partition_index_cache should still populate
|
||||
# - For BTI-index sstables (ms format), there's no partition_index_cache to use
|
||||
if self.sstable_format != "ms":
|
||||
# For BIG-index format, verify partition_index_cache is used
|
||||
errors = self.run_query_and_check_metrics(
|
||||
node,
|
||||
session,
|
||||
query,
|
||||
metrics_validators={
|
||||
"scylla_sstables_index_page_hits": self.gen_more_than(self.cache_thresh()),
|
||||
"scylla_sstables_index_page_cache_hits": self.gen_less_than(self.cache_thresh()), # cached_file should not be used much
|
||||
},
|
||||
)
|
||||
assert not errors, "partition_index_cache should be enabled while cached_file cache is disabled:\n" + "\n".join(errors)
|
||||
|
||||
Reference in New Issue
Block a user