Compare commits

...

5 Commits

Author SHA1 Message Date
copilot-swe-agent[bot]
4c5a0b25e5 Address code review comments
- Add clarifying comment to index_reader constructor explaining the cache logic
- Remove redundant comment from make_index_reader

Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-18 22:59:56 +00:00
copilot-swe-agent[bot]
68f6e57e78 Fix partition_index_cache_enabled to work independently
- Updated make_index_reader signature to accept use_partition_index_cache parameter
- Compute partition_index_caching independently at call sites based on global_partition_index_cache_enabled
- Both caching flags now properly respect bypass_cache option
- This allows partition_index_cache to be enabled/disabled independently from cache_index_pages

Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-18 22:58:23 +00:00
copilot-swe-agent[bot]
433f845d73 Add tests for partition_index_cache_enabled option
- Added partition_index_cache_enabled parameter to prepare() method
- Added test_partition_index_cache_disabled_independently to verify the option works with cache_index_pages=True
- Added test_partition_index_cache_enabled_independently to verify the option works with cache_index_pages=False

Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-18 22:53:45 +00:00
copilot-swe-agent[bot]
622aabdbd2 Add partition_index_cache_enabled config option
- Added new config option 'partition_index_cache_enabled' (default: true)
- Added global variable 'global_partition_index_cache_enabled'
- Modified index_reader to take separate flags for cached_file and partition_index_cache
- Updated make_index_reader and kl/reader.cc to compute both flags independently
- cache_index_pages now only controls cached_file access
- partition_index_cache_enabled controls partition_index_cache usage

Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-18 22:50:12 +00:00
copilot-swe-agent[bot]
0346c6451d Initial plan 2025-12-18 22:44:36 +00:00
8 changed files with 103 additions and 10 deletions

View File

@@ -1489,6 +1489,8 @@ db::config::config(std::shared_ptr<db::extensions> exts)
, nodeops_heartbeat_interval_seconds(this, "nodeops_heartbeat_interval_seconds", liveness::LiveUpdate, value_status::Used, 10, "Period of heartbeat ticks in node operations.")
, cache_index_pages(this, "cache_index_pages", liveness::LiveUpdate, value_status::Used, true,
"Keep SSTable index pages in the global cache after a SSTable read. Expected to improve performance for workloads with big partitions, but may degrade performance for workloads with small partitions. The amount of memory usable by index cache is limited with ``index_cache_fraction``.")
, partition_index_cache_enabled(this, "partition_index_cache_enabled", liveness::LiveUpdate, value_status::Used, true,
"Enable partition_index_cache. When disabled, partition index entries are not cached in the global partition_index_cache, reducing memory usage but potentially degrading read performance. The ``cache_index_pages`` option controls caching of the index file pages themselves.")
, index_cache_fraction(this, "index_cache_fraction", liveness::LiveUpdate, value_status::Used, 0.2,
"The maximum fraction of cache memory permitted for use by index cache. Clamped to the [0.0; 1.0] range. Must be small enough to not deprive the row cache of memory, but should be big enough to fit a large fraction of the index. The default value 0.2 means that at least 80\% of cache memory is reserved for the row cache, while at most 20\% is usable by the index cache.")
, consistent_cluster_management(this, "consistent_cluster_management", value_status::Deprecated, true, "Use RAFT for cluster management and DDL.")

View File

@@ -496,6 +496,7 @@ public:
named_value<uint32_t> nodeops_heartbeat_interval_seconds;
named_value<bool> cache_index_pages;
named_value<bool> partition_index_cache_enabled;
named_value<double> index_cache_fraction;
named_value<bool> consistent_cluster_management;

View File

@@ -825,6 +825,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
// See the comment at the definition of sstables::global_cache_index_pages.
smp::invoke_on_all([&cfg] {
sstables::global_cache_index_pages = cfg->cache_index_pages.operator utils::updateable_value<bool>();
sstables::global_partition_index_cache_enabled = cfg->partition_index_cache_enabled.operator utils::updateable_value<bool>();
}).get();
::sighup_handler sighup_handler(opts, *cfg);

View File

@@ -779,15 +779,18 @@ public:
index_reader(shared_sstable sst, reader_permit permit,
tracing::trace_state_ptr trace_state = {},
use_caching caching = use_caching::yes,
bool single_partition_read = false)
bool single_partition_read = false,
use_caching use_partition_index_cache = use_caching::yes)
: _sstable(std::move(sst))
, _permit(std::move(permit))
, _trace_state(std::move(trace_state))
, _local_index_cache(caching ? nullptr
// When use_partition_index_cache is true, use the global shared cache (_sstable->_index_cache).
// When false, create a local non-shared cache that won't persist across reads.
, _local_index_cache(use_partition_index_cache ? nullptr
: std::make_unique<partition_index_cache>(_sstable->manager().get_cache_tracker().get_lru(),
_sstable->manager().get_cache_tracker().region(),
_sstable->manager().get_cache_tracker().get_partition_index_cache_stats()))
, _index_cache(caching ? *_sstable->_index_cache : *_local_index_cache)
, _index_cache(use_partition_index_cache ? *_sstable->_index_cache : *_local_index_cache)
, _alloc_section(abstract_formatter([sst = _sstable] (fmt::format_context& ctx) {
fmt::format_to(ctx.out(), "index_reader {}", sst->get_filename());
}))

View File

@@ -1191,9 +1191,11 @@ private:
}
index_reader& get_index_reader() {
if (!_index_reader) {
auto caching = use_caching(global_cache_index_pages && !_slice.options.contains(query::partition_slice::option::bypass_cache));
auto bypass_cache = _slice.options.contains(query::partition_slice::option::bypass_cache);
auto caching = use_caching(global_cache_index_pages && !bypass_cache);
auto use_partition_index_cache = use_caching(global_partition_index_cache_enabled && !bypass_cache);
_index_reader = std::make_unique<index_reader>(_sst, _consumer.permit(),
_consumer.trace_state(), caching, _single_partition_read);
_consumer.trace_state(), caching, _single_partition_read, use_partition_index_cache);
}
return *_index_reader;
}

View File

@@ -105,6 +105,17 @@ namespace sstables {
//
thread_local utils::updateable_value<bool> global_cache_index_pages(true);
// The below flag governs whether the partition_index_cache is enabled.
//
// If set to true, partition index entries are cached in a global partition_index_cache.
// If false, each index_reader creates its own local partition_index_cache that is
// not shared and not persistent across reads.
//
// This is independent from global_cache_index_pages, which controls caching of
// the index file pages themselves (cached_file).
//
thread_local utils::updateable_value<bool> global_partition_index_cache_enabled(true);
logging::logger sstlog("sstable");
[[noreturn]] void on_parse_error(sstring message, std::optional<component_name> filename) {
@@ -2528,8 +2539,10 @@ sstable::make_reader(
) {
const auto reversed = slice.is_reversed();
auto index_caching = use_caching(global_cache_index_pages && !slice.options.contains(query::partition_slice::option::bypass_cache));
auto index_reader = make_index_reader(permit, trace_state, index_caching, range.is_singular());
auto bypass_cache = slice.options.contains(query::partition_slice::option::bypass_cache);
auto index_caching = use_caching(global_cache_index_pages && !bypass_cache);
auto partition_index_caching = use_caching(global_partition_index_cache_enabled && !bypass_cache);
auto index_reader = make_index_reader(permit, trace_state, index_caching, range.is_singular(), partition_index_caching);
if (_version >= version_types::mc && (!reversed || range.is_singular())) {
return mx::make_reader(
@@ -3701,7 +3714,8 @@ std::unique_ptr<abstract_index_reader> sstable::make_index_reader(
reader_permit permit,
tracing::trace_state_ptr trace_state,
use_caching caching,
bool single_partition_read
bool single_partition_read,
use_caching use_partition_index_cache
) {
if (!_index_file) {
if (!_partitions_db_footer) [[unlikely]] {
@@ -3737,7 +3751,7 @@ std::unique_ptr<abstract_index_reader> sstable::make_index_reader(
std::move(trace_state)
);
}
return std::make_unique<index_reader>(shared_from_this(), std::move(permit), std::move(trace_state), caching, single_partition_read);
return std::make_unique<index_reader>(shared_from_this(), std::move(permit), std::move(trace_state), caching, single_partition_read, use_partition_index_cache);
}
// Returns error code, 0 is success

View File

@@ -69,6 +69,7 @@ namespace sstables {
struct abstract_index_reader;
class sstable_directory;
extern thread_local utils::updateable_value<bool> global_cache_index_pages;
extern thread_local utils::updateable_value<bool> global_partition_index_cache_enabled;
namespace mc {
class writer;
@@ -1083,7 +1084,8 @@ public:
reader_permit permit,
tracing::trace_state_ptr trace_state = {},
use_caching caching = use_caching::yes,
bool single_partition_read = false);
bool single_partition_read = false,
use_caching use_partition_index_cache = use_caching::yes);
// Allow the test cases from sstable_test.cc to test private methods. We use
// a placeholder to avoid cluttering this class too much. The sstable_test class

View File

@@ -50,6 +50,7 @@ class TestBypassCache(Tester):
insert_data=True,
smp=1,
cache_index_pages=None,
partition_index_cache_enabled=None,
):
self.keyspace_name = keyspace_name
self.table_name = table_name
@@ -59,6 +60,8 @@ class TestBypassCache(Tester):
jvm_args = ["--smp", str(smp)]
if cache_index_pages is not None:
jvm_args += ["--cache-index-pages", "1" if cache_index_pages else "0"]
if partition_index_cache_enabled is not None:
jvm_args += ["--partition-index-cache-enabled", "1" if partition_index_cache_enabled else "0"]
cluster.populate(nodes).start(jvm_args=jvm_args)
node1 = cluster.nodelist()[0]
session = self.patient_cql_connection(node1)
@@ -309,3 +312,68 @@ class TestBypassCache(Tester):
alter_cmd = f"ALTER TABLE {self.keyspace_name}.{self.table_name} WITH CACHING = {{'enabled': 'false'}}"
session.execute(alter_cmd)
assert not self.verify_used_memory_grow(node=node, session=session), "expected to have writes without cache"
def test_partition_index_cache_disabled_independently(self):
"""
Test that partition_index_cache_enabled can be disabled independently of cache_index_pages.
When partition_index_cache_enabled=False but cache_index_pages=True:
- The cached_file cache (scylla_sstables_index_page_cache_*) should still be active
- The partition_index_cache (scylla_sstables_index_page_*) should NOT be populated
"""
# Test with cache_index_pages=True but partition_index_cache_enabled=False
session = self.prepare(insert_data=False, cache_index_pages=True, partition_index_cache_enabled=False)
node = self.cluster.nodelist()[0]
create_c1c2_table(session, cf=self.table_name)
insert_c1c2(session, n=NUM_OF_QUERY_EXECUTIONS, cf=self.table_name, ks=self.keyspace_name)
node.flush()
query = f"select * from {self.table_name}"
# When partition_index_cache is disabled:
# - For BIG-index sstables (me format), scylla_sstables_index_page_hits should NOT increase
# because partition_index_cache is disabled (but cached_file cache should work)
# - For BTI-index sstables (ms format), only cached_file cache is used anyway
if self.sstable_format != "ms":
# For BIG-index format, verify partition_index_cache is not used
errors = self.run_query_and_check_metrics(
node,
session,
query,
metrics_validators={
"scylla_sstables_index_page_hits": self.gen_less_than(self.cache_thresh()),
"scylla_sstables_index_page_cache_hits": "increased_by_at_least_1", # cached_file should still work
"scylla_sstables_index_page_cache_misses": "increased_by_at_least_1",
"scylla_sstables_index_page_cache_populations": "increased_by_at_least_1",
},
)
assert not errors, "partition_index_cache should be disabled while cached_file cache is enabled:\n" + "\n".join(errors)
def test_partition_index_cache_enabled_independently(self):
"""
Test that partition_index_cache_enabled can be enabled independently of cache_index_pages.
When partition_index_cache_enabled=True but cache_index_pages=False:
- The cached_file cache (scylla_sstables_index_page_cache_*) should NOT be populated
- The partition_index_cache (scylla_sstables_index_page_*) should still be active (for BIG-index)
"""
# Test with cache_index_pages=False but partition_index_cache_enabled=True
session = self.prepare(insert_data=False, cache_index_pages=False, partition_index_cache_enabled=True)
node = self.cluster.nodelist()[0]
create_c1c2_table(session, cf=self.table_name)
insert_c1c2(session, n=NUM_OF_QUERY_EXECUTIONS, cf=self.table_name, ks=self.keyspace_name)
node.flush()
query = f"select * from {self.table_name}"
# When cache_index_pages is disabled but partition_index_cache is enabled:
# - For BIG-index sstables (me format), partition_index_cache should still populate
# - For BTI-index sstables (ms format), there's no partition_index_cache to use
if self.sstable_format != "ms":
# For BIG-index format, verify partition_index_cache is used
errors = self.run_query_and_check_metrics(
node,
session,
query,
metrics_validators={
"scylla_sstables_index_page_hits": self.gen_more_than(self.cache_thresh()),
"scylla_sstables_index_page_cache_hits": self.gen_less_than(self.cache_thresh()), # cached_file should not be used much
},
)
assert not errors, "partition_index_cache should be enabled while cached_file cache is disabled:\n" + "\n".join(errors)