mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-27 03:45:11 +00:00
Before this commit the primary key was hashed for the bloom filter check
separately for each sstable.
This commit makes the key be hashed once per sstable set and reused
for bloom filter lookups in all sstables in the set.
I tested this change using perf_simple_query with the following modifications:
1. Create more than one sstable to have an sstable set with more than one element
2. Try to prevent compactions (I wasn't 100% successful)
3. Use a key that's not present to avoid reading from disk
```
diff --git a/test/perf/perf_simple_query.cc b/test/perf/perf_simple_query.cc
index 26dbf1e99..6bd460df2 100644
--- a/test/perf/perf_simple_query.cc
+++ b/test/perf/perf_simple_query.cc
@@ -105,6 +105,8 @@ std::ostream& operator<<(std::ostream& os, const test_config& cfg) {
static void create_partitions(cql_test_env& env, test_config& cfg) {
std::cout << "Creating " << cfg.partitions << " partitions..." << std::endl;
+ // Create 10 sstables each with all the data
+ for (unsigned count = 0; count < 10; ++count) {
for (unsigned sequence = 0; sequence < cfg.partitions; ++sequence) {
if (cfg.counters) {
execute_counter_update_for_key(env, make_key(sequence));
@@ -117,6 +119,7 @@ static void create_partitions(cql_test_env& env, test_config& cfg) {
std::cout << "Flushing partitions..." << std::endl;
env.db().invoke_on_all(&replica::database::flush_all_memtables).get();
}
+ }
}
static int64_t make_random_seq(test_config& cfg) {
@@ -137,8 +140,18 @@ static std::vector<perf_result> test_read(cql_test_env& env, test_config& cfg) {
query += " using timeout " + cfg.timeout;
}
auto id = env.prepare(query).get0();
- return time_parallel([&env, &cfg, id] {
- bytes key = make_random_key(cfg);
+ // Always use the same key that is not present
+ // to make sure we don't read from disk and make
+ // the benchmark CPU bounded.
+ int64_t key_value = 6;
+ bytes key(bytes::initialized_later(), 5*sizeof(key_value));
+ auto i = key.begin();
+ write<uint64_t>(i, key_value);
+ write<uint64_t>(i, key_value);
+ write<uint64_t>(i, key_value);
+ write<uint64_t>(i, key_value);
+ write<uint64_t>(i, key_value);
+ return time_parallel([&env, id, key] {
return env.execute_prepared(id, {{cql3::raw_value::make_value(std::move(key))}}).discard_result();
}, cfg.concurrency, cfg.duration_in_seconds, cfg.operations_per_shard, cfg.stop_on_error);
}
@@ -423,6 +436,10 @@ static std::vector<perf_result> do_cql_test(cql_test_env& env, test_config& cfg)
.with_column("C2", bytes_type)
.with_column("C3", bytes_type)
.with_column("C4", bytes_type)
+ // Try to prevent compaction
+ // to keep the number of sstables high
+ .set_compaction_enabled(false)
+ .set_min_compaction_threshold(2000000000)
.build();
}).get();
@@ -539,6 +556,11 @@ int scylla_simple_query_main(int argc, char** argv) {
const auto enable_cache = app.configuration()["enable-cache"].as<bool>();
std::cout << "enable-cache=" << enable_cache << '\n';
db_cfg->enable_cache(enable_cache);
+ // Try to prevent compaction
+ // to keep the number of sstables high
+ db_cfg->concurrent_compactors(1);
+ db_cfg->compaction_enforce_min_threshold(true);
+ db_cfg->compaction_throughput_mb_per_sec(1);
cql_test_config cfg(db_cfg);
return do_with_cql_env_thread([&app] (auto&& env) {
```
The following command showed a 2-3% improvement on my machine, but this
depends on the length of the key and the number of sstables in the set.
```
./build/release/scylla perf-simple-query --bypass-cache --flush -c 1
--random-seed=2068087418 --enable-cache false
```
Signed-off-by: Piotr Jastrzebski <haaawk@gmail.com>
Closes scylladb/scylladb#15538