diff --git a/sstables/abstract_index_reader.hh b/sstables/abstract_index_reader.hh index f779de9ebf..fe8f3cfdcd 100644 --- a/sstables/abstract_index_reader.hh +++ b/sstables/abstract_index_reader.hh @@ -12,6 +12,10 @@ #include "mutation/position_in_partition.hh" #include "sstables/types.hh" +namespace utils { + struct hashed_key; +} + namespace sstables { struct data_file_positions_range { @@ -61,7 +65,12 @@ public: // Returns `true` iff it's possible that `key` is a partition key present in the sstable. // (In other words, if it returns `false`, then the key is definitely not present. // Otherwise it's unknown if it's present). - // + // + // If `hash` is provided, it must be the murmur hash of the partition key in `key`. + // (Some index use the hash to filter out false positives. They could compute the hash + // themselves, but since the caller often has to compute the hash anyway, + // it can be passed down to avoid recomputation). + // // If the return value is `false`, the reader becomes broken and cannot be used again. // (This method is only used for single-partition reads, so no reason to keep the reader // usable after we know that the entire sstable read is already doomed). @@ -71,6 +80,7 @@ public: // Note: this is the most important and performance-sensitive method of the reader. // This is what's used by sstable readers to find positions for single-partition reads. virtual future advance_lower_and_check_if_present(dht::ring_position_view key) = 0; + virtual future advance_lower_and_check_if_present(dht::ring_position_view key, const utils::hashed_key& hash) = 0; // Advances lower bound to the first PK greater than dk. // // Preconditions: dk >= lower bound, dk is present in the sstable diff --git a/sstables/index_reader.hh b/sstables/index_reader.hh index bee970d3c8..f5ae10ff28 100644 --- a/sstables/index_reader.hh +++ b/sstables/index_reader.hh @@ -977,6 +977,9 @@ public: }); }); } + future advance_lower_and_check_if_present(dht::ring_position_view key, const utils::hashed_key&) override { + return advance_lower_and_check_if_present(key); + } // Advances the upper bound to the partition immediately following the partition of the lower bound. // diff --git a/sstables/trie/bti_index_reader.cc b/sstables/trie/bti_index_reader.cc index 1ff032d921..a11a316d8b 100644 --- a/sstables/trie/bti_index_reader.cc +++ b/sstables/trie/bti_index_reader.cc @@ -13,6 +13,7 @@ #include "bti_node_reader.hh" #include "trie_traversal.hh" #include "bti_key_translation.hh" +#include "utils/i_filter.hh" #include #include #include @@ -432,6 +433,7 @@ public: virtual sstables::data_file_positions_range data_file_positions() const override; virtual future> last_block_offset() override; virtual future advance_lower_and_check_if_present(dht::ring_position_view key) override; + virtual future advance_lower_and_check_if_present(dht::ring_position_view key, const utils::hashed_key&) override; virtual future<> advance_to_next_partition() override; virtual sstables::indexable_element element_kind() const override; virtual future<> advance_past_definitely_present_partition(const dht::decorated_key& dk) override; @@ -778,13 +780,21 @@ bti_index_reader::bti_index_reader( } future bti_index_reader::advance_lower_and_check_if_present(dht::ring_position_view key) { + const partition_key* pk = key.key(); + // Note: if `pk` is null, then `key` is not a "at partition key" position, + // and in this case the return value of the function doesn't matter. + // So we can just make up some hash instead of adding a code path + // for no hash. + auto hk = pk + ? utils::make_hashed_key(static_cast(key::from_partition_key(*_s, *pk))) + : utils::hashed_key({0, 0}); + return advance_lower_and_check_if_present(key, hk); +} + +future bti_index_reader::advance_lower_and_check_if_present(dht::ring_position_view key, const utils::hashed_key& hash) { trie_logger.debug("bti_index_reader::advance_lower_and_check_if_present: this={} key={}", fmt::ptr(this), key); auto k = lazy_comparable_bytes_from_ring_position(*_s, key); - // FIXME: we calculate the murmur hash when creating the "presence checker" - // on bloom filters. - // It's a waste of work to hash the key again. - // The hash should be passed to here from above. - std::byte expected_hash = hash_byte_from_key(*_s, *key.key()); + std::byte expected_hash = std::byte(hash.hash()[1]); auto res = co_await _lower.set_to_partition(k, expected_hash); co_return res == set_result::possible_match; co_return true; diff --git a/test/boost/sstable_inexact_index_test.cc b/test/boost/sstable_inexact_index_test.cc index 232b42cc61..dfb0a965d4 100644 --- a/test/boost/sstable_inexact_index_test.cc +++ b/test/boost/sstable_inexact_index_test.cc @@ -117,6 +117,9 @@ struct inexact_partition_index : abstract_index_reader { testlog.trace("(true); } + future advance_lower_and_check_if_present(dht::ring_position_view rpv, const utils::hashed_key& hash) override { + return advance_lower_and_check_if_present(rpv); + } future<> advance_upper_past(position_in_partition_view pos) override { auto cmp = position_in_partition::less_compare(*_s); auto pk_idx = pos_idx_to_pk_idx(_lower);