diff --git a/sstables/binary_search.hh b/sstables/binary_search.hh new file mode 100644 index 0000000000..49d269fb18 --- /dev/null +++ b/sstables/binary_search.hh @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2017 ScyllaDB + * + */ + +/* + * This file is part of Scylla. + * + * Scylla is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Scylla is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Scylla. If not, see . + */ + +#pragma once + +#include "sstables/key.hh" +#include "dht/i_partitioner.hh" + +namespace sstables { + +/** + * @returns: >= 0, if key is found. That is the index where the key is found. + * -1, if key is not found, and is smaller than the first key in the list. + * <= -2, if key is not found, but is greater than one of the keys. By adding 2 and + * negating, one can determine the index before which the key would have to + * be inserted. + * + * Origin uses this slightly modified binary search for the Summary, that will + * indicate in which bucket the element would be in case it is not a match. + * + * For the Index entries, it uses a "normal", java.lang binary search. Because + * we have made the explicit decision to open code the comparator for + * efficiency, using a separate binary search would be possible, but very + * messy. + * + * It's easier to reuse the same code for both binary searches, and just ignore + * the extra information when not needed. + * + * This code should work in all kinds of vectors in whose's elements is possible to aquire + * a key view via get_key(). + */ +template +int binary_search(const T& entries, const key& sk, const dht::token& token) { + int low = 0, mid = entries.size(), high = mid - 1, result = -1; + + auto& partitioner = dht::global_partitioner(); + + while (low <= high) { + // The token comparison should yield the right result most of the time. + // So we avoid expensive copying operations that happens at key + // creation by keeping only a key view, and then manually carrying out + // both parts of the comparison ourselves. + mid = low + ((high - low) >> 1); + key_view mid_key = entries[mid].get_key(); + auto mid_token = partitioner.get_token(mid_key); + + if (token == mid_token) { + result = sk.tri_compare(mid_key); + } else { + result = token < mid_token ? -1 : 1; + } + + if (result > 0) { + low = mid + 1; + } else if (result < 0) { + high = mid - 1; + } else { + return mid; + } + } + + return -mid - (result < 0 ? 1 : 2); +} + +template +int binary_search(const T& entries, const key& sk) { + return binary_search(entries, sk, dht::global_partitioner().get_token(key_view(sk))); +} + +} diff --git a/sstables/partition.cc b/sstables/partition.cc index 72716d855a..01d51ef136 100644 --- a/sstables/partition.cc +++ b/sstables/partition.cc @@ -33,68 +33,10 @@ #include "counters.hh" #include "utils/data_input.hh" #include "clustering_ranges_walker.hh" +#include "binary_search.hh" namespace sstables { -/** - * @returns: >= 0, if key is found. That is the index where the key is found. - * -1, if key is not found, and is smaller than the first key in the list. - * <= -2, if key is not found, but is greater than one of the keys. By adding 2 and - * negating, one can determine the index before which the key would have to - * be inserted. - * - * Origin uses this slightly modified binary search for the Summary, that will - * indicate in which bucket the element would be in case it is not a match. - * - * For the Index entries, it uses a "normal", java.lang binary search. Because - * we have made the explicit decision to open code the comparator for - * efficiency, using a separate binary search would be possible, but very - * messy. - * - * It's easier to reuse the same code for both binary searches, and just ignore - * the extra information when not needed. - * - * This code should work in all kinds of vectors in whose's elements is possible to aquire - * a key view via get_key(). - */ -template -int sstable::binary_search(const T& entries, const key& sk, const dht::token& token) { - int low = 0, mid = entries.size(), high = mid - 1, result = -1; - - auto& partitioner = dht::global_partitioner(); - - while (low <= high) { - // The token comparison should yield the right result most of the time. - // So we avoid expensive copying operations that happens at key - // creation by keeping only a key view, and then manually carrying out - // both parts of the comparison ourselves. - mid = low + ((high - low) >> 1); - key_view mid_key = entries[mid].get_key(); - auto mid_token = partitioner.get_token(mid_key); - - if (token == mid_token) { - result = sk.tri_compare(mid_key); - } else { - result = token < mid_token ? -1 : 1; - } - - if (result > 0) { - low = mid + 1; - } else if (result < 0) { - high = mid - 1; - } else { - return mid; - } - } - - return -mid - (result < 0 ? 1 : 2); -} - -// Force generation, so we make it available outside this compilation unit without moving that -// much code to .hh -template int sstable::binary_search<>(const std::vector& entries, const key& sk); -template int sstable::binary_search<>(const std::vector& entries, const key& sk); - static inline bytes pop_back(std::vector& vec) { auto b = std::move(vec.back()); vec.pop_back(); @@ -1146,7 +1088,7 @@ sstables::sstable::find_disk_ranges( } return read_indexes(summary_idx, pc).then([this, schema, &slice, &key, token, summary_idx, &pc] (auto index_list) { - auto index_idx = this->binary_search(index_list, key, token); + auto index_idx = binary_search(index_list, key, token); if (index_idx < 0) { return make_ready_future(); } diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 85dcc588ab..d83b5ced78 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -55,6 +55,7 @@ #include "utils/phased_barrier.hh" #include "range_tombstone_list.hh" #include "counters.hh" +#include "binary_search.hh" #include "checked-file-impl.hh" #include "service/storage_service.hh" diff --git a/sstables/sstables.hh b/sstables/sstables.hh index a933a04c2f..29b6962b36 100644 --- a/sstables/sstables.hh +++ b/sstables/sstables.hh @@ -578,14 +578,6 @@ private: // Returns data file position for an entry right after all entries mapped by given summary page. future data_end_position(uint64_t summary_idx, const io_priority_class& pc); - template - int binary_search(const T& entries, const key& sk, const dht::token& token); - - template - int binary_search(const T& entries, const key& sk) { - return binary_search(entries, sk, dht::global_partitioner().get_token(key_view(sk))); - } - // find_disk_ranges finds the ranges of bytes we need to read from the // sstable to read the desired columns out of the given key. This range // may be the entire byte range of the given partition - as found using diff --git a/tests/sstable_test.hh b/tests/sstable_test.hh index 5c26927e47..3c6c003c96 100644 --- a/tests/sstable_test.hh +++ b/tests/sstable_test.hh @@ -23,6 +23,7 @@ #pragma once #include "sstables/sstables.hh" +#include "sstables/binary_search.hh" #include "database.hh" #include "schema.hh" #include "schema_builder.hh" @@ -92,7 +93,7 @@ public: template int binary_search(const T& entries, const key& sk) { - return _sst->binary_search(entries, sk); + return sstables::binary_search(entries, sk); } void change_generation_number(int64_t generation) {