From 4ed7e529dbd2e61c7d306d14fb04dce2dae12b89 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Tue, 11 Apr 2017 18:20:07 +0200 Subject: [PATCH] sstables: Move binary_search() to a header There are instantiations of binary_search() used in sstables.cc, but defined in partition.cc. The instantiations are explicitly declared in partition.cc, but the types changed and they became obsolete. The thing worked because partition.cc also instantiated it with the right type. But after that code will be removed, it no longer would, and we would get a linker error. To avoid such problems, define binary_search() in a header. --- sstables/binary_search.hh | 89 +++++++++++++++++++++++++++++++++++++++ sstables/partition.cc | 62 +-------------------------- sstables/sstables.cc | 1 + sstables/sstables.hh | 8 ---- tests/sstable_test.hh | 3 +- 5 files changed, 94 insertions(+), 69 deletions(-) create mode 100644 sstables/binary_search.hh diff --git a/sstables/binary_search.hh b/sstables/binary_search.hh new file mode 100644 index 0000000000..49d269fb18 --- /dev/null +++ b/sstables/binary_search.hh @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2017 ScyllaDB + * + */ + +/* + * This file is part of Scylla. + * + * Scylla is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Scylla is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Scylla. If not, see . + */ + +#pragma once + +#include "sstables/key.hh" +#include "dht/i_partitioner.hh" + +namespace sstables { + +/** + * @returns: >= 0, if key is found. That is the index where the key is found. + * -1, if key is not found, and is smaller than the first key in the list. + * <= -2, if key is not found, but is greater than one of the keys. By adding 2 and + * negating, one can determine the index before which the key would have to + * be inserted. + * + * Origin uses this slightly modified binary search for the Summary, that will + * indicate in which bucket the element would be in case it is not a match. + * + * For the Index entries, it uses a "normal", java.lang binary search. Because + * we have made the explicit decision to open code the comparator for + * efficiency, using a separate binary search would be possible, but very + * messy. + * + * It's easier to reuse the same code for both binary searches, and just ignore + * the extra information when not needed. + * + * This code should work in all kinds of vectors in whose's elements is possible to aquire + * a key view via get_key(). + */ +template +int binary_search(const T& entries, const key& sk, const dht::token& token) { + int low = 0, mid = entries.size(), high = mid - 1, result = -1; + + auto& partitioner = dht::global_partitioner(); + + while (low <= high) { + // The token comparison should yield the right result most of the time. + // So we avoid expensive copying operations that happens at key + // creation by keeping only a key view, and then manually carrying out + // both parts of the comparison ourselves. + mid = low + ((high - low) >> 1); + key_view mid_key = entries[mid].get_key(); + auto mid_token = partitioner.get_token(mid_key); + + if (token == mid_token) { + result = sk.tri_compare(mid_key); + } else { + result = token < mid_token ? -1 : 1; + } + + if (result > 0) { + low = mid + 1; + } else if (result < 0) { + high = mid - 1; + } else { + return mid; + } + } + + return -mid - (result < 0 ? 1 : 2); +} + +template +int binary_search(const T& entries, const key& sk) { + return binary_search(entries, sk, dht::global_partitioner().get_token(key_view(sk))); +} + +} diff --git a/sstables/partition.cc b/sstables/partition.cc index 72716d855a..01d51ef136 100644 --- a/sstables/partition.cc +++ b/sstables/partition.cc @@ -33,68 +33,10 @@ #include "counters.hh" #include "utils/data_input.hh" #include "clustering_ranges_walker.hh" +#include "binary_search.hh" namespace sstables { -/** - * @returns: >= 0, if key is found. That is the index where the key is found. - * -1, if key is not found, and is smaller than the first key in the list. - * <= -2, if key is not found, but is greater than one of the keys. By adding 2 and - * negating, one can determine the index before which the key would have to - * be inserted. - * - * Origin uses this slightly modified binary search for the Summary, that will - * indicate in which bucket the element would be in case it is not a match. - * - * For the Index entries, it uses a "normal", java.lang binary search. Because - * we have made the explicit decision to open code the comparator for - * efficiency, using a separate binary search would be possible, but very - * messy. - * - * It's easier to reuse the same code for both binary searches, and just ignore - * the extra information when not needed. - * - * This code should work in all kinds of vectors in whose's elements is possible to aquire - * a key view via get_key(). - */ -template -int sstable::binary_search(const T& entries, const key& sk, const dht::token& token) { - int low = 0, mid = entries.size(), high = mid - 1, result = -1; - - auto& partitioner = dht::global_partitioner(); - - while (low <= high) { - // The token comparison should yield the right result most of the time. - // So we avoid expensive copying operations that happens at key - // creation by keeping only a key view, and then manually carrying out - // both parts of the comparison ourselves. - mid = low + ((high - low) >> 1); - key_view mid_key = entries[mid].get_key(); - auto mid_token = partitioner.get_token(mid_key); - - if (token == mid_token) { - result = sk.tri_compare(mid_key); - } else { - result = token < mid_token ? -1 : 1; - } - - if (result > 0) { - low = mid + 1; - } else if (result < 0) { - high = mid - 1; - } else { - return mid; - } - } - - return -mid - (result < 0 ? 1 : 2); -} - -// Force generation, so we make it available outside this compilation unit without moving that -// much code to .hh -template int sstable::binary_search<>(const std::vector& entries, const key& sk); -template int sstable::binary_search<>(const std::vector& entries, const key& sk); - static inline bytes pop_back(std::vector& vec) { auto b = std::move(vec.back()); vec.pop_back(); @@ -1146,7 +1088,7 @@ sstables::sstable::find_disk_ranges( } return read_indexes(summary_idx, pc).then([this, schema, &slice, &key, token, summary_idx, &pc] (auto index_list) { - auto index_idx = this->binary_search(index_list, key, token); + auto index_idx = binary_search(index_list, key, token); if (index_idx < 0) { return make_ready_future(); } diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 85dcc588ab..d83b5ced78 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -55,6 +55,7 @@ #include "utils/phased_barrier.hh" #include "range_tombstone_list.hh" #include "counters.hh" +#include "binary_search.hh" #include "checked-file-impl.hh" #include "service/storage_service.hh" diff --git a/sstables/sstables.hh b/sstables/sstables.hh index a933a04c2f..29b6962b36 100644 --- a/sstables/sstables.hh +++ b/sstables/sstables.hh @@ -578,14 +578,6 @@ private: // Returns data file position for an entry right after all entries mapped by given summary page. future data_end_position(uint64_t summary_idx, const io_priority_class& pc); - template - int binary_search(const T& entries, const key& sk, const dht::token& token); - - template - int binary_search(const T& entries, const key& sk) { - return binary_search(entries, sk, dht::global_partitioner().get_token(key_view(sk))); - } - // find_disk_ranges finds the ranges of bytes we need to read from the // sstable to read the desired columns out of the given key. This range // may be the entire byte range of the given partition - as found using diff --git a/tests/sstable_test.hh b/tests/sstable_test.hh index 5c26927e47..3c6c003c96 100644 --- a/tests/sstable_test.hh +++ b/tests/sstable_test.hh @@ -23,6 +23,7 @@ #pragma once #include "sstables/sstables.hh" +#include "sstables/binary_search.hh" #include "database.hh" #include "schema.hh" #include "schema_builder.hh" @@ -92,7 +93,7 @@ public: template int binary_search(const T& entries, const key& sk) { - return _sst->binary_search(entries, sk); + return sstables::binary_search(entries, sk); } void change_generation_number(int64_t generation) {