sstables: Move binary_search() to a header

There are instantiations of binary_search() used in sstables.cc, but
defined in partition.cc. The instantiations are explicitly declared in
partition.cc, but the types changed and they became obsolete. The
thing worked because partition.cc also instantiated it with the right
type. But after that code will be removed, it no longer would, and we
would get a linker error. To avoid such problems, define
binary_search() in a header.
This commit is contained in:
Tomasz Grabiec
2017-04-11 18:20:07 +02:00
parent bedd0ab6f9
commit 4ed7e529db
5 changed files with 94 additions and 69 deletions

89
sstables/binary_search.hh Normal file
View File

@@ -0,0 +1,89 @@
/*
* Copyright (C) 2017 ScyllaDB
*
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "sstables/key.hh"
#include "dht/i_partitioner.hh"
namespace sstables {
/**
* @returns: >= 0, if key is found. That is the index where the key is found.
* -1, if key is not found, and is smaller than the first key in the list.
* <= -2, if key is not found, but is greater than one of the keys. By adding 2 and
* negating, one can determine the index before which the key would have to
* be inserted.
*
* Origin uses this slightly modified binary search for the Summary, that will
* indicate in which bucket the element would be in case it is not a match.
*
* For the Index entries, it uses a "normal", java.lang binary search. Because
* we have made the explicit decision to open code the comparator for
* efficiency, using a separate binary search would be possible, but very
* messy.
*
* It's easier to reuse the same code for both binary searches, and just ignore
* the extra information when not needed.
*
* This code should work in all kinds of vectors in whose's elements is possible to aquire
* a key view via get_key().
*/
template <typename T>
int binary_search(const T& entries, const key& sk, const dht::token& token) {
int low = 0, mid = entries.size(), high = mid - 1, result = -1;
auto& partitioner = dht::global_partitioner();
while (low <= high) {
// The token comparison should yield the right result most of the time.
// So we avoid expensive copying operations that happens at key
// creation by keeping only a key view, and then manually carrying out
// both parts of the comparison ourselves.
mid = low + ((high - low) >> 1);
key_view mid_key = entries[mid].get_key();
auto mid_token = partitioner.get_token(mid_key);
if (token == mid_token) {
result = sk.tri_compare(mid_key);
} else {
result = token < mid_token ? -1 : 1;
}
if (result > 0) {
low = mid + 1;
} else if (result < 0) {
high = mid - 1;
} else {
return mid;
}
}
return -mid - (result < 0 ? 1 : 2);
}
template <typename T>
int binary_search(const T& entries, const key& sk) {
return binary_search(entries, sk, dht::global_partitioner().get_token(key_view(sk)));
}
}

View File

@@ -33,68 +33,10 @@
#include "counters.hh"
#include "utils/data_input.hh"
#include "clustering_ranges_walker.hh"
#include "binary_search.hh"
namespace sstables {
/**
* @returns: >= 0, if key is found. That is the index where the key is found.
* -1, if key is not found, and is smaller than the first key in the list.
* <= -2, if key is not found, but is greater than one of the keys. By adding 2 and
* negating, one can determine the index before which the key would have to
* be inserted.
*
* Origin uses this slightly modified binary search for the Summary, that will
* indicate in which bucket the element would be in case it is not a match.
*
* For the Index entries, it uses a "normal", java.lang binary search. Because
* we have made the explicit decision to open code the comparator for
* efficiency, using a separate binary search would be possible, but very
* messy.
*
* It's easier to reuse the same code for both binary searches, and just ignore
* the extra information when not needed.
*
* This code should work in all kinds of vectors in whose's elements is possible to aquire
* a key view via get_key().
*/
template <typename T>
int sstable::binary_search(const T& entries, const key& sk, const dht::token& token) {
int low = 0, mid = entries.size(), high = mid - 1, result = -1;
auto& partitioner = dht::global_partitioner();
while (low <= high) {
// The token comparison should yield the right result most of the time.
// So we avoid expensive copying operations that happens at key
// creation by keeping only a key view, and then manually carrying out
// both parts of the comparison ourselves.
mid = low + ((high - low) >> 1);
key_view mid_key = entries[mid].get_key();
auto mid_token = partitioner.get_token(mid_key);
if (token == mid_token) {
result = sk.tri_compare(mid_key);
} else {
result = token < mid_token ? -1 : 1;
}
if (result > 0) {
low = mid + 1;
} else if (result < 0) {
high = mid - 1;
} else {
return mid;
}
}
return -mid - (result < 0 ? 1 : 2);
}
// Force generation, so we make it available outside this compilation unit without moving that
// much code to .hh
template int sstable::binary_search<>(const std::vector<summary_entry>& entries, const key& sk);
template int sstable::binary_search<>(const std::vector<index_entry>& entries, const key& sk);
static inline bytes pop_back(std::vector<bytes>& vec) {
auto b = std::move(vec.back());
vec.pop_back();
@@ -1146,7 +1088,7 @@ sstables::sstable::find_disk_ranges(
}
return read_indexes(summary_idx, pc).then([this, schema, &slice, &key, token, summary_idx, &pc] (auto index_list) {
auto index_idx = this->binary_search(index_list, key, token);
auto index_idx = binary_search(index_list, key, token);
if (index_idx < 0) {
return make_ready_future<disk_read_range>();
}

View File

@@ -55,6 +55,7 @@
#include "utils/phased_barrier.hh"
#include "range_tombstone_list.hh"
#include "counters.hh"
#include "binary_search.hh"
#include "checked-file-impl.hh"
#include "service/storage_service.hh"

View File

@@ -578,14 +578,6 @@ private:
// Returns data file position for an entry right after all entries mapped by given summary page.
future<uint64_t> data_end_position(uint64_t summary_idx, const io_priority_class& pc);
template <typename T>
int binary_search(const T& entries, const key& sk, const dht::token& token);
template <typename T>
int binary_search(const T& entries, const key& sk) {
return binary_search(entries, sk, dht::global_partitioner().get_token(key_view(sk)));
}
// find_disk_ranges finds the ranges of bytes we need to read from the
// sstable to read the desired columns out of the given key. This range
// may be the entire byte range of the given partition - as found using

View File

@@ -23,6 +23,7 @@
#pragma once
#include "sstables/sstables.hh"
#include "sstables/binary_search.hh"
#include "database.hh"
#include "schema.hh"
#include "schema_builder.hh"
@@ -92,7 +93,7 @@ public:
template <typename T>
int binary_search(const T& entries, const key& sk) {
return _sst->binary_search(entries, sk);
return sstables::binary_search(entries, sk);
}
void change_generation_number(int64_t generation) {