SSTables that contain many keys - a common case with small partitions in long-lived nodes - can generate filters that are quite large. I have seen stalls of over 80ms when reading a filter that was the result of a 6h write load of very small keys after nodetool compact (the filter was in the 100s of MB). Similar care should be taken when creating the filter: if the estimated number of partitions is big, the resulting large_bitset can be quite big as well. If we treat the i_filter.hh and large_bitset.hh interfaces as truly generic, then maybe we should have an in_thread version along with a common version. But the bloom filter is the only user of both, and even if that changes in the future, it is still a good idea to run something with a massive loop in a thread. So for simplicity, I am just asserting that we are on a thread to avoid surprises, and inserting preemption points in the loops. Signed-off-by: Glauber Costa <glauber@scylladb.com>
186 lines
5.9 KiB
C++
186 lines
5.9 KiB
C++
/*
|
|
* Copyright (C) 2015 ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* This file is part of Scylla.
|
|
*
|
|
* Scylla is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Affero General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* Scylla is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
// A bitset containing a very large number of bits, so it uses fragmented
|
|
// storage in order not to stress the memory allocator.
|
|
|
|
#pragma once
|
|
|
|
#include <memory>
|
|
#include <vector>
|
|
#include <limits>
|
|
#include <iterator>
|
|
#include <algorithm>
|
|
#include <seastar/core/thread.hh>
|
|
#include <seastar/core/preempt.hh>
|
|
|
|
// Lets the code below refer to `thread` and `need_preempt()` without
// qualification (presumably seastar::thread and seastar::need_preempt from
// the two seastar headers included above).
// NOTE(review): a using-directive at header scope is also visible to every
// file that includes this header - consider qualifying the names instead.
using namespace seastar;
|
|
|
|
// A run-time sized bitset whose bits are kept in a vector of separately
// allocated blocks (block_size() bytes each) rather than in one contiguous
// buffer.
//
// Bit `idx` is stored in block `idx / bits_per_block()`, inside word
// `(idx % bits_per_block()) / bits_per_int()` of that block, at bit
// `idx % bits_per_int()` counted from the least significant bit.
//
// test(), set() and clear(size_t) do not range-check their argument; the
// caller is responsible for passing an index that lies inside the allocated
// storage (presumably idx < size() - the constructor is defined elsewhere).
//
// The type is move-only: copying is disabled, move construction and move
// assignment are defaulted.
class large_bitset {
    // Word type the bits are packed into.
    using int_type = unsigned long;

    // Size in bytes of one storage block.
    static constexpr size_t block_size() { return 128 * 1024; }

    // Number of bits held by one int_type word.
    static constexpr size_t bits_per_int() {
        return std::numeric_limits<int_type>::digits;
    }

    // Number of int_type words in one storage block.
    static constexpr size_t ints_per_block() {
        return block_size() / sizeof(int_type);
    }

    // Number of bits held by one storage block.
    static constexpr size_t bits_per_block() {
        return ints_per_block() * bits_per_int();
    }

    // Coordinates of a single bit inside _storage.
    struct bit_location {
        size_t block;  // index into _storage
        size_t word;   // index of the word inside that block
        size_t shift;  // bit position inside the word, 0 = least significant
    };

    // Splits a flat bit index into its (block, word, shift) coordinates.
    static bit_location locate(size_t idx) {
        const size_t within_block = idx % bits_per_block();
        return bit_location{
            idx / bits_per_block(),
            within_block / bits_per_int(),
            within_block % bits_per_int(),
        };
    }

    size_t _nr_bits = 0;
    std::vector<std::unique_ptr<int_type[]>> _storage;

public:
    // Defined out of line (not in this header).
    explicit large_bitset(size_t nr_bits);

    large_bitset(large_bitset&&) = default;
    large_bitset& operator=(large_bitset&&) = default;
    large_bitset(const large_bitset&) = delete;
    large_bitset& operator=(const large_bitset&) = delete;

    // Returns the stored bit count (_nr_bits).
    size_t size() const {
        return _nr_bits;
    }

    // Returns the bytes accounted to this object: one block_size() per
    // allocated block plus the size of the bit counter.
    size_t memory_size() const {
        return block_size() * _storage.size() + sizeof(_nr_bits);
    }

    // Returns true if bit `idx` is set.
    bool test(size_t idx) const {
        const bit_location loc = locate(idx);
        return (_storage[loc.block][loc.word] >> loc.shift) & 1;
    }

    // Sets bit `idx` to 1.
    void set(size_t idx) {
        const bit_location loc = locate(idx);
        _storage[loc.block][loc.word] |= int_type(1) << loc.shift;
    }

    // Sets bit `idx` to 0.
    void clear(size_t idx) {
        const bit_location loc = locate(idx);
        _storage[loc.block][loc.word] &= ~(int_type(1) << loc.shift);
    }

    // Defined out of line (not in this header).
    void clear();

    // load data from host bitmap (in host byte order); returns end bit position
    template <typename IntegerIterator>
    size_t load(IntegerIterator start, IntegerIterator finish, size_t position = 0);

    // Writes up to `n` bits, starting at bit `position`, to `out` as
    // integers and returns the advanced output iterator.
    template <typename IntegerIterator>
    IntegerIterator save(IntegerIterator out, size_t position = 0, size_t n = std::numeric_limits<size_t>::max());
};
|
|
|
|
template <typename IntegerIterator>
|
|
size_t
|
|
large_bitset::load(IntegerIterator start, IntegerIterator finish, size_t position) {
|
|
assert(thread::running_in_thread());
|
|
|
|
using input_int_type = typename std::iterator_traits<IntegerIterator>::value_type;
|
|
if (position % bits_per_int() == 0 && sizeof(input_int_type) == sizeof(int_type)) {
|
|
auto idx = position;
|
|
auto idx1 = idx / bits_per_block();
|
|
idx %= bits_per_block();
|
|
auto idx2 = idx / bits_per_int();
|
|
while (start != finish) {
|
|
auto now = std::min<size_t>(ints_per_block() - idx2, std::distance(start, finish));
|
|
std::copy_n(start, now, _storage[idx1].get() + idx2);
|
|
start += now;
|
|
++idx1;
|
|
idx2 = 0;
|
|
if (need_preempt()) {
|
|
thread::yield();
|
|
}
|
|
}
|
|
} else {
|
|
while (start != finish) {
|
|
auto bitmask = *start++;
|
|
for (size_t i = 0; i < std::numeric_limits<input_int_type>::digits; ++i) {
|
|
if (bitmask & 1) {
|
|
set(position);
|
|
} else {
|
|
clear(position);
|
|
}
|
|
bitmask >>= 1;
|
|
++position;
|
|
if (need_preempt()) {
|
|
thread::yield();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return position;
|
|
}
|
|
|
|
template <typename IntegerIterator>
|
|
IntegerIterator
|
|
large_bitset::save(IntegerIterator out, size_t position, size_t n) {
|
|
assert(thread::running_in_thread());
|
|
n = std::min(n, size() - position);
|
|
using output_int_type = typename std::iterator_traits<IntegerIterator>::value_type;
|
|
if (position % bits_per_int() == 0
|
|
&& n % bits_per_int() == 0
|
|
&& sizeof(output_int_type) == sizeof(int_type)) {
|
|
auto idx = position;
|
|
auto idx1 = idx / bits_per_block();
|
|
idx %= bits_per_block();
|
|
auto idx2 = idx / bits_per_int();
|
|
auto n_ints = n / bits_per_int();
|
|
while (n_ints) {
|
|
auto now = std::min(ints_per_block() - idx2, n_ints);
|
|
out = std::copy_n(_storage[idx1].get() + idx2, now, out);
|
|
++idx1;
|
|
idx2 = 0;
|
|
n_ints -= now;
|
|
if (need_preempt()) {
|
|
thread::yield();
|
|
}
|
|
}
|
|
} else {
|
|
output_int_type result = 0;
|
|
unsigned bitpos = 0;
|
|
while (n) {
|
|
result |= output_int_type(test(position)) << bitpos;
|
|
++position;
|
|
++bitpos;
|
|
--n;
|
|
if (bitpos == std::numeric_limits<output_int_type>::digits) {
|
|
*out = result;
|
|
++out;
|
|
result = 0;
|
|
bitpos = 0;
|
|
}
|
|
if (need_preempt()) {
|
|
thread::yield();
|
|
}
|
|
}
|
|
if (bitpos) {
|
|
*out = result;
|
|
++out;
|
|
}
|
|
}
|
|
return out;
|
|
}
|