assert() is traditionally disabled in release builds, but not in
scylladb. This hasn't caused problems so far, but the latest abseil
release includes a commit [1] that causes a 1000 insn/op regression when
NDEBUG is not defined.
Clearly, we must move towards a build system where NDEBUG is defined in
release builds. But we can't just define it blindly without vetting
all the assert() calls, as some were written with the expectation that
they are enabled in release mode.
To solve the conundrum, change all assert() calls to a new SCYLLA_ASSERT()
macro in utils/assert.hh. This macro is always defined and is not conditional
on NDEBUG, so we can later (after vetting Seastar) enable NDEBUG in release
mode.
[1] 66ef711d68
Closes scylladb/scylladb#20006
174 lines
6.9 KiB
C++
174 lines
6.9 KiB
C++
/*
|
|
* Copyright 2015-present ScyllaDB
|
|
*
|
|
* Modified by ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: (AGPL-3.0-or-later and Apache-2.0)
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "utils/assert.hh"
|
|
#include <vector>
|
|
#include <array>
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
|
|
namespace sstables {
|
|
|
|
class downsampling {
|
|
public:
|
|
/**
|
|
* The base (down)sampling level determines the granularity at which we can down/upsample.
|
|
*
|
|
* A higher number allows us to approximate more closely the ideal sampling. (It could also mean we do a lot of
|
|
* expensive almost-no-op resamplings from N to N-1, but the thresholds in IndexSummaryManager prevent that.)
|
|
*
|
|
* BSL must be a power of two in order to have good sampling patterns. This cannot be changed without rebuilding
|
|
* all index summaries at full sampling; for now we treat it as a constant.
|
|
*/
|
|
static constexpr int BASE_SAMPLING_LEVEL = 128;
|
|
|
|
static thread_local std::array<std::vector<int>, BASE_SAMPLING_LEVEL> _sample_pattern_cache;
|
|
|
|
static thread_local std::array<std::vector<int>, BASE_SAMPLING_LEVEL> _original_index_cache;
|
|
|
|
/**
|
|
* Gets a list L of starting indices for downsampling rounds: the first round should start with the offset
|
|
* given by L[0], the second by the offset in L[1], etc.
|
|
*
|
|
* @param sampling_level the base sampling level
|
|
*
|
|
* @return A list of `sampling_level` unique indices between 0 and `sampling_level`
|
|
*/
|
|
static const std::vector<int>& get_sampling_pattern(int sampling_level) {
|
|
SCYLLA_ASSERT(sampling_level > 0 && sampling_level <= BASE_SAMPLING_LEVEL);
|
|
auto& entry = _sample_pattern_cache[sampling_level-1];
|
|
if (!entry.empty()) {
|
|
return entry;
|
|
}
|
|
|
|
if (sampling_level <= 1) {
|
|
SCYLLA_ASSERT(_sample_pattern_cache[0].empty());
|
|
_sample_pattern_cache[0].push_back(0);
|
|
return _sample_pattern_cache[0];
|
|
}
|
|
|
|
std::vector<int> odds;
|
|
std::vector<int> evens;
|
|
odds.resize(sampling_level / 2);
|
|
evens.resize(sampling_level / 2);
|
|
for (int i = 1; i < sampling_level; i += 2) {
|
|
odds[i/2] = i;
|
|
}
|
|
for (int i = 0; i < sampling_level; i += 2) {
|
|
evens[i/2] = i;
|
|
}
|
|
|
|
// especially for latter rounds, it's important that we spread out the start points, so we'll
|
|
// make a recursive call to get an ordering for this list of start points
|
|
const std::vector<int>& ordering = get_sampling_pattern(sampling_level/2);
|
|
std::vector<int> start_indices;
|
|
start_indices.reserve(sampling_level);
|
|
|
|
for (auto index : ordering) {
|
|
start_indices.push_back(odds[index]);
|
|
}
|
|
for (auto index : ordering) {
|
|
start_indices.push_back(evens[index]);
|
|
}
|
|
|
|
_sample_pattern_cache[sampling_level-1] = std::move(start_indices);
|
|
return _sample_pattern_cache[sampling_level-1];
|
|
}
|
|
|
|
/**
|
|
* Returns a list that can be used to translate current index summary indexes to their original index before
|
|
* downsampling. (This repeats every `sampling_level`, so that's how many entries we return.)
|
|
*
|
|
* For example, if [7, 15] is returned, the current index summary entry at index 0 was originally
|
|
* at index 7, and the current index 1 was originally at index 15.
|
|
*
|
|
* @param sampling_level the current sampling level for the index summary
|
|
*
|
|
* @return a list of original indexes for current summary entries
|
|
*/
|
|
static const std::vector<int>& get_original_indexes(int sampling_level) {
|
|
SCYLLA_ASSERT(sampling_level > 0 && sampling_level <= BASE_SAMPLING_LEVEL);
|
|
auto& entry = _original_index_cache[sampling_level-1];
|
|
if (!entry.empty()) {
|
|
return entry;
|
|
}
|
|
|
|
const std::vector<int>& pattern = get_sampling_pattern(BASE_SAMPLING_LEVEL);
|
|
std::vector<int> original_indexes;
|
|
|
|
auto pattern_end = pattern.begin() + (BASE_SAMPLING_LEVEL - sampling_level);
|
|
for (int j = 0; j < BASE_SAMPLING_LEVEL; j++) {
|
|
auto it = std::find(pattern.begin(), pattern_end, j);
|
|
if (it == pattern_end) {
|
|
// add j to original_indexes if not found in pattern.
|
|
original_indexes.push_back(j);
|
|
}
|
|
}
|
|
|
|
_original_index_cache[sampling_level-1] = std::move(original_indexes);
|
|
return _original_index_cache[sampling_level-1];
|
|
}
|
|
|
|
/**
|
|
* Calculates the effective index interval after the entry at `index` in an IndexSummary. In other words, this
|
|
* returns the number of partitions in the primary on-disk index before the next partition that has an entry in
|
|
* the index summary. If sampling_level == BASE_SAMPLING_LEVEL, this will be equal to the index interval.
|
|
* @param index an index into an IndexSummary
|
|
* @param sampling_level the current sampling level for that IndexSummary
|
|
* @param min_index_interval the min index interval (effective index interval at full sampling)
|
|
* @return the number of partitions before the next index summary entry, inclusive on one end
|
|
*/
|
|
static int get_effective_index_interval_after_index(int index, int sampling_level, int min_index_interval) {
|
|
SCYLLA_ASSERT(index >= -1);
|
|
const std::vector<int>& original_indexes = get_original_indexes(sampling_level);
|
|
if (index == -1) {
|
|
return original_indexes[0] * min_index_interval;
|
|
}
|
|
|
|
index %= sampling_level;
|
|
if (size_t(index) == original_indexes.size() - 1) {
|
|
// account for partitions after the "last" entry as well as partitions before the "first" entry
|
|
return ((BASE_SAMPLING_LEVEL - original_indexes[index]) + original_indexes[0]) * min_index_interval;
|
|
} else {
|
|
return (original_indexes[index + 1] - original_indexes[index]) * min_index_interval;
|
|
}
|
|
}
|
|
#if 0
|
|
public static int[] getStartPoints(int currentSamplingLevel, int newSamplingLevel)
|
|
{
|
|
List<Integer> allStartPoints = getSamplingPattern(BASE_SAMPLING_LEVEL);
|
|
|
|
// calculate starting indexes for sampling rounds
|
|
int initialRound = BASE_SAMPLING_LEVEL - currentSamplingLevel;
|
|
int numRounds = Math.abs(currentSamplingLevel - newSamplingLevel);
|
|
int[] startPoints = new int[numRounds];
|
|
for (int i = 0; i < numRounds; ++i)
|
|
{
|
|
int start = allStartPoints.get(initialRound + i);
|
|
|
|
// our "ideal" start points will be affected by the removal of items in earlier rounds, so go through all
|
|
// earlier rounds, and if we see an index that comes before our ideal start point, decrement the start point
|
|
int adjustment = 0;
|
|
for (int j = 0; j < initialRound; ++j)
|
|
{
|
|
if (allStartPoints.get(j) < start)
|
|
adjustment++;
|
|
}
|
|
startPoints[i] = start - adjustment;
|
|
}
|
|
return startPoints;
|
|
}
|
|
#endif
|
|
};
|
|
|
|
}
|