Compare commits
69 Commits
next-5.2
...
scylla-2.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d4c46afc50 | ||
|
|
f371d17884 | ||
|
|
0a82a885a4 | ||
|
|
17febfdb0e | ||
|
|
830bf99528 | ||
|
|
90000d9861 | ||
|
|
46dae42dcd | ||
|
|
d6395634ad | ||
|
|
d886b3def4 | ||
|
|
bcb06bb043 | ||
|
|
4606300b25 | ||
|
|
282d93de99 | ||
|
|
52d3403cb0 | ||
|
|
97f6073699 | ||
|
|
5454e6e168 | ||
|
|
498fb11c70 | ||
|
|
a6b4881994 | ||
|
|
9848df6667 | ||
|
|
2090a5f8f6 | ||
|
|
7634ed39eb | ||
|
|
fb9b15904a | ||
|
|
4e11f05aa7 | ||
|
|
516a1ae834 | ||
|
|
be5127388d | ||
|
|
6d0679ca72 | ||
|
|
eb67b427b2 | ||
|
|
2931324b34 | ||
|
|
614519c4be | ||
|
|
203b924c76 | ||
|
|
f4f957fa53 | ||
|
|
39e614a444 | ||
|
|
d8521d0fa2 | ||
|
|
f60696b55f | ||
|
|
1b15a0926a | ||
|
|
32efd3902c | ||
|
|
6b2f7f8c39 | ||
|
|
370a6482e3 | ||
|
|
981644167b | ||
|
|
6f669da227 | ||
|
|
bdf1173075 | ||
|
|
106c69ad45 | ||
|
|
740fcc73b8 | ||
|
|
cefbb0b999 | ||
|
|
02f43f5e4c | ||
|
|
8850ef7c59 | ||
|
|
8567723a7b | ||
|
|
b0b7c73acd | ||
|
|
eb82d66849 | ||
|
|
eb12fb3733 | ||
|
|
60d011c9c0 | ||
|
|
7c3390bde8 | ||
|
|
95b55a0e9d | ||
|
|
7785d8f396 | ||
|
|
b805e37d30 | ||
|
|
a790b8cd20 | ||
|
|
a10ea80a63 | ||
|
|
91a5c9d20c | ||
|
|
f846b897bf | ||
|
|
8d7c34bf68 | ||
|
|
7449586a26 | ||
|
|
b601b9f078 | ||
|
|
1ec81cda37 | ||
|
|
e87a2bc9c0 | ||
|
|
b84d13d325 | ||
|
|
b5abf6541d | ||
|
|
8cf869cb37 | ||
|
|
df509761b0 | ||
|
|
b90e11264e | ||
|
|
84b2bff0a6 |
2
.gitmodules
vendored
2
.gitmodules
vendored
@@ -1,6 +1,6 @@
|
||||
[submodule "seastar"]
|
||||
path = seastar
|
||||
url = ../seastar
|
||||
url = ../scylla-seastar
|
||||
ignore = dirty
|
||||
[submodule "swagger-ui"]
|
||||
path = swagger-ui
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/sh
|
||||
|
||||
VERSION=666.development
|
||||
VERSION=2.1.rc1
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
@@ -31,46 +31,13 @@
|
||||
#include "partition_snapshot_reader.hh"
|
||||
#include "partition_snapshot_row_cursor.hh"
|
||||
#include "read_context.hh"
|
||||
#include "flat_mutation_reader.hh"
|
||||
|
||||
namespace cache {
|
||||
|
||||
extern logging::logger clogger;
|
||||
|
||||
class lsa_manager {
|
||||
row_cache& _cache;
|
||||
public:
|
||||
lsa_manager(row_cache& cache) : _cache(cache) { }
|
||||
template<typename Func>
|
||||
decltype(auto) run_in_read_section(const Func& func) {
|
||||
return _cache._read_section(_cache._tracker.region(), [&func] () {
|
||||
return with_linearized_managed_bytes([&func] () {
|
||||
return func();
|
||||
});
|
||||
});
|
||||
}
|
||||
template<typename Func>
|
||||
decltype(auto) run_in_update_section(const Func& func) {
|
||||
return _cache._update_section(_cache._tracker.region(), [&func] () {
|
||||
return with_linearized_managed_bytes([&func] () {
|
||||
return func();
|
||||
});
|
||||
});
|
||||
}
|
||||
template<typename Func>
|
||||
void run_in_update_section_with_allocator(Func&& func) {
|
||||
return _cache._update_section(_cache._tracker.region(), [this, &func] () {
|
||||
return with_linearized_managed_bytes([this, &func] () {
|
||||
return with_allocator(_cache._tracker.region().allocator(), [this, &func] () mutable {
|
||||
return func();
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
logalloc::region& region() { return _cache._tracker.region(); }
|
||||
logalloc::allocating_section& read_section() { return _cache._read_section; }
|
||||
};
|
||||
|
||||
class cache_streamed_mutation final : public streamed_mutation::impl {
|
||||
class cache_flat_mutation_reader final : public flat_mutation_reader::impl {
|
||||
enum class state {
|
||||
before_static_row,
|
||||
|
||||
@@ -154,14 +121,19 @@ class cache_streamed_mutation final : public streamed_mutation::impl {
|
||||
void maybe_add_to_cache(const range_tombstone& rt);
|
||||
void maybe_add_to_cache(const static_row& sr);
|
||||
void maybe_set_static_row_continuous();
|
||||
void finish_reader() {
|
||||
push_mutation_fragment(partition_end());
|
||||
_end_of_stream = true;
|
||||
_state = state::end_of_stream;
|
||||
}
|
||||
public:
|
||||
cache_streamed_mutation(schema_ptr s,
|
||||
dht::decorated_key dk,
|
||||
query::clustering_key_filter_ranges&& crr,
|
||||
lw_shared_ptr<read_context> ctx,
|
||||
lw_shared_ptr<partition_snapshot> snp,
|
||||
row_cache& cache)
|
||||
: streamed_mutation::impl(std::move(s), std::move(dk), snp->partition_tombstone())
|
||||
cache_flat_mutation_reader(schema_ptr s,
|
||||
dht::decorated_key dk,
|
||||
query::clustering_key_filter_ranges&& crr,
|
||||
lw_shared_ptr<read_context> ctx,
|
||||
lw_shared_ptr<partition_snapshot> snp,
|
||||
row_cache& cache)
|
||||
: flat_mutation_reader::impl(std::move(s))
|
||||
, _snp(std::move(snp))
|
||||
, _position_cmp(*_schema)
|
||||
, _ck_ranges(std::move(crr))
|
||||
@@ -175,17 +147,32 @@ public:
|
||||
, _next_row(*_schema, *_snp)
|
||||
{
|
||||
clogger.trace("csm {}: table={}.{}", this, _schema->ks_name(), _schema->cf_name());
|
||||
push_mutation_fragment(partition_start(std::move(dk), _snp->partition_tombstone()));
|
||||
}
|
||||
cache_streamed_mutation(const cache_streamed_mutation&) = delete;
|
||||
cache_streamed_mutation(cache_streamed_mutation&&) = delete;
|
||||
cache_flat_mutation_reader(const cache_flat_mutation_reader&) = delete;
|
||||
cache_flat_mutation_reader(cache_flat_mutation_reader&&) = delete;
|
||||
virtual future<> fill_buffer() override;
|
||||
virtual ~cache_streamed_mutation() {
|
||||
virtual ~cache_flat_mutation_reader() {
|
||||
maybe_merge_versions(_snp, _lsa_manager.region(), _lsa_manager.read_section());
|
||||
}
|
||||
virtual void next_partition() override {
|
||||
clear_buffer_to_next_partition();
|
||||
if (is_buffer_empty()) {
|
||||
_end_of_stream = true;
|
||||
}
|
||||
}
|
||||
virtual future<> fast_forward_to(const dht::partition_range&) override {
|
||||
clear_buffer();
|
||||
_end_of_stream = true;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
virtual future<> fast_forward_to(position_range pr) override {
|
||||
throw std::bad_function_call();
|
||||
}
|
||||
};
|
||||
|
||||
inline
|
||||
future<> cache_streamed_mutation::process_static_row() {
|
||||
future<> cache_flat_mutation_reader::process_static_row() {
|
||||
if (_snp->version()->partition().static_row_continuous()) {
|
||||
_read_context->cache().on_row_hit();
|
||||
row sr = _lsa_manager.run_in_read_section([this] {
|
||||
@@ -209,12 +196,11 @@ future<> cache_streamed_mutation::process_static_row() {
|
||||
}
|
||||
|
||||
inline
|
||||
future<> cache_streamed_mutation::fill_buffer() {
|
||||
future<> cache_flat_mutation_reader::fill_buffer() {
|
||||
if (_state == state::before_static_row) {
|
||||
auto after_static_row = [this] {
|
||||
if (_ck_ranges_curr == _ck_ranges_end) {
|
||||
_end_of_stream = true;
|
||||
_state = state::end_of_stream;
|
||||
finish_reader();
|
||||
return make_ready_future<>();
|
||||
}
|
||||
_state = state::reading_from_cache;
|
||||
@@ -236,7 +222,7 @@ future<> cache_streamed_mutation::fill_buffer() {
|
||||
}
|
||||
|
||||
inline
|
||||
future<> cache_streamed_mutation::do_fill_buffer() {
|
||||
future<> cache_flat_mutation_reader::do_fill_buffer() {
|
||||
if (_state == state::move_to_underlying) {
|
||||
_state = state::reading_from_underlying;
|
||||
auto end = _next_row_in_range ? position_in_partition(_next_row.position())
|
||||
@@ -277,8 +263,8 @@ future<> cache_streamed_mutation::do_fill_buffer() {
|
||||
}
|
||||
|
||||
inline
|
||||
future<> cache_streamed_mutation::read_from_underlying() {
|
||||
return consume_mutation_fragments_until(_read_context->get_streamed_mutation(),
|
||||
future<> cache_flat_mutation_reader::read_from_underlying() {
|
||||
return consume_mutation_fragments_until(_read_context->underlying().underlying(),
|
||||
[this] { return _state != state::reading_from_underlying || is_buffer_full(); },
|
||||
[this] (mutation_fragment mf) {
|
||||
_read_context->cache().on_row_miss();
|
||||
@@ -361,7 +347,7 @@ future<> cache_streamed_mutation::read_from_underlying() {
|
||||
}
|
||||
|
||||
inline
|
||||
void cache_streamed_mutation::maybe_update_continuity() {
|
||||
void cache_flat_mutation_reader::maybe_update_continuity() {
|
||||
if (can_populate() && (!_ck_ranges_curr->start() || _last_row.refresh(*_snp))) {
|
||||
if (_next_row.is_in_latest_version()) {
|
||||
clogger.trace("csm {}: mark {} continuous", this, _next_row.get_iterator_in_latest_version()->position());
|
||||
@@ -387,7 +373,7 @@ void cache_streamed_mutation::maybe_update_continuity() {
|
||||
}
|
||||
|
||||
inline
|
||||
void cache_streamed_mutation::maybe_add_to_cache(const mutation_fragment& mf) {
|
||||
void cache_flat_mutation_reader::maybe_add_to_cache(const mutation_fragment& mf) {
|
||||
if (mf.is_range_tombstone()) {
|
||||
maybe_add_to_cache(mf.as_range_tombstone());
|
||||
} else {
|
||||
@@ -398,7 +384,7 @@ void cache_streamed_mutation::maybe_add_to_cache(const mutation_fragment& mf) {
|
||||
}
|
||||
|
||||
inline
|
||||
void cache_streamed_mutation::maybe_add_to_cache(const clustering_row& cr) {
|
||||
void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) {
|
||||
if (!can_populate()) {
|
||||
_last_row = nullptr;
|
||||
_read_context->cache().on_mispopulate();
|
||||
@@ -435,18 +421,18 @@ void cache_streamed_mutation::maybe_add_to_cache(const clustering_row& cr) {
|
||||
}
|
||||
|
||||
inline
|
||||
bool cache_streamed_mutation::after_current_range(position_in_partition_view p) {
|
||||
bool cache_flat_mutation_reader::after_current_range(position_in_partition_view p) {
|
||||
return _position_cmp(p, _upper_bound) >= 0;
|
||||
}
|
||||
|
||||
inline
|
||||
void cache_streamed_mutation::start_reading_from_underlying() {
|
||||
void cache_flat_mutation_reader::start_reading_from_underlying() {
|
||||
clogger.trace("csm {}: start_reading_from_underlying(), range=[{}, {})", this, _lower_bound, _next_row_in_range ? _next_row.position() : _upper_bound);
|
||||
_state = state::move_to_underlying;
|
||||
}
|
||||
|
||||
inline
|
||||
void cache_streamed_mutation::copy_from_cache_to_buffer() {
|
||||
void cache_flat_mutation_reader::copy_from_cache_to_buffer() {
|
||||
clogger.trace("csm {}: copy_from_cache, next={}, next_row_in_range={}", this, _next_row.position(), _next_row_in_range);
|
||||
position_in_partition_view next_lower_bound = _next_row.dummy() ? _next_row.position() : position_in_partition_view::after_key(_next_row.key());
|
||||
for (auto&& rts : _snp->range_tombstones(*_schema, _lower_bound, _next_row_in_range ? next_lower_bound : _upper_bound)) {
|
||||
@@ -465,15 +451,14 @@ void cache_streamed_mutation::copy_from_cache_to_buffer() {
|
||||
}
|
||||
|
||||
inline
|
||||
void cache_streamed_mutation::move_to_end() {
|
||||
void cache_flat_mutation_reader::move_to_end() {
|
||||
drain_tombstones();
|
||||
_end_of_stream = true;
|
||||
_state = state::end_of_stream;
|
||||
finish_reader();
|
||||
clogger.trace("csm {}: eos", this);
|
||||
}
|
||||
|
||||
inline
|
||||
void cache_streamed_mutation::move_to_next_range() {
|
||||
void cache_flat_mutation_reader::move_to_next_range() {
|
||||
auto next_it = std::next(_ck_ranges_curr);
|
||||
if (next_it == _ck_ranges_end) {
|
||||
move_to_end();
|
||||
@@ -484,7 +469,7 @@ void cache_streamed_mutation::move_to_next_range() {
|
||||
}
|
||||
|
||||
inline
|
||||
void cache_streamed_mutation::move_to_range(query::clustering_row_ranges::const_iterator next_it) {
|
||||
void cache_flat_mutation_reader::move_to_range(query::clustering_row_ranges::const_iterator next_it) {
|
||||
auto lb = position_in_partition::for_range_start(*next_it);
|
||||
auto ub = position_in_partition_view::for_range_end(*next_it);
|
||||
_last_row = nullptr;
|
||||
@@ -520,7 +505,7 @@ void cache_streamed_mutation::move_to_range(query::clustering_row_ranges::const_
|
||||
|
||||
// _next_row must be inside the range.
|
||||
inline
|
||||
void cache_streamed_mutation::move_to_next_entry() {
|
||||
void cache_flat_mutation_reader::move_to_next_entry() {
|
||||
clogger.trace("csm {}: move_to_next_entry(), curr={}", this, _next_row.position());
|
||||
if (no_clustering_row_between(*_schema, _next_row.position(), _upper_bound)) {
|
||||
move_to_next_range();
|
||||
@@ -538,7 +523,7 @@ void cache_streamed_mutation::move_to_next_entry() {
|
||||
}
|
||||
|
||||
inline
|
||||
void cache_streamed_mutation::drain_tombstones(position_in_partition_view pos) {
|
||||
void cache_flat_mutation_reader::drain_tombstones(position_in_partition_view pos) {
|
||||
while (true) {
|
||||
reserve_one();
|
||||
auto mfo = _tombstones.get_next(pos);
|
||||
@@ -550,7 +535,7 @@ void cache_streamed_mutation::drain_tombstones(position_in_partition_view pos) {
|
||||
}
|
||||
|
||||
inline
|
||||
void cache_streamed_mutation::drain_tombstones() {
|
||||
void cache_flat_mutation_reader::drain_tombstones() {
|
||||
while (true) {
|
||||
reserve_one();
|
||||
auto mfo = _tombstones.get_next();
|
||||
@@ -562,7 +547,7 @@ void cache_streamed_mutation::drain_tombstones() {
|
||||
}
|
||||
|
||||
inline
|
||||
void cache_streamed_mutation::add_to_buffer(mutation_fragment&& mf) {
|
||||
void cache_flat_mutation_reader::add_to_buffer(mutation_fragment&& mf) {
|
||||
clogger.trace("csm {}: add_to_buffer({})", this, mf);
|
||||
if (mf.is_clustering_row()) {
|
||||
add_clustering_row_to_buffer(std::move(mf));
|
||||
@@ -573,7 +558,7 @@ void cache_streamed_mutation::add_to_buffer(mutation_fragment&& mf) {
|
||||
}
|
||||
|
||||
inline
|
||||
void cache_streamed_mutation::add_to_buffer(const partition_snapshot_row_cursor& row) {
|
||||
void cache_flat_mutation_reader::add_to_buffer(const partition_snapshot_row_cursor& row) {
|
||||
if (!row.dummy()) {
|
||||
_read_context->cache().on_row_hit();
|
||||
add_clustering_row_to_buffer(row.row());
|
||||
@@ -584,7 +569,7 @@ void cache_streamed_mutation::add_to_buffer(const partition_snapshot_row_cursor&
|
||||
// (1) no fragment with position >= _lower_bound was pushed yet
|
||||
// (2) If _lower_bound > mf.position(), mf was emitted
|
||||
inline
|
||||
void cache_streamed_mutation::add_clustering_row_to_buffer(mutation_fragment&& mf) {
|
||||
void cache_flat_mutation_reader::add_clustering_row_to_buffer(mutation_fragment&& mf) {
|
||||
clogger.trace("csm {}: add_clustering_row_to_buffer({})", this, mf);
|
||||
auto& row = mf.as_clustering_row();
|
||||
auto key = row.key();
|
||||
@@ -600,7 +585,7 @@ void cache_streamed_mutation::add_clustering_row_to_buffer(mutation_fragment&& m
|
||||
}
|
||||
|
||||
inline
|
||||
void cache_streamed_mutation::add_to_buffer(range_tombstone&& rt) {
|
||||
void cache_flat_mutation_reader::add_to_buffer(range_tombstone&& rt) {
|
||||
clogger.trace("csm {}: add_to_buffer({})", this, rt);
|
||||
// This guarantees that rt starts after any emitted clustering_row
|
||||
if (!rt.trim_front(*_schema, _lower_bound)) {
|
||||
@@ -612,7 +597,7 @@ void cache_streamed_mutation::add_to_buffer(range_tombstone&& rt) {
|
||||
}
|
||||
|
||||
inline
|
||||
void cache_streamed_mutation::maybe_add_to_cache(const range_tombstone& rt) {
|
||||
void cache_flat_mutation_reader::maybe_add_to_cache(const range_tombstone& rt) {
|
||||
if (can_populate()) {
|
||||
clogger.trace("csm {}: maybe_add_to_cache({})", this, rt);
|
||||
_lsa_manager.run_in_update_section_with_allocator([&] {
|
||||
@@ -624,7 +609,7 @@ void cache_streamed_mutation::maybe_add_to_cache(const range_tombstone& rt) {
|
||||
}
|
||||
|
||||
inline
|
||||
void cache_streamed_mutation::maybe_add_to_cache(const static_row& sr) {
|
||||
void cache_flat_mutation_reader::maybe_add_to_cache(const static_row& sr) {
|
||||
if (can_populate()) {
|
||||
clogger.trace("csm {}: populate({})", this, sr);
|
||||
_read_context->cache().on_row_insert();
|
||||
@@ -637,7 +622,7 @@ void cache_streamed_mutation::maybe_add_to_cache(const static_row& sr) {
|
||||
}
|
||||
|
||||
inline
|
||||
void cache_streamed_mutation::maybe_set_static_row_continuous() {
|
||||
void cache_flat_mutation_reader::maybe_set_static_row_continuous() {
|
||||
if (can_populate()) {
|
||||
clogger.trace("csm {}: set static row continuous", this);
|
||||
_snp->version()->partition().set_static_row_continuous(true);
|
||||
@@ -647,19 +632,19 @@ void cache_streamed_mutation::maybe_set_static_row_continuous() {
|
||||
}
|
||||
|
||||
inline
|
||||
bool cache_streamed_mutation::can_populate() const {
|
||||
bool cache_flat_mutation_reader::can_populate() const {
|
||||
return _snp->at_latest_version() && _read_context->cache().phase_of(_read_context->key()) == _read_context->phase();
|
||||
}
|
||||
|
||||
} // namespace cache
|
||||
|
||||
inline streamed_mutation make_cache_streamed_mutation(schema_ptr s,
|
||||
dht::decorated_key dk,
|
||||
query::clustering_key_filter_ranges crr,
|
||||
row_cache& cache,
|
||||
lw_shared_ptr<cache::read_context> ctx,
|
||||
lw_shared_ptr<partition_snapshot> snp)
|
||||
inline flat_mutation_reader make_cache_flat_mutation_reader(schema_ptr s,
|
||||
dht::decorated_key dk,
|
||||
query::clustering_key_filter_ranges crr,
|
||||
row_cache& cache,
|
||||
lw_shared_ptr<cache::read_context> ctx,
|
||||
lw_shared_ptr<partition_snapshot> snp)
|
||||
{
|
||||
return make_streamed_mutation<cache::cache_streamed_mutation>(
|
||||
return make_flat_mutation_reader<cache::cache_flat_mutation_reader>(
|
||||
std::move(s), std::move(dk), std::move(crr), std::move(ctx), std::move(snp), cache);
|
||||
}
|
||||
@@ -42,17 +42,6 @@ std::ostream& operator<<(std::ostream& out, const bound_kind k);
|
||||
bound_kind invert_kind(bound_kind k);
|
||||
int32_t weight(bound_kind k);
|
||||
|
||||
static inline bound_kind flip_bound_kind(bound_kind bk)
|
||||
{
|
||||
switch (bk) {
|
||||
case bound_kind::excl_end: return bound_kind::excl_start;
|
||||
case bound_kind::incl_end: return bound_kind::incl_start;
|
||||
case bound_kind::excl_start: return bound_kind::excl_end;
|
||||
case bound_kind::incl_start: return bound_kind::incl_end;
|
||||
}
|
||||
abort();
|
||||
}
|
||||
|
||||
class bound_view {
|
||||
public:
|
||||
const static thread_local clustering_key empty_prefix;
|
||||
|
||||
@@ -241,7 +241,7 @@ public:
|
||||
using component_view = std::pair<bytes_view, eoc>;
|
||||
private:
|
||||
template<typename Value, typename = std::enable_if_t<!std::is_same<const data_value, std::decay_t<Value>>::value>>
|
||||
static size_t size(Value& val) {
|
||||
static size_t size(const Value& val) {
|
||||
return val.size();
|
||||
}
|
||||
static size_t size(const data_value& val) {
|
||||
@@ -445,17 +445,16 @@ public:
|
||||
return _is_compound;
|
||||
}
|
||||
|
||||
// The following factory functions assume this composite is a compound value.
|
||||
template <typename ClusteringElement>
|
||||
static composite from_clustering_element(const schema& s, const ClusteringElement& ce) {
|
||||
return serialize_value(ce.components(s));
|
||||
return serialize_value(ce.components(s), s.is_compound());
|
||||
}
|
||||
|
||||
static composite from_exploded(const std::vector<bytes_view>& v, eoc marker = eoc::none) {
|
||||
static composite from_exploded(const std::vector<bytes_view>& v, bool is_compound, eoc marker = eoc::none) {
|
||||
if (v.size() == 0) {
|
||||
return composite(bytes(size_t(1), bytes::value_type(marker)));
|
||||
return composite(bytes(size_t(1), bytes::value_type(marker)), is_compound);
|
||||
}
|
||||
return serialize_value(v, true, marker);
|
||||
return serialize_value(v, is_compound, marker);
|
||||
}
|
||||
|
||||
static composite static_prefix(const schema& s) {
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
# one logical cluster from joining another.
|
||||
# It is recommended to change the default value when creating a new cluster.
|
||||
# You can NOT modify this value for an existing cluster
|
||||
#cluster_name: 'ScyllaDB Cluster'
|
||||
#cluster_name: 'Test Cluster'
|
||||
|
||||
# This defines the number of tokens randomly assigned to this node on the ring
|
||||
# The more tokens, relative to other nodes, the larger the proportion of data
|
||||
@@ -87,6 +87,13 @@ listen_address: localhost
|
||||
# Leaving this blank will set it to the same value as listen_address
|
||||
# broadcast_address: 1.2.3.4
|
||||
|
||||
|
||||
# When using multiple physical network interfaces, set this to true to listen on broadcast_address
|
||||
# in addition to the listen_address, allowing nodes to communicate in both interfaces.
|
||||
# Ignore this property if the network configuration automatically routes between the public and private networks such as EC2.
|
||||
#
|
||||
# listen_on_broadcast_address: false
|
||||
|
||||
# port for the CQL native transport to listen for clients on
|
||||
# For security reasons, you should not expose this port to the internet. Firewall it if needed.
|
||||
native_transport_port: 9042
|
||||
|
||||
@@ -189,7 +189,7 @@ scylla_tests = [
|
||||
'tests/perf/perf_simple_query',
|
||||
'tests/perf/perf_fast_forward',
|
||||
'tests/perf/perf_cache_eviction',
|
||||
'tests/cache_streamed_mutation_test',
|
||||
'tests/cache_flat_mutation_reader_test',
|
||||
'tests/row_cache_stress_test',
|
||||
'tests/memory_footprint',
|
||||
'tests/perf/perf_sstable',
|
||||
@@ -703,6 +703,7 @@ warnings = [
|
||||
'-Wno-misleading-indentation',
|
||||
'-Wno-overflow',
|
||||
'-Wno-noexcept-type',
|
||||
'-Wno-nonnull-compare'
|
||||
]
|
||||
|
||||
warnings = [w
|
||||
|
||||
@@ -101,6 +101,10 @@ public:
|
||||
return boost::algorithm::all_of(_restrictions->restrictions(), [b] (auto&& r) { return r.second->has_bound(b); });
|
||||
}
|
||||
|
||||
virtual bool is_inclusive(statements::bound b) const override {
|
||||
return boost::algorithm::all_of(_restrictions->restrictions(), [b] (auto&& r) { return r.second->is_inclusive(b); });
|
||||
}
|
||||
|
||||
virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override {
|
||||
return _restrictions->uses_function(ks_name, function_name);
|
||||
}
|
||||
|
||||
@@ -78,7 +78,7 @@ cql3::statements::create_user_statement::execute(distributed<service::storage_pr
|
||||
throw exceptions::invalid_request_exception(sprint("User %s already exists", _username));
|
||||
}
|
||||
if (exists && _if_not_exists) {
|
||||
make_ready_future<::shared_ptr<cql_transport::messages::result_message>>();
|
||||
return make_ready_future<::shared_ptr<cql_transport::messages::result_message>>();
|
||||
}
|
||||
return auth_service.underlying_authenticator().create(_username, _opts->options()).then([this, &auth_service] {
|
||||
return auth_service.insert_user(_username, _superuser).then([] {
|
||||
|
||||
@@ -106,6 +106,9 @@ delete_statement::prepare_internal(database& db, schema_ptr schema, shared_ptr<v
|
||||
|| !stmt->restrictions()->get_clustering_columns_restrictions()->has_bound(bound::END)) {
|
||||
throw exceptions::invalid_request_exception("A range deletion operation needs to specify both bounds");
|
||||
}
|
||||
if (!schema->is_compound() && stmt->restrictions()->get_clustering_columns_restrictions()->is_slice()) {
|
||||
throw exceptions::invalid_request_exception("Range deletions on \"compact storage\" schemas are not supported");
|
||||
}
|
||||
return stmt;
|
||||
}
|
||||
|
||||
|
||||
255
database.cc
255
database.cc
@@ -236,8 +236,8 @@ logalloc::occupancy_stats column_family::occupancy() const {
|
||||
}
|
||||
|
||||
static
|
||||
bool belongs_to_current_shard(const streamed_mutation& m) {
|
||||
return dht::shard_of(m.decorated_key().token()) == engine().cpu_id();
|
||||
bool belongs_to_current_shard(const dht::decorated_key& dk) {
|
||||
return dht::shard_of(dk.token()) == engine().cpu_id();
|
||||
}
|
||||
|
||||
// Stores ranges for all components of the same clustering key, index 0 referring to component
|
||||
@@ -387,14 +387,11 @@ class incremental_reader_selector : public reader_selector {
|
||||
mutation_reader::forwarding _fwd_mr;
|
||||
sstables::sstable_set::incremental_selector _selector;
|
||||
std::unordered_set<sstables::shared_sstable> _read_sstables;
|
||||
sstable_reader_factory_type _fn;
|
||||
|
||||
mutation_reader create_reader(sstables::shared_sstable sst) {
|
||||
flat_mutation_reader create_reader(sstables::shared_sstable sst) {
|
||||
tracing::trace(_trace_state, "Reading partition range {} from sstable {}", *_pr, seastar::value_of([&sst] { return sst->get_filename(); }));
|
||||
mutation_reader reader = sst->read_range_rows(_s, *_pr, _slice, _pc, _resource_tracker, _fwd, _fwd_mr);
|
||||
if (sst->is_shared()) {
|
||||
reader = make_filtering_reader(std::move(reader), belongs_to_current_shard);
|
||||
}
|
||||
return std::move(reader);
|
||||
return _fn(sst, *_pr);
|
||||
}
|
||||
|
||||
public:
|
||||
@@ -406,7 +403,8 @@ public:
|
||||
reader_resource_tracker resource_tracker,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr)
|
||||
mutation_reader::forwarding fwd_mr,
|
||||
sstable_reader_factory_type fn)
|
||||
: _s(s)
|
||||
, _pr(&pr)
|
||||
, _sstables(std::move(sstables))
|
||||
@@ -416,7 +414,8 @@ public:
|
||||
, _trace_state(std::move(trace_state))
|
||||
, _fwd(fwd)
|
||||
, _fwd_mr(fwd_mr)
|
||||
, _selector(_sstables->make_incremental_selector()) {
|
||||
, _selector(_sstables->make_incremental_selector())
|
||||
, _fn(std::move(fn)) {
|
||||
_selector_position = _pr->start() ? _pr->start()->value().token() : dht::minimum_token();
|
||||
|
||||
dblog.trace("incremental_reader_selector {}: created for range: {} with {} sstables",
|
||||
@@ -431,7 +430,7 @@ public:
|
||||
incremental_reader_selector(incremental_reader_selector&&) = delete;
|
||||
incremental_reader_selector& operator=(incremental_reader_selector&&) = delete;
|
||||
|
||||
virtual std::vector<mutation_reader> create_new_readers(const dht::token* const t) override {
|
||||
virtual std::vector<flat_mutation_reader> create_new_readers(const dht::token* const t) override {
|
||||
dblog.trace("incremental_reader_selector {}: {}({})", this, __FUNCTION__, seastar::lazy_deref(t));
|
||||
|
||||
const auto& position = (t ? *t : _selector_position);
|
||||
@@ -456,12 +455,14 @@ public:
|
||||
|
||||
dblog.trace("incremental_reader_selector {}: {} new sstables to consider, advancing selector to {}", this, selection.sstables.size(), _selector_position);
|
||||
|
||||
return boost::copy_range<std::vector<mutation_reader>>(selection.sstables
|
||||
return boost::copy_range<std::vector<flat_mutation_reader>>(selection.sstables
|
||||
| boost::adaptors::filtered([this] (auto& sst) { return _read_sstables.emplace(sst).second; })
|
||||
| boost::adaptors::transformed([this] (auto& sst) { return this->create_reader(sst); }));
|
||||
| boost::adaptors::transformed([this] (auto& sst) {
|
||||
return this->create_reader(sst);
|
||||
}));
|
||||
}
|
||||
|
||||
virtual std::vector<mutation_reader> fast_forward_to(const dht::partition_range& pr) override {
|
||||
virtual std::vector<flat_mutation_reader> fast_forward_to(const dht::partition_range& pr) override {
|
||||
_pr = &pr;
|
||||
|
||||
if (_pr->start()->value().token() >= _selector_position) {
|
||||
@@ -472,71 +473,35 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class single_key_sstable_reader final : public mutation_reader::impl {
|
||||
column_family* _cf;
|
||||
schema_ptr _schema;
|
||||
const dht::partition_range& _pr;
|
||||
sstables::key _key;
|
||||
std::vector<streamed_mutation> _mutations;
|
||||
bool _done = false;
|
||||
lw_shared_ptr<sstables::sstable_set> _sstables;
|
||||
utils::estimated_histogram& _sstable_histogram;
|
||||
// Use a pointer instead of copying, so we don't need to regenerate the reader if
|
||||
// the priority changes.
|
||||
const io_priority_class& _pc;
|
||||
const query::partition_slice& _slice;
|
||||
reader_resource_tracker _resource_tracker;
|
||||
tracing::trace_state_ptr _trace_state;
|
||||
streamed_mutation::forwarding _fwd;
|
||||
public:
|
||||
single_key_sstable_reader(column_family* cf,
|
||||
schema_ptr schema,
|
||||
lw_shared_ptr<sstables::sstable_set> sstables,
|
||||
utils::estimated_histogram& sstable_histogram,
|
||||
const dht::partition_range& pr, // must be singular
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
reader_resource_tracker resource_tracker,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
streamed_mutation::forwarding fwd)
|
||||
: _cf(cf)
|
||||
, _schema(std::move(schema))
|
||||
, _pr(pr)
|
||||
, _key(sstables::key::from_partition_key(*_schema, *pr.start()->value().key()))
|
||||
, _sstables(std::move(sstables))
|
||||
, _sstable_histogram(sstable_histogram)
|
||||
, _pc(pc)
|
||||
, _slice(slice)
|
||||
, _resource_tracker(std::move(resource_tracker))
|
||||
, _trace_state(std::move(trace_state))
|
||||
, _fwd(fwd)
|
||||
{ }
|
||||
|
||||
virtual future<streamed_mutation_opt> operator()() override {
|
||||
if (_done) {
|
||||
return make_ready_future<streamed_mutation_opt>();
|
||||
}
|
||||
auto candidates = filter_sstable_for_reader(_sstables->select(_pr), *_cf, _schema, _key, _slice);
|
||||
return parallel_for_each(std::move(candidates),
|
||||
[this](const sstables::shared_sstable& sstable) {
|
||||
tracing::trace(_trace_state, "Reading key {} from sstable {}", _pr, seastar::value_of([&sstable] { return sstable->get_filename(); }));
|
||||
return sstable->read_row(_schema, _pr.start()->value(), _slice, _pc, _resource_tracker, _fwd).then([this](auto smo) {
|
||||
if (smo) {
|
||||
_mutations.emplace_back(std::move(*smo));
|
||||
}
|
||||
});
|
||||
}).then([this] () -> streamed_mutation_opt {
|
||||
_done = true;
|
||||
if (_mutations.empty()) {
|
||||
return { };
|
||||
}
|
||||
_sstable_histogram.add(_mutations.size());
|
||||
return merge_mutations(std::move(_mutations));
|
||||
});
|
||||
static flat_mutation_reader
|
||||
create_single_key_sstable_reader(column_family* cf,
|
||||
schema_ptr schema,
|
||||
lw_shared_ptr<sstables::sstable_set> sstables,
|
||||
utils::estimated_histogram& sstable_histogram,
|
||||
const dht::partition_range& pr, // must be singular
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
reader_resource_tracker resource_tracker,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr)
|
||||
{
|
||||
auto key = sstables::key::from_partition_key(*schema, *pr.start()->value().key());
|
||||
auto readers = boost::copy_range<std::vector<flat_mutation_reader>>(
|
||||
filter_sstable_for_reader(sstables->select(pr), *cf, schema, key, slice)
|
||||
| boost::adaptors::transformed([&] (const sstables::shared_sstable& sstable) {
|
||||
tracing::trace(trace_state, "Reading key {} from sstable {}", pr, seastar::value_of([&sstable] { return sstable->get_filename(); }));
|
||||
return sstable->read_row_flat(schema, pr.start()->value(), slice, pc, resource_tracker, fwd);
|
||||
})
|
||||
);
|
||||
if (readers.empty()) {
|
||||
return make_empty_flat_reader(schema);
|
||||
}
|
||||
};
|
||||
sstable_histogram.add(readers.size());
|
||||
return make_combined_reader(schema, std::move(readers), fwd, fwd_mr);
|
||||
}
|
||||
|
||||
mutation_reader
|
||||
flat_mutation_reader
|
||||
column_family::make_sstable_reader(schema_ptr s,
|
||||
lw_shared_ptr<sstables::sstable_set> sstables,
|
||||
const dht::partition_range& pr,
|
||||
@@ -556,7 +521,7 @@ column_family::make_sstable_reader(schema_ptr s,
|
||||
if (pr.is_singular() && pr.start()->value().has_key()) {
|
||||
const dht::ring_position& pos = pr.start()->value();
|
||||
if (dht::shard_of(pos.token()) != engine().cpu_id()) {
|
||||
return make_empty_reader(); // range doesn't belong to this shard
|
||||
return make_empty_flat_reader(s); // range doesn't belong to this shard
|
||||
}
|
||||
|
||||
if (config.resources_sem) {
|
||||
@@ -568,13 +533,13 @@ column_family::make_sstable_reader(schema_ptr s,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr) {
|
||||
return make_mutation_reader<single_key_sstable_reader>(const_cast<column_family*>(this), std::move(s), std::move(sstables),
|
||||
_stats.estimated_sstable_per_read, pr, slice, pc, reader_resource_tracker(config.resources_sem), std::move(trace_state), fwd);
|
||||
return create_single_key_sstable_reader(const_cast<column_family*>(this), std::move(s), std::move(sstables),
|
||||
_stats.estimated_sstable_per_read, pr, slice, pc, reader_resource_tracker(config.resources_sem), std::move(trace_state), fwd, fwd_mr);
|
||||
});
|
||||
return make_restricted_reader(config, std::move(ms), std::move(s), pr, slice, pc, std::move(trace_state), fwd, fwd_mr);
|
||||
return make_restricted_flat_reader(config, std::move(ms), std::move(s), pr, slice, pc, std::move(trace_state), fwd, fwd_mr);
|
||||
} else {
|
||||
return make_mutation_reader<single_key_sstable_reader>(const_cast<column_family*>(this), std::move(s), std::move(sstables),
|
||||
_stats.estimated_sstable_per_read, pr, slice, pc, no_resource_tracking(), std::move(trace_state), fwd);
|
||||
return create_single_key_sstable_reader(const_cast<column_family*>(this), std::move(s), std::move(sstables),
|
||||
_stats.estimated_sstable_per_read, pr, slice, pc, no_resource_tracking(), std::move(trace_state), fwd, fwd_mr);
|
||||
}
|
||||
} else {
|
||||
if (config.resources_sem) {
|
||||
@@ -586,17 +551,13 @@ column_family::make_sstable_reader(schema_ptr s,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr) {
|
||||
return make_mutation_reader<combined_mutation_reader>(
|
||||
std::make_unique<incremental_reader_selector>(std::move(s), std::move(sstables), pr, slice, pc,
|
||||
reader_resource_tracker(config.resources_sem), std::move(trace_state), fwd, fwd_mr),
|
||||
fwd_mr);
|
||||
return make_local_shard_sstable_reader(std::move(s), std::move(sstables), pr, slice, pc,
|
||||
reader_resource_tracker(config.resources_sem), std::move(trace_state), fwd, fwd_mr);
|
||||
});
|
||||
return make_restricted_reader(config, std::move(ms), std::move(s), pr, slice, pc, std::move(trace_state), fwd, fwd_mr);
|
||||
return make_restricted_flat_reader(config, std::move(ms), std::move(s), pr, slice, pc, std::move(trace_state), fwd, fwd_mr);
|
||||
} else {
|
||||
return make_mutation_reader<combined_mutation_reader>(
|
||||
std::make_unique<incremental_reader_selector>(std::move(s), std::move(sstables), pr, slice, pc,
|
||||
no_resource_tracking(), std::move(trace_state), fwd, fwd_mr),
|
||||
fwd_mr);
|
||||
return make_local_shard_sstable_reader(std::move(s), std::move(sstables), pr, slice, pc,
|
||||
no_resource_tracking(), std::move(trace_state), fwd, fwd_mr);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -605,10 +566,8 @@ column_family::make_sstable_reader(schema_ptr s,
|
||||
future<column_family::const_mutation_partition_ptr>
|
||||
column_family::find_partition(schema_ptr s, const dht::decorated_key& key) const {
|
||||
return do_with(dht::partition_range::make_singular(key), [s = std::move(s), this] (auto& range) {
|
||||
return do_with(this->make_reader(s, range), [] (mutation_reader& reader) {
|
||||
return reader().then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([] (mutation_opt&& mo) -> std::unique_ptr<const mutation_partition> {
|
||||
return do_with(this->make_reader(s, range), [s] (flat_mutation_reader& reader) {
|
||||
return read_mutation_from_flat_mutation_reader(reader).then([] (mutation_opt&& mo) -> std::unique_ptr<const mutation_partition> {
|
||||
if (!mo) {
|
||||
return {};
|
||||
}
|
||||
@@ -639,7 +598,7 @@ column_family::find_row(schema_ptr s, const dht::decorated_key& partition_key, c
|
||||
});
|
||||
}
|
||||
|
||||
mutation_reader
|
||||
flat_mutation_reader
|
||||
column_family::make_reader(schema_ptr s,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
@@ -648,10 +607,10 @@ column_family::make_reader(schema_ptr s,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr) const {
|
||||
if (_virtual_reader) {
|
||||
return (*_virtual_reader)(s, range, slice, pc, trace_state, fwd, fwd_mr);
|
||||
return (*_virtual_reader).make_flat_mutation_reader(s, range, slice, pc, trace_state, fwd, fwd_mr);
|
||||
}
|
||||
|
||||
std::vector<mutation_reader> readers;
|
||||
std::vector<flat_mutation_reader> readers;
|
||||
readers.reserve(_memtables->size() + 1);
|
||||
|
||||
// We're assuming that cache and memtables are both read atomically
|
||||
@@ -675,16 +634,16 @@ column_family::make_reader(schema_ptr s,
|
||||
// https://github.com/scylladb/scylla/issues/185
|
||||
|
||||
for (auto&& mt : *_memtables) {
|
||||
readers.emplace_back(mt->make_reader(s, range, slice, pc, trace_state, fwd, fwd_mr));
|
||||
readers.emplace_back(mt->make_flat_reader(s, range, slice, pc, trace_state, fwd, fwd_mr));
|
||||
}
|
||||
|
||||
if (_config.enable_cache) {
|
||||
readers.emplace_back(_cache.make_reader(s, range, slice, pc, std::move(trace_state), fwd, fwd_mr));
|
||||
readers.emplace_back(_cache.make_flat_reader(s, range, slice, pc, std::move(trace_state), fwd, fwd_mr));
|
||||
} else {
|
||||
readers.emplace_back(make_sstable_reader(s, _sstables, range, slice, pc, std::move(trace_state), fwd, fwd_mr));
|
||||
}
|
||||
|
||||
return make_combined_reader(std::move(readers), fwd_mr);
|
||||
return make_combined_reader(s, std::move(readers), fwd, fwd_mr);
|
||||
}
|
||||
|
||||
flat_mutation_reader
|
||||
@@ -695,16 +654,16 @@ column_family::make_streaming_reader(schema_ptr s,
|
||||
|
||||
auto source = mutation_source([this] (schema_ptr s, const dht::partition_range& range, const query::partition_slice& slice,
|
||||
const io_priority_class& pc, tracing::trace_state_ptr trace_state, streamed_mutation::forwarding fwd, mutation_reader::forwarding fwd_mr) {
|
||||
std::vector<mutation_reader> readers;
|
||||
std::vector<flat_mutation_reader> readers;
|
||||
readers.reserve(_memtables->size() + 1);
|
||||
for (auto&& mt : *_memtables) {
|
||||
readers.emplace_back(mt->make_reader(s, range, slice, pc, trace_state, fwd, fwd_mr));
|
||||
readers.emplace_back(mt->make_flat_reader(s, range, slice, pc, trace_state, fwd, fwd_mr));
|
||||
}
|
||||
readers.emplace_back(make_sstable_reader(s, _sstables, range, slice, pc, std::move(trace_state), fwd, fwd_mr));
|
||||
return make_combined_reader(std::move(readers), fwd_mr);
|
||||
return make_combined_reader(s, std::move(readers), fwd, fwd_mr);
|
||||
});
|
||||
|
||||
return make_flat_multi_range_reader(s, std::move(source), ranges, slice, pc, nullptr, streamed_mutation::forwarding::no, mutation_reader::forwarding::no);
|
||||
return make_flat_multi_range_reader(s, std::move(source), ranges, slice, pc, nullptr, mutation_reader::forwarding::no);
|
||||
}
|
||||
|
||||
future<std::vector<locked_cell>> column_family::lock_counter_cells(const mutation& m, timeout_clock::time_point timeout) {
|
||||
@@ -720,7 +679,7 @@ column_family::for_all_partitions(schema_ptr s, Func&& func) const {
|
||||
"bad Func signature");
|
||||
|
||||
struct iteration_state {
|
||||
mutation_reader reader;
|
||||
flat_mutation_reader reader;
|
||||
Func func;
|
||||
bool ok = true;
|
||||
bool empty = false;
|
||||
@@ -734,9 +693,7 @@ column_family::for_all_partitions(schema_ptr s, Func&& func) const {
|
||||
|
||||
return do_with(iteration_state(std::move(s), *this, std::move(func)), [] (iteration_state& is) {
|
||||
return do_until([&is] { return is.done(); }, [&is] {
|
||||
return is.reader().then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([&is](mutation_opt&& mo) {
|
||||
return read_mutation_from_flat_mutation_reader(is.reader).then([&is](mutation_opt&& mo) {
|
||||
if (!mo) {
|
||||
is.empty = true;
|
||||
} else {
|
||||
@@ -915,7 +872,8 @@ column_family::seal_active_streaming_memtable_immediate(flush_permit&& permit) {
|
||||
adder();
|
||||
return old->clear_gently();
|
||||
}
|
||||
}).handle_exception([old, permit = std::move(permit)] (auto ep) {
|
||||
}).handle_exception([old, permit = std::move(permit), newtab] (auto ep) {
|
||||
newtab->mark_for_deletion();
|
||||
dblog.error("failed to write streamed sstable: {}", ep);
|
||||
return make_exception_future<>(ep);
|
||||
});
|
||||
@@ -954,7 +912,8 @@ future<> column_family::seal_active_streaming_memtable_big(streaming_memtable_bi
|
||||
auto monitor = seastar::make_shared<permit_monitor>(permit.release_sstable_write_permit());
|
||||
return write_memtable_to_sstable(*old, newtab, std::move(monitor), incremental_backups_enabled(), priority, true, _config.background_writer_scheduling_group).then([this, newtab, old, &smb, permit = std::move(permit)] {
|
||||
smb.sstables.emplace_back(newtab);
|
||||
}).handle_exception([] (auto ep) {
|
||||
}).handle_exception([newtab] (auto ep) {
|
||||
newtab->mark_for_deletion();
|
||||
dblog.error("failed to write streamed sstable: {}", ep);
|
||||
return make_exception_future<>(ep);
|
||||
});
|
||||
@@ -3047,17 +3006,6 @@ void database::register_connection_drop_notifier(netw::messaging_service& ms) {
|
||||
});
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const atomic_cell_or_collection& c) {
|
||||
return out << to_hex(c._data);
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const mutation& m) {
|
||||
const ::schema& s = *m.schema();
|
||||
fprint(os, "{%s.%s key %s data ", s.ks_name(), s.cf_name(), m.decorated_key());
|
||||
os << m.partition() << "}";
|
||||
return os;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const column_family& cf) {
|
||||
return fprint(out, "{column_family: %s/%s}", cf._schema->ks_name(), cf._schema->cf_name());
|
||||
}
|
||||
@@ -4268,7 +4216,7 @@ void column_family::drop_hit_rate(gms::inet_address addr) {
|
||||
_cluster_cache_hit_rates.erase(addr);
|
||||
}
|
||||
|
||||
mutation_reader make_range_sstable_reader(schema_ptr s,
|
||||
flat_mutation_reader make_local_shard_sstable_reader(schema_ptr s,
|
||||
lw_shared_ptr<sstables::sstable_set> sstables,
|
||||
const dht::partition_range& pr,
|
||||
const query::partition_slice& slice,
|
||||
@@ -4278,15 +4226,53 @@ mutation_reader make_range_sstable_reader(schema_ptr s,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr)
|
||||
{
|
||||
return make_mutation_reader<combined_mutation_reader>(std::make_unique<incremental_reader_selector>(std::move(s),
|
||||
std::move(sstables),
|
||||
pr,
|
||||
slice,
|
||||
pc,
|
||||
std::move(resource_tracker),
|
||||
std::move(trace_state),
|
||||
fwd,
|
||||
fwd_mr), fwd_mr);
|
||||
auto reader_factory_fn = [s, &slice, &pc, resource_tracker, fwd, fwd_mr] (sstables::shared_sstable& sst, const dht::partition_range& pr) {
|
||||
flat_mutation_reader reader = sst->read_range_rows_flat(s, pr, slice, pc, resource_tracker, fwd, fwd_mr);
|
||||
if (sst->is_shared()) {
|
||||
using sig = bool (&)(const dht::decorated_key&);
|
||||
reader = make_filtering_reader(std::move(reader), sig(belongs_to_current_shard));
|
||||
}
|
||||
return reader;
|
||||
};
|
||||
return make_flat_mutation_reader<combined_mutation_reader>(s, std::make_unique<incremental_reader_selector>(s,
|
||||
std::move(sstables),
|
||||
pr,
|
||||
slice,
|
||||
pc,
|
||||
std::move(resource_tracker),
|
||||
std::move(trace_state),
|
||||
fwd,
|
||||
fwd_mr,
|
||||
std::move(reader_factory_fn)),
|
||||
fwd,
|
||||
fwd_mr);
|
||||
}
|
||||
|
||||
flat_mutation_reader make_range_sstable_reader(schema_ptr s,
|
||||
lw_shared_ptr<sstables::sstable_set> sstables,
|
||||
const dht::partition_range& pr,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
reader_resource_tracker resource_tracker,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr)
|
||||
{
|
||||
auto reader_factory_fn = [s, &slice, &pc, resource_tracker, fwd, fwd_mr] (sstables::shared_sstable& sst, const dht::partition_range& pr) {
|
||||
return sst->read_range_rows_flat(s, pr, slice, pc, resource_tracker, fwd, fwd_mr);
|
||||
};
|
||||
return make_flat_mutation_reader<combined_mutation_reader>(s, std::make_unique<incremental_reader_selector>(s,
|
||||
std::move(sstables),
|
||||
pr,
|
||||
slice,
|
||||
pc,
|
||||
std::move(resource_tracker),
|
||||
std::move(trace_state),
|
||||
fwd,
|
||||
fwd_mr,
|
||||
std::move(reader_factory_fn)),
|
||||
fwd,
|
||||
fwd_mr);
|
||||
}
|
||||
|
||||
future<>
|
||||
@@ -4300,7 +4286,8 @@ write_memtable_to_sstable(memtable& mt, sstables::shared_sstable sst,
|
||||
cfg.leave_unsealed = leave_unsealed;
|
||||
cfg.thread_scheduling_group = tsg;
|
||||
cfg.monitor = std::move(monitor);
|
||||
return sst->write_components(mt.make_flush_reader(mt.schema(), pc), mt.partition_count(), mt.schema(), cfg, pc);
|
||||
return sst->write_components(mt.make_flush_reader(mt.schema(), pc),
|
||||
mt.partition_count(), mt.schema(), cfg, pc);
|
||||
}
|
||||
|
||||
future<>
|
||||
|
||||
21
database.hh
21
database.hh
@@ -503,7 +503,7 @@ private:
|
||||
// Caller needs to ensure that column_family remains live (FIXME: relax this).
|
||||
// The 'range' parameter must be live as long as the reader is used.
|
||||
// Mutations returned by the reader will all have given schema.
|
||||
mutation_reader make_sstable_reader(schema_ptr schema,
|
||||
flat_mutation_reader make_sstable_reader(schema_ptr schema,
|
||||
lw_shared_ptr<sstables::sstable_set> sstables,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
@@ -570,7 +570,7 @@ public:
|
||||
// Mutations returned by the reader will all have given schema.
|
||||
// If I/O needs to be issued to read anything in the specified range, the operations
|
||||
// will be scheduled under the priority class given by pc.
|
||||
mutation_reader make_reader(schema_ptr schema,
|
||||
flat_mutation_reader make_reader(schema_ptr schema,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc = default_priority_class(),
|
||||
@@ -578,7 +578,7 @@ public:
|
||||
streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no,
|
||||
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::yes) const;
|
||||
|
||||
mutation_reader make_reader(schema_ptr schema, const dht::partition_range& range = query::full_partition_range) const {
|
||||
flat_mutation_reader make_reader(schema_ptr schema, const dht::partition_range& range = query::full_partition_range) const {
|
||||
auto& full_slice = schema->full_slice();
|
||||
return make_reader(std::move(schema), range, full_slice);
|
||||
}
|
||||
@@ -841,7 +841,20 @@ public:
|
||||
friend class distributed_loader;
|
||||
};
|
||||
|
||||
mutation_reader make_range_sstable_reader(schema_ptr s,
|
||||
using sstable_reader_factory_type = std::function<flat_mutation_reader(sstables::shared_sstable&, const dht::partition_range& pr)>;
|
||||
|
||||
// Filters out mutation that doesn't belong to current shard.
|
||||
flat_mutation_reader make_local_shard_sstable_reader(schema_ptr s,
|
||||
lw_shared_ptr<sstables::sstable_set> sstables,
|
||||
const dht::partition_range& pr,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
reader_resource_tracker resource_tracker,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr);
|
||||
|
||||
flat_mutation_reader make_range_sstable_reader(schema_ptr s,
|
||||
lw_shared_ptr<sstables::sstable_set> sstables,
|
||||
const dht::partition_range& pr,
|
||||
const query::partition_slice& slice,
|
||||
|
||||
@@ -2209,13 +2209,14 @@ static future<view_ptr> create_view_from_table_row(distributed<service::storage_
|
||||
*/
|
||||
future<std::vector<view_ptr>> create_views_from_schema_partition(distributed<service::storage_proxy>& proxy, const schema_result::mapped_type& result)
|
||||
{
|
||||
auto views = make_lw_shared<std::vector<view_ptr>>();
|
||||
return parallel_for_each(result->rows().begin(), result->rows().end(), [&proxy, views = std::move(views)] (auto&& row) {
|
||||
return create_view_from_table_row(proxy, row).then([views] (auto&& v) {
|
||||
views->push_back(std::move(v));
|
||||
return do_with(std::vector<view_ptr>(), [&] (auto& views) {
|
||||
return parallel_for_each(result->rows().begin(), result->rows().end(), [&proxy, &views] (auto&& row) {
|
||||
return create_view_from_table_row(proxy, row).then([&views] (auto&& v) {
|
||||
views.push_back(std::move(v));
|
||||
});
|
||||
}).then([&views] {
|
||||
return std::move(views);
|
||||
});
|
||||
}).then([views] {
|
||||
return std::move(*views);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -42,43 +42,87 @@ namespace db {
|
||||
|
||||
namespace size_estimates {
|
||||
|
||||
class size_estimates_mutation_reader final : public mutation_reader::impl {
|
||||
class size_estimates_mutation_reader final : public flat_mutation_reader::impl {
|
||||
struct token_range {
|
||||
bytes start;
|
||||
bytes end;
|
||||
};
|
||||
schema_ptr _schema;
|
||||
const dht::partition_range& _prange;
|
||||
const dht::partition_range* _prange;
|
||||
const query::partition_slice& _slice;
|
||||
using ks_range = std::vector<sstring>;
|
||||
stdx::optional<ks_range> _keyspaces;
|
||||
ks_range::const_iterator _current_partition;
|
||||
streamed_mutation::forwarding _fwd;
|
||||
flat_mutation_reader_opt _partition_reader;
|
||||
public:
|
||||
size_estimates_mutation_reader(schema_ptr schema, const dht::partition_range& prange, const query::partition_slice& slice, streamed_mutation::forwarding fwd)
|
||||
: _schema(schema)
|
||||
, _prange(prange)
|
||||
: impl(schema)
|
||||
, _schema(std::move(schema))
|
||||
, _prange(&prange)
|
||||
, _slice(slice)
|
||||
, _fwd(fwd)
|
||||
{ }
|
||||
|
||||
virtual future<streamed_mutation_opt> operator()() override {
|
||||
private:
|
||||
future<> get_next_partition() {
|
||||
// For each specified range, estimate (crudely) mean partition size and partitions count.
|
||||
auto& db = service::get_local_storage_proxy().get_db().local();
|
||||
if (!_keyspaces) {
|
||||
_keyspaces = get_keyspaces(*_schema, db, _prange);
|
||||
_keyspaces = get_keyspaces(*_schema, db, *_prange);
|
||||
_current_partition = _keyspaces->begin();
|
||||
}
|
||||
if (_current_partition == _keyspaces->end()) {
|
||||
return make_ready_future<streamed_mutation_opt>();
|
||||
_end_of_stream = true;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return get_local_ranges().then([&db, this] (auto&& ranges) {
|
||||
auto estimates = this->estimates_for_current_keyspace(db, std::move(ranges));
|
||||
auto mutations = db::system_keyspace::make_size_estimates_mutation(*_current_partition, std::move(estimates));
|
||||
++_current_partition;
|
||||
return streamed_mutation_opt(streamed_mutation_from_mutation(std::move(mutations), _fwd));
|
||||
std::vector<mutation> ms;
|
||||
ms.emplace_back(std::move(mutations));
|
||||
_partition_reader = flat_mutation_reader_from_mutations(std::move(ms), _fwd);
|
||||
});
|
||||
}
|
||||
public:
|
||||
virtual future<> fill_buffer() override {
|
||||
return do_until([this] { return is_end_of_stream() || is_buffer_full(); }, [this] {
|
||||
if (!_partition_reader) {
|
||||
return get_next_partition();
|
||||
}
|
||||
return _partition_reader->consume_pausable([this] (mutation_fragment mf) {
|
||||
push_mutation_fragment(std::move(mf));
|
||||
return stop_iteration(is_buffer_full());
|
||||
}).then([this] {
|
||||
if (_partition_reader->is_end_of_stream() && _partition_reader->is_buffer_empty()) {
|
||||
_partition_reader = stdx::nullopt;
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
virtual void next_partition() override {
|
||||
clear_buffer_to_next_partition();
|
||||
if (is_buffer_empty()) {
|
||||
_partition_reader = stdx::nullopt;
|
||||
}
|
||||
}
|
||||
virtual future<> fast_forward_to(const dht::partition_range& pr) override {
|
||||
clear_buffer();
|
||||
_prange = &pr;
|
||||
_keyspaces = stdx::nullopt;
|
||||
_partition_reader = stdx::nullopt;
|
||||
_end_of_stream = false;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
virtual future<> fast_forward_to(position_range pr) override {
|
||||
forward_buffer_to(pr.start());
|
||||
_end_of_stream = false;
|
||||
if (_partition_reader) {
|
||||
return _partition_reader->fast_forward_to(std::move(pr));
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
/**
|
||||
* Returns the primary ranges for the local node.
|
||||
* Used for testing as well.
|
||||
@@ -270,14 +314,14 @@ private:
|
||||
};
|
||||
|
||||
struct virtual_reader {
|
||||
mutation_reader operator()(schema_ptr schema,
|
||||
flat_mutation_reader operator()(schema_ptr schema,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr) {
|
||||
return make_mutation_reader<size_estimates_mutation_reader>(schema, range, slice, fwd);
|
||||
return make_flat_mutation_reader<size_estimates_mutation_reader>(schema, range, slice, fwd);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
2
dist/ami/files/scylla-ami
vendored
2
dist/ami/files/scylla-ami
vendored
Submodule dist/ami/files/scylla-ami updated: be90a3fb9f...3366c93173
@@ -6,7 +6,7 @@ After=network.target
|
||||
Type=simple
|
||||
User=scylla
|
||||
Group=scylla
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid --repo-files '/etc/yum.repos.d/scylla*.repo' -q -c /etc/scylla.d/housekeeping.cfg version --mode d
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files @@REPOFILES@@ version --mode d
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -6,7 +6,7 @@ After=network.target
|
||||
Type=simple
|
||||
User=scylla
|
||||
Group=scylla
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q --repo-files '/etc/yum.repos.d/scylla*.repo' -c /etc/scylla.d/housekeeping.cfg version --mode r
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files @@REPOFILES@@ version --mode r
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
80
dist/debian/build_deb.sh
vendored
80
dist/debian/build_deb.sh
vendored
@@ -5,7 +5,6 @@ print_usage() {
|
||||
echo "build_deb.sh -target <codename> --dist --rebuild-dep"
|
||||
echo " --target target distribution codename"
|
||||
echo " --dist create a public distribution package"
|
||||
echo " --rebuild-dep rebuild dependency packages"
|
||||
echo " --no-clean don't rebuild pbuilder tgz"
|
||||
exit 1
|
||||
}
|
||||
@@ -17,16 +16,11 @@ install_deps() {
|
||||
sudo dpkg -P ${DEB_FILE%%_*.deb}
|
||||
}
|
||||
|
||||
REBUILD=0
|
||||
DIST=0
|
||||
TARGET=
|
||||
NO_CLEAN=0
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
"--rebuild-dep")
|
||||
REBUILD=1
|
||||
shift 1
|
||||
;;
|
||||
"--dist")
|
||||
DIST=1
|
||||
shift 1
|
||||
@@ -111,11 +105,6 @@ if [ -z "$TARGET" ]; then
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
if [ $REBUILD -eq 1 ] && [ "$TARGET" != "$CODENAME" ]; then
|
||||
echo "Rebuild dependencies doesn't support cross-build."
|
||||
echo "Please run it on following distribution: $TARGET"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
VERSION=$(./SCYLLA-VERSION-GEN)
|
||||
SCYLLA_VERSION=$(cat build/SCYLLA-VERSION-FILE | sed 's/\.rc/~rc/')
|
||||
@@ -138,8 +127,8 @@ if [ "$TARGET" = "jessie" ]; then
|
||||
cp dist/debian/scylla-server.cron.d debian/
|
||||
sed -i -e "s/@@REVISION@@/1~$TARGET/g" debian/changelog
|
||||
sed -i -e "s/@@DH_INSTALLINIT@@//g" debian/rules
|
||||
sed -i -e "s/@@COMPILER@@/g++-5/g" debian/rules
|
||||
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, g++-5, libunwind-dev, antlr3, libthrift-dev, antlr3-c++-dev/g" debian/control
|
||||
sed -i -e "s#@@COMPILER@@#/opt/scylladb/bin/g++-7#g" debian/rules
|
||||
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, g++-7-scylla72, libunwind-dev, scylla-antlr35, scylla-libthrift010-dev, scylla-antlr35-c++-dev, libboost-program-options1.63-dev, libboost-filesystem1.63-dev, libboost-system1.63-dev, libboost-thread1.63-dev, libboost-test1.63-dev/g" debian/control
|
||||
sed -i -e "s/@@DEPENDS@@//g" debian/control
|
||||
sed -i -e "s#@@INSTALL@@##g" debian/scylla-server.install
|
||||
sed -i -e "s#@@HKDOTTIMER_D@@#dist/common/systemd/scylla-housekeeping-daily.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||
@@ -148,16 +137,17 @@ if [ "$TARGET" = "jessie" ]; then
|
||||
sed -i -e "s#@@SYSCTL@@#dist/debian/sysctl.d/99-scylla.conf etc/sysctl.d#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@SCRIPTS_SAVE_COREDUMP@@#dist/debian/scripts/scylla_save_coredump usr/lib/scylla#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@SCRIPTS_DELAY_FSTRIM@@#dist/debian/scripts/scylla_delay_fstrim usr/lib/scylla#g" debian/scylla-server.install
|
||||
elif [ "$TARGET" = "stretch" ] || [ "$TARGET" = "buster" ] || [ "$TARGET" = "sid" ]; then
|
||||
elif [ "$TARGET" = "stretch" ]; then
|
||||
cp dist/debian/scylla-server.cron.d debian/
|
||||
sed -i -e "s/@@REVISION@@/1~$TARGET/g" debian/changelog
|
||||
sed -i -e "s/@@DH_INSTALLINIT@@//g" debian/rules
|
||||
sed -i -e "s/@@COMPILER@@/g++/g" debian/rules
|
||||
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, g++, libunwind8-dev, antlr3, libthrift-dev, antlr3-c++-dev/g" debian/control
|
||||
sed -i -e "s#@@COMPILER@@#/opt/scylladb/bin/g++-7#g" debian/rules
|
||||
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, g++-7-scylla72, libunwind-dev, antlr3, scylla-libthrift010-dev, scylla-antlr35-c++-dev, libboost-program-options1.62-dev, libboost-filesystem1.62-dev, libboost-system1.62-dev, libboost-thread1.62-dev, libboost-test1.62-dev/g" debian/control
|
||||
sed -i -e "s/@@DEPENDS@@//g" debian/control
|
||||
sed -i -e "s#@@INSTALL@@##g" debian/scylla-server.install
|
||||
sed -i -e "s#@@HKDOTTIMER_D@@#dist/common/systemd/scylla-housekeeping-daily.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@HKDOTTIMER_R@@#dist/common/systemd/scylla-housekeeping-restart.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@FTDOTTIMER@@#dist/common/systemd/scylla-fstrim.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@SYSCTL@@#dist/debian/sysctl.d/99-scylla.conf etc/sysctl.d#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@SCRIPTS_SAVE_COREDUMP@@#dist/debian/scripts/scylla_save_coredump usr/lib/scylla#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@SCRIPTS_DELAY_FSTRIM@@#dist/debian/scripts/scylla_delay_fstrim usr/lib/scylla#g" debian/scylla-server.install
|
||||
@@ -165,8 +155,8 @@ elif [ "$TARGET" = "trusty" ]; then
|
||||
cp dist/debian/scylla-server.cron.d debian/
|
||||
sed -i -e "s/@@REVISION@@/0ubuntu1~$TARGET/g" debian/changelog
|
||||
sed -i -e "s/@@DH_INSTALLINIT@@/--upstart-only/g" debian/rules
|
||||
sed -i -e "s/@@COMPILER@@/g++-7/g" debian/rules
|
||||
sed -i -e "s/@@BUILD_DEPENDS@@/g++-7, libunwind8-dev, scylla-antlr35, scylla-libthrift010-dev, scylla-antlr35-c++-dev/g" debian/control
|
||||
sed -i -e "s#@@COMPILER@@#/opt/scylladb/bin/g++-7#g" debian/rules
|
||||
sed -i -e "s/@@BUILD_DEPENDS@@/g++-7-scylla72, libunwind8-dev, scylla-antlr35, scylla-libthrift010-dev, scylla-antlr35-c++-dev, scylla-libboost-program-options163-dev, scylla-libboost-filesystem163-dev, scylla-libboost-system163-dev, scylla-libboost-thread163-dev, scylla-libboost-test163-dev/g" debian/control
|
||||
sed -i -e "s/@@DEPENDS@@/hugepages, num-utils/g" debian/control
|
||||
sed -i -e "s#@@INSTALL@@#dist/debian/sudoers.d/scylla etc/sudoers.d#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@HKDOTTIMER_D@@##g" debian/scylla-server.install
|
||||
@@ -175,11 +165,37 @@ elif [ "$TARGET" = "trusty" ]; then
|
||||
sed -i -e "s#@@SYSCTL@@#dist/debian/sysctl.d/99-scylla.conf etc/sysctl.d#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@SCRIPTS_SAVE_COREDUMP@@#dist/debian/scripts/scylla_save_coredump usr/lib/scylla#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@SCRIPTS_DELAY_FSTRIM@@#dist/debian/scripts/scylla_delay_fstrim usr/lib/scylla#g" debian/scylla-server.install
|
||||
elif [ "$TARGET" = "xenial" ] || [ "$TARGET" = "yakkety" ] || [ "$TARGET" = "zesty" ] || [ "$TARGET" = "artful" ]; then
|
||||
elif [ "$TARGET" = "xenial" ]; then
|
||||
sed -i -e "s/@@REVISION@@/0ubuntu1~$TARGET/g" debian/changelog
|
||||
sed -i -e "s/@@DH_INSTALLINIT@@//g" debian/rules
|
||||
sed -i -e "s#@@COMPILER@@#/opt/scylladb/bin/g++-7#g" debian/rules
|
||||
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, g++-7-scylla72, libunwind-dev, antlr3, scylla-libthrift010-dev, scylla-antlr35-c++-dev, scylla-libboost-program-options163-dev, scylla-libboost-filesystem163-dev, scylla-libboost-system163-dev, scylla-libboost-thread163-dev, scylla-libboost-test163-dev/g" debian/control
|
||||
sed -i -e "s/@@DEPENDS@@/hugepages, /g" debian/control
|
||||
sed -i -e "s#@@INSTALL@@##g" debian/scylla-server.install
|
||||
sed -i -e "s#@@HKDOTTIMER_D@@#dist/common/systemd/scylla-housekeeping-daily.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@HKDOTTIMER_R@@#dist/common/systemd/scylla-housekeeping-restart.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@FTDOTTIMER@@#dist/common/systemd/scylla-fstrim.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@SYSCTL@@##g" debian/scylla-server.install
|
||||
sed -i -e "s#@@SCRIPTS_SAVE_COREDUMP@@##g" debian/scylla-server.install
|
||||
sed -i -e "s#@@SCRIPTS_DELAY_FSTRIM@@##g" debian/scylla-server.install
|
||||
elif [ "$TARGET" = "bionic" ]; then
|
||||
sed -i -e "s/@@REVISION@@/0ubuntu1~$TARGET/g" debian/changelog
|
||||
sed -i -e "s/@@DH_INSTALLINIT@@//g" debian/rules
|
||||
sed -i -e "s#@@COMPILER@@#g++-7#g" debian/rules
|
||||
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, g++, libunwind-dev, antlr3, scylla-libthrift010-dev, scylla-antlr35-c++-dev, libboost-program-options-dev, libboost-filesystem-dev, libboost-system-dev, libboost-thread-dev, libboost-test-dev/g" debian/control
|
||||
sed -i -e "s/@@DEPENDS@@/hugepages, /g" debian/control
|
||||
sed -i -e "s#@@INSTALL@@##g" debian/scylla-server.install
|
||||
sed -i -e "s#@@HKDOTTIMER_D@@#dist/common/systemd/scylla-housekeeping-daily.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@HKDOTTIMER_R@@#dist/common/systemd/scylla-housekeeping-restart.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@FTDOTTIMER@@#dist/common/systemd/scylla-fstrim.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@SYSCTL@@##g" debian/scylla-server.install
|
||||
sed -i -e "s#@@SCRIPTS_SAVE_COREDUMP@@##g" debian/scylla-server.install
|
||||
sed -i -e "s#@@SCRIPTS_DELAY_FSTRIM@@##g" debian/scylla-server.install
|
||||
elif [ "$TARGET" = "yakkety" ] || [ "$TARGET" = "zesty" ] || [ "$TARGET" = "artful" ]; then
|
||||
sed -i -e "s/@@REVISION@@/0ubuntu1~$TARGET/g" debian/changelog
|
||||
sed -i -e "s/@@DH_INSTALLINIT@@//g" debian/rules
|
||||
sed -i -e "s/@@COMPILER@@/g++-7/g" debian/rules
|
||||
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, g++-7, libunwind-dev, antlr3, scylla-libthrift010-dev, scylla-antlr35-c++-dev/g" debian/control
|
||||
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, g++-7, libunwind-dev, antlr3, scylla-libthrift010-dev, scylla-antlr35-c++-dev, libboost-program-options-dev, libboost-filesystem-dev, libboost-system-dev, libboost-thread-dev, libboost-test-dev/g" debian/control
|
||||
sed -i -e "s/@@DEPENDS@@/hugepages, /g" debian/control
|
||||
sed -i -e "s#@@INSTALL@@##g" debian/scylla-server.install
|
||||
sed -i -e "s#@@HKDOTTIMER_D@@#dist/common/systemd/scylla-housekeeping-daily.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||
@@ -198,23 +214,23 @@ else
|
||||
fi
|
||||
cp dist/common/systemd/scylla-server.service.in debian/scylla-server.service
|
||||
sed -i -e "s#@@SYSCONFDIR@@#/etc/default#g" debian/scylla-server.service
|
||||
cp dist/common/systemd/scylla-housekeeping-daily.service debian/scylla-server.scylla-housekeeping-daily.service
|
||||
cp dist/common/systemd/scylla-housekeeping-restart.service debian/scylla-server.scylla-housekeeping-restart.service
|
||||
cp dist/common/systemd/scylla-housekeeping-daily.service.in debian/scylla-server.scylla-housekeeping-daily.service
|
||||
sed -i -e "s#@@REPOFILES@@#'/etc/apt/sources.list.d/scylla*.list'#g" debian/scylla-server.scylla-housekeeping-daily.service
|
||||
cp dist/common/systemd/scylla-housekeeping-restart.service.in debian/scylla-server.scylla-housekeeping-restart.service
|
||||
sed -i -e "s#@@REPOFILES@@#'/etc/apt/sources.list.d/scylla*.list'#g" debian/scylla-server.scylla-housekeeping-restart.service
|
||||
cp dist/common/systemd/scylla-fstrim.service debian/scylla-server.scylla-fstrim.service
|
||||
cp dist/common/systemd/node-exporter.service debian/scylla-server.node-exporter.service
|
||||
|
||||
if [ $REBUILD -eq 1 ]; then
|
||||
./dist/debian/dep/build_dependency.sh
|
||||
fi
|
||||
|
||||
cp ./dist/debian/pbuilderrc ~/.pbuilderrc
|
||||
if [ $NO_CLEAN -eq 0 ]; then
|
||||
sudo rm -fv /var/cache/pbuilder/scylla-server-$TARGET.tgz
|
||||
sudo -E DIST=$TARGET REBUILD=$REBUILD /usr/sbin/pbuilder clean
|
||||
sudo -E DIST=$TARGET REBUILD=$REBUILD /usr/sbin/pbuilder create
|
||||
sudo -E DIST=$TARGET /usr/sbin/pbuilder clean
|
||||
sudo -E DIST=$TARGET /usr/sbin/pbuilder create
|
||||
fi
|
||||
sudo -E DIST=$TARGET REBUILD=$REBUILD /usr/sbin/pbuilder update
|
||||
if [ $REBUILD -eq 1 ]; then
|
||||
sudo -E DIST=$TARGET REBUILD=$REBUILD /usr/sbin/pbuilder execute --save-after-exec dist/debian/dep/pbuilder_install_deps.sh
|
||||
sudo -E DIST=$TARGET /usr/sbin/pbuilder update
|
||||
if [ "$TARGET" = "trusty" ] || [ "$TARGET" = "xenial" ] || [ "$TARGET" = "yakkety" ] || [ "$TARGET" = "zesty" ] || [ "$TARGET" = "artful" ] || [ "$TARGET" = "bionic" ]; then
|
||||
sudo -E DIST=$TARGET /usr/sbin/pbuilder execute --save-after-exec dist/debian/ubuntu_enable_ppa.sh
|
||||
elif [ "$TARGET" = "jessie" ] || [ "$TARGET" = "stretch" ]; then
|
||||
sudo -E DIST=$TARGET /usr/sbin/pbuilder execute --save-after-exec dist/debian/debian_install_gpgkey.sh
|
||||
fi
|
||||
sudo -E DIST=$TARGET REBUILD=$REBUILD pdebuild --buildresult build/debs
|
||||
sudo -E DIST=$TARGET pdebuild --buildresult build/debs
|
||||
|
||||
4
dist/debian/control.in
vendored
4
dist/debian/control.in
vendored
@@ -5,7 +5,7 @@ Section: database
|
||||
Priority: optional
|
||||
X-Python3-Version: >= 3.4
|
||||
Standards-Version: 3.9.5
|
||||
Build-Depends: python3-setuptools (>= 0.6b3), python3-all, python3-all-dev, debhelper (>= 9), libyaml-cpp-dev, liblz4-dev, libsnappy-dev, libcrypto++-dev, libjsoncpp-dev, libaio-dev, thrift-compiler, ragel, ninja-build, git, scylla-libboost-program-options163-dev | libboost-program-options1.55-dev | libboost-program-options-dev, scylla-libboost-filesystem163-dev | libboost-filesystem1.55-dev | libboost-filesystem-dev, scylla-libboost-system163-dev | libboost-system1.55-dev | libboost-system-dev, scylla-libboost-thread163-dev | libboost-thread1.55-dev | libboost-thread-dev, scylla-libboost-test163-dev | libboost-test1.55-dev | libboost-test-dev, libgnutls28-dev, libhwloc-dev, libnuma-dev, libpciaccess-dev, xfslibs-dev, python3-pyparsing, libxml2-dev, libsctp-dev, python-urwid, pciutils, libprotobuf-dev, protobuf-compiler, systemtap-sdt-dev, cmake, libssl-dev, @@BUILD_DEPENDS@@
|
||||
Build-Depends: python3-setuptools, python3-all, python3-all-dev, debhelper (>= 9), libyaml-cpp-dev, liblz4-dev, libsnappy-dev, libcrypto++-dev, libjsoncpp-dev, libaio-dev, thrift-compiler, ragel, ninja-build, git, libgnutls28-dev, libhwloc-dev, libnuma-dev, libpciaccess-dev, xfslibs-dev, python3-pyparsing, libxml2-dev, libsctp-dev, python-urwid, pciutils, libprotobuf-dev, protobuf-compiler, systemtap-sdt-dev, cmake, libssl-dev, @@BUILD_DEPENDS@@
|
||||
|
||||
Package: scylla-conf
|
||||
Architecture: any
|
||||
@@ -41,7 +41,7 @@ Description: Scylla kernel tuning configuration
|
||||
Package: scylla
|
||||
Section: metapackages
|
||||
Architecture: any
|
||||
Depends: scylla-server, scylla-jmx, scylla-tools, scylla-kernel-conf
|
||||
Depends: scylla-server, scylla-jmx, scylla-tools, scylla-tools-core, scylla-kernel-conf
|
||||
Description: Scylla database metapackage
|
||||
Scylla is a highly scalable, eventually consistent, distributed,
|
||||
partitioned row DB.
|
||||
|
||||
13
dist/debian/debian_install_gpgkey.sh
vendored
Normal file
13
dist/debian/debian_install_gpgkey.sh
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
#!/bin/bash
|
||||
. /etc/os-release
|
||||
|
||||
apt-get -y install gnupg-curl ca-certificates
|
||||
if [ "$VERSION_ID" = "8" ]; then
|
||||
apt-key adv --fetch-keys https://download.opensuse.org/repositories/home:/scylladb:/scylla-3rdparty-jessie/Debian_8.0/Release.key
|
||||
elif [ "$VERSION_ID" = "9" ]; then
|
||||
apt-key adv --fetch-keys https://download.opensuse.org/repositories/home:/scylladb:/scylla-3rdparty-stretch/Debian_9.0/Release.key
|
||||
else
|
||||
echo "Unsupported distribution."
|
||||
exit 1
|
||||
fi
|
||||
apt-get update
|
||||
3
dist/debian/dep/antlr3-3.5.2/antlr3
vendored
3
dist/debian/dep/antlr3-3.5.2/antlr3
vendored
@@ -1,3 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
exec /usr/bin/java -jar /usr/share/java/antlr-3.5.2-complete-no-st3.jar $*
|
||||
@@ -1,5 +0,0 @@
|
||||
antlr3 (3.5.2-ubuntu1) trusty; urgency=medium
|
||||
|
||||
* Initial release.
|
||||
|
||||
-- Takuya ASADA <syuu@scylladb.com> Mon, 24 Aug 2015 09:22:55 +0000
|
||||
1
dist/debian/dep/antlr3-3.5.2/debian/compat
vendored
1
dist/debian/dep/antlr3-3.5.2/debian/compat
vendored
@@ -1 +0,0 @@
|
||||
9
|
||||
13
dist/debian/dep/antlr3-3.5.2/debian/control
vendored
13
dist/debian/dep/antlr3-3.5.2/debian/control
vendored
@@ -1,13 +0,0 @@
|
||||
Source: antlr3
|
||||
Maintainer: Takuya ASADA <syuu@scylladb.com>
|
||||
Section: misc
|
||||
Priority: optional
|
||||
Standards-Version: 3.5.2
|
||||
Build-Depends: debhelper (>= 9)
|
||||
|
||||
Package: antlr3
|
||||
Architecture: all
|
||||
Depends: ${shlibs:Depends}, ${misc:Depends}, openjdk-7-jre-headless
|
||||
Replaces: antlr3-tool
|
||||
Description: language tool for constructing recognizers, compilers etc
|
||||
A language tool that provides a framework for constructing recognizers, interpreters, compilers, and translators from grammatical descriptions containing actions in a variety of target languages.
|
||||
16
dist/debian/dep/antlr3-3.5.2/debian/copyright
vendored
16
dist/debian/dep/antlr3-3.5.2/debian/copyright
vendored
@@ -1,16 +0,0 @@
|
||||
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
|
||||
Upstream-Name: ANTLR
|
||||
Upstream-Contact: http://www.antlr.org/
|
||||
Source: https://github.com/antlr/antlr3
|
||||
|
||||
Files: antlr-3.5.2-complete-no-st3.jar
|
||||
Copyright: Copyright (c) 2013 Terence Parr
|
||||
License: BSD-3-clause
|
||||
|
||||
Files: antlr3
|
||||
Copyright: Copyright (c) 2015 ScyllaDB
|
||||
License: AGPL-3.0
|
||||
|
||||
Files: debian/*
|
||||
Copyright: Copyright (c) 2015 ScyllaDB
|
||||
License: AGPL-3.0
|
||||
12
dist/debian/dep/antlr3-3.5.2/debian/rules
vendored
12
dist/debian/dep/antlr3-3.5.2/debian/rules
vendored
@@ -1,12 +0,0 @@
|
||||
#!/usr/bin/make -f
|
||||
|
||||
override_dh_auto_install:
|
||||
mkdir -p $(CURDIR)/debian/antlr3/usr/share/java
|
||||
cp $(CURDIR)/antlr-3.5.2-complete-no-st3.jar \
|
||||
$(CURDIR)/debian/antlr3/usr/share/java
|
||||
|
||||
mkdir -p $(CURDIR)/debian/antlr3/usr/bin
|
||||
cp $(CURDIR)/antlr3 \
|
||||
$(CURDIR)/debian/antlr3/usr/bin
|
||||
%:
|
||||
dh $@
|
||||
@@ -1,5 +0,0 @@
|
||||
antlr3-c++-dev (3.5.2-ubuntu1) trusty; urgency=medium
|
||||
|
||||
* Initial release.
|
||||
|
||||
-- Takuya ASADA <syuu@scylladb.com> Mon, 24 Aug 2015 09:22:55 +0000
|
||||
@@ -1 +0,0 @@
|
||||
9
|
||||
@@ -1,12 +0,0 @@
|
||||
Source: antlr3-c++-dev
|
||||
Maintainer: Takuya ASADA <syuu@scylladb.com>
|
||||
Section: misc
|
||||
Priority: optional
|
||||
Standards-Version: 3.5.2
|
||||
Build-Depends: debhelper (>= 9)
|
||||
|
||||
Package: antlr3-c++-dev
|
||||
Architecture: all
|
||||
Depends: ${shlibs:Depends}, ${misc:Depends}
|
||||
Description: language tool for constructing recognizers, compilers etc
|
||||
A language tool that provides a framework for constructing recognizers, interpreters, compilers, and translators from grammatical descriptions containing actions in a variety of target languages.
|
||||
@@ -1,12 +0,0 @@
|
||||
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
|
||||
Upstream-Name: ANTLR
|
||||
Upstream-Contact: http://www.antlr.org/
|
||||
Source: https://github.com/antlr/antlr3
|
||||
|
||||
Files: *
|
||||
Copyright: Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB
|
||||
License: BSD-3-clause
|
||||
|
||||
Files: debian/*
|
||||
Copyright: Copyright (c) 2015 ScyllaDB
|
||||
License: AGPL-3.0
|
||||
@@ -1,8 +0,0 @@
|
||||
#!/usr/bin/make -f
|
||||
|
||||
override_dh_auto_install:
|
||||
mkdir -p $(CURDIR)/debian/antlr3-c++-dev/usr/include
|
||||
cp $(CURDIR)/runtime/Cpp/include/* \
|
||||
$(CURDIR)/debian/antlr3-c++-dev/usr/include
|
||||
%:
|
||||
dh $@
|
||||
123
dist/debian/dep/build_dependency.sh
vendored
123
dist/debian/dep/build_dependency.sh
vendored
@@ -1,123 +0,0 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
. /etc/os-release
|
||||
install_deps() {
|
||||
echo Y | sudo mk-build-deps
|
||||
DEB_FILE=`ls *-build-deps*.deb`
|
||||
sudo gdebi -n $DEB_FILE
|
||||
sudo rm -f $DEB_FILE
|
||||
sudo dpkg -P ${DEB_FILE%%_*.deb}
|
||||
}
|
||||
|
||||
CODENAME=`lsb_release -c|awk '{print $2}'`
|
||||
|
||||
# workaround fix for #2444
|
||||
if [ "$CODENAME" = "jessie" ]; then
|
||||
if [ ! -e /etc/apt/sources.list.d/jessie-backports.list ]; then
|
||||
sudo sh -c 'echo deb "http://httpredir.debian.org/debian jessie-backports main" > /etc/apt/sources.list.d/jessie-backports.list'
|
||||
fi
|
||||
sudo apt-get -y update
|
||||
sudo apt-get install -t jessie-backports -y texlive
|
||||
fi
|
||||
|
||||
if [ ! -f /usr/bin/gdebi ]; then
|
||||
sudo apt-get install -y gdebi-core
|
||||
fi
|
||||
if [ ! -f /usr/bin/mk-build-deps ]; then
|
||||
sudo apt-get install -y devscripts
|
||||
fi
|
||||
if [ ! -f /usr/bin/equivs ]; then
|
||||
sudo apt-get install -y equivs
|
||||
fi
|
||||
|
||||
if [ "$CODENAME" = "trusty" ] || [ "$CODENAME" = "jessie" ]; then
|
||||
if [ ! -f build/antlr3_*.deb ]; then
|
||||
rm -rf build/antlr3-3.5.2
|
||||
mkdir -p build/antlr3-3.5.2
|
||||
cp -a dist/debian/dep/antlr3-3.5.2/* build/antlr3-3.5.2
|
||||
cd build/antlr3-3.5.2
|
||||
wget -nv http://www.antlr3.org/download/antlr-3.5.2-complete-no-st3.jar
|
||||
install_deps
|
||||
debuild -r fakeroot --no-tgz-check -us -uc
|
||||
cd -
|
||||
fi
|
||||
if [ ! -f build/scylla-env_*.deb ]; then
|
||||
rm -rf build/scylla-env-1.0
|
||||
cp -a dist/common/dep/scylla-env-1.0 build/
|
||||
cd build/scylla-env-1.0
|
||||
debuild -r fakeroot --no-tgz-check -us -uc
|
||||
cd -
|
||||
fi
|
||||
if [ ! -f build/scylla-gdb_*.deb ]; then
|
||||
rm -rf build/gdb-7.11
|
||||
if [ ! -f build/gdb_7.11-0ubuntu1.dsc ]; then
|
||||
wget -nv -O build/gdb_7.11-0ubuntu1.dsc http://archive.ubuntu.com/ubuntu/pool/main/g/gdb/gdb_7.11-0ubuntu1.dsc
|
||||
fi
|
||||
if [ ! -f build/gdb_7.11.orig.tar.xz ]; then
|
||||
wget -nv -O build/gdb_7.11.orig.tar.xz http://archive.ubuntu.com/ubuntu/pool/main/g/gdb/gdb_7.11.orig.tar.xz
|
||||
fi
|
||||
if [ ! -f build/gdb_7.11-0ubuntu1.debian.tar.xz ]; then
|
||||
wget -nv -O build/gdb_7.11-0ubuntu1.debian.tar.xz http://archive.ubuntu.com/ubuntu/pool/main/g/gdb/gdb_7.11-0ubuntu1.debian.tar.xz
|
||||
fi
|
||||
cd build
|
||||
dpkg-source -x gdb_7.11-0ubuntu1.dsc
|
||||
mv gdb_7.11.orig.tar.xz scylla-gdb_7.11.orig.tar.xz
|
||||
cd -
|
||||
cd build/gdb-7.11
|
||||
patch -p0 < ../../dist/debian/dep/gdb.diff
|
||||
install_deps
|
||||
debuild -r fakeroot --no-tgz-check -us -uc
|
||||
cd -
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ ! -f build/antlr3-c++-dev_*.deb ]; then
|
||||
rm -rf build/antlr3-c++-dev-3.5.2
|
||||
if [ ! -f build/3.5.2.tar.gz ]; then
|
||||
wget -nv -O build/3.5.2.tar.gz https://github.com/antlr/antlr3/archive/3.5.2.tar.gz
|
||||
fi
|
||||
cd build
|
||||
tar xpf 3.5.2.tar.gz
|
||||
mv antlr3-3.5.2 antlr3-c++-dev-3.5.2
|
||||
cd -
|
||||
cp -a dist/debian/dep/antlr3-c++-dev-3.5.2/debian build/antlr3-c++-dev-3.5.2
|
||||
cd build/antlr3-c++-dev-3.5.2
|
||||
install_deps
|
||||
debuild -r fakeroot --no-tgz-check -us -uc
|
||||
cd -
|
||||
fi
|
||||
|
||||
if [ ! -f build/libthrift0_*.deb ]; then
|
||||
rm -rf build/thrift-0.10.0
|
||||
if [ ! -f build/thrift-0.10.0.tar.gz ]; then
|
||||
wget -nv -O build/thrift-0.10.0.tar.gz http://archive.apache.org/dist/thrift/0.10.0/thrift-0.10.0.tar.gz
|
||||
fi
|
||||
cd build
|
||||
tar xpf thrift-0.10.0.tar.gz
|
||||
cd thrift-0.10.0
|
||||
patch -p0 < ../../dist/debian/dep/thrift.diff
|
||||
install_deps
|
||||
debuild -r fakeroot --no-tgz-check -us -uc
|
||||
cd ../..
|
||||
fi
|
||||
|
||||
if [ "$CODENAME" = "jessie" ]; then
|
||||
if [ ! -f build/gcc-5_*.deb ]; then
|
||||
cd build
|
||||
wget https://launchpad.net/debian/+archive/primary/+files/gcc-5_5.4.1-5.dsc
|
||||
wget https://launchpad.net/debian/+archive/primary/+files/gcc-5_5.4.1.orig.tar.gz
|
||||
wget https://launchpad.net/debian/+archive/primary/+files/gcc-5_5.4.1-5.diff.gz
|
||||
dpkg-source -x gcc-5_5.4.1-5.dsc
|
||||
cd gcc-5-5.4.1
|
||||
# resolve build time dependencies manually, since mk-build-deps doesn't works for gcc package
|
||||
sudo apt-get install -y g++-multilib libc6-dev-i386 lib32gcc1 libc6-dev-x32 libx32gcc1 libc6-dbg m4 libtool autoconf2.64 autogen gawk zlib1g-dev systemtap-sdt-dev gperf bison flex gdb texinfo locales sharutils libantlr-java libffi-dev gnat-4.9 libisl-dev libmpc-dev libmpfr-dev libgmp-dev dejagnu realpath chrpath quilt doxygen graphviz ghostscript texlive-latex-base xsltproc libxml2-utils docbook-xsl-ns
|
||||
patch -p0 < ../../dist/debian/dep/debian-gcc-5-jessie.diff
|
||||
./debian/rules control
|
||||
debuild -r fakeroot -us -uc
|
||||
cd ../..
|
||||
fi
|
||||
fi
|
||||
|
||||
rm -rf /var/tmp/pbuilder
|
||||
mkdir /var/tmp/pbuilder
|
||||
cp -v build/*.deb /var/tmp/pbuilder/
|
||||
245
dist/debian/dep/debian-gcc-5-jessie.diff
vendored
245
dist/debian/dep/debian-gcc-5-jessie.diff
vendored
@@ -1,245 +0,0 @@
|
||||
diff -Nur ../gcc-5-5.4.1.orig/debian/patches/unwind_dw2_fde_nolock.diff ./debian/patches/unwind_dw2_fde_nolock.diff
|
||||
--- ../gcc-5-5.4.1.orig/debian/patches/unwind_dw2_fde_nolock.diff 1970-01-01 00:00:00.000000000 +0000
|
||||
+++ ./debian/patches/unwind_dw2_fde_nolock.diff 2017-08-09 00:23:51.095939513 +0000
|
||||
@@ -0,0 +1,95 @@
|
||||
+commit 2e452daf02a37ec310b2431375ceca569d0d6284
|
||||
+Author: jakub <jakub@138bc75d-0d04-0410-961f-82ee72b054a4>
|
||||
+Date: Fri Sep 16 19:17:47 2016 +0000
|
||||
+
|
||||
+ PR libgcc/71744
|
||||
+ * unwind-dw2-fde.c (ATOMIC_FDE_FAST_PATH): Define if __register_frame*
|
||||
+ is not the primary registry and atomics are available.
|
||||
+ (any_objects_registered): New variable.
|
||||
+ (__register_frame_info_bases, __register_frame_info_table_bases):
|
||||
+ Atomically store 1 to any_objects_registered after registering first
|
||||
+ unwind info.
|
||||
+ (_Unwind_Find_FDE): Return early if any_objects_registered is 0.
|
||||
+
|
||||
+ git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@240193 138bc75d-0d04-0410-961f-82ee72b054a4
|
||||
+
|
||||
+diff --git a/src/libgcc/unwind-dw2-fde.c b/src/libgcc/unwind-dw2-fde.c
|
||||
+index 0bcf516..6ae2377 100644
|
||||
+--- a/src/libgcc/unwind-dw2-fde.c
|
||||
++++ b/src/libgcc/unwind-dw2-fde.c
|
||||
+@@ -35,6 +35,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
+ #include "unwind-pe.h"
|
||||
+ #include "unwind-dw2-fde.h"
|
||||
+ #include "gthr.h"
|
||||
++#else
|
||||
++#if (defined(__GTHREAD_MUTEX_INIT) || defined(__GTHREAD_MUTEX_INIT_FUNCTION)) \
|
||||
++ && defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4)
|
||||
++#define ATOMIC_FDE_FAST_PATH 1
|
||||
++#endif
|
||||
+ #endif
|
||||
+
|
||||
+ /* The unseen_objects list contains objects that have been registered
|
||||
+@@ -43,6 +48,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
+ by decreasing value of pc_begin. */
|
||||
+ static struct object *unseen_objects;
|
||||
+ static struct object *seen_objects;
|
||||
++#ifdef ATOMIC_FDE_FAST_PATH
|
||||
++static int any_objects_registered;
|
||||
++#endif
|
||||
+
|
||||
+ #ifdef __GTHREAD_MUTEX_INIT
|
||||
+ static __gthread_mutex_t object_mutex = __GTHREAD_MUTEX_INIT;
|
||||
+@@ -96,6 +104,16 @@ __register_frame_info_bases (const void *begin, struct object *ob,
|
||||
+
|
||||
+ ob->next = unseen_objects;
|
||||
+ unseen_objects = ob;
|
||||
++#ifdef ATOMIC_FDE_FAST_PATH
|
||||
++ /* Set flag that at least one library has registered FDEs.
|
||||
++ Use relaxed MO here, it is up to the app to ensure that the library
|
||||
++ loading/initialization happens-before using that library in other
|
||||
++ threads (in particular unwinding with that library's functions
|
||||
++ appearing in the backtraces). Calling that library's functions
|
||||
++ without waiting for the library to initialize would be racy. */
|
||||
++ if (!any_objects_registered)
|
||||
++ __atomic_store_n (&any_objects_registered, 1, __ATOMIC_RELAXED);
|
||||
++#endif
|
||||
+
|
||||
+ __gthread_mutex_unlock (&object_mutex);
|
||||
+ }
|
||||
+@@ -140,6 +158,16 @@ __register_frame_info_table_bases (void *begin, struct object *ob,
|
||||
+
|
||||
+ ob->next = unseen_objects;
|
||||
+ unseen_objects = ob;
|
||||
++#ifdef ATOMIC_FDE_FAST_PATH
|
||||
++ /* Set flag that at least one library has registered FDEs.
|
||||
++ Use relaxed MO here, it is up to the app to ensure that the library
|
||||
++ loading/initialization happens-before using that library in other
|
||||
++ threads (in particular unwinding with that library's functions
|
||||
++ appearing in the backtraces). Calling that library's functions
|
||||
++ without waiting for the library to initialize would be racy. */
|
||||
++ if (!any_objects_registered)
|
||||
++ __atomic_store_n (&any_objects_registered, 1, __ATOMIC_RELAXED);
|
||||
++#endif
|
||||
+
|
||||
+ __gthread_mutex_unlock (&object_mutex);
|
||||
+ }
|
||||
+@@ -1001,6 +1029,19 @@ _Unwind_Find_FDE (void *pc, struct dwarf_eh_bases *bases)
|
||||
+ struct object *ob;
|
||||
+ const fde *f = NULL;
|
||||
+
|
||||
++#ifdef ATOMIC_FDE_FAST_PATH
|
||||
++ /* For targets where unwind info is usually not registered through these
|
||||
++ APIs anymore, avoid taking a global lock.
|
||||
++ Use relaxed MO here, it is up to the app to ensure that the library
|
||||
++ loading/initialization happens-before using that library in other
|
||||
++ threads (in particular unwinding with that library's functions
|
||||
++ appearing in the backtraces). Calling that library's functions
|
||||
++ without waiting for the library to initialize would be racy. */
|
||||
++ if (__builtin_expect (!__atomic_load_n (&any_objects_registered,
|
||||
++ __ATOMIC_RELAXED), 1))
|
||||
++ return NULL;
|
||||
++#endif
|
||||
++
|
||||
+ init_object_mutex_once ();
|
||||
+ __gthread_mutex_lock (&object_mutex);
|
||||
+
|
||||
diff -Nur ../gcc-5-5.4.1.orig/debian/rules.conf ./debian/rules.conf
|
||||
--- ../gcc-5-5.4.1.orig/debian/rules.conf 2017-08-09 00:26:09.000000000 +0000
|
||||
+++ ./debian/rules.conf 2017-08-09 00:26:51.177254418 +0000
|
||||
@@ -206,7 +206,7 @@
|
||||
ifneq (,$(filter $(distrelease),vivid))
|
||||
BINUTILSBDV = 2.25-3~
|
||||
else ifneq (,$(filter $(distrelease),jessie))
|
||||
- BINUTILSBDV = 2.25-7~
|
||||
+ BINUTILSBDV = 2.25-5~
|
||||
else ifneq (,$(filter $(distrelease),sid stretch xenial))
|
||||
BINUTILSBDV = 2.26.1
|
||||
endif
|
||||
@@ -386,10 +386,10 @@
|
||||
MPFR_BUILD_DEP = libmpfr-dev (>= 3.0.0-9~),
|
||||
endif
|
||||
|
||||
-ISL_BUILD_DEP = libisl-dev,
|
||||
-ifneq (,$(filter $(distrelease),jessie stretch sid experimental))
|
||||
- ISL_BUILD_DEP = libisl-dev (>= 0.14),
|
||||
-endif
|
||||
+#ISL_BUILD_DEP = libisl-dev,
|
||||
+#ifneq (,$(filter $(distrelease),jessie stretch sid experimental))
|
||||
+# ISL_BUILD_DEP = libisl-dev (>= 0.14),
|
||||
+#endif
|
||||
|
||||
ifneq (,$(filter $(distrelease),lenny etch squeeze wheezy dapper hardy jaunty karmic lucid maverick natty oneiric precise quantal raring))
|
||||
MPC_BUILD_DEP = libmpc-dev,
|
||||
@@ -411,13 +411,6 @@
|
||||
SDT_BUILD_DEP = systemtap-sdt-dev [linux-any kfreebsd-any hurd-any],
|
||||
endif
|
||||
|
||||
-# ensure that the common libs, built from the next GCC version are available
|
||||
-ifeq ($(PKGSOURCE),gcc-$(BASE_VERSION))
|
||||
- ifneq ($(with_common_libs),yes)
|
||||
- BASE_BUILD_DEP = gcc-6-base,
|
||||
- endif
|
||||
-endif
|
||||
-
|
||||
ifneq ($(DEB_CROSS),yes)
|
||||
# all archs for which to create b-d's
|
||||
any_archs = alpha amd64 armel armhf arm64 i386 mips mipsel mips64 mips64el powerpc ppc64 ppc64el m68k sh4 sparc64 s390x x32
|
||||
diff -Nur ../gcc-5-5.4.1.orig/debian/rules.defs ./debian/rules.defs
|
||||
--- ../gcc-5-5.4.1.orig/debian/rules.defs 2017-08-09 00:26:09.000000000 +0000
|
||||
+++ ./debian/rules.defs 2017-08-09 00:26:51.177254418 +0000
|
||||
@@ -412,7 +412,7 @@
|
||||
# gcc versions (fixincludes, libgcj-common) ...
|
||||
#with_common_pkgs := yes
|
||||
# ... and some libraries, which do not change (libgcc1, libssp0).
|
||||
-#with_common_libs := yes
|
||||
+with_common_libs := yes
|
||||
# XXX: should with_common_libs be "yes" only if this is the default compiler
|
||||
# version on the targeted arch?
|
||||
|
||||
@@ -560,7 +560,7 @@
|
||||
# C ---------------------------
|
||||
enabled_languages := c
|
||||
|
||||
-with_jit = yes
|
||||
+with_jit = no
|
||||
|
||||
# FIXME: compiler bug
|
||||
jit_no_cpus := ia64
|
||||
@@ -660,6 +660,8 @@
|
||||
endif
|
||||
with_ada := $(call envfilt, ada, , , $(with_ada))
|
||||
|
||||
+with_ada := no
|
||||
+
|
||||
|
||||
ifeq ($(DEB_STAGE)-$(filter libgnat, $(with_rtlibs)),rtlibs-)
|
||||
with_ada := disabled for rtlibs stage
|
||||
@@ -786,6 +788,8 @@
|
||||
endif
|
||||
with_java := $(call envfilt, java, , c++, $(with_java))
|
||||
|
||||
+with_java := no
|
||||
+
|
||||
ifeq ($(DEB_STAGE)-$(filter libgcj, $(with_rtlibs)),rtlibs-)
|
||||
with_java := disabled for rtlibs stage
|
||||
endif
|
||||
@@ -915,6 +919,8 @@
|
||||
endif
|
||||
with_go := $(call envfilt, go, , , $(with_go))
|
||||
|
||||
+with_go := no
|
||||
+
|
||||
# Build all packages needed for Go development
|
||||
ifneq (,$(findstring gcc, $(PKGSOURCE)))
|
||||
ifeq ($(with_go),yes)
|
||||
@@ -961,6 +967,8 @@
|
||||
endif
|
||||
with_d := $(call envfilt, d, , , $(with_d))
|
||||
|
||||
+with_d := no
|
||||
+
|
||||
ifeq ($(with_base_only),yes)
|
||||
with_d := no
|
||||
endif
|
||||
@@ -1016,6 +1024,8 @@
|
||||
|
||||
with_fortran := $(call envfilt, fortran, , , $(with_fortran))
|
||||
|
||||
+with_fortran := no
|
||||
+
|
||||
# Build all packages needed for Fortran development
|
||||
ifeq ($(with_fortran),yes)
|
||||
ifeq ($(with_dev),yes)
|
||||
@@ -1063,6 +1073,8 @@
|
||||
endif
|
||||
with_objc := $(call envfilt, objc, obj-c++, , $(with_objc))
|
||||
|
||||
+with_objc := no
|
||||
+
|
||||
ifeq ($(with_objc),yes)
|
||||
# the ObjC runtime with garbage collection enabled needs the Boehm GC
|
||||
with_objc_gc := yes
|
||||
@@ -1103,6 +1115,8 @@
|
||||
endif
|
||||
with_objcxx := $(call envfilt, obj-c++, , c++ objc, $(with_objcxx))
|
||||
|
||||
+with_objcxx := no
|
||||
+
|
||||
ifeq ($(with_objcxx),yes)
|
||||
enabled_languages += obj-c++
|
||||
endif
|
||||
@@ -1480,6 +1494,9 @@
|
||||
with_check := disabled for D
|
||||
endif
|
||||
with_check := $(call envfilt, check, , , $(with_check))
|
||||
+
|
||||
+with_check := no
|
||||
+
|
||||
ifdef WITHOUT_CHECK
|
||||
with_check := disabled by environment
|
||||
endif
|
||||
diff -Nur ../gcc-5-5.4.1.orig/debian/rules.patch ./debian/rules.patch
|
||||
--- ../gcc-5-5.4.1.orig/debian/rules.patch 2017-08-09 00:26:09.000000000 +0000
|
||||
+++ ./debian/rules.patch 2017-08-09 00:24:35.795274920 +0000
|
||||
@@ -113,6 +113,7 @@
|
||||
libjava-mips64el \
|
||||
libffi-pax \
|
||||
libffi-race-condition \
|
||||
+ unwind_dw2_fde_nolock \
|
||||
|
||||
# this is still needed on powerpc, e.g. firefox and insighttoolkit4 will ftbfs.
|
||||
ifneq (,$(filter $(DEB_TARGET_ARCH),powerpc))
|
||||
3101
dist/debian/dep/gdb.diff
vendored
3101
dist/debian/dep/gdb.diff
vendored
File diff suppressed because it is too large
Load Diff
38
dist/debian/dep/pbuilder_install_deps.sh
vendored
38
dist/debian/dep/pbuilder_install_deps.sh
vendored
@@ -1,38 +0,0 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
sudo apt update
|
||||
if [ ! -f /usr/bin/gdebi ]; then
|
||||
sudo apt install -y gdebi-core
|
||||
fi
|
||||
if [ ! -f /usr/bin/lsb_release ]; then
|
||||
sudo apt install -y lsb-release
|
||||
fi
|
||||
|
||||
CODENAME=`lsb_release -c|awk '{print $2}'`
|
||||
|
||||
sudo gdebi -n /var/tmp/pbuilder/antlr3-c++-dev_*.deb
|
||||
sudo gdebi -n /var/tmp/pbuilder/libthrift0_*.deb
|
||||
sudo gdebi -n /var/tmp/pbuilder/libthrift-dev_*.deb
|
||||
if [ "$CODENAME" = "trusty" ] || [ "$CODENAME" = "jessie" ]; then
|
||||
sudo gdebi -n /var/tmp/pbuilder/antlr3_*.deb
|
||||
fi
|
||||
if [ "$CODENAME" = "jessie" ]; then
|
||||
sudo gdebi -n /var/tmp/pbuilder/gcc-5-base_*.deb
|
||||
sudo gdebi -n /var/tmp/pbuilder/libatomic1_*.deb
|
||||
sudo gdebi -n /var/tmp/pbuilder/libcilkrts5_*.deb
|
||||
sudo gdebi -n /var/tmp/pbuilder/libgcc1_*.deb
|
||||
sudo gdebi -n /var/tmp/pbuilder/libgomp1_*.deb
|
||||
sudo gdebi -n /var/tmp/pbuilder/libitm1_*.deb
|
||||
sudo gdebi -n /var/tmp/pbuilder/liblsan0_*.deb
|
||||
sudo gdebi -n /var/tmp/pbuilder/libstdc++6_*.deb
|
||||
sudo gdebi -n /var/tmp/pbuilder/libtsan0_*.deb
|
||||
sudo gdebi -n /var/tmp/pbuilder/libubsan0_*.deb
|
||||
sudo gdebi -n /var/tmp/pbuilder/libasan2_*.deb
|
||||
sudo gdebi -n /var/tmp/pbuilder/libcc1-0_*.deb
|
||||
sudo gdebi -n /var/tmp/pbuilder/libmpx0_*.deb
|
||||
sudo gdebi -n /var/tmp/pbuilder/libgcc-5-dev_*.deb
|
||||
sudo gdebi -n /var/tmp/pbuilder/libstdc++-5-dev_*.deb
|
||||
sudo gdebi -n /var/tmp/pbuilder/cpp-5_*.deb
|
||||
sudo gdebi -n /var/tmp/pbuilder/gcc-5_*.deb
|
||||
sudo gdebi -n /var/tmp/pbuilder/g++-5_*.deb
|
||||
fi
|
||||
344
dist/debian/dep/thrift.diff
vendored
344
dist/debian/dep/thrift.diff
vendored
@@ -1,344 +0,0 @@
|
||||
diff -Nur debian/changelog ../thrift-0.10.0.new/debian/changelog
|
||||
--- debian/changelog 2016-12-19 20:05:45.000000000 +0000
|
||||
+++ ../thrift-0.10.0.new/debian/changelog 2017-06-20 19:14:33.902186804 +0000
|
||||
@@ -1,3 +1,8 @@
|
||||
+thrift (0.10.0-1) stable; urgency=low
|
||||
+ * update to 0.10.0
|
||||
+
|
||||
+ -- Takuya ASADA <syuu@scylladb.com> Mon, 19 Jun 2017 23:08:43 +0000
|
||||
+
|
||||
thrift (0.10.0) stable; urgency=low
|
||||
|
||||
* update to 0.10.0
|
||||
diff -Nur debian/control ../thrift-0.10.0.new/debian/control
|
||||
--- debian/control 2016-12-21 03:04:19.000000000 +0000
|
||||
+++ ../thrift-0.10.0.new/debian/control 2017-06-20 19:14:33.902186804 +0000
|
||||
@@ -1,14 +1,10 @@
|
||||
Source: thrift
|
||||
Section: devel
|
||||
Priority: extra
|
||||
-Build-Depends: debhelper (>= 9), build-essential, mono-mcs, python-dev, ant,
|
||||
- mono-devel, libmono-system-web4.0-cil, erlang-base, ruby-dev | ruby1.9.1-dev, ruby-bundler ,autoconf, automake,
|
||||
+Build-Depends: debhelper (>= 9), build-essential, autoconf, automake,
|
||||
pkg-config, libtool, bison, flex, libboost-dev | libboost1.53-dev,
|
||||
- python-all, python-setuptools, python-all-dev, python-all-dbg,
|
||||
- python3-all, python3-setuptools, python3-all-dev, python3-all-dbg,
|
||||
- openjdk-7-jdk | openjdk-8-jdk | default-jdk,
|
||||
- libboost-test-dev | libboost-test1.53-dev, libevent-dev, libssl-dev, perl (>= 5.8.0-7),
|
||||
- php5 | php7.0, php5-dev | php7.0-dev, libglib2.0-dev, qtchooser, qtbase5-dev-tools
|
||||
+ libboost-test-dev | libboost-test1.53-dev, libevent-dev, libssl-dev,
|
||||
+ libglib2.0-dev, qtchooser, qtbase5-dev-tools
|
||||
Maintainer: Thrift Developer's <dev@thrift.apache.org>
|
||||
Homepage: http://thrift.apache.org/
|
||||
Vcs-Git: https://git-wip-us.apache.org/repos/asf/thrift.git
|
||||
@@ -17,144 +13,6 @@
|
||||
X-Python-Version: >= 2.6
|
||||
X-Python3-Version: >= 3.3
|
||||
|
||||
-Package: thrift-compiler
|
||||
-Architecture: any
|
||||
-Depends: ${shlibs:Depends}, ${misc:Depends}
|
||||
-Description: Compiler for Thrift definition files
|
||||
- Thrift is a software framework for scalable cross-language services
|
||||
- development. It combines a software stack with a code generation engine to
|
||||
- build services that work efficiently and seamlessly.
|
||||
- .
|
||||
- This package contains the Thrift compiler that is used for translating
|
||||
- from .thrift files (containing the definitions) to the language binding
|
||||
- for the supported languages.
|
||||
-
|
||||
-Package: python-thrift
|
||||
-Architecture: any
|
||||
-Section: python
|
||||
-Depends: ${python:Depends}, ${shlibs:Depends}, ${misc:Depends}, python-six
|
||||
-Recommends: python-twisted-web, python-backports.ssl-match-hostname, python-ipaddress
|
||||
-Provides: ${python:Provides}
|
||||
-Description: Python bindings for Thrift (Python 2)
|
||||
- Thrift is a software framework for scalable cross-language services
|
||||
- development. It combines a software stack with a code generation engine to
|
||||
- build services that work efficiently and seamlessly.
|
||||
- .
|
||||
- This package contains the Python bindings for Thrift. You will need the thrift
|
||||
- tool (in the thrift-compiler package) to compile your definition to Python
|
||||
- classes, and then the modules in this package will allow you to use those
|
||||
- classes in your programs.
|
||||
- .
|
||||
- This package installs the library for Python 2.
|
||||
-
|
||||
-Package: python-thrift-dbg
|
||||
-Architecture: any
|
||||
-Section: debug
|
||||
-Depends: ${shlibs:Depends}, ${misc:Depends}, python-thrift (= ${binary:Version}), python-all-dbg
|
||||
-Provides: ${python:Provides}
|
||||
-Description: Python bindings for Thrift (debug version)
|
||||
- Thrift is a software framework for scalable cross-language services
|
||||
- development. It combines a software stack with a code generation engine to
|
||||
- build services that work efficiently and seamlessly.
|
||||
- .
|
||||
- This package contains the Python bindings for Thrift with debugging symbols.
|
||||
- You will need the thrift tool (in the thrift-compiler package) to compile your
|
||||
- definition to Python classes, and then the modules in this package will allow
|
||||
- you to use those classes in your programs.
|
||||
-
|
||||
-Package: python3-thrift
|
||||
-Architecture: any
|
||||
-Section: python
|
||||
-Depends: ${python3:Depends}, ${shlibs:Depends}, ${misc:Depends}, python3-six
|
||||
-Recommends: python3-twisted-web
|
||||
-Provides: ${python:Provides}
|
||||
-Description: Python bindings for Thrift (Python 3)
|
||||
- Thrift is a software framework for scalable cross-language services
|
||||
- development. It combines a software stack with a code generation engine to
|
||||
- build services that work efficiently and seamlessly.
|
||||
- .
|
||||
- This package contains the Python bindings for Thrift. You will need the thrift
|
||||
- tool (in the thrift-compiler package) to compile your definition to Python
|
||||
- classes, and then the modules in this package will allow you to use those
|
||||
- classes in your programs.
|
||||
- .
|
||||
- This package installs the library for Python 3.
|
||||
-
|
||||
-Package: python3-thrift-dbg
|
||||
-Architecture: any
|
||||
-Section: debug
|
||||
-Depends: ${shlibs:Depends}, ${misc:Depends}, python3-thrift (= ${binary:Version}), python3-all-dbg
|
||||
-Provides: ${python:Provides}
|
||||
-Description: Python bindings for Thrift (debug version)
|
||||
- Thrift is a software framework for scalable cross-language services
|
||||
- development. It combines a software stack with a code generation engine to
|
||||
- build services that work efficiently and seamlessly.
|
||||
- .
|
||||
- This package contains the Python bindings for Thrift with debugging symbols.
|
||||
- You will need the thrift tool (in the thrift-compiler package) to compile your
|
||||
- definition to Python classes, and then the modules in this package will allow
|
||||
- you to use those classes in your programs.
|
||||
-
|
||||
-Package: ruby-thrift
|
||||
-Architecture: all
|
||||
-Section: libs
|
||||
-Depends: ruby | ruby-interpreter, ${shlibs:Depends}, ${misc:Depends}
|
||||
-Provides: libthrift-ruby
|
||||
-Replaces: libthrift-ruby
|
||||
-Breaks: libthrift-ruby
|
||||
-Description: Ruby bindings for Thrift
|
||||
- Thrift is a software framework for scalable cross-language services
|
||||
- development. It combines a software stack with a code generation engine to
|
||||
- build services that work efficiently and seamlessly.
|
||||
- .
|
||||
- This package contains the Ruby bindings for Thrift. You will need the thrift
|
||||
- tool (in the thrift-compiler package) to compile your definition to Ruby
|
||||
- classes, and then the modules in this package will allow you to use those
|
||||
- classes in your programs.
|
||||
-
|
||||
-Package: libthrift-java
|
||||
-Architecture: all
|
||||
-Section: java
|
||||
-Depends: ${misc:Depends}
|
||||
-Description: Java bindings for Thrift
|
||||
- Thrift is a software framework for scalable cross-language services
|
||||
- development. It combines a software stack with a code generation engine to
|
||||
- build services that work efficiently and seamlessly.
|
||||
- .
|
||||
- This package contains the Java bindings for Thrift. You will need the thrift
|
||||
- tool (in the thrift-compiler package) to compile your definition to Java
|
||||
- classes, and then the modules in this package will allow you to use those
|
||||
- classes in your programs.
|
||||
-
|
||||
-Package: libthrift-cil
|
||||
-Architecture: all
|
||||
-Section: cli-mono
|
||||
-Depends: cli-common, libmono-corlib4.0-cil (>= 2.10) | libmono-corlib4.5-cil (>=3.2), libmono-system4.0-cil (>= 2.10),
|
||||
- libmono-system-web4.0-cil (>= 2.10), ${misc:Depends}
|
||||
-Description: CLI bindings for Thrift
|
||||
- Thrift is a software framework for scalable cross-language services
|
||||
- development. It combines a software stack with a code generation engine to
|
||||
- build services that work efficiently and seamlessly.
|
||||
- .
|
||||
- This package contains the CLI bindings for Thrift. You will need the thrift
|
||||
- tool (in the thrift-compiler package) to compile your definition to C#
|
||||
- classes, and then the modules in this package will allow you to use those
|
||||
- classes in your programs.
|
||||
-
|
||||
-Package: libthrift-perl
|
||||
-Architecture: all
|
||||
-Section: perl
|
||||
-Depends: perl (>= 5.8.0-7), ${misc:Depends}
|
||||
-Description: Perl bindings for Thrift
|
||||
- Thrift is a software framework for scalable cross-language services
|
||||
- development. It combines a software stack with a code generation engine to
|
||||
- build services that work efficiently and seamlessly.
|
||||
- .
|
||||
- This package contains the Perl bindings for Thrift. You will need the thrift
|
||||
- tool (in the thrift-compiler package) to compile your definition to Perl
|
||||
- classes, and then the modules in this package will allow you to use those
|
||||
- classes in your programs.
|
||||
-
|
||||
Package: libthrift0
|
||||
Architecture: any
|
||||
Depends: ${shlibs:Depends}, ${misc:Depends}
|
||||
@@ -177,15 +35,3 @@
|
||||
.
|
||||
This package contains the development libraries required for writing C++
|
||||
applications using Thrift.
|
||||
-
|
||||
-Package: php5-thrift
|
||||
-Architecture: any
|
||||
-Section: php
|
||||
-Depends: ${php:Depends}, ${shlibs:Depends}, ${misc:Depends}
|
||||
-Provides: ${php:Provides}
|
||||
-Description: PHP bindings for Thrift
|
||||
- Thrift is a software framework for scalable cross-language services
|
||||
- development. It combines a software stack with a code generation engine to
|
||||
- build services that work efficiently and seamlessly.
|
||||
- .
|
||||
- This package contains the PHP bindings for Thrift.
|
||||
diff -Nur debian/libthrift0.install ../thrift-0.10.0.new/debian/libthrift0.install
|
||||
--- debian/libthrift0.install 2016-04-03 17:19:43.000000000 +0000
|
||||
+++ ../thrift-0.10.0.new/debian/libthrift0.install 2017-06-20 19:22:46.321957388 +0000
|
||||
@@ -1,5 +1,4 @@
|
||||
usr/lib/libthrift.so*
|
||||
usr/lib/libthrift-*.so*
|
||||
usr/lib/libthriftnb*.so*
|
||||
-usr/lib/libthriftqt*.so*
|
||||
usr/lib/libthriftz*.so*
|
||||
diff -Nur debian/rules ../thrift-0.10.0.new/debian/rules
|
||||
--- debian/rules 2016-12-19 20:05:45.000000000 +0000
|
||||
+++ ../thrift-0.10.0.new/debian/rules 2017-06-20 19:22:48.285956469 +0000
|
||||
@@ -51,18 +51,6 @@
|
||||
# Compile C (glib) library
|
||||
$(MAKE) -C $(CURDIR)/lib/c_glib
|
||||
|
||||
- # Python library
|
||||
- cd $(CURDIR)/lib/py && \
|
||||
- for py in $(PYVERS); do \
|
||||
- $$py setup.py build; \
|
||||
- $$py-dbg setup.py build; \
|
||||
- done
|
||||
-
|
||||
- # PHP
|
||||
- cd $(CURDIR)/lib/php/src/ext/thrift_protocol && \
|
||||
- phpize && \
|
||||
- ./configure && $(MAKE)
|
||||
-
|
||||
touch $@
|
||||
|
||||
build-indep: build-indep-stamp
|
||||
@@ -71,19 +59,6 @@
|
||||
# Add here commands to compile the indep part of the package.
|
||||
#$(MAKE) doc
|
||||
|
||||
- # Java
|
||||
- cd $(CURDIR)/lib/java && \
|
||||
- ant
|
||||
-
|
||||
- # C#
|
||||
- $(MAKE) -C $(CURDIR)/lib/csharp
|
||||
-
|
||||
- # Ruby
|
||||
- $(MAKE) -C $(CURDIR)/lib/rb
|
||||
-
|
||||
- # Perl
|
||||
- $(MAKE) -C $(CURDIR)/lib/perl INSTALLDIRS=vendor
|
||||
-
|
||||
touch $@
|
||||
|
||||
clean:
|
||||
@@ -96,8 +71,6 @@
|
||||
# Add here commands to clean up after the build process.
|
||||
-$(MAKE) clean
|
||||
|
||||
- $(CURDIR)/cleanup.sh
|
||||
-
|
||||
dh_clean
|
||||
|
||||
install: install-indep install-arch
|
||||
@@ -111,29 +84,6 @@
|
||||
# debian/<package>-doc.
|
||||
#INSTALLDOC#
|
||||
|
||||
- # Java
|
||||
- mkdir -p $(CURDIR)/debian/libthrift-java/usr/share/java/ && \
|
||||
- cp $(CURDIR)/lib/java/build/libthrift*.jar \
|
||||
- $(CURDIR)/debian/libthrift-java/usr/share/java/
|
||||
-
|
||||
- # Ruby
|
||||
- mkdir -p $(CURDIR)/debian/ruby-thrift/usr/lib/ruby/1.9.1 && \
|
||||
- cp $(CURDIR)/lib/rb/lib/thrift.rb \
|
||||
- $(CURDIR)/debian/ruby-thrift/usr/lib/ruby/1.9.1
|
||||
- cp -r $(CURDIR)/lib/rb/lib/thrift \
|
||||
- $(CURDIR)/debian/ruby-thrift/usr/lib/ruby/1.9.1
|
||||
-
|
||||
- # C#
|
||||
- mkdir -p $(CURDIR)/debian/libthrift-cil/usr/lib/cli/thrift/ && \
|
||||
- cp $(CURDIR)/lib/csharp/Thrift.dll \
|
||||
- $(CURDIR)/debian/libthrift-cil/usr/lib/cli/thrift/Thrift.dll
|
||||
-
|
||||
- # Perl
|
||||
- $(MAKE) -C $(CURDIR)/lib/perl install DESTDIR=$(CURDIR)/debian/libthrift-perl
|
||||
- mv $(CURDIR)/debian/libthrift-perl/usr/local/lib/perl5 $(CURDIR)/debian/libthrift-perl/usr/lib
|
||||
- rmdir $(CURDIR)/debian/libthrift-perl/usr/local/lib
|
||||
- rmdir $(CURDIR)/debian/libthrift-perl/usr/local
|
||||
-
|
||||
dh_install -i
|
||||
|
||||
install-arch:
|
||||
@@ -146,40 +96,6 @@
|
||||
# debian/tmp.
|
||||
#$(MAKE) DESTDIR=$(CURDIR)/debian/thrift install
|
||||
|
||||
- # Compiler
|
||||
- mkdir -p $(CURDIR)/debian/thrift-compiler/usr/bin && \
|
||||
- cp $(CURDIR)/compiler/cpp/thrift \
|
||||
- $(CURDIR)/debian/thrift-compiler/usr/bin/thrift && \
|
||||
- rmdir $(CURDIR)/debian/thrift-compiler/usr/sbin
|
||||
-
|
||||
- # Python
|
||||
- cd $(CURDIR)/lib/py && \
|
||||
- python2 setup.py install --install-layout=deb --no-compile --root=$(CURDIR)/debian/python-thrift && \
|
||||
- python2-dbg setup.py install --install-layout=deb --no-compile --root=$(CURDIR)/debian/python-thrift-dbg && \
|
||||
- python3 setup.py install --install-layout=deb --no-compile --root=$(CURDIR)/debian/python3-thrift && \
|
||||
- python3-dbg setup.py install --install-layout=deb --no-compile --root=$(CURDIR)/debian/python3-thrift-dbg
|
||||
-
|
||||
- find $(CURDIR)/debian/python-thrift -name "*.py[co]" -print0 | xargs -0 rm -f
|
||||
- find $(CURDIR)/debian/python-thrift -name "__pycache__" -print0 | xargs -0 rm -fr
|
||||
- find $(CURDIR)/debian/python-thrift-dbg -name "__pycache__" -print0 | xargs -0 rm -fr
|
||||
- find $(CURDIR)/debian/python-thrift-dbg -name "*.py[co]" -print0 | xargs -0 rm -f
|
||||
- find $(CURDIR)/debian/python-thrift-dbg -name "*.py" -print0 | xargs -0 rm -f
|
||||
- find $(CURDIR)/debian/python-thrift-dbg -name "*.egg-info" -print0 | xargs -0 rm -rf
|
||||
- find $(CURDIR)/debian/python-thrift-dbg -depth -type d -empty -exec rmdir {} \;
|
||||
-
|
||||
- find $(CURDIR)/debian/python3-thrift -name "*.py[co]" -print0 | xargs -0 rm -f
|
||||
- find $(CURDIR)/debian/python3-thrift -name "__pycache__" -print0 | xargs -0 rm -fr
|
||||
- find $(CURDIR)/debian/python3-thrift-dbg -name "__pycache__" -print0 | xargs -0 rm -fr
|
||||
- find $(CURDIR)/debian/python3-thrift-dbg -name "*.py[co]" -print0 | xargs -0 rm -f
|
||||
- find $(CURDIR)/debian/python3-thrift-dbg -name "*.py" -print0 | xargs -0 rm -f
|
||||
- find $(CURDIR)/debian/python3-thrift-dbg -name "*.egg-info" -print0 | xargs -0 rm -rf
|
||||
- find $(CURDIR)/debian/python3-thrift-dbg -depth -type d -empty -exec rmdir {} \;
|
||||
-
|
||||
- # PHP
|
||||
- mkdir -p $(CURDIR)/debian/php5-thrift
|
||||
- cd $(CURDIR)/lib/php && \
|
||||
- $(MAKE) DESTDIR=$(CURDIR)/debian/php5-thrift install
|
||||
-
|
||||
# C++ and C (glib)
|
||||
mkdir -p $(CURDIR)/debian/tmp; \
|
||||
cd $(CURDIR)/lib/cpp && \
|
||||
@@ -201,9 +117,8 @@
|
||||
dh_installexamples
|
||||
dh_installman
|
||||
dh_link
|
||||
- dh_strip -ppython-thrift --dbg-package=python-thrift-dbg
|
||||
- dh_strip -ppython3-thrift --dbg-package=python3-thrift-dbg
|
||||
dh_strip -pthrift-compiler -plibthrift0
|
||||
+ dh_strip
|
||||
dh_compress
|
||||
dh_fixperms
|
||||
dh_makeshlibs
|
||||
@@ -219,7 +134,6 @@
|
||||
|
||||
# Build architecture dependent packages using the common target.
|
||||
binary-arch: build-arch install-arch
|
||||
- echo "php:Depends=phpapi-$(shell php-config5 --phpapi)" > $(CURDIR)/debian/substvars
|
||||
$(MAKE) -f debian/rules DH_OPTIONS=-s binary-common
|
||||
|
||||
binary: binary-arch binary-indep
|
||||
19
dist/debian/pbuilderrc
vendored
19
dist/debian/pbuilderrc
vendored
@@ -4,24 +4,23 @@ BASETGZ="/var/cache/pbuilder/scylla-server-$DIST.tgz"
|
||||
DISTRIBUTION="$DIST"
|
||||
BUILDRESULT="/var/cache/pbuilder/scylla-server-$DIST/result/"
|
||||
APTCACHE="/var/cache/pbuilder/scylla-server-$DIST/aptcache/"
|
||||
ALLOWUNTRUSTED=yes
|
||||
EXTRAPACKAGES="sudo"
|
||||
if [ $REBUILD -eq 1 ]; then
|
||||
BINDMOUNTS="/var/tmp/pbuilder"
|
||||
fi
|
||||
|
||||
if [ "$DIST" = "trusty" ] || [ "$DIST" = "xenial" ] || [ "$DIST" = "yakkety" ] || [ "$DIST" = "zesty" ] || [ "$DIST" = "artful" ]; then
|
||||
if [ "$DIST" = "trusty" ] || [ "$DIST" = "xenial" ] || [ "$DIST" = "yakkety" ] || [ "$DIST" = "zesty" ] || [ "$DIST" = "artful" ] || [ "$DIST" = "bionic" ]; then
|
||||
MIRRORSITE="http://archive.ubuntu.com/ubuntu/"
|
||||
COMPONENTS="main restricted universe multiverse"
|
||||
DEBOOTSTRAPOPTS="--keyring=/usr/share/keyrings/ubuntu-archive-keyring.gpg"
|
||||
OTHERMIRROR="deb http://archive.ubuntu.com/ubuntu/ $DIST-updates main restricted universe multiverse|deb http://ppa.launchpad.net/ubuntu-toolchain-r/test/ubuntu $DIST main|deb [arch=amd64] http://ppa.launchpad.net/scylladb/ppa/ubuntu $DIST main"
|
||||
elif [ "$DIST" = "jessie" ] || [ "$DIST" = "stretch" ] || [ "$DIST" = "buster" ] || [ "$DIST" = "sid" ]; then
|
||||
OTHERMIRROR="deb http://archive.ubuntu.com/ubuntu/ $DIST-updates main restricted universe multiverse"
|
||||
elif [ "$DIST" = "jessie" ]; then
|
||||
MIRRORSITE="http://deb.debian.org/debian/"
|
||||
COMPONENTS="main contrib non-free"
|
||||
DEBOOTSTRAPOPTS="--keyring=/usr/share/keyrings/debian-archive-keyring.gpg"
|
||||
if [ $REBUILD -eq 0 ]; then
|
||||
OTHERMIRROR="deb [arch=amd64] http://downloads.scylladb.com/deb/3rdparty/$DIST $DIST scylladb/non-free"
|
||||
fi
|
||||
OTHERMIRROR="deb [arch=amd64] http://download.opensuse.org/repositories/home:/scylladb:/scylla-3rdparty-jessie/Debian_8.0/ ./"
|
||||
elif [ "$DIST" = "stretch" ]; then
|
||||
MIRRORSITE="http://deb.debian.org/debian/"
|
||||
COMPONENTS="main contrib non-free"
|
||||
DEBOOTSTRAPOPTS="--keyring=/usr/share/keyrings/debian-archive-keyring.gpg"
|
||||
OTHERMIRROR="deb [arch=amd64] http://download.opensuse.org/repositories/home:/scylladb:/scylla-3rdparty-stretch/Debian_9.0/ ./"
|
||||
else
|
||||
echo "Unknown distribution: $DIST"
|
||||
exit 1
|
||||
|
||||
4
dist/debian/rules.in
vendored
4
dist/debian/rules.in
vendored
@@ -1,7 +1,9 @@
|
||||
#!/usr/bin/make -f
|
||||
|
||||
export PYBUILD_DISABLE=1
|
||||
|
||||
override_dh_auto_configure:
|
||||
./configure.py --enable-dpdk --mode=release --static-stdc++ --static-thrift --static-boost --compiler=@@COMPILER@@ --cflags="-I/opt/scylladb/include -L/opt/scylladb/lib" --ldflags="-L/opt/scylladb/lib/x86_64-linux-gnu/"
|
||||
./configure.py --enable-dpdk --mode=release --static-thrift --static-boost --compiler=@@COMPILER@@ --cflags="-I/opt/scylladb/include -L/opt/scylladb/lib/x86-linux-gnu/" --ldflags="-Wl,-rpath=/opt/scylladb/lib"
|
||||
|
||||
override_dh_auto_build:
|
||||
PATH="/opt/scylladb/bin:$$PATH" ninja
|
||||
|
||||
4
dist/debian/ubuntu_enable_ppa.sh
vendored
Normal file
4
dist/debian/ubuntu_enable_ppa.sh
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
#!/bin/bash
#
# Enables the ScyllaDB PPA on an Ubuntu system: installs
# software-properties-common, registers ppa:scylladb/ppa and refreshes
# the package index.

# Stop at the first failing step; otherwise a failed install would still be
# followed by the repository registration and the index refresh.
set -e

# apt-get is used rather than apt because apt does not promise a stable
# command-line interface for non-interactive use.
apt-get install -y software-properties-common
add-apt-repository -y ppa:scylladb/ppa
apt-get update
|
||||
41
dist/redhat/mock/scylla-epel-7-x86_64.cfg
vendored
41
dist/redhat/mock/scylla-epel-7-x86_64.cfg
vendored
@@ -21,53 +21,34 @@ mdpolicy=group:primary
|
||||
best=1
|
||||
|
||||
# repos
|
||||
[base]
|
||||
[scylla-centos-base]
|
||||
name=BaseOS
|
||||
mirrorlist=http://mirrorlist.centos.org/?release=7&arch=x86_64&repo=os
|
||||
failovermethod=priority
|
||||
gpgkey=file:///usr/share/distribution-gpg-keys/centos/RPM-GPG-KEY-CentOS-7
|
||||
gpgkey=http://vault.centos.org/RPM-GPG-KEY-CentOS-7
|
||||
gpgcheck=1
|
||||
|
||||
[updates]
|
||||
[scylla-centos-updates]
|
||||
name=updates
|
||||
enabled=1
|
||||
mirrorlist=http://mirrorlist.centos.org/?release=7&arch=x86_64&repo=updates
|
||||
failovermethod=priority
|
||||
gpgkey=file:///usr/share/distribution-gpg-keys/centos/RPM-GPG-KEY-CentOS-7
|
||||
gpgkey=http://vault.centos.org/RPM-GPG-KEY-CentOS-7
|
||||
gpgcheck=1
|
||||
|
||||
[epel]
|
||||
name=epel
|
||||
mirrorlist=http://mirrors.fedoraproject.org/mirrorlist?repo=epel-7&arch=x86_64
|
||||
failovermethod=priority
|
||||
gpgkey=file:///usr/share/distribution-gpg-keys/epel/RPM-GPG-KEY-EPEL-7
|
||||
gpgcheck=1
|
||||
|
||||
[extras]
|
||||
[scylla-centos-extras]
|
||||
name=extras
|
||||
mirrorlist=http://mirrorlist.centos.org/?release=7&arch=x86_64&repo=extras
|
||||
failovermethod=priority
|
||||
gpgkey=file:///usr/share/distribution-gpg-keys/centos/RPM-GPG-KEY-CentOS-7
|
||||
gpgkey=http://vault.centos.org/RPM-GPG-KEY-CentOS-7
|
||||
gpgcheck=1
|
||||
|
||||
[testing]
|
||||
name=epel-testing
|
||||
enabled=0
|
||||
mirrorlist=http://mirrors.fedoraproject.org/mirrorlist?repo=testing-epel7&arch=x86_64
|
||||
[scylla-epel]
|
||||
name=epel
|
||||
mirrorlist=http://mirrors.fedoraproject.org/mirrorlist?repo=epel-7&arch=x86_64
|
||||
failovermethod=priority
|
||||
|
||||
|
||||
[local]
|
||||
name=local
|
||||
baseurl=https://kojipkgs.fedoraproject.org/repos/epel7-build/latest/x86_64/
|
||||
cost=2000
|
||||
enabled=0
|
||||
|
||||
[epel-debuginfo]
|
||||
name=epel-debug
|
||||
mirrorlist=http://mirrors.fedoraproject.org/mirrorlist?repo=epel-debug-7&arch=x86_64
|
||||
failovermethod=priority
|
||||
enabled=0
|
||||
gpgkey=https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-7
|
||||
gpgcheck=1
|
||||
|
||||
[scylladb-scylla-3rdparty]
|
||||
name=Copr repo for scylla-3rdparty owned by scylladb
|
||||
|
||||
8
dist/redhat/scylla.spec.in
vendored
8
dist/redhat/scylla.spec.in
vendored
@@ -7,14 +7,14 @@ Group: Applications/Databases
|
||||
License: AGPLv3
|
||||
URL: http://www.scylladb.com/
|
||||
Source0: %{name}-@@VERSION@@-@@RELEASE@@.tar
|
||||
Requires: scylla-server = @@VERSION@@ scylla-jmx = @@VERSION@@ scylla-tools = @@VERSION@@ scylla-kernel-conf = @@VERSION@@ scylla-libgcc72 scylla-libstdc++72
|
||||
Requires: scylla-server = @@VERSION@@ scylla-jmx = @@VERSION@@ scylla-tools = @@VERSION@@ scylla-tools-core = @@VERSION@@ scylla-kernel-conf = @@VERSION@@ scylla-libgcc72 scylla-libstdc++72
|
||||
Obsoletes: scylla-server < 1.1
|
||||
|
||||
%description
|
||||
Scylla is a highly scalable, eventually consistent, distributed,
|
||||
partitioned row DB.
|
||||
This package installs all required packages for ScyllaDB, including
|
||||
scylla-server, scylla-jmx, scylla-tools.
|
||||
scylla-server, scylla-jmx, scylla-tools, scylla-tools-core.
|
||||
|
||||
# this is needed to prevent python compilation error on CentOS (#2235)
|
||||
%if 0%{?rhel}
|
||||
@@ -78,6 +78,10 @@ python3.4 ./configure.py --enable-dpdk --mode=release --static-boost --compiler=
|
||||
ninja-build %{?_smp_mflags} build/release/scylla build/release/iotune
|
||||
cp dist/common/systemd/scylla-server.service.in build/scylla-server.service
|
||||
sed -i -e "s#@@SYSCONFDIR@@#/etc/sysconfig#g" build/scylla-server.service
|
||||
cp dist/common/systemd/scylla-housekeeping-restart.service.in build/scylla-housekeeping-restart.service
|
||||
sed -i -e "s#@@REPOFILES@@#'/etc/yum.repos.d/scylla*.repo'#g" build/scylla-housekeeping-restart.service
|
||||
cp dist/common/systemd/scylla-housekeeping-daily.service.in build/scylla-housekeeping-daily.service
|
||||
sed -i -e "s#@@REPOFILES@@#'/etc/yum.repos.d/scylla*.repo'#g" build/scylla-housekeeping-daily.service
|
||||
|
||||
%install
|
||||
rm -rf $RPM_BUILD_ROOT
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
|
||||
#include "flat_mutation_reader.hh"
|
||||
#include "mutation_reader.hh"
|
||||
#include "seastar/util/reference_wrapper.hh"
|
||||
#include <algorithm>
|
||||
|
||||
#include <boost/range/adaptor/transformed.hpp>
|
||||
@@ -43,6 +44,132 @@ void flat_mutation_reader::impl::clear_buffer_to_next_partition() {
|
||||
_buffer_size = boost::accumulate(_buffer | boost::adaptors::transformed(std::mem_fn(&mutation_fragment::memory_usage)), size_t(0));
|
||||
}
|
||||
|
||||
flat_mutation_reader flat_mutation_reader::impl::reverse_partitions(flat_mutation_reader::impl& original) {
    // FIXME: #1413 Full partitions get accumulated in memory.

    // Reader built on top of `original` (referenced through a raw pointer).
    // Partition-start and static-row fragments are forwarded as they are read;
    // range tombstones and all remaining fragments are held back until the
    // end-of-partition fragment has been read, and are then emitted from the
    // back of their containers.
    class partition_reversing_mutation_reader final : public flat_mutation_reader::impl {
        flat_mutation_reader::impl* _source;
        // Range tombstones read so far for the current partition.
        range_tombstone_list _range_tombstones;
        // Fragments that are neither partition start, static row, range
        // tombstone nor partition end; top() is the most recently read one.
        std::stack<mutation_fragment> _mutation_fragments;
        // Engaged from the moment the source's end-of-partition fragment is
        // read until it has been pushed to the output buffer.
        mutation_fragment_opt _partition_end;
    private:
        // Drains the held-back fragments and range tombstones into the output
        // buffer. Returns stop_iteration::yes when the buffer filled up before
        // the partition end could be pushed, stop_iteration::no once the
        // partition end has been pushed.
        stop_iteration emit_partition() {
            // Unlinks the last tombstone of the list and pushes it to the buffer.
            auto emit_last_range_tombstone = [&] {
                auto last = std::prev(_range_tombstones.tombstones().end());
                auto& tomb = *last;
                _range_tombstones.tombstones().erase(last);
                // NOTE(review): presumably this takes ownership of the unlinked
                // entry so it is released at the end of this scope - confirm.
                auto tomb_owner = alloc_strategy_unique_ptr<range_tombstone>(&tomb);
                push_mutation_fragment(mutation_fragment(std::move(tomb)));
            };

            position_in_partition::less_compare pos_less(*_source->_schema);
            while (!_mutation_fragments.empty() && !is_buffer_full()) {
                auto& next = _mutation_fragments.top();
                // The last tombstone goes out first unless its end position
                // orders strictly before the fragment on top of the stack.
                const bool tombstone_first = !_range_tombstones.empty()
                        && !pos_less(_range_tombstones.tombstones().rbegin()->end_position(), next.position());
                if (tombstone_first) {
                    emit_last_range_tombstone();
                    continue;
                }
                push_mutation_fragment(std::move(next));
                _mutation_fragments.pop();
            }

            // No fragments left (or buffer full): flush the remaining tombstones.
            while (!_range_tombstones.empty() && !is_buffer_full()) {
                emit_last_range_tombstone();
            }

            if (!is_buffer_full()) {
                push_mutation_fragment(*std::exchange(_partition_end, stdx::nullopt));
                return stop_iteration::no;
            }
            return stop_iteration::yes;
        }

        // Pulls fragments out of the source's buffer, refilling it when empty.
        // Resolves to stop_iteration::yes when the source is exhausted or the
        // output buffer is full.
        future<stop_iteration> consume_partition_from_source() {
            if (_source->is_buffer_empty()) {
                if (!_source->is_end_of_stream()) {
                    return _source->fill_buffer().then([] { return stop_iteration::no; });
                }
                _end_of_stream = true;
                return make_ready_future<stop_iteration>(stop_iteration::yes);
            }

            while (!_source->is_buffer_empty() && !is_buffer_full()) {
                auto fragment = _source->pop_mutation_fragment();
                if (fragment.is_partition_start() || fragment.is_static_row()) {
                    // Forwarded immediately, in source order.
                    push_mutation_fragment(std::move(fragment));
                    continue;
                }
                if (fragment.is_end_of_partition()) {
                    _partition_end = std::move(fragment);
                    if (emit_partition()) {
                        return make_ready_future<stop_iteration>(stop_iteration::yes);
                    }
                    continue;
                }
                if (fragment.is_range_tombstone()) {
                    _range_tombstones.apply(*_source->_schema, std::move(fragment.as_range_tombstone()));
                    continue;
                }
                _mutation_fragments.emplace(std::move(fragment));
            }
            return make_ready_future<stop_iteration>(is_buffer_full());
        }
    public:
        explicit partition_reversing_mutation_reader(flat_mutation_reader::impl& source)
            : flat_mutation_reader::impl(source._schema)
            , _source(&source)
            , _range_tombstones(*source._schema)
        { }

        virtual future<> fill_buffer() override {
            return repeat([this] {
                // A pending partition end means the whole partition has been
                // read from the source; finish emitting it before reading on.
                if (_partition_end) {
                    if (emit_partition()) {
                        return make_ready_future<stop_iteration>(stop_iteration::yes);
                    }
                }
                return consume_partition_from_source();
            });
        }

        virtual void next_partition() override {
            clear_buffer_to_next_partition();
            if (!is_buffer_empty() || is_end_of_stream()) {
                return;
            }
            // Drop everything held back for the current partition and skip
            // the rest of it in the source.
            while (!_mutation_fragments.empty()) {
                _mutation_fragments.pop();
            }
            _range_tombstones.clear();
            _partition_end = stdx::nullopt;
            _source->next_partition();
        }

        // Fast-forwarding is not supported by this reader.
        virtual future<> fast_forward_to(const dht::partition_range&) override {
            throw std::bad_function_call();
        }

        // Fast-forwarding is not supported by this reader.
        virtual future<> fast_forward_to(position_range) override {
            throw std::bad_function_call();
        }
    };

    return make_flat_mutation_reader<partition_reversing_mutation_reader>(original);
}
|
||||
|
||||
template<typename Source>
|
||||
future<bool> flat_mutation_reader::impl::fill_buffer_from(Source& source) {
|
||||
if (source.is_buffer_empty()) {
|
||||
if (source.is_end_of_stream()) {
|
||||
return make_ready_future<bool>(true);
|
||||
}
|
||||
return source.fill_buffer().then([this, &source] {
|
||||
return fill_buffer_from(source);
|
||||
});
|
||||
} else {
|
||||
while (!source.is_buffer_empty() && !is_buffer_full()) {
|
||||
push_mutation_fragment(source.pop_mutation_fragment());
|
||||
}
|
||||
return make_ready_future<bool>(source.is_end_of_stream() && source.is_buffer_empty());
|
||||
}
|
||||
}
|
||||
|
||||
template future<bool> flat_mutation_reader::impl::fill_buffer_from<streamed_mutation>(streamed_mutation&);
|
||||
template future<bool> flat_mutation_reader::impl::fill_buffer_from<flat_mutation_reader>(flat_mutation_reader&);
|
||||
|
||||
flat_mutation_reader flat_mutation_reader_from_mutation_reader(schema_ptr s, mutation_reader&& legacy_reader, streamed_mutation::forwarding fwd) {
|
||||
class converting_reader final : public flat_mutation_reader::impl {
|
||||
mutation_reader _legacy_reader;
|
||||
@@ -77,21 +204,11 @@ flat_mutation_reader flat_mutation_reader_from_mutation_reader(schema_ptr s, mut
|
||||
if (!_sm) {
|
||||
return get_next_sm();
|
||||
} else {
|
||||
if (_sm->is_buffer_empty()) {
|
||||
if (_sm->is_end_of_stream()) {
|
||||
on_sm_finished();
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return _sm->fill_buffer();
|
||||
} else {
|
||||
while (!_sm->is_buffer_empty() && !is_buffer_full()) {
|
||||
this->push_mutation_fragment(_sm->pop_mutation_fragment());
|
||||
}
|
||||
if (_sm->is_end_of_stream() && _sm->is_buffer_empty()) {
|
||||
return fill_buffer_from(*_sm).then([this] (bool sm_finished) {
|
||||
if (sm_finished) {
|
||||
on_sm_finished();
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -119,13 +236,45 @@ flat_mutation_reader flat_mutation_reader_from_mutation_reader(schema_ptr s, mut
|
||||
if (_sm) {
|
||||
return _sm->fast_forward_to(std::move(cr));
|
||||
} else {
|
||||
throw std::runtime_error("fast forward needs _sm to be set");
|
||||
_end_of_stream = true;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
};
|
||||
};
|
||||
return make_flat_mutation_reader<converting_reader>(std::move(s), std::move(legacy_reader), fwd);
|
||||
}
|
||||
|
||||
// Creates a reader that forwards every operation to `r`. Only a reference to
// `r` is stored, so `r` has to remain valid for as long as the returned
// reader is in use.
flat_mutation_reader make_delegating_reader(flat_mutation_reader& r) {
    class reader : public flat_mutation_reader::impl {
        reference_wrapper<flat_mutation_reader> _underlying;
    public:
        reader(flat_mutation_reader& underlying)
            : impl(underlying.schema())
            , _underlying(ref(underlying))
        { }

        // Copies fragments over from the underlying reader and mirrors its
        // end-of-stream state.
        virtual future<> fill_buffer() override {
            return fill_buffer_from(_underlying.get()).then([this] (bool underlying_finished) {
                _end_of_stream = underlying_finished;
            });
        }

        virtual future<> fast_forward_to(position_range pr) override {
            _end_of_stream = false;
            forward_buffer_to(pr.start());
            auto& underlying = _underlying.get();
            return underlying.fast_forward_to(std::move(pr));
        }

        virtual void next_partition() override {
            auto& underlying = _underlying.get();
            clear_buffer_to_next_partition();
            // The underlying reader is advanced only when nothing is left in
            // our own buffer after the clear.
            if (is_buffer_empty()) {
                underlying.next_partition();
            }
            _end_of_stream = underlying.is_end_of_stream() && underlying.is_buffer_empty();
        }

        virtual future<> fast_forward_to(const dht::partition_range& pr) override {
            _end_of_stream = false;
            clear_buffer();
            auto& underlying = _underlying.get();
            return underlying.fast_forward_to(pr);
        }
    };

    return make_flat_mutation_reader<reader>(r);
}
|
||||
|
||||
flat_mutation_reader make_forwardable(flat_mutation_reader m) {
|
||||
class reader : public flat_mutation_reader::impl {
|
||||
flat_mutation_reader _underlying;
|
||||
@@ -190,6 +339,7 @@ flat_mutation_reader make_forwardable(flat_mutation_reader m) {
|
||||
};
|
||||
}
|
||||
virtual future<> fast_forward_to(const dht::partition_range& pr) override {
|
||||
_end_of_stream = false;
|
||||
clear_buffer();
|
||||
_next = {};
|
||||
return _underlying.fast_forward_to(pr);
|
||||
@@ -198,6 +348,65 @@ flat_mutation_reader make_forwardable(flat_mutation_reader m) {
|
||||
return make_flat_mutation_reader<reader>(std::move(m));
|
||||
}
|
||||
|
||||
// Wraps `r` in a reader that cannot be fast-forwarded within a partition.
// Each time the underlying reader runs dry, the wrapper first fast-forwards it
// to position_range::all_clustered_rows(); the next time it runs dry, the
// wrapper emits the partition end itself. With `single_partition` set, the
// stream ends after that first partition end.
flat_mutation_reader make_nonforwardable(flat_mutation_reader r, bool single_partition) {
    class reader : public flat_mutation_reader::impl {
        flat_mutation_reader _underlying;
        bool _single_partition;
        // Set once the underlying reader has been forwarded to the clustered
        // rows of the current partition.
        bool _static_row_done = false;

        // True when the underlying reader has nothing buffered and reports
        // end of stream.
        bool underlying_exhausted() const {
            return _underlying.is_buffer_empty() && _underlying.is_end_of_stream();
        }

        // Invoked when fill_buffer_from() reports the underlying reader as finished.
        future<> on_end_of_underlying_stream() {
            if (!_static_row_done) {
                // First time for this partition: continue with all clustered rows.
                _static_row_done = true;
                return _underlying.fast_forward_to(position_range::all_clustered_rows());
            }

            // Second time: the partition is complete.
            push_mutation_fragment(partition_end());
            if (_single_partition) {
                _end_of_stream = true;
                return make_ready_future<>();
            }

            _underlying.next_partition();
            _static_row_done = false;
            return _underlying.fill_buffer().then([this] {
                _end_of_stream = underlying_exhausted();
            });
        }
    public:
        reader(flat_mutation_reader underlying, bool single_partition)
            : impl(underlying.schema())
            , _underlying(std::move(underlying))
            , _single_partition(single_partition)
        { }

        virtual future<> fill_buffer() override {
            auto done = [this] { return is_end_of_stream() || is_buffer_full(); };
            return do_until(std::move(done), [this] {
                return fill_buffer_from(_underlying).then([this] (bool underlying_finished) {
                    return underlying_finished ? on_end_of_underlying_stream() : make_ready_future<>();
                });
            });
        }

        // Intra-partition fast-forwarding is not supported by this reader.
        virtual future<> fast_forward_to(position_range pr) override {
            throw std::bad_function_call();
        }

        virtual void next_partition() override {
            clear_buffer_to_next_partition();
            // The underlying reader is advanced only when nothing is left in
            // our own buffer after the clear.
            if (is_buffer_empty()) {
                _underlying.next_partition();
            }
            _end_of_stream = underlying_exhausted();
        }

        virtual future<> fast_forward_to(const dht::partition_range& pr) override {
            _end_of_stream = false;
            clear_buffer();
            return _underlying.fast_forward_to(pr);
        }
    };

    return make_flat_mutation_reader<reader>(std::move(r), single_partition);
}
|
||||
|
||||
class empty_flat_reader final : public flat_mutation_reader::impl {
|
||||
public:
|
||||
empty_flat_reader(schema_ptr s) : impl(std::move(s)) { _end_of_stream = true; }
|
||||
@@ -373,12 +582,12 @@ private:
|
||||
public:
|
||||
flat_multi_range_mutation_reader(schema_ptr s, mutation_source source, const ranges_vector& ranges,
|
||||
const query::partition_slice& slice, const io_priority_class& pc,
|
||||
tracing::trace_state_ptr trace_state, streamed_mutation::forwarding fwd,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
mutation_reader::forwarding fwd_mr)
|
||||
: impl(s)
|
||||
, _ranges(ranges)
|
||||
, _current_range(_ranges.begin())
|
||||
, _reader(source.make_flat_mutation_reader(s, *_current_range, slice, pc, trace_state, fwd,
|
||||
, _reader(source.make_flat_mutation_reader(s, *_current_range, slice, pc, trace_state, streamed_mutation::forwarding::no,
|
||||
_ranges.size() > 1 ? mutation_reader::forwarding::yes : fwd_mr))
|
||||
{
|
||||
}
|
||||
@@ -412,20 +621,23 @@ public:
|
||||
}
|
||||
|
||||
virtual future<> fast_forward_to(position_range pr) override {
|
||||
return _reader.fast_forward_to(std::move(pr));
|
||||
throw std::bad_function_call();
|
||||
}
|
||||
|
||||
virtual void next_partition() override {
|
||||
return _reader.next_partition();
|
||||
clear_buffer_to_next_partition();
|
||||
if (is_buffer_empty() && !is_end_of_stream()) {
|
||||
_reader.next_partition();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
flat_mutation_reader
|
||||
make_flat_multi_range_reader(schema_ptr s, mutation_source source, const dht::partition_range_vector& ranges,
|
||||
const query::partition_slice& slice, const io_priority_class& pc,
|
||||
tracing::trace_state_ptr trace_state, streamed_mutation::forwarding fwd,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
mutation_reader::forwarding fwd_mr)
|
||||
{
|
||||
return make_flat_mutation_reader<flat_multi_range_mutation_reader>(std::move(s), std::move(source), ranges,
|
||||
slice, pc, std::move(trace_state), fwd, fwd_mr);
|
||||
slice, pc, std::move(trace_state), fwd_mr);
|
||||
}
|
||||
|
||||
@@ -52,6 +52,11 @@ GCC6_CONCEPT(
|
||||
obj.consume_end_of_partition();
|
||||
};
|
||||
}
|
||||
|
||||
// Concept for partition filters: a value of type T must be callable with a
// const dht::decorated_key& and the call expression must satisfy the
// `-> bool` constraint below.
template<typename T>
|
||||
concept bool PartitionFilter = requires(T filter, const dht::decorated_key& dk) {
|
||||
{ filter(dk) } -> bool;
|
||||
};
|
||||
)
|
||||
|
||||
/*
|
||||
@@ -68,15 +73,27 @@ GCC6_CONCEPT(
|
||||
*/
|
||||
class flat_mutation_reader final {
|
||||
public:
|
||||
// Causes a stream of reversed mutations to be emitted.
|
||||
// 1. Static row is still emitted first.
|
||||
// 2. Range tombstones are ordered by their end position.
|
||||
// 3. Clustered rows and range tombstones are emitted in descending order.
|
||||
// Because of 2 and 3 the guarantee that a range tombstone is emitted before
|
||||
// any mutation fragment affected by it still holds.
|
||||
// Ordering of partitions themselves remains unchanged.
|
||||
using consume_reversed_partitions = seastar::bool_class<class consume_reversed_partitions_tag>;
|
||||
|
||||
class impl {
|
||||
private:
|
||||
circular_buffer<mutation_fragment> _buffer;
|
||||
size_t _buffer_size = 0;
|
||||
bool _consume_done = false;
|
||||
protected:
|
||||
static constexpr size_t max_buffer_size_in_bytes = 8 * 1024;
|
||||
size_t max_buffer_size_in_bytes = 8 * 1024;
|
||||
bool _end_of_stream = false;
|
||||
schema_ptr _schema;
|
||||
friend class flat_mutation_reader;
|
||||
template <typename Source>
|
||||
friend future<bool> fill_buffer_from(flat_mutation_reader::impl&, Source&);
|
||||
protected:
|
||||
template<typename... Args>
|
||||
void push_mutation_fragment(Args&&... args) {
|
||||
@@ -89,6 +106,16 @@ public:
|
||||
}
|
||||
void forward_buffer_to(const position_in_partition& pos);
|
||||
void clear_buffer_to_next_partition();
|
||||
template<typename Source>
|
||||
future<bool> fill_buffer_from(Source&);
|
||||
// When succeeds, makes sure that the next push_mutation_fragment() will not fail.
|
||||
void reserve_one() {
|
||||
if (_buffer.capacity() == _buffer.size()) {
|
||||
_buffer.reserve(_buffer.size() * 2 + 1);
|
||||
}
|
||||
}
|
||||
private:
|
||||
static flat_mutation_reader reverse_partitions(flat_mutation_reader::impl&);
|
||||
public:
|
||||
impl(schema_ptr s) : _schema(std::move(s)) { }
|
||||
virtual ~impl() {}
|
||||
@@ -135,6 +162,79 @@ public:
|
||||
});
|
||||
}
|
||||
|
||||
template<typename Consumer, typename Filter>
|
||||
GCC6_CONCEPT(
|
||||
requires FlatMutationReaderConsumer<Consumer>() && PartitionFilter<Filter>
|
||||
)
|
||||
// A variant of consume_pausable() that expects to be run in
|
||||
// a seastar::thread.
|
||||
// Partitions for which filter(decorated_key) returns false are skipped
|
||||
// entirely and never reach the consumer.
|
||||
void consume_pausable_in_thread(Consumer consumer, Filter filter) {
|
||||
while (true) {
|
||||
if (is_buffer_empty()) {
|
||||
if (is_end_of_stream()) {
|
||||
return;
|
||||
}
|
||||
fill_buffer().get();
|
||||
continue;
|
||||
}
|
||||
auto mf = pop_mutation_fragment();
|
||||
if (mf.is_partition_start() && !filter(mf.as_partition_start().key())) {
|
||||
next_partition();
|
||||
continue;
|
||||
}
|
||||
if (consumer(std::move(mf)) == stop_iteration::yes) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
template<typename Consumer>
|
||||
struct consumer_adapter {
|
||||
flat_mutation_reader::impl& _reader;
|
||||
stdx::optional<dht::decorated_key> _decorated_key;
|
||||
Consumer _consumer;
|
||||
consumer_adapter(flat_mutation_reader::impl& reader, Consumer c)
|
||||
: _reader(reader)
|
||||
, _consumer(std::move(c))
|
||||
{ }
|
||||
stop_iteration operator()(mutation_fragment&& mf) {
|
||||
return std::move(mf).consume(*this);
|
||||
}
|
||||
stop_iteration consume(static_row&& sr) {
|
||||
return handle_result(_consumer.consume(std::move(sr)));
|
||||
}
|
||||
stop_iteration consume(clustering_row&& cr) {
|
||||
return handle_result(_consumer.consume(std::move(cr)));
|
||||
}
|
||||
stop_iteration consume(range_tombstone&& rt) {
|
||||
return handle_result(_consumer.consume(std::move(rt)));
|
||||
}
|
||||
stop_iteration consume(partition_start&& ps) {
|
||||
_decorated_key.emplace(std::move(ps.key()));
|
||||
_consumer.consume_new_partition(*_decorated_key);
|
||||
if (ps.partition_tombstone()) {
|
||||
_consumer.consume(ps.partition_tombstone());
|
||||
}
|
||||
return stop_iteration::no;
|
||||
}
|
||||
stop_iteration consume(partition_end&& pe) {
|
||||
return _consumer.consume_end_of_partition();
|
||||
}
|
||||
private:
|
||||
stop_iteration handle_result(stop_iteration si) {
|
||||
if (si) {
|
||||
if (_consumer.consume_end_of_partition()) {
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
_reader.next_partition();
|
||||
}
|
||||
return stop_iteration::no;
|
||||
}
|
||||
};
|
||||
public:
|
||||
template<typename Consumer>
|
||||
GCC6_CONCEPT(
|
||||
requires FlattenedConsumer<Consumer>()
|
||||
@@ -144,64 +244,46 @@ public:
|
||||
// When consumer returns stop_iteration::yes from methods other than consume_end_of_partition then the read
|
||||
// of the current partition is ended, consume_end_of_partition is called and if it returns stop_iteration::no
|
||||
// then the read moves to the next partition.
|
||||
// Reference to the decorated key that is passed to consume_new_partition() remains valid until after
|
||||
// the call to consume_end_of_partition().
|
||||
//
|
||||
// This method is useful because most of current consumers use this semantic.
|
||||
//
|
||||
//
|
||||
// This method returns whatever is returned from Consumer::consume_end_of_stream().
|
||||
auto consume(Consumer consumer) {
|
||||
struct consumer_adapter {
|
||||
flat_mutation_reader::impl& _reader;
|
||||
Consumer _consumer;
|
||||
consumer_adapter(flat_mutation_reader::impl& reader, Consumer c)
|
||||
: _reader(reader)
|
||||
, _consumer(std::move(c))
|
||||
{ }
|
||||
stop_iteration operator()(mutation_fragment&& mf) {
|
||||
return std::move(mf).consume(*this);
|
||||
}
|
||||
stop_iteration consume(static_row&& sr) {
|
||||
return handle_result(_consumer.consume(std::move(sr)));
|
||||
}
|
||||
stop_iteration consume(clustering_row&& cr) {
|
||||
return handle_result(_consumer.consume(std::move(cr)));
|
||||
}
|
||||
stop_iteration consume(range_tombstone&& rt) {
|
||||
return handle_result(_consumer.consume(std::move(rt)));
|
||||
}
|
||||
stop_iteration consume(partition_start&& ps) {
|
||||
_consumer.consume_new_partition(ps.key());
|
||||
if (ps.partition_tombstone()) {
|
||||
_consumer.consume(ps.partition_tombstone());
|
||||
}
|
||||
return stop_iteration::no;
|
||||
}
|
||||
stop_iteration consume(partition_end&& pe) {
|
||||
return _consumer.consume_end_of_partition();
|
||||
}
|
||||
private:
|
||||
stop_iteration handle_result(stop_iteration si) {
|
||||
if (si) {
|
||||
if (_consumer.consume_end_of_partition()) {
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
_reader.next_partition();
|
||||
}
|
||||
return stop_iteration::no;
|
||||
}
|
||||
};
|
||||
return do_with(consumer_adapter(*this, std::move(consumer)), [this] (consumer_adapter& adapter) {
|
||||
return do_with(consumer_adapter<Consumer>(*this, std::move(consumer)), [this] (consumer_adapter<Consumer>& adapter) {
|
||||
return consume_pausable(std::ref(adapter)).then([this, &adapter] {
|
||||
return adapter._consumer.consume_end_of_stream();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
template<typename Consumer, typename Filter>
|
||||
GCC6_CONCEPT(
|
||||
requires FlattenedConsumer<Consumer>() && PartitionFilter<Filter>
|
||||
)
|
||||
// A variant of consumee() that expects to be run in a seastar::thread.
|
||||
// Partitions for which filter(decorated_key) returns false are skipped
|
||||
// entirely and never reach the consumer.
|
||||
auto consume_in_thread(Consumer consumer, Filter filter) {
|
||||
auto adapter = consumer_adapter<Consumer>(*this, std::move(consumer));
|
||||
consume_pausable_in_thread(std::ref(adapter), std::move(filter));
|
||||
return adapter._consumer.consume_end_of_stream();
|
||||
};
|
||||
|
||||
/*
|
||||
* fast_forward_to is forbidden on flat_mutation_reader created for a single partition.
|
||||
*/
|
||||
virtual future<> fast_forward_to(const dht::partition_range&) = 0;
|
||||
virtual future<> fast_forward_to(position_range) = 0;
|
||||
};
|
||||
private:
|
||||
std::unique_ptr<impl> _impl;
|
||||
|
||||
flat_mutation_reader() = default;
|
||||
explicit operator bool() const noexcept { return bool(_impl); }
|
||||
friend class optimized_optional<flat_mutation_reader>;
|
||||
public:
|
||||
// Documented in mutation_reader::forwarding in mutation_reader.hh.
|
||||
class partition_range_forwarding_tag;
|
||||
@@ -225,10 +307,31 @@ public:
|
||||
GCC6_CONCEPT(
|
||||
requires FlattenedConsumer<Consumer>()
|
||||
)
|
||||
auto consume(Consumer consumer) {
|
||||
auto consume(Consumer consumer, consume_reversed_partitions reversed = consume_reversed_partitions::no) {
|
||||
if (reversed) {
|
||||
return do_with(impl::reverse_partitions(*_impl), [&] (auto& reversed_partition_stream) {
|
||||
return reversed_partition_stream._impl->consume(std::move(consumer));
|
||||
});
|
||||
}
|
||||
return _impl->consume(std::move(consumer));
|
||||
}
|
||||
|
||||
// Forwards to the implementation's consume_in_thread() with the given consumer
// and partition filter, returning its result.
template<typename Consumer, typename Filter>
|
||||
GCC6_CONCEPT(
|
||||
requires FlattenedConsumer<Consumer>() && PartitionFilter<Filter>
|
||||
)
|
||||
auto consume_in_thread(Consumer consumer, Filter filter) {
|
||||
return _impl->consume_in_thread(std::move(consumer), std::move(filter));
|
||||
}
|
||||
|
||||
// Convenience overload without a filter: delegates to the two-argument
// consume_in_thread() with a filter that returns true for every key.
template<typename Consumer>
|
||||
GCC6_CONCEPT(
|
||||
requires FlattenedConsumer<Consumer>()
|
||||
)
|
||||
auto consume_in_thread(Consumer consumer) {
|
||||
return consume_in_thread(std::move(consumer), [] (const dht::decorated_key&) { return true; });
|
||||
}
|
||||
|
||||
void next_partition() { _impl->next_partition(); }
|
||||
|
||||
future<> fill_buffer() { return _impl->fill_buffer(); }
|
||||
@@ -271,8 +374,17 @@ public:
|
||||
bool is_buffer_full() const { return _impl->is_buffer_full(); }
|
||||
mutation_fragment pop_mutation_fragment() { return _impl->pop_mutation_fragment(); }
|
||||
const schema_ptr& schema() const { return _impl->_schema; }
|
||||
// Overwrites the implementation's max_buffer_size_in_bytes with `size`.
void set_max_buffer_size(size_t size) {
|
||||
_impl->max_buffer_size_in_bytes = size;
|
||||
}
|
||||
};
|
||||
|
||||
// Sets the move_constructor_disengages trait to true for flat_mutation_reader.
// NOTE(review): presumably consulted by optimized_optional (see the
// flat_mutation_reader_opt alias) to treat a moved-from reader as empty - confirm.
template<>
|
||||
struct move_constructor_disengages<flat_mutation_reader> {
|
||||
enum { value = true };
|
||||
};
|
||||
using flat_mutation_reader_opt = optimized_optional<flat_mutation_reader>;
|
||||
|
||||
template<typename Impl, typename... Args>
|
||||
flat_mutation_reader make_flat_mutation_reader(Args &&... args) {
|
||||
return flat_mutation_reader(std::make_unique<Impl>(std::forward<Args>(args)...));
|
||||
@@ -280,10 +392,89 @@ flat_mutation_reader make_flat_mutation_reader(Args &&... args) {
|
||||
|
||||
class mutation_reader;
|
||||
|
||||
// Consumes mutation fragments until StopCondition is true.
|
||||
// The consumer will stop iff StopCondition returns true, in particular
|
||||
// reaching the end of stream alone won't stop the reader.
|
||||
template<typename StopCondition, typename ConsumeMutationFragment, typename ConsumeEndOfStream>
|
||||
GCC6_CONCEPT(requires requires(StopCondition stop, ConsumeMutationFragment consume_mf, ConsumeEndOfStream consume_eos, mutation_fragment mf) {
|
||||
{ stop() } -> bool;
|
||||
{ consume_mf(std::move(mf)) } -> void;
|
||||
{ consume_eos() } -> future<>;
|
||||
})
|
||||
future<> consume_mutation_fragments_until(flat_mutation_reader& r, StopCondition&& stop,
|
||||
ConsumeMutationFragment&& consume_mf, ConsumeEndOfStream&& consume_eos) {
|
||||
return do_until([stop] { return stop(); }, [&r, stop, consume_mf, consume_eos] {
|
||||
while (!r.is_buffer_empty()) {
|
||||
consume_mf(r.pop_mutation_fragment());
|
||||
if (stop()) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
}
|
||||
if (r.is_end_of_stream()) {
|
||||
return consume_eos();
|
||||
}
|
||||
return r.fill_buffer();
|
||||
});
|
||||
}
|
||||
|
||||
// Creates a stream which is like r but with transformation applied to the elements.
|
||||
template<typename T>
|
||||
GCC6_CONCEPT(
|
||||
requires StreamedMutationTranformer<T>()
|
||||
)
|
||||
flat_mutation_reader transform(flat_mutation_reader r, T t) {
|
||||
class transforming_reader : public flat_mutation_reader::impl {
|
||||
flat_mutation_reader _reader;
|
||||
T _t;
|
||||
struct consumer {
|
||||
transforming_reader* _owner;
|
||||
stop_iteration operator()(mutation_fragment&& mf) {
|
||||
_owner->push_mutation_fragment(_owner->_t(std::move(mf)));
|
||||
return stop_iteration(_owner->is_buffer_full());
|
||||
}
|
||||
};
|
||||
public:
|
||||
transforming_reader(flat_mutation_reader&& r, T&& t)
|
||||
: impl(t(r.schema()))
|
||||
, _reader(std::move(r))
|
||||
, _t(std::move(t))
|
||||
{}
|
||||
virtual future<> fill_buffer() override {
|
||||
if (_end_of_stream) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return _reader.consume_pausable(consumer{this}).then([this] {
|
||||
if (_reader.is_end_of_stream() && _reader.is_buffer_empty()) {
|
||||
_end_of_stream = true;
|
||||
}
|
||||
});
|
||||
}
|
||||
virtual void next_partition() override {
|
||||
clear_buffer_to_next_partition();
|
||||
if (is_buffer_empty()) {
|
||||
_reader.next_partition();
|
||||
}
|
||||
}
|
||||
virtual future<> fast_forward_to(const dht::partition_range& pr) override {
|
||||
clear_buffer();
|
||||
_end_of_stream = false;
|
||||
return _reader.fast_forward_to(pr);
|
||||
}
|
||||
virtual future<> fast_forward_to(position_range pr) override {
|
||||
throw std::bad_function_call();
|
||||
}
|
||||
};
|
||||
return make_flat_mutation_reader<transforming_reader>(std::move(r), std::move(t));
|
||||
}
|
||||
|
||||
flat_mutation_reader flat_mutation_reader_from_mutation_reader(schema_ptr, mutation_reader&&, streamed_mutation::forwarding);
|
||||
|
||||
flat_mutation_reader make_delegating_reader(flat_mutation_reader&);
|
||||
|
||||
flat_mutation_reader make_forwardable(flat_mutation_reader m);
|
||||
|
||||
flat_mutation_reader make_nonforwardable(flat_mutation_reader, bool);
|
||||
|
||||
flat_mutation_reader make_empty_flat_reader(schema_ptr s);
|
||||
|
||||
flat_mutation_reader flat_mutation_reader_from_mutations(std::vector<mutation>, streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no);
|
||||
@@ -291,5 +482,27 @@ flat_mutation_reader flat_mutation_reader_from_mutations(std::vector<mutation>,
|
||||
flat_mutation_reader
|
||||
make_flat_multi_range_reader(schema_ptr s, mutation_source source, const dht::partition_range_vector& ranges,
|
||||
const query::partition_slice& slice, const io_priority_class& pc = default_priority_class(),
|
||||
tracing::trace_state_ptr trace_state = nullptr, streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no,
|
||||
tracing::trace_state_ptr trace_state = nullptr,
|
||||
flat_mutation_reader::partition_range_forwarding fwd_mr = flat_mutation_reader::partition_range_forwarding::yes);
|
||||
|
||||
// Calls the consumer for each element of the reader's stream until end of stream
|
||||
// is reached or the consumer requests iteration to stop by returning stop_iteration::yes.
|
||||
// The consumer should accept mutation as the argument and return stop_iteration.
|
||||
// The returned future<> resolves when consumption ends.
|
||||
template <typename Consumer>
|
||||
inline
|
||||
future<> consume_partitions(flat_mutation_reader& reader, Consumer consumer) {
|
||||
static_assert(std::is_same<future<stop_iteration>, futurize_t<std::result_of_t<Consumer(mutation&&)>>>::value, "bad Consumer signature");
|
||||
using futurator = futurize<std::result_of_t<Consumer(mutation&&)>>;
|
||||
|
||||
return do_with(std::move(consumer), [&reader] (Consumer& c) -> future<> {
|
||||
return repeat([&reader, &c] () {
|
||||
return read_mutation_from_flat_mutation_reader(reader).then([&c] (mutation_opt&& mo) -> future<stop_iteration> {
|
||||
if (!mo) {
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||
}
|
||||
return futurator::apply(c, std::move(*mo));
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -149,9 +149,6 @@ stop_iteration streamed_mutation_freezer::consume(clustering_row&& cr) {
|
||||
}
|
||||
|
||||
stop_iteration streamed_mutation_freezer::consume(range_tombstone&& rt) {
|
||||
if (_reversed) {
|
||||
rt.flip();
|
||||
}
|
||||
_rts.apply(_schema, std::move(rt));
|
||||
return stop_iteration::no;
|
||||
}
|
||||
|
||||
2
main.cc
2
main.cc
@@ -499,7 +499,7 @@ int main(int ac, char** av) {
|
||||
auto prio = get_or_default(ssl_opts, "priority_string", sstring());
|
||||
auto clauth = is_true(get_or_default(ssl_opts, "require_client_auth", "false"));
|
||||
if (cluster_name.empty()) {
|
||||
cluster_name = "ScyllaDB Cluster";
|
||||
cluster_name = "Test Cluster";
|
||||
startlog.warn("Using default cluster name is not recommended. Using a unique cluster name will reduce the chance of adding nodes to the wrong cluster by mistake");
|
||||
}
|
||||
init_ms_fd_gossiper(listen_address
|
||||
|
||||
80
memtable.cc
80
memtable.cc
@@ -428,55 +428,75 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class flush_reader final : public mutation_reader::impl, private iterator_reader {
|
||||
class flush_reader final : public flat_mutation_reader::impl, private iterator_reader {
|
||||
// FIXME: Similarly to scanning_reader we have an underlying
|
||||
// flat_mutation_reader for each partition. This is suboptimal.
|
||||
// Partition snapshot reader should be devirtualised and called directly
|
||||
// without using any intermediate buffers.
|
||||
flat_mutation_reader_opt _partition_reader;
|
||||
flush_memory_accounter _flushed_memory;
|
||||
public:
|
||||
flush_reader(schema_ptr s, lw_shared_ptr<memtable> m)
|
||||
: iterator_reader(std::move(s), m, query::full_partition_range)
|
||||
: impl(s)
|
||||
, iterator_reader(std::move(s), m, query::full_partition_range)
|
||||
, _flushed_memory(*m)
|
||||
{}
|
||||
flush_reader(const flush_reader&) = delete;
|
||||
flush_reader(flush_reader&&) = delete;
|
||||
flush_reader& operator=(flush_reader&&) = delete;
|
||||
flush_reader& operator=(const flush_reader&) = delete;
|
||||
|
||||
virtual future<streamed_mutation_opt> operator()() override {
|
||||
private:
|
||||
void get_next_partition() {
|
||||
return read_section()(region(), [&] {
|
||||
return with_linearized_managed_bytes([&] {
|
||||
memtable_entry* e = fetch_entry();
|
||||
if (!e) {
|
||||
return make_ready_future<streamed_mutation_opt>(stdx::nullopt);
|
||||
} else {
|
||||
if (e) {
|
||||
auto cr = query::clustering_key_filter_ranges::get_ranges(*schema(), schema()->full_slice(), e->key().key());
|
||||
auto snp = e->partition().read(region(), schema());
|
||||
auto mpsr = make_partition_snapshot_reader<partition_snapshot_accounter>(schema(), e->key(), std::move(cr),
|
||||
auto mpsr = make_partition_snapshot_flat_reader<partition_snapshot_accounter>(schema(), e->key(), std::move(cr),
|
||||
snp, region(), read_section(), mtbl(), streamed_mutation::forwarding::no, _flushed_memory);
|
||||
_flushed_memory.account_component(*e);
|
||||
_flushed_memory.account_component(*snp);
|
||||
auto ret = make_ready_future<streamed_mutation_opt>(std::move(mpsr));
|
||||
_partition_reader = std::move(mpsr);
|
||||
advance();
|
||||
return ret;
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
virtual future<> fast_forward_to(const dht::partition_range& pr) override {
|
||||
return iterator_reader::fast_forward_to(pr);
|
||||
public:
|
||||
virtual future<> fill_buffer() override {
|
||||
return do_until([this] { return is_end_of_stream() || is_buffer_full(); }, [this] {
|
||||
if (!_partition_reader) {
|
||||
get_next_partition();
|
||||
if (!_partition_reader) {
|
||||
_end_of_stream = true;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
}
|
||||
return _partition_reader->consume_pausable([this] (mutation_fragment mf) {
|
||||
push_mutation_fragment(std::move(mf));
|
||||
return stop_iteration(is_buffer_full());
|
||||
}).then([this] {
|
||||
if (_partition_reader->is_end_of_stream() && _partition_reader->is_buffer_empty()) {
|
||||
_partition_reader = stdx::nullopt;
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
virtual void next_partition() override {
|
||||
clear_buffer_to_next_partition();
|
||||
if (is_buffer_empty()) {
|
||||
_partition_reader = stdx::nullopt;
|
||||
}
|
||||
}
|
||||
virtual future<> fast_forward_to(const dht::partition_range&) override {
|
||||
throw std::bad_function_call();
|
||||
}
|
||||
virtual future<> fast_forward_to(position_range) override {
|
||||
throw std::bad_function_call();
|
||||
}
|
||||
};
|
||||
|
||||
// Builds a flat reader with make_flat_reader() from the same arguments and
// converts it with mutation_reader_from_flat_mutation_reader().
mutation_reader
|
||||
memtable::make_reader(schema_ptr s,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
tracing::trace_state_ptr trace_state_ptr,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr) {
|
||||
return mutation_reader_from_flat_mutation_reader(
|
||||
make_flat_reader(std::move(s), range, slice, pc, std::move(trace_state_ptr), fwd, fwd_mr));
|
||||
}
|
||||
|
||||
flat_mutation_reader
|
||||
memtable::make_flat_reader(schema_ptr s,
|
||||
const dht::partition_range& range,
|
||||
@@ -507,14 +527,14 @@ memtable::make_flat_reader(schema_ptr s,
|
||||
}
|
||||
}
|
||||
|
||||
mutation_reader
|
||||
flat_mutation_reader
|
||||
memtable::make_flush_reader(schema_ptr s, const io_priority_class& pc) {
|
||||
if (group()) {
|
||||
return make_mutation_reader<flush_reader>(std::move(s), shared_from_this());
|
||||
return make_flat_mutation_reader<flush_reader>(s, shared_from_this());
|
||||
} else {
|
||||
auto& full_slice = s->full_slice();
|
||||
return mutation_reader_from_flat_mutation_reader(make_flat_mutation_reader<scanning_reader>(std::move(s), shared_from_this(),
|
||||
query::full_partition_range, full_slice, pc, mutation_reader::forwarding::no));
|
||||
return make_flat_mutation_reader<scanning_reader>(std::move(s), shared_from_this(),
|
||||
query::full_partition_range, full_slice, pc, mutation_reader::forwarding::no);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -529,8 +549,8 @@ memtable::update(db::rp_handle&& h) {
|
||||
|
||||
future<>
|
||||
memtable::apply(memtable& mt) {
|
||||
return do_with(mt.make_reader(_schema), [this] (auto&& rd) mutable {
|
||||
return consume(rd, [self = this->shared_from_this(), &rd] (mutation&& m) {
|
||||
return do_with(mt.make_flat_reader(_schema), [this] (auto&& rd) mutable {
|
||||
return consume_partitions(rd, [self = this->shared_from_this(), &rd] (mutation&& m) {
|
||||
self->apply(m);
|
||||
return stop_iteration::no;
|
||||
});
|
||||
|
||||
15
memtable.hh
15
memtable.hh
@@ -194,19 +194,6 @@ public:
|
||||
// The 'range' parameter must be live as long as the reader is being used
|
||||
//
|
||||
// Mutations returned by the reader will all have given schema.
|
||||
mutation_reader make_reader(schema_ptr,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc = default_priority_class(),
|
||||
tracing::trace_state_ptr trace_state_ptr = nullptr,
|
||||
streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no,
|
||||
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::yes);
|
||||
|
||||
mutation_reader make_reader(schema_ptr s, const dht::partition_range& range = query::full_partition_range) {
|
||||
auto& full_slice = s->full_slice();
|
||||
return make_reader(s, range, full_slice);
|
||||
}
|
||||
|
||||
flat_mutation_reader make_flat_reader(schema_ptr,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
@@ -221,7 +208,7 @@ public:
|
||||
return make_flat_reader(s, range, full_slice);
|
||||
}
|
||||
|
||||
mutation_reader make_flush_reader(schema_ptr, const io_priority_class& pc);
|
||||
flat_mutation_reader make_flush_reader(schema_ptr, const io_priority_class& pc);
|
||||
|
||||
mutation_source as_data_source();
|
||||
|
||||
|
||||
@@ -514,7 +514,6 @@ shared_ptr<messaging_service::rpc_protocol_client_wrapper> messaging_service::ge
|
||||
}();
|
||||
|
||||
auto remote_addr = ipv4_addr(get_preferred_ip(id.addr).raw_addr(), must_encrypt ? _ssl_port : _port);
|
||||
auto local_addr = ipv4_addr{_listen_address.raw_addr(), 0};
|
||||
|
||||
rpc::client_options opts;
|
||||
// send keepalive messages each minute if connection is idle, drop connection after 10 failures
|
||||
@@ -526,9 +525,9 @@ shared_ptr<messaging_service::rpc_protocol_client_wrapper> messaging_service::ge
|
||||
|
||||
auto client = must_encrypt ?
|
||||
::make_shared<rpc_protocol_client_wrapper>(*_rpc, std::move(opts),
|
||||
remote_addr, local_addr, _credentials) :
|
||||
remote_addr, ipv4_addr(), _credentials) :
|
||||
::make_shared<rpc_protocol_client_wrapper>(*_rpc, std::move(opts),
|
||||
remote_addr, local_addr);
|
||||
remote_addr);
|
||||
|
||||
it = _clients[idx].emplace(id, shard_info(std::move(client))).first;
|
||||
uint32_t src_cpu_id = engine().cpu_id();
|
||||
|
||||
17
mutation.cc
17
mutation.cc
@@ -269,13 +269,13 @@ future<mutation> mutation_from_streamed_mutation(streamed_mutation& sm) {
|
||||
});
|
||||
}
|
||||
|
||||
future<mutation_opt> read_mutation_from_flat_mutation_reader(schema_ptr s, flat_mutation_reader& r) {
|
||||
future<mutation_opt> read_mutation_from_flat_mutation_reader(flat_mutation_reader& r) {
|
||||
if (r.is_buffer_empty()) {
|
||||
if (r.is_end_of_stream()) {
|
||||
return make_ready_future<mutation_opt>();
|
||||
}
|
||||
return r.fill_buffer().then([&r, s = std::move(s)] {
|
||||
return read_mutation_from_flat_mutation_reader(std::move(s), r);
|
||||
return r.fill_buffer().then([&r] {
|
||||
return read_mutation_from_flat_mutation_reader(r);
|
||||
});
|
||||
}
|
||||
// r.is_buffer_empty() is always false at this point
|
||||
@@ -320,5 +320,12 @@ future<mutation_opt> read_mutation_from_flat_mutation_reader(schema_ptr s, flat_
|
||||
return _builder->consume_end_of_stream();
|
||||
}
|
||||
};
|
||||
return r.consume(adapter(std::move(s)));
|
||||
}
|
||||
return r.consume(adapter(r.schema()));
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const mutation& m) {
|
||||
const ::schema& s = *m.schema();
|
||||
fprint(os, "{%s.%s key %s data ", s.ks_name(), s.cf_name(), m.decorated_key());
|
||||
os << m.partition() << "}";
|
||||
return os;
|
||||
}
|
||||
|
||||
@@ -194,4 +194,4 @@ future<mutation> mutation_from_streamed_mutation(streamed_mutation& sm);
|
||||
class flat_mutation_reader;
|
||||
|
||||
// Reads a single partition from a reader. Returns empty optional if there are no more partitions to be read.
|
||||
future<mutation_opt> read_mutation_from_flat_mutation_reader(schema_ptr, flat_mutation_reader&);
|
||||
future<mutation_opt> read_mutation_from_flat_mutation_reader(flat_mutation_reader&);
|
||||
@@ -859,6 +859,11 @@ mutation_partition::query_compacted(query::result::partition_writer& pw, const s
|
||||
}
|
||||
}
|
||||
|
||||
// Prints the cell by writing to_hex(c._data) to `out`.
std::ostream&
|
||||
operator<<(std::ostream& out, const atomic_cell_or_collection& c) {
|
||||
return out << to_hex(c._data);
|
||||
}
|
||||
|
||||
std::ostream&
|
||||
operator<<(std::ostream& os, const std::pair<column_id, const atomic_cell_or_collection&>& c) {
|
||||
return fprint(os, "{column: %s %s}", c.first, c.second);
|
||||
@@ -1997,8 +2002,11 @@ future<> data_query(
|
||||
auto cfq = make_stable_flattened_mutations_consumer<compact_for_query<emit_only_live_rows::yes, query_result_builder>>(
|
||||
*s, query_time, slice, row_limit, partition_limit, std::move(qrb));
|
||||
|
||||
auto reader = source(s, range, slice, service::get_local_sstable_query_read_priority(), std::move(trace_ptr));
|
||||
return consume_flattened(std::move(reader), std::move(cfq), is_reversed);
|
||||
return do_with(source.make_flat_mutation_reader(s, range, slice, service::get_local_sstable_query_read_priority(), std::move(trace_ptr),
|
||||
streamed_mutation::forwarding::no, mutation_reader::forwarding::no),
|
||||
[cfq = std::move(cfq), is_reversed] (flat_mutation_reader& reader) mutable {
|
||||
return reader.consume(std::move(cfq), flat_mutation_reader::consume_reversed_partitions(is_reversed));
|
||||
});
|
||||
}
|
||||
|
||||
class reconcilable_result_builder {
|
||||
@@ -2101,8 +2109,11 @@ static do_mutation_query(schema_ptr s,
|
||||
auto cfq = make_stable_flattened_mutations_consumer<compact_for_query<emit_only_live_rows::no, reconcilable_result_builder>>(
|
||||
*s, query_time, slice, row_limit, partition_limit, std::move(rrb));
|
||||
|
||||
auto reader = source(s, range, slice, service::get_local_sstable_query_read_priority(), std::move(trace_ptr));
|
||||
return consume_flattened(std::move(reader), std::move(cfq), is_reversed);
|
||||
return do_with(source.make_flat_mutation_reader(s, range, slice, service::get_local_sstable_query_read_priority(), std::move(trace_ptr),
|
||||
streamed_mutation::forwarding::no, mutation_reader::forwarding::no),
|
||||
[cfq = std::move(cfq), is_reversed] (flat_mutation_reader& reader) mutable {
|
||||
return reader.consume(std::move(cfq), flat_mutation_reader::consume_reversed_partitions(is_reversed));
|
||||
});
|
||||
}
|
||||
|
||||
static thread_local auto mutation_query_stage = seastar::make_execution_stage("mutation_query", do_mutation_query);
|
||||
@@ -2248,12 +2259,29 @@ future<mutation_opt> counter_write_query(schema_ptr s, const mutation_source& so
|
||||
const query::partition_slice& slice,
|
||||
tracing::trace_state_ptr trace_ptr)
|
||||
{
|
||||
return do_with(dht::partition_range::make_singular(dk), [&] (auto& prange) {
|
||||
auto cwqrb = counter_write_query_result_builder(*s);
|
||||
auto cfq = make_stable_flattened_mutations_consumer<compact_for_query<emit_only_live_rows::yes, counter_write_query_result_builder>>(
|
||||
*s, gc_clock::now(), slice, query::max_rows, query::max_rows, std::move(cwqrb));
|
||||
auto reader = source(s, prange, slice,
|
||||
service::get_local_sstable_query_read_priority(), std::move(trace_ptr));
|
||||
return consume_flattened(std::move(reader), std::move(cfq), false);
|
||||
});
|
||||
struct range_and_reader {
|
||||
dht::partition_range range;
|
||||
flat_mutation_reader reader;
|
||||
|
||||
range_and_reader(range_and_reader&&) = delete;
|
||||
range_and_reader(const range_and_reader&) = delete;
|
||||
|
||||
range_and_reader(schema_ptr s, const mutation_source& source,
|
||||
const dht::decorated_key& dk,
|
||||
const query::partition_slice& slice,
|
||||
tracing::trace_state_ptr trace_ptr)
|
||||
: range(dht::partition_range::make_singular(dk))
|
||||
, reader(source.make_flat_mutation_reader(s, range, slice, service::get_local_sstable_query_read_priority(),
|
||||
std::move(trace_ptr), streamed_mutation::forwarding::no,
|
||||
mutation_reader::forwarding::no))
|
||||
{ }
|
||||
};
|
||||
|
||||
// do_with() doesn't support immovable objects
|
||||
auto r_a_r = std::make_unique<range_and_reader>(s, source, dk, slice, std::move(trace_ptr));
|
||||
auto cwqrb = counter_write_query_result_builder(*s);
|
||||
auto cfq = make_stable_flattened_mutations_consumer<compact_for_query<emit_only_live_rows::yes, counter_write_query_result_builder>>(
|
||||
*s, gc_clock::now(), slice, query::max_rows, query::max_rows, std::move(cwqrb));
|
||||
auto f = r_a_r->reader.consume(std::move(cfq), flat_mutation_reader::consume_reversed_partitions::no);
|
||||
return f.finally([r_a_r = std::move(r_a_r)] { });
|
||||
}
|
||||
|
||||
@@ -33,10 +33,10 @@
|
||||
// Dumb selector implementation for combined_mutation_reader that simply
|
||||
// forwards its list of readers.
|
||||
class list_reader_selector : public reader_selector {
|
||||
std::vector<mutation_reader> _readers;
|
||||
std::vector<flat_mutation_reader> _readers;
|
||||
|
||||
public:
|
||||
explicit list_reader_selector(std::vector<mutation_reader> readers)
|
||||
explicit list_reader_selector(std::vector<flat_mutation_reader> readers)
|
||||
: _readers(std::move(readers)) {
|
||||
_selector_position = dht::minimum_token();
|
||||
}
|
||||
@@ -47,17 +47,17 @@ public:
|
||||
list_reader_selector(list_reader_selector&&) = default;
|
||||
list_reader_selector& operator=(list_reader_selector&&) = default;
|
||||
|
||||
virtual std::vector<mutation_reader> create_new_readers(const dht::token* const) override {
|
||||
virtual std::vector<flat_mutation_reader> create_new_readers(const dht::token* const) override {
|
||||
_selector_position = dht::maximum_token();
|
||||
return std::exchange(_readers, {});
|
||||
}
|
||||
|
||||
virtual std::vector<mutation_reader> fast_forward_to(const dht::partition_range&) override {
|
||||
virtual std::vector<flat_mutation_reader> fast_forward_to(const dht::partition_range&) override {
|
||||
return {};
|
||||
}
|
||||
};
|
||||
|
||||
void combined_mutation_reader::maybe_add_readers(const dht::token* const t) {
|
||||
void mutation_reader_merger::maybe_add_readers(const dht::token* const t) {
|
||||
if (!_selector->has_new_readers(t)) {
|
||||
return;
|
||||
}
|
||||
@@ -65,103 +65,291 @@ void combined_mutation_reader::maybe_add_readers(const dht::token* const t) {
|
||||
add_readers(_selector->create_new_readers(t));
|
||||
}
|
||||
|
||||
void combined_mutation_reader::add_readers(std::vector<mutation_reader> new_readers) {
|
||||
void mutation_reader_merger::add_readers(std::vector<flat_mutation_reader> new_readers) {
|
||||
for (auto&& new_reader : new_readers) {
|
||||
_all_readers.emplace_back(std::move(new_reader));
|
||||
auto* r = &_all_readers.back();
|
||||
_next.emplace_back(r);
|
||||
_next.emplace_back(r, mutation_fragment::kind::partition_end);
|
||||
}
|
||||
}
|
||||
|
||||
const dht::token* combined_mutation_reader::current_position() const {
|
||||
if (_ptables.empty()) {
|
||||
const dht::token* mutation_reader_merger::current_position() const {
|
||||
if (!_key) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return &_ptables.front().m.decorated_key().token();
|
||||
return &_key->token();
|
||||
}
|
||||
|
||||
future<> combined_mutation_reader::prepare_next() {
|
||||
maybe_add_readers(current_position());
|
||||
struct mutation_reader_merger::reader_heap_compare {
|
||||
const schema& s;
|
||||
|
||||
return parallel_for_each(_next, [this] (mutation_reader* mr) {
|
||||
return (*mr)().then([this, mr] (streamed_mutation_opt next) {
|
||||
if (next) {
|
||||
_ptables.emplace_back(mutation_and_reader { std::move(*next), mr });
|
||||
boost::range::push_heap(_ptables, &heap_compare);
|
||||
explicit reader_heap_compare(const schema& s)
|
||||
: s(s) {
|
||||
}
|
||||
|
||||
bool operator()(const mutation_reader_merger::reader_and_fragment& a, const mutation_reader_merger::reader_and_fragment& b) {
|
||||
// Invert comparison as this is a max-heap.
|
||||
return b.fragment.as_partition_start().key().less_compare(s, a.fragment.as_partition_start().key());
|
||||
}
|
||||
};
|
||||
|
||||
struct mutation_reader_merger::fragment_heap_compare {
|
||||
position_in_partition::less_compare cmp;
|
||||
|
||||
explicit fragment_heap_compare(const schema& s)
|
||||
: cmp(s) {
|
||||
}
|
||||
|
||||
bool operator()(const mutation_reader_merger::reader_and_fragment& a, const mutation_reader_merger::reader_and_fragment& b) {
|
||||
// Invert comparison as this is a max-heap.
|
||||
return cmp(b.fragment.position(), a.fragment.position());
|
||||
}
|
||||
};
|
||||
|
||||
future<> mutation_reader_merger::prepare_next() {
|
||||
return parallel_for_each(_next, [this] (reader_and_last_fragment_kind rk) {
|
||||
return (*rk.reader)().then([this, rk] (mutation_fragment_opt mfo) {
|
||||
if (mfo) {
|
||||
if (mfo->is_partition_start()) {
|
||||
_reader_heap.emplace_back(rk.reader, std::move(*mfo));
|
||||
boost::push_heap(_reader_heap, reader_heap_compare(*_schema));
|
||||
} else {
|
||||
_fragment_heap.emplace_back(rk.reader, std::move(*mfo));
|
||||
boost::range::push_heap(_fragment_heap, fragment_heap_compare(*_schema));
|
||||
}
|
||||
} else if (_fwd_sm == streamed_mutation::forwarding::yes && rk.last_kind != mutation_fragment::kind::partition_end) {
|
||||
// When in streamed_mutation::forwarding mode we need
|
||||
// to keep track of readers that returned
|
||||
// end-of-stream to know what readers to ff. We can't
|
||||
// just ff all readers as we might drop fragments from
|
||||
// partitions we haven't even read yet.
|
||||
// Readers whose last emitted fragment was a partition
|
||||
// end are out of data for good for the current range.
|
||||
_halted_readers.push_back(rk);
|
||||
} else if (_fwd_mr == mutation_reader::forwarding::no) {
|
||||
_all_readers.remove_if([mr] (auto& r) { return &r == mr; });
|
||||
_all_readers.remove_if([mr = rk.reader] (auto& r) { return &r == mr; });
|
||||
}
|
||||
});
|
||||
}).then([this] {
|
||||
_next.clear();
|
||||
|
||||
// We are either crossing a partition boundary or have run out of
|
||||
// readers. If there are halted readers then we are just
|
||||
// waiting for a fast-forward so there is nothing to do.
|
||||
if (_fragment_heap.empty() && _halted_readers.empty()) {
|
||||
if (_reader_heap.empty()) {
|
||||
_key = {};
|
||||
} else {
|
||||
_key = _reader_heap.front().fragment.as_partition_start().key();
|
||||
}
|
||||
|
||||
maybe_add_readers(current_position());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
future<streamed_mutation_opt> combined_mutation_reader::next() {
|
||||
if ((_current.empty() && !_next.empty()) || _selector->has_new_readers(current_position())) {
|
||||
return prepare_next().then([this] { return next(); });
|
||||
}
|
||||
if (_ptables.empty()) {
|
||||
return make_ready_future<streamed_mutation_opt>();
|
||||
void mutation_reader_merger::prepare_forwardable_readers() {
|
||||
_next.reserve(_halted_readers.size() + _fragment_heap.size() + _next.size());
|
||||
|
||||
std::move(_halted_readers.begin(), _halted_readers.end(), std::back_inserter(_next));
|
||||
for (auto& df : _fragment_heap) {
|
||||
_next.emplace_back(df.reader, df.fragment.mutation_fragment_kind());
|
||||
}
|
||||
|
||||
while (!_ptables.empty()) {
|
||||
boost::range::pop_heap(_ptables, &heap_compare);
|
||||
auto& candidate = _ptables.back();
|
||||
streamed_mutation& m = candidate.m;
|
||||
|
||||
_current.emplace_back(std::move(m));
|
||||
_next.emplace_back(candidate.read);
|
||||
_ptables.pop_back();
|
||||
|
||||
if (_ptables.empty() || !_current.back().decorated_key().equal(*_current.back().schema(), _ptables.front().m.decorated_key())) {
|
||||
// key has changed, so emit accumulated mutation
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (_current.size() == 1) {
|
||||
auto m = std::move(_current.back());
|
||||
_current.pop_back();
|
||||
return make_ready_future<streamed_mutation_opt>(std::move(m));
|
||||
}
|
||||
return make_ready_future<streamed_mutation_opt>(merge_mutations(std::exchange(_current, {})));
|
||||
_halted_readers.clear();
|
||||
_fragment_heap.clear();
|
||||
}
|
||||
|
||||
combined_mutation_reader::combined_mutation_reader(std::unique_ptr<reader_selector> selector, mutation_reader::forwarding fwd_mr)
|
||||
mutation_reader_merger::mutation_reader_merger(schema_ptr schema,
|
||||
std::unique_ptr<reader_selector> selector,
|
||||
streamed_mutation::forwarding fwd_sm,
|
||||
mutation_reader::forwarding fwd_mr)
|
||||
: _selector(std::move(selector))
|
||||
, _fwd_mr(fwd_mr)
|
||||
{
|
||||
, _schema(std::move(schema))
|
||||
, _fwd_sm(fwd_sm)
|
||||
, _fwd_mr(fwd_mr) {
|
||||
maybe_add_readers(nullptr);
|
||||
}
|
||||
|
||||
future<> combined_mutation_reader::fast_forward_to(const dht::partition_range& pr) {
|
||||
_ptables.clear();
|
||||
auto rs = _all_readers | boost::adaptors::transformed([] (auto& r) { return &r; });
|
||||
_next.assign(rs.begin(), rs.end());
|
||||
future<mutation_reader_merger::mutation_fragment_batch> mutation_reader_merger::operator()() {
|
||||
if (!_next.empty()) {
|
||||
return prepare_next().then([this] { return (*this)(); });
|
||||
}
|
||||
|
||||
return parallel_for_each(_next, [this, &pr] (mutation_reader* mr) {
|
||||
return mr->fast_forward_to(pr);
|
||||
_current.clear();
|
||||
|
||||
// If we ran out of fragments for the current partition, select the
|
||||
// readers for the next one.
|
||||
if (_fragment_heap.empty()) {
|
||||
if (!_halted_readers.empty() || _reader_heap.empty()) {
|
||||
return make_ready_future<mutation_fragment_batch>(_current);
|
||||
}
|
||||
|
||||
auto key = [] (const std::vector<reader_and_fragment>& heap) -> const dht::decorated_key& {
|
||||
return heap.front().fragment.as_partition_start().key();
|
||||
};
|
||||
|
||||
do {
|
||||
boost::range::pop_heap(_reader_heap, reader_heap_compare(*_schema));
|
||||
// All fragments here are partition_start so no need to
|
||||
// heap-sort them.
|
||||
_fragment_heap.emplace_back(std::move(_reader_heap.back()));
|
||||
_reader_heap.pop_back();
|
||||
}
|
||||
while (!_reader_heap.empty() && key(_fragment_heap).equal(*_schema, key(_reader_heap)));
|
||||
}
|
||||
|
||||
const auto equal = position_in_partition::equal_compare(*_schema);
|
||||
do {
|
||||
boost::range::pop_heap(_fragment_heap, fragment_heap_compare(*_schema));
|
||||
auto& n = _fragment_heap.back();
|
||||
const auto kind = n.fragment.mutation_fragment_kind();
|
||||
_current.emplace_back(std::move(n.fragment));
|
||||
_next.emplace_back(n.reader, kind);
|
||||
_fragment_heap.pop_back();
|
||||
}
|
||||
while (!_fragment_heap.empty() && equal(_current.back().position(), _fragment_heap.front().fragment.position()));
|
||||
|
||||
return make_ready_future<mutation_fragment_batch>(_current);
|
||||
}
|
||||
|
||||
void mutation_reader_merger::next_partition() {
|
||||
prepare_forwardable_readers();
|
||||
for (auto& rk : _next) {
|
||||
rk.last_kind = mutation_fragment::kind::partition_end;
|
||||
rk.reader->next_partition();
|
||||
}
|
||||
}
|
||||
|
||||
future<> mutation_reader_merger::fast_forward_to(const dht::partition_range& pr) {
|
||||
_next.clear();
|
||||
_halted_readers.clear();
|
||||
_fragment_heap.clear();
|
||||
_reader_heap.clear();
|
||||
|
||||
return parallel_for_each(_all_readers, [this, &pr] (flat_mutation_reader& mr) {
|
||||
_next.emplace_back(&mr, mutation_fragment::kind::partition_end);
|
||||
return mr.fast_forward_to(pr);
|
||||
}).then([this, &pr] {
|
||||
add_readers(_selector->fast_forward_to(pr));
|
||||
});
|
||||
}
|
||||
|
||||
future<streamed_mutation_opt> combined_mutation_reader::operator()() {
|
||||
return next();
|
||||
future<> mutation_reader_merger::fast_forward_to(position_range pr) {
|
||||
prepare_forwardable_readers();
|
||||
return parallel_for_each(_next, [this, pr = std::move(pr)] (reader_and_last_fragment_kind rk) {
|
||||
return rk.reader->fast_forward_to(pr);
|
||||
});
|
||||
}
|
||||
|
||||
combined_mutation_reader::combined_mutation_reader(schema_ptr schema,
|
||||
std::unique_ptr<reader_selector> selector,
|
||||
streamed_mutation::forwarding fwd_sm,
|
||||
mutation_reader::forwarding fwd_mr)
|
||||
: impl(std::move(schema))
|
||||
, _producer(_schema, mutation_reader_merger(_schema, std::move(selector), fwd_sm, fwd_mr))
|
||||
, _fwd_sm(fwd_sm) {
|
||||
}
|
||||
|
||||
future<> combined_mutation_reader::fill_buffer() {
|
||||
return repeat([this] {
|
||||
return _producer().then([this] (mutation_fragment_opt mfo) {
|
||||
if (!mfo) {
|
||||
_end_of_stream = true;
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
push_mutation_fragment(std::move(*mfo));
|
||||
if (is_buffer_full()) {
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
return stop_iteration::no;
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
void combined_mutation_reader::next_partition() {
|
||||
if (_fwd_sm == streamed_mutation::forwarding::yes) {
|
||||
clear_buffer();
|
||||
_end_of_stream = false;
|
||||
_producer.next_partition();
|
||||
} else {
|
||||
clear_buffer_to_next_partition();
|
||||
// If the buffer is empty at this point then all fragments in it
|
||||
// belonged to the current partition, so either:
|
||||
// * All (forwardable) readers are still positioned in the
|
||||
// inside of the current partition, or
|
||||
// * They are between the current one and the next one.
|
||||
// Either way we need to call next_partition on them.
|
||||
if (is_buffer_empty()) {
|
||||
_producer.next_partition();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
future<> combined_mutation_reader::fast_forward_to(const dht::partition_range& pr) {
|
||||
clear_buffer();
|
||||
_end_of_stream = false;
|
||||
return _producer.fast_forward_to(pr);
|
||||
}
|
||||
|
||||
future<> combined_mutation_reader::fast_forward_to(position_range pr) {
|
||||
forward_buffer_to(pr.start());
|
||||
_end_of_stream = false;
|
||||
return _producer.fast_forward_to(std::move(pr));
|
||||
}
|
||||
|
||||
mutation_reader
|
||||
make_combined_reader(std::vector<mutation_reader> readers, mutation_reader::forwarding fwd_mr) {
|
||||
return make_mutation_reader<combined_mutation_reader>(std::make_unique<list_reader_selector>(std::move(readers)), fwd_mr);
|
||||
make_combined_reader(schema_ptr schema,
|
||||
std::vector<mutation_reader> readers,
|
||||
streamed_mutation::forwarding fwd_sm,
|
||||
mutation_reader::forwarding fwd_mr) {
|
||||
std::vector<flat_mutation_reader> flat_readers;
|
||||
flat_readers.reserve(readers.size());
|
||||
for (auto& reader : readers) {
|
||||
flat_readers.emplace_back(flat_mutation_reader_from_mutation_reader(schema, std::move(reader), fwd_sm));
|
||||
}
|
||||
|
||||
return mutation_reader_from_flat_mutation_reader(make_flat_mutation_reader<combined_mutation_reader>(
|
||||
schema,
|
||||
std::make_unique<list_reader_selector>(std::move(flat_readers)),
|
||||
fwd_sm,
|
||||
fwd_mr));
|
||||
}
|
||||
|
||||
mutation_reader
|
||||
make_combined_reader(mutation_reader&& a, mutation_reader&& b, mutation_reader::forwarding fwd_mr) {
|
||||
make_combined_reader(schema_ptr schema,
|
||||
mutation_reader&& a,
|
||||
mutation_reader&& b,
|
||||
streamed_mutation::forwarding fwd_sm,
|
||||
mutation_reader::forwarding fwd_mr) {
|
||||
std::vector<mutation_reader> v;
|
||||
v.reserve(2);
|
||||
v.push_back(std::move(a));
|
||||
v.push_back(std::move(b));
|
||||
return make_combined_reader(std::move(v), fwd_mr);
|
||||
return make_combined_reader(std::move(schema), std::move(v), fwd_sm, fwd_mr);
|
||||
}
|
||||
|
||||
flat_mutation_reader make_combined_reader(schema_ptr schema,
|
||||
std::vector<flat_mutation_reader> readers,
|
||||
streamed_mutation::forwarding fwd_sm,
|
||||
mutation_reader::forwarding fwd_mr) {
|
||||
return make_flat_mutation_reader<combined_mutation_reader>(schema,
|
||||
std::make_unique<list_reader_selector>(std::move(readers)),
|
||||
fwd_sm,
|
||||
fwd_mr);
|
||||
}
|
||||
|
||||
flat_mutation_reader make_combined_reader(schema_ptr schema,
|
||||
flat_mutation_reader&& a,
|
||||
flat_mutation_reader&& b,
|
||||
streamed_mutation::forwarding fwd_sm,
|
||||
mutation_reader::forwarding fwd_mr) {
|
||||
std::vector<flat_mutation_reader> v;
|
||||
v.reserve(2);
|
||||
v.push_back(std::move(a));
|
||||
v.push_back(std::move(b));
|
||||
return make_combined_reader(std::move(schema), std::move(v), fwd_sm, fwd_mr);
|
||||
}
|
||||
|
||||
class reader_returning final : public mutation_reader::impl {
|
||||
@@ -348,7 +536,7 @@ file reader_resource_tracker::track(file f) const {
|
||||
}
|
||||
|
||||
|
||||
class restricting_mutation_reader : public mutation_reader::impl {
|
||||
class restricting_mutation_reader : public flat_mutation_reader::impl {
|
||||
struct mutation_source_and_params {
|
||||
mutation_source _ms;
|
||||
schema_ptr _s;
|
||||
@@ -359,13 +547,13 @@ class restricting_mutation_reader : public mutation_reader::impl {
|
||||
streamed_mutation::forwarding _fwd;
|
||||
mutation_reader::forwarding _fwd_mr;
|
||||
|
||||
mutation_reader operator()() {
|
||||
return _ms(std::move(_s), _range.get(), _slice.get(), _pc.get(), std::move(_trace_state), _fwd, _fwd_mr);
|
||||
flat_mutation_reader operator()() {
|
||||
return _ms.make_flat_mutation_reader(std::move(_s), _range.get(), _slice.get(), _pc.get(), std::move(_trace_state), _fwd, _fwd_mr);
|
||||
}
|
||||
};
|
||||
|
||||
const restricted_mutation_reader_config& _config;
|
||||
boost::variant<mutation_source_and_params, mutation_reader> _reader_or_mutation_source;
|
||||
boost::variant<mutation_source_and_params, flat_mutation_reader> _reader_or_mutation_source;
|
||||
|
||||
static const std::size_t new_reader_base_cost{16 * 1024};
|
||||
|
||||
@@ -375,7 +563,7 @@ class restricting_mutation_reader : public mutation_reader::impl {
|
||||
: _config.resources_sem->wait(new_reader_base_cost);
|
||||
|
||||
return f.then([this] {
|
||||
mutation_reader reader = boost::get<mutation_source_and_params>(_reader_or_mutation_source)();
|
||||
flat_mutation_reader reader = boost::get<mutation_source_and_params>(_reader_or_mutation_source)();
|
||||
_reader_or_mutation_source = std::move(reader);
|
||||
|
||||
if (_config.active_reads) {
|
||||
@@ -385,6 +573,22 @@ class restricting_mutation_reader : public mutation_reader::impl {
|
||||
return make_ready_future<>();
|
||||
});
|
||||
}
|
||||
|
||||
template<typename Function>
|
||||
GCC6_CONCEPT(
|
||||
requires std::is_move_constructible<Function>::value
|
||||
&& requires(Function fn, flat_mutation_reader& reader) {
|
||||
fn(reader);
|
||||
}
|
||||
)
|
||||
decltype(auto) with_reader(Function fn) {
|
||||
if (auto* reader = boost::get<flat_mutation_reader>(&_reader_or_mutation_source)) {
|
||||
return fn(*reader);
|
||||
}
|
||||
return create_reader().then([this, fn = std::move(fn)] () mutable {
|
||||
return fn(boost::get<flat_mutation_reader>(_reader_or_mutation_source));
|
||||
});
|
||||
}
|
||||
public:
|
||||
restricting_mutation_reader(const restricted_mutation_reader_config& config,
|
||||
mutation_source ms,
|
||||
@@ -395,7 +599,8 @@ public:
|
||||
tracing::trace_state_ptr trace_state,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr)
|
||||
: _config(config)
|
||||
: impl(s)
|
||||
, _config(config)
|
||||
, _reader_or_mutation_source(
|
||||
mutation_source_and_params{std::move(ms), std::move(s), range, slice, pc, std::move(trace_state), fwd, fwd_mr}) {
|
||||
if (_config.resources_sem->waiters() >= _config.max_queue_length) {
|
||||
@@ -403,49 +608,64 @@ public:
|
||||
}
|
||||
}
|
||||
~restricting_mutation_reader() {
|
||||
if (boost::get<mutation_reader>(&_reader_or_mutation_source)) {
|
||||
if (boost::get<flat_mutation_reader>(&_reader_or_mutation_source)) {
|
||||
_config.resources_sem->signal(new_reader_base_cost);
|
||||
if (_config.active_reads) {
|
||||
--(*_config.active_reads);
|
||||
}
|
||||
}
|
||||
}
|
||||
future<streamed_mutation_opt> operator()() override {
|
||||
// FIXME: we should defer freeing until the mutation is freed, perhaps,
|
||||
// rather than just returned
|
||||
if (auto* reader = boost::get<mutation_reader>(&_reader_or_mutation_source)) {
|
||||
return (*reader)();
|
||||
}
|
||||
|
||||
return create_reader().then([this] {
|
||||
return boost::get<mutation_reader>(_reader_or_mutation_source)();
|
||||
virtual future<> fill_buffer() override {
|
||||
return with_reader([this] (flat_mutation_reader& reader) {
|
||||
return reader.fill_buffer().then([this, &reader] {
|
||||
_end_of_stream = reader.is_end_of_stream();
|
||||
while (!reader.is_buffer_empty()) {
|
||||
push_mutation_fragment(reader.pop_mutation_fragment());
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
virtual future<> fast_forward_to(const dht::partition_range& pr) override {
|
||||
if (auto* reader = boost::get<mutation_reader>(&_reader_or_mutation_source)) {
|
||||
return reader->fast_forward_to(pr);
|
||||
virtual void next_partition() override {
|
||||
clear_buffer_to_next_partition();
|
||||
if (!is_buffer_empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
return create_reader().then([this, &pr] {
|
||||
return boost::get<mutation_reader>(_reader_or_mutation_source).fast_forward_to(pr);
|
||||
_end_of_stream = false;
|
||||
if (auto* reader = boost::get<flat_mutation_reader>(&_reader_or_mutation_source)) {
|
||||
return reader->next_partition();
|
||||
}
|
||||
}
|
||||
virtual future<> fast_forward_to(const dht::partition_range& pr) override {
|
||||
clear_buffer();
|
||||
_end_of_stream = false;
|
||||
return with_reader([&pr] (flat_mutation_reader& reader) {
|
||||
return reader.fast_forward_to(pr);
|
||||
});
|
||||
}
|
||||
virtual future<> fast_forward_to(position_range pr) override {
|
||||
forward_buffer_to(pr.start());
|
||||
_end_of_stream = false;
|
||||
return with_reader([pr = std::move(pr)] (flat_mutation_reader& reader) mutable {
|
||||
return reader.fast_forward_to(std::move(pr));
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
mutation_reader
|
||||
make_restricted_reader(const restricted_mutation_reader_config& config,
|
||||
mutation_source ms,
|
||||
schema_ptr s,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr) {
|
||||
return make_mutation_reader<restricting_mutation_reader>(config, std::move(ms), std::move(s), range, slice, pc, std::move(trace_state), fwd, fwd_mr);
|
||||
flat_mutation_reader
|
||||
make_restricted_flat_reader(const restricted_mutation_reader_config& config,
|
||||
mutation_source ms,
|
||||
schema_ptr s,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr) {
|
||||
return make_flat_mutation_reader<restricting_mutation_reader>(config, std::move(ms), std::move(s), range, slice, pc, std::move(trace_state), fwd, fwd_mr);
|
||||
}
|
||||
|
||||
|
||||
snapshot_source make_empty_snapshot_source() {
|
||||
return snapshot_source([] {
|
||||
return make_empty_mutation_source();
|
||||
@@ -475,7 +695,7 @@ mutation_source make_combined_mutation_source(std::vector<mutation_source> adden
|
||||
for (auto&& ms : addends) {
|
||||
rd.emplace_back(ms(s, pr, slice, pc, tr, fwd));
|
||||
}
|
||||
return make_combined_reader(std::move(rd), mutation_reader::forwarding::yes);
|
||||
return make_combined_reader(s, std::move(rd), fwd);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -547,3 +767,9 @@ mutation_reader mutation_reader_from_flat_mutation_reader(flat_mutation_reader&&
|
||||
};
|
||||
return make_mutation_reader<converting_reader>(std::move(mr));
|
||||
}
|
||||
|
||||
future<streamed_mutation_opt> streamed_mutation_from_flat_mutation_reader(flat_mutation_reader&& r) {
|
||||
return do_with(mutation_reader_from_flat_mutation_reader(std::move(r)), [] (auto&& rd) {
|
||||
return rd();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -97,6 +97,143 @@ public:
|
||||
future<> fast_forward_to(const dht::partition_range& pr) { return _impl->fast_forward_to(pr); }
|
||||
};
|
||||
|
||||
GCC6_CONCEPT(
|
||||
template<typename Producer>
|
||||
concept bool FragmentProducer = requires(Producer p, dht::partition_range part_range, position_range pos_range) {
|
||||
// The returned fragments are expected to have the same
|
||||
// position_in_partition. Iterators and references are expected
|
||||
// to be valid until the next call to operator()().
|
||||
{ p() } -> future<boost::iterator_range<std::vector<mutation_fragment>::iterator>>;
|
||||
// These have the same semantics as their
|
||||
// flat_mutation_reader counterparts.
|
||||
{ p.next_partition() };
|
||||
{ p.fast_forward_to(part_range) } -> future<>;
|
||||
{ p.fast_forward_to(pos_range) } -> future<>;
|
||||
};
|
||||
)
|
||||
|
||||
/**
|
||||
* Merge mutation-fragments produced by producer.
|
||||
*
|
||||
* Merge a non-decreasing stream of mutation-fragments into strictly
|
||||
* increasing stream. The merger is stateful, it's intended to be kept
|
||||
* around *at least* for merging an entire partition. That is, creating
|
||||
* a new instance for each batch of fragments will produce incorrect
|
||||
* results.
|
||||
*
|
||||
* Call operator() to get the next mutation fragment. operator() will
|
||||
* consume fragments from the producer using operator().
|
||||
* Any fast-forwarding has to be communicated to the merger object using
|
||||
* fast_forward_to() and next_partition(), as appropriate.
|
||||
*/
|
||||
template<class Producer>
|
||||
GCC6_CONCEPT(
|
||||
requires FragmentProducer<Producer>
|
||||
)
|
||||
class mutation_fragment_merger {
|
||||
using iterator = std::vector<mutation_fragment>::iterator;
|
||||
|
||||
const schema_ptr _schema;
|
||||
Producer _producer;
|
||||
range_tombstone_stream _deferred_tombstones;
|
||||
iterator _it;
|
||||
iterator _end;
|
||||
bool _end_of_stream = false;
|
||||
|
||||
void apply(mutation_fragment& to, mutation_fragment&& frag) {
|
||||
if (to.is_range_tombstone()) {
|
||||
if (auto remainder = to.as_mutable_range_tombstone().apply(*_schema, std::move(frag).as_range_tombstone())) {
|
||||
_deferred_tombstones.apply(std::move(*remainder));
|
||||
}
|
||||
} else {
|
||||
to.apply(*_schema, std::move(frag));
|
||||
}
|
||||
}
|
||||
|
||||
future<> fetch() {
|
||||
if (!empty()) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
return _producer().then([this] (boost::iterator_range<iterator> fragments) {
|
||||
_it = fragments.begin();
|
||||
_end = fragments.end();
|
||||
if (empty()) {
|
||||
_end_of_stream = true;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
bool empty() const {
|
||||
return _it == _end;
|
||||
}
|
||||
|
||||
const mutation_fragment& top() const {
|
||||
return *_it;
|
||||
}
|
||||
|
||||
mutation_fragment pop() {
|
||||
return std::move(*_it++);
|
||||
}
|
||||
|
||||
public:
|
||||
mutation_fragment_merger(schema_ptr schema, Producer&& producer)
|
||||
: _schema(std::move(schema))
|
||||
, _producer(std::move(producer))
|
||||
, _deferred_tombstones(*_schema) {
|
||||
}
|
||||
|
||||
future<mutation_fragment_opt> operator()() {
|
||||
if (_end_of_stream) {
|
||||
return make_ready_future<mutation_fragment_opt>(_deferred_tombstones.get_next());
|
||||
}
|
||||
|
||||
return fetch().then([this] () -> mutation_fragment_opt {
|
||||
if (empty()) {
|
||||
return _deferred_tombstones.get_next();
|
||||
}
|
||||
|
||||
auto current = [&] {
|
||||
if (auto rt = _deferred_tombstones.get_next(top())) {
|
||||
return std::move(*rt);
|
||||
}
|
||||
return pop();
|
||||
}();
|
||||
|
||||
const auto equal = position_in_partition::equal_compare(*_schema);
|
||||
|
||||
// Position of current is always either less than or equal to those
|
||||
// of the batch. In the former case there is nothing further
|
||||
// to do.
|
||||
if (empty() || !equal(current.position(), top().position())) {
|
||||
return current;
|
||||
}
|
||||
while (!empty()) {
|
||||
apply(current, pop());
|
||||
}
|
||||
return current;
|
||||
});
|
||||
}
|
||||
|
||||
void next_partition() {
|
||||
_deferred_tombstones.reset();
|
||||
_end_of_stream = false;
|
||||
_producer.next_partition();
|
||||
}
|
||||
|
||||
future<> fast_forward_to(const dht::partition_range& pr) {
|
||||
_deferred_tombstones.reset();
|
||||
_end_of_stream = false;
|
||||
return _producer.fast_forward_to(pr);
|
||||
}
|
||||
|
||||
future<> fast_forward_to(position_range pr) {
|
||||
_deferred_tombstones.forward_to(pr.start());
|
||||
_end_of_stream = false;
|
||||
return _producer.fast_forward_to(std::move(pr));
|
||||
}
|
||||
};
|
||||
|
||||
// Impl: derived from mutation_reader::impl; Args/args: arguments for Impl's constructor
|
||||
template <typename Impl, typename... Args>
|
||||
inline
|
||||
@@ -111,8 +248,8 @@ protected:
|
||||
public:
|
||||
virtual ~reader_selector() = default;
|
||||
// Call only if has_new_readers() returned true.
|
||||
virtual std::vector<mutation_reader> create_new_readers(const dht::token* const t) = 0;
|
||||
virtual std::vector<mutation_reader> fast_forward_to(const dht::partition_range& pr) = 0;
|
||||
virtual std::vector<flat_mutation_reader> create_new_readers(const dht::token* const t) = 0;
|
||||
virtual std::vector<flat_mutation_reader> fast_forward_to(const dht::partition_range& pr) = 0;
|
||||
|
||||
// Can be false-positive but never false-negative!
|
||||
bool has_new_readers(const dht::token* const t) const noexcept {
|
||||
@@ -120,60 +257,114 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// Combines multiple mutation_readers into one.
|
||||
class combined_mutation_reader : public mutation_reader::impl {
|
||||
std::unique_ptr<reader_selector> _selector;
|
||||
std::list<mutation_reader> _all_readers;
|
||||
// Merges the output of the sub-readers into a single non-decreasing
|
||||
// stream of mutation-fragments.
|
||||
class mutation_reader_merger {
|
||||
public:
|
||||
struct reader_and_fragment {
|
||||
flat_mutation_reader* reader;
|
||||
mutation_fragment fragment;
|
||||
|
||||
struct mutation_and_reader {
|
||||
streamed_mutation m;
|
||||
mutation_reader* read;
|
||||
|
||||
bool operator<(const mutation_and_reader& other) const {
|
||||
return read < other.read;
|
||||
reader_and_fragment(flat_mutation_reader* r, mutation_fragment f)
|
||||
: reader(r)
|
||||
, fragment(std::move(f)) {
|
||||
}
|
||||
|
||||
struct less_compare {
|
||||
bool operator()(const mutation_and_reader& a, mutation_reader* b) const {
|
||||
return a.read < b;
|
||||
}
|
||||
bool operator()(mutation_reader* a, const mutation_and_reader& b) const {
|
||||
return a < b.read;
|
||||
}
|
||||
bool operator()(const mutation_and_reader& a, const mutation_and_reader& b) const {
|
||||
return a < b;
|
||||
}
|
||||
};
|
||||
};
|
||||
std::vector<mutation_and_reader> _ptables;
|
||||
// comparison function for std::make_heap()/std::push_heap()
|
||||
static bool heap_compare(const mutation_and_reader& a, const mutation_and_reader& b) {
|
||||
auto&& s = a.m.schema();
|
||||
// order of comparison is inverted, because heaps produce greatest value first
|
||||
return b.m.decorated_key().less_compare(*s, a.m.decorated_key());
|
||||
}
|
||||
std::vector<streamed_mutation> _current;
|
||||
std::vector<mutation_reader*> _next;
|
||||
|
||||
struct reader_and_last_fragment_kind {
|
||||
flat_mutation_reader* reader;
|
||||
mutation_fragment::kind last_kind;
|
||||
|
||||
reader_and_last_fragment_kind(flat_mutation_reader* r, mutation_fragment::kind k)
|
||||
: reader(r)
|
||||
, last_kind(k) {
|
||||
}
|
||||
};
|
||||
|
||||
using mutation_fragment_batch = boost::iterator_range<std::vector<mutation_fragment>::iterator>;
|
||||
private:
|
||||
struct reader_heap_compare;
|
||||
struct fragment_heap_compare;
|
||||
|
||||
std::unique_ptr<reader_selector> _selector;
|
||||
// We need a list because we need stable addresses across additions
|
||||
// and removals.
|
||||
std::list<flat_mutation_reader> _all_readers;
|
||||
// Readers positioned at a partition, different from the one we are
|
||||
// reading from now. For these readers the attached fragment is
|
||||
// always partition_start. Used to pick the next partition.
|
||||
std::vector<reader_and_fragment> _reader_heap;
|
||||
// Readers and their current fragments, belonging to the current
|
||||
// partition.
|
||||
std::vector<reader_and_fragment> _fragment_heap;
|
||||
std::vector<reader_and_last_fragment_kind> _next;
|
||||
// Readers that reached EOS.
|
||||
std::vector<reader_and_last_fragment_kind> _halted_readers;
|
||||
std::vector<mutation_fragment> _current;
|
||||
dht::decorated_key_opt _key;
|
||||
const schema_ptr _schema;
|
||||
streamed_mutation::forwarding _fwd_sm;
|
||||
mutation_reader::forwarding _fwd_mr;
|
||||
private:
|
||||
const dht::token* current_position() const;
|
||||
void maybe_add_readers(const dht::token* const t);
|
||||
void add_readers(std::vector<mutation_reader> new_readers);
|
||||
void add_readers(std::vector<flat_mutation_reader> new_readers);
|
||||
future<> prepare_next();
|
||||
// Produces next mutation or disengaged optional if there are no more.
|
||||
future<streamed_mutation_opt> next();
|
||||
// Collect all forwardable readers into _next, and remove them from
|
||||
// their previous containers (_halted_readers and _fragment_heap).
|
||||
void prepare_forwardable_readers();
|
||||
public:
|
||||
// The specified mutation_reader::forwarding tag must be the same for all included readers.
|
||||
combined_mutation_reader(std::unique_ptr<reader_selector> selector, mutation_reader::forwarding fwd_mr);
|
||||
virtual future<streamed_mutation_opt> operator()() override;
|
||||
mutation_reader_merger(schema_ptr schema,
|
||||
std::unique_ptr<reader_selector> selector,
|
||||
streamed_mutation::forwarding fwd_sm,
|
||||
mutation_reader::forwarding fwd_mr);
|
||||
// Produces the next batch of mutation-fragments of the same
|
||||
// position.
|
||||
future<mutation_fragment_batch> operator()();
|
||||
void next_partition();
|
||||
future<> fast_forward_to(const dht::partition_range& pr);
|
||||
future<> fast_forward_to(position_range pr);
|
||||
};
|
||||
|
||||
// Combines multiple mutation_readers into one.
|
||||
class combined_mutation_reader : public flat_mutation_reader::impl {
|
||||
mutation_fragment_merger<mutation_reader_merger> _producer;
|
||||
streamed_mutation::forwarding _fwd_sm;
|
||||
public:
|
||||
// The specified streamed_mutation::forwarding and
|
||||
// mutation_reader::forwarding tag must be the same for all included
|
||||
// readers.
|
||||
combined_mutation_reader(schema_ptr schema,
|
||||
std::unique_ptr<reader_selector> selector,
|
||||
streamed_mutation::forwarding fwd_sm,
|
||||
mutation_reader::forwarding fwd_mr);
|
||||
virtual future<> fill_buffer() override;
|
||||
virtual void next_partition() override;
|
||||
virtual future<> fast_forward_to(const dht::partition_range& pr) override;
|
||||
virtual future<> fast_forward_to(position_range pr) override;
|
||||
};
|
||||
|
||||
// Creates a mutation reader which combines data returned by the supplied readers.
|
||||
// Returns mutation of the same schema only when all readers return mutations
|
||||
// of the same schema.
|
||||
mutation_reader make_combined_reader(std::vector<mutation_reader>, mutation_reader::forwarding);
|
||||
mutation_reader make_combined_reader(mutation_reader&& a, mutation_reader&& b, mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::yes);
|
||||
mutation_reader make_combined_reader(schema_ptr schema,
|
||||
std::vector<mutation_reader> readers,
|
||||
streamed_mutation::forwarding fwd_sm = streamed_mutation::forwarding::no,
|
||||
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::yes);
|
||||
mutation_reader make_combined_reader(schema_ptr schema,
|
||||
mutation_reader&& a,
|
||||
mutation_reader&& b,
|
||||
streamed_mutation::forwarding fwd_sm = streamed_mutation::forwarding::no,
|
||||
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::yes);
|
||||
flat_mutation_reader make_combined_reader(schema_ptr schema,
|
||||
std::vector<flat_mutation_reader>,
|
||||
streamed_mutation::forwarding fwd_sm = streamed_mutation::forwarding::no,
|
||||
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::yes);
|
||||
flat_mutation_reader make_combined_reader(schema_ptr schema,
|
||||
flat_mutation_reader&& a,
|
||||
flat_mutation_reader&& b,
|
||||
streamed_mutation::forwarding fwd_sm = streamed_mutation::forwarding::no,
|
||||
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::yes);
|
||||
// reads from the input readers, in order
|
||||
mutation_reader make_reader_returning(mutation, streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no);
|
||||
mutation_reader make_reader_returning(streamed_mutation);
|
||||
@@ -184,45 +375,57 @@ mutation_reader make_reader_returning_many(std::vector<mutation>, const dht::par
|
||||
mutation_reader make_reader_returning_many(std::vector<streamed_mutation>);
|
||||
mutation_reader make_empty_reader();
|
||||
|
||||
/*
|
||||
template<typename T>
|
||||
concept bool StreamedMutationFilter() {
|
||||
return requires(T t, const streamed_mutation& sm) {
|
||||
{ t(sm) } -> bool;
|
||||
};
|
||||
}
|
||||
*/
|
||||
template <typename MutationFilter>
|
||||
class filtering_reader : public mutation_reader::impl {
|
||||
mutation_reader _rd;
|
||||
MutationFilter _filter;
|
||||
streamed_mutation_opt _current;
|
||||
static_assert(std::is_same<bool, std::result_of_t<MutationFilter(const streamed_mutation&)>>::value, "bad MutationFilter signature");
|
||||
public:
|
||||
filtering_reader(mutation_reader rd, MutationFilter&& filter)
|
||||
: _rd(std::move(rd)), _filter(std::forward<MutationFilter>(filter)) {
|
||||
GCC6_CONCEPT(
|
||||
requires requires(MutationFilter mf, const dht::decorated_key& dk) {
|
||||
{ mf(dk) } -> bool;
|
||||
}
|
||||
virtual future<streamed_mutation_opt> operator()() override {\
|
||||
return repeat([this] {
|
||||
return _rd().then([this] (streamed_mutation_opt&& mo) mutable {
|
||||
if (!mo) {
|
||||
_current = std::move(mo);
|
||||
return stop_iteration::yes;
|
||||
} else {
|
||||
if (_filter(*mo)) {
|
||||
_current = std::move(mo);
|
||||
return stop_iteration::yes;
|
||||
)
|
||||
class filtering_reader : public flat_mutation_reader::impl {
|
||||
flat_mutation_reader _rd;
|
||||
MutationFilter _filter;
|
||||
static_assert(std::is_same<bool, std::result_of_t<MutationFilter(const dht::decorated_key&)>>::value, "bad MutationFilter signature");
|
||||
public:
|
||||
filtering_reader(flat_mutation_reader rd, MutationFilter&& filter)
|
||||
: impl(rd.schema())
|
||||
, _rd(std::move(rd))
|
||||
, _filter(std::forward<MutationFilter>(filter)) {
|
||||
}
|
||||
virtual future<> fill_buffer() override {
|
||||
return do_until([this] { return is_buffer_full() || is_end_of_stream(); }, [this] {
|
||||
return _rd.fill_buffer().then([this] {
|
||||
while (!_rd.is_buffer_empty()) {
|
||||
auto mf = _rd.pop_mutation_fragment();
|
||||
if (mf.is_partition_start()) {
|
||||
auto& dk = mf.as_partition_start().key();
|
||||
if (!_filter(dk)) {
|
||||
_rd.next_partition();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return stop_iteration::no;
|
||||
push_mutation_fragment(std::move(mf));
|
||||
}
|
||||
_end_of_stream = _rd.is_end_of_stream();
|
||||
});
|
||||
}).then([this] {
|
||||
return make_ready_future<streamed_mutation_opt>(std::move(_current));
|
||||
});
|
||||
};
|
||||
}
|
||||
virtual void next_partition() override {
|
||||
clear_buffer_to_next_partition();
|
||||
if (is_buffer_empty()) {
|
||||
_end_of_stream = false;
|
||||
_rd.next_partition();
|
||||
}
|
||||
}
|
||||
virtual future<> fast_forward_to(const dht::partition_range& pr) override {
|
||||
clear_buffer();
|
||||
_end_of_stream = false;
|
||||
return _rd.fast_forward_to(pr);
|
||||
}
|
||||
virtual future<> fast_forward_to(position_range pr) override {
|
||||
forward_buffer_to(pr.start());
|
||||
_end_of_stream = false;
|
||||
return _rd.fast_forward_to(std::move(pr));
|
||||
}
|
||||
};
|
||||
|
||||
// Creates a mutation_reader wrapper which creates a new stream of mutations
|
||||
@@ -231,8 +434,8 @@ public:
|
||||
// accepts mutation const& and returns a bool. The mutation stays in the
|
||||
// stream if and only if the filter returns true.
|
||||
template <typename MutationFilter>
|
||||
mutation_reader make_filtering_reader(mutation_reader rd, MutationFilter&& filter) {
|
||||
return make_mutation_reader<filtering_reader<MutationFilter>>(std::move(rd), std::forward<MutationFilter>(filter));
|
||||
flat_mutation_reader make_filtering_reader(flat_mutation_reader rd, MutationFilter&& filter) {
|
||||
return make_flat_mutation_reader<filtering_reader<MutationFilter>>(std::move(rd), std::forward<MutationFilter>(filter));
|
||||
}
|
||||
|
||||
// Calls the consumer for each element of the reader's stream until end of stream
|
||||
@@ -501,7 +704,7 @@ struct restricted_mutation_reader_config {
|
||||
// a semaphore to track and limit the memory usage of readers. It also
|
||||
// contains a timeout and a maximum queue size for inactive readers
|
||||
// whose construction is blocked.
|
||||
mutation_reader make_restricted_reader(const restricted_mutation_reader_config& config,
|
||||
flat_mutation_reader make_restricted_flat_reader(const restricted_mutation_reader_config& config,
|
||||
mutation_source ms,
|
||||
schema_ptr s,
|
||||
const dht::partition_range& range,
|
||||
@@ -511,12 +714,12 @@ mutation_reader make_restricted_reader(const restricted_mutation_reader_config&
|
||||
streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no,
|
||||
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::yes);
|
||||
|
||||
inline mutation_reader make_restricted_reader(const restricted_mutation_reader_config& config,
|
||||
mutation_source ms,
|
||||
schema_ptr s,
|
||||
const dht::partition_range& range = query::full_partition_range) {
|
||||
inline flat_mutation_reader make_restricted_flat_reader(const restricted_mutation_reader_config& config,
|
||||
mutation_source ms,
|
||||
schema_ptr s,
|
||||
const dht::partition_range& range = query::full_partition_range) {
|
||||
auto& full_slice = s->full_slice();
|
||||
return make_restricted_reader(config, std::move(ms), std::move(s), range, full_slice);
|
||||
return make_restricted_flat_reader(config, std::move(ms), std::move(s), range, full_slice);
|
||||
}
|
||||
|
||||
template<>
|
||||
@@ -547,87 +750,6 @@ future<stop_iteration> do_consume_streamed_mutation_flattened(streamed_mutation&
|
||||
return make_ready_future<stop_iteration>(c.consume_end_of_partition());
|
||||
}
|
||||
|
||||
template<typename Consumer>
|
||||
GCC6_CONCEPT(
|
||||
requires FlattenedConsumer<Consumer>()
|
||||
)
|
||||
auto consume_flattened(mutation_reader mr, Consumer&& c, bool reverse_mutations = false)
|
||||
{
|
||||
return do_with(std::move(mr), std::move(c), stdx::optional<streamed_mutation>(), [reverse_mutations] (auto& mr, auto& c, auto& sm) {
|
||||
return repeat([&, reverse_mutations] {
|
||||
return mr().then([&, reverse_mutations] (auto smopt) {
|
||||
if (!smopt) {
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||
}
|
||||
if (!reverse_mutations) {
|
||||
sm.emplace(std::move(*smopt));
|
||||
} else {
|
||||
sm.emplace(reverse_streamed_mutation(std::move(*smopt)));
|
||||
}
|
||||
c.consume_new_partition(sm->decorated_key());
|
||||
if (sm->partition_tombstone()) {
|
||||
c.consume(sm->partition_tombstone());
|
||||
}
|
||||
return do_consume_streamed_mutation_flattened(*sm, c);
|
||||
});
|
||||
}).then([&] {
|
||||
return c.consume_end_of_stream();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/*
|
||||
template<typename T>
|
||||
concept bool StreamedMutationFilter() {
|
||||
return requires(T obj, const streamed_mutation& sm) {
|
||||
{ filter(sm); } -> bool;
|
||||
};
|
||||
}
|
||||
*/
|
||||
// This version of consume_flattened() must be run inside a thread and
|
||||
// guarantees that all FlattenedConsumer functions will also be called in the same thread
|
||||
// context.
|
||||
template<typename FlattenedConsumer, typename StreamedMutationFilter>
|
||||
auto consume_flattened_in_thread(mutation_reader& mr, FlattenedConsumer& c, StreamedMutationFilter&& filter)
|
||||
{
|
||||
while (true) {
|
||||
auto smopt = mr().get0();
|
||||
if (!smopt) {
|
||||
break;
|
||||
}
|
||||
auto& sm = *smopt;
|
||||
if (!filter(sm)) {
|
||||
continue;
|
||||
}
|
||||
c.consume_new_partition(sm.decorated_key());
|
||||
if (sm.partition_tombstone()) {
|
||||
c.consume(sm.partition_tombstone());
|
||||
}
|
||||
do {
|
||||
if (sm.is_buffer_empty()) {
|
||||
if (sm.is_end_of_stream()) {
|
||||
break;
|
||||
}
|
||||
sm.fill_buffer().get0();
|
||||
} else {
|
||||
if (sm.pop_mutation_fragment().consume_streamed_mutation(c) == stop_iteration::yes) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (true);
|
||||
if (c.consume_end_of_partition() == stop_iteration::yes) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return c.consume_end_of_stream();
|
||||
}
|
||||
|
||||
template<typename FlattenedConsumer>
|
||||
auto consume_flattened_in_thread(mutation_reader& mr, FlattenedConsumer& c)
|
||||
{
|
||||
return consume_flattened_in_thread(mr, c, [] (auto&&) { return true; });
|
||||
}
|
||||
|
||||
// Adapts a non-movable FlattenedConsumer to a movable one.
|
||||
template<typename FlattenedConsumer>
|
||||
class stable_flattened_mutations_consumer {
|
||||
@@ -647,3 +769,5 @@ template<typename FlattenedConsumer, typename... Args>
|
||||
stable_flattened_mutations_consumer<FlattenedConsumer> make_stable_flattened_mutations_consumer(Args&&... args) {
|
||||
return { std::make_unique<FlattenedConsumer>(std::forward<Args>(args)...) };
|
||||
}
|
||||
|
||||
future<streamed_mutation_opt> streamed_mutation_from_flat_mutation_reader(flat_mutation_reader&&);
|
||||
|
||||
@@ -213,6 +213,14 @@ void partition_entry::set_version(partition_version* new_version)
|
||||
_version = partition_version_ref(*new_version);
|
||||
}
|
||||
|
||||
partition_version& partition_entry::add_version(const schema& s) {
|
||||
auto new_version = current_allocator().construct<partition_version>(mutation_partition(s.shared_from_this()));
|
||||
new_version->partition().set_static_row_continuous(_version->partition().static_row_continuous());
|
||||
new_version->insert_before(*_version);
|
||||
set_version(new_version);
|
||||
return *new_version;
|
||||
}
|
||||
|
||||
void partition_entry::apply(const schema& s, const mutation_partition& mp, const schema& mp_schema)
|
||||
{
|
||||
if (!_snapshot) {
|
||||
@@ -555,13 +563,19 @@ lw_shared_ptr<partition_snapshot> partition_entry::read(logalloc::region& r,
|
||||
std::vector<range_tombstone>
|
||||
partition_snapshot::range_tombstones(const ::schema& s, position_in_partition_view start, position_in_partition_view end)
|
||||
{
|
||||
partition_version* v = &*version();
|
||||
if (!v->next()) {
|
||||
return boost::copy_range<std::vector<range_tombstone>>(
|
||||
v->partition().row_tombstones().slice(s, start, end));
|
||||
}
|
||||
range_tombstone_list list(s);
|
||||
for (auto&& v : versions()) {
|
||||
for (auto&& rt : v.partition().row_tombstones().slice(s, start, end)) {
|
||||
while (v) {
|
||||
for (auto&& rt : v->partition().row_tombstones().slice(s, start, end)) {
|
||||
list.apply(s, rt);
|
||||
}
|
||||
v = v->next();
|
||||
}
|
||||
return boost::copy_range<std::vector<range_tombstone>>(list);
|
||||
return boost::copy_range<std::vector<range_tombstone>>(list.slice(s, start, end));
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, partition_entry& e) {
|
||||
|
||||
@@ -356,17 +356,15 @@ public:
|
||||
// succeeds the result will be as if the first attempt didn't fail.
|
||||
void apply_to_incomplete(const schema& s, partition_entry&& pe, const schema& pe_schema);
|
||||
|
||||
partition_version& add_version(const schema& s);
|
||||
|
||||
// Ensures that the latest version can be populated with data from given phase
|
||||
// by inserting a new version if necessary.
|
||||
// Doesn't affect value or continuity of the partition.
|
||||
// Returns a reference to the new latest version.
|
||||
partition_version& open_version(const schema& s, partition_snapshot::phase_type phase = partition_snapshot::max_phase) {
|
||||
if (_snapshot && _snapshot->_phase != phase) {
|
||||
auto new_version = current_allocator().construct<partition_version>(mutation_partition(s.shared_from_this()));
|
||||
new_version->partition().set_static_row_continuous(_version->partition().static_row_continuous());
|
||||
new_version->insert_before(*_version);
|
||||
set_version(new_version);
|
||||
return *new_version;
|
||||
return add_version(s);
|
||||
}
|
||||
return *_version;
|
||||
}
|
||||
|
||||
@@ -65,21 +65,30 @@ void range_tombstone_accumulator::update_current_tombstone() {
|
||||
}
|
||||
|
||||
void range_tombstone_accumulator::drop_unneeded_tombstones(const clustering_key_prefix& ck, int w) {
|
||||
auto cmp = [&] (bound_view bv, const clustering_key_prefix& ck, int w) {
|
||||
return _reversed ? _cmp(ck, w, bv.prefix, weight(bv.kind)) : _cmp(bv.prefix, weight(bv.kind), ck, w);
|
||||
auto cmp = [&] (const range_tombstone& rt, const clustering_key_prefix& ck, int w) {
|
||||
if (_reversed) {
|
||||
auto bv = rt.start_bound();
|
||||
return _cmp(ck, w, bv.prefix, weight(bv.kind));
|
||||
}
|
||||
auto bv = rt.end_bound();
|
||||
return _cmp(bv.prefix, weight(bv.kind), ck, w);
|
||||
};
|
||||
while (!_range_tombstones.empty() && cmp(_range_tombstones.begin()->end_bound(), ck, w)) {
|
||||
while (!_range_tombstones.empty() && cmp(*_range_tombstones.begin(), ck, w)) {
|
||||
_range_tombstones.pop_front();
|
||||
}
|
||||
update_current_tombstone();
|
||||
}
|
||||
|
||||
void range_tombstone_accumulator::apply(range_tombstone rt) {
|
||||
drop_unneeded_tombstones(rt.start, weight(rt.start_kind));
|
||||
if (_reversed) {
|
||||
drop_unneeded_tombstones(rt.end, weight(rt.end_kind));
|
||||
} else {
|
||||
drop_unneeded_tombstones(rt.start, weight(rt.start_kind));
|
||||
}
|
||||
_current_tombstone.apply(rt.tomb);
|
||||
|
||||
auto cmp = [&] (const range_tombstone& rt1, const range_tombstone& rt2) {
|
||||
return _reversed ? _cmp(rt2.end_bound(), rt1.end_bound()) : _cmp(rt1.end_bound(), rt2.end_bound());
|
||||
return _reversed ? _cmp(rt2.start_bound(), rt1.start_bound()) : _cmp(rt1.end_bound(), rt2.end_bound());
|
||||
};
|
||||
_range_tombstones.insert(boost::upper_bound(_range_tombstones, rt, cmp), std::move(rt));
|
||||
}
|
||||
|
||||
@@ -176,15 +176,6 @@ public:
|
||||
size_t memory_usage() const {
|
||||
return sizeof(range_tombstone) + external_memory_usage();
|
||||
}
|
||||
|
||||
// Flips start and end bound so that range tombstone can be used in reversed
|
||||
// streams.
|
||||
void flip() {
|
||||
std::swap(start, end);
|
||||
std::swap(start_kind, end_kind);
|
||||
start_kind = flip_bound_kind(start_kind);
|
||||
end_kind = flip_bound_kind(end_kind);
|
||||
}
|
||||
private:
|
||||
void move_assign(range_tombstone&& rt) {
|
||||
start = std::move(rt.start);
|
||||
@@ -202,11 +193,12 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
// This is a helper intended for accumulating tombstones from a streamed
|
||||
// mutation and determining what is the tombstone for a given clustering row.
|
||||
// The accumulator expects the incoming range tombstones and clustered rows to
|
||||
// follow the ordering used by the mutation readers.
|
||||
//
|
||||
// After apply(rt) or tombstone_for_row(ck) are called there are following
|
||||
// restrictions for subsequent calls:
|
||||
// Unless the accumulator is in the reverse mode, after apply(rt) or
|
||||
// tombstone_for_row(ck) are called there are following restrictions for
|
||||
// subsequent calls:
|
||||
// - apply(rt1) can be invoked only if rt.start_bound() < rt1.start_bound()
|
||||
// and ck < rt1.start_bound()
|
||||
// - tombstone_for_row(ck1) can be invoked only if rt.start_bound() < ck1
|
||||
@@ -214,6 +206,15 @@ private:
|
||||
//
|
||||
// In other words position in partition of the mutation fragments passed to the
|
||||
// accumulator must be increasing.
|
||||
//
|
||||
// If the accumulator was created with the reversed flag set it expects the
|
||||
// stream of range tombstones to come from reversed partitions and follow
|
||||
// the ordering that they use. In particular, the restrictions from non-reversed
|
||||
// mode change to:
|
||||
// - apply(rt1) can be invoked only if rt.end_bound() > rt1.end_bound() and
|
||||
// ck > rt1.end_bound()
|
||||
// - tombstone_for_row(ck1) can be invoked only if rt.end_bound() > ck1 and
|
||||
// ck > ck1.
|
||||
class range_tombstone_accumulator {
|
||||
bound_view::compare _cmp;
|
||||
tombstone _partition_tombstone;
|
||||
|
||||
@@ -124,6 +124,7 @@ void range_tombstone_list::insert_from(const schema& s,
|
||||
if (less(end_bound, it->end_bound())) {
|
||||
end = it->end;
|
||||
end_kind = it->end_kind;
|
||||
end_bound = bound_view(end, end_kind);
|
||||
}
|
||||
it = rev.erase(it);
|
||||
} else if (c > 0) {
|
||||
|
||||
100
read_context.hh
100
read_context.hh
@@ -31,13 +31,13 @@
|
||||
namespace cache {
|
||||
|
||||
/*
|
||||
* Represent a reader to the underlying source.
|
||||
* This reader automatically makes sure that it's up to date with all cache updates
|
||||
*/
|
||||
* Represents a flat reader to the underlying source.
|
||||
* This reader automatically makes sure that it's up to date with all cache updates
|
||||
*/
|
||||
class autoupdating_underlying_reader final {
|
||||
row_cache& _cache;
|
||||
read_context& _read_context;
|
||||
stdx::optional<mutation_reader> _reader;
|
||||
stdx::optional<flat_mutation_reader> _reader;
|
||||
utils::phased_barrier::phase_type _reader_creation_phase;
|
||||
dht::partition_range _range = { };
|
||||
stdx::optional<dht::decorated_key> _last_key;
|
||||
@@ -47,17 +47,7 @@ public:
|
||||
: _cache(cache)
|
||||
, _read_context(context)
|
||||
{ }
|
||||
// Reads next partition without changing mutation source snapshot.
|
||||
future<streamed_mutation_opt> read_next_same_phase() {
|
||||
_last_key = std::move(_new_last_key);
|
||||
return (*_reader)().then([this] (auto&& smopt) {
|
||||
if (smopt) {
|
||||
_new_last_key = smopt->decorated_key();
|
||||
}
|
||||
return std::move(smopt);
|
||||
});
|
||||
}
|
||||
future<streamed_mutation_opt> operator()() {
|
||||
future<mutation_fragment_opt> move_to_next_partition() {
|
||||
_last_key = std::move(_new_last_key);
|
||||
auto start = population_range_start();
|
||||
auto phase = _cache.phase_of(start);
|
||||
@@ -66,7 +56,8 @@ public:
|
||||
auto cmp = dht::ring_position_comparator(*_cache._schema);
|
||||
auto&& new_range = _range.split_after(*_last_key, cmp);
|
||||
if (!new_range) {
|
||||
return make_ready_future<streamed_mutation_opt>(streamed_mutation_opt());
|
||||
_reader = {};
|
||||
return make_ready_future<mutation_fragment_opt>();
|
||||
}
|
||||
_range = std::move(*new_range);
|
||||
_last_key = {};
|
||||
@@ -79,11 +70,17 @@ public:
|
||||
_reader = _cache.create_underlying_reader(_read_context, snap, _range);
|
||||
_reader_creation_phase = phase;
|
||||
}
|
||||
return (*_reader)().then([this] (auto&& smopt) {
|
||||
if (smopt) {
|
||||
_new_last_key = smopt->decorated_key();
|
||||
_reader->next_partition();
|
||||
|
||||
if (_reader->is_end_of_stream() && _reader->is_buffer_empty()) {
|
||||
return make_ready_future<mutation_fragment_opt>();
|
||||
}
|
||||
return (*_reader)().then([this] (auto&& mfopt) {
|
||||
if (mfopt) {
|
||||
assert(mfopt->is_partition_start());
|
||||
_new_last_key = mfopt->as_partition_start().key();
|
||||
}
|
||||
return std::move(smopt);
|
||||
return std::move(mfopt);
|
||||
});
|
||||
}
|
||||
future<> fast_forward_to(dht::partition_range&& range) {
|
||||
@@ -114,6 +111,7 @@ public:
|
||||
const dht::partition_range& range() const {
|
||||
return _range;
|
||||
}
|
||||
flat_mutation_reader& underlying() { return *_reader; }
|
||||
dht::ring_position_view population_range_start() const {
|
||||
return _last_key ? dht::ring_position_view::for_after_key(*_last_key)
|
||||
: dht::ring_position_view::for_range_start(_range);
|
||||
@@ -130,19 +128,17 @@ class read_context final : public enable_lw_shared_from_this<read_context> {
|
||||
streamed_mutation::forwarding _fwd;
|
||||
mutation_reader::forwarding _fwd_mr;
|
||||
bool _range_query;
|
||||
// When reader enters a partition, it must be set up for reading that
|
||||
// partition from the underlying mutation source (_underlying) in one of two ways:
|
||||
//
|
||||
// 1) either _underlying is already in that partition
|
||||
//
|
||||
// 2) _underlying is before the partition, then _underlying_snapshot and _key
|
||||
// are set so that _underlying_flat can be fast forwarded to the right partition.
|
||||
//
|
||||
autoupdating_underlying_reader _underlying;
|
||||
uint64_t _underlying_created = 0;
|
||||
|
||||
// When reader enters a partition, it must be set up for reading that
|
||||
// partition from the underlying mutation source (_sm) in one of two ways:
|
||||
//
|
||||
// 1) either _underlying is already in that partition, then _sm is set to the
|
||||
// stream obtained from it.
|
||||
//
|
||||
// 2) _underlying is before the partition, then _underlying_snapshot and _key
|
||||
// are set so that _sm can be created on demand.
|
||||
//
|
||||
streamed_mutation_opt _sm;
|
||||
mutation_source_opt _underlying_snapshot;
|
||||
dht::partition_range _sm_range;
|
||||
stdx::optional<dht::decorated_key> _key;
|
||||
@@ -168,6 +164,9 @@ public:
|
||||
, _underlying(_cache, *this)
|
||||
{
|
||||
++_cache._tracker._stats.reads;
|
||||
if (range.is_singular() && range.start()->value().has_key()) {
|
||||
_key = range.start()->value().as_decorated_key();
|
||||
}
|
||||
}
|
||||
~read_context() {
|
||||
++_cache._tracker._stats.reads_done;
|
||||
@@ -190,52 +189,37 @@ public:
|
||||
bool is_range_query() const { return _range_query; }
|
||||
autoupdating_underlying_reader& underlying() { return _underlying; }
|
||||
row_cache::phase_type phase() const { return _phase; }
|
||||
const dht::decorated_key& key() const { return _key ? *_key : _sm->decorated_key(); }
|
||||
const dht::decorated_key& key() const { return *_key; }
|
||||
void on_underlying_created() { ++_underlying_created; }
|
||||
private:
|
||||
future<> create_sm();
|
||||
future<> ensure_sm_created() {
|
||||
if (_sm) {
|
||||
return make_ready_future<>();
|
||||
future<> ensure_underlying() {
|
||||
if (_underlying_snapshot) {
|
||||
return create_underlying(true);
|
||||
}
|
||||
return create_sm();
|
||||
return make_ready_future<>();
|
||||
}
|
||||
public:
|
||||
// Prepares the underlying streamed_mutation to represent dk in given snapshot.
|
||||
// Partitions must be entered with strictly monotonic keys.
|
||||
// The key must be after the current range of the underlying() reader.
|
||||
// The phase argument must match the snapshot's phase.
|
||||
future<> create_underlying(bool skip_first_fragment);
|
||||
void enter_partition(const dht::decorated_key& dk, mutation_source& snapshot, row_cache::phase_type phase) {
|
||||
_phase = phase;
|
||||
_sm = {};
|
||||
_underlying_snapshot = snapshot;
|
||||
_key = dk;
|
||||
}
|
||||
// Prepares the underlying streamed_mutation to be sm.
|
||||
// The phase argument must match the phase of the snapshot used to obtain sm.
|
||||
void enter_partition(streamed_mutation&& sm, row_cache::phase_type phase) {
|
||||
void enter_partition(const dht::decorated_key& dk, row_cache::phase_type phase) {
|
||||
_phase = phase;
|
||||
_sm = std::move(sm);
|
||||
_underlying_snapshot = {};
|
||||
_key = dk;
|
||||
}
|
||||
// Fast forwards the underlying streamed_mutation to given range.
|
||||
future<> fast_forward_to(position_range range) {
|
||||
return ensure_sm_created().then([this, range = std::move(range)] () mutable {
|
||||
++_cache._tracker._stats.underlying_row_skips;
|
||||
return _sm->fast_forward_to(std::move(range));
|
||||
return ensure_underlying().then([this, range = std::move(range)] {
|
||||
return _underlying.underlying().fast_forward_to(std::move(range));
|
||||
});
|
||||
}
|
||||
// Returns the underlying streamed_mutation.
|
||||
// The caller has to ensure that the streamed mutation was already created
|
||||
// (e.g. the most recent call to enter_partition(const dht::decorated_key&, ...)
|
||||
// was followed by a call to fast_forward_to()).
|
||||
streamed_mutation& get_streamed_mutation() noexcept {
|
||||
return *_sm;
|
||||
}
|
||||
// Gets the next fragment from the underlying streamed_mutation
|
||||
// Gets the next fragment from the underlying reader
|
||||
future<mutation_fragment_opt> get_next_fragment() {
|
||||
return ensure_sm_created().then([this] {
|
||||
return (*_sm)();
|
||||
return ensure_underlying().then([this] {
|
||||
return _underlying.underlying()();
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
328
row_cache.cc
328
row_cache.cc
@@ -32,10 +32,10 @@
|
||||
#include <boost/version.hpp>
|
||||
#include <sys/sdt.h>
|
||||
#include "stdx.hh"
|
||||
#include "cache_streamed_mutation.hh"
|
||||
#include "read_context.hh"
|
||||
#include "schema_upgrader.hh"
|
||||
#include "dirty_memory_manager.hh"
|
||||
#include "cache_flat_mutation_reader.hh"
|
||||
|
||||
namespace cache {
|
||||
|
||||
@@ -48,10 +48,10 @@ using namespace cache;
|
||||
|
||||
thread_local seastar::thread_scheduling_group row_cache::_update_thread_scheduling_group(1ms, 0.2);
|
||||
|
||||
mutation_reader
|
||||
flat_mutation_reader
|
||||
row_cache::create_underlying_reader(read_context& ctx, mutation_source& src, const dht::partition_range& pr) {
|
||||
ctx.on_underlying_created();
|
||||
return src(_schema, pr, ctx.slice(), ctx.pc(), ctx.trace_state(), streamed_mutation::forwarding::yes);
|
||||
return src.make_flat_mutation_reader(_schema, pr, ctx.slice(), ctx.pc(), ctx.trace_state(), streamed_mutation::forwarding::yes);
|
||||
}
|
||||
|
||||
cache_tracker& global_cache_tracker() {
|
||||
@@ -311,7 +311,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
future<> read_context::create_sm() {
|
||||
future<> read_context::create_underlying(bool skip_first_fragment) {
|
||||
if (_range_query) {
|
||||
// FIXME: Singular-range mutation readers don't support fast_forward_to(), so need to use a wide range
|
||||
// here in case the same reader will need to be fast forwarded later.
|
||||
@@ -319,53 +319,44 @@ future<> read_context::create_sm() {
|
||||
} else {
|
||||
_sm_range = dht::partition_range::make_singular({dht::ring_position(*_key)});
|
||||
}
|
||||
return _underlying.fast_forward_to(std::move(_sm_range), *_underlying_snapshot, _phase).then([this] {
|
||||
return _underlying.read_next_same_phase().then([this] (auto&& smo) {
|
||||
if (!smo) {
|
||||
_sm = make_empty_streamed_mutation(_cache.schema(), *_key, streamed_mutation::forwarding::yes);
|
||||
} else {
|
||||
_sm = std::move(*smo);
|
||||
}
|
||||
});
|
||||
return _underlying.fast_forward_to(std::move(_sm_range), *_underlying_snapshot, _phase).then([this, skip_first_fragment] {
|
||||
_underlying_snapshot = {};
|
||||
if (skip_first_fragment) {
|
||||
return _underlying.underlying()().then([](auto &&mf) {});
|
||||
} else {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
static streamed_mutation read_directly_from_underlying(streamed_mutation&& sm, read_context& reader) {
|
||||
if (reader.schema()->version() != sm.schema()->version()) {
|
||||
sm = transform(std::move(sm), schema_upgrader(reader.schema()));
|
||||
static flat_mutation_reader read_directly_from_underlying(read_context& reader) {
|
||||
flat_mutation_reader res = make_delegating_reader(reader.underlying().underlying());
|
||||
if (reader.schema()->version() != reader.underlying().underlying().schema()->version()) {
|
||||
res = transform(std::move(res), schema_upgrader(reader.schema()));
|
||||
}
|
||||
if (reader.fwd() == streamed_mutation::forwarding::no) {
|
||||
sm = streamed_mutation_from_forwarding_streamed_mutation(std::move(sm));
|
||||
res = make_nonforwardable(std::move(res), true);
|
||||
}
|
||||
return std::move(sm);
|
||||
return std::move(res);
|
||||
}
|
||||
|
||||
// Reader which populates the cache using data from the delegate.
|
||||
class single_partition_populating_reader final : public mutation_reader::impl {
|
||||
class single_partition_populating_reader final : public flat_mutation_reader::impl {
|
||||
row_cache& _cache;
|
||||
mutation_reader _delegate;
|
||||
lw_shared_ptr<read_context> _read_context;
|
||||
public:
|
||||
single_partition_populating_reader(row_cache& cache,
|
||||
lw_shared_ptr<read_context> context)
|
||||
: _cache(cache)
|
||||
, _read_context(std::move(context))
|
||||
{ }
|
||||
|
||||
virtual future<streamed_mutation_opt> operator()() override {
|
||||
if (!_read_context) {
|
||||
return make_ready_future<streamed_mutation_opt>(streamed_mutation_opt());
|
||||
}
|
||||
flat_mutation_reader_opt _reader;
|
||||
private:
|
||||
future<> create_reader() {
|
||||
auto src_and_phase = _cache.snapshot_of(_read_context->range().start()->value());
|
||||
auto phase = src_and_phase.phase;
|
||||
_delegate = _cache.create_underlying_reader(*_read_context, src_and_phase.snapshot, _read_context->range());
|
||||
return _delegate().then([this, phase] (auto sm) mutable -> streamed_mutation_opt {
|
||||
auto ctx = std::move(_read_context);
|
||||
if (!sm) {
|
||||
if (phase == _cache.phase_of(ctx->range().start()->value())) {
|
||||
_cache._read_section(_cache._tracker.region(), [this, ctx = std::move(ctx)] {
|
||||
with_allocator(_cache._tracker.allocator(), [this, &ctx] {
|
||||
dht::decorated_key dk = ctx->range().start()->value().as_decorated_key();
|
||||
_read_context->enter_partition(_read_context->range().start()->value().as_decorated_key(), src_and_phase.snapshot, phase);
|
||||
return _read_context->create_underlying(false).then([this, phase] {
|
||||
return _read_context->underlying().underlying()().then([this, phase] (auto&& mfopt) {
|
||||
if (!mfopt) {
|
||||
if (phase == _cache.phase_of(_read_context->range().start()->value())) {
|
||||
_cache._read_section(_cache._tracker.region(), [this] {
|
||||
with_allocator(_cache._tracker.allocator(), [this] {
|
||||
dht::decorated_key dk = _read_context->range().start()->value().as_decorated_key();
|
||||
_cache.do_find_or_create_entry(dk, nullptr, [&] (auto i) {
|
||||
mutation_partition mp(_cache._schema);
|
||||
cache_entry* entry = current_allocator().construct<cache_entry>(
|
||||
@@ -381,19 +372,66 @@ public:
|
||||
} else {
|
||||
_cache._tracker.on_mispopulate();
|
||||
}
|
||||
return std::move(sm);
|
||||
}
|
||||
if (phase == _cache.phase_of(ctx->range().start()->value())) {
|
||||
return _cache._read_section(_cache._tracker.region(), [&] {
|
||||
cache_entry& e = _cache.find_or_create(sm->decorated_key(), sm->partition_tombstone(), phase);
|
||||
return e.read(_cache, *ctx, std::move(*sm), phase);
|
||||
_end_of_stream = true;
|
||||
} else if (phase == _cache.phase_of(_read_context->range().start()->value())) {
|
||||
_reader = _cache._read_section(_cache._tracker.region(), [&] {
|
||||
cache_entry& e = _cache.find_or_create(mfopt->as_partition_start().key(), mfopt->as_partition_start().partition_tombstone(), phase);
|
||||
return e.read(_cache, *_read_context, phase);
|
||||
});
|
||||
} else {
|
||||
_cache._tracker.on_mispopulate();
|
||||
return read_directly_from_underlying(std::move(*sm), *ctx);
|
||||
_reader = read_directly_from_underlying(*_read_context);
|
||||
push_mutation_fragment(std::move(*mfopt));
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
public:
|
||||
single_partition_populating_reader(row_cache& cache,
|
||||
lw_shared_ptr<read_context> context)
|
||||
: impl(context->schema())
|
||||
, _cache(cache)
|
||||
, _read_context(std::move(context))
|
||||
{ }
|
||||
|
||||
// Fills the buffer from _reader until the stream ends or the buffer is full.
// When no reader exists yet, create_reader() runs first; the fill is then
// retried unless _end_of_stream has been set in the meantime.
virtual future<> fill_buffer() override {
    if (_reader) {
        return do_until([this] { return is_end_of_stream() || is_buffer_full(); }, [this] {
            return fill_buffer_from(*_reader).then([this] (bool reader_finished) {
                if (reader_finished) {
                    _end_of_stream = true;
                }
            });
        });
    }
    // First call: the reader has to be created before anything can be read.
    return create_reader().then([this] {
        return _end_of_stream ? make_ready_future<>() : fill_buffer();
    });
}
|
||||
virtual void next_partition() override {
|
||||
if (_reader) {
|
||||
clear_buffer();
|
||||
_end_of_stream = true;
|
||||
}
|
||||
}
|
||||
// Partition-range forwarding: the requested range is ignored. The buffer is
// emptied and the stream is marked as finished, so nothing more is produced.
virtual future<> fast_forward_to(const dht::partition_range&) override {
    clear_buffer();
    _end_of_stream = true;
    return make_ready_future<>();
}
|
||||
// Forwards the reader to the given position range within the partition.
// Without a reader there is nothing to forward, so the stream is simply
// marked as finished. Otherwise the read context must have been created
// with forwarding enabled (asserted).
virtual future<> fast_forward_to(position_range pr) override {
    if (_reader) {
        assert(bool(_read_context->fwd()));
        _end_of_stream = false;
        forward_buffer_to(pr.start());
        return _reader->fast_forward_to(std::move(pr));
    }
    _end_of_stream = true;
    return make_ready_future<>();
}
|
||||
};
|
||||
|
||||
void cache_tracker::clear_continuity(cache_entry& ce) {
|
||||
@@ -469,25 +507,31 @@ public:
|
||||
, _read_context(ctx)
|
||||
{}
|
||||
|
||||
future<streamed_mutation_opt> operator()() {
|
||||
return _reader().then([this] (streamed_mutation_opt smopt) mutable -> streamed_mutation_opt {
|
||||
future<flat_mutation_reader_opt, mutation_fragment_opt > operator()() {
|
||||
return _reader.move_to_next_partition().then([this] (auto&& mfopt) mutable {
|
||||
{
|
||||
if (!smopt) {
|
||||
if (!mfopt) {
|
||||
handle_end_of_stream();
|
||||
return std::move(smopt);
|
||||
return make_ready_future<flat_mutation_reader_opt, mutation_fragment_opt>(stdx::nullopt, stdx::nullopt);
|
||||
}
|
||||
_cache.on_partition_miss();
|
||||
if (_reader.creation_phase() == _cache.phase_of(smopt->decorated_key())) {
|
||||
const partition_start& ps = mfopt->as_partition_start();
|
||||
const dht::decorated_key& key = ps.key();
|
||||
if (_reader.creation_phase() == _cache.phase_of(key)) {
|
||||
return _cache._read_section(_cache._tracker.region(), [&] {
|
||||
cache_entry& e = _cache.find_or_create(smopt->decorated_key(), smopt->partition_tombstone(), _reader.creation_phase(),
|
||||
can_set_continuity() ? &*_last_key : nullptr);
|
||||
_last_key = row_cache::previous_entry_pointer(smopt->decorated_key());
|
||||
return e.read(_cache, _read_context, std::move(*smopt), _reader.creation_phase());
|
||||
cache_entry& e = _cache.find_or_create(key,
|
||||
ps.partition_tombstone(),
|
||||
_reader.creation_phase(),
|
||||
can_set_continuity() ? &*_last_key : nullptr);
|
||||
_last_key = row_cache::previous_entry_pointer(key);
|
||||
return make_ready_future<flat_mutation_reader_opt, mutation_fragment_opt>(
|
||||
e.read(_cache, _read_context, _reader.creation_phase()), stdx::nullopt);
|
||||
});
|
||||
} else {
|
||||
_cache._tracker.on_mispopulate();
|
||||
_last_key = row_cache::previous_entry_pointer(smopt->decorated_key());
|
||||
return read_directly_from_underlying(std::move(*smopt), _read_context);
|
||||
_last_key = row_cache::previous_entry_pointer(key);
|
||||
return make_ready_future<flat_mutation_reader_opt, mutation_fragment_opt>(
|
||||
read_directly_from_underlying(_read_context), std::move(mfopt));
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -507,7 +551,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class scanning_and_populating_reader final : public mutation_reader::impl {
|
||||
class scanning_and_populating_reader final : public flat_mutation_reader::impl {
|
||||
const dht::partition_range* _pr;
|
||||
row_cache& _cache;
|
||||
lw_shared_ptr<read_context> _read_context;
|
||||
@@ -517,8 +561,9 @@ class scanning_and_populating_reader final : public mutation_reader::impl {
|
||||
bool _advance_primary = false;
|
||||
stdx::optional<dht::partition_range::bound> _lower_bound;
|
||||
dht::partition_range _secondary_range;
|
||||
flat_mutation_reader_opt _reader;
|
||||
private:
|
||||
streamed_mutation read_from_entry(cache_entry& ce) {
|
||||
flat_mutation_reader read_from_entry(cache_entry& ce) {
|
||||
_cache.upgrade_entry(ce);
|
||||
_cache._tracker.touch(ce);
|
||||
_cache.on_partition_hit();
|
||||
@@ -530,9 +575,9 @@ private:
|
||||
: dht::ring_position_view::min();
|
||||
}
|
||||
|
||||
streamed_mutation_opt do_read_from_primary() {
|
||||
flat_mutation_reader_opt do_read_from_primary() {
|
||||
return _cache._read_section(_cache._tracker.region(), [this] {
|
||||
return with_linearized_managed_bytes([&] () -> streamed_mutation_opt {
|
||||
return with_linearized_managed_bytes([&] () -> flat_mutation_reader_opt {
|
||||
bool not_moved = true;
|
||||
if (!_primary.valid()) {
|
||||
not_moved = _primary.advance_to(as_ring_position_view(_lower_bound));
|
||||
@@ -549,11 +594,11 @@ private:
|
||||
return stdx::nullopt;
|
||||
}
|
||||
cache_entry& e = _primary.entry();
|
||||
auto sm = read_from_entry(e);
|
||||
auto fr = read_from_entry(e);
|
||||
_lower_bound = dht::partition_range::bound{e.key(), false};
|
||||
// Delay the call to next() so that we don't see stale continuity on next invocation.
|
||||
_advance_primary = true;
|
||||
return streamed_mutation_opt(std::move(sm));
|
||||
return flat_mutation_reader_opt(std::move(fr));
|
||||
} else {
|
||||
if (_primary.in_range()) {
|
||||
cache_entry& e = _primary.entry();
|
||||
@@ -578,47 +623,88 @@ private:
|
||||
});
|
||||
}
|
||||
|
||||
future<streamed_mutation_opt> read_from_primary() {
|
||||
auto smo = do_read_from_primary();
|
||||
future<flat_mutation_reader_opt> read_from_primary() {
|
||||
auto fro = do_read_from_primary();
|
||||
if (!_secondary_in_progress) {
|
||||
return make_ready_future<streamed_mutation_opt>(std::move(smo));
|
||||
return make_ready_future<flat_mutation_reader_opt>(std::move(fro));
|
||||
}
|
||||
return _secondary_reader.fast_forward_to(std::move(_secondary_range)).then([this] {
|
||||
return read_from_secondary();
|
||||
});
|
||||
}
|
||||
|
||||
future<streamed_mutation_opt> read_from_secondary() {
|
||||
return _secondary_reader().then([this] (streamed_mutation_opt smopt) {
|
||||
if (smopt) {
|
||||
return make_ready_future<streamed_mutation_opt>(std::move(smopt));
|
||||
future<flat_mutation_reader_opt> read_from_secondary() {
|
||||
return _secondary_reader().then([this] (flat_mutation_reader_opt fropt, mutation_fragment_opt ps) {
|
||||
if (fropt) {
|
||||
if (ps) {
|
||||
push_mutation_fragment(std::move(*ps));
|
||||
}
|
||||
return make_ready_future<flat_mutation_reader_opt>(std::move(fropt));
|
||||
} else {
|
||||
_secondary_in_progress = false;
|
||||
return read_from_primary();
|
||||
}
|
||||
});
|
||||
}
|
||||
// Obtains the reader for the next partition and stores it in _reader.
// The secondary reader is used while _secondary_in_progress is set,
// otherwise the primary source. When no reader is returned the stream
// is marked as finished.
future<> read_next_partition() {
    auto next = _secondary_in_progress ? read_from_secondary() : read_from_primary();
    return next.then([this] (auto&& fropt) {
        if (!fropt) {
            _end_of_stream = true;
            return;
        }
        _reader = std::move(fropt);
    });
}
|
||||
void on_end_of_stream() {
|
||||
if (_read_context->fwd() == streamed_mutation::forwarding::yes) {
|
||||
_end_of_stream = true;
|
||||
} else {
|
||||
_reader = {};
|
||||
}
|
||||
}
|
||||
public:
|
||||
scanning_and_populating_reader(row_cache& cache,
|
||||
const dht::partition_range& range,
|
||||
lw_shared_ptr<read_context> context)
|
||||
: _pr(&range)
|
||||
: impl(context->schema())
|
||||
, _pr(&range)
|
||||
, _cache(cache)
|
||||
, _read_context(std::move(context))
|
||||
, _primary(cache, range)
|
||||
, _secondary_reader(cache, *_read_context)
|
||||
, _lower_bound(range.start())
|
||||
{ }
|
||||
|
||||
future<streamed_mutation_opt> operator()() {
|
||||
if (_secondary_in_progress) {
|
||||
return read_from_secondary();
|
||||
virtual future<> fill_buffer() override {
|
||||
return do_until([this] { return is_end_of_stream() || is_buffer_full(); }, [this] {
|
||||
if (!_reader) {
|
||||
return read_next_partition();
|
||||
} else {
|
||||
return fill_buffer_from(*_reader).then([this] (bool reader_finished) {
|
||||
if (reader_finished) {
|
||||
on_end_of_stream();
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
virtual void next_partition() override {
|
||||
if (_read_context->fwd() == streamed_mutation::forwarding::yes) {
|
||||
if (_reader) {
|
||||
clear_buffer();
|
||||
_reader->next_partition();
|
||||
_end_of_stream = false;
|
||||
}
|
||||
} else {
|
||||
return read_from_primary();
|
||||
clear_buffer_to_next_partition();
|
||||
if (_reader && is_buffer_empty()) {
|
||||
_reader->next_partition();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
future<> fast_forward_to(const dht::partition_range& pr) {
|
||||
virtual future<> fast_forward_to(const dht::partition_range& pr) override {
|
||||
clear_buffer();
|
||||
_reader = {};
|
||||
_end_of_stream = false;
|
||||
_secondary_in_progress = false;
|
||||
_advance_primary = false;
|
||||
_pr = &pr;
|
||||
@@ -626,11 +712,21 @@ public:
|
||||
_lower_bound = pr.start();
|
||||
return make_ready_future<>();
|
||||
}
|
||||
// Forwards the current partition reader to the given position range.
// The buffer is forwarded first in either case. If there is no current
// reader the stream is marked as finished.
virtual future<> fast_forward_to(position_range cr) override {
    forward_buffer_to(cr.start());
    if (!_reader) {
        _end_of_stream = true;
        return make_ready_future<>();
    }
    _end_of_stream = false;
    return _reader->fast_forward_to(std::move(cr));
}
|
||||
};
|
||||
|
||||
mutation_reader
|
||||
flat_mutation_reader
|
||||
row_cache::make_scanning_reader(const dht::partition_range& range, lw_shared_ptr<read_context> context) {
|
||||
return make_mutation_reader<scanning_and_populating_reader>(*this, range, std::move(context));
|
||||
return make_flat_mutation_reader<scanning_and_populating_reader>(*this, range, std::move(context));
|
||||
}
|
||||
|
||||
mutation_reader
|
||||
@@ -642,33 +738,47 @@ row_cache::make_reader(schema_ptr s,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr)
|
||||
{
|
||||
auto ctx = make_lw_shared<read_context>(*this, std::move(s), range, slice, pc, trace_state, fwd, fwd_mr);
|
||||
return mutation_reader_from_flat_mutation_reader(
|
||||
make_flat_reader(std::move(s), range, slice, pc, std::move(trace_state), fwd, fwd_mr));
|
||||
}
|
||||
|
||||
flat_mutation_reader
|
||||
row_cache::make_flat_reader(schema_ptr s,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr)
|
||||
{
|
||||
auto ctx = make_lw_shared<read_context>(*this, s, range, slice, pc, trace_state, fwd, fwd_mr);
|
||||
|
||||
if (!ctx->is_range_query()) {
|
||||
return _read_section(_tracker.region(), [&] {
|
||||
return with_linearized_managed_bytes([&] {
|
||||
cache_entry::compare cmp(_schema);
|
||||
auto&& pos = ctx->range().start()->value();
|
||||
auto i = _partitions.lower_bound(pos, cmp);
|
||||
if (i != _partitions.end() && !cmp(pos, i->position())) {
|
||||
cache_entry& e = *i;
|
||||
_tracker.touch(e);
|
||||
upgrade_entry(e);
|
||||
on_partition_hit();
|
||||
return make_reader_returning(e.read(*this, *ctx));
|
||||
} else if (i->continuous()) {
|
||||
return make_empty_reader();
|
||||
} else {
|
||||
on_partition_miss();
|
||||
return make_mutation_reader<single_partition_populating_reader>(*this, std::move(ctx));
|
||||
}
|
||||
});
|
||||
return with_linearized_managed_bytes([&] {
|
||||
cache_entry::compare cmp(_schema);
|
||||
auto&& pos = ctx->range().start()->value();
|
||||
auto i = _partitions.lower_bound(pos, cmp);
|
||||
if (i != _partitions.end() && !cmp(pos, i->position())) {
|
||||
cache_entry& e = *i;
|
||||
_tracker.touch(e);
|
||||
upgrade_entry(e);
|
||||
on_partition_hit();
|
||||
return e.read(*this, *ctx);
|
||||
} else if (i->continuous()) {
|
||||
return make_empty_flat_reader(std::move(s));
|
||||
} else {
|
||||
on_partition_miss();
|
||||
return make_flat_mutation_reader<single_partition_populating_reader>(*this, std::move(ctx));
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
return make_scanning_reader(range, std::move(ctx));
|
||||
}
|
||||
|
||||
|
||||
row_cache::~row_cache() {
|
||||
with_allocator(_tracker.allocator(), [this] {
|
||||
_partitions.clear_and_dispose([this, deleter = current_deleter<cache_entry>()] (auto&& p) mutable {
|
||||
@@ -1063,35 +1173,29 @@ void row_cache::set_schema(schema_ptr new_schema) noexcept {
|
||||
_schema = std::move(new_schema);
|
||||
}
|
||||
|
||||
streamed_mutation cache_entry::read(row_cache& rc, read_context& reader) {
|
||||
flat_mutation_reader cache_entry::read(row_cache& rc, read_context& reader) {
|
||||
auto source_and_phase = rc.snapshot_of(_key);
|
||||
reader.enter_partition(_key, source_and_phase.snapshot, source_and_phase.phase);
|
||||
return do_read(rc, reader);
|
||||
}
|
||||
|
||||
streamed_mutation cache_entry::read(row_cache& rc, read_context& reader,
|
||||
streamed_mutation&& sm, row_cache::phase_type phase) {
|
||||
reader.enter_partition(std::move(sm), phase);
|
||||
try {
|
||||
return do_read(rc, reader);
|
||||
} catch (...) {
|
||||
sm = std::move(reader.get_streamed_mutation());
|
||||
throw;
|
||||
}
|
||||
flat_mutation_reader cache_entry::read(row_cache& rc, read_context& reader, row_cache::phase_type phase) {
|
||||
reader.enter_partition(_key, phase);
|
||||
return do_read(rc, reader);
|
||||
}
|
||||
|
||||
// Assumes reader is in the corresponding partition
|
||||
streamed_mutation cache_entry::do_read(row_cache& rc, read_context& reader) {
|
||||
flat_mutation_reader cache_entry::do_read(row_cache& rc, read_context& reader) {
|
||||
auto snp = _pe.read(rc._tracker.region(), _schema, reader.phase());
|
||||
auto ckr = query::clustering_key_filter_ranges::get_ranges(*_schema, reader.slice(), _key.key());
|
||||
auto sm = make_cache_streamed_mutation(_schema, _key, std::move(ckr), rc, reader.shared_from_this(), std::move(snp));
|
||||
auto r = make_cache_flat_mutation_reader(_schema, _key, std::move(ckr), rc, reader.shared_from_this(), std::move(snp));
|
||||
if (reader.schema()->version() != _schema->version()) {
|
||||
sm = transform(std::move(sm), schema_upgrader(reader.schema()));
|
||||
r = transform(std::move(r), schema_upgrader(reader.schema()));
|
||||
}
|
||||
if (reader.fwd() == streamed_mutation::forwarding::yes) {
|
||||
sm = make_forwardable(std::move(sm));
|
||||
r = make_forwardable(std::move(r));
|
||||
}
|
||||
return std::move(sm);
|
||||
return std::move(r);
|
||||
}
|
||||
|
||||
const schema_ptr& row_cache::schema() const {
|
||||
|
||||
70
row_cache.hh
70
row_cache.hh
@@ -36,6 +36,7 @@
|
||||
#include "utils/estimated_histogram.hh"
|
||||
#include "tracing/trace_state.hh"
|
||||
#include <seastar/core/metrics_registration.hh>
|
||||
#include "flat_mutation_reader.hh"
|
||||
|
||||
namespace bi = boost::intrusive;
|
||||
|
||||
@@ -46,6 +47,7 @@ namespace cache {
|
||||
|
||||
class autoupdating_underlying_reader;
|
||||
class cache_streamed_mutation;
|
||||
class cache_flat_mutation_reader;
|
||||
class read_context;
|
||||
class lsa_manager;
|
||||
|
||||
@@ -76,7 +78,7 @@ class cache_entry {
|
||||
cache_link_type _cache_link;
|
||||
friend class size_calculator;
|
||||
|
||||
streamed_mutation do_read(row_cache&, cache::read_context& reader);
|
||||
flat_mutation_reader do_read(row_cache&, cache::read_context& reader);
|
||||
public:
|
||||
friend class row_cache;
|
||||
friend class cache_tracker;
|
||||
@@ -138,8 +140,8 @@ public:
|
||||
partition_entry& partition() { return _pe; }
|
||||
const schema_ptr& schema() const { return _schema; }
|
||||
schema_ptr& schema() { return _schema; }
|
||||
streamed_mutation read(row_cache&, cache::read_context& reader);
|
||||
streamed_mutation read(row_cache&, cache::read_context& reader, streamed_mutation&& underlying, utils::phased_barrier::phase_type);
|
||||
flat_mutation_reader read(row_cache&, cache::read_context&);
|
||||
flat_mutation_reader read(row_cache&, cache::read_context&, utils::phased_barrier::phase_type);
|
||||
bool continuous() const { return _flags._continuous; }
|
||||
void set_continuous(bool value) { _flags._continuous = value; }
|
||||
|
||||
@@ -191,6 +193,7 @@ public:
|
||||
friend class cache::read_context;
|
||||
friend class cache::autoupdating_underlying_reader;
|
||||
friend class cache::cache_streamed_mutation;
|
||||
friend class cache::cache_flat_mutation_reader;
|
||||
struct stats {
|
||||
uint64_t partition_hits;
|
||||
uint64_t partition_misses;
|
||||
@@ -272,6 +275,7 @@ public:
|
||||
friend class single_partition_populating_reader;
|
||||
friend class cache_entry;
|
||||
friend class cache::cache_streamed_mutation;
|
||||
friend class cache::cache_flat_mutation_reader;
|
||||
friend class cache::lsa_manager;
|
||||
friend class cache::read_context;
|
||||
friend class partition_range_cursor;
|
||||
@@ -336,8 +340,8 @@ private:
|
||||
logalloc::allocating_section _update_section;
|
||||
logalloc::allocating_section _populate_section;
|
||||
logalloc::allocating_section _read_section;
|
||||
mutation_reader create_underlying_reader(cache::read_context&, mutation_source&, const dht::partition_range&);
|
||||
mutation_reader make_scanning_reader(const dht::partition_range&, lw_shared_ptr<cache::read_context>);
|
||||
flat_mutation_reader create_underlying_reader(cache::read_context&, mutation_source&, const dht::partition_range&);
|
||||
flat_mutation_reader make_scanning_reader(const dht::partition_range&, lw_shared_ptr<cache::read_context>);
|
||||
void on_partition_hit();
|
||||
void on_partition_miss();
|
||||
void on_row_hit();
|
||||
@@ -452,6 +456,19 @@ public:
|
||||
return make_reader(std::move(s), range, full_slice);
|
||||
}
|
||||
|
||||
flat_mutation_reader make_flat_reader(schema_ptr,
|
||||
const dht::partition_range&,
|
||||
const query::partition_slice&,
|
||||
const io_priority_class& = default_priority_class(),
|
||||
tracing::trace_state_ptr trace_state = nullptr,
|
||||
streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no,
|
||||
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::no);
|
||||
|
||||
// Convenience overload: reads the given range (the full partition range by
// default) using the schema's full slice.
flat_mutation_reader make_flat_reader(schema_ptr s, const dht::partition_range& range = query::full_partition_range) {
    // Take the slice reference before s is moved into the call below.
    auto& slice = s->full_slice();
    return make_flat_reader(std::move(s), range, slice);
}
|
||||
|
||||
const stats& stats() const { return _stats; }
|
||||
public:
|
||||
// Populate cache from given mutation, which must be fully continuous.
|
||||
@@ -523,3 +540,46 @@ public:
|
||||
friend class cache_tracker;
|
||||
friend class mark_end_as_continuous;
|
||||
};
|
||||
|
||||
namespace cache {
|
||||
|
||||
class lsa_manager {
|
||||
row_cache &_cache;
|
||||
public:
|
||||
lsa_manager(row_cache &cache) : _cache(cache) {}
|
||||
|
||||
template<typename Func>
|
||||
decltype(auto) run_in_read_section(const Func &func) {
|
||||
return _cache._read_section(_cache._tracker.region(), [&func]() {
|
||||
return with_linearized_managed_bytes([&func]() {
|
||||
return func();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
template<typename Func>
|
||||
decltype(auto) run_in_update_section(const Func &func) {
|
||||
return _cache._update_section(_cache._tracker.region(), [&func]() {
|
||||
return with_linearized_managed_bytes([&func]() {
|
||||
return func();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
template<typename Func>
|
||||
void run_in_update_section_with_allocator(Func &&func) {
|
||||
return _cache._update_section(_cache._tracker.region(), [this, &func]() {
|
||||
return with_linearized_managed_bytes([this, &func]() {
|
||||
return with_allocator(_cache._tracker.region().allocator(), [this, &func]() mutable {
|
||||
return func();
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
logalloc::region ®ion() { return _cache._tracker.region(); }
|
||||
|
||||
logalloc::allocating_section &read_section() { return _cache._read_section; }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
2
seastar
2
seastar
Submodule seastar updated: 78cd87fbbb...8d254a1adf
@@ -89,6 +89,7 @@ static const sstring INDEXES_FEATURE = "INDEXES";
|
||||
static const sstring DIGEST_MULTIPARTITION_READ_FEATURE = "DIGEST_MULTIPARTITION_READ";
|
||||
static const sstring CORRECT_COUNTER_ORDER_FEATURE = "CORRECT_COUNTER_ORDER";
|
||||
static const sstring SCHEMA_TABLES_V3 = "SCHEMA_TABLES_V3";
|
||||
static const sstring CORRECT_NON_COMPOUND_RANGE_TOMBSTONES = "CORRECT_NON_COMPOUND_RANGE_TOMBSTONES";
|
||||
|
||||
distributed<storage_service> _the_storage_service;
|
||||
|
||||
@@ -133,7 +134,8 @@ sstring storage_service::get_config_supported_features() {
|
||||
COUNTERS_FEATURE,
|
||||
DIGEST_MULTIPARTITION_READ_FEATURE,
|
||||
CORRECT_COUNTER_ORDER_FEATURE,
|
||||
SCHEMA_TABLES_V3
|
||||
SCHEMA_TABLES_V3,
|
||||
CORRECT_NON_COMPOUND_RANGE_TOMBSTONES,
|
||||
};
|
||||
if (service::get_local_storage_service()._db.local().get_config().experimental()) {
|
||||
features.push_back(MATERIALIZED_VIEWS_FEATURE);
|
||||
@@ -344,6 +346,7 @@ void storage_service::register_features() {
|
||||
_digest_multipartition_read_feature = gms::feature(DIGEST_MULTIPARTITION_READ_FEATURE);
|
||||
_correct_counter_order_feature = gms::feature(CORRECT_COUNTER_ORDER_FEATURE);
|
||||
_schema_tables_v3 = gms::feature(SCHEMA_TABLES_V3);
|
||||
_correct_non_compound_range_tombstones = gms::feature(CORRECT_NON_COMPOUND_RANGE_TOMBSTONES);
|
||||
|
||||
if (_db.local().get_config().experimental()) {
|
||||
_materialized_views_feature = gms::feature(MATERIALIZED_VIEWS_FEATURE);
|
||||
|
||||
@@ -269,6 +269,7 @@ private:
|
||||
gms::feature _digest_multipartition_read_feature;
|
||||
gms::feature _correct_counter_order_feature;
|
||||
gms::feature _schema_tables_v3;
|
||||
gms::feature _correct_non_compound_range_tombstones;
|
||||
public:
|
||||
void enable_all_features() {
|
||||
_range_tombstones_feature.enable();
|
||||
@@ -279,6 +280,7 @@ public:
|
||||
_digest_multipartition_read_feature.enable();
|
||||
_correct_counter_order_feature.enable();
|
||||
_schema_tables_v3.enable();
|
||||
_correct_non_compound_range_tombstones.enable();
|
||||
}
|
||||
|
||||
void finish_bootstrapping() {
|
||||
@@ -2243,6 +2245,10 @@ public:
|
||||
const gms::feature& cluster_supports_schema_tables_v3() const {
|
||||
return _schema_tables_v3;
|
||||
}
|
||||
|
||||
bool cluster_supports_reading_correctly_serialized_range_tombstones() const {
|
||||
return bool(_correct_non_compound_range_tombstones);
|
||||
}
|
||||
};
|
||||
|
||||
inline future<> init_storage_service(distributed<database>& db, sharded<auth::service>& auth_service) {
|
||||
|
||||
@@ -205,7 +205,20 @@ public:
|
||||
return attr;
|
||||
}
|
||||
private:
|
||||
::mutation_reader setup() {
|
||||
// Default range sstable reader that will only return mutation that belongs to current shard.
|
||||
virtual flat_mutation_reader make_sstable_reader(lw_shared_ptr<sstables::sstable_set> ssts) const {
|
||||
return ::make_local_shard_sstable_reader(_cf.schema(),
|
||||
std::move(ssts),
|
||||
query::full_partition_range,
|
||||
_cf.schema()->full_slice(),
|
||||
service::get_local_compaction_priority(),
|
||||
no_resource_tracking(),
|
||||
nullptr,
|
||||
::streamed_mutation::forwarding::no,
|
||||
::mutation_reader::forwarding::no);
|
||||
}
|
||||
|
||||
flat_mutation_reader setup() {
|
||||
auto ssts = make_lw_shared<sstables::sstable_set>(_cf.get_compaction_strategy().make_sstable_set(_cf.schema()));
|
||||
auto schema = _cf.schema();
|
||||
sstring formatted_msg = "[";
|
||||
@@ -237,15 +250,7 @@ private:
|
||||
_info->cf = schema->cf_name();
|
||||
report_start(formatted_msg);
|
||||
|
||||
return ::make_range_sstable_reader(_cf.schema(),
|
||||
ssts,
|
||||
query::full_partition_range,
|
||||
_cf.schema()->full_slice(),
|
||||
service::get_local_compaction_priority(),
|
||||
no_resource_tracking(),
|
||||
nullptr,
|
||||
::streamed_mutation::forwarding::no,
|
||||
::mutation_reader::forwarding::no);
|
||||
return make_sstable_reader(std::move(ssts));
|
||||
}
|
||||
|
||||
compaction_info finish(std::chrono::time_point<db_clock> started_at, std::chrono::time_point<db_clock> ended_at) {
|
||||
@@ -284,8 +289,8 @@ private:
|
||||
};
|
||||
}
|
||||
|
||||
virtual std::function<bool(const streamed_mutation& sm)> filter_func() const {
|
||||
return [] (const streamed_mutation& sm) {
|
||||
virtual std::function<bool(const dht::decorated_key&)> filter_func() const {
|
||||
return [] (const dht::decorated_key&) {
|
||||
return true;
|
||||
};
|
||||
}
|
||||
@@ -368,9 +373,9 @@ public:
|
||||
};
|
||||
}
|
||||
|
||||
virtual std::function<bool(const streamed_mutation& sm)> filter_func() const override {
|
||||
return [] (const streamed_mutation& sm) {
|
||||
return dht::shard_of(sm.decorated_key().token()) == engine().cpu_id();
|
||||
virtual std::function<bool(const dht::decorated_key&)> filter_func() const override {
|
||||
return [] (const dht::decorated_key& dk){
|
||||
return dht::shard_of(dk.token()) == engine().cpu_id();
|
||||
};
|
||||
}
|
||||
|
||||
@@ -415,15 +420,15 @@ public:
|
||||
clogger.info("Cleaned {}", formatted_msg);
|
||||
}
|
||||
|
||||
std::function<bool(const streamed_mutation& sm)> filter_func() const override {
|
||||
std::function<bool(const dht::decorated_key&)> filter_func() const override {
|
||||
dht::token_range_vector owned_ranges = service::get_local_storage_service().get_local_ranges(_cf.schema()->ks_name());
|
||||
|
||||
return [this, owned_ranges = std::move(owned_ranges)] (const streamed_mutation& sm) {
|
||||
if (dht::shard_of(sm.decorated_key().token()) != engine().cpu_id()) {
|
||||
return [this, owned_ranges = std::move(owned_ranges)] (const dht::decorated_key& dk) {
|
||||
if (dht::shard_of(dk.token()) != engine().cpu_id()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!belongs_to_current_node(sm.decorated_key().token(), owned_ranges)) {
|
||||
if (!belongs_to_current_node(dk.token(), owned_ranges)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@@ -446,6 +451,19 @@ public:
|
||||
_info->type = compaction_type::Reshard;
|
||||
}
|
||||
|
||||
// Use reader that makes sure no non-local mutation will not be filtered out.
|
||||
flat_mutation_reader make_sstable_reader(lw_shared_ptr<sstables::sstable_set> ssts) const override {
|
||||
return ::make_range_sstable_reader(_cf.schema(),
|
||||
std::move(ssts),
|
||||
query::full_partition_range,
|
||||
_cf.schema()->full_slice(),
|
||||
service::get_local_compaction_priority(),
|
||||
no_resource_tracking(),
|
||||
nullptr,
|
||||
::streamed_mutation::forwarding::no,
|
||||
::mutation_reader::forwarding::no);
|
||||
}
|
||||
|
||||
void report_start(const sstring& formatted_msg) const override {
|
||||
clogger.info("Resharding {}", formatted_msg);
|
||||
}
|
||||
@@ -498,7 +516,7 @@ future<compaction_info> compaction::run(std::unique_ptr<compaction> c) {
|
||||
|
||||
auto start_time = db_clock::now();
|
||||
try {
|
||||
consume_flattened_in_thread(reader, cfc, c->filter_func());
|
||||
reader.consume_in_thread(std::move(cfc), c->filter_func());
|
||||
} catch (...) {
|
||||
delete_sstables_for_interrupted_compaction(c->_info->new_sstables, c->_info->ks, c->_info->cf);
|
||||
c = nullptr; // make sure writers are stopped while running in thread context
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "consumer.hh"
|
||||
#include "downsampling.hh"
|
||||
#include "sstables/shared_index_lists.hh"
|
||||
#include <seastar/util/bool_class.hh>
|
||||
|
||||
namespace sstables {
|
||||
|
||||
@@ -44,12 +45,16 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// See #2993
|
||||
class trust_promoted_index_tag;
|
||||
using trust_promoted_index = bool_class<trust_promoted_index_tag>;
|
||||
|
||||
// IndexConsumer is a concept that implements:
|
||||
//
|
||||
// bool should_continue();
|
||||
// void consume_entry(index_entry&& ie, uint64_t offset);
|
||||
template <class IndexConsumer>
|
||||
class index_consume_entry_context: public data_consumer::continuous_data_consumer<index_consume_entry_context<IndexConsumer>> {
|
||||
class index_consume_entry_context : public data_consumer::continuous_data_consumer<index_consume_entry_context<IndexConsumer>> {
|
||||
using proceed = data_consumer::proceed;
|
||||
using continuous_data_consumer = data_consumer::continuous_data_consumer<index_consume_entry_context<IndexConsumer>>;
|
||||
private:
|
||||
@@ -69,6 +74,8 @@ private:
|
||||
temporary_buffer<char> _key;
|
||||
temporary_buffer<char> _promoted;
|
||||
|
||||
trust_promoted_index _trust_pi;
|
||||
|
||||
public:
|
||||
void verify_end_state() {
|
||||
}
|
||||
@@ -111,6 +118,9 @@ public:
|
||||
}
|
||||
case state::CONSUME_ENTRY: {
|
||||
auto len = (_key.size() + _promoted.size() + 14);
|
||||
if (_trust_pi == trust_promoted_index::no) {
|
||||
_promoted = temporary_buffer<char>();
|
||||
}
|
||||
_consumer.consume_entry(index_entry(std::move(_key), this->_u64, std::move(_promoted)), _entry_offset);
|
||||
_entry_offset += len;
|
||||
_state = state::START;
|
||||
@@ -122,10 +132,10 @@ public:
|
||||
return proceed::yes;
|
||||
}
|
||||
|
||||
index_consume_entry_context(IndexConsumer& consumer,
|
||||
index_consume_entry_context(IndexConsumer& consumer, trust_promoted_index trust_pi,
|
||||
input_stream<char>&& input, uint64_t start, uint64_t maxlen)
|
||||
: continuous_data_consumer(std::move(input), start, maxlen)
|
||||
, _consumer(consumer), _entry_offset(start)
|
||||
, _consumer(consumer), _entry_offset(start), _trust_pi(trust_pi)
|
||||
{}
|
||||
|
||||
void reset(uint64_t offset) {
|
||||
@@ -190,7 +200,9 @@ class index_reader {
|
||||
|
||||
reader(shared_sstable sst, const io_priority_class& pc, uint64_t begin, uint64_t end, uint64_t quantity)
|
||||
: _consumer(quantity)
|
||||
, _context(_consumer, create_file_input_stream(sst, pc, begin, end), begin, end - begin)
|
||||
, _context(_consumer,
|
||||
trust_promoted_index(sst->has_correct_promoted_index_entries()),
|
||||
create_file_input_stream(sst, pc, begin, end), begin, end - begin)
|
||||
{ }
|
||||
};
|
||||
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
#include "utils/data_input.hh"
|
||||
#include "clustering_ranges_walker.hh"
|
||||
#include "binary_search.hh"
|
||||
#include "../dht/i_partitioner.hh"
|
||||
|
||||
namespace sstables {
|
||||
|
||||
@@ -43,7 +44,7 @@ static inline bytes_view pop_back(std::vector<bytes_view>& vec) {
|
||||
return b;
|
||||
}
|
||||
|
||||
class sstable_streamed_mutation;
|
||||
class sstable_mutation_reader;
|
||||
|
||||
class mp_row_consumer : public row_consumer {
|
||||
public:
|
||||
@@ -52,12 +53,12 @@ public:
|
||||
tombstone tomb;
|
||||
};
|
||||
private:
|
||||
sstable_mutation_reader* _reader;
|
||||
schema_ptr _schema;
|
||||
const query::partition_slice& _slice;
|
||||
bool _out_of_range = false;
|
||||
stdx::optional<query::clustering_key_filter_ranges> _ck_ranges;
|
||||
stdx::optional<clustering_ranges_walker> _ck_ranges_walker;
|
||||
sstable_streamed_mutation* _sm;
|
||||
|
||||
bool _skip_partition = false;
|
||||
// When set, the fragment pending in _in_progress should not be emitted.
|
||||
@@ -96,10 +97,10 @@ private:
|
||||
// _range_tombstones holds only tombstones which are relevant for current ranges.
|
||||
range_tombstone_stream _range_tombstones;
|
||||
bool _first_row_encountered = false;
|
||||
|
||||
// See #2986
|
||||
bool _treat_non_compound_rt_as_compound;
|
||||
public:
|
||||
void set_streamed_mutation(sstable_streamed_mutation* sm) {
|
||||
_sm = sm;
|
||||
}
|
||||
struct column {
|
||||
bool is_static;
|
||||
bytes_view col_name;
|
||||
@@ -302,23 +303,29 @@ private:
|
||||
public:
|
||||
mutation_opt mut;
|
||||
|
||||
mp_row_consumer(const schema_ptr schema,
|
||||
mp_row_consumer(sstable_mutation_reader* reader,
|
||||
const schema_ptr schema,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
reader_resource_tracker resource_tracker,
|
||||
streamed_mutation::forwarding fwd)
|
||||
streamed_mutation::forwarding fwd,
|
||||
const shared_sstable& sst)
|
||||
: row_consumer(std::move(resource_tracker), pc)
|
||||
, _reader(reader)
|
||||
, _schema(schema)
|
||||
, _slice(slice)
|
||||
, _fwd(fwd)
|
||||
, _range_tombstones(*_schema)
|
||||
, _treat_non_compound_rt_as_compound(!sst->has_correct_non_compound_range_tombstones())
|
||||
{ }
|
||||
|
||||
mp_row_consumer(const schema_ptr schema,
|
||||
mp_row_consumer(sstable_mutation_reader* reader,
|
||||
const schema_ptr schema,
|
||||
const io_priority_class& pc,
|
||||
reader_resource_tracker resource_tracker,
|
||||
streamed_mutation::forwarding fwd)
|
||||
: mp_row_consumer(schema, schema->full_slice(), pc, std::move(resource_tracker), fwd) { }
|
||||
streamed_mutation::forwarding fwd,
|
||||
const shared_sstable& sst)
|
||||
: mp_row_consumer(reader, schema, schema->full_slice(), pc, std::move(resource_tracker), fwd, sst) { }
|
||||
|
||||
virtual proceed consume_row_start(sstables::key_view key, sstables::deletion_time deltime) override {
|
||||
if (!_is_mutation_end) {
|
||||
@@ -622,7 +629,8 @@ public:
|
||||
return proceed::yes;
|
||||
}
|
||||
|
||||
auto start = composite_view(column::fix_static_name(*_schema, start_col)).explode();
|
||||
auto compound = _schema->is_compound() || _treat_non_compound_rt_as_compound;
|
||||
auto start = composite_view(column::fix_static_name(*_schema, start_col), compound).explode();
|
||||
|
||||
// Note how this is slightly different from the check in is_collection. Collection tombstones
|
||||
// do not have extra data.
|
||||
@@ -631,9 +639,9 @@ public:
|
||||
// won't have a full clustering prefix (otherwise it isn't a range)
|
||||
if (start.size() <= _schema->clustering_key_size()) {
|
||||
auto start_ck = clustering_key_prefix::from_exploded_view(start);
|
||||
auto start_kind = start_marker_to_bound_kind(start_col);
|
||||
auto end = clustering_key_prefix::from_exploded_view(composite_view(column::fix_static_name(*_schema, end_col)).explode());
|
||||
auto end_kind = end_marker_to_bound_kind(end_col);
|
||||
auto start_kind = compound ? start_marker_to_bound_kind(start_col) : bound_kind::incl_start;
|
||||
auto end = clustering_key_prefix::from_exploded_view(composite_view(column::fix_static_name(*_schema, end_col), compound).explode());
|
||||
auto end_kind = compound ? end_marker_to_bound_kind(end_col) : bound_kind::incl_end;
|
||||
if (range_tombstone::is_single_clustering_row_tombstone(*_schema, start_ck, start_kind, end, end_kind)) {
|
||||
auto ret = flush_if_needed(std::move(start_ck));
|
||||
if (!_skip_in_progress) {
|
||||
@@ -734,51 +742,167 @@ public:
|
||||
stdx::optional<position_in_partition_view> maybe_skip();
|
||||
};
|
||||
|
||||
struct sstable_data_source : public enable_lw_shared_from_this<sstable_data_source> {
|
||||
// Verifies that `v` still holds at least `len` bytes.
// Throws malformed_sstable_exception reporting the expected and remaining sizes otherwise.
static inline void ensure_len(bytes_view v, size_t len) {
    if (v.size() < len) {
        // sprint() takes a printf-style format string, so "{}" placeholders are not
        // substituted; conversion specifiers are needed for the sizes to be reported.
        throw malformed_sstable_exception(sprint("Expected %d bytes, but remaining is %d", len, v.size()));
    }
}
|
||||
|
||||
template <typename T>
|
||||
static inline T read_be(const signed char* p) {
|
||||
return ::read_be<T>(reinterpret_cast<const char*>(p));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static inline T consume_be(bytes_view& p) {
|
||||
ensure_len(p, sizeof(T));
|
||||
T i = read_be<T>(p.data());
|
||||
p.remove_prefix(sizeof(T));
|
||||
return i;
|
||||
}
|
||||
|
||||
// Splits the first `len` bytes off the front of `p`: returns a view over them and
// advances `p` past them. Throws (via ensure_len) if fewer than `len` bytes remain.
static inline bytes_view consume_bytes(bytes_view& p, size_t len) {
    ensure_len(p, len);
    const auto* first = p.data();
    p.remove_prefix(len);
    return bytes_view(first, len);
}
|
||||
|
||||
// Parses the serialized promoted index held in _bytes.
//
// Layout as consumed here: a deletion time (32-bit local deletion time, 64-bit
// marked-for-delete-at), a 32-bit block count, and then for each block a
// 16-bit-length-prefixed start name, a 16-bit-length-prefixed end name, a 64-bit
// offset and a 64-bit width.
//
// Throws malformed_sstable_exception (via ensure_len) if the buffer is too short.
promoted_index promoted_index_view::parse(const schema& s) const {
    bytes_view remaining = _bytes;

    sstables::deletion_time del_time;
    del_time.local_deletion_time = consume_be<uint32_t>(remaining);
    del_time.marked_for_delete_at = consume_be<uint64_t>(remaining);

    const auto block_count = consume_be<uint32_t>(remaining);
    const bool compound = s.is_compound();

    std::deque<promoted_index::entry> entries;
    for (uint32_t i = 0; i < block_count; ++i) {
        uint16_t start_len = consume_be<uint16_t>(remaining);
        auto start_ck = composite_view(consume_bytes(remaining, start_len), compound);
        uint16_t end_len = consume_be<uint16_t>(remaining);
        auto end_ck = composite_view(consume_bytes(remaining, end_len), compound);
        uint64_t offset = consume_be<uint64_t>(remaining);
        uint64_t width = consume_be<uint64_t>(remaining);
        entries.emplace_back(promoted_index::entry{start_ck, end_ck, offset, width});
    }

    return promoted_index{del_time, std::move(entries)};
}
|
||||
|
||||
// Reads only the deletion time stored at the front of the serialized promoted
// index (32-bit local deletion time followed by 64-bit marked-for-delete-at),
// without parsing the blocks that follow.
sstables::deletion_time promoted_index_view::get_deletion_time() const {
    bytes_view header = _bytes;
    sstables::deletion_time result;
    result.local_deletion_time = consume_be<uint32_t>(header);
    result.marked_for_delete_at = consume_be<uint64_t>(header);
    return result;
}
|
||||
|
||||
static
|
||||
future<> advance_to_upper_bound(index_reader& ix, const schema& s, const query::partition_slice& slice, dht::ring_position_view key) {
|
||||
auto& ranges = slice.row_ranges(s, *key.key());
|
||||
if (ranges.empty()) {
|
||||
return ix.advance_past(position_in_partition_view::for_static_row());
|
||||
} else {
|
||||
return ix.advance_past(position_in_partition_view::for_range_end(ranges[ranges.size() - 1]));
|
||||
}
|
||||
}
|
||||
|
||||
class sstable_mutation_reader : public flat_mutation_reader::impl {
|
||||
friend class mp_row_consumer;
|
||||
shared_sstable _sst;
|
||||
mp_row_consumer _consumer;
|
||||
bool _index_in_current_partition = false; // Whether _lh_index is in current partition
|
||||
bool _will_likely_slice = false;
|
||||
bool _read_enabled = true;
|
||||
data_consume_context _context;
|
||||
data_consume_context_opt _context;
|
||||
std::unique_ptr<index_reader> _lh_index; // For lower bound
|
||||
std::unique_ptr<index_reader> _rh_index; // For upper bound
|
||||
schema_ptr _schema;
|
||||
stdx::optional<dht::decorated_key> _key;
|
||||
// We avoid unnecessary lookup for single partition reads thanks to this flag
|
||||
bool _single_partition_read = false;
|
||||
std::function<future<> ()> _initialize;
|
||||
streamed_mutation::forwarding _fwd;
|
||||
stdx::optional<dht::decorated_key> _current_partition_key;
|
||||
bool _partition_finished = true;
|
||||
public:
|
||||
sstable_mutation_reader(shared_sstable sst, schema_ptr schema,
|
||||
const io_priority_class &pc,
|
||||
reader_resource_tracker resource_tracker,
|
||||
streamed_mutation::forwarding fwd)
|
||||
: impl(std::move(schema))
|
||||
, _sst(std::move(sst))
|
||||
, _consumer(this, _schema, _schema->full_slice(), pc, std::move(resource_tracker), fwd, _sst)
|
||||
, _initialize([this] {
|
||||
_context = _sst->data_consume_rows(_consumer);
|
||||
return make_ready_future<>();
|
||||
})
|
||||
, _fwd(fwd) { }
|
||||
// Constructs a reader over the partition range `pr`.
//
// No I/O happens here: the work is deferred to _initialize, which creates both index
// readers, positions them at the start and end of `pr`, and then creates the data
// consume context over the resulting byte range of the data file.
//
// `pr` is copied into the deferred initializer; `pc` and `slice` are captured by
// reference and so must stay valid until initialization has run.
sstable_mutation_reader(shared_sstable sst,
        schema_ptr schema,
        const dht::partition_range& pr,
        const query::partition_slice& slice,
        const io_priority_class& pc,
        reader_resource_tracker resource_tracker,
        streamed_mutation::forwarding fwd,
        mutation_reader::forwarding fwd_mr)
    : impl(std::move(schema))
    , _sst(std::move(sst))
    , _consumer(this, _schema, slice, pc, std::move(resource_tracker), fwd, _sst)
    // resource_tracker was moved into _consumer above and is not needed by the
    // initializer, so it is intentionally not captured here.
    , _initialize([this, pr, &pc, &slice, fwd_mr] {
        _lh_index = _sst->get_index_reader(pc); // lh = left hand
        _rh_index = _sst->get_index_reader(pc);
        auto f = seastar::when_all_succeed(_lh_index->advance_to_start(pr), _rh_index->advance_to_end(pr));
        return f.then([this, &slice, fwd_mr] {
            sstable::disk_read_range drr{_lh_index->data_file_position(),
                                         _rh_index->data_file_position()};
            // When forwarding between partition ranges is enabled, later ranges may lie
            // anywhere in the file, so the context is bounded by the whole data file.
            auto last_end = fwd_mr ? _sst->data_size() : drr.end;
            _read_enabled = bool(drr);
            _context = _sst->data_consume_rows(_consumer, std::move(drr), last_end);
            _index_in_current_partition = true;
            _will_likely_slice = will_likely_slice(slice);
        });
    })
    , _fwd(fwd) { }
|
||||
sstable_mutation_reader(shared_sstable sst,
|
||||
schema_ptr schema,
|
||||
dht::ring_position_view key,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
reader_resource_tracker resource_tracker,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr)
|
||||
: impl(std::move(schema))
|
||||
, _sst(std::move(sst))
|
||||
, _consumer(this, _schema, slice, pc, std::move(resource_tracker), fwd, _sst)
|
||||
, _single_partition_read(true)
|
||||
, _initialize([this, key = std::move(key), &pc, &slice, fwd_mr] () mutable {
|
||||
_lh_index = _sst->get_index_reader(pc);
|
||||
auto f = _lh_index->advance_and_check_if_present(key);
|
||||
return f.then([this, &slice, &pc, key] (bool present) mutable {
|
||||
if (!present) {
|
||||
_sst->get_filter_tracker().add_false_positive();
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
struct single_partition_tag {};
|
||||
_sst->get_filter_tracker().add_true_positive();
|
||||
|
||||
sstable_data_source(schema_ptr s, shared_sstable sst, mp_row_consumer&& consumer)
|
||||
: _sst(std::move(sst))
|
||||
, _consumer(std::move(consumer))
|
||||
, _context(_sst->data_consume_rows(_consumer))
|
||||
, _schema(std::move(s))
|
||||
{ }
|
||||
_rh_index = std::make_unique<index_reader>(*_lh_index);
|
||||
auto f = advance_to_upper_bound(*_rh_index, *_schema, slice, key);
|
||||
return f.then([this, &slice, &pc] () mutable {
|
||||
_read_enabled = _lh_index->data_file_position() != _rh_index->data_file_position();
|
||||
_context = _sst->data_consume_single_partition(_consumer,
|
||||
{ _lh_index->data_file_position(), _rh_index->data_file_position() });
|
||||
_will_likely_slice = will_likely_slice(slice);
|
||||
_index_in_current_partition = true;
|
||||
});
|
||||
});
|
||||
})
|
||||
, _fwd(fwd) { }
|
||||
|
||||
sstable_data_source(schema_ptr s, shared_sstable sst, mp_row_consumer&& consumer, sstable::disk_read_range toread, uint64_t last_end,
|
||||
std::unique_ptr<index_reader> lh_index = {}, std::unique_ptr<index_reader> rh_index = {})
|
||||
: _sst(std::move(sst))
|
||||
, _consumer(std::move(consumer))
|
||||
, _read_enabled(bool(toread))
|
||||
, _context(_sst->data_consume_rows(_consumer, std::move(toread), last_end))
|
||||
, _lh_index(std::move(lh_index))
|
||||
, _rh_index(std::move(rh_index))
|
||||
, _schema(std::move(s))
|
||||
{ }
|
||||
|
||||
sstable_data_source(single_partition_tag, schema_ptr s, shared_sstable sst, mp_row_consumer&& consumer,
|
||||
std::unique_ptr<index_reader> lh_index, std::unique_ptr<index_reader> rh_index)
|
||||
: _sst(std::move(sst))
|
||||
, _consumer(std::move(consumer))
|
||||
, _read_enabled(lh_index->data_file_position() != rh_index->data_file_position())
|
||||
, _context(_sst->data_consume_single_partition(_consumer,
|
||||
sstable::disk_read_range(lh_index->data_file_position(), rh_index->data_file_position())))
|
||||
, _lh_index(std::move(lh_index))
|
||||
, _rh_index(std::move(rh_index))
|
||||
, _schema(std::move(s))
|
||||
{ }
|
||||
|
||||
~sstable_data_source() {
|
||||
// Reference to _consumer is passed to data_consume_rows() in the constructor so we must not allow move/copy
|
||||
sstable_mutation_reader(sstable_mutation_reader&&) = delete;
|
||||
sstable_mutation_reader(const sstable_mutation_reader&) = delete;
|
||||
~sstable_mutation_reader() {
|
||||
auto close = [] (std::unique_ptr<index_reader>& ptr) {
|
||||
if (ptr) {
|
||||
auto f = ptr->close();
|
||||
@@ -788,67 +912,120 @@ struct sstable_data_source : public enable_lw_shared_from_this<sstable_data_sour
|
||||
close(_lh_index);
|
||||
close(_rh_index);
|
||||
}
|
||||
|
||||
private:
|
||||
static bool will_likely_slice(const query::partition_slice& slice) {
|
||||
return (!slice.default_row_ranges().empty() && !slice.default_row_ranges()[0].is_full())
|
||||
|| slice.get_specific_ranges();
|
||||
}
|
||||
index_reader& lh_index() {
|
||||
if (!_lh_index) {
|
||||
_lh_index = _sst->get_index_reader(_consumer.io_priority());
|
||||
}
|
||||
return *_lh_index;
|
||||
}
|
||||
|
||||
static bool will_likely_slice(const query::partition_slice& slice) {
|
||||
return (!slice.default_row_ranges().empty() && !slice.default_row_ranges()[0].is_full())
|
||||
|| slice.get_specific_ranges();
|
||||
}
|
||||
private:
|
||||
future<> advance_to_next_partition();
|
||||
future<streamed_mutation_opt> read_from_index();
|
||||
future<streamed_mutation_opt> read_from_datafile();
|
||||
public:
|
||||
// Assumes that we're currently positioned at partition boundary.
|
||||
future<streamed_mutation_opt> read_partition();
|
||||
// Can be called from any position.
|
||||
future<streamed_mutation_opt> read_next_partition();
|
||||
future<> fast_forward_to(const dht::partition_range&);
|
||||
};
|
||||
|
||||
class sstable_streamed_mutation : public streamed_mutation::impl {
|
||||
friend class mp_row_consumer;
|
||||
lw_shared_ptr<sstable_data_source> _ds;
|
||||
tombstone _t;
|
||||
position_in_partition::less_compare _cmp;
|
||||
position_in_partition::equal_compare _eq;
|
||||
public:
|
||||
sstable_streamed_mutation(schema_ptr s, dht::decorated_key dk, tombstone t, lw_shared_ptr<sstable_data_source> ds)
|
||||
: streamed_mutation::impl(s, std::move(dk), t)
|
||||
, _ds(std::move(ds))
|
||||
, _t(t)
|
||||
, _cmp(*s)
|
||||
, _eq(*s)
|
||||
{
|
||||
_ds->_consumer.set_streamed_mutation(this);
|
||||
}
|
||||
|
||||
sstable_streamed_mutation(sstable_streamed_mutation&&) = delete;
|
||||
|
||||
virtual future<> fill_buffer() final override {
|
||||
return do_until([this] { return !is_buffer_empty() || is_end_of_stream(); }, [this] {
|
||||
_ds->_consumer.push_ready_fragments();
|
||||
if (is_buffer_full() || is_end_of_stream()) {
|
||||
future<> advance_to_next_partition() {
|
||||
sstlog.trace("reader {}: advance_to_next_partition()", this);
|
||||
auto& consumer = _consumer;
|
||||
if (consumer.is_mutation_end()) {
|
||||
sstlog.trace("reader {}: already at partition boundary", this);
|
||||
_index_in_current_partition = false;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
if (_single_partition_read) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return (_index_in_current_partition
|
||||
? _lh_index->advance_to_next_partition()
|
||||
: lh_index().advance_to(dht::ring_position_view::for_after_key(*_current_partition_key))).then([this] {
|
||||
_index_in_current_partition = true;
|
||||
if (bool(_rh_index) && _lh_index->data_file_position() > _rh_index->data_file_position()) {
|
||||
_read_enabled = false;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return advance_context(_ds->_consumer.maybe_skip()).then([this] {
|
||||
return _ds->_context.read();
|
||||
});
|
||||
return _context->skip_to(_lh_index->element_kind(), _lh_index->data_file_position());
|
||||
});
|
||||
}
|
||||
|
||||
future<> fast_forward_to(position_range range) override {
|
||||
_end_of_stream = false;
|
||||
forward_buffer_to(range.start());
|
||||
return advance_context(_ds->_consumer.fast_forward_to(std::move(range)));
|
||||
// Starts the next partition using the partition key and tombstone that the
// left-hand index reader already provides. If the index reports no partition
// tombstone, falls back to reading the partition header from the data file.
future<> read_from_index() {
    sstlog.trace("reader {}: read from index", this);
    if (auto tomb = _lh_index->partition_tombstone()) {
        auto pkey = _lh_index->partition_key().to_partition_key(*_schema);
        auto dk = dht::global_partitioner().decorate_key(*_schema, std::move(pkey));
        _consumer.setup_for_partition(dk.key());
        on_next_partition(std::move(dk), tombstone(*tomb));
        return make_ready_future<>();
    }
    sstlog.trace("reader {}: no tombstone", this);
    return read_from_datafile();
}
|
||||
// Reads from the data consume context and, if the consumer produced a partition
// header, starts that partition. If the consumer has no mutation, nothing is
// emitted (logged as eof).
future<> read_from_datafile() {
    sstlog.trace("reader {}: read from data file", this);
    return _context->read().then([this] {
        if (auto mut = _consumer.get_mutation()) {
            auto dk = dht::global_partitioner().decorate_key(*_schema, std::move(mut->key));
            on_next_partition(std::move(dk), mut->tomb);
        } else {
            sstlog.trace("reader {}: eof", this);
        }
        return make_ready_future<>();
    });
}
|
||||
// Assumes that we're currently positioned at partition boundary.
|
||||
future<> read_partition() {
|
||||
sstlog.trace("reader {}: reading partition", this);
|
||||
|
||||
if (!_read_enabled) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
if (!_consumer.is_mutation_end()) {
|
||||
if (_single_partition_read) {
|
||||
_read_enabled = false;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
// FIXME: give more details from _context
|
||||
throw malformed_sstable_exception("consumer not at partition boundary", _sst->get_filename());
|
||||
}
|
||||
|
||||
// It's better to obtain partition information from the index if we already have it.
|
||||
// We can save on IO if the user will skip past the front of partition immediately.
|
||||
//
|
||||
// It is also better to pay the cost of reading the index if we know that we will
|
||||
// need to use the index anyway soon.
|
||||
//
|
||||
if (_index_in_current_partition) {
|
||||
if (_context->eof()) {
|
||||
sstlog.trace("reader {}: eof", this);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
if (_lh_index->partition_data_ready()) {
|
||||
return read_from_index();
|
||||
}
|
||||
if (_will_likely_slice) {
|
||||
return _lh_index->read_partition_data().then([this] {
|
||||
return read_from_index();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: advance index to current partition if _will_likely_slice
|
||||
return read_from_datafile();
|
||||
}
|
||||
// Can be called from any position.
|
||||
future<> read_next_partition() {
|
||||
sstlog.trace("reader {}: read next partition", this);
|
||||
// If next partition exists then on_next_partition will be called
|
||||
// and _end_of_stream will be set to false again.
|
||||
_end_of_stream = true;
|
||||
if (!_read_enabled) {
|
||||
sstlog.trace("reader {}: eof", this);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return advance_to_next_partition().then([this] {
|
||||
return read_partition();
|
||||
});
|
||||
}
|
||||
private:
|
||||
future<> advance_context(stdx::optional<position_in_partition_view> pos) {
|
||||
if (!pos) {
|
||||
return make_ready_future<>();
|
||||
@@ -856,35 +1033,171 @@ private:
|
||||
if (pos->is_before_all_fragments(*_schema)) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
assert (_current_partition_key);
|
||||
return [this] {
|
||||
if (!_ds->_index_in_current_partition) {
|
||||
_ds->_index_in_current_partition = true;
|
||||
return _ds->lh_index().advance_to(_key);
|
||||
if (!_index_in_current_partition) {
|
||||
_index_in_current_partition = true;
|
||||
return lh_index().advance_to(*_current_partition_key);
|
||||
}
|
||||
return make_ready_future();
|
||||
}().then([this, pos] {
|
||||
return _ds->lh_index().advance_to(*pos).then([this] {
|
||||
index_reader& idx = *_ds->_lh_index;
|
||||
return _ds->_context.skip_to(idx.element_kind(), idx.data_file_position());
|
||||
return lh_index().advance_to(*pos).then([this] {
|
||||
index_reader& idx = *_lh_index;
|
||||
return _context->skip_to(idx.element_kind(), idx.data_file_position());
|
||||
});
|
||||
});
|
||||
}
|
||||
void on_next_partition(dht::decorated_key key, tombstone tomb) {
|
||||
_partition_finished = false;
|
||||
_end_of_stream = false;
|
||||
_current_partition_key = std::move(key);
|
||||
push_mutation_fragment(
|
||||
mutation_fragment(partition_start(*_current_partition_key, tomb)));
|
||||
}
|
||||
bool is_initialized() const {
|
||||
return bool(_context);
|
||||
}
|
||||
future<> ensure_initialized() {
|
||||
if (is_initialized()) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return _initialize();
|
||||
}
|
||||
public:
|
||||
void on_end_of_stream() {
|
||||
if (_fwd == streamed_mutation::forwarding::yes) {
|
||||
_end_of_stream = true;
|
||||
} else {
|
||||
this->push_mutation_fragment(mutation_fragment(partition_end()));
|
||||
_partition_finished = true;
|
||||
}
|
||||
}
|
||||
// Repositions the reader onto the partition range `pr`.
//
// Makes sure the reader is initialized, drops buffered fragments, moves both index
// readers to the bounds of `pr` and forwards the data context to the matching
// byte range. If that range is empty, reading is disabled.
// `pr` is captured by reference and must stay valid until the returned future resolves.
virtual future<> fast_forward_to(const dht::partition_range& pr) override {
    return ensure_initialized().then([this, &pr] {
        if (!is_initialized()) {
            _end_of_stream = true;
            return make_ready_future<>();
        }
        clear_buffer();
        _partition_finished = true;
        _end_of_stream = false;
        assert(_lh_index);
        assert(_rh_index);
        auto lower = _lh_index->advance_to_start(pr);
        auto upper = _rh_index->advance_to_end(pr);
        return seastar::when_all_succeed(std::move(lower), std::move(upper)).then([this] {
            auto start = _lh_index->data_file_position();
            auto end = _rh_index->data_file_position();
            const bool has_data = start != end;
            _read_enabled = has_data;
            _index_in_current_partition = has_data;
            if (!has_data) {
                return make_ready_future<>();
            }
            return _context->fast_forward_to(start, end);
        });
    });
}
|
||||
virtual future<> fill_buffer() override {
|
||||
if (_end_of_stream) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
if (!is_initialized()) {
|
||||
return _initialize().then([this] {
|
||||
if (!is_initialized()) {
|
||||
_end_of_stream = true;
|
||||
return make_ready_future<>();
|
||||
} else {
|
||||
return fill_buffer();
|
||||
}
|
||||
});
|
||||
}
|
||||
return do_until([this] { return is_end_of_stream() || is_buffer_full(); }, [this] {
|
||||
if (_partition_finished) {
|
||||
return read_next_partition();
|
||||
} else {
|
||||
return do_until([this] { return is_buffer_full() || _partition_finished || _end_of_stream; }, [this] {
|
||||
_consumer.push_ready_fragments();
|
||||
if (is_buffer_full() || _partition_finished || _end_of_stream) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return advance_context(_consumer.maybe_skip()).then([this] {
|
||||
return _context->read();
|
||||
});
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
virtual void next_partition() override {
|
||||
if (is_initialized()) {
|
||||
if (_fwd == streamed_mutation::forwarding::yes) {
|
||||
clear_buffer();
|
||||
_partition_finished = true;
|
||||
_end_of_stream = false;
|
||||
} else {
|
||||
clear_buffer_to_next_partition();
|
||||
if (!_partition_finished && is_buffer_empty()) {
|
||||
_partition_finished = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
// If _ds is not created then next_partition() has no effect because there was no partition_start emitted yet.
|
||||
}
|
||||
// Forwards the reader within the current partition to the position range `cr`.
// Buffered fragments before cr.start() are dropped. If the partition has already
// been finished there is nothing to forward to, so the stream is marked as ended.
virtual future<> fast_forward_to(position_range cr) override {
    forward_buffer_to(cr.start());
    _end_of_stream = _partition_finished;
    if (_partition_finished) {
        return make_ready_future<>();
    }
    return advance_context(_consumer.fast_forward_to(std::move(cr)));
}
|
||||
};
|
||||
|
||||
flat_mutation_reader sstable::read_rows_flat(schema_ptr schema, const io_priority_class& pc, streamed_mutation::forwarding fwd) {
|
||||
return make_flat_mutation_reader<sstable_mutation_reader>(shared_from_this(), std::move(schema), pc, no_resource_tracking(), fwd);
|
||||
}
|
||||
|
||||
flat_mutation_reader
|
||||
sstables::sstable::read_row_flat(schema_ptr schema,
|
||||
dht::ring_position_view key,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
reader_resource_tracker resource_tracker,
|
||||
streamed_mutation::forwarding fwd)
|
||||
{
|
||||
return make_flat_mutation_reader<sstable_mutation_reader>(shared_from_this(), std::move(schema), std::move(key), slice, pc, std::move(resource_tracker), fwd, mutation_reader::forwarding::no);
|
||||
}
|
||||
|
||||
flat_mutation_reader
|
||||
sstable::read_range_rows_flat(schema_ptr schema,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
reader_resource_tracker resource_tracker,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr) {
|
||||
return make_flat_mutation_reader<sstable_mutation_reader>(
|
||||
shared_from_this(), std::move(schema), range, slice, pc, std::move(resource_tracker), fwd, fwd_mr);
|
||||
}
|
||||
|
||||
row_consumer::proceed
|
||||
mp_row_consumer::push_ready_fragments_with_ready_set() {
|
||||
// We're merging two streams here, one is _range_tombstones
|
||||
// and the other is the main fragment stream represented by
|
||||
// _ready and _out_of_range (which means end of stream).
|
||||
|
||||
while (!_sm->is_buffer_full()) {
|
||||
while (!_reader->is_buffer_full()) {
|
||||
auto mfo = _range_tombstones.get_next(*_ready);
|
||||
if (mfo) {
|
||||
_sm->push_mutation_fragment(std::move(*mfo));
|
||||
_reader->push_mutation_fragment(std::move(*mfo));
|
||||
} else {
|
||||
_sm->push_mutation_fragment(std::move(*_ready));
|
||||
_reader->push_mutation_fragment(std::move(*_ready));
|
||||
_ready = {};
|
||||
return proceed(!_sm->is_buffer_full());
|
||||
return proceed(!_reader->is_buffer_full());
|
||||
}
|
||||
}
|
||||
return proceed::no;
|
||||
@@ -893,13 +1206,13 @@ mp_row_consumer::push_ready_fragments_with_ready_set() {
|
||||
row_consumer::proceed
|
||||
mp_row_consumer::push_ready_fragments_out_of_range() {
|
||||
// Emit all range tombstones relevant to the current forwarding range first.
|
||||
while (!_sm->is_buffer_full()) {
|
||||
while (!_reader->is_buffer_full()) {
|
||||
auto mfo = _range_tombstones.get_next(_fwd_end);
|
||||
if (!mfo) {
|
||||
_sm->_end_of_stream = true;
|
||||
_reader->on_end_of_stream();
|
||||
break;
|
||||
}
|
||||
_sm->push_mutation_fragment(std::move(*mfo));
|
||||
_reader->push_mutation_fragment(std::move(*mfo));
|
||||
}
|
||||
return proceed::no;
|
||||
}
|
||||
@@ -907,7 +1220,7 @@ mp_row_consumer::push_ready_fragments_out_of_range() {
|
||||
row_consumer::proceed
|
||||
mp_row_consumer::push_ready_fragments() {
|
||||
if (_ready) {
|
||||
return push_ready_fragments_with_ready_set();
|
||||
return push_ready_fragments_with_ready_set();
|
||||
}
|
||||
|
||||
if (_out_of_range) {
|
||||
@@ -972,320 +1285,4 @@ stdx::optional<position_in_partition_view> mp_row_consumer::maybe_skip() {
|
||||
return _ck_ranges_walker->lower_bound();
|
||||
}
|
||||
|
||||
// Reads the partition stored under the sstable-format `key`.
//
// The key is converted to a decorated key with the global partitioner; the
// decorated key is kept alive by do_with() while the ring-position overload of
// read_row() runs. `slice` and `pc` are captured by reference.
future<streamed_mutation_opt>
sstables::sstable::read_row(schema_ptr schema,
                            const sstables::key& key,
                            const query::partition_slice& slice,
                            const io_priority_class& pc,
                            reader_resource_tracker resource_tracker,
                            streamed_mutation::forwarding fwd)
{
    auto read_decorated = [this, schema, &slice, &pc, resource_tracker = std::move(resource_tracker), fwd] (auto& dk) {
        return this->read_row(schema, dk, slice, pc, std::move(resource_tracker), fwd);
    };
    return do_with(
            dht::global_partitioner().decorate_key(*schema, key.to_partition_key(*schema)),
            std::move(read_decorated));
}
|
||||
|
||||
// Checks that the view `v` still holds at least `len` bytes.
//
// Throws malformed_sstable_exception, reporting both the expected and the
// remaining size, when it does not.
static inline void ensure_len(bytes_view v, size_t len) {
    if (v.size() < len) {
        // sprint() is used with printf-style conversions throughout this file
        // (e.g. "Column name too large (%d > %d)"); "{}" placeholders would not
        // be substituted with the values.
        throw malformed_sstable_exception(sprint("Expected %d bytes, but remaining is %d", len, v.size()));
    }
}
|
||||
|
||||
// Overload of read_be for `signed char` buffers: reinterprets the pointer as
// `const char*` and delegates to the global ::read_be<T>.
template <typename T>
static inline T read_be(const signed char* p) {
    const char* raw = reinterpret_cast<const char*>(p);
    return ::read_be<T>(raw);
}
|
||||
|
||||
template<typename T>
|
||||
static inline T consume_be(bytes_view& p) {
|
||||
ensure_len(p, sizeof(T));
|
||||
T i = read_be<T>(p.data());
|
||||
p.remove_prefix(sizeof(T));
|
||||
return i;
|
||||
}
|
||||
|
||||
// Splits the first `len` bytes off the front of `p` and returns them as a
// view; `p` is advanced past them. ensure_len() validates the size first.
static inline bytes_view consume_bytes(bytes_view& p, size_t len) {
    ensure_len(p, len);
    bytes_view head(p.data(), len);
    p.remove_prefix(len);
    return head;
}
|
||||
|
||||
// Decodes the serialized promoted index held in _bytes.
//
// Layout as consumed here: a deletion time (32-bit local_deletion_time,
// 64-bit marked_for_delete_at), a 32-bit block count, then for each block:
// 16-bit length + start name, 16-bit length + end name, 64-bit offset and
// 64-bit width. consume_be()/consume_bytes() validate sizes via ensure_len().
promoted_index promoted_index_view::parse(const schema& s) const {
    bytes_view remaining = _bytes;

    sstables::deletion_time del_time;
    del_time.local_deletion_time = consume_be<uint32_t>(remaining);
    del_time.marked_for_delete_at = consume_be<uint64_t>(remaining);

    auto blocks_left = consume_be<uint32_t>(remaining);
    std::deque<promoted_index::entry> entries;
    for (; blocks_left != 0; --blocks_left) {
        uint16_t start_len = consume_be<uint16_t>(remaining);
        composite_view start_ck(consume_bytes(remaining, start_len), s.is_compound());

        uint16_t end_len = consume_be<uint16_t>(remaining);
        composite_view end_ck(consume_bytes(remaining, end_len), s.is_compound());

        uint64_t offset = consume_be<uint64_t>(remaining);
        uint64_t width = consume_be<uint64_t>(remaining);

        entries.push_back(promoted_index::entry{start_ck, end_ck, offset, width});
    }

    return promoted_index{del_time, std::move(entries)};
}
|
||||
|
||||
// Decodes only the leading deletion time of the serialized promoted index:
// a 32-bit local_deletion_time followed by a 64-bit marked_for_delete_at.
sstables::deletion_time promoted_index_view::get_deletion_time() const {
    bytes_view cursor = _bytes;
    sstables::deletion_time result;
    result.local_deletion_time = consume_be<uint32_t>(cursor);
    result.marked_for_delete_at = consume_be<uint64_t>(cursor);
    return result;
}
|
||||
|
||||
|
||||
class sstable_mutation_reader : public mutation_reader::impl {
|
||||
private:
|
||||
lw_shared_ptr<sstable_data_source> _ds;
|
||||
std::function<future<lw_shared_ptr<sstable_data_source>> ()> _get_data_source;
|
||||
public:
|
||||
sstable_mutation_reader(shared_sstable sst, schema_ptr schema, sstable::disk_read_range toread, uint64_t last_end,
|
||||
const io_priority_class &pc,
|
||||
reader_resource_tracker resource_tracker,
|
||||
streamed_mutation::forwarding fwd)
|
||||
: _get_data_source([this, sst = std::move(sst), s = std::move(schema), toread, last_end, &pc, resource_tracker = std::move(resource_tracker), fwd] {
|
||||
auto consumer = mp_row_consumer(s, s->full_slice(), pc, std::move(resource_tracker), fwd);
|
||||
auto ds = make_lw_shared<sstable_data_source>(std::move(s), std::move(sst), std::move(consumer), std::move(toread), last_end);
|
||||
return make_ready_future<lw_shared_ptr<sstable_data_source>>(std::move(ds));
|
||||
}) { }
|
||||
sstable_mutation_reader(shared_sstable sst, schema_ptr schema,
|
||||
const io_priority_class &pc,
|
||||
reader_resource_tracker resource_tracker,
|
||||
streamed_mutation::forwarding fwd)
|
||||
: _get_data_source([this, sst = std::move(sst), s = std::move(schema), &pc, resource_tracker = std::move(resource_tracker), fwd] {
|
||||
auto consumer = mp_row_consumer(s, s->full_slice(), pc, std::move(resource_tracker), fwd);
|
||||
auto ds = make_lw_shared<sstable_data_source>(std::move(s), std::move(sst), std::move(consumer));
|
||||
return make_ready_future<lw_shared_ptr<sstable_data_source>>(std::move(ds));
|
||||
}) { }
|
||||
sstable_mutation_reader(shared_sstable sst,
|
||||
schema_ptr schema,
|
||||
const dht::partition_range& pr,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
reader_resource_tracker resource_tracker,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr)
|
||||
: _get_data_source([this, pr, sst = std::move(sst), s = std::move(schema), &pc, &slice, resource_tracker = std::move(resource_tracker), fwd, fwd_mr] () mutable {
|
||||
auto lh_index = sst->get_index_reader(pc); // lh = left hand
|
||||
auto rh_index = sst->get_index_reader(pc);
|
||||
auto f = seastar::when_all_succeed(lh_index->advance_to_start(pr), rh_index->advance_to_end(pr));
|
||||
return f.then([this, lh_index = std::move(lh_index), rh_index = std::move(rh_index), sst = std::move(sst), s = std::move(s), &pc, &slice, resource_tracker = std::move(resource_tracker), fwd, fwd_mr] () mutable {
|
||||
sstable::disk_read_range drr{lh_index->data_file_position(),
|
||||
rh_index->data_file_position()};
|
||||
auto consumer = mp_row_consumer(s, slice, pc, std::move(resource_tracker), fwd);
|
||||
auto ds = make_lw_shared<sstable_data_source>(std::move(s), std::move(sst), std::move(consumer), drr, (fwd_mr ? sst->data_size() : drr.end), std::move(lh_index), std::move(rh_index));
|
||||
ds->_index_in_current_partition = true;
|
||||
ds->_will_likely_slice = sstable_data_source::will_likely_slice(slice);
|
||||
return ds;
|
||||
});
|
||||
}) { }
|
||||
|
||||
// Reference to _consumer is passed to data_consume_rows() in the constructor so we must not allow move/copy
|
||||
sstable_mutation_reader(sstable_mutation_reader&&) = delete;
|
||||
sstable_mutation_reader(const sstable_mutation_reader&) = delete;
|
||||
|
||||
future<streamed_mutation_opt> operator()() {
|
||||
if (_ds) {
|
||||
return _ds->read_next_partition();
|
||||
}
|
||||
return (_get_data_source)().then([this] (lw_shared_ptr<sstable_data_source> ds) {
|
||||
// We must get the sstable_data_source and backup it in case we enable read
|
||||
// again in the future.
|
||||
_ds = std::move(ds);
|
||||
return _ds->read_partition();
|
||||
});
|
||||
}
|
||||
|
||||
future<> fast_forward_to(const dht::partition_range& pr) {
|
||||
if (_ds) {
|
||||
return _ds->fast_forward_to(pr);
|
||||
}
|
||||
return (_get_data_source)().then([this, &pr] (lw_shared_ptr<sstable_data_source> ds) {
|
||||
// We must get the sstable_data_source and backup it in case we enable read
|
||||
// again in the future.
|
||||
_ds = std::move(ds);
|
||||
return _ds->fast_forward_to(pr);
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
// Repositions the data source on the partition range `pr`.
//
// Both index readers must exist. The left-hand index is advanced to the start
// of `pr` and the right-hand one to its end. If the resulting data file
// positions differ, reading is enabled and the parsing context is forwarded to
// that span; if they are equal, reading is disabled.
future<> sstable_data_source::fast_forward_to(const dht::partition_range& pr) {
    assert(_lh_index);
    assert(_rh_index);
    auto lower_ready = _lh_index->advance_to_start(pr);
    auto upper_ready = _rh_index->advance_to_end(pr);
    return seastar::when_all_succeed(std::move(lower_ready), std::move(upper_ready)).then([this] {
        auto start = _lh_index->data_file_position();
        auto end = _rh_index->data_file_position();
        if (start == end) {
            _index_in_current_partition = false;
            _read_enabled = false;
            return make_ready_future<>();
        }
        _read_enabled = true;
        _index_in_current_partition = true;
        return _context.fast_forward_to(start, end);
    });
}
|
||||
|
||||
// Moves the parsing context to the beginning of the next partition.
//
// Nothing is skipped when the consumer already sits at a partition boundary.
// Otherwise the left-hand index is advanced: to its next partition when it is
// positioned in the current one, or past the current key when it is not. The
// context then skips to the index's data file position.
future<> sstable_data_source::advance_to_next_partition() {
    sstlog.trace("reader {}: advance_to_next_partition()", this);
    if (_consumer.is_mutation_end()) {
        sstlog.trace("reader {}: already at partition boundary", this);
        _index_in_current_partition = false;
        return make_ready_future<>();
    }
    auto index_advanced = _index_in_current_partition
            ? _lh_index->advance_to_next_partition()
            : lh_index().advance_to(dht::ring_position_view::for_after_key(*_key));
    return index_advanced.then([this] {
        _index_in_current_partition = true;
        return _context.skip_to(_lh_index->element_kind(), _lh_index->data_file_position());
    });
}
|
||||
|
||||
// Advances to the next partition and reads it. Resolves to a disengaged
// optional when reading is disabled.
future<streamed_mutation_opt> sstable_data_source::read_next_partition() {
    sstlog.trace("reader {}: read next partition", this);
    if (_read_enabled) {
        return advance_to_next_partition().then([this] {
            return read_partition();
        });
    }
    sstlog.trace("reader {}: eof", this);
    return make_ready_future<streamed_mutation_opt>();
}
|
||||
|
||||
// Reads the partition at the current position.
//
// Throws malformed_sstable_exception if the consumer is not at a partition
// boundary. Resolves to a disengaged optional when reading is disabled or the
// context is at eof.
future<streamed_mutation_opt> sstable_data_source::read_partition() {
    sstlog.trace("reader {}: reading partition", this);

    if (!_consumer.is_mutation_end()) {
        // FIXME: give more details from _context
        throw malformed_sstable_exception("consumer not at partition boundary", _sst->get_filename());
    }

    if (!_read_enabled) {
        return make_ready_future<streamed_mutation_opt>();
    }

    if (!_index_in_current_partition) {
        // FIXME: advance index to current partition if _will_likely_slice
        return read_from_datafile();
    }

    if (_context.eof()) {
        sstlog.trace("reader {}: eof", this);
        return make_ready_future<streamed_mutation_opt>(stdx::nullopt);
    }

    // Prefer partition information from the index when it is already there:
    // it saves IO if the user immediately skips past the front of the
    // partition.
    if (_lh_index->partition_data_ready()) {
        return read_from_index();
    }

    // Paying for the index read is also worthwhile when the index is likely
    // to be needed soon anyway.
    if (!_will_likely_slice) {
        return read_from_datafile();
    }

    return _lh_index->read_partition_data().then([this] {
        return read_from_index();
    });
}
|
||||
|
||||
// Starts a partition using the key and tombstone stored in the left-hand
// index. Falls back to read_from_datafile() when the index carries no
// partition tombstone.
future<streamed_mutation_opt> sstable_data_source::read_from_index() {
    sstlog.trace("reader {}: read from index", this);
    auto index_tomb = _lh_index->partition_tombstone();
    if (!index_tomb) {
        sstlog.trace("reader {}: no tombstone", this);
        return read_from_datafile();
    }
    _key = dht::global_partitioner().decorate_key(
            *_schema, _lh_index->partition_key().to_partition_key(*_schema));
    auto partition = make_streamed_mutation<sstable_streamed_mutation>(
            _schema, *_key, tombstone(*index_tomb), shared_from_this());
    _consumer.setup_for_partition(_key->key());
    return make_ready_future<streamed_mutation_opt>(std::move(partition));
}
|
||||
|
||||
// Starts a partition by parsing the data file: runs the context, then takes
// the mutation produced by the consumer. Resolves to a disengaged optional
// when the consumer produced none.
future<streamed_mutation_opt> sstable_data_source::read_from_datafile() {
    sstlog.trace("reader {}: read from data file", this);
    return _context.read().then([this] {
        auto parsed = _consumer.get_mutation();
        if (parsed) {
            _key = dht::global_partitioner().decorate_key(*_schema, std::move(parsed->key));
            auto partition = make_streamed_mutation<sstable_streamed_mutation>(
                    _schema, *_key, parsed->tomb, shared_from_this());
            return make_ready_future<streamed_mutation_opt>(std::move(partition));
        }
        sstlog.trace("reader {}: eof", this);
        return make_ready_future<streamed_mutation_opt>();
    });
}
|
||||
|
||||
// Creates a mutation reader over this sstable, built from the
// sstable_mutation_reader constructor that takes no range or slice and
// running without resource tracking.
mutation_reader sstable::read_rows(schema_ptr schema, const io_priority_class& pc, streamed_mutation::forwarding fwd) {
    auto self = shared_from_this();
    auto tracker = no_resource_tracking();
    return make_mutation_reader<sstable_mutation_reader>(std::move(self), schema, pc, std::move(tracker), fwd);
}
|
||||
|
||||
static
|
||||
future<> advance_to_upper_bound(index_reader& ix, const schema& s, const query::partition_slice& slice, dht::ring_position_view key) {
|
||||
auto& ranges = slice.row_ranges(s, *key.key());
|
||||
if (ranges.empty()) {
|
||||
return ix.advance_past(position_in_partition_view::for_static_row());
|
||||
} else {
|
||||
return ix.advance_past(position_in_partition_view::for_range_end(ranges[ranges.size() - 1]));
|
||||
}
|
||||
}
|
||||
|
||||
// Reads the single partition at ring position `key`, restricted to `slice`.
//
// The index is consulted first. If the key is absent, a false positive is
// recorded on the filter tracker and a disengaged optional is returned.
// Otherwise a true positive is recorded, a copy of the index reader is
// advanced to the upper bound of the slice, and a single-partition data
// source is built between the two index positions.
// `slice` and `pc` are captured by reference.
future<streamed_mutation_opt>
sstables::sstable::read_row(schema_ptr schema,
                            dht::ring_position_view key,
                            const query::partition_slice& slice,
                            const io_priority_class& pc,
                            reader_resource_tracker resource_tracker,
                            streamed_mutation::forwarding fwd)
{
    auto lower = get_index_reader(pc);
    auto lookup = lower->advance_and_check_if_present(key);
    return lookup.then([this, &slice, &pc, tracker = std::move(resource_tracker), fwd, lower = std::move(lower), sch = std::move(schema), key] (bool present) mutable {
        if (!present) {
            _filter_tracker.add_false_positive();
            return make_ready_future<streamed_mutation_opt>(stdx::nullopt);
        }

        _filter_tracker.add_true_positive();

        auto upper = std::make_unique<index_reader>(*lower);
        auto bounded = advance_to_upper_bound(*upper, *_schema, slice, key);
        return bounded.then([this, &slice, &pc, tracker = std::move(tracker), fwd, lower = std::move(lower), upper = std::move(upper), sch = std::move(sch)] () mutable {
            mp_row_consumer rows(sch, slice, pc, std::move(tracker), fwd);
            auto source = make_lw_shared<sstable_data_source>(sstable_data_source::single_partition_tag(), std::move(sch),
                    shared_from_this(), std::move(rows), std::move(lower), std::move(upper));
            source->_will_likely_slice = sstable_data_source::will_likely_slice(slice);
            source->_index_in_current_partition = true;
            // The empty continuation holds a reference to the data source
            // until read_partition() has resolved.
            return source->read_partition().finally([source]{});
        });
    });
}
|
||||
|
||||
// Creates a mutation reader over the partitions in `range`, restricted to
// `slice`. `fwd` and `fwd_mr` are passed through to the reader unchanged.
mutation_reader
sstable::read_range_rows(schema_ptr schema,
                         const dht::partition_range& range,
                         const query::partition_slice& slice,
                         const io_priority_class& pc,
                         reader_resource_tracker resource_tracker,
                         streamed_mutation::forwarding fwd,
                         mutation_reader::forwarding fwd_mr) {
    auto self = shared_from_this();
    return make_mutation_reader<sstable_mutation_reader>(
            std::move(self), std::move(schema), range, slice, pc, std::move(resource_tracker), fwd, fwd_mr);
}
|
||||
|
||||
}
|
||||
|
||||
@@ -387,66 +387,39 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// data_consume_rows() and data_consume_rows_at_once() both can read just a
|
||||
// single row or many rows. The difference is that data_consume_rows_at_once()
|
||||
// is optimized for reading one or a few rows (reading it all into memory), while
|
||||
// data_consume_rows() uses a read buffer, so not all the rows need to fit
|
||||
// in memory at the same time (they are delivered to the consumer one by one).
|
||||
class data_consume_context::impl {
|
||||
private:
|
||||
shared_sstable _sst;
|
||||
std::unique_ptr<data_consume_rows_context> _ctx;
|
||||
public:
|
||||
impl(shared_sstable sst, row_consumer& consumer, input_stream<char>&& input, uint64_t start, uint64_t maxlen)
|
||||
: _sst(std::move(sst))
|
||||
, _ctx(new data_consume_rows_context(consumer, std::move(input), start, maxlen))
|
||||
{ }
|
||||
~impl() {
|
||||
if (_ctx) {
|
||||
auto f = _ctx->close();
|
||||
f.handle_exception([ctx = std::move(_ctx), sst = std::move(_sst)] (auto) { });
|
||||
}
|
||||
}
|
||||
future<> read() {
|
||||
return _ctx->consume_input(*_ctx);
|
||||
}
|
||||
future<> fast_forward_to(uint64_t begin, uint64_t end) {
|
||||
_ctx->reset(indexable_element::partition);
|
||||
return _ctx->fast_forward_to(begin, end);
|
||||
}
|
||||
future<> skip_to(indexable_element el, uint64_t begin) {
|
||||
sstlog.trace("data_consume_rows_context {}: skip_to({} -> {}, el={})", _ctx.get(), _ctx->position(), begin, static_cast<int>(el));
|
||||
if (begin <= _ctx->position()) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
_ctx->reset(el);
|
||||
return _ctx->skip_to(begin);
|
||||
}
|
||||
bool eof() const {
|
||||
return _ctx->eof();
|
||||
data_consume_context::~data_consume_context() {
|
||||
if (_ctx) {
|
||||
auto f = _ctx->close();
|
||||
f.handle_exception([ctx = std::move(_ctx), sst = std::move(_sst)](auto) {});
|
||||
}
|
||||
};
|
||||
data_consume_context::data_consume_context(data_consume_context&& o) noexcept = default;
|
||||
data_consume_context& data_consume_context::operator=(data_consume_context&& o) noexcept = default;
|
||||
|
||||
data_consume_context::~data_consume_context() = default;
|
||||
data_consume_context::data_consume_context(data_consume_context&& o) noexcept
|
||||
: _pimpl(std::move(o._pimpl))
|
||||
data_consume_context::data_consume_context(shared_sstable sst, row_consumer& consumer, input_stream<char>&& input, uint64_t start, uint64_t maxlen)
|
||||
: _sst(std::move(sst)), _ctx(new data_consume_rows_context(consumer, std::move(input), start, maxlen))
|
||||
{ }
|
||||
data_consume_context& data_consume_context::operator=(data_consume_context&& o) noexcept {
|
||||
_pimpl = std::move(o._pimpl);
|
||||
return *this;
|
||||
data_consume_context::data_consume_context() = default;
|
||||
data_consume_context::operator bool() const noexcept {
|
||||
return bool(_ctx);
|
||||
}
|
||||
data_consume_context::data_consume_context(std::unique_ptr<impl> p) : _pimpl(std::move(p)) { }
|
||||
future<> data_consume_context::read() {
|
||||
return _pimpl->read();
|
||||
return _ctx->consume_input(*_ctx);
|
||||
}
|
||||
future<> data_consume_context::fast_forward_to(uint64_t begin, uint64_t end) {
|
||||
return _pimpl->fast_forward_to(begin, end);
|
||||
_ctx->reset(indexable_element::partition);
|
||||
return _ctx->fast_forward_to(begin, end);
|
||||
}
|
||||
future<> data_consume_context::skip_to(indexable_element el, uint64_t begin) {
|
||||
return _pimpl->skip_to(el, begin);
|
||||
sstlog.trace("data_consume_rows_context {}: skip_to({} -> {}, el={})", _ctx.get(), _ctx->position(), begin, static_cast<int>(el));
|
||||
if (begin <= _ctx->position()) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
_ctx->reset(el);
|
||||
return _ctx->skip_to(begin);
|
||||
}
|
||||
bool data_consume_context::eof() const {
|
||||
return _pimpl->eof();
|
||||
return _ctx->eof();
|
||||
}
|
||||
|
||||
data_consume_context sstable::data_consume_rows(
|
||||
@@ -456,16 +429,14 @@ data_consume_context sstable::data_consume_rows(
|
||||
// This potentially enables read-ahead beyond end, until last_end, which
|
||||
// can be beneficial if the user wants to fast_forward_to() on the
|
||||
// returned context, and may make small skips.
|
||||
return std::make_unique<data_consume_context::impl>(shared_from_this(),
|
||||
consumer, data_stream(toread.start, last_end - toread.start,
|
||||
consumer.io_priority(), consumer.resource_tracker(), _partition_range_history), toread.start, toread.end - toread.start);
|
||||
return { shared_from_this(), consumer, data_stream(toread.start, last_end - toread.start,
|
||||
consumer.io_priority(), consumer.resource_tracker(), _partition_range_history), toread.start, toread.end - toread.start };
|
||||
}
|
||||
|
||||
data_consume_context sstable::data_consume_single_partition(
|
||||
row_consumer& consumer, sstable::disk_read_range toread) {
|
||||
return std::make_unique<data_consume_context::impl>(shared_from_this(),
|
||||
consumer, data_stream(toread.start, toread.end - toread.start,
|
||||
consumer.io_priority(), consumer.resource_tracker(), _single_partition_history), toread.start, toread.end - toread.start);
|
||||
return { shared_from_this(), consumer, data_stream(toread.start, toread.end - toread.start,
|
||||
consumer.io_priority(), consumer.resource_tracker(), _single_partition_history), toread.start, toread.end - toread.start };
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1321,8 +1321,14 @@ future<> sstable::update_info_for_opened_data() {
|
||||
// Get disk usage for this sstable (includes all components).
|
||||
_bytes_on_disk = 0;
|
||||
return do_for_each(_recognized_components, [this] (component_type c) {
|
||||
return this->sstable_write_io_check([&] {
|
||||
return engine().file_size(this->filename(c));
|
||||
return this->sstable_write_io_check([&, c] {
|
||||
return engine().file_exists(this->filename(c)).then([this, c] (bool exists) {
|
||||
// ignore summary that isn't present in disk but was previously generated by read_summary().
|
||||
if (!exists && c == component_type::Summary && _components->summary.memory_footprint()) {
|
||||
return make_ready_future<uint64_t>(0);
|
||||
}
|
||||
return engine().file_size(this->filename(c));
|
||||
});
|
||||
}).then([this] (uint64_t bytes) {
|
||||
_bytes_on_disk += bytes;
|
||||
});
|
||||
@@ -1432,6 +1438,110 @@ static composite::eoc bound_kind_to_end_marker(bound_kind end_kind) {
|
||||
: composite::eoc::end;
|
||||
}
|
||||
|
||||
class bytes_writer_for_column_name {
|
||||
bytes _buf;
|
||||
bytes::iterator _pos;
|
||||
public:
|
||||
void prepare(size_t size) {
|
||||
_buf = bytes(bytes::initialized_later(), size);
|
||||
_pos = _buf.begin();
|
||||
}
|
||||
|
||||
template<typename... Args>
|
||||
void write(Args&&... args) {
|
||||
auto write_one = [this] (bytes_view data) {
|
||||
_pos = std::copy(data.begin(), data.end(), _pos);
|
||||
};
|
||||
auto ignore = { (write_one(bytes_view(args)), 0)... };
|
||||
(void)ignore;
|
||||
}
|
||||
|
||||
bytes&& release() && {
|
||||
return std::move(_buf);
|
||||
}
|
||||
};
|
||||
|
||||
class file_writer_for_column_name {
|
||||
file_writer& _fw;
|
||||
public:
|
||||
file_writer_for_column_name(file_writer& fw) : _fw(fw) { }
|
||||
|
||||
void prepare(uint16_t size) {
|
||||
sstables::write(_fw, size);
|
||||
}
|
||||
|
||||
template<typename... Args>
|
||||
void write(Args&&... args) {
|
||||
sstables::write(_fw, std::forward<Args>(args)...);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Writer>
|
||||
static void write_compound_non_dense_column_name(Writer& out, const composite& clustering_key, const std::vector<bytes_view>& column_names, composite::eoc marker = composite::eoc::none) {
|
||||
// was defined in the schema, for example.
|
||||
auto c = composite::from_exploded(column_names, true, marker);
|
||||
auto ck_bview = bytes_view(clustering_key);
|
||||
|
||||
// The marker is not a component, so if the last component is empty (IOW,
|
||||
// only serializes to the marker), then we just replace the key's last byte
|
||||
// with the marker. If the component however it is not empty, then the
|
||||
// marker should be in the end of it, and we just join them together as we
|
||||
// do for any normal component
|
||||
if (c.size() == 1) {
|
||||
ck_bview.remove_suffix(1);
|
||||
}
|
||||
size_t sz = ck_bview.size() + c.size();
|
||||
if (sz > std::numeric_limits<uint16_t>::max()) {
|
||||
throw std::runtime_error(sprint("Column name too large (%d > %d)", sz, std::numeric_limits<uint16_t>::max()));
|
||||
}
|
||||
out.prepare(uint16_t(sz));
|
||||
out.write(ck_bview, c);
|
||||
}
|
||||
|
||||
// file_writer overload: wraps `out` in a file_writer_for_column_name and
// delegates to the generic writer-based implementation.
static void write_compound_non_dense_column_name(file_writer& out, const composite& clustering_key, const std::vector<bytes_view>& column_names, composite::eoc marker = composite::eoc::none) {
    file_writer_for_column_name adapter(out);
    write_compound_non_dense_column_name(adapter, clustering_key, column_names, marker);
}
|
||||
|
||||
template<typename Writer>
|
||||
static void write_column_name(Writer& out, bytes_view column_names) {
|
||||
size_t sz = column_names.size();
|
||||
if (sz > std::numeric_limits<uint16_t>::max()) {
|
||||
throw std::runtime_error(sprint("Column name too large (%d > %d)", sz, std::numeric_limits<uint16_t>::max()));
|
||||
}
|
||||
out.prepare(uint16_t(sz));
|
||||
out.write(column_names);
|
||||
}
|
||||
|
||||
// file_writer overload: wraps `out` in a file_writer_for_column_name and
// delegates to the generic writer-based implementation.
static void write_column_name(file_writer& out, bytes_view column_names) {
    file_writer_for_column_name adapter(out);
    write_column_name(adapter, column_names);
}
|
||||
|
||||
template<typename Writer>
|
||||
static void write_column_name(Writer& out, const schema& s, const composite& clustering_element, const std::vector<bytes_view>& column_names, composite::eoc marker = composite::eoc::none) {
|
||||
if (s.is_dense()) {
|
||||
write_column_name(out, bytes_view(clustering_element));
|
||||
} else if (s.is_compound()) {
|
||||
write_compound_non_dense_column_name(out, clustering_element, column_names, marker);
|
||||
} else {
|
||||
write_column_name(out, column_names[0]);
|
||||
}
|
||||
}
|
||||
|
||||
// Writes one bound of a range tombstone.
//
// When _correctly_serialize_non_compound_range_tombstones is not set and the
// clustering element is not compound, the element's values are re-serialized
// with composite::serialize_value(vals, true) and written in the compound
// non-dense form. In every other case the schema-driven write_column_name()
// is used.
void sstable::write_range_tombstone_bound(file_writer& out,
        const schema& s,
        const composite& clustering_element,
        const std::vector<bytes_view>& column_names,
        composite::eoc marker) {
    if (_correctly_serialize_non_compound_range_tombstones || clustering_element.is_compound()) {
        write_column_name(out, s, clustering_element, column_names, marker);
        return;
    }
    auto vals = clustering_element.values();
    write_compound_non_dense_column_name(out, composite::serialize_value(vals, true), column_names, marker);
}
|
||||
|
||||
static void output_promoted_index_entry(bytes_ostream& promoted_index,
|
||||
const bytes& first_col,
|
||||
const bytes& last_col,
|
||||
@@ -1450,29 +1560,6 @@ static void output_promoted_index_entry(bytes_ostream& promoted_index,
|
||||
promoted_index.write(q, 8);
|
||||
}
|
||||
|
||||
// FIXME: use this in write_column_name() instead of repeating the code
|
||||
// Builds, in memory, the column name formed by a serialized clustering key
// followed by the composite built from `column_names` and `marker`. Throws
// std::runtime_error when the result does not fit in 16 bits.
static bytes serialize_colname(const composite& clustering_key,
        const std::vector<bytes_view>& column_names, composite::eoc marker) {
    auto suffix = composite::from_exploded(column_names, marker);
    auto prefix = bytes_view(clustering_key);
    // The marker is not a component. When the last component is empty (i.e.
    // `suffix` serializes to the marker alone), the marker replaces the last
    // byte of the key, so that byte is dropped here. When the component is
    // not empty the marker sits at its end and the two parts are simply
    // joined, as for any normal component.
    if (suffix.size() == 1) {
        prefix.remove_suffix(1);
    }
    const size_t total = prefix.size() + suffix.size();
    if (total > std::numeric_limits<uint16_t>::max()) {
        throw std::runtime_error(sprint("Column name too large (%d > %d)", total, std::numeric_limits<uint16_t>::max()));
    }
    bytes colname(bytes::initialized_later(), total);
    // std::copy returns the position right after the copied prefix.
    auto tail = std::copy(prefix.begin(), prefix.end(), colname.begin());
    std::copy(suffix.get_bytes().begin(), suffix.get_bytes().end(), tail);
    return colname;
}
|
||||
|
||||
// Call maybe_flush_pi_block() before writing the given sstable atom to the
|
||||
// output. This may start a new promoted-index block depending on how much
|
||||
// data we've already written since the start of the current block. Starting
|
||||
@@ -1490,7 +1577,18 @@ void sstable::maybe_flush_pi_block(file_writer& out,
|
||||
const composite& clustering_key,
|
||||
const std::vector<bytes_view>& column_names,
|
||||
composite::eoc marker) {
|
||||
bytes colname = serialize_colname(clustering_key, column_names, marker);
|
||||
if (!_schema->clustering_key_size()) {
|
||||
return;
|
||||
}
|
||||
bytes_writer_for_column_name w;
|
||||
write_column_name(w, *_schema, clustering_key, column_names, marker);
|
||||
maybe_flush_pi_block(out, clustering_key, std::move(w).release());
|
||||
}
|
||||
|
||||
// Overload can only be called if the schema has clustering keys.
|
||||
void sstable::maybe_flush_pi_block(file_writer& out,
|
||||
const composite& clustering_key,
|
||||
bytes colname) {
|
||||
if (_pi_write.block_first_colname.empty()) {
|
||||
// This is the first column in the partition, or first column since we
|
||||
// closed a promoted-index block. Remember its name and position -
|
||||
@@ -1515,17 +1613,15 @@ void sstable::maybe_flush_pi_block(file_writer& out,
|
||||
// block includes them), but we set block_next_start_offset after - so
|
||||
// even if we wrote a lot of open tombstones, we still get a full
|
||||
// block size of new data.
|
||||
if (!clustering_key.empty()) {
|
||||
auto& rts = _pi_write.tombstone_accumulator->range_tombstones_for_row(
|
||||
clustering_key_prefix::from_range(clustering_key.values()));
|
||||
for (const auto& rt : rts) {
|
||||
auto start = composite::from_clustering_element(*_pi_write.schemap, rt.start);
|
||||
auto end = composite::from_clustering_element(*_pi_write.schemap, rt.end);
|
||||
write_range_tombstone(out,
|
||||
start, bound_kind_to_start_marker(rt.start_kind),
|
||||
end, bound_kind_to_end_marker(rt.end_kind),
|
||||
{}, rt.tomb);
|
||||
}
|
||||
auto& rts = _pi_write.tombstone_accumulator->range_tombstones_for_row(
|
||||
clustering_key_prefix::from_range(clustering_key.values()));
|
||||
for (const auto& rt : rts) {
|
||||
auto start = composite::from_clustering_element(*_pi_write.schemap, rt.start);
|
||||
auto end = composite::from_clustering_element(*_pi_write.schemap, rt.end);
|
||||
write_range_tombstone(out,
|
||||
start, bound_kind_to_start_marker(rt.start_kind),
|
||||
end, bound_kind_to_end_marker(rt.end_kind),
|
||||
{}, rt.tomb);
|
||||
}
|
||||
_pi_write.block_next_start_offset = out.offset() + _pi_write.desired_block_size;
|
||||
_pi_write.block_first_colname = colname;
|
||||
@@ -1537,37 +1633,6 @@ void sstable::maybe_flush_pi_block(file_writer& out,
|
||||
}
|
||||
}
|
||||
|
||||
// Writes to `out` the column name formed by a serialized clustering key
// followed by the composite built from `column_names` and `marker`, preceded
// by its 16-bit length. Throws std::runtime_error when the name does not fit
// in 16 bits.
void sstable::write_column_name(file_writer& out, const composite& clustering_key, const std::vector<bytes_view>& column_names, composite::eoc marker) {
    // was defined in the schema, for example.
    auto suffix = composite::from_exploded(column_names, marker);
    auto prefix = bytes_view(clustering_key);

    // The marker is not a component. When the last component is empty (i.e.
    // `suffix` serializes to the marker alone), the marker replaces the last
    // byte of the key, so that byte is dropped here. When the component is
    // not empty the marker sits at its end and the two parts are simply
    // joined, as for any normal component.
    if (suffix.size() == 1) {
        prefix.remove_suffix(1);
    }

    const size_t total = prefix.size() + suffix.size();
    if (total > std::numeric_limits<uint16_t>::max()) {
        throw std::runtime_error(sprint("Column name too large (%d > %d)", total, std::numeric_limits<uint16_t>::max()));
    }
    uint16_t encoded_size = total;
    write(out, encoded_size, prefix, suffix);
}
|
||||
|
||||
// Writes an already-serialized column name to `out` as a 16-bit length
// followed by the bytes of `column_names`.
//
// Throws std::runtime_error when the name is longer than uint16_t can hold.
void sstable::write_column_name(file_writer& out, bytes_view column_names) {
|
||||
size_t sz = column_names.size();
|
||||
if (sz > std::numeric_limits<uint16_t>::max()) {
|
||||
throw std::runtime_error(sprint("Column name too large (%d > %d)", sz, std::numeric_limits<uint16_t>::max()));
|
||||
}
|
||||
// Safe to narrow: the size was checked against the uint16_t maximum above.
uint16_t sz16 = sz;
|
||||
write(out, sz16, column_names);
|
||||
}
|
||||
|
||||
|
||||
static inline void update_cell_stats(column_stats& c_stats, uint64_t timestamp) {
|
||||
c_stats.update_min_timestamp(timestamp);
|
||||
c_stats.update_max_timestamp(timestamp);
|
||||
@@ -1653,13 +1718,12 @@ void sstable::write_cell(file_writer& out, atomic_cell_view cell, const column_d
|
||||
}
|
||||
}
|
||||
|
||||
void sstable::write_row_marker(file_writer& out, const row_marker& marker, const composite& clustering_key) {
|
||||
if (marker.is_missing()) {
|
||||
void sstable::maybe_write_row_marker(file_writer& out, const schema& schema, const row_marker& marker, const composite& clustering_key) {
|
||||
if (!schema.is_compound() || schema.is_dense() || marker.is_missing()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Write row mark cell to the beginning of clustered row.
|
||||
write_column_name(out, clustering_key, { bytes_view() });
|
||||
index_and_write_column_name(out, clustering_key, { bytes_view() });
|
||||
uint64_t timestamp = marker.timestamp();
|
||||
uint32_t value_length = 0;
|
||||
|
||||
@@ -1695,21 +1759,25 @@ void sstable::write_deletion_time(file_writer& out, const tombstone t) {
|
||||
write(out, deletion_time, timestamp);
|
||||
}
|
||||
|
||||
void sstable::write_row_tombstone(file_writer& out, const composite& key, const row_tombstone t) {
|
||||
// Gives the promoted-index writer a chance to start a new block at `key`
// (with no column-name components and the given `marker`), then records
// `rt` in the tombstone accumulator.
//
// out    - data file writer, forwarded to maybe_flush_pi_block().
// key    - composite form of the tombstone's start position.
// rt     - the range tombstone; it is moved into the accumulator.
// marker - end-of-component marker for `key`.
void sstable::index_tombstone(file_writer& out, const composite& key, range_tombstone&& rt, composite::eoc marker) {
|
||||
maybe_flush_pi_block(out, key, {}, marker);
|
||||
// Remember the range tombstone so when we need to open a new promoted
|
||||
// index block, we can figure out which ranges are still open and need
|
||||
// to be repeated in the data file. Note that apply() also drops ranges
|
||||
// already closed by rt.start, so the accumulator doesn't grow without bound.
|
||||
_pi_write.tombstone_accumulator->apply(std::move(rt));
|
||||
}
|
||||
|
||||
void sstable::maybe_write_row_tombstone(file_writer& out, const composite& key, const clustering_row& clustered_row) {
|
||||
auto t = clustered_row.tomb();
|
||||
if (!t) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto write_tombstone = [&] (tombstone t, column_mask mask) {
|
||||
write_column_name(out, key, {}, composite::eoc::start);
|
||||
write(out, mask);
|
||||
write_column_name(out, key, {}, composite::eoc::end);
|
||||
write_deletion_time(out, t);
|
||||
};
|
||||
|
||||
write_tombstone(t.regular(), column_mask::range_tombstone);
|
||||
auto rt = range_tombstone(clustered_row.key(), bound_kind::incl_start, clustered_row.key(), bound_kind::incl_end, t.tomb());
|
||||
index_tombstone(out, key, std::move(rt), composite::eoc::none);
|
||||
write_range_tombstone(out, key, composite::eoc::start, key, composite::eoc::end, {}, t.regular());
|
||||
if (t.is_shadowable()) {
|
||||
write_tombstone(t.shadowable().tomb(), column_mask::shadowable);
|
||||
write_range_tombstone(out, key, composite::eoc::start, key, composite::eoc::end, {}, t.shadowable().tomb(), column_mask::shadowable);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1719,27 +1787,26 @@ void sstable::write_range_tombstone(file_writer& out,
|
||||
const composite& end,
|
||||
composite::eoc end_marker,
|
||||
std::vector<bytes_view> suffix,
|
||||
const tombstone t) {
|
||||
if (!t) {
|
||||
return;
|
||||
const tombstone t,
|
||||
column_mask mask) {
|
||||
if (!_schema->is_compound() && (start_marker == composite::eoc::end || end_marker == composite::eoc::start)) {
|
||||
throw std::logic_error(sprint("Cannot represent marker type in range tombstone for non-compound schemas"));
|
||||
}
|
||||
|
||||
write_column_name(out, start, suffix, start_marker);
|
||||
column_mask mask = column_mask::range_tombstone;
|
||||
write_range_tombstone_bound(out, *_schema, start, suffix, start_marker);
|
||||
write(out, mask);
|
||||
write_column_name(out, end, suffix, end_marker);
|
||||
write_range_tombstone_bound(out, *_schema, end, suffix, end_marker);
|
||||
write_deletion_time(out, t);
|
||||
}
|
||||
|
||||
void sstable::write_collection(file_writer& out, const composite& clustering_key, const column_definition& cdef, collection_mutation_view collection) {
|
||||
|
||||
auto t = static_pointer_cast<const collection_type_impl>(cdef.type);
|
||||
auto mview = t->deserialize_mutation_form(collection);
|
||||
const bytes& column_name = cdef.name();
|
||||
write_range_tombstone(out, clustering_key, clustering_key, { bytes_view(column_name) }, mview.tomb);
|
||||
if (mview.tomb) {
|
||||
write_range_tombstone(out, clustering_key, composite::eoc::start, clustering_key, composite::eoc::end, { column_name }, mview.tomb);
|
||||
}
|
||||
for (auto& cp: mview.cells) {
|
||||
maybe_flush_pi_block(out, clustering_key, { column_name, cp.first });
|
||||
write_column_name(out, clustering_key, { column_name, cp.first });
|
||||
index_and_write_column_name(out, clustering_key, { column_name, cp.first });
|
||||
write_cell(out, cp.second, cdef);
|
||||
}
|
||||
}
|
||||
@@ -1749,24 +1816,8 @@ void sstable::write_collection(file_writer& out, const composite& clustering_key
|
||||
void sstable::write_clustered_row(file_writer& out, const schema& schema, const clustering_row& clustered_row) {
|
||||
auto clustering_key = composite::from_clustering_element(schema, clustered_row.key());
|
||||
|
||||
if (schema.is_compound() && !schema.is_dense()) {
|
||||
maybe_flush_pi_block(out, clustering_key, { bytes_view() });
|
||||
write_row_marker(out, clustered_row.marker(), clustering_key);
|
||||
}
|
||||
// Before writing cells, range tombstone must be written if the row has any (deletable_row::t).
|
||||
if (clustered_row.tomb()) {
|
||||
maybe_flush_pi_block(out, clustering_key, {});
|
||||
write_row_tombstone(out, clustering_key, clustered_row.tomb());
|
||||
// Because we currently may break a partition to promoted-index blocks
|
||||
// in the middle of a clustered row, we also need to track the current
|
||||
// row's tombstone - not just range tombstones - which may affect the
|
||||
// beginning of a new block.
|
||||
// TODO: consider starting a new block only between rows, so the
|
||||
// following code can be dropped:
|
||||
_pi_write.tombstone_accumulator->apply(range_tombstone(
|
||||
clustered_row.key(), bound_kind::incl_start,
|
||||
clustered_row.key(), bound_kind::incl_end, clustered_row.tomb().tomb()));
|
||||
}
|
||||
maybe_write_row_marker(out, schema, clustered_row.marker(), clustering_key);
|
||||
maybe_write_row_tombstone(out, clustering_key, clustered_row);
|
||||
|
||||
if (schema.clustering_key_size()) {
|
||||
column_name_helper::min_max_components(schema, _collector.min_column_names(), _collector.max_column_names(),
|
||||
@@ -1784,30 +1835,14 @@ void sstable::write_clustered_row(file_writer& out, const schema& schema, const
|
||||
}
|
||||
assert(column_definition.is_regular());
|
||||
atomic_cell_view cell = c.as_atomic_cell();
|
||||
const bytes& column_name = column_definition.name();
|
||||
|
||||
if (schema.is_compound()) {
|
||||
if (schema.is_dense()) {
|
||||
maybe_flush_pi_block(out, composite(), { bytes_view(clustering_key) });
|
||||
write_column_name(out, bytes_view(clustering_key));
|
||||
} else {
|
||||
maybe_flush_pi_block(out, clustering_key, { bytes_view(column_name) });
|
||||
write_column_name(out, clustering_key, { bytes_view(column_name) });
|
||||
}
|
||||
} else {
|
||||
if (schema.is_dense()) {
|
||||
maybe_flush_pi_block(out, composite(), { bytes_view(clustered_row.key().get_component(schema, 0)) });
|
||||
write_column_name(out, bytes_view(clustered_row.key().get_component(schema, 0)));
|
||||
} else {
|
||||
maybe_flush_pi_block(out, composite(), { bytes_view(column_name) });
|
||||
write_column_name(out, bytes_view(column_name));
|
||||
}
|
||||
}
|
||||
std::vector<bytes_view> column_name = { column_definition.name() };
|
||||
index_and_write_column_name(out, clustering_key, column_name);
|
||||
write_cell(out, cell, column_definition);
|
||||
});
|
||||
}
|
||||
|
||||
void sstable::write_static_row(file_writer& out, const schema& schema, const row& static_row) {
|
||||
assert(schema.is_compound());
|
||||
static_row.for_each_cell([&] (column_id id, const atomic_cell_or_collection& c) {
|
||||
auto&& column_definition = schema.static_column_at(id);
|
||||
if (!column_definition.is_atomic()) {
|
||||
@@ -1817,20 +1852,28 @@ void sstable::write_static_row(file_writer& out, const schema& schema, const row
|
||||
}
|
||||
assert(column_definition.is_static());
|
||||
const auto& column_name = column_definition.name();
|
||||
if (schema.is_compound()) {
|
||||
auto sp = composite::static_prefix(schema);
|
||||
maybe_flush_pi_block(out, sp, { bytes_view(column_name) });
|
||||
write_column_name(out, sp, { bytes_view(column_name) });
|
||||
} else {
|
||||
assert(!schema.is_dense());
|
||||
maybe_flush_pi_block(out, composite(), { bytes_view(column_name) });
|
||||
write_column_name(out, bytes_view(column_name));
|
||||
}
|
||||
auto sp = composite::static_prefix(schema);
|
||||
index_and_write_column_name(out, sp, { bytes_view(column_name) });
|
||||
atomic_cell_view cell = c.as_atomic_cell();
|
||||
write_cell(out, cell, column_definition);
|
||||
});
|
||||
}
|
||||
|
||||
// Writes a column name to `out`, first offering it to the promoted-index
// writer when the schema has clustering key columns.
//
// out                - data file writer.
// clustering_element - composite clustering prefix of the name.
// column_names       - components that follow the clustering prefix.
// marker             - end-of-component marker for the name.
void sstable::index_and_write_column_name(file_writer& out,
|
||||
const composite& clustering_element,
|
||||
const std::vector<bytes_view>& column_names,
|
||||
composite::eoc marker) {
|
||||
if (_schema->clustering_key_size()) {
|
||||
// Serialize the name once into a temporary buffer so the same bytes can
// be handed to maybe_flush_pi_block() and then written to `out`.
bytes_writer_for_column_name w;
|
||||
write_column_name(w, *_schema, clustering_element, column_names, marker);
|
||||
auto&& colname = std::move(w).release();
|
||||
maybe_flush_pi_block(out, clustering_element, colname);
|
||||
write_column_name(out, colname);
|
||||
} else {
|
||||
// No clustering key columns: write the name straight to `out` without
// calling maybe_flush_pi_block().
write_column_name(out, *_schema, clustering_element, column_names, marker);
|
||||
}
|
||||
}
|
||||
|
||||
// Writes `key` followed by `pos` to `out`.
// NOTE(review): presumably `pos` is the partition's offset in the data file;
// confirm against the caller.
static void write_index_header(file_writer& out, disk_string_view<uint16_t>& key, uint64_t pos) {
|
||||
write(out, key, pos);
|
||||
}
|
||||
@@ -2026,6 +2069,7 @@ components_writer::components_writer(sstable& sst, const schema& s, file_writer&
|
||||
{
|
||||
_sst._components->filter = utils::i_filter::get_filter(estimated_partitions, _schema.bloom_filter_fp_chance());
|
||||
_sst._pi_write.desired_block_size = cfg.promoted_index_block_size.value_or(get_config().column_index_size_in_kb() * 1024);
|
||||
_sst._correctly_serialize_non_compound_range_tombstones = cfg.correctly_serialize_non_compound_range_tombstones;
|
||||
|
||||
prepare_summary(_sst._components->summary, estimated_partitions, _schema.min_index_interval());
|
||||
|
||||
@@ -2100,17 +2144,13 @@ stop_iteration components_writer::consume(clustering_row&& cr) {
|
||||
|
||||
stop_iteration components_writer::consume(range_tombstone&& rt) {
|
||||
ensure_tombstone_is_written();
|
||||
// Remember the range tombstone so when we need to open a new promoted
|
||||
// index block, we can figure out which ranges are still open and need
|
||||
// to be repeated in the data file. Note that apply() also drops ranges
|
||||
// already closed by rt.start, so the accumulator doesn't grow without bound.
|
||||
_sst._pi_write.tombstone_accumulator->apply(rt);
|
||||
auto start = composite::from_clustering_element(_schema, std::move(rt.start));
|
||||
auto start = composite::from_clustering_element(_schema, rt.start);
|
||||
auto start_marker = bound_kind_to_start_marker(rt.start_kind);
|
||||
auto end = composite::from_clustering_element(_schema, std::move(rt.end));
|
||||
auto end = composite::from_clustering_element(_schema, rt.end);
|
||||
auto end_marker = bound_kind_to_end_marker(rt.end_kind);
|
||||
_sst.maybe_flush_pi_block(_out, start, {}, start_marker);
|
||||
_sst.write_range_tombstone(_out, std::move(start), start_marker, std::move(end), end_marker, {}, rt.tomb);
|
||||
auto tomb = rt.tomb;
|
||||
_sst.index_tombstone(_out, start, std::move(rt), start_marker);
|
||||
_sst.write_range_tombstone(_out, std::move(start), start_marker, std::move(end), end_marker, {}, tomb);
|
||||
return stop_iteration::no;
|
||||
}
|
||||
|
||||
@@ -2189,12 +2229,13 @@ sstable::read_scylla_metadata(const io_priority_class& pc) {
|
||||
}
|
||||
|
||||
void
|
||||
sstable::write_scylla_metadata(const io_priority_class& pc, shard_id shard) {
|
||||
sstable::write_scylla_metadata(const io_priority_class& pc, shard_id shard, sstable_enabled_features features) {
|
||||
auto&& first_key = get_first_decorated_key();
|
||||
auto&& last_key = get_last_decorated_key();
|
||||
auto sm = create_sharding_metadata(_schema, first_key, last_key, shard);
|
||||
_components->scylla_metadata.emplace();
|
||||
_components->scylla_metadata->data.set<scylla_metadata_type::Sharding>(std::move(sm));
|
||||
_components->scylla_metadata->data.set<scylla_metadata_type::Features>(std::move(features));
|
||||
|
||||
write_simple<component_type::Scylla>(*_components->scylla_metadata, pc);
|
||||
}
|
||||
@@ -2247,6 +2288,7 @@ sstable_writer::sstable_writer(sstable& sst, const schema& s, uint64_t estimated
|
||||
, _leave_unsealed(cfg.leave_unsealed)
|
||||
, _shard(shard)
|
||||
, _monitor(cfg.monitor)
|
||||
, _correctly_serialize_non_compound_range_tombstones(cfg.correctly_serialize_non_compound_range_tombstones)
|
||||
{
|
||||
_sst.generate_toc(_schema.get_compressor_params().get_compressor(), _schema.bloom_filter_fp_chance());
|
||||
_sst.write_toc(_pc);
|
||||
@@ -2256,6 +2298,10 @@ sstable_writer::sstable_writer(sstable& sst, const schema& s, uint64_t estimated
|
||||
_components_writer.emplace(_sst, _schema, *_writer, estimated_partitions, cfg, _pc);
|
||||
}
|
||||
|
||||
// Returns a feature set whose mask has every bit below sstable_feature::End
// set.
// NOTE(review): this assumes End is the one-past-last enumerator, so the mask
// covers all defined features; confirm against the enum definition.
static sstable_enabled_features all_features() {
|
||||
return sstable_enabled_features{(1 << sstable_feature::End) - 1};
|
||||
}
|
||||
|
||||
void sstable_writer::consume_end_of_stream()
|
||||
{
|
||||
_components_writer->consume_end_of_stream();
|
||||
@@ -2265,7 +2311,11 @@ void sstable_writer::consume_end_of_stream()
|
||||
_sst.write_filter(_pc);
|
||||
_sst.write_statistics(_pc);
|
||||
_sst.write_compression(_pc);
|
||||
_sst.write_scylla_metadata(_pc, _shard);
|
||||
auto features = all_features();
|
||||
if (!_correctly_serialize_non_compound_range_tombstones) {
|
||||
features.disable(sstable_feature::NonCompoundRangeTombstones);
|
||||
}
|
||||
_sst.write_scylla_metadata(_pc, _shard, std::move(features));
|
||||
|
||||
_monitor->on_write_completed();
|
||||
|
||||
@@ -2295,7 +2345,7 @@ sstable_writer sstable::get_writer(const schema& s, uint64_t estimated_partition
|
||||
}
|
||||
|
||||
future<> sstable::write_components(
|
||||
mutation_reader mr,
|
||||
flat_mutation_reader mr,
|
||||
uint64_t estimated_partitions,
|
||||
schema_ptr schema,
|
||||
const sstable_writer_config& cfg,
|
||||
@@ -2307,7 +2357,7 @@ future<> sstable::write_components(
|
||||
attr.scheduling_group = cfg.thread_scheduling_group;
|
||||
return seastar::async(std::move(attr), [this, mr = std::move(mr), estimated_partitions, schema = std::move(schema), cfg, &pc] () mutable {
|
||||
auto wr = get_writer(*schema, estimated_partitions, cfg, pc);
|
||||
consume_flattened_in_thread(mr, wr);
|
||||
mr.consume_in_thread(std::move(wr));
|
||||
});
|
||||
}
|
||||
|
||||
@@ -2319,19 +2369,18 @@ future<> sstable::generate_summary(const io_priority_class& pc) {
|
||||
sstlog.info("Summary file {} not found. Generating Summary...", filename(sstable::component_type::Summary));
|
||||
class summary_generator {
|
||||
summary& _summary;
|
||||
uint64_t _data_size;
|
||||
size_t _summary_byte_cost;
|
||||
uint64_t _next_data_offset_to_write_summary = 0;
|
||||
public:
|
||||
std::experimental::optional<key> first_key, last_key;
|
||||
|
||||
summary_generator(summary& s, uint64_t data_size) : _summary(s), _data_size(data_size), _summary_byte_cost(summary_byte_cost()) {}
|
||||
summary_generator(summary& s) : _summary(s), _summary_byte_cost(summary_byte_cost()) {}
|
||||
bool should_continue() {
|
||||
return true;
|
||||
}
|
||||
void consume_entry(index_entry&& ie, uint64_t index_offset) {
|
||||
auto token = dht::global_partitioner().get_token(ie.get_key());
|
||||
components_writer::maybe_add_summary_entry(_summary, token, ie.get_key_bytes(), _data_size, index_offset,
|
||||
components_writer::maybe_add_summary_entry(_summary, token, ie.get_key_bytes(), ie.position(), index_offset,
|
||||
_next_data_offset_to_write_summary, _summary_byte_cost);
|
||||
if (!first_key) {
|
||||
first_key = key(to_bytes(ie.get_key_bytes()));
|
||||
@@ -2343,9 +2392,7 @@ future<> sstable::generate_summary(const io_priority_class& pc) {
|
||||
|
||||
return open_checked_file_dma(_read_error_handler, filename(component_type::Index), open_flags::ro).then([this, &pc] (file index_file) {
|
||||
return do_with(std::move(index_file), [this, &pc] (file index_file) {
|
||||
return seastar::when_all_succeed(
|
||||
io_check([&] { return engine().file_size(this->filename(sstable::component_type::Data)); }),
|
||||
index_file.size()).then([this, &pc, index_file] (auto data_size, auto index_size) {
|
||||
return index_file.size().then([this, &pc, index_file] (auto index_size) {
|
||||
// an upper bound. Surely to be less than this.
|
||||
auto estimated_partitions = index_size / sizeof(uint64_t);
|
||||
prepare_summary(_components->summary, estimated_partitions, _schema->min_index_interval());
|
||||
@@ -2354,9 +2401,10 @@ future<> sstable::generate_summary(const io_priority_class& pc) {
|
||||
options.buffer_size = sstable_buffer_size;
|
||||
options.io_priority_class = pc;
|
||||
auto stream = make_file_input_stream(index_file, 0, index_size, std::move(options));
|
||||
return do_with(summary_generator(_components->summary, data_size),
|
||||
return do_with(summary_generator(_components->summary),
|
||||
[this, &pc, stream = std::move(stream), index_size] (summary_generator& s) mutable {
|
||||
auto ctx = make_lw_shared<index_consume_entry_context<summary_generator>>(s, std::move(stream), 0, index_size);
|
||||
auto ctx = make_lw_shared<index_consume_entry_context<summary_generator>>(
|
||||
s, trust_promoted_index::yes, std::move(stream), 0, index_size);
|
||||
return ctx->consume_input(*ctx).finally([ctx] {
|
||||
return ctx->close();
|
||||
}).then([this, ctx, &s] {
|
||||
@@ -2998,30 +3046,6 @@ future<> init_metrics() {
|
||||
});
|
||||
}
|
||||
|
||||
// mutation_reader implementation that reads one partition from one sstable
// via sstable::read_row().
//
// The slice and priority class are held by reference, so the caller must keep
// them alive for as long as this reader is used.
struct single_partition_reader_adaptor final : public mutation_reader::impl {
|
||||
sstables::shared_sstable _sst;
|
||||
schema_ptr _s;
|
||||
dht::ring_position_view _key;
|
||||
const query::partition_slice& _slice;
|
||||
const io_priority_class& _pc;
|
||||
streamed_mutation::forwarding _fwd;
|
||||
public:
|
||||
// Stores the sstable, schema, key, slice, priority class and forwarding
// mode that operator()() later passes to read_row().
single_partition_reader_adaptor(sstables::shared_sstable sst, schema_ptr s, dht::ring_position_view key,
|
||||
const query::partition_slice& slice, const io_priority_class& pc, streamed_mutation::forwarding fwd)
|
||||
: _sst(sst), _s(s), _key(key), _slice(slice), _pc(pc), _fwd(fwd)
|
||||
{ }
|
||||
// Returns the result of read_row() while _sst is set; once _sst is empty it
// returns a ready future holding no mutation.
virtual future<streamed_mutation_opt> operator()() override {
|
||||
if (!_sst) {
|
||||
return make_ready_future<streamed_mutation_opt>(stdx::nullopt);
|
||||
}
|
||||
// Moving out of _sst is presumably what makes later calls take the
// empty branch above; confirm the moved-from pointer tests false.
auto sst = std::move(_sst);
|
||||
return sst->read_row(_s, _key, _slice, _pc, no_resource_tracking(), _fwd);
|
||||
}
|
||||
// Forwarding to another partition range is not supported; always throws
// std::bad_function_call.
virtual future<> fast_forward_to(const dht::partition_range& pr) override {
|
||||
throw std::bad_function_call();
|
||||
}
|
||||
};
|
||||
|
||||
mutation_source sstable::as_mutation_source() {
|
||||
return mutation_source([sst = shared_from_this()] (schema_ptr s,
|
||||
const dht::partition_range& range,
|
||||
@@ -3035,14 +3059,16 @@ mutation_source sstable::as_mutation_source() {
|
||||
// consequence, fast_forward_to() will *NOT* work on the result,
|
||||
// regardless of what the fwd_mr parameter says.
|
||||
if (range.is_singular() && range.start()->value().has_key()) {
|
||||
const dht::ring_position& pos = range.start()->value();
|
||||
return make_mutation_reader<single_partition_reader_adaptor>(sst, s, pos, slice, pc, fwd);
|
||||
return sst->read_row_flat(s, range.start()->value(), slice, pc, no_resource_tracking(), fwd);
|
||||
} else {
|
||||
return sst->read_range_rows(s, range, slice, pc, no_resource_tracking(), fwd, fwd_mr);
|
||||
return sst->read_range_rows_flat(s, range, slice, pc, no_resource_tracking(), fwd, fwd_mr);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Returns what the local storage service reports from
// cluster_supports_reading_correctly_serialized_range_tombstones().
bool supports_correct_non_compound_range_tombstones() {
|
||||
return service::get_local_storage_service().cluster_supports_reading_correctly_serialized_range_tombstones();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -43,6 +43,7 @@
|
||||
#include "schema.hh"
|
||||
#include "mutation.hh"
|
||||
#include "utils/i_filter.hh"
|
||||
#include "utils/optimized_optional.hh"
|
||||
#include "core/stream.hh"
|
||||
#include "writer.hh"
|
||||
#include "metadata_collector.hh"
|
||||
@@ -56,6 +57,7 @@
|
||||
#include "sstables/shared_index_lists.hh"
|
||||
#include "sstables/progress_monitor.hh"
|
||||
#include "db/commitlog/replay_position.hh"
|
||||
#include "flat_mutation_reader.hh"
|
||||
|
||||
namespace seastar {
|
||||
class thread_scheduling_group;
|
||||
@@ -65,6 +67,8 @@ namespace sstables {
|
||||
|
||||
extern logging::logger sstlog;
|
||||
|
||||
class data_consume_rows_context;
|
||||
|
||||
// data_consume_context is an object returned by sstable::data_consume_rows()
|
||||
// which allows knowing when the consumer stops reading, and starting it again
|
||||
// (e.g., when the consumer wants to stop after every sstable row).
|
||||
@@ -80,25 +84,34 @@ extern logging::logger sstlog;
|
||||
// and the time the returned future is completed, the object lives on.
|
||||
// Moreover, the sstable object used for the sstable::data_consume_rows()
|
||||
// call which created this data_consume_context, must also be kept alive.
|
||||
//
|
||||
// data_consume_rows() and data_consume_rows_at_once() both can read just a
|
||||
// single row or many rows. The difference is that data_consume_rows_at_once()
|
||||
// is optimized for reading one or a few rows (reading it all into memory), while
|
||||
// data_consume_rows() uses a read buffer, so not all the rows need to fit
|
||||
// in memory at the same time (they are delivered to the consumer one by one).
|
||||
class data_consume_context {
|
||||
class impl;
|
||||
std::unique_ptr<impl> _pimpl;
|
||||
shared_sstable _sst;
|
||||
std::unique_ptr<data_consume_rows_context> _ctx;
|
||||
// This object can only be constructed by sstable::data_consume_rows()
|
||||
data_consume_context(std::unique_ptr<impl>);
|
||||
data_consume_context(shared_sstable,row_consumer& consumer, input_stream<char>&& input, uint64_t start, uint64_t maxlen);
|
||||
friend class sstable;
|
||||
data_consume_context();
|
||||
explicit operator bool() const noexcept;
|
||||
friend class optimized_optional<data_consume_context>;
|
||||
public:
|
||||
future<> read();
|
||||
future<> fast_forward_to(uint64_t begin, uint64_t end);
|
||||
future<> skip_to(indexable_element, uint64_t begin);
|
||||
uint64_t position() const;
|
||||
bool eof() const;
|
||||
// Define (as defaults) the destructor and move operations in the source
|
||||
// file, so here we don't need to know the incomplete impl type.
|
||||
~data_consume_context();
|
||||
data_consume_context(data_consume_context&&) noexcept;
|
||||
data_consume_context& operator=(data_consume_context&&) noexcept;
|
||||
};
|
||||
|
||||
using data_consume_context_opt = optimized_optional<data_consume_context>;
|
||||
|
||||
class key;
|
||||
class sstable_writer;
|
||||
struct foreign_sstable_open_info;
|
||||
@@ -106,6 +119,8 @@ struct sstable_open_info;
|
||||
|
||||
class index_reader;
|
||||
|
||||
bool supports_correct_non_compound_range_tombstones();
|
||||
|
||||
struct sstable_writer_config {
|
||||
std::experimental::optional<size_t> promoted_index_block_size;
|
||||
uint64_t max_sstable_size = std::numeric_limits<uint64_t>::max();
|
||||
@@ -114,6 +129,7 @@ struct sstable_writer_config {
|
||||
stdx::optional<db::replay_position> replay_position;
|
||||
seastar::thread_scheduling_group* thread_scheduling_group = nullptr;
|
||||
seastar::shared_ptr<write_monitor> monitor = default_write_monitor();
|
||||
bool correctly_serialize_non_compound_range_tombstones = supports_correct_non_compound_range_tombstones();
|
||||
};
|
||||
|
||||
static constexpr inline size_t default_sstable_buffer_size() {
|
||||
@@ -259,7 +275,7 @@ public:
|
||||
// a filter on the clustering keys which we want to read, which
|
||||
// additionally determines also if all the static columns will also be
|
||||
// returned in the result.
|
||||
future<streamed_mutation_opt> read_row(
|
||||
flat_mutation_reader read_row_flat(
|
||||
schema_ptr schema,
|
||||
dht::ring_position_view key,
|
||||
const query::partition_slice& slice,
|
||||
@@ -267,26 +283,13 @@ public:
|
||||
reader_resource_tracker resource_tracker = no_resource_tracking(),
|
||||
streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no);
|
||||
|
||||
future<streamed_mutation_opt> read_row(schema_ptr schema, dht::ring_position_view key) {
|
||||
flat_mutation_reader read_row_flat(schema_ptr schema, dht::ring_position_view key) {
|
||||
auto& full_slice = schema->full_slice();
|
||||
return read_row(std::move(schema), std::move(key), full_slice);
|
||||
}
|
||||
|
||||
future<streamed_mutation_opt> read_row(
|
||||
schema_ptr schema,
|
||||
const sstables::key& key,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc = default_priority_class(),
|
||||
reader_resource_tracker resource_tracker = no_resource_tracking(),
|
||||
streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no);
|
||||
|
||||
future<streamed_mutation_opt> read_row(schema_ptr schema, const sstables::key& key) {
|
||||
auto& full_slice = schema->full_slice();
|
||||
return read_row(std::move(schema), key, full_slice);
|
||||
return read_row_flat(std::move(schema), std::move(key), full_slice);
|
||||
}
|
||||
|
||||
// Returns a mutation_reader for given range of partitions
|
||||
mutation_reader read_range_rows(
|
||||
flat_mutation_reader read_range_rows_flat(
|
||||
schema_ptr schema,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
@@ -295,12 +298,12 @@ public:
|
||||
streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no,
|
||||
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::yes);
|
||||
|
||||
mutation_reader read_range_rows(schema_ptr schema, const dht::partition_range& range) {
|
||||
flat_mutation_reader read_range_rows_flat(schema_ptr schema, const dht::partition_range& range) {
|
||||
auto& full_slice = schema->full_slice();
|
||||
return read_range_rows(std::move(schema), range, full_slice);
|
||||
return read_range_rows_flat(std::move(schema), range, full_slice);
|
||||
}
|
||||
|
||||
// read_rows() returns each of the rows in the sstable, in sequence,
|
||||
// read_rows_flat() returns each of the rows in the sstable, in sequence,
|
||||
// converted to a "mutation" data structure.
|
||||
// This function is implemented efficiently - doing buffered, sequential
|
||||
// read of the data file (no need to access the index file).
|
||||
@@ -311,15 +314,15 @@ public:
|
||||
// The caller must ensure (e.g., using do_with()) that the context object,
|
||||
// as well as the sstable, remains alive as long as a read() is in
|
||||
// progress (i.e., returned a future which hasn't completed yet).
|
||||
mutation_reader read_rows(schema_ptr schema,
|
||||
const io_priority_class& pc = default_priority_class(),
|
||||
streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no);
|
||||
flat_mutation_reader read_rows_flat(schema_ptr schema,
|
||||
const io_priority_class& pc = default_priority_class(),
|
||||
streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no);
|
||||
|
||||
// Returns mutation_source containing all writes contained in this sstable.
|
||||
// The mutation_source shares ownership of this sstable.
|
||||
mutation_source as_mutation_source();
|
||||
|
||||
future<> write_components(mutation_reader mr,
|
||||
future<> write_components(flat_mutation_reader mr,
|
||||
uint64_t estimated_partitions,
|
||||
schema_ptr schema,
|
||||
const sstable_writer_config&,
|
||||
@@ -482,6 +485,10 @@ private:
|
||||
lw_shared_ptr<file_input_stream_history> _single_partition_history = make_lw_shared<file_input_stream_history>();
|
||||
lw_shared_ptr<file_input_stream_history> _partition_range_history = make_lw_shared<file_input_stream_history>();
|
||||
|
||||
//FIXME: Set by sstable_writer to influence sstable writing behavior.
|
||||
// Remove when doing #3012
|
||||
bool _correctly_serialize_non_compound_range_tombstones;
|
||||
|
||||
// _pi_write is used temporarily for building the promoted
|
||||
// index (column sample) of one partition when writing a new sstable.
|
||||
struct {
|
||||
@@ -504,6 +511,10 @@ private:
|
||||
const std::vector<bytes_view>& column_names,
|
||||
composite::eoc marker = composite::eoc::none);
|
||||
|
||||
void maybe_flush_pi_block(file_writer& out,
|
||||
const composite& clustering_key,
|
||||
bytes colname);
|
||||
|
||||
schema_ptr _schema;
|
||||
sstring _dir;
|
||||
unsigned long _generation = 0;
|
||||
@@ -537,7 +548,7 @@ private:
|
||||
void write_compression(const io_priority_class& pc);
|
||||
|
||||
future<> read_scylla_metadata(const io_priority_class& pc);
|
||||
void write_scylla_metadata(const io_priority_class& pc, shard_id shard = engine().cpu_id());
|
||||
void write_scylla_metadata(const io_priority_class& pc, shard_id shard, sstable_enabled_features features);
|
||||
|
||||
future<> read_filter(const io_priority_class& pc);
|
||||
|
||||
@@ -599,20 +610,23 @@ private:
|
||||
bool filter_has_key(const schema& s, const dht::decorated_key& dk) { return filter_has_key(key::from_partition_key(s, dk._key)); }
|
||||
|
||||
// NOTE: functions used to generate sstable components.
|
||||
void write_row_marker(file_writer& out, const row_marker& marker, const composite& clustering_key);
|
||||
void maybe_write_row_marker(file_writer& out, const schema& schema, const row_marker& marker, const composite& clustering_key);
|
||||
void write_clustered_row(file_writer& out, const schema& schema, const clustering_row& clustered_row);
|
||||
void write_static_row(file_writer& out, const schema& schema, const row& static_row);
|
||||
void write_cell(file_writer& out, atomic_cell_view cell, const column_definition& cdef);
|
||||
void write_column_name(file_writer& out, const composite& clustering_key, const std::vector<bytes_view>& column_names, composite::eoc marker = composite::eoc::none);
|
||||
void write_column_name(file_writer& out, bytes_view column_names);
|
||||
void write_range_tombstone(file_writer& out, const composite& start, composite::eoc start_marker, const composite& end, composite::eoc end_marker, std::vector<bytes_view> suffix, const tombstone t);
|
||||
// Convenience overload that writes a range tombstone whose bounds use the
// default markers: composite::eoc::start for `start` and composite::eoc::end
// for `end`. All other arguments are forwarded to the marker-taking overload.
void write_range_tombstone(file_writer& out, const composite& start, const composite& end, std::vector<bytes_view> suffix, const tombstone t) {
    // `t` is a const value parameter, so std::move(t) could only ever copy;
    // pass it directly instead of suggesting a move that cannot happen.
    write_range_tombstone(out, start, composite::eoc::start, end, composite::eoc::end, std::move(suffix), t);
}
|
||||
void write_range_tombstone(file_writer& out, const composite& start, composite::eoc start_marker, const composite& end, composite::eoc end_marker,
|
||||
std::vector<bytes_view> suffix, const tombstone t, const column_mask = column_mask::range_tombstone);
|
||||
void write_range_tombstone_bound(file_writer& out, const schema& s, const composite& clustering_element, const std::vector<bytes_view>& column_names, composite::eoc marker = composite::eoc::none);
|
||||
void index_tombstone(file_writer& out, const composite& key, range_tombstone&& rt, composite::eoc marker);
|
||||
void write_collection(file_writer& out, const composite& clustering_key, const column_definition& cdef, collection_mutation_view collection);
|
||||
void write_row_tombstone(file_writer& out, const composite& key, const row_tombstone t);
|
||||
void maybe_write_row_tombstone(file_writer& out, const composite& key, const clustering_row& clustered_row);
|
||||
void write_deletion_time(file_writer& out, const tombstone t);
|
||||
|
||||
void index_and_write_column_name(file_writer& out,
|
||||
const composite& clustering,
|
||||
const std::vector<bytes_view>& column_names,
|
||||
composite::eoc marker = composite::eoc::none);
|
||||
|
||||
stdx::optional<std::pair<uint64_t, uint64_t>> get_sample_indexes_for_range(const dht::token_range& range);
|
||||
|
||||
std::vector<unsigned> compute_shards_for_this_sstable() const;
|
||||
@@ -625,6 +639,14 @@ public:
|
||||
return has_component(component_type::Scylla);
|
||||
}
|
||||
|
||||
bool has_correct_promoted_index_entries() const {
|
||||
return _schema->is_compound() || !has_scylla_component() || _components->scylla_metadata->has_feature(sstable_feature::NonCompoundPIEntries);
|
||||
}
|
||||
|
||||
bool has_correct_non_compound_range_tombstones() const {
|
||||
return _schema->is_compound() || !has_scylla_component() || _components->scylla_metadata->has_feature(sstable_feature::NonCompoundRangeTombstones);
|
||||
}
|
||||
|
||||
bool filter_has_key(const key& key) {
|
||||
return _components->filter->is_present(bytes_view(key));
|
||||
}
|
||||
@@ -639,6 +661,8 @@ public:
|
||||
|
||||
static utils::hashed_key make_hashed_key(const schema& s, const partition_key& key);
|
||||
|
||||
// Gives mutable access to the filter tracker owned by this object.
filter_tracker& get_filter_tracker() { return _filter_tracker; }
|
||||
|
||||
// Returns the `false_positive` value currently stored in the filter tracker.
uint64_t filter_get_false_positive() {
|
||||
return _filter_tracker.false_positive;
|
||||
}
|
||||
@@ -835,6 +859,7 @@ class sstable_writer {
|
||||
stdx::optional<components_writer> _components_writer;
|
||||
shard_id _shard; // Specifies which shard new sstable will belong to.
|
||||
seastar::shared_ptr<write_monitor> _monitor;
|
||||
bool _correctly_serialize_non_compound_range_tombstones;
|
||||
private:
|
||||
void prepare_file_writer();
|
||||
void finish_file_writer();
|
||||
@@ -844,7 +869,8 @@ public:
|
||||
~sstable_writer();
|
||||
// Move constructor. Plain references/flags are copied from `o`; the file
// writer, the optional components writer and the write monitor are moved.
// The _correctly_serialize_non_compound_range_tombstones flag is carried
// over as well, so a moved-to writer serializes the same way as its source.
sstable_writer(sstable_writer&& o)
    : _sst(o._sst)
    , _schema(o._schema)
    , _pc(o._pc)
    , _backup(o._backup)
    , _leave_unsealed(o._leave_unsealed)
    , _compression_enabled(o._compression_enabled)
    , _writer(std::move(o._writer))
    , _components_writer(std::move(o._components_writer))
    , _shard(o._shard)
    , _monitor(std::move(o._monitor))
    , _correctly_serialize_non_compound_range_tombstones(o._correctly_serialize_non_compound_range_tombstones)
{ }
|
||||
// Hands the start of a new partition over to the components writer.
void consume_new_partition(const dht::decorated_key& dk) {
    _components_writer->consume_new_partition(dk);
}
|
||||
// Forwards the tombstone `t` to the components writer.
void consume(tombstone t) { _components_writer->consume(t); }
|
||||
// Forwards the static row to the components writer and returns its verdict
// on whether iteration should stop.
stop_iteration consume(static_row&& sr) { return _components_writer->consume(std::move(sr)); }
|
||||
|
||||
@@ -362,6 +362,28 @@ struct sharding_metadata {
|
||||
auto describe_type(Describer f) { return f(token_ranges); }
|
||||
};
|
||||
|
||||
// Scylla-specific list of features an sstable supports.
// Each enumerator is used as a bit position inside
// sstable_enabled_features::enabled_features.
enum sstable_feature : uint8_t {
    NonCompoundPIEntries = 0,       // See #2993
    NonCompoundRangeTombstones = 1, // See #2986
    End = 2
};

// Scylla-specific features enabled for a particular sstable.
// `enabled_features` is a 64-bit mask; bit `f` is set when feature `f` is on.
struct sstable_enabled_features {
    uint64_t enabled_features;

    // Returns true when the bit for feature `f` is set.
    bool is_enabled(sstable_feature f) const {
        // Shift a 64-bit one: an `int` literal would overflow for f >= 31.
        return (enabled_features & (uint64_t(1) << f)) != 0;
    }

    // Clears the bit for feature `f`, leaving every other bit untouched.
    void disable(sstable_feature f) {
        // With an `int` mask, ~(1 << 31) widens to 0x000000007fffffff and
        // would wipe bits 31..63; build the mask in 64 bits instead.
        enabled_features &= ~(uint64_t(1) << f);
    }

    // Serialization hook: exposes the raw mask to the describer.
    template <typename Describer>
    auto describe_type(Describer f) { return f(enabled_features); }
};
|
||||
|
||||
// Numbers are found on disk, so they do matter. Also, setting their sizes of
|
||||
// that of an uint32_t is a bit wasteful, but it simplifies the code a lot
|
||||
@@ -373,16 +395,22 @@ enum class metadata_type : uint32_t {
|
||||
Stats = 2,
|
||||
};
|
||||
|
||||
|
||||
// Tags that identify the members of the tagged-union set stored in
// scylla_metadata::data.
enum class scylla_metadata_type : uint32_t {
|
||||
Sharding = 1,
|
||||
Features = 2,
|
||||
};
|
||||
|
||||
struct scylla_metadata {
|
||||
disk_set_of_tagged_union<scylla_metadata_type,
|
||||
disk_tagged_union_member<scylla_metadata_type, scylla_metadata_type::Sharding, sharding_metadata>
|
||||
disk_tagged_union_member<scylla_metadata_type, scylla_metadata_type::Sharding, sharding_metadata>,
|
||||
disk_tagged_union_member<scylla_metadata_type, scylla_metadata_type::Features, sstable_enabled_features>
|
||||
> data;
|
||||
|
||||
bool has_feature(sstable_feature f) const {
|
||||
auto features = data.get<scylla_metadata_type::Features, sstable_enabled_features>();
|
||||
return features && features->is_enabled(f);
|
||||
}
|
||||
|
||||
template <typename Describer>
|
||||
auto describe_type(Describer f) { return f(data); }
|
||||
};
|
||||
|
||||
@@ -139,6 +139,10 @@ void mutation_fragment::apply(const schema& s, mutation_fragment&& mf)
|
||||
assert(!is_range_tombstone());
|
||||
_data->_size_in_bytes = stdx::nullopt;
|
||||
switch (_kind) {
|
||||
case mutation_fragment::kind::partition_start:
|
||||
_data->_partition_start.partition_tombstone().apply(mf._data->_partition_start.partition_tombstone());
|
||||
mf._data->_partition_start.~partition_start();
|
||||
break;
|
||||
case kind::static_row:
|
||||
_data->_static_row.apply(s, std::move(mf._data->_static_row));
|
||||
mf._data->_static_row.~static_row();
|
||||
@@ -147,6 +151,10 @@ void mutation_fragment::apply(const schema& s, mutation_fragment&& mf)
|
||||
_data->_clustering_row.apply(s, std::move(mf._data->_clustering_row));
|
||||
mf._data->_clustering_row.~clustering_row();
|
||||
break;
|
||||
case mutation_fragment::kind::partition_end:
|
||||
// Nothing to do for this guy.
|
||||
mf._data->_partition_end.~partition_end();
|
||||
break;
|
||||
default: abort();
|
||||
}
|
||||
mf._data.reset();
|
||||
@@ -602,79 +610,6 @@ void range_tombstone_stream::reset() {
|
||||
_list.clear();
|
||||
}
|
||||
|
||||
// Wraps `sm` in a streamed_mutation that emits the static row (if any) first
// and then every other fragment in the opposite order to the one in which the
// source produced them. Range tombstones are flipped before being stored.
// The whole source is drained into memory on the first fill_buffer() call.
streamed_mutation reverse_streamed_mutation(streamed_mutation sm) {
    class reversing_streamed_mutation final : public streamed_mutation::impl {
        streamed_mutation_opt _source;                      // cleared once fully drained
        mutation_fragment_opt _static_row;                  // emitted ahead of the stack
        std::stack<mutation_fragment> _mutation_fragments;  // popped last-in, first-out
    private:
        // Files one engaged fragment: the static row is kept aside, anything
        // else goes onto the stack (range tombstones are flipped first).
        void stash(mutation_fragment_opt& mf) {
            if (mf->is_static_row()) {
                _static_row = std::move(mf);
                return;
            }
            if (mf->is_range_tombstone()) {
                mf->as_mutable_range_tombstone().flip();
            }
            _mutation_fragments.emplace(std::move(*mf));
        }

        // Reads the source until it is exhausted, then drops it.
        future<> consume_source() {
            return repeat([this] {
                return (*_source)().then([this] (mutation_fragment_opt mf) {
                    if (!mf) {
                        return stop_iteration::yes;
                    }
                    stash(mf);
                    return stop_iteration::no;
                });
            }).then([this] {
                _source = { };
            });
        }
    public:
        explicit reversing_streamed_mutation(streamed_mutation sm)
            : streamed_mutation::impl(sm.schema(), sm.decorated_key(), sm.partition_tombstone())
            , _source(std::move(sm))
        { }

        virtual future<> fill_buffer() override {
            if (_source) {
                // Nothing can be emitted before the source has been drained.
                return consume_source().then([this] { return fill_buffer(); });
            }
            if (_static_row) {
                push_mutation_fragment(std::move(*_static_row));
                _static_row = { };
            }
            for (;;) {
                if (is_end_of_stream() || is_buffer_full()) {
                    break;
                }
                if (_mutation_fragments.empty()) {
                    _end_of_stream = true;
                    continue;
                }
                push_mutation_fragment(std::move(_mutation_fragments.top()));
                _mutation_fragments.pop();
            }
            return make_ready_future<>();
        }
    };

    return make_streamed_mutation<reversing_streamed_mutation>(std::move(sm));
}
|
||||
|
||||
// Creates a streamed_mutation for partition `key` with partition tombstone
// `t` whose buffer is filled with `frags` up front. The stream is marked as
// finished at construction, so fill_buffer() never has anything to add.
streamed_mutation streamed_mutation_returning(schema_ptr s, dht::decorated_key key, std::vector<mutation_fragment> frags, tombstone t) {
    class prefilled_reader : public streamed_mutation::impl {
    public:
        explicit prefilled_reader(schema_ptr s, dht::decorated_key key, std::vector<mutation_fragment> frags, tombstone t)
            : streamed_mutation::impl(std::move(s), std::move(key), t)
        {
            for (auto it = frags.begin(); it != frags.end(); ++it) {
                push_mutation_fragment(std::move(*it));
            }
            _end_of_stream = true;
        }

        // Everything was buffered by the constructor.
        virtual future<> fill_buffer() override {
            return make_ready_future<>();
        }
    };
    return make_streamed_mutation<prefilled_reader>(std::move(s), std::move(key), std::move(frags), t);
}
|
||||
|
||||
position_range position_range::from_range(const query::clustering_range& range) {
|
||||
auto bv_range = bound_view::from_range(range);
|
||||
return {
|
||||
|
||||
@@ -174,7 +174,8 @@ public:
|
||||
|
||||
dht::decorated_key& key() { return _key; }
|
||||
const dht::decorated_key& key() const { return _key; }
|
||||
tombstone partition_tombstone() const { return _partition_tombstone; }
|
||||
const tombstone& partition_tombstone() const { return _partition_tombstone; }
|
||||
tombstone& partition_tombstone() { return _partition_tombstone; }
|
||||
|
||||
position_in_partition_view position() const;
|
||||
|
||||
@@ -712,12 +713,10 @@ auto consume(streamed_mutation& m, Consumer consumer) {
|
||||
class mutation;
|
||||
|
||||
streamed_mutation streamed_mutation_from_mutation(mutation, streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no);
|
||||
streamed_mutation streamed_mutation_returning(schema_ptr, dht::decorated_key, std::vector<mutation_fragment>, tombstone t = {});
|
||||
streamed_mutation streamed_mutation_from_forwarding_streamed_mutation(streamed_mutation&&);
|
||||
|
||||
//Requires all streamed_mutations to have the same schema.
|
||||
streamed_mutation merge_mutations(std::vector<streamed_mutation>);
|
||||
streamed_mutation reverse_streamed_mutation(streamed_mutation);
|
||||
|
||||
streamed_mutation make_empty_streamed_mutation(schema_ptr, dht::decorated_key, streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no);
|
||||
|
||||
|
||||
@@ -358,6 +358,13 @@ void stream_session::transfer_task_completed(UUID cf_id) {
|
||||
maybe_completed();
|
||||
}
|
||||
|
||||
// Drops every transfer task at once, logs the remaining receive/transfer
// task counts (taken after the clear) and then checks whether the session
// can be completed.
void stream_session::transfer_task_completed_all() {
    _transfers.clear();
    const auto receivers_left = _receivers.size();
    const auto transfers_left = _transfers.size();
    sslog.debug("[Stream #{}] transfer task_completed: all done, stream_receive_task.size={} stream_transfer_task.size={}",
            plan_id(), receivers_left, transfers_left);
    maybe_completed();
}
|
||||
|
||||
void stream_session::send_failed_complete_message() {
|
||||
auto plan_id = this->plan_id();
|
||||
if (_received_failed_complete_message) {
|
||||
@@ -401,11 +408,15 @@ void stream_session::start_streaming_files() {
|
||||
if (!_transfers.empty()) {
|
||||
set_state(stream_session_state::STREAMING);
|
||||
}
|
||||
for (auto it = _transfers.begin(); it != _transfers.end();) {
|
||||
stream_transfer_task& task = it->second;
|
||||
it++;
|
||||
task.start();
|
||||
}
|
||||
do_for_each(_transfers.begin(), _transfers.end(), [this] (auto& item) {
|
||||
sslog.debug("[Stream #{}] Start to send cf_id={}", plan_id(), item.first);
|
||||
return item.second.execute();
|
||||
}).then([this] {
|
||||
this->transfer_task_completed_all();
|
||||
}).handle_exception([this] (auto ep) {
|
||||
sslog.warn("[Stream #{}] Failed to send: {}", plan_id(), ep);
|
||||
this->on_error();
|
||||
});
|
||||
}
|
||||
|
||||
std::vector<column_family*> stream_session::get_column_family_stores(const sstring& keyspace, const std::vector<sstring>& column_families) {
|
||||
|
||||
@@ -332,6 +332,7 @@ public:
|
||||
|
||||
void receive_task_completed(UUID cf_id);
|
||||
void transfer_task_completed(UUID cf_id);
|
||||
void transfer_task_completed_all();
|
||||
private:
|
||||
void send_failed_complete_message();
|
||||
bool maybe_completed();
|
||||
|
||||
@@ -132,7 +132,7 @@ future<> send_mutations(lw_shared_ptr<send_info> si) {
|
||||
});
|
||||
}
|
||||
|
||||
void stream_transfer_task::start() {
|
||||
future<> stream_transfer_task::execute() {
|
||||
auto plan_id = session->plan_id();
|
||||
auto cf_id = this->cf_id;
|
||||
auto dst_cpu_id = session->dst_cpu_id;
|
||||
@@ -141,7 +141,7 @@ void stream_transfer_task::start() {
|
||||
sslog.debug("[Stream #{}] stream_transfer_task: cf_id={}", plan_id, cf_id);
|
||||
sort_and_merge_ranges();
|
||||
_shard_ranges = dht::split_ranges_to_shards(_ranges, *schema);
|
||||
parallel_for_each(_shard_ranges, [this, dst_cpu_id, plan_id, cf_id, id] (auto& item) {
|
||||
return parallel_for_each(_shard_ranges, [this, dst_cpu_id, plan_id, cf_id, id] (auto& item) {
|
||||
auto& shard = item.first;
|
||||
auto& prs = item.second;
|
||||
return session->get_db().invoke_on(shard, [plan_id, cf_id, id, dst_cpu_id, prs = std::move(prs)] (database& db) mutable {
|
||||
@@ -158,10 +158,9 @@ void stream_transfer_task::start() {
|
||||
}).then([this, id, plan_id, cf_id] {
|
||||
sslog.debug("[Stream #{}] GOT STREAM_MUTATION_DONE Reply from {}", plan_id, id.addr);
|
||||
session->start_keep_alive_timer();
|
||||
session->transfer_task_completed(cf_id);
|
||||
}).handle_exception([this, plan_id, id] (auto ep){
|
||||
sslog.warn("[Stream #{}] stream_transfer_task: Fail to send to {}: {}", plan_id, id, ep);
|
||||
this->session->on_error();
|
||||
std::rethrow_exception(ep);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -77,7 +77,7 @@ public:
|
||||
return _total_size;
|
||||
}
|
||||
|
||||
void start();
|
||||
future<> execute();
|
||||
|
||||
void append_ranges(const dht::token_range_vector& ranges);
|
||||
void sort_and_merge_ranges();
|
||||
|
||||
4
test.py
4
test.py
@@ -44,6 +44,7 @@ boost_tests = [
|
||||
'schema_change_test',
|
||||
'sstable_mutation_test',
|
||||
'sstable_atomic_deletion_test',
|
||||
'sstable_resharding_test',
|
||||
'commitlog_test',
|
||||
'hash_test',
|
||||
'test-serialization',
|
||||
@@ -57,7 +58,7 @@ boost_tests = [
|
||||
'canonical_mutation_test',
|
||||
'gossiping_property_file_snitch_test',
|
||||
'row_cache_test',
|
||||
'cache_streamed_mutation_test',
|
||||
'cache_flat_mutation_reader_test',
|
||||
'network_topology_strategy_test',
|
||||
'query_processor_test',
|
||||
'batchlog_manager_test',
|
||||
@@ -140,7 +141,6 @@ if __name__ == "__main__":
|
||||
help='Verbose reporting')
|
||||
args = parser.parse_args()
|
||||
|
||||
black_hole = open('/dev/null', 'w')
|
||||
print_status = print_status_verbose if args.verbose else print_status_short
|
||||
|
||||
test_to_run = []
|
||||
|
||||
@@ -31,7 +31,6 @@
|
||||
#include "partition_version.hh"
|
||||
#include "mutation.hh"
|
||||
#include "memtable.hh"
|
||||
#include "cache_streamed_mutation.hh"
|
||||
#include "row_cache.hh"
|
||||
|
||||
#include "disk-error-handler.hh"
|
||||
@@ -291,7 +291,7 @@ BOOST_AUTO_TEST_CASE(test_composite_serialize_value) {
|
||||
|
||||
// Checks that a composite built from a single exploded element "el1" with the
// `start` end-of-component marker reports exactly that component back.
BOOST_AUTO_TEST_CASE(test_composite_from_exploded) {
    using components = std::vector<composite::component>;
    // NOTE(review): the `true` argument is presumably the is-compound flag of
    // composite::from_exploded - confirm against its declaration.
    BOOST_REQUIRE_EQUAL(composite::from_exploded({bytes_view(bytes({'e', 'l', '1'}))}, true, composite::eoc::start).components(),
                        components({std::make_pair(bytes("el1"), composite::eoc::start)}));
}
|
||||
|
||||
|
||||
@@ -753,6 +753,40 @@ SEASTAR_TEST_CASE(test_range_deletion_scenarios) {
|
||||
});
|
||||
}
|
||||
|
||||
// Creates a compact-storage table with ten rows in one partition and then
// issues six range deletions over the clustering column, each of which is
// expected to throw. Runs inside a seastar thread, so futures are waited on
// with get().
SEASTAR_TEST_CASE(test_range_deletion_scenarios_with_compact_storage) {
    return do_with_cql_env_thread([] (auto& e) {
        e.execute_cql("create table cf (p int, c int, v text, primary key (p, c)) with compact storage;").get();
        for (auto i = 0; i < 10; ++i) {
            e.execute_cql(sprint("insert into cf (p, c, v) values (1, %d, 'abc');", i)).get();
        }

        // Every combination of inclusive/exclusive, one- and two-sided bounds.
        const char* const range_deletions[] = {
            "delete from cf where p = 1 and c <= 3",
            "delete from cf where p = 1 and c >= 0",
            "delete from cf where p = 1 and c > 0 and c <= 3",
            "delete from cf where p = 1 and c >= 0 and c < 3",
            "delete from cf where p = 1 and c > 0 and c < 3",
            "delete from cf where p = 1 and c >= 0 and c <= 3",
        };
        for (const char* statement : range_deletions) {
            try {
                e.execute_cql(statement).get();
                BOOST_FAIL("should've thrown");
            } catch (...) { }
        }
    });
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_map_insert_update) {
|
||||
return do_with_cql_env([] (auto& e) {
|
||||
auto make_my_map_type = [] { return map_type_impl::get_instance(int32_type, int32_type, true); };
|
||||
@@ -2304,6 +2338,38 @@ SEASTAR_TEST_CASE(test_reversed_slice_with_empty_range_before_all_rows) {
|
||||
});
|
||||
}
|
||||
|
||||
// Inserts five rows into one partition, applies three overlapping range
// deletions and verifies that only the rows with v = 0 and v = 4 remain,
// both when reading in descending clustering order and in the default order.
SEASTAR_TEST_CASE(test_query_with_range_tombstones) {
    return do_with_cql_env([] (auto& e) {
        return seastar::async([&e] {
            // Schema, data and deletions, executed strictly in this order.
            const char* const setup_statements[] = {
                "CREATE TABLE test (pk int, ck int, v int, PRIMARY KEY (pk, ck));",
                "INSERT INTO test (pk, ck, v) VALUES (0, 0, 0);",
                "INSERT INTO test (pk, ck, v) VALUES (0, 2, 2);",
                "INSERT INTO test (pk, ck, v) VALUES (0, 4, 4);",
                "INSERT INTO test (pk, ck, v) VALUES (0, 5, 5);",
                "INSERT INTO test (pk, ck, v) VALUES (0, 6, 6);",
                "DELETE FROM test WHERE pk = 0 AND ck >= 1 AND ck <= 3;",
                "DELETE FROM test WHERE pk = 0 AND ck > 4 AND ck <= 8;",
                "DELETE FROM test WHERE pk = 0 AND ck > 0 AND ck <= 1;",
            };
            for (const char* statement : setup_statements) {
                e.execute_cql(statement).get();
            }

            auto descending = e.execute_cql("SELECT v FROM test WHERE pk = 0 ORDER BY ck DESC;").get0();
            assert_that(descending)
                .is_rows()
                .with_rows({
                    { int32_type->decompose(4) },
                    { int32_type->decompose(0) },
                });

            auto ascending = e.execute_cql("SELECT v FROM test WHERE pk = 0;").get0();
            assert_that(ascending)
                .is_rows()
                .with_rows({
                    { int32_type->decompose(0) },
                    { int32_type->decompose(4) },
                });
        });
    });
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_alter_table_validation) {
|
||||
return do_with_cql_env([] (auto& e) {
|
||||
return e.execute_cql("create table tatv (p1 int, c1 int, c2 int, r1 int, r2 set<int>, PRIMARY KEY (p1, c1, c2));").discard_result().then_wrapped([&e] (auto f) {
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
// Intended to be called in a seastar thread
|
||||
class flat_reader_assertions {
|
||||
flat_mutation_reader _reader;
|
||||
dht::partition_range _pr;
|
||||
private:
|
||||
mutation_fragment_opt read_next() {
|
||||
return _reader().get0();
|
||||
@@ -36,7 +37,8 @@ public:
|
||||
: _reader(std::move(reader))
|
||||
{ }
|
||||
|
||||
flat_reader_assertions& produces_partition_start(const dht::decorated_key& dk) {
|
||||
flat_reader_assertions& produces_partition_start(const dht::decorated_key& dk,
|
||||
stdx::optional<tombstone> tomb = stdx::nullopt) {
|
||||
BOOST_TEST_MESSAGE(sprint("Expecting partition start with key %s", dk));
|
||||
auto mfopt = read_next();
|
||||
if (!mfopt) {
|
||||
@@ -48,6 +50,9 @@ public:
|
||||
if (!mfopt->as_partition_start().key().equal(*_reader.schema(), dk)) {
|
||||
BOOST_FAIL(sprint("Expected: partition start with key %s, got: %s", dk, *mfopt));
|
||||
}
|
||||
if (tomb && mfopt->as_partition_start().partition_tombstone() != *tomb) {
|
||||
BOOST_FAIL(sprint("Expected: partition start with tombstone %s, got: %s", *tomb, *mfopt));
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
@@ -100,8 +105,91 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Reads the next fragment and fails the test unless it exists, has kind `k`
// and carries the clustering key whose components are the int32-serialized
// values in `ck_elements`. Returns *this for chaining.
flat_reader_assertions& produces(mutation_fragment::kind k, std::vector<int> ck_elements) {
    std::vector<bytes> exploded;
    exploded.reserve(ck_elements.size());
    for (size_t i = 0; i < ck_elements.size(); ++i) {
        exploded.emplace_back(int32_type->decompose(ck_elements[i]));
    }
    auto expected_key = clustering_key_prefix::from_exploded(*_reader.schema(), std::move(exploded));

    auto fragment = read_next();
    if (!fragment) {
        BOOST_FAIL(sprint("Expected mutation fragment %s, got end of stream", expected_key));
    }
    const auto actual_kind = fragment->mutation_fragment_kind();
    if (actual_kind != k) {
        BOOST_FAIL(sprint("Expected mutation fragment kind %s, got: %s", k, fragment->mutation_fragment_kind()));
    }
    clustering_key::equality same_key(*_reader.schema());
    if (!same_key(fragment->key(), expected_key)) {
        BOOST_FAIL(sprint("Expected key %s, got: %s", expected_key, fragment->key()));
    }
    return *this;
}
|
||||
|
||||
// Synonym for produces(const mutation&): expects the reader to yield a
// partition equal to `m`.
flat_reader_assertions& produces_partition(const mutation& m) {
|
||||
return produces(m);
|
||||
}
|
||||
|
||||
// Reads one whole mutation from the reader, requires that one was produced
// and asserts it is equal to `m`. Returns *this for chaining.
flat_reader_assertions& produces(const mutation& m) {
|
||||
auto mo = read_mutation_from_flat_mutation_reader(_reader).get0();
|
||||
BOOST_REQUIRE(bool(mo));
|
||||
// NOTE(review): presumably suspends allocation-failure injection for the
// duration of the comparison below - confirm.
memory::disable_failure_guard dfg;
|
||||
assert_that(*mo).is_equal_to(m);
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Expects a partition start for key `dk` and then moves on to the next
// partition without checking the partition's contents.
flat_reader_assertions& produces(const dht::decorated_key& dk) {
|
||||
produces_partition_start(dk);
|
||||
next_partition();
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Applies produces() to every element of `range`, in iteration order.
// Returns *this for chaining.
template<typename Range>
|
||||
flat_reader_assertions& produces(const Range& range) {
|
||||
for (auto&& m : range) {
|
||||
produces(m);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
void has_monotonic_positions() {
|
||||
position_in_partition::less_compare less(*_reader.schema());
|
||||
mutation_fragment_opt previous_fragment;
|
||||
mutation_fragment_opt previous_partition;
|
||||
bool inside_partition = false;
|
||||
for (;;) {
|
||||
auto mfo = read_next();
|
||||
if (!mfo) {
|
||||
break;
|
||||
}
|
||||
if (mfo->is_partition_start()) {
|
||||
BOOST_REQUIRE(!inside_partition);
|
||||
auto& dk = mfo->as_partition_start().key();
|
||||
if (previous_partition && !previous_partition->as_partition_start().key().less_compare(*_reader.schema(), dk)) {
|
||||
BOOST_FAIL(sprint("previous partition had greater key: prev=%s, current=%s", *previous_partition, *mfo));
|
||||
}
|
||||
previous_partition = std::move(mfo);
|
||||
previous_fragment = stdx::nullopt;
|
||||
inside_partition = true;
|
||||
} else if (mfo->is_end_of_partition()) {
|
||||
BOOST_REQUIRE(inside_partition);
|
||||
inside_partition = false;
|
||||
} else {
|
||||
BOOST_REQUIRE(inside_partition);
|
||||
if (previous_fragment) {
|
||||
if (!less(previous_fragment->position(), mfo->position())) {
|
||||
BOOST_FAIL(sprint("previous fragment has greater position: prev=%s, current=%s", *previous_fragment, *mfo));
|
||||
}
|
||||
}
|
||||
previous_fragment = std::move(mfo);
|
||||
}
|
||||
}
|
||||
BOOST_REQUIRE(!inside_partition);
|
||||
}
|
||||
|
||||
// Fast-forwards the reader to partition range `pr` and waits for completion.
// The range is first copied into the `_pr` member and the reader is given
// that copy rather than the caller's argument.
// NOTE(review): presumably the reader keeps a reference to the range, which
// is why a member copy is used - confirm against flat_mutation_reader.
// Returns *this for chaining. Must run in a seastar thread because of get().
flat_reader_assertions& fast_forward_to(const dht::partition_range& pr) {
    _pr = pr;
    // Forward exactly once and wait for it; a second, un-awaited call would
    // leave a discarded future behind.
    _reader.fast_forward_to(_pr).get();
    return *this;
}
|
||||
|
||||
@@ -109,6 +197,25 @@ public:
|
||||
_reader.next_partition();
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Fast-forwards the reader to the position range `pr` and waits for the
// operation to finish. Returns *this for chaining.
flat_reader_assertions& fast_forward_to(position_range pr) {
|
||||
_reader.fast_forward_to(std::move(pr)).get();
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Reads one whole mutation from the reader, requires that one was produced
// and wraps it in a mutation_assertion for further checks.
mutation_assertion next_mutation() {
|
||||
auto mo = read_mutation_from_flat_mutation_reader(_reader).get0();
|
||||
BOOST_REQUIRE(bool(mo));
|
||||
return mutation_assertion(std::move(*mo));
|
||||
}
|
||||
|
||||
// Forwards to the underlying reader's fill_buffer() and returns its future
// without waiting on it.
future<> fill_buffer() {
|
||||
return _reader.fill_buffer();
|
||||
}
|
||||
|
||||
// Forwards `size` to the underlying reader's set_max_buffer_size().
void set_max_buffer_size(size_t size) {
|
||||
_reader.set_max_buffer_size(size);
|
||||
}
|
||||
};
|
||||
|
||||
inline
|
||||
|
||||
@@ -96,14 +96,13 @@ static void test_conversion_to_flat_mutation_reader_through_mutation_reader(cons
|
||||
|
||||
static void test_conversion(const std::vector<mutation>& mutations) {
|
||||
BOOST_REQUIRE(!mutations.empty());
|
||||
auto schema = mutations[0].schema();
|
||||
auto flat_reader = flat_mutation_reader_from_mutations(std::vector<mutation>(mutations), streamed_mutation::forwarding::no);
|
||||
for (auto& m : mutations) {
|
||||
mutation_opt m2 = read_mutation_from_flat_mutation_reader(schema, flat_reader).get0();
|
||||
mutation_opt m2 = read_mutation_from_flat_mutation_reader(flat_reader).get0();
|
||||
BOOST_REQUIRE(m2);
|
||||
BOOST_REQUIRE_EQUAL(m, *m2);
|
||||
}
|
||||
BOOST_REQUIRE(!read_mutation_from_flat_mutation_reader(schema, flat_reader).get0());
|
||||
BOOST_REQUIRE(!read_mutation_from_flat_mutation_reader(flat_reader).get0());
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -453,12 +452,20 @@ SEASTAR_TEST_CASE(test_multi_range_reader) {
|
||||
auto keys = s.make_pkeys(10);
|
||||
auto ring = s.to_ring_positions(keys);
|
||||
|
||||
auto ms = boost::copy_range<std::vector<mutation>>(keys | boost::adaptors::transformed([&s] (auto& key) {
|
||||
return mutation(key, s.schema());
|
||||
auto crs = boost::copy_range<std::vector<mutation_fragment>>(boost::irange(0, 3) | boost::adaptors::transformed([&] (auto n) {
|
||||
return s.make_row(s.make_ckey(n), "value");
|
||||
}));
|
||||
|
||||
auto ms = boost::copy_range<std::vector<mutation>>(keys | boost::adaptors::transformed([&] (auto& key) {
|
||||
auto m = mutation(key, s.schema());
|
||||
for (auto& mf : crs) {
|
||||
m.apply(mf);
|
||||
}
|
||||
return m;
|
||||
}));
|
||||
|
||||
auto source = mutation_source([&] (schema_ptr, const dht::partition_range& range) {
|
||||
return make_reader_returning_many(std::move(ms), range);
|
||||
return make_reader_returning_many(ms, range);
|
||||
});
|
||||
|
||||
auto ranges = dht::partition_range_vector {
|
||||
@@ -468,18 +475,188 @@ SEASTAR_TEST_CASE(test_multi_range_reader) {
|
||||
};
|
||||
auto fft_range = dht::partition_range::make_starting_with(ring[9]);
|
||||
|
||||
assert_that(make_flat_multi_range_reader(s.schema(), std::move(source), ranges, s.schema()->full_slice()))
|
||||
.produces_partition_start(keys[1])
|
||||
.produces_partition_end()
|
||||
.produces_partition_start(keys[2])
|
||||
.produces_partition_end()
|
||||
.produces_partition_start(keys[4])
|
||||
.produces_partition_end()
|
||||
.produces_partition_start(keys[6])
|
||||
.produces_partition_end()
|
||||
BOOST_TEST_MESSAGE("read full partitions and fast forward");
|
||||
assert_that(make_flat_multi_range_reader(s.schema(), source, ranges, s.schema()->full_slice()))
|
||||
.produces(ms[1])
|
||||
.produces(ms[2])
|
||||
.produces(ms[4])
|
||||
.produces(ms[6])
|
||||
.fast_forward_to(fft_range)
|
||||
.produces(ms[9])
|
||||
.produces_end_of_stream();
|
||||
|
||||
BOOST_TEST_MESSAGE("read, skip partitions and fast forward");
|
||||
assert_that(make_flat_multi_range_reader(s.schema(), source, ranges, s.schema()->full_slice()))
|
||||
.produces_partition_start(keys[1])
|
||||
.next_partition()
|
||||
.produces_partition_start(keys[2])
|
||||
.produces_row_with_key(crs[0].as_clustering_row().key())
|
||||
.next_partition()
|
||||
.produces(ms[4])
|
||||
.next_partition()
|
||||
.produces_partition_start(keys[6])
|
||||
.produces_row_with_key(crs[0].as_clustering_row().key())
|
||||
.produces_row_with_key(crs[1].as_clustering_row().key())
|
||||
.fast_forward_to(fft_range)
|
||||
.next_partition()
|
||||
.produces_partition_start(keys[9])
|
||||
.produces_partition_end()
|
||||
.next_partition()
|
||||
.produces_end_of_stream();
|
||||
});
|
||||
}
|
||||
|
||||
using reversed_partitions = seastar::bool_class<class reversed_partitions_tag>;
|
||||
using skip_after_first_fragment = seastar::bool_class<class skip_after_first_fragment_tag>;
|
||||
using skip_after_first_partition = seastar::bool_class<class skip_after_first_partition_tag>;
|
||||
using in_thread = seastar::bool_class<class in_thread_tag>;
|
||||
|
||||
struct flat_stream_consumer {
|
||||
schema_ptr _schema;
|
||||
reversed_partitions _reversed;
|
||||
skip_after_first_fragment _skip_partition;
|
||||
skip_after_first_partition _skip_stream;
|
||||
std::vector<mutation> _mutations;
|
||||
stdx::optional<position_in_partition> _previous_position;
|
||||
bool _inside_partition = false;
|
||||
private:
|
||||
void verify_order(position_in_partition_view pos) {
|
||||
position_in_partition::less_compare cmp(*_schema);
|
||||
if (!_reversed) {
|
||||
BOOST_REQUIRE(!_previous_position || _previous_position->is_static_row()
|
||||
|| cmp(*_previous_position, pos));
|
||||
} else {
|
||||
BOOST_REQUIRE(!_previous_position || _previous_position->is_static_row()
|
||||
|| cmp(pos, *_previous_position));
|
||||
}
|
||||
}
|
||||
public:
|
||||
flat_stream_consumer(schema_ptr s, reversed_partitions reversed,
|
||||
skip_after_first_fragment skip_partition = skip_after_first_fragment::no,
|
||||
skip_after_first_partition skip_stream = skip_after_first_partition::no)
|
||||
: _schema(std::move(s))
|
||||
, _reversed(reversed)
|
||||
, _skip_partition(skip_partition)
|
||||
, _skip_stream(skip_stream)
|
||||
{ }
|
||||
void consume_new_partition(dht::decorated_key dk) {
|
||||
BOOST_REQUIRE(!_inside_partition);
|
||||
BOOST_REQUIRE(!_previous_position);
|
||||
_mutations.emplace_back(dk, _schema);
|
||||
_inside_partition = true;
|
||||
}
|
||||
void consume(tombstone pt) {
|
||||
BOOST_REQUIRE(_inside_partition);
|
||||
BOOST_REQUIRE(!_previous_position);
|
||||
BOOST_REQUIRE_GE(_mutations.size(), 1);
|
||||
_mutations.back().partition().apply(pt);
|
||||
}
|
||||
stop_iteration consume(static_row&& sr) {
|
||||
BOOST_REQUIRE(_inside_partition);
|
||||
BOOST_REQUIRE(!_previous_position);
|
||||
BOOST_REQUIRE_GE(_mutations.size(), 1);
|
||||
_previous_position.emplace(sr.position());
|
||||
_mutations.back().partition().apply(*_schema, mutation_fragment(std::move(sr)));
|
||||
return stop_iteration(bool(_skip_partition));
|
||||
}
|
||||
stop_iteration consume(clustering_row&& cr) {
|
||||
BOOST_REQUIRE(_inside_partition);
|
||||
verify_order(cr.position());
|
||||
BOOST_REQUIRE_GE(_mutations.size(), 1);
|
||||
_previous_position.emplace(cr.position());
|
||||
_mutations.back().partition().apply(*_schema, mutation_fragment(std::move(cr)));
|
||||
return stop_iteration(bool(_skip_partition));
|
||||
}
|
||||
stop_iteration consume(range_tombstone&& rt) {
|
||||
BOOST_REQUIRE(_inside_partition);
|
||||
auto pos = _reversed ? rt.end_position() : rt.position();
|
||||
verify_order(pos);
|
||||
BOOST_REQUIRE_GE(_mutations.size(), 1);
|
||||
_previous_position.emplace(pos);
|
||||
_mutations.back().partition().apply(*_schema, mutation_fragment(std::move(rt)));
|
||||
return stop_iteration(bool(_skip_partition));
|
||||
}
|
||||
stop_iteration consume_end_of_partition() {
|
||||
BOOST_REQUIRE(_inside_partition);
|
||||
BOOST_REQUIRE_GE(_mutations.size(), 1);
|
||||
_previous_position = stdx::nullopt;
|
||||
_inside_partition = false;
|
||||
return stop_iteration(bool(_skip_stream));
|
||||
}
|
||||
std::vector<mutation> consume_end_of_stream() {
|
||||
BOOST_REQUIRE(!_inside_partition);
|
||||
return std::move(_mutations);
|
||||
}
|
||||
};
|
||||
|
||||
// Checks that consuming a flat_mutation_reader built from `muts` through a
// flat_stream_consumer yields the expected mutations in four scenarios:
// consuming everything, stopping after the first fragment of each partition,
// stopping after the first partition and (thread mode only) consuming with a
// partition filter.
//
// s        - schema handed to the consumer and used for key comparison
// muts     - mutations the reader under test is built from
// reversed - whether partitions are consumed reversed; must be "no" when
//            `thread` is set (asserted below)
// thread   - use consume_in_thread() instead of consume()
void test_flat_stream(schema_ptr s, std::vector<mutation> muts, reversed_partitions reversed, in_thread thread) {
    auto reversed_msg = reversed ? ", reversed partitions" : "";

    // Dispatches to the consumption API selected by `thread`.
    auto consume_fn = [&] (flat_mutation_reader& fmr, flat_stream_consumer fsc) {
        if (thread) {
            // consume_in_thread() is only exercised with non-reversed partitions.
            assert(bool(!reversed));
            return fmr.consume_in_thread(std::move(fsc));
        } else {
            auto reversed_flag = flat_mutation_reader::consume_reversed_partitions(bool(reversed));
            return fmr.consume(std::move(fsc), reversed_flag).get0();
        }
    };

    BOOST_TEST_MESSAGE(sprint("Consume all%s", reversed_msg));
    auto fmr = flat_mutation_reader_from_mutations(muts, streamed_mutation::forwarding::no);
    auto muts2 = consume_fn(fmr, flat_stream_consumer(s, reversed));
    BOOST_REQUIRE_EQUAL(muts, muts2);

    BOOST_TEST_MESSAGE(sprint("Consume first fragment from partition%s", reversed_msg));
    fmr = flat_mutation_reader_from_mutations(muts, streamed_mutation::forwarding::no);
    muts2 = consume_fn(fmr, flat_stream_consumer(s, reversed, skip_after_first_fragment::yes));
    BOOST_REQUIRE_EQUAL(muts.size(), muts2.size());
    for (auto j = 0u; j < muts.size(); j++) {
        BOOST_REQUIRE(muts[j].decorated_key().equal(*muts[j].schema(), muts2[j].decorated_key()));
        auto& mp = muts2[j].partition();
        // At most one fragment may have been consumed per partition. The static
        // row counts as a fragment only when it is present (i.e. not empty).
        BOOST_REQUIRE_LE(!mp.static_row().empty() + mp.clustered_rows().calculate_size() + mp.row_tombstones().size(), 1);
        // Whatever was consumed must be a subset of the source mutation:
        // applying it on top of the original must not change the original.
        auto m = muts[j];
        m.apply(muts2[j]);
        BOOST_REQUIRE_EQUAL(m, muts[j]);
    }

    BOOST_TEST_MESSAGE(sprint("Consume first partition%s", reversed_msg));
    fmr = flat_mutation_reader_from_mutations(muts, streamed_mutation::forwarding::no);
    muts2 = consume_fn(fmr, flat_stream_consumer(s, reversed, skip_after_first_fragment::no,
                                                 skip_after_first_partition::yes));
    BOOST_REQUIRE_EQUAL(muts2.size(), 1);
    BOOST_REQUIRE_EQUAL(muts2[0], muts[0]);

    if (thread) {
        // Rejects every partition found at an even index of `muts`, so only
        // the odd-indexed ones are expected to reach the consumer.
        auto filter = [&] (const dht::decorated_key& dk) {
            for (auto j = 0u; j < muts.size(); j += 2) {
                if (dk.equal(*s, muts[j].decorated_key())) {
                    return false;
                }
            }
            return true;
        };
        BOOST_TEST_MESSAGE("Consume all, filtered");
        fmr = flat_mutation_reader_from_mutations(muts, streamed_mutation::forwarding::no);
        muts2 = fmr.consume_in_thread(flat_stream_consumer(s, reversed), std::move(filter));
        BOOST_REQUIRE_EQUAL(muts.size() / 2, muts2.size());
        for (auto j = 1u; j < muts.size(); j += 2) {
            BOOST_REQUIRE_EQUAL(muts[j], muts2[j / 2]);
        }
    }
}
|
||||
|
||||
// Runs test_flat_stream() against randomly generated mutations, once without
// and once with counters. Each generator is sampled four times and every
// sample is checked in three modes: plain, reversed partitions, and in-thread.
SEASTAR_TEST_CASE(test_consume_flat) {
    return seastar::async([] {
        auto check_generator = [] (random_mutation_generator&& generator) {
            for (int round = 0; round != 4; ++round) {
                auto sample = generator(4);
                test_flat_stream(generator.schema(), sample, reversed_partitions::no, in_thread::no);
                test_flat_stream(generator.schema(), sample, reversed_partitions::yes, in_thread::no);
                test_flat_stream(generator.schema(), sample, reversed_partitions::no, in_thread::yes);
            }
        };

        using counters = random_mutation_generator::generate_counters;
        check_generator(random_mutation_generator(counters::no));
        check_generator(random_mutation_generator(counters::yes));
    });
}
|
||||
|
||||
@@ -63,9 +63,9 @@ private:
|
||||
auto count = _memtables.size();
|
||||
auto op = _apply.start();
|
||||
auto new_mt = make_lw_shared<memtable>(_memtables.back()->schema());
|
||||
std::vector<mutation_reader> readers;
|
||||
std::vector<flat_mutation_reader> readers;
|
||||
for (auto&& mt : _memtables) {
|
||||
readers.push_back(mt->make_reader(new_mt->schema(),
|
||||
readers.push_back(mt->make_flat_reader(new_mt->schema(),
|
||||
query::full_partition_range,
|
||||
new_mt->schema()->full_slice(),
|
||||
default_priority_class(),
|
||||
@@ -73,8 +73,8 @@ private:
|
||||
streamed_mutation::forwarding::no,
|
||||
mutation_reader::forwarding::yes));
|
||||
}
|
||||
auto&& rd = make_combined_reader(std::move(readers), mutation_reader::forwarding::yes);
|
||||
consume(rd, [&] (mutation&& m) {
|
||||
auto&& rd = make_combined_reader(new_mt->schema(), std::move(readers));
|
||||
consume_partitions(rd, [&] (mutation&& m) {
|
||||
new_mt->apply(std::move(m));
|
||||
return stop_iteration::no;
|
||||
}).get();
|
||||
|
||||
@@ -32,6 +32,8 @@
|
||||
#include "mutation_source_test.hh"
|
||||
#include "mutation_reader_assertions.hh"
|
||||
#include "mutation_assertions.hh"
|
||||
#include "flat_mutation_reader_assertions.hh"
|
||||
#include "flat_mutation_reader.hh"
|
||||
|
||||
#include "disk-error-handler.hh"
|
||||
|
||||
@@ -84,6 +86,68 @@ SEASTAR_TEST_CASE(test_memtable_conforms_to_mutation_source) {
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_memtable_flush_reader) {
    // Memtable flush reader is severely limited, it always assumes that
    // the full partition range is being read and that
    // streamed_mutation::forwarding is set to no. Therefore, we cannot use
    // run_mutation_source_tests() to test it.
    return seastar::async([] {
        // Builds a memtable owned by `mgr` and applies every mutation to it.
        // The mutations are taken by const reference: the helper only reads
        // them, so there is no need to copy the whole vector on each call.
        auto make_memtable = [] (dirty_memory_manager& mgr, const std::vector<mutation>& muts) {
            assert(!muts.empty());
            auto mt = make_lw_shared<memtable>(muts.front().schema(), mgr);
            for (const auto& m : muts) {
                mt->apply(m);
            }
            return mt;
        };

        auto test_random_streams = [&] (random_mutation_generator&& gen) {
            for (auto i = 0; i < 4; i++) {
                dirty_memory_manager mgr;
                auto muts = gen(4);

                BOOST_TEST_MESSAGE("Simple read");
                auto mt = make_memtable(mgr, muts);
                assert_that(mt->make_flush_reader(gen.schema(), default_priority_class()))
                    .produces_partition(muts[0])
                    .produces_partition(muts[1])
                    .produces_partition(muts[2])
                    .produces_partition(muts[3])
                    .produces_end_of_stream();

                // A fresh memtable is built for every scenario.
                BOOST_TEST_MESSAGE("Read with next_partition() calls between partition");
                mt = make_memtable(mgr, muts);
                assert_that(mt->make_flush_reader(gen.schema(), default_priority_class()))
                    .next_partition()
                    .produces_partition(muts[0])
                    .next_partition()
                    .produces_partition(muts[1])
                    .next_partition()
                    .produces_partition(muts[2])
                    .next_partition()
                    .produces_partition(muts[3])
                    .next_partition()
                    .produces_end_of_stream();

                BOOST_TEST_MESSAGE("Read with next_partition() calls inside partitions");
                mt = make_memtable(mgr, muts);
                assert_that(mt->make_flush_reader(gen.schema(), default_priority_class()))
                    .produces_partition(muts[0])
                    .produces_partition_start(muts[1].decorated_key(), muts[1].partition().partition_tombstone())
                    .next_partition()
                    .produces_partition(muts[2])
                    .next_partition()
                    .produces_partition_start(muts[3].decorated_key(), muts[3].partition().partition_tombstone())
                    .next_partition()
                    .produces_end_of_stream();
            }
        };

        test_random_streams(random_mutation_generator(random_mutation_generator::generate_counters::no));
        test_random_streams(random_mutation_generator(random_mutation_generator::generate_counters::yes));
    });
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_adding_a_column_during_reading_doesnt_affect_read_result) {
|
||||
return seastar::async([] {
|
||||
auto common_builder = schema_builder("ks", "cf")
|
||||
@@ -107,8 +171,8 @@ SEASTAR_TEST_CASE(test_adding_a_column_during_reading_doesnt_affect_read_result)
|
||||
mt->apply(m);
|
||||
}
|
||||
|
||||
auto check_rd_s1 = assert_that(mt->make_reader(s1));
|
||||
auto check_rd_s2 = assert_that(mt->make_reader(s2));
|
||||
auto check_rd_s1 = assert_that(mt->make_flat_reader(s1));
|
||||
auto check_rd_s2 = assert_that(mt->make_flat_reader(s2));
|
||||
check_rd_s1.next_mutation().has_schema(s1).is_equal_to(ring[0]);
|
||||
check_rd_s2.next_mutation().has_schema(s2).is_equal_to(ring[0]);
|
||||
mt->set_schema(s2);
|
||||
@@ -119,13 +183,13 @@ SEASTAR_TEST_CASE(test_adding_a_column_during_reading_doesnt_affect_read_result)
|
||||
check_rd_s1.produces_end_of_stream();
|
||||
check_rd_s2.produces_end_of_stream();
|
||||
|
||||
assert_that(mt->make_reader(s1))
|
||||
assert_that(mt->make_flat_reader(s1))
|
||||
.produces(ring[0])
|
||||
.produces(ring[1])
|
||||
.produces(ring[2])
|
||||
.produces_end_of_stream();
|
||||
|
||||
assert_that(mt->make_reader(s2))
|
||||
assert_that(mt->make_flat_reader(s2))
|
||||
.produces(ring[0])
|
||||
.produces(ring[1])
|
||||
.produces(ring[2])
|
||||
@@ -156,8 +220,9 @@ SEASTAR_TEST_CASE(test_virtual_dirty_accounting_on_flush) {
|
||||
}
|
||||
|
||||
// Create a reader which will cause many partition versions to be created
|
||||
auto rd1 = mt->make_reader(s);
|
||||
streamed_mutation_opt part0_stream = rd1().get0();
|
||||
flat_mutation_reader_opt rd1 = mt->make_flat_reader(s);
|
||||
rd1->set_max_buffer_size(1);
|
||||
rd1->fill_buffer().get();
|
||||
|
||||
// Override large cell value with a short one
|
||||
{
|
||||
@@ -172,18 +237,17 @@ SEASTAR_TEST_CASE(test_virtual_dirty_accounting_on_flush) {
|
||||
virtual_dirty_values.push_back(mgr.virtual_dirty_memory());
|
||||
|
||||
auto flush_reader_check = assert_that(mt->make_flush_reader(s, service::get_local_priority_manager().memtable_flush_priority()));
|
||||
flush_reader_check.produces(current_ring[0]);
|
||||
flush_reader_check.produces_partition(current_ring[0]);
|
||||
virtual_dirty_values.push_back(mgr.virtual_dirty_memory());
|
||||
flush_reader_check.produces(current_ring[1]);
|
||||
flush_reader_check.produces_partition(current_ring[1]);
|
||||
virtual_dirty_values.push_back(mgr.virtual_dirty_memory());
|
||||
|
||||
part0_stream = {};
|
||||
|
||||
while (rd1().get0()) ;
|
||||
while ((*rd1)().get0()) ;
|
||||
rd1 = {};
|
||||
|
||||
logalloc::shard_tracker().full_compaction();
|
||||
|
||||
flush_reader_check.produces(current_ring[2]);
|
||||
flush_reader_check.produces_partition(current_ring[2]);
|
||||
virtual_dirty_values.push_back(mgr.virtual_dirty_memory());
|
||||
flush_reader_check.produces_end_of_stream();
|
||||
virtual_dirty_values.push_back(mgr.virtual_dirty_memory());
|
||||
@@ -219,56 +283,47 @@ SEASTAR_TEST_CASE(test_partition_version_consistency_after_lsa_compaction_happen
|
||||
m3.set_clustered_cell(ck3, to_bytes("col"), data_value(bytes(bytes::initialized_later(), 8)), next_timestamp());
|
||||
|
||||
mt->apply(m1);
|
||||
auto rd1 = mt->make_reader(s);
|
||||
streamed_mutation_opt stream1 = rd1().get0();
|
||||
stdx::optional<flat_reader_assertions> rd1 = assert_that(mt->make_flat_reader(s));
|
||||
rd1->set_max_buffer_size(1);
|
||||
rd1->fill_buffer().get();
|
||||
|
||||
mt->apply(m2);
|
||||
auto rd2 = mt->make_reader(s);
|
||||
streamed_mutation_opt stream2 = rd2().get0();
|
||||
stdx::optional<flat_reader_assertions> rd2 = assert_that(mt->make_flat_reader(s));
|
||||
rd2->set_max_buffer_size(1);
|
||||
rd2->fill_buffer().get();
|
||||
|
||||
mt->apply(m3);
|
||||
auto rd3 = mt->make_reader(s);
|
||||
streamed_mutation_opt stream3 = rd3().get0();
|
||||
stdx::optional<flat_reader_assertions> rd3 = assert_that(mt->make_flat_reader(s));
|
||||
rd3->set_max_buffer_size(1);
|
||||
rd3->fill_buffer().get();
|
||||
|
||||
logalloc::shard_tracker().full_compaction();
|
||||
|
||||
auto rd4 = mt->make_reader(s);
|
||||
streamed_mutation_opt stream4 = rd4().get0();
|
||||
auto rd5 = mt->make_reader(s);
|
||||
streamed_mutation_opt stream5 = rd5().get0();
|
||||
auto rd6 = mt->make_reader(s);
|
||||
streamed_mutation_opt stream6 = rd6().get0();
|
||||
|
||||
assert_that(mutation_from_streamed_mutation(std::move(stream1)).get0()).has_mutation().is_equal_to(m1);
|
||||
assert_that(mutation_from_streamed_mutation(std::move(stream2)).get0()).has_mutation().is_equal_to(m1 + m2);
|
||||
assert_that(mutation_from_streamed_mutation(std::move(stream3)).get0()).has_mutation().is_equal_to(m1 + m2 + m3);
|
||||
auto rd4 = assert_that(mt->make_flat_reader(s));
|
||||
rd4.set_max_buffer_size(1);
|
||||
rd4.fill_buffer().get();
|
||||
auto rd5 = assert_that(mt->make_flat_reader(s));
|
||||
rd5.set_max_buffer_size(1);
|
||||
rd5.fill_buffer().get();
|
||||
auto rd6 = assert_that(mt->make_flat_reader(s));
|
||||
rd6.set_max_buffer_size(1);
|
||||
rd6.fill_buffer().get();
|
||||
|
||||
rd1->next_mutation().is_equal_to(m1);
|
||||
rd2->next_mutation().is_equal_to(m1 + m2);
|
||||
rd3->next_mutation().is_equal_to(m1 + m2 + m3);
|
||||
rd3 = {};
|
||||
|
||||
assert_that(mutation_from_streamed_mutation(std::move(stream4)).get0()).has_mutation().is_equal_to(m1 + m2 + m3);
|
||||
|
||||
rd4.next_mutation().is_equal_to(m1 + m2 + m3);
|
||||
rd1 = {};
|
||||
|
||||
assert_that(mutation_from_streamed_mutation(std::move(stream5)).get0()).has_mutation().is_equal_to(m1 + m2 + m3);
|
||||
|
||||
rd5.next_mutation().is_equal_to(m1 + m2 + m3);
|
||||
rd2 = {};
|
||||
|
||||
assert_that(mutation_from_streamed_mutation(std::move(stream6)).get0()).has_mutation().is_equal_to(m1 + m2 + m3);
|
||||
rd6.next_mutation().is_equal_to(m1 + m2 + m3);
|
||||
});
|
||||
}
|
||||
|
||||
struct function_invoking_consumer {
|
||||
std::function<void()> func;
|
||||
|
||||
template<typename T>
|
||||
stop_iteration consume(T t) {
|
||||
func();
|
||||
return stop_iteration::no;
|
||||
}
|
||||
|
||||
void consume_end_of_stream() { }
|
||||
};
|
||||
|
||||
// Reproducer for #1746
|
||||
SEASTAR_TEST_CASE(test_segment_migration_during_flush) {
|
||||
return seastar::async([] {
|
||||
@@ -300,15 +355,14 @@ SEASTAR_TEST_CASE(test_segment_migration_during_flush) {
|
||||
|
||||
auto rd = mt->make_flush_reader(s, service::get_local_priority_manager().memtable_flush_priority());
|
||||
|
||||
auto consume_mutation = [] (streamed_mutation_opt part) {
|
||||
assert(part);
|
||||
consume(*part, function_invoking_consumer{[] {
|
||||
logalloc::shard_tracker().full_compaction();
|
||||
}}).get();
|
||||
};
|
||||
|
||||
for (int i = 0; i < partitions; ++i) {
|
||||
consume_mutation(rd().get0());
|
||||
auto mfopt = rd().get0();
|
||||
BOOST_REQUIRE(bool(mfopt));
|
||||
BOOST_REQUIRE(mfopt->is_partition_start());
|
||||
while (!mfopt->is_end_of_partition()) {
|
||||
logalloc::shard_tracker().full_compaction();
|
||||
mfopt = rd().get0();
|
||||
}
|
||||
virtual_dirty_values.push_back(mgr.virtual_dirty_memory());
|
||||
}
|
||||
|
||||
@@ -337,24 +391,12 @@ SEASTAR_TEST_CASE(test_fast_forward_to_after_memtable_is_flushed) {
|
||||
mt2->apply(m);
|
||||
}
|
||||
|
||||
auto rd = mt->make_reader(s);
|
||||
|
||||
auto sm_opt = rd().get0();
|
||||
BOOST_REQUIRE(sm_opt);
|
||||
BOOST_REQUIRE(sm_opt->key().equal(*s, ring[0].key()));
|
||||
auto rd = assert_that(mt->make_flat_reader(s));
|
||||
rd.produces(ring[0]);
|
||||
mt->mark_flushed(mt2->as_data_source());
|
||||
sm_opt = rd().get0();
|
||||
BOOST_REQUIRE(sm_opt);
|
||||
BOOST_REQUIRE(sm_opt->key().equal(*s, ring[1].key()));
|
||||
|
||||
rd.produces(ring[1]);
|
||||
auto range = dht::partition_range::make_starting_with(dht::ring_position(ring[3].decorated_key()));
|
||||
rd.fast_forward_to(range);
|
||||
sm_opt = rd().get0();
|
||||
BOOST_REQUIRE(sm_opt);
|
||||
BOOST_REQUIRE(sm_opt->key().equal(*s, ring[3].key()));
|
||||
sm_opt = rd().get0();
|
||||
BOOST_REQUIRE(sm_opt);
|
||||
BOOST_REQUIRE(sm_opt->key().equal(*s, ring[4].key()));
|
||||
BOOST_REQUIRE(!rd().get0());
|
||||
rd.produces(ring[3]).produces(ring[4]).produces_end_of_stream();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -30,9 +30,11 @@
|
||||
#include "tests/test-utils.hh"
|
||||
#include "tests/mutation_assertions.hh"
|
||||
#include "tests/mutation_reader_assertions.hh"
|
||||
#include "tests/flat_mutation_reader_assertions.hh"
|
||||
#include "tests/tmpdir.hh"
|
||||
#include "tests/sstable_utils.hh"
|
||||
#include "tests/simple_schema.hh"
|
||||
#include "tests/test_services.hh"
|
||||
|
||||
#include "mutation_reader.hh"
|
||||
#include "schema_builder.hh"
|
||||
@@ -62,7 +64,7 @@ SEASTAR_TEST_CASE(test_combining_two_readers_with_the_same_row) {
|
||||
mutation m2(partition_key::from_single_value(*s, "key1"), s);
|
||||
m2.set_clustered_cell(clustering_key::make_empty(), "v", data_value(bytes("v2")), 2);
|
||||
|
||||
assert_that(make_combined_reader(make_reader_returning(m1), make_reader_returning(m2)))
|
||||
assert_that(make_combined_reader(s, make_reader_returning(m1), make_reader_returning(m2)))
|
||||
.produces(m2)
|
||||
.produces_end_of_stream();
|
||||
});
|
||||
@@ -78,7 +80,7 @@ SEASTAR_TEST_CASE(test_combining_two_non_overlapping_readers) {
|
||||
mutation m2(partition_key::from_single_value(*s, "keyA"), s);
|
||||
m2.set_clustered_cell(clustering_key::make_empty(), "v", data_value(bytes("v2")), 2);
|
||||
|
||||
auto cr = make_combined_reader(make_reader_returning(m1), make_reader_returning(m2));
|
||||
auto cr = make_combined_reader(s, make_reader_returning(m1), make_reader_returning(m2));
|
||||
assert_that(std::move(cr))
|
||||
.produces(m2)
|
||||
.produces(m1)
|
||||
@@ -100,7 +102,7 @@ SEASTAR_TEST_CASE(test_combining_two_partially_overlapping_readers) {
|
||||
mutation m3(partition_key::from_single_value(*s, "keyC"), s);
|
||||
m3.set_clustered_cell(clustering_key::make_empty(), "v", data_value(bytes("v3")), 1);
|
||||
|
||||
assert_that(make_combined_reader(make_reader_returning_many({m1, m2}, slice), make_reader_returning_many({m2, m3}, slice)))
|
||||
assert_that(make_combined_reader(s, make_reader_returning_many({m1, m2}, slice), make_reader_returning_many({m2, m3}, slice)))
|
||||
.produces(m1)
|
||||
.produces(m2)
|
||||
.produces(m3)
|
||||
@@ -123,7 +125,7 @@ SEASTAR_TEST_CASE(test_combining_one_reader_with_many_partitions) {
|
||||
|
||||
std::vector<mutation_reader> v;
|
||||
v.push_back(make_reader_returning_many({m1, m2, m3}));
|
||||
assert_that(make_combined_reader(std::move(v), mutation_reader::forwarding::no))
|
||||
assert_that(make_combined_reader(s, std::move(v), streamed_mutation::forwarding::no, mutation_reader::forwarding::no))
|
||||
.produces(m1)
|
||||
.produces(m2)
|
||||
.produces(m3)
|
||||
@@ -151,8 +153,8 @@ SEASTAR_TEST_CASE(test_filtering) {
|
||||
auto m4 = make_mutation_with_key(s, "key4");
|
||||
|
||||
// All pass
|
||||
assert_that(make_filtering_reader(make_reader_returning_many({m1, m2, m3, m4}),
|
||||
[] (const streamed_mutation& m) { return true; }))
|
||||
assert_that(make_filtering_reader(flat_mutation_reader_from_mutations({m1, m2, m3, m4}),
|
||||
[] (const dht::decorated_key& dk) { return true; }))
|
||||
.produces(m1)
|
||||
.produces(m2)
|
||||
.produces(m3)
|
||||
@@ -160,48 +162,48 @@ SEASTAR_TEST_CASE(test_filtering) {
|
||||
.produces_end_of_stream();
|
||||
|
||||
// None pass
|
||||
assert_that(make_filtering_reader(make_reader_returning_many({m1, m2, m3, m4}),
|
||||
[] (const streamed_mutation& m) { return false; }))
|
||||
assert_that(make_filtering_reader(flat_mutation_reader_from_mutations({m1, m2, m3, m4}),
|
||||
[] (const dht::decorated_key& dk) { return false; }))
|
||||
.produces_end_of_stream();
|
||||
|
||||
// Trim front
|
||||
assert_that(make_filtering_reader(make_reader_returning_many({m1, m2, m3, m4}),
|
||||
[&] (const streamed_mutation& m) { return !m.key().equal(*s, m1.key()); }))
|
||||
assert_that(make_filtering_reader(flat_mutation_reader_from_mutations({m1, m2, m3, m4}),
|
||||
[&] (const dht::decorated_key& dk) { return !dk.key().equal(*s, m1.key()); }))
|
||||
.produces(m2)
|
||||
.produces(m3)
|
||||
.produces(m4)
|
||||
.produces_end_of_stream();
|
||||
|
||||
assert_that(make_filtering_reader(make_reader_returning_many({m1, m2, m3, m4}),
|
||||
[&] (const streamed_mutation& m) { return !m.key().equal(*s, m1.key()) && !m.key().equal(*s, m2.key()); }))
|
||||
assert_that(make_filtering_reader(flat_mutation_reader_from_mutations({m1, m2, m3, m4}),
|
||||
[&] (const dht::decorated_key& dk) { return !dk.key().equal(*s, m1.key()) && !dk.key().equal(*s, m2.key()); }))
|
||||
.produces(m3)
|
||||
.produces(m4)
|
||||
.produces_end_of_stream();
|
||||
|
||||
// Trim back
|
||||
assert_that(make_filtering_reader(make_reader_returning_many({m1, m2, m3, m4}),
|
||||
[&] (const streamed_mutation& m) { return !m.key().equal(*s, m4.key()); }))
|
||||
assert_that(make_filtering_reader(flat_mutation_reader_from_mutations({m1, m2, m3, m4}),
|
||||
[&] (const dht::decorated_key& dk) { return !dk.key().equal(*s, m4.key()); }))
|
||||
.produces(m1)
|
||||
.produces(m2)
|
||||
.produces(m3)
|
||||
.produces_end_of_stream();
|
||||
|
||||
assert_that(make_filtering_reader(make_reader_returning_many({m1, m2, m3, m4}),
|
||||
[&] (const streamed_mutation& m) { return !m.key().equal(*s, m4.key()) && !m.key().equal(*s, m3.key()); }))
|
||||
assert_that(make_filtering_reader(flat_mutation_reader_from_mutations({m1, m2, m3, m4}),
|
||||
[&] (const dht::decorated_key& dk) { return !dk.key().equal(*s, m4.key()) && !dk.key().equal(*s, m3.key()); }))
|
||||
.produces(m1)
|
||||
.produces(m2)
|
||||
.produces_end_of_stream();
|
||||
|
||||
// Trim middle
|
||||
assert_that(make_filtering_reader(make_reader_returning_many({m1, m2, m3, m4}),
|
||||
[&] (const streamed_mutation& m) { return !m.key().equal(*s, m3.key()); }))
|
||||
assert_that(make_filtering_reader(flat_mutation_reader_from_mutations({m1, m2, m3, m4}),
|
||||
[&] (const dht::decorated_key& dk) { return !dk.key().equal(*s, m3.key()); }))
|
||||
.produces(m1)
|
||||
.produces(m2)
|
||||
.produces(m4)
|
||||
.produces_end_of_stream();
|
||||
|
||||
assert_that(make_filtering_reader(make_reader_returning_many({m1, m2, m3, m4}),
|
||||
[&] (const streamed_mutation& m) { return !m.key().equal(*s, m2.key()) && !m.key().equal(*s, m3.key()); }))
|
||||
assert_that(make_filtering_reader(flat_mutation_reader_from_mutations({m1, m2, m3, m4}),
|
||||
[&] (const dht::decorated_key& dk) { return !dk.key().equal(*s, m2.key()) && !dk.key().equal(*s, m3.key()); }))
|
||||
.produces(m1)
|
||||
.produces(m4)
|
||||
.produces_end_of_stream();
|
||||
@@ -214,7 +216,7 @@ SEASTAR_TEST_CASE(test_combining_two_readers_with_one_reader_empty) {
|
||||
mutation m1(partition_key::from_single_value(*s, "key1"), s);
|
||||
m1.set_clustered_cell(clustering_key::make_empty(), "v", data_value(bytes("v1")), 1);
|
||||
|
||||
assert_that(make_combined_reader(make_reader_returning(m1), make_empty_reader()))
|
||||
assert_that(make_combined_reader(s, make_reader_returning(m1), make_empty_reader()))
|
||||
.produces(m1)
|
||||
.produces_end_of_stream();
|
||||
});
|
||||
@@ -222,7 +224,7 @@ SEASTAR_TEST_CASE(test_combining_two_readers_with_one_reader_empty) {
|
||||
|
||||
SEASTAR_TEST_CASE(test_combining_two_empty_readers) {
|
||||
return seastar::async([] {
|
||||
assert_that(make_combined_reader(make_empty_reader(), make_empty_reader()))
|
||||
assert_that(make_combined_reader(make_schema(), make_empty_reader(), make_empty_reader()))
|
||||
.produces_end_of_stream();
|
||||
});
|
||||
}
|
||||
@@ -231,7 +233,7 @@ SEASTAR_TEST_CASE(test_combining_one_empty_reader) {
|
||||
return seastar::async([] {
|
||||
std::vector<mutation_reader> v;
|
||||
v.push_back(make_empty_reader());
|
||||
assert_that(make_combined_reader(std::move(v), mutation_reader::forwarding::no))
|
||||
assert_that(make_combined_reader(make_schema(), std::move(v), streamed_mutation::forwarding::no, mutation_reader::forwarding::no))
|
||||
.produces_end_of_stream();
|
||||
});
|
||||
}
|
||||
@@ -286,7 +288,7 @@ SEASTAR_TEST_CASE(test_fast_forwarding_combining_reader) {
|
||||
boost::range::transform(mutations, std::back_inserter(readers), [&pr] (auto& ms) {
|
||||
return make_reader_returning_many(ms, pr);
|
||||
});
|
||||
return make_combined_reader(std::move(readers), mutation_reader::forwarding::yes);
|
||||
return make_combined_reader(s, std::move(readers));
|
||||
};
|
||||
|
||||
auto pr = dht::partition_range::make_open_ended_both_sides();
|
||||
@@ -318,6 +320,67 @@ SEASTAR_TEST_CASE(test_fast_forwarding_combining_reader) {
|
||||
});
|
||||
}
|
||||
|
||||
// Checks intra-partition (streamed_mutation) fast forwarding on a combined
// reader built from three underlying readers whose partitions overlap.
SEASTAR_TEST_CASE(test_sm_fast_forwarding_combining_reader) {
    return seastar::async([] {
        storage_service_for_tests ssft;
        simple_schema s;

        const auto pkeys = s.make_pkeys(4);
        const auto ckeys = s.make_ckeys(4);

        // Builds a mutation for partition `n` holding one row per clustering
        // key, with values "val_0" .. "val_3".
        auto make_mutation = [&] (uint32_t n) {
            mutation m(pkeys[n], s.schema());
            for (int row = 0; row < 4; ++row) {
                s.add_row(m, ckeys[row], sprint("val_%i", row));
            }
            return m;
        };

        std::vector<std::vector<mutation>> readers_mutations{
            {make_mutation(0), make_mutation(1), make_mutation(2), make_mutation(3)},
            {make_mutation(0)},
            {make_mutation(2)},
        };

        // Wrap each mutation set in a forwarding-enabled flat reader.
        std::vector<flat_mutation_reader> readers;
        for (auto& mutations : readers_mutations) {
            auto underlying = make_reader_returning_many(mutations, s.schema()->full_slice(), streamed_mutation::forwarding::yes);
            readers.emplace_back(flat_mutation_reader_from_mutation_reader(s.schema(),
                    std::move(underlying),
                    streamed_mutation::forwarding::yes));
        }

        assert_that(make_combined_reader(s.schema(), std::move(readers), streamed_mutation::forwarding::yes, mutation_reader::forwarding::no))
            .produces_partition_start(pkeys[0])
            .produces_end_of_stream()
            .fast_forward_to(position_range::all_clustered_rows())
            .produces_row_with_key(ckeys[0])
            .next_partition()
            .produces_partition_start(pkeys[1])
            .produces_end_of_stream()
            .fast_forward_to(position_range(position_in_partition::before_key(ckeys[2]), position_in_partition::after_key(ckeys[2])))
            .produces_row_with_key(ckeys[2])
            .produces_end_of_stream()
            .fast_forward_to(position_range(position_in_partition::after_key(ckeys[2]), position_in_partition::after_all_clustered_rows()))
            .produces_row_with_key(ckeys[3])
            .produces_end_of_stream()
            .next_partition()
            .produces_partition_start(pkeys[2])
            .fast_forward_to(position_range::all_clustered_rows())
            .produces_row_with_key(ckeys[0])
            .produces_row_with_key(ckeys[1])
            .produces_row_with_key(ckeys[2])
            .produces_row_with_key(ckeys[3])
            .produces_end_of_stream();
    });
}
|
||||
|
||||
struct sst_factory {
|
||||
schema_ptr s;
|
||||
sstring path;
|
||||
@@ -343,6 +406,7 @@ struct sst_factory {
|
||||
|
||||
SEASTAR_TEST_CASE(combined_mutation_reader_test) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
//logging::logger_registry().set_logger_level("database", logging::log_level::trace);
|
||||
|
||||
simple_schema s;
|
||||
@@ -424,19 +488,21 @@ SEASTAR_TEST_CASE(combined_mutation_reader_test) {
|
||||
for (auto table : tables) {
|
||||
sstables->insert(table);
|
||||
|
||||
sstable_mutation_readers.emplace_back(table->read_range_rows(
|
||||
sstable_mutation_readers.emplace_back(
|
||||
mutation_reader_from_flat_mutation_reader(table->read_range_rows_flat(
|
||||
s.schema(),
|
||||
query::full_partition_range,
|
||||
s.schema()->full_slice(),
|
||||
seastar::default_priority_class(),
|
||||
no_resource_tracking(),
|
||||
streamed_mutation::forwarding::no,
|
||||
mutation_reader::forwarding::yes));
|
||||
mutation_reader::forwarding::yes)));
|
||||
}
|
||||
|
||||
auto list_reader = make_combined_reader(std::move(sstable_mutation_readers), mutation_reader::forwarding::yes);
|
||||
auto list_reader = make_combined_reader(s.schema(),
|
||||
std::move(sstable_mutation_readers));
|
||||
|
||||
auto incremental_reader = make_range_sstable_reader(
|
||||
auto incremental_reader = make_local_shard_sstable_reader(
|
||||
s.schema(),
|
||||
sstables,
|
||||
query::full_partition_range,
|
||||
@@ -467,6 +533,175 @@ SEASTAR_TEST_CASE(combined_mutation_reader_test) {
|
||||
});
|
||||
}
|
||||
|
||||
// Creates a mutation for partition `dk` containing a single row. Every call
// uses a fresh clustering key taken from a function-local counter, so
// mutations produced by successive calls never share a row.
static mutation make_mutation_with_key(simple_schema& s, dht::decorated_key dk) {
    static int i{0};

    // Advance the counter in its own statement. Incrementing it inside one
    // argument of add_row() while another argument reads it leaves the value
    // seen by sprint() dependent on argument evaluation order.
    ++i;

    mutation m(std::move(dk), s.schema());
    s.add_row(m, s.make_ckey(i), sprint("val_%i", i));
    return m;
}
|
||||
|
||||
class dummy_incremental_selector : public reader_selector {
|
||||
schema_ptr _s;
|
||||
std::vector<std::vector<mutation>> _readers_mutations;
|
||||
streamed_mutation::forwarding _fwd;
|
||||
dht::partition_range _pr;
|
||||
|
||||
const dht::token& position() const {
|
||||
return _readers_mutations.back().front().token();
|
||||
}
|
||||
flat_mutation_reader pop_reader() {
|
||||
auto muts = std::move(_readers_mutations.back());
|
||||
_readers_mutations.pop_back();
|
||||
_selector_position = _readers_mutations.empty() ? dht::maximum_token() : position();
|
||||
return flat_mutation_reader_from_mutation_reader(_s, make_reader_returning_many(std::move(muts), _pr), _fwd);
|
||||
}
|
||||
public:
|
||||
// readers_mutations is expected to be sorted on both levels.
|
||||
// 1) the inner vector is expected to be sorted by decorated_key.
|
||||
// 2) the outer vector is expected to be sorted by the decorated_key
|
||||
// of its first mutation.
|
||||
dummy_incremental_selector(schema_ptr s,
|
||||
std::vector<std::vector<mutation>> reader_mutations,
|
||||
dht::partition_range pr = query::full_partition_range,
|
||||
streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no)
|
||||
: _s(std::move(s))
|
||||
, _readers_mutations(std::move(reader_mutations))
|
||||
, _fwd(fwd)
|
||||
, _pr(std::move(pr)) {
|
||||
// So we can pop the next reader off the back
|
||||
boost::reverse(_readers_mutations);
|
||||
_selector_position = position();
|
||||
}
|
||||
virtual std::vector<flat_mutation_reader> create_new_readers(const dht::token* const t) override {
|
||||
if (_readers_mutations.empty()) {
|
||||
return {};
|
||||
}
|
||||
|
||||
std::vector<flat_mutation_reader> readers;
|
||||
|
||||
if (!t) {
|
||||
readers.emplace_back(pop_reader());
|
||||
return readers;
|
||||
}
|
||||
|
||||
while (!_readers_mutations.empty() && *t >= _selector_position) {
|
||||
readers.emplace_back(pop_reader());
|
||||
}
|
||||
return readers;
|
||||
}
|
||||
virtual std::vector<flat_mutation_reader> fast_forward_to(const dht::partition_range& pr) override {
|
||||
return create_new_readers(&pr.start()->value().token());
|
||||
}
|
||||
};
|
||||
|
||||
// Four single-partition readers handed out incrementally by the selector; the
// two middle ones share a partition key, so the combined reader is expected
// to merge them into one partition.
SEASTAR_TEST_CASE(reader_selector_gap_between_readers_test) {
    return seastar::async([] {
        storage_service_for_tests ssft;

        simple_schema ss;
        auto keys = ss.make_pkeys(3);

        // Creation order matters: every call takes the next clustering key.
        auto first = make_mutation_with_key(ss, keys[0]);
        auto second_a = make_mutation_with_key(ss, keys[1]);
        auto second_b = make_mutation_with_key(ss, keys[1]);
        auto third = make_mutation_with_key(ss, keys[2]);

        std::vector<std::vector<mutation>> per_reader{
            {first},
            {second_a},
            {second_b},
            {third}
        };

        auto selector = std::make_unique<dummy_incremental_selector>(ss.schema(), std::move(per_reader));

        assert_that(make_flat_mutation_reader<combined_mutation_reader>(ss.schema(),
                    std::move(selector),
                    streamed_mutation::forwarding::no,
                    mutation_reader::forwarding::no))
            .produces_partition(first)
            .produces_partition(second_a + second_b)
            .produces_partition(third)
            .produces_end_of_stream();
    });
}
|
||||
|
||||
// Three readers with overlapping partition sets handed out incrementally by
// the selector; the combined reader is expected to merge the mutations that
// share a partition key, including a partition tombstone on one of them.
SEASTAR_TEST_CASE(reader_selector_overlapping_readers_test) {
    return seastar::async([] {
        storage_service_for_tests ssft;

        simple_schema ss;
        auto keys = ss.make_pkeys(3);

        // Creation order matters: every call takes the next clustering key.
        auto first = make_mutation_with_key(ss, keys[0]);
        auto second_a = make_mutation_with_key(ss, keys[1]);
        auto second_b = make_mutation_with_key(ss, keys[1]);
        auto third_a = make_mutation_with_key(ss, keys[2]);
        auto third_b = make_mutation_with_key(ss, keys[2]);
        auto third_c = make_mutation_with_key(ss, keys[2]);

        tombstone partition_tomb(100, {});
        second_b.partition().apply(partition_tomb);

        std::vector<std::vector<mutation>> per_reader{
            {first, second_a, third_a},
            {second_b, third_b},
            {third_c}
        };

        auto selector = std::make_unique<dummy_incremental_selector>(ss.schema(), std::move(per_reader));

        assert_that(make_flat_mutation_reader<combined_mutation_reader>(ss.schema(),
                    std::move(selector),
                    streamed_mutation::forwarding::no,
                    mutation_reader::forwarding::no))
            .produces_partition(first)
            .produces_partition(second_a + second_b)
            .produces_partition(third_a + third_b + third_c)
            .produces_end_of_stream();
    });
}
|
||||
|
||||
// Exercises partition-range fast forwarding through a combined reader that is
// fed by a dummy incremental selector.
//
// The selector is constructed with a range ending just before pkeys[1]
// (exclusive), so only pkeys[0] is produced at first. The reader is then
// forwarded to [pkeys[2], pkeys[3]] and finally to [pkeys[4], +inf).
SEASTAR_TEST_CASE(reader_selector_fast_forwarding_test) {
    return seastar::async([] {
        storage_service_for_tests ssft;

        simple_schema ss;
        auto keys = ss.make_pkeys(5);

        // Suffix letters a-d name the reader (0-3) each mutation is placed in.
        // Creation order is kept as-is: make_mutation_with_key() is an
        // external helper and may depend on call order.
        auto m1a = make_mutation_with_key(ss, keys[0]);
        auto m1b = make_mutation_with_key(ss, keys[0]);
        auto m2a = make_mutation_with_key(ss, keys[1]);
        auto m2c = make_mutation_with_key(ss, keys[1]);
        auto m3a = make_mutation_with_key(ss, keys[2]);
        auto m3d = make_mutation_with_key(ss, keys[2]);
        auto m4b = make_mutation_with_key(ss, keys[3]);
        auto m5b = make_mutation_with_key(ss, keys[4]);

        std::vector<std::vector<mutation>> per_reader{
            {m1a, m2a, m3a},
            {m1b, m4b, m5b},
            {m2c},
            {m3d},
        };

        using bound = dht::partition_range::bound;
        auto initial_range = dht::partition_range::make_ending_with(bound(keys[1], false));
        auto middle_range = dht::partition_range::make(bound(keys[2], true), bound(keys[3], true));
        auto tail_range = dht::partition_range::make_starting_with(bound(keys[4], true));

        auto selector = std::make_unique<dummy_incremental_selector>(ss.schema(),
                std::move(per_reader),
                std::move(initial_range));
        auto combined = make_flat_mutation_reader<combined_mutation_reader>(ss.schema(),
                std::move(selector),
                streamed_mutation::forwarding::no,
                mutation_reader::forwarding::yes);

        assert_that(std::move(combined))
            .produces_partition(m1a + m1b)
            .produces_end_of_stream()
            .fast_forward_to(middle_range)
            .produces_partition(m3a + m3d)
            .fast_forward_to(tail_range)
            .produces_partition(m5b)
            .produces_end_of_stream();
    });
}
|
||||
|
||||
// 16 KiB; handed to restriction_data by the restricted_reader_* tests.
static const std::size_t new_reader_base_cost = 16 * 1024;
|
||||
|
||||
template<typename EventuallySucceedingFunction>
|
||||
@@ -514,13 +749,14 @@ sstables::shared_sstable create_sstable(simple_schema& sschema, const sstring& p
|
||||
}
|
||||
|
||||
|
||||
class tracking_reader : public mutation_reader::impl {
|
||||
mutation_reader _reader;
|
||||
class tracking_reader : public flat_mutation_reader::impl {
|
||||
flat_mutation_reader _reader;
|
||||
std::size_t _call_count{0};
|
||||
std::size_t _ff_count{0};
|
||||
public:
|
||||
tracking_reader(semaphore* resources_sem, schema_ptr schema, lw_shared_ptr<sstables::sstable> sst)
|
||||
: _reader(sst->read_range_rows(
|
||||
: impl(schema)
|
||||
, _reader(sst->read_range_rows_flat(
|
||||
schema,
|
||||
query::full_partition_range,
|
||||
schema->full_slice(),
|
||||
@@ -530,9 +766,23 @@ public:
|
||||
mutation_reader::forwarding::yes)) {
|
||||
}
|
||||
|
||||
virtual future<streamed_mutation_opt> operator()() override {
|
||||
|
||||
virtual future<> fill_buffer() override {
|
||||
++_call_count;
|
||||
return _reader();
|
||||
return _reader.fill_buffer().then([this] {
|
||||
_end_of_stream = _reader.is_end_of_stream();
|
||||
while (!_reader.is_buffer_empty()) {
|
||||
push_mutation_fragment(_reader.pop_mutation_fragment());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
virtual void next_partition() override {
|
||||
_end_of_stream = false;
|
||||
clear_buffer_to_next_partition();
|
||||
if (is_buffer_empty()) {
|
||||
_reader.next_partition();
|
||||
}
|
||||
}
|
||||
|
||||
virtual future<> fast_forward_to(const dht::partition_range& pr) override {
|
||||
@@ -543,6 +793,10 @@ public:
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
virtual future<> fast_forward_to(position_range) override {
|
||||
throw std::bad_function_call();
|
||||
}
|
||||
|
||||
std::size_t call_count() const {
|
||||
return _call_count;
|
||||
}
|
||||
@@ -553,25 +807,28 @@ public:
|
||||
};
|
||||
|
||||
class reader_wrapper {
|
||||
mutation_reader _reader;
|
||||
flat_mutation_reader _reader;
|
||||
tracking_reader* _tracker{nullptr};
|
||||
|
||||
public:
|
||||
reader_wrapper(
|
||||
const restricted_mutation_reader_config& config,
|
||||
schema_ptr schema,
|
||||
lw_shared_ptr<sstables::sstable> sst) {
|
||||
auto ms = mutation_source([this, &config, sst=std::move(sst)] (schema_ptr schema, const dht::partition_range&) {
|
||||
lw_shared_ptr<sstables::sstable> sst) : _reader(make_empty_flat_reader(schema)) {
|
||||
auto ms = mutation_source([this, &config, sst=std::move(sst)] (schema_ptr schema, const dht::partition_range&, auto&&...) {
|
||||
auto tracker_ptr = std::make_unique<tracking_reader>(config.resources_sem, std::move(schema), std::move(sst));
|
||||
_tracker = tracker_ptr.get();
|
||||
return mutation_reader(std::move(tracker_ptr));
|
||||
return flat_mutation_reader(std::move(tracker_ptr));
|
||||
});
|
||||
|
||||
_reader = make_restricted_reader(config, std::move(ms), std::move(schema));
|
||||
_reader = make_restricted_flat_reader(config, std::move(ms), schema);
|
||||
}
|
||||
|
||||
future<streamed_mutation_opt> operator()() {
|
||||
return _reader();
|
||||
future<> operator()() {
|
||||
while (!_reader.is_buffer_empty()) {
|
||||
_reader.pop_mutation_fragment();
|
||||
}
|
||||
return _reader.fill_buffer();
|
||||
}
|
||||
|
||||
future<> fast_forward_to(const dht::partition_range& pr) {
|
||||
@@ -723,6 +980,7 @@ SEASTAR_TEST_CASE(reader_restriction_file_tracking) {
|
||||
|
||||
SEASTAR_TEST_CASE(restricted_reader_reading) {
|
||||
return async([&] {
|
||||
storage_service_for_tests ssft;
|
||||
restriction_data rd(new_reader_base_cost);
|
||||
|
||||
{
|
||||
@@ -770,6 +1028,7 @@ SEASTAR_TEST_CASE(restricted_reader_reading) {
|
||||
|
||||
SEASTAR_TEST_CASE(restricted_reader_timeout) {
|
||||
return async([&] {
|
||||
storage_service_for_tests ssft;
|
||||
restriction_data rd(new_reader_base_cost, std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::milliseconds{10}));
|
||||
|
||||
{
|
||||
@@ -797,6 +1056,7 @@ SEASTAR_TEST_CASE(restricted_reader_timeout) {
|
||||
|
||||
SEASTAR_TEST_CASE(restricted_reader_max_queue_length) {
|
||||
return async([&] {
|
||||
storage_service_for_tests ssft;
|
||||
restriction_data rd(new_reader_base_cost, {}, 1);
|
||||
|
||||
{
|
||||
@@ -823,6 +1083,7 @@ SEASTAR_TEST_CASE(restricted_reader_max_queue_length) {
|
||||
|
||||
SEASTAR_TEST_CASE(restricted_reader_create_reader) {
|
||||
return async([&] {
|
||||
storage_service_for_tests ssft;
|
||||
restriction_data rd(new_reader_base_cost);
|
||||
|
||||
{
|
||||
|
||||
@@ -845,6 +845,41 @@ static void test_query_only_static_row(populate_fn populate) {
|
||||
}
|
||||
}
|
||||
|
||||
// Checks that intra-partition fast forwarding keeps succeeding on a reader
// created with streamed_mutation::forwarding::yes, including for clustering
// ranges that hold no rows and after next_partition() has moved the reader
// past the only partition.
//
// The populated source holds a single partition with one row at cks[0].
void test_streamed_mutation_forwarding_succeeds_with_no_data(populate_fn populate) {
    simple_schema s;
    auto cks = s.make_ckeys(6);

    auto pkey = s.make_pkey(0);
    mutation m(pkey, s.schema());
    s.add_row(m, cks[0], "data");

    // Builds the clustering range [cks[from], before cks[to]).
    auto ck_range = [&cks] (std::size_t from, std::size_t to) {
        return position_range(position_in_partition::for_key(cks[from]),
                              position_in_partition::before_key(cks[to]));
    };

    auto source = populate(s.schema(), {m});
    auto rd = source.make_flat_mutation_reader(s.schema(),
            query::full_partition_range,
            s.schema()->full_slice(),
            default_priority_class(),
            nullptr,
            streamed_mutation::forwarding::yes);

    assert_that(std::move(rd))
        .produces_partition_start(pkey)
        .produces_end_of_stream()
        // The only row lives in the first range.
        .fast_forward_to(ck_range(0, 1))
        .produces_row_with_key(cks[0])
        .produces_end_of_stream()
        // The remaining ranges are empty.
        .fast_forward_to(ck_range(1, 3))
        .produces_end_of_stream()
        .fast_forward_to(ck_range(4, 5))
        .produces_end_of_stream()
        // After leaving the partition, every range must still forward cleanly
        // and yield nothing.
        .next_partition()
        .produces_end_of_stream()
        .fast_forward_to(ck_range(0, 1))
        .produces_end_of_stream()
        .fast_forward_to(ck_range(1, 3))
        .produces_end_of_stream()
        .fast_forward_to(ck_range(4, 5))
        .produces_end_of_stream();
}
|
||||
|
||||
void run_mutation_reader_tests(populate_fn populate) {
|
||||
test_fast_forwarding_across_partitions_to_empty_range(populate);
|
||||
test_clustering_slices(populate);
|
||||
@@ -911,6 +946,7 @@ void test_next_partition(populate_fn populate) {
|
||||
// Runs the flat-mutation-reader test suite against the mutation source
// produced by `populate`. The individual tests are invoked in a fixed order.
void run_flat_mutation_reader_tests(populate_fn populate) {
    run_conversion_to_mutation_reader_tests(populate);
    test_next_partition(populate);
    test_streamed_mutation_forwarding_succeeds_with_no_data(populate);
}
|
||||
|
||||
void run_mutation_source_tests(populate_fn populate) {
|
||||
@@ -1376,7 +1412,7 @@ public:
|
||||
set_random_cells(row.cells(), column_kind::regular_column);
|
||||
row.marker() = random_row_marker();
|
||||
} else {
|
||||
m.partition().clustered_row(*_schema, ckey, is_dummy::yes, continuous);
|
||||
m.partition().clustered_row(*_schema, position_in_partition_view::after_key(ckey), is_dummy::yes, continuous);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -46,8 +46,10 @@
|
||||
#include "tests/mutation_assertions.hh"
|
||||
#include "tests/mutation_reader_assertions.hh"
|
||||
#include "tests/result_set_assertions.hh"
|
||||
#include "tests/test_services.hh"
|
||||
#include "mutation_source_test.hh"
|
||||
#include "cell_locking.hh"
|
||||
#include "flat_mutation_reader_assertions.hh"
|
||||
|
||||
#include "disk-error-handler.hh"
|
||||
#include "simple_schema.hh"
|
||||
@@ -66,8 +68,8 @@ static atomic_cell make_atomic_cell(bytes value) {
|
||||
|
||||
static mutation_partition get_partition(memtable& mt, const partition_key& key) {
|
||||
auto dk = dht::global_partitioner().decorate_key(*mt.schema(), key);
|
||||
auto reader = mt.make_reader(mt.schema(), dht::partition_range::make_singular(dk));
|
||||
auto mo = mutation_from_streamed_mutation(reader().get0()).get0();
|
||||
auto reader = mt.make_flat_reader(mt.schema(), dht::partition_range::make_singular(dk));
|
||||
auto mo = read_mutation_from_flat_mutation_reader(reader).get0();
|
||||
BOOST_REQUIRE(bool(mo));
|
||||
return std::move(mo->partition());
|
||||
}
|
||||
@@ -281,6 +283,7 @@ SEASTAR_TEST_CASE(test_list_mutations) {
|
||||
|
||||
SEASTAR_TEST_CASE(test_multiple_memtables_one_partition) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
auto s = make_lw_shared(schema({}, some_keyspace, some_column_family,
|
||||
{{"p1", utf8_type}}, {{"c1", int32_type}}, {{"r1", int32_type}}, {}, utf8_type));
|
||||
|
||||
@@ -345,6 +348,7 @@ SEASTAR_TEST_CASE(test_flush_in_the_middle_of_a_scan) {
|
||||
|
||||
return with_column_family(s, cfg, [s](column_family& cf) {
|
||||
return seastar::async([s, &cf] {
|
||||
storage_service_for_tests ssft;
|
||||
// populate
|
||||
auto new_key = [&] {
|
||||
static thread_local int next = 0;
|
||||
@@ -408,6 +412,7 @@ SEASTAR_TEST_CASE(test_flush_in_the_middle_of_a_scan) {
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_multiple_memtables_multiple_partitions) {
|
||||
return seastar::async([] {
|
||||
auto s = make_lw_shared(schema({}, some_keyspace, some_column_family,
|
||||
{{"p1", int32_type}}, {{"c1", int32_type}}, {{"r1", int32_type}}, {}, utf8_type));
|
||||
|
||||
@@ -418,7 +423,7 @@ SEASTAR_TEST_CASE(test_multiple_memtables_multiple_partitions) {
|
||||
cfg.enable_disk_writes = false;
|
||||
cfg.enable_incremental_backups = false;
|
||||
cfg.cf_stats = &*cf_stats;
|
||||
return with_column_family(s, cfg, [s] (auto& cf) mutable {
|
||||
with_column_family(s, cfg, [s] (auto& cf) mutable {
|
||||
std::map<int32_t, std::map<int32_t, int32_t>> shadow, result;
|
||||
|
||||
const column_definition& r1_col = *s->get_column_definition("r1");
|
||||
@@ -458,7 +463,8 @@ SEASTAR_TEST_CASE(test_multiple_memtables_multiple_partitions) {
|
||||
BOOST_REQUIRE(shadow == result);
|
||||
});
|
||||
});
|
||||
}).then([cf_stats] {});
|
||||
}).then([cf_stats] {}).get();
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_cell_ordering) {
|
||||
|
||||
@@ -33,12 +33,120 @@
|
||||
#include "tests/mutation_assertions.hh"
|
||||
#include "tests/mutation_reader_assertions.hh"
|
||||
#include "tests/simple_schema.hh"
|
||||
#include "tests/range_tombstone_list_assertions.hh"
|
||||
|
||||
thread_local disk_error_signal_type commit_error;
|
||||
thread_local disk_error_signal_type general_disk_error;
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
|
||||
// Verifies that tombstones in "list" are monotonic, overlap with the requested range,
|
||||
// and have information equivalent with "expected" in that range.
|
||||
static
|
||||
void check_tombstone_slice(const schema& s, std::vector<range_tombstone> list,
|
||||
const query::clustering_range& range,
|
||||
std::initializer_list<range_tombstone> expected)
|
||||
{
|
||||
range_tombstone_list actual(s);
|
||||
position_in_partition::less_compare less(s);
|
||||
position_in_partition prev_pos = position_in_partition::before_all_clustered_rows();
|
||||
|
||||
for (auto&& rt : list) {
|
||||
if (!less(rt.position(), position_in_partition::for_range_end(range))) {
|
||||
BOOST_FAIL(sprint("Range tombstone out of range: %s, range: %s", rt, range));
|
||||
}
|
||||
if (!less(position_in_partition::for_range_start(range), rt.end_position())) {
|
||||
BOOST_FAIL(sprint("Range tombstone out of range: %s, range: %s", rt, range));
|
||||
}
|
||||
if (!less(prev_pos, rt.position())) {
|
||||
BOOST_FAIL(sprint("Range tombstone breaks position monotonicity: %s, list: %s", rt, list));
|
||||
}
|
||||
prev_pos = position_in_partition(rt.position());
|
||||
actual.apply(s, rt);
|
||||
}
|
||||
|
||||
actual.trim(s, query::clustering_row_ranges{range});
|
||||
|
||||
range_tombstone_list expected_list(s);
|
||||
for (auto&& rt : expected) {
|
||||
expected_list.apply(s, rt);
|
||||
}
|
||||
expected_list.trim(s, query::clustering_row_ranges{range});
|
||||
|
||||
assert_that(s, actual).is_equal_to(expected_list);
|
||||
}
|
||||
|
||||
// Checks partition_snapshot::range_tombstones() slicing, first on a snapshot
// of a single-version partition entry and then on a snapshot taken after a
// second version with more tombstones has been added.
SEASTAR_TEST_CASE(test_range_tombstone_slicing) {
    return seastar::async([] {
        logalloc::region region;
        simple_schema table;
        auto s = table.schema();

        with_allocator(region.allocator(), [&] {
            logalloc::reclaim_lock l(region);

            // Slices `snapshot` to `range` and validates the result against `expected`.
            auto expect_slice = [&s] (partition_snapshot& snapshot, const query::clustering_range& range,
                                      std::initializer_list<range_tombstone> expected) {
                check_tombstone_slice(*s,
                        snapshot.range_tombstones(*s,
                            position_in_partition::for_range_start(range),
                            position_in_partition::for_range_end(range)),
                        range,
                        expected);
            };

            // --- First version: rt1 over [1, 2], rt2 over [4, 7], rt3 over [6, 9].
            auto rt1 = table.make_range_tombstone(table.make_ckey_range(1, 2));
            auto rt2 = table.make_range_tombstone(table.make_ckey_range(4, 7));
            auto rt3 = table.make_range_tombstone(table.make_ckey_range(6, 9));

            mutation_partition m1(s);
            m1.apply_delete(*s, rt1);
            m1.apply_delete(*s, rt2);
            m1.apply_delete(*s, rt3);

            partition_entry entry(m1);

            auto snap_v1 = entry.read(region, s);

            expect_slice(*snap_v1, table.make_ckey_range(0, 0), {});
            expect_slice(*snap_v1, table.make_ckey_range(1, 1), {rt1});
            expect_slice(*snap_v1, table.make_ckey_range(3, 4), {rt2});
            expect_slice(*snap_v1, table.make_ckey_range(3, 5), {rt2});
            expect_slice(*snap_v1, table.make_ckey_range(3, 6), {rt2, rt3});
            expect_slice(*snap_v1, table.make_ckey_range(6, 6), {rt2, rt3});
            expect_slice(*snap_v1, table.make_ckey_range(7, 10), {rt2, rt3});
            expect_slice(*snap_v1, table.make_ckey_range(8, 10), {rt3});
            expect_slice(*snap_v1, table.make_ckey_range(10, 10), {});
            expect_slice(*snap_v1, table.make_ckey_range(0, 10), {rt1, rt2, rt3});

            // --- Second version: rt4 covers the same keys as rt1, rt5 spans [5, 8].
            auto rt4 = table.make_range_tombstone(table.make_ckey_range(1, 2));
            auto rt5 = table.make_range_tombstone(table.make_ckey_range(5, 8));

            mutation_partition m2(s);
            m2.apply_delete(*s, rt4);
            m2.apply_delete(*s, rt5);

            auto&& v2 = entry.add_version(*s);
            v2.partition().apply(*s, m2, *s);
            auto snap_v2 = entry.read(region, s);

            expect_slice(*snap_v2, table.make_ckey_range(0, 0), {});
            expect_slice(*snap_v2, table.make_ckey_range(1, 1), {rt4});
            expect_slice(*snap_v2, table.make_ckey_range(3, 4), {rt2});
            expect_slice(*snap_v2, table.make_ckey_range(3, 5), {rt2, rt5});
            expect_slice(*snap_v2, table.make_ckey_range(3, 6), {rt2, rt3, rt5});
            expect_slice(*snap_v2, table.make_ckey_range(4, 4), {rt2});
            expect_slice(*snap_v2, table.make_ckey_range(5, 5), {rt2, rt5});
            expect_slice(*snap_v2, table.make_ckey_range(6, 6), {rt2, rt3, rt5});
            expect_slice(*snap_v2, table.make_ckey_range(7, 10), {rt2, rt3, rt5});
            expect_slice(*snap_v2, table.make_ckey_range(8, 8), {rt3, rt5});
            expect_slice(*snap_v2, table.make_ckey_range(9, 9), {rt3});
            expect_slice(*snap_v2, table.make_ckey_range(8, 10), {rt3, rt5});
            expect_slice(*snap_v2, table.make_ckey_range(10, 10), {});
            expect_slice(*snap_v2, table.make_ckey_range(0, 10), {rt4, rt2, rt3, rt5});
        });
    });
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_apply_to_incomplete) {
|
||||
return seastar::async([] {
|
||||
logalloc::region r;
|
||||
|
||||
@@ -184,12 +184,12 @@ uint64_t consume_all(mutation_reader& rd) {
|
||||
|
||||
// cf should belong to ks.test
|
||||
static test_result scan_rows_with_stride(column_family& cf, int n_rows, int n_read = 1, int n_skip = 0) {
|
||||
auto rd = cf.make_reader(cf.schema(),
|
||||
auto rd = mutation_reader_from_flat_mutation_reader(cf.make_reader(cf.schema(),
|
||||
query::full_partition_range,
|
||||
cf.schema()->full_slice(),
|
||||
default_priority_class(),
|
||||
nullptr,
|
||||
n_skip ? streamed_mutation::forwarding::yes : streamed_mutation::forwarding::no);
|
||||
n_skip ? streamed_mutation::forwarding::yes : streamed_mutation::forwarding::no));
|
||||
|
||||
metrics_snapshot before;
|
||||
|
||||
@@ -232,7 +232,7 @@ static test_result scan_with_stride_partitions(column_family& cf, int n, int n_r
|
||||
int pk = 0;
|
||||
auto pr = n_skip ? dht::partition_range::make_ending_with(dht::partition_range::bound(keys[0], false)) // covering none
|
||||
: query::full_partition_range;
|
||||
auto rd = cf.make_reader(cf.schema(), pr, cf.schema()->full_slice());
|
||||
auto rd = mutation_reader_from_flat_mutation_reader(cf.make_reader(cf.schema(), pr, cf.schema()->full_slice()));
|
||||
|
||||
metrics_snapshot before;
|
||||
|
||||
@@ -258,12 +258,12 @@ static test_result scan_with_stride_partitions(column_family& cf, int n, int n_r
|
||||
}
|
||||
|
||||
static test_result slice_rows(column_family& cf, int offset = 0, int n_read = 1) {
|
||||
auto rd = cf.make_reader(cf.schema(),
|
||||
auto rd = mutation_reader_from_flat_mutation_reader(cf.make_reader(cf.schema(),
|
||||
query::full_partition_range,
|
||||
cf.schema()->full_slice(),
|
||||
default_priority_class(),
|
||||
nullptr,
|
||||
streamed_mutation::forwarding::yes);
|
||||
streamed_mutation::forwarding::yes));
|
||||
|
||||
metrics_snapshot before;
|
||||
streamed_mutation_opt smo = rd().get0();
|
||||
@@ -291,9 +291,9 @@ static test_result select_spread_rows(column_family& cf, int stride = 0, int n_r
|
||||
}
|
||||
|
||||
auto slice = sb.build();
|
||||
auto rd = cf.make_reader(cf.schema(),
|
||||
auto rd = mutation_reader_from_flat_mutation_reader(cf.make_reader(cf.schema(),
|
||||
query::full_partition_range,
|
||||
slice);
|
||||
slice));
|
||||
|
||||
return test_reading_all(rd);
|
||||
}
|
||||
@@ -305,13 +305,13 @@ static test_result test_slicing_using_restrictions(column_family& cf, int_range
|
||||
}))
|
||||
.build();
|
||||
auto pr = dht::partition_range::make_singular(make_pkey(*cf.schema(), 0));
|
||||
auto rd = cf.make_reader(cf.schema(), pr, slice);
|
||||
auto rd = mutation_reader_from_flat_mutation_reader(cf.make_reader(cf.schema(), pr, slice));
|
||||
return test_reading_all(rd);
|
||||
}
|
||||
|
||||
static test_result slice_rows_single_key(column_family& cf, int offset = 0, int n_read = 1) {
|
||||
auto pr = dht::partition_range::make_singular(make_pkey(*cf.schema(), 0));
|
||||
auto rd = cf.make_reader(cf.schema(), pr, cf.schema()->full_slice(), default_priority_class(), nullptr, streamed_mutation::forwarding::yes);
|
||||
auto rd = mutation_reader_from_flat_mutation_reader(cf.make_reader(cf.schema(), pr, cf.schema()->full_slice(), default_priority_class(), nullptr, streamed_mutation::forwarding::yes));
|
||||
|
||||
metrics_snapshot before;
|
||||
streamed_mutation_opt smo = rd().get0();
|
||||
@@ -336,7 +336,7 @@ static test_result slice_partitions(column_family& cf, int n, int offset = 0, in
|
||||
dht::partition_range::bound(keys[std::min(n, offset + n_read) - 1], true)
|
||||
);
|
||||
|
||||
auto rd = cf.make_reader(cf.schema(), pr, cf.schema()->full_slice());
|
||||
auto rd = mutation_reader_from_flat_mutation_reader(cf.make_reader(cf.schema(), pr, cf.schema()->full_slice()));
|
||||
metrics_snapshot before;
|
||||
|
||||
uint64_t fragments = consume_all(rd);
|
||||
@@ -367,12 +367,12 @@ static test_result test_forwarding_with_restriction(column_family& cf, table_con
|
||||
.build();
|
||||
|
||||
auto pr = single_partition ? dht::partition_range::make_singular(make_pkey(*cf.schema(), 0)) : query::full_partition_range;
|
||||
auto rd = cf.make_reader(cf.schema(),
|
||||
auto rd = mutation_reader_from_flat_mutation_reader(cf.make_reader(cf.schema(),
|
||||
pr,
|
||||
slice,
|
||||
default_priority_class(),
|
||||
nullptr,
|
||||
streamed_mutation::forwarding::yes);
|
||||
streamed_mutation::forwarding::yes));
|
||||
|
||||
uint64_t fragments = 0;
|
||||
metrics_snapshot before;
|
||||
|
||||
@@ -153,14 +153,12 @@ public:
|
||||
}
|
||||
|
||||
future<double> read_sequential_partitions(int idx) {
|
||||
return do_with(_sst[0]->read_rows(s), [this] (mutation_reader& r) {
|
||||
return do_with(_sst[0]->read_rows_flat(s), [this] (flat_mutation_reader& r) {
|
||||
auto start = test_env::now();
|
||||
auto total = make_lw_shared<size_t>(0);
|
||||
auto done = make_lw_shared<bool>(false);
|
||||
return do_until([done] { return *done; }, [this, done, total, &r] {
|
||||
return r().then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([this, done, total] (mutation_opt m) {
|
||||
return read_mutation_from_flat_mutation_reader(r).then([this, done, total] (mutation_opt m) {
|
||||
if (!m) {
|
||||
*done = true;
|
||||
} else {
|
||||
|
||||
@@ -592,6 +592,24 @@ BOOST_AUTO_TEST_CASE(test_add_overlapping_range_to_range_with_empty_end) {
|
||||
BOOST_REQUIRE(it == l.end());
|
||||
}
|
||||
|
||||
// Reproduces https://github.com/scylladb/scylla/issues/3083
|
||||
BOOST_AUTO_TEST_CASE(test_coalescing_with_end_bound_inclusiveness_change_with_prefix_bound) {
|
||||
range_tombstone_list l(*s);
|
||||
|
||||
auto rt1 = rtie(4, 8, 4);
|
||||
auto rt2 = range_tombstone(key({8, 1}), bound_kind::incl_start, key({10}), bound_kind::excl_end, {1, gc_now});
|
||||
|
||||
l.apply(*s, rt1);
|
||||
l.apply(*s, rt2);
|
||||
|
||||
l.apply(*s, rt(1, 5, 4));
|
||||
|
||||
auto it = l.begin();
|
||||
assert_rt(rtie(1, 8, 4), *it++);
|
||||
assert_rt(rt2, *it++);
|
||||
BOOST_REQUIRE(it == l.end());
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_search_with_empty_start) {
|
||||
range_tombstone_list l(*s);
|
||||
|
||||
@@ -835,3 +853,51 @@ BOOST_AUTO_TEST_CASE(test_exception_safety) {
|
||||
}
|
||||
} while (injector.failed());
|
||||
}
|
||||
|
||||
// Drives range_tombstone_accumulator in both directions: tombstones are
// applied and rows are queried in ascending key order for the forward
// accumulator and in descending key order for the reversed one.
//
// Judging by the expectations below, rtie(a, b, ts) covers keys a..b-1
// (presumably start-inclusive, end-exclusive - the helper is defined elsewhere).
BOOST_AUTO_TEST_CASE(test_accumulator) {
    auto ts1 = 1;
    auto ts2 = 2;

    BOOST_TEST_MESSAGE("Forward");
    {
        range_tombstone_accumulator acc(*s, false);

        acc.apply(rtie(0, 4, ts1));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 0 })), tombstone(ts1, gc_now));

        // Nested, newer tombstone wins only where it applies.
        acc.apply(rtie(1, 2, ts2));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 1 })), tombstone(ts2, gc_now));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 2 })), tombstone(ts1, gc_now));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 3 })), tombstone(ts1, gc_now));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 4 })), tombstone());
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 5 })), tombstone());

        // A tombstone that is applied but skipped over entirely.
        acc.apply(rtie(6, 8, ts2));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 9 })), tombstone());

        acc.apply(rtie(10, 12, ts1));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 10 })), tombstone(ts1, gc_now));

        // Overlapping, newer tombstone extending past the older one.
        acc.apply(rtie(11, 14, ts2));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 11 })), tombstone(ts2, gc_now));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 12 })), tombstone(ts2, gc_now));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 13 })), tombstone(ts2, gc_now));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 14 })), tombstone());
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 15 })), tombstone());
    }

    BOOST_TEST_MESSAGE("Reversed");
    {
        range_tombstone_accumulator acc(*s, true);

        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 15 })), tombstone());
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 14 })), tombstone());

        acc.apply(rtie(11, 14, ts2));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 13 })), tombstone(ts2, gc_now));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 12 })), tombstone(ts2, gc_now));

        acc.apply(rtie(10, 12, ts1));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 11 })), tombstone(ts2, gc_now));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 10 })), tombstone(ts1, gc_now));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 9 })), tombstone());

        acc.apply(rtie(6, 8, ts2));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 5 })), tombstone());
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 4 })), tombstone());

        acc.apply(rtie(0, 4, ts1));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 3 })), tombstone(ts1, gc_now));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 2 })), tombstone(ts1, gc_now));

        acc.apply(rtie(1, 2, ts2));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 1 })), tombstone(ts2, gc_now));
        BOOST_REQUIRE_EQUAL(acc.tombstone_for_row(key({ 0 })), tombstone(ts1, gc_now));
    }
}
|
||||
|
||||
@@ -124,19 +124,19 @@ struct table {
|
||||
struct reader {
|
||||
dht::partition_range pr;
|
||||
query::partition_slice slice;
|
||||
mutation_reader rd;
|
||||
flat_mutation_reader rd;
|
||||
};
|
||||
|
||||
std::unique_ptr<reader> make_reader(dht::partition_range pr, query::partition_slice slice) {
|
||||
test_log.trace("making reader, pk={} ck={}", pr, slice);
|
||||
auto r = std::make_unique<reader>(reader{std::move(pr), std::move(slice)});
|
||||
std::vector<mutation_reader> rd;
|
||||
auto r = std::make_unique<reader>(reader{std::move(pr), std::move(slice), make_empty_flat_reader(s.schema())});
|
||||
std::vector<flat_mutation_reader> rd;
|
||||
if (prev_mt) {
|
||||
rd.push_back(prev_mt->make_reader(s.schema(), r->pr, r->slice));
|
||||
rd.push_back(prev_mt->make_flat_reader(s.schema(), r->pr, r->slice));
|
||||
}
|
||||
rd.push_back(mt->make_reader(s.schema(), r->pr, r->slice));
|
||||
rd.push_back(cache.make_reader(s.schema(), r->pr, r->slice));
|
||||
r->rd = make_combined_reader(std::move(rd), mutation_reader::forwarding::no);
|
||||
rd.push_back(mt->make_flat_reader(s.schema(), r->pr, r->slice));
|
||||
rd.push_back(cache.make_flat_reader(s.schema(), r->pr, r->slice));
|
||||
r->rd = make_combined_reader(s.schema(), std::move(rd), streamed_mutation::forwarding::yes, mutation_reader::forwarding::no);
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -303,7 +303,7 @@ int main(int argc, char** argv) {
|
||||
while (!cancelled) {
|
||||
test_log.trace("{}: starting read", id);
|
||||
auto rd = t.make_single_key_reader(pk, ck_range);
|
||||
auto row_count = consume_flattened(std::move(rd->rd), validating_consumer(t, id)).get0();
|
||||
auto row_count = rd->rd.consume(validating_consumer(t, id)).get0();
|
||||
if (row_count != len) {
|
||||
throw std::runtime_error(sprint("Expected %d fragments, got %d", len, row_count));
|
||||
}
|
||||
@@ -315,7 +315,7 @@ int main(int argc, char** argv) {
|
||||
while (!cancelled) {
|
||||
test_log.trace("{}: starting read", id);
|
||||
auto rd = t.make_scanning_reader();
|
||||
auto row_count = consume_flattened(std::move(rd->rd), validating_consumer(t, id)).get0();
|
||||
auto row_count = rd->rd.consume(validating_consumer(t, id)).get0();
|
||||
if (row_count != expected_row_count) {
|
||||
throw std::runtime_error(sprint("Expected %d fragments, got %d", expected_row_count, row_count));
|
||||
}
|
||||
|
||||
@@ -1362,12 +1362,11 @@ SEASTAR_TEST_CASE(test_mvcc) {
|
||||
|
||||
auto m12 = m1 + m2;
|
||||
|
||||
stdx::optional<mutation_reader> mt1_reader_opt;
|
||||
stdx::optional<streamed_mutation_opt> mt1_reader_sm_opt;
|
||||
stdx::optional<flat_mutation_reader> mt1_reader_opt;
|
||||
if (with_active_memtable_reader) {
|
||||
mt1_reader_opt = mt1->make_reader(s);
|
||||
mt1_reader_sm_opt = (*mt1_reader_opt)().get0();
|
||||
BOOST_REQUIRE(*mt1_reader_sm_opt);
|
||||
mt1_reader_opt = mt1->make_flat_reader(s);
|
||||
mt1_reader_opt->set_max_buffer_size(1);
|
||||
mt1_reader_opt->fill_buffer().get();
|
||||
}
|
||||
|
||||
auto mt1_copy = make_lw_shared<memtable>(s);
|
||||
@@ -1389,8 +1388,8 @@ SEASTAR_TEST_CASE(test_mvcc) {
|
||||
assert_that_stream(std::move(*sm3)).has_monotonic_positions();
|
||||
|
||||
if (with_active_memtable_reader) {
|
||||
assert(mt1_reader_sm_opt);
|
||||
auto mt1_reader_mutation = mutation_from_streamed_mutation(std::move(*mt1_reader_sm_opt)).get0();
|
||||
assert(mt1_reader_opt);
|
||||
auto mt1_reader_mutation = read_mutation_from_flat_mutation_reader(*mt1_reader_opt).get0();
|
||||
BOOST_REQUIRE(mt1_reader_mutation);
|
||||
assert_that(*mt1_reader_mutation).is_equal_to(m2);
|
||||
}
|
||||
@@ -2695,3 +2694,47 @@ SEASTAR_TEST_CASE(test_concurrent_setting_of_continuity_on_read_upper_bound) {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Reads a partition from the row cache after two mutations carrying
// overlapping range tombstones were applied, while a streamed mutation opened
// between the two applications is still alive. The fresh read must produce
// the merge of both mutations.
SEASTAR_TEST_CASE(test_tombstone_merging_of_overlapping_tombstones_in_many_versions) {
    return seastar::async([] {
        simple_schema s;
        cache_tracker tracker;
        memtable_snapshot_source underlying(s.schema());

        auto pk = s.make_pkey(0);
        auto pr = dht::partition_range::make_singular(pk);

        // m1: range tombstone over [2, 107] plus a row at ck=5.
        mutation m1(pk, s.schema());
        m1.partition().apply_delete(*s.schema(),
                s.make_range_tombstone(s.make_ckey_range(2, 107), s.new_tombstone()));
        s.add_row(m1, s.make_ckey(5), "val");

        // m2 carries a newer range tombstone over [1, 100]. The important
        // part is that it trims m1's [2, 107] down to (100, 107], which
        // starts after ck=5.
        mutation m2(pk, s.schema());
        m2.partition().apply_delete(*s.schema(),
                s.make_range_tombstone(s.make_ckey_range(1, 100), s.new_tombstone()));

        row_cache cache(s.schema(), snapshot_source([&] { return underlying(); }), tracker);

        // Opens a cache reader, takes its first streamed mutation and limits
        // that stream's buffer size to 1 before handing it out.
        auto make_sm = [&] {
            auto rd = cache.make_reader(s.schema());
            auto smo = rd().get0();
            BOOST_REQUIRE(smo);
            (*smo).set_max_buffer_size(1);
            return std::move(*smo);
        };

        apply(cache, underlying, m1);
        populate_range(cache, pr, s.make_ckey_range(0, 3));

        // Deliberately unused: it only has to stay alive while m2 is applied
        // and the partition is read again.
        auto sm1 = make_sm();

        apply(cache, underlying, m2);

        auto fresh_reader = cache.make_reader(s.schema());
        assert_that(std::move(fresh_reader))
            .produces(m1 + m2)
            .produces_end_of_stream();
    });
}
|
||||
|
||||
@@ -52,7 +52,7 @@ public:
|
||||
auto& prev = pi->entries[0];
|
||||
for (size_t i = 1; i < pi->entries.size(); ++i) {
|
||||
auto& cur = pi->entries[i];
|
||||
if (!pos_cmp(prev.end, cur.start)) {
|
||||
if (pos_cmp(cur.start, prev.end)) {
|
||||
std::cout << "promoted index:\n";
|
||||
for (auto& e : pi->entries) {
|
||||
std::cout << " " << e.start << "-" << e.end << ": +" << e.offset << " len=" << e.width << std::endl;
|
||||
@@ -66,6 +66,16 @@ public:
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
index_reader_assertions& is_empty(const schema& s) {
|
||||
_r->read_partition_data().get();
|
||||
while (!_r->eof()) {
|
||||
auto* pi = _r->current_partition_entry().get_promoted_index(s);
|
||||
BOOST_REQUIRE(pi == nullptr);
|
||||
_r->advance_to_next_partition().get();
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
inline
|
||||
|
||||
@@ -50,6 +50,8 @@
|
||||
#include "cell_locking.hh"
|
||||
#include "simple_schema.hh"
|
||||
#include "memtable-sstable.hh"
|
||||
#include "tests/sstable_assertions.hh"
|
||||
#include "flat_mutation_reader_assertions.hh"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <ftw.h>
|
||||
@@ -813,10 +815,9 @@ SEASTAR_TEST_CASE(datafile_generation_11) {
|
||||
auto sst = make_sstable(s, "tests/sstables/tests-temporary", 11, la, big);
|
||||
return write_memtable_to_sstable(*mt, sst).then([s, sst, mt, verifier, tomb, &static_set_col] {
|
||||
return reusable_sst(s, "tests/sstables/tests-temporary", 11).then([s, verifier, tomb, &static_set_col] (auto sstp) mutable {
|
||||
return do_with(sstables::key("key1"), [sstp, s, verifier, tomb, &static_set_col] (auto& key) {
|
||||
return sstp->read_row(s, key).then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([sstp, s, verifier, tomb, &static_set_col] (auto mutation) {
|
||||
return do_with(make_dkey(s, "key1"), [sstp, s, verifier, tomb, &static_set_col] (auto& key) {
|
||||
auto rd = make_lw_shared<flat_mutation_reader>(sstp->read_row_flat(s, key));
|
||||
return read_mutation_from_flat_mutation_reader(*rd).then([sstp, s, verifier, tomb, &static_set_col, rd] (auto mutation) {
|
||||
auto verify_set = [&tomb] (auto m) {
|
||||
BOOST_REQUIRE(bool(m.tomb) == true);
|
||||
BOOST_REQUIRE(m.tomb == tomb);
|
||||
@@ -842,10 +843,9 @@ SEASTAR_TEST_CASE(datafile_generation_11) {
|
||||
verify_set(m);
|
||||
});
|
||||
}).then([sstp, s, verifier] {
|
||||
return do_with(sstables::key("key2"), [sstp, s, verifier] (auto& key) {
|
||||
return sstp->read_row(s, key).then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([sstp, s, verifier] (auto mutation) {
|
||||
return do_with(make_dkey(s, "key2"), [sstp, s, verifier] (auto& key) {
|
||||
auto rd = make_lw_shared<flat_mutation_reader>(sstp->read_row_flat(s, key));
|
||||
return read_mutation_from_flat_mutation_reader(*rd).then([sstp, s, verifier, rd] (auto mutation) {
|
||||
auto m = verifier(mutation);
|
||||
BOOST_REQUIRE(!m.tomb);
|
||||
BOOST_REQUIRE(m.cells.size() == 1);
|
||||
@@ -876,10 +876,9 @@ SEASTAR_TEST_CASE(datafile_generation_12) {
|
||||
auto sst = make_sstable(s, "tests/sstables/tests-temporary", 12, la, big);
|
||||
return write_memtable_to_sstable(*mt, sst).then([s, tomb] {
|
||||
return reusable_sst(s, "tests/sstables/tests-temporary", 12).then([s, tomb] (auto sstp) mutable {
|
||||
return do_with(sstables::key("key1"), [sstp, s, tomb] (auto& key) {
|
||||
return sstp->read_row(s, key).then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([sstp, s, tomb] (auto mutation) {
|
||||
return do_with(make_dkey(s, "key1"), [sstp, s, tomb] (auto& key) {
|
||||
auto rd = make_lw_shared<flat_mutation_reader>(sstp->read_row_flat(s, key));
|
||||
return read_mutation_from_flat_mutation_reader(*rd).then([sstp, s, tomb, rd] (auto mutation) {
|
||||
auto& mp = mutation->partition();
|
||||
BOOST_REQUIRE(mp.row_tombstones().size() == 1);
|
||||
for (auto& rt: mp.row_tombstones()) {
|
||||
@@ -913,10 +912,9 @@ static future<> sstable_compression_test(compressor c, unsigned generation) {
|
||||
auto sst = make_sstable(s, "tests/sstables/tests-temporary", generation, la, big);
|
||||
return write_memtable_to_sstable(*mtp, sst).then([s, tomb, generation] {
|
||||
return reusable_sst(s, "tests/sstables/tests-temporary", generation).then([s, tomb] (auto sstp) mutable {
|
||||
return do_with(sstables::key("key1"), [sstp, s, tomb] (auto& key) {
|
||||
return sstp->read_row(s, key).then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([sstp, s, tomb] (auto mutation) {
|
||||
return do_with(make_dkey(s, "key1"), [sstp, s, tomb] (auto& key) {
|
||||
auto rd = make_lw_shared<flat_mutation_reader>(sstp->read_row_flat(s, key));
|
||||
return read_mutation_from_flat_mutation_reader(*rd).then([sstp, s, tomb, rd] (auto mutation) {
|
||||
auto& mp = mutation->partition();
|
||||
BOOST_REQUIRE(mp.row_tombstones().size() == 1);
|
||||
for (auto& rt: mp.row_tombstones()) {
|
||||
@@ -999,16 +997,18 @@ static future<std::vector<sstables::shared_sstable>> open_sstables(schema_ptr s,
|
||||
}
|
||||
|
||||
// mutation_reader for sstable keeping all the required objects alive.
|
||||
static mutation_reader sstable_reader(shared_sstable sst, schema_ptr s) {
|
||||
return sst->as_mutation_source()(s);
|
||||
static flat_mutation_reader sstable_reader(shared_sstable sst, schema_ptr s) {
|
||||
return sst->as_mutation_source().make_flat_mutation_reader(s, query::full_partition_range, s->full_slice());
|
||||
|
||||
}
|
||||
|
||||
static mutation_reader sstable_reader(shared_sstable sst, schema_ptr s, const dht::partition_range& pr) {
|
||||
return sst->as_mutation_source()(s, pr);
|
||||
static flat_mutation_reader sstable_reader(shared_sstable sst, schema_ptr s, const dht::partition_range& pr) {
|
||||
return sst->as_mutation_source().make_flat_mutation_reader(s, pr, s->full_slice());
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(compaction_manager_test) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
BOOST_REQUIRE(smp::count == 1);
|
||||
auto s = make_lw_shared(schema({}, some_keyspace, some_column_family,
|
||||
{{"p1", utf8_type}}, {{"c1", utf8_type}}, {{"r1", int32_type}}, {}, utf8_type));
|
||||
@@ -1030,7 +1030,7 @@ SEASTAR_TEST_CASE(compaction_manager_test) {
|
||||
|
||||
auto generations = make_lw_shared<std::vector<unsigned long>>({1, 2, 3, 4});
|
||||
|
||||
return do_for_each(*generations, [generations, cf, cm, s, tmp] (unsigned long generation) {
|
||||
do_for_each(*generations, [generations, cf, cm, s, tmp] (unsigned long generation) {
|
||||
// create 4 sstables of similar size to be compacted later on.
|
||||
|
||||
auto mt = make_lw_shared<memtable>(s);
|
||||
@@ -1083,7 +1083,8 @@ SEASTAR_TEST_CASE(compaction_manager_test) {
|
||||
});
|
||||
}).finally([s, cm, tmp, cl_stats] {
|
||||
return cm->stop().then([cm] {});
|
||||
});
|
||||
}).get();
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(compact) {
|
||||
@@ -1124,9 +1125,7 @@ SEASTAR_TEST_CASE(compact) {
|
||||
// nadav - deleted partition
|
||||
return open_sstable(s, "tests/sstables/tests-temporary", generation).then([s] (shared_sstable sst) {
|
||||
auto reader = make_lw_shared(sstable_reader(sst, s)); // reader holds sst and s alive.
|
||||
return (*reader)().then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([reader, s] (mutation_opt m) {
|
||||
return read_mutation_from_flat_mutation_reader(*reader).then([reader, s] (mutation_opt m) {
|
||||
BOOST_REQUIRE(m);
|
||||
BOOST_REQUIRE(m->key().equal(*s, partition_key::from_singular(*s, data_value(sstring("jerry")))));
|
||||
BOOST_REQUIRE(!m->partition().partition_tombstone());
|
||||
@@ -1136,10 +1135,8 @@ SEASTAR_TEST_CASE(compact) {
|
||||
BOOST_REQUIRE(!row.deleted_at());
|
||||
auto &cells = row.cells();
|
||||
BOOST_REQUIRE(cells.cell_at(s->get_column_definition("age")->id).as_atomic_cell().value() == bytes({0,0,0,40}));
|
||||
BOOST_REQUIRE(cells.cell_at(s->get_column_definition("height")->id).as_atomic_cell().value() == bytes({0,0,0,(char)170}));
|
||||
return (*reader)();
|
||||
}).then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
BOOST_REQUIRE(cells.cell_at(s->get_column_definition("height")->id).as_atomic_cell().value() == bytes({0,0,0,(int8_t)170}));
|
||||
return read_mutation_from_flat_mutation_reader(*reader);
|
||||
}).then([reader, s] (mutation_opt m) {
|
||||
BOOST_REQUIRE(m);
|
||||
BOOST_REQUIRE(m->key().equal(*s, partition_key::from_singular(*s, data_value(sstring("tom")))));
|
||||
@@ -1150,10 +1147,8 @@ SEASTAR_TEST_CASE(compact) {
|
||||
BOOST_REQUIRE(!row.deleted_at());
|
||||
auto &cells = row.cells();
|
||||
BOOST_REQUIRE(cells.cell_at(s->get_column_definition("age")->id).as_atomic_cell().value() == bytes({0,0,0,20}));
|
||||
BOOST_REQUIRE(cells.cell_at(s->get_column_definition("height")->id).as_atomic_cell().value() == bytes({0,0,0,(char)180}));
|
||||
return (*reader)();
|
||||
}).then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
BOOST_REQUIRE(cells.cell_at(s->get_column_definition("height")->id).as_atomic_cell().value() == bytes({0,0,0,(int8_t)180}));
|
||||
return read_mutation_from_flat_mutation_reader(*reader);
|
||||
}).then([reader, s] (mutation_opt m) {
|
||||
BOOST_REQUIRE(m);
|
||||
BOOST_REQUIRE(m->key().equal(*s, partition_key::from_singular(*s, data_value(sstring("john")))));
|
||||
@@ -1165,17 +1160,15 @@ SEASTAR_TEST_CASE(compact) {
|
||||
auto &cells = row.cells();
|
||||
BOOST_REQUIRE(cells.cell_at(s->get_column_definition("age")->id).as_atomic_cell().value() == bytes({0,0,0,20}));
|
||||
BOOST_REQUIRE(cells.find_cell(s->get_column_definition("height")->id) == nullptr);
|
||||
return (*reader)();
|
||||
}).then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
return read_mutation_from_flat_mutation_reader(*reader);
|
||||
}).then([reader, s] (mutation_opt m) {
|
||||
BOOST_REQUIRE(m);
|
||||
BOOST_REQUIRE(m->key().equal(*s, partition_key::from_singular(*s, data_value(sstring("nadav")))));
|
||||
BOOST_REQUIRE(m->partition().partition_tombstone());
|
||||
auto &rows = m->partition().clustered_rows();
|
||||
BOOST_REQUIRE(rows.calculate_size() == 0);
|
||||
return (*reader)();
|
||||
}).then([reader] (streamed_mutation_opt m) {
|
||||
return read_mutation_from_flat_mutation_reader(*reader);
|
||||
}).then([reader] (mutation_opt m) {
|
||||
BOOST_REQUIRE(!m);
|
||||
});
|
||||
});
|
||||
@@ -1314,9 +1307,9 @@ static future<> check_compacted_sstables(unsigned long generation, std::vector<u
|
||||
auto reader = sstable_reader(sst, s); // reader holds sst and s alive.
|
||||
auto keys = make_lw_shared<std::vector<partition_key>>();
|
||||
|
||||
return do_with(std::move(reader), [generations, s, keys] (mutation_reader& reader) {
|
||||
return do_for_each(*generations, [&reader, s, keys] (unsigned long generation) mutable {
|
||||
return reader().then([generation, keys] (streamed_mutation_opt m) {
|
||||
return do_with(std::move(reader), [generations, s, keys] (flat_mutation_reader& reader) {
|
||||
return do_for_each(*generations, [&reader, keys] (unsigned long generation) mutable {
|
||||
return read_mutation_from_flat_mutation_reader(reader).then([generation, keys] (mutation_opt m) {
|
||||
BOOST_REQUIRE(m);
|
||||
keys->push_back(m->key());
|
||||
});
|
||||
@@ -1395,10 +1388,9 @@ SEASTAR_TEST_CASE(datafile_generation_37) {
|
||||
auto sst = make_sstable(s, "tests/sstables/tests-temporary", 37, la, big);
|
||||
return write_memtable_to_sstable(*mtp, sst).then([s] {
|
||||
return reusable_sst(s, "tests/sstables/tests-temporary", 37).then([s] (auto sstp) {
|
||||
return do_with(sstables::key("key1"), [sstp, s] (auto& key) {
|
||||
return sstp->read_row(s, key).then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([sstp, s] (auto mutation) {
|
||||
return do_with(make_dkey(s, "key1"), [sstp, s] (auto& key) {
|
||||
auto rd = make_lw_shared<flat_mutation_reader>(sstp->read_row_flat(s, key));
|
||||
return read_mutation_from_flat_mutation_reader(*rd).then([sstp, s, rd] (auto mutation) {
|
||||
auto& mp = mutation->partition();
|
||||
|
||||
auto clustering = clustering_key_prefix::from_exploded(*s, {to_bytes("cl1")});
|
||||
@@ -1431,10 +1423,9 @@ SEASTAR_TEST_CASE(datafile_generation_38) {
|
||||
auto sst = make_sstable(s, "tests/sstables/tests-temporary", 38, la, big);
|
||||
return write_memtable_to_sstable(*mtp, sst).then([s] {
|
||||
return reusable_sst(s, "tests/sstables/tests-temporary", 38).then([s] (auto sstp) {
|
||||
return do_with(sstables::key("key1"), [sstp, s] (auto& key) {
|
||||
return sstp->read_row(s, key).then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([sstp, s] (auto mutation) {
|
||||
return do_with(make_dkey(s, "key1"), [sstp, s] (auto& key) {
|
||||
auto rd = make_lw_shared<flat_mutation_reader>(sstp->read_row_flat(s, key));
|
||||
return read_mutation_from_flat_mutation_reader(*rd).then([sstp, s, rd] (auto mutation) {
|
||||
auto& mp = mutation->partition();
|
||||
auto clustering = clustering_key_prefix::from_exploded(*s, {to_bytes("cl1"), to_bytes("cl2")});
|
||||
|
||||
@@ -1468,10 +1459,9 @@ SEASTAR_TEST_CASE(datafile_generation_39) {
|
||||
auto sst = make_sstable(s, "tests/sstables/tests-temporary", 39, la, big);
|
||||
return write_memtable_to_sstable(*mtp, sst).then([s] {
|
||||
return reusable_sst(s, "tests/sstables/tests-temporary", 39).then([s] (auto sstp) {
|
||||
return do_with(sstables::key("key1"), [sstp, s] (auto& key) {
|
||||
return sstp->read_row(s, key).then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([sstp, s] (auto mutation) {
|
||||
return do_with(make_dkey(s, "key1"), [sstp, s] (auto& key) {
|
||||
auto rd = make_lw_shared<flat_mutation_reader>(sstp->read_row_flat(s, key));
|
||||
return read_mutation_from_flat_mutation_reader(*rd).then([sstp, s, rd] (auto mutation) {
|
||||
auto& mp = mutation->partition();
|
||||
auto row = mp.clustered_row(*s, clustering_key::make_empty());
|
||||
match_live_cell(row.cells(), *s, "cl1", data_value(data_value(to_bytes("cl1"))));
|
||||
@@ -1565,10 +1555,9 @@ SEASTAR_TEST_CASE(datafile_generation_41) {
|
||||
auto sst = make_sstable(s, "tests/sstables/tests-temporary", 41, la, big);
|
||||
return write_memtable_to_sstable(*mt, sst).then([s, tomb] {
|
||||
return reusable_sst(s, "tests/sstables/tests-temporary", 41).then([s, tomb] (auto sstp) mutable {
|
||||
return do_with(sstables::key("key1"), [sstp, s, tomb] (auto& key) {
|
||||
return sstp->read_row(s, key).then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([sstp, s, tomb] (auto mutation) {
|
||||
return do_with(make_dkey(s, "key1"), [sstp, s, tomb] (auto& key) {
|
||||
auto rd = make_lw_shared<flat_mutation_reader>(sstp->read_row_flat(s, key));
|
||||
return read_mutation_from_flat_mutation_reader(*rd).then([sstp, s, tomb, rd] (auto mutation) {
|
||||
auto& mp = mutation->partition();
|
||||
BOOST_REQUIRE(mp.clustered_rows().calculate_size() == 1);
|
||||
auto c_row = *(mp.clustered_rows().begin());
|
||||
@@ -1627,7 +1616,7 @@ SEASTAR_TEST_CASE(datafile_generation_47) {
|
||||
return reusable_sst(s, "tests/sstables/tests-temporary", 47).then([s] (auto sstp) mutable {
|
||||
auto reader = make_lw_shared(sstable_reader(sstp, s));
|
||||
return repeat([reader] {
|
||||
return (*reader)().then([] (streamed_mutation_opt m) {
|
||||
return (*reader)().then([] (mutation_fragment_opt m) {
|
||||
if (!m) {
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||
}
|
||||
@@ -1642,8 +1631,6 @@ SEASTAR_TEST_CASE(datafile_generation_47) {
|
||||
SEASTAR_TEST_CASE(test_counter_write) {
|
||||
return test_setup::do_with_test_directory([] {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
|
||||
auto s = schema_builder(some_keyspace, some_column_family)
|
||||
.with_column("p1", utf8_type, column_kind::partition_key)
|
||||
.with_column("c1", utf8_type, column_kind::clustering_key)
|
||||
@@ -2311,6 +2298,7 @@ SEASTAR_TEST_CASE(check_read_indexes) {
|
||||
SEASTAR_TEST_CASE(tombstone_purge_test) {
|
||||
BOOST_REQUIRE(smp::count == 1);
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
cell_locker_stats cl_stats;
|
||||
|
||||
// In a column family with gc_grace_seconds set to 0, check that a tombstone
|
||||
@@ -2347,7 +2335,7 @@ SEASTAR_TEST_CASE(tombstone_purge_test) {
|
||||
return m;
|
||||
};
|
||||
|
||||
auto make_expiring = [&] (partition_key key, bool ttl) {
|
||||
auto make_expiring = [&] (partition_key key, int ttl) {
|
||||
mutation m(key, s);
|
||||
m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)),
|
||||
gc_clock::now().time_since_epoch().count(), gc_clock::duration(ttl));
|
||||
@@ -2363,9 +2351,7 @@ SEASTAR_TEST_CASE(tombstone_purge_test) {
|
||||
|
||||
auto assert_that_produces_dead_cell = [&] (auto& sst, partition_key& key) {
|
||||
auto reader = make_lw_shared(sstable_reader(sst, s));
|
||||
(*reader)().then([&key] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([reader, s, &key] (mutation_opt m) {
|
||||
read_mutation_from_flat_mutation_reader(*reader).then([reader, s, &key] (mutation_opt m) {
|
||||
BOOST_REQUIRE(m);
|
||||
BOOST_REQUIRE(m->key().equal(*s, key));
|
||||
auto& rows = m->partition().clustered_rows();
|
||||
@@ -2375,7 +2361,7 @@ SEASTAR_TEST_CASE(tombstone_purge_test) {
|
||||
BOOST_REQUIRE_EQUAL(cells.size(), 1);
|
||||
BOOST_REQUIRE(!cells.cell_at(s->get_column_definition("value")->id).as_atomic_cell().is_live());
|
||||
return (*reader)();
|
||||
}).then([reader, s] (streamed_mutation_opt m) {
|
||||
}).then([reader, s] (mutation_fragment_opt m) {
|
||||
BOOST_REQUIRE(!m);
|
||||
}).get();
|
||||
};
|
||||
@@ -2383,6 +2369,8 @@ SEASTAR_TEST_CASE(tombstone_purge_test) {
|
||||
auto alpha = partition_key::from_exploded(*s, {to_bytes("alpha")});
|
||||
auto beta = partition_key::from_exploded(*s, {to_bytes("beta")});
|
||||
|
||||
auto ttl = 5;
|
||||
|
||||
{
|
||||
auto mut1 = make_insert(alpha);
|
||||
auto mut2 = make_insert(beta);
|
||||
@@ -2393,7 +2381,7 @@ SEASTAR_TEST_CASE(tombstone_purge_test) {
|
||||
make_sstable_containing(sst_gen, {mut3})
|
||||
};
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(1));
|
||||
forward_jump_clocks(std::chrono::seconds(ttl));
|
||||
|
||||
auto result = compact(sstables, sstables);
|
||||
BOOST_REQUIRE_EQUAL(1, result.size());
|
||||
@@ -2411,7 +2399,7 @@ SEASTAR_TEST_CASE(tombstone_purge_test) {
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3});
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(1));
|
||||
forward_jump_clocks(std::chrono::seconds(ttl));
|
||||
|
||||
auto result = compact({sst1, sst2}, {sst2});
|
||||
BOOST_REQUIRE_EQUAL(1, result.size());
|
||||
@@ -2430,7 +2418,7 @@ SEASTAR_TEST_CASE(tombstone_purge_test) {
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1, mut2, mut3});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut4});
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(1));
|
||||
forward_jump_clocks(std::chrono::seconds(ttl));
|
||||
|
||||
auto result = compact({sst1, sst2}, {sst1});
|
||||
BOOST_REQUIRE_EQUAL(1, result.size());
|
||||
@@ -2449,7 +2437,7 @@ SEASTAR_TEST_CASE(tombstone_purge_test) {
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1, mut2, mut3});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut4});
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(1));
|
||||
forward_jump_clocks(std::chrono::seconds(ttl));
|
||||
|
||||
auto result = compact({sst1, sst2}, {sst1});
|
||||
BOOST_REQUIRE_EQUAL(1, result.size());
|
||||
@@ -2462,12 +2450,12 @@ SEASTAR_TEST_CASE(tombstone_purge_test) {
|
||||
{
|
||||
// check that expired cell will not be purged if it will resurrect overwritten data.
|
||||
auto mut1 = make_insert(alpha);
|
||||
auto mut2 = make_expiring(alpha, 1);
|
||||
auto mut2 = make_expiring(alpha, ttl);
|
||||
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2});
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(5));
|
||||
forward_jump_clocks(std::chrono::seconds(ttl));
|
||||
|
||||
auto result = compact({sst1, sst2}, {sst2});
|
||||
BOOST_REQUIRE_EQUAL(1, result.size());
|
||||
@@ -2478,25 +2466,25 @@ SEASTAR_TEST_CASE(tombstone_purge_test) {
|
||||
}
|
||||
{
|
||||
auto mut1 = make_insert(alpha);
|
||||
auto mut2 = make_expiring(beta, 1);
|
||||
auto mut2 = make_expiring(beta, ttl);
|
||||
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2});
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(5));
|
||||
forward_jump_clocks(std::chrono::seconds(ttl));
|
||||
|
||||
auto result = compact({sst1, sst2}, {sst2});
|
||||
BOOST_REQUIRE_EQUAL(0, result.size());
|
||||
}
|
||||
{
|
||||
auto mut1 = make_insert(alpha);
|
||||
auto mut2 = make_expiring(alpha, 1);
|
||||
auto mut2 = make_expiring(alpha, ttl);
|
||||
auto mut3 = make_insert(beta);
|
||||
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3});
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(5));
|
||||
forward_jump_clocks(std::chrono::seconds(ttl));
|
||||
|
||||
auto result = compact({sst1, sst2}, {sst1, sst2});
|
||||
BOOST_REQUIRE_EQUAL(1, result.size());
|
||||
@@ -2536,9 +2524,7 @@ SEASTAR_TEST_CASE(check_multi_schema) {
|
||||
auto f = sst->load();
|
||||
return f.then([sst, s] {
|
||||
auto reader = make_lw_shared(sstable_reader(sst, s));
|
||||
return (*reader)().then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([reader, s] (mutation_opt m) {
|
||||
return read_mutation_from_flat_mutation_reader(*reader).then([reader, s] (mutation_opt m) {
|
||||
BOOST_REQUIRE(m);
|
||||
BOOST_REQUIRE(m->key().equal(*s, partition_key::from_singular(*s, 0)));
|
||||
auto& rows = m->partition().clustered_rows();
|
||||
@@ -2549,7 +2535,7 @@ SEASTAR_TEST_CASE(check_multi_schema) {
|
||||
BOOST_REQUIRE_EQUAL(cells.size(), 1);
|
||||
BOOST_REQUIRE_EQUAL(cells.cell_at(s->get_column_definition("e")->id).as_atomic_cell().value(), int32_type->decompose(5));
|
||||
return (*reader)();
|
||||
}).then([reader, s] (streamed_mutation_opt m) {
|
||||
}).then([reader, s] (mutation_fragment_opt m) {
|
||||
BOOST_REQUIRE(!m);
|
||||
});
|
||||
});
|
||||
@@ -2599,12 +2585,14 @@ SEASTAR_TEST_CASE(sstable_rewrite) {
|
||||
auto newsst = (*new_tables)[0];
|
||||
BOOST_REQUIRE(newsst->generation() == 52);
|
||||
auto reader = make_lw_shared(sstable_reader(newsst, s));
|
||||
return (*reader)().then([s, reader, key] (streamed_mutation_opt m) {
|
||||
return (*reader)().then([s, reader, key] (mutation_fragment_opt m) {
|
||||
BOOST_REQUIRE(m);
|
||||
BOOST_REQUIRE(m->is_partition_start());
|
||||
auto pkey = partition_key::from_exploded(*s, {to_bytes(key)});
|
||||
BOOST_REQUIRE(m->key().equal(*s, pkey));
|
||||
BOOST_REQUIRE(m->as_partition_start().key().key().equal(*s, pkey));
|
||||
reader->next_partition();
|
||||
return (*reader)();
|
||||
}).then([reader] (streamed_mutation_opt m) {
|
||||
}).then([reader] (mutation_fragment_opt m) {
|
||||
BOOST_REQUIRE(!m);
|
||||
});
|
||||
}).then([cm, cf, cl_stats] {});
|
||||
@@ -2615,22 +2603,24 @@ SEASTAR_TEST_CASE(sstable_rewrite) {
|
||||
void test_sliced_read_row_presence(shared_sstable sst, schema_ptr s, const query::partition_slice& ps,
|
||||
std::vector<std::pair<partition_key, std::vector<clustering_key>>> expected)
|
||||
{
|
||||
auto reader = sst->as_mutation_source()(s, query::full_partition_range, ps);
|
||||
auto reader = sst->as_mutation_source().make_flat_mutation_reader(s, query::full_partition_range, ps);
|
||||
|
||||
partition_key::equality pk_eq(*s);
|
||||
clustering_key::equality ck_eq(*s);
|
||||
|
||||
auto smopt = reader().get0();
|
||||
while (smopt) {
|
||||
auto mfopt = reader().get0();
|
||||
while (mfopt) {
|
||||
BOOST_REQUIRE(mfopt->is_partition_start());
|
||||
auto it = std::find_if(expected.begin(), expected.end(), [&] (auto&& x) {
|
||||
return pk_eq(x.first, smopt->key());
|
||||
return pk_eq(x.first, mfopt->as_partition_start().key().key());
|
||||
});
|
||||
BOOST_REQUIRE(it != expected.end());
|
||||
auto expected_cr = std::move(it->second);
|
||||
expected.erase(it);
|
||||
|
||||
auto mfopt = (*smopt)().get0();
|
||||
while (mfopt) {
|
||||
mfopt = reader().get0();
|
||||
BOOST_REQUIRE(mfopt);
|
||||
while (!mfopt->is_end_of_partition()) {
|
||||
if (mfopt->is_clustering_row()) {
|
||||
auto& cr = mfopt->as_clustering_row();
|
||||
auto it = std::find_if(expected_cr.begin(), expected_cr.end(), [&] (auto&& x) {
|
||||
@@ -2642,11 +2632,12 @@ void test_sliced_read_row_presence(shared_sstable sst, schema_ptr s, const query
|
||||
BOOST_REQUIRE(it != expected_cr.end());
|
||||
expected_cr.erase(it);
|
||||
}
|
||||
mfopt = (*smopt)().get0();
|
||||
mfopt = reader().get0();
|
||||
BOOST_REQUIRE(mfopt);
|
||||
}
|
||||
BOOST_REQUIRE(expected_cr.empty());
|
||||
|
||||
smopt = reader().get0();
|
||||
mfopt = reader().get0();
|
||||
}
|
||||
BOOST_REQUIRE(expected.empty());
|
||||
}
|
||||
@@ -2767,16 +2758,16 @@ SEASTAR_TEST_CASE(test_wrong_range_tombstone_order) {
|
||||
.with_column("r", int32_type)
|
||||
.build();
|
||||
clustering_key::equality ck_eq(*s);
|
||||
auto pkey = partition_key::from_exploded(*s, { int32_type->decompose(0) });
|
||||
auto dkey = dht::global_partitioner().decorate_key(*s, std::move(pkey));
|
||||
|
||||
auto sst = make_sstable(s, "tests/sstables/wrong_range_tombstone_order", 1, sstables::sstable::version_types::ka, big);
|
||||
sst->load().get0();
|
||||
auto reader = sstable_reader(sst, s);
|
||||
|
||||
auto smopt = reader().get0();
|
||||
BOOST_REQUIRE(smopt);
|
||||
|
||||
using kind = mutation_fragment::kind;
|
||||
assert_that_stream(std::move(*smopt))
|
||||
assert_that(std::move(reader))
|
||||
.produces_partition_start(dkey)
|
||||
.produces(kind::range_tombstone, { 0 })
|
||||
.produces(kind::clustering_row, { 1 })
|
||||
.produces(kind::clustering_row, { 1, 1 })
|
||||
@@ -2792,10 +2783,8 @@ SEASTAR_TEST_CASE(test_wrong_range_tombstone_order) {
|
||||
.produces(kind::range_tombstone, { 2, 1 })
|
||||
.produces(kind::range_tombstone, { 2, 2 })
|
||||
.produces(kind::range_tombstone, { 2, 2 })
|
||||
.produces_partition_end()
|
||||
.produces_end_of_stream();
|
||||
|
||||
smopt = reader().get0();
|
||||
BOOST_REQUIRE(!smopt);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -2839,11 +2828,11 @@ SEASTAR_TEST_CASE(test_counter_read) {
|
||||
sst->load().get();
|
||||
auto reader = sstable_reader(sst, s);
|
||||
|
||||
auto smopt = reader().get0();
|
||||
BOOST_REQUIRE(smopt);
|
||||
auto& sm = *smopt;
|
||||
auto mfopt = reader().get0();
|
||||
BOOST_REQUIRE(mfopt);
|
||||
BOOST_REQUIRE(mfopt->is_partition_start());
|
||||
|
||||
auto mfopt = sm().get0();
|
||||
mfopt = reader().get0();
|
||||
BOOST_REQUIRE(mfopt);
|
||||
BOOST_REQUIRE(mfopt->is_clustering_row());
|
||||
const clustering_row* cr = &mfopt->as_clustering_row();
|
||||
@@ -2871,7 +2860,7 @@ SEASTAR_TEST_CASE(test_counter_read) {
|
||||
}
|
||||
});
|
||||
|
||||
mfopt = sm().get0();
|
||||
mfopt = reader().get0();
|
||||
BOOST_REQUIRE(mfopt);
|
||||
BOOST_REQUIRE(mfopt->is_clustering_row());
|
||||
cr = &mfopt->as_clustering_row();
|
||||
@@ -2884,11 +2873,12 @@ SEASTAR_TEST_CASE(test_counter_read) {
|
||||
}
|
||||
});
|
||||
|
||||
mfopt = sm().get0();
|
||||
BOOST_REQUIRE(!mfopt);
|
||||
mfopt = reader().get0();
|
||||
BOOST_REQUIRE(mfopt);
|
||||
BOOST_REQUIRE(mfopt->is_end_of_partition());
|
||||
|
||||
smopt = reader().get0();
|
||||
BOOST_REQUIRE(!smopt);
|
||||
mfopt = reader().get0();
|
||||
BOOST_REQUIRE(!mfopt);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -3127,6 +3117,7 @@ SEASTAR_TEST_CASE(time_window_strategy_correctness_test) {
|
||||
using namespace std::chrono;
|
||||
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
auto s = schema_builder("tests", "time_window_strategy")
|
||||
.with_column("id", utf8_type, column_kind::partition_key)
|
||||
.with_column("value", int32_type).build();
|
||||
@@ -3258,15 +3249,17 @@ SEASTAR_TEST_CASE(test_promoted_index_read) {
|
||||
auto sst = make_sstable(s, "tests/sstables/promoted_index_read", 1, sstables::sstable::version_types::ka, big);
|
||||
sst->load().get0();
|
||||
|
||||
auto rd = sstable_reader(sst, s);
|
||||
auto smopt = rd().get0();
|
||||
BOOST_REQUIRE(smopt);
|
||||
auto pkey = partition_key::from_exploded(*s, { int32_type->decompose(0) });
|
||||
auto dkey = dht::global_partitioner().decorate_key(*s, std::move(pkey));
|
||||
|
||||
auto rd = sstable_reader(sst, s);
|
||||
using kind = mutation_fragment::kind;
|
||||
assert_that_stream(std::move(*smopt))
|
||||
assert_that(std::move(rd))
|
||||
.produces_partition_start(dkey)
|
||||
.produces(kind::range_tombstone, { 0 })
|
||||
.produces(kind::clustering_row, { 0, 0 })
|
||||
.produces(kind::clustering_row, { 0, 1 })
|
||||
.produces_partition_end()
|
||||
.produces_end_of_stream();
|
||||
});
|
||||
}
|
||||
@@ -3324,6 +3317,7 @@ static void test_min_max_clustering_key(schema_ptr s, std::vector<bytes> explode
|
||||
|
||||
SEASTAR_TEST_CASE(min_max_clustering_key_test) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
{
|
||||
auto s = schema_builder("ks", "cf")
|
||||
.with_column("pk", utf8_type, column_kind::partition_key)
|
||||
@@ -3371,6 +3365,7 @@ SEASTAR_TEST_CASE(min_max_clustering_key_test) {
|
||||
|
||||
SEASTAR_TEST_CASE(min_max_clustering_key_test_2) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
auto s = schema_builder("ks", "cf")
|
||||
.with_column("pk", utf8_type, column_kind::partition_key)
|
||||
.with_column("ck1", utf8_type, column_kind::clustering_key)
|
||||
@@ -3419,6 +3414,7 @@ SEASTAR_TEST_CASE(min_max_clustering_key_test_2) {
|
||||
|
||||
SEASTAR_TEST_CASE(sstable_tombstone_metadata_check) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
auto s = schema_builder("ks", "cf")
|
||||
.with_column("pk", utf8_type, column_kind::partition_key)
|
||||
.with_column("ck1", utf8_type, column_kind::clustering_key)
|
||||
@@ -3572,22 +3568,17 @@ SEASTAR_TEST_CASE(test_partition_skipping) {
|
||||
|
||||
// Must be run in a seastar thread
|
||||
static
|
||||
shared_sstable make_sstable(sstring path, schema_ptr s, mutation_reader rd, sstable_writer_config cfg) {
|
||||
shared_sstable make_sstable(sstring path, flat_mutation_reader rd, sstable_writer_config cfg) {
|
||||
auto s = rd.schema();
|
||||
auto sst = make_sstable(s, path, 1, sstables::sstable::version_types::ka, big);
|
||||
sst->write_components(std::move(rd), 1, s, cfg).get();
|
||||
sst->load().get();
|
||||
return sst;
|
||||
}
|
||||
|
||||
// Must be run in a seastar thread
|
||||
static
|
||||
shared_sstable make_sstable(sstring path, streamed_mutation sm, sstable_writer_config cfg) {
|
||||
auto s = sm.schema();
|
||||
return make_sstable(path, s, make_reader_returning(std::move(sm)), cfg);
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_repeated_tombstone_skipping) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
simple_schema table;
|
||||
|
||||
std::vector<mutation_fragment> fragments;
|
||||
@@ -3619,7 +3610,11 @@ SEASTAR_TEST_CASE(test_repeated_tombstone_skipping) {
|
||||
tmpdir dir;
|
||||
sstable_writer_config cfg;
|
||||
cfg.promoted_index_block_size = 100;
|
||||
auto sst = make_sstable(dir.path, streamed_mutation_returning(table.schema(), table.make_pkey("pk"), std::move(fragments)), cfg);
|
||||
auto mut = mutation(table.make_pkey("key"), table.schema());
|
||||
for (auto&& mf : fragments) {
|
||||
mut.apply(mf);
|
||||
}
|
||||
auto sst = make_sstable(dir.path, flat_mutation_reader_from_mutations({ std::move(mut) }, streamed_mutation::forwarding::no), cfg);
|
||||
auto ms = as_mutation_source(sst);
|
||||
|
||||
for (uint32_t i = 3; i < seq; i++) {
|
||||
@@ -3632,10 +3627,8 @@ SEASTAR_TEST_CASE(test_repeated_tombstone_skipping) {
|
||||
.with_range(query::clustering_range::make_singular(ck2))
|
||||
.with_range(query::clustering_range::make_singular(ck3))
|
||||
.build();
|
||||
mutation_reader rd = ms(table.schema(), query::full_partition_range, slice);
|
||||
streamed_mutation_opt smo = rd().get0();
|
||||
BOOST_REQUIRE(bool(smo));
|
||||
assert_that_stream(std::move(*smo)).has_monotonic_positions();
|
||||
flat_mutation_reader rd = ms.make_flat_mutation_reader(table.schema(), query::full_partition_range, slice);
|
||||
assert_that(std::move(rd)).has_monotonic_positions();
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -3655,6 +3648,7 @@ uint64_t consume_all(streamed_mutation& sm) {
|
||||
|
||||
SEASTAR_TEST_CASE(test_skipping_using_index) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
simple_schema table;
|
||||
|
||||
const unsigned rows_per_part = 10;
|
||||
@@ -3681,10 +3675,10 @@ SEASTAR_TEST_CASE(test_skipping_using_index) {
|
||||
tmpdir dir;
|
||||
sstable_writer_config cfg;
|
||||
cfg.promoted_index_block_size = 1; // So that every fragment is indexed
|
||||
auto sst = make_sstable(dir.path, table.schema(), make_reader_returning_many(partitions), cfg);
|
||||
auto sst = make_sstable(dir.path, flat_mutation_reader_from_mutations(partitions, streamed_mutation::forwarding::no), cfg);
|
||||
|
||||
auto ms = as_mutation_source(sst);
|
||||
auto rd = ms(table.schema(),
|
||||
auto rd = ms.make_flat_mutation_reader(table.schema(),
|
||||
query::full_partition_range,
|
||||
table.schema()->full_slice(),
|
||||
default_priority_class(),
|
||||
@@ -3692,33 +3686,35 @@ SEASTAR_TEST_CASE(test_skipping_using_index) {
|
||||
streamed_mutation::forwarding::yes,
|
||||
mutation_reader::forwarding::yes);
|
||||
|
||||
auto assertions = assert_that(std::move(rd));
|
||||
// Consume first partition completely so that index is stale
|
||||
{
|
||||
streamed_mutation_opt smo = rd().get0();
|
||||
BOOST_REQUIRE(bool(smo));
|
||||
BOOST_REQUIRE_EQUAL(0, consume_all(*smo));
|
||||
smo->fast_forward_to(position_range::all_clustered_rows()).get();
|
||||
BOOST_REQUIRE_EQUAL(rows_per_part, consume_all(*smo));
|
||||
assertions
|
||||
.produces_partition_start(keys[0])
|
||||
.fast_forward_to(position_range::all_clustered_rows());
|
||||
for (auto i = 0; i < rows_per_part; i++) {
|
||||
assertions.produces_row_with_key(table.make_ckey(i));
|
||||
}
|
||||
assertions.produces_end_of_stream();
|
||||
}
|
||||
|
||||
{
|
||||
auto base = rows_per_part;
|
||||
streamed_mutation_opt smo = rd().get0();
|
||||
BOOST_REQUIRE(bool(smo));
|
||||
|
||||
assert_that_stream(std::move(*smo))
|
||||
.fwd_to(position_range(
|
||||
assertions
|
||||
.next_partition()
|
||||
.produces_partition_start(keys[1])
|
||||
.fast_forward_to(position_range(
|
||||
position_in_partition::for_key(table.make_ckey(base)),
|
||||
position_in_partition::for_key(table.make_ckey(base + 3))))
|
||||
.produces_row_with_key(table.make_ckey(base))
|
||||
.produces_row_with_key(table.make_ckey(base + 1))
|
||||
.produces_row_with_key(table.make_ckey(base + 2))
|
||||
.fwd_to(position_range(
|
||||
.fast_forward_to(position_range(
|
||||
position_in_partition::for_key(table.make_ckey(base + 5)),
|
||||
position_in_partition::for_key(table.make_ckey(base + 6))))
|
||||
.produces_row_with_key(table.make_ckey(base + 5))
|
||||
.produces_end_of_stream()
|
||||
.fwd_to(position_range(
|
||||
.fast_forward_to(position_range(
|
||||
position_in_partition::for_key(table.make_ckey(base + rows_per_part)), // Skip all rows in current partition
|
||||
position_in_partition::after_all_clustered_rows()))
|
||||
.produces_end_of_stream();
|
||||
@@ -3727,17 +3723,16 @@ SEASTAR_TEST_CASE(test_skipping_using_index) {
|
||||
// Consume few fragments then skip
|
||||
{
|
||||
auto base = rows_per_part * 2;
|
||||
streamed_mutation_opt smo = rd().get0();
|
||||
BOOST_REQUIRE(bool(smo));
|
||||
|
||||
assert_that_stream(std::move(*smo))
|
||||
.fwd_to(position_range(
|
||||
assertions
|
||||
.next_partition()
|
||||
.produces_partition_start(keys[2])
|
||||
.fast_forward_to(position_range(
|
||||
position_in_partition::for_key(table.make_ckey(base)),
|
||||
position_in_partition::for_key(table.make_ckey(base + 3))))
|
||||
.produces_row_with_key(table.make_ckey(base))
|
||||
.produces_row_with_key(table.make_ckey(base + 1))
|
||||
.produces_row_with_key(table.make_ckey(base + 2))
|
||||
.fwd_to(position_range(
|
||||
.fast_forward_to(position_range(
|
||||
position_in_partition::for_key(table.make_ckey(base + rows_per_part - 1)), // last row
|
||||
position_in_partition::after_all_clustered_rows()))
|
||||
.produces_row_with_key(table.make_ckey(base + rows_per_part - 1))
|
||||
@@ -3746,17 +3741,18 @@ SEASTAR_TEST_CASE(test_skipping_using_index) {
|
||||
|
||||
// Consume nothing from the next partition
|
||||
{
|
||||
streamed_mutation_opt smo = rd().get0();
|
||||
BOOST_REQUIRE(bool(smo));
|
||||
assertions
|
||||
.next_partition()
|
||||
.produces_partition_start(keys[3])
|
||||
.next_partition();
|
||||
}
|
||||
|
||||
{
|
||||
auto base = rows_per_part * 4;
|
||||
streamed_mutation_opt smo = rd().get0();
|
||||
BOOST_REQUIRE(bool(smo));
|
||||
|
||||
assert_that_stream(std::move(*smo))
|
||||
.fwd_to(position_range(
|
||||
assertions
|
||||
.next_partition()
|
||||
.produces_partition_start(keys[4])
|
||||
.fast_forward_to(position_range(
|
||||
position_in_partition::for_key(table.make_ckey(base + rows_per_part - 1)), // last row
|
||||
position_in_partition::after_all_clustered_rows()))
|
||||
.produces_row_with_key(table.make_ckey(base + rows_per_part - 1))
|
||||
@@ -3921,6 +3917,7 @@ SEASTAR_TEST_CASE(sstable_resharding_strategy_tests) {
|
||||
|
||||
SEASTAR_TEST_CASE(sstable_tombstone_histogram_test) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
auto builder = schema_builder("tests", "tombstone_histogram_test")
|
||||
.with_column("id", utf8_type, column_kind::partition_key)
|
||||
.with_column("value", int32_type);
|
||||
@@ -3977,6 +3974,7 @@ SEASTAR_TEST_CASE(sstable_bad_tombstone_histogram_test) {
|
||||
|
||||
SEASTAR_TEST_CASE(sstable_expired_data_ratio) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
auto tmp = make_lw_shared<tmpdir>();
|
||||
auto s = make_lw_shared(schema({}, some_keyspace, some_column_family,
|
||||
{{"p1", utf8_type}}, {{"c1", utf8_type}}, {{"r1", utf8_type}}, {}, utf8_type));
|
||||
@@ -4081,6 +4079,7 @@ SEASTAR_TEST_CASE(sstable_expired_data_ratio) {
|
||||
|
||||
SEASTAR_TEST_CASE(sstable_owner_shards) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
cell_locker_stats cl_stats;
|
||||
|
||||
auto builder = schema_builder("tests", "test")
|
||||
@@ -4152,6 +4151,7 @@ SEASTAR_TEST_CASE(sstable_owner_shards) {
|
||||
|
||||
SEASTAR_TEST_CASE(test_summary_entry_spanning_more_keys_than_min_interval) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
auto s = make_lw_shared(schema({}, some_keyspace, some_column_family,
|
||||
{{"p1", int32_type}}, {{"c1", utf8_type}}, {{"r1", int32_type}}, {}, utf8_type));
|
||||
|
||||
@@ -4179,7 +4179,7 @@ SEASTAR_TEST_CASE(test_summary_entry_spanning_more_keys_than_min_interval) {
|
||||
|
||||
std::set<mutation, mutation_decorated_key_less_comparator> merged;
|
||||
merged.insert(mutations.begin(), mutations.end());
|
||||
auto rd = assert_that(sst->as_mutation_source()(s));
|
||||
auto rd = assert_that(sst->as_mutation_source().make_flat_mutation_reader(s, query::full_partition_range));
|
||||
auto keys_read = 0;
|
||||
for (auto&& m : merged) {
|
||||
keys_read++;
|
||||
@@ -4189,7 +4189,7 @@ SEASTAR_TEST_CASE(test_summary_entry_spanning_more_keys_than_min_interval) {
|
||||
BOOST_REQUIRE(keys_read == keys_written);
|
||||
|
||||
auto r = dht::partition_range::make({mutations.back().decorated_key(), true}, {mutations.back().decorated_key(), true});
|
||||
assert_that(sst->as_mutation_source()(s, r))
|
||||
assert_that(sst->as_mutation_source().make_flat_mutation_reader(s, r))
|
||||
.produces(slice(mutations, r))
|
||||
.produces_end_of_stream();
|
||||
});
|
||||
@@ -4244,25 +4244,29 @@ SEASTAR_TEST_CASE(test_wrong_counter_shard_order) {
|
||||
};
|
||||
|
||||
{
|
||||
auto smopt = reader().get0();
|
||||
BOOST_REQUIRE(smopt);
|
||||
auto& sm = *smopt;
|
||||
verify_row(sm().get0(), 28545);
|
||||
verify_row(sm().get0(), 27967);
|
||||
verify_row(sm().get0(), 28342);
|
||||
verify_row(sm().get0(), 28325);
|
||||
BOOST_REQUIRE(!sm().get0());
|
||||
auto mfopt = reader().get0();
|
||||
BOOST_REQUIRE(mfopt);
|
||||
BOOST_REQUIRE(mfopt->is_partition_start());
|
||||
verify_row(reader().get0(), 28545);
|
||||
verify_row(reader().get0(), 27967);
|
||||
verify_row(reader().get0(), 28342);
|
||||
verify_row(reader().get0(), 28325);
|
||||
mfopt = reader().get0();
|
||||
BOOST_REQUIRE(mfopt);
|
||||
BOOST_REQUIRE(mfopt->is_end_of_partition());
|
||||
}
|
||||
|
||||
{
|
||||
auto smopt = reader().get0();
|
||||
BOOST_REQUIRE(smopt);
|
||||
auto& sm = *smopt;
|
||||
verify_row(sm().get0(), 28386);
|
||||
verify_row(sm().get0(), 28378);
|
||||
verify_row(sm().get0(), 28129);
|
||||
verify_row(sm().get0(), 28260);
|
||||
BOOST_REQUIRE(!sm().get0());
|
||||
auto mfopt = reader().get0();
|
||||
BOOST_REQUIRE(mfopt);
|
||||
BOOST_REQUIRE(mfopt->is_partition_start());
|
||||
verify_row(reader().get0(), 28386);
|
||||
verify_row(reader().get0(), 28378);
|
||||
verify_row(reader().get0(), 28129);
|
||||
verify_row(reader().get0(), 28260);
|
||||
mfopt = reader().get0();
|
||||
BOOST_REQUIRE(mfopt);
|
||||
BOOST_REQUIRE(mfopt->is_end_of_partition());
|
||||
}
|
||||
|
||||
BOOST_REQUIRE(!reader().get0());
|
||||
@@ -4271,6 +4275,7 @@ SEASTAR_TEST_CASE(test_wrong_counter_shard_order) {
|
||||
|
||||
SEASTAR_TEST_CASE(compaction_correctness_with_partitioned_sstable_set) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
cell_locker_stats cl_stats;
|
||||
|
||||
auto builder = schema_builder("tests", "tombstone_purge")
|
||||
@@ -4330,3 +4335,106 @@ SEASTAR_TEST_CASE(compaction_correctness_with_partitioned_sstable_set) {
|
||||
.produces_end_of_stream();
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_broken_promoted_index_is_skipped) {
|
||||
// create table ks.test (pk int, ck int, v int, primary key(pk, ck)) with compact storage;
|
||||
//
|
||||
// Populated with:
|
||||
//
|
||||
// insert into ks.test (pk, ck, v) values (1, 1, 1);
|
||||
// insert into ks.test (pk, ck, v) values (1, 2, 1);
|
||||
// insert into ks.test (pk, ck, v) values (1, 3, 1);
|
||||
// delete from ks.test where pk = 1 and ck = 2;
|
||||
return seastar::async([] {
|
||||
auto s = schema_builder("ks", "test")
|
||||
.with_column("pk", int32_type, column_kind::partition_key)
|
||||
.with_column("ck", int32_type, column_kind::clustering_key)
|
||||
.with_column("v", int32_type)
|
||||
.build(schema_builder::compact_storage::yes);
|
||||
|
||||
auto sst = sstables::make_sstable(s, "tests/sstables/broken_non_compound_pi_and_range_tombstone", 1, sstables::sstable::version_types::ka, big);
|
||||
sst->load().get0();
|
||||
|
||||
{
|
||||
assert_that(sst->get_index_reader(default_priority_class())).is_empty(*s);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_old_format_non_compound_range_tombstone_is_read) {
|
||||
// create table ks.test (pk int, ck int, v int, primary key(pk, ck)) with compact storage;
|
||||
//
|
||||
// Populated with:
|
||||
//
|
||||
// insert into ks.test (pk, ck, v) values (1, 1, 1);
|
||||
// insert into ks.test (pk, ck, v) values (1, 2, 1);
|
||||
// insert into ks.test (pk, ck, v) values (1, 3, 1);
|
||||
// delete from ks.test where pk = 1 and ck = 2;
|
||||
return seastar::async([] {
|
||||
auto s = schema_builder("ks", "test")
|
||||
.with_column("pk", int32_type, column_kind::partition_key)
|
||||
.with_column("ck", int32_type, column_kind::clustering_key)
|
||||
.with_column("v", int32_type)
|
||||
.build(schema_builder::compact_storage::yes);
|
||||
|
||||
auto sst = sstables::make_sstable(s, "tests/sstables/broken_non_compound_pi_and_range_tombstone", 1, sstables::sstable::version_types::ka, big);
|
||||
sst->load().get0();
|
||||
|
||||
auto pk = partition_key::from_exploded(*s, { int32_type->decompose(1) });
|
||||
auto dk = dht::global_partitioner().decorate_key(*s, pk);
|
||||
auto ck = clustering_key::from_exploded(*s, {int32_type->decompose(2)});
|
||||
mutation m(dk, s);
|
||||
m.set_clustered_cell(ck, *s->get_column_definition("v"), atomic_cell::make_live(1511270919978349, int32_type->decompose(1), { }));
|
||||
m.partition().apply_delete(*s, ck, {1511270943827278, gc_clock::from_time_t(1511270943)});
|
||||
|
||||
{
|
||||
auto slice = partition_slice_builder(*s).with_range(query::clustering_range::make_singular({ck})).build();
|
||||
assert_that(sst->as_mutation_source().make_flat_mutation_reader(s, dht::partition_range::make_singular(dk), slice))
|
||||
.produces(m)
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(summary_rebuild_sanity) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
auto builder = schema_builder("tests", "test")
|
||||
.with_column("id", utf8_type, column_kind::partition_key)
|
||||
.with_column("value", utf8_type);
|
||||
builder.set_compressor_params(compression_parameters({ }));
|
||||
auto s = builder.build(schema_builder::compact_storage::no);
|
||||
const column_definition& col = *s->get_column_definition("value");
|
||||
|
||||
auto make_insert = [&] (partition_key key) {
|
||||
mutation m(key, s);
|
||||
m.set_clustered_cell(clustering_key::make_empty(), col, make_atomic_cell(bytes(1024, 'a')));
|
||||
return m;
|
||||
};
|
||||
|
||||
std::vector<mutation> mutations;
|
||||
for (auto i = 0; i < s->min_index_interval()*2; i++) {
|
||||
auto key = to_bytes("key" + to_sstring(i));
|
||||
mutations.push_back(make_insert(partition_key::from_exploded(*s, {std::move(key)})));
|
||||
}
|
||||
|
||||
auto tmp = make_lw_shared<tmpdir>();
|
||||
auto sst_gen = [s, tmp, gen = make_lw_shared<unsigned>(1)] () mutable {
|
||||
return make_sstable(s, tmp->path, (*gen)++, la, big);
|
||||
};
|
||||
auto sst = make_sstable_containing(sst_gen, mutations);
|
||||
|
||||
summary s1 = sstables::test(sst).get_summary();
|
||||
BOOST_REQUIRE(s1.entries.size() > 1);
|
||||
|
||||
sstables::test(sst).remove_component(sstable::component_type::Summary).get();
|
||||
sst = reusable_sst(s, tmp->path, 1).get0();
|
||||
summary s2 = sstables::test(sst).get_summary();
|
||||
|
||||
BOOST_REQUIRE(::memcmp(&s1.header, &s2.header, sizeof(summary::header)) == 0);
|
||||
BOOST_REQUIRE(s1.positions == s2.positions);
|
||||
BOOST_REQUIRE(s1.entries == s2.entries);
|
||||
BOOST_REQUIRE(s1.first_key.value == s2.first_key.value);
|
||||
BOOST_REQUIRE(s1.last_key.value == s2.last_key.value);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -33,21 +33,26 @@
|
||||
#include "mutation_reader.hh"
|
||||
#include "mutation_reader_assertions.hh"
|
||||
#include "mutation_source_test.hh"
|
||||
#include "partition_slice_builder.hh"
|
||||
#include "tmpdir.hh"
|
||||
#include "memtable-sstable.hh"
|
||||
#include "disk-error-handler.hh"
|
||||
#include "tests/sstable_assertions.hh"
|
||||
#include "tests/test_services.hh"
|
||||
#include "flat_mutation_reader_assertions.hh"
|
||||
|
||||
thread_local disk_error_signal_type commit_error;
|
||||
thread_local disk_error_signal_type general_disk_error;
|
||||
|
||||
using namespace sstables;
|
||||
|
||||
|
||||
SEASTAR_TEST_CASE(nonexistent_key) {
|
||||
return reusable_sst(uncompressed_schema(), "tests/sstables/uncompressed", 1).then([] (auto sstp) {
|
||||
return do_with(key::from_bytes(to_bytes("invalid_key")), [sstp] (auto& key) {
|
||||
return do_with(make_dkey(uncompressed_schema(), "invalid_key"), [sstp] (auto& key) {
|
||||
auto s = uncompressed_schema();
|
||||
return sstp->read_row(s, key).then([sstp, s, &key] (auto mutation) {
|
||||
auto rd = make_lw_shared<flat_mutation_reader>(sstp->read_row_flat(s, key));
|
||||
return (*rd)().then([sstp, s, &key, rd] (auto mutation) {
|
||||
BOOST_REQUIRE(!mutation);
|
||||
return make_ready_future<>();
|
||||
});
|
||||
@@ -57,11 +62,10 @@ SEASTAR_TEST_CASE(nonexistent_key) {
|
||||
|
||||
future<> test_no_clustered(bytes&& key, std::unordered_map<bytes, data_value> &&map) {
|
||||
return reusable_sst(uncompressed_schema(), "tests/sstables/uncompressed", 1).then([k = std::move(key), map = std::move(map)] (auto sstp) mutable {
|
||||
return do_with(sstables::key(std::move(k)), [sstp, map = std::move(map)] (auto& key) {
|
||||
return do_with(make_dkey(uncompressed_schema(), std::move(k)), [sstp, map = std::move(map)] (auto& key) {
|
||||
auto s = uncompressed_schema();
|
||||
return sstp->read_row(s, key).then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([sstp, s, &key, map = std::move(map)] (auto mutation) {
|
||||
auto rd = make_lw_shared<flat_mutation_reader>(sstp->read_row_flat(s, key));
|
||||
return read_mutation_from_flat_mutation_reader(*rd).then([sstp, s, &key, rd, map = std::move(map)] (auto mutation) {
|
||||
BOOST_REQUIRE(mutation);
|
||||
auto& mp = mutation->partition();
|
||||
for (auto&& e : mp.range(*s, nonwrapping_range<clustering_key_prefix>())) {
|
||||
@@ -125,11 +129,10 @@ SEASTAR_TEST_CASE(uncompressed_4) {
|
||||
template <int Generation>
|
||||
future<mutation> generate_clustered(bytes&& key) {
|
||||
return reusable_sst(complex_schema(), "tests/sstables/complex", Generation).then([k = std::move(key)] (auto sstp) mutable {
|
||||
return do_with(sstables::key(std::move(k)), [sstp] (auto& key) {
|
||||
return do_with(make_dkey(complex_schema(), std::move(k)), [sstp] (auto& key) {
|
||||
auto s = complex_schema();
|
||||
return sstp->read_row(s, key).then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([sstp, s, &key] (auto mutation) {
|
||||
auto rd = make_lw_shared<flat_mutation_reader>(sstp->read_row_flat(s, key));
|
||||
return read_mutation_from_flat_mutation_reader(*rd).then([sstp, s, &key, rd] (auto mutation) {
|
||||
BOOST_REQUIRE(mutation);
|
||||
return std::move(*mutation);
|
||||
});
|
||||
@@ -314,18 +317,20 @@ future<> test_range_reads(const dht::token& min, const dht::token& max, std::vec
|
||||
auto stop = make_lw_shared<bool>(false);
|
||||
return do_with(dht::partition_range::make(dht::ring_position::starting_at(min),
|
||||
dht::ring_position::ending_at(max)), [&, sstp, s] (auto& pr) {
|
||||
auto mutations = sstp->read_range_rows(s, pr);
|
||||
auto mutations = make_lw_shared<flat_mutation_reader>(sstp->read_range_rows_flat(s, pr));
|
||||
return do_until([stop] { return *stop; },
|
||||
// Note: The data in the following lambda, including
|
||||
// "mutations", continues to live until after the last
|
||||
// iteration's future completes, so its lifetime is safe.
|
||||
[sstp, mutations = std::move(mutations), &expected, expected_size, count, stop] () mutable {
|
||||
return mutations().then([&expected, expected_size, count, stop] (streamed_mutation_opt mutation) mutable {
|
||||
if (mutation) {
|
||||
return (*mutations)().then([&expected, expected_size, count, stop, mutations] (mutation_fragment_opt mfopt) mutable {
|
||||
if (mfopt) {
|
||||
BOOST_REQUIRE(mfopt->is_partition_start());
|
||||
BOOST_REQUIRE(*count < expected_size);
|
||||
BOOST_REQUIRE(std::vector<bytes>({expected.back()}) == mutation->key().explode());
|
||||
BOOST_REQUIRE(std::vector<bytes>({expected.back()}) == mfopt->as_partition_start().key().key().explode());
|
||||
expected.pop_back();
|
||||
(*count)++;
|
||||
mutations->next_partition();
|
||||
} else {
|
||||
*stop = true;
|
||||
}
|
||||
@@ -377,7 +382,7 @@ void test_mutation_source(sstable_writer_config cfg, sstables::sstable::version_
|
||||
mt->apply(m);
|
||||
}
|
||||
|
||||
sst->write_components(mt->make_reader(s), partitions.size(), s, cfg).get();
|
||||
sst->write_components(mt->make_flat_reader(s), partitions.size(), s, cfg).get();
|
||||
sst->load().get();
|
||||
|
||||
return as_mutation_source(sst);
|
||||
@@ -387,6 +392,7 @@ void test_mutation_source(sstable_writer_config cfg, sstables::sstable::version_
|
||||
|
||||
SEASTAR_TEST_CASE(test_sstable_conforms_to_mutation_source) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
for (auto version : {sstables::sstable::version_types::ka, sstables::sstable::version_types::la}) {
|
||||
for (auto index_block_size : {1, 128, 64*1024}) {
|
||||
sstable_writer_config cfg;
|
||||
@@ -399,6 +405,7 @@ SEASTAR_TEST_CASE(test_sstable_conforms_to_mutation_source) {
|
||||
|
||||
SEASTAR_TEST_CASE(test_sstable_can_write_and_read_range_tombstone) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
auto dir = make_lw_shared<tmpdir>();
|
||||
auto s = make_lw_shared(schema({}, "ks", "cf",
|
||||
{{"p1", utf8_type}}, {{"c1", int32_type}}, {{"r1", int32_type}}, {}, utf8_type));
|
||||
@@ -421,9 +428,8 @@ SEASTAR_TEST_CASE(test_sstable_can_write_and_read_range_tombstone) {
|
||||
sstables::sstable::format_types::big);
|
||||
write_memtable_to_sstable(*mt, sst).get();
|
||||
sst->load().get();
|
||||
auto mr = sst->read_rows(s);
|
||||
auto sm = mr().get0();
|
||||
auto mut = mutation_from_streamed_mutation(std::move(sm)).get0();
|
||||
auto mr = sst->read_rows_flat(s);
|
||||
auto mut = read_mutation_from_flat_mutation_reader(mr).get0();
|
||||
BOOST_REQUIRE(bool(mut));
|
||||
auto& rts = mut->partition().row_tombstones();
|
||||
BOOST_REQUIRE(rts.size() == 1);
|
||||
@@ -439,11 +445,11 @@ SEASTAR_TEST_CASE(test_sstable_can_write_and_read_range_tombstone) {
|
||||
|
||||
SEASTAR_TEST_CASE(compact_storage_sparse_read) {
|
||||
return reusable_sst(compact_sparse_schema(), "tests/sstables/compact_sparse", 1).then([] (auto sstp) {
|
||||
return do_with(sstables::key("first_row"), [sstp] (auto& key) {
|
||||
return do_with(make_dkey(compact_sparse_schema(), "first_row"), [sstp] (auto& key) {
|
||||
auto s = compact_sparse_schema();
|
||||
return sstp->read_row(s, key).then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([sstp, s, &key] (auto mutation) {
|
||||
auto rd = make_lw_shared<flat_mutation_reader>(sstp->read_row_flat(s, key));
|
||||
return read_mutation_from_flat_mutation_reader(*rd).then([sstp, s, &key, rd] (auto mutation) {
|
||||
BOOST_REQUIRE(mutation);
|
||||
auto& mp = mutation->partition();
|
||||
auto row = mp.clustered_row(*s, clustering_key::make_empty());
|
||||
match_live_cell(row.cells(), *s, "cl1", data_value(to_bytes("cl1")));
|
||||
@@ -456,11 +462,10 @@ SEASTAR_TEST_CASE(compact_storage_sparse_read) {
|
||||
|
||||
SEASTAR_TEST_CASE(compact_storage_simple_dense_read) {
|
||||
return reusable_sst(compact_simple_dense_schema(), "tests/sstables/compact_simple_dense", 1).then([] (auto sstp) {
|
||||
return do_with(sstables::key("first_row"), [sstp] (auto& key) {
|
||||
return do_with(make_dkey(compact_simple_dense_schema(), "first_row"), [sstp] (auto& key) {
|
||||
auto s = compact_simple_dense_schema();
|
||||
return sstp->read_row(s, key).then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([sstp, s, &key] (auto mutation) {
|
||||
auto rd = make_lw_shared<flat_mutation_reader>(sstp->read_row_flat(s, key));
|
||||
return read_mutation_from_flat_mutation_reader(*rd).then([sstp, s, &key, rd] (auto mutation) {
|
||||
auto& mp = mutation->partition();
|
||||
|
||||
auto exploded = exploded_clustering_prefix({"cl1"});
|
||||
@@ -476,11 +481,10 @@ SEASTAR_TEST_CASE(compact_storage_simple_dense_read) {
|
||||
|
||||
SEASTAR_TEST_CASE(compact_storage_dense_read) {
|
||||
return reusable_sst(compact_dense_schema(), "tests/sstables/compact_dense", 1).then([] (auto sstp) {
|
||||
return do_with(sstables::key("first_row"), [sstp] (auto& key) {
|
||||
return do_with(make_dkey(compact_dense_schema(), "first_row"), [sstp] (auto& key) {
|
||||
auto s = compact_dense_schema();
|
||||
return sstp->read_row(s, key).then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([sstp, s, &key] (auto mutation) {
|
||||
auto rd = make_lw_shared<flat_mutation_reader>(sstp->read_row_flat(s, key));
|
||||
return read_mutation_from_flat_mutation_reader(*rd).then([sstp, s, &key, rd] (auto mutation) {
|
||||
auto& mp = mutation->partition();
|
||||
|
||||
auto exploded = exploded_clustering_prefix({"cl1", "cl2"});
|
||||
@@ -501,11 +505,9 @@ SEASTAR_TEST_CASE(compact_storage_dense_read) {
|
||||
SEASTAR_TEST_CASE(broken_ranges_collection) {
|
||||
return reusable_sst(peers_schema(), "tests/sstables/broken_ranges", 2).then([] (auto sstp) {
|
||||
auto s = peers_schema();
|
||||
auto reader = make_lw_shared<mutation_reader>(sstp->as_mutation_source()(s));
|
||||
auto reader = make_lw_shared<flat_mutation_reader>(sstp->as_mutation_source().make_flat_mutation_reader(s, query::full_partition_range));
|
||||
return repeat([s, reader] {
|
||||
return (*reader)().then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([s, reader] (mutation_opt mut) {
|
||||
return read_mutation_from_flat_mutation_reader(*reader).then([s, reader] (mutation_opt mut) {
|
||||
auto key_equal = [s, &mut] (sstring ip) {
|
||||
return mut->key().equal(*s, partition_key::from_deeply_exploded(*s, { net::ipv4_address(ip) }));
|
||||
};
|
||||
@@ -569,11 +571,9 @@ static future<sstable_ptr> ka_sst(schema_ptr schema, sstring dir, unsigned long
|
||||
SEASTAR_TEST_CASE(tombstone_in_tombstone) {
|
||||
return ka_sst(tombstone_overlap_schema(), "tests/sstables/tombstone_overlap", 1).then([] (auto sstp) {
|
||||
auto s = tombstone_overlap_schema();
|
||||
return do_with(sstp->read_rows(s), [sstp, s] (auto& reader) {
|
||||
return do_with(sstp->read_rows_flat(s), [sstp, s] (auto& reader) {
|
||||
return repeat([sstp, s, &reader] {
|
||||
return reader().then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([s] (mutation_opt mut) {
|
||||
return read_mutation_from_flat_mutation_reader(reader).then([s] (mutation_opt mut) {
|
||||
if (!mut) {
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
@@ -634,11 +634,9 @@ SEASTAR_TEST_CASE(tombstone_in_tombstone) {
|
||||
SEASTAR_TEST_CASE(range_tombstone_reading) {
|
||||
return ka_sst(tombstone_overlap_schema(), "tests/sstables/tombstone_overlap", 4).then([] (auto sstp) {
|
||||
auto s = tombstone_overlap_schema();
|
||||
return do_with(sstp->read_rows(s), [sstp, s] (auto& reader) {
|
||||
return do_with(sstp->read_rows_flat(s), [sstp, s] (auto& reader) {
|
||||
return repeat([sstp, s, &reader] {
|
||||
return reader().then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([s] (mutation_opt mut) {
|
||||
return read_mutation_from_flat_mutation_reader(reader).then([s] (mutation_opt mut) {
|
||||
if (!mut) {
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
@@ -713,11 +711,9 @@ static schema_ptr tombstone_overlap_schema2() {
|
||||
SEASTAR_TEST_CASE(tombstone_in_tombstone2) {
|
||||
return ka_sst(tombstone_overlap_schema2(), "tests/sstables/tombstone_overlap", 3).then([] (auto sstp) {
|
||||
auto s = tombstone_overlap_schema2();
|
||||
return do_with(sstp->read_rows(s), [sstp, s] (auto& reader) {
|
||||
return do_with(sstp->read_rows_flat(s), [sstp, s] (auto& reader) {
|
||||
return repeat([sstp, s, &reader] {
|
||||
return reader().then([] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([s] (mutation_opt mut) {
|
||||
return read_mutation_from_flat_mutation_reader(reader).then([s] (mutation_opt mut) {
|
||||
if (!mut) {
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
@@ -773,6 +769,7 @@ SEASTAR_TEST_CASE(tombstone_in_tombstone2) {
|
||||
|
||||
SEASTAR_TEST_CASE(test_non_compound_table_row_is_not_marked_as_static) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
auto dir = make_lw_shared<tmpdir>();
|
||||
schema_builder builder("ks", "cf");
|
||||
builder.with_column("p", utf8_type, column_kind::partition_key);
|
||||
@@ -797,15 +794,15 @@ SEASTAR_TEST_CASE(test_non_compound_table_row_is_not_marked_as_static) {
|
||||
sstables::sstable::format_types::big);
|
||||
write_memtable_to_sstable(*mt, sst).get();
|
||||
sst->load().get();
|
||||
auto mr = sst->read_rows(s);
|
||||
auto sm = mr().get0();
|
||||
auto mut = mutation_from_streamed_mutation(std::move(sm)).get0();
|
||||
auto mr = sst->read_rows_flat(s);
|
||||
auto mut = read_mutation_from_flat_mutation_reader(mr).get0();
|
||||
BOOST_REQUIRE(bool(mut));
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_promoted_index_blocks_are_monotonic) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
auto dir = make_lw_shared<tmpdir>();
|
||||
schema_builder builder("ks", "cf");
|
||||
builder.with_column("p", utf8_type, column_kind::partition_key);
|
||||
@@ -847,8 +844,288 @@ SEASTAR_TEST_CASE(test_promoted_index_blocks_are_monotonic) {
|
||||
sstables::sstable::format_types::big);
|
||||
sstable_writer_config cfg;
|
||||
cfg.promoted_index_block_size = 1;
|
||||
sst->write_components(mt->make_reader(s), 1, s, cfg).get();
|
||||
sst->write_components(mt->make_flat_reader(s), 1, s, cfg).get();
|
||||
sst->load().get();
|
||||
assert_that(sst->get_index_reader(default_priority_class())).has_monotonic_positions(*s);
|
||||
});
|
||||
}
|
||||
|
||||
// Checks the promoted index of a compact-storage table whose clustering key
// has two components (c1, c2).
//
// A single partition with four rows and one range tombstone is flushed from a
// memtable into a "ka"/"big" sstable written with promoted_index_block_size = 1.
// The test then requires that:
//   1. the index reader reports monotonic positions, and
//   2. a read sliced to start at the first row produces the mutation `m`.
SEASTAR_TEST_CASE(test_promoted_index_blocks_are_monotonic_compound_dense) {
    return seastar::async([] {
        storage_service_for_tests ssft;
        auto dir = make_lw_shared<tmpdir>();

        schema_builder builder("ks", "cf");
        builder.with_column("p", utf8_type, column_kind::partition_key);
        builder.with_column("c1", int32_type, column_kind::clustering_key);
        builder.with_column("c2", int32_type, column_kind::clustering_key);
        builder.with_column("v", int32_type);
        auto s = builder.build(schema_builder::compact_storage::yes);

        auto dk = dht::global_partitioner().decorate_key(*s, partition_key::from_exploded(*s, {to_bytes("key1")}));
        auto cell = atomic_cell::make_live(1, int32_type->decompose(88), { });
        mutation m(dk, s);

        // Rows (1, 2), (1, 4), (1, 6) and (3, 9); every row carries the same cell.
        auto ck1 = clustering_key::from_exploded(*s, {int32_type->decompose(1), int32_type->decompose(2)});
        m.set_clustered_cell(ck1, *s->get_column_definition("v"), cell);

        auto ck2 = clustering_key::from_exploded(*s, {int32_type->decompose(1), int32_type->decompose(4)});
        m.set_clustered_cell(ck2, *s->get_column_definition("v"), cell);

        auto ck3 = clustering_key::from_exploded(*s, {int32_type->decompose(1), int32_type->decompose(6)});
        m.set_clustered_cell(ck3, *s->get_column_definition("v"), cell);

        auto ck4 = clustering_key::from_exploded(*s, {int32_type->decompose(3), int32_type->decompose(9)});
        m.set_clustered_cell(ck4, *s->get_column_definition("v"), cell);

        // Range tombstone between the one-component prefixes 1 and 2, both
        // bounds inclusive.
        m.partition().apply_row_tombstone(*s, range_tombstone(
                clustering_key_prefix::from_exploded(*s, {int32_type->decompose(1)}),
                bound_kind::incl_start,
                clustering_key_prefix::from_exploded(*s, {int32_type->decompose(2)}),
                bound_kind::incl_end,
                {1, gc_clock::now()}));

        auto mt = make_lw_shared<memtable>(s);
        // `m` is compared against the sstable contents below, so it is handed
        // over as an lvalue; std::move()-ing it here would leave the later
        // `.produces(m)` reading a moved-from object.
        mt->apply(m);

        auto sst = sstables::make_sstable(s,
                                          dir->path,
                                          1 /* generation */,
                                          sstables::sstable::version_types::ka,
                                          sstables::sstable::format_types::big);
        sstable_writer_config cfg;
        cfg.promoted_index_block_size = 1;
        sst->write_components(mt->make_flat_reader(s), 1, s, cfg).get();
        sst->load().get();

        {
            assert_that(sst->get_index_reader(default_priority_class())).has_monotonic_positions(*s);
        }

        {
            // Slice starting at the first row (ck1).
            auto slice = partition_slice_builder(*s).with_range(query::clustering_range::make_starting_with({ck1})).build();
            assert_that(sst->as_mutation_source().make_flat_mutation_reader(s, dht::partition_range::make_singular(dk), slice))
                    .produces(m)
                    .produces_end_of_stream();
        }
    });
}
|
||||
|
||||
// Checks the promoted index of a compact-storage table whose clustering key
// has a single component (c1).
//
// A single partition with three rows and one range tombstone is flushed from a
// memtable into a "ka"/"big" sstable written with promoted_index_block_size = 1.
// The test then requires that:
//   1. the index reader reports monotonic positions, and
//   2. a read sliced to start at the first row produces the mutation `m`.
SEASTAR_TEST_CASE(test_promoted_index_blocks_are_monotonic_non_compound_dense) {
    return seastar::async([] {
        storage_service_for_tests ssft;
        auto dir = make_lw_shared<tmpdir>();

        schema_builder builder("ks", "cf");
        builder.with_column("p", utf8_type, column_kind::partition_key);
        builder.with_column("c1", int32_type, column_kind::clustering_key);
        builder.with_column("v", int32_type);
        auto s = builder.build(schema_builder::compact_storage::yes);

        auto dk = dht::global_partitioner().decorate_key(*s, partition_key::from_exploded(*s, {to_bytes("key1")}));
        auto cell = atomic_cell::make_live(1, int32_type->decompose(88), { });
        mutation m(dk, s);

        // Rows 1, 2 and 3; every row carries the same cell.
        auto ck1 = clustering_key::from_exploded(*s, {int32_type->decompose(1)});
        m.set_clustered_cell(ck1, *s->get_column_definition("v"), cell);

        auto ck2 = clustering_key::from_exploded(*s, {int32_type->decompose(2)});
        m.set_clustered_cell(ck2, *s->get_column_definition("v"), cell);

        auto ck3 = clustering_key::from_exploded(*s, {int32_type->decompose(3)});
        m.set_clustered_cell(ck3, *s->get_column_definition("v"), cell);

        // Range tombstone from 1 to 2, both bounds inclusive.
        m.partition().apply_row_tombstone(*s, range_tombstone(
                clustering_key_prefix::from_exploded(*s, {int32_type->decompose(1)}),
                bound_kind::incl_start,
                clustering_key_prefix::from_exploded(*s, {int32_type->decompose(2)}),
                bound_kind::incl_end,
                {1, gc_clock::now()}));

        auto mt = make_lw_shared<memtable>(s);
        // `m` is compared against the sstable contents below, so it is handed
        // over as an lvalue; std::move()-ing it here would leave the later
        // `.produces(m)` reading a moved-from object.
        mt->apply(m);

        auto sst = sstables::make_sstable(s,
                                          dir->path,
                                          1 /* generation */,
                                          sstables::sstable::version_types::ka,
                                          sstables::sstable::format_types::big);
        sstable_writer_config cfg;
        cfg.promoted_index_block_size = 1;
        sst->write_components(mt->make_flat_reader(s), 1, s, cfg).get();
        sst->load().get();

        {
            assert_that(sst->get_index_reader(default_priority_class())).has_monotonic_positions(*s);
        }

        {
            // Slice starting at the first row (ck1).
            auto slice = partition_slice_builder(*s).with_range(query::clustering_range::make_starting_with({ck1})).build();
            assert_that(sst->as_mutation_source().make_flat_mutation_reader(s, dht::partition_range::make_singular(dk), slice))
                    .produces(m)
                    .produces_end_of_stream();
        }
    });
}
|
||||
|
||||
// Runs once without and once with compact storage. Each pass writes a
// partition made of a range tombstone ["ck1", "ck5"] (both bounds inclusive)
// and a single row at "ck3" into a "ka"/"big" sstable using
// promoted_index_block_size = 1, then expects a read sliced to start at "ck3"
// to produce the written mutation.
SEASTAR_TEST_CASE(test_promoted_index_repeats_open_tombstones) {
    return seastar::async([] {
        storage_service_for_tests ssft;
        auto tmp = make_lw_shared<tmpdir>();
        int table_no = 0;
        for (const auto storage_mode : { schema_builder::compact_storage::no, schema_builder::compact_storage::yes }) {
            // Table names are numbered per pass: cf0, cf1.
            schema_builder sb("ks", sprint("cf%d", table_no++));
            sb.with_column("p", utf8_type, column_kind::partition_key);
            sb.with_column("c1", bytes_type, column_kind::clustering_key);
            sb.with_column("v", int32_type);
            auto table = sb.build(storage_mode);

            auto dkey = dht::global_partitioner().decorate_key(*table, partition_key::from_exploded(*table, {to_bytes("key1")}));
            auto live_cell = atomic_cell::make_live(1, int32_type->decompose(88), { });
            mutation expected(dkey, table);

            range_tombstone deleted_range(
                    clustering_key_prefix::from_exploded(*table, {bytes_type->decompose(data_value(to_bytes("ck1")))}),
                    bound_kind::incl_start,
                    clustering_key_prefix::from_exploded(*table, {bytes_type->decompose(data_value(to_bytes("ck5")))}),
                    bound_kind::incl_end,
                    {1, gc_clock::now()});
            expected.partition().apply_row_tombstone(*table, std::move(deleted_range));

            // The only row lies inside the tombstone's range.
            auto row_key = clustering_key::from_exploded(*table, {bytes_type->decompose(data_value(to_bytes("ck3")))});
            expected.set_clustered_cell(row_key, *table->get_column_definition("v"), live_cell);

            auto mem = make_lw_shared<memtable>(table);
            mem->apply(expected);

            auto sst = sstables::make_sstable(table, tmp->path, 1 /* generation */,
                    sstables::sstable::version_types::ka, sstables::sstable::format_types::big);
            sstable_writer_config writer_cfg;
            writer_cfg.promoted_index_block_size = 1;
            sst->write_components(mem->make_flat_reader(table), 1, table, writer_cfg).get();
            sst->load().get();

            {
                auto slice = partition_slice_builder(*table)
                        .with_range(query::clustering_range::make_starting_with({row_key}))
                        .build();
                auto source = sst->as_mutation_source();
                auto single_partition = dht::partition_range::make_singular(dkey);
                assert_that(source.make_flat_mutation_reader(table, single_partition, slice))
                        .produces(expected)
                        .produces_end_of_stream();
            }
        }
    });
}
|
||||
|
||||
// Round-trips a partition that contains only a range tombstone [1, 2] (both
// bounds inclusive) through a "ka"/"big" sstable for a compact-storage table
// with a single clustering column, using a default sstable_writer_config, and
// expects a full-slice read to produce the written mutation.
SEASTAR_TEST_CASE(test_range_tombstones_are_correctly_seralized_for_non_compound_dense_schemas) {
    return seastar::async([] {
        storage_service_for_tests ssft;
        auto tmp = make_lw_shared<tmpdir>();

        schema_builder sb("ks", "cf");
        sb.with_column("p", utf8_type, column_kind::partition_key);
        sb.with_column("c", int32_type, column_kind::clustering_key);
        sb.with_column("v", int32_type);
        auto table = sb.build(schema_builder::compact_storage::yes);

        auto dkey = dht::global_partitioner().decorate_key(*table, partition_key::from_exploded(*table, {to_bytes("key1")}));
        mutation expected(dkey, table);

        range_tombstone deleted_range(
                clustering_key_prefix::from_exploded(*table, {int32_type->decompose(1)}),
                bound_kind::incl_start,
                clustering_key_prefix::from_exploded(*table, {int32_type->decompose(2)}),
                bound_kind::incl_end,
                {1, gc_clock::now()});
        expected.partition().apply_row_tombstone(*table, std::move(deleted_range));

        auto mem = make_lw_shared<memtable>(table);
        mem->apply(expected);

        auto sst = sstables::make_sstable(table, tmp->path, 1 /* generation */,
                sstables::sstable::version_types::ka, sstables::sstable::format_types::big);
        sstable_writer_config writer_cfg;
        sst->write_components(mem->make_flat_reader(table), 1, table, writer_cfg).get();
        sst->load().get();

        {
            auto slice = partition_slice_builder(*table).build();
            auto source = sst->as_mutation_source();
            auto single_partition = dht::partition_range::make_singular(dkey);
            assert_that(source.make_flat_mutation_reader(table, single_partition, slice))
                    .produces(expected)
                    .produces_end_of_stream();
        }
    });
}
|
||||
|
||||
// Writes a partition of a compact-storage table that has no clustering
// columns into a "ka"/"big" sstable with promoted_index_block_size = 1 and
// asserts, via the index reader, that the result `is_empty`.
SEASTAR_TEST_CASE(test_promoted_index_is_absent_for_schemas_without_clustering_key) {
    return seastar::async([] {
        storage_service_for_tests ssft;
        auto tmp = make_lw_shared<tmpdir>();

        schema_builder sb("ks", "cf");
        sb.with_column("p", utf8_type, column_kind::partition_key);
        sb.with_column("v", int32_type);
        auto table = sb.build(schema_builder::compact_storage::yes);

        auto dkey = dht::global_partitioner().decorate_key(*table, partition_key::from_exploded(*table, {to_bytes("key1")}));
        mutation written(dkey, table);
        // Every value is set on the same (empty) clustering key and column "v".
        for (auto&& value : { 1, 2, 3, 4 }) {
            auto live_cell = atomic_cell::make_live(1, int32_type->decompose(value), { });
            written.set_clustered_cell(clustering_key_prefix::make_empty(), *table->get_column_definition("v"), live_cell);
        }

        auto mem = make_lw_shared<memtable>(table);
        mem->apply(written);

        auto sst = sstables::make_sstable(table, tmp->path, 1 /* generation */,
                sstables::sstable::version_types::ka, sstables::sstable::format_types::big);
        sstable_writer_config writer_cfg;
        writer_cfg.promoted_index_block_size = 1;
        sst->write_components(mem->make_flat_reader(table), 1, table, writer_cfg).get();
        sst->load().get();

        assert_that(sst->get_index_reader(default_priority_class())).is_empty(*table);
    });
}
|
||||
|
||||
// Round-trips a partition that contains only a range tombstone [1, 2] (both
// bounds inclusive) through a "ka"/"big" sstable for a compact-storage table
// with a single clustering column, with
// correctly_serialize_non_compound_range_tombstones switched off in the
// writer config, and expects a full-slice read to produce the written mutation.
SEASTAR_TEST_CASE(test_can_write_and_read_non_compound_range_tombstone_as_compound) {
    return seastar::async([] {
        storage_service_for_tests ssft;
        auto tmp = make_lw_shared<tmpdir>();

        schema_builder sb("ks", "cf");
        sb.with_column("p", utf8_type, column_kind::partition_key);
        sb.with_column("c", int32_type, column_kind::clustering_key);
        sb.with_column("v", int32_type);
        auto table = sb.build(schema_builder::compact_storage::yes);

        auto dkey = dht::global_partitioner().decorate_key(*table, partition_key::from_exploded(*table, {to_bytes("key1")}));
        mutation expected(dkey, table);

        range_tombstone deleted_range(
                clustering_key_prefix::from_exploded(*table, {int32_type->decompose(1)}),
                bound_kind::incl_start,
                clustering_key_prefix::from_exploded(*table, {int32_type->decompose(2)}),
                bound_kind::incl_end,
                {1, gc_clock::now()});
        expected.partition().apply_row_tombstone(*table, std::move(deleted_range));

        auto mem = make_lw_shared<memtable>(table);
        mem->apply(expected);

        auto sst = sstables::make_sstable(table, tmp->path, 1 /* generation */,
                sstables::sstable::version_types::ka, sstables::sstable::format_types::big);
        sstable_writer_config writer_cfg;
        writer_cfg.correctly_serialize_non_compound_range_tombstones = false;
        sst->write_components(mem->make_flat_reader(table), 1, table, writer_cfg).get();
        sst->load().get();

        {
            auto slice = partition_slice_builder(*table).build();
            auto source = sst->as_mutation_source();
            auto single_partition = dht::partition_range::make_singular(dkey);
            // This test reads through the mutation_source call operator.
            assert_that(source(table, single_partition, slice))
                    .produces(expected)
                    .produces_end_of_stream();
        }
    });
}
|
||||
|
||||
@@ -65,6 +65,7 @@ static schema_ptr get_schema() {
|
||||
}
|
||||
|
||||
void run_sstable_resharding_test() {
|
||||
storage_service_for_tests ssft;
|
||||
auto tmp = make_lw_shared<tmpdir>();
|
||||
auto s = get_schema();
|
||||
auto cm = make_lw_shared<compaction_manager>();
|
||||
@@ -94,6 +95,7 @@ void run_sstable_resharding_test() {
|
||||
}
|
||||
auto sst = sstables::make_sstable(s, tmp->path, 0, sstables::sstable::version_types::ka, sstables::sstable::format_types::big);
|
||||
sst->load().get();
|
||||
sst->set_unshared();
|
||||
|
||||
auto creator = [&cf, tmp] (shard_id shard) mutable {
|
||||
// we need generation calculated by instance of cf at requested shard,
|
||||
@@ -119,7 +121,7 @@ void run_sstable_resharding_test() {
|
||||
auto shard = shards.front();
|
||||
BOOST_REQUIRE(column_family_test::calculate_shard_from_sstable_generation(new_sst->generation()) == shard);
|
||||
|
||||
assert_that(sst->as_mutation_source()(s))
|
||||
assert_that(new_sst->as_mutation_source()(s))
|
||||
.produces(muts.at(shard))
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
|
||||
@@ -882,9 +882,10 @@ SEASTAR_TEST_CASE(not_find_key_composite_bucket0) {
|
||||
// See CASSANDRA-7593. This sstable writes 0 in the range_start. We need to handle that case as well
|
||||
SEASTAR_TEST_CASE(wrong_range) {
|
||||
return reusable_sst(uncompressed_schema(), "tests/sstables/wrongrange", 114).then([] (auto sstp) {
|
||||
return do_with(sstables::key("todata"), [sstp] (auto& key) {
|
||||
return do_with(make_dkey(uncompressed_schema(), "todata"), [sstp] (auto& key) {
|
||||
auto s = columns_schema();
|
||||
return sstp->read_row(s, key).then([sstp, s, &key] (auto mutation) {
|
||||
auto rd = make_lw_shared<flat_mutation_reader>(sstp->read_row_flat(s, key));
|
||||
return read_mutation_from_flat_mutation_reader(*rd).then([sstp, s, &key, rd] (auto mutation) {
|
||||
return make_ready_future<>();
|
||||
});
|
||||
});
|
||||
@@ -1056,17 +1057,19 @@ static query::partition_slice make_partition_slice(const schema& s, sstring ck1,
|
||||
static future<int> count_rows(sstable_ptr sstp, schema_ptr s, sstring key, sstring ck1, sstring ck2) {
|
||||
return seastar::async([sstp, s, key, ck1, ck2] () mutable {
|
||||
auto ps = make_partition_slice(*s, ck1, ck2);
|
||||
auto row = sstp->read_row(s, sstables::key(key.c_str()), ps).get0();
|
||||
if (!row) {
|
||||
auto dkey = make_dkey(s, key.c_str());
|
||||
auto rd = sstp->read_row_flat(s, dkey, ps);
|
||||
auto mfopt = rd().get0();
|
||||
if (!mfopt) {
|
||||
return 0;
|
||||
}
|
||||
int nrows = 0;
|
||||
auto mfopt = (*row)().get0();
|
||||
mfopt = rd().get0();
|
||||
while (mfopt) {
|
||||
if (mfopt->is_clustering_row()) {
|
||||
nrows++;
|
||||
}
|
||||
mfopt = (*row)().get0();
|
||||
mfopt = rd().get0();
|
||||
}
|
||||
return nrows;
|
||||
});
|
||||
@@ -1075,17 +1078,19 @@ static future<int> count_rows(sstable_ptr sstp, schema_ptr s, sstring key, sstri
|
||||
// Count the number of CQL rows in one partition
|
||||
static future<int> count_rows(sstable_ptr sstp, schema_ptr s, sstring key) {
|
||||
return seastar::async([sstp, s, key] () mutable {
|
||||
auto row = sstp->read_row(s, sstables::key(key.c_str())).get0();
|
||||
if (!row) {
|
||||
auto dkey = make_dkey(s, key.c_str());
|
||||
auto rd = sstp->read_row_flat(s, dkey);
|
||||
auto mfopt = rd().get0();
|
||||
if (!mfopt) {
|
||||
return 0;
|
||||
}
|
||||
int nrows = 0;
|
||||
auto mfopt = (*row)().get0();
|
||||
mfopt = rd().get0();
|
||||
while (mfopt) {
|
||||
if (mfopt->is_clustering_row()) {
|
||||
nrows++;
|
||||
}
|
||||
mfopt = (*row)().get0();
|
||||
mfopt = rd().get0();
|
||||
}
|
||||
return nrows;
|
||||
});
|
||||
@@ -1096,18 +1101,19 @@ static future<int> count_rows(sstable_ptr sstp, schema_ptr s, sstring key) {
|
||||
static future<int> count_rows(sstable_ptr sstp, schema_ptr s, sstring ck1, sstring ck2) {
|
||||
return seastar::async([sstp, s, ck1, ck2] () mutable {
|
||||
auto ps = make_partition_slice(*s, ck1, ck2);
|
||||
auto reader = sstp->read_range_rows(s, query::full_partition_range, ps);
|
||||
auto reader = sstp->read_range_rows_flat(s, query::full_partition_range, ps);
|
||||
int nrows = 0;
|
||||
auto smopt = reader().get0();
|
||||
while (smopt) {
|
||||
auto mfopt = (*smopt)().get0();
|
||||
while (mfopt) {
|
||||
auto mfopt = reader().get0();
|
||||
while (mfopt) {
|
||||
mfopt = reader().get0();
|
||||
BOOST_REQUIRE(mfopt);
|
||||
while (!mfopt->is_end_of_partition()) {
|
||||
if (mfopt->is_clustering_row()) {
|
||||
nrows++;
|
||||
}
|
||||
mfopt = (*smopt)().get0();
|
||||
mfopt = reader().get0();
|
||||
}
|
||||
smopt = reader().get0();
|
||||
mfopt = reader().get0();
|
||||
}
|
||||
return nrows;
|
||||
});
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include "schema_builder.hh"
|
||||
#include "core/thread.hh"
|
||||
#include "sstables/index_reader.hh"
|
||||
#include "tests/test_services.hh"
|
||||
|
||||
static auto la = sstables::sstable::version_types::la;
|
||||
static auto big = sstables::sstable::format_types::big;
|
||||
@@ -184,6 +185,10 @@ public:
|
||||
_sst->set_first_and_last_keys();
|
||||
_sst->_components->statistics.contents[metadata_type::Compaction] = std::make_unique<compaction_metadata>();
|
||||
}
|
||||
|
||||
// Removes the file that backs component `c` of the wrapped sstable; the
// returned future is the one produced by remove_file().
future<> remove_component(sstable::component_type c) {
    auto component_path = _sst->filename(c);
    return remove_file(std::move(component_path));
}
|
||||
};
|
||||
|
||||
inline future<sstable_ptr> reusable_sst(schema_ptr schema, sstring dir, unsigned long generation) {
|
||||
@@ -621,12 +626,12 @@ public:
|
||||
}
|
||||
|
||||
static future<> do_with_test_directory(std::function<future<> ()>&& fut, sstring p = path()) {
|
||||
return test_setup::create_empty_test_dir(p).then([fut = std::move(fut), p] () mutable {
|
||||
return fut();
|
||||
}).finally([p] {
|
||||
return test_setup::empty_test_dir(p).then([p] {
|
||||
return engine().remove_file(p);
|
||||
});
|
||||
return seastar::async([p, fut = std::move(fut)] {
|
||||
storage_service_for_tests ssft;
|
||||
test_setup::create_empty_test_dir(p).get();
|
||||
fut().get();
|
||||
test_setup::empty_test_dir(p).get();
|
||||
engine().remove_file(p).get();
|
||||
});
|
||||
}
|
||||
};
|
||||
@@ -636,3 +641,10 @@ inline
|
||||
// Free-function form of sstable::as_mutation_source(): returns the
// mutation_source obtained from `sst`.
::mutation_source as_mutation_source(sstables::shared_sstable sst) {
|
||||
return sst->as_mutation_source();
|
||||
}
|
||||
|
||||
|
||||
// Builds a decorated key for schema `s` from the raw key bytes `b`: the bytes
// are wrapped in an sstables::key, converted to a partition key for `s`, and
// decorated by the global partitioner.
inline dht::decorated_key make_dkey(schema_ptr s, bytes b) {
    auto raw_key = sstables::key::from_bytes(b);
    auto pkey = raw_key.to_partition_key(*s);
    return dht::global_partitioner().decorate_key(*s, std::move(pkey));
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user