diff --git a/cache_flat_mutation_reader.hh b/cache_flat_mutation_reader.hh index b2b75c317e..79c7d6b8e2 100644 --- a/cache_flat_mutation_reader.hh +++ b/cache_flat_mutation_reader.hh @@ -81,7 +81,17 @@ class cache_flat_mutation_reader final : public flat_mutation_reader::impl { position_in_partition _lower_bound; position_in_partition_view _upper_bound; - lw_shared_ptr _read_context; + // cache_flat_mutation_reader may be constructed either + // with a read_context&, where it knows that the read_context + // is owned externally, by the caller. In this case + // _read_context_holder would be disengaged. + // Or, it could be constructed with a std::unique_ptr, + // in which case it assumes ownership of the read_context + // and it is now responsible for closing it. + // In this case, _read_context_holder would be engaged + // and _read_context will reference its content. + std::unique_ptr _read_context_holder; + read_context& _read_context; partition_snapshot_row_cursor _next_row; state _state = state::before_static_row; @@ -101,7 +111,7 @@ class cache_flat_mutation_reader final : public flat_mutation_reader::impl { bool _lower_bound_changed = false; // Points to the underlying reader conforming to _schema, - // either to *_underlying_holder or _read_context->underlying().underlying(). + // either to *_underlying_holder or _read_context.underlying().underlying(). 
flat_mutation_reader* _underlying = nullptr; flat_mutation_reader_opt _underlying_holder; @@ -146,10 +156,10 @@ public: cache_flat_mutation_reader(schema_ptr s, dht::decorated_key dk, query::clustering_key_filter_ranges&& crr, - lw_shared_ptr ctx, + read_context& ctx, partition_snapshot_ptr snp, row_cache& cache) - : flat_mutation_reader::impl(std::move(s), ctx->permit()) + : flat_mutation_reader::impl(std::move(s), ctx.permit()) , _snp(std::move(snp)) , _ck_ranges(std::move(crr)) , _ck_ranges_curr(_ck_ranges.begin()) @@ -157,12 +167,25 @@ public: , _lsa_manager(cache) , _lower_bound(position_in_partition::before_all_clustered_rows()) , _upper_bound(position_in_partition_view::before_all_clustered_rows()) - , _read_context(std::move(ctx)) + , _read_context_holder() + , _read_context(ctx) // ctx is owned by the caller, who's responsible for closing it. , _next_row(*_schema, *_snp) { clogger.trace("csm {}: table={}.{}", fmt::ptr(this), _schema->ks_name(), _schema->cf_name()); push_mutation_fragment(*_schema, _permit, partition_start(std::move(dk), _snp->partition_tombstone())); } + cache_flat_mutation_reader(schema_ptr s, + dht::decorated_key dk, + query::clustering_key_filter_ranges&& crr, + std::unique_ptr unique_ctx, + partition_snapshot_ptr snp, + row_cache& cache) + : cache_flat_mutation_reader(s, std::move(dk), std::move(crr), *unique_ctx, std::move(snp), cache) + { + // Assume ownership of the read_context. + // It is our responsibility to close it now. 
+ _read_context_holder = std::move(unique_ctx); + } cache_flat_mutation_reader(const cache_flat_mutation_reader&) = delete; cache_flat_mutation_reader(cache_flat_mutation_reader&&) = delete; virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override; @@ -181,21 +204,26 @@ public: virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override { return make_exception_future<>(make_backtraced_exception_ptr()); } + virtual future<> close() noexcept { + auto close_read_context = _read_context_holder ? _read_context_holder->close() : make_ready_future<>(); + auto close_underlying = _underlying_holder ? _underlying_holder->close() : make_ready_future<>(); + return when_all_succeed(std::move(close_read_context), std::move(close_underlying)).discard_result(); + } }; inline future<> cache_flat_mutation_reader::process_static_row(db::timeout_clock::time_point timeout) { if (_snp->static_row_continuous()) { - _read_context->cache().on_row_hit(); + _read_context.cache().on_row_hit(); static_row sr = _lsa_manager.run_in_read_section([this] { - return _snp->static_row(_read_context->digest_requested()); + return _snp->static_row(_read_context.digest_requested()); }); if (!sr.empty()) { push_mutation_fragment(mutation_fragment(*_schema, _permit, std::move(sr))); } return make_ready_future<>(); } else { - _read_context->cache().on_row_miss(); + _read_context.cache().on_row_miss(); return ensure_underlying(timeout).then([this, timeout] { return (*_underlying)(timeout).then([this] (mutation_fragment_opt&& sr) { if (sr) { @@ -246,8 +274,8 @@ future<> cache_flat_mutation_reader::ensure_underlying(db::timeout_clock::time_p if (_underlying) { return make_ready_future<>(); } - return _read_context->ensure_underlying(timeout).then([this] { - flat_mutation_reader& ctx_underlying = _read_context->underlying().underlying(); + return _read_context.ensure_underlying(timeout).then([this] { + flat_mutation_reader& ctx_underlying = 
_read_context.underlying().underlying(); if (ctx_underlying.schema() != _schema) { _underlying_holder = make_delegating_reader(ctx_underlying); _underlying_holder->upgrade_schema(_schema); @@ -268,7 +296,7 @@ future<> cache_flat_mutation_reader::do_fill_buffer(db::timeout_clock::time_poin } _state = state::reading_from_underlying; _population_range_starts_before_all_rows = _lower_bound.is_before_all_clustered_rows(*_schema); - if (!_read_context->partition_exists()) { + if (!_read_context.partition_exists()) { return read_from_underlying(timeout); } auto end = _next_row_in_range ? position_in_partition(_next_row.position()) @@ -317,7 +345,7 @@ future<> cache_flat_mutation_reader::read_from_underlying(db::timeout_clock::tim return consume_mutation_fragments_until(*_underlying, [this] { return _state != state::reading_from_underlying || is_buffer_full(); }, [this] (mutation_fragment mf) { - _read_context->cache().on_row_miss(); + _read_context.cache().on_row_miss(); maybe_add_to_cache(mf); add_to_buffer(std::move(mf)); }, @@ -326,7 +354,7 @@ future<> cache_flat_mutation_reader::read_from_underlying(db::timeout_clock::tim _lsa_manager.run_in_update_section([this] { auto same_pos = _next_row.maybe_refresh(); if (!same_pos) { - _read_context->cache().on_mispopulate(); // FIXME: Insert dummy entry at _upper_bound. + _read_context.cache().on_mispopulate(); // FIXME: Insert dummy entry at _upper_bound. 
_next_row_in_range = !after_current_range(_next_row.position()); if (!_next_row.continuous()) { start_reading_from_underlying(); @@ -380,7 +408,7 @@ future<> cache_flat_mutation_reader::read_from_underlying(db::timeout_clock::tim }); } } else { - _read_context->cache().on_mispopulate(); + _read_context.cache().on_mispopulate(); } try { move_to_next_range(); @@ -432,7 +460,7 @@ void cache_flat_mutation_reader::maybe_update_continuity() { maybe_drop_last_entry(); }); } else { - _read_context->cache().on_mispopulate(); + _read_context.cache().on_mispopulate(); } } @@ -452,7 +480,7 @@ void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) { if (!can_populate()) { _last_row = nullptr; _population_range_starts_before_all_rows = false; - _read_context->cache().on_mispopulate(); + _read_context.cache().on_mispopulate(); return; } clogger.trace("csm {}: populate({})", fmt::ptr(this), clustering_row::printer(*_schema, cr)); @@ -460,7 +488,7 @@ void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) { mutation_partition& mp = _snp->version()->partition(); rows_entry::tri_compare cmp(*_schema); - if (_read_context->digest_requested()) { + if (_read_context.digest_requested()) { cr.cells().prepare_hash(*_schema, column_kind::regular_column); } auto new_entry = alloc_strategy_unique_ptr( @@ -479,7 +507,7 @@ void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) { clogger.trace("csm {}: set_continuous({})", fmt::ptr(this), e.position()); e.set_continuous(true); } else { - _read_context->cache().on_mispopulate(); + _read_context.cache().on_mispopulate(); } with_allocator(standard_allocator(), [&] { _last_row = partition_snapshot_row_weakref(*_snp, it, true); @@ -569,7 +597,7 @@ void cache_flat_mutation_reader::move_to_range(query::clustering_row_ranges::con _snp->tracker()->insert(*it); _last_row = partition_snapshot_row_weakref(*_snp, it, true); } else { - _read_context->cache().on_mispopulate(); + 
_read_context.cache().on_mispopulate(); } } start_reading_from_underlying(); @@ -596,7 +624,7 @@ void cache_flat_mutation_reader::maybe_drop_last_entry() noexcept { && _snp->at_oldest_version()) { with_allocator(_snp->region().allocator(), [&] { - _last_row->on_evicted(_read_context->cache()._tracker); + _last_row->on_evicted(_read_context.cache()._tracker); }); _last_row = nullptr; @@ -645,8 +673,8 @@ void cache_flat_mutation_reader::add_to_buffer(mutation_fragment&& mf) { inline void cache_flat_mutation_reader::add_to_buffer(const partition_snapshot_row_cursor& row) { if (!row.dummy()) { - _read_context->cache().on_row_hit(); - if (_read_context->digest_requested()) { + _read_context.cache().on_row_hit(); + if (_read_context.digest_requested()) { row.latest_row().cells().prepare_hash(*_schema, column_kind::regular_column); } add_clustering_row_to_buffer(mutation_fragment(*_schema, _permit, row.row())); @@ -656,7 +684,7 @@ void cache_flat_mutation_reader::add_to_buffer(const partition_snapshot_row_curs _lower_bound = row.position(); _lower_bound_changed = true; } - _read_context->cache()._tracker.on_dummy_row_hit(); + _read_context.cache()._tracker.on_dummy_row_hit(); } } @@ -704,7 +732,7 @@ void cache_flat_mutation_reader::maybe_add_to_cache(const range_tombstone& rt) { _snp->version()->partition().row_tombstones().apply_monotonically(*_schema, rt); }); } else { - _read_context->cache().on_mispopulate(); + _read_context.cache().on_mispopulate(); } } @@ -712,15 +740,15 @@ inline void cache_flat_mutation_reader::maybe_add_to_cache(const static_row& sr) { if (can_populate()) { clogger.trace("csm {}: populate({})", fmt::ptr(this), static_row::printer(*_schema, sr)); - _read_context->cache().on_static_row_insert(); + _read_context.cache().on_static_row_insert(); _lsa_manager.run_in_update_section_with_allocator([&] { - if (_read_context->digest_requested()) { + if (_read_context.digest_requested()) { sr.cells().prepare_hash(*_schema, column_kind::static_column); } 
_snp->version()->partition().static_row().apply(*_schema, column_kind::static_column, sr.cells()); }); } else { - _read_context->cache().on_mispopulate(); + _read_context.cache().on_mispopulate(); } } @@ -730,24 +758,38 @@ void cache_flat_mutation_reader::maybe_set_static_row_continuous() { clogger.trace("csm {}: set static row continuous", fmt::ptr(this)); _snp->version()->partition().set_static_row_continuous(true); } else { - _read_context->cache().on_mispopulate(); + _read_context.cache().on_mispopulate(); } } inline bool cache_flat_mutation_reader::can_populate() const { - return _snp->at_latest_version() && _read_context->cache().phase_of(_read_context->key()) == _read_context->phase(); + return _snp->at_latest_version() && _read_context.cache().phase_of(_read_context.key()) == _read_context.phase(); } } // namespace cache +// pass a reference to ctx to cache_flat_mutation_reader +// keeping its ownership at caller's. inline flat_mutation_reader make_cache_flat_mutation_reader(schema_ptr s, dht::decorated_key dk, query::clustering_key_filter_ranges crr, row_cache& cache, - lw_shared_ptr ctx, + cache::read_context& ctx, partition_snapshot_ptr snp) { return make_flat_mutation_reader( - std::move(s), std::move(dk), std::move(crr), std::move(ctx), std::move(snp), cache); + std::move(s), std::move(dk), std::move(crr), ctx, std::move(snp), cache); +} + +// transfer ownership of ctx to cache_flat_mutation_reader +inline flat_mutation_reader make_cache_flat_mutation_reader(schema_ptr s, + dht::decorated_key dk, + query::clustering_key_filter_ranges crr, + row_cache& cache, + std::unique_ptr unique_ctx, + partition_snapshot_ptr snp) +{ + return make_flat_mutation_reader( + std::move(s), std::move(dk), std::move(crr), std::move(unique_ctx), std::move(snp), cache); } diff --git a/database.cc b/database.cc index 7dfd36b47c..cfdbc57176 100644 --- a/database.cc +++ b/database.cc @@ -720,9 +720,6 @@ void database::set_format_by_config() { } database::~database() { - 
_read_concurrency_sem.clear_inactive_reads(); - _streaming_concurrency_sem.clear_inactive_reads(); - _system_read_concurrency_sem.clear_inactive_reads(); } void database::update_version(const utils::UUID& version) { @@ -945,7 +942,7 @@ bool database::update_column_family(schema_ptr new_schema) { future<> database::remove(const column_family& cf) noexcept { auto s = cf.schema(); auto& ks = find_keyspace(s->ks_name()); - _querier_cache.evict_all_for_table(s->id()); + co_await _querier_cache.evict_all_for_table(s->id()); _column_families.erase(s->id()); ks.metadata()->remove_column_family(s); _ks_cf_to_uuid.erase(std::make_pair(s->ks_name(), s->cf_name())); @@ -956,7 +953,6 @@ future<> database::remove(const column_family& cf) noexcept { // Drop view mutations received after base table drop. } } - co_return; } future<> database::drop_column_family(const sstring& ks_name, const sstring& cf_name, timestamp_func tsf, bool snapshot) { @@ -2015,6 +2011,14 @@ database::stop() { return _user_sstables_manager->close(); }).then([this] { return _system_sstables_manager->close(); + }).finally([this] { + return when_all_succeed( + _read_concurrency_sem.stop(), + _streaming_concurrency_sem.stop(), + _compaction_concurrency_sem.stop(), + _system_read_concurrency_sem.stop()).discard_result().finally([this] { + return _querier_cache.stop(); + }); }); } @@ -2259,11 +2263,11 @@ flat_mutation_reader make_multishard_streaming_reader(distributed& db, return cf.make_streaming_reader(std::move(schema), *_contexts[shard].range, slice, fwd_mr); } - virtual void destroy_reader(shard_id shard, future reader_fut) noexcept override { - // Move to the background. 
- (void)reader_fut.then([this, zis = shared_from_this(), shard] (stopped_reader&& reader) mutable { + virtual future<> destroy_reader(shard_id shard, future reader_fut) noexcept override { + return reader_fut.then([this, zis = shared_from_this(), shard] (stopped_reader&& reader) mutable { return smp::submit_to(shard, [ctx = std::move(_contexts[shard]), handle = std::move(reader.handle)] () mutable { - ctx.semaphore->unregister_inactive_read(std::move(*handle)); + auto reader_opt = ctx.semaphore->unregister_inactive_read(std::move(*handle)); + return reader_opt ? reader_opt->close() : make_ready_future<>(); }); }).handle_exception([shard] (std::exception_ptr e) { dblog.warn("Failed to destroy shard reader of streaming multishard reader on shard {}: {}", shard, e); diff --git a/db/size_estimates_virtual_reader.cc b/db/size_estimates_virtual_reader.cc index 0d4d78c1ed..aa73548630 100644 --- a/db/size_estimates_virtual_reader.cc +++ b/db/size_estimates_virtual_reader.cc @@ -255,10 +255,18 @@ future<> size_estimates_mutation_reader::get_next_partition() { ++_current_partition; std::vector ms; ms.emplace_back(std::move(mutations)); - _partition_reader = flat_mutation_reader_from_mutations(_permit, std::move(ms), _fwd); + auto reader = flat_mutation_reader_from_mutations(_permit, std::move(ms), _fwd); + auto close_partition_reader = _partition_reader ? _partition_reader->close() : make_ready_future<>(); + return close_partition_reader.then([this, reader = std::move(reader)] () mutable { + _partition_reader = std::move(reader); + }); }); } +future<> size_estimates_mutation_reader::close_partition_reader() noexcept { + return _partition_reader ? 
_partition_reader->close() : make_ready_future<>(); +} + future<> size_estimates_mutation_reader::fill_buffer(db::timeout_clock::time_point timeout) { return do_until([this, timeout] { return is_end_of_stream() || is_buffer_full(); }, [this, timeout] { if (!_partition_reader) { @@ -269,8 +277,9 @@ future<> size_estimates_mutation_reader::fill_buffer(db::timeout_clock::time_poi return stop_iteration(is_buffer_full()); }, timeout).then([this] { if (_partition_reader->is_end_of_stream() && _partition_reader->is_buffer_empty()) { - _partition_reader = std::nullopt; + return _partition_reader->close(); } + return make_ready_future<>(); }); }); } @@ -278,7 +287,7 @@ future<> size_estimates_mutation_reader::fill_buffer(db::timeout_clock::time_poi future<> size_estimates_mutation_reader::next_partition() { clear_buffer_to_next_partition(); if (is_buffer_empty()) { - _partition_reader = std::nullopt; + return close_partition_reader(); } return make_ready_future<>(); } @@ -287,9 +296,8 @@ future<> size_estimates_mutation_reader::fast_forward_to(const dht::partition_ra clear_buffer(); _prange = ≺ _keyspaces = std::nullopt; - _partition_reader = std::nullopt; _end_of_stream = false; - return make_ready_future<>(); + return close_partition_reader(); } future<> size_estimates_mutation_reader::fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) { @@ -301,6 +309,10 @@ future<> size_estimates_mutation_reader::fast_forward_to(position_range pr, db:: return make_ready_future<>(); } +future<> size_estimates_mutation_reader::close() noexcept { + return close_partition_reader(); +} + std::vector size_estimates_mutation_reader::estimates_for_current_keyspace(std::vector local_ranges) const { // For each specified range, estimate (crudely) mean partition size and partitions count. 
diff --git a/db/size_estimates_virtual_reader.hh b/db/size_estimates_virtual_reader.hh index 0656f7934b..6a7be3ef9b 100644 --- a/db/size_estimates_virtual_reader.hh +++ b/db/size_estimates_virtual_reader.hh @@ -51,8 +51,10 @@ public: virtual future<> next_partition() override; virtual future<> fast_forward_to(const dht::partition_range&, db::timeout_clock::time_point) override; virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point) override; + virtual future<> close() noexcept override; private: future<> get_next_partition(); + future<> close_partition_reader() noexcept; std::vector estimates_for_current_keyspace(std::vector local_ranges) const; diff --git a/db/view/build_progress_virtual_reader.hh b/db/view/build_progress_virtual_reader.hh index 1fce9bf3c5..d678cd49b6 100644 --- a/db/view/build_progress_virtual_reader.hh +++ b/db/view/build_progress_virtual_reader.hh @@ -182,6 +182,10 @@ class build_progress_virtual_reader { _end_of_stream = false; return _underlying.fast_forward_to(std::move(range), timeout); } + + virtual future<> close() noexcept override { + return _underlying.close(); + } }; public: diff --git a/db/view/view.cc b/db/view/view.cc index 45d95f68b3..0593a51fe8 100644 --- a/db/view/view.cc +++ b/db/view/view.cc @@ -854,6 +854,10 @@ public: future> build(); + future<> close() noexcept { + return when_all_succeed(_updates.close(), _existings->close()).discard_result(); + } + private: void generate_update(clustering_row&& update, std::optional&& existing); future on_results(); @@ -1039,7 +1043,9 @@ future> generate_view_updates( })); auto builder = std::make_unique(base, std::move(vs), std::move(updates), std::move(existings), now); auto f = builder->build(); - return f.finally([builder = std::move(builder)] { }); + return f.finally([builder = std::move(builder)] { + return builder->close(); + }); } query::clustering_row_ranges calculate_affected_clustering_ranges(const schema& base, @@ -1405,6 +1411,10 @@ future<> 
view_builder::stop() { // ignored }).handle_exception_type([] (const semaphore_timed_out&) { // ignored + }).finally([this] { + return parallel_for_each(_base_to_build_step, [] (std::pair& p) { + return p.second.reader.close(); + }); }); }); } @@ -1435,7 +1445,8 @@ view_builder::build_step& view_builder::get_or_create_build_step(utils::UUID bas return it->second; } -void view_builder::initialize_reader_at_current_token(build_step& step) { +future<> view_builder::initialize_reader_at_current_token(build_step& step) { + return step.reader.close().then([this, &step] { step.pslice = make_partition_slice(*step.base->schema()); step.prange = dht::partition_range(dht::ring_position::starting_at(step.current_token()), dht::ring_position::max()); step.reader = step.base->get_sstable_set().make_local_shard_sstable_reader( @@ -1447,6 +1458,7 @@ void view_builder::initialize_reader_at_current_token(build_step& step) { nullptr, streamed_mutation::forwarding::no, mutation_reader::forwarding::no); + }); } void view_builder::load_view_status(view_builder::view_build_status status, std::unordered_set& loaded_views) { @@ -1633,13 +1645,12 @@ future<> view_builder::calculate_shard_build_step(view_builder_init_state& vbi) vbi.bookkeeping_ops.push_back(add_new_view(view, get_or_create_build_step(view->view_info()->base_id()))); } - for (auto& [_, build_step] : _base_to_build_step) { - initialize_reader_at_current_token(build_step); - } - - auto f = seastar::when_all_succeed(vbi.bookkeeping_ops.begin(), vbi.bookkeeping_ops.end()); - return f.handle_exception([this] (std::exception_ptr ep) { - vlogger.warn("Failed to update materialized view bookkeeping while synchronizing view builds on all shards ({}), continuing anyway.", ep); + return parallel_for_each(_base_to_build_step, [this] (auto& p) { + return initialize_reader_at_current_token(p.second); + }).then([this, &vbi] { + return seastar::when_all_succeed(vbi.bookkeeping_ops.begin(), vbi.bookkeeping_ops.end()).handle_exception([this] 
(std::exception_ptr ep) { + vlogger.warn("Failed to update materialized view bookkeeping while synchronizing view builds on all shards ({}), continuing anyway.", ep); + }); }); } @@ -1677,7 +1688,7 @@ void view_builder::on_create_view(const sstring& ks_name, const sstring& view_na // being built to receive duplicate updates, but it simplifies things as we don't have // to keep around a list of new views to build the next time the reader crosses a token // threshold. - initialize_reader_at_current_token(step); + return initialize_reader_at_current_token(step).then([this, view, &step] () mutable { return add_new_view(view, step).then_wrapped([this, view] (future<>&& f) { if (f.failed()) { vlogger.error("Error setting up view for building {}.{}: {}", view->ks_name(), view->cf_name(), f.get_exception()); @@ -1685,6 +1696,7 @@ void view_builder::on_create_view(const sstring& ks_name, const sstring& view_na // Waited on indirectly in stop(). (void)_build_step.trigger(); }); + }); }); }).handle_exception_type([] (no_such_column_family&) { }); } @@ -1760,10 +1772,13 @@ future<> view_builder::do_build_step() { auto base = _current_step->second.base->schema(); vlogger.warn("Error executing build step for base {}.{}: {}", base->ks_name(), base->cf_name(), std::current_exception()); r.retry(_as).get(); - initialize_reader_at_current_token(_current_step->second); + initialize_reader_at_current_token(_current_step->second).get(); } if (_current_step->second.build_status.empty()) { + auto base = _current_step->second.base->schema(); + auto reader = std::move(_current_step->second.reader); _current_step = _base_to_build_step.erase(_current_step); + reader.close().get(); } else { ++_current_step; } @@ -1930,6 +1945,7 @@ public: return stop_iteration(_step.build_status.empty()); } + // Must be called in a seastar thread. 
built_views consume_end_of_stream() { inject_failure("view_builder_consume_end_of_stream"); if (vlogger.is_enabled(log_level::debug)) { @@ -1945,7 +1961,7 @@ public: for (auto&& vs : _step.build_status) { vs.next_token = dht::minimum_token(); } - _builder.initialize_reader_at_current_token(_step); + _builder.initialize_reader_at_current_token(_step).get(); check_for_built_views(); } return std::move(_built_views); diff --git a/db/view/view_builder.hh b/db/view/view_builder.hh index a74150f625..f4d3d39f44 100644 --- a/db/view/view_builder.hh +++ b/db/view/view_builder.hh @@ -216,7 +216,7 @@ public: private: build_step& get_or_create_build_step(utils::UUID); - void initialize_reader_at_current_token(build_step&); + future<> initialize_reader_at_current_token(build_step&); void load_view_status(view_build_status, std::unordered_set&); void reshard(std::vector>, std::unordered_set&); void setup_shard_build_step(view_builder_init_state& vbi, std::vector, std::vector); diff --git a/db/view/view_update_generator.cc b/db/view/view_update_generator.cc index 703d8ce454..87fe71da3b 100644 --- a/db/view/view_update_generator.cc +++ b/db/view/view_update_generator.cc @@ -96,6 +96,7 @@ future<> view_update_generator::start() { inject_failure("view_update_generator_consume_staging_sstable"); auto result = staging_sstable_reader.consume_in_thread(view_updating_consumer(s, std::move(permit), *t, sstables, _as, staging_sstable_reader_handle), db::no_timeout); + staging_sstable_reader.close().get(); if (result == stop_iteration::yes) { break; } diff --git a/flat_mutation_reader.cc b/flat_mutation_reader.cc index f8c4fa04be..90a9d51ec5 100644 --- a/flat_mutation_reader.cc +++ b/flat_mutation_reader.cc @@ -31,9 +31,34 @@ #include #include #include "utils/exceptions.hh" +#include logging::logger fmr_logger("flat_mutation_reader"); +flat_mutation_reader& flat_mutation_reader::operator=(flat_mutation_reader&& o) noexcept { + if (_impl) { + impl* ip = _impl.get(); + // Abort to enforce 
calling close() before readers are overwritten + // to prevent leaks and potential use-after-free due to background + // tasks left behind. + on_internal_error_noexcept(fmr_logger, format("{} [{}]: permit {}: was not closed before overwritten by move-assign", typeid(*ip).name(), fmt::ptr(ip), ip->_permit.description())); + abort(); + } + _impl = std::move(o._impl); + return *this; +} + +flat_mutation_reader::~flat_mutation_reader() { + if (_impl) { + impl* ip = _impl.get(); + // Abort to enforce calling close() before readers are destroyed + // to prevent leaks and potential use-after-free due to background + // tasks left behind. + on_internal_error_noexcept(fmr_logger, format("{} [{}]: permit {}: was not closed before destruction", typeid(*ip).name(), fmt::ptr(ip), ip->_permit.description())); + abort(); + } +} + static size_t compute_buffer_size(const schema& s, const flat_mutation_reader::tracked_buffer& buffer) { return boost::accumulate( @@ -189,6 +214,11 @@ flat_mutation_reader make_reversing_reader(flat_mutation_reader& original, query virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point) override { return make_exception_future<>(make_backtraced_exception_ptr()); } + + virtual future<> close() noexcept override { + // we don't own _source therefore do not close it + return make_ready_future<>(); + } }; return make_flat_mutation_reader(original, max_size); @@ -213,12 +243,8 @@ future flat_mutation_reader::impl::fill_buffer_from(Source& source, db::ti template future flat_mutation_reader::impl::fill_buffer_from(flat_mutation_reader&, db::timeout_clock::time_point); -flat_mutation_reader& to_reference(reference_wrapper& wrapper) { - return wrapper.get(); -} - flat_mutation_reader make_delegating_reader(flat_mutation_reader& r) { - return make_flat_mutation_reader>>(ref(r)); + return make_flat_mutation_reader(r); } flat_mutation_reader make_forwardable(flat_mutation_reader m) { @@ -298,6 +324,9 @@ flat_mutation_reader 
make_forwardable(flat_mutation_reader m) { }; return _underlying.fast_forward_to(pr, timeout); } + virtual future<> close() noexcept override { + return _underlying.close(); + } }; return make_flat_mutation_reader(std::move(m)); } @@ -361,6 +390,9 @@ flat_mutation_reader make_nonforwardable(flat_mutation_reader r, bool single_par clear_buffer(); return _underlying.fast_forward_to(pr, timeout); } + virtual future<> close() noexcept override { + return _underlying.close(); + } }; return make_flat_mutation_reader(std::move(r), single_partition); } @@ -372,6 +404,7 @@ public: virtual future<> next_partition() override { return make_ready_future<>(); } virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override { return make_ready_future<>(); }; virtual future<> fast_forward_to(position_range cr, db::timeout_clock::time_point timeout) override { return make_ready_future<>(); }; + virtual future<> close() noexcept override { return make_ready_future<>(); } }; flat_mutation_reader make_empty_flat_reader(schema_ptr s, reader_permit permit) { @@ -588,6 +621,9 @@ flat_mutation_reader_from_mutations(reader_permit permit, std::vector virtual future<> fast_forward_to(position_range cr, db::timeout_clock::time_point timeout) override { throw std::runtime_error("This reader can't be fast forwarded to another position."); }; + virtual future<> close() noexcept override { + return make_ready_future<>(); + }; }; assert(!mutations.empty()); schema_ptr s = mutations[0].schema(); @@ -664,6 +700,9 @@ public: } return make_ready_future<>(); } + virtual future<> close() noexcept override { + return _reader ? 
_reader->close() : make_ready_future<>(); + } }; template @@ -732,6 +771,10 @@ public: } return make_ready_future<>(); } + + virtual future<> close() noexcept override { + return _reader.close(); + } }; flat_mutation_reader @@ -874,6 +917,9 @@ make_flat_mutation_reader_from_fragments(schema_ptr schema, reader_permit permit do_fast_forward_to(pr); return make_ready_future<>(); } + virtual future<> close() noexcept override { + return make_ready_future<>(); + } }; return make_flat_mutation_reader(std::move(schema), std::move(permit), std::move(fragments), pr); } @@ -940,6 +986,9 @@ public: virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point) override { return make_exception_future<>(make_backtraced_exception_ptr()); } + virtual future<> close() noexcept override { + return make_ready_future<>(); + } }; flat_mutation_reader make_generating_reader(schema_ptr s, reader_permit permit, std::function ()> get_next_fragment) { @@ -950,6 +999,12 @@ void flat_mutation_reader::do_upgrade_schema(const schema_ptr& s) { *this = transform(std::move(*this), schema_upgrader(s)); } +void flat_mutation_reader::on_close_error(std::unique_ptr i, std::exception_ptr ep) noexcept { + impl* ip = i.get(); + on_internal_error_noexcept(fmr_logger, + format("Failed to close {} [{}]: permit {}: {}", typeid(*ip).name(), fmt::ptr(ip), ip->_permit.description(), ep)); +} + invalid_mutation_fragment_stream::invalid_mutation_fragment_stream(std::runtime_error e) : std::runtime_error(std::move(e)) { } diff --git a/flat_mutation_reader.hh b/flat_mutation_reader.hh index 346d35e85f..9d15f63e42 100644 --- a/flat_mutation_reader.hh +++ b/flat_mutation_reader.hh @@ -275,6 +275,16 @@ public: virtual future<> fast_forward_to(const dht::partition_range&, db::timeout_clock::time_point timeout) = 0; virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point timeout) = 0; + // close should cancel any outstanding background operations, + // if possible, and wait on 
them to complete. + // It should also transitively close underlying resources + // and wait on them too. + // + // Once closed, the reader should be unusable. + // + // Similar to destructors, close must never fail. + virtual future<> close() noexcept = 0; + size_t buffer_size() const { return _buffer_size; } @@ -304,12 +314,20 @@ private: explicit operator bool() const noexcept { return bool(_impl); } friend class optimized_optional; void do_upgrade_schema(const schema_ptr&); + static void on_close_error(std::unique_ptr, std::exception_ptr ep) noexcept; public: // Documented in mutation_reader::forwarding. class partition_range_forwarding_tag; using partition_range_forwarding = bool_class; flat_mutation_reader(std::unique_ptr impl) noexcept : _impl(std::move(impl)) {} + flat_mutation_reader(const flat_mutation_reader&) = delete; + flat_mutation_reader(flat_mutation_reader&&) = default; + + flat_mutation_reader& operator=(const flat_mutation_reader&) = delete; + flat_mutation_reader& operator=(flat_mutation_reader&& o) noexcept; + + ~flat_mutation_reader(); future operator()(db::timeout_clock::time_point timeout) { return _impl->operator()(timeout); @@ -442,6 +460,26 @@ public: future<> fast_forward_to(position_range cr, db::timeout_clock::time_point timeout) { return _impl->fast_forward_to(std::move(cr), timeout); } + // Closes the reader. + // + // Note: The reader object can be safely destroyed after close returns, + // since close makes sure to keep the underlying impl object alive until + // the latter's close call is resolved. + future<> close() noexcept { + if (auto i = std::move(_impl)) { + auto f = i->close(); + // most close implementations are expected to return a ready future + // so expedite processing it. 
+ if (f.available() && !f.failed()) { + return std::move(f); + } + // close must not fail + return f.handle_exception([i = std::move(i)] (std::exception_ptr ep) mutable { + on_close_error(std::move(i), std::move(ep)); + }); + } + return make_ready_future<>(); + } bool is_end_of_stream() const { return _impl->is_end_of_stream(); } bool is_buffer_empty() const { return _impl->is_buffer_empty(); } bool is_buffer_full() const { return _impl->is_buffer_full(); } @@ -605,46 +643,64 @@ flat_mutation_reader transform(flat_mutation_reader r, T t) { _end_of_stream = false; return _reader.fast_forward_to(std::move(pr), timeout); } + virtual future<> close() noexcept override { + return _reader.close(); + } }; return make_flat_mutation_reader(std::move(r), std::move(t)); } -inline flat_mutation_reader& to_reference(flat_mutation_reader& r) { return r; } -inline const flat_mutation_reader& to_reference(const flat_mutation_reader& r) { return r; } - -template class delegating_reader : public flat_mutation_reader::impl { - Underlying _underlying; + flat_mutation_reader_opt _underlying_holder; + flat_mutation_reader* _underlying; public: - delegating_reader(Underlying&& r) : impl(to_reference(r).schema(), to_reference(r).permit()), _underlying(std::forward(r)) { } + // when passed an lvalue reference to the reader + // we don't own it and the caller is responsible + // for eventually closing the reader. + delegating_reader(flat_mutation_reader& r) + : impl(r.schema(), r.permit()) + , _underlying_holder() + , _underlying(&r) + { } + // when passed an rvalue reference to the reader + // we assume ownership of it and will close it + // in close().
+ delegating_reader(flat_mutation_reader&& r) + : impl(r.schema(), r.permit()) + , _underlying_holder(std::move(r)) + , _underlying(&*_underlying_holder) + { } virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override { if (is_buffer_full()) { return make_ready_future<>(); } - return to_reference(_underlying).fill_buffer(timeout).then([this] { - _end_of_stream = to_reference(_underlying).is_end_of_stream(); - to_reference(_underlying).move_buffer_content_to(*this); + return _underlying->fill_buffer(timeout).then([this] { + _end_of_stream = _underlying->is_end_of_stream(); + _underlying->move_buffer_content_to(*this); }); } virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override { _end_of_stream = false; forward_buffer_to(pr.start()); - return to_reference(_underlying).fast_forward_to(std::move(pr), timeout); + return _underlying->fast_forward_to(std::move(pr), timeout); } virtual future<> next_partition() override { clear_buffer_to_next_partition(); auto maybe_next_partition = make_ready_future<>(); if (is_buffer_empty()) { - maybe_next_partition = to_reference(_underlying).next_partition(); + maybe_next_partition = _underlying->next_partition(); } return maybe_next_partition.then([this] { - _end_of_stream = to_reference(_underlying).is_end_of_stream() && to_reference(_underlying).is_buffer_empty(); + _end_of_stream = _underlying->is_end_of_stream() && _underlying->is_buffer_empty(); }); } virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override { _end_of_stream = false; clear_buffer(); - return to_reference(_underlying).fast_forward_to(pr, timeout); + return _underlying->fast_forward_to(pr, timeout); + } + virtual future<> close() noexcept override { + return _underlying_holder ? 
_underlying_holder->close() : make_ready_future<>(); } }; flat_mutation_reader make_delegating_reader(flat_mutation_reader&); diff --git a/frozen_mutation.cc b/frozen_mutation.cc index 9796ced670..b52d9b9e23 100644 --- a/frozen_mutation.cc +++ b/frozen_mutation.cc @@ -19,6 +19,8 @@ * along with Scylla. If not, see . */ +#include + #include "frozen_mutation.hh" #include "mutation_partition.hh" #include "mutation.hh" @@ -268,8 +270,9 @@ public: future<> fragment_and_freeze(flat_mutation_reader mr, frozen_mutation_consumer_fn c, size_t fragment_size) { + return with_closeable(std::move(mr), [c = std::move(c), fragment_size] (flat_mutation_reader& mr) mutable { fragmenting_mutation_freezer freezer(*mr.schema(), c, fragment_size); - return do_with(std::move(mr), std::move(freezer), [] (auto& mr, auto& freezer) { + return do_with(std::move(freezer), [&mr] (auto& freezer) { return repeat([&] { return mr(db::no_timeout).then([&] (auto mfopt) { if (!mfopt) { @@ -279,4 +282,5 @@ future<> fragment_and_freeze(flat_mutation_reader mr, frozen_mutation_consumer_f }); }); }); + }); } diff --git a/index/built_indexes_virtual_reader.hh b/index/built_indexes_virtual_reader.hh index 340ddb8929..e15df04131 100644 --- a/index/built_indexes_virtual_reader.hh +++ b/index/built_indexes_virtual_reader.hh @@ -112,6 +112,10 @@ class built_indexes_virtual_reader { _end_of_stream = false; return _underlying.fast_forward_to(std::move(range), timeout); } + + virtual future<> close() noexcept override { + return _underlying.close(); + } }; public: diff --git a/memtable.cc b/memtable.cc index bccf9a42c7..5ecd019ab1 100644 --- a/memtable.cc +++ b/memtable.cc @@ -19,6 +19,8 @@ * along with Scylla. If not, see . 
*/ +#include + #include "memtable.hh" #include "database.hh" #include "frozen_mutation.hh" @@ -400,12 +402,17 @@ class scanning_reader final : public flat_mutation_reader::impl, private iterato if (_delegate_range) { _end_of_stream = true; } else { - _delegate = { }; + return close_delegate(); } } + return make_ready_future<>(); }); } + future<> close_delegate() noexcept { + return _delegate ? _delegate->close() : make_ready_future<>(); + }; + public: scanning_reader(schema_ptr s, lw_shared_ptr m, @@ -464,7 +471,7 @@ public: clear_buffer_to_next_partition(); if (is_buffer_empty()) { if (!_delegate_range) { - _delegate = {}; + return close_delegate(); } else { return _delegate->next_partition(); } @@ -477,13 +484,17 @@ public: if (_delegate_range) { return _delegate->fast_forward_to(pr, timeout); } else { - _delegate = {}; + return close_delegate().then([this, &pr, timeout] { return iterator_reader::fast_forward_to(pr, timeout); + }); } } virtual future<> fast_forward_to(position_range cr, db::timeout_clock::time_point timeout) override { throw std::runtime_error("This reader can't be fast forwarded to another partition."); }; + virtual future<> close() noexcept override { + return close_delegate(); + }; }; void memtable::add_flushed_memory(uint64_t delta) { @@ -602,6 +613,9 @@ private: _partition_reader = std::move(mpsr); } } + future<> close_partition_reader() noexcept { + return _partition_reader ? 
_partition_reader->close() : make_ready_future<>(); + } public: virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override { return do_until([this] { return is_end_of_stream() || is_buffer_full(); }, [this, timeout] { @@ -617,15 +631,16 @@ public: return stop_iteration(is_buffer_full()); }, timeout).then([this] { if (_partition_reader->is_end_of_stream() && _partition_reader->is_buffer_empty()) { - _partition_reader = std::nullopt; + return _partition_reader->close(); } + return make_ready_future<>(); }); }); } virtual future<> next_partition() override { clear_buffer_to_next_partition(); if (is_buffer_empty()) { - _partition_reader = std::nullopt; + return close_partition_reader(); } return make_ready_future<>(); } @@ -635,6 +650,9 @@ public: virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point timeout) override { return make_exception_future<>(make_backtraced_exception_ptr()); } + virtual future<> close() noexcept override { + return close_partition_reader(); + } }; partition_snapshot_ptr memtable_entry::snapshot(memtable& mtbl) { @@ -705,7 +723,7 @@ memtable::update(db::rp_handle&& h) { future<> memtable::apply(memtable& mt, reader_permit permit) { - return do_with(mt.make_flat_reader(_schema, std::move(permit)), [this] (auto&& rd) mutable { + return with_closeable(mt.make_flat_reader(_schema, std::move(permit)), [this] (auto&& rd) mutable { return consume_partitions(rd, [self = this->shared_from_this(), &rd] (mutation&& m) { self->apply(m); return stop_iteration::no; diff --git a/multishard_mutation_query.cc b/multishard_mutation_query.cc index 690c6da1cc..9482d44741 100644 --- a/multishard_mutation_query.cc +++ b/multishard_mutation_query.cc @@ -247,7 +247,7 @@ public: tracing::trace_state_ptr trace_state, mutation_reader::forwarding fwd_mr) override; - virtual void destroy_reader(shard_id shard, future reader_fut) noexcept override; + virtual future<> destroy_reader(shard_id shard, future reader_fut) noexcept 
override; virtual reader_concurrency_semaphore& semaphore() override { const auto shard = this_shard_id(); @@ -323,9 +323,9 @@ flat_mutation_reader read_context::create_reader( std::move(trace_state), streamed_mutation::forwarding::no, fwd_mr); } -void read_context::destroy_reader(shard_id shard, future reader_fut) noexcept { +future<> read_context::destroy_reader(shard_id shard, future reader_fut) noexcept { // Future is waited on indirectly in `stop()` (via `_dismantling_gate`). - (void)with_gate(_dismantling_gate, [this, shard, reader_fut = std::move(reader_fut)] () mutable { + return with_gate(_dismantling_gate, [this, shard, reader_fut = std::move(reader_fut)] () mutable { return reader_fut.then_wrapped([this, shard] (future&& reader_fut) { auto& rm = _readers[shard]; @@ -353,23 +353,18 @@ void read_context::destroy_reader(shard_id shard, future reader_ } future<> read_context::stop() { - auto pr = promise<>(); - auto fut = pr.get_future(); auto gate_fut = _dismantling_gate.is_closed() ? make_ready_future<>() : _dismantling_gate.close(); - // Forwarded to `fut`. - (void)gate_fut.then([this] { - for (shard_id shard = 0; shard != smp::count; ++shard) { - if (_readers[shard].state == reader_state::saving) { - // Move to the background. - (void)_db.invoke_on(shard, [rm = std::move(_readers[shard])] (database& db) mutable { - rm.rparts->permit.semaphore().unregister_inactive_read(std::move(*rm.handle)); + return gate_fut.then([this] { + return parallel_for_each(smp::all_cpus(), [this] (unsigned shard) { + if (_readers[shard].handle && *_readers[shard].handle) { + return _db.invoke_on(shard, [rm = std::move(_readers[shard])] (database& db) mutable { + auto reader_opt = rm.rparts->permit.semaphore().unregister_inactive_read(std::move(*rm.handle)); + return reader_opt ? 
reader_opt->close() : make_ready_future<>(); }); } - } - }).finally([pr = std::move(pr)] () mutable { - pr.set_value(); + return make_ready_future<>(); + }); }); - return fut; } read_context::dismantle_buffer_stats read_context::dismantle_combined_buffer(flat_mutation_reader::tracked_buffer combined_buffer, @@ -592,9 +587,6 @@ future<> read_context::save_readers(flat_mutation_reader::tracked_buffer unconsu namespace { -template -using consume_result = std::tuple, ResultType>; - template using compact_for_result_state = compact_for_query_state; @@ -605,12 +597,13 @@ struct page_consume_result { flat_mutation_reader::tracked_buffer unconsumed_fragments; lw_shared_ptr> compaction_state; - page_consume_result(consume_result&& result, flat_mutation_reader::tracked_buffer&& unconsumed_fragments, - lw_shared_ptr>&& compaction_state) - : last_ckey(std::get>(std::move(result))) - , result(std::get(std::move(result))) + page_consume_result(std::optional&& ckey, typename ResultBuilder::result_type&& result, flat_mutation_reader::tracked_buffer&& unconsumed_fragments, + lw_shared_ptr>&& compaction_state) noexcept + : last_ckey(std::move(ckey)) + , result(std::move(result)) , unconsumed_fragments(std::move(unconsumed_fragments)) , compaction_state(std::move(compaction_state)) { + static_assert(std::is_nothrow_move_constructible_v); } }; @@ -635,15 +628,25 @@ future> read_page( mutation_reader::forwarding fwd_mr) { return make_multishard_combining_reader(ctx, std::move(s), std::move(permit), pr, ps, pc, std::move(trace_state), fwd_mr); }); - auto reader = make_flat_multi_range_reader(s, ctx->permit(), std::move(ms), ranges, - cmd.slice, service::get_local_sstable_query_read_priority(), trace_state, mutation_reader::forwarding::no); - auto compaction_state = make_lw_shared>(*s, cmd.timestamp, cmd.slice, cmd.get_row_limit(), cmd.partition_limit); - auto result = co_await query::consume_page(reader, compaction_state, cmd.slice, std::move(result_builder), cmd.get_row_limit(), - 
cmd.partition_limit, cmd.timestamp, timeout, *cmd.max_result_size); - co_return page_consume_result(std::move(result), reader.detach_buffer(), std::move(compaction_state)); + auto reader = make_flat_multi_range_reader(s, ctx->permit(), std::move(ms), ranges, + cmd.slice, service::get_local_sstable_query_read_priority(), trace_state, mutation_reader::forwarding::no); + + std::exception_ptr ex; + try { + auto [ckey, result] = co_await query::consume_page(reader, compaction_state, cmd.slice, std::move(result_builder), cmd.get_row_limit(), + cmd.partition_limit, cmd.timestamp, timeout, *cmd.max_result_size); + auto buffer = reader.detach_buffer(); + co_await reader.close(); + // page_consume_result cannot fail so there's no risk of double-closing reader. + co_return page_consume_result(std::move(ckey), std::move(result), std::move(buffer), std::move(compaction_state)); + } catch (...) { + ex = std::current_exception(); + } + co_await reader.close(); + std::rethrow_exception(std::move(ex)); } template diff --git a/mutation_partition.cc b/mutation_partition.cc index 0c1e10e29c..04c91e2f69 100644 --- a/mutation_partition.cc +++ b/mutation_partition.cc @@ -1926,7 +1926,7 @@ uint64_t mutation_querier::consume_end_of_stream() { } } -query_result_builder::query_result_builder(const schema& s, query::result::builder& rb) +query_result_builder::query_result_builder(const schema& s, query::result::builder& rb) noexcept : _schema(s), _rb(rb) { } @@ -2297,7 +2297,9 @@ future counter_write_query(schema_ptr s, const mutation_source& so auto cfq = make_stable_flattened_mutations_consumer>( *s, gc_clock::now(), slice, query::max_rows, query::max_partitions, std::move(cwqrb)); auto f = r_a_r->reader.consume(std::move(cfq), timeout); - return f.finally([r_a_r = std::move(r_a_r)] { }); + return f.finally([r_a_r = std::move(r_a_r)] { + return r_a_r->reader.close(); + }); } mutation_cleaner_impl::~mutation_cleaner_impl() { diff --git a/mutation_reader.cc b/mutation_reader.cc index 
c4f330d2e2..71176bf212 100644 --- a/mutation_reader.cc +++ b/mutation_reader.cc @@ -24,9 +24,11 @@ #include #include -#include "mutation_reader.hh" #include #include +#include + +#include "mutation_reader.hh" #include "flat_mutation_reader.hh" #include "schema_registry.hh" #include "mutation_compactor.hh" @@ -144,6 +146,10 @@ public: future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) { return _producer.fast_forward_to(std::move(pr), timeout); } + + future<> close() noexcept { + return _producer.close(); + } }; // Merges the output of the sub-readers into a single non-decreasing @@ -245,6 +251,7 @@ public: future<> next_partition(); future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout); future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout); + future<> close() noexcept; }; /* Merge a non-decreasing stream of mutation fragment batches @@ -272,6 +279,7 @@ public: virtual future<> next_partition() override; virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override; virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override; + virtual future<> close() noexcept override; }; // Dumb selector implementation for mutation_reader_merger that simply @@ -376,6 +384,7 @@ future<> mutation_reader_merger::prepare_next(db::timeout_clock::time_point time future mutation_reader_merger::prepare_one(db::timeout_clock::time_point timeout, reader_and_last_fragment_kind rk, reader_galloping reader_galloping) { return (*rk.reader)(timeout).then([this, rk, reader_galloping] (mutation_fragment_opt mfo) { + auto to_close = make_ready_future<>(); if (mfo) { if (mfo->is_partition_start()) { _reader_heap.emplace_back(rk.reader, std::move(*mfo)); @@ -388,7 +397,7 @@ future mutation_reader_merger::prepare_one( _current.clear(); _current.push_back(std::move(*mfo)); _galloping_reader.last_kind = 
_current.back().mutation_fragment_kind(); - return needs_merge::no; + return make_ready_future(needs_merge::no); } _gallop_mode_hits = 0; @@ -409,10 +418,14 @@ future mutation_reader_merger::prepare_one( } else if (_fwd_mr == mutation_reader::forwarding::no) { _to_remove.splice(_to_remove.end(), _all_readers, rk.reader); if (_to_remove.size() >= 4) { - _to_remove.clear(); + auto to_remove = std::move(_to_remove); + to_close = parallel_for_each(to_remove, [] (flat_mutation_reader& r) { + return r.close(); + }); if (reader_galloping) { // Galloping reader iterator may have become invalid at this point, so - to be safe - clear it - _galloping_reader.reader = { }; + auto fut = _galloping_reader.reader->close(); + to_close = when_all_succeed(std::move(to_close), std::move(fut)).discard_result(); } } } @@ -420,7 +433,11 @@ future mutation_reader_merger::prepare_one( if (reader_galloping) { _gallop_mode_hits = 0; } + // to_close is a chain of flat_mutation_reader close futures, + // therefore it can not fail. + return to_close.then([] { return needs_merge::yes; + }); }); } @@ -589,6 +606,16 @@ future<> mutation_reader_merger::fast_forward_to(position_range pr, db::timeout_ }); } +future<> mutation_reader_merger::close() noexcept { + return parallel_for_each(std::move(_to_remove), [] (flat_mutation_reader& mr) { + return mr.close(); + }).then([this] { + return parallel_for_each(std::move(_all_readers), [] (flat_mutation_reader& mr) { + return mr.close(); + }); + }); +} + template future<> merging_reader

::fill_buffer(db::timeout_clock::time_point timeout) { return repeat([this, timeout] { @@ -641,6 +668,11 @@ future<> merging_reader

::fast_forward_to(position_range pr, db::timeout_clock return _merger.fast_forward_to(std::move(pr), timeout); } +template +future<> merging_reader

::close() noexcept { + return _merger.close(); +} + flat_mutation_reader make_combined_reader(schema_ptr schema, reader_permit permit, std::unique_ptr selector, @@ -777,6 +809,12 @@ public: return reader.fast_forward_to(std::move(pr), timeout); }, timeout); } + virtual future<> close() noexcept override { + if (auto* state = std::get_if(&_state)) { + return state->reader.close(); + } + return make_ready_future<>(); + } }; flat_mutation_reader @@ -903,8 +941,6 @@ public: foreign_unique_ptr reader, streamed_mutation::forwarding fwd_sm = streamed_mutation::forwarding::no); - ~foreign_reader(); - // this is captured. foreign_reader(const foreign_reader&) = delete; foreign_reader& operator=(const foreign_reader&) = delete; @@ -915,6 +951,7 @@ public: virtual future<> next_partition() override; virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override; virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override; + virtual future<> close() noexcept override; }; foreign_reader::foreign_reader(schema_ptr schema, @@ -926,20 +963,6 @@ foreign_reader::foreign_reader(schema_ptr schema, , _fwd_sm(fwd_sm) { } -foreign_reader::~foreign_reader() { - if (!_read_ahead_future && !_reader) { - return; - } - // Can't wait on this future directly. Right now we don't wait on it at all. - // If this proves problematic we can collect these somewhere and wait on them. 
- (void)smp::submit_to(_reader.get_owner_shard(), [reader = std::move(_reader), read_ahead_future = std::move(_read_ahead_future)] () mutable { - if (read_ahead_future) { - return read_ahead_future->finally([r = std::move(reader)] {}); - } - return make_ready_future<>(); - }); -} - future<> foreign_reader::fill_buffer(db::timeout_clock::time_point timeout) { if (_end_of_stream || is_buffer_full()) { return make_ready_future(); @@ -991,6 +1014,26 @@ future<> foreign_reader::fast_forward_to(position_range pr, db::timeout_clock::t }); } +future<> foreign_reader::close() noexcept { + if (!_reader) { + if (_read_ahead_future) { + on_internal_error(mrlog, "foreign_reader::close can't wait on read_ahead future with disengaged reader"); + } + return make_ready_future<>(); + } + return smp::submit_to(_reader.get_owner_shard(), + [reader = std::move(_reader), read_ahead_future = std::exchange(_read_ahead_future, nullptr)] () mutable { + auto read_ahead = read_ahead_future ? std::move(*read_ahead_future.get()) : make_ready_future<>(); + return read_ahead.then_wrapped([reader = std::move(reader)] (future<> f) mutable { + if (f.failed()) { + auto ex = f.get_exception(); + mrlog.warn("foreign_reader: benign read_ahead failure during close: {}. 
Ignoring.", ex); + } + return reader->close(); + }); + }); +} + flat_mutation_reader make_foreign_reader(schema_ptr schema, reader_permit permit, foreign_ptr> reader, @@ -1071,6 +1114,15 @@ public: virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point timeout) override { throw_with_backtrace(); } + virtual future<> close() noexcept override { + if (_reader) { + return _reader->close(); + } + if (auto reader_opt = try_resume()) { + return reader_opt->close(); + } + return make_ready_future<>(); + } reader_concurrency_semaphore::inactive_read_handle inactive_read_handle() && { return std::move(_irh); } @@ -1446,7 +1498,7 @@ future<> evictable_reader::fill_buffer(db::timeout_clock::time_point timeout) { if (is_end_of_stream()) { return make_ready_future<>(); } - return do_with(resume_or_create_reader(), [this, timeout] (flat_mutation_reader& reader) mutable { + return with_closeable(resume_or_create_reader(), [this, timeout] (flat_mutation_reader& reader) mutable { return fill_buffer(reader, timeout).then([this, &reader] { _end_of_stream = reader.is_end_of_stream() && reader.is_buffer_empty(); maybe_pause(std::move(reader)); @@ -1542,7 +1594,6 @@ private: const io_priority_class& _pc; tracing::global_trace_state_ptr _trace_state; const mutation_reader::forwarding _fwd_mr; - bool _stopped = false; std::optional> _read_ahead; foreign_ptr> _reader; @@ -1576,8 +1627,6 @@ public: shard_reader(const shard_reader&) = delete; shard_reader& operator=(const shard_reader&) = delete; - void stop() noexcept; - const mutation_fragment& peek_buffer() const { return buffer().front(); } @@ -1585,6 +1634,7 @@ public: virtual future<> next_partition() override; virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override; virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point timeout) override; + virtual future<> close() noexcept override; bool done() const { return _reader && 
is_buffer_empty() && is_end_of_stream(); } @@ -1594,18 +1644,17 @@ public: } }; -void shard_reader::stop() noexcept { +future<> shard_reader::close() noexcept { // Nothing to do if there was no reader created, nor is there a background // read ahead in progress which will create one. if (!_reader && !_read_ahead) { - return; + return make_ready_future<>(); } - _stopped = true; - auto f = _read_ahead ? *std::exchange(_read_ahead, std::nullopt) : make_ready_future<>(); - _lifecycle_policy->destroy_reader(_shard, f.then([this] { + // TODO: return future upstream as part of close() + return _lifecycle_policy->destroy_reader(_shard, f.then([this] { return smp::submit_to(_shard, [this] { auto ret = std::tuple( make_foreign(std::make_unique(std::move(*_reader).inactive_read_handle())), @@ -1772,8 +1821,6 @@ public: tracing::trace_state_ptr trace_state, mutation_reader::forwarding fwd_mr); - ~multishard_combining_reader(); - // this is captured. multishard_combining_reader(const multishard_combining_reader&) = delete; multishard_combining_reader& operator=(const multishard_combining_reader&) = delete; @@ -1784,6 +1831,7 @@ public: virtual future<> next_partition() override; virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override; virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override; + virtual future<> close() noexcept override; }; void multishard_combining_reader::on_partition_range_change(const dht::partition_range& pr) { @@ -1881,12 +1929,6 @@ multishard_combining_reader::multishard_combining_reader( } } -multishard_combining_reader::~multishard_combining_reader() { - for (auto& sr : _shard_readers) { - sr->stop(); - } -} - future<> multishard_combining_reader::fill_buffer(db::timeout_clock::time_point timeout) { _crossed_shards = false; return do_until([this] { return is_buffer_full() || is_end_of_stream(); }, [this, timeout] { @@ -1927,6 +1969,13 @@ future<> 
multishard_combining_reader::fast_forward_to(position_range pr, db::tim return make_exception_future<>(make_backtraced_exception_ptr()); } +future<> multishard_combining_reader::close() noexcept { + auto shard_readers = std::move(_shard_readers); + return parallel_for_each(shard_readers, [] (lw_shared_ptr& sr) { + return sr->close(); + }); +} + reader_concurrency_semaphore::inactive_read_handle reader_lifecycle_policy::pause(reader_concurrency_semaphore& sem, flat_mutation_reader reader) { return sem.register_inactive_read(std::move(reader)); @@ -2020,6 +2069,9 @@ public: virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point) override { return make_exception_future<>(make_backtraced_exception_ptr()); } + virtual future<> close() noexcept override { + return make_ready_future<>(); + } future<> push(mutation_fragment&& mf) { push_and_maybe_notify(std::move(mf)); if (!is_buffer_full()) { @@ -2253,6 +2305,9 @@ public: virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override { return make_exception_future<>(make_backtraced_exception_ptr()); } + virtual future<> close() noexcept override { + return _reader.close(); + } }; } // anonymous namespace @@ -2379,6 +2434,12 @@ class clustering_order_reader_merger { return _gallop_mode_hits >= _gallop_mode_entering_threshold; } + future<> erase_reader(reader_iterator it) noexcept { + return std::move(it->reader).close().then([this, it = std::move(it)] { + _all_readers.erase(it); + }); + } + // Retrieve the next fragment from the reader pointed to by `it`. // The function assumes that we're not in galloping mode, `it` is in `_unpeeked_readers`, // and all fragments previously returned from the reader have already been returned by operator(). @@ -2401,7 +2462,7 @@ class clustering_order_reader_merger { // it makes the code simpler (to check for this here we would need additional state); it is a bit wasteful // but completely empty readers should be rare. 
if (_cmp(it->upper_bound, _pr_end) < 0) { - _all_readers.erase(it); + return erase_reader(std::move(it)); } else { _halted_readers.push_back(it); } @@ -2440,7 +2501,7 @@ class clustering_order_reader_merger { } if (mf->is_end_of_partition()) { - _all_readers.erase(it); + return erase_reader(std::move(it)); } else { _peeked_readers.emplace_back(it); boost::range::push_heap(_peeked_readers, _peeked_cmp); @@ -2466,6 +2527,7 @@ class clustering_order_reader_merger { // Otherwise, the reader is pushed onto _peeked_readers and we retry in non-galloping mode. future peek_galloping_reader(db::timeout_clock::time_point timeout) { return _galloping_reader->reader.peek(timeout).then([this, timeout] (mutation_fragment* mf) { + bool erase = false; if (mf) { if (mf->is_partition_start()) { on_internal_error(mrlog, format( @@ -2481,7 +2543,7 @@ class clustering_order_reader_merger { } if (mf->is_end_of_partition()) { - _all_readers.erase(_galloping_reader); + erase = true; } else { if (_reader_queue->empty(mf->position()) && (_peeked_readers.empty() @@ -2495,23 +2557,27 @@ class clustering_order_reader_merger { // or there is a yet unselected reader which possibly has a smaller position. // In either case we exit the galloping mode. - _peeked_readers.emplace_back(_galloping_reader); + _peeked_readers.emplace_back(std::move(_galloping_reader)); boost::range::push_heap(_peeked_readers, _peeked_cmp); } } else { // See comment in `peek_reader`. if (_cmp(_galloping_reader->upper_bound, _pr_end) < 0) { - _all_readers.erase(_galloping_reader); + erase = true; } else { - _halted_readers.push_back(_galloping_reader); + _halted_readers.push_back(std::move(_galloping_reader)); } } + auto maybe_erase = erase ? erase_reader(std::move(_galloping_reader)) : make_ready_future<>(); + // The galloping reader has either been removed, halted, or lost with the other readers. // Proceed with the normal path. 
+ return maybe_erase.then([this, timeout] { _galloping_reader = {}; _gallop_mode_hits = 0; return (*this)(timeout); + }); }); } @@ -2655,6 +2721,14 @@ public: return it->reader.fast_forward_to(pr, timeout); }); } + + future<> close() noexcept { + return parallel_for_each(std::move(_all_readers), [] (reader_and_upper_bound& r) { + return r.reader.close(); + }).finally([this] { + return _reader_queue->close(); + }); + } }; flat_mutation_reader make_clustering_combined_reader(schema_ptr schema, diff --git a/mutation_reader.hh b/mutation_reader.hh index 643a08c390..c763c73350 100644 --- a/mutation_reader.hh +++ b/mutation_reader.hh @@ -121,6 +121,9 @@ public: _end_of_stream = false; return _rd.fast_forward_to(std::move(pr), timeout); } + virtual future<> close() noexcept { + return _rd.close(); + } }; // Creates a mutation_reader wrapper which creates a new stream of mutations @@ -476,7 +479,7 @@ public: /// all the readers being cleaned up is up to the implementation. /// /// This method will be called from a destructor so it cannot throw. - virtual void destroy_reader(shard_id shard, future reader) noexcept = 0; + virtual future<> destroy_reader(shard_id shard, future reader) noexcept = 0; /// Get the relevant semaphore for this read. /// @@ -665,6 +668,9 @@ public: // Return the next batch of readers from `pref(b)`. 
virtual std::vector pop(position_in_partition_view bound) = 0; + + // Close all readers + virtual future<> close() noexcept = 0; }; flat_mutation_reader make_clustering_combined_reader(schema_ptr schema, diff --git a/mutation_writer/feed_writers.hh b/mutation_writer/feed_writers.hh index 146ad763cd..3ddf2ac0b8 100644 --- a/mutation_writer/feed_writers.hh +++ b/mutation_writer/feed_writers.hh @@ -73,6 +73,8 @@ future<> feed_writer(flat_mutation_reader&& rd, Writer&& wr) { wr.consume_end_of_stream(); return wr.close(); } + }).finally([&rd] { + return rd.close(); }); }); } diff --git a/mutation_writer/multishard_writer.cc b/mutation_writer/multishard_writer.cc index 2cb6ba56c6..49066aa7ad 100644 --- a/mutation_writer/multishard_writer.cc +++ b/mutation_writer/multishard_writer.cc @@ -26,6 +26,7 @@ #include #include #include +#include namespace mutation_writer { @@ -41,6 +42,7 @@ public: flat_mutation_reader reader, std::function (flat_mutation_reader reader)> consumer); future<> consume(); + future<> close() noexcept; }; // The multishard_writer class gets mutation_fragments generated from @@ -75,6 +77,7 @@ public: flat_mutation_reader producer, std::function (flat_mutation_reader)> consumer); future operator()(); + future<> close() noexcept; }; shard_writer::shard_writer(schema_ptr s, @@ -94,6 +97,10 @@ future<> shard_writer::consume() { }); } +future<> shard_writer::close() noexcept { + return _reader.close(); +} + multishard_writer::multishard_writer( schema_ptr s, flat_mutation_reader producer, @@ -199,7 +206,22 @@ future distribute_reader_and_consume_on_shards(schema_ptr s, std::function (flat_mutation_reader)> consumer, utils::phased_barrier::operation&& op) { return do_with(multishard_writer(std::move(s), std::move(producer), std::move(consumer)), std::move(op), [] (multishard_writer& writer, utils::phased_barrier::operation&) { - return writer(); + return writer().finally([&writer] { + return writer.close(); + }); + }); +} + +future<> 
multishard_writer::close() noexcept { + return _producer.close().then([this] { + return parallel_for_each(boost::irange(size_t(0), _shard_writers.size()), [this] (auto shard) { + if (auto w = std::move(_shard_writers[shard])) { + return smp::submit_to(shard, [w = std::move(w)] () mutable { + return w->close(); + }); + } + return make_ready_future<>(); + }); }); } diff --git a/partition_snapshot_reader.hh b/partition_snapshot_reader.hh index d6ec3cb44c..801b561630 100644 --- a/partition_snapshot_reader.hh +++ b/partition_snapshot_reader.hh @@ -386,6 +386,9 @@ public: virtual future<> fast_forward_to(position_range cr, db::timeout_clock::time_point timeout) override { throw std::runtime_error("This reader can't be fast forwarded to another position."); }; + virtual future<> close() noexcept override { + return make_ready_future<>(); + } }; template diff --git a/querier.cc b/querier.cc index 5becc9229a..379a30699c 100644 --- a/querier.cc +++ b/querier.cc @@ -19,6 +19,8 @@ * along with Scylla. If not, see . */ +#include + #include "querier.hh" #include "schema.hh" @@ -309,15 +311,15 @@ void querier_cache::insert(utils::UUID key, shard_mutation_querier&& q, tracing: } template -static std::optional lookup_querier( +std::optional querier_cache::lookup_querier( querier_cache::index& index, - querier_cache::stats& stats, utils::UUID key, const schema& s, dht::partition_ranges_view ranges, const query::partition_slice& slice, tracing::trace_state_ptr trace_state) { auto base_ptr = find_querier(index, key, ranges, trace_state); + auto& stats = _stats; ++stats.lookups; if (!base_ptr) { ++stats.misses; @@ -344,6 +346,14 @@ static std::optional lookup_querier( tracing::trace(trace_state, "Dropping querier because {}", cannot_use_reason(can_be_used)); ++stats.drops; + + // Close and drop the querier in the background. 
+ // It is safe to do so, since _closing_gate is closed and + // waited on in querier_cache::stop() + (void)with_gate(_closing_gate, [this, q = std::move(q)] () mutable { + return q.close().finally([q = std::move(q)] {}); + }); + return std::nullopt; } @@ -352,7 +362,7 @@ std::optional querier_cache::lookup_data_querier(utils::UUID key, const dht::partition_range& range, const query::partition_slice& slice, tracing::trace_state_ptr trace_state) { - return lookup_querier(_data_querier_index, _stats, key, s, range, slice, std::move(trace_state)); + return lookup_querier(_data_querier_index, key, s, range, slice, std::move(trace_state)); } std::optional querier_cache::lookup_mutation_querier(utils::UUID key, @@ -360,7 +370,7 @@ std::optional querier_cache::lookup_mutation_querier(utils::UU const dht::partition_range& range, const query::partition_slice& slice, tracing::trace_state_ptr trace_state) { - return lookup_querier(_mutation_querier_index, _stats, key, s, range, slice, std::move(trace_state)); + return lookup_querier(_mutation_querier_index, key, s, range, slice, std::move(trace_state)); } std::optional querier_cache::lookup_shard_mutation_querier(utils::UUID key, @@ -368,46 +378,76 @@ std::optional querier_cache::lookup_shard_mutation_queri const dht::partition_range_vector& ranges, const query::partition_slice& slice, tracing::trace_state_ptr trace_state) { - return lookup_querier(_shard_mutation_querier_index, _stats, key, s, ranges, slice, + return lookup_querier(_shard_mutation_querier_index, key, s, ranges, slice, std::move(trace_state)); } +future<> querier_base::close() noexcept { + struct variant_closer { + querier_base& q; + future<> operator()(flat_mutation_reader& reader) { + return reader.close(); + } + future<> operator()(reader_concurrency_semaphore::inactive_read_handle& irh) { + auto reader_opt = q.permit().semaphore().unregister_inactive_read(std::move(irh)); + return reader_opt ? 
reader_opt->close() : make_ready_future<>(); + } + }; + return std::visit(variant_closer{*this}, _reader); +} + void querier_cache::set_entry_ttl(std::chrono::seconds entry_ttl) { _entry_ttl = entry_ttl; } -bool querier_cache::evict_one() { - auto maybe_evict_from_index = [this] (index& idx) -> bool { +future querier_cache::evict_one() noexcept { + for (auto ip : {&_data_querier_index, &_mutation_querier_index, &_shard_mutation_querier_index}) { + auto& idx = *ip; if (idx.empty()) { - return false; + continue; } auto it = idx.begin(); - it->second->permit().semaphore().unregister_inactive_read(querier_utils::get_inactive_read_handle(*it->second)); + auto reader_opt = it->second->permit().semaphore().unregister_inactive_read(querier_utils::get_inactive_read_handle(*it->second)); idx.erase(it); ++_stats.resource_based_evictions; --_stats.population; - return true; - }; - - return maybe_evict_from_index(_data_querier_index) || maybe_evict_from_index(_mutation_querier_index) || maybe_evict_from_index(_shard_mutation_querier_index); + if (reader_opt) { + co_await reader_opt->close(); + } + co_return true; + } + co_return false; } -void querier_cache::evict_all_for_table(const utils::UUID& schema_id) { - auto evict_from_index = [this, schema_id] (index& idx) { +future<> querier_cache::evict_all_for_table(const utils::UUID& schema_id) noexcept { + for (auto ip : {&_data_querier_index, &_mutation_querier_index, &_shard_mutation_querier_index}) { + auto& idx = *ip; for (auto it = idx.begin(); it != idx.end();) { if (it->second->schema().id() == schema_id) { - it->second->permit().semaphore().unregister_inactive_read(querier_utils::get_inactive_read_handle(*it->second)); + auto reader_opt = it->second->permit().semaphore().unregister_inactive_read(querier_utils::get_inactive_read_handle(*it->second)); it = idx.erase(it); --_stats.population; + if (reader_opt) { + co_await reader_opt->close(); + } } else { ++it; } } - }; + } + co_return; +} - 
evict_from_index(_data_querier_index); - evict_from_index(_mutation_querier_index); - evict_from_index(_shard_mutation_querier_index); +future<> querier_cache::stop() noexcept { + co_await _closing_gate.close(); + + for (auto* ip : {&_data_querier_index, &_mutation_querier_index, &_shard_mutation_querier_index}) { + auto& idx = *ip; + for (auto it = idx.begin(); it != idx.end(); it = idx.erase(it)) { + co_await it->second->close(); + --_stats.population; + } + } } querier_cache_context::querier_cache_context(querier_cache& cache, utils::UUID key, query::is_first_page is_first_page) diff --git a/querier.hh b/querier.hh index f853d30202..c92d334e22 100644 --- a/querier.hh +++ b/querier.hh @@ -21,6 +21,8 @@ #pragma once +#include + #include "mutation_compactor.hh" #include "mutation_reader.hh" @@ -96,7 +98,7 @@ auto consume_page(flat_mutation_reader& reader, auto consume = [&reader, &slice, reader_consumer = std::move(reader_consumer), timeout, max_size] () mutable { if (slice.options.contains(query::partition_slice::option::reversed)) { - return do_with(make_reversing_reader(reader, max_size), + return with_closeable(make_reversing_reader(reader, max_size), [reader_consumer = std::move(reader_consumer), timeout] (flat_mutation_reader& reversing_reader) mutable { return reversing_reader.consume(std::move(reader_consumer), timeout); }); @@ -174,6 +176,8 @@ public: size_t memory_usage() const { return _permit.consumed_resources().memory; } + + future<> close() noexcept; }; /// One-stop object for serving queries. 
@@ -366,6 +370,16 @@ private: index _shard_mutation_querier_index; std::chrono::seconds _entry_ttl; stats _stats; + gate _closing_gate; + + template + std::optional lookup_querier( + querier_cache::index& index, + utils::UUID key, + const schema& s, + dht::partition_ranges_view ranges, + const query::partition_slice& slice, + tracing::trace_state_ptr trace_state); public: explicit querier_cache(std::chrono::seconds entry_ttl = default_entry_ttl); @@ -429,12 +443,17 @@ public: /// /// Return true if a querier was evicted and false otherwise (if the cache /// is empty). - bool evict_one(); + future evict_one() noexcept; /// Evict all queriers that belong to a table. /// /// Should be used when dropping a table. - void evict_all_for_table(const utils::UUID& schema_id); + future<> evict_all_for_table(const utils::UUID& schema_id) noexcept; + + /// Close all queriers and wait on background work. + /// + /// Should be used before destroying the querier_cache. + future<> stop() noexcept; const stats& get_stats() const { return _stats; diff --git a/query-result-writer.hh b/query-result-writer.hh index c2cecd8e44..f698b836f3 100644 --- a/query-result-writer.hh +++ b/query-result-writer.hh @@ -230,7 +230,7 @@ class query_result_builder { std::optional _mutation_consumer; stop_iteration _stop; public: - query_result_builder(const schema& s, query::result::builder& rb); + query_result_builder(const schema& s, query::result::builder& rb) noexcept; void consume_new_partition(const dht::decorated_key& dk); void consume(tombstone t); diff --git a/read_context.hh b/read_context.hh index 7171242d39..05ce9376bc 100644 --- a/read_context.hh +++ b/read_context.hh @@ -42,6 +42,10 @@ class autoupdating_underlying_reader final { dht::partition_range _range = { }; std::optional _last_key; std::optional _new_last_key; + + future<> close_reader() noexcept { + return _reader ? 
_reader->close() : make_ready_future<>(); + } public: autoupdating_underlying_reader(row_cache& cache, read_context& context) : _cache(cache) @@ -51,13 +55,15 @@ public: _last_key = std::move(_new_last_key); auto start = population_range_start(); auto phase = _cache.phase_of(start); + auto refresh_reader = make_ready_future<>(); if (!_reader || _reader_creation_phase != phase) { if (_last_key) { auto cmp = dht::ring_position_comparator(*_cache._schema); auto&& new_range = _range.split_after(*_last_key, cmp); if (!new_range) { - _reader = {}; + return close_reader().then([] { return make_ready_future(); + }); } _range = std::move(*new_range); _last_key = {}; @@ -65,12 +71,12 @@ public: if (_reader) { ++_cache._tracker._stats.underlying_recreations; } - auto& snap = _cache.snapshot_for_phase(phase); - _reader = {}; // See issue #2644 - _reader = _cache.create_underlying_reader(_read_context, snap, _range); + refresh_reader = close_reader().then([this, phase] { + _reader = _cache.create_underlying_reader(_read_context, _cache.snapshot_for_phase(phase), _range); _reader_creation_phase = phase; + }); } - + return refresh_reader.then([this, timeout] { return _reader->next_partition().then([this, timeout] { if (_reader->is_end_of_stream() && _reader->is_buffer_empty()) { return make_ready_future(); @@ -83,6 +89,7 @@ public: return std::move(mfopt); }); }); + }); } future<> fast_forward_to(dht::partition_range&& range, db::timeout_clock::time_point timeout) { auto snapshot_and_phase = _cache.snapshot_of(dht::ring_position_view::for_range_start(_range)); @@ -98,12 +105,15 @@ public: return _reader->fast_forward_to(_range, timeout); } else { ++_cache._tracker._stats.underlying_recreations; - _reader = {}; // See issue #2644 } } + return close_reader().then([this, &snapshot, phase] { _reader = _cache.create_underlying_reader(_read_context, snapshot, _range); _reader_creation_phase = phase; - return make_ready_future<>(); + }); + } + future<> close() noexcept { + return 
close_reader(); } utils::phased_barrier::phase_type creation_phase() const { return _reader_creation_phase; @@ -224,6 +234,9 @@ public: _underlying_snapshot = {}; _key = dk; } + future<> close() noexcept { + return _underlying.close(); + } }; } diff --git a/reader_concurrency_semaphore.cc b/reader_concurrency_semaphore.cc index 79a9a9762b..5e8196129b 100644 --- a/reader_concurrency_semaphore.cc +++ b/reader_concurrency_semaphore.cc @@ -23,6 +23,7 @@ #include #include #include +#include #include "reader_concurrency_semaphore.hh" #include "utils/exceptions.hh" @@ -95,10 +96,8 @@ public: { } ~impl() { if (_resources) { - on_internal_error_noexcept(rcslog, format("reader_permit::impl::~impl(): permit {}.{}:{} detected a leak of {{count={}, memory={}}} resources", - _schema ? _schema->ks_name() : "*", - _schema ? _schema->cf_name() : "*", - _op_name_view, + on_internal_error_noexcept(rcslog, format("reader_permit::impl::~impl(): permit {} detected a leak of {{count={}, memory={}}} resources", + description(), _resources.count, _resources.memory)); signal(_resources); @@ -165,6 +164,13 @@ public: reader_resources resources() const { return _resources; } + + sstring description() const { + return format("{}.{}:{}", + _schema ? _schema->ks_name() : "*", + _schema ? 
_schema->cf_name() : "*", + _op_name_view); + } }; struct reader_concurrency_semaphore::permit_list { @@ -224,6 +230,10 @@ reader_resources reader_permit::consumed_resources() const { return _impl->resources(); } +sstring reader_permit::description() const { + return _impl->description(); +} + std::ostream& operator<<(std::ostream& os, reader_permit::state s) { switch (s) { case reader_permit::state::registered: @@ -409,8 +419,10 @@ reader_concurrency_semaphore::reader_concurrency_semaphore(no_limits, sstring na std::move(name)) {} reader_concurrency_semaphore::~reader_concurrency_semaphore() { - broken(std::make_exception_ptr(broken_semaphore{})); - clear_inactive_reads(); + // FIXME: assert(_stopped) once all reader_concurrency_semaphore:s + // are properly closed (needs de-static-fying tests::the_semaphore) + assert(_inactive_reads.empty() && !_close_readers_gate.get_count()); + broken(); } reader_concurrency_semaphore::inactive_read_handle reader_concurrency_semaphore::register_inactive_read(flat_mutation_reader reader) noexcept { @@ -440,6 +452,7 @@ reader_concurrency_semaphore::inactive_read_handle reader_concurrency_semaphore: } else { ++_stats.permit_based_evictions; } + close_reader(std::move(reader)); return inactive_read_handle(); } @@ -459,6 +472,12 @@ flat_mutation_reader_opt reader_concurrency_semaphore::unregister_inactive_read( return {}; } if (irh._sem != this) { + // unregister from the other semaphore + // and close the reader, in case on_internal_error + // doesn't abort. 
+ auto irp = std::move(irh._irp); + irp->unlink(); + irh._sem->close_reader(std::move(irp->reader)); on_internal_error(rcslog, fmt::format( "reader_concurrency_semaphore::unregister_inactive_read(): " "attempted to unregister an inactive read with a handle belonging to another semaphore: " @@ -485,12 +504,28 @@ bool reader_concurrency_semaphore::try_evict_one_inactive_read(evict_reason reas void reader_concurrency_semaphore::clear_inactive_reads() { while (!_inactive_reads.empty()) { + auto& ir = _inactive_reads.front(); + close_reader(std::move(ir.reader)); // Destroying the read unlinks it too. std::unique_ptr _(&*_inactive_reads.begin()); } } -void reader_concurrency_semaphore::evict(inactive_read& ir, evict_reason reason) noexcept { +std::runtime_error reader_concurrency_semaphore::stopped_exception() { + return std::runtime_error(format("{} was stopped", _name)); +} + +future<> reader_concurrency_semaphore::stop() noexcept { + assert(!_stopped); + _stopped = true; + clear_inactive_reads(); + co_await _close_readers_gate.close(); + broken(std::make_exception_ptr(stopped_exception())); + co_return; +} + +flat_mutation_reader reader_concurrency_semaphore::detach_inactive_reader(inactive_read& ir, evict_reason reason) noexcept { + auto reader = std::move(ir.reader); ir.detach(); std::unique_ptr irp(&ir); try { @@ -511,6 +546,19 @@ void reader_concurrency_semaphore::evict(inactive_read& ir, evict_reason reason) break; } --_stats.inactive_reads; + return std::move(reader); +} + +void reader_concurrency_semaphore::evict(inactive_read& ir, evict_reason reason) noexcept { + close_reader(detach_inactive_reader(ir, reason)); +} + +void reader_concurrency_semaphore::close_reader(flat_mutation_reader reader) { + // It is safe to discard the future since it is waited on indirectly + // by closing the _close_readers_gate in stop(). 
+ (void)with_gate(_close_readers_gate, [reader = std::move(reader)] () mutable { + return reader.close(); + }); } bool reader_concurrency_semaphore::has_available_units(const resources& r) const { @@ -539,22 +587,33 @@ future reader_concurrency_semaphore::enqueue_wait return fut; } +void reader_concurrency_semaphore::evict_readers_in_background() { + // Evict inactive readers in the background while wait list isn't empty + // This is safe since stop() closes _gate; + (void)with_gate(_close_readers_gate, [this] { + return do_until([this] { return _wait_list.empty() || _inactive_reads.empty(); }, [this] { + return detach_inactive_reader(_inactive_reads.front(), evict_reason::permit).close(); + }); + }); + } + future reader_concurrency_semaphore::do_wait_admission(reader_permit permit, size_t memory, db::timeout_clock::time_point timeout) { auto r = resources(1, static_cast(memory)); + auto first = _wait_list.empty(); - if (!_wait_list.empty()) { - return enqueue_waiter(std::move(permit), r, timeout); + if (first && has_available_units(r)) { + permit.on_admission(); + return make_ready_future(reader_permit::resource_units(std::move(permit), r)); } - while (!has_available_units(r)) { - if (!try_evict_one_inactive_read(evict_reason::permit)) { - return enqueue_waiter(std::move(permit), r, timeout); - } + auto fut = enqueue_waiter(std::move(permit), r, timeout); + + if (first && !_inactive_reads.empty()) { + evict_readers_in_background(); } - permit.on_admission(); - return make_ready_future(reader_permit::resource_units(std::move(permit), r)); + return fut; } reader_permit reader_concurrency_semaphore::make_permit(const schema* const schema, const char* const op_name) { @@ -566,8 +625,11 @@ reader_permit reader_concurrency_semaphore::make_permit(const schema* const sche } void reader_concurrency_semaphore::broken(std::exception_ptr ex) { + if (!ex) { + ex = std::make_exception_ptr(broken_semaphore{}); + } while (!_wait_list.empty()) { - 
_wait_list.front().pr.set_exception(std::make_exception_ptr(broken_semaphore{})); + _wait_list.front().pr.set_exception(ex); _wait_list.pop_front(); } } diff --git a/reader_concurrency_semaphore.hh b/reader_concurrency_semaphore.hh index b4b2b7a583..c41b7916da 100644 --- a/reader_concurrency_semaphore.hh +++ b/reader_concurrency_semaphore.hh @@ -23,6 +23,7 @@ #include #include +#include #include "reader_permit.hh" #include "flat_mutation_reader.hh" @@ -170,8 +171,11 @@ private: inactive_reads_type _inactive_reads; stats _stats; std::unique_ptr _permit_list; + bool _stopped = false; + gate _close_readers_gate; private: + [[nodiscard]] flat_mutation_reader detach_inactive_reader(inactive_read&, evict_reason reason) noexcept; void evict(inactive_read&, evict_reason reason) noexcept; bool has_available_units(const resources& r) const; @@ -179,9 +183,14 @@ private: // Add the permit to the wait queue and return the future which resolves when // the permit is admitted (popped from the queue). future enqueue_waiter(reader_permit permit, resources r, db::timeout_clock::time_point timeout); - + void evict_readers_in_background(); future do_wait_admission(reader_permit permit, size_t memory, db::timeout_clock::time_point timeout); + std::runtime_error stopped_exception(); + + // closes reader in the background. + void close_reader(flat_mutation_reader reader); + public: struct no_limits { }; @@ -249,8 +258,14 @@ public: /// (if there was no reader to evict). bool try_evict_one_inactive_read(evict_reason = evict_reason::manual); + /// Clear all inactive reads. void clear_inactive_reads(); + /// Stop the reader_concurrency_semaphore and clear all inactive reads. + /// + /// Wait on all async background work to complete. 
+ future<> stop() noexcept; + const stats& get_stats() const { return _stats; } @@ -294,5 +309,5 @@ public: return _wait_list.size(); } - void broken(std::exception_ptr ex); + void broken(std::exception_ptr ex = {}); }; diff --git a/reader_permit.hh b/reader_permit.hh index 4e46fb6521..6974a3ca6c 100644 --- a/reader_permit.hh +++ b/reader_permit.hh @@ -135,6 +135,8 @@ public: resource_units consume_resources(reader_resources res); reader_resources consumed_resources() const; + + sstring description() const; }; class reader_permit::resource_units { diff --git a/repair/repair.cc b/repair/repair.cc index d932a43b98..6e058f05bd 100644 --- a/repair/repair.cc +++ b/repair/repair.cc @@ -51,6 +51,7 @@ #include #include #include +#include logging::logger rlogger("repair"); diff --git a/repair/row_level.cc b/repair/row_level.cc index 7ef3119c8e..1bcb316df4 100644 --- a/repair/row_level.cc +++ b/repair/row_level.cc @@ -493,9 +493,17 @@ public: }); } - void on_end_of_stream() { + future<> on_end_of_stream() noexcept { + return _reader.close().then([this] { _reader = make_empty_flat_reader(_schema, _permit); _reader_handle.reset(); + }); + } + + future<> close() noexcept { + return _reader.close().then([this] { + _reader_handle.reset(); + }); } lw_shared_ptr& get_current_dk() { @@ -872,8 +880,11 @@ public: auto f1 = _sink_source_for_get_full_row_hashes.close(); auto f2 = _sink_source_for_get_row_diff.close(); auto f3 = _sink_source_for_put_row_diff.close(); + rlogger.info("repair_meta::stop"); return when_all_succeed(std::move(gate_future), std::move(f1), std::move(f2), std::move(f3)).discard_result().finally([this] { - return _repair_writer->wait_for_writer_done(); + return _repair_writer->wait_for_writer_done().finally([this] { + return close(); + }); }); } @@ -1065,6 +1076,10 @@ public: return std::pair, bool>(sync_boundary_min, already_synced); } + future<> close() noexcept { + return _repair_reader.close(); + } + private: future do_estimate_partitions_on_all_shards() { 
return estimate_partitions(_db, _schema->ks_name(), _schema->cf_name(), _range); @@ -1193,15 +1208,17 @@ private: _gate.check(); return _repair_reader.read_mutation_fragment().then([this, &cur_size, &new_rows_size, &cur_rows] (mutation_fragment_opt mfopt) mutable { if (!mfopt) { - _repair_reader.on_end_of_stream(); + return _repair_reader.on_end_of_stream().then([] { return stop_iteration::yes; + }); } - return handle_mutation_fragment(*mfopt, cur_size, new_rows_size, cur_rows); + return make_ready_future(handle_mutation_fragment(*mfopt, cur_size, new_rows_size, cur_rows)); }); }).then_wrapped([this, &cur_rows, &new_rows_size] (future<> fut) mutable { if (fut.failed()) { - _repair_reader.on_end_of_stream(); - return make_exception_future(fut.get_exception()); + return make_exception_future(fut.get_exception()).finally([this] { + return _repair_reader.on_end_of_stream(); + }); } _repair_reader.pause(); return make_ready_future(value_type(std::move(cur_rows), new_rows_size)); @@ -2815,6 +2832,11 @@ public: _all_live_peer_nodes, _all_live_peer_nodes.size(), this); + auto auto_close_master = defer([&master] { + master.close().handle_exception([] (std::exception_ptr ep) { + rlogger.warn("Failed auto-closing Row Level Repair (Master): {}. Ignored.", ep); + }).get(); + }); rlogger.debug(">>> Started Row Level Repair (Master): local={}, peers={}, repair_meta_id={}, keyspace={}, cf={}, schema_version={}, range={}, seed={}, max_row_buf_size={}", master.myip(), _all_live_peer_nodes, master.repair_meta_id(), _ri.keyspace, _cf_name, schema_version, _range, _seed, max_row_buf_size); diff --git a/row_cache.cc b/row_cache.cc index d4a1393834..672e69f98b 100644 --- a/row_cache.cc +++ b/row_cache.cc @@ -354,7 +354,7 @@ static flat_mutation_reader read_directly_from_underlying(read_context& reader) // Reader which populates the cache using data from the delegate. 
class single_partition_populating_reader final : public flat_mutation_reader::impl { row_cache& _cache; - lw_shared_ptr _read_context; + std::unique_ptr _read_context; flat_mutation_reader_opt _reader; private: future<> create_reader(db::timeout_clock::time_point timeout) { @@ -387,7 +387,7 @@ private: } public: single_partition_populating_reader(row_cache& cache, - lw_shared_ptr context) + std::unique_ptr context) : impl(context->schema(), context->permit()) , _cache(cache) , _read_context(std::move(context)) @@ -425,6 +425,11 @@ public: virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override { return make_exception_future<>(make_backtraced_exception_ptr()); } + virtual future<> close() noexcept override { + auto close_reader = _reader ? _reader->close() : make_ready_future<>(); + auto close_read_context = _read_context->close(); + return when_all_succeed(std::move(close_reader), std::move(close_read_context)).discard_result(); + } }; void cache_tracker::clear_continuity(cache_entry& ce) noexcept { @@ -544,12 +549,15 @@ public: return _reader.fast_forward_to(std::move(pr), timeout); } + future<> close() noexcept { + return _reader.close(); + } }; class scanning_and_populating_reader final : public flat_mutation_reader::impl { const dht::partition_range* _pr; row_cache& _cache; - lw_shared_ptr _read_context; + std::unique_ptr _read_context; partition_range_cursor _primary; range_populating_reader _secondary_reader; bool _read_next_partition = false; @@ -641,6 +649,8 @@ private: }); } future<> read_next_partition(db::timeout_clock::time_point timeout) { + auto close_reader = _reader ? _reader->close() : make_ready_future<>(); + return close_reader.then([this, timeout] { _read_next_partition = false; return (_secondary_in_progress ? 
read_from_secondary(timeout) : read_from_primary(timeout)).then([this] (auto&& fropt) { if (bool(fropt)) { @@ -649,11 +659,12 @@ private: _end_of_stream = true; } }); + }); } public: scanning_and_populating_reader(row_cache& cache, const dht::partition_range& range, - lw_shared_ptr context) + std::unique_ptr context) : impl(context->schema(), context->permit()) , _pr(&range) , _cache(cache) @@ -684,22 +695,27 @@ public: } virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override { clear_buffer(); - _reader = {}; _end_of_stream = false; _secondary_in_progress = false; _advance_primary = false; _pr = ≺ _primary = partition_range_cursor{_cache, pr}; _lower_bound = pr.start(); - return make_ready_future<>(); + return _reader->close(); } virtual future<> fast_forward_to(position_range cr, db::timeout_clock::time_point timeout) override { return make_exception_future<>(make_backtraced_exception_ptr()); } + virtual future<> close() noexcept override { + auto close_reader = _reader ? 
_reader->close() : make_ready_future<>(); + auto close_secondary_reader = _secondary_reader.close(); + auto close_read_context = _read_context->close(); + return when_all_succeed(std::move(close_reader), std::move(close_secondary_reader), std::move(close_read_context)).discard_result(); + } }; flat_mutation_reader -row_cache::make_scanning_reader(const dht::partition_range& range, lw_shared_ptr context) { +row_cache::make_scanning_reader(const dht::partition_range& range, std::unique_ptr context) { return make_flat_mutation_reader(*this, range, std::move(context)); } @@ -713,27 +729,29 @@ row_cache::make_reader(schema_ptr s, streamed_mutation::forwarding fwd, mutation_reader::forwarding fwd_mr) { - auto ctx = make_lw_shared(*this, s, std::move(permit), range, slice, pc, trace_state, fwd_mr); + auto make_context = [&] { + return std::make_unique(*this, s, std::move(permit), range, slice, pc, trace_state, fwd_mr); + }; - if (!ctx->is_range_query() && !fwd_mr) { + if (query::is_single_partition(range) && !fwd_mr) { tracing::trace(trace_state, "Querying cache for range {} and slice {}", range, seastar::value_of([&slice] { return slice.get_all_ranges(); })); auto mr = _read_section(_tracker.region(), [&] { dht::ring_position_comparator cmp(*_schema); - auto&& pos = ctx->range().start()->value(); + auto&& pos = range.start()->value(); partitions_type::bound_hint hint; auto i = _partitions.lower_bound(pos, cmp, hint); if (hint.match) { cache_entry& e = *i; upgrade_entry(e); on_partition_hit(); - return e.read(*this, *ctx); + return e.read(*this, make_context()); } else if (i->continuous()) { - return make_empty_flat_reader(std::move(s), ctx->permit()); + return make_empty_flat_reader(std::move(s), std::move(permit)); } else { tracing::trace(trace_state, "Range {} not found in cache", range); on_partition_miss(); - return make_flat_mutation_reader(*this, std::move(ctx)); + return make_flat_mutation_reader(*this, make_context()); } }); @@ -746,7 +764,7 @@ 
row_cache::make_reader(schema_ptr s, tracing::trace(trace_state, "Scanning cache for range {} and slice {}", range, seastar::value_of([&slice] { return slice.get_all_ranges(); })); - auto mr = make_scanning_reader(range, std::move(ctx)); + auto mr = make_scanning_reader(range, make_context()); if (fwd == streamed_mutation::forwarding::yes) { return make_forwardable(std::move(mr)); } else { @@ -1241,16 +1259,37 @@ flat_mutation_reader cache_entry::read(row_cache& rc, read_context& reader, row_ return do_read(rc, reader); } +flat_mutation_reader cache_entry::read(row_cache& rc, std::unique_ptr unique_ctx) { + auto source_and_phase = rc.snapshot_of(_key); + unique_ctx->enter_partition(_key, source_and_phase.snapshot, source_and_phase.phase); + return do_read(rc, std::move(unique_ctx)); +} + +flat_mutation_reader cache_entry::read(row_cache& rc, std::unique_ptr unique_ctx, row_cache::phase_type phase) { + unique_ctx->enter_partition(_key, phase); + return do_read(rc, std::move(unique_ctx)); +} + // Assumes reader is in the corresponding partition flat_mutation_reader cache_entry::do_read(row_cache& rc, read_context& reader) { auto snp = _pe.read(rc._tracker.region(), rc._tracker.cleaner(), _schema, &rc._tracker, reader.phase()); auto ckr = query::clustering_key_filter_ranges::get_ranges(*_schema, reader.slice(), _key.key()); - auto r = make_cache_flat_mutation_reader(_schema, _key, std::move(ckr), rc, reader.shared_from_this(), std::move(snp)); + auto r = make_cache_flat_mutation_reader(_schema, _key, std::move(ckr), rc, reader, std::move(snp)); r.upgrade_schema(rc.schema()); r.upgrade_schema(reader.schema()); return r; } +flat_mutation_reader cache_entry::do_read(row_cache& rc, std::unique_ptr unique_ctx) { + auto snp = _pe.read(rc._tracker.region(), rc._tracker.cleaner(), _schema, &rc._tracker, unique_ctx->phase()); + auto ckr = query::clustering_key_filter_ranges::get_ranges(*_schema, unique_ctx->slice(), _key.key()); + schema_ptr reader_schema = 
unique_ctx->schema(); + auto r = make_cache_flat_mutation_reader(_schema, _key, std::move(ckr), rc, std::move(unique_ctx), std::move(snp)); + r.upgrade_schema(rc.schema()); + r.upgrade_schema(reader_schema); + return r; +} + const schema_ptr& row_cache::schema() const { return _schema; } diff --git a/row_cache.hh b/row_cache.hh index f89b5b7307..46cb26e701 100644 --- a/row_cache.hh +++ b/row_cache.hh @@ -73,7 +73,8 @@ class cache_entry { } _flags{}; friend class size_calculator; - flat_mutation_reader do_read(row_cache&, cache::read_context& reader); + flat_mutation_reader do_read(row_cache&, cache::read_context& ctx); + flat_mutation_reader do_read(row_cache&, std::unique_ptr unique_ctx); public: friend class row_cache; friend class cache_tracker; @@ -142,7 +143,9 @@ public: const schema_ptr& schema() const noexcept { return _schema; } schema_ptr& schema() noexcept { return _schema; } flat_mutation_reader read(row_cache&, cache::read_context&); + flat_mutation_reader read(row_cache&, std::unique_ptr); flat_mutation_reader read(row_cache&, cache::read_context&, utils::phased_barrier::phase_type); + flat_mutation_reader read(row_cache&, std::unique_ptr, utils::phased_barrier::phase_type); bool continuous() const noexcept { return _flags._continuous; } void set_continuous(bool value) noexcept { _flags._continuous = value; } @@ -379,7 +382,7 @@ private: logalloc::allocating_section _populate_section; logalloc::allocating_section _read_section; flat_mutation_reader create_underlying_reader(cache::read_context&, mutation_source&, const dht::partition_range&); - flat_mutation_reader make_scanning_reader(const dht::partition_range&, lw_shared_ptr); + flat_mutation_reader make_scanning_reader(const dht::partition_range&, std::unique_ptr); void on_partition_hit(); void on_partition_miss(); void on_row_hit(); diff --git a/seastar b/seastar index 980a29fb70..0b2c25d133 160000 --- a/seastar +++ b/seastar @@ -1 +1 @@ -Subproject commit 980a29fb701e7cb637882ea06b7ee23ece6a7cdd 
+Subproject commit 0b2c25d13359d56ea2673fceb08e31d0d10dc85e diff --git a/sstables/compaction.cc b/sstables/compaction.cc index a73935dfbf..b200bb968f 100644 --- a/sstables/compaction.cc +++ b/sstables/compaction.cc @@ -51,6 +51,7 @@ #include #include +#include #include "sstables.hh" #include "sstables/progress_monitor.hh" @@ -643,13 +644,15 @@ private: auto now = gc_clock::now(); auto consumer = make_interposer_consumer([this, gc_consumer = std::move(gc_consumer), now] (flat_mutation_reader reader) mutable { - using compact_mutations = compact_for_compaction; - auto cfc = make_stable_flattened_mutations_consumer(*schema(), now, + return seastar::async([this, reader = std::move(reader), gc_consumer = std::move(gc_consumer), now] () mutable { + auto close_reader = deferred_close(reader); + + using compact_mutations = compact_for_compaction; + auto cfc = make_stable_flattened_mutations_consumer(*schema(), now, max_purgeable_func(), get_compacting_sstable_writer(), std::move(gc_consumer)); - return seastar::async([cfc = std::move(cfc), reader = std::move(reader), this] () mutable { reader.consume_in_thread(std::move(cfc), db::no_timeout); }); }); @@ -1307,6 +1310,9 @@ class scrub_compaction final : public regular_compaction { virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override { return make_exception_future<>(make_backtraced_exception_ptr()); } + virtual future<> close() noexcept override { + return _reader.close(); + } }; private: diff --git a/sstables/consumer.hh b/sstables/consumer.hh index 9dd193254a..ef462e3921 100644 --- a/sstables/consumer.hh +++ b/sstables/consumer.hh @@ -625,7 +625,7 @@ public: return _remain == 0; } - future<> close() { + future<> close() noexcept { return _input.close(); } }; diff --git a/sstables/index_entry.hh b/sstables/index_entry.hh index b2ba28982d..ce5d760347 100644 --- a/sstables/index_entry.hh +++ b/sstables/index_entry.hh @@ -170,7 +170,8 @@ public: }; virtual 
~clustered_index_cursor() {}; - virtual future<> close() = 0; + // Note: Close must not fail + virtual future<> close() noexcept = 0; // Advances the cursor to given position. When the cursor has more accurate information about // location of the fragments from the range [pos, +inf) in the data file (since it was last advanced) diff --git a/sstables/index_reader.hh b/sstables/index_reader.hh index 0bb5dc7dd4..e478aeb382 100644 --- a/sstables/index_reader.hh +++ b/sstables/index_reader.hh @@ -439,7 +439,7 @@ private: std::optional _upper_bound; private: - static future<> reset_clustered_cursor(index_bound& bound) { + static future<> reset_clustered_cursor(index_bound& bound) noexcept { if (bound.clustered_cursor) { return bound.clustered_cursor->close().then([&bound] { bound.clustered_cursor.reset(); @@ -700,7 +700,7 @@ private: return _sstable->data_size(); } - static future<> close(index_bound& b) { + static future<> close(index_bound& b) noexcept { return reset_clustered_cursor(b); } public: @@ -897,7 +897,8 @@ public: const shared_sstable& sstable() const { return _sstable; } - future<> close() { + future<> close() noexcept { + // index_bound::close must not fail return close(_lower_bound).then([this] { if (_upper_bound) { return close(*_upper_bound); diff --git a/sstables/mx/bsearch_clustered_cursor.hh b/sstables/mx/bsearch_clustered_cursor.hh index 3778508955..d60fc056c5 100644 --- a/sstables/mx/bsearch_clustered_cursor.hh +++ b/sstables/mx/bsearch_clustered_cursor.hh @@ -515,7 +515,7 @@ public: }); } - future<> close() override { + future<> close() noexcept override { return make_ready_future<>(); } }; diff --git a/sstables/scanning_clustered_index_cursor.hh b/sstables/scanning_clustered_index_cursor.hh index 1592f59e20..fc70487b97 100644 --- a/sstables/scanning_clustered_index_cursor.hh +++ b/sstables/scanning_clustered_index_cursor.hh @@ -220,7 +220,7 @@ public: return make_ready_future>(std::move(ei)); } - future<> close() override { + future<> close() 
noexcept override { if (!_reader_closed) { _reader_closed = true; return _reader.close(); diff --git a/sstables/sstable_mutation_reader.hh b/sstables/sstable_mutation_reader.hh index 54643706ea..470b8a3909 100644 --- a/sstables/sstable_mutation_reader.hh +++ b/sstables/sstable_mutation_reader.hh @@ -246,19 +246,10 @@ public: sstable_mutation_reader(sstable_mutation_reader&&) = delete; sstable_mutation_reader(const sstable_mutation_reader&) = delete; ~sstable_mutation_reader() { - _monitor.on_read_completed(); - auto close = [this] (std::unique_ptr& ptr) { - if (ptr) { - auto f = ptr->close(); - // FIXME: discarded future. - (void)f.handle_exception([index = std::move(ptr)] (auto&&) { }); - } - }; - close(_index_reader); - if (_context) { - auto f = _context->close(); - //FIXME: discarded future. - (void)f.handle_exception([ctx = std::move(_context), sst = _sst](auto) {}); + if (_context || _index_reader) { + sstlog.warn("sstable_mutation_reader was not closed. Closing in the background. Backtrace: {}", current_backtrace()); + // FIXME: discarded future. + (void)close(); } } private: @@ -540,6 +531,25 @@ public: return make_ready_future<>(); } } + virtual future<> close() noexcept override { + auto close_context = make_ready_future<>(); + if (_context) { + _monitor.on_read_completed(); + // move _context to prevent double-close from destructor. + close_context = _context->close().finally([_ = std::move(_context)] {}); + } + + auto close_index_reader = make_ready_future<>(); + if (_index_reader) { + // move _index_reader to prevent double-close from destructor. + close_index_reader = _index_reader->close().finally([_ = std::move(_index_reader)] {}); + } + + return when_all_succeed(std::move(close_context), std::move(close_index_reader)).discard_result().handle_exception([] (std::exception_ptr ep) { + // close can not fail as it is called either from the destructor or from flat_mutation_reader::close + sstlog.warn("Failed closing of sstable_mutation_reader: {}. 
Ignored since the reader is already done.", ep); + }); + } }; } diff --git a/sstables/sstable_set.cc b/sstables/sstable_set.cc index 126bdee862..d5383ac965 100644 --- a/sstables/sstable_set.cc +++ b/sstables/sstable_set.cc @@ -540,6 +540,11 @@ public: virtual bool empty(position_in_partition_view bound) const override { return !_dummy_reader && (_it == _end || _cmp(_it->first, bound) > 0); } + + virtual future<> close() noexcept override { + _it = _end; + return make_ready_future<>(); + } }; std::unique_ptr time_series_sstable_set::make_min_position_reader_queue( diff --git a/sstables/sstables.cc b/sstables/sstables.cc index cbfb09ee27..889980ffb5 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -1697,6 +1698,7 @@ future<> sstable::write_components( const io_priority_class& pc) { assert_large_data_handler_is_running(); return seastar::async([this, mr = std::move(mr), estimated_partitions, schema = std::move(schema), cfg, stats, &pc] () mutable { + auto close_mr = deferred_close(mr); auto wr = get_writer(*schema, estimated_partitions, cfg, stats, pc); mr.consume_in_thread(std::move(wr), db::no_timeout); }).finally([this] { diff --git a/streaming/stream_transfer_task.cc b/streaming/stream_transfer_task.cc index 3d1e55cd2e..0665a5e87c 100644 --- a/streaming/stream_transfer_task.cc +++ b/streaming/stream_transfer_task.cc @@ -230,6 +230,8 @@ future<> stream_transfer_task::execute() { return make_ready_future<>(); } return send_mutation_fragments(std::move(si)); + }).finally([si] { + return si->reader.close(); }); }).then([this, plan_id, cf_id, id] { sslog.debug("[Stream #{}] SEND STREAM_MUTATION_DONE to {}, cf_id={}", plan_id, id, cf_id); diff --git a/table.cc b/table.cc index 4d545e9295..971ea51910 100644 --- a/table.cc +++ b/table.cc @@ -19,6 +19,10 @@ * along with Scylla. If not, see . 
*/ +#include +#include +#include + #include "database.hh" #include "sstables/sstables.hh" #include "sstables/sstables_manager.hh" @@ -36,8 +40,6 @@ #include "db/query_context.hh" #include "query-result-writer.hh" #include "db/view/view.hh" -#include -#include #include #include #include "utils/error_injection.hh" @@ -134,7 +136,7 @@ void table::refresh_compound_sstable_set() { future table::find_partition(schema_ptr s, reader_permit permit, const dht::decorated_key& key) const { return do_with(dht::partition_range::make_singular(key), [s = std::move(s), permit = std::move(permit), this] (auto& range) mutable { - return do_with(this->make_reader(std::move(s), std::move(permit), range), [] (flat_mutation_reader& reader) { + return with_closeable(this->make_reader(std::move(s), std::move(permit), range), [] (flat_mutation_reader& reader) { return read_mutation_from_flat_mutation_reader(reader, db::no_timeout).then([] (mutation_opt&& mo) -> std::unique_ptr { if (!mo) { return {}; @@ -311,6 +313,8 @@ table::for_all_partitions_slow(schema_ptr s, reader_permit permit, std::function }); }).then([&is] { return is.ok; + }).finally([&is] { + return is.reader.close(); }); }); } @@ -1941,12 +1945,21 @@ table::query(schema_ptr s, : query::data_querier(as_mutation_source(), s, class_config.semaphore.make_permit(s.get(), "data-query"), range, qs.cmd.slice, service::get_local_sstable_query_read_priority(), trace_state); + std::exception_ptr ex; + try { co_await q.consume_page(query_result_builder(*s, qs.builder), qs.remaining_rows(), qs.remaining_partitions(), qs.cmd.timestamp, timeout, class_config.max_memory_for_unlimited_query); if (q.are_limits_reached() || qs.builder.is_short_read()) { cache_ctx.insert(std::move(q), std::move(trace_state)); } + } catch (...) 
{ + ex = std::current_exception(); + } + co_await q.close(); + if (ex) { + std::rethrow_exception(std::move(ex)); + } } co_return make_lw_shared(qs.builder.build()); @@ -1971,14 +1984,21 @@ table::mutation_query(schema_ptr s, : query::mutation_querier(as_mutation_source(), s, class_config.semaphore.make_permit(s.get(), "mutation-query"), range, cmd.slice, service::get_local_sstable_query_read_priority(), trace_state); + std::exception_ptr ex; + try { auto rrb = reconcilable_result_builder(*s, cmd.slice, std::move(accounter)); auto r = co_await q.consume_page(std::move(rrb), cmd.get_row_limit(), cmd.partition_limit, cmd.timestamp, timeout, class_config.max_memory_for_unlimited_query); if (q.are_limits_reached() || r.is_short_read()) { cache_ctx.insert(std::move(q), std::move(trace_state)); } - + co_await q.close(); co_return r; + } catch (...) { + ex = std::current_exception(); + } + co_await q.close(); + std::rethrow_exception(std::move(ex)); } mutation_source diff --git a/test/boost/broken_sstable_test.cc b/test/boost/broken_sstable_test.cc index 613d6605ae..8b4ea91a1a 100644 --- a/test/boost/broken_sstable_test.cc +++ b/test/boost/broken_sstable_test.cc @@ -21,6 +21,8 @@ #include #include +#include + #include "test/boost/sstable_test.hh" #include "test/lib/exception_utils.hh" @@ -44,6 +46,7 @@ static void broken_sst(sstring dir, unsigned long generation, schema_ptr s, sstr try { sstable_ptr sstp = env.reusable_sst(s, dir, generation, version).get0(); auto r = sstp->make_reader(s, tests::make_permit(), query::full_partition_range, s->full_slice()); + auto close_r = deferred_close(r); r.consume(my_consumer{}, db::no_timeout).get(); BOOST_FAIL("expecting exception"); } catch (malformed_sstable_exception& e) { diff --git a/test/boost/commitlog_test.cc b/test/boost/commitlog_test.cc index 0db64c49c0..e2ca07dcb1 100644 --- a/test/boost/commitlog_test.cc +++ b/test/boost/commitlog_test.cc @@ -38,6 +38,8 @@ #include #include #include +#include + #include 
"utils/UUID_gen.hh" #include "test/lib/tmpdir.hh" #include "db/commitlog/commitlog.hh" @@ -647,6 +649,7 @@ SEASTAR_TEST_CASE(test_commitlog_replay_invalid_key){ { auto rd = mt.make_flat_reader(s, tests::make_permit()); + auto close_rd = deferred_close(rd); auto mopt = read_mutation_from_flat_mutation_reader(rd, db::no_timeout).get0(); BOOST_REQUIRE(mopt); diff --git a/test/boost/flat_mutation_reader_test.cc b/test/boost/flat_mutation_reader_test.cc index d17eea2897..e3e84a14af 100644 --- a/test/boost/flat_mutation_reader_test.cc +++ b/test/boost/flat_mutation_reader_test.cc @@ -23,6 +23,7 @@ #include #include #include +#include #include "mutation.hh" #include "mutation_fragment.hh" @@ -90,6 +91,7 @@ struct mock_consumer { static size_t count_fragments(mutation m) { auto r = flat_mutation_reader_from_mutations(tests::make_permit(), {m}); + auto close_reader = deferred_close(r); size_t res = 0; auto mfopt = r(db::no_timeout).get0(); while (bool(mfopt)) { @@ -105,6 +107,7 @@ SEASTAR_TEST_CASE(test_flat_mutation_reader_consume_single_partition) { size_t fragments_in_m = count_fragments(m); for (size_t depth = 1; depth <= fragments_in_m + 1; ++depth) { auto r = flat_mutation_reader_from_mutations(tests::make_permit(), {m}); + auto close_reader = deferred_close(r); auto result = r.consume(mock_consumer(*m.schema(), depth), db::no_timeout).get0(); BOOST_REQUIRE(result._consume_end_of_stream_called); BOOST_REQUIRE_EQUAL(1, result._consume_new_partition_call_count); @@ -127,12 +130,14 @@ SEASTAR_TEST_CASE(test_flat_mutation_reader_consume_two_partitions) { size_t fragments_in_m2 = count_fragments(m2); for (size_t depth = 1; depth < fragments_in_m1; ++depth) { auto r = flat_mutation_reader_from_mutations(tests::make_permit(), {m1, m2}); + auto close_r = deferred_close(r); auto result = r.consume(mock_consumer(*m1.schema(), depth), db::no_timeout).get0(); BOOST_REQUIRE(result._consume_end_of_stream_called); BOOST_REQUIRE_EQUAL(1, result._consume_new_partition_call_count); 
BOOST_REQUIRE_EQUAL(1, result._consume_end_of_partition_call_count); BOOST_REQUIRE_EQUAL(m1.partition().partition_tombstone() ? 1 : 0, result._consume_tombstone_call_count); auto r2 = flat_mutation_reader_from_mutations(tests::make_permit(), {m1, m2}); + auto close_r2 = deferred_close(r2); auto start = r2(db::no_timeout).get0(); BOOST_REQUIRE(start); BOOST_REQUIRE(start->is_partition_start()); @@ -144,6 +149,7 @@ SEASTAR_TEST_CASE(test_flat_mutation_reader_consume_two_partitions) { } for (size_t depth = fragments_in_m1; depth < fragments_in_m1 + fragments_in_m2 + 1; ++depth) { auto r = flat_mutation_reader_from_mutations(tests::make_permit(), {m1, m2}); + auto close_r = deferred_close(r); auto result = r.consume(mock_consumer(*m1.schema(), depth), db::no_timeout).get0(); BOOST_REQUIRE(result._consume_end_of_stream_called); BOOST_REQUIRE_EQUAL(2, result._consume_new_partition_call_count); @@ -157,6 +163,7 @@ SEASTAR_TEST_CASE(test_flat_mutation_reader_consume_two_partitions) { } BOOST_REQUIRE_EQUAL(tombstones_count, result._consume_tombstone_call_count); auto r2 = flat_mutation_reader_from_mutations(tests::make_permit(), {m1, m2}); + auto close_r2 = deferred_close(r2); auto start = r2(db::no_timeout).get0(); BOOST_REQUIRE(start); BOOST_REQUIRE(start->is_partition_start()); @@ -286,6 +293,7 @@ SEASTAR_THREAD_TEST_CASE(test_flat_mutation_reader_move_buffer_content_to) { virtual future<> next_partition() { return make_ready_future<>(); } virtual future<> fast_forward_to(const dht::partition_range&, db::timeout_clock::time_point) override { return make_ready_future<>(); } virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point) override { return make_ready_future<>(); } + virtual future<> close() noexcept override { return make_ready_future<>(); }; }; simple_schema s; @@ -304,6 +312,7 @@ SEASTAR_THREAD_TEST_CASE(test_flat_mutation_reader_move_buffer_content_to) { const auto max_buffer_size = size_t{100}; auto reader = 
flat_mutation_reader_from_mutations(tests::make_permit(), {mut_orig}, dht::partition_range::make_open_ended_both_sides()); + auto close_reader = deferred_close(reader); auto dummy_impl = std::make_unique(s.schema(), tests::make_permit()); reader.set_max_buffer_size(max_buffer_size); @@ -338,6 +347,7 @@ SEASTAR_THREAD_TEST_CASE(test_flat_mutation_reader_move_buffer_content_to) { } auto dummy_reader = flat_mutation_reader(std::move(dummy_impl)); + auto close_dummy_reader = deferred_close(dummy_reader); auto mut_new = read_mutation_from_flat_mutation_reader(dummy_reader, db::no_timeout).get0(); assert_that(mut_new) @@ -542,21 +552,27 @@ void test_flat_stream(schema_ptr s, std::vector muts, reversed_partiti return fmr.consume_in_thread(std::move(fsc), db::no_timeout); } else { if (reversed) { - auto reverse_reader = make_reversing_reader(fmr, query::max_result_size(size_t(1) << 20)); - return reverse_reader.consume(std::move(fsc), db::no_timeout).get0(); + return with_closeable(make_reversing_reader(fmr, query::max_result_size(size_t(1) << 20)), [fsc = std::move(fsc)] (flat_mutation_reader& reverse_reader) mutable { + return reverse_reader.consume(std::move(fsc), db::no_timeout); + }).get0(); } return fmr.consume(std::move(fsc), db::no_timeout).get0(); } }; + { testlog.info("Consume all{}", reversed_msg); auto fmr = flat_mutation_reader_from_mutations(tests::make_permit(), muts); + auto close_fmr = deferred_close(fmr); auto muts2 = consume_fn(fmr, flat_stream_consumer(s, reversed)); BOOST_REQUIRE_EQUAL(muts, muts2); + } + { testlog.info("Consume first fragment from partition{}", reversed_msg); - fmr = flat_mutation_reader_from_mutations(tests::make_permit(), muts); - muts2 = consume_fn(fmr, flat_stream_consumer(s, reversed, skip_after_first_fragment::yes)); + auto fmr = flat_mutation_reader_from_mutations(tests::make_permit(), muts); + auto close_fmr = deferred_close(fmr); + auto muts2 = consume_fn(fmr, flat_stream_consumer(s, reversed, 
skip_after_first_fragment::yes)); BOOST_REQUIRE_EQUAL(muts.size(), muts2.size()); for (auto j = 0u; j < muts.size(); j++) { BOOST_REQUIRE(muts[j].decorated_key().equal(*muts[j].schema(), muts2[j].decorated_key())); @@ -566,13 +582,17 @@ void test_flat_stream(schema_ptr s, std::vector muts, reversed_partiti m.apply(muts2[j]); BOOST_REQUIRE_EQUAL(m, muts[j]); } + } + { testlog.info("Consume first partition{}", reversed_msg); - fmr = flat_mutation_reader_from_mutations(tests::make_permit(), muts); - muts2 = consume_fn(fmr, flat_stream_consumer(s, reversed, skip_after_first_fragment::no, + auto fmr = flat_mutation_reader_from_mutations(tests::make_permit(), muts); + auto close_fmr = deferred_close(fmr); + auto muts2 = consume_fn(fmr, flat_stream_consumer(s, reversed, skip_after_first_fragment::no, skip_after_first_partition::yes)); BOOST_REQUIRE_EQUAL(muts2.size(), 1); BOOST_REQUIRE_EQUAL(muts2[0], muts[0]); + } if (thread) { auto filter = flat_mutation_reader::filter([&] (const dht::decorated_key& dk) { @@ -584,8 +604,9 @@ void test_flat_stream(schema_ptr s, std::vector muts, reversed_partiti return true; }); testlog.info("Consume all, filtered"); - fmr = flat_mutation_reader_from_mutations(tests::make_permit(), muts); - muts2 = fmr.consume_in_thread(flat_stream_consumer(s, reversed), std::move(filter), db::no_timeout); + auto fmr = flat_mutation_reader_from_mutations(tests::make_permit(), muts); + auto close_fmr = deferred_close(fmr); + auto muts2 = fmr.consume_in_thread(flat_stream_consumer(s, reversed), std::move(filter), db::no_timeout); BOOST_REQUIRE_EQUAL(muts.size() / 2, muts2.size()); for (auto j = size_t(1); j < muts.size(); j += 2) { BOOST_REQUIRE_EQUAL(muts[j], muts2[j / 2]); @@ -674,6 +695,7 @@ SEASTAR_TEST_CASE(test_abandoned_flat_mutation_reader_from_mutation) { return seastar::async([] { for_each_mutation([&] (const mutation& m) { auto rd = flat_mutation_reader_from_mutations(tests::make_permit(), {mutation(m)}); + auto close_rd = deferred_close(rd); 
rd(db::no_timeout).get(); rd(db::no_timeout).get(); // We rely on AddressSanitizer telling us if nothing was leaked. @@ -724,13 +746,18 @@ SEASTAR_THREAD_TEST_CASE(test_mutation_reader_from_fragments_as_mutation_source) tracing::trace_state_ptr, streamed_mutation::forwarding fwd_sm, mutation_reader::forwarding) mutable { - std::deque fragments; - flat_mutation_reader_from_mutations(tests::make_permit(), squash_mutations(muts)).consume_pausable([&fragments] (mutation_fragment mf) { - fragments.emplace_back(std::move(mf)); - return stop_iteration::no; - }, db::no_timeout).get(); + auto get_fragments = [&muts] { + std::deque fragments; + auto rd = flat_mutation_reader_from_mutations(tests::make_permit(), squash_mutations(muts)); + auto close_rd = deferred_close(rd); + rd.consume_pausable([&fragments] (mutation_fragment mf) { + fragments.emplace_back(std::move(mf)); + return stop_iteration::no; + }, db::no_timeout).get(); + return std::move(fragments); + }; - auto rd = make_flat_mutation_reader_from_fragments(schema, tests::make_permit(), std::move(fragments), range, slice); + auto rd = make_flat_mutation_reader_from_fragments(schema, tests::make_permit(), get_fragments(), range, slice); if (fwd_sm) { return make_forwardable(std::move(rd)); } @@ -770,7 +797,11 @@ SEASTAR_THREAD_TEST_CASE(test_reverse_reader_memory_limit) { const uint64_t hard_limit = size_t(1) << 18; auto reader = flat_mutation_reader_from_mutations(tests::make_permit(), {mut}); + // need to close both readers since the reverse_reader + // doesn't own the reader passed to it by ref. 
+ auto close_reader = deferred_close(reader); auto reverse_reader = make_reversing_reader(reader, query::max_result_size(size_t(1) << 10, hard_limit)); + auto close_reverse_reader = deferred_close(reverse_reader); try { reverse_reader.consume(phony_consumer{}, db::no_timeout).get(); diff --git a/test/boost/frozen_mutation_test.cc b/test/boost/frozen_mutation_test.cc index f0e07a411e..6f0def8298 100644 --- a/test/boost/frozen_mutation_test.cc +++ b/test/boost/frozen_mutation_test.cc @@ -25,6 +25,7 @@ #include "test/lib/test_services.hh" #include #include +#include #include "frozen_mutation.hh" #include "schema_builder.hh" @@ -108,6 +109,7 @@ SEASTAR_THREAD_TEST_CASE(test_frozen_mutation_fragment) { auto& s = *m.schema(); std::vector mfs; auto rd = flat_mutation_reader_from_mutations(tests::make_permit(), { m }); + auto close_rd = deferred_close(rd); rd.consume_pausable([&] (mutation_fragment mf) { mfs.emplace_back(std::move(mf)); return stop_iteration::no; diff --git a/test/boost/memtable_test.cc b/test/boost/memtable_test.cc index 3e686bb5a3..476eedc7d3 100644 --- a/test/boost/memtable_test.cc +++ b/test/boost/memtable_test.cc @@ -27,6 +27,7 @@ #include #include #include "schema_builder.hh" +#include #include #include "memtable.hh" @@ -89,9 +90,17 @@ SEASTAR_TEST_CASE(test_memtable_with_many_versions_conforms_to_mutation_source) return seastar::async([] { lw_shared_ptr mt; std::vector readers; + auto clear_readers = [&readers] { + parallel_for_each(readers, [] (flat_mutation_reader& rd) { + return rd.close(); + }).finally([&readers] { + readers.clear(); + }).get(); + }; + auto cleanup_readers = defer([&] { clear_readers(); }); std::deque ranges_storage; run_mutation_source_tests([&] (schema_ptr s, const std::vector& muts) { - readers.clear(); + clear_readers(); mt = make_lw_shared(s); for (auto&& m : muts) { @@ -100,7 +109,7 @@ SEASTAR_TEST_CASE(test_memtable_with_many_versions_conforms_to_mutation_source) flat_mutation_reader rd = mt->make_flat_reader(s, 
tests::make_permit(), ranges_storage.emplace_back(dht::partition_range::make_singular(m.decorated_key()))); rd.set_max_buffer_size(1); rd.fill_buffer(db::no_timeout).get(); - readers.push_back(std::move(rd)); + readers.emplace_back(std::move(rd)); } return mt->as_data_source(); @@ -251,6 +260,7 @@ SEASTAR_TEST_CASE(test_virtual_dirty_accounting_on_flush) { // Create a reader which will cause many partition versions to be created flat_mutation_reader_opt rd1 = mt->make_flat_reader(s, tests::make_permit()); + auto close_rd1 = deferred_close(*rd1); rd1->set_max_buffer_size(1); rd1->fill_buffer(db::no_timeout).get(); @@ -273,7 +283,7 @@ SEASTAR_TEST_CASE(test_virtual_dirty_accounting_on_flush) { virtual_dirty_values.push_back(mgr.virtual_dirty_memory()); while ((*rd1)(db::no_timeout).get0()) ; - rd1 = {}; + close_rd1.close_now(); logalloc::shard_tracker().full_compaction(); @@ -385,6 +395,7 @@ SEASTAR_TEST_CASE(test_segment_migration_during_flush) { virtual_dirty_values.push_back(mgr.virtual_dirty_memory()); auto rd = mt->make_flush_reader(s, service::get_local_priority_manager().memtable_flush_priority()); + auto close_rd = deferred_close(rd); for (int i = 0; i < partitions; ++i) { auto mfopt = rd(db::no_timeout).get0(); @@ -504,6 +515,7 @@ SEASTAR_TEST_CASE(test_hash_is_cached) { { auto rd = mt->make_flat_reader(s, tests::make_permit()); + auto close_rd = deferred_close(rd); rd(db::no_timeout).get0()->as_partition_start(); clustering_row row = std::move(*rd(db::no_timeout).get0()).as_clustering_row(); BOOST_REQUIRE(!row.cells().cell_hash_for(0)); @@ -513,6 +525,7 @@ SEASTAR_TEST_CASE(test_hash_is_cached) { auto slice = s->full_slice(); slice.options.set(); auto rd = mt->make_flat_reader(s, tests::make_permit(), query::full_partition_range, slice); + auto close_rd = deferred_close(rd); rd(db::no_timeout).get0()->as_partition_start(); clustering_row row = std::move(*rd(db::no_timeout).get0()).as_clustering_row(); BOOST_REQUIRE(row.cells().cell_hash_for(0)); @@ -520,6 
+533,7 @@ SEASTAR_TEST_CASE(test_hash_is_cached) { { auto rd = mt->make_flat_reader(s, tests::make_permit()); + auto close_rd = deferred_close(rd); rd(db::no_timeout).get0()->as_partition_start(); clustering_row row = std::move(*rd(db::no_timeout).get0()).as_clustering_row(); BOOST_REQUIRE(row.cells().cell_hash_for(0)); @@ -530,6 +544,7 @@ SEASTAR_TEST_CASE(test_hash_is_cached) { { auto rd = mt->make_flat_reader(s, tests::make_permit()); + auto close_rd = deferred_close(rd); rd(db::no_timeout).get0()->as_partition_start(); clustering_row row = std::move(*rd(db::no_timeout).get0()).as_clustering_row(); BOOST_REQUIRE(!row.cells().cell_hash_for(0)); @@ -539,6 +554,7 @@ SEASTAR_TEST_CASE(test_hash_is_cached) { auto slice = s->full_slice(); slice.options.set(); auto rd = mt->make_flat_reader(s, tests::make_permit(), query::full_partition_range, slice); + auto close_rd = deferred_close(rd); rd(db::no_timeout).get0()->as_partition_start(); clustering_row row = std::move(*rd(db::no_timeout).get0()).as_clustering_row(); BOOST_REQUIRE(row.cells().cell_hash_for(0)); @@ -546,6 +562,7 @@ SEASTAR_TEST_CASE(test_hash_is_cached) { { auto rd = mt->make_flat_reader(s, tests::make_permit()); + auto close_rd = deferred_close(rd); rd(db::no_timeout).get0()->as_partition_start(); clustering_row row = std::move(*rd(db::no_timeout).get0()).as_clustering_row(); BOOST_REQUIRE(row.cells().cell_hash_for(0)); diff --git a/test/boost/multishard_mutation_query_test.cc b/test/boost/multishard_mutation_query_test.cc index 7cbeddaae3..f692647864 100644 --- a/test/boost/multishard_mutation_query_test.cc +++ b/test/boost/multishard_mutation_query_test.cc @@ -295,7 +295,7 @@ SEASTAR_THREAD_TEST_CASE(test_evict_a_shard_reader_on_each_page) { check_cache_population(env.db(), 1); env.db().invoke_on(page % smp::count, [&] (database& db) { - db.get_querier_cache().evict_one(); + return db.get_querier_cache().evict_one(); }).get(); tests::require_equal(aggregate_querier_cache_stat(env.db(), 
&query::querier_cache::stats::drops), 0u); diff --git a/test/boost/mutation_fragment_test.cc b/test/boost/mutation_fragment_test.cc index f1e1d37ba0..a68fe1bb17 100644 --- a/test/boost/mutation_fragment_test.cc +++ b/test/boost/mutation_fragment_test.cc @@ -22,6 +22,8 @@ #include #include +#include +#include #include "test/lib/mutation_source_test.hh" #include "mutation_fragment.hh" @@ -112,6 +114,7 @@ SEASTAR_TEST_CASE(test_mutation_merger_conforms_to_mutation_source) { muts.push_back(mutation(m.schema(), m.decorated_key())); } auto rd = flat_mutation_reader_from_mutations(tests::make_permit(), {m}); + auto close_rd = deferred_close(rd); rd.consume(fragment_scatterer{muts}, db::no_timeout).get(); for (int i = 0; i < n; ++i) { memtables[i]->apply(std::move(muts[i])); @@ -398,6 +401,7 @@ SEASTAR_TEST_CASE(test_schema_upgrader_is_equivalent_with_mutation_upgrade) { // upgrade m1 to m2's schema auto reader = transform(flat_mutation_reader_from_mutations(tests::make_permit(), {m1}), schema_upgrader(m2.schema())); + auto close_reader = deferred_close(reader); auto from_upgrader = read_mutation_from_flat_mutation_reader(reader, db::no_timeout).get0(); auto regular = m1; @@ -409,12 +413,13 @@ SEASTAR_TEST_CASE(test_schema_upgrader_is_equivalent_with_mutation_upgrade) { }); } -SEASTAR_TEST_CASE(test_mutation_fragment_mutate_exception_safety) { +SEASTAR_THREAD_TEST_CASE(test_mutation_fragment_mutate_exception_safety) { struct dummy_exception { }; simple_schema s; reader_concurrency_semaphore sem(1, 100, get_name()); + auto stop_sem = deferred_stop(sem); auto permit = sem.make_permit(s.schema().get(), get_name()); const auto available_res = sem.available_resources(); @@ -467,6 +472,4 @@ SEASTAR_TEST_CASE(test_mutation_fragment_mutate_exception_safety) { } catch (dummy_exception&) { } BOOST_REQUIRE(available_res == sem.available_resources()); } - - return make_ready_future<>(); } diff --git a/test/boost/mutation_query_test.cc b/test/boost/mutation_query_test.cc index 
47c041c3c5..a415089ee7 100644 --- a/test/boost/mutation_query_test.cc +++ b/test/boost/mutation_query_test.cc @@ -90,6 +90,7 @@ static reconcilable_result mutation_query(schema_ptr s, const mutation_source& s uint64_t row_limit, uint32_t partition_limit, gc_clock::time_point query_time) { auto querier = query::mutation_querier(source, s, tests::make_permit(), range, slice, service::get_local_sstable_query_read_priority(), {}); + auto close_querier = deferred_close(querier); auto rrb = reconcilable_result_builder(*s, slice, make_accounter()); return querier.consume_page(std::move(rrb), row_limit, partition_limit, query_time, db::no_timeout, query::max_result_size(std::numeric_limits::max())).get(); @@ -538,6 +539,7 @@ SEASTAR_TEST_CASE(test_partition_limit) { static void data_query(schema_ptr s, const mutation_source& source, const dht::partition_range& range, const query::partition_slice& slice, query::result::builder& builder) { auto querier = query::data_querier(source, s, tests::make_permit(), range, slice, service::get_local_sstable_query_read_priority(), {}); + auto close_querier = deferred_close(querier); auto qrb = query_result_builder(*s, builder); querier.consume_page(std::move(qrb), std::numeric_limits::max(), std::numeric_limits::max(), gc_clock::now(), db::no_timeout, query::max_result_size(std::numeric_limits::max())).get(); diff --git a/test/boost/mutation_reader_test.cc b/test/boost/mutation_reader_test.cc index 35ccde109b..ec1846c280 100644 --- a/test/boost/mutation_reader_test.cc +++ b/test/boost/mutation_reader_test.cc @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -896,12 +897,13 @@ sstables::shared_sstable create_sstable(sstables::test_env& env, simple_schema& , mutations); } -SEASTAR_TEST_CASE(test_reader_concurrency_semaphore_clear_inactive_reads) { +SEASTAR_THREAD_TEST_CASE(test_reader_concurrency_semaphore_clear_inactive_reads) { simple_schema s; std::vector handles; { reader_concurrency_semaphore 
semaphore(reader_concurrency_semaphore::no_limits{}, get_name()); + auto stop_sem = deferred_stop(semaphore); for (int i = 0; i < 10; ++i) { handles.emplace_back(semaphore.register_inactive_read(make_empty_flat_reader(s.schema(), semaphore.make_permit(s.schema().get(), get_name())))); @@ -924,14 +926,13 @@ SEASTAR_TEST_CASE(test_reader_concurrency_semaphore_clear_inactive_reads) { // Check that the destructor also clears inactive reads. BOOST_REQUIRE(std::all_of(handles.begin(), handles.end(), [] (const reader_concurrency_semaphore::inactive_read_handle& handle) { return !bool(handle); })); - - return make_ready_future<>(); } SEASTAR_THREAD_TEST_CASE(test_reader_concurrency_semaphore_destroyed_permit_releases_units) { simple_schema s; const auto initial_resources = reader_concurrency_semaphore::resources{10, 1024 * 1024}; reader_concurrency_semaphore semaphore(initial_resources.count, initial_resources.memory, get_name()); + auto stop_sem = deferred_stop(semaphore); // Not admitted, active { @@ -1028,6 +1029,10 @@ public: return make_exception_future<>(make_backtraced_exception_ptr()); } + future<> close() noexcept { + return _reader.close(); + } + std::size_t call_count() const { return _call_count; } @@ -1063,6 +1068,7 @@ public: return flat_mutation_reader(std::move(tracker_ptr)); }); + _reader.close().get(); _reader = make_restricted_flat_reader(std::move(ms), schema, semaphore.make_permit(schema.get(), "reader-wrapper")); } @@ -1074,6 +1080,14 @@ public: : reader_wrapper(semaphore, std::move(schema), std::move(sst), db::timeout_clock::now() + timeout_duration) { } + reader_wrapper(const reader_wrapper&) = delete; + reader_wrapper(reader_wrapper&&) = default; + + // must be called in a seastar thread. 
+ ~reader_wrapper() { + _reader.close().get(); + } + future<> operator()() { while (!_reader.is_buffer_empty()) { _reader.pop_mutation_fragment(); @@ -1096,6 +1110,10 @@ public: bool created() const { return bool(_tracker); } + + future<> close() noexcept { + return _reader.close(); + } }; class dummy_file_impl : public file_impl { @@ -1159,6 +1177,7 @@ class dummy_file_impl : public file_impl { SEASTAR_TEST_CASE(reader_restriction_file_tracking) { return async([&] { reader_concurrency_semaphore semaphore(100, 4 * 1024, get_name()); + auto stop_sem = deferred_stop(semaphore); auto permit = semaphore.make_permit(nullptr, get_name()); permit.wait_admission(0, db::no_timeout).get(); @@ -1216,6 +1235,7 @@ SEASTAR_TEST_CASE(restricted_reader_reading) { return sstables::test_env::do_with_async([&] (sstables::test_env& env) { storage_service_for_tests ssft; reader_concurrency_semaphore semaphore(2, new_reader_base_cost, get_name()); + auto stop_sem = deferred_stop(semaphore); { simple_schema s; @@ -1245,6 +1265,7 @@ SEASTAR_TEST_CASE(restricted_reader_reading) { BOOST_REQUIRE_EQUAL(semaphore.waiters(), 2); // Move reader1 to the heap, so that we can safely destroy it. + reader1.close().get(); auto reader1_ptr = std::make_unique(std::move(reader1)); reader1_ptr.reset(); @@ -1258,6 +1279,7 @@ SEASTAR_TEST_CASE(restricted_reader_reading) { BOOST_REQUIRE_EQUAL(semaphore.waiters(), 1); // Move reader2 to the heap, so that we can safely destroy it. + reader2.close().get(); auto reader2_ptr = std::make_unique(std::move(reader2)); reader2_ptr.reset(); @@ -1276,6 +1298,7 @@ SEASTAR_TEST_CASE(restricted_reader_reading) { BOOST_REQUIRE_EQUAL(reader3.call_count(), 2); read3_fut.get(); } + reader3.close().get(); } // All units should have been deposited back. 
@@ -1287,6 +1310,7 @@ SEASTAR_TEST_CASE(restricted_reader_timeout) { return sstables::test_env::do_with_async([&] (sstables::test_env& env) { storage_service_for_tests ssft; reader_concurrency_semaphore semaphore(2, new_reader_base_cost, get_name()); + auto stop_sem = deferred_stop(semaphore); { simple_schema s; @@ -1315,10 +1339,15 @@ SEASTAR_TEST_CASE(restricted_reader_timeout) { } else { // We need special cleanup when the test failed to avoid invalid // memory access. + reader1->close().get(); reader1.reset(); BOOST_CHECK(eventually_true([&] { return read2_fut.available(); })); + + reader2->close().get(); reader2.reset(); BOOST_CHECK(eventually_true([&] { return read3_fut.available(); })); + + reader3->close().get(); reader3.reset(); } } @@ -1333,6 +1362,7 @@ SEASTAR_TEST_CASE(restricted_reader_max_queue_length) { storage_service_for_tests ssft; reader_concurrency_semaphore semaphore(2, new_reader_base_cost, get_name(), 2); + auto stop_sem = deferred_stop(semaphore); { simple_schema s; @@ -1354,11 +1384,15 @@ SEASTAR_TEST_CASE(restricted_reader_max_queue_length) { // The queue should now be full. 
BOOST_REQUIRE_THROW(reader4().get(), std::runtime_error); + reader4.close().get(); + reader1_ptr->close().get(); reader1_ptr.reset(); read2_fut.get(); + reader2_ptr->close().get(); reader2_ptr.reset(); read3_fut.get(); + reader3_ptr->close().get(); } REQUIRE_EVENTUALLY_EQUAL(new_reader_base_cost, semaphore.available_resources().memory); @@ -1369,6 +1403,7 @@ SEASTAR_TEST_CASE(restricted_reader_create_reader) { return sstables::test_env::do_with_async([&] (sstables::test_env& env) { storage_service_for_tests ssft; reader_concurrency_semaphore semaphore(100, new_reader_base_cost, get_name()); + auto stop_sem = deferred_stop(semaphore); { simple_schema s; @@ -1377,6 +1412,7 @@ SEASTAR_TEST_CASE(restricted_reader_create_reader) { { auto reader = reader_wrapper(semaphore, s.schema(), sst); + auto close_reader = deferred_close(reader); // This fast-forward is stupid, I know but the // underlying dummy reader won't care, so it's fine. reader.fast_forward_to(query::full_partition_range).get(); @@ -1388,6 +1424,7 @@ SEASTAR_TEST_CASE(restricted_reader_create_reader) { { auto reader = reader_wrapper(semaphore, s.schema(), sst); + auto close_reader = deferred_close(reader); reader().get(); BOOST_REQUIRE(reader.created()); @@ -1469,6 +1506,7 @@ SEASTAR_TEST_CASE(test_fast_forwarding_combined_reader_is_consistent_with_slicin flat_mutation_reader rd = make_combined_reader(s, tests::make_permit(), std::move(readers), streamed_mutation::forwarding::yes, mutation_reader::forwarding::yes); + auto close_rd = deferred_close(rd); std::vector ranges = gen.make_random_ranges(3); @@ -1539,6 +1577,7 @@ SEASTAR_TEST_CASE(test_combined_reader_slicing_with_overlapping_range_tombstones auto rd = make_combined_reader(s, tests::make_permit(), std::move(readers), streamed_mutation::forwarding::no, mutation_reader::forwarding::no); + auto close_rd = deferred_close(rd); auto prange = position_range(range); mutation result(m1.schema(), m1.decorated_key()); @@ -1564,6 +1603,7 @@ 
SEASTAR_TEST_CASE(test_combined_reader_slicing_with_overlapping_range_tombstones auto rd = make_combined_reader(s, tests::make_permit(), std::move(readers), streamed_mutation::forwarding::yes, mutation_reader::forwarding::no); + auto close_rd = deferred_close(rd); auto prange = position_range(range); mutation result(m1.schema(), m1.decorated_key()); @@ -1609,7 +1649,9 @@ SEASTAR_TEST_CASE(test_combined_mutation_source_is_a_mutation_source) { int source_index = 0; for (auto&& m : muts) { - flat_mutation_reader_from_mutations(tests::make_permit(), {m}).consume_pausable([&] (mutation_fragment&& mf) { + auto rd = flat_mutation_reader_from_mutations(tests::make_permit(), {m}); + auto close_rd = deferred_close(rd); + rd.consume_pausable([&] (mutation_fragment&& mf) { mutation mf_m(m.schema(), m.decorated_key()); mf_m.partition().apply(*s, mf); memtables[source_index++ % memtables.size()]->apply(mf_m); @@ -2172,27 +2214,30 @@ public: virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point) override { return make_exception_future<>(make_backtraced_exception_ptr()); } + virtual future<> close() noexcept override { + return make_ready_future<>(); + }; }; -// Test a background pending read-ahead outliving the reader. +// Test a background pending read-ahead. // // Foreign reader launches a new background read-ahead (fill_buffer()) after // each remote operation (fill_buffer() and fast_forward_to()) is completed. // This read-ahead executes on the background and is only synchronized with // when a next remote operation is executed. If the reader is destroyed before // this synchronization can happen then the remote read-ahead will outlive its -// owner. Check that when this happens the orphan read-ahead will terminate -// gracefully and will not cause any memory errors. +// owner. Check that when the reader is closed, it waits on any background +// read-ahead to complete gracefully and will not cause any memory errors. 
// // Theory of operation: // 1) Call foreign_reader::fill_buffer() -> will start read-ahead in the // background; // 2) [shard 1] puppet_reader blocks the read-ahead; -// 3) Destroy foreign_reader; +// 3) Start closing the foreign_reader; // 4) Unblock read-ahead -> the now orphan read-ahead fiber executes; // // Best run with smp >= 2 -SEASTAR_THREAD_TEST_CASE(test_foreign_reader_destroyed_with_pending_read_ahead) { +SEASTAR_THREAD_TEST_CASE(test_stopping_reader_with_pending_read_ahead) { if (smp::count < 2) { std::cerr << "Cannot run test " << get_name() << " with smp::count < 2" << std::endl; return; @@ -2217,27 +2262,33 @@ SEASTAR_THREAD_TEST_CASE(test_foreign_reader_destroyed_with_pending_read_ahead) auto& remote_control = std::get<0>(remote_control_remote_reader); auto& remote_reader = std::get<1>(remote_control_remote_reader); - { auto reader = make_foreign_reader(s.schema(), tests::make_permit(), std::move(remote_reader)); reader.fill_buffer(db::no_timeout).get(); BOOST_REQUIRE(!reader.is_buffer_empty()); - } BOOST_REQUIRE(!smp::submit_to(shard_of_interest, [remote_control = remote_control.get()] { return remote_control->destroyed; }).get0()); - smp::submit_to(shard_of_interest, [remote_control = remote_control.get()] { + bool buffer_filled = false; + auto destroyed_after_close = reader.close().then([&] { + // close should wait on the read-ahead and complete + // only after `remote_control->buffer_filled.set_value()` + // is executed below. 
+ BOOST_REQUIRE(buffer_filled); + return smp::submit_to(shard_of_interest, [remote_control = remote_control.get()] { + return remote_control->destroyed; + }); + }); + + smp::submit_to(shard_of_interest, [remote_control = remote_control.get(), &buffer_filled] { + buffer_filled = true; remote_control->buffer_filled.set_value(); }).get0(); - BOOST_REQUIRE(eventually_true([&] { - return smp::submit_to(shard_of_interest, [remote_control = remote_control.get()] { - return remote_control->destroyed; - }).get0(); - })); + BOOST_REQUIRE(destroyed_after_close.get0()); return make_ready_future<>(); }).get(); @@ -2490,7 +2541,8 @@ SEASTAR_THREAD_TEST_CASE(test_multishard_combining_reader_destroyed_with_pending })); // Destroy reader. - reader = flat_mutation_reader(nullptr); + testlog.debug("Starting to close the reader"); + auto fut = reader.close(); parallel_for_each(boost::irange(0u, smp::count), [&remote_controls] (unsigned shard) mutable { return smp::submit_to(shard, [control = remote_controls.at(shard).get()] { @@ -2498,6 +2550,9 @@ SEASTAR_THREAD_TEST_CASE(test_multishard_combining_reader_destroyed_with_pending }); }).get(); + fut.get(); + testlog.debug("Reader is closed"); + BOOST_REQUIRE(eventually_true([&] { return map_reduce(boost::irange(0u, smp::count), [&] (unsigned shard) { return smp::submit_to(shard, [&remote_controls, shard] { @@ -2575,7 +2630,7 @@ SEASTAR_THREAD_TEST_CASE(test_multishard_combining_reader_fast_forwarded_with_pe BOOST_REQUIRE(reader.is_buffer_empty()); BOOST_REQUIRE(reader.is_end_of_stream()); - reader = flat_mutation_reader(nullptr); + reader.close().get(); BOOST_REQUIRE(eventually_true([&] { return map_reduce(boost::irange(0u, smp::count), [&] (unsigned shard) { @@ -2720,6 +2775,7 @@ SEASTAR_THREAD_TEST_CASE(test_multishard_combining_reader_non_strictly_monotonic { auto fragments = make_fragments_with_non_monotonic_positions(s, s.make_pkey(pk), max_buffer_size, tombstone_deletion_time); auto rd = 
make_flat_mutation_reader_from_fragments(s.schema(), tests::make_permit(), std::move(fragments)); + auto close_rd = deferred_close(rd); rd.set_max_buffer_size(max_buffer_size); rd.fill_buffer(db::no_timeout).get(); @@ -2778,6 +2834,7 @@ SEASTAR_THREAD_TEST_CASE(test_multishard_combining_reader_non_strictly_monotonic auto fragments = make_fragments_with_non_monotonic_positions(s, s.make_pkey(pk), max_buffer_size, tombstone_deletion_time); auto rd = make_flat_mutation_reader_from_fragments(s.schema(), tests::make_permit(), std::move(fragments)); + auto close_rd = deferred_close(rd); auto mut_opt = read_mutation_from_flat_mutation_reader(rd, db::no_timeout).get0(); BOOST_REQUIRE(mut_opt); @@ -2836,6 +2893,7 @@ SEASTAR_THREAD_TEST_CASE(test_multishard_streaming_reader) { } return std::nullopt; }); + auto close_tested_reader = deferred_close(tested_reader); auto reader_factory = [db = &env.db()] ( schema_ptr s, @@ -2854,6 +2912,7 @@ SEASTAR_THREAD_TEST_CASE(test_multishard_streaming_reader) { [&remote_partitioner] (const dht::decorated_key& pkey) { return remote_partitioner.shard_of(pkey.token()) == 0; }); + auto close_reference_reader = deferred_close(reference_reader); std::vector reference_muts; while (auto mut_opt = read_mutation_from_flat_mutation_reader(reference_reader, db::no_timeout).get0()) { @@ -2886,6 +2945,7 @@ SEASTAR_THREAD_TEST_CASE(test_queue_reader) { { auto read_all = [] (flat_mutation_reader& reader, std::vector& muts) { return async([&reader, &muts] { + auto close_reader = deferred_close(reader); while (auto mut_opt = read_mutation_from_flat_mutation_reader(reader, db::no_timeout).get0()) { muts.emplace_back(std::move(*mut_opt)); } @@ -2895,6 +2955,7 @@ SEASTAR_THREAD_TEST_CASE(test_queue_reader) { auto write_all = [] (queue_reader_handle& handle, const std::vector& muts) { return async([&] { auto reader = flat_mutation_reader_from_mutations(tests::make_permit(), muts); + auto close_reader = deferred_close(reader); while (auto mf_opt = 
reader(db::no_timeout).get0()) { handle.push(std::move(*mf_opt)).get(); } @@ -2905,8 +2966,10 @@ SEASTAR_THREAD_TEST_CASE(test_queue_reader) { auto actual_muts = std::vector{}; actual_muts.reserve(20); - auto [reader, handle] = make_queue_reader(gen.schema(), tests::make_permit()); - + auto p = make_queue_reader(gen.schema(), tests::make_permit()); + auto& reader = std::get<0>(p); + auto& handle = std::get<1>(p); + auto close_reader = deferred_close(reader); when_all_succeed(read_all(reader, actual_muts), write_all(handle, expected_muts)).get(); BOOST_REQUIRE_EQUAL(actual_muts.size(), expected_muts.size()); for (size_t i = 0; i < expected_muts.size(); ++i) { @@ -2916,10 +2979,14 @@ SEASTAR_THREAD_TEST_CASE(test_queue_reader) { // abort() -- check that consumer is aborted { - auto [reader, handle] = make_queue_reader(gen.schema(), tests::make_permit()); + auto p = make_queue_reader(gen.schema(), tests::make_permit()); + auto& reader = std::get<0>(p); + auto& handle = std::get<1>(p); + auto close_reader = deferred_close(reader); auto fill_buffer_fut = reader.fill_buffer(db::no_timeout); auto expected_reader = flat_mutation_reader_from_mutations(tests::make_permit(), expected_muts); + auto close_expected_reader = deferred_close(expected_reader); handle.push(std::move(*expected_reader(db::no_timeout).get0())).get(); @@ -2934,10 +3001,14 @@ SEASTAR_THREAD_TEST_CASE(test_queue_reader) { // abort() -- check that producer is aborted { - auto [reader, handle] = make_queue_reader(gen.schema(), tests::make_permit()); + auto p = make_queue_reader(gen.schema(), tests::make_permit()); + auto& reader = std::get<0>(p); + auto& handle = std::get<1>(p); + auto close_reader = deferred_close(reader); reader.set_max_buffer_size(1); auto expected_reader = flat_mutation_reader_from_mutations(tests::make_permit(), expected_muts); + auto close_expected_reader = deferred_close(expected_reader); auto push_fut = make_ready_future<>(); while (push_fut.available()) { @@ -2955,11 +3026,14 @@ 
SEASTAR_THREAD_TEST_CASE(test_queue_reader) { // Detached handle { - auto [reader, handle] = make_queue_reader(gen.schema(), tests::make_permit()); + auto p = make_queue_reader(gen.schema(), tests::make_permit()); + auto& reader = std::get<0>(p); + auto& handle = std::get<1>(p); auto fill_buffer_fut = reader.fill_buffer(db::no_timeout); { auto throwaway_reader = std::move(reader); + throwaway_reader.close().get(); } BOOST_REQUIRE_THROW(handle.push(mutation_fragment(*gen.schema(), tests::make_permit(), partition_end{})).get(), std::runtime_error); @@ -2969,17 +3043,24 @@ SEASTAR_THREAD_TEST_CASE(test_queue_reader) { // Abandoned handle aborts, move-assignment { - auto [reader, handle] = make_queue_reader(gen.schema(), tests::make_permit()); + auto p = make_queue_reader(gen.schema(), tests::make_permit()); + auto& reader = std::get<0>(p); + auto& handle = std::get<1>(p); + auto close_reader = deferred_close(reader); auto fill_buffer_fut = reader.fill_buffer(db::no_timeout); auto expected_reader = flat_mutation_reader_from_mutations(tests::make_permit(), expected_muts); + auto close_expected_reader = deferred_close(expected_reader); handle.push(std::move(*expected_reader(db::no_timeout).get0())).get(); BOOST_REQUIRE(!fill_buffer_fut.available()); { - auto [throwaway_reader, throwaway_handle] = make_queue_reader(gen.schema(), tests::make_permit()); + auto p = make_queue_reader(gen.schema(), tests::make_permit()); + auto& throwaway_reader = std::get<0>(p); + auto& throwaway_handle = std::get<1>(p); + auto close_throwaway_reader = deferred_close(throwaway_reader); // Overwrite handle handle = std::move(throwaway_handle); } @@ -2990,10 +3071,14 @@ SEASTAR_THREAD_TEST_CASE(test_queue_reader) { // Abandoned handle aborts, destructor { - auto [reader, handle] = make_queue_reader(gen.schema(), tests::make_permit()); + auto p = make_queue_reader(gen.schema(), tests::make_permit()); + auto& reader = std::get<0>(p); + auto& handle = std::get<1>(p); + auto close_reader = 
deferred_close(reader); auto fill_buffer_fut = reader.fill_buffer(db::no_timeout); auto expected_reader = flat_mutation_reader_from_mutations(tests::make_permit(), expected_muts); + auto close_expected_reader = deferred_close(expected_reader); handle.push(std::move(*expected_reader(db::no_timeout).get0())).get(); @@ -3010,13 +3095,23 @@ SEASTAR_THREAD_TEST_CASE(test_queue_reader) { // Life-cycle, relies on ASAN for error reporting { - auto [reader, handle] = make_queue_reader(gen.schema(), tests::make_permit()); + auto p = make_queue_reader(gen.schema(), tests::make_permit()); + auto& reader = std::get<0>(p); + auto& handle = std::get<1>(p); + auto close_reader = deferred_close(reader); { - auto [throwaway_reader, throwaway_handle] = make_queue_reader(gen.schema(), tests::make_permit()); + auto throwaway_p = make_queue_reader(gen.schema(), tests::make_permit()); + auto& throwaway_reader = std::get<0>(throwaway_p); + auto& throwaway_handle = std::get<1>(throwaway_p); + auto close_throwaway_reader = deferred_close(throwaway_reader); // Overwrite handle handle = std::move(throwaway_handle); - auto [another_throwaway_reader, another_throwaway_handle] = make_queue_reader(gen.schema(), tests::make_permit()); + auto another_throwaway_p = make_queue_reader(gen.schema(), tests::make_permit()); + auto& another_throwaway_reader = std::get<0>(another_throwaway_p); + auto& another_throwaway_handle = std::get<1>(another_throwaway_p); + auto close_another_throwaway_reader = deferred_close(another_throwaway_reader); + // Overwrite with moved-from handle (move assignment operator) another_throwaway_handle = std::move(throwaway_handle); @@ -3027,7 +3122,10 @@ SEASTAR_THREAD_TEST_CASE(test_queue_reader) { // push_end_of_stream() detaches handle from reader, relies on ASAN for error reporting { - auto [reader, handle] = make_queue_reader(gen.schema(), tests::make_permit()); + auto p = make_queue_reader(gen.schema(), tests::make_permit()); + auto& reader = std::get<0>(p); + auto& 
handle = std::get<1>(p); + auto close_reader = deferred_close(reader); { auto throwaway_handle = std::move(handle); throwaway_handle.push_end_of_stream(); @@ -3198,6 +3296,9 @@ SEASTAR_THREAD_TEST_CASE(test_manual_paused_evictable_reader_is_mutation_source) virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override { throw_with_backtrace(); } + virtual future<> close() noexcept override { + return _reader.close(); + } }; auto make_populate = [] (schema_ptr s, const std::vector& mutations, gc_clock::time_point query_time) { @@ -3335,6 +3436,7 @@ flat_mutation_reader create_evictable_reader_and_evict_after_first_buffer( SEASTAR_THREAD_TEST_CASE(test_evictable_reader_trim_range_tombstones) { reader_concurrency_semaphore semaphore(reader_concurrency_semaphore::no_limits{}, get_name()); + auto stop_sem = deferred_stop(semaphore); simple_schema s; auto permit = semaphore.make_permit(s.schema().get(), get_name()); @@ -3369,6 +3471,7 @@ SEASTAR_THREAD_TEST_CASE(test_evictable_reader_trim_range_tombstones) { auto rd = create_evictable_reader_and_evict_after_first_buffer(s.schema(), permit, query::full_partition_range, s.schema()->full_slice(), std::move(first_buffer), last_fragment_position, std::move(second_buffer), max_buffer_size); + auto close_rd = deferred_close(rd); rd.fill_buffer(db::no_timeout).get(); @@ -3395,6 +3498,7 @@ void check_evictable_reader_validation_is_triggered( auto rd = create_evictable_reader_and_evict_after_first_buffer(std::move(schema), std::move(permit), prange, slice, std::move(first_buffer), last_fragment_position, std::move(second_buffer), max_buffer_size); + auto close_rd = deferred_close(rd); const bool fail = !error_prefix.empty(); @@ -3426,6 +3530,7 @@ SEASTAR_THREAD_TEST_CASE(test_evictable_reader_self_validation) { }); reader_concurrency_semaphore semaphore(reader_concurrency_semaphore::no_limits{}, get_name()); + auto stop_sem = deferred_stop(semaphore); simple_schema s; auto permit = 
semaphore.make_permit(s.schema().get(), get_name()); @@ -3777,9 +3882,13 @@ SEASTAR_THREAD_TEST_CASE(test_evictable_reader_recreate_before_fast_forward_to) virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point) override { return make_exception_future<>(make_backtraced_exception_ptr()); } + virtual future<> close() noexcept override { + return make_ready_future<>(); + }; }; reader_concurrency_semaphore semaphore(reader_concurrency_semaphore::no_limits{}, get_name()); + auto stop_sem = deferred_stop(semaphore); simple_schema s; auto permit = semaphore.make_permit(s.schema().get(), get_name()); auto pkeys = s.make_pkeys(6); @@ -4175,6 +4284,12 @@ SEASTAR_THREAD_TEST_CASE(clustering_combined_reader_mutation_source_test) { return _it->second.fast_forward_to(std::move(pr), timeout); } + virtual future<> close() noexcept override { + return parallel_for_each(_readers, [] (auto& p) { + return p.second.close(); + }); + } + future next_fragment(db::timeout_clock::time_point timeout) { if (_it == _readers.end() || _range.get().after(_it->first, dht::ring_position_comparator(*_schema))) { co_return mutation_fragment_opt{}; @@ -4207,7 +4322,9 @@ SEASTAR_THREAD_TEST_CASE(clustering_combined_reader_mutation_source_test) { std::optional> bounds; mutation good(m.schema(), dk); std::optional bad; - flat_mutation_reader_from_mutations(tests::make_permit(), {m}).consume_pausable([&] (mutation_fragment&& mf) { + auto rd = flat_mutation_reader_from_mutations(tests::make_permit(), {m}); + auto close_rd = deferred_close(rd); + rd.consume_pausable([&] (mutation_fragment&& mf) { if ((mf.is_partition_start() && mf.as_partition_start().partition_tombstone()) || mf.is_static_row()) { if (!bad) { bad = mutation{m.schema(), dk}; @@ -4292,6 +4409,7 @@ SEASTAR_THREAD_TEST_CASE(test_clustering_combining_of_empty_readers) { auto r = make_clustering_combined_reader( s, tests::make_permit(), streamed_mutation::forwarding::no, std::make_unique(*s, std::move(rs))); + auto 
close_r = deferred_close(r); auto mf = r(db::no_timeout).get0(); if (mf) { diff --git a/test/boost/mutation_test.cc b/test/boost/mutation_test.cc index 52c6713773..4cc2658941 100644 --- a/test/boost/mutation_test.cc +++ b/test/boost/mutation_test.cc @@ -32,6 +32,7 @@ #include #include #include +#include #include "database.hh" #include "utils/UUID_gen.hh" @@ -93,6 +94,7 @@ static mutation_partition get_partition(memtable& mt, const partition_key& key) auto dk = dht::decorate_key(*mt.schema(), key); auto range = dht::partition_range::make_singular(dk); auto reader = mt.make_flat_reader(mt.schema(), tests::make_permit(), range); + auto close_reader = deferred_close(reader); auto mo = read_mutation_from_flat_mutation_reader(reader, db::no_timeout).get0(); BOOST_REQUIRE(bool(mo)); return std::move(mo->partition()); @@ -440,6 +442,7 @@ SEASTAR_THREAD_TEST_CASE(test_large_collection_allocation) { mt->apply(make_mutation_with_collection(pk, std::move(cmd2))); // this should trigger a merge of the two collections auto rd = mt->make_flat_reader(schema, tests::make_permit()); + auto close_rd = deferred_close(rd); auto res_mut_opt = read_mutation_from_flat_mutation_reader(rd, db::no_timeout).get0(); BOOST_REQUIRE(res_mut_opt); @@ -2951,6 +2954,7 @@ void run_compaction_data_stream_split_test(const schema& schema, gc_clock::time_ testlog.info("Original data: {}", create_stats(expected_mutations_summary)); auto reader = flat_mutation_reader_from_mutations(tests::make_permit(), std::move(mutations)); + auto close_reader = deferred_close(reader); auto get_max_purgeable = [] (const dht::decorated_key&) { return api::max_timestamp; }; diff --git a/test/boost/mutation_writer_test.cc b/test/boost/mutation_writer_test.cc index 4ae7f5c320..07999a5fb2 100644 --- a/test/boost/mutation_writer_test.cc +++ b/test/boost/mutation_writer_test.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include "mutation_fragment.hh" #include "test/lib/mutation_source_test.hh" @@ -66,14 +67,18 @@ 
SEASTAR_TEST_CASE(test_multishard_writer) { shards_before[shard]++; } auto source_reader = partition_nr > 0 ? flat_mutation_reader_from_mutations(tests::make_permit(), muts) : make_empty_flat_reader(s, tests::make_permit()); + auto close_source_reader = deferred_close(source_reader); auto& sharder = s->get_sharder(); size_t partitions_received = distribute_reader_and_consume_on_shards(s, std::move(source_reader), [&sharder, &shards_after, error] (flat_mutation_reader reader) mutable { if (error) { + return reader.close().then([] { return make_exception_future<>(std::runtime_error("Failed to write")); + }); } - return repeat([&sharder, &shards_after, reader = std::move(reader), error] () mutable { + return with_closeable(std::move(reader), [&sharder, &shards_after, error] (flat_mutation_reader& reader) { + return repeat([&sharder, &shards_after, &reader, error] () mutable { return reader(db::no_timeout).then([&sharder, &shards_after, error] (mutation_fragment_opt mf_opt) mutable { if (mf_opt) { if (mf_opt->is_partition_start()) { @@ -86,6 +91,7 @@ SEASTAR_TEST_CASE(test_multishard_writer) { return make_ready_future(stop_iteration::yes); } }); + }); }); } ).get0(); @@ -123,6 +129,7 @@ SEASTAR_TEST_CASE(test_multishard_writer_producer_aborts) { auto muts = gen(partition_nr); schema_ptr s = gen.schema(); auto source_reader = partition_nr > 0 ? 
flat_mutation_reader_from_mutations(tests::make_permit(), muts) : make_empty_flat_reader(s, tests::make_permit()); + auto close_source_reader = deferred_close(source_reader); int mf_produced = 0; auto get_next_mutation_fragment = [&source_reader, &mf_produced] () mutable { if (mf_produced++ > 800) { @@ -137,9 +144,12 @@ SEASTAR_TEST_CASE(test_multishard_writer_producer_aborts) { make_generating_reader(s, tests::make_permit(), std::move(get_next_mutation_fragment)), [&sharder, error] (flat_mutation_reader reader) mutable { if (error) { + return reader.close().then([] { return make_exception_future<>(std::runtime_error("Failed to write")); + }); } - return repeat([&sharder, reader = std::move(reader), error] () mutable { + return with_closeable(std::move(reader), [&sharder, error] (flat_mutation_reader& reader) { + return repeat([&sharder, &reader, error] () mutable { return reader(db::no_timeout).then([&sharder, error] (mutation_fragment_opt mf_opt) mutable { if (mf_opt) { if (mf_opt->is_partition_start()) { @@ -151,6 +161,7 @@ SEASTAR_TEST_CASE(test_multishard_writer_producer_aborts) { return make_ready_future(stop_iteration::yes); } }); + }); }); } ).get0(); @@ -334,7 +345,7 @@ SEASTAR_THREAD_TEST_CASE(test_timestamp_based_splitting_mutation_writer) { std::unordered_map> buckets; auto consumer = [&] (flat_mutation_reader bucket_reader) { - return do_with(std::move(bucket_reader), [&] (flat_mutation_reader& rd) { + return with_closeable(std::move(bucket_reader), [&] (flat_mutation_reader& rd) { return rd.consume(test_bucket_writer(random_schema.schema(), classify_fn, buckets), db::no_timeout); }); }; @@ -347,6 +358,7 @@ SEASTAR_THREAD_TEST_CASE(test_timestamp_based_splitting_mutation_writer) { boost::adaptors::transformed([] (std::vector muts) { return flat_mutation_reader_from_mutations(tests::make_permit(), std::move(muts)); })); auto reader = make_combined_reader(random_schema.schema(), tests::make_permit(), std::move(bucket_readers), 
streamed_mutation::forwarding::no, mutation_reader::forwarding::no); + auto close_reader = deferred_close(reader); const auto now = gc_clock::now(); for (auto& m : muts) { @@ -364,7 +376,6 @@ SEASTAR_THREAD_TEST_CASE(test_timestamp_based_splitting_mutation_writer) { testlog.debug("Comparing mutation #{}", i); assert_that(combined_mutations[i]).is_equal_to(muts[i]); } - } SEASTAR_THREAD_TEST_CASE(test_timestamp_based_splitting_mutation_writer_abort) { @@ -402,7 +413,7 @@ SEASTAR_THREAD_TEST_CASE(test_timestamp_based_splitting_mutation_writer_abort) { int throw_after = tests::random::get_int(muts.size() - 1); testlog.info("Will raise exception after {}/{} mutations", throw_after, muts.size()); auto consumer = [&] (flat_mutation_reader bucket_reader) { - return do_with(std::move(bucket_reader), [&] (flat_mutation_reader& rd) { + return with_closeable(std::move(bucket_reader), [&] (flat_mutation_reader& rd) { return rd.consume(test_bucket_writer(random_schema.schema(), classify_fn, buckets, throw_after), db::no_timeout); }); }; diff --git a/test/boost/querier_cache_test.cc b/test/boost/querier_cache_test.cc index ac6753d45a..f849a6c34b 100644 --- a/test/boost/querier_cache_test.cc +++ b/test/boost/querier_cache_test.cc @@ -29,6 +29,7 @@ #include #include #include +#include using namespace std::chrono_literals; @@ -175,6 +176,11 @@ public: : test_querier_cache(test_querier_cache::make_value, entry_ttl) { } + ~test_querier_cache() { + _cache.stop().get(); + _sem.stop().get(); + } + const simple_schema& get_simple_schema() const { return _s; } @@ -310,7 +316,10 @@ public: const dht::partition_range& lookup_range, const query::partition_slice& lookup_slice) { - _cache.lookup_data_querier(make_cache_key(lookup_key), lookup_schema, lookup_range, lookup_slice, nullptr); + auto querier_opt = _cache.lookup_data_querier(make_cache_key(lookup_key), lookup_schema, lookup_range, lookup_slice, nullptr); + if (querier_opt) { + querier_opt->close().get(); + } 
BOOST_REQUIRE_EQUAL(_cache.get_stats().lookups, ++_expected_stats.lookups); return *this; } @@ -320,13 +329,16 @@ public: const dht::partition_range& lookup_range, const query::partition_slice& lookup_slice) { - _cache.lookup_mutation_querier(make_cache_key(lookup_key), lookup_schema, lookup_range, lookup_slice, nullptr); + auto querier_opt = _cache.lookup_mutation_querier(make_cache_key(lookup_key), lookup_schema, lookup_range, lookup_slice, nullptr); + if (querier_opt) { + querier_opt->close().get(); + } BOOST_REQUIRE_EQUAL(_cache.get_stats().lookups, ++_expected_stats.lookups); return *this; } test_querier_cache& evict_all_for_table() { - _cache.evict_all_for_table(get_schema()->id()); + _cache.evict_all_for_table(get_schema()->id()).get(); return *this; } @@ -767,7 +779,9 @@ SEASTAR_THREAD_TEST_CASE(test_immediate_evict_on_insert) { SEASTAR_THREAD_TEST_CASE(test_unique_inactive_read_handle) { reader_concurrency_semaphore sem1(reader_concurrency_semaphore::no_limits{}, "sem1"); + auto stop_sem1 = deferred_stop(sem1); reader_concurrency_semaphore sem2(reader_concurrency_semaphore::no_limits{}, ""); // to see the message for an unnamed semaphore + auto stop_sem2 = deferred_stop(sem2); auto schema = schema_builder("ks", "cf") .with_column("pk", int32_type, column_kind::partition_key) diff --git a/test/boost/row_cache_test.cc b/test/boost/row_cache_test.cc index 3f0c56fe03..d1b0fbd61f 100644 --- a/test/boost/row_cache_test.cc +++ b/test/boost/row_cache_test.cc @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "test/lib/mutation_assertions.hh" @@ -127,6 +128,7 @@ snapshot_source snapshot_source_from_snapshot(mutation_source src) { bool has_key(row_cache& cache, const dht::decorated_key& key) { auto range = dht::partition_range::make_singular(key); auto reader = cache.make_reader(cache.schema(), tests::make_permit(), range); + auto close_reader = deferred_close(reader); auto mo = read_mutation_from_flat_mutation_reader(reader, 
db::no_timeout).get0(); if (!bool(mo)) { return false; @@ -182,23 +184,23 @@ SEASTAR_TEST_CASE(test_cache_works_after_clearing) { }); } -class partition_counting_reader final : public delegating_reader { +class partition_counting_reader final : public delegating_reader { int& _counter; bool _count_fill_buffer = true; public: partition_counting_reader(flat_mutation_reader mr, int& counter) - : delegating_reader(std::move(mr)), _counter(counter) { } + : delegating_reader(std::move(mr)), _counter(counter) { } virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override { if (_count_fill_buffer) { ++_counter; _count_fill_buffer = false; } - return delegating_reader::fill_buffer(timeout); + return delegating_reader::fill_buffer(timeout); } virtual future<> next_partition() override { _count_fill_buffer = false; ++_counter; - return delegating_reader::next_partition(); + return delegating_reader::next_partition(); } }; @@ -867,6 +869,7 @@ SEASTAR_TEST_CASE(test_eviction) { for (auto&& key : keys) { auto pr = dht::partition_range::make_singular(key); auto rd = cache.make_reader(s, tests::make_permit(), pr); + auto close_rd = deferred_close(rd); rd.set_max_buffer_size(1); rd.fill_buffer(db::no_timeout).get(); } @@ -904,7 +907,7 @@ SEASTAR_TEST_CASE(test_eviction_from_invalidated) { std::shuffle(keys.begin(), keys.end(), random); for (auto&& key : keys) { - cache.make_reader(s, tests::make_permit(), dht::partition_range::make_singular(key)); + cache.make_reader(s, tests::make_permit(), dht::partition_range::make_singular(key)).close().get(); } cache.invalidate(row_cache::external_updater([] {})).get(); @@ -948,6 +951,7 @@ SEASTAR_TEST_CASE(test_eviction_after_schema_change) { { auto pr = dht::partition_range::make_singular(m.decorated_key()); auto rd = cache.make_reader(s2, tests::make_permit(), pr); + auto close_rd = deferred_close(rd); rd.set_max_buffer_size(1); rd.fill_buffer(db::no_timeout).get(); } @@ -964,6 +968,7 @@ 
SEASTAR_TEST_CASE(test_eviction_after_schema_change) { void test_sliced_read_row_presence(flat_mutation_reader reader, schema_ptr s, std::deque expected) { + auto close_reader = deferred_close(reader); clustering_key::equality ck_eq(*s); auto mfopt = reader(db::no_timeout).get0(); @@ -1184,6 +1189,7 @@ SEASTAR_TEST_CASE(test_update_failure) { auto has_only = [&] (const partitions_type& partitions) { auto reader = cache.make_reader(s, tests::make_permit(), query::full_partition_range); + auto close_reader = deferred_close(reader); for (int i = 0; i < partition_count; i++) { auto mopt = read_mutation_from_flat_mutation_reader(reader, db::no_timeout).get0(); if (!mopt) { @@ -1248,15 +1254,15 @@ private: mutation_source _underlying; ::throttle& _throttle; private: - class reader : public delegating_reader { + class reader : public delegating_reader { throttle& _throttle; public: reader(throttle& t, flat_mutation_reader r) - : delegating_reader(std::move(r)) + : delegating_reader(std::move(r)) , _throttle(t) {} virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override { - return delegating_reader::fill_buffer(timeout).finally([this] () { + return delegating_reader::fill_buffer(timeout).finally([this] () { return _throttle.enter(); }); } @@ -1585,6 +1591,11 @@ SEASTAR_TEST_CASE(test_mvcc) { auto m12 = m1 + m2; flat_mutation_reader_opt mt1_reader_opt; + auto close_mt1_reader = defer([&mt1_reader_opt] { + if (mt1_reader_opt) { + mt1_reader_opt->close().get(); + } + }); if (with_active_memtable_reader) { mt1_reader_opt = mt1->make_flat_reader(s, tests::make_permit()); mt1_reader_opt->set_max_buffer_size(1); @@ -2037,6 +2048,7 @@ static void populate_range(row_cache& cache, { auto slice = partition_slice_builder(*cache.schema()).with_range(r).build(); auto rd = cache.make_reader(cache.schema(), tests::make_permit(), pr, slice); + auto close_rd = deferred_close(rd); consume_all(rd); } @@ -2315,6 +2327,11 @@ 
SEASTAR_TEST_CASE(test_exception_safety_of_update_from_memtable) { populate_range(cache, population_range); auto rd1_v1 = assert_that(make_reader(population_range)); flat_mutation_reader_opt snap; + auto close_snap = defer([&snap] { + if (snap) { + snap->close().get(); + } + }); auto d = defer([&] { memory::scoped_critical_alloc_section dfg; @@ -2382,6 +2399,7 @@ SEASTAR_TEST_CASE(test_exception_safety_of_reads) { memory::with_allocation_failures([&] { auto rd = cache.make_reader(s, tests::make_permit(), query::full_partition_range, slice); + auto close_rd = deferred_close(rd); auto got_opt = read_mutation_from_flat_mutation_reader(rd, db::no_timeout).get0(); BOOST_REQUIRE(got_opt); BOOST_REQUIRE(!read_mutation_from_flat_mutation_reader(rd, db::no_timeout).get0()); @@ -2448,6 +2466,7 @@ SEASTAR_TEST_CASE(test_exception_safety_of_transitioning_from_underlying_read_to } auto rd = cache.make_reader(s.schema(), tests::make_permit(), pr, slice); + auto close_rd = deferred_close(rd); auto got_opt = read_mutation_from_flat_mutation_reader(rd, db::no_timeout).get0(); BOOST_REQUIRE(got_opt); auto mfopt = rd(db::no_timeout).get0(); @@ -2693,6 +2712,17 @@ SEASTAR_TEST_CASE(test_random_row_population) { std::unique_ptr slice; flat_mutation_reader reader; mutation result; + + read() = delete; + read(std::unique_ptr slice_, flat_mutation_reader reader_, mutation result_) noexcept + : slice(std::move(slice_)) + , reader(std::move(reader_)) + , result(std::move(result_)) + { } + read(read&& o) = default; + ~read() { + reader.close().get(); + } }; std::vector readers; @@ -2703,18 +2733,18 @@ SEASTAR_TEST_CASE(test_random_row_population) { } while (!readers.empty()) { - auto i = readers.begin(); - while (i != readers.end()) { + std::vector remaining_readers; + for (auto i = readers.begin(); i != readers.end(); i++) { auto mfo = i->reader(db::no_timeout).get0(); if (!mfo) { auto&& ranges = i->slice->row_ranges(*s.schema(), pk.key()); assert_that(i->result).is_equal_to(m1, ranges); - 
i = readers.erase(i); } else { i->result.apply(*mfo); - ++i; + remaining_readers.emplace_back(std::move(*i)); } } + readers = std::move(remaining_readers); } check_continuous(cache, pr, query::clustering_range::make({s.make_ckey(0)}, {s.make_ckey(9)})); @@ -2792,6 +2822,7 @@ SEASTAR_TEST_CASE(test_continuity_is_populated_when_read_overlaps_with_older_ver { auto rd1 = make_reader(); // to keep the old version around + auto close_rd1 = deferred_close(rd1); populate_range(cache, pr, query::clustering_range::make({s.make_ckey(2)}, {s.make_ckey(4)})); @@ -2832,6 +2863,7 @@ SEASTAR_TEST_CASE(test_continuity_is_populated_when_read_overlaps_with_older_ver populate_range(cache, pr, s.make_ckey_range(8, 8)); auto rd1 = make_reader(); // to keep the old version around + auto close_rd1 = deferred_close(rd1); apply(m3); @@ -2850,6 +2882,7 @@ SEASTAR_TEST_CASE(test_continuity_is_populated_when_read_overlaps_with_older_ver populate_range(cache, pr, query::clustering_range::make_singular(s.make_ckey(7))); auto rd1 = make_reader(); // to keep the old version around + auto close_rd1 = deferred_close(rd1); apply(m4); @@ -2927,6 +2960,7 @@ SEASTAR_TEST_CASE(test_continuity_population_with_multicolumn_clustering_key) { .with_range(query::clustering_range::make_singular(ck2)) .build(); auto rd1 = make_reader(&slice1); + auto close_rd1 = deferred_close(rd1); apply(m2); @@ -3027,6 +3061,7 @@ SEASTAR_TEST_CASE(test_concurrent_setting_of_continuity_on_read_upper_bound) { { auto rd1 = make_rd(); // to keep the old version around + auto close_rd1 = deferred_close(rd1); populate_range(cache, pr, s.make_ckey_range(0, 0)); populate_range(cache, pr, s.make_ckey_range(3, 3)); @@ -3093,6 +3128,7 @@ SEASTAR_TEST_CASE(test_tombstone_merging_of_overlapping_tombstones_in_many_versi populate_range(cache, pr, s.make_ckey_range(0, 3)); auto rd1 = make_reader(); + auto close_rd1 = deferred_close(rd1); apply(cache, underlying, m2); @@ -3150,6 +3186,7 @@ SEASTAR_TEST_CASE(test_concurrent_reads_and_eviction) 
{ .build(); auto rd = make_reader(slice); + auto close_rd = deferred_close(rd); auto actual_opt = read_mutation_from_flat_mutation_reader(rd, db::no_timeout).get0(); BOOST_REQUIRE(actual_opt); auto actual = *actual_opt; @@ -3337,6 +3374,7 @@ SEASTAR_TEST_CASE(test_hash_is_cached) { { auto rd = cache.make_reader(s, tests::make_permit()); + auto close_rd = deferred_close(rd); rd(db::no_timeout).get0()->as_partition_start(); clustering_row row = std::move(*rd(db::no_timeout).get0()).as_clustering_row(); BOOST_REQUIRE(!row.cells().cell_hash_for(0)); @@ -3346,6 +3384,7 @@ SEASTAR_TEST_CASE(test_hash_is_cached) { auto slice = s->full_slice(); slice.options.set(); auto rd = cache.make_reader(s, tests::make_permit(), query::full_partition_range, slice); + auto close_rd = deferred_close(rd); rd(db::no_timeout).get0()->as_partition_start(); clustering_row row = std::move(*rd(db::no_timeout).get0()).as_clustering_row(); BOOST_REQUIRE(row.cells().cell_hash_for(0)); @@ -3353,6 +3392,7 @@ SEASTAR_TEST_CASE(test_hash_is_cached) { { auto rd = cache.make_reader(s, tests::make_permit()); + auto close_rd = deferred_close(rd); rd(db::no_timeout).get0()->as_partition_start(); clustering_row row = std::move(*rd(db::no_timeout).get0()).as_clustering_row(); BOOST_REQUIRE(row.cells().cell_hash_for(0)); @@ -3364,6 +3404,7 @@ SEASTAR_TEST_CASE(test_hash_is_cached) { { auto rd = cache.make_reader(s, tests::make_permit()); + auto close_rd = deferred_close(rd); rd(db::no_timeout).get0()->as_partition_start(); clustering_row row = std::move(*rd(db::no_timeout).get0()).as_clustering_row(); BOOST_REQUIRE(!row.cells().cell_hash_for(0)); @@ -3373,6 +3414,7 @@ SEASTAR_TEST_CASE(test_hash_is_cached) { auto slice = s->full_slice(); slice.options.set(); auto rd = cache.make_reader(s, tests::make_permit(), query::full_partition_range, slice); + auto close_rd = deferred_close(rd); rd(db::no_timeout).get0()->as_partition_start(); clustering_row row = 
std::move(*rd(db::no_timeout).get0()).as_clustering_row(); BOOST_REQUIRE(row.cells().cell_hash_for(0)); @@ -3380,6 +3422,7 @@ SEASTAR_TEST_CASE(test_hash_is_cached) { { auto rd = cache.make_reader(s, tests::make_permit()); + auto close_rd = deferred_close(rd); rd(db::no_timeout).get0()->as_partition_start(); clustering_row row = std::move(*rd(db::no_timeout).get0()).as_clustering_row(); BOOST_REQUIRE(row.cells().cell_hash_for(0)); @@ -3598,6 +3641,7 @@ SEASTAR_TEST_CASE(test_reading_progress_with_small_buffer_and_invalidation) { populate_range(cache, pkr, s.make_ckey_range(3, 7)); auto rd3 = cache.make_reader(s.schema(), tests::make_permit(), pkr); + auto close_rd3 = deferred_close(rd3); rd3.set_max_buffer_size(1); while (!rd3.is_end_of_stream()) { diff --git a/test/boost/sstable_3_x_test.cc b/test/boost/sstable_3_x_test.cc index aa940068ec..bf694f26eb 100644 --- a/test/boost/sstable_3_x_test.cc +++ b/test/boost/sstable_3_x_test.cc @@ -29,6 +29,7 @@ #include #include #include +#include #include "sstables/sstables.hh" #include "sstables/compaction_manager.hh" @@ -3191,8 +3192,9 @@ SEASTAR_THREAD_TEST_CASE(compact_deleted_row) { * } * ] */ - auto reader = compacted_sstable_reader(env, s, table_name, {1, 2}); - mutation_opt m = read_mutation_from_flat_mutation_reader(reader, db::no_timeout).get0(); + mutation_opt m = with_closeable(compacted_sstable_reader(env, s, table_name, {1, 2}), [&] (flat_mutation_reader& reader) { + return read_mutation_from_flat_mutation_reader(reader, db::no_timeout); + }).get0(); BOOST_REQUIRE(m); BOOST_REQUIRE(m->key().equal(*s, partition_key::from_singular(*s, data_value(sstring("key"))))); BOOST_REQUIRE(!m->partition().partition_tombstone()); @@ -3262,8 +3264,9 @@ SEASTAR_THREAD_TEST_CASE(compact_deleted_cell) { *] * */ - auto reader = compacted_sstable_reader(env, s, table_name, {1, 2}); - mutation_opt m = read_mutation_from_flat_mutation_reader(reader, db::no_timeout).get0(); + mutation_opt m = 
with_closeable(compacted_sstable_reader(env, s, table_name, {1, 2}), [&] (flat_mutation_reader& reader) { + return read_mutation_from_flat_mutation_reader(reader, db::no_timeout); + }).get0(); BOOST_REQUIRE(m); BOOST_REQUIRE(m->key().equal(*s, partition_key::from_singular(*s, data_value(sstring("key"))))); BOOST_REQUIRE(!m->partition().partition_tombstone()); diff --git a/test/boost/sstable_datafile_test.cc b/test/boost/sstable_datafile_test.cc index 4f0134f865..24ba6adbed 100644 --- a/test/boost/sstable_datafile_test.cc +++ b/test/boost/sstable_datafile_test.cc @@ -23,6 +23,8 @@ #include #include #include +#include + #include "sstables/sstables.hh" #include "sstables/key.hh" #include "sstables/compress.hh" @@ -860,6 +862,8 @@ SEASTAR_TEST_CASE(datafile_generation_11) { // The clustered set auto m = verifier(mutation); verify_set(m); + }).finally([rd] { + return rd->close().finally([rd] {}); }); }).then([sstp, s, verifier] { return do_with(dht::partition_range::make_singular(make_dkey(s, "key2")), [sstp, s, verifier] (auto& pr) { @@ -869,6 +873,8 @@ SEASTAR_TEST_CASE(datafile_generation_11) { BOOST_REQUIRE(!m.tomb); BOOST_REQUIRE(m.cells.size() == 1); BOOST_REQUIRE(m.cells[0].first == to_bytes("4")); + }).finally([rd] { + return rd->close().finally([rd] {}); }); }); }); @@ -903,6 +909,8 @@ SEASTAR_TEST_CASE(datafile_generation_12) { for (auto& rt: mp.row_tombstones()) { BOOST_REQUIRE(rt.tomb == tomb); } + }).finally([rd] { + return rd->close().finally([rd] {}); }); }); }); @@ -939,6 +947,8 @@ static future<> sstable_compression_test(compressor_ptr c, unsigned generation) for (auto& rt: mp.row_tombstones()) { BOOST_REQUIRE(rt.tomb == tomb); } + }).finally([rd] { + return rd->close().finally([rd] {}); }); }); }); @@ -1196,6 +1206,8 @@ SEASTAR_TEST_CASE(compact) { return read_mutation_from_flat_mutation_reader(*reader, db::no_timeout); }).then([reader] (mutation_opt m) { BOOST_REQUIRE(!m); + }).finally([reader] { + return reader->close(); }); }); }); @@ -1332,7 
+1344,7 @@ static future<> check_compacted_sstables(test_env& env, sstring tmpdir_path, uns auto reader = sstable_reader(sst, s); // reader holds sst and s alive. auto keys = make_lw_shared>(); - return do_with(std::move(reader), [generations, s, keys] (flat_mutation_reader& reader) { + return with_closeable(std::move(reader), [generations, s, keys] (flat_mutation_reader& reader) { return do_for_each(*generations, [&reader, keys] (unsigned long generation) mutable { return read_mutation_from_flat_mutation_reader(reader, db::no_timeout).then([generation, keys] (mutation_opt m) { BOOST_REQUIRE(m); @@ -1423,6 +1435,8 @@ SEASTAR_TEST_CASE(datafile_generation_37) { auto& row = mp.clustered_row(*s, clustering); match_live_cell(row.cells(), *s, "cl2", data_value(to_bytes("cl2"))); return make_ready_future<>(); + }).finally([rd] { + return rd->close().finally([rd] {}); }); }); }); @@ -1457,6 +1471,8 @@ SEASTAR_TEST_CASE(datafile_generation_38) { auto& row = mp.clustered_row(*s, clustering); match_live_cell(row.cells(), *s, "cl3", data_value(to_bytes("cl3"))); return make_ready_future<>(); + }).finally([rd] { + return rd->close().finally([rd] {}); }); }); }); @@ -1492,6 +1508,8 @@ SEASTAR_TEST_CASE(datafile_generation_39) { match_live_cell(row.cells(), *s, "cl1", data_value(data_value(to_bytes("cl1")))); match_live_cell(row.cells(), *s, "cl2", data_value(data_value(to_bytes("cl2")))); return make_ready_future<>(); + }).finally([rd] { + return rd->close().finally([rd] {}); }); }); }); @@ -1587,6 +1605,8 @@ SEASTAR_TEST_CASE(datafile_generation_41) { BOOST_REQUIRE(mp.clustered_rows().calculate_size() == 1); auto& c_row = *(mp.clustered_rows().begin()); BOOST_REQUIRE(c_row.row().deleted_at().tomb() == tomb); + }).finally([rd] { + return rd->close().finally([rd] {}); }); }); }); @@ -1647,7 +1667,9 @@ SEASTAR_TEST_CASE(datafile_generation_47) { } return make_ready_future(stop_iteration::no); }); - }).then([sstp, reader, s] {}); + }).finally([sstp, reader, s] { + return 
reader->close(); + }); }); }).then([sst, mt] {}); }); @@ -2277,6 +2299,8 @@ SEASTAR_TEST_CASE(tombstone_purge_test) { return (*reader)(db::no_timeout); }).then([reader, s] (mutation_fragment_opt m) { BOOST_REQUIRE(!m); + }).finally([reader] { + return reader->close(); }).get(); }; @@ -2454,6 +2478,8 @@ SEASTAR_TEST_CASE(check_multi_schema) { return (*reader)(db::no_timeout); }).then([reader, s] (mutation_fragment_opt m) { BOOST_REQUIRE(!m); + }).finally([reader] { + return reader->close(); }); }); return make_ready_future<>(); @@ -2510,6 +2536,8 @@ SEASTAR_TEST_CASE(sstable_rewrite) { return (*reader)(db::no_timeout); }).then([reader] (mutation_fragment_opt m) { BOOST_REQUIRE(!m); + }).finally([reader] { + return reader->close(); }); }).then([cf] {}); }).then([sst, mt, s] {}); @@ -2520,6 +2548,7 @@ void test_sliced_read_row_presence(shared_sstable sst, schema_ptr s, const query std::vector>> expected) { auto reader = sst->as_mutation_source().make_reader(s, tests::make_permit(), query::full_partition_range, ps); + auto close_reader = deferred_close(reader); partition_key::equality pk_eq(*s); clustering_key::equality ck_eq(*s); @@ -2751,6 +2780,7 @@ SEASTAR_TEST_CASE(test_counter_read) { auto sst = env.make_sstable(s, get_test_dir("counter_test", s), 5, version, big); sst->load().get(); auto reader = sstable_reader(sst, s); + auto close_reader = deferred_close(reader); auto mfopt = reader(db::no_timeout).get0(); BOOST_REQUIRE(mfopt); @@ -4949,6 +4979,7 @@ SEASTAR_TEST_CASE(test_wrong_counter_shard_order) { auto sst = env.make_sstable(s, get_test_dir("wrong_counter_shard_order", s), 2, version, big); sst->load().get0(); auto reader = sstable_reader(sst, s); + auto close_reader = deferred_close(reader); auto verify_row = [&s] (mutation_fragment_opt mfopt, int64_t expected_value) { BOOST_REQUIRE(bool(mfopt)); @@ -6098,6 +6129,7 @@ SEASTAR_TEST_CASE(purged_tombstone_consumer_sstable_test) { ::mutation_reader::forwarding::no); auto r = std::move(reader); + auto close_r = 
deferred_close(r); r.consume_in_thread(std::move(cfc), db::no_timeout); return {std::move(non_purged), std::move(purged_only)}; @@ -6138,6 +6170,8 @@ SEASTAR_TEST_CASE(purged_tombstone_consumer_sstable_test) { return (*reader)(db::no_timeout); }).then([reader, s] (mutation_fragment_opt m) { BOOST_REQUIRE(!m); + }).finally([reader] { + return reader->close(); }).get(); }; @@ -6676,6 +6710,7 @@ SEASTAR_TEST_CASE(test_zero_estimated_partitions) { sst->load().get(); auto sst_mr = sst->as_mutation_source().make_reader(s, tests::make_permit(), query::full_partition_range, s->full_slice()); + auto close_mr = deferred_close(sst_mr); auto sst_mut = read_mutation_from_flat_mutation_reader(sst_mr, db::no_timeout).get0(); // The real test here is that we don't assert() in @@ -6841,6 +6876,7 @@ SEASTAR_TEST_CASE(test_missing_partition_end_fragment) { frags.push_back(mutation_fragment(*s, tests::make_permit(), partition_end())); auto mr = make_flat_mutation_reader_from_fragments(s, tests::make_permit(), std::move(frags)); + auto close_mr = deferred_close(mr); auto sst = env.make_sstable(s, tmpdir_path, 0, version, big); sstable_writer_config cfg = env.manager().configure_writer(); @@ -7183,6 +7219,7 @@ SEASTAR_TEST_CASE(single_key_reader_through_compound_set_test) { auto reader = compound.create_single_key_sstable_reader(&*cf, s, permit, eh, pr, s->full_slice(), default_priority_class(), tracing::trace_state_ptr(), ::streamed_mutation::forwarding::no, ::mutation_reader::forwarding::no); + auto close_reader = deferred_close(reader); auto mfopt = read_mutation_from_flat_mutation_reader(reader, db::no_timeout).get0(); BOOST_REQUIRE(mfopt); mfopt = read_mutation_from_flat_mutation_reader(reader, db::no_timeout).get0(); @@ -7247,6 +7284,7 @@ SEASTAR_TEST_CASE(test_twcs_single_key_reader_filtering) { &cf, s, permit, eh, pr, slice, default_priority_class(), tracing::trace_state_ptr(), ::streamed_mutation::forwarding::no, ::mutation_reader::forwarding::no); + auto close_reader = 
deferred_close(reader); auto checked_by_ck = cf_stats.sstables_checked_by_clustering_filter; auto surviving_after_ck = cf_stats.surviving_sstables_after_clustering_filter; diff --git a/test/boost/sstable_mutation_test.cc b/test/boost/sstable_mutation_test.cc index a3369dc9c1..e85ac74b42 100644 --- a/test/boost/sstable_mutation_test.cc +++ b/test/boost/sstable_mutation_test.cc @@ -24,6 +24,8 @@ #include #include #include +#include + #include "test/boost/sstable_test.hh" #include "sstables/key.hh" #include @@ -56,10 +58,11 @@ SEASTAR_THREAD_TEST_CASE(nonexistent_key) { env.reusable_sst(uncompressed_schema(), uncompressed_dir(), 1).then([] (auto sstp) { return do_with(dht::partition_range::make_singular(make_dkey(uncompressed_schema(), "invalid_key")), [sstp] (auto& pr) { auto s = uncompressed_schema(); - auto rd = make_lw_shared(sstp->make_reader(s, tests::make_permit(), pr, s->full_slice())); - return (*rd)(db::no_timeout).then([sstp, s, rd] (auto mutation) { + return with_closeable(sstp->make_reader(s, tests::make_permit(), pr, s->full_slice()), [sstp, s] (flat_mutation_reader& rd) { + return rd(db::no_timeout).then([sstp, s] (auto mutation) { BOOST_REQUIRE(!mutation); return make_ready_future<>(); + }); }); }); }).get(); @@ -70,8 +73,8 @@ future<> test_no_clustered(sstables::test_env& env, bytes&& key, std::unordered_ return env.reusable_sst(uncompressed_schema(), uncompressed_dir(), 1).then([k = std::move(key), map = std::move(map)] (auto sstp) mutable { return do_with(dht::partition_range::make_singular(make_dkey(uncompressed_schema(), std::move(k))), [sstp, map = std::move(map)] (auto& pr) { auto s = uncompressed_schema(); - auto rd = make_lw_shared(sstp->make_reader(s, tests::make_permit(), pr, s->full_slice())); - return read_mutation_from_flat_mutation_reader(*rd, db::no_timeout).then([sstp, s, rd, map = std::move(map)] (auto mutation) { + return with_closeable(sstp->make_reader(s, tests::make_permit(), pr, s->full_slice()), [sstp, s, map = std::move(map)] 
(flat_mutation_reader& rd) mutable { + return read_mutation_from_flat_mutation_reader(rd, db::no_timeout).then([sstp, s, map = std::move(map)] (auto mutation) { BOOST_REQUIRE(mutation); auto& mp = mutation->partition(); for (auto&& e : mp.range(*s, nonwrapping_range())) { @@ -84,6 +87,7 @@ future<> test_no_clustered(sstables::test_env& env, bytes&& key, std::unordered_ } } return make_ready_future<>(); + }); }); }); }); @@ -145,10 +149,11 @@ future generate_clustered(sstables::test_env& env, bytes&& key) { return env.reusable_sst(complex_schema(), "test/resource/sstables/complex", Generation).then([k = std::move(key)] (auto sstp) mutable { return do_with(dht::partition_range::make_singular(make_dkey(complex_schema(), std::move(k))), [sstp] (auto& pr) { auto s = complex_schema(); - auto rd = make_lw_shared(sstp->make_reader(s, tests::make_permit(), pr, s->full_slice())); - return read_mutation_from_flat_mutation_reader(*rd, db::no_timeout).then([sstp, s, rd] (auto mutation) { + return with_closeable(sstp->make_reader(s, tests::make_permit(), pr, s->full_slice()), [sstp, s] (flat_mutation_reader& rd) { + return read_mutation_from_flat_mutation_reader(rd, db::no_timeout).then([sstp, s] (auto mutation) { BOOST_REQUIRE(mutation); return std::move(*mutation); + }); }); }); }); @@ -345,20 +350,20 @@ future<> test_range_reads(sstables::test_env& env, const dht::token& min, const auto stop = make_lw_shared(false); return do_with(dht::partition_range::make(dht::ring_position::starting_at(min), dht::ring_position::ending_at(max)), [&, sstp, s] (auto& pr) { - auto mutations = make_lw_shared(sstp->make_reader(s, tests::make_permit(), pr, s->full_slice())); + return with_closeable(sstp->make_reader(s, tests::make_permit(), pr, s->full_slice()), [&, sstp] (flat_mutation_reader& mutations) { return do_until([stop] { return *stop; }, // Note: The data in the following lambda, including // "mutations", continues to live until after the last // iteration's future completes, so its 
lifetime is safe. - [sstp, mutations = std::move(mutations), &expected, expected_size, count, stop] () mutable { - return (*mutations)(db::no_timeout).then([&expected, expected_size, count, stop, mutations] (mutation_fragment_opt mfopt) mutable { + [sstp, &mutations, &expected, expected_size, count, stop] () mutable { + return mutations(db::no_timeout).then([&expected, expected_size, count, stop, &mutations] (mutation_fragment_opt mfopt) mutable { if (mfopt) { BOOST_REQUIRE(mfopt->is_partition_start()); BOOST_REQUIRE(*count < expected_size); BOOST_REQUIRE(std::vector({expected.back()}) == mfopt->as_partition_start().key().key().explode()); expected.pop_back(); (*count)++; - return mutations->next_partition(); + return mutations.next_partition(); } else { *stop = true; return make_ready_future<>(); @@ -367,6 +372,7 @@ future<> test_range_reads(sstables::test_env& env, const dht::token& min, const }).then([count, expected_size] { BOOST_REQUIRE(*count == expected_size); }); + }); }); }); } @@ -430,8 +436,9 @@ SEASTAR_TEST_CASE(test_sstable_can_write_and_read_range_tombstone) { sstables::sstable::format_types::big); write_memtable_to_sstable_for_test(*mt, sst).get(); sst->load().get(); - auto mr = sst->make_reader(s, tests::make_permit(), query::full_partition_range, s->full_slice()); - auto mut = read_mutation_from_flat_mutation_reader(mr, db::no_timeout).get0(); + auto mut = with_closeable(sst->make_reader(s, tests::make_permit(), query::full_partition_range, s->full_slice()), [] (flat_mutation_reader& mr) { + return read_mutation_from_flat_mutation_reader(mr, db::no_timeout); + }).get0(); BOOST_REQUIRE(bool(mut)); auto& rts = mut->partition().row_tombstones(); BOOST_REQUIRE(rts.size() == 1); @@ -451,8 +458,8 @@ SEASTAR_THREAD_TEST_CASE(compact_storage_sparse_read) { env.reusable_sst(compact_sparse_schema(), "test/resource/sstables/compact_sparse", 1).then([] (auto sstp) { return do_with(dht::partition_range::make_singular(make_dkey(compact_sparse_schema(), 
"first_row")), [sstp] (auto& pr) { auto s = compact_sparse_schema(); - auto rd = make_lw_shared(sstp->make_reader(s, tests::make_permit(), pr, s->full_slice())); - return read_mutation_from_flat_mutation_reader(*rd, db::no_timeout).then([sstp, s, rd] (auto mutation) { + return with_closeable(sstp->make_reader(s, tests::make_permit(), pr, s->full_slice()), [sstp, s] (flat_mutation_reader& rd) { + return read_mutation_from_flat_mutation_reader(rd, db::no_timeout).then([sstp, s] (auto mutation) { BOOST_REQUIRE(mutation); auto& mp = mutation->partition(); auto& row = mp.clustered_row(*s, clustering_key::make_empty()); @@ -460,6 +467,7 @@ SEASTAR_THREAD_TEST_CASE(compact_storage_sparse_read) { match_live_cell(row.cells(), *s, "cl2", data_value(to_bytes("cl2"))); return make_ready_future<>(); }); + }); }); }).get(); }).get(); @@ -470,8 +478,8 @@ SEASTAR_THREAD_TEST_CASE(compact_storage_simple_dense_read) { env.reusable_sst(compact_simple_dense_schema(), "test/resource/sstables/compact_simple_dense", 1).then([] (auto sstp) { return do_with(dht::partition_range::make_singular(make_dkey(compact_simple_dense_schema(), "first_row")), [sstp] (auto& pr) { auto s = compact_simple_dense_schema(); - auto rd = make_lw_shared(sstp->make_reader(s, tests::make_permit(), pr, s->full_slice())); - return read_mutation_from_flat_mutation_reader(*rd, db::no_timeout).then([sstp, s, rd] (auto mutation) { + return with_closeable(sstp->make_reader(s, tests::make_permit(), pr, s->full_slice()), [sstp, s] (flat_mutation_reader& rd) { + return read_mutation_from_flat_mutation_reader(rd, db::no_timeout).then([sstp, s] (auto mutation) { auto& mp = mutation->partition(); auto exploded = exploded_clustering_prefix({"cl1"}); @@ -480,6 +488,7 @@ SEASTAR_THREAD_TEST_CASE(compact_storage_simple_dense_read) { auto& row = mp.clustered_row(*s, clustering); match_live_cell(row.cells(), *s, "cl2", data_value(to_bytes("cl2"))); return make_ready_future<>(); + }); }); }); }).get(); @@ -491,8 +500,8 @@ 
SEASTAR_THREAD_TEST_CASE(compact_storage_dense_read) { env.reusable_sst(compact_dense_schema(), "test/resource/sstables/compact_dense", 1).then([] (auto sstp) { return do_with(dht::partition_range::make_singular(make_dkey(compact_dense_schema(), "first_row")), [sstp] (auto& pr) { auto s = compact_dense_schema(); - auto rd = make_lw_shared(sstp->make_reader(s, tests::make_permit(), pr, s->full_slice())); - return read_mutation_from_flat_mutation_reader(*rd, db::no_timeout).then([sstp, s, rd] (auto mutation) { + return with_closeable(sstp->make_reader(s, tests::make_permit(), pr, s->full_slice()), [sstp, s] (flat_mutation_reader& rd) { + return read_mutation_from_flat_mutation_reader(rd, db::no_timeout).then([sstp, s] (auto mutation) { auto& mp = mutation->partition(); auto exploded = exploded_clustering_prefix({"cl1", "cl2"}); @@ -501,6 +510,7 @@ SEASTAR_THREAD_TEST_CASE(compact_storage_dense_read) { auto& row = mp.clustered_row(*s, clustering); match_live_cell(row.cells(), *s, "cl3", data_value(to_bytes("cl3"))); return make_ready_future<>(); + }); }); }); }).get(); @@ -515,9 +525,9 @@ SEASTAR_THREAD_TEST_CASE(broken_ranges_collection) { test_env::do_with_async([] (test_env& env) { env.reusable_sst(peers_schema(), "test/resource/sstables/broken_ranges", 2).then([] (auto sstp) { auto s = peers_schema(); - auto reader = make_lw_shared(sstp->as_mutation_source().make_reader(s, tests::make_permit(), query::full_partition_range)); - return repeat([s, reader] { - return read_mutation_from_flat_mutation_reader(*reader, db::no_timeout).then([s, reader] (mutation_opt mut) { + return with_closeable(sstp->as_mutation_source().make_reader(s, tests::make_permit(), query::full_partition_range), [s] (flat_mutation_reader& reader) { + return repeat([s, &reader] { + return read_mutation_from_flat_mutation_reader(reader, db::no_timeout).then([s] (mutation_opt mut) { auto key_equal = [s, &mut] (sstring ip) { return mut->key().equal(*s, partition_key::from_deeply_exploded(*s, { 
net::inet_address(ip) })); }; @@ -538,6 +548,7 @@ SEASTAR_THREAD_TEST_CASE(broken_ranges_collection) { } return stop_iteration::no; }); + }); }); }).get(); }).get(); @@ -583,7 +594,7 @@ SEASTAR_THREAD_TEST_CASE(tombstone_in_tombstone) { test_env::do_with_async([] (test_env& env) { ka_sst(env, tombstone_overlap_schema(), "test/resource/sstables/tombstone_overlap", 1).then([] (auto sstp) { auto s = tombstone_overlap_schema(); - return do_with(sstp->make_reader(s, tests::make_permit(), query::full_partition_range, s->full_slice()), [sstp, s] (auto& reader) { + return with_closeable(sstp->make_reader(s, tests::make_permit(), query::full_partition_range, s->full_slice()), [sstp, s] (auto& reader) { return repeat([sstp, s, &reader] { return read_mutation_from_flat_mutation_reader(reader, db::no_timeout).then([s] (mutation_opt mut) { if (!mut) { @@ -648,7 +659,7 @@ SEASTAR_THREAD_TEST_CASE(range_tombstone_reading) { test_env::do_with_async([] (test_env& env) { ka_sst(env, tombstone_overlap_schema(), "test/resource/sstables/tombstone_overlap", 4).then([] (auto sstp) { auto s = tombstone_overlap_schema(); - return do_with(sstp->make_reader(s, tests::make_permit(), query::full_partition_range, s->full_slice()), [sstp, s] (auto& reader) { + return with_closeable(sstp->make_reader(s, tests::make_permit(), query::full_partition_range, s->full_slice()), [sstp, s] (auto& reader) { return repeat([sstp, s, &reader] { return read_mutation_from_flat_mutation_reader(reader, db::no_timeout).then([s] (mutation_opt mut) { if (!mut) { @@ -727,7 +738,7 @@ SEASTAR_THREAD_TEST_CASE(tombstone_in_tombstone2) { test_env::do_with_async([] (test_env& env) { ka_sst(env, tombstone_overlap_schema2(), "test/resource/sstables/tombstone_overlap", 3).then([] (auto sstp) { auto s = tombstone_overlap_schema2(); - return do_with(sstp->make_reader(s, tests::make_permit(), query::full_partition_range, s->full_slice()), [sstp, s] (auto& reader) { + return with_closeable(sstp->make_reader(s, 
tests::make_permit(), query::full_partition_range, s->full_slice()), [sstp, s] (auto& reader) { return repeat([sstp, s, &reader] { return read_mutation_from_flat_mutation_reader(reader, db::no_timeout).then([s] (mutation_opt mut) { if (!mut) { @@ -869,8 +880,9 @@ SEASTAR_TEST_CASE(test_non_compound_table_row_is_not_marked_as_static) { sstables::sstable::format_types::big); write_memtable_to_sstable_for_test(*mt, sst).get(); sst->load().get(); - auto mr = sst->make_reader(s, tests::make_permit(), query::full_partition_range, s->full_slice()); - auto mut = read_mutation_from_flat_mutation_reader(mr, db::no_timeout).get0(); + auto mut = with_closeable(sst->make_reader(s, tests::make_permit(), query::full_partition_range, s->full_slice()), [] (flat_mutation_reader& mr) { + return read_mutation_from_flat_mutation_reader(mr, db::no_timeout); + }).get0(); BOOST_REQUIRE(bool(mut)); } }).get(); @@ -909,6 +921,7 @@ SEASTAR_TEST_CASE(test_has_partition_key) { auto hk = sstables::sstable::make_hashed_key(*s, dk.key()); sst->load().get(); auto mr = sst->make_reader(s, tests::make_permit(), query::full_partition_range, s->full_slice()); + auto close_mr = deferred_close(mr); auto res = sst->has_partition_key(hk, dk).get0(); BOOST_REQUIRE(bool(res)); @@ -1461,13 +1474,15 @@ SEASTAR_THREAD_TEST_CASE(test_large_index_pages_do_not_cause_large_allocations) auto pr = dht::partition_range::make_singular(small_keys[0]); - auto mt_reader = mt->make_flat_reader(s, tests::make_permit(), pr); - mutation expected = *read_mutation_from_flat_mutation_reader(mt_reader, db::no_timeout).get0(); + mutation expected = *with_closeable(mt->make_flat_reader(s, tests::make_permit(), pr), [] (flat_mutation_reader& mt_reader) { + return read_mutation_from_flat_mutation_reader(mt_reader, db::no_timeout); + }).get0(); auto t0 = std::chrono::steady_clock::now(); auto large_allocs_before = memory::stats().large_allocations(); - auto sst_reader = sst->as_mutation_source().make_reader(s, tests::make_permit(), 
pr); - mutation actual = *read_mutation_from_flat_mutation_reader(sst_reader, db::no_timeout).get0(); + mutation actual = *with_closeable(sst->as_mutation_source().make_reader(s, tests::make_permit(), pr), [] (flat_mutation_reader& sst_reader) { + return read_mutation_from_flat_mutation_reader(sst_reader, db::no_timeout); + }).get0(); auto large_allocs_after = memory::stats().large_allocations(); auto duration = std::chrono::steady_clock::now() - t0; diff --git a/test/boost/sstable_test.cc b/test/boost/sstable_test.cc index abc6021ff8..431771b6da 100644 --- a/test/boost/sstable_test.cc +++ b/test/boost/sstable_test.cc @@ -25,6 +25,8 @@ #include #include #include +#include + #include "sstables/sstables.hh" #include "sstables/compaction_manager.hh" #include "sstables/key.hh" @@ -842,9 +844,10 @@ SEASTAR_TEST_CASE(wrong_range) { return test_using_reusable_sst(uncompressed_schema(), "test/resource/sstables/wrongrange", 114, [] (auto sstp) { return do_with(dht::partition_range::make_singular(make_dkey(uncompressed_schema(), "todata")), [sstp] (auto& range) { auto s = columns_schema(); - auto rd = make_lw_shared(sstp->make_reader(s, tests::make_permit(), range, s->full_slice())); - return read_mutation_from_flat_mutation_reader(*rd, db::no_timeout).then([sstp, s, rd] (auto mutation) { + return with_closeable(sstp->make_reader(s, tests::make_permit(), range, s->full_slice()), [sstp, s] (flat_mutation_reader& rd) { + return read_mutation_from_flat_mutation_reader(rd, db::no_timeout).then([sstp, s] (auto mutation) { return make_ready_future<>(); + }); }); }); }); @@ -979,6 +982,7 @@ static future count_rows(sstable_ptr sstp, schema_ptr s, sstring key, sstri auto ps = make_partition_slice(*s, ck1, ck2); auto pr = dht::partition_range::make_singular(make_dkey(s, key.c_str())); auto rd = sstp->make_reader(s, tests::make_permit(), pr, ps); + auto close_rd = deferred_close(rd); auto mfopt = rd(db::no_timeout).get0(); if (!mfopt) { return 0; @@ -1000,6 +1004,7 @@ static future 
count_rows(sstable_ptr sstp, schema_ptr s, sstring key) { return seastar::async([sstp, s, key] () mutable { auto pr = dht::partition_range::make_singular(make_dkey(s, key.c_str())); auto rd = sstp->make_reader(s, tests::make_permit(), pr, s->full_slice()); + auto close_rd = deferred_close(rd); auto mfopt = rd(db::no_timeout).get0(); if (!mfopt) { return 0; @@ -1022,6 +1027,7 @@ static future count_rows(sstable_ptr sstp, schema_ptr s, sstring ck1, sstri return seastar::async([sstp, s, ck1, ck2] () mutable { auto ps = make_partition_slice(*s, ck1, ck2); auto reader = sstp->make_reader(s, tests::make_permit(), query::full_partition_range, ps); + auto close_reader = deferred_close(reader); int nrows = 0; auto mfopt = reader(db::no_timeout).get0(); while (mfopt) { diff --git a/test/boost/view_build_test.cc b/test/boost/view_build_test.cc index bc272977fe..cee5f44101 100644 --- a/test/boost/view_build_test.cc +++ b/test/boost/view_build_test.cc @@ -29,6 +29,8 @@ #include #include +#include + #include "test/lib/cql_test_env.hh" #include "test/lib/cql_assertions.hh" #include "test/lib/sstable_utils.hh" @@ -795,6 +797,7 @@ SEASTAR_THREAD_TEST_CASE(test_view_update_generator_buffering) { }; reader_concurrency_semaphore sem(1, new_reader_base_cost, get_name()); + auto stop_sem = deferred_stop(sem); auto schema = schema_builder("ks", "cf") .with_column("pk", int32_type, column_kind::partition_key) @@ -848,7 +851,7 @@ SEASTAR_THREAD_TEST_CASE(test_view_update_generator_buffering) { mutation_reader::forwarding fwd_mr) { return make_restricted_flat_reader(mt->as_data_source(), s, std::move(permit), pr, ps, pc, std::move(ts), fwd_ms, fwd_mr); }); - auto [staging_reader, staging_reader_handle] = make_manually_paused_evictable_reader( + auto p = make_manually_paused_evictable_reader( std::move(ms), schema, permit, @@ -857,6 +860,9 @@ SEASTAR_THREAD_TEST_CASE(test_view_update_generator_buffering) { service::get_local_streaming_priority(), nullptr, ::mutation_reader::forwarding::no); 
+ auto& staging_reader = std::get<0>(p); + auto& staging_reader_handle = std::get<1>(p); + auto close_staging_reader = deferred_close(staging_reader); std::vector collected_muts; bool ok = true; diff --git a/test/lib/flat_mutation_reader_assertions.hh b/test/lib/flat_mutation_reader_assertions.hh index 4e22acf7c8..070818a224 100644 --- a/test/lib/flat_mutation_reader_assertions.hh +++ b/test/lib/flat_mutation_reader_assertions.hh @@ -52,6 +52,23 @@ public: , _tombstones(*_reader.schema()) { } + ~flat_reader_assertions() { + _reader.close().get(); + } + + flat_reader_assertions(const flat_reader_assertions&) = delete; + flat_reader_assertions(flat_reader_assertions&&) = default; + + flat_reader_assertions& operator=(flat_reader_assertions&& o) { + if (this != &o) { + _reader.close().get(); + _reader = std::move(o._reader); + _pr = std::move(o._pr); + _tombstones = std::move(o._tombstones); + } + return *this; + } + flat_reader_assertions& produces_partition_start(const dht::decorated_key& dk, std::optional tomb = std::nullopt) { testlog.trace("Expecting partition start with key {}", dk); diff --git a/test/lib/memtable_snapshot_source.hh b/test/lib/memtable_snapshot_source.hh index 55e68175c6..82cc466bf2 100644 --- a/test/lib/memtable_snapshot_source.hh +++ b/test/lib/memtable_snapshot_source.hh @@ -28,6 +28,7 @@ #include #include #include +#include // in-memory snapshottable mutation source. // Must be destroyed in a seastar thread. 
@@ -77,6 +78,7 @@ private: } _memtables.push_back(new_memtable()); auto&& rd = make_combined_reader(new_mt->schema(), tests::make_permit(), std::move(readers)); + auto close_rd = deferred_close(rd); consume_partitions(rd, [&] (mutation&& m) { new_mt->apply(std::move(m)); return stop_iteration::no; diff --git a/test/lib/mutation_source_test.cc b/test/lib/mutation_source_test.cc index d849bf4dbc..46f15edb22 100644 --- a/test/lib/mutation_source_test.cc +++ b/test/lib/mutation_source_test.cc @@ -41,6 +41,7 @@ #include "types/map.hh" #include "types/list.hh" #include "types/set.hh" +#include // partitions must be sorted by decorated key static void require_no_token_duplicates(const std::vector& partitions) { @@ -387,9 +388,11 @@ static void test_streamed_mutation_forwarding_is_consistent_with_slicing(populat flat_mutation_reader sliced_reader = ms.make_reader(m.schema(), tests::make_permit(), prange, slice_with_ranges); + auto close_sliced_reader = deferred_close(sliced_reader); flat_mutation_reader fwd_reader = ms.make_reader(m.schema(), tests::make_permit(), prange, full_slice, default_priority_class(), nullptr, streamed_mutation::forwarding::yes); + auto close_fwd_reader = deferred_close(fwd_reader); std::optional builder{}; struct consumer { @@ -1336,6 +1339,7 @@ void test_slicing_with_overlapping_range_tombstones(populate_fn_ex populate) { { auto slice = partition_slice_builder(*s).with_range(range).build(); auto rd = ds.make_reader(s, tests::make_permit(), query::full_partition_range, slice); + auto close_rd = deferred_close(rd); auto prange = position_range(range); mutation result(m1.schema(), m1.decorated_key()); @@ -1355,6 +1359,7 @@ void test_slicing_with_overlapping_range_tombstones(populate_fn_ex populate) { { auto rd = ds.make_reader(s, tests::make_permit(), query::full_partition_range, s->full_slice(), default_priority_class(), nullptr, streamed_mutation::forwarding::yes); + auto close_rd = deferred_close(rd); auto prange = position_range(range); mutation 
result(m1.schema(), m1.decorated_key()); @@ -2339,12 +2344,14 @@ static bool compare_readers(const schema& s, flat_mutation_reader& authority, fl } void compare_readers(const schema& s, flat_mutation_reader authority, flat_mutation_reader tested) { + auto close_authority = deferred_close(authority); auto assertions = assert_that(std::move(tested)); compare_readers(s, authority, assertions); } // Assumes that the readers return fragments from (at most) a single (and the same) partition. void compare_readers(const schema& s, flat_mutation_reader authority, flat_mutation_reader tested, const std::vector& fwd_ranges) { + auto close_authority = deferred_close(authority); auto assertions = assert_that(std::move(tested)); if (compare_readers(s, authority, assertions)) { for (auto& r: fwd_ranges) { diff --git a/test/lib/normalizing_reader.cc b/test/lib/normalizing_reader.cc index 9d1e41389b..3e9e3f5c33 100644 --- a/test/lib/normalizing_reader.cc +++ b/test/lib/normalizing_reader.cc @@ -93,6 +93,9 @@ future<> normalizing_reader::fast_forward_to( _end_of_stream = false; return _rd.fast_forward_to(std::move(pr), timeout); } +future<> normalizing_reader::close() noexcept { + return _rd.close(); +} flat_mutation_reader make_normalizing_reader(flat_mutation_reader rd) { return make_flat_mutation_reader(std::move(rd)); diff --git a/test/lib/normalizing_reader.hh b/test/lib/normalizing_reader.hh index f682e1e33d..7bc76cfef2 100644 --- a/test/lib/normalizing_reader.hh +++ b/test/lib/normalizing_reader.hh @@ -46,6 +46,8 @@ public: virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override; virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override; + + virtual future<> close() noexcept override; }; // Creates a mutation_reader wrapper which creates a new stream of mutations diff --git a/test/lib/reader_lifecycle_policy.hh b/test/lib/reader_lifecycle_policy.hh index ac2250cd29..6b30b9ff32 
100644 --- a/test/lib/reader_lifecycle_policy.hh +++ b/test/lib/reader_lifecycle_policy.hh @@ -143,19 +143,22 @@ public: _contexts[shard]->op = _operation_gate.enter(); return _factory_function(std::move(schema), *_contexts[shard]->range, *_contexts[shard]->slice, pc, std::move(trace_state), fwd_mr); } - virtual void destroy_reader(shard_id shard, future reader) noexcept override { - // Move to the background, waited via _operation_gate - (void)reader.then([shard, this] (stopped_reader&& reader) { + virtual future<> destroy_reader(shard_id shard, future reader) noexcept override { + // waited via _operation_gate + return reader.then([shard, this] (stopped_reader&& reader) { return smp::submit_to(shard, [handle = std::move(reader.handle), ctx = &*_contexts[shard]] () mutable { - ctx->semaphore->unregister_inactive_read(std::move(*handle)); - ctx->semaphore->broken(std::make_exception_ptr(broken_semaphore{})); + auto reader_opt = ctx->semaphore->unregister_inactive_read(std::move(*handle)); + auto ret = reader_opt ? 
reader_opt->close() : make_ready_future<>(); + ctx->semaphore->broken(); if (ctx->wait_future) { + ret = ret.then([ctx = std::move(ctx)] () mutable { return ctx->wait_future->then_wrapped([ctx = std::move(ctx)] (future f) mutable { f.ignore_ready_future(); ctx->permit.reset(); // make sure it's destroyed before the semaphore }); + }); } - return make_ready_future<>(); + return std::move(ret); }); }).finally([zis = shared_from_this()] {}); } diff --git a/test/lib/simple_position_reader_queue.hh b/test/lib/simple_position_reader_queue.hh index f5e52806c5..7cc6c28f84 100644 --- a/test/lib/simple_position_reader_queue.hh +++ b/test/lib/simple_position_reader_queue.hh @@ -70,4 +70,13 @@ struct simple_position_reader_queue : public position_reader_queue { virtual bool empty(position_in_partition_view bound) const override { return _it == _rs.end() || _cmp(bound, _it->lower) < 0; } + + virtual future<> close() noexcept override { + return do_for_each(_it, _rs.end(), [this] (reader_bounds& rb) { + auto r = std::move(rb.r); + return r.close(); + }).finally([this] { + _it = _rs.end(); + }); + } }; diff --git a/test/manual/enormous_table_scan_test.cc b/test/manual/enormous_table_scan_test.cc index 5b061afc41..07bb930fdd 100644 --- a/test/manual/enormous_table_scan_test.cc +++ b/test/manual/enormous_table_scan_test.cc @@ -134,6 +134,10 @@ public: return make_ready_future<>(); } + virtual future<> close() noexcept override { + return make_ready_future<>(); + } + private: void get_next_partition() { if (_pps != partition_production_state::not_started) { diff --git a/test/manual/sstable_scan_footprint_test.cc b/test/manual/sstable_scan_footprint_test.cc index 9572e7acd5..f7e52f322d 100644 --- a/test/manual/sstable_scan_footprint_test.cc +++ b/test/manual/sstable_scan_footprint_test.cc @@ -126,6 +126,9 @@ public: } stats_collector(const stats_collector&) = delete; stats_collector(stats_collector&&) = delete; + ~stats_collector() { + _sem.stop().get(); + } collect_guard collect() { 
return collect_guard{*this, _params ? _params->period : std::chrono::milliseconds(0)}; } diff --git a/test/perf/perf_fast_forward.cc b/test/perf/perf_fast_forward.cc index ef0fb99d65..96b9b2a40e 100644 --- a/test/perf/perf_fast_forward.cc +++ b/test/perf/perf_fast_forward.cc @@ -40,6 +40,7 @@ #include #include #include +#include #include "sstables/compaction_manager.hh" #include "transport/messages/result_message.hh" #include "sstables/shared_index_lists.hh" @@ -774,6 +775,7 @@ static test_result scan_rows_with_stride(column_family& cf, int n_rows, int n_re default_priority_class(), nullptr, n_skip ? streamed_mutation::forwarding::yes : streamed_mutation::forwarding::no); + auto close_rd = deferred_close(rd); metrics_snapshot before; assert_partition_start(rd); @@ -814,6 +816,7 @@ static test_result scan_with_stride_partitions(column_family& cf, int n, int n_r auto pr = n_skip ? dht::partition_range::make_ending_with(dht::partition_range::bound(keys[0], false)) // covering none : query::full_partition_range; auto rd = cf.make_reader(cf.schema(), tests::make_permit(), pr, cf.schema()->full_slice()); + auto close_rd = deferred_close(rd); metrics_snapshot before; @@ -841,6 +844,7 @@ static test_result slice_rows(column_family& cf, int offset = 0, int n_read = 1) default_priority_class(), nullptr, streamed_mutation::forwarding::yes); + auto close_rd = deferred_close(rd); metrics_snapshot before; assert_partition_start(rd); @@ -866,6 +870,8 @@ static test_result slice_rows_by_ck(column_family& cf, int offset = 0, int n_rea .build(); auto pr = dht::partition_range::make_singular(make_pkey(*cf.schema(), 0)); auto rd = cf.make_reader(cf.schema(), tests::make_permit(), pr, slice); + auto close_rd = deferred_close(rd); + return test_reading_all(rd); } @@ -880,6 +886,7 @@ static test_result select_spread_rows(column_family& cf, int stride = 0, int n_r tests::make_permit(), query::full_partition_range, slice); + auto close_rd = deferred_close(rd); return test_reading_all(rd); 
} @@ -893,12 +900,15 @@ static test_result test_slicing_using_restrictions(column_family& cf, int_range auto pr = dht::partition_range::make_singular(make_pkey(*cf.schema(), 0)); auto rd = cf.make_reader(cf.schema(), tests::make_permit(), pr, slice, default_priority_class(), nullptr, streamed_mutation::forwarding::no, mutation_reader::forwarding::no); + auto close_rd = deferred_close(rd); + return test_reading_all(rd); } static test_result slice_rows_single_key(column_family& cf, int offset = 0, int n_read = 1) { auto pr = dht::partition_range::make_singular(make_pkey(*cf.schema(), 0)); auto rd = cf.make_reader(cf.schema(), tests::make_permit(), pr, cf.schema()->full_slice(), default_priority_class(), nullptr, streamed_mutation::forwarding::yes, mutation_reader::forwarding::no); + auto close_rd = deferred_close(rd); metrics_snapshot before; assert_partition_start(rd); @@ -918,6 +928,7 @@ static test_result slice_partitions(column_family& cf, const std::vectorfull_slice()); + auto close_rd = deferred_close(rd); metrics_snapshot before; uint64_t fragments = consume_all_with_next_partition(rd); @@ -1026,6 +1037,7 @@ static test_result test_forwarding_with_restriction(column_family& cf, clustered default_priority_class(), nullptr, streamed_mutation::forwarding::yes, mutation_reader::forwarding::no); + auto close_rd = deferred_close(rd); uint64_t fragments = 0; metrics_snapshot before; diff --git a/test/perf/perf_mutation_readers.cc b/test/perf/perf_mutation_readers.cc index eacfdb7aa2..5d78d0441c 100644 --- a/test/perf/perf_mutation_readers.cc +++ b/test/perf/perf_mutation_readers.cc @@ -24,6 +24,7 @@ #include #include "seastar/include/seastar/testing/perf_tests.hh" +#include #include "test/lib/simple_schema.hh" #include "test/lib/reader_permit.hh" @@ -146,7 +147,7 @@ std::vector> combined::create_overlapping_partitions_disjo future<> combined::consume_all(flat_mutation_reader mr) const { - return do_with(std::move(mr), [] (auto& mr) { + return 
with_closeable(std::move(mr), [] (auto& mr) { perf_tests::start_measuring_time(); return mr.consume_pausable([] (mutation_fragment mf) { perf_tests::do_not_optimize(mf); @@ -270,6 +271,8 @@ future clustering_combined::consume_all(flat_mutation_reader mr) const }, db::no_timeout).then([&num_mfs] { perf_tests::stop_measuring_time(); return num_mfs; + }).finally([&mr] { + return mr.close(); }); }); } @@ -377,7 +380,7 @@ protected: } future<> consume_all(flat_mutation_reader mr) const { - return do_with(std::move(mr), [] (auto& mr) { + return with_closeable(std::move(mr), [] (auto& mr) { return mr.consume_pausable([] (mutation_fragment mf) { perf_tests::do_not_optimize(mf); return stop_iteration::no; diff --git a/test/perf/perf_row_cache_update.cc b/test/perf/perf_row_cache_update.cc index 5af930b264..af6f79349f 100644 --- a/test/perf/perf_row_cache_update.cc +++ b/test/perf/perf_row_cache_update.cc @@ -84,6 +84,7 @@ void run_test(const sstring& name, schema_ptr s, MutationGenerator&& gen) { // going away after memtable was merged to cache. 
auto rd = std::make_unique( make_combined_reader(s, tests::make_permit(), cache.make_reader(s, tests::make_permit()), mt->make_flat_reader(s, tests::make_permit()))); + auto close_rd = defer([&rd] { rd->close().get(); }); rd->set_max_buffer_size(1); rd->fill_buffer(db::no_timeout).get(); @@ -99,6 +100,9 @@ void run_test(const sstring& name, schema_ptr s, MutationGenerator&& gen) { }, db::no_timeout).get(); mt = {}; + + close_rd.cancel(); + rd->close().get(); rd = {}; slm.stop(); diff --git a/test/perf/perf_sstable.hh b/test/perf/perf_sstable.hh index f36cd25f83..24e2c4cdf3 100644 --- a/test/perf/perf_sstable.hh +++ b/test/perf/perf_sstable.hh @@ -20,6 +20,9 @@ */ #pragma once + +#include + #include "sstables/sstables.hh" #include "sstables/compaction_manager.hh" #include "cell_locking.hh" @@ -204,7 +207,7 @@ public: } future read_sequential_partitions(int idx) { - return do_with(_sst[0]->make_reader(s, tests::make_permit(), query::full_partition_range, s->full_slice()), [this] (flat_mutation_reader& r) { + return with_closeable(_sst[0]->make_reader(s, tests::make_permit(), query::full_partition_range, s->full_slice()), [this] (flat_mutation_reader& r) { auto start = perf_sstable_test_env::now(); auto total = make_lw_shared(0); auto done = make_lw_shared(false); diff --git a/test/unit/row_cache_alloc_stress_test.cc b/test/unit/row_cache_alloc_stress_test.cc index edad207ef3..7effb3daef 100644 --- a/test/unit/row_cache_alloc_stress_test.cc +++ b/test/unit/row_cache_alloc_stress_test.cc @@ -23,6 +23,7 @@ #include #include #include +#include #include "utils/managed_bytes.hh" #include "utils/logalloc.hh" @@ -188,6 +189,7 @@ int main(int argc, char** argv) { for (auto&& key : keys) { auto range = dht::partition_range::make_singular(key); auto reader = cache.make_reader(s, tests::make_permit(), range); + auto close_reader = deferred_close(reader); auto mo = read_mutation_from_flat_mutation_reader(reader, db::no_timeout).get0(); assert(mo); 
assert(mo->partition().live_row_count(*s) == @@ -205,6 +207,7 @@ int main(int argc, char** argv) { for (auto&& key : keys) { auto range = dht::partition_range::make_singular(key); auto reader = cache.make_reader(s, tests::make_permit(), range); + auto close_reader = deferred_close(reader); auto mfopt = reader(db::no_timeout).get0(); assert(mfopt); assert(mfopt->is_partition_start()); @@ -243,6 +246,7 @@ int main(int argc, char** argv) { try { auto reader = cache.make_reader(s, tests::make_permit(), range); + auto close_reader = deferred_close(reader); assert(!reader(db::no_timeout).get0()); auto evicted_from_cache = logalloc::segment_size + large_cell_size; // GCC's -fallocation-dce can remove dead calls to new and malloc, so diff --git a/test/unit/row_cache_stress_test.cc b/test/unit/row_cache_stress_test.cc index 85557e8c77..357e2c1222 100644 --- a/test/unit/row_cache_stress_test.cc +++ b/test/unit/row_cache_stress_test.cc @@ -132,6 +132,15 @@ struct table { dht::partition_range pr; query::partition_slice slice; flat_mutation_reader rd; + + reader(dht::partition_range pr_, query::partition_slice slice_) noexcept + : pr(std::move(pr_)) + , slice(std::move(slice_)) + , rd(nullptr) + { } + ~reader() { + rd.close().get(); + } }; void alter_schema() { @@ -145,7 +154,7 @@ struct table { std::unique_ptr make_reader(dht::partition_range pr, query::partition_slice slice) { testlog.trace("making reader, pk={} ck={}", pr, slice); - auto r = std::make_unique(reader{std::move(pr), std::move(slice), make_empty_flat_reader(s.schema(), tests::make_permit())}); + auto r = std::make_unique(std::move(pr), std::move(slice)); std::vector rd; if (prev_mt) { rd.push_back(prev_mt->make_flat_reader(s.schema(), tests::make_permit(), r->pr, r->slice, default_priority_class(), nullptr,