Merge 'table_helper: fix use-after-free on prepared-statement invalidation' from Marcin Maliszkiewicz

insert() held no local strong ref to the prepared modification_statement
across the suspension in execute(). On a single shard:

1. Fiber A suspends inside _insert_stmt->execute().
2. DROP TABLE / DROP KEYSPACE on the target, or LRU eviction, removes
   the prepared_statements_cache entry, releasing its strong ref.
3. Fiber B re-enters cache_table_info(), sees _prepared_stmt
   (checked_weak_ptr) invalidated, and runs _insert_stmt = nullptr,
   releasing the last strong ref. The modification_statement is freed.
4. Fiber A resumes inside execute() and touches freed *this.

Pin strong ref to _insert_stmt locally before the suspension.

Fixes https://scylladb.atlassian.net/browse/SCYLLADB-1667

Backport: all supported branches, it's memory corruption bug, long present

Closes scylladb/scylladb#29588

* github.com:scylladb/scylladb:
  test/boost: add dummy case to table_helper_test for non-injection modes
  test/boost: add regression test for table_helper insert() UAF
  utils/error_injection: add waiters() API
  table_helper: fix use-after-free on prepared-statement invalidation

(cherry picked from commit efcc0b6376)

Closes scylladb/scylladb#29747

Closes scylladb/scylladb#29802

Closes scylladb/scylladb#29812
This commit is contained in:
Piotr Dulikowski
2026-05-04 17:21:05 +02:00
committed by Marcin Maliszkiewicz
parent b04af43dff
commit e4e1de5e3b
5 changed files with 139 additions and 1 deletions

View File

@@ -1536,6 +1536,7 @@ deps['test/boost/combined_tests'] += [
'test/boost/aggregate_fcts_test.cc',
'test/boost/auth_test.cc',
'test/boost/batchlog_manager_test.cc',
'test/boost/table_helper_test.cc',
'test/boost/cache_algorithm_test.cc',
'test/boost/castas_fcts_test.cc',
'test/boost/cdc_test.cc',

View File

@@ -8,6 +8,7 @@
*/
#include "utils/assert.hh"
#include "utils/error_injection.hh"
#include <seastar/core/coroutine.hh>
#include <seastar/coroutine/parallel_for_each.hh>
#include "table_helper.hh"
@@ -135,9 +136,15 @@ future<> table_helper::cache_table_info(cql3::query_processor& qp, service::migr
future<> table_helper::insert(cql3::query_processor& qp, service::migration_manager& mm, service::query_state& qs, noncopyable_function<cql3::query_options ()> opt_maker) {
co_await cache_table_info(qp, mm, qs);
// Pin a strong ref locally: while we suspend in execute(), a concurrent
// insert() on this shard may reset _insert_stmt to nullptr if the
// prepared_statements_cache entry gets invalidated, freeing the object.
auto stmt = _insert_stmt;
auto opts = opt_maker();
opts.prepare(_prepared_stmt->bound_names);
co_await _insert_stmt->execute(qp, qs, opts, std::nullopt);
co_await utils::get_local_injector().inject("table_helper_insert_before_execute",
utils::wait_for_message(std::chrono::seconds{30}));
co_await stmt->execute(qp, qs, opts, std::nullopt);
}
future<> table_helper::setup_keyspace(cql3::query_processor& qp, service::migration_manager& mm, std::string_view keyspace_name, sstring replication_strategy_name,

View File

@@ -314,6 +314,7 @@ add_scylla_test(combined_tests
aggregate_fcts_test.cc
auth_test.cc
batchlog_manager_test.cc
table_helper_test.cc
cache_algorithm_test.cc
castas_fcts_test.cc
cdc_test.cc

View File

@@ -0,0 +1,111 @@
/*
* Copyright (C) 2026-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
*/
#include <boost/test/unit_test.hpp>
#undef SEASTAR_TESTING_MAIN
#include <seastar/testing/test_case.hh>
#include <seastar/core/future-util.hh>
#include <seastar/core/shared_ptr.hh>
#include <vector>
#include "test/lib/cql_test_env.hh"
#include "test/lib/log.hh"
#include "table_helper.hh"
#include "cql3/query_processor.hh"
#include "cql3/query_options.hh"
#include "cql3/cql_config.hh"
#include "service/client_state.hh"
#include "service/migration_manager.hh"
#include "service/query_state.hh"
#include "types/types.hh"
#include "utils/error_injection.hh"
// Regression test for use-after-free in table_helper::insert() when the
// prepared_statements_cache entry is invalidated (e.g. DROP TABLE) while a
// concurrent insert() is suspended in execute(). The injection point inside
// insert() is used to park fiber A deterministically, then fiber B drops the
// last strong ref; without the fix, resuming A crashes.
BOOST_AUTO_TEST_SUITE(table_helper_test)
#ifdef SCYLLA_ENABLE_ERROR_INJECTION
SEASTAR_TEST_CASE(test_concurrent_invalidation) {
return do_with_cql_env_thread([] (cql_test_env& env) {
auto& qp = env.local_qp();
auto& mm = env.migration_manager().local();
env.execute_cql("CREATE KEYSPACE th_ks WITH replication = "
"{'class': 'NetworkTopologyStrategy', 'replication_factor': 1}").get();
env.execute_cql("CREATE TABLE th_ks.t (id int PRIMARY KEY, v int)").get();
const sstring create_cql = "CREATE TABLE IF NOT EXISTS th_ks.t (id int PRIMARY KEY, v int)";
const sstring insert_cql = "INSERT INTO th_ks.t (id, v) VALUES (?, ?)";
table_helper helper("th_ks", "t", create_cql, insert_cql);
service::query_state qs(service::client_state::for_internal_calls(), empty_service_permit());
auto make_opts = [] {
std::vector<cql3::raw_value> vals {
cql3::raw_value::make_value(int32_type->decompose(0)),
cql3::raw_value::make_value(int32_type->decompose(0)),
};
return cql3::query_options(cql3::default_cql_config, db::consistency_level::ONE,
std::nullopt, std::move(vals), false,
cql3::query_options::specific_options::DEFAULT);
};
// Prime the prepared cache.
helper.insert(qp, mm, qs, make_opts).get();
utils::get_local_injector().enable("table_helper_insert_before_execute", true /*one_shot*/);
// Fiber A: suspends at the injection, between cache_table_info() and execute().
auto fiber_a = helper.insert(qp, mm, qs, make_opts);
// Wait until fiber A is actually parked in wait_for_message.
while (utils::get_local_injector().waiters("table_helper_insert_before_execute") == 0) {
seastar::yield().get();
}
// Evict the prepared cache entry - drops its strong ref to the
// modification_statement. helper._insert_stmt is the only ref left.
env.execute_cql("DROP TABLE th_ks.t").discard_result().get();
// Fiber B: cache_table_info() sees the weak ref invalidated and sets
// _insert_stmt = nullptr; the re-prepare then throws (table is gone).
helper.insert(qp, mm, qs, make_opts)
.handle_exception([] (std::exception_ptr) {}).get();
// Release fiber A. Unfixed: re-reads null _insert_stmt and crashes.
utils::get_local_injector().receive_message("table_helper_insert_before_execute");
try {
fiber_a.get();
} catch (...) {
// execute() may fail (table is gone); only the crash matters.
}
});
}
#endif // SCYLLA_ENABLE_ERROR_INJECTION
#ifndef SCYLLA_ENABLE_ERROR_INJECTION
// The only test in this suite requires error injection support. Without this
// dummy case the suite would be empty, which causes boost to report
// "test tree is empty" and pytest to exit with code 5 ("no tests collected"),
// failing CI in modes (e.g. release) where error injection is disabled.
BOOST_AUTO_TEST_CASE(test_skipped_no_error_injection) {
BOOST_TEST_MESSAGE("table_helper_test requires SCYLLA_ENABLE_ERROR_INJECTION; skipping");
}
#endif
BOOST_AUTO_TEST_SUITE_END()

View File

@@ -143,6 +143,7 @@ class error_injection {
struct injection_shared_data {
size_t received_message_count{0};
size_t shared_read_message_count{0};
size_t waiter_count{0};
condition_variable received_message_cv;
error_injection_parameters parameters;
sstring injection_name;
@@ -216,6 +217,8 @@ public:
}) : optimized_optional<abort_source::subscription>{};
try {
++_shared_data->waiter_count;
auto dec = defer([this] () noexcept { --_shared_data->waiter_count; });
co_await _shared_data->received_message_cv.wait(timeout, [&] {
if (as) {
as->check();
@@ -360,6 +363,17 @@ public:
return data && !data->is_ongoing_oneshot();
}
// \brief Returns the number of handlers of the named injection that are
// currently suspended in wait_for_message().
//
// Intended for tests that need to synchronize with one or more fibers
// parked on an injection.
// \param name error injection name to check
size_t waiters(const std::string_view& injection_name) const {
auto data = get_data(injection_name);
return data ? data->shared_data->waiter_count : 0;
}
// \brief Enter into error injection if it's enabled
// \param name error injection name to check
bool enter(const std::string_view& name) {
@@ -594,6 +608,10 @@ public:
return false;
}
size_t waiters(const std::string_view& name) const {
return 0;
}
bool enter(const std::string_view& name) const {
return false;
}