From e4e1de5e3b308f0dc75cd6fabdb98fc61ff082c4 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Mon, 4 May 2026 17:21:05 +0200 Subject: [PATCH] Merge 'table_helper: fix use-after-free on prepared-statement invalidation' from Marcin Maliszkiewicz insert() held no local strong ref to the prepared modification_statement across the suspension in execute(). On a single shard: 1. Fiber A suspends inside _insert_stmt->execute(). 2. DROP TABLE / DROP KEYSPACE on the target, or LRU eviction, removes the prepared_statements_cache entry, releasing its strong ref. 3. Fiber B re-enters cache_table_info(), sees _prepared_stmt (checked_weak_ptr) invalidated, and runs _insert_stmt = nullptr, releasing the last strong ref. The modification_statement is freed. 4. Fiber A resumes inside execute() and touches freed *this. Pin strong ref to _insert_stmt locally before the suspension. Fixes https://scylladb.atlassian.net/browse/SCYLLADB-1667 Backport: all supported branches, it's memory corruption bug, long present Closes scylladb/scylladb#29588 * github.com:scylladb/scylladb: test/boost: add dummy case to table_helper_test for non-injection modes test/boost: add regression test for table_helper insert() UAF utils/error_injection: add waiters() API table_helper: fix use-after-free on prepared-statement invalidation (cherry picked from commit efcc0b63762f5a8657e26451a9e267a6d8e6c123) Closes scylladb/scylladb#29747 Closes scylladb/scylladb#29802 Closes scylladb/scylladb#29812 --- configure.py | 1 + table_helper.cc | 9 ++- test/boost/CMakeLists.txt | 1 + test/boost/table_helper_test.cc | 111 ++++++++++++++++++++++++++++++++ utils/error_injection.hh | 18 ++++++ 5 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 test/boost/table_helper_test.cc diff --git a/configure.py b/configure.py index 94735c5fe4..18d96be061 100755 --- a/configure.py +++ b/configure.py @@ -1536,6 +1536,7 @@ deps['test/boost/combined_tests'] += [ 'test/boost/aggregate_fcts_test.cc', 'test/boost/auth_test.cc', 'test/boost/batchlog_manager_test.cc', + 'test/boost/table_helper_test.cc', 'test/boost/cache_algorithm_test.cc', 'test/boost/castas_fcts_test.cc', 'test/boost/cdc_test.cc', diff --git a/table_helper.cc b/table_helper.cc index 85c4472841..b530402b4b 100644 --- a/table_helper.cc +++ b/table_helper.cc @@ -8,6 +8,7 @@ */ #include "utils/assert.hh" +#include "utils/error_injection.hh" #include #include #include "table_helper.hh" @@ -135,9 +136,15 @@ future<> table_helper::cache_table_info(cql3::query_processor& qp, service::migr future<> table_helper::insert(cql3::query_processor& qp, service::migration_manager& mm, service::query_state& qs, noncopyable_function opt_maker) { co_await cache_table_info(qp, mm, qs); + // Pin a strong ref locally: while we suspend in execute(), a concurrent + // insert() on this shard may reset _insert_stmt to nullptr if the + // prepared_statements_cache entry gets invalidated, freeing the object. + auto stmt = _insert_stmt; auto opts = opt_maker(); opts.prepare(_prepared_stmt->bound_names); - co_await _insert_stmt->execute(qp, qs, opts, std::nullopt); + co_await utils::get_local_injector().inject("table_helper_insert_before_execute", + utils::wait_for_message(std::chrono::seconds{30})); + co_await stmt->execute(qp, qs, opts, std::nullopt); } future<> table_helper::setup_keyspace(cql3::query_processor& qp, service::migration_manager& mm, std::string_view keyspace_name, sstring replication_strategy_name, diff --git a/test/boost/CMakeLists.txt b/test/boost/CMakeLists.txt index 46ffde59fb..107b459eda 100644 --- a/test/boost/CMakeLists.txt +++ b/test/boost/CMakeLists.txt @@ -314,6 +314,7 @@ add_scylla_test(combined_tests aggregate_fcts_test.cc auth_test.cc batchlog_manager_test.cc + table_helper_test.cc cache_algorithm_test.cc castas_fcts_test.cc cdc_test.cc diff --git a/test/boost/table_helper_test.cc b/test/boost/table_helper_test.cc new file mode 100644 index 0000000000..0f08a9b73f --- /dev/null +++ b/test/boost/table_helper_test.cc @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2026-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1 + */ + +#include + +#undef SEASTAR_TESTING_MAIN +#include +#include +#include +#include + +#include "test/lib/cql_test_env.hh" +#include "test/lib/log.hh" + +#include "table_helper.hh" +#include "cql3/query_processor.hh" +#include "cql3/query_options.hh" +#include "cql3/cql_config.hh" +#include "service/client_state.hh" +#include "service/migration_manager.hh" +#include "service/query_state.hh" +#include "types/types.hh" +#include "utils/error_injection.hh" + +// Regression test for use-after-free in table_helper::insert() when the +// prepared_statements_cache entry is invalidated (e.g. DROP TABLE) while a +// concurrent insert() is suspended in execute(). The injection point inside +// insert() is used to park fiber A deterministically, then fiber B drops the +// last strong ref; without the fix, resuming A crashes. + +BOOST_AUTO_TEST_SUITE(table_helper_test) + +#ifdef SCYLLA_ENABLE_ERROR_INJECTION + +SEASTAR_TEST_CASE(test_concurrent_invalidation) { + return do_with_cql_env_thread([] (cql_test_env& env) { + auto& qp = env.local_qp(); + auto& mm = env.migration_manager().local(); + + env.execute_cql("CREATE KEYSPACE th_ks WITH replication = " + "{'class': 'NetworkTopologyStrategy', 'replication_factor': 1}").get(); + env.execute_cql("CREATE TABLE th_ks.t (id int PRIMARY KEY, v int)").get(); + + const sstring create_cql = "CREATE TABLE IF NOT EXISTS th_ks.t (id int PRIMARY KEY, v int)"; + const sstring insert_cql = "INSERT INTO th_ks.t (id, v) VALUES (?, ?)"; + + table_helper helper("th_ks", "t", create_cql, insert_cql); + + service::query_state qs(service::client_state::for_internal_calls(), empty_service_permit()); + + auto make_opts = [] { + std::vector vals { + cql3::raw_value::make_value(int32_type->decompose(0)), + cql3::raw_value::make_value(int32_type->decompose(0)), + }; + return cql3::query_options(cql3::default_cql_config, db::consistency_level::ONE, + std::nullopt, std::move(vals), false, + cql3::query_options::specific_options::DEFAULT); + }; + + // Prime the prepared cache. + helper.insert(qp, mm, qs, make_opts).get(); + + utils::get_local_injector().enable("table_helper_insert_before_execute", true /*one_shot*/); + + // Fiber A: suspends at the injection, between cache_table_info() and execute(). + auto fiber_a = helper.insert(qp, mm, qs, make_opts); + + // Wait until fiber A is actually parked in wait_for_message. + while (utils::get_local_injector().waiters("table_helper_insert_before_execute") == 0) { + seastar::yield().get(); + } + + // Evict the prepared cache entry - drops its strong ref to the + // modification_statement. helper._insert_stmt is the only ref left. + env.execute_cql("DROP TABLE th_ks.t").discard_result().get(); + + // Fiber B: cache_table_info() sees the weak ref invalidated and sets + // _insert_stmt = nullptr; the re-prepare then throws (table is gone). + helper.insert(qp, mm, qs, make_opts) + .handle_exception([] (std::exception_ptr) {}).get(); + + // Release fiber A. Unfixed: re-reads null _insert_stmt and crashes. + utils::get_local_injector().receive_message("table_helper_insert_before_execute"); + + try { + fiber_a.get(); + } catch (...) { + // execute() may fail (table is gone); only the crash matters. + } + }); +} + +#endif // SCYLLA_ENABLE_ERROR_INJECTION + +#ifndef SCYLLA_ENABLE_ERROR_INJECTION +// The only test in this suite requires error injection support. Without this +// dummy case the suite would be empty, which causes boost to report +// "test tree is empty" and pytest to exit with code 5 ("no tests collected"), +// failing CI in modes (e.g. release) where error injection is disabled. +BOOST_AUTO_TEST_CASE(test_skipped_no_error_injection) { + BOOST_TEST_MESSAGE("table_helper_test requires SCYLLA_ENABLE_ERROR_INJECTION; skipping"); +} +#endif + +BOOST_AUTO_TEST_SUITE_END() diff --git a/utils/error_injection.hh b/utils/error_injection.hh index 5296b52ed6..9b36c702c1 100644 --- a/utils/error_injection.hh +++ b/utils/error_injection.hh @@ -143,6 +143,7 @@ class error_injection { struct injection_shared_data { size_t received_message_count{0}; size_t shared_read_message_count{0}; + size_t waiter_count{0}; condition_variable received_message_cv; error_injection_parameters parameters; sstring injection_name; @@ -216,6 +217,8 @@ public: }) : optimized_optional{}; try { + ++_shared_data->waiter_count; + auto dec = defer([this] () noexcept { --_shared_data->waiter_count; }); co_await _shared_data->received_message_cv.wait(timeout, [&] { if (as) { as->check(); @@ -360,6 +363,17 @@ public: return data && !data->is_ongoing_oneshot(); } + // \brief Returns the number of handlers of the named injection that are + // currently suspended in wait_for_message(). + // + // Intended for tests that need to synchronize with one or more fibers + // parked on an injection. + // \param name error injection name to check + size_t waiters(const std::string_view& injection_name) const { + auto data = get_data(injection_name); + return data ? data->shared_data->waiter_count : 0; + } + // \brief Enter into error injection if it's enabled // \param name error injection name to check bool enter(const std::string_view& name) { @@ -594,6 +608,10 @@ public: return false; } + size_t waiters(const std::string_view& name) const { + return 0; + } + bool enter(const std::string_view& name) const { return false; }