From 036d3287b3235f391acde55be2dd0d3193979bc8 Mon Sep 17 00:00:00 2001 From: Ferenc Szili Date: Tue, 26 Nov 2024 16:23:19 +0100 Subject: [PATCH 1/2] database: correctly save replay position for truncate This commit fixes a problem with way truncate saves commit log replay positions. On shards without mutations, truncate would save the replay position into system.truncated with shard number 0 regardless of the actual shard number that the replay position was saved for. --- replica/database.cc | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/replica/database.cc b/replica/database.cc index ff01efa3d7..71bc03a0f8 100644 --- a/replica/database.cc +++ b/replica/database.cc @@ -2512,7 +2512,18 @@ future<> database::truncate(db::system_keyspace& sys_ks, column_family& cf, cons // smaller than low_mark. SCYLLA_ASSERT(!st.did_flush || rp == db::replay_position() || (truncated_at <= st.low_mark_at ? rp <= st.low_mark : st.low_mark <= rp)); if (rp == db::replay_position()) { - rp = st.low_mark; + // If this shard had no mutations, st.low_mark will be an empty, default constructed + // replay_position. This is a problem because an empty replay_position has the shard_id + // part of segment_id set to 0, even though we may be running on a shard other than + // shard 0. In that case, we will save the empty low_mark as a replay position into + // system.truncated with an incorrect shard number, which could overwrite the replay + // position of the actual shard 0. So, we fix the problem by creating a replay position + // with the correct shard_id and 0 for base_id and position + if (st.low_mark == db::replay_position()) { + rp = db::replay_position(this_shard_id(), 0, 0); + } else { + rp = st.low_mark; + } } co_await coroutine::parallel_for_each(cf.views(), [this, &sys_ks, truncated_at] (view_ptr v) -> future<> { auto& vcf = find_column_family(v); From e54c07ba7580ccfd7cfdd61010c1b659b9ba6ac7 Mon Sep 17 00:00:00 2001 From: Ferenc Szili Date: Thu, 28 Nov 2024 17:20:50 +0100 Subject: [PATCH 2/2] test: add test for truncate saving replay positions This change adds a test for truncate correctly saving commit log replay positions. --- test/boost/database_test.cc | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/test/boost/database_test.cc b/test/boost/database_test.cc index 15c1b007f1..e30c5ed2d7 100644 --- a/test/boost/database_test.cc +++ b/test/boost/database_test.cc @@ -196,6 +196,28 @@ SEASTAR_TEST_CASE(test_truncate_without_snapshot_during_writes) { }, cfg); } +// Reproducer for: +// https://github.com/scylladb/scylla/issues/21719 +SEASTAR_TEST_CASE(test_truncate_saves_replay_position) { + auto cfg = make_shared(); + cfg->auto_snapshot.set(false); + return do_with_cql_env_and_compaction_groups([] (cql_test_env& e) { + BOOST_REQUIRE_GT(smp::count, 1); + const sstring ks_name = "ks"; + const sstring cf_name = "cf"; + e.execute_cql(fmt::format("CREATE TABLE {}.{} (k TEXT PRIMARY KEY, v INT);", ks_name, cf_name)).get(); + const table_id uuid = e.local_db().find_uuid(ks_name, cf_name); + + replica::database::truncate_table_on_all_shards(e.db(), e.get_system_keyspace(), ks_name, cf_name, db_clock::now(), false /* with_snapshot */).get(); + + auto res = e.execute_cql(fmt::format("SELECT * FROM system.truncated WHERE table_uuid = {}", uuid)).get(); + auto rows = dynamic_pointer_cast(res); + BOOST_REQUIRE(rows); + auto row_count = rows->rs().result_set().size(); + BOOST_REQUIRE_EQUAL(row_count, smp::count); + }, cfg); +} + SEASTAR_TEST_CASE(test_querying_with_limits) { return do_with_cql_env_and_compaction_groups([](cql_test_env& e) { // FIXME: restore indent.