diff --git a/replica/database.cc b/replica/database.cc index 9ce5de562d..ca011f8ef1 100644 --- a/replica/database.cc +++ b/replica/database.cc @@ -2512,7 +2512,18 @@ future<> database::truncate(db::system_keyspace& sys_ks, column_family& cf, cons // smaller than low_mark. SCYLLA_ASSERT(!st.did_flush || rp == db::replay_position() || (truncated_at <= st.low_mark_at ? rp <= st.low_mark : st.low_mark <= rp)); if (rp == db::replay_position()) { - rp = st.low_mark; + // If this shard had no mutations, st.low_mark will be an empty, default constructed + // replay_position. This is a problem because an empty replay_position has the shard_id + // part of segment_id set to 0, even though we may be running on a shard other than + // shard 0. In that case, we will save the empty low_mark as a replay position into + // system.truncated with an incorrect shard number, which could overwrite the replay + // position of the actual shard 0. So, we fix the problem by creating a replay position + // with the correct shard_id and 0 for base_id and position + if (st.low_mark == db::replay_position()) { + rp = db::replay_position(this_shard_id(), 0, 0); + } else { + rp = st.low_mark; + } } co_await coroutine::parallel_for_each(cf.views(), [this, &sys_ks, truncated_at] (view_ptr v) -> future<> { auto& vcf = find_column_family(v); diff --git a/test/boost/database_test.cc b/test/boost/database_test.cc index 15c1b007f1..e30c5ed2d7 100644 --- a/test/boost/database_test.cc +++ b/test/boost/database_test.cc @@ -196,6 +196,28 @@ SEASTAR_TEST_CASE(test_truncate_without_snapshot_during_writes) { }, cfg); } +// Reproducer for: +// https://github.com/scylladb/scylla/issues/21719 +SEASTAR_TEST_CASE(test_truncate_saves_replay_position) { + auto cfg = make_shared(); + cfg->auto_snapshot.set(false); + return do_with_cql_env_and_compaction_groups([] (cql_test_env& e) { + BOOST_REQUIRE_GT(smp::count, 1); + const sstring ks_name = "ks"; + const sstring cf_name = "cf"; + e.execute_cql(fmt::format("CREATE TABLE {}.{} (k TEXT PRIMARY KEY, v INT);", ks_name, cf_name)).get(); + const table_id uuid = e.local_db().find_uuid(ks_name, cf_name); + + replica::database::truncate_table_on_all_shards(e.db(), e.get_system_keyspace(), ks_name, cf_name, db_clock::now(), false /* with_snapshot */).get(); + + auto res = e.execute_cql(fmt::format("SELECT * FROM system.truncated WHERE table_uuid = {}", uuid)).get(); + auto rows = dynamic_pointer_cast(res); + BOOST_REQUIRE(rows); + auto row_count = rows->rs().result_set().size(); + BOOST_REQUIRE_EQUAL(row_count, smp::count); + }, cfg); +} + SEASTAR_TEST_CASE(test_querying_with_limits) { return do_with_cql_env_and_compaction_groups([](cql_test_env& e) { // FIXME: restore indent.