Compare commits

...

58 Commits

Author SHA1 Message Date
Takuya ASADA
6ee1779578 dist: fix upgrade error from 2024.1
We need to allow replacing nodetool from scylla-enterprise-tools < 2024.2,
just like we did for scylla-tools < 5.5.
This is required to allow the packages to upgrade from 2024.1.

Fixes #22820

Closes scylladb/scylladb#22821

(cherry picked from commit b5e306047f)

Closes scylladb/scylladb#22867
2025-02-16 14:47:48 +02:00
Jenkins Promoter
48130ca2e9 Update pgo profiles - aarch64 2025-02-15 04:20:15 +02:00
Jenkins Promoter
5054087f0b Update pgo profiles - x86_64 2025-02-15 04:05:06 +02:00
Botond Dénes
889fb9c18b Update tools/java submodule
* tools/java 807e991d...6dfe728a (1):
  > dist: support smooth upgrade from enterprise to source available

Fixes: scylladb/scylladb#22820
2025-02-14 11:14:07 +02:00
Botond Dénes
c627aff5f7 Merge '[Backport 2025.1] reader_concurrency_semaphore: set_notify_handler(): disable timeout ' from Scylladb[bot]
`set_notify_handler()` is called after a querier was inserted into the querier cache. It has two purposes: set a callback for eviction and set a TTL for the cache entry. The latter was not disabling the pre-existing timeout of the permit (if any), which would lead to premature eviction of the cache entry if the timeout was shorter than the TTL (which is typical).
Disable the timeout before setting the TTL to prevent premature eviction.

Fixes: https://github.com/scylladb/scylladb/issues/22629

Backport required to all active releases, they are all affected.

- (cherry picked from commit a3ae0c7cee)

- (cherry picked from commit 9174f27cc8)

Parent PR: #22701

Closes scylladb/scylladb#22752

* github.com:scylladb/scylladb:
  reader_concurrency_semaphore: set_notify_handler(): disable timeout
  reader_permit: mark check_abort() as const
2025-02-13 15:24:54 +02:00
Michał Chojnowski
2c0ffdce31 pgo: disable tablets for training with secondary index, lwt and counters
As of right now, materialized views (and consequently secondary
indexes), lwt and counters are unsupported or experimental with tablets.
Since tablets are enabled by default, training cases using those
features are currently broken.

The right thing to do here is to disable tablets in those cases.

Fixes https://github.com/scylladb/scylladb/issues/22638

Closes scylladb/scylladb#22661

(cherry picked from commit bea434f417)

Closes scylladb/scylladb#22808
2025-02-13 09:42:09 +02:00
Botond Dénes
ff7e93ddd5 db/config: reader_concurrency_semaphore_cpu_concurrency: bump default to 2
This config item controls how many CPU-bound reads are allowed to run in
parallel. The effective concurrency of a single CPU core is 1, so
allowing more than one CPU-bound reads to run concurrently will just
result in time-sharing and both reads having higher latency.
However, restricting concurrency to 1 means that a CPU bound read that
takes a lot of time to complete can block other quick reads while it is
running. Increase this default setting to 2 as a compromise between not
over-using time-sharing, while not allowing such slow reads to block the
queue behind them.

Fixes: #22450

Closes scylladb/scylladb#22679

(cherry picked from commit 3d12451d1f)

Closes scylladb/scylladb#22722
2025-02-13 09:40:25 +02:00
Botond Dénes
1998733228 service: query_pager: fix last-position for filtering queries
This applies to short pages, cut short because of a tombstone prefix.
When page-results are filtered and the filter drops some rows, the
last-position is taken from the page visitor, which does the filtering.
This means that last partition and row position will be that of the last
row the filter saw. This will not match the last position of the
replica, when the replica cut the page due to tombstones.
When fetching the next page, this means that the entire tombstone suffix of
the last page will be re-fetched. Worse still: the last position of the
next page will not match that of the saved reader left on the replica, so
the saved reader will be dropped and a new one created from scratch.
This wasted work will show up as elevated tail latencies.
Fix by always taking the last position from raw query results.

Fixes: #22620

Closes scylladb/scylladb#22622

(cherry picked from commit 7ce932ce01)

Closes scylladb/scylladb#22719
2025-02-13 09:40:05 +02:00
Botond Dénes
e79ee2ddb0 reader_concurrency_semaphore: foreach_permit(): include _inactive_reads
So inactive reads show up in semaphore diagnostics dumps (currently the
only non-test user of this method).

Fixes: #22574

Closes scylladb/scylladb#22575

(cherry picked from commit e1b1a2068a)

Closes scylladb/scylladb#22611
2025-02-13 09:39:39 +02:00
Aleksandra Martyniuk
4c39943b3f replica: mark registry entry as synch after the table is added
When a replica gets a write request it performs get_schema_for_write,
which waits until the schema is synced. However, database::add_column_family
marks a schema as synced before the table is added. Hence, the write may
see the schema as synced, but hit no_such_column_family as the table
hasn't been added yet.

Mark schema as synced after the table is added to database::_tables_metadata.

Fixes: #22347.

Closes scylladb/scylladb#22348

(cherry picked from commit 328818a50f)

Closes scylladb/scylladb#22604
2025-02-13 09:39:13 +02:00
Calle Wilund
17c86f8b57 encryption: Fix encrypted components mask check in describe
Fixes #22401

In the fix for scylladb/scylla-enterprise#892, the extraction and check for the sstable component encryption mask was copied
to a subroutine for description purposes, but a very important 1 << <value> shift was somehow
left on the floor.

Without this, the check for whether we actually contain an encrypted component can be wholly
broken for some components.

Closes scylladb/scylladb#22398

(cherry picked from commit 7db14420b7)

Closes scylladb/scylladb#22599
2025-02-13 09:38:41 +02:00
Botond Dénes
d05b3897a2 Merge '[Backport 2025.1] api: task_manager: do not unregister finish task when its status is queried' from Scylladb[bot]
Currently, when the status of a task is queried and the task is already finished,
it gets unregistered. Getting the status shouldn't be a one-time operation.

Stop removing the task after its status is queried. Adjust tests not to rely
on this behavior. Add task_manager/drain API and nodetool tasks drain
command to remove finished tasks in the module.

Fixes: https://github.com/scylladb/scylladb/issues/21388.

It's a fix to the task_manager API and should be backported to all branches.

- (cherry picked from commit e37d1bcb98)

- (cherry picked from commit 18cc79176a)

Parent PR: #22310

Closes scylladb/scylladb#22598

* github.com:scylladb/scylladb:
  api: task_manager: do not unregister tasks on get_status
  api: task_manager: add /task_manager/drain
2025-02-13 09:38:12 +02:00
Botond Dénes
9116fc635e Merge '[Backport 2025.1] split: run set_split_mode() on all storage groups during all_storage_groups_split()' from Scylladb[bot]
`tablet_storage_group_manager::all_storage_groups_split()` calls `set_split_mode()` for each of its storage groups to create split ready compaction groups. It does this by iterating through storage groups using `std::ranges::all_of()` which is not guaranteed to iterate through the entire range, and will stop iterating on the first occurrence of the predicate (`set_split_mode()`) returning false. `set_split_mode()` creates the split compaction groups and returns false if the storage group's main compaction group or merging groups are not empty. This means that in cases where the tablet storage group manager has non-empty storage groups, we could have a situation where split compaction groups are not created for all storage groups.

The missing split compaction groups are later created in `tablet_storage_group_manager::split_all_storage_groups()`, which also calls `set_split_mode()`, and that is why the split completes successfully. The problem is that
`tablet_storage_group_manager::all_storage_groups_split()` runs under a group0 guard, but
`tablet_storage_group_manager::split_all_storage_groups()` does not. This can cause problems with operations that should be mutually exclusive with compaction group creation, e.g. DROP TABLE/DROP KEYSPACE.

Fixes #22431

This is a bugfix and should be backported to versions with tablets: 6.1, 6.2 and 2025.1

- (cherry picked from commit 24e8d2a55c)

- (cherry picked from commit 8bff7786a8)

Parent PR: #22330

Closes scylladb/scylladb#22560

* github.com:scylladb/scylladb:
  test: add reproducer and test for fix to split ready CG creation
  table: run set_split_mode() on all storage groups during all_storage_groups_split()
2025-02-13 09:36:23 +02:00
Raphael S. Carvalho
5f74b5fdff test: Use linux-aio backend again on seastar-based tests
Since mid December, tests started failing with ENOMEM while
submitting I/O requests.

Logs of failed tests show io_uring was used as the backend, but we
never deliberately switched to io_uring. Investigation pointed
to it happening accidentally in commit 1bac6b75dc,
which turned on io_uring to allow the native tool in production,
and picked the linux-aio backend explicitly when initializing Scylla.
But it missed that seastar-based tests would pick the default
backend, which is io_uring once enabled.

There's a reason we never made io_uring the default: it's not
stable enough. It turns out we made the right choice back then,
as it apparently continues to be unstable, causing flakiness
in the tests.

Let's undo that accidental change in tests by explicitly
picking the linux-aio backend for seastar-based tests.
This should hopefully bring back stability.

Refs #21968.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>

Closes scylladb/scylladb#22695

(cherry picked from commit ce65164315)
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>

Closes scylladb/scylladb#22800
2025-02-12 20:50:51 +02:00
Michael Litvak
8d1f6df818 test/test_view_build_status: fix flaky asserts
In a few test cases of test_view_build_status we create a view, wait for
it and then query the view_build_status table and expect it to have all
rows for each node and view.

But it may fail because it could happen that the wait_for_view query and
the following queries are done on different nodes, and some of the nodes
didn't apply all the table updates yet, so they have missing rows.

To fix it, we change the assert to work in the eventual-consistency
sense, retrying until the number of rows is as expected.

Fixes scylladb/scylladb#22644

Closes scylladb/scylladb#22654

(cherry picked from commit c098e9a327)

Closes scylladb/scylladb#22780
2025-02-11 10:21:54 +01:00
Avi Kivity
75320c9a13 Update tools/cqlsh submodule (driver update, upgradability)
* tools/cqlsh 52c6130...02ec7c5 (18):
  > chore(deps): update dependency scylla-driver to v3.28.2
  > dist: support smooth upgrade from enterprise to source available
  > github action: fix downloading of artifacts
  > chore(deps): update docker/setup-buildx-action action to v3
  > chore(deps): update docker/login-action action to v3
  > chore(deps): update docker/build-push-action action to v6
  > chore(deps): update docker/setup-qemu-action action to v3
  > chore(deps): update peter-evans/dockerhub-description action to v4
  > upload actions: update the usage for multiple artifacts
  > chore(deps): update actions/download-artifact action to v4.1.8
  > chore(deps): update dependency scylla-driver to v3.28.0
  > chore(deps): update pypa/cibuildwheel action to v2.22.0
  > chore(deps): update actions/checkout action to v4
  > chore(deps): update python docker tag to v3.13
  > chore(deps): update actions/upload-artifact action to v4
  > github actions: update it to work
  > add option to output driver debug
  > Add renovate.json (#107)

Fixes: https://github.com/scylladb/scylladb/issues/22420
2025-02-09 18:07:55 +02:00
Yaron Kaikov
359af0ae9c dist: support smooth upgrade from enterprise to source available
When upgrading, for example from `2024.1` to `2025.1`, the package names are
not identical, causing the upgrade command to fail:
```
Command: 'sudo DEBIAN_FRONTEND=noninteractive apt-get dist-upgrade scylla -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold"'
Exit code: 100
Stdout:
Selecting previously unselected package scylla.
Preparing to unpack .../6-scylla_2025.1.0~dev-0.20250118.1ef2d9d07692-1_amd64.deb ...
Unpacking scylla (2025.1.0~dev-0.20250118.1ef2d9d07692-1) ...
Errors were encountered while processing:
/tmp/apt-dpkg-install-JbOMav/0-scylla-conf_2025.1.0~dev-0.20250118.1ef2d9d07692-1_amd64.deb
/tmp/apt-dpkg-install-JbOMav/1-scylla-python3_2025.1.0~dev-0.20250118.1ef2d9d07692-1_amd64.deb
/tmp/apt-dpkg-install-JbOMav/2-scylla-server_2025.1.0~dev-0.20250118.1ef2d9d07692-1_amd64.deb
/tmp/apt-dpkg-install-JbOMav/3-scylla-kernel-conf_2025.1.0~dev-0.20250118.1ef2d9d07692-1_amd64.deb
/tmp/apt-dpkg-install-JbOMav/4-scylla-node-exporter_2025.1.0~dev-0.20250118.1ef2d9d07692-1_amd64.deb
/tmp/apt-dpkg-install-JbOMav/5-scylla-cqlsh_2025.1.0~dev-0.20250118.1ef2d9d07692-1_amd64.deb
Stderr:
E: Sub-process /usr/bin/dpkg returned an error code (1)
```

Add `Obsoletes` (for rpm) and `Replaces` (for deb) to fix this.

Fixes: https://github.com/scylladb/scylladb/issues/22420

Closes scylladb/scylladb#22457

(cherry picked from commit 93f53f4eb8)

Closes scylladb/scylladb#22753
2025-02-09 18:06:52 +02:00
Avi Kivity
7f350558c2 Update tools/python3 (smooth upgrade from enterprise)
* tools/python3 8415caf...91c9531 (1):
  > dist: support smooth upgrade from enterprise to source available

Ref #22420
2025-02-09 14:22:33 +02:00
Botond Dénes
fa9b1800b6 reader_concurrency_semaphore: set_notify_handler(): disable timeout
set_notify_handler() is called after a querier was inserted into the
querier cache. It has two purposes: set a callback for eviction and set
a TTL for the cache entry. The latter was not disabling the
pre-existing timeout of the permit (if any), which would lead to
premature eviction of the cache entry if the timeout was shorter than
the TTL (which is typical).
Disable the timeout before setting the TTL to prevent premature
eviction.

Fixes: scylladb/scylladb#22629
(cherry picked from commit 9174f27cc8)
2025-02-09 00:32:38 +00:00
Botond Dénes
c25d447b9c reader_permit: mark check_abort() as const
All it does is read one field; making it const makes it easier to use.

(cherry picked from commit a3ae0c7cee)
2025-02-09 00:32:38 +00:00
Ferenc Szili
cf147d8f85 truncate: create session during request handling
Currently, the session ID under which the truncate for tablets request is
running is created during the request creation and queuing. This is a problem
because this could overwrite the session ID of any ongoing operation on
system.topology#session.

This change moves the creation of the session ID for truncate from the request
creation to the request handling.

Fixes #22613

Closes scylladb/scylladb#22615

(cherry picked from commit a59618e83d)

Closes scylladb/scylladb#22705
2025-02-06 10:09:00 +02:00
Botond Dénes
319626e941 reader_concurrency_semaphore: with_permit(): proper clean-up after queue overload
with_permit() creates a permit, with a self-reference, to avoid
attaching a continuation to the permit's run function. This
self-reference is used to keep the permit alive, until the execution
loop processes it. This self reference has to be carefully cleared on
error-paths, otherwise the permit will become a zombie, effectively
leaking memory.
Instead of trying to handle all loose ends, get rid of this
self-reference altogether: ask caller to provide a place to save the
permit, where it will survive until the end of the call. This makes the
call-site a little bit less nice, but it gets rid of a whole class of
possible bugs.

Fixes: #22588

Closes scylladb/scylladb#22624

(cherry picked from commit f2d5819645)

Closes scylladb/scylladb#22704
2025-02-06 10:08:19 +02:00
Aleksandra Martyniuk
cca2d974b6 service: use read barrier in tablet_virtual_task::contains
Currently, when the tablet repair is started, info regarding
the operation is kept in the system.tablets. The new tablet states
are reflected in memory after load_topology_state is called.
Before that, the data in the table and the memory aren't consistent.

To check the supported operations, tablet_virtual_task uses in-memory
tablet_metadata. Hence, it may not see the operation, even though
its info is already kept in system.tablets table.

Run read barrier in tablet_virtual_task::contains to ensure it will
see the latest data. Add a test to check it.

Fixes: #21975.

Closes scylladb/scylladb#21995

(cherry picked from commit 610a761ca2)

Closes scylladb/scylladb#22694
2025-02-06 10:07:51 +02:00
Aleksandra Martyniuk
43f2e5f86b nodetool: tasks: print empty string for start_time/end_time if unspecified
If start_time/end_time is unspecified for a task, the task_manager API
returns the epoch. Nodetool prints this value in the task status.

Fix nodetool tasks commands to print an empty string for start_time/end_time
if it isn't specified.

Modify nodetool tasks status docs to show empty end_time.

Fixes: #22373.

Closes scylladb/scylladb#22370

(cherry picked from commit 477ad98b72)

Closes scylladb/scylladb#22601
2025-02-06 10:05:07 +02:00
Takuya ASADA
ad81d49923 dist: Support FIPS mode
- To make Scylla able to run on a FIPS-compliant system, add .hmac files for
  crypto libraries in the relocatable/rpm/deb packages.
- Currently we just write the hmac value into *.hmac files, but there is a new
  .hmac file format, something like this:

  ```
  [global]
  format-version = 1
  [lib.xxx.so.yy]
  path = /lib64/libxxx.so.yy
  hmac = <hmac>
  ```
  GnuTLS seems to reject the FIPS self-test on .libgnutls.so.30.hmac when the
  file is in the older format.
  Since the "path" directive requires an absolute path, we need to generate
  .libgnutls.so.30.hmac in the older format in create-relocatable-script.py.

Fixes scylladb/scylladb#22573

Signed-off-by: Takuya ASADA <syuu@scylladb.com>

Closes scylladb/scylladb#22384

(cherry picked from commit fb4c7dc3d8)

Closes scylladb/scylladb#22587
2025-02-06 10:01:12 +02:00
Wojciech Mitros
138c68d80e mv: forbid views with tablets by default
Materialized views with tablets are not stable yet, but we want
them available as an experimental feature, mainly for testing.

The feature was added in scylladb/scylladb#21833,
but currently it has no effect. All tests have been updated to use the
feature, so we should finally make it work.
This patch prevents users from creating materialized views in keyspaces
using tablets when the VIEWS_WITH_TABLETS feature is not enabled - such
requests will now get rejected.

Fixes scylladb/scylladb#21832

Closes scylladb/scylladb#22217

(cherry picked from commit 677f9962cf)

Closes scylladb/scylladb#22659
2025-02-04 08:06:23 +01:00
Avi Kivity
e0fb727f18 Update seastar submodule (hwloc failure on some AWS instances)
* seastar 1822136684...a350b5d70e (1):
  > resource: fallback to sysconf when failed to detect memory size from hwloc

Fixes #22382.
2025-02-03 22:47:39 +02:00
Jenkins Promoter
440833ae59 Update ScyllaDB version to: 2025.1.0-rc2 2025-02-03 13:23:18 +02:00
Michael Litvak
246635c426 test/test_view_build_status: fix wrong assert in test
The test expects and asserts that after wait_for_view is completed we
read the view_build_status table and get a row for each node and view.
But this is wrong because wait_for_view may have read the table on one
node, and then we query the table on a different node that didn't insert
all the rows yet, so the assert could fail.

To fix it we change the test to retry and check that eventually all
expected rows are found and then eventually removed on the same host.

Fixes scylladb/scylladb#22547

Closes scylladb/scylladb#22585

(cherry picked from commit 44c06ddfbb)

Closes scylladb/scylladb#22608
2025-02-03 09:24:17 +01:00
Michael Litvak
58eda6670f view_builder: fix loop in view builder when tokens are moved
The view builder builds a view by going over the entire token ring,
consuming the base table partitions, and generating view updates for
each partition.

A view is considered as built when we complete a full cycle of the
token ring. Suppose we start to build a view at a token F. We will
consume all partitions with tokens starting at F until the maximum
token, then go back to the minimum token and consume all partitions
until F, and then we detect that we pass F and complete building the
view. This happens in the view builder consumer in
`check_for_built_views`.

The problem is that we check if we pass the first token F with the
condition `_step.current_token() >= it->first_token` whenever we consume
a new partition or the current_token goes back to the minimum token.
But suppose that we don't have any partitions with a token greater than
or equal to the first token (this could happen if the partition with
token F was moved to another node for example), then this condition will never be
satisfied, and we don't detect correctly when we pass F. Instead, we
go back to the minimum token, building the same token ranges again,
in a possibly infinite loop.

To fix this we add another step when reaching the end of the reader's
stream. When this happens it means we don't have any more fragments to
consume until the end of the range, so we advance the current_token to
the end of the range, simulating a partition, and check for built views
in that range.

Fixes scylladb/scylladb#21829

Closes scylladb/scylladb#22493

(cherry picked from commit 6d34125eb7)

Closes scylladb/scylladb#22607
2025-02-02 22:29:52 +02:00
Jenkins Promoter
28b8896680 Update pgo profiles - aarch64 2025-02-01 04:30:11 +02:00
Jenkins Promoter
e9cae4be17 Update pgo profiles - x86_64 2025-02-01 04:05:22 +02:00
Avi Kivity
daf1c96ad3 seastar: point submodule at scylla-seastar.git
This allows backporting commits to seastar.
2025-01-31 19:47:30 +02:00
Botond Dénes
1a1893078a Merge '[Backport 2025.1] encrypted_file_impl: Check for reads on or past actual file length in transform' from Scylladb[bot]
Fixes #22236

If reading a file and not stopping on block bounds returned by `size()`, we could allow reading from (_file_size+<1-15>) (if crossing block boundary) and try to decrypt this buffer (last one).

Simplest example:
Actual data size: 4095
Physical file size: 4095 + key block size (typically 16)
Read from 4096: -> 15 bytes (padding) -> transform returns `_file_size` - `read offset` -> wraparound -> a rather larger number than we expected (not to mention the data in question is junk/zero).

The check on the last block in `transform` would wrap around the size due to us being >= the file size (l).
Just do an early bounds check and return zero if we're past the actual data limit.

- (cherry picked from commit e96cc52668)

- (cherry picked from commit 2fb95e4e2f)

Parent PR: #22395

Closes scylladb/scylladb#22583

* github.com:scylladb/scylladb:
  encrypted_file_test: Test reads beyond decrypted file length
  encrypted_file_impl: Check for reads on or past actual file length in transform
2025-01-31 11:38:50 +02:00
Aleksandra Martyniuk
8cc5566a3c api: task_manager: do not unregister tasks on get_status
Currently, /task_manager/task_status_recursive/{task_id} and
/task_manager/task_status/{task_id} unregister the queried task if it
has already finished.

The status should not disappear after being queried. Do not unregister
finished task when its status or recursive status is queried.

(cherry picked from commit 18cc79176a)
2025-01-31 08:21:03 +00:00
Aleksandra Martyniuk
1f52ced2ff api: task_manager: add /task_manager/drain
In the following patches, get_status won't be unregistering finished
tasks. However, tests need a way to drop a task, so that
they can work only with the tasks for operations that were
invoked by these tests.

Add /task_manager/drain/{module} to unregister all finished tasks
from the module. Add respective nodetool command.

(cherry picked from commit e37d1bcb98)
2025-01-31 08:21:03 +00:00
Avi Kivity
d7e3ab2226 Merge '[Backport 2025.1] truncate: trigger truncate logic from a transition state instead of global topology request' from Ferenc Szili
This is a manual backport of #22452

Truncate table for tablets is implemented as a global topology operation. However, it does not have a transition state associated with it, and performs the truncate logic in topology_coordinator::handle_global_request() while topology::tstate remains empty. This creates problems because topology::is_busy() uses transition_state to determine if the topology state machine is busy, and will return false even though a truncate operation is ongoing.

This change introduces a new topology transition topology::transition_state::truncate_table and moves the truncate logic to a new method topology_coordinator::handle_truncate_table(). This method is now called as a handler of the truncate_table transition state instead of a handler of the truncate_table global topology request.

Fixes #22552

Closes scylladb/scylladb#22557

* github.com:scylladb/scylladb:
  truncate: trigger truncate logic from transition state instead of global request handler
  truncate: add truncate_table transition state
2025-01-30 22:49:17 +02:00
Anna Stuchlik
cf589222a0 doc: update the Web Installer docs to remove OSS
Fixes https://github.com/scylladb/scylladb/issues/22292

Closes scylladb/scylladb#22433

(cherry picked from commit 2a6445343c)

Closes scylladb/scylladb#22581
2025-01-30 13:04:16 +02:00
Anna Stuchlik
156800a3dd doc: add SStable support in 2025.1
This commit adds the information about SStable version support in 2025.1
by replacing "2022.2" with "2022.2 and above".

In addition, this commit removes information about versions that are
no longer supported.

Fixes https://github.com/scylladb/scylladb/issues/22485

Closes scylladb/scylladb#22486

(cherry picked from commit caf598b118)

Closes scylladb/scylladb#22580
2025-01-30 13:03:47 +02:00
Nikos Dragazis
d1e8b02260 encrypted_file_test: Test reads beyond decrypted file length
Add a test to reproduce a bug in the read DMA API of
`encrypted_file_impl` (the file implementation for Encryption-at-Rest).

The test creates an encrypted file that contains padding, and then
attempts to read from an offset within the padding area. Although this
offset is invalid on the decrypted file, the `encrypted_file_impl` makes
no checks and proceeds with the decryption of padding data, which
eventually leads to bogus results.

Refs #22236.

Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
(cherry picked from commit 8f936b2cbc)
(cherry picked from commit 2fb95e4e2f)
2025-01-30 09:17:31 +00:00
Calle Wilund
a51888694e encrypted_file_impl: Check for reads on or past actual file length in transform
Fixes #22236

If reading a file and not stopping on block bounds returned by `size()`, we could
allow reading from (_file_size+1-15) (block boundary) and try to decrypt this
buffer (last one).
Check on last block in `transform` would wrap around size due to us being >=
file size (l).

Simplest example:
Actual data size: 4095
Physical file size: 4095 + key block size (typically 16)
Read from 4096: -> 15 bytes (padding) -> transform returns _file_size - read offset
-> wraparound -> a rather larger number than we expected
(not to mention the data in question is junk/zero).

Just do an early bounds check and return zero if we're past the actual data limit.

v2:
* Moved check to a min expression instead
* Added lengthy comment
* Added unit test

v3:
* Fixed read_dma_bulk handling of short, unaligned read
* Added test for unaligned read

v4:
* Added another unaligned test case

(cherry picked from commit e96cc52668)
2025-01-30 09:17:31 +00:00
Botond Dénes
68f134ee23 Merge '[Backport 2025.1] Do not update topology on address change' from Scylladb[bot]
Since now topology does not contain ip addresses there is no need to
create topology on an ip address change. Only peers table has to be
updated. The series factors out peers table update code from
sync_raft_topology_nodes() and calls it on topology and ip address
updates. As a side effect it fixes #22293, since now topology loading
does not require the IP to be present, so the assert that is triggered in
this bug is removed.

Fixes: scylladb/scylladb#22293

- (cherry picked from commit ef929c5def)

- (cherry picked from commit fbfef6b28a)

Parent PR: #22519

Closes scylladb/scylladb#22543

* github.com:scylladb/scylladb:
  topology coordinator: do not update topology on address change
  topology coordinator: split out the peer table update functionality from raft state application
2025-01-30 11:14:19 +02:00
Jenkins Promoter
b623c237bc Update ScyllaDB version to: 2025.1.0-rc1 2025-01-30 01:25:18 +02:00
Calle Wilund
8379d545c5 docs: Remove configuration_encryptor
Fixes #21993

Removes the configuration_encryptor mention from the docs.
The tool itself (java) is not included in the main branch
java tools, so there is nothing to remove there; only the words.

Closes scylladb/scylladb#22427

(cherry picked from commit bae5b44b97)

Closes scylladb/scylladb#22556
2025-01-29 20:17:36 +02:00
Michael Litvak
58d13d0daf cdc: fix handling of new generation during raft upgrade
During raft upgrade, a node may gossip about a new CDC generation that
was propagated through raft. The node that receives the generation by
gossip may not have applied the raft update yet, and will not find
the generation in the system tables. We should consider this error
non-fatal and retry the read until it succeeds or becomes obsolete.

Another issue is that when we fail with a "fatal" exception and do not retry
the read, the cdc metadata is left in an inconsistent state that causes
further attempts to insert this CDC generation to fail.

What happens is we complete preparing the new generation by calling `prepare`,
we insert an empty entry for the generation's timestamp, and then we fail. The
next time we try to insert the generation, we skip inserting it because we see
that it already has an entry in the metadata and we determine that
there's nothing to do. But this is wrong, because the entry is empty,
and we should continue to insert the generation.

To fix it, we change `prepare` to return `true` when the entry already
exists but it's empty, indicating we should continue to insert the
generation.

Fixes scylladb/scylladb#21227

Closes scylladb/scylladb#22093

(cherry picked from commit 4f5550d7f2)

Closes scylladb/scylladb#22546
2025-01-29 20:06:18 +02:00
Anna Stuchlik
4def507b1b doc: add OS support for 2025.1 and reorganize the page
This commit adds the OS support information for version 2025.1.
In addition, the OS support page is reorganized so that:
- The content is moved from the include page _common/os-support-info.rst
  to the regular os-support.rst page. The include page was necessary
  to document different support for OSS and Enterprise versions, so
  we don't need it anymore.
- I skipped the entries for versions that won't be supported when 2025.1
  is released: 6.1 and 2023.1.
- I moved the definition of "supported" to the end of the page for better
  readability.
- I've renamed the index entry to "OS Support" to be shorter on the left menu.

Fixes https://github.com/scylladb/scylladb/issues/22474

Closes scylladb/scylladb#22476

(cherry picked from commit 61c822715c)

Closes scylladb/scylladb#22538
2025-01-29 19:48:32 +02:00
Anna Stuchlik
69ad9350cc doc: remove Enterprise labels and directives
This PR removes the now redundant Enterprise labels and directives
from the ScyllaDB documentation.

Fixes https://github.com/scylladb/scylladb/issues/22432

Closes scylladb/scylladb#22434

(cherry picked from commit b2a718547f)

Closes scylladb/scylladb#22539
2025-01-29 19:48:11 +02:00
Anna Stuchlik
29e5f5f54d doc: enable the FIPS note in the ScyllaDB docs
This commit moves the information about FIPS out of the '.. only:: enterprise' directive.
As a result, the information will now show in the doc in the ScyllaDB repo
(previously, the directive included the note in the Enterprise docs only).

Refs https://github.com/scylladb/scylla-enterprise/issues/5020

Closes scylladb/scylladb#22374

(cherry picked from commit 1d5ef3dddb)

Closes scylladb/scylladb#22550
2025-01-29 19:47:37 +02:00
Avi Kivity
379b3fa46c Merge '[Backport 2025.1] repair: handle no_such_keyspace in repair preparation phase' from null
Currently, data sync repair handles most no_such_keyspace exceptions,
but it omits the preparation phase, where the exception could be thrown
during make_global_effective_replication_map.

Skip the keyspace repair if no_such_keyspace is thrown during preparations.

Fixes: #22073.

Requires backport to 6.1 and 6.2 as they contain the bug

- (cherry picked from commit bfb1704afa)

- (cherry picked from commit 54e7f2819c)

Parent PR: #22473

Closes scylladb/scylladb#22542

* github.com:scylladb/scylladb:
  test: add test to check if repair handles no_such_keyspace
  repair: handle keyspace dropped
2025-01-29 14:09:23 +02:00
Ferenc Szili
fe869fd902 test: add reproducer and test for fix to split ready CG creation
This adds a reproducer for #22431

In cases where a tablet storage group manager had more than one storage
group, it was possible to create compaction groups outside the group0
guard, which could create problems with operations that should be mutually
exclusive with compaction group creation.

(cherry picked from commit 8bff7786a8)
2025-01-29 10:10:28 +00:00
Ferenc Szili
dc55a566fa table: run set_split_mode() on all storage groups during all_storage_groups_split()
tablet_storage_group_manager::all_storage_groups_split() calls set_split_mode()
for each of its storage groups to create split ready compaction groups. It does
this by iterating through storage groups using std::ranges::all_of(), which is
not guaranteed to iterate through the entire range and will stop iterating on
the first occurrence of the predicate (set_split_mode()) returning false.
set_split_mode() creates the split compaction groups and returns false if the
storage group's main compaction group or merging groups are not empty. This
means that in cases where the tablet storage group manager has non-empty
storage groups, we could have a situation where split compaction groups are not
created for all storage groups.

The missing split compaction groups are later created in
tablet_storage_group_manager::split_all_storage_groups() which also calls
set_split_mode(), and that is the reason why split completes successfully. The
problem is that tablet_storage_group_manager::all_storage_groups_split() runs
under a group0 guard, and tablet_storage_group_manager::split_all_storage_groups()
does not. This can cause problems with operations that should be mutually
exclusive with compaction group creation, e.g. DROP TABLE/DROP KEYSPACE.

(cherry picked from commit 24e8d2a55c)
2025-01-29 10:10:28 +00:00
Ferenc Szili
3bb8039359 truncate: trigger truncate logic from transition state instead of global
request handler

Before this change, the logic of truncate for tablets was triggered from
topology_coordinator::handle_global_request(). This was done without
using a topology transition state, which remained empty throughout the
truncate handler's execution.

This change moves the truncate logic to a new method
topology_coordinator::handle_truncate_table(). This method is now called
as a handler of the truncate_table topology transition state instead of
a handler of the truncate_table global topology request.
2025-01-29 10:48:34 +01:00
Ferenc Szili
9f3838e614 truncate: add truncate_table transition state
Truncate table for tablets is implemented as a global topology operation.
However, it does not have a transition state associated with it, and
performs the truncate logic in handle_global_request() while
topology::tstate remains empty. This creates problems because
topology::is_busy() uses transition_state to determine if the topology
state machine is busy, and will return false even though a truncate
operation is ongoing.

This change adds a new transition state: truncate_table
2025-01-29 10:47:15 +01:00
Gleb Natapov
366212f997 topology coordinator: do not update topology on address change
Now that topology no longer contains IP addresses, there is no need to
update topology on an IP address change. Only the peers table has to be
updated, so call a function that performs the peers table update only.

(cherry picked from commit fbfef6b28a)
2025-01-28 21:51:11 +00:00
Gleb Natapov
c0637aff81 topology coordinator: split out the peer table update functionality from raft state application
Raft topology state application does two things: it re-creates token metadata
and updates the peers table if needed. The code for both tasks is currently
intermixed. This patch separates it into distinct functions, which will be
needed in the next patch.

(cherry picked from commit ef929c5def)
2025-01-28 21:51:11 +00:00
Aleksandra Martyniuk
dcf436eb84 test: add test to check if repair handles no_such_keyspace
(cherry picked from commit 54e7f2819c)
2025-01-28 21:50:35 +00:00
Aleksandra Martyniuk
8e754e9d41 repair: handle keyspace dropped
Currently, data sync repair handles most no_such_keyspace exceptions,
but it omits the preparation phase, where the exception could be thrown
during make_global_effective_replication_map.

Skip the keyspace repair if no_such_keyspace is thrown during preparations.

(cherry picked from commit bfb1704afa)
2025-01-28 21:50:35 +00:00
Yaron Kaikov
f407799f25 Update ScyllaDB version to: 2025.1.0-rc0 2025-01-27 11:29:45 +02:00
80 changed files with 1270 additions and 539 deletions

.gitmodules vendored

@@ -1,6 +1,6 @@
[submodule "seastar"]
path = seastar
url = ../seastar
url = ../scylla-seastar
ignore = dirty
[submodule "swagger-ui"]
path = swagger-ui


@@ -78,7 +78,7 @@ fi
# Default scylla product/version tags
PRODUCT=scylla
VERSION=2025.1.0-dev
VERSION=2025.1.0-rc2
if test -f version
then


@@ -253,6 +253,30 @@
]
}
]
},
{
"path":"/task_manager/drain/{module}",
"operations":[
{
"method":"POST",
"summary":"Drain finished local tasks",
"type":"void",
"nickname":"drain_tasks",
"produces":[
"application/json"
],
"parameters":[
{
"name":"module",
"description":"The module to drain",
"required":true,
"allowMultiple":false,
"type":"string",
"paramType":"path"
}
]
}
]
}
],
"models":{


@@ -232,6 +232,32 @@ void set_task_manager(http_context& ctx, routes& r, sharded<tasks::task_manager>
uint32_t user_ttl = cfg.user_task_ttl_seconds();
co_return json::json_return_type(user_ttl);
});
tm::drain_tasks.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
co_await tm.invoke_on_all([&req] (tasks::task_manager& tm) -> future<> {
tasks::task_manager::module_ptr module;
try {
module = tm.find_module(req->get_path_param("module"));
} catch (...) {
throw bad_param_exception(fmt::format("{}", std::current_exception()));
}
const auto& local_tasks = module->get_local_tasks();
std::vector<tasks::task_id> ids;
ids.reserve(local_tasks.size());
std::transform(begin(local_tasks), end(local_tasks), std::back_inserter(ids), [] (const auto& task) {
return task.second->is_complete() ? task.first : tasks::task_id::create_null_id();
});
for (auto&& id : ids) {
if (id) {
module->unregister_task(id);
}
co_await maybe_yield();
}
});
co_return json_void();
});
}
void unset_task_manager(http_context& ctx, routes& r) {
@@ -243,6 +269,7 @@ void unset_task_manager(http_context& ctx, routes& r) {
tm::get_task_status_recursively.unset(r);
tm::get_and_update_ttl.unset(r);
tm::get_ttl.unset(r);
tm::drain_tasks.unset(r);
}
}


@@ -1112,7 +1112,9 @@ future<bool> generation_service::legacy_do_handle_cdc_generation(cdc::generation
auto sys_dist_ks = get_sys_dist_ks();
auto gen = co_await retrieve_generation_data(gen_id, _sys_ks.local(), *sys_dist_ks, { _token_metadata.get()->count_normal_token_owners() });
if (!gen) {
throw std::runtime_error(fmt::format(
// This may happen during raft upgrade when a node gossips about a generation that
// was propagated through raft and we didn't apply it yet.
throw generation_handling_nonfatal_exception(fmt::format(
"Could not find CDC generation {} in distributed system tables (current time: {}),"
" even though some node gossiped about it.",
gen_id, db_clock::now()));


@@ -186,7 +186,7 @@ bool cdc::metadata::prepare(db_clock::time_point tp) {
}
auto ts = to_ts(tp);
auto emplaced = _gens.emplace(to_ts(tp), std::nullopt).second;
auto [it, emplaced] = _gens.emplace(to_ts(tp), std::nullopt);
if (_last_stream_timestamp != api::missing_timestamp) {
auto last_correct_gen = gen_used_at(_last_stream_timestamp);
@@ -201,5 +201,5 @@ bool cdc::metadata::prepare(db_clock::time_point tp) {
}
}
return emplaced;
return !it->second;
}


@@ -87,6 +87,9 @@ std::vector<::shared_ptr<index_target>> create_index_statement::validate_while_e
"Secondary indexes are not supported on COMPACT STORAGE tables that have clustering columns");
}
if (!db.features().views_with_tablets && db.find_keyspace(keyspace()).get_replication_strategy().uses_tablets()) {
throw exceptions::invalid_request_exception(format("Secondary indexes are not supported on base tables with tablets (keyspace '{}')", keyspace()));
}
validate_for_local_index(*schema);
std::vector<::shared_ptr<index_target>> targets;


@@ -140,6 +140,9 @@ std::pair<view_ptr, cql3::cql_warnings_vec> create_view_statement::prepare_view(
schema_ptr schema = validation::validate_column_family(db, _base_name.get_keyspace(), _base_name.get_column_family());
if (!db.features().views_with_tablets && db.find_keyspace(keyspace()).get_replication_strategy().uses_tablets()) {
throw exceptions::invalid_request_exception(format("Materialized views are not supported on base tables with tablets"));
}
if (schema->is_counter()) {
throw exceptions::invalid_request_exception(format("Materialized views are not supported on counter tables"));
}


@@ -1201,7 +1201,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
"Start serializing reads after their collective memory consumption goes above $normal_limit * $multiplier.")
, reader_concurrency_semaphore_kill_limit_multiplier(this, "reader_concurrency_semaphore_kill_limit_multiplier", liveness::LiveUpdate, value_status::Used, 4,
"Start killing reads after their collective memory consumption goes above $normal_limit * $multiplier.")
, reader_concurrency_semaphore_cpu_concurrency(this, "reader_concurrency_semaphore_cpu_concurrency", liveness::LiveUpdate, value_status::Used, 1,
, reader_concurrency_semaphore_cpu_concurrency(this, "reader_concurrency_semaphore_cpu_concurrency", liveness::LiveUpdate, value_status::Used, 2,
"Admit new reads while there are less than this number of requests that need CPU.")
, view_update_reader_concurrency_semaphore_serialize_limit_multiplier(this, "view_update_reader_concurrency_semaphore_serialize_limit_multiplier", liveness::LiveUpdate, value_status::Used, 2,
"Start serializing view update reads after their collective memory consumption goes above $normal_limit * $multiplier.")


@@ -2995,6 +2995,12 @@ public:
_step.build_status.pop_back();
}
}
// before going back to the minimum token, advance current_key to the end
// and check for built views in that range.
_step.current_key = {_step.prange.end().value_or(dht::ring_position::max()).value().token(), partition_key::make_empty()};
check_for_built_views();
_step.current_key = {dht::minimum_token(), partition_key::make_empty()};
for (auto&& vs : _step.build_status) {
vs.next_token = dht::minimum_token();


@@ -12,15 +12,16 @@ Architecture: any
Description: Scylla database main configuration file
Scylla is a highly scalable, eventually consistent, distributed,
partitioned row DB.
Replaces: %{product}-server (<< 1.1)
Replaces: %{product}-server (<< 1.1), scylla-enterprise-conf (<< 2025.1.0~)
Conflicts: %{product}-server (<< 1.1)
Breaks: scylla-enterprise-conf (<< 2025.1.0~)
Package: %{product}-server
Architecture: any
Depends: ${misc:Depends}, %{product}-conf (= ${binary:Version}), %{product}-python3 (= ${binary:Version})
Replaces: %{product}-tools (<<5.5)
Breaks: %{product}-tools (<<5.5)
Description: Scylla database server binaries
Replaces: %{product}-tools (<<5.5), scylla-enterprise-tools (<< 2024.2.0~), scylla-enterprise-server (<< 2025.1.0~)
Breaks: %{product}-tools (<<5.5), scylla-enterprise-tools (<< 2024.2.0~), scylla-enterprise-server (<< 2025.1.0~)
Description: Scylla database server binaries
Scylla is a highly scalable, eventually consistent, distributed,
partitioned row DB.
@@ -29,6 +30,8 @@ Section: debug
Priority: extra
Architecture: any
Depends: %{product}-server (= ${binary:Version}), ${misc:Depends}
Replaces: scylla-enterprise-server-dbg (<< 2025.1.0~)
Breaks: scylla-enterprise-server-dbg (<< 2025.1.0~)
Description: debugging symbols for %{product}-server
Scylla is a highly scalable, eventually consistent, distributed,
partitioned row DB.
@@ -37,13 +40,17 @@ Description: debugging symbols for %{product}-server
Package: %{product}-kernel-conf
Architecture: any
Depends: procps
Replaces: scylla-enterprise-kernel-conf (<< 2025.1.0~)
Breaks: scylla-enterprise-kernel-conf (<< 2025.1.0~)
Description: Scylla kernel tuning configuration
Scylla is a highly scalable, eventually consistent, distributed,
partitioned row DB.
Package: %{product}-node-exporter
Architecture: any
Replaces: scylla-enterprise-node-exporter (<< 2025.1.0~)
Conflicts: prometheus-node-exporter
Breaks: scylla-enterprise-node-exporter (<< 2025.1.0~)
Description: Prometheus exporter for machine metrics
Prometheus exporter for machine metrics, written in Go with pluggable metric collectors.
@@ -54,6 +61,49 @@ Depends: %{product}-server (= ${binary:Version})
, %{product}-kernel-conf (= ${binary:Version})
, %{product}-node-exporter (= ${binary:Version})
, %{product}-cqlsh (= ${binary:Version})
Replaces: scylla-enterprise (<< 2025.1.0~)
Breaks: scylla-enterprise (<< 2025.1.0~)
Description: Scylla database metapackage
Scylla is a highly scalable, eventually consistent, distributed,
partitioned row DB.
Package: scylla-enterprise-conf
Depends: %{product}-conf (= ${binary:Version})
Architecture: all
Priority: optional
Section: oldlibs
Description: transitional package
This is a transitional package. It can safely be removed.
Package: scylla-enterprise-server
Depends: %{product}-server (= ${binary:Version})
Architecture: all
Priority: optional
Section: oldlibs
Description: transitional package
This is a transitional package. It can safely be removed.
Package: scylla-enterprise
Depends: %{product} (= ${binary:Version})
Architecture: all
Priority: optional
Section: oldlibs
Description: transitional package
This is a transitional package. It can safely be removed.
Package: scylla-enterprise-kernel-conf
Depends: %{product}-kernel-conf (= ${binary:Version})
Architecture: all
Priority: optional
Section: oldlibs
Description: transitional package
This is a transitional package. It can safely be removed.
Package: scylla-enterprise-node-exporter
Depends: %{product}-node-exporter (= ${binary:Version})
Architecture: all
Priority: optional
Section: oldlibs
Description: transitional package
This is a transitional package. It can safely be removed.


@@ -11,6 +11,8 @@ endif
product := $(subst -server,,$(DEB_SOURCE))
libreloc_list := $(shell find scylla/libreloc/ -maxdepth 1 -type f -not -name .*.hmac -and -not -name gnutls.config -printf '-X%f ')
libexec_list := $(shell find scylla/libexec/ -maxdepth 1 -type f -not -name scylla -and -not -name iotune -printf '-X%f ')
override_dh_auto_configure:
override_dh_auto_build:
@@ -38,7 +40,7 @@ endif
override_dh_strip:
# The binaries (ethtool...patchelf) don't pass dh_strip after going through patchelf. Since they are
# already stripped, nothing is lost if we exclude them, so that's what we do.
dh_strip -Xlibprotobuf.so.15 -Xld.so -Xethtool -Xgawk -Xgzip -Xhwloc-calc -Xhwloc-distrib -Xifconfig -Xlscpu -Xnetstat -Xpatchelf --dbg-package=$(product)-server-dbg
dh_strip $(libreloc_list) $(libexec_list) --dbg-package=$(product)-server-dbg
find $(CURDIR)/debian/$(product)-server-dbg/usr/lib/debug/.build-id/ -name "*.debug" -exec objcopy --decompress-debug-sections {} \;
override_dh_makeshlibs:


@@ -21,6 +21,7 @@ opt/scylladb/scyllatop/*
opt/scylladb/scripts/libexec/*
opt/scylladb/bin/*
opt/scylladb/libreloc/*
opt/scylladb/libreloc/.*.hmac
opt/scylladb/libexec/*
usr/lib/scylla/*
var/lib/scylla/data


@@ -13,7 +13,8 @@ Requires: %{product}-python3 = %{version}-%{release}
Requires: %{product}-kernel-conf = %{version}-%{release}
Requires: %{product}-node-exporter = %{version}-%{release}
Requires: %{product}-cqlsh = %{version}-%{release}
Obsoletes: scylla-server < 1.1
Provides: scylla-enterprise = %{version}-%{release}
Obsoletes: scylla-enterprise < 2025.1.0
%global _debugsource_template %{nil}
%global _debuginfo_subpackages %{nil}
@@ -73,6 +74,10 @@ Requires: %{product}-python3 = %{version}-%{release}
AutoReqProv: no
Provides: %{product}-tools:%{_bindir}/nodetool
Provides: %{product}-tools:%{_sysconfigdir}/bash_completion.d/nodetool-completion
Provides: scylla-enterprise-tools:%{_bindir}/nodetool
Provides: scylla-enterprise-tools:%{_sysconfigdir}/bash_completion.d/nodetool-completion
Provides: scylla-enterprise-server = %{version}-%{release}
Obsoletes: scylla-enterprise-server < 2025.1.0
%description server
This package contains ScyllaDB server.
@@ -132,6 +137,7 @@ ln -sfT /etc/scylla /var/lib/scylla/conf
/opt/scylladb/scyllatop/*
/opt/scylladb/bin/*
/opt/scylladb/libreloc/*
/opt/scylladb/libreloc/.*.hmac
/opt/scylladb/libexec/*
%{_prefix}/lib/scylla/*
%attr(0755,scylla,scylla) %dir %{_sharedstatedir}/scylla/
@@ -156,6 +162,8 @@ ln -sfT /etc/scylla /var/lib/scylla/conf
Group: Applications/Databases
Summary: Scylla configuration package
Obsoletes: scylla-server < 1.1
Provides: scylla-enterprise-conf = %{version}-%{release}
Obsoletes: scylla-enterprise-conf < 2025.1.0
%description conf
This package contains the main scylla configuration file.
@@ -176,6 +184,8 @@ Summary: Scylla configuration package for the Linux kernel
Requires: kmod
# tuned overwrites our sysctl settings
Obsoletes: tuned >= 2.11.0
Provides: scylla-enterprise-kernel-conf = %{version}-%{release}
Obsoletes: scylla-enterprise-kernel-conf < 2025.1.0
%description kernel-conf
This package contains Linux kernel configuration changes for the Scylla database. Install this package
@@ -212,6 +222,8 @@ Group: Applications/Databases
Summary: Prometheus exporter for machine metrics
License: ASL 2.0
URL: https://github.com/prometheus/node_exporter
Provides: scylla-enterprise-node-exporter = %{version}-%{release}
Obsoletes: scylla-enterprise-node-exporter < 2025.1.0
%description node-exporter
Prometheus exporter for machine metrics, written in Go with pluggable metric collectors.


@@ -15,7 +15,7 @@ SSTable Version Support
- ScyllaDB Enterprise Version
- ScyllaDB Open Source Version
* - 3.x ('me')
- 2022.2
- 2022.2 and above
- 5.1 and above
* - 3.x ('md')
- 2021.1


@@ -9,11 +9,7 @@ ScyllaDB SSTable Format
.. include:: _common/sstable_what_is.rst
* In ScyllaDB 6.0 and above, *me* format is enabled by default.
* In ScyllaDB Enterprise 2021.1, ScyllaDB 4.3 and above, *md* format is enabled by default.
* In ScyllaDB 3.1 and above, *mc* format is enabled by default.
In ScyllaDB 6.0 and above, *me* format is enabled by default.
For more information on each of the SSTable formats, see below:


@@ -12,17 +12,7 @@ ScyllaDB SSTable - 3.x
.. include:: ../_common/sstable_what_is.rst
* In ScyllaDB 6.0 and above, the ``me`` format is mandatory, and ``md`` format is used only when upgrading from an existing cluster using ``md``. The ``sstable_format`` parameter is ignored if it is set to ``md``.
* In ScyllaDB 5.1 and above, the ``me`` format is enabled by default.
* In ScyllaDB 4.3 to 5.0, the ``md`` format is enabled by default.
* In ScyllaDB 3.1 to 4.2, the ``mc`` format is enabled by default.
* In ScyllaDB 3.0, the ``mc`` format is disabled by default. You can enable it by adding the ``enable_sstables_mc_format`` parameter set to ``true`` in the ``scylla.yaml`` file. For example:
.. code-block:: shell
enable_sstables_mc_format: true
.. REMOVE IN FUTURE VERSIONS - Remove the note above in version 5.2.
In ScyllaDB 6.0 and above, the ``me`` format is mandatory, and ``md`` format is used only when upgrading from an existing cluster using ``md``. The ``sstable_format`` parameter is ignored if it is set to ``md``.
Additional Information
-------------------------


@@ -202,18 +202,14 @@ An example that excludes a datacenter while using ``replication_factor``::
DESCRIBE KEYSPACE excalibur
CREATE KEYSPACE excalibur WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1': '3'} AND durable_writes = true;
.. only:: opensource
Keyspace storage options :label-caution:`Experimental`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Keyspace storage options :label-caution:`Experimental`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
By default, SStables of a keyspace are stored locally.
As an alternative, you can configure your keyspace to be stored
on Amazon S3 or another S3-compatible object store.
See :ref:`Keyspace storage options <admin-keyspace-storage-options>` for details.
By default, SStables of a keyspace are stored locally.
As an alternative, you can configure your keyspace to be stored
on Amazon S3 or another S3-compatible object store.
See :ref:`Keyspace storage options <admin-keyspace-storage-options>` for details.
.. _tablets:


@@ -64,18 +64,20 @@ Briefly:
- `/task_manager/list_module_tasks/{module}` -
lists (by default non-internal) tasks in the module;
- `/task_manager/task_status/{task_id}` -
gets the task's status, unregisters the task if it's finished;
gets the task's status;
- `/task_manager/abort_task/{task_id}` -
aborts the task if it's abortable;
- `/task_manager/wait_task/{task_id}` -
waits for the task and gets its status;
- `/task_manager/task_status_recursive/{task_id}` -
gets statuses of the task and all its descendants in BFS
order, unregisters the task;
order;
- `/task_manager/ttl` -
gets or sets new ttl.
- `/task_manager/user_ttl` -
gets or sets new user ttl.
- `/task_manager/drain/{module}` -
unregisters all finished local tasks in the module.
# Virtual tasks


@@ -1,21 +0,0 @@
You can `build ScyllaDB from source <https://github.com/scylladb/scylladb#build-prerequisites>`_ on other x86_64 or aarch64 platforms, without any guarantees.
+----------------------------+--------------------+-------+---------------+
| Linux Distributions |Ubuntu | Debian|Rocky / CentOS |
| | | |/ RHEL |
+----------------------------+------+------+------+-------+-------+-------+
| ScyllaDB Version / Version |20.04 |22.04 |24.04 | 11 | 8 | 9 |
+============================+======+======+======+=======+=======+=======+
| 6.2 | |v| | |v| | |v| | |v| | |v| | |v| |
+----------------------------+------+------+------+-------+-------+-------+
| 6.1 | |v| | |v| | |v| | |v| | |v| | |v| |
+----------------------------+------+------+------+-------+-------+-------+
* The recommended OS for ScyllaDB Open Source is Ubuntu 22.04.
* All releases are available as a Docker container and EC2 AMI, GCP, and Azure images.
Supported Architecture
-----------------------------
ScyllaDB Open Source supports x86_64 for all versions and AArch64 starting from ScyllaDB 4.6 and nightly build.
In particular, aarch64 support includes AWS EC2 Graviton.


@@ -4,7 +4,7 @@ ScyllaDB Web Installer for Linux
ScyllaDB Web Installer is a platform-agnostic installation script you can run with ``curl`` to install ScyllaDB on Linux.
See `ScyllaDB Download Center <https://www.scylladb.com/download/#core>`_ for information on manually installing ScyllaDB with platform-specific installation packages.
See :doc:`Install ScyllaDB Linux Packages </getting-started/install-scylla/install-on-linux/>` for information on manually installing ScyllaDB with platform-specific installation packages.
Prerequisites
--------------
@@ -20,44 +20,50 @@ To install ScyllaDB with Web Installer, run:
curl -sSf get.scylladb.com/server | sudo bash
By default, running the script installs the latest official version of ScyllaDB Open Source. You can use the following
options to install a different version or ScyllaDB Enterprise:
.. list-table::
:widths: 20 25 55
:header-rows: 1
* - Option
- Acceptable values
- Description
* - ``--scylla-product``
- ``scylla`` | ``scylla-enterprise``
- Specifies the ScyllaDB product to install: Open Source (``scylla``) or Enterprise (``scylla-enterprise``) The default is ``scylla``.
* - ``--scylla-version``
- ``<version number>``
- Specifies the ScyllaDB version to install. You can specify the major release (``x.y``) to install the latest patch for that version or a specific patch release (``x.y.x``). The default is the latest official version.
By default, running the script installs the latest official version of ScyllaDB.
You can run the command with the ``-h`` or ``--help`` flag to print information about the script.
Examples
===========
Installing a Non-default Version
---------------------------------------
Installing ScyllaDB Open Source 6.0.1:
You can install a version other than the default.
Versions 2025.1 and Later
==============================
Run the command with the ``--scylla-version`` option to specify the version
you want to install.
**Example**
.. code:: console
curl -sSf get.scylladb.com/server | sudo bash -s -- --scylla-version 2025.1.1
curl -sSf get.scylladb.com/server | sudo bash -s -- --scylla-version 6.0.1
Installing the latest patch release for ScyllaDB Open Source 6.0:
Versions Earlier than 2025.1
================================
To install a supported version of *ScyllaDB Enterprise*, run the command with:
* ``--scylla-product scylla-enterprise`` to specify that you want to install
ScyllaDB Enterprise.
* ``--scylla-version`` to specify the version you want to install.
For example:
.. code:: console
curl -sSf get.scylladb.com/server | sudo bash -s -- --scylla-product scylla-enterprise --scylla-version 2024.1
curl -sSf get.scylladb.com/server | sudo bash -s -- --scylla-version 6.0
To install a supported version of *ScyllaDB Open Source*, run the command with
the ``--scylla-version`` option to specify the version you want to install.
Installing ScyllaDB Enterprise 2024.1:
For example:
.. code:: console
curl -sSf get.scylladb.com/server | sudo bash -s -- --scylla-product scylla-enterprise --scylla-version 2024.1
curl -sSf get.scylladb.com/server | sudo bash -s -- --scylla-version 6.2.1
.. include:: /getting-started/_common/setup-after-install.rst


@@ -1,13 +1,36 @@
OS Support by Linux Distributions and Version
==============================================
The following matrix shows which Linux distributions, containers, and images are supported with which versions of ScyllaDB.
The following matrix shows which Linux distributions, containers, and images
are :ref:`supported <os-support-definition>` with which versions of ScyllaDB.
Where *supported* in this scope means:
+-------------------------------+--------------------+-------+------------------+---------------+
| Linux Distributions |Ubuntu | Debian| Rocky / Centos / | Amazon Linux |
| | | | RHEL | |
+-------------------------------+------+------+------+-------+-------+----------+---------------+
| ScyllaDB Version / OS Version |20.04 |22.04 |24.04 | 11 | 8 | 9 | 2023 |
+===============================+======+======+======+=======+=======+==========+===============+
| Enterprise 2025.1 | |v| | |v| | |v| | |v| | |v| | |v| | |v| |
+-------------------------------+------+------+------+-------+-------+----------+---------------+
| Enterprise 2024.2 | |v| | |v| | |v| | |v| | |v| | |v| | |v| |
+-------------------------------+------+------+------+-------+-------+----------+---------------+
| Enterprise 2024.1 | |v| | |v| | |x| | |v| | |v| | |v| | |x| |
+-------------------------------+------+------+------+-------+-------+----------+---------------+
| Open Source 6.2 | |v| | |v| | |v| | |v| | |v| | |v| | |v| |
+-------------------------------+------+------+------+-------+-------+----------+---------------+
All releases are available as a Docker container, EC2 AMI, GCP, and Azure images.
.. _os-support-definition:
By *supported*, it is meant that:
- A binary installation package is available to `download <https://www.scylladb.com/download/>`_.
- The download and install procedures are tested as part of ScyllaDB release process for each version.
- An automated install is included from :doc:`ScyllaDB Web Installer for Linux tool </getting-started/installation-common/scylla-web-installer>` (for latest versions)
- The download and install procedures are tested as part of the ScyllaDB release process for each version.
- An automated install is included from :doc:`ScyllaDB Web Installer for Linux tool </getting-started/installation-common/scylla-web-installer>` (for the latest versions).
You can `build ScyllaDB from source <https://github.com/scylladb/scylladb#build-prerequisites>`_
on other x86_64 or aarch64 platforms, without any guarantees.
.. scylladb_include_flag:: os-support-info.rst


@@ -8,7 +8,7 @@ ScyllaDB Requirements
:hidden:
system-requirements
os-support
OS Support <os-support>
Cloud Instance Recommendations <cloud-instance-recommendations>
scylla-in-a-shared-environment


@@ -8,7 +8,6 @@
* :doc:`cassandra-stress </operating-scylla/admin-tools/cassandra-stress/>` A tool for benchmarking and load testing ScyllaDB and Cassandra clusters.
* :doc:`SSTabledump </operating-scylla/admin-tools/sstabledump>`
* :doc:`SSTableMetadata </operating-scylla/admin-tools/sstablemetadata>`
* configuration_encryptor - :doc:`encrypt at rest </operating-scylla/security/encryption-at-rest>` sensitive scylla configuration entries using system key.
* scylla local-file-key-generator - Generate a local file (system) key for :doc:`encryption at rest </operating-scylla/security/encryption-at-rest>`, with the provided length, Key algorithm, Algorithm block mode and Algorithm padding method.
* `scyllatop <https://www.scylladb.com/2016/03/22/scyllatop/>`_ - A terminal base top-like tool for scylladb collectd/prometheus metrics.
* :doc:`scylla_dev_mode_setup</getting-started/installation-common/dev-mod>` - run ScyllaDB in Developer Mode.


@@ -74,13 +74,13 @@ API calls
- *keyspace* - if set, tasks are filtered to contain only the ones working on this keyspace;
- *table* - if set, tasks are filtered to contain only the ones working on this table;
* ``/task_manager/task_status/{task_id}`` - gets the task's status, unregisters the task if it's finished;
* ``/task_manager/task_status/{task_id}`` - gets the task's status;
* ``/task_manager/abort_task/{task_id}`` - aborts the task if it's abortable, otherwise 403 status code is returned;
* ``/task_manager/wait_task/{task_id}`` - waits for the task and gets its status (does not unregister the tasks); query params:
* ``/task_manager/wait_task/{task_id}`` - waits for the task and gets its status; query params:
- *timeout* - timeout in seconds; if set - 408 status code is returned if waiting times out;
* ``/task_manager/task_status_recursive/{task_id}`` - gets statuses of the task and all its descendants in BFS order, unregisters the root task;
* ``/task_manager/task_status_recursive/{task_id}`` - gets statuses of the task and all its descendants in BFS order;
* ``/task_manager/ttl`` - gets or sets new ttl; query params (if setting):
- *ttl* - new ttl value.
@@ -89,6 +89,8 @@ API calls
- *user_ttl* - new user ttl value.
* ``/task_manager/drain/{module}`` - unregisters all finished local tasks in the module.
Cluster tasks are not unregistered from task manager with API calls.
Tasks API


@@ -0,0 +1,21 @@
Nodetool tasks drain
====================
**tasks drain** - Unregisters all finished local tasks from the module.
If a module is not specified, finished tasks in all modules are unregistered.
Syntax
-------
.. code-block:: console
nodetool tasks drain [--module <module>]
Options
-------
* ``--module`` - if set, only the specified module is drained.
For example:
.. code-block:: shell
> nodetool tasks drain --module repair

View File

@@ -5,6 +5,7 @@ Nodetool tasks
:hidden:
abort <abort>
drain <drain>
user-ttl <user-ttl>
list <list>
modules <modules>
@@ -23,15 +24,12 @@ Task Status Retention
* When a task completes, its status is temporarily stored on the executing node
* Status information is retained for up to :confval:`task_ttl_in_seconds` seconds
* The status information of a completed task is automatically removed after being queried with ``tasks status`` or ``tasks tree``
* ``tasks wait`` returns the status, but it does not remove the task information of the queried task
.. note:: Multiple status queries using ``tasks status`` and ``tasks tree`` for the same completed task will only receive a response for the first query, since the status is removed after being retrieved.
Supported tasks suboperations
-----------------------------
* :doc:`abort </operating-scylla/nodetool-commands/tasks/abort>` - Aborts the task.
* :doc:`drain </operating-scylla/nodetool-commands/tasks/drain>` - Unregisters all finished local tasks.
* :doc:`user-ttl </operating-scylla/nodetool-commands/tasks/user-ttl>` - Gets or sets user_task_ttl value.
* :doc:`list </operating-scylla/nodetool-commands/tasks/list>` - Lists tasks in the module.
* :doc:`modules </operating-scylla/nodetool-commands/tasks/modules>` - Lists supported modules.

View File

@@ -1,6 +1,6 @@
Nodetool tasks status
=========================
**tasks status** - Gets the status of a task manager task. If the task was finished it is unregistered.
**tasks status** - Gets the status of a task manager task.
Syntax
-------
@@ -23,10 +23,10 @@ Example output
type: repair
kind: node
scope: keyspace
state: done
state: running
is_abortable: true
start_time: 2024-07-29T15:48:55Z
end_time: 2024-07-29T15:48:55Z
end_time:
error:
parent_id: none
sequence_number: 5

View File

@@ -1,7 +1,7 @@
Nodetool tasks tree
=======================
**tasks tree** - Gets the statuses of a task manager task and all its descendants.
The statuses are listed in BFS order. If the task was finished it is unregistered.
The statuses are listed in BFS order.
If task_id isn't specified, trees of all non-internal tasks are printed
(internal tasks are the ones that have a parent or cover an operation that

View File

@@ -2,9 +2,6 @@
ScyllaDB Auditing Guide
========================
:label-tip:`ScyllaDB Enterprise`
Auditing allows the administrator to monitor activities on a Scylla cluster, including queries and data changes.
The information is stored in syslog or in a Scylla table.

View File

@@ -143,8 +143,6 @@ Depending on your key provider, you will either have the option of allowing Scyl
* Replicated Key Provider - you must generate a system key yourself
* Local Key Provider - If you do not generate your own secret key, ScyllaDB will create one for you
When encrypting the ScyllaDB config with ``configuration_encryptor``, you also need to generate a secret key and upload it to all nodes.
Use the key generator script
================================
@@ -820,32 +818,6 @@ Once this encryption is enabled, it is used for all system data.
.. wasn't able to test this successfully
.. Encrypt and Decrypt Configuration Files
.. =======================================
.. Using the Configuration Encryption tool, you can encrypt parts of the scylla.yaml file which contain encryption configuration settings.
.. **Procedure**
.. 1. Run the Configuration Encryption script:
.. test code-block: none
.. /bin/configuration_encryptor [options] [key-path]
.. Where:
.. * ``-c, --config`` - the path to the configuration file (/etc/scylla/scylla.yaml, for example)
.. * ``-d, --decrypt`` - decrypts the configuration file at the specified path
.. * ``-o, --output`` - (optional) writes the configuration file to a specified target. This can be the same location as the source file.
.. * ``-h, --help`` - help for this command
.. For example:
.. test code-block: none
.. sudo -u scylla /bin/configuration_encryptor -c /etc/scylla/scylla.yaml /etc/scylla/encryption_keys/secret_key
.. end of test
When a Key is Lost
----------------------

View File

@@ -7,10 +7,6 @@ LDAP Authentication
saslauthd
:label-tip:`ScyllaDB Enterprise`
.. versionadded:: 2021.1.2
Scylla supports user authentication via an LDAP server by leveraging the SaslauthdAuthenticator.
By configuring saslauthd correctly against your LDAP server, you enable Scylla to check users' credentials through it.

View File

@@ -2,10 +2,6 @@
LDAP Authorization (Role Management)
=====================================
:label-tip:`ScyllaDB Enterprise`
.. versionadded:: 2021.1.2
Scylla Enterprise customers can manage and authorize user privileges via an :abbr:`LDAP (Lightweight Directory Access Protocol)` server.
LDAP is an open, vendor-neutral, industry-standard protocol for accessing and maintaining distributed user access control over a standard IP network.
If your users are already stored in an LDAP directory, you can now use the same LDAP server to regulate their roles in Scylla.

View File

@@ -31,11 +31,9 @@ Encryption on Transit, Client to Node and Node to Node
Encryption in transit protects your communication against third-party interception on the network connection.
Configure ScyllaDB to use TLS/SSL for all the connections. Use TLS/SSL to encrypt communication between ScyllaDB nodes and client applications.
.. only:: enterprise
Starting with version 2023.1.1, you can run ScyllaDB Enterprise on FIPS-enabled Ubuntu,
which uses FIPS 140-2 certified libraries (such as OpenSSL, GnuTLS, and more) and Linux
kernel in FIPS mode.
You can run ScyllaDB on FIPS-enabled Ubuntu,
which uses FIPS 140-2 certified libraries (such as OpenSSL, GnuTLS, and more) and Linux
kernel in FIPS mode.
* :doc:`Encryption Data in Transit Client to Node </operating-scylla/security/client-node-encryption>`

View File

@@ -223,7 +223,18 @@ size_t encrypted_file_impl::transform(uint64_t pos, const void* buffer, size_t l
throw std::invalid_argument("Output data not aligned");
}
_key->transform_unpadded(m, i + off, align_down(rem, b), o + off, iv.data());
return l - pos;
// #22236 - ensure we don't wrap numbers here.
// If reading past actual end of file (_file_length), we can be decoding
// 1-<key block size> bytes here, that are at the boundary of last
// (fake) block of the file.
// Example:
// File data size: 4095 bytes
// Physical file size: 4095 + 16 (assume 16 bytes key block)
// Read 0:4096 -> 4095 bytes
// If caller now ignores this and just reads 4096 (or more)
// bytes at next block (4096), we read 15 bytes and decode.
// But would be past _file_length -> ensure we return zero here.
return std::max(l, pos) - pos;
}
_key->transform_unpadded(m, i + off, block_size, o + off, iv.data());
}
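The #22236 guard above exists because the C++ subtraction `l - pos` is over unsigned `size_t`: a read past the logical end of the encrypted file can make `l` smaller than `pos`, and the difference would wrap to a huge value. A small sketch, simulating the 64-bit wrap explicitly (the numbers are the example from the comment, not Scylla's actual call sites):

```python
# Buggy vs fixed length computation from the hunk above. Python ints don't
# wrap, so 64-bit unsigned wraparound is simulated with a mask.
U64 = (1 << 64) - 1

def buggy_len(l, pos):
    return (l - pos) & U64    # wraps to a huge size_t when l < pos

def fixed_len(l, pos):
    return max(l, pos) - pos  # clamps to 0 when l < pos
```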
@@ -271,6 +282,9 @@ future<size_t> encrypted_file_impl::write_dma(uint64_t pos, std::vector<iovec> i
future<size_t> encrypted_file_impl::read_dma(uint64_t pos, void* buffer, size_t len, io_intent* intent) {
assert(!(len & (block_size - 1)));
return verify_file_length().then([this, pos, buffer, len, intent] {
if (pos >= *_file_length) {
return make_ready_future<size_t>(0);
}
return _file.dma_read(pos, buffer, len, intent).then([this, pos, buffer](size_t len) {
return transform(pos, buffer, len, buffer, mode::decrypt);
});
@@ -279,6 +293,9 @@ future<size_t> encrypted_file_impl::read_dma(uint64_t pos, void* buffer, size_t
future<size_t> encrypted_file_impl::read_dma(uint64_t pos, std::vector<iovec> iov, io_intent* intent) {
return verify_file_length().then([this, pos, iov = std::move(iov), intent]() mutable {
if (pos >= *_file_length) {
return make_ready_future<size_t>(0);
}
auto f = _file.dma_read(pos, iov, intent);
return f.then([this, pos, iov = std::move(iov)](size_t len) mutable {
size_t off = 0;
@@ -292,6 +309,9 @@ future<size_t> encrypted_file_impl::read_dma(uint64_t pos, std::vector<iovec> io
future<temporary_buffer<uint8_t>> encrypted_file_impl::dma_read_bulk(uint64_t offset, size_t range_size, io_intent* intent) {
return verify_file_length().then([this, offset, range_size, intent]() mutable {
if (offset >= *_file_length) {
return make_ready_future<temporary_buffer<uint8_t>>();
}
auto front = offset & (block_size - 1);
offset -= front;
range_size += front;
@@ -305,7 +325,8 @@ future<temporary_buffer<uint8_t>> encrypted_file_impl::dma_read_bulk(uint64_t of
auto s = transform(offset, result.get(), result.size(), result.get_write(), mode::decrypt);
// never give back more than asked for.
result.trim(std::min(s, range_size));
result.trim_front(front);
// #22236 - ensure we don't overtrim if we get a short read.
result.trim_front(std::min(front, result.size()));
return result;
});
});

View File

@@ -660,7 +660,7 @@ public:
sstables::component_type::Statistics,
sstables::component_type::TemporaryStatistics,
}) {
if (mask & int(c)) {
if (mask & (1 << int(c))) {
ccs.emplace_back(c);
}
}
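The fix above changes a bitmask membership test: checking whether component `c` is selected must test bit `1 << c`, not the raw enum value `c`. With `mask & int(c)`, a component whose value is 0 is never selected, and values that share bits misfire. A minimal sketch with illustrative integer components (not Scylla's enum):

```python
# Correct vs buggy bitmask membership, mirroring the one-line fix above.
def selected(mask, components):
    return [c for c in components if mask & (1 << c)]

def selected_buggy(mask, components):
    return [c for c in components if mask & c]
```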

View File

@@ -144,18 +144,6 @@ patchelf() {
LD_LIBRARY_PATH="$PWD/libreloc" libreloc/ld.so libexec/patchelf "$@"
}
remove_rpath() {
local file="$1"
local rpath
# $file might not be an elf image
if rpath=$(patchelf --print-rpath "$file" 2>/dev/null); then
if [ -n "$rpath" ]; then
echo "remove rpath from $file"
patchelf --remove-rpath "$file"
fi
fi
}
adjust_bin() {
local bin="$1"
# We could add --set-rpath too, but then debugedit (called by rpmbuild) barfs
@@ -444,7 +432,7 @@ for file in dist/common/scylla.d/*.conf; do
installconfig 644 "$file" "$retc"/scylla.d
done
install -d -m755 "$retc"/scylla "$rprefix/bin" "$rprefix/libexec" "$rprefix/libreloc" "$rprefix/libreloc/pkcs11" "$rprefix/scripts" "$rprefix/bin"
install -d -m755 "$retc"/scylla "$rprefix/bin" "$rprefix/libexec" "$rprefix/libreloc" "$rprefix/libreloc/fipscheck" "$rprefix/libreloc/pkcs11" "$rprefix/scripts" "$rprefix/bin"
if ! $without_systemd; then
install -m644 dist/common/systemd/scylla-fstrim.service -Dt "$rsystemd"
install -m644 dist/common/systemd/scylla-housekeeping-daily.service -Dt "$rsystemd"
@@ -458,18 +446,41 @@ install -m755 seastar/dpdk/usertools/dpdk-devbind.py -Dt "$rprefix"/scripts
for i in $(find libreloc/ -maxdepth 1 -type f); do
install -m755 "$i" -Dt "$rprefix/libreloc"
done
for lib in libreloc/*; do
remove_rpath "$rprefix/$lib"
for i in $(find libreloc/fipscheck/ -maxdepth 1 -type f); do
install -m755 "$i" -Dt "$rprefix/libreloc/fipscheck"
done
for i in $(find libreloc/pkcs11/ -maxdepth 1 -type f); do
install -m755 "$i" -Dt "$rprefix/libreloc/pkcs11"
done
LIBGNUTLS_SO=$(basename libreloc/libgnutls.so.*)
LIBGNUTLS_HMAC=$(cat libreloc/.libgnutls.so.*.hmac)
LIBNETTLE_SO=$(basename libreloc/libnettle.so.*)
LIBNETTLE_HMAC=$(cat libreloc/.libnettle.so.*.hmac)
LIBHOGWEED_SO=$(basename libreloc/libhogweed.so.*)
LIBHOGWEED_HMAC=$(cat libreloc/.libhogweed.so.*.hmac)
LIBGMP_SO=$(basename libreloc/libgmp.so.*)
LIBGMP_HMAC=$(cat libreloc/.libgmp.so.*.hmac)
cat << EOS > "$rprefix"/libreloc/.$LIBGNUTLS_SO.hmac
[global]
format-version = 1
[$LIBGNUTLS_SO]
path = "$prefix"/libreloc/$LIBGNUTLS_SO
hmac = $LIBGNUTLS_HMAC
[$LIBNETTLE_SO]
path = "$prefix"/libreloc/$LIBNETTLE_SO
hmac = $LIBNETTLE_HMAC
[$LIBHOGWEED_SO]
path = "$prefix"/libreloc/$LIBHOGWEED_SO
hmac = $LIBHOGWEED_HMAC
[$LIBGMP_SO]
path = "$prefix"/libreloc/$LIBGMP_SO
hmac = $LIBGMP_HMAC
EOS
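The heredoc above writes one combined `.hmac` manifest: a `[global]` header plus a section per FIPS-checked library, mapping the installed path to its precomputed HMAC. A hedged Python sketch of the same assembly (the function name and the sample sonames/HMAC values are illustrative, not from the script):

```python
# Build the fipscheck-style .hmac manifest text the install script emits.
def hmac_manifest(prefix, libs):
    # libs: mapping of soname -> precomputed hmac hex string
    lines = ["[global]", "format-version = 1"]
    for so, hmac in libs.items():
        lines += [f"[{so}]", f'path = "{prefix}"/libreloc/{so}', f"hmac = {hmac}"]
    return "\n".join(lines) + "\n"
```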
# some files in libexec are symlinks, which "install" dereferences
# use cp -P for the symlinks instead.
install -m755 libexec/* -Dt "$rprefix/libexec"
for bin in libexec/*; do
remove_rpath "$rprefix/$bin"
adjust_bin "${bin#libexec/}"
done
install -m644 ubsan-suppressions.supp -Dt "$rprefix/libexec"

View File

@@ -657,6 +657,8 @@ future<> global_vnode_effective_replication_map::get_keyspace_erms(sharded<repli
// all under the lock.
auto lk = co_await db.get_shared_token_metadata().get_lock();
auto erm = db.find_keyspace(keyspace_name).get_vnode_effective_replication_map();
utils::get_local_injector().inject("get_keyspace_erms_throw_no_such_keyspace",
[&keyspace_name] { throw data_dictionary::no_such_keyspace{keyspace_name}; });
auto ring_version = erm->get_token_metadata().get_ring_version();
_erms[0] = make_foreign(std::move(erm));
co_await coroutine::parallel_for_each(std::views::iota(1u, smp::count), [this, &sharded_db, keyspace_name, ring_version] (unsigned shard) -> future<> {

View File

@@ -1,7 +1,7 @@
DROP KEYSPACE IF EXISTS counters;
CREATE KEYSPACE IF NOT EXISTS counters
WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': '3'};
WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': '3'} AND TABLETS = {'enabled': false};
CREATE TABLE IF NOT EXISTS counters.counter1 (
key blob PRIMARY KEY,

View File

@@ -3,7 +3,7 @@ keyspace: ks
# The CQL for creating a keyspace (optional if it already exists)
keyspace_definition: |
CREATE KEYSPACE ks WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 3};
CREATE KEYSPACE ks WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 3} AND TABLETS = {'enabled': false};
# Table name
table: targettable

View File

@@ -2,7 +2,7 @@ keyspace: sec_index
keyspace_definition: |
CREATE KEYSPACE IF NOT EXISTS sec_index WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 3};
CREATE KEYSPACE IF NOT EXISTS sec_index WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 3} AND TABLETS = {'enabled': false};
table: users

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7cc51cd5c699c529239d1fe6fc3a7ec5dfceb3389236257388d07415f1870340
size 5800128
oid sha256:ec3d829264557320a7dd9e7dbf206dc4fbf6274be66df71df18d9ad6ffa8fa8d
size 5876224

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:daaeb5d5740d25ef0dcc79034959382042d007d569389940d0a7baf51fa1da59
size 5795820
oid sha256:a7ad75ad61b8e8f7653616cde86d1d8d0b612b91f5b3916b4778310a23a8da11
size 5879652

View File

@@ -148,9 +148,6 @@ public:
promise<> pr;
std::optional<shared_future<>> fut;
reader_concurrency_semaphore::read_func func;
// Self reference to keep the permit alive while queued for execution.
// Must be cleared on all code-paths, otherwise it will keep the permit alive in perpetuity.
reader_permit_opt permit_keepalive;
std::optional<reader_concurrency_semaphore::inactive_read> ir;
};
@@ -226,8 +223,6 @@ private:
}
void on_timeout() {
auto keepalive = std::exchange(_aux_data.permit_keepalive, std::nullopt);
auto ex = named_semaphore_timed_out(_semaphore._name);
_ex = std::make_exception_ptr(ex);
@@ -500,7 +495,7 @@ public:
_trace_ptr = std::move(trace_ptr);
}
void check_abort() {
void check_abort() const {
if (_ex) {
std::rethrow_exception(_ex);
}
@@ -649,7 +644,7 @@ void reader_permit::set_trace_state(tracing::trace_state_ptr trace_ptr) noexcept
_impl->set_trace_state(std::move(trace_ptr));
}
void reader_permit::check_abort() {
void reader_permit::check_abort() const {
return _impl->check_abort();
}
@@ -1170,6 +1165,7 @@ void reader_concurrency_semaphore::set_notify_handler(inactive_read_handle& irh,
auto& ir = *(*irh._permit)->aux_data().ir;
ir.notify_handler = std::move(notify_handler);
if (ttl_opt) {
irh._permit->set_timeout(db::no_timeout);
ir.ttl_timer.set_callback([this, permit = *irh._permit] () mutable {
evict(*permit, evict_reason::time);
});
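The one-line `set_timeout(db::no_timeout)` above fixes premature eviction: a cached querier carried both a leftover permit timeout and a cache TTL, and whichever fired first evicted the entry, so a short residual timeout defeated the TTL. Disabling the timeout before arming the TTL timer makes the TTL the sole eviction clock. A toy model of the timing (names and the seconds are illustrative):

```python
# Model of when a cached entry gets evicted, before and after the fix.
def eviction_after(permit_timeout, ttl, timeout_disabled):
    if timeout_disabled:           # fixed behavior: TTL is the only clock
        return ttl
    return min(permit_timeout, ttl)  # buggy behavior: earliest timer wins
```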
@@ -1628,10 +1624,10 @@ reader_permit reader_concurrency_semaphore::make_tracking_only_permit(schema_ptr
}
future<> reader_concurrency_semaphore::with_permit(schema_ptr schema, const char* const op_name, size_t memory,
db::timeout_clock::time_point timeout, tracing::trace_state_ptr trace_ptr, read_func func) {
auto permit = reader_permit(*this, std::move(schema), std::string_view(op_name), {1, static_cast<ssize_t>(memory)}, timeout, std::move(trace_ptr));
db::timeout_clock::time_point timeout, tracing::trace_state_ptr trace_ptr, reader_permit_opt& permit_holder, read_func func) {
permit_holder = reader_permit(*this, std::move(schema), std::string_view(op_name), {1, static_cast<ssize_t>(memory)}, timeout, std::move(trace_ptr));
auto permit = *permit_holder;
permit->aux_data().func = std::move(func);
permit->aux_data().permit_keepalive = permit;
return do_wait_admission(*permit);
}
@@ -1684,6 +1680,7 @@ void reader_concurrency_semaphore::foreach_permit(noncopyable_function<void(cons
boost::for_each(_wait_list._admission_queue, std::ref(func));
boost::for_each(_wait_list._memory_queue, std::ref(func));
boost::for_each(_ready_list, std::ref(func));
boost::for_each(_inactive_reads, std::ref(func));
}
void reader_concurrency_semaphore::foreach_permit(noncopyable_function<void(const reader_permit&)> func) const {

View File

@@ -460,7 +460,8 @@ public:
///
/// Some permits cannot be associated with any table, so passing nullptr as
/// the schema parameter is allowed.
future<> with_permit(schema_ptr schema, const char* const op_name, size_t memory, db::timeout_clock::time_point timeout, tracing::trace_state_ptr trace_ptr, read_func func);
future<> with_permit(schema_ptr schema, const char* const op_name, size_t memory, db::timeout_clock::time_point timeout,
tracing::trace_state_ptr trace_ptr, reader_permit_opt& permit_holder, read_func func);
/// Run the function through the semaphore's execution stage with a pre-admitted permit
///

View File

@@ -174,7 +174,7 @@ public:
// If the read was aborted, throw the exception the read was aborted with.
// Otherwise no-op.
void check_abort();
void check_abort() const;
query::max_result_size max_result_size() const;
void set_max_result_size(query::max_result_size);

View File

@@ -1487,7 +1487,16 @@ future<> repair::data_sync_repair_task_impl::run() {
auto& keyspace = _status.keyspace;
auto& sharded_db = rs.get_db();
auto& db = sharded_db.local();
auto germs = make_lw_shared(co_await locator::make_global_effective_replication_map(sharded_db, keyspace));
auto germs_fut = co_await coroutine::as_future(locator::make_global_effective_replication_map(sharded_db, keyspace));
if (germs_fut.failed()) {
auto ex = germs_fut.get_exception();
if (try_catch<data_dictionary::no_such_keyspace>(ex)) {
rlogger.warn("sync data: keyspace {} does not exist, skipping", keyspace);
co_return;
}
co_await coroutine::return_exception_ptr(std::move(ex));
}
auto germs = make_lw_shared(germs_fut.get());
auto id = get_repair_uniq_id();

View File

@@ -964,7 +964,6 @@ future<> database::add_column_family(keyspace& ks, schema_ptr schema, column_fam
}
}
schema = local_schema_registry().learn(schema);
schema->registry_entry()->mark_synced();
auto&& rs = ks.get_replication_strategy();
locator::effective_replication_map_ptr erm;
if (auto pt_rs = rs.maybe_as_per_table()) {
@@ -996,6 +995,8 @@ future<> database::add_column_family(keyspace& ks, schema_ptr schema, column_fam
co_await cf->stop();
co_await coroutine::return_exception_ptr(f.get_exception());
}
// Table must be added before entry is marked synced.
schema->registry_entry()->mark_synced();
}
future<> database::add_column_family_and_make_directory(schema_ptr schema, is_new_cf is_new) {
@@ -1553,7 +1554,9 @@ database::query(schema_ptr query_schema, const query::read_command& cmd, query::
querier_opt->permit().set_trace_state(trace_state);
f = co_await coroutine::as_future(semaphore.with_ready_permit(querier_opt->permit(), read_func));
} else {
f = co_await coroutine::as_future(semaphore.with_permit(query_schema, "data-query", cf.estimate_read_memory_cost(), timeout, trace_state, read_func));
reader_permit_opt permit_holder;
f = co_await coroutine::as_future(semaphore.with_permit(query_schema, "data-query", cf.estimate_read_memory_cost(), timeout,
trace_state, permit_holder, read_func));
}
if (!f.failed()) {
@@ -1615,7 +1618,9 @@ database::query_mutations(schema_ptr query_schema, const query::read_command& cm
querier_opt->permit().set_trace_state(trace_state);
f = co_await coroutine::as_future(semaphore.with_ready_permit(querier_opt->permit(), read_func));
} else {
f = co_await coroutine::as_future(semaphore.with_permit(query_schema, "mutation-query", cf.estimate_read_memory_cost(), timeout, trace_state, read_func));
reader_permit_opt permit_holder;
f = co_await coroutine::as_future(semaphore.with_permit(query_schema, "mutation-query", cf.estimate_read_memory_cost(), timeout,
trace_state, permit_holder, read_func));
}
if (!f.failed()) {
@@ -2557,6 +2562,12 @@ future<> database::truncate_table_on_all_shards(sharded<database>& sharded_db, s
});
});
co_await utils::get_local_injector().inject("truncate_compaction_disabled_wait", [] (auto& handler) -> future<> {
dblog.info("truncate_compaction_disabled_wait: wait");
co_await handler.wait_for_message(std::chrono::steady_clock::now() + std::chrono::minutes{5});
dblog.info("truncate_compaction_disabled_wait: done");
}, false);
const auto should_flush = with_snapshot && cf.can_flush();
dblog.trace("{} {}.{} and views on all shards", should_flush ? "Flushing" : "Clearing", s->ks_name(), s->cf_name());
std::function<future<>(replica::table&)> flush_or_clear = should_flush ?

View File

@@ -1029,8 +1029,10 @@ bool tablet_storage_group_manager::all_storage_groups_split() {
return true;
}
auto split_ready = std::ranges::all_of(_storage_groups | std::views::values,
std::mem_fn(&storage_group::set_split_mode));
bool split_ready = true;
for (const storage_group_ptr& sg : _storage_groups | std::views::values) {
split_ready &= sg->set_split_mode();
}
// The table replica will say to coordinator that its split status is ready by
// mirroring the sequence number from tablet metadata into its local state,
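The loop above replaces `std::ranges::all_of`, which short-circuits on the first `false`: groups after the first unready one never had `set_split_mode()` called, losing its side effect. Folding the results with `&=` visits every group. A sketch with an illustrative stand-in class (not Scylla's `storage_group`):

```python
# Every group's set_split_mode() must run, even after one returns False.
class StorageGroup:
    def __init__(self, ready):
        self.ready = ready
        self.split_mode_set = False

    def set_split_mode(self):
        self.split_mode_set = True  # side effect must happen for all groups
        return self.ready

def mark_all_split(groups):
    split_ready = True
    for g in groups:
        split_ready &= g.set_split_mode()
    return split_ready
```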
@@ -1058,6 +1060,12 @@ sstables::compaction_type_options::split tablet_storage_group_manager::split_com
future<> tablet_storage_group_manager::split_all_storage_groups(tasks::task_info tablet_split_task_info) {
sstables::compaction_type_options::split opt = split_compaction_options();
co_await utils::get_local_injector().inject("split_storage_groups_wait", [] (auto& handler) -> future<> {
dblog.info("split_storage_groups_wait: waiting");
co_await handler.wait_for_message(std::chrono::steady_clock::now() + std::chrono::minutes{5});
dblog.info("split_storage_groups_wait: done");
}, false);
co_await for_each_storage_group_gently([opt, tablet_split_task_info] (storage_group& storage_group) {
return storage_group.split(opt, tablet_split_task_info);
});

View File

@@ -13,8 +13,11 @@ import os
import subprocess
import tarfile
import pathlib
import shutil
import sys
import tempfile
import magic
from tempfile import mkstemp
RELOC_PREFIX='scylla'
@@ -52,6 +55,14 @@ def ldd(executable):
pass
else:
libraries[elements[0]] = os.path.realpath(elements[2])
hmacfile = elements[2].replace('/lib64/', '/lib64/.') + '.hmac'
if os.path.exists(hmacfile):
arcname = os.path.basename(hmacfile)
libraries[arcname] = os.path.realpath(hmacfile)
fc_hmacfile = elements[2].replace('/lib64/', '/lib64/fipscheck/') + '.hmac'
if os.path.exists(fc_hmacfile):
arcname = 'fipscheck/' + os.path.basename(fc_hmacfile)
libraries[arcname] = os.path.realpath(fc_hmacfile)
return libraries
def filter_dist(info):
@@ -64,6 +75,49 @@ SCYLLA_DIR='scylla-package'
def reloc_add(ar, name, arcname=None):
ar.add(name, arcname="{}/{}".format(SCYLLA_DIR, arcname if arcname else name))
def fipshmac(f):
DIRECTORY='build'
bn = os.path.basename(f)
subprocess.run(['fipshmac', '-d', DIRECTORY, f], check=True)
return f'{DIRECTORY}/{bn}.hmac'
def fix_hmac(ar, binpath, targetpath, patched_binary):
bn = os.path.basename(binpath)
dn = os.path.dirname(binpath)
targetpath_bn = os.path.basename(targetpath)
targetpath_dn = os.path.dirname(targetpath)
hmac = f'{dn}/.{bn}.hmac'
if os.path.exists(hmac):
hmac = fipshmac(patched_binary)
hmac_arcname = f'{targetpath_dn}/.{targetpath_bn}.hmac'
ar.reloc_add(hmac, arcname=hmac_arcname)
fc_hmac = f'{dn}/fipscheck/{bn}.hmac'
if os.path.exists(fc_hmac):
fc_hmac = fipshmac(patched_binary)
fc_hmac_arcname = f'{targetpath_dn}/fipscheck/{targetpath_bn}.hmac'
ar.reloc_add(fc_hmac, arcname=fc_hmac_arcname)
def fix_binary(ar, path):
# it's a pity patchelf have to patch an actual binary.
patched_elf = mkstemp()[1]
shutil.copy2(path, patched_elf)
subprocess.check_call(['patchelf',
'--remove-rpath',
patched_elf])
return patched_elf
def fix_executable(ar, binpath, targetpath):
patched_binary = fix_binary(ar, binpath)
ar.reloc_add(patched_binary, arcname=targetpath)
os.remove(patched_binary)
def fix_sharedlib(ar, binpath, targetpath):
patched_binary = fix_binary(ar, binpath)
ar.reloc_add(patched_binary, arcname=targetpath)
fix_hmac(ar, binpath, targetpath, patched_binary)
os.remove(patched_binary)
ap = argparse.ArgumentParser(description='Create a relocatable scylla package.')
ap.add_argument('dest',
help='Destination file (tar format)')
@@ -140,15 +194,19 @@ with tempfile.TemporaryDirectory() as tmpdir:
for exe in executables_scylla:
basename = os.path.basename(exe)
if not args.stripped:
ar.reloc_add(exe, arcname=f'libexec/{basename}')
fix_executable(ar, exe, f'libexec/{basename}')
else:
ar.reloc_add(f'{exe}.stripped', arcname=f'libexec/{basename}')
fix_executable(ar, f'{exe}.stripped', f'libexec/{basename}')
for exe in executables_distrocmd:
basename = os.path.basename(exe)
ar.reloc_add(exe, arcname=f'libexec/{basename}')
fix_executable(ar, exe, f'libexec/{basename}')
for lib, libfile in libs.items():
ar.reloc_add(libfile, arcname='libreloc/' + lib)
m = magic.detect_from_filename(libfile)
if m and (m.mime_type.startswith('application/x-sharedlib') or m.mime_type.startswith('application/x-pie-executable')):
fix_sharedlib(ar, libfile, f'libreloc/{lib}')
else:
ar.reloc_add(libfile, arcname=lib, recursive=False)
if have_gnutls:
gnutls_config_nolink = os.path.realpath('/etc/crypto-policies/back-ends/gnutls.config')
ar.reloc_add(gnutls_config_nolink, arcname='libreloc/gnutls.config')

Submodule seastar updated: 1822136684...a350b5d70e

View File

@@ -383,18 +383,14 @@ void query_pager::handle_result(
auto view = query::result_view(*results);
_last_pos = position_in_partition::for_partition_start();
uint64_t row_count;
uint64_t replica_row_count, row_count;
if constexpr(!std::is_same_v<std::decay_t<Visitor>, noop_visitor>) {
query_result_visitor<Visitor> v(std::forward<Visitor>(visitor));
view.consume(_cmd->slice, v);
if (_last_pkey) {
update_slice(*_last_pkey);
}
row_count = v.total_rows - v.dropped_rows;
_max = _max - row_count;
_exhausted = (v.total_rows < page_size && !results->is_short_read() && v.dropped_rows == 0) || _max == 0;
replica_row_count = v.total_rows;
// If per partition limit is defined, we need to accumulate rows fetched for last partition key if the key matches
if (_cmd->slice.partition_row_limit() < query::max_rows_if_set) {
if (_last_pkey && v.last_pkey && _last_pkey->equal(*_query_schema, *v.last_pkey)) {
@@ -403,32 +399,30 @@ void query_pager::handle_result(
_rows_fetched_for_last_partition = v.last_partition_row_count;
}
}
const auto& last_pos = results->last_position();
if (last_pos && !v.dropped_rows) {
_last_pkey = last_pos->partition;
_last_pos = last_pos->position;
} else {
_last_pkey = v.last_pkey;
if (v.last_ckey) {
_last_pos = position_in_partition::for_key(*v.last_ckey);
}
}
} else {
row_count = results->row_count() ? *results->row_count() : std::get<1>(view.count_partitions_and_rows());
_max = _max - row_count;
_exhausted = (row_count < page_size && !results->is_short_read()) || _max == 0;
replica_row_count = row_count;
}
if (!_exhausted) {
if (_last_pkey) {
update_slice(*_last_pkey);
}
{
_max = _max - row_count;
_exhausted = (replica_row_count < page_size && !results->is_short_read()) || _max == 0;
if (_last_pkey) {
update_slice(*_last_pkey);
}
// The last page can be truly empty -- with unset last-position and no data to calculate it based on.
if (!replica_row_count && !results->is_short_read()) {
_last_pkey = {};
} else {
auto last_pos = results->get_or_calculate_last_position();
_last_pkey = std::move(last_pos.partition);
_last_pos = std::move(last_pos.position);
}
}
qlogger.debug("Fetched {} rows, max_remain={} {}", row_count, _max, _exhausted ? "(exh)" : "");
qlogger.debug("Fetched {} rows (kept {}), max_remain={} {}", replica_row_count, row_count, _max, _exhausted ? "(exh)" : "");
if (_last_pkey) {
qlogger.debug("Last partition key: {}", *_last_pkey);

View File

@@ -1093,7 +1093,6 @@ private:
updates.emplace_back(topology_mutation_builder(guard.write_timestamp())
.set_global_topology_request(global_topology_request::truncate_table)
.set_global_topology_request_id(global_request_id)
.set_session(session_id(global_request_id))
.build());
updates.emplace_back(topology_request_tracking_mutation_builder(global_request_id)

View File

@@ -413,20 +413,17 @@ static locator::node::state to_topology_node_state(node_state ns) {
on_internal_error(rtlogger, format("unhandled node state: {}", ns));
}
// Synchronizes the local node state (token_metadata, system.peers/system.local tables,
// gossiper) to align it with the other raft topology nodes.
future<storage_service::nodes_to_notify_after_sync> storage_service::sync_raft_topology_nodes(mutable_token_metadata_ptr tmptr, std::optional<locator::host_id> target_node, std::unordered_set<raft::server_id> prev_normal) {
nodes_to_notify_after_sync nodes_to_notify;
rtlogger.trace("Start sync_raft_topology_nodes target_node={}", target_node);
const auto& am =_address_map;
future<> storage_service::raft_topology_update_ip(locator::host_id id, gms::inet_address ip, nodes_to_notify_after_sync* nodes_to_notify) {
const auto& t = _topology_state_machine._topology;
raft::server_id raft_id{id.uuid()};
auto update_topology = [&] (locator::host_id id, const replica_state& rs) {
tmptr->update_topology(id, locator::endpoint_dc_rack{rs.datacenter, rs.rack},
to_topology_node_state(rs.state), rs.shard_count);
};
std::vector<future<>> sys_ks_futures;
auto node = t.find(raft_id);
if (!node) {
co_return;
}
using host_id_to_ip_map_t = std::unordered_map<locator::host_id, gms::inet_address>;
auto get_host_id_to_ip_map = [&, map = std::optional<host_id_to_ip_map_t>{}]() mutable -> future<const host_id_to_ip_map_t*> {
@@ -445,50 +442,13 @@ future<storage_service::nodes_to_notify_after_sync> storage_service::sync_raft_t
co_return &*map;
};
std::vector<future<>> sys_ks_futures;
const auto& rs = node->second;
auto remove_ip = [&](inet_address ip, locator::host_id host_id, bool notify) -> future<> {
sys_ks_futures.push_back(_sys_ks.local().remove_endpoint(ip));
if (const auto ep = _gossiper.get_endpoint_state_ptr(ip); ep && ep->get_host_id() == host_id) {
co_await _gossiper.force_remove_endpoint(ip, gms::null_permit_id);
if (notify) {
nodes_to_notify.left.push_back({ip, host_id});
switch (rs.state) {
case node_state::normal: {
if (is_me(ip)) {
co_return;
}
}
};
auto process_left_node = [&] (raft::server_id id) -> future<> {
locator::host_id host_id{id.uuid()};
if (const auto ip = am.find(host_id)) {
co_await remove_ip(*ip, host_id, true);
}
if (t.left_nodes_rs.find(id) != t.left_nodes_rs.end()) {
update_topology(host_id, t.left_nodes_rs.at(id));
}
// However if we do that, we need to also implement unbanning a node and do it if `removenode` is aborted.
co_await _messaging.local().ban_host(host_id);
};
auto process_normal_node = [&] (raft::server_id id, const replica_state& rs) -> future<> {
locator::host_id host_id{id.uuid()};
auto ip = am.find(host_id);
rtlogger.trace("loading topology: raft id={} ip={} node state={} dc={} rack={} tokens state={} tokens={} shards={}",
id, ip, rs.state, rs.datacenter, rs.rack, _topology_state_machine._topology.tstate, rs.ring.value().tokens, rs.shard_count, rs.cleanup);
// Save tokens, not needed for raft topology management, but needed by legacy
// Also ip -> id mapping is needed for address map recreation on reboot
if (is_me(host_id)) {
sys_ks_futures.push_back(_sys_ks.local().update_tokens(rs.ring.value().tokens));
co_await _gossiper.add_local_application_state(
std::pair(gms::application_state::TOKENS, gms::versioned_value::tokens(rs.ring.value().tokens)),
std::pair(gms::application_state::CDC_GENERATION_ID, gms::versioned_value::cdc_generation_id(_topology_state_machine._topology.committed_cdc_generations.back())),
std::pair(gms::application_state::STATUS, gms::versioned_value::normal(rs.ring.value().tokens))
);
} else if (ip && !is_me(*ip)) {
// In replace-with-same-ip scenario the replaced node IP will be the same
// as ours, we shouldn't put it into system.peers.
@@ -501,7 +461,7 @@ future<storage_service::nodes_to_notify_after_sync> storage_service::sync_raft_t
// Populate the table with the state from the gossiper here since storage_service::on_change()
// (which is called each time gossiper state changes) may have skipped it because the tokens
// for the node were not in the 'normal' state yet
auto info = get_peer_info_for_update(*ip);
auto info = get_peer_info_for_update(ip);
if (info) {
// And then amend with the info from raft
info->tokens = rs.ring.value().tokens;
@@ -509,29 +469,97 @@ future<storage_service::nodes_to_notify_after_sync> storage_service::sync_raft_t
info->rack = rs.rack;
info->release_version = rs.release_version;
info->supported_features = fmt::to_string(fmt::join(rs.supported_features, ","));
sys_ks_futures.push_back(_sys_ks.local().update_peer_info(*ip, host_id, *info));
}
if (!prev_normal.contains(id)) {
nodes_to_notify.joined.push_back(*ip);
sys_ks_futures.push_back(_sys_ks.local().update_peer_info(ip, id, *info));
}
if (const auto it = host_id_to_ip_map.find(host_id); it != host_id_to_ip_map.end() && it->second != *ip) {
if (nodes_to_notify) {
nodes_to_notify->joined.emplace_back(ip);
}
if (const auto it = host_id_to_ip_map.find(id); it != host_id_to_ip_map.end() && it->second != ip) {
utils::get_local_injector().inject("crash-before-prev-ip-removed", [] {
slogger.info("crash-before-prev-ip-removed hit, killing the node");
_exit(1);
});
// IP change is not expected to emit REMOVED_NODE notifications
co_await remove_ip(it->second, host_id, false);
auto old_ip = it->second;
sys_ks_futures.push_back(_sys_ks.local().remove_endpoint(old_ip));
if (const auto ep = _gossiper.get_endpoint_state_ptr(old_ip); ep && ep->get_host_id() == id) {
co_await _gossiper.force_remove_endpoint(old_ip, gms::null_permit_id);
}
}
}
break;
case node_state::bootstrapping:
if (!is_me(ip)) {
utils::get_local_injector().inject("crash-before-bootstrapping-node-added", [] {
rtlogger.error("crash-before-bootstrapping-node-added hit, killing the node");
_exit(1);
});
// Save ip -> id mapping in peers table because we need it on restart, but do not save tokens until owned
sys_ks_futures.push_back(_sys_ks.local().update_peer_info(ip, id, {}));
}
break;
default:
break;
}
co_await when_all_succeed(sys_ks_futures.begin(), sys_ks_futures.end()).discard_result();
}
// Synchronizes the local node state (token_metadata, system.peers/system.local tables,
// gossiper) to align it with the other raft topology nodes.
future<storage_service::nodes_to_notify_after_sync> storage_service::sync_raft_topology_nodes(mutable_token_metadata_ptr tmptr, std::unordered_set<raft::server_id> prev_normal) {
nodes_to_notify_after_sync nodes_to_notify;
rtlogger.trace("Start sync_raft_topology_nodes");
const auto& t = _topology_state_machine._topology;
auto update_topology = [&] (locator::host_id id, const replica_state& rs) {
tmptr->update_topology(id, locator::endpoint_dc_rack{rs.datacenter, rs.rack},
to_topology_node_state(rs.state), rs.shard_count);
};
std::vector<future<>> sys_ks_futures;
auto process_left_node = [&] (raft::server_id id, locator::host_id host_id, std::optional<gms::inet_address> ip) -> future<> {
if (ip) {
sys_ks_futures.push_back(_sys_ks.local().remove_endpoint(*ip));
if (const auto ep = _gossiper.get_endpoint_state_ptr(*ip); ep && ep->get_host_id() == host_id) {
co_await _gossiper.force_remove_endpoint(*ip, gms::null_permit_id);
nodes_to_notify.left.push_back({*ip, host_id});
}
}
if (t.left_nodes_rs.find(id) != t.left_nodes_rs.end()) {
update_topology(host_id, t.left_nodes_rs.at(id));
}
// However if we do that, we need to also implement unbanning a node and do it if `removenode` is aborted.
co_await _messaging.local().ban_host(host_id);
};
auto process_normal_node = [&] (raft::server_id id, locator::host_id host_id, std::optional<gms::inet_address> ip, const replica_state& rs) -> future<> {
rtlogger.trace("loading topology: raft id={} ip={} node state={} dc={} rack={} tokens state={} tokens={} shards={}",
id, ip, rs.state, rs.datacenter, rs.rack, _topology_state_machine._topology.tstate, rs.ring.value().tokens, rs.shard_count, rs.cleanup);
// Save tokens, not needed for raft topology management, but needed by legacy
// Also ip -> id mapping is needed for address map recreation on reboot
if (is_me(host_id)) {
sys_ks_futures.push_back(_sys_ks.local().update_tokens(rs.ring.value().tokens));
co_await _gossiper.add_local_application_state(
std::pair(gms::application_state::TOKENS, gms::versioned_value::tokens(rs.ring.value().tokens)),
std::pair(gms::application_state::CDC_GENERATION_ID, gms::versioned_value::cdc_generation_id(_topology_state_machine._topology.committed_cdc_generations.back())),
std::pair(gms::application_state::STATUS, gms::versioned_value::normal(rs.ring.value().tokens))
);
}
update_topology(host_id, rs);
co_await tmptr->update_normal_tokens(rs.ring.value().tokens, host_id);
};
auto process_transition_node = [&](raft::server_id id, const replica_state& rs) -> future<> {
locator::host_id host_id{id.uuid()};
auto ip = am.find(host_id);
auto process_transition_node = [&](raft::server_id id, locator::host_id host_id, std::optional<gms::inet_address> ip, const replica_state& rs) -> future<> {
rtlogger.trace("loading topology: raft id={} ip={} node state={} dc={} rack={} tokens state={} tokens={}",
id, ip, rs.state, rs.datacenter, rs.rack, _topology_state_machine._topology.tstate,
seastar::value_of([&] () -> sstring {
@@ -541,29 +569,16 @@ future<storage_service::nodes_to_notify_after_sync> storage_service::sync_raft_t
switch (rs.state) {
case node_state::bootstrapping:
if (rs.ring.has_value()) {
if (ip) {
if (!is_me(*ip)) {
utils::get_local_injector().inject("crash-before-bootstrapping-node-added", [] {
rtlogger.error("crash-before-bootstrapping-node-added hit, killing the node");
_exit(1);
});
// Save ip -> id mapping in peers table because we need it on restart, but do not save tokens until owned
sys_ks_futures.push_back(_sys_ks.local().update_peer_info(*ip, host_id, {}));
}
update_topology(host_id, rs);
if (_topology_state_machine._topology.normal_nodes.empty()) {
// This is the first node in the cluster. Insert the tokens as normal to the token ring early
// so we can perform writes to regular 'distributed' tables during the bootstrap procedure
// (such as the CDC generation write).
// It doesn't break anything to set the tokens to normal early in this single-node case.
co_await tmptr->update_normal_tokens(rs.ring.value().tokens, host_id);
} else {
tmptr->add_bootstrap_tokens(rs.ring.value().tokens, host_id);
co_await update_topology_change_info(tmptr, ::format("bootstrapping node {}/{}", id, ip));
}
} else if (_topology_state_machine._topology.tstate == topology::transition_state::write_both_read_new) {
on_internal_error(rtlogger, format("Bootstrapping node {} does not have IP mapping but the topology is in the write_both_read_new state", id));
update_topology(host_id, rs);
if (_topology_state_machine._topology.normal_nodes.empty()) {
// This is the first node in the cluster. Insert the tokens as normal to the token ring early
// so we can perform writes to regular 'distributed' tables during the bootstrap procedure
// (such as the CDC generation write).
// It doesn't break anything to set the tokens to normal early in this single-node case.
co_await tmptr->update_normal_tokens(rs.ring.value().tokens, host_id);
} else {
tmptr->add_bootstrap_tokens(rs.ring.value().tokens, host_id);
co_await update_topology_change_info(tmptr, ::format("bootstrapping node {}/{}", id, ip));
}
}
break;
@@ -576,7 +591,7 @@ future<storage_service::nodes_to_notify_after_sync> storage_service::sync_raft_t
case node_state::removing:
if (_topology_state_machine._topology.tstate == topology::transition_state::rollback_to_normal) {
// no need for double writes anymore since op failed
co_await process_normal_node(id, rs);
co_await process_normal_node(id, host_id, ip, rs);
break;
}
update_topology(host_id, rs);
@@ -587,7 +602,7 @@ future<storage_service::nodes_to_notify_after_sync> storage_service::sync_raft_t
case node_state::replacing: {
SCYLLA_ASSERT(_topology_state_machine._topology.req_param.contains(id));
auto replaced_id = std::get<replace_param>(_topology_state_machine._topology.req_param[id]).replaced_id;
auto existing_ip = am.find(locator::host_id{replaced_id.uuid()});
auto existing_ip = _address_map.find(locator::host_id{replaced_id.uuid()});
const auto replaced_host_id = locator::host_id(replaced_id.uuid());
tmptr->update_topology(replaced_host_id, std::nullopt, locator::node::state::being_replaced);
tmptr->add_replacing_endpoint(replaced_host_id, host_id);
@@ -599,38 +614,43 @@ future<storage_service::nodes_to_notify_after_sync> storage_service::sync_raft_t
break;
case node_state::rebuilding:
// Rebuilding node is normal
co_await process_normal_node(id, rs);
co_await process_normal_node(id, host_id, ip, rs);
break;
default:
on_fatal_internal_error(rtlogger, ::format("Unexpected state {} for node {}", rs.state, id));
}
};
if (target_node) {
raft::server_id raft_id{target_node->uuid()};
if (t.left_nodes.contains(raft_id)) {
co_await process_left_node(raft_id);
} else if (auto it = t.normal_nodes.find(raft_id); it != t.normal_nodes.end()) {
co_await process_normal_node(raft_id, it->second);
} else if ((it = t.transition_nodes.find(raft_id)) != t.transition_nodes.end()) {
co_await process_transition_node(raft_id, it->second);
sys_ks_futures.reserve(t.left_nodes.size() + t.normal_nodes.size() + t.transition_nodes.size());
for (const auto& id: t.left_nodes) {
locator::host_id host_id{id.uuid()};
auto ip = _address_map.find(host_id);
co_await process_left_node(id, host_id, ip);
if (ip) {
sys_ks_futures.push_back(raft_topology_update_ip(host_id, *ip, nullptr));
}
} else {
sys_ks_futures.reserve(t.left_nodes.size() + t.normal_nodes.size() + t.transition_nodes.size());
for (const auto& id: t.left_nodes) {
co_await process_left_node(id);
}
for (const auto& [id, rs]: t.normal_nodes) {
locator::host_id host_id{id.uuid()};
auto ip = _address_map.find(host_id);
co_await process_normal_node(id, host_id, ip, rs);
if (ip) {
sys_ks_futures.push_back(raft_topology_update_ip(host_id, *ip, prev_normal.contains(id) ? nullptr : &nodes_to_notify));
}
for (const auto& [id, rs]: t.normal_nodes) {
co_await process_normal_node(id, rs);
}
for (const auto& [id, rs]: t.transition_nodes) {
locator::host_id host_id{id.uuid()};
auto ip = _address_map.find(host_id);
co_await process_transition_node(id, host_id, ip, rs);
if (ip) {
sys_ks_futures.push_back(raft_topology_update_ip(host_id, *ip, nullptr));
}
for (const auto& [id, rs]: t.transition_nodes) {
co_await process_transition_node(id, rs);
}
for (auto id : t.get_excluded_nodes()) {
locator::node* n = tmptr->get_topology().find_node(locator::host_id(id.uuid()));
if (n) {
n->set_excluded(true);
}
}
@@ -746,6 +766,8 @@ future<> storage_service::topology_state_load(state_change_hint hint) {
[[fallthrough]];
case topology::transition_state::left_token_ring:
[[fallthrough]];
case topology::transition_state::truncate_table:
[[fallthrough]];
case topology::transition_state::rollback_to_normal:
return read_new_t::no;
case topology::transition_state::write_both_read_new:
@@ -754,7 +776,7 @@ future<> storage_service::topology_state_load(state_change_hint hint) {
}, _topology_state_machine._topology.tstate);
tmptr->set_read_new(read_new);
auto nodes_to_notify = co_await sync_raft_topology_nodes(tmptr, std::nullopt, std::move(prev_normal));
auto nodes_to_notify = co_await sync_raft_topology_nodes(tmptr, std::move(prev_normal));
std::optional<locator::tablet_metadata> tablets;
if (hint.tablets_hint) {
@@ -943,14 +965,12 @@ class storage_service::ip_address_updater: public gms::i_endpoint_state_change_s
// If we call sync_raft_topology_nodes here directly, a gossiper lock and
// the _group0.read_apply_mutex could be taken in cross-order leading to a deadlock.
// To avoid this, we don't wait for sync_raft_topology_nodes to finish.
(void)futurize_invoke(ensure_alive([this, id, h = _ss._async_gate.hold()]() -> future<> {
(void)futurize_invoke(ensure_alive([this, id, endpoint, h = _ss._async_gate.hold()]() -> future<> {
auto guard = co_await _ss._group0->client().hold_read_apply_mutex(_ss._abort_source);
co_await utils::get_local_injector().inject("ip-change-raft-sync-delay", std::chrono::milliseconds(500));
storage_service::nodes_to_notify_after_sync nodes_to_notify;
auto lock = co_await _ss.get_token_metadata_lock();
co_await _ss.mutate_token_metadata([this, id, &nodes_to_notify](mutable_token_metadata_ptr t) -> future<> {
nodes_to_notify = co_await _ss.sync_raft_topology_nodes(std::move(t), id, {});
}, storage_service::acquire_merge_lock::no);
// Set notify_join to true since here we detected address change and drivers have to be notified
nodes_to_notify_after_sync nodes_to_notify;
co_await _ss.raft_topology_update_ip(id, endpoint, &nodes_to_notify);
co_await _ss.notify_nodes_after_sync(std::move(nodes_to_notify));
}));
}
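The comment in `ip_address_updater` above explains why the sync is detached instead of awaited: waiting while a gossiper lock is held could acquire the gossiper lock and `_group0.read_apply_mutex` in cross-order and deadlock. A minimal asyncio sketch of the same idea, with all names hypothetical (this is not the ScyllaDB API, just the shape of the fix):

```python
import asyncio

order = []

async def background_sync(mutex: asyncio.Lock) -> None:
    # The detached task takes the second lock on its own, so the caller is
    # never holding both locks at once and no cross-order deadlock can form.
    async with mutex:
        order.append("synced")

async def on_ip_change(mutex: asyncio.Lock) -> None:
    # Mirror the detached `(void)futurize_invoke(...)` above:
    # schedule the sync, do not await it here.
    asyncio.ensure_future(background_sync(mutex))
    order.append("returned")
```

The caller returns immediately ("returned" before "synced"), which is exactly the "we don't wait for sync_raft_topology_nodes to finish" behavior described in the comment.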


@@ -964,11 +964,12 @@ private:
std::vector<gms::inet_address> joined;
};
future<> raft_topology_update_ip(locator::host_id id, gms::inet_address ip, nodes_to_notify_after_sync* nodes_to_notify);
// Synchronizes the local node state (token_metadata, system.peers/system.local tables,
// gossiper) to align it with the other raft topology nodes.
// Optional target_node can be provided to restrict the synchronization to the specified node.
// Returns a structure that describes which notifications to trigger after token metadata is updated.
future<nodes_to_notify_after_sync> sync_raft_topology_nodes(mutable_token_metadata_ptr tmptr, std::optional<locator::host_id> target_node, std::unordered_set<raft::server_id> prev_normal);
future<nodes_to_notify_after_sync> sync_raft_topology_nodes(mutable_token_metadata_ptr tmptr, std::unordered_set<raft::server_id> prev_normal);
// Triggers notifications (on_joined, on_left) based on the recent changes to token metadata, as described by the passed in structure.
// This function should be called on the result of `sync_raft_topology_nodes`, after the global token metadata is updated.
future<> notify_nodes_after_sync(nodes_to_notify_after_sync&& nodes_to_notify);


@@ -8,6 +8,7 @@
#include "locator/tablets.hh"
#include "replica/database.hh"
#include "service/migration_manager.hh"
#include "service/storage_service.hh"
#include "service/task_manager_module.hh"
#include "tasks/task_handler.hh"
@@ -78,6 +79,8 @@ static bool tablet_id_provided(const locator::tablet_task_type& task_type) {
}
future<std::optional<tasks::virtual_task_hint>> tablet_virtual_task::contains(tasks::task_id task_id) const {
co_await _ss._migration_manager.local().get_group0_barrier().trigger();
auto tables = get_table_ids();
for (auto table : tables) {
auto& tmap = _ss.get_token_metadata().tablets().get_tablet_map(table);


@@ -929,130 +929,15 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
}
break;
case global_topology_request::truncate_table: {
// Execute a barrier to make sure the nodes we are performing truncate on see the session
// and are able to create a topology_guard using the frozen_guard we are sending over RPC
// TODO: Exclude nodes which don't contain replicas of the table we are truncating
guard = co_await global_tablet_token_metadata_barrier(std::move(guard));
const utils::UUID& global_request_id = _topo_sm._topology.global_request_id.value();
std::optional<sstring> error;
// We should perform TRUNCATE only if the session is still valid. It could be cleared if a previous truncate
// handler performed the truncate and cleared the session, but crashed before finalizing the request
if (_topo_sm._topology.session) {
const auto topology_requests_entry = co_await _sys_ks.get_topology_request_entry(global_request_id, true);
const table_id& table_id = topology_requests_entry.truncate_table_id;
lw_shared_ptr<replica::table> table = _db.get_tables_metadata().get_table_if_exists(table_id);
if (table) {
const sstring& ks_name = table->schema()->ks_name();
const sstring& cf_name = table->schema()->cf_name();
rtlogger.info("Performing TRUNCATE TABLE global topology request for {}.{}", ks_name, cf_name);
// Collect the IDs of the hosts with replicas, but ignore excluded nodes
std::unordered_set<locator::host_id> replica_hosts;
const std::unordered_set<raft::server_id> excluded_nodes = _topo_sm._topology.get_excluded_nodes();
const locator::tablet_map& tmap = get_token_metadata_ptr()->tablets().get_tablet_map(table_id);
co_await tmap.for_each_tablet([&] (locator::tablet_id tid, const locator::tablet_info& tinfo) {
for (const locator::tablet_replica& replica: tinfo.replicas) {
if (!excluded_nodes.contains(raft::server_id(replica.host.uuid()))) {
replica_hosts.insert(replica.host);
}
}
return make_ready_future<>();
});
// Release the guard to avoid blocking group0 for long periods of time while invoking RPCs
release_guard(std::move(guard));
co_await utils::get_local_injector().inject("truncate_table_wait", [] (auto& handler) {
rtlogger.info("truncate_table_wait: start");
return handler.wait_for_message(db::timeout_clock::now() + std::chrono::minutes(2));
});
// Check if all the nodes with replicas are alive
for (const locator::host_id& replica_host: replica_hosts) {
if (!_gossiper.is_alive(replica_host)) {
throw std::runtime_error(::format("Cannot perform TRUNCATE on table {}.{} because host {} is down", ks_name, cf_name, replica_host));
}
}
// Send the RPC to all replicas
const service::frozen_topology_guard frozen_guard { _topo_sm._topology.session };
co_await coroutine::parallel_for_each(replica_hosts, [&] (const locator::host_id& host_id) -> future<> {
co_await ser::storage_proxy_rpc_verbs::send_truncate_with_tablets(&_messaging, host_id, ks_name, cf_name, frozen_guard);
});
} else {
error = ::format("Table with UUID {} does not exist.", table_id);
}
// Clear the session and save the error message
while (true) {
if (!guard) {
guard = co_await start_operation();
}
std::vector<canonical_mutation> updates;
updates.push_back(topology_mutation_builder(guard.write_timestamp())
.del_session()
.build());
if (error) {
updates.push_back(topology_request_tracking_mutation_builder(global_request_id)
.set("error", *error)
.build());
}
sstring reason = "Clear truncate session";
topology_change change{std::move(updates)};
group0_command g0_cmd = _group0.client().prepare_command(std::move(change), guard, reason);
try {
co_await _group0.client().add_entry(std::move(g0_cmd), std::move(guard), _as);
break;
} catch (group0_concurrent_modification&) {
rtlogger.info("handle_global_request(): concurrent modification, retrying");
}
}
}
utils::get_local_injector().inject("truncate_crash_after_session_clear", [] {
rtlogger.info("truncate_crash_after_session_clear hit, killing the node");
_exit(1);
});
// Execute a barrier to ensure the TRUNCATE RPC can't run on any nodes after this point
if (!guard) {
guard = co_await start_operation();
}
guard = co_await global_tablet_token_metadata_barrier(std::move(guard));
// Finalize the request
while (true) {
if (!guard) {
guard = co_await start_operation();
}
std::vector<canonical_mutation> updates;
updates.push_back(topology_mutation_builder(guard.write_timestamp())
.del_global_topology_request()
.del_global_topology_request_id()
.build());
updates.push_back(topology_request_tracking_mutation_builder(global_request_id)
.set("end_time", db_clock::now())
.set("done", true)
.build());
sstring reason = "Truncate has completed";
topology_change change{std::move(updates)};
group0_command g0_cmd = _group0.client().prepare_command(std::move(change), guard, reason);
try {
co_await _group0.client().add_entry(std::move(g0_cmd), std::move(guard), _as);
break;
} catch (group0_concurrent_modification&) {
rtlogger.info("handle_global_request(): concurrent modification, retrying");
}
}
break;
rtlogger.info("TRUNCATE TABLE requested");
std::vector<canonical_mutation> updates;
updates.push_back(topology_mutation_builder(guard.write_timestamp())
.set_transition_state(topology::transition_state::truncate_table)
.set_session(session_id(_topo_sm._topology.global_request_id.value()))
.build());
co_await update_topology_state(std::move(guard), std::move(updates), "TRUNCATE TABLE requested");
}
break;
}
}
@@ -1738,6 +1623,123 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
co_await update_topology_state(std::move(guard), std::move(updates), format("Finished tablet split finalization"));
}
future<> handle_truncate_table(group0_guard guard) {
// Execute a barrier to make sure the nodes we are performing truncate on see the session
// and are able to create a topology_guard using the frozen_guard we are sending over RPC
// TODO: Exclude nodes which don't contain replicas of the table we are truncating
guard = co_await global_tablet_token_metadata_barrier(std::move(guard));
const utils::UUID& global_request_id = _topo_sm._topology.global_request_id.value();
std::optional<sstring> error;
// We should perform TRUNCATE only if the session is still valid. It could be cleared if a previous truncate
// handler performed the truncate and cleared the session, but crashed before finalizing the request
if (_topo_sm._topology.session) {
const auto topology_requests_entry = co_await _sys_ks.get_topology_request_entry(global_request_id, true);
const table_id& table_id = topology_requests_entry.truncate_table_id;
lw_shared_ptr<replica::table> table = _db.get_tables_metadata().get_table_if_exists(table_id);
if (table) {
const sstring& ks_name = table->schema()->ks_name();
const sstring& cf_name = table->schema()->cf_name();
rtlogger.info("Performing TRUNCATE TABLE for {}.{}", ks_name, cf_name);
// Collect the IDs of the hosts with replicas, but ignore excluded nodes
std::unordered_set<locator::host_id> replica_hosts;
const std::unordered_set<raft::server_id> excluded_nodes = _topo_sm._topology.get_excluded_nodes();
const locator::tablet_map& tmap = get_token_metadata_ptr()->tablets().get_tablet_map(table_id);
co_await tmap.for_each_tablet([&] (locator::tablet_id tid, const locator::tablet_info& tinfo) {
for (const locator::tablet_replica& replica: tinfo.replicas) {
if (!excluded_nodes.contains(raft::server_id(replica.host.uuid()))) {
replica_hosts.insert(replica.host);
}
}
return make_ready_future<>();
});
// Release the guard to avoid blocking group0 for long periods of time while invoking RPCs
release_guard(std::move(guard));
co_await utils::get_local_injector().inject("truncate_table_wait", [] (auto& handler) {
rtlogger.info("truncate_table_wait: start");
return handler.wait_for_message(db::timeout_clock::now() + std::chrono::minutes(2));
});
// Check if all the nodes with replicas are alive
for (const locator::host_id& replica_host: replica_hosts) {
if (!_gossiper.is_alive(replica_host)) {
throw std::runtime_error(::format("Cannot perform TRUNCATE on table {}.{} because host {} is down", ks_name, cf_name, replica_host));
}
}
// Send the RPC to all replicas
const service::frozen_topology_guard frozen_guard { _topo_sm._topology.session };
co_await coroutine::parallel_for_each(replica_hosts, [&] (const locator::host_id& host_id) -> future<> {
co_await ser::storage_proxy_rpc_verbs::send_truncate_with_tablets(&_messaging, host_id, ks_name, cf_name, frozen_guard);
});
} else {
error = ::format("Cannot TRUNCATE table with UUID {} because it does not exist.", table_id);
}
// Clear the session and save the error message
while (true) {
if (!guard) {
guard = co_await start_operation();
}
std::vector<canonical_mutation> updates;
updates.push_back(topology_mutation_builder(guard.write_timestamp())
.del_session()
.build());
if (error) {
updates.push_back(topology_request_tracking_mutation_builder(global_request_id)
.set("error", *error)
.build());
}
try {
co_await update_topology_state(std::move(guard), std::move(updates), "Clear truncate session");
break;
} catch (group0_concurrent_modification&) {
}
}
}
utils::get_local_injector().inject("truncate_crash_after_session_clear", [] {
rtlogger.info("truncate_crash_after_session_clear hit, killing the node");
_exit(1);
});
// Execute a barrier to ensure the TRUNCATE RPC can't run on any nodes after this point
if (!guard) {
guard = co_await start_operation();
}
guard = co_await global_tablet_token_metadata_barrier(std::move(guard));
// Finalize the request
while (true) {
if (!guard) {
guard = co_await start_operation();
}
std::vector<canonical_mutation> updates;
updates.push_back(topology_mutation_builder(guard.write_timestamp())
.del_transition_state()
.del_global_topology_request()
.del_global_topology_request_id()
.build());
updates.push_back(topology_request_tracking_mutation_builder(global_request_id)
.set("end_time", db_clock::now())
.set("done", true)
.build());
try {
co_await update_topology_state(std::move(guard), std::move(updates), "Truncate has completed");
break;
} catch (group0_concurrent_modification&) {
}
}
}
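Both group0 writes in `handle_truncate_table` follow the same pattern: build the mutations, try to commit, and on `group0_concurrent_modification` rebuild from fresh state and retry. A hedged standalone sketch of that loop — `ConcurrentModification` and the two callables stand in for the group0 client machinery above, not the real API:

```python
class ConcurrentModification(Exception):
    """Stand-in for group0_concurrent_modification."""


def commit_with_retry(build_updates, apply_updates):
    """Rebuild updates from fresh state and retry until no writer races us."""
    attempts = 0
    while True:
        attempts += 1
        try:
            apply_updates(build_updates())
            return attempts
        except ConcurrentModification:
            # Another coordinator committed first; loop and rebuild the
            # updates against the new state, exactly like the while(true)
            # blocks in the handler above.
            continue
```

The key design point, matching the handler, is that the updates are rebuilt inside the loop: retrying with stale mutations would just race again.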
// This function must not release and reacquire the guard, callers rely
// on the fact that the block which calls this is atomic.
// FIXME: Don't take the ownership of the guard to make the above guarantee explicit.
@@ -2462,6 +2464,9 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
co_await update_topology_state(std::move(node.guard), {builder.build(), rtbuilder.build()}, str);
}
break;
case topology::transition_state::truncate_table:
co_await handle_truncate_table(std::move(guard));
break;
}
co_return true;
};


@@ -152,6 +152,7 @@ static std::unordered_map<topology::transition_state, sstring> transition_state_
{topology::transition_state::tablet_draining, "tablet draining"},
{topology::transition_state::left_token_ring, "left token ring"},
{topology::transition_state::rollback_to_normal, "rollback to normal"},
{topology::transition_state::truncate_table, "truncate table"},
};
// Allows old deprecated names to be recognized and point to the correct transition.


@@ -119,6 +119,7 @@ struct topology {
tablet_resize_finalization,
left_token_ring,
rollback_to_normal,
truncate_table,
};
std::optional<transition_state> tstate;


@@ -85,9 +85,6 @@ future<status_helper> task_handler::get_status_helper() {
[id = _id] (task_manager::task_variant task_v, tasks::virtual_task_hint hint) -> future<status_helper> {
return std::visit(overloaded_functor{
[] (task_manager::task_ptr task) -> future<status_helper> {
if (task->is_complete()) {
task->unregister_task();
}
co_return status_helper{
.status = co_await get_task_status(task),
.task = task

test.py

@@ -390,6 +390,7 @@ class UnitTestSuite(TestSuite):
super().__init__(path, cfg, options, mode)
# Map of custom test command line arguments, if configured
self.custom_args = cfg.get("custom_args", {})
self.extra_cmdline_options = cfg.get("extra_scylla_cmdline_options", [])
# Map of tests that cannot run with compaction groups
self.all_can_run_compaction_groups_except = cfg.get("all_can_run_compaction_groups_except")
@@ -401,6 +402,10 @@ class UnitTestSuite(TestSuite):
test = UnitTest(self.next_id((shortname, self.suite_key)), shortname, suite, args)
self.tests.append(test)
def prepare_arg(self, arg):
extra_cmdline_options = ' '.join(self.extra_cmdline_options)
return f'{arg} {extra_cmdline_options}'
async def add_test(self, shortname, casename) -> None:
"""Create a UnitTest class with possibly custom command line
arguments and add it to the list of tests"""
@@ -413,7 +418,7 @@ class UnitTestSuite(TestSuite):
args = self.custom_args.get(shortname, ["-c2 -m2G"])
args = merge_cmdline_options(args, self.options.extra_scylla_cmdline_options)
for a in args:
await self.create_test(shortname, casename, self, a)
await self.create_test(shortname, casename, self, self.prepare_arg(a))
@property
def pattern(self) -> str:
@@ -531,13 +536,12 @@ class BoostTestSuite(UnitTestSuite):
# Skip tests which are not configured, and hence are not built
if os.path.join("test", self.name, execname if combined_test else shortname) not in self.options.tests:
return
# Default seastar arguments, if not provided in custom test options,
# are two cores and 2G of RAM
args = self.custom_args.get(shortname, ["-c2 -m2G"])
args = merge_cmdline_options(args, self.options.extra_scylla_cmdline_options)
for a in args:
await self.create_test(shortname, casename, self, a)
await self.create_test(shortname, casename, self, self.prepare_arg(a))
def junit_tests(self) -> Iterable['Test']:
"""Boost tests produce an own XML output, so are not included in a junit report"""

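The `prepare_arg` helper added above simply appends the suite-wide `extra_scylla_cmdline_options` to each per-test argument string before `create_test` is called. A standalone sketch of that behavior (names mirror the diff; the free-function form is for illustration only):

```python
def prepare_arg(arg: str, extra_cmdline_options: list[str]) -> str:
    # Join the suite-wide extra options with spaces and append them to this
    # test's own argument string, as UnitTestSuite.prepare_arg does above.
    extra = ' '.join(extra_cmdline_options)
    return f'{arg} {extra}'
```

With the default seastar args from the diff, `prepare_arg("-c2 -m2G", ["--smp", "1"])` yields `"-c2 -m2G --smp 1"`; note that an empty option list leaves a trailing space, which the shell-style argument splitting downstream tolerates.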
View File

@@ -15,12 +15,14 @@
#include <seastar/core/seastar.hh>
#include <seastar/core/shared_ptr.hh>
#include <seastar/core/thread.hh>
#include <seastar/core/fstream.hh>
#include <seastar/testing/test_case.hh>
#include "ent/encryption/encryption.hh"
#include "ent/encryption/symmetric_key.hh"
#include "ent/encryption/encrypted_file_impl.hh"
#include "test/lib/log.hh"
#include "test/lib/tmpdir.hh"
#include "test/lib/random_utils.hh"
#include "test/lib/exception_utils.hh"
@@ -181,6 +183,66 @@ SEASTAR_TEST_CASE(test_short) {
}
}
SEASTAR_TEST_CASE(test_read_across_size_boundary) {
auto name = "test_read_across_size_boundary";
auto [dst, k] = co_await make_file(name, open_flags::rw|open_flags::create);
auto size = dst.disk_write_dma_alignment() - 1;
co_await dst.truncate(size);
co_await dst.close();
auto [f, _] = co_await make_file(name, open_flags::ro, k);
auto a = f.disk_write_dma_alignment();
auto m = f.memory_dma_alignment();
auto buf = temporary_buffer<char>::aligned(m, a);
auto n = co_await f.dma_read(0, buf.get_write(), buf.size());
auto buf2 = temporary_buffer<char>::aligned(m, a);
auto n2 = co_await f.dma_read(a, buf2.get_write(), buf2.size());
auto buf3 = temporary_buffer<char>::aligned(m, a);
std::vector<iovec> iov({{buf3.get_write(), buf3.size()}});
auto n3 = co_await f.dma_read(a, std::move(iov));
auto buf4 = co_await f.dma_read_bulk<char>(a, size_t(a));
co_await f.close();
BOOST_REQUIRE_EQUAL(size, n);
buf.trim(n);
for (auto c : buf) {
BOOST_REQUIRE_EQUAL(c, 0);
}
BOOST_REQUIRE_EQUAL(0, n2);
BOOST_REQUIRE_EQUAL(0, n3);
BOOST_REQUIRE_EQUAL(0, buf4.size());
}
static future<> test_read_across_size_boundary_unaligned_helper(int64_t size_off, int64_t read_off) {
auto name = "test_read_across_size_boundary_unaligned";
auto [dst, k] = co_await make_file(name, open_flags::rw|open_flags::create);
auto size = dst.disk_write_dma_alignment() + size_off;
co_await dst.truncate(size);
co_await dst.close();
auto [f, k2] = co_await make_file(name, open_flags::ro, k);
auto buf = co_await f.dma_read_bulk<char>(f.disk_write_dma_alignment() + read_off, size_t(f.disk_write_dma_alignment()));
co_await f.close();
BOOST_REQUIRE_EQUAL(0, buf.size());
}
SEASTAR_TEST_CASE(test_read_across_size_boundary_unaligned) {
co_await test_read_across_size_boundary_unaligned_helper(-1, 1);
}
SEASTAR_TEST_CASE(test_read_across_size_boundary_unaligned2) {
co_await test_read_across_size_boundary_unaligned_helper(-2, -1);
}
SEASTAR_TEST_CASE(test_truncating_empty) {
auto name = "test_truncating_empty";
auto t = co_await make_file(name, open_flags::rw|open_flags::create);
@@ -263,3 +325,60 @@ SEASTAR_TEST_CASE(test_truncating_extend) {
co_await f.close();
}
// Reproducer for https://github.com/scylladb/scylladb/issues/22236
SEASTAR_TEST_CASE(test_read_from_padding) {
key_info kinfo {"AES/CBC/PKCSPadding", 128};
shared_ptr<symmetric_key> k = make_shared<symmetric_key>(kinfo);
testlog.info("Created symmetric key: info={} key={} ", k->info(), k->key());
size_t block_size;
size_t buf_size;
constexpr auto& filename = "encrypted_file";
const auto& filepath = dir.path() / filename;
testlog.info("Creating encrypted file {}", filepath.string());
{
auto [file, _] = co_await make_file(filename, open_flags::create | open_flags::wo, k);
auto ostream = co_await make_file_output_stream(file);
block_size = file.disk_write_dma_alignment();
buf_size = block_size - 1;
auto wbuf = seastar::temporary_buffer<char>::aligned(file.memory_dma_alignment(), buf_size);
co_await ostream.write(wbuf.get(), wbuf.size());
testlog.info("Wrote {} bytes to encrypted file {}", wbuf.size(), filepath.string());
co_await ostream.close();
testlog.info("Length of {}: {} bytes", filename, co_await file.size());
}
testlog.info("Testing DMA reads from padding area of file {}", filepath.string());
{
auto [file, _] = co_await make_file(filename, open_flags::ro, k);
// Triggering the bug requires reading from the padding area:
// `buf_size < read_pos < file.size()`
//
// For `dma_read()`, we have the additional requirement that `read_pos` must be aligned.
// For `dma_read_bulk()`, it doesn't have to be.
uint64_t read_pos = block_size;
size_t read_len = block_size;
auto rbuf = seastar::temporary_buffer<char>::aligned(file.memory_dma_alignment(), read_len);
std::vector<iovec> iov {{static_cast<void*>(rbuf.get_write()), rbuf.size()}};
auto res = co_await file.dma_read_bulk<char>(read_pos, read_len);
BOOST_CHECK_MESSAGE(res.size() == 0, seastar::format(
"Bulk DMA read on pos {}, len {}: returned {} bytes instead of zero", read_pos, read_len, res.size()));
auto res_len = co_await file.dma_read(read_pos, iov);
BOOST_CHECK_MESSAGE(res_len == 0, seastar::format(
"IOV DMA read on pos {}, len {}: returned {} bytes instead of zero", read_pos, read_len, res_len));
res_len = co_await file.dma_read<char>(read_pos, rbuf.get_write(), read_len);
BOOST_CHECK_MESSAGE(res_len == 0, seastar::format(
"DMA read on pos {}, len {}: returned {} bytes instead of zero", read_pos, read_len, res_len));
co_await file.close();
}
}
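The invariant the reproducer checks can be sketched in plain Python — a toy model of PKCS-padded file sizes (an assumption for illustration, not the Scylla encryption code): reads positioned at or past the logical size must return nothing, even when they land inside the padding block that exists on disk.

```python
# Toy model: an encrypted file stores whole blocks on disk, but reads past
# the logical size must return zero bytes, even inside the padding block.
BLOCK = 4096

def padded_disk_size(logical_size: int) -> int:
    # CBC/PKCS padding always rounds up to a full block, and adds a whole
    # extra block when the data is already block-aligned.
    return (logical_size // BLOCK + 1) * BLOCK

def readable_bytes(logical_size: int, read_pos: int, read_len: int) -> int:
    # A correct reader clamps to the logical size; the bug in #22236 was
    # effectively clamping to the on-disk (padded) size instead.
    if read_pos >= logical_size:
        return 0
    return min(read_len, logical_size - read_pos)

size = BLOCK - 1                                 # buf_size = block_size - 1, as in the test
assert padded_disk_size(size) == BLOCK           # one padding byte fills the block
assert readable_bytes(size, BLOCK, BLOCK) == 0   # read from the padding area: nothing
assert readable_bytes(size, 0, 2 * BLOCK) == size
```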


@@ -110,10 +110,10 @@ private:
}
template <typename Querier>
Querier make_querier(const dht::partition_range& range) {
Querier make_querier(const dht::partition_range& range, db::timeout_clock::time_point timeout) {
return Querier(_mutation_source,
_s.schema(),
_sem.make_tracking_only_permit(_s.schema(), "make-querier", db::no_timeout, {}),
_sem.make_tracking_only_permit(_s.schema(), "make-querier", timeout, {}),
range,
_s.schema()->full_slice(),
nullptr);
@@ -218,10 +218,10 @@ public:
template <typename Querier>
entry_info produce_first_page_and_save_querier(void(query::querier_cache::*insert_mem_ptr)(query_id, Querier&&, tracing::trace_state_ptr), unsigned key,
const dht::partition_range& range, const query::partition_slice& slice, uint64_t row_limit) {
const dht::partition_range& range, const query::partition_slice& slice, uint64_t row_limit, db::timeout_clock::time_point timeout = db::no_timeout) {
const auto cache_key = make_cache_key(key);
auto querier = make_querier<Querier>(range);
auto querier = make_querier<Querier>(range, timeout);
auto dk_ck = querier.consume_page(dummy_result_builder{}, row_limit, std::numeric_limits<uint32_t>::max(), gc_clock::now()).get();
auto&& dk = dk_ck.first;
auto&& ck = dk_ck.second;
@@ -290,27 +290,29 @@ public:
}
entry_info produce_first_page_and_save_mutation_querier(unsigned key, const dht::partition_range& range,
const query::partition_slice& slice, uint64_t row_limit = 5) {
return produce_first_page_and_save_querier<query::querier>(&query::querier_cache::insert_mutation_querier, key, range, slice, row_limit);
const query::partition_slice& slice, uint64_t row_limit = 5, db::timeout_clock::time_point timeout = db::no_timeout) {
return produce_first_page_and_save_querier<query::querier>(&query::querier_cache::insert_mutation_querier, key, range, slice, row_limit, timeout);
}
entry_info produce_first_page_and_save_mutation_querier(unsigned key, const dht::partition_range& range, uint64_t row_limit = 5) {
return produce_first_page_and_save_mutation_querier(key, range, make_default_slice(), row_limit);
entry_info produce_first_page_and_save_mutation_querier(unsigned key, const dht::partition_range& range, uint64_t row_limit = 5,
db::timeout_clock::time_point timeout = db::no_timeout) {
return produce_first_page_and_save_mutation_querier(key, range, make_default_slice(), row_limit, timeout);
}
// Singular overload
entry_info produce_first_page_and_save_mutation_querier(unsigned key, std::size_t i, uint64_t row_limit = 5) {
entry_info produce_first_page_and_save_mutation_querier(unsigned key, std::size_t i, uint64_t row_limit = 5,
db::timeout_clock::time_point timeout = db::no_timeout) {
return produce_first_page_and_save_mutation_querier(key, make_singular_partition_range(i), _s.schema()->full_slice(), row_limit, timeout);
}
// Use the whole range
entry_info produce_first_page_and_save_mutation_querier(unsigned key) {
return produce_first_page_and_save_mutation_querier(key, make_default_partition_range(), _s.schema()->full_slice());
entry_info produce_first_page_and_save_mutation_querier(unsigned key, db::timeout_clock::time_point timeout = db::no_timeout) {
return produce_first_page_and_save_mutation_querier(key, make_default_partition_range(), _s.schema()->full_slice(), 5, timeout);
}
// For tests testing just one insert-lookup.
entry_info produce_first_page_and_save_mutation_querier() {
return produce_first_page_and_save_mutation_querier(1);
entry_info produce_first_page_and_save_mutation_querier(db::timeout_clock::time_point timeout = db::no_timeout) {
return produce_first_page_and_save_mutation_querier(1, timeout);
}
test_querier_cache& assert_cache_lookup_data_querier(unsigned lookup_key,
@@ -337,9 +339,10 @@ public:
test_querier_cache& assert_cache_lookup_mutation_querier(unsigned lookup_key,
const schema& lookup_schema,
const dht::partition_range& lookup_range,
const query::partition_slice& lookup_slice) {
const query::partition_slice& lookup_slice,
db::timeout_clock::time_point timeout = db::no_timeout) {
auto querier_opt = _cache.lookup_mutation_querier(make_cache_key(lookup_key), lookup_schema, lookup_range, lookup_slice, get_semaphore(), nullptr, db::no_timeout);
auto querier_opt = _cache.lookup_mutation_querier(make_cache_key(lookup_key), lookup_schema, lookup_range, lookup_slice, get_semaphore(), nullptr, timeout);
if (querier_opt) {
querier_opt->close().get();
}
@@ -840,4 +843,69 @@ SEASTAR_THREAD_TEST_CASE(test_semaphore_mismatch) {
}
}
#if SEASTAR_DEBUG
static const std::chrono::seconds ttl_timeout_test_timeout = 4s;
#else
static const std::chrono::seconds ttl_timeout_test_timeout = 1s;
#endif
SEASTAR_THREAD_TEST_CASE(test_timeout_not_sticky_on_insert) {
test_querier_cache t;
const auto entry = t.produce_first_page_and_save_mutation_querier(db::timeout_clock::now() + ttl_timeout_test_timeout);
sleep(ttl_timeout_test_timeout * 2).get();
t.assert_cache_lookup_mutation_querier(entry.key, *t.get_schema(), entry.expected_range, entry.expected_slice)
.no_misses()
.no_drops()
.no_evictions();
}
SEASTAR_THREAD_TEST_CASE(test_ttl_not_sticky_on_lookup) {
test_querier_cache t(ttl_timeout_test_timeout);
auto& sem = t.get_semaphore();
auto permit1 = sem.obtain_permit(t.get_schema(), get_name(), 1024, db::no_timeout, {}).get();
const auto entry = t.produce_first_page_and_save_mutation_querier();
const auto new_timeout = db::timeout_clock::now() + 900s;
t.assert_cache_lookup_mutation_querier(entry.key, *t.get_schema(), entry.expected_range, entry.expected_slice, new_timeout)
.no_misses()
.no_drops()
.no_evictions();
BOOST_REQUIRE(entry.permit.timeout() == new_timeout);
sleep(ttl_timeout_test_timeout * 2).get();
// check_abort() will throw if the permit timed out due to sticky TTL during the above sleep.
BOOST_REQUIRE_NO_THROW(entry.permit.check_abort());
}
SEASTAR_THREAD_TEST_CASE(test_timeout_is_applied_on_lookup) {
test_querier_cache t;
auto& sem = t.get_semaphore();
auto permit1 = sem.obtain_permit(t.get_schema(), get_name(), 1024, db::no_timeout, {}).get();
const auto entry = t.produce_first_page_and_save_mutation_querier();
const auto new_timeout = db::timeout_clock::now() + ttl_timeout_test_timeout;
t.assert_cache_lookup_mutation_querier(entry.key, *t.get_schema(), entry.expected_range, entry.expected_slice, new_timeout)
.no_misses()
.no_drops()
.no_evictions();
BOOST_REQUIRE(entry.permit.timeout() == new_timeout);
BOOST_REQUIRE_NO_THROW(entry.permit.check_abort());
sleep(ttl_timeout_test_timeout * 2).get();
BOOST_REQUIRE_THROW(entry.permit.check_abort(), seastar::named_semaphore_timed_out);
}
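The failure mode these tests guard against can be sketched with a toy permit model (the names and shapes here are assumptions for illustration, not the real `reader_permit`): the cached querier's permit keeps its per-page timeout unless it is explicitly cleared before the cache TTL is armed, so a short leftover timeout evicts the entry long before the TTL expires.

```python
# Toy model of the backported fix: set_notify_handler() must clear the
# permit's page timeout before arming the cache entry's TTL.
class Permit:
    def __init__(self):
        self.timeout = None        # deadline in seconds, None = no timeout

    def set_timeout(self, deadline):
        self.timeout = deadline

    def expired(self, now):
        return self.timeout is not None and now >= self.timeout

def cache_insert(permit, now, ttl, clear_timeout_first):
    # Returns when the cache entry itself would be dropped; the entry's
    # TTL is tracked separately from the permit's timeout.
    if clear_timeout_first:
        permit.set_timeout(None)   # the fix: drop the page's timeout
    return now + ttl

buggy = Permit()
buggy.set_timeout(1)               # page timeout: 1s
cache_insert(buggy, now=0, ttl=30, clear_timeout_first=False)
assert buggy.expired(2)            # evicted after 1s, well before the 30s TTL

fixed = Permit()
fixed.set_timeout(1)
cache_insert(fixed, now=0, ttl=30, clear_timeout_first=True)
assert not fixed.expired(2)        # only the 30s TTL applies now
```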
BOOST_AUTO_TEST_SUITE_END()


@@ -2259,4 +2259,57 @@ SEASTAR_THREAD_TEST_CASE(test_reader_concurrency_semaphore_live_update_cpu_concu
require_can_admit(true, "!need_cpu");
}
/// Check that permits are cleaned up properly if they step on queue overload.
SEASTAR_THREAD_TEST_CASE(test_reader_concurrency_semaphore_wait_queue_overload_cleanup) {
simple_schema s;
const auto schema = s.schema();
const std::string test_name = get_name();
reader_concurrency_semaphore semaphore(
utils::updateable_value<int>(1),
1024,
test_name + " semaphore",
1,
utils::updateable_value<uint32_t>(2),
utils::updateable_value<uint32_t>(4),
utils::updateable_value<uint32_t>(1),
reader_concurrency_semaphore::register_metrics::no);
auto stop_sem = deferred_stop(semaphore);
reader_permit_opt permit1 = semaphore.obtain_permit(schema, test_name, 1024, db::no_timeout, {}).get();
BOOST_REQUIRE_EQUAL(semaphore.get_stats().reads_admitted, 1);
BOOST_REQUIRE_EQUAL(semaphore.get_stats().reads_admitted_immediately, 1);
BOOST_REQUIRE_EQUAL(semaphore.get_stats().waiters, 0);
BOOST_REQUIRE_EQUAL(semaphore.get_stats().total_reads_shed_due_to_overload, 0);
BOOST_REQUIRE_EQUAL(semaphore.get_stats().current_permits, 1);
auto permit2_fut = semaphore.obtain_permit(schema, test_name, 1024, db::no_timeout, {});
BOOST_REQUIRE_EQUAL(semaphore.get_stats().reads_admitted, 1);
BOOST_REQUIRE_EQUAL(semaphore.get_stats().reads_admitted_immediately, 1);
BOOST_REQUIRE_EQUAL(semaphore.get_stats().waiters, 1);
BOOST_REQUIRE_EQUAL(semaphore.get_stats().total_reads_shed_due_to_overload, 0);
BOOST_REQUIRE_EQUAL(semaphore.get_stats().current_permits, 2);
{
reader_permit_opt permit_holder;
auto permit3_fut = semaphore.with_permit(schema, test_name.c_str(), 1024, db::no_timeout, {}, permit_holder, [] (reader_permit) {
BOOST_FAIL("unexpected call to with permit lambda");
return make_ready_future<>();
});
BOOST_REQUIRE(permit3_fut.failed());
BOOST_CHECK_THROW(permit3_fut.get(), std::runtime_error);
}
BOOST_REQUIRE_EQUAL(semaphore.get_stats().reads_admitted, 1);
BOOST_REQUIRE_EQUAL(semaphore.get_stats().reads_admitted_immediately, 1);
BOOST_REQUIRE_EQUAL(semaphore.get_stats().waiters, 1);
BOOST_REQUIRE_EQUAL(semaphore.get_stats().total_reads_shed_due_to_overload, 1);
// This is the critical check in this test: we check that the permit3 was
// destroyed and it has not become a zombie permit due to incomplete cleanup.
BOOST_REQUIRE_EQUAL(semaphore.get_stats().current_permits, 2);
permit1 = {};
permit2_fut.get();
}
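The cleanup property this test asserts can be modeled with a toy semaphore (an assumed shape, not the real `reader_concurrency_semaphore`): a request shed because the wait queue is full must be rejected up front without registering a permit, so the permit count reflects only the admitted and queued readers.

```python
# Toy model of wait-queue overload shedding without zombie permits.
class ToySemaphore:
    def __init__(self, count, max_queue_length):
        self.count = count
        self.max_queue_length = max_queue_length
        self.waiters = 0
        self.current_permits = 0
        self.shed = 0

    def obtain(self):
        if self.count > 0:
            self.count -= 1
            self.current_permits += 1
            return "admitted"
        if self.waiters >= self.max_queue_length:
            self.shed += 1         # rejected up front: nothing to clean up
            raise RuntimeError("wait queue overload")
        self.waiters += 1
        self.current_permits += 1
        return "queued"

sem = ToySemaphore(count=1, max_queue_length=1)
assert sem.obtain() == "admitted"  # permit1
assert sem.obtain() == "queued"    # permit2 waits
try:
    sem.obtain()                   # permit3 overflows the queue and is shed
except RuntimeError:
    pass
assert sem.shed == 1
assert sem.current_permits == 2    # permit3 left no zombie behind
```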
BOOST_AUTO_TEST_SUITE_END()


@@ -1,4 +1,6 @@
type: boost
extra_scylla_cmdline_options:
- '--reactor-backend linux-aio'
# A list of long tests, which should be started early
run_first:
- index_with_paging_test


@@ -357,3 +357,60 @@ def test_unpaged_query(cql, table, lowered_tombstone_limit, driver_bug_1):
statement = SimpleStatement(f"SELECT * FROM {table} WHERE pk = {pk}", fetch_size=None)
rows = list(cql.execute(statement))
assert len(rows) == 4
def test_filtering_query_tombstone_suffix_last_position(cql, test_keyspace, lowered_tombstone_limit):
"""
Check that when filtering drops rows in a short page due to tombstone suffix,
the tombstone-suffix is not re-requested on the next page.
"""
with new_test_table(cql, test_keyspace, 'pk int, ck int, v int, PRIMARY KEY (pk, ck)') as table:
insert_row_id = cql.prepare(f"INSERT INTO {table} (pk, ck, v) VALUES (?, ?, ?)")
delete_row_id = cql.prepare(f"DELETE FROM {table} WHERE pk = ? AND ck = ?")
pk = 0
page1 = []
for ck in range(0, 10):
row = (pk, ck, ck % 2)
cql.execute(insert_row_id, row)
if row[2] == 0:
page1.append(row)
for ck in range(10, 25):
cql.execute(delete_row_id, (pk, ck))
page2 = []
for ck in range(25, 30):
row = (pk, ck, ck % 2)
cql.execute(insert_row_id, row)
if row[2] == 0:
page2.append(row)
statement = SimpleStatement(f"SELECT * FROM {table} WHERE pk = {pk} AND v = 0 ALLOW FILTERING", fetch_size=20)
res = cql.execute(statement, trace=True)
def to_list(current_rows):
return list(map(lambda r: tuple(r._asdict().values()), current_rows))
assert to_list(res.current_rows) == page1
assert res.has_more_pages
res.fetch_next_page()
assert to_list(res.current_rows) == page2
assert not res.has_more_pages
tracing = res.get_all_query_traces(max_wait_sec_per=900)
assert len(tracing) == 2
found_reuse = False
found_drop = False
for event in tracing[1].events:
found_reuse = found_reuse or "Reusing querier" == event.description
found_drop = found_drop or "Dropping querier because" in event.description
assert found_reuse
assert not found_drop
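As a plain-Python cross-check outside the driver, the two pages the query above should return can be recomputed from the same row layout: live rows at ck 0..9 and 25..29 (tombstones at 10..24), filtered on v = 0.

```python
# Recompute the expected pages from the test's row layout.
pk = 0
live_cks = list(range(0, 10)) + list(range(25, 30))   # 10..24 are deleted
rows = [(pk, ck, ck % 2) for ck in live_cks]
matching = [r for r in rows if r[2] == 0]             # the v = 0 filter
page1 = [r for r in matching if r[1] < 10]            # first live run
page2 = [r for r in matching if r[1] >= 25]           # run after the tombstones
assert page1 == [(0, ck, 0) for ck in range(0, 10, 2)]
assert page2 == [(0, 26, 0), (0, 28, 0)]
```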


@@ -29,6 +29,15 @@ def test_abort_failure(nodetool, scylla_only):
{"expected_requests": []},
["required parameter is missing"])
def test_drain(nodetool, scylla_only):
nodetool("tasks", "drain", expected_requests=[
expected_request("GET", "/task_manager/list_modules", response=["repair", "compaction"]),
expected_request("POST", "/task_manager/drain/repair"),
expected_request("POST", "/task_manager/drain/compaction")])
nodetool("tasks", "drain", "--module", "repair", expected_requests=[
expected_request("POST", "/task_manager/drain/repair")])
def test_user_ttl(nodetool, scylla_only):
nodetool("tasks", "user-ttl", expected_requests=[
expected_request("GET", "/task_manager/user_ttl")])


@@ -1 +1,3 @@
type: boost
extra_scylla_cmdline_options:
- '--reactor-backend linux-aio'


@@ -70,9 +70,11 @@ def check_child_parent_relationship(rest_api, status_tree, parent, allow_no_chil
def drain_module_tasks(rest_api, module_name):
tasks = [task for task in list_tasks(rest_api, module_name, True)]
# Wait for all tasks.
for task in tasks:
# Wait for task and unregister it.
resp = rest_api.send("GET", f"task_manager/wait_task/{task['task_id']}")
resp = rest_api.send("GET", f"task_manager/task_status/{task['task_id']}")
# The task may be already unregistered.
assert resp.status_code == requests.codes.ok or resp.status_code == requests.codes.bad_request, "Invalid status code"
resp = rest_api.send("POST", f"task_manager/drain/{module_name}")
resp.raise_for_status()


@@ -56,7 +56,6 @@ def test_task_manager_status_done(rest_api):
status = get_task_status(rest_api, task0)
check_status_correctness(status, { "id": task0, "state": "done", "sequence_number": 1, "keyspace": "keyspace0", "table": "table0" })
assert_task_does_not_exist(rest_api, task0)
def test_task_manager_status_failed(rest_api):
with new_test_module(rest_api):
@@ -70,7 +69,6 @@ def test_task_manager_status_failed(rest_api):
status = get_task_status(rest_api, task0)
check_status_correctness(status, { "id": task0, "state": "failed", "error": "Test task failed", "sequence_number": 1, "keyspace": "keyspace0", "table": "table0" })
assert_task_does_not_exist(rest_api, task0)
def test_task_manager_not_abortable(rest_api):
with new_test_module(rest_api):


@@ -5,9 +5,13 @@
#
import asyncio
import pytest
import logging
from test.pylib.manager_client import ManagerClient
from test.pylib.tablets import get_tablet_replica
from test.pylib.util import unique_name, wait_for_view
logger = logging.getLogger(__name__)
from test.pylib.util import wait_for_view
# This test makes sure that view building is done mainly in the streaming scheduling group
# and not the gossip scheduling group. We do that by measuring the time each group was
@@ -54,3 +58,59 @@ async def test_start_scylla_with_view_building_disabled(manager: ManagerClient):
log = await manager.server_open_log(server.server_id)
res = await log.grep(r"ERROR.*\[shard [0-9]+:[a-z]+\]")
assert len(res) == 0
# Build multiple views of one base table, and while view building is running move
# some of the base tablets to another node. Verify the view build is completed.
# More specifically, we move all tablets except the first one to reproduce issue #21829.
# The issue happens when we start building a view at a token F and then all partitions
# with tokens >=F are moved, and it causes the view builder to enter an infinite loop
# building the same token ranges repeatedly because it doesn't reach F.
@pytest.mark.asyncio
async def test_view_building_with_tablet_move(manager: ManagerClient, build_mode: str):
servers = [await manager.server_add()]
await manager.api.disable_tablet_balancing(servers[0].ip_addr)
ks = unique_name()
table = 'test'
view_count = 4
views = [f"{table}_view_{i}" for i in range(view_count)]
cql = manager.get_cql()
await cql.run_async(f"CREATE KEYSPACE {ks} WITH replication = {{'class': 'NetworkTopologyStrategy', 'replication_factor': 1}} AND tablets = {{'initial': 4}}")
await cql.run_async(f"CREATE TABLE {ks}.{table} (pk int PRIMARY KEY, c int)")
# prefill the base table with enough rows so that view building takes some time
# and runs during the tablet move
keys = 200000 if build_mode != 'debug' else 10000
batch_size = 50
for k in range(0, keys, batch_size):
inserts = [f"INSERT INTO {ks}.{table}(pk, c) VALUES ({i}, {i})" for i in range(k, k+batch_size)]
batch = "BEGIN UNLOGGED BATCH\n" + "\n".join(inserts) + "\nAPPLY BATCH\n"
await manager.cql.run_async(batch)
logger.info("Adding new server")
servers.append(await manager.server_add())
# create some views so they are built together but starting at different tokens
for view in views:
await cql.run_async(f"CREATE MATERIALIZED VIEW {ks}.{view} AS SELECT * FROM {ks}.{table} WHERE c IS NOT NULL AND pk IS NOT NULL PRIMARY KEY (c, pk)")
await asyncio.sleep(1)
s0_host_id = await manager.get_host_id(servers[0].server_id)
s1_host_id = await manager.get_host_id(servers[1].server_id)
dst_shard = 0
# move all tablets except the first one (with lowest token range) to the other node.
table_id = await manager.get_table_id(ks, table)
rows = await manager.cql.run_async(f"SELECT last_token FROM system.tablets where table_id = {table_id}")
move_tablets_tasks = []
for r in rows[1:]:
tablet_token = r.last_token
replica = await get_tablet_replica(manager, servers[0], ks, table, tablet_token)
move_tablets_tasks.append(asyncio.create_task(manager.api.move_tablet(servers[0].ip_addr, ks, table, replica[0], replica[1], s1_host_id, dst_shard, tablet_token)))
await asyncio.gather(*move_tablets_tasks)
for view in views:
await wait_for_view(cql, view, len(servers))
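The #21829 failure mode the test reproduces can be sketched with a toy loop (an assumption for illustration, not the view builder's code): building resumes from some token F and walks local tokens in order; once every token >= F has migrated away, a scan restarted from F finds nothing and must terminate instead of retrying the same empty range forever.

```python
# Toy model: a pass that finds no work past `start` must stop, not retry.
def build_from(local_tokens, start, max_passes=3):
    built = set()
    passes = 0
    while passes < max_passes:
        pending = sorted(t for t in local_tokens if t >= start and t not in built)
        if not pending:
            return built           # fixed behavior: nothing left, stop
        built.update(pending)
        passes += 1
    raise RuntimeError("stuck retrying the same range")

# All tokens >= F (here F = 50) moved to another node; only token 10 remains.
assert build_from({10}, start=50) == set()
assert build_from({10, 60, 70}, start=50) == {60, 70}
```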


@@ -259,3 +259,19 @@ async def test_repair_abort(manager):
await manager.api.client.get_json(f"/task_manager/wait_task/{id}", host=servers[0].ip_addr)
statuses = await manager.api.client.get_json(f"/task_manager/task_status_recursive/{id}", host=servers[0].ip_addr)
assert all([status["state"] == "failed" for status in statuses])
@pytest.mark.asyncio
@skip_mode('release', 'error injections are not supported in release mode')
async def test_keyspace_drop_during_data_sync_repair(manager):
cfg = {
'enable_tablets': False,
'error_injections_at_startup': ['get_keyspace_erms_throw_no_such_keyspace']
}
await manager.server_add(config=cfg)
cql = manager.get_cql()
cql.execute("CREATE KEYSPACE ks WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 2}")
cql.execute("CREATE TABLE ks.tbl (pk int, ck int, PRIMARY KEY (pk, ck)) WITH tombstone_gc = {'mode': 'repair'}")
await manager.server_add(config=cfg)


@@ -754,3 +754,54 @@ async def test_replace_with_no_normal_token_owners_in_dc(manager: ManagerClient,
assert len(rows) == len(keys)
for r in rows:
assert r.c == r.pk
@pytest.mark.asyncio
@skip_mode('release', 'error injections are not supported in release mode')
async def test_drop_keyspace_while_split(manager: ManagerClient):
# Reproducer for: https://github.com/scylladb/scylladb/issues/22431
# This tests if the split ready compaction groups are correctly created
# on a shard with several storage groups for the same table
logger.info("Bootstrapping cluster")
cmdline = [ '--target-tablet-size-in-bytes', '8192',
'--smp', '2' ]
config = { 'error_injections_at_startup': ['short_tablet_stats_refresh_interval'] }
servers = [await manager.server_add(config=config, cmdline=cmdline)]
s0_log = await manager.server_open_log(servers[0].server_id)
cql = manager.get_cql()
await wait_for_cql_and_get_hosts(cql, [servers[0]], time.time() + 60)
await manager.api.disable_tablet_balancing(servers[0].ip_addr)
# create a table so that it has at least 2 tablets (and storage groups) per shard
await cql.run_async("CREATE KEYSPACE test WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1} AND tablets = {'initial': 4};")
await cql.run_async("CREATE TABLE test.test (pk int PRIMARY KEY, c int);")
await manager.api.disable_autocompaction(servers[0].ip_addr, 'test')
keys = range(2048)
await asyncio.gather(*[cql.run_async(f'INSERT INTO test.test (pk, c) VALUES ({k}, {k});') for k in keys])
await manager.api.flush_keyspace(servers[0].ip_addr, 'test')
await manager.api.enable_injection(servers[0].ip_addr, 'truncate_compaction_disabled_wait', one_shot=False)
await manager.api.enable_injection(servers[0].ip_addr, 'split_storage_groups_wait', one_shot=False)
# enable the load balancer, which should emit a tablet split
await manager.api.enable_tablet_balancing(servers[0].ip_addr)
# wait for compaction groups to be created and split to begin
await s0_log.wait_for('split_storage_groups_wait: wait')
# start a DROP and wait for it to disable compaction
drop_ks_task = cql.run_async('DROP KEYSPACE test;')
await s0_log.wait_for('truncate_compaction_disabled_wait: wait')
# release split
await manager.api.message_injection(servers[0].ip_addr, "split_storage_groups_wait")
# release drop and wait for it to complete
await manager.api.message_injection(servers[0].ip_addr, "truncate_compaction_disabled_wait")
await drop_ks_task


@@ -51,6 +51,12 @@ async def view_is_built_v2(cql, ks_name, view_name, node_count, **kwargs):
async def wait_for_view_v2(cql, ks_name, view_name, node_count, **kwargs):
await wait_for(lambda: view_is_built_v2(cql, ks_name, view_name, node_count, **kwargs), time.time() + 60)
async def wait_for_row_count(cql, table, n, host):
async def row_count_is_n():
cnt = (await cql.run_async(f"SELECT count(*) FROM {table}", host=host))[0].count
return cnt == n or None
await wait_for(row_count_is_n, time.time() + 60)
# Verify a new cluster uses the view_build_status_v2 table.
# Create a materialized view and check that the view's build status
# is stored in view_build_status_v2 and all nodes see all the other
@@ -143,11 +149,11 @@ async def test_view_build_status_virtual_table(manager: ManagerClient):
await cql.run_async(f"DROP MATERIALIZED VIEW {ks_name}.vt1")
async def view_rows_removed(host):
r = await cql.run_async("SELECT * FROM system.view_build_status_v2", host=host)
return (len(r) == node_count) or None
await asyncio.gather(*(wait_for(lambda: view_rows_removed(h), time.time() + 60) for h in hosts))
await assert_v1_eq_v2()
async def view_rows_removed_and_v1_eq_v2():
r1, r2 = await select_v1(), await select_v2()
if len(r2) == node_count and r1 == r2:
return True
await wait_for(view_rows_removed_and_v1_eq_v2, time.time() + 60)
# Cluster with 3 nodes.
# Create materialized views. Start new server and it should get a snapshot on bootstrap.
@@ -217,8 +223,7 @@ async def test_view_build_status_migration_to_v2(request, manager: ManagerClient
v = await get_view_builder_version(cql)
assert v == 1
result = await cql.run_async("SELECT * FROM system_distributed.view_build_status")
assert len(result) == 3
await wait_for_row_count(cql, "system_distributed.view_build_status", 3, hosts[0])
result = await cql.run_async("SELECT * FROM system.view_build_status_v2")
assert len(result) == 0
@@ -237,8 +242,7 @@ async def test_view_build_status_migration_to_v2(request, manager: ManagerClient
await create_mv(cql, "vt2")
await asyncio.gather(*(wait_for_view_v2(cql, "ks", "vt2", 3, host=h) for h in hosts))
result = await cql.run_async("SELECT * FROM system.view_build_status_v2")
assert len(result) == 6
await wait_for_row_count(cql, "system.view_build_status_v2", 6, hosts[0])
# Migrate the view_build_status table to v2 and write to the table during the migration.
# The migration process goes through an intermediate stage where it writes to
@@ -353,29 +357,23 @@ async def test_view_build_status_migration_to_v2_barrier(request, manager: Manag
async def test_view_build_status_cleanup_on_remove_node(manager: ManagerClient):
node_count = 4
servers = await manager.servers_add(node_count)
cql, _ = await manager.get_ready_cql(servers)
cql, hosts = await manager.get_ready_cql(servers)
await create_keyspace(cql)
await create_table(cql)
await create_mv(cql, "vt1")
await create_mv(cql, "vt2")
await wait_for_view(cql, "vt1", node_count)
await wait_for_view(cql, "vt2", node_count)
result = await cql.run_async("SELECT * FROM system.view_build_status_v2")
assert len(result) == node_count * 2
await wait_for_row_count(cql, "system.view_build_status_v2", node_count*2, hosts[0])
await manager.server_stop_gracefully(servers[-1].server_id)
await manager.remove_node(servers[0].server_id, servers[-1].server_id)
servers.pop()
cql, hosts = await manager.get_ready_cql(servers)
# The 2 rows belonging to the node that was removed, one for each view, should
# be deleted from the table.
async def node_rows_removed():
result = await cql.run_async("SELECT * FROM system.view_build_status_v2")
return (len(result) == (node_count - 1) * 2) or None
await wait_for(node_rows_removed, time.time() + 60)
await wait_for_row_count(cql, "system.view_build_status_v2", (node_count-1)*2, hosts[0])
# Replace a node and verify that the view_build_status has rows for the new node and
# no rows for the old node
@@ -383,18 +381,14 @@ async def test_view_build_status_cleanup_on_remove_node(manager: ManagerClient):
async def test_view_build_status_with_replace_node(manager: ManagerClient):
node_count = 4
servers = await manager.servers_add(node_count)
cql, _ = await manager.get_ready_cql(servers)
cql, hosts = await manager.get_ready_cql(servers)
await create_keyspace(cql)
await create_table(cql)
await create_mv(cql, "vt1")
await create_mv(cql, "vt2")
await wait_for_view(cql, "vt1", node_count)
await wait_for_view(cql, "vt2", node_count)
result = await cql.run_async("SELECT * FROM system.view_build_status_v2")
assert len(result) == node_count * 2
await wait_for_row_count(cql, "system.view_build_status_v2", node_count*2, hosts[1])
# replace a node
removed_host_id = await manager.get_host_id(servers[0].server_id)
@@ -404,6 +398,9 @@ async def test_view_build_status_with_replace_node(manager: ManagerClient):
servers = servers[1:]
added_host_id = await manager.get_host_id(servers[-1].server_id)
await manager.driver_connect(server=servers[1])
cql = manager.get_cql()
# wait for the old node rows to be removed and new node rows to be added
async def node_rows_replaced():
result = await cql.run_async(f"SELECT * FROM system.view_build_status_v2 WHERE host_id={removed_host_id} ALLOW FILTERING")
@@ -454,13 +451,13 @@ async def test_view_build_status_migration_to_v2_with_cleanup(request, manager:
await wait_for_view_v1(cql, "vt1", 4)
result = await cql.run_async("SELECT * FROM system_distributed.view_build_status")
assert len(result) == 4
await wait_for_row_count(cql, "system_distributed.view_build_status", 4, hosts[0])
# Insert a row that doesn't correspond to an existing view, but does correspond to a known host.
# This row should get cleaned during migration.
s0_host_id = await manager.get_host_id(servers[0].server_id)
await cql.run_async(f"INSERT INTO system_distributed.view_build_status(keyspace_name, view_name, host_id, status) \
VALUES ('ks', 'view_doesnt_exist', {result[0].host_id}, 'SUCCESS')")
VALUES ('ks', 'view_doesnt_exist', {s0_host_id}, 'SUCCESS')")
# Remove the last node. the entry for this node in the view build status remains and it
# corresponds now to an unknown node. The migration should remove it.
@@ -485,7 +482,7 @@ async def test_view_build_status_migration_to_v2_with_cleanup(request, manager:
# Verify that after migration we kept only the entries for the known nodes and views.
async def rows_migrated():
result = await cql.run_async("SELECT * FROM system.view_build_status_v2")
result = await cql.run_async("SELECT * FROM system.view_build_status_v2", host=hosts[0])
return (len(result) == 3) or None
await wait_for(rows_migrated, time.time() + 60)
@@ -520,10 +517,7 @@ async def test_migration_on_existing_raft_topology(request, manager: ManagerClie
v = await get_view_builder_version(cql)
assert v == 1
async def _view_build_finished():
result = await cql.run_async("SELECT * FROM system_distributed.view_build_status")
return len(result) == 3
await wait_for(_view_build_finished, time.time() + 10, period=.5)
await wait_for_row_count(cql, "system_distributed.view_build_status", 3, hosts[0])
result = await cql.run_async("SELECT * FROM system.view_build_status_v2")
assert len(result) == 0
@@ -542,8 +536,7 @@ async def test_migration_on_existing_raft_topology(request, manager: ManagerClie
await create_mv(cql, "vt2")
await asyncio.gather(*(wait_for_view_v2(cql, "ks", "vt2", 3, host=h) for h in hosts))
result = await cql.run_async("SELECT * FROM system.view_build_status_v2")
assert len(result) == 6
await wait_for_row_count(cql, "system.view_build_status_v2", 6, hosts[0])
# Check if there is no error logs from raft topology
for srv in servers:


@@ -75,5 +75,4 @@ class TaskManagerClient():
tasks = await self.list_tasks(node_ip, module_name, internal=internal)
await asyncio.gather(*(self.api.client.get(f"/task_manager/wait_task/{stats.task_id}", host=node_ip,
allow_failed=True) for stats in tasks))
await asyncio.gather(*(self.api.client.get(f"/task_manager/task_status/{stats.task_id}", host=node_ip,
allow_failed=True) for stats in tasks))
await self.api.client.get(f"/task_manager/drain/{module_name}", host=node_ip)


@@ -493,3 +493,23 @@ async def test_tablet_resize_revoked(manager: ManagerClient):
check_task_status(status, ["suspended"], "split", "table", False, keyspace, table1, [0, 1, 2])
await asyncio.gather(revoke_resize(log, mark), wait_for_task(task0.task_id))
@pytest.mark.asyncio
@skip_mode('release', 'error injections are not supported in release mode')
async def test_tablet_task_sees_latest_state(manager: ManagerClient):
servers, cql, hosts, table_id = await create_table_insert_data_for_repair(manager)
token = -1
async def repair_task():
await inject_error_on(manager, "repair_tablet_fail_on_rpc_call", servers)
# Check failed repair request can be deleted
await manager.api.tablet_repair(servers[0].ip_addr, "test", "test", token)
async def del_repair_task():
tablet_task_id = None
while tablet_task_id is None:
tablet_task_id = await get_tablet_task_id(cql, hosts[0], table_id, token)
await manager.api.abort_task(servers[0].ip_addr, tablet_task_id)
await asyncio.gather(repair_task(), del_repair_task())


@@ -2793,11 +2793,20 @@ void table_stats_operation(scylla_rest_client& client, const bpo::variables_map&
}
}
std::string get_time(std::string_view time) {
static constexpr const char* epoch = "1970-01-01T00:00:00Z";
return time == epoch ? "" : std::string{time};
}
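The sentinel mapping `get_time()` performs is easy to mirror in Python for illustration: the REST API reports unset timestamps as the Unix epoch, and nodetool renders that as an empty field instead.

```python
# Python analogue of the C++ get_time() helper above.
EPOCH = "1970-01-01T00:00:00Z"

def get_time(t: str) -> str:
    # Unset start/end times come back as the epoch sentinel; show nothing.
    return "" if t == EPOCH else t

assert get_time(EPOCH) == ""
assert get_time("2025-02-13T15:24:54Z") == "2025-02-13T15:24:54Z"
```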
void tasks_print_status(const rjson::value& res) {
auto status = res.GetObject();
for (const auto& x: status) {
if (x.value.IsString()) {
fmt::print("{}: {}\n", x.name.GetString(), x.value.GetString());
if (strcmp(x.name.GetString(), "start_time") == 0 || strcmp(x.name.GetString(), "end_time") == 0) {
fmt::print("{}: {}\n", x.name.GetString(), get_time(x.value.GetString()));
} else {
fmt::print("{}: {}\n", x.name.GetString(), x.value.GetString());
}
} else if (x.value.IsArray()) {
fmt::print("{}: [", x.name.GetString());
sstring delim = "";
@@ -2845,8 +2854,8 @@ void tasks_add_tree_to_statuses_lists(Tabulate& table, const rjson::value& res)
rjson::to_string_view(status["scope"]),
rjson::to_string_view(status["state"]),
status["is_abortable"].GetBool(),
rjson::to_string_view(status["start_time"]),
rjson::to_string_view(status["end_time"]),
get_time(rjson::to_string_view(status["start_time"])),
get_time(rjson::to_string_view(status["end_time"])),
rjson::to_string_view(status["error"]),
rjson::to_string_view(status["parent_id"]),
status["sequence_number"].GetUint64(),
@@ -2912,6 +2921,18 @@ void tasks_abort_operation(scylla_rest_client& client, const bpo::variables_map&
}
}
void tasks_drain_operation(scylla_rest_client& client, const bpo::variables_map& vm) {
if (vm.contains("module")) {
auto module = vm["module"].as<sstring>();
auto res = client.post(format("/task_manager/drain/{}", module));
return;
}
auto module_res = client.get("/task_manager/list_modules");
for (const auto& module : module_res.GetArray()) {
auto drain_res = client.post(format("/task_manager/drain/{}", module.GetString()));
}
}
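The fan-out logic of `tasks_drain_operation` above can be exercised against a fake REST client (a sketch; `FakeClient` is hypothetical): with `--module`, drain posts to that one module; otherwise it lists the modules and drains each in turn, matching the requests `test_drain` expects.

```python
# Sketch of the drain fan-out, with a fake client recording POSTs.
class FakeClient:
    def __init__(self, modules):
        self.modules = modules
        self.posts = []

    def get(self, path):
        assert path == "/task_manager/list_modules"
        return self.modules

    def post(self, path):
        self.posts.append(path)

def tasks_drain(client, module=None):
    if module is not None:
        client.post(f"/task_manager/drain/{module}")
        return
    for m in client.get("/task_manager/list_modules"):
        client.post(f"/task_manager/drain/{m}")

c = FakeClient(["repair", "compaction"])
tasks_drain(c)
assert c.posts == ["/task_manager/drain/repair", "/task_manager/drain/compaction"]
tasks_drain(c, module="repair")
assert c.posts[-1] == "/task_manager/drain/repair"
```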
void tasks_user_ttl_operation(scylla_rest_client& client, const bpo::variables_map& vm) {
if (!vm.contains("set")) {
auto res = client.get("/task_manager/user_ttl");
@@ -4268,6 +4289,20 @@ For more information, see: {}"
typed_option<sstring>("id", "The uuid of a task", 1),
},
},
{
"drain",
"Drains tasks",
fmt::format(R"(
Unregisters all finished local tasks from the specified module. If a module is not specified,
all modules are drained.
For more information, see: {}"
)", doc_link("operating-scylla/nodetool-commands/tasks/drain.html")),
{
typed_option<sstring>("module", "The module name; if specified, only the tasks from this module are unregistered"),
},
{ },
},
{
"user-ttl",
"Gets or sets user task ttl",
@@ -4382,6 +4417,9 @@ For more information, see: {}"
{
"abort", { tasks_abort_operation }
},
{
"drain", { tasks_drain_operation }
},
{
"user-ttl", { tasks_user_ttl_operation }
},