sstables: Fix sstable reshaping for STCS

The heuristic of STCS reshape is correct, and it built the compaction descriptor correctly, but forgot to return it to the caller, so no reshape was ever done on behalf of STCS even when the strategy needed it. Fixes #7774. Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com> Message-Id: <20201209175044.1609102-1-raphaelsc@scylladb.com> (cherry picked from commit e4b55f40f3)
scylla_ntp_setup: support 'pool' directive on ntp.conf
2021-11-15 13:28:52 +02:00 · 2021-10-10 19:42:14 +03:00 · 2021-10-05 16:20:30 +03:00 · 2021-10-03 14:09:37 +03:00 · 2021-10-03 13:11:30 +03:00 · 2021-09-23 15:18:22 +03:00
333 changed files with 5466 additions and 7826 deletions
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -79,9 +79,3 @@ db/hints/* @haaawk @piodul @vladzcloudius
 # REDIS
 redis/* @nyh @syuu1228
 redis-test/* @nyh @syuu1228
-
-# READERS
-reader_* @denesb
-querier* @denesb
-test/boost/mutation_reader_test.cc @denesb
-test/boost/querier_cache_test.cc @denesb
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,6 @@
 [submodule "seastar"]
 	path = seastar
-	url = ../seastar
+	url = ../scylla-seastar
 	ignore = dirty
 [submodule "swagger-ui"]
 	path = swagger-ui
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,8 @@
-cmake_minimum_required(VERSION 3.18)
+##
+## For best results, first compile the project using the Ninja build-system.
+##

+cmake_minimum_required(VERSION 3.7)
 project(scylla)

 if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
@@ -17,739 +20,138 @@ else()
    set(BUILD_TYPE "release")
 endif()

-function(default_target_arch arch)
-    set(x86_instruction_sets i386 i686 x86_64)
-    if(CMAKE_SYSTEM_PROCESSOR IN_LIST x86_instruction_sets)
-        set(${arch} "westmere" PARENT_SCOPE)
-    elseif(CMAKE_SYSTEM_PROCESSOR EQUAL "aarch64")
-        set(${arch} "armv8-a+crc+crypto" PARENT_SCOPE)
-    else()
-        set(${arch} "" PARENT_SCOPE)
-    endif()
-endfunction()
-default_target_arch(target_arch)
-if(target_arch)
-    set(target_arch_flag "-march=${target_arch}")
+if (NOT DEFINED FOR_IDE AND NOT DEFINED ENV{FOR_IDE} AND NOT DEFINED ENV{CLION_IDE})
+    message(FATAL_ERROR "This CMakeLists.txt file is only valid for use in IDEs, please define FOR_IDE to acknowledge this.")
 endif()

-# Configure Seastar compile options to align with Scylla
-set(Seastar_CXX_FLAGS -fcoroutines ${target_arch_flag} CACHE INTERNAL "" FORCE)
-set(Seastar_CXX_DIALECT gnu++20 CACHE INTERNAL "" FORCE)
+# These paths are always available, since they're included in the repository. Additional DPDK headers are placed while
+# Seastar is built, and are captured in `SEASTAR_INCLUDE_DIRS` through parsing the Seastar pkg-config file (below).
+set(SEASTAR_DPDK_INCLUDE_DIRS
+        seastar/dpdk/lib/librte_eal/common/include
+        seastar/dpdk/lib/librte_eal/common/include/generic
+        seastar/dpdk/lib/librte_eal/common/include/x86
+        seastar/dpdk/lib/librte_ether)

-add_subdirectory(seastar)
-add_subdirectory(abseil)
-# Exclude absl::strerror from the default "all" target since it's not
-# used in Scylla build and, moreover, makes use of deprecated glibc APIs,
-# such as sys_nerr, which are not exposed from "stdio.h" since glibc 2.32,
-# which happens to be the case for recent Fedora distribution versions.
-#
-# Need to use the internal "absl_strerror" target name instead of namespaced
-# variant because `set_target_properties` does not understand the latter form,
-# unfortunately.
-set_target_properties(absl_strerror PROPERTIES EXCLUDE_FROM_ALL TRUE)
+find_package(PkgConfig REQUIRED)

-# System libraries dependencies
-find_package(Boost COMPONENTS filesystem program_options system thread regex REQUIRED)
-find_package(Lua REQUIRED)
-find_package(ZLIB REQUIRED)
-find_package(ICU COMPONENTS uc REQUIRED)
+set(ENV{PKG_CONFIG_PATH} "${CMAKE_SOURCE_DIR}/build/${BUILD_TYPE}/seastar:$ENV{PKG_CONFIG_PATH}")
+pkg_check_modules(SEASTAR seastar)

-set(scylla_build_dir "${CMAKE_BINARY_DIR}/build/${BUILD_TYPE}")
-set(scylla_gen_build_dir "${scylla_build_dir}/gen")
-file(MAKE_DIRECTORY "${scylla_build_dir}" "${scylla_gen_build_dir}")
+if(NOT SEASTAR_INCLUDE_DIRS)
+    # Default value. A more accurate list is populated through `pkg-config` above if `seastar.pc` is available.
+    set(SEASTAR_INCLUDE_DIRS "seastar/include")
+endif()

-# Place libraries, executables and archives in ${buildroot}/build/${mode}/
-foreach(mode RUNTIME LIBRARY ARCHIVE)
-    set(CMAKE_${mode}_OUTPUT_DIRECTORY "${scylla_build_dir}")
-endforeach()
+find_package(Boost COMPONENTS filesystem program_options system thread)

-# Generate C++ source files from thrift definitions
-function(scylla_generate_thrift)
-    set(one_value_args TARGET VAR IN_FILE OUT_DIR SERVICE)
-    cmake_parse_arguments(args "" "${one_value_args}" "" ${ARGN})
+##
+## Collect the names of all source (`*.cc`) and header (`*.hh`) files found in the indicated paths into a designated variable.
+##
+## When RECURSIVE is specified, directories are traversed recursively.
+##
+## Use: scan_scylla_source_directories(VAR my_result_var [RECURSIVE] PATHS [path1 path2 ...])
+##
+function (scan_scylla_source_directories)
+    set(options RECURSIVE)
+    set(oneValueArgs VAR)
+    set(multiValueArgs PATHS)
+    cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")

-    get_filename_component(in_file_name ${args_IN_FILE} NAME_WE)
+    set(globs "")

-    set(aux_out_file_name ${args_OUT_DIR}/${in_file_name})
-    set(outputs
-        ${aux_out_file_name}_types.cpp
-        ${aux_out_file_name}_types.h
-        ${aux_out_file_name}_constants.cpp
-        ${aux_out_file_name}_constants.h
-        ${args_OUT_DIR}/${args_SERVICE}.cpp
-        ${args_OUT_DIR}/${args_SERVICE}.h)
+    foreach (dir ${args_PATHS})
+        list(APPEND globs "${dir}/*.cc" "${dir}/*.hh")
+    endforeach()

-    add_custom_command(
-        DEPENDS
-            ${args_IN_FILE}
-            thrift
-        OUTPUT ${outputs}
-        COMMAND ${CMAKE_COMMAND} -E make_directory ${args_OUT_DIR}
-        COMMAND thrift -gen cpp:cob_style,no_skeleton -out "${args_OUT_DIR}" "${args_IN_FILE}")
+    if (args_RECURSIVE)
+        set(glob_kind GLOB_RECURSE)
+    else()
+        set(glob_kind GLOB)
+    endif()

-    add_custom_target(${args_TARGET}
-        DEPENDS ${outputs})
+    file(${glob_kind} var
+            ${globs})

-    set(${args_VAR} ${outputs} PARENT_SCOPE)
+    set(${args_VAR} ${var} PARENT_SCOPE)
 endfunction()

-scylla_generate_thrift(
-    TARGET scylla_thrift_gen_cassandra
-    VAR scylla_thrift_gen_cassandra_files
-    IN_FILE interface/cassandra.thrift
-    OUT_DIR ${scylla_gen_build_dir}
-    SERVICE Cassandra)
+## Although Seastar is an external project, it is common enough to explore the sources while doing
+## Scylla development that we'll treat the Seastar sources as part of this project for easier navigation.
+scan_scylla_source_directories(
+        VAR SEASTAR_SOURCE_FILES
+        RECURSIVE

-# Parse antlr3 grammar files and generate C++ sources
-function(scylla_generate_antlr3)
-    set(one_value_args TARGET VAR IN_FILE OUT_DIR)
-    cmake_parse_arguments(args "" "${one_value_args}" "" ${ARGN})
+        PATHS
+          seastar/core
+          seastar/http
+          seastar/json
+          seastar/net
+          seastar/rpc
+          seastar/testing
+          seastar/util)

-    get_filename_component(in_file_pure_name ${args_IN_FILE} NAME)
-    get_filename_component(stem ${in_file_pure_name} NAME_WE)
+scan_scylla_source_directories(
+        VAR SCYLLA_ROOT_SOURCE_FILES
+        PATHS .)

-    set(outputs
-        "${args_OUT_DIR}/${stem}Lexer.hpp"
-        "${args_OUT_DIR}/${stem}Lexer.cpp"
-        "${args_OUT_DIR}/${stem}Parser.hpp"
-        "${args_OUT_DIR}/${stem}Parser.cpp")
+scan_scylla_source_directories(
+        VAR SCYLLA_SUB_SOURCE_FILES
+        RECURSIVE

-    add_custom_command(
-        DEPENDS
-            ${args_IN_FILE}
-        OUTPUT ${outputs}
-        # Remove #ifdef'ed code from the grammar source code
-        COMMAND sed -e "/^#if 0/,/^#endif/d" "${args_IN_FILE}" > "${args_OUT_DIR}/${in_file_pure_name}"
-        COMMAND antlr3 "${args_OUT_DIR}/${in_file_pure_name}"
-        # We replace many local `ExceptionBaseType* ex` variables with a single function-scope one.
-        # Because we add such a variable to every function, and because `ExceptionBaseType` is not a global
-        # name, we also add a global typedef to avoid compilation errors.
-        COMMAND sed -i -e "/^.*On :.*$/d" "${args_OUT_DIR}/${stem}Lexer.hpp"
-        COMMAND sed -i -e "/^.*On :.*$/d" "${args_OUT_DIR}/${stem}Lexer.cpp"
-        COMMAND sed -i -e "/^.*On :.*$/d" "${args_OUT_DIR}/${stem}Parser.hpp"
-        COMMAND sed -i
-            -e "s/^\\( *\\)\\(ImplTraits::CommonTokenType\\* [a-zA-Z0-9_]* = NULL;\\)$/\\1const \\2/"
-            -e "/^.*On :.*$/d"
-            -e "1i using ExceptionBaseType = int;"
-            -e "s/^{/{ ExceptionBaseType\\* ex = nullptr;/; s/ExceptionBaseType\\* ex = new/ex = new/; s/exceptions::syntax_exception e/exceptions::syntax_exception\\& e/"
-            "${args_OUT_DIR}/${stem}Parser.cpp"
-        VERBATIM)
+        PATHS
+          api
+          auth
+          cql3
+          db
+          dht
+          exceptions
+          gms
+          index
+          io
+          locator
+          message
+          raft
+          repair
+          service
+          sstables
+          streaming
+          test
+          thrift
+          tracing
+          transport
+          utils)

-    add_custom_target(${args_TARGET}
-        DEPENDS ${outputs})
+scan_scylla_source_directories(
+        VAR SCYLLA_GEN_SOURCE_FILES
+        RECURSIVE
+        PATHS build/${BUILD_TYPE}/gen)

-    set(${args_VAR} ${outputs} PARENT_SCOPE)
-endfunction()
-
-set(antlr3_grammar_files
-    cql3/Cql.g
-    alternator/expressions.g)
-
-set(antlr3_gen_files)
-
-foreach(f ${antlr3_grammar_files})
-    get_filename_component(grammar_file_name "${f}" NAME_WE)
-    get_filename_component(f_dir "${f}" DIRECTORY)
-    scylla_generate_antlr3(
-        TARGET scylla_antlr3_gen_${grammar_file_name}
-        VAR scylla_antlr3_gen_${grammar_file_name}_files
-        IN_FILE ${f}
-        OUT_DIR ${scylla_gen_build_dir}/${f_dir})
-    list(APPEND antlr3_gen_files "${scylla_antlr3_gen_${grammar_file_name}_files}")
-endforeach()
-
-# Generate C++ sources from ragel grammar files
-seastar_generate_ragel(
-    TARGET scylla_ragel_gen_protocol_parser
-    VAR scylla_ragel_gen_protocol_parser_file
-    IN_FILE redis/protocol_parser.rl
-    OUT_FILE ${scylla_gen_build_dir}/redis/protocol_parser.hh)
-
-# Generate C++ sources from Swagger definitions
-set(swagger_files
-    api/api-doc/cache_service.json
-    api/api-doc/collectd.json
-    api/api-doc/column_family.json
-    api/api-doc/commitlog.json
-    api/api-doc/compaction_manager.json
-    api/api-doc/config.json
-    api/api-doc/endpoint_snitch_info.json
-    api/api-doc/error_injection.json
-    api/api-doc/failure_detector.json
-    api/api-doc/gossiper.json
-    api/api-doc/hinted_handoff.json
-    api/api-doc/lsa.json
-    api/api-doc/messaging_service.json
-    api/api-doc/storage_proxy.json
-    api/api-doc/storage_service.json
-    api/api-doc/stream_manager.json
-    api/api-doc/system.json
-    api/api-doc/utils.json)
-
-set(swagger_gen_files)
-
-foreach(f ${swagger_files})
-    get_filename_component(fname "${f}" NAME_WE)
-    get_filename_component(dir "${f}" DIRECTORY)
-    seastar_generate_swagger(
-        TARGET scylla_swagger_gen_${fname}
-        VAR scylla_swagger_gen_${fname}_files
-        IN_FILE "${f}"
-        OUT_DIR "${scylla_gen_build_dir}/${dir}")
-    list(APPEND swagger_gen_files "${scylla_swagger_gen_${fname}_files}")
-endforeach()
-
-# Create C++ bindings for IDL serializers
-function(scylla_generate_idl_serializer)
-    set(one_value_args TARGET VAR IN_FILE OUT_FILE)
-    cmake_parse_arguments(args "" "${one_value_args}" "" ${ARGN})
-    get_filename_component(out_dir ${args_OUT_FILE} DIRECTORY)
-    set(idl_compiler "${CMAKE_SOURCE_DIR}/idl-compiler.py")
-
-    find_package(Python3 COMPONENTS Interpreter)
-
-    add_custom_command(
-        DEPENDS
-            ${args_IN_FILE}
-            ${idl_compiler}
-        OUTPUT ${args_OUT_FILE}
-        COMMAND ${CMAKE_COMMAND} -E make_directory ${out_dir}
-        COMMAND Python3::Interpreter ${idl_compiler} --ns ser -f ${args_IN_FILE} -o ${args_OUT_FILE})
-
-    add_custom_target(${args_TARGET}
-        DEPENDS ${args_OUT_FILE})
-
-    set(${args_VAR} ${args_OUT_FILE} PARENT_SCOPE)
-endfunction()
-
-set(idl_serializers
-    idl/cache_temperature.idl.hh
-    idl/commitlog.idl.hh
-    idl/consistency_level.idl.hh
-    idl/frozen_mutation.idl.hh
-    idl/frozen_schema.idl.hh
-    idl/gossip_digest.idl.hh
-    idl/idl_test.idl.hh
-    idl/keys.idl.hh
-    idl/messaging_service.idl.hh
-    idl/mutation.idl.hh
-    idl/paging_state.idl.hh
-    idl/partition_checksum.idl.hh
-    idl/paxos.idl.hh
-    idl/query.idl.hh
-    idl/range.idl.hh
-    idl/read_command.idl.hh
-    idl/reconcilable_result.idl.hh
-    idl/replay_position.idl.hh
-    idl/result.idl.hh
-    idl/ring_position.idl.hh
-    idl/streaming.idl.hh
-    idl/token.idl.hh
-    idl/tracing.idl.hh
-    idl/truncation_record.idl.hh
-    idl/uuid.idl.hh
-    idl/view.idl.hh)
-
-set(idl_gen_files)
-
-foreach(f ${idl_serializers})
-    get_filename_component(idl_name "${f}" NAME)
-    get_filename_component(idl_target "${idl_name}" NAME_WE)
-    get_filename_component(idl_dir "${f}" DIRECTORY)
-    string(REPLACE ".idl.hh" ".dist.hh" idl_out_hdr_name "${idl_name}")
-    scylla_generate_idl_serializer(
-        TARGET scylla_idl_gen_${idl_target}
-        VAR scylla_idl_gen_${idl_target}_files
-        IN_FILE ${f}
-        OUT_FILE ${scylla_gen_build_dir}/${idl_dir}/${idl_out_hdr_name})
-    list(APPEND idl_gen_files "${scylla_idl_gen_${idl_target}_files}")
-endforeach()
-
-set(scylla_sources
-    absl-flat_hash_map.cc
-    alternator/auth.cc
-    alternator/base64.cc
-    alternator/conditions.cc
-    alternator/executor.cc
-    alternator/expressions.cc
-    alternator/serialization.cc
-    alternator/server.cc
-    alternator/stats.cc
-    alternator/streams.cc
-    api/api.cc
-    api/cache_service.cc
-    api/collectd.cc
-    api/column_family.cc
-    api/commitlog.cc
-    api/compaction_manager.cc
-    api/config.cc
-    api/endpoint_snitch.cc
-    api/error_injection.cc
-    api/failure_detector.cc
-    api/gossiper.cc
-    api/hinted_handoff.cc
-    api/lsa.cc
-    api/messaging_service.cc
-    api/storage_proxy.cc
-    api/storage_service.cc
-    api/stream_manager.cc
-    api/system.cc
-    atomic_cell.cc
-    auth/allow_all_authenticator.cc
-    auth/allow_all_authorizer.cc
-    auth/authenticated_user.cc
-    auth/authentication_options.cc
-    auth/authenticator.cc
-    auth/common.cc
-    auth/default_authorizer.cc
-    auth/password_authenticator.cc
-    auth/passwords.cc
-    auth/permission.cc
-    auth/permissions_cache.cc
-    auth/resource.cc
-    auth/role_or_anonymous.cc
-    auth/roles-metadata.cc
-    auth/sasl_challenge.cc
-    auth/service.cc
-    auth/standard_role_manager.cc
-    auth/transitional.cc
-    bytes.cc
-    canonical_mutation.cc
-    cdc/cdc_partitioner.cc
-    cdc/generation.cc
-    cdc/log.cc
-    cdc/metadata.cc
-    cdc/split.cc
-    clocks-impl.cc
-    collection_mutation.cc
-    compress.cc
-    connection_notifier.cc
-    converting_mutation_partition_applier.cc
-    counters.cc
-    cql3/abstract_marker.cc
-    cql3/attributes.cc
-    cql3/cf_name.cc
-    cql3/column_condition.cc
-    cql3/column_identifier.cc
-    cql3/column_specification.cc
-    cql3/constants.cc
-    cql3/cql3_type.cc
-    cql3/expr/expression.cc
-    cql3/functions/aggregate_fcts.cc
-    cql3/functions/castas_fcts.cc
-    cql3/functions/error_injection_fcts.cc
-    cql3/functions/functions.cc
-    cql3/functions/user_function.cc
-    cql3/index_name.cc
-    cql3/keyspace_element_name.cc
-    cql3/lists.cc
-    cql3/maps.cc
-    cql3/operation.cc
-    cql3/query_options.cc
-    cql3/query_processor.cc
-    cql3/relation.cc
-    cql3/restrictions/statement_restrictions.cc
-    cql3/result_set.cc
-    cql3/role_name.cc
-    cql3/selection/abstract_function_selector.cc
-    cql3/selection/selectable.cc
-    cql3/selection/selection.cc
-    cql3/selection/selector.cc
-    cql3/selection/selector_factories.cc
-    cql3/selection/simple_selector.cc
-    cql3/sets.cc
-    cql3/single_column_relation.cc
-    cql3/statements/alter_keyspace_statement.cc
-    cql3/statements/alter_table_statement.cc
-    cql3/statements/alter_type_statement.cc
-    cql3/statements/alter_view_statement.cc
-    cql3/statements/authentication_statement.cc
-    cql3/statements/authorization_statement.cc
-    cql3/statements/batch_statement.cc
-    cql3/statements/cas_request.cc
-    cql3/statements/cf_prop_defs.cc
-    cql3/statements/cf_statement.cc
-    cql3/statements/create_function_statement.cc
-    cql3/statements/create_index_statement.cc
-    cql3/statements/create_keyspace_statement.cc
-    cql3/statements/create_table_statement.cc
-    cql3/statements/create_type_statement.cc
-    cql3/statements/create_view_statement.cc
-    cql3/statements/delete_statement.cc
-    cql3/statements/drop_function_statement.cc
-    cql3/statements/drop_index_statement.cc
-    cql3/statements/drop_keyspace_statement.cc
-    cql3/statements/drop_table_statement.cc
-    cql3/statements/drop_type_statement.cc
-    cql3/statements/drop_view_statement.cc
-    cql3/statements/function_statement.cc
-    cql3/statements/grant_statement.cc
-    cql3/statements/index_prop_defs.cc
-    cql3/statements/index_target.cc
-    cql3/statements/ks_prop_defs.cc
-    cql3/statements/list_permissions_statement.cc
-    cql3/statements/list_users_statement.cc
-    cql3/statements/modification_statement.cc
-    cql3/statements/permission_altering_statement.cc
-    cql3/statements/property_definitions.cc
-    cql3/statements/raw/parsed_statement.cc
-    cql3/statements/revoke_statement.cc
-    cql3/statements/role-management-statements.cc
-    cql3/statements/schema_altering_statement.cc
-    cql3/statements/select_statement.cc
-    cql3/statements/truncate_statement.cc
-    cql3/statements/update_statement.cc
-    cql3/statements/use_statement.cc
-    cql3/token_relation.cc
-    cql3/tuples.cc
-    cql3/type_json.cc
-    cql3/untyped_result_set.cc
-    cql3/update_parameters.cc
-    cql3/user_types.cc
-    cql3/ut_name.cc
-    cql3/util.cc
-    cql3/values.cc
-    cql3/variable_specifications.cc
-    data/cell.cc
-    database.cc
-    db/batchlog_manager.cc
-    db/commitlog/commitlog.cc
-    db/commitlog/commitlog_entry.cc
-    db/commitlog/commitlog_replayer.cc
-    db/config.cc
-    db/consistency_level.cc
-    db/cql_type_parser.cc
-    db/data_listeners.cc
-    db/extensions.cc
-    db/heat_load_balance.cc
-    db/hints/manager.cc
-    db/hints/resource_manager.cc
-    db/large_data_handler.cc
-    db/legacy_schema_migrator.cc
-    db/marshal/type_parser.cc
-    db/schema_tables.cc
-    db/size_estimates_virtual_reader.cc
-    db/snapshot-ctl.cc
-    db/sstables-format-selector.cc
-    db/system_distributed_keyspace.cc
-    db/system_keyspace.cc
-    db/view/row_locking.cc
-    db/view/view.cc
-    db/view/view_update_generator.cc
-    dht/boot_strapper.cc
-    dht/i_partitioner.cc
-    dht/murmur3_partitioner.cc
-    dht/range_streamer.cc
-    dht/token.cc
-    distributed_loader.cc
-    duration.cc
-    exceptions/exceptions.cc
-    flat_mutation_reader.cc
-    frozen_mutation.cc
-    frozen_schema.cc
-    gms/application_state.cc
-    gms/endpoint_state.cc
-    gms/failure_detector.cc
-    gms/feature_service.cc
-    gms/gossip_digest_ack.cc
-    gms/gossip_digest_ack2.cc
-    gms/gossip_digest_syn.cc
-    gms/gossiper.cc
-    gms/inet_address.cc
-    gms/version_generator.cc
-    gms/versioned_value.cc
-    hashers.cc
-    index/secondary_index.cc
-    index/secondary_index_manager.cc
-    init.cc
-    keys.cc
-    lister.cc
-    locator/abstract_replication_strategy.cc
-    locator/ec2_multi_region_snitch.cc
-    locator/ec2_snitch.cc
-    locator/everywhere_replication_strategy.cc
-    locator/gce_snitch.cc
-    locator/gossiping_property_file_snitch.cc
-    locator/local_strategy.cc
-    locator/network_topology_strategy.cc
-    locator/production_snitch_base.cc
-    locator/rack_inferring_snitch.cc
-    locator/simple_snitch.cc
-    locator/simple_strategy.cc
-    locator/snitch_base.cc
-    locator/token_metadata.cc
-    lua.cc
-    main.cc
-    memtable.cc
-    message/messaging_service.cc
-    multishard_mutation_query.cc
-    mutation.cc
-    raft/fsm.cc
-    raft/log.cc
-    raft/progress.cc
-    raft/raft.cc
-    raft/server.cc
-    mutation_fragment.cc
-    mutation_partition.cc
-    mutation_partition_serializer.cc
-    mutation_partition_view.cc
-    mutation_query.cc
-    mutation_reader.cc
-    mutation_writer/multishard_writer.cc
-    mutation_writer/shard_based_splitting_writer.cc
-    mutation_writer/timestamp_based_splitting_writer.cc
-    partition_slice_builder.cc
-    partition_version.cc
-    querier.cc
-    query-result-set.cc
-    query.cc
-    range_tombstone.cc
-    range_tombstone_list.cc
-    reader_concurrency_semaphore.cc
-    redis/abstract_command.cc
-    redis/command_factory.cc
-    redis/commands.cc
-    redis/keyspace_utils.cc
-    redis/lolwut.cc
-    redis/mutation_utils.cc
-    redis/options.cc
-    redis/query_processor.cc
-    redis/query_utils.cc
-    redis/server.cc
-    redis/service.cc
-    redis/stats.cc
-    repair/repair.cc
-    repair/row_level.cc
-    row_cache.cc
-    schema.cc
-    schema_mutations.cc
-    schema_registry.cc
-    service/client_state.cc
-    service/migration_manager.cc
-    service/migration_task.cc
-    service/misc_services.cc
-    service/pager/paging_state.cc
-    service/pager/query_pagers.cc
-    service/paxos/paxos_state.cc
-    service/paxos/prepare_response.cc
-    service/paxos/prepare_summary.cc
-    service/paxos/proposal.cc
-    service/priority_manager.cc
-    service/storage_proxy.cc
-    service/storage_service.cc
-    sstables/compaction.cc
-    sstables/compaction_manager.cc
-    sstables/compaction_strategy.cc
-    sstables/compress.cc
-    sstables/integrity_checked_file_impl.cc
-    sstables/kl/writer.cc
-    sstables/leveled_compaction_strategy.cc
-    sstables/m_format_read_helpers.cc
-    sstables/metadata_collector.cc
-    sstables/mp_row_consumer.cc
-    sstables/mx/writer.cc
-    sstables/partition.cc
-    sstables/prepended_input_stream.cc
-    sstables/random_access_reader.cc
-    sstables/size_tiered_compaction_strategy.cc
-    sstables/sstable_directory.cc
-    sstables/sstable_version.cc
-    sstables/sstables.cc
-    sstables/sstables_manager.cc
-    sstables/time_window_compaction_strategy.cc
-    sstables/writer.cc
-    streaming/progress_info.cc
-    streaming/session_info.cc
-    streaming/stream_coordinator.cc
-    streaming/stream_manager.cc
-    streaming/stream_plan.cc
-    streaming/stream_reason.cc
-    streaming/stream_receive_task.cc
-    streaming/stream_request.cc
-    streaming/stream_result_future.cc
-    streaming/stream_session.cc
-    streaming/stream_session_state.cc
-    streaming/stream_summary.cc
-    streaming/stream_task.cc
-    streaming/stream_transfer_task.cc
-    table.cc
-    table_helper.cc
-    thrift/controller.cc
-    thrift/handler.cc
-    thrift/server.cc
-    thrift/thrift_validation.cc
-    timeout_config.cc
-    tracing/trace_keyspace_helper.cc
-    tracing/trace_state.cc
-    tracing/traced_file.cc
-    tracing/tracing.cc
-    tracing/tracing_backend_registry.cc
-    transport/controller.cc
-    transport/cql_protocol_extension.cc
-    transport/event.cc
-    transport/event_notifier.cc
-    transport/messages/result_message.cc
-    transport/server.cc
-    types.cc
-    unimplemented.cc
-    utils/UUID_gen.cc
-    utils/arch/powerpc/crc32-vpmsum/crc32_wrapper.cc
-    utils/array-search.cc
-    utils/ascii.cc
-    utils/big_decimal.cc
-    utils/bloom_calculations.cc
-    utils/bloom_filter.cc
-    utils/buffer_input_stream.cc
-    utils/build_id.cc
-    utils/config_file.cc
-    utils/directories.cc
-    utils/disk-error-handler.cc
-    utils/dynamic_bitset.cc
-    utils/error_injection.cc
-    utils/exceptions.cc
-    utils/file_lock.cc
-    utils/generation-number.cc
-    utils/gz/crc_combine.cc
-    utils/human_readable.cc
-    utils/i_filter.cc
-    utils/large_bitset.cc
-    utils/like_matcher.cc
-    utils/limiting_data_source.cc
-    utils/logalloc.cc
-    utils/managed_bytes.cc
-    utils/multiprecision_int.cc
-    utils/murmur_hash.cc
-    utils/rate_limiter.cc
-    utils/rjson.cc
-    utils/runtime.cc
-    utils/updateable_value.cc
-    utils/utf8.cc
-    utils/uuid.cc
-    validation.cc
-    vint-serialization.cc
-    zstd.cc
-    release.cc)
-
-set(scylla_gen_sources
-    "${scylla_thrift_gen_cassandra_files}"
-    "${scylla_ragel_gen_protocol_parser_file}"
-    "${swagger_gen_files}"
-    "${idl_gen_files}"
-    "${antlr3_gen_files}")
+set(SCYLLA_SOURCE_FILES
+        ${SCYLLA_ROOT_SOURCE_FILES}
+        ${SCYLLA_GEN_SOURCE_FILES}
+        ${SCYLLA_SUB_SOURCE_FILES})

 add_executable(scylla
-    ${scylla_sources}
-    ${scylla_gen_sources})
+        ${SEASTAR_SOURCE_FILES}
+        ${SCYLLA_SOURCE_FILES})

-target_link_libraries(scylla PRIVATE
-    seastar
-    # Boost dependencies
-    Boost::filesystem
-    Boost::program_options
-    Boost::system
-    Boost::thread
-    Boost::regex
-    Boost::headers
-    # Abseil libs
-    absl::hashtablez_sampler
-    absl::raw_hash_set
-    absl::synchronization
-    absl::graphcycles_internal
-    absl::stacktrace
-    absl::symbolize
-    absl::debugging_internal
-    absl::demangle_internal
-    absl::time
-    absl::time_zone
-    absl::int128
-    absl::city
-    absl::hash
-    absl::malloc_internal
-    absl::spinlock_wait
-    absl::base
-    absl::dynamic_annotations
-    absl::raw_logging_internal
-    absl::exponential_biased
-    absl::throw_delegate
-    # System libs
-    ZLIB::ZLIB
-    ICU::uc
-    systemd
-    zstd
-    snappy
-    ${LUA_LIBRARIES}
-    thrift
-    crypt)
+# If the Seastar pkg-config information is available, append to the default flags.
+#
+# For ease of browsing the source code, we always pretend that DPDK is enabled.
+target_compile_options(scylla PUBLIC
+        -std=gnu++20
+        -DHAVE_DPDK
+        -DHAVE_HWLOC
+        "${SEASTAR_CFLAGS}")

-target_link_libraries(scylla PRIVATE
-    -Wl,--build-id=sha1 # Force SHA1 build-id generation
-    # TODO: Use lld linker if it's available, otherwise gold, else bfd
-    -fuse-ld=lld)
-# TODO: patch dynamic linker to match configure.py behavior
-
-target_compile_options(scylla PRIVATE
-    -std=gnu++20
-    -fcoroutines # TODO: Clang does not have this flag, adjust to both variants
-    ${target_arch_flag})
-# Hacks needed to expose internal APIs for xxhash dependencies
-target_compile_definitions(scylla PRIVATE XXH_PRIVATE_API HAVE_LZ4_COMPRESS_DEFAULT)
-
-target_include_directories(scylla PRIVATE
-    "${CMAKE_CURRENT_SOURCE_DIR}"
-    libdeflate
-    abseil
-    "${scylla_gen_build_dir}")
-
-###
-### Create crc_combine_table helper executable.
-### Use it to generate crc_combine_table.cc to be used in scylla at build time.
-###
-add_executable(crc_combine_table utils/gz/gen_crc_combine_table.cc)
-target_link_libraries(crc_combine_table PRIVATE seastar)
-target_include_directories(crc_combine_table PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
-target_compile_options(crc_combine_table PRIVATE
-    -std=gnu++20
-    -fcoroutines
-    ${target_arch_flag})
-add_dependencies(scylla crc_combine_table)
-
-# Generate an additional source file at build time that is needed for Scylla compilation
-add_custom_command(OUTPUT "${scylla_gen_build_dir}/utils/gz/crc_combine_table.cc"
-    COMMAND $<TARGET_FILE:crc_combine_table> > "${scylla_gen_build_dir}/utils/gz/crc_combine_table.cc"
-    DEPENDS crc_combine_table)
-target_sources(scylla PRIVATE "${scylla_gen_build_dir}/utils/gz/crc_combine_table.cc")
-
-###
-### Generate version file and supply appropriate compile definitions for release.cc
-###
-execute_process(COMMAND ${CMAKE_SOURCE_DIR}/SCYLLA-VERSION-GEN RESULT_VARIABLE scylla_version_gen_res)
-if(scylla_version_gen_res)
-    message(SEND_ERROR "Version file generation failed. Return code: ${scylla_version_gen_res}")
-endif()
-
-file(READ build/SCYLLA-VERSION-FILE scylla_version)
-string(STRIP "${scylla_version}" scylla_version)
-
-file(READ build/SCYLLA-RELEASE-FILE scylla_release)
-string(STRIP "${scylla_release}" scylla_release)
-
-get_property(release_cdefs SOURCE "${CMAKE_SOURCE_DIR}/release.cc" PROPERTY COMPILE_DEFINITIONS)
-list(APPEND release_cdefs "SCYLLA_VERSION=\"${scylla_version}\"" "SCYLLA_RELEASE=\"${scylla_release}\"")
-set_source_files_properties("${CMAKE_SOURCE_DIR}/release.cc" PROPERTIES COMPILE_DEFINITIONS "${release_cdefs}")
-
-###
-### Custom command for building libdeflate. Link the library to scylla.
-###
-set(libdeflate_lib "${scylla_build_dir}/libdeflate/libdeflate.a")
-add_custom_command(OUTPUT "${libdeflate_lib}"
-    COMMAND make -C libdeflate
-        BUILD_DIR=../build/${BUILD_TYPE}/libdeflate/
-        CC=${CMAKE_C_COMPILER}
-        "CFLAGS=${target_arch_flag}"
-        ../build/${BUILD_TYPE}/libdeflate//libdeflate.a) # Two backslashes are important!
-# Hack to force generating custom command to produce libdeflate.a
-add_custom_target(libdeflate DEPENDS "${libdeflate_lib}")
-target_link_libraries(scylla PRIVATE "${libdeflate_lib}")
-
-# TODO: create cmake/ directory and move utilities (generate functions etc) there
-# TODO: Build tests if BUILD_TESTING=on (using CTest module)
+# The order matters here: prefer the "static" DPDK directories to any dynamic paths from pkg-config. Some files are only
+# available dynamically, though.
+target_include_directories(scylla PUBLIC
+        .
+        ${SEASTAR_DPDK_INCLUDE_DIRS}
+        ${SEASTAR_INCLUDE_DIRS}
+        ${Boost_INCLUDE_DIRS}
+        xxhash
+        libdeflate
+        abseil
+        build/${BUILD_TYPE}/gen)
--- a/2
+++ b/2
@@ -1,7 +1,7 @@
 #!/bin/sh

 PRODUCT=scylla
-VERSION=4.4.dev
+VERSION=4.3.7

 if test -f version
 then
--- a/alternator/auth.cc
+++ b/alternator/auth.cc
@@ -129,7 +129,8 @@ future<std::string> get_key_from_roles(cql3::query_processor& qp, std::string us
            auth::meta::roles_table::qualified_name, auth::meta::roles_table::role_col_name);

    auto cl = auth::password_authenticator::consistency_for_user(username);
-    return qp.execute_internal(query, cl, auth::internal_distributed_query_state(), {sstring(username)}, true).then_wrapped([username = std::move(username)] (future<::shared_ptr<cql3::untyped_result_set>> f) {
+    auto& timeout = auth::internal_distributed_timeout_config();
+    return qp.execute_internal(query, cl, timeout, {sstring(username)}, true).then_wrapped([username = std::move(username)] (future<::shared_ptr<cql3::untyped_result_set>> f) {
        auto res = f.get0();
        auto salted_hash = std::optional<sstring>();
        if (res->empty()) {
--- a/alternator/conditions.cc
+++ b/alternator/conditions.cc
@@ -123,7 +123,7 @@ struct rjson_engaged_ptr_comp {
 // as internally they're stored in an array, and the order of elements is
 // not important in set equality. See issue #5021
 static bool check_EQ_for_sets(const rjson::value& set1, const rjson::value& set2) {
-    if (set1.Size() != set2.Size()) {
+    if (!set1.IsArray() || !set2.IsArray() || set1.Size() != set2.Size()) {
        return false;
    }
    std::set<const rjson::value*, rjson_engaged_ptr_comp> set1_raw;
@@ -137,45 +137,107 @@ static bool check_EQ_for_sets(const rjson::value& set1, const rjson::value& set2
    }
    return true;
 }
+// The JSON values being compared can also be nested documents, with outer
+// layers of lists and maps around some inner set - and we need to get to that
+// inner set to compare it correctly with check_EQ_for_sets() (issue #8514).
+static bool check_EQ(const rjson::value* v1, const rjson::value& v2);
+static bool check_EQ_for_lists(const rjson::value& list1, const rjson::value& list2) {
+    if (!list1.IsArray() || !list2.IsArray() || list1.Size() != list2.Size()) {
+        return false;
+    }
+    auto it1 = list1.Begin();
+    auto it2 = list2.Begin();
+    while (it1 != list1.End()) {
+        // Note: Alternator limits an item's depth (rjson::parse() limits
+        // it to around 37 levels), so this recursion is safe.
+        if (!check_EQ(&*it1, *it2)) {
+            return false;
+        }
+        ++it1;
+        ++it2;
+    }
+    return true;
+}
+static bool check_EQ_for_maps(const rjson::value& map1, const rjson::value& map2) {
+    if (!map1.IsObject() || !map2.IsObject() || map1.MemberCount() != map2.MemberCount()) {
+        return false; // not two JSON objects, or objects with a different number of members
+    }
+    for (auto it1 = map1.MemberBegin(); it1 != map1.MemberEnd(); ++it1) {
+        auto it2 = map2.FindMember(it1->name); // members are matched by name, not by position
+        if (it2 == map2.MemberEnd() || !check_EQ(&it1->value, it2->value)) {
+            return false; // key missing from map2, or the two values differ (checked recursively)
+        }
+    }
+    return true;
+}

 // Check if two JSON-encoded values match with the EQ relation
 static bool check_EQ(const rjson::value* v1, const rjson::value& v2) {
-    if (!v1) {
-        return false;
-    }
-    if (v1->IsObject() && v1->MemberCount() == 1 && v2.IsObject() && v2.MemberCount() == 1) {
+    if (v1 && v1->IsObject() && v1->MemberCount() == 1 && v2.IsObject() && v2.MemberCount() == 1) {
        auto it1 = v1->MemberBegin();
        auto it2 = v2.MemberBegin();
-        if ((it1->name == "SS" && it2->name == "SS") || (it1->name == "NS" && it2->name == "NS") || (it1->name == "BS" && it2->name == "BS")) {
-            return check_EQ_for_sets(it1->value, it2->value);
+        if (it1->name != it2->name) {
+            return false;
        }
+        if (it1->name == "SS" || it1->name == "NS" || it1->name == "BS") {
+            return check_EQ_for_sets(it1->value, it2->value);
+        } else if(it1->name == "L") {
+            return check_EQ_for_lists(it1->value, it2->value);
+        } else if(it1->name == "M") {
+            return check_EQ_for_maps(it1->value, it2->value);
+        } else {
+            // Other, non-nested types (number, string, etc.) can be compared
+            // literally, comparing their JSON representation.
+            return it1->value == it2->value;
+        }
+    } else {
+        // If v1 and/or v2 are missing (IsNull()) the result should be false.
+        // In the unlikely case that the object is malformed (issue #8070),
+        // let's also return false.
+        return false;
    }
-    return *v1 == v2;
 }

 // Check if two JSON-encoded values match with the NE relation
 static bool check_NE(const rjson::value* v1, const rjson::value& v2) {
-    return !v1 || *v1 != v2; // null is unequal to anything.
+    return !check_EQ(v1, v2);
 }

 // Check if two JSON-encoded values match with the BEGINS_WITH relation
-static bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2) {
-    // BEGINS_WITH requires that its single operand (v2) be a string or
-    // binary - otherwise it's a validation error. However, problems with
-    // the stored attribute (v1) will just return false (no match).
-    if (!v2.IsObject() || v2.MemberCount() != 1) {
-        throw api_error::validation(format("BEGINS_WITH operator encountered malformed AttributeValue: {}", v2));
-    }
-    auto it2 = v2.MemberBegin();
-    if (it2->name != "S" && it2->name != "B") {
-        throw api_error::validation(format("BEGINS_WITH operator requires String or Binary type in AttributeValue, got {}", it2->name));
-    }
-
-
+bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2,
+                       bool v1_from_query, bool v2_from_query) {
+    bool bad = false;
    if (!v1 || !v1->IsObject() || v1->MemberCount() != 1) {
+        if (v1_from_query) {
+            throw api_error::validation("begins_with() encountered malformed argument");
+        } else {
+            bad = true;
+        }
+    } else if (v1->MemberBegin()->name != "S" && v1->MemberBegin()->name != "B") {
+        if (v1_from_query) {
+            throw api_error::validation(format("begins_with supports only string or binary type, got: {}", *v1));
+        } else {
+            bad = true;
+        }
+    }
+    if (!v2.IsObject() || v2.MemberCount() != 1) {
+        if (v2_from_query) {
+            throw api_error::validation("begins_with() encountered malformed argument");
+        } else {
+            bad = true;
+        }
+    } else if (v2.MemberBegin()->name != "S" && v2.MemberBegin()->name != "B") {
+        if (v2_from_query) {
+            throw api_error::validation(format("begins_with() supports only string or binary type, got: {}", v2));
+        } else {
+            bad = true;
+        }
+    }
+    if (bad) {
        return false;
    }
    auto it1 = v1->MemberBegin();
+    auto it2 = v2.MemberBegin();
    if (it1->name != it2->name) {
        return false;
    }
@@ -279,24 +341,40 @@ static bool check_NOT_NULL(const rjson::value* val) {
    return val != nullptr;
 }

+// Only types S, N or B (string, number or bytes) may be compared by the
+// various comparison operators - lt, le, gt, ge, and between.
+// Note that in particular, if the value is missing (v.IsNull()), this
+// check returns false.
+static bool check_comparable_type(const rjson::value& v) {
+    if (!v.IsObject() || v.MemberCount() != 1) {
+        return false;
+    }
+    const rjson::value& type = v.MemberBegin()->name;
+    return type == "S" || type == "N" || type == "B";
+}
+
 // Check if two JSON-encoded values match with cmp.
 template <typename Comparator>
-bool check_compare(const rjson::value* v1, const rjson::value& v2, const Comparator& cmp) {
-    if (!v2.IsObject() || v2.MemberCount() != 1) {
-        throw api_error::validation(
-                        format("{} requires a single AttributeValue of type String, Number, or Binary",
-                               cmp.diagnostic));
+bool check_compare(const rjson::value* v1, const rjson::value& v2, const Comparator& cmp,
+                   bool v1_from_query, bool v2_from_query) {
+    bool bad = false;
+    if (!v1 || !check_comparable_type(*v1)) {
+        if (v1_from_query) {
+            throw api_error::validation(format("{} allow only the types String, Number, or Binary", cmp.diagnostic));
+        }
+        bad = true;
    }
-    const auto& kv2 = *v2.MemberBegin();
-    if (kv2.name != "S" && kv2.name != "N" && kv2.name != "B") {
-        throw api_error::validation(
-                        format("{} requires a single AttributeValue of type String, Number, or Binary",
-                               cmp.diagnostic));
+    if (!check_comparable_type(v2)) {
+        if (v2_from_query) {
+            throw api_error::validation(format("{} allow only the types String, Number, or Binary", cmp.diagnostic));
+        }
+        bad = true;
    }
-    if (!v1 || !v1->IsObject() || v1->MemberCount() != 1) {
+    if (bad) {
        return false;
    }
    const auto& kv1 = *v1->MemberBegin();
+    const auto& kv2 = *v2.MemberBegin();
    if (kv1.name != kv2.name) {
        return false;
    }
@@ -310,7 +388,8 @@ bool check_compare(const rjson::value* v1, const rjson::value& v2, const Compara
    if (kv1.name == "B") {
        return cmp(base64_decode(kv1.value), base64_decode(kv2.value));
    }
-    clogger.error("check_compare panic: LHS type equals RHS type, but one is in {N,S,B} while the other isn't");
+    // cannot reach here, as check_comparable_type() verifies the type is one
+    // of the above options.
    return false;
 }

@@ -341,56 +420,71 @@ struct cmp_gt {
    static constexpr const char* diagnostic = "GT operator";
 };

-// True if v is between lb and ub, inclusive.  Throws if lb > ub.
+// True if v is between lb and ub, inclusive.  Throws or returns false
+// (depending on bounds_from_query parameter) if lb > ub.
 template <typename T>
-static bool check_BETWEEN(const T& v, const T& lb, const T& ub) {
+static bool check_BETWEEN(const T& v, const T& lb, const T& ub, bool bounds_from_query) {
    if (cmp_lt()(ub, lb)) {
-        throw api_error::validation(
-                        format("BETWEEN operator requires lower_bound <= upper_bound, but {} > {}", lb, ub));
+        if (bounds_from_query) {
+            throw api_error::validation(
+                format("BETWEEN operator requires lower_bound <= upper_bound, but {} > {}", lb, ub));
+        } else {
+            return false;
+        }
    }
    return cmp_ge()(v, lb) && cmp_le()(v, ub);
 }

-static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const rjson::value& ub) {
-    if (!v) {
+static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const rjson::value& ub,
+                          bool v_from_query, bool lb_from_query, bool ub_from_query) {
+    if ((v && v_from_query && !check_comparable_type(*v)) ||
+        (lb_from_query && !check_comparable_type(lb)) ||
+        (ub_from_query && !check_comparable_type(ub))) {
+        throw api_error::validation("between allow only the types String, Number, or Binary");
+
+    }
+    if (!v || !v->IsObject() || v->MemberCount() != 1 ||
+        !lb.IsObject() || lb.MemberCount() != 1 ||
+        !ub.IsObject() || ub.MemberCount() != 1) {
        return false;
    }
-    if (!v->IsObject() || v->MemberCount() != 1) {
-        throw api_error::validation(format("BETWEEN operator encountered malformed AttributeValue: {}", *v));
-    }
-    if (!lb.IsObject() || lb.MemberCount() != 1) {
-        throw api_error::validation(format("BETWEEN operator encountered malformed AttributeValue: {}", lb));
-    }
-    if (!ub.IsObject() || ub.MemberCount() != 1) {
-        throw api_error::validation(format("BETWEEN operator encountered malformed AttributeValue: {}", ub));
-    }

    const auto& kv_v = *v->MemberBegin();
    const auto& kv_lb = *lb.MemberBegin();
    const auto& kv_ub = *ub.MemberBegin();
+    bool bounds_from_query = lb_from_query && ub_from_query;
    if (kv_lb.name != kv_ub.name) {
-        throw api_error::validation(
+        if (bounds_from_query) {
+           throw api_error::validation(
                format("BETWEEN operator requires the same type for lower and upper bound; instead got {} and {}",
                       kv_lb.name, kv_ub.name));
+        } else {
+            return false;
+        }
    }
    if (kv_v.name != kv_lb.name) { // Cannot compare different types, so v is NOT between lb and ub.
        return false;
    }
    if (kv_v.name == "N") {
        const char* diag = "BETWEEN operator";
-        return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag));
+        return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag), bounds_from_query);
    }
    if (kv_v.name == "S") {
        return check_BETWEEN(std::string_view(kv_v.value.GetString(), kv_v.value.GetStringLength()),
                             std::string_view(kv_lb.value.GetString(), kv_lb.value.GetStringLength()),
-                             std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()));
+                             std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()),
+                             bounds_from_query);
    }
    if (kv_v.name == "B") {
-        return check_BETWEEN(base64_decode(kv_v.value), base64_decode(kv_lb.value), base64_decode(kv_ub.value));
+        return check_BETWEEN(base64_decode(kv_v.value), base64_decode(kv_lb.value), base64_decode(kv_ub.value), bounds_from_query);
    }
-    throw api_error::validation(
-        format("BETWEEN operator requires AttributeValueList elements to be of type String, Number, or Binary; instead got {}",
+    if (v_from_query) {
+        throw api_error::validation(
+            format("BETWEEN operator requires AttributeValueList elements to be of type String, Number, or Binary; instead got {}",
               kv_lb.name));
+    } else {
+        return false;
+    }
 }

 // Verify one Expect condition on one attribute (whose content is "got")
@@ -437,19 +531,19 @@ static bool verify_expected_one(const rjson::value& condition, const rjson::valu
            return check_NE(got, (*attribute_value_list)[0]);
        case comparison_operator_type::LT:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_compare(got, (*attribute_value_list)[0], cmp_lt{});
+            return check_compare(got, (*attribute_value_list)[0], cmp_lt{}, false, true);
        case comparison_operator_type::LE:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_compare(got, (*attribute_value_list)[0], cmp_le{});
+            return check_compare(got, (*attribute_value_list)[0], cmp_le{}, false, true);
        case comparison_operator_type::GT:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_compare(got, (*attribute_value_list)[0], cmp_gt{});
+            return check_compare(got, (*attribute_value_list)[0], cmp_gt{}, false, true);
        case comparison_operator_type::GE:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_compare(got, (*attribute_value_list)[0], cmp_ge{});
+            return check_compare(got, (*attribute_value_list)[0], cmp_ge{}, false, true);
        case comparison_operator_type::BEGINS_WITH:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_BEGINS_WITH(got, (*attribute_value_list)[0]);
+            return check_BEGINS_WITH(got, (*attribute_value_list)[0], false, true);
        case comparison_operator_type::IN:
            verify_operand_count(attribute_value_list, nonempty(), *comparison_operator);
            return check_IN(got, *attribute_value_list);
@@ -461,7 +555,8 @@ static bool verify_expected_one(const rjson::value& condition, const rjson::valu
            return check_NOT_NULL(got);
        case comparison_operator_type::BETWEEN:
            verify_operand_count(attribute_value_list, exact_size(2), *comparison_operator);
-            return check_BETWEEN(got, (*attribute_value_list)[0], (*attribute_value_list)[1]);
+            return check_BETWEEN(got, (*attribute_value_list)[0], (*attribute_value_list)[1],
+                                 false, true, true);
        case comparison_operator_type::CONTAINS:
            {
                verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
@@ -573,7 +668,8 @@ static bool calculate_primitive_condition(const parsed::primitive_condition& con
            // Shouldn't happen unless we have a bug in the parser
            throw std::logic_error(format("Wrong number of values {} in BETWEEN primitive_condition", cond._values.size()));
        }
-        return check_BETWEEN(&calculated_values[0], calculated_values[1], calculated_values[2]);
+        return check_BETWEEN(&calculated_values[0], calculated_values[1], calculated_values[2],
+                             cond._values[0].is_constant(), cond._values[1].is_constant(), cond._values[2].is_constant());
    case parsed::primitive_condition::type::IN:
        return check_IN(calculated_values);
    case parsed::primitive_condition::type::VALUE:
@@ -604,13 +700,17 @@ static bool calculate_primitive_condition(const parsed::primitive_condition& con
    case parsed::primitive_condition::type::NE:
        return check_NE(&calculated_values[0], calculated_values[1]);
    case parsed::primitive_condition::type::GT:
-        return check_compare(&calculated_values[0], calculated_values[1], cmp_gt{});
+        return check_compare(&calculated_values[0], calculated_values[1], cmp_gt{},
+            cond._values[0].is_constant(), cond._values[1].is_constant());
    case parsed::primitive_condition::type::GE:
-        return check_compare(&calculated_values[0], calculated_values[1], cmp_ge{});
+        return check_compare(&calculated_values[0], calculated_values[1], cmp_ge{},
+            cond._values[0].is_constant(), cond._values[1].is_constant());
    case parsed::primitive_condition::type::LT:
-        return check_compare(&calculated_values[0], calculated_values[1], cmp_lt{});
+        return check_compare(&calculated_values[0], calculated_values[1], cmp_lt{},
+            cond._values[0].is_constant(), cond._values[1].is_constant());
    case parsed::primitive_condition::type::LE:
-        return check_compare(&calculated_values[0], calculated_values[1], cmp_le{});
+        return check_compare(&calculated_values[0], calculated_values[1], cmp_le{},
+            cond._values[0].is_constant(), cond._values[1].is_constant());
    default:
        // Shouldn't happen unless we have a bug in the parser
        throw std::logic_error(format("Unknown type {} in primitive_condition object", (int)(cond._op)));
--- a/alternator/conditions.hh
+++ b/alternator/conditions.hh
@@ -52,6 +52,7 @@ bool verify_expected(const rjson::value& req, const rjson::value* previous_item)
 bool verify_condition(const rjson::value& condition, bool require_all, const rjson::value* previous_item);

 bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2);
+bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2, bool v1_from_query, bool v2_from_query);

 bool verify_condition_expression(
        const parsed::condition_expression& condition_expression,
--- a/alternator/executor.cc
+++ b/alternator/executor.cc
@@ -404,7 +404,6 @@ future<executor::request_return_type> executor::describe_table(client_state& cli
    // returned.
    rjson::set(table_description, "TableStatus", "ACTIVE");
    rjson::set(table_description, "TableArn", generate_arn_for_table(*schema));
-    rjson::set(table_description, "TableId", rjson::from_string(schema->id().to_sstring()));
    // FIXME: Instead of hardcoding, we should take into account which mode was chosen
    // when the table was created. But, Spark jobs expect something to be returned
    // and PAY_PER_REQUEST seems closer to reality than PROVISIONED.
@@ -2245,19 +2244,30 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
                    rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item.get());
                    rjson::value v2 = calculate_value(addition, calculate_value_caller::UpdateExpression, previous_item.get());
                    rjson::value result;
-                    std::string v1_type = get_item_type_string(v1);
-                    if (v1_type == "N") {
-                        if (get_item_type_string(v2) != "N") {
-                            throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
+                    // An ADD can be used to create a new attribute (when
+                    // v1.IsNull()) or to add to a pre-existing attribute:
+                    if (v1.IsNull()) {
+                        std::string v2_type = get_item_type_string(v2);
+                        if (v2_type == "N" || v2_type == "SS" || v2_type == "NS" || v2_type == "BS") {
+                            result = v2;
+                        } else {
+                            throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v2));
                        }
-                        result = number_add(v1, v2);
-                    } else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
-                        if (get_item_type_string(v2) != v1_type) {
-                            throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
-                        }
-                        result = set_sum(v1, v2);
                    } else {
-                        throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v1));
+                        std::string v1_type = get_item_type_string(v1);
+                        if (v1_type == "N") {
+                            if (get_item_type_string(v2) != "N") {
+                                throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
+                            }
+                            result = number_add(v1, v2);
+                        } else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
+                            if (get_item_type_string(v2) != v1_type) {
+                                throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
+                            }
+                            result = set_sum(v1, v2);
+                        } else {
+                            throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v1));
+                        }
                    }
                    do_update(to_bytes(column_name), result);
                },
@@ -2604,9 +2614,6 @@ filter::filter(const rjson::value& request, request_type rt,
        if (expression->GetStringLength() == 0) {
            throw api_error::validation("FilterExpression must not be empty");
        }
-        if (rjson::find(request, "AttributesToGet")) {
-            throw api_error::validation("Cannot use both old-style and new-style parameters in same request: FilterExpression and AttributesToGet");
-        }
        try {
            // FIXME: make parse_condition_expression take string_view, get
            // rid of the silly conversion to std::string.
@@ -2622,9 +2629,6 @@ filter::filter(const rjson::value& request, request_type rt,
        }
    }
    if (conditions) {
-        if (rjson::find(request, "ProjectionExpression")) {
-            throw api_error::validation(format("Cannot use both old-style and new-style parameters in same request: {} and ProjectionExpression", conditions_attribute));
-        }
        bool require_all = conditional_operator != conditional_operator_type::OR;
        _imp = conditions_filter { require_all, rjson::copy(*conditions) };
    }
@@ -2788,7 +2792,7 @@ static rjson::value encode_paging_state(const schema& schema, const service::pag
    for (const column_definition& cdef : schema.partition_key_columns()) {
        rjson::set_with_string_name(last_evaluated_key, std::string_view(cdef.name_as_text()), rjson::empty_object());
        rjson::value& key_entry = last_evaluated_key[cdef.name_as_text()];
-        rjson::set_with_string_name(key_entry, type_to_string(cdef.type), rjson::parse(to_json_string(*cdef.type, *exploded_pk_it)));
+        rjson::set_with_string_name(key_entry, type_to_string(cdef.type), json_key_column_value(*exploded_pk_it, cdef));
        ++exploded_pk_it;
    }
    auto ck = paging_state.get_clustering_key();
@@ -2798,7 +2802,7 @@ static rjson::value encode_paging_state(const schema& schema, const service::pag
        for (const column_definition& cdef : schema.clustering_key_columns()) {
            rjson::set_with_string_name(last_evaluated_key, std::string_view(cdef.name_as_text()), rjson::empty_object());
            rjson::value& key_entry = last_evaluated_key[cdef.name_as_text()];
-            rjson::set_with_string_name(key_entry, type_to_string(cdef.type), rjson::parse(to_json_string(*cdef.type, *exploded_ck_it)));
+            rjson::set_with_string_name(key_entry, type_to_string(cdef.type), json_key_column_value(*exploded_ck_it, cdef));
            ++exploded_ck_it;
        }
    }
@@ -2845,7 +2849,7 @@ static future<executor::request_return_type> do_query(service::storage_proxy& pr
    auto query_state_ptr = std::make_unique<service::query_state>(client_state, trace_state, std::move(permit));

    command->slice.options.set<query::partition_slice::option::allow_short_read>();
-    auto query_options = std::make_unique<cql3::query_options>(cl, std::vector<cql3::raw_value>{});
+    auto query_options = std::make_unique<cql3::query_options>(cl, infinite_timeout_config, std::vector<cql3::raw_value>{});
    query_options = std::make_unique<cql3::query_options>(std::move(query_options), std::move(paging_state));
    auto p = service::pager::query_pagers::pager(schema, selection, *query_state_ptr, *query_options, command, std::move(partition_ranges), nullptr);

--- a/alternator/expressions.cc
+++ b/alternator/expressions.cc
@@ -603,52 +603,8 @@ std::unordered_map<std::string_view, function_handler_type*> function_handlers {
            }
            rjson::value v1 = calculate_value(f._parameters[0], caller, previous_item);
            rjson::value v2 = calculate_value(f._parameters[1], caller, previous_item);
-            // TODO: There's duplication here with check_BEGINS_WITH().
-            // But unfortunately, the two functions differ a bit.
-
-            // If one of v1 or v2 is malformed or has an unsupported type
-            // (not B or S), what we do depends on whether it came from
-            // the user's query (is_constant()), or the item. Unsupported
-            // values in the query result in an error, but if they are in
-            // the item, we silently return false (no match).
-            bool bad = false;
-            if (!v1.IsObject() || v1.MemberCount() != 1) {
-                bad = true;
-                if (f._parameters[0].is_constant()) {
-                    throw api_error::validation(format("{}: begins_with() encountered malformed AttributeValue: {}", caller, v1));
-                }
-            } else if (v1.MemberBegin()->name != "S" && v1.MemberBegin()->name != "B") {
-                bad = true;
-                if (f._parameters[0].is_constant()) {
-                    throw api_error::validation(format("{}: begins_with() supports only string or binary in AttributeValue: {}", caller, v1));
-                }
-            }
-            if (!v2.IsObject() || v2.MemberCount() != 1) {
-                bad = true;
-                if (f._parameters[1].is_constant()) {
-                    throw api_error::validation(format("{}: begins_with() encountered malformed AttributeValue: {}", caller, v2));
-                }
-            } else if (v2.MemberBegin()->name != "S" && v2.MemberBegin()->name != "B") {
-                bad = true;
-                if (f._parameters[1].is_constant()) {
-                    throw api_error::validation(format("{}: begins_with() supports only string or binary in AttributeValue: {}", caller, v2));
-                }
-            }
-            bool ret = false;
-            if (!bad) {
-                auto it1 = v1.MemberBegin();
-                auto it2 = v2.MemberBegin();
-                if (it1->name == it2->name) {
-                    if (it2->name == "S") {
-                        std::string_view val1 = rjson::to_string_view(it1->value);
-                        std::string_view val2 = rjson::to_string_view(it2->value);
-                        ret = val1.starts_with(val2);
-                    } else /* it2->name == "B" */ {
-                        ret = base64_begins_with(rjson::to_string_view(it1->value), rjson::to_string_view(it2->value));
-                    }
-                }
-            }
-            return to_bool_json(ret);
+            return to_bool_json(check_BEGINS_WITH(v1.IsNull() ? nullptr : &v1,  v2,
+                                    f._parameters[0].is_constant(), f._parameters[1].is_constant()));
        }
    },
    {"contains", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
--- a/alternator/server.cc
+++ b/alternator/server.cc
@@ -243,8 +243,8 @@ future<> server::verify_signature(const request& req) {
        }
    }

-    auto cache_getter = [&qp = _qp] (std::string username) {
-        return get_key_from_roles(qp, std::move(username));
+    auto cache_getter = [] (std::string username) {
+        return get_key_from_roles(cql3::get_query_processor().local(), std::move(username));
    };
    return _key_cache.get_ptr(user, cache_getter).then([this, &req,
                                                    user = std::move(user),
@@ -328,11 +328,10 @@ void server::set_routes(routes& r) {
 //FIXME: A way to immediately invalidate the cache should be considered,
 // e.g. when the system table which stores the keys is changed.
 // For now, this propagation may take up to 1 minute.
-server::server(executor& exec, cql3::query_processor& qp)
+server::server(executor& exec)
        : _http_server("http-alternator")
        , _https_server("https-alternator")
        , _executor(exec)
-        , _qp(qp)
        , _key_cache(1024, 1min, slogger)
        , _enforce_authorization(false)
        , _enabled_servers{}
--- a/alternator/server.hh
+++ b/alternator/server.hh
@@ -41,7 +41,6 @@ class server {
    http_server _http_server;
    http_server _https_server;
    executor& _executor;
-    cql3::query_processor& _qp;

    key_cache _key_cache;
    bool _enforce_authorization;
@@ -69,7 +68,7 @@ class server {
    json_parser _json_parser;

 public:
-    server(executor& executor, cql3::query_processor& qp);
+    server(executor& executor);

    future<> init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds,
            bool enforce_authorization, semaphore* memory_limiter);
--- a/alternator/streams.cc
+++ b/alternator/streams.cc
@@ -290,9 +290,7 @@ struct sequence_number {
 sequence_number::sequence_number(std::string_view v) 
    : uuid([&] {
        using namespace boost::multiprecision;
-        // workaround for weird clang 10 bug when calling constructor with
-        // view directly.
-        uint128_t tmp{std::string(v)};
+        uint128_t tmp{v};
        // see above
        return utils::UUID_gen::get_time_UUID_raw(uint64_t(tmp >> 64), uint64_t(tmp & std::numeric_limits<uint64_t>::max()));
    }())
@@ -477,8 +475,6 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
            status = "ENABLED";
        }
    } 
-
-    auto ttl = std::chrono::seconds(opts.ttl());
    
    rjson::set(stream_desc, "StreamStatus", rjson::from_string(status));

@@ -498,14 +494,14 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
    // TODO: label
    // TODO: creation time

-    auto normal_token_owners = _proxy.get_token_metadata_ptr()->count_normal_token_owners();
+    const auto& tm = _proxy.get_token_metadata();
    // cannot really "resume" query, must iterate all data. because we cannot query neither "time" (pk) > something,
    // or on expired...
    // TODO: maybe add secondary index to topology table to enable this?
-    return _sdks.cdc_get_versioned_streams({ normal_token_owners }).then([this, &db, schema, shard_start, limit, ret = std::move(ret), stream_desc = std::move(stream_desc), ttl](std::map<db_clock::time_point, cdc::streams_version> topologies) mutable {
+    return _sdks.cdc_get_versioned_streams({ tm.count_normal_token_owners() }).then([this, &db, schema, shard_start, limit, ret = std::move(ret), stream_desc = std::move(stream_desc)](std::map<db_clock::time_point, cdc::streams_version> topologies) mutable {

-        // filter out cdc generations older than the table or now() - cdc::ttl (typically dynamodb_streams_max_window - 24h)
-        auto low_ts = std::max(as_timepoint(schema->id()), db_clock::now() - ttl);
+        // filter out cdc generations older than the table or now() - dynamodb_streams_max_window (24h)
+        auto low_ts = std::max(as_timepoint(schema->id()), db_clock::now() - dynamodb_streams_max_window);

        auto i = topologies.lower_bound(low_ts);
        // need first gen _intersecting_ the timestamp.
@@ -887,17 +883,8 @@ future<executor::request_return_type> executor::get_records(client_state& client
    auto partition_slice = query::partition_slice(
        std::move(bounds)
        , {}, std::move(regular_columns), selection->get_query_options());
-
-	auto& opts = base->cdc_options();
-	auto mul = 2; // key-only, allow for delete + insert
-    if (opts.preimage()) {
-        ++mul;
-    }
-    if (opts.postimage()) {
-        ++mul;
-    }
    auto command = ::make_lw_shared<query::read_command>(schema->id(), schema->version(), partition_slice, _proxy.get_max_result_size(partition_slice),
-            query::row_limit(limit * mul));
+            query::row_limit(limit * 4));

    return _proxy.query(schema, std::move(command), std::move(partition_ranges), cl, service::storage_proxy::coordinator_query_options(default_timeout(), std::move(permit), client_state)).then(
            [this, schema, partition_slice = std::move(partition_slice), selection = std::move(selection), start_time = std::move(start_time), limit, key_names = std::move(key_names), attr_names = std::move(attr_names), type, iter, high_ts] (service::storage_proxy::coordinator_query_result qr) mutable {       
--- a/api/api-doc/storage_proxy.json
+++ b/api/api-doc/storage_proxy.json
@@ -68,7 +68,7 @@
               "summary":"Get the hinted handoff enabled by dc",
               "type":"array",
               "items":{
-                  "type":"array"
+                  "type":"mapper_list"
               },
               "nickname":"get_hinted_handoff_enabled_by_dc",
               "produces":[
--- a/api/api_init.hh
+++ b/api/api_init.hh
@@ -24,7 +24,7 @@
 #include <seastar/http/httpd.hh>

 namespace service { class load_meter; }
-namespace locator { class shared_token_metadata; }
+namespace locator { class token_metadata; }
 namespace cql_transport { class controller; }
 class thrift_controller;
 namespace db { class snapshot_ctl; }
@@ -39,15 +39,13 @@ struct http_context {
    distributed<database>& db;
    distributed<service::storage_proxy>& sp;
    service::load_meter& lmeter;
-    const sharded<locator::shared_token_metadata>& shared_token_metadata;
+    const sharded<locator::token_metadata>& token_metadata;

    http_context(distributed<database>& _db,
            distributed<service::storage_proxy>& _sp,
-            service::load_meter& _lm, const sharded<locator::shared_token_metadata>& _stm)
-            : db(_db), sp(_sp), lmeter(_lm), shared_token_metadata(_stm) {
+            service::load_meter& _lm, const sharded<locator::token_metadata>& _tm)
+            : db(_db), sp(_sp), lmeter(_lm), token_metadata(_tm) {
    }
-
-    const locator::token_metadata& get_token_metadata();
 };

 future<> set_server_init(http_context& ctx);
--- a/api/column_family.cc
+++ b/api/column_family.cc
@@ -331,15 +331,15 @@ void set_column_family(http_context& ctx, routes& r) {
    });

    cf::get_memtable_columns_count.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], 0, [](column_family& cf) {
+        return map_reduce_cf(ctx, req->param["name"], uint64_t{0}, [](column_family& cf) {
            return cf.active_memtable().partition_count();
-        }, std::plus<int>());
+        }, std::plus<>());
    });

    cf::get_all_memtable_columns_count.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, 0, [](column_family& cf) {
+        return map_reduce_cf(ctx, uint64_t{0}, [](column_family& cf) {
            return cf.active_memtable().partition_count();
-        }, std::plus<int>());
+        }, std::plus<>());
    });

    cf::get_memtable_on_heap_size.set(r, [] (const_req req) {
@@ -656,7 +656,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst->filter_size();
+                return s + sst->filter_size();
            });
        }, std::plus<uint64_t>());
    });
@@ -664,7 +664,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_all_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst->filter_size();
+                return s + sst->filter_size();
            });
        }, std::plus<uint64_t>());
    });
@@ -672,7 +672,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst->filter_memory_size();
+                return s + sst->filter_memory_size();
            });
        }, std::plus<uint64_t>());
    });
@@ -680,7 +680,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_all_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst->filter_memory_size();
+                return s + sst->filter_memory_size();
            });
        }, std::plus<uint64_t>());
    });
@@ -688,7 +688,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst->get_summary().memory_footprint();
+                return s + sst->get_summary().memory_footprint();
            });
        }, std::plus<uint64_t>());
    });
@@ -696,7 +696,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_all_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst->get_summary().memory_footprint();
+                return s + sst->get_summary().memory_footprint();
            });
        }, std::plus<uint64_t>());
    });
--- a/api/storage_proxy.cc
+++ b/api/storage_proxy.cc
@@ -201,39 +201,29 @@ void set_storage_proxy(http_context& ctx, routes& r) {
    });

    sp::get_hinted_handoff_enabled.set(r, [&ctx](std::unique_ptr<request> req)  {
-        const auto& filter = service::get_storage_proxy().local().get_hints_host_filter();
-        return make_ready_future<json::json_return_type>(!filter.is_disabled_for_all());
+        auto enabled = ctx.db.local().get_config().hinted_handoff_enabled();
+        return make_ready_future<json::json_return_type>(enabled);
    });

    sp::set_hinted_handoff_enabled.set(r, [](std::unique_ptr<request> req)  {
+        //TBD
+        unimplemented();
        auto enable = req->get_query_param("enable");
-        auto filter = (enable == "true" || enable == "1")
-                ? db::hints::host_filter(db::hints::host_filter::enabled_for_all_tag {})
-                : db::hints::host_filter(db::hints::host_filter::disabled_for_all_tag {});
-        return service::get_storage_proxy().invoke_on_all([filter = std::move(filter)] (service::storage_proxy& sp) {
-            return sp.change_hints_host_filter(filter);
-        }).then([] {
-            return make_ready_future<json::json_return_type>(json_void());
-        });
+        return make_ready_future<json::json_return_type>(json_void());
    });

    sp::get_hinted_handoff_enabled_by_dc.set(r, [](std::unique_ptr<request> req)  {
-        std::vector<sstring> res;
-        const auto& filter = service::get_storage_proxy().local().get_hints_host_filter();
-        const auto& dcs = filter.get_dcs();
-        res.reserve(res.size());
-        std::copy(dcs.begin(), dcs.end(), std::back_inserter(res));
+        //TBD
+        unimplemented();
+        std::vector<sp::mapper_list> res;
        return make_ready_future<json::json_return_type>(res);
    });

    sp::set_hinted_handoff_enabled_by_dc_list.set(r, [](std::unique_ptr<request> req)  {
-        auto dcs = req->get_query_param("dcs");
-        auto filter = db::hints::host_filter::parse_from_dc_list(std::move(dcs));
-        return service::get_storage_proxy().invoke_on_all([filter = std::move(filter)] (service::storage_proxy& sp) {
-            return sp.change_hints_host_filter(filter);
-        }).then([] {
-            return make_ready_future<json::json_return_type>(json_void());
-        });
+        //TBD
+        unimplemented();
+        auto enable = req->get_query_param("dcs");
+        return make_ready_future<json::json_return_type>(json_void());
    });

    sp::get_max_hint_window.set(r, [](std::unique_ptr<request> req)  {
--- a/api/storage_service.cc
+++ b/api/storage_service.cc
@@ -22,7 +22,6 @@
 #include "storage_service.hh"
 #include "api/api-doc/storage_service.json.hh"
 #include "db/config.hh"
-#include "db/schema_tables.hh"
 #include <optional>
 #include <time.h>
 #include <boost/range/adaptor/map.hpp>
@@ -45,14 +44,9 @@
 #include "db/snapshot-ctl.hh"
 #include "transport/controller.hh"
 #include "thrift/controller.hh"
-#include "locator/token_metadata.hh"

 namespace api {

-const locator::token_metadata& http_context::get_token_metadata() {
-        return *shared_token_metadata.local().get();
-}
-
 namespace ss = httpd::storage_service_json;
 using namespace json;

@@ -262,14 +256,14 @@ void set_storage_service(http_context& ctx, routes& r) {
    });

    ss::get_tokens.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.get_token_metadata().sorted_tokens(), [](const dht::token& i) {
+        return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.token_metadata.local().sorted_tokens(), [](const dht::token& i) {
           return boost::lexical_cast<std::string>(i);
        }));
    });

    ss::get_node_tokens.set(r, [&ctx] (std::unique_ptr<request> req) {
        gms::inet_address addr(req->param["endpoint"]);
-        return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.get_token_metadata().get_tokens(addr), [](const dht::token& i) {
+        return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.token_metadata.local().get_tokens(addr), [](const dht::token& i) {
           return boost::lexical_cast<std::string>(i);
       }));
    });
@@ -288,7 +282,7 @@ void set_storage_service(http_context& ctx, routes& r) {
    });

    ss::get_leaving_nodes.set(r, [&ctx](const_req req) {
-        return container_to_vec(ctx.get_token_metadata().get_leaving_endpoints());
+        return container_to_vec(ctx.token_metadata.local().get_leaving_endpoints());
    });

    ss::get_moving_nodes.set(r, [](const_req req) {
@@ -297,7 +291,7 @@ void set_storage_service(http_context& ctx, routes& r) {
    });

    ss::get_joining_nodes.set(r, [&ctx](const_req req) {
-        auto points = ctx.get_token_metadata().get_bootstrap_tokens();
+        auto points = ctx.token_metadata.local().get_bootstrap_tokens();
        std::unordered_set<sstring> addr;
        for (auto i: points) {
            addr.insert(boost::lexical_cast<std::string>(i.second));
@@ -366,7 +360,7 @@ void set_storage_service(http_context& ctx, routes& r) {

    ss::get_host_id_map.set(r, [&ctx](const_req req) {
        std::vector<ss::mapper> res;
-        return map_to_key_value(ctx.get_token_metadata().get_endpoint_to_host_id_map_for_reading(), res);
+        return map_to_key_value(ctx.token_metadata.local().get_endpoint_to_host_id_map_for_reading(), res);
    });

    ss::get_load.set(r, [&ctx](std::unique_ptr<request> req) {
@@ -738,12 +732,9 @@ void set_storage_service(http_context& ctx, routes& r) {
    });

    ss::reset_local_schema.set(r, [](std::unique_ptr<request> req) {
-        // FIXME: We should truncate schema tables if more than one node in the cluster.
-        auto& sp = service::get_storage_proxy();
-        auto& fs = service::get_local_storage_service().features();
-        return db::schema_tables::recalculate_schema_version(sp, fs).then([] {
-            return make_ready_future<json::json_return_type>(json_void());
-        });
+        //TBD
+        unimplemented();
+        return make_ready_future<json::json_return_type>(json_void());
    });

    ss::set_trace_probability.set(r, [](std::unique_ptr<request> req) {
--- a/auth/common.cc
+++ b/auth/common.cc
@@ -108,7 +108,7 @@ future<> wait_for_schema_agreement(::service::migration_manager& mm, const datab
    });
 }

-::service::query_state& internal_distributed_query_state() noexcept {
+const timeout_config& internal_distributed_timeout_config() noexcept {
 #ifdef DEBUG
    // Give the much slower debug tests more headroom for completing auth queries.
    static const auto t = 30s;
@@ -116,9 +116,7 @@ future<> wait_for_schema_agreement(::service::migration_manager& mm, const datab
    static const auto t = 5s;
 #endif
    static const timeout_config tc{t, t, t, t, t, t, t};
-    static thread_local ::service::client_state cs(::service::client_state::internal_tag{}, tc);
-    static thread_local ::service::query_state qs(cs, empty_service_permit());
-    return qs;
+    return tc;
 }

 }
--- a/auth/common.hh
+++ b/auth/common.hh
@@ -35,7 +35,6 @@
 #include "log.hh"
 #include "seastarx.hh"
 #include "utils/exponential_backoff_retry.hh"
-#include "service/query_state.hh"

 using namespace std::chrono_literals;

@@ -88,6 +87,6 @@ future<> wait_for_schema_agreement(::service::migration_manager&, const database
 ///
 /// Time-outs for internal, non-local CQL queries.
 ///
-::service::query_state& internal_distributed_query_state() noexcept;
+const timeout_config& internal_distributed_timeout_config() noexcept;

 }
--- a/auth/default_authorizer.cc
+++ b/auth/default_authorizer.cc
@@ -103,6 +103,7 @@ future<bool> default_authorizer::any_granted() const {
    return _qp.execute_internal(
            query,
            db::consistency_level::LOCAL_ONE,
+            infinite_timeout_config,
            {},
            true).then([this](::shared_ptr<cql3::untyped_result_set> results) {
        return !results->empty();
@@ -115,7 +116,8 @@ future<> default_authorizer::migrate_legacy_metadata() const {

    return _qp.execute_internal(
            query,
-            db::consistency_level::LOCAL_ONE).then([this](::shared_ptr<cql3::untyped_result_set> results) {
+            db::consistency_level::LOCAL_ONE,
+            infinite_timeout_config).then([this](::shared_ptr<cql3::untyped_result_set> results) {
        return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
            return do_with(
                    row.get_as<sstring>("username"),
@@ -195,6 +197,7 @@ default_authorizer::authorize(const role_or_anonymous& maybe_role, const resourc
    return _qp.execute_internal(
            query,
            db::consistency_level::LOCAL_ONE,
+            infinite_timeout_config,
            {*maybe_role.name, r.name()}).then([](::shared_ptr<cql3::untyped_result_set> results) {
        if (results->empty()) {
            return permissions::NONE;
@@ -223,7 +226,7 @@ default_authorizer::modify(
        return _qp.execute_internal(
                query,
                db::consistency_level::ONE,
-                internal_distributed_query_state(),
+                internal_distributed_timeout_config(),
                {permissions::to_strings(set), sstring(role_name), resource.name()}).discard_result();
    });
 }
@@ -248,7 +251,7 @@ future<std::vector<permission_details>> default_authorizer::list_all() const {
    return _qp.execute_internal(
            query,
            db::consistency_level::ONE,
-            internal_distributed_query_state(),
+            internal_distributed_timeout_config(),
            {},
            true).then([](::shared_ptr<cql3::untyped_result_set> results) {
        std::vector<permission_details> all_details;
@@ -275,7 +278,7 @@ future<> default_authorizer::revoke_all(std::string_view role_name) const {
    return _qp.execute_internal(
            query,
            db::consistency_level::ONE,
-            internal_distributed_query_state(),
+            internal_distributed_timeout_config(),
            {sstring(role_name)}).discard_result().handle_exception([role_name](auto ep) {
        try {
            std::rethrow_exception(ep);
@@ -295,6 +298,7 @@ future<> default_authorizer::revoke_all(const resource& resource) const {
    return _qp.execute_internal(
            query,
            db::consistency_level::LOCAL_ONE,
+            infinite_timeout_config,
            {resource.name()}).then_wrapped([this, resource](future<::shared_ptr<cql3::untyped_result_set>> f) {
        try {
            auto res = f.get0();
@@ -311,6 +315,7 @@ future<> default_authorizer::revoke_all(const resource& resource) const {
                return _qp.execute_internal(
                        query,
                        db::consistency_level::LOCAL_ONE,
+                        infinite_timeout_config,
                        {r.get_as<sstring>(ROLE_NAME), resource.name()}).discard_result().handle_exception(
                                [resource](auto ep) {
                    try {
--- a/auth/password_authenticator.cc
+++ b/auth/password_authenticator.cc
@@ -66,7 +66,6 @@ constexpr std::string_view password_authenticator_name("org.apache.cassandra.aut

 // name of the hash column.
 static constexpr std::string_view SALTED_HASH = "salted_hash";
-static constexpr std::string_view OPTIONS = "options";
 static constexpr std::string_view DEFAULT_USER_NAME = meta::DEFAULT_SUPERUSER_NAME;
 static const sstring DEFAULT_USER_PASSWORD = sstring(meta::DEFAULT_SUPERUSER_NAME);

@@ -115,7 +114,7 @@ future<> password_authenticator::migrate_legacy_metadata() const {
    return _qp.execute_internal(
            query,
            db::consistency_level::QUORUM,
-            internal_distributed_query_state()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
+            internal_distributed_timeout_config()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
        return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
            auto username = row.get_as<sstring>("username");
            auto salted_hash = row.get_as<sstring>(SALTED_HASH);
@@ -123,7 +122,7 @@ future<> password_authenticator::migrate_legacy_metadata() const {
            return _qp.execute_internal(
                    update_row_query(),
                    consistency_for_user(username),
-                    internal_distributed_query_state(),
+                    internal_distributed_timeout_config(),
                    {std::move(salted_hash), username}).discard_result();
        }).finally([results] {});
    }).then([] {
@@ -140,7 +139,7 @@ future<> password_authenticator::create_default_if_missing() const {
            return _qp.execute_internal(
                    update_row_query(),
                    db::consistency_level::QUORUM,
-                    internal_distributed_query_state(),
+                    internal_distributed_timeout_config(),
                    {passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt), DEFAULT_USER_NAME}).then([](auto&&) {
                plogger.info("Created default superuser authentication record.");
            });
@@ -204,11 +203,11 @@ bool password_authenticator::require_authentication() const {
 }

 authentication_option_set password_authenticator::supported_options() const {
-    return authentication_option_set{authentication_option::password, authentication_option::options};
+    return authentication_option_set{authentication_option::password};
 }

 authentication_option_set password_authenticator::alterable_options() const {
-    return authentication_option_set{authentication_option::password, authentication_option::options};
+    return authentication_option_set{authentication_option::password};
 }

 future<authenticated_user> password_authenticator::authenticate(
@@ -237,7 +236,7 @@ future<authenticated_user> password_authenticator::authenticate(
        return _qp.execute_internal(
                query,
                consistency_for_user(username),
-                internal_distributed_query_state(),
+                internal_distributed_timeout_config(),
                {username},
                true);
    }).then_wrapped([=](future<::shared_ptr<cql3::untyped_result_set>> f) {
@@ -263,46 +262,21 @@ future<authenticated_user> password_authenticator::authenticate(
    });
 }

-future<> password_authenticator::maybe_update_custom_options(std::string_view role_name, const authentication_options& options) const {
-    static const sstring query = format("UPDATE {} SET {} = ? WHERE {} = ?",
-            meta::roles_table::qualified_name,
-            OPTIONS,
-            meta::roles_table::role_col_name);
-
-    if (!options.options) {
-        return make_ready_future<>();
-    }
-
-    std::vector<std::pair<data_value, data_value>> entries;
-    for (const auto& entry : *options.options) {
-        entries.push_back({data_value(entry.first), data_value(entry.second)});
-    }
-    auto map_value = make_map_value(map_type_impl::get_instance(utf8_type, utf8_type, false), entries);
-
-    return _qp.execute_internal(
-            query,
-            consistency_for_user(role_name),
-            internal_distributed_query_state(),
-            {std::move(map_value), sstring(role_name)}).discard_result();
-}
-
 future<> password_authenticator::create(std::string_view role_name, const authentication_options& options) const {
    if (!options.password) {
-        return maybe_update_custom_options(role_name, options);
+        return make_ready_future<>();
    }

    return _qp.execute_internal(
            update_row_query(),
            consistency_for_user(role_name),
-            internal_distributed_query_state(),
-            {passwords::hash(*options.password, rng_for_salt), sstring(role_name)}).discard_result().then([this, role_name, &options] {
-                return maybe_update_custom_options(role_name, options);
-            });
+            internal_distributed_timeout_config(),
+            {passwords::hash(*options.password, rng_for_salt), sstring(role_name)}).discard_result();
 }

 future<> password_authenticator::alter(std::string_view role_name, const authentication_options& options) const {
    if (!options.password) {
-        return maybe_update_custom_options(role_name, options);
+        return make_ready_future<>();
    }

    static const sstring query = format("UPDATE {} SET {} = ? WHERE {} = ?",
@@ -313,10 +287,8 @@ future<> password_authenticator::alter(std::string_view role_name, const authent
    return _qp.execute_internal(
            query,
            consistency_for_user(role_name),
-            internal_distributed_query_state(),
-            {passwords::hash(*options.password, rng_for_salt), sstring(role_name)}).discard_result().then([this, role_name, &options] {
-                return maybe_update_custom_options(role_name, options);
-            }).discard_result();
+            internal_distributed_timeout_config(),
+            {passwords::hash(*options.password, rng_for_salt), sstring(role_name)}).discard_result();
 }

 future<> password_authenticator::drop(std::string_view name) const {
@@ -327,27 +299,12 @@ future<> password_authenticator::drop(std::string_view name) const {

    return _qp.execute_internal(
            query, consistency_for_user(name),
-            internal_distributed_query_state(),
+            internal_distributed_timeout_config(),
            {sstring(name)}).discard_result();
 }

 future<custom_options> password_authenticator::query_custom_options(std::string_view role_name) const {
-    static const sstring query = format("SELECT {} FROM {} WHERE {} = ?",
-            OPTIONS,
-            meta::roles_table::qualified_name,
-            meta::roles_table::role_col_name);
-
-    return _qp.execute_internal(
-            query, consistency_for_user(role_name),
-            internal_distributed_query_state(),
-            {sstring(role_name)}).then([](::shared_ptr<cql3::untyped_result_set> rs) {
-        custom_options opts;
-        const auto& row = rs->one();
-        if (row.has(OPTIONS)) {
-            row.get_map_data<sstring, sstring>(OPTIONS, std::inserter(opts, opts.end()), utf8_type, utf8_type);
-        }
-        return opts;
-    });
+    return make_ready_future<custom_options>();
 }

 const resource_set& password_authenticator::protected_resources() const {
--- a/auth/password_authenticator.hh
+++ b/auth/password_authenticator.hh
@@ -94,8 +94,6 @@ public:
    virtual ::shared_ptr<sasl_challenge> new_sasl_challenge() const override;

 private:
-    future<> maybe_update_custom_options(std::string_view role_name, const authentication_options& options) const;
-
    bool legacy_metadata_exists() const;

    future<> migrate_legacy_metadata() const;
--- a/auth/roles-metadata.cc
+++ b/auth/roles-metadata.cc
@@ -43,8 +43,7 @@ std::string_view creation_query() {
            "  can_login boolean,"
            "  is_superuser boolean,"
            "  member_of set<text>,"
-            "  salted_hash text,"
-            "  options frozen<map<text, text>>,"
+            "  salted_hash text"
            ")",
            qualified_name,
            role_col_name);
@@ -69,13 +68,14 @@ future<bool> default_role_row_satisfies(
        return qp.execute_internal(
                query,
                db::consistency_level::ONE,
+                infinite_timeout_config,
                {meta::DEFAULT_SUPERUSER_NAME},
                true).then([&qp, &p](::shared_ptr<cql3::untyped_result_set> results) {
            if (results->empty()) {
                return qp.execute_internal(
                        query,
                        db::consistency_level::QUORUM,
-                        internal_distributed_query_state(),
+                        internal_distributed_timeout_config(),
                        {meta::DEFAULT_SUPERUSER_NAME},
                        true).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
                    if (results->empty()) {
@@ -100,7 +100,7 @@ future<bool> any_nondefault_role_row_satisfies(
        return qp.execute_internal(
                query,
                db::consistency_level::QUORUM,
-                internal_distributed_query_state()).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
+                internal_distributed_timeout_config()).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
            if (results->empty()) {
                return false;
            }
--- a/auth/service.cc
+++ b/auth/service.cc
@@ -210,6 +210,7 @@ future<bool> service::has_existing_legacy_users() const {
    return _qp.execute_internal(
            default_user_query,
            db::consistency_level::ONE,
+            infinite_timeout_config,
            {meta::DEFAULT_SUPERUSER_NAME},
            true).then([this](auto results) {
        if (!results->empty()) {
@@ -219,6 +220,7 @@ future<bool> service::has_existing_legacy_users() const {
        return _qp.execute_internal(
                default_user_query,
                db::consistency_level::QUORUM,
+                infinite_timeout_config,
                {meta::DEFAULT_SUPERUSER_NAME},
                true).then([this](auto results) {
            if (!results->empty()) {
@@ -227,7 +229,8 @@ future<bool> service::has_existing_legacy_users() const {

            return _qp.execute_internal(
                    all_users_query,
-                    db::consistency_level::QUORUM).then([](auto results) {
+                    db::consistency_level::QUORUM,
+                    infinite_timeout_config).then([](auto results) {
                return make_ready_future<bool>(!results->empty());
            });
        });
@@ -368,13 +371,10 @@ bool is_enforcing(const service& ser)  {
    return enforcing_authorizer || enforcing_authenticator;
 }

-bool is_protected(const service& ser, command_desc cmd) noexcept {
-    if (cmd.type_ == command_desc::type::ALTER_WITH_OPTS) {
-        return false; // Table attributes are OK to modify; see #7057.
-    }
-    return ser.underlying_role_manager().protected_resources().contains(cmd.resource)
-            || ser.underlying_authenticator().protected_resources().contains(cmd.resource)
-            || ser.underlying_authorizer().protected_resources().contains(cmd.resource);
+bool is_protected(const service& ser, const resource& r) noexcept {
+    return ser.underlying_role_manager().protected_resources().contains(r)
+            || ser.underlying_authenticator().protected_resources().contains(r)
+            || ser.underlying_authorizer().protected_resources().contains(r);
 }

 static void validate_authentication_options_are_supported(
--- a/auth/service.hh
+++ b/auth/service.hh
@@ -181,21 +181,10 @@ future<permission_set> get_permissions(const service&, const authenticated_user&
 ///
 bool is_enforcing(const service&);

-/// A description of a CQL command from which auth::service can tell whether or not this command could endanger
-/// internal data on which auth::service depends.
-struct command_desc {
-    auth::permission permission; ///< Nature of the command's alteration.
-    const ::auth::resource& resource; ///< Resource impacted by this command.
-    enum class type {
-        ALTER_WITH_OPTS, ///< Command is ALTER ... WITH ...
-        OTHER
-    } type_ = type::OTHER;
-};
-
 ///
 /// Protected resources cannot be modified even if the performer has permissions to do so.
 ///
-bool is_protected(const service&, command_desc) noexcept;
+bool is_protected(const service&, const resource&) noexcept;

 ///
 /// Create a role with optional authentication information.
--- a/auth/standard_role_manager.cc
+++ b/auth/standard_role_manager.cc
@@ -86,7 +86,7 @@ static future<std::optional<record>> find_record(cql3::query_processor& qp, std:
    return qp.execute_internal(
            query,
            consistency_for_role(role_name),
-            internal_distributed_query_state(),
+            internal_distributed_timeout_config(),
            {sstring(role_name)},
            true).then([](::shared_ptr<cql3::untyped_result_set> results) {
        if (results->empty()) {
@@ -165,7 +165,7 @@ future<> standard_role_manager::create_default_role_if_missing() const {
            return _qp.execute_internal(
                    query,
                    db::consistency_level::QUORUM,
-                    internal_distributed_query_state(),
+                    internal_distributed_timeout_config(),
                    {meta::DEFAULT_SUPERUSER_NAME}).then([](auto&&) {
                log.info("Created default superuser role '{}'.", meta::DEFAULT_SUPERUSER_NAME);
                return make_ready_future<>();
@@ -192,7 +192,7 @@ future<> standard_role_manager::migrate_legacy_metadata() const {
    return _qp.execute_internal(
            query,
            db::consistency_level::QUORUM,
-            internal_distributed_query_state()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
+            internal_distributed_timeout_config()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
        return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
            role_config config;
            config.is_superuser = row.get_or<bool>("super", false);
@@ -253,7 +253,7 @@ future<> standard_role_manager::create_or_replace(std::string_view role_name, co
    return _qp.execute_internal(
            query,
            consistency_for_role(role_name),
-            internal_distributed_query_state(),
+            internal_distributed_timeout_config(),
            {sstring(role_name), c.is_superuser, c.can_login},
            true).discard_result();
 }
@@ -296,7 +296,7 @@ standard_role_manager::alter(std::string_view role_name, const role_config_updat
                        build_column_assignments(u),
                        meta::roles_table::role_col_name),
                consistency_for_role(role_name),
-                internal_distributed_query_state(),
+                internal_distributed_timeout_config(),
                {sstring(role_name)}).discard_result();
    });
 }
@@ -315,7 +315,7 @@ future<> standard_role_manager::drop(std::string_view role_name) const {
            return _qp.execute_internal(
                    query,
                    consistency_for_role(role_name),
-                    internal_distributed_query_state(),
+                    internal_distributed_timeout_config(),
                    {sstring(role_name)}).then([this, role_name](::shared_ptr<cql3::untyped_result_set> members) {
                return parallel_for_each(
                        members->begin(),
@@ -354,7 +354,7 @@ future<> standard_role_manager::drop(std::string_view role_name) const {
            return _qp.execute_internal(
                    query,
                    consistency_for_role(role_name),
-                    internal_distributed_query_state(),
+                    internal_distributed_timeout_config(),
                    {sstring(role_name)}).discard_result();
        };

@@ -381,7 +381,7 @@ standard_role_manager::modify_membership(
        return _qp.execute_internal(
                query,
                consistency_for_role(grantee_name),
-                internal_distributed_query_state(),
+                internal_distributed_timeout_config(),
                {role_set{sstring(role_name)}, sstring(grantee_name)}).discard_result();
    };

@@ -392,7 +392,7 @@ standard_role_manager::modify_membership(
                        format("INSERT INTO {} (role, member) VALUES (?, ?)",
                                meta::role_members_table::qualified_name),
                        consistency_for_role(role_name),
-                        internal_distributed_query_state(),
+                        internal_distributed_timeout_config(),
                        {sstring(role_name), sstring(grantee_name)}).discard_result();

            case membership_change::remove:
@@ -400,7 +400,7 @@ standard_role_manager::modify_membership(
                        format("DELETE FROM {} WHERE role = ? AND member = ?",
                                meta::role_members_table::qualified_name),
                        consistency_for_role(role_name),
-                        internal_distributed_query_state(),
+                        internal_distributed_timeout_config(),
                        {sstring(role_name), sstring(grantee_name)}).discard_result();
        }

@@ -503,7 +503,7 @@ future<role_set> standard_role_manager::query_all() const {
    return _qp.execute_internal(
            query,
            db::consistency_level::QUORUM,
-            internal_distributed_query_state()).then([](::shared_ptr<cql3::untyped_result_set> results) {
+            internal_distributed_timeout_config()).then([](::shared_ptr<cql3::untyped_result_set> results) {
        role_set roles;

        std::transform(
--- a/bytes_ostream.hh
+++ b/bytes_ostream.hh
@@ -65,14 +65,7 @@ private:
    size_type _size;
    size_type _initial_chunk_size = default_chunk_size;
 public:
-    class fragment_iterator {
-    public:
-        using iterator_category = std::input_iterator_tag;
-        using value_type = bytes_view;
-        using difference_type = std::ptrdiff_t;
-        using pointer = bytes_view*;
-        using reference = bytes_view&;
-    private:
+    class fragment_iterator : public std::iterator<std::input_iterator_tag, bytes_view> {
        chunk* _current = nullptr;
    public:
        fragment_iterator() = default;
--- a/cache_flat_mutation_reader.hh
+++ b/cache_flat_mutation_reader.hh
@@ -508,7 +508,7 @@ void cache_flat_mutation_reader::copy_from_cache_to_buffer() {
        // This guarantees that rts starts after any emitted clustering_row
        // and not before any emitted range tombstone.
        if (!less(_lower_bound, rts.position())) {
-            rts.set_start(_lower_bound);
+            rts.set_start(*_schema, _lower_bound);
        } else {
            _lower_bound = position_in_partition(rts.position());
            _lower_bound_changed = true;
@@ -644,7 +644,7 @@ void cache_flat_mutation_reader::add_to_buffer(range_tombstone&& rt) {
        return;
    }
    if (!less(_lower_bound, rt.position())) {
-        rt.set_start(_lower_bound);
+        rt.set_start(*_schema, _lower_bound);
    } else {
        _lower_bound = position_in_partition(rt.position());
        _lower_bound_changed = true;
--- a/cartesian_product.hh
+++ b/cartesian_product.hh
@@ -33,13 +33,9 @@ template<typename T>
 struct cartesian_product {
    const std::vector<std::vector<T>>& _vec_of_vecs;
 public:
-    class iterator {
+    class iterator : public std::iterator<std::forward_iterator_tag, std::vector<T>> {
    public:
-        using iterator_category = std::forward_iterator_tag;
        using value_type = std::vector<T>;
-        using difference_type = std::ptrdiff_t;
-        using pointer = std::vector<T>*;
-        using reference = std::vector<T>&;
    private:
        size_t _pos;
        const std::vector<std::vector<T>>* _vec_of_vecs;
--- a/cdc/generation.cc
+++ b/cdc/generation.cc
@@ -23,6 +23,7 @@
 #include <random>
 #include <unordered_set>
 #include <seastar/core/sleep.hh>
+#include <algorithm>

 #include "keys.hh"
 #include "schema_builder.hh"
@@ -174,19 +175,38 @@ bool topology_description::operator==(const topology_description& o) const {
    return _entries == o._entries;
 }

-const std::vector<token_range_description>& topology_description::entries() const {
+const std::vector<token_range_description>& topology_description::entries() const& {
    return _entries;
 }

+std::vector<token_range_description>&& topology_description::entries() && {
+    return std::move(_entries);
+}
+
+// Builds one stream_id per shard for the vnode numbered `index`, using the tokens found between `start` and `end`.
+static std::vector<stream_id> create_stream_ids(
+        size_t index, dht::token start, dht::token end, size_t shard_count, uint8_t ignore_msb) {
+    std::vector<stream_id> result;
+    result.reserve(shard_count);
+    dht::sharder sharder(shard_count, ignore_msb);
+    for (size_t shard_idx = 0; shard_idx < shard_count; ++shard_idx) {
+        auto t = dht::find_first_token_for_shard(sharder, start, end, shard_idx);
+        // Compose the id from the token and the "index" of the range-end-owning vnode
+        // (as defined by token sort order), which groups the ids within this shard set.
+        result.emplace_back(t, index);
+    }
+    return result;
+}
+
 class topology_description_generator final {
    const db::config& _cfg;
    const std::unordered_set<dht::token>& _bootstrap_tokens;
-    const locator::token_metadata_ptr _tmptr;
+    const locator::token_metadata& _token_metadata;
    const gms::gossiper& _gossiper;

    // Compute a set of tokens that split the token ring into vnodes
    auto get_tokens() const {
-        auto tokens = _tmptr->sorted_tokens();
+        auto tokens = _token_metadata.sorted_tokens();
        auto it = tokens.insert(
                tokens.end(), _bootstrap_tokens.begin(), _bootstrap_tokens.end());
        std::sort(it, tokens.end());
@@ -201,7 +221,7 @@ class topology_description_generator final {
        if (_bootstrap_tokens.contains(end)) {
            return {smp::count, _cfg.murmur3_partitioner_ignore_msb_bits()};
        } else {
-            auto endpoint = _tmptr->get_endpoint(end);
+            auto endpoint = _token_metadata.get_endpoint(end);
            if (!endpoint) {
                throw std::runtime_error(
                        format("Can't find endpoint for token {}", end));
@@ -217,29 +237,20 @@ class topology_description_generator final {
        desc.token_range_end = end;

        auto [shard_count, ignore_msb] = get_sharding_info(end);
-        desc.streams.reserve(shard_count);
+        desc.streams = create_stream_ids(index, start, end, shard_count, ignore_msb);
        desc.sharding_ignore_msb = ignore_msb;

-        dht::sharder sharder(shard_count, ignore_msb);
-        for (size_t shard_idx = 0; shard_idx < shard_count; ++shard_idx) {
-            auto t = dht::find_first_token_for_shard(sharder, start, end, shard_idx);
-            // compose the id from token and the "index" of the range end owning vnode
-            // as defined by token sort order. Basically grouping within this
-            // shard set.
-            desc.streams.emplace_back(stream_id(t, index));
-        }
-
        return desc;
    }
 public:
    topology_description_generator(
            const db::config& cfg,
            const std::unordered_set<dht::token>& bootstrap_tokens,
-            const locator::token_metadata_ptr tmptr,
+            const locator::token_metadata& token_metadata,
            const gms::gossiper& gossiper)
        : _cfg(cfg)
        , _bootstrap_tokens(bootstrap_tokens)
-        , _tmptr(std::move(tmptr))
+        , _token_metadata(token_metadata)
        , _gossiper(gossiper)
    {}

@@ -294,23 +305,67 @@ future<db_clock::time_point> get_local_streams_timestamp() {
    });
 }

+// Upper bound on the number of streams in one topology description. Non-static for testing.
+size_t limit_of_streams_in_topology_description() {
+    // Each stream takes 16B and we don't want to exceed 4MB, so we can have
+    // at most 4MB / 16B = 262144 streams (but not less than 1 per vnode).
+    return 4 * 1024 * 1024 / 16;
+}
+
+// Returns `desc` with per-vnode stream lists shrunk so the total fits the limit. Non-static for testing.
+topology_description limit_number_of_streams_if_needed(topology_description&& desc) {
+    size_t streams_count = 0;
+    for (const auto& tr_desc : desc.entries()) {
+        streams_count += tr_desc.streams.size();
+    }
+    // Never cap below one stream per vnode, even if that exceeds the global limit.
+    const size_t limit = std::max(limit_of_streams_in_topology_description(), desc.entries().size());
+    if (streams_count <= limit) {
+        return std::move(desc);
+    }
+    const size_t streams_per_vnode_limit = limit / desc.entries().size();
+    auto entries = std::move(desc).entries();
+    auto start = entries.back().token_range_end; // entry 0 starts where the last entry ends
+    for (size_t idx = 0; idx < entries.size(); ++idx) {
+        auto& entry = entries[idx];
+        if (entry.streams.size() > streams_per_vnode_limit) {
+            entry.streams = create_stream_ids(idx, start, entry.token_range_end, streams_per_vnode_limit, entry.sharding_ignore_msb);
+        }
+        start = entry.token_range_end;
+    }
+    return topology_description(std::move(entries));
+}
+
 // Run inside seastar::async context.
 db_clock::time_point make_new_cdc_generation(
        const db::config& cfg,
        const std::unordered_set<dht::token>& bootstrap_tokens,
-        const locator::token_metadata_ptr tmptr,
+        const locator::token_metadata& tm,
        const gms::gossiper& g,
        db::system_distributed_keyspace& sys_dist_ks,
        std::chrono::milliseconds ring_delay,
-        bool add_delay) {
+        bool for_testing) {
    using namespace std::chrono;
-    auto gen = topology_description_generator(cfg, bootstrap_tokens, tmptr, g).generate();
+    auto gen = topology_description_generator(cfg, bootstrap_tokens, tm, g).generate();
+
+    // If the cluster is large we may end up with a generation that contains
+    // a large number of streams. This is problematic because we store the
+    // generation in a single row. For a generation with a large number of
+    // streams this will lead to a row that can be as big as 32MB. This is much
+    // more than the limit imposed by commitlog_segment_size_in_mb. If the size
+    // of the row that describes a new generation grows above
+    // commitlog_segment_size_in_mb, the write will fail and the new node won't
+    // be able to join. To avoid such a problem we make sure that such a row is
+    // always smaller than 4MB. We do that by removing some CDC streams from
+    // each vnode if the total number of streams is too large.
+    gen = limit_number_of_streams_if_needed(std::move(gen));

    // Begin the race.
    auto ts = db_clock::now() + (
-            (!add_delay || ring_delay == milliseconds(0)) ? milliseconds(0) : (
+            (for_testing || ring_delay == milliseconds(0)) ? milliseconds(0) : (
                2 * ring_delay + duration_cast<milliseconds>(generation_leeway)));
-    sys_dist_ks.insert_cdc_topology_description(ts, std::move(gen), { tmptr->count_normal_token_owners() }).get();
+    sys_dist_ks.insert_cdc_topology_description(ts, std::move(gen), { tm.count_normal_token_owners() }).get();

    return ts;
 }
--- a/cdc/generation.hh
+++ b/cdc/generation.hh
@@ -40,7 +40,6 @@
 #include "database_fwd.hh"
 #include "db_clock.hh"
 #include "dht/token.hh"
-#include "locator/token_metadata.hh"

 namespace seastar {
    class abort_source;
@@ -56,6 +55,10 @@ namespace gms {
    class gossiper;
 } // namespace gms

+namespace locator {
+    class token_metadata;
+} // namespace locator
+
 namespace cdc {

 class stream_id final {
@@ -65,6 +68,7 @@ public:

    stream_id() = default;
    stream_id(bytes);
+    stream_id(dht::token, size_t);

    bool is_set() const;
    bool operator==(const stream_id&) const;
@@ -78,9 +82,6 @@ public:

    partition_key to_partition_key(const schema& log_schema) const;
    static int64_t token_from_bytes(bytes_view);
-private:
-    friend class topology_description_generator;
-    stream_id(dht::token, size_t);
 };

 /* Describes a mapping of tokens to CDC streams in a token range.
@@ -113,7 +114,8 @@ public:
    topology_description(std::vector<token_range_description> entries);
    bool operator==(const topology_description&) const;

-    const std::vector<token_range_description>& entries() const;
+    const std::vector<token_range_description>& entries() const&;
+    std::vector<token_range_description>&& entries() &&;
 };

 /**
@@ -165,11 +167,11 @@ future<db_clock::time_point> get_local_streams_timestamp();
 db_clock::time_point make_new_cdc_generation(
        const db::config& cfg,
        const std::unordered_set<dht::token>& bootstrap_tokens,
-        const locator::token_metadata_ptr tmptr,
+        const locator::token_metadata& tm,
        const gms::gossiper& g,
        db::system_distributed_keyspace& sys_dist_ks,
        std::chrono::milliseconds ring_delay,
-        bool add_delay);
+        bool for_testing);

 /* Retrieves CDC streams generation timestamp from the given endpoint's application state (broadcasted through gossip).
 * We might be during a rolling upgrade, so the timestamp might not be there (if the other node didn't upgrade yet),
--- a/cdc/log.cc
+++ b/cdc/log.cc
@@ -600,14 +600,7 @@ db_context db_context::builder::build() {

 // iterators for collection merge
 template<typename T>
-class collection_iterator {
-public:
-    using iterator_category = std::input_iterator_tag;
-    using value_type = const T;
-    using difference_type = std::ptrdiff_t;
-    using pointer = const T*;
-    using reference = const T&;
-private:
+class collection_iterator : public std::iterator<std::input_iterator_tag, const T> {
    bytes_view _v, _next;
    size_t _rem = 0;
    T _current;
@@ -987,9 +980,9 @@ static bytes get_bytes(const atomic_cell_view& acv) {
    return acv.value().linearize();
 }

-static bytes_view get_bytes_view(const atomic_cell_view& acv, std::vector<bytes>& buf) {
+static bytes_view get_bytes_view(const atomic_cell_view& acv, std::forward_list<bytes>& buf) {
    return acv.value().is_fragmented()
-        ? bytes_view{buf.emplace_back(acv.value().linearize())}
+        ? bytes_view{buf.emplace_front(acv.value().linearize())}
        : acv.value().first_fragment();
 }

@@ -1144,9 +1137,9 @@ struct process_row_visitor {

                struct udt_visitor : public collection_visitor {
                    std::vector<bytes_opt> _added_cells;
-                    std::vector<bytes>& _buf;
+                    std::forward_list<bytes>& _buf;

-                    udt_visitor(ttl_opt& ttl_column, size_t num_keys, std::vector<bytes>& buf)
+                    udt_visitor(ttl_opt& ttl_column, size_t num_keys, std::forward_list<bytes>& buf)
                        : collection_visitor(ttl_column), _added_cells(num_keys), _buf(buf) {}

                    void live_collection_cell(bytes_view key, const atomic_cell_view& cell) {
@@ -1155,7 +1148,7 @@ struct process_row_visitor {
                    }
                };

-                std::vector<bytes> buf;
+                std::forward_list<bytes> buf;
                udt_visitor v(_ttl_column, type.size(), buf);

                visit_collection(v);
@@ -1174,9 +1167,9 @@ struct process_row_visitor {

                struct map_or_list_visitor : public collection_visitor {
                    std::vector<std::pair<bytes_view, bytes_view>> _added_cells;
-                    std::vector<bytes>& _buf;
+                    std::forward_list<bytes>& _buf;

-                    map_or_list_visitor(ttl_opt& ttl_column, std::vector<bytes>& buf)
+                    map_or_list_visitor(ttl_opt& ttl_column, std::forward_list<bytes>& buf)
                        : collection_visitor(ttl_column), _buf(buf) {}

                    void live_collection_cell(bytes_view key, const atomic_cell_view& cell) {
@@ -1185,7 +1178,7 @@ struct process_row_visitor {
                    }
                };

-                std::vector<bytes> buf;
+                std::forward_list<bytes> buf;
                map_or_list_visitor v(_ttl_column, buf);

                visit_collection(v);
@@ -1297,6 +1290,13 @@ struct process_change_visitor {
                _clustering_row_states, _generate_delta_values);
        visit_row_cells(v);

+        if (_enable_updating_state) {
+            // #7716: if there are no regular columns, our visitor would not have visited any cells,
+            // hence it would not have created a row_state for this row. In effect, postimage wouldn't be produced.
+            // Ensure that the row state exists.
+            _clustering_row_states.try_emplace(ckey);
+        }
+
        _builder.set_operation(log_ck, v._cdc_op);
        _builder.set_ttl(log_ck, v._ttl_column);
    }
--- a/cdc/metadata.cc
+++ b/cdc/metadata.cc
@@ -51,7 +51,8 @@ static cdc::stream_id get_stream(
    return entry.streams[shard_id];
 }

-static cdc::stream_id get_stream(
+// non-static for testing
+cdc::stream_id get_stream(
        const std::vector<cdc::token_range_description>& entries,
        dht::token tok) {
    if (entries.empty()) {
--- a/clustering_interval_set.hh
+++ b/clustering_interval_set.hh
@@ -72,14 +72,7 @@ public:
        }
        return result;
    }
-    class position_range_iterator {
-    public:
-        using iterator_category = std::input_iterator_tag;
-        using value_type = const position_range;
-        using difference_type = std::ptrdiff_t;
-        using pointer = const position_range*;
-        using reference = const position_range&;
-    private:
+    class position_range_iterator : public std::iterator<std::input_iterator_tag, const position_range> {
        set_type::iterator _i;
    public:
        position_range_iterator(set_type::iterator i) : _i(i) {}
--- a/column_computation.hh
+++ b/column_computation.hh
@@ -54,36 +54,6 @@ public:
    virtual bytes_opt compute_value(const schema& schema, const partition_key& key, const clustering_row& row) const = 0;
 };

-/*
- * Computes token value of partition key and returns it as bytes.
- *
- * Should NOT be used (use token_column_computation), because ordering
- * of bytes is different than ordering of tokens (signed vs unsigned comparison).
- *
- * The type name stored for computations of this class is "token" - this was
- * the original implementation. (now depracated for new tables)
- */
-class legacy_token_column_computation : public column_computation {
-public:
-    virtual column_computation_ptr clone() const override {
-        return std::make_unique<legacy_token_column_computation>(*this);
-    }
-    virtual bytes serialize() const override;
-    virtual bytes_opt compute_value(const schema& schema, const partition_key& key, const clustering_row& row) const override;
-};
-
-
-/*
- * Computes token value of partition key and returns it as long_type.
- * The return type means that it can be trivially sorted (for example
- * if computed column using this computation is a clustering key),
- * preserving the correct order of tokens (using signed comparisons).
- *
- * Please use this class instead of legacy_token_column_computation.
- * 
- * The type name stored for computations of this class is "token_v2".
- * (the name "token" refers to the depracated legacy_token_column_computation)
- */
 class token_column_computation : public column_computation {
 public:
    virtual column_computation_ptr clone() const override {
--- a/compound.hh
+++ b/compound.hh
@@ -130,13 +130,7 @@ public:
    bytes decompose_value(const value_type& values) const {
        return serialize_value(values);
    }
-    class iterator {
-    public:
-        using iterator_category = std::input_iterator_tag;
-        using value_type = const bytes_view;
-        using difference_type = std::ptrdiff_t;
-        using pointer = const bytes_view*;
-        using reference = const bytes_view&;
+    class iterator : public std::iterator<std::input_iterator_tag, const bytes_view> {
    private:
        bytes_view _v;
        bytes_view _current;
--- a/compound_compat.hh
+++ b/compound_compat.hh
@@ -61,14 +61,7 @@ public:
        , _packed(packed)
    { }

-    class iterator {
-    public:
-        using iterator_category = std::input_iterator_tag;
-        using value_type = bytes::value_type;
-        using difference_type = std::ptrdiff_t;
-        using pointer = bytes::value_type*;
-        using reference = bytes::value_type&;
-    private:
+    class iterator : public std::iterator<std::input_iterator_tag, bytes::value_type> {
        bool _singular;
        // Offset within virtual output space of a component.
        //
@@ -346,14 +339,7 @@ public:
        return eoc_byte == 0 ? eoc::none : (eoc_byte < 0 ? eoc::start : eoc::end);
    }

-    class iterator {
-    public:
-        using iterator_category = std::input_iterator_tag;
-        using value_type = const component_view;
-        using difference_type = std::ptrdiff_t;
-        using pointer = const component_view*;
-        using reference = const component_view&;
-    private:
+    class iterator : public std::iterator<std::input_iterator_tag, const component_view> {
        bytes_view _v;
        component_view _current;
        bool _strict_mode = true;
--- a/conf/scylla.yaml
+++ b/conf/scylla.yaml
@@ -230,9 +230,6 @@ batch_size_fail_threshold_in_kb: 50
 # - PasswordAuthenticator relies on username/password pairs to authenticate
 #   users. It keeps usernames and hashed passwords in system_auth.credentials table.
 #   Please increase system_auth keyspace replication factor if you use this authenticator.
-# - com.scylladb.auth.TransitionalAuthenticator requires username/password pair
-#   to authenticate in the same manner as PasswordAuthenticator, but improper credentials
-#   result in being logged in as an anonymous user. Use for upgrading clusters' auth.
 # authenticator: AllowAllAuthenticator

 # Authorization backend, implementing IAuthorizer; used to limit access/provide permissions
@@ -242,9 +239,6 @@ batch_size_fail_threshold_in_kb: 50
 # - AllowAllAuthorizer allows any action to any user - set it to disable authorization.
 # - CassandraAuthorizer stores permissions in system_auth.permissions table. Please
 #   increase system_auth keyspace replication factor if you use this authorizer.
-# - com.scylladb.auth.TransitionalAuthorizer wraps around the CassandraAuthorizer, using it for
-#   authorizing permission management. Otherwise, it allows all. Use for upgrading
-#   clusters' auth.
 # authorizer: AllowAllAuthorizer

 # initial_token allows you to specify tokens manually.  While you can use # it with
--- a/configure.py
+++ b/configure.py
@@ -257,24 +257,25 @@ modes = {
        'stack-usage-threshold': 1024*40,
    },
    'release': {
-        'cxxflags': '-O3 -ffunction-sections -fdata-sections ',
-        'cxx_ld_flags': '-Wl,--gc-sections',
+        'cxxflags': '',
+        'cxx_ld_flags': '-O3 -ffunction-sections -fdata-sections -Wl,--gc-sections',
        'stack-usage-threshold': 1024*13,
    },
    'dev': {
-        'cxxflags': '-O1 -DSEASTAR_ENABLE_ALLOC_FAILURE_INJECTION -DSCYLLA_ENABLE_ERROR_INJECTION',
-        'cxx_ld_flags': '',
+        'cxxflags': '-DSEASTAR_ENABLE_ALLOC_FAILURE_INJECTION -DSCYLLA_ENABLE_ERROR_INJECTION',
+        'cxx_ld_flags': '-O1',
        'stack-usage-threshold': 1024*21,
    },
    'sanitize': {
-        'cxxflags': '-Os -DDEBUG -DSANITIZE -DDEBUG_LSA_SANITIZER -DSCYLLA_ENABLE_ERROR_INJECTION',
-        'cxx_ld_flags': '',
+        'cxxflags': '-DDEBUG -DSANITIZE -DDEBUG_LSA_SANITIZER -DSCYLLA_ENABLE_ERROR_INJECTION',
+        'cxx_ld_flags': '-Os',
        'stack-usage-threshold': 1024*50,
    }
 }

 scylla_tests = set([
    'test/boost/UUID_test',
+    'test/boost/cdc_generation_test',
    'test/boost/aggregate_fcts_test',
    'test/boost/allocation_strategy_test',
    'test/boost/alternator_base64_test',
@@ -314,7 +315,6 @@ scylla_tests = set([
    'test/boost/crc_test',
    'test/boost/data_listeners_test',
    'test/boost/database_test',
-    'test/boost/double_decker_test',
    'test/boost/duration_test',
    'test/boost/dynamic_bitset_test',
    'test/boost/enum_option_test',
@@ -330,7 +330,6 @@ scylla_tests = set([
    'test/boost/gossiping_property_file_snitch_test',
    'test/boost/hash_test',
    'test/boost/idl_test',
-    'test/boost/imr_test',
    'test/boost/input_stream_test',
    'test/boost/json_cql_query_test',
    'test/boost/json_test',
@@ -385,7 +384,6 @@ scylla_tests = set([
    'test/boost/sstable_resharding_test',
    'test/boost/sstable_directory_test',
    'test/boost/sstable_test',
-    'test/boost/sstable_move_test',
    'test/boost/storage_proxy_test',
    'test/boost/top_k_test',
    'test/boost/transport_test',
@@ -420,7 +418,7 @@ scylla_tests = set([
    'test/perf/perf_fast_forward',
    'test/perf/perf_hash',
    'test/perf/perf_mutation',
-    'test/perf/perf_collection',
+    'test/perf/perf_bptree',
    'test/perf/perf_row_cache_update',
    'test/perf/perf_simple_query',
    'test/perf/perf_sstable',
@@ -479,9 +477,9 @@ arg_parser.add_argument('--ldflags', action='store', dest='user_ldflags', defaul
                        help='Extra flags for the linker')
 arg_parser.add_argument('--target', action='store', dest='target', default=default_target_arch(),
                        help='Target architecture (-march)')
-arg_parser.add_argument('--compiler', action='store', dest='cxx', default='clang++',
+arg_parser.add_argument('--compiler', action='store', dest='cxx', default='g++',
                        help='C++ compiler path')
-arg_parser.add_argument('--c-compiler', action='store', dest='cc', default='clang',
+arg_parser.add_argument('--c-compiler', action='store', dest='cc', default='gcc',
                        help='C compiler path')
 add_tristate(arg_parser, name='dpdk', dest='dpdk',
                        help='Use dpdk (from seastar dpdk sources) (default=True for release builds)')
@@ -521,6 +519,17 @@ arg_parser.add_argument('--test-repeat', dest='test_repeat', action='store', typ
 arg_parser.add_argument('--test-timeout', dest='test_timeout', action='store', type=str, default='7200')
 args = arg_parser.parse_args()

+coroutines_test_src = '''
+#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+#if GCC_VERSION < 100201
+    #error "Coroutines support requires at least gcc 10.2.1"
+#endif
+'''
+compiler_supports_coroutines = try_compile(compiler=args.cxx, source=coroutines_test_src)
+
+if args.build_raft and not compiler_supports_coroutines:
+    raise Exception("--build-raft is requested, while the used compiler does not support coroutines")
+
 if not args.build_raft:
    all_artifacts.difference_update(raft_tests)
    tests.difference_update(raft_tests)
@@ -718,7 +727,6 @@ scylla_core = (['database.cc',
                'db/data_listeners.cc',
                'db/hints/manager.cc',
                'db/hints/resource_manager.cc',
-                'db/hints/host_filter.cc',
                'db/config.cc',
                'db/extensions.cc',
                'db/heat_load_balance.cc',
@@ -847,6 +855,7 @@ scylla_core = (['database.cc',
                'utils/error_injection.cc',
                'mutation_writer/timestamp_based_splitting_writer.cc',
                'mutation_writer/shard_based_splitting_writer.cc',
+                'mutation_writer/feed_writers.cc',
                'lua.cc',
                ] + [Antlr3Grammar('cql3/Cql.g')] + [Thrift('interface/cassandra.thrift', 'Cassandra')]
               )
@@ -1030,7 +1039,7 @@ tests_not_using_seastar_test_framework = set([
    'test/perf/perf_cql_parser',
    'test/perf/perf_hash',
    'test/perf/perf_mutation',
-    'test/perf/perf_collection',
+    'test/perf/perf_bptree',
    'test/perf/perf_row_cache_update',
    'test/unit/lsa_async_eviction_test',
    'test/unit/lsa_sync_eviction_test',
@@ -1145,8 +1154,6 @@ warnings = [
    '-Wno-implicit-int-float-conversion',
    '-Wno-delete-abstract-non-virtual-dtor',
    '-Wno-uninitialized-const-reference',
-    # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77728
-    '-Wno-psabi',
 ]

 warnings = [w
@@ -1162,11 +1169,11 @@ optimization_flags = [
 optimization_flags = [o
                      for o in optimization_flags
                      if flag_supported(flag=o, compiler=args.cxx)]
-modes['release']['cxxflags'] += ' ' + ' '.join(optimization_flags)
+modes['release']['cxx_ld_flags'] += ' ' + ' '.join(optimization_flags)

 if flag_supported(flag='-Wstack-usage=4096', compiler=args.cxx):
    for mode in modes:
-        modes[mode]['cxxflags'] += f' -Wstack-usage={modes[mode]["stack-usage-threshold"]} -Wno-error=stack-usage='
+        modes[mode]['cxx_ld_flags'] += f' -Wstack-usage={modes[mode]["stack-usage-threshold"]} -Wno-error=stack-usage='

 linker_flags = linker_flags(compiler=args.cxx)

@@ -1281,8 +1288,6 @@ file = open(f'{outdir}/SCYLLA-VERSION-FILE', 'r')
 scylla_version = file.read().strip()
 file = open(f'{outdir}/SCYLLA-RELEASE-FILE', 'r')
 scylla_release = file.read().strip()
-file = open(f'{outdir}/SCYLLA-PRODUCT-FILE', 'r')
-scylla_product = file.read().strip()

 extra_cxxflags["release.cc"] = "-DSCYLLA_VERSION=\"\\\"" + scylla_version + "\\\"\" -DSCYLLA_RELEASE=\"\\\"" + scylla_release + "\\\"\""

@@ -1324,6 +1329,9 @@ args.user_cflags += f" -ffile-prefix-map={curdir}=."

 seastar_cflags = args.user_cflags

+if build_raft:
+    seastar_cflags += ' -fcoroutines'
+
 if args.target != '':
    seastar_cflags += ' -march=' + args.target
 seastar_ldflags = args.user_ldflags
@@ -1332,13 +1340,6 @@ libdeflate_cflags = seastar_cflags

 MODE_TO_CMAKE_BUILD_TYPE = {'release' : 'RelWithDebInfo', 'debug' : 'Debug', 'dev' : 'Dev', 'sanitize' : 'Sanitize' }

-# cmake likes to separate things with semicolons
-def semicolon_separated(*flags):
-    # original flags may be space separated, so convert to string still
-    # using spaces
-    f = ' '.join(flags)
-    return re.sub(' +', ';', f)
-
 def configure_seastar(build_dir, mode):
    seastar_build_dir = os.path.join(build_dir, mode, 'seastar')

@@ -1347,8 +1348,8 @@ def configure_seastar(build_dir, mode):
        '-DCMAKE_C_COMPILER={}'.format(args.cc),
        '-DCMAKE_CXX_COMPILER={}'.format(args.cxx),
        '-DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON',
-        '-DSeastar_CXX_FLAGS={}'.format((seastar_cflags).replace(' ', ';')),
-        '-DSeastar_LD_FLAGS={}'.format(semicolon_separated(seastar_ldflags, modes[mode]['cxx_ld_flags'])),
+        '-DSeastar_CXX_FLAGS={}'.format((seastar_cflags + ' ' + modes[mode]['cxx_ld_flags']).replace(' ', ';')),
+        '-DSeastar_LD_FLAGS={}'.format(seastar_ldflags),
        '-DSeastar_CXX_DIALECT=gnu++20',
        '-DSeastar_API_LEVEL=6',
        '-DSeastar_UNUSED_RESULT_ERROR=ON',
@@ -1459,7 +1460,7 @@ if not args.staticboost:
    args.user_cflags += ' -DBOOST_TEST_DYN_LINK'

 if build_raft:
-    args.user_cflags += ' -DENABLE_SCYLLA_RAFT'
+    args.user_cflags += ' -DENABLE_SCYLLA_RAFT -fcoroutines'

 # thrift version detection, see #4538
 proc_res = subprocess.run(["thrift", "-version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
@@ -1798,18 +1799,24 @@ with open(buildfile_tmp, 'w') as f:
        f.write(textwrap.dedent('''\
            build $builddir/{mode}/iotune: copy $builddir/{mode}/seastar/apps/iotune/iotune
            ''').format(**locals()))
-        f.write('build $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz: package $builddir/{mode}/scylla $builddir/{mode}/iotune $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian | always\n'.format(**locals()))
+        f.write('build $builddir/{mode}/dist/tar/scylla-package.tar.gz: package $builddir/{mode}/scylla $builddir/{mode}/iotune $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian | always\n'.format(**locals()))
+        f.write('  pool = submodule_pool\n')
        f.write('  mode = {mode}\n'.format(**locals()))
-        f.write(f'build $builddir/dist/{mode}/redhat: rpmbuild $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz\n')
+        f.write(f'build $builddir/{mode}/scylla-package.tar.gz: copy $builddir/{mode}/dist/tar/scylla-package.tar.gz\n')
+        f.write(f'build $builddir/dist/{mode}/redhat: rpmbuild $builddir/{mode}/scylla-package.tar.gz\n')
+        f.write(f'  pool = submodule_pool\n')
        f.write(f'  mode = {mode}\n')
-        f.write(f'build $builddir/dist/{mode}/debian: debbuild $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz\n')
+        f.write(f'build $builddir/dist/{mode}/debian: debbuild $builddir/{mode}/scylla-package.tar.gz\n')
+        f.write(f'  pool = submodule_pool\n')
        f.write(f'  mode = {mode}\n')
        f.write(f'build dist-server-{mode}: phony $builddir/dist/{mode}/redhat $builddir/dist/{mode}/debian\n')
-        f.write(f'build dist-jmx-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-jmx-package.tar.gz dist-jmx-rpm dist-jmx-deb\n')
-        f.write(f'build dist-tools-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-tools-package.tar.gz dist-tools-rpm dist-tools-deb\n')
+        f.write(f'build dist-jmx-{mode}: phony $builddir/{mode}/dist/tar/scylla-jmx-package.tar.gz dist-jmx-rpm dist-jmx-deb\n')
+        f.write(f'build dist-tools-{mode}: phony $builddir/{mode}/dist/tar/scylla-tools-package.tar.gz dist-tools-rpm dist-tools-deb\n')
        f.write(f'build dist-python3-{mode}: phony dist-python3-tar dist-python3-rpm dist-python3-deb compat-python3-rpm compat-python3-deb\n')
-        f.write(f'build dist-unified-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-unified-package-{scylla_version}.{scylla_release}.tar.gz\n')
-        f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-unified-package-{scylla_version}.{scylla_release}.tar.gz: unified $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-python3-package.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-jmx-package.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-tools-package.tar.gz | always\n')
+        f.write(f'build dist-unified-{mode}: phony $builddir/{mode}/dist/tar/scylla-unified-package-{scylla_version}.{scylla_release}.tar.gz\n')
+        f.write(f'build $builddir/{mode}/scylla-unified-package-{scylla_version}.{scylla_release}.tar.gz: copy $builddir/{mode}/dist/tar/scylla-unified-package.tar.gz\n')
+        f.write(f'build $builddir/{mode}/dist/tar/scylla-unified-package-{scylla_version}.{scylla_release}.tar.gz: unified $builddir/{mode}/dist/tar/scylla-package.tar.gz $builddir/{mode}/dist/tar/scylla-python3-package.tar.gz $builddir/{mode}/dist/tar/scylla-jmx-package.tar.gz $builddir/{mode}/dist/tar/scylla-tools-package.tar.gz | always\n')
+        f.write(f'  pool = submodule_pool\n')
        f.write(f'  mode = {mode}\n')
        f.write('rule libdeflate.{mode}\n'.format(**locals()))
        f.write('  command = make -C libdeflate BUILD_DIR=../$builddir/{mode}/libdeflate/ CFLAGS="{libdeflate_cflags}" CC={args.cc} ../$builddir/{mode}/libdeflate//libdeflate.a\n'.format(**locals()))
@@ -1836,12 +1843,12 @@ with open(buildfile_tmp, 'w') as f:
    )

    f.write(textwrap.dedent(f'''\
-        build dist-unified-tar: phony {' '.join([f'$builddir/{mode}/dist/tar/{scylla_product}-unified-package-{scylla_version}.{scylla_release}.tar.gz' for mode in build_modes])}
+        build dist-unified-tar: phony {' '.join(['$builddir/{mode}/scylla-unified-package-$scylla_version.$scylla_release.tar.gz'.format(mode=mode) for mode in build_modes])}
        build dist-unified: phony dist-unified-tar

        build dist-server-deb: phony {' '.join(['$builddir/dist/{mode}/debian'.format(mode=mode) for mode in build_modes])}
        build dist-server-rpm: phony {' '.join(['$builddir/dist/{mode}/redhat'.format(mode=mode) for mode in build_modes])}
-        build dist-server-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz'.format(mode=mode, scylla_product=scylla_product) for mode in build_modes])}
+        build dist-server-tar: phony {' '.join(['$builddir/{mode}/scylla-package.tar.gz'.format(mode=mode) for mode in build_modes])}
        build dist-server: phony dist-server-tar dist-server-rpm dist-server-deb

        rule build-submodule-reloc
@@ -1851,26 +1858,26 @@ with open(buildfile_tmp, 'w') as f:
        rule build-submodule-deb
          command = cd $dir && ./reloc/build_deb.sh --reloc-pkg $artifact

-        build tools/jmx/build/{scylla_product}-jmx-package.tar.gz: build-submodule-reloc
+        build tools/jmx/build/scylla-jmx-package.tar.gz: build-submodule-reloc
          reloc_dir = tools/jmx
-        build dist-jmx-rpm: build-submodule-rpm tools/jmx/build/{scylla_product}-jmx-package.tar.gz
+        build dist-jmx-rpm: build-submodule-rpm tools/jmx/build/scylla-jmx-package.tar.gz
          dir = tools/jmx
-          artifact = $builddir/{scylla_product}-jmx-package.tar.gz
-        build dist-jmx-deb: build-submodule-deb tools/jmx/build/{scylla_product}-jmx-package.tar.gz
+          artifact = $builddir/scylla-jmx-package.tar.gz
+        build dist-jmx-deb: build-submodule-deb tools/jmx/build/scylla-jmx-package.tar.gz
          dir = tools/jmx
-          artifact = $builddir/{scylla_product}-jmx-package.tar.gz
-        build dist-jmx-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-jmx-package.tar.gz'.format(mode=mode, scylla_product=scylla_product) for mode in build_modes])}
+          artifact = $builddir/scylla-jmx-package.tar.gz
+        build dist-jmx-tar: phony {' '.join(['$builddir/{mode}/dist/tar/scylla-jmx-package.tar.gz'.format(mode=mode) for mode in build_modes])}
        build dist-jmx: phony dist-jmx-tar dist-jmx-rpm dist-jmx-deb

-        build tools/java/build/{scylla_product}-tools-package.tar.gz: build-submodule-reloc
+        build tools/java/build/scylla-tools-package.tar.gz: build-submodule-reloc
          reloc_dir = tools/java
-        build dist-tools-rpm: build-submodule-rpm tools/java/build/{scylla_product}-tools-package.tar.gz
+        build dist-tools-rpm: build-submodule-rpm tools/java/build/scylla-tools-package.tar.gz
          dir = tools/java
-          artifact = $builddir/{scylla_product}-tools-package.tar.gz
-        build dist-tools-deb: build-submodule-deb tools/java/build/{scylla_product}-tools-package.tar.gz
+          artifact = $builddir/scylla-tools-package.tar.gz
+        build dist-tools-deb: build-submodule-deb tools/java/build/scylla-tools-package.tar.gz
          dir = tools/java
-          artifact = $builddir/{scylla_product}-tools-package.tar.gz
-        build dist-tools-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-tools-package.tar.gz'.format(mode=mode, scylla_product=scylla_product) for mode in build_modes])}
+          artifact = $builddir/scylla-tools-package.tar.gz
+        build dist-tools-tar: phony {' '.join(['$builddir/{mode}/dist/tar/scylla-tools-package.tar.gz'.format(mode=mode) for mode in build_modes])}
        build dist-tools: phony dist-tools-tar dist-tools-rpm dist-tools-deb

        rule compat-python3-reloc
@@ -1879,27 +1886,27 @@ with open(buildfile_tmp, 'w') as f:
          command = cd $dir && ./reloc/build_rpm.sh --reloc-pkg $artifact --builddir ../../build/redhat
        rule compat-python3-deb
          command = cd $dir && ./reloc/build_deb.sh --reloc-pkg $artifact --builddir ../../build/debian
-        build $builddir/release/{scylla_product}-python3-package.tar.gz: compat-python3-reloc tools/python3/build/{scylla_product}-python3-package.tar.gz
+        build $builddir/release/scylla-python3-package.tar.gz: compat-python3-reloc tools/python3/build/scylla-python3-package.tar.gz
          dir = tools/python3
-          artifact = $builddir/{scylla_product}-python3-package.tar.gz
-        build compat-python3-rpm: compat-python3-rpm tools/python3/build/{scylla_product}-python3-package.tar.gz
+          artifact = $builddir/scylla-python3-package.tar.gz
+        build compat-python3-rpm: compat-python3-rpm tools/python3/build/scylla-python3-package.tar.gz
          dir = tools/python3
-          artifact = $builddir/{scylla_product}-python3-package.tar.gz
-        build compat-python3-deb: compat-python3-deb tools/python3/build/{scylla_product}-python3-package.tar.gz
+          artifact = $builddir/scylla-python3-package.tar.gz
+        build compat-python3-deb: compat-python3-deb tools/python3/build/scylla-python3-package.tar.gz
          dir = tools/python3
-          artifact = $builddir/{scylla_product}-python3-package.tar.gz
+          artifact = $builddir/scylla-python3-package.tar.gz

-        build tools/python3/build/{scylla_product}-python3-package.tar.gz: build-submodule-reloc
+        build tools/python3/build/scylla-python3-package.tar.gz: build-submodule-reloc
          reloc_dir = tools/python3
          args = --packages "{python3_dependencies}"
-        build dist-python3-rpm: build-submodule-rpm tools/python3/build/{scylla_product}-python3-package.tar.gz
+        build dist-python3-rpm: build-submodule-rpm tools/python3/build/scylla-python3-package.tar.gz
          dir = tools/python3
-          artifact = $builddir/{scylla_product}-python3-package.tar.gz
-        build dist-python3-deb: build-submodule-deb tools/python3/build/{scylla_product}-python3-package.tar.gz
+          artifact = $builddir/scylla-python3-package.tar.gz
+        build dist-python3-deb: build-submodule-deb tools/python3/build/scylla-python3-package.tar.gz
          dir = tools/python3
-          artifact = $builddir/{scylla_product}-python3-package.tar.gz
-        build dist-python3-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-python3-package.tar.gz'.format(mode=mode, scylla_product=scylla_product) for mode in build_modes])}
-        build dist-python3: phony dist-python3-tar dist-python3-rpm dist-python3-deb $builddir/release/{scylla_product}-python3-package.tar.gz compat-python3-rpm compat-python3-deb
+          artifact = $builddir/scylla-python3-package.tar.gz
+        build dist-python3-tar: phony {' '.join(['$builddir/{mode}/dist/tar/scylla-python3-package.tar.gz'.format(mode=mode) for mode in build_modes])}
+        build dist-python3: phony dist-python3-tar dist-python3-rpm dist-python3-deb $builddir/release/scylla-python3-package.tar.gz compat-python3-rpm compat-python3-deb
        build dist-deb: phony dist-server-deb dist-python3-deb dist-jmx-deb dist-tools-deb
        build dist-rpm: phony dist-server-rpm dist-python3-rpm dist-jmx-rpm dist-tools-rpm
        build dist-tar: phony dist-unified-tar dist-server-tar dist-python3-tar dist-jmx-tar dist-tools-tar
@@ -1914,9 +1921,9 @@ with open(buildfile_tmp, 'w') as f:
        '''))
    for mode in build_modes:
        f.write(textwrap.dedent(f'''\
-        build $builddir/{mode}/dist/tar/{scylla_product}-python3-package.tar.gz: copy tools/python3/build/{scylla_product}-python3-package.tar.gz
-        build $builddir/{mode}/dist/tar/{scylla_product}-tools-package.tar.gz: copy tools/java/build/{scylla_product}-tools-package.tar.gz
-        build $builddir/{mode}/dist/tar/{scylla_product}-jmx-package.tar.gz: copy tools/jmx/build/{scylla_product}-jmx-package.tar.gz
+        build $builddir/{mode}/dist/tar/scylla-python3-package.tar.gz: copy tools/python3/build/scylla-python3-package.tar.gz
+        build $builddir/{mode}/dist/tar/scylla-tools-package.tar.gz: copy tools/java/build/scylla-tools-package.tar.gz
+        build $builddir/{mode}/dist/tar/scylla-jmx-package.tar.gz: copy tools/jmx/build/scylla-jmx-package.tar.gz

        build dist-{mode}: phony dist-server-{mode} dist-python3-{mode} dist-tools-{mode} dist-jmx-{mode} dist-unified-{mode}
        build dist-check-{mode}: dist-check
@@ -1942,13 +1949,6 @@ with open(buildfile_tmp, 'w') as f:
        build mode_list: mode_list
        default {modes_list}
        ''').format(modes_list=' '.join(default_modes), **globals()))
-    unit_test_list = set(test for test in build_artifacts if test in set(tests))
-    f.write(textwrap.dedent('''\
-        rule unit_test_list
-            command = /usr/bin/env echo -e '{unit_test_list}'
-            description = List configured unit tests
-        build unit_test_list: unit_test_list
-        ''').format(unit_test_list="\\n".join(unit_test_list)))
    f.write(textwrap.dedent('''\
        build always: phony
        rule scylla_version_gen
@@ -1957,6 +1957,6 @@ with open(buildfile_tmp, 'w') as f:
        rule debian_files_gen
            command = ./dist/debian/debian_files_gen.py
        build $builddir/debian/debian: debian_files_gen | always
-        ''').format(**globals()))
+        ''').format(modes_list=' '.join(build_modes), **globals()))

 os.rename(buildfile_tmp, buildfile)
--- a/connection_notifier.cc
+++ b/connection_notifier.cc
@@ -20,47 +20,44 @@
 */

 #include "connection_notifier.hh"
+#include "db/query_context.hh"
 #include "cql3/constants.hh"
 #include "database.hh"
+#include "service/storage_proxy.hh"

 #include <stdexcept>

-sstring to_string(client_type ct) {
+namespace db::system_keyspace {
+extern const char *const CLIENTS;
+}
+
+static sstring to_string(client_type ct) {
    switch (ct) {
        case client_type::cql: return "cql";
        case client_type::thrift: return "thrift";
        case client_type::alternator: return "alternator";
+        default: throw std::runtime_error("Invalid client_type");
    }
-    throw std::runtime_error("Invalid client_type");
-}
-
-static sstring to_string(client_connection_stage ccs) {
-    switch (ccs) {
-        case client_connection_stage::established: return connection_stage_literal<client_connection_stage::established>;
-        case client_connection_stage::authenticating: return connection_stage_literal<client_connection_stage::authenticating>;
-        case client_connection_stage::ready: return connection_stage_literal<client_connection_stage::ready>;
-    }
-    throw std::runtime_error("Invalid client_connection_stage");
 }

 future<> notify_new_client(client_data cd) {
    // FIXME: consider prepared statement
    const static sstring req
-            = format("INSERT INTO system.{} (address, port, client_type, connection_stage, shard_id, protocol_version, username) "
-                     "VALUES (?, ?, ?, ?, ?, ?, ?);", db::system_keyspace::CLIENTS);
+            = format("INSERT INTO system.{} (address, port, client_type, shard_id, protocol_version, username) "
+                     "VALUES (?, ?, ?, ?, ?, ?);", db::system_keyspace::CLIENTS);
    
-    return db::qctx->execute_cql(req,
-            std::move(cd.ip), cd.port, to_string(cd.ct), to_string(cd.connection_stage), cd.shard_id,
+    return db::execute_cql(req,
+            std::move(cd.ip), cd.port, to_string(cd.ct), cd.shard_id,
            cd.protocol_version.has_value() ? data_value(*cd.protocol_version) : data_value::make_null(int32_type),
            cd.username.value_or("anonymous")).discard_result();
 }

-future<> notify_disconnected_client(net::inet_address addr, int port, client_type ct) {
+future<> notify_disconnected_client(gms::inet_address addr, client_type ct, int port) {
    // FIXME: consider prepared statement
    const static sstring req
            = format("DELETE FROM system.{} where address=? AND port=? AND client_type=?;",
                     db::system_keyspace::CLIENTS);
-    return db::qctx->execute_cql(req, std::move(addr), port, to_string(ct)).discard_result();
+    return db::execute_cql(req, addr.addr(), port, to_string(ct)).discard_result();
 }

 future<> clear_clientlist() {
--- a/connection_notifier.hh
+++ b/connection_notifier.hh
@@ -20,65 +20,27 @@
 */
 #pragma once

-#include "db/query_context.hh"
-
-#include <seastar/net/inet_address.hh>
+#include "gms/inet_address.hh"
 #include <seastar/core/sstring.hh>
-#include "seastarx.hh"
-
 #include <optional>

-namespace db::system_keyspace {
-extern const char *const CLIENTS;
-}
-
 enum class client_type {
    cql = 0,
    thrift,
    alternator,
 };

-sstring to_string(client_type ct);
-
-enum class changed_column {
-    username = 0,
-    connection_stage,
-    driver_name,
-    driver_version,
-    hostname,
-    protocol_version,
-};
-
-template <changed_column column> constexpr const char* column_literal = "";
-template <> inline constexpr const char* column_literal<changed_column::username> = "username";
-template <> inline constexpr const char* column_literal<changed_column::connection_stage> = "connection_stage";
-template <> inline constexpr const char* column_literal<changed_column::driver_name> = "driver_name";
-template <> inline constexpr const char* column_literal<changed_column::driver_version> = "driver_version";
-template <> inline constexpr const char* column_literal<changed_column::hostname> = "hostname";
-template <> inline constexpr const char* column_literal<changed_column::protocol_version> = "protocol_version";
-
-enum class client_connection_stage {
-    established = 0,
-    authenticating,
-    ready,
-};
-
-template <client_connection_stage ccs> constexpr const char* connection_stage_literal = "";
-template <> inline constexpr const char* connection_stage_literal<client_connection_stage::established> = "ESTABLISHED";
-template <> inline constexpr const char* connection_stage_literal<client_connection_stage::authenticating> = "AUTHENTICATING";
-template <> inline constexpr const char* connection_stage_literal<client_connection_stage::ready> = "READY";
-
 // Representation of a row in `system.clients'. std::optionals are for nullable cells.
 struct client_data {
-    net::inet_address ip;
+    gms::inet_address ip;
    int32_t port;
    client_type ct;
-    client_connection_stage connection_stage = client_connection_stage::established;
    int32_t shard_id;  /// ID of server-side shard which is processing the connection.

    // `optional' column means that it's nullable (possibly because it's
    // unimplemented yet). If you want to fill ("implement") any of them,
    // remember to update the query in `notify_new_client()'.
+    std::optional<sstring> connection_stage;
    std::optional<sstring> driver_name;
    std::optional<sstring> driver_version;
    std::optional<sstring> hostname;
@@ -90,17 +52,6 @@ struct client_data {
 };

 future<> notify_new_client(client_data cd);
-future<> notify_disconnected_client(net::inet_address addr, int port, client_type ct);
+future<> notify_disconnected_client(gms::inet_address addr, client_type ct, int port);
+
 future<> clear_clientlist();
-
-template <changed_column column_enum_val>
-struct notify_client_change {
-    template <typename T>
-    future<> operator()(net::inet_address addr, int port, client_type ct, T&& value) {
-        const static sstring req
-                = format("UPDATE system.{} SET {}=? WHERE address=? AND port=? AND client_type=?;",
-                        db::system_keyspace::CLIENTS, column_literal<column_enum_val>);
-
-        return db::qctx->execute_cql(req, std::forward<T>(value), std::move(addr), port, to_string(ct)).discard_result();
-    }
-};
--- a/counters.hh
+++ b/counters.hh
@@ -277,14 +277,7 @@ public:
        return ac;
    }

-    class inserter_iterator {
-    public:
-        using iterator_category = std::output_iterator_tag;
-        using value_type = counter_shard;
-        using difference_type = std::ptrdiff_t;
-        using pointer = counter_shard*;
-        using reference = counter_shard&;
-    private:
+    class inserter_iterator : public std::iterator<std::output_iterator_tag, counter_shard> {
        counter_cell_builder* _builder;
    public:
        explicit inserter_iterator(counter_cell_builder& b) : _builder(&b) { }
@@ -318,14 +311,7 @@ protected:
    basic_atomic_cell_view<is_mutable> _cell;
    linearized_value_view _value;
 private:
-    class shard_iterator {
-    public:
-        using iterator_category = std::input_iterator_tag;
-        using value_type = basic_counter_shard_view<is_mutable>;
-        using difference_type = std::ptrdiff_t;
-        using pointer = basic_counter_shard_view<is_mutable>*;
-        using reference = basic_counter_shard_view<is_mutable>&;
-    private:
+    class shard_iterator : public std::iterator<std::input_iterator_tag, basic_counter_shard_view<is_mutable>> {
        pointer_type _current;
        basic_counter_shard_view<is_mutable> _current_view;
    public:
--- a/cql3/constants.hh
+++ b/cql3/constants.hh
@@ -192,9 +192,12 @@ public:

        virtual ::shared_ptr<terminal> bind(const query_options& options) override {
            auto bytes = bind_and_get(options);
-            if (!bytes) {
+            if (bytes.is_null()) {
                return ::shared_ptr<terminal>{};
            }
+            if (bytes.is_unset_value()) {
+                return UNSET_VALUE;
+            }
            return ::make_shared<constants::value>(std::move(cql3::raw_value::make_value(to_bytes(*bytes))));
        }
    };
--- a/cql3/expr/expression.cc
+++ b/cql3/expr/expression.cc
@@ -27,7 +27,9 @@
 #include <fmt/ostream.h>
 #include <unordered_map>

+#include "cql3/constants.hh"
 #include "cql3/lists.hh"
+#include "cql3/statements/request_validations.hh"
 #include "cql3/tuples.hh"
 #include "index/secondary_index_manager.hh"
 #include "types/list.hh"
@@ -417,6 +419,8 @@ bool is_one_of(const column_value& col, term& rhs, const column_value_eval_bag&
    } else if (auto mkr = dynamic_cast<lists::marker*>(&rhs)) {
        // This is `a IN ?`.  RHS elements are values representable as bytes_opt.
        const auto values = static_pointer_cast<lists::value>(mkr->bind(bag.options));
+        statements::request_validations::check_not_null(
+                values, "Invalid null value for column %s", col.col->name_as_text());
        return boost::algorithm::any_of(values->get_elements(), [&] (const bytes_opt& b) {
                return equal(b, col, bag);
            });
@@ -568,7 +572,8 @@ const auto deref = boost::adaptors::transformed([] (const bytes_opt& b) { return

 /// Returns possible values from t, which must be RHS of IN.
 value_list get_IN_values(
-        const ::shared_ptr<term>& t, const query_options& options, const serialized_compare& comparator) {
+        const ::shared_ptr<term>& t, const query_options& options, const serialized_compare& comparator,
+        sstring_view column_name) {
    // RHS is prepared differently for different CQL cases.  Cast it dynamically to discern which case this is.
    if (auto dv = dynamic_pointer_cast<lists::delayed_value>(t)) {
        // Case `a IN (1,2,3)`.
@@ -578,8 +583,12 @@ value_list get_IN_values(
        return to_sorted_vector(std::move(result_range), comparator);
    } else if (auto mkr = dynamic_pointer_cast<lists::marker>(t)) {
        // Case `a IN ?`.  Collect all list-element values.
-        const auto val = static_pointer_cast<lists::value>(mkr->bind(options));
-        return to_sorted_vector(val->get_elements() | non_null | deref, comparator);
+        const auto val = mkr->bind(options);
+        if (val == constants::UNSET_VALUE) {
+            throw exceptions::invalid_request_exception(format("Invalid unset value for column {}", column_name));
+        }
+        statements::request_validations::check_not_null(val, "Invalid null value for IN tuple");
+        return to_sorted_vector(static_pointer_cast<lists::value>(val)->get_elements() | non_null | deref, comparator);
    }
    throw std::logic_error(format("get_IN_values(single column) on invalid term {}", *t));
 }
@@ -610,13 +619,13 @@ static constexpr bool inclusive = true, exclusive = false;
 nonwrapping_range<bytes> to_range(oper_t op, const bytes& val) {
    switch (op) {
    case oper_t::GT:
-        return nonwrapping_range<bytes>::make_starting_with(interval_bound(val, exclusive));
+        return nonwrapping_range<bytes>::make_starting_with(range_bound(val, exclusive));
    case oper_t::GTE:
-        return nonwrapping_range<bytes>::make_starting_with(interval_bound(val, inclusive));
+        return nonwrapping_range<bytes>::make_starting_with(range_bound(val, inclusive));
    case oper_t::LT:
-        return nonwrapping_range<bytes>::make_ending_with(interval_bound(val, exclusive));
+        return nonwrapping_range<bytes>::make_ending_with(range_bound(val, exclusive));
    case oper_t::LTE:
-        return nonwrapping_range<bytes>::make_ending_with(interval_bound(val, inclusive));
+        return nonwrapping_range<bytes>::make_ending_with(range_bound(val, inclusive));
    default:
        throw std::logic_error(format("to_range: unknown comparison operator {}", op));
    }
@@ -686,7 +695,7 @@ value_set possible_lhs_values(const column_definition* cdef, const expression& e
                                return oper.op == oper_t::EQ ? value_set(value_list{*val})
                                        : to_range(oper.op, *val);
                            } else if (oper.op == oper_t::IN) {
-                                return get_IN_values(oper.rhs, options, type->as_less_comparator());
+                                return get_IN_values(oper.rhs, options, type->as_less_comparator(), cdef->name_as_text());
                            }
                            throw std::logic_error(format("possible_lhs_values: unhandled operator {}", oper));
                        },
@@ -731,9 +740,9 @@ value_set possible_lhs_values(const column_definition* cdef, const expression& e
                            if (oper.op == oper_t::EQ) {
                                return value_list{*val};
                            } else if (oper.op == oper_t::GT) {
-                                return nonwrapping_range<bytes>::make_starting_with(interval_bound(*val, exclusive));
+                                return nonwrapping_range<bytes>::make_starting_with(range_bound(*val, exclusive));
                            } else if (oper.op == oper_t::GTE) {
-                                return nonwrapping_range<bytes>::make_starting_with(interval_bound(*val, inclusive));
+                                return nonwrapping_range<bytes>::make_starting_with(range_bound(*val, inclusive));
                            }
                            static const bytes MININT = serialized(std::numeric_limits<int64_t>::min()),
                                    MAXINT = serialized(std::numeric_limits<int64_t>::max());
@@ -741,9 +750,9 @@ value_set possible_lhs_values(const column_definition* cdef, const expression& e
                            // that as MAXINT for some reason.
                            const auto adjusted_val = (*val == MININT) ? serialized(MAXINT) : *val;
                            if (oper.op == oper_t::LT) {
-                                return nonwrapping_range<bytes>::make_ending_with(interval_bound(adjusted_val, exclusive));
+                                return nonwrapping_range<bytes>::make_ending_with(range_bound(adjusted_val, exclusive));
                            } else if (oper.op == oper_t::LTE) {
-                                return nonwrapping_range<bytes>::make_ending_with(interval_bound(adjusted_val, inclusive));
+                                return nonwrapping_range<bytes>::make_ending_with(range_bound(adjusted_val, inclusive));
                            }
                            throw std::logic_error(format("get_token_interval invalid operator {}", oper.op));
                        },
--- a/cql3/functions/functions.cc
+++ b/cql3/functions/functions.cc
@@ -76,7 +76,7 @@ functions::init() noexcept {
    // that has less information in it. Given how unlikely it is that
    // we will run out of memory this early, having a better core dump
    // if we do seems like a good trade-off.
-    memory::scoped_critical_alloc_section dfg;
+    memory::disable_failure_guard dfg;

    std::unordered_multimap<function_name, shared_ptr<function>> ret;
    auto declare = [&ret] (shared_ptr<function> f) { ret.emplace(f->name(), f); };
--- a/cql3/maps.cc
+++ b/cql3/maps.cc
@@ -305,6 +305,12 @@ maps::setter_by_key::execute(mutation& m, const clustering_key_prefix& prefix, c
    assert(column.type->is_multi_cell()); // "Attempted to set a value for a single key on a frozen map"m
    auto key = _k->bind_and_get(params._options);
    auto value = _t->bind_and_get(params._options);
+    if (value.is_unset_value()) {
+        return;
+    }
+    if (key.is_unset_value() || value.is_unset_value()) {
+        throw invalid_request_exception("Invalid unset map key");
+    }
    if (!key) {
        throw invalid_request_exception("Invalid null map key");
    }
--- a/cql3/query_options.cc
+++ b/cql3/query_options.cc
@@ -50,11 +50,12 @@ const cql_config default_cql_config;
 thread_local const query_options::specific_options query_options::specific_options::DEFAULT{-1, {}, {}, api::missing_timestamp};

 thread_local query_options query_options::DEFAULT{default_cql_config,
-    db::consistency_level::ONE, std::nullopt,
+    db::consistency_level::ONE, infinite_timeout_config, std::nullopt,
    std::vector<cql3::raw_value_view>(), false, query_options::specific_options::DEFAULT, cql_serialization_format::latest()};

 query_options::query_options(const cql_config& cfg,
                           db::consistency_level consistency,
+                           const ::timeout_config& timeout_config,
                           std::optional<std::vector<sstring_view>> names,
                           std::vector<cql3::raw_value> values,
                           std::vector<cql3::raw_value_view> value_views,
@@ -63,6 +64,7 @@ query_options::query_options(const cql_config& cfg,
                           cql_serialization_format sf)
   : _cql_config(cfg)
   , _consistency(consistency)
+   , _timeout_config(timeout_config)
   , _names(std::move(names))
   , _values(std::move(values))
   , _value_views(value_views)
@@ -74,6 +76,7 @@ query_options::query_options(const cql_config& cfg,

 query_options::query_options(const cql_config& cfg,
                             db::consistency_level consistency,
+                             const ::timeout_config& timeout_config,
                             std::optional<std::vector<sstring_view>> names,
                             std::vector<cql3::raw_value> values,
                             bool skip_metadata,
@@ -81,6 +84,7 @@ query_options::query_options(const cql_config& cfg,
                             cql_serialization_format sf)
    : _cql_config(cfg)
    , _consistency(consistency)
+    , _timeout_config(timeout_config)
    , _names(std::move(names))
    , _values(std::move(values))
    , _value_views()
@@ -93,6 +97,7 @@ query_options::query_options(const cql_config& cfg,

 query_options::query_options(const cql_config& cfg,
                             db::consistency_level consistency,
+                             const ::timeout_config& timeout_config,
                             std::optional<std::vector<sstring_view>> names,
                             std::vector<cql3::raw_value_view> value_views,
                             bool skip_metadata,
@@ -100,6 +105,7 @@ query_options::query_options(const cql_config& cfg,
                             cql_serialization_format sf)
    : _cql_config(cfg)
    , _consistency(consistency)
+    , _timeout_config(timeout_config)
    , _names(std::move(names))
    , _values()
    , _value_views(std::move(value_views))
@@ -109,11 +115,12 @@ query_options::query_options(const cql_config& cfg,
 {
 }

-query_options::query_options(db::consistency_level cl, std::vector<cql3::raw_value> values,
+query_options::query_options(db::consistency_level cl, const ::timeout_config& timeout_config, std::vector<cql3::raw_value> values,
        specific_options options)
    : query_options(
          default_cql_config,
          cl,
+          timeout_config,
          {},
          std::move(values),
          false,
@@ -126,6 +133,7 @@ query_options::query_options(db::consistency_level cl, std::vector<cql3::raw_val
 query_options::query_options(std::unique_ptr<query_options> qo, lw_shared_ptr<service::pager::paging_state> paging_state)
        : query_options(qo->_cql_config,
        qo->_consistency,
+        qo->get_timeout_config(),
        std::move(qo->_names),
        std::move(qo->_values),
        std::move(qo->_value_views),
@@ -138,6 +146,7 @@ query_options::query_options(std::unique_ptr<query_options> qo, lw_shared_ptr<se
 query_options::query_options(std::unique_ptr<query_options> qo, lw_shared_ptr<service::pager::paging_state> paging_state, int32_t page_size)
        : query_options(qo->_cql_config,
        qo->_consistency,
+        qo->get_timeout_config(),
        std::move(qo->_names),
        std::move(qo->_values),
        std::move(qo->_value_views),
@@ -149,7 +158,7 @@ query_options::query_options(std::unique_ptr<query_options> qo, lw_shared_ptr<se

 query_options::query_options(std::vector<cql3::raw_value> values)
    : query_options(
-          db::consistency_level::ONE, std::move(values))
+          db::consistency_level::ONE, infinite_timeout_config, std::move(values)) // delegates with consistency ONE and infinite_timeout_config
 {}

 void query_options::prepare(const std::vector<lw_shared_ptr<column_specification>>& specs)
--- a/cql3/query_options.hh
+++ b/cql3/query_options.hh
@@ -51,6 +51,7 @@
 #include "cql3/column_identifier.hh"
 #include "cql3/values.hh"
 #include "cql_serialization_format.hh"
+#include "timeout_config.hh"

 namespace cql3 {

@@ -74,6 +75,7 @@ public:
 private:
    const cql_config& _cql_config;
    const db::consistency_level _consistency;
+    const timeout_config& _timeout_config;
    const std::optional<std::vector<sstring_view>> _names;
    std::vector<cql3::raw_value> _values;
    std::vector<cql3::raw_value_view> _value_views;
@@ -107,6 +109,7 @@ public:

    explicit query_options(const cql_config& cfg,
                           db::consistency_level consistency,
+                           const timeout_config& timeouts,
                           std::optional<std::vector<sstring_view>> names,
                           std::vector<cql3::raw_value> values,
                           bool skip_metadata,
@@ -114,6 +117,7 @@ public:
                           cql_serialization_format sf);
    explicit query_options(const cql_config& cfg,
                           db::consistency_level consistency,
+                           const timeout_config& timeouts,
                           std::optional<std::vector<sstring_view>> names,
                           std::vector<cql3::raw_value> values,
                           std::vector<cql3::raw_value_view> value_views,
@@ -122,6 +126,7 @@ public:
                           cql_serialization_format sf);
    explicit query_options(const cql_config& cfg,
                           db::consistency_level consistency,
+                           const timeout_config& timeouts,
                           std::optional<std::vector<sstring_view>> names,
                           std::vector<cql3::raw_value_view> value_views,
                           bool skip_metadata,
@@ -153,10 +158,13 @@ public:

    // forInternalUse
    explicit query_options(std::vector<cql3::raw_value> values);
-    explicit query_options(db::consistency_level, std::vector<cql3::raw_value> values, specific_options options = specific_options::DEFAULT);
+    explicit query_options(db::consistency_level, const timeout_config& timeouts,
+            std::vector<cql3::raw_value> values, specific_options options = specific_options::DEFAULT);
    explicit query_options(std::unique_ptr<query_options>, lw_shared_ptr<service::pager::paging_state> paging_state);
    explicit query_options(std::unique_ptr<query_options>, lw_shared_ptr<service::pager::paging_state> paging_state, int32_t page_size);

+    const timeout_config& get_timeout_config() const { return _timeout_config; }
+
    db::consistency_level get_consistency() const {
        return _consistency;
    }
@@ -250,7 +258,7 @@ query_options::query_options(query_options&& o, std::vector<OneMutationDataRange
    std::vector<query_options> tmp;
    tmp.reserve(values_ranges.size());
    std::transform(values_ranges.begin(), values_ranges.end(), std::back_inserter(tmp), [this](auto& values_range) {
-        return query_options(_cql_config, _consistency, {}, std::move(values_range), _skip_metadata, _options, _cql_serialization_format);
+        return query_options(_cql_config, _consistency, _timeout_config, {}, std::move(values_range), _skip_metadata, _options, _cql_serialization_format);
    });
    _batch_options = std::move(tmp);
 }
--- a/cql3/query_processor.cc
+++ b/cql3/query_processor.cc
@@ -61,6 +61,8 @@ logging::logger log("query_processor");
 logging::logger prep_cache_log("prepared_statements_cache");
 logging::logger authorized_prepared_statements_cache_log("authorized_prepared_statements_cache");

+distributed<query_processor> _the_query_processor;
+
 const sstring query_processor::CQL_VERSION = "3.3.1";

 const std::chrono::minutes prepared_statements_cache::entry_expiry = std::chrono::minutes(60);
@@ -619,6 +621,7 @@ query_options query_processor::make_internal_options(
        const statements::prepared_statement::checked_weak_ptr& p,
        const std::initializer_list<data_value>& values,
        db::consistency_level cl,
+        const timeout_config& timeout_config,
        int32_t page_size) const {
    if (p->bound_names.size() != values.size()) {
        throw std::invalid_argument(
@@ -642,10 +645,11 @@ query_options query_processor::make_internal_options(
        api::timestamp_type ts = api::missing_timestamp;
        return query_options(
                cl,
+                timeout_config,
                bound_values,
                cql3::query_options::specific_options{page_size, std::move(paging_state), serial_consistency, ts});
    }
-    return query_options(cl, bound_values);
+    return query_options(cl, timeout_config, bound_values);
 }

 statements::prepared_statement::checked_weak_ptr query_processor::prepare_internal(const sstring& query_string) {
@@ -669,7 +673,7 @@ struct internal_query_state {
 ::shared_ptr<internal_query_state> query_processor::create_paged_state(const sstring& query_string,
        const std::initializer_list<data_value>& values, int32_t page_size) {
    auto p = prepare_internal(query_string);
-    auto opts = make_internal_options(p, values, db::consistency_level::ONE, page_size);
+    auto opts = make_internal_options(p, values, db::consistency_level::ONE, infinite_timeout_config, page_size);
    ::shared_ptr<internal_query_state> res = ::make_shared<internal_query_state>(
            internal_query_state{
                    query_string,
@@ -787,16 +791,7 @@ future<::shared_ptr<untyped_result_set>>
 query_processor::execute_internal(
        const sstring& query_string,
        db::consistency_level cl,
-        const std::initializer_list<data_value>& values,
-        bool cache) {
-    return execute_internal(query_string, cl, *_internal_state, values, cache);
-}
-
-future<::shared_ptr<untyped_result_set>>
-query_processor::execute_internal(
-        const sstring& query_string,
-        db::consistency_level cl,
-        service::query_state& query_state,
+        const timeout_config& timeout_config,
        const std::initializer_list<data_value>& values,
        bool cache) {

@@ -804,13 +799,13 @@ query_processor::execute_internal(
        log.trace("execute_internal: {}\"{}\" ({})", cache ? "(cached) " : "", query_string, ::join(", ", values));
    }
    if (cache) {
-        return execute_with_params(prepare_internal(query_string), cl, query_state, values);
+        return execute_with_params(prepare_internal(query_string), cl, timeout_config, values);
    } else {
        auto p = parse_statement(query_string)->prepare(_db, _cql_stats);
        p->statement->raw_cql_statement = query_string;
        p->statement->validate(_proxy, *_internal_state);
        auto checked_weak_ptr = p->checked_weak_from_this();
-        return execute_with_params(std::move(checked_weak_ptr), cl, query_state, values).finally([p = std::move(p)] {});
+        return execute_with_params(std::move(checked_weak_ptr), cl, timeout_config, values).finally([p = std::move(p)] {});
    }
 }

@@ -818,11 +813,11 @@ future<::shared_ptr<untyped_result_set>>
 query_processor::execute_with_params(
        statements::prepared_statement::checked_weak_ptr p,
        db::consistency_level cl,
-        service::query_state& query_state,
+        const timeout_config& timeout_config,
        const std::initializer_list<data_value>& values) {
-    auto opts = make_internal_options(p, values, cl);
-    return do_with(std::move(opts), [this, &query_state, p = std::move(p)](auto & opts) {
-        return p->statement->execute(_proxy, query_state, opts).then([](auto msg) {
+    auto opts = make_internal_options(p, values, cl, timeout_config);
+    return do_with(std::move(opts), [this, p = std::move(p)](auto & opts) {
+        return p->statement->execute(_proxy, *_internal_state, opts).then([](auto msg) {
            return make_ready_future<::shared_ptr<untyped_result_set>>(::make_shared<untyped_result_set>(msg));
        });
    });
--- a/cql3/query_processor.hh
+++ b/cql3/query_processor.hh
@@ -215,7 +215,8 @@ public:
    // creating namespaces, etc) is explicitly forbidden via this interface.
    future<::shared_ptr<untyped_result_set>>
    execute_internal(const sstring& query_string, const std::initializer_list<data_value>& values = { }) {
-        return execute_internal(query_string, db::consistency_level::ONE, values, true);
+        return execute_internal(query_string, db::consistency_level::ONE,
+                infinite_timeout_config, values, true);
    }

    statements::prepared_statement::checked_weak_ptr prepare_internal(const sstring& query);
@@ -304,19 +305,14 @@ public:
    future<::shared_ptr<untyped_result_set>> execute_internal(
            const sstring& query_string,
            db::consistency_level,
-            const std::initializer_list<data_value>& = { },
-            bool cache = false);
-    future<::shared_ptr<untyped_result_set>> execute_internal(
-            const sstring& query_string,
-            db::consistency_level,
-            service::query_state& query_state,
+            const timeout_config& timeout_config,
            const std::initializer_list<data_value>& = { },
            bool cache = false);

    future<::shared_ptr<untyped_result_set>> execute_with_params(
            statements::prepared_statement::checked_weak_ptr p,
            db::consistency_level,
-            service::query_state& query_state,
+            const timeout_config& timeout_config,
            const std::initializer_list<data_value>& = { });

    future<::shared_ptr<cql_transport::messages::result_message::prepared>>
@@ -345,6 +341,7 @@ private:
            const statements::prepared_statement::checked_weak_ptr& p,
            const std::initializer_list<data_value>&,
            db::consistency_level,
+            const timeout_config& timeout_config,
            int32_t page_size = -1) const;

    future<::shared_ptr<cql_transport::messages::result_message>>
@@ -467,4 +464,14 @@ private:
            ::shared_ptr<cql_statement> statement);
 };

+extern seastar::sharded<query_processor> _the_query_processor;
+
+inline seastar::sharded<query_processor>& get_query_processor() { // accessor for the global _the_query_processor sharded service
+    return _the_query_processor;
+}
+
+inline query_processor& get_local_query_processor() { // returns _the_query_processor.local(); presumably the calling shard's instance - confirm against seastar::sharded
+    return _the_query_processor.local();
+}
+
 }
--- a/cql3/restrictions/statement_restrictions.cc
+++ b/cql3/restrictions/statement_restrictions.cc
@@ -193,12 +193,12 @@ statement_restrictions::statement_restrictions(database& db,
    const expr::allow_local_index allow_local(
            !_partition_key_restrictions->has_unrestricted_components(*_schema)
            && _partition_key_restrictions->is_all_eq());
-    _has_queriable_ck_index = _clustering_columns_restrictions->has_supporting_index(sim, allow_local);
-    _has_queriable_pk_index = _partition_key_restrictions->has_supporting_index(sim, allow_local);
-    _has_queriable_regular_index = _nonprimary_key_restrictions->has_supporting_index(sim, allow_local);
+    const bool has_queriable_clustering_column_index = _clustering_columns_restrictions->has_supporting_index(sim, allow_local);
+    const bool has_queriable_pk_index = _partition_key_restrictions->has_supporting_index(sim, allow_local);
+    const bool has_queriable_regular_index = _nonprimary_key_restrictions->has_supporting_index(sim, allow_local);

    // At this point, the select statement if fully constructed, but we still have a few things to validate
-    process_partition_key_restrictions(for_view, allow_filtering);
+    process_partition_key_restrictions(has_queriable_pk_index, for_view, allow_filtering);

    // Some but not all of the partition key columns have been specified;
    // hence we need turn these restrictions into index expressions.
@@ -227,10 +227,10 @@ statement_restrictions::statement_restrictions(database& db,
        }
    }

-    process_clustering_columns_restrictions(select_a_collection, for_view, allow_filtering);
+    process_clustering_columns_restrictions(has_queriable_clustering_column_index, select_a_collection, for_view, allow_filtering);

    // Covers indexes on the first clustering column (among others).
-    if (_is_key_range && _has_queriable_ck_index) {
+    if (_is_key_range && has_queriable_clustering_column_index) {
        _uses_secondary_indexing = true;
    }

@@ -265,7 +265,7 @@ statement_restrictions::statement_restrictions(database& db,
    }

    if (!_nonprimary_key_restrictions->empty()) {
-        if (_has_queriable_regular_index) {
+        if (has_queriable_regular_index) {
            _uses_secondary_indexing = true;
        } else if (!allow_filtering) {
            throw exceptions::invalid_request_exception("Cannot execute this query as it might involve data filtering and "
@@ -401,7 +401,7 @@ std::vector<const column_definition*> statement_restrictions::get_column_defs_fo
    return column_defs_for_filtering;
 }

-void statement_restrictions::process_partition_key_restrictions(bool for_view, bool allow_filtering) {
+void statement_restrictions::process_partition_key_restrictions(bool has_queriable_index, bool for_view, bool allow_filtering) {
    // If there is a queriable index, no special condition are required on the other restrictions.
    // But we still need to know 2 things:
    // - If we don't have a queriable index, is the query ok
@@ -412,17 +412,17 @@ void statement_restrictions::process_partition_key_restrictions(bool for_view, b
        _is_key_range = true;
    } else if (_partition_key_restrictions->empty()) {
        _is_key_range = true;
-        _uses_secondary_indexing = _has_queriable_pk_index;
+        _uses_secondary_indexing = has_queriable_index;
    }

    if (_partition_key_restrictions->needs_filtering(*_schema)) {
-        if (!allow_filtering && !for_view && !_has_queriable_pk_index) {
+        if (!allow_filtering && !for_view && !has_queriable_index) {
            throw exceptions::invalid_request_exception("Cannot execute this query as it might involve data filtering and "
                "thus may have unpredictable performance. If you want to execute "
                "this query despite the performance unpredictability, use ALLOW FILTERING");
        }
        _is_key_range = true;
-        _uses_secondary_indexing = _has_queriable_pk_index;
+        _uses_secondary_indexing = has_queriable_index;
    }

 }
@@ -435,7 +435,7 @@ bool statement_restrictions::has_unrestricted_clustering_columns() const {
    return _clustering_columns_restrictions->has_unrestricted_components(*_schema);
 }

-void statement_restrictions::process_clustering_columns_restrictions(bool select_a_collection, bool for_view, bool allow_filtering) {
+void statement_restrictions::process_clustering_columns_restrictions(bool has_queriable_index, bool select_a_collection, bool for_view, bool allow_filtering) {
    if (!has_clustering_columns_restriction()) {
        return;
    }
@@ -445,13 +445,13 @@ void statement_restrictions::process_clustering_columns_restrictions(bool select
            "Cannot restrict clustering columns by IN relations when a collection is selected by the query");
    }
    if (find_atom(_clustering_columns_restrictions->expression, expr::is_on_collection)
-        && !_has_queriable_ck_index && !allow_filtering) {
+        && !has_queriable_index && !allow_filtering) {
        throw exceptions::invalid_request_exception(
            "Cannot restrict clustering columns by a CONTAINS relation without a secondary index or filtering");
    }

    if (has_clustering_columns_restriction() && _clustering_columns_restrictions->needs_filtering(*_schema)) {
-        if (_has_queriable_ck_index) {
+        if (has_queriable_index) {
            _uses_secondary_indexing = true;
        } else if (!allow_filtering && !for_view) {
            auto clustering_columns_iter = _schema->clustering_key_columns().begin();
@@ -490,62 +490,24 @@ std::vector<query::clustering_range> statement_restrictions::get_clustering_boun
    return _clustering_columns_restrictions->bounds_ranges(options);
 }

-namespace {
-
-/// True iff get_partition_slice_for_global_index_posting_list() will be able to calculate the token value from the
-/// given restrictions.  Keep in sync with the get_partition_slice_for_global_index_posting_list() source.
-bool token_known(const statement_restrictions& r) {
-    return !r.has_partition_key_unrestricted_components() && r.get_partition_key_restrictions()->is_all_eq();
-}
-
-} // anonymous namespace
-
 bool statement_restrictions::need_filtering() const {
-    using namespace expr;
+    uint32_t number_of_restricted_columns_for_indexing = 0; // accumulates size() of every entry in _index_restrictions
+    for (auto&& restrictions : _index_restrictions) {
+        number_of_restricted_columns_for_indexing += restrictions->size();
+    }

-    const auto npart = _partition_key_restrictions->size();
-    if (npart > 0 && npart < _schema->partition_key_size()) {
-        // Can't calculate the token value, so a naive base-table query must be filtered.  Same for any index tables,
-        // except if there's only one restriction supported by an index.
-        return !(npart == 1 && _has_queriable_pk_index &&
-                 _clustering_columns_restrictions->empty() && _nonprimary_key_restrictions->empty());
+    int number_of_filtering_restrictions = _nonprimary_key_restrictions->size(); // seeded with the non-primary-key restriction count
+    // If the whole partition key is restricted, it does not imply filtering
+    if (_partition_key_restrictions->has_unrestricted_components(*_schema) || !_partition_key_restrictions->is_all_eq()) {
+        number_of_filtering_restrictions += _partition_key_restrictions->size() + _clustering_columns_restrictions->size(); // partition key not fully EQ-restricted: add every partition-key and clustering restriction
+    } else if (_clustering_columns_restrictions->has_unrestricted_components(*_schema)) {
+        number_of_filtering_restrictions += _clustering_columns_restrictions->size() - _clustering_columns_restrictions->prefix_size(); // add only the clustering restrictions beyond prefix_size()
    }
-    if (_partition_key_restrictions->needs_filtering(*_schema)) {
-        // We most likely cannot calculate token(s).  Neither base-table nor index-table queries can avoid filtering.
-        return true;
-    }
-    // Now we know the partition key is either unrestricted or fully restricted.
-
-    const auto nreg = _nonprimary_key_restrictions->size();
-    if (nreg > 1 || (nreg == 1 && !_has_queriable_regular_index)) {
-        return true; // Regular columns are unsorted in storage and no single index suffices.
-    }
-    if (nreg == 1) { // Single non-key restriction supported by an index.
-        // Will the index-table query require filtering?  That depends on whether its clustering key is restricted to a
-        // continuous range.  Recall that this clustering key is (token, pk, ck) of the base table.
-        if (npart == 0 && _clustering_columns_restrictions->empty()) {
-            return false; // No clustering key restrictions => whole partitions.
-        }
-        return !token_known(*this) || _clustering_columns_restrictions->needs_filtering(*_schema);
-    }
-    // Now we know there are no nonkey restrictions.
-
-    if (dynamic_pointer_cast<multi_column_restriction>(_clustering_columns_restrictions)) {
-        // Multicolumn bounds mean lexicographic order, implying a continuous clustering range.  Multicolumn IN means a
-        // finite set of continuous ranges.  Multicolumn restrictions cannot currently be combined with single-column
-        // clustering restrictions.  Therefore, a continuous clustering range is guaranteed.
-        return false;
-    }
-    if (!_clustering_columns_restrictions->needs_filtering(*_schema)) { // Guaranteed continuous clustering range.
-        return false;
-    }
-    // Now we know there are some clustering-column restrictions that are out-of-order or not EQ.  A naive base-table
-    // query must be filtered.  What about an index-table query?  That can only avoid filtering if there is exactly one
-    // EQ supported by an index.
-    return !(_clustering_columns_restrictions->size() == 1 && _has_queriable_ck_index);
-
-    // TODO: it is also possible to avoid filtering here if a non-empty CK prefix is specified and token_known, plus
-    // there's exactly one out-of-order-but-index-supported clustering-column restriction.
+    return number_of_restricted_columns_for_indexing > 1 // more than one index-restricted column
+            || (number_of_restricted_columns_for_indexing == 0 && _partition_key_restrictions->empty() && !_clustering_columns_restrictions->empty()) // no index restrictions, no partition-key restrictions, but clustering restrictions present
+            || (number_of_restricted_columns_for_indexing != 0 && _nonprimary_key_restrictions->has_multiple_contains()) // index restrictions together with has_multiple_contains() on non-primary-key restrictions
+            || (number_of_restricted_columns_for_indexing != 0 && !_uses_secondary_indexing) // index restrictions exist but _uses_secondary_indexing is false
+            || (_uses_secondary_indexing && number_of_filtering_restrictions > 1); // secondary indexing in use with more than one counted filtering restriction
 }

 void statement_restrictions::validate_secondary_index_selections(bool selects_only_static_columns) {
--- a/cql3/restrictions/statement_restrictions.hh
+++ b/cql3/restrictions/statement_restrictions.hh
@@ -102,8 +102,6 @@ private:
     */
    bool _is_key_range = false;

-    bool _has_queriable_regular_index = false, _has_queriable_pk_index = false, _has_queriable_ck_index = false;
-
 public:
    /**
     * Creates a new empty <code>StatementRestrictions</code>.
@@ -211,7 +209,7 @@ public:
     */
    bool has_unrestricted_clustering_columns() const;
 private:
-    void process_partition_key_restrictions(bool for_view, bool allow_filtering);
+    void process_partition_key_restrictions(bool has_queriable_index, bool for_view, bool allow_filtering);

    /**
     * Processes the clustering column restrictions.
@@ -220,7 +218,7 @@ private:
     * @param select_a_collection <code>true</code> if the query should return a collection column
     * @throws InvalidRequestException if the request is invalid
     */
-    void process_clustering_columns_restrictions(bool select_a_collection, bool for_view, bool allow_filtering);
+    void process_clustering_columns_restrictions(bool has_queriable_index, bool select_a_collection, bool for_view, bool allow_filtering);

    /**
     * Returns the <code>Restrictions</code> for the specified type of columns.
--- a/cql3/sets.cc
+++ b/cql3/sets.cc
@@ -315,7 +315,7 @@ sets::discarder::execute(mutation& m, const clustering_key_prefix& row_key, cons
    assert(column.type->is_multi_cell()); // "Attempted to remove items from a frozen set";

    auto&& value = _t->bind(params._options);
-    if (!value) {
+    if (!value || value == constants::UNSET_VALUE) {
        return;
    }

--- a/cql3/statements/alter_keyspace_statement.cc
+++ b/cql3/statements/alter_keyspace_statement.cc
@@ -93,7 +93,7 @@ void cql3::statements::alter_keyspace_statement::validate(service::storage_proxy

 future<shared_ptr<cql_transport::event::schema_change>> cql3::statements::alter_keyspace_statement::announce_migration(service::storage_proxy& proxy, bool is_local_only) const {
    auto old_ksm = proxy.get_db().local().find_keyspace(_name).metadata();
-    const auto& tm = *proxy.get_token_metadata_ptr();
+    const auto& tm = proxy.get_token_metadata();
    return service::get_local_migration_manager().announce_keyspace_update(_attrs->as_ks_metadata_update(old_ksm, tm), is_local_only).then([this] {
        using namespace cql_transport;
        return ::make_shared<event::schema_change>(
--- a/cql3/statements/alter_table_statement.cc
+++ b/cql3/statements/alter_table_statement.cc
@@ -70,9 +70,7 @@ alter_table_statement::alter_table_statement(shared_ptr<cf_name> name,
 }

 future<> alter_table_statement::check_access(service::storage_proxy& proxy, const service::client_state& state) const {
-    using cdt = auth::command_desc::type;
-    return state.has_column_family_access(keyspace(), column_family(), auth::permission::ALTER,
-                                          _type == type::opts ? cdt::ALTER_WITH_OPTS : cdt::OTHER);
+    return state.has_column_family_access(keyspace(), column_family(), auth::permission::ALTER); // checks ALTER permission on keyspace()/column_family(); no command_desc argument is passed
 }

 void alter_table_statement::validate(service::storage_proxy& proxy, const service::client_state& state) const
--- a/cql3/statements/batch_statement.cc
+++ b/cql3/statements/batch_statement.cc
@@ -38,7 +38,6 @@
 */

 #include "batch_statement.hh"
-#include "cql3/util.hh"
 #include "raw/batch_statement.hh"
 #include "db/config.hh"
 #include "db/consistency_level_validations.hh"
@@ -260,7 +259,6 @@ static thread_local inheriting_concrete_execution_stage<

 future<shared_ptr<cql_transport::messages::result_message>> batch_statement::execute(
        service::storage_proxy& storage, service::query_state& state, const query_options& options) const {
-    cql3::util::validate_timestamp(options, _attrs);
    return batch_stage(this, seastar::ref(storage), seastar::ref(state),
                       seastar::cref(options), false, options.get_timestamp(state));
 }
@@ -286,7 +284,7 @@ future<shared_ptr<cql_transport::messages::result_message>> batch_statement::do_
    ++_stats.batches;
    _stats.statements_in_batches += _statements.size();

-    auto timeout = db::timeout_clock::now() + query_state.get_client_state().get_timeout_config().*get_timeout_config_selector();
+    auto timeout = db::timeout_clock::now() + options.get_timeout_config().*get_timeout_config_selector();
    return get_mutations(storage, options, timeout, local, now, query_state).then([this, &storage, &options, timeout, tr_state = query_state.get_trace_state(),
                                                                                                                               permit = query_state.get_permit()] (std::vector<mutation> ms) mutable {
        return execute_without_conditions(storage, std::move(ms), options.get_consistency(), timeout, std::move(tr_state), std::move(permit));
@@ -343,7 +341,7 @@ future<shared_ptr<cql_transport::messages::result_message>> batch_statement::exe
    schema_ptr schema;

    db::timeout_clock::time_point now = db::timeout_clock::now();
-    const timeout_config& cfg = qs.get_client_state().get_timeout_config();
+    const timeout_config& cfg = options.get_timeout_config();
    auto batch_timeout = now + cfg.write_timeout; // Statement timeout.
    auto cas_timeout = now + cfg.cas_timeout;     // Ballot contention timeout.
    auto read_timeout = now + cfg.read_timeout;   // Query timeout.
--- a/cql3/statements/create_index_statement.cc
+++ b/cql3/statements/create_index_statement.cc
@@ -306,6 +306,13 @@ create_index_statement::announce_migration(service::storage_proxy& proxy, bool i
                    format("Index {} is a duplicate of existing index {}", index.name(), existing_index.value().name()));
        }
    }
+    auto index_table_name = secondary_index::index_table_name(accepted_name);
+    if (db.has_schema(keyspace(), index_table_name)) {
+        return make_exception_future<::shared_ptr<cql_transport::event::schema_change>>(
+            exceptions::invalid_request_exception(format("Index {} cannot be created, because table {} already exists",
+                accepted_name, index_table_name))
+        );
+    }
    ++_cql_stats->secondary_index_creates;
    schema_builder builder{schema};
    builder.with_index(index);
--- a/cql3/statements/create_keyspace_statement.cc
+++ b/cql3/statements/create_keyspace_statement.cc
@@ -109,7 +109,7 @@ void create_keyspace_statement::validate(service::storage_proxy&, const service:
 future<shared_ptr<cql_transport::event::schema_change>> create_keyspace_statement::announce_migration(service::storage_proxy& proxy, bool is_local_only) const
 {
    return make_ready_future<>().then([this, p = proxy.shared_from_this(), is_local_only] {
-        const auto& tm = *p->get_token_metadata_ptr();
+        const auto& tm = p->get_token_metadata();
        return service::get_local_migration_manager().announce_new_keyspace(_attrs->as_ks_metadata(_name, tm), is_local_only);
    }).then_wrapped([this] (auto&& f) {
        try {
@@ -147,7 +147,7 @@ future<> cql3::statements::create_keyspace_statement::grant_permissions_to_creat
 future<::shared_ptr<messages::result_message>>
 create_keyspace_statement::execute(service::storage_proxy& proxy, service::query_state& state, const query_options& options) const {
    return schema_altering_statement::execute(proxy, state, options).then([this, p = proxy.shared_from_this()] (::shared_ptr<messages::result_message> msg) {
-        bool multidc = p->get_token_metadata_ptr()->get_topology().get_datacenter_endpoints().size() > 1;
+        bool multidc = p->get_token_metadata().get_topology().get_datacenter_endpoints().size() > 1;
        bool simple = _attrs->get_replication_strategy_class() == "SimpleStrategy";

        if (multidc && simple) {
--- a/cql3/statements/create_table_statement.cc
+++ b/cql3/statements/create_table_statement.cc
@@ -204,7 +204,6 @@ std::unique_ptr<prepared_statement> create_table_statement::raw_statement::prepa
    }

    _properties.validate(db, _properties.properties()->make_schema_extensions(db.extensions()));
-    const bool has_default_ttl = _properties.properties()->get_default_time_to_live() > 0;

    auto stmt = ::make_shared<create_table_statement>(_cf_name, _properties.properties(), _if_not_exists, _static_columns, _properties.properties()->get_id());

@@ -212,11 +211,6 @@ std::unique_ptr<prepared_statement> create_table_statement::raw_statement::prepa
    for (auto&& entry : _definitions) {
        ::shared_ptr<column_identifier> id = entry.first;
        cql3_type pt = entry.second->prepare(db, keyspace());
-
-        if (has_default_ttl && pt.is_counter()) {
-            throw exceptions::invalid_request_exception("Cannot set default_time_to_live on a table with counters");
-        }
-
        if (pt.get_type()->is_multi_cell()) {
            if (pt.get_type()->is_user_type()) {
                // check for multi-cell types (non-frozen UDTs or collections) inside a non-frozen UDT
--- a/cql3/statements/modification_statement.cc
+++ b/cql3/statements/modification_statement.cc
@@ -44,7 +44,6 @@
 #include "cql3/statements/raw/modification_statement.hh"
 #include "cql3/statements/prepared_statement.hh"
 #include "cql3/restrictions/single_column_restriction.hh"
-#include "cql3/util.hh"
 #include "validation.hh"
 #include "db/consistency_level_validations.hh"
 #include <seastar/core/shared_ptr.hh>
@@ -259,7 +258,6 @@ static thread_local inheriting_concrete_execution_stage<

 future<::shared_ptr<cql_transport::messages::result_message>>
 modification_statement::execute(service::storage_proxy& proxy, service::query_state& qs, const query_options& options) const {
-    cql3::util::validate_timestamp(options, attrs);
    return modify_stage(this, seastar::ref(proxy), seastar::ref(qs), seastar::cref(options));
 }

@@ -286,7 +284,7 @@ modification_statement::do_execute(service::storage_proxy& proxy, service::query
 future<>
 modification_statement::execute_without_condition(service::storage_proxy& proxy, service::query_state& qs, const query_options& options) const {
    auto cl = options.get_consistency();
-    auto timeout = db::timeout_clock::now() + qs.get_client_state().get_timeout_config().*get_timeout_config_selector();
+    auto timeout = db::timeout_clock::now() + options.get_timeout_config().*get_timeout_config_selector();
    return get_mutations(proxy, options, timeout, false, options.get_timestamp(qs), qs).then([this, cl, timeout, &proxy, &qs] (auto mutations) {
        if (mutations.empty()) {
            return now();
@@ -302,7 +300,7 @@ modification_statement::execute_with_condition(service::storage_proxy& proxy, se
    auto cl_for_learn = options.get_consistency();
    auto cl_for_paxos = options.check_serial_consistency();
    db::timeout_clock::time_point now = db::timeout_clock::now();
-    const timeout_config& cfg = qs.get_client_state().get_timeout_config();
+    const timeout_config& cfg = options.get_timeout_config();

    auto statement_timeout = now + cfg.write_timeout; // All CAS networking operations run with write timeout.
    auto cas_timeout = now + cfg.cas_timeout;         // When to give up due to contention.
--- a/cql3/statements/permission_altering_statement.cc
+++ b/cql3/statements/permission_altering_statement.cc
@@ -78,11 +78,11 @@ future<> cql3::statements::permission_altering_statement::check_access(service::
    return state.ensure_exists(_resource).then([this, &state] {
        // check that the user has AUTHORIZE permission on the resource or its parents, otherwise reject
        // GRANT/REVOKE.
-        return state.ensure_has_permission({auth::permission::AUTHORIZE, _resource}).then([this, &state] {
+        return state.ensure_has_permission(auth::permission::AUTHORIZE, _resource).then([this, &state] {
            return do_for_each(_permissions, [this, &state](auth::permission p) {
                // TODO: how about we re-write the access check to check a set
                // right away.
-                return state.ensure_has_permission({p, _resource});
+                return state.ensure_has_permission(p, _resource);
            });
        });
    });
--- a/cql3/statements/role-management-statements.cc
+++ b/cql3/statements/role-management-statements.cc
@@ -59,7 +59,6 @@
 #include "gms/feature_service.hh"
 #include "transport/messages/result_message.hh"
 #include "unimplemented.hh"
-#include "concrete_types.hh"

 namespace cql3 {

@@ -106,30 +105,6 @@ future<> create_role_statement::grant_permissions_to_creator(const service::clie
    });
 }

-static void validate_timeout_options(const auth::authentication_options& auth_options) {
-    if (!auth_options.options) {
-        return;
-    }
-    const auto& options = *auth_options.options;
-    auto check_duration = [&] (const sstring& repr) {
-        data_value v = duration_type->deserialize(duration_type->from_string(repr));
-        cql_duration duration = static_pointer_cast<const duration_type_impl>(duration_type)->from_value(v);
-        if (duration.months || duration.days) {
-            throw exceptions::invalid_request_exception("Timeout values cannot be longer than 24h");
-        }
-        if (duration.nanoseconds % 1'000'000 != 0) {
-            throw exceptions::invalid_request_exception("Timeout values must be expressed in millisecond granularity");
-        }
-    };
-
-    for (auto opt : {"read_timeout", "write_timeout"}) {
-        auto it = options.find(opt);
-        if (it != options.end()) {
-            check_duration(it->second);
-        }
-    }
-}
-
 void create_role_statement::validate(service::storage_proxy& p, const service::client_state&) const {
    validate_cluster_support(p);
 }
@@ -138,7 +113,7 @@ future<> create_role_statement::check_access(service::storage_proxy& proxy, cons
    state.ensure_not_anonymous();

    return async([this, &state] {
-        state.ensure_has_permission({auth::permission::CREATE, auth::root_role_resource()}).get0();
+        state.ensure_has_permission(auth::permission::CREATE, auth::root_role_resource()).get0();

        if (*_options.is_superuser) {
            if (!auth::has_superuser(*state.get_auth_service(), *state.user()).get0()) {
@@ -162,12 +137,9 @@ create_role_statement::execute(service::storage_proxy&,
            [this, &state](const auth::role_config& config, const auth::authentication_options& authen_options) {
        const auto& cs = state.get_client_state();
        auto& as = *cs.get_auth_service();
-        validate_timeout_options(authen_options);

        return auth::create_role(as, _role, config, authen_options).then([this, &cs] {
            return grant_permissions_to_creator(cs);
-        }).then([&state] () mutable {
-            return state.get_client_state().update_per_role_params();
        }).then([] {
            return void_result_message();
        }).handle_exception_type([this](const auth::role_already_exists& e) {
@@ -220,7 +192,7 @@ future<> alter_role_statement::check_access(service::storage_proxy& proxy, const
        }

        if (*user.name != _role) {
-            state.ensure_has_permission({auth::permission::ALTER, auth::make_role_resource(_role)}).get0();
+            state.ensure_has_permission(auth::permission::ALTER, auth::make_role_resource(_role)).get0();
        } else {
            const auto alterable_options = state.get_auth_service()->underlying_authenticator().alterable_options();

@@ -252,9 +224,8 @@ alter_role_statement::execute(service::storage_proxy&, service::query_state& sta
            extract_authentication_options(_options),
            [this, &state](const auth::role_config_update& update, const auth::authentication_options& authen_options) {
        auto& as = *state.get_client_state().get_auth_service();
-        return auth::alter_role(as, _role, update, authen_options).then([&state] () mutable {
-            return state.get_client_state().update_per_role_params();
-        }).then([] {
+
+        return auth::alter_role(as, _role, update, authen_options).then([] {
            return void_result_message();
        }).handle_exception_type([](const auth::nonexistant_role& e) {
            return make_exception_future<result_message_ptr>(exceptions::invalid_request_exception(e.what()));
@@ -285,7 +256,7 @@ future<> drop_role_statement::check_access(service::storage_proxy& proxy, const
    state.ensure_not_anonymous();

    return async([this, &state] {
-        state.ensure_has_permission({auth::permission::DROP, auth::make_role_resource(_role)}).get0();
+        state.ensure_has_permission(auth::permission::DROP, auth::make_role_resource(_role)).get0();

        auto& as = *state.get_auth_service();

@@ -334,7 +305,7 @@ future<> list_roles_statement::check_access(service::storage_proxy& proxy, const
    state.ensure_not_anonymous();

    return async([this, &state] {
-        if (state.check_has_permission({auth::permission::DESCRIBE, auth::root_role_resource()}).get0()) {
+        if (state.check_has_permission(auth::permission::DESCRIBE, auth::root_role_resource()).get0()) {
            return;
        }

@@ -433,9 +404,9 @@ list_roles_statement::execute(service::storage_proxy&, service::query_state& sta
        if (!_grantee) {
            // A user with DESCRIBE on the root role resource lists all roles in the system. A user without it lists
            // only the roles granted to them.
-            return cs.check_has_permission({
+            return cs.check_has_permission(
                    auth::permission::DESCRIBE,
-                    auth::root_role_resource()}).then([&cs, &rm, &a, query_mode](bool has_describe) {
+                    auth::root_role_resource()).then([&cs, &rm, &a, query_mode](bool has_describe) {
                if (has_describe) {
                    return rm.query_all().then([&rm, &a](auto&& roles) {
                        return make_results(rm, a, std::move(roles));
@@ -469,7 +440,7 @@ future<> grant_role_statement::check_access(service::storage_proxy& proxy, const
    state.ensure_not_anonymous();

    return do_with(auth::make_role_resource(_role), [this, &state](const auto& r) {
-        return state.ensure_has_permission({auth::permission::AUTHORIZE, r});
+        return state.ensure_has_permission(auth::permission::AUTHORIZE, r);
    });
 }

@@ -497,7 +468,7 @@ future<> revoke_role_statement::check_access(service::storage_proxy& proxy, cons
    state.ensure_not_anonymous();

    return do_with(auth::make_role_resource(_role), [this, &state](const auto& r) {
-        return state.ensure_has_permission({auth::permission::AUTHORIZE, r});
+        return state.ensure_has_permission(auth::permission::AUTHORIZE, r);
    });
 }

--- a/cql3/statements/select_statement.cc
+++ b/cql3/statements/select_statement.cc
@@ -366,8 +366,7 @@ select_statement::do_execute(service::storage_proxy& proxy,
    }

    command->slice.options.set<query::partition_slice::option::allow_short_read>();
-    auto timeout_duration = state.get_client_state().get_timeout_config().*get_timeout_config_selector();
-    auto timeout = db::timeout_clock::now() + timeout_duration;
+    auto timeout_duration = options.get_timeout_config().*get_timeout_config_selector();
    auto p = service::pager::query_pagers::pager(_schema, _selection,
            state, options, command, std::move(key_ranges), restrictions_need_filtering ? _restrictions : nullptr);

@@ -375,9 +374,10 @@ select_statement::do_execute(service::storage_proxy& proxy,
        return do_with(
                cql3::selection::result_set_builder(*_selection, now,
                        options.get_cql_serialization_format(), *_group_by_cell_indices),
-                [this, p, page_size, now, timeout, restrictions_need_filtering](auto& builder) {
+                [this, p, page_size, now, timeout_duration, restrictions_need_filtering](auto& builder) {
                    return do_until([p] {return p->is_exhausted();},
-                            [p, &builder, page_size, now, timeout] {
+                            [p, &builder, page_size, now, timeout_duration] {
+                                auto timeout = db::timeout_clock::now() + timeout_duration;
                                return p->fetch_page(builder, page_size, now, timeout);
                            }
                    ).then([this, p, &builder, restrictions_need_filtering] {
@@ -401,6 +401,7 @@ select_statement::do_execute(service::storage_proxy& proxy,
                        " you must either remove the ORDER BY or the IN and sort client side, or disable paging for this query");
    }

+    auto timeout = db::timeout_clock::now() + timeout_duration;
    if (_selection->is_trivial() && !restrictions_need_filtering && !_per_partition_limit) {
        return p->fetch_page_generator(page_size, now, timeout, _stats).then([this, p] (result_generator generator) {
            auto meta = [&] () -> shared_ptr<const cql3::metadata> {
@@ -455,7 +456,7 @@ generate_base_key_from_index_pk(const partition_key& index_pk, const std::option
        if (!view_col) {
            throw std::runtime_error(format("Base key column not found in the view: {}", base_col.name_as_text()));
        }
-        if (base_col.type != view_col->type) {
+        if (base_col.type->without_reversed() != *view_col->type) {
            throw std::runtime_error(format("Mismatched types for base and view columns {}: {} and {}",
                    base_col.name_as_text(), base_col.type->cql3_type_name(), view_col->type->cql3_type_name()));
        }
@@ -513,9 +514,9 @@ indexed_table_select_statement::do_execute_base_query(
        lw_shared_ptr<const service::pager::paging_state> paging_state) const {
    using value_type = std::tuple<foreign_ptr<lw_shared_ptr<query::result>>, lw_shared_ptr<query::read_command>>;
    auto cmd = prepare_command_for_base_query(proxy, options, state, now, bool(paging_state));
-    auto timeout = db::timeout_clock::now() + state.get_client_state().get_timeout_config().*get_timeout_config_selector();
+    auto timeout = db::timeout_clock::now() + options.get_timeout_config().*get_timeout_config_selector();
    uint32_t queried_ranges_count = partition_ranges.size();
-    service::query_ranges_to_vnodes_generator ranges_to_vnodes(proxy.get_token_metadata_ptr(), _schema, std::move(partition_ranges));
+    service::query_ranges_to_vnodes_generator ranges_to_vnodes(proxy.get_token_metadata(), _schema, std::move(partition_ranges));

    struct base_query_state {
        query::result_merger merger;
@@ -607,7 +608,7 @@ indexed_table_select_statement::do_execute_base_query(
        lw_shared_ptr<const service::pager::paging_state> paging_state) const {
    using value_type = std::tuple<foreign_ptr<lw_shared_ptr<query::result>>, lw_shared_ptr<query::read_command>>;
    auto cmd = prepare_command_for_base_query(proxy, options, state, now, bool(paging_state));
-    auto timeout = db::timeout_clock::now() + state.get_client_state().get_timeout_config().*get_timeout_config_selector();
+    auto timeout = db::timeout_clock::now() + options.get_timeout_config().*get_timeout_config_selector();

    struct base_query_state {
        query::result_merger merger;
@@ -689,7 +690,7 @@ select_statement::execute(service::storage_proxy& proxy,
    // is specified we need to get "limit" rows from each partition since there
    // is no way to tell which of these rows belong to the query result before
    // doing post-query ordering.
-    auto timeout = db::timeout_clock::now() + state.get_client_state().get_timeout_config().*get_timeout_config_selector();
+    auto timeout = db::timeout_clock::now() + options.get_timeout_config().*get_timeout_config_selector();
    if (needs_post_query_ordering() && _limit) {
        return do_with(std::forward<dht::partition_range_vector>(partition_ranges), [this, &proxy, &state, &options, cmd, timeout](auto& prs) {
            assert(cmd->partition_limit == query::max_partitions);
@@ -890,23 +891,6 @@ static void append_base_key_to_index_ck(std::vector<bytes_view>& exploded_index_
    std::move(begin, key_view.end(), std::back_inserter(exploded_index_ck));
 }

-bytes indexed_table_select_statement::compute_idx_token(const partition_key& key) const {
-    const column_definition& cdef = *_view_schema->clustering_key_columns().begin();
-    clustering_row empty_row(clustering_key_prefix::make_empty());
-    bytes_opt computed_value;
-    if (!cdef.is_computed()) {
-        // FIXME(pgrabowski): this legacy code is here for backward compatibility and should be removed
-        // once "computed_columns feature" is supported by every node
-        computed_value = legacy_token_column_computation().compute_value(*_schema, key, empty_row);
-    } else {
-        computed_value = cdef.get_computation().compute_value(*_schema, key, empty_row);
-    }
-    if (!computed_value) {
-        throw std::logic_error(format("No value computed for idx_token column {}", cdef.name()));
-    }
-    return *computed_value;
-}
-
 lw_shared_ptr<const service::pager::paging_state> indexed_table_select_statement::generate_view_paging_state_from_base_query_results(lw_shared_ptr<const service::pager::paging_state> paging_state,
        const foreign_ptr<lw_shared_ptr<query::result>>& results, service::storage_proxy& proxy, service::query_state& state, const query_options& options) const {
    const column_definition* cdef = _schema->get_column_definition(to_bytes(_index.target_column()));
@@ -940,7 +924,7 @@ lw_shared_ptr<const service::pager::paging_state> indexed_table_select_statement
    if (_index.metadata().local()) {
        exploded_index_ck.push_back(bytes_view(*indexed_column_value));
    } else {
-        token_bytes = compute_idx_token(last_base_pk);
+        token_bytes = dht::get_token(*_schema, last_base_pk).data();
        exploded_index_ck.push_back(bytes_view(token_bytes));
        append_base_key_to_index_ck<partition_key>(exploded_index_ck, last_base_pk, *cdef);
    }
@@ -1124,7 +1108,7 @@ query::partition_slice indexed_table_select_statement::get_partition_slice_for_g
            // Computed token column needs to be added to index view restrictions
            const column_definition& token_cdef = *_view_schema->clustering_key_columns().begin();
            auto base_pk = partition_key::from_optional_exploded(*_schema, single_pk_restrictions->values(options));
-            bytes token_value = compute_idx_token(base_pk);
+            bytes token_value = dht::get_token(*_schema, base_pk).data();
            auto token_restriction = ::make_shared<restrictions::single_column_restriction>(token_cdef);
            token_restriction->expression = expr::binary_operator{
                    &token_cdef, expr::oper_t::EQ,
@@ -1136,7 +1120,11 @@ query::partition_slice indexed_table_select_statement::get_partition_slice_for_g
                if (single_ck_restrictions) {
                    auto prefix_restrictions = single_ck_restrictions->get_longest_prefix_restrictions();
                    auto clustering_restrictions_from_base = ::make_shared<restrictions::single_column_clustering_key_restrictions>(_view_schema, *prefix_restrictions);
+                    const auto indexed_column = _view_schema->get_column_definition(to_bytes(_index.target_column()));
                    for (auto restriction_it : clustering_restrictions_from_base->restrictions()) {
+                        if (restriction_it.first == indexed_column) {
+                            continue; // In the index table, the indexed column is the partition (not clustering) key.
+                        }
                        clustering_restrictions->merge_with(restriction_it.second);
                    }
                }
@@ -1250,7 +1238,7 @@ indexed_table_select_statement::find_index_partition_ranges(service::storage_pro
 {
    using value_type = std::tuple<dht::partition_range_vector, lw_shared_ptr<const service::pager::paging_state>>;
    auto now = gc_clock::now();
-    auto timeout = db::timeout_clock::now() + state.get_client_state().get_timeout_config().*get_timeout_config_selector();
+    auto timeout = db::timeout_clock::now() + options.get_timeout_config().*get_timeout_config_selector();
    return read_posting_list(proxy, options, get_limit(options), state, now, timeout, false).then(
            [this, now, &options] (::shared_ptr<cql_transport::messages::result_message::rows> rows) {
        auto rs = cql3::untyped_result_set(rows);
@@ -1291,7 +1279,7 @@ indexed_table_select_statement::find_index_clustering_rows(service::storage_prox
 {
    using value_type = std::tuple<std::vector<indexed_table_select_statement::primary_key>, lw_shared_ptr<const service::pager::paging_state>>;
    auto now = gc_clock::now();
-    auto timeout = db::timeout_clock::now() + state.get_client_state().get_timeout_config().*get_timeout_config_selector();
+    auto timeout = db::timeout_clock::now() + options.get_timeout_config().*get_timeout_config_selector();
    return read_posting_list(proxy, options, get_limit(options), state, now, timeout, true).then(
            [this, now, &options] (::shared_ptr<cql_transport::messages::result_message::rows> rows) {

--- a/cql3/statements/select_statement.hh
+++ b/cql3/statements/select_statement.hh
@@ -300,8 +300,6 @@ private:

    query::partition_slice get_partition_slice_for_local_index_posting_list(const query_options& options) const;
    query::partition_slice get_partition_slice_for_global_index_posting_list(const query_options& options) const;
-
-    bytes compute_idx_token(const partition_key& key) const;
 };

 }
--- a/cql3/util.cc
+++ b/cql3/util.cc
@@ -119,19 +119,5 @@ void do_with_parser_impl(const sstring_view& cql, noncopyable_function<void (cql

 #endif

-void validate_timestamp(const query_options& options, const std::unique_ptr<attributes>& attrs) {
-    if (attrs->is_timestamp_set()) {
-        static constexpr int64_t MAX_DIFFERENCE = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::days(3)).count();
-        auto now = std::chrono::duration_cast<std::chrono::microseconds>(db_clock::now().time_since_epoch()).count();
-
-        auto timestamp = attrs->get_timestamp(now, options);
-
-        if (timestamp - now > MAX_DIFFERENCE) {
-            throw exceptions::invalid_request_exception("Cannot provide a timestamp more than 3 days into the future. If this was not intended, "
-            "make sure the timestamp is in microseconds");
-        }
-    }
-}
-

 }
--- a/cql3/util.hh
+++ b/cql3/util.hh
@@ -89,10 +89,6 @@ std::unique_ptr<cql3::statements::raw::select_statement> build_select_statement(
 /// character itself is quoted by doubling it.
 sstring maybe_quote(const sstring& s);

-// Check whether timestamp is not too far in the future as this probably
-// indicates its incorrectness (for example using other units than microseconds).
-void validate_timestamp(const query_options& options, const std::unique_ptr<attributes>& attrs);
-
 } // namespace util

 } // namespace cql3
--- a/database.cc
+++ b/database.cc
@@ -57,7 +57,6 @@
 #include <boost/range/algorithm/find_if.hpp>
 #include <boost/range/algorithm/sort.hpp>
 #include <boost/range/adaptor/map.hpp>
-#include <boost/container/static_vector.hpp>
 #include "frozen_mutation.hh"
 #include <seastar/core/do_with.hh>
 #include "service/migration_manager.hh"
@@ -83,7 +82,6 @@

 #include "checked-file-impl.hh"
 #include "utils/disk-error-handler.hh"
-#include "utils/human_readable.hh"

 #include "db/timeout_clock.hh"
 #include "db/large_data_handler.hh"
@@ -92,7 +90,6 @@

 #include "user_types_metadata.hh"
 #include <seastar/core/shared_ptr_incomplete.hh>
-#include <seastar/util/memory_diagnostics.hh>

 #include "schema_builder.hh"

@@ -168,181 +165,14 @@ bool string_pair_eq::operator()(spair lhs, spair rhs) const {

 utils::UUID database::empty_version = utils::UUID_gen::get_name_UUID(bytes{});

-namespace {
-
-class memory_diagnostics_line_writer {
-    std::array<char, 4096> _line_buf;
-    memory::memory_diagnostics_writer _wr;
-
-public:
-    memory_diagnostics_line_writer(memory::memory_diagnostics_writer wr) : _wr(std::move(wr)) { }
-    void operator() (const char* fmt) {
-        _wr(fmt);
-    }
-    void operator() (const char* fmt, const auto& param1, const auto&... params) {
-        const auto begin = _line_buf.begin();
-        auto it = fmt::format_to(begin, fmt, param1, params...);
-        _wr(std::string_view(begin, it - begin));
-    }
-};
-
-const boost::container::static_vector<std::pair<size_t, boost::container::static_vector<table*, 16>>, 10>
-phased_barrier_top_10_counts(const std::unordered_map<utils::UUID, lw_shared_ptr<column_family>>& tables, std::function<size_t(table&)> op_count_getter) {
-    using table_list = boost::container::static_vector<table*, 16>;
-    using count_and_tables = std::pair<size_t, table_list>;
-    const auto less = [] (const count_and_tables& a, const count_and_tables& b) {
-        return a.first < b.first;
-    };
-
-    boost::container::static_vector<count_and_tables, 10> res;
-    count_and_tables* min_element = nullptr;
-
-    for (const auto& [tid, table] : tables) {
-        const auto count = op_count_getter(*table);
-        if (!count) {
-            continue;
-        }
-        if (res.size() < res.capacity()) {
-            auto& elem = res.emplace_back(count, table_list({table.get()}));
-            if (!min_element || min_element->first > count) {
-                min_element = &elem;
-            }
-            continue;
-        }
-        if (min_element->first > count) {
-            continue;
-        }
-
-        auto it = boost::find_if(res, [count] (const count_and_tables& x) {
-            return x.first == count;
-        });
-        if (it != res.end()) {
-            it->second.push_back(table.get());
-            continue;
-        }
-
-        // If we are here, min_element->first < count
-        *min_element = {count, table_list({table.get()})};
-        min_element = &*boost::min_element(res, less);
-    }
-
-    boost::sort(res, less);
-
-    return res;
-}
-
-} // anonymous namespace
-
-void database::setup_scylla_memory_diagnostics_producer() {
-    memory::set_additional_diagnostics_producer([this] (memory::memory_diagnostics_writer wr) {
-        auto writeln = memory_diagnostics_line_writer(std::move(wr));
-
-        const auto lsa_occupancy_stats = logalloc::lsa_global_occupancy_stats();
-        writeln("LSA\n");
-        writeln("  allocated: {}\n", utils::to_hr_size(lsa_occupancy_stats.total_space()));
-        writeln("  used:      {}\n", utils::to_hr_size(lsa_occupancy_stats.used_space()));
-        writeln("  free:      {}\n\n", utils::to_hr_size(lsa_occupancy_stats.free_space()));
-
-        const auto row_cache_occupancy_stats = _row_cache_tracker.region().occupancy();
-        writeln("Cache:\n");
-        writeln("  total: {}\n", utils::to_hr_size(row_cache_occupancy_stats.total_space()));
-        writeln("  used:  {}\n", utils::to_hr_size(row_cache_occupancy_stats.used_space()));
-        writeln("  free:  {}\n\n", utils::to_hr_size(row_cache_occupancy_stats.free_space()));
-
-        writeln("Memtables:\n");
-        writeln(" total: {}\n", utils::to_hr_size(lsa_occupancy_stats.total_space() - row_cache_occupancy_stats.total_space()));
-
-        writeln(" Regular:\n");
-        writeln("  real dirty: {}\n", utils::to_hr_size(_dirty_memory_manager.real_dirty_memory()));
-        writeln("  virt dirty: {}\n", utils::to_hr_size(_dirty_memory_manager.virtual_dirty_memory()));
-        writeln(" System:\n");
-        writeln("  real dirty: {}\n", utils::to_hr_size(_system_dirty_memory_manager.real_dirty_memory()));
-        writeln("  virt dirty: {}\n\n", utils::to_hr_size(_system_dirty_memory_manager.virtual_dirty_memory()));
-
-        writeln("Replica:\n");
-
-        writeln("  Read Concurrency Semaphores:\n");
-        const std::pair<const char*, reader_concurrency_semaphore&> semaphores[] = {
-                {"user", _read_concurrency_sem},
-                {"streaming", _streaming_concurrency_sem},
-                {"system", _system_read_concurrency_sem},
-                {"compaction", _compaction_concurrency_sem},
-        };
-        for (const auto& [name, sem] : semaphores) {
-            const auto initial_res = sem.initial_resources();
-            const auto available_res = sem.available_resources();
-            if (sem.is_unlimited()) {
-                writeln("    {}: {}/∞, {}/∞\n",
-                        name,
-                        initial_res.count - available_res.count,
-                        utils::to_hr_size(initial_res.memory - available_res.memory),
-                        sem.waiters());
-            } else {
-                writeln("    {}: {}/{}, {}/{}, queued: {}\n",
-                        name,
-                        initial_res.count - available_res.count,
-                        initial_res.count,
-                        utils::to_hr_size(initial_res.memory - available_res.memory),
-                        utils::to_hr_size(initial_res.memory),
-                        sem.waiters());
-            }
-        }
-
-        writeln("  Execution Stages:\n");
-        const std::pair<const char*, inheriting_execution_stage::stats> execution_stage_summaries[] = {
-                {"data query stage", _data_query_stage.get_stats()},
-                {"mutation query stage", _mutation_query_stage.get_stats()},
-                {"apply stage", _apply_stage.get_stats()},
-        };
-        for (const auto& [name, exec_stage_summary] : execution_stage_summaries) {
-            writeln("    {}:\n", name);
-            size_t total = 0;
-            for (const auto& [sg, stats ] : exec_stage_summary) {
-                const auto count = stats.function_calls_enqueued - stats.function_calls_executed;
-                if (!count) {
-                    continue;
-                }
-                writeln("      {}\t{}\n", sg.name(), count);
-                total += count;
-            }
-            writeln("         Total: {}\n", total);
-        }
-
-        writeln("  Tables - Ongoing Operations:\n");
-        const std::pair<const char*, std::function<size_t(table&)>> phased_barriers[] = {
-                {"Pending writes", std::mem_fn(&table::writes_in_progress)},
-                {"Pending reads", std::mem_fn(&table::reads_in_progress)},
-                {"Pending streams", std::mem_fn(&table::streams_in_progress)},
-        };
-        for (const auto& [name, op_count_getter] : phased_barriers) {
-            writeln("    {} (top 10):\n", name);
-            auto total = 0;
-            for (const auto& [count, table_list] : phased_barrier_top_10_counts(_column_families, op_count_getter)) {
-                total += count;
-                writeln("      {}", count);
-                if (table_list.empty()) {
-                    writeln("\n");
-                    continue;
-                }
-                auto it = table_list.begin();
-                for (; it != table_list.end() - 1; ++it) {
-                    writeln(" {}.{},", (*it)->schema()->ks_name(), (*it)->schema()->cf_name());
-                }
-                writeln(" {}.{}\n", (*it)->schema()->ks_name(), (*it)->schema()->cf_name());
-            }
-            writeln("      {} Total (all)\n", total);
-        }
-        writeln("\n");
-    });
-}
-
-database::database(const db::config& cfg, database_config dbcfg, service::migration_notifier& mn, gms::feature_service& feat, const locator::shared_token_metadata& stm, abort_source& as, sharded<semaphore>& sst_dir_sem)
+database::database(const db::config& cfg, database_config dbcfg, service::migration_notifier& mn, gms::feature_service& feat, const locator::token_metadata& tm, abort_source& as, sharded<semaphore>& sst_dir_sem)
    : _stats(make_lw_shared<db_stats>())
    , _cl_stats(std::make_unique<cell_locker_stats>())
    , _cfg(cfg)
    // Allow system tables a pool of 10 MB memory to write, but never block on other regions.
    , _system_dirty_memory_manager(*this, 10 << 20, cfg.virtual_dirty_soft_limit(), default_scheduling_group())
-    , _dirty_memory_manager(*this, dbcfg.available_memory * 0.50, cfg.virtual_dirty_soft_limit(), dbcfg.statement_scheduling_group)
+    , _dirty_memory_manager(*this, dbcfg.available_memory * 0.45, cfg.virtual_dirty_soft_limit(), dbcfg.statement_scheduling_group)
+    , _streaming_dirty_memory_manager(*this, dbcfg.available_memory * 0.10, cfg.virtual_dirty_soft_limit(), dbcfg.streaming_scheduling_group)
    , _dbcfg(dbcfg)
    , _memtable_controller(make_flush_controller(_cfg, dbcfg.memtable_scheduling_group, service::get_local_memtable_flush_priority(), [this, limit = float(_dirty_memory_manager.throttle_threshold())] {
        auto backlog = (_dirty_memory_manager.virtual_dirty_memory()) / limit;
@@ -389,11 +219,9 @@ database::database(const db::config& cfg, database_config dbcfg, service::migrat
    , _data_listeners(std::make_unique<db::data_listeners>(*this))
    , _mnotifier(mn)
    , _feat(feat)
-    , _shared_token_metadata(stm)
+    , _token_metadata(tm)
    , _sst_dir_semaphore(sst_dir_sem)
 {
-    assert(dbcfg.available_memory != 0); // Detect misconfigured unit tests, see #7544
-
    local_schema_registry().init(*this); // TODO: we're never unbound.
    setup_metrics();

@@ -405,8 +233,6 @@ database::database(const db::config& cfg, database_config dbcfg, service::migrat
        dblog.debug("Enabling infinite bound range deletions");
        _supports_infinite_bound_range_deletions = true;
    });
-
-    setup_scylla_memory_diagnostics_producer();
 }

 const db::extensions& database::extensions() const {
@@ -483,6 +309,7 @@ void
 database::setup_metrics() {
    _dirty_memory_manager.setup_collectd("regular");
    _system_dirty_memory_manager.setup_collectd("system");
+    _streaming_dirty_memory_manager.setup_collectd("streaming");

    namespace sm = seastar::metrics;

@@ -491,12 +318,12 @@ database::setup_metrics() {
    auto system_label_instance = class_label("system");

    _metrics.add_group("memory", {
-        sm::make_gauge("dirty_bytes", [this] { return _dirty_memory_manager.real_dirty_memory() + _system_dirty_memory_manager.real_dirty_memory(); },
+        sm::make_gauge("dirty_bytes", [this] { return _dirty_memory_manager.real_dirty_memory() + _system_dirty_memory_manager.real_dirty_memory() + _streaming_dirty_memory_manager.real_dirty_memory(); },
                       sm::description("Holds the current size of all (\"regular\", \"system\" and \"streaming\") non-free memory in bytes: used memory + released memory that hasn't been returned to a free memory pool yet. "
                                       "Total memory size minus this value represents the amount of available memory. "
                                       "If this value minus virtual_dirty_bytes is too high then this means that the dirty memory eviction lags behind.")),

-        sm::make_gauge("virtual_dirty_bytes", [this] { return _dirty_memory_manager.virtual_dirty_memory() + _system_dirty_memory_manager.virtual_dirty_memory(); },
+        sm::make_gauge("virtual_dirty_bytes", [this] { return _dirty_memory_manager.virtual_dirty_memory() + _system_dirty_memory_manager.virtual_dirty_memory() + _streaming_dirty_memory_manager.virtual_dirty_memory(); },
                       sm::description("Holds the size of all (\"regular\", \"system\" and \"streaming\") used memory in bytes. Compare it to \"dirty_bytes\" to see how many memory is wasted (neither used nor available).")),
    });

@@ -629,11 +456,6 @@ database::setup_metrics() {
                                       " to be able to admit new ones, if there is a shortage of permits."),
                       {user_label_instance}),

-        sm::make_derive("reads_shed_due_to_overload", _read_concurrency_sem.get_stats().total_reads_shed_due_to_overload,
-                       sm::description("The number of reads shed because the admission queue reached its max capacity."
-                                       " When the queue is full, excessive reads are shed to avoid overload."),
-                       {user_label_instance}),
-
        sm::make_gauge("active_reads", [this] { return max_count_streaming_concurrent_reads - _streaming_concurrency_sem.available_resources().count; },
                       sm::description("Holds the number of currently active read operations issued on behalf of streaming "),
                       {streaming_label_instance}),
@@ -659,11 +481,6 @@ database::setup_metrics() {
                                       " to be able to admit new ones, if there is a shortage of permits."),
                       {streaming_label_instance}),

-        sm::make_derive("reads_shed_due_to_overload", _streaming_concurrency_sem.get_stats().total_reads_shed_due_to_overload,
-                       sm::description("The number of reads shed because the admission queue reached its max capacity."
-                                       " When the queue is full, excessive reads are shed to avoid overload."),
-                       {streaming_label_instance}),
-
        sm::make_gauge("active_reads", [this] { return max_count_system_concurrent_reads - _system_read_concurrency_sem.available_resources().count; },
                       sm::description("Holds the number of currently active read operations from \"system\" keyspace tables. "),
                       {system_label_instance}),
@@ -688,11 +505,6 @@ database::setup_metrics() {
                                       " to be able to admit new ones, if there is a shortage of permits."),
                       {system_label_instance}),

-        sm::make_derive("reads_shed_due_to_overload", _system_read_concurrency_sem.get_stats().total_reads_shed_due_to_overload,
-                       sm::description("The number of reads shed because the admission queue reached its max capacity."
-                                       " When the queue is full, excessive reads are shed to avoid overload."),
-                       {system_label_instance}),
-
        sm::make_gauge("total_result_bytes", [this] { return get_result_memory_limiter().total_used_memory(); },
                       sm::description("Holds the current amount of memory used for results.")),

@@ -760,9 +572,6 @@ void database::set_format_by_config() {
 }

 database::~database() {
-    _read_concurrency_sem.clear_inactive_reads();
-    _streaming_concurrency_sem.clear_inactive_reads();
-    _system_read_concurrency_sem.clear_inactive_reads();
 }

 void database::update_version(const utils::UUID& version) {
@@ -850,11 +659,22 @@ future<> database::parse_system_tables(distributed<service::storage_proxy>& prox
            });
    }).then([&proxy, &mm, this] {
        return do_parse_schema_tables(proxy, db::schema_tables::VIEWS, [this, &proxy, &mm] (schema_result_value_type &v) {
-            return create_views_from_schema_partition(proxy, v.second).then([this, &mm] (std::vector<view_ptr> views) {
-                return parallel_for_each(views.begin(), views.end(), [this, &mm] (auto&& v) {
-                    return this->add_column_family_and_make_directory(v).then([this, &mm, v] {
-                        return maybe_update_legacy_secondary_index_mv_schema(mm.local(), *this, v);
-                    });
+            return create_views_from_schema_partition(proxy, v.second).then([this, &mm, &proxy] (std::vector<view_ptr> views) {
+                return parallel_for_each(views.begin(), views.end(), [this, &mm, &proxy] (auto&& v) {
+                    // TODO: Remove once computed columns are guaranteed to be featured in the whole cluster.
+                    // We fix the schema in place here in order to avoid races (write commands coming from other coordinators).
+                    view_ptr fixed_v = maybe_fix_legacy_secondary_index_mv_schema(*this, v, nullptr, preserve_version::yes);
+                    view_ptr v_to_add = fixed_v ? fixed_v : v;
+                    future<> f = this->add_column_family_and_make_directory(v_to_add);
+                    if (bool(fixed_v)) {
+                        v_to_add = fixed_v;
+                        auto&& keyspace = find_keyspace(v->ks_name()).metadata();
+                        auto mutations = db::schema_tables::make_update_view_mutations(keyspace, view_ptr(v), fixed_v, api::new_timestamp(), true);
+                        f = f.then([this, &proxy, mutations = std::move(mutations)] {
+                            return db::schema_tables::merge_schema(proxy, _feat, std::move(mutations));
+                        });
+                    }
+                    return f;
                });
            });
        });
@@ -905,17 +725,7 @@ future<> database::update_keyspace(const sstring& name) {
        auto tmp_ksm = db::schema_tables::create_keyspace_from_schema_partition(v);
        auto new_ksm = ::make_lw_shared<keyspace_metadata>(tmp_ksm->name(), tmp_ksm->strategy_name(), tmp_ksm->strategy_options(), tmp_ksm->durable_writes(),
                        boost::copy_range<std::vector<schema_ptr>>(ks.metadata()->cf_meta_data() | boost::adaptors::map_values), std::move(ks.metadata()->user_types()));
-
-        bool old_durable_writes = ks.metadata()->durable_writes();
-        bool new_durable_writes = new_ksm->durable_writes();
-        if (old_durable_writes != new_durable_writes) {
-            for (auto& [cf_name, cf_schema] : new_ksm->cf_meta_data()) {
-                auto& cf = find_column_family(cf_schema);
-                cf.set_durable_writes(new_durable_writes);
-            }
-        }
-
-        ks.update_from(get_shared_token_metadata(), std::move(new_ksm));
+        ks.update_from(get_token_metadata(), std::move(new_ksm));
        return get_notifier().update_keyspace(ks.metadata());
    });
 }
@@ -934,7 +744,6 @@ void database::add_column_family(keyspace& ks, schema_ptr schema, column_family:
    } else {
       cf = make_lw_shared<column_family>(schema, std::move(cfg), column_family::no_commitlog(), *_compaction_manager, *_cl_stats, _row_cache_tracker);
    }
-    cf->set_durable_writes(ks.metadata()->durable_writes());

    auto uuid = schema->id();
    if (_column_families.contains(uuid)) {
@@ -1000,7 +809,7 @@ future<> database::drop_column_family(const sstring& ks_name, const sstring& cf_
    remove(*cf);
    cf->clear_views();
    auto& ks = find_keyspace(ks_name);
-    return when_all_succeed(cf->await_pending_writes(), cf->await_pending_reads()).then_unpack([this, &ks, cf, tsf = std::move(tsf), snapshot] {
+    return cf->await_pending_ops().then([this, &ks, cf, tsf = std::move(tsf), snapshot] {
        return truncate(ks, *cf, std::move(tsf), snapshot).finally([this, cf] {
            return cf->stop();
        });
@@ -1095,12 +904,12 @@ bool database::column_family_exists(const utils::UUID& uuid) const {
 }

 void
-keyspace::create_replication_strategy(const locator::shared_token_metadata& stm, const std::map<sstring, sstring>& options) {
+keyspace::create_replication_strategy(const locator::token_metadata& tm, const std::map<sstring, sstring>& options) {
    using namespace locator;

    _replication_strategy =
            abstract_replication_strategy::create_replication_strategy(
-                _metadata->name(), _metadata->strategy_name(), stm, options);
+                _metadata->name(), _metadata->strategy_name(), tm, options);
 }

 locator::abstract_replication_strategy&
@@ -1119,9 +928,9 @@ keyspace::set_replication_strategy(std::unique_ptr<locator::abstract_replication
    _replication_strategy = std::move(replication_strategy);
 }

-void keyspace::update_from(const locator::shared_token_metadata& stm, ::lw_shared_ptr<keyspace_metadata> ksm) {
+void keyspace::update_from(const locator::token_metadata& tm, ::lw_shared_ptr<keyspace_metadata> ksm) {
    _metadata = std::move(ksm);
-   create_replication_strategy(stm, _metadata->strategy_options());
+   create_replication_strategy(tm, _metadata->strategy_options());
 }

 future<> keyspace::ensure_populated() const {
@@ -1155,6 +964,7 @@ keyspace::make_column_family_config(const schema& s, const database& db) const {
    cfg.enable_dangerous_direct_import_of_cassandra_counters = _config.enable_dangerous_direct_import_of_cassandra_counters;
    cfg.compaction_enforce_min_threshold = _config.compaction_enforce_min_threshold;
    cfg.dirty_memory_manager = _config.dirty_memory_manager;
+    cfg.streaming_dirty_memory_manager = _config.streaming_dirty_memory_manager;
    cfg.streaming_read_concurrency_semaphore = _config.streaming_read_concurrency_semaphore;
    cfg.compaction_concurrency_semaphore = _config.compaction_concurrency_semaphore;
    cfg.cf_stats = _config.cf_stats;
@@ -1234,7 +1044,7 @@ const column_family& database::find_column_family(const schema_ptr& schema) cons
 using strategy_class_registry = class_registry<
    locator::abstract_replication_strategy,
    const sstring&,
-    const locator::shared_token_metadata&,
+    const locator::token_metadata&,
    locator::snitch_ptr&,
    const std::map<sstring, sstring>&>;

@@ -1267,20 +1077,20 @@ keyspace_metadata::keyspace_metadata(std::string_view name,
    }
 }

-void keyspace_metadata::validate(const locator::shared_token_metadata& stm) const {
+void keyspace_metadata::validate(const locator::token_metadata& tm) const {
    using namespace locator;
-    abstract_replication_strategy::validate_replication_strategy(name(), strategy_name(), stm, strategy_options());
+    abstract_replication_strategy::validate_replication_strategy(name(), strategy_name(), tm, strategy_options());
 }

 void database::validate_keyspace_update(keyspace_metadata& ksm) {
-    ksm.validate(get_shared_token_metadata());
+    ksm.validate(get_token_metadata());
    if (!has_keyspace(ksm.name())) {
        throw exceptions::configuration_exception(format("Cannot update non existing keyspace '{}'.", ksm.name()));
    }
 }

 void database::validate_new_keyspace(keyspace_metadata& ksm) {
-    ksm.validate(get_shared_token_metadata());
+    ksm.validate(get_token_metadata());
    if (has_keyspace(ksm.name())) {
        throw exceptions::already_exists_exception{ksm.name()};
    }
@@ -1323,7 +1133,7 @@ std::vector<view_ptr> database::get_views() const {

 void database::create_in_memory_keyspace(const lw_shared_ptr<keyspace_metadata>& ksm) {
    keyspace ks(ksm, std::move(make_keyspace_config(*ksm)));
-    ks.create_replication_strategy(get_shared_token_metadata(), ksm->strategy_options());
+    ks.create_replication_strategy(get_token_metadata(), ksm->strategy_options());
    _keyspaces.emplace(ksm->name(), std::move(ks));
 }

@@ -1756,7 +1566,7 @@ static future<> maybe_handle_reorder(std::exception_ptr exp) {
 }

 future<> database::apply_with_commitlog(column_family& cf, const mutation& m, db::timeout_clock::time_point timeout) {
-    if (cf.commitlog() != nullptr && cf.durable_writes()) {
+    if (cf.commitlog() != nullptr) {
        return do_with(freeze(m), [this, &m, &cf, timeout] (frozen_mutation& fm) {
            commitlog_entry_writer cew(m.schema(), fm, db::commitlog::force_sync::no);
            return cf.commitlog()->add_entry(m.schema()->id(), cew, timeout);
@@ -1770,7 +1580,7 @@ future<> database::apply_with_commitlog(column_family& cf, const mutation& m, db
 future<> database::apply_with_commitlog(schema_ptr s, column_family& cf, utils::UUID uuid, const frozen_mutation& m, db::timeout_clock::time_point timeout,
        db::commitlog::force_sync sync) {
    auto cl = cf.commitlog();
-    if (cl != nullptr && cf.durable_writes()) {
+    if (cl != nullptr) {
        commitlog_entry_writer cew(s, m, sync);
        return cf.commitlog()->add_entry(uuid, cew, timeout).then([&m, this, s, timeout, cl](db::rp_handle h) {
            return this->apply_in_memory(m, s, std::move(h), timeout).handle_exception(maybe_handle_reorder);
@@ -1861,7 +1671,7 @@ database::make_keyspace_config(const keyspace_metadata& ksm) {
        }
        cfg.enable_disk_writes = !_cfg.enable_in_memory_data_store();
        cfg.enable_disk_reads = true; // we allways read from disk
-        cfg.enable_commitlog = _cfg.enable_commitlog() && !_cfg.enable_in_memory_data_store();
+        cfg.enable_commitlog = ksm.durable_writes() && _cfg.enable_commitlog() && !_cfg.enable_in_memory_data_store();
        cfg.enable_cache = _cfg.enable_cache();

    } else {
@@ -1874,6 +1684,7 @@ database::make_keyspace_config(const keyspace_metadata& ksm) {
    cfg.enable_dangerous_direct_import_of_cassandra_counters = _cfg.enable_dangerous_direct_import_of_cassandra_counters();
    cfg.compaction_enforce_min_threshold = _cfg.compaction_enforce_min_threshold;
    cfg.dirty_memory_manager = &_dirty_memory_manager;
+    cfg.streaming_dirty_memory_manager = &_streaming_dirty_memory_manager;
    cfg.streaming_read_concurrency_semaphore = &_streaming_concurrency_sem;
    cfg.compaction_concurrency_semaphore = &_compaction_concurrency_sem;
    cfg.cf_stats = &_cf_stats;
@@ -1940,7 +1751,11 @@ sstring database::get_available_index_name(const sstring &ks_name, const sstring
    auto base_name = index_metadata::get_default_index_name(cf_name, index_name_root);
    sstring accepted_name = base_name;
    int i = 0;
-    while (existing_names.contains(accepted_name)) {
+    auto name_accepted = [&] {
+        auto index_table_name = secondary_index::index_table_name(accepted_name);
+        return !has_schema(ks_name, index_table_name) && !existing_names.contains(accepted_name);
+    };
+    while (!name_accepted()) {
        accepted_name = base_name + "_" + std::to_string(++i);
    }
    return accepted_name;
@@ -2005,6 +1820,13 @@ future<>
 database::stop() {
    assert(!_large_data_handler->running());

+    // Inactive reads might hold on to sstables, blocking the
+    // `sstables_manager::close()` calls below. No one will come back for these
+    // reads at this point so clear them before proceeding with the shutdown.
+    _read_concurrency_sem.clear_inactive_reads();
+    _streaming_concurrency_sem.clear_inactive_reads();
+    _system_read_concurrency_sem.clear_inactive_reads();
+
    // try to ensure that CL has done disk flushing
    future<> maybe_shutdown_commitlog = _commitlog != nullptr ? _commitlog->shutdown() : make_ready_future<>();
    return maybe_shutdown_commitlog.then([this] {
@@ -2018,6 +1840,8 @@ database::stop() {
        return _system_dirty_memory_manager.shutdown();
    }).then([this] {
        return _dirty_memory_manager.shutdown();
+    }).then([this] {
+        return _streaming_dirty_memory_manager.shutdown();
    }).then([this] {
        return _memtable_controller.shutdown();
    }).then([this] {
@@ -2033,11 +1857,6 @@ future<> database::flush_all_memtables() {
    });
 }

-future<> database::flush(const sstring& ksname, const sstring& cfname) {
-    auto& cf = find_column_family(ksname, cfname);
-    return cf.flush();
-}
-
 future<> database::truncate(sstring ksname, sstring cfname, timestamp_func tsf) {
    auto& ks = find_keyspace(ksname);
    auto& cf = find_column_family(ksname, cfname);
@@ -2059,26 +1878,28 @@ future<> database::truncate(const keyspace& ks, column_family& cf, timestamp_fun

        return cf.run_with_compaction_disabled([this, &cf, should_flush, auto_snapshot, tsf = std::move(tsf), low_mark]() mutable {
            future<> f = make_ready_future<>();
-            if (should_flush) {
+            bool did_flush = false;
+            if (should_flush && cf.can_flush()) {
                // TODO:
                // this is not really a guarantee at all that we've actually
                // gotten all things to disk. Again, need queue-ish or something.
                f = cf.flush();
+                did_flush = true;
            } else {
                f = cf.clear();
            }
-            return f.then([this, &cf, auto_snapshot, tsf = std::move(tsf), low_mark, should_flush] {
+            return f.then([this, &cf, auto_snapshot, tsf = std::move(tsf), low_mark, should_flush, did_flush] {
                dblog.debug("Discarding sstable data for truncated CF + indexes");
                // TODO: notify truncation

-                return tsf().then([this, &cf, auto_snapshot, low_mark, should_flush](db_clock::time_point truncated_at) {
+                return tsf().then([this, &cf, auto_snapshot, low_mark, should_flush, did_flush](db_clock::time_point truncated_at) {
                    future<> f = make_ready_future<>();
                    if (auto_snapshot) {
                        auto name = format("{:d}-{}", truncated_at.time_since_epoch().count(), cf.schema()->cf_name());
                        f = cf.snapshot(*this, name);
                    }
-                    return f.then([this, &cf, truncated_at, low_mark, should_flush] {
-                        return cf.discard_sstables(truncated_at).then([this, &cf, truncated_at, low_mark, should_flush](db::replay_position rp) {
+                    return f.then([this, &cf, truncated_at, low_mark, should_flush, did_flush] {
+                        return cf.discard_sstables(truncated_at).then([this, &cf, truncated_at, low_mark, should_flush, did_flush](db::replay_position rp) {
                            // TODO: indexes.
                            // Note: since discard_sstables was changed to only count tables owned by this shard,
                            // we can get zero rp back. Changed assert, and ensure we save at least low_mark.
@@ -2086,7 +1907,7 @@ future<> database::truncate(const keyspace& ks, column_family& cf, timestamp_fun
                            // We nowadays do not flush tables with sstables but autosnapshot=false. This means
                            // the low_mark assertion does not hold, because we maybe/probably never got around to 
                            // creating the sstables that would create them.
-                            assert(!should_flush || low_mark <= rp || rp == db::replay_position());
+                            assert(!did_flush || low_mark <= rp || rp == db::replay_position());
                            rp = std::max(low_mark, rp);
                            return truncate_views(cf, truncated_at, should_flush).then([&cf, truncated_at, rp] {
                                // save_truncation_record() may actually fail after we cached the truncation time
--- a/database.hh
+++ b/database.hh
@@ -224,6 +224,10 @@ public:
        return bool(_seal_immediate_fn);
    }

+    bool can_flush() const {
+        return may_flush() && !empty();
+    }
+
    bool empty() const {
        for (auto& m : _memtables) {
           if (!m->empty()) {
@@ -378,6 +382,7 @@ public:
        utils::updateable_value<bool> compaction_enforce_min_threshold{false};
        bool enable_dangerous_direct_import_of_cassandra_counters = false;
        ::dirty_memory_manager* dirty_memory_manager = &default_dirty_memory_manager;
+        ::dirty_memory_manager* streaming_dirty_memory_manager = &default_dirty_memory_manager;
        reader_concurrency_semaphore* streaming_read_concurrency_semaphore;
        reader_concurrency_semaphore* compaction_concurrency_semaphore;
        ::cf_stats* cf_stats = nullptr;
@@ -417,6 +422,20 @@ private:

    lw_shared_ptr<memtable_list> _memtables;

+    utils::phased_barrier _streaming_flush_phaser;
+
+    // If mutations are fragmented during streaming the sstables cannot be made
+    // visible immediately after memtable flush, because that could cause
+    // readers to see only a part of a partition thus violating isolation
+    // guarantees.
+    // Mutations that are sent in fragments are kept separately in per-streaming
+    // plan memtables and the resulting sstables are not made visible until
+    // the streaming is complete.
+    struct monitored_sstable {
+        std::unique_ptr<database_sstable_write_monitor> monitor;
+        sstables::shared_sstable sstable;
+    };
+
    lw_shared_ptr<memtable_list> make_memory_only_memtable_list();
    lw_shared_ptr<memtable_list> make_memtable_list();

@@ -449,12 +468,12 @@ private:

    // Provided by the database that owns this commitlog
    db::commitlog* _commitlog;
-    bool _durable_writes;
    compaction_manager& _compaction_manager;
    secondary_index::secondary_index_manager _index_manager;
    int _compaction_disabled = 0;
    bool _compaction_disabled_by_user = false;
    utils::phased_barrier _flush_barrier;
+    seastar::gate _streaming_flush_gate;
    std::vector<view_ptr> _views;

    std::unique_ptr<cell_locker> _counter_cell_locks; // Memory-intensive; allocate only when needed.
@@ -472,7 +491,7 @@ private:

    // Operations like truncate, flush, query, etc, may depend on a column family being alive to
    // complete.  Some of them have their own gate already (like flush), used in specialized wait
-    // logic. That is particularly useful if there is a particular
+    // logic (like the streaming_flush_gate). That is particularly useful if there is a particular
    // order in which we need to close those gates. For all the others operations that don't have
    // such needs, we have this generic _async_gate, which all potentially asynchronous operations
    // have to get.  It will be closed by stop().
@@ -490,6 +509,8 @@ private:
    utils::phased_barrier _pending_reads_phaser;
    // Corresponding phaser for in-progress streams
    utils::phased_barrier _pending_streams_phaser;
+    // Corresponding phaser for in-progress flushes
+    utils::phased_barrier _pending_flushes_phaser;

    // This field cashes the last truncation time for the table.
    // The master resides in system.truncated table
@@ -730,6 +751,7 @@ public:
    // The mutation is always upgraded to current schema.
    void apply(const frozen_mutation& m, const schema_ptr& m_schema, db::rp_handle&& = {});
    void apply(const mutation& m, db::rp_handle&& = {});
+    void apply_streaming_mutation(schema_ptr, utils::UUID plan_id, const frozen_mutation&, bool fragmented);

    // Returns at most "cmd.limit" rows
    future<lw_shared_ptr<query::result>> query(schema_ptr,
@@ -745,9 +767,27 @@ public:
    void start();
    future<> stop();
    future<> flush();
+    future<> flush_streaming_mutations(utils::UUID plan_id, dht::partition_range_vector ranges = dht::partition_range_vector{});
    future<> clear(); // discards memtable(s) without flushing them to disk.
    future<db::replay_position> discard_sstables(db_clock::time_point);

+    // Make sure the generation numbers are sequential, starting from "start".
+    // Generations before "start" are left untouched.
+    //
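+    // NOTE(review): documented as "Return the highest generation number seen so far", but the declared result below is a vector of sstables::entry_descriptor; confirm which is intended.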
+    //
+    // Word of warning: although this function will reshuffle anything over "start", it is
+    // very dangerous to do that with live SSTables. This is meant to be used with SSTables
+    // that are not yet managed by the system.
+    //
+    // Parameter all_generations stores the generation of all SSTables in the system, so it
+    // will be easy to determine which SSTable is new.
+    // An example usage would query all shards asking what is the highest SSTable number known
+    // to them, and then pass that + 1 as "start".
+    future<std::vector<sstables::entry_descriptor>> reshuffle_sstables(std::set<int64_t> all_generations, int64_t start);
+
+    bool can_flush() const;
+
    // FIXME: this is just an example, should be changed to something more
    // general. compact_all_sstables() starts a compaction of all sstables.
    // It doesn't flush the current memtable first. It's just a ad-hoc method,
@@ -860,14 +900,6 @@ public:
        return _global_cache_hit_rate;
    }

-    bool durable_writes() const {
-        return _durable_writes;
-    }
-
-    void set_durable_writes(bool dw) {
-        _durable_writes = dw;
-    }
-
    void set_global_cache_hit_rate(cache_temperature rate) {
        _global_cache_hit_rate = rate;
    }
@@ -892,10 +924,6 @@ public:
        return _pending_writes_phaser.advance_and_await();
    }

-    size_t writes_in_progress() const {
-        return _pending_writes_phaser.operations_in_progress();
-    }
-
    utils::phased_barrier::operation read_in_progress() {
        return _pending_reads_phaser.start();
    }
@@ -904,10 +932,6 @@ public:
        return _pending_reads_phaser.advance_and_await();
    }

-    size_t reads_in_progress() const {
-        return _pending_reads_phaser.operations_in_progress();
-    }
-
    utils::phased_barrier::operation stream_in_progress() {
        return _pending_streams_phaser.start();
    }
@@ -916,8 +940,12 @@ public:
        return _pending_streams_phaser.advance_and_await();
    }

-    size_t streams_in_progress() const {
-        return _pending_streams_phaser.operations_in_progress();
+    future<> await_pending_flushes() {
+        return _pending_flushes_phaser.advance_and_await();
+    }
+
+    future<> await_pending_ops() { // resolves once the pending reads, writes, streams and flushes have all been awaited; the first failure, if any, is propagated to the caller
+        return when_all_succeed(await_pending_reads(), await_pending_writes(), await_pending_streams(), await_pending_flushes()).discard_result();
    }

    void add_or_update_view(view_ptr v);
@@ -1072,7 +1100,7 @@ public:
                 std::map<sstring, sstring> options,
                 bool durables_writes,
                 std::vector<schema_ptr> cf_defs = std::vector<schema_ptr>{});
-    void validate(const locator::shared_token_metadata& stm) const;
+    void validate(const locator::token_metadata& tm) const;
    const sstring& name() const {
        return _name;
    }
@@ -1120,6 +1148,7 @@ public:
        utils::updateable_value<bool> compaction_enforce_min_threshold{false};
        bool enable_dangerous_direct_import_of_cassandra_counters = false;
        ::dirty_memory_manager* dirty_memory_manager = &default_dirty_memory_manager;
+        ::dirty_memory_manager* streaming_dirty_memory_manager = &default_dirty_memory_manager;
        reader_concurrency_semaphore* streaming_read_concurrency_semaphore;
        reader_concurrency_semaphore* compaction_concurrency_semaphore;
        ::cf_stats* cf_stats = nullptr;
@@ -1141,14 +1170,14 @@ private:
 public:
    explicit keyspace(lw_shared_ptr<keyspace_metadata> metadata, config cfg);

-    void update_from(const locator::shared_token_metadata& stm, lw_shared_ptr<keyspace_metadata>);
+    void update_from(const locator::token_metadata& tm, lw_shared_ptr<keyspace_metadata>);

    /** Note: return by shared pointer value, since the meta data is
     * semi-volatile. I.e. we could do alter keyspace at any time, and
     * boom, it is replaced.
     */
    lw_shared_ptr<keyspace_metadata> metadata() const;
-    void create_replication_strategy(const locator::shared_token_metadata& stm, const std::map<sstring, sstring>& options);
+    void create_replication_strategy(const locator::token_metadata& tm, const std::map<sstring, sstring>& options);
    /**
     * This should not really be return by reference, since replication
     * strategy is also volatile in that it could be replaced at "any" time.
@@ -1205,7 +1234,6 @@ struct database_config {
    seastar::scheduling_group memory_compaction_scheduling_group;
    seastar::scheduling_group statement_scheduling_group;
    seastar::scheduling_group streaming_scheduling_group;
-    seastar::scheduling_group gossip_scheduling_group;
    size_t available_memory;
 };

@@ -1264,6 +1292,7 @@ private:

    dirty_memory_manager _system_dirty_memory_manager;
    dirty_memory_manager _dirty_memory_manager;
+    dirty_memory_manager _streaming_dirty_memory_manager;

    database_config _dbcfg;
    flush_controller _memtable_controller;
@@ -1328,7 +1357,7 @@ private:

    service::migration_notifier& _mnotifier;
    gms::feature_service& _feat;
-    const locator::shared_token_metadata& _shared_token_metadata;
+    const locator::token_metadata& _token_metadata;

    sharded<semaphore>& _sst_dir_semaphore;

@@ -1347,7 +1376,6 @@ private:
    void create_in_memory_keyspace(const lw_shared_ptr<keyspace_metadata>& ksm);
    friend void db::system_keyspace::make(database& db, bool durable, bool volatile_testing_only);
    void setup_metrics();
-    void setup_scylla_memory_diagnostics_producer();

    friend class db_apply_executor;
    future<> do_apply(schema_ptr, const frozen_mutation&, tracing::trace_state_ptr tr_state, db::timeout_clock::time_point timeout, db::commitlog::force_sync sync);
@@ -1371,7 +1399,7 @@ public:
    void set_enable_incremental_backups(bool val) { _enable_incremental_backups = val; }

    future<> parse_system_tables(distributed<service::storage_proxy>&, distributed<service::migration_manager>&);
-    database(const db::config&, database_config dbcfg, service::migration_notifier& mn, gms::feature_service& feat, const locator::shared_token_metadata& stm, abort_source& as, sharded<semaphore>& sst_dir_sem);
+    database(const db::config&, database_config dbcfg, service::migration_notifier& mn, gms::feature_service& feat, const locator::token_metadata& tm, abort_source& as, sharded<semaphore>& sst_dir_sem);
    database(database&&) = delete;
    ~database();

@@ -1397,8 +1425,7 @@ public:
        return *_compaction_manager;
    }

-    const locator::shared_token_metadata& get_shared_token_metadata() const { return _shared_token_metadata; }
-    const locator::token_metadata& get_token_metadata() const { return *_shared_token_metadata.get(); }
+    const locator::token_metadata& get_token_metadata() const { return _token_metadata; }

    service::migration_notifier& get_notifier() { return _mnotifier; }
    const service::migration_notifier& get_notifier() const { return _mnotifier; }
@@ -1531,7 +1558,6 @@ public:
    void set_format_by_config();

    future<> flush_all_memtables();
-    future<> flush(const sstring& ks, const sstring& cf);

    // See #937. Truncation now requires a callback to get a time stamp
    // that must be guaranteed to be the same for all shards.
--- a/db/batchlog_manager.cc
+++ b/db/batchlog_manager.cc
@@ -182,7 +182,7 @@ future<> db::batchlog_manager::replay_all_failed_batches() {

    // rate limit is in bytes per second. Uses Double.MAX_VALUE if disabled (set to 0 in cassandra.yaml).
    // max rate is scaled by the number of nodes in the cluster (same as for HHOM - see CASSANDRA-5272).
-    auto throttle = _replay_rate / _qp.proxy().get_token_metadata_ptr()->get_all_endpoints_count();
+    auto throttle = _replay_rate / _qp.proxy().get_token_metadata().get_all_endpoints_count();
    auto limiter = make_lw_shared<utils::rate_limiter>(throttle);

    auto batch = [this, limiter](const cql3::untyped_result_set::row& row) {
--- a/db/config.cc
+++ b/db/config.cc
@@ -68,12 +68,6 @@ seed_provider_to_json(const db::seed_provider_type& spt) {
    return value_to_json("seed_provider_type");
 }

-static
-json::json_return_type
-hinted_handoff_enabled_to_json(const db::config::hinted_handoff_enabled_type& h) {
-    return value_to_json(h.to_configuration_string());
-}
-
 template <>
 const config_type config_type_for<bool> = config_type("bool", value_to_json<bool>);

@@ -120,9 +114,6 @@ template <>
 const config_type config_type_for<std::vector<enum_option<db::experimental_features_t>>> = config_type(
        "experimental features", value_to_json<std::vector<sstring>>);

-template <>
-const config_type config_type_for<db::config::hinted_handoff_enabled_type> = config_type("hinted handoff enabled", hinted_handoff_enabled_to_json);
-
 }

 namespace YAML {
@@ -168,18 +159,6 @@ struct convert<db::config::seed_provider_type> {
    }
 };

-template<>
-struct convert<db::config::hinted_handoff_enabled_type> {
-    static bool decode(const Node& node, db::config::hinted_handoff_enabled_type& rhs) {
-        std::string opt;
-        if (!convert<std::string>::decode(node, opt)) {
-            return false;
-        }
-        rhs = db::hints::host_filter::parse_from_config_string(std::move(opt));
-        return true;
-    }
-};
-
 template <>
 class convert<enum_option<db::experimental_features_t>> {
 public:
@@ -593,7 +572,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
        "Time interval in milliseconds to reset all node scores, which allows a bad node to recover.")
    , dynamic_snitch_update_interval_in_ms(this, "dynamic_snitch_update_interval_in_ms", value_status::Unused, 100,
        "The time interval for how often the snitch calculates node scores. Because score calculation is CPU intensive, be careful when reducing this interval.")
-    , hinted_handoff_enabled(this, "hinted_handoff_enabled", value_status::Used, db::config::hinted_handoff_enabled_type(db::config::hinted_handoff_enabled_type::enabled_for_all_tag()),
+    , hinted_handoff_enabled(this, "hinted_handoff_enabled", value_status::Used, "true",
        "Enable or disable hinted handoff. To enable per data center, add data center list. For example: hinted_handoff_enabled: DC1,DC2. A hint indicates that the write needs to be replayed to an unavailable node. "
        "Related information: About hinted handoff writes")
    , hinted_handoff_throttle_in_kb(this, "hinted_handoff_throttle_in_kb", value_status::Unused, 1024,
@@ -635,7 +614,6 @@ db::config::config(std::shared_ptr<db::extensions> exts)
        "\n"
        "\torg.apache.cassandra.auth.AllowAllAuthenticator : Disables authentication; no checks are performed.\n"
        "\torg.apache.cassandra.auth.PasswordAuthenticator : Authenticates users with user names and hashed passwords stored in the system_auth.credentials table. If you use the default, 1, and the node with the lone replica goes down, you will not be able to log into the cluster because the system_auth keyspace was not replicated.\n"
-        "\tcom.scylladb.auth.TransitionalAuthenticator : Wraps around the PasswordAuthenticator, logging them in if username/password pair provided is correct and treating them as anonymous users otherwise.\n"
        "Related information: Internal authentication"
        , {"AllowAllAuthenticator", "PasswordAuthenticator", "org.apache.cassandra.auth.PasswordAuthenticator", "org.apache.cassandra.auth.AllowAllAuthenticator", "com.scylladb.auth.TransitionalAuthenticator"})
    , internode_authenticator(this, "internode_authenticator", value_status::Unused, "enabled",
@@ -645,7 +623,6 @@ db::config::config(std::shared_ptr<db::extensions> exts)
        "\n"
        "\tAllowAllAuthorizer : Disables authorization; allows any action to any user.\n"
        "\tCassandraAuthorizer : Stores permissions in system_auth.permissions table. If you use the default, 1, and the node with the lone replica goes down, you will not be able to log into the cluster because the system_auth keyspace was not replicated.\n"
-        "\tcom.scylladb.auth.TransitionalAuthorizer : Wraps around the CassandraAuthorizer, which is used to authorize permission management. Other actions are allowed for all users.\n"
        "Related information: Object permissions"
        , {"AllowAllAuthorizer", "CassandraAuthorizer", "org.apache.cassandra.auth.AllowAllAuthorizer", "org.apache.cassandra.auth.CassandraAuthorizer", "com.scylladb.auth.TransitionalAuthorizer"})
    , role_manager(this, "role_manager", value_status::Used, "org.apache.cassandra.auth.CassandraRoleManager",
--- a/db/config.hh
+++ b/db/config.hh
@@ -33,7 +33,6 @@
 #include "seastarx.hh"
 #include "utils/config_file.hh"
 #include "utils/enum_option.hh"
-#include "db/hints/host_filter.hh"

 namespace seastar { class file; struct logging_settings; }

@@ -116,7 +115,6 @@ public:
                    //program_options::string_map;
    using string_list = std::vector<sstring>;
    using seed_provider_type = db::seed_provider_type;
-    using hinted_handoff_enabled_type = db::hints::host_filter;

    /*
     * All values and documentation taken from
@@ -240,7 +238,7 @@ public:
    named_value<double> dynamic_snitch_badness_threshold;
    named_value<uint32_t> dynamic_snitch_reset_interval_in_ms;
    named_value<uint32_t> dynamic_snitch_update_interval_in_ms;
-    named_value<hinted_handoff_enabled_type> hinted_handoff_enabled;
+    named_value<sstring> hinted_handoff_enabled;
    named_value<uint32_t> hinted_handoff_throttle_in_kb;
    named_value<uint32_t> max_hint_window_in_ms;
    named_value<uint32_t> max_hints_delivery_threads;
--- a/db/hints/host_filter.cc
+++ b/db/hints/host_filter.cc
@@ -1,125 +0,0 @@
-/*
- * Copyright (C) 2020 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <string_view>
-#include <boost/algorithm/string.hpp>
-#include "to_string.hh"
-#include "host_filter.hh"
-
-namespace db {
-namespace hints {
-
-host_filter::host_filter(host_filter::enabled_for_all_tag)
-        : _enabled_kind(host_filter::enabled_kind::enabled_for_all) {
-}
-
-host_filter::host_filter(host_filter::disabled_for_all_tag)
-        : _enabled_kind(host_filter::enabled_kind::disabled_for_all) {
-}
-
-host_filter::host_filter(std::unordered_set<sstring> allowed_dcs)
-        : _enabled_kind(allowed_dcs.empty() ? enabled_kind::disabled_for_all : enabled_kind::enabled_selectively)
-        , _dcs(std::move(allowed_dcs)) {
-}
-
-bool host_filter::can_hint_for(locator::snitch_ptr& snitch, gms::inet_address ep) const {
-    switch (_enabled_kind) {
-    case enabled_kind::enabled_for_all:
-        return true;
-    case enabled_kind::enabled_selectively:
-        return _dcs.contains(snitch->get_datacenter(ep));
-    case enabled_kind::disabled_for_all:
-        return false;
-    }
-    throw std::logic_error("Uncovered variant of enabled_kind");
-}
-
-host_filter host_filter::parse_from_config_string(sstring opt) {
-    if (boost::iequals(opt, "false") || opt == "0") {
-        return host_filter(disabled_for_all_tag());
-    } else if (boost::iequals(opt, "true") || opt == "1") {
-        return host_filter(enabled_for_all_tag());
-    }
-
-    return parse_from_dc_list(std::move(opt));
-}
-
-host_filter host_filter::parse_from_dc_list(sstring opt) {
-    using namespace boost::algorithm;
-
-    std::vector<sstring> dcs;
-    split(dcs, opt, is_any_of(","));
-
-    std::for_each(dcs.begin(), dcs.end(), [] (sstring& dc) {
-        trim(dc);
-        if (dc.empty()) {
-            throw hints_configuration_parse_error("hinted_handoff_enabled: DC name may not be an empty string");
-        }
-    });
-
-    return host_filter(std::unordered_set<sstring>(dcs.begin(), dcs.end()));
-}
-
-std::istream& operator>>(std::istream& is, host_filter& f) {
-    sstring tmp;
-    is >> tmp;
-    f = host_filter::parse_from_config_string(std::move(tmp));
-    return is;
-}
-
-sstring host_filter::to_configuration_string() const {
-    switch (_enabled_kind) {
-    case enabled_kind::enabled_for_all:
-        return "true";
-    case enabled_kind::enabled_selectively:
-        return ::join(",", _dcs);
-    case enabled_kind::disabled_for_all:
-        return "false";
-    }
-    throw std::logic_error("Uncovered variant of enabled_kind");
-}
-
-
-std::string_view host_filter::enabled_kind_to_string(host_filter::enabled_kind ek) {
-    switch (ek) {
-    case host_filter::enabled_kind::enabled_for_all:
-        return "enabled_for_all";
-    case host_filter::enabled_kind::enabled_selectively:
-        return "enabled_selectively";
-    case host_filter::enabled_kind::disabled_for_all:
-        return "disabled_for_all";
-    }
-    throw std::logic_error("Uncovered variant of enabled_kind");
-}
-
-std::ostream& operator<<(std::ostream& os, const host_filter& f) {
-    os << "host_filter{enabled_kind="
-        << host_filter::enabled_kind_to_string(f._enabled_kind);
-    if (f._enabled_kind == host_filter::enabled_kind::enabled_selectively) {
-        os << ", dcs={" << ::join(",", f._dcs);
-    }
-    os << "}";
-    return os;
-}
-
-}
-}
-
--- a/db/hints/host_filter.hh
+++ b/db/hints/host_filter.hh
@@ -1,103 +0,0 @@
-/*
- * Copyright (C) 2020 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include <functional>
-#include <unordered_set>
-#include <exception>
-#include <iostream>
-#include <string_view>
-
-#include <seastar/core/sstring.hh>
-#include "gms/inet_address.hh"
-#include "locator/snitch_base.hh"
-#include "seastarx.hh"
-
-namespace db {
-namespace hints {
-
-// host_filter tells hints_manager towards which endpoints it is allowed to generate hints.
-class host_filter final {
-private:
-    enum class enabled_kind {
-        enabled_for_all,
-        enabled_selectively,
-        disabled_for_all,
-    };
-
-    enabled_kind _enabled_kind;
-    std::unordered_set<sstring> _dcs;
-
-    static std::string_view enabled_kind_to_string(host_filter::enabled_kind ek);
-
-public:
-    struct enabled_for_all_tag {};
-    struct disabled_for_all_tag {};
-
-    // Creates a filter that allows hints to all endpoints (default)
-    host_filter(enabled_for_all_tag tag = {});
-
-    // Creates a filter that does not allow any hints.
-    host_filter(disabled_for_all_tag);
-
-    // Creates a filter that allows sending hints to specified DCs.
-    explicit host_filter(std::unordered_set<sstring> allowed_dcs);
-
-    // Parses hint filtering configuration from the hinted_handoff_enabled option.
-    static host_filter parse_from_config_string(sstring opt);
-
-    // Parses hint filtering configuration from a list of DCs.
-    static host_filter parse_from_dc_list(sstring opt);
-
-    bool can_hint_for(locator::snitch_ptr& snitch, gms::inet_address ep) const;
-
-    inline const std::unordered_set<sstring>& get_dcs() const {
-        return _dcs;
-    }
-
-    bool operator==(const host_filter& other) const noexcept {
-        return _enabled_kind == other._enabled_kind
-                && _dcs == other._dcs;
-    }
-
-    inline bool is_enabled_for_all() const noexcept {
-        return _enabled_kind == enabled_kind::enabled_for_all;
-    }
-
-    inline bool is_disabled_for_all() const noexcept {
-        return _enabled_kind == enabled_kind::disabled_for_all;
-    }
-
-    sstring to_configuration_string() const;
-
-    friend std::ostream& operator<<(std::ostream& os, const host_filter& f);
-};
-
-std::istream& operator>>(std::istream& is, host_filter& f);
-
-class hints_configuration_parse_error : public std::runtime_error {
-public:
-    using std::runtime_error::runtime_error;
-};
-
-}
-}
--- a/db/hints/manager.cc
+++ b/db/hints/manager.cc
@@ -38,7 +38,6 @@
 #include "service/priority_manager.hh"
 #include "database.hh"
 #include "service_permit.hh"
-#include "utils/directories.hh"

 using namespace std::literals::chrono_literals;

@@ -51,9 +50,9 @@ const std::string manager::FILENAME_PREFIX("HintsLog" + commitlog::descriptor::S
 const std::chrono::seconds manager::hint_file_write_timeout = std::chrono::seconds(2);
 const std::chrono::seconds manager::hints_flush_period = std::chrono::seconds(10);

-manager::manager(sstring hints_directory, host_filter filter, int64_t max_hint_window_ms, resource_manager& res_manager, distributed<database>& db)
+manager::manager(sstring hints_directory, std::vector<sstring> hinted_dcs, int64_t max_hint_window_ms, resource_manager& res_manager, distributed<database>& db)
    : _hints_dir(fs::path(hints_directory) / format("{:d}", this_shard_id()))
-    , _host_filter(std::move(filter))
+    , _hinted_dcs(hinted_dcs.begin(), hinted_dcs.end())
    , _local_snitch_ptr(locator::i_endpoint_snitch::get_local_snitch_ptr())
    , _max_hint_window_us(max_hint_window_ms * 1000)
    , _local_db(db.local())
@@ -533,56 +532,12 @@ bool manager::can_hint_for(ep_key_type ep) const noexcept {
    return true;
 }

-future<> manager::change_host_filter(host_filter filter) {
-    if (!started()) {
-        return make_exception_future<>(std::logic_error("change_host_filter: called before the hints_manager was started"));
-    }
-
-    return with_gate(_draining_eps_gate, [this, filter = std::move(filter)] () mutable {
-        return with_semaphore(drain_lock(), 1, [this, filter = std::move(filter)] () mutable {
-            if (draining_all()) {
-                return make_exception_future<>(std::logic_error("change_host_filter: cannot change the configuration because hints all hints were drained"));
-            }
-
-            manager_logger.debug("change_host_filter: changing from {} to {}", _host_filter, filter);
-
-            // Change the host_filter now and save the old one so that we can
-            // roll back in case of failure
-            std::swap(_host_filter, filter);
-
-            // Iterate over existing hint directories and see if we can enable an endpoint manager
-            // for some of them
-            return lister::scan_dir(_hints_dir, { directory_entry_type::directory }, [this] (fs::path datadir, directory_entry de) {
-                const ep_key_type ep = ep_key_type(de.name);
-                if (_ep_managers.contains(ep) || !_host_filter.can_hint_for(_local_snitch_ptr, ep)) {
-                    return make_ready_future<>();
-                }
-                return get_ep_manager(ep).populate_segments_to_replay();
-            }).handle_exception([this, filter = std::move(filter)] (auto ep) mutable {
-                // Bring back the old filter. The finally() block will cause us to stop
-                // the additional ep_hint_managers that we started
-                _host_filter = std::move(filter);
-            }).finally([this] {
-                // Remove endpoint managers which are rejected by the filter
-                return parallel_for_each(_ep_managers, [this] (auto& pair) {
-                    if (_host_filter.can_hint_for(_local_snitch_ptr, pair.first)) {
-                        return make_ready_future<>();
-                    }
-                    return pair.second.stop(drain::no).finally([this, ep = pair.first] {
-                        _ep_managers.erase(ep);
-                    });
-                });
-            });
-        });
-    });
-}
-
 bool manager::check_dc_for(ep_key_type ep) const noexcept {
    try {
        // If target's DC is not a "hintable" DCs - don't hint.
        // If there is an end point manager then DC has already been checked and found to be ok.
-        return _host_filter.is_enabled_for_all() || have_ep_manager(ep) ||
-               _host_filter.can_hint_for(_local_snitch_ptr, ep);
+        return _hinted_dcs.empty() || have_ep_manager(ep) ||
+               _hinted_dcs.contains(_local_snitch_ptr->get_datacenter(ep));
    } catch (...) {
        // if we failed to check the DC - block this hint
        return false;
@@ -898,14 +853,12 @@ void manager::end_point_hints_manager::sender::send_hints_maybe() noexcept {

 static future<> scan_for_hints_dirs(const sstring& hints_directory, std::function<future<> (fs::path dir, directory_entry de, unsigned shard_id)> f) {
    return lister::scan_dir(hints_directory, { directory_entry_type::directory }, [f = std::move(f)] (fs::path dir, directory_entry de) mutable {
-        unsigned shard_id;
        try {
-            shard_id = std::stoi(de.name.c_str());
+            return f(std::move(dir), std::move(de), std::stoi(de.name.c_str()));
        } catch (std::invalid_argument& ex) {
            manager_logger.debug("Ignore invalid directory {}", de.name);
            return make_ready_future<>();
        }
-        return f(std::move(dir), std::move(de), shard_id);
    });
 }

@@ -1065,92 +1018,5 @@ void manager::update_backlog(size_t backlog, size_t max_backlog) {
    }
 }

-class directory_initializer::impl {
-    enum class state {
-        uninitialized = 0,
-        created_and_validated = 1,
-        rebalanced = 2,
-    };
-
-    utils::directories& _dirs;
-    sstring _hints_directory;
-    state _state = state::uninitialized;
-    seastar::named_semaphore _lock = {1, named_semaphore_exception_factory{"hints directory initialization lock"}};
-
-public:
-    impl(utils::directories& dirs, sstring hints_directory)
-            : _dirs(dirs)
-            , _hints_directory(std::move(hints_directory))
-    { }
-
-    future<> ensure_created_and_verified() {
-        if (_state > state::uninitialized) {
-            return make_ready_future<>();
-        }
-
-        return with_semaphore(_lock, 1, [this] () {
-            utils::directories::set dir_set;
-            dir_set.add_sharded(_hints_directory);
-            return _dirs.create_and_verify(std::move(dir_set)).then([this] {
-                manager_logger.debug("Creating and validating hint directories: {}", _hints_directory);
-                _state = state::created_and_validated;
-            });
-        });
-    }
-
-    future<> ensure_rebalanced() {
-        if (_state < state::created_and_validated) {
-            return make_exception_future<>(std::logic_error("hints directory needs to be created and validated before rebalancing"));
-        }
-
-        if (_state > state::created_and_validated) {
-            return make_ready_future<>();
-        }
-
-        return with_semaphore(_lock, 1, [this] () {
-            manager_logger.debug("Rebalancing hints in {}", _hints_directory);
-            return manager::rebalance(_hints_directory).then([this] {
-                _state = state::rebalanced;
-            });
-        });
-    }
-};
-
-directory_initializer::directory_initializer(std::shared_ptr<directory_initializer::impl> impl)
-        : _impl(std::move(impl))
-{ }
-
-directory_initializer::~directory_initializer()
-{ }
-
-directory_initializer directory_initializer::make_dummy() {
-    return directory_initializer{nullptr};
-}
-
-future<directory_initializer> directory_initializer::make(utils::directories& dirs, sstring hints_directory) {
-    return smp::submit_to(0, [&dirs, hints_directory = std::move(hints_directory)] () mutable {
-        auto impl = std::make_shared<directory_initializer::impl>(dirs, std::move(hints_directory));
-        return make_ready_future<directory_initializer>(directory_initializer(std::move(impl)));
-    });
-}
-
-future<> directory_initializer::ensure_created_and_verified() {
-    if (!_impl) {
-        return make_ready_future<>();
-    }
-    return smp::submit_to(0, [impl = this->_impl] () mutable {
-        return impl->ensure_created_and_verified().then([impl] {});
-    });
-}
-
-future<> directory_initializer::ensure_rebalanced() {
-    if (!_impl) {
-        return make_ready_future<>();
-    }
-    return smp::submit_to(0, [impl = this->_impl] () mutable {
-        return impl->ensure_rebalanced().then([impl] {});
-    });
-}
-
 }
 }
--- a/db/hints/manager.hh
+++ b/db/hints/manager.hh
@@ -40,16 +40,11 @@
 #include "utils/loading_shared_values.hh"
 #include "utils/fragmented_temporary_buffer.hh"
 #include "db/hints/resource_manager.hh"
-#include "db/hints/host_filter.hh"

 namespace service {
 class storage_service;
 }

-namespace utils {
-class directories;
-}
-
 namespace db {
 namespace hints {

@@ -58,25 +53,6 @@ using hints_store_ptr = node_to_hint_store_factory_type::entry_ptr;
 using hint_entry_reader = commitlog_entry_reader;
 using timer_clock_type = seastar::lowres_clock;

-/// A helper class which tracks hints directory creation
-/// and allows to perform hints directory initialization lazily.
-class directory_initializer {
-private:
-    class impl;
-    ::std::shared_ptr<impl> _impl;
-
-    directory_initializer(::std::shared_ptr<impl> impl);
-
-public:
-    /// Creates an initializer that does nothing. Useful in tests.
-    static directory_initializer make_dummy();
-    static future<directory_initializer> make(utils::directories& dirs, sstring hints_directory);
-
-    ~directory_initializer();
-    future<> ensure_created_and_verified();
-    future<> ensure_rebalanced();
-};
-
 class manager : public service::endpoint_lifecycle_subscriber {
 private:
    struct stats {
@@ -474,7 +450,7 @@ private:
    dev_t _hints_dir_device_id = 0;

    node_to_hint_store_factory_type _store_factory;
-    host_filter _host_filter;
+    std::unordered_set<sstring> _hinted_dcs;
    shared_ptr<service::storage_proxy> _proxy_anchor;
    shared_ptr<gms::gossiper> _gossiper_anchor;
    shared_ptr<service::storage_service> _strorage_service_anchor;
@@ -493,7 +469,7 @@ private:
    seastar::named_semaphore _drain_lock = {1, named_semaphore_exception_factory{"drain lock"}};

 public:
-    manager(sstring hints_directory, host_filter filter, int64_t max_hint_window_ms, resource_manager&res_manager, distributed<database>& db);
+    manager(sstring hints_directory, std::vector<sstring> hinted_dcs, int64_t max_hint_window_ms, resource_manager&res_manager, distributed<database>& db);
    virtual ~manager();
    manager(manager&&) = delete;
    manager& operator=(manager&&) = delete;
@@ -502,15 +478,6 @@ public:
    future<> stop();
    bool store_hint(gms::inet_address ep, schema_ptr s, lw_shared_ptr<const frozen_mutation> fm, tracing::trace_state_ptr tr_state) noexcept;

-    /// \brief Changes the host_filter currently used, stopping and starting ep_managers relevant to the new host_filter.
-    /// \param filter the new host_filter
-    /// \return A future that resolves when the operation is complete.
-    future<> change_host_filter(host_filter filter);
-
-    const host_filter& get_host_filter() const noexcept {
-        return _host_filter;
-    }
-
    /// \brief Check if a hint may be generated to the give end point
    /// \param ep end point to check
    /// \return true if we should generate the hint to the given end point if it becomes unavailable
@@ -537,12 +504,6 @@ public:
    /// \return TRUE if hints are allowed to be generated to \param ep.
    bool check_dc_for(ep_key_type ep) const noexcept;

-    /// \brief Checks if hints are disabled for all endpoints
-    /// \return TRUE if hints are disabled.
-    bool is_disabled_for_all() const noexcept {
-        return _host_filter.is_disabled_for_all();
-    }
-
    /// \return Size of mutations of hints in-flight (to the disk) at the moment.
    uint64_t size_of_hints_in_progress() const noexcept {
        return _stats.size_of_hints_in_progress;
@@ -596,12 +557,6 @@ public:
        _state.set(state::replay_allowed);
    }

-    /// \brief Creates an object which aids in hints directory initialization.
-    /// This object can saafely be copied and used from any shard.
-    /// \arg dirs The utils::directories object, used to create and lock hints directories
-    /// \arg hints_directory The directory with hints which should be initialized
-    directory_initializer make_directory_initializer(utils::directories& dirs, fs::path hints_directory);
-
    /// \brief Rebalance hints segments among all present shards.
    ///
    /// The difference between the number of segments on every two shard will be not greater than 1 after the
--- a/db/hints/resource_manager.cc
+++ b/db/hints/resource_manager.cc
@@ -68,14 +68,12 @@ const std::chrono::seconds space_watchdog::_watchdog_period = std::chrono::secon
 space_watchdog::space_watchdog(shard_managers_set& managers, per_device_limits_map& per_device_limits_map)
    : _shard_managers(managers)
    , _per_device_limits_map(per_device_limits_map)
-    , _update_lock(1, named_semaphore_exception_factory{"update lock"})
 {}

 void space_watchdog::start() {
    _started = seastar::async([this] {
        while (!_as.abort_requested()) {
            try {
-                const auto units = get_units(_update_lock, 1).get();
                on_timer();
            } catch (...) {
                resource_manager_logger.trace("space_watchdog: unexpected exception - stop all hints generators");
@@ -178,95 +176,56 @@ void space_watchdog::on_timer() {
 }

 future<> resource_manager::start(shared_ptr<service::storage_proxy> proxy_ptr, shared_ptr<gms::gossiper> gossiper_ptr, shared_ptr<service::storage_service> ss_ptr) {
-    _proxy_ptr = std::move(proxy_ptr);
-    _gossiper_ptr = std::move(gossiper_ptr);
-    _ss_ptr = std::move(ss_ptr);
-
-    return with_semaphore(_operation_lock, 1, [this] () {
-        return parallel_for_each(_shard_managers, [this](manager& m) {
-            return m.start(_proxy_ptr, _gossiper_ptr, _ss_ptr);
-        }).then([this]() {
-            return do_for_each(_shard_managers, [this](manager& m) {
-                return prepare_per_device_limits(m);
-            });
-        }).then([this]() {
-            return _space_watchdog.start();
-        }).then([this]() {
-            set_running();
-        });
+    return parallel_for_each(_shard_managers, [proxy_ptr, gossiper_ptr, ss_ptr](manager& m) {
+        return m.start(proxy_ptr, gossiper_ptr, ss_ptr);
+    }).then([this]() {
+        return prepare_per_device_limits();
+    }).then([this]() {
+        return _space_watchdog.start();
    });
 }

 void resource_manager::allow_replaying() noexcept {
-    set_replay_allowed();
    boost::for_each(_shard_managers, [] (manager& m) { m.allow_replaying(); });
 }

 future<> resource_manager::stop() noexcept {
-    return with_semaphore(_operation_lock, 1, [this] () {
-        return parallel_for_each(_shard_managers, [](manager& m) {
-            return m.stop();
-        }).finally([this]() {
-            return _space_watchdog.stop();
-        }).then([this]() {
-            unset_running();
-        });
+    return parallel_for_each(_shard_managers, [](manager& m) {
+        return m.stop();
+    }).finally([this]() {
+        return _space_watchdog.stop();
    });
 }

-future<> resource_manager::register_manager(manager& m) {
-    return with_semaphore(_operation_lock, 1, [this, &m] () {
-        return with_semaphore(_space_watchdog.update_lock(), 1, [this, &m] {
-            const auto [it, inserted] = _shard_managers.insert(m);
-            if (!inserted) {
-                // Already registered
-                return make_ready_future<>();
-            }
-            if (!running()) {
-                // The hints manager will be started later by resource_manager::start()
-                return make_ready_future<>();
-            }
+void resource_manager::register_manager(manager& m) {
+    _shard_managers.insert(m);
+}

-            // If the resource_manager was started, start the hints manager, too.
-            return m.start(_proxy_ptr, _gossiper_ptr, _ss_ptr).then([this, &m] {
-                // Calculate device limits for this manager so that it is accounted for
-                // by the space_watchdog
-                return prepare_per_device_limits(m).then([this, &m] {
-                    if (this->replay_allowed()) {
-                        m.allow_replaying();
+future<> resource_manager::prepare_per_device_limits() {
+    return do_for_each(_shard_managers, [this] (manager& shard_manager) mutable {
+        dev_t device_id = shard_manager.hints_dir_device_id();
+        auto it = _per_device_limits_map.find(device_id);
+        if (it == _per_device_limits_map.end()) {
+            return is_mountpoint(shard_manager.hints_dir().parent_path()).then([this, device_id, &shard_manager](bool is_mountpoint) {
+                auto [it, inserted] = _per_device_limits_map.emplace(device_id, space_watchdog::per_device_limits{});
+                // Since we possibly deferred, we need to recheck the _per_device_limits_map.
+                if (inserted) {
+                    // By default, give each group of managers 10% of the available disk space. Give each shard an equal share of the available space.
+                    it->second.max_shard_disk_space_size = std::filesystem::space(shard_manager.hints_dir().c_str()).capacity / (10 * smp::count);
+                    // If hints directory is a mountpoint, we assume it's on dedicated (i.e. not shared with data/commitlog/etc) storage.
+                    // Then, reserve 90% of all space instead of 10% above.
+                    if (is_mountpoint) {
+                        it->second.max_shard_disk_space_size *= 9;
                    }
-                });
-            }).handle_exception([this, &m] (auto ep) {
-                _shard_managers.erase(m);
-                return make_exception_future<>(ep);
+                }
+                it->second.managers.emplace_back(std::ref(shard_manager));
            });
-        });
+        } else {
+            it->second.managers.emplace_back(std::ref(shard_manager));
+            return make_ready_future<>();
+        }
    });
 }

-future<> resource_manager::prepare_per_device_limits(manager& shard_manager) {
-    dev_t device_id = shard_manager.hints_dir_device_id();
-    auto it = _per_device_limits_map.find(device_id);
-    if (it == _per_device_limits_map.end()) {
-        return is_mountpoint(shard_manager.hints_dir().parent_path()).then([this, device_id, &shard_manager](bool is_mountpoint) {
-            auto [it, inserted] = _per_device_limits_map.emplace(device_id, space_watchdog::per_device_limits{});
-            // Since we possibly deferred, we need to recheck the _per_device_limits_map.
-            if (inserted) {
-                // By default, give each group of managers 10% of the available disk space. Give each shard an equal share of the available space.
-                it->second.max_shard_disk_space_size = std::filesystem::space(shard_manager.hints_dir().c_str()).capacity / (10 * smp::count);
-                // If hints directory is a mountpoint, we assume it's on dedicated (i.e. not shared with data/commitlog/etc) storage.
-                // Then, reserve 90% of all space instead of 10% above.
-                if (is_mountpoint) {
-                    it->second.max_shard_disk_space_size *= 9;
-                }
-            }
-            it->second.managers.emplace_back(std::ref(shard_manager));
-        });
-    } else {
-        it->second.managers.emplace_back(std::ref(shard_manager));
-        return make_ready_future<>();
-    }
-}
-
 }
 }
--- a/db/hints/resource_manager.hh
+++ b/db/hints/resource_manager.hh
@@ -78,7 +78,6 @@ private:
    size_t _total_size = 0;
    shard_managers_set& _shard_managers;
    per_device_limits_map& _per_device_limits_map;
-    seastar::named_semaphore _update_lock;

    future<> _started = make_ready_future<>();
    seastar::abort_source _as;
@@ -89,10 +88,6 @@ public:
    void start();
    future<> stop() noexcept;

-    seastar::named_semaphore& update_lock() {
-        return _update_lock;
-    }
-
 private:
    /// \brief Check that hints don't occupy too much disk space.
    ///
@@ -124,47 +119,10 @@ class resource_manager {
    const size_t _min_send_hint_budget;
    seastar::named_semaphore _send_limiter;

-    seastar::named_semaphore _operation_lock;
    space_watchdog::shard_managers_set _shard_managers;
    space_watchdog::per_device_limits_map _per_device_limits_map;
    space_watchdog _space_watchdog;

-    shared_ptr<service::storage_proxy> _proxy_ptr;
-    shared_ptr<gms::gossiper> _gossiper_ptr;
-    shared_ptr<service::storage_service> _ss_ptr;
-
-    enum class state {
-        running,
-        replay_allowed,
-    };
-    using state_set = enum_set<super_enum<state,
-        state::running,
-        state::replay_allowed>>;
-
-    state_set _state;
-
-    void set_running() noexcept {
-        _state.set(state::running);
-    }
-
-    void unset_running() noexcept {
-        _state.remove(state::running);
-    }
-
-    bool running() const noexcept {
-        return _state.contains(state::running);
-    }
-
-    void set_replay_allowed() noexcept {
-        _state.set(state::replay_allowed);
-    }
-
-    bool replay_allowed() const noexcept {
-        return _state.contains(state::replay_allowed);
-    }
-
-    future<> prepare_per_device_limits(manager& shard_manager);
-
 public:
    static constexpr size_t hint_segment_size_in_mb = 32;
    static constexpr size_t max_hints_per_ep_size_mb = 128; // 4 files 32MB each
@@ -175,7 +133,6 @@ public:
        : _max_send_in_flight_memory(std::max(max_send_in_flight_memory, max_hints_send_queue_length))
        , _min_send_hint_budget(_max_send_in_flight_memory / max_hints_send_queue_length)
        , _send_limiter(_max_send_in_flight_memory, named_semaphore_exception_factory{"send limiter"})
-        , _operation_lock(1, named_semaphore_exception_factory{"operation lock"})
        , _space_watchdog(_shard_managers, _per_device_limits_map)
    {}

@@ -186,16 +143,10 @@ public:
    size_t sending_queue_length() const;

    future<> start(shared_ptr<service::storage_proxy> proxy_ptr, shared_ptr<gms::gossiper> gossiper_ptr, shared_ptr<service::storage_service> ss_ptr);
-    future<> stop() noexcept;
-
-    /// \brief Allows replaying hints for managers which are registered now or will be in the future.
    void allow_replaying() noexcept;
-
-    /// \brief Registers the hints::manager in resource_manager, and starts it, if resource_manager is already running.
-    ///
-    /// The hints::managers can be added either before or after resource_manager starts.
-    /// If resource_manager is already started, the hints manager will also be started.
-    future<> register_manager(manager& m);
+    future<> stop() noexcept;
+    void register_manager(manager& m);
+    future<> prepare_per_device_limits();
 };

 }
--- a/db/large_data_handler.cc
+++ b/db/large_data_handler.cc
@@ -83,7 +83,7 @@ static future<> try_record(std::string_view large_table, const sstables::sstable
    std::string pk_str = key_to_str(partition_key.to_partition_key(s), s);
    auto timestamp = db_clock::now();
    large_data_logger.warn("Writing large {} {}/{}: {}{} ({} bytes)", desc, ks_name, cf_name, pk_str, extra_path, size);
-    return db::qctx->execute_cql(req, ks_name, cf_name, sstable_name, size, pk_str, timestamp, args...)
+    return db::execute_cql(req, ks_name, cf_name, sstable_name, size, pk_str, timestamp, args...)
            .discard_result()
            .handle_exception([ks_name, cf_name, large_table, sstable_name] (std::exception_ptr ep) {
                large_data_logger.warn("Failed to add a record to system.large_{}s: ks = {}, table = {}, sst = {} exception = {}",
@@ -113,7 +113,7 @@ future<> cql_table_large_data_handler::record_large_cells(const sstables::sstabl
        auto ck_str = key_to_str(*clustering_key, s);
        return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, format("{} {}", ck_str, column_name), extra_fields, ck_str, column_name);
    } else {
-        return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, nullptr, column_name);
+        return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, data_value::make_null(utf8_type), column_name);
    }
 }

@@ -125,7 +125,7 @@ future<> cql_table_large_data_handler::record_large_rows(const sstables::sstable
        std::string ck_str = key_to_str(*clustering_key, s);
        return try_record("row", sst, partition_key, int64_t(row_size), "row", ck_str, extra_fields,  ck_str);
    } else {
-        return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, nullptr);
+        return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, data_value::make_null(utf8_type));
    }
 }

@@ -133,7 +133,7 @@ future<> cql_table_large_data_handler::delete_large_data_entries(const schema& s
    const sstring req =
            format("DELETE FROM system.{} WHERE keyspace_name = ? AND table_name = ? AND sstable_name = ?",
                    large_table_name);
-    return db::qctx->execute_cql(req, s.ks_name(), s.cf_name(), sstable_name)
+    return db::execute_cql(req, s.ks_name(), s.cf_name(), sstable_name)
            .discard_result()
            .handle_exception([&s, sstable_name, large_table_name] (std::exception_ptr ep) {
                large_data_logger.warn("Failed to drop entries from {}: ks = {}, table = {}, sst = {} exception = {}",
--- a/db/large_data_handler.hh
+++ b/db/large_data_handler.hh
@@ -111,27 +111,12 @@ public:
        return make_ready_future<>();
    }

-    future<> maybe_delete_large_data_entries(const schema& s, sstring filename, uint64_t data_size) {
+    future<> maybe_delete_large_data_entries(const schema& /*s*/, sstring /*filename*/, uint64_t /*data_size*/) {
        assert(running());
-        future<> large_partitions = make_ready_future<>();
-        if (__builtin_expect(data_size > _partition_threshold_bytes, false)) {
-            large_partitions = with_sem([&s, filename, this] () mutable {
-                return delete_large_data_entries(s, std::move(filename), db::system_keyspace::LARGE_PARTITIONS);
-            });
-        }
-        future<> large_rows = make_ready_future<>();
-        if (__builtin_expect(data_size > _row_threshold_bytes, false)) {
-            large_rows = with_sem([&s, filename, this] () mutable {
-                return delete_large_data_entries(s, std::move(filename), db::system_keyspace::LARGE_ROWS);
-            });
-        }
-        future<> large_cells = make_ready_future<>();
-        if (__builtin_expect(data_size > _cell_threshold_bytes, false)) {
-            large_cells = with_sem([&s, filename, this] () mutable {
-                return delete_large_data_entries(s, std::move(filename), db::system_keyspace::LARGE_CELLS);
-            });
-        }
-        return when_all(std::move(large_partitions), std::move(large_rows), std::move(large_cells)).discard_result();
+
+        // Deletion of large data entries is disabled due to #7668
+        // They will eventually expire based on the 30 days TTL.
+        return make_ready_future<>();
    }

    const large_data_handler::stats& stats() const { return _stats; }
--- a/db/query_context.hh
+++ b/db/query_context.hh
@@ -29,6 +29,8 @@
 #include "exceptions/exceptions.hh"
 #include "timeout_config.hh"

+class database;
+
 namespace service {
 class storage_proxy;
 }
@@ -36,8 +38,9 @@ class storage_proxy;

 namespace db {
 struct query_context {
+    distributed<database>& _db;
    distributed<cql3::query_processor>& _qp;
-    query_context(distributed<cql3::query_processor>& qp) : _qp(qp) {}
+    query_context(distributed<database>& db, distributed<cql3::query_processor>& qp) : _db(db), _qp(qp) {}

    template <typename... Args>
    future<::shared_ptr<cql3::untyped_result_set>> execute_cql(sstring req, Args&&... args) {
@@ -55,23 +58,23 @@ struct query_context {
                // let the `storage_proxy` time out the query down the call chain
                db::timeout_clock::duration::zero();

-        struct timeout_context {
-            std::unique_ptr<service::client_state> client_state;
-            service::query_state query_state;
-            timeout_context(db::timeout_clock::duration d)
-                    : client_state(std::make_unique<service::client_state>(service::client_state::internal_tag{}, timeout_config{d, d, d, d, d, d, d}))
-                    , query_state(*client_state, empty_service_permit())
-            {}
-        };
-        return do_with(timeout_context(d), [this, req = std::move(req), &args...] (auto& tctx) {
+        return do_with(timeout_config{d, d, d, d, d, d, d}, [this, req = std::move(req), &args...] (auto& tcfg) {
            return _qp.local().execute_internal(req,
                cql3::query_options::DEFAULT.get_consistency(),
-                tctx.query_state,
+                tcfg,
                { data_value(std::forward<Args>(args))... },
                true);
        });
    }

+    database& db() {
+        return _db.local();
+    }
+
+    service::storage_proxy& proxy() {
+        return _qp.local().proxy();
+    }
+
    cql3::query_processor& qp() {
        return _qp.local();
    }
@@ -79,4 +82,19 @@ struct query_context {

 // This does not have to be thread local, because all cores will share the same context.
 extern std::unique_ptr<query_context> qctx;
+
+template <typename... Args>
+static future<::shared_ptr<cql3::untyped_result_set>> execute_cql(sstring text, Args&&... args) {
+    assert(qctx);
+    return qctx->execute_cql(text, std::forward<Args>(args)...);
+}
+
+template <typename... Args>
+static future<::shared_ptr<cql3::untyped_result_set>> execute_cql_with_timeout(sstring cql,
+        db::timeout_clock::time_point timeout,
+        Args&&... args) {
+    assert(qctx);
+    return qctx->execute_cql_with_timeout(cql, timeout, std::forward<Args>(args)...);
+}
+
 }
--- a/db/schema_tables.cc
+++ b/db/schema_tables.cc
@@ -226,24 +226,24 @@ using namespace v3;

 using days = std::chrono::duration<int, std::ratio<24 * 3600>>;

-future<> save_system_schema(cql3::query_processor& qp, const sstring & ksname) {
-    auto& ks = qp.db().find_keyspace(ksname);
+future<> save_system_schema(const sstring & ksname) {
+    auto& ks = db::qctx->db().find_keyspace(ksname);
    auto ksm = ks.metadata();

    // delete old, possibly obsolete entries in schema tables
    return parallel_for_each(all_table_names(schema_features::full()), [ksm] (sstring cf) {
        auto deletion_timestamp = schema_creation_timestamp() - 1;
-        return qctx->execute_cql(format("DELETE FROM {}.{} USING TIMESTAMP {} WHERE keyspace_name = ?", NAME, cf,
+        return db::execute_cql(format("DELETE FROM {}.{} USING TIMESTAMP {} WHERE keyspace_name = ?", NAME, cf,
            deletion_timestamp), ksm->name()).discard_result();
-    }).then([ksm, &qp] {
+    }).then([ksm] {
        auto mvec  = make_create_keyspace_mutations(ksm, schema_creation_timestamp(), true);
-        return qp.proxy().mutate_locally(std::move(mvec), tracing::trace_state_ptr());
+        return qctx->proxy().mutate_locally(std::move(mvec), tracing::trace_state_ptr());
    });
 }

 /** add entries to system_schema.* for the hardcoded system definitions */
-future<> save_system_keyspace_schema(cql3::query_processor& qp) {
-    return save_system_schema(qp, NAME);
+future<> save_system_keyspace_schema() {
+    return save_system_schema(NAME);
 }

 namespace v3 {
@@ -1208,7 +1208,42 @@ static void merge_tables_and_views(distributed<service::storage_proxy>& proxy,
        return create_table_from_mutations(proxy, std::move(sm));
    });
    auto views_diff = diff_table_or_view(proxy, std::move(views_before), std::move(views_after), [&] (schema_mutations sm) {
-        return create_view_from_mutations(proxy, std::move(sm));
+        // The view schema mutation should be created with reference to the base table schema because we definitely know it by now.
+        // If we don't do it we are leaving a window where write commands to this schema are illegal.
+        // There are 3 possibilities:
+        // 1. The table was altered - in this case we want the view to correspond to this new table schema.
+        // 2. The table was just created - the table is guaranteed to be published with the view in that case.
+        // 3. The view itself was altered - in that case we already know the base table so we can take it from
+        //    the database object.
+        view_ptr vp = create_view_from_mutations(proxy, std::move(sm));
+        schema_ptr base_schema;
+        for (auto&& s : tables_diff.altered) {
+            if (s.new_schema.get()->ks_name() == vp->ks_name() && s.new_schema.get()->cf_name() == vp->view_info()->base_name() ) {
+                base_schema = s.new_schema;
+                break;
+            }
+        }
+        if (!base_schema) {
+            for (auto&& s : tables_diff.created) {
+                if (s.get()->ks_name() == vp->ks_name() && s.get()->cf_name() == vp->view_info()->base_name() ) {
+                    base_schema = s;
+                    break;
+                }
+            }
+        }
+
+        if (!base_schema) {
+            base_schema = proxy.local().local_db().find_schema(vp->ks_name(), vp->view_info()->base_name());
+        }
+
+        // Now that we have a referenced base - just in case we are registering an old view (this can happen in a mixed cluster)
+        // let's make it write enabled by updating its computed columns.
+        view_ptr fixed_vp = maybe_fix_legacy_secondary_index_mv_schema(proxy.local().get_db().local(), vp, base_schema, preserve_version::yes);
+        if(fixed_vp) {
+            vp = fixed_vp;
+        }
+        vp->view_info()->set_base_info(vp->view_info()->make_base_dependent_view_info(*base_schema));
+        return vp;
    });

    proxy.local().get_db().invoke_on_all([&] (database& db) {
@@ -3033,8 +3068,7 @@ std::vector<sstring> all_table_names(schema_features features) {
           boost::adaptors::transformed([] (auto schema) { return schema->cf_name(); }));
 }

-future<> maybe_update_legacy_secondary_index_mv_schema(service::migration_manager& mm, database& db, view_ptr v) {
-    // TODO(sarna): Remove once computed columns are guaranteed to be featured in the whole cluster.
+view_ptr maybe_fix_legacy_secondary_index_mv_schema(database& db, const view_ptr& v, schema_ptr base_schema, preserve_version preserve_version) {
    // Legacy format for a secondary index used a hardcoded "token" column, which ensured a proper
    // order for indexed queries. This "token" column is now implemented as a computed column,
    // but for the sake of compatibility we assume that there might be indexes created in the legacy
@@ -3042,26 +3076,32 @@ future<> maybe_update_legacy_secondary_index_mv_schema(service::migration_manage
    // columns marked as computed (because they were either created on a node that supports computed
    // columns or were fixed by this utility function), it's safe to remove this function altogether.
    if (v->clustering_key_size() == 0) {
-        return make_ready_future<>();
+        return view_ptr(nullptr);
    }
    const column_definition& first_view_ck = v->clustering_key_columns().front();
    if (first_view_ck.is_computed()) {
-        return make_ready_future<>();
+        return view_ptr(nullptr);
+    }
+
+    if (!base_schema) {
+        base_schema = db.find_schema(v->view_info()->base_id());
    }

-    table& base = db.find_column_family(v->view_info()->base_id());
-    schema_ptr base_schema = base.schema();
    // If the first clustering key part of a view is a column with name not found in base schema,
    // it implies it might be backing an index created before computed columns were introduced,
    // and as such it must be recreated properly.
    if (!base_schema->columns_by_name().contains(first_view_ck.name())) {
        schema_builder builder{schema_ptr(v)};
-        builder.mark_column_computed(first_view_ck.name(), std::make_unique<legacy_token_column_computation>());
-        return mm.announce_view_update(view_ptr(builder.build()), true);
+        builder.mark_column_computed(first_view_ck.name(), std::make_unique<token_column_computation>());
+        if (preserve_version) {
+            builder.with_version(v->version());
+        }
+        return view_ptr(builder.build());
    }
-    return make_ready_future<>();
+    return view_ptr(nullptr);
 }

+
 namespace legacy {

 table_schema_version schema_mutations::digest() const {
@@ -3090,9 +3130,10 @@ static auto GET_COLUMN_MAPPING_QUERY = format("SELECT column_name, clustering_or
    db::schema_tables::SCYLLA_TABLE_SCHEMA_HISTORY);

 future<column_mapping> get_column_mapping(utils::UUID table_id, table_schema_version version) {
-    auto cm_fut = qctx->qp().execute_internal(
+    auto cm_fut = cql3::get_local_query_processor().execute_internal(
        GET_COLUMN_MAPPING_QUERY,
        db::consistency_level::LOCAL_ONE,
+        infinite_timeout_config,
        {table_id, version}
    );
    return cm_fut.then([version] (shared_ptr<cql3::untyped_result_set> results) {
@@ -3132,9 +3173,10 @@ future<column_mapping> get_column_mapping(utils::UUID table_id, table_schema_ver
 }

 future<bool> column_mapping_exists(utils::UUID table_id, table_schema_version version) {
-    return qctx->qp().execute_internal(
+    return cql3::get_local_query_processor().execute_internal(
        GET_COLUMN_MAPPING_QUERY,
        db::consistency_level::LOCAL_ONE,
+        infinite_timeout_config,
        {table_id, version}
    ).then([] (shared_ptr<cql3::untyped_result_set> results) {
        return !results->empty();
@@ -3145,11 +3187,12 @@ future<> drop_column_mapping(utils::UUID table_id, table_schema_version version)
    const static sstring DEL_COLUMN_MAPPING_QUERY =
        format("DELETE FROM system.{} WHERE cf_id = ? and schema_version = ?",
            db::schema_tables::SCYLLA_TABLE_SCHEMA_HISTORY);
-    return qctx->qp().execute_internal(
+    return cql3::get_local_query_processor().execute_internal(
        DEL_COLUMN_MAPPING_QUERY,
        db::consistency_level::LOCAL_ONE,
+        infinite_timeout_config,
        {table_id, version}).discard_result();
 }

 } // namespace schema_tables
-} // namespace schema
+} // namespace schema
--- a/db/schema_tables.hh
+++ b/db/schema_tables.hh
@@ -161,10 +161,10 @@ std::vector<schema_ptr> all_tables(schema_features);
 std::vector<sstring> all_table_names(schema_features);

 // saves/creates "ks" + all tables etc, while first deleting all old schema entries (will be rewritten)
-future<> save_system_schema(cql3::query_processor& qp, const sstring & ks);
+future<> save_system_schema(const sstring & ks);

 // saves/creates "system_schema" keyspace
-future<> save_system_keyspace_schema(cql3::query_processor& qp);
+future<> save_system_keyspace_schema();

 future<utils::UUID> calculate_schema_digest(distributed<service::storage_proxy>& proxy, schema_features);

@@ -238,7 +238,9 @@ std::vector<mutation> make_update_view_mutations(lw_shared_ptr<keyspace_metadata

 std::vector<mutation> make_drop_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr view, api::timestamp_type timestamp);

-future<> maybe_update_legacy_secondary_index_mv_schema(service::migration_manager& mm, database& db, view_ptr v);
+class preserve_version_tag {};
+using preserve_version = bool_class<preserve_version_tag>;
+view_ptr maybe_fix_legacy_secondary_index_mv_schema(database& db, const view_ptr& v, schema_ptr base_schema, preserve_version preserve_version);

 sstring serialize_kind(column_kind kind);
 column_kind deserialize_kind(sstring kind);
--- a/db/size_estimates_virtual_reader.cc
+++ b/db/size_estimates_virtual_reader.cc
@@ -67,14 +67,7 @@ struct virtual_row_comparator {
 };

 // Iterating over the cartesian product of cf_names and token_ranges.
-class virtual_row_iterator {
-public:
-    using iterator_category = std::input_iterator_tag;
-    using value_type = const virtual_row;
-    using difference_type = std::ptrdiff_t;
-    using pointer = const virtual_row*;
-    using reference = const virtual_row&;
-private:
+class virtual_row_iterator : public std::iterator<std::input_iterator_tag, const virtual_row> {
    std::reference_wrapper<const std::vector<bytes>> _cf_names;
    std::reference_wrapper<const std::vector<token_range>> _ranges;
    size_t _cf_names_idx = 0;
@@ -208,10 +201,10 @@ static future<std::vector<token_range>> get_local_ranges(database& db) {
        // All queries will be on that table, where all entries are text and there's no notion of
        // token ranges form the CQL point of view.
        auto left_inf = boost::find_if(ranges, [] (auto&& r) {
-            return !r.start() || r.start()->value() == dht::minimum_token();
+            return r.end() && (!r.start() || r.start()->value() == dht::minimum_token());
        });
        auto right_inf = boost::find_if(ranges, [] (auto&& r) {
-            return !r.end() || r.start()->value() == dht::maximum_token();
+            return r.start() && (!r.end() || r.end()->value() == dht::maximum_token());
        });
        if (left_inf != right_inf && left_inf != ranges.end() && right_inf != ranges.end()) {
            local_ranges.push_back(token_range{to_bytes(right_inf->start()), to_bytes(left_inf->end())});
--- a/db/snapshot-ctl.cc
+++ b/db/snapshot-ctl.cc
@@ -43,9 +43,13 @@

 namespace db {

-future<> snapshot_ctl::check_snapshot_not_exist(sstring ks_name, sstring name) {
+future<> snapshot_ctl::check_snapshot_not_exist(sstring ks_name, sstring name, std::optional<std::vector<sstring>> filter) {
    auto& ks = _db.local().find_keyspace(ks_name);
-    return parallel_for_each(ks.metadata()->cf_meta_data(), [this, ks_name = std::move(ks_name), name = std::move(name)] (auto& pair) {
+    return parallel_for_each(ks.metadata()->cf_meta_data(), [this, ks_name = std::move(ks_name), name = std::move(name), filter = std::move(filter)] (auto& pair) {
+        auto& cf_name = pair.first;
+        if (filter && std::find(filter->begin(), filter->end(), cf_name) == filter->end()) {
+            return make_ready_future<>();
+        }        
        auto& cf = _db.local().find_column_family(pair.second);
        return cf.snapshot_exists(name).then([ks_name = std::move(ks_name), name] (bool exists) {
            if (exists) {
@@ -111,7 +115,7 @@ future<> snapshot_ctl::take_column_family_snapshot(sstring ks_name, std::vector<
    }

    return run_snapshot_modify_operation([this, ks_name = std::move(ks_name), tables = std::move(tables), tag = std::move(tag)] {
-        return check_snapshot_not_exist(ks_name, tag).then([this, ks_name, tables = std::move(tables), tag] {
+        return check_snapshot_not_exist(ks_name, tag, tables).then([this, ks_name, tables, tag] {
            return do_with(std::vector<sstring>(std::move(tables)),[this, ks_name, tag](const std::vector<sstring>& tables) {
                return do_for_each(tables, [ks_name, tag, this] (const sstring& table_name) {
                    if (table_name.find(".") != sstring::npos) {
--- a/db/snapshot-ctl.hh
+++ b/db/snapshot-ctl.hh
@@ -40,6 +40,8 @@

 #pragma once

+#include <vector>
+
 #include <seastar/core/sharded.hh>
 #include <seastar/core/future.hh>
 #include "database.hh"
@@ -112,7 +114,7 @@ private:
    seastar::rwlock _lock;
    seastar::gate _ops;

-    future<> check_snapshot_not_exist(sstring ks_name, sstring name);
+    future<> check_snapshot_not_exist(sstring ks_name, sstring name, std::optional<std::vector<sstring>> filter = {});

    template <typename Func>
    std::result_of_t<Func()> run_snapshot_modify_operation(Func&&);
--- a/db/system_distributed_keyspace.cc
+++ b/db/system_distributed_keyspace.cc
@@ -155,20 +155,17 @@ future<> system_distributed_keyspace::stop() {
    return make_ready_future<>();
 }

-static service::query_state& internal_distributed_query_state() {
+static const timeout_config internal_distributed_timeout_config = [] {
    using namespace std::chrono_literals;
    const auto t = 10s;
-    static timeout_config tc{ t, t, t, t, t, t, t };
-    static thread_local service::client_state cs(service::client_state::internal_tag{}, tc);
-    static thread_local service::query_state qs(cs, empty_service_permit());
-    return qs;
-};
+    return timeout_config{ t, t, t, t, t, t, t };
+}();

 future<std::unordered_map<utils::UUID, sstring>> system_distributed_keyspace::view_status(sstring ks_name, sstring view_name) const {
    return _qp.execute_internal(
            format("SELECT host_id, status FROM {}.{} WHERE keyspace_name = ? AND view_name = ?", NAME, VIEW_BUILD_STATUS),
            db::consistency_level::ONE,
-            internal_distributed_query_state(),
+            internal_distributed_timeout_config,
            { std::move(ks_name), std::move(view_name) },
            false).then([this] (::shared_ptr<cql3::untyped_result_set> cql_result) {
        return boost::copy_range<std::unordered_map<utils::UUID, sstring>>(*cql_result
@@ -185,7 +182,7 @@ future<> system_distributed_keyspace::start_view_build(sstring ks_name, sstring
        return _qp.execute_internal(
                format("INSERT INTO {}.{} (keyspace_name, view_name, host_id, status) VALUES (?, ?, ?, ?)", NAME, VIEW_BUILD_STATUS),
                db::consistency_level::ONE,
-                internal_distributed_query_state(),
+                internal_distributed_timeout_config,
                { std::move(ks_name), std::move(view_name), std::move(host_id), "STARTED" },
                false).discard_result();
    });
@@ -196,7 +193,7 @@ future<> system_distributed_keyspace::finish_view_build(sstring ks_name, sstring
        return _qp.execute_internal(
                format("UPDATE {}.{} SET status = ? WHERE keyspace_name = ? AND view_name = ? AND host_id = ?", NAME, VIEW_BUILD_STATUS),
                db::consistency_level::ONE,
-                internal_distributed_query_state(),
+                internal_distributed_timeout_config,
                { "SUCCESS", std::move(ks_name), std::move(view_name), std::move(host_id) },
                false).discard_result();
    });
@@ -206,7 +203,7 @@ future<> system_distributed_keyspace::remove_view(sstring ks_name, sstring view_
    return _qp.execute_internal(
            format("DELETE FROM {}.{} WHERE keyspace_name = ? AND view_name = ?", NAME, VIEW_BUILD_STATUS),
            db::consistency_level::ONE,
-            internal_distributed_query_state(),
+            internal_distributed_timeout_config,
            { std::move(ks_name), std::move(view_name) },
            false).discard_result();
 }
@@ -284,7 +281,7 @@ system_distributed_keyspace::insert_cdc_topology_description(
    return _qp.execute_internal(
            format("INSERT INTO {}.{} (time, description) VALUES (?,?)", NAME, CDC_TOPOLOGY_DESCRIPTION),
            quorum_if_many(ctx.num_token_owners),
-            internal_distributed_query_state(),
+            internal_distributed_timeout_config,
            { time, make_list_value(cdc_generation_description_type, prepare_cdc_generation_description(description)) },
            false).discard_result();
 }
@@ -296,7 +293,7 @@ system_distributed_keyspace::read_cdc_topology_description(
    return _qp.execute_internal(
            format("SELECT description FROM {}.{} WHERE time = ?", NAME, CDC_TOPOLOGY_DESCRIPTION),
            quorum_if_many(ctx.num_token_owners),
-            internal_distributed_query_state(),
+            internal_distributed_timeout_config,
            { time },
            false
    ).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) -> std::optional<cdc::topology_description> {
@@ -324,7 +321,7 @@ system_distributed_keyspace::expire_cdc_topology_description(
    return _qp.execute_internal(
            format("UPDATE {}.{} SET expired = ? WHERE time = ?", NAME, CDC_TOPOLOGY_DESCRIPTION),
            quorum_if_many(ctx.num_token_owners),
-            internal_distributed_query_state(),
+            internal_distributed_timeout_config,
            { expiration_time, streams_ts },
            false).discard_result();
 }
@@ -345,7 +342,7 @@ system_distributed_keyspace::create_cdc_desc(
    return _qp.execute_internal(
            format("INSERT INTO {}.{} (time, streams) VALUES (?,?)", NAME, CDC_DESC),
            quorum_if_many(ctx.num_token_owners),
-            internal_distributed_query_state(),
+            internal_distributed_timeout_config,
            { time, make_set_value(cdc_streams_set_type, prepare_cdc_streams(streams)) },
            false).discard_result();
 }
@@ -358,7 +355,7 @@ system_distributed_keyspace::expire_cdc_desc(
    return _qp.execute_internal(
            format("UPDATE {}.{} SET expired = ? WHERE time = ?", NAME, CDC_DESC),
            quorum_if_many(ctx.num_token_owners),
-            internal_distributed_query_state(),
+            internal_distributed_timeout_config,
            { expiration_time, streams_ts },
            false).discard_result();
 }
@@ -370,7 +367,7 @@ system_distributed_keyspace::cdc_desc_exists(
    return _qp.execute_internal(
            format("SELECT time FROM {}.{} WHERE time = ?", NAME, CDC_DESC),
            quorum_if_many(ctx.num_token_owners),
-            internal_distributed_query_state(),
+            internal_distributed_timeout_config,
            { streams_ts },
            false
    ).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) -> bool {
@@ -383,7 +380,7 @@ system_distributed_keyspace::cdc_get_versioned_streams(context ctx) {
    return _qp.execute_internal(
            format("SELECT * FROM {}.{}", NAME, CDC_DESC),
            quorum_if_many(ctx.num_token_owners),
-            internal_distributed_query_state(),
+            internal_distributed_timeout_config,
            {},
            false
    ).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
--- a/db/system_keyspace.cc
+++ b/db/system_keyspace.cc
@@ -1157,20 +1157,20 @@ schema_ptr aggregates() {

 } //</legacy>

-static future<> setup_version(distributed<gms::feature_service>& feat, sharded<netw::messaging_service>& ms, const db::config& cfg) {
-    return gms::inet_address::lookup(cfg.rpc_address()).then([&feat, &ms, &cfg](gms::inet_address a) {
+static future<> setup_version(distributed<gms::feature_service>& feat, sharded<netw::messaging_service>& ms) {
+    return gms::inet_address::lookup(qctx->db().get_config().rpc_address()).then([&feat, &ms](gms::inet_address a) {
        sstring req = sprint("INSERT INTO system.%s (key, release_version, cql_version, thrift_version, native_protocol_version, data_center, rack, partitioner, rpc_address, broadcast_address, listen_address, supported_features) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
                        , db::system_keyspace::LOCAL);
        auto& snitch = locator::i_endpoint_snitch::get_local_snitch_ptr();

-        return qctx->execute_cql(req, sstring(db::system_keyspace::LOCAL),
+        return execute_cql(req, sstring(db::system_keyspace::LOCAL),
                             version::release(),
                             cql3::query_processor::CQL_VERSION,
                             ::cassandra::thrift_version,
                             to_sstring(cql_serialization_format::latest_version),
                             snitch->get_datacenter(utils::fb_utilities::get_broadcast_address()),
                             snitch->get_rack(utils::fb_utilities::get_broadcast_address()),
-                             sstring(cfg.partitioner()),
+                             sstring(qctx->db().get_config().partitioner()),
                             a.addr(),
                             utils::fb_utilities::get_broadcast_address().addr(),
                             ms.local().listen_address().addr(),
@@ -1179,7 +1179,7 @@ static future<> setup_version(distributed<gms::feature_service>& feat, sharded<n
    });
 }

-future<> check_health(const sstring& cluster_name);
+future<> check_health();
 future<> force_blocking_flush(sstring cfname);

 // Changing the real load_dc_rack_info into a future would trigger a tidal wave of futurization that would spread
@@ -1199,7 +1199,7 @@ struct local_cache {
 static distributed<local_cache> _local_cache;

 static future<> build_dc_rack_info() {
-    return qctx->execute_cql(format("SELECT peer, data_center, rack from system.{}", PEERS)).then([] (::shared_ptr<cql3::untyped_result_set> msg) {
+    return execute_cql(format("SELECT peer, data_center, rack from system.{}", PEERS)).then([] (::shared_ptr<cql3::untyped_result_set> msg) {
        return do_for_each(*msg, [] (auto& row) {
            net::inet_address peer = row.template get_as<net::inet_address>("peer");
            if (!row.has("data_center") || !row.has("rack")) {
@@ -1221,7 +1221,7 @@ static future<> build_dc_rack_info() {

 static future<> build_bootstrap_info() {
    sstring req = format("SELECT bootstrapped FROM system.{} WHERE key = ? ", LOCAL);
-    return qctx->execute_cql(req, sstring(LOCAL)).then([] (auto msg) {
+    return execute_cql(req, sstring(LOCAL)).then([] (auto msg) {
        static auto state_map = std::unordered_map<sstring, bootstrap_state>({
            { "NEEDS_BOOTSTRAP", bootstrap_state::NEEDS_BOOTSTRAP },
            { "COMPLETED", bootstrap_state::COMPLETED },
@@ -1255,8 +1255,8 @@ future<> deinit_local_cache() {
    return _local_cache.stop();
 }

-void minimal_setup(distributed<cql3::query_processor>& qp) {
-    qctx = std::make_unique<query_context>(qp);
+void minimal_setup(distributed<database>& db, distributed<cql3::query_processor>& qp) {
+    qctx = std::make_unique<query_context>(db, qp);
 }

 static future<> cache_truncation_record(distributed<database>& db);
@@ -1265,8 +1265,8 @@ future<> setup(distributed<database>& db,
               distributed<cql3::query_processor>& qp,
               distributed<gms::feature_service>& feat,
               sharded<netw::messaging_service>& ms) {
-    const db::config& cfg = db.local().get_config();
-    return setup_version(feat, ms, cfg).then([&db] {
+    minimal_setup(db, qp);
+    return setup_version(feat, ms).then([&db] {
        return update_schema_version(db.local().get_version());
    }).then([] {
        return init_local_cache();
@@ -1274,13 +1274,13 @@ future<> setup(distributed<database>& db,
        return build_dc_rack_info();
    }).then([] {
        return build_bootstrap_info();
-    }).then([&cfg] {
-        return check_health(cfg.cluster_name());
-    }).then([&qp] {
-        return db::schema_tables::save_system_keyspace_schema(qp.local());
-    }).then([&qp] {
+    }).then([] {
+        return check_health();
+    }).then([] {
+        return db::schema_tables::save_system_keyspace_schema();
+    }).then([] {
        // #2514 - make sure "system" is written to system_schema.keyspaces.
-        return db::schema_tables::save_system_schema(qp.local(), NAME);
+        return db::schema_tables::save_system_schema(NAME);
    }).then([&db] {
        return cache_truncation_record(db);
    }).then([&ms] {
@@ -1314,6 +1314,16 @@ typedef std::unordered_map<truncation_key, truncation_record> truncation_map;

 static constexpr uint8_t current_version = 1;

+/**
+ * Deletes the truncation records stored for table `id`, then flushes the TRUNCATED table.
+ */
+future<> remove_truncation_record(utils::UUID id) {
+    sstring req = format("DELETE FROM system.{} WHERE table_uuid = ?", TRUNCATED); // CQL has no "DELETE *"; an empty column list removes the matching rows
+    return qctx->qp().execute_internal(req, {id}).discard_result().then([] {
+        return force_blocking_flush(TRUNCATED);
+    });
+}
+
 static future<truncation_record> get_truncation_record(utils::UUID cf_id) {
    sstring req = format("SELECT * from system.{} WHERE table_uuid = ?", TRUNCATED);
    return qctx->qp().execute_internal(req, {cf_id}).then([cf_id](::shared_ptr<cql3::untyped_result_set> rs) {
@@ -1340,13 +1350,16 @@ static future<> cache_truncation_record(distributed<database>& db) {
            auto table_uuid = row.get_as<utils::UUID>("table_uuid");
            auto ts = row.get_as<db_clock::time_point>("truncated_at");

-            return db.invoke_on_all([table_uuid, ts] (database& db) mutable {
-                try {
-                    table& cf = db.find_column_family(table_uuid);
-                    cf.cache_truncation_record(ts);
-                } catch (no_such_column_family&) {
-                    slogger.debug("Skip caching truncation time for {} since the table is no longer present", table_uuid);
-                }
+            auto cpus = boost::irange(0u, smp::count);
+            return parallel_for_each(cpus.begin(), cpus.end(), [table_uuid, ts, &db] (unsigned int c) mutable {
+                return smp::submit_to(c, [table_uuid, ts, &db] () mutable {
+                    try {
+                        table& cf = db.local().find_column_family(table_uuid);
+                        cf.cache_truncation_record(ts);
+                    } catch (no_such_column_family&) {
+                        slogger.debug("Skip caching truncation time for {} since the table is no longer present", table_uuid);
+                    }
+                });
            });
        });
    });
@@ -1412,7 +1425,7 @@ future<> update_tokens(gms::inet_address ep, const std::unordered_set<dht::token

    sstring req = format("INSERT INTO system.{} (peer, tokens) VALUES (?, ?)", PEERS);
    auto set_type = set_type_impl::get_instance(utf8_type, true);
-    return qctx->execute_cql(req, ep.addr(), make_set_value(set_type, prepare_tokens(tokens))).discard_result().then([] {
+    return execute_cql(req, ep.addr(), make_set_value(set_type, prepare_tokens(tokens))).discard_result().then([] {
        return force_blocking_flush(PEERS);
    });
 }
@@ -1420,7 +1433,7 @@ future<> update_tokens(gms::inet_address ep, const std::unordered_set<dht::token

 future<std::unordered_map<gms::inet_address, std::unordered_set<dht::token>>> load_tokens() {
    sstring req = format("SELECT peer, tokens FROM system.{}", PEERS);
-    return qctx->execute_cql(req).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
+    return execute_cql(req).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
        std::unordered_map<gms::inet_address, std::unordered_set<dht::token>> ret;
        for (auto& row : *cql_result) {
            auto peer = gms::inet_address(row.get_as<net::inet_address>("peer"));
@@ -1438,7 +1451,7 @@ future<std::unordered_map<gms::inet_address, std::unordered_set<dht::token>>> lo

 future<std::unordered_map<gms::inet_address, utils::UUID>> load_host_ids() {
    sstring req = format("SELECT peer, host_id FROM system.{}", PEERS);
-    return qctx->execute_cql(req).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
+    return execute_cql(req).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
        std::unordered_map<gms::inet_address, utils::UUID> ret;
        for (auto& row : *cql_result) {
            auto peer = gms::inet_address(row.get_as<net::inet_address>("peer"));
@@ -1452,7 +1465,7 @@ future<std::unordered_map<gms::inet_address, utils::UUID>> load_host_ids() {

 future<std::unordered_map<gms::inet_address, sstring>> load_peer_features() {
    sstring req = format("SELECT peer, supported_features FROM system.{}", PEERS);
-    return qctx->execute_cql(req).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
+    return execute_cql(req).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
        std::unordered_map<gms::inet_address, sstring> ret;
        for (auto& row : *cql_result) {
            if (row.has("supported_features")) {
@@ -1466,14 +1479,14 @@ future<std::unordered_map<gms::inet_address, sstring>> load_peer_features() {

 future<> update_preferred_ip(gms::inet_address ep, gms::inet_address preferred_ip) {
    sstring req = format("INSERT INTO system.{} (peer, preferred_ip) VALUES (?, ?)", PEERS);
-    return qctx->execute_cql(req, ep.addr(), preferred_ip.addr()).discard_result().then([] {
+    return execute_cql(req, ep.addr(), preferred_ip.addr()).discard_result().then([] {
        return force_blocking_flush(PEERS);
    });
 }

 future<std::unordered_map<gms::inet_address, gms::inet_address>> get_preferred_ips() {
    sstring req = format("SELECT peer, preferred_ip FROM system.{}", PEERS);
-    return qctx->execute_cql(req).then([] (::shared_ptr<cql3::untyped_result_set> cql_res_set) {
+    return execute_cql(req).then([] (::shared_ptr<cql3::untyped_result_set> cql_res_set) {
        std::unordered_map<gms::inet_address, gms::inet_address> res;

        for (auto& r : *cql_res_set) {
@@ -1514,7 +1527,7 @@ future<> update_peer_info(gms::inet_address ep, sstring column_name, Value value

    return update_cached_values(ep, column_name, value).then([ep, column_name, value] {
        sstring req = format("INSERT INTO system.{} (peer, {}) VALUES (?, ?)", PEERS, column_name);
-        return qctx->execute_cql(req, ep.addr(), value).discard_result();
+        return execute_cql(req, ep.addr(), value).discard_result();
    });
 }
 // sets are not needed, since tokens are updated by another method
@@ -1522,14 +1535,20 @@ template future<> update_peer_info<sstring>(gms::inet_address ep, sstring column
 template future<> update_peer_info<utils::UUID>(gms::inet_address ep, sstring column_name, utils::UUID);
 template future<> update_peer_info<net::inet_address>(gms::inet_address ep, sstring column_name, net::inet_address);

+future<> update_hints_dropped(gms::inet_address ep, utils::UUID time_period, int value) {
+    // The hints_dropped[time_period] entry is written with a TTL of 2592000 seconds (30 days).
+    sstring req = format("UPDATE system.{} USING TTL 2592000 SET hints_dropped[ ? ] = ? WHERE peer = ?", PEER_EVENTS);
+    return execute_cql(req, time_period, value, ep.addr()).discard_result();
+}
+
 future<> set_scylla_local_param(const sstring& key, const sstring& value) {
    sstring req = format("UPDATE system.{} SET value = ? WHERE key = ?", SCYLLA_LOCAL);
-    return qctx->execute_cql(req, value, key).discard_result();
+    return execute_cql(req, value, key).discard_result();
 }

 future<std::optional<sstring>> get_scylla_local_param(const sstring& key){
    sstring req = format("SELECT value FROM system.{} WHERE key = ?", SCYLLA_LOCAL);
-    return qctx->execute_cql(req, key).then([] (::shared_ptr<cql3::untyped_result_set> res) {
+    return execute_cql(req, key).then([] (::shared_ptr<cql3::untyped_result_set> res) {
        if (res->empty() || !res->one().has("value")) {
            return std::optional<sstring>();
        }
@@ -1539,7 +1558,7 @@ future<std::optional<sstring>> get_scylla_local_param(const sstring& key){

 future<> update_schema_version(utils::UUID version) {
    sstring req = format("INSERT INTO system.{} (key, schema_version) VALUES (?, ?)", LOCAL);
-    return qctx->execute_cql(req, sstring(LOCAL), version).discard_result();
+    return execute_cql(req, sstring(LOCAL), version).discard_result();
 }

 /**
@@ -1550,7 +1569,7 @@ future<> remove_endpoint(gms::inet_address ep) {
        lc._cached_dc_rack_info.erase(ep);
    }).then([ep] {
        sstring req = format("DELETE FROM system.{} WHERE peer = ?", PEERS);
-        return qctx->execute_cql(req, ep.addr()).discard_result();
+        return execute_cql(req, ep.addr()).discard_result();
    }).then([] {
        return force_blocking_flush(PEERS);
    });
@@ -1563,22 +1582,23 @@ future<> update_tokens(const std::unordered_set<dht::token>& tokens) {

    sstring req = format("INSERT INTO system.{} (key, tokens) VALUES (?, ?)", LOCAL);
    auto set_type = set_type_impl::get_instance(utf8_type, true);
-    return qctx->execute_cql(req, sstring(LOCAL), make_set_value(set_type, prepare_tokens(tokens))).discard_result().then([] {
+    return execute_cql(req, sstring(LOCAL), make_set_value(set_type, prepare_tokens(tokens))).discard_result().then([] {
        return force_blocking_flush(LOCAL);
    });
 }

 future<> update_cdc_streams_timestamp(db_clock::time_point tp) {
-    return qctx->execute_cql(format("INSERT INTO system.{} (key, streams_timestamp) VALUES (?, ?)",
+    return execute_cql(format("INSERT INTO system.{} (key, streams_timestamp) VALUES (?, ?)",
                v3::CDC_LOCAL), sstring(v3::CDC_LOCAL), tp)
            .discard_result().then([] { return force_blocking_flush(v3::CDC_LOCAL); });
 }

 future<> force_blocking_flush(sstring cfname) {
    assert(qctx);
-    return qctx->_qp.invoke_on_all([cfname = std::move(cfname)] (cql3::query_processor& qp) {
+    return qctx->_db.invoke_on_all([cfname = std::move(cfname)](database& db) {
        // if (!Boolean.getBoolean("cassandra.unsafesystem"))
-        return qp.db().flush(NAME, cfname);
+        column_family& cf = db.find_column_family(NAME, cfname);
+        return cf.flush();
    });
 }

@@ -1588,16 +1608,17 @@ future<> force_blocking_flush(sstring cfname) {
 * 2. no files are there: great (new node is assumed)
 * 3. files are present but you can't read them: bad
 */
-future<> check_health(const sstring& cluster_name) {
+future<> check_health() {
    using namespace cql_transport::messages;
    sstring req = format("SELECT cluster_name FROM system.{} WHERE key=?", LOCAL);
-    return qctx->execute_cql(req, sstring(LOCAL)).then([&cluster_name] (::shared_ptr<cql3::untyped_result_set> msg) {
+    return execute_cql(req, sstring(LOCAL)).then([] (::shared_ptr<cql3::untyped_result_set> msg) {
        if (msg->empty() || !msg->one().has("cluster_name")) {
            // this is a brand new node
            sstring ins_req = format("INSERT INTO system.{} (key, cluster_name) VALUES (?, ?)", LOCAL);
-            return qctx->execute_cql(ins_req, sstring(LOCAL), cluster_name).discard_result();
+            return execute_cql(ins_req, sstring(LOCAL), qctx->db().get_config().cluster_name()).discard_result();
        } else {
            auto saved_cluster_name = msg->one().get_as<sstring>("cluster_name");
+            auto cluster_name = qctx->db().get_config().cluster_name();

            if (cluster_name != saved_cluster_name) {
                throw exceptions::configuration_exception("Saved cluster name " + saved_cluster_name + " != configured name " + cluster_name);
@@ -1610,7 +1631,7 @@ future<> check_health(const sstring& cluster_name) {

 future<std::unordered_set<dht::token>> get_saved_tokens() {
    sstring req = format("SELECT tokens FROM system.{} WHERE key = ?", LOCAL);
-    return qctx->execute_cql(req, sstring(LOCAL)).then([] (auto msg) {
+    return execute_cql(req, sstring(LOCAL)).then([] (auto msg) {
        if (msg->empty() || !msg->one().has("tokens")) {
            return make_ready_future<std::unordered_set<dht::token>>();
        }
@@ -1636,7 +1657,7 @@ future<std::unordered_set<dht::token>> get_local_tokens() {
 }

 future<std::optional<db_clock::time_point>> get_saved_cdc_streams_timestamp() {
-    return qctx->execute_cql(format("SELECT streams_timestamp FROM system.{} WHERE key = ?", v3::CDC_LOCAL), sstring(v3::CDC_LOCAL))
+    return execute_cql(format("SELECT streams_timestamp FROM system.{} WHERE key = ?", v3::CDC_LOCAL), sstring(v3::CDC_LOCAL))
            .then([] (::shared_ptr<cql3::untyped_result_set> msg)-> std::optional<db_clock::time_point> {
        if (msg->empty() || !msg->one().has("streams_timestamp")) {
            return {};
@@ -1673,7 +1694,7 @@ future<> set_bootstrap_state(bootstrap_state state) {
    sstring state_name = state_to_name.at(state);

    sstring req = format("INSERT INTO system.{} (key, bootstrapped) VALUES (?, ?)", LOCAL);
-    return qctx->execute_cql(req, sstring(LOCAL), state_name).discard_result().then([state] {
+    return execute_cql(req, sstring(LOCAL), state_name).discard_result().then([state] {
        return force_blocking_flush(LOCAL).then([state] {
            return _local_cache.invoke_on_all([state] (local_cache& lc) {
                lc._state = state;
@@ -1743,7 +1764,7 @@ void make(database& db, bool durable, bool volatile_testing_only) {
            // don't make system keyspace writes wait for user writes (if under pressure)
            kscfg.dirty_memory_manager = &db._system_dirty_memory_manager;
            keyspace _ks{ksm, std::move(kscfg)};
-            auto rs(locator::abstract_replication_strategy::create_replication_strategy(NAME, "LocalStrategy", db.get_shared_token_metadata(), ksm->strategy_options()));
+            auto rs(locator::abstract_replication_strategy::create_replication_strategy(NAME, "LocalStrategy", db.get_token_metadata(), ksm->strategy_options()));
            _ks.set_replication_strategy(std::move(rs));
            db.add_keyspace(ks_name, std::move(_ks));
        }
@@ -1763,7 +1784,7 @@ void make(database& db, bool durable, bool volatile_testing_only) {
 future<utils::UUID> get_local_host_id() {
    using namespace cql_transport::messages;
    sstring req = format("SELECT host_id FROM system.{} WHERE key=?", LOCAL);
-    return qctx->execute_cql(req, sstring(LOCAL)).then([] (::shared_ptr<cql3::untyped_result_set> msg) {
+    return execute_cql(req, sstring(LOCAL)).then([] (::shared_ptr<cql3::untyped_result_set> msg) {
        auto new_id = [] {
            auto host_id = utils::make_random_uuid();
            return set_local_host_id(host_id);
@@ -1779,7 +1800,7 @@ future<utils::UUID> get_local_host_id() {

 future<utils::UUID> set_local_host_id(const utils::UUID& host_id) {
    sstring req = format("INSERT INTO system.{} (key, host_id) VALUES (?, ?)", LOCAL);
-    return qctx->execute_cql(req, sstring(LOCAL), host_id).then([] (auto msg) {
+    return execute_cql(req, sstring(LOCAL), host_id).then([] (auto msg) {
        return force_blocking_flush(LOCAL);
    }).then([host_id] {
        return host_id;
@@ -1791,6 +1812,23 @@ load_dc_rack_info() {
    return _local_cache.local()._cached_dc_rack_info;
 }

+
+future<foreign_ptr<lw_shared_ptr<reconcilable_result>>>
+query_mutations(distributed<service::storage_proxy>& proxy, const sstring& cf_name) {
+    return query_mutations(proxy, db::system_keyspace::NAME, cf_name);
+}
+
+future<lw_shared_ptr<query::result_set>>
+query(distributed<service::storage_proxy>& proxy, const sstring& cf_name) {
+    return query(proxy, db::system_keyspace::NAME, cf_name);
+}
+
+future<lw_shared_ptr<query::result_set>>
+query(distributed<service::storage_proxy>& proxy, const sstring& cf_name, const dht::decorated_key& key, query::clustering_range row_range)
+{
+    return query(proxy, db::system_keyspace::NAME, cf_name, key, row_range);
+}
+
 future<foreign_ptr<lw_shared_ptr<reconcilable_result>>>
 query_mutations(distributed<service::storage_proxy>& proxy, const sstring& ks_name, const sstring& cf_name) {
    database& db = proxy.local().get_db().local();
@@ -1854,7 +1892,7 @@ future<> update_compaction_history(utils::UUID uuid, sstring ksname, sstring cfn
                    , COMPACTION_HISTORY);

    db_clock::time_point tp{db_clock::duration{compacted_at}};
-    return qctx->execute_cql(req, uuid, ksname, cfname, tp, bytes_in, bytes_out,
+    return execute_cql(req, uuid, ksname, cfname, tp, bytes_in, bytes_out,
                       make_map_value(map_type, prepare_rows_merged(rows_merged))).discard_result().handle_exception([] (auto ep) {
        slogger.error("update compaction history failed: {}: ignored", ep);
    });
@@ -1931,7 +1969,7 @@ mutation make_size_estimates_mutation(const sstring& ks, std::vector<range_estim
 future<> register_view_for_building(sstring ks_name, sstring view_name, const dht::token& token) {
    sstring req = format("INSERT INTO system.{} (keyspace_name, view_name, generation_number, cpu_id, first_token) VALUES (?, ?, ?, ?, ?)",
            v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS);
-    return qctx->execute_cql(
+    return execute_cql(
            std::move(req),
            std::move(ks_name),
            std::move(view_name),
@@ -1943,7 +1981,7 @@ future<> register_view_for_building(sstring ks_name, sstring view_name, const dh
 future<> update_view_build_progress(sstring ks_name, sstring view_name, const dht::token& token) {
    sstring req = format("INSERT INTO system.{} (keyspace_name, view_name, next_token, cpu_id) VALUES (?, ?, ?, ?)",
            v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS);
-    return qctx->execute_cql(
+    return execute_cql(
            std::move(req),
            std::move(ks_name),
            std::move(view_name),
@@ -1952,14 +1990,14 @@ future<> update_view_build_progress(sstring ks_name, sstring view_name, const dh
 }

 future<> remove_view_build_progress_across_all_shards(sstring ks_name, sstring view_name) {
-    return qctx->execute_cql(
+    return execute_cql(
            format("DELETE FROM system.{} WHERE keyspace_name = ? AND view_name = ?", v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS),
            std::move(ks_name),
            std::move(view_name)).discard_result();
 }

 future<> remove_view_build_progress(sstring ks_name, sstring view_name) {
-    return qctx->execute_cql(
+    return execute_cql(
            format("DELETE FROM system.{} WHERE keyspace_name = ? AND view_name = ? AND cpu_id = ?", v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS),
            std::move(ks_name),
            std::move(view_name),
@@ -1967,21 +2005,21 @@ future<> remove_view_build_progress(sstring ks_name, sstring view_name) {
 }

 future<> mark_view_as_built(sstring ks_name, sstring view_name) {
-    return qctx->execute_cql(
+    return execute_cql(
            format("INSERT INTO system.{} (keyspace_name, view_name) VALUES (?, ?)", v3::BUILT_VIEWS),
            std::move(ks_name),
            std::move(view_name)).discard_result();
 }

 future<> remove_built_view(sstring ks_name, sstring view_name) {
-    return qctx->execute_cql(
+    return execute_cql(
            format("DELETE FROM system.{} WHERE keyspace_name = ? AND view_name = ?", v3::BUILT_VIEWS),
            std::move(ks_name),
            std::move(view_name)).discard_result();
 }

 future<std::vector<view_name>> load_built_views() {
-    return qctx->execute_cql(format("SELECT * FROM system.{}", v3::BUILT_VIEWS)).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
+    return execute_cql(format("SELECT * FROM system.{}", v3::BUILT_VIEWS)).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
        return boost::copy_range<std::vector<view_name>>(*cql_result
                | boost::adaptors::transformed([] (const cql3::untyped_result_set::row& row) {
            auto ks_name = row.get_as<sstring>("keyspace_name");
@@ -1992,7 +2030,7 @@ future<std::vector<view_name>> load_built_views() {
 }

 future<std::vector<view_build_progress>> load_view_build_progress() {
-    return qctx->execute_cql(format("SELECT keyspace_name, view_name, first_token, next_token, cpu_id FROM system.{}",
+    return execute_cql(format("SELECT keyspace_name, view_name, first_token, next_token, cpu_id FROM system.{}",
            v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS)).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
        std::vector<view_build_progress> progress;
        for (auto& row : *cql_result) {
@@ -2013,7 +2051,7 @@ future<std::vector<view_build_progress>> load_view_build_progress() {
        }
        return progress;
    }).handle_exception([] (const std::exception_ptr& eptr) {
-        slogger.warn("Failed to load view build progress: {}", eptr);
+        slogger.error("Failed to load view build progress: {}", eptr);
        return std::vector<view_build_progress>();
    });
 }
@@ -2023,7 +2061,7 @@ future<service::paxos::paxos_state> load_paxos_state(partition_key_view key, sch
    static auto cql = format("SELECT * FROM system.{} WHERE row_key = ? AND cf_id = ?", PAXOS);
    // FIXME: we need execute_cql_with_now()
    (void)now;
-    auto f = qctx->execute_cql_with_timeout(cql, timeout, to_legacy(*key.get_compound_type(*s), key.representation()), s->id());
+    auto f = execute_cql_with_timeout(cql, timeout, to_legacy(*key.get_compound_type(*s), key.representation()), s->id());
    return f.then([s, key = std::move(key)] (shared_ptr<cql3::untyped_result_set> results) mutable {
        if (results->empty()) {
            return service::paxos::paxos_state();
@@ -2062,7 +2100,7 @@ static int32_t paxos_ttl_sec(const schema& s) {

 future<> save_paxos_promise(const schema& s, const partition_key& key, const utils::UUID& ballot, db::timeout_clock::time_point timeout) {
    static auto cql = format("UPDATE system.{} USING TIMESTAMP ? AND TTL ? SET promise = ? WHERE row_key = ? AND cf_id = ?", PAXOS);
-    return qctx->execute_cql_with_timeout(cql,
+    return execute_cql_with_timeout(cql,
            timeout,
            utils::UUID_gen::micros_timestamp(ballot),
            paxos_ttl_sec(s),
@@ -2075,7 +2113,7 @@ future<> save_paxos_promise(const schema& s, const partition_key& key, const uti
 future<> save_paxos_proposal(const schema& s, const service::paxos::proposal& proposal, db::timeout_clock::time_point timeout) {
    static auto cql = format("UPDATE system.{} USING TIMESTAMP ? AND TTL ? SET promise = ?, proposal_ballot = ?, proposal = ? WHERE row_key = ? AND cf_id = ?", PAXOS);
    partition_key_view key = proposal.update.key();
-    return qctx->execute_cql_with_timeout(cql,
+    return execute_cql_with_timeout(cql,
            timeout,
            utils::UUID_gen::micros_timestamp(proposal.ballot),
            paxos_ttl_sec(s),
@@ -2097,7 +2135,7 @@ future<> save_paxos_decision(const schema& s, const service::paxos::proposal& de
    static auto cql = format("UPDATE system.{} USING TIMESTAMP ? AND TTL ? SET proposal_ballot = null, proposal = null,"
            " most_recent_commit_at = ?, most_recent_commit = ? WHERE row_key = ? AND cf_id = ?", PAXOS);
    partition_key_view key = decision.update.key();
-    return qctx->execute_cql_with_timeout(cql,
+    return execute_cql_with_timeout(cql,
            timeout,
            utils::UUID_gen::micros_timestamp(decision.ballot),
            paxos_ttl_sec(s),
@@ -2114,7 +2152,7 @@ future<> delete_paxos_decision(const schema& s, const partition_key& key, const
    // guarantees that if there is more recent round it will not be affected.
    static auto cql = format("DELETE most_recent_commit FROM system.{} USING TIMESTAMP ?  WHERE row_key = ? AND cf_id = ?", PAXOS);

-    return qctx->execute_cql_with_timeout(cql,
+    return execute_cql_with_timeout(cql,
            timeout,
            utils::UUID_gen::micros_timestamp(ballot),
            to_legacy(*key.get_compound_type(s), key.representation()),
--- a/db/system_keyspace.hh
+++ b/db/system_keyspace.hh
@@ -170,7 +170,7 @@ schema_ptr aggregates();
 table_schema_version generate_schema_version(utils::UUID table_id, uint16_t offset = 0);

 // Only for testing.
-void minimal_setup(distributed<cql3::query_processor>& qp);
+void minimal_setup(distributed<database>& db, distributed<cql3::query_processor>& qp);

 future<> init_local_cache();
 future<> deinit_local_cache();
@@ -203,12 +203,29 @@ future<> update_peer_info(gms::inet_address ep, sstring column_name, Value value

 future<> remove_endpoint(gms::inet_address ep);

+future<> update_hints_dropped(gms::inet_address ep, utils::UUID time_period, int value);
+
 future<> set_scylla_local_param(const sstring& key, const sstring& value);
 future<std::optional<sstring>> get_scylla_local_param(const sstring& key);

 std::vector<schema_ptr> all_tables();
 void make(database& db, bool durable, bool volatile_testing_only = false);

+future<foreign_ptr<lw_shared_ptr<reconcilable_result>>>
+query_mutations(distributed<service::storage_proxy>& proxy, const sstring& cf_name);
+
+// Returns all data from given system table.
+// Intended to be used by code which is not performance critical.
+future<lw_shared_ptr<query::result_set>> query(distributed<service::storage_proxy>& proxy, const sstring& cf_name);
+
+// Returns a slice of given system table.
+// Intended to be used by code which is not performance critical.
+future<lw_shared_ptr<query::result_set>> query(
+    distributed<service::storage_proxy>& proxy,
+    const sstring& cf_name,
+    const dht::decorated_key& key,
+    query::clustering_range row_ranges = query::clustering_range::make_open_ended_both_sides());
+
 /// overloads

 future<foreign_ptr<lw_shared_ptr<reconcilable_result>>>
@@ -397,6 +414,7 @@ enum class bootstrap_state {

    future<> save_truncation_record(utils::UUID, db_clock::time_point truncated_at, db::replay_position);
    future<> save_truncation_record(const column_family&, db_clock::time_point truncated_at, db::replay_position);
+    future<> remove_truncation_record(utils::UUID);
    future<replay_positions> get_truncated_position(utils::UUID);
    future<db::replay_position> get_truncated_position(utils::UUID, uint32_t shard);
    future<db_clock::time_point> get_truncated_at(utils::UUID);
--- a/db/view/view.cc
+++ b/db/view/view.cc
@@ -152,50 +152,41 @@ db::view::base_dependent_view_info::base_dependent_view_info(schema_ptr base_sch
 }

 // A constructor for a base info that can facilitate only reads from the materialized view.
-db::view::base_dependent_view_info::base_dependent_view_info(bool has_base_non_pk_columns_in_view_pk, std::optional<bytes>&& column_missing_in_base)
+db::view::base_dependent_view_info::base_dependent_view_info(bool has_base_non_pk_columns_in_view_pk)
        : _base_schema{nullptr}
-        , _column_missing_in_base{std::move(column_missing_in_base)}
        , has_base_non_pk_columns_in_view_pk{has_base_non_pk_columns_in_view_pk}
        , use_only_for_reads{true} {
 }

 const std::vector<column_id>& db::view::base_dependent_view_info::base_non_pk_columns_in_view_pk() const {
    if (use_only_for_reads) {
-        on_internal_error(vlogger,
-                format("base_non_pk_columns_in_view_pk(): operation unsupported when initialized only for view reads. "
-                "Missing column in the base table: {}", to_sstring_view(_column_missing_in_base.value_or(bytes()))));
+        on_internal_error(vlogger, "base_non_pk_columns_in_view_pk(): operation unsupported when initialized only for view reads.");
    }
    return _base_non_pk_columns_in_view_pk;
 }

 const schema_ptr& db::view::base_dependent_view_info::base_schema() const {
    if (use_only_for_reads) {
-        on_internal_error(vlogger,
-                format("base_schema(): operation unsupported when initialized only for view reads. "
-                "Missing column in the base table: {}", to_sstring_view(_column_missing_in_base.value_or(bytes()))));
+        on_internal_error(vlogger, "base_schema(): operation unsupported when initialized only for view reads.");
    }
    return _base_schema;
 }

 db::view::base_info_ptr view_info::make_base_dependent_view_info(const schema& base) const {
    std::vector<column_id> base_non_pk_columns_in_view_pk;
+    bool has_base_non_pk_columns_in_view_pk = false;
+    bool can_only_read_from_view = false;

    for (auto&& view_col : boost::range::join(_schema.partition_key_columns(), _schema.clustering_key_columns())) {
        if (view_col.is_computed()) {
            // we are not going to find it in the base table...
            continue;
        }
-        const bytes& view_col_name = view_col.name();
-        auto* base_col = base.get_column_definition(view_col_name);
+        auto* base_col = base.get_column_definition(view_col.name());
        if (base_col && !base_col->is_primary_key()) {
            base_non_pk_columns_in_view_pk.push_back(base_col->id);
+            has_base_non_pk_columns_in_view_pk = true;
        } else if (!base_col) {
-            vlogger.error("Column {} in view {}.{} was not found in the base table {}.{}",
-                    to_sstring_view(view_col_name), _schema.ks_name(), _schema.cf_name(), base.ks_name(), base.cf_name());
-            if (to_sstring_view(view_col_name) == "idx_token") {
-                vlogger.warn("Missing idx_token column is caused by an incorrect upgrade of a secondary index. "
-                        "Please recreate index {}.{} to avoid future issues.", _schema.ks_name(), _schema.cf_name());
-            }
            // If we didn't find the column in the base column then it must have been deleted
            // or not yet added (by alter command), this means it is for sure not a pk column
            // in the base table. This can happen if the version of the base schema is not the
@@ -203,11 +194,21 @@ db::view::base_info_ptr view_info::make_base_dependent_view_info(const schema& b
            // if we got to such a situation then it means it is only going to be used for reading
            // (computation of shadowable tombstones) and in that case the existence of such a column
            // is the only thing that is of interest to us.
-            return make_lw_shared<db::view::base_dependent_view_info>(true, view_col_name);
+            has_base_non_pk_columns_in_view_pk = true;
+            can_only_read_from_view = true;
+
+            // We can break the loop here since we have the info we wanted and the list
+            // of columns is not going to be reliable anyhow.
+            break;
        }
    }

-    return make_lw_shared<db::view::base_dependent_view_info>(base.shared_from_this(), std::move(base_non_pk_columns_in_view_pk));
+    if (can_only_read_from_view) {
+        return make_lw_shared<db::view::base_dependent_view_info>(has_base_non_pk_columns_in_view_pk);
+    } else {
+        return make_lw_shared<db::view::base_dependent_view_info>(base.shared_from_this(), std::move(base_non_pk_columns_in_view_pk));
+    }
+
 }

 bool view_info::has_base_non_pk_columns_in_view_pk() const {
@@ -218,7 +219,7 @@ bool view_info::has_base_non_pk_columns_in_view_pk() const {
    // schema integrity problem as the creator of owning view schema
    // didn't make sure to initialize it with base information.
    if (!_base_info) {
-        on_internal_error(vlogger, "Tried to perform a view query which is base info dependent without initializing it");
+        on_internal_error(vlogger, "Tried to perform a view query which is base info dependant without initializing it");
    }
    return _base_info->has_base_non_pk_columns_in_view_pk;
 }
@@ -416,7 +417,7 @@ deletable_row& view_updates::get_view_row(const partition_key& base_key, const c
                if (!service::get_local_storage_service().db().local().find_column_family(_base->id()).get_index_manager().is_index(*_view)) {
                    throw std::logic_error(format("Column {} doesn't exist in base and this view is not backing a secondary index", cdef.name_as_text()));
                }
-                computed_value = legacy_token_column_computation().compute_value(*_base, base_key, update);
+                computed_value = token_column_computation().compute_value(*_base, base_key, update);
            } else {
                computed_value = cdef.get_computation().compute_value(*_base, base_key, update);
            }
--- a/db/view/view.hh
+++ b/db/view/view.hh
@@ -53,10 +53,6 @@ private:
    // Id of a regular base table column included in the view's PK, if any.
    // Scylla views only allow one such column, alternator can have up to two.
    std::vector<column_id> _base_non_pk_columns_in_view_pk;
-    // For tracing purposes, if the view is out of sync with its base table
-    // and there exists a column which is not in base, its name is stored
-    // and added to debug messages.
-    std::optional<bytes> _column_missing_in_base = {};
 public:
    const std::vector<column_id>& base_non_pk_columns_in_view_pk() const;
    const schema_ptr& base_schema() const;
@@ -75,7 +71,7 @@ public:
    // A constructor for a base info that can facilitate reads and writes from the materialized view.
    base_dependent_view_info(schema_ptr base_schema, std::vector<column_id>&& base_non_pk_columns_in_view_pk);
    // A constructor for a base info that can facilitate only reads from the materialized view.
-    base_dependent_view_info(bool has_base_non_pk_columns_in_view_pk, std::optional<bytes>&& column_missing_in_base);
+    base_dependent_view_info(bool has_base_non_pk_columns_in_view_pk);
 };

 // Immutable snapshot of view's base-schema-dependent part.
--- a/dht/boot_strapper.cc
+++ b/dht/boot_strapper.cc
@@ -50,7 +50,7 @@ static logging::logger blogger("boot_strapper");
 namespace dht {

 future<> boot_strapper::bootstrap(streaming::stream_reason reason) {
-    blogger.debug("Beginning bootstrap process: sorted_tokens={}", get_token_metadata().sorted_tokens());
+    blogger.debug("Beginning bootstrap process: sorted_tokens={}", _token_metadata.sorted_tokens());
    sstring description;
    if (reason == streaming::stream_reason::bootstrap) {
        description = "Bootstrap";
@@ -59,7 +59,7 @@ future<> boot_strapper::bootstrap(streaming::stream_reason reason) {
    } else {
        return make_exception_future<>(std::runtime_error("Wrong stream_reason provided: it can only be replace or bootstrap"));
    }
-    auto streamer = make_lw_shared<range_streamer>(_db, _token_metadata_ptr, _abort_source, _tokens, _address, description, reason);
+    auto streamer = make_lw_shared<range_streamer>(_db, _token_metadata, _abort_source, _tokens, _address, description, reason);
    auto nodes_to_filter = gms::get_local_gossiper().get_unreachable_members();
    if (reason == streaming::stream_reason::replace && _db.local().get_replace_address()) {
        nodes_to_filter.insert(_db.local().get_replace_address().value());
@@ -70,7 +70,7 @@ future<> boot_strapper::bootstrap(streaming::stream_reason reason) {
    return do_for_each(*keyspaces, [this, keyspaces, streamer] (sstring& keyspace_name) {
        auto& ks = _db.local().find_keyspace(keyspace_name);
        auto& strategy = ks.get_replication_strategy();
-        dht::token_range_vector ranges = strategy.get_pending_address_ranges(_token_metadata_ptr, _tokens, _address, locator::can_yield::no);
+        dht::token_range_vector ranges = strategy.get_pending_address_ranges(_token_metadata, _tokens, _address);
        blogger.debug("Will stream keyspace={}, ranges={}", keyspace_name, ranges);
        return streamer->add_ranges(keyspace_name, ranges);
    }).then([this, streamer] {
@@ -83,7 +83,7 @@ future<> boot_strapper::bootstrap(streaming::stream_reason reason) {

 }

-std::unordered_set<token> boot_strapper::get_bootstrap_tokens(const token_metadata_ptr tmptr, database& db) {
+std::unordered_set<token> boot_strapper::get_bootstrap_tokens(const token_metadata& metadata, database& db) {
    auto initial_tokens = db.get_initial_tokens();
    // if user specified tokens, use those
    if (initial_tokens.size() > 0) {
@@ -91,7 +91,7 @@ std::unordered_set<token> boot_strapper::get_bootstrap_tokens(const token_metada
        std::unordered_set<token> tokens;
        for (auto& token_string : initial_tokens) {
            auto token = dht::token::from_sstring(token_string);
-            if (tmptr->get_endpoint(token)) {
+            if (metadata.get_endpoint(token)) {
                throw std::runtime_error(format("Bootstrapping to existing token {} is not allowed (decommission/removenode the old node first).", token_string));
            }
            tokens.insert(token);
@@ -109,16 +109,16 @@ std::unordered_set<token> boot_strapper::get_bootstrap_tokens(const token_metada
        blogger.warn("Picking random token for a single vnode.  You should probably add more vnodes; failing that, you should probably specify the token manually");
    }

-    auto tokens = get_random_tokens(std::move(tmptr), num_tokens);
+    auto tokens = get_random_tokens(metadata, num_tokens);
    blogger.debug("Get random bootstrap_tokens={}", tokens);
    return tokens;
 }

-std::unordered_set<token> boot_strapper::get_random_tokens(const token_metadata_ptr tmptr, size_t num_tokens) {
+std::unordered_set<token> boot_strapper::get_random_tokens(const token_metadata& metadata, size_t num_tokens) {
    std::unordered_set<token> tokens;
    while (tokens.size() < num_tokens) {
        auto token = dht::token::get_random_token();
-        auto ep = tmptr->get_endpoint(token);
+        auto ep = metadata.get_endpoint(token);
        if (!ep) {
            tokens.emplace(token);
        }
--- a/dht/boot_strapper.hh
+++ b/dht/boot_strapper.hh
@@ -50,7 +50,6 @@ namespace dht {
 class boot_strapper {
    using inet_address = gms::inet_address;
    using token_metadata = locator::token_metadata;
-    using token_metadata_ptr = locator::token_metadata_ptr;
    using token = dht::token;
    distributed<database>& _db;
    abort_source& _abort_source;
@@ -58,14 +57,14 @@ class boot_strapper {
    inet_address _address;
    /* token of the node being bootstrapped. */
    std::unordered_set<token> _tokens;
-    const token_metadata_ptr _token_metadata_ptr;
+    token_metadata _token_metadata;
 public:
-    boot_strapper(distributed<database>& db, abort_source& abort_source, inet_address addr, std::unordered_set<token> tokens, const token_metadata_ptr tmptr)
+    boot_strapper(distributed<database>& db, abort_source& abort_source, inet_address addr, std::unordered_set<token> tokens, token_metadata tmd)
        : _db(db)
        , _abort_source(abort_source)
        , _address(addr)
        , _tokens(tokens)
-        , _token_metadata_ptr(std::move(tmptr)) {
+        , _token_metadata(std::move(tmd)) { // tmd is already a by-value copy: move it into the member instead of copying a second time
    }

    future<> bootstrap(streaming::stream_reason reason);
@@ -75,9 +74,9 @@ public:
     * otherwise, if num_tokens == 1, pick a token to assume half the load of the most-loaded node.
     * else choose num_tokens tokens at random
     */
-    static std::unordered_set<token> get_bootstrap_tokens(const token_metadata_ptr tmptr, database& db);
+    static std::unordered_set<token> get_bootstrap_tokens(const token_metadata& metadata, database& db);

-    static std::unordered_set<token> get_random_tokens(const token_metadata_ptr tmptr, size_t num_tokens);
+    static std::unordered_set<token> get_random_tokens(const token_metadata& metadata, size_t num_tokens);
 #if 0
    public static class StringSerializer implements IVersionedSerializer<String>
    {
@@ -99,11 +98,6 @@ public:
        }
    }
 #endif
-
-private:
-    const token_metadata& get_token_metadata() {
-        return *_token_metadata_ptr;
-    }
 };

 } // namespace dht
--- a/dht/range_streamer.cc
+++ b/dht/range_streamer.cc
@@ -107,7 +107,6 @@ range_streamer::get_range_fetch_map(const std::unordered_map<dht::token_range, s
    return range_fetch_map_map;
 }

-// Must be called from a seastar thread
 std::unordered_map<dht::token_range, std::vector<inet_address>>
 range_streamer::get_all_ranges_with_sources_for(const sstring& keyspace_name, dht::token_range_vector desired_ranges) {
    logger.debug("{} ks={}", __func__, keyspace_name);
@@ -115,8 +114,8 @@ range_streamer::get_all_ranges_with_sources_for(const sstring& keyspace_name, dh
    auto& ks = _db.local().find_keyspace(keyspace_name);
    auto& strat = ks.get_replication_strategy();

-    auto tm = get_token_metadata().clone_only_token_map().get0();
-    auto range_addresses = strat.get_range_addresses(tm, locator::can_yield::yes);
+    auto tm = _metadata.clone_only_token_map();
+    auto range_addresses = strat.get_range_addresses(tm);

    logger.debug("keyspace={}, desired_ranges.size={}, range_addresses.size={}", keyspace_name, desired_ranges.size(), range_addresses.size());

@@ -147,7 +146,6 @@ range_streamer::get_all_ranges_with_sources_for(const sstring& keyspace_name, dh
    return range_sources;
 }

-// Must be called from a seastar thread
 std::unordered_map<dht::token_range, std::vector<inet_address>>
 range_streamer::get_all_ranges_with_strict_sources_for(const sstring& keyspace_name, dht::token_range_vector desired_ranges) {
    logger.debug("{} ks={}", __func__, keyspace_name);
@@ -157,12 +155,12 @@ range_streamer::get_all_ranges_with_strict_sources_for(const sstring& keyspace_n
    auto& strat = ks.get_replication_strategy();

    //Active ranges
-    auto metadata_clone = get_token_metadata().clone_only_token_map().get0();
-    auto range_addresses = strat.get_range_addresses(metadata_clone, locator::can_yield::yes);
+    auto metadata_clone = _metadata.clone_only_token_map();
+    auto range_addresses = strat.get_range_addresses(metadata_clone);

    //Pending ranges
    metadata_clone.update_normal_tokens(_tokens, _address);
-    auto pending_range_addresses  = strat.get_range_addresses(metadata_clone, locator::can_yield::yes);
+    auto pending_range_addresses  = strat.get_range_addresses(metadata_clone);

    //Collects the source that will have its range moved to the new node
    std::unordered_map<dht::token_range, std::vector<inet_address>> range_sources;
@@ -223,7 +221,7 @@ bool range_streamer::use_strict_sources_for_ranges(const sstring& keyspace_name)
    return !_db.local().is_replacing()
           && use_strict_consistency()
           && !_tokens.empty()
-           && get_token_metadata().get_all_endpoints().size() != strat.get_replication_factor();
+           && _metadata.get_all_endpoints().size() != strat.get_replication_factor();
 }

 void range_streamer::add_tx_ranges(const sstring& keyspace_name, std::unordered_map<inet_address, dht::token_range_vector> ranges_per_endpoint) {
--- a/dht/range_streamer.hh
+++ b/dht/range_streamer.hh
@@ -60,7 +60,6 @@ class range_streamer {
 public:
    using inet_address = gms::inet_address;
    using token_metadata = locator::token_metadata;
-    using token_metadata_ptr = locator::token_metadata_ptr;
    using stream_plan = streaming::stream_plan;
    using stream_state = streaming::stream_state;
    static bool use_strict_consistency();
@@ -102,9 +101,9 @@ public:
        }
    };

-    range_streamer(distributed<database>& db, const token_metadata_ptr tmptr, abort_source& abort_source, std::unordered_set<token> tokens, inet_address address, sstring description, streaming::stream_reason reason)
+    range_streamer(distributed<database>& db, const token_metadata& tm, abort_source& abort_source, std::unordered_set<token> tokens, inet_address address, sstring description, streaming::stream_reason reason)
        : _db(db)
-        , _token_metadata_ptr(std::move(tmptr))
+        , _metadata(tm)
        , _abort_source(abort_source)
        , _tokens(std::move(tokens))
        , _address(address)
@@ -114,8 +113,8 @@ public:
        _abort_source.check();
    }

-    range_streamer(distributed<database>& db, const token_metadata_ptr tmptr, abort_source& abort_source, inet_address address, sstring description, streaming::stream_reason reason)
-        : range_streamer(db, std::move(tmptr), abort_source, std::unordered_set<token>(), address, description, reason) {
+    range_streamer(distributed<database>& db, const token_metadata& tm, abort_source& abort_source, inet_address address, sstring description, streaming::stream_reason reason)
+        : range_streamer(db, tm, abort_source, std::unordered_set<token>(), address, description, reason) {
    }

    void add_source_filter(std::unique_ptr<i_source_filter> filter) {
@@ -160,17 +159,13 @@ private:
        return toFetch;
    }
 #endif
-
-    const token_metadata& get_token_metadata() {
-        return *_token_metadata_ptr;
-    }
 public:
    future<> stream_async();
    future<> do_stream_async();
    size_t nr_ranges_to_stream();
 private:
    distributed<database>& _db;
-    const token_metadata_ptr _token_metadata_ptr;
+    const token_metadata& _metadata;
    abort_source& _abort_source;
    std::unordered_set<token> _tokens;
    inet_address _address;
--- a/digester.hh
+++ b/digester.hh
@@ -58,7 +58,8 @@ public:

    template<typename T, typename... Args>
    void feed_hash(const T& value, Args&&... args) {
-        std::visit([&] (auto& hasher) noexcept -> void {
+        // FIXME uncomment the noexcept marking once clang bug 50994 is fixed or gcc compilation is turned on
+        std::visit([&] (auto& hasher) /* noexcept(noexcept(::feed_hash(hasher, value, args...))) */ -> void {
            ::feed_hash(hasher, value, std::forward<Args>(args)...);
        }, _impl);
    };
--- a/dist/common/scripts/node_exporter_install
+++ b/dist/common/scripts/node_exporter_install
@@ -24,9 +24,10 @@ import os
 import sys
 import tempfile
 import tarfile
+import shutil
+import glob
 from scylla_util import *
 import argparse
-from subprocess import run

 VERSION='1.0.1'
 INSTALL_DIR=scylladir()+'/Prometheus/node_exporter'
@@ -53,7 +54,7 @@ if __name__ == '__main__':
            sys.exit(1)

    if is_gentoo_variant():
-        run('emerge -uq app-metrics/node_exporter', shell=True, check=True)
+        run('emerge -uq app-metrics/node_exporter')
        print('app-metrics/node_exporter does not install systemd service files, please fill a bug if you need them.')
        sys.exit(1)
    else:
@@ -62,6 +63,9 @@ if __name__ == '__main__':
            f.write(data)
        with tarfile.open('/var/tmp/node_exporter-{version}.linux-amd64.tar.gz'.format(version=VERSION)) as tf:
            tf.extractall(INSTALL_DIR)
+        shutil.chown(f'{INSTALL_DIR}/node_exporter-{VERSION}.linux-amd64', 'root', 'root')
+        for f in glob.glob(f'{INSTALL_DIR}/node_exporter-{VERSION}.linux-amd64/*'):
+            shutil.chown(f, 'root', 'root')
        os.remove('/var/tmp/node_exporter-{version}.linux-amd64.tar.gz'.format(version=VERSION))
        if node_exporter_p.exists():
            node_exporter_p.unlink()
--- a/dist/common/scripts/scylla_bootparam_setup
+++ b/dist/common/scripts/scylla_bootparam_setup
@@ -24,8 +24,8 @@ import os
 import re
 import sys
 import argparse
+import subprocess
 from scylla_util import *
-from subprocess import run

 if __name__ == '__main__':
    if os.getuid() > 0:
@@ -58,9 +58,9 @@ if __name__ == '__main__':
            cfg.set(grub_key, cmdline_linux)
            cfg.commit()
            if is_debian_variant():
-                run('update-grub', shell=True, check=True)
+                run('update-grub')
            else:
-                run('grub2-mkconfig -o /boot/grub2/grub.cfg', shell=True, check=True)
+                run('grub2-mkconfig -o /boot/grub2/grub.cfg')

 #    if is_ec2() and os.path.exists('/boot/grub/menu.lst'):
    if os.path.exists('/boot/grub/menu.lst'):
--- a/dist/common/scripts/scylla_coredump_setup
+++ b/dist/common/scripts/scylla_coredump_setup
@@ -26,8 +26,7 @@ import argparse
 import subprocess
 import time
 import tempfile
 from scylla_util import *
-from subprocess import run

 if __name__ == '__main__':
    if os.getuid() > 0:
@@ -42,7 +41,7 @@ if __name__ == '__main__':

 # Gentoo may uses OpenRC
    if is_gentoo_variant():
-        run('sysctl -p /etc/sysctl.d/99-scylla-coredump.conf', shell=True, check=True)
+        run('sysctl -p /etc/sysctl.d/99-scylla-coredump.conf')
 # Other distributions can use systemd-coredump, so setup it
    else:
        if is_debian_variant():
@@ -80,14 +79,15 @@ WantedBy=multi-user.target
            systemd_unit('var-lib-systemd-coredump.mount').enable()
            systemd_unit('var-lib-systemd-coredump.mount').start()
        if os.path.exists('/usr/lib/sysctl.d/50-coredump.conf'):
-            run('sysctl -p /usr/lib/sysctl.d/50-coredump.conf', shell=True, check=True)
+            run('sysctl -p /usr/lib/sysctl.d/50-coredump.conf')
        else:
            with open('/etc/sysctl.d/99-scylla-coredump.conf', 'w') as f:
-                f.write('kernel.core_pattern=|/usr/lib/systemd/systemd-coredump %p %u %g %s %t %e"')
+                f.write('kernel.core_pattern=|/usr/lib/systemd/systemd-coredump %p %u %g %s %t %e')
-            run('sysctl -p /etc/sysctl.d/99-scylla-coredump.conf', shell=True, check=True)
+            run('sysctl -p /etc/sysctl.d/99-scylla-coredump.conf')

        fp = tempfile.NamedTemporaryFile()
-        fp.write(b'kill -SEGV $$')
+        fp.write(b'ulimit -c unlimited\n')
+        fp.write(b'kill -SEGV $$\n')
        fp.flush()
        p = subprocess.Popen(['/bin/bash', fp.name], stdout=subprocess.PIPE)
        pid = p.pid
@@ -98,7 +98,7 @@ WantedBy=multi-user.target
        # need to wait for systemd-coredump to complete collecting coredump
        time.sleep(3)
        try:
-            coreinfo = run('coredumpctl --no-pager --no-legend info {}'.format(pid), shell=True, check=True, capture_output=True, encoding='utf-8').stdout.strip()
+            coreinfo = out('coredumpctl --no-pager --no-legend info {}'.format(pid))
        except subprocess.CalledProcessError:
            print('Does not able to detect coredump, failed to configure systemd-coredump.')
            sys.exit(1)
--- a/dist/common/scripts/scylla_cpuscaling_setup
+++ b/dist/common/scripts/scylla_cpuscaling_setup
@@ -22,6 +22,7 @@

 import os
 import sys
+import argparse
 import shlex
 import distro
 from scylla_util import *
@@ -33,12 +34,22 @@ if __name__ == '__main__':
    if os.getuid() > 0:
        print('Requires root permission.')
        sys.exit(1)
-    if not os.path.exists('/sys/devices/system/cpu/cpufreq/policy0/scaling_governor'):
+    parser = argparse.ArgumentParser(description='CPU scaling setup script for Scylla.')
+    parser.add_argument('--force', dest='force', action='store_true',
+                        help='force running setup even CPU scaling unsupported')
+    args = parser.parse_args()
+
+    if not args.force and not os.path.exists('/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor'):
        print('This computer doesn\'t supported CPU scaling configuration.')
        sys.exit(0)
    if is_debian_variant():
        if not shutil.which('cpufreq-set'):
            apt_install('cpufrequtils')
+        try:
+            ondemand = systemd_unit('ondemand')
+            ondemand.disable()
+        except Exception:  # best effort: ignore a failure to disable 'ondemand', but let KeyboardInterrupt/SystemExit propagate
+            pass
        cfg = sysconfig_parser('/etc/default/cpufrequtils')
        cfg.set('GOVERNOR', 'performance')
        cfg.commit()
--- a/dist/common/scripts/scylla_ec2_check
+++ b/dist/common/scripts/scylla_ec2_check
@@ -24,7 +24,6 @@ import os
 import sys
 import argparse
 from scylla_util import *
-from subprocess import run

 if __name__ == '__main__':
    if not is_ec2():
@@ -41,7 +40,7 @@ if __name__ == '__main__':
    aws = aws_instance()
    instance_class = aws.instance_class()
    en = aws.get_en_interface_type()
-    match = re.search(r'^driver: (\S+)$', run('ethtool -i {}'.format(args.nic), shell=True, check=True, capture_output=True, encoding='utf-8').stdout.strip(), flags=re.MULTILINE)
+    match = re.search(r'^driver: (\S+)$', out('ethtool -i {}'.format(args.nic)), flags=re.MULTILINE)
    driver = match.group(1)

    if not en:
--- a/Show More
+++ b/Show More