Compare commits


395 Commits

Author SHA1 Message Date
Piotr Sarna
fcb349b026 tests: add tests for per-role timeouts
The test cases verify that setting timeout parameters per-role
works and is validated.
2020-11-27 12:43:53 +01:00
Piotr Sarna
28c558af95 docs: add a paragraph about per-role parameters
This paragraph is also the first one in the newly created roles.md,
which should later be filled with more information about roles.
2020-11-27 12:43:53 +01:00
Piotr Sarna
83b47ae394 cql3: add validating per-role timeout options
Per-role timeout options are now validated when set:
 - they should represent a valid duration
 - the duration should have millisecond granularity,
   since the timeout clock does not support micro/nanoseconds.
2020-11-27 12:37:27 +01:00
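The two validation rules above can be sketched roughly as follows; this is an illustrative helper with a made-up name, not the actual cql3 code:

```cpp
#include <chrono>
#include <optional>

// Illustrative sketch only (not Scylla's actual API): accept a timeout
// duration only if it is a valid positive duration with whole-millisecond
// granularity, mirroring the validation rules listed above.
std::optional<std::chrono::milliseconds>
validate_timeout(std::chrono::nanoseconds d) {
    using namespace std::chrono;
    if (d <= nanoseconds::zero()) {
        return std::nullopt;  // not a valid duration
    }
    if (d % milliseconds(1) != nanoseconds::zero()) {
        return std::nullopt;  // timeout clock cannot represent sub-millisecond values
    }
    return duration_cast<milliseconds>(d);
}
```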
Piotr Sarna
391d1f2b21 client_state: add updating per-role params
Per-role parameters (currently: read_timeout and write_timeout)
are now updated when a new connection is established.
Also, the changes are immediately propagated for the connection
which sent the CREATE ROLE/ALTER ROLE statement.
The other connections which have the changed role are currently
not immediately reloaded.
It can be done in the future if needed, but then all sessions with
a given role would need to be tracked, or, alternatively, all sessions
would need to be iterated over and changed.
2020-11-27 12:37:27 +01:00
Piotr Sarna
137a8a0161 auth: add options support to password authenticator
Custom options will be used later to provide per-role timeouts
and other useful parameters.
2020-11-27 12:37:17 +01:00
Piotr Sarna
c473cb4a2d treewide: remove timeout config from query options
Timeout config is now stored in each connection, so there's no point
in tracking it inside each query as well. This patch removes
timeout_config from query_options and follows by removing now
unnecessary parameters of many functions and constructors.
2020-11-26 17:56:55 +01:00
Piotr Sarna
98fac66361 cql3: use timeout config from client state instead of query options
... in batch statement, in order to be able to remove the timeout
from query options later.
2020-11-26 17:55:29 +01:00
Piotr Sarna
2cbeb3678f cql3: use timeout config from client state instead of query options
... in modification statement, in order to be able to remove the timeout
from query options later.
2020-11-26 17:55:29 +01:00
Piotr Sarna
d61e1fd174 cql3: use timeout config from client state instead of query options
... in select statement, in order to be able to remove the timeout
from query options later.
2020-11-26 17:55:29 +01:00
Piotr Sarna
f31ac0a8ca service: add timeout config to client state
Future patches will use this per-connection timeout config
to allow setting different timeouts for each session,
based on roles.
2020-11-26 17:55:14 +01:00
Kamil Braun
d158921966 sstables: add may_have_partition_tombstones method
For sstable versions greater than or equal to md, the `min_max_column_names`
sstable metadata gives a range of position-in-partitions such that all
clustering rows stored in this sstable have positions in this range.

Partition tombstones in this context are understood as covering the
entire range of clustering keys; thus, if the sstable contains at least
one partition tombstone, the sstable position range is set to be the
range of all clustered rows.

Therefore, by checking that the position range is *not* the range of all
clustered rows we know that the sstable cannot have any partition tombstones.

Closes #7678
2020-11-23 23:30:19 +02:00
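The reasoning above can be reduced to a toy model: clustering positions as plain ints, and "the range of all clustered rows" as the full int range. Names and types here are illustrative stand-ins, not Scylla's:

```cpp
#include <limits>

// Toy model of the sstable position-range check described above.
struct position_range {
    int min;
    int max;
};

constexpr int lowest  = std::numeric_limits<int>::min();
constexpr int highest = std::numeric_limits<int>::max();

// A partition tombstone covers every clustering key, which forces the
// sstable's stored position range to be the full range. Therefore a
// narrower range proves the sstable has no partition tombstones.
bool may_have_partition_tombstones(position_range r) {
    return r.min == lowest && r.max == highest;
}
```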
Kamil Braun
72c59e8000 flat_mutation_reader: document assumption about fast_forward_to
It is not legal to fast forward a reader before it enters a partition.
One must ensure that there even is a partition in the first place. For
this one must fetch a `partition_start` fragment.

Closes #7679
2020-11-23 17:39:46 +01:00
Pavel Emelyanov
fea4a5492f system-keyspace: Remove dead code
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20201123151453.27341-1-xemul@scylladb.com>
2020-11-23 17:16:15 +02:00
Tomasz Grabiec
36f9da6420 Merge "raft: testing: snapshots and partitioning elections" from Alejo
Fixes, features needed for testing, snapshot testing.
Free election after partitioning (replication test).

* https://github.com/alecco/scylla/tree/raft-ale-tests-05e:
  raft: replication test: partitioning with leader
  raft: replication test: run free election after partitioning
  raft: expose fsm tick() to server for testing
  raft: expose is_leader() for testing
  raft: replication test: test take and load snapshot
  raft: fix a bug in leader election
  raft: fix default randomized timeout
  raft: replication test: fix custom next leader
  raft: replication test: custom next leader noop for same
  raft: replication test: fix failure detector for disconnected
2020-11-23 14:36:39 +01:00
Takuya ASADA
b90ddc12c9 scylla_prepare: add --tune system when SET_CLOCKSOURCE=yes
perftune.py only runs clocksource setup when --tune system is specified,
so we need to add that parameter when SET_CLOCKSOURCE=yes.

Fixes #7672
2020-11-23 10:51:16 +02:00
Avi Kivity
f8e0517bc7 cql: do not advance timeouts on internal pages
Currently, each internal page fetched during aggregation
gets a timeout based on the time the page fetch was started,
rather than the query start time. This means the query can
continue processing long after the client has abandoned it
due to its own timeout, which is based on the query start time.

Fix by establishing the timeout once when the query starts, and
not advancing it.

Test: manual (SELECT count(*) FROM a large table).

Fixes #1175.

Closes #7662
2020-11-23 08:14:18 +01:00
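The shape of the fix can be sketched with simplified, illustrative types (not the actual cql code): derive the deadline once from the query start time, then check every internal page against that same fixed deadline instead of giving each page fetch a fresh timeout:

```cpp
#include <chrono>

using qclock = std::chrono::steady_clock;  // illustrative clock choice

struct query_context {
    qclock::time_point deadline;
};

// The deadline is established once, when the query starts.
query_context start_query(qclock::time_point start,
                          std::chrono::milliseconds timeout) {
    return query_context{start + timeout};
}

// Each internal page is checked against the same fixed deadline;
// it never advances between pages.
bool page_fetch_timed_out(const query_context& q, qclock::time_point now) {
    return now > q.deadline;
}
```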
Alejo Sanchez
1f8ca4e06d raft: replication test: partitioning with leader
For test simplicity, support

    partition{leader{A},B,C,D}

Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
2020-11-22 22:39:00 -04:00
Avi Kivity
3eac976e24 build: remove non-C/C++ jobs from submodule_pools
The C and C++ sub-builds were placed in submodule_pool to
reduce concurrency, as they are memory intensive (well, at least
the C++ jobs are), and we choose build concurrency based on memory.
But the other submodules are not memory intensive, and certainly
the packaging jobs are not (and they are single-threaded too).

To allow these simple jobs to utilize multicores more efficiently,
remove them from submodule_pool so they can run in parallel.

Closes #7671
2020-11-23 00:32:41 +02:00
Avi Kivity
bcced9f56b build: compress unified package faster
The unified package is quite large (1GB compressed), and it
is the last step in the build so its build time cannot be
parallelized with other tasks. Compress it with pigz to take
advantage of multiple cores and speed up the build a little.

Closes #7670
2020-11-23 00:31:04 +02:00
Takuya ASADA
3fefa520bd dist/common/scripts: drop run() and out(), switch to subprocess.run()
We initially implemented the run() and out() functions because we couldn't use
subprocess.run() while we were on Python 3.4.
Since we moved to relocatable python3, we no longer need to implement them ourselves.
We kept using these functions because we needed to set the PATH environment variable.
Now that this code has moved to the python thunk, we are finally able to
drop run() and out() and switch to subprocess.run().
2020-11-22 17:59:27 +02:00
Alejo Sanchez
f12fed0809 raft: replication test: run free election after partitioning
When partitioning without keeping the existing leader, run an election
without forcing a particular leader.

To force a leader after partitioning, a test can just set it with new_leader{X}.

Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
2020-11-22 10:32:34 -04:00
Alejo Sanchez
d610d5a7b8 raft: expose fsm tick() to server for testing
For tests to advance servers they need to invoke tick().

This is needed to advance free elections.

Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
2020-11-22 10:32:34 -04:00
Alejo Sanchez
9e7e14fc50 raft: expose is_leader() for testing
Expose fsm leader check to allow tests to find out the leader after an
election.

Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
2020-11-22 10:32:34 -04:00
Alejo Sanchez
f4d0131f02 raft: replication test: test take and load snapshot
Trigger automatic snapshotting through configuration.

For now, handle expected log index within the test's state machine and
pass it with snapshot_value (within the test file).

Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
2020-11-22 10:32:34 -04:00
Konstantin Osipov
bce8cb11a7 raft: fix a bug in leader election
If a server responds favourably to a RequestVote RPC, it should
reset its election timer; otherwise it has a very high chance of becoming
a candidate with an even newer term, despite a successful election.
A candidate with a term larger than the leader's rejects AppendEntries
RPCs and cannot become a leader itself (because of the protection
against disruptive leaders), so it is stuck in this state.
2020-11-22 10:32:34 -04:00
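A toy model of the fix described above (these are illustrative types, not the raft module's real ones): a follower that grants its vote restarts its election timer, so it does not time out and start a competing election with a newer term while the new leader is establishing itself:

```cpp
// Toy follower election timer, for illustration only.
struct follower_timer {
    int ticks_since_heard = 0;
    int election_timeout = 10;

    void on_vote_granted() {
        ticks_since_heard = 0;  // the fix: granting a vote resets the timer
    }
    void tick() {
        ++ticks_since_heard;
    }
    bool timed_out() const {
        return ticks_since_heard >= election_timeout;
    }
};
```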
Alejo Sanchez
08f8c418df raft: fix default randomized timeout
The range after the election timeout should start at +1.
This matches the existing update_current_term() code, which adds dist(1, 2*n).

Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
2020-11-22 10:32:34 -04:00
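The corrected randomization can be sketched as follows (illustrative, not the raft code itself): with a base election timeout of `n` ticks, the extra random delay is drawn from dist(1, 2*n), so the result always starts at least one tick past the base timeout:

```cpp
#include <random>

// Illustrative sketch of a randomized election timeout whose random
// component starts at +1, matching dist(1, 2*n) from the log above.
int randomized_election_timeout(int n, std::mt19937& rng) {
    std::uniform_int_distribution<int> dist(1, 2 * n);
    return n + dist(rng);  // always at least n + 1, at most n + 2*n
}
```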
Alejo Sanchez
ab3a8b7bcd raft: replication test: fix custom next leader
Adjustments after the changes due to free elections in partitioning and
changes in the code.

Elapse the previous leader after isolating it.

Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
2020-11-22 10:32:22 -04:00
Alejo Sanchez
3bff7d1d21 raft: replication test: custom next leader noop for same
If the custom-specified leader is the same, do nothing.

Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
2020-11-22 10:15:20 -04:00
Avi Kivity
1e170ebfc1 Merge 'Changing hints configuration followup' from Piotr Dulikowski
Follow-up to https://github.com/scylladb/scylla/pull/6916.

- Fixes wrong usage of `resource_manager::prepare_per_device_limits`,
- Improves locking in `resource_manager` so that it is safer to call its methods concurrently,
- Adds comments around `resource_manager::register_manager` so that it's more clear what this method does and why.

Closes #7660

* github.com:scylladb/scylla:
  hints/resource_manager: add comments to register_manager
  hints/resource_manager: fix indentation
  hints/resource_manager: improve mutual exclusion
  hints/resource_manager: correct prepare_per_device_limits usage
2020-11-22 15:06:35 +02:00
Alejo Sanchez
1436e4a323 raft: replication test: fix failure detector for disconnected
For a disconnected server, is_alive() for all other servers is false.

Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
2020-11-22 09:04:58 -04:00
Pekka Enberg
2c8dcbe5c5 reloc: Remove "build_reloc.sh" script as obsolete
The "ninja dist-server-tar" command is a full replacement for the
"build_reloc.sh" script. Our release engineering infrastructure has
been switched to ninja, so let's remove "build_reloc.sh" as obsolete.
2020-11-20 22:41:26 +02:00
Piotr Sarna
5a9dc6a3cc Merge 'Cleanup CDC tests after CDC became GA' from Piotr Jastrzębski
Now that CDC is GA, it should be enabled in all the tests by default.
To achieve that, the PR adds a special db::config::add_cdc_extension()
helper which is used in cql_test_env to make sure CDC is usable in
all the tests that use cql_test_env. As a result, cdc_tests can be
simplified.
Finally, some trailing whitespace is removed from cdc_tests.

Tests: unit(dev)

Closes #7657

* github.com:scylladb/scylla:
  cdc: Remove trailing whitespaces from cdc_tests
  cdc: Remove mk_cdc_test_config from tests
  config: Add add_cdc_extension function for testing
  cdc: Add missing includes to cdc_extension.hh
2020-11-20 13:56:29 +01:00
Konstantin Osipov
269c049a16 test.py: enable back CQL based tests
The patch which introduces build-dependent testing
has a regression: it quietly filters out all tests
which are not part of ninja output. Since ninja
doesn't build any CQL tests (including CQL-pytest),
all such tests were quietly disabled.

Fix the regression by only doing the filtering
in unit and boost test suites.

test: dev (unit), dev + --build-raft
Message-Id: <20201119224008.185250-1-kostja@scylladb.com>
2020-11-20 11:45:15 +02:00
Pekka Enberg
6a04ae69a2 Update seastar submodule
* seastar c861dbfb...010fb0df (3):
  > build: clean up after failed -fconcepts detection
  > logger: issue std::endl to output stream
  > util/log: improve discoverability of log rate-limiting
2020-11-20 11:43:11 +02:00
Avi Kivity
82b508250e tools: toolchain: dbuild: don't confine with seccomp
Some systems (at least, Centos 7, aarch64) block the membarrier()
syscall via seccomp. This causes Scylla or unit tests to burn cpu
instead of sleeping when there is nothing to do.

Fix by instructing podman/docker not to block any syscalls. I
tested this with podman, and it appears [1] to be supported on
docker.

[1] https://docs.docker.com/engine/security/seccomp/#run-without-the-default-seccomp-profile

Closes #7661
2020-11-20 09:11:52 +02:00
Avi Kivity
70689088fd Merge "Remove reference on database from global qctx" from Pavel E
"
The qctx is a global object that references the query processor and
the database to let the rest of the code query the system keyspace.

As the first step of de-globalizing it -- remove the database
reference from it. After this step the qctx remains a simple
wrapper over the query processor (which is already de-globalized),
and the query processor in turn is mostly needed only to parse
the query string into a prepared statement. This, in turn,
makes it possible to remove the qctx later by parsing the
query strings on boot and carrying _them_ around, not the qctx
itself.

tests: unit(dev), dtest(simple_cluster_driver_test:dev), manual start/stop
"

* 'br-remove-database-from-qctx' of https://github.com/xemul/scylla:
  query-context: Remove database from qctx
  schema-tables: Use query processor reference in save_system(_keyspace)?_schema
  system-keyspace: Rewrite force_blocking_flush
  system-keyspace: Use cluster_name string in check_health
  system-keyspace: Use db::config in setup_version
  query-context: Kill global helpers
  test: Use cql_test_env::execute_cql instead of qctx version
  code: Use qctx::execute_cql methods, not global ones
  system-keyspace: Do not call minimal_setup for the 2nd time
  system-keyspace: Fix indentation after previous patch
  system-keyspace: Do not do invoke_on_all by hands
  system-keyspace: Remove dead code
2020-11-19 18:31:51 +02:00
Pavel Emelyanov
689fd029a1 query-context: Remove database from qctx
No users of qctx::db are left.  One global database reference less.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-11-19 18:39:05 +03:00
Pavel Emelyanov
464c8990d4 schema-tables: Use query processor reference in save_system(_keyspace)?_schema
The save_system_schema and save_system_keyspace_schema are both
called on start and can get the needed query processor reference
from their arguments.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-11-19 18:39:05 +03:00
Pavel Emelyanov
66dcc47571 system-keyspace: Rewrite force_blocking_flush
The method is called after query_processor::execute_internal
to flush the cf. Encapsulating this flush inside database and
getting the database from query_processor allows removing the
database reference from the global qctx object.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-11-19 18:39:05 +03:00
Pavel Emelyanov
6cad18ad33 system-keyspace: Use cluster_name string in check_health
The check_health needs the global qctx to get db.config.cluster_name,
which is already available at the caller side.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-11-19 18:39:05 +03:00
Pavel Emelyanov
36a3ee6ad4 system-keyspace: Use db::config in setup_version
This is the beginning of de-globalizing the global qctx.

The setup_version() needs the global qctx to get the config from.
It's possible to get the config from the caller instead.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-11-19 18:39:05 +03:00
Pavel Emelyanov
43039a0812 query-context: Kill global helpers
Now that the db::execute_cql* callers are patched, the global
helpers can be removed.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-11-19 18:39:05 +03:00
Pavel Emelyanov
64eef0a4f7 test: Use cql_test_env::execute_cql instead of qctx version
Similar to the previous patch, but for tests. Since cql_test_env
doesn't have qctx on board, the patch makes one step forward
and calls what is called by qctx::execute_cql.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-11-19 18:39:05 +03:00
Pavel Emelyanov
303ebe4a36 code: Use qctx::execute_cql methods, not global ones
There are global db::execute_cql() helpers that just forward
the args into qctx::execute_cql(). The former are going away,
so patch all callers to use qctx themselves.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-11-19 18:39:05 +03:00
Pavel Emelyanov
8bf6b1298c system-keyspace: Do not call minimal_setup for the 2nd time
The system_keyspace::minimal_setup is already called by hand from
main.cc, some steps before the regular ::setup().

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-11-19 18:39:05 +03:00
Pavel Emelyanov
7b82ec2f9e system-keyspace: Fix indentation after previous patch
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-11-19 18:39:05 +03:00
Pavel Emelyanov
1773dadc72 system-keyspace: Do not do invoke_on_all by hands
The cache_truncation_record needs to run cf.cache_truncation_record
on each shard's DB, so invoke_on_all can be used.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-11-19 18:39:05 +03:00
Pavel Emelyanov
fb20d9cd1e system-keyspace: Remove dead code
Not called anywhere.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-11-19 18:39:05 +03:00
Piotr Dulikowski
60ac68b7a2 hints/resource_manager: add comments to register_manager
Adds more comments to resource_manager::register_manager in order to
better explain what this function is doing.
2020-11-19 16:34:37 +01:00
Piotr Dulikowski
c0c10b918c hints/resource_manager: fix indentation
Fixes indentation in prepare_per_device_limits.
2020-11-19 16:34:37 +01:00
Piotr Dulikowski
ead6a3f036 hints/resource_manager: improve mutual exclusion
This commit causes the start, stop and register_manager methods of the
resource_manager to be serialized with respect to each other using the
_operation_lock.

Those functions modify internal state, so it's best if they are
protected with a semaphore. Additionally, those functions are not going
to be used frequently, therefore it's perfectly fine to protect them in
such a coarse manner.

Now, space_watchdog has a dedicated lock for serializing its on_timer
logic with resource_manager::register_manager. The reason for a separate
lock is that resource_manager::stop cannot use the same lock as the
space_watchdog - otherwise a situation could occur in which
space_watchdog waits for semaphore units held by
resource_manager::stop(), and resource_manager::stop() waits until the
space_watchdog stops its asynchronous event loop.
2020-11-19 16:34:37 +01:00
Piotr Dulikowski
362aebee7b hints/resource_manager: correct prepare_per_device_limits usage
The resource_manager::prepare_per_device_limits function calculates disk
quota for registered hints managers, and creates an association map:
from a storage device id to those hints managers which store hints on
that device (_per_device_limits_map).

This function was used under the assumption that it is idempotent - which
is a wrong assumption. In resource_manager::register_manager, if the
resource_manager is already started, prepare_per_device_limits would be
called, and those hints managers which were previously added to the
_per_device_limits_map would be added again. This would cause the space
used by those managers to be calculated twice, which would artificially
lower the limit which we impose on the space hints are allowed to occupy
on disk.

This patch fixes this problem by changing the prepare_per_device_limits
function to operate on a hints manager passed as an argument. Now, we make
sure that this function is called on each hints manager only once.
2020-11-19 16:34:37 +01:00
Piotr Jastrzebski
debd10cc55 cdc: Remove trailing whitespaces from cdc_tests
The change was performed automatically using vim and
:%s/\s\+$//e

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-19 16:25:22 +01:00
Piotr Jastrzebski
6bdbfbafb7 cdc: Remove mk_cdc_test_config from tests
Now that CDC is GA and enabled by default, there's no longer a need
for a specific config in CDC tests.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-19 16:21:32 +01:00
Avi Kivity
2deb8e6430 Merge 'mutation_reader: generalize combined_mutation_reader' from Kamil Braun
It is now called `merging_reader`, and is used to turn a `FragmentProducer`
that produces a non-decreasing stream of mutation fragment batches into
a `flat_mutation_reader` producing a non-decreasing stream of fragments.

The resulting stream of fragments is increasing except for places where
we encounter range tombstones (multiple range tombstones may be produced
with the same position_in_partition).

`merging_reader` is a simple adapter over `mutation_fragment_merger`.

The old `combined_mutation_reader` is simply a specialization of `merging_reader`
where the used `FragmentProducer` is `mutation_reader_merger`, an abstraction that
merges the output of multiple readers into one non-decreasing stream of fragment
batches.

There is no separate class for `combined_mutation_reader` now. Instead,
`make_combined_reader` works directly with `merging_reader`.

The PR also improves some comments.

Split from https://github.com/scylladb/scylla/pull/7437.

Closes #7656

* github.com:scylladb/scylla:
  mutation_reader: `generalize combined_mutation_reader`
  mutation_reader: fix description of mutation_fragment_merger
2020-11-19 17:19:01 +02:00
Piotr Jastrzebski
9ede193f0a config: Add add_cdc_extension function for testing
and use it in cql_test_env to enable cdc extension
for all tests that use it.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-19 16:16:07 +01:00
Piotr Jastrzebski
89f4298670 cdc: Add missing includes to cdc_extension.hh
Without those additional includes, a .cc file
that includes cdc_extension.hh won't compile.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-19 16:11:33 +01:00
Nadav Har'El
5f37c1ef33 Merge 'Don't add delay to the timestamp of the first CDC generation' from Piotr Jastrzębski
After the concept of the seed nodes was removed we can distinguish
whether the node is the first node in the cluster or not.

Thanks to this we can avoid adding delay to the timestamp of the first
CDC generation.

The delay is added to the timestamp to make sure that all the nodes
in the cluster manage to learn about it before the timestamp is in the past.
It is safe to not add the delay for the first node because we know it's the only node
in the cluster and no one else has to learn about the timestamp.

Fixes #7645

Tests: unit(dev)

Closes #7654

* github.com:scylladb/scylla:
  cdc: Don't add delay to the timestamp of the first generation
  cdc: Change for_testing to add_delay in make_new_cdc_generation
2020-11-19 16:47:16 +02:00
Kamil Braun
857911d353 mutation_reader: generalize combined_mutation_reader
It is now called `merging_reader`, and is used to turn a `FragmentProducer`
that produces a non-decreasing stream of mutation fragment batches into
a `flat_mutation_reader` producing a non-decreasing stream of fragments.

The resulting stream of fragments is increasing except for places where
we encounter range tombstones (multiple range tombstones may be produced
with the same position_in_partition).

`merging_reader` is a simple adapter over `mutation_fragment_merger`.

The old `combined_mutation_reader` is simply a specialization of `merging_reader`
where the used `FragmentProducer` is `mutation_reader_merger`, an abstraction that
merges the output of multiple readers into one non-decreasing stream of fragment
batches.

There is no separate class for `combined_mutation_reader` now. Instead,
`make_combined_reader` works directly with `merging_reader`.
2020-11-19 14:35:11 +01:00
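The merge shape described above can be sketched in a greatly simplified form: a heap-based k-way merge that turns several non-decreasing input streams into one non-decreasing output stream, the way mutation_reader_merger combines reader outputs. Plain ints stand in for fragment positions here; this is illustration, not the actual reader code:

```cpp
#include <functional>
#include <queue>
#include <tuple>
#include <vector>

// Merge several sorted (non-decreasing) int streams into one.
std::vector<int> merge_streams(const std::vector<std::vector<int>>& streams) {
    // (value, stream index, offset within that stream)
    using entry = std::tuple<int, size_t, size_t>;
    std::priority_queue<entry, std::vector<entry>, std::greater<>> heap;
    for (size_t s = 0; s < streams.size(); ++s) {
        if (!streams[s].empty()) {
            heap.emplace(streams[s][0], s, 0);  // seed with each stream's head
        }
    }
    std::vector<int> out;
    while (!heap.empty()) {
        auto [v, s, i] = heap.top();
        heap.pop();
        out.push_back(v);                        // emit the smallest head
        if (i + 1 < streams[s].size()) {
            heap.emplace(streams[s][i + 1], s, i + 1);  // advance that stream
        }
    }
    return out;
}
```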
Kamil Braun
60adee6900 mutation_reader: fix description of mutation_fragment_merger
The resulting sequence is not necessarily strictly increasing
(e.g. if there are range tombstones).
2020-11-19 14:29:04 +01:00
Avi Kivity
a1be71b388 Merge "Harden network_topology_strategy_test.calculate_natural_endpoints" from Benny
"
We've recently seen failures in this unit test as follows:
```
test/boost/network_topology_strategy_test.cc(0): Entering test case "testCalculateEndpoints"
unknown location(0): fatal error: in "testCalculateEndpoints": std::out_of_range: _Map_base::at
./seastar/src/testing/seastar_test.cc(43): last checkpoint
test/boost/network_topology_strategy_test.cc(0): Leaving test case "testCalculateEndpoints"; testing time: 15192us
test/boost/network_topology_strategy_test.cc(0): Entering test case "test_invalid_dcs"
network_topology_strategy_test: ./seastar/include/seastar/core/future.hh:634: void seastar::future_state<seastar::internal::monostate>::set(A &&...) [T = seastar::internal::monostate, A = <>]: Assertion `_u.st == state::future' failed.
Aborting on shard 0.
```

This series fixes 2 issues in this test:
1. The core issue where std::out_of_range exception
   is not handled in calculate_natural_endpoints().
2. A secondary issue where the static `snitch_inst` isn't
   stopped when the first exception is hit, failing
   the next time the snitch is started, as it wasn't
   stopped properly.

Test: network_topology_strategy_test(release)
"

* tag 'nts_test-harden-calculate_natural_endpoints-v1' of github.com:bhalevy/scylla:
  test: network_topology_strategy_test: has_sufficient_replicas: handle empty dc endpoints case
  test: network_topology_strategy_test: fixup indentation
  test: network_topology_strategy_test: always stop_snitch after create_snitch
2020-11-19 14:11:42 +02:00
Piotr Jastrzebski
93a7f7943c cdc: Don't add delay to the timestamp of the first generation
After the concept of the seed nodes was removed we can distinguish
whether the node is the first node in the cluster or not.

Thanks to this we can avoid adding delay to the timestamp of the first
CDC generation.

Fixes #7645

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-19 13:03:18 +01:00
Tomasz Grabiec
d3a5814f4f api: Connect nodetool resetlocalschema to schema version recalculation
It doesn't really do what the nodetool command is documented to do,
which is to truncate local schema tables, but it is still an
improvement.

Message-Id: <1605740190-30332-1-git-send-email-tgrabiec@scylladb.com>
2020-11-19 13:55:09 +02:00
Piotr Jastrzebski
3024795507 cdc: Change for_testing to add_delay in make_new_cdc_generation
The meaning of the parameter changes from defining whether the function
is called in a testing environment to deciding whether a delay should be
added to the timestamp of a newly created CDC generation.

This is a preparation for the improvement in the following patch, which
adds the delay not on every node but only on non-first nodes.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-19 12:19:42 +01:00
Pekka Enberg
ba39bfa1be dist-check: Fix script name to work on Windows filesystem
Asias He reports that git on Windows filesystem is unhappy about the
colon character (":") present in dist-check files:

$ git reset --hard origin/master
error: invalid path 'tools/testing/dist-check/docker.io/centos:7.sh'
fatal: Could not reset index file to revision 'origin/master'.

Rename the script to use a dash instead.

Closes #7648
2020-11-19 13:16:30 +02:00
Gleb Natapov
43dc5e7dc2 test: add support for different state machines
Current tests use a hash state machine that checks for a specific order
of entry application. The order is not always guaranteed though:
backpressure may delay the submission of some entries, and when they are
released together they may be reordered in debug mode due to
SEASTAR_SHUFFLE_TASK_QUEUE. Introduce the ability for a test to choose
the state machine type, and implement a commutative state machine that
does not care about ordering.
2020-11-18 19:14:37 +01:00
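The difference can be illustrated with two toy state machines (not the test's real ones): a chained machine whose final state depends on the order entries are applied, and a commutative one that reaches the same state in any order:

```cpp
#include <cstdint>
#include <vector>

// Order-sensitive: each step folds the entry into the running state,
// like a hash chain, so reordering changes the result.
uint64_t chained_apply(uint64_t state, const std::vector<uint64_t>& entries) {
    for (auto e : entries) {
        state = state * 31 + e;
    }
    return state;
}

// Order-insensitive: addition commutes, so any application order
// yields the same final state.
uint64_t commutative_apply(uint64_t state, const std::vector<uint64_t>& entries) {
    for (auto e : entries) {
        state += e;
    }
    return state;
}
```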
Gleb Natapov
8d9b6f588e raft: stop accepting requests on a leader after the log reaches the limit
To prevent the log from taking too much memory, introduce a mechanism
that limits the log to a certain size. If the limit is reached, no new
log entries can be submitted until previous entries are committed and
snapshotted.
2020-11-18 19:14:37 +01:00
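The backpressure rule above can be sketched with toy types (not the raft module's): appends are rejected once the in-memory log hits the limit, and admitted again after committed entries are snapshotted away:

```cpp
#include <algorithm>
#include <cstddef>
#include <deque>

// Toy bounded log, for illustration only.
struct bounded_log {
    std::deque<int> entries;
    std::size_t limit;

    bool try_append(int e) {
        if (entries.size() >= limit) {
            return false;  // backpressure: caller must wait for a snapshot
        }
        entries.push_back(e);
        return true;
    }

    // Snapshotting releases the space held by the first n entries.
    void snapshot_prefix(std::size_t n) {
        n = std::min(n, entries.size());
        entries.erase(entries.begin(), entries.begin() + n);
    }
};
```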
Evgeniy Naydanov
587b909c5c scylla_raid_setup: try /dev/md[0-9] if no --raiddev provided
If the scylla_raid_setup script is called without the --raiddev argument,
then try to use any of the /dev/md[0-9] devices instead of only /dev/md0.
Do it this way because on Ubuntu 20.04, /dev/md0 is already used by the OS.

Closes #7628
2020-11-18 18:42:31 +02:00
Pavel Emelyanov
dbb2722e46 auth: Fix class name vs field name compilation by gcc
gcc fails to compile current master like this

In file included from ./service/client_state.hh:44,
                 from ./cql3/cql_statement.hh:44,
                 from ./cql3/statements/prepared_statement.hh:47,
                 from ./cql3/statements/raw/select_statement.hh:45,
                 from build/dev/gen/cql3/CqlParser.hpp:64,
                 from build/dev/gen/cql3/CqlParser.cpp:44:
./auth/service.hh:188:21: error: declaration of ‘const auth::resource& auth::command_desc::resource’ changes meaning of ‘resource’ [-fpermissive]
  188 |     const resource& resource; ///< Resource impacted by this command.
      |                     ^~~~~~~~
In file included from ./auth/authenticator.hh:57,
                 from ./auth/service.hh:33,
                 from ./service/client_state.hh:44,
                 from ./cql3/cql_statement.hh:44,
                 from ./cql3/statements/prepared_statement.hh:47,
                 from ./cql3/statements/raw/select_statement.hh:45,
                 from build/dev/gen/cql3/CqlParser.hpp:64,
                 from build/dev/gen/cql3/CqlParser.cpp:44:
./auth/resource.hh:98:7: note: ‘resource’ declared here as ‘class auth::resource’
   98 | class resource final {
      |       ^~~~~~~~

clang doesn't fail

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20201118155905.14447-1-xemul@scylladb.com>
2020-11-18 18:40:55 +02:00
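A minimal reproduction of the clash, and one standard way around it (the elaborated type specifier); this is an illustration, not necessarily the exact fix applied in auth/service.hh:

```cpp
// A member named after its own class type. gcc rejects the plain
// `const resource& resource;` form because the member declaration
// changes what the name `resource` means inside the class; writing
// `class resource` names the type explicitly and sidesteps that.
class resource {
public:
    int id = 7;
};

struct command_desc {
    const class resource& resource;  // elaborated type specifier avoids the clash
};

int impacted_resource_id(const command_desc& c) {
    return c.resource.id;
}
```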
Asias He
f7c954dc1e repair: Use decorated_key::tri_compare to compare keys
It is faster than the legacy_equal because it compares the token first.

Fixes #7643

Closes #7644
2020-11-18 14:12:59 +02:00
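The idea of a token-first three-way comparison can be sketched with simplified stand-in types (not Scylla's decorated_key): compare the cheap precomputed token first, and fall back to comparing key bytes only on a token tie:

```cpp
#include <string>

// Toy decorated key: a precomputed token plus the raw key bytes.
struct dkey {
    long token;
    std::string key;
};

int tri_compare(const dkey& a, const dkey& b) {
    if (a.token != b.token) {
        return a.token < b.token ? -1 : 1;  // cheap integer comparison first
    }
    return a.key.compare(b.key);            // byte walk only when tokens collide
}
```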
Piotr Sarna
c0d72b4491 db,view: remove duplicate entries from the list of target endpoints
If a list of target endpoints for sending view updates contains
duplicates, it results in benign (but annoying) broken promise
errors happening due to duplicated write response handlers being
instantiated for a single endpoint.
In order to avoid such errors, target remote endpoints are deduplicated
from the list of pending endpoints.
A similar issue (#5459) solved the case for duplicated local endpoints,
but that didn't solve the general case.

Fixes #7572

Closes #7641
2020-11-18 13:43:49 +02:00
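The deduplication described above amounts to a sort-and-unique over the target list, so that no endpoint ends up with two write response handlers. A sketch with endpoints modeled as strings for illustration:

```cpp
#include <algorithm>
#include <string>
#include <vector>

// Return the endpoint list with duplicates removed (order normalized).
std::vector<std::string> dedup_endpoints(std::vector<std::string> eps) {
    std::sort(eps.begin(), eps.end());
    eps.erase(std::unique(eps.begin(), eps.end()), eps.end());
    return eps;
}
```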
Avi Kivity
d612ca78f3 Merge 'Allow changing hinted handoff configuration in runtime' from Piotr Dulikowski
This PR allows changing the hinted_handoff_enabled option in runtime, either by modifying and reloading YAML configuration, or through HTTP API.

This PR also introduces an important change in semantics of hinted_handoff_enabled:
- Previously, hinted_handoff_enabled controlled whether _both writing and sending_ hints is allowed at all, or to particular DCs,
- Now, hinted_handoff_enabled only controls whether _writing hints_ is enabled. Sending hints from disk is now always enabled.

Fixes: #5634
Tests:
- unit(dev) for each commit of the PR
- unit(debug) for the last commit of the PR

Closes #6916

* github.com:scylladb/scylla:
  api: allow changing hinted handoff configuration
  storage_proxy: fix wrong return type in swagger
  hints_manager: implement change_host_filter
  storage_proxy: always create hints manager
  config: plug in hints::host_filter object into configuration
  db/hints: introduce host_filter
  hints/resource_manager: allow registering managers after start
  hints: introduce db::hints::directory_initializer
  directories.cc: prepare for use outside main.cc
2020-11-18 13:41:02 +02:00
Calle Wilund
9f48dc7dac locator::ec2_multi_region_snitch: Handle ipv6 broadcast/public ip
Fixes #7064

Iff the broadcast address is set to IPv6 from main (meaning IPv6 is
preferred), determine the "public" IPv6 address (which should be
the same, but might not be) via an AWS metadata query.

Closes #7633
2020-11-18 12:48:25 +02:00
Asias He
9b28162f88 repair: Use label for node ops metrics
Make it easier to consume in scylla-monitor.

Fixes #7270

Closes #7638
2020-11-18 10:12:39 +02:00
Avi Kivity
f55b522c1b database: detect misconfigured unit tests that don't set available_memory
available_memory is used to seed many caches and controllers. Usually
it's detected from the environment, but unit tests configure it
on their own with fake values. If they forget, then the undefined
behavior sanitizer will kick in in random places (see 8aa842614a
("test: gossip_test: configure database memory allocation correctly")
for an example).

Prevent this early by asserting that available_memory is nonzero.

Closes #7612
2020-11-18 08:49:32 +02:00
Avi Kivity
13c6c90d8c Merge 'Remove std::iterator usage' from Piotr Jastrzębski
std::iterator is deprecated since C++17, so define all the required iterator_traits members directly and stop using std::iterator altogether.

More context: https://www.fluentcpp.com/2018/05/08/std-iterator-deprecated

Tests: unit(dev)

Closes #7635

* github.com:scylladb/scylla:
  log_heap: Remove std::iterator from hist_iterator
  types: Remove std::iterator from tuple_deserializing_iterator
  types: Remove std::iterator from listlike_partial_deserializing_iterator
  sstables: remove std::iterator from const_iterator
  token_metadata: Remove std::iterator from tokens_iterator
  size_estimates_virtual_reader: Remove std::iterator
  token_metadata: Remove std::iterator from tokens_iterator_impl
  counters: Remove std::iterator from iterators
  compound_compat: Remove std::iterator from iterators
  compound: Remove std::iterator from iterator
  clustering_interval_set: Remove std::iterator from position_range_iterator
  cdc: Remove std::iterator from collection_iterator
  cartesian_product: Remove std::iterator from iterator
  bytes_ostream: Remove std::iterator from fragment_iterator
2020-11-17 19:22:17 +02:00
Benny Halevy
5171590d83 test: network_topology_strategy_test: has_sufficient_replicas: handle empty dc endpoints case
We saw this intermittent failure in testCalculateEndpoints:
```
unknown location(0): fatal error: in "testCalculateEndpoints": std::out_of_range: _Map_base::at
```

It turns out that there are no endpoints associated with the dc passed
to has_sufficient_replicas in the `all_endpoints` map.

Handle this case by returning true.

The dc is still required to appear in `dc_replicas`,
so if it's not found there, fail the test gracefully.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-17 18:57:19 +02:00
Piotr Jastrzebski
2fe9d879df log_heap: Remove std::iterator from hist_iterator
std::iterator is deprecated since C++17 so define all the required
iterator_traits directly.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-17 16:53:20 +01:00
Piotr Jastrzebski
957d4c3532 types: Remove std::iterator from tuple_deserializing_iterator
std::iterator is deprecated since C++17 so define all the required
iterator_traits directly.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-17 16:53:20 +01:00
Piotr Jastrzebski
5f64e57b10 types: Remove std::iterator from listlike_partial_deserializing_iterator
std::iterator is deprecated since C++17 so define all the required
iterator_traits directly.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-17 16:53:20 +01:00
Piotr Jastrzebski
bacda100ec sstables: remove std::iterator from const_iterator
std::iterator is deprecated since C++17 so define all the required
iterator_traits directly.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-17 16:53:20 +01:00
Piotr Jastrzebski
661b52c7df token_metadata: Remove std::iterator from tokens_iterator
std::iterator is deprecated since C++17 so define all the required
iterator_traits directly.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-17 16:53:20 +01:00
Piotr Jastrzebski
c0bc6b5795 size_estimates_virtual_reader: Remove std::iterator
std::iterator is deprecated since C++17 so define all the required
iterator_traits directly.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-17 16:53:20 +01:00
Piotr Jastrzebski
87bf577450 token_metadata: Remove std::iterator from tokens_iterator_impl
std::iterator is deprecated since C++17 so define all the required
iterator_traits directly.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-17 16:53:20 +01:00
Piotr Jastrzebski
651849e0c1 counters: Remove std::iterator from iterators
std::iterator is deprecated since C++17 so define all the required
iterator_traits directly.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-17 16:53:20 +01:00
Piotr Jastrzebski
742b5b7fc5 compound_compat: Remove std::iterator from iterators
std::iterator is deprecated since C++17 so define all the required
iterator_traits directly.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-17 16:53:20 +01:00
Piotr Jastrzebski
493c2bfc96 compound: Remove std::iterator from iterator
std::iterator is deprecated since C++17 so define all the required
iterator_traits directly.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-17 16:53:20 +01:00
Piotr Jastrzebski
c5d6ee0e45 clustering_interval_set: Remove std::iterator from position_range_iterator
std::iterator is deprecated since C++17 so define all the required
iterator_traits directly.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-17 16:53:20 +01:00
Piotr Jastrzebski
6b1167ea0d cdc: Remove std::iterator from collection_iterator
std::iterator is deprecated since C++17 so define all the required
iterator_traits directly.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-17 16:53:20 +01:00
Piotr Jastrzebski
a2fa10a0bc cartesian_product: Remove std::iterator from iterator
std::iterator is deprecated since C++17 so define all the required
iterator_traits directly.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-17 16:53:20 +01:00
Piotr Jastrzebski
0605d9e8ed bytes_ostream: Remove std::iterator from fragment_iterator
std::iterator is deprecated since C++17 so define all the required
iterator_traits directly.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-17 16:53:20 +01:00
Benny Halevy
a38709b6bb test: network_topology_strategy_test: fixup indentation
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-17 16:10:35 +02:00
Benny Halevy
5c73d4f65b test: network_topology_strategy_test: always stop_snitch after create_snitch
Currently stop_snitch is not called if the test fails with an exception.
This causes a failure in create_snitch, where snitch_inst fails to start
since it wasn't stopped earlier.

For example:
```
test/boost/network_topology_strategy_test.cc(0): Entering test case "testCalculateEndpoints"
unknown location(0): fatal error: in "testCalculateEndpoints": std::out_of_range: _Map_base::at
./seastar/src/testing/seastar_test.cc(43): last checkpoint
test/boost/network_topology_strategy_test.cc(0): Leaving test case "testCalculateEndpoints"; testing time: 15192us
test/boost/network_topology_strategy_test.cc(0): Entering test case "test_invalid_dcs"
network_topology_strategy_test: ./seastar/include/seastar/core/future.hh:634: void seastar::future_state<seastar::internal::monostate>::set(A &&...) [T = seastar::internal::monostate, A = <>]: Assertion `_u.st == state::future' failed.
Aborting on shard 0.
Backtrace:
  0x0000000002825e94
  0x000000000282ffa9
  0x00007fd065f971df
  /lib64/libc.so.6+0x000000000003dbc4
  /lib64/libc.so.6+0x00000000000268a3
  /lib64/libc.so.6+0x0000000000026788
  /lib64/libc.so.6+0x0000000000035fc5
  0x0000000000b484cf
  0x0000000002a7c69f
  0x0000000002a7c62f
  0x0000000000b47b9e
  0x0000000002595da2
  0x0000000002595913
  0x0000000002a83a31

```

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-17 16:09:43 +02:00
Piotr Jastrzebski
f2b98b0aad Replace disable_failure_guard with scoped_critical_alloc_section
scoped_critical_alloc_section was recently introduced to replace
disable_failure_guard and made the old class deprecated.

This patch replaces all occurrences of disable_failure_guard with
scoped_critical_alloc_section.

Without this patch the build prints many warnings like:
warning: 'disable_failure_guard' is deprecated: Use scoped_critical_section instead [-Wdeprecated-declarations]

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
Message-Id: <ca2a91aaf48b0f6ed762a6aa687e6ac5e936355d.1605621284.git.piotr@scylladb.com>
2020-11-17 16:01:25 +02:00
Avi Kivity
006e0e4fe0 Merge "Add scylla specific information to the OOM diagnostics report" from Botond
"
Use the recently introduced seastar mechanism which allows the
application running on top of seastar to add its own part to the
diagnostics report to add scylla specific information to said report.
The report now closely resembles that produced by `scylla memory` from
`scylla-gdb.py`, with the exception of coordinator-specific information.
This should greatly speed up the debugging of OOM, as the diagnostics
report will be available from the logs, without having to obtain a
coredump and set up a debugging environment in which it can be opened.

Example report:

INFO  2020-11-10 12:02:44,182 [shard 0] testlog - Dumping seastar memory diagnostics
Used memory:  2029M
Free memory:  19M
Total memory: 2G

LSA
  allocated: 1770M
  used:      1766M
  free:      3M

Cache:
  total: 1770M
  used:  1716M
  free:  54M

Memtables:
 total: 0B
 Regular:
  real dirty: 0B
  virt dirty: 0B
 System:
  real dirty: 0B
  virt dirty: 0B

Replica:
  Read Concurrency Semaphores:
    user: 100/100, 33M/41M, queued: 477
    streaming: 0/10, 0B/41M, queued: 0
    system: 0/100, 0B/41M, queued: 0
    compaction: 0/∞, 0B/∞
  Execution Stages:
    data query stage:
      statement	987
         Total: 987
    mutation query stage:
         Total: 0
    apply stage:
         Total: 0
  Tables - Ongoing Operations:
    Pending writes (top 10):
      0 Total (all)
    Pending reads (top 10):
      1564 ks.test
      1564 Total (all)
    Pending streams (top 10):
      0 Total (all)

Small pools:
objsz	spansz	usedobj	memory	unused	wst%
8	4K	11k	88K	6K	6
10	4K	10	8K	8K	98
12	4K	2	8K	8K	99
14	4K	4	8K	8K	99
16	4K	15k	244K	5K	2
32	4K	2k	52K	3K	5
32	4K	20k	628K	2K	0
32	4K	528	20K	4K	17
32	4K	5k	144K	480B	0
48	4K	17k	780K	3K	0
48	4K	3k	140K	3K	2
64	4K	50k	3M	6K	0
64	4K	66k	4M	7K	0
80	4K	131k	10M	1K	0
96	4K	37k	3M	192B	0
112	4K	65k	7M	10K	0
128	4K	21k	3M	2K	0
160	4K	38k	6M	3K	0
192	4K	15k	3M	12K	0
224	4K	3k	720K	10K	1
256	4K	148	56K	19K	33
320	8K	13k	4M	14K	0
384	8K	3k	1M	20K	1
448	4K	11k	5M	5K	0
512	4K	2k	1M	39K	3
640	12K	163	144K	42K	29
768	12K	1k	832K	59K	7
896	8K	131	144K	29K	20
1024	4K	643	732K	89K	12
1280	20K	11k	13M	26K	0
1536	12K	12	128K	110K	85
1792	16K	12	144K	123K	85
2048	8K	601	1M	14K	1
2560	20K	70	224K	48K	21
3072	12K	13	240K	201K	83
3584	28K	6	288K	266K	92
4096	16K	10k	39M	88K	0
5120	20K	7	416K	380K	91
6144	24K	24	480K	336K	70
7168	28K	27	608K	413K	67
8192	32K	256	3M	736K	26
10240	40K	11k	105M	550K	0
12288	48K	21	960K	708K	73
14336	56K	59	1M	378K	31
16384	64K	8	1M	1M	89
Page spans:
index	size	free	used	spans
0	4K	48M	48M	12k
1	8K	6M	6M	822
2	16K	41M	41M	3k
3	32K	18M	18M	579
4	64K	108M	108M	2k
5	128K	1774M	2G	14k
6	256K	512K	0B	2
7	512K	2M	2M	4
8	1M	0B	0B	0
9	2M	2M	0B	1
10	4M	0B	0B	0
11	8M	0B	0B	0
12	16M	16M	0B	1
13	32M	32M	32M	1
14	64M	0B	0B	0
15	128M	0B	0B	0
16	256M	0B	0B	0
17	512M	0B	0B	0
18	1G	0B	0B	0
19	2G	0B	0B	0
20	4G	0B	0B	0
21	8G	0B	0B	0
22	16G	0B	0B	0
23	32G	0B	0B	0
24	64G	0B	0B	0
25	128G	0B	0B	0
26	256G	0B	0B	0
27	512G	0B	0B	0
28	1T	0B	0B	0
29	2T	0B	0B	0
30	4T	0B	0B	0
31	8T	0B	0B	0

Fixes: #6365
"

* 'dump-memory-diagnostics-oom/v1' of https://github.com/denesb/scylla:
  database: hook-in to the seastar OOM diagnostics report generation
  database: table: add accessors to the operation counts of the phasers
  utils: logalloc: add lsa_global_occupancy_stats()
  utils: phased_barrier: add operations_in_progress()
  mutation_query: mutation_query_stage: add get_stats()
  reader_concurrency_semaphore: add is_unlimited()
2020-11-17 15:50:21 +02:00
Botond Dénes
34c213f9bb database: hook-in to the seastar OOM diagnostics report generation
Use the mechanism provided by seastar to add scylla specific information
to the memory diagnostics report. The information added is mostly the
same contained in the output of `scylla memory` from `scylla-gdb.py`,
with the exception of the coordinator-specific metrics. The report is
generated in the database layer, where the storage-proxy is not
available and it is not worth pulling it in just for this purpose.

An example report:

INFO  2020-11-10 12:02:44,182 [shard 0] testlog - Dumping seastar memory diagnostics
Used memory:  2029M
Free memory:  19M
Total memory: 2G

LSA
  allocated: 1770M
  used:      1766M
  free:      3M

Cache:
  total: 1770M
  used:  1716M
  free:  54M

Memtables:
 total: 0B
 Regular:
  real dirty: 0B
  virt dirty: 0B
 System:
  real dirty: 0B
  virt dirty: 0B

Replica:
  Read Concurrency Semaphores:
    user: 100/100, 33M/41M, queued: 477
    streaming: 0/10, 0B/41M, queued: 0
    system: 0/100, 0B/41M, queued: 0
    compaction: 0/∞, 0B/∞
  Execution Stages:
    data query stage:
      statement	987
         Total: 987
    mutation query stage:
         Total: 0
    apply stage:
         Total: 0
  Tables - Ongoing Operations:
    Pending writes (top 10):
      0 Total (all)
    Pending reads (top 10):
      1564 ks.test
      1564 Total (all)
    Pending streams (top 10):
      0 Total (all)

Small pools:
objsz	spansz	usedobj	memory	unused	wst%
8	4K	11k	88K	6K	6
10	4K	10	8K	8K	98
12	4K	2	8K	8K	99
14	4K	4	8K	8K	99
16	4K	15k	244K	5K	2
32	4K	2k	52K	3K	5
32	4K	20k	628K	2K	0
32	4K	528	20K	4K	17
32	4K	5k	144K	480B	0
48	4K	17k	780K	3K	0
48	4K	3k	140K	3K	2
64	4K	50k	3M	6K	0
64	4K	66k	4M	7K	0
80	4K	131k	10M	1K	0
96	4K	37k	3M	192B	0
112	4K	65k	7M	10K	0
128	4K	21k	3M	2K	0
160	4K	38k	6M	3K	0
192	4K	15k	3M	12K	0
224	4K	3k	720K	10K	1
256	4K	148	56K	19K	33
320	8K	13k	4M	14K	0
384	8K	3k	1M	20K	1
448	4K	11k	5M	5K	0
512	4K	2k	1M	39K	3
640	12K	163	144K	42K	29
768	12K	1k	832K	59K	7
896	8K	131	144K	29K	20
1024	4K	643	732K	89K	12
1280	20K	11k	13M	26K	0
1536	12K	12	128K	110K	85
1792	16K	12	144K	123K	85
2048	8K	601	1M	14K	1
2560	20K	70	224K	48K	21
3072	12K	13	240K	201K	83
3584	28K	6	288K	266K	92
4096	16K	10k	39M	88K	0
5120	20K	7	416K	380K	91
6144	24K	24	480K	336K	70
7168	28K	27	608K	413K	67
8192	32K	256	3M	736K	26
10240	40K	11k	105M	550K	0
12288	48K	21	960K	708K	73
14336	56K	59	1M	378K	31
16384	64K	8	1M	1M	89
Page spans:
index	size	free	used	spans
0	4K	48M	48M	12k
1	8K	6M	6M	822
2	16K	41M	41M	3k
3	32K	18M	18M	579
4	64K	108M	108M	2k
5	128K	1774M	2G	14k
6	256K	512K	0B	2
7	512K	2M	2M	4
8	1M	0B	0B	0
9	2M	2M	0B	1
10	4M	0B	0B	0
11	8M	0B	0B	0
12	16M	16M	0B	1
13	32M	32M	32M	1
14	64M	0B	0B	0
15	128M	0B	0B	0
16	256M	0B	0B	0
17	512M	0B	0B	0
18	1G	0B	0B	0
19	2G	0B	0B	0
20	4G	0B	0B	0
21	8G	0B	0B	0
22	16G	0B	0B	0
23	32G	0B	0B	0
24	64G	0B	0B	0
25	128G	0B	0B	0
26	256G	0B	0B	0
27	512G	0B	0B	0
28	1T	0B	0B	0
29	2T	0B	0B	0
30	4T	0B	0B	0
31	8T	0B	0B	0
2020-11-17 15:13:21 +02:00
Botond Dénes
4d7f2f45c2 database: table: add accessors to the operation counts of the phasers 2020-11-17 15:13:21 +02:00
Botond Dénes
7b56ed6057 utils: logalloc: add lsa_global_occupancy_stats()
Allows querying the occupancy stats of all the lsa memory.
2020-11-17 15:13:21 +02:00
Botond Dénes
f69942424d utils: phased_barrier: add operations_in_progress()
Allows querying the number of operations in-flight in the current phase.
2020-11-17 15:13:21 +02:00
Botond Dénes
f097bf3005 mutation_query: mutation_query_stage: add get_stats() 2020-11-17 15:13:21 +02:00
Botond Dénes
8c083c17fc reader_concurrency_semaphore: add is_unlimited()
Allows determining whether the semaphore was created without limits.
2020-11-17 15:13:21 +02:00
Avi Kivity
100ad4db38 Merge 'Allow ALTERing the properties of system_auth tables' from Dejan Mircevski
As requested in #7057, allow certain alterations of system_auth tables. Potentially destructive alterations are still rejected.

Tests: unit (dev)

Closes #7606

* github.com:scylladb/scylla:
  auth: Permit ALTER options on system_auth tables
  auth: Add command_desc
  auth: Add tests for resource protections
2020-11-17 12:15:20 +02:00
Botond Dénes
318b0ef259 reader_concurrency_semaphore: rate-limit diagnostics messages
And since now there is no danger of them filling the logs, the log-level
is promoted to info, so users can see the diagnostics messages by
default.

The rate-limit chosen is 1/30s.

Refs: #7398

Tests: manual

Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20201117091253.238739-1-bdenes@scylladb.com>
2020-11-17 11:57:51 +02:00
Piotr Dulikowski
0fd36e2579 api: allow changing hinted handoff configuration
This commit makes it possible to change hints manager's configuration at
runtime through HTTP API.

To preserve backwards compatibility, we keep the old behavior of not
creating and checking hints directories if they are not enabled at
startup. Instead, hint directories are lazily initialized when hints are
enabled for the first time through HTTP API.
2020-11-17 10:24:43 +01:00
Piotr Dulikowski
6465dd160b storage_proxy: fix wrong return type in swagger
The GET `hinted_handoff_enabled_by_dc` endpoint had an incorrect return
type specified. Although it does not have an implementation yet, it was
supposed to return a list of strings with DC names for which generating
hints is enabled, not a list of string pairs. This is the return type
expected by the JMX.
2020-11-17 10:24:43 +01:00
Piotr Dulikowski
220a2ca800 hints_manager: implement change_host_filter
Implements a function which is responsible for changing hints manager
configuration while it is running.

It first starts new endpoint managers for endpoints which weren't
allowed by previous filter but are now, and then stops endpoint managers
which are rejected by the new filter.

The function is blocking and waits until all relevant ep managers are
started or stopped.
2020-11-17 10:24:43 +01:00
Piotr Dulikowski
1302f1b5bf storage_proxy: always create hints manager
Now, the hints manager object for regular hints is always created, even
if hints are disabled in configuration. Please note that the behavior of
hints will be unchanged - no hints will be sent when they are disabled.
The intent of this change is to make enabling and disabling hints in
runtime easier to implement.
2020-11-17 10:24:43 +01:00
Piotr Dulikowski
cefe5214ff config: plug in hints::host_filter object into configuration
Uses db::hints::host_filter as the type of hinted_handoff_enabled
configuration option.

Previously, hinted_handoff_enabled used to be a string option, and it
was parsed later in a separate function during startup. The function
returned a std::optional<std::unordered_set<sstring>>, whose meaning in
the context of hints is rather enigmatic for an observer not familiar
with hints.

Now, hinted_handoff_enabled has type of db::hints::host_filter, and it
is plugged into the config parsing framework, so there is no need for
later post-processing.
2020-11-17 10:24:42 +01:00
Piotr Dulikowski
5c3c7c946b db/hints: introduce host_filter
Adds a db::hints::host_filter structure, which determines if generating
hints towards a given target is currently allowed. It supports
serialization and deserialization between the hinted_handoff_enabled
configuration/cli option.

This patch only introduces this structure, but does not make other code
use it. It will be plugged into the configuration architecture in the
following commits.
2020-11-17 10:15:47 +01:00
Piotr Dulikowski
a4f03d72b3 hints/resource_manager: allow registering managers after start
This change modifies db::hints::resource_manager so that it is now
possible to add hints::managers after it was started.

This change will make it possible to register the regular hints manager
later in runtime, if it wasn't enabled at boot time.
2020-11-17 10:15:47 +01:00
Piotr Dulikowski
40710677d0 hints: introduce db::hints::directory_initializer
Introduces a db::hints::directory_initializer object, which encapsulates
the logic of initializing directories for hints (creating/validating
directories, segment rebalancing). It will be useful for lazy
initialization of hints manager.
2020-11-17 10:15:47 +01:00
Piotr Dulikowski
81a568c57a directories.cc: prepare for use outside main.cc
Currently, the `directories` class is used exclusively during
initialization, in the main() function. This commit refactors this class
so that it is possible to use it to initialize directories much later
after startup.

The intent of this change is to make it possible for hints manager to
create directories for hints lazily. Currently, when Scylla is booted
with hinted handoff disabled, the `hints_directory` config parameter is
ignored and directories for hints are neither created nor verified.
Because we would like to preserve this behavior and introduce
possibility to switch hinted handoff on in runtime, the hints
directories will have to be created lazily the first time hinted handoff
is enabled.
2020-11-17 10:15:47 +01:00
Piotr Sarna
5c66291ab9 Update seastar submodule
* seastar 043ecec7...c861dbfb (3):
  > Merge "memory: allow configuring when to dump memory diagnostics on allocation failures" from Botond
  > perftune.py: support kvm-clock on tune-clock
  > execution_stage: inheriting_concrete_execution_stage: add get_stats()
2020-11-17 08:37:39 +01:00
Dejan Mircevski
1beb57ad9d auth: Permit ALTER options on system_auth tables
These alterations cannot break the database irreparably, so allow
them.

Expand command_desc as required.

Add a type (rather than command_desc) parameter to
has_column_family_access() to minimize code changes.

Fixes #7057

Signed-off-by: Dejan Mircevski <dejan@scylladb.com>
2020-11-16 22:32:32 -05:00
Dejan Mircevski
9a6c1b4d50 auth: Add command_desc
Instead of passing various bits of the command around, pass one
command_desc object.

Signed-off-by: Dejan Mircevski <dejan@scylladb.com>
2020-11-16 20:23:52 -05:00
Kamil Braun
d74f303406 cdc: ensure that CDC generation write is flushed to commitlog before ack
When a node bootstraps or upgrades from a pre-CDC version, it creates a
new CDC generation, writes it to a distributed table
(system_distributed.cdc_generation_descriptions), and starts gossiping
its timestamp. When other nodes see the timestamp being gossiped, they
retrieve the generation from the table.

The bootstrapping/upgrading node therefore assumes that the generation
is made durable and other nodes will be able to retrieve it from the
table. This assumption could be invalidated if periodic commitlog mode
was used: replicas would acknowledge the write and then immediately
crash, losing the write if they were unlucky (i.e. commitlog wasn't
synced to disk before the write was acknowledged).

This commit enforces all writes to the generations table to be
synced to commitlog immediately. It does not matter for performance as
these writes are very rare.

Fixes https://github.com/scylladb/scylla/issues/7610.

Closes #7619
2020-11-17 00:01:13 +02:00
Gleb Natapov
df197e36fb raft: store an entry as a shared ptr in an outgoing message
An entry can be snapshotted away before the outgoing message is sent, so the
message has to hold on to it to avoid a use-after-free.

Message-Id: <20201116113323.GA1024423@scylladb.com>
2020-11-16 17:54:21 +01:00
Piotr Sarna
fc8ffe08b9 storage_proxy: unify retiring view response handlers
Materialized view updates participate in a retirement program,
which makes sure that they are immediately taken down once their
target node is down, without having to wait for timeout (since
views are a background operation and it's wasteful to wait in the
background for minutes). However, this mechanism has very delicate
lifetime issues, and it already caused problems more than once,
most recently in #5459.
In order to make another bug in this area less likely, the two
implementations of the mechanism, in on_down() and drain_on_shutdown(),
are unified.

Possibly refs #7572

Closes #7624
2020-11-16 18:50:49 +02:00
Avi Kivity
5d45662804 database, streaming: remove remnants of memtable-base streaming
Commit e5be3352cf ("database, streaming, messaging: drop
streaming memtables") removed streaming memtables; this removes
the mechanisms to synchronize them: _streaming_flush_gate and
_streaming_flush_phaser. The memory manager for streaming is removed,
and its 10% reserve is evenly distributed between memtables and
general use (e.g. cache).

Note that _streaming_flush_phaser and _streaming_flush_gate are
no longer used to synchronize anything - the gate is only used
to protect the phaser, and the phaser isn't used for anything.

Closes #7454
2020-11-16 14:32:19 +01:00
Takuya ASADA
2ce8ca0f75 dist/common/scripts/scylla_util.py: move DEBIAN_FRONTEND environment variable to apt_install()/apt_uninstall()
The DEBIAN_FRONTEND environment variable was added just to prevent a dialog
from opening when running 'apt-get install mdadm'; no other program depends on it.
So we can move it inside apt_install()/apt_uninstall() and drop scylla_env,
since we don't have any other environment variables.
To pass the variable, an env argument was added to run()/out().
2020-11-16 14:21:36 +02:00
Avi Kivity
fcec68b102 Merge "storage_service: add mutate_token_metadata helper" from Benny
"
This is a follow-up on 052a8d036d
"Avoid stalls in token_metadata and replication strategy"

The added mutate_token_metadata helper combines:
- with_token_metadata_lock
- get_mutable_token_metadata_ptr
- replicate_to_all_cores

Test: unit(dev)
"

* tag 'mutate_token_metadata-v1' of github.com:bhalevy/scylla:
  storage_service: fixup indentation
  storage_service: mutate_token_metadata: do replicate_to_all_cores
  storage_service: add mutate_token_metadata helper
2020-11-15 20:00:19 +02:00
Benny Halevy
51e4d6490b storage_service: fixup indentation
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-15 15:18:48 +02:00
Benny Halevy
e861c352f8 storage_service: mutate_token_metadata: do replicate_to_all_cores
Replicate the mutated token_metadata to all cores on success.

This moves replication out of update_pending_ranges(mutable_token_metadata_ptr, sstring),
so add explicit call to replicate_to_all_cores where it is called outside
of mutate_token_metadata.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-15 14:34:20 +02:00
Benny Halevy
25b5db0b72 storage_service: add mutate_token_metadata helper
Replace a repeating pattern of:
    with_token_metadata_lock([] {
        return get_mutable_token_metadata_ptr([] (mutable_token_metadata_ptr tmptr) {
            // mutate token_metadata via tmptr
        });
    });

With a call to mutate_token_metadata that does both
and calls the function with the mutable_token_metadata_ptr.

A following patch will also move the replication to all
cores to mutate_token_metadata.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-15 14:31:39 +02:00
Pekka Enberg
31389d1724 configure.py: Fix unified-package version and release to unbreak "dist" target
The "dist" target fails as follows:

  $ ./tools/toolchain/dbuild ninja dist
  ninja: error: 'build/dev/scylla-unified-package-..tar.gz', needed by 'dist-unified-tar', missing and no known rule to make it

Fix two issues:

- Fix Python variable references to "scylla_version" and
  "scylla_release", broken by commit bec0c15ee9 ("configure.py: Add
  version to unified tarball filename"). The breakage went unnoticed
  because ninja default target does not call into dist...

- Remove dependencies to build/<mode>/scylla-unified-package.tar.gz. The
  file is now in build/<mode>/dist/tar/ directory and contains version
  and release in the filename.

Message-Id: <20201113110706.150533-1-penberg@scylladb.com>
2020-11-15 11:10:26 +02:00
Dejan Mircevski
d554610f32 auth: Add tests for resource protections
Try to mess up system_auth tables and verify that Scylla rejects that.

Signed-off-by: Dejan Mircevski <dejan@scylladb.com>
2020-11-13 21:18:38 -05:00
Tomasz Grabiec
0a2adf4555 Merge "raft: replication test: simple partitioning" from Alejo
To test handling of connectivity issues and recovery add support for
disconnecting servers.

This is not full partitioning yet, as it doesn't allow connectivity
across the disconnected servers (having multiple active partitions).

* https://github.com/alecco/scylla/pull/new/raft-ale-partition-simple-v3:
  raft: replication test: connectivity partitioning support
  raft: replication test: block rpc calls to disconnected servers
  raft: replication test: add is_disconnected helper
  raft: replication test: rename global variable
  raft: replication test: relocate global connection state map
2020-11-13 13:49:33 +01:00
Pekka Enberg
f57b894d42 configure.py: Remove duplicate scylla-package.tar.gz artifact
We currently keep a copy of scylla-package.tar.gz in "build/<mode>" for
compatibility. However, we've long since switched our CI system over to
the new location, so let's remove the duplicate and use the one from
"build/<mode>/dist/tar" instead.
Message-Id: <20201113075146.67265-1-penberg@scylladb.com>
2020-11-13 11:27:39 +01:00
Nadav Har'El
62551b3bd3 docs/alternator: mention that Alternator Streams is experimental
Add to the DynamoDB compatibility document, docs/alternator/compatibility.md,
a mention that Alternator Streams are still an experimental feature, and
how to turn them on (at this point CDC is no longer an experimental feature,
but Alternator Streams are).

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20201112184436.940497-1-nyh@scylladb.com>
2020-11-12 21:20:04 +02:00
Nadav Har'El
450de2d89d docs/alternator: Alternator is no longer "experimental"
Drop the adjective "experimental" used to describe Alternator in
docs/alternator/getting-started.md.

In Scylla, the word "experimental" carries a specific meaning (no support
for upgrades, not enough QA, not ready for general use), and Alternator is
no longer experimental in that sense.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20201112185249.941484-1-nyh@scylladb.com>
2020-11-12 21:20:03 +02:00
Nadav Har'El
e40fa4b7fd test/cql-pytest: remove xfail mark from passing secondary-index test
Issue #7443 (the wrong sort order of partitions in a secondary index)
was already fixed in commit 7ff72b0ba5.
So the test for it is now passing, and we can remove its "xfail" mark.

Refs #7443

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20201112183441.939604-1-nyh@scylladb.com>
2020-11-12 20:43:59 +02:00
Pekka Enberg
274717c97d cql-pytest/test_keyspace.py: Add ALTER KEYSPACE test cases
This adds some test cases for ALTER KEYSPACE:

 - ALTER KEYSPACE happy path

 - ALTER KEYSPACE with invalid options

 - ALTER KEYSPACE for non-existing keyspace

 - CREATE and ALTER KEYSPACE using NetworkTopologyStrategy with
   non-existing data center in configuration, which triggers a bug in
   Scylla:

   https://github.com/scylladb/scylla/issues/7595
Message-Id: <20201112073110.39475-1-penberg@scylladb.com>
2020-11-12 20:07:12 +02:00
Alejo Sanchez
5d8752602b raft: replication test: connectivity partitioning support
Introduce partition update command consisting of nodes still seeing
each other. Nodes not included are disconnected from everything else.

If the previous leader is not part of the new partition, the first node
specified in the partition will become leader.

For other nodes to accept a new leader, it has to have a committed log.
For example, if the desired leader is being re-connected and it missed
entries other nodes saw, it will not win the election. Example with A B C:

    partition{A,C},entries{2},partition{B,C}

In this case node C won't accept B as a new leader as it's missing 2
entries.
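The up-to-date rule behind this example can be sketched as follows (a hypothetical simplification of Raft's election restriction, not the test harness code itself):

```cpp
#include <cassert>
#include <cstdint>

// Last entry of a node's log: (term, index), as in the Raft paper.
struct log_tail {
    uint64_t last_term;
    uint64_t last_index;
};

// A voter grants its vote only if the candidate's log is at least as
// up-to-date as its own: higher last term wins; on equal terms, the
// longer log wins.
inline bool voter_grants_vote(const log_tail& voter, const log_tail& candidate) {
    if (candidate.last_term != voter.last_term) {
        return candidate.last_term > voter.last_term;
    }
    return candidate.last_index >= voter.last_index;
}
```

Under this rule, a node C holding 2 extra entries at the same term refuses to vote for B, matching the scenario above.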

Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
2020-11-12 10:01:17 -04:00
Alejo Sanchez
2fc5b3a620 raft: replication test: block rpc calls to disconnected servers
Use global connection state with rpc, too.

Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
2020-11-12 10:01:05 -04:00
Alejo Sanchez
c9e593a6d7 raft: replication test: add is_disconnected helper
Simplify disconnection logic with a helper is_disconnected() function.

Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
2020-11-12 10:00:58 -04:00
Alejo Sanchez
e1b0aad149 raft: replication test: rename global variable
Use lowercase for the global disconnection map.

Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
2020-11-12 09:59:06 -04:00
Alejo Sanchez
7a2c6d08a1 raft: replication test: relocate global connection state map
Needed for use by the rpc class.

Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
2020-11-12 09:58:48 -04:00
Piotr Dulikowski
5b12375842 main.cc: wait for hints manager to start
In main.cc, we spawn a future which starts the hints manager, but we
don't wait for it to complete. This can have the following consequences:

- The hints manager does some asynchronous operations during startup,
  so it can take some time to start. If it is started after we start
  handling requests, and we admit some requests which would result in
  hints being generated, those hints will be dropped instead, because we
  check whether the hints manager is started before writing them.
- Initialization of the hints manager may fail (e.g. we don't have
  permissions to create hints directories), and Scylla won't be stopped
  because of it. The consequence is that the hints manager won't be
  started, and hints will be dropped instead of being written. This may
  affect both the regular hints manager and the view hints manager.

This commit makes us wait until the hints manager starts and check
whether there were any errors during initialization.
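A minimal sketch of the fix, with a plain shared state standing in for the seastar future that main.cc previously fired and forgot (hypothetical names, not the actual code):

```cpp
#include <cassert>
#include <exception>
#include <stdexcept>

// Outcome of the asynchronous startup task.
struct startup_state {
    bool done = false;
    std::exception_ptr error;   // set if initialization failed
};

// Stand-in for the spawned startup work; records success or failure.
inline void start_hints_manager(startup_state& st, bool can_create_dirs) {
    try {
        if (!can_create_dirs) {
            throw std::runtime_error("cannot create hints directories");
        }
    } catch (...) {
        st.error = std::current_exception();
    }
    st.done = true;
}

// The fix: before handling requests, wait for startup and rethrow any
// error instead of silently dropping hints later.
inline bool wait_for_startup(const startup_state& st) {
    if (st.error) {
        std::rethrow_exception(st.error);
    }
    return st.done;
}
```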

Fixes #7598

Closes #7599
2020-11-12 14:17:10 +02:00
Nadav Har'El
78649c2322 Merge 'Mark CDC as GA' from Piotr Jastrzębski
CDC is ready to be a non-experimental feature so remove the experimental flag for it.
Also, guard Alternator Streams with their own experimental flag. Previously, they were using CDC experimental flag as they depend on CDC.

Tests: unit(dev)

Closes #7539

* github.com:scylladb/scylla:
  alternator: guard streams with an experimental flag
  Mark CDC as GA
  cdc: Make it possible for CDC generation creation to fail
2020-11-12 13:49:27 +02:00
Piotr Jastrzebski
d2897d8f8b alternator: guard streams with an experimental flag
Add a new alternator-streams experimental flag to control
Alternator Streams.

CDC becomes GA and won't be guarded by an experimental flag any more.
Alternator Streams stay experimental so now they need to be controlled
by their own experimental flag.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-12 12:36:16 +01:00
Piotr Jastrzebski
e9072542c1 Mark CDC as GA
Enable CDC by default.
Rename CDC experimental feature to UNUSED_CDC to keep accepting cdc
flag.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-12 12:36:13 +01:00
Piotr Jastrzebski
2091408478 cdc: Make it possible for CDC generation creation to fail
The following patch enables CDC by default, which means CDC has to work
with all clusters now.

There is a problematic case when an existing cluster with no CDC support
is stopped and all the binaries are updated to a newer version with
CDC enabled by default. In that case, nodes know that they are already
members of the cluster, but they can't find any CDC generation, so they
will try to create one. This creation may fail due to lack of QUORUM
for the write.

Before this patch such a situation would lead to the node failing to start.
After the change, the node will start but the CDC generation will be
missing. This means CDC won't work on such a cluster until
nodetool checkAndRepairCdcStreams is run to fix the CDC generation.

We still fail to bootstrap if the creation of CDC generation fails.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
2020-11-12 12:29:31 +01:00
Lubos Kosco
5c488b6e9a scylla_util.py: properly parse GCP instances without size
fixes #7577

Closes #7592
2020-11-12 13:01:40 +02:00
Piotr Sarna
d43ac783c6 db,view: degrade helper message from error to warn
When a missing base column happens to be named `idx_token`,
an additional helper message is printed in logs.
This additional message does not need to have `error` severity,
since the previous, generic message is already marked as `error`.
This patch simply makes it easier to write tests, because in case
this error is expected, only one message needs to be explicitly
ignored instead of two.

Closes #7597
2020-11-12 12:28:26 +02:00
Avi Kivity
6091dc9b79 Merge 'Add more overload-related metrics' from Piotr Sarna
This miniseries adds metrics which can help the users detect potential overloads:
 * due to having too many in-flight hints
 * due to exceeding the capacity of the read admission queue, on replica side

Closes #7584

* github.com:scylladb/scylla:
  reader_concurrency_semaphore: add metrics for shed reads
  storage_proxy: add metrics for too many in-flight hints failures
2020-11-12 12:27:31 +02:00
Raphael S. Carvalho
13fa2bec4c compaction: Make sure a partition is filtered out only by producer
If an interposer consumer is enabled, partition filtering is done by the
consumer instead, but that cannot work, because only the producer is able
to skip to the next partition when the current one is filtered out, so
Scylla crashes with a bad function call in queue_reader when that happens.
This is a regression which started here: 55a8b6e3c9

To fix this problem, let's make sure that partition filtering will only
happen on the producer side.

Fixes #7590.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20201111221513.312283-1-raphaelsc@scylladb.com>
2020-11-12 12:22:10 +02:00
Avi Kivity
052a8d036d Merge "Avoid stalls in token_metadata and replication strategy" from Benny
"
This series is a rebased version of 3 patchsets that were sent
separately before:

1. [PATCH v4 00/17] Cleanup storage_service::update_pending_ranges et al.
    This patchset cleans up service/storage_service's use of
    update_pending_ranges and replicate_to_all_cores.

    It also moves some functionality from gossiping_property_file_snitch::reload_configuration
    into a new method - storage_service::update_topology.

    This prepares storage_service for using a shared ptr to token_metadata,
    updating a copy out of line under a semaphore that serializes writers,
    and eventually replicating the updated copy to all shards and releasing
    the lock.  This is a follow up to #7044.

2. [PATCH v8 00/20] token_metadata versioned shared ptr
    Rather than keeping references on token_metadata use a shared_token_metadata
    containing a lw_shared_ptr<token_metadata> (a.k.a token_metadata_ptr)
    to keep track of the token_metadata.

    Get token_metadata_ptr for a read-only snapshot of the token_metadata
    or clone one for a mutable snapshot that is later used to safely update
    the base versioned_shared_object.

    token_metadata_ptr is used to modify token_metadata out of line, possibly with
    multiple calls that could be preempted in between, so that readers can keep a consistent
    snapshot of it while writers prepare an updated version.

    Introduce a token_metadata_lock used to serialize mutators of token_metadata_ptr.
    It's taken by the storage_service before cloning token_metadata_ptr and held
    until the updated copy is replicated on all shards.

    In addition, this series introduces the token_metadata::clone_async() method
    to copy the token_metadata class using an asynchronous function with
    continuations to avoid reactor stalls as seen in #7220.

    Fixes #7044

3. [PATCH v3 00/17] Avoid stalls in token_metadata and replication strategy

    This series uses the shared_token_metadata infrastructure.

    The first patches in the series deal with cloning token_metadata
    using continuations to allow preemption while cloning (See #7220).

    Then the rest of the series makes sure to always run
    `update_pending_ranges` and `calculate_pending_ranges_for_*` in a thread.
    It then adds a `can_yield` parameter to the token_metadata and abstract_replication_strategy
    `get_pending_ranges` and friends, and finally adds `maybe_yield` calls
    in potentially long loops.

    Fixes #7313
    Fixes #7220

Test: unit (dev)
Dtest: gating(dev)
"

* tag 'replication_strategy_can_yield-v4' of github.com:bhalevy/scylla: (54 commits)
  token_metadata_impl: set_pending_ranges: add can_yield_param
  abstract_replication_strategy: get rid of get_ranges_in_thread
  repair: call get_ranges_in_thread where possible
  abstract_replication_strategy: add can_yield param to get_pending_ranges and friends
  abstract_replication_strategy: define can_yield bool_class
  token_metadata_impl: calculate_pending_ranges_for_* reindent
  token_metadata_impl: calculate_pending_ranges_for_* pass new_pending_ranges by ref
  token_metadata_impl: calculate_pending_ranges_for_* call in thread
  token_metadata: update_pending_ranges: create seastar thread
  abstract_replication_strategy: add get_address_ranges method for specific endpoint
  token_metadata_impl: clone_after_all_left: sort tokens only once
  token_metadata: futurize clone_after_all_left
  token_metadata: futurize clone_only_token_map
  token_metadata: use mutable_token_metadata_ptr in calculate_pending_ranges_for_*
  repair: replace_with_repair: use token_metadata::clone_async
  storage_service: reindent token_metadata blocks
  token_metadata: add clone_async
  abstract_replication_strategy: accept a token_metadata_ptr in get_pending_address_ranges methods
  abstract_replication_strategy: accept a token_metadata_ptr in get_ranges methods
  boot_strapper: get_*_tokens: use token_metadata_ptr
  ...
2020-11-12 11:56:05 +02:00
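The snapshot/clone/publish scheme the merge above describes can be sketched with standard C++ only (hypothetical types; the real code uses seastar sharded services, and per-shard single-threaded access is assumed, so no atomics are shown):

```cpp
#include <cassert>
#include <memory>
#include <mutex>
#include <vector>

struct token_metadata {
    std::vector<long> sorted_tokens;
};

// Read-only snapshot handle, analogous to token_metadata_ptr.
using token_metadata_ptr = std::shared_ptr<const token_metadata>;

class shared_token_metadata {
    token_metadata_ptr _current = std::make_shared<const token_metadata>();
    std::mutex _write_lock;   // serializes mutators, like token_metadata_lock
public:
    // Readers get a cheap, immutable snapshot that stays valid across
    // preemption points even if a writer publishes a new version meanwhile.
    token_metadata_ptr get() const { return _current; }

    // Writers clone out of line under the lock, mutate the clone (possibly
    // yielding between steps), then publish it with a pointer swap.
    template <typename Mutator>
    void update(Mutator&& mutate) {
        std::lock_guard<std::mutex> guard(_write_lock);
        auto clone = std::make_shared<token_metadata>(*_current);
        mutate(*clone);
        _current = std::move(clone);
    }
};
```

The key property is that a reader holding a snapshot never observes a half-applied update: the writer's intermediate states exist only on its private clone.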
Nadav Har'El
b01bdcf910 alternator streams: add test for StartingSequenceNumber
Add a test that better clarifies what StartingSequenceNumber returned by
DescribeStream really guarantees (this question was raised in a review
of a different patch). The main thing we can guarantee is that reading a
shard from that position returns all the information in that shard -
similar to TRIM_HORIZON. This test verifies this, and it passes.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20201112081250.862119-1-nyh@scylladb.com>
2020-11-12 10:40:41 +01:00
Piotr Sarna
3ce7848bdf reader_concurrency_semaphore: add metrics for shed reads
When the admission queue capacity reaches its limit, excess
reads are shed in order to avoid overload. Each such event
now bumps a metric, which can help the user judge whether a replica
is overloaded.
2020-11-11 19:01:38 +01:00
Piotr Wojtczak
d9810ec8eb cql_metrics: Add counters for CQL request messages
This change adds metrics for counting request message types
listed in the CQL v.4 spec under section 4.1
(https://github.com/apache/cassandra/blob/trunk/doc/native_protocol_v4.spec).
To organize things properly, we introduce a new cql_server::transport_stats
object type for aggregating the message and server statistics.

Fixes #4888

Closes #7574
2020-11-11 20:00:17 +02:00
Avi Kivity
d5a6aa4533 Merge 'cql3: Rewrite the need_filtering logic' from Dejan Mircevski
Rewrite in a more readable way that will later allow us to split the WHERE expression in two: a storage-reading part and a post-read filtering part.

Tests: unit (dev,debug)

Closes #7591

* github.com:scylladb/scylla:
  cql3: Rewrite need_filtering() from scratch
  cql3: Store index info in statement_restrictions
2020-11-11 20:00:17 +02:00
Nadav Har'El
940ac80798 cql-pytest: rename test_object_name() function
The name of the utility function test_object_name() is confusing - because
it starts with the word "test", pytest can think (if it's imported into the
top-level namespace) that it is a test... So this patch gives it a better
name - unique_name().

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20201111140638.809189-1-nyh@scylladb.com>
2020-11-11 20:00:17 +02:00
Nadav Har'El
90eba0ce04 alternator, docs: add a new compatibility.md document
This patch adds a new document, docs/alternator/compatibility.md,
which focuses on what users switching from DynamoDB to Alternator
need to know about where Alternator differs from DynamoDB and which
features are missing.

The compatibility information in the old alternator.md is not deleted
yet. It probably should be.

Fixes #7556

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20201110180242.716295-1-nyh@scylladb.com>
2020-11-11 20:00:17 +02:00
Avi Kivity
06c949b452 Update seastar submodule
* seastar a62a80ba1d...043ecec732 (8):
  > semaphore: make_expiry_handler: explicitly use this lambda capture
  > configure: add --{enable,disable}-debug-shared-ptr option
  > cmake: add SEASTAR_DEBUG_SHARED_PTR also in dev mode
  > tls_test: Update the certificates to use sha256
  > logger: allow applying a rate-limit to log messages
  > Merge "Handle CPUs not attached to any NUMA nodes" from Pavel E
  > memory: fix malloc_usable_size() during early initialization
  > Merge "make semaphore related functions noexcept" from Benny
2020-11-11 20:00:17 +02:00
Dejan Mircevski
9150a967c6 cql3: Rewrite need_filtering() from scratch
Makes it easier to understand, in preparation for separating the WHERE
expression into filtering and storage-reading parts.

Signed-off-by: Dejan Mircevski <dejan@scylladb.com>
2020-11-11 08:25:36 -05:00
Dejan Mircevski
e754026010 cql3: Store index info in statement_restrictions
To rewrite need_filtering() in a more readable way, we need to store
info on found indexes in statement_restrictions data members.

Signed-off-by: Dejan Mircevski <dejan@scylladb.com>
2020-11-11 08:25:36 -05:00
Benny Halevy
275fe30628 token_metadata_impl: set_pending_ranges: add can_yield_param
To prevent a > 10 ms stall when inserting into boost::icl::interval_map.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:24 +02:00
Benny Halevy
1e2138e8ef abstract_replication_strategy: get rid of get_ranges_in_thread
Use the can_yield param to get_ranges instead.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:24 +02:00
Benny Halevy
e4e0e71b50 repair: call get_ranges_in_thread where possible
To prevent reactor stalls during repair-based operations.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:24 +02:00
Benny Halevy
ba31350239 abstract_replication_strategy: add can_yield param to get_pending_ranges and friends
To prevent reactor stalls as seen in #7313.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:24 +02:00
Benny Halevy
6c2a089a6f abstract_replication_strategy: define can_yield bool_class
To be used by convention by several other methods.
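A minimal sketch of the bool_class idiom referred to here (modeled on seastar::bool_class, but simplified; not the actual seastar definition):

```cpp
#include <cassert>

// A tag-parameterized strong bool: can_yield::yes cannot be passed where a
// different boolean parameter is expected, unlike a plain `bool`.
template <typename Tag>
class bool_class {
    bool _value;
public:
    static const bool_class yes;
    static const bool_class no;
    constexpr explicit bool_class(bool v) : _value(v) {}
    constexpr explicit operator bool() const { return _value; }
    friend constexpr bool operator==(bool_class a, bool_class b) {
        return a._value == b._value;
    }
};
template <typename Tag> const bool_class<Tag> bool_class<Tag>::yes{true};
template <typename Tag> const bool_class<Tag> bool_class<Tag>::no{false};

struct can_yield_tag {};
using can_yield = bool_class<can_yield_tag>;
```

At a call site, `get_ranges(ks, endpoint, can_yield::yes)` is self-documenting where a bare `true` would be ambiguous.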

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:24 +02:00
Benny Halevy
7fb489d338 token_metadata_impl: calculate_pending_ranges_for_* reindent
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:24 +02:00
Benny Halevy
6ce2436a4c token_metadata_impl: calculate_pending_ranges_for_* pass new_pending_ranges by ref
We can use the seastar thread to keep the vector rather than creating
a lw_shared_ptr for it.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:24 +02:00
Benny Halevy
0ca423dcfc token_metadata_impl: calculate_pending_ranges_for_* call in thread
The functions can be simplified as they are all now being called
from a seastar thread.

Make them sequential, returning void, and yielding if necessary.
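The yielding pattern can be sketched as a loop with periodic preemption points (hypothetical; the `yield` callback stands in for seastar::thread::yield()):

```cpp
#include <cassert>
#include <cstddef>
#include <functional>
#include <vector>

// Process a long sequence, ceding the CPU every `batch` items so one loop
// cannot stall the reactor. Returns how many times we yielded.
inline std::size_t process_with_yields(const std::vector<int>& items,
                                       std::size_t batch,
                                       const std::function<void()>& yield) {
    std::size_t yields = 0;
    std::size_t processed = 0;
    for (int item : items) {
        (void)item;                     // real code would do per-item work here
        if (++processed % batch == 0) {
            yield();                    // preemption point
            ++yields;
        }
    }
    return yields;
}
```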

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:24 +02:00
Benny Halevy
84d086dc77 token_metadata: update_pending_ranges: create seastar thread
So we can yield in this path to prevent reactor stalls.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:24 +02:00
Benny Halevy
1e6c181678 abstract_replication_strategy: add get_address_ranges method for specific endpoint
Some of the callers of get_address_ranges are interested in the ranges
of a specific endpoint.

Rather than building a map for all endpoints and then traversing
it looking for this specific endpoint, build a multimap of token ranges
relating only to the specified endpoint.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:24 +02:00
Benny Halevy
2ce6773dae token_metadata_impl: clone_after_all_left: sort tokens only once
Currently the sorted tokens are copied needlessly on this path by
`clone_only_token_map` and then recalculated after calling
remove_endpoint for each leaving endpoint.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:24 +02:00
Benny Halevy
0abd8e62cd token_metadata: futurize clone_after_all_left
Call the futurized clone_only_token_map and
remove the _leaving_endpoints from the cloned token_metadata_impl.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:24 +02:00
Benny Halevy
4a622c14e1 token_metadata: futurize clone_only_token_map
Does part of clone_async() using continuations to prevent stalls.

Rename the synchronous variant to clone_only_token_map_sync;
it is going to be deprecated once all its users are futurized.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:24 +02:00
Benny Halevy
d1a73ec7b3 token_metadata: use mutable_token_metadata_ptr in calculate_pending_ranges_for_*
Replacing old code using lw_shared_ptr<token_metadata> with the "modern"
mutable_token_metadata_ptr alias.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
6af7b689f3 repair: replace_with_repair: use token_metadata::clone_async
Clone the input token_metadata asynchronously using
clone_async() before modifying it using update_normal_tokens.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
d4d9f3e8a9 storage_service: reindent token_metadata blocks
Many code blocks using with_token_metadata_lock
and get_mutable_token_metadata_ptr now need re-indenting.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
4fc5997949 token_metadata: add clone_async
Clone token_metadata object using async continuation to
prevent reactor stalls.

Refs https://github.com/scylladb/scylla/issues/7220

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
5ab7b0b2ea abstract_replication_strategy: accept a token_metadata_ptr in get_pending_address_ranges methods
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
349aa966ba abstract_replication_strategy: accept a token_metadata_ptr in get_ranges methods
In preparation for returning future<dht::token_range_vector>
from async variants.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
1cbe54a9cf boot_strapper: get_*_tokens: use token_metadata_ptr
To facilitate preemption of long-running loops if needed.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
63137b35ea range_streamer: convert to token_metadata_ptr
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
6cba82a792 repair: accept a token_metadata_ptr in repair based node ops
Only replace_with_repair needs to clone the token_metadata
and update the local copy, so we can safely pass a read-only
snapshot of the token_metadata rather than copying it in all cases.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
7697c0f129 cdc: generation: use token_metadata_ptr
So it can be safely held across continuations.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
ecda21224e storage_service: replicate_to_all_cores: make exception safe
Perform replication in 2 phases.
The first phase just clones the mutable_token_metadata_ptr on all shards.
The second phase applies the cloned copies onto each
local_ss._shared_token_metadata; that phase should never fail.
As belt and suspenders, in the unlikely case we do get an
exception, it is logged and we abort.
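The two-phase structure can be sketched like this (hypothetical shard model, with a plain vector standing in for seastar shards): all fallible work happens in phase one, and phase two is pointer swaps only, so a failure leaves every shard's published state untouched:

```cpp
#include <cassert>
#include <cstddef>
#include <memory>
#include <stdexcept>
#include <vector>

struct token_metadata { int version = 0; };
using tm_ptr = std::shared_ptr<token_metadata>;

inline bool replicate_to_all_shards(std::vector<tm_ptr>& shards,
                                    const token_metadata& source,
                                    bool clone_fails) {
    // Phase 1: make every clone first; this may throw (e.g. on allocation).
    std::vector<tm_ptr> clones;
    try {
        for (std::size_t i = 0; i < shards.size(); ++i) {
            if (clone_fails) {
                throw std::runtime_error("clone failed");
            }
            clones.push_back(std::make_shared<token_metadata>(source));
        }
    } catch (...) {
        return false;                  // no shard was modified
    }
    // Phase 2: pointer swaps only; cannot fail, so it never half-completes.
    for (std::size_t i = 0; i < shards.size(); ++i) {
        shards[i] = std::move(clones[i]);
    }
    return true;
}
```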

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
41c7efd0c0 storage_service: convert to token_metadata_ptr
Clone _token_metadata for updating into _updated_token_metadata
and use it to update the local token_metadata on all shards via
do_update_pending_ranges().

Adjust get_token_metadata to return either the updated_token_metadata,
if available, or the base token_metadata.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
fa880439c9 storage_service: use token_metadata_lock to serialize updates to token_metadata
Rather than using `serialized_action`, grab a lock before mutating
_token_metadata and hold it until it is replicated to all shards.

A following patch will use a mutable token_metadata_ptr
that is updated out of line under the lock.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
476b4daa48 storage_service: convert to shared_token_metadata
In preparation for using token_metadata_ptr and token_metadata_lock.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
88a4c6de13 storage_service: init_server: replicate_to_all_cores after updating token_metadata
Currently the replication to other shards happens later in `prepare_to_join`
that is called in `init_server`.
We should isolate the changes made by init_server and update them first
to all shards so that we can serialize them easily using a lock
and a mutable_token_metadata_ptr, otherwise the lock and the mutable_token_metadata_ptr
will have to be handed over (from this call path) to `prepare_to_join`.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
b13156de7d storage_service: use get_token_metadata and get_mutable_token_metadata methods
In preparation for converting to shared_token_metadata internally.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
572638671c storage_proxy: query_ranges_to_vnodes_generator ranges_to_vnodes: use token_metadata_ptr
Fixes use-after-free seen with putget_with_reloaded_certificates_test:
```
==215==ERROR: AddressSanitizer: heap-use-after-free on address 0x603000a8b180 at pc 0x000012eb5a83 bp 0x7ffd2c16d4c0 sp 0x7ffd2c16d4b0
READ of size 8 at 0x603000a8b180 thread T0
    #0 0x12eb5a82 in std::__uniq_ptr_impl<locator::token_metadata_impl, std::default_delete<locator::token_metadata_impl> >::_M_ptr() const /usr/include/c++/10/bits/unique_ptr.h:173
    #1 0x12ea230d in std::unique_ptr<locator::token_metadata_impl, std::default_delete<locator::token_metadata_impl> >::get() const /usr/include/c++/10/bits/unique_ptr.h:422
    #2 0x12e8d3e8 in std::unique_ptr<locator::token_metadata_impl, std::default_delete<locator::token_metadata_impl> >::operator->() const /usr/include/c++/10/bits/unique_ptr.h:416
    #3 0x12e5d0a2 in locator::token_metadata::ring_range(std::optional<interval_bound<dht::ring_position> > const&, bool) const locator/token_metadata.cc:1712
    #4 0x112d0126 in service::query_ranges_to_vnodes_generator::process_one_range(unsigned long, std::vector<nonwrapping_interval<dht::ring_position>, std::allocator<nonwrapping_interval<dht::ring_position> > >&) service/storage_proxy.cc:4658
    #5 0x112cf3c5 in service::query_ranges_to_vnodes_generator::operator()(unsigned long) service/storage_proxy.cc:4616
    #6 0x112b2261 in service::storage_proxy::query_partition_key_range_concurrent(std::chrono::time_point<seastar::lowres_clock, std::chrono::duration<long, std::ratio<1l, 1000l> > >, std::vector<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >, std::allocator<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > > >&&, seastar::lw_shared_ptr<query::read_command>, db::consistency_level, service::query_ranges_to_vnodes_generator&&, int, tracing::trace_state_ptr, unsigned long, unsigned int, std::unordered_map<nonwrapping_interval<dht::token>, std::vector<utils::UUID, std::allocator<utils::UUID> >, std::hash<nonwrapping_interval<dht::token> >, std::equal_to<nonwrapping_interval<dht::token> >, std::allocator<std::pair<nonwrapping_interval<dht::token> const, std::vector<utils::UUID, std::allocator<utils::UUID> > > > >, service_permit) service/storage_proxy.cc:4023
    #7 0x112b094e in operator() service/storage_proxy.cc:4160
    #8 0x1139c8bb in invoke<service::storage_proxy::query_partition_key_range_concurrent(seastar::lowres_clock::time_point, std::vector<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > >&&, seastar::lw_shared_ptr<query::read_command>, db::consistency_level, service::query_ranges_to_vnodes_generator&&, int, tracing::trace_state_ptr, uint64_t, uint32_t, service::replicas_per_token_range, service_permit)::<lambda(seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > > /local/home/bhalevy/dev/scylla/seastar/include/seastar/core/future.hh:2088
    #9 0x1136625b in futurize_invoke<service::storage_proxy::query_partition_key_range_concurrent(seastar::lowres_clock::time_point, std::vector<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > >&&, seastar::lw_shared_ptr<query::read_command>, db::consistency_level, service::query_ranges_to_vnodes_generator&&, int, tracing::trace_state_ptr, uint64_t, uint32_t, service::replicas_per_token_range, service_permit)::<lambda(seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > > /local/home/bhalevy/dev/scylla/seastar/include/seastar/core/future.hh:2119
    #10 0x11366372 in operator()<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > > /local/home/bhalevy/dev/scylla/seastar/include/seastar/core/future.hh:1480
    #11 0x1139cc3b in call /local/home/bhalevy/dev/scylla/seastar/include/seastar/util/noncopyable_function.hh:145
    #12 0x116f4944 in seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>::operator()(seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&) const /local/home/bhalevy/dev/scylla/seastar/include/seastar/util/noncopyable_function.hh:201
    #13 0x116b3397 in seastar::future<service::query_partition_key_range_concurrent_result> std::__invoke_impl<seastar::future<service::query_partition_key_range_concurrent_result>, seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > >(std::__invoke_other, seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&) /usr/include/c++/10/bits/invoke.h:60
    #14 0x1165c3a6 in std::__invoke_result<seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > >::type std::__invoke<seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > >(seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&) /usr/include/c++/10/bits/invoke.h:96
    #15 0x115e6542 in decltype(auto) std::__apply_impl<seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, std::tuple<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > >, 0ul>(seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, std::tuple<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > >&&, std::integer_sequence<unsigned long, 0ul>) /usr/include/c++/10/tuple:1724
    #16 0x115e6663 in decltype(auto) std::apply<seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, std::tuple<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > > >(seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, std::tuple<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > >&&) /usr/include/c++/10/tuple:1736
    #17 0x115e63f9 in seastar::future<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > >::then_impl_nrvo<seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>, seastar::future<service::query_partition_key_range_concurrent_result> >(seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&&)::{lambda(seastar::internal::promise_base_with_type<service::query_partition_key_range_concurrent_result>&&, seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, seastar::future_state<std::tuple<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > > >&&)#1}::operator()(seastar::internal::promise_base_with_type<service::query_partition_key_range_concurrent_result>&&, seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, seastar::future_state<std::tuple<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > > >&&) const::{lambda()#1}::operator()() const /local/home/bhalevy/dev/scylla/seastar/include/seastar/core/future.hh:1530
    #18 0x1165c4b9 in void seastar::futurize<seastar::future<service::query_partition_key_range_concurrent_result> >::satisfy_with_result_of<seastar::future<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > >::then_impl_nrvo<seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>, seastar::future<service::query_partition_key_range_concurrent_result> >(seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&&)::{lambda(seastar::internal::promise_base_with_type<service::query_partition_key_range_concurrent_result>&&, seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, seastar::future_state<std::tuple<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > > >&&)#1}::operator()(seastar::internal::promise_base_with_type<service::query_partition_key_range_concurrent_result>&&, seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, seastar::future_state<std::tuple<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > > >&&) const::{lambda()#1}>(seastar::internal::promise_base_with_type<service::query_partition_key_range_concurrent_result>&&, seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&&) /local/home/bhalevy/dev/scylla/seastar/include/seastar/core/future.hh:2073
    #19 0x115e61f5 in seastar::future<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > >::then_impl_nrvo<seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>, seastar::future<service::query_partition_key_range_concurrent_result> >(seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&&)::{lambda(seastar::internal::promise_base_with_type<service::query_partition_key_range_concurrent_result>&&, seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, seastar::future_state<std::tuple<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > > >&&)#1}::operator()(seastar::internal::promise_base_with_type<service::query_partition_key_range_concurrent_result>&&, seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, seastar::future_state<std::tuple<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > > >&&) const /local/home/bhalevy/dev/scylla/seastar/include/seastar/core/future.hh:1528
    #20 0x1176e9cc in seastar::continuation<seastar::internal::promise_base_with_type<service::query_partition_key_range_concurrent_result>, seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>, seastar::future<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > >::then_impl_nrvo<seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>, seastar::future<service::query_partition_key_range_concurrent_result> >(seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&&)::{lambda(seastar::internal::promise_base_with_type<service::query_partition_key_range_concurrent_result>&&, seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, seastar::future_state<std::tuple<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > > >&&)#1}, seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > >::run_and_dispose() /local/home/bhalevy/dev/scylla/seastar/include/seastar/core/future.hh:746
    #21 0x16a9a455 in seastar::reactor::run_tasks(seastar::reactor::task_queue&) /local/home/bhalevy/dev/scylla/seastar/src/core/reactor.cc:2196
    #22 0x16a9e691 in seastar::reactor::run_some_tasks() /local/home/bhalevy/dev/scylla/seastar/src/core/reactor.cc:2575
    #23 0x16aa390e in seastar::reactor::run() /local/home/bhalevy/dev/scylla/seastar/src/core/reactor.cc:2730
    #24 0x168ae4f7 in seastar::app_template::run_deprecated(int, char**, std::function<void ()>&&) /local/home/bhalevy/dev/scylla/seastar/src/core/app-template.cc:207
    #25 0x168ac541 in seastar::app_template::run(int, char**, std::function<seastar::future<int> ()>&&) /local/home/bhalevy/dev/scylla/seastar/src/core/app-template.cc:115
    #26 0xd6cd3c4 in main /local/home/bhalevy/dev/scylla/main.cc:504
    #27 0x7f8d905d8041 in __libc_start_main (/local/home/bhalevy/dev/scylla/build/debug/dynamic_libs/libc.so.6+0x27041)
    #28 0xd67c9ed in _start (/local/home/bhalevy/.dtest/dtest-o0qoqmkr/test/node3/bin/scylla+0xd67c9ed)

0x603000a8b180 is located 16 bytes inside of 24-byte region [0x603000a8b170,0x603000a8b188)
freed by thread T0 here:
    #0 0x7f8d92a190cf in operator delete(void*, unsigned long) (/local/home/bhalevy/dev/scylla/build/debug/dynamic_libs/libasan.so.6+0xb30cf)
    #1 0xd7ebe54 in seastar::internal::lw_shared_ptr_accessors_no_esft<locator::token_metadata>::dispose(seastar::lw_shared_ptr_counter_base*) /local/home/bhalevy/dev/scylla/seastar/include/seastar/core/shared_ptr.hh:213
    #2 0x112b155d in seastar::lw_shared_ptr<locator::token_metadata const>::~lw_shared_ptr() /local/home/bhalevy/dev/scylla/seastar/include/seastar/core/shared_ptr.hh:300
    #3 0x112b155d in ~<lambda> service/storage_proxy.cc:4137
    #4 0x1132e92d in ~<lambda> /local/home/bhalevy/dev/scylla/seastar/include/seastar/core/future.hh:1479
    #5 0x1139cc91 in destroy /local/home/bhalevy/dev/scylla/seastar/include/seastar/util/noncopyable_function.hh:148
    #6 0x11565673 in seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>::~noncopyable_function() /local/home/bhalevy/dev/scylla/seastar/include/seastar/util/noncopyable_function.hh:181
    #7 0x1176e783 in seastar::continuation<seastar::internal::promise_base_with_type<service::query_partition_key_range_concurrent_result>, seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>, seastar::future<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > >::then_impl_nrvo<seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>, seastar::future<service::query_partition_key_range_concurrent_result> >(seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&&)::{lambda(seastar::internal::promise_base_with_type<service::query_partition_key_range_concurrent_result>&&, seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, seastar::future_state<std::tuple<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > > >&&)#1}, seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > >::~continuation() /local/home/bhalevy/dev/scylla/seastar/include/seastar/core/future.hh:729
    #8 0x1176ea06 in seastar::continuation<seastar::internal::promise_base_with_type<service::query_partition_key_range_concurrent_result>, seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>, seastar::future<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > >::then_impl_nrvo<seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>, seastar::future<service::query_partition_key_range_concurrent_result> >(seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&&)::{lambda(seastar::internal::promise_base_with_type<service::query_partition_key_range_concurrent_result>&&, seastar::noncopyable_function<seastar::future<service::query_partition_key_range_concurrent_result> (seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> >&&)>&, seastar::future_state<std::tuple<seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > > >&&)#1}, seastar::foreign_ptr<seastar::lw_shared_ptr<query::result> > >::run_and_dispose() /local/home/bhalevy/dev/scylla/seastar/include/seastar/core/future.hh:750
    #9 0x16a9a455 in seastar::reactor::run_tasks(seastar::reactor::task_queue&) /local/home/bhalevy/dev/scylla/seastar/src/core/reactor.cc:2196
    #10 0x16a9e691 in seastar::reactor::run_some_tasks() /local/home/bhalevy/dev/scylla/seastar/src/core/reactor.cc:2575
    #11 0x16aa390e in seastar::reactor::run() /local/home/bhalevy/dev/scylla/seastar/src/core/reactor.cc:2730
    #12 0x168ae4f7 in seastar::app_template::run_deprecated(int, char**, std::function<void ()>&&) /local/home/bhalevy/dev/scylla/seastar/src/core/app-template.cc:207
    #13 0x168ac541 in seastar::app_template::run(int, char**, std::function<seastar::future<int> ()>&&) /local/home/bhalevy/dev/scylla/seastar/src/core/app-template.cc:115
    #14 0xd6cd3c4 in main /local/home/bhalevy/dev/scylla/main.cc:504
    #15 0x7f8d905d8041 in __libc_start_main (/local/home/bhalevy/dev/scylla/build/debug/dynamic_libs/libc.so.6+0x27041)

previously allocated by thread T0 here:
    #0 0x7f8d92a18067 in operator new(unsigned long) (/local/home/bhalevy/dev/scylla/build/debug/dynamic_libs/libasan.so.6+0xb2067)
    #1 0x13cf7132 in seastar::lw_shared_ptr<locator::token_metadata> seastar::lw_shared_ptr<locator::token_metadata>::make<locator::token_metadata>(locator::token_metadata&&) /local/home/bhalevy/dev/scylla/seastar/include/seastar/core/shared_ptr.hh:266
    #2 0x13cc3bfa in seastar::lw_shared_ptr<locator::token_metadata> seastar::make_lw_shared<locator::token_metadata>(locator::token_metadata&&) /local/home/bhalevy/dev/scylla/seastar/include/seastar/core/shared_ptr.hh:422
    #3 0x13ca3007 in seastar::lw_shared_ptr<locator::token_metadata> locator::make_token_metadata_ptr<locator::token_metadata>(locator::token_metadata) locator/token_metadata.hh:338
    #4 0x13c9bdd4 in locator::shared_token_metadata::clone() const locator/token_metadata.hh:358
    #5 0x13c9c18a in service::storage_service::get_mutable_token_metadata_ptr() service/storage_service.hh:184
    #6 0x13a5a445 in service::storage_service::handle_state_normal(gms::inet_address) service/storage_service.cc:1129
    #7 0x13a6371c in service::storage_service::on_change(gms::inet_address, gms::application_state, gms::versioned_value const&) service/storage_service.cc:1421
    #8 0x12a86269 in operator() gms/gossiper.cc:1639
    #9 0x12ad3eea in call /local/home/bhalevy/dev/scylla/seastar/include/seastar/util/noncopyable_function.hh:145
    #10 0x12be2aff in seastar::noncopyable_function<void (seastar::shared_ptr<gms::i_endpoint_state_change_subscriber>)>::operator()(seastar::shared_ptr<gms::i_endpoint_state_change_subscriber>) const /local/home/bhalevy/dev/scylla/seastar/include/seastar/util/noncopyable_function.hh:201
    #11 0x12bb8e98 in atomic_vector<seastar::shared_ptr<gms::i_endpoint_state_change_subscriber> >::for_each(seastar::noncopyable_function<void (seastar::shared_ptr<gms::i_endpoint_state_change_subscriber>)>) utils/atomic_vector.hh:62
    #12 0x12a8662b in gms::gossiper::do_on_change_notifications(gms::inet_address, gms::application_state const&, gms::versioned_value const&) gms/gossiper.cc:1638
    #13 0x12a9387c in operator() gms/gossiper.cc:1978
    #14 0x12b49b20 in __invoke_impl<void, gms::gossiper::add_local_application_state(std::__cxx11::list<std::pair<gms::application_state, gms::versioned_value> >)::<lambda(gms::gossiper&)> mutable::<lambda()> > /usr/include/c++/10/bits/invoke.h:60
    #15 0x12b21fd6 in __invoke<gms::gossiper::add_local_application_state(std::__cxx11::list<std::pair<gms::application_state, gms::versioned_value> >)::<lambda(gms::gossiper&)> mutable::<lambda()> > /usr/include/c++/10/bits/invoke.h:95
    #16 0x12b02865 in __apply_impl<gms::gossiper::add_local_application_state(std::__cxx11::list<std::pair<gms::application_state, gms::versioned_value> >)::<lambda(gms::gossiper&)> mutable::<lambda()>, std::tuple<> > /usr/include/c++/10/tuple:1723
    #17 0x12b028d8 in apply<gms::gossiper::add_local_application_state(std::__cxx11::list<std::pair<gms::application_state, gms::versioned_value> >)::<lambda(gms::gossiper&)> mutable::<lambda()>, std::tuple<> > /usr/include/c++/10/tuple:1734
    #18 0x12b02967 in apply<gms::gossiper::add_local_application_state(std::__cxx11::list<std::pair<gms::application_state, gms::versioned_value> >)::<lambda(gms::gossiper&)> mutable::<lambda()> > /local/home/bhalevy/dev/scylla/seastar/include/seastar/core/future.hh:2052
    #19 0x12ad866a in operator() /local/home/bhalevy/dev/scylla/seastar/include/seastar/core/thread.hh:258
    #20 0x12b609c2 in call /local/home/bhalevy/dev/scylla/seastar/include/seastar/util/noncopyable_function.hh:116
    #21 0xdfabb5f in seastar::noncopyable_function<void ()>::operator()() const /local/home/bhalevy/dev/scylla/seastar/include/seastar/util/noncopyable_function.hh:201
    #22 0x16e21bb4 in seastar::thread_context::main() /local/home/bhalevy/dev/scylla/seastar/src/core/thread.cc:297
    #23 0x16e2190f in seastar::thread_context::s_main(int, int) /local/home/bhalevy/dev/scylla/seastar/src/core/thread.cc:275
    #24 0x7f8d9060322f  (/local/home/bhalevy/dev/scylla/build/debug/dynamic_libs/libc.so.6+0x5222f)
```

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
3fab0f8694 storage_proxy: convert to shared_token_metadata
get() the latest token_metadata_ptr from the
shared_token_metadata before each use.

expose get_token_metadata_ptr() rather than get_token_metadata()
so that caller can keep it across continuations.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
a0436ea324 gossiper: convert to shared_token_metadata
get() the latest token_metadata& from the
shared_token_metadata before each use.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
6d06853e6c abstract_replication_strategy: convert to shared_token_metadata
To facilitate that, keep a const shared_token_metadata& in class database
rather than a const token_metadata&

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
f5f28e9b36 test: network_topology_strategy_test: constify calculate_natural_endpoints
In preparation for changing network_topology_strategy to
accept a const shared_token_metadata& rather than a token_metadata&.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
45fb57a2ec abstract_replication_strategy: pass token_metadata& to get_cached_endpoints
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
ade8c77a7c abstract_replication_strategy: pass token_metadata& to do_get_natural_endpoints
Rather than accessing abstract_replication_strategy::_token_metadata directly,
in preparation for changing it to a shared_token_metadata.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
29ed59f8c4 main: start a shared_token_metadata
And use it to get a token_metadata& compatible
with current usage, until the services are converted to
use token_metadata_ptr.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
9d2cffe7ab storage_service: make class a peering_storage_service
No need to call the global service::get_storage_service()
from within the class non-static methods.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
b41a1cf472 storage_service: report all errors from update_pending_ranges and replicate_to_all_cores
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
4188a0b384 storage_service: do_replicate_to_all_cores: call on_internal_error if failed
Now that `replicate_tm_only` doesn't throw, we handle all errors
in `replicate_tm_only().handle_exception`.

We can't just proceed with business as usual if we failed to replicate
token_metadata on all shards and continue working with inconsistent
copies.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
585a447168 storage_service: make replicate_tm_only noexcept
And with that mark also do_replicate_to_all_cores as noexcept.

The motivation is to catch all errors in replicate_tm_only
and call on_internal_error in the `handle_exception` continuation
in do_replicate_to_all_cores.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
f287346186 storage_service: update_topology: use replicate_to_all_cores
Rather than calling invalidate_cached_rings and update_topology
on all shards, do that only on shard 0 and then replicate
to all other shards using replicate_to_all_cores, as we do
in all other places that modify token_metadata.

Do this in preparation to using a token_metadata_ptr
with which updating of token_metadata is done on a cloned
copy (serialized under a lock) that becomes visible only when
applied with replicate_to_all_cores.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
9217d5661a storage_service: make get_mutable_token_metadata private
Now that update_topology was moved to class storage_service.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
0e739aa801 storage_service: add update_topology method
Move the functionality from gossiping_property_file_snitch::reload_configuration
to the storage_service class.

With that we can make get_mutable_token_metadata private.

TODO: update token_metadata on shard 0 and then
replicate_to_all_cores rather than updating on all shards
in parallel.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
d629aa22f5 storage_service: keyspace_changed invoke update_pending_ranges on shard 0
keyspace_changed just calls update_pending_ranges (ignoring any
errors returned from it), so invoke it on shard 0. With that,
update_pending_ranges() is always called on shard 0
and doesn't need to use `invoke_on` shard 0 by itself.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
ffee694a43 storage_service: make keyspace_changed and update_pending_ranges private
Both are called only internally in the class.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
6eb20c529c storage_service: init_server must be called on shard 0
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
a7df2c215f storage_service: simplify shard 0 sanity checks
We need to assert in only 2 places:
do_update_pending_ranges, which updates token metadata,
and replicate_tm_only, which copies the token metadata
to all other shards.

Currently we throw errors if this is violated
but it should never happen and it's not really recoverable.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
1c16bee81d storage_service: do_replicate_to_all_cores in do_update_pending_ranges
Currently update_pending_ranges involves 2 serialized actions:
do_update_pending_ranges, and then replicate_to_all_cores.

These can be combined by calling do_replicate_to_all_cores
directly from do_update_pending_ranges.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
d6805348ff storage_service: get rid of update_pending_ranges_nowait
It was introduced in 74b4035611
As part of the fix for #3203.

However, the reactor stalls have nothing to do with gossip
waiting for update_pending_ranges - they are related to it being
synchronous and quadratic in the number of tokens
(e.g. get_address_ranges calling calculate_natural_endpoints
for every token then simple_strategy::calculate_natural_endpoints
calling get_endpoint for every token)

There is nothing special in handle_state_leaving that requires
moving update_pending_ranges to the background, we call
update_pending_ranges in many other places and wait for it
so if gossip loop waiting on it was a real problem, then it'd
be evident in many other places.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
b6c1dffe88 storage_service: handle_state_normal: update_pending_ranges earlier
Currently _update_pending_ranges_action is called only on shard 0
and only later update_pending_ranges() updates shard 0 again and replicates
the result to all shards.

There is no need to wait between the two, and call _update_pending_ranges_action
again, so just call update_pending_ranges() in the first place.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
aa8bdc2c0f storage_service: handle_state_bootstrap: update_pending_ranges only after updating host_id
so that the updated host_id (on shard 0) will get replicated to all shards
via update_pending_ranges.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
c2c7baef3b storage_service: on_change: no need to call replicate_to_all_cores
It's already done by each handle_state_* function,
either by directly calling replicate_to_all_cores or indirectly, via
update_pending_ranges.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Benny Halevy
ebfc4c6f4b storage_service: join_token_ring: replicate_to_all_cores early
Currently the updates to token_metadata are immediately visible
on shard 0, but not to other shards until replicate_to_all_cores
syncs them.

To prepare for converting to using shared token_metadata.
In the new world the updated token_metadata is not visible
until committed to the shared_token_metadata, so
commit it here and replicate to all other shards.

It is not clear this isn't needed presently too.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-11 14:20:23 +02:00
Botond Dénes
f5323b29d9 mutation_reader: queue_reader: don't set EOS flag on abort
If the consumer happens to check the EOS flag before it hits the
exception injected by the abort (by calling fill_buffer()), they can
think the stream ended normally and expect it to be valid. However this
is not guaranteed when the reader is aborted. To avoid consumers falsely
thinking the stream ended normally, don't set the EOS flag on abort at
all.

Additionally, make sure the producer is aborted too on abort. In theory
this is not needed, as they are the one initiating the abort, but better
to be safe than sorry.

Fixes: #7411
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20201102100732.35132-1-bdenes@scylladb.com>
2020-11-11 13:44:25 +02:00
Pekka Enberg
ba6a2b68d1 cql-pytest/test_keyspace.py: Add test case for double WITH issue
Let's add a test case for CASSANDRA-9565, similar to the unit test in
Apache Cassandra:

https://github.com/apache/cassandra/blob/trunk/test/unit/org/apache/cassandra/cql3/validation/operations/CreateTest.java#L546
Message-Id: <20201111104251.19932-1-penberg@scylladb.com>
2020-11-11 13:39:57 +02:00
Avi Kivity
5b312a1238 Merge "sstables: make move_to_new_dir idempotent" from Benny
"
Today, if scylla crashes mid-way in sstable::idempotent-move-sstable
or sstable::create_links we may end up in an inconsistent state
where it refuses to restart due to the presence of the moved-
sstable component files in both the staging directory and
main directory.

This series hardens scylla against this scenario by:
1. Improving sstable::create_links to identify the replay condition
   and support it.
2. Modifying the algorithm for moving sstables between directories
   to never be in a state where we have two valid sstables with the
   same generation in both the source and destination directories.
   Instead, it uses the temporary TOC file as a marker for rolling
   backwards or forwards, and renames it atomically from the
   destination directory back to the source directory as a commit
   point.  Before which it is preparing the sstable in the destination
   dir, and after which it starts the process of deleting the sstable
   in the source dir.

Fixes #7429
Refs #5714
"

* tag 'idempotent-move-sstable-v3' of github.com:bhalevy/scylla:
  sstable: create_links: support for move
  sstable_directory: support sstables with both TemporaryTOC and TOC
  sstable: create_links: move automatic sstring variables
  sstable: create_links: use captured comps
  sstable: create_links: capture dir by reference
  sstable: create_links: fix indentation
  sstable: create_links: no need to roll-back on failure anymore
  sstable: create_links: support idempotent replay
  sstable: create_links: cleanup style
  sstable: create_links: add debug/trace logging
  sstable: move_to_new_dir: rm TOC last
  sstable: move_to_new_dir: io check remove calls
  test: add sstable_move_test
2020-11-11 12:57:39 +02:00
Avi Kivity
017174670b Update frozen toolchain for python3-urwid-2.1.2
urwid 2.1.0 struggles with some locale settings. 2.1.2
fixes the problem.

Fixes #7487.
2020-11-11 11:54:05 +02:00
Nadav Har'El
44e0cb177e cql-pytest: convert also run-cassandra to Python
Previously, test/cql-pytest/run was a Python script, while
test/cql-pytest/run-cassandra (to run the tests against Cassandra)
was still a shell script - modeled after test/alternator/run.

This patch rewrites run-cassandra in Python.

A lot of the same code is needed for both run and run-cassandra
tools. test/cql-pytest/run was already written in a way that this
common code was separate functions. For example, functions to start a
server in a temporary directory, to check when it finishes booting,
and to clean up at the end. This patch moves this common code to
a new file, "run.py" - and the tools "run" and "cassandra-run" are
very short programs which mostly use functions from run.py (run-cassandra
also has some unique code to run Cassandra, that no other test runner
will need).

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20201110215210.741753-1-nyh@scylladb.com>
2020-11-11 10:57:21 +02:00
Takuya ASADA
5867af4edd install.sh: set PATH for relocatable CLI tools in python thunk
We currently set PATH for relocatable CLI tools in scylla_util.run() and
scylla_util.out(), but it doesn't work for perftune.py, since it's not part of
Scylla and does not use the scylla_util module.
We can set PATH in the python thunk instead, which sets PATH for all python scripts.

Fixes #7350
2020-11-11 10:27:08 +02:00
Tomasz Grabiec
5fb3650c67 storage_service: Unify token_metadata update paths when replacing a node
After full cluster shutdown, the node which is being replaced will not have its
STATUS set to NORMAL (bug #6088), so listeners will not update _token_metadata.

The bootstrap procedure of replacing node has a workaround for this
and calls update_normal_tokens() on token metadata on behalf of the
replaced node based on just its TOKENS state obtained in the shadow
round.

It does this only for the replacing_a_node_with_same_ip case, but not
for replacing_a_node_with_diff_ip. As a result, replacing the node
with the same ip after full cluster shutdown fails.

We can always call update_normal_tokens(). If the cluster didn't
crash, token_metadata would get the tokens.

Fixes #4325

Message-Id: <1604675972-9398-1-git-send-email-tgrabiec@scylladb.com>
2020-11-11 10:25:56 +02:00
Nadav Har'El
475d8721a5 test: new "cql-pytest" test suite
This patch introduces a new way to do functional testing on Scylla,
similar to Alternator's test/alternator but for the CQL API:

The new tests, in test/cql-pytest, are written in Python (using the pytest
framework), and use the standard Python CQL driver to connect to any CQL
implementation - be it Scylla, Cassandra, Amazon Keyspaces, or whatever.
The use of standard CQL allows the test developer to easily run the same
test against both Scylla and Cassandra, to confirm that the behaviour that
our test expects from Scylla is really the "correct" (meaning Cassandra-
compatible) behavior.

A developer can run Scylla or Cassandra manually, and run "pytest"
to connect to them (see README.md for more instructions). But even more
usefully, this patch also provides two scripts: test/cql-pytest/run and
test/cql-pytest/run-cassandra. These scripts automate the task of running
Scylla or Cassandra (respectively) on a random IP address and temporary
directory, and running the tests against it.

The script test/cql-pytest/run is inspired by the existing test run
scripts of Alternator and Redis, but rewritten in Python in a way that
will make it easy to rewrite - in a future patch - all these other run
scripts to use the same common code to safely run a test server in a
temporary directory.

"run" is extremely quick, taking around two seconds to boot Scylla.
"run-cassandra" is slower, taking 13 seconds to boot Cassandra (maybe
this can be improved in the future, I still don't know how).
The tests themselves take milliseconds.

Although the 'run' script runs a single Scylla node, the developer
can also bring up any size of Scylla or Cassandra cluster manually
and run the tests (with "pytest") against this cluster.

This new test framework differs from the existing alternatives in the
following ways:

 dtest: dtest focuses on testing correctness of *distributed* behavior,
        involving clusters of multiple nodes and often cluster changes
	during the test. In contrast, cql-pytest focuses on testing the
	*functionality* of a large number of small CQL features - which
	can usually be tested on a single-node cluster.
	Additionally, dtest is out-of-tree, while cql-pytest is in-tree,
	making it much easier to add or change tests together with code
	patches.
	Finally, dtest tests are notoriously slow. Hundreds of tests in
	the new framework can finish faster than a single dtest.
	Slow and out-of-tree tests are difficult to write, and I believe
	this explains why no developer loves writing dtests and maintainers
	do not insist on having them. I hope cql-pytest can change that.

 test/cql: The defining difference between the existing test/cql suite
	and the new test/cql-pytest is that the new framework is programmatic
	Python code, not a text file with desired output. Tests written in
	code allow things like looping, or repeating the same test with different
	parameters. Also, when a test fails, it makes it easier to understand
	why it failed beyond just the fact that the output changed.
	Moreover, in some cases, the output changes benignly and cql-pytest
	may check just the desired features of the output.
	Beyond this, the current version of test/cql cannot run against
	Cassandra. test/cql-pytest can.

The primary motivation for this new framework was
https://github.com/scylladb/scylla/issues/7443 - where we had an
esoteric feature (sort order of *partitions* when an index is added),
which can be shown in Cqlsh to have what we think is incorrect behavior,
and yet: 1. We didn't catch this bug because we never wrote a test for it,
possibly because it is too difficult to contribute tests, and 2. We *thought*
that we knew what Cassandra does in this case, but nobody actually tested
it. Yes, we can test it manually with cqlsh, but wouldn't everything be
better if we could just run the same test that we wrote for Scylla against
Cassandra?

So one of the tests we add in this patch confirms issue #7443 in Scylla,
and that our hunch was correct and Cassandra indeed does not have this
problem. I also add a few trivial tests for keyspace create and drop,
as additional simple examples.

Refs #7443.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20201110110301.672148-1-nyh@scylladb.com>
2020-11-10 19:48:23 +02:00
Benny Halevy
bc64ee5410 reloc: add ubsan-suppressions.supp to relocatable package
So we can use it to suppress false-positive ubsan error
when running scylla in debug mode.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20201110165214.1467027-1-bhalevy@scylladb.com>
2020-11-10 19:14:27 +02:00
Benny Halevy
f36e5edd50 install.sh: add support for ubsan-suppressions
Install ubsan-suppressions.supp into libexec and use it in
UBSAN_OPTIONS when running scylla to suppress unwanted ubsan errors.

Test: With scylla-ccm fix https://github.com/scylladb/scylla-ccm/pull/278
    $ ccm create scylla-reloc-1 -n 1 --scylla --version unstable/master:latest --scylla-core-package-uri=../scylla/build/{debug,dev}/dist/tar/scylla-package.tar.gz
    $ ccm start

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20201110165214.1467027-2-bhalevy@scylladb.com>
2020-11-10 19:14:26 +02:00
Piotr Sarna
e5f2fb2a4d codeowners: add a couple of Botonds
since he's our resident readers specialist.

Closes #7585
2020-11-10 18:22:52 +02:00
Avi Kivity
756b14f309 Merge 'cql3: Drop unneeded filtering when continuous clustering-key is selected' from Dejan Mircevski
I noticed that we require filtering for continuous clustering key, which is not necessary.  I dropped the requirement and made sure the correct data is read from the storage proxy.

The corresponding dtest PR: https://github.com/scylladb/scylla-dtest/pull/1727

Tests: unit (dev,debug), dtest (next-gating, cql*py)

Closes #7460

* github.com:scylladb/scylla:
  cql3: Delete some newlines
  cql3: Drop superfluous ALLOW FILTERING
  cql3: Drop unneeded filtering for continuous CK
2020-11-10 17:41:00 +02:00
Piotr Sarna
2e544a0c89 storage_proxy: add metrics for too many in-flight hints failures
When there are too many in-flight hints, writes start returning
overloaded exceptions. We're missing metrics for that, and these could
be useful when judging if the system is in overloaded state.
2020-11-10 16:26:18 +01:00
Botond Dénes
7f07b95dd3 utils/chunked_vector: reserve_partial(): better explain how to properly use
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20201110130953.435123-1-bdenes@scylladb.com>
2020-11-10 15:45:01 +02:00
Eliran Sinvani
8380ac93c5 build: Make artifacts product aware
This commit changes the build file generation and the package
creation scripts to be product aware. This will change the
relocatable package archives to be named after the product,
this commit deals with two main things:
1. Creating the actual Scylla server relocatable with a product
prefixed name - which is independent of any other change
2. Expect all other packages to create product prefixed archive -
which is dependent upon the actual submodules creating
product prefixed archives.

If the support is not introduced in the submodules first this
will break the package build.

Tests: Scylla full build with the original product and a
different product name.

Closes #7581
2020-11-10 14:38:10 +02:00
Takuya ASADA
f8c7d899b4 dist/debian: fix typo for scylla-server.service filename
Currently debian_files_gen.py mistakenly renames scylla-server.service to
"scylla-server." in non-standard product name environments such as
scylla-enterprise; fix it to use the correct filename.

Fixes #7423
2020-11-10 10:38:41 +02:00
Pavel Solodovnikov
2997f6bd2e cmake: redesign scylla's CMakeLists.txt to finally allow full-fledged building
This patch introduces many changes to the Scylla `CMakeLists.txt`
to enable building Scylla without resorting to pre-building
with a previous configure.py build, i.e. cmake script can now
be used as a standalone solution to build and execute scylla.

Submodules, such as Seastar and Abseil, are also dealt with
by importing their CMake scripts directly via `add_subdirectory`
calls. Other submodules, such as `libdeflate` now have a
custom command to build the library at runtime.

There are still a lot of things that are incomplete, though:
* Missing auxiliary packaging targets
* Unit-tests are not built (First priority to address in the
  following patches)
* Compile and link flags are mostly hardcoded to the values
  appropriate for the most recent Fedora 33 installation.
  System libraries should be found via built-in `Find*` scripts,
  compiler and linker flags should be observed and tested by
  executing feature tests.
* The current build is aimed to be built by GCC, need to support
  Clang since we are moving to it.
* Utility cmake functions should be moved to a separate "cmake"
  directory.

The script is updated to use the most recent CMake version available
in Fedora 33, which is 3.18.

Right now this is more of a PoC rather than a full-fledged solution,
but as far as it's not used widely, we are free to evolve it in
a relaxed manner, improving it step by step to achieve feature
parity with `configure.py` solution.

The value in this patch is that now we are able to use any
C++ IDE capable of dealing with CMake solutions and take
advantage of their built-in capabilities, such as:
* Building a code model to efficiently navigate code.
* Find references to symbols.
* Use pretty-printers, beautifiers and other tools conveniently.
* Run scylla and debug it right from the IDE.

Signed-off-by: Pavel Solodovnikov <pa.solodovnikov@scylladb.com>
Message-Id: <20201103221619.612294-1-pa.solodovnikov@scylladb.com>
2020-11-10 10:34:27 +02:00
Nadav Har'El
78c598e08e alternator: add missing TableId field to DescribeTable response
DescribeTable should return a UUID "TableId" in its response.
We already had it for CreateTable, and now this patch adds it to
DescribeTable.

The test for this feature is no longer xfail. Moreover, I improved
the test to not only check that the TableId field is present - it
should also match the documented regular expression (the standard
representation of a UUID).

Refs #5026

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20201104114234.363046-1-nyh@scylladb.com>
2020-11-09 20:21:47 +01:00
Benny Halevy
0af54f3324 sstable: create_links: support for move
When moving a sstable between directories, we would like to
be able to crash at any point during the algorithm with a
clear way to either roll the operation forwards or backwards.

To achieve that, define sstable::create_links_common that accepts
a `mark_for_removal` flag, implementing the following algorithm:

1. link src.toc to dst.temp_toc.
   until removed, the destination sstable is marked for removal.
2. link all src components to dst.
   crashing here will leave dst with both temp_toc and toc.
3.
   a. if mark_for_removal is unset then just remove dst.temp_toc.
      this commits the destination sstable and completes create_links.
   b. if mark_for_removal is set then move dst.temp_toc to src.temp_toc.
      this will atomically toggle recovery after crash from roll-back
      to roll-forward.
      here too, crashing at this point will leave src with both
      temp_toc and toc.
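The three steps above can be sketched roughly as follows (a minimal Python sketch with hypothetical helper and file-name conventions; the real implementation is C++ inside the sstables code):

```python
import os

def create_links_common(src, dst, components, mark_for_removal):
    """Hypothetical sketch of the linking algorithm described above.

    src/dst are sstable path prefixes, e.g. "ks/t/md-1-big".
    """
    # 1. Link src TOC to dst TemporaryTOC: until it is removed, the
    #    destination sstable is considered marked for removal.
    os.link(f"{src}-TOC.txt", f"{dst}-TemporaryTOC.txt")
    # 2. Link all src components to dst, including the TOC itself.
    #    Crashing after this leaves dst with both TemporaryTOC and TOC.
    for comp in components:
        os.link(f"{src}-{comp}", f"{dst}-{comp}")
    if not mark_for_removal:
        # 3a. Commit the destination sstable and complete create_links.
        os.remove(f"{dst}-TemporaryTOC.txt")
    else:
        # 3b. Atomically toggle crash recovery from roll-back to
        #     roll-forward: the *source* is now marked for removal.
        os.rename(f"{dst}-TemporaryTOC.txt", f"{src}-TemporaryTOC.txt")
```

Because every step is a single link/rename/remove, a crash at any point leaves the directory in a state that unambiguously says whether to roll the move forwards or backwards.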

Adjust the unit test for the revised algorithm.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-09 19:57:40 +02:00
Benny Halevy
d893cbd918 sstable_directory: support sstables with both TemporaryTOC and TOC
Keep descriptors in a map so they can be searched easily by generation,
and possibly delete a descriptor, if found, in the presence of
a temporary TOC component.

A following patch will add support to create_links for moving
sstables between directories.  It is based on keeping a TemporaryTOC
file in the destination directory while linking all source components.
If scylla crashes here, the destination sstable will have both
its TemporaryTOC and TOC components and it needs to be removed
to roll the move backwards.

Then, create_links will atomically move the TemporaryTOC from
the destination back to the source directory, to toggle rolling
back to rolling forward by marking the source sstable for removal.
If scylla crashes here, the source sstable will have both
its TemporaryTOC and TOC components and it needs to be removed
to roll the move forward.

Add unit test for this case.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-09 19:57:40 +02:00
Benny Halevy
7c74222037 sstable: create_links: move automatic sstring variables
Rather than copy them.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-09 19:57:40 +02:00
Benny Halevy
9a906d4d69 sstable: create_links: use captured comps
Now that all_components() is held by `do_with`.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-09 19:57:25 +02:00
Benny Halevy
a59911a84c sstable: create_links: capture dir by reference
Now that it's held with `do_with`.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-09 19:55:43 +02:00
Benny Halevy
07f80e0521 sstable: create_links: fix indentation
The previous patch was optimized for reviewability.
Now clean up the indentation.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-09 19:55:32 +02:00
Benny Halevy
6bee63158c sstable: create_links: no need to roll-back on failure anymore
Now that we use `idempotent_link_file` it'll no longer
fail with EEXIST in a replay scenario.

It may fail on ENOENT, and return an exceptional future.
This will be propagated up the stack.  Since it may indicate
parallel invocation of move_to_new_dir, which deletes the source
sstable while this thread links it to the same destination,
rolling back by removing the destination links would
be dangerous.

For any other error, the node is going to be isolated
and stop operating.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-09 19:44:55 +02:00
Benny Halevy
65a3b0e51c sstable: create_links: support idempotent replay
Handle the case where create_links is replayed after crashing in the middle.
In particular, if we restart when moving sstables from staging to the base dir,
right after create_links completes, and right before deleting the source links,
we end up with seemingly 2 valid sstables, one still in staging and the other
already in the base table directory, both are hard linked to the same inodes.

Make create_links idempotent so it can replay the operation safely if crashed and
restarted at any point of its operation.
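The key building block here, `idempotent_link_file` (referenced in the following commit), can be sketched like this, assuming POSIX hard-link semantics (hypothetical Python sketch, not the actual C++ implementation):

```python
import os

def idempotent_link_file(src, dst):
    """Sketch: like os.link, but safe to replay after a crash.

    If dst already exists and is a hard link to the same inode as src
    (i.e. a previous run already created it), succeed silently.
    Otherwise dst is an unrelated file, so the error is re-raised.
    """
    try:
        os.link(src, dst)
    except FileExistsError:
        s, d = os.stat(src), os.stat(dst)
        if (s.st_ino, s.st_dev) != (d.st_ino, d.st_dev):
            raise
```

With this primitive, re-running the whole link loop after a restart is a no-op for links that were already created, and still fails loudly if the destination holds a foreign sstable.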

Add unit tests for replay after partial create_links that is expected to succeed,
and a test for replay when an sstable exists in the destination that is not
hard-linked to the source sstable; create_links is expected to fail in this case.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-09 19:44:42 +02:00
Benny Halevy
f0a57deed7 sstable: create_links: cleanup style
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-09 19:44:27 +02:00
Benny Halevy
55f781689a sstable: create_links: add debug/trace logging
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-09 19:44:11 +02:00
Benny Halevy
884fc07e20 sstable: move_to_new_dir: rm TOC last
To facilitate cleanup on crash, first rename the TOC file to TOC.tmp
and keep it until all other files are removed; finally, remove TOC.tmp.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-09 19:44:04 +02:00
Benny Halevy
ca76ebb898 sstable: move_to_new_dir: io check remove calls
We need to check these to detect critical errors
while removing the source sstable files.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-09 19:43:38 +02:00
Benny Halevy
818af720d7 test: add sstable_move_test
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-09 19:43:28 +02:00
Benny Halevy
8bcdf39a18 hints/manager: scan_for_hints_dirs: fix use-after-move
This use-after-move was apparently exposed after switching to clang
in commit eb861e68e9.

The directory_entry is required for std::stoi(de.name.c_str())
and later in the catch{} clause.

This shows up in the node logs as an "Ignore invalid directory" debug
log message with an empty name, and caused the hintedhandoff_rebalance_test
to fail when hints files aren't rebalanced.

Test: unit(dev)
DTest: hintedhandoff_additional_test.py:TestHintedHandoff.hintedhandoff_rebalance_test (dev, debug)

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20201106172017.823577-1-bhalevy@scylladb.com>
2020-11-09 16:32:54 +01:00
Takuya ASADA
4410934829 install.sh: show warning in nonroot mode when systemd does not support user mode
Older distributions such as CentOS 7 do not support systemd user mode.
On such distributions nonroot mode does not work, so show a warning
message and skip running systemctl --user.

Fixes #7071
2020-11-09 12:16:35 +02:00
Piotr Wojtczak
72c7f25a29 db: add TransitionalAuthorizer and TransitionalAuthenticator...
... to config descriptions

We allow setting the transitional auth as one of the options
in scylla.yaml, but don't mention it at all in the field's
description. Let's change that.

Closes #7565
2020-11-09 10:51:54 +01:00
Gleb Natapov
a01dd636ea suppress ubsan error in boost::deque::clear()
The function is used by raft and fails with ubsan and clang.
The UB is harmless. Let's wait for it to be fixed in boost.

Message-Id: <20201109090353.GZ3722852@scylladb.com>
2020-11-09 11:25:19 +02:00
Bentsi Magidovich
956b97b2a8 scylla_util.py: fix exception handling in curl
The retry mechanism didn't work when a URLError happened. For example:

  urllib.error.URLError: <urlopen error [Errno 101] Network is unreachable>

Let's catch URLError instead of HTTPError, since URLError is the base
exception for all exceptions in the urllib module.
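The exception hierarchy makes this a small fix: HTTPError derives from URLError, so catching the base class covers both. A minimal sketch (the `fetch_with_retry` wrapper is hypothetical, not the actual scylla_util.py code):

```python
from urllib.error import HTTPError, URLError
from urllib.request import urlopen

# HTTPError is a subclass of URLError, so an `except URLError`
# handler also covers HTTP errors -- but not the other way around.
assert issubclass(HTTPError, URLError)

def fetch_with_retry(url, attempts=3, opener=urlopen):
    """Hypothetical retry wrapper; `opener` is injectable for testing."""
    last_error = None
    for _ in range(attempts):
        try:
            return opener(url)
        except URLError as e:  # catches HTTPError and network errors alike
            last_error = e
    raise last_error
```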

Fixes: #7569

Closes #7567
2020-11-09 10:20:35 +02:00
Benny Halevy
02f5659f21 sstables mx/writer: clustering_blocks_input_range::next: warn on potentially bad key
If _offset falls beyond compound_type->types().size()
ignore the extra components instead of accessing out of the types
vector range.

FIXME: we should validate the thrift key against the schema
and reject it in the thrift handler layer.

Refs #7568

Test: unit(dev)
DTest: cql_tests.py:MiscellaneousCQLTester.cql3_insert_thrift_test (dev, debug)

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20201108175738.1006817-1-bhalevy@scylladb.com>
2020-11-08 20:53:14 +02:00
Avi Kivity
6b4a7fa515 Revert "Revert "config: Do not enable repair based node operations by default""
This reverts commit 71d0d58f8c. Repair based
node operations are still not ready and will be re-enabled after more
testing and fixes.
2020-11-08 14:09:50 +02:00
Michał Chojnowski
1eb19976b9 database: make changes to durable_writes effective immediately
Users can change `durable_writes` anytime with ALTER KEYSPACE.
Cassandra reads the value of `durable_writes` every time it applies
a mutation, so changes to that setting take effect immediately. That is,
mutations are added to the commitlog only when `durable_writes` is `true`
at the moment of their application.
Scylla reads the value of `durable_writes` only at `keyspace` construction time,
so changes to that setting take effect only after Scylla is restarted.
This patch fixes the inconsistency.
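The difference between the two behaviors can be sketched like this (hypothetical `Keyspace`/`Database` classes, not the actual Scylla types):

```python
class Keyspace:
    def __init__(self, metadata):
        self.metadata = metadata  # updated in place by ALTER KEYSPACE

class Database:
    def __init__(self, keyspace):
        self.keyspace = keyspace
        self.commitlog = []

    def apply(self, mutation):
        # Consult the current value each time a mutation is applied
        # (Cassandra semantics), instead of a value cached when the
        # keyspace object was constructed -- so ALTER KEYSPACE takes
        # effect without a restart.
        if self.keyspace.metadata["durable_writes"]:
            self.commitlog.append(mutation)
        # ... apply to the memtable as usual ...
```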

Fixes #3034

Closes #7533
2020-11-06 17:53:22 +01:00
Tomasz Grabiec
894abfa6fc Merge "raft: miscellaneous fixes" from Kostja
This series provides assorted fixes which are a
pre-requisite for the joint consensus implementation
series which follows.

* scylla-dev/raft-misc:
  raft: fix raft_fsm_test flakiness
  raft: drop a waiter of snapshoted entry
  raft: use correct type for node info in add_server()
  raft: overload operator<< for debugging
2020-11-06 15:34:16 +01:00
Konstantin Osipov
c4bbbac975 raft: fix raft_fsm_test flakiness
When election_threshold expires, the current node
can become a candidate, in which case it won't
switch back to follower state upon vote_request.
2020-11-06 17:06:07 +03:00
Gleb Natapov
552745d3d3 raft: drop a waiter of snapshoted entry
An index that is waited can be included in an installed snapshot in
which case there is no way to know if the entry was committed or not.
Abort such waiters with an appropriate error.
2020-11-06 17:06:07 +03:00
Gleb Natapov
8bab38c6fa raft: use correct type for node info in add_server() 2020-11-06 17:06:07 +03:00
Alejo Sanchez
2e4977b24c raft: overload operator<< for debugging
Overload operator<< for ostream and print relevant state for server, fsm, log,
and typed_uint64 types.

Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
2020-11-06 17:06:07 +03:00
Tomasz Grabiec
3591e7dffd Merge "Remove unused args from range_tombstone methods" from Pavel Emelyanov
* https://github.com/xemul/scylla/tree/br-range-tombstone-unused-args-2:
  range_tombstone: Remove unused trim-front arg from .apply()
  range_tombstone: Undefault argument in .apply
  range_tombstone: Remove unused schema arg from .set_start
2020-11-06 15:04:15 +01:00
Tomasz Grabiec
6d0d55aa72 Merge "Unglobal query processor instance" from Pavel Emelyanov
The query processor is present in the global namespace and is
widely accessed with global get(_local)?_query_processor().
There's a long-term task to get rid of this globality and make
services and components reference each other and, consequently,
start and stop in a specific order. This set does
this for the query processor.

The remaining users of it are -- alternator, controllers for
client services, schema_tables and sys_dist_ks. All of them
except for the schema_tables are fixed just by passing a
reference to the query processor with small patches. The schema
tables accessing qp sit deep inside the paxos code, but can
be "fixed" with the qctx thing until the qctx itself is
de-globalized.

* https://github.com/xemul/scylla/tree/br-rip-global-query-processor:
  code: RIP global query processor instance
  cql test env: Keep query processor reference on board
  system distributed keyspace: Start sharded service earlier
  schema_tables: Use qctx to make internal requests
  transport: Keep sharded query processor reference on controller
  thrift: Keep sharded query processor reference on controller
  alternator: Use local query processor reference to get keys
  alternator: Keep local query processor reference in server
2020-11-06 14:24:41 +01:00
Pavel Emelyanov
bbd7463960 range_tombstone: Remove unused trim-front arg from .apply()
The only caller of this method always passes true to it.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-11-06 15:13:05 +03:00
Pavel Emelyanov
787a496caf range_tombstone: Undefault argument in .apply
The only purpose of this change is to compile (git-bisect
safety) and thus prove that the next patch is correct.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-11-06 15:13:05 +03:00
Pavel Emelyanov
3da3d448c8 range_tombstone: Remove unused schema arg from .set_start
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-11-06 15:13:05 +03:00
Piotr Sarna
b61d4bc8d0 db: degrade view building progress loading error to warning
When the view builder cannot read view building progress from an
internal CQL table it produces an error message, but that only confuses
the user and the test suite -- this situation is entirely recoverable,
because the builder simply assumes that there is no progress and the
view building should start from scratch.

Fixes #7527

Closes #7558
2020-11-06 10:19:11 +02:00
Avi Kivity
512daa75a6 Merge 'repair: Use single writer for all followers' from Asias He
repair: Use single writer for all followers

Currently, repair master create one writer for each follower to write
rows from follower to sstables. That are RF - 1 writers in total. Each
writer creates 1 sstable for the range repaired, usually a vnode range.
Those sstables for a given vnode range are disjoint.

To reduce the compaction work, we can create one writer for all the
followers. This reduces the number of sstables generated by repair
significantly to one per vnode range from RF - 1 per vnode range.

Fixes #7525

Closes #7528

* github.com:scylladb/scylla:
  repair: No more vector for _writer_done and friends
  repair: Use single writer for all followers
2020-11-05 18:45:07 +01:00
Gleb Natapov
e1442282d1 raft: test: do not store data in initializer_list
Lifetime rules for initializer_list are weird. Use a vector instead.

Message-Id: <20201105111309.GT3722852@scylladb.com>
2020-11-05 18:44:50 +01:00
Michał Chojnowski
f6c33f5775 dbuild: export $HOME seen by dbuild, not by $tool
The default of DBUILD_TOOL=docker requires passwordless access to docker
by the user of dbuild. This is insecure, as any user with unconstrained
access to docker is root equivalent. Therefore, users might prefer to
run docker as root (e.g. by setting DBUILD_TOOL="sudo docker").

However, `$tool -e HOME` exports HOME as seen by $tool.
This breaks dbuild when `$tool` runs docker as another user.
`$tool -e HOME="$HOME"` exports HOME as seen by dbuild, which is
the intended behaviour.

Closes #7555
2020-11-05 18:44:50 +01:00
Michał Chojnowski
8f74c7e162 dbuild: Replace stray use of docker with $tool
Instead of invoking `$tool`, as is done everywhere else in dbuild,
kill_it() invoked `docker` explicitly. This was slightly breaking the
script for DBUILD_TOOL other than `docker`.

Closes #7554
2020-11-05 18:44:49 +01:00
Tomasz Grabiec
fb9b5cae05 sstables: ka/la: Fix abort when next_partition() is called with certain reader state
Cleanup compaction is using consume_pausable_in_thread() to skip over
disowned partitions, which uses flat_mutation_reader::next_partition().

The implementation of next_partition() for the sstable reader has a
bug which may cause the following assertion failure:

  scylla: sstables/mp_row_consumer.hh:422: row_consumer::proceed sstables::mp_row_consumer_k_l::flush(): Assertion `!_ready' failed.

This happens when the sstable reader's buffer gets full when we reach
the partition end. The last fragment of the partition won't be pushed
into the buffer but will stay in the _ready variable. When
next_partition() is called in this state, _ready will not be cleared
and the fragment will be carried over to the next partition. This will
cause assertion failure when the reader attempts to emit the first
fragment of the next partition.

The fix is to clear _ready when entering a partition, just like we
clear _range_tombstones there.

Fixes #7553.
Message-Id: <1604534702-12777-1-git-send-email-tgrabiec@scylladb.com>
2020-11-05 18:44:49 +01:00
Nadav Har'El
7ff72b0ba5 Merge 'secondary_index: fix returned rows token ordering' from Piotr Grabowski
Fixes the ordering of returned rows to use proper signed token ordering. Before this change, rows were sorted by token, but using unsigned comparison, meaning that negative tokens appeared after positive tokens.

Rename `token_column_computation` to `legacy_token_column_computation` and add some comments describing this computation.

Added (new) `token_column_computation` which returns token as `long_type`, which is sorted using signed comparison - the correct ordering of tokens.

Add new `correct_idx_token_in_secondary_index` feature, which flags that the whole cluster is able to use new `token_column_computation`.

Switch token computation in secondary indexes to the (new) `token_column_computation`, which fixes the ordering. This column computation type is only set if the cluster supports the `correct_idx_token_in_secondary_index` feature, to make sure that all nodes
will be able to compute the new `token_column_computation`. Old indexes will also need to be rebuilt to take advantage of this fix, as the new token column computation type is only set for new indexes.
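The ordering bug can be demonstrated directly, assuming tokens are encoded as big-endian two's-complement int64 (consistent with the `long_type` description above; this sketch is illustrative, not the Scylla serialization code):

```python
import struct

def token_bytes(t):
    # Big-endian two's-complement encoding of an int64 token, as a
    # CQL bytes-typed column would store it.
    return struct.pack(">q", t)

tokens = [-100, -1, 0, 1, 100]

# Unsigned byte-wise comparison puts negative tokens (high bit set)
# after all the positive ones...
by_bytes = sorted(tokens, key=token_bytes)
assert by_bytes == [0, 1, 100, -100, -1]

# ...whereas signed int64 comparison gives the correct token order.
assert sorted(tokens) == [-100, -1, 0, 1, 100]
```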

Fix tests according to new token ordering and add one new test to validate this aspect explicitly.

Fixes #7443

Manually tested a scenario where someone created an index on an old version of Scylla and then migrated to the new Scylla. The old index continued to work properly (but returned rows in the wrong order). Upon dropping and re-creating the index, it still returned the same data, but now in the correct order.

Closes #7534

* github.com:scylladb/scylla:
  tests: add token ordering test of indexed selects
  tests: fix tests according to new token ordering
  secondary_index: use new token_column_computation
  feature: add correct_idx_token_in_secondary_index
  column_computation: add token_column_computation
  token_column_computation: rename as legacy
2020-11-05 18:44:49 +01:00
Benny Halevy
f93fb55726 repair: repair_writer: do not capture lw_shared_ptr cross-shard
The shared_from_this lw_shared_ptr must not be accessed
across shards.  Capturing it in the lambda passed to
mutation_writer::distribute_reader_and_consume_on_shards
causes exactly that since the captured lw_shared_ptr
is copied on other shards, and ends up in memory corruption
as seen in #7535 (probably due to lw_shared_ptr._count
going out-of-sync when incremented/decremented in parallel
on other shards with no synchronization).

This was introduced in 289a08072a.

The writer is not needed in the body of this lambda anyway,
so it doesn't need to capture it. It is already held
by the continuations until the end of the chain.

Fixes #7535

Test: repair_additional_test:RepairAdditionalTest.repair_disjoint_row_3nodes_diff_shard_count_test (dev)

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20201104142216.125249-1-bhalevy@scylladb.com>
2020-11-05 18:44:49 +01:00
Tomasz Grabiec
dccd47eec6 Merge "make raft clang compatible" from Gleb
"
    Since we are switching to clang due to raft make it actually compile
    with clang.
    "

tgrabiec: Dropped the patch "raft: compile raft by default" because
the replication_test still fails in debug mode:

   /usr/include/boost/container/deque.hpp:1802:63: runtime error: applying non-zero offset 8 to null pointer

* 'raft-clang-v2' of github.com:scylladb/scylla-dev:
  raft: Use different type to create type dependent statement for static assertion
  raft: drop use of <ranges> for clang
  raft: make test compile with clang
  raft: drop -fcoroutines support from configure.py
2020-11-05 18:42:31 +01:00
Asias He
db28efb28a repair: No more vector for _writer_done and friends
Now that both repair followers and the repair master use a single writer,
we can get rid of the vector associated with _writer_done and friends.

Fixes #7525
2020-11-05 13:28:40 +08:00
Asias He
998b153f86 repair: Use single writer for all followers
Currently, the repair master creates one writer for each follower to write
rows from followers to sstables. That is RF - 1 writers in total. Each
writer creates 1 sstable for the range repaired, usually a vnode range.
Those sstables for a given vnode range are disjoint.

To reduce the compaction work, we can create one writer for all the
followers. This reduces the number of sstables generated by repair
significantly to one per vnode range from RF - 1 per vnode range.

Fixes #7525
2020-11-05 13:28:40 +08:00
Pekka Enberg
edf04cd348 Update tools/python3 submodule
* tools/python3 cfa27b3...1763a1a (1):
  > Relocatable Package: create product prefixed relocatable archive
2020-11-04 14:24:20 +02:00
Pekka Enberg
5519ce2f0e Update tools/jmx submodule
* tools/jmx c51906e...6174a47 (2):
  > Relocatable Package: create product prefixed relocatable archive
  > build(deps-dev): bump junit from 4.8.2 to 4.13.1
2020-11-04 14:24:15 +02:00
Avi Kivity
193d1942f2 build: silence gcc ABI interoperability warning on arm
A gcc bug [1] caused objects built by different versions of gcc
not to interoperate. Gcc helpfully warns when it encounters code that
could be affected.

Since we build everything with one version, and as that versions is far
newer than the last version generating incorrect code, we can silence
that warning without issue.

[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77728

Closes #7495
2020-11-04 13:29:51 +02:00
Tomasz Grabiec
a7837a9a3b Merge "Enable raft tests" from Kostja
Do not run tests which are not built.
For that, pass the test list from configure.py to test.py
via ninja unit_test_list target.
Minor cleanups.

* scylla-dev.git/test.py-list:
  test: enable raft tests
  test.py: do not run tests which are not built
  configure.py: add a ninja command to print unit test list
  test.py: handle ninja mode_list failure
  configure.py: don't pass modes_list unless it's used
2020-11-04 12:25:04 +01:00
Piotr Grabowski
491987016c tests: add token ordering test of indexed selects
Add new test validating that rows returned from both non-indexed selects
and indexed selects return rows sorted in token order (making sure
that both positive and negative tokens are present to test if signed
comparison order is maintained).
2020-11-04 12:02:42 +01:00
Piotr Grabowski
2bd23fbfa9 tests: fix tests according to new token ordering
Fix tests to adhere to new (correct) token ordering of rows when
querying tables with secondary indexes.
2020-11-04 12:02:42 +01:00
Piotr Grabowski
2342b386f4 secondary_index: use new token_column_computation
Switches token column computation to (new) token_column_computation,
which fixes #7443, because new token column will be compared using
signed comparisons, not the previous unsigned comparison of CQL bytes
type.

This column computation type is only set if cluster supports
correct_idx_token_in_secondary_index feature to make sure that all nodes
will be able to compute (new) token_column_computation. Also old
indexes will need to be rebuilt to take advantage of this fix, as new
token column computation type is only set for new indexes.
2020-11-04 12:02:42 +01:00
Piotr Grabowski
6624d933c9 feature: add correct_idx_token_in_secondary_index
Add a new correct_idx_token_in_secondary_index feature, which will be used
to determine if all nodes in the cluster support the new
token_column_computation. This column computation will replace
legacy_token_column_computation in secondary indexes, which was
incorrect: the legacy computation produced values that, when compared
with unsigned comparison (CQL bytes type comparison), resulted in a
different ordering than signed token comparison. See issue:

https://github.com/scylladb/scylla/issues/7443
2020-11-04 12:02:42 +01:00
Piotr Grabowski
9fc2dc59b8 column_computation: add token_column_computation
Introduce new token_column_computation class which is intended to
replace legacy_token_column_computation. The new column computation
returns token as long_type, which means that it will be ordered
according to signed comparison (not unsigned comparison of bytes), which
is the correct ordering of tokens.
2020-11-04 12:02:42 +01:00
Piotr Grabowski
b1350af951 token_column_computation: rename as legacy
Rename token_column_computation to legacy_token_column_computation, as
it will be replaced with a new column_computation. The reason is that this
computation returns bytes, but all tokens in Scylla can now be
represented by int64_t. Moreover, returning bytes causes invalid token
ordering as bytes comparison is done in unsigned way (not signed as
int64_t). See issue:

https://github.com/scylladb/scylla/issues/7443
2020-11-04 12:00:18 +01:00
Eliran Sinvani
4c434f3fa4 moving average rate: Keep computed rates at zero until they are meaningful

When computing moving average rates too early after startup, the
rate can be infinite. This is simply because the sample interval
since the system started is too small to generate meaningful results.
Here we check for this situation and keep the rate at 0 if it happens,
to signal that there are still no meaningful results.
This is unlikely to happen since it can occur only during a
very small time window after restart, so we add a hint to the compiler
to optimize for that in order to have minimal impact on the normal
use case.
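A minimal sketch of the guard (hypothetical function and parameter names):

```python
def compute_rate(count, elapsed_seconds, min_interval=1.0):
    """Report a rate of 0 until the sample interval is long enough to
    be meaningful, so a tiny elapsed time right after startup cannot
    yield an absurd (or infinite) rate. Names are illustrative, not
    the actual Scylla moving-average API."""
    if elapsed_seconds < min_interval:  # unlikely: only just after start
        return 0.0
    return count / elapsed_seconds
```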

Fixes #4469
2020-11-04 11:13:59 +02:00
Avi Kivity
8aa842614a test: gossip_test: configure database memory allocation correctly
The memory configuration for the database object was left at zero.
This can cause the following chain of failures:
 - the test is a little slow due to the machine being overloaded,
   and debug mode
 - this causes the memtable flush_controller timer to fire before
   the test completes
 - the backlog computation callback is called
 - this calculates the backlog as dirty_memory / total_memory; this
   is 0.0/0.0, which resolves to NaN
 - eventually this gets converted to an integer
 - UBSAN doesn't like the conversion from NaN to integer, and complains

Fix by initializing dbcfg.available_memory.
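A sketch of the failure mode and the guard (hypothetical names; note that in C++ `0.0 / 0.0` silently yields NaN, whereas Python raises, so the guard is shown directly):

```python
def backlog(dirty_memory, total_memory):
    """In C++, dirty_memory / total_memory with both zero yields NaN,
    and converting NaN to an integer is undefined behaviour (the
    UBSAN complaint). Guarding against an unconfigured total --
    analogous to initializing dbcfg.available_memory -- avoids the
    NaN entirely. Names are illustrative, not the Scylla code."""
    if total_memory == 0:
        return 0.0  # no memory configured: treat as no backlog
    return dirty_memory / total_memory
```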

Test: gossip_test(debug), 1000 repetitions with concurrency 6

Closes #7544
2020-11-04 09:26:08 +02:00
Calle Wilund
1db9da2353 alternator::streams: Workaround fix for apparent code gen bug in seq_number
Fixes #7325

When building with clang on Fedora 32, calling the string_view constructor
of bignum generates broken IDs (i.e. parsing breaks). Creating a temp
std::string fixes it.

Closes #7542
2020-11-04 09:26:08 +02:00
Benny Halevy
1d199c31f8 storage_service: check_for_endpoint_collision: copy gossip state across preemption point
Since 11a8912093, get_gossip_status
returns a std::string_view rather than a sstring.

As seen in dtest, we may print garbage to the log
if we print the string_view after preemption (calling
_gossiper.reset_endpoint_state_map().get())

Test: update_cluster_layout_tests:TestUpdateClusterLayout.simple_add_two_nodes_in_parallel_test (dev)
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20201103132720.559168-1-bhalevy@scylladb.com>
2020-11-04 09:26:08 +02:00
Konstantin Osipov
507ca98748 test: enable raft tests
It's safe to do this since now the tests are only run if
they are configured.
2020-11-03 21:30:11 +03:00
Konstantin Osipov
5f90582362 test.py: do not run tests which are not built
Use ninja unit_test_list to find out the list of configured tests.
If a test is not configured by configure.py, do not try to run it.
2020-11-03 21:30:08 +03:00
Konstantin Osipov
9198e38311 configure.py: add a ninja command to print unit test list
test.py needs this list to avoid running tests which
are not configured, and hence not built.
2020-11-03 21:27:45 +03:00
Konstantin Osipov
ef9c63a6d9 test.py: handle ninja mode_list failure
Print an error message if the subcommand fails.
Use a regular expression to match output.
2020-11-03 21:06:17 +03:00
Konstantin Osipov
7fa08496b0 configure.py: don't pass modes_list unless it's used
Don't redefine modes_list if it's not used by the ninja
file formatter.
2020-11-03 21:02:55 +03:00
Benny Halevy
9d91d38502 SCYLLA-VERSION-GEN: change master version to 4.4.dev
Now that scylla-ccm and scylla-dtest conform to PEP-440
version comparison (See https://www.python.org/dev/peps/pep-0440/)
we can safely change scylla version on master to be the development
branch for the next release.

The version order logic is:
  4.3.dev is followed by
  4.3.rc[i] followed by
  4.3.[n]

Note that also according to
https://blog.jasonantman.com/2014/07/how-yum-and-rpm-compare-versions/
4.3.dev < 4.3.rc[i] < 4.3.[n]
as "dev" < "rc" by alphabetical order
and both "dev" and "rc*" < any number, based on the general
rule that alphabetical strings compare as less than numbers.

Refs scylladb/scylla-machine-image#79

Test: unit
Dtest: gating
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20201015151153.726637-1-bhalevy@scylladb.com>
2020-11-03 13:42:54 +02:00
Avi Kivity
25e6a9e493 Merge "utils/large_bitset: reserve memory for _storage gently" from Botond
"
Introduce a gentle (yielding) implementation of reserve for chunked
vector and use it when reserving the backing storage vector for large
bitset. Large bitset is used by bloom filters, which can be quite large
and have been observed to cause stalls when allocating memory for the
storage.

Fixes: #6974

Tests: unit(dev)
"

* 'gentle-reserve/v1' of https://github.com/denesb/scylla:
  utils/large_bitset: use reserve_partial() to reserve _storage
  utils/chunked_vector: add reserve_partial()
2020-11-03 13:42:54 +02:00
Tomasz Grabiec
5abddc8568 Merge "Testing performance of different collections" from Pavel Emelyanov
There's a perf_bptree test that compares the B+ tree collection with
the std::set and std::map ones. More will come, and the "patterns"
to compare are not just "fill with keys" and "drain to empty", so
here's the perf_collection test, which measures timings of

- fill with keys
- drain key by key
- empty with .clear() call
- full scan with iterator
- insert-and-remove of a single element

for currently used collections

- std::set
- std::map
- intrusive_set_external_comparator
- bplus::tree

* https://github.com/xemul/scylla/tree/br-perf-collection-test:
  test: Generalize perf_bptree into perf_collection
  perf_collection: Clear collection between iterations
  perf_collection: Add intrusive_set_external_comparator
  perf_collection: Add test for single element insertion
  perf_collection: Add test for destruction with .clear()
  perf_collection: Add test for full scan time
2020-11-03 13:42:54 +02:00
Gleb Natapov
88a1274583 raft: Use different type to create type dependent statement for static assertion
For some reason the one that works for gcc does not work for clang.
2020-11-03 08:49:54 +02:00
Gleb Natapov
b6b51bf17e raft: drop use of <ranges> for clang 2020-11-03 08:49:54 +02:00
Gleb Natapov
847400ee96 raft: make test compile with clang
clang does not allow returning a future<> with co_return, and it is
stricter about type conversions.
2020-11-03 08:49:54 +02:00
Gleb Natapov
ff18072de8 raft: drop -fcoroutines support from configure.py
We switched to clang and it does not have this flag.
2020-11-03 08:49:54 +02:00
Botond Dénes
a08b640fa7 utils/large_bitset: use reserve_partial() to reserve _storage
To avoid stalls when reserving memory for a large bloom filter. The
filter creation already has a yielding loop for initialization, this
patch extends it to reservation of memory too.
2020-11-02 18:03:19 +02:00
Botond Dénes
bb908b1750 utils/chunked_vector: add reserve_partial()
A variant of reserve() which allows gentle reserving of memory. This
variant will allocate just one chunk at a time. To drive it to
completion, one should call it repeatedly with the return value of the
previous call, until it returns 0.
This variant will be used in the next patch by the large bitset creation
code, to avoid stalls when allocating large bloom filters (which are
backed by large bitset).
2020-11-02 18:02:01 +02:00
Piotr Wojtczak
caa3c471c0 Validate ascii values when creating from CQL
Although the code for it existed already, the validation function
hasn't been invoked properly. This change fixes that, adding
a validating check when converting from text to specific value
type and throwing a marshal exception if some characters
are not ASCII.

Fixes #5421

Closes #7532
2020-11-02 16:47:32 +02:00
Pavel Emelyanov
364ddab148 test: Do not dump test log onto terminal
When unit tests fail, test.py dumps their output on the screen. It is impossible
to read this output from the terminal, all the more so since the logs are saved
in the testlog/ directory anyway. At the same time, the names of the failed tests
are all left _before_ these logs, and if the terminal history is not large enough,
it becomes quite annoying to find the names.

The proposal is not to spoil the terminal with raw logs -- print just the names
and summaries. The logs themselves are at testlog/$mode/$name_of_the_test.log

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20201031154518.22257-1-xemul@scylladb.com>
2020-11-02 15:42:34 +02:00
Tomasz Grabiec
ba42e7fcc5 multishard_mutation_query: Propagate mutation_reader::forwarding flag
Otherwise all readers will be created with the default forwarding::yes.
This inhibits some optimizations (e.g. results in more sstable read-ahead).

It will also be problematic when we introduce mutation sources which don't support
forwarding::yes in the future.

Message-Id: <1604065206-3034-1-git-send-email-tgrabiec@scylladb.com>
2020-11-02 15:24:36 +02:00
Avi Kivity
eb861e68e9 build: switch to clang as the default compiler
Clang brings us working support for coroutines, which are
needed for Raft and for code simplification.

perf_simple_query as well as full system tests show no
significant performance regression.

Test: unit(dev, release, debug)

Closes #7531
2020-11-02 14:18:13 +02:00
Nadav Har'El
ffbd487c86 Merge 'alternator::streams: Use end-of-record info in get_records' from Calle Wilund
Fixes #7496

Since cdc log now has an end-of-batch/record marker that tells
us explicitly that we've read the last row of a change, we
can use this instead of timestamp checks + limit extra to
ensure we have complete records.

Note that this does not try to fulfill the user query limit
exactly. To do that we would need to add a loop and potentially
re-query if the queried rows are not enough. But that is a
separate exercise, and superbly suited for coroutines!

Closes #7498

* github.com:scylladb/scylla:
  alternator::streams: Reduce the query limit depending on cdc opts
  alternator::streams: Use end-of-record info in get_records
2020-11-02 13:34:00 +02:00
Tomasz Grabiec
2dfc5f1ee5 Merge "Cleanup gossiper endpoint interface" from Benny
This series cleans up the gossiper endpoint_state interface
marking methods const and const noexcept where possible.

To achieve that, endpoint_state::get_status was changed to
return a string_view rather than a sstring so it won't
need to allocate memory.

Also, get_cluster_name and get_partitioner_name were
changed to return a const sstring& rather than sstring
so they won't need to allocate memory.

The motivation for the series stems from #7339,
where an exception in get_host_id within a storage_service
notification handler, called from seastar::defer, crashed
the server.

With this series, get_host_id may still throw exceptions on
logical error, but not from calling get_application_state_ptr.

Refs #7339

Test: unit(dev)

* tag 'gossiper-endpoint-noexcept-v2':
  gossiper: mark trivial methods noexcept
  gossiper: get_cluster_name, get_partitioner_name: make noexcept
  gossiper: get_gossip_status: return string_view and make noexcept
  gms/endpoint_state: mark methods using get_status noexcept
  gms/endpoint_state: get_status: return string_view and make noexcept
  gms/endpoint_state: mark get_application_state_ptr and is_cql_ready noexcept
  gms/endpoint_state: mark trivial methods noexcept
  gms/heart_beat_state: mark methods noexcept
  gms/versioned_value: mark trivial methods noexcept
  gms/version_generator: mark get_next_version noexcept
  fb_utilities.hh: mark methods noexcept
  messaging: msg_addr: mark methods noexcept
  gms/inet_address: mark methods noexcept
2020-11-02 12:30:30 +01:00
Avi Kivity
7a3376907e Merge 'improvements for GCE image' from Bentsi
When logging in to a GCE instance created from the GCE image, it takes 10 seconds to determine that we are not running on AWS. Also, some unnecessary debug logging messages are printed:
```
bentsi@bentsi-G3-3590:~/devel/scylladb$ ssh -i ~/.ssh/scylla-qa-ec2 bentsi@35.196.8.86
Warning: Permanently added '35.196.8.86' (ECDSA) to the list of known hosts.
Last login: Sun Nov  1 22:14:57 2020 from 108.128.125.4

   _____            _ _       _____  ____
  / ____|          | | |     |  __ \|  _ \
 | (___   ___ _   _| | | __ _| |  | | |_) |
  \___ \ / __| | | | | |/ _` | |  | |  _ <
  ____) | (__| |_| | | | (_| | |__| | |_) |
 |_____/ \___|\__, |_|_|\__,_|_____/|____/
               __/ |
              |___/

Version:
       666.development-0.20201101.6be9f4938
Nodetool:
	nodetool help
CQL Shell:
	cqlsh
More documentation available at:
	http://www.scylladb.com/doc/
By default, Scylla sends certain information about this node to a data collection server. For information, see http://www.scylladb.com/privacy/

WARNING:root:Failed to grab http://169.254.169.254/latest/...
WARNING:root:Failed to grab http://169.254.169.254/latest/...
    Initial image configuration failed!

To see status, run
 'systemctl status scylla-image-setup'

[bentsi@artifacts-gce-image-jenkins-db-node-aa57409d-0-1 ~]$

```
This PR fixes that.

Closes #7523

* github.com:scylladb/scylla:
  scylla_util.py: remove unnecessary logging
  scylla_util.py: make is_aws_instance faster
  scylla_util.py: added ability to control sleep time between retries in curl()
2020-11-02 12:32:25 +02:00
Piotr Sarna
b66c285f94 schema_tables: fix fixing old secondary index schemas
Old secondary index schemas did not have their idx_token column
marked as computed, and there already exists code which updates
them. Unfortunately, the fix itself contains an error and doesn't
fire if computed columns are not yet supported by the whole cluster,
which is a very common situation during upgrades.

Fixes #7515

Closes #7516
2020-11-02 12:30:20 +02:00
Takuya ASADA
100127bc02 install.sh: allow --packaging with nonroot mode
Since scylla-ccm wants to skip systemctl, we need to support --packaging
in nonroot mode too.

Related: #7187
2020-11-02 12:07:14 +02:00
Calle Wilund
7c8f457bab alternator::streams: Reduce the query limit depending on cdc opts
Avoid querying much more than needed.
Since we have exact row markers now, this is safer to do.
2020-11-02 08:37:27 +00:00
Calle Wilund
c79108edbb alternator::streams: Use end-of-record info in get_records
Fixes #7496

Since cdc log now has an end-of-batch/record marker that tells
us explicitly that we've read the last row of a change, we
can use this instead of timestamp checks + limit extra to
ensure we have complete records.

Note that this does not try to fulfill the user query limit
exactly. To do that we would need to add a loop and potentially
re-query if the queried rows are not enough. But that is a
separate exercise, and superbly suited for coroutines!
2020-11-02 08:35:36 +00:00
Avi Kivity
b6f8bb6b77 tools/toolchain: update maintainer instructions
The instructions are updated for multiarch images (images that
can be used on x86 and ARM machines).

Additionally,
 - docker is replaced with podman, since that is now used by
   developers. Docker is still supported for developers, but
   the image creation instructions are only tested with podman.
 - added instructions about updating submodules
 - `--format docker` is removed. It is not necessary with
   more recent versions of docker.

Closes #7521
2020-11-02 10:29:54 +02:00
Avi Kivity
3993498fb4 connection_notifier: prevent link errors due to variables defined in header
connection_notifier.hh defines a number of template-specialized
variables in a header. This is illegal since you're allowed to
define something multiple times if it's a template, but not if it's
fully specialized. gcc doesn't care but clang notices and complains.

Fix by defining the variables as inline variables, which are
allowed to have definitions in multiple translation units.

Closes #7519
2020-11-02 10:28:55 +02:00
Avi Kivity
83b3d3d1d1 test: increase timeout to 12000 seconds to account for slow ARM cores
Some ARM cores are slow, and trip our current timeout of 3000
seconds in debug mode. Quadrupling the timeout is enough to make
debug-mode tests pass on those machines.

Since the timeout's role is to catch rare infinite loops in unsupervised
testing, increasing the timeout has no ill effect (other than to
delay the report of the failure).

Closes #7518
2020-11-02 10:28:14 +02:00
Piotr Sarna
ed047d54bf Merge 'alternator: fix combination of filter and projection' from Nadav
The main goal of this series is to fix issue #6951 - a Query (or Scan) with
a combination of filtering and projection parameters produced wrong results if
the filter needs some attributes which weren't projected.

This series also adds new tests for various corner cases of this issue. These
new tests also pass after this fix, or still fail because of some other missing
feature (namely, nested attributes). These additional tests will be important if
we ever want to refactor or optimize this code, because they exercise some rare
corner code paths at the intersection of filtering and projection.

This series also fixes some additional problems related to this issue, like
combining old and new filtering/projection syntaxes (should be forbidden), and
even one fix to a wrong comment.

Closes #7328

* github.com:scylladb/scylla:
  alternator test: tests for nested attributes in FilterExpression
  alternator test: fix comment
  alternator tests: additional tests for filter+projection combination
  alternator: forbid combining old and new-style parameters
  alternator: fix query with both projection and filtering
2020-11-02 07:28:41 +01:00
Bentsi Magidovich
2866f2d65d scylla_util.py: remove unnecessary logging
When calling curl and an exception is raised, we see unnecessary log messages
that we can't control. For example, when used in scylla_login we see the
following messages:
WARNING:root:Failed to grab http://169.254.169.254/latest/...
WARNING:root:Failed to grab http://169.254.169.254/latest/...
    Initial image configuration failed!

To see status, run
 'systemctl status scylla-image-setup'
2020-11-02 01:13:44 +03:00
Bentsi Magidovich
a62237f1c6 scylla_util.py: make is_aws_instance faster
When used, for example, in scylla_login, we need to determine that we
are not running on AWS in less than 10 seconds.
2020-11-02 00:11:21 +03:00
Bentsi Magidovich
83a8550a5f scylla_util.py: added ability to control sleep time between retries in curl() 2020-11-01 22:39:19 +03:00
Avi Kivity
b45c933036 tools: toolchain: update for gcc-10.2.1-6.fc33.x86_64 2020-11-01 19:18:00 +02:00
Avi Kivity
d626563fe3 Update seastar submodule
* seastar 57b758c2f9...a62a80ba1d (1):
  > thread: increase stack size in debug mode
2020-11-01 19:16:59 +02:00
Benny Halevy
e4614d4836 gossiper: mark trivial methods noexcept
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-01 16:46:47 +02:00
Benny Halevy
1ba4c84ae2 gossiper: get_cluster_name, get_partitioner_name: make noexcept
These methods can return a const sstring& rather than
allocating a sstring. And with that they can be marked noexcept.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-01 16:46:29 +02:00
Benny Halevy
11a8912093 gossiper: get_gossip_status: return string_view and make noexcept
Change get_gossip_status to return string_view,
and with that it can be noexcept now that it doesn't
allocate memory via sstring.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-01 16:46:18 +02:00
Benny Halevy
126e486fde gms/endpoint_state: mark methods using get_status noexcept
Now that get_status returns string_view, just compare it with a const char*
rather than making a sstring out of it, and consequently, can be marked noexcept.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-01 16:46:18 +02:00
Benny Halevy
6b9191b6c2 gms/endpoint_state: get_status: return string_view and make noexcept
get_status doesn't need to allocate a sstring, it can just
return a std::string_view to the status string, if found.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-01 16:46:18 +02:00
Benny Halevy
232c665bab gms/endpoint_state: mark get_application_state_ptr and is_cql_ready noexcept
Although std::map::find is not guaranteed to be noexcept,
that depends on the comparator used, and in this case comparing
application_state is noexcept. Therefore, we can safely mark
get_application_state_ptr noexcept.

is_cql_ready depends on get_application_state_ptr and otherwise
handles exceptions from boost::lexical_cast, so it can be marked
noexcept as well.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-01 16:46:18 +02:00
Benny Halevy
5d8e2c038b gms/endpoint_state: mark trivial methods noexcept
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-01 16:46:18 +02:00
Benny Halevy
d4c364507e gms/heart_beat_state: mark methods noexcept
Now that get_next_version() is noexcept,
update_heart_beat can be noexcept too.

All others are trivially noexcept.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-01 16:46:18 +02:00
Benny Halevy
68a2920201 gms/versioned_value: mark trivial methods noexcept
Also, versioned_value::compare_to() can be marked const.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-01 16:46:18 +02:00
Benny Halevy
c295f521b9 gms/version_generator: mark get_next_version noexcept
It is trivially so.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-01 16:46:18 +02:00
Benny Halevy
87c3fd9cd8 fb_utilities.hh: mark methods noexcept
Now that gms::inet_address assignment is marked as noexcept.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-01 16:46:18 +02:00
Benny Halevy
e28d80ec0c messaging: msg_addr: mark methods noexcept
Based on gms::inet_address.

With that, gossiper::get_msg_addr can be marked noexcept (and const while at it).

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-01 16:46:18 +02:00
Benny Halevy
232fc19525 gms/inet_address: mark methods noexcept
Based on the corresponding net::inet_address calls.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
2020-11-01 16:46:18 +02:00
Avi Kivity
6be9f49380 cql3: expression: switch from range_bound to interval_bound to avoid clang class template argument deduction woes
Clang does not implement P1814R0 (class template argument deduction
for alias templates), so it can't deduce the template arguments
for range_bound, but it can for interval_bound, so switch to that.
Using the modern name rather than the compatibility alias is preferred
anyway.

Closes #7422
2020-11-01 13:19:44 +02:00
Nadav Har'El
deaa141aea docs/isolation.md: fix list of IO priority classes
In commit de38091827 the two IO priority classes streaming_read
and streaming_write were merged into just one. The document
docs/isolation.md leaves a lot to be desired (hint, hint, to anyone
reading this who can write content!), but let's at least not have
incorrect information there.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20201101102220.2943159-1-nyh@scylladb.com>
2020-11-01 12:27:06 +02:00
Avi Kivity
46612fe92b Merge 'Add debug context to views out of sync' from Piotr Sarna
This series adds more context to debugging information in case a view gets out of sync with its base table.
A test was conducted manually, by:
1. creating a table with a secondary index
2. manually deleting computed column information from system_schema.computed_columns
3. restarting the target node
4. trying to write to the index

Here's what's logged right after the index metadata is loaded from disk:
```
ERROR 2020-10-30 12:30:42,806 [shard 0] view - Column idx_token in view ks.t_c_idx_index was not found in the base table ks.t
ERROR 2020-10-30 12:30:42,806 [shard 0] view - Missing idx_token column is caused by an incorrect upgrade of a secondary index. Please recreate index ks.t_c_idx_index to avoid future issues.
```

And here's what's logged during the actual failure - when Scylla notices that there exists
a column which is not computed but is also not found in the base table:
```
ERROR 2020-10-30 12:31:25,709 [shard 0] storage_proxy - exception during mutation write to 127.0.0.1: seastar::internal::backtraced<std::runtime_error> (base_schema(): operation unsupported when initialized only for view reads. Missing column in the base table: idx_token Backtrace:    0x1d14513
   0x1d1468b
   0x1d1492b
   0x109bbad
   0x109bc97
   0x109bcf4
   0x1bc4370
   0x1381cd3
   0x1389c38
   0xaf89bf
   0xaf9b20
   0xaf1654
   0xaf1afe
   0xb10525
   0xb10ad8
   0xb10c3a
   0xaaefac
   0xabf525
   0xabf262
   0xac107f
   0x1ba8ede
   0x1bdf749
   0x1be338c
   0x1bfe984
   0x1ba73fa
   0x1ba77a4
   0x9ea2c8
   /lib64/libc.so.6+0x27041
   0x9d11cd
   --------
   seastar::lambda_task<seastar::execution_stage::flush()::{lambda()#1}>

```

Hopefully, this information will make it much easier to solve future problems with out-of-sync views.

Tests: unit(dev)
Fixes #7512

Closes #7513

* github.com:scylladb/scylla:
  view: add printing missing base column on errors
  view: simplify creating base-dependent info for reads only
  view: fix typo: s/dependant/dependent
  view: add error logs if a view is out of sync with its base
2020-11-01 11:09:58 +02:00
Piotr Wojtczak
2150c0f7a2 cql: Check for timestamp correctness in USING TIMESTAMP statements
In certain CQL statements it's possible to provide a custom timestamp via the USING TIMESTAMP clause. Those values are accepted in microseconds; however, there is no limit on the timestamp (apart from the type size constraint), and providing a timestamp in a different unit, such as nanoseconds, can lead to creating an entry with a timestamp far in the future, thus compromising the table.

To avoid this, this change introduces a sanity check for modification and batch statements that raises an error when a timestamp more than 3 days into the future is provided.

Fixes #5619

Closes #7475
2020-11-01 11:01:24 +02:00
Pavel Emelyanov
d045df773f code: RIP global query processor instance
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-10-31 18:51:52 +03:00
Pavel Emelyanov
a340caa328 cql test env: Keep query processor reference on board
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-10-31 18:51:52 +03:00
Pavel Emelyanov
8989021dc3 system distributed keyspace: Start sharded service earlier
The constructors just set up the references; the real start happens in .start(),
so it is safe to do this early. This helps avoid carrying the migration manager
and query processor down the storage service cluster joining code.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-10-31 18:51:52 +03:00
Pavel Emelyanov
021b905773 schema_tables: Use qctx to make internal requests
The query processor global instance is going away. The schema_tables usage
of it requires a huge rework to push the qp reference to the needed places.
However, those places talk to system keyspace and are thus the users of the
"qctx" thing -- the query context for local internal requests.

To make cql tests not crash on null qctx pointer, its initialization should
come earlier (conforming to the main start sequence).

The qctx itself is a global pointer, which waits for its fix too, of course.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-10-31 18:50:01 +03:00
Pavel Emelyanov
699074bd48 transport: Keep sharded query processor reference on controller
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-10-31 15:44:21 +03:00
Pavel Emelyanov
c887d0df4c thrift: Keep sharded query processor reference on controller
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-10-31 15:44:21 +03:00
Pavel Emelyanov
cf172cf656 alternator: Use local query processor reference to get keys
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-10-31 15:44:21 +03:00
Pavel Emelyanov
94a9f22002 alternator: Keep local query processor reference in server
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-10-31 15:44:21 +03:00
Piotr Sarna
35887bf88b view: add printing missing base column on errors
When a write operation attempts to use an out-of-sync view,
the whole operation needs to be aborted with an error. After this patch,
the error contains more context - namely, the name of the missing column.
2020-10-31 12:22:07 +01:00
Piotr Sarna
ef3470fa34 view: simplify creating base-dependent info for reads only
The code which created base-dependent info for materialized views
can be expressed with fewer branches. Also, the constructor
which takes a single parameter is made explicit.
2020-10-31 12:22:07 +01:00
Piotr Sarna
71b28d69b3 view: fix typo: s/dependant/dependent 2020-10-31 12:22:07 +01:00
Piotr Sarna
669e2ada92 view: add error logs if a view is out of sync with its base
When Scylla finds out that a materialized view contains columns
which are not present in the base table (and they are not computed),
it now presents comprehensible errors in the log.
2020-10-31 12:22:07 +01:00
Avi Kivity
1734205315 Update seastar submodule
* seastar 6973080cd1...57b758c2f9 (11):
  > http: handle 'match all' rule correctly
  > http: add missing HTTP methods
  > memory: remove unused lambda capture in on_allocation_failure()
  > Support seastar allocator when seastar::alien is used
  > Merge "make timer related functions noexcept" from Benny
  > script: update dependecy packages for centos7/8
  > tutorial: add linebreak between sections
  > doc: add nav for the second last chap
  > doc: add nav bar at the bottom also
  > doc: rename add_prologue() to add_nav_to_body()
  > Wrong name used in an example in mini tutorial.
2020-10-30 09:49:47 +02:00
Avi Kivity
27125a45b2 test: switch lsa-related tests (imr_test and double_decker_test) to seastar framework
An upcoming change in Seastar only initializes the Seastar allocator in
reactor threads. This causes imr_test and double_decker_test to fail:

 1. Those tests rely on LSA working
 2. LSA requires the Seastar allocator
 3. Seastar is not initialized, so the Seastar allocator is not initialized.

Fix by switching to the Seastar test framework, which initializes Seastar.

Closes #7486
2020-10-30 08:06:04 +02:00
Avi Kivity
8a8589038c test: increase quota for tests to 6GB
test.py estimates the amount of memory needed per test
in order not to overload the machine, but it underestimates
badly and so machines with many cores but not a lot of memory
fail the tests (in debug mode principally) due to running out
of memory.

Increase the estimate from 2GB per test to 6GB.

Closes #7499
2020-10-30 08:04:40 +02:00
Avi Kivity
24097eee11 test: sstable_3_x_test: reduce stack usage in thread- local storage initialization
gcc collects all the initialization code for thread-local storage
and puts it in one giant function. In combination with debug mode,
this creates a very large stack frame that overflows the stack
on aarch64.

Work around the problem by placing each initializer expression in
its own function, thus reusing the stack.

Closes #7509
2020-10-30 08:03:44 +02:00
Piotr Grabowski
e96ef0d629 tests: Cleanup select_statement_utils
Add additional comments to select_statement_utils, fix formatting, add
missing #pragma once and introduce set_internal_paging_size_guard to
set internal_paging in RAII fashion.

Closes #7507
2020-10-29 15:25:02 +01:00
Asias He
d47033837a gossiper: Use dedicated gossip scheduling group
Gossip currently runs inside the default (main) scheduling group, and
that is mostly fine. However, from time to time we see many tasks in
the main scheduling group, and we suspect gossip. It is best to move
gossip to a dedicated scheduling group, so that we can catch bugs that
leak tasks to the main group more easily.

After this patch, we can check:

scylla_scheduler_time_spent_on_task_quota_violations_ms{group="gossip",shard="0"}

Fixes: #7154
Tests: unit(dev)
2020-10-29 12:53:37 +02:00
Avi Kivity
bd73898a5c dist: redhat: don't pull in kernel package
We require a kernel that is at least 3.10.0-514, because older
kernel have an XFS related bug that causes data corruption. However
this Requires: clause pulls in a kernel even in Docker installation,
where it (and especially the associated firmware) occupies a lot of
space.

Change to a Conflicts: instead. This prevents installation when
the really old kernel is present, but doesn't pull it in for the
Docker image.

Closes #7502
2020-10-29 12:44:22 +02:00
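The spec-file change implied by the message can be sketched like this (an illustrative fragment, not the exact scylla.spec diff):

```
# Before: a versioned Requires pulls a kernel (and its firmware)
# into Docker installations that have no use for one
Requires: kernel >= 3.10.0-514

# After: a Conflicts merely refuses to install alongside a kernel
# old enough to have the XFS data-corruption bug
Conflicts: kernel < 3.10.0-514
```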
Piotr Sarna
8c645f74ce Merge 'select_statement: Fix aggregate results on indexed selects (timeouts fixed) ' from Piotr Grabowski
Overview
Fixes #7355.

Before this changes, there were a few invalid results of aggregates/GROUP BY on tables with secondary indexes (see below).

Unfortunately, it still does NOT fix the problem in issue #7043. Although this PR moves the fix for that issue forward, there is still a bug with `TOKEN(...)` in `WHERE` clauses of indexed selects that is not addressed in this PR. It will be fixed in my next PR.

It does NOT fix the problems in issues #7432, #7431 as those are out-of-scope of this PR and do not affect the correctness of results (only return a too large page).

GROUP BY (first commit)
Before the change, `GROUP BY` `SELECT`s with some `WHERE` restrictions on an indexed column would return invalid results (same grouped column values appearing multiple times):
```
CREATE TABLE ks.t(pk int, ck int, v int, PRIMARY KEY(pk, ck));
CREATE INDEX ks_t on ks.t(v);
INSERT INTO ks.t(pk, ck, v) VALUES (1, 2, 3);
INSERT INTO ks.t(pk, ck, v) VALUES (1, 4, 3);
SELECT pk FROM ks.t WHERE v=3 GROUP BY pk;
 pk
----
  1
  1
```
This is fixed by correctly passing `_group_by_cell_indices` to `result_set_builder`. Fixes the third failing example from issue #7355.

Paging (second commit)
Fixes two issues related to improper paging on indexed `SELECT`s. As those two issues are closely related (fixing one without fixing the other causes invalid results of queries), they are in a single commit (second commit).

The first issue is that when using `slice.set_range`, the existing `_row_ranges` (which specify clustering key prefixes) are not taken into account. This caused the wrong rows to be included in the result, as the clustering key bound was set to a half-open range:
```
CREATE TABLE ks.t(a int, b int, c int, PRIMARY KEY ((a, b), c));
CREATE INDEX kst_index ON ks.t(c);
INSERT INTO ks.t(a, b, c) VALUES (1, 2, 3);
INSERT INTO ks.t(a, b, c) VALUES (1, 2, 4);
INSERT INTO ks.t(a, b, c) VALUES (1, 2, 5);
SELECT COUNT(*) FROM ks.t WHERE c = 3;
 count
-------
     2
```
The second commit fixes this issue by properly trimming `row_ranges`.

The second fixed problem is related to setting the `paging_state` in `internal_options`. It was improperly set to the value obtained just after reading from the index, making the base query start from an invalid `paging_state`.

The second commit fixes this issue by setting the `paging_state` after both index and base table queries are done. Moreover, the `paging_state` is now set based on `paging_state` of index query and the results of base table query (as base query can return more rows than index query).

The second commit fixes the first two failing examples from issue #7355.

Tests (fourth commit)
Extensively tests queries on tables with secondary indices with aggregates and `GROUP BY`s.

Tests three cases that are implemented in `indexed_table_select_statement::do_execute` - `partition_slices`,
`whole_partitions` and (non-`partition_slices` and non-`whole_partitions`). As some of the issues found were related to paging, the tests check scenarios where the inserted data is smaller than a page, larger than a page and larger than two pages (and some in-between page boundaries scenarios).

I found all those parameters (the `do_execute` case, the number of inserted rows) to have an impact on those fixed bugs, therefore the tests validate a large number of those scenarios.

Configurable internal_paging_size (third commit)
Before this change, the internal `page_size` used for aggregate, `GROUP BY` or non-paged filtering queries was hard-coded to `DEFAULT_COUNT_PAGE_SIZE` (10,000). This change adds a new `internal_paging_size` variable, configurable via the `set_internal_paging_size` and `reset_internal_paging_size` free functions. This functionality is only meant for testing purposes.
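A Python sketch of the shape of that knob (the names follow the commit message; the real code defines C++ free functions):

```python
DEFAULT_COUNT_PAGE_SIZE = 10_000   # the previously hard-coded value

_internal_paging_size = DEFAULT_COUNT_PAGE_SIZE

def internal_paging_size():
    return _internal_paging_size

def set_internal_paging_size(size):
    """Shrink (or grow) the internal page size, e.g. so a test can
    exercise multi-page code paths with only a handful of rows."""
    global _internal_paging_size
    _internal_paging_size = size

def reset_internal_paging_size():
    global _internal_paging_size
    _internal_paging_size = DEFAULT_COUNT_PAGE_SIZE
```

With the page size set to, say, 3, inserting ten rows already spans four internal pages, which keeps such tests small and fast even in debug builds.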

Closes #7497

* github.com:scylladb/scylla:
  tests: Add secondary index aggregates tests
  select_statement: Introduce internal_paging_size
  select_statement: Fix paging on indexed selects
  select_statement: Fix GROUP BY on indexed select
2020-10-29 08:30:16 +01:00
Takuya ASADA
fc1c4f2261 scylla_raid_setup: use sysfs to detect existing RAID volume
We may not be able to detect an existing RAID volume by device file
existence; we should use sysfs instead to make sure it's running.
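A sketch of the idea in Python (the exact sysfs path the script checks isn't shown here; this assumes the conventional `/sys/block/<name>/md` directory that the kernel exposes for an assembled md array):

```python
import os

def raid_is_running(md_name):
    """A running md array has an "md" directory under sysfs; the /dev
    node alone can be stale or absent, so it is not a reliable signal."""
    return os.path.isdir(f"/sys/block/{md_name}/md")
```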

Fixes #7383

Closes #7399
2020-10-29 09:13:55 +02:00
Avi Kivity
17226f2f6c tools: toolchain: update to Fedora 33 with clang 11
Update the toolchain to Fedora 33 with clang 11 (note the
build still uses gcc).

The image now creates a /root/.m2/repository directory; without
this the tools/jmx build fails on aarch64.

Add java-1.8.0-openjdk-devel since that is where javac lives now.
Add a JAVA8_HOME environment variable; without this ant is not
able to find javac.

The toolchain is enabled for x86_64 and aarch64.
2020-10-28 20:21:44 +02:00
Piotr Grabowski
006d4f40d9 tests: Add secondary index aggregates tests
Extensively tests queries on tables with secondary indices with
aggregates and GROUP BYs. Tests three cases that are implemented
in indexed_table_select_statement::do_execute - partition_slices,
whole_partitions and (non-partition_slices and non-whole_partitions).
As some of the issues found were related to paging, the tests check
scenarios where the inserted data is smaller than a page, larger than
a page and larger than two pages (and some boundary scenarios).
2020-10-28 17:01:25 +01:00
Piotr Grabowski
4975d55cdc select_statement: Introduce internal_paging_size
Before this change, internal page_size when doing aggregate, GROUP BY
or nonpaged filtering queries was hard-coded to DEFAULT_COUNT_PAGE_SIZE.
This made testing hard (timeouts in debug build), because the tests had
to be large to test cases when there are multiple internal pages.

This change adds a new internal_paging_size variable, configurable
by the set_internal_paging_size and reset_internal_paging_size
free functions. This functionality is only meant for testing purposes.
2020-10-28 17:01:25 +01:00
Piotr Grabowski
b7b5066581 select_statement: Fix paging on indexed selects
Fixes two issues related to improper paging on indexed SELECTs. As those
two issues are closely related (fixing one without fixing the other
causes invalid results of queries), they are in a single commit.

The first issue is that when using slice.set_range, the existing
_row_ranges (which specify clustering key prefixes) are not taken into
account. This caused the wrong rows to be included in the result, as the
clustering key bound was set to a half-open range:

CREATE TABLE ks.t(a int, b int, c int, PRIMARY KEY ((a, b), c));
CREATE INDEX kst_index ON ks.t(c);
INSERT INTO ks.t(a, b, c) VALUES (1, 2, 3);
INSERT INTO ks.t(a, b, c) VALUES (1, 2, 4);
INSERT INTO ks.t(a, b, c) VALUES (1, 2, 5);
SELECT COUNT(*) FROM ks.t WHERE c = 3;
 count
-------
     2

This change fixes this issue by properly trimming row_ranges.

The second fixed problem is related to setting the paging_state
to internal_options. It was improperly set just after reading from
index, making the base query start from invalid paging_state.

This change fixes this issue by setting the paging_state after both
index and base table queries are done. Moreover, the paging_state is
now set based on paging_state of index query and the results of base
table query (as base query can return more rows than index query).

Fixes the first two failing examples from issue #7355.
2020-10-28 17:01:25 +01:00
Piotr Grabowski
fb10386017 select_statement: Fix GROUP BY on indexed select
Before the change, GROUP BY SELECTs with some WHERE restrictions on an 
indexed column would return invalid results (same grouped column values
appearing multiple times):

CREATE TABLE ks.t(pk int, ck int, v int, PRIMARY KEY(pk, ck));
CREATE INDEX ks_t on ks.t(v);
INSERT INTO ks.t(pk, ck, v) VALUES (1, 2, 3);
INSERT INTO ks.t(pk, ck, v) VALUES (1, 4, 3);
SELECT pk FROM ks.t WHERE v=3 GROUP BY pk;
 pk
----
  1
  1

This is fixed by correctly passing _group_by_cell_indices to 
result_set_builder. Fixes the third failing example from issue #7355.
2020-10-28 17:01:25 +01:00
Avi Kivity
5ff5d43c7a Update tools/java submodule
* tools/java e97c106047...ad48b44a26 (1):
  > build: Add generated Thrift sources to multi-Java build
2020-10-28 16:52:25 +02:00
Pavel Emelyanov
b2ce3b197e allocation_strategy: Fix standard_migrator initialization
This is the continuation of 30722b8c8e, so let me re-cite Rafael:

    The constructors of these global variables can allocate memory. Since
    the variables are thread_local, they are initialized at first use.

    There is nothing we can do if these allocations fail, so use
    disable_failure_guard.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20201028140553.21709-1-xemul@scylladb.com>
2020-10-28 16:22:23 +02:00
Asias He
289a08072a repair: Make repair_writer a shared pointer
The future of the fiber that writes data into sstables inside
the repair_writer is stored in _writer_done like below:

class repair_writer {
   _writer_done[node_idx] =
      mutation_writer::distribute_reader_and_consume_on_shards().then([this] {
         ...
      }).handle_exception([this] {
         ...
      });
}

The fiber accesses the repair_writer object in the error handling
path. We wait for _writer_done to finish before we destroy the
repair_meta object, which contains the repair_writer object, to
avoid the fiber accessing an already freed repair_writer object.

To be safer, we can make repair_writer a shared pointer and take a
reference in the distribute_reader_and_consume_on_shards code path.
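The keepalive idea, sketched in Python with asyncio (hypothetical `RepairWriter` stand-in; Python's reference counting plays the role of the C++ shared pointer captured by the continuation):

```python
import asyncio

class RepairWriter:
    async def consume(self):
        # The "fiber" writing data; `self` cannot be freed under it,
        # because this coroutine holds its own reference to the writer.
        await asyncio.sleep(0)
        return "done"

async def run():
    writer = RepairWriter()
    task = asyncio.create_task(writer.consume())
    del writer          # the owner drops its reference before the fiber ends
    return await task   # the task's reference keeps the writer alive

print(asyncio.run(run()))  # done
```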

Fixes #7406

Closes #7430
2020-10-28 16:22:23 +02:00
Avi Kivity
4b9206a180 install: abort if LD_PRELOAD is set when executing a relocatable binary
LD_PRELOAD libraries usually have dependencies in the host system,
which they will not have access to in a relocatable environment
since we use a different libc. Detect that LD_PRELOAD is in use and if
so, abort with an error.
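A sketch of the check in Python (the actual install script's wording and exit path are not shown; treat this as the shape of the guard only):

```python
import os
import sys

def check_ld_preload():
    """Refuse to run a relocatable binary with LD_PRELOAD set: the
    preloaded library was linked against the host libc, which the
    relocatable environment replaces."""
    if os.environ.get("LD_PRELOAD"):
        sys.exit("error: LD_PRELOAD is set; unset it before running "
                 "a relocatable Scylla binary")
```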

Fixes #7493.

Closes #7494
2020-10-28 16:22:23 +02:00
Avi Kivity
2a42fc5cde build: supply linker flags only to the linker, not the compiler
Clang complains if it sees linker-only flags when called for compilation,
so move the compile-time flags from cxx_ld_flags to cxxflags, and remove
cxx_ld_flags from the compiler command line.

The linker flags are also passed to Seastar so that the build-id and
interpreter hacks still apply to iotune.

Closes #7466
2020-10-28 16:22:23 +02:00
Avi Kivity
fc15d0a4be build: relocatable package: exclude tools/python3
python3 has its own relocatable package, no need to include it
in scylla-package.tar.gz.


Closes #7467
2020-10-28 16:22:23 +02:00
Avi Kivity
6eb3ba74e4 Update tools/java submodule
* tools/java f2e8666d7e...e97c106047 (1):
  > Relocatable Package: create product prefixed relocatable archive
2020-10-28 08:47:49 +02:00
Juliusz Stasiewicz
e0176bccab create_table_statement: Disallow default TTL on counter tables
On such an attempt, `invalid_request_exception` is thrown.
Also, simple CQL test is added.

Fixes #6879
2020-10-27 22:44:02 +02:00
Nadav Har'El
92b741b4ff alternator test: more tests for disabled streams and closed shards
We already have a test for the behavior of a closed shard and how
iterators previously created for it are still valid. In this patch
we add to this also checking that the shard id itself, not just the
iterator, is still valid.

Additionally, although the aforementioned test used a disabled stream
to create a closed shard, it was not a complete test for the behavior
of a disabled stream, and this patch adds such a test. We check that
although the stream is disabled, it is still fully usable (for 24 hours) -
its original ARN is still listed on ListStreams, the ARN is still usable,
its shards can be listed, all are marked as closed but still fully readable.

Both tests pass on DynamoDB, and xfail on Alternator because of
issue #7239 - CDC drops the CDC log table as soon as CDC is disabled,
so the stream data is lost immediately instead of being retained for
24 hours.

Refs #7239

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20201006183915.434055-1-nyh@scylladb.com>
2020-10-27 22:44:02 +02:00
Nadav Har'El
a57d4c0092 docs: clean up format of docs/alternator/getting-started.md
In https://github.com/scylladb/scylla-docs/pull/3105 it was noted that
the Sphinx document parser doesn't like a horizontal line ("---") in
the beginning of a section. Since there is no real reason why we must
have this horizontal line, let's just remove it.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20201001151312.261825-1-nyh@scylladb.com>
2020-10-27 22:44:02 +02:00
Avi Kivity
e2a02f15c2 Merge 'transport/system_ks: Add more info to system.clients' from Juliusz Stasiewicz
This patch fills the following columns in `system.clients` table:
* `connection_stage`
* `driver_name`
* `driver_version`
* `protocol_version`

It also improves:
* `client_type` - distinguishes cql from thrift just in case
* `username` - it now displays the correct username iff `PasswordAuthenticator` is configured.

What is still missing:
* SSL params (I'll happily get some advice here)
* `hostname` - I didn't find it in tested drivers

Refs #6946

Closes #7349

* github.com:scylladb/scylla:
  transport: Update `connection_stage` in `system.clients`
  transport: Retrieve driver's name and version from STARTUP message
  transport: Notify `system.clients` about "protocol_version"
  transport: On successful authentication add `username` to system.clients
2020-10-27 22:44:02 +02:00
Amnon Heiman
52db99f25f scyllatop/livedata.py: Safe iteration over metrics
This patch changes the code that iterates over the metrics to use a
copy of the metric names, making it safe to remove metrics from the
metrics object.
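The pattern, in Python (hypothetical names; scyllatop's actual metrics object differs):

```python
def prune_metrics(metrics, is_stale):
    """Iterate over a snapshot of the names so that entries can be
    deleted from the dict without invalidating the iteration."""
    for name in list(metrics):      # list() copies the keys up front
        if is_stale(metrics[name]):
            del metrics[name]

m = {"reads": 0, "writes": 3}
prune_metrics(m, lambda v: v == 0)
print(m)  # {'writes': 3}
```

Iterating the dict directly while deleting entries would instead raise `RuntimeError: dictionary changed size during iteration`.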

Fixes #7488

Signed-off-by: Amnon Heiman <amnon@scylladb.com>
2020-10-27 22:44:02 +02:00
Calle Wilund
1bc96a5785 alternator::streams: Make describe_stream use actual log ttl as window
Allows QA to bypass the normal hardcoded 24h ttl of data and still
get "proper" behaviour w.r.t. available stream set/generations.
I.e. can manually change cdc ttl option for alternator table after
streams enabled. Should not be exposed, but perhaps useful for
testing.

Closes #7483
2020-10-26 12:16:36 +02:00
Calle Wilund
4b65d67a1a partition_version: Change range_tombstones() to return chunked_vector
Refs #7364

The number of tombstones can be large. As a stopgap measure before
just returning a source range (with keepalive), we can at least
alleviate the problem by using a chunked vector.

Closes #7433
2020-10-26 11:54:42 +02:00
Benny Halevy
82aabab054 table: get rid of reshuffle_sstables
It is unused since 7351db7cab

Refs #6950

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20201026074914.34721-1-bhalevy@scylladb.com>
2020-10-26 09:50:21 +02:00
Calle Wilund
46ea8c9b8b cdc: Add an "end-of-record" column to cdc log
Fixes #7435

Adds an "eor" (end-of-record) column to cdc log. This is non-null only on
last-in-timestamp group rows, i.e. end of a singular source "event".

A client can use this as a shortcut to knowing whether or not it has a
full cdc "record" for a given source mutation (single row change).

Closes #7436
2020-10-26 09:39:27 +02:00
Dejan Mircevski
b037b0c10b cql3: Delete some newlines
Makes files shorter while still keeping the lines under 120 columns.
Separate from other commits to make review easier.

Signed-off-by: Dejan Mircevski <dejan@scylladb.com>
2020-10-19 15:40:55 -04:00
Dejan Mircevski
62ea6dcd28 cql3: Drop superfluous ALLOW FILTERING
No longer required after the last commit.

Signed-off-by: Dejan Mircevski <dejan@scylladb.com>
2020-10-19 15:38:11 -04:00
Dejan Mircevski
6773563d3d cql3: Drop unneeded filtering for continuous CK
Don't require filtering when a continuous slice of the clustering key
is requested, even if partition is unrestricted.  The read command we
generate will fetch just the selected data; filtering is unnecessary.

Some tests needed to update the expected results now that we're not
fetching the extra data needed for filtering.  (Because tests don't do
the final trim to match selectors and assert instead on all the data
read.)

Signed-off-by: Dejan Mircevski <dejan@scylladb.com>
2020-10-19 14:46:43 -04:00
Juliusz Stasiewicz
0251cb9b31 transport: Update connection_stage in system.clients 2020-10-12 18:44:00 +02:00
Juliusz Stasiewicz
6abe1352ba transport: Retrieve driver's name and version from STARTUP message 2020-10-12 18:37:19 +02:00
Juliusz Stasiewicz
d2d162ece3 transport: Notify system.clients about "protocol_version" 2020-10-12 18:32:00 +02:00
Juliusz Stasiewicz
acf0341e9b transport: On successful authentication add username to system.clients
The username becomes known in the course of resolving challenges
from `PasswordAuthenticator`. That's why username is being set on
successful authentication; until then all users are "anonymous".
Meanwhile, `AllowAllAuthenticator` (the default) does not request
username, so users logged with it will remain as "anonymous" in
`system.clients`.

Shuffling of code was necessary to unify existing infrastructure
for INSERTing entries into `system.clients` with later UPDATEs.
2020-10-06 18:52:46 +02:00
Pavel Emelyanov
8558339c63 perf_collection: Add test for full scan time
Scan here means walking the collection forward using iterator.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-10-06 09:57:37 +03:00
Pavel Emelyanov
7284469b24 perf_collection: Add test for destruction with .clear()
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-10-06 09:57:37 +03:00
Pavel Emelyanov
72ccc43380 perf_collection: Add test for single element insertion
In some cases a collection is used to keep several elements,
so it's good to know this timing.

For example, a mutation_partition keeps a set of rows: used in the
cache it can grow large; used in a mutation to apply, it is
typically small. A plain replacement of the bst with a b-tree
caused a performance degradation of mutation application, because
a b-tree is only better at large sizes.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-10-06 09:57:37 +03:00
Pavel Emelyanov
207e1aa48f perf_collection: Add intrusive_set_external_comparator
This collection is widely used, any replacement should be
compared against it to better understand pros-n-cons.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-10-06 09:57:37 +03:00
Pavel Emelyanov
2d09864627 perf_collection: Clear collection between iterations
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-10-06 09:57:37 +03:00
Pavel Emelyanov
c891f274dc test: Generalize perf_bptree into perf_collection
Rename into perf_collection and localize the B+ code.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2020-10-06 09:57:37 +03:00
Nadav Har'El
8e2e2eab7c alternator test: tests for nested attributes in FilterExpression
Alternator does not yet support direct access to nested attributes in
expressions (this is issue #5024). But it's still good to have tests
covering this feature, to make it easier to check the implementation
of this feature when it comes.

Until now we did not have tests for using nested attributes in
*FilterExpression*. This patch adds a test for the straightforward case,
and also adds tests for the more elaborate combination of FilterExpression
and ProjectionExpression. This combination - see issue #6951 - means that
some attributes need to be retrieved despite not being projected (because
they are needed in a filter). When we support nested attributes there will
be special cases when the projected and filtered attributes are parts of
the same top-level attribute, so the code will need to handle those cases
correctly. As I was working on issue #6951 now, it is a good time to write
a test for these special cases, even if nested attributes aren't yet
supported - so we don't forget to handle these special cases later.

Both new tests pass on DynamoDB, and xfail on Alternator.

Refs #5024 (nested attributes)
Refs #6951 (FilterExpression with ProjectionExpression)

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
2020-10-05 02:19:22 +03:00
Nadav Har'El
a403356ade alternator test: fix comment
A comment in test/alternator/test_lsi.py wrongly described the schema
of one of the test tables. Fix that comment.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
2020-10-05 02:19:22 +03:00
Nadav Har'El
85cc535792 alternator tests: additional tests for filter+projection combination
This patch provides two more tests for issue #6951. As this issue was
already fixed, the two new tests pass.

The two new tests check two special cases which were handled correctly
but not yet tested - when the projected attribute is a key attribute of
the table or of one of its LSIs. Having these two additional tests will
ensure that any future refactoring or optimizations in this area of
the code (filtering, projection, and their combination) will not break
these special cases.

Refs #6951.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
2020-10-05 02:19:22 +03:00
Nadav Har'El
2fc3a30b45 alternator: forbid combining old and new-style parameters
The DynamoDB API has for the Query and Scan requests two filtering
syntaxes - the old (QueryFilter or ScanFilter) and the new (FilterExpression).
Also for projection, it has an old syntax (AttributesToGet) and a new
one (ProjectionExpression). Combining an old-style and new-style parameter
is forbidden by DynamoDB, and should also be forbidden by Alternator.

This patch fixes, and removes the "xfails" tag, of two tests:
  test_query_filter.py::test_query_filter_and_projection_expression
  test_filter_expression.py::test_filter_expression_and_attributes_to_get

Refs #6951

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
2020-10-05 02:19:22 +03:00
Nadav Har'El
282742a469 alternator: fix query with both projection and filtering
We had a bug when a Query/Scan had both projection (ProjectionExpression
or AttributesToGet) and filtering (FilterExpression or Query/ScanFilter).
The problem was that projection left only the requested attributes, and
the filter might have needed - and not got - additional attributes.

The solution in this patch is to add to the generated JSON item also
the extra attributes needed by filtering (if any), run the filter on
that, and only at the end remove the extra filtering attributes from
the item to be returned.

The two tests

 test_query_filter.py::test_query_filter_and_attributes_to_get
 test_filter_expression.py::test_filter_expression_and_projection_expression

which failed before this patch now pass, so we drop their "xfail" tag.

Fixes #6951.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
2020-10-05 02:19:22 +03:00
333 changed files with 7855 additions and 5495 deletions

6
.github/CODEOWNERS vendored

@@ -79,3 +79,9 @@ db/hints/* @haaawk @piodul @vladzcloudius
# REDIS
redis/* @nyh @syuu1228
redis-test/* @nyh @syuu1228
# READERS
reader_* @denesb
querier* @denesb
test/boost/mutation_reader_test.cc @denesb
test/boost/querier_cache_test.cc @denesb

2
.gitmodules vendored

@@ -1,6 +1,6 @@
[submodule "seastar"]
path = seastar
url = ../scylla-seastar
url = ../seastar
ignore = dirty
[submodule "swagger-ui"]
path = swagger-ui


@@ -1,8 +1,5 @@
##
## For best results, first compile the project using the Ninja build-system.
##
cmake_minimum_required(VERSION 3.18)
cmake_minimum_required(VERSION 3.7)
project(scylla)
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
@@ -20,138 +17,739 @@ else()
set(BUILD_TYPE "release")
endif()
if (NOT DEFINED FOR_IDE AND NOT DEFINED ENV{FOR_IDE} AND NOT DEFINED ENV{CLION_IDE})
message(FATAL_ERROR "This CMakeLists.txt file is only valid for use in IDEs, please define FOR_IDE to acknowledge this.")
endif()
# These paths are always available, since they're included in the repository. Additional DPDK headers are placed while
# Seastar is built, and are captured in `SEASTAR_INCLUDE_DIRS` through parsing the Seastar pkg-config file (below).
set(SEASTAR_DPDK_INCLUDE_DIRS
seastar/dpdk/lib/librte_eal/common/include
seastar/dpdk/lib/librte_eal/common/include/generic
seastar/dpdk/lib/librte_eal/common/include/x86
seastar/dpdk/lib/librte_ether)
find_package(PkgConfig REQUIRED)
set(ENV{PKG_CONFIG_PATH} "${CMAKE_SOURCE_DIR}/build/${BUILD_TYPE}/seastar:$ENV{PKG_CONFIG_PATH}")
pkg_check_modules(SEASTAR seastar)
if(NOT SEASTAR_INCLUDE_DIRS)
# Default value. A more accurate list is populated through `pkg-config` below if `seastar.pc` is available.
set(SEASTAR_INCLUDE_DIRS "seastar/include")
endif()
find_package(Boost COMPONENTS filesystem program_options system thread)
##
## Populate the names of all source and header files in the indicated paths in a designated variable.
##
## When RECURSIVE is specified, directories are traversed recursively.
##
## Use: scan_scylla_source_directories(VAR my_result_var [RECURSIVE] PATHS [path1 path2 ...])
##
function (scan_scylla_source_directories)
set(options RECURSIVE)
set(oneValueArgs VAR)
set(multiValueArgs PATHS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
set(globs "")
foreach (dir ${args_PATHS})
list(APPEND globs "${dir}/*.cc" "${dir}/*.hh")
endforeach()
if (args_RECURSIVE)
set(glob_kind GLOB_RECURSE)
function(default_target_arch arch)
set(x86_instruction_sets i386 i686 x86_64)
if(CMAKE_SYSTEM_PROCESSOR IN_LIST x86_instruction_sets)
set(${arch} "westmere" PARENT_SCOPE)
elseif(CMAKE_SYSTEM_PROCESSOR EQUAL "aarch64")
set(${arch} "armv8-a+crc+crypto" PARENT_SCOPE)
else()
set(glob_kind GLOB)
set(${arch} "" PARENT_SCOPE)
endif()
endfunction()
default_target_arch(target_arch)
if(target_arch)
set(target_arch_flag "-march=${target_arch}")
endif()
file(${glob_kind} var
${globs})
# Configure Seastar compile options to align with Scylla
set(Seastar_CXX_FLAGS -fcoroutines ${target_arch_flag} CACHE INTERNAL "" FORCE)
set(Seastar_CXX_DIALECT gnu++20 CACHE INTERNAL "" FORCE)
set(${args_VAR} ${var} PARENT_SCOPE)
add_subdirectory(seastar)
add_subdirectory(abseil)
# Exclude absl::strerror from the default "all" target since it's not
# used in Scylla build and, moreover, makes use of deprecated glibc APIs,
# such as sys_nerr, which are not exposed from "stdio.h" since glibc 2.32,
# which happens to be the case for recent Fedora distribution versions.
#
# Need to use the internal "absl_strerror" target name instead of namespaced
# variant because `set_target_properties` does not understand the latter form,
# unfortunately.
set_target_properties(absl_strerror PROPERTIES EXCLUDE_FROM_ALL TRUE)
# System libraries dependencies
find_package(Boost COMPONENTS filesystem program_options system thread regex REQUIRED)
find_package(Lua REQUIRED)
find_package(ZLIB REQUIRED)
find_package(ICU COMPONENTS uc REQUIRED)
set(scylla_build_dir "${CMAKE_BINARY_DIR}/build/${BUILD_TYPE}")
set(scylla_gen_build_dir "${scylla_build_dir}/gen")
file(MAKE_DIRECTORY "${scylla_build_dir}" "${scylla_gen_build_dir}")
# Place libraries, executables and archives in ${buildroot}/build/${mode}/
foreach(mode RUNTIME LIBRARY ARCHIVE)
set(CMAKE_${mode}_OUTPUT_DIRECTORY "${scylla_build_dir}")
endforeach()
# Generate C++ source files from thrift definitions
function(scylla_generate_thrift)
set(one_value_args TARGET VAR IN_FILE OUT_DIR SERVICE)
cmake_parse_arguments(args "" "${one_value_args}" "" ${ARGN})
get_filename_component(in_file_name ${args_IN_FILE} NAME_WE)
set(aux_out_file_name ${args_OUT_DIR}/${in_file_name})
set(outputs
${aux_out_file_name}_types.cpp
${aux_out_file_name}_types.h
${aux_out_file_name}_constants.cpp
${aux_out_file_name}_constants.h
${args_OUT_DIR}/${args_SERVICE}.cpp
${args_OUT_DIR}/${args_SERVICE}.h)
add_custom_command(
DEPENDS
${args_IN_FILE}
thrift
OUTPUT ${outputs}
COMMAND ${CMAKE_COMMAND} -E make_directory ${args_OUT_DIR}
COMMAND thrift -gen cpp:cob_style,no_skeleton -out "${args_OUT_DIR}" "${args_IN_FILE}")
add_custom_target(${args_TARGET}
DEPENDS ${outputs})
set(${args_VAR} ${outputs} PARENT_SCOPE)
endfunction()
## Although Seastar is an external project, it is common enough to explore the sources while doing
## Scylla development that we'll treat the Seastar sources as part of this project for easier navigation.
scan_scylla_source_directories(
VAR SEASTAR_SOURCE_FILES
RECURSIVE
scylla_generate_thrift(
TARGET scylla_thrift_gen_cassandra
VAR scylla_thrift_gen_cassandra_files
IN_FILE interface/cassandra.thrift
OUT_DIR ${scylla_gen_build_dir}
SERVICE Cassandra)
PATHS
seastar/core
seastar/http
seastar/json
seastar/net
seastar/rpc
seastar/testing
seastar/util)
# Parse antlr3 grammar files and generate C++ sources
function(scylla_generate_antlr3)
set(one_value_args TARGET VAR IN_FILE OUT_DIR)
cmake_parse_arguments(args "" "${one_value_args}" "" ${ARGN})
scan_scylla_source_directories(
VAR SCYLLA_ROOT_SOURCE_FILES
PATHS .)
get_filename_component(in_file_pure_name ${args_IN_FILE} NAME)
get_filename_component(stem ${in_file_pure_name} NAME_WE)
scan_scylla_source_directories(
VAR SCYLLA_SUB_SOURCE_FILES
RECURSIVE
set(outputs
"${args_OUT_DIR}/${stem}Lexer.hpp"
"${args_OUT_DIR}/${stem}Lexer.cpp"
"${args_OUT_DIR}/${stem}Parser.hpp"
"${args_OUT_DIR}/${stem}Parser.cpp")
PATHS
api
auth
cql3
db
dht
exceptions
gms
index
io
locator
message
raft
repair
service
sstables
streaming
test
thrift
tracing
transport
utils)
add_custom_command(
DEPENDS
${args_IN_FILE}
OUTPUT ${outputs}
# Remove #ifdef'ed code from the grammar source code
COMMAND sed -e "/^#if 0/,/^#endif/d" "${args_IN_FILE}" > "${args_OUT_DIR}/${in_file_pure_name}"
COMMAND antlr3 "${args_OUT_DIR}/${in_file_pure_name}"
# We replace many local `ExceptionBaseType* ex` variables with a single function-scope one.
# Because we add such a variable to every function, and because `ExceptionBaseType` is not a global
# name, we also add a global typedef to avoid compilation errors.
COMMAND sed -i -e "/^.*On :.*$/d" "${args_OUT_DIR}/${stem}Lexer.hpp"
COMMAND sed -i -e "/^.*On :.*$/d" "${args_OUT_DIR}/${stem}Lexer.cpp"
COMMAND sed -i -e "/^.*On :.*$/d" "${args_OUT_DIR}/${stem}Parser.hpp"
COMMAND sed -i
-e "s/^\\( *\\)\\(ImplTraits::CommonTokenType\\* [a-zA-Z0-9_]* = NULL;\\)$/\\1const \\2/"
-e "/^.*On :.*$/d"
-e "1i using ExceptionBaseType = int;"
-e "s/^{/{ ExceptionBaseType\\* ex = nullptr;/; s/ExceptionBaseType\\* ex = new/ex = new/; s/exceptions::syntax_exception e/exceptions::syntax_exception\\& e/"
"${args_OUT_DIR}/${stem}Parser.cpp"
VERBATIM)
scan_scylla_source_directories(
VAR SCYLLA_GEN_SOURCE_FILES
RECURSIVE
PATHS build/${BUILD_TYPE}/gen)
add_custom_target(${args_TARGET}
DEPENDS ${outputs})
set(SCYLLA_SOURCE_FILES
${SCYLLA_ROOT_SOURCE_FILES}
${SCYLLA_GEN_SOURCE_FILES}
${SCYLLA_SUB_SOURCE_FILES})
set(${args_VAR} ${outputs} PARENT_SCOPE)
endfunction()
set(antlr3_grammar_files
cql3/Cql.g
alternator/expressions.g)
set(antlr3_gen_files)
foreach(f ${antlr3_grammar_files})
get_filename_component(grammar_file_name "${f}" NAME_WE)
get_filename_component(f_dir "${f}" DIRECTORY)
scylla_generate_antlr3(
TARGET scylla_antlr3_gen_${grammar_file_name}
VAR scylla_antlr3_gen_${grammar_file_name}_files
IN_FILE ${f}
OUT_DIR ${scylla_gen_build_dir}/${f_dir})
list(APPEND antlr3_gen_files "${scylla_antlr3_gen_${grammar_file_name}_files}")
endforeach()
# Generate C++ sources from ragel grammar files
seastar_generate_ragel(
TARGET scylla_ragel_gen_protocol_parser
VAR scylla_ragel_gen_protocol_parser_file
IN_FILE redis/protocol_parser.rl
OUT_FILE ${scylla_gen_build_dir}/redis/protocol_parser.hh)
# Generate C++ sources from Swagger definitions
set(swagger_files
api/api-doc/cache_service.json
api/api-doc/collectd.json
api/api-doc/column_family.json
api/api-doc/commitlog.json
api/api-doc/compaction_manager.json
api/api-doc/config.json
api/api-doc/endpoint_snitch_info.json
api/api-doc/error_injection.json
api/api-doc/failure_detector.json
api/api-doc/gossiper.json
api/api-doc/hinted_handoff.json
api/api-doc/lsa.json
api/api-doc/messaging_service.json
api/api-doc/storage_proxy.json
api/api-doc/storage_service.json
api/api-doc/stream_manager.json
api/api-doc/system.json
api/api-doc/utils.json)
set(swagger_gen_files)
foreach(f ${swagger_files})
get_filename_component(fname "${f}" NAME_WE)
get_filename_component(dir "${f}" DIRECTORY)
seastar_generate_swagger(
TARGET scylla_swagger_gen_${fname}
VAR scylla_swagger_gen_${fname}_files
IN_FILE "${f}"
OUT_DIR "${scylla_gen_build_dir}/${dir}")
list(APPEND swagger_gen_files "${scylla_swagger_gen_${fname}_files}")
endforeach()
# Create C++ bindings for IDL serializers
function(scylla_generate_idl_serializer)
set(one_value_args TARGET VAR IN_FILE OUT_FILE)
cmake_parse_arguments(args "" "${one_value_args}" "" ${ARGN})
get_filename_component(out_dir ${args_OUT_FILE} DIRECTORY)
set(idl_compiler "${CMAKE_SOURCE_DIR}/idl-compiler.py")
find_package(Python3 COMPONENTS Interpreter)
add_custom_command(
DEPENDS
${args_IN_FILE}
${idl_compiler}
OUTPUT ${args_OUT_FILE}
COMMAND ${CMAKE_COMMAND} -E make_directory ${out_dir}
COMMAND Python3::Interpreter ${idl_compiler} --ns ser -f ${args_IN_FILE} -o ${args_OUT_FILE})
add_custom_target(${args_TARGET}
DEPENDS ${args_OUT_FILE})
set(${args_VAR} ${args_OUT_FILE} PARENT_SCOPE)
endfunction()
set(idl_serializers
idl/cache_temperature.idl.hh
idl/commitlog.idl.hh
idl/consistency_level.idl.hh
idl/frozen_mutation.idl.hh
idl/frozen_schema.idl.hh
idl/gossip_digest.idl.hh
idl/idl_test.idl.hh
idl/keys.idl.hh
idl/messaging_service.idl.hh
idl/mutation.idl.hh
idl/paging_state.idl.hh
idl/partition_checksum.idl.hh
idl/paxos.idl.hh
idl/query.idl.hh
idl/range.idl.hh
idl/read_command.idl.hh
idl/reconcilable_result.idl.hh
idl/replay_position.idl.hh
idl/result.idl.hh
idl/ring_position.idl.hh
idl/streaming.idl.hh
idl/token.idl.hh
idl/tracing.idl.hh
idl/truncation_record.idl.hh
idl/uuid.idl.hh
idl/view.idl.hh)
set(idl_gen_files)
foreach(f ${idl_serializers})
get_filename_component(idl_name "${f}" NAME)
get_filename_component(idl_target "${idl_name}" NAME_WE)
get_filename_component(idl_dir "${f}" DIRECTORY)
string(REPLACE ".idl.hh" ".dist.hh" idl_out_hdr_name "${idl_name}")
scylla_generate_idl_serializer(
TARGET scylla_idl_gen_${idl_target}
VAR scylla_idl_gen_${idl_target}_files
IN_FILE ${f}
OUT_FILE ${scylla_gen_build_dir}/${idl_dir}/${idl_out_hdr_name})
list(APPEND idl_gen_files "${scylla_idl_gen_${idl_target}_files}")
endforeach()
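The loop above derives each serializer's output header by rewriting the `.idl.hh` suffix to `.dist.hh` and mirroring the input's directory under the generated-sources tree. A minimal sketch of that name mapping in Python (illustrative only; the actual generation is done by `idl-compiler.py` via the CMake function above):

```python
import os

def idl_output_path(idl_file, gen_build_dir):
    """Map idl/foo.idl.hh -> <gen_build_dir>/idl/foo.dist.hh,
    mirroring the string(REPLACE ...) logic in the CMake loop."""
    idl_dir, idl_name = os.path.split(idl_file)
    out_name = idl_name.replace(".idl.hh", ".dist.hh")
    return os.path.join(gen_build_dir, idl_dir, out_name)

print(idl_output_path("idl/uuid.idl.hh", "build/gen"))
# prints build/gen/idl/uuid.dist.hh
```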
set(scylla_sources
absl-flat_hash_map.cc
alternator/auth.cc
alternator/base64.cc
alternator/conditions.cc
alternator/executor.cc
alternator/expressions.cc
alternator/serialization.cc
alternator/server.cc
alternator/stats.cc
alternator/streams.cc
api/api.cc
api/cache_service.cc
api/collectd.cc
api/column_family.cc
api/commitlog.cc
api/compaction_manager.cc
api/config.cc
api/endpoint_snitch.cc
api/error_injection.cc
api/failure_detector.cc
api/gossiper.cc
api/hinted_handoff.cc
api/lsa.cc
api/messaging_service.cc
api/storage_proxy.cc
api/storage_service.cc
api/stream_manager.cc
api/system.cc
atomic_cell.cc
auth/allow_all_authenticator.cc
auth/allow_all_authorizer.cc
auth/authenticated_user.cc
auth/authentication_options.cc
auth/authenticator.cc
auth/common.cc
auth/default_authorizer.cc
auth/password_authenticator.cc
auth/passwords.cc
auth/permission.cc
auth/permissions_cache.cc
auth/resource.cc
auth/role_or_anonymous.cc
auth/roles-metadata.cc
auth/sasl_challenge.cc
auth/service.cc
auth/standard_role_manager.cc
auth/transitional.cc
bytes.cc
canonical_mutation.cc
cdc/cdc_partitioner.cc
cdc/generation.cc
cdc/log.cc
cdc/metadata.cc
cdc/split.cc
clocks-impl.cc
collection_mutation.cc
compress.cc
connection_notifier.cc
converting_mutation_partition_applier.cc
counters.cc
cql3/abstract_marker.cc
cql3/attributes.cc
cql3/cf_name.cc
cql3/column_condition.cc
cql3/column_identifier.cc
cql3/column_specification.cc
cql3/constants.cc
cql3/cql3_type.cc
cql3/expr/expression.cc
cql3/functions/aggregate_fcts.cc
cql3/functions/castas_fcts.cc
cql3/functions/error_injection_fcts.cc
cql3/functions/functions.cc
cql3/functions/user_function.cc
cql3/index_name.cc
cql3/keyspace_element_name.cc
cql3/lists.cc
cql3/maps.cc
cql3/operation.cc
cql3/query_options.cc
cql3/query_processor.cc
cql3/relation.cc
cql3/restrictions/statement_restrictions.cc
cql3/result_set.cc
cql3/role_name.cc
cql3/selection/abstract_function_selector.cc
cql3/selection/selectable.cc
cql3/selection/selection.cc
cql3/selection/selector.cc
cql3/selection/selector_factories.cc
cql3/selection/simple_selector.cc
cql3/sets.cc
cql3/single_column_relation.cc
cql3/statements/alter_keyspace_statement.cc
cql3/statements/alter_table_statement.cc
cql3/statements/alter_type_statement.cc
cql3/statements/alter_view_statement.cc
cql3/statements/authentication_statement.cc
cql3/statements/authorization_statement.cc
cql3/statements/batch_statement.cc
cql3/statements/cas_request.cc
cql3/statements/cf_prop_defs.cc
cql3/statements/cf_statement.cc
cql3/statements/create_function_statement.cc
cql3/statements/create_index_statement.cc
cql3/statements/create_keyspace_statement.cc
cql3/statements/create_table_statement.cc
cql3/statements/create_type_statement.cc
cql3/statements/create_view_statement.cc
cql3/statements/delete_statement.cc
cql3/statements/drop_function_statement.cc
cql3/statements/drop_index_statement.cc
cql3/statements/drop_keyspace_statement.cc
cql3/statements/drop_table_statement.cc
cql3/statements/drop_type_statement.cc
cql3/statements/drop_view_statement.cc
cql3/statements/function_statement.cc
cql3/statements/grant_statement.cc
cql3/statements/index_prop_defs.cc
cql3/statements/index_target.cc
cql3/statements/ks_prop_defs.cc
cql3/statements/list_permissions_statement.cc
cql3/statements/list_users_statement.cc
cql3/statements/modification_statement.cc
cql3/statements/permission_altering_statement.cc
cql3/statements/property_definitions.cc
cql3/statements/raw/parsed_statement.cc
cql3/statements/revoke_statement.cc
cql3/statements/role-management-statements.cc
cql3/statements/schema_altering_statement.cc
cql3/statements/select_statement.cc
cql3/statements/truncate_statement.cc
cql3/statements/update_statement.cc
cql3/statements/use_statement.cc
cql3/token_relation.cc
cql3/tuples.cc
cql3/type_json.cc
cql3/untyped_result_set.cc
cql3/update_parameters.cc
cql3/user_types.cc
cql3/ut_name.cc
cql3/util.cc
cql3/values.cc
cql3/variable_specifications.cc
data/cell.cc
database.cc
db/batchlog_manager.cc
db/commitlog/commitlog.cc
db/commitlog/commitlog_entry.cc
db/commitlog/commitlog_replayer.cc
db/config.cc
db/consistency_level.cc
db/cql_type_parser.cc
db/data_listeners.cc
db/extensions.cc
db/heat_load_balance.cc
db/hints/manager.cc
db/hints/resource_manager.cc
db/large_data_handler.cc
db/legacy_schema_migrator.cc
db/marshal/type_parser.cc
db/schema_tables.cc
db/size_estimates_virtual_reader.cc
db/snapshot-ctl.cc
db/sstables-format-selector.cc
db/system_distributed_keyspace.cc
db/system_keyspace.cc
db/view/row_locking.cc
db/view/view.cc
db/view/view_update_generator.cc
dht/boot_strapper.cc
dht/i_partitioner.cc
dht/murmur3_partitioner.cc
dht/range_streamer.cc
dht/token.cc
distributed_loader.cc
duration.cc
exceptions/exceptions.cc
flat_mutation_reader.cc
frozen_mutation.cc
frozen_schema.cc
gms/application_state.cc
gms/endpoint_state.cc
gms/failure_detector.cc
gms/feature_service.cc
gms/gossip_digest_ack.cc
gms/gossip_digest_ack2.cc
gms/gossip_digest_syn.cc
gms/gossiper.cc
gms/inet_address.cc
gms/version_generator.cc
gms/versioned_value.cc
hashers.cc
index/secondary_index.cc
index/secondary_index_manager.cc
init.cc
keys.cc
lister.cc
locator/abstract_replication_strategy.cc
locator/ec2_multi_region_snitch.cc
locator/ec2_snitch.cc
locator/everywhere_replication_strategy.cc
locator/gce_snitch.cc
locator/gossiping_property_file_snitch.cc
locator/local_strategy.cc
locator/network_topology_strategy.cc
locator/production_snitch_base.cc
locator/rack_inferring_snitch.cc
locator/simple_snitch.cc
locator/simple_strategy.cc
locator/snitch_base.cc
locator/token_metadata.cc
lua.cc
main.cc
memtable.cc
message/messaging_service.cc
multishard_mutation_query.cc
mutation.cc
raft/fsm.cc
raft/log.cc
raft/progress.cc
raft/raft.cc
raft/server.cc
mutation_fragment.cc
mutation_partition.cc
mutation_partition_serializer.cc
mutation_partition_view.cc
mutation_query.cc
mutation_reader.cc
mutation_writer/multishard_writer.cc
mutation_writer/shard_based_splitting_writer.cc
mutation_writer/timestamp_based_splitting_writer.cc
partition_slice_builder.cc
partition_version.cc
querier.cc
query-result-set.cc
query.cc
range_tombstone.cc
range_tombstone_list.cc
reader_concurrency_semaphore.cc
redis/abstract_command.cc
redis/command_factory.cc
redis/commands.cc
redis/keyspace_utils.cc
redis/lolwut.cc
redis/mutation_utils.cc
redis/options.cc
redis/query_processor.cc
redis/query_utils.cc
redis/server.cc
redis/service.cc
redis/stats.cc
repair/repair.cc
repair/row_level.cc
row_cache.cc
schema.cc
schema_mutations.cc
schema_registry.cc
service/client_state.cc
service/migration_manager.cc
service/migration_task.cc
service/misc_services.cc
service/pager/paging_state.cc
service/pager/query_pagers.cc
service/paxos/paxos_state.cc
service/paxos/prepare_response.cc
service/paxos/prepare_summary.cc
service/paxos/proposal.cc
service/priority_manager.cc
service/storage_proxy.cc
service/storage_service.cc
sstables/compaction.cc
sstables/compaction_manager.cc
sstables/compaction_strategy.cc
sstables/compress.cc
sstables/integrity_checked_file_impl.cc
sstables/kl/writer.cc
sstables/leveled_compaction_strategy.cc
sstables/m_format_read_helpers.cc
sstables/metadata_collector.cc
sstables/mp_row_consumer.cc
sstables/mx/writer.cc
sstables/partition.cc
sstables/prepended_input_stream.cc
sstables/random_access_reader.cc
sstables/size_tiered_compaction_strategy.cc
sstables/sstable_directory.cc
sstables/sstable_version.cc
sstables/sstables.cc
sstables/sstables_manager.cc
sstables/time_window_compaction_strategy.cc
sstables/writer.cc
streaming/progress_info.cc
streaming/session_info.cc
streaming/stream_coordinator.cc
streaming/stream_manager.cc
streaming/stream_plan.cc
streaming/stream_reason.cc
streaming/stream_receive_task.cc
streaming/stream_request.cc
streaming/stream_result_future.cc
streaming/stream_session.cc
streaming/stream_session_state.cc
streaming/stream_summary.cc
streaming/stream_task.cc
streaming/stream_transfer_task.cc
table.cc
table_helper.cc
thrift/controller.cc
thrift/handler.cc
thrift/server.cc
thrift/thrift_validation.cc
timeout_config.cc
tracing/trace_keyspace_helper.cc
tracing/trace_state.cc
tracing/traced_file.cc
tracing/tracing.cc
tracing/tracing_backend_registry.cc
transport/controller.cc
transport/cql_protocol_extension.cc
transport/event.cc
transport/event_notifier.cc
transport/messages/result_message.cc
transport/server.cc
types.cc
unimplemented.cc
utils/UUID_gen.cc
utils/arch/powerpc/crc32-vpmsum/crc32_wrapper.cc
utils/array-search.cc
utils/ascii.cc
utils/big_decimal.cc
utils/bloom_calculations.cc
utils/bloom_filter.cc
utils/buffer_input_stream.cc
utils/build_id.cc
utils/config_file.cc
utils/directories.cc
utils/disk-error-handler.cc
utils/dynamic_bitset.cc
utils/error_injection.cc
utils/exceptions.cc
utils/file_lock.cc
utils/generation-number.cc
utils/gz/crc_combine.cc
utils/human_readable.cc
utils/i_filter.cc
utils/large_bitset.cc
utils/like_matcher.cc
utils/limiting_data_source.cc
utils/logalloc.cc
utils/managed_bytes.cc
utils/multiprecision_int.cc
utils/murmur_hash.cc
utils/rate_limiter.cc
utils/rjson.cc
utils/runtime.cc
utils/updateable_value.cc
utils/utf8.cc
utils/uuid.cc
validation.cc
vint-serialization.cc
zstd.cc
release.cc)
set(scylla_gen_sources
"${scylla_thrift_gen_cassandra_files}"
"${scylla_ragel_gen_protocol_parser_file}"
"${swagger_gen_files}"
"${idl_gen_files}"
"${antlr3_gen_files}")
add_executable(scylla
${scylla_sources}
${scylla_gen_sources})
# If the Seastar pkg-config information is available, append to the default flags.
#
# For ease of browsing the source code, we always pretend that DPDK is enabled.
target_compile_options(scylla PUBLIC
-std=gnu++20
-DHAVE_DPDK
-DHAVE_HWLOC
"${SEASTAR_CFLAGS}")
target_link_libraries(scylla PRIVATE
seastar
# Boost dependencies
Boost::filesystem
Boost::program_options
Boost::system
Boost::thread
Boost::regex
Boost::headers
# Abseil libs
absl::hashtablez_sampler
absl::raw_hash_set
absl::synchronization
absl::graphcycles_internal
absl::stacktrace
absl::symbolize
absl::debugging_internal
absl::demangle_internal
absl::time
absl::time_zone
absl::int128
absl::city
absl::hash
absl::malloc_internal
absl::spinlock_wait
absl::base
absl::dynamic_annotations
absl::raw_logging_internal
absl::exponential_biased
absl::throw_delegate
# System libs
ZLIB::ZLIB
ICU::uc
systemd
zstd
snappy
${LUA_LIBRARIES}
thrift
crypt)
# The order matters here: prefer the "static" DPDK directories to any dynamic paths from pkg-config. Some files are only
# available dynamically, though.
target_include_directories(scylla PUBLIC
.
${SEASTAR_DPDK_INCLUDE_DIRS}
${SEASTAR_INCLUDE_DIRS}
${Boost_INCLUDE_DIRS}
xxhash
libdeflate
abseil
build/${BUILD_TYPE}/gen)
target_link_libraries(scylla PRIVATE
-Wl,--build-id=sha1 # Force SHA1 build-id generation
# TODO: Use the lld linker if it's available, otherwise gold, falling back to bfd
-fuse-ld=lld)
# TODO: patch dynamic linker to match configure.py behavior
target_compile_options(scylla PRIVATE
-std=gnu++20
-fcoroutines # TODO: Clang does not have this flag; adjust to support both compilers
${target_arch_flag})
# Hacks needed to expose internal APIs for xxhash dependencies
target_compile_definitions(scylla PRIVATE XXH_PRIVATE_API HAVE_LZ4_COMPRESS_DEFAULT)
target_include_directories(scylla PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}"
libdeflate
abseil
"${scylla_gen_build_dir}")
###
### Create crc_combine_table helper executable.
### Use it to generate crc_combine_table.cc to be used in scylla at build time.
###
add_executable(crc_combine_table utils/gz/gen_crc_combine_table.cc)
target_link_libraries(crc_combine_table PRIVATE seastar)
target_include_directories(crc_combine_table PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
target_compile_options(crc_combine_table PRIVATE
-std=gnu++20
-fcoroutines
${target_arch_flag})
add_dependencies(scylla crc_combine_table)
# Generate an additional source file at build time that is needed for Scylla compilation
add_custom_command(OUTPUT "${scylla_gen_build_dir}/utils/gz/crc_combine_table.cc"
COMMAND $<TARGET_FILE:crc_combine_table> > "${scylla_gen_build_dir}/utils/gz/crc_combine_table.cc"
DEPENDS crc_combine_table)
target_sources(scylla PRIVATE "${scylla_gen_build_dir}/utils/gz/crc_combine_table.cc")
###
### Generate version file and supply appropriate compile definitions for release.cc
###
execute_process(COMMAND ${CMAKE_SOURCE_DIR}/SCYLLA-VERSION-GEN RESULT_VARIABLE scylla_version_gen_res)
if(scylla_version_gen_res)
message(SEND_ERROR "Version file generation failed. Return code: ${scylla_version_gen_res}")
endif()
file(READ build/SCYLLA-VERSION-FILE scylla_version)
string(STRIP "${scylla_version}" scylla_version)
file(READ build/SCYLLA-RELEASE-FILE scylla_release)
string(STRIP "${scylla_release}" scylla_release)
get_property(release_cdefs SOURCE "${CMAKE_SOURCE_DIR}/release.cc" PROPERTY COMPILE_DEFINITIONS)
list(APPEND release_cdefs "SCYLLA_VERSION=\"${scylla_version}\"" "SCYLLA_RELEASE=\"${scylla_release}\"")
set_source_files_properties("${CMAKE_SOURCE_DIR}/release.cc" PROPERTIES COMPILE_DEFINITIONS "${release_cdefs}")
###
### Custom command for building libdeflate. Link the library to scylla.
###
set(libdeflate_lib "${scylla_build_dir}/libdeflate/libdeflate.a")
add_custom_command(OUTPUT "${libdeflate_lib}"
COMMAND make -C libdeflate
BUILD_DIR=../build/${BUILD_TYPE}/libdeflate/
CC=${CMAKE_C_COMPILER}
"CFLAGS=${target_arch_flag}"
../build/${BUILD_TYPE}/libdeflate//libdeflate.a) # The two slashes are important!
# Hack to force generating custom command to produce libdeflate.a
add_custom_target(libdeflate DEPENDS "${libdeflate_lib}")
target_link_libraries(scylla PRIVATE "${libdeflate_lib}")
# TODO: create cmake/ directory and move utilities (generate functions etc) there
# TODO: Build tests if BUILD_TESTING=on (using CTest module)

View File

@@ -1,7 +1,7 @@
#!/bin/sh
PRODUCT=scylla
VERSION=4.3.7
VERSION=4.4.dev
if test -f version
then

View File

@@ -129,8 +129,7 @@ future<std::string> get_key_from_roles(cql3::query_processor& qp, std::string us
auth::meta::roles_table::qualified_name, auth::meta::roles_table::role_col_name);
auto cl = auth::password_authenticator::consistency_for_user(username);
auto& timeout = auth::internal_distributed_timeout_config();
return qp.execute_internal(query, cl, timeout, {sstring(username)}, true).then_wrapped([username = std::move(username)] (future<::shared_ptr<cql3::untyped_result_set>> f) {
return qp.execute_internal(query, cl, auth::internal_distributed_query_state(), {sstring(username)}, true).then_wrapped([username = std::move(username)] (future<::shared_ptr<cql3::untyped_result_set>> f) {
auto res = f.get0();
auto salted_hash = std::optional<sstring>();
if (res->empty()) {

View File

@@ -123,7 +123,7 @@ struct rjson_engaged_ptr_comp {
// as internally they're stored in an array, and the order of elements is
// not important in set equality. See issue #5021
static bool check_EQ_for_sets(const rjson::value& set1, const rjson::value& set2) {
if (!set1.IsArray() || !set2.IsArray() || set1.Size() != set2.Size()) {
if (set1.Size() != set2.Size()) {
return false;
}
std::set<const rjson::value*, rjson_engaged_ptr_comp> set1_raw;
@@ -137,107 +137,45 @@ static bool check_EQ_for_sets(const rjson::value& set1, const rjson::value& set2
}
return true;
}
// Moreover, the JSON being compared can be a nested document with outer
// layers of lists and maps and some inner set - and we need to get to that
// inner set to compare it correctly with check_EQ_for_sets() (issue #8514).
static bool check_EQ(const rjson::value* v1, const rjson::value& v2);
static bool check_EQ_for_lists(const rjson::value& list1, const rjson::value& list2) {
if (!list1.IsArray() || !list2.IsArray() || list1.Size() != list2.Size()) {
return false;
}
auto it1 = list1.Begin();
auto it2 = list2.Begin();
while (it1 != list1.End()) {
// Note: Alternator limits an item's depth (rjson::parse() limits
// it to around 37 levels), so this recursion is safe.
if (!check_EQ(&*it1, *it2)) {
return false;
}
++it1;
++it2;
}
return true;
}
static bool check_EQ_for_maps(const rjson::value& list1, const rjson::value& list2) {
if (!list1.IsObject() || !list2.IsObject() || list1.MemberCount() != list2.MemberCount()) {
return false;
}
for (auto it1 = list1.MemberBegin(); it1 != list1.MemberEnd(); ++it1) {
auto it2 = list2.FindMember(it1->name);
if (it2 == list2.MemberEnd() || !check_EQ(&it1->value, it2->value)) {
return false;
}
}
return true;
}
// Check if two JSON-encoded values match with the EQ relation
static bool check_EQ(const rjson::value* v1, const rjson::value& v2) {
if (v1 && v1->IsObject() && v1->MemberCount() == 1 && v2.IsObject() && v2.MemberCount() == 1) {
auto it1 = v1->MemberBegin();
auto it2 = v2.MemberBegin();
if (it1->name != it2->name) {
return false;
}
if (it1->name == "SS" || it1->name == "NS" || it1->name == "BS") {
return check_EQ_for_sets(it1->value, it2->value);
} else if(it1->name == "L") {
return check_EQ_for_lists(it1->value, it2->value);
} else if(it1->name == "M") {
return check_EQ_for_maps(it1->value, it2->value);
} else {
// Other, non-nested types (number, string, etc.) can be compared
// literally, comparing their JSON representation.
return it1->value == it2->value;
}
} else {
// If v1 and/or v2 are missing (IsNull()) the result should be false.
// In the unlikely case that the object is malformed (issue #8070),
// let's also return false.
if (!v1) {
return false;
}
if (v1->IsObject() && v1->MemberCount() == 1 && v2.IsObject() && v2.MemberCount() == 1) {
auto it1 = v1->MemberBegin();
auto it2 = v2.MemberBegin();
if ((it1->name == "SS" && it2->name == "SS") || (it1->name == "NS" && it2->name == "NS") || (it1->name == "BS" && it2->name == "BS")) {
return check_EQ_for_sets(it1->value, it2->value);
}
}
return *v1 == v2;
}
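The rewritten `check_EQ` above recurses into sets, lists and maps so that a set nested anywhere inside a document still compares order-insensitively (issues #5021, #8514). A hedged Python sketch of the same semantics, operating on DynamoDB-style JSON values (illustration only, not the actual rjson-based C++):

```python
def check_eq(v1, v2):
    # A missing stored value (v1 is None) compares unequal to anything.
    if v1 is None:
        return False
    if isinstance(v1, dict) and len(v1) == 1 and isinstance(v2, dict) and len(v2) == 1:
        (t1, x1), = v1.items()
        (t2, x2), = v2.items()
        if t1 != t2:
            return False
        if t1 in ("SS", "NS", "BS"):
            # Sets are stored as arrays, but element order must not matter.
            return len(x1) == len(x2) and set(x1) == set(x2)
        if t1 == "L":
            # Lists compare element-wise, recursing so nested sets work too.
            return len(x1) == len(x2) and all(check_eq(a, b) for a, b in zip(x1, x2))
        if t1 == "M":
            # Maps compare per key, again recursively.
            return x1.keys() == x2.keys() and all(check_eq(x1[k], x2[k]) for k in x1)
        # Other, non-nested types compare by their JSON representation.
        return x1 == x2
    return v1 == v2
```

For example, `check_eq({"L": [{"SS": ["a", "b"]}]}, {"L": [{"SS": ["b", "a"]}]})` matches, because the recursion reaches the inner set and compares it as a set.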
// Check if two JSON-encoded values match with the NE relation
static bool check_NE(const rjson::value* v1, const rjson::value& v2) {
return !check_EQ(v1, v2);
return !v1 || *v1 != v2; // null is unequal to anything.
}
// Check if two JSON-encoded values match with the BEGINS_WITH relation
bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2,
bool v1_from_query, bool v2_from_query) {
bool bad = false;
if (!v1 || !v1->IsObject() || v1->MemberCount() != 1) {
if (v1_from_query) {
throw api_error::validation("begins_with() encountered malformed argument");
} else {
bad = true;
}
} else if (v1->MemberBegin()->name != "S" && v1->MemberBegin()->name != "B") {
if (v1_from_query) {
throw api_error::validation(format("begins_with supports only string or binary type, got: {}", *v1));
} else {
bad = true;
}
}
static bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2) {
// BEGINS_WITH requires that its single operand (v2) be a string or
// binary - otherwise it's a validation error. However, problems with
// the stored attribute (v1) will just return false (no match).
if (!v2.IsObject() || v2.MemberCount() != 1) {
if (v2_from_query) {
throw api_error::validation("begins_with() encountered malformed argument");
} else {
bad = true;
}
} else if (v2.MemberBegin()->name != "S" && v2.MemberBegin()->name != "B") {
if (v2_from_query) {
throw api_error::validation(format("begins_with() supports only string or binary type, got: {}", v2));
} else {
bad = true;
}
throw api_error::validation(format("BEGINS_WITH operator encountered malformed AttributeValue: {}", v2));
}
if (bad) {
auto it2 = v2.MemberBegin();
if (it2->name != "S" && it2->name != "B") {
throw api_error::validation(format("BEGINS_WITH operator requires String or Binary type in AttributeValue, got {}", it2->name));
}
if (!v1 || !v1->IsObject() || v1->MemberCount() != 1) {
return false;
}
auto it1 = v1->MemberBegin();
auto it2 = v2.MemberBegin();
if (it1->name != it2->name) {
return false;
}
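The new `check_BEGINS_WITH` above draws a clear line: problems with the query-supplied operand (`v2`) raise a validation error, while problems with the stored attribute (`v1`) simply fail to match. A Python sketch of that rule (illustrative; error messages are approximations of the C++ ones):

```python
def check_begins_with(v1, v2):
    # The query-supplied operand must be a well-formed AttributeValue...
    if not (isinstance(v2, dict) and len(v2) == 1):
        raise ValueError("BEGINS_WITH operator encountered malformed AttributeValue")
    (t2, x2), = v2.items()
    # ...of type String or Binary; anything else is a validation error.
    if t2 not in ("S", "B"):
        raise ValueError("BEGINS_WITH operator requires String or Binary type")
    # Problems with the stored attribute just mean "no match".
    if not (isinstance(v1, dict) and len(v1) == 1):
        return False
    (t1, x1), = v1.items()
    return t1 == t2 and x1.startswith(x2)
```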
@@ -341,40 +279,24 @@ static bool check_NOT_NULL(const rjson::value* val) {
return val != nullptr;
}
// Only types S, N or B (string, number or bytes) may be compared by the
// various comparison operators - lt, le, gt, ge, and between.
// Note that in particular, if the value is missing (v->IsNull()), this
// check returns false.
static bool check_comparable_type(const rjson::value& v) {
if (!v.IsObject() || v.MemberCount() != 1) {
return false;
}
const rjson::value& type = v.MemberBegin()->name;
return type == "S" || type == "N" || type == "B";
}
// Check if two JSON-encoded values match with cmp.
template <typename Comparator>
bool check_compare(const rjson::value* v1, const rjson::value& v2, const Comparator& cmp,
bool v1_from_query, bool v2_from_query) {
bool bad = false;
if (!v1 || !check_comparable_type(*v1)) {
if (v1_from_query) {
throw api_error::validation(format("{} allow only the types String, Number, or Binary", cmp.diagnostic));
}
bad = true;
bool check_compare(const rjson::value* v1, const rjson::value& v2, const Comparator& cmp) {
if (!v2.IsObject() || v2.MemberCount() != 1) {
throw api_error::validation(
format("{} requires a single AttributeValue of type String, Number, or Binary",
cmp.diagnostic));
}
if (!check_comparable_type(v2)) {
if (v2_from_query) {
throw api_error::validation(format("{} allow only the types String, Number, or Binary", cmp.diagnostic));
}
bad = true;
const auto& kv2 = *v2.MemberBegin();
if (kv2.name != "S" && kv2.name != "N" && kv2.name != "B") {
throw api_error::validation(
format("{} requires a single AttributeValue of type String, Number, or Binary",
cmp.diagnostic));
}
if (bad) {
if (!v1 || !v1->IsObject() || v1->MemberCount() != 1) {
return false;
}
const auto& kv1 = *v1->MemberBegin();
const auto& kv2 = *v2.MemberBegin();
if (kv1.name != kv2.name) {
return false;
}
@@ -388,8 +310,7 @@ bool check_compare(const rjson::value* v1, const rjson::value& v2, const Compara
if (kv1.name == "B") {
return cmp(base64_decode(kv1.value), base64_decode(kv2.value));
}
// cannot reach here, as check_comparable_type() verifies the type is one
// of the above options.
clogger.error("check_compare panic: LHS type equals RHS type, but one is in {N,S,B} while the other isn't");
return false;
}
@@ -420,71 +341,56 @@ struct cmp_gt {
static constexpr const char* diagnostic = "GT operator";
};
// True if v is between lb and ub, inclusive. Throws or returns false
// (depending on bounds_from_query parameter) if lb > ub.
// True if v is between lb and ub, inclusive. Throws if lb > ub.
template <typename T>
static bool check_BETWEEN(const T& v, const T& lb, const T& ub, bool bounds_from_query) {
static bool check_BETWEEN(const T& v, const T& lb, const T& ub) {
if (cmp_lt()(ub, lb)) {
if (bounds_from_query) {
throw api_error::validation(
format("BETWEEN operator requires lower_bound <= upper_bound, but {} > {}", lb, ub));
} else {
return false;
}
throw api_error::validation(
format("BETWEEN operator requires lower_bound <= upper_bound, but {} > {}", lb, ub));
}
return cmp_ge()(v, lb) && cmp_le()(v, ub);
}
static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const rjson::value& ub,
bool v_from_query, bool lb_from_query, bool ub_from_query) {
if ((v && v_from_query && !check_comparable_type(*v)) ||
(lb_from_query && !check_comparable_type(lb)) ||
(ub_from_query && !check_comparable_type(ub))) {
throw api_error::validation("between allow only the types String, Number, or Binary");
}
if (!v || !v->IsObject() || v->MemberCount() != 1 ||
!lb.IsObject() || lb.MemberCount() != 1 ||
!ub.IsObject() || ub.MemberCount() != 1) {
static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const rjson::value& ub) {
if (!v) {
return false;
}
if (!v->IsObject() || v->MemberCount() != 1) {
throw api_error::validation(format("BETWEEN operator encountered malformed AttributeValue: {}", *v));
}
if (!lb.IsObject() || lb.MemberCount() != 1) {
throw api_error::validation(format("BETWEEN operator encountered malformed AttributeValue: {}", lb));
}
if (!ub.IsObject() || ub.MemberCount() != 1) {
throw api_error::validation(format("BETWEEN operator encountered malformed AttributeValue: {}", ub));
}
const auto& kv_v = *v->MemberBegin();
const auto& kv_lb = *lb.MemberBegin();
const auto& kv_ub = *ub.MemberBegin();
bool bounds_from_query = lb_from_query && ub_from_query;
if (kv_lb.name != kv_ub.name) {
if (bounds_from_query) {
throw api_error::validation(
throw api_error::validation(
format("BETWEEN operator requires the same type for lower and upper bound; instead got {} and {}",
kv_lb.name, kv_ub.name));
} else {
return false;
}
}
if (kv_v.name != kv_lb.name) { // Cannot compare different types, so v is NOT between lb and ub.
return false;
}
if (kv_v.name == "N") {
const char* diag = "BETWEEN operator";
return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag), bounds_from_query);
return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag));
}
if (kv_v.name == "S") {
return check_BETWEEN(std::string_view(kv_v.value.GetString(), kv_v.value.GetStringLength()),
std::string_view(kv_lb.value.GetString(), kv_lb.value.GetStringLength()),
std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()),
bounds_from_query);
std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()));
}
if (kv_v.name == "B") {
return check_BETWEEN(base64_decode(kv_v.value), base64_decode(kv_lb.value), base64_decode(kv_ub.value), bounds_from_query);
return check_BETWEEN(base64_decode(kv_v.value), base64_decode(kv_lb.value), base64_decode(kv_ub.value));
}
if (v_from_query) {
throw api_error::validation(
format("BETWEEN operator requires AttributeValueList elements to be of type String, Number, or Binary; instead got {}",
throw api_error::validation(
format("BETWEEN operator requires AttributeValueList elements to be of type String, Number, or Binary; instead got {}",
kv_lb.name));
} else {
return false;
}
}
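After this change, `check_BETWEEN` validates its operands unconditionally: malformed AttributeValues, mismatched bound types, or `lb > ub` raise a validation error, while a missing stored value or a stored value of a different type merely fails to match. A Python sketch, assuming the wrapped values are already decoded to comparable Python types (the real code unwraps numbers and base64-decodes binary values before comparing):

```python
def check_between(v, lb, ub):
    # A missing stored value is simply not "between" anything.
    if v is None:
        return False
    # Any malformed AttributeValue, stored or query-supplied, is an error.
    for val in (v, lb, ub):
        if not (isinstance(val, dict) and len(val) == 1):
            raise ValueError("BETWEEN operator encountered malformed AttributeValue")
    (tv, xv), = v.items()
    (tl, xl), = lb.items()
    (tu, xu), = ub.items()
    if tl != tu:
        raise ValueError("BETWEEN operator requires the same type for lower and upper bound")
    if tv != tl:
        # Cannot compare different types, so v is NOT between lb and ub.
        return False
    if tl not in ("S", "N", "B"):
        raise ValueError("BETWEEN requires bounds of type String, Number, or Binary")
    if xu < xl:
        raise ValueError("BETWEEN operator requires lower_bound <= upper_bound")
    return xl <= xv <= xu
```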
// Verify one Expect condition on one attribute (whose content is "got")
@@ -531,19 +437,19 @@ static bool verify_expected_one(const rjson::value& condition, const rjson::valu
return check_NE(got, (*attribute_value_list)[0]);
case comparison_operator_type::LT:
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
return check_compare(got, (*attribute_value_list)[0], cmp_lt{}, false, true);
return check_compare(got, (*attribute_value_list)[0], cmp_lt{});
case comparison_operator_type::LE:
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
return check_compare(got, (*attribute_value_list)[0], cmp_le{}, false, true);
return check_compare(got, (*attribute_value_list)[0], cmp_le{});
case comparison_operator_type::GT:
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
return check_compare(got, (*attribute_value_list)[0], cmp_gt{}, false, true);
return check_compare(got, (*attribute_value_list)[0], cmp_gt{});
case comparison_operator_type::GE:
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
return check_compare(got, (*attribute_value_list)[0], cmp_ge{}, false, true);
return check_compare(got, (*attribute_value_list)[0], cmp_ge{});
case comparison_operator_type::BEGINS_WITH:
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
return check_BEGINS_WITH(got, (*attribute_value_list)[0], false, true);
return check_BEGINS_WITH(got, (*attribute_value_list)[0]);
case comparison_operator_type::IN:
verify_operand_count(attribute_value_list, nonempty(), *comparison_operator);
return check_IN(got, *attribute_value_list);
@@ -555,8 +461,7 @@ static bool verify_expected_one(const rjson::value& condition, const rjson::valu
return check_NOT_NULL(got);
case comparison_operator_type::BETWEEN:
verify_operand_count(attribute_value_list, exact_size(2), *comparison_operator);
return check_BETWEEN(got, (*attribute_value_list)[0], (*attribute_value_list)[1],
false, true, true);
return check_BETWEEN(got, (*attribute_value_list)[0], (*attribute_value_list)[1]);
case comparison_operator_type::CONTAINS:
{
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
@@ -668,8 +573,7 @@ static bool calculate_primitive_condition(const parsed::primitive_condition& con
// Shouldn't happen unless we have a bug in the parser
throw std::logic_error(format("Wrong number of values {} in BETWEEN primitive_condition", cond._values.size()));
}
return check_BETWEEN(&calculated_values[0], calculated_values[1], calculated_values[2],
cond._values[0].is_constant(), cond._values[1].is_constant(), cond._values[2].is_constant());
return check_BETWEEN(&calculated_values[0], calculated_values[1], calculated_values[2]);
case parsed::primitive_condition::type::IN:
return check_IN(calculated_values);
case parsed::primitive_condition::type::VALUE:
@@ -700,17 +604,13 @@ static bool calculate_primitive_condition(const parsed::primitive_condition& con
case parsed::primitive_condition::type::NE:
return check_NE(&calculated_values[0], calculated_values[1]);
case parsed::primitive_condition::type::GT:
return check_compare(&calculated_values[0], calculated_values[1], cmp_gt{},
cond._values[0].is_constant(), cond._values[1].is_constant());
return check_compare(&calculated_values[0], calculated_values[1], cmp_gt{});
case parsed::primitive_condition::type::GE:
return check_compare(&calculated_values[0], calculated_values[1], cmp_ge{},
cond._values[0].is_constant(), cond._values[1].is_constant());
return check_compare(&calculated_values[0], calculated_values[1], cmp_ge{});
case parsed::primitive_condition::type::LT:
return check_compare(&calculated_values[0], calculated_values[1], cmp_lt{},
cond._values[0].is_constant(), cond._values[1].is_constant());
return check_compare(&calculated_values[0], calculated_values[1], cmp_lt{});
case parsed::primitive_condition::type::LE:
return check_compare(&calculated_values[0], calculated_values[1], cmp_le{},
cond._values[0].is_constant(), cond._values[1].is_constant());
return check_compare(&calculated_values[0], calculated_values[1], cmp_le{});
default:
// Shouldn't happen unless we have a bug in the parser
throw std::logic_error(format("Unknown type {} in primitive_condition object", (int)(cond._op)));

View File

@@ -52,7 +52,6 @@ bool verify_expected(const rjson::value& req, const rjson::value* previous_item)
bool verify_condition(const rjson::value& condition, bool require_all, const rjson::value* previous_item);
bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2);
bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2, bool v1_from_query, bool v2_from_query);
bool verify_condition_expression(
const parsed::condition_expression& condition_expression,

View File

@@ -404,6 +404,7 @@ future<executor::request_return_type> executor::describe_table(client_state& cli
// returned.
rjson::set(table_description, "TableStatus", "ACTIVE");
rjson::set(table_description, "TableArn", generate_arn_for_table(*schema));
rjson::set(table_description, "TableId", rjson::from_string(schema->id().to_sstring()));
// FIXME: Instead of hardcoding, we should take into account which mode was chosen
// when the table was created. But, Spark jobs expect something to be returned
// and PAY_PER_REQUEST seems closer to reality than PROVISIONED.
@@ -2244,30 +2245,19 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item.get());
rjson::value v2 = calculate_value(addition, calculate_value_caller::UpdateExpression, previous_item.get());
rjson::value result;
// An ADD can be used to create a new attribute (when
// v1.IsNull()) or to add to a pre-existing attribute:
if (v1.IsNull()) {
std::string v2_type = get_item_type_string(v2);
if (v2_type == "N" || v2_type == "SS" || v2_type == "NS" || v2_type == "BS") {
result = v2;
} else {
throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v2));
std::string v1_type = get_item_type_string(v1);
if (v1_type == "N") {
if (get_item_type_string(v2) != "N") {
throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
}
result = number_add(v1, v2);
} else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
if (get_item_type_string(v2) != v1_type) {
throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
}
result = set_sum(v1, v2);
} else {
std::string v1_type = get_item_type_string(v1);
if (v1_type == "N") {
if (get_item_type_string(v2) != "N") {
throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
}
result = number_add(v1, v2);
} else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
if (get_item_type_string(v2) != v1_type) {
throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
}
result = set_sum(v1, v2);
} else {
throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v1));
}
throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v1));
}
do_update(to_bytes(column_name), result);
},
@@ -2614,6 +2604,9 @@ filter::filter(const rjson::value& request, request_type rt,
if (expression->GetStringLength() == 0) {
throw api_error::validation("FilterExpression must not be empty");
}
if (rjson::find(request, "AttributesToGet")) {
throw api_error::validation("Cannot use both old-style and new-style parameters in same request: FilterExpression and AttributesToGet");
}
try {
// FIXME: make parse_condition_expression take string_view, get
// rid of the silly conversion to std::string.
@@ -2629,6 +2622,9 @@ filter::filter(const rjson::value& request, request_type rt,
}
}
if (conditions) {
if (rjson::find(request, "ProjectionExpression")) {
throw api_error::validation(format("Cannot use both old-style and new-style parameters in same request: {} and ProjectionExpression", conditions_attribute));
}
bool require_all = conditional_operator != conditional_operator_type::OR;
_imp = conditions_filter { require_all, rjson::copy(*conditions) };
}
@@ -2792,7 +2788,7 @@ static rjson::value encode_paging_state(const schema& schema, const service::pag
for (const column_definition& cdef : schema.partition_key_columns()) {
rjson::set_with_string_name(last_evaluated_key, std::string_view(cdef.name_as_text()), rjson::empty_object());
rjson::value& key_entry = last_evaluated_key[cdef.name_as_text()];
rjson::set_with_string_name(key_entry, type_to_string(cdef.type), json_key_column_value(*exploded_pk_it, cdef));
rjson::set_with_string_name(key_entry, type_to_string(cdef.type), rjson::parse(to_json_string(*cdef.type, *exploded_pk_it)));
++exploded_pk_it;
}
auto ck = paging_state.get_clustering_key();
@@ -2802,7 +2798,7 @@ static rjson::value encode_paging_state(const schema& schema, const service::pag
for (const column_definition& cdef : schema.clustering_key_columns()) {
rjson::set_with_string_name(last_evaluated_key, std::string_view(cdef.name_as_text()), rjson::empty_object());
rjson::value& key_entry = last_evaluated_key[cdef.name_as_text()];
rjson::set_with_string_name(key_entry, type_to_string(cdef.type), json_key_column_value(*exploded_ck_it, cdef));
rjson::set_with_string_name(key_entry, type_to_string(cdef.type), rjson::parse(to_json_string(*cdef.type, *exploded_ck_it)));
++exploded_ck_it;
}
}
@@ -2849,7 +2845,7 @@ static future<executor::request_return_type> do_query(service::storage_proxy& pr
auto query_state_ptr = std::make_unique<service::query_state>(client_state, trace_state, std::move(permit));
command->slice.options.set<query::partition_slice::option::allow_short_read>();
auto query_options = std::make_unique<cql3::query_options>(cl, infinite_timeout_config, std::vector<cql3::raw_value>{});
auto query_options = std::make_unique<cql3::query_options>(cl, std::vector<cql3::raw_value>{});
query_options = std::make_unique<cql3::query_options>(std::move(query_options), std::move(paging_state));
auto p = service::pager::query_pagers::pager(schema, selection, *query_state_ptr, *query_options, command, std::move(partition_ranges), nullptr);


@@ -603,8 +603,52 @@ std::unordered_map<std::string_view, function_handler_type*> function_handlers {
}
rjson::value v1 = calculate_value(f._parameters[0], caller, previous_item);
rjson::value v2 = calculate_value(f._parameters[1], caller, previous_item);
return to_bool_json(check_BEGINS_WITH(v1.IsNull() ? nullptr : &v1, v2,
f._parameters[0].is_constant(), f._parameters[1].is_constant()));
// TODO: There's duplication here with check_BEGINS_WITH().
// But unfortunately, the two functions differ a bit.
// If one of v1 or v2 is malformed or has an unsupported type
// (not B or S), what we do depends on whether it came from
// the user's query (is_constant()), or the item. Unsupported
// values in the query result in an error, but if they are in
// the item, we silently return false (no match).
bool bad = false;
if (!v1.IsObject() || v1.MemberCount() != 1) {
bad = true;
if (f._parameters[0].is_constant()) {
throw api_error::validation(format("{}: begins_with() encountered malformed AttributeValue: {}", caller, v1));
}
} else if (v1.MemberBegin()->name != "S" && v1.MemberBegin()->name != "B") {
bad = true;
if (f._parameters[0].is_constant()) {
throw api_error::validation(format("{}: begins_with() supports only string or binary in AttributeValue: {}", caller, v1));
}
}
if (!v2.IsObject() || v2.MemberCount() != 1) {
bad = true;
if (f._parameters[1].is_constant()) {
throw api_error::validation(format("{}: begins_with() encountered malformed AttributeValue: {}", caller, v2));
}
} else if (v2.MemberBegin()->name != "S" && v2.MemberBegin()->name != "B") {
bad = true;
if (f._parameters[1].is_constant()) {
throw api_error::validation(format("{}: begins_with() supports only string or binary in AttributeValue: {}", caller, v2));
}
}
bool ret = false;
if (!bad) {
auto it1 = v1.MemberBegin();
auto it2 = v2.MemberBegin();
if (it1->name == it2->name) {
if (it2->name == "S") {
std::string_view val1 = rjson::to_string_view(it1->value);
std::string_view val2 = rjson::to_string_view(it2->value);
ret = val1.starts_with(val2);
} else /* it2->name == "B" */ {
ret = base64_begins_with(rjson::to_string_view(it1->value), rjson::to_string_view(it2->value));
}
}
}
return to_bool_json(ret);
}
},
{"contains", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {


@@ -243,8 +243,8 @@ future<> server::verify_signature(const request& req) {
}
}
auto cache_getter = [] (std::string username) {
return get_key_from_roles(cql3::get_query_processor().local(), std::move(username));
auto cache_getter = [&qp = _qp] (std::string username) {
return get_key_from_roles(qp, std::move(username));
};
return _key_cache.get_ptr(user, cache_getter).then([this, &req,
user = std::move(user),
@@ -328,10 +328,11 @@ void server::set_routes(routes& r) {
//FIXME: A way to immediately invalidate the cache should be considered,
// e.g. when the system table which stores the keys is changed.
// For now, this propagation may take up to 1 minute.
server::server(executor& exec)
server::server(executor& exec, cql3::query_processor& qp)
: _http_server("http-alternator")
, _https_server("https-alternator")
, _executor(exec)
, _qp(qp)
, _key_cache(1024, 1min, slogger)
, _enforce_authorization(false)
, _enabled_servers{}


@@ -41,6 +41,7 @@ class server {
http_server _http_server;
http_server _https_server;
executor& _executor;
cql3::query_processor& _qp;
key_cache _key_cache;
bool _enforce_authorization;
@@ -68,7 +69,7 @@ class server {
json_parser _json_parser;
public:
server(executor& executor);
server(executor& executor, cql3::query_processor& qp);
future<> init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds,
bool enforce_authorization, semaphore* memory_limiter);


@@ -290,7 +290,9 @@ struct sequence_number {
sequence_number::sequence_number(std::string_view v)
: uuid([&] {
using namespace boost::multiprecision;
uint128_t tmp{v};
// workaround for weird clang 10 bug when calling constructor with
// view directly.
uint128_t tmp{std::string(v)};
// see above
return utils::UUID_gen::get_time_UUID_raw(uint64_t(tmp >> 64), uint64_t(tmp & std::numeric_limits<uint64_t>::max()));
}())
@@ -475,6 +477,8 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
status = "ENABLED";
}
}
auto ttl = std::chrono::seconds(opts.ttl());
rjson::set(stream_desc, "StreamStatus", rjson::from_string(status));
@@ -494,14 +498,14 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
// TODO: label
// TODO: creation time
const auto& tm = _proxy.get_token_metadata();
auto normal_token_owners = _proxy.get_token_metadata_ptr()->count_normal_token_owners();
// We cannot really "resume" the query; we must iterate over all the data,
// because we can query neither on "time" (pk) > something nor on expired entries...
// TODO: maybe add secondary index to topology table to enable this?
return _sdks.cdc_get_versioned_streams({ tm.count_normal_token_owners() }).then([this, &db, schema, shard_start, limit, ret = std::move(ret), stream_desc = std::move(stream_desc)](std::map<db_clock::time_point, cdc::streams_version> topologies) mutable {
return _sdks.cdc_get_versioned_streams({ normal_token_owners }).then([this, &db, schema, shard_start, limit, ret = std::move(ret), stream_desc = std::move(stream_desc), ttl](std::map<db_clock::time_point, cdc::streams_version> topologies) mutable {
// filter out cdc generations older than the table or now() - dynamodb_streams_max_window (24h)
auto low_ts = std::max(as_timepoint(schema->id()), db_clock::now() - dynamodb_streams_max_window);
// filter out cdc generations older than the table or now() - cdc::ttl (typically dynamodb_streams_max_window - 24h)
auto low_ts = std::max(as_timepoint(schema->id()), db_clock::now() - ttl);
auto i = topologies.lower_bound(low_ts);
// need first gen _intersecting_ the timestamp.
@@ -883,8 +887,17 @@ future<executor::request_return_type> executor::get_records(client_state& client
auto partition_slice = query::partition_slice(
std::move(bounds)
, {}, std::move(regular_columns), selection->get_query_options());
auto& opts = base->cdc_options();
auto mul = 2; // key-only, allow for delete + insert
if (opts.preimage()) {
++mul;
}
if (opts.postimage()) {
++mul;
}
auto command = ::make_lw_shared<query::read_command>(schema->id(), schema->version(), partition_slice, _proxy.get_max_result_size(partition_slice),
query::row_limit(limit * 4));
query::row_limit(limit * mul));
return _proxy.query(schema, std::move(command), std::move(partition_ranges), cl, service::storage_proxy::coordinator_query_options(default_timeout(), std::move(permit), client_state)).then(
[this, schema, partition_slice = std::move(partition_slice), selection = std::move(selection), start_time = std::move(start_time), limit, key_names = std::move(key_names), attr_names = std::move(attr_names), type, iter, high_ts] (service::storage_proxy::coordinator_query_result qr) mutable {


@@ -68,7 +68,7 @@
"summary":"Get the hinted handoff enabled by dc",
"type":"array",
"items":{
"type":"mapper_list"
"type":"array"
},
"nickname":"get_hinted_handoff_enabled_by_dc",
"produces":[


@@ -24,7 +24,7 @@
#include <seastar/http/httpd.hh>
namespace service { class load_meter; }
namespace locator { class token_metadata; }
namespace locator { class shared_token_metadata; }
namespace cql_transport { class controller; }
class thrift_controller;
namespace db { class snapshot_ctl; }
@@ -39,13 +39,15 @@ struct http_context {
distributed<database>& db;
distributed<service::storage_proxy>& sp;
service::load_meter& lmeter;
const sharded<locator::token_metadata>& token_metadata;
const sharded<locator::shared_token_metadata>& shared_token_metadata;
http_context(distributed<database>& _db,
distributed<service::storage_proxy>& _sp,
service::load_meter& _lm, const sharded<locator::token_metadata>& _tm)
: db(_db), sp(_sp), lmeter(_lm), token_metadata(_tm) {
service::load_meter& _lm, const sharded<locator::shared_token_metadata>& _stm)
: db(_db), sp(_sp), lmeter(_lm), shared_token_metadata(_stm) {
}
const locator::token_metadata& get_token_metadata();
};
future<> set_server_init(http_context& ctx);


@@ -331,15 +331,15 @@ void set_column_family(http_context& ctx, routes& r) {
});
cf::get_memtable_columns_count.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, req->param["name"], uint64_t{0}, [](column_family& cf) {
return map_reduce_cf(ctx, req->param["name"], 0, [](column_family& cf) {
return cf.active_memtable().partition_count();
}, std::plus<>());
}, std::plus<int>());
});
cf::get_all_memtable_columns_count.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, uint64_t{0}, [](column_family& cf) {
return map_reduce_cf(ctx, 0, [](column_family& cf) {
return cf.active_memtable().partition_count();
}, std::plus<>());
}, std::plus<int>());
});
cf::get_memtable_on_heap_size.set(r, [] (const_req req) {
@@ -656,7 +656,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
return s + sst->filter_size();
return sst->filter_size();
});
}, std::plus<uint64_t>());
});
@@ -664,7 +664,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_all_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
return s + sst->filter_size();
return sst->filter_size();
});
}, std::plus<uint64_t>());
});
@@ -672,7 +672,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
return s + sst->filter_memory_size();
return sst->filter_memory_size();
});
}, std::plus<uint64_t>());
});
@@ -680,7 +680,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_all_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
return s + sst->filter_memory_size();
return sst->filter_memory_size();
});
}, std::plus<uint64_t>());
});
@@ -688,7 +688,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
return s + sst->get_summary().memory_footprint();
return sst->get_summary().memory_footprint();
});
}, std::plus<uint64_t>());
});
@@ -696,7 +696,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_all_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
return s + sst->get_summary().memory_footprint();
return sst->get_summary().memory_footprint();
});
}, std::plus<uint64_t>());
});


@@ -201,29 +201,39 @@ void set_storage_proxy(http_context& ctx, routes& r) {
});
sp::get_hinted_handoff_enabled.set(r, [&ctx](std::unique_ptr<request> req) {
auto enabled = ctx.db.local().get_config().hinted_handoff_enabled();
return make_ready_future<json::json_return_type>(enabled);
const auto& filter = service::get_storage_proxy().local().get_hints_host_filter();
return make_ready_future<json::json_return_type>(!filter.is_disabled_for_all());
});
sp::set_hinted_handoff_enabled.set(r, [](std::unique_ptr<request> req) {
//TBD
unimplemented();
auto enable = req->get_query_param("enable");
return make_ready_future<json::json_return_type>(json_void());
auto filter = (enable == "true" || enable == "1")
? db::hints::host_filter(db::hints::host_filter::enabled_for_all_tag {})
: db::hints::host_filter(db::hints::host_filter::disabled_for_all_tag {});
return service::get_storage_proxy().invoke_on_all([filter = std::move(filter)] (service::storage_proxy& sp) {
return sp.change_hints_host_filter(filter);
}).then([] {
return make_ready_future<json::json_return_type>(json_void());
});
});
sp::get_hinted_handoff_enabled_by_dc.set(r, [](std::unique_ptr<request> req) {
//TBD
unimplemented();
std::vector<sp::mapper_list> res;
std::vector<sstring> res;
const auto& filter = service::get_storage_proxy().local().get_hints_host_filter();
const auto& dcs = filter.get_dcs();
res.reserve(dcs.size());
std::copy(dcs.begin(), dcs.end(), std::back_inserter(res));
return make_ready_future<json::json_return_type>(res);
});
sp::set_hinted_handoff_enabled_by_dc_list.set(r, [](std::unique_ptr<request> req) {
//TBD
unimplemented();
auto enable = req->get_query_param("dcs");
return make_ready_future<json::json_return_type>(json_void());
auto dcs = req->get_query_param("dcs");
auto filter = db::hints::host_filter::parse_from_dc_list(std::move(dcs));
return service::get_storage_proxy().invoke_on_all([filter = std::move(filter)] (service::storage_proxy& sp) {
return sp.change_hints_host_filter(filter);
}).then([] {
return make_ready_future<json::json_return_type>(json_void());
});
});
sp::get_max_hint_window.set(r, [](std::unique_ptr<request> req) {


@@ -22,6 +22,7 @@
#include "storage_service.hh"
#include "api/api-doc/storage_service.json.hh"
#include "db/config.hh"
#include "db/schema_tables.hh"
#include <optional>
#include <time.h>
#include <boost/range/adaptor/map.hpp>
@@ -44,9 +45,14 @@
#include "db/snapshot-ctl.hh"
#include "transport/controller.hh"
#include "thrift/controller.hh"
#include "locator/token_metadata.hh"
namespace api {
const locator::token_metadata& http_context::get_token_metadata() {
return *shared_token_metadata.local().get();
}
namespace ss = httpd::storage_service_json;
using namespace json;
@@ -256,14 +262,14 @@ void set_storage_service(http_context& ctx, routes& r) {
});
ss::get_tokens.set(r, [&ctx] (std::unique_ptr<request> req) {
return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.token_metadata.local().sorted_tokens(), [](const dht::token& i) {
return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.get_token_metadata().sorted_tokens(), [](const dht::token& i) {
return boost::lexical_cast<std::string>(i);
}));
});
ss::get_node_tokens.set(r, [&ctx] (std::unique_ptr<request> req) {
gms::inet_address addr(req->param["endpoint"]);
return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.token_metadata.local().get_tokens(addr), [](const dht::token& i) {
return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.get_token_metadata().get_tokens(addr), [](const dht::token& i) {
return boost::lexical_cast<std::string>(i);
}));
});
@@ -282,7 +288,7 @@ void set_storage_service(http_context& ctx, routes& r) {
});
ss::get_leaving_nodes.set(r, [&ctx](const_req req) {
return container_to_vec(ctx.token_metadata.local().get_leaving_endpoints());
return container_to_vec(ctx.get_token_metadata().get_leaving_endpoints());
});
ss::get_moving_nodes.set(r, [](const_req req) {
@@ -291,7 +297,7 @@ void set_storage_service(http_context& ctx, routes& r) {
});
ss::get_joining_nodes.set(r, [&ctx](const_req req) {
auto points = ctx.token_metadata.local().get_bootstrap_tokens();
auto points = ctx.get_token_metadata().get_bootstrap_tokens();
std::unordered_set<sstring> addr;
for (auto i: points) {
addr.insert(boost::lexical_cast<std::string>(i.second));
@@ -360,7 +366,7 @@ void set_storage_service(http_context& ctx, routes& r) {
ss::get_host_id_map.set(r, [&ctx](const_req req) {
std::vector<ss::mapper> res;
return map_to_key_value(ctx.token_metadata.local().get_endpoint_to_host_id_map_for_reading(), res);
return map_to_key_value(ctx.get_token_metadata().get_endpoint_to_host_id_map_for_reading(), res);
});
ss::get_load.set(r, [&ctx](std::unique_ptr<request> req) {
@@ -732,9 +738,12 @@ void set_storage_service(http_context& ctx, routes& r) {
});
ss::reset_local_schema.set(r, [](std::unique_ptr<request> req) {
//TBD
unimplemented();
return make_ready_future<json::json_return_type>(json_void());
// FIXME: We should truncate schema tables if more than one node in the cluster.
auto& sp = service::get_storage_proxy();
auto& fs = service::get_local_storage_service().features();
return db::schema_tables::recalculate_schema_version(sp, fs).then([] {
return make_ready_future<json::json_return_type>(json_void());
});
});
ss::set_trace_probability.set(r, [](std::unique_ptr<request> req) {


@@ -108,7 +108,7 @@ future<> wait_for_schema_agreement(::service::migration_manager& mm, const datab
});
}
const timeout_config& internal_distributed_timeout_config() noexcept {
::service::query_state& internal_distributed_query_state() noexcept {
#ifdef DEBUG
// Give the much slower debug tests more headroom for completing auth queries.
static const auto t = 30s;
@@ -116,7 +116,9 @@ const timeout_config& internal_distributed_timeout_config() noexcept {
static const auto t = 5s;
#endif
static const timeout_config tc{t, t, t, t, t, t, t};
return tc;
static thread_local ::service::client_state cs(::service::client_state::internal_tag{}, tc);
static thread_local ::service::query_state qs(cs, empty_service_permit());
return qs;
}
}


@@ -35,6 +35,7 @@
#include "log.hh"
#include "seastarx.hh"
#include "utils/exponential_backoff_retry.hh"
#include "service/query_state.hh"
using namespace std::chrono_literals;
@@ -87,6 +88,6 @@ future<> wait_for_schema_agreement(::service::migration_manager&, const database
///
/// Time-outs for internal, non-local CQL queries.
///
const timeout_config& internal_distributed_timeout_config() noexcept;
::service::query_state& internal_distributed_query_state() noexcept;
}


@@ -103,7 +103,6 @@ future<bool> default_authorizer::any_granted() const {
return _qp.execute_internal(
query,
db::consistency_level::LOCAL_ONE,
infinite_timeout_config,
{},
true).then([this](::shared_ptr<cql3::untyped_result_set> results) {
return !results->empty();
@@ -116,8 +115,7 @@ future<> default_authorizer::migrate_legacy_metadata() const {
return _qp.execute_internal(
query,
db::consistency_level::LOCAL_ONE,
infinite_timeout_config).then([this](::shared_ptr<cql3::untyped_result_set> results) {
db::consistency_level::LOCAL_ONE).then([this](::shared_ptr<cql3::untyped_result_set> results) {
return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
return do_with(
row.get_as<sstring>("username"),
@@ -197,7 +195,6 @@ default_authorizer::authorize(const role_or_anonymous& maybe_role, const resourc
return _qp.execute_internal(
query,
db::consistency_level::LOCAL_ONE,
infinite_timeout_config,
{*maybe_role.name, r.name()}).then([](::shared_ptr<cql3::untyped_result_set> results) {
if (results->empty()) {
return permissions::NONE;
@@ -226,7 +223,7 @@ default_authorizer::modify(
return _qp.execute_internal(
query,
db::consistency_level::ONE,
internal_distributed_timeout_config(),
internal_distributed_query_state(),
{permissions::to_strings(set), sstring(role_name), resource.name()}).discard_result();
});
}
@@ -251,7 +248,7 @@ future<std::vector<permission_details>> default_authorizer::list_all() const {
return _qp.execute_internal(
query,
db::consistency_level::ONE,
internal_distributed_timeout_config(),
internal_distributed_query_state(),
{},
true).then([](::shared_ptr<cql3::untyped_result_set> results) {
std::vector<permission_details> all_details;
@@ -278,7 +275,7 @@ future<> default_authorizer::revoke_all(std::string_view role_name) const {
return _qp.execute_internal(
query,
db::consistency_level::ONE,
internal_distributed_timeout_config(),
internal_distributed_query_state(),
{sstring(role_name)}).discard_result().handle_exception([role_name](auto ep) {
try {
std::rethrow_exception(ep);
@@ -298,7 +295,6 @@ future<> default_authorizer::revoke_all(const resource& resource) const {
return _qp.execute_internal(
query,
db::consistency_level::LOCAL_ONE,
infinite_timeout_config,
{resource.name()}).then_wrapped([this, resource](future<::shared_ptr<cql3::untyped_result_set>> f) {
try {
auto res = f.get0();
@@ -315,7 +311,6 @@ future<> default_authorizer::revoke_all(const resource& resource) const {
return _qp.execute_internal(
query,
db::consistency_level::LOCAL_ONE,
infinite_timeout_config,
{r.get_as<sstring>(ROLE_NAME), resource.name()}).discard_result().handle_exception(
[resource](auto ep) {
try {


@@ -66,6 +66,7 @@ constexpr std::string_view password_authenticator_name("org.apache.cassandra.aut
// name of the hash column.
static constexpr std::string_view SALTED_HASH = "salted_hash";
static constexpr std::string_view OPTIONS = "options";
static constexpr std::string_view DEFAULT_USER_NAME = meta::DEFAULT_SUPERUSER_NAME;
static const sstring DEFAULT_USER_PASSWORD = sstring(meta::DEFAULT_SUPERUSER_NAME);
@@ -114,7 +115,7 @@ future<> password_authenticator::migrate_legacy_metadata() const {
return _qp.execute_internal(
query,
db::consistency_level::QUORUM,
internal_distributed_timeout_config()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
internal_distributed_query_state()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
auto username = row.get_as<sstring>("username");
auto salted_hash = row.get_as<sstring>(SALTED_HASH);
@@ -122,7 +123,7 @@ future<> password_authenticator::migrate_legacy_metadata() const {
return _qp.execute_internal(
update_row_query(),
consistency_for_user(username),
internal_distributed_timeout_config(),
internal_distributed_query_state(),
{std::move(salted_hash), username}).discard_result();
}).finally([results] {});
}).then([] {
@@ -139,7 +140,7 @@ future<> password_authenticator::create_default_if_missing() const {
return _qp.execute_internal(
update_row_query(),
db::consistency_level::QUORUM,
internal_distributed_timeout_config(),
internal_distributed_query_state(),
{passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt), DEFAULT_USER_NAME}).then([](auto&&) {
plogger.info("Created default superuser authentication record.");
});
@@ -203,11 +204,11 @@ bool password_authenticator::require_authentication() const {
}
authentication_option_set password_authenticator::supported_options() const {
return authentication_option_set{authentication_option::password};
return authentication_option_set{authentication_option::password, authentication_option::options};
}
authentication_option_set password_authenticator::alterable_options() const {
return authentication_option_set{authentication_option::password};
return authentication_option_set{authentication_option::password, authentication_option::options};
}
future<authenticated_user> password_authenticator::authenticate(
@@ -236,7 +237,7 @@ future<authenticated_user> password_authenticator::authenticate(
return _qp.execute_internal(
query,
consistency_for_user(username),
internal_distributed_timeout_config(),
internal_distributed_query_state(),
{username},
true);
}).then_wrapped([=](future<::shared_ptr<cql3::untyped_result_set>> f) {
@@ -262,21 +263,46 @@ future<authenticated_user> password_authenticator::authenticate(
});
}
future<> password_authenticator::maybe_update_custom_options(std::string_view role_name, const authentication_options& options) const {
static const sstring query = format("UPDATE {} SET {} = ? WHERE {} = ?",
meta::roles_table::qualified_name,
OPTIONS,
meta::roles_table::role_col_name);
if (!options.options) {
return make_ready_future<>();
}
std::vector<std::pair<data_value, data_value>> entries;
for (const auto& entry : *options.options) {
entries.push_back({data_value(entry.first), data_value(entry.second)});
}
auto map_value = make_map_value(map_type_impl::get_instance(utf8_type, utf8_type, false), entries);
return _qp.execute_internal(
query,
consistency_for_user(role_name),
internal_distributed_query_state(),
{std::move(map_value), sstring(role_name)}).discard_result();
}
future<> password_authenticator::create(std::string_view role_name, const authentication_options& options) const {
if (!options.password) {
return make_ready_future<>();
return maybe_update_custom_options(role_name, options);
}
return _qp.execute_internal(
update_row_query(),
consistency_for_user(role_name),
internal_distributed_timeout_config(),
{passwords::hash(*options.password, rng_for_salt), sstring(role_name)}).discard_result();
internal_distributed_query_state(),
{passwords::hash(*options.password, rng_for_salt), sstring(role_name)}).discard_result().then([this, role_name, &options] {
return maybe_update_custom_options(role_name, options);
});
}
future<> password_authenticator::alter(std::string_view role_name, const authentication_options& options) const {
if (!options.password) {
return make_ready_future<>();
return maybe_update_custom_options(role_name, options);
}
static const sstring query = format("UPDATE {} SET {} = ? WHERE {} = ?",
@@ -287,8 +313,10 @@ future<> password_authenticator::alter(std::string_view role_name, const authent
return _qp.execute_internal(
query,
consistency_for_user(role_name),
internal_distributed_timeout_config(),
{passwords::hash(*options.password, rng_for_salt), sstring(role_name)}).discard_result();
internal_distributed_query_state(),
{passwords::hash(*options.password, rng_for_salt), sstring(role_name)}).discard_result().then([this, role_name, &options] {
return maybe_update_custom_options(role_name, options);
}).discard_result();
}
future<> password_authenticator::drop(std::string_view name) const {
@@ -299,12 +327,27 @@ future<> password_authenticator::drop(std::string_view name) const {
return _qp.execute_internal(
query, consistency_for_user(name),
internal_distributed_timeout_config(),
internal_distributed_query_state(),
{sstring(name)}).discard_result();
}
future<custom_options> password_authenticator::query_custom_options(std::string_view role_name) const {
return make_ready_future<custom_options>();
static const sstring query = format("SELECT {} FROM {} WHERE {} = ?",
OPTIONS,
meta::roles_table::qualified_name,
meta::roles_table::role_col_name);
return _qp.execute_internal(
query, consistency_for_user(role_name),
internal_distributed_query_state(),
{sstring(role_name)}).then([](::shared_ptr<cql3::untyped_result_set> rs) {
custom_options opts;
const auto& row = rs->one();
if (row.has(OPTIONS)) {
row.get_map_data<sstring, sstring>(OPTIONS, std::inserter(opts, opts.end()), utf8_type, utf8_type);
}
return opts;
});
}
const resource_set& password_authenticator::protected_resources() const {


@@ -94,6 +94,8 @@ public:
virtual ::shared_ptr<sasl_challenge> new_sasl_challenge() const override;
private:
future<> maybe_update_custom_options(std::string_view role_name, const authentication_options& options) const;
bool legacy_metadata_exists() const;
future<> migrate_legacy_metadata() const;


@@ -43,7 +43,8 @@ std::string_view creation_query() {
" can_login boolean,"
" is_superuser boolean,"
" member_of set<text>,"
" salted_hash text"
" salted_hash text,"
" options frozen<map<text, text>>,"
")",
qualified_name,
role_col_name);
@@ -68,14 +69,13 @@ future<bool> default_role_row_satisfies(
return qp.execute_internal(
query,
db::consistency_level::ONE,
infinite_timeout_config,
{meta::DEFAULT_SUPERUSER_NAME},
true).then([&qp, &p](::shared_ptr<cql3::untyped_result_set> results) {
if (results->empty()) {
return qp.execute_internal(
query,
db::consistency_level::QUORUM,
internal_distributed_timeout_config(),
internal_distributed_query_state(),
{meta::DEFAULT_SUPERUSER_NAME},
true).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
if (results->empty()) {
@@ -100,7 +100,7 @@ future<bool> any_nondefault_role_row_satisfies(
return qp.execute_internal(
query,
db::consistency_level::QUORUM,
internal_distributed_timeout_config()).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
internal_distributed_query_state()).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
if (results->empty()) {
return false;
}


@@ -210,7 +210,6 @@ future<bool> service::has_existing_legacy_users() const {
return _qp.execute_internal(
default_user_query,
db::consistency_level::ONE,
infinite_timeout_config,
{meta::DEFAULT_SUPERUSER_NAME},
true).then([this](auto results) {
if (!results->empty()) {
@@ -220,7 +219,6 @@ future<bool> service::has_existing_legacy_users() const {
return _qp.execute_internal(
default_user_query,
db::consistency_level::QUORUM,
infinite_timeout_config,
{meta::DEFAULT_SUPERUSER_NAME},
true).then([this](auto results) {
if (!results->empty()) {
@@ -229,8 +227,7 @@ future<bool> service::has_existing_legacy_users() const {
return _qp.execute_internal(
all_users_query,
db::consistency_level::QUORUM,
infinite_timeout_config).then([](auto results) {
db::consistency_level::QUORUM).then([](auto results) {
return make_ready_future<bool>(!results->empty());
});
});
@@ -371,10 +368,13 @@ bool is_enforcing(const service& ser) {
return enforcing_authorizer || enforcing_authenticator;
}
bool is_protected(const service& ser, const resource& r) noexcept {
return ser.underlying_role_manager().protected_resources().contains(r)
|| ser.underlying_authenticator().protected_resources().contains(r)
|| ser.underlying_authorizer().protected_resources().contains(r);
bool is_protected(const service& ser, command_desc cmd) noexcept {
if (cmd.type_ == command_desc::type::ALTER_WITH_OPTS) {
return false; // Table attributes are OK to modify; see #7057.
}
return ser.underlying_role_manager().protected_resources().contains(cmd.resource)
|| ser.underlying_authenticator().protected_resources().contains(cmd.resource)
|| ser.underlying_authorizer().protected_resources().contains(cmd.resource);
}
static void validate_authentication_options_are_supported(


@@ -181,10 +181,21 @@ future<permission_set> get_permissions(const service&, const authenticated_user&
///
bool is_enforcing(const service&);
/// A description of a CQL command from which auth::service can tell whether or not this command could endanger
/// internal data on which auth::service depends.
struct command_desc {
auth::permission permission; ///< Nature of the command's alteration.
const ::auth::resource& resource; ///< Resource impacted by this command.
enum class type {
ALTER_WITH_OPTS, ///< Command is ALTER ... WITH ...
OTHER
} type_ = type::OTHER;
};
///
/// Protected resources cannot be modified even if the performer has permissions to do so.
///
bool is_protected(const service&, const resource&) noexcept;
bool is_protected(const service&, command_desc) noexcept;
///
/// Create a role with optional authentication information.


@@ -86,7 +86,7 @@ static future<std::optional<record>> find_record(cql3::query_processor& qp, std:
return qp.execute_internal(
query,
consistency_for_role(role_name),
internal_distributed_timeout_config(),
internal_distributed_query_state(),
{sstring(role_name)},
true).then([](::shared_ptr<cql3::untyped_result_set> results) {
if (results->empty()) {
@@ -165,7 +165,7 @@ future<> standard_role_manager::create_default_role_if_missing() const {
return _qp.execute_internal(
query,
db::consistency_level::QUORUM,
internal_distributed_timeout_config(),
internal_distributed_query_state(),
{meta::DEFAULT_SUPERUSER_NAME}).then([](auto&&) {
log.info("Created default superuser role '{}'.", meta::DEFAULT_SUPERUSER_NAME);
return make_ready_future<>();
@@ -192,7 +192,7 @@ future<> standard_role_manager::migrate_legacy_metadata() const {
return _qp.execute_internal(
query,
db::consistency_level::QUORUM,
internal_distributed_timeout_config()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
internal_distributed_query_state()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
role_config config;
config.is_superuser = row.get_or<bool>("super", false);
@@ -253,7 +253,7 @@ future<> standard_role_manager::create_or_replace(std::string_view role_name, co
return _qp.execute_internal(
query,
consistency_for_role(role_name),
internal_distributed_timeout_config(),
internal_distributed_query_state(),
{sstring(role_name), c.is_superuser, c.can_login},
true).discard_result();
}
@@ -296,7 +296,7 @@ standard_role_manager::alter(std::string_view role_name, const role_config_updat
build_column_assignments(u),
meta::roles_table::role_col_name),
consistency_for_role(role_name),
internal_distributed_timeout_config(),
internal_distributed_query_state(),
{sstring(role_name)}).discard_result();
});
}
@@ -315,7 +315,7 @@ future<> standard_role_manager::drop(std::string_view role_name) const {
return _qp.execute_internal(
query,
consistency_for_role(role_name),
internal_distributed_timeout_config(),
internal_distributed_query_state(),
{sstring(role_name)}).then([this, role_name](::shared_ptr<cql3::untyped_result_set> members) {
return parallel_for_each(
members->begin(),
@@ -354,7 +354,7 @@ future<> standard_role_manager::drop(std::string_view role_name) const {
return _qp.execute_internal(
query,
consistency_for_role(role_name),
internal_distributed_timeout_config(),
internal_distributed_query_state(),
{sstring(role_name)}).discard_result();
};
@@ -381,7 +381,7 @@ standard_role_manager::modify_membership(
return _qp.execute_internal(
query,
consistency_for_role(grantee_name),
internal_distributed_timeout_config(),
internal_distributed_query_state(),
{role_set{sstring(role_name)}, sstring(grantee_name)}).discard_result();
};
@@ -392,7 +392,7 @@ standard_role_manager::modify_membership(
format("INSERT INTO {} (role, member) VALUES (?, ?)",
meta::role_members_table::qualified_name),
consistency_for_role(role_name),
internal_distributed_timeout_config(),
internal_distributed_query_state(),
{sstring(role_name), sstring(grantee_name)}).discard_result();
case membership_change::remove:
@@ -400,7 +400,7 @@ standard_role_manager::modify_membership(
format("DELETE FROM {} WHERE role = ? AND member = ?",
meta::role_members_table::qualified_name),
consistency_for_role(role_name),
internal_distributed_timeout_config(),
internal_distributed_query_state(),
{sstring(role_name), sstring(grantee_name)}).discard_result();
}
@@ -503,7 +503,7 @@ future<role_set> standard_role_manager::query_all() const {
return _qp.execute_internal(
query,
db::consistency_level::QUORUM,
internal_distributed_timeout_config()).then([](::shared_ptr<cql3::untyped_result_set> results) {
internal_distributed_query_state()).then([](::shared_ptr<cql3::untyped_result_set> results) {
role_set roles;
std::transform(


@@ -65,7 +65,14 @@ private:
size_type _size;
size_type _initial_chunk_size = default_chunk_size;
public:
class fragment_iterator : public std::iterator<std::input_iterator_tag, bytes_view> {
class fragment_iterator {
public:
using iterator_category = std::input_iterator_tag;
using value_type = bytes_view;
using difference_type = std::ptrdiff_t;
using pointer = bytes_view*;
using reference = bytes_view&;
private:
chunk* _current = nullptr;
public:
fragment_iterator() = default;


@@ -508,7 +508,7 @@ void cache_flat_mutation_reader::copy_from_cache_to_buffer() {
// This guarantees that rts starts after any emitted clustering_row
// and not before any emitted range tombstone.
if (!less(_lower_bound, rts.position())) {
rts.set_start(*_schema, _lower_bound);
rts.set_start(_lower_bound);
} else {
_lower_bound = position_in_partition(rts.position());
_lower_bound_changed = true;
@@ -644,7 +644,7 @@ void cache_flat_mutation_reader::add_to_buffer(range_tombstone&& rt) {
return;
}
if (!less(_lower_bound, rt.position())) {
rt.set_start(*_schema, _lower_bound);
rt.set_start(_lower_bound);
} else {
_lower_bound = position_in_partition(rt.position());
_lower_bound_changed = true;


@@ -33,9 +33,13 @@ template<typename T>
struct cartesian_product {
const std::vector<std::vector<T>>& _vec_of_vecs;
public:
class iterator : public std::iterator<std::forward_iterator_tag, std::vector<T>> {
class iterator {
public:
using iterator_category = std::forward_iterator_tag;
using value_type = std::vector<T>;
using difference_type = std::ptrdiff_t;
using pointer = std::vector<T>*;
using reference = std::vector<T>&;
private:
size_t _pos;
const std::vector<std::vector<T>>* _vec_of_vecs;


@@ -23,7 +23,6 @@
#include <random>
#include <unordered_set>
#include <seastar/core/sleep.hh>
#include <algorithm>
#include "keys.hh"
#include "schema_builder.hh"
@@ -175,38 +174,19 @@ bool topology_description::operator==(const topology_description& o) const {
return _entries == o._entries;
}
const std::vector<token_range_description>& topology_description::entries() const& {
const std::vector<token_range_description>& topology_description::entries() const {
return _entries;
}
std::vector<token_range_description>&& topology_description::entries() && {
return std::move(_entries);
}
static std::vector<stream_id> create_stream_ids(
size_t index, dht::token start, dht::token end, size_t shard_count, uint8_t ignore_msb) {
std::vector<stream_id> result;
result.reserve(shard_count);
dht::sharder sharder(shard_count, ignore_msb);
for (size_t shard_idx = 0; shard_idx < shard_count; ++shard_idx) {
auto t = dht::find_first_token_for_shard(sharder, start, end, shard_idx);
// compose the id from token and the "index" of the range end owning vnode
// as defined by token sort order. Basically grouping within this
// shard set.
result.emplace_back(stream_id(t, index));
}
return result;
}
class topology_description_generator final {
const db::config& _cfg;
const std::unordered_set<dht::token>& _bootstrap_tokens;
const locator::token_metadata& _token_metadata;
const locator::token_metadata_ptr _tmptr;
const gms::gossiper& _gossiper;
// Compute a set of tokens that split the token ring into vnodes
auto get_tokens() const {
auto tokens = _token_metadata.sorted_tokens();
auto tokens = _tmptr->sorted_tokens();
auto it = tokens.insert(
tokens.end(), _bootstrap_tokens.begin(), _bootstrap_tokens.end());
std::sort(it, tokens.end());
@@ -221,7 +201,7 @@ class topology_description_generator final {
if (_bootstrap_tokens.contains(end)) {
return {smp::count, _cfg.murmur3_partitioner_ignore_msb_bits()};
} else {
auto endpoint = _token_metadata.get_endpoint(end);
auto endpoint = _tmptr->get_endpoint(end);
if (!endpoint) {
throw std::runtime_error(
format("Can't find endpoint for token {}", end));
@@ -237,20 +217,29 @@ class topology_description_generator final {
desc.token_range_end = end;
auto [shard_count, ignore_msb] = get_sharding_info(end);
desc.streams = create_stream_ids(index, start, end, shard_count, ignore_msb);
desc.streams.reserve(shard_count);
desc.sharding_ignore_msb = ignore_msb;
dht::sharder sharder(shard_count, ignore_msb);
for (size_t shard_idx = 0; shard_idx < shard_count; ++shard_idx) {
auto t = dht::find_first_token_for_shard(sharder, start, end, shard_idx);
// compose the id from token and the "index" of the range end owning vnode
// as defined by token sort order. Basically grouping within this
// shard set.
desc.streams.emplace_back(stream_id(t, index));
}
return desc;
}
public:
topology_description_generator(
const db::config& cfg,
const std::unordered_set<dht::token>& bootstrap_tokens,
const locator::token_metadata& token_metadata,
const locator::token_metadata_ptr tmptr,
const gms::gossiper& gossiper)
: _cfg(cfg)
, _bootstrap_tokens(bootstrap_tokens)
, _token_metadata(token_metadata)
, _tmptr(std::move(tmptr))
, _gossiper(gossiper)
{}
@@ -305,67 +294,23 @@ future<db_clock::time_point> get_local_streams_timestamp() {
});
}
// non-static for testing
size_t limit_of_streams_in_topology_description() {
// Each stream takes 16B and we don't want to exceed 4MB, so we can have
// at most 262144 streams, but no fewer than 1 per vnode.
return 4 * 1024 * 1024 / 16;
}
// non-static for testing
topology_description limit_number_of_streams_if_needed(topology_description&& desc) {
int64_t streams_count = 0;
for (auto& tr_desc : desc.entries()) {
streams_count += tr_desc.streams.size();
}
size_t limit = std::max(limit_of_streams_in_topology_description(), desc.entries().size());
if (limit >= size_t(streams_count)) {
return std::move(desc);
}
size_t streams_per_vnode_limit = limit / desc.entries().size();
auto entries = std::move(desc).entries();
auto start = entries.back().token_range_end;
for (size_t idx = 0; idx < entries.size(); ++idx) {
auto end = entries[idx].token_range_end;
if (entries[idx].streams.size() > streams_per_vnode_limit) {
entries[idx].streams =
create_stream_ids(idx, start, end, streams_per_vnode_limit, entries[idx].sharding_ignore_msb);
}
start = end;
}
return topology_description(std::move(entries));
}
// Run inside seastar::async context.
db_clock::time_point make_new_cdc_generation(
const db::config& cfg,
const std::unordered_set<dht::token>& bootstrap_tokens,
const locator::token_metadata& tm,
const locator::token_metadata_ptr tmptr,
const gms::gossiper& g,
db::system_distributed_keyspace& sys_dist_ks,
std::chrono::milliseconds ring_delay,
bool for_testing) {
bool add_delay) {
using namespace std::chrono;
auto gen = topology_description_generator(cfg, bootstrap_tokens, tm, g).generate();
// If the cluster is large we may end up with a generation that contains
// a large number of streams. This is problematic because we store the
// generation in a single row. For a generation with a large number of
// streams this will lead to a row that can be as big as 32MB, which is much
// more than the limit imposed by commitlog_segment_size_in_mb. If the size
// of the row that describes a new generation grows above
// commitlog_segment_size_in_mb, the write will fail and the new node won't
// be able to join. To avoid this problem we make sure that such a row is
// always smaller than 4MB, by removing some CDC streams from each vnode
// if the total number of streams is too large.
gen = limit_number_of_streams_if_needed(std::move(gen));
auto gen = topology_description_generator(cfg, bootstrap_tokens, tmptr, g).generate();
// Begin the race.
auto ts = db_clock::now() + (
(for_testing || ring_delay == milliseconds(0)) ? milliseconds(0) : (
(!add_delay || ring_delay == milliseconds(0)) ? milliseconds(0) : (
2 * ring_delay + duration_cast<milliseconds>(generation_leeway)));
sys_dist_ks.insert_cdc_topology_description(ts, std::move(gen), { tm.count_normal_token_owners() }).get();
sys_dist_ks.insert_cdc_topology_description(ts, std::move(gen), { tmptr->count_normal_token_owners() }).get();
return ts;
}
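The timestamp choice in `make_new_cdc_generation` can be sketched in isolation: unless delays are disabled (`add_delay` false, or a zero `ring_delay` as in tests), the new generation becomes operational `2 * ring_delay` plus a leeway after now, giving other nodes time to learn about it through gossip. A sketch under an assumed `generation_leeway` value (the real constant is defined elsewhere in the tree):

```cpp
#include <chrono>

using namespace std::chrono;

// Assumed leeway for illustration; the actual generation_leeway constant
// lives elsewhere in the CDC code.
constexpr milliseconds generation_leeway{5000};

// Mirrors the ternary above: no delay when add_delay is false or ring_delay
// is zero, otherwise 2 * ring_delay + generation_leeway.
constexpr milliseconds generation_delay(bool add_delay, milliseconds ring_delay) {
    return (!add_delay || ring_delay == milliseconds(0))
        ? milliseconds(0)
        : 2 * ring_delay + generation_leeway;
}
```

With a typical 30 s ring delay this sketch defers the generation by 65 s; in the testing path it takes effect immediately.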


@@ -40,6 +40,7 @@
#include "database_fwd.hh"
#include "db_clock.hh"
#include "dht/token.hh"
#include "locator/token_metadata.hh"
namespace seastar {
class abort_source;
@@ -55,10 +56,6 @@ namespace gms {
class gossiper;
} // namespace gms
namespace locator {
class token_metadata;
} // namespace locator
namespace cdc {
class stream_id final {
@@ -68,7 +65,6 @@ public:
stream_id() = default;
stream_id(bytes);
stream_id(dht::token, size_t);
bool is_set() const;
bool operator==(const stream_id&) const;
@@ -82,6 +78,9 @@ public:
partition_key to_partition_key(const schema& log_schema) const;
static int64_t token_from_bytes(bytes_view);
private:
friend class topology_description_generator;
stream_id(dht::token, size_t);
};
/* Describes a mapping of tokens to CDC streams in a token range.
@@ -114,8 +113,7 @@ public:
topology_description(std::vector<token_range_description> entries);
bool operator==(const topology_description&) const;
const std::vector<token_range_description>& entries() const&;
std::vector<token_range_description>&& entries() &&;
const std::vector<token_range_description>& entries() const;
};
/**
@@ -167,11 +165,11 @@ future<db_clock::time_point> get_local_streams_timestamp();
db_clock::time_point make_new_cdc_generation(
const db::config& cfg,
const std::unordered_set<dht::token>& bootstrap_tokens,
const locator::token_metadata& tm,
const locator::token_metadata_ptr tmptr,
const gms::gossiper& g,
db::system_distributed_keyspace& sys_dist_ks,
std::chrono::milliseconds ring_delay,
bool for_testing);
bool add_delay);
/* Retrieves CDC streams generation timestamp from the given endpoint's application state (broadcasted through gossip).
* We might be during a rolling upgrade, so the timestamp might not be there (if the other node didn't upgrade yet),


@@ -600,7 +600,14 @@ db_context db_context::builder::build() {
// iterators for collection merge
template<typename T>
class collection_iterator : public std::iterator<std::input_iterator_tag, const T> {
class collection_iterator {
public:
using iterator_category = std::input_iterator_tag;
using value_type = const T;
using difference_type = std::ptrdiff_t;
using pointer = const T*;
using reference = const T&;
private:
bytes_view _v, _next;
size_t _rem = 0;
T _current;
@@ -980,9 +987,9 @@ static bytes get_bytes(const atomic_cell_view& acv) {
return acv.value().linearize();
}
static bytes_view get_bytes_view(const atomic_cell_view& acv, std::forward_list<bytes>& buf) {
static bytes_view get_bytes_view(const atomic_cell_view& acv, std::vector<bytes>& buf) {
return acv.value().is_fragmented()
? bytes_view{buf.emplace_front(acv.value().linearize())}
? bytes_view{buf.emplace_back(acv.value().linearize())}
: acv.value().first_fragment();
}
@@ -1137,9 +1144,9 @@ struct process_row_visitor {
struct udt_visitor : public collection_visitor {
std::vector<bytes_opt> _added_cells;
std::forward_list<bytes>& _buf;
std::vector<bytes>& _buf;
udt_visitor(ttl_opt& ttl_column, size_t num_keys, std::forward_list<bytes>& buf)
udt_visitor(ttl_opt& ttl_column, size_t num_keys, std::vector<bytes>& buf)
: collection_visitor(ttl_column), _added_cells(num_keys), _buf(buf) {}
void live_collection_cell(bytes_view key, const atomic_cell_view& cell) {
@@ -1148,7 +1155,7 @@ struct process_row_visitor {
}
};
std::forward_list<bytes> buf;
std::vector<bytes> buf;
udt_visitor v(_ttl_column, type.size(), buf);
visit_collection(v);
@@ -1167,9 +1174,9 @@ struct process_row_visitor {
struct map_or_list_visitor : public collection_visitor {
std::vector<std::pair<bytes_view, bytes_view>> _added_cells;
std::forward_list<bytes>& _buf;
std::vector<bytes>& _buf;
map_or_list_visitor(ttl_opt& ttl_column, std::forward_list<bytes>& buf)
map_or_list_visitor(ttl_opt& ttl_column, std::vector<bytes>& buf)
: collection_visitor(ttl_column), _buf(buf) {}
void live_collection_cell(bytes_view key, const atomic_cell_view& cell) {
@@ -1178,7 +1185,7 @@ struct process_row_visitor {
}
};
std::forward_list<bytes> buf;
std::vector<bytes> buf;
map_or_list_visitor v(_ttl_column, buf);
visit_collection(v);
@@ -1290,13 +1297,6 @@ struct process_change_visitor {
_clustering_row_states, _generate_delta_values);
visit_row_cells(v);
if (_enable_updating_state) {
// #7716: if there are no regular columns, our visitor would not have visited any cells,
// hence it would not have created a row_state for this row. In effect, postimage wouldn't be produced.
// Ensure that the row state exists.
_clustering_row_states.try_emplace(ckey);
}
_builder.set_operation(log_ck, v._cdc_op);
_builder.set_ttl(log_ck, v._ttl_column);
}


@@ -51,8 +51,7 @@ static cdc::stream_id get_stream(
return entry.streams[shard_id];
}
// non-static for testing
cdc::stream_id get_stream(
static cdc::stream_id get_stream(
const std::vector<cdc::token_range_description>& entries,
dht::token tok) {
if (entries.empty()) {


@@ -72,7 +72,14 @@ public:
}
return result;
}
class position_range_iterator : public std::iterator<std::input_iterator_tag, const position_range> {
class position_range_iterator {
public:
using iterator_category = std::input_iterator_tag;
using value_type = const position_range;
using difference_type = std::ptrdiff_t;
using pointer = const position_range*;
using reference = const position_range&;
private:
set_type::iterator _i;
public:
position_range_iterator(set_type::iterator i) : _i(i) {}


@@ -54,6 +54,36 @@ public:
virtual bytes_opt compute_value(const schema& schema, const partition_key& key, const clustering_row& row) const = 0;
};
/*
* Computes token value of partition key and returns it as bytes.
*
* Should NOT be used (use token_column_computation instead), because the
* ordering of bytes is different from the ordering of tokens (signed vs
* unsigned comparison).
*
* The type name stored for computations of this class is "token" - this was
* the original implementation (now deprecated for new tables).
*/
class legacy_token_column_computation : public column_computation {
public:
virtual column_computation_ptr clone() const override {
return std::make_unique<legacy_token_column_computation>(*this);
}
virtual bytes serialize() const override;
virtual bytes_opt compute_value(const schema& schema, const partition_key& key, const clustering_row& row) const override;
};
/*
* Computes token value of partition key and returns it as long_type.
* The return type means that it can be trivially sorted (for example
* if a computed column using this computation is a clustering key),
* preserving the correct order of tokens (using signed comparisons).
*
* Please use this class instead of legacy_token_column_computation.
*
* The type name stored for computations of this class is "token_v2".
* (the name "token" refers to the depracated legacy_token_column_computation)
*/
class token_column_computation : public column_computation {
public:
virtual column_computation_ptr clone() const override {


@@ -130,7 +130,13 @@ public:
bytes decompose_value(const value_type& values) const {
return serialize_value(values);
}
class iterator : public std::iterator<std::input_iterator_tag, const bytes_view> {
class iterator {
public:
using iterator_category = std::input_iterator_tag;
using value_type = const bytes_view;
using difference_type = std::ptrdiff_t;
using pointer = const bytes_view*;
using reference = const bytes_view&;
private:
bytes_view _v;
bytes_view _current;


@@ -61,7 +61,14 @@ public:
, _packed(packed)
{ }
class iterator : public std::iterator<std::input_iterator_tag, bytes::value_type> {
class iterator {
public:
using iterator_category = std::input_iterator_tag;
using value_type = bytes::value_type;
using difference_type = std::ptrdiff_t;
using pointer = bytes::value_type*;
using reference = bytes::value_type&;
private:
bool _singular;
// Offset within virtual output space of a component.
//
@@ -339,7 +346,14 @@ public:
return eoc_byte == 0 ? eoc::none : (eoc_byte < 0 ? eoc::start : eoc::end);
}
class iterator : public std::iterator<std::input_iterator_tag, const component_view> {
class iterator {
public:
using iterator_category = std::input_iterator_tag;
using value_type = const component_view;
using difference_type = std::ptrdiff_t;
using pointer = const component_view*;
using reference = const component_view&;
private:
bytes_view _v;
component_view _current;
bool _strict_mode = true;


@@ -230,6 +230,9 @@ batch_size_fail_threshold_in_kb: 50
# - PasswordAuthenticator relies on username/password pairs to authenticate
# users. It keeps usernames and hashed passwords in system_auth.credentials table.
# Please increase system_auth keyspace replication factor if you use this authenticator.
# - com.scylladb.auth.TransitionalAuthenticator requires username/password pair
# to authenticate in the same manner as PasswordAuthenticator, but improper credentials
# result in being logged in as an anonymous user. Use for upgrading clusters' auth.
# authenticator: AllowAllAuthenticator
# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions
@@ -239,6 +242,9 @@ batch_size_fail_threshold_in_kb: 50
# - AllowAllAuthorizer allows any action to any user - set it to disable authorization.
# - CassandraAuthorizer stores permissions in system_auth.permissions table. Please
# increase system_auth keyspace replication factor if you use this authorizer.
# - com.scylladb.auth.TransitionalAuthorizer wraps around the CassandraAuthorizer, using it for
# authorizing permission management. Otherwise, it allows all. Use for upgrading
# clusters' auth.
# authorizer: AllowAllAuthorizer
# initial_token allows you to specify tokens manually. While you can use # it with


@@ -257,25 +257,24 @@ modes = {
'stack-usage-threshold': 1024*40,
},
'release': {
'cxxflags': '',
'cxx_ld_flags': '-O3 -ffunction-sections -fdata-sections -Wl,--gc-sections',
'cxxflags': '-O3 -ffunction-sections -fdata-sections ',
'cxx_ld_flags': '-Wl,--gc-sections',
'stack-usage-threshold': 1024*13,
},
'dev': {
'cxxflags': '-DSEASTAR_ENABLE_ALLOC_FAILURE_INJECTION -DSCYLLA_ENABLE_ERROR_INJECTION',
'cxx_ld_flags': '-O1',
'cxxflags': '-O1 -DSEASTAR_ENABLE_ALLOC_FAILURE_INJECTION -DSCYLLA_ENABLE_ERROR_INJECTION',
'cxx_ld_flags': '',
'stack-usage-threshold': 1024*21,
},
'sanitize': {
'cxxflags': '-DDEBUG -DSANITIZE -DDEBUG_LSA_SANITIZER -DSCYLLA_ENABLE_ERROR_INJECTION',
'cxx_ld_flags': '-Os',
'cxxflags': '-Os -DDEBUG -DSANITIZE -DDEBUG_LSA_SANITIZER -DSCYLLA_ENABLE_ERROR_INJECTION',
'cxx_ld_flags': '',
'stack-usage-threshold': 1024*50,
}
}
scylla_tests = set([
'test/boost/UUID_test',
'test/boost/cdc_generation_test',
'test/boost/aggregate_fcts_test',
'test/boost/allocation_strategy_test',
'test/boost/alternator_base64_test',
@@ -315,6 +314,7 @@ scylla_tests = set([
'test/boost/crc_test',
'test/boost/data_listeners_test',
'test/boost/database_test',
'test/boost/double_decker_test',
'test/boost/duration_test',
'test/boost/dynamic_bitset_test',
'test/boost/enum_option_test',
@@ -330,6 +330,7 @@ scylla_tests = set([
'test/boost/gossiping_property_file_snitch_test',
'test/boost/hash_test',
'test/boost/idl_test',
'test/boost/imr_test',
'test/boost/input_stream_test',
'test/boost/json_cql_query_test',
'test/boost/json_test',
@@ -384,6 +385,7 @@ scylla_tests = set([
'test/boost/sstable_resharding_test',
'test/boost/sstable_directory_test',
'test/boost/sstable_test',
'test/boost/sstable_move_test',
'test/boost/storage_proxy_test',
'test/boost/top_k_test',
'test/boost/transport_test',
@@ -418,7 +420,7 @@ scylla_tests = set([
'test/perf/perf_fast_forward',
'test/perf/perf_hash',
'test/perf/perf_mutation',
'test/perf/perf_bptree',
'test/perf/perf_collection',
'test/perf/perf_row_cache_update',
'test/perf/perf_simple_query',
'test/perf/perf_sstable',
@@ -477,9 +479,9 @@ arg_parser.add_argument('--ldflags', action='store', dest='user_ldflags', defaul
help='Extra flags for the linker')
arg_parser.add_argument('--target', action='store', dest='target', default=default_target_arch(),
help='Target architecture (-march)')
arg_parser.add_argument('--compiler', action='store', dest='cxx', default='g++',
arg_parser.add_argument('--compiler', action='store', dest='cxx', default='clang++',
help='C++ compiler path')
arg_parser.add_argument('--c-compiler', action='store', dest='cc', default='gcc',
arg_parser.add_argument('--c-compiler', action='store', dest='cc', default='clang',
help='C compiler path')
add_tristate(arg_parser, name='dpdk', dest='dpdk',
help='Use dpdk (from seastar dpdk sources) (default=True for release builds)')
@@ -519,17 +521,6 @@ arg_parser.add_argument('--test-repeat', dest='test_repeat', action='store', typ
arg_parser.add_argument('--test-timeout', dest='test_timeout', action='store', type=str, default='7200')
args = arg_parser.parse_args()
coroutines_test_src = '''
#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#if GCC_VERSION < 100201
#error "Coroutines support requires at leat gcc 10.2.1"
#endif
'''
compiler_supports_coroutines = try_compile(compiler=args.cxx, source=coroutines_test_src)
if args.build_raft and not compiler_supports_coroutines:
raise Exception("--build-raft is requested, while the used compiler does not support coroutines")
if not args.build_raft:
all_artifacts.difference_update(raft_tests)
tests.difference_update(raft_tests)
@@ -727,6 +718,7 @@ scylla_core = (['database.cc',
'db/data_listeners.cc',
'db/hints/manager.cc',
'db/hints/resource_manager.cc',
'db/hints/host_filter.cc',
'db/config.cc',
'db/extensions.cc',
'db/heat_load_balance.cc',
@@ -855,7 +847,6 @@ scylla_core = (['database.cc',
'utils/error_injection.cc',
'mutation_writer/timestamp_based_splitting_writer.cc',
'mutation_writer/shard_based_splitting_writer.cc',
'mutation_writer/feed_writers.cc',
'lua.cc',
] + [Antlr3Grammar('cql3/Cql.g')] + [Thrift('interface/cassandra.thrift', 'Cassandra')]
)
@@ -1039,7 +1030,7 @@ tests_not_using_seastar_test_framework = set([
'test/perf/perf_cql_parser',
'test/perf/perf_hash',
'test/perf/perf_mutation',
'test/perf/perf_bptree',
'test/perf/perf_collection',
'test/perf/perf_row_cache_update',
'test/unit/lsa_async_eviction_test',
'test/unit/lsa_sync_eviction_test',
@@ -1154,6 +1145,8 @@ warnings = [
'-Wno-implicit-int-float-conversion',
'-Wno-delete-abstract-non-virtual-dtor',
'-Wno-uninitialized-const-reference',
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77728
'-Wno-psabi',
]
warnings = [w
@@ -1169,11 +1162,11 @@ optimization_flags = [
optimization_flags = [o
for o in optimization_flags
if flag_supported(flag=o, compiler=args.cxx)]
modes['release']['cxx_ld_flags'] += ' ' + ' '.join(optimization_flags)
modes['release']['cxxflags'] += ' ' + ' '.join(optimization_flags)
if flag_supported(flag='-Wstack-usage=4096', compiler=args.cxx):
for mode in modes:
modes[mode]['cxx_ld_flags'] += f' -Wstack-usage={modes[mode]["stack-usage-threshold"]} -Wno-error=stack-usage='
modes[mode]['cxxflags'] += f' -Wstack-usage={modes[mode]["stack-usage-threshold"]} -Wno-error=stack-usage='
linker_flags = linker_flags(compiler=args.cxx)
@@ -1288,6 +1281,8 @@ file = open(f'{outdir}/SCYLLA-VERSION-FILE', 'r')
scylla_version = file.read().strip()
file = open(f'{outdir}/SCYLLA-RELEASE-FILE', 'r')
scylla_release = file.read().strip()
file = open(f'{outdir}/SCYLLA-PRODUCT-FILE', 'r')
scylla_product = file.read().strip()
extra_cxxflags["release.cc"] = "-DSCYLLA_VERSION=\"\\\"" + scylla_version + "\\\"\" -DSCYLLA_RELEASE=\"\\\"" + scylla_release + "\\\"\""
@@ -1329,9 +1324,6 @@ args.user_cflags += f" -ffile-prefix-map={curdir}=."
seastar_cflags = args.user_cflags
if build_raft:
seastar_cflags += ' -fcoroutines'
if args.target != '':
seastar_cflags += ' -march=' + args.target
seastar_ldflags = args.user_ldflags
@@ -1340,6 +1332,13 @@ libdeflate_cflags = seastar_cflags
MODE_TO_CMAKE_BUILD_TYPE = {'release' : 'RelWithDebInfo', 'debug' : 'Debug', 'dev' : 'Dev', 'sanitize' : 'Sanitize' }
# cmake likes to separate things with semicolons
def semicolon_separated(*flags):
# original flags may be space separated, so convert to string still
# using spaces
f = ' '.join(flags)
return re.sub(' +', ';', f)
def configure_seastar(build_dir, mode):
seastar_build_dir = os.path.join(build_dir, mode, 'seastar')
@@ -1348,8 +1347,8 @@ def configure_seastar(build_dir, mode):
'-DCMAKE_C_COMPILER={}'.format(args.cc),
'-DCMAKE_CXX_COMPILER={}'.format(args.cxx),
'-DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON',
'-DSeastar_CXX_FLAGS={}'.format((seastar_cflags + ' ' + modes[mode]['cxx_ld_flags']).replace(' ', ';')),
'-DSeastar_LD_FLAGS={}'.format(seastar_ldflags),
'-DSeastar_CXX_FLAGS={}'.format((seastar_cflags).replace(' ', ';')),
'-DSeastar_LD_FLAGS={}'.format(semicolon_separated(seastar_ldflags, modes[mode]['cxx_ld_flags'])),
'-DSeastar_CXX_DIALECT=gnu++20',
'-DSeastar_API_LEVEL=6',
'-DSeastar_UNUSED_RESULT_ERROR=ON',
@@ -1460,7 +1459,7 @@ if not args.staticboost:
args.user_cflags += ' -DBOOST_TEST_DYN_LINK'
if build_raft:
args.user_cflags += ' -DENABLE_SCYLLA_RAFT -fcoroutines'
args.user_cflags += ' -DENABLE_SCYLLA_RAFT'
# thrift version detection, see #4538
proc_res = subprocess.run(["thrift", "-version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
@@ -1799,24 +1798,18 @@ with open(buildfile_tmp, 'w') as f:
f.write(textwrap.dedent('''\
build $builddir/{mode}/iotune: copy $builddir/{mode}/seastar/apps/iotune/iotune
''').format(**locals()))
f.write('build $builddir/{mode}/dist/tar/scylla-package.tar.gz: package $builddir/{mode}/scylla $builddir/{mode}/iotune $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian | always\n'.format(**locals()))
f.write(' pool = submodule_pool\n')
f.write('build $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz: package $builddir/{mode}/scylla $builddir/{mode}/iotune $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian | always\n'.format(**locals()))
f.write(' mode = {mode}\n'.format(**locals()))
f.write(f'build $builddir/{mode}/scylla-package.tar.gz: copy $builddir/{mode}/dist/tar/scylla-package.tar.gz\n')
f.write(f'build $builddir/dist/{mode}/redhat: rpmbuild $builddir/{mode}/scylla-package.tar.gz\n')
f.write(f' pool = submodule_pool\n')
f.write(f'build $builddir/dist/{mode}/redhat: rpmbuild $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz\n')
f.write(f' mode = {mode}\n')
f.write(f'build $builddir/dist/{mode}/debian: debbuild $builddir/{mode}/scylla-package.tar.gz\n')
f.write(f' pool = submodule_pool\n')
f.write(f'build $builddir/dist/{mode}/debian: debbuild $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz\n')
f.write(f' mode = {mode}\n')
f.write(f'build dist-server-{mode}: phony $builddir/dist/{mode}/redhat $builddir/dist/{mode}/debian\n')
f.write(f'build dist-jmx-{mode}: phony $builddir/{mode}/dist/tar/scylla-jmx-package.tar.gz dist-jmx-rpm dist-jmx-deb\n')
f.write(f'build dist-tools-{mode}: phony $builddir/{mode}/dist/tar/scylla-tools-package.tar.gz dist-tools-rpm dist-tools-deb\n')
f.write(f'build dist-jmx-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-jmx-package.tar.gz dist-jmx-rpm dist-jmx-deb\n')
f.write(f'build dist-tools-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-tools-package.tar.gz dist-tools-rpm dist-tools-deb\n')
f.write(f'build dist-python3-{mode}: phony dist-python3-tar dist-python3-rpm dist-python3-deb compat-python3-rpm compat-python3-deb\n')
f.write(f'build dist-unified-{mode}: phony $builddir/{mode}/dist/tar/scylla-unified-package-{scylla_version}.{scylla_release}.tar.gz\n')
f.write(f'build $builddir/{mode}/scylla-unified-package-{scylla_version}.{scylla_release}.tar.gz: copy $builddir/{mode}/dist/tar/scylla-unified-package.tar.gz\n')
f.write(f'build $builddir/{mode}/dist/tar/scylla-unified-package-{scylla_version}.{scylla_release}.tar.gz: unified $builddir/{mode}/dist/tar/scylla-package.tar.gz $builddir/{mode}/dist/tar/scylla-python3-package.tar.gz $builddir/{mode}/dist/tar/scylla-jmx-package.tar.gz $builddir/{mode}/dist/tar/scylla-tools-package.tar.gz | always\n')
f.write(f' pool = submodule_pool\n')
f.write(f'build dist-unified-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-unified-package-{scylla_version}.{scylla_release}.tar.gz\n')
f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-unified-package-{scylla_version}.{scylla_release}.tar.gz: unified $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-python3-package.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-jmx-package.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-tools-package.tar.gz | always\n')
f.write(f' mode = {mode}\n')
f.write('rule libdeflate.{mode}\n'.format(**locals()))
f.write(' command = make -C libdeflate BUILD_DIR=../$builddir/{mode}/libdeflate/ CFLAGS="{libdeflate_cflags}" CC={args.cc} ../$builddir/{mode}/libdeflate//libdeflate.a\n'.format(**locals()))
@@ -1843,12 +1836,12 @@ with open(buildfile_tmp, 'w') as f:
)
f.write(textwrap.dedent(f'''\
build dist-unified-tar: phony {' '.join(['$builddir/{mode}/scylla-unified-package-$scylla_version.$scylla_release.tar.gz'.format(mode=mode) for mode in build_modes])}
build dist-unified-tar: phony {' '.join([f'$builddir/{mode}/dist/tar/{scylla_product}-unified-package-{scylla_version}.{scylla_release}.tar.gz' for mode in build_modes])}
build dist-unified: phony dist-unified-tar
build dist-server-deb: phony {' '.join(['$builddir/dist/{mode}/debian'.format(mode=mode) for mode in build_modes])}
build dist-server-rpm: phony {' '.join(['$builddir/dist/{mode}/redhat'.format(mode=mode) for mode in build_modes])}
build dist-server-tar: phony {' '.join(['$builddir/{mode}/scylla-package.tar.gz'.format(mode=mode) for mode in build_modes])}
build dist-server-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz'.format(mode=mode, scylla_product=scylla_product) for mode in build_modes])}
build dist-server: phony dist-server-tar dist-server-rpm dist-server-deb
rule build-submodule-reloc
@@ -1858,26 +1851,26 @@ with open(buildfile_tmp, 'w') as f:
rule build-submodule-deb
command = cd $dir && ./reloc/build_deb.sh --reloc-pkg $artifact
build tools/jmx/build/scylla-jmx-package.tar.gz: build-submodule-reloc
build tools/jmx/build/{scylla_product}-jmx-package.tar.gz: build-submodule-reloc
reloc_dir = tools/jmx
build dist-jmx-rpm: build-submodule-rpm tools/jmx/build/scylla-jmx-package.tar.gz
build dist-jmx-rpm: build-submodule-rpm tools/jmx/build/{scylla_product}-jmx-package.tar.gz
dir = tools/jmx
artifact = $builddir/scylla-jmx-package.tar.gz
build dist-jmx-deb: build-submodule-deb tools/jmx/build/scylla-jmx-package.tar.gz
artifact = $builddir/{scylla_product}-jmx-package.tar.gz
build dist-jmx-deb: build-submodule-deb tools/jmx/build/{scylla_product}-jmx-package.tar.gz
dir = tools/jmx
artifact = $builddir/scylla-jmx-package.tar.gz
build dist-jmx-tar: phony {' '.join(['$builddir/{mode}/dist/tar/scylla-jmx-package.tar.gz'.format(mode=mode) for mode in build_modes])}
artifact = $builddir/{scylla_product}-jmx-package.tar.gz
build dist-jmx-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-jmx-package.tar.gz'.format(mode=mode, scylla_product=scylla_product) for mode in build_modes])}
build dist-jmx: phony dist-jmx-tar dist-jmx-rpm dist-jmx-deb
build tools/java/build/scylla-tools-package.tar.gz: build-submodule-reloc
build tools/java/build/{scylla_product}-tools-package.tar.gz: build-submodule-reloc
reloc_dir = tools/java
build dist-tools-rpm: build-submodule-rpm tools/java/build/scylla-tools-package.tar.gz
build dist-tools-rpm: build-submodule-rpm tools/java/build/{scylla_product}-tools-package.tar.gz
dir = tools/java
artifact = $builddir/scylla-tools-package.tar.gz
build dist-tools-deb: build-submodule-deb tools/java/build/scylla-tools-package.tar.gz
artifact = $builddir/{scylla_product}-tools-package.tar.gz
build dist-tools-deb: build-submodule-deb tools/java/build/{scylla_product}-tools-package.tar.gz
dir = tools/java
artifact = $builddir/scylla-tools-package.tar.gz
build dist-tools-tar: phony {' '.join(['$builddir/{mode}/dist/tar/scylla-tools-package.tar.gz'.format(mode=mode) for mode in build_modes])}
artifact = $builddir/{scylla_product}-tools-package.tar.gz
build dist-tools-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-tools-package.tar.gz'.format(mode=mode, scylla_product=scylla_product) for mode in build_modes])}
build dist-tools: phony dist-tools-tar dist-tools-rpm dist-tools-deb
rule compat-python3-reloc
@@ -1886,27 +1879,27 @@ with open(buildfile_tmp, 'w') as f:
command = cd $dir && ./reloc/build_rpm.sh --reloc-pkg $artifact --builddir ../../build/redhat
rule compat-python3-deb
command = cd $dir && ./reloc/build_deb.sh --reloc-pkg $artifact --builddir ../../build/debian
build $builddir/release/scylla-python3-package.tar.gz: compat-python3-reloc tools/python3/build/scylla-python3-package.tar.gz
build $builddir/release/{scylla_product}-python3-package.tar.gz: compat-python3-reloc tools/python3/build/{scylla_product}-python3-package.tar.gz
dir = tools/python3
artifact = $builddir/scylla-python3-package.tar.gz
build compat-python3-rpm: compat-python3-rpm tools/python3/build/scylla-python3-package.tar.gz
artifact = $builddir/{scylla_product}-python3-package.tar.gz
build compat-python3-rpm: compat-python3-rpm tools/python3/build/{scylla_product}-python3-package.tar.gz
dir = tools/python3
artifact = $builddir/scylla-python3-package.tar.gz
build compat-python3-deb: compat-python3-deb tools/python3/build/scylla-python3-package.tar.gz
artifact = $builddir/{scylla_product}-python3-package.tar.gz
build compat-python3-deb: compat-python3-deb tools/python3/build/{scylla_product}-python3-package.tar.gz
dir = tools/python3
artifact = $builddir/scylla-python3-package.tar.gz
artifact = $builddir/{scylla_product}-python3-package.tar.gz
build tools/python3/build/scylla-python3-package.tar.gz: build-submodule-reloc
build tools/python3/build/{scylla_product}-python3-package.tar.gz: build-submodule-reloc
reloc_dir = tools/python3
args = --packages "{python3_dependencies}"
build dist-python3-rpm: build-submodule-rpm tools/python3/build/scylla-python3-package.tar.gz
build dist-python3-rpm: build-submodule-rpm tools/python3/build/{scylla_product}-python3-package.tar.gz
dir = tools/python3
artifact = $builddir/scylla-python3-package.tar.gz
build dist-python3-deb: build-submodule-deb tools/python3/build/scylla-python3-package.tar.gz
artifact = $builddir/{scylla_product}-python3-package.tar.gz
build dist-python3-deb: build-submodule-deb tools/python3/build/{scylla_product}-python3-package.tar.gz
dir = tools/python3
artifact = $builddir/scylla-python3-package.tar.gz
build dist-python3-tar: phony {' '.join(['$builddir/{mode}/dist/tar/scylla-python3-package.tar.gz'.format(mode=mode) for mode in build_modes])}
build dist-python3: phony dist-python3-tar dist-python3-rpm dist-python3-deb $builddir/release/scylla-python3-package.tar.gz compat-python3-rpm compat-python3-deb
artifact = $builddir/{scylla_product}-python3-package.tar.gz
build dist-python3-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-python3-package.tar.gz'.format(mode=mode, scylla_product=scylla_product) for mode in build_modes])}
build dist-python3: phony dist-python3-tar dist-python3-rpm dist-python3-deb $builddir/release/{scylla_product}-python3-package.tar.gz compat-python3-rpm compat-python3-deb
build dist-deb: phony dist-server-deb dist-python3-deb dist-jmx-deb dist-tools-deb
build dist-rpm: phony dist-server-rpm dist-python3-rpm dist-jmx-rpm dist-tools-rpm
build dist-tar: phony dist-unified-tar dist-server-tar dist-python3-tar dist-jmx-tar dist-tools-tar
@@ -1921,9 +1914,9 @@ with open(buildfile_tmp, 'w') as f:
'''))
for mode in build_modes:
f.write(textwrap.dedent(f'''\
build $builddir/{mode}/dist/tar/scylla-python3-package.tar.gz: copy tools/python3/build/scylla-python3-package.tar.gz
build $builddir/{mode}/dist/tar/scylla-tools-package.tar.gz: copy tools/java/build/scylla-tools-package.tar.gz
build $builddir/{mode}/dist/tar/scylla-jmx-package.tar.gz: copy tools/jmx/build/scylla-jmx-package.tar.gz
build $builddir/{mode}/dist/tar/{scylla_product}-python3-package.tar.gz: copy tools/python3/build/{scylla_product}-python3-package.tar.gz
build $builddir/{mode}/dist/tar/{scylla_product}-tools-package.tar.gz: copy tools/java/build/{scylla_product}-tools-package.tar.gz
build $builddir/{mode}/dist/tar/{scylla_product}-jmx-package.tar.gz: copy tools/jmx/build/{scylla_product}-jmx-package.tar.gz
build dist-{mode}: phony dist-server-{mode} dist-python3-{mode} dist-tools-{mode} dist-jmx-{mode} dist-unified-{mode}
build dist-check-{mode}: dist-check
@@ -1949,6 +1942,13 @@ with open(buildfile_tmp, 'w') as f:
build mode_list: mode_list
default {modes_list}
''').format(modes_list=' '.join(default_modes), **globals()))
unit_test_list = set(test for test in build_artifacts if test in set(tests))
f.write(textwrap.dedent('''\
rule unit_test_list
command = /usr/bin/env echo -e '{unit_test_list}'
description = List configured unit tests
build unit_test_list: unit_test_list
''').format(unit_test_list="\\n".join(unit_test_list)))
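The `unit_test_list` rule above depends on `echo -e` expanding escape sequences at build time: the Python side joins the test names with a literal backslash-n, so ninja receives a two-character `\n` sequence rather than a real newline. Assuming two illustrative test names:

```python
# Illustrative test names, not taken from the actual test set.
unit_test_list = ['boost/mutation_test', 'boost/querier_cache_test']

# "\\n" is the two characters backslash + n; `echo -e` later turns
# each occurrence into an actual newline when the rule runs.
joined = "\\n".join(unit_test_list)
print(joined)  # boost/mutation_test\nboost/querier_cache_test
```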
f.write(textwrap.dedent('''\
build always: phony
rule scylla_version_gen
@@ -1957,6 +1957,6 @@ with open(buildfile_tmp, 'w') as f:
rule debian_files_gen
command = ./dist/debian/debian_files_gen.py
build $builddir/debian/debian: debian_files_gen | always
''').format(modes_list=' '.join(build_modes), **globals()))
''').format(**globals()))
os.rename(buildfile_tmp, buildfile)


@@ -20,44 +20,47 @@
*/
#include "connection_notifier.hh"
#include "db/query_context.hh"
#include "cql3/constants.hh"
#include "database.hh"
#include "service/storage_proxy.hh"
#include <stdexcept>
namespace db::system_keyspace {
extern const char *const CLIENTS;
}
static sstring to_string(client_type ct) {
sstring to_string(client_type ct) {
switch (ct) {
case client_type::cql: return "cql";
case client_type::thrift: return "thrift";
case client_type::alternator: return "alternator";
default: throw std::runtime_error("Invalid client_type");
}
throw std::runtime_error("Invalid client_type");
}
static sstring to_string(client_connection_stage ccs) {
switch (ccs) {
case client_connection_stage::established: return connection_stage_literal<client_connection_stage::established>;
case client_connection_stage::authenticating: return connection_stage_literal<client_connection_stage::authenticating>;
case client_connection_stage::ready: return connection_stage_literal<client_connection_stage::ready>;
}
throw std::runtime_error("Invalid client_connection_stage");
}
future<> notify_new_client(client_data cd) {
// FIXME: consider prepared statement
const static sstring req
= format("INSERT INTO system.{} (address, port, client_type, shard_id, protocol_version, username) "
"VALUES (?, ?, ?, ?, ?, ?);", db::system_keyspace::CLIENTS);
= format("INSERT INTO system.{} (address, port, client_type, connection_stage, shard_id, protocol_version, username) "
"VALUES (?, ?, ?, ?, ?, ?, ?);", db::system_keyspace::CLIENTS);
return db::execute_cql(req,
std::move(cd.ip), cd.port, to_string(cd.ct), cd.shard_id,
return db::qctx->execute_cql(req,
std::move(cd.ip), cd.port, to_string(cd.ct), to_string(cd.connection_stage), cd.shard_id,
cd.protocol_version.has_value() ? data_value(*cd.protocol_version) : data_value::make_null(int32_type),
cd.username.value_or("anonymous")).discard_result();
}
future<> notify_disconnected_client(gms::inet_address addr, client_type ct, int port) {
future<> notify_disconnected_client(net::inet_address addr, int port, client_type ct) {
// FIXME: consider prepared statement
const static sstring req
= format("DELETE FROM system.{} where address=? AND port=? AND client_type=?;",
db::system_keyspace::CLIENTS);
return db::execute_cql(req, addr.addr(), port, to_string(ct)).discard_result();
return db::qctx->execute_cql(req, std::move(addr), port, to_string(ct)).discard_result();
}
future<> clear_clientlist() {


@@ -20,27 +20,65 @@
*/
#pragma once
#include "gms/inet_address.hh"
#include "db/query_context.hh"
#include <seastar/net/inet_address.hh>
#include <seastar/core/sstring.hh>
#include "seastarx.hh"
#include <optional>
namespace db::system_keyspace {
extern const char *const CLIENTS;
}
enum class client_type {
cql = 0,
thrift,
alternator,
};
sstring to_string(client_type ct);
enum class changed_column {
username = 0,
connection_stage,
driver_name,
driver_version,
hostname,
protocol_version,
};
template <changed_column column> constexpr const char* column_literal = "";
template <> inline constexpr const char* column_literal<changed_column::username> = "username";
template <> inline constexpr const char* column_literal<changed_column::connection_stage> = "connection_stage";
template <> inline constexpr const char* column_literal<changed_column::driver_name> = "driver_name";
template <> inline constexpr const char* column_literal<changed_column::driver_version> = "driver_version";
template <> inline constexpr const char* column_literal<changed_column::hostname> = "hostname";
template <> inline constexpr const char* column_literal<changed_column::protocol_version> = "protocol_version";
enum class client_connection_stage {
established = 0,
authenticating,
ready,
};
template <client_connection_stage ccs> constexpr const char* connection_stage_literal = "";
template <> inline constexpr const char* connection_stage_literal<client_connection_stage::established> = "ESTABLISHED";
template <> inline constexpr const char* connection_stage_literal<client_connection_stage::authenticating> = "AUTHENTICATING";
template <> inline constexpr const char* connection_stage_literal<client_connection_stage::ready> = "READY";
// Representation of a row in `system.clients'. std::optionals are for nullable cells.
struct client_data {
gms::inet_address ip;
net::inet_address ip;
int32_t port;
client_type ct;
client_connection_stage connection_stage = client_connection_stage::established;
int32_t shard_id; /// ID of server-side shard which is processing the connection.
// `optional' column means that it's nullable (possibly because it's
// unimplemented yet). If you want to fill ("implement") any of them,
// remember to update the query in `notify_new_client()'.
std::optional<sstring> connection_stage;
std::optional<sstring> driver_name;
std::optional<sstring> driver_version;
std::optional<sstring> hostname;
@@ -52,6 +90,17 @@ struct client_data {
};
future<> notify_new_client(client_data cd);
future<> notify_disconnected_client(gms::inet_address addr, client_type ct, int port);
future<> notify_disconnected_client(net::inet_address addr, int port, client_type ct);
future<> clear_clientlist();
template <changed_column column_enum_val>
struct notify_client_change {
template <typename T>
future<> operator()(net::inet_address addr, int port, client_type ct, T&& value) {
const static sstring req
= format("UPDATE system.{} SET {}=? WHERE address=? AND port=? AND client_type=?;",
db::system_keyspace::CLIENTS, column_literal<column_enum_val>);
return db::qctx->execute_cql(req, std::forward<T>(value), std::move(addr), port, to_string(ct)).discard_result();
}
};


@@ -277,7 +277,14 @@ public:
return ac;
}
class inserter_iterator : public std::iterator<std::output_iterator_tag, counter_shard> {
class inserter_iterator {
public:
using iterator_category = std::output_iterator_tag;
using value_type = counter_shard;
using difference_type = std::ptrdiff_t;
using pointer = counter_shard*;
using reference = counter_shard&;
private:
counter_cell_builder* _builder;
public:
explicit inserter_iterator(counter_cell_builder& b) : _builder(&b) { }
@@ -311,7 +318,14 @@ protected:
basic_atomic_cell_view<is_mutable> _cell;
linearized_value_view _value;
private:
class shard_iterator : public std::iterator<std::input_iterator_tag, basic_counter_shard_view<is_mutable>> {
class shard_iterator {
public:
using iterator_category = std::input_iterator_tag;
using value_type = basic_counter_shard_view<is_mutable>;
using difference_type = std::ptrdiff_t;
using pointer = basic_counter_shard_view<is_mutable>*;
using reference = basic_counter_shard_view<is_mutable>&;
private:
pointer_type _current;
basic_counter_shard_view<is_mutable> _current_view;
public:


@@ -192,12 +192,9 @@ public:
virtual ::shared_ptr<terminal> bind(const query_options& options) override {
auto bytes = bind_and_get(options);
if (bytes.is_null()) {
if (!bytes) {
return ::shared_ptr<terminal>{};
}
if (bytes.is_unset_value()) {
return UNSET_VALUE;
}
return ::make_shared<constants::value>(std::move(cql3::raw_value::make_value(to_bytes(*bytes))));
}
};


@@ -27,9 +27,7 @@
#include <fmt/ostream.h>
#include <unordered_map>
#include "cql3/constants.hh"
#include "cql3/lists.hh"
#include "cql3/statements/request_validations.hh"
#include "cql3/tuples.hh"
#include "index/secondary_index_manager.hh"
#include "types/list.hh"
@@ -419,8 +417,6 @@ bool is_one_of(const column_value& col, term& rhs, const column_value_eval_bag&
} else if (auto mkr = dynamic_cast<lists::marker*>(&rhs)) {
// This is `a IN ?`. RHS elements are values representable as bytes_opt.
const auto values = static_pointer_cast<lists::value>(mkr->bind(bag.options));
statements::request_validations::check_not_null(
values, "Invalid null value for column %s", col.col->name_as_text());
return boost::algorithm::any_of(values->get_elements(), [&] (const bytes_opt& b) {
return equal(b, col, bag);
});
@@ -572,8 +568,7 @@ const auto deref = boost::adaptors::transformed([] (const bytes_opt& b) { return
/// Returns possible values from t, which must be RHS of IN.
value_list get_IN_values(
const ::shared_ptr<term>& t, const query_options& options, const serialized_compare& comparator,
sstring_view column_name) {
const ::shared_ptr<term>& t, const query_options& options, const serialized_compare& comparator) {
// RHS is prepared differently for different CQL cases. Cast it dynamically to discern which case this is.
if (auto dv = dynamic_pointer_cast<lists::delayed_value>(t)) {
// Case `a IN (1,2,3)`.
@@ -583,12 +578,8 @@ value_list get_IN_values(
return to_sorted_vector(std::move(result_range), comparator);
} else if (auto mkr = dynamic_pointer_cast<lists::marker>(t)) {
// Case `a IN ?`. Collect all list-element values.
const auto val = mkr->bind(options);
if (val == constants::UNSET_VALUE) {
throw exceptions::invalid_request_exception(format("Invalid unset value for column {}", column_name));
}
statements::request_validations::check_not_null(val, "Invalid null value for IN tuple");
return to_sorted_vector(static_pointer_cast<lists::value>(val)->get_elements() | non_null | deref, comparator);
const auto val = static_pointer_cast<lists::value>(mkr->bind(options));
return to_sorted_vector(val->get_elements() | non_null | deref, comparator);
}
throw std::logic_error(format("get_IN_values(single column) on invalid term {}", *t));
}
@@ -619,13 +610,13 @@ static constexpr bool inclusive = true, exclusive = false;
nonwrapping_range<bytes> to_range(oper_t op, const bytes& val) {
switch (op) {
case oper_t::GT:
return nonwrapping_range<bytes>::make_starting_with(range_bound(val, exclusive));
return nonwrapping_range<bytes>::make_starting_with(interval_bound(val, exclusive));
case oper_t::GTE:
return nonwrapping_range<bytes>::make_starting_with(range_bound(val, inclusive));
return nonwrapping_range<bytes>::make_starting_with(interval_bound(val, inclusive));
case oper_t::LT:
return nonwrapping_range<bytes>::make_ending_with(range_bound(val, exclusive));
return nonwrapping_range<bytes>::make_ending_with(interval_bound(val, exclusive));
case oper_t::LTE:
return nonwrapping_range<bytes>::make_ending_with(range_bound(val, inclusive));
return nonwrapping_range<bytes>::make_ending_with(interval_bound(val, inclusive));
default:
throw std::logic_error(format("to_range: unknown comparison operator {}", op));
}
@@ -695,7 +686,7 @@ value_set possible_lhs_values(const column_definition* cdef, const expression& e
return oper.op == oper_t::EQ ? value_set(value_list{*val})
: to_range(oper.op, *val);
} else if (oper.op == oper_t::IN) {
return get_IN_values(oper.rhs, options, type->as_less_comparator(), cdef->name_as_text());
return get_IN_values(oper.rhs, options, type->as_less_comparator());
}
throw std::logic_error(format("possible_lhs_values: unhandled operator {}", oper));
},
@@ -740,9 +731,9 @@ value_set possible_lhs_values(const column_definition* cdef, const expression& e
if (oper.op == oper_t::EQ) {
return value_list{*val};
} else if (oper.op == oper_t::GT) {
return nonwrapping_range<bytes>::make_starting_with(range_bound(*val, exclusive));
return nonwrapping_range<bytes>::make_starting_with(interval_bound(*val, exclusive));
} else if (oper.op == oper_t::GTE) {
return nonwrapping_range<bytes>::make_starting_with(range_bound(*val, inclusive));
return nonwrapping_range<bytes>::make_starting_with(interval_bound(*val, inclusive));
}
static const bytes MININT = serialized(std::numeric_limits<int64_t>::min()),
MAXINT = serialized(std::numeric_limits<int64_t>::max());
@@ -750,9 +741,9 @@ value_set possible_lhs_values(const column_definition* cdef, const expression& e
// that as MAXINT for some reason.
const auto adjusted_val = (*val == MININT) ? serialized(MAXINT) : *val;
if (oper.op == oper_t::LT) {
return nonwrapping_range<bytes>::make_ending_with(range_bound(adjusted_val, exclusive));
return nonwrapping_range<bytes>::make_ending_with(interval_bound(adjusted_val, exclusive));
} else if (oper.op == oper_t::LTE) {
return nonwrapping_range<bytes>::make_ending_with(range_bound(adjusted_val, inclusive));
return nonwrapping_range<bytes>::make_ending_with(interval_bound(adjusted_val, inclusive));
}
throw std::logic_error(format("get_token_interval invalid operator {}", oper.op));
},


@@ -76,7 +76,7 @@ functions::init() noexcept {
// that has less information in it. Given how unlikely it is that
// we will run out of memory this early, having a better core dump
// if we do seems like a good trade-off.
memory::disable_failure_guard dfg;
memory::scoped_critical_alloc_section dfg;
std::unordered_multimap<function_name, shared_ptr<function>> ret;
auto declare = [&ret] (shared_ptr<function> f) { ret.emplace(f->name(), f); };


@@ -305,12 +305,6 @@ maps::setter_by_key::execute(mutation& m, const clustering_key_prefix& prefix, c
assert(column.type->is_multi_cell()); // "Attempted to set a value for a single key on a frozen map"m
auto key = _k->bind_and_get(params._options);
auto value = _t->bind_and_get(params._options);
if (value.is_unset_value()) {
return;
}
if (key.is_unset_value() || value.is_unset_value()) {
throw invalid_request_exception("Invalid unset map key");
}
if (!key) {
throw invalid_request_exception("Invalid null map key");
}


@@ -50,12 +50,11 @@ const cql_config default_cql_config;
thread_local const query_options::specific_options query_options::specific_options::DEFAULT{-1, {}, {}, api::missing_timestamp};
thread_local query_options query_options::DEFAULT{default_cql_config,
db::consistency_level::ONE, infinite_timeout_config, std::nullopt,
db::consistency_level::ONE, std::nullopt,
std::vector<cql3::raw_value_view>(), false, query_options::specific_options::DEFAULT, cql_serialization_format::latest()};
query_options::query_options(const cql_config& cfg,
db::consistency_level consistency,
const ::timeout_config& timeout_config,
std::optional<std::vector<sstring_view>> names,
std::vector<cql3::raw_value> values,
std::vector<cql3::raw_value_view> value_views,
@@ -64,7 +63,6 @@ query_options::query_options(const cql_config& cfg,
cql_serialization_format sf)
: _cql_config(cfg)
, _consistency(consistency)
, _timeout_config(timeout_config)
, _names(std::move(names))
, _values(std::move(values))
, _value_views(value_views)
@@ -76,7 +74,6 @@ query_options::query_options(const cql_config& cfg,
query_options::query_options(const cql_config& cfg,
db::consistency_level consistency,
const ::timeout_config& timeout_config,
std::optional<std::vector<sstring_view>> names,
std::vector<cql3::raw_value> values,
bool skip_metadata,
@@ -84,7 +81,6 @@ query_options::query_options(const cql_config& cfg,
cql_serialization_format sf)
: _cql_config(cfg)
, _consistency(consistency)
, _timeout_config(timeout_config)
, _names(std::move(names))
, _values(std::move(values))
, _value_views()
@@ -97,7 +93,6 @@ query_options::query_options(const cql_config& cfg,
query_options::query_options(const cql_config& cfg,
db::consistency_level consistency,
const ::timeout_config& timeout_config,
std::optional<std::vector<sstring_view>> names,
std::vector<cql3::raw_value_view> value_views,
bool skip_metadata,
@@ -105,7 +100,6 @@ query_options::query_options(const cql_config& cfg,
cql_serialization_format sf)
: _cql_config(cfg)
, _consistency(consistency)
, _timeout_config(timeout_config)
, _names(std::move(names))
, _values()
, _value_views(std::move(value_views))
@@ -115,12 +109,11 @@ query_options::query_options(const cql_config& cfg,
{
}
query_options::query_options(db::consistency_level cl, const ::timeout_config& timeout_config, std::vector<cql3::raw_value> values,
query_options::query_options(db::consistency_level cl, std::vector<cql3::raw_value> values,
specific_options options)
: query_options(
default_cql_config,
cl,
timeout_config,
{},
std::move(values),
false,
@@ -133,7 +126,6 @@ query_options::query_options(db::consistency_level cl, const ::timeout_config& t
query_options::query_options(std::unique_ptr<query_options> qo, lw_shared_ptr<service::pager::paging_state> paging_state)
: query_options(qo->_cql_config,
qo->_consistency,
qo->get_timeout_config(),
std::move(qo->_names),
std::move(qo->_values),
std::move(qo->_value_views),
@@ -146,7 +138,6 @@ query_options::query_options(std::unique_ptr<query_options> qo, lw_shared_ptr<se
query_options::query_options(std::unique_ptr<query_options> qo, lw_shared_ptr<service::pager::paging_state> paging_state, int32_t page_size)
: query_options(qo->_cql_config,
qo->_consistency,
qo->get_timeout_config(),
std::move(qo->_names),
std::move(qo->_values),
std::move(qo->_value_views),
@@ -158,7 +149,7 @@ query_options::query_options(std::unique_ptr<query_options> qo, lw_shared_ptr<se
query_options::query_options(std::vector<cql3::raw_value> values)
: query_options(
db::consistency_level::ONE, infinite_timeout_config, std::move(values))
db::consistency_level::ONE, std::move(values))
{}
void query_options::prepare(const std::vector<lw_shared_ptr<column_specification>>& specs)


@@ -51,7 +51,6 @@
#include "cql3/column_identifier.hh"
#include "cql3/values.hh"
#include "cql_serialization_format.hh"
#include "timeout_config.hh"
namespace cql3 {
@@ -75,7 +74,6 @@ public:
private:
const cql_config& _cql_config;
const db::consistency_level _consistency;
const timeout_config& _timeout_config;
const std::optional<std::vector<sstring_view>> _names;
std::vector<cql3::raw_value> _values;
std::vector<cql3::raw_value_view> _value_views;
@@ -109,7 +107,6 @@ public:
explicit query_options(const cql_config& cfg,
db::consistency_level consistency,
const timeout_config& timeouts,
std::optional<std::vector<sstring_view>> names,
std::vector<cql3::raw_value> values,
bool skip_metadata,
@@ -117,7 +114,6 @@ public:
cql_serialization_format sf);
explicit query_options(const cql_config& cfg,
db::consistency_level consistency,
const timeout_config& timeouts,
std::optional<std::vector<sstring_view>> names,
std::vector<cql3::raw_value> values,
std::vector<cql3::raw_value_view> value_views,
@@ -126,7 +122,6 @@ public:
cql_serialization_format sf);
explicit query_options(const cql_config& cfg,
db::consistency_level consistency,
const timeout_config& timeouts,
std::optional<std::vector<sstring_view>> names,
std::vector<cql3::raw_value_view> value_views,
bool skip_metadata,
@@ -158,13 +153,10 @@ public:
// forInternalUse
explicit query_options(std::vector<cql3::raw_value> values);
explicit query_options(db::consistency_level, const timeout_config& timeouts,
std::vector<cql3::raw_value> values, specific_options options = specific_options::DEFAULT);
explicit query_options(db::consistency_level, std::vector<cql3::raw_value> values, specific_options options = specific_options::DEFAULT);
explicit query_options(std::unique_ptr<query_options>, lw_shared_ptr<service::pager::paging_state> paging_state);
explicit query_options(std::unique_ptr<query_options>, lw_shared_ptr<service::pager::paging_state> paging_state, int32_t page_size);
const timeout_config& get_timeout_config() const { return _timeout_config; }
db::consistency_level get_consistency() const {
return _consistency;
}
@@ -258,7 +250,7 @@ query_options::query_options(query_options&& o, std::vector<OneMutationDataRange
std::vector<query_options> tmp;
tmp.reserve(values_ranges.size());
std::transform(values_ranges.begin(), values_ranges.end(), std::back_inserter(tmp), [this](auto& values_range) {
return query_options(_cql_config, _consistency, _timeout_config, {}, std::move(values_range), _skip_metadata, _options, _cql_serialization_format);
return query_options(_cql_config, _consistency, {}, std::move(values_range), _skip_metadata, _options, _cql_serialization_format);
});
_batch_options = std::move(tmp);
}

View File

@@ -61,8 +61,6 @@ logging::logger log("query_processor");
logging::logger prep_cache_log("prepared_statements_cache");
logging::logger authorized_prepared_statements_cache_log("authorized_prepared_statements_cache");
distributed<query_processor> _the_query_processor;
const sstring query_processor::CQL_VERSION = "3.3.1";
const std::chrono::minutes prepared_statements_cache::entry_expiry = std::chrono::minutes(60);
@@ -621,7 +619,6 @@ query_options query_processor::make_internal_options(
const statements::prepared_statement::checked_weak_ptr& p,
const std::initializer_list<data_value>& values,
db::consistency_level cl,
const timeout_config& timeout_config,
int32_t page_size) const {
if (p->bound_names.size() != values.size()) {
throw std::invalid_argument(
@@ -645,11 +642,10 @@ query_options query_processor::make_internal_options(
api::timestamp_type ts = api::missing_timestamp;
return query_options(
cl,
timeout_config,
bound_values,
cql3::query_options::specific_options{page_size, std::move(paging_state), serial_consistency, ts});
}
return query_options(cl, timeout_config, bound_values);
return query_options(cl, bound_values);
}
statements::prepared_statement::checked_weak_ptr query_processor::prepare_internal(const sstring& query_string) {
@@ -673,7 +669,7 @@ struct internal_query_state {
::shared_ptr<internal_query_state> query_processor::create_paged_state(const sstring& query_string,
const std::initializer_list<data_value>& values, int32_t page_size) {
auto p = prepare_internal(query_string);
auto opts = make_internal_options(p, values, db::consistency_level::ONE, infinite_timeout_config, page_size);
auto opts = make_internal_options(p, values, db::consistency_level::ONE, page_size);
::shared_ptr<internal_query_state> res = ::make_shared<internal_query_state>(
internal_query_state{
query_string,
@@ -791,7 +787,16 @@ future<::shared_ptr<untyped_result_set>>
query_processor::execute_internal(
const sstring& query_string,
db::consistency_level cl,
const timeout_config& timeout_config,
const std::initializer_list<data_value>& values,
bool cache) {
return execute_internal(query_string, cl, *_internal_state, values, cache);
}
future<::shared_ptr<untyped_result_set>>
query_processor::execute_internal(
const sstring& query_string,
db::consistency_level cl,
service::query_state& query_state,
const std::initializer_list<data_value>& values,
bool cache) {
@@ -799,13 +804,13 @@ query_processor::execute_internal(
log.trace("execute_internal: {}\"{}\" ({})", cache ? "(cached) " : "", query_string, ::join(", ", values));
}
if (cache) {
return execute_with_params(prepare_internal(query_string), cl, timeout_config, values);
return execute_with_params(prepare_internal(query_string), cl, query_state, values);
} else {
auto p = parse_statement(query_string)->prepare(_db, _cql_stats);
p->statement->raw_cql_statement = query_string;
p->statement->validate(_proxy, *_internal_state);
auto checked_weak_ptr = p->checked_weak_from_this();
return execute_with_params(std::move(checked_weak_ptr), cl, timeout_config, values).finally([p = std::move(p)] {});
return execute_with_params(std::move(checked_weak_ptr), cl, query_state, values).finally([p = std::move(p)] {});
}
}
@@ -813,11 +818,11 @@ future<::shared_ptr<untyped_result_set>>
query_processor::execute_with_params(
statements::prepared_statement::checked_weak_ptr p,
db::consistency_level cl,
const timeout_config& timeout_config,
service::query_state& query_state,
const std::initializer_list<data_value>& values) {
auto opts = make_internal_options(p, values, cl, timeout_config);
return do_with(std::move(opts), [this, p = std::move(p)](auto & opts) {
return p->statement->execute(_proxy, *_internal_state, opts).then([](auto msg) {
auto opts = make_internal_options(p, values, cl);
return do_with(std::move(opts), [this, &query_state, p = std::move(p)](auto & opts) {
return p->statement->execute(_proxy, query_state, opts).then([](auto msg) {
return make_ready_future<::shared_ptr<untyped_result_set>>(::make_shared<untyped_result_set>(msg));
});
});

View File

@@ -215,8 +215,7 @@ public:
// creating namespaces, etc) is explicitly forbidden via this interface.
future<::shared_ptr<untyped_result_set>>
execute_internal(const sstring& query_string, const std::initializer_list<data_value>& values = { }) {
return execute_internal(query_string, db::consistency_level::ONE,
infinite_timeout_config, values, true);
return execute_internal(query_string, db::consistency_level::ONE, values, true);
}
statements::prepared_statement::checked_weak_ptr prepare_internal(const sstring& query);
@@ -305,14 +304,19 @@ public:
future<::shared_ptr<untyped_result_set>> execute_internal(
const sstring& query_string,
db::consistency_level,
const timeout_config& timeout_config,
const std::initializer_list<data_value>& = { },
bool cache = false);
future<::shared_ptr<untyped_result_set>> execute_internal(
const sstring& query_string,
db::consistency_level,
service::query_state& query_state,
const std::initializer_list<data_value>& = { },
bool cache = false);
future<::shared_ptr<untyped_result_set>> execute_with_params(
statements::prepared_statement::checked_weak_ptr p,
db::consistency_level,
const timeout_config& timeout_config,
service::query_state& query_state,
const std::initializer_list<data_value>& = { });
future<::shared_ptr<cql_transport::messages::result_message::prepared>>
@@ -341,7 +345,6 @@ private:
const statements::prepared_statement::checked_weak_ptr& p,
const std::initializer_list<data_value>&,
db::consistency_level,
const timeout_config& timeout_config,
int32_t page_size = -1) const;
future<::shared_ptr<cql_transport::messages::result_message>>
@@ -464,14 +467,4 @@ private:
::shared_ptr<cql_statement> statement);
};
extern seastar::sharded<query_processor> _the_query_processor;
inline seastar::sharded<query_processor>& get_query_processor() {
return _the_query_processor;
}
inline query_processor& get_local_query_processor() {
return _the_query_processor.local();
}
}

View File

@@ -193,12 +193,12 @@ statement_restrictions::statement_restrictions(database& db,
const expr::allow_local_index allow_local(
!_partition_key_restrictions->has_unrestricted_components(*_schema)
&& _partition_key_restrictions->is_all_eq());
const bool has_queriable_clustering_column_index = _clustering_columns_restrictions->has_supporting_index(sim, allow_local);
const bool has_queriable_pk_index = _partition_key_restrictions->has_supporting_index(sim, allow_local);
const bool has_queriable_regular_index = _nonprimary_key_restrictions->has_supporting_index(sim, allow_local);
_has_queriable_ck_index = _clustering_columns_restrictions->has_supporting_index(sim, allow_local);
_has_queriable_pk_index = _partition_key_restrictions->has_supporting_index(sim, allow_local);
_has_queriable_regular_index = _nonprimary_key_restrictions->has_supporting_index(sim, allow_local);
// At this point, the select statement is fully constructed, but we still have a few things to validate
process_partition_key_restrictions(has_queriable_pk_index, for_view, allow_filtering);
process_partition_key_restrictions(for_view, allow_filtering);
// Some but not all of the partition key columns have been specified;
// hence we need to turn these restrictions into index expressions.
@@ -227,10 +227,10 @@ statement_restrictions::statement_restrictions(database& db,
}
}
process_clustering_columns_restrictions(has_queriable_clustering_column_index, select_a_collection, for_view, allow_filtering);
process_clustering_columns_restrictions(select_a_collection, for_view, allow_filtering);
// Covers indexes on the first clustering column (among others).
if (_is_key_range && has_queriable_clustering_column_index) {
if (_is_key_range && _has_queriable_ck_index) {
_uses_secondary_indexing = true;
}
@@ -265,7 +265,7 @@ statement_restrictions::statement_restrictions(database& db,
}
if (!_nonprimary_key_restrictions->empty()) {
if (has_queriable_regular_index) {
if (_has_queriable_regular_index) {
_uses_secondary_indexing = true;
} else if (!allow_filtering) {
throw exceptions::invalid_request_exception("Cannot execute this query as it might involve data filtering and "
@@ -401,7 +401,7 @@ std::vector<const column_definition*> statement_restrictions::get_column_defs_fo
return column_defs_for_filtering;
}
void statement_restrictions::process_partition_key_restrictions(bool has_queriable_index, bool for_view, bool allow_filtering) {
void statement_restrictions::process_partition_key_restrictions(bool for_view, bool allow_filtering) {
// If there is a queriable index, no special conditions are required on the other restrictions.
// But we still need to know 2 things:
// - If we don't have a queriable index, is the query ok
@@ -412,17 +412,17 @@ void statement_restrictions::process_partition_key_restrictions(bool has_queriab
_is_key_range = true;
} else if (_partition_key_restrictions->empty()) {
_is_key_range = true;
_uses_secondary_indexing = has_queriable_index;
_uses_secondary_indexing = _has_queriable_pk_index;
}
if (_partition_key_restrictions->needs_filtering(*_schema)) {
if (!allow_filtering && !for_view && !has_queriable_index) {
if (!allow_filtering && !for_view && !_has_queriable_pk_index) {
throw exceptions::invalid_request_exception("Cannot execute this query as it might involve data filtering and "
"thus may have unpredictable performance. If you want to execute "
"this query despite the performance unpredictability, use ALLOW FILTERING");
}
_is_key_range = true;
_uses_secondary_indexing = has_queriable_index;
_uses_secondary_indexing = _has_queriable_pk_index;
}
}
@@ -435,7 +435,7 @@ bool statement_restrictions::has_unrestricted_clustering_columns() const {
return _clustering_columns_restrictions->has_unrestricted_components(*_schema);
}
void statement_restrictions::process_clustering_columns_restrictions(bool has_queriable_index, bool select_a_collection, bool for_view, bool allow_filtering) {
void statement_restrictions::process_clustering_columns_restrictions(bool select_a_collection, bool for_view, bool allow_filtering) {
if (!has_clustering_columns_restriction()) {
return;
}
@@ -445,13 +445,13 @@ void statement_restrictions::process_clustering_columns_restrictions(bool has_qu
"Cannot restrict clustering columns by IN relations when a collection is selected by the query");
}
if (find_atom(_clustering_columns_restrictions->expression, expr::is_on_collection)
&& !has_queriable_index && !allow_filtering) {
&& !_has_queriable_ck_index && !allow_filtering) {
throw exceptions::invalid_request_exception(
"Cannot restrict clustering columns by a CONTAINS relation without a secondary index or filtering");
}
if (has_clustering_columns_restriction() && _clustering_columns_restrictions->needs_filtering(*_schema)) {
if (has_queriable_index) {
if (_has_queriable_ck_index) {
_uses_secondary_indexing = true;
} else if (!allow_filtering && !for_view) {
auto clustering_columns_iter = _schema->clustering_key_columns().begin();
@@ -490,24 +490,62 @@ std::vector<query::clustering_range> statement_restrictions::get_clustering_boun
return _clustering_columns_restrictions->bounds_ranges(options);
}
bool statement_restrictions::need_filtering() const {
uint32_t number_of_restricted_columns_for_indexing = 0;
for (auto&& restrictions : _index_restrictions) {
number_of_restricted_columns_for_indexing += restrictions->size();
}
namespace {
int number_of_filtering_restrictions = _nonprimary_key_restrictions->size();
// If the whole partition key is restricted, it does not imply filtering
if (_partition_key_restrictions->has_unrestricted_components(*_schema) || !_partition_key_restrictions->is_all_eq()) {
number_of_filtering_restrictions += _partition_key_restrictions->size() + _clustering_columns_restrictions->size();
} else if (_clustering_columns_restrictions->has_unrestricted_components(*_schema)) {
number_of_filtering_restrictions += _clustering_columns_restrictions->size() - _clustering_columns_restrictions->prefix_size();
/// True iff get_partition_slice_for_global_index_posting_list() will be able to calculate the token value from the
/// given restrictions. Keep in sync with the get_partition_slice_for_global_index_posting_list() source.
bool token_known(const statement_restrictions& r) {
return !r.has_partition_key_unrestricted_components() && r.get_partition_key_restrictions()->is_all_eq();
}
} // anonymous namespace
bool statement_restrictions::need_filtering() const {
using namespace expr;
const auto npart = _partition_key_restrictions->size();
if (npart > 0 && npart < _schema->partition_key_size()) {
// Can't calculate the token value, so a naive base-table query must be filtered. Same for any index tables,
// except if there's only one restriction supported by an index.
return !(npart == 1 && _has_queriable_pk_index &&
_clustering_columns_restrictions->empty() && _nonprimary_key_restrictions->empty());
}
return number_of_restricted_columns_for_indexing > 1
|| (number_of_restricted_columns_for_indexing == 0 && _partition_key_restrictions->empty() && !_clustering_columns_restrictions->empty())
|| (number_of_restricted_columns_for_indexing != 0 && _nonprimary_key_restrictions->has_multiple_contains())
|| (number_of_restricted_columns_for_indexing != 0 && !_uses_secondary_indexing)
|| (_uses_secondary_indexing && number_of_filtering_restrictions > 1);
if (_partition_key_restrictions->needs_filtering(*_schema)) {
// We most likely cannot calculate token(s). Neither base-table nor index-table queries can avoid filtering.
return true;
}
// Now we know the partition key is either unrestricted or fully restricted.
const auto nreg = _nonprimary_key_restrictions->size();
if (nreg > 1 || (nreg == 1 && !_has_queriable_regular_index)) {
return true; // Regular columns are unsorted in storage and no single index suffices.
}
if (nreg == 1) { // Single non-key restriction supported by an index.
// Will the index-table query require filtering? That depends on whether its clustering key is restricted to a
// continuous range. Recall that this clustering key is (token, pk, ck) of the base table.
if (npart == 0 && _clustering_columns_restrictions->empty()) {
return false; // No clustering key restrictions => whole partitions.
}
return !token_known(*this) || _clustering_columns_restrictions->needs_filtering(*_schema);
}
// Now we know there are no nonkey restrictions.
if (dynamic_pointer_cast<multi_column_restriction>(_clustering_columns_restrictions)) {
// Multicolumn bounds mean lexicographic order, implying a continuous clustering range. Multicolumn IN means a
// finite set of continuous ranges. Multicolumn restrictions cannot currently be combined with single-column
// clustering restrictions. Therefore, a continuous clustering range is guaranteed.
return false;
}
if (!_clustering_columns_restrictions->needs_filtering(*_schema)) { // Guaranteed continuous clustering range.
return false;
}
// Now we know there are some clustering-column restrictions that are out-of-order or not EQ. A naive base-table
// query must be filtered. What about an index-table query? That can only avoid filtering if there is exactly one
// EQ supported by an index.
return !(_clustering_columns_restrictions->size() == 1 && _has_queriable_ck_index);
// TODO: it is also possible to avoid filtering here if a non-empty CK prefix is specified and token_known, plus
// there's exactly one out-of-order-but-index-supported clustering-column restriction.
}
void statement_restrictions::validate_secondary_index_selections(bool selects_only_static_columns) {

View File

@@ -102,6 +102,8 @@ private:
*/
bool _is_key_range = false;
bool _has_queriable_regular_index = false, _has_queriable_pk_index = false, _has_queriable_ck_index = false;
public:
/**
* Creates a new empty <code>StatementRestrictions</code>.
@@ -209,7 +211,7 @@ public:
*/
bool has_unrestricted_clustering_columns() const;
private:
void process_partition_key_restrictions(bool has_queriable_index, bool for_view, bool allow_filtering);
void process_partition_key_restrictions(bool for_view, bool allow_filtering);
/**
* Processes the clustering column restrictions.
@@ -218,7 +220,7 @@ private:
* @param select_a_collection <code>true</code> if the query should return a collection column
* @throws InvalidRequestException if the request is invalid
*/
void process_clustering_columns_restrictions(bool has_queriable_index, bool select_a_collection, bool for_view, bool allow_filtering);
void process_clustering_columns_restrictions(bool select_a_collection, bool for_view, bool allow_filtering);
/**
* Returns the <code>Restrictions</code> for the specified type of columns.

View File

@@ -315,7 +315,7 @@ sets::discarder::execute(mutation& m, const clustering_key_prefix& row_key, cons
assert(column.type->is_multi_cell()); // "Attempted to remove items from a frozen set";
auto&& value = _t->bind(params._options);
if (!value || value == constants::UNSET_VALUE) {
if (!value) {
return;
}

View File

@@ -93,7 +93,7 @@ void cql3::statements::alter_keyspace_statement::validate(service::storage_proxy
future<shared_ptr<cql_transport::event::schema_change>> cql3::statements::alter_keyspace_statement::announce_migration(service::storage_proxy& proxy, bool is_local_only) const {
auto old_ksm = proxy.get_db().local().find_keyspace(_name).metadata();
const auto& tm = proxy.get_token_metadata();
const auto& tm = *proxy.get_token_metadata_ptr();
return service::get_local_migration_manager().announce_keyspace_update(_attrs->as_ks_metadata_update(old_ksm, tm), is_local_only).then([this] {
using namespace cql_transport;
return ::make_shared<event::schema_change>(

View File

@@ -70,7 +70,9 @@ alter_table_statement::alter_table_statement(shared_ptr<cf_name> name,
}
future<> alter_table_statement::check_access(service::storage_proxy& proxy, const service::client_state& state) const {
return state.has_column_family_access(keyspace(), column_family(), auth::permission::ALTER);
using cdt = auth::command_desc::type;
return state.has_column_family_access(keyspace(), column_family(), auth::permission::ALTER,
_type == type::opts ? cdt::ALTER_WITH_OPTS : cdt::OTHER);
}
void alter_table_statement::validate(service::storage_proxy& proxy, const service::client_state& state) const

View File

@@ -38,6 +38,7 @@
*/
#include "batch_statement.hh"
#include "cql3/util.hh"
#include "raw/batch_statement.hh"
#include "db/config.hh"
#include "db/consistency_level_validations.hh"
@@ -259,6 +260,7 @@ static thread_local inheriting_concrete_execution_stage<
future<shared_ptr<cql_transport::messages::result_message>> batch_statement::execute(
service::storage_proxy& storage, service::query_state& state, const query_options& options) const {
cql3::util::validate_timestamp(options, _attrs);
return batch_stage(this, seastar::ref(storage), seastar::ref(state),
seastar::cref(options), false, options.get_timestamp(state));
}
@@ -284,7 +286,7 @@ future<shared_ptr<cql_transport::messages::result_message>> batch_statement::do_
++_stats.batches;
_stats.statements_in_batches += _statements.size();
auto timeout = db::timeout_clock::now() + options.get_timeout_config().*get_timeout_config_selector();
auto timeout = db::timeout_clock::now() + query_state.get_client_state().get_timeout_config().*get_timeout_config_selector();
return get_mutations(storage, options, timeout, local, now, query_state).then([this, &storage, &options, timeout, tr_state = query_state.get_trace_state(),
permit = query_state.get_permit()] (std::vector<mutation> ms) mutable {
return execute_without_conditions(storage, std::move(ms), options.get_consistency(), timeout, std::move(tr_state), std::move(permit));
@@ -341,7 +343,7 @@ future<shared_ptr<cql_transport::messages::result_message>> batch_statement::exe
schema_ptr schema;
db::timeout_clock::time_point now = db::timeout_clock::now();
const timeout_config& cfg = options.get_timeout_config();
const timeout_config& cfg = qs.get_client_state().get_timeout_config();
auto batch_timeout = now + cfg.write_timeout; // Statement timeout.
auto cas_timeout = now + cfg.cas_timeout; // Ballot contention timeout.
auto read_timeout = now + cfg.read_timeout; // Query timeout.

View File

@@ -306,13 +306,6 @@ create_index_statement::announce_migration(service::storage_proxy& proxy, bool i
format("Index {} is a duplicate of existing index {}", index.name(), existing_index.value().name()));
}
}
auto index_table_name = secondary_index::index_table_name(accepted_name);
if (db.has_schema(keyspace(), index_table_name)) {
return make_exception_future<::shared_ptr<cql_transport::event::schema_change>>(
exceptions::invalid_request_exception(format("Index {} cannot be created, because table {} already exists",
accepted_name, index_table_name))
);
}
++_cql_stats->secondary_index_creates;
schema_builder builder{schema};
builder.with_index(index);

View File

@@ -109,7 +109,7 @@ void create_keyspace_statement::validate(service::storage_proxy&, const service:
future<shared_ptr<cql_transport::event::schema_change>> create_keyspace_statement::announce_migration(service::storage_proxy& proxy, bool is_local_only) const
{
return make_ready_future<>().then([this, p = proxy.shared_from_this(), is_local_only] {
const auto& tm = p->get_token_metadata();
const auto& tm = *p->get_token_metadata_ptr();
return service::get_local_migration_manager().announce_new_keyspace(_attrs->as_ks_metadata(_name, tm), is_local_only);
}).then_wrapped([this] (auto&& f) {
try {
@@ -147,7 +147,7 @@ future<> cql3::statements::create_keyspace_statement::grant_permissions_to_creat
future<::shared_ptr<messages::result_message>>
create_keyspace_statement::execute(service::storage_proxy& proxy, service::query_state& state, const query_options& options) const {
return schema_altering_statement::execute(proxy, state, options).then([this, p = proxy.shared_from_this()] (::shared_ptr<messages::result_message> msg) {
bool multidc = p->get_token_metadata().get_topology().get_datacenter_endpoints().size() > 1;
bool multidc = p->get_token_metadata_ptr()->get_topology().get_datacenter_endpoints().size() > 1;
bool simple = _attrs->get_replication_strategy_class() == "SimpleStrategy";
if (multidc && simple) {

View File

@@ -204,6 +204,7 @@ std::unique_ptr<prepared_statement> create_table_statement::raw_statement::prepa
}
_properties.validate(db, _properties.properties()->make_schema_extensions(db.extensions()));
const bool has_default_ttl = _properties.properties()->get_default_time_to_live() > 0;
auto stmt = ::make_shared<create_table_statement>(_cf_name, _properties.properties(), _if_not_exists, _static_columns, _properties.properties()->get_id());
@@ -211,6 +212,11 @@ std::unique_ptr<prepared_statement> create_table_statement::raw_statement::prepa
for (auto&& entry : _definitions) {
::shared_ptr<column_identifier> id = entry.first;
cql3_type pt = entry.second->prepare(db, keyspace());
if (has_default_ttl && pt.is_counter()) {
throw exceptions::invalid_request_exception("Cannot set default_time_to_live on a table with counters");
}
if (pt.get_type()->is_multi_cell()) {
if (pt.get_type()->is_user_type()) {
// check for multi-cell types (non-frozen UDTs or collections) inside a non-frozen UDT

View File

@@ -44,6 +44,7 @@
#include "cql3/statements/raw/modification_statement.hh"
#include "cql3/statements/prepared_statement.hh"
#include "cql3/restrictions/single_column_restriction.hh"
#include "cql3/util.hh"
#include "validation.hh"
#include "db/consistency_level_validations.hh"
#include <seastar/core/shared_ptr.hh>
@@ -258,6 +259,7 @@ static thread_local inheriting_concrete_execution_stage<
future<::shared_ptr<cql_transport::messages::result_message>>
modification_statement::execute(service::storage_proxy& proxy, service::query_state& qs, const query_options& options) const {
cql3::util::validate_timestamp(options, attrs);
return modify_stage(this, seastar::ref(proxy), seastar::ref(qs), seastar::cref(options));
}
@@ -284,7 +286,7 @@ modification_statement::do_execute(service::storage_proxy& proxy, service::query
future<>
modification_statement::execute_without_condition(service::storage_proxy& proxy, service::query_state& qs, const query_options& options) const {
auto cl = options.get_consistency();
auto timeout = db::timeout_clock::now() + options.get_timeout_config().*get_timeout_config_selector();
auto timeout = db::timeout_clock::now() + qs.get_client_state().get_timeout_config().*get_timeout_config_selector();
return get_mutations(proxy, options, timeout, false, options.get_timestamp(qs), qs).then([this, cl, timeout, &proxy, &qs] (auto mutations) {
if (mutations.empty()) {
return now();
@@ -300,7 +302,7 @@ modification_statement::execute_with_condition(service::storage_proxy& proxy, se
auto cl_for_learn = options.get_consistency();
auto cl_for_paxos = options.check_serial_consistency();
db::timeout_clock::time_point now = db::timeout_clock::now();
const timeout_config& cfg = options.get_timeout_config();
const timeout_config& cfg = qs.get_client_state().get_timeout_config();
auto statement_timeout = now + cfg.write_timeout; // All CAS networking operations run with write timeout.
auto cas_timeout = now + cfg.cas_timeout; // When to give up due to contention.

View File

@@ -78,11 +78,11 @@ future<> cql3::statements::permission_altering_statement::check_access(service::
return state.ensure_exists(_resource).then([this, &state] {
// check that the user has AUTHORIZE permission on the resource or its parents, otherwise reject
// GRANT/REVOKE.
return state.ensure_has_permission(auth::permission::AUTHORIZE, _resource).then([this, &state] {
return state.ensure_has_permission({auth::permission::AUTHORIZE, _resource}).then([this, &state] {
return do_for_each(_permissions, [this, &state](auth::permission p) {
// TODO: how about we re-write the access check to check a set
// right away.
return state.ensure_has_permission(p, _resource);
return state.ensure_has_permission({p, _resource});
});
});
});

View File

@@ -59,6 +59,7 @@
#include "gms/feature_service.hh"
#include "transport/messages/result_message.hh"
#include "unimplemented.hh"
#include "concrete_types.hh"
namespace cql3 {
@@ -105,6 +106,30 @@ future<> create_role_statement::grant_permissions_to_creator(const service::clie
});
}
static void validate_timeout_options(const auth::authentication_options& auth_options) {
if (!auth_options.options) {
return;
}
const auto& options = *auth_options.options;
auto check_duration = [&] (const sstring& repr) {
data_value v = duration_type->deserialize(duration_type->from_string(repr));
cql_duration duration = static_pointer_cast<const duration_type_impl>(duration_type)->from_value(v);
if (duration.months || duration.days) {
throw exceptions::invalid_request_exception("Timeout values cannot be longer than 24h");
}
if (duration.nanoseconds % 1'000'000 != 0) {
throw exceptions::invalid_request_exception("Timeout values must be expressed in millisecond granularity");
}
};
for (auto opt : {"read_timeout", "write_timeout"}) {
auto it = options.find(opt);
if (it != options.end()) {
check_duration(it->second);
}
}
}
void create_role_statement::validate(service::storage_proxy& p, const service::client_state&) const {
validate_cluster_support(p);
}
@@ -113,7 +138,7 @@ future<> create_role_statement::check_access(service::storage_proxy& proxy, cons
state.ensure_not_anonymous();
return async([this, &state] {
state.ensure_has_permission(auth::permission::CREATE, auth::root_role_resource()).get0();
state.ensure_has_permission({auth::permission::CREATE, auth::root_role_resource()}).get0();
if (*_options.is_superuser) {
if (!auth::has_superuser(*state.get_auth_service(), *state.user()).get0()) {
@@ -137,9 +162,12 @@ create_role_statement::execute(service::storage_proxy&,
[this, &state](const auth::role_config& config, const auth::authentication_options& authen_options) {
const auto& cs = state.get_client_state();
auto& as = *cs.get_auth_service();
validate_timeout_options(authen_options);
return auth::create_role(as, _role, config, authen_options).then([this, &cs] {
return grant_permissions_to_creator(cs);
}).then([&state] () mutable {
return state.get_client_state().update_per_role_params();
}).then([] {
return void_result_message();
}).handle_exception_type([this](const auth::role_already_exists& e) {
@@ -192,7 +220,7 @@ future<> alter_role_statement::check_access(service::storage_proxy& proxy, const
}
if (*user.name != _role) {
state.ensure_has_permission(auth::permission::ALTER, auth::make_role_resource(_role)).get0();
state.ensure_has_permission({auth::permission::ALTER, auth::make_role_resource(_role)}).get0();
} else {
const auto alterable_options = state.get_auth_service()->underlying_authenticator().alterable_options();
@@ -224,8 +252,9 @@ alter_role_statement::execute(service::storage_proxy&, service::query_state& sta
extract_authentication_options(_options),
[this, &state](const auth::role_config_update& update, const auth::authentication_options& authen_options) {
auto& as = *state.get_client_state().get_auth_service();
return auth::alter_role(as, _role, update, authen_options).then([] {
return auth::alter_role(as, _role, update, authen_options).then([&state] () mutable {
return state.get_client_state().update_per_role_params();
}).then([] {
return void_result_message();
}).handle_exception_type([](const auth::nonexistant_role& e) {
return make_exception_future<result_message_ptr>(exceptions::invalid_request_exception(e.what()));
@@ -256,7 +285,7 @@ future<> drop_role_statement::check_access(service::storage_proxy& proxy, const
state.ensure_not_anonymous();
return async([this, &state] {
state.ensure_has_permission(auth::permission::DROP, auth::make_role_resource(_role)).get0();
state.ensure_has_permission({auth::permission::DROP, auth::make_role_resource(_role)}).get0();
auto& as = *state.get_auth_service();
@@ -305,7 +334,7 @@ future<> list_roles_statement::check_access(service::storage_proxy& proxy, const
state.ensure_not_anonymous();
return async([this, &state] {
if (state.check_has_permission(auth::permission::DESCRIBE, auth::root_role_resource()).get0()) {
if (state.check_has_permission({auth::permission::DESCRIBE, auth::root_role_resource()}).get0()) {
return;
}
@@ -404,9 +433,9 @@ list_roles_statement::execute(service::storage_proxy&, service::query_state& sta
if (!_grantee) {
// A user with DESCRIBE on the root role resource lists all roles in the system. A user without it lists
// only the roles granted to them.
return cs.check_has_permission(
return cs.check_has_permission({
auth::permission::DESCRIBE,
auth::root_role_resource()).then([&cs, &rm, &a, query_mode](bool has_describe) {
auth::root_role_resource()}).then([&cs, &rm, &a, query_mode](bool has_describe) {
if (has_describe) {
return rm.query_all().then([&rm, &a](auto&& roles) {
return make_results(rm, a, std::move(roles));
@@ -440,7 +469,7 @@ future<> grant_role_statement::check_access(service::storage_proxy& proxy, const
state.ensure_not_anonymous();
return do_with(auth::make_role_resource(_role), [this, &state](const auto& r) {
return state.ensure_has_permission(auth::permission::AUTHORIZE, r);
return state.ensure_has_permission({auth::permission::AUTHORIZE, r});
});
}
@@ -468,7 +497,7 @@ future<> revoke_role_statement::check_access(service::storage_proxy& proxy, cons
state.ensure_not_anonymous();
return do_with(auth::make_role_resource(_role), [this, &state](const auto& r) {
return state.ensure_has_permission(auth::permission::AUTHORIZE, r);
return state.ensure_has_permission({auth::permission::AUTHORIZE, r});
});
}
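The auth hunks above change `ensure_has_permission`/`check_has_permission` call sites from two loose arguments to a single brace-initialized aggregate. A minimal standalone sketch of that call-site shape (the `permission_details` name and the deny rule here are illustrative assumptions, not Scylla's actual types):

```cpp
#include <cassert>
#include <string>

// Hypothetical mirror of the call-site change: the permission and the
// resource it applies to travel together as one aggregate, so callers
// brace-initialize a single argument instead of passing two.
enum class permission { DROP, DESCRIBE, AUTHORIZE };

struct permission_details {
    permission perm;
    std::string resource;
};

// Old shape: ensure_has_permission(perm, resource)
// New shape: ensure_has_permission({perm, resource})
bool ensure_has_permission(const permission_details& d) {
    // sketch: deny only DROP on a protected role
    return !(d.perm == permission::DROP && d.resource == "role/admin");
}
```

Bundling the pair also gives later patches one place to attach extra context without touching every call site again.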


@@ -366,7 +366,8 @@ select_statement::do_execute(service::storage_proxy& proxy,
}
command->slice.options.set<query::partition_slice::option::allow_short_read>();
auto timeout_duration = options.get_timeout_config().*get_timeout_config_selector();
auto timeout_duration = state.get_client_state().get_timeout_config().*get_timeout_config_selector();
auto timeout = db::timeout_clock::now() + timeout_duration;
auto p = service::pager::query_pagers::pager(_schema, _selection,
state, options, command, std::move(key_ranges), restrictions_need_filtering ? _restrictions : nullptr);
@@ -374,10 +375,9 @@ select_statement::do_execute(service::storage_proxy& proxy,
return do_with(
cql3::selection::result_set_builder(*_selection, now,
options.get_cql_serialization_format(), *_group_by_cell_indices),
[this, p, page_size, now, timeout_duration, restrictions_need_filtering](auto& builder) {
[this, p, page_size, now, timeout, restrictions_need_filtering](auto& builder) {
return do_until([p] {return p->is_exhausted();},
[p, &builder, page_size, now, timeout_duration] {
auto timeout = db::timeout_clock::now() + timeout_duration;
[p, &builder, page_size, now, timeout] {
return p->fetch_page(builder, page_size, now, timeout);
}
).then([this, p, &builder, restrictions_need_filtering] {
@@ -401,7 +401,6 @@ select_statement::do_execute(service::storage_proxy& proxy,
" you must either remove the ORDER BY or the IN and sort client side, or disable paging for this query");
}
auto timeout = db::timeout_clock::now() + timeout_duration;
if (_selection->is_trivial() && !restrictions_need_filtering && !_per_partition_limit) {
return p->fetch_page_generator(page_size, now, timeout, _stats).then([this, p] (result_generator generator) {
auto meta = [&] () -> shared_ptr<const cql3::metadata> {
@@ -456,7 +455,7 @@ generate_base_key_from_index_pk(const partition_key& index_pk, const std::option
if (!view_col) {
throw std::runtime_error(format("Base key column not found in the view: {}", base_col.name_as_text()));
}
if (base_col.type->without_reversed() != *view_col->type) {
if (base_col.type != view_col->type) {
throw std::runtime_error(format("Mismatched types for base and view columns {}: {} and {}",
base_col.name_as_text(), base_col.type->cql3_type_name(), view_col->type->cql3_type_name()));
}
@@ -514,9 +513,9 @@ indexed_table_select_statement::do_execute_base_query(
lw_shared_ptr<const service::pager::paging_state> paging_state) const {
using value_type = std::tuple<foreign_ptr<lw_shared_ptr<query::result>>, lw_shared_ptr<query::read_command>>;
auto cmd = prepare_command_for_base_query(proxy, options, state, now, bool(paging_state));
auto timeout = db::timeout_clock::now() + options.get_timeout_config().*get_timeout_config_selector();
auto timeout = db::timeout_clock::now() + state.get_client_state().get_timeout_config().*get_timeout_config_selector();
uint32_t queried_ranges_count = partition_ranges.size();
service::query_ranges_to_vnodes_generator ranges_to_vnodes(proxy.get_token_metadata(), _schema, std::move(partition_ranges));
service::query_ranges_to_vnodes_generator ranges_to_vnodes(proxy.get_token_metadata_ptr(), _schema, std::move(partition_ranges));
struct base_query_state {
query::result_merger merger;
@@ -608,7 +607,7 @@ indexed_table_select_statement::do_execute_base_query(
lw_shared_ptr<const service::pager::paging_state> paging_state) const {
using value_type = std::tuple<foreign_ptr<lw_shared_ptr<query::result>>, lw_shared_ptr<query::read_command>>;
auto cmd = prepare_command_for_base_query(proxy, options, state, now, bool(paging_state));
auto timeout = db::timeout_clock::now() + options.get_timeout_config().*get_timeout_config_selector();
auto timeout = db::timeout_clock::now() + state.get_client_state().get_timeout_config().*get_timeout_config_selector();
struct base_query_state {
query::result_merger merger;
@@ -690,7 +689,7 @@ select_statement::execute(service::storage_proxy& proxy,
// is specified we need to get "limit" rows from each partition since there
// is no way to tell which of these rows belong to the query result before
// doing post-query ordering.
auto timeout = db::timeout_clock::now() + options.get_timeout_config().*get_timeout_config_selector();
auto timeout = db::timeout_clock::now() + state.get_client_state().get_timeout_config().*get_timeout_config_selector();
if (needs_post_query_ordering() && _limit) {
return do_with(std::forward<dht::partition_range_vector>(partition_ranges), [this, &proxy, &state, &options, cmd, timeout](auto& prs) {
assert(cmd->partition_limit == query::max_partitions);
@@ -891,6 +890,23 @@ static void append_base_key_to_index_ck(std::vector<bytes_view>& exploded_index_
std::move(begin, key_view.end(), std::back_inserter(exploded_index_ck));
}
bytes indexed_table_select_statement::compute_idx_token(const partition_key& key) const {
const column_definition& cdef = *_view_schema->clustering_key_columns().begin();
clustering_row empty_row(clustering_key_prefix::make_empty());
bytes_opt computed_value;
if (!cdef.is_computed()) {
// FIXME(pgrabowski): this legacy code is here for backward compatibility and should be removed
// once "computed_columns feature" is supported by every node
computed_value = legacy_token_column_computation().compute_value(*_schema, key, empty_row);
} else {
computed_value = cdef.get_computation().compute_value(*_schema, key, empty_row);
}
if (!computed_value) {
throw std::logic_error(format("No value computed for idx_token column {}", cdef.name()));
}
return *computed_value;
}
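The legacy/computed branch in `compute_idx_token()` above follows a common rolling-upgrade pattern: keep the old computation as a fallback until every node supports the new feature. A toy sketch of that shape (string stand-ins, not Scylla's real column computations):

```cpp
#include <optional>
#include <stdexcept>
#include <string>

// Toy stand-ins for the two computations; the real code derives a
// token from the base partition key.
std::optional<std::string> legacy_compute(const std::string& key) {
    return "legacy:" + key;
}
std::optional<std::string> column_compute(const std::string& key) {
    return "computed:" + key;
}

std::string compute_idx_token(const std::string& key, bool column_is_computed) {
    // prefer the column's own computation; keep the legacy path for
    // clusters where the computed_columns feature is not yet universal
    auto v = column_is_computed ? column_compute(key) : legacy_compute(key);
    if (!v) {
        throw std::logic_error("No value computed for idx_token column");
    }
    return *v;
}
```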
lw_shared_ptr<const service::pager::paging_state> indexed_table_select_statement::generate_view_paging_state_from_base_query_results(lw_shared_ptr<const service::pager::paging_state> paging_state,
const foreign_ptr<lw_shared_ptr<query::result>>& results, service::storage_proxy& proxy, service::query_state& state, const query_options& options) const {
const column_definition* cdef = _schema->get_column_definition(to_bytes(_index.target_column()));
@@ -924,7 +940,7 @@ lw_shared_ptr<const service::pager::paging_state> indexed_table_select_statement
if (_index.metadata().local()) {
exploded_index_ck.push_back(bytes_view(*indexed_column_value));
} else {
token_bytes = dht::get_token(*_schema, last_base_pk).data();
token_bytes = compute_idx_token(last_base_pk);
exploded_index_ck.push_back(bytes_view(token_bytes));
append_base_key_to_index_ck<partition_key>(exploded_index_ck, last_base_pk, *cdef);
}
@@ -1108,7 +1124,7 @@ query::partition_slice indexed_table_select_statement::get_partition_slice_for_g
// Computed token column needs to be added to index view restrictions
const column_definition& token_cdef = *_view_schema->clustering_key_columns().begin();
auto base_pk = partition_key::from_optional_exploded(*_schema, single_pk_restrictions->values(options));
bytes token_value = dht::get_token(*_schema, base_pk).data();
bytes token_value = compute_idx_token(base_pk);
auto token_restriction = ::make_shared<restrictions::single_column_restriction>(token_cdef);
token_restriction->expression = expr::binary_operator{
&token_cdef, expr::oper_t::EQ,
@@ -1120,11 +1136,7 @@ query::partition_slice indexed_table_select_statement::get_partition_slice_for_g
if (single_ck_restrictions) {
auto prefix_restrictions = single_ck_restrictions->get_longest_prefix_restrictions();
auto clustering_restrictions_from_base = ::make_shared<restrictions::single_column_clustering_key_restrictions>(_view_schema, *prefix_restrictions);
const auto indexed_column = _view_schema->get_column_definition(to_bytes(_index.target_column()));
for (auto restriction_it : clustering_restrictions_from_base->restrictions()) {
if (restriction_it.first == indexed_column) {
continue; // In the index table, the indexed column is the partition (not clustering) key.
}
clustering_restrictions->merge_with(restriction_it.second);
}
}
@@ -1238,7 +1250,7 @@ indexed_table_select_statement::find_index_partition_ranges(service::storage_pro
{
using value_type = std::tuple<dht::partition_range_vector, lw_shared_ptr<const service::pager::paging_state>>;
auto now = gc_clock::now();
auto timeout = db::timeout_clock::now() + options.get_timeout_config().*get_timeout_config_selector();
auto timeout = db::timeout_clock::now() + state.get_client_state().get_timeout_config().*get_timeout_config_selector();
return read_posting_list(proxy, options, get_limit(options), state, now, timeout, false).then(
[this, now, &options] (::shared_ptr<cql_transport::messages::result_message::rows> rows) {
auto rs = cql3::untyped_result_set(rows);
@@ -1279,7 +1291,7 @@ indexed_table_select_statement::find_index_clustering_rows(service::storage_prox
{
using value_type = std::tuple<std::vector<indexed_table_select_statement::primary_key>, lw_shared_ptr<const service::pager::paging_state>>;
auto now = gc_clock::now();
auto timeout = db::timeout_clock::now() + options.get_timeout_config().*get_timeout_config_selector();
auto timeout = db::timeout_clock::now() + state.get_client_state().get_timeout_config().*get_timeout_config_selector();
return read_posting_list(proxy, options, get_limit(options), state, now, timeout, true).then(
[this, now, &options] (::shared_ptr<cql_transport::messages::result_message::rows> rows) {

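One behavioral detail of the select_statement hunks above: the page-fetch loop used to recompute `timeout` before every page, so an N-page read could run for up to N read-timeouts; the deadline is now computed once, from the connection's timeout config, when the query starts. A standalone sketch of the hoisted deadline (illustrative names, not Scylla's API):

```cpp
#include <chrono>
#include <vector>

using sclock = std::chrono::steady_clock;

struct page_fetch {
    sclock::time_point deadline; // deadline this page was fetched under
};

std::vector<page_fetch> fetch_all_pages(sclock::time_point query_start,
                                        std::chrono::milliseconds read_timeout,
                                        int pages) {
    const auto deadline = query_start + read_timeout; // hoisted out of the loop
    std::vector<page_fetch> fetched;
    for (int i = 0; i < pages; ++i) {
        fetched.push_back({deadline}); // every page shares one deadline
    }
    return fetched;
}
```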

@@ -300,6 +300,8 @@ private:
query::partition_slice get_partition_slice_for_local_index_posting_list(const query_options& options) const;
query::partition_slice get_partition_slice_for_global_index_posting_list(const query_options& options) const;
bytes compute_idx_token(const partition_key& key) const;
};
}


@@ -119,5 +119,19 @@ void do_with_parser_impl(const sstring_view& cql, noncopyable_function<void (cql
#endif
void validate_timestamp(const query_options& options, const std::unique_ptr<attributes>& attrs) {
if (attrs->is_timestamp_set()) {
static constexpr int64_t MAX_DIFFERENCE = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::days(3)).count();
auto now = std::chrono::duration_cast<std::chrono::microseconds>(db_clock::now().time_since_epoch()).count();
auto timestamp = attrs->get_timestamp(now, options);
if (timestamp - now > MAX_DIFFERENCE) {
throw exceptions::invalid_request_exception("Cannot provide a timestamp more than 3 days into the future. If this was not intended, "
"make sure the timestamp is in microseconds");
}
}
}
}
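`validate_timestamp()` above rejects user-supplied timestamps more than 3 days ahead of now, on the theory that such values were probably expressed in the wrong unit. The bound itself is easy to check in isolation (hedged sketch; `std::chrono::hours(72)` stands in for the C++20 `std::chrono::days(3)` used above):

```cpp
#include <chrono>
#include <cstdint>

// 3 days expressed in microseconds, mirroring MAX_DIFFERENCE above
constexpr int64_t max_future_us =
    std::chrono::duration_cast<std::chrono::microseconds>(
        std::chrono::hours(72)).count();

// both arguments are microseconds since the epoch
bool timestamp_acceptable(int64_t now_us, int64_t ts_us) {
    return ts_us - now_us <= max_future_us;
}
```

A nanosecond-scale timestamp lands roughly 1000x in the future and trips this guard immediately, which is what the error message hints at.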


@@ -89,6 +89,10 @@ std::unique_ptr<cql3::statements::raw::select_statement> build_select_statement(
/// character itself is quoted by doubling it.
sstring maybe_quote(const sstring& s);
// Check that the timestamp is not too far in the future, as that probably
// indicates it is incorrect (for example, expressed in units other than microseconds).
void validate_timestamp(const query_options& options, const std::unique_ptr<attributes>& attrs);
} // namespace util
} // namespace cql3


@@ -57,6 +57,7 @@
#include <boost/range/algorithm/find_if.hpp>
#include <boost/range/algorithm/sort.hpp>
#include <boost/range/adaptor/map.hpp>
#include <boost/container/static_vector.hpp>
#include "frozen_mutation.hh"
#include <seastar/core/do_with.hh>
#include "service/migration_manager.hh"
@@ -82,6 +83,7 @@
#include "checked-file-impl.hh"
#include "utils/disk-error-handler.hh"
#include "utils/human_readable.hh"
#include "db/timeout_clock.hh"
#include "db/large_data_handler.hh"
@@ -90,6 +92,7 @@
#include "user_types_metadata.hh"
#include <seastar/core/shared_ptr_incomplete.hh>
#include <seastar/util/memory_diagnostics.hh>
#include "schema_builder.hh"
@@ -165,14 +168,181 @@ bool string_pair_eq::operator()(spair lhs, spair rhs) const {
utils::UUID database::empty_version = utils::UUID_gen::get_name_UUID(bytes{});
database::database(const db::config& cfg, database_config dbcfg, service::migration_notifier& mn, gms::feature_service& feat, const locator::token_metadata& tm, abort_source& as, sharded<semaphore>& sst_dir_sem)
namespace {
class memory_diagnostics_line_writer {
std::array<char, 4096> _line_buf;
memory::memory_diagnostics_writer _wr;
public:
memory_diagnostics_line_writer(memory::memory_diagnostics_writer wr) : _wr(std::move(wr)) { }
void operator() (const char* fmt) {
_wr(fmt);
}
void operator() (const char* fmt, const auto& param1, const auto&... params) {
const auto begin = _line_buf.begin();
auto it = fmt::format_to(begin, fmt, param1, params...);
_wr(std::string_view(begin, it - begin));
}
};
const boost::container::static_vector<std::pair<size_t, boost::container::static_vector<table*, 16>>, 10>
phased_barrier_top_10_counts(const std::unordered_map<utils::UUID, lw_shared_ptr<column_family>>& tables, std::function<size_t(table&)> op_count_getter) {
using table_list = boost::container::static_vector<table*, 16>;
using count_and_tables = std::pair<size_t, table_list>;
const auto less = [] (const count_and_tables& a, const count_and_tables& b) {
return a.first < b.first;
};
boost::container::static_vector<count_and_tables, 10> res;
count_and_tables* min_element = nullptr;
for (const auto& [tid, table] : tables) {
const auto count = op_count_getter(*table);
if (!count) {
continue;
}
if (res.size() < res.capacity()) {
auto& elem = res.emplace_back(count, table_list({table.get()}));
if (!min_element || min_element->first > count) {
min_element = &elem;
}
continue;
}
if (min_element->first > count) {
continue;
}
auto it = boost::find_if(res, [count] (const count_and_tables& x) {
return x.first == count;
});
if (it != res.end()) {
it->second.push_back(table.get());
continue;
}
// If we are here, min_element->first < count
*min_element = {count, table_list({table.get()})};
min_element = &*boost::min_element(res, less);
}
boost::sort(res, less);
return res;
}
} // anonymous namespace
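`phased_barrier_top_10_counts()` above keeps the 10 largest per-table operation counts, grouping tables that share a count, and uses bounded `static_vector`s plus min-element tracking to stay allocation-free. The same selection logic with plain std containers (a simplified sketch: no fixed capacities, and it sorts first instead of tracking the minimum):

```cpp
#include <algorithm>
#include <cassert>
#include <string>
#include <utility>
#include <vector>

// Group tables by operation count, drop zero counts, keep the k largest
// counts (descending). Plain containers stand in for boost static_vectors.
std::vector<std::pair<size_t, std::vector<std::string>>>
top_counts(std::vector<std::pair<std::string, size_t>> tables, size_t k) {
    std::vector<std::pair<size_t, std::vector<std::string>>> res;
    for (const auto& entry : tables) {
        const std::string& name = entry.first;
        const size_t count = entry.second;
        if (count == 0) {
            continue; // tables with no ongoing operations are skipped
        }
        auto it = std::find_if(res.begin(), res.end(),
                               [count](const auto& e) { return e.first == count; });
        if (it != res.end()) {
            it->second.push_back(name); // same count: group under one entry
            continue;
        }
        res.push_back({count, {name}});
    }
    std::sort(res.begin(), res.end(),
              [](const auto& a, const auto& b) { return a.first > b.first; });
    if (res.size() > k) {
        res.resize(k); // keep only the k largest counts
    }
    return res;
}
```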
void database::setup_scylla_memory_diagnostics_producer() {
memory::set_additional_diagnostics_producer([this] (memory::memory_diagnostics_writer wr) {
auto writeln = memory_diagnostics_line_writer(std::move(wr));
const auto lsa_occupancy_stats = logalloc::lsa_global_occupancy_stats();
writeln("LSA\n");
writeln(" allocated: {}\n", utils::to_hr_size(lsa_occupancy_stats.total_space()));
writeln(" used: {}\n", utils::to_hr_size(lsa_occupancy_stats.used_space()));
writeln(" free: {}\n\n", utils::to_hr_size(lsa_occupancy_stats.free_space()));
const auto row_cache_occupancy_stats = _row_cache_tracker.region().occupancy();
writeln("Cache:\n");
writeln(" total: {}\n", utils::to_hr_size(row_cache_occupancy_stats.total_space()));
writeln(" used: {}\n", utils::to_hr_size(row_cache_occupancy_stats.used_space()));
writeln(" free: {}\n\n", utils::to_hr_size(row_cache_occupancy_stats.free_space()));
writeln("Memtables:\n");
writeln(" total: {}\n", utils::to_hr_size(lsa_occupancy_stats.total_space() - row_cache_occupancy_stats.total_space()));
writeln(" Regular:\n");
writeln(" real dirty: {}\n", utils::to_hr_size(_dirty_memory_manager.real_dirty_memory()));
writeln(" virt dirty: {}\n", utils::to_hr_size(_dirty_memory_manager.virtual_dirty_memory()));
writeln(" System:\n");
writeln(" real dirty: {}\n", utils::to_hr_size(_system_dirty_memory_manager.real_dirty_memory()));
writeln(" virt dirty: {}\n\n", utils::to_hr_size(_system_dirty_memory_manager.virtual_dirty_memory()));
writeln("Replica:\n");
writeln(" Read Concurrency Semaphores:\n");
const std::pair<const char*, reader_concurrency_semaphore&> semaphores[] = {
{"user", _read_concurrency_sem},
{"streaming", _streaming_concurrency_sem},
{"system", _system_read_concurrency_sem},
{"compaction", _compaction_concurrency_sem},
};
for (const auto& [name, sem] : semaphores) {
const auto initial_res = sem.initial_resources();
const auto available_res = sem.available_resources();
if (sem.is_unlimited()) {
writeln(" {}: {}/∞, {}/∞, queued: {}\n",
name,
initial_res.count - available_res.count,
utils::to_hr_size(initial_res.memory - available_res.memory),
sem.waiters());
} else {
writeln(" {}: {}/{}, {}/{}, queued: {}\n",
name,
initial_res.count - available_res.count,
initial_res.count,
utils::to_hr_size(initial_res.memory - available_res.memory),
utils::to_hr_size(initial_res.memory),
sem.waiters());
}
}
writeln(" Execution Stages:\n");
const std::pair<const char*, inheriting_execution_stage::stats> execution_stage_summaries[] = {
{"data query stage", _data_query_stage.get_stats()},
{"mutation query stage", _mutation_query_stage.get_stats()},
{"apply stage", _apply_stage.get_stats()},
};
for (const auto& [name, exec_stage_summary] : execution_stage_summaries) {
writeln(" {}:\n", name);
size_t total = 0;
for (const auto& [sg, stats ] : exec_stage_summary) {
const auto count = stats.function_calls_enqueued - stats.function_calls_executed;
if (!count) {
continue;
}
writeln(" {}\t{}\n", sg.name(), count);
total += count;
}
writeln(" Total: {}\n", total);
}
writeln(" Tables - Ongoing Operations:\n");
const std::pair<const char*, std::function<size_t(table&)>> phased_barriers[] = {
{"Pending writes", std::mem_fn(&table::writes_in_progress)},
{"Pending reads", std::mem_fn(&table::reads_in_progress)},
{"Pending streams", std::mem_fn(&table::streams_in_progress)},
};
for (const auto& [name, op_count_getter] : phased_barriers) {
writeln(" {} (top 10):\n", name);
auto total = 0;
for (const auto& [count, table_list] : phased_barrier_top_10_counts(_column_families, op_count_getter)) {
total += count;
writeln(" {}", count);
if (table_list.empty()) {
writeln("\n");
continue;
}
auto it = table_list.begin();
for (; it != table_list.end() - 1; ++it) {
writeln(" {}.{},", (*it)->schema()->ks_name(), (*it)->schema()->cf_name());
}
writeln(" {}.{}\n", (*it)->schema()->ks_name(), (*it)->schema()->cf_name());
}
writeln(" {} Total (all)\n", total);
}
writeln("\n");
});
}
database::database(const db::config& cfg, database_config dbcfg, service::migration_notifier& mn, gms::feature_service& feat, const locator::shared_token_metadata& stm, abort_source& as, sharded<semaphore>& sst_dir_sem)
: _stats(make_lw_shared<db_stats>())
, _cl_stats(std::make_unique<cell_locker_stats>())
, _cfg(cfg)
// Allow system tables a pool of 10 MB memory to write, but never block on other regions.
, _system_dirty_memory_manager(*this, 10 << 20, cfg.virtual_dirty_soft_limit(), default_scheduling_group())
, _dirty_memory_manager(*this, dbcfg.available_memory * 0.45, cfg.virtual_dirty_soft_limit(), dbcfg.statement_scheduling_group)
, _streaming_dirty_memory_manager(*this, dbcfg.available_memory * 0.10, cfg.virtual_dirty_soft_limit(), dbcfg.streaming_scheduling_group)
, _dirty_memory_manager(*this, dbcfg.available_memory * 0.50, cfg.virtual_dirty_soft_limit(), dbcfg.statement_scheduling_group)
, _dbcfg(dbcfg)
, _memtable_controller(make_flush_controller(_cfg, dbcfg.memtable_scheduling_group, service::get_local_memtable_flush_priority(), [this, limit = float(_dirty_memory_manager.throttle_threshold())] {
auto backlog = (_dirty_memory_manager.virtual_dirty_memory()) / limit;
@@ -219,9 +389,11 @@ database::database(const db::config& cfg, database_config dbcfg, service::migrat
, _data_listeners(std::make_unique<db::data_listeners>(*this))
, _mnotifier(mn)
, _feat(feat)
, _token_metadata(tm)
, _shared_token_metadata(stm)
, _sst_dir_semaphore(sst_dir_sem)
{
assert(dbcfg.available_memory != 0); // Detect misconfigured unit tests, see #7544
local_schema_registry().init(*this); // TODO: we're never unbound.
setup_metrics();
@@ -233,6 +405,8 @@ database::database(const db::config& cfg, database_config dbcfg, service::migrat
dblog.debug("Enabling infinite bound range deletions");
_supports_infinite_bound_range_deletions = true;
});
setup_scylla_memory_diagnostics_producer();
}
const db::extensions& database::extensions() const {
@@ -309,7 +483,6 @@ void
database::setup_metrics() {
_dirty_memory_manager.setup_collectd("regular");
_system_dirty_memory_manager.setup_collectd("system");
_streaming_dirty_memory_manager.setup_collectd("streaming");
namespace sm = seastar::metrics;
@@ -318,12 +491,12 @@ database::setup_metrics() {
auto system_label_instance = class_label("system");
_metrics.add_group("memory", {
sm::make_gauge("dirty_bytes", [this] { return _dirty_memory_manager.real_dirty_memory() + _system_dirty_memory_manager.real_dirty_memory() + _streaming_dirty_memory_manager.real_dirty_memory(); },
sm::make_gauge("dirty_bytes", [this] { return _dirty_memory_manager.real_dirty_memory() + _system_dirty_memory_manager.real_dirty_memory(); },
sm::description("Holds the current size of all (\"regular\", \"system\" and \"streaming\") non-free memory in bytes: used memory + released memory that hasn't been returned to a free memory pool yet. "
"Total memory size minus this value represents the amount of available memory. "
"If this value minus virtual_dirty_bytes is too high then this means that the dirty memory eviction lags behind.")),
sm::make_gauge("virtual_dirty_bytes", [this] { return _dirty_memory_manager.virtual_dirty_memory() + _system_dirty_memory_manager.virtual_dirty_memory() + _streaming_dirty_memory_manager.virtual_dirty_memory(); },
sm::make_gauge("virtual_dirty_bytes", [this] { return _dirty_memory_manager.virtual_dirty_memory() + _system_dirty_memory_manager.virtual_dirty_memory(); },
sm::description("Holds the size of all (\"regular\", \"system\" and \"streaming\") used memory in bytes. Compare it to \"dirty_bytes\" to see how much memory is wasted (neither used nor available).")),
});
@@ -456,6 +629,11 @@ database::setup_metrics() {
" to be able to admit new ones, if there is a shortage of permits."),
{user_label_instance}),
sm::make_derive("reads_shed_due_to_overload", _read_concurrency_sem.get_stats().total_reads_shed_due_to_overload,
sm::description("The number of reads shed because the admission queue reached its max capacity."
" When the queue is full, excessive reads are shed to avoid overload."),
{user_label_instance}),
sm::make_gauge("active_reads", [this] { return max_count_streaming_concurrent_reads - _streaming_concurrency_sem.available_resources().count; },
sm::description("Holds the number of currently active read operations issued on behalf of streaming "),
{streaming_label_instance}),
@@ -481,6 +659,11 @@ database::setup_metrics() {
" to be able to admit new ones, if there is a shortage of permits."),
{streaming_label_instance}),
sm::make_derive("reads_shed_due_to_overload", _streaming_concurrency_sem.get_stats().total_reads_shed_due_to_overload,
sm::description("The number of reads shed because the admission queue reached its max capacity."
" When the queue is full, excessive reads are shed to avoid overload."),
{streaming_label_instance}),
sm::make_gauge("active_reads", [this] { return max_count_system_concurrent_reads - _system_read_concurrency_sem.available_resources().count; },
sm::description("Holds the number of currently active read operations from \"system\" keyspace tables. "),
{system_label_instance}),
@@ -505,6 +688,11 @@ database::setup_metrics() {
" to be able to admit new ones, if there is a shortage of permits."),
{system_label_instance}),
sm::make_derive("reads_shed_due_to_overload", _system_read_concurrency_sem.get_stats().total_reads_shed_due_to_overload,
sm::description("The number of reads shed because the admission queue reached its max capacity."
" When the queue is full, excessive reads are shed to avoid overload."),
{system_label_instance}),
sm::make_gauge("total_result_bytes", [this] { return get_result_memory_limiter().total_used_memory(); },
sm::description("Holds the current amount of memory used for results.")),
@@ -572,6 +760,9 @@ void database::set_format_by_config() {
}
database::~database() {
_read_concurrency_sem.clear_inactive_reads();
_streaming_concurrency_sem.clear_inactive_reads();
_system_read_concurrency_sem.clear_inactive_reads();
}
void database::update_version(const utils::UUID& version) {
@@ -659,22 +850,11 @@ future<> database::parse_system_tables(distributed<service::storage_proxy>& prox
});
}).then([&proxy, &mm, this] {
return do_parse_schema_tables(proxy, db::schema_tables::VIEWS, [this, &proxy, &mm] (schema_result_value_type &v) {
return create_views_from_schema_partition(proxy, v.second).then([this, &mm, &proxy] (std::vector<view_ptr> views) {
return parallel_for_each(views.begin(), views.end(), [this, &mm, &proxy] (auto&& v) {
// TODO: Remove once computed columns are guaranteed to be featured in the whole cluster.
// we fix the schema here in place in order to avoid races (write commands coming from other coordinators).
view_ptr fixed_v = maybe_fix_legacy_secondary_index_mv_schema(*this, v, nullptr, preserve_version::yes);
view_ptr v_to_add = fixed_v ? fixed_v : v;
future<> f = this->add_column_family_and_make_directory(v_to_add);
if (bool(fixed_v)) {
v_to_add = fixed_v;
auto&& keyspace = find_keyspace(v->ks_name()).metadata();
auto mutations = db::schema_tables::make_update_view_mutations(keyspace, view_ptr(v), fixed_v, api::new_timestamp(), true);
f = f.then([this, &proxy, mutations = std::move(mutations)] {
return db::schema_tables::merge_schema(proxy, _feat, std::move(mutations));
});
}
return f;
return create_views_from_schema_partition(proxy, v.second).then([this, &mm] (std::vector<view_ptr> views) {
return parallel_for_each(views.begin(), views.end(), [this, &mm] (auto&& v) {
return this->add_column_family_and_make_directory(v).then([this, &mm, v] {
return maybe_update_legacy_secondary_index_mv_schema(mm.local(), *this, v);
});
});
});
});
@@ -725,7 +905,17 @@ future<> database::update_keyspace(const sstring& name) {
auto tmp_ksm = db::schema_tables::create_keyspace_from_schema_partition(v);
auto new_ksm = ::make_lw_shared<keyspace_metadata>(tmp_ksm->name(), tmp_ksm->strategy_name(), tmp_ksm->strategy_options(), tmp_ksm->durable_writes(),
boost::copy_range<std::vector<schema_ptr>>(ks.metadata()->cf_meta_data() | boost::adaptors::map_values), std::move(ks.metadata()->user_types()));
ks.update_from(get_token_metadata(), std::move(new_ksm));
bool old_durable_writes = ks.metadata()->durable_writes();
bool new_durable_writes = new_ksm->durable_writes();
if (old_durable_writes != new_durable_writes) {
for (auto& [cf_name, cf_schema] : new_ksm->cf_meta_data()) {
auto& cf = find_column_family(cf_schema);
cf.set_durable_writes(new_durable_writes);
}
}
ks.update_from(get_shared_token_metadata(), std::move(new_ksm));
return get_notifier().update_keyspace(ks.metadata());
});
}
@@ -744,6 +934,7 @@ void database::add_column_family(keyspace& ks, schema_ptr schema, column_family:
} else {
cf = make_lw_shared<column_family>(schema, std::move(cfg), column_family::no_commitlog(), *_compaction_manager, *_cl_stats, _row_cache_tracker);
}
cf->set_durable_writes(ks.metadata()->durable_writes());
auto uuid = schema->id();
if (_column_families.contains(uuid)) {
@@ -809,7 +1000,7 @@ future<> database::drop_column_family(const sstring& ks_name, const sstring& cf_
remove(*cf);
cf->clear_views();
auto& ks = find_keyspace(ks_name);
return cf->await_pending_ops().then([this, &ks, cf, tsf = std::move(tsf), snapshot] {
return when_all_succeed(cf->await_pending_writes(), cf->await_pending_reads()).then_unpack([this, &ks, cf, tsf = std::move(tsf), snapshot] {
return truncate(ks, *cf, std::move(tsf), snapshot).finally([this, cf] {
return cf->stop();
});
@@ -904,12 +1095,12 @@ bool database::column_family_exists(const utils::UUID& uuid) const {
}
void
keyspace::create_replication_strategy(const locator::token_metadata& tm, const std::map<sstring, sstring>& options) {
keyspace::create_replication_strategy(const locator::shared_token_metadata& stm, const std::map<sstring, sstring>& options) {
using namespace locator;
_replication_strategy =
abstract_replication_strategy::create_replication_strategy(
_metadata->name(), _metadata->strategy_name(), tm, options);
_metadata->name(), _metadata->strategy_name(), stm, options);
}
locator::abstract_replication_strategy&
@@ -928,9 +1119,9 @@ keyspace::set_replication_strategy(std::unique_ptr<locator::abstract_replication
_replication_strategy = std::move(replication_strategy);
}
void keyspace::update_from(const locator::token_metadata& tm, ::lw_shared_ptr<keyspace_metadata> ksm) {
void keyspace::update_from(const locator::shared_token_metadata& stm, ::lw_shared_ptr<keyspace_metadata> ksm) {
_metadata = std::move(ksm);
create_replication_strategy(tm, _metadata->strategy_options());
create_replication_strategy(stm, _metadata->strategy_options());
}
future<> keyspace::ensure_populated() const {
@@ -964,7 +1155,6 @@ keyspace::make_column_family_config(const schema& s, const database& db) const {
cfg.enable_dangerous_direct_import_of_cassandra_counters = _config.enable_dangerous_direct_import_of_cassandra_counters;
cfg.compaction_enforce_min_threshold = _config.compaction_enforce_min_threshold;
cfg.dirty_memory_manager = _config.dirty_memory_manager;
cfg.streaming_dirty_memory_manager = _config.streaming_dirty_memory_manager;
cfg.streaming_read_concurrency_semaphore = _config.streaming_read_concurrency_semaphore;
cfg.compaction_concurrency_semaphore = _config.compaction_concurrency_semaphore;
cfg.cf_stats = _config.cf_stats;
@@ -1044,7 +1234,7 @@ const column_family& database::find_column_family(const schema_ptr& schema) cons
using strategy_class_registry = class_registry<
locator::abstract_replication_strategy,
const sstring&,
const locator::token_metadata&,
const locator::shared_token_metadata&,
locator::snitch_ptr&,
const std::map<sstring, sstring>&>;
@@ -1077,20 +1267,20 @@ keyspace_metadata::keyspace_metadata(std::string_view name,
}
}
void keyspace_metadata::validate(const locator::token_metadata& tm) const {
void keyspace_metadata::validate(const locator::shared_token_metadata& stm) const {
using namespace locator;
abstract_replication_strategy::validate_replication_strategy(name(), strategy_name(), tm, strategy_options());
abstract_replication_strategy::validate_replication_strategy(name(), strategy_name(), stm, strategy_options());
}
void database::validate_keyspace_update(keyspace_metadata& ksm) {
ksm.validate(get_token_metadata());
ksm.validate(get_shared_token_metadata());
if (!has_keyspace(ksm.name())) {
throw exceptions::configuration_exception(format("Cannot update non existing keyspace '{}'.", ksm.name()));
}
}
void database::validate_new_keyspace(keyspace_metadata& ksm) {
ksm.validate(get_token_metadata());
ksm.validate(get_shared_token_metadata());
if (has_keyspace(ksm.name())) {
throw exceptions::already_exists_exception{ksm.name()};
}
@@ -1133,7 +1323,7 @@ std::vector<view_ptr> database::get_views() const {
void database::create_in_memory_keyspace(const lw_shared_ptr<keyspace_metadata>& ksm) {
keyspace ks(ksm, std::move(make_keyspace_config(*ksm)));
ks.create_replication_strategy(get_token_metadata(), ksm->strategy_options());
ks.create_replication_strategy(get_shared_token_metadata(), ksm->strategy_options());
_keyspaces.emplace(ksm->name(), std::move(ks));
}
@@ -1566,7 +1756,7 @@ static future<> maybe_handle_reorder(std::exception_ptr exp) {
}
future<> database::apply_with_commitlog(column_family& cf, const mutation& m, db::timeout_clock::time_point timeout) {
if (cf.commitlog() != nullptr) {
if (cf.commitlog() != nullptr && cf.durable_writes()) {
return do_with(freeze(m), [this, &m, &cf, timeout] (frozen_mutation& fm) {
commitlog_entry_writer cew(m.schema(), fm, db::commitlog::force_sync::no);
return cf.commitlog()->add_entry(m.schema()->id(), cew, timeout);
@@ -1580,7 +1770,7 @@ future<> database::apply_with_commitlog(column_family& cf, const mutation& m, db
future<> database::apply_with_commitlog(schema_ptr s, column_family& cf, utils::UUID uuid, const frozen_mutation& m, db::timeout_clock::time_point timeout,
db::commitlog::force_sync sync) {
auto cl = cf.commitlog();
if (cl != nullptr) {
if (cl != nullptr && cf.durable_writes()) {
commitlog_entry_writer cew(s, m, sync);
return cf.commitlog()->add_entry(uuid, cew, timeout).then([&m, this, s, timeout, cl](db::rp_handle h) {
return this->apply_in_memory(m, s, std::move(h), timeout).handle_exception(maybe_handle_reorder);
@@ -1671,7 +1861,7 @@ database::make_keyspace_config(const keyspace_metadata& ksm) {
}
cfg.enable_disk_writes = !_cfg.enable_in_memory_data_store();
cfg.enable_disk_reads = true; // we always read from disk
cfg.enable_commitlog = ksm.durable_writes() && _cfg.enable_commitlog() && !_cfg.enable_in_memory_data_store();
cfg.enable_commitlog = _cfg.enable_commitlog() && !_cfg.enable_in_memory_data_store();
cfg.enable_cache = _cfg.enable_cache();
} else {
@@ -1684,7 +1874,6 @@ database::make_keyspace_config(const keyspace_metadata& ksm) {
cfg.enable_dangerous_direct_import_of_cassandra_counters = _cfg.enable_dangerous_direct_import_of_cassandra_counters();
cfg.compaction_enforce_min_threshold = _cfg.compaction_enforce_min_threshold;
cfg.dirty_memory_manager = &_dirty_memory_manager;
cfg.streaming_dirty_memory_manager = &_streaming_dirty_memory_manager;
cfg.streaming_read_concurrency_semaphore = &_streaming_concurrency_sem;
cfg.compaction_concurrency_semaphore = &_compaction_concurrency_sem;
cfg.cf_stats = &_cf_stats;
@@ -1751,11 +1940,7 @@ sstring database::get_available_index_name(const sstring &ks_name, const sstring
auto base_name = index_metadata::get_default_index_name(cf_name, index_name_root);
sstring accepted_name = base_name;
int i = 0;
auto name_accepted = [&] {
auto index_table_name = secondary_index::index_table_name(accepted_name);
return !has_schema(ks_name, index_table_name) && !existing_names.contains(accepted_name);
};
while (!name_accepted()) {
while (existing_names.contains(accepted_name)) {
accepted_name = base_name + "_" + std::to_string(++i);
}
return accepted_name;
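The simplified `get_available_index_name` loop above drops the extra `has_schema` check and keeps appending `_<i>` to the base name until the candidate is absent from `existing_names`. A standalone sketch of that search (hypothetical `pick_available_name`, with `std::set` standing in for Scylla's containers):

```cpp
#include <set>
#include <string>

// Sketch of the simplified index-name search: try the base name,
// then "base_1", "base_2", ... until a candidate is unused.
std::string pick_available_name(const std::string& base,
                                const std::set<std::string>& existing) {
    std::string candidate = base;
    int i = 0;
    while (existing.count(candidate)) {
        candidate = base + "_" + std::to_string(++i);
    }
    return candidate;
}
```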
@@ -1820,13 +2005,6 @@ future<>
database::stop() {
assert(!_large_data_handler->running());
// Inactive reads might hold on to sstables, blocking the
// `sstables_manager::close()` calls below. No one will come back for these
// reads at this point so clear them before proceeding with the shutdown.
_read_concurrency_sem.clear_inactive_reads();
_streaming_concurrency_sem.clear_inactive_reads();
_system_read_concurrency_sem.clear_inactive_reads();
// try to ensure that CL has done disk flushing
future<> maybe_shutdown_commitlog = _commitlog != nullptr ? _commitlog->shutdown() : make_ready_future<>();
return maybe_shutdown_commitlog.then([this] {
@@ -1840,8 +2018,6 @@ database::stop() {
return _system_dirty_memory_manager.shutdown();
}).then([this] {
return _dirty_memory_manager.shutdown();
}).then([this] {
return _streaming_dirty_memory_manager.shutdown();
}).then([this] {
return _memtable_controller.shutdown();
}).then([this] {
@@ -1857,6 +2033,11 @@ future<> database::flush_all_memtables() {
});
}
future<> database::flush(const sstring& ksname, const sstring& cfname) {
auto& cf = find_column_family(ksname, cfname);
return cf.flush();
}
future<> database::truncate(sstring ksname, sstring cfname, timestamp_func tsf) {
auto& ks = find_keyspace(ksname);
auto& cf = find_column_family(ksname, cfname);
@@ -1878,28 +2059,26 @@ future<> database::truncate(const keyspace& ks, column_family& cf, timestamp_fun
return cf.run_with_compaction_disabled([this, &cf, should_flush, auto_snapshot, tsf = std::move(tsf), low_mark]() mutable {
future<> f = make_ready_future<>();
bool did_flush = false;
if (should_flush && cf.can_flush()) {
if (should_flush) {
// TODO:
// this is not really a guarantee at all that we've actually
// gotten all things to disk. Again, need queue-ish or something.
f = cf.flush();
did_flush = true;
} else {
f = cf.clear();
}
return f.then([this, &cf, auto_snapshot, tsf = std::move(tsf), low_mark, should_flush, did_flush] {
return f.then([this, &cf, auto_snapshot, tsf = std::move(tsf), low_mark, should_flush] {
dblog.debug("Discarding sstable data for truncated CF + indexes");
// TODO: notify truncation
return tsf().then([this, &cf, auto_snapshot, low_mark, should_flush, did_flush](db_clock::time_point truncated_at) {
return tsf().then([this, &cf, auto_snapshot, low_mark, should_flush](db_clock::time_point truncated_at) {
future<> f = make_ready_future<>();
if (auto_snapshot) {
auto name = format("{:d}-{}", truncated_at.time_since_epoch().count(), cf.schema()->cf_name());
f = cf.snapshot(*this, name);
}
return f.then([this, &cf, truncated_at, low_mark, should_flush, did_flush] {
return cf.discard_sstables(truncated_at).then([this, &cf, truncated_at, low_mark, should_flush, did_flush](db::replay_position rp) {
return f.then([this, &cf, truncated_at, low_mark, should_flush] {
return cf.discard_sstables(truncated_at).then([this, &cf, truncated_at, low_mark, should_flush](db::replay_position rp) {
// TODO: indexes.
// Note: since discard_sstables was changed to only count tables owned by this shard,
// we can get zero rp back. Changed assert, and ensure we save at least low_mark.
@@ -1907,7 +2086,7 @@ future<> database::truncate(const keyspace& ks, column_family& cf, timestamp_fun
// We nowadays do not flush tables with sstables but autosnapshot=false. This means
// the low_mark assertion does not hold, because we maybe/probably never got around to
// creating the sstables that would create them.
assert(!did_flush || low_mark <= rp || rp == db::replay_position());
assert(!should_flush || low_mark <= rp || rp == db::replay_position());
rp = std::max(low_mark, rp);
return truncate_views(cf, truncated_at, should_flush).then([&cf, truncated_at, rp] {
// save_truncation_record() may actually fail after we cached the truncation time


@@ -224,10 +224,6 @@ public:
return bool(_seal_immediate_fn);
}
bool can_flush() const {
return may_flush() && !empty();
}
bool empty() const {
for (auto& m : _memtables) {
if (!m->empty()) {
@@ -382,7 +378,6 @@ public:
utils::updateable_value<bool> compaction_enforce_min_threshold{false};
bool enable_dangerous_direct_import_of_cassandra_counters = false;
::dirty_memory_manager* dirty_memory_manager = &default_dirty_memory_manager;
::dirty_memory_manager* streaming_dirty_memory_manager = &default_dirty_memory_manager;
reader_concurrency_semaphore* streaming_read_concurrency_semaphore;
reader_concurrency_semaphore* compaction_concurrency_semaphore;
::cf_stats* cf_stats = nullptr;
@@ -422,20 +417,6 @@ private:
lw_shared_ptr<memtable_list> _memtables;
utils::phased_barrier _streaming_flush_phaser;
// If mutations are fragmented during streaming the sstables cannot be made
// visible immediately after memtable flush, because that could cause
// readers to see only a part of a partition thus violating isolation
// guarantees.
// Mutations that are sent in fragments are kept separately in per-streaming
// plan memtables and the resulting sstables are not made visible until
// the streaming is complete.
struct monitored_sstable {
std::unique_ptr<database_sstable_write_monitor> monitor;
sstables::shared_sstable sstable;
};
lw_shared_ptr<memtable_list> make_memory_only_memtable_list();
lw_shared_ptr<memtable_list> make_memtable_list();
@@ -468,12 +449,12 @@ private:
// Provided by the database that owns this commitlog
db::commitlog* _commitlog;
bool _durable_writes;
compaction_manager& _compaction_manager;
secondary_index::secondary_index_manager _index_manager;
int _compaction_disabled = 0;
bool _compaction_disabled_by_user = false;
utils::phased_barrier _flush_barrier;
seastar::gate _streaming_flush_gate;
std::vector<view_ptr> _views;
std::unique_ptr<cell_locker> _counter_cell_locks; // Memory-intensive; allocate only when needed.
@@ -491,7 +472,7 @@ private:
// Operations like truncate, flush, query, etc, may depend on a column family being alive to
// complete. Some of them have their own gate already (like flush), used in specialized wait
// logic (like the streaming_flush_gate). That is particularly useful if there is a particular
// logic. That is particularly useful if there is a particular
// order in which we need to close those gates. For all the others operations that don't have
// such needs, we have this generic _async_gate, which all potentially asynchronous operations
// have to get. It will be closed by stop().
@@ -509,8 +490,6 @@ private:
utils::phased_barrier _pending_reads_phaser;
// Corresponding phaser for in-progress streams
utils::phased_barrier _pending_streams_phaser;
// Corresponding phaser for in-progress flushes
utils::phased_barrier _pending_flushes_phaser;
// This field caches the last truncation time for the table.
// The master copy resides in the system.truncated table
@@ -751,7 +730,6 @@ public:
// The mutation is always upgraded to current schema.
void apply(const frozen_mutation& m, const schema_ptr& m_schema, db::rp_handle&& = {});
void apply(const mutation& m, db::rp_handle&& = {});
void apply_streaming_mutation(schema_ptr, utils::UUID plan_id, const frozen_mutation&, bool fragmented);
// Returns at most "cmd.limit" rows
future<lw_shared_ptr<query::result>> query(schema_ptr,
@@ -767,27 +745,9 @@ public:
void start();
future<> stop();
future<> flush();
future<> flush_streaming_mutations(utils::UUID plan_id, dht::partition_range_vector ranges = dht::partition_range_vector{});
future<> clear(); // discards memtable(s) without flushing them to disk.
future<db::replay_position> discard_sstables(db_clock::time_point);
// Make sure the generation numbers are sequential, starting from "start".
// Generations before "start" are left untouched.
//
// Return the highest generation number seen so far
//
// Word of warning: although this function will reshuffle anything over "start", it is
// very dangerous to do that with live SSTables. This is meant to be used with SSTables
// that are not yet managed by the system.
//
// Parameter all_generations stores the generation of all SSTables in the system, so it
// will be easy to determine which SSTable is new.
// An example usage would query all shards asking what is the highest SSTable number known
// to them, and then pass that + 1 as "start".
future<std::vector<sstables::entry_descriptor>> reshuffle_sstables(std::set<int64_t> all_generations, int64_t start);
bool can_flush() const;
// FIXME: this is just an example, should be changed to something more
// general. compact_all_sstables() starts a compaction of all sstables.
// It doesn't flush the current memtable first. It's just a ad-hoc method,
@@ -900,6 +860,14 @@ public:
return _global_cache_hit_rate;
}
bool durable_writes() const {
return _durable_writes;
}
void set_durable_writes(bool dw) {
_durable_writes = dw;
}
void set_global_cache_hit_rate(cache_temperature rate) {
_global_cache_hit_rate = rate;
}
@@ -924,6 +892,10 @@ public:
return _pending_writes_phaser.advance_and_await();
}
size_t writes_in_progress() const {
return _pending_writes_phaser.operations_in_progress();
}
utils::phased_barrier::operation read_in_progress() {
return _pending_reads_phaser.start();
}
@@ -932,6 +904,10 @@ public:
return _pending_reads_phaser.advance_and_await();
}
size_t reads_in_progress() const {
return _pending_reads_phaser.operations_in_progress();
}
utils::phased_barrier::operation stream_in_progress() {
return _pending_streams_phaser.start();
}
@@ -940,12 +916,8 @@ public:
return _pending_streams_phaser.advance_and_await();
}
future<> await_pending_flushes() {
return _pending_flushes_phaser.advance_and_await();
}
future<> await_pending_ops() {
return when_all(await_pending_reads(), await_pending_writes(), await_pending_streams(), await_pending_flushes()).discard_result();
size_t streams_in_progress() const {
return _pending_streams_phaser.operations_in_progress();
}
void add_or_update_view(view_ptr v);
@@ -1100,7 +1072,7 @@ public:
std::map<sstring, sstring> options,
bool durable_writes,
std::vector<schema_ptr> cf_defs = std::vector<schema_ptr>{});
void validate(const locator::token_metadata& tm) const;
void validate(const locator::shared_token_metadata& stm) const;
const sstring& name() const {
return _name;
}
@@ -1148,7 +1120,6 @@ public:
utils::updateable_value<bool> compaction_enforce_min_threshold{false};
bool enable_dangerous_direct_import_of_cassandra_counters = false;
::dirty_memory_manager* dirty_memory_manager = &default_dirty_memory_manager;
::dirty_memory_manager* streaming_dirty_memory_manager = &default_dirty_memory_manager;
reader_concurrency_semaphore* streaming_read_concurrency_semaphore;
reader_concurrency_semaphore* compaction_concurrency_semaphore;
::cf_stats* cf_stats = nullptr;
@@ -1170,14 +1141,14 @@ private:
public:
explicit keyspace(lw_shared_ptr<keyspace_metadata> metadata, config cfg);
void update_from(const locator::token_metadata& tm, lw_shared_ptr<keyspace_metadata>);
void update_from(const locator::shared_token_metadata& stm, lw_shared_ptr<keyspace_metadata>);
/** Note: return by shared pointer value, since the meta data is
* semi-volatile. I.e. we could do alter keyspace at any time, and
* boom, it is replaced.
*/
lw_shared_ptr<keyspace_metadata> metadata() const;
void create_replication_strategy(const locator::token_metadata& tm, const std::map<sstring, sstring>& options);
void create_replication_strategy(const locator::shared_token_metadata& stm, const std::map<sstring, sstring>& options);
/**
* This should not really be return by reference, since replication
* strategy is also volatile in that it could be replaced at "any" time.
@@ -1234,6 +1205,7 @@ struct database_config {
seastar::scheduling_group memory_compaction_scheduling_group;
seastar::scheduling_group statement_scheduling_group;
seastar::scheduling_group streaming_scheduling_group;
seastar::scheduling_group gossip_scheduling_group;
size_t available_memory;
};
@@ -1292,7 +1264,6 @@ private:
dirty_memory_manager _system_dirty_memory_manager;
dirty_memory_manager _dirty_memory_manager;
dirty_memory_manager _streaming_dirty_memory_manager;
database_config _dbcfg;
flush_controller _memtable_controller;
@@ -1357,7 +1328,7 @@ private:
service::migration_notifier& _mnotifier;
gms::feature_service& _feat;
const locator::token_metadata& _token_metadata;
const locator::shared_token_metadata& _shared_token_metadata;
sharded<semaphore>& _sst_dir_semaphore;
@@ -1376,6 +1347,7 @@ private:
void create_in_memory_keyspace(const lw_shared_ptr<keyspace_metadata>& ksm);
friend void db::system_keyspace::make(database& db, bool durable, bool volatile_testing_only);
void setup_metrics();
void setup_scylla_memory_diagnostics_producer();
friend class db_apply_executor;
future<> do_apply(schema_ptr, const frozen_mutation&, tracing::trace_state_ptr tr_state, db::timeout_clock::time_point timeout, db::commitlog::force_sync sync);
@@ -1399,7 +1371,7 @@ public:
void set_enable_incremental_backups(bool val) { _enable_incremental_backups = val; }
future<> parse_system_tables(distributed<service::storage_proxy>&, distributed<service::migration_manager>&);
database(const db::config&, database_config dbcfg, service::migration_notifier& mn, gms::feature_service& feat, const locator::token_metadata& tm, abort_source& as, sharded<semaphore>& sst_dir_sem);
database(const db::config&, database_config dbcfg, service::migration_notifier& mn, gms::feature_service& feat, const locator::shared_token_metadata& stm, abort_source& as, sharded<semaphore>& sst_dir_sem);
database(database&&) = delete;
~database();
@@ -1425,7 +1397,8 @@ public:
return *_compaction_manager;
}
const locator::token_metadata& get_token_metadata() const { return _token_metadata; }
const locator::shared_token_metadata& get_shared_token_metadata() const { return _shared_token_metadata; }
const locator::token_metadata& get_token_metadata() const { return *_shared_token_metadata.get(); }
service::migration_notifier& get_notifier() { return _mnotifier; }
const service::migration_notifier& get_notifier() const { return _mnotifier; }
@@ -1558,6 +1531,7 @@ public:
void set_format_by_config();
future<> flush_all_memtables();
future<> flush(const sstring& ks, const sstring& cf);
// See #937. Truncation now requires a callback to get a time stamp
// that must be guaranteed to be the same for all shards.


@@ -182,7 +182,7 @@ future<> db::batchlog_manager::replay_all_failed_batches() {
// rate limit is in bytes per second. Uses Double.MAX_VALUE if disabled (set to 0 in cassandra.yaml).
// max rate is scaled by the number of nodes in the cluster (same as for HHOM - see CASSANDRA-5272).
auto throttle = _replay_rate / _qp.proxy().get_token_metadata().get_all_endpoints_count();
auto throttle = _replay_rate / _qp.proxy().get_token_metadata_ptr()->get_all_endpoints_count();
auto limiter = make_lw_shared<utils::rate_limiter>(throttle);
auto batch = [this, limiter](const cql3::untyped_result_set::row& row) {


@@ -68,6 +68,12 @@ seed_provider_to_json(const db::seed_provider_type& spt) {
return value_to_json("seed_provider_type");
}
static
json::json_return_type
hinted_handoff_enabled_to_json(const db::config::hinted_handoff_enabled_type& h) {
return value_to_json(h.to_configuration_string());
}
template <>
const config_type config_type_for<bool> = config_type("bool", value_to_json<bool>);
@@ -114,6 +120,9 @@ template <>
const config_type config_type_for<std::vector<enum_option<db::experimental_features_t>>> = config_type(
"experimental features", value_to_json<std::vector<sstring>>);
template <>
const config_type config_type_for<db::config::hinted_handoff_enabled_type> = config_type("hinted handoff enabled", hinted_handoff_enabled_to_json);
}
namespace YAML {
@@ -159,6 +168,18 @@ struct convert<db::config::seed_provider_type> {
}
};
template<>
struct convert<db::config::hinted_handoff_enabled_type> {
static bool decode(const Node& node, db::config::hinted_handoff_enabled_type& rhs) {
std::string opt;
if (!convert<std::string>::decode(node, opt)) {
return false;
}
rhs = db::hints::host_filter::parse_from_config_string(std::move(opt));
return true;
}
};
template <>
class convert<enum_option<db::experimental_features_t>> {
public:
@@ -572,7 +593,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
"Time interval in milliseconds to reset all node scores, which allows a bad node to recover.")
, dynamic_snitch_update_interval_in_ms(this, "dynamic_snitch_update_interval_in_ms", value_status::Unused, 100,
"The time interval for how often the snitch calculates node scores. Because score calculation is CPU intensive, be careful when reducing this interval.")
, hinted_handoff_enabled(this, "hinted_handoff_enabled", value_status::Used, "true",
, hinted_handoff_enabled(this, "hinted_handoff_enabled", value_status::Used, db::config::hinted_handoff_enabled_type(db::config::hinted_handoff_enabled_type::enabled_for_all_tag()),
"Enable or disable hinted handoff. To enable per data center, add data center list. For example: hinted_handoff_enabled: DC1,DC2. A hint indicates that the write needs to be replayed to an unavailable node. "
"Related information: About hinted handoff writes")
, hinted_handoff_throttle_in_kb(this, "hinted_handoff_throttle_in_kb", value_status::Unused, 1024,
@@ -614,6 +635,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
"\n"
"\torg.apache.cassandra.auth.AllowAllAuthenticator : Disables authentication; no checks are performed.\n"
"\torg.apache.cassandra.auth.PasswordAuthenticator : Authenticates users with user names and hashed passwords stored in the system_auth.credentials table. If you use the default, 1, and the node with the lone replica goes down, you will not be able to log into the cluster because the system_auth keyspace was not replicated.\n"
"\tcom.scylladb.auth.TransitionalAuthenticator : Wraps around the PasswordAuthenticator, logging them in if username/password pair provided is correct and treating them as anonymous users otherwise.\n"
"Related information: Internal authentication"
, {"AllowAllAuthenticator", "PasswordAuthenticator", "org.apache.cassandra.auth.PasswordAuthenticator", "org.apache.cassandra.auth.AllowAllAuthenticator", "com.scylladb.auth.TransitionalAuthenticator"})
, internode_authenticator(this, "internode_authenticator", value_status::Unused, "enabled",
@@ -623,6 +645,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
"\n"
"\tAllowAllAuthorizer : Disables authorization; allows any action to any user.\n"
"\tCassandraAuthorizer : Stores permissions in system_auth.permissions table. If you use the default, 1, and the node with the lone replica goes down, you will not be able to log into the cluster because the system_auth keyspace was not replicated.\n"
"\tcom.scylladb.auth.TransitionalAuthorizer : Wraps around the CassandraAuthorizer, which is used to authorize permission management. Other actions are allowed for all users.\n"
"Related information: Object permissions"
, {"AllowAllAuthorizer", "CassandraAuthorizer", "org.apache.cassandra.auth.AllowAllAuthorizer", "org.apache.cassandra.auth.CassandraAuthorizer", "com.scylladb.auth.TransitionalAuthorizer"})
, role_manager(this, "role_manager", value_status::Used, "org.apache.cassandra.auth.CassandraRoleManager",


@@ -33,6 +33,7 @@
#include "seastarx.hh"
#include "utils/config_file.hh"
#include "utils/enum_option.hh"
#include "db/hints/host_filter.hh"
namespace seastar { class file; struct logging_settings; }
@@ -115,6 +116,7 @@ public:
//program_options::string_map;
using string_list = std::vector<sstring>;
using seed_provider_type = db::seed_provider_type;
using hinted_handoff_enabled_type = db::hints::host_filter;
/*
* All values and documentation taken from
@@ -238,7 +240,7 @@ public:
named_value<double> dynamic_snitch_badness_threshold;
named_value<uint32_t> dynamic_snitch_reset_interval_in_ms;
named_value<uint32_t> dynamic_snitch_update_interval_in_ms;
named_value<sstring> hinted_handoff_enabled;
named_value<hinted_handoff_enabled_type> hinted_handoff_enabled;
named_value<uint32_t> hinted_handoff_throttle_in_kb;
named_value<uint32_t> max_hint_window_in_ms;
named_value<uint32_t> max_hints_delivery_threads;

db/hints/host_filter.cc (new file, 125 lines)

@@ -0,0 +1,125 @@
/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include <string_view>
#include <boost/algorithm/string.hpp>
#include "to_string.hh"
#include "host_filter.hh"
namespace db {
namespace hints {
host_filter::host_filter(host_filter::enabled_for_all_tag)
: _enabled_kind(host_filter::enabled_kind::enabled_for_all) {
}
host_filter::host_filter(host_filter::disabled_for_all_tag)
: _enabled_kind(host_filter::enabled_kind::disabled_for_all) {
}
host_filter::host_filter(std::unordered_set<sstring> allowed_dcs)
: _enabled_kind(allowed_dcs.empty() ? enabled_kind::disabled_for_all : enabled_kind::enabled_selectively)
, _dcs(std::move(allowed_dcs)) {
}
bool host_filter::can_hint_for(locator::snitch_ptr& snitch, gms::inet_address ep) const {
switch (_enabled_kind) {
case enabled_kind::enabled_for_all:
return true;
case enabled_kind::enabled_selectively:
return _dcs.contains(snitch->get_datacenter(ep));
case enabled_kind::disabled_for_all:
return false;
}
throw std::logic_error("Uncovered variant of enabled_kind");
}
host_filter host_filter::parse_from_config_string(sstring opt) {
if (boost::iequals(opt, "false") || opt == "0") {
return host_filter(disabled_for_all_tag());
} else if (boost::iequals(opt, "true") || opt == "1") {
return host_filter(enabled_for_all_tag());
}
return parse_from_dc_list(std::move(opt));
}
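`parse_from_config_string` above dispatches on the option string: case-insensitive `"true"`/`"1"` enables hints for all endpoints, `"false"`/`"0"` disables them, and anything else is handed to the DC-list parser. A standalone sketch of that dispatch (hypothetical `hint_mode` enum and `classify_option` helper, with a hand-rolled `iequals` in place of `boost::iequals`):

```cpp
#include <algorithm>
#include <cctype>
#include <string>

enum class hint_mode { enabled_for_all, disabled_for_all, selective };

// Case-insensitive comparison against an already-lowercase literal.
static bool iequals_lower(std::string a, const std::string& lower) {
    std::transform(a.begin(), a.end(), a.begin(),
                   [](unsigned char c) { return std::tolower(c); });
    return a == lower;
}

// Sketch of the hinted_handoff_enabled option dispatch.
hint_mode classify_option(const std::string& opt) {
    if (iequals_lower(opt, "false") || opt == "0") {
        return hint_mode::disabled_for_all;
    }
    if (iequals_lower(opt, "true") || opt == "1") {
        return hint_mode::enabled_for_all;
    }
    return hint_mode::selective; // treated as a comma-separated DC list
}
```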
host_filter host_filter::parse_from_dc_list(sstring opt) {
using namespace boost::algorithm;
std::vector<sstring> dcs;
split(dcs, opt, is_any_of(","));
std::for_each(dcs.begin(), dcs.end(), [] (sstring& dc) {
trim(dc);
if (dc.empty()) {
throw hints_configuration_parse_error("hinted_handoff_enabled: DC name may not be an empty string");
}
});
return host_filter(std::unordered_set<sstring>(dcs.begin(), dcs.end()));
}
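`parse_from_dc_list` above splits on commas, trims each token, and rejects empty DC names. A standalone sketch of those rules (hypothetical `parse_dc_list`, with `std::getline` replacing Boost's `split`/`trim` and `std::runtime_error` standing in for `hints_configuration_parse_error`; unlike Boost's split, this version silently ignores a trailing comma):

```cpp
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_set>

// Sketch of DC-list parsing: split on ',', trim whitespace,
// and reject empty DC names.
std::unordered_set<std::string> parse_dc_list(const std::string& opt) {
    std::unordered_set<std::string> dcs;
    std::istringstream in(opt);
    std::string dc;
    while (std::getline(in, dc, ',')) {
        auto b = dc.find_first_not_of(" \t");
        auto e = dc.find_last_not_of(" \t");
        if (b == std::string::npos) {
            throw std::runtime_error(
                "hinted_handoff_enabled: DC name may not be an empty string");
        }
        dcs.insert(dc.substr(b, e - b + 1));
    }
    return dcs;
}
```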
std::istream& operator>>(std::istream& is, host_filter& f) {
sstring tmp;
is >> tmp;
f = host_filter::parse_from_config_string(std::move(tmp));
return is;
}
sstring host_filter::to_configuration_string() const {
switch (_enabled_kind) {
case enabled_kind::enabled_for_all:
return "true";
case enabled_kind::enabled_selectively:
return ::join(",", _dcs);
case enabled_kind::disabled_for_all:
return "false";
}
throw std::logic_error("Uncovered variant of enabled_kind");
}
std::string_view host_filter::enabled_kind_to_string(host_filter::enabled_kind ek) {
switch (ek) {
case host_filter::enabled_kind::enabled_for_all:
return "enabled_for_all";
case host_filter::enabled_kind::enabled_selectively:
return "enabled_selectively";
case host_filter::enabled_kind::disabled_for_all:
return "disabled_for_all";
}
throw std::logic_error("Uncovered variant of enabled_kind");
}
std::ostream& operator<<(std::ostream& os, const host_filter& f) {
os << "host_filter{enabled_kind="
<< host_filter::enabled_kind_to_string(f._enabled_kind);
if (f._enabled_kind == host_filter::enabled_kind::enabled_selectively) {
os << ", dcs={" << ::join(",", f._dcs);
}
os << "}";
return os;
}
}
}

db/hints/host_filter.hh (new file, 103 lines)

@@ -0,0 +1,103 @@
/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <functional>
#include <unordered_set>
#include <exception>
#include <iostream>
#include <string_view>
#include <seastar/core/sstring.hh>
#include "gms/inet_address.hh"
#include "locator/snitch_base.hh"
#include "seastarx.hh"
namespace db {
namespace hints {
// host_filter tells hints_manager towards which endpoints it is allowed to generate hints.
class host_filter final {
private:
enum class enabled_kind {
enabled_for_all,
enabled_selectively,
disabled_for_all,
};
enabled_kind _enabled_kind;
std::unordered_set<sstring> _dcs;
static std::string_view enabled_kind_to_string(host_filter::enabled_kind ek);
public:
struct enabled_for_all_tag {};
struct disabled_for_all_tag {};
// Creates a filter that allows hints to all endpoints (default)
host_filter(enabled_for_all_tag tag = {});
// Creates a filter that does not allow any hints.
host_filter(disabled_for_all_tag);
// Creates a filter that allows sending hints to specified DCs.
explicit host_filter(std::unordered_set<sstring> allowed_dcs);
// Parses hint filtering configuration from the hinted_handoff_enabled option.
static host_filter parse_from_config_string(sstring opt);
// Parses hint filtering configuration from a list of DCs.
static host_filter parse_from_dc_list(sstring opt);
bool can_hint_for(locator::snitch_ptr& snitch, gms::inet_address ep) const;
inline const std::unordered_set<sstring>& get_dcs() const {
return _dcs;
}
bool operator==(const host_filter& other) const noexcept {
return _enabled_kind == other._enabled_kind
&& _dcs == other._dcs;
}
inline bool is_enabled_for_all() const noexcept {
return _enabled_kind == enabled_kind::enabled_for_all;
}
inline bool is_disabled_for_all() const noexcept {
return _enabled_kind == enabled_kind::disabled_for_all;
}
sstring to_configuration_string() const;
friend std::ostream& operator<<(std::ostream& os, const host_filter& f);
};
std::istream& operator>>(std::istream& is, host_filter& f);
class hints_configuration_parse_error : public std::runtime_error {
public:
using std::runtime_error::runtime_error;
};
}
}


@@ -38,6 +38,7 @@
#include "service/priority_manager.hh"
#include "database.hh"
#include "service_permit.hh"
#include "utils/directories.hh"
using namespace std::literals::chrono_literals;
@@ -50,9 +51,9 @@ const std::string manager::FILENAME_PREFIX("HintsLog" + commitlog::descriptor::S
const std::chrono::seconds manager::hint_file_write_timeout = std::chrono::seconds(2);
const std::chrono::seconds manager::hints_flush_period = std::chrono::seconds(10);
manager::manager(sstring hints_directory, std::vector<sstring> hinted_dcs, int64_t max_hint_window_ms, resource_manager& res_manager, distributed<database>& db)
manager::manager(sstring hints_directory, host_filter filter, int64_t max_hint_window_ms, resource_manager& res_manager, distributed<database>& db)
: _hints_dir(fs::path(hints_directory) / format("{:d}", this_shard_id()))
, _hinted_dcs(hinted_dcs.begin(), hinted_dcs.end())
, _host_filter(std::move(filter))
, _local_snitch_ptr(locator::i_endpoint_snitch::get_local_snitch_ptr())
, _max_hint_window_us(max_hint_window_ms * 1000)
, _local_db(db.local())
@@ -532,12 +533,56 @@ bool manager::can_hint_for(ep_key_type ep) const noexcept {
return true;
}
future<> manager::change_host_filter(host_filter filter) {
if (!started()) {
return make_exception_future<>(std::logic_error("change_host_filter: called before the hints_manager was started"));
}
return with_gate(_draining_eps_gate, [this, filter = std::move(filter)] () mutable {
return with_semaphore(drain_lock(), 1, [this, filter = std::move(filter)] () mutable {
if (draining_all()) {
return make_exception_future<>(std::logic_error("change_host_filter: cannot change the configuration because all hints were drained"));
}
manager_logger.debug("change_host_filter: changing from {} to {}", _host_filter, filter);
// Change the host_filter now and save the old one so that we can
// roll back in case of failure
std::swap(_host_filter, filter);
// Iterate over existing hint directories and see if we can enable an endpoint manager
// for some of them
return lister::scan_dir(_hints_dir, { directory_entry_type::directory }, [this] (fs::path datadir, directory_entry de) {
const ep_key_type ep = ep_key_type(de.name);
if (_ep_managers.contains(ep) || !_host_filter.can_hint_for(_local_snitch_ptr, ep)) {
return make_ready_future<>();
}
return get_ep_manager(ep).populate_segments_to_replay();
}).handle_exception([this, filter = std::move(filter)] (auto ep) mutable {
// Bring back the old filter. The finally() block will cause us to stop
// the additional ep_hint_managers that we started
_host_filter = std::move(filter);
}).finally([this] {
// Remove endpoint managers which are rejected by the filter
return parallel_for_each(_ep_managers, [this] (auto& pair) {
if (_host_filter.can_hint_for(_local_snitch_ptr, pair.first)) {
return make_ready_future<>();
}
return pair.second.stop(drain::no).finally([this, ep = pair.first] {
_ep_managers.erase(ep);
});
});
});
});
});
}
bool manager::check_dc_for(ep_key_type ep) const noexcept {
try {
// If target's DC is not a "hintable" DCs - don't hint.
// If there is an end point manager then DC has already been checked and found to be ok.
return _hinted_dcs.empty() || have_ep_manager(ep) ||
_hinted_dcs.contains(_local_snitch_ptr->get_datacenter(ep));
return _host_filter.is_enabled_for_all() || have_ep_manager(ep) ||
_host_filter.can_hint_for(_local_snitch_ptr, ep);
} catch (...) {
// if we failed to check the DC - block this hint
return false;
@@ -853,12 +898,14 @@ void manager::end_point_hints_manager::sender::send_hints_maybe() noexcept {
static future<> scan_for_hints_dirs(const sstring& hints_directory, std::function<future<> (fs::path dir, directory_entry de, unsigned shard_id)> f) {
return lister::scan_dir(hints_directory, { directory_entry_type::directory }, [f = std::move(f)] (fs::path dir, directory_entry de) mutable {
unsigned shard_id;
try {
return f(std::move(dir), std::move(de), std::stoi(de.name.c_str()));
shard_id = std::stoi(de.name.c_str());
} catch (std::invalid_argument& ex) {
manager_logger.debug("Ignore invalid directory {}", de.name);
return make_ready_future<>();
}
return f(std::move(dir), std::move(de), shard_id);
});
}
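The refactored `scan_for_hints_dirs` parses each directory name as a shard id before invoking the callback, skipping names that are not numeric rather than failing the scan. A small sketch of that parsing rule (the `parse_shard_id` helper is illustrative, not part of the actual code):

```cpp
#include <cassert>
#include <optional>
#include <stdexcept>
#include <string>

// A directory name is a valid shard subdirectory only if it parses as an
// integer; non-numeric names (e.g. "lost+found") are skipped, not errors.
std::optional<unsigned> parse_shard_id(const std::string& name) {
    try {
        return static_cast<unsigned>(std::stoi(name));
    } catch (const std::invalid_argument&) {
        return std::nullopt; // ignore and move on, as the debug log above does
    }
}
```

Note the shape of the fix in the hunk above: `std::stoi` is called outside the lambda's return expression so that only the parse itself is guarded by the `try`, and the callback `f` runs outside the `catch` scope.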
@@ -1018,5 +1065,92 @@ void manager::update_backlog(size_t backlog, size_t max_backlog) {
}
}
class directory_initializer::impl {
enum class state {
uninitialized = 0,
created_and_validated = 1,
rebalanced = 2,
};
utils::directories& _dirs;
sstring _hints_directory;
state _state = state::uninitialized;
seastar::named_semaphore _lock = {1, named_semaphore_exception_factory{"hints directory initialization lock"}};
public:
impl(utils::directories& dirs, sstring hints_directory)
: _dirs(dirs)
, _hints_directory(std::move(hints_directory))
{ }
future<> ensure_created_and_verified() {
if (_state > state::uninitialized) {
return make_ready_future<>();
}
return with_semaphore(_lock, 1, [this] () {
utils::directories::set dir_set;
dir_set.add_sharded(_hints_directory);
return _dirs.create_and_verify(std::move(dir_set)).then([this] {
manager_logger.debug("Creating and validating hint directories: {}", _hints_directory);
_state = state::created_and_validated;
});
});
}
future<> ensure_rebalanced() {
if (_state < state::created_and_validated) {
return make_exception_future<>(std::logic_error("hints directory needs to be created and validated before rebalancing"));
}
if (_state > state::created_and_validated) {
return make_ready_future<>();
}
return with_semaphore(_lock, 1, [this] () {
manager_logger.debug("Rebalancing hints in {}", _hints_directory);
return manager::rebalance(_hints_directory).then([this] {
_state = state::rebalanced;
});
});
}
};
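The `impl` class above enforces an ordering between its two steps through the `state` enum: rebalancing is only legal after the directories were created and validated, and each step is a no-op once completed. A minimal synchronous sketch of that state machine, without the seastar futures and semaphore:

```cpp
#include <cassert>
#include <stdexcept>

// Seastar-free sketch of the ordering enforced by directory_initializer::impl.
class init_state_machine {
    enum class state { uninitialized, created_and_validated, rebalanced };
    state _state = state::uninitialized;
public:
    void ensure_created_and_verified() {
        if (_state > state::uninitialized) {
            return; // already done; calling again is harmless
        }
        _state = state::created_and_validated;
    }
    void ensure_rebalanced() {
        if (_state < state::created_and_validated) {
            throw std::logic_error("create/verify must run before rebalance");
        }
        if (_state > state::created_and_validated) {
            return; // already rebalanced
        }
        _state = state::rebalanced;
    }
    bool rebalanced() const { return _state == state::rebalanced; }
};
```

In the real class each step additionally takes `_lock` before mutating `_state`, since the futures may interleave across continuations.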
directory_initializer::directory_initializer(std::shared_ptr<directory_initializer::impl> impl)
: _impl(std::move(impl))
{ }
directory_initializer::~directory_initializer()
{ }
directory_initializer directory_initializer::make_dummy() {
return directory_initializer{nullptr};
}
future<directory_initializer> directory_initializer::make(utils::directories& dirs, sstring hints_directory) {
return smp::submit_to(0, [&dirs, hints_directory = std::move(hints_directory)] () mutable {
auto impl = std::make_shared<directory_initializer::impl>(dirs, std::move(hints_directory));
return make_ready_future<directory_initializer>(directory_initializer(std::move(impl)));
});
}
future<> directory_initializer::ensure_created_and_verified() {
if (!_impl) {
return make_ready_future<>();
}
return smp::submit_to(0, [impl = this->_impl] () mutable {
return impl->ensure_created_and_verified().then([impl] {});
});
}
future<> directory_initializer::ensure_rebalanced() {
if (!_impl) {
return make_ready_future<>();
}
return smp::submit_to(0, [impl = this->_impl] () mutable {
return impl->ensure_rebalanced().then([impl] {});
});
}
}
}

@@ -40,11 +40,16 @@
#include "utils/loading_shared_values.hh"
#include "utils/fragmented_temporary_buffer.hh"
#include "db/hints/resource_manager.hh"
#include "db/hints/host_filter.hh"
namespace service {
class storage_service;
}
namespace utils {
class directories;
}
namespace db {
namespace hints {
@@ -53,6 +58,25 @@ using hints_store_ptr = node_to_hint_store_factory_type::entry_ptr;
using hint_entry_reader = commitlog_entry_reader;
using timer_clock_type = seastar::lowres_clock;
/// A helper class which tracks hints directory creation
/// and allows hints directory initialization to be performed lazily.
class directory_initializer {
private:
class impl;
::std::shared_ptr<impl> _impl;
directory_initializer(::std::shared_ptr<impl> impl);
public:
/// Creates an initializer that does nothing. Useful in tests.
static directory_initializer make_dummy();
static future<directory_initializer> make(utils::directories& dirs, sstring hints_directory);
~directory_initializer();
future<> ensure_created_and_verified();
future<> ensure_rebalanced();
};
class manager : public service::endpoint_lifecycle_subscriber {
private:
struct stats {
@@ -450,7 +474,7 @@ private:
dev_t _hints_dir_device_id = 0;
node_to_hint_store_factory_type _store_factory;
std::unordered_set<sstring> _hinted_dcs;
host_filter _host_filter;
shared_ptr<service::storage_proxy> _proxy_anchor;
shared_ptr<gms::gossiper> _gossiper_anchor;
shared_ptr<service::storage_service> _strorage_service_anchor;
@@ -469,7 +493,7 @@ private:
seastar::named_semaphore _drain_lock = {1, named_semaphore_exception_factory{"drain lock"}};
public:
manager(sstring hints_directory, std::vector<sstring> hinted_dcs, int64_t max_hint_window_ms, resource_manager& res_manager, distributed<database>& db);
manager(sstring hints_directory, host_filter filter, int64_t max_hint_window_ms, resource_manager& res_manager, distributed<database>& db);
virtual ~manager();
manager(manager&&) = delete;
manager& operator=(manager&&) = delete;
@@ -478,6 +502,15 @@ public:
future<> stop();
bool store_hint(gms::inet_address ep, schema_ptr s, lw_shared_ptr<const frozen_mutation> fm, tracing::trace_state_ptr tr_state) noexcept;
/// \brief Changes the host_filter currently used, stopping and starting ep_managers relevant to the new host_filter.
/// \param filter the new host_filter
/// \return A future that resolves when the operation is complete.
future<> change_host_filter(host_filter filter);
const host_filter& get_host_filter() const noexcept {
return _host_filter;
}
/// \brief Check if a hint may be generated to the given end point
/// \param ep end point to check
/// \return true if we should generate the hint to the given end point if it becomes unavailable
@@ -504,6 +537,12 @@ public:
/// \return TRUE if hints are allowed to be generated to \param ep.
bool check_dc_for(ep_key_type ep) const noexcept;
/// \brief Checks if hints are disabled for all endpoints
/// \return TRUE if hints are disabled.
bool is_disabled_for_all() const noexcept {
return _host_filter.is_disabled_for_all();
}
/// \return Size of mutations of hints in-flight (to the disk) at the moment.
uint64_t size_of_hints_in_progress() const noexcept {
return _stats.size_of_hints_in_progress;
@@ -557,6 +596,12 @@ public:
_state.set(state::replay_allowed);
}
/// \brief Creates an object which aids in hints directory initialization.
/// This object can safely be copied and used from any shard.
/// \arg dirs The utils::directories object, used to create and lock hints directories
/// \arg hints_directory The directory with hints which should be initialized
directory_initializer make_directory_initializer(utils::directories& dirs, fs::path hints_directory);
/// \brief Rebalance hints segments among all present shards.
///
/// The difference between the number of segments on any two shards will be not greater than 1 after the
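The balancing guarantee in the comment above (any two shards differ by at most one segment) is the standard quotient-and-remainder split. A sketch of the target distribution, with a hypothetical `balanced_segment_counts` helper:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Distribute `total` hint segments across `shards` so that any two shards
// differ by at most one segment: every shard gets total / shards, and the
// first total % shards shards each get one extra.
std::vector<size_t> balanced_segment_counts(size_t total, size_t shards) {
    std::vector<size_t> counts(shards, total / shards);
    for (size_t i = 0; i < total % shards; ++i) {
        ++counts[i]; // spread the remainder one segment at a time
    }
    return counts;
}
```

The real `manager::rebalance` has to move existing on-disk segments toward this target rather than compute it from scratch, but the invariant it converges to is the one shown here.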

@@ -68,12 +68,14 @@ const std::chrono::seconds space_watchdog::_watchdog_period = std::chrono::secon
space_watchdog::space_watchdog(shard_managers_set& managers, per_device_limits_map& per_device_limits_map)
: _shard_managers(managers)
, _per_device_limits_map(per_device_limits_map)
, _update_lock(1, named_semaphore_exception_factory{"update lock"})
{}
void space_watchdog::start() {
_started = seastar::async([this] {
while (!_as.abort_requested()) {
try {
const auto units = get_units(_update_lock, 1).get();
on_timer();
} catch (...) {
resource_manager_logger.trace("space_watchdog: unexpected exception - stop all hints generators");
@@ -176,56 +178,95 @@ void space_watchdog::on_timer() {
}
future<> resource_manager::start(shared_ptr<service::storage_proxy> proxy_ptr, shared_ptr<gms::gossiper> gossiper_ptr, shared_ptr<service::storage_service> ss_ptr) {
return parallel_for_each(_shard_managers, [proxy_ptr, gossiper_ptr, ss_ptr](manager& m) {
return m.start(proxy_ptr, gossiper_ptr, ss_ptr);
}).then([this]() {
return prepare_per_device_limits();
}).then([this]() {
return _space_watchdog.start();
_proxy_ptr = std::move(proxy_ptr);
_gossiper_ptr = std::move(gossiper_ptr);
_ss_ptr = std::move(ss_ptr);
return with_semaphore(_operation_lock, 1, [this] () {
return parallel_for_each(_shard_managers, [this](manager& m) {
return m.start(_proxy_ptr, _gossiper_ptr, _ss_ptr);
}).then([this]() {
return do_for_each(_shard_managers, [this](manager& m) {
return prepare_per_device_limits(m);
});
}).then([this]() {
return _space_watchdog.start();
}).then([this]() {
set_running();
});
});
}
void resource_manager::allow_replaying() noexcept {
set_replay_allowed();
boost::for_each(_shard_managers, [] (manager& m) { m.allow_replaying(); });
}
future<> resource_manager::stop() noexcept {
return parallel_for_each(_shard_managers, [](manager& m) {
return m.stop();
}).finally([this]() {
return _space_watchdog.stop();
return with_semaphore(_operation_lock, 1, [this] () {
return parallel_for_each(_shard_managers, [](manager& m) {
return m.stop();
}).finally([this]() {
return _space_watchdog.stop();
}).then([this]() {
unset_running();
});
});
}
void resource_manager::register_manager(manager& m) {
_shard_managers.insert(m);
}
future<> resource_manager::register_manager(manager& m) {
return with_semaphore(_operation_lock, 1, [this, &m] () {
return with_semaphore(_space_watchdog.update_lock(), 1, [this, &m] {
const auto [it, inserted] = _shard_managers.insert(m);
if (!inserted) {
// Already registered
return make_ready_future<>();
}
if (!running()) {
// The hints manager will be started later by resource_manager::start()
return make_ready_future<>();
}
future<> resource_manager::prepare_per_device_limits() {
return do_for_each(_shard_managers, [this] (manager& shard_manager) mutable {
dev_t device_id = shard_manager.hints_dir_device_id();
auto it = _per_device_limits_map.find(device_id);
if (it == _per_device_limits_map.end()) {
return is_mountpoint(shard_manager.hints_dir().parent_path()).then([this, device_id, &shard_manager](bool is_mountpoint) {
auto [it, inserted] = _per_device_limits_map.emplace(device_id, space_watchdog::per_device_limits{});
// Since we possibly deferred, we need to recheck the _per_device_limits_map.
if (inserted) {
// By default, give each group of managers 10% of the available disk space. Give each shard an equal share of the available space.
it->second.max_shard_disk_space_size = std::filesystem::space(shard_manager.hints_dir().c_str()).capacity / (10 * smp::count);
// If hints directory is a mountpoint, we assume it's on dedicated (i.e. not shared with data/commitlog/etc) storage.
// Then, reserve 90% of all space instead of 10% above.
if (is_mountpoint) {
it->second.max_shard_disk_space_size *= 9;
// If the resource_manager was started, start the hints manager, too.
return m.start(_proxy_ptr, _gossiper_ptr, _ss_ptr).then([this, &m] {
// Calculate device limits for this manager so that it is accounted for
// by the space_watchdog
return prepare_per_device_limits(m).then([this, &m] {
if (this->replay_allowed()) {
m.allow_replaying();
}
}
it->second.managers.emplace_back(std::ref(shard_manager));
});
}).handle_exception([this, &m] (auto ep) {
_shard_managers.erase(m);
return make_exception_future<>(ep);
});
} else {
it->second.managers.emplace_back(std::ref(shard_manager));
return make_ready_future<>();
}
});
});
}
future<> resource_manager::prepare_per_device_limits(manager& shard_manager) {
dev_t device_id = shard_manager.hints_dir_device_id();
auto it = _per_device_limits_map.find(device_id);
if (it == _per_device_limits_map.end()) {
return is_mountpoint(shard_manager.hints_dir().parent_path()).then([this, device_id, &shard_manager](bool is_mountpoint) {
auto [it, inserted] = _per_device_limits_map.emplace(device_id, space_watchdog::per_device_limits{});
// Since we possibly deferred, we need to recheck the _per_device_limits_map.
if (inserted) {
// By default, give each group of managers 10% of the available disk space. Give each shard an equal share of the available space.
it->second.max_shard_disk_space_size = std::filesystem::space(shard_manager.hints_dir().c_str()).capacity / (10 * smp::count);
// If hints directory is a mountpoint, we assume it's on dedicated (i.e. not shared with data/commitlog/etc) storage.
// Then, reserve 90% of all space instead of 10% above.
if (is_mountpoint) {
it->second.max_shard_disk_space_size *= 9;
}
}
it->second.managers.emplace_back(std::ref(shard_manager));
});
} else {
it->second.managers.emplace_back(std::ref(shard_manager));
return make_ready_future<>();
}
}
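The sizing rule inside `prepare_per_device_limits` reduces to simple arithmetic: each shard gets an equal slice of 10% of the device capacity by default, scaled up to 90% when the hints directory is a dedicated mountpoint. A sketch of just that calculation (the function name here is illustrative):

```cpp
#include <cassert>
#include <cstdint>

// Per-shard disk budget for hints, mirroring the rule in
// prepare_per_device_limits: 10% of capacity split evenly across shards,
// or 90% when the hints directory sits on its own mountpoint.
uint64_t max_shard_disk_space(uint64_t device_capacity, unsigned shard_count,
                              bool dedicated_mountpoint) {
    uint64_t budget = device_capacity / (10 * shard_count); // 10% / shards
    if (dedicated_mountpoint) {
        budget *= 9; // dedicated storage: reserve 90% instead of 10%
    }
    return budget;
}
```

In the real code `device_capacity` comes from `std::filesystem::space(...).capacity` and `shard_count` is `smp::count`; the "recheck after deferring" comment in the hunk exists because `is_mountpoint` may suspend, so the map must be re-probed via the `emplace` result before writing the budget.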
}
}

@@ -78,6 +78,7 @@ private:
size_t _total_size = 0;
shard_managers_set& _shard_managers;
per_device_limits_map& _per_device_limits_map;
seastar::named_semaphore _update_lock;
future<> _started = make_ready_future<>();
seastar::abort_source _as;
@@ -88,6 +89,10 @@ public:
void start();
future<> stop() noexcept;
seastar::named_semaphore& update_lock() {
return _update_lock;
}
private:
/// \brief Check that hints don't occupy too much disk space.
///
@@ -119,10 +124,47 @@ class resource_manager {
const size_t _min_send_hint_budget;
seastar::named_semaphore _send_limiter;
seastar::named_semaphore _operation_lock;
space_watchdog::shard_managers_set _shard_managers;
space_watchdog::per_device_limits_map _per_device_limits_map;
space_watchdog _space_watchdog;
shared_ptr<service::storage_proxy> _proxy_ptr;
shared_ptr<gms::gossiper> _gossiper_ptr;
shared_ptr<service::storage_service> _ss_ptr;
enum class state {
running,
replay_allowed,
};
using state_set = enum_set<super_enum<state,
state::running,
state::replay_allowed>>;
state_set _state;
void set_running() noexcept {
_state.set(state::running);
}
void unset_running() noexcept {
_state.remove(state::running);
}
bool running() const noexcept {
return _state.contains(state::running);
}
void set_replay_allowed() noexcept {
_state.set(state::replay_allowed);
}
bool replay_allowed() const noexcept {
return _state.contains(state::replay_allowed);
}
future<> prepare_per_device_limits(manager& shard_manager);
public:
static constexpr size_t hint_segment_size_in_mb = 32;
static constexpr size_t max_hints_per_ep_size_mb = 128; // 4 files 32MB each
@@ -133,6 +175,7 @@ public:
: _max_send_in_flight_memory(std::max(max_send_in_flight_memory, max_hints_send_queue_length))
, _min_send_hint_budget(_max_send_in_flight_memory / max_hints_send_queue_length)
, _send_limiter(_max_send_in_flight_memory, named_semaphore_exception_factory{"send limiter"})
, _operation_lock(1, named_semaphore_exception_factory{"operation lock"})
, _space_watchdog(_shard_managers, _per_device_limits_map)
{}
@@ -143,10 +186,16 @@ public:
size_t sending_queue_length() const;
future<> start(shared_ptr<service::storage_proxy> proxy_ptr, shared_ptr<gms::gossiper> gossiper_ptr, shared_ptr<service::storage_service> ss_ptr);
void allow_replaying() noexcept;
future<> stop() noexcept;
void register_manager(manager& m);
future<> prepare_per_device_limits();
/// \brief Allows replaying hints for managers which are registered now or will be in the future.
void allow_replaying() noexcept;
/// \brief Registers the hints::manager in resource_manager, and starts it, if resource_manager is already running.
///
/// The hints::managers can be added either before or after resource_manager starts.
/// If resource_manager is already started, the hints manager will also be started.
future<> register_manager(manager& m);
};
}

@@ -83,7 +83,7 @@ static future<> try_record(std::string_view large_table, const sstables::sstable
std::string pk_str = key_to_str(partition_key.to_partition_key(s), s);
auto timestamp = db_clock::now();
large_data_logger.warn("Writing large {} {}/{}: {}{} ({} bytes)", desc, ks_name, cf_name, pk_str, extra_path, size);
return db::execute_cql(req, ks_name, cf_name, sstable_name, size, pk_str, timestamp, args...)
return db::qctx->execute_cql(req, ks_name, cf_name, sstable_name, size, pk_str, timestamp, args...)
.discard_result()
.handle_exception([ks_name, cf_name, large_table, sstable_name] (std::exception_ptr ep) {
large_data_logger.warn("Failed to add a record to system.large_{}s: ks = {}, table = {}, sst = {} exception = {}",
@@ -113,7 +113,7 @@ future<> cql_table_large_data_handler::record_large_cells(const sstables::sstabl
auto ck_str = key_to_str(*clustering_key, s);
return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, format("{} {}", ck_str, column_name), extra_fields, ck_str, column_name);
} else {
return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, data_value::make_null(utf8_type), column_name);
return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, nullptr, column_name);
}
}
@@ -125,7 +125,7 @@ future<> cql_table_large_data_handler::record_large_rows(const sstables::sstable
std::string ck_str = key_to_str(*clustering_key, s);
return try_record("row", sst, partition_key, int64_t(row_size), "row", ck_str, extra_fields, ck_str);
} else {
return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, data_value::make_null(utf8_type));
return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, nullptr);
}
}
@@ -133,7 +133,7 @@ future<> cql_table_large_data_handler::delete_large_data_entries(const schema& s
const sstring req =
format("DELETE FROM system.{} WHERE keyspace_name = ? AND table_name = ? AND sstable_name = ?",
large_table_name);
return db::execute_cql(req, s.ks_name(), s.cf_name(), sstable_name)
return db::qctx->execute_cql(req, s.ks_name(), s.cf_name(), sstable_name)
.discard_result()
.handle_exception([&s, sstable_name, large_table_name] (std::exception_ptr ep) {
large_data_logger.warn("Failed to drop entries from {}: ks = {}, table = {}, sst = {} exception = {}",

@@ -111,12 +111,27 @@ public:
return make_ready_future<>();
}
future<> maybe_delete_large_data_entries(const schema& /*s*/, sstring /*filename*/, uint64_t /*data_size*/) {
future<> maybe_delete_large_data_entries(const schema& s, sstring filename, uint64_t data_size) {
assert(running());
// Deletion of large data entries is disabled due to #7668
// They will eventually expire based on the 30-day TTL.
return make_ready_future<>();
future<> large_partitions = make_ready_future<>();
if (__builtin_expect(data_size > _partition_threshold_bytes, false)) {
large_partitions = with_sem([&s, filename, this] () mutable {
return delete_large_data_entries(s, std::move(filename), db::system_keyspace::LARGE_PARTITIONS);
});
}
future<> large_rows = make_ready_future<>();
if (__builtin_expect(data_size > _row_threshold_bytes, false)) {
large_rows = with_sem([&s, filename, this] () mutable {
return delete_large_data_entries(s, std::move(filename), db::system_keyspace::LARGE_ROWS);
});
}
future<> large_cells = make_ready_future<>();
if (__builtin_expect(data_size > _cell_threshold_bytes, false)) {
large_cells = with_sem([&s, filename, this] () mutable {
return delete_large_data_entries(s, std::move(filename), db::system_keyspace::LARGE_CELLS);
});
}
return when_all(std::move(large_partitions), std::move(large_rows), std::move(large_cells)).discard_result();
}
const large_data_handler::stats& stats() const { return _stats; }
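The dispatch in `maybe_delete_large_data_entries` above checks the sstable's data size against three independent thresholds and queues one cleanup per threshold exceeded. A synchronous sketch of that selection logic (the helper name and the plain-string table names are illustrative; the real code uses `db::system_keyspace::LARGE_PARTITIONS` etc. and runs the deletes concurrently under `when_all`):

```cpp
#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

// Decide which system.large_* tables need their entries dropped for an
// sstable of the given size. Each threshold is checked independently, so
// a very large sstable can hit all three.
std::vector<std::string> tables_to_clean(uint64_t data_size,
                                         uint64_t partition_threshold,
                                         uint64_t row_threshold,
                                         uint64_t cell_threshold) {
    std::vector<std::string> tables;
    if (data_size > partition_threshold) tables.push_back("large_partitions");
    if (data_size > row_threshold)       tables.push_back("large_rows");
    if (data_size > cell_threshold)      tables.push_back("large_cells");
    return tables;
}
```

The `__builtin_expect(..., false)` hints in the hunk encode the same assumption this sketch makes implicit: exceeding a threshold is the rare path.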

@@ -29,8 +29,6 @@
#include "exceptions/exceptions.hh"
#include "timeout_config.hh"
class database;
namespace service {
class storage_proxy;
}
@@ -38,9 +36,8 @@ class storage_proxy;
namespace db {
struct query_context {
distributed<database>& _db;
distributed<cql3::query_processor>& _qp;
query_context(distributed<database>& db, distributed<cql3::query_processor>& qp) : _db(db), _qp(qp) {}
query_context(distributed<cql3::query_processor>& qp) : _qp(qp) {}
template <typename... Args>
future<::shared_ptr<cql3::untyped_result_set>> execute_cql(sstring req, Args&&... args) {
@@ -58,23 +55,23 @@ struct query_context {
// let the `storage_proxy` time out the query down the call chain
db::timeout_clock::duration::zero();
return do_with(timeout_config{d, d, d, d, d, d, d}, [this, req = std::move(req), &args...] (auto& tcfg) {
struct timeout_context {
std::unique_ptr<service::client_state> client_state;
service::query_state query_state;
timeout_context(db::timeout_clock::duration d)
: client_state(std::make_unique<service::client_state>(service::client_state::internal_tag{}, timeout_config{d, d, d, d, d, d, d}))
, query_state(*client_state, empty_service_permit())
{}
};
return do_with(timeout_context(d), [this, req = std::move(req), &args...] (auto& tctx) {
return _qp.local().execute_internal(req,
cql3::query_options::DEFAULT.get_consistency(),
tcfg,
tctx.query_state,
{ data_value(std::forward<Args>(args))... },
true);
});
}
database& db() {
return _db.local();
}
service::storage_proxy& proxy() {
return _qp.local().proxy();
}
cql3::query_processor& qp() {
return _qp.local();
}
@@ -82,19 +79,4 @@ struct query_context {
// This does not have to be thread local, because all cores will share the same context.
extern std::unique_ptr<query_context> qctx;
template <typename... Args>
static future<::shared_ptr<cql3::untyped_result_set>> execute_cql(sstring text, Args&&... args) {
assert(qctx);
return qctx->execute_cql(text, std::forward<Args>(args)...);
}
template <typename... Args>
static future<::shared_ptr<cql3::untyped_result_set>> execute_cql_with_timeout(sstring cql,
db::timeout_clock::time_point timeout,
Args&&... args) {
assert(qctx);
return qctx->execute_cql_with_timeout(cql, timeout, std::forward<Args>(args)...);
}
}

@@ -226,24 +226,24 @@ using namespace v3;
using days = std::chrono::duration<int, std::ratio<24 * 3600>>;
future<> save_system_schema(const sstring & ksname) {
auto& ks = db::qctx->db().find_keyspace(ksname);
future<> save_system_schema(cql3::query_processor& qp, const sstring & ksname) {
auto& ks = qp.db().find_keyspace(ksname);
auto ksm = ks.metadata();
// delete old, possibly obsolete entries in schema tables
return parallel_for_each(all_table_names(schema_features::full()), [ksm] (sstring cf) {
auto deletion_timestamp = schema_creation_timestamp() - 1;
return db::execute_cql(format("DELETE FROM {}.{} USING TIMESTAMP {} WHERE keyspace_name = ?", NAME, cf,
return qctx->execute_cql(format("DELETE FROM {}.{} USING TIMESTAMP {} WHERE keyspace_name = ?", NAME, cf,
deletion_timestamp), ksm->name()).discard_result();
}).then([ksm] {
}).then([ksm, &qp] {
auto mvec = make_create_keyspace_mutations(ksm, schema_creation_timestamp(), true);
return qctx->proxy().mutate_locally(std::move(mvec), tracing::trace_state_ptr());
return qp.proxy().mutate_locally(std::move(mvec), tracing::trace_state_ptr());
});
}
/** add entries to system_schema.* for the hardcoded system definitions */
future<> save_system_keyspace_schema() {
return save_system_schema(NAME);
future<> save_system_keyspace_schema(cql3::query_processor& qp) {
return save_system_schema(qp, NAME);
}
namespace v3 {
@@ -1208,42 +1208,7 @@ static void merge_tables_and_views(distributed<service::storage_proxy>& proxy,
return create_table_from_mutations(proxy, std::move(sm));
});
auto views_diff = diff_table_or_view(proxy, std::move(views_before), std::move(views_after), [&] (schema_mutations sm) {
// The view schema mutation should be created with reference to the base table schema because we definitely know it by now.
// If we don't do it we are leaving a window where write commands to this schema are illegal.
// There are 3 possibilities:
// 1. The table was altered - in this case we want the view to correspond to this new table schema.
// 2. The table was just created - the table is guarantied to be published with the view in that case.
// 3. The view itself was altered - in that case we already know the base table so we can take it from
// the database object.
view_ptr vp = create_view_from_mutations(proxy, std::move(sm));
schema_ptr base_schema;
for (auto&& s : tables_diff.altered) {
if (s.new_schema.get()->ks_name() == vp->ks_name() && s.new_schema.get()->cf_name() == vp->view_info()->base_name() ) {
base_schema = s.new_schema;
break;
}
}
if (!base_schema) {
for (auto&& s : tables_diff.created) {
if (s.get()->ks_name() == vp->ks_name() && s.get()->cf_name() == vp->view_info()->base_name() ) {
base_schema = s;
break;
}
}
}
if (!base_schema) {
base_schema = proxy.local().local_db().find_schema(vp->ks_name(), vp->view_info()->base_name());
}
// Now when we have a referenced base - just in case we are registering an old view (this can happen in a mixed cluster)
// lets make it write enabled by updating it's compute columns.
view_ptr fixed_vp = maybe_fix_legacy_secondary_index_mv_schema(proxy.local().get_db().local(), vp, base_schema, preserve_version::yes);
if(fixed_vp) {
vp = fixed_vp;
}
vp->view_info()->set_base_info(vp->view_info()->make_base_dependent_view_info(*base_schema));
return vp;
return create_view_from_mutations(proxy, std::move(sm));
});
proxy.local().get_db().invoke_on_all([&] (database& db) {
@@ -3068,7 +3033,8 @@ std::vector<sstring> all_table_names(schema_features features) {
boost::adaptors::transformed([] (auto schema) { return schema->cf_name(); }));
}
view_ptr maybe_fix_legacy_secondary_index_mv_schema(database& db, const view_ptr& v, schema_ptr base_schema, preserve_version preserve_version) {
future<> maybe_update_legacy_secondary_index_mv_schema(service::migration_manager& mm, database& db, view_ptr v) {
// TODO(sarna): Remove once computed columns are guaranteed to be featured in the whole cluster.
// Legacy format for a secondary index used a hardcoded "token" column, which ensured a proper
// order for indexed queries. This "token" column is now implemented as a computed column,
// but for the sake of compatibility we assume that there might be indexes created in the legacy
@@ -3076,32 +3042,26 @@ view_ptr maybe_fix_legacy_secondary_index_mv_schema(database& db, const view_ptr
// columns marked as computed (because they were either created on a node that supports computed
// columns or were fixed by this utility function), it's safe to remove this function altogether.
if (v->clustering_key_size() == 0) {
return view_ptr(nullptr);
return make_ready_future<>();
}
const column_definition& first_view_ck = v->clustering_key_columns().front();
if (first_view_ck.is_computed()) {
return view_ptr(nullptr);
}
if (!base_schema) {
base_schema = db.find_schema(v->view_info()->base_id());
return make_ready_future<>();
}
table& base = db.find_column_family(v->view_info()->base_id());
schema_ptr base_schema = base.schema();
// If the first clustering key part of a view is a column with name not found in base schema,
// it implies it might be backing an index created before computed columns were introduced,
// and as such it must be recreated properly.
if (!base_schema->columns_by_name().contains(first_view_ck.name())) {
schema_builder builder{schema_ptr(v)};
builder.mark_column_computed(first_view_ck.name(), std::make_unique<token_column_computation>());
if (preserve_version) {
builder.with_version(v->version());
}
return view_ptr(builder.build());
builder.mark_column_computed(first_view_ck.name(), std::make_unique<legacy_token_column_computation>());
return mm.announce_view_update(view_ptr(builder.build()), true);
}
return view_ptr(nullptr);
return make_ready_future<>();
}
namespace legacy {
table_schema_version schema_mutations::digest() const {
@@ -3130,10 +3090,9 @@ static auto GET_COLUMN_MAPPING_QUERY = format("SELECT column_name, clustering_or
db::schema_tables::SCYLLA_TABLE_SCHEMA_HISTORY);
future<column_mapping> get_column_mapping(utils::UUID table_id, table_schema_version version) {
auto cm_fut = cql3::get_local_query_processor().execute_internal(
auto cm_fut = qctx->qp().execute_internal(
GET_COLUMN_MAPPING_QUERY,
db::consistency_level::LOCAL_ONE,
infinite_timeout_config,
{table_id, version}
);
return cm_fut.then([version] (shared_ptr<cql3::untyped_result_set> results) {
@@ -3173,10 +3132,9 @@ future<column_mapping> get_column_mapping(utils::UUID table_id, table_schema_ver
}
future<bool> column_mapping_exists(utils::UUID table_id, table_schema_version version) {
return cql3::get_local_query_processor().execute_internal(
return qctx->qp().execute_internal(
GET_COLUMN_MAPPING_QUERY,
db::consistency_level::LOCAL_ONE,
infinite_timeout_config,
{table_id, version}
).then([] (shared_ptr<cql3::untyped_result_set> results) {
return !results->empty();
@@ -3187,12 +3145,11 @@ future<> drop_column_mapping(utils::UUID table_id, table_schema_version version)
const static sstring DEL_COLUMN_MAPPING_QUERY =
format("DELETE FROM system.{} WHERE cf_id = ? and schema_version = ?",
db::schema_tables::SCYLLA_TABLE_SCHEMA_HISTORY);
return cql3::get_local_query_processor().execute_internal(
return qctx->qp().execute_internal(
DEL_COLUMN_MAPPING_QUERY,
db::consistency_level::LOCAL_ONE,
infinite_timeout_config,
{table_id, version}).discard_result();
}
} // namespace schema_tables
} // namespace schema

@@ -161,10 +161,10 @@ std::vector<schema_ptr> all_tables(schema_features);
std::vector<sstring> all_table_names(schema_features);
// saves/creates "ks" + all tables etc, while first deleting all old schema entries (will be rewritten)
future<> save_system_schema(const sstring & ks);
future<> save_system_schema(cql3::query_processor& qp, const sstring & ks);
// saves/creates "system_schema" keyspace
future<> save_system_keyspace_schema();
future<> save_system_keyspace_schema(cql3::query_processor& qp);
future<utils::UUID> calculate_schema_digest(distributed<service::storage_proxy>& proxy, schema_features);
@@ -238,9 +238,7 @@ std::vector<mutation> make_update_view_mutations(lw_shared_ptr<keyspace_metadata
std::vector<mutation> make_drop_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr view, api::timestamp_type timestamp);
class preserve_version_tag {};
using preserve_version = bool_class<preserve_version_tag>;
view_ptr maybe_fix_legacy_secondary_index_mv_schema(database& db, const view_ptr& v, schema_ptr base_schema, preserve_version preserve_version);
future<> maybe_update_legacy_secondary_index_mv_schema(service::migration_manager& mm, database& db, view_ptr v);
sstring serialize_kind(column_kind kind);
column_kind deserialize_kind(sstring kind);

@@ -67,7 +67,14 @@ struct virtual_row_comparator {
};
// Iterating over the cartesian product of cf_names and token_ranges.
class virtual_row_iterator : public std::iterator<std::input_iterator_tag, const virtual_row> {
class virtual_row_iterator {
public:
using iterator_category = std::input_iterator_tag;
using value_type = const virtual_row;
using difference_type = std::ptrdiff_t;
using pointer = const virtual_row*;
using reference = const virtual_row&;
private:
std::reference_wrapper<const std::vector<bytes>> _cf_names;
std::reference_wrapper<const std::vector<token_range>> _ranges;
size_t _cf_names_idx = 0;
@@ -201,10 +208,10 @@ static future<std::vector<token_range>> get_local_ranges(database& db) {
// All queries will be on that table, where all entries are text and there's no notion of
// token ranges from the CQL point of view.
auto left_inf = boost::find_if(ranges, [] (auto&& r) {
return r.end() && (!r.start() || r.start()->value() == dht::minimum_token());
return !r.start() || r.start()->value() == dht::minimum_token();
});
auto right_inf = boost::find_if(ranges, [] (auto&& r) {
return r.start() && (!r.end() || r.end()->value() == dht::maximum_token());
return !r.end() || r.end()->value() == dht::maximum_token();
});
if (left_inf != right_inf && left_inf != ranges.end() && right_inf != ranges.end()) {
local_ranges.push_back(token_range{to_bytes(right_inf->start()), to_bytes(left_inf->end())});

@@ -43,13 +43,9 @@
namespace db {
future<> snapshot_ctl::check_snapshot_not_exist(sstring ks_name, sstring name, std::optional<std::vector<sstring>> filter) {
future<> snapshot_ctl::check_snapshot_not_exist(sstring ks_name, sstring name) {
auto& ks = _db.local().find_keyspace(ks_name);
return parallel_for_each(ks.metadata()->cf_meta_data(), [this, ks_name = std::move(ks_name), name = std::move(name), filter = std::move(filter)] (auto& pair) {
auto& cf_name = pair.first;
if (filter && std::find(filter->begin(), filter->end(), cf_name) == filter->end()) {
return make_ready_future<>();
}
return parallel_for_each(ks.metadata()->cf_meta_data(), [this, ks_name = std::move(ks_name), name = std::move(name)] (auto& pair) {
auto& cf = _db.local().find_column_family(pair.second);
return cf.snapshot_exists(name).then([ks_name = std::move(ks_name), name] (bool exists) {
if (exists) {
@@ -115,7 +111,7 @@ future<> snapshot_ctl::take_column_family_snapshot(sstring ks_name, std::vector<
}
return run_snapshot_modify_operation([this, ks_name = std::move(ks_name), tables = std::move(tables), tag = std::move(tag)] {
return check_snapshot_not_exist(ks_name, tag, tables).then([this, ks_name, tables, tag] {
return check_snapshot_not_exist(ks_name, tag).then([this, ks_name, tables = std::move(tables), tag] {
return do_with(std::vector<sstring>(std::move(tables)),[this, ks_name, tag](const std::vector<sstring>& tables) {
return do_for_each(tables, [ks_name, tag, this] (const sstring& table_name) {
if (table_name.find(".") != sstring::npos) {


@@ -40,8 +40,6 @@
#pragma once
#include <vector>
#include <seastar/core/sharded.hh>
#include <seastar/core/future.hh>
#include "database.hh"
@@ -114,7 +112,7 @@ private:
seastar::rwlock _lock;
seastar::gate _ops;
future<> check_snapshot_not_exist(sstring ks_name, sstring name, std::optional<std::vector<sstring>> filter = {});
future<> check_snapshot_not_exist(sstring ks_name, sstring name);
template <typename Func>
std::result_of_t<Func()> run_snapshot_modify_operation(Func&&);


@@ -155,17 +155,20 @@ future<> system_distributed_keyspace::stop() {
return make_ready_future<>();
}
static const timeout_config internal_distributed_timeout_config = [] {
static service::query_state& internal_distributed_query_state() {
using namespace std::chrono_literals;
const auto t = 10s;
return timeout_config{ t, t, t, t, t, t, t };
}();
static timeout_config tc{ t, t, t, t, t, t, t };
static thread_local service::client_state cs(service::client_state::internal_tag{}, tc);
static thread_local service::query_state qs(cs, empty_service_permit());
return qs;
};
future<std::unordered_map<utils::UUID, sstring>> system_distributed_keyspace::view_status(sstring ks_name, sstring view_name) const {
return _qp.execute_internal(
format("SELECT host_id, status FROM {}.{} WHERE keyspace_name = ? AND view_name = ?", NAME, VIEW_BUILD_STATUS),
db::consistency_level::ONE,
internal_distributed_timeout_config,
internal_distributed_query_state(),
{ std::move(ks_name), std::move(view_name) },
false).then([this] (::shared_ptr<cql3::untyped_result_set> cql_result) {
return boost::copy_range<std::unordered_map<utils::UUID, sstring>>(*cql_result
@@ -182,7 +185,7 @@ future<> system_distributed_keyspace::start_view_build(sstring ks_name, sstring
return _qp.execute_internal(
format("INSERT INTO {}.{} (keyspace_name, view_name, host_id, status) VALUES (?, ?, ?, ?)", NAME, VIEW_BUILD_STATUS),
db::consistency_level::ONE,
internal_distributed_timeout_config,
internal_distributed_query_state(),
{ std::move(ks_name), std::move(view_name), std::move(host_id), "STARTED" },
false).discard_result();
});
@@ -193,7 +196,7 @@ future<> system_distributed_keyspace::finish_view_build(sstring ks_name, sstring
return _qp.execute_internal(
format("UPDATE {}.{} SET status = ? WHERE keyspace_name = ? AND view_name = ? AND host_id = ?", NAME, VIEW_BUILD_STATUS),
db::consistency_level::ONE,
internal_distributed_timeout_config,
internal_distributed_query_state(),
{ "SUCCESS", std::move(ks_name), std::move(view_name), std::move(host_id) },
false).discard_result();
});
@@ -203,7 +206,7 @@ future<> system_distributed_keyspace::remove_view(sstring ks_name, sstring view_
return _qp.execute_internal(
format("DELETE FROM {}.{} WHERE keyspace_name = ? AND view_name = ?", NAME, VIEW_BUILD_STATUS),
db::consistency_level::ONE,
internal_distributed_timeout_config,
internal_distributed_query_state(),
{ std::move(ks_name), std::move(view_name) },
false).discard_result();
}
@@ -281,7 +284,7 @@ system_distributed_keyspace::insert_cdc_topology_description(
return _qp.execute_internal(
format("INSERT INTO {}.{} (time, description) VALUES (?,?)", NAME, CDC_TOPOLOGY_DESCRIPTION),
quorum_if_many(ctx.num_token_owners),
internal_distributed_timeout_config,
internal_distributed_query_state(),
{ time, make_list_value(cdc_generation_description_type, prepare_cdc_generation_description(description)) },
false).discard_result();
}
@@ -293,7 +296,7 @@ system_distributed_keyspace::read_cdc_topology_description(
return _qp.execute_internal(
format("SELECT description FROM {}.{} WHERE time = ?", NAME, CDC_TOPOLOGY_DESCRIPTION),
quorum_if_many(ctx.num_token_owners),
internal_distributed_timeout_config,
internal_distributed_query_state(),
{ time },
false
).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) -> std::optional<cdc::topology_description> {
@@ -321,7 +324,7 @@ system_distributed_keyspace::expire_cdc_topology_description(
return _qp.execute_internal(
format("UPDATE {}.{} SET expired = ? WHERE time = ?", NAME, CDC_TOPOLOGY_DESCRIPTION),
quorum_if_many(ctx.num_token_owners),
internal_distributed_timeout_config,
internal_distributed_query_state(),
{ expiration_time, streams_ts },
false).discard_result();
}
@@ -342,7 +345,7 @@ system_distributed_keyspace::create_cdc_desc(
return _qp.execute_internal(
format("INSERT INTO {}.{} (time, streams) VALUES (?,?)", NAME, CDC_DESC),
quorum_if_many(ctx.num_token_owners),
internal_distributed_timeout_config,
internal_distributed_query_state(),
{ time, make_set_value(cdc_streams_set_type, prepare_cdc_streams(streams)) },
false).discard_result();
}
@@ -355,7 +358,7 @@ system_distributed_keyspace::expire_cdc_desc(
return _qp.execute_internal(
format("UPDATE {}.{} SET expired = ? WHERE time = ?", NAME, CDC_DESC),
quorum_if_many(ctx.num_token_owners),
internal_distributed_timeout_config,
internal_distributed_query_state(),
{ expiration_time, streams_ts },
false).discard_result();
}
@@ -367,7 +370,7 @@ system_distributed_keyspace::cdc_desc_exists(
return _qp.execute_internal(
format("SELECT time FROM {}.{} WHERE time = ?", NAME, CDC_DESC),
quorum_if_many(ctx.num_token_owners),
internal_distributed_timeout_config,
internal_distributed_query_state(),
{ streams_ts },
false
).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) -> bool {
@@ -380,7 +383,7 @@ system_distributed_keyspace::cdc_get_versioned_streams(context ctx) {
return _qp.execute_internal(
format("SELECT * FROM {}.{}", NAME, CDC_DESC),
quorum_if_many(ctx.num_token_owners),
internal_distributed_timeout_config,
internal_distributed_query_state(),
{},
false
).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {


@@ -1157,20 +1157,20 @@ schema_ptr aggregates() {
} //</legacy>
static future<> setup_version(distributed<gms::feature_service>& feat, sharded<netw::messaging_service>& ms) {
return gms::inet_address::lookup(qctx->db().get_config().rpc_address()).then([&feat, &ms](gms::inet_address a) {
static future<> setup_version(distributed<gms::feature_service>& feat, sharded<netw::messaging_service>& ms, const db::config& cfg) {
return gms::inet_address::lookup(cfg.rpc_address()).then([&feat, &ms, &cfg](gms::inet_address a) {
sstring req = sprint("INSERT INTO system.%s (key, release_version, cql_version, thrift_version, native_protocol_version, data_center, rack, partitioner, rpc_address, broadcast_address, listen_address, supported_features) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
, db::system_keyspace::LOCAL);
auto& snitch = locator::i_endpoint_snitch::get_local_snitch_ptr();
return execute_cql(req, sstring(db::system_keyspace::LOCAL),
return qctx->execute_cql(req, sstring(db::system_keyspace::LOCAL),
version::release(),
cql3::query_processor::CQL_VERSION,
::cassandra::thrift_version,
to_sstring(cql_serialization_format::latest_version),
snitch->get_datacenter(utils::fb_utilities::get_broadcast_address()),
snitch->get_rack(utils::fb_utilities::get_broadcast_address()),
sstring(qctx->db().get_config().partitioner()),
sstring(cfg.partitioner()),
a.addr(),
utils::fb_utilities::get_broadcast_address().addr(),
ms.local().listen_address().addr(),
@@ -1179,7 +1179,7 @@ static future<> setup_version(distributed<gms::feature_service>& feat, sharded<n
});
}
future<> check_health();
future<> check_health(const sstring& cluster_name);
future<> force_blocking_flush(sstring cfname);
// Changing the real load_dc_rack_info into a future would trigger a tidal wave of futurization that would spread
@@ -1199,7 +1199,7 @@ struct local_cache {
static distributed<local_cache> _local_cache;
static future<> build_dc_rack_info() {
return execute_cql(format("SELECT peer, data_center, rack from system.{}", PEERS)).then([] (::shared_ptr<cql3::untyped_result_set> msg) {
return qctx->execute_cql(format("SELECT peer, data_center, rack from system.{}", PEERS)).then([] (::shared_ptr<cql3::untyped_result_set> msg) {
return do_for_each(*msg, [] (auto& row) {
net::inet_address peer = row.template get_as<net::inet_address>("peer");
if (!row.has("data_center") || !row.has("rack")) {
@@ -1221,7 +1221,7 @@ static future<> build_dc_rack_info() {
static future<> build_bootstrap_info() {
sstring req = format("SELECT bootstrapped FROM system.{} WHERE key = ? ", LOCAL);
return execute_cql(req, sstring(LOCAL)).then([] (auto msg) {
return qctx->execute_cql(req, sstring(LOCAL)).then([] (auto msg) {
static auto state_map = std::unordered_map<sstring, bootstrap_state>({
{ "NEEDS_BOOTSTRAP", bootstrap_state::NEEDS_BOOTSTRAP },
{ "COMPLETED", bootstrap_state::COMPLETED },
@@ -1255,8 +1255,8 @@ future<> deinit_local_cache() {
return _local_cache.stop();
}
void minimal_setup(distributed<database>& db, distributed<cql3::query_processor>& qp) {
qctx = std::make_unique<query_context>(db, qp);
void minimal_setup(distributed<cql3::query_processor>& qp) {
qctx = std::make_unique<query_context>(qp);
}
static future<> cache_truncation_record(distributed<database>& db);
@@ -1265,8 +1265,8 @@ future<> setup(distributed<database>& db,
distributed<cql3::query_processor>& qp,
distributed<gms::feature_service>& feat,
sharded<netw::messaging_service>& ms) {
minimal_setup(db, qp);
return setup_version(feat, ms).then([&db] {
const db::config& cfg = db.local().get_config();
return setup_version(feat, ms, cfg).then([&db] {
return update_schema_version(db.local().get_version());
}).then([] {
return init_local_cache();
@@ -1274,13 +1274,13 @@ future<> setup(distributed<database>& db,
return build_dc_rack_info();
}).then([] {
return build_bootstrap_info();
}).then([] {
return check_health();
}).then([] {
return db::schema_tables::save_system_keyspace_schema();
}).then([] {
}).then([&cfg] {
return check_health(cfg.cluster_name());
}).then([&qp] {
return db::schema_tables::save_system_keyspace_schema(qp.local());
}).then([&qp] {
// #2514 - make sure "system" is written to system_schema.keyspaces.
return db::schema_tables::save_system_schema(NAME);
return db::schema_tables::save_system_schema(qp.local(), NAME);
}).then([&db] {
return cache_truncation_record(db);
}).then([&ms] {
@@ -1314,16 +1314,6 @@ typedef std::unordered_map<truncation_key, truncation_record> truncation_map;
static constexpr uint8_t current_version = 1;
/**
* This method is used to remove information about truncation time for specified column family
*/
future<> remove_truncation_record(utils::UUID id) {
sstring req = format("DELETE * from system.{} WHERE table_uuid = ?", TRUNCATED);
return qctx->qp().execute_internal(req, {id}).discard_result().then([] {
return force_blocking_flush(TRUNCATED);
});
}
static future<truncation_record> get_truncation_record(utils::UUID cf_id) {
sstring req = format("SELECT * from system.{} WHERE table_uuid = ?", TRUNCATED);
return qctx->qp().execute_internal(req, {cf_id}).then([cf_id](::shared_ptr<cql3::untyped_result_set> rs) {
@@ -1350,16 +1340,13 @@ static future<> cache_truncation_record(distributed<database>& db) {
auto table_uuid = row.get_as<utils::UUID>("table_uuid");
auto ts = row.get_as<db_clock::time_point>("truncated_at");
auto cpus = boost::irange(0u, smp::count);
return parallel_for_each(cpus.begin(), cpus.end(), [table_uuid, ts, &db] (unsigned int c) mutable {
return smp::submit_to(c, [table_uuid, ts, &db] () mutable {
try {
table& cf = db.local().find_column_family(table_uuid);
cf.cache_truncation_record(ts);
} catch (no_such_column_family&) {
slogger.debug("Skip caching truncation time for {} since the table is no longer present", table_uuid);
}
});
return db.invoke_on_all([table_uuid, ts] (database& db) mutable {
try {
table& cf = db.find_column_family(table_uuid);
cf.cache_truncation_record(ts);
} catch (no_such_column_family&) {
slogger.debug("Skip caching truncation time for {} since the table is no longer present", table_uuid);
}
});
});
});
@@ -1425,7 +1412,7 @@ future<> update_tokens(gms::inet_address ep, const std::unordered_set<dht::token
sstring req = format("INSERT INTO system.{} (peer, tokens) VALUES (?, ?)", PEERS);
auto set_type = set_type_impl::get_instance(utf8_type, true);
return execute_cql(req, ep.addr(), make_set_value(set_type, prepare_tokens(tokens))).discard_result().then([] {
return qctx->execute_cql(req, ep.addr(), make_set_value(set_type, prepare_tokens(tokens))).discard_result().then([] {
return force_blocking_flush(PEERS);
});
}
@@ -1433,7 +1420,7 @@ future<> update_tokens(gms::inet_address ep, const std::unordered_set<dht::token
future<std::unordered_map<gms::inet_address, std::unordered_set<dht::token>>> load_tokens() {
sstring req = format("SELECT peer, tokens FROM system.{}", PEERS);
return execute_cql(req).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
return qctx->execute_cql(req).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
std::unordered_map<gms::inet_address, std::unordered_set<dht::token>> ret;
for (auto& row : *cql_result) {
auto peer = gms::inet_address(row.get_as<net::inet_address>("peer"));
@@ -1451,7 +1438,7 @@ future<std::unordered_map<gms::inet_address, std::unordered_set<dht::token>>> lo
future<std::unordered_map<gms::inet_address, utils::UUID>> load_host_ids() {
sstring req = format("SELECT peer, host_id FROM system.{}", PEERS);
return execute_cql(req).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
return qctx->execute_cql(req).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
std::unordered_map<gms::inet_address, utils::UUID> ret;
for (auto& row : *cql_result) {
auto peer = gms::inet_address(row.get_as<net::inet_address>("peer"));
@@ -1465,7 +1452,7 @@ future<std::unordered_map<gms::inet_address, utils::UUID>> load_host_ids() {
future<std::unordered_map<gms::inet_address, sstring>> load_peer_features() {
sstring req = format("SELECT peer, supported_features FROM system.{}", PEERS);
return execute_cql(req).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
return qctx->execute_cql(req).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
std::unordered_map<gms::inet_address, sstring> ret;
for (auto& row : *cql_result) {
if (row.has("supported_features")) {
@@ -1479,14 +1466,14 @@ future<std::unordered_map<gms::inet_address, sstring>> load_peer_features() {
future<> update_preferred_ip(gms::inet_address ep, gms::inet_address preferred_ip) {
sstring req = format("INSERT INTO system.{} (peer, preferred_ip) VALUES (?, ?)", PEERS);
return execute_cql(req, ep.addr(), preferred_ip.addr()).discard_result().then([] {
return qctx->execute_cql(req, ep.addr(), preferred_ip.addr()).discard_result().then([] {
return force_blocking_flush(PEERS);
});
}
future<std::unordered_map<gms::inet_address, gms::inet_address>> get_preferred_ips() {
sstring req = format("SELECT peer, preferred_ip FROM system.{}", PEERS);
return execute_cql(req).then([] (::shared_ptr<cql3::untyped_result_set> cql_res_set) {
return qctx->execute_cql(req).then([] (::shared_ptr<cql3::untyped_result_set> cql_res_set) {
std::unordered_map<gms::inet_address, gms::inet_address> res;
for (auto& r : *cql_res_set) {
@@ -1527,7 +1514,7 @@ future<> update_peer_info(gms::inet_address ep, sstring column_name, Value value
return update_cached_values(ep, column_name, value).then([ep, column_name, value] {
sstring req = format("INSERT INTO system.{} (peer, {}) VALUES (?, ?)", PEERS, column_name);
return execute_cql(req, ep.addr(), value).discard_result();
return qctx->execute_cql(req, ep.addr(), value).discard_result();
});
}
// sets are not needed, since tokens are updated by another method
@@ -1535,20 +1522,14 @@ template future<> update_peer_info<sstring>(gms::inet_address ep, sstring column
template future<> update_peer_info<utils::UUID>(gms::inet_address ep, sstring column_name, utils::UUID);
template future<> update_peer_info<net::inet_address>(gms::inet_address ep, sstring column_name, net::inet_address);
future<> update_hints_dropped(gms::inet_address ep, utils::UUID time_period, int value) {
// with 30 day TTL
sstring req = format("UPDATE system.{} USING TTL 2592000 SET hints_dropped[ ? ] = ? WHERE peer = ?", PEER_EVENTS);
return execute_cql(req, time_period, value, ep.addr()).discard_result();
}
future<> set_scylla_local_param(const sstring& key, const sstring& value) {
sstring req = format("UPDATE system.{} SET value = ? WHERE key = ?", SCYLLA_LOCAL);
return execute_cql(req, value, key).discard_result();
return qctx->execute_cql(req, value, key).discard_result();
}
future<std::optional<sstring>> get_scylla_local_param(const sstring& key){
sstring req = format("SELECT value FROM system.{} WHERE key = ?", SCYLLA_LOCAL);
return execute_cql(req, key).then([] (::shared_ptr<cql3::untyped_result_set> res) {
return qctx->execute_cql(req, key).then([] (::shared_ptr<cql3::untyped_result_set> res) {
if (res->empty() || !res->one().has("value")) {
return std::optional<sstring>();
}
@@ -1558,7 +1539,7 @@ future<std::optional<sstring>> get_scylla_local_param(const sstring& key){
future<> update_schema_version(utils::UUID version) {
sstring req = format("INSERT INTO system.{} (key, schema_version) VALUES (?, ?)", LOCAL);
return execute_cql(req, sstring(LOCAL), version).discard_result();
return qctx->execute_cql(req, sstring(LOCAL), version).discard_result();
}
/**
@@ -1569,7 +1550,7 @@ future<> remove_endpoint(gms::inet_address ep) {
lc._cached_dc_rack_info.erase(ep);
}).then([ep] {
sstring req = format("DELETE FROM system.{} WHERE peer = ?", PEERS);
return execute_cql(req, ep.addr()).discard_result();
return qctx->execute_cql(req, ep.addr()).discard_result();
}).then([] {
return force_blocking_flush(PEERS);
});
@@ -1582,23 +1563,22 @@ future<> update_tokens(const std::unordered_set<dht::token>& tokens) {
sstring req = format("INSERT INTO system.{} (key, tokens) VALUES (?, ?)", LOCAL);
auto set_type = set_type_impl::get_instance(utf8_type, true);
return execute_cql(req, sstring(LOCAL), make_set_value(set_type, prepare_tokens(tokens))).discard_result().then([] {
return qctx->execute_cql(req, sstring(LOCAL), make_set_value(set_type, prepare_tokens(tokens))).discard_result().then([] {
return force_blocking_flush(LOCAL);
});
}
future<> update_cdc_streams_timestamp(db_clock::time_point tp) {
return execute_cql(format("INSERT INTO system.{} (key, streams_timestamp) VALUES (?, ?)",
return qctx->execute_cql(format("INSERT INTO system.{} (key, streams_timestamp) VALUES (?, ?)",
v3::CDC_LOCAL), sstring(v3::CDC_LOCAL), tp)
.discard_result().then([] { return force_blocking_flush(v3::CDC_LOCAL); });
}
future<> force_blocking_flush(sstring cfname) {
assert(qctx);
return qctx->_db.invoke_on_all([cfname = std::move(cfname)](database& db) {
return qctx->_qp.invoke_on_all([cfname = std::move(cfname)] (cql3::query_processor& qp) {
// if (!Boolean.getBoolean("cassandra.unsafesystem"))
column_family& cf = db.find_column_family(NAME, cfname);
return cf.flush();
return qp.db().flush(NAME, cfname);
});
}
@@ -1608,17 +1588,16 @@ future<> force_blocking_flush(sstring cfname) {
* 2. no files are there: great (new node is assumed)
* 3. files are present but you can't read them: bad
*/
future<> check_health() {
future<> check_health(const sstring& cluster_name) {
using namespace cql_transport::messages;
sstring req = format("SELECT cluster_name FROM system.{} WHERE key=?", LOCAL);
return execute_cql(req, sstring(LOCAL)).then([] (::shared_ptr<cql3::untyped_result_set> msg) {
return qctx->execute_cql(req, sstring(LOCAL)).then([&cluster_name] (::shared_ptr<cql3::untyped_result_set> msg) {
if (msg->empty() || !msg->one().has("cluster_name")) {
// this is a brand new node
sstring ins_req = format("INSERT INTO system.{} (key, cluster_name) VALUES (?, ?)", LOCAL);
return execute_cql(ins_req, sstring(LOCAL), qctx->db().get_config().cluster_name()).discard_result();
return qctx->execute_cql(ins_req, sstring(LOCAL), cluster_name).discard_result();
} else {
auto saved_cluster_name = msg->one().get_as<sstring>("cluster_name");
auto cluster_name = qctx->db().get_config().cluster_name();
if (cluster_name != saved_cluster_name) {
throw exceptions::configuration_exception("Saved cluster name " + saved_cluster_name + " != configured name " + cluster_name);
@@ -1631,7 +1610,7 @@ future<> check_health() {
future<std::unordered_set<dht::token>> get_saved_tokens() {
sstring req = format("SELECT tokens FROM system.{} WHERE key = ?", LOCAL);
return execute_cql(req, sstring(LOCAL)).then([] (auto msg) {
return qctx->execute_cql(req, sstring(LOCAL)).then([] (auto msg) {
if (msg->empty() || !msg->one().has("tokens")) {
return make_ready_future<std::unordered_set<dht::token>>();
}
@@ -1657,7 +1636,7 @@ future<std::unordered_set<dht::token>> get_local_tokens() {
}
future<std::optional<db_clock::time_point>> get_saved_cdc_streams_timestamp() {
return execute_cql(format("SELECT streams_timestamp FROM system.{} WHERE key = ?", v3::CDC_LOCAL), sstring(v3::CDC_LOCAL))
return qctx->execute_cql(format("SELECT streams_timestamp FROM system.{} WHERE key = ?", v3::CDC_LOCAL), sstring(v3::CDC_LOCAL))
.then([] (::shared_ptr<cql3::untyped_result_set> msg)-> std::optional<db_clock::time_point> {
if (msg->empty() || !msg->one().has("streams_timestamp")) {
return {};
@@ -1694,7 +1673,7 @@ future<> set_bootstrap_state(bootstrap_state state) {
sstring state_name = state_to_name.at(state);
sstring req = format("INSERT INTO system.{} (key, bootstrapped) VALUES (?, ?)", LOCAL);
return execute_cql(req, sstring(LOCAL), state_name).discard_result().then([state] {
return qctx->execute_cql(req, sstring(LOCAL), state_name).discard_result().then([state] {
return force_blocking_flush(LOCAL).then([state] {
return _local_cache.invoke_on_all([state] (local_cache& lc) {
lc._state = state;
@@ -1764,7 +1743,7 @@ void make(database& db, bool durable, bool volatile_testing_only) {
// don't make system keyspace writes wait for user writes (if under pressure)
kscfg.dirty_memory_manager = &db._system_dirty_memory_manager;
keyspace _ks{ksm, std::move(kscfg)};
auto rs(locator::abstract_replication_strategy::create_replication_strategy(NAME, "LocalStrategy", db.get_token_metadata(), ksm->strategy_options()));
auto rs(locator::abstract_replication_strategy::create_replication_strategy(NAME, "LocalStrategy", db.get_shared_token_metadata(), ksm->strategy_options()));
_ks.set_replication_strategy(std::move(rs));
db.add_keyspace(ks_name, std::move(_ks));
}
@@ -1784,7 +1763,7 @@ void make(database& db, bool durable, bool volatile_testing_only) {
future<utils::UUID> get_local_host_id() {
using namespace cql_transport::messages;
sstring req = format("SELECT host_id FROM system.{} WHERE key=?", LOCAL);
return execute_cql(req, sstring(LOCAL)).then([] (::shared_ptr<cql3::untyped_result_set> msg) {
return qctx->execute_cql(req, sstring(LOCAL)).then([] (::shared_ptr<cql3::untyped_result_set> msg) {
auto new_id = [] {
auto host_id = utils::make_random_uuid();
return set_local_host_id(host_id);
@@ -1800,7 +1779,7 @@ future<utils::UUID> get_local_host_id() {
future<utils::UUID> set_local_host_id(const utils::UUID& host_id) {
sstring req = format("INSERT INTO system.{} (key, host_id) VALUES (?, ?)", LOCAL);
return execute_cql(req, sstring(LOCAL), host_id).then([] (auto msg) {
return qctx->execute_cql(req, sstring(LOCAL), host_id).then([] (auto msg) {
return force_blocking_flush(LOCAL);
}).then([host_id] {
return host_id;
@@ -1812,23 +1791,6 @@ load_dc_rack_info() {
return _local_cache.local()._cached_dc_rack_info;
}
future<foreign_ptr<lw_shared_ptr<reconcilable_result>>>
query_mutations(distributed<service::storage_proxy>& proxy, const sstring& cf_name) {
return query_mutations(proxy, db::system_keyspace::NAME, cf_name);
}
future<lw_shared_ptr<query::result_set>>
query(distributed<service::storage_proxy>& proxy, const sstring& cf_name) {
return query(proxy, db::system_keyspace::NAME, cf_name);
}
future<lw_shared_ptr<query::result_set>>
query(distributed<service::storage_proxy>& proxy, const sstring& cf_name, const dht::decorated_key& key, query::clustering_range row_range)
{
return query(proxy, db::system_keyspace::NAME, cf_name, key, row_range);
}
future<foreign_ptr<lw_shared_ptr<reconcilable_result>>>
query_mutations(distributed<service::storage_proxy>& proxy, const sstring& ks_name, const sstring& cf_name) {
database& db = proxy.local().get_db().local();
@@ -1892,7 +1854,7 @@ future<> update_compaction_history(utils::UUID uuid, sstring ksname, sstring cfn
, COMPACTION_HISTORY);
db_clock::time_point tp{db_clock::duration{compacted_at}};
return execute_cql(req, uuid, ksname, cfname, tp, bytes_in, bytes_out,
return qctx->execute_cql(req, uuid, ksname, cfname, tp, bytes_in, bytes_out,
make_map_value(map_type, prepare_rows_merged(rows_merged))).discard_result().handle_exception([] (auto ep) {
slogger.error("update compaction history failed: {}: ignored", ep);
});
@@ -1969,7 +1931,7 @@ mutation make_size_estimates_mutation(const sstring& ks, std::vector<range_estim
future<> register_view_for_building(sstring ks_name, sstring view_name, const dht::token& token) {
sstring req = format("INSERT INTO system.{} (keyspace_name, view_name, generation_number, cpu_id, first_token) VALUES (?, ?, ?, ?, ?)",
v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS);
return execute_cql(
return qctx->execute_cql(
std::move(req),
std::move(ks_name),
std::move(view_name),
@@ -1981,7 +1943,7 @@ future<> register_view_for_building(sstring ks_name, sstring view_name, const dh
future<> update_view_build_progress(sstring ks_name, sstring view_name, const dht::token& token) {
sstring req = format("INSERT INTO system.{} (keyspace_name, view_name, next_token, cpu_id) VALUES (?, ?, ?, ?)",
v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS);
return execute_cql(
return qctx->execute_cql(
std::move(req),
std::move(ks_name),
std::move(view_name),
@@ -1990,14 +1952,14 @@ future<> update_view_build_progress(sstring ks_name, sstring view_name, const dh
}
future<> remove_view_build_progress_across_all_shards(sstring ks_name, sstring view_name) {
return execute_cql(
return qctx->execute_cql(
format("DELETE FROM system.{} WHERE keyspace_name = ? AND view_name = ?", v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS),
std::move(ks_name),
std::move(view_name)).discard_result();
}
future<> remove_view_build_progress(sstring ks_name, sstring view_name) {
return execute_cql(
return qctx->execute_cql(
format("DELETE FROM system.{} WHERE keyspace_name = ? AND view_name = ? AND cpu_id = ?", v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS),
std::move(ks_name),
std::move(view_name),
@@ -2005,21 +1967,21 @@ future<> remove_view_build_progress(sstring ks_name, sstring view_name) {
}
future<> mark_view_as_built(sstring ks_name, sstring view_name) {
return execute_cql(
return qctx->execute_cql(
format("INSERT INTO system.{} (keyspace_name, view_name) VALUES (?, ?)", v3::BUILT_VIEWS),
std::move(ks_name),
std::move(view_name)).discard_result();
}
future<> remove_built_view(sstring ks_name, sstring view_name) {
return execute_cql(
return qctx->execute_cql(
format("DELETE FROM system.{} WHERE keyspace_name = ? AND view_name = ?", v3::BUILT_VIEWS),
std::move(ks_name),
std::move(view_name)).discard_result();
}
future<std::vector<view_name>> load_built_views() {
return execute_cql(format("SELECT * FROM system.{}", v3::BUILT_VIEWS)).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
return qctx->execute_cql(format("SELECT * FROM system.{}", v3::BUILT_VIEWS)).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
return boost::copy_range<std::vector<view_name>>(*cql_result
| boost::adaptors::transformed([] (const cql3::untyped_result_set::row& row) {
auto ks_name = row.get_as<sstring>("keyspace_name");
@@ -2030,7 +1992,7 @@ future<std::vector<view_name>> load_built_views() {
}
future<std::vector<view_build_progress>> load_view_build_progress() {
return execute_cql(format("SELECT keyspace_name, view_name, first_token, next_token, cpu_id FROM system.{}",
return qctx->execute_cql(format("SELECT keyspace_name, view_name, first_token, next_token, cpu_id FROM system.{}",
v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS)).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
std::vector<view_build_progress> progress;
for (auto& row : *cql_result) {
@@ -2051,7 +2013,7 @@ future<std::vector<view_build_progress>> load_view_build_progress() {
}
return progress;
}).handle_exception([] (const std::exception_ptr& eptr) {
slogger.error("Failed to load view build progress: {}", eptr);
slogger.warn("Failed to load view build progress: {}", eptr);
return std::vector<view_build_progress>();
});
}
@@ -2061,7 +2023,7 @@ future<service::paxos::paxos_state> load_paxos_state(partition_key_view key, sch
static auto cql = format("SELECT * FROM system.{} WHERE row_key = ? AND cf_id = ?", PAXOS);
// FIXME: we need execute_cql_with_now()
(void)now;
auto f = execute_cql_with_timeout(cql, timeout, to_legacy(*key.get_compound_type(*s), key.representation()), s->id());
auto f = qctx->execute_cql_with_timeout(cql, timeout, to_legacy(*key.get_compound_type(*s), key.representation()), s->id());
return f.then([s, key = std::move(key)] (shared_ptr<cql3::untyped_result_set> results) mutable {
if (results->empty()) {
return service::paxos::paxos_state();
@@ -2100,7 +2062,7 @@ static int32_t paxos_ttl_sec(const schema& s) {
future<> save_paxos_promise(const schema& s, const partition_key& key, const utils::UUID& ballot, db::timeout_clock::time_point timeout) {
static auto cql = format("UPDATE system.{} USING TIMESTAMP ? AND TTL ? SET promise = ? WHERE row_key = ? AND cf_id = ?", PAXOS);
return execute_cql_with_timeout(cql,
return qctx->execute_cql_with_timeout(cql,
timeout,
utils::UUID_gen::micros_timestamp(ballot),
paxos_ttl_sec(s),
@@ -2113,7 +2075,7 @@ future<> save_paxos_promise(const schema& s, const partition_key& key, const uti
future<> save_paxos_proposal(const schema& s, const service::paxos::proposal& proposal, db::timeout_clock::time_point timeout) {
static auto cql = format("UPDATE system.{} USING TIMESTAMP ? AND TTL ? SET promise = ?, proposal_ballot = ?, proposal = ? WHERE row_key = ? AND cf_id = ?", PAXOS);
partition_key_view key = proposal.update.key();
return execute_cql_with_timeout(cql,
return qctx->execute_cql_with_timeout(cql,
timeout,
utils::UUID_gen::micros_timestamp(proposal.ballot),
paxos_ttl_sec(s),
@@ -2135,7 +2097,7 @@ future<> save_paxos_decision(const schema& s, const service::paxos::proposal& de
static auto cql = format("UPDATE system.{} USING TIMESTAMP ? AND TTL ? SET proposal_ballot = null, proposal = null,"
" most_recent_commit_at = ?, most_recent_commit = ? WHERE row_key = ? AND cf_id = ?", PAXOS);
partition_key_view key = decision.update.key();
return execute_cql_with_timeout(cql,
return qctx->execute_cql_with_timeout(cql,
timeout,
utils::UUID_gen::micros_timestamp(decision.ballot),
paxos_ttl_sec(s),
@@ -2152,7 +2114,7 @@ future<> delete_paxos_decision(const schema& s, const partition_key& key, const
// guarantees that if there is more recent round it will not be affected.
static auto cql = format("DELETE most_recent_commit FROM system.{} USING TIMESTAMP ? WHERE row_key = ? AND cf_id = ?", PAXOS);
-return execute_cql_with_timeout(cql,
+return qctx->execute_cql_with_timeout(cql,
timeout,
utils::UUID_gen::micros_timestamp(ballot),
to_legacy(*key.get_compound_type(s), key.representation()),


@@ -170,7 +170,7 @@ schema_ptr aggregates();
table_schema_version generate_schema_version(utils::UUID table_id, uint16_t offset = 0);
// Only for testing.
-void minimal_setup(distributed<database>& db, distributed<cql3::query_processor>& qp);
+void minimal_setup(distributed<cql3::query_processor>& qp);
future<> init_local_cache();
future<> deinit_local_cache();
@@ -203,29 +203,12 @@ future<> update_peer_info(gms::inet_address ep, sstring column_name, Value value
future<> remove_endpoint(gms::inet_address ep);
future<> update_hints_dropped(gms::inet_address ep, utils::UUID time_period, int value);
future<> set_scylla_local_param(const sstring& key, const sstring& value);
future<std::optional<sstring>> get_scylla_local_param(const sstring& key);
std::vector<schema_ptr> all_tables();
void make(database& db, bool durable, bool volatile_testing_only = false);
future<foreign_ptr<lw_shared_ptr<reconcilable_result>>>
query_mutations(distributed<service::storage_proxy>& proxy, const sstring& cf_name);
// Returns all data from given system table.
// Intended to be used by code which is not performance critical.
future<lw_shared_ptr<query::result_set>> query(distributed<service::storage_proxy>& proxy, const sstring& cf_name);
// Returns a slice of given system table.
// Intended to be used by code which is not performance critical.
future<lw_shared_ptr<query::result_set>> query(
distributed<service::storage_proxy>& proxy,
const sstring& cf_name,
const dht::decorated_key& key,
query::clustering_range row_ranges = query::clustering_range::make_open_ended_both_sides());
/// overloads
future<foreign_ptr<lw_shared_ptr<reconcilable_result>>>
@@ -414,7 +397,6 @@ enum class bootstrap_state {
future<> save_truncation_record(utils::UUID, db_clock::time_point truncated_at, db::replay_position);
future<> save_truncation_record(const column_family&, db_clock::time_point truncated_at, db::replay_position);
future<> remove_truncation_record(utils::UUID);
-future<replay_positions> get_truncated_position(utils::UUID);
future<db::replay_position> get_truncated_position(utils::UUID, uint32_t shard);
future<db_clock::time_point> get_truncated_at(utils::UUID);


@@ -152,41 +152,50 @@ db::view::base_dependent_view_info::base_dependent_view_info(schema_ptr base_sch
}
// A constructor for a base info that can facilitate only reads from the materialized view.
-db::view::base_dependent_view_info::base_dependent_view_info(bool has_base_non_pk_columns_in_view_pk)
+db::view::base_dependent_view_info::base_dependent_view_info(bool has_base_non_pk_columns_in_view_pk, std::optional<bytes>&& column_missing_in_base)
: _base_schema{nullptr}
+, _column_missing_in_base{std::move(column_missing_in_base)}
, has_base_non_pk_columns_in_view_pk{has_base_non_pk_columns_in_view_pk}
, use_only_for_reads{true} {
}
const std::vector<column_id>& db::view::base_dependent_view_info::base_non_pk_columns_in_view_pk() const {
if (use_only_for_reads) {
-on_internal_error(vlogger, "base_non_pk_columns_in_view_pk(): operation unsupported when initialized only for view reads.");
+on_internal_error(vlogger,
+format("base_non_pk_columns_in_view_pk(): operation unsupported when initialized only for view reads. "
+"Missing column in the base table: {}", to_sstring_view(_column_missing_in_base.value_or(bytes()))));
}
return _base_non_pk_columns_in_view_pk;
}
const schema_ptr& db::view::base_dependent_view_info::base_schema() const {
if (use_only_for_reads) {
-on_internal_error(vlogger, "base_schema(): operation unsupported when initialized only for view reads.");
+on_internal_error(vlogger,
+format("base_schema(): operation unsupported when initialized only for view reads. "
+"Missing column in the base table: {}", to_sstring_view(_column_missing_in_base.value_or(bytes()))));
}
return _base_schema;
}
db::view::base_info_ptr view_info::make_base_dependent_view_info(const schema& base) const {
std::vector<column_id> base_non_pk_columns_in_view_pk;
bool has_base_non_pk_columns_in_view_pk = false;
-bool can_only_read_from_view = false;
for (auto&& view_col : boost::range::join(_schema.partition_key_columns(), _schema.clustering_key_columns())) {
if (view_col.is_computed()) {
// we are not going to find it in the base table...
continue;
}
-auto* base_col = base.get_column_definition(view_col.name());
+const bytes& view_col_name = view_col.name();
+auto* base_col = base.get_column_definition(view_col_name);
if (base_col && !base_col->is_primary_key()) {
base_non_pk_columns_in_view_pk.push_back(base_col->id);
has_base_non_pk_columns_in_view_pk = true;
} else if (!base_col) {
vlogger.error("Column {} in view {}.{} was not found in the base table {}.{}",
to_sstring_view(view_col_name), _schema.ks_name(), _schema.cf_name(), base.ks_name(), base.cf_name());
if (to_sstring_view(view_col_name) == "idx_token") {
vlogger.warn("Missing idx_token column is caused by an incorrect upgrade of a secondary index. "
"Please recreate index {}.{} to avoid future issues.", _schema.ks_name(), _schema.cf_name());
}
// If we didn't find the column in the base column then it must have been deleted
// or not yet added (by alter command), this means it is for sure not a pk column
// in the base table. This can happen if the version of the base schema is not the
@@ -194,21 +203,11 @@ db::view::base_info_ptr view_info::make_base_dependent_view_info(const schema& b
// if we got to such a situation then it means it is only going to be used for reading
// (computation of shadowable tombstones) and in that case the existence of such a column
// is the only thing that is of interest to us.
-has_base_non_pk_columns_in_view_pk = true;
-can_only_read_from_view = true;
-// We can break the loop here since we have the info we wanted and the list
-// of columns is not going to be reliable anyhow.
-break;
+return make_lw_shared<db::view::base_dependent_view_info>(true, view_col_name);
}
}
-if (can_only_read_from_view) {
-return make_lw_shared<db::view::base_dependent_view_info>(has_base_non_pk_columns_in_view_pk);
-} else {
-return make_lw_shared<db::view::base_dependent_view_info>(base.shared_from_this(), std::move(base_non_pk_columns_in_view_pk));
-}
+return make_lw_shared<db::view::base_dependent_view_info>(base.shared_from_this(), std::move(base_non_pk_columns_in_view_pk));
}
bool view_info::has_base_non_pk_columns_in_view_pk() const {
@@ -219,7 +218,7 @@ bool view_info::has_base_non_pk_columns_in_view_pk() const {
// schema integrity problem as the creator of owning view schema
// didn't make sure to initialize it with base information.
if (!_base_info) {
-on_internal_error(vlogger, "Tried to perform a view query which is base info dependant without initializing it");
+on_internal_error(vlogger, "Tried to perform a view query which is base info dependent without initializing it");
}
return _base_info->has_base_non_pk_columns_in_view_pk;
}
@@ -417,7 +416,7 @@ deletable_row& view_updates::get_view_row(const partition_key& base_key, const c
if (!service::get_local_storage_service().db().local().find_column_family(_base->id()).get_index_manager().is_index(*_view)) {
throw std::logic_error(format("Column {} doesn't exist in base and this view is not backing a secondary index", cdef.name_as_text()));
}
-computed_value = token_column_computation().compute_value(*_base, base_key, update);
+computed_value = legacy_token_column_computation().compute_value(*_base, base_key, update);
} else {
computed_value = cdef.get_computation().compute_value(*_base, base_key, update);
}


@@ -53,6 +53,10 @@ private:
// Id of a regular base table column included in the view's PK, if any.
// Scylla views only allow one such column, alternator can have up to two.
std::vector<column_id> _base_non_pk_columns_in_view_pk;
+// For tracing purposes, if the view is out of sync with its base table
+// and there exists a column which is not in base, its name is stored
+// and added to debug messages.
+std::optional<bytes> _column_missing_in_base = {};
public:
const std::vector<column_id>& base_non_pk_columns_in_view_pk() const;
const schema_ptr& base_schema() const;
@@ -71,7 +75,7 @@ public:
// A constructor for a base info that can facilitate reads and writes from the materialized view.
base_dependent_view_info(schema_ptr base_schema, std::vector<column_id>&& base_non_pk_columns_in_view_pk);
// A constructor for a base info that can facilitate only reads from the materialized view.
-base_dependent_view_info(bool has_base_non_pk_columns_in_view_pk);
+base_dependent_view_info(bool has_base_non_pk_columns_in_view_pk, std::optional<bytes>&& column_missing_in_base);
};
// Immutable snapshot of view's base-schema-dependent part.


@@ -50,7 +50,7 @@ static logging::logger blogger("boot_strapper");
namespace dht {
future<> boot_strapper::bootstrap(streaming::stream_reason reason) {
-blogger.debug("Beginning bootstrap process: sorted_tokens={}", _token_metadata.sorted_tokens());
+blogger.debug("Beginning bootstrap process: sorted_tokens={}", get_token_metadata().sorted_tokens());
sstring description;
if (reason == streaming::stream_reason::bootstrap) {
description = "Bootstrap";
@@ -59,7 +59,7 @@ future<> boot_strapper::bootstrap(streaming::stream_reason reason) {
} else {
return make_exception_future<>(std::runtime_error("Wrong stream_reason provided: it can only be replace or bootstrap"));
}
-auto streamer = make_lw_shared<range_streamer>(_db, _token_metadata, _abort_source, _tokens, _address, description, reason);
+auto streamer = make_lw_shared<range_streamer>(_db, _token_metadata_ptr, _abort_source, _tokens, _address, description, reason);
auto nodes_to_filter = gms::get_local_gossiper().get_unreachable_members();
if (reason == streaming::stream_reason::replace && _db.local().get_replace_address()) {
nodes_to_filter.insert(_db.local().get_replace_address().value());
@@ -70,7 +70,7 @@ future<> boot_strapper::bootstrap(streaming::stream_reason reason) {
return do_for_each(*keyspaces, [this, keyspaces, streamer] (sstring& keyspace_name) {
auto& ks = _db.local().find_keyspace(keyspace_name);
auto& strategy = ks.get_replication_strategy();
-dht::token_range_vector ranges = strategy.get_pending_address_ranges(_token_metadata, _tokens, _address);
+dht::token_range_vector ranges = strategy.get_pending_address_ranges(_token_metadata_ptr, _tokens, _address, locator::can_yield::no);
blogger.debug("Will stream keyspace={}, ranges={}", keyspace_name, ranges);
return streamer->add_ranges(keyspace_name, ranges);
}).then([this, streamer] {
@@ -83,7 +83,7 @@ future<> boot_strapper::bootstrap(streaming::stream_reason reason) {
}
-std::unordered_set<token> boot_strapper::get_bootstrap_tokens(const token_metadata& metadata, database& db) {
+std::unordered_set<token> boot_strapper::get_bootstrap_tokens(const token_metadata_ptr tmptr, database& db) {
auto initial_tokens = db.get_initial_tokens();
// if user specified tokens, use those
if (initial_tokens.size() > 0) {
@@ -91,7 +91,7 @@ std::unordered_set<token> boot_strapper::get_bootstrap_tokens(const token_metada
std::unordered_set<token> tokens;
for (auto& token_string : initial_tokens) {
auto token = dht::token::from_sstring(token_string);
-if (metadata.get_endpoint(token)) {
+if (tmptr->get_endpoint(token)) {
throw std::runtime_error(format("Bootstrapping to existing token {} is not allowed (decommission/removenode the old node first).", token_string));
}
tokens.insert(token);
@@ -109,16 +109,16 @@ std::unordered_set<token> boot_strapper::get_bootstrap_tokens(const token_metada
blogger.warn("Picking random token for a single vnode. You should probably add more vnodes; failing that, you should probably specify the token manually");
}
-auto tokens = get_random_tokens(metadata, num_tokens);
+auto tokens = get_random_tokens(std::move(tmptr), num_tokens);
blogger.debug("Get random bootstrap_tokens={}", tokens);
return tokens;
}
-std::unordered_set<token> boot_strapper::get_random_tokens(const token_metadata& metadata, size_t num_tokens) {
+std::unordered_set<token> boot_strapper::get_random_tokens(const token_metadata_ptr tmptr, size_t num_tokens) {
std::unordered_set<token> tokens;
while (tokens.size() < num_tokens) {
auto token = dht::token::get_random_token();
-auto ep = metadata.get_endpoint(token);
+auto ep = tmptr->get_endpoint(token);
if (!ep) {
tokens.emplace(token);
}


@@ -50,6 +50,7 @@ namespace dht {
class boot_strapper {
using inet_address = gms::inet_address;
using token_metadata = locator::token_metadata;
+using token_metadata_ptr = locator::token_metadata_ptr;
using token = dht::token;
distributed<database>& _db;
abort_source& _abort_source;
@@ -57,14 +58,14 @@ class boot_strapper {
inet_address _address;
/* token of the node being bootstrapped. */
std::unordered_set<token> _tokens;
-token_metadata _token_metadata;
+const token_metadata_ptr _token_metadata_ptr;
public:
-boot_strapper(distributed<database>& db, abort_source& abort_source, inet_address addr, std::unordered_set<token> tokens, token_metadata tmd)
+boot_strapper(distributed<database>& db, abort_source& abort_source, inet_address addr, std::unordered_set<token> tokens, const token_metadata_ptr tmptr)
: _db(db)
, _abort_source(abort_source)
, _address(addr)
, _tokens(tokens)
-, _token_metadata(tmd) {
+, _token_metadata_ptr(std::move(tmptr)) {
}
future<> bootstrap(streaming::stream_reason reason);
@@ -74,9 +75,9 @@ public:
* otherwise, if num_tokens == 1, pick a token to assume half the load of the most-loaded node.
* else choose num_tokens tokens at random
*/
-static std::unordered_set<token> get_bootstrap_tokens(const token_metadata& metadata, database& db);
+static std::unordered_set<token> get_bootstrap_tokens(const token_metadata_ptr tmptr, database& db);
-static std::unordered_set<token> get_random_tokens(const token_metadata& metadata, size_t num_tokens);
+static std::unordered_set<token> get_random_tokens(const token_metadata_ptr tmptr, size_t num_tokens);
#if 0
public static class StringSerializer implements IVersionedSerializer<String>
{
@@ -98,6 +99,11 @@ public:
}
}
#endif
+private:
+const token_metadata& get_token_metadata() {
+return *_token_metadata_ptr;
+}
};
} // namespace dht


@@ -107,6 +107,7 @@ range_streamer::get_range_fetch_map(const std::unordered_map<dht::token_range, s
return range_fetch_map_map;
}
+// Must be called from a seastar thread
std::unordered_map<dht::token_range, std::vector<inet_address>>
range_streamer::get_all_ranges_with_sources_for(const sstring& keyspace_name, dht::token_range_vector desired_ranges) {
logger.debug("{} ks={}", __func__, keyspace_name);
@@ -114,8 +115,8 @@ range_streamer::get_all_ranges_with_sources_for(const sstring& keyspace_name, dh
auto& ks = _db.local().find_keyspace(keyspace_name);
auto& strat = ks.get_replication_strategy();
-auto tm = _metadata.clone_only_token_map();
-auto range_addresses = strat.get_range_addresses(tm);
+auto tm = get_token_metadata().clone_only_token_map().get0();
+auto range_addresses = strat.get_range_addresses(tm, locator::can_yield::yes);
logger.debug("keyspace={}, desired_ranges.size={}, range_addresses.size={}", keyspace_name, desired_ranges.size(), range_addresses.size());
@@ -146,6 +147,7 @@ range_streamer::get_all_ranges_with_sources_for(const sstring& keyspace_name, dh
return range_sources;
}
+// Must be called from a seastar thread
std::unordered_map<dht::token_range, std::vector<inet_address>>
range_streamer::get_all_ranges_with_strict_sources_for(const sstring& keyspace_name, dht::token_range_vector desired_ranges) {
logger.debug("{} ks={}", __func__, keyspace_name);
@@ -155,12 +157,12 @@ range_streamer::get_all_ranges_with_strict_sources_for(const sstring& keyspace_n
auto& strat = ks.get_replication_strategy();
//Active ranges
-auto metadata_clone = _metadata.clone_only_token_map();
-auto range_addresses = strat.get_range_addresses(metadata_clone);
+auto metadata_clone = get_token_metadata().clone_only_token_map().get0();
+auto range_addresses = strat.get_range_addresses(metadata_clone, locator::can_yield::yes);
//Pending ranges
metadata_clone.update_normal_tokens(_tokens, _address);
-auto pending_range_addresses = strat.get_range_addresses(metadata_clone);
+auto pending_range_addresses = strat.get_range_addresses(metadata_clone, locator::can_yield::yes);
//Collects the source that will have its range moved to the new node
std::unordered_map<dht::token_range, std::vector<inet_address>> range_sources;
@@ -221,7 +223,7 @@ bool range_streamer::use_strict_sources_for_ranges(const sstring& keyspace_name)
return !_db.local().is_replacing()
&& use_strict_consistency()
&& !_tokens.empty()
-&& _metadata.get_all_endpoints().size() != strat.get_replication_factor();
+&& get_token_metadata().get_all_endpoints().size() != strat.get_replication_factor();
}
void range_streamer::add_tx_ranges(const sstring& keyspace_name, std::unordered_map<inet_address, dht::token_range_vector> ranges_per_endpoint) {


@@ -60,6 +60,7 @@ class range_streamer {
public:
using inet_address = gms::inet_address;
using token_metadata = locator::token_metadata;
+using token_metadata_ptr = locator::token_metadata_ptr;
using stream_plan = streaming::stream_plan;
using stream_state = streaming::stream_state;
static bool use_strict_consistency();
@@ -101,9 +102,9 @@ public:
}
};
-range_streamer(distributed<database>& db, const token_metadata& tm, abort_source& abort_source, std::unordered_set<token> tokens, inet_address address, sstring description, streaming::stream_reason reason)
+range_streamer(distributed<database>& db, const token_metadata_ptr tmptr, abort_source& abort_source, std::unordered_set<token> tokens, inet_address address, sstring description, streaming::stream_reason reason)
: _db(db)
-, _metadata(tm)
+, _token_metadata_ptr(std::move(tmptr))
, _abort_source(abort_source)
, _tokens(std::move(tokens))
, _address(address)
@@ -113,8 +114,8 @@ public:
_abort_source.check();
}
-range_streamer(distributed<database>& db, const token_metadata& tm, abort_source& abort_source, inet_address address, sstring description, streaming::stream_reason reason)
-: range_streamer(db, tm, abort_source, std::unordered_set<token>(), address, description, reason) {
+range_streamer(distributed<database>& db, const token_metadata_ptr tmptr, abort_source& abort_source, inet_address address, sstring description, streaming::stream_reason reason)
+: range_streamer(db, std::move(tmptr), abort_source, std::unordered_set<token>(), address, description, reason) {
}
void add_source_filter(std::unique_ptr<i_source_filter> filter) {
@@ -159,13 +160,17 @@ private:
return toFetch;
}
#endif
+const token_metadata& get_token_metadata() {
+return *_token_metadata_ptr;
+}
public:
future<> stream_async();
future<> do_stream_async();
size_t nr_ranges_to_stream();
private:
distributed<database>& _db;
-const token_metadata& _metadata;
+const token_metadata_ptr _token_metadata_ptr;
abort_source& _abort_source;
std::unordered_set<token> _tokens;
inet_address _address;


@@ -58,8 +58,7 @@ public:
template<typename T, typename... Args>
void feed_hash(const T& value, Args&&... args) {
// FIXME uncomment the noexcept marking once clang bug 50994 is fixed or gcc compilation is turned on
-std::visit([&] (auto& hasher) /* noexcept(noexcept(::feed_hash(hasher, value, args...))) */ -> void {
+std::visit([&] (auto& hasher) noexcept -> void {
::feed_hash(hasher, value, std::forward<Args>(args)...);
}, _impl);
};


@@ -24,10 +24,9 @@ import os
import sys
import tempfile
import tarfile
-import shutil
-import glob
from scylla_util import *
import argparse
+from subprocess import run
VERSION='1.0.1'
INSTALL_DIR=scylladir()+'/Prometheus/node_exporter'
@@ -54,7 +53,7 @@ if __name__ == '__main__':
sys.exit(1)
if is_gentoo_variant():
-run('emerge -uq app-metrics/node_exporter')
+run('emerge -uq app-metrics/node_exporter', shell=True, check=True)
print('app-metrics/node_exporter does not install systemd service files, please fill a bug if you need them.')
sys.exit(1)
else:
@@ -63,9 +62,6 @@ if __name__ == '__main__':
f.write(data)
with tarfile.open('/var/tmp/node_exporter-{version}.linux-amd64.tar.gz'.format(version=VERSION)) as tf:
tf.extractall(INSTALL_DIR)
-shutil.chown(f'{INSTALL_DIR}/node_exporter-{VERSION}.linux-amd64', 'root', 'root')
-for f in glob.glob(f'{INSTALL_DIR}/node_exporter-{VERSION}.linux-amd64/*'):
-shutil.chown(f, 'root', 'root')
os.remove('/var/tmp/node_exporter-{version}.linux-amd64.tar.gz'.format(version=VERSION))
if node_exporter_p.exists():
node_exporter_p.unlink()


@@ -24,8 +24,8 @@ import os
import re
import sys
import argparse
-import subprocess
from scylla_util import *
+from subprocess import run
if __name__ == '__main__':
if os.getuid() > 0:
@@ -58,9 +58,9 @@ if __name__ == '__main__':
cfg.set(grub_key, cmdline_linux)
cfg.commit()
if is_debian_variant():
-run('update-grub')
+run('update-grub', shell=True, check=True)
else:
-run('grub2-mkconfig -o /boot/grub2/grub.cfg')
+run('grub2-mkconfig -o /boot/grub2/grub.cfg', shell=True, check=True)
# if is_ec2() and os.path.exists('/boot/grub/menu.lst'):
if os.path.exists('/boot/grub/menu.lst'):


@@ -26,8 +26,8 @@ import argparse
import subprocess
import time
import tempfile
-import subprocess
from scylla_util import *
+from subprocess import run
if __name__ == '__main__':
if os.getuid() > 0:
@@ -42,7 +42,7 @@ if __name__ == '__main__':
# Gentoo may uses OpenRC
if is_gentoo_variant():
-run('sysctl -p /etc/sysctl.d/99-scylla-coredump.conf')
+run('sysctl -p /etc/sysctl.d/99-scylla-coredump.conf', shell=True, check=True)
# Other distributions can use systemd-coredump, so setup it
else:
if is_debian_variant():
@@ -80,15 +80,14 @@ WantedBy=multi-user.target
systemd_unit('var-lib-systemd-coredump.mount').enable()
systemd_unit('var-lib-systemd-coredump.mount').start()
if os.path.exists('/usr/lib/sysctl.d/50-coredump.conf'):
-run('sysctl -p /usr/lib/sysctl.d/50-coredump.conf')
+run('sysctl -p /usr/lib/sysctl.d/50-coredump.conf', shell=True, check=True)
else:
with open('/etc/sysctl.d/99-scylla-coredump.conf', 'w') as f:
f.write('kernel.core_pattern=|/usr/lib/systemd/systemd-coredump %p %u %g %s %t %e"')
-run('sysctl -p /etc/sysctl.d/99-scylla-coredump.conf')
+run('sysctl -p /etc/sysctl.d/99-scylla-coredump.conf', shell=True, check=True)
fp = tempfile.NamedTemporaryFile()
fp.write(b'ulimit -c unlimited\n')
-fp.write(b'kill -SEGV $$\n')
+fp.write(b'kill -SEGV $$')
fp.flush()
p = subprocess.Popen(['/bin/bash', fp.name], stdout=subprocess.PIPE)
pid = p.pid
@@ -99,7 +98,7 @@ WantedBy=multi-user.target
# need to wait for systemd-coredump to complete collecting coredump
time.sleep(3)
try:
-coreinfo = out('coredumpctl --no-pager --no-legend info {}'.format(pid))
+coreinfo = run('coredumpctl --no-pager --no-legend info {}'.format(pid), shell=True, check=True, capture_output=True, encoding='utf-8').stdout.strip()
except subprocess.CalledProcessError:
print('Does not able to detect coredump, failed to configure systemd-coredump.')
sys.exit(1)


@@ -22,7 +22,6 @@
import os
import sys
import argparse
-import shlex
import distro
from scylla_util import *
@@ -34,22 +33,12 @@ if __name__ == '__main__':
if os.getuid() > 0:
print('Requires root permission.')
sys.exit(1)
-parser = argparse.ArgumentParser(description='CPU scaling setup script for Scylla.')
-parser.add_argument('--force', dest='force', action='store_true',
-help='force running setup even CPU scaling unsupported')
-args = parser.parse_args()
-if not args.force and not os.path.exists('/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor'):
+if not os.path.exists('/sys/devices/system/cpu/cpufreq/policy0/scaling_governor'):
print('This computer doesn\'t supported CPU scaling configuration.')
sys.exit(0)
if is_debian_variant():
if not shutil.which('cpufreq-set'):
apt_install('cpufrequtils')
try:
ondemand = systemd_unit('ondemand')
ondemand.disable()
except:
pass
cfg = sysconfig_parser('/etc/default/cpufrequtils')
cfg.set('GOVERNOR', 'performance')
cfg.commit()


@@ -24,6 +24,7 @@ import os
import sys
import argparse
from scylla_util import *
+from subprocess import run
if __name__ == '__main__':
if not is_ec2():
@@ -40,7 +41,7 @@ if __name__ == '__main__':
aws = aws_instance()
instance_class = aws.instance_class()
en = aws.get_en_interface_type()
-match = re.search(r'^driver: (\S+)$', out('ethtool -i {}'.format(args.nic)), flags=re.MULTILINE)
+match = re.search(r'^driver: (\S+)$', run('ethtool -i {}'.format(args.nic), shell=True, check=True, capture_output=True, encoding='utf-8').stdout.strip(), flags=re.MULTILINE)
driver = match.group(1)
if not en:

Some files were not shown because too many files have changed in this diff.