sstables: Fix sstable reshaping for STCS

The STCS reshape heuristic is correct, and it built the compaction descriptor correctly, but it forgot to return the descriptor to the caller, so no reshape was ever done on behalf of STCS even when the strategy needed it.

Fixes #7774.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20201209175044.1609102-1-raphaelsc@scylladb.com>
(cherry picked from commit e4b55f40f3)
scylla_ntp_setup: support 'pool' directive on ntp.conf
2021-11-15 13:28:52 +02:00 · 2021-10-10 19:42:14 +03:00 · 2021-10-05 16:20:30 +03:00 · 2021-10-03 14:09:37 +03:00 · 2021-10-03 13:11:30 +03:00 · 2021-09-23 15:18:22 +03:00
1236 changed files with 7181 additions and 23443 deletions
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -79,9 +79,3 @@ db/hints/* @haaawk @piodul @vladzcloudius
 # REDIS
 redis/* @nyh @syuu1228
 redis-test/* @nyh @syuu1228
-
-# READERS
-reader_* @denesb
-querier* @denesb
-test/boost/mutation_reader_test.cc @denesb
-test/boost/querier_cache_test.cc @denesb
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -1,33 +0,0 @@
-name: "CI Docs"
-
-on:
-  push:
-    branches:
-    - master
-    paths:
-    - 'docs/**'
-jobs:
-  release:
-    name: Build
-    runs-on: ubuntu-latest
-    env:
-      LATEST_VERSION: master
-    steps:
-    - name: Checkout
-      uses: actions/checkout@v2
-      with:
-        persist-credentials: false
-        fetch-depth: 0
-    - name: Set up Python
-      uses: actions/setup-python@v1
-      with:
-        python-version: 3.7
-    - name: Build docs
-      run: |
-        export PATH=$PATH:~/.local/bin
-        cd docs
-        make multiversion
-    - name: Deploy
-      run : ./docs/_utils/deploy.sh
-      env:
-        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.gitignore
+++ b/.gitignore
@@ -25,5 +25,3 @@ tags
 testlog
 test/*/*.reject
 .vscode
-docs/_build
-docs/poetry.lock
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,8 @@
-cmake_minimum_required(VERSION 3.18)
+##
+## For best results, first compile the project using the Ninja build-system.
+##

+cmake_minimum_required(VERSION 3.7)
 project(scylla)

 if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
@@ -17,740 +20,138 @@ else()
    set(BUILD_TYPE "release")
 endif()

-function(default_target_arch arch)
-    set(x86_instruction_sets i386 i686 x86_64)
-    if(CMAKE_SYSTEM_PROCESSOR IN_LIST x86_instruction_sets)
-        set(${arch} "westmere" PARENT_SCOPE)
-    elseif(CMAKE_SYSTEM_PROCESSOR EQUAL "aarch64")
-        set(${arch} "armv8-a+crc+crypto" PARENT_SCOPE)
-    else()
-        set(${arch} "" PARENT_SCOPE)
-    endif()
-endfunction()
-default_target_arch(target_arch)
-if(target_arch)
-    set(target_arch_flag "-march=${target_arch}")
+if (NOT DEFINED FOR_IDE AND NOT DEFINED ENV{FOR_IDE} AND NOT DEFINED ENV{CLION_IDE})
+    message(FATAL_ERROR "This CMakeLists.txt file is only valid for use in IDEs, please define FOR_IDE to acknowledge this.")
 endif()

-# Configure Seastar compile options to align with Scylla
-set(Seastar_CXX_FLAGS -fcoroutines ${target_arch_flag} CACHE INTERNAL "" FORCE)
-set(Seastar_CXX_DIALECT gnu++20 CACHE INTERNAL "" FORCE)
+# These paths are always available, since they're included in the repository. Additional DPDK headers are placed while
+# Seastar is built, and are captured in `SEASTAR_INCLUDE_DIRS` through parsing the Seastar pkg-config file (below).
+set(SEASTAR_DPDK_INCLUDE_DIRS
+        seastar/dpdk/lib/librte_eal/common/include
+        seastar/dpdk/lib/librte_eal/common/include/generic
+        seastar/dpdk/lib/librte_eal/common/include/x86
+        seastar/dpdk/lib/librte_ether)

-add_subdirectory(seastar)
-add_subdirectory(abseil)
-# Exclude absl::strerror from the default "all" target since it's not
-# used in Scylla build and, moreover, makes use of deprecated glibc APIs,
-# such as sys_nerr, which are not exposed from "stdio.h" since glibc 2.32,
-# which happens to be the case for recent Fedora distribution versions.
-#
-# Need to use the internal "absl_strerror" target name instead of namespaced
-# variant because `set_target_properties` does not understand the latter form,
-# unfortunately.
-set_target_properties(absl_strerror PROPERTIES EXCLUDE_FROM_ALL TRUE)
+find_package(PkgConfig REQUIRED)

-# System libraries dependencies
-find_package(Boost COMPONENTS filesystem program_options system thread regex REQUIRED)
-find_package(Lua REQUIRED)
-find_package(ZLIB REQUIRED)
-find_package(ICU COMPONENTS uc REQUIRED)
+set(ENV{PKG_CONFIG_PATH} "${CMAKE_SOURCE_DIR}/build/${BUILD_TYPE}/seastar:$ENV{PKG_CONFIG_PATH}")
+pkg_check_modules(SEASTAR seastar)

-set(scylla_build_dir "${CMAKE_BINARY_DIR}/build/${BUILD_TYPE}")
-set(scylla_gen_build_dir "${scylla_build_dir}/gen")
-file(MAKE_DIRECTORY "${scylla_build_dir}" "${scylla_gen_build_dir}")
+if(NOT SEASTAR_INCLUDE_DIRS)
+    # Default value. A more accurate list is populated by the `pkg_check_modules` call above when `seastar.pc` is available.
+    set(SEASTAR_INCLUDE_DIRS "seastar/include")
+endif()

-# Place libraries, executables and archives in ${buildroot}/build/${mode}/
-foreach(mode RUNTIME LIBRARY ARCHIVE)
-    set(CMAKE_${mode}_OUTPUT_DIRECTORY "${scylla_build_dir}")
-endforeach()
+find_package(Boost COMPONENTS filesystem program_options system thread)

-# Generate C++ source files from thrift definitions
-function(scylla_generate_thrift)
-    set(one_value_args TARGET VAR IN_FILE OUT_DIR SERVICE)
-    cmake_parse_arguments(args "" "${one_value_args}" "" ${ARGN})
+##
+## Populate the names of all source and header files in the indicated paths in a designated variable.
+##
+## When RECURSIVE is specified, directories are traversed recursively.
+##
+## Use: scan_scylla_source_directories(VAR my_result_var [RECURSIVE] PATHS [path1 path2 ...])
+##
+function (scan_scylla_source_directories)
+    set(options RECURSIVE)
+    set(oneValueArgs VAR)
+    set(multiValueArgs PATHS)
+    cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")

-    get_filename_component(in_file_name ${args_IN_FILE} NAME_WE)
+    set(globs "")

-    set(aux_out_file_name ${args_OUT_DIR}/${in_file_name})
-    set(outputs
-        ${aux_out_file_name}_types.cpp
-        ${aux_out_file_name}_types.h
-        ${aux_out_file_name}_constants.cpp
-        ${aux_out_file_name}_constants.h
-        ${args_OUT_DIR}/${args_SERVICE}.cpp
-        ${args_OUT_DIR}/${args_SERVICE}.h)
+    foreach (dir ${args_PATHS})
+        list(APPEND globs "${dir}/*.cc" "${dir}/*.hh")
+    endforeach()

-    add_custom_command(
-        DEPENDS
-            ${args_IN_FILE}
-            thrift
-        OUTPUT ${outputs}
-        COMMAND ${CMAKE_COMMAND} -E make_directory ${args_OUT_DIR}
-        COMMAND thrift -gen cpp:cob_style,no_skeleton -out "${args_OUT_DIR}" "${args_IN_FILE}")
+    if (args_RECURSIVE)
+        set(glob_kind GLOB_RECURSE)
+    else()
+        set(glob_kind GLOB)
+    endif()

-    add_custom_target(${args_TARGET}
-        DEPENDS ${outputs})
+    file(${glob_kind} var
+            ${globs})

-    set(${args_VAR} ${outputs} PARENT_SCOPE)
+    set(${args_VAR} ${var} PARENT_SCOPE)
 endfunction()

-scylla_generate_thrift(
-    TARGET scylla_thrift_gen_cassandra
-    VAR scylla_thrift_gen_cassandra_files
-    IN_FILE interface/cassandra.thrift
-    OUT_DIR ${scylla_gen_build_dir}
-    SERVICE Cassandra)
+## Although Seastar is an external project, it is common enough to explore the sources while doing
+## Scylla development that we'll treat the Seastar sources as part of this project for easier navigation.
+scan_scylla_source_directories(
+        VAR SEASTAR_SOURCE_FILES
+        RECURSIVE

-# Parse antlr3 grammar files and generate C++ sources
-function(scylla_generate_antlr3)
-    set(one_value_args TARGET VAR IN_FILE OUT_DIR)
-    cmake_parse_arguments(args "" "${one_value_args}" "" ${ARGN})
+        PATHS
+          seastar/core
+          seastar/http
+          seastar/json
+          seastar/net
+          seastar/rpc
+          seastar/testing
+          seastar/util)

-    get_filename_component(in_file_pure_name ${args_IN_FILE} NAME)
-    get_filename_component(stem ${in_file_pure_name} NAME_WE)
+scan_scylla_source_directories(
+        VAR SCYLLA_ROOT_SOURCE_FILES
+        PATHS .)

-    set(outputs
-        "${args_OUT_DIR}/${stem}Lexer.hpp"
-        "${args_OUT_DIR}/${stem}Lexer.cpp"
-        "${args_OUT_DIR}/${stem}Parser.hpp"
-        "${args_OUT_DIR}/${stem}Parser.cpp")
+scan_scylla_source_directories(
+        VAR SCYLLA_SUB_SOURCE_FILES
+        RECURSIVE

-    add_custom_command(
-        DEPENDS
-            ${args_IN_FILE}
-        OUTPUT ${outputs}
-        # Remove #ifdef'ed code from the grammar source code
-        COMMAND sed -e "/^#if 0/,/^#endif/d" "${args_IN_FILE}" > "${args_OUT_DIR}/${in_file_pure_name}"
-        COMMAND antlr3 "${args_OUT_DIR}/${in_file_pure_name}"
-        # We replace many local `ExceptionBaseType* ex` variables with a single function-scope one.
-        # Because we add such a variable to every function, and because `ExceptionBaseType` is not a global
-        # name, we also add a global typedef to avoid compilation errors.
-        COMMAND sed -i -e "/^.*On :.*$/d" "${args_OUT_DIR}/${stem}Lexer.hpp"
-        COMMAND sed -i -e "/^.*On :.*$/d" "${args_OUT_DIR}/${stem}Lexer.cpp"
-        COMMAND sed -i -e "/^.*On :.*$/d" "${args_OUT_DIR}/${stem}Parser.hpp"
-        COMMAND sed -i
-            -e "s/^\\( *\\)\\(ImplTraits::CommonTokenType\\* [a-zA-Z0-9_]* = NULL;\\)$/\\1const \\2/"
-            -e "/^.*On :.*$/d"
-            -e "1i using ExceptionBaseType = int;"
-            -e "s/^{/{ ExceptionBaseType\\* ex = nullptr;/; s/ExceptionBaseType\\* ex = new/ex = new/; s/exceptions::syntax_exception e/exceptions::syntax_exception\\& e/"
-            "${args_OUT_DIR}/${stem}Parser.cpp"
-        VERBATIM)
+        PATHS
+          api
+          auth
+          cql3
+          db
+          dht
+          exceptions
+          gms
+          index
+          io
+          locator
+          message
+          raft
+          repair
+          service
+          sstables
+          streaming
+          test
+          thrift
+          tracing
+          transport
+          utils)

-    add_custom_target(${args_TARGET}
-        DEPENDS ${outputs})
+scan_scylla_source_directories(
+        VAR SCYLLA_GEN_SOURCE_FILES
+        RECURSIVE
+        PATHS build/${BUILD_TYPE}/gen)

-    set(${args_VAR} ${outputs} PARENT_SCOPE)
-endfunction()
-
-set(antlr3_grammar_files
-    cql3/Cql.g
-    alternator/expressions.g)
-
-set(antlr3_gen_files)
-
-foreach(f ${antlr3_grammar_files})
-    get_filename_component(grammar_file_name "${f}" NAME_WE)
-    get_filename_component(f_dir "${f}" DIRECTORY)
-    scylla_generate_antlr3(
-        TARGET scylla_antlr3_gen_${grammar_file_name}
-        VAR scylla_antlr3_gen_${grammar_file_name}_files
-        IN_FILE ${f}
-        OUT_DIR ${scylla_gen_build_dir}/${f_dir})
-    list(APPEND antlr3_gen_files "${scylla_antlr3_gen_${grammar_file_name}_files}")
-endforeach()
-
-# Generate C++ sources from ragel grammar files
-seastar_generate_ragel(
-    TARGET scylla_ragel_gen_protocol_parser
-    VAR scylla_ragel_gen_protocol_parser_file
-    IN_FILE redis/protocol_parser.rl
-    OUT_FILE ${scylla_gen_build_dir}/redis/protocol_parser.hh)
-
-# Generate C++ sources from Swagger definitions
-set(swagger_files
-    api/api-doc/cache_service.json
-    api/api-doc/collectd.json
-    api/api-doc/column_family.json
-    api/api-doc/commitlog.json
-    api/api-doc/compaction_manager.json
-    api/api-doc/config.json
-    api/api-doc/endpoint_snitch_info.json
-    api/api-doc/error_injection.json
-    api/api-doc/failure_detector.json
-    api/api-doc/gossiper.json
-    api/api-doc/hinted_handoff.json
-    api/api-doc/lsa.json
-    api/api-doc/messaging_service.json
-    api/api-doc/storage_proxy.json
-    api/api-doc/storage_service.json
-    api/api-doc/stream_manager.json
-    api/api-doc/system.json
-    api/api-doc/utils.json)
-
-set(swagger_gen_files)
-
-foreach(f ${swagger_files})
-    get_filename_component(fname "${f}" NAME_WE)
-    get_filename_component(dir "${f}" DIRECTORY)
-    seastar_generate_swagger(
-        TARGET scylla_swagger_gen_${fname}
-        VAR scylla_swagger_gen_${fname}_files
-        IN_FILE "${f}"
-        OUT_DIR "${scylla_gen_build_dir}/${dir}")
-    list(APPEND swagger_gen_files "${scylla_swagger_gen_${fname}_files}")
-endforeach()
-
-# Create C++ bindings for IDL serializers
-function(scylla_generate_idl_serializer)
-    set(one_value_args TARGET VAR IN_FILE OUT_FILE)
-    cmake_parse_arguments(args "" "${one_value_args}" "" ${ARGN})
-    get_filename_component(out_dir ${args_OUT_FILE} DIRECTORY)
-    set(idl_compiler "${CMAKE_SOURCE_DIR}/idl-compiler.py")
-
-    find_package(Python3 COMPONENTS Interpreter)
-
-    add_custom_command(
-        DEPENDS
-            ${args_IN_FILE}
-            ${idl_compiler}
-        OUTPUT ${args_OUT_FILE}
-        COMMAND ${CMAKE_COMMAND} -E make_directory ${out_dir}
-        COMMAND Python3::Interpreter ${idl_compiler} --ns ser -f ${args_IN_FILE} -o ${args_OUT_FILE})
-
-    add_custom_target(${args_TARGET}
-        DEPENDS ${args_OUT_FILE})
-
-    set(${args_VAR} ${args_OUT_FILE} PARENT_SCOPE)
-endfunction()
-
-set(idl_serializers
-    idl/cache_temperature.idl.hh
-    idl/commitlog.idl.hh
-    idl/consistency_level.idl.hh
-    idl/frozen_mutation.idl.hh
-    idl/frozen_schema.idl.hh
-    idl/gossip_digest.idl.hh
-    idl/idl_test.idl.hh
-    idl/keys.idl.hh
-    idl/messaging_service.idl.hh
-    idl/mutation.idl.hh
-    idl/paging_state.idl.hh
-    idl/partition_checksum.idl.hh
-    idl/paxos.idl.hh
-    idl/query.idl.hh
-    idl/range.idl.hh
-    idl/read_command.idl.hh
-    idl/reconcilable_result.idl.hh
-    idl/replay_position.idl.hh
-    idl/result.idl.hh
-    idl/ring_position.idl.hh
-    idl/streaming.idl.hh
-    idl/token.idl.hh
-    idl/tracing.idl.hh
-    idl/truncation_record.idl.hh
-    idl/uuid.idl.hh
-    idl/view.idl.hh)
-
-set(idl_gen_files)
-
-foreach(f ${idl_serializers})
-    get_filename_component(idl_name "${f}" NAME)
-    get_filename_component(idl_target "${idl_name}" NAME_WE)
-    get_filename_component(idl_dir "${f}" DIRECTORY)
-    string(REPLACE ".idl.hh" ".dist.hh" idl_out_hdr_name "${idl_name}")
-    scylla_generate_idl_serializer(
-        TARGET scylla_idl_gen_${idl_target}
-        VAR scylla_idl_gen_${idl_target}_files
-        IN_FILE ${f}
-        OUT_FILE ${scylla_gen_build_dir}/${idl_dir}/${idl_out_hdr_name})
-    list(APPEND idl_gen_files "${scylla_idl_gen_${idl_target}_files}")
-endforeach()
-
-set(scylla_sources
-    absl-flat_hash_map.cc
-    alternator/auth.cc
-    alternator/base64.cc
-    alternator/conditions.cc
-    alternator/executor.cc
-    alternator/expressions.cc
-    alternator/serialization.cc
-    alternator/server.cc
-    alternator/stats.cc
-    alternator/streams.cc
-    api/api.cc
-    api/cache_service.cc
-    api/collectd.cc
-    api/column_family.cc
-    api/commitlog.cc
-    api/compaction_manager.cc
-    api/config.cc
-    api/endpoint_snitch.cc
-    api/error_injection.cc
-    api/failure_detector.cc
-    api/gossiper.cc
-    api/hinted_handoff.cc
-    api/lsa.cc
-    api/messaging_service.cc
-    api/storage_proxy.cc
-    api/storage_service.cc
-    api/stream_manager.cc
-    api/system.cc
-    atomic_cell.cc
-    auth/allow_all_authenticator.cc
-    auth/allow_all_authorizer.cc
-    auth/authenticated_user.cc
-    auth/authentication_options.cc
-    auth/authenticator.cc
-    auth/common.cc
-    auth/default_authorizer.cc
-    auth/password_authenticator.cc
-    auth/passwords.cc
-    auth/permission.cc
-    auth/permissions_cache.cc
-    auth/resource.cc
-    auth/role_or_anonymous.cc
-    auth/roles-metadata.cc
-    auth/sasl_challenge.cc
-    auth/service.cc
-    auth/standard_role_manager.cc
-    auth/transitional.cc
-    bytes.cc
-    canonical_mutation.cc
-    cdc/cdc_partitioner.cc
-    cdc/generation.cc
-    cdc/log.cc
-    cdc/metadata.cc
-    cdc/split.cc
-    clocks-impl.cc
-    collection_mutation.cc
-    compress.cc
-    connection_notifier.cc
-    converting_mutation_partition_applier.cc
-    counters.cc
-    cql3/abstract_marker.cc
-    cql3/attributes.cc
-    cql3/cf_name.cc
-    cql3/column_condition.cc
-    cql3/column_identifier.cc
-    cql3/column_specification.cc
-    cql3/constants.cc
-    cql3/cql3_type.cc
-    cql3/expr/expression.cc
-    cql3/functions/aggregate_fcts.cc
-    cql3/functions/castas_fcts.cc
-    cql3/functions/error_injection_fcts.cc
-    cql3/functions/functions.cc
-    cql3/functions/user_function.cc
-    cql3/index_name.cc
-    cql3/keyspace_element_name.cc
-    cql3/lists.cc
-    cql3/maps.cc
-    cql3/operation.cc
-    cql3/query_options.cc
-    cql3/query_processor.cc
-    cql3/relation.cc
-    cql3/restrictions/statement_restrictions.cc
-    cql3/result_set.cc
-    cql3/role_name.cc
-    cql3/selection/abstract_function_selector.cc
-    cql3/selection/selectable.cc
-    cql3/selection/selection.cc
-    cql3/selection/selector.cc
-    cql3/selection/selector_factories.cc
-    cql3/selection/simple_selector.cc
-    cql3/sets.cc
-    cql3/single_column_relation.cc
-    cql3/statements/alter_keyspace_statement.cc
-    cql3/statements/alter_table_statement.cc
-    cql3/statements/alter_type_statement.cc
-    cql3/statements/alter_view_statement.cc
-    cql3/statements/authentication_statement.cc
-    cql3/statements/authorization_statement.cc
-    cql3/statements/batch_statement.cc
-    cql3/statements/cas_request.cc
-    cql3/statements/cf_prop_defs.cc
-    cql3/statements/cf_statement.cc
-    cql3/statements/create_function_statement.cc
-    cql3/statements/create_index_statement.cc
-    cql3/statements/create_keyspace_statement.cc
-    cql3/statements/create_table_statement.cc
-    cql3/statements/create_type_statement.cc
-    cql3/statements/create_view_statement.cc
-    cql3/statements/delete_statement.cc
-    cql3/statements/drop_function_statement.cc
-    cql3/statements/drop_index_statement.cc
-    cql3/statements/drop_keyspace_statement.cc
-    cql3/statements/drop_table_statement.cc
-    cql3/statements/drop_type_statement.cc
-    cql3/statements/drop_view_statement.cc
-    cql3/statements/function_statement.cc
-    cql3/statements/grant_statement.cc
-    cql3/statements/index_prop_defs.cc
-    cql3/statements/index_target.cc
-    cql3/statements/ks_prop_defs.cc
-    cql3/statements/list_permissions_statement.cc
-    cql3/statements/list_users_statement.cc
-    cql3/statements/modification_statement.cc
-    cql3/statements/permission_altering_statement.cc
-    cql3/statements/property_definitions.cc
-    cql3/statements/raw/parsed_statement.cc
-    cql3/statements/revoke_statement.cc
-    cql3/statements/role-management-statements.cc
-    cql3/statements/schema_altering_statement.cc
-    cql3/statements/select_statement.cc
-    cql3/statements/truncate_statement.cc
-    cql3/statements/update_statement.cc
-    cql3/statements/use_statement.cc
-    cql3/token_relation.cc
-    cql3/tuples.cc
-    cql3/type_json.cc
-    cql3/untyped_result_set.cc
-    cql3/update_parameters.cc
-    cql3/user_types.cc
-    cql3/ut_name.cc
-    cql3/util.cc
-    cql3/values.cc
-    cql3/variable_specifications.cc
-    data/cell.cc
-    database.cc
-    db/batchlog_manager.cc
-    db/commitlog/commitlog.cc
-    db/commitlog/commitlog_entry.cc
-    db/commitlog/commitlog_replayer.cc
-    db/config.cc
-    db/consistency_level.cc
-    db/cql_type_parser.cc
-    db/data_listeners.cc
-    db/extensions.cc
-    db/heat_load_balance.cc
-    db/hints/manager.cc
-    db/hints/resource_manager.cc
-    db/large_data_handler.cc
-    db/legacy_schema_migrator.cc
-    db/marshal/type_parser.cc
-    db/schema_tables.cc
-    db/size_estimates_virtual_reader.cc
-    db/snapshot-ctl.cc
-    db/sstables-format-selector.cc
-    db/system_distributed_keyspace.cc
-    db/system_keyspace.cc
-    db/view/row_locking.cc
-    db/view/view.cc
-    db/view/view_update_generator.cc
-    dht/boot_strapper.cc
-    dht/i_partitioner.cc
-    dht/murmur3_partitioner.cc
-    dht/range_streamer.cc
-    dht/token.cc
-    distributed_loader.cc
-    duration.cc
-    exceptions/exceptions.cc
-    flat_mutation_reader.cc
-    frozen_mutation.cc
-    frozen_schema.cc
-    gms/application_state.cc
-    gms/endpoint_state.cc
-    gms/failure_detector.cc
-    gms/feature_service.cc
-    gms/gossip_digest_ack.cc
-    gms/gossip_digest_ack2.cc
-    gms/gossip_digest_syn.cc
-    gms/gossiper.cc
-    gms/inet_address.cc
-    gms/version_generator.cc
-    gms/versioned_value.cc
-    hashers.cc
-    index/secondary_index.cc
-    index/secondary_index_manager.cc
-    init.cc
-    keys.cc
-    lister.cc
-    locator/abstract_replication_strategy.cc
-    locator/ec2_multi_region_snitch.cc
-    locator/ec2_snitch.cc
-    locator/everywhere_replication_strategy.cc
-    locator/gce_snitch.cc
-    locator/gossiping_property_file_snitch.cc
-    locator/local_strategy.cc
-    locator/network_topology_strategy.cc
-    locator/production_snitch_base.cc
-    locator/rack_inferring_snitch.cc
-    locator/simple_snitch.cc
-    locator/simple_strategy.cc
-    locator/snitch_base.cc
-    locator/token_metadata.cc
-    lua.cc
-    main.cc
-    memtable.cc
-    message/messaging_service.cc
-    multishard_mutation_query.cc
-    mutation.cc
-    raft/fsm.cc
-    raft/log.cc
-    raft/progress.cc
-    raft/raft.cc
-    raft/server.cc
-    mutation_fragment.cc
-    mutation_partition.cc
-    mutation_partition_serializer.cc
-    mutation_partition_view.cc
-    mutation_query.cc
-    mutation_reader.cc
-    mutation_writer/multishard_writer.cc
-    mutation_writer/shard_based_splitting_writer.cc
-    mutation_writer/timestamp_based_splitting_writer.cc
-    mutation_writer/feed_writers.cc
-    partition_slice_builder.cc
-    partition_version.cc
-    querier.cc
-    query-result-set.cc
-    query.cc
-    range_tombstone.cc
-    range_tombstone_list.cc
-    reader_concurrency_semaphore.cc
-    redis/abstract_command.cc
-    redis/command_factory.cc
-    redis/commands.cc
-    redis/keyspace_utils.cc
-    redis/lolwut.cc
-    redis/mutation_utils.cc
-    redis/options.cc
-    redis/query_processor.cc
-    redis/query_utils.cc
-    redis/server.cc
-    redis/service.cc
-    redis/stats.cc
-    repair/repair.cc
-    repair/row_level.cc
-    row_cache.cc
-    schema.cc
-    schema_mutations.cc
-    schema_registry.cc
-    service/client_state.cc
-    service/migration_manager.cc
-    service/migration_task.cc
-    service/misc_services.cc
-    service/pager/paging_state.cc
-    service/pager/query_pagers.cc
-    service/paxos/paxos_state.cc
-    service/paxos/prepare_response.cc
-    service/paxos/prepare_summary.cc
-    service/paxos/proposal.cc
-    service/priority_manager.cc
-    service/storage_proxy.cc
-    service/storage_service.cc
-    sstables/compaction.cc
-    sstables/compaction_manager.cc
-    sstables/compaction_strategy.cc
-    sstables/compress.cc
-    sstables/integrity_checked_file_impl.cc
-    sstables/kl/writer.cc
-    sstables/leveled_compaction_strategy.cc
-    sstables/m_format_read_helpers.cc
-    sstables/metadata_collector.cc
-    sstables/mp_row_consumer.cc
-    sstables/mx/writer.cc
-    sstables/partition.cc
-    sstables/prepended_input_stream.cc
-    sstables/random_access_reader.cc
-    sstables/size_tiered_compaction_strategy.cc
-    sstables/sstable_directory.cc
-    sstables/sstable_version.cc
-    sstables/sstables.cc
-    sstables/sstables_manager.cc
-    sstables/time_window_compaction_strategy.cc
-    sstables/writer.cc
-    streaming/progress_info.cc
-    streaming/session_info.cc
-    streaming/stream_coordinator.cc
-    streaming/stream_manager.cc
-    streaming/stream_plan.cc
-    streaming/stream_reason.cc
-    streaming/stream_receive_task.cc
-    streaming/stream_request.cc
-    streaming/stream_result_future.cc
-    streaming/stream_session.cc
-    streaming/stream_session_state.cc
-    streaming/stream_summary.cc
-    streaming/stream_task.cc
-    streaming/stream_transfer_task.cc
-    table.cc
-    table_helper.cc
-    thrift/controller.cc
-    thrift/handler.cc
-    thrift/server.cc
-    thrift/thrift_validation.cc
-    timeout_config.cc
-    tracing/trace_keyspace_helper.cc
-    tracing/trace_state.cc
-    tracing/traced_file.cc
-    tracing/tracing.cc
-    tracing/tracing_backend_registry.cc
-    transport/controller.cc
-    transport/cql_protocol_extension.cc
-    transport/event.cc
-    transport/event_notifier.cc
-    transport/messages/result_message.cc
-    transport/server.cc
-    types.cc
-    unimplemented.cc
-    utils/UUID_gen.cc
-    utils/arch/powerpc/crc32-vpmsum/crc32_wrapper.cc
-    utils/array-search.cc
-    utils/ascii.cc
-    utils/big_decimal.cc
-    utils/bloom_calculations.cc
-    utils/bloom_filter.cc
-    utils/buffer_input_stream.cc
-    utils/build_id.cc
-    utils/config_file.cc
-    utils/directories.cc
-    utils/disk-error-handler.cc
-    utils/dynamic_bitset.cc
-    utils/error_injection.cc
-    utils/exceptions.cc
-    utils/file_lock.cc
-    utils/generation-number.cc
-    utils/gz/crc_combine.cc
-    utils/human_readable.cc
-    utils/i_filter.cc
-    utils/large_bitset.cc
-    utils/like_matcher.cc
-    utils/limiting_data_source.cc
-    utils/logalloc.cc
-    utils/managed_bytes.cc
-    utils/multiprecision_int.cc
-    utils/murmur_hash.cc
-    utils/rate_limiter.cc
-    utils/rjson.cc
-    utils/runtime.cc
-    utils/updateable_value.cc
-    utils/utf8.cc
-    utils/uuid.cc
-    validation.cc
-    vint-serialization.cc
-    zstd.cc
-    release.cc)
-
-set(scylla_gen_sources
-    "${scylla_thrift_gen_cassandra_files}"
-    "${scylla_ragel_gen_protocol_parser_file}"
-    "${swagger_gen_files}"
-    "${idl_gen_files}"
-    "${antlr3_gen_files}")
+set(SCYLLA_SOURCE_FILES
+        ${SCYLLA_ROOT_SOURCE_FILES}
+        ${SCYLLA_GEN_SOURCE_FILES}
+        ${SCYLLA_SUB_SOURCE_FILES})

 add_executable(scylla
-    ${scylla_sources}
-    ${scylla_gen_sources})
+        ${SEASTAR_SOURCE_FILES}
+        ${SCYLLA_SOURCE_FILES})

-target_link_libraries(scylla PRIVATE
-    seastar
-    # Boost dependencies
-    Boost::filesystem
-    Boost::program_options
-    Boost::system
-    Boost::thread
-    Boost::regex
-    Boost::headers
-    # Abseil libs
-    absl::hashtablez_sampler
-    absl::raw_hash_set
-    absl::synchronization
-    absl::graphcycles_internal
-    absl::stacktrace
-    absl::symbolize
-    absl::debugging_internal
-    absl::demangle_internal
-    absl::time
-    absl::time_zone
-    absl::int128
-    absl::city
-    absl::hash
-    absl::malloc_internal
-    absl::spinlock_wait
-    absl::base
-    absl::dynamic_annotations
-    absl::raw_logging_internal
-    absl::exponential_biased
-    absl::throw_delegate
-    # System libs
-    ZLIB::ZLIB
-    ICU::uc
-    systemd
-    zstd
-    snappy
-    ${LUA_LIBRARIES}
-    thrift
-    crypt)
+# If the Seastar pkg-config information is available, append to the default flags.
+#
+# For ease of browsing the source code, we always pretend that DPDK is enabled.
+target_compile_options(scylla PUBLIC
+        -std=gnu++20
+        -DHAVE_DPDK
+        -DHAVE_HWLOC
+        "${SEASTAR_CFLAGS}")

-target_link_libraries(scylla PRIVATE
-    -Wl,--build-id=sha1 # Force SHA1 build-id generation
-    # TODO: Use lld linker if it's available, otherwise gold, else bfd
-    -fuse-ld=lld)
-# TODO: patch dynamic linker to match configure.py behavior
-
-target_compile_options(scylla PRIVATE
-    -std=gnu++20
-    -fcoroutines # TODO: Clang does not have this flag, adjust to both variants
-    ${target_arch_flag})
-# Hacks needed to expose internal APIs for xxhash dependencies
-target_compile_definitions(scylla PRIVATE XXH_PRIVATE_API HAVE_LZ4_COMPRESS_DEFAULT)
-
-target_include_directories(scylla PRIVATE
-    "${CMAKE_CURRENT_SOURCE_DIR}"
-    libdeflate
-    abseil
-    "${scylla_gen_build_dir}")
-
-###
-### Create crc_combine_table helper executable.
-### Use it to generate crc_combine_table.cc to be used in scylla at build time.
-###
-add_executable(crc_combine_table utils/gz/gen_crc_combine_table.cc)
-target_link_libraries(crc_combine_table PRIVATE seastar)
-target_include_directories(crc_combine_table PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
-target_compile_options(crc_combine_table PRIVATE
-    -std=gnu++20
-    -fcoroutines
-    ${target_arch_flag})
-add_dependencies(scylla crc_combine_table)
-
-# Generate an additional source file at build time that is needed for Scylla compilation
-add_custom_command(OUTPUT "${scylla_gen_build_dir}/utils/gz/crc_combine_table.cc"
-    COMMAND $<TARGET_FILE:crc_combine_table> > "${scylla_gen_build_dir}/utils/gz/crc_combine_table.cc"
-    DEPENDS crc_combine_table)
-target_sources(scylla PRIVATE "${scylla_gen_build_dir}/utils/gz/crc_combine_table.cc")
-
-###
-### Generate version file and supply appropriate compile definitions for release.cc
-###
-execute_process(COMMAND ${CMAKE_SOURCE_DIR}/SCYLLA-VERSION-GEN RESULT_VARIABLE scylla_version_gen_res)
-if(scylla_version_gen_res)
-    message(SEND_ERROR "Version file generation failed. Return code: ${scylla_version_gen_res}")
-endif()
-
-file(READ build/SCYLLA-VERSION-FILE scylla_version)
-string(STRIP "${scylla_version}" scylla_version)
-
-file(READ build/SCYLLA-RELEASE-FILE scylla_release)
-string(STRIP "${scylla_release}" scylla_release)
-
-get_property(release_cdefs SOURCE "${CMAKE_SOURCE_DIR}/release.cc" PROPERTY COMPILE_DEFINITIONS)
-list(APPEND release_cdefs "SCYLLA_VERSION=\"${scylla_version}\"" "SCYLLA_RELEASE=\"${scylla_release}\"")
-set_source_files_properties("${CMAKE_SOURCE_DIR}/release.cc" PROPERTIES COMPILE_DEFINITIONS "${release_cdefs}")
-
-###
-### Custom command for building libdeflate. Link the library to scylla.
-###
-set(libdeflate_lib "${scylla_build_dir}/libdeflate/libdeflate.a")
-add_custom_command(OUTPUT "${libdeflate_lib}"
-    COMMAND make -C libdeflate
-        BUILD_DIR=../build/${BUILD_TYPE}/libdeflate/
-        CC=${CMAKE_C_COMPILER}
-        "CFLAGS=${target_arch_flag}"
-        ../build/${BUILD_TYPE}/libdeflate//libdeflate.a) # Two backslashes are important!
-# Hack to force generating custom command to produce libdeflate.a
-add_custom_target(libdeflate DEPENDS "${libdeflate_lib}")
-target_link_libraries(scylla PRIVATE "${libdeflate_lib}")
-
-# TODO: create cmake/ directory and move utilities (generate functions etc) there
-# TODO: Build tests if BUILD_TESTING=on (using CTest module)
+# The order matters here: prefer the "static" DPDK directories to any dynamic paths from pkg-config. Some files are only
+# available dynamically, though.
+target_include_directories(scylla PUBLIC
+        .
+        ${SEASTAR_DPDK_INCLUDE_DIRS}
+        ${SEASTAR_INCLUDE_DIRS}
+        ${Boost_INCLUDE_DIRS}
+        xxhash
+        libdeflate
+        abseil
+        build/${BUILD_TYPE}/gen)
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,13 +1,11 @@
-# Contributing
-
-## Asking questions or requesting help
+# Asking questions or requesting help

 Use the [ScyllaDB user mailing list](https://groups.google.com/forum/#!forum/scylladb-users) or the [Slack workspace](http://slack.scylladb.com) for general questions and help.

-## Reporting an issue
+# Reporting an issue

 Please use the [Issue Tracker](https://github.com/scylladb/scylla/issues/) to report issues.  Fill in as much information as you can in the issue template, especially for performance problems.

-## Contributing Code to Scylla
+# Contributing Code to Scylla

 To contribute code to Scylla, you need to sign the [Contributor License Agreement](https://www.scylladb.com/open-source/contributor-agreement/) and send your changes as [patches](https://github.com/scylladb/scylla/wiki/Formatting-and-sending-patches) to the [mailing list](https://groups.google.com/forum/#!forum/scylladb-dev). We don't accept pull requests on GitHub.
--- a/README.md
+++ b/README.md
@@ -78,7 +78,10 @@ and the current compatibility of this feature as well as Scylla-specific extensi

 ## Documentation

-Documentation can be found [here](https://scylla.docs.scylladb.com).
+Documentation can be found in [./docs](./docs) and on the
+[wiki](https://github.com/scylladb/scylla/wiki). There is currently no clear
+definition of what goes where, so when looking for something be sure to check
+both.
 Seastar documentation can be found [here](http://docs.seastar.io/master/index.html).
 User documentation can be found [here](https://docs.scylladb.com/).

--- a/2
+++ b/2
@@ -1,7 +1,7 @@
 #!/bin/sh

 PRODUCT=scylla
-VERSION=4.4.9
+VERSION=4.3.7

 if test -f version
 then
--- a/alternator/error.hh
+++ b/alternator/error.hh
@@ -59,9 +59,6 @@ public:
    static api_error invalid_signature(std::string msg) {
        return api_error("InvalidSignatureException", std::move(msg));
    }
-    static api_error missing_authentication_token(std::string msg) {
-        return api_error("MissingAuthenticationTokenException", std::move(msg));
-    }
    static api_error unrecognized_client(std::string msg) {
        return api_error("UnrecognizedClientException", std::move(msg));
    }
--- a/alternator/executor.cc
+++ b/alternator/executor.cc
@@ -55,7 +55,7 @@
 #include "schema.hh"
 #include "alternator/tags_extension.hh"
 #include "alternator/rmw_operation.hh"
-#include <seastar/core/coroutine.hh>
+
 #include <boost/range/adaptors.hpp>

 logging::logger elogger("alternator-executor");
@@ -202,7 +202,7 @@ static schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& r
    if (!schema) {
        // if we get here then the name was missing, since syntax or missing actual CF 
        // checks throw. Slow path, but just call get_table_name to generate exception. 
-        get_table_name(request);
+        get_table_name(request);        
    }
    return schema;
 }
@@ -220,7 +220,7 @@ static std::tuple<bool, std::string_view, std::string_view> try_get_internal_tab
    std::string_view ks_name = table_name.substr(0, delim);
    table_name.remove_prefix(ks_name.size() + 1);
    // Only internal keyspaces can be accessed to avoid leakage
-    if (!is_internal_keyspace(ks_name)) {
+    if (!is_internal_keyspace(sstring(ks_name))) {
        return {false, "", ""};
    }
    return {true, ks_name, table_name};
@@ -404,7 +404,6 @@ future<executor::request_return_type> executor::describe_table(client_state& cli
    // returned.
    rjson::set(table_description, "TableStatus", "ACTIVE");
    rjson::set(table_description, "TableArn", generate_arn_for_table(*schema));
-    rjson::set(table_description, "TableId", rjson::from_string(schema->id().to_sstring()));
    // FIXME: Instead of hardcoding, we should take into account which mode was chosen
    // when the table was created. But, Spark jobs expect something to be returned
    // and PAY_PER_REQUEST seems closer to reality than PROVISIONED.
@@ -476,8 +475,8 @@ future<executor::request_return_type> executor::delete_table(client_state& clien
        return make_ready_future<request_return_type>(api_error::resource_not_found(
                format("Requested resource not found: Table: {} not found", table_name)));
    }
-    return _mm.announce_column_family_drop(keyspace_name, table_name, service::migration_manager::drop_views::yes).then([this, keyspace_name] {
-        return _mm.announce_keyspace_drop(keyspace_name);
+    return _mm.announce_column_family_drop(keyspace_name, table_name, false, service::migration_manager::drop_views::yes).then([this, keyspace_name] {
+        return _mm.announce_keyspace_drop(keyspace_name, false);
    }).then([table_name = std::move(table_name)] {
        // FIXME: need more attributes?
        rjson::value table_description = rjson::empty_object();
@@ -704,48 +703,52 @@ static void update_tags_map(const rjson::value& tags, std::map<sstring, sstring>
 static future<> update_tags(service::migration_manager& mm, schema_ptr schema, std::map<sstring, sstring>&& tags_map) {
    schema_builder builder(schema);
    builder.add_extension(tags_extension::NAME, ::make_shared<tags_extension>(std::move(tags_map)));
-    return mm.announce_column_family_update(builder.build(), false, std::vector<view_ptr>());
+    return mm.announce_column_family_update(builder.build(), false, std::vector<view_ptr>(), false);
 }

 future<executor::request_return_type> executor::tag_resource(client_state& client_state, service_permit permit, rjson::value request) {
    _stats.api_operations.tag_resource++;

-    const rjson::value* arn = rjson::find(request, "ResourceArn");
-    if (!arn || !arn->IsString()) {
-        co_return api_error::access_denied("Incorrect resource identifier");
-    }
-    schema_ptr schema = get_table_from_arn(_proxy, rjson::to_string_view(*arn));
-    std::map<sstring, sstring> tags_map = get_tags_of_table(schema);
-    const rjson::value* tags = rjson::find(request, "Tags");
-    if (!tags || !tags->IsArray()) {
-        co_return api_error::validation("Cannot parse tags");
-    }
-    if (tags->Size() < 1) {
-        co_return api_error::validation("The number of tags must be at least 1") ;
-    }
-    update_tags_map(*tags, tags_map,  update_tags_action::add_tags);
-    co_await update_tags(_mm, schema, std::move(tags_map));
-    co_return json_string("");
+    return seastar::async([this, &client_state, request = std::move(request)] () mutable -> request_return_type {
+        const rjson::value* arn = rjson::find(request, "ResourceArn");
+        if (!arn || !arn->IsString()) {
+            return api_error::access_denied("Incorrect resource identifier");
+        }
+        schema_ptr schema = get_table_from_arn(_proxy, rjson::to_string_view(*arn));
+        std::map<sstring, sstring> tags_map = get_tags_of_table(schema);
+        const rjson::value* tags = rjson::find(request, "Tags");
+        if (!tags || !tags->IsArray()) {
+            return api_error::validation("Cannot parse tags");
+        }
+        if (tags->Size() < 1) {
+            return api_error::validation("The number of tags must be at least 1") ;
+        }
+        update_tags_map(*tags, tags_map,  update_tags_action::add_tags);
+        update_tags(_mm, schema, std::move(tags_map)).get();
+        return json_string("");
+    });
 }

 future<executor::request_return_type> executor::untag_resource(client_state& client_state, service_permit permit, rjson::value request) {
    _stats.api_operations.untag_resource++;

-    const rjson::value* arn = rjson::find(request, "ResourceArn");
-    if (!arn || !arn->IsString()) {
-        co_return api_error::access_denied("Incorrect resource identifier");
-    }
-    const rjson::value* tags = rjson::find(request, "TagKeys");
-    if (!tags || !tags->IsArray()) {
-        co_return api_error::validation(format("Cannot parse tag keys"));
-    }
+    return seastar::async([this, &client_state, request = std::move(request)] () -> request_return_type {
+        const rjson::value* arn = rjson::find(request, "ResourceArn");
+        if (!arn || !arn->IsString()) {
+            return api_error::access_denied("Incorrect resource identifier");
+        }
+        const rjson::value* tags = rjson::find(request, "TagKeys");
+        if (!tags || !tags->IsArray()) {
+            return api_error::validation(format("Cannot parse tag keys"));
+        }

-    schema_ptr schema = get_table_from_arn(_proxy, rjson::to_string_view(*arn));
+        schema_ptr schema = get_table_from_arn(_proxy, rjson::to_string_view(*arn));

-    std::map<sstring, sstring> tags_map = get_tags_of_table(schema);
-    update_tags_map(*tags, tags_map, update_tags_action::delete_tags);
-    co_await update_tags(_mm, schema, std::move(tags_map));
-    co_return json_string("");
+        std::map<sstring, sstring> tags_map = get_tags_of_table(schema);
+        update_tags_map(*tags, tags_map, update_tags_action::delete_tags);
+        update_tags(_mm, schema, std::move(tags_map)).get();
+        return json_string("");
+    });
 }

 future<executor::request_return_type> executor::list_tags_of_resource(client_state& client_state, service_permit permit, rjson::value request) {
@@ -981,7 +984,7 @@ future<executor::request_return_type> executor::create_table(client_state& clien
    return create_keyspace(keyspace_name).handle_exception_type([] (exceptions::already_exists_exception&) {
            // Ignore the fact that the keyspace may already exist. See discussion in #6340
        }).then([this, table_name, request = std::move(request), schema, view_builders = std::move(view_builders), tags_map = std::move(tags_map)] () mutable {
-        return futurize_invoke([&] { return _mm.announce_new_column_family(schema); }).then([this, table_info = std::move(request), schema, view_builders = std::move(view_builders), tags_map = std::move(tags_map)] () mutable {
+        return futurize_invoke([&] { return _mm.announce_new_column_family(schema, false); }).then([this, table_info = std::move(request), schema, view_builders = std::move(view_builders), tags_map = std::move(tags_map)] () mutable {
            return parallel_for_each(std::move(view_builders), [this, schema] (schema_builder builder) {
                return _mm.announce_new_view(view_ptr(builder.build()));
            }).then([this, table_info = std::move(table_info), schema, tags_map = std::move(tags_map)] () mutable {
@@ -1237,16 +1240,10 @@ mutation put_or_delete_item::build(schema_ptr schema, api::timestamp_type ts) co
    return m;
 }

-// The DynamoDB API doesn't let the client control the server's timeout, so
-// we have a global default_timeout() for Alternator requests. The value of
-// default_timeout is overwritten by main.cc based on the
-// "alternator_timeout_in_ms" configuration parameter.
-db::timeout_clock::duration executor::s_default_timeout = 10s;
-void executor::set_default_timeout(db::timeout_clock::duration timeout) {
-    s_default_timeout = timeout;
-}
+// The DynamoDB API doesn't let the client control the server's timeout.
+// Let's pick something reasonable:
 db::timeout_clock::time_point executor::default_timeout() {
-    return db::timeout_clock::now() + s_default_timeout;
+    return db::timeout_clock::now() + 10s;
 }
        
 static future<std::unique_ptr<rjson::value>> get_previous_item(
@@ -1882,182 +1879,18 @@ static std::string get_item_type_string(const rjson::value& v) {
    return it->name.GetString();
 }

-// attrs_to_get saves for each top-level attribute an attrs_to_get_node,
-// a hierarchy of subparts that need to be kept. The following function
-// takes a given JSON value and drops its parts which weren't asked to be
-// kept. It modifies the given JSON value, or returns false to signify that
-// the entire object should be dropped.
-// Note that The JSON value is assumed to be encoded using the DynamoDB
-// conventions - i.e., it is really a map whose key has a type string,
-// and the value is the real object.
-template<typename T>
-static bool hierarchy_filter(rjson::value& val, const attribute_path_map_node<T>& h) {
-    if (!val.IsObject() || val.MemberCount() != 1) {
-        // This shouldn't happen. We shouldn't have stored malformed objects.
-        // But today Alternator does not validate the structure of nested
-        // documents before storing them, so this can happen on read.
-        throw api_error::internal(format("Malformed value object read: {}", val));
-    }
-    const char* type = val.MemberBegin()->name.GetString();
-    rjson::value& v = val.MemberBegin()->value;
-    if (h.has_members()) {
-        const auto& members = h.get_members();
-        if (type[0] != 'M' || !v.IsObject()) {
-            // If v is not an object (dictionary, map), none of the members
-            // can match.
-            return false;
-        }
-        rjson::value newv = rjson::empty_object();
-        for (auto it = v.MemberBegin(); it != v.MemberEnd(); ++it) {
-            std::string attr = it->name.GetString();
-            auto x = members.find(attr);
-            if (x != members.end()) {
-                if (x->second) {
-                    // Only a part of this attribute is to be filtered, do it.
-                    if (hierarchy_filter(it->value, *x->second)) {
-                        rjson::set_with_string_name(newv, attr, std::move(it->value));
-                    }
-                } else {
-                    // The entire attribute is to be kept
-                    rjson::set_with_string_name(newv, attr, std::move(it->value));
-                }
-            }
-        }
-        if (newv.MemberCount() == 0) {
-            return false;
-        }
-        v = newv;
-    } else if (h.has_indexes()) {
-        const auto& indexes = h.get_indexes();
-        if (type[0] != 'L' || !v.IsArray()) {
-            return false;
-        }
-        rjson::value newv = rjson::empty_array();
-        const auto& a = v.GetArray();
-        for (unsigned i = 0; i < v.Size(); i++) {
-            auto x = indexes.find(i);
-            if (x != indexes.end()) {
-                if (x->second) {
-                    if (hierarchy_filter(a[i], *x->second)) {
-                        rjson::push_back(newv, std::move(a[i]));
-                    }
-                } else {
-                    // The entire attribute is to be kept
-                    rjson::push_back(newv, std::move(a[i]));
-                }
-            }
-        }
-        if (newv.Size() == 0) {
-            return false;
-        }
-        v = newv;
-    }
-    return true;
-}
-
-// Add a path to a attribute_path_map. Throws a validation error if the path
-// "overlaps" with one already in the filter (one is a sub-path of the other)
-// or "conflicts" with it (both a member and index is requested).
-template<typename T>
-void attribute_path_map_add(const char* source, attribute_path_map<T>& map, const parsed::path& p, T value = {}) {
-   using node = attribute_path_map_node<T>;
-    // The first step is to look for the top-level attribute (p.root()):
-    auto it = map.find(p.root());
-    if (it == map.end()) {
-        if (p.has_operators()) {
-            it = map.emplace(p.root(), node {std::nullopt}).first;
-        } else {
-            (void) map.emplace(p.root(), node {std::move(value)}).first;
-            // Value inserted for top-level node. We're done.
-            return;
-        }
-    } else if(!p.has_operators()) {
-        // If p is top-level and we already have it or a part of it
-        // in map, it's a forbidden overlapping path.
-        throw api_error::validation(format(
-            "Invalid {}: two document paths overlap at {}", source, p.root()));
-    } else if (it->second.has_value()) {
-        // If we're here, it != map.end() && p.has_operators && it->second.has_value().
-        // This means the top-level attribute already has a value, and we're
-        // trying to add a non-top-level value. It's an overlap.
-        throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p.root()));
-    }
-    node* h = &it->second;
-    // The second step is to walk h from the top-level node to the inner node
-    // where we're supposed to insert the value:
-    for (const auto& op : p.operators()) {
-        std::visit(overloaded_functor {
-            [&] (const std::string& member) {
-                if (h->is_empty()) {
-                    *h = node {typename node::members_t()};
-                } else if (h->has_indexes()) {
-                    throw api_error::validation(format("Invalid {}: two document paths conflict at {}", source, p));
-                } else if (h->has_value()) {
-                    throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
-                }
-                typename node::members_t& members = h->get_members();
-                auto it = members.find(member);
-                if (it == members.end()) {
-                    it = members.insert({member, make_shared<node>()}).first;
-                }
-                h = it->second.get();
-            },
-            [&] (unsigned index) {
-                if (h->is_empty()) {
-                    *h = node {typename node::indexes_t()};
-                } else if (h->has_members()) {
-                    throw api_error::validation(format("Invalid {}: two document paths conflict at {}", source, p));
-                } else if (h->has_value()) {
-                    throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
-                }
-                typename node::indexes_t& indexes = h->get_indexes();
-                auto it = indexes.find(index);
-                if (it == indexes.end()) {
-                    it = indexes.insert({index, make_shared<node>()}).first;
-                }
-                h = it->second.get();
-            }
-        }, op);
-    }
-    // Finally, insert the value in the node h.
-    if (h->is_empty()) {
-        *h = node {std::move(value)};
-    } else {
-        throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
-    }
-}
-
-// A very simplified version of the above function for the special case of
-// adding only top-level attribute. It's not only simpler, we also use a
-// different error message, referring to a "duplicate attribute"instead of
-// "overlapping paths". DynamoDB also has this distinction (errors in
-// AttributesToGet refer to duplicates, not overlaps, but errors in
-// ProjectionExpression refer to overlap - even if it's an exact duplicate).
-template<typename T>
-void attribute_path_map_add(const char* source, attribute_path_map<T>& map, const std::string& attr, T value = {}) {
-   using node = attribute_path_map_node<T>;
-    auto it = map.find(attr);
-    if (it == map.end()) {
-        map.emplace(attr, node {std::move(value)});
-    } else {
-        throw api_error::validation(format(
-            "Invalid {}: Duplicate attribute: {}", source, attr));
-    }
-}
-
 // calculate_attrs_to_get() takes either AttributesToGet or
 // ProjectionExpression parameters (having both is *not* allowed),
 // and returns the list of cells we need to read, or an empty set when
 // *all* attributes are to be returned.
-// However, in our current implementation, only top-level attributes are
-// stored as separate cells - a nested document is stored serialized together
-// (as JSON) in the same cell. So this function return a map - each key is the
-// top-level attribute we will need need to read, and the value for each
-// top-level attribute is the partial hierarchy (struct hierarchy_filter)
-// that we will need to extract from that serialized JSON.
-// For example, if ProjectionExpression lists a.b and a.c[2], we
-// return one top-level attribute name, "a", with the value "{b, c[2]}".
-static attrs_to_get calculate_attrs_to_get(const rjson::value& req, std::unordered_set<std::string>& used_attribute_names) {
+// In our current implementation, only top-level attributes are stored
+// as cells, and nested documents are stored serialized as JSON.
+// So this function currently returns only the top-level attributes
+// but we also need to add, after the query, filtering to keep only
+// the parts of the JSON attributes that were chosen in the paths'
+// operators. Because we don't have such filtering yet (FIXME), we fail here
+// if the requested paths are anything but top-level attributes.
+std::unordered_set<std::string> calculate_attrs_to_get(const rjson::value& req, std::unordered_set<std::string>& used_attribute_names) {
    const bool has_attributes_to_get = req.HasMember("AttributesToGet");
    const bool has_projection_expression = req.HasMember("ProjectionExpression");
    if (has_attributes_to_get && has_projection_expression) {
@@ -2066,9 +1899,9 @@ static attrs_to_get calculate_attrs_to_get(const rjson::value& req, std::unorder
    }
    if (has_attributes_to_get) {
        const rjson::value& attributes_to_get = req["AttributesToGet"];
-        attrs_to_get ret;
+        std::unordered_set<std::string> ret;
        for (auto it = attributes_to_get.Begin(); it != attributes_to_get.End(); ++it) {
-            attribute_path_map_add("AttributesToGet", ret, it->GetString());
+            ret.insert(it->GetString());
        }
        return ret;
    } else if (has_projection_expression) {
@@ -2081,13 +1914,24 @@ static attrs_to_get calculate_attrs_to_get(const rjson::value& req, std::unorder
            throw api_error::validation(e.what());
        }
        resolve_projection_expression(paths_to_get, expression_attribute_names, used_attribute_names);
-        attrs_to_get ret;
-        for (const parsed::path& p : paths_to_get) {
-            attribute_path_map_add("ProjectionExpression", ret, p);
-        }
+        std::unordered_set<std::string> seen_column_names;
+        auto ret = boost::copy_range<std::unordered_set<std::string>>(paths_to_get |
+            boost::adaptors::transformed([&] (const parsed::path& p) {
+                if (p.has_operators()) {
+                    // FIXME: this check will need to change when we support non-toplevel attributes
+                    throw api_error::validation("Non-toplevel attributes in ProjectionExpression not yet implemented");
+                }
+                if (!seen_column_names.insert(p.root()).second) {
+                    // FIXME: this check will need to change when we support non-toplevel attributes
+                    throw api_error::validation(
+                            format("Invalid ProjectionExpression: two document paths overlap with each other: {} and {}.",
+                                    p.root(), p.root()));
+                }
+                return p.root();
+            }));
        return ret;
    }
-    // An empty map asks to read everything
+    // An empty set asks to read everything
    return {};
 }

@@ -2108,7 +1952,7 @@ static attrs_to_get calculate_attrs_to_get(const rjson::value& req, std::unorder
 */ 
 void executor::describe_single_item(const cql3::selection::selection& selection,
    const std::vector<bytes_opt>& result_row,
-    const attrs_to_get& attrs_to_get,
+    const std::unordered_set<std::string>& attrs_to_get,
    rjson::value& item,
    bool include_all_embedded_attributes) 
 {
@@ -2129,16 +1973,7 @@ void executor::describe_single_item(const cql3::selection::selection& selection,
                std::string attr_name = value_cast<sstring>(entry.first);
                if (include_all_embedded_attributes || attrs_to_get.empty() || attrs_to_get.contains(attr_name)) {
                    bytes value = value_cast<bytes>(entry.second);
-                    rjson::value v = deserialize_item(value);
-                    auto it = attrs_to_get.find(attr_name);
-                    if (it != attrs_to_get.end()) {
-                        // attrs_to_get may have asked for only part of this attribute:
-                        if (hierarchy_filter(v, it->second)) {
-                            rjson::set_with_string_name(item, attr_name, std::move(v));
-                        }
-                    } else {
-                        rjson::set_with_string_name(item, attr_name, std::move(v));
-                    }
+                    rjson::set_with_string_name(item, attr_name, deserialize_item(value));
                }
            }
        }
@@ -2150,7 +1985,7 @@ std::optional<rjson::value> executor::describe_single_item(schema_ptr schema,
        const query::partition_slice& slice,
        const cql3::selection::selection& selection,
        const query::result& query_result,
-        const attrs_to_get& attrs_to_get) {
+        const std::unordered_set<std::string>& attrs_to_get) {
    rjson::value item = rjson::empty_object();

    cql3::selection::result_set_builder builder(selection, gc_clock::now(), cql_serialization_format::latest());
@@ -2186,16 +2021,8 @@ static bool check_needs_read_before_write(const parsed::value& v) {
    }, v._value);
 }

-static bool check_needs_read_before_write(const attribute_path_map<parsed::update_expression::action>& update_expression) {
-    return boost::algorithm::any_of(update_expression, [](const auto& p) {
-        if (!p.second.has_value()) {
-            // If the action is not on the top-level attribute, we need to
-            // read the old item: we change only a part of the top-level
-            // attribute, and write the full top-level attribute back.
-            return true;
-        }
-        // Otherwise, the action p.second.get_value() is just on top-level
-        // attribute. Check if it needs read-before-write:
+static bool check_needs_read_before_write(const parsed::update_expression& update_expression) {
+    return boost::algorithm::any_of(update_expression.actions(), [](const parsed::update_expression::action& action) {
        return std::visit(overloaded_functor {
            [&] (const parsed::update_expression::action::set& a) -> bool {
                return check_needs_read_before_write(a._rhs._v1) || (a._rhs._op != 'v' && check_needs_read_before_write(a._rhs._v2));
@@ -2209,7 +2036,7 @@ static bool check_needs_read_before_write(const attribute_path_map<parsed::updat
            [&] (const parsed::update_expression::action::del& a) -> bool {
                return true;
            }
-        }, p.second.get_value()._action);
+        }, action._action);
    });
 }

@@ -2218,11 +2045,7 @@ public:
    // Some information parsed during the constructor to check for input
    // errors, and cached to be used again during apply().
    rjson::value* _attribute_updates;
-    // Instead of keeping a parsed::update_expression with an unsorted list
-    // list of actions, we keep them in an attribute_path_map which groups
-    // them by top-level attribute, and detects forbidden overlaps/conflicts.
-    attribute_path_map<parsed::update_expression::action> _update_expression;
-
+    parsed::update_expression _update_expression;
    parsed::condition_expression _condition_expression;

    update_item_operation(service::storage_proxy& proxy, rjson::value&& request);
@@ -2253,22 +2076,16 @@ update_item_operation::update_item_operation(service::storage_proxy& proxy, rjso
            throw api_error::validation("UpdateExpression must be a string");
        }
        try {
-            parsed::update_expression expr = parse_update_expression(update_expression->GetString());
-            resolve_update_expression(expr,
+            _update_expression = parse_update_expression(update_expression->GetString());
+            resolve_update_expression(_update_expression,
                    expression_attribute_names, expression_attribute_values,
                    used_attribute_names, used_attribute_values);
-            if (expr.empty()) {
-                throw api_error::validation("Empty expression in UpdateExpression is not allowed");
-            }
-            for (auto& action : expr.actions()) {
-                // Unfortunately we need to copy the action's path, because
-                // we std::move the action object.
-                auto p = action._path;
-                attribute_path_map_add("UpdateExpression", _update_expression, p, std::move(action));
-            }
        } catch(expressions_syntax_error& e) {
            throw api_error::validation(e.what());
        }
+        if (_update_expression.empty()) {
+            throw api_error::validation("Empty expression in UpdateExpression is not allowed");
+        }
    }
    _attribute_updates = rjson::find(_request, "AttributeUpdates");
    if (_attribute_updates) {
@@ -2310,187 +2127,6 @@ update_item_operation::needs_read_before_write() const {
           (_returnvalues != returnvalues::NONE && _returnvalues != returnvalues::UPDATED_NEW);
 }

-// action_result() returns the result of applying an UpdateItem action -
-// this result is either a JSON object or an unset optional which indicates
-// the action was a deletion. The caller (update_item_operation::apply()
-// below) will either write this JSON as the content of a column, or
-// use it as a piece in a bigger top-level attribute.
-static std::optional<rjson::value> action_result(
-        const parsed::update_expression::action& action,
-        const rjson::value* previous_item) {
-    return std::visit(overloaded_functor {
-        [&] (const parsed::update_expression::action::set& a) -> std::optional<rjson::value> {
-            return calculate_value(a._rhs, previous_item);
-        },
-        [&] (const parsed::update_expression::action::remove& a) -> std::optional<rjson::value> {
-            return std::nullopt;
-        },
-        [&] (const parsed::update_expression::action::add& a) -> std::optional<rjson::value> {
-            parsed::value base;
-            parsed::value addition;
-            base.set_path(action._path);
-            addition.set_constant(a._valref);
-            rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item);
-            rjson::value v2 = calculate_value(addition, calculate_value_caller::UpdateExpression, previous_item);
-            rjson::value result;
-            // An ADD can be used to create a new attribute (when
-            // v1.IsNull()) or to add to a pre-existing attribute:
-            if (v1.IsNull()) {
-                std::string v2_type = get_item_type_string(v2);
-                if (v2_type == "N" || v2_type == "SS" || v2_type == "NS" || v2_type == "BS") {
-                    result = v2;
-                } else {
-                    throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v2));
-                }
-            } else {
-                std::string v1_type = get_item_type_string(v1);
-                if (v1_type == "N") {
-                    if (get_item_type_string(v2) != "N") {
-                        throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
-                    }
-                    result = number_add(v1, v2);
-                } else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
-                    if (get_item_type_string(v2) != v1_type) {
-                        throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
-                    }
-                    result = set_sum(v1, v2);
-                } else {
-                    throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v1));
-                }
-            }
-            return result;
-        },
-        [&] (const parsed::update_expression::action::del& a) -> std::optional<rjson::value> {
-            parsed::value base;
-            parsed::value subset;
-            base.set_path(action._path);
-            subset.set_constant(a._valref);
-            rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item);
-            rjson::value v2 = calculate_value(subset, calculate_value_caller::UpdateExpression, previous_item);
-            if (!v1.IsNull()) {
-                return set_diff(v1, v2);
-            }
-            // When we return nullopt here, we ask to *delete* this attribute,
-            // which is unnecessary because we know the attribute does not
-            // exist anyway. This is a waste, but a small one. Note that also
-            // for the "remove" action above we don't bother to check if the
-            // previous_item add anything to remove.
-            return std::nullopt;
-        }
-    }, action._action);
-}
-
-// Print an attribute_path_map_node<action> as the list of paths it contains:
-static std::ostream& operator<<(std::ostream& out, const attribute_path_map_node<parsed::update_expression::action>& h) {
-    if (h.has_value()) {
-        out << " " << h.get_value()._path;
-    } else if (h.has_members()) {
-        for (auto& member : h.get_members()) {
-            out << *member.second;
-        }
-    } else if (h.has_indexes()) {
-        for (auto& index : h.get_indexes()) {
-            out << *index.second;
-        }
-    }
-    return out;
-}
-
-// Apply the hierarchy of actions in an attribute_path_map_node<action> to a
-// JSON object which uses DynamoDB's serialization conventions. The complete,
-// unmodified, previous_item is also necessary for the right-hand sides of the
-// actions. Modifies obj in-place or returns false if it is to be removed.
-static bool hierarchy_actions(
-        rjson::value& obj,
-        const attribute_path_map_node<parsed::update_expression::action>& h,
-        const rjson::value* previous_item)
-{
-    if (!obj.IsObject() || obj.MemberCount() != 1) {
-        // This shouldn't happen. We shouldn't have stored malformed objects.
-        // But today Alternator does not validate the structure of nested
-        // documents before storing them, so this can happen on read.
-        throw api_error::validation(format("Malformed value object read: {}", obj));
-    }
-    const char* type = obj.MemberBegin()->name.GetString();
-    rjson::value& v = obj.MemberBegin()->value;
-    if (h.has_value()) {
-        // Action replacing everything in this position in the hierarchy
-        std::optional<rjson::value> newv = action_result(h.get_value(), previous_item);
-        if (newv) {
-            obj = std::move(*newv);
-        } else {
-            return false;
-        }
-    } else if (h.has_members()) {
-        if (type[0] != 'M' || !v.IsObject()) {
-            // A .something on a non-map doesn't work.
-            throw api_error::validation(format("UpdateExpression: document paths not valid for this item:{}", h));
-        }
-        for (const auto& member : h.get_members()) {
-            std::string attr = member.first;
-            const attribute_path_map_node<parsed::update_expression::action>& subh = *member.second;
-            rjson::value *subobj = rjson::find(v, attr);
-            if (subobj) {
-                if (!hierarchy_actions(*subobj, subh, previous_item)) {
-                    rjson::remove_member(v, attr);
-                }
-            } else {
-                // When a.b does not exist, setting a.b itself (i.e.
-                // subh.has_value()) is fine, but setting a.b.c is not.
-                if (subh.has_value()) {
-                    std::optional<rjson::value> newv = action_result(subh.get_value(), previous_item);
-                    if (newv) {
-                        rjson::set_with_string_name(v, attr, std::move(*newv));
-                    } else {
-                        throw api_error::validation(format("Can't remove document path {} - not present in item",
-                            subh.get_value()._path));
-                    }
-                } else {
-                    throw api_error::validation(format("UpdateExpression: document paths not valid for this item:{}", h));
-                }
-            }
-        }
-    } else if (h.has_indexes()) {
-        if (type[0] != 'L' || !v.IsArray()) {
-            // A [i] on a non-list doesn't work.
-            throw api_error::validation(format("UpdateExpression: document paths not valid for this item:{}", h));
-        }
-        unsigned nremoved = 0;
-        for (const auto& index : h.get_indexes()) {
-            unsigned i = index.first - nremoved;
-            const attribute_path_map_node<parsed::update_expression::action>& subh = *index.second;
-            if (i < v.Size()) {
-                if (!hierarchy_actions(v[i], subh, previous_item)) {
-                    v.Erase(v.Begin() + i);
-                    // If we have the actions "REMOVE a[1] SET a[3] = :val",
-                    // the index 3 refers to the original indexes, before any
-                    // items were removed. So we offset the next indexes
-                    // (which are guaranteed to be higher than i - indexes is
-                    // a sorted map) by an increased "nremoved".
-                    nremoved++;
-                }
-            } else {
-                // If a[7] does not exist, setting a[7] itself (i.e.
-                // subh.has_value()) is fine - and appends an item, though
-                // not necessarily with index 7. But setting a[7].b will
-                // not work.
-                if (subh.has_value()) {
-                    std::optional<rjson::value> newv = action_result(subh.get_value(), previous_item);
-                    if (newv) {
-                        rjson::push_back(v, std::move(*newv));
-                    } else {
-                        // Removing a[7] when the list has fewer elements is
-                        // silently ignored. It's not considered an error.
-                    }
-                } else {
-                    throw api_error::validation(format("UpdateExpression: document paths not valid for this item:{}", h));
-                }
-            }
-        }
-    }
-    return true;
-}
-
 std::optional<mutation>
 update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) const {
    if (!verify_expected(_request, previous_item.get()) ||
@@ -2505,37 +2141,17 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
    auto& row = m.partition().clustered_row(*_schema, _ck);
    attribute_collector attrs_collector;
    bool any_updates = false;
-    auto do_update = [&] (bytes&& column_name, const rjson::value& json_value,
-                          const attribute_path_map_node<parsed::update_expression::action>* h = nullptr) {
+    auto do_update = [&] (bytes&& column_name, const rjson::value& json_value) {
        any_updates = true;
-        if (_returnvalues == returnvalues::ALL_NEW) {
-            rjson::replace_with_string_name(_return_attributes,
-                to_sstring_view(column_name), rjson::copy(json_value));
-        } else if (_returnvalues == returnvalues::UPDATED_NEW) {
-            rjson::value&& v = rjson::copy(json_value);
-            if (h) {
-                // If the operation was only on specific attribute paths,
-                // leave only them in _return_attributes.
-                if (hierarchy_filter(v, *h)) {
-                    rjson::set_with_string_name(_return_attributes,
-                        to_sstring_view(column_name), std::move(v));
-                }
-            } else {
-                rjson::set_with_string_name(_return_attributes,
-                    to_sstring_view(column_name), std::move(v));
-            }
+        if (_returnvalues == returnvalues::ALL_NEW ||
+            _returnvalues == returnvalues::UPDATED_NEW) {
+            rjson::set_with_string_name(_return_attributes,
+                    to_sstring_view(column_name), rjson::copy(json_value));
        } else if (_returnvalues == returnvalues::UPDATED_OLD && previous_item) {
            std::string_view cn =  to_sstring_view(column_name);
            const rjson::value* col = rjson::find(*previous_item, cn);
            if (col) {
-                rjson::value&& v = rjson::copy(*col);
-                if (h) {
-                    if (hierarchy_filter(v, *h)) {
-                        rjson::set_with_string_name(_return_attributes, cn, std::move(v));
-                    }
-                } else {
-                    rjson::set_with_string_name(_return_attributes, cn, std::move(v));
-                }
+                rjson::set_with_string_name(_return_attributes, cn, rjson::copy(*col));
            }
        }
        const column_definition* cdef = _schema->get_column_definition(column_name);
@@ -2577,7 +2193,7 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
    // can just move previous_item later, when we don't need it any more.
    if (_returnvalues == returnvalues::ALL_NEW) {
        if (previous_item) {
-            _return_attributes = rjson::copy(*previous_item);
+            _return_attributes = rjson::copy(*previous_item); // copy, not move: the update actions below still read previous_item
        } else {
            // If there is no previous item, usually a new item is created
            // and contains they given key. This may be cancelled at the end
@@ -2590,44 +2206,88 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
    }

    if (!_update_expression.empty()) {
-        for (auto& actions : _update_expression) {
-            // The actions of _update_expression are grouped by top-level
-            // attributes. Here, all actions in actions.second share the same
-            // top-level attribute actions.first.
-            std::string column_name = actions.first;
+        std::unordered_set<std::string> seen_column_names;
+        for (auto& action : _update_expression.actions()) {
+            if (action._path.has_operators()) {
+                // FIXME: implement this case - until then, a path with operators (a nested update) is rejected
+                throw api_error::validation("UpdateItem support for nested updates not yet implemented");
+            }
+            std::string column_name = action._path.root();
            const column_definition* cdef = _schema->get_column_definition(to_bytes(column_name));
            if (cdef && cdef->is_primary_key()) {
-                throw api_error::validation(format("UpdateItem cannot update key column {}", column_name));
+                throw api_error::validation(
+                        format("UpdateItem cannot update key column {}", column_name));
            }
-            if (actions.second.has_value()) {
-                // An action on a top-level attribute column_name. The single
-                // action is actions.second.get_value(). We can simply invoke
-                // the action and replace the attribute with its result:
-                std::optional<rjson::value> result = action_result(actions.second.get_value(), previous_item.get());
-                if (result) {
-                    do_update(to_bytes(column_name), *result);
-                } else {
+            // DynamoDB forbids multiple updates in the same expression to
+            // modify overlapping document paths. Updates of one expression
+            // have the same timestamp, so it's unclear which would "win".
+            // FIXME: currently, without full support for document paths,
+            // we only check if the paths' roots are the same.
+            if (!seen_column_names.insert(column_name).second) {
+                throw api_error::validation(
+                        format("Invalid UpdateExpression: two document paths overlap with each other: {} and {}.",
+                                column_name, column_name));
+            }
+            std::visit(overloaded_functor {
+                [&] (const parsed::update_expression::action::set& a) {
+                    auto value = calculate_value(a._rhs, previous_item.get());
+                    do_update(to_bytes(column_name), value);
+                },
+                [&] (const parsed::update_expression::action::remove& a) {
                    do_delete(to_bytes(column_name));
+                },
+                [&] (const parsed::update_expression::action::add& a) {
+                    parsed::value base;
+                    parsed::value addition;
+                    base.set_path(action._path);
+                    addition.set_constant(a._valref);
+                    rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item.get());
+                    rjson::value v2 = calculate_value(addition, calculate_value_caller::UpdateExpression, previous_item.get());
+                    rjson::value result;
+                    // An ADD can be used to create a new attribute (when
+                    // v1.IsNull()) or to add to a pre-existing attribute:
+                    if (v1.IsNull()) {
+                        std::string v2_type = get_item_type_string(v2);
+                        if (v2_type == "N" || v2_type == "SS" || v2_type == "NS" || v2_type == "BS") {
+                            result = v2;
+                        } else {
+                            throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v2));
+                        }
+                    } else {
+                        std::string v1_type = get_item_type_string(v1);
+                        if (v1_type == "N") {
+                            if (get_item_type_string(v2) != "N") {
+                                throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
+                            }
+                            result = number_add(v1, v2);
+                        } else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
+                            if (get_item_type_string(v2) != v1_type) {
+                                throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
+                            }
+                            result = set_sum(v1, v2);
+                        } else {
+                            throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v1));
+                        }
+                    }
+                    do_update(to_bytes(column_name), result);
+                },
+                [&] (const parsed::update_expression::action::del& a) {
+                    parsed::value base;
+                    parsed::value subset;
+                    base.set_path(action._path);
+                    subset.set_constant(a._valref);
+                    rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item.get());
+                    rjson::value v2 = calculate_value(subset, calculate_value_caller::UpdateExpression, previous_item.get());
+                    if (!v1.IsNull()) {
+                        std::optional<rjson::value> result  = set_diff(v1, v2);
+                        if (result) {
+                            do_update(to_bytes(column_name), *result);
+                        } else {
+                            do_delete(to_bytes(column_name));
+                        }
+                    }
                }
-            } else {
-                // We have actions on a path or more than one path in the same
-                // top-level attribute column_name - but not on the top-level
-                // attribute as a whole. We already read the full top-level
-                // attribute (see check_needs_read_before_write()), and now we
-                // need to modify pieces of it and write back the entire
-                // top-level attribute.
-                if (!previous_item) {
-                    throw api_error::validation(format("UpdateItem cannot update nested document path on non-existent item"));
-                }
-                const rjson::value *toplevel = rjson::find(*previous_item, column_name);
-                if (!toplevel) {
-                    throw api_error::validation(format("UpdateItem cannot update document path: missing attribute {}",
-                        column_name));
-                }
-                rjson::value result = rjson::copy(*toplevel);
-                hierarchy_actions(result, actions.second, previous_item.get());
-                do_update(to_bytes(column_name), std::move(result), &actions.second);
-            }
+            }, action._action);
        }
    }
    if (_returnvalues == returnvalues::ALL_OLD && previous_item) {
@@ -2745,7 +2405,7 @@ static rjson::value describe_item(schema_ptr schema,
        const query::partition_slice& slice,
        const cql3::selection::selection& selection,
        const query::result& query_result,
-        const attrs_to_get& attrs_to_get) {
+        const std::unordered_set<std::string>& attrs_to_get) {
    std::optional<rjson::value> opt_item = executor::describe_single_item(std::move(schema), slice, selection, std::move(query_result), attrs_to_get);
    if (!opt_item) {
        // If there is no matching item, we're supposed to return an empty
@@ -2817,7 +2477,7 @@ future<executor::request_return_type> executor::batch_get_item(client_state& cli
    struct table_requests {
        schema_ptr schema;
        db::consistency_level cl;
-        attrs_to_get attrs_to_get;
+        std::unordered_set<std::string> attrs_to_get;
        struct single_request {
            partition_key pk;
            clustering_key ck;
@@ -2954,9 +2614,6 @@ filter::filter(const rjson::value& request, request_type rt,
        if (expression->GetStringLength() == 0) {
            throw api_error::validation("FilterExpression must not be empty");
        }
-        if (rjson::find(request, "AttributesToGet")) {
-            throw api_error::validation("Cannot use both old-style and new-style parameters in same request: FilterExpression and AttributesToGet");
-        }
        try {
            // FIXME: make parse_condition_expression take string_view, get
            // rid of the silly conversion to std::string.
@@ -2972,9 +2629,6 @@ filter::filter(const rjson::value& request, request_type rt,
        }
    }
    if (conditions) {
-        if (rjson::find(request, "ProjectionExpression")) {
-            throw api_error::validation(format("Cannot use both old-style and new-style parameters in same request: {} and ProjectionExpression", conditions_attribute));
-        }
        bool require_all = conditional_operator != conditional_operator_type::OR;
        _imp = conditions_filter { require_all, rjson::copy(*conditions) };
    }
@@ -3031,7 +2685,7 @@ void filter::for_filters_on(const noncopyable_function<void(std::string_view)>&
 class describe_items_visitor {
    typedef std::vector<const column_definition*> columns_t;
    const columns_t& _columns;
-    const attrs_to_get& _attrs_to_get;
+    const std::unordered_set<std::string>& _attrs_to_get;
    std::unordered_set<std::string> _extra_filter_attrs;
    const filter& _filter;
    typename columns_t::const_iterator _column_it;
@@ -3040,7 +2694,7 @@ class describe_items_visitor {
    size_t _scanned_count;

 public:
-    describe_items_visitor(const columns_t& columns, const attrs_to_get& attrs_to_get, filter& filter)
+    describe_items_visitor(const columns_t& columns, const std::unordered_set<std::string>& attrs_to_get, filter& filter)
            : _columns(columns)
            , _attrs_to_get(attrs_to_get)
            , _filter(filter)
@@ -3089,12 +2743,6 @@ public:
                    std::string attr_name = value_cast<sstring>(entry.first);
                    if (_attrs_to_get.empty() || _attrs_to_get.contains(attr_name) || _extra_filter_attrs.contains(attr_name)) {
                        bytes value = value_cast<bytes>(entry.second);
-                        // Even if _attrs_to_get asked to keep only a part of a
-                        // top-level attribute, we keep the entire attribute
-                        // at this stage, because the item filter might still
-                        // need the other parts (it was easier for us to keep
-                        // extra_filter_attrs at top-level granularity). We'll
-                        // filter the unneeded parts after item filtering.
                        rjson::set_with_string_name(_item, attr_name, deserialize_item(value));
                    }
                }
@@ -3105,24 +2753,11 @@ public:

    void end_row() {
        if (_filter.check(_item)) {
-            // As noted above, we kept entire top-level attributes listed in
-            // _attrs_to_get. We may need to only keep parts of them.
-            for (const auto& attr: _attrs_to_get) {
-                // If !attr.has_value() it means we were asked not to keep
-                // attr entirely, but just parts of it.
-                if (!attr.second.has_value()) {
-                    rjson::value* toplevel= rjson::find(_item, attr.first);
-                    if (toplevel && !hierarchy_filter(*toplevel, attr.second)) {
-                        rjson::remove_member(_item, attr.first);
-                    }
-                }
-            }
            // Remove the extra attributes _extra_filter_attrs which we had
            // to add just for the filter, and not requested to be returned:
            for (const auto& attr : _extra_filter_attrs) {
                rjson::remove_member(_item, attr);
            }
-
            rjson::push_back(_items, std::move(_item));
        }
        _item = rjson::empty_object();
@@ -3138,7 +2773,7 @@ public:
    }
 };

-static rjson::value describe_items(schema_ptr schema, const query::partition_slice& slice, const cql3::selection::selection& selection, std::unique_ptr<cql3::result_set> result_set, attrs_to_get&& attrs_to_get, filter&& filter) {
+static rjson::value describe_items(schema_ptr schema, const query::partition_slice& slice, const cql3::selection::selection& selection, std::unique_ptr<cql3::result_set> result_set, std::unordered_set<std::string>&& attrs_to_get, filter&& filter) {
    describe_items_visitor visitor(selection.get_columns(), attrs_to_get, filter);
    result_set->visit(visitor);
    auto scanned_count = visitor.get_scanned_count();
@@ -3179,7 +2814,7 @@ static future<executor::request_return_type> do_query(service::storage_proxy& pr
        const rjson::value* exclusive_start_key,
        dht::partition_range_vector&& partition_ranges,
        std::vector<query::clustering_range>&& ck_bounds,
-        attrs_to_get&& attrs_to_get,
+        std::unordered_set<std::string>&& attrs_to_get,
        uint32_t limit,
        db::consistency_level cl,
        filter&& filter,
@@ -3219,7 +2854,7 @@ static future<executor::request_return_type> do_query(service::storage_proxy& pr
    auto p = service::pager::query_pagers::pager(schema, selection, *query_state_ptr, *query_options, command, std::move(partition_ranges), nullptr);

    return p->fetch_page(limit, gc_clock::now(), executor::default_timeout()).then(
-            [p = std::move(p), schema, cql_stats, partition_slice = std::move(partition_slice),
+            [p, schema, cql_stats, partition_slice = std::move(partition_slice),
             selection = std::move(selection), query_state_ptr = std::move(query_state_ptr),
             attrs_to_get = std::move(attrs_to_get),
             query_options = std::move(query_options),
@@ -3905,7 +3540,7 @@ future<> executor::create_keyspace(std::string_view keyspace_name) {
        }
        auto opts = get_network_topology_options(rf);
        auto ksm = keyspace_metadata::new_keyspace(keyspace_name_str, "org.apache.cassandra.locator.NetworkTopologyStrategy", std::move(opts), true);
-        return _mm.announce_new_keyspace(ksm, api::new_timestamp());
+        return _mm.announce_new_keyspace(ksm, api::new_timestamp(), false);
    });
 }

--- a/alternator/executor.hh
+++ b/alternator/executor.hh
@@ -70,76 +70,6 @@ public:
    std::string to_json() const override;
 };

-namespace parsed {
-class path;
-};
-
-// An attribute_path_map object is used to hold data for various attributes
-// paths (parsed::path) in a hierarchy of attribute paths. Each attribute path
-// has a root attribute, and then modified by member and index operators -
-// for example in "a.b[2].c" we have "a" as the root, then ".b" member, then
-// "[2]" index, and finally ".c" member.
-// Data can be added to an attribute_path_map using the add() function, but
-// requires that attributes with data not be *overlapping* or *conflicting*:
-//
-// 1. Two attribute paths which are identical or an ancestor of one another
-//    are considered *overlapping* and not allowed. If a.b.c has data,
-//    we can't add more data in a.b.c or any of its descendants like a.b.c.d.
-//
-// 2. Two attribute paths which need the same parent to have both a member and
-//    an index are considered *conflicting* and not allowed. E.g., if a.b has
-//    data, you can't add a[1]. The meaning of adding both would be that the
-//    attribute a is both a map and an array, which isn't sensible.
-//
-// These two requirements are common to the two places where Alternator uses
-// this abstraction to describe how a hierarchical item is to be transformed:
-//
-// 1. In ProjectExpression: for filtering from a full top-level attribute
-//    only the parts for which user asked in ProjectionExpression.
-//
-// 2. In UpdateExpression: for taking the previous value of a top-level
-//    attribute, and modifying it based on the instructions in the user
-//    wrote in UpdateExpression.
-
-template<typename T>
-class attribute_path_map_node {
-public:
-    using data_t = T;
-    // We need the extra shared_ptr<> here because libstdc++ unordered_map
-    // doesn't work with incomplete types :-( We couldn't use lw_shared_ptr<>
-    // because it doesn't work for incomplete types either. We couldn't use
-    // std::unique_ptr<> because it makes the entire object uncopyable. We
-    // don't often need to copy such a map, but we do have some code that
-    // copies an attrs_to_get object, and is hard to find and remove.
-    // The shared_ptr should never be null.
-    using members_t =  std::unordered_map<std::string, seastar::shared_ptr<attribute_path_map_node<T>>>;
-    // The indexes list is sorted because DynamoDB requires handling writes
-    // beyond the end of a list in index order.
-    using indexes_t = std::map<unsigned, seastar::shared_ptr<attribute_path_map_node<T>>>;
-    // The prohibition on "overlap" and "conflict" explained above means
-    // That only one of data, members or indexes is non-empty.
-    std::optional<std::variant<data_t, members_t, indexes_t>> _content;
-
-    bool is_empty() const { return !_content; }
-    bool has_value() const { return _content && std::holds_alternative<data_t>(*_content); }
-    bool has_members() const { return _content && std::holds_alternative<members_t>(*_content); }
-    bool has_indexes() const { return _content && std::holds_alternative<indexes_t>(*_content); }
-    // get_members() assumes that has_members() is true
-    members_t& get_members() { return std::get<members_t>(*_content); }
-    const members_t& get_members() const { return std::get<members_t>(*_content); }
-    indexes_t& get_indexes() { return std::get<indexes_t>(*_content); }
-    const indexes_t& get_indexes() const { return std::get<indexes_t>(*_content); }
-    T& get_value() { return std::get<T>(*_content); }
-    const T& get_value() const { return std::get<T>(*_content); }
-};
-
-template<typename T>
-using attribute_path_map = std::unordered_map<std::string, attribute_path_map_node<T>>;
-
-using attrs_to_get_node = attribute_path_map_node<std::monostate>;
-using attrs_to_get = attribute_path_map<std::monostate>;
-
-
 class executor : public peering_sharded_service<executor> {
    service::storage_proxy& _proxy;
    service::migration_manager& _mm;
@@ -191,10 +121,6 @@ public:

    static sstring table_name(const schema&);
    static db::timeout_clock::time_point default_timeout();
-    static void set_default_timeout(db::timeout_clock::duration timeout);
-private:
-    static db::timeout_clock::duration s_default_timeout;
-public:
    static schema_ptr find_table(service::storage_proxy&, const rjson::value& request);

 private:
@@ -210,14 +136,16 @@ public:
        const query::partition_slice&,
        const cql3::selection::selection&,
        const query::result&,
-        const attrs_to_get&);
+        const std::unordered_set<std::string>&);

    static void describe_single_item(const cql3::selection::selection&,
        const std::vector<bytes_opt>&,
-        const attrs_to_get&,
+        const std::unordered_set<std::string>&,
        rjson::value&,
        bool = false);

+
+
    void add_stream_options(const rjson::value& stream_spec, schema_builder&) const;
    void supplement_table_info(rjson::value& descr, const schema& schema) const;
    void supplement_table_stream_info(rjson::value& descr, const schema& schema) const;
--- a/alternator/expressions.cc
+++ b/alternator/expressions.cc
@@ -130,27 +130,6 @@ void condition_expression::append(condition_expression&& a, char op) {
    }, _expression);
 }

-void path::check_depth_limit() {
-    if (1 + _operators.size() > depth_limit) {
-        throw expressions_syntax_error(format("Document path exceeded {} nesting levels", depth_limit));
-    }
-}
-
-std::ostream& operator<<(std::ostream& os, const path& p) {
-    os << p.root();
-    for (const auto& op : p.operators()) {
-        std::visit(overloaded_functor {
-            [&] (const std::string& member) {
-                os << '.' << member;
-            },
-            [&] (unsigned index) {
-                os << '[' << index << ']';
-            }
-        }, op);
-    }
-    return os;
-}
-
 } // namespace parsed

 // The following resolve_*() functions resolve references in parsed
@@ -172,9 +151,10 @@ std::ostream& operator<<(std::ostream& os, const path& p) {
 // we need to resolve the expression just once but then use it many times
 // (once for each item to be filtered).

-static std::optional<std::string> resolve_path_component(const std::string& column_name,
+static void resolve_path(parsed::path& p,
        const rjson::value* expression_attribute_names,
        std::unordered_set<std::string>& used_attribute_names) {
+    const std::string& column_name = p.root();
    if (column_name.size() > 0 && column_name.front() == '#') {
        if (!expression_attribute_names) {
            throw api_error::validation(
@@ -186,30 +166,7 @@ static std::optional<std::string> resolve_path_component(const std::string& colu
                    format("ExpressionAttributeNames missing entry '{}' required by expression", column_name));
        }
        used_attribute_names.emplace(column_name);
-        return std::string(rjson::to_string_view(*value));
-    }
-    return std::nullopt;
-}
-
-static void resolve_path(parsed::path& p,
-        const rjson::value* expression_attribute_names,
-        std::unordered_set<std::string>& used_attribute_names) {
-    std::optional<std::string> r = resolve_path_component(p.root(), expression_attribute_names, used_attribute_names);
-    if (r) {
-        p.set_root(std::move(*r));
-    }
-    for (auto& op : p.operators()) {
-        std::visit(overloaded_functor {
-            [&] (std::string& s) {
-                r = resolve_path_component(s, expression_attribute_names, used_attribute_names);
-                if (r) {
-                    s = std::move(*r);
-                }
-            },
-            [&] (unsigned index) {
-                // nothing to resolve
-            }
-        }, op);
+        p.set_root(std::string(rjson::to_string_view(*value)));
    }
 }

@@ -666,55 +623,6 @@ std::unordered_map<std::string_view, function_handler_type*> function_handlers {
    },
 };

-// Given a parsed::path and an item read from the table, extract the value
-// of a certain attribute path, such as "a" or "a.b.c[3]". Returns a null
-// value if the item or the requested attribute does not exist.
-// Note that the item is assumed to be encoded in JSON using DynamoDB
-// conventions - each level of a nested document is a map with one key -
-// a type (e.g., "M" for map) - and its value is the representation of
-// that value.
-static rjson::value extract_path(const rjson::value* item,
-        const parsed::path& p, calculate_value_caller caller) {
-    if (!item) {
-        return rjson::null_value();
-    }
-    const rjson::value* v = rjson::find(*item, p.root());
-    if (!v) {
-        return rjson::null_value();
-    }
-    for (const auto& op : p.operators()) {
-        if (!v->IsObject() || v->MemberCount() != 1) {
-            // This shouldn't happen. We shouldn't have stored malformed
-            // objects. But today Alternator does not validate the structure
-            // of nested documents before storing them, so this can happen on
-            // read.
-            throw api_error::validation(format("{}: malformed item read: {}", *item));
-        }
-        const char* type = v->MemberBegin()->name.GetString();
-        v = &(v->MemberBegin()->value);
-        std::visit(overloaded_functor {
-            [&] (const std::string& member) {
-                if (type[0] == 'M' && v->IsObject()) {
-                    v = rjson::find(*v, member);
-                } else {
-                    v = nullptr;
-                }
-            },
-            [&] (unsigned index) {
-                if (type[0] == 'L' && v->IsArray() && index < v->Size()) {
-                    v = &(v->GetArray()[index]);
-                } else {
-                    v = nullptr;
-                }
-            }
-        }, op);
-        if (!v) {
-            return rjson::null_value();
-        }
-    }
-    return rjson::copy(*v);
-}
-
 // Given a parsed::value, which can refer either to a constant value from
 // ExpressionAttributeValues, to the value of some attribute, or to a function
 // of other values, this function calculates the resulting value.
@@ -732,12 +640,21 @@ rjson::value calculate_value(const parsed::value& v,
            auto function_it = function_handlers.find(std::string_view(f._function_name));
            if (function_it == function_handlers.end()) {
                throw api_error::validation(
-                        format("{}: unknown function '{}' called.", caller, f._function_name));
+                        format("UpdateExpression: unknown function '{}' called.", f._function_name));
            }
            return function_it->second(caller, previous_item, f);
        },
        [&] (const parsed::path& p) -> rjson::value {
-            return extract_path(previous_item, p, caller);
+            if (!previous_item) {
+                return rjson::null_value();
+            }
+            std::string update_path = p.root();
+            if (p.has_operators()) {
+                // FIXME: support this
+                throw api_error::validation("Reading attribute paths not yet implemented");
+            }
+            const rjson::value* previous_value = rjson::find(*previous_item, update_path);
+            return previous_value ? rjson::copy(*previous_value) : rjson::null_value();
        }
    }, v._value);
 }
--- a/alternator/expressions_types.hh
+++ b/alternator/expressions_types.hh
@@ -49,23 +49,15 @@ class path {
    // dot (e.g., ".xyz").
    std::string _root;
    std::vector<std::variant<std::string, unsigned>> _operators;
-    // It is useful to limit the depth of a user-specified path, because is
-    // allows us to use recursive algorithms without worrying about recursion
-    // depth. DynamoDB officially limits the length of paths to 32 components
-    // (including the root) so let's use the same limit.
-    static constexpr unsigned depth_limit = 32;
-    void check_depth_limit();
 public:
    void set_root(std::string root) {
        _root = std::move(root);
    }
    void add_index(unsigned i) {
        _operators.emplace_back(i);
-        check_depth_limit();
    }
    void add_dot(std::string(name)) {
        _operators.emplace_back(std::move(name));
-        check_depth_limit();
    }
    const std::string& root() const {
        return _root;
@@ -73,13 +65,6 @@ public:
    bool has_operators() const {
        return !_operators.empty();
    }
-    const std::vector<std::variant<std::string, unsigned>>& operators() const {
-        return _operators;
-    }
-    std::vector<std::variant<std::string, unsigned>>& operators() {
-        return _operators;
-    }
-    friend std::ostream& operator<<(std::ostream&, const path&);
 };

 // When an expression is first parsed, all constants are references, like
--- a/alternator/server.cc
+++ b/alternator/server.cc
@@ -93,10 +93,6 @@ public:
                 [&] (const json::json_return_type& json_return_value) {
                     slogger.trace("api_handler success case");
                     if (json_return_value._body_writer) {
-                         // Unfortunately, write_body() forces us to choose
-                         // from a fixed and irrelevant list of "mime-types"
-                         // at this point. But we'll override it with the
-                         // one (application/x-amz-json-1.0) below.
                         rep->write_body("json", std::move(json_return_value._body_writer));
                     } else {
                         rep->_content += json_return_value._res;
@@ -109,15 +105,14 @@ public:

             return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
         });
-    }) { }
+    }), _type("json") { }

    api_handler(const api_handler&) = default;
    future<std::unique_ptr<reply>> handle(const sstring& path,
            std::unique_ptr<request> req, std::unique_ptr<reply> rep) override {
        return _f_handle(std::move(req), std::move(rep)).then(
                [this](std::unique_ptr<reply> rep) {
-                    rep->set_mime_type("application/x-amz-json-1.0");
-                    rep->done();
+                    rep->done(_type);
                    return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
                });
    }
@@ -131,6 +126,7 @@ protected:
    }

    future_handler_function _f_handle;
+    sstring _type;
 };

 class gated_handler : public handler_base {
@@ -193,34 +189,27 @@ future<> server::verify_signature(const request& req) {
    }
    auto authorization_it = req._headers.find("Authorization");
    if (authorization_it == req._headers.end()) {
-        throw api_error::missing_authentication_token("Authorization header is mandatory for signature verification");
+        throw api_error::invalid_signature("Authorization header is mandatory for signature verification");
    }
    std::string host = host_it->second;
-    std::string_view authorization_header = authorization_it->second;
-    auto pos = authorization_header.find_first_of(' ');
-    if (pos == std::string_view::npos || authorization_header.substr(0, pos) != "AWS4-HMAC-SHA256") {
-        throw api_error::invalid_signature(format("Authorization header must use AWS4-HMAC-SHA256 algorithm: {}", authorization_header));
-    }
-    authorization_header.remove_prefix(pos+1);
+    std::vector<std::string_view> credentials_raw = split(authorization_it->second, ' ');
    std::string credential;
    std::string user_signature;
    std::string signed_headers_str;
    std::vector<std::string_view> signed_headers;
-    do {
-        // Either one of a comma or space can mark the end of an entry
-        pos = authorization_header.find_first_of(" ,");
-        std::string_view entry = authorization_header.substr(0, pos);
-        if (pos != std::string_view::npos) {
-            authorization_header.remove_prefix(pos + 1);
-        }
-        if (entry.empty()) {
-            continue;
-        }
+    for (std::string_view entry : credentials_raw) {
        std::vector<std::string_view> entry_split = split(entry, '=');
        if (entry_split.size() != 2) {
+            if (entry != "AWS4-HMAC-SHA256") {
+                throw api_error::invalid_signature(format("Only AWS4-HMAC-SHA256 algorithm is supported. Found: {}", entry));
+            }
            continue;
        }
        std::string_view auth_value = entry_split[1];
+        // Commas appear as an additional (quite redundant) delimiter
+        if (auth_value.back() == ',') {
+            auth_value.remove_suffix(1);
+        }
        if (entry_split[0] == "Credential") {
            credential = std::string(auth_value);
        } else if (entry_split[0] == "Signature") {
@@ -230,8 +219,7 @@ future<> server::verify_signature(const request& req) {
            signed_headers = split(auth_value, ';');
            std::sort(signed_headers.begin(), signed_headers.end());
        }
-    } while (pos != std::string_view::npos);
-
+    }
    std::vector<std::string_view> credential_split = split(credential, '/');
    if (credential_split.size() != 5) {
        throw api_error::validation(format("Incorrect credential information format: {}", credential));
@@ -255,8 +243,8 @@ future<> server::verify_signature(const request& req) {
        }
    }

-    auto cache_getter = [&qp = _qp] (std::string username) {
-        return get_key_from_roles(qp, std::move(username));
+    auto cache_getter = [] (std::string username) {
+        return get_key_from_roles(cql3::get_query_processor().local(), std::move(username));
    };
    return _key_cache.get_ptr(user, cache_getter).then([this, &req,
                                                    user = std::move(user),
@@ -340,11 +328,10 @@ void server::set_routes(routes& r) {
 //FIXME: A way to immediately invalidate the cache should be considered,
 // e.g. when the system table which stores the keys is changed.
 // For now, this propagation may take up to 1 minute.
-server::server(executor& exec, cql3::query_processor& qp)
+server::server(executor& exec)
        : _http_server("http-alternator")
        , _https_server("https-alternator")
        , _executor(exec)
-        , _qp(qp)
        , _key_cache(1024, 1min, slogger)
        , _enforce_authorization(false)
        , _enabled_servers{}
--- a/alternator/server.hh
+++ b/alternator/server.hh
@@ -41,7 +41,6 @@ class server {
    http_server _http_server;
    http_server _https_server;
    executor& _executor;
-    cql3::query_processor& _qp;

    key_cache _key_cache;
    bool _enforce_authorization;
@@ -69,7 +68,7 @@ class server {
    json_parser _json_parser;

 public:
-    server(executor& executor, cql3::query_processor& qp);
+    server(executor& executor);

    future<> init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds,
            bool enforce_authorization, semaphore* memory_limiter);
--- a/alternator/stats.cc
+++ b/alternator/stats.cc
@@ -38,7 +38,6 @@ stats::stats() : api_operations{} {
 #define OPERATION_LATENCY(name, CamelCaseName) \
                seastar::metrics::make_histogram("op_latency", \
                        seastar::metrics::description("Latency histogram of an operation via Alternator API"), {op(CamelCaseName)}, [this]{return to_metrics_histogram(api_operations.name);}),
-            OPERATION(batch_get_item, "BatchGetItem")
            OPERATION(batch_write_item, "BatchWriteItem")
            OPERATION(create_backup, "CreateBackup")
            OPERATION(create_global_table, "CreateGlobalTable")
--- a/alternator/streams.cc
+++ b/alternator/streams.cc
@@ -290,9 +290,7 @@ struct sequence_number {
 sequence_number::sequence_number(std::string_view v) 
    : uuid([&] {
        using namespace boost::multiprecision;
-        // workaround for weird clang 10 bug when calling constructor with
-        // view directly.
-        uint128_t tmp{std::string(v)};
+        uint128_t tmp{v};
        // see above
        return utils::UUID_gen::get_time_UUID_raw(uint64_t(tmp >> 64), uint64_t(tmp & std::numeric_limits<uint64_t>::max()));
    }())
@@ -477,8 +475,6 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
            status = "ENABLED";
        }
    } 
-
-    auto ttl = std::chrono::seconds(opts.ttl());
    
    rjson::set(stream_desc, "StreamStatus", rjson::from_string(status));

@@ -498,12 +494,20 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
    // TODO: label
    // TODO: creation time

-    auto normal_token_owners = _proxy.get_token_metadata_ptr()->count_normal_token_owners();
+    const auto& tm = _proxy.get_token_metadata();
+    // cannot really "resume" query, must iterate all data. because we cannot query neither "time" (pk) > something,
+    // or on expired...
+    // TODO: maybe add secondary index to topology table to enable this?
+    return _sdks.cdc_get_versioned_streams({ tm.count_normal_token_owners() }).then([this, &db, schema, shard_start, limit, ret = std::move(ret), stream_desc = std::move(stream_desc)](std::map<db_clock::time_point, cdc::streams_version> topologies) mutable {

-    // filter out cdc generations older than the table or now() - cdc::ttl (typically dynamodb_streams_max_window - 24h)
-    auto low_ts = std::max(as_timepoint(schema->id()), db_clock::now() - ttl);
+        // filter out cdc generations older than the table or now() - dynamodb_streams_max_window (24h)
+        auto low_ts = std::max(as_timepoint(schema->id()), db_clock::now() - dynamodb_streams_max_window);

-    return _sdks.cdc_get_versioned_streams(low_ts, { normal_token_owners }).then([this, &db, shard_start, limit, ret = std::move(ret), stream_desc = std::move(stream_desc)] (std::map<db_clock::time_point, cdc::streams_version> topologies) mutable {
+        auto i = topologies.lower_bound(low_ts);
+        // need first gen _intersecting_ the timestamp.
+        if (i != topologies.begin()) {
+            i = std::prev(i);
+        }

        auto e = topologies.end();
        auto prev = e;
@@ -511,7 +515,9 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl

        std::optional<shard_id> last;

-        auto i = topologies.begin();
+        // i is now at the youngest generation we include. make a mark of it.
+        auto first = i;
+
        // if we're a paged query, skip to the generation where we left of.
        if (shard_start) {
            i = topologies.find(shard_start->time);
@@ -537,7 +543,7 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
        };

        // need a prev even if we are skipping stuff
-        if (i != topologies.begin()) {
+        if (i != first) {
            prev = std::prev(i);
        }

@@ -845,18 +851,16 @@ future<executor::request_return_type> executor::get_records(client_state& client
    static const bytes op_column_name = cdc::log_meta_column_name_bytes("operation");
    static const bytes eor_column_name = cdc::log_meta_column_name_bytes("end_of_batch");

-    auto key_names = boost::copy_range<attrs_to_get>(
+    auto key_names = boost::copy_range<std::unordered_set<std::string>>(
        boost::range::join(std::move(base->partition_key_columns()), std::move(base->clustering_key_columns()))
-        | boost::adaptors::transformed([&] (const column_definition& cdef) {
-            return std::make_pair<std::string, attrs_to_get_node>(cdef.name_as_text(), {}); })
+        | boost::adaptors::transformed([&] (const column_definition& cdef) { return cdef.name_as_text(); })
    );
    // Include all base table columns as values (in case pre or post is enabled).
    // This will include attributes not stored in the frozen map column
-    auto attr_names = boost::copy_range<attrs_to_get>(base->regular_columns()
+    auto attr_names = boost::copy_range<std::unordered_set<std::string>>(base->regular_columns()
        // this will include the :attrs column, which we will also force evaluating. 
        // But not having this set empty forces out any cdc columns from actual result 
-        | boost::adaptors::transformed([] (const column_definition& cdef) {
-            return std::make_pair<std::string, attrs_to_get_node>(cdef.name_as_text(), {}); })
+        | boost::adaptors::transformed([] (const column_definition& cdef) { return cdef.name_as_text(); })
    );

    std::vector<const column_definition*> columns;
@@ -879,17 +883,8 @@ future<executor::request_return_type> executor::get_records(client_state& client
    auto partition_slice = query::partition_slice(
        std::move(bounds)
        , {}, std::move(regular_columns), selection->get_query_options());
-
-	auto& opts = base->cdc_options();
-	auto mul = 2; // key-only, allow for delete + insert
-    if (opts.preimage()) {
-        ++mul;
-    }
-    if (opts.postimage()) {
-        ++mul;
-    }
    auto command = ::make_lw_shared<query::read_command>(schema->id(), schema->version(), partition_slice, _proxy.get_max_result_size(partition_slice),
-            query::row_limit(limit * mul));
+            query::row_limit(limit * 4));

    return _proxy.query(schema, std::move(command), std::move(partition_ranges), cl, service::storage_proxy::coordinator_query_options(default_timeout(), std::move(permit), client_state)).then(
            [this, schema, partition_slice = std::move(partition_slice), selection = std::move(selection), start_time = std::move(start_time), limit, key_names = std::move(key_names), attr_names = std::move(attr_names), type, iter, high_ts] (service::storage_proxy::coordinator_query_result qr) mutable {       
@@ -1020,9 +1015,7 @@ future<executor::request_return_type> executor::get_records(client_state& client
        }

        // ugh. figure out if we are and end-of-shard
-        auto normal_token_owners = _proxy.get_token_metadata_ptr()->count_normal_token_owners();
-        
-        return _sdks.cdc_current_generation_timestamp({ normal_token_owners }).then([this, iter, high_ts, start_time, ret = std::move(ret)](db_clock::time_point ts) mutable {
+        return cdc::get_local_streams_timestamp().then([this, iter, high_ts, start_time, ret = std::move(ret)](db_clock::time_point ts) mutable {
            auto& shard = iter.shard;            

            if (shard.time < ts && ts < high_ts) {
--- a/api/api-doc/column_family.json
+++ b/api/api-doc/column_family.json
@@ -2925,10 +2925,6 @@
         "id":"toppartitions_query_results",
         "description":"nodetool toppartitions query results",
         "properties":{
-            "read_cardinality":{
-               "type":"long",
-               "description":"Number of the unique operations in the sample set"
-            },
            "read":{
               "type":"array",
               "items":{
@@ -2936,10 +2932,6 @@
               },
               "description":"Read results"
            },
-            "write_cardinality":{
-               "type":"long",
-               "description":"Number of the unique operations in the sample set"
-            },
            "write":{
               "type":"array",
               "items":{
--- a/api/api-doc/gossiper.json
+++ b/api/api-doc/gossiper.json
@@ -148,30 +148,6 @@
               ]
            }
         ]
-      },
-      {
-         "path":"/gossiper/force_remove_endpoint/{addr}",
-         "operations":[
-            {
-               "method":"POST",
-               "summary":"Force remove an endpoint from gossip",
-               "type":"void",
-               "nickname":"force_remove_endpoint",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"addr",
-                     "description":"The endpoint address",
-                     "required":true,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"path"
-                  }
-               ]
-            }
-         ]
      }
   ]
 }
--- a/api/api-doc/storage_proxy.json
+++ b/api/api-doc/storage_proxy.json
@@ -68,7 +68,7 @@
               "summary":"Get the hinted handoff enabled by dc",
               "type":"array",
               "items":{
-                  "type":"array"
+                  "type":"mapper_list"
               },
               "nickname":"get_hinted_handoff_enabled_by_dc",
               "produces":[
--- a/api/api_init.hh
+++ b/api/api_init.hh
@@ -24,7 +24,7 @@
 #include <seastar/http/httpd.hh>

 namespace service { class load_meter; }
-namespace locator { class shared_token_metadata; }
+namespace locator { class token_metadata; }
 namespace cql_transport { class controller; }
 class thrift_controller;
 namespace db { class snapshot_ctl; }
@@ -39,15 +39,13 @@ struct http_context {
    distributed<database>& db;
    distributed<service::storage_proxy>& sp;
    service::load_meter& lmeter;
-    const sharded<locator::shared_token_metadata>& shared_token_metadata;
+    const sharded<locator::token_metadata>& token_metadata;

    http_context(distributed<database>& _db,
            distributed<service::storage_proxy>& _sp,
-            service::load_meter& _lm, const sharded<locator::shared_token_metadata>& _stm)
-            : db(_db), sp(_sp), lmeter(_lm), shared_token_metadata(_stm) {
+            service::load_meter& _lm, const sharded<locator::token_metadata>& _tm)
+            : db(_db), sp(_sp), lmeter(_lm), token_metadata(_tm) {
    }
-
-    const locator::token_metadata& get_token_metadata();
 };

 future<> set_server_init(http_context& ctx);
--- a/api/column_family.cc
+++ b/api/column_family.cc
@@ -310,7 +310,7 @@ void set_column_family(http_context& ctx, routes& r) {
        return res;
    });

-    cf::get_column_family.set(r, [&ctx] (std::unique_ptr<request> req){
+    cf::get_column_family.set(r, [&ctx] (const_req req){
            vector<cf::column_family_info> res;
            for (auto i: ctx.db.local().get_column_families_mapping()) {
                cf::column_family_info info;
@@ -319,7 +319,7 @@ void set_column_family(http_context& ctx, routes& r) {
                info.type = "ColumnFamilies";
                res.push_back(info);
            }
-            return make_ready_future<json::json_return_type>(json::stream_object(std::move(res)));
+            return res;
        });

    cf::get_column_family_name_keyspace.set(r, [&ctx] (const_req req){
@@ -991,9 +991,6 @@ void set_column_family(http_context& ctx, routes& r) {
                        apilog.debug("toppartitions query: processing results");
                        cf::toppartitions_query_results results;

-                        results.read_cardinality = topk_results.read.size();
-                        results.write_cardinality = topk_results.write.size();
-
                        for (auto& d: topk_results.read.top(q.list_size())) {
                            cf::toppartitions_record r;
                            r.partition = sstring(d.item);
--- a/api/gossiper.cc
+++ b/api/gossiper.cc
@@ -66,13 +66,6 @@ void set_gossiper(http_context& ctx, routes& r) {
            return make_ready_future<json::json_return_type>(json_void());
        });
    });
-
-    httpd::gossiper_json::force_remove_endpoint.set(r, [](std::unique_ptr<request> req) {
-        gms::inet_address ep(req->param["addr"]);
-        return gms::get_local_gossiper().force_remove_endpoint(ep).then([] {
-            return make_ready_future<json::json_return_type>(json_void());
-        });
-    });
 }

 }
--- a/api/storage_proxy.cc
+++ b/api/storage_proxy.cc
@@ -201,39 +201,29 @@ void set_storage_proxy(http_context& ctx, routes& r) {
    });

    sp::get_hinted_handoff_enabled.set(r, [&ctx](std::unique_ptr<request> req)  {
-        const auto& filter = service::get_storage_proxy().local().get_hints_host_filter();
-        return make_ready_future<json::json_return_type>(!filter.is_disabled_for_all());
+        auto enabled = ctx.db.local().get_config().hinted_handoff_enabled();
+        return make_ready_future<json::json_return_type>(enabled);
    });

    sp::set_hinted_handoff_enabled.set(r, [](std::unique_ptr<request> req)  {
+        //TBD
+        unimplemented();
        auto enable = req->get_query_param("enable");
-        auto filter = (enable == "true" || enable == "1")
-                ? db::hints::host_filter(db::hints::host_filter::enabled_for_all_tag {})
-                : db::hints::host_filter(db::hints::host_filter::disabled_for_all_tag {});
-        return service::get_storage_proxy().invoke_on_all([filter = std::move(filter)] (service::storage_proxy& sp) {
-            return sp.change_hints_host_filter(filter);
-        }).then([] {
-            return make_ready_future<json::json_return_type>(json_void());
-        });
+        return make_ready_future<json::json_return_type>(json_void());
    });

    sp::get_hinted_handoff_enabled_by_dc.set(r, [](std::unique_ptr<request> req)  {
-        std::vector<sstring> res;
-        const auto& filter = service::get_storage_proxy().local().get_hints_host_filter();
-        const auto& dcs = filter.get_dcs();
-        res.reserve(res.size());
-        std::copy(dcs.begin(), dcs.end(), std::back_inserter(res));
+        //TBD
+        unimplemented();
+        std::vector<sp::mapper_list> res;
        return make_ready_future<json::json_return_type>(res);
    });

    sp::set_hinted_handoff_enabled_by_dc_list.set(r, [](std::unique_ptr<request> req)  {
-        auto dcs = req->get_query_param("dcs");
-        auto filter = db::hints::host_filter::parse_from_dc_list(std::move(dcs));
-        return service::get_storage_proxy().invoke_on_all([filter = std::move(filter)] (service::storage_proxy& sp) {
-            return sp.change_hints_host_filter(filter);
-        }).then([] {
-            return make_ready_future<json::json_return_type>(json_void());
-        });
+        //TBD
+        unimplemented();
+        auto enable = req->get_query_param("dcs");
+        return make_ready_future<json::json_return_type>(json_void());
    });

    sp::get_max_hint_window.set(r, [](std::unique_ptr<request> req)  {
--- a/api/storage_service.cc
+++ b/api/storage_service.cc
@@ -22,7 +22,6 @@
 #include "storage_service.hh"
 #include "api/api-doc/storage_service.json.hh"
 #include "db/config.hh"
-#include "db/schema_tables.hh"
 #include <optional>
 #include <time.h>
 #include <boost/range/adaptor/map.hpp>
@@ -45,14 +44,9 @@
 #include "db/snapshot-ctl.hh"
 #include "transport/controller.hh"
 #include "thrift/controller.hh"
-#include "locator/token_metadata.hh"

 namespace api {

-const locator::token_metadata& http_context::get_token_metadata() {
-        return *shared_token_metadata.local().get();
-}
-
 namespace ss = httpd::storage_service_json;
 using namespace json;

@@ -225,7 +219,7 @@ void set_repair(http_context& ctx, routes& r, sharded<netw::messaging_service>&
            try {
                res = fut.get0();
            } catch (std::exception& e) {
-                return make_exception_future<json::json_return_type>(httpd::bad_param_exception(e.what()));
+                return make_exception_future<json::json_return_type>(httpd::server_error_exception(e.what()));
            }
            return make_ready_future<json::json_return_type>(json::json_return_type(res));
        });
@@ -262,14 +256,14 @@ void set_storage_service(http_context& ctx, routes& r) {
    });

    ss::get_tokens.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.get_token_metadata().sorted_tokens(), [](const dht::token& i) {
+        return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.token_metadata.local().sorted_tokens(), [](const dht::token& i) {
           return boost::lexical_cast<std::string>(i);
        }));
    });

    ss::get_node_tokens.set(r, [&ctx] (std::unique_ptr<request> req) {
        gms::inet_address addr(req->param["endpoint"]);
-        return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.get_token_metadata().get_tokens(addr), [](const dht::token& i) {
+        return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.token_metadata.local().get_tokens(addr), [](const dht::token& i) {
           return boost::lexical_cast<std::string>(i);
       }));
    });
@@ -288,7 +282,7 @@ void set_storage_service(http_context& ctx, routes& r) {
    });

    ss::get_leaving_nodes.set(r, [&ctx](const_req req) {
-        return container_to_vec(ctx.get_token_metadata().get_leaving_endpoints());
+        return container_to_vec(ctx.token_metadata.local().get_leaving_endpoints());
    });

    ss::get_moving_nodes.set(r, [](const_req req) {
@@ -297,7 +291,7 @@ void set_storage_service(http_context& ctx, routes& r) {
    });

    ss::get_joining_nodes.set(r, [&ctx](const_req req) {
-        auto points = ctx.get_token_metadata().get_bootstrap_tokens();
+        auto points = ctx.token_metadata.local().get_bootstrap_tokens();
        std::unordered_set<sstring> addr;
        for (auto i: points) {
            addr.insert(boost::lexical_cast<std::string>(i.second));
@@ -366,7 +360,7 @@ void set_storage_service(http_context& ctx, routes& r) {

    ss::get_host_id_map.set(r, [&ctx](const_req req) {
        std::vector<ss::mapper> res;
-        return map_to_key_value(ctx.get_token_metadata().get_endpoint_to_host_id_map_for_reading(), res);
+        return map_to_key_value(ctx.token_metadata.local().get_endpoint_to_host_id_map_for_reading(), res);
    });

    ss::get_load.set(r, [&ctx](std::unique_ptr<request> req) {
@@ -738,12 +732,9 @@ void set_storage_service(http_context& ctx, routes& r) {
    });

    ss::reset_local_schema.set(r, [](std::unique_ptr<request> req) {
-        // FIXME: We should truncate schema tables if more than one node in the cluster.
-        auto& sp = service::get_storage_proxy();
-        auto& fs = service::get_local_storage_service().features();
-        return db::schema_tables::recalculate_schema_version(sp, fs).then([] {
-            return make_ready_future<json::json_return_type>(json_void());
-        });
+        //TBD
+        unimplemented();
+        return make_ready_future<json::json_return_type>(json_void());
    });

    ss::set_trace_probability.set(r, [](std::unique_ptr<request> req) {
--- a/auth/common.cc
+++ b/auth/common.cc
@@ -82,7 +82,7 @@ static future<> create_metadata_table_if_missing_impl(
    b.set_uuid(uuid);
    schema_ptr table = b.build();
    return ignore_existing([&mm, table = std::move(table)] () {
-        return mm.announce_new_column_family(table);
+        return mm.announce_new_column_family(table, false);
    });
 }

--- a/auth/service.cc
+++ b/auth/service.cc
@@ -154,7 +154,7 @@ future<> service::create_keyspace_if_missing(::service::migration_manager& mm) c

        // We use min_timestamp so that default keyspace metadata will loose with any manual adjustments.
        // See issue #2129.
-        return mm.announce_new_keyspace(ksm, api::min_timestamp);
+        return mm.announce_new_keyspace(ksm, api::min_timestamp, false);
    }

    return make_ready_future<>();
@@ -371,13 +371,10 @@ bool is_enforcing(const service& ser)  {
    return enforcing_authorizer || enforcing_authenticator;
 }

-bool is_protected(const service& ser, command_desc cmd) noexcept {
-    if (cmd.type_ == command_desc::type::ALTER_WITH_OPTS) {
-        return false; // Table attributes are OK to modify; see #7057.
-    }
-    return ser.underlying_role_manager().protected_resources().contains(cmd.resource)
-            || ser.underlying_authenticator().protected_resources().contains(cmd.resource)
-            || ser.underlying_authorizer().protected_resources().contains(cmd.resource);
+bool is_protected(const service& ser, const resource& r) noexcept {
+    return ser.underlying_role_manager().protected_resources().contains(r)
+            || ser.underlying_authenticator().protected_resources().contains(r)
+            || ser.underlying_authorizer().protected_resources().contains(r);
 }

 static void validate_authentication_options_are_supported(
--- a/auth/service.hh
+++ b/auth/service.hh
@@ -181,21 +181,10 @@ future<permission_set> get_permissions(const service&, const authenticated_user&
 ///
 bool is_enforcing(const service&);

-/// A description of a CQL command from which auth::service can tell whether or not this command could endanger
-/// internal data on which auth::service depends.
-struct command_desc {
-    auth::permission permission; ///< Nature of the command's alteration.
-    const ::auth::resource& resource; ///< Resource impacted by this command.
-    enum class type {
-        ALTER_WITH_OPTS, ///< Command is ALTER ... WITH ...
-        OTHER
-    } type_ = type::OTHER;
-};
-
 ///
 /// Protected resources cannot be modified even if the performer has permissions to do so.
 ///
-bool is_protected(const service&, command_desc) noexcept;
+bool is_protected(const service&, const resource&) noexcept;

 ///
 /// Create a role with optional authentication information.
--- a/bytes.hh
+++ b/bytes.hh
@@ -28,7 +28,6 @@
 #include <iosfwd>
 #include <functional>
 #include "utils/mutable_view.hh"
-#include <xxhash.h>

 using bytes = basic_sstring<int8_t, uint32_t, 31, false>;
 using bytes_view = std::basic_string_view<int8_t>;
@@ -36,10 +35,6 @@ using bytes_mutable_view = basic_mutable_view<bytes_view::value_type>;
 using bytes_opt = std::optional<bytes>;
 using sstring_view = std::string_view;

-inline bytes to_bytes(bytes&& b) {
-    return std::move(b);
-}
-
 inline sstring_view to_sstring_view(bytes_view view) {
    return {reinterpret_cast<const char*>(view.data()), view.size()};
 }
@@ -48,6 +43,17 @@ inline bytes_view to_bytes_view(sstring_view view) {
    return {reinterpret_cast<const int8_t*>(view.data()), view.size()};
 }

+namespace std {
+
+template <>
+struct hash<bytes_view> {
+    size_t operator()(bytes_view v) const {
+        return hash<sstring_view>()({reinterpret_cast<const char*>(v.begin()), v.size()});
+    }
+};
+
+}
+
 struct fmt_hex {
    bytes_view& v;
    fmt_hex(bytes_view& v) noexcept : v(v) {}
@@ -88,30 +94,6 @@ struct appending_hash<bytes_view> {
    }
 };

-struct bytes_view_hasher : public hasher {
-    XXH64_state_t _state;
-    bytes_view_hasher(uint64_t seed = 0) noexcept {
-        XXH64_reset(&_state, seed);
-    }
-    void update(const char* ptr, size_t length) noexcept {
-        XXH64_update(&_state, ptr, length);
-    }
-    size_t finalize() {
-        return static_cast<size_t>(XXH64_digest(&_state));
-    }
-};
-
-namespace std {
-template <>
-struct hash<bytes_view> {
-    size_t operator()(bytes_view v) const {
-        bytes_view_hasher h;
-        appending_hash<bytes_view>{}(h, v);
-        return h.finalize();
-    }
-};
-} // namespace std
-
 inline int32_t compare_unsigned(bytes_view v1, bytes_view v2) {
  auto size = std::min(v1.size(), v2.size());
  if (size) {
--- a/bytes_ostream.hh
+++ b/bytes_ostream.hh
@@ -39,7 +39,7 @@ public:
    using size_type = bytes::size_type;
    using value_type = bytes::value_type;
    using fragment_type = bytes_view;
-    static constexpr size_type max_chunk_size() { return max_alloc_size() - sizeof(chunk); }
+    static constexpr size_type max_chunk_size() { return 128 * 1024; }
 private:
    static_assert(sizeof(value_type) == 1, "value_type is assumed to be one byte long");
    struct chunk {
@@ -59,21 +59,13 @@ private:
        void operator delete(void* ptr) { free(ptr); }
    };
    static constexpr size_type default_chunk_size{512};
-    static constexpr size_type max_alloc_size() { return 128 * 1024; }
 private:
    std::unique_ptr<chunk> _begin;
    chunk* _current;
    size_type _size;
    size_type _initial_chunk_size = default_chunk_size;
 public:
-    class fragment_iterator {
-    public:
-        using iterator_category = std::input_iterator_tag;
-        using value_type = bytes_view;
-        using difference_type = std::ptrdiff_t;
-        using pointer = bytes_view*;
-        using reference = bytes_view&;
-    private:
+    class fragment_iterator : public std::iterator<std::input_iterator_tag, bytes_view> {
        chunk* _current = nullptr;
    public:
        fragment_iterator() = default;
@@ -133,15 +125,16 @@ private:
        return _current->size - _current->offset;
    }
    // Figure out next chunk size.
-    //   - must be enough for data_size + sizeof(chunk)
+    //   - must be enough for data_size
    //   - must be at least _initial_chunk_size
    //   - try to double each time to prevent too many allocations
-    //   - should not exceed max_alloc_size, unless data_size requires so
+    //   - do not exceed max_chunk_size
    size_type next_alloc_size(size_t data_size) const {
        auto next_size = _current
                ? _current->size * 2
                : _initial_chunk_size;
-        next_size = std::min(next_size, max_alloc_size());
+        next_size = std::min(next_size, max_chunk_size());
+        // FIXME: check for overflow?
        return std::max<size_type>(next_size, data_size + sizeof(chunk));
    }
    // Makes room for a contiguous region of given size.
--- a/cache_flat_mutation_reader.hh
+++ b/cache_flat_mutation_reader.hh
@@ -264,9 +264,6 @@ future<> cache_flat_mutation_reader::do_fill_buffer(db::timeout_clock::time_poin
        }
        _state = state::reading_from_underlying;
        _population_range_starts_before_all_rows = _lower_bound.is_before_all_clustered_rows(*_schema);
-        if (!_read_context->partition_exists()) {
-            return read_from_underlying(timeout);
-        }
        auto end = _next_row_in_range ? position_in_partition(_next_row.position())
                                      : position_in_partition(_upper_bound);
        return _underlying->fast_forward_to(position_range{_lower_bound, std::move(end)}, timeout).then([this, timeout] {
@@ -464,7 +461,7 @@ void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) {
            cr.cells().prepare_hash(*_schema, column_kind::regular_column);
        }
        auto new_entry = alloc_strategy_unique_ptr<rows_entry>(
-            current_allocator().construct<rows_entry>(*_schema, cr.key(), cr.as_deletable_row()));
+            current_allocator().construct<rows_entry>(*_schema, cr.key(), cr.tomb(), cr.marker(), cr.cells()));
        new_entry->set_continuous(false);
        auto it = _next_row.iterators_valid() ? _next_row.get_iterator_in_latest_version()
                                              : mp.clustered_rows().lower_bound(cr.key(), less);
@@ -511,7 +508,7 @@ void cache_flat_mutation_reader::copy_from_cache_to_buffer() {
        // This guarantees that rts starts after any emitted clustering_row
        // and not before any emitted range tombstone.
        if (!less(_lower_bound, rts.position())) {
-            rts.set_start(_lower_bound);
+            rts.set_start(*_schema, _lower_bound);
        } else {
            _lower_bound = position_in_partition(rts.position());
            _lower_bound_changed = true;
@@ -647,7 +644,7 @@ void cache_flat_mutation_reader::add_to_buffer(range_tombstone&& rt) {
        return;
    }
    if (!less(_lower_bound, rt.position())) {
-        rt.set_start(_lower_bound);
+        rt.set_start(*_schema, _lower_bound);
    } else {
        _lower_bound = position_in_partition(rt.position());
        _lower_bound_changed = true;
--- a/cartesian_product.hh
+++ b/cartesian_product.hh
@@ -33,13 +33,9 @@ template<typename T>
 struct cartesian_product {
    const std::vector<std::vector<T>>& _vec_of_vecs;
 public:
-    class iterator {
+    class iterator : public std::iterator<std::forward_iterator_tag, std::vector<T>> {
    public:
-        using iterator_category = std::forward_iterator_tag;
        using value_type = std::vector<T>;
-        using difference_type = std::ptrdiff_t;
-        using pointer = std::vector<T>*;
-        using reference = std::vector<T>&;
    private:
        size_t _pos;
        const std::vector<std::vector<T>>* _vec_of_vecs;
--- a/cdc/generation.cc
+++ b/cdc/generation.cc
@@ -22,14 +22,11 @@
 #include <boost/type.hpp>
 #include <random>
 #include <unordered_set>
-#include <algorithm>
 #include <seastar/core/sleep.hh>
 #include <algorithm>
-#include <seastar/core/coroutine.hh>

 #include "keys.hh"
 #include "schema_builder.hh"
-#include "database.hh"
 #include "db/config.hh"
 #include "db/system_keyspace.hh"
 #include "db/system_distributed_keyspace.hh"
@@ -40,7 +37,6 @@
 #include "gms/gossiper.hh"

 #include "cdc/generation.hh"
-#include "cdc/cdc_options.hh"

 extern logging::logger cdc_log;

@@ -205,12 +201,12 @@ static std::vector<stream_id> create_stream_ids(
 class topology_description_generator final {
    const db::config& _cfg;
    const std::unordered_set<dht::token>& _bootstrap_tokens;
-    const locator::token_metadata_ptr _tmptr;
+    const locator::token_metadata& _token_metadata;
    const gms::gossiper& _gossiper;

    // Compute a set of tokens that split the token ring into vnodes
    auto get_tokens() const {
-        auto tokens = _tmptr->sorted_tokens();
+        auto tokens = _token_metadata.sorted_tokens();
        auto it = tokens.insert(
                tokens.end(), _bootstrap_tokens.begin(), _bootstrap_tokens.end());
        std::sort(it, tokens.end());
@@ -225,7 +221,7 @@ class topology_description_generator final {
        if (_bootstrap_tokens.contains(end)) {
            return {smp::count, _cfg.murmur3_partitioner_ignore_msb_bits()};
        } else {
-            auto endpoint = _tmptr->get_endpoint(end);
+            auto endpoint = _token_metadata.get_endpoint(end);
            if (!endpoint) {
                throw std::runtime_error(
                        format("Can't find endpoint for token {}", end));
@@ -250,11 +246,11 @@ public:
    topology_description_generator(
            const db::config& cfg,
            const std::unordered_set<dht::token>& bootstrap_tokens,
-            const locator::token_metadata_ptr tmptr,
+            const locator::token_metadata& token_metadata,
            const gms::gossiper& gossiper)
        : _cfg(cfg)
        , _bootstrap_tokens(bootstrap_tokens)
-        , _tmptr(std::move(tmptr))
+        , _token_metadata(token_metadata)
        , _gossiper(gossiper)
    {}

@@ -324,7 +320,7 @@ topology_description limit_number_of_streams_if_needed(topology_description&& de
    }

    size_t limit = std::max(limit_of_streams_in_topology_description(), desc.entries().size());
-    if (limit >= streams_count) {
+    if (limit >= size_t(streams_count)) {
        return std::move(desc);
    }
    size_t streams_per_vnode_limit = limit / desc.entries().size();
@@ -345,13 +341,13 @@ topology_description limit_number_of_streams_if_needed(topology_description&& de
 db_clock::time_point make_new_cdc_generation(
        const db::config& cfg,
        const std::unordered_set<dht::token>& bootstrap_tokens,
-        const locator::token_metadata_ptr tmptr,
+        const locator::token_metadata& tm,
        const gms::gossiper& g,
        db::system_distributed_keyspace& sys_dist_ks,
        std::chrono::milliseconds ring_delay,
-        bool add_delay) {
+        bool for_testing) {
    using namespace std::chrono;
-    auto gen = topology_description_generator(cfg, bootstrap_tokens, tmptr, g).generate();
+    auto gen = topology_description_generator(cfg, bootstrap_tokens, tm, g).generate();

    // If the cluster is large we may end up with a generation that contains
    // large number of streams. This is problematic because we store the
@@ -367,9 +363,9 @@ db_clock::time_point make_new_cdc_generation(

    // Begin the race.
    auto ts = db_clock::now() + (
-            (!add_delay || ring_delay == milliseconds(0)) ? milliseconds(0) : (
+            (for_testing || ring_delay == milliseconds(0)) ? milliseconds(0) : (
                2 * ring_delay + duration_cast<milliseconds>(generation_leeway)));
-    sys_dist_ks.insert_cdc_topology_description(ts, std::move(gen), { tmptr->count_normal_token_owners() }).get();
+    sys_dist_ks.insert_cdc_topology_description(ts, std::move(gen), { tm.count_normal_token_owners() }).get();

    return ts;
 }
@@ -380,23 +376,31 @@ std::optional<db_clock::time_point> get_streams_timestamp_for(const gms::inet_ad
    return gms::versioned_value::cdc_streams_timestamp_from_string(streams_ts_string);
 }

-static future<> do_update_streams_description(
+// Run inside seastar::async context.
+static void do_update_streams_description(
        db_clock::time_point streams_ts,
        db::system_distributed_keyspace& sys_dist_ks,
        db::system_distributed_keyspace::context ctx) {
-    if (co_await sys_dist_ks.cdc_desc_exists(streams_ts, ctx)) {
-        cdc_log.info("Generation {}: streams description table already updated.", streams_ts);
-        co_return;
+    if (sys_dist_ks.cdc_desc_exists(streams_ts, ctx).get0()) {
+        cdc_log.debug("update_streams_description: description of generation {} already inserted", streams_ts);
+        return;
    }

    // We might race with another node also inserting the description, but that's ok. It's an idempotent operation.

-    auto topo = co_await sys_dist_ks.read_cdc_topology_description(streams_ts, ctx);
+    auto topo = sys_dist_ks.read_cdc_topology_description(streams_ts, ctx).get0();
    if (!topo) {
-        throw no_generation_data_exception(streams_ts);
+        throw std::runtime_error(format("could not find streams data for timestamp {}", streams_ts));
    }

-    co_await sys_dist_ks.create_cdc_desc(streams_ts, *topo, ctx);
+    std::set<cdc::stream_id> streams_set;
+    for (auto& entry: topo->entries()) {
+        streams_set.insert(entry.streams.begin(), entry.streams.end());
+    }
+
+    std::vector<cdc::stream_id> streams_vec(streams_set.begin(), streams_set.end());
+
+    sys_dist_ks.create_cdc_desc(streams_ts, streams_vec, ctx).get();
    cdc_log.info("CDC description table successfully updated with generation {}.", streams_ts);
 }

@@ -406,7 +410,7 @@ void update_streams_description(
        noncopyable_function<unsigned()> get_num_token_owners,
        abort_source& abort_src) {
    try {
-        do_update_streams_description(streams_ts, *sys_dist_ks, { get_num_token_owners() }).get();
+        do_update_streams_description(streams_ts, *sys_dist_ks, { get_num_token_owners() });
    } catch(...) {
        cdc_log.warn(
            "Could not update CDC description table with generation {}: {}. Will retry in the background.",
@@ -419,7 +423,7 @@ void update_streams_description(
            while (true) {
                sleep_abortable(std::chrono::seconds(60), abort_src).get();
                try {
-                    do_update_streams_description(streams_ts, *sys_dist_ks, { get_num_token_owners() }).get();
+                    do_update_streams_description(streams_ts, *sys_dist_ks, { get_num_token_owners() });
                    return;
                } catch (...) {
                    cdc_log.warn(
@@ -431,176 +435,4 @@ void update_streams_description(
    }
 }

-static db_clock::time_point as_timepoint(const utils::UUID& uuid) {
-    return db_clock::time_point{std::chrono::milliseconds(utils::UUID_gen::get_adjusted_timestamp(uuid))};
-}
-
-static future<std::vector<db_clock::time_point>> get_cdc_desc_v1_timestamps(
-        db::system_distributed_keyspace& sys_dist_ks,
-        abort_source& abort_src,
-        const noncopyable_function<unsigned()>& get_num_token_owners) {
-    while (true) {
-        try {
-            co_return co_await sys_dist_ks.get_cdc_desc_v1_timestamps({ get_num_token_owners() });
-        } catch (...) {
-            cdc_log.warn(
-                    "Failed to retrieve generation timestamps for rewriting: {}. Retrying in 60s.",
-                    std::current_exception());
-        }
-        co_await sleep_abortable(std::chrono::seconds(60), abort_src);
-    }
-}
-
-// Contains a CDC log table's creation time (extracted from its schema's id)
-// and its CDC TTL setting.
-struct time_and_ttl {
-    db_clock::time_point creation_time;
-    int ttl;
-};
-
-/*
- * See `maybe_rewrite_streams_descriptions`.
- * This is the long-running-in-the-background part of that function.
- * It returns the timestamp of the last rewritten generation (if any).
- */
-static future<std::optional<db_clock::time_point>> rewrite_streams_descriptions(
-        std::vector<time_and_ttl> times_and_ttls,
-        shared_ptr<db::system_distributed_keyspace> sys_dist_ks,
-        noncopyable_function<unsigned()> get_num_token_owners,
-        abort_source& abort_src) {
-    cdc_log.info("Retrieving generation timestamps for rewriting...");
-    auto tss = co_await get_cdc_desc_v1_timestamps(*sys_dist_ks, abort_src, get_num_token_owners);
-    cdc_log.info("Generation timestamps retrieved.");
-
-    // Find first generation timestamp such that some CDC log table may contain data before this timestamp.
-    // This predicate is monotonic w.r.t the timestamps.
-    auto now = db_clock::now();
-    std::sort(tss.begin(), tss.end());
-    auto first = std::partition_point(tss.begin(), tss.end(), [&] (db_clock::time_point ts) {
-        // partition_point finds first element that does *not* satisfy the predicate.
-        return std::none_of(times_and_ttls.begin(), times_and_ttls.end(),
-                [&] (const time_and_ttl& tat) {
-            // In this CDC log table there are no entries older than the table's creation time
-            // or (now - the table's ttl). We subtract 10s to account for some possible clock drift.
-            // If ttl is set to 0 then entries in this table never expire. In that case we look
-            // only at the table's creation time.
-            auto no_entries_older_than =
-                (tat.ttl == 0 ? tat.creation_time : std::max(tat.creation_time, now - std::chrono::seconds(tat.ttl)))
-                    - std::chrono::seconds(10);
-            return no_entries_older_than < ts;
-        });
-    });
-
-    // Find first generation timestamp such that some CDC log table may contain data in this generation.
-    // This and all later generations need to be written to the new streams table.
-    if (first != tss.begin()) {
-        --first;
-    }
-
-    if (first == tss.end()) {
-        cdc_log.info("No generations to rewrite.");
-        co_return std::nullopt;
-    }
-
-    cdc_log.info("First generation to rewrite: {}", *first);
-
-    bool each_success = true;
-    co_await max_concurrent_for_each(first, tss.end(), 10, [&] (db_clock::time_point ts) -> future<> {
-        while (true) {
-            try {
-                co_return co_await do_update_streams_description(ts, *sys_dist_ks, { get_num_token_owners() });
-            } catch (const no_generation_data_exception& e) {
-                cdc_log.error("Failed to rewrite streams for generation {}: {}. Giving up.", ts, e);
-                each_success = false;
-                co_return;
-            } catch (...) {
-                cdc_log.warn("Failed to rewrite streams for generation {}: {}. Retrying in 60s.", ts, std::current_exception());
-            }
-            co_await sleep_abortable(std::chrono::seconds(60), abort_src);
-        }
-    });
-
-    if (each_success) {
-        cdc_log.info("Rewriting stream tables finished successfully.");
-    } else {
-        cdc_log.info("Rewriting stream tables finished, but some generations could not be rewritten (check the logs).");
-    }
-
-    if (first != tss.end()) {
-        co_return *std::prev(tss.end());
-    }
-
-    co_return std::nullopt;
-}
-
-future<> maybe_rewrite_streams_descriptions(
-        const database& db,
-        shared_ptr<db::system_distributed_keyspace> sys_dist_ks,
-        noncopyable_function<unsigned()> get_num_token_owners,
-        abort_source& abort_src) {
-    if (!db.has_schema(sys_dist_ks->NAME, sys_dist_ks->CDC_DESC_V1)) {
-        // This cluster never went through a Scylla version which used this table
-        // or the user deleted the table. Nothing to do.
-        co_return;
-    }
-
-    if (co_await db::system_keyspace::cdc_is_rewritten()) {
-        co_return;
-    }
-
-    if (db.get_config().cdc_dont_rewrite_streams()) {
-        cdc_log.warn("Stream rewriting disabled. Manual administrator intervention may be required...");
-        co_return;
-    }
-
-    // For each CDC log table get the TTL setting (from CDC options) and the table's creation time
-    std::vector<time_and_ttl> times_and_ttls;
-    for (auto& [_, cf] : db.get_column_families()) {
-        auto& s = *cf->schema();
-        auto base = cdc::get_base_table(db, s.ks_name(), s.cf_name());
-        if (!base) {
-            // Not a CDC log table.
-            continue;
-        }
-        auto& cdc_opts = base->cdc_options();
-        if (!cdc_opts.enabled()) {
-            // This table is named like a CDC log table but it's not one.
-            continue;
-        }
-
-        times_and_ttls.push_back(time_and_ttl{as_timepoint(s.id()), cdc_opts.ttl()});
-    }
-
-    if (times_and_ttls.empty()) {
-        // There's no point in rewriting old generations' streams (they don't contain any data).
-        cdc_log.info("No CDC log tables present, not rewriting stream tables.");
-        co_return co_await db::system_keyspace::cdc_set_rewritten(std::nullopt);
-    }
-
-    // It's safe to discard this future: the coroutine keeps system_distributed_keyspace alive
-    // and the abort source's lifetime extends the lifetime of any other service.
-    (void)(([_times_and_ttls = std::move(times_and_ttls), _sys_dist_ks = std::move(sys_dist_ks),
-                _get_num_token_owners = std::move(get_num_token_owners), &_abort_src = abort_src] () mutable -> future<> {
-        auto times_and_ttls = std::move(_times_and_ttls);
-        auto sys_dist_ks = std::move(_sys_dist_ks);
-        auto get_num_token_owners = std::move(_get_num_token_owners);
-        auto& abort_src = _abort_src;
-
-        // This code is racing with node startup. At this point, we're most likely still waiting for gossip to settle
-        // and some nodes that are UP may still be marked as DOWN by us.
-        // Let's sleep a bit to increase the chance that the first attempt at rewriting succeeds (it's still ok if
-        // it doesn't - we'll retry - but it's nice if we succeed without any warnings).
-        co_await sleep_abortable(std::chrono::seconds(10), abort_src);
-
-        cdc_log.info("Rewriting stream tables in the background...");
-        auto last_rewritten = co_await rewrite_streams_descriptions(
-                std::move(times_and_ttls),
-                std::move(sys_dist_ks),
-                std::move(get_num_token_owners),
-                abort_src);
-
-        co_await db::system_keyspace::cdc_set_rewritten(last_rewritten);
-    })());
-}
-
 } // namespace cdc
--- a/cdc/generation.hh
+++ b/cdc/generation.hh
@@ -40,8 +40,6 @@
 #include "database_fwd.hh"
 #include "db_clock.hh"
 #include "dht/token.hh"
-#include "locator/token_metadata.hh"
-#include "utils/chunked_vector.hh"

 namespace seastar {
    class abort_source;
@@ -57,6 +55,10 @@ namespace gms {
    class gossiper;
 } // namespace gms

+namespace locator {
+    class token_metadata;
+} // namespace locator
+
 namespace cdc {

 class stream_id final {
@@ -122,19 +124,14 @@ public:
 */ 
 class streams_version {
 public:
-    utils::chunked_vector<stream_id> streams;
+    std::vector<stream_id> streams;
    db_clock::time_point timestamp;
+    std::optional<db_clock::time_point> expired;

-    streams_version(utils::chunked_vector<stream_id> s, db_clock::time_point ts)
+    streams_version(std::vector<stream_id> s, db_clock::time_point ts, std::optional<db_clock::time_point> exp)
        : streams(std::move(s))
        , timestamp(ts)
-    {}
-};
-
-class no_generation_data_exception : public std::runtime_error {
-public:
-    no_generation_data_exception(db_clock::time_point generation_ts)
-        : std::runtime_error(format("could not find generation data for timestamp {}", generation_ts))
+        , expired(std::move(exp))
    {}
 };

@@ -170,11 +167,11 @@ future<db_clock::time_point> get_local_streams_timestamp();
 db_clock::time_point make_new_cdc_generation(
        const db::config& cfg,
        const std::unordered_set<dht::token>& bootstrap_tokens,
-        const locator::token_metadata_ptr tmptr,
+        const locator::token_metadata& tm,
        const gms::gossiper& g,
        db::system_distributed_keyspace& sys_dist_ks,
        std::chrono::milliseconds ring_delay,
-        bool add_delay);
+        bool for_testing);

 /* Retrieves CDC streams generation timestamp from the given endpoint's application state (broadcasted through gossip).
 * We might be during a rolling upgrade, so the timestamp might not be there (if the other node didn't upgrade yet),
@@ -199,15 +196,4 @@ void update_streams_description(
        noncopyable_function<unsigned()> get_num_token_owners,
        abort_source&);

-/* Part of the upgrade procedure. Useful in case where the version of Scylla that we're upgrading from
- * used the "cdc_streams_descriptions" table. This procedure ensures that the new "cdc_streams_descriptions_v2"
- * table contains streams of all generations that were present in the old table and may still contain data
- * (i.e. there exist CDC log tables that may contain rows with partition keys being the stream IDs from
- * these generations). */
-future<> maybe_rewrite_streams_descriptions(
-        const database&,
-        shared_ptr<db::system_distributed_keyspace>,
-        noncopyable_function<unsigned()> get_num_token_owners,
-        abort_source&);
-
 } // namespace cdc
--- a/cdc/log.cc
+++ b/cdc/log.cc
@@ -220,7 +220,7 @@ public:
            auto new_log_schema = create_log_schema(new_schema, log_schema ? std::make_optional(log_schema->id()) : std::nullopt);

            auto log_mut = log_schema 
-                ? db::schema_tables::make_update_table_mutations(db, keyspace.metadata(), log_schema, new_log_schema, timestamp, false)
+                ? db::schema_tables::make_update_table_mutations(keyspace.metadata(), log_schema, new_log_schema, timestamp, false)
                : db::schema_tables::make_create_table_mutations(keyspace.metadata(), new_log_schema, timestamp)
                ;

@@ -579,6 +579,11 @@ db_context::builder& db_context::builder::with_migration_notifier(service::migra
    return *this;
 }

+db_context::builder& db_context::builder::with_token_metadata(const locator::token_metadata& token_metadata) {
+    _token_metadata = token_metadata;
+    return *this;
+}
+
 db_context::builder& db_context::builder::with_cdc_metadata(cdc::metadata& cdc_metadata) {
    _cdc_metadata = cdc_metadata;
    return *this;
@@ -588,20 +593,14 @@ db_context db_context::builder::build() {
    return db_context{
        _proxy,
        _migration_notifier ? _migration_notifier->get() : service::get_local_storage_service().get_migration_notifier(),
+        _token_metadata ? _token_metadata->get() : service::get_local_storage_service().get_token_metadata(),
        _cdc_metadata ? _cdc_metadata->get() : service::get_local_storage_service().get_cdc_metadata(),
    };
 }

 // iterators for collection merge
 template<typename T>
-class collection_iterator {
-public:
-    using iterator_category = std::input_iterator_tag;
-    using value_type = const T;
-    using difference_type = std::ptrdiff_t;
-    using pointer = const T*;
-    using reference = const T&;
-private:
+class collection_iterator : public std::iterator<std::input_iterator_tag, const T> {
    bytes_view _v, _next;
    size_t _rem = 0;
    T _current;
@@ -709,16 +708,16 @@ private:
       }
       return false;
    }
-    int32_t compare(const T&, const value_type& v);
+    bool compare(const T&, const value_type& v);
 };

 template<>
-int32_t maybe_back_insert_iterator<std::vector<std::pair<bytes_view, bytes_view>>, bytes_view>::compare(const bytes_view& t, const value_type& v) {
+bool maybe_back_insert_iterator<std::vector<std::pair<bytes_view, bytes_view>>, bytes_view>::compare(const bytes_view& t, const value_type& v) {
    return _type.compare(t, v.first);
 }

 template<>
-int32_t maybe_back_insert_iterator<std::vector<bytes_view>, bytes_view>::compare(const bytes_view& t, const value_type& v) {
+bool maybe_back_insert_iterator<std::vector<bytes_view>, bytes_view>::compare(const bytes_view& t, const value_type& v) {
    return _type.compare(t, v);
 }

--- a/cdc/log.hh
+++ b/cdc/log.hh
@@ -100,16 +100,19 @@ public:
 struct db_context final {
    service::storage_proxy& _proxy;
    service::migration_notifier& _migration_notifier;
+    const locator::token_metadata& _token_metadata;
    cdc::metadata& _cdc_metadata;

    class builder final {
        service::storage_proxy& _proxy;
        std::optional<std::reference_wrapper<service::migration_notifier>> _migration_notifier;
+        std::optional<std::reference_wrapper<const locator::token_metadata>> _token_metadata;
        std::optional<std::reference_wrapper<cdc::metadata>> _cdc_metadata;
    public:
        builder(service::storage_proxy& proxy);

        builder& with_migration_notifier(service::migration_notifier& migration_notifier);
+        builder& with_token_metadata(const locator::token_metadata& token_metadata);
        builder& with_cdc_metadata(cdc::metadata&);

        db_context build();
--- a/clustering_bounds_comparator.hh
+++ b/clustering_bounds_comparator.hh
@@ -67,8 +67,8 @@ public:
        int operator()(const clustering_key_prefix& p1, int32_t w1, const clustering_key_prefix& p2, int32_t w2) const {
            auto type = _s.get().clustering_key_prefix_type();
            auto res = prefix_equality_tri_compare(type->types().begin(),
-                type->begin(p1.representation()), type->end(p1.representation()),
-                type->begin(p2.representation()), type->end(p2.representation()),
+                type->begin(p1), type->end(p1),
+                type->begin(p2), type->end(p2),
                ::tri_compare);
            if (res) {
                return res;
--- a/clustering_interval_set.hh
+++ b/clustering_interval_set.hh
@@ -72,14 +72,7 @@ public:
        }
        return result;
    }
-    class position_range_iterator {
-    public:
-        using iterator_category = std::input_iterator_tag;
-        using value_type = const position_range;
-        using difference_type = std::ptrdiff_t;
-        using pointer = const position_range*;
-        using reference = const position_range&;
-    private:
+    class position_range_iterator : public std::iterator<std::input_iterator_tag, const position_range> {
        set_type::iterator _i;
    public:
        position_range_iterator(set_type::iterator i) : _i(i) {}
--- a/collection_mutation.hh
+++ b/collection_mutation.hh
@@ -136,4 +136,4 @@ collection_mutation merge(const abstract_type&, collection_mutation_view, collec
 collection_mutation difference(const abstract_type&, collection_mutation_view, collection_mutation_view);

 // Serializes the given collection of cells to a sequence of bytes ready to be sent over the CQL protocol.
-bytes_ostream serialize_for_cql(const abstract_type&, collection_mutation_view, cql_serialization_format);
+bytes serialize_for_cql(const abstract_type&, collection_mutation_view, cql_serialization_format);
--- a/column_computation.hh
+++ b/column_computation.hh
@@ -54,36 +54,6 @@ public:
    virtual bytes_opt compute_value(const schema& schema, const partition_key& key, const clustering_row& row) const = 0;
 };

-/*
- * Computes token value of partition key and returns it as bytes.
- *
- * Should NOT be used (use token_column_computation), because ordering
- * of bytes is different than ordering of tokens (signed vs unsigned comparison).
- *
- * The type name stored for computations of this class is "token" - this was
- * the original implementation. (now deprecated for new tables)
- */
-class legacy_token_column_computation : public column_computation {
-public:
-    virtual column_computation_ptr clone() const override {
-        return std::make_unique<legacy_token_column_computation>(*this);
-    }
-    virtual bytes serialize() const override;
-    virtual bytes_opt compute_value(const schema& schema, const partition_key& key, const clustering_row& row) const override;
-};
-
-
-/*
- * Computes token value of partition key and returns it as long_type.
- * The return type means that it can be trivially sorted (for example
- * if computed column using this computation is a clustering key),
- * preserving the correct order of tokens (using signed comparisons).
- *
- * Please use this class instead of legacy_token_column_computation.
- * 
- * The type name stored for computations of this class is "token_v2".
- * (the name "token" refers to the deprecated legacy_token_column_computation)
- */
 class token_column_computation : public column_computation {
 public:
    virtual column_computation_ptr clone() const override {
--- a/compound.hh
+++ b/compound.hh
@@ -73,19 +73,12 @@ private:
     *   <len(value1)><value1><len(value2)><value2>...<len(value_n)><value_n>
     *
     */
-    template<typename RangeOfSerializedComponents, FragmentedMutableView Out>
-    static void serialize_value(RangeOfSerializedComponents&& values, Out out) {
+    template<typename RangeOfSerializedComponents, typename CharOutputIterator>
+    static void serialize_value(RangeOfSerializedComponents&& values, CharOutputIterator& out) {
        for (auto&& val : values) {
            assert(val.size() <= std::numeric_limits<size_type>::max());
            write<size_type>(out, size_type(val.size()));
-            using val_type = std::remove_cvref_t<decltype(val)>;
-            if constexpr (FragmentedView<val_type>) {
-                write_fragmented(out, val);
-            } else if constexpr (std::same_as<val_type, managed_bytes>) {
-                write_fragmented(out, managed_bytes_view(val));
-            } else {
-                write_fragmented(out, single_fragmented_view(val));
-            }
+            out = std::copy(val.begin(), val.end(), out);
        }
    }
    template <typename RangeOfSerializedComponents>
@@ -97,27 +90,25 @@ private:
        return len;
    }
 public:
-    managed_bytes serialize_single(managed_bytes&& v) const {
-        return serialize_value({std::move(v)});
-    }
-    managed_bytes serialize_single(bytes&& v) const {
+    bytes serialize_single(bytes&& v) const {
        return serialize_value({std::move(v)});
    }
    template<typename RangeOfSerializedComponents>
-    static managed_bytes serialize_value(RangeOfSerializedComponents&& values) {
+    static bytes serialize_value(RangeOfSerializedComponents&& values) {
        auto size = serialized_size(values);
        if (size > std::numeric_limits<size_type>::max()) {
            throw std::runtime_error(format("Key size too large: {:d} > {:d}", size, std::numeric_limits<size_type>::max()));
        }
-        managed_bytes b(managed_bytes::initialized_later(), size);
-        serialize_value(values, managed_bytes_mutable_view(b));
+        bytes b(bytes::initialized_later(), size);
+        auto i = b.begin();
+        serialize_value(values, i);
        return b;
    }
    template<typename T>
-    static managed_bytes serialize_value(std::initializer_list<T> values) {
+    static bytes serialize_value(std::initializer_list<T> values) {
        return serialize_value(boost::make_iterator_range(values.begin(), values.end()));
    }
-    managed_bytes serialize_optionals(const std::vector<bytes_opt>& values) const {
+    bytes serialize_optionals(const std::vector<bytes_opt>& values) const {
        return serialize_value(values | boost::adaptors::transformed([] (const bytes_opt& bo) -> bytes_view {
            if (!bo) {
                throw std::logic_error("attempted to create key component from empty optional");
@@ -125,7 +116,7 @@ public:
            return *bo;
        }));
    }
-    managed_bytes serialize_value_deep(const std::vector<data_value>& values) const {
+    bytes serialize_value_deep(const std::vector<data_value>& values) const {
        // TODO: Optimize
        std::vector<bytes> partial;
        partial.reserve(values.size());
@@ -136,26 +127,19 @@ public:
        }
        return serialize_value(partial);
    }
-    managed_bytes decompose_value(const value_type& values) const {
+    bytes decompose_value(const value_type& values) const {
        return serialize_value(values);
    }
-    class iterator {
-    public:
-        using iterator_category = std::input_iterator_tag;
-        using value_type = const managed_bytes_view;
-        using difference_type = std::ptrdiff_t;
-        using pointer = const value_type*;
-        using reference = const value_type&;
+    class iterator : public std::iterator<std::input_iterator_tag, const bytes_view> {
    private:
-        managed_bytes_view _v;
-        managed_bytes_view _current;
-        size_t _remaining = 0;
+        bytes_view _v;
+        bytes_view _current;
    private:
        void read_current() {
-            _remaining = _v.size_bytes();
            size_type len;
            {
                if (_v.empty()) {
+                    _v = bytes_view(nullptr, 0);
                    return;
                }
                len = read_simple<size_type>(_v);
@@ -163,16 +147,15 @@ public:
                    throw_with_backtrace<marshal_exception>(format("compound_type iterator - not enough bytes, expected {:d}, got {:d}", len, _v.size()));
                }
            }
-            _current = _v.prefix(len);
-            _v.remove_prefix(_current.size_bytes());
+            _current = bytes_view(_v.begin(), len);
+            _v.remove_prefix(len);
        }
    public:
        struct end_iterator_tag {};
-        iterator(const managed_bytes_view& v) : _v(v) {
+        iterator(const bytes_view& v) : _v(v) {
            read_current();
        }
-        iterator(end_iterator_tag, const managed_bytes_view& v) : _v() {}
-        iterator() {}
+        iterator(end_iterator_tag, const bytes_view& v) : _v(nullptr, 0) {}
        iterator& operator++() {
            read_current();
            return *this;
@@ -184,40 +167,29 @@ public:
        }
        const value_type& operator*() const { return _current; }
        const value_type* operator->() const { return &_current; }
-        bool operator==(const iterator& i) const { return _remaining == i._remaining; }
+        bool operator!=(const iterator& i) const { return _v.begin() != i._v.begin(); }
+        bool operator==(const iterator& i) const { return _v.begin() == i._v.begin(); }
    };
-    static iterator begin(managed_bytes_view v) {
+    static iterator begin(const bytes_view& v) {
        return iterator(v);
    }
-    static iterator end(managed_bytes_view v) {
+    static iterator end(const bytes_view& v) {
        return iterator(typename iterator::end_iterator_tag(), v);
    }
-    static boost::iterator_range<iterator> components(managed_bytes_view v) {
+    static boost::iterator_range<iterator> components(const bytes_view& v) {
        return { begin(v), end(v) };
    }
-    value_type deserialize_value(managed_bytes_view v) const {
+    value_type deserialize_value(bytes_view v) const {
        std::vector<bytes> result;
        result.reserve(_types.size());
        std::transform(begin(v), end(v), std::back_inserter(result), [] (auto&& v) {
-            return to_bytes(v);
+            return bytes(v.begin(), v.end());
        });
        return result;
    }
-    bool less(managed_bytes_view b1, managed_bytes_view b2) const {
-        return with_linearized(b1, [&] (bytes_view bv1) {
-            return with_linearized(b2, [&] (bytes_view bv2) {
-                return less(bv1, bv2);
-            });
-        });
-    }
    bool less(bytes_view b1, bytes_view b2) const {
        return compare(b1, b2) < 0;
    }
-    size_t hash(managed_bytes_view v) const{
-        return with_linearized(v, [&] (bytes_view v) {
-            return hash(v);
-        });
-    }
    size_t hash(bytes_view v) const {
        if (_byte_order_equal) {
            return std::hash<bytes_view>()(v);
@@ -230,13 +202,6 @@ public:
        }
        return h;
    }
-    int compare(managed_bytes_view b1, managed_bytes_view b2) const {
-        return with_linearized(b1, [&] (bytes_view bv1) {
-            return with_linearized(b2, [&] (bytes_view bv2) {
-                return compare(bv1, bv2);
-            });
-        });
-    }
    int compare(bytes_view b1, bytes_view b2) const {
        if (_byte_order_comparable) {
            if (_is_reversed) {
@@ -251,21 +216,15 @@ public:
            });
    }
    // Returns true iff given prefix has no missing components
-    bool is_full(managed_bytes_view v) const {
+    bool is_full(bytes_view v) const {
        assert(AllowPrefixes == allow_prefixes::yes);
        return std::distance(begin(v), end(v)) == (ssize_t)_types.size();
    }
-    bool is_empty(managed_bytes_view v) const {
-        return v.empty();
-    }
-    bool is_empty(const managed_bytes& v) const {
-        return v.empty();
-    }
    bool is_empty(bytes_view v) const {
        return begin(v) == end(v);
    }
-    void validate(managed_bytes_view v) const {
-        std::vector<managed_bytes_view> values(begin(v), end(v));
+    void validate(bytes_view v) const {
+        std::vector<bytes_view> values(begin(v), end(v));
        if (AllowPrefixes == allow_prefixes::no && values.size() < _types.size()) {
            throw marshal_exception(fmt::format("compound::validate(): non-prefixable compound cannot be a prefix"));
        }
@@ -278,13 +237,6 @@ public:
            _types[i]->validate(values[i], cql_serialization_format::internal());
        }
    }
-    bool equal(managed_bytes_view v1, managed_bytes_view v2) const {
-        return with_linearized(v1, [&] (bytes_view bv1) {
-            return with_linearized(v2, [&] (bytes_view bv2) {
-                return equal(bv1, bv2);
-            });
-        });
-    }
    bool equal(bytes_view v1, bytes_view v2) const {
        if (_byte_order_equal) {
            return compare_unsigned(v1, v2) == 0;
--- a/compound_compat.hh
+++ b/compound_compat.hh
@@ -54,21 +54,14 @@ template <typename CompoundType>
 class legacy_compound_view {
    static_assert(!CompoundType::is_prefixable, "Legacy view not defined for prefixes");
    CompoundType& _type;
-    managed_bytes_view _packed;
+    bytes_view _packed;
 public:
-    legacy_compound_view(CompoundType& c, managed_bytes_view packed)
+    legacy_compound_view(CompoundType& c, bytes_view packed)
        : _type(c)
        , _packed(packed)
    { }

-    class iterator {
-    public:
-        using iterator_category = std::input_iterator_tag;
-        using value_type = bytes::value_type;
-        using difference_type = std::ptrdiff_t;
-        using pointer = bytes::value_type*;
-        using reference = bytes::value_type&;
-    private:
+    class iterator : public std::iterator<std::input_iterator_tag, bytes::value_type> {
        bool _singular;
        // Offset within virtual output space of a component.
        //
@@ -147,18 +140,18 @@ public:
        { }

        // @k1 and @k2 must be serialized using @type, which was passed to the constructor.
-        int operator()(managed_bytes_view k1, managed_bytes_view k2) const {
+        int operator()(bytes_view k1, bytes_view k2) const {
            if (_type.is_singular()) {
                return compare_unsigned(*_type.begin(k1), *_type.begin(k2));
            }
            return lexicographical_tri_compare(
                _type.begin(k1), _type.end(k1),
                _type.begin(k2), _type.end(k2),
-                [] (const managed_bytes_view& c1, const managed_bytes_view& c2) -> int {
+                [] (const bytes_view& c1, const bytes_view& c2) -> int {
                    if (c1.size() != c2.size() || !c1.size()) {
                        return c1.size() < c2.size() ? -1 : c1.size() ? 1 : 0;
                    }
-                    return compare_unsigned(c1, c2);
+                    return memcmp(c1.begin(), c2.begin(), c1.size());
                });
        }
    };
@@ -188,7 +181,7 @@ public:
 // @packed is assumed to be serialized using supplied @type.
 template <typename CompoundType>
 static inline
-bytes to_legacy(CompoundType& type, managed_bytes_view packed) {
+bytes to_legacy(CompoundType& type, bytes_view packed) {
    legacy_compound_view<CompoundType> lv(type, packed);
    bytes legacy_form(bytes::initialized_later(), lv.size());
    std::copy(lv.begin(), lv.end(), legacy_form.begin());
@@ -264,12 +257,6 @@ private:
    static void write_value(Value&& val, CharOutputIterator& out) {
        out = std::copy(val.begin(), val.end(), out);
    }
-    template<typename CharOutputIterator>
-    static void write_value(managed_bytes_view val, CharOutputIterator& out) {
-        for (bytes_view frag : fragment_range(val)) {
-            out = std::copy(frag.begin(), frag.end(), out);
-        }
-    }
    template <typename CharOutputIterator>
    static void write_value(const data_value& val, CharOutputIterator& out) {
        val.serialize(out);
@@ -352,14 +339,7 @@ public:
        return eoc_byte == 0 ? eoc::none : (eoc_byte < 0 ? eoc::start : eoc::end);
    }

-    class iterator {
-    public:
-        using iterator_category = std::input_iterator_tag;
-        using value_type = const component_view;
-        using difference_type = std::ptrdiff_t;
-        using pointer = const component_view*;
-        using reference = const component_view&;
-    private:
+    class iterator : public std::iterator<std::input_iterator_tag, const component_view> {
        bytes_view _v;
        component_view _current;
        bool _strict_mode = true;
@@ -411,7 +391,6 @@ public:
        iterator(end_iterator_tag) : _v(nullptr, 0) {}

    public:
-        iterator() : iterator(end_iterator_tag()) {}
        iterator& operator++() {
            read_current();
            return *this;
--- a/conf/scylla.yaml
+++ b/conf/scylla.yaml
@@ -99,8 +99,8 @@ listen_address: localhost
 # listen_on_broadcast_address: false

 # port for the CQL native transport to listen for clients on
-# For security reasons, you should not expose this port to the internet. Firewall it if needed.
-# To disable the CQL native transport, remove this option and configure native_transport_port_ssl.
+# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
+# To disable the CQL native transport, set this option to 0.
 native_transport_port: 9042

 # Like native_transport_port, but clients are forwarded to specific shards, based on the
@@ -230,9 +230,6 @@ batch_size_fail_threshold_in_kb: 50
 # - PasswordAuthenticator relies on username/password pairs to authenticate
 #   users. It keeps usernames and hashed passwords in system_auth.credentials table.
 #   Please increase system_auth keyspace replication factor if you use this authenticator.
-# - com.scylladb.auth.TransitionalAuthenticator requires username/password pair
-#   to authenticate in the same manner as PasswordAuthenticator, but improper credentials
-#   result in being logged in as an anonymous user. Use for upgrading clusters' auth.
 # authenticator: AllowAllAuthenticator

 # Authorization backend, implementing IAuthorizer; used to limit access/provide permissions
@@ -242,9 +239,6 @@ batch_size_fail_threshold_in_kb: 50
 # - AllowAllAuthorizer allows any action to any user - set it to disable authorization.
 # - CassandraAuthorizer stores permissions in system_auth.permissions table. Please
 #   increase system_auth keyspace replication factor if you use this authorizer.
-# - com.scylladb.auth.TransitionalAuthorizer wraps around the CassandraAuthorizer, using it for
-#   authorizing permission management. Otherwise, it allows all. Use for upgrading
-#   clusters' auth.
 # authorizer: AllowAllAuthorizer

 # initial_token allows you to specify tokens manually.  While you can use # it with
--- a/configure.py
+++ b/configure.py
@@ -59,9 +59,6 @@ i18n_xlat = {
 }

 python3_dependencies = subprocess.run('./install-dependencies.sh --print-python3-runtime-packages', shell=True, capture_output=True, encoding='utf-8').stdout.strip()
-node_exporter_filename = subprocess.run('./install-dependencies.sh --print-node-exporter-filename', shell=True, capture_output=True, encoding='utf-8').stdout.strip()
-node_exporter_dirname = os.path.basename(node_exporter_filename).rstrip('.tar.gz')
-

 def pkgname(name):
    if name in i18n_xlat:
@@ -260,18 +257,18 @@ modes = {
        'stack-usage-threshold': 1024*40,
    },
    'release': {
-        'cxxflags': '-O3 -ffunction-sections -fdata-sections ',
-        'cxx_ld_flags': '-Wl,--gc-sections',
+        'cxxflags': '',
+        'cxx_ld_flags': '-O3 -ffunction-sections -fdata-sections -Wl,--gc-sections',
        'stack-usage-threshold': 1024*13,
    },
    'dev': {
-        'cxxflags': '-O1 -DDEVEL -DSEASTAR_ENABLE_ALLOC_FAILURE_INJECTION -DSCYLLA_ENABLE_ERROR_INJECTION',
-        'cxx_ld_flags': '',
+        'cxxflags': '-DSEASTAR_ENABLE_ALLOC_FAILURE_INJECTION -DSCYLLA_ENABLE_ERROR_INJECTION',
+        'cxx_ld_flags': '-O1',
        'stack-usage-threshold': 1024*21,
    },
    'sanitize': {
-        'cxxflags': '-Os -DDEBUG -DSANITIZE -DDEBUG_LSA_SANITIZER -DSCYLLA_ENABLE_ERROR_INJECTION',
-        'cxx_ld_flags': '',
+        'cxxflags': '-DDEBUG -DSANITIZE -DDEBUG_LSA_SANITIZER -DSCYLLA_ENABLE_ERROR_INJECTION',
+        'cxx_ld_flags': '-Os',
        'stack-usage-threshold': 1024*50,
    }
 }
@@ -281,7 +278,7 @@ scylla_tests = set([
    'test/boost/cdc_generation_test',
    'test/boost/aggregate_fcts_test',
    'test/boost/allocation_strategy_test',
-    'test/boost/alternator_unit_test',
+    'test/boost/alternator_base64_test',
    'test/boost/anchorless_list_test',
    'test/boost/auth_passwords_test',
    'test/boost/auth_resource_test',
@@ -318,7 +315,6 @@ scylla_tests = set([
    'test/boost/crc_test',
    'test/boost/data_listeners_test',
    'test/boost/database_test',
-    'test/boost/double_decker_test',
    'test/boost/duration_test',
    'test/boost/dynamic_bitset_test',
    'test/boost/enum_option_test',
@@ -333,9 +329,7 @@ scylla_tests = set([
    'test/boost/gossip_test',
    'test/boost/gossiping_property_file_snitch_test',
    'test/boost/hash_test',
-    'test/boost/hashers_test',
    'test/boost/idl_test',
-    'test/boost/imr_test',
    'test/boost/input_stream_test',
    'test/boost/json_cql_query_test',
    'test/boost/json_test',
@@ -349,7 +343,6 @@ scylla_tests = set([
    'test/boost/estimated_histogram_test',
    'test/boost/logalloc_test',
    'test/boost/managed_vector_test',
-    'test/boost/managed_bytes_test',
    'test/boost/intrusive_array_test',
    'test/boost/map_difference_test',
    'test/boost/memtable_test',
@@ -391,7 +384,6 @@ scylla_tests = set([
    'test/boost/sstable_resharding_test',
    'test/boost/sstable_directory_test',
    'test/boost/sstable_test',
-    'test/boost/sstable_move_test',
    'test/boost/storage_proxy_test',
    'test/boost/top_k_test',
    'test/boost/transport_test',
@@ -426,7 +418,7 @@ scylla_tests = set([
    'test/perf/perf_fast_forward',
    'test/perf/perf_hash',
    'test/perf/perf_mutation',
-    'test/perf/perf_collection',
+    'test/perf/perf_bptree',
    'test/perf/perf_row_cache_update',
    'test/perf/perf_simple_query',
    'test/perf/perf_sstable',
@@ -456,7 +448,6 @@ apps = set([
    'scylla',
    'test/tools/cql_repl',
    'tools/scylla-types',
-    'tools/scylla-sstable-index',
 ])

 tests = scylla_tests | perf_tests | raft_tests
@@ -486,9 +477,9 @@ arg_parser.add_argument('--ldflags', action='store', dest='user_ldflags', defaul
                        help='Extra flags for the linker')
 arg_parser.add_argument('--target', action='store', dest='target', default=default_target_arch(),
                        help='Target architecture (-march)')
-arg_parser.add_argument('--compiler', action='store', dest='cxx', default='clang++',
+arg_parser.add_argument('--compiler', action='store', dest='cxx', default='g++',
                        help='C++ compiler path')
-arg_parser.add_argument('--c-compiler', action='store', dest='cc', default='clang',
+arg_parser.add_argument('--c-compiler', action='store', dest='cc', default='gcc',
                        help='C compiler path')
 add_tristate(arg_parser, name='dpdk', dest='dpdk',
                        help='Use dpdk (from seastar dpdk sources) (default=True for release builds)')
@@ -528,6 +519,17 @@ arg_parser.add_argument('--test-repeat', dest='test_repeat', action='store', typ
 arg_parser.add_argument('--test-timeout', dest='test_timeout', action='store', type=str, default='7200')
 args = arg_parser.parse_args()

+coroutines_test_src = '''
+#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+#if GCC_VERSION < 100201
+    #error "Coroutines support requires at leat gcc 10.2.1"
+#endif
+'''
+compiler_supports_coroutines = try_compile(compiler=args.cxx, source=coroutines_test_src)
+
+if args.build_raft and not compiler_supports_coroutines:
+    raise Exception("--build-raft is requested, while the used compiler does not support coroutines")
+
 if not args.build_raft:
    all_artifacts.difference_update(raft_tests)
    tests.difference_update(raft_tests)
@@ -582,7 +584,6 @@ scylla_core = (['database.cc',
                'sstables/mp_row_consumer.cc',
                'sstables/sstables.cc',
                'sstables/sstables_manager.cc',
-                'sstables/sstable_set.cc',
                'sstables/mx/writer.cc',
                'sstables/kl/writer.cc',
                'sstables/sstable_version.cc',
@@ -726,7 +727,6 @@ scylla_core = (['database.cc',
                'db/data_listeners.cc',
                'db/hints/manager.cc',
                'db/hints/resource_manager.cc',
-                'db/hints/host_filter.cc',
                'db/config.cc',
                'db/extensions.cc',
                'db/heat_load_balance.cc',
@@ -991,7 +991,6 @@ deps = {
    'test/tools/cql_repl': idls + ['test/tools/cql_repl.cc'] + scylla_core + scylla_tests_generic_dependencies,
    #FIXME: we don't need all of scylla_core here, only the types module, need to modularize scylla_core.
    'tools/scylla-types': idls + ['tools/scylla-types.cc'] + scylla_core,
-    'tools/scylla-sstable-index': idls + ['tools/scylla-sstable-index.cc'] + scylla_core,
 }

 pure_boost_tests = set([
@@ -1011,7 +1010,6 @@ pure_boost_tests = set([
    'test/boost/dynamic_bitset_test',
    'test/boost/enum_option_test',
    'test/boost/enum_set_test',
-    'test/boost/hashers_test',
    'test/boost/idl_test',
    'test/boost/json_test',
    'test/boost/keys_test',
@@ -1028,12 +1026,11 @@ pure_boost_tests = set([
    'test/boost/top_k_test',
    'test/boost/vint_serialization_test',
    'test/boost/bptree_test',
-    'test/boost/utf8_test',
    'test/manual/streaming_histogram_test',
 ])

 tests_not_using_seastar_test_framework = set([
-    'test/boost/alternator_unit_test',
+    'test/boost/alternator_base64_test',
    'test/boost/small_vector_test',
    'test/manual/gossip',
    'test/manual/message',
@@ -1042,7 +1039,7 @@ tests_not_using_seastar_test_framework = set([
    'test/perf/perf_cql_parser',
    'test/perf/perf_hash',
    'test/perf/perf_mutation',
-    'test/perf/perf_collection',
+    'test/perf/perf_bptree',
    'test/perf/perf_row_cache_update',
    'test/unit/lsa_async_eviction_test',
    'test/unit/lsa_sync_eviction_test',
@@ -1107,7 +1104,7 @@ deps['test/boost/linearizing_input_stream_test'] = [
 ]

 deps['test/boost/duration_test'] += ['test/lib/exception_utils.cc']
-deps['test/boost/alternator_unit_test'] += ['alternator/base64.cc']
+deps['test/boost/alternator_base64_test'] += ['alternator/base64.cc']

 deps['test/raft/replication_test'] = ['test/raft/replication_test.cc'] + scylla_raft_dependencies
 deps['test/boost/raft_fsm_test'] =  ['test/boost/raft_fsm_test.cc', 'test/lib/log.cc'] + scylla_raft_dependencies
@@ -1151,13 +1148,12 @@ warnings = [
    '-Wno-delete-non-abstract-non-virtual-dtor',
    '-Wno-unknown-attributes',
    '-Wno-braced-scalar-init',
+    '-Wno-unused-value',
    '-Wno-range-loop-construct',
    '-Wno-unused-function',
    '-Wno-implicit-int-float-conversion',
    '-Wno-delete-abstract-non-virtual-dtor',
    '-Wno-uninitialized-const-reference',
-    # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77728
-    '-Wno-psabi',
 ]

 warnings = [w
@@ -1173,11 +1169,11 @@ optimization_flags = [
 optimization_flags = [o
                      for o in optimization_flags
                      if flag_supported(flag=o, compiler=args.cxx)]
-modes['release']['cxxflags'] += ' ' + ' '.join(optimization_flags)
+modes['release']['cxx_ld_flags'] += ' ' + ' '.join(optimization_flags)

 if flag_supported(flag='-Wstack-usage=4096', compiler=args.cxx):
    for mode in modes:
-        modes[mode]['cxxflags'] += f' -Wstack-usage={modes[mode]["stack-usage-threshold"]} -Wno-error=stack-usage='
+        modes[mode]['cxx_ld_flags'] += f' -Wstack-usage={modes[mode]["stack-usage-threshold"]} -Wno-error=stack-usage='

 linker_flags = linker_flags(compiler=args.cxx)

@@ -1292,8 +1288,6 @@ file = open(f'{outdir}/SCYLLA-VERSION-FILE', 'r')
 scylla_version = file.read().strip()
 file = open(f'{outdir}/SCYLLA-RELEASE-FILE', 'r')
 scylla_release = file.read().strip()
-file = open(f'{outdir}/SCYLLA-PRODUCT-FILE', 'r')
-scylla_product = file.read().strip()

 extra_cxxflags["release.cc"] = "-DSCYLLA_VERSION=\"\\\"" + scylla_version + "\\\"\" -DSCYLLA_RELEASE=\"\\\"" + scylla_release + "\\\"\""

@@ -1335,6 +1329,9 @@ args.user_cflags += f" -ffile-prefix-map={curdir}=."

 seastar_cflags = args.user_cflags

+if build_raft:
+    seastar_cflags += ' -fcoroutines'
+
 if args.target != '':
    seastar_cflags += ' -march=' + args.target
 seastar_ldflags = args.user_ldflags
@@ -1343,13 +1340,6 @@ libdeflate_cflags = seastar_cflags

 MODE_TO_CMAKE_BUILD_TYPE = {'release' : 'RelWithDebInfo', 'debug' : 'Debug', 'dev' : 'Dev', 'sanitize' : 'Sanitize' }

-# cmake likes to separate things with semicolons
-def semicolon_separated(*flags):
-    # original flags may be space separated, so convert to string still
-    # using spaces
-    f = ' '.join(flags)
-    return re.sub(' +', ';', f)
-
 def configure_seastar(build_dir, mode):
    seastar_build_dir = os.path.join(build_dir, mode, 'seastar')

@@ -1358,8 +1348,8 @@ def configure_seastar(build_dir, mode):
        '-DCMAKE_C_COMPILER={}'.format(args.cc),
        '-DCMAKE_CXX_COMPILER={}'.format(args.cxx),
        '-DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON',
-        '-DSeastar_CXX_FLAGS={}'.format((seastar_cflags).replace(' ', ';')),
-        '-DSeastar_LD_FLAGS={}'.format(semicolon_separated(seastar_ldflags, modes[mode]['cxx_ld_flags'])),
+        '-DSeastar_CXX_FLAGS={}'.format((seastar_cflags + ' ' + modes[mode]['cxx_ld_flags']).replace(' ', ';')),
+        '-DSeastar_LD_FLAGS={}'.format(seastar_ldflags),
        '-DSeastar_CXX_DIALECT=gnu++20',
        '-DSeastar_API_LEVEL=6',
        '-DSeastar_UNUSED_RESULT_ERROR=ON',
@@ -1470,7 +1460,7 @@ if not args.staticboost:
    args.user_cflags += ' -DBOOST_TEST_DYN_LINK'

 if build_raft:
-    args.user_cflags += ' -DENABLE_SCYLLA_RAFT'
+    args.user_cflags += ' -DENABLE_SCYLLA_RAFT -fcoroutines'

 # thrift version detection, see #4538
 proc_res = subprocess.run(["thrift", "-version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
@@ -1809,18 +1799,24 @@ with open(buildfile_tmp, 'w') as f:
        f.write(textwrap.dedent('''\
            build $builddir/{mode}/iotune: copy $builddir/{mode}/seastar/apps/iotune/iotune
            ''').format(**locals()))
-        f.write('build $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz: package $builddir/{mode}/scylla $builddir/{mode}/iotune $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian $builddir/node_exporter | always\n'.format(**locals()))
+        f.write('build $builddir/{mode}/dist/tar/scylla-package.tar.gz: package $builddir/{mode}/scylla $builddir/{mode}/iotune $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian | always\n'.format(**locals()))
+        f.write('  pool = submodule_pool\n')
        f.write('  mode = {mode}\n'.format(**locals()))
-        f.write(f'build $builddir/dist/{mode}/redhat: rpmbuild $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz\n')
+        f.write(f'build $builddir/{mode}/scylla-package.tar.gz: copy $builddir/{mode}/dist/tar/scylla-package.tar.gz\n')
+        f.write(f'build $builddir/dist/{mode}/redhat: rpmbuild $builddir/{mode}/scylla-package.tar.gz\n')
+        f.write(f'  pool = submodule_pool\n')
        f.write(f'  mode = {mode}\n')
-        f.write(f'build $builddir/dist/{mode}/debian: debbuild $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz\n')
+        f.write(f'build $builddir/dist/{mode}/debian: debbuild $builddir/{mode}/scylla-package.tar.gz\n')
+        f.write(f'  pool = submodule_pool\n')
        f.write(f'  mode = {mode}\n')
        f.write(f'build dist-server-{mode}: phony $builddir/dist/{mode}/redhat $builddir/dist/{mode}/debian\n')
-        f.write(f'build dist-jmx-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-jmx-package.tar.gz dist-jmx-rpm dist-jmx-deb\n')
-        f.write(f'build dist-tools-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-tools-package.tar.gz dist-tools-rpm dist-tools-deb\n')
+        f.write(f'build dist-jmx-{mode}: phony $builddir/{mode}/dist/tar/scylla-jmx-package.tar.gz dist-jmx-rpm dist-jmx-deb\n')
+        f.write(f'build dist-tools-{mode}: phony $builddir/{mode}/dist/tar/scylla-tools-package.tar.gz dist-tools-rpm dist-tools-deb\n')
        f.write(f'build dist-python3-{mode}: phony dist-python3-tar dist-python3-rpm dist-python3-deb compat-python3-rpm compat-python3-deb\n')
-        f.write(f'build dist-unified-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-unified-package-{scylla_version}.{scylla_release}.tar.gz\n')
-        f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-unified-package-{scylla_version}.{scylla_release}.tar.gz: unified $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-python3-package.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-jmx-package.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-tools-package.tar.gz | always\n')
+        f.write(f'build dist-unified-{mode}: phony $builddir/{mode}/dist/tar/scylla-unified-package-{scylla_version}.{scylla_release}.tar.gz\n')
+        f.write(f'build $builddir/{mode}/scylla-unified-package-{scylla_version}.{scylla_release}.tar.gz: copy $builddir/{mode}/dist/tar/scylla-unified-package.tar.gz\n')
+        f.write(f'build $builddir/{mode}/dist/tar/scylla-unified-package-{scylla_version}.{scylla_release}.tar.gz: unified $builddir/{mode}/dist/tar/scylla-package.tar.gz $builddir/{mode}/dist/tar/scylla-python3-package.tar.gz $builddir/{mode}/dist/tar/scylla-jmx-package.tar.gz $builddir/{mode}/dist/tar/scylla-tools-package.tar.gz | always\n')
+        f.write(f'  pool = submodule_pool\n')
        f.write(f'  mode = {mode}\n')
        f.write('rule libdeflate.{mode}\n'.format(**locals()))
        f.write('  command = make -C libdeflate BUILD_DIR=../$builddir/{mode}/libdeflate/ CFLAGS="{libdeflate_cflags}" CC={args.cc} ../$builddir/{mode}/libdeflate//libdeflate.a\n'.format(**locals()))
@@ -1847,12 +1843,12 @@ with open(buildfile_tmp, 'w') as f:
    )

    f.write(textwrap.dedent(f'''\
-        build dist-unified-tar: phony {' '.join([f'$builddir/{mode}/dist/tar/{scylla_product}-unified-package-{scylla_version}.{scylla_release}.tar.gz' for mode in build_modes])}
+        build dist-unified-tar: phony {' '.join(['$builddir/{mode}/scylla-unified-package-$scylla_version.$scylla_release.tar.gz'.format(mode=mode) for mode in build_modes])}
        build dist-unified: phony dist-unified-tar

        build dist-server-deb: phony {' '.join(['$builddir/dist/{mode}/debian'.format(mode=mode) for mode in build_modes])}
        build dist-server-rpm: phony {' '.join(['$builddir/dist/{mode}/redhat'.format(mode=mode) for mode in build_modes])}
-        build dist-server-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz'.format(mode=mode, scylla_product=scylla_product) for mode in build_modes])}
+        build dist-server-tar: phony {' '.join(['$builddir/{mode}/scylla-package.tar.gz'.format(mode=mode) for mode in build_modes])}
        build dist-server: phony dist-server-tar dist-server-rpm dist-server-deb

        rule build-submodule-reloc
@@ -1862,26 +1858,26 @@ with open(buildfile_tmp, 'w') as f:
        rule build-submodule-deb
          command = cd $dir && ./reloc/build_deb.sh --reloc-pkg $artifact

-        build tools/jmx/build/{scylla_product}-jmx-package.tar.gz: build-submodule-reloc
+        build tools/jmx/build/scylla-jmx-package.tar.gz: build-submodule-reloc
          reloc_dir = tools/jmx
-        build dist-jmx-rpm: build-submodule-rpm tools/jmx/build/{scylla_product}-jmx-package.tar.gz
+        build dist-jmx-rpm: build-submodule-rpm tools/jmx/build/scylla-jmx-package.tar.gz
          dir = tools/jmx
-          artifact = $builddir/{scylla_product}-jmx-package.tar.gz
-        build dist-jmx-deb: build-submodule-deb tools/jmx/build/{scylla_product}-jmx-package.tar.gz
+          artifact = $builddir/scylla-jmx-package.tar.gz
+        build dist-jmx-deb: build-submodule-deb tools/jmx/build/scylla-jmx-package.tar.gz
          dir = tools/jmx
-          artifact = $builddir/{scylla_product}-jmx-package.tar.gz
-        build dist-jmx-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-jmx-package.tar.gz'.format(mode=mode, scylla_product=scylla_product) for mode in build_modes])}
+          artifact = $builddir/scylla-jmx-package.tar.gz
+        build dist-jmx-tar: phony {' '.join(['$builddir/{mode}/dist/tar/scylla-jmx-package.tar.gz'.format(mode=mode) for mode in build_modes])}
        build dist-jmx: phony dist-jmx-tar dist-jmx-rpm dist-jmx-deb

-        build tools/java/build/{scylla_product}-tools-package.tar.gz: build-submodule-reloc
+        build tools/java/build/scylla-tools-package.tar.gz: build-submodule-reloc
          reloc_dir = tools/java
-        build dist-tools-rpm: build-submodule-rpm tools/java/build/{scylla_product}-tools-package.tar.gz
+        build dist-tools-rpm: build-submodule-rpm tools/java/build/scylla-tools-package.tar.gz
          dir = tools/java
-          artifact = $builddir/{scylla_product}-tools-package.tar.gz
-        build dist-tools-deb: build-submodule-deb tools/java/build/{scylla_product}-tools-package.tar.gz
+          artifact = $builddir/scylla-tools-package.tar.gz
+        build dist-tools-deb: build-submodule-deb tools/java/build/scylla-tools-package.tar.gz
          dir = tools/java
-          artifact = $builddir/{scylla_product}-tools-package.tar.gz
-        build dist-tools-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-tools-package.tar.gz'.format(mode=mode, scylla_product=scylla_product) for mode in build_modes])}
+          artifact = $builddir/scylla-tools-package.tar.gz
+        build dist-tools-tar: phony {' '.join(['$builddir/{mode}/dist/tar/scylla-tools-package.tar.gz'.format(mode=mode) for mode in build_modes])}
        build dist-tools: phony dist-tools-tar dist-tools-rpm dist-tools-deb

        rule compat-python3-reloc
@@ -1890,27 +1886,27 @@ with open(buildfile_tmp, 'w') as f:
          command = cd $dir && ./reloc/build_rpm.sh --reloc-pkg $artifact --builddir ../../build/redhat
        rule compat-python3-deb
          command = cd $dir && ./reloc/build_deb.sh --reloc-pkg $artifact --builddir ../../build/debian
-        build $builddir/release/{scylla_product}-python3-package.tar.gz: compat-python3-reloc tools/python3/build/{scylla_product}-python3-package.tar.gz
+        build $builddir/release/scylla-python3-package.tar.gz: compat-python3-reloc tools/python3/build/scylla-python3-package.tar.gz
          dir = tools/python3
-          artifact = $builddir/{scylla_product}-python3-package.tar.gz
-        build compat-python3-rpm: compat-python3-rpm tools/python3/build/{scylla_product}-python3-package.tar.gz
+          artifact = $builddir/scylla-python3-package.tar.gz
+        build compat-python3-rpm: compat-python3-rpm tools/python3/build/scylla-python3-package.tar.gz
          dir = tools/python3
-          artifact = $builddir/{scylla_product}-python3-package.tar.gz
-        build compat-python3-deb: compat-python3-deb tools/python3/build/{scylla_product}-python3-package.tar.gz
+          artifact = $builddir/scylla-python3-package.tar.gz
+        build compat-python3-deb: compat-python3-deb tools/python3/build/scylla-python3-package.tar.gz
          dir = tools/python3
-          artifact = $builddir/{scylla_product}-python3-package.tar.gz
+          artifact = $builddir/scylla-python3-package.tar.gz

-        build tools/python3/build/{scylla_product}-python3-package.tar.gz: build-submodule-reloc
+        build tools/python3/build/scylla-python3-package.tar.gz: build-submodule-reloc
          reloc_dir = tools/python3
          args = --packages "{python3_dependencies}"
-        build dist-python3-rpm: build-submodule-rpm tools/python3/build/{scylla_product}-python3-package.tar.gz
+        build dist-python3-rpm: build-submodule-rpm tools/python3/build/scylla-python3-package.tar.gz
          dir = tools/python3
-          artifact = $builddir/{scylla_product}-python3-package.tar.gz
-        build dist-python3-deb: build-submodule-deb tools/python3/build/{scylla_product}-python3-package.tar.gz
+          artifact = $builddir/scylla-python3-package.tar.gz
+        build dist-python3-deb: build-submodule-deb tools/python3/build/scylla-python3-package.tar.gz
          dir = tools/python3
-          artifact = $builddir/{scylla_product}-python3-package.tar.gz
-        build dist-python3-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-python3-package.tar.gz'.format(mode=mode, scylla_product=scylla_product) for mode in build_modes])}
-        build dist-python3: phony dist-python3-tar dist-python3-rpm dist-python3-deb $builddir/release/{scylla_product}-python3-package.tar.gz compat-python3-rpm compat-python3-deb
+          artifact = $builddir/scylla-python3-package.tar.gz
+        build dist-python3-tar: phony {' '.join(['$builddir/{mode}/dist/tar/scylla-python3-package.tar.gz'.format(mode=mode) for mode in build_modes])}
+        build dist-python3: phony dist-python3-tar dist-python3-rpm dist-python3-deb $builddir/release/scylla-python3-package.tar.gz compat-python3-rpm compat-python3-deb
        build dist-deb: phony dist-server-deb dist-python3-deb dist-jmx-deb dist-tools-deb
        build dist-rpm: phony dist-server-rpm dist-python3-rpm dist-jmx-rpm dist-tools-rpm
        build dist-tar: phony dist-unified-tar dist-server-tar dist-python3-tar dist-jmx-tar dist-tools-tar
@@ -1925,9 +1921,9 @@ with open(buildfile_tmp, 'w') as f:
        '''))
    for mode in build_modes:
        f.write(textwrap.dedent(f'''\
-        build $builddir/{mode}/dist/tar/{scylla_product}-python3-package.tar.gz: copy tools/python3/build/{scylla_product}-python3-package.tar.gz
-        build $builddir/{mode}/dist/tar/{scylla_product}-tools-package.tar.gz: copy tools/java/build/{scylla_product}-tools-package.tar.gz
-        build $builddir/{mode}/dist/tar/{scylla_product}-jmx-package.tar.gz: copy tools/jmx/build/{scylla_product}-jmx-package.tar.gz
+        build $builddir/{mode}/dist/tar/scylla-python3-package.tar.gz: copy tools/python3/build/scylla-python3-package.tar.gz
+        build $builddir/{mode}/dist/tar/scylla-tools-package.tar.gz: copy tools/java/build/scylla-tools-package.tar.gz
+        build $builddir/{mode}/dist/tar/scylla-jmx-package.tar.gz: copy tools/jmx/build/scylla-jmx-package.tar.gz

        build dist-{mode}: phony dist-server-{mode} dist-python3-{mode} dist-tools-{mode} dist-jmx-{mode} dist-unified-{mode}
        build dist-check-{mode}: dist-check
@@ -1953,13 +1949,6 @@ with open(buildfile_tmp, 'w') as f:
        build mode_list: mode_list
        default {modes_list}
        ''').format(modes_list=' '.join(default_modes), **globals()))
-    unit_test_list = set(test for test in build_artifacts if test in set(tests))
-    f.write(textwrap.dedent('''\
-        rule unit_test_list
-            command = /usr/bin/env echo -e '{unit_test_list}'
-            description = List configured unit tests
-        build unit_test_list: unit_test_list
-        ''').format(unit_test_list="\\n".join(unit_test_list)))
    f.write(textwrap.dedent('''\
        build always: phony
        rule scylla_version_gen
@@ -1968,9 +1957,6 @@ with open(buildfile_tmp, 'w') as f:
        rule debian_files_gen
            command = ./dist/debian/debian_files_gen.py
        build $builddir/debian/debian: debian_files_gen | always
-        rule extract_node_exporter
-            command = tar -C build -xvpf {node_exporter_filename} --no-same-owner && rm -rfv build/node_exporter && mv -v build/{node_exporter_dirname} build/node_exporter
-        build $builddir/node_exporter: extract_node_exporter | always
-        ''').format(**globals()))
+        ''').format(modes_list=' '.join(build_modes), **globals()))

 os.rename(buildfile_tmp, buildfile)
--- a/connection_notifier.cc
+++ b/connection_notifier.cc
@@ -20,47 +20,44 @@
 */

 #include "connection_notifier.hh"
+#include "db/query_context.hh"
 #include "cql3/constants.hh"
 #include "database.hh"
+#include "service/storage_proxy.hh"

 #include <stdexcept>

-sstring to_string(client_type ct) {
+namespace db::system_keyspace {
+extern const char *const CLIENTS;
+}
+
+static sstring to_string(client_type ct) {
    switch (ct) {
        case client_type::cql: return "cql";
        case client_type::thrift: return "thrift";
        case client_type::alternator: return "alternator";
+        default: throw std::runtime_error("Invalid client_type");
    }
-    throw std::runtime_error("Invalid client_type");
-}
-
-static sstring to_string(client_connection_stage ccs) {
-    switch (ccs) {
-        case client_connection_stage::established: return connection_stage_literal<client_connection_stage::established>;
-        case client_connection_stage::authenticating: return connection_stage_literal<client_connection_stage::authenticating>;
-        case client_connection_stage::ready: return connection_stage_literal<client_connection_stage::ready>;
-    }
-    throw std::runtime_error("Invalid client_connection_stage");
 }

 future<> notify_new_client(client_data cd) {
    // FIXME: consider prepared statement
    const static sstring req
-            = format("INSERT INTO system.{} (address, port, client_type, connection_stage, shard_id, protocol_version, username) "
-                     "VALUES (?, ?, ?, ?, ?, ?, ?);", db::system_keyspace::CLIENTS);
+            = format("INSERT INTO system.{} (address, port, client_type, shard_id, protocol_version, username) "
+                     "VALUES (?, ?, ?, ?, ?, ?);", db::system_keyspace::CLIENTS);
    
-    return db::qctx->execute_cql(req,
-            std::move(cd.ip), cd.port, to_string(cd.ct), to_string(cd.connection_stage), cd.shard_id,
+    return db::execute_cql(req,
+            std::move(cd.ip), cd.port, to_string(cd.ct), cd.shard_id,
            cd.protocol_version.has_value() ? data_value(*cd.protocol_version) : data_value::make_null(int32_type),
            cd.username.value_or("anonymous")).discard_result();
 }

-future<> notify_disconnected_client(net::inet_address addr, int port, client_type ct) {
+future<> notify_disconnected_client(gms::inet_address addr, client_type ct, int port) {
    // FIXME: consider prepared statement
    const static sstring req
            = format("DELETE FROM system.{} where address=? AND port=? AND client_type=?;",
                     db::system_keyspace::CLIENTS);
-    return db::qctx->execute_cql(req, std::move(addr), port, to_string(ct)).discard_result();
+    return db::execute_cql(req, addr.addr(), port, to_string(ct)).discard_result();
 }

 future<> clear_clientlist() {
--- a/connection_notifier.hh
+++ b/connection_notifier.hh
@@ -20,65 +20,27 @@
 */
 #pragma once

-#include "db/query_context.hh"
-
-#include <seastar/net/inet_address.hh>
+#include "gms/inet_address.hh"
 #include <seastar/core/sstring.hh>
-#include "seastarx.hh"
-
 #include <optional>

-namespace db::system_keyspace {
-extern const char *const CLIENTS;
-}
-
 enum class client_type {
    cql = 0,
    thrift,
    alternator,
 };

-sstring to_string(client_type ct);
-
-enum class changed_column {
-    username = 0,
-    connection_stage,
-    driver_name,
-    driver_version,
-    hostname,
-    protocol_version,
-};
-
-template <changed_column column> constexpr const char* column_literal = "";
-template <> inline constexpr const char* column_literal<changed_column::username> = "username";
-template <> inline constexpr const char* column_literal<changed_column::connection_stage> = "connection_stage";
-template <> inline constexpr const char* column_literal<changed_column::driver_name> = "driver_name";
-template <> inline constexpr const char* column_literal<changed_column::driver_version> = "driver_version";
-template <> inline constexpr const char* column_literal<changed_column::hostname> = "hostname";
-template <> inline constexpr const char* column_literal<changed_column::protocol_version> = "protocol_version";
-
-enum class client_connection_stage {
-    established = 0,
-    authenticating,
-    ready,
-};
-
-template <client_connection_stage ccs> constexpr const char* connection_stage_literal = "";
-template <> inline constexpr const char* connection_stage_literal<client_connection_stage::established> = "ESTABLISHED";
-template <> inline constexpr const char* connection_stage_literal<client_connection_stage::authenticating> = "AUTHENTICATING";
-template <> inline constexpr const char* connection_stage_literal<client_connection_stage::ready> = "READY";
-
 // Representation of a row in `system.clients'. std::optionals are for nullable cells.
 struct client_data {
-    net::inet_address ip;
+    gms::inet_address ip;
    int32_t port;
    client_type ct;
-    client_connection_stage connection_stage = client_connection_stage::established;
    int32_t shard_id;  /// ID of server-side shard which is processing the connection.

    // `optional' column means that it's nullable (possibly because it's
    // unimplemented yet). If you want to fill ("implement") any of them,
    // remember to update the query in `notify_new_client()'.
+    std::optional<sstring> connection_stage;
    std::optional<sstring> driver_name;
    std::optional<sstring> driver_version;
    std::optional<sstring> hostname;
@@ -90,17 +52,6 @@ struct client_data {
 };

 future<> notify_new_client(client_data cd);
-future<> notify_disconnected_client(net::inet_address addr, int port, client_type ct);
+future<> notify_disconnected_client(gms::inet_address addr, client_type ct, int port);
+
 future<> clear_clientlist();
-
-template <changed_column column_enum_val>
-struct notify_client_change {
-    template <typename T>
-    future<> operator()(net::inet_address addr, int port, client_type ct, T&& value) {
-        const static sstring req
-                = format("UPDATE system.{} SET {}=? WHERE address=? AND port=? AND client_type=?;",
-                        db::system_keyspace::CLIENTS, column_literal<column_enum_val>);
-
-        return db::qctx->execute_cql(req, std::forward<T>(value), std::move(addr), port, to_string(ct)).discard_result();
-    }
-};
--- a/counters.cc
+++ b/counters.cc
@@ -19,10 +19,16 @@
 * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
 */

+#include "service/storage_service.hh"
 #include "counters.hh"
 #include "mutation.hh"
 #include "combine.hh"

+counter_id counter_id::local()
+{
+    return counter_id(service::get_local_storage_service().get_local_id());
+}
+
 std::ostream& operator<<(std::ostream& os, const counter_id& id) {
    return os << id.to_uuid();
 }
@@ -191,10 +197,10 @@ std::optional<atomic_cell> counter_cell_view::difference(atomic_cell_view a, ato
 }


-void transform_counter_updates_to_shards(mutation& m, const mutation* current_state, uint64_t clock_offset, utils::UUID local_id) {
+void transform_counter_updates_to_shards(mutation& m, const mutation* current_state, uint64_t clock_offset) {
    // FIXME: allow current_state to be frozen_mutation

-    auto transform_new_row_to_shards = [&s = *m.schema(), clock_offset, local_id] (column_kind kind, auto& cells) {
+    auto transform_new_row_to_shards = [&s = *m.schema(), clock_offset] (column_kind kind, auto& cells) {
        cells.for_each_cell([&] (column_id id, atomic_cell_or_collection& ac_o_c) {
            auto& cdef = s.column_at(kind, id);
            auto acv = ac_o_c.as_atomic_cell(cdef);
@@ -202,7 +208,7 @@ void transform_counter_updates_to_shards(mutation& m, const mutation* current_st
                return; // continue -- we are in lambda
            }
            auto delta = acv.counter_update_value();
-            auto cs = counter_shard(counter_id(local_id), delta, clock_offset + 1);
+            auto cs = counter_shard(counter_id::local(), delta, clock_offset + 1);
            ac_o_c = counter_cell_builder::from_single_shard(acv.timestamp(), cs);
        });
    };
@@ -217,7 +223,7 @@ void transform_counter_updates_to_shards(mutation& m, const mutation* current_st

    clustering_key::less_compare cmp(*m.schema());

-    auto transform_row_to_shards = [&s = *m.schema(), clock_offset, local_id] (column_kind kind, auto& transformee, auto& state) {
+    auto transform_row_to_shards = [&s = *m.schema(), clock_offset] (column_kind kind, auto& transformee, auto& state) {
        std::deque<std::pair<column_id, counter_shard>> shards;
        state.for_each_cell([&] (column_id id, const atomic_cell_or_collection& ac_o_c) {
            auto& cdef = s.column_at(kind, id);
@@ -226,7 +232,7 @@ void transform_counter_updates_to_shards(mutation& m, const mutation* current_st
                return; // continue -- we are in lambda
            }
          counter_cell_view::with_linearized(acv, [&] (counter_cell_view ccv) {
-            auto cs = ccv.get_shard(counter_id(local_id));
+            auto cs = ccv.local_shard();
            if (!cs) {
                return; // continue
            }
@@ -247,7 +253,7 @@ void transform_counter_updates_to_shards(mutation& m, const mutation* current_st
            auto delta = acv.counter_update_value();

            if (shards.empty() || shards.front().first > id) {
-                auto cs = counter_shard(counter_id(local_id), delta, clock_offset + 1);
+                auto cs = counter_shard(counter_id::local(), delta, clock_offset + 1);
                ac_o_c = counter_cell_builder::from_single_shard(acv.timestamp(), cs);
            } else {
                auto& cs = shards.front().second;
--- a/counters.hh
+++ b/counters.hh
@@ -61,6 +61,8 @@ public:
        return !(*this == other);
    }
 public:
+    static counter_id local();
+
    // For tests.
    static counter_id generate_random() {
        return counter_id(utils::make_random_uuid());
@@ -275,14 +277,7 @@ public:
        return ac;
    }

-    class inserter_iterator {
-    public:
-        using iterator_category = std::output_iterator_tag;
-        using value_type = counter_shard;
-        using difference_type = std::ptrdiff_t;
-        using pointer = counter_shard*;
-        using reference = counter_shard&;
-    private:
+    class inserter_iterator : public std::iterator<std::output_iterator_tag, counter_shard> {
        counter_cell_builder* _builder;
    public:
        explicit inserter_iterator(counter_cell_builder& b) : _builder(&b) { }
@@ -316,14 +311,7 @@ protected:
    basic_atomic_cell_view<is_mutable> _cell;
    linearized_value_view _value;
 private:
-    class shard_iterator {
-    public:
-        using iterator_category = std::input_iterator_tag;
-        using value_type = basic_counter_shard_view<is_mutable>;
-        using difference_type = std::ptrdiff_t;
-        using pointer = basic_counter_shard_view<is_mutable>*;
-        using reference = basic_counter_shard_view<is_mutable>&;
-    private:
+    class shard_iterator : public std::iterator<std::input_iterator_tag, basic_counter_shard_view<is_mutable>> {
        pointer_type _current;
        basic_counter_shard_view<is_mutable> _current_view;
    public:
@@ -403,6 +391,11 @@ public:
        return *it;
    }

+    std::optional<counter_shard_view> local_shard() const {
+        // TODO: consider caching local shard position
+        return get_shard(counter_id::local());
+    }
+
    bool operator==(const basic_counter_cell_view& other) const {
        return timestamp() == other.timestamp() && boost::equal(shards(), other.shards());
    }
@@ -444,7 +437,7 @@ struct counter_cell_mutable_view : basic_counter_cell_view<mutable_view::yes> {
 // Transforms mutation dst from counter updates to counter shards using state
 // stored in current_state.
 // If current_state is present it has to be in the same schema as dst.
-void transform_counter_updates_to_shards(mutation& dst, const mutation* current_state, uint64_t clock_offset, utils::UUID local_id);
+void transform_counter_updates_to_shards(mutation& dst, const mutation* current_state, uint64_t clock_offset);

 template<>
 struct appending_hash<counter_shard_view> {
--- a/cql3/Cql.g
+++ b/cql3/Cql.g
@@ -394,7 +394,6 @@ selectStatement returns [std::unique_ptr<raw::select_statement> expr]
        bool allow_filtering = false;
        bool is_json = false;
        bool bypass_cache = false;
-        auto attrs = std::make_unique<cql3::attributes::raw>();
    }
    : K_SELECT (
                ( K_JSON { is_json = true; } )?
@@ -409,12 +408,11 @@ selectStatement returns [std::unique_ptr<raw::select_statement> expr]
      ( K_LIMIT rows=intValue { limit = rows; } )?
      ( K_ALLOW K_FILTERING  { allow_filtering = true; } )?
      ( K_BYPASS K_CACHE { bypass_cache = true; })?
-      ( usingClause[attrs] )?
      {
          auto params = make_lw_shared<raw::select_statement::parameters>(std::move(orderings), is_distinct, allow_filtering, is_json, bypass_cache);
          $expr = std::make_unique<raw::select_statement>(std::move(cf), std::move(params),
            std::move(sclause), std::move(wclause), std::move(limit), std::move(per_partition_limit),
-            std::move(gbcolumns), std::move(attrs));
+            std::move(gbcolumns));
      }
    ;

@@ -523,7 +521,6 @@ usingClause[std::unique_ptr<cql3::attributes::raw>& attrs]
 usingClauseObjective[std::unique_ptr<cql3::attributes::raw>& attrs]
    : K_TIMESTAMP ts=intValue { attrs->timestamp = ts; }
    | K_TTL t=intValue { attrs->time_to_live = t; }
-    | K_TIMEOUT to=term { attrs->timeout = to; }
    ;

 /**
@@ -1764,7 +1761,6 @@ basic_unreserved_keyword returns [sstring str]
        | K_PER
        | K_PARTITION
        | K_GROUP
-        | K_TIMEOUT
        ) { $str = $k.text; }
    ;

@@ -1920,8 +1916,6 @@ K_GROUP:       G R O U P;

 K_LIKE:        L I K E;

-K_TIMEOUT:     T I M E O U T;
-
 // Case-insensitive alpha characters
 fragment A: ('a'|'A');
 fragment B: ('b'|'B');
--- a/cql3/abstract_marker.cc
+++ b/cql3/abstract_marker.cc
@@ -70,11 +70,11 @@ abstract_marker::raw::raw(int32_t bind_index)
 ::shared_ptr<term> abstract_marker::raw::prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const
 {
    if (receiver->type->is_collection()) {
-        if (receiver->type->without_reversed().is_list()) {
+        if (receiver->type->get_kind() == abstract_type::kind::list) {
            return ::make_shared<lists::marker>(_bind_index, receiver);
-        } else if (receiver->type->without_reversed().is_set()) {
+        } else if (receiver->type->get_kind() == abstract_type::kind::set) {
            return ::make_shared<sets::marker>(_bind_index, receiver);
-        } else if (receiver->type->without_reversed().is_map()) {
+        } else if (receiver->type->get_kind() == abstract_type::kind::map) {
            return ::make_shared<maps::marker>(_bind_index, receiver);
        }
        assert(0);
--- a/cql3/attributes.cc
+++ b/cql3/attributes.cc
@@ -44,13 +44,12 @@
 namespace cql3 {

 std::unique_ptr<attributes> attributes::none() {
-    return std::unique_ptr<attributes>{new attributes{{}, {}, {}}};
+    return std::unique_ptr<attributes>{new attributes{{}, {}}};
 }

-attributes::attributes(::shared_ptr<term>&& timestamp, ::shared_ptr<term>&& time_to_live, ::shared_ptr<term>&& timeout)
+attributes::attributes(::shared_ptr<term>&& timestamp, ::shared_ptr<term>&& time_to_live)
    : _timestamp{std::move(timestamp)}
    , _time_to_live{std::move(time_to_live)}
-    , _timeout{std::move(timeout)}
 { }

 bool attributes::is_timestamp_set() const {
@@ -61,10 +60,6 @@ bool attributes::is_time_to_live_set() const {
    return bool(_time_to_live);
 }

-bool attributes::is_timeout_set() const {
-    return bool(_timeout);
-}
-
 int64_t attributes::get_timestamp(int64_t now, const query_options& options) {
    if (!_timestamp) {
        return now;
@@ -77,12 +72,14 @@ int64_t attributes::get_timestamp(int64_t now, const query_options& options) {
    if (tval.is_unset_value()) {
        return now;
    }
+  return with_linearized(*tval, [&] (bytes_view val) {
    try {
-        data_type_for<int64_t>()->validate(*tval, options.get_cql_serialization_format());
+        data_type_for<int64_t>()->validate(val, options.get_cql_serialization_format());
    } catch (marshal_exception& e) {
        throw exceptions::invalid_request_exception("Invalid timestamp value");
    }
-    return value_cast<int64_t>(data_type_for<int64_t>()->deserialize(*tval));
+    return value_cast<int64_t>(data_type_for<int64_t>()->deserialize(val));
+  });
 }

 int32_t attributes::get_time_to_live(const query_options& options) {
@@ -96,15 +93,16 @@ int32_t attributes::get_time_to_live(const query_options& options) {
    if (tval.is_unset_value()) {
        return 0;
    }
-
+  auto ttl = with_linearized(*tval, [&] (bytes_view val) {
    try {
-        data_type_for<int32_t>()->validate(*tval, options.get_cql_serialization_format());
+        data_type_for<int32_t>()->validate(val, options.get_cql_serialization_format());
    }
    catch (marshal_exception& e) {
        throw exceptions::invalid_request_exception("Invalid TTL value");
    }
-    auto ttl = value_cast<int32_t>(data_type_for<int32_t>()->deserialize(*tval));

+    return value_cast<int32_t>(data_type_for<int32_t>()->deserialize(val));
+  });
    if (ttl < 0) {
        throw exceptions::invalid_request_exception("A TTL must be greater or equal to 0");
    }
@@ -117,25 +115,6 @@ int32_t attributes::get_time_to_live(const query_options& options) {
    return ttl;
 }

-
-db::timeout_clock::duration attributes::get_timeout(const query_options& options) const {
-    auto timeout = _timeout->bind_and_get(options);
-    if (timeout.is_null() || timeout.is_unset_value()) {
-        throw exceptions::invalid_request_exception("Timeout value cannot be unset/null");
-    }
-    cql_duration duration = value_cast<cql_duration>(duration_type->deserialize(*timeout));
-    if (duration.months || duration.days) {
-        throw exceptions::invalid_request_exception("Timeout values cannot be expressed in days/months");
-    }
-    if (duration.nanoseconds % 1'000'000 != 0) {
-        throw exceptions::invalid_request_exception("Timeout values cannot have granularity finer than milliseconds");
-    }
-    if (duration.nanoseconds < 0) {
-        throw exceptions::invalid_request_exception("Timeout values must be non-negative");
-    }
-    return std::chrono::duration_cast<db::timeout_clock::duration>(std::chrono::nanoseconds(duration.nanoseconds));
-}
-
 void attributes::collect_marker_specification(variable_specifications& bound_names) const {
    if (_timestamp) {
        _timestamp->collect_marker_specification(bound_names);
@@ -143,16 +122,12 @@ void attributes::collect_marker_specification(variable_specifications& bound_nam
    if (_time_to_live) {
        _time_to_live->collect_marker_specification(bound_names);
    }
-    if (_timeout) {
-        _timeout->collect_marker_specification(bound_names);
-    }
 }

 std::unique_ptr<attributes> attributes::raw::prepare(database& db, const sstring& ks_name, const sstring& cf_name) const {
    auto ts = !timestamp ? ::shared_ptr<term>{} : timestamp->prepare(db, ks_name, timestamp_receiver(ks_name, cf_name));
    auto ttl = !time_to_live ? ::shared_ptr<term>{} : time_to_live->prepare(db, ks_name, time_to_live_receiver(ks_name, cf_name));
-    auto to = !timeout ? ::shared_ptr<term>{} : timeout->prepare(db, ks_name, timeout_receiver(ks_name, cf_name));
-    return std::unique_ptr<attributes>{new attributes{std::move(ts), std::move(ttl), std::move(to)}};
+    return std::unique_ptr<attributes>{new attributes{std::move(ts), std::move(ttl)}};
 }

 lw_shared_ptr<column_specification> attributes::raw::timestamp_receiver(const sstring& ks_name, const sstring& cf_name) const {
@@ -163,8 +138,4 @@ lw_shared_ptr<column_specification> attributes::raw::time_to_live_receiver(const
    return make_lw_shared<column_specification>(ks_name, cf_name, ::make_shared<column_identifier>("[ttl]", true), data_type_for<int32_t>());
 }

-lw_shared_ptr<column_specification> attributes::raw::timeout_receiver(const sstring& ks_name, const sstring& cf_name) const {
-    return make_lw_shared<column_specification>(ks_name, cf_name, ::make_shared<column_identifier>("[timeout]", true), duration_type);
-}
-
 }
--- a/cql3/attributes.hh
+++ b/cql3/attributes.hh
@@ -54,39 +54,31 @@ class attributes final {
 private:
    const ::shared_ptr<term> _timestamp;
    const ::shared_ptr<term> _time_to_live;
-    const ::shared_ptr<term> _timeout;
 public:
    static std::unique_ptr<attributes> none();
 private:
-    attributes(::shared_ptr<term>&& timestamp, ::shared_ptr<term>&& time_to_live, ::shared_ptr<term>&& timeout);
+    attributes(::shared_ptr<term>&& timestamp, ::shared_ptr<term>&& time_to_live);
 public:
    bool is_timestamp_set() const;

    bool is_time_to_live_set() const;

-    bool is_timeout_set() const;
-
    int64_t get_timestamp(int64_t now, const query_options& options);

    int32_t get_time_to_live(const query_options& options);

-    db::timeout_clock::duration get_timeout(const query_options& options) const;
-
    void collect_marker_specification(variable_specifications& bound_names) const;

    class raw final {
    public:
        ::shared_ptr<term::raw> timestamp;
        ::shared_ptr<term::raw> time_to_live;
-        ::shared_ptr<term::raw> timeout;

        std::unique_ptr<attributes> prepare(database& db, const sstring& ks_name, const sstring& cf_name) const;
    private:
        lw_shared_ptr<column_specification> timestamp_receiver(const sstring& ks_name, const sstring& cf_name) const;

        lw_shared_ptr<column_specification> time_to_live_receiver(const sstring& ks_name, const sstring& cf_name) const;
-
-        lw_shared_ptr<column_specification> timeout_receiver(const sstring& ks_name, const sstring& cf_name) const;
    };
 };

--- a/cql3/constants.hh
+++ b/cql3/constants.hh
@@ -230,7 +230,9 @@ public:
            } else if (value.is_unset_value()) {
                return;
            }
-            auto increment = value_cast<int64_t>(long_type->deserialize_value(*value));
+            auto increment = with_linearized(*value, [] (bytes_view value_view) {
+                return value_cast<int64_t>(long_type->deserialize_value(value_view));
+            });
            m.set_cell(prefix, column, make_counter_update_cell(increment, params));
        }
    };
@@ -245,7 +247,9 @@ public:
            } else if (value.is_unset_value()) {
                return;
            }
-            auto increment = value_cast<int64_t>(long_type->deserialize_value(*value));
+            auto increment = with_linearized(*value, [] (bytes_view value_view) {
+                return value_cast<int64_t>(long_type->deserialize_value(value_view));
+            });
            if (increment == std::numeric_limits<int64_t>::min()) {
                throw exceptions::invalid_request_exception(format("The negation of {:d} overflows supported counter precision (signed 8 bytes integer)", increment));
            }
--- a/cql3/expr/expression.cc
+++ b/cql3/expr/expression.cc
@@ -45,8 +45,7 @@ using boost::adaptors::transformed;

 namespace {

-static
-bytes_opt do_get_value(const schema& schema,
+std::optional<atomic_cell_value_view> do_get_value(const schema& schema,
        const column_definition& cdef,
        const partition_key& key,
        const clustering_key_prefix& ckey,
@@ -54,9 +53,9 @@ bytes_opt do_get_value(const schema& schema,
        gc_clock::time_point now) {
    switch (cdef.kind) {
        case column_kind::partition_key:
-            return to_bytes(key.get_component(schema, cdef.component_index()));
+            return atomic_cell_value_view(key.get_component(schema, cdef.component_index()));
        case column_kind::clustering_key:
-            return to_bytes(ckey.get_component(schema, cdef.component_index()));
+            return atomic_cell_value_view(ckey.get_component(schema, cdef.component_index()));
        default:
            auto cell = cells.find_cell(cdef.id);
            if (!cell) {
@@ -64,7 +63,7 @@ bytes_opt do_get_value(const schema& schema,
            }
            assert(cdef.is_atomic());
            auto c = cell->as_atomic_cell(cdef);
-            return c.is_dead(now) ? std::nullopt : bytes_opt(c.value().linearize());
+            return c.is_dead(now) ? std::nullopt : std::optional<atomic_cell_value_view>(c.value());
    }
 }

@@ -141,8 +140,9 @@ bytes_opt get_value_from_partition_slice(

 /// Returns col's value from a mutation.
 bytes_opt get_value_from_mutation(const column_value& col, row_data_from_mutation data) {
-    return do_get_value(
+    const auto v = do_get_value(
            data.schema_, *col.col, data.partition_key_, data.clustering_key_, data.other_columns, data.now);
+    return v ? v->linearize() : bytes_opt();
 }

 /// Returns col's value from the fetched data.
@@ -156,7 +156,7 @@ bytes_opt get_value(const column_value& col, const column_value_eval_bag& bag) {

 /// Type for comparing results of get_value().
 const abstract_type* get_value_comparator(const column_definition* cdef) {
-    return &cdef->type->without_reversed();
+    return cdef->type->is_reversed() ? cdef->type->underlying_type().get() : cdef->type.get();
 }

 /// Type for comparing results of get_value().
@@ -357,12 +357,16 @@ bytes_opt next_value(query::result_row_view::iterator_type& iter, const column_d
    if (cdef->type->is_multi_cell()) {
        auto cell = iter.next_collection_cell();
        if (cell) {
-            return linearized(*cell);
+            return cell->with_linearized([] (bytes_view data) {
+                return bytes(data.cbegin(), data.cend());
+            });
        }
    } else {
        auto cell = iter.next_atomic_cell();
        if (cell) {
-            return linearized(cell->value());
+            return cell->value().with_linearized([] (bytes_view data) {
+                return bytes(data.cbegin(), data.cend());
+            });
        }
    }
    return std::nullopt;
@@ -583,7 +587,7 @@ value_list get_IN_values(
        if (val == constants::UNSET_VALUE) {
            throw exceptions::invalid_request_exception(format("Invalid unset value for column {}", column_name));
        }
-        statements::request_validations::check_not_null(val, "Invalid null value for column %s", column_name);
+        statements::request_validations::check_not_null(val, "Invalid null value for IN tuple");
        return to_sorted_vector(static_pointer_cast<lists::value>(val)->get_elements() | non_null | deref, comparator);
    }
    throw std::logic_error(format("get_IN_values(single column) on invalid term {}", *t));
@@ -615,13 +619,13 @@ static constexpr bool inclusive = true, exclusive = false;
 nonwrapping_range<bytes> to_range(oper_t op, const bytes& val) {
    switch (op) {
    case oper_t::GT:
-        return nonwrapping_range<bytes>::make_starting_with(interval_bound(val, exclusive));
+        return nonwrapping_range<bytes>::make_starting_with(range_bound(val, exclusive));
    case oper_t::GTE:
-        return nonwrapping_range<bytes>::make_starting_with(interval_bound(val, inclusive));
+        return nonwrapping_range<bytes>::make_starting_with(range_bound(val, inclusive));
    case oper_t::LT:
-        return nonwrapping_range<bytes>::make_ending_with(interval_bound(val, exclusive));
+        return nonwrapping_range<bytes>::make_ending_with(range_bound(val, exclusive));
    case oper_t::LTE:
-        return nonwrapping_range<bytes>::make_ending_with(interval_bound(val, inclusive));
+        return nonwrapping_range<bytes>::make_ending_with(range_bound(val, inclusive));
    default:
        throw std::logic_error(format("to_range: unknown comparison operator {}", op));
    }
@@ -736,9 +740,9 @@ value_set possible_lhs_values(const column_definition* cdef, const expression& e
                            if (oper.op == oper_t::EQ) {
                                return value_list{*val};
                            } else if (oper.op == oper_t::GT) {
-                                return nonwrapping_range<bytes>::make_starting_with(interval_bound(*val, exclusive));
+                                return nonwrapping_range<bytes>::make_starting_with(range_bound(*val, exclusive));
                            } else if (oper.op == oper_t::GTE) {
-                                return nonwrapping_range<bytes>::make_starting_with(interval_bound(*val, inclusive));
+                                return nonwrapping_range<bytes>::make_starting_with(range_bound(*val, inclusive));
                            }
                            static const bytes MININT = serialized(std::numeric_limits<int64_t>::min()),
                                    MAXINT = serialized(std::numeric_limits<int64_t>::max());
@@ -746,9 +750,9 @@ value_set possible_lhs_values(const column_definition* cdef, const expression& e
                            // that as MAXINT for some reason.
                            const auto adjusted_val = (*val == MININT) ? serialized(MAXINT) : *val;
                            if (oper.op == oper_t::LT) {
-                                return nonwrapping_range<bytes>::make_ending_with(interval_bound(adjusted_val, exclusive));
+                                return nonwrapping_range<bytes>::make_ending_with(range_bound(adjusted_val, exclusive));
                            } else if (oper.op == oper_t::LTE) {
-                                return nonwrapping_range<bytes>::make_ending_with(interval_bound(adjusted_val, inclusive));
+                                return nonwrapping_range<bytes>::make_ending_with(range_bound(adjusted_val, inclusive));
                            }
                            throw std::logic_error(format("get_token_interval invalid operator {}", oper.op));
                        },
@@ -781,11 +785,9 @@ bool is_supported_by(const expression& expr, const secondary_index::index& idx)
                            return idx.supports_expression(*col.col, oper.op);
                        },
                        [&] (const std::vector<column_value>& cvs) {
-                            if (cvs.size() == 1) {
-                                return idx.supports_expression(*cvs[0].col, oper.op);
-                            }
-                            // We don't use index table for multi-column restrictions, as it cannot avoid filtering.
-                            return false;
+                            return boost::algorithm::any_of(cvs, [&] (const column_value& c) {
+                                return idx.supports_expression(*c.col, oper.op);
+                            });
                        },
                        [&] (const token&) { return false; },
                    }, oper.lhs);
--- a/cql3/functions/aggregate_fcts.cc
+++ b/cql3/functions/aggregate_fcts.cc
@@ -219,7 +219,7 @@ struct aggregate_type_for<simple_date_native_type> {

 template<>
 struct aggregate_type_for<timeuuid_native_type> {
-    using type = timeuuid_native_type;
+    using type = timeuuid_native_type::primary_type;
 };

 template<>
@@ -227,7 +227,6 @@ struct aggregate_type_for<time_native_type> {
    using type = time_native_type::primary_type;
 };

-// WARNING: never invoke this on temporary values; it will return a dangling reference.
 template <typename Type>
 const Type& max_wrapper(const Type& t1, const Type& t2) {
    using std::max;
@@ -242,10 +241,6 @@ inline const net::inet_address& max_wrapper(const net::inet_address& t1, const n
    return std::memcmp(t1.data(), t2.data(), len) >= 0 ? t1 : t2;
 }

-inline const timeuuid_native_type& max_wrapper(const timeuuid_native_type& t1, const timeuuid_native_type& t2) {
-    return t1.uuid.timestamp() > t2.uuid.timestamp() ? t1 : t2;
-}
-
 template <typename Type>
 class impl_max_function_for final : public aggregate_function::aggregate {
   std::optional<typename aggregate_type_for<Type>::type> _max{};
@@ -328,7 +323,6 @@ make_max_function() {
    return make_shared<max_function_for<Type>>();
 }

-// WARNING: never invoke this on temporary values; it will return a dangling reference.
 template <typename Type>
 const Type& min_wrapper(const Type& t1, const Type& t2) {
    using std::min;
@@ -343,10 +337,6 @@ inline const net::inet_address& min_wrapper(const net::inet_address& t1, const n
    return std::memcmp(t1.data(), t2.data(), len) <= 0 ? t1 : t2;
 }

-inline timeuuid_native_type min_wrapper(timeuuid_native_type t1, timeuuid_native_type t2) {
-    return t1.uuid.timestamp() < t2.uuid.timestamp() ? t1 : t2;
-}
-
 template <typename Type>
 class impl_min_function_for final : public aggregate_function::aggregate {
   std::optional<typename aggregate_type_for<Type>::type> _min{};
--- a/cql3/functions/functions.cc
+++ b/cql3/functions/functions.cc
@@ -76,7 +76,7 @@ functions::init() noexcept {
    // that has less information in it. Given how unlikely it is that
    // we will run out of memory this early, having a better core dump
    // if we do seems like a good trade-off.
-    memory::scoped_critical_alloc_section dfg;
+    memory::disable_failure_guard dfg;

    std::unordered_multimap<function_name, shared_ptr<function>> ret;
    auto declare = [&ret] (shared_ptr<function> f) { ret.emplace(f->name(), f); };
@@ -181,18 +181,13 @@ inline
 shared_ptr<function>
 make_from_json_function(database& db, const sstring& keyspace, data_type t) {
    return make_native_scalar_function<true>("fromjson", t, {utf8_type},
-            [&db, keyspace, t](cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
-        try {
-            rjson::value json_value = rjson::parse(utf8_type->to_string(parameters[0].value()));
-            bytes_opt parsed_json_value;
-            if (!json_value.IsNull()) {
-                parsed_json_value.emplace(from_json_object(*t, json_value, sf));
-            }
-            return parsed_json_value;
-        } catch(rjson::error& e) {
-            throw exceptions::function_execution_exception("fromJson",
-                format("Failed parsing fromJson parameter: {}", e.what()), keyspace, {t->name()});
+            [&db, &keyspace, t](cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
+        rjson::value json_value = rjson::parse(utf8_type->to_string(parameters[0].value()));
+        bytes_opt parsed_json_value;
+        if (!json_value.IsNull()) {
+            parsed_json_value.emplace(from_json_object(*t, json_value, sf));
        }
+        return parsed_json_value;
    });
 }

--- a/cql3/functions/native_scalar_function.hh
+++ b/cql3/functions/native_scalar_function.hh
@@ -78,22 +78,7 @@ public:
        return Pure;
    }
    virtual bytes_opt execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) override {
-        try {
-            return _func(sf, parameters);
-        } catch(exceptions::cassandra_exception&) {
-            // If the function's code took the time to produce an official
-            // cassandra_exception, pass it through. Otherwise, below we will
-            // wrap the unknown exception in a function_execution_exception.
-            throw;
-        } catch(...) {
-            std::vector<sstring> args;
-            args.reserve(arg_types().size());
-            for (const data_type& a : arg_types()) {
-                args.push_back(a->name());
-            }
-            throw exceptions::function_execution_exception(name().name,
-                format("Failed execution of function {}: {}", name(), std::current_exception()), name().keyspace, std::move(args));
-        }
+        return _func(sf, parameters);
    }
 };

--- a/cql3/lists.cc
+++ b/cql3/lists.cc
@@ -40,7 +40,7 @@ lw_shared_ptr<column_specification>
 lists::value_spec_of(const column_specification& column) {
    return make_lw_shared<column_specification>(column.ks_name, column.cf_name,
            ::make_shared<column_identifier>(format("value({})", *column.name), true),
-                dynamic_cast<const list_type_impl&>(column.type->without_reversed()).get_elements_type());
+                dynamic_pointer_cast<const list_type_impl>(column.type)->get_elements_type());
 }

 lw_shared_ptr<column_specification>
@@ -87,7 +87,7 @@ lists::literal::prepare(database& db, const sstring& keyspace, lw_shared_ptr<col

 void
 lists::literal::validate_assignable_to(database& db, const sstring keyspace, const column_specification& receiver) const {
-    if (!receiver.type->without_reversed().is_list()) {
+    if (!dynamic_pointer_cast<const list_type_impl>(receiver.type)) {
        throw exceptions::invalid_request_exception(format("Invalid list literal for {} of type {}",
                *receiver.name, receiver.type->as_cql3_type()));
    }
@@ -125,11 +125,18 @@ lists::literal::to_string() const {

 lists::value
 lists::value::from_serialized(const fragmented_temporary_buffer::view& val, const list_type_impl& type, cql_serialization_format sf) {
+    return with_linearized(val, [&] (bytes_view v) {
+        return from_serialized(v, type, sf);
+    });
+}
+
+lists::value
+lists::value::from_serialized(bytes_view v, const list_type_impl& type, cql_serialization_format sf) {
    try {
        // Collections have this small hack that validate cannot be called on a serialized object,
        // but compose does the validation (so we're fine).
        // FIXME: deserializeForNativeProtocol()?!
-        auto l = value_cast<list_type_impl::native_type>(type.deserialize(val, sf));
+        auto l = value_cast<list_type_impl::native_type>(type.deserialize(v, sf));
        std::vector<bytes_opt> elements;
        elements.reserve(l.size());
        for (auto&& element : l) {
@@ -220,15 +227,17 @@ lists::delayed_value::bind(const query_options& options) {
 ::shared_ptr<terminal>
 lists::marker::bind(const query_options& options) {
    const auto& value = options.get_value_at(_bind_index);
-    auto& ltype = dynamic_cast<const list_type_impl&>(_receiver->type->without_reversed());
+    auto& ltype = static_cast<const list_type_impl&>(*_receiver->type);
    if (value.is_null()) {
        return nullptr;
    } else if (value.is_unset_value()) {
        return constants::UNSET_VALUE;
    } else {
        try {
-            ltype.validate(*value, options.get_cql_serialization_format());
-            return make_shared<lists::value>(value::from_serialized(*value, ltype, options.get_cql_serialization_format()));
+            return with_linearized(*value, [&] (bytes_view v) {
+                ltype.validate(v, options.get_cql_serialization_format());
+                return make_shared<lists::value>(value::from_serialized(v, ltype, options.get_cql_serialization_format()));
+            });
        } catch (marshal_exception& e) {
            throw exceptions::invalid_request_exception(
                    format("Exception while binding column {:s}: {:s}", _receiver->name->to_cql_string(), e.what()));
@@ -299,7 +308,9 @@ lists::setter_by_index::execute(mutation& m, const clustering_key_prefix& prefix
        return;
    }

-    auto idx = value_cast<int32_t>(data_type_for<int32_t>()->deserialize(*index));
+    auto idx = with_linearized(*index, [] (bytes_view v) {
+        return value_cast<int32_t>(data_type_for<int32_t>()->deserialize(v));
+    });
    auto&& existing_list_opt = params.get_prefetched_list(m.key(), prefix, column);
    if (!existing_list_opt) {
        throw exceptions::invalid_request_exception("Attempted to set an element on a list which is null");
--- a/cql3/lists.hh
+++ b/cql3/lists.hh
@@ -73,6 +73,7 @@ public:
    };

    class value : public multi_item_terminal, collection_terminal {
+        static value from_serialized(bytes_view v, const list_type_impl& type, cql_serialization_format sf);
    public:
        std::vector<bytes_opt> _elements;
    public:
--- a/cql3/maps.cc
+++ b/cql3/maps.cc
@@ -55,14 +55,14 @@ lw_shared_ptr<column_specification>
 maps::key_spec_of(const column_specification& column) {
    return make_lw_shared<column_specification>(column.ks_name, column.cf_name,
                ::make_shared<column_identifier>(format("key({})", *column.name), true),
-                dynamic_cast<const map_type_impl&>(column.type->without_reversed()).get_keys_type());
+                 dynamic_pointer_cast<const map_type_impl>(column.type)->get_keys_type());
 }

 lw_shared_ptr<column_specification>
 maps::value_spec_of(const column_specification& column) {
    return make_lw_shared<column_specification>(column.ks_name, column.cf_name,
                ::make_shared<column_identifier>(format("value({})", *column.name), true),
-                 dynamic_cast<const map_type_impl&>(column.type->without_reversed()).get_values_type());
+                 dynamic_pointer_cast<const map_type_impl>(column.type)->get_values_type());
 }

 ::shared_ptr<term>
@@ -88,9 +88,7 @@ maps::literal::prepare(database& db, const sstring& keyspace, lw_shared_ptr<colu

        values.emplace(k, v);
    }
-    delayed_value value(
-            dynamic_cast<const map_type_impl&>(receiver->type->without_reversed()).get_keys_type()->as_less_comparator(),
-            values);
+    delayed_value value(static_pointer_cast<const map_type_impl>(receiver->type)->get_keys_type()->as_less_comparator(), values);
    if (all_terminal) {
        return value.bind(query_options::DEFAULT);
    } else {
@@ -100,7 +98,7 @@ maps::literal::prepare(database& db, const sstring& keyspace, lw_shared_ptr<colu

 void
 maps::literal::validate_assignable_to(database& db, const sstring& keyspace, const column_specification& receiver) const {
-    if (!receiver.type->without_reversed().is_map()) {
+    if (!dynamic_pointer_cast<const map_type_impl>(receiver.type)) {
        throw exceptions::invalid_request_exception(format("Invalid map literal for {} of type {}", *receiver.name, receiver.type->as_cql3_type()));
    }
    auto&& key_spec = maps::key_spec_of(receiver);
@@ -160,13 +158,15 @@ maps::value::from_serialized(const fragmented_temporary_buffer::view& fragmented
        // Collections have this small hack that validate cannot be called on a serialized object,
        // but compose does the validation (so we're fine).
        // FIXME: deserialize_for_native_protocol?!
-        auto m = value_cast<map_type_impl::native_type>(type.deserialize(fragmented_value, sf));
+      return with_linearized(fragmented_value, [&] (bytes_view value) {
+        auto m = value_cast<map_type_impl::native_type>(type.deserialize(value, sf));
        std::map<bytes, bytes, serialized_compare> map(type.get_keys_type()->as_less_comparator());
        for (auto&& e : m) {
            map.emplace(type.get_keys_type()->decompose(e.first),
                        type.get_values_type()->decompose(e.second));
        }
        return maps::value { std::move(map) };
+      });
    } catch (marshal_exception& e) {
        throw exceptions::invalid_request_exception(e.what());
    }
@@ -263,16 +263,14 @@ maps::marker::bind(const query_options& options) {
        return constants::UNSET_VALUE;
    }
    try {
-        _receiver->type->validate(*val, options.get_cql_serialization_format());
+        with_linearized(*val, [&] (bytes_view value) {
+            _receiver->type->validate(value, options.get_cql_serialization_format());
+        });
    } catch (marshal_exception& e) {
        throw exceptions::invalid_request_exception(
                format("Exception while binding column {:s}: {:s}", _receiver->name->to_cql_string(), e.what()));
    }
-    return ::make_shared<maps::value>(
-            maps::value::from_serialized(
-                    *val,
-                    dynamic_cast<const map_type_impl&>(_receiver->type->without_reversed()),
-                    options.get_cql_serialization_format()));
+    return ::make_shared<maps::value>(maps::value::from_serialized(*val, static_cast<const map_type_impl&>(*_receiver->type), options.get_cql_serialization_format()));
 }

 void
--- a/cql3/query_options.cc
+++ b/cql3/query_options.cc
@@ -42,14 +42,12 @@
 #include "cql3/cql_config.hh"
 #include "query_options.hh"
 #include "version.hh"
-#include "db/consistency_level_type.hh"

 namespace cql3 {

 const cql_config default_cql_config;

-thread_local const query_options::specific_options query_options::specific_options::DEFAULT{
-    -1, {}, db::consistency_level::SERIAL, api::missing_timestamp};
+thread_local const query_options::specific_options query_options::specific_options::DEFAULT{-1, {}, {}, api::missing_timestamp};

 thread_local query_options query_options::DEFAULT{default_cql_config,
    db::consistency_level::ONE, infinite_timeout_config, std::nullopt,
--- a/cql3/query_processor.cc
+++ b/cql3/query_processor.cc
@@ -61,6 +61,8 @@ logging::logger log("query_processor");
 logging::logger prep_cache_log("prepared_statements_cache");
 logging::logger authorized_prepared_statements_cache_log("authorized_prepared_statements_cache");

+distributed<query_processor> _the_query_processor;
+
 const sstring query_processor::CQL_VERSION = "3.3.1";

 const std::chrono::minutes prepared_statements_cache::entry_expiry = std::chrono::minutes(60);
@@ -668,14 +670,10 @@ struct internal_query_state {
    bool more_results = true;
 };

-::shared_ptr<internal_query_state> query_processor::create_paged_state(
-        const sstring& query_string,
-        db::consistency_level cl,
-        const timeout_config& timeout_config,
-        const std::initializer_list<data_value>& values,
-        int32_t page_size) {
+::shared_ptr<internal_query_state> query_processor::create_paged_state(const sstring& query_string,
+        const std::initializer_list<data_value>& values, int32_t page_size) {
    auto p = prepare_internal(query_string);
-    auto opts = make_internal_options(p, values, cl, timeout_config, page_size);
+    auto opts = make_internal_options(p, values, db::consistency_level::ONE, infinite_timeout_config, page_size);
    ::shared_ptr<internal_query_state> res = ::make_shared<internal_query_state>(
            internal_query_state{
                    query_string,
@@ -939,20 +937,17 @@ bool query_processor::migration_subscriber::should_invalidate(
    return statement->depends_on_keyspace(ks_name) && (!cf_name || statement->depends_on_column_family(*cf_name));
 }

-future<> query_processor::query_internal(
+future<> query_processor::query(
        const sstring& query_string,
-        db::consistency_level cl,
-        const timeout_config& timeout_config,
        const std::initializer_list<data_value>& values,
-        int32_t page_size,
        noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)>&& f) {
-    return for_each_cql_result(create_paged_state(query_string, cl, timeout_config, values, page_size), std::move(f));
+    return for_each_cql_result(create_paged_state(query_string, values), std::move(f));
 }

-future<> query_processor::query_internal(
+future<> query_processor::query(
        const sstring& query_string,
        noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)>&& f) {
-    return query_internal(query_string, db::consistency_level::ONE, infinite_timeout_config, {}, 1000, std::move(f));
+    return for_each_cql_result(create_paged_state(query_string, {}), std::move(f));
 }

 }
--- a/cql3/query_processor.hh
+++ b/cql3/query_processor.hh
@@ -224,52 +224,75 @@ public:
    /*!
     * \brief iterate over all cql results using paging
     *
-     * You create a statement with optional parameters and pass
-     * a function that goes over the result rows.
+     * You create a statement with optional parameters and pass
+     * a function that goes over the results.
     *
-     * The passed function would be called for all rows; return future<stop_iteration::yes>
-     * to stop iteration.
+     * The passed function is called for each result row; return stop_iteration::yes
+     * to stop the iteration early.
     *
     * For example:
-            return query_internal(
-                    "SELECT * from system.compaction_history",
-                    db::consistency_level::ONE,
-                    infinite_timeout_config,
-                    {},
-                    [&history] (const cql3::untyped_result_set::row& row) mutable {
+            return query("SELECT * from system.compaction_history",
+                         [&history] (const cql3::untyped_result_set::row& row) mutable {
+                ....
+                ....
+                return stop_iteration::no;
+            });
+
+     * You can use placeholders in the query; the statement will only be prepared once.
+     *
+     *
+     * query_string - the cql string, can contain placeholders
+     * f - a function to be run on each row of the query result; if it returns stop_iteration::yes the iteration stops
+     * args - arbitrary number of query parameters
+     */
+    template<typename... Args>
+    future<> query(
+            const sstring& query_string,
+            std::function<stop_iteration(const cql3::untyped_result_set_row&)>&& f,
+            Args&&... args) {
+        return for_each_cql_result(
+                create_paged_state(query_string, { data_value(std::forward<Args>(args))... }), std::move(f));
+    }
+
+    /*!
+     * \brief iterate over all cql results using paging
+     *
+     * You create a statement with optional parameters and pass
+     * a function that goes over the results.
+     *
+     * The passed function is called for each result row; return a future resolving to
+     * stop_iteration::yes to stop the iteration early.
+     *
+     * For example:
+            return query("SELECT * from system.compaction_history",
+                         [&history] (const cql3::untyped_result_set::row& row) mutable {
                ....
                ....
                return make_ready_future<stop_iteration>(stop_iteration::no);
            });

-     * You can use placeholders in the query, the statement will only be prepared once.
+     * You can use placeholders in the query; the statement will only be prepared once.
     *
-     * query_string - the cql string, can contain placeholders
-     * cl - consistency level of the query
-     * timeout_config - timeout configuration
-     * values - values to be substituted for the placeholders in the query
-     * page_size - maximum page size
-     * f - a function to be run on each row of the query result,
-     *     if the function returns stop_iteration::yes the iteration will stop
+     *
+     * query_string - the cql string, can contain placeholders
+     * values - values to be substituted for the placeholders in the query
+     * f - a function to be run on each row of the query result; if it returns stop_iteration::yes the iteration
+     * will stop
     */
-    future<> query_internal(
+    future<> query(
            const sstring& query_string,
-            db::consistency_level cl,
-            const timeout_config& timeout_config,
            const std::initializer_list<data_value>& values,
-            int32_t page_size,
            noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)>&& f);

    /*
     * \brief iterate over all cql results using paging
-     * An overload of query_internal without query parameters
-     * using CL = ONE, no timeout, and page size = 1000.
+     * An overload of query() that takes a future-returning function and no query parameters.
     *
-     * query_string - the cql string, can contain placeholders
-     * f - a function to be run on each row of the query result,
-     *     if the function returns stop_iteration::yes the iteration will stop
+     * query_string - the cql string, can contain placeholders
+     * f - a function to be run on each row of the query result; if it returns stop_iteration::yes the iteration
+     * will stop
     */
-    future<> query_internal(
+    future<> query(
            const sstring& query_string,
            noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)>&& f);

@@ -331,10 +354,8 @@ private:
     */
    ::shared_ptr<internal_query_state> create_paged_state(
            const sstring& query_string,
-            db::consistency_level,
-            const timeout_config&,
-            const std::initializer_list<data_value>&,
-            int32_t page_size);
+            const std::initializer_list<data_value>& = { },
+            int32_t page_size = 1000);

    /*!
     * \brief run a query using paging
@@ -443,4 +464,14 @@ private:
            ::shared_ptr<cql_statement> statement);
 };

+extern seastar::sharded<query_processor> _the_query_processor;
+
+inline seastar::sharded<query_processor>& get_query_processor() {
+    return _the_query_processor;
+}
+
+inline query_processor& get_local_query_processor() {
+    return _the_query_processor.local();
+}
+
 }
--- a/cql3/restrictions/single_column_primary_key_restrictions.hh
+++ b/cql3/restrictions/single_column_primary_key_restrictions.hh
@@ -171,7 +171,8 @@ public:

    virtual void merge_with(::shared_ptr<restriction> restriction) override {
        if (find_atom(restriction->expression, [] (const expr::binary_operator& b) {
-                    return std::holds_alternative<std::vector<expr::column_value>>(b.lhs);
+                    return std::holds_alternative<std::vector<expr::column_value>>(b.lhs)
+                            && std::get<std::vector<expr::column_value>>(b.lhs).size() > 1;
                })) {
            throw exceptions::invalid_request_exception(
                "Mixing single column relations and multi column relations on clustering columns is not allowed");
@@ -212,22 +213,30 @@ private:
    std::vector<range_type> compute_bounds(const query_options& options) const {
        std::vector<range_type> ranges;

+        static constexpr auto invalid_null_msg = std::is_same<ValueType, partition_key>::value
+            ? "Invalid null value for partition key part %s" : "Invalid null value for clustering key part %s";
+
        // TODO: rewrite this to simply invoke possible_lhs_values on each clustering column, find the first
        // non-list, and take Cartesian product of that prefix.  No need for to_range() and std::get() here.
        if (_restrictions->is_all_eq()) {
+            if (_restrictions->size() == 1) {
+                auto&& e = *restrictions().begin();
+                const auto b = std::get<expr::binary_operator>(e.second->expression).rhs->bind_and_get(options);
+                if (!b) {
+                    throw exceptions::invalid_request_exception(sprint(invalid_null_msg, e.first->name_as_text()));
+                }
+                return {range_type::make_singular(ValueType::from_single_value(*_schema, to_bytes(b)))};
+            }
            std::vector<bytes> components;
            components.reserve(_restrictions->size());
            for (auto&& e : restrictions()) {
                const column_definition* def = e.first;
                assert(components.size() == _schema->position(*def));
-                // Because _restrictions is all EQ, possible_lhs_values must return a list, not a range.
-                const auto b = std::get<expr::value_list>(possible_lhs_values(e.first, e.second->expression, options));
-                // Furthermore, this list is either a single element (when all RHSs are the same) or empty (when at
-                // least two are different, so the restrictions cannot hold simultaneously -- ie, c=1 AND c=2).
-                if (b.empty()) {
-                    return {};
+                const auto b = std::get<expr::binary_operator>(e.second->expression).rhs->bind_and_get(options);
+                if (!b) {
+                    throw exceptions::invalid_request_exception(sprint(invalid_null_msg, e.first->name_as_text()));
                }
-                components.emplace_back(b.front());
+                components.emplace_back(to_bytes(b));
            }
            return {range_type::make_singular(ValueType::from_exploded(*_schema, std::move(components)))};
        }
@@ -315,7 +324,7 @@ public:
        std::vector<bytes_opt> res;
        for (const ValueType& r : src) {
            for (const auto& component : r.components()) {
-                res.emplace_back(to_bytes(component));
+                res.emplace_back(component);
            }
        }
        return res;
--- a/cql3/restrictions/single_column_restrictions.hh
+++ b/cql3/restrictions/single_column_restrictions.hh
@@ -108,9 +108,6 @@ public:
            return bytes_opt{};
        } else {
            const auto values = std::get<expr::value_list>(possible_lhs_values(&cdef, it->second->expression, options));
-            if (values.empty()) {
-                return bytes_opt{};
-            }
            assert(values.size() == 1);
            return values.front();
        }
@@ -122,7 +119,7 @@ public:
     * @param column_def the column definition
     * @return the restriction associated to the specified column
     */
-    ::shared_ptr<single_column_restriction> get_restriction(const column_definition& column_def) const {
+    ::shared_ptr<restriction> get_restriction(const column_definition& column_def) const {
        auto i = _restrictions.find(&column_def);
        if (i == _restrictions.end()) {
            return {};
--- a/cql3/restrictions/statement_restrictions.cc
+++ b/cql3/restrictions/statement_restrictions.cc
@@ -147,6 +147,7 @@ statement_restrictions::statement_restrictions(database& db,
        const std::vector<::shared_ptr<relation>>& where_clause,
        variable_specifications& bound_names,
        bool selects_only_static_columns,
+        bool select_a_collection,
        bool for_view,
        bool allow_filtering)
    : statement_restrictions(schema, allow_filtering)
@@ -192,12 +193,12 @@ statement_restrictions::statement_restrictions(database& db,
    const expr::allow_local_index allow_local(
            !_partition_key_restrictions->has_unrestricted_components(*_schema)
            && _partition_key_restrictions->is_all_eq());
-    _has_queriable_ck_index = _clustering_columns_restrictions->has_supporting_index(sim, allow_local);
-    _has_queriable_pk_index = _partition_key_restrictions->has_supporting_index(sim, allow_local);
-    _has_queriable_regular_index = _nonprimary_key_restrictions->has_supporting_index(sim, allow_local);
+    const bool has_queriable_clustering_column_index = _clustering_columns_restrictions->has_supporting_index(sim, allow_local);
+    const bool has_queriable_pk_index = _partition_key_restrictions->has_supporting_index(sim, allow_local);
+    const bool has_queriable_regular_index = _nonprimary_key_restrictions->has_supporting_index(sim, allow_local);

    // At this point, the select statement if fully constructed, but we still have a few things to validate
-    process_partition_key_restrictions(for_view, allow_filtering);
+    process_partition_key_restrictions(has_queriable_pk_index, for_view, allow_filtering);

    // Some but not all of the partition key columns have been specified;
    // hence we need turn these restrictions into index expressions.
@@ -226,11 +227,10 @@ statement_restrictions::statement_restrictions(database& db,
        }
    }

-    process_clustering_columns_restrictions(for_view, allow_filtering);
+    process_clustering_columns_restrictions(has_queriable_clustering_column_index, select_a_collection, for_view, allow_filtering);

    // Covers indexes on the first clustering column (among others).
-    if (_is_key_range && _has_queriable_ck_index &&
-        !dynamic_pointer_cast<multi_column_restriction>(_clustering_columns_restrictions)) {
+    if (_is_key_range && has_queriable_clustering_column_index) {
        _uses_secondary_indexing = true;
    }

@@ -265,7 +265,7 @@ statement_restrictions::statement_restrictions(database& db,
    }

    if (!_nonprimary_key_restrictions->empty()) {
-        if (_has_queriable_regular_index) {
+        if (has_queriable_regular_index) {
            _uses_secondary_indexing = true;
        } else if (!allow_filtering) {
            throw exceptions::invalid_request_exception("Cannot execute this query as it might involve data filtering and "
@@ -329,39 +329,20 @@ int statement_restrictions::score(const secondary_index::index& index) const {
    return 1;
 }

-namespace {
-
-using namespace cql3::restrictions;
-
-/// If rs contains a restrictions_map of individual columns to their restrictions, returns it.  Otherwise, returns null.
-const single_column_restrictions::restrictions_map* get_individual_restrictions_map(const restrictions* rs) {
-    if (auto regular = dynamic_cast<const single_column_restrictions*>(rs)) {
-        return &regular->restrictions();
-    } else if (auto partition = dynamic_cast<const single_column_partition_key_restrictions*>(rs)) {
-        return &partition->restrictions();
-    } else if (auto clustering = dynamic_cast<const single_column_clustering_key_restrictions*>(rs)) {
-        return &clustering->restrictions();
-    }
-    return nullptr;
-}
-
-} // anonymous namespace
-
 std::pair<std::optional<secondary_index::index>, ::shared_ptr<cql3::restrictions::restrictions>> statement_restrictions::find_idx(secondary_index::secondary_index_manager& sim) const {
    std::optional<secondary_index::index> chosen_index;
    int chosen_index_score = 0;
    ::shared_ptr<cql3::restrictions::restrictions> chosen_index_restrictions;

    for (const auto& index : sim.list_indexes()) {
-        auto cdef = _schema->get_column_definition(to_bytes(index.target_column()));
        for (::shared_ptr<cql3::restrictions::restrictions> restriction : index_restrictions()) {
-            if (auto rmap = get_individual_restrictions_map(restriction.get())) {
-                const auto found = rmap->find(cdef);
-                if (found != rmap->end() && is_supported_by(found->second->expression, index)
-                    && score(index) > chosen_index_score) {
-                    chosen_index = index;
-                    chosen_index_score = score(index);
-                    chosen_index_restrictions = restriction;
+            for (const auto& cdef : restriction->get_column_defs()) {
+                if (index.depends_on(*cdef)) {
+                    if (score(index) > chosen_index_score) {
+                        chosen_index = index;
+                        chosen_index_score = score(index);
+                        chosen_index_restrictions = restriction;
+                    }
                }
            }
        }
@@ -420,7 +401,7 @@ std::vector<const column_definition*> statement_restrictions::get_column_defs_fo
    return column_defs_for_filtering;
 }

-void statement_restrictions::process_partition_key_restrictions(bool for_view, bool allow_filtering) {
+void statement_restrictions::process_partition_key_restrictions(bool has_queriable_index, bool for_view, bool allow_filtering) {
    // If there is a queriable index, no special condition are required on the other restrictions.
    // But we still need to know 2 things:
    // - If we don't have a queriable index, is the query ok
@@ -431,17 +412,17 @@ void statement_restrictions::process_partition_key_restrictions(bool for_view, b
        _is_key_range = true;
    } else if (_partition_key_restrictions->empty()) {
        _is_key_range = true;
-        _uses_secondary_indexing = _has_queriable_pk_index;
+        _uses_secondary_indexing = has_queriable_index;
    }

    if (_partition_key_restrictions->needs_filtering(*_schema)) {
-        if (!allow_filtering && !for_view && !_has_queriable_pk_index) {
+        if (!allow_filtering && !for_view && !has_queriable_index) {
            throw exceptions::invalid_request_exception("Cannot execute this query as it might involve data filtering and "
                "thus may have unpredictable performance. If you want to execute "
                "this query despite the performance unpredictability, use ALLOW FILTERING");
        }
        _is_key_range = true;
-        _uses_secondary_indexing = _has_queriable_pk_index;
+        _uses_secondary_indexing = has_queriable_index;
    }

 }
@@ -454,19 +435,23 @@ bool statement_restrictions::has_unrestricted_clustering_columns() const {
    return _clustering_columns_restrictions->has_unrestricted_components(*_schema);
 }

-void statement_restrictions::process_clustering_columns_restrictions(bool for_view, bool allow_filtering) {
+void statement_restrictions::process_clustering_columns_restrictions(bool has_queriable_index, bool select_a_collection, bool for_view, bool allow_filtering) {
    if (!has_clustering_columns_restriction()) {
        return;
    }

+    if (clustering_key_restrictions_has_IN() && select_a_collection) {
+        throw exceptions::invalid_request_exception(
+            "Cannot restrict clustering columns by IN relations when a collection is selected by the query");
+    }
    if (find_atom(_clustering_columns_restrictions->expression, expr::is_on_collection)
-        && !_has_queriable_ck_index && !allow_filtering) {
+        && !has_queriable_index && !allow_filtering) {
        throw exceptions::invalid_request_exception(
            "Cannot restrict clustering columns by a CONTAINS relation without a secondary index or filtering");
    }

    if (has_clustering_columns_restriction() && _clustering_columns_restrictions->needs_filtering(*_schema)) {
-        if (_has_queriable_ck_index) {
+        if (has_queriable_index) {
            _uses_secondary_indexing = true;
        } else if (!allow_filtering && !for_view) {
            auto clustering_columns_iter = _schema->clustering_key_columns().begin();
@@ -505,73 +490,24 @@ std::vector<query::clustering_range> statement_restrictions::get_clustering_boun
    return _clustering_columns_restrictions->bounds_ranges(options);
 }

-namespace {
-
-/// True iff get_partition_slice_for_global_index_posting_list() will be able to calculate the token value from the
-/// given restrictions.  Keep in sync with the get_partition_slice_for_global_index_posting_list() source.
-bool token_known(const statement_restrictions& r) {
-    return !r.has_partition_key_unrestricted_components() && r.get_partition_key_restrictions()->is_all_eq();
-}
-
-} // anonymous namespace
-
 bool statement_restrictions::need_filtering() const {
-    using namespace expr;
-
-    const auto npart = _partition_key_restrictions->size();
-    if (npart > 0 && npart < _schema->partition_key_size()) {
-        // Can't calculate the token value, so a naive base-table query must be filtered.  Same for any index tables,
-        // except if there's only one restriction supported by an index.
-        return !(npart == 1 && _has_queriable_pk_index &&
-                 _clustering_columns_restrictions->empty() && _nonprimary_key_restrictions->empty());
-    }
-    if (_partition_key_restrictions->needs_filtering(*_schema)) {
-        // We most likely cannot calculate token(s).  Neither base-table nor index-table queries can avoid filtering.
-        return true;
-    }
-    // Now we know the partition key is either unrestricted or fully restricted.
-
-    const auto nreg = _nonprimary_key_restrictions->size();
-    if (nreg > 1 || (nreg == 1 && !_has_queriable_regular_index)) {
-        return true; // Regular columns are unsorted in storage and no single index suffices.
-    }
-    if (nreg == 1) { // Single non-key restriction supported by an index.
-        // Will the index-table query require filtering?  That depends on whether its clustering key is restricted to a
-        // continuous range.  Recall that this clustering key is (token, pk, ck) of the base table.
-        if (npart == 0 && _clustering_columns_restrictions->empty()) {
-            return false; // No clustering key restrictions => whole partitions.
-        }
-        return !token_known(*this) || _clustering_columns_restrictions->needs_filtering(*_schema);
-    }
-    // Now we know there are no nonkey restrictions.
-
-    if (dynamic_pointer_cast<multi_column_restriction>(_clustering_columns_restrictions)) {
-        // Multicolumn bounds mean lexicographic order, implying a continuous clustering range.  Multicolumn IN means a
-        // finite set of continuous ranges.  Multicolumn restrictions cannot currently be combined with single-column
-        // clustering restrictions.  Therefore, a continuous clustering range is guaranteed.
-        return false;
+    uint32_t number_of_restricted_columns_for_indexing = 0;
+    for (auto&& restrictions : _index_restrictions) {
+        number_of_restricted_columns_for_indexing += restrictions->size();
    }

-    if (_has_queriable_ck_index && _uses_secondary_indexing) {
-        // In cases where we use an index, clustering column restrictions might cause the need for filtering.
-        // TODO: This is overly conservative, there are some cases when this returns true but filtering
-        // is not needed. Because of that the database will sometimes perform filtering when it's not actually needed.
-        // Query performance shouldn't be affected much, at most we will filter rows that are all correct.
-        // Here are some cases to consider:
-        // On a table with primary key (p, c1, c2, c3) with an index on c3
-        // WHERE c3 = ? - doesn't require filtering
-        // WHERE c1 = ? AND c2 = ? AND c3 = ? - requires filtering
-        // WHERE p = ? AND c1 = ? AND c3 = ? - doesn't require filtering, but we conservatively report it does
-        // WHERE p = ? AND c1 LIKE ? AND c3 = ? - requires filtering
-        // WHERE p = ? AND c1 = ? AND c2 LIKE ? AND c3 = ? - requires filtering
-        // WHERE p = ? AND c1 = ? AND c2 = ? AND c3 = ? - doesn't use an index
-        // WHERE p = ? AND c1 = ? AND c2 < ? AND c3 = ? - doesn't require filtering, but we report it does
-        return _clustering_columns_restrictions->size() > 1;
+    int number_of_filtering_restrictions = _nonprimary_key_restrictions->size();
+    // If the whole partition key is restricted, it does not imply filtering
+    if (_partition_key_restrictions->has_unrestricted_components(*_schema) || !_partition_key_restrictions->is_all_eq()) {
+        number_of_filtering_restrictions += _partition_key_restrictions->size() + _clustering_columns_restrictions->size();
+    } else if (_clustering_columns_restrictions->has_unrestricted_components(*_schema)) {
+        number_of_filtering_restrictions += _clustering_columns_restrictions->size() - _clustering_columns_restrictions->prefix_size();
    }
-    // Now we know that the query doesn't use an index.
-
-    // The only thing that can cause filtering now are the clustering columns.
-    return _clustering_columns_restrictions->needs_filtering(*_schema);
+    return number_of_restricted_columns_for_indexing > 1
+            || (number_of_restricted_columns_for_indexing == 0 && _partition_key_restrictions->empty() && !_clustering_columns_restrictions->empty())
+            || (number_of_restricted_columns_for_indexing != 0 && _nonprimary_key_restrictions->has_multiple_contains())
+            || (number_of_restricted_columns_for_indexing != 0 && !_uses_secondary_indexing)
+            || (_uses_secondary_indexing && number_of_filtering_restrictions > 1);
 }

 void statement_restrictions::validate_secondary_index_selections(bool selects_only_static_columns) {
--- a/cql3/restrictions/statement_restrictions.hh
+++ b/cql3/restrictions/statement_restrictions.hh
@@ -102,8 +102,6 @@ private:
     */
    bool _is_key_range = false;

-    bool _has_queriable_regular_index = false, _has_queriable_pk_index = false, _has_queriable_ck_index = false;
-
 public:
    /**
     * Creates a new empty <code>StatementRestrictions</code>.
@@ -119,6 +117,7 @@ public:
        const std::vector<::shared_ptr<relation>>& where_clause,
        variable_specifications& bound_names,
        bool selects_only_static_columns,
+        bool select_a_collection,
        bool for_view = false,
        bool allow_filtering = false);

@@ -210,15 +209,16 @@ public:
     */
    bool has_unrestricted_clustering_columns() const;
 private:
-    void process_partition_key_restrictions(bool for_view, bool allow_filtering);
+    void process_partition_key_restrictions(bool has_queriable_index, bool for_view, bool allow_filtering);

    /**
     * Processes the clustering column restrictions.
     *
     * @param has_queriable_index <code>true</code> if some of the queried data are indexed, <code>false</code> otherwise
+     * @param select_a_collection <code>true</code> if the query should return a collection column
     * @throws InvalidRequestException if the request is invalid
     */
-    void process_clustering_columns_restrictions(bool for_view, bool allow_filtering);
+    void process_clustering_columns_restrictions(bool has_queriable_index, bool select_a_collection, bool for_view, bool allow_filtering);

    /**
     * Returns the <code>Restrictions</code> for the specified type of columns.
--- a/cql3/selection/selection.hh
+++ b/cql3/selection/selection.hh
@@ -140,6 +140,21 @@ public:
        return true;
    }

+    /**
+     * Checks if this selection contains a collection.
+     *
+     * @return <code>true</code> if this selection contains a collection, <code>false</code> otherwise.
+     */
+    bool contains_a_collection() const {
+        if (!_schema->has_multi_cell_collections()) {
+            return false;
+        }
+
+        return std::any_of(_columns.begin(), _columns.end(), [] (auto&& def) {
+           return def->type->is_collection() && def->type->is_multi_cell();
+        });
+    }
+
    /**
     * Returns the index of the specified column.
     *
--- a/cql3/sets.cc
+++ b/cql3/sets.cc
@@ -31,7 +31,7 @@ lw_shared_ptr<column_specification>
 sets::value_spec_of(const column_specification& column) {
    return make_lw_shared<column_specification>(column.ks_name, column.cf_name,
            ::make_shared<column_identifier>(format("value({})", *column.name), true),
-            dynamic_cast<const set_type_impl&>(column.type->without_reversed()).get_elements_type());
+            dynamic_pointer_cast<const set_type_impl>(column.type)->get_elements_type());
 }

 shared_ptr<term>
@@ -74,8 +74,7 @@ sets::literal::prepare(database& db, const sstring& keyspace, lw_shared_ptr<colu

        values.push_back(std::move(t));
    }
-    auto compare = dynamic_cast<const set_type_impl&>(receiver->type->without_reversed())
-            .get_elements_type()->as_less_comparator();
+    auto compare = dynamic_pointer_cast<const set_type_impl>(receiver->type)->get_elements_type()->as_less_comparator();

    auto value = ::make_shared<delayed_value>(compare, std::move(values));
    if (all_terminal) {
@@ -87,7 +86,7 @@ sets::literal::prepare(database& db, const sstring& keyspace, lw_shared_ptr<colu

 void
 sets::literal::validate_assignable_to(database& db, const sstring& keyspace, const column_specification& receiver) const {
-    if (!receiver.type->without_reversed().is_set()) {
+    if (!dynamic_pointer_cast<const set_type_impl>(receiver.type)) {
        // We've parsed empty maps as a set literal to break the ambiguity so
        // handle that case now
        if (dynamic_pointer_cast<const map_type_impl>(receiver.type) && _elements.empty()) {
@@ -107,7 +106,7 @@ sets::literal::validate_assignable_to(database& db, const sstring& keyspace, con

 assignment_testable::test_result
 sets::literal::test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const {
-    if (!receiver.type->without_reversed().is_set()) {
+    if (!dynamic_pointer_cast<const set_type_impl>(receiver.type)) {
        // We've parsed empty maps as a set literal to break the ambiguity so handle that case now
        if (dynamic_pointer_cast<const map_type_impl>(receiver.type) && _elements.empty()) {
            return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
@@ -138,12 +137,14 @@ sets::value::from_serialized(const fragmented_temporary_buffer::view& val, const
        // Collections have this small hack that validate cannot be called on a serialized object,
        // but compose does the validation (so we're fine).
        // FIXME: deserializeForNativeProtocol?!
-        auto s = value_cast<set_type_impl::native_type>(type.deserialize(val, sf));
+      return with_linearized(val, [&] (bytes_view v) {
+        auto s = value_cast<set_type_impl::native_type>(type.deserialize(v, sf));
        std::set<bytes, serialized_compare> elements(type.get_elements_type()->as_less_comparator());
        for (auto&& element : s) {
            elements.insert(elements.end(), type.get_elements_type()->decompose(element));
        }
        return value(std::move(elements));
+      });
    } catch (marshal_exception& e) {
        throw exceptions::invalid_request_exception(e.what());
    }
@@ -225,11 +226,8 @@ sets::delayed_value::bind(const query_options& options) {

 sets::marker::marker(int32_t bind_index, lw_shared_ptr<column_specification> receiver)
    : abstract_marker{bind_index, std::move(receiver)} {
-    if (!_receiver->type->without_reversed().is_set()) {
-        throw std::runtime_error(format("Receiver {} for set marker has wrong type: {}",
-                                        _receiver->cf_name, _receiver->type->name()));
+        assert(dynamic_cast<const set_type_impl*>(_receiver->type.get()));
    }
-}

 ::shared_ptr<terminal>
 sets::marker::bind(const query_options& options) {
@@ -239,9 +237,11 @@ sets::marker::bind(const query_options& options) {
    } else if (value.is_unset_value()) {
        return constants::UNSET_VALUE;
    } else {
-        auto& type = dynamic_cast<const set_type_impl&>(_receiver->type->without_reversed());
+        auto& type = static_cast<const set_type_impl&>(*_receiver->type);
        try {
-            type.validate(*value, options.get_cql_serialization_format());
+            with_linearized(*value, [&] (bytes_view v) {
+                type.validate(v, options.get_cql_serialization_format());
+            });
        } catch (marshal_exception& e) {
            throw exceptions::invalid_request_exception(
                    format("Exception while binding column {:s}: {:s}", _receiver->name->to_cql_string(), e.what()));
@@ -284,7 +284,8 @@ void
 sets::adder::do_add(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params,
        shared_ptr<term> value, const column_definition& column) {
    auto set_value = dynamic_pointer_cast<sets::value>(std::move(value));
-    auto& set_type = dynamic_cast<const set_type_impl&>(column.type->without_reversed());
+    auto set_type = dynamic_cast<const set_type_impl*>(column.type.get());
+    assert(set_type);
    if (column.type->is_multi_cell()) {
        if (!set_value || set_value->_elements.empty()) {
            return;
@@ -294,10 +295,10 @@ sets::adder::do_add(mutation& m, const clustering_key_prefix& row_key, const upd
        collection_mutation_description mut;

        for (auto&& e : set_value->_elements) {
-            mut.cells.emplace_back(e, params.make_cell(*set_type.value_comparator(), bytes_view(), atomic_cell::collection_member::yes));
+            mut.cells.emplace_back(e, params.make_cell(*set_type->value_comparator(), bytes_view(), atomic_cell::collection_member::yes));
        }

-        m.set_cell(row_key, column, mut.serialize(set_type));
+        m.set_cell(row_key, column, mut.serialize(*set_type));
    } else if (set_value != nullptr) {
        // for frozen sets, we're overwriting the whole cell
        auto v = set_type_impl::serialize_partially_deserialized_form(
--- a/cql3/statements/alter_keyspace_statement.cc
+++ b/cql3/statements/alter_keyspace_statement.cc
@@ -45,7 +45,7 @@
 #include "db/system_keyspace.hh"
 #include "database.hh"

-bool is_system_keyspace(std::string_view keyspace);
+bool is_system_keyspace(const sstring& keyspace);

 cql3::statements::alter_keyspace_statement::alter_keyspace_statement(sstring name, ::shared_ptr<ks_prop_defs> attrs)
    : _name(name)
@@ -91,10 +91,10 @@ void cql3::statements::alter_keyspace_statement::validate(service::storage_proxy
    }
 }

-future<shared_ptr<cql_transport::event::schema_change>> cql3::statements::alter_keyspace_statement::announce_migration(service::storage_proxy& proxy) const {
+future<shared_ptr<cql_transport::event::schema_change>> cql3::statements::alter_keyspace_statement::announce_migration(service::storage_proxy& proxy, bool is_local_only) const {
    auto old_ksm = proxy.get_db().local().find_keyspace(_name).metadata();
-    const auto& tm = *proxy.get_token_metadata_ptr();
-    return service::get_local_migration_manager().announce_keyspace_update(_attrs->as_ks_metadata_update(old_ksm, tm)).then([this] {
+    const auto& tm = proxy.get_token_metadata();
+    return service::get_local_migration_manager().announce_keyspace_update(_attrs->as_ks_metadata_update(old_ksm, tm), is_local_only).then([this] {
        using namespace cql_transport;
        return ::make_shared<event::schema_change>(
                event::schema_change::change_type::UPDATED,
--- a/cql3/statements/alter_keyspace_statement.hh
+++ b/cql3/statements/alter_keyspace_statement.hh
@@ -61,7 +61,7 @@ public:

    future<> check_access(service::storage_proxy& proxy, const service::client_state& state) const override;
    void validate(service::storage_proxy& proxy, const service::client_state& state) const override;
-    future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy) const override;
+    future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy, bool is_local_only) const override;
    virtual std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
 };

--- a/cql3/statements/alter_table_statement.cc
+++ b/cql3/statements/alter_table_statement.cc
@@ -70,9 +70,7 @@ alter_table_statement::alter_table_statement(shared_ptr<cf_name> name,
 }

 future<> alter_table_statement::check_access(service::storage_proxy& proxy, const service::client_state& state) const {
-    using cdt = auth::command_desc::type;
-    return state.has_column_family_access(proxy.local_db(), keyspace(), column_family(), auth::permission::ALTER,
-                                          _type == type::opts ? cdt::ALTER_WITH_OPTS : cdt::OTHER);
+    return state.has_column_family_access(keyspace(), column_family(), auth::permission::ALTER);
 }

 void alter_table_statement::validate(service::storage_proxy& proxy, const service::client_state& state) const
@@ -288,7 +286,7 @@ void alter_table_statement::drop_column(const schema& schema, const table& cf, s
    }
 }

-future<shared_ptr<cql_transport::event::schema_change>> alter_table_statement::announce_migration(service::storage_proxy& proxy) const
+future<shared_ptr<cql_transport::event::schema_change>> alter_table_statement::announce_migration(service::storage_proxy& proxy, bool is_local_only) const
 {
    auto& db = proxy.get_db().local();
    auto s = validation::validate_column_family(db, keyspace(), column_family());
@@ -396,7 +394,7 @@ future<shared_ptr<cql_transport::event::schema_change>> alter_table_statement::a
        break;
    }

-    return service::get_local_migration_manager().announce_column_family_update(cfm.build(), false, std::move(view_updates))
+    return service::get_local_migration_manager().announce_column_family_update(cfm.build(), false, std::move(view_updates), is_local_only)
        .then([this] {
            using namespace cql_transport;
            return ::make_shared<event::schema_change>(
--- a/cql3/statements/alter_table_statement.hh
+++ b/cql3/statements/alter_table_statement.hh
@@ -80,7 +80,7 @@ public:

    virtual future<> check_access(service::storage_proxy& proxy, const service::client_state& state) const override;
    virtual void validate(service::storage_proxy& proxy, const service::client_state& state) const override;
-    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy) const override;
+    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy, bool is_local_only) const override;
    virtual std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
 private:
    void add_column(const schema& schema, const table& cf, schema_builder& cfm, std::vector<view_ptr>& view_updates, const column_identifier& column_name, const cql3_type validator, const column_definition* def, bool is_static) const;
--- a/cql3/statements/alter_type_statement.cc
+++ b/cql3/statements/alter_type_statement.cc
@@ -78,7 +78,7 @@ const sstring& alter_type_statement::keyspace() const
    return _name.get_keyspace();
 }

-void alter_type_statement::do_announce_migration(database& db, ::keyspace& ks) const
+void alter_type_statement::do_announce_migration(database& db, ::keyspace& ks, bool is_local_only) const
 {
    auto&& all_types = ks.metadata()->user_types().get_all_types();
    auto to_update = all_types.find(_name.get_user_type_name());
@@ -100,7 +100,7 @@ void alter_type_statement::do_announce_migration(database& db, ::keyspace& ks) c

    // Now, we need to announce the type update to basically change it for new tables using this type,
    // but we also need to find all existing user types and CF using it and change them.
-    service::get_local_migration_manager().announce_type_update(updated).get();
+    service::get_local_migration_manager().announce_type_update(updated, is_local_only).get();

    for (auto&& schema : ks.metadata()->cf_meta_data() | boost::adaptors::map_values) {
        auto cfm = schema_builder(schema);
@@ -115,21 +115,21 @@ void alter_type_statement::do_announce_migration(database& db, ::keyspace& ks) c
        }
        if (modified) {
            if (schema->is_view()) {
-                service::get_local_migration_manager().announce_view_update(view_ptr(cfm.build())).get();
+                service::get_local_migration_manager().announce_view_update(view_ptr(cfm.build()), is_local_only).get();
            } else {
-                service::get_local_migration_manager().announce_column_family_update(cfm.build(), false, {}).get();
+                service::get_local_migration_manager().announce_column_family_update(cfm.build(), false, {}, is_local_only).get();
            }
        }
    }
 }

-future<shared_ptr<cql_transport::event::schema_change>> alter_type_statement::announce_migration(service::storage_proxy& proxy) const
+future<shared_ptr<cql_transport::event::schema_change>> alter_type_statement::announce_migration(service::storage_proxy& proxy, bool is_local_only) const
 {
-    return seastar::async([this, &proxy] {
+    return seastar::async([this, &proxy, is_local_only] {
        auto&& db = proxy.get_db().local();
        try {
            auto&& ks = db.find_keyspace(keyspace());
-            do_announce_migration(db, ks);
+            do_announce_migration(db, ks, is_local_only);
            using namespace cql_transport;
            return ::make_shared<event::schema_change>(
                    event::schema_change::change_type::UPDATED,
--- a/cql3/statements/alter_type_statement.hh
+++ b/cql3/statements/alter_type_statement.hh
@@ -63,14 +63,14 @@ public:

    virtual const sstring& keyspace() const override;

-    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy) const override;
+    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy, bool is_local_only) const override;

    class add_or_alter;
    class renames;
 protected:
    virtual user_type make_updated_type(database& db, user_type to_update) const = 0;
 private:
-    void do_announce_migration(database& db, ::keyspace& ks) const;
+    void do_announce_migration(database& db, ::keyspace& ks, bool is_local_only) const;
 };

 class alter_type_statement::add_or_alter : public alter_type_statement {
--- a/cql3/statements/alter_view_statement.cc
+++ b/cql3/statements/alter_view_statement.cc
@@ -60,10 +60,9 @@ alter_view_statement::alter_view_statement(::shared_ptr<cf_name> view_name, ::sh
 future<> alter_view_statement::check_access(service::storage_proxy& proxy, const service::client_state& state) const
 {
    try {
-        const database& db = proxy.local_db();
-        auto&& s = db.find_schema(keyspace(), column_family());
+        auto&& s = proxy.get_db().local().find_schema(keyspace(), column_family());
        if (s->is_view())  {
-            return state.has_column_family_access(db, keyspace(), s->view_info()->base_name(), auth::permission::ALTER);
+            return state.has_column_family_access(keyspace(), s->view_info()->base_name(), auth::permission::ALTER);
        }
    } catch (const no_such_column_family& e) {
        // Will be validated afterwards.
@@ -76,7 +75,7 @@ void alter_view_statement::validate(service::storage_proxy&, const service::clie
    // validated in announce_migration()
 }

-future<shared_ptr<cql_transport::event::schema_change>> alter_view_statement::announce_migration(service::storage_proxy& proxy) const
+future<shared_ptr<cql_transport::event::schema_change>> alter_view_statement::announce_migration(service::storage_proxy& proxy, bool is_local_only) const
 {
    auto&& db = proxy.get_db().local();
    schema_ptr schema = validation::validate_column_family(db, keyspace(), column_family());
@@ -108,7 +107,7 @@ future<shared_ptr<cql_transport::event::schema_change>> alter_view_statement::an
                "the corresponding data in the parent table.");
    }

-    return service::get_local_migration_manager().announce_view_update(view_ptr(builder.build())).then([this] {
+    return service::get_local_migration_manager().announce_view_update(view_ptr(builder.build()), is_local_only).then([this] {
        using namespace cql_transport;

        return ::make_shared<event::schema_change>(
--- a/cql3/statements/alter_view_statement.hh
+++ b/cql3/statements/alter_view_statement.hh
@@ -63,7 +63,7 @@ public:

    virtual void validate(service::storage_proxy&, const service::client_state& state) const override;

-    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy) const override;
+    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy, bool is_local_only) const override;

    virtual std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
 };
--- a/cql3/statements/batch_statement.cc
+++ b/cql3/statements/batch_statement.cc
@@ -38,7 +38,6 @@
 */

 #include "batch_statement.hh"
-#include "cql3/util.hh"
 #include "raw/batch_statement.hh"
 #include "db/config.hh"
 #include "db/consistency_level_validations.hh"
@@ -59,10 +58,6 @@ timeout_for_type(batch_statement::type t) {
            : &timeout_config::write_timeout;
 }

-db::timeout_clock::duration batch_statement::get_timeout(const query_options& options) const {
-    return _attrs->is_timeout_set() ? _attrs->get_timeout(options) : options.get_timeout_config().*get_timeout_config_selector();
-}
-
 batch_statement::batch_statement(int bound_terms, type type_,
                                 std::vector<single_statement> statements,
                                 std::unique_ptr<attributes> attrs,
@@ -264,7 +259,6 @@ static thread_local inheriting_concrete_execution_stage<

 future<shared_ptr<cql_transport::messages::result_message>> batch_statement::execute(
        service::storage_proxy& storage, service::query_state& state, const query_options& options) const {
-    cql3::util::validate_timestamp(options, _attrs);
    return batch_stage(this, seastar::ref(storage), seastar::ref(state),
                       seastar::cref(options), false, options.get_timestamp(state));
 }
@@ -290,7 +284,7 @@ future<shared_ptr<cql_transport::messages::result_message>> batch_statement::do_
    ++_stats.batches;
    _stats.statements_in_batches += _statements.size();

-    auto timeout = db::timeout_clock::now() + get_timeout(options);
+    auto timeout = db::timeout_clock::now() + options.get_timeout_config().*get_timeout_config_selector();
    return get_mutations(storage, options, timeout, local, now, query_state).then([this, &storage, &options, timeout, tr_state = query_state.get_trace_state(),
                                                                                                                               permit = query_state.get_permit()] (std::vector<mutation> ms) mutable {
        return execute_without_conditions(storage, std::move(ms), options.get_consistency(), timeout, std::move(tr_state), std::move(permit));
--- a/cql3/statements/batch_statement.hh
+++ b/cql3/statements/batch_statement.hh
@@ -170,8 +170,6 @@ private:
            service::storage_proxy& storage,
            const query_options& options,
            service::query_state& state) const;
-
-    db::timeout_clock::duration get_timeout(const query_options& options) const;
 public:
    // FIXME: no cql_statement::to_string() yet
 #if 0
--- a/cql3/statements/cf_prop_defs.cc
+++ b/cql3/statements/cf_prop_defs.cc
@@ -157,7 +157,6 @@ void cf_prop_defs::validate(const database& db, const schema::extensions_map& sc
    }

    validate_minimum_int(KW_DEFAULT_TIME_TO_LIVE, 0, DEFAULT_DEFAULT_TIME_TO_LIVE);
-    validate_minimum_int(KW_PAXOSGRACESECONDS, 0, DEFAULT_GC_GRACE_SECONDS);

    auto min_index_interval = get_int(KW_MIN_INDEX_INTERVAL, DEFAULT_MIN_INDEX_INTERVAL);
    auto max_index_interval = get_int(KW_MAX_INDEX_INTERVAL, DEFAULT_MAX_INDEX_INTERVAL);
--- a/cql3/statements/create_function_statement.cc
+++ b/cql3/statements/create_function_statement.cc
@@ -59,11 +59,11 @@ std::unique_ptr<prepared_statement> create_function_statement::prepare(database&
 }

 future<shared_ptr<cql_transport::event::schema_change>> create_function_statement::announce_migration(
-        service::storage_proxy& proxy) const {
+        service::storage_proxy& proxy, bool is_local_only) const {
    if (!_func) {
        return make_ready_future<::shared_ptr<cql_transport::event::schema_change>>();
    }
-    return service::get_local_migration_manager().announce_new_function(_func).then([this] {
+    return service::get_local_migration_manager().announce_new_function(_func, is_local_only).then([this] {
        return create_schema_change(*_func, true);
    });
 }
--- a/cql3/statements/create_function_statement.hh
+++ b/cql3/statements/create_function_statement.hh
@@ -29,7 +29,7 @@ namespace statements {
 class create_function_statement final : public create_function_statement_base {
    virtual std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(
-            service::storage_proxy& proxy) const override;
+            service::storage_proxy& proxy, bool is_local_only) const override;
    virtual void create(service::storage_proxy& proxy, functions::function* old) const override;
    sstring _language;
    sstring _body;
--- a/cql3/statements/create_index_statement.cc
+++ b/cql3/statements/create_index_statement.cc
@@ -73,7 +73,7 @@ create_index_statement::create_index_statement(::shared_ptr<cf_name> name,

 future<>
 create_index_statement::check_access(service::storage_proxy& proxy, const service::client_state& state) const {
-    return state.has_column_family_access(proxy.local_db(), keyspace(), column_family(), auth::permission::ALTER);
+    return state.has_column_family_access(keyspace(), column_family(), auth::permission::ALTER);
 }

 void
@@ -271,7 +271,7 @@ void create_index_statement::validate_targets_for_multi_column_index(std::vector
 }

 future<::shared_ptr<cql_transport::event::schema_change>>
-create_index_statement::announce_migration(service::storage_proxy& proxy) const {
+create_index_statement::announce_migration(service::storage_proxy& proxy, bool is_local_only) const {
    auto& db = proxy.get_db().local();
    auto schema = db.find_schema(keyspace(), column_family());
    std::vector<::shared_ptr<index_target>> targets;
@@ -317,7 +317,7 @@ create_index_statement::announce_migration(service::storage_proxy& proxy) const
    schema_builder builder{schema};
    builder.with_index(index);
    return service::get_local_migration_manager().announce_column_family_update(
-            builder.build(), false, {}).then([this]() {
+            builder.build(), false, {}, is_local_only).then([this]() {
        using namespace cql_transport;
        return ::make_shared<event::schema_change>(
                event::schema_change::change_type::UPDATED,
--- a/cql3/statements/create_index_statement.hh
+++ b/cql3/statements/create_index_statement.hh
@@ -79,7 +79,7 @@ public:

    future<> check_access(service::storage_proxy& proxy, const service::client_state& state) const override;
    void validate(service::storage_proxy&, const service::client_state& state) const override;
-    future<::shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy&) const override;
+    future<::shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy&, bool is_local_only) const override;

    virtual std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
 private:
--- a/cql3/statements/create_keyspace_statement.cc
+++ b/cql3/statements/create_keyspace_statement.cc
@@ -47,7 +47,7 @@

 #include <regex>

-bool is_system_keyspace(std::string_view keyspace);
+bool is_system_keyspace(const sstring& keyspace);

 namespace cql3 {

@@ -106,11 +106,11 @@ void create_keyspace_statement::validate(service::storage_proxy&, const service:
 #endif
 }

-future<shared_ptr<cql_transport::event::schema_change>> create_keyspace_statement::announce_migration(service::storage_proxy& proxy) const
+future<shared_ptr<cql_transport::event::schema_change>> create_keyspace_statement::announce_migration(service::storage_proxy& proxy, bool is_local_only) const
 {
-    return make_ready_future<>().then([this, p = proxy.shared_from_this()] {
-        const auto& tm = *p->get_token_metadata_ptr();
-        return service::get_local_migration_manager().announce_new_keyspace(_attrs->as_ks_metadata(_name, tm));
+    return make_ready_future<>().then([this, p = proxy.shared_from_this(), is_local_only] {
+        const auto& tm = p->get_token_metadata();
+        return service::get_local_migration_manager().announce_new_keyspace(_attrs->as_ks_metadata(_name, tm), is_local_only);
    }).then_wrapped([this] (auto&& f) {
        try {
            f.get();
@@ -147,7 +147,7 @@ future<> cql3::statements::create_keyspace_statement::grant_permissions_to_creat
 future<::shared_ptr<messages::result_message>>
 create_keyspace_statement::execute(service::storage_proxy& proxy, service::query_state& state, const query_options& options) const {
    return schema_altering_statement::execute(proxy, state, options).then([this, p = proxy.shared_from_this()] (::shared_ptr<messages::result_message> msg) {
-        bool multidc = p->get_token_metadata_ptr()->get_topology().get_datacenter_endpoints().size() > 1;
+        bool multidc = p->get_token_metadata().get_topology().get_datacenter_endpoints().size() > 1;
        bool simple = _attrs->get_replication_strategy_class() == "SimpleStrategy";

        if (multidc && simple) {
--- a/cql3/statements/create_keyspace_statement.hh
+++ b/cql3/statements/create_keyspace_statement.hh
@@ -84,7 +84,7 @@ public:
     */
    virtual void validate(service::storage_proxy&, const service::client_state& state) const override;

-    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy) const override;
+    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy, bool is_local_only) const override;

    virtual std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;

--- a/cql3/statements/create_table_statement.cc
+++ b/cql3/statements/create_table_statement.cc
@@ -97,10 +97,10 @@ std::vector<column_definition> create_table_statement::get_columns() const
    return column_defs;
 }

-future<shared_ptr<cql_transport::event::schema_change>> create_table_statement::announce_migration(service::storage_proxy& proxy) const {
+future<shared_ptr<cql_transport::event::schema_change>> create_table_statement::announce_migration(service::storage_proxy& proxy, bool is_local_only) const {
    auto schema = get_cf_meta_data(proxy.get_db().local());
-    return make_ready_future<>().then([this, schema = std::move(schema)] {
-        return service::get_local_migration_manager().announce_new_column_family(std::move(schema));
+    return make_ready_future<>().then([this, is_local_only, schema = std::move(schema)] {
+        return service::get_local_migration_manager().announce_new_column_family(std::move(schema), is_local_only);
    }).then_wrapped([this] (auto&& f) {
        try {
            f.get();
@@ -204,7 +204,6 @@ std::unique_ptr<prepared_statement> create_table_statement::raw_statement::prepa
    }

    _properties.validate(db, _properties.properties()->make_schema_extensions(db.extensions()));
-    const bool has_default_ttl = _properties.properties()->get_default_time_to_live() > 0;

    auto stmt = ::make_shared<create_table_statement>(_cf_name, _properties.properties(), _if_not_exists, _static_columns, _properties.properties()->get_id());

@@ -212,11 +211,6 @@ std::unique_ptr<prepared_statement> create_table_statement::raw_statement::prepa
    for (auto&& entry : _definitions) {
        ::shared_ptr<column_identifier> id = entry.first;
        cql3_type pt = entry.second->prepare(db, keyspace());
-
-        if (has_default_ttl && pt.is_counter()) {
-            throw exceptions::invalid_request_exception("Cannot set default_time_to_live on a table with counters");
-        }
-
        if (pt.get_type()->is_multi_cell()) {
            if (pt.get_type()->is_user_type()) {
                // check for multi-cell types (non-frozen UDTs or collections) inside a non-frozen UDT
--- a/cql3/statements/create_table_statement.hh
+++ b/cql3/statements/create_table_statement.hh
@@ -102,7 +102,7 @@ public:

    virtual void validate(service::storage_proxy&, const service::client_state& state) const override;

-    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy) const override;
+    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy, bool is_local_only) const override;

    virtual std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;

--- a/cql3/statements/create_type_statement.cc
+++ b/cql3/statements/create_type_statement.cc
@@ -138,7 +138,7 @@ inline user_type create_type_statement::create_type(database& db) const
        std::move(field_names), std::move(field_types), true /* multi cell */);
 }

-future<shared_ptr<cql_transport::event::schema_change>> create_type_statement::announce_migration(service::storage_proxy& proxy) const
+future<shared_ptr<cql_transport::event::schema_change>> create_type_statement::announce_migration(service::storage_proxy& proxy, bool is_local_only) const
 {
    auto&& db = proxy.get_db().local();

@@ -152,7 +152,7 @@ future<shared_ptr<cql_transport::event::schema_change>> create_type_statement::a

    auto type = create_type(db);
    check_for_duplicate_names(type);
-    return service::get_local_migration_manager().announce_new_type(type).then([this] {
+    return service::get_local_migration_manager().announce_new_type(type, is_local_only).then([this] {
        using namespace cql_transport;

        return ::make_shared<event::schema_change>(
--- a/cql3/statements/create_type_statement.hh
+++ b/cql3/statements/create_type_statement.hh
@@ -65,7 +65,7 @@ public:

    virtual const sstring& keyspace() const override;

-    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy) const override;
+    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy, bool is_local_only) const override;

    virtual std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;

--- a/cql3/statements/create_view_statement.cc
+++ b/cql3/statements/create_view_statement.cc
@@ -89,7 +89,7 @@ create_view_statement::create_view_statement(
 }

 future<> create_view_statement::check_access(service::storage_proxy& proxy, const service::client_state& state) const {
-    return state.has_column_family_access(proxy.local_db(), keyspace(), _base_name->get_column_family(), auth::permission::ALTER);
+    return state.has_column_family_access(keyspace(), _base_name->get_column_family(), auth::permission::ALTER);
 }

 void create_view_statement::validate(service::storage_proxy& proxy, const service::client_state& state) const {
@@ -140,7 +140,7 @@ static bool validate_primary_key(
    return new_non_pk_column;
 }

-future<shared_ptr<cql_transport::event::schema_change>> create_view_statement::announce_migration(service::storage_proxy& proxy) const {
+future<shared_ptr<cql_transport::event::schema_change>> create_view_statement::announce_migration(service::storage_proxy& proxy, bool is_local_only) const {
    // We need to make sure that:
    //  - primary key includes all columns in base table's primary key
    //  - make sure that the select statement does not have anything other than columns
@@ -225,7 +225,7 @@ future<shared_ptr<cql_transport::event::schema_change>> create_view_statement::a
    }

    auto parameters = make_lw_shared<raw::select_statement::parameters>(raw::select_statement::parameters::orderings_type(), false, true);
-    raw::select_statement raw_select(_base_name, std::move(parameters), _select_clause, _where_clause, nullptr, nullptr, {}, std::make_unique<cql3::attributes::raw>());
+    raw::select_statement raw_select(_base_name, std::move(parameters), _select_clause, _where_clause, nullptr, nullptr, {});
    raw_select.prepare_keyspace(keyspace());
    raw_select.set_bound_variables({});

@@ -350,8 +350,8 @@ future<shared_ptr<cql_transport::event::schema_change>> create_view_statement::a
    auto where_clause_text = util::relations_to_where_clause(_where_clause);
    builder.with_view_info(schema->id(), schema->cf_name(), included.empty(), std::move(where_clause_text));

-    return make_ready_future<>().then([definition = view_ptr(builder.build())]() mutable {
-        return service::get_local_migration_manager().announce_new_view(definition);
+    return make_ready_future<>().then([definition = view_ptr(builder.build()), is_local_only]() mutable {
+        return service::get_local_migration_manager().announce_new_view(definition, is_local_only);
    }).then_wrapped([this] (auto&& f) {
        try {
            f.get();
--- a/cql3/statements/create_view_statement.hh
+++ b/cql3/statements/create_view_statement.hh
@@ -68,7 +68,7 @@ public:
    // Functions we need to override to subclass schema_altering_statement
    virtual future<> check_access(service::storage_proxy& proxy, const service::client_state& state) const override;
    virtual void validate(service::storage_proxy&, const service::client_state& state) const override;
-    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy) const override;
+    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy, bool is_local_only) const override;
    virtual std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;

    // FIXME: continue here. See create_table_statement.hh and CreateViewStatement.java
--- a/cql3/statements/drop_function_statement.cc
+++ b/cql3/statements/drop_function_statement.cc
@@ -33,7 +33,7 @@ std::unique_ptr<prepared_statement> drop_function_statement::prepare(database& d
 }

 future<shared_ptr<cql_transport::event::schema_change>> drop_function_statement::announce_migration(
-        service::storage_proxy& proxy) const {
+        service::storage_proxy& proxy, bool is_local_only) const {
    if (!_func) {
        return make_ready_future<shared_ptr<cql_transport::event::schema_change>>();
    }
@@ -41,7 +41,7 @@ future<shared_ptr<cql_transport::event::schema_change>> drop_function_statement:
    if (!user_func) {
        throw exceptions::invalid_request_exception(format("'{}' is not a user defined function", _func));
    }
-    return service::get_local_migration_manager().announce_function_drop(user_func).then([this] {
+    return service::get_local_migration_manager().announce_function_drop(user_func, is_local_only).then([this] {
        return create_schema_change(*_func, false);
    });
 }
--- a/cql3/statements/drop_function_statement.hh
+++ b/cql3/statements/drop_function_statement.hh
@@ -28,7 +28,7 @@ namespace statements {
 class drop_function_statement final : public drop_function_statement_base {
    virtual std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(
-            service::storage_proxy& proxy) const override;
+            service::storage_proxy& proxy, bool is_local_only) const override;

 public:
    drop_function_statement(functions::function_name name, std::vector<shared_ptr<cql3_type::raw>> arg_types,
--- a/cql3/statements/drop_index_statement.cc
+++ b/cql3/statements/drop_index_statement.cc
@@ -70,7 +70,7 @@ future<> drop_index_statement::check_access(service::storage_proxy& proxy, const
    if (!cfm) {
        return make_ready_future<>();
    }
-    return state.has_column_family_access(proxy.local_db(), cfm->ks_name(), cfm->cf_name(), auth::permission::ALTER);
+    return state.has_column_family_access(cfm->ks_name(), cfm->cf_name(), auth::permission::ALTER);
 }

 void drop_index_statement::validate(service::storage_proxy& proxy, const service::client_state& state) const
@@ -86,7 +86,7 @@ void drop_index_statement::validate(service::storage_proxy& proxy, const service
    }
 }

-future<shared_ptr<cql_transport::event::schema_change>> drop_index_statement::announce_migration(service::storage_proxy& proxy) const
+future<shared_ptr<cql_transport::event::schema_change>> drop_index_statement::announce_migration(service::storage_proxy& proxy, bool is_local_only) const
 {
    auto cfm = lookup_indexed_table(proxy);
    if (!cfm) {
@@ -95,7 +95,7 @@ future<shared_ptr<cql_transport::event::schema_change>> drop_index_statement::an
    ++_cql_stats->secondary_index_drops;
    auto builder = schema_builder(cfm);
    builder.without_index(_index_name);
-    return service::get_local_migration_manager().announce_column_family_update(builder.build(), false, {}).then([cfm] {
+    return service::get_local_migration_manager().announce_column_family_update(builder.build(), false, {}, is_local_only).then([cfm] {
        // Dropping an index is akin to updating the CF
        // Note that we shouldn't call columnFamily() at this point because the index has been dropped and the call to lookupIndexedTable()
        // in that method would now throw.
--- a/cql3/statements/drop_index_statement.hh
+++ b/cql3/statements/drop_index_statement.hh
@@ -72,7 +72,7 @@ public:

    virtual void validate(service::storage_proxy&, const service::client_state& state) const override;

-    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy) const override;
+    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy, bool is_local_only) const override;

    virtual std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
 private:
--- a/cql3/statements/drop_keyspace_statement.cc
+++ b/cql3/statements/drop_keyspace_statement.cc
@@ -74,10 +74,10 @@ const sstring& drop_keyspace_statement::keyspace() const
    return _keyspace;
 }

-future<shared_ptr<cql_transport::event::schema_change>> drop_keyspace_statement::announce_migration(service::storage_proxy& proxy) const
+future<shared_ptr<cql_transport::event::schema_change>> drop_keyspace_statement::announce_migration(service::storage_proxy& proxy, bool is_local_only) const
 {
-    return make_ready_future<>().then([this] {
-        return service::get_local_migration_manager().announce_keyspace_drop(_keyspace);
+    return make_ready_future<>().then([this, is_local_only] {
+        return service::get_local_migration_manager().announce_keyspace_drop(_keyspace, is_local_only);
    }).then_wrapped([this] (auto&& f) {
        try {
            f.get();
--- a/cql3/statements/drop_keyspace_statement.hh
+++ b/cql3/statements/drop_keyspace_statement.hh
@@ -59,7 +59,7 @@ public:

    virtual const sstring& keyspace() const override;

-    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy) const override;
+    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy, bool is_local_only) const override;

    virtual std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
 };
--- a/cql3/statements/drop_table_statement.cc
+++ b/cql3/statements/drop_table_statement.cc
@@ -58,7 +58,7 @@ future<> drop_table_statement::check_access(service::storage_proxy& proxy, const
 {
    // invalid_request_exception is only thrown synchronously.
    try {
-        return state.has_column_family_access(proxy.local_db(), keyspace(), column_family(), auth::permission::DROP);
+        return state.has_column_family_access(keyspace(), column_family(), auth::permission::DROP);
    } catch (exceptions::invalid_request_exception&) {
        if (!_if_exists) {
            throw;
@@ -72,10 +72,10 @@ void drop_table_statement::validate(service::storage_proxy&, const service::clie
    // validated in announce_migration()
 }

-future<shared_ptr<cql_transport::event::schema_change>> drop_table_statement::announce_migration(service::storage_proxy& proxy) const
+future<shared_ptr<cql_transport::event::schema_change>> drop_table_statement::announce_migration(service::storage_proxy& proxy, bool is_local_only) const
 {
-    return make_ready_future<>().then([this] {
-        return service::get_local_migration_manager().announce_column_family_drop(keyspace(), column_family());
+    return make_ready_future<>().then([this, is_local_only] {
+        return service::get_local_migration_manager().announce_column_family_drop(keyspace(), column_family(), is_local_only);
    }).then_wrapped([this] (auto&& f) {
        try {
            f.get();
--- a/cql3/statements/drop_table_statement.hh
+++ b/cql3/statements/drop_table_statement.hh
@@ -58,7 +58,7 @@ public:

    virtual void validate(service::storage_proxy&, const service::client_state& state) const override;

-    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy) const override;
+    virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(service::storage_proxy& proxy, bool is_local_only) const override;

    virtual std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
 };
--- a/cql3/statements/drop_type_statement.cc
+++ b/cql3/statements/drop_type_statement.cc
@@ -142,7 +142,7 @@ const sstring& drop_type_statement::keyspace() const
    return _name.get_keyspace();
 }

-future<shared_ptr<cql_transport::event::schema_change>> drop_type_statement::announce_migration(service::storage_proxy& proxy) const
+future<shared_ptr<cql_transport::event::schema_change>> drop_type_statement::announce_migration(service::storage_proxy& proxy, bool is_local_only) const
 {
    auto&& db = proxy.get_db().local();

@@ -157,7 +157,7 @@ future<shared_ptr<cql_transport::event::schema_change>> drop_type_statement::ann
        return make_ready_future<::shared_ptr<cql_transport::event::schema_change>>();
    }

-    return service::get_local_migration_manager().announce_type_drop(to_drop->second).then([this] {
+    return service::get_local_migration_manager().announce_type_drop(to_drop->second, is_local_only).then([this] {
        using namespace cql_transport;

        return ::make_shared<event::schema_change>(
--- a/Show More
+++ b/Show More