storage_proxy: do not touch all_replicas.front() if it's empty.

The list of all endpoints for a query can be empty if we have replication_factor 0 or there are no live endpoints for this token. Do not access all_replicas.front() in this case. Fixes #5935. Message-Id: <20200306192521.73486-2-kostja@scylladb.com> (cherry picked from commit 9827efe554)
cql transport: do not log broken pipe error when a client closes its side of a connection abruptly
2020-06-22 18:29:15 +03:00 · 2020-06-21 13:09:22 +03:00 · 2020-06-21 13:07:21 +03:00 · 2020-06-21 13:03:05 +03:00 · 2020-06-21 12:57:48 +03:00 · 2020-06-21 12:47:05 +03:00
4659 changed files with 29035 additions and 71156 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,4 +1,3 @@
 .git
 build
 seastar/build
-testlog
--- a/.gitmodules
+++ b/.gitmodules
@@ -6,15 +6,12 @@
 	path = swagger-ui
 	url = ../scylla-swagger-ui
 	ignore = dirty
+[submodule "xxHash"]
+	path = xxHash
+	url = ../xxHash
 [submodule "libdeflate"]
 	path = libdeflate
 	url = ../libdeflate
-[submodule "abseil"]
-	path = abseil
-	url = ../abseil-cpp
-[submodule "scylla-jmx"]
-	path = scylla-jmx
-	url = ../scylla-jmx
-[submodule "scylla-tools"]
-	path = scylla-tools
-	url = ../scylla-tools-java
+[submodule "zstd"]
+	path = zstd
+	url = ../zstd
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,25 +5,13 @@
 cmake_minimum_required(VERSION 3.7)
 project(scylla)

-if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
-  message(STATUS "Setting build type to 'Release' as none was specified.")
-  set(CMAKE_BUILD_TYPE "Release" CACHE
-      STRING "Choose the type of build." FORCE)
-  # Set the possible values of build type for cmake-gui
-  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
-    "Debug" "Release" "Dev" "Sanitize")
-endif()
-
-if(CMAKE_BUILD_TYPE)
-    string(TOLOWER "${CMAKE_BUILD_TYPE}" BUILD_TYPE)
-else()
-    set(BUILD_TYPE "release")
-endif()
-
 if (NOT DEFINED FOR_IDE AND NOT DEFINED ENV{FOR_IDE} AND NOT DEFINED ENV{CLION_IDE})
    message(FATAL_ERROR "This CMakeLists.txt file is only valid for use in IDEs, please define FOR_IDE to acknowledge this.")
 endif()

+# Default value. A more accurate list is populated through `pkg-config` below if `seastar.pc` is available.
+set(SEASTAR_INCLUDE_DIRS "seastar")
+
 # These paths are always available, since they're included in the repository. Additional DPDK headers are placed while
 # Seastar is built, and are captured in `SEASTAR_INCLUDE_DIRS` through parsing the Seastar pkg-config file (below).
 set(SEASTAR_DPDK_INCLUDE_DIRS
@@ -34,14 +22,9 @@ set(SEASTAR_DPDK_INCLUDE_DIRS

 find_package(PkgConfig REQUIRED)

-set(ENV{PKG_CONFIG_PATH} "${CMAKE_SOURCE_DIR}/build/${BUILD_TYPE}/seastar:$ENV{PKG_CONFIG_PATH}")
+set(ENV{PKG_CONFIG_PATH} "${CMAKE_SOURCE_DIR}/seastar/build/release:$ENV{PKG_CONFIG_PATH}")
 pkg_check_modules(SEASTAR seastar)

-if(NOT SEASTAR_INCLUDE_DIRS)
-    # Default value. A more accurate list is populated through `pkg-config` below if `seastar.pc` is available.
-    set(SEASTAR_INCLUDE_DIRS "seastar/include")
-endif()
-
 find_package(Boost COMPONENTS filesystem program_options system thread)

 ##
@@ -87,7 +70,7 @@ scan_scylla_source_directories(
          seastar/json
          seastar/net
          seastar/rpc
-          seastar/testing
+          seastar/tests
          seastar/util)

 scan_scylla_source_directories(
@@ -123,7 +106,7 @@ scan_scylla_source_directories(
 scan_scylla_source_directories(
        VAR SCYLLA_GEN_SOURCE_FILES
        RECURSIVE
-        PATHS build/${BUILD_TYPE}/gen)
+        PATHS build/release/gen)

 set(SCYLLA_SOURCE_FILES
        ${SCYLLA_ROOT_SOURCE_FILES}
@@ -134,11 +117,15 @@ add_executable(scylla
        ${SEASTAR_SOURCE_FILES}
        ${SCYLLA_SOURCE_FILES})

+# Note that since CLion does not understand GCC6 concepts, we always disable them (even if users configure otherwise).
+# CLion seems to have trouble with `-U` (macro undefinition), so we do it this way instead.
+list(REMOVE_ITEM SEASTAR_CFLAGS "-DHAVE_GCC6_CONCEPTS")
+
 # If the Seastar pkg-config information is available, append to the default flags.
 #
 # For ease of browsing the source code, we always pretend that DPDK is enabled.
 target_compile_options(scylla PUBLIC
-        -std=gnu++20
+        -std=gnu++1z
        -DHAVE_DPDK
        -DHAVE_HWLOC
        "${SEASTAR_CFLAGS}")
@@ -152,4 +139,4 @@ target_include_directories(scylla PUBLIC
        ${Boost_INCLUDE_DIRS}
        xxhash
        libdeflate
-        build/${BUILD_TYPE}/gen)
+        build/release/gen)
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -8,4 +8,4 @@ Please use the [Issue Tracker](https://github.com/scylladb/scylla/issues/) to re

 # Contributing Code to Scylla

-To contribute code to Scylla, you need to sign the [Contributor License Agreement](https://www.scylladb.com/open-source/contributor-agreement/) and send your changes as [patches](https://github.com/scylladb/scylla/wiki/Formatting-and-sending-patches) to the [mailing list](https://groups.google.com/forum/#!forum/scylladb-dev). We don't accept pull requests on GitHub.
+To contribute code to Scylla, you need to sign the [Contributor License Agreement](http://www.scylladb.com/opensource/cla/) and send your changes as [patches](https://github.com/scylladb/scylla/wiki/Formatting-and-sending-patches) to the [mailing list](https://groups.google.com/forum/#!forum/scylladb-dev). We don't accept pull requests on GitHub.
--- a/HACKING.md
+++ b/HACKING.md
@@ -18,35 +18,23 @@ $ git submodule update --init --recursive

 ### Dependencies

-Scylla is fairly fussy about its build environment, requiring a very recent
-version of the C++20 compiler and numerous tools and libraries to build.
+Scylla depends on the system package manager for its development dependencies.

-Run `./install-dependencies.sh` (as root) to use your Linux distributions's
-package manager to install the appropriate packages on your build machine.
-However, this will only work on very recent distributions. For example,
-currently Fedora users must upgrade to Fedora 32 otherwise the C++ compiler
-will be too old, and not support the new C++20 standard that Scylla uses.
+Running `./install-dependencies.sh` (as root) installs the appropriate packages based on your Linux distribution.

-Alternatively, to avoid having to upgrade your build machine or install
-various packages on it, we provide another option - the **frozen toolchain**.
-This is a script, `./tools/toolchain/dbuild`, that can execute build or run
-commands inside a Docker image that contains exactly the right build tools and
-libraries. The `dbuild` technique is useful for beginners, but is also the way
-in which ScyllaDB produces official releases, so it is highly recommended.
+On Ubuntu and Debian based Linux distributions, some packages
+required to build Scylla are missing in the official upstream:

-To use `dbuild`, you simply prefix any build or run command with it. Building
-and running Scylla becomes as easy as:
+- libthrift-dev and libthrift
+- antlr3-c++-dev

-```bash
-$ ./tools/toolchain/dbuild ./configure.py
-$ ./tools/toolchain/dbuild ninja build/release/scylla
-$ ./tools/toolchain/dbuild ./build/release/scylla --developer-mode 1
-```
+Try running ```sudo ./scripts/scylla_current_repo``` to add Scylla upstream,
+and get the missing packages from it.

 ### Build system

 **Note**: Compiling Scylla requires, conservatively, 2 GB of memory per native
-thread, and up to 3 GB per native thread while linking. GCC >= 10 is
+thread, and up to 3 GB per native thread while linking. GCC >= 8.1.1 is
 required.

 Scylla is built with [Ninja](https://ninja-build.org/), a low-level rule-based system. A Python script, `configure.py`, generates a Ninja file (`build.ninja`) based on configuration options.
@@ -153,7 +141,7 @@ In v3:
 "Tests: unit ({mode}), dtest ({smp})"
 ```

-The usual is "Tests: unit (dev)", although running debug tests is encouraged.
+The usual is "Tests: unit (release)", although running debug tests is encouraged.

 5. When answering review comments, prefer inline quotes as they make it easier to track the conversation across multiple e-mails.

--- a/NOTICE.txt
+++ b/NOTICE.txt
@@ -1,7 +1,5 @@
 This project includes code developed by the Apache Software Foundation (http://www.apache.org/),
 especially Apache Cassandra.

-It includes files from https://github.com/antonblanchard/crc32-vpmsum (author Anton Blanchard <anton@au.ibm.com>, IBM).
+It also includes files from https://github.com/antonblanchard/crc32-vpmsum (author Anton Blanchard <anton@au.ibm.com>, IBM).
 These files are located in utils/arch/powerpc/crc32-vpmsum. Their license may be found in licenses/LICENSE-crc32-vpmsum.TXT.
-
-It includes modified code from https://gitbox.apache.org/repos/asf?p=cassandra-dtest.git (owned by The Apache Software Foundation)
--- a/README.md
+++ b/README.md
@@ -2,24 +2,22 @@

 ## Quick-start

-Scylla is fairly fussy about its build environment, requiring very recent
-versions of the C++20 compiler and of many libraries to build. The document
-[HACKING.md](HACKING.md) includes detailed information on building and
-developing Scylla, but to get Scylla building quickly on (almost) any build
-machine, Scylla offers offers a [frozen toolchain](tools/toolchain/README.md),
-This is a pre-configured Docker image which includes recent versions of all
-the required compilers, libraries and build tools. Using the frozen toolchain
-allows you to avoid changing anything in your build machine to meet Scylla's
-requirements - you just need to meet the frozen toolchain's prerequisites
-(mostly, Docker or Podman being available).
-
-Building and running Scylla with the frozen toolchain is as easy as:
+To get the build going quickly, Scylla offers a [frozen toolchain](tools/toolchain/README.md)
+which would build and run Scylla using a pre-configured Docker image.
+Using the frozen toolchain will also isolate all of the installed
+dependencies in a Docker container.
+Assuming you have met the toolchain prerequisite, which is running
+Docker in user mode, building and running is as easy as:

 ```bash
 $ ./tools/toolchain/dbuild ./configure.py
 $ ./tools/toolchain/dbuild ninja build/release/scylla
 $ ./tools/toolchain/dbuild ./build/release/scylla --developer-mode 1
-```
+ ```
+
+Please see [HACKING.md](HACKING.md) for detailed information on building and developing Scylla.
+
+**Note**: GCC >= 8.1.1 is required to compile Scylla.

 ## Running Scylla

@@ -40,10 +38,6 @@ $ ./tools/toolchain/dbuild ./build/release/scylla --developer-mode 1
 ./build/release/scylla --help
 ```

-## Testing
-
-See [test.py manual](docs/testing.md).
-
 ## Scylla APIs and compatibility
 By default, Scylla is compatible with Apache Cassandra and its APIs - CQL and
 Thrift. There is also experimental support for the API of Amazon DynamoDB,
@@ -62,27 +56,41 @@ both.
 Seastar documentation can be found [here](http://docs.seastar.io/master/index.html).
 User documentation can be found [here](https://docs.scylladb.com/).

-## Training 
+## Building Fedora RPM

-Training material and online courses can be found at [Scylla University](https://university.scylladb.com/). 
-The courses are free, self-paced and include hands-on examples. They cover a variety of topics including Scylla data modeling, 
-administration, architecture, basic NoSQL concepts, using drivers for application development, Scylla setup, failover, compactions, 
-multi-datacenters and how Scylla integrates with third-party applications.
+As a pre-requisite, you need to install [Mock](https://fedoraproject.org/wiki/Mock) on your machine:

-## Building a CentOS-based Docker image
+```
+# Install mock:
+sudo yum install mock
+
+# Add user to the "mock" group:
+usermod -a -G mock $USER && newgrp mock
+```
+
+Then, to build an RPM, run:
+
+```
+./dist/redhat/build_rpm.sh
+```
+
+The built RPM is stored in the ``/var/lib/mock/<configuration>/result`` directory.
+For example, on Fedora 21 mock reports the following:
+
+```
+INFO: Done(scylla-server-0.00-1.fc21.src.rpm) Config(default) 20 minutes 7 seconds
+INFO: Results and/or logs in: /var/lib/mock/fedora-21-x86_64/result
+```
+
+## Building Fedora-based Docker image

 Build a Docker image with:

 ```
-cd dist/docker/redhat
+cd dist/docker
 docker build -t <image-name> .
 ```

-This build is based on executables downloaded from downloads.scylladb.com,
-**not** on the executables built in this source directory. See further
-instructions in dist/docker/redhat/README.md to build a docker image from
-your own executables.
-
 Run the image with:

 ```
--- a/10
+++ b/10
@@ -1,7 +1,7 @@
 #!/bin/sh

 PRODUCT=scylla
-VERSION=4.2.4
+VERSION=3.3.4

 if test -f version
 then
@@ -19,14 +19,6 @@ else
 	SCYLLA_RELEASE=$SCYLLA_BUILD.$DATE.$GIT_COMMIT
 fi

-if [ -f build/SCYLLA-RELEASE-FILE ]; then
-	RELEASE_FILE=$(cat build/SCYLLA-RELEASE-FILE)
-	GIT_COMMIT_FILE=$(cat build/SCYLLA-RELEASE-FILE |cut -d . -f 3)
-	if [ "$GIT_COMMIT" = "$GIT_COMMIT_FILE" ]; then
-		exit 0
-	fi
-fi
-
 echo "$SCYLLA_VERSION-$SCYLLA_RELEASE"
 mkdir -p build
 echo "$SCYLLA_VERSION" > build/SCYLLA-VERSION-FILE
--- a/1
+++ b/1
--- a/absl-flat_hash_map.cc
+++ b/absl-flat_hash_map.cc
@@ -1,26 +0,0 @@
-/*
- * Copyright (C) 2020 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "absl-flat_hash_map.hh"
-
-size_t sstring_hash::operator()(std::string_view v) const noexcept {
-    return absl::Hash<std::string_view>{}(v);
-}
--- a/absl-flat_hash_map.hh
+++ b/absl-flat_hash_map.hh
@@ -1,47 +0,0 @@
-/*
- * Copyright (C) 2020 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include <absl/container/flat_hash_map.h>
-#include <seastar/core/sstring.hh>
-
-using namespace seastar;
-
-struct sstring_hash {
-    using is_transparent = void;
-    size_t operator()(std::string_view v) const noexcept;
-};
-
-struct sstring_eq {
-    using is_transparent = void;
-    bool operator()(std::string_view a, std::string_view b) const noexcept {
-        return a == b;
-    }
-};
-
-template <typename K, typename V, typename... Ts>
-struct flat_hash_map : public absl::flat_hash_map<K, V, Ts...> {
-};
-
-template <typename V>
-struct flat_hash_map<sstring, V>
-    : public absl::flat_hash_map<sstring, V, sstring_hash, sstring_eq> {};
--- a/alternator-test/README.md
+++ b/alternator-test/README.md
--- a/alternator-test/conftest.py
+++ b/alternator-test/conftest.py
@@ -26,14 +26,6 @@ import pytest
 import boto3
 from util import create_test_table

-# When tests are run with HTTPS, the server often won't have its SSL
-# certificate signed by a known authority. So we will disable certificate
-# verification with the "verify=False" request option. However, once we do
-# that, we start getting scary-looking warning messages, saying that this
-# makes HTTPS insecure. The following silences those warnings:
-import urllib3
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
 # Test that the Boto libraries are new enough. These tests want to test a
 # large variety of DynamoDB API features, and to do this we need a new-enough
 # version of the the Boto libraries (boto3 and botocore) so that they can
@@ -54,8 +46,6 @@ def pytest_addoption(parser):
    parser.addoption("--https", action="store_true",
        help="communicate via HTTPS protocol on port 8043 instead of HTTP when"
            " running against a local Scylla installation")
-    parser.addoption("--url", action="store",
-        help="communicate with given URL instead of defaults")

 # "dynamodb" fixture: set up client object for communicating with the DynamoDB
 # API. Currently this chooses either Amazon's DynamoDB in the default region
@@ -72,15 +62,15 @@ def dynamodb(request):
        # requires us to specify dummy region and credential parameters,
        # otherwise the user is forced to properly configure ~/.aws even
        # for local runs.
-        if request.config.getoption('url') != None:
-            local_url = request.config.getoption('url')
-        else:
-            local_url = 'https://localhost:8043' if request.config.getoption('https') else 'http://localhost:8000'
+        local_url = 'https://localhost:8043' if request.config.getoption('https') else 'http://localhost:8000'
        # Disable verifying in order to be able to use self-signed TLS certificates
        verify = not request.config.getoption('https')
+        # Silencing the 'Unverified HTTPS request warning'
+        if request.config.getoption('https'):
+            import urllib3
+            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        return boto3.resource('dynamodb', endpoint_url=local_url, verify=verify,
-            region_name='us-east-1', aws_access_key_id='alternator', aws_secret_access_key='secret_pass',
-            config=botocore.client.Config(retries={"max_attempts": 3}))
+            region_name='us-east-1', aws_access_key_id='alternator', aws_secret_access_key='secret_pass')

 # "test_table" fixture: Create and return a temporary table to be used in tests
 # that need a table to work on. The table is automatically deleted at the end.
@@ -125,15 +115,6 @@ def test_table_s(dynamodb):
        AttributeDefinitions=[ { 'AttributeName': 'p', 'AttributeType': 'S' } ])
    yield table
    table.delete()
-# test_table_s_2 has exactly the same schema as test_table_s, and is useful
-# for tests which need two different tables with the same schema.
-@pytest.fixture(scope="session")
-def test_table_s_2(dynamodb):
-    table = create_test_table(dynamodb,
-        KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' }, ],
-        AttributeDefinitions=[ { 'AttributeName': 'p', 'AttributeType': 'S' } ])
-    yield table
-    table.delete()
@pytest.fixture(scope="session")
 def test_table_b(dynamodb):
    table = create_test_table(dynamodb,
@@ -155,13 +136,6 @@ def test_table_sn(dynamodb):
        AttributeDefinitions=[ { 'AttributeName': 'p', 'AttributeType': 'S' }, { 'AttributeName': 'c', 'AttributeType': 'N' } ])
    yield table
    table.delete()
-@pytest.fixture(scope="session")
-def test_table_ss(dynamodb):
-    table = create_test_table(dynamodb,
-        KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' }, { 'AttributeName': 'c', 'KeyType': 'RANGE' } ],
-        AttributeDefinitions=[ { 'AttributeName': 'p', 'AttributeType': 'S' }, { 'AttributeName': 'c', 'AttributeType': 'S' } ])
-    yield table
-    table.delete()

 # "filled_test_table" fixture:  Create a temporary table to be used in tests
 # that involve reading data - GetItem, Scan, etc. The table is filled with
@@ -203,11 +177,3 @@ def filled_test_table(dynamodb):

    yield table, items
    table.delete()
-
-# The "scylla_only" fixture can be used by tests for Scylla-only features,
-# which do not exist on AWS DynamoDB. A test using this fixture will be
-# skipped if running with "--aws".
-@pytest.fixture(scope="session")
-def scylla_only(dynamodb):
-    if dynamodb.meta.client._endpoint.host.endswith('.amazonaws.com'):
-        pytest.skip('Scylla-only feature not supported by AWS')
--- a/alternator-test/test_authorization.py
+++ b/alternator-test/test_authorization.py
@@ -59,18 +59,6 @@ def test_expired_signature(dynamodb, test_table):
    assert not response.ok
    assert "InvalidSignatureException" in response.text and "Signature expired" in response.text

-# A test verifying that missing Authorization header is handled properly
-def test_no_authorization_header(dynamodb, test_table):
-    url = dynamodb.meta.client._endpoint.host
-    print(url)
-    headers = {'Content-Type': 'application/x-amz-json-1.0',
-               'X-Amz-Date': '20170101T010101Z',
-               'X-Amz-Target': 'DynamoDB_20120810.DescribeEndpoints',
-    }
-    response = requests.post(url, headers=headers, verify=False)
-    assert not response.ok
-    assert "InvalidSignatureException" in response.text and "Authorization header" in response.text
-
 # A test ensuring that signatures that exceed current time too much are not accepted.
 # Watch out - this test is valid only for around next 1000 years, it needs to be updated later.
 def test_signature_too_futuristic(dynamodb, test_table):
--- a/alternator-test/test_batch.py
+++ b/alternator-test/test_batch.py
@@ -20,7 +20,6 @@
 # so they are actually tested by other tests as well.

 import pytest
-import random
 from botocore.exceptions import ClientError
 from util import random_string, full_scan, full_query, multiset

@@ -45,19 +44,6 @@ def test_basic_batch_write_item(test_table):
        assert item['attribute'] == str(i)
        assert item['another'] == 'xyz' 

-# Try a batch which includes both multiple writes to the same partition
-# and several partitions. The LWT code collects multiple mutations to the
-# same partition together, and we want to test that this worked correctly.
-def test_batch_write_item_mixed(test_table):
-    partitions = [random_string() for i in range(4)]
-    items = [{'p': p, 'c': str(i)} for p in partitions for i in range(4)]
-    with test_table.batch_writer() as batch:
-        # Reorder items randomly, just for the heck of it
-        for item in random.sample(items, len(items)):
-            batch.put_item(item)
-    for item in items:
-        assert test_table.get_item(Key={'p': item['p'], 'c': item['c']}, ConsistentRead=True)['Item'] == item
-
 # Test batch write to a table with only a hash key
 def test_batch_write_hash_only(test_table_s):
    items = [{'p': random_string(), 'val': random_string()} for i in range(10)]
@@ -152,20 +138,6 @@ def test_batch_write_duplicate_write_and_delete(test_table_s, test_table):
        batch.put_item({'p': p, 'c': other})
        batch.put_item({'p': other, 'c': c})

-# The BatchWriteIem API allows writing to more than one table in the same
-# batch. This test verifies that the duplicate-key checking doesn't mistake
-# updates to the same key in different tables to be duplicates.
-def test_batch_write_nonduplicate_multiple_tables(test_table_s, test_table_s_2):
-    p = random_string()
-    # The batch_writer() function used in previous tests can't write to more
-    # than one table. So we use the lower level interface boto3 gives us.
-    reply = test_table_s.meta.client.batch_write_item(RequestItems = {
-        test_table_s.name: [{'PutRequest': {'Item': {'p': p, 'a': 'hi'}}}],
-        test_table_s_2.name: [{'PutRequest': {'Item': {'p': p, 'b': 'hello'}}}]
-    })
-    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'a': 'hi'}
-    assert test_table_s_2.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'b': 'hello'}
-
 # Test that BatchWriteItem's PutRequest completely replaces an existing item.
 # It shouldn't merge it with a previously existing value. See also the same
 # test for PutItem - test_put_item_replace().
@@ -210,32 +182,6 @@ def test_batch_write_invalid_operation(test_table_s):
    for p in [p1, p2]:
        assert not 'item' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True)

-# In test_item.py we have a bunch of test_empty_* tests on different ways to
-# create an empty item (which in Scylla requires the special CQL row marker
-# to be supported correctly). BatchWriteItems provides yet another way of
-# creating items, so check the empty case here too:
-def test_empty_batch_write(test_table):
-    p = random_string()
-    c = random_string()
-    with test_table.batch_writer() as batch:
-        batch.put_item({'p': p, 'c': c})
-    assert test_table.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item'] == {'p': p, 'c': c}
-
-# Test that BatchWriteItems allows writing to multiple tables in one operation
-def test_batch_write_multiple_tables(test_table_s, test_table):
-    p1 = random_string()
-    c1 = random_string()
-    p2 = random_string()
-    # We use the low-level batch_write_item API for lack of a more convenient
-    # API (the batch_writer() API can only write to one table). At least it
-    # spares us the need to encode the key's types...
-    reply = test_table.meta.client.batch_write_item(RequestItems = {
-        test_table.name: [{'PutRequest': {'Item': {'p': p1, 'c': c1, 'a': 'hi'}}}],
-        test_table_s.name: [{'PutRequest': {'Item': {'p': p2, 'b': 'hello'}}}]
-    })
-    assert test_table.get_item(Key={'p': p1, 'c': c1}, ConsistentRead=True)['Item'] == {'p': p1, 'c': c1, 'a': 'hi'}
-    assert test_table_s.get_item(Key={'p': p2}, ConsistentRead=True)['Item'] == {'p': p2, 'b': 'hello'}
-
 # Basic test for BatchGetItem, reading several entire items.
 # Schema has both hash and sort keys.
 def test_batch_get_item(test_table):
@@ -305,16 +251,3 @@ def test_batch_get_item_projection_expression(test_table):
        got_items = reply['Responses'][test_table.name]
        expected_items = [{k: item[k] for k in wanted if k in item} for item in items]
        assert multiset(got_items) == multiset(expected_items)
-
-# Test that we return the required UnprocessedKeys/UnprocessedItems parameters
-def test_batch_unprocessed(test_table_s):
-    p = random_string()
-    write_reply = test_table_s.meta.client.batch_write_item(RequestItems = {
-        test_table_s.name: [{'PutRequest': {'Item': {'p': p, 'a': 'hi'}}}],
-    })
-    assert 'UnprocessedItems' in write_reply and write_reply['UnprocessedItems'] == dict()
-
-    read_reply = test_table_s.meta.client.batch_get_item(RequestItems = {
-        test_table_s.name: {'Keys': [{'p': p}], 'ProjectionExpression': 'p, a', 'ConsistentRead': True}
-    })
-    assert 'UnprocessedKeys' in read_reply and read_reply['UnprocessedKeys'] == dict()
--- a/alternator-test/test_condition_expression.py
+++ b/alternator-test/test_condition_expression.py
--- a/alternator-test/test_describe_endpoints.py
+++ b/alternator-test/test_describe_endpoints.py
--- a/alternator-test/test_describe_table.py
+++ b/alternator-test/test_describe_table.py
@@ -141,6 +141,7 @@ def test_describe_table_stream_specification(test_table):
 # includes which zone it is on, which account, and of course the table's
 # name. The ARN format is described in
 # https://docs.aws.amazon.com/general/latest/gr/aws-arns-and-namespaces.html#genref-arns
+@pytest.mark.xfail(reason="DescribeTable does not return ARN")
 def test_describe_table_arn(test_table):
    got = test_table.meta.client.describe_table(TableName=test_table.name)['Table']
    assert 'TableArn' in got and got['TableArn'].startswith('arn:')
--- a/alternator-test/test_expected.py
+++ b/alternator-test/test_expected.py
@@ -237,30 +237,6 @@ def test_update_expected_1_le(test_table_s):
                            'AttributeValueList': [2, 3]}}
        )

-# Comparison operators like le work only on numbers, strings or bytes.
-# As noted in issue #8043, if any other type is included in *the query*,
-# the result should be a ValidationException, but if the wrong type appears
-# in the item, not the query, the result is a failed condition.
-def test_update_expected_1_le_validation(test_table_s):
-    p = random_string()
-    test_table_s.update_item(Key={'p': p},
-        AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
-                          'b': {'Value': [1,2], 'Action': 'PUT'}})
-    # Bad type (a list) in the query. Result is ValidationException.
-    with pytest.raises(ClientError, match='ValidationException'):
-        test_table_s.update_item(Key={'p': p},
-            AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
-            Expected={'a': {'ComparisonOperator': 'LE',
-                            'AttributeValueList': [[1,2,3]]}}
-        )
-    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
-        test_table_s.update_item(Key={'p': p},
-            AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
-            Expected={'b': {'ComparisonOperator': 'LE',
-                            'AttributeValueList': [3]}}
-        )
-    assert not 'z' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']
-
 # Tests for Expected with ComparisonOperator = "LT":
 def test_update_expected_1_lt(test_table_s):
    p = random_string()
@@ -546,15 +522,6 @@ def test_update_expected_1_null(test_table_s):
            Expected={'a': {'ComparisonOperator': 'NULL', 'AttributeValueList': [2]}}
        )

-# When ComparisonOperator = "NULL", AttributeValueList should be empty if it
-# exists, but as this test verifies, it may also be missing completely.
-def test_update_expected_1_null_missing_list(test_table_s):
-    p = random_string()
-    test_table_s.update_item(Key={'p': p},
-        AttributeUpdates={'a': {'Value': 2, 'Action': 'PUT'}},
-        Expected={'a': {'ComparisonOperator': 'NULL'}})
-    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['a'] == 2
-
 # Tests for Expected with ComparisonOperator = "CONTAINS":
 def test_update_expected_1_contains(test_table_s):
    # true cases. CONTAINS can be used for two unrelated things: check substrings
@@ -631,10 +598,6 @@ def test_update_expected_1_contains(test_table_s):
            AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
            Expected={'a': {'ComparisonOperator': 'CONTAINS', 'AttributeValueList': []}}
        )
-    # Strangely, while ConditionExpression's contains() allows the argument
-    # to be of any type and checks if the attribute is perhaps a list
-    # containing that item, Expected's "CONTAINS" is more limited, and
-    # refuses a list as the argument (to be searched in a list of lists)
    with pytest.raises(ClientError, match='ValidationException'):
        test_table_s.update_item(Key={'p': p},
            AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
@@ -721,10 +684,6 @@ def test_update_expected_1_not_contains(test_table_s):
            AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
            Expected={'a': {'ComparisonOperator': 'NOT_CONTAINS', 'AttributeValueList': []}}
        )
-    # Strangely, while ConditionExpression's contains() allows the argument
-    # to be of any type and checks if the attribute is perhaps a list
-    # containing that item, Expected's "CONTAINS" is more limited, and
-    # refuses a list as the argument (to be searched in a list of lists)
    with pytest.raises(ClientError, match='ValidationException'):
        test_table_s.update_item(Key={'p': p},
            AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
@@ -918,34 +877,6 @@ def test_update_expected_1_between(test_table_s):
            AttributeUpdates={'z': {'Value': 2, 'Action': 'PUT'}},
            Expected={'d': {'ComparisonOperator': 'BETWEEN', 'AttributeValueList': [set([1]), set([2])]}})

-# BETWEEN work only on numbers, strings or bytes. As noted in issue #8043,
-# if any other type is included in *the query*, the result should be a
-# ValidationException, but if the wrong type appears in the item, not the
-# query, the result is a failed condition.
-# BETWEEN should also generate ValidationException if the two ends of the
-# range are not of the same type or not in the correct order, but this
-# already is tested in the test above (test_update_expected_1_between).
-def test_update_expected_1_between_validation(test_table_s):
-    p = random_string()
-    test_table_s.update_item(Key={'p': p},
-        AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
-                          'b': {'Value': [1,2], 'Action': 'PUT'}})
-    # Bad type (a list) in the query. Result is ValidationException.
-    with pytest.raises(ClientError, match='ValidationException'):
-        test_table_s.update_item(Key={'p': p},
-            AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
-            Expected={'a': {'ComparisonOperator': 'BETWEEN',
-                            'AttributeValueList': [[1,2,3], [2,3,4]]}}
-        )
-    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
-        test_table_s.update_item(Key={'p': p},
-            AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
-            Expected={'b': {'ComparisonOperator': 'BETWEEN',
-                            'AttributeValueList': [1,2]}}
-        )
-    assert not 'z' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']
-
-
 ##############################################################################
 # Instead of ComparisonOperator and AttributeValueList, one can specify either
 # Value or Exists:
@@ -1121,19 +1052,6 @@ def test_update_expected_empty(test_table_s):
            AttributeUpdates={'z': {'Value': 4, 'Action': 'PUT'}},
            Expected={}, ConditionalOperator='AND')

-# Specifying ConditionalOperator is forbidden if the "Expected" Attribute
-# is missing:
-def test_conditional_operator_expected_missing(test_table_s):
-    p = random_string()
-    with pytest.raises(ClientError, match='ValidationException.*ConditionalOperator'):
-        test_table_s.update_item(Key={'p': p},
-            AttributeUpdates={'z': {'Value': 4, 'Action': 'PUT'}},
-            ConditionalOperator='OR')
-    with pytest.raises(ClientError, match='ValidationException.*ConditionalOperator'):
-        test_table_s.update_item(Key={'p': p},
-            AttributeUpdates={'z': {'Value': 4, 'Action': 'PUT'}},
-            ConditionalOperator='AND')
-
 # All of the above tests tested "Expected" with the UpdateItem operation.
 # We now want to test that it works also with the PutItem and DeleteItems
 # operations. We don't need to check again all the different sub-cases tested
@@ -1159,42 +1077,3 @@ def test_put_item_expected(test_table_s):
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'a': 2}
    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
        test_table_s.put_item(Item={'p': p, 'a': 3}, Expected={'a': {'Value': 1}})
-
-# Reproducer for issue #6573: binary strings should be ordered as unsigned
-# bytes, i.e., byte 128 comes after 127, not before as with signed bytes.
-# Test the five ordering operators: LT, LE, GT, GE, BETWEEN
-def test_update_expected_unsigned_bytes(test_table_s):
-    p = random_string()
-    test_table_s.put_item(Item={'p': p, 'b': bytearray([127])})
-    test_table_s.update_item(Key={'p': p},
-        AttributeUpdates={'z': {'Value': 1, 'Action': 'PUT'}},
-        Expected={'b': {'ComparisonOperator': 'LT',
-                        'AttributeValueList': [bytearray([128])]}}
-    )
-    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 1
-    test_table_s.update_item(Key={'p': p},
-        AttributeUpdates={'z': {'Value': 2, 'Action': 'PUT'}},
-        Expected={'b': {'ComparisonOperator': 'LE',
-                        'AttributeValueList': [bytearray([128])]}}
-    )
-    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 2
-    test_table_s.update_item(Key={'p': p},
-        AttributeUpdates={'z': {'Value': 3, 'Action': 'PUT'}},
-        Expected={'b': {'ComparisonOperator': 'BETWEEN',
-                        'AttributeValueList': [bytearray([126]), bytearray([128])]}}
-    )
-    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 3
-
-    test_table_s.put_item(Item={'p': p, 'b': bytearray([128])})
-    test_table_s.update_item(Key={'p': p},
-        AttributeUpdates={'z': {'Value': 4, 'Action': 'PUT'}},
-        Expected={'b': {'ComparisonOperator': 'GT',
-                        'AttributeValueList': [bytearray([127])]}}
-    )
-    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 4
-    test_table_s.update_item(Key={'p': p},
-        AttributeUpdates={'z': {'Value': 5, 'Action': 'PUT'}},
-        Expected={'b': {'ComparisonOperator': 'GE',
-                        'AttributeValueList': [bytearray([127])]}}
-    )
-    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 5
--- a/alternator-test/test_gsi.py
+++ b/alternator-test/test_gsi.py
@@ -37,25 +37,22 @@ from util import create_test_table, random_string, full_scan, full_query, multis
 # retry.
 def assert_index_query(table, index_name, expected_items, **kwargs):
    for i in range(3):
-        if multiset(expected_items) == multiset(full_query(table, IndexName=index_name, ConsistentRead=False, **kwargs)):
+        if multiset(expected_items) == multiset(full_query(table, IndexName=index_name, **kwargs)):
            return
        print('assert_index_query retrying')
        time.sleep(1)
-    assert multiset(expected_items) == multiset(full_query(table, IndexName=index_name, ConsistentRead=False, **kwargs))
+    assert multiset(expected_items) == multiset(full_query(table, IndexName=index_name, **kwargs))

 def assert_index_scan(table, index_name, expected_items, **kwargs):
    for i in range(3):
-        if multiset(expected_items) == multiset(full_scan(table, IndexName=index_name, ConsistentRead=False, **kwargs)):
+        if multiset(expected_items) == multiset(full_scan(table, IndexName=index_name, **kwargs)):
            return
        print('assert_index_scan retrying')
        time.sleep(1)
-    assert multiset(expected_items) == multiset(full_scan(table, IndexName=index_name, ConsistentRead=False, **kwargs))
+    assert multiset(expected_items) == multiset(full_scan(table, IndexName=index_name, **kwargs))

 # Although quite silly, it is actually allowed to create an index which is
 # identical to the base table.
-# The following test does not work for KA/LA tables due to #6157,
-# so it's hereby skipped.
-@pytest.mark.skip
 def test_gsi_identical(dynamodb):
    table = create_test_table(dynamodb,
        KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' }],
@@ -74,9 +71,9 @@ def test_gsi_identical(dynamodb):
    # results (in different order).
    assert multiset(items) == multiset(full_scan(table))
    assert_index_scan(table, 'hello', items)
-    # We can't scan a non-existent index
+    # We can't scan a non-existant index
    with pytest.raises(ClientError, match='ValidationException'):
-        full_scan(table, ConsistentRead=False, IndexName='wrong')
+        full_scan(table, IndexName='wrong')
    table.delete()

 # One of the simplest forms of a non-trivial GSI: The base table has a hash
@@ -153,18 +150,13 @@ def test_gsi_missing_table(dynamodb):
        dynamodb.meta.client.scan(TableName='nonexistent_table', IndexName='any_name')

 # Verify that strongly-consistent reads on GSI are *not* allowed.
+@pytest.mark.xfail(reason="GSI strong consistency not checked")
 def test_gsi_strong_consistency(test_table_gsi_1):
    with pytest.raises(ClientError, match='ValidationException.*Consistent'):
        full_query(test_table_gsi_1, KeyConditions={'c': {'AttributeValueList': ['hi'], 'ComparisonOperator': 'EQ'}}, IndexName='hello', ConsistentRead=True)
    with pytest.raises(ClientError, match='ValidationException.*Consistent'):
        full_scan(test_table_gsi_1, IndexName='hello', ConsistentRead=True)

-# Test that setting an indexed string column to an empty string is illegal,
-# since keys cannot contain empty strings
-def test_gsi_empty_value(test_table_gsi_2):
-    with pytest.raises(ClientError, match='ValidationException.*empty'):
-        test_table_gsi_2.put_item(Item={'p': random_string(), 'x': ''})
-
 # Verify that a GSI is correctly listed in describe_table
@pytest.mark.xfail(reason="DescribeTable provides index names only, no size or item count")
 def test_gsi_describe(test_table_gsi_1):
@@ -295,8 +287,8 @@ def test_gsi_missing_attribute(test_table_gsi_2):
    test_table_gsi_2.put_item(Item={'p':  p2})

    # Both items are now in the base table:
-    assert test_table_gsi_2.get_item(Key={'p':  p1}, ConsistentRead=True)['Item'] == {'p': p1, 'x': x1}
-    assert test_table_gsi_2.get_item(Key={'p':  p2}, ConsistentRead=True)['Item'] == {'p': p2}
+    assert test_table_gsi_2.get_item(Key={'p':  p1})['Item'] == {'p': p1, 'x': x1}
+    assert test_table_gsi_2.get_item(Key={'p':  p2})['Item'] == {'p': p2}

    # But only the first item is in the index: It can be found using a
    # Query, and a scan of the index won't find it (but a scan on the base
@@ -308,7 +300,7 @@ def test_gsi_missing_attribute(test_table_gsi_2):
    # and item will "never" appear in the index. We do this test last,
    # so if we had a bug and such item did appear, hopefully we had enough
    # time for the bug to become visible. At least sometimes.
-    assert not any([i['p'] == p2 for i in full_scan(test_table_gsi_2, ConsistentRead=False, IndexName='hello')])
+    assert not any([i['p'] == p2 for i in full_scan(test_table_gsi_2, IndexName='hello')])

 # Test when a table has a GSI, if the indexed attribute has the wrong type,
 # the update operation is rejected, and is added to neither base table nor
@@ -410,20 +402,20 @@ def test_gsi_missing_attribute_3(test_table_gsi_3):
    # First, add an item with a missing "a" value. It should appear in the
    # base table, but not in the index:
    test_table_gsi_3.put_item(Item={'p':  p, 'b': b})
-    assert test_table_gsi_3.get_item(Key={'p':  p}, ConsistentRead=True)['Item'] == {'p': p, 'b': b}
+    assert test_table_gsi_3.get_item(Key={'p':  p})['Item'] == {'p': p, 'b': b}
    # Note: with eventually consistent read, we can't really be sure that
    # an item will "never" appear in the index. We hope that if a bug exists
    # and such an item did appear, sometimes the delay here will be enough
    # for the unexpected item to become visible.
-    assert not any([i['p'] == p for i in full_scan(test_table_gsi_3, ConsistentRead=False, IndexName='hello')])
+    assert not any([i['p'] == p for i in full_scan(test_table_gsi_3, IndexName='hello')])
    # Same thing for an item with a missing "b" value:
    test_table_gsi_3.put_item(Item={'p':  p, 'a': a})
-    assert test_table_gsi_3.get_item(Key={'p':  p}, ConsistentRead=True)['Item'] == {'p': p, 'a': a}
-    assert not any([i['p'] == p for i in full_scan(test_table_gsi_3, ConsistentRead=False, IndexName='hello')])
+    assert test_table_gsi_3.get_item(Key={'p':  p})['Item'] == {'p': p, 'a': a}
+    assert not any([i['p'] == p for i in full_scan(test_table_gsi_3, IndexName='hello')])
    # And for an item missing both:
    test_table_gsi_3.put_item(Item={'p':  p})
-    assert test_table_gsi_3.get_item(Key={'p':  p}, ConsistentRead=True)['Item'] == {'p': p}
-    assert not any([i['p'] == p for i in full_scan(test_table_gsi_3, ConsistentRead=False, IndexName='hello')])
+    assert test_table_gsi_3.get_item(Key={'p':  p})['Item'] == {'p': p}
+    assert not any([i['p'] == p for i in full_scan(test_table_gsi_3, IndexName='hello')])

 # A fourth scenario of GSI. Two GSIs on a single base table.
@pytest.fixture(scope="session")
@@ -735,10 +727,10 @@ def test_gsi_backfill(dynamodb):
    # assert_index_scan() or assert_index_query() functions) because after
    # we waited for backfilling to complete, we know all the pre-existing
    # data is already in the index.
-    assert multiset(items1) == multiset(full_scan(table, ConsistentRead=False, IndexName='hello'))
+    assert multiset(items1) == multiset(full_scan(table, IndexName='hello'))
    # We can also use Query on the new GSI, to search on the attribute x:
    assert multiset([items1[3]]) == multiset(full_query(table,
-        ConsistentRead=False, IndexName='hello',
+        IndexName='hello',
        KeyConditions={'x': {'AttributeValueList': [items1[3]['x']], 'ComparisonOperator': 'EQ'}}))
    # Let's also test that we cannot add another index with the same name
    # that already exists
@@ -785,7 +777,7 @@ def test_gsi_delete(dynamodb):
    wait_for_gsi_gone(table, 'hello')
    # Now index is gone. We cannot query using it.
    with pytest.raises(ClientError, match='ValidationException.*hello'):
-        full_query(table, ConsistentRead=False, IndexName='hello',
+        full_query(table, IndexName='hello',
            KeyConditions={'x': {'AttributeValueList': [items[3]['x']], 'ComparisonOperator': 'EQ'}})
    table.delete()

--- a/alternator-test/test_health.py
+++ b/alternator-test/test_health.py
@@ -22,7 +22,7 @@ import requests
 # Test that a health check can be performed with a GET packet
 def test_health_works(dynamodb):
    url = dynamodb.meta.client._endpoint.host
-    response = requests.get(url, verify=False)
+    response = requests.get(url)
    assert response.ok
    assert response.content.decode('utf-8').strip()  == 'healthy: {}'.format(url.replace('https://', '').replace('http://', ''))

--- a/alternator-test/test_item.py
+++ b/alternator-test/test_item.py
@@ -0,0 +1,402 @@
+# Copyright 2019 ScyllaDB
+#
+# This file is part of Scylla.
+#
+# Scylla is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Scylla is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+
+# Tests for the CRUD item operations: PutItem, GetItem, UpdateItem, DeleteItem
+
+import pytest
+from botocore.exceptions import ClientError
+from decimal import Decimal
+from util import random_string, random_bytes
+
+# Basic test for creating a new item with a random name, and reading it back
+# with strong consistency.
+# Only the string type is used for keys and attributes. None of the various
+# optional PutItem features (Expected, ReturnValues, ReturnConsumedCapacity,
+# ReturnItemCollectionMetrics, ConditionalOperator, ConditionExpression,
+# ExpressionAttributeNames, ExpressionAttributeValues) are used, and
+# for GetItem strong consistency is requested as well as all attributes,
+# but no other optional features (AttributesToGet, ReturnConsumedCapacity,
+# ProjectionExpression, ExpressionAttributeNames)
+def test_basic_string_put_and_get(test_table):
+    p = random_string()
+    c = random_string()
+    val = random_string()
+    val2 = random_string()
+    test_table.put_item(Item={'p': p, 'c': c, 'attribute': val, 'another': val2})
+    item = test_table.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item']
+    assert item['p'] == p
+    assert item['c'] == c
+    assert item['attribute'] == val
+    assert item['another'] == val2
+
+# Similar to test_basic_string_put_and_get, just uses UpdateItem instead of
+# PutItem. Because the item does not yet exist, it should work the same.
+def test_basic_string_update_and_get(test_table):
+    p = random_string()
+    c = random_string()
+    val = random_string()
+    val2 = random_string()
+    test_table.update_item(Key={'p': p, 'c': c}, AttributeUpdates={'attribute': {'Value': val, 'Action': 'PUT'}, 'another': {'Value': val2, 'Action': 'PUT'}})
+    item = test_table.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item']
+    assert item['p'] == p
+    assert item['c'] == c
+    assert item['attribute'] == val
+    assert item['another'] == val2
+
+# Test put_item and get_item of various types for the *attributes*,
+# including both scalars as well as nested documents, lists and sets.
+# The full list of types tested here:
+#    number, boolean, bytes, null, list, map, string set, number set,
+#    binary set.
+# The keys are still strings.
+# Note that only top-level attributes are written and read in this test -
+# this test does not attempt to modify *nested* attributes.
+# See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/dynamodb.html
+# on how to pass these various types to Boto3's put_item().
+def test_put_and_get_attribute_types(test_table):
+    key = {'p': random_string(), 'c': random_string()}
+    test_items = [
+        Decimal("12.345"),
+        42,
+        True,
+        False,
+        b'xyz',
+        None,
+        ['hello', 'world', 42],
+        {'hello': 'world', 'life': 42},
+        {'hello': {'test': 'hi', 'hello': True, 'list': [1, 2, 'hi']}},
+        set(['hello', 'world', 'hi']),
+        set([1, 42, Decimal("3.14")]),
+        set([b'xyz', b'hi']),
+    ]
+    item = { str(i) : test_items[i] for i in range(len(test_items)) }
+    item.update(key)
+    test_table.put_item(Item=item)
+    got_item = test_table.get_item(Key=key, ConsistentRead=True)['Item']
+    assert item == got_item
+
+# The test_empty_* tests below verify support for empty items, with no
+# attributes except the key. This is a difficult case for Scylla, because
+# for an empty row to exist, Scylla needs to add a "CQL row marker".
+# There are several ways to create empty items - via PutItem, UpdateItem
+# and deleting attributes from non-empty items, and we need to check them
+# all, in several test_empty_* tests:
+def test_empty_put(test_table):
+    p = random_string()
+    c = random_string()
+    test_table.put_item(Item={'p': p, 'c': c})
+    item = test_table.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item']
+    assert item == {'p': p, 'c': c}
+def test_empty_put_delete(test_table):
+    p = random_string()
+    c = random_string()
+    test_table.put_item(Item={'p': p, 'c': c, 'hello': 'world'})
+    test_table.update_item(Key={'p': p, 'c': c}, AttributeUpdates={'hello': {'Action': 'DELETE'}})
+    item = test_table.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item']
+    assert item == {'p': p, 'c': c}
+def test_empty_update(test_table):
+    p = random_string()
+    c = random_string()
+    test_table.update_item(Key={'p': p, 'c': c}, AttributeUpdates={})
+    item = test_table.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item']
+    assert item == {'p': p, 'c': c}
+def test_empty_update_delete(test_table):
+    p = random_string()
+    c = random_string()
+    test_table.update_item(Key={'p': p, 'c': c}, AttributeUpdates={'hello': {'Value': 'world', 'Action': 'PUT'}})
+    test_table.update_item(Key={'p': p, 'c': c}, AttributeUpdates={'hello': {'Action': 'DELETE'}})
+    item = test_table.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item']
+    assert item == {'p': p, 'c': c}
+
+# Test error handling of UpdateItem passed a bad "Action" field.
+def test_update_bad_action(test_table):
+    p = random_string()
+    c = random_string()
+    val = random_string()
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.update_item(Key={'p': p, 'c': c}, AttributeUpdates={'attribute': {'Value': val, 'Action': 'NONEXISTENT'}})
+
+# A more elaborate UpdateItem test, updating different attributes at different
+# times. Includes PUT and DELETE operations.
+def test_basic_string_more_update(test_table):
+    p = random_string()
+    c = random_string()
+    val1 = random_string()
+    val2 = random_string()
+    val3 = random_string()
+    val4 = random_string()
+    test_table.update_item(Key={'p': p, 'c': c}, AttributeUpdates={'a3': {'Value': val1, 'Action': 'PUT'}})
+    test_table.update_item(Key={'p': p, 'c': c}, AttributeUpdates={'a1': {'Value': val1, 'Action': 'PUT'}})
+    test_table.update_item(Key={'p': p, 'c': c}, AttributeUpdates={'a2': {'Value': val2, 'Action': 'PUT'}})
+    test_table.update_item(Key={'p': p, 'c': c}, AttributeUpdates={'a1': {'Value': val3, 'Action': 'PUT'}})
+    test_table.update_item(Key={'p': p, 'c': c}, AttributeUpdates={'a3': {'Action': 'DELETE'}})
+    item = test_table.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item']
+    assert item['p'] == p
+    assert item['c'] == c
+    assert item['a1'] == val3
+    assert item['a2'] == val2
+    assert not 'a3' in item
+
+# Test that item operations on a non-existent table name fail with correct
+# error code.
+def test_item_operations_nonexistent_table(dynamodb):
+    with pytest.raises(ClientError, match='ResourceNotFoundException'):
+        dynamodb.meta.client.put_item(TableName='non_existent_table',
+            Item={'a':{'S':'b'}})
+
+# Fetching a non-existent item. According to the DynamoDB doc, "If there is no
+# matching item, GetItem does not return any data and there will be no Item
+# element in the response."
+def test_get_item_missing_item(test_table):
+    p = random_string()
+    c = random_string()
+    assert not "Item" in test_table.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)
+
+# Test that if we have a table with string hash and sort keys, we can't read
+# or write items with other key types to it.
+def test_put_item_wrong_key_type(test_table):
+    b = random_bytes()
+    s = random_string()
+    n = Decimal("3.14")
+    # Should succeed (correct key types)
+    test_table.put_item(Item={'p': s, 'c': s})
+    assert test_table.get_item(Key={'p': s, 'c': s}, ConsistentRead=True)['Item'] == {'p': s, 'c': s}
+    # Should fail (incorrect hash key types)
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.put_item(Item={'p': b, 'c': s})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.put_item(Item={'p': n, 'c': s})
+    # Should fail (incorrect sort key types)
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.put_item(Item={'p': s, 'c': b})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.put_item(Item={'p': s, 'c': n})
+    # Should fail (missing hash key)
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.put_item(Item={'c': s})
+    # Should fail (missing sort key)
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.put_item(Item={'p': s})
+def test_update_item_wrong_key_type(test_table, test_table_s):
+    b = random_bytes()
+    s = random_string()
+    n = Decimal("3.14")
+    # Should succeed (correct key types); an empty update still creates the item
+    test_table.update_item(Key={'p': s, 'c': s}, AttributeUpdates={})
+    assert test_table.get_item(Key={'p': s, 'c': s}, ConsistentRead=True)['Item'] == {'p': s, 'c': s}
+    # Should fail (incorrect hash key types)
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.update_item(Key={'p': b, 'c': s}, AttributeUpdates={})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.update_item(Key={'p': n, 'c': s}, AttributeUpdates={})
+    # Should fail (incorrect sort key types)
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.update_item(Key={'p': s, 'c': b}, AttributeUpdates={})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.update_item(Key={'p': s, 'c': n}, AttributeUpdates={})
+    # Should fail (missing hash key)
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.update_item(Key={'c': s}, AttributeUpdates={})
+    # Should fail (missing sort key)
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.update_item(Key={'p': s}, AttributeUpdates={})
+    # Should fail (spurious key columns) - exercised through UpdateItem itself
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.update_item(Key={'p': s, 'c': s, 'spurious': s}, AttributeUpdates={})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': s, 'c': s}, AttributeUpdates={})
+def test_get_item_wrong_key_type(test_table, test_table_s):
+    b = random_bytes()
+    s = random_string()
+    n = Decimal("3.14")
+    # Should succeed (correct key types) but have empty result
+    assert not "Item" in test_table.get_item(Key={'p': s, 'c': s}, ConsistentRead=True)
+    # Should fail (incorrect hash key types)
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.get_item(Key={'p': b, 'c': s})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.get_item(Key={'p': n, 'c': s})
+    # Should fail (incorrect sort key types)
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.get_item(Key={'p': s, 'c': b})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.get_item(Key={'p': s, 'c': n})
+    # Should fail (missing hash key)
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.get_item(Key={'c': s})
+    # Should fail (missing sort key)
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.get_item(Key={'p': s})
+    # Should fail (spurious key columns)
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.get_item(Key={'p': s, 'c': s, 'spurious': s})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.get_item(Key={'p': s, 'c': s})
+def test_delete_item_wrong_key_type(test_table, test_table_s):
+    b = random_bytes()
+    s = random_string()
+    n = Decimal("3.14")
+    # Should succeed (correct key types)
+    test_table.delete_item(Key={'p': s, 'c': s})
+    # Should fail (incorrect hash key types)
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.delete_item(Key={'p': b, 'c': s})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.delete_item(Key={'p': n, 'c': s})
+    # Should fail (incorrect sort key types)
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.delete_item(Key={'p': s, 'c': b})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.delete_item(Key={'p': s, 'c': n})
+    # Should fail (missing hash key)
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.delete_item(Key={'c': s})
+    # Should fail (missing sort key)
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.delete_item(Key={'p': s})
+    # Should fail (spurious key columns)
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table.delete_item(Key={'p': s, 'c': s, 'spurious': s})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.delete_item(Key={'p': s, 'c': s})
+
+# Most of the tests here arbitrarily used a table with both hash and sort keys
+# (both strings). Let's check that a table with *only* a hash key works ok
+# too, for PutItem, GetItem, and UpdateItem.
+def test_only_hash_key(test_table_s):
+    s = random_string()
+    test_table_s.put_item(Item={'p': s, 'hello': 'world'})
+    assert test_table_s.get_item(Key={'p': s}, ConsistentRead=True)['Item'] == {'p': s, 'hello': 'world'}
+    test_table_s.update_item(Key={'p': s}, AttributeUpdates={'hi': {'Value': 'there', 'Action': 'PUT'}})
+    assert test_table_s.get_item(Key={'p': s}, ConsistentRead=True)['Item'] == {'p': s, 'hello': 'world', 'hi': 'there'}
+
+# Tests for item operations in tables with non-string hash or sort keys.
+# These tests focus only on the type of the key - everything else is as
+# simple as we can (string attributes, no special options for GetItem
+# and PutItem). These tests also focus on individual items only, and
+# not about the sort order of sort keys - this should be verified in
+# test_query.py, for example.
+def test_bytes_hash_key(test_table_b):
+    # Bytes values are passed using base64 encoding, which has weird cases
+    # depending on len%3 and len%4. So let's try various lengths.
+    for length in range(10,18):  # named "length" to avoid shadowing builtin len()
+        p = random_bytes(length)
+        val = random_string()
+        test_table_b.put_item(Item={'p': p, 'attribute': val})
+        assert test_table_b.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'attribute': val}
+def test_bytes_sort_key(test_table_sb):
+    p = random_string()
+    c = random_bytes()
+    val = random_string()
+    test_table_sb.put_item(Item={'p': p, 'c': c, 'attribute': val})
+    assert test_table_sb.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item'] == {'p': p, 'c': c, 'attribute': val}
+
+# Tests for using a large binary blob as hash key, sort key, or attribute.
+# DynamoDB strictly limits the size of the binary hash key to 2048 bytes,
+# and binary sort key to 1024 bytes, and refuses anything larger. The total
+# size of an item is limited to 400KB, which also limits the size of the
+# largest attributes. For more details on these limits, see
+# https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html
+# Alternator currently does *not* have these limitations, and can accept much
+# larger keys and attributes, but what we do in the following tests is to verify
+# that items up to DynamoDB's maximum sizes also work well in Alternator.
+def test_large_blob_hash_key(test_table_b):
+    b = random_bytes(2048)
+    test_table_b.put_item(Item={'p': b})
+    assert test_table_b.get_item(Key={'p': b}, ConsistentRead=True)['Item'] == {'p': b}
+def test_large_blob_sort_key(test_table_sb):
+    s = random_string()
+    b = random_bytes(1024)
+    test_table_sb.put_item(Item={'p': s, 'c': b})
+    assert test_table_sb.get_item(Key={'p': s, 'c': b}, ConsistentRead=True)['Item'] == {'p': s, 'c': b}
+def test_large_blob_attribute(test_table):
+    p = random_string()
+    c = random_string()
+    b = random_bytes(409500)  # a bit less than 400KB
+    test_table.put_item(Item={'p': p, 'c': c, 'attribute': b })
+    assert test_table.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item'] == {'p': p, 'c': c, 'attribute': b}
+
+# Checks that it is not allowed to use both old-style AttributeUpdates and
+# new-style UpdateExpression in a single UpdateItem request.
+def test_update_item_two_update_methods(test_table_s):
+    p = random_string()
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            AttributeUpdates={'a': {'Value': 3, 'Action': 'PUT'}},
+            UpdateExpression='SET b = :val1',
+            ExpressionAttributeValues={':val1': 4})
+
+# Verify that having neither AttributeUpdates nor UpdateExpression is
+# allowed, and results in creation of an empty item.
+def test_update_item_no_update_method(test_table_s):
+    p = random_string()
+    assert not "Item" in test_table_s.get_item(Key={'p': p}, ConsistentRead=True)
+    test_table_s.update_item(Key={'p': p})
+    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p}
+
+# Test GetItem with the AttributesToGet parameter. Result should include the
+# selected attributes only - if one wants the key attributes as well, one
+# needs to select them explicitly. When no key attributes are selected,
+# some items may have *none* of the selected attributes. Those items are
+# returned too, as empty items - they are not outright missing.
+def test_getitem_attributes_to_get(dynamodb, test_table):
+    p = random_string()
+    c = random_string()
+    item = {'p': p, 'c': c, 'a': 'hello', 'b': 'hi'}
+    test_table.put_item(Item=item)
+    for wanted in [ ['a'],             # only non-key attribute
+                    ['c', 'a'],        # a key attribute (sort key) and non-key
+                    ['p', 'c'],        # entire key
+                    ['nonexistent']    # Our item doesn't have this
+                   ]:
+        got_item = test_table.get_item(Key={'p': p, 'c': c}, AttributesToGet=wanted, ConsistentRead=True)['Item']
+        expected_item = {k: item[k] for k in wanted if k in item}
+        assert expected_item == got_item
+
+# Basic test for DeleteItem, with hash key only
+def test_delete_item_hash(test_table_s):
+    p = random_string()
+    test_table_s.put_item(Item={'p': p})
+    assert 'Item' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True)
+    test_table_s.delete_item(Key={'p': p})
+    assert not 'Item' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True)
+
+# Basic test for DeleteItem, with hash and sort key
+def test_delete_item_sort(test_table):
+    p = random_string()
+    c = random_string()
+    key = {'p': p, 'c': c}
+    test_table.put_item(Item=key)
+    assert 'Item' in test_table.get_item(Key=key, ConsistentRead=True)
+    test_table.delete_item(Key=key)
+    assert not 'Item' in test_table.get_item(Key=key, ConsistentRead=True)
+
+# Test that PutItem completely replaces an existing item. It shouldn't merge
+# it with a previously existing value, as UpdateItem does!
+# We test for a table with just hash key, and for a table with both hash and
+# sort keys.
+def test_put_item_replace(test_table_s, test_table):
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hi'})
+    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'a': 'hi'}
+    test_table_s.put_item(Item={'p': p, 'b': 'hello'})
+    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'b': 'hello'}
+    c = random_string()
+    test_table.put_item(Item={'p': p, 'c': c, 'a': 'hi'})
+    assert test_table.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item'] == {'p': p, 'c': c, 'a': 'hi'}
+    test_table.put_item(Item={'p': p, 'c': c, 'b': 'hello'})
+    assert test_table.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item'] == {'p': p, 'c': c, 'b': 'hello'}
--- a/alternator-test/test_lsi.py
+++ b/alternator-test/test_lsi.py
@@ -26,22 +26,25 @@ import time
 from botocore.exceptions import ClientError, ParamValidationError
 from util import create_test_table, random_string, full_scan, full_query, multiset, list_tables

-# LSIs support strongly-consistent reads, so the following functions do not
-# need to retry like we did in test_gsi.py for GSIs:
+# Currently, Alternator's LSIs only support eventually consistent reads, so tests
+# that involve writing to a table and then expect to read something from it cannot
+# be guaranteed to succeed without retrying the read. The following utility
+# functions make it easy to write such tests.
 def assert_index_query(table, index_name, expected_items, **kwargs):
-    assert multiset(expected_items) == multiset(full_query(table, IndexName=index_name, **kwargs))
-def assert_index_scan(table, index_name, expected_items, **kwargs):
-    assert multiset(expected_items) == multiset(full_scan(table, IndexName=index_name, **kwargs))
-
-# A version doing retries instead of ConsistentRead, to be used just for the
-# one test below which has both GSI and LSI:
-def retrying_assert_index_query(table, index_name, expected_items, **kwargs):
    for i in range(3):
-        if multiset(expected_items) == multiset(full_query(table, IndexName=index_name, ConsistentRead=False, **kwargs)):
+        if multiset(expected_items) == multiset(full_query(table, IndexName=index_name, **kwargs)):
            return
-        print('retrying_assert_index_query retrying')
+        print('assert_index_query retrying')
        time.sleep(1)
-    assert multiset(expected_items) == multiset(full_query(table, IndexName=index_name, ConsistentRead=False, **kwargs))
+    assert multiset(expected_items) == multiset(full_query(table, IndexName=index_name, **kwargs))
+
+def assert_index_scan(table, index_name, expected_items, **kwargs):
+    for i in range(3):
+        if multiset(expected_items) == multiset(full_scan(table, IndexName=index_name, **kwargs)):
+            return
+        print('assert_index_scan retrying')
+        time.sleep(1)
+    assert multiset(expected_items) == multiset(full_scan(table, IndexName=index_name, **kwargs))

 # Although quite silly, it is actually allowed to create an index which is
 # identical to the base table.
@@ -63,7 +66,7 @@ def test_lsi_identical(dynamodb):
    # results (in different order).
    assert multiset(items) == multiset(full_scan(table))
    assert_index_scan(table, 'hello', items)
-    # We can't scan a non-existent index
+    # We can't scan a non-existant index
    with pytest.raises(ClientError, match='ValidationException'):
        full_scan(table, IndexName='wrong')
    table.delete()
@@ -212,12 +215,6 @@ def test_lsi_4(test_table_lsi_4):
            KeyConditions={'p': {'AttributeValueList': [i5], 'ComparisonOperator': 'EQ'},
                           column: {'AttributeValueList': [i5], 'ComparisonOperator': 'EQ'}})

-# Test that setting an indexed string column to an empty string is illegal,
-# since keys cannot contain empty strings
-def test_lsi_empty_value(test_table_lsi_1):
-    with pytest.raises(ClientError, match='ValidationException.*empty'):
-        test_table_lsi_1.put_item(Item={'p': random_string(), 'c': random_string(), 'b': ''})
-
 def test_lsi_describe(test_table_lsi_4):
    desc = test_table_lsi_4.meta.client.describe_table(TableName=test_table_lsi_4.name)
    assert 'Table' in desc
@@ -305,11 +302,13 @@ def test_lsi_consistent_read(test_table_lsi_1):
    expected_items = [i for i in items if i['p'] == p1 and i['b'] == b1]
    assert_index_query(test_table_lsi_1, 'hello', expected_items,
        KeyConditions={'p': {'AttributeValueList': [p1], 'ComparisonOperator': 'EQ'},
-                       'b': {'AttributeValueList': [b1], 'ComparisonOperator': 'EQ'}})
+                       'b': {'AttributeValueList': [b1], 'ComparisonOperator': 'EQ'}},
+        ConsistentRead=True)
    expected_items = [i for i in items if i['p'] == p2 and i['b'] == b2]
    assert_index_query(test_table_lsi_1, 'hello', expected_items,
        KeyConditions={'p': {'AttributeValueList': [p2], 'ComparisonOperator': 'EQ'},
-                       'b': {'AttributeValueList': [b2], 'ComparisonOperator': 'EQ'}})
+                       'b': {'AttributeValueList': [b2], 'ComparisonOperator': 'EQ'}},
+        ConsistentRead=True)

 # A table with both gsi and lsi present
@pytest.fixture(scope="session")
@@ -361,6 +360,6 @@ def test_lsi_and_gsi(test_table_lsi_gsi):

    for index in ['hello_g1', 'hello_l1']:
        expected_items = [i for i in items if i['p'] == p1 and i['x1'] == x1]
-        retrying_assert_index_query(test_table_lsi_gsi, index, expected_items,
+        assert_index_query(test_table_lsi_gsi, index, expected_items,
            KeyConditions={'p': {'AttributeValueList': [p1], 'ComparisonOperator': 'EQ'},
                           'x1': {'AttributeValueList': [x1], 'ComparisonOperator': 'EQ'}})
--- a/alternator-test/test_nested.py
+++ b/alternator-test/test_nested.py
--- a/alternator-test/test_projection_expression.py
+++ b/alternator-test/test_projection_expression.py
@@ -134,10 +134,10 @@ def test_projection_expression_path(test_table_s):
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[0]')['Item'] == {'a': {'b': [2]}}
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[2]')['Item'] == {'a': {'b': [{'x': 'hi', 'y': 'yo'}]}}
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[2].y')['Item'] == {'a': {'b': [{'y': 'yo'}]}}
-    # Trying to read any sort of non-existent attribute returns an empty item.
+    # Trying to read any sort of non-existant attribute returns an empty item.
    # This includes a non-existing top-level attribute, an attempt to read
-    # beyond the end of an array or a non-existent member of a dictionary, as
-    # well as paths which begin with a non-existent prefix.
+    # beyond the end of an array or a non-existant member of a dictionary, as
+    # well as paths which begin with a non-existant prefix.
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='x')['Item'] == {}
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[3]')['Item'] == {}
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.x')['Item'] == {}
@@ -199,19 +199,3 @@ def test_projection_expression_and_attributes_to_get(test_table_s):
        full_scan(test_table_s,  ProjectionExpression='a', AttributesToGet=['a'])
    with pytest.raises(ClientError, match='ValidationException.*both'):
        full_query(test_table_s, KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, ProjectionExpression='a', AttributesToGet=['a'])
-
-# above in test_projection_expression_toplevel_syntax among other things
-# we noted how spurious entries in ExpressionAttributeNames, not needed
-# the the ProjectionExpression, cause an error. Sometimes we have two
-# expressions in the same request, for example, both a ProjectionExpression
-# and a KeyConditionExpression. It's only an error if a name is not
-# needed by both of these expressions
-def test_projection_expression_and_key_condition_expression(test_table_s):
-    p = random_string()
-    test_table_s.put_item(Item={'p': p, 'a': 'hello', 'b': 'hi'})
-    got_items = full_query(test_table_s,
-        KeyConditionExpression='#name1 = :val1',
-        ProjectionExpression='#name2',
-        ExpressionAttributeNames={'#name1': 'p', '#name2': 'a'},
-        ExpressionAttributeValues={':val1': p});
-    assert got_items == [{'a': 'hello'}]
--- a/alternator-test/test_query.py
+++ b/alternator-test/test_query.py
@@ -0,0 +1,516 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 ScyllaDB
+#
+# This file is part of Scylla.
+#
+# Scylla is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Scylla is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+
+# Tests for the Query operation
+
+import random
+import pytest
+from botocore.exceptions import ClientError, ParamValidationError
+from decimal import Decimal
+from util import random_string, random_bytes, full_query, multiset
+from boto3.dynamodb.conditions import Key, Attr
+
+# Test that Query with basic key conditions works fine with the in-stock paginator
+def test_query_basic_restrictions(dynamodb, filled_test_table):
+    test_table, items = filled_test_table
+    paginator = dynamodb.meta.client.get_paginator('query')
+
+    # EQ
+    got_items = []
+    for page in paginator.paginate(TableName=test_table.name, KeyConditions={
+            'p' : {'AttributeValueList': ['long'], 'ComparisonOperator': 'EQ'}
+        }):
+        got_items += page['Items']
+    print(got_items)
+    assert multiset([item for item in items if item['p'] == 'long']) == multiset(got_items)
+
+    # LT
+    got_items = []
+    for page in paginator.paginate(TableName=test_table.name, KeyConditions={
+            'p' : {'AttributeValueList': ['long'], 'ComparisonOperator': 'EQ'},
+            'c' : {'AttributeValueList': ['12'], 'ComparisonOperator': 'LT'}
+        }):
+        got_items += page['Items']
+    print(got_items)
+    assert multiset([item for item in items if item['p'] == 'long' and item['c'] < '12']) == multiset(got_items)
+
+    # LE
+    got_items = []
+    for page in paginator.paginate(TableName=test_table.name, KeyConditions={
+            'p' : {'AttributeValueList': ['long'], 'ComparisonOperator': 'EQ'},
+            'c' : {'AttributeValueList': ['14'], 'ComparisonOperator': 'LE'}
+        }):
+        got_items += page['Items']
+    print(got_items)
+    assert multiset([item for item in items if item['p'] == 'long' and item['c'] <= '14']) == multiset(got_items)
+
+    # GT
+    got_items = []
+    for page in paginator.paginate(TableName=test_table.name, KeyConditions={
+            'p' : {'AttributeValueList': ['long'], 'ComparisonOperator': 'EQ'},
+            'c' : {'AttributeValueList': ['15'], 'ComparisonOperator': 'GT'}
+        }):
+        got_items += page['Items']
+    print(got_items)
+    assert multiset([item for item in items if item['p'] == 'long' and item['c'] > '15']) == multiset(got_items)
+
+    # GE
+    got_items = []
+    for page in paginator.paginate(TableName=test_table.name, KeyConditions={
+            'p' : {'AttributeValueList': ['long'], 'ComparisonOperator': 'EQ'},
+            'c' : {'AttributeValueList': ['14'], 'ComparisonOperator': 'GE'}
+        }):
+        got_items += page['Items']
+    print(got_items)
+    assert multiset([item for item in items if item['p'] == 'long' and item['c'] >= '14']) == multiset(got_items)
+
+    # BETWEEN
+    got_items = []
+    for page in paginator.paginate(TableName=test_table.name, KeyConditions={
+            'p' : {'AttributeValueList': ['long'], 'ComparisonOperator': 'EQ'},
+            'c' : {'AttributeValueList': ['155', '164'], 'ComparisonOperator': 'BETWEEN'}
+        }):
+        got_items += page['Items']
+    print(got_items)
+    assert multiset([item for item in items if item['p'] == 'long' and item['c'] >= '155' and item['c'] <= '164']) == multiset(got_items)
+
+    # BEGINS_WITH
+    got_items = []
+    for page in paginator.paginate(TableName=test_table.name, KeyConditions={
+            'p' : {'AttributeValueList': ['long'], 'ComparisonOperator': 'EQ'},
+            'c' : {'AttributeValueList': ['11'], 'ComparisonOperator': 'BEGINS_WITH'}
+        }):
+        print([item for item in items if item['p'] == 'long' and item['c'].startswith('11')])
+        got_items += page['Items']
+    print(got_items)
+    assert multiset([item for item in items if item['p'] == 'long' and item['c'].startswith('11')]) == multiset(got_items)
+
+# Test that KeyConditionExpression parameter is supported
+@pytest.mark.xfail(reason="KeyConditionExpression not supported yet")
+def test_query_key_condition_expression(dynamodb, filled_test_table):
+    test_table, items = filled_test_table
+    paginator = dynamodb.meta.client.get_paginator('query')
+    got_items = []
+    for page in paginator.paginate(TableName=test_table.name, KeyConditionExpression=Key("p").eq("long") & Key("c").lt("12")):
+        got_items += page['Items']
+    print(got_items)
+    assert multiset([item for item in items if item['p'] == 'long' and item['c'] < '12']) == multiset(got_items)
+
+def test_begins_with(dynamodb, test_table):
+    paginator = dynamodb.meta.client.get_paginator('query')
+    items = [{'p': 'unorthodox_chars', 'c': sort_key, 'str': 'a'} for sort_key in [u'ÿÿÿ', u'cÿbÿ', u'cÿbÿÿabg'] ]
+    with test_table.batch_writer() as batch:
+        for item in items:
+            batch.put_item(item)
+
+    # TODO(sarna): Once bytes type is supported, the \xFF character should be tested
+    got_items = []
+    for page in paginator.paginate(TableName=test_table.name, KeyConditions={
+            'p' : {'AttributeValueList': ['unorthodox_chars'], 'ComparisonOperator': 'EQ'},
+            'c' : {'AttributeValueList': [u'ÿÿ'], 'ComparisonOperator': 'BEGINS_WITH'}
+        }):
+        got_items += page['Items']
+    print(got_items)
+    assert sorted([d['c'] for d in got_items]) == sorted([d['c'] for d in items if d['c'].startswith(u'ÿÿ')])
+
+    got_items = []
+    for page in paginator.paginate(TableName=test_table.name, KeyConditions={
+            'p' : {'AttributeValueList': ['unorthodox_chars'], 'ComparisonOperator': 'EQ'},
+            'c' : {'AttributeValueList': [u'cÿbÿ'], 'ComparisonOperator': 'BEGINS_WITH'}
+        }):
+        got_items += page['Items']
+    print(got_items)
+    assert sorted([d['c'] for d in got_items]) == sorted([d['c'] for d in items if d['c'].startswith(u'cÿbÿ')])
+
+def test_begins_with_wrong_type(dynamodb, test_table_sn):
+    paginator = dynamodb.meta.client.get_paginator('query')
+    with pytest.raises(ClientError, match='ValidationException'):
+        for page in paginator.paginate(TableName=test_table_sn.name, KeyConditions={
+                'p' : {'AttributeValueList': ['unorthodox_chars'], 'ComparisonOperator': 'EQ'},
+                'c' : {'AttributeValueList': [17], 'ComparisonOperator': 'BEGINS_WITH'}
+                }):
+            pass
+
+# Items returned by Query should be sorted by the sort key. The following
+# tests verify that this is indeed the case, for the three allowed key types:
+# strings, binary, and numbers. These tests test not just the Query operation,
+# but inherently that the sort-key sorting works.
+def test_query_sort_order_string(test_table):
+    # Insert a lot of random items in one new partition:
+    # str(i) has a non-obvious sort order (e.g., "100" comes before "2") so is a nice test.
+    p = random_string()
+    items = [{'p': p, 'c': str(i)} for i in range(128)]
+    with test_table.batch_writer() as batch:
+        for item in items:
+            batch.put_item(item)
+    got_items = full_query(test_table, KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}})
+    assert len(items) == len(got_items)
+    # Extract just the sort key ("c") from the items
+    sort_keys = [x['c'] for x in items]
+    got_sort_keys = [x['c'] for x in got_items]
+    # Verify that got_sort_keys are already sorted (in string order)
+    assert sorted(got_sort_keys) == got_sort_keys
+    # Verify that got_sort_keys are a sorted version of the expected sort_keys
+    assert sorted(sort_keys) == got_sort_keys
+def test_query_sort_order_bytes(test_table_sb):
+    # Insert a lot of random items in one new partition:
+    # We arbitrarily use random_bytes(10) to generate each sort key.
+    p = random_string()
+    items = [{'p': p, 'c': random_bytes(10)} for i in range(128)]
+    with test_table_sb.batch_writer() as batch:
+        for item in items:
+            batch.put_item(item)
+    got_items = full_query(test_table_sb, KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}})
+    assert len(items) == len(got_items)
+    sort_keys = [x['c'] for x in items]
+    got_sort_keys = [x['c'] for x in got_items]
+    # Boto3's "Binary" objects are sorted as if bytes are signed integers.
+    # This isn't the order that DynamoDB itself uses (byte 0 should be first,
+    # not byte -128). Sorting the byte array ".value" works.
+    assert sorted(got_sort_keys, key=lambda x: x.value) == got_sort_keys
+    assert sorted(sort_keys) == got_sort_keys
+def test_query_sort_order_number(test_table_sn):
+    # This is a list of numbers, sorted in correct order, and each suitable
+    # for accurate representation by Alternator's number type.
+    numbers = [
+        Decimal("-2e10"),
+        Decimal("-7.1e2"),
+        Decimal("-4.1"),
+        Decimal("-0.1"),
+        Decimal("-1e-5"),
+        Decimal("0"),
+        Decimal("2e-5"),
+        Decimal("0.15"),
+        Decimal("1"),
+        Decimal("1.00000000000000000000000001"),
+        Decimal("3.14159"),
+        Decimal("3.1415926535897932384626433832795028841"),
+        Decimal("31.4"),
+        Decimal("1.4e10"),
+    ]
+    # Insert these numbers, in random order, into one partition:
+    p = random_string()
+    items = [{'p': p, 'c': num} for num in random.sample(numbers, len(numbers))]
+    with test_table_sn.batch_writer() as batch:
+        for item in items:
+            batch.put_item(item)
+    # Finally, verify that we get back exactly the same numbers (with identical
+    # precision), and in their original sorted order.
+    got_items = full_query(test_table_sn, KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}})
+    got_sort_keys = [x['c'] for x in got_items]
+    assert got_sort_keys == numbers
+
+def test_query_filtering_attributes_equality(filled_test_table):
+    test_table, items = filled_test_table
+
+    query_filter = {
+        "attribute" : {
+            "AttributeValueList" : [ "xxxx" ],
+            "ComparisonOperator": "EQ"
+        }
+    }
+    got_items = full_query(test_table, KeyConditions={'p': {'AttributeValueList': ['long'], 'ComparisonOperator': 'EQ'}}, QueryFilter=query_filter)
+    print(got_items)
+    assert multiset([item for item in items if item['p'] == 'long' and item['attribute'] == 'xxxx']) == multiset(got_items)
+
+    query_filter = {
+        "attribute" : {
+            "AttributeValueList" : [ "xxxx" ],
+            "ComparisonOperator": "EQ"
+        },
+        "another" : {
+            "AttributeValueList" : [ "yy" ],
+            "ComparisonOperator": "EQ"
+        }
+    }
+
+    got_items = full_query(test_table, KeyConditions={'p': {'AttributeValueList': ['long'], 'ComparisonOperator': 'EQ'}}, QueryFilter=query_filter)
+    print(got_items)
+    assert multiset([item for item in items if item['p'] == 'long' and item['attribute'] == 'xxxx' and item['another'] == 'yy']) == multiset(got_items)
+
+# Test that FilterExpression works as expected
+@pytest.mark.xfail(reason="FilterExpression not supported yet")
+def test_query_filter_expression(filled_test_table):
+    test_table, items = filled_test_table
+
+    got_items = full_query(test_table, KeyConditions={'p': {'AttributeValueList': ['long'], 'ComparisonOperator': 'EQ'}}, FilterExpression=Attr("attribute").eq("xxxx"))
+    print(got_items)
+    assert multiset([item for item in items if item['p'] == 'long' and item['attribute'] == 'xxxx']) == multiset(got_items)
+
+    got_items = full_query(test_table, KeyConditions={'p': {'AttributeValueList': ['long'], 'ComparisonOperator': 'EQ'}}, FilterExpression=Attr("attribute").eq("xxxx") & Attr("another").eq("yy"))
+    print(got_items)
+    assert multiset([item for item in items if item['p'] == 'long' and item['attribute'] == 'xxxx' and item['another'] == 'yy']) == multiset(got_items)
+
+# QueryFilter can only contain non-key attributes (presumably in order to be compatible with DynamoDB): a filter on a key attribute must fail with ValidationException
+def test_query_filtering_key_equality(filled_test_table):
+    test_table, items = filled_test_table
+
+    with pytest.raises(ClientError, match='ValidationException'):
+        query_filter = {
+            "c" : {
+                "AttributeValueList" : [ "5" ],
+                "ComparisonOperator": "EQ"
+            }
+        }
+        got_items = full_query(test_table, KeyConditions={'p': {'AttributeValueList': ['long'], 'ComparisonOperator': 'EQ'}}, QueryFilter=query_filter)
+        print(got_items)
+
+    with pytest.raises(ClientError, match='ValidationException'):
+        query_filter = {
+            "attribute" : {
+                "AttributeValueList" : [ "x" ],
+                "ComparisonOperator": "EQ"
+            },
+            "p" : {
+                "AttributeValueList" : [ "5" ],
+                "ComparisonOperator": "EQ"
+            }
+        }
+        got_items = full_query(test_table, KeyConditions={'p': {'AttributeValueList': ['long'], 'ComparisonOperator': 'EQ'}}, QueryFilter=query_filter)
+        print(got_items)
+
+# Test Query with the AttributesToGet parameter. Result should include the
+# selected attributes only - if one wants the key attributes as well, one
+# needs to select them explicitly. When no key attributes are selected,
+# some items may have *none* of the selected attributes. Those items are
+# returned too, as empty items - they are not outright missing.
+def test_query_attributes_to_get(dynamodb, test_table):
+    p = random_string()
+    items = [{'p': p, 'c': str(i), 'a': str(i*10), 'b': str(i*100) } for i in range(10)]
+    with test_table.batch_writer() as batch:
+        for item in items:
+            batch.put_item(item)
+    for wanted in [ ['a'],             # only non-key attributes
+                    ['c', 'a'],        # a key attribute (sort key) and non-key
+                    ['p', 'c'],        # entire key
+                    ['nonexistent']    # none of the items have this attribute!
+                   ]:
+        got_items = full_query(test_table, KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, AttributesToGet=wanted)
+        expected_items = [{k: x[k] for k in wanted if k in x} for x in items]
+        assert multiset(expected_items) == multiset(got_items)
+
+# Test, for a table with both hash key and sort key, which keys we can
+# Query by: We can Query by the hash key, by a combination of both hash and
+# sort keys, but *cannot* query by just the sort key, and obviously not
+# by any non-key column.
+def test_query_which_key(test_table):
+    p = random_string()
+    c = random_string()
+    p2 = random_string()
+    c2 = random_string()
+    item1 = {'p': p, 'c': c}
+    item2 = {'p': p, 'c': c2}
+    item3 = {'p': p2, 'c': c}
+    for i in [item1, item2, item3]:
+        test_table.put_item(Item=i)
+    # Query by hash key only:
+    got_items = full_query(test_table, KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}})
+    expected_items = [item1, item2]
+    assert multiset(expected_items) == multiset(got_items)
+    # Query by hash key *and* sort key (this is basically a GetItem):
+    got_items = full_query(test_table, KeyConditions={
+        'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'},
+        'c': {'AttributeValueList': [c], 'ComparisonOperator': 'EQ'}
+    })
+    expected_items = [item1]
+    assert multiset(expected_items) == multiset(got_items)
+    # Query by sort key alone is not allowed. DynamoDB reports:
+    # "Query condition missed key schema element: p".
+    with pytest.raises(ClientError, match='ValidationException'):
+        full_query(test_table, KeyConditions={
+            'c': {'AttributeValueList': [c], 'ComparisonOperator': 'EQ'}
+        })
+    # Query by a non-key isn't allowed, for the same reason - that the
+    # actual hash key (p) is missing in the query:
+    with pytest.raises(ClientError, match='ValidationException'):
+        full_query(test_table, KeyConditions={
+            'z': {'AttributeValueList': [c], 'ComparisonOperator': 'EQ'}
+        })
+    # If we try both p and a non-key we get a complaint that the sort
+    # key is missing: "Query condition missed key schema element: c"
+    with pytest.raises(ClientError, match='ValidationException'):
+        full_query(test_table, KeyConditions={
+            'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'},
+            'z': {'AttributeValueList': [c], 'ComparisonOperator': 'EQ'}
+        })
+    # If we try p, c and another key, we get an error that
+    # "Conditions can be of length 1 or 2 only".
+    with pytest.raises(ClientError, match='ValidationException'):
+        full_query(test_table, KeyConditions={
+            'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'},
+            'c': {'AttributeValueList': [c], 'ComparisonOperator': 'EQ'},
+            'z': {'AttributeValueList': [c], 'ComparisonOperator': 'EQ'}
+        })
+
+# Test the "Select" parameter of Query. The default Select mode,
+# ALL_ATTRIBUTES, returns items with all their attributes. Other modes
+# allow returning just specific attributes or just counting the results
+# without returning items at all.
+@pytest.mark.xfail(reason="Select not supported yet")
+def test_query_select(test_table_sn):
+    numbers = [Decimal(i) for i in range(10)]
+    # Insert these numbers, in random order, into one partition:
+    p = random_string()
+    items = [{'p': p, 'c': num, 'x': num} for num in random.sample(numbers, len(numbers))]
+    with test_table_sn.batch_writer() as batch:
+        for item in items:
+            batch.put_item(item)
+    # Verify that we get back the numbers in their sorted order. By default,
+    # query returns all attributes:
+    got_items = test_table_sn.query(KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}})['Items']
+    got_sort_keys = [x['c'] for x in got_items]
+    assert got_sort_keys == numbers
+    got_x_attributes = [x['x'] for x in got_items]
+    assert got_x_attributes == numbers
+    # Select=ALL_ATTRIBUTES does exactly the same as the default - return
+    # all attributes:
+    got_items = test_table_sn.query(KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, Select='ALL_ATTRIBUTES')['Items']
+    got_sort_keys = [x['c'] for x in got_items]
+    assert got_sort_keys == numbers
+    got_x_attributes = [x['x'] for x in got_items]
+    assert got_x_attributes == numbers
+    # Select=ALL_PROJECTED_ATTRIBUTES is not allowed on a base table (it
+    # is just for indexes, when IndexName is specified)
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_sn.query(KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, Select='ALL_PROJECTED_ATTRIBUTES')
+    # Select=SPECIFIC_ATTRIBUTES requires that either an AttributesToGet
+    # or ProjectionExpression appears, but then really does nothing:
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_sn.query(KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, Select='SPECIFIC_ATTRIBUTES')
+    got_items = test_table_sn.query(KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, Select='SPECIFIC_ATTRIBUTES', AttributesToGet=['x'])['Items']
+    expected_items = [{'x': i} for i in numbers]
+    assert got_items == expected_items
+    got_items = test_table_sn.query(KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, Select='SPECIFIC_ATTRIBUTES', ProjectionExpression='x')['Items']
+    assert got_items == expected_items
+    # Select=COUNT just returns a count - not any items
+    got = test_table_sn.query(KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, Select='COUNT')
+    assert got['Count'] == len(numbers)
+    assert not 'Items' in got
+    # Check again that we also get a count - not just with Select=COUNT,
+    # but without Select=COUNT we also get the items:
+    got = test_table_sn.query(KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}})
+    assert got['Count'] == len(numbers)
+    assert 'Items' in got
+    # Select with some unknown string generates a validation exception:
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_sn.query(KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, Select='UNKNOWN')
+
+# Test that the "Limit" parameter can be used to return only some of the
+# items in a single partition. The items returned are the first in the
+# sorted order.
+def test_query_limit(test_table_sn):
+    numbers = [Decimal(i) for i in range(10)]
+    # Insert these numbers, in random order, into one partition:
+    p = random_string()
+    items = [{'p': p, 'c': num} for num in random.sample(numbers, len(numbers))]
+    with test_table_sn.batch_writer() as batch:
+        for item in items:
+            batch.put_item(item)
+    # Verify that we get back the numbers in their sorted order.
+    # First, no Limit so we should get all numbers (we have few of them, so
+    # it all fits in the default 1MB limitation)
+    got_items = test_table_sn.query(KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}})['Items']
+    got_sort_keys = [x['c'] for x in got_items]
+    assert got_sort_keys == numbers
+    # Now try a few different Limit values, and verify that the query
+    # returns exactly the first Limit sorted numbers.
+    for limit in [1, 2, 3, 7, 10, 17, 100, 10000]:
+        got_items = test_table_sn.query(KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, Limit=limit)['Items']
+        assert len(got_items) == min(limit, len(numbers))
+        got_sort_keys = [x['c'] for x in got_items]
+        assert got_sort_keys == numbers[0:limit]
+    # Unfortunately, the boto3 library forbids a Limit of 0 on its own,
+    # before even sending a request, so we can't test how the server responds.
+    with pytest.raises(ParamValidationError):
+        test_table_sn.query(KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, Limit=0)
+
+# In test_query_limit we tested just that Limit allows stopping the result
+# after the right number of items. Here we test that such a stopped result
+# can be resumed, via the LastEvaluatedKey/ExclusiveStartKey paging mechanism.
+def test_query_limit_paging(test_table_sn):
+    numbers = [Decimal(i) for i in range(20)]
+    # Insert these numbers, in random order, into one partition:
+    p = random_string()
+    items = [{'p': p, 'c': num} for num in random.sample(numbers, len(numbers))]
+    with test_table_sn.batch_writer() as batch:
+        for item in items:
+            batch.put_item(item)
+    # Verify that full_query() returns all these numbers, in sorted order.
+    # full_query() will do a query with the given limit, and resume it again
+    # and again until the last page.
+    for limit in [1, 2, 3, 7, 10, 17, 100, 10000]:
+        got_items = full_query(test_table_sn, KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, Limit=limit)
+        got_sort_keys = [x['c'] for x in got_items]
+        assert got_sort_keys == numbers
+
+# Test that the ScanIndexForward parameter works, and can be used to
+# return items sorted in reverse order. Combining this with Limit can
+# be used to return the last items instead of the first items of the
+# partition.
+@pytest.mark.xfail(reason="ScanIndexForward not supported yet")
+def test_query_reverse(test_table_sn):
+    numbers = [Decimal(i) for i in range(20)]
+    # Insert these numbers, in random order, into one partition:
+    p = random_string()
+    items = [{'p': p, 'c': num} for num in random.sample(numbers, len(numbers))]
+    with test_table_sn.batch_writer() as batch:
+        for item in items:
+            batch.put_item(item)
+    # Verify that we get back the numbers in their sorted order or reverse
+    # order, depending on the ScanIndexForward parameter being True or False.
+    # First, no Limit so we should get all numbers (we have few of them, so
+    # it all fits in the default 1MB limitation)
+    reversed_numbers = list(reversed(numbers))
+    got_items = test_table_sn.query(KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, ScanIndexForward=True)['Items']
+    got_sort_keys = [x['c'] for x in got_items]
+    assert got_sort_keys == numbers
+    got_items = test_table_sn.query(KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, ScanIndexForward=False)['Items']
+    got_sort_keys = [x['c'] for x in got_items]
+    assert got_sort_keys == reversed_numbers
+    # Now try a few different Limit values, and verify that the query
+    # returns exactly the first Limit sorted numbers - in regular or
+    # reverse order, depending on ScanIndexForward.
+    for limit in [1, 2, 3, 7, 10, 17, 100, 10000]:
+        got_items = test_table_sn.query(KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, Limit=limit, ScanIndexForward=True)['Items']
+        assert len(got_items) == min(limit, len(numbers))
+        got_sort_keys = [x['c'] for x in got_items]
+        assert got_sort_keys == numbers[0:limit]
+        got_items = test_table_sn.query(KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, Limit=limit, ScanIndexForward=False)['Items']
+        assert len(got_items) == min(limit, len(numbers))
+        got_sort_keys = [x['c'] for x in got_items]
+        assert got_sort_keys == reversed_numbers[0:limit]
+
+# Test that paging also works properly with reverse order
+# (ScanIndexForward=false), i.e., reverse-order queries can be resumed
+@pytest.mark.xfail(reason="ScanIndexForward not supported yet")
+def test_query_reverse_paging(test_table_sn):
+    numbers = [Decimal(i) for i in range(20)]
+    # Insert these numbers, in random order, into one partition:
+    p = random_string()
+    items = [{'p': p, 'c': num} for num in random.sample(numbers, len(numbers))]
+    with test_table_sn.batch_writer() as batch:
+        for item in items:
+            batch.put_item(item)
+    reversed_numbers = list(reversed(numbers))
+    # Verify that with ScanIndexForward=False, full_query() returns all
+    # these numbers in reversed sorted order - getting pages of Limit items
+    # at a time and resuming the query.
+    for limit in [1, 2, 3, 7, 10, 17, 100, 10000]:
+        got_items = full_query(test_table_sn, KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, ScanIndexForward=False, Limit=limit)
+        got_sort_keys = [x['c'] for x in got_items]
+        assert got_sort_keys == reversed_numbers
--- a/alternator-test/test_returnvalues.py
+++ b/alternator-test/test_returnvalues.py
@@ -0,0 +1,226 @@
+# Copyright 2019 ScyllaDB
+#
+# This file is part of Scylla.
+#
+# Scylla is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Scylla is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+
+# Tests for the ReturnValues parameter for the different update operations
+# (PutItem, UpdateItem, DeleteItem).
+
+import pytest
+from botocore.exceptions import ClientError
+from util import random_string
+
+# Test trivial support for the ReturnValues parameter in PutItem, UpdateItem
+# and DeleteItem - test that "NONE" works (no "Attributes" in the response),
+# while a completely unsupported value gives an error.
+# This test is useful to check that before the ReturnValues parameter is fully
+# implemented, it returns an error when a still-unsupported ReturnValues
+# option is attempted in the request - instead of simply being ignored.
+def test_trivial_returnvalues(test_table_s):
+    # PutItem: 'NONE' is accepted, the unknown value 'DOG' is rejected:
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hi'})
+    ret=test_table_s.put_item(Item={'p': p, 'a': 'hello'}, ReturnValues='NONE')
+    assert not 'Attributes' in ret
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.put_item(Item={'p': p, 'a': 'hello'}, ReturnValues='DOG')
+    # UpdateItem: same two checks, on a fresh item:
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hi', 'b': 'dog'})
+    ret=test_table_s.update_item(Key={'p': p}, ReturnValues='NONE',
+        UpdateExpression='SET b = :val',
+        ExpressionAttributeValues={':val': 'cat'})
+    assert not 'Attributes' in ret
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p}, ReturnValues='DOG',
+            UpdateExpression='SET a = a + :val',
+            ExpressionAttributeValues={':val': 1})
+    # DeleteItem: same two checks, on a fresh item:
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hi'})
+    ret=test_table_s.delete_item(Key={'p': p}, ReturnValues='NONE')
+    assert not 'Attributes' in ret
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.delete_item(Key={'p': p}, ReturnValues='DOG')
+
+# Test the ReturnValues parameter on a PutItem operation. Only two settings
+# are supported for this parameter for this operation: NONE (the default)
+# and ALL_OLD.
+@pytest.mark.xfail(reason="ReturnValues not supported")
+def test_put_item_returnvalues(test_table_s):
+    # By default, the previous value of an item is not returned:
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hi'})
+    ret=test_table_s.put_item(Item={'p': p, 'a': 'hello'})
+    assert not 'Attributes' in ret
+    # Using ReturnValues=NONE is the same:
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hi'})
+    ret=test_table_s.put_item(Item={'p': p, 'a': 'hello'}, ReturnValues='NONE')
+    assert not 'Attributes' in ret
+    # With ReturnValues=ALL_OLD, the old value of the item is returned
+    # in an "Attributes" attribute:
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hi'})
+    ret=test_table_s.put_item(Item={'p': p, 'a': 'hello'}, ReturnValues='ALL_OLD')
+    assert ret['Attributes'] == {'p': p, 'a': 'hi'}
+    # The other ReturnValues options - UPDATED_OLD, ALL_NEW, UPDATED_NEW -
+    # are supported by other operations but not by PutItem:
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.put_item(Item={'p': p, 'a': 'hello'}, ReturnValues='UPDATED_OLD')
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.put_item(Item={'p': p, 'a': 'hello'}, ReturnValues='ALL_NEW')
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.put_item(Item={'p': p, 'a': 'hello'}, ReturnValues='UPDATED_NEW')
+    # Also, obviously, a non-supported setting "DOG" results in an error:
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.put_item(Item={'p': p, 'a': 'hello'}, ReturnValues='DOG')
+    # The ReturnValues value is case sensitive, so while "NONE" is supported
+    # (and tested above), "none" isn't:
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.put_item(Item={'p': p, 'a': 'hello'}, ReturnValues='none')
+
+# Test the ReturnValues parameter on a DeleteItem operation. Only two settings
+# are supported for this parameter for this operation: NONE (the default)
+# and ALL_OLD.
+@pytest.mark.xfail(reason="ReturnValues not supported")
+def test_delete_item_returnvalues(test_table_s):
+    # By default, the previous value of an item is not returned:
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hi'})
+    ret=test_table_s.delete_item(Key={'p': p})
+    assert not 'Attributes' in ret
+    # Using ReturnValues=NONE is the same:
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hi'})
+    ret=test_table_s.delete_item(Key={'p': p}, ReturnValues='NONE')
+    assert not 'Attributes' in ret
+    # With ReturnValues=ALL_OLD, the old value of the item is returned
+    # in an "Attributes" attribute:
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hi'})
+    ret=test_table_s.delete_item(Key={'p': p}, ReturnValues='ALL_OLD')
+    assert ret['Attributes'] == {'p': p, 'a': 'hi'}
+    # The other ReturnValues options - UPDATED_OLD, ALL_NEW, UPDATED_NEW -
+    # are supported by other operations but not by DeleteItem:
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.delete_item(Key={'p': p}, ReturnValues='UPDATED_OLD')
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.delete_item(Key={'p': p}, ReturnValues='ALL_NEW')
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.delete_item(Key={'p': p}, ReturnValues='UPDATED_NEW')
+    # Also, obviously, a non-supported setting "DOG" results in an error:
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.delete_item(Key={'p': p}, ReturnValues='DOG')
+    # The ReturnValues value is case sensitive, so while "NONE" is supported
+    # (and tested above), "none" isn't:
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.delete_item(Key={'p': p}, ReturnValues='none')
+
+# Test the ReturnValues parameter on an UpdateItem operation. All five
+# settings are supported for this parameter for this operation: NONE
+# (the default), ALL_OLD, UPDATED_OLD, ALL_NEW and UPDATED_NEW.
+@pytest.mark.xfail(reason="ReturnValues not supported")
+def test_update_item_returnvalues(test_table_s):
+    # By default, the previous value of an item is not returned:
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hi', 'b': 'dog'})
+    ret=test_table_s.update_item(Key={'p': p},
+        UpdateExpression='SET b = :val',
+        ExpressionAttributeValues={':val': 'cat'})
+    assert not 'Attributes' in ret
+
+    # Using ReturnValues=NONE is the same:
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hi', 'b': 'dog'})
+    ret=test_table_s.update_item(Key={'p': p}, ReturnValues='NONE',
+        UpdateExpression='SET b = :val',
+        ExpressionAttributeValues={':val': 'cat'})
+    assert not 'Attributes' in ret
+
+    # With ReturnValues=ALL_OLD, the entire old value of the item (even
+    # attributes we did not modify) is returned in an "Attributes" attribute:
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hi', 'b': 'dog'})
+    ret=test_table_s.update_item(Key={'p': p}, ReturnValues='ALL_OLD',
+        UpdateExpression='SET b = :val',
+        ExpressionAttributeValues={':val': 'cat'})
+    assert ret['Attributes'] == {'p': p, 'a': 'hi', 'b': 'dog'}
+
+    # With ReturnValues=UPDATED_OLD, only the overwritten attributes of the
+    # old item are returned in an "Attributes" attribute:
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hi', 'b': 'dog'})
+    ret=test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_OLD',
+        UpdateExpression='SET b = :val, c = :val2',
+        ExpressionAttributeValues={':val': 'cat', ':val2': 'hello'})
+    assert ret['Attributes'] == {'b': 'dog'}
+    # Even if an update overwrites an attribute by the same value again,
+    # this is considered an update, and the old value (identical to the
+    # new one) is returned:
+    ret=test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_OLD',
+        UpdateExpression='SET b = :val',
+        ExpressionAttributeValues={':val': 'cat'})
+    assert ret['Attributes'] == {'b': 'cat'}
+    # Deleting an attribute also counts as overwriting it, of course:
+    ret=test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_OLD',
+        UpdateExpression='REMOVE b')
+    assert ret['Attributes'] == {'b': 'cat'}
+
+    # With ReturnValues=ALL_NEW, the entire new value of the item (including
+    # old attributes we did not modify) is returned:
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hi', 'b': 'dog'})
+    ret=test_table_s.update_item(Key={'p': p}, ReturnValues='ALL_NEW',
+        UpdateExpression='SET b = :val',
+        ExpressionAttributeValues={':val': 'cat'})
+    assert ret['Attributes'] == {'p': p, 'a': 'hi', 'b': 'cat'}
+
+    # With ReturnValues=UPDATED_NEW, only the new values of the updated
+    # attributes are returned. Note that "updated attributes" means
+    # the newly set attributes - it doesn't require that these attributes
+    # have any previous values.
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hi', 'b': 'dog'})
+    ret=test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_NEW',
+        UpdateExpression='SET b = :val, c = :val2',
+        ExpressionAttributeValues={':val': 'cat', ':val2': 'hello'})
+    assert ret['Attributes'] == {'b': 'cat', 'c': 'hello'}
+    # Deleting an attribute also counts as overwriting it, but the deleted
+    # attribute is not returned in the response - so it's empty in this case.
+    ret=test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_NEW',
+        UpdateExpression='REMOVE b')
+    assert not 'Attributes' in ret
+    # In the above examples, UPDATED_NEW is not useful because it just
+    # returns the new values we already know from the request... UPDATED_NEW
+    # becomes more useful in read-modify-write operations:
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 1})
+    ret=test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_NEW',
+        UpdateExpression='SET a = a + :val',
+        ExpressionAttributeValues={':val': 1})
+    assert ret['Attributes'] == {'a': 2}
+
+    # A non-supported setting "DOG" results in an error:
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p}, ReturnValues='DOG',
+            UpdateExpression='SET a = a + :val',
+            ExpressionAttributeValues={':val': 1})
+    # The ReturnValues value is case sensitive, so while "NONE" is supported
+    # (and tested above), "none" isn't:
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p}, ReturnValues='none',
+            UpdateExpression='SET a = a + :val',
+            ExpressionAttributeValues={':val': 1})
--- a/alternator-test/test_scan.py
+++ b/alternator-test/test_scan.py
@@ -19,7 +19,7 @@

 import pytest
 from botocore.exceptions import ClientError
-from util import random_string, random_bytes, full_scan, full_scan_and_count, multiset
+from util import random_string, full_scan, full_scan_and_count, multiset
 from boto3.dynamodb.conditions import Attr

 # Test that scanning works fine with/without pagination
@@ -30,10 +30,10 @@ def test_scan_basic(filled_test_table):
        got_items = []
        while True:
            if limit:
-                response = test_table.scan(Limit=limit, ConsistentRead=True, ExclusiveStartKey=pos) if pos else test_table.scan(Limit=limit, ConsistentRead=True)
+                response = test_table.scan(Limit=limit, ExclusiveStartKey=pos) if pos else test_table.scan(Limit=limit)
                assert len(response['Items']) <= limit
            else:
-                response = test_table.scan(ExclusiveStartKey=pos, ConsistentRead=True) if pos else test_table.scan(ConsistentRead=True)
+                response = test_table.scan(ExclusiveStartKey=pos) if pos else test_table.scan()
            pos = response.get('LastEvaluatedKey', None)
            got_items += response['Items']
            if not pos:
@@ -42,11 +42,6 @@ def test_scan_basic(filled_test_table):
        assert len(items) == len(got_items)
        assert multiset(items) == multiset(got_items)

-def test_scan_nonexistent_table(dynamodb):
-    client = dynamodb.meta.client
-    with pytest.raises(ClientError, match="ResourceNotFoundException"):
-        client.scan(TableName="i_do_not_exist")
-
 def test_scan_with_paginator(dynamodb, filled_test_table):
    test_table, items = filled_test_table
    paginator = dynamodb.meta.client.get_paginator('scan')
@@ -128,6 +123,7 @@ def test_scan_with_attribute_equality_filtering(dynamodb, filled_test_table):
    assert multiset(expected_items) == multiset(got_items)

 # Test that FilterExpression works as expected
+@pytest.mark.xfail(reason="FilterExpression not supported yet")
 def test_scan_filter_expression(filled_test_table):
    test_table, items = filled_test_table

@@ -243,6 +239,7 @@ def test_scan_select(filled_test_table):
 # a scan into multiple parts, and that these parts are in fact disjoint,
 # and their union is the entire contents of the table. We do not actually
 # try to run these queries in *parallel* in this test.
+@pytest.mark.xfail(reason="parallel scan not supported yet")
 def test_scan_parallel(filled_test_table):
    test_table, items = filled_test_table
    for nsegments in [1, 2, 17]:
@@ -253,31 +250,3 @@ def test_scan_parallel(filled_test_table):
        # The following comparison verifies that each of the expected item
        # in items was returned in one - and just one - of the segments.
        assert multiset(items) == multiset(got_items)
-
-# Test correct handling of incorrect parallel scan parameters.
-# Most of the corner cases (like TotalSegments=0) are validated
-# by boto3 itself, but some checks can still be performed.
-def test_scan_parallel_incorrect(filled_test_table):
-    test_table, items = filled_test_table
-    with pytest.raises(ClientError, match='ValidationException.*Segment'):
-        full_scan(test_table, TotalSegments=1000001, Segment=0)
-    for segment in [7, 9]:
-        with pytest.raises(ClientError, match='ValidationException.*Segment'):
-            full_scan(test_table, TotalSegments=5, Segment=segment)
-
-# We used to have a bug with formatting of LastEvaluatedKey in the response
-# of Query and Scan with bytes keys (issue #7768). In test_query_paging_byte()
-# (test_query.py) we tested the case of bytes *sort* keys. In the following
-# test we check bytes *partition* keys.
-def test_scan_paging_bytes(test_table_b):
-    # We will not Scan the entire table - we have no idea what it contains.
-    # But we don't need to scan the entire table - we just need the table
-    # to contain at least two items, and then Scan it with Limit=1 and stop
-    # after one page. Before #7768 was fixed, the test failed when the
-    # LastEvaluatedKey in the response could not be parsed.
-    items = [{'p': random_bytes()}, {'p': random_bytes()}]
-    with test_table_b.batch_writer() as batch:
-        for item in items:
-            batch.put_item(item)
-    response = test_table_b.scan(ConsistentRead=True, Limit=1)
-    assert 'LastEvaluatedKey' in response
--- a/alternator-test/test_table.py
+++ b/alternator-test/test_table.py
@@ -74,11 +74,6 @@ def create_and_delete_table(dynamodb, name, **kwargs):
 def test_create_and_delete_table(dynamodb):
    create_and_delete_table(dynamodb, 'alternator_test')

-# Test that recreating a table right after deleting it works without issues
-def test_recreate_table(dynamodb):
-    create_and_delete_table(dynamodb, 'alternator_recr_test')
-    create_and_delete_table(dynamodb, 'alternator_recr_test')
-
 # DynamoDB documentation specifies that table names must be 3-255 characters,
 # and match the regex [a-zA-Z0-9._-]+. Names not matching these rules should
 # be rejected, and no table be created.
@@ -200,7 +195,7 @@ def test_create_table_invalid_schema(dynamodb):
 # Test that trying to create a table that already exists fails in the
 # appropriate way (ResourceInUseException)
 def test_create_table_already_exists(dynamodb, test_table):
-    with pytest.raises(ClientError, match='ResourceInUseException.*Table.*already exists'):
+    with pytest.raises(ClientError, match='ResourceInUseException'):
        create_table(dynamodb, test_table.name)

 # Test that BillingMode error path works as expected - only the values
@@ -232,35 +227,6 @@ def test_create_table_billing_mode_errors(dynamodb, test_table):
            KeySchema=[{ 'AttributeName': 'p', 'KeyType': 'HASH' }],
            AttributeDefinitions=[{ 'AttributeName': 'p', 'AttributeType': 'S' }])

-# Even before Alternator gains full support for the DynamoDB stream API
-# and CreateTable's StreamSpecification option, we should support the
-# options which mean it is turned *off*.
-def test_table_streams_off(dynamodb):
-    # If StreamSpecification is given, but has StreamEnabled=false, it's as
-    # if StreamSpecification was missing. StreamViewType isn't needed.
-    table = create_test_table(dynamodb, StreamSpecification={'StreamEnabled': False},
-        KeySchema=[{ 'AttributeName': 'p', 'KeyType': 'HASH' }],
-        AttributeDefinitions=[{ 'AttributeName': 'p', 'AttributeType': 'S' }]);
-    table.delete();
-    # DynamoDB doesn't allow StreamSpecification to be empty map - if it
-    # exists, it must have a StreamEnabled
-    # Unfortunately, new versions of boto3 doesn't let us pass this...
-    #with pytest.raises(ClientError, match='ValidationException'):
-    #    table = create_test_table(dynamodb, StreamSpecification={},
-    #        KeySchema=[{ 'AttributeName': 'p', 'KeyType': 'HASH' }],
-    #        AttributeDefinitions=[{ 'AttributeName': 'p', 'AttributeType': 'S' }]);
-    #    table.delete();
-    # Unfortunately, boto3 doesn't allow us to pass StreamSpecification=None.
-    # This is what we had in issue #5796.
-
-@pytest.mark.xfail(reason="streams not yet implemented")
-def test_table_streams_on(dynamodb):
-    table = create_test_table(dynamodb,
-        StreamSpecification={'StreamEnabled': True, 'StreamViewType': 'OLD_IMAGE'},
-        KeySchema=[{ 'AttributeName': 'p', 'KeyType': 'HASH' }],
-        AttributeDefinitions=[{ 'AttributeName': 'p', 'AttributeType': 'S' }]);
-    table.delete();
-
 # Our first implementation had a special column name called "attrs" where
 # we stored a map for all non-key columns. If the user tried to name one
 # of the key columns with this same name, the result was a disaster - Scylla
@@ -308,17 +274,3 @@ def test_list_tables_wrong_limit(dynamodb):
    # lower limit (min. 1) is imposed by boto3 library checks
    with pytest.raises(ClientError, match='ValidationException'):
        dynamodb.meta.client.list_tables(Limit=101)
-
-# Even before Alternator gains support for configuring server-side encryption
-# ("encryption at rest") with CreateTable's SSESpecification option, we should
-# support the option "Enabled=false" which is the default, and means the server
-# takes care of whatever server-side encryption is done, on its own.
-# Reproduces issue #7031.
-def test_table_sse_off(dynamodb):
-    # If StreamSpecification is given, but has StreamEnabled=false, it's as
-    # if StreamSpecification was missing, and fine. No other attribues are
-    # necessary.
-    table = create_test_table(dynamodb, SSESpecification = {'Enabled': False},
-        KeySchema=[{ 'AttributeName': 'p', 'KeyType': 'HASH' }],
-        AttributeDefinitions=[{ 'AttributeName': 'p', 'AttributeType': 'S' }]);
-    table.delete();
--- a/alternator-test/test_update_expression.py
+++ b/alternator-test/test_update_expression.py
@@ -370,7 +370,7 @@ def test_update_expression_cannot_modify_key(test_table):

 # Test that trying to start an expression with some nonsense like HELLO
 # instead of SET, REMOVE, ADD or DELETE, fails.
-def test_update_expression_non_existent_clause(test_table_s):
+def test_update_expression_non_existant_clause(test_table_s):
    p = random_string()
    with pytest.raises(ClientError, match='ValidationException'):
        test_table_s.update_item(Key={'p': p},
@@ -675,24 +675,6 @@ def test_update_expression_add_numbers(test_table_s):
            UpdateExpression='ADD b :val1',
            ExpressionAttributeValues={':val1': 1})

-# In test_update_expression_add_numbers() above we tested ADDing a number to
-# an existing number. The following test check that ADD can be used to
-# create a *new* number, as if it was added to zero.
-def test_update_expression_add_numbers_new(test_table_s):
-    # Test that "ADD" can create a new number attribute:
-    p = random_string()
-    test_table_s.put_item(Item={'p': p, 'a': 'hello'})
-    test_table_s.update_item(Key={'p': p},
-        UpdateExpression='ADD b :val1',
-        ExpressionAttributeValues={':val1': 7})
-    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['b'] == 7
-    # Test that "ADD" can create an entirely new item:
-    p = random_string()
-    test_table_s.update_item(Key={'p': p},
-        UpdateExpression='ADD b :val1',
-        ExpressionAttributeValues={':val1': 8})
-    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['b'] == 8
-
 # Test "ADD" operation for sets
 def test_update_expression_add_sets(test_table_s):
    p = random_string()
@@ -721,24 +703,6 @@ def test_update_expression_add_sets(test_table_s):
            UpdateExpression='ADD a :val1',
            ExpressionAttributeValues={':val1': 'hello'})

-# In test_update_expression_add_sets() above we tested ADDing elements to an
-# existing set. The following test checks that ADD can be used to create a
-# *new* set, by adding its first item.
-def test_update_expression_add_sets_new(test_table_s):
-    # Test that "ADD" can create a new set attribute:
-    p = random_string()
-    test_table_s.put_item(Item={'p': p, 'a': 'hello'})
-    test_table_s.update_item(Key={'p': p},
-        UpdateExpression='ADD b :val1',
-        ExpressionAttributeValues={':val1': set(['dog'])})
-    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['b'] == set(['dog'])
-    # Test that "ADD" can create an entirely new item:
-    p = random_string()
-    test_table_s.update_item(Key={'p': p},
-        UpdateExpression='ADD b :val1',
-        ExpressionAttributeValues={':val1': set(['cat'])})
-    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['b'] == set(['cat'])
-
 # Test "DELETE" operation for sets
 def test_update_expression_delete_sets(test_table_s):
    p = random_string()
@@ -753,28 +717,10 @@ def test_update_expression_delete_sets(test_table_s):
        UpdateExpression='DELETE a :val1',
        ExpressionAttributeValues={':val1': set(['pig'])})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['a'] == set(['dog'])
-    # Deleting all the elements cannot leave an empty set (which isn't
-    # supported). Rather, it deletes the attribute altogether:
-    test_table_s.update_item(Key={'p': p},
-        UpdateExpression='DELETE a :val1',
-        ExpressionAttributeValues={':val1': set(['dog'])})
-    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'b': 'hi'}
-    # Deleting elements from a non-existent attribute is allowed, and
-    # simply does nothing:
-    test_table_s.update_item(Key={'p': p},
-        UpdateExpression='DELETE a :val1',
-        ExpressionAttributeValues={':val1': set(['dog'])})
-    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'b': 'hi'}
-    # An empty set parameter is not allowed
-    with pytest.raises(ClientError, match='ValidationException.*empty'):
-        test_table_s.update_item(Key={'p': p},
-            UpdateExpression='DELETE a :val1',
-            ExpressionAttributeValues={':val1': set([])})
    # The value to be deleted must be a set of the same type - it can't
    # be a single element or anything else. If the value has the wrong type,
    # we get an error like "Invalid UpdateExpression: Incorrect operand type
    # for operator or function; operator: DELETE, operand type: STRING".
-    test_table_s.put_item(Item={'p': p, 'a': set(['dog', 'cat', 'mouse']), 'b': 'hi'})
    with pytest.raises(ClientError, match='ValidationException.*type'):
        test_table_s.update_item(Key={'p': p},
            UpdateExpression='DELETE a :val1',
@@ -906,24 +852,3 @@ def test_nested_attribute_update_bad_path_array(test_table_s):
    with pytest.raises(ClientError, match='ValidationException.*path'):
        test_table_s.update_item(Key={'p': p}, UpdateExpression='SET a[0] = :val1',
            ExpressionAttributeValues={':val1': 7})
-
-# DynamoDB Does not allow empty sets.
-# Trying to ask UpdateItem to put one of these in an attribute should be
-# forbidden. Empty lists and maps *are* allowed.
-# Note that in test_item.py::test_update_item_empty_attribute we checked
-# this with the AttributeUpdates syntax. Here we check the same with the
-# UpdateExpression syntax.
-def test_update_expression_empty_attribute(test_table_s):
-    p = random_string()
-    # Empty sets are *not* allowed
-    with pytest.raises(ClientError, match='ValidationException.*empty'):
-        test_table_s.update_item(Key={'p': p},
-            UpdateExpression='SET a = :v',
-            ExpressionAttributeValues={':v': set()})
-    assert not 'Item' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True)
-    # But empty lists, maps, strings and binary blobs *are* allowed:
-    test_table_s.update_item(Key={'p': p},
-        UpdateExpression='SET d = :v1, e = :v2, f = :v3, g = :v4',
-        ExpressionAttributeValues={':v1': [], ':v2': {}, ':v3': '', ':v4': b''})
-    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'd': [], 'e': {}, 'f': '', 'g': b''}
-#
--- a/alternator-test/util.py
+++ b/alternator-test/util.py
@@ -0,0 +1,141 @@
+# Copyright 2019 ScyllaDB
+#
+# This file is part of Scylla.
+#
+# Scylla is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Scylla is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+
+# Various utility functions which are useful for multiple tests
+
+import string
+import random
+import collections
+import time
+
+def random_string(length=10, chars=string.ascii_uppercase + string.digits):
+    return ''.join(random.choice(chars) for x in range(length))  # 'length' independent random picks from 'chars'
+
+def random_bytes(length=10):
+    return bytearray(random.getrandbits(8) for _ in range(length))  # 'length' random byte values (0..255)
+
+# Utility functions for reading an entire scan or query into an array of items:
+# TODO: make full_scan and full_query pass ConsistentRead=True by default, as
+# they are not useful for tests without it!
+def full_scan(table, **kwargs):
+    response = table.scan(**kwargs)
+    items = response['Items']
+    while 'LastEvaluatedKey' in response:  # keep going until a response carries no resume key
+        response = table.scan(ExclusiveStartKey=response['LastEvaluatedKey'], **kwargs)
+        items.extend(response['Items'])
+    return items
+
+# full_scan_and_count returns both items and count as returned by the server,
+# as a (count, items) tuple accumulated over all the pages of the scan.
+# Note that count isn't simply len(items) - the server returns them
+# independently, e.g., with Select='COUNT' the items are not returned, but count is.
+def full_scan_and_count(table, **kwargs):
+    response = table.scan(**kwargs)
+    items = []
+    count = 0
+    if 'Items' in response:
+        items.extend(response['Items'])
+    if 'Count' in response:
+        count = count + response['Count']
+    while 'LastEvaluatedKey' in response:  # keep going until a response carries no resume key
+        response = table.scan(ExclusiveStartKey=response['LastEvaluatedKey'], **kwargs)
+        if 'Items' in response:
+            items.extend(response['Items'])
+        if 'Count' in response:
+            count = count + response['Count']
+    return (count, items)
+
+# Utility function for fetching the entire results of a query, page by page, into an array of items
+def full_query(table, **kwargs):
+    response = table.query(**kwargs)
+    items = response['Items']
+    while 'LastEvaluatedKey' in response:  # keep going until a response carries no resume key
+        response = table.query(ExclusiveStartKey=response['LastEvaluatedKey'], **kwargs)
+        items.extend(response['Items'])
+    return items
+
+# To compare two lists of items (each is a dict) without regard for order,
+# "==" is not good enough because it will fail if the order is different.
+# multiset() below converts such a list into a multiset (set with duplicates)
+# where order doesn't matter, so the multisets can be compared. freeze() is
+# its helper: it recursively turns dicts into frozensets and lists into tuples.
+
+def freeze(item):
+    if isinstance(item, dict):
+        return frozenset((key, freeze(value)) for key, value in item.items())
+    elif isinstance(item, list):
+        return tuple(freeze(value) for value in item)
+    return item  # anything else is returned unchanged
+
+def multiset(items):
+    return collections.Counter([freeze(item) for item in items])  # maps each frozen item to its number of occurrences
+
+
+test_table_prefix = 'alternator_test_'
+def test_table_name():  # returns test_table_prefix followed by a millisecond timestamp
+    current_ms = int(round(time.time() * 1000))
+    # In the off chance that test_table_name() is called twice in the same millisecond, bump the timestamp:
+    if test_table_name.last_ms >= current_ms:
+        current_ms = test_table_name.last_ms + 1
+    test_table_name.last_ms = current_ms
+    return test_table_prefix + str(current_ms)
+test_table_name.last_ms = 0  # function attribute remembering the last timestamp handed out
+
+def create_test_table(dynamodb, **kwargs):  # creates a table with a fresh name and waits until it exists
+    name = test_table_name()
+    print("fixture creating new table {}".format(name))
+    table = dynamodb.create_table(TableName=name,
+        BillingMode='PAY_PER_REQUEST', **kwargs)
+    waiter = table.meta.client.get_waiter('table_exists')
+    # Recheck every second instead of the default, lower, frequency. This can
+    # save a few seconds on AWS with its very slow table creation, but can
+    # save more in tests on Scylla with its faster table creation turnaround.
+    waiter.config.delay = 1
+    waiter.config.max_attempts = 200
+    waiter.wait(TableName=name)
+    return table
+
+# DynamoDB's ListTables request returns up to a single page of table names
+# (e.g., up to 100) and it is up to the caller to call it again and again
+# to get the next page. This is a utility function which calls it repeatedly
+# as much as necessary to get the entire list.
+# We deliberately return a list and not a set, because we want the caller
+# to be able to recognize bugs in ListTables which cause the same table
+# to be returned twice.
+def list_tables(dynamodb, limit=100):
+    ret = []
+    pos = None
+    while True:
+        if pos:
+            page = dynamodb.meta.client.list_tables(Limit=limit, ExclusiveStartTableName=pos);
+        else:
+            page = dynamodb.meta.client.list_tables(Limit=limit);
+        results = page.get('TableNames', None)
+        assert(results)  # fails if 'TableNames' is missing or the page is empty
+        ret = ret + results
+        newpos = page.get('LastEvaluatedTableName', None)
+        if not newpos:
+            break;
+        # It doesn't make sense for Dynamo to tell us we need more pages, but
+        # not send anything in *this* page!
+        assert len(results) > 0
+        assert newpos != pos
+        # Note that we only checked that we got back tables, not that we got
+        # any new tables not already in ret. So a buggy implementation might
+        # still cause an endless loop getting the same tables again and again.
+        pos = newpos
+    return ret
--- a/alternator/auth.cc
+++ b/alternator/auth.cc
@@ -129,8 +129,8 @@ future<std::string> get_key_from_roles(cql3::query_processor& qp, std::string us
            auth::meta::roles_table::qualified_name(), auth::meta::roles_table::role_col_name);

    auto cl = auth::password_authenticator::consistency_for_user(username);
-    auto& timeout = auth::internal_distributed_timeout_config();
-    return qp.execute_internal(query, cl, timeout, {sstring(username)}, true).then_wrapped([username = std::move(username)] (future<::shared_ptr<cql3::untyped_result_set>> f) {
+    auto timeout = auth::internal_distributed_timeout_config();
+    return qp.process(query, cl, timeout, {sstring(username)}, true).then_wrapped([username = std::move(username)] (future<::shared_ptr<cql3::untyped_result_set>> f) {
        auto res = f.get0();
        auto salted_hash = std::optional<sstring>();
        if (res->empty()) {
--- a/alternator/base64.cc
+++ b/alternator/base64.cc
@@ -77,7 +77,7 @@ std::string base64_encode(bytes_view in) {
    return ret;
 }

-static std::string base64_decode_string(std::string_view in) {
+bytes base64_decode(std::string_view in) {
    int i = 0;
    int8_t chunk4[4]; // chunk of input, each byte converted to 0..63;
    std::string ret;
@@ -104,42 +104,8 @@ static std::string base64_decode_string(std::string_view in) {
        if (i==3)
            ret += ((chunk4[1] & 0xf) << 4) + ((chunk4[2] & 0x3c) >> 2);
    }
-    return ret;
-}
-
-bytes base64_decode(std::string_view in) {
    // FIXME: This copy is sad. The problem is we need back "bytes"
    // but "bytes" doesn't have efficient append and std::string.
    // To fix this we need to use bytes' "uninitialized" feature.
-    std::string ret = base64_decode_string(in);
    return bytes(ret.begin(), ret.end());
 }
-
-static size_t base64_padding_len(std::string_view str) {
-    size_t padding = 0;
-    padding += (!str.empty() && str.back() == '=');
-    padding += (str.size() > 1 && *(str.end() - 2) == '=');
-    return padding;
-}
-
-size_t base64_decoded_len(std::string_view str) {
-    return str.size() / 4 * 3 - base64_padding_len(str);
-}
-
-bool base64_begins_with(std::string_view base, std::string_view operand) {
-    if (base.size() < operand.size() || base.size() % 4 != 0 || operand.size() % 4 != 0) {
-        return false;
-    }
-    if (base64_padding_len(operand) == 0) {
-        return base.starts_with(operand);
-    }
-    const std::string_view unpadded_base_prefix = base.substr(0, operand.size() - 4);
-    const std::string_view unpadded_operand = operand.substr(0, operand.size() - 4);
-    if (unpadded_base_prefix != unpadded_operand) {
-        return false;
-    }
-    // Decode and compare last 4 bytes of base64-encoded strings
-    const std::string base_remainder = base64_decode_string(base.substr(operand.size() - 4, operand.size()));
-    const std::string operand_remainder = base64_decode_string(operand.substr(operand.size() - 4));
-    return base_remainder.starts_with(operand_remainder);
-}
--- a/alternator/base64.hh
+++ b/alternator/base64.hh
@@ -32,7 +32,3 @@ bytes base64_decode(std::string_view);
 inline bytes base64_decode(const rjson::value& v) {
  return base64_decode(std::string_view(v.GetString(), v.GetStringLength()));
 }
-
-size_t base64_decoded_len(std::string_view str);
-
-bool base64_begins_with(std::string_view base, std::string_view operand);
--- a/alternator/conditions.cc
+++ b/alternator/conditions.cc
@@ -30,11 +30,6 @@
 #include "serialization.hh"
 #include "base64.hh"
 #include <stdexcept>
-#include <boost/algorithm/cxx11/all_of.hpp>
-#include <boost/algorithm/cxx11/any_of.hpp>
-#include "utils/overloaded_functor.hh"
-
-#include "expressions.hh"

 namespace alternator {

@@ -67,6 +62,49 @@ comparison_operator_type get_comparison_operator(const rjson::value& comparison_
    return it->second;
 }

+static ::shared_ptr<cql3::restrictions::single_column_restriction::contains> make_map_element_restriction(const column_definition& cdef, std::string_view key, const rjson::value& value) {
+    bytes raw_key = utf8_type->from_string(sstring_view(key.data(), key.size()));
+    auto key_value = ::make_shared<cql3::constants::value>(cql3::raw_value::make_value(std::move(raw_key)));
+    bytes raw_value = serialize_item(value);
+    auto entry_value = ::make_shared<cql3::constants::value>(cql3::raw_value::make_value(std::move(raw_value)));
+    return make_shared<cql3::restrictions::single_column_restriction::contains>(cdef, std::move(key_value), std::move(entry_value));
+}
+
+static ::shared_ptr<cql3::restrictions::single_column_restriction::EQ> make_key_eq_restriction(const column_definition& cdef, const rjson::value& value) {
+    bytes raw_value = get_key_from_typed_value(value, cdef, type_to_string(cdef.type));
+    auto restriction_value = ::make_shared<cql3::constants::value>(cql3::raw_value::make_value(std::move(raw_value)));
+    return make_shared<cql3::restrictions::single_column_restriction::EQ>(cdef, std::move(restriction_value));
+}
+
+// Builds CQL filtering restrictions from a filter mapping each attribute name
+// to a {ComparisonOperator, AttributeValueList} condition; only EQ is supported.
+::shared_ptr<cql3::restrictions::statement_restrictions> get_filtering_restrictions(schema_ptr schema, const column_definition& attrs_col, const rjson::value& query_filter) {
+    clogger.trace("Getting filtering restrictions for: {}", rjson::print(query_filter));
+    auto filtering_restrictions = ::make_shared<cql3::restrictions::statement_restrictions>(schema, true);
+    for (auto it = query_filter.MemberBegin(); it != query_filter.MemberEnd(); ++it) {
+        std::string_view column_name(it->name.GetString(), it->name.GetStringLength());
+        const rjson::value& condition = it->value;
+        const rjson::value& comp_definition = rjson::get(condition, "ComparisonOperator");
+        const rjson::value& attr_list = rjson::get(condition, "AttributeValueList");
+        comparison_operator_type op = get_comparison_operator(comp_definition);
+        if (op != comparison_operator_type::EQ) {
+            throw api_error("ValidationException", "Filtering is currently implemented for EQ operator only");
+        }
+        if (!attr_list.IsArray() || attr_list.Size() != 1) {
+            throw api_error("ValidationException", format("EQ restriction needs exactly 1 attribute value: {}", rjson::print(attr_list)));
+        }
+        // Look up the name by its full length, not as a NUL-terminated C string.
+        if (const column_definition* cdef = schema->get_column_definition(bytes(column_name.begin(), column_name.end()))) {
+            // Primary key restriction
+            filtering_restrictions->add_restriction(make_key_eq_restriction(*cdef, attr_list[0]), false, true);
+        } else {
+            // Regular column restriction
+            filtering_restrictions->add_restriction(make_map_element_restriction(attrs_col, column_name, attr_list[0]), false, true);
+        }
+    }
+    return filtering_restrictions;
+}
+
 namespace {

 struct size_check {
@@ -98,11 +136,6 @@ struct nonempty : public size_check {

 // Check that array has the expected number of elements
 static void verify_operand_count(const rjson::value* array, const size_check& expected, const rjson::value& op) {
-    if (!array && expected(0)) {
-        // If expected() allows an empty AttributeValueList, it is also fine
-        // that it is missing.
-        return;
-    }
    if (!array || !array->IsArray()) {
        throw api_error("ValidationException", "With ComparisonOperator, AttributeValueList must be given and an array");
    }
@@ -159,63 +192,61 @@ static bool check_NE(const rjson::value* v1, const rjson::value& v2) {
 }

 // Check if two JSON-encoded values match with the BEGINS_WITH relation
-bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2,
-                       bool v1_from_query, bool v2_from_query) {
-    bool bad = false;
-    if (!v1 || !v1->IsObject() || v1->MemberCount() != 1) {
-        if (v1_from_query) {
-            throw api_error("ValidationException", "begins_with() encountered malformed argument");
-        } else {
-            bad = true;
-        }
-    } else if (v1->MemberBegin()->name != "S" && v1->MemberBegin()->name != "B") {
-        if (v1_from_query) {
-            throw api_error("ValidationException", format("begins_with supports only string or binary type, got: {}", *v1));
-        } else {
-            bad = true;
-        }
-    }
+static bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2) {
+    // BEGINS_WITH requires that its single operand (v2) be a string or
+    // binary - otherwise it's a validation error. However, problems with
+    // the stored attribute (v1) will just return false (no match).
    if (!v2.IsObject() || v2.MemberCount() != 1) {
-        if (v2_from_query) {
-            throw api_error("ValidationException", "begins_with() encountered malformed argument");
-        } else {
-            bad = true;
-        }
-    } else if (v2.MemberBegin()->name != "S" && v2.MemberBegin()->name != "B") {
-        if (v2_from_query) {
-            throw api_error("ValidationException", format("begins_with() supports only string or binary type, got: {}", v2));
-        } else {
-            bad = true;
-        }
+        throw api_error("ValidationException", format("BEGINS_WITH operator encountered malformed AttributeValue: {}", v2));
    }
-    if (bad) {
+    auto it2 = v2.MemberBegin();
+    if (it2->name != "S" && it2->name != "B") {
+        throw api_error("ValidationException", format("BEGINS_WITH operator requires String or Binary in AttributeValue, got {}", it2->name));
+    }
+
+
+    if (!v1 || !v1->IsObject() || v1->MemberCount() != 1) {
        return false;
    }
    auto it1 = v1->MemberBegin();
-    auto it2 = v2.MemberBegin();
    if (it1->name != it2->name) {
        return false;
    }
    if (it2->name == "S") {
-        return rjson::to_string_view(it1->value).starts_with(rjson::to_string_view(it2->value));
+        std::string_view val1(it1->value.GetString(), it1->value.GetStringLength());
+        std::string_view val2(it2->value.GetString(), it2->value.GetStringLength());
+        return val1.substr(0, val2.size()) == val2;
    } else /* it2->name == "B" */ {
-        return base64_begins_with(rjson::to_string_view(it1->value), rjson::to_string_view(it2->value));
+        // TODO (optimization): Check the begins_with condition directly on
+        // the base64-encoded string, without making a decoded copy.
+        bytes val1 = base64_decode(it1->value);
+        bytes val2 = base64_decode(it2->value);
+        return val1.substr(0, val2.size()) == val2;
    }
 }

+static std::string_view to_string_view(const rjson::value& v) {
+    return std::string_view(v.GetString(), v.GetStringLength());
+}
+
 static bool is_set_of(const rjson::value& type1, const rjson::value& type2) {
    return (type2 == "S" && type1 == "SS") || (type2 == "N" && type1 == "NS") || (type2 == "B" && type1 == "BS");
 }

 // Check if two JSON-encoded values match with the CONTAINS relation
-bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2) {
+static bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2) {
    if (!v1) {
        return false;
    }
    const auto& kv1 = *v1->MemberBegin();
    const auto& kv2 = *v2.MemberBegin();
+    if (kv2.name != "S" && kv2.name != "N" &&  kv2.name != "B") {
+        throw api_error("ValidationException",
+                        format("CONTAINS operator requires a single AttributeValue of type String, Number, or Binary, "
+                               "got {} instead", kv2.name));
+    }
    if (kv1.name == "S" && kv2.name == "S") {
-        return rjson::to_string_view(kv1.value).find(rjson::to_string_view(kv2.value)) != std::string_view::npos;
+        return to_string_view(kv1.value).find(to_string_view(kv2.value)) != std::string_view::npos;
    } else if (kv1.name == "B" && kv2.name == "B") {
        return base64_decode(kv1.value).find(base64_decode(kv2.value)) != bytes::npos;
    } else if (is_set_of(kv1.name, kv2.name)) {
@@ -275,19 +306,6 @@ static bool check_IN(const rjson::value* val, const rjson::value& array) {
    return have_match;
 }

-// Another variant of check_IN, this one for ConditionExpression. It needs to
-// check whether the first element in the given vector is equal to any of the
-// others.
-static bool check_IN(const std::vector<rjson::value>& array) {
-    const rjson::value* first = &array[0];
-    for (unsigned i = 1; i < array.size(); i++) {
-        if (check_EQ(first, array[i])) {
-            return true;
-        }
-    }
-    return false;
-}
-
 static bool check_NULL(const rjson::value* val) {
    return val == nullptr;
 }
@@ -296,38 +314,24 @@ static bool check_NOT_NULL(const rjson::value* val) {
    return val != nullptr;
 }

-// Only types S, N or B (string, number or bytes) may be compared by the
-// various comparion operators - lt, le, gt, ge, and between.
-static bool check_comparable_type(const rjson::value& v) {
-    if (!v.IsObject() || v.MemberCount() != 1) {
-        return false;
-    }
-    const rjson::value& type = v.MemberBegin()->name;
-    return type == "S" || type == "N" || type == "B";
-}
-
 // Check if two JSON-encoded values match with cmp.
 template <typename Comparator>
-bool check_compare(const rjson::value* v1, const rjson::value& v2, const Comparator& cmp,
-                   bool v1_from_query, bool v2_from_query) {
-    bool bad = false;
-    if (!v1 || !check_comparable_type(*v1)) {
-        if (v1_from_query) {
-            throw api_error("ValidationException", format("{} allow only the types String, Number, or Binary", cmp.diagnostic));
-        }
-        bad = true;
+bool check_compare(const rjson::value* v1, const rjson::value& v2, const Comparator& cmp) {
+    if (!v2.IsObject() || v2.MemberCount() != 1) {
+        throw api_error("ValidationException",
+                        format("{} requires a single AttributeValue of type String, Number, or Binary",
+                               cmp.diagnostic));
    }
-    if (!check_comparable_type(v2)) {
-        if (v2_from_query) {
-            throw api_error("ValidationException", format("{} allow only the types String, Number, or Binary", cmp.diagnostic));
-        }
-        bad = true;
+    const auto& kv2 = *v2.MemberBegin();
+    if (kv2.name != "S" && kv2.name != "N" && kv2.name != "B") {
+        throw api_error("ValidationException",
+                        format("{} requires a single AttributeValue of type String, Number, or Binary",
+                               cmp.diagnostic));
    }
-    if (bad) {
+    if (!v1 || !v1->IsObject() || v1->MemberCount() != 1) {
        return false;
    }
    const auto& kv1 = *v1->MemberBegin();
-    const auto& kv2 = *v2.MemberBegin();
    if (kv1.name != kv2.name) {
        return false;
    }
@@ -341,103 +345,84 @@ bool check_compare(const rjson::value* v1, const rjson::value& v2, const Compara
    if (kv1.name == "B") {
        return cmp(base64_decode(kv1.value), base64_decode(kv2.value));
    }
-    // cannot reach here, as check_comparable_type() verifies the type is one
-    // of the above options.
+    clogger.error("check_compare panic: LHS type equals RHS type, but one is in {N,S,B} while the other isn't");
    return false;
 }

 struct cmp_lt {
    template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs < rhs; }
-    // We cannot use the normal comparison operators like "<" on the bytes
-    // type, because they treat individual bytes as signed but we need to
-    // compare them as *unsigned*. So we need a specialization for bytes.
-    bool operator()(const bytes& lhs, const bytes& rhs) const { return compare_unsigned(lhs, rhs) < 0; }
    static constexpr const char* diagnostic = "LT operator";
 };

 struct cmp_le {
-    template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs <= rhs; }
-    bool operator()(const bytes& lhs, const bytes& rhs) const { return compare_unsigned(lhs, rhs) <= 0; }
+    // bytes only has <, so we cannot use <=. NOTE(review): bytes' < may order bytes as signed - confirm.
+    template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs < rhs || lhs == rhs; }
    static constexpr const char* diagnostic = "LE operator";
 };

 struct cmp_ge {
-    template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs >= rhs; }
-    bool operator()(const bytes& lhs, const bytes& rhs) const { return compare_unsigned(lhs, rhs) >= 0; }
+    // bytes only has <, so we cannot use >=; it is spelled rhs < lhs || lhs == rhs.
+    template <typename T> bool operator()(const T& lhs, const T& rhs) const { return rhs < lhs || lhs == rhs; }
    static constexpr const char* diagnostic = "GE operator";
 };

 struct cmp_gt {
-    template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs > rhs; }
-    bool operator()(const bytes& lhs, const bytes& rhs) const { return compare_unsigned(lhs, rhs) > 0; }
+    // bytes only has <, so we cannot use >; lhs > rhs is spelled rhs < lhs.
+    template <typename T> bool operator()(const T& lhs, const T& rhs) const { return rhs < lhs; }
    static constexpr const char* diagnostic = "GT operator";
 };

-// True if v is between lb and ub, inclusive.  Throws or returns false
-// (depending on bounds_from_query parameter) if lb > ub.
+// True if v is between lb and ub, inclusive.  Throws if lb > ub.
 template <typename T>
-static bool check_BETWEEN(const T& v, const T& lb, const T& ub, bool bounds_from_query) {
-    if (cmp_lt()(ub, lb)) {
-        if (bounds_from_query) {
-            throw api_error("ValidationException",
-                format("BETWEEN operator requires lower_bound <= upper_bound, but {} > {}", lb, ub));
-        } else {
-            return false;
-        }
+bool check_BETWEEN(const T& v, const T& lb, const T& ub) {
+    if (ub < lb) {
+        throw api_error("ValidationException",
+                        format("BETWEEN operator requires lower_bound <= upper_bound, but {} > {}", lb, ub));
    }
    return cmp_ge()(v, lb) && cmp_le()(v, ub);
 }

-static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const rjson::value& ub,
-                          bool v_from_query, bool lb_from_query, bool ub_from_query) {
-    if ((v && v_from_query && !check_comparable_type(*v)) ||
-        (lb_from_query && !check_comparable_type(lb)) ||
-        (ub_from_query && !check_comparable_type(ub))) {
-        throw api_error("ValidationException", "between allow only the types String, Number, or Binary");
-
-    }
-    if (!v || !v->IsObject() || v->MemberCount() != 1 ||
-        !lb.IsObject() || lb.MemberCount() != 1 ||
-        !ub.IsObject() || ub.MemberCount() != 1) {
+static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const rjson::value& ub) {
+    if (!v) {
        return false;
    }
+    if (!v->IsObject() || v->MemberCount() != 1) {
+        throw api_error("ValidationException", format("BETWEEN operator encountered malformed AttributeValue: {}", *v));
+    }
+    if (!lb.IsObject() || lb.MemberCount() != 1) {
+        throw api_error("ValidationException", format("BETWEEN operator encountered malformed AttributeValue: {}", lb));
+    }
+    if (!ub.IsObject() || ub.MemberCount() != 1) {
+        throw api_error("ValidationException", format("BETWEEN operator encountered malformed AttributeValue: {}", ub));
+    }

    const auto& kv_v = *v->MemberBegin();
    const auto& kv_lb = *lb.MemberBegin();
    const auto& kv_ub = *ub.MemberBegin();
-    bool bounds_from_query = lb_from_query && ub_from_query;
    if (kv_lb.name != kv_ub.name) {
-        if (bounds_from_query) {
-           throw api_error("ValidationException",
+        throw api_error(
+                "ValidationException",
                format("BETWEEN operator requires the same type for lower and upper bound; instead got {} and {}",
                       kv_lb.name, kv_ub.name));
-        } else {
-            return false;
-        }
    }
    if (kv_v.name != kv_lb.name) { // Cannot compare different types, so v is NOT between lb and ub.
        return false;
    }
    if (kv_v.name == "N") {
        const char* diag = "BETWEEN operator";
-        return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag), bounds_from_query);
+        return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag));
    }
    if (kv_v.name == "S") {
        return check_BETWEEN(std::string_view(kv_v.value.GetString(), kv_v.value.GetStringLength()),
                             std::string_view(kv_lb.value.GetString(), kv_lb.value.GetStringLength()),
-                             std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()),
-                             bounds_from_query);
+                             std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()));
    }
    if (kv_v.name == "B") {
-        return check_BETWEEN(base64_decode(kv_v.value), base64_decode(kv_lb.value), base64_decode(kv_ub.value), bounds_from_query);
+        return check_BETWEEN(base64_decode(kv_v.value), base64_decode(kv_lb.value), base64_decode(kv_ub.value));
    }
-    if (v_from_query) {
-        throw api_error("ValidationException",
-            format("BETWEEN operator requires AttributeValueList elements to be of type String, Number, or Binary; instead got {}",
+    throw api_error("ValidationException",
+        format("BETWEEN operator requires AttributeValueList elements to be of type String, Number, or Binary; instead got {}",
               kv_lb.name));
-    } else {
-        return false;
-    }
 }

 // Verify one Expect condition on one attribute (whose content is "got")
@@ -484,19 +469,19 @@ static bool verify_expected_one(const rjson::value& condition, const rjson::valu
            return check_NE(got, (*attribute_value_list)[0]);
        case comparison_operator_type::LT:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_compare(got, (*attribute_value_list)[0], cmp_lt{}, false, true);
+            return check_compare(got, (*attribute_value_list)[0], cmp_lt{});
        case comparison_operator_type::LE:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_compare(got, (*attribute_value_list)[0], cmp_le{}, false, true);
+            return check_compare(got, (*attribute_value_list)[0], cmp_le{});
        case comparison_operator_type::GT:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_compare(got, (*attribute_value_list)[0], cmp_gt{}, false, true);
+            return check_compare(got, (*attribute_value_list)[0], cmp_gt{});
        case comparison_operator_type::GE:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_compare(got, (*attribute_value_list)[0], cmp_ge{}, false, true);
+            return check_compare(got, (*attribute_value_list)[0], cmp_ge{});
        case comparison_operator_type::BEGINS_WITH:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_BEGINS_WITH(got, (*attribute_value_list)[0], false, true);
+            return check_BEGINS_WITH(got, (*attribute_value_list)[0]);
        case comparison_operator_type::IN:
            verify_operand_count(attribute_value_list, nonempty(), *comparison_operator);
            return check_IN(got, *attribute_value_list);
@@ -508,196 +493,72 @@ static bool verify_expected_one(const rjson::value& condition, const rjson::valu
            return check_NOT_NULL(got);
        case comparison_operator_type::BETWEEN:
            verify_operand_count(attribute_value_list, exact_size(2), *comparison_operator);
-            return check_BETWEEN(got, (*attribute_value_list)[0], (*attribute_value_list)[1],
-                                 false, true, true);
+            return check_BETWEEN(got, (*attribute_value_list)[0], (*attribute_value_list)[1]);
        case comparison_operator_type::CONTAINS:
-            {
-                verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-                // Expected's "CONTAINS" has this artificial limitation.
-                // ConditionExpression's "contains()" does not...
-                const rjson::value& arg = (*attribute_value_list)[0];
-                const auto& argtype = (*arg.MemberBegin()).name;
-                if (argtype != "S" && argtype != "N" && argtype != "B") {
-                    throw api_error("ValidationException",
-                            format("CONTAINS operator requires a single AttributeValue of type String, Number, or Binary, "
-                                    "got {} instead", argtype));
-                }
-                return check_CONTAINS(got, arg);
-            }
+            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
+            return check_CONTAINS(got, (*attribute_value_list)[0]);
        case comparison_operator_type::NOT_CONTAINS:
-            {
-                verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-                // Expected's "NOT_CONTAINS" has this artificial limitation.
-                // ConditionExpression's "contains()" does not...
-                const rjson::value& arg = (*attribute_value_list)[0];
-                const auto& argtype = (*arg.MemberBegin()).name;
-                if (argtype != "S" && argtype != "N" && argtype != "B") {
-                    throw api_error("ValidationException",
-                            format("CONTAINS operator requires a single AttributeValue of type String, Number, or Binary, "
-                                    "got {} instead", argtype));
-                }
-                return check_NOT_CONTAINS(got, arg);
-            }
+            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
+            return check_NOT_CONTAINS(got, (*attribute_value_list)[0]);
        }
        throw std::logic_error(format("Internal error: corrupted operator enum: {}", int(op)));
    }
 }

-conditional_operator_type get_conditional_operator(const rjson::value& req) {
-    const rjson::value* conditional_operator = rjson::find(req, "ConditionalOperator");
-    if (!conditional_operator) {
-        return conditional_operator_type::MISSING;
-    }
-    if (!conditional_operator->IsString()) {
-        throw api_error("ValidationException", "'ConditionalOperator' parameter, if given, must be a string");
-    }
-    auto s = rjson::to_string_view(*conditional_operator);
-    if (s == "AND") {
-        return conditional_operator_type::AND;
-    } else if (s == "OR") {
-        return conditional_operator_type::OR;
-    } else {
-        throw api_error("ValidationException",
-                format("'ConditionalOperator' parameter must be AND, OR or missing. Found {}.", s));
-    }
-}
-
-// Check if the existing values of the item (previous_item) match the
+// Verify that the existing values of the item (previous_item) match the
 // conditions given by the Expected and ConditionalOperator parameters
 // (if they exist) in the request (an UpdateItem, PutItem or DeleteItem).
-// This function can throw an ValidationException API error if there
+// This function will throw a ConditionalCheckFailedException API error
+// if the values do not match the condition, or ValidationException if there
 // are errors in the format of the condition itself.
-bool verify_expected(const rjson::value& req, const rjson::value* previous_item) {
+void verify_expected(const rjson::value& req, const std::unique_ptr<rjson::value>& previous_item) {
    const rjson::value* expected = rjson::find(req, "Expected");
-    auto conditional_operator = get_conditional_operator(req);
-    if (conditional_operator != conditional_operator_type::MISSING &&
-        (!expected || (expected->IsObject() && expected->GetObject().ObjectEmpty()))) {
-            throw api_error("ValidationException", "'ConditionalOperator' parameter cannot be specified for missing or empty Expression");
-    }
    if (!expected) {
-        return true;
+        return;
    }
    if (!expected->IsObject()) {
        throw api_error("ValidationException", "'Expected' parameter, if given, must be an object");
    }
-    bool require_all = conditional_operator != conditional_operator_type::OR;
-    return verify_condition(*expected, require_all, previous_item);
-}
+    // ConditionalOperator can be "AND" for requiring all conditions, or
+    // "OR" for requiring one condition, and defaults to "AND" if missing.
+    const rjson::value* conditional_operator = rjson::find(req, "ConditionalOperator");
+    bool require_all = true;
+    if (conditional_operator) {
+        if (!conditional_operator->IsString()) {
+            throw api_error("ValidationException", "'ConditionalOperator' parameter, if given, must be a string");
+        }
+        std::string_view s(conditional_operator->GetString(), conditional_operator->GetStringLength());
+        if (s == "AND") {
+            // require_all is already true
+        } else if (s == "OR") {
+            require_all = false;
+        } else {
+            throw api_error("ValidationException", "'ConditionalOperator' parameter must be AND, OR or missing");
+        }
+        if (expected->GetObject().ObjectEmpty()) {
+            throw api_error("ValidationException", "'ConditionalOperator' parameter cannot be specified for empty Expression");
+        }
+    }

-bool verify_condition(const rjson::value& condition, bool require_all, const rjson::value* previous_item) {
-    for (auto it = condition.MemberBegin(); it != condition.MemberEnd(); ++it) {
+    for (auto it = expected->MemberBegin(); it != expected->MemberEnd(); ++it) {
        const rjson::value* got = nullptr;
-        if (previous_item) {
-            got = rjson::find(*previous_item, rjson::to_string_view(it->name));
+        if (previous_item && previous_item->IsObject() && previous_item->HasMember("Item")) {
+            got = rjson::find((*previous_item)["Item"], rjson::string_ref_type(it->name.GetString()));
        }
        bool success = verify_expected_one(it->value, got);
        if (success && !require_all) {
            // When !require_all, one success is enough!
-            return true;
+            return;
        } else if (!success && require_all) {
            // When require_all, one failure is enough!
-            return false;
+            throw api_error("ConditionalCheckFailedException", "Failed condition.");
        }
    }
    // If we got here and require_all, none of the checks failed, so succeed.
    // If we got here and !require_all, all of the checks failed, so fail.
-    return require_all;
-}
-
-static bool calculate_primitive_condition(const parsed::primitive_condition& cond,
-        const rjson::value* previous_item) {
-    std::vector<rjson::value> calculated_values;
-    calculated_values.reserve(cond._values.size());
-    for (const parsed::value& v : cond._values) {
-        calculated_values.push_back(calculate_value(v,
-                cond._op == parsed::primitive_condition::type::VALUE ?
-                        calculate_value_caller::ConditionExpressionAlone :
-                        calculate_value_caller::ConditionExpression,
-                previous_item));
-    }
-    switch (cond._op) {
-    case parsed::primitive_condition::type::BETWEEN:
-        if (calculated_values.size() != 3) {
-            // Shouldn't happen unless we have a bug in the parser
-            throw std::logic_error(format("Wrong number of values {} in BETWEEN primitive_condition", cond._values.size()));
-        }
-        return check_BETWEEN(&calculated_values[0], calculated_values[1], calculated_values[2],
-                             cond._values[0].is_constant(), cond._values[1].is_constant(), cond._values[2].is_constant());
-    case parsed::primitive_condition::type::IN:
-        return check_IN(calculated_values);
-    case parsed::primitive_condition::type::VALUE:
-        if (calculated_values.size() != 1) {
-            // Shouldn't happen unless we have a bug in the parser
-            throw std::logic_error(format("Unexpected values in primitive_condition", cond._values.size()));
-        }
-        // Unwrap the boolean wrapped as the value (if it is a boolean)
-        if (calculated_values[0].IsObject() && calculated_values[0].MemberCount() == 1) {
-            auto it = calculated_values[0].MemberBegin();
-            if (it->name == "BOOL" && it->value.IsBool()) {
-                return it->value.GetBool();
-            }
-        }
-        throw api_error("ValidationException",
-                format("ConditionExpression: condition results in a non-boolean value: {}",
-                        calculated_values[0]));
-    default:
-        // All the rest of the operators have exactly two parameters (and unless
-        // we have a bug in the parser, that's what we have in the parsed object:
-        if (calculated_values.size() != 2) {
-            throw std::logic_error(format("Wrong number of values {} in primitive_condition object", cond._values.size()));
-        }
-    }
-    switch (cond._op) {
-    case parsed::primitive_condition::type::EQ:
-        return check_EQ(&calculated_values[0], calculated_values[1]);
-    case parsed::primitive_condition::type::NE:
-        return check_NE(&calculated_values[0], calculated_values[1]);
-    case parsed::primitive_condition::type::GT:
-        return check_compare(&calculated_values[0], calculated_values[1], cmp_gt{},
-            cond._values[0].is_constant(), cond._values[1].is_constant());
-    case parsed::primitive_condition::type::GE:
-        return check_compare(&calculated_values[0], calculated_values[1], cmp_ge{},
-            cond._values[0].is_constant(), cond._values[1].is_constant());
-    case parsed::primitive_condition::type::LT:
-        return check_compare(&calculated_values[0], calculated_values[1], cmp_lt{},
-            cond._values[0].is_constant(), cond._values[1].is_constant());
-    case parsed::primitive_condition::type::LE:
-        return check_compare(&calculated_values[0], calculated_values[1], cmp_le{},
-            cond._values[0].is_constant(), cond._values[1].is_constant());
-    default:
-        // Shouldn't happen unless we have a bug in the parser
-        throw std::logic_error(format("Unknown type {} in primitive_condition object", (int)(cond._op)));
+    if (!require_all) {
+        throw api_error("ConditionalCheckFailedException", "None of ORed Expect conditions were successful.");
    }
 }

-// Check if the existing values of the item (previous_item) match the
-// conditions given by the given parsed ConditionExpression.
-bool verify_condition_expression(
-        const parsed::condition_expression& condition_expression,
-        const rjson::value* previous_item) {
-    if (condition_expression.empty()) {
-        return true;
-    }
-    bool ret = std::visit(overloaded_functor {
-        [&] (const parsed::primitive_condition& cond) -> bool {
-            return calculate_primitive_condition(cond, previous_item);
-        },
-        [&] (const parsed::condition_expression::condition_list& list) -> bool {
-            auto verify_condition = [&] (const parsed::condition_expression& e) {
-                return verify_condition_expression(e, previous_item);
-            };
-            switch (list.op) {
-            case '&':
-                return boost::algorithm::all_of(list.conditions, verify_condition);
-            case '|':
-                return boost::algorithm::any_of(list.conditions, verify_condition);
-            default:
-                // Shouldn't happen unless we have a bug in the parser
-                throw std::logic_error("bad operator in condition_list");
-            }
-        }
-    }, condition_expression._expression);
-    return condition_expression._negated ? !ret : ret;
-}
-
 }
--- a/alternator/conditions.hh
+++ b/alternator/conditions.hh
@@ -33,7 +33,6 @@

 #include "cql3/restrictions/statement_restrictions.hh"
 #include "serialization.hh"
-#include "expressions_types.hh"

 namespace alternator {

@@ -43,19 +42,8 @@ enum class comparison_operator_type {

 comparison_operator_type get_comparison_operator(const rjson::value& comparison_operator);

-enum class conditional_operator_type {
-    AND, OR, MISSING
-};
-conditional_operator_type get_conditional_operator(const rjson::value& req);
+::shared_ptr<cql3::restrictions::statement_restrictions> get_filtering_restrictions(schema_ptr schema, const column_definition& attrs_col, const rjson::value& query_filter);

-bool verify_expected(const rjson::value& req, const rjson::value* previous_item);
-bool verify_condition(const rjson::value& condition, bool require_all, const rjson::value* previous_item);
-
-bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2);
-bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2, bool v1_from_query, bool v2_from_query);
-
-bool verify_condition_expression(
-        const parsed::condition_expression& condition_expression,
-        const rjson::value* previous_item);
+void verify_expected(const rjson::value& req, const std::unique_ptr<rjson::value>& previous_item);

 }
--- a/alternator/executor.cc
+++ b/alternator/executor.cc
--- a/alternator/executor.hh
+++ b/alternator/executor.hh
@@ -25,57 +25,45 @@
 #include <seastar/http/httpd.hh>
 #include "seastarx.hh"
 #include <seastar/json/json_elements.hh>
-#include <seastar/core/sharded.hh>

 #include "service/storage_proxy.hh"
 #include "service/migration_manager.hh"
 #include "service/client_state.hh"

-#include "alternator/error.hh"
 #include "stats.hh"
-#include "rjson.hh"

 namespace alternator {

-class executor : public peering_sharded_service<executor> {
+class executor {
    service::storage_proxy& _proxy;
    service::migration_manager& _mm;
-    // An smp_service_group to be used for limiting the concurrency when
-    // forwarding Alternator request between shards - if necessary for LWT.
-    smp_service_group _ssg;

 public:
    using client_state = service::client_state;
-    using request_return_type = std::variant<json::json_return_type, api_error>;
    stats _stats;
    static constexpr auto ATTRS_COLUMN_NAME = ":attrs";
-    static constexpr auto KEYSPACE_NAME_PREFIX = "alternator_";
-    static constexpr std::string_view INTERNAL_TABLE_PREFIX = ".scylla.alternator.";
+    static constexpr auto KEYSPACE_NAME = "alternator";

-    executor(service::storage_proxy& proxy, service::migration_manager& mm, smp_service_group ssg)
-        : _proxy(proxy), _mm(mm), _ssg(ssg) {}
+    executor(service::storage_proxy& proxy, service::migration_manager& mm) : _proxy(proxy), _mm(mm) {}

-    future<request_return_type> create_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> describe_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> delete_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> put_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> get_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> delete_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> update_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> list_tables(client_state& client_state, service_permit permit, rjson::value request);
-    future<request_return_type> scan(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> describe_endpoints(client_state& client_state, service_permit permit, rjson::value request, std::string host_header);
-    future<request_return_type> batch_write_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> batch_get_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> query(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> tag_resource(client_state& client_state, service_permit permit, rjson::value request);
-    future<request_return_type> untag_resource(client_state& client_state, service_permit permit, rjson::value request);
-    future<request_return_type> list_tags_of_resource(client_state& client_state, service_permit permit, rjson::value request);
+    future<json::json_return_type> create_table(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
+    future<json::json_return_type> describe_table(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
+    future<json::json_return_type> delete_table(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
+    future<json::json_return_type> put_item(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
+    future<json::json_return_type> get_item(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
+    future<json::json_return_type> delete_item(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
+    future<json::json_return_type> update_item(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
+    future<json::json_return_type> list_tables(client_state& client_state, std::string content);
+    future<json::json_return_type> scan(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
+    future<json::json_return_type> describe_endpoints(client_state& client_state, std::string content, std::string host_header);
+    future<json::json_return_type> batch_write_item(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
+    future<json::json_return_type> batch_get_item(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
+    future<json::json_return_type> query(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);

    future<> start();
    future<> stop() { return make_ready_future<>(); }

-    future<> create_keyspace(std::string_view keyspace_name);
+    future<> maybe_create_keyspace();

    static tracing::trace_state_ptr maybe_trace_query(client_state& client_state, sstring_view op, sstring_view query);
 };
--- a/alternator/expressions.cc
+++ b/alternator/expressions.cc
@@ -20,24 +20,15 @@
 */

 #include "expressions.hh"
-#include "serialization.hh"
-#include "base64.hh"
-#include "conditions.hh"
 #include "alternator/expressionsLexer.hpp"
 #include "alternator/expressionsParser.hpp"
-#include "utils/overloaded_functor.hh"
-#include "error.hh"

-#include "seastarx.hh"
+#include <seastarx.hh>

 #include <seastar/core/print.hh>
 #include <seastar/util/log.hh>

-#include <boost/algorithm/cxx11/any_of.hpp>
-#include <boost/algorithm/cxx11/all_of.hpp>
-
 #include <functional>
-#include <unordered_map>

 namespace alternator {

@@ -74,19 +65,13 @@ parse_projection_expression(std::string query) {
    }
 }

-parsed::condition_expression
-parse_condition_expression(std::string query) {
-    try {
-        return do_with_parser(query,  std::mem_fn(&expressionsParser::condition_expression));
-    } catch (...) {
-        throw expressions_syntax_error(format("Failed parsing ConditionExpression '{}': {}", query, std::current_exception()));
-    }
-}
+template<class... Ts> struct overloaded : Ts... { using Ts::operator()...; };
+template<class... Ts> overloaded(Ts...) -> overloaded<Ts...>;

 namespace parsed {

 void update_expression::add(update_expression::action a) {
-    std::visit(overloaded_functor {
+    std::visit(overloaded {
        [&] (action::set&)    { seen_set = true; },
        [&] (action::remove&) { seen_remove = true; },
        [&] (action::add&)    { seen_add = true; },
@@ -109,576 +94,5 @@ void update_expression::append(update_expression other) {
    seen_del |= other.seen_del;
 }

-void condition_expression::append(condition_expression&& a, char op) {
-    std::visit(overloaded_functor {
-        [&] (condition_list& x) {
-            // If 'a' has a single condition, we could, instead of inserting
-            // it insert its single condition (possibly negated if a._negated)
-            // But considering it we don't evaluate these expressions many
-            // times, this optimization is not worth extra code complexity.
-            if (!x.conditions.empty() && x.op != op) {
-                // Shouldn't happen unless we have a bug in the parser
-                throw std::logic_error("condition_expression::append called with mixed operators");
-            }
-            x.conditions.push_back(std::move(a));
-            x.op = op;
-        },
-        [&] (primitive_condition& x) {
-            // Shouldn't happen unless we have a bug in the parser
-            throw std::logic_error("condition_expression::append called on primitive_condition");
-        }
-    }, _expression);
-}
-
 } // namespace parsed
-
-// The following resolve_*() functions resolve references in parsed
-// expressions of different types. Resolving a parsed expression means
-// replacing:
-//  1. In parsed::path objects, replace references like "#name" with the
-//     attribute name from ExpressionAttributeNames,
-//  2. In parsed::constant objects, replace references like ":value" with
-//     the value from ExpressionAttributeValues.
-// These function also track which name and value references were used, to
-// allow complaining if some remain unused.
-// Note that the resolve_*() functions modify the expressions in-place,
-// so if we ever intend to cache parsed expression, we need to pass a copy
-// into this function.
-//
-// Doing the "resolving" stage before the evaluation stage has two benefits.
-// First, it allows us to be compatible with DynamoDB in catching unused
-// names and values (see issue #6572). Second, in the FilterExpression case,
-// we need to resolve the expression just once but then use it many times
-// (once for each item to be filtered).
-
-static void resolve_path(parsed::path& p,
-        const rjson::value* expression_attribute_names,
-        std::unordered_set<std::string>& used_attribute_names) {
-    const std::string& column_name = p.root();
-    if (column_name.size() > 0 && column_name.front() == '#') {
-        if (!expression_attribute_names) {
-            throw api_error("ValidationException",
-                    format("ExpressionAttributeNames missing, entry '{}' required by expression", column_name));
-        }
-        const rjson::value* value = rjson::find(*expression_attribute_names, column_name);
-        if (!value || !value->IsString()) {
-            throw api_error("ValidationException",
-                    format("ExpressionAttributeNames missing entry '{}' required by expression", column_name));
-        }
-        used_attribute_names.emplace(column_name);
-        p.set_root(std::string(rjson::to_string_view(*value)));
-    }
-}
-
-static void resolve_constant(parsed::constant& c,
-        const rjson::value* expression_attribute_values,
-        std::unordered_set<std::string>& used_attribute_values) {
-    std::visit(overloaded_functor {
-        [&] (const std::string& valref) {
-            if (!expression_attribute_values) {
-                throw api_error("ValidationException",
-                        format("ExpressionAttributeValues missing, entry '{}' required by expression", valref));
-            }
-            const rjson::value* value = rjson::find(*expression_attribute_values, valref);
-            if (!value) {
-                throw api_error("ValidationException",
-                        format("ExpressionAttributeValues missing entry '{}' required by expression", valref));
-            }
-            if (value->IsNull()) {
-                throw api_error("ValidationException",
-                        format("ExpressionAttributeValues null value for entry '{}' required by expression", valref));
-            }
-            validate_value(*value, "ExpressionAttributeValues");
-            used_attribute_values.emplace(valref);
-            c.set(*value);
-        },
-        [&] (const parsed::constant::literal& lit) {
-            // Nothing to do, already resolved
-        }
-    }, c._value);
-
-}
-
-void resolve_value(parsed::value& rhs,
-        const rjson::value* expression_attribute_names,
-        const rjson::value* expression_attribute_values,
-        std::unordered_set<std::string>& used_attribute_names,
-        std::unordered_set<std::string>& used_attribute_values) {
-    std::visit(overloaded_functor {
-        [&] (parsed::constant& c) {
-            resolve_constant(c, expression_attribute_values, used_attribute_values);
-        },
-        [&] (parsed::value::function_call& f) {
-            for (parsed::value& value : f._parameters) {
-                resolve_value(value, expression_attribute_names, expression_attribute_values,
-                        used_attribute_names, used_attribute_values);
-            }
-        },
-        [&] (parsed::path& p) {
-            resolve_path(p, expression_attribute_names, used_attribute_names);
-        }
-    }, rhs._value);
-}
-
-void resolve_set_rhs(parsed::set_rhs& rhs,
-        const rjson::value* expression_attribute_names,
-        const rjson::value* expression_attribute_values,
-        std::unordered_set<std::string>& used_attribute_names,
-        std::unordered_set<std::string>& used_attribute_values) {
-    resolve_value(rhs._v1, expression_attribute_names, expression_attribute_values,
-            used_attribute_names, used_attribute_values);
-    if (rhs._op != 'v') {
-        resolve_value(rhs._v2, expression_attribute_names, expression_attribute_values,
-                used_attribute_names, used_attribute_values);
-    }
-}
-
-void resolve_update_expression(parsed::update_expression& ue,
-        const rjson::value* expression_attribute_names,
-        const rjson::value* expression_attribute_values,
-        std::unordered_set<std::string>& used_attribute_names,
-        std::unordered_set<std::string>& used_attribute_values) {
-    for (parsed::update_expression::action& action : ue.actions()) {
-        resolve_path(action._path, expression_attribute_names, used_attribute_names);
-        std::visit(overloaded_functor {
-            [&] (parsed::update_expression::action::set& a) {
-                resolve_set_rhs(a._rhs, expression_attribute_names, expression_attribute_values,
-                        used_attribute_names, used_attribute_values);
-            },
-            [&] (parsed::update_expression::action::remove& a) {
-                // nothing to do
-            },
-            [&] (parsed::update_expression::action::add& a) {
-                resolve_constant(a._valref, expression_attribute_values, used_attribute_values);
-            },
-            [&] (parsed::update_expression::action::del& a) {
-                resolve_constant(a._valref, expression_attribute_values, used_attribute_values);
-            }
-        }, action._action);
-    }
-}
-
-static void resolve_primitive_condition(parsed::primitive_condition& pc,
-        const rjson::value* expression_attribute_names,
-        const rjson::value* expression_attribute_values,
-        std::unordered_set<std::string>& used_attribute_names,
-        std::unordered_set<std::string>& used_attribute_values) {
-    for (parsed::value& value : pc._values) {
-        resolve_value(value,
-                expression_attribute_names, expression_attribute_values,
-                used_attribute_names, used_attribute_values);
-    }
-}
-
-void resolve_condition_expression(parsed::condition_expression& ce,
-        const rjson::value* expression_attribute_names,
-        const rjson::value* expression_attribute_values,
-        std::unordered_set<std::string>& used_attribute_names,
-        std::unordered_set<std::string>& used_attribute_values) {
-    std::visit(overloaded_functor {
-        [&] (parsed::primitive_condition& cond) {
-            resolve_primitive_condition(cond,
-                    expression_attribute_names, expression_attribute_values,
-                    used_attribute_names, used_attribute_values);
-        },
-        [&] (parsed::condition_expression::condition_list& list) {
-            for (parsed::condition_expression& cond : list.conditions) {
-                resolve_condition_expression(cond,
-                        expression_attribute_names, expression_attribute_values,
-                            used_attribute_names, used_attribute_values);
-
-            }
-        }
-    }, ce._expression);
-}
-
-void resolve_projection_expression(std::vector<parsed::path>& pe,
-        const rjson::value* expression_attribute_names,
-        std::unordered_set<std::string>& used_attribute_names) {
-    for (parsed::path& p : pe) {
-        resolve_path(p, expression_attribute_names, used_attribute_names);
-    }
-}
-
-// condition_expression_on() checks whether a condition_expression places any
-// condition on the given attribute. It can be useful, for example, for
-// checking whether the condition tries to restrict a key column.
-
-static bool value_on(const parsed::value& v, std::string_view attribute) {
-    return std::visit(overloaded_functor {
-        [&] (const parsed::constant& c) {
-            return false;
-        },
-        [&] (const parsed::value::function_call& f) {
-            for (const parsed::value& value : f._parameters) {
-                if (value_on(value, attribute)) {
-                    return true;
-                }
-            }
-            return false;
-        },
-        [&] (const parsed::path& p) {
-            return p.root() == attribute;
-        }
-    }, v._value);
-}
-
-static bool primitive_condition_on(const parsed::primitive_condition& pc, std::string_view attribute) {
-    for (const parsed::value& value : pc._values) {
-        if (value_on(value, attribute)) {
-            return true;
-        }
-    }
-    return false;
-}
-
-bool condition_expression_on(const parsed::condition_expression& ce, std::string_view attribute) {
-    return std::visit(overloaded_functor {
-        [&] (const parsed::primitive_condition& cond) {
-            return primitive_condition_on(cond, attribute);
-        },
-        [&] (const parsed::condition_expression::condition_list& list) {
-            for (const parsed::condition_expression& cond : list.conditions) {
-                if (condition_expression_on(cond, attribute)) {
-                    return true;
-                }
-            }
-            return false;
-        }
-    }, ce._expression);
-}
-
-// for_condition_expression_on() runs a given function over all the attributes
-// mentioned in the expression. If the same attribute is mentioned more than
-// once, the function will be called more than once for the same attribute.
-
-static void for_value_on(const parsed::value& v, const noncopyable_function<void(std::string_view)>& func) {
-    std::visit(overloaded_functor {
-        [&] (const parsed::constant& c) { },
-        [&] (const parsed::value::function_call& f) {
-            for (const parsed::value& value : f._parameters) {
-                for_value_on(value, func);
-            }
-        },
-        [&] (const parsed::path& p) {
-            func(p.root());
-        }
-    }, v._value);
-}
-
-void for_condition_expression_on(const parsed::condition_expression& ce, const noncopyable_function<void(std::string_view)>& func) {
-    std::visit(overloaded_functor {
-        [&] (const parsed::primitive_condition& cond) {
-            for (const parsed::value& value : cond._values) {
-                for_value_on(value, func);
-            }
-        },
-        [&] (const parsed::condition_expression::condition_list& list) {
-            for (const parsed::condition_expression& cond : list.conditions) {
-                for_condition_expression_on(cond, func);
-            }
-        }
-    }, ce._expression);
-}
-
-// The following calculate_value() functions calculate, or evaluate, a parsed
-// expression. The parsed expression is assumed to have been "resolved", with
-// the matching resolve_* function.
-
-// Take two JSON-encoded list values (remember that a list value is
-// {"L": [...the actual list]}) and return the concatenation, again as
-// a list value.
-static rjson::value list_concatenate(const rjson::value& v1, const rjson::value& v2) {
-    const rjson::value* list1 = unwrap_list(v1);
-    const rjson::value* list2 = unwrap_list(v2);
-    if (!list1 || !list2) {
-        throw api_error("ValidationException", "UpdateExpression: list_append() given a non-list");
-    }
-    rjson::value cat = rjson::copy(*list1);
-    for (const auto& a : list2->GetArray()) {
-        rjson::push_back(cat, rjson::copy(a));
-    }
-    rjson::value ret = rjson::empty_object();
-    rjson::set(ret, "L", std::move(cat));
-    return ret;
-}
-
-// calculate_size() is ConditionExpression's size() function, i.e., it takes
-// a JSON-encoded value and returns its "size" as defined differently for the
-// different types - also as a JSON-encoded number.
-// It return a JSON-encoded "null" value if this value's type has no size
-// defined. Comparisons against this non-numeric value will later fail.
-static rjson::value calculate_size(const rjson::value& v) {
-    // NOTE: If v is improperly formatted for our JSON value encoding, it
-    // must come from the request itself, not from the database, so it makes
-    // sense to throw a ValidationException if we see such a problem.
-    if (!v.IsObject() || v.MemberCount() != 1) {
-        throw api_error("ValidationException", format("invalid object: {}", v));
-    }
-    auto it = v.MemberBegin();
-    int ret;
-    if (it->name == "S") {
-        if (!it->value.IsString()) {
-            throw api_error("ValidationException", format("invalid string: {}", v));
-        }
-        ret = it->value.GetStringLength();
-    } else if (it->name == "NS" || it->name == "SS" || it->name == "BS" || it->name == "L") {
-        if (!it->value.IsArray()) {
-            throw api_error("ValidationException", format("invalid set: {}", v));
-        }
-        ret = it->value.Size();
-    } else if (it->name == "M") {
-        if (!it->value.IsObject()) {
-            throw api_error("ValidationException", format("invalid map: {}", v));
-        }
-        ret = it->value.MemberCount();
-    } else if (it->name == "B") {
-        if (!it->value.IsString()) {
-            throw api_error("ValidationException", format("invalid byte string: {}", v));
-        }
-        ret = base64_decoded_len(rjson::to_string_view(it->value));
-    } else {
-        rjson::value json_ret = rjson::empty_object();
-        rjson::set(json_ret, "null", rjson::value(true));
-        return json_ret;
-    }
-    rjson::value json_ret = rjson::empty_object();
-    rjson::set(json_ret, "N", rjson::from_string(std::to_string(ret)));
-    return json_ret;
-}
-
-static const rjson::value& calculate_value(const parsed::constant& c) {
-    return std::visit(overloaded_functor {
-        [&] (const parsed::constant::literal& v) -> const rjson::value& {
-            return *v;
-        },
-        [&] (const std::string& valref) -> const rjson::value& {
-            // Shouldn't happen, we should have called resolve_value() earlier
-            // and replaced the value reference by the literal constant.
-            throw std::logic_error("calculate_value() called before resolve_value()");
-        }
-    }, c._value);
-}
-
-static rjson::value to_bool_json(bool b) {
-    rjson::value json_ret = rjson::empty_object();
-    rjson::set(json_ret, "BOOL", rjson::value(b));
-    return json_ret;
-}
-
-static bool known_type(std::string_view type) {
-    static thread_local const std::unordered_set<std::string_view> types = {
-            "N", "S", "B", "NS", "SS", "BS", "L", "M", "NULL", "BOOL"
-    };
-    return types.contains(type);
-}
-
-using function_handler_type = rjson::value(calculate_value_caller, const rjson::value*, const parsed::value::function_call&);
-static const
-std::unordered_map<std::string_view, function_handler_type*> function_handlers {
-    {"list_append", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
-            if (caller != calculate_value_caller::UpdateExpression) {
-                throw api_error("ValidationException",
-                        format("{}: list_append() not allowed here", caller));
-            }
-            if (f._parameters.size() != 2) {
-                throw api_error("ValidationException",
-                        format("{}: list_append() accepts 2 parameters, got {}", caller, f._parameters.size()));
-            }
-            rjson::value v1 = calculate_value(f._parameters[0], caller, previous_item);
-            rjson::value v2 = calculate_value(f._parameters[1], caller, previous_item);
-            return list_concatenate(v1, v2);
-        }
-    },
-    {"if_not_exists", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
-            if (caller != calculate_value_caller::UpdateExpression) {
-                throw api_error("ValidationException",
-                        format("{}: if_not_exists() not allowed here", caller));
-            }
-            if (f._parameters.size() != 2) {
-                throw api_error("ValidationException",
-                        format("{}: if_not_exists() accepts 2 parameters, got {}", caller, f._parameters.size()));
-            }
-            if (!std::holds_alternative<parsed::path>(f._parameters[0]._value)) {
-                throw api_error("ValidationException",
-                        format("{}: if_not_exists() must include path as its first argument", caller));
-            }
-            rjson::value v1 = calculate_value(f._parameters[0], caller, previous_item);
-            rjson::value v2 = calculate_value(f._parameters[1], caller, previous_item);
-            return v1.IsNull() ? std::move(v2) : std::move(v1);
-        }
-    },
-    {"size", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
-            if (caller != calculate_value_caller::ConditionExpression) {
-                throw api_error("ValidationException",
-                        format("{}: size() not allowed here", caller));
-            }
-            if (f._parameters.size() != 1) {
-                throw api_error("ValidationException",
-                        format("{}: size() accepts 1 parameter, got {}", caller, f._parameters.size()));
-            }
-            rjson::value v = calculate_value(f._parameters[0], caller, previous_item);
-            return calculate_size(v);
-        }
-    },
-    {"attribute_exists", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
-            if (caller != calculate_value_caller::ConditionExpressionAlone) {
-                throw api_error("ValidationException",
-                        format("{}: attribute_exists() not allowed here", caller));
-            }
-            if (f._parameters.size() != 1) {
-                throw api_error("ValidationException",
-                        format("{}: attribute_exists() accepts 1 parameter, got {}", caller, f._parameters.size()));
-            }
-            if (!std::holds_alternative<parsed::path>(f._parameters[0]._value)) {
-                throw api_error("ValidationException",
-                        format("{}: attribute_exists()'s parameter must be a path", caller));
-            }
-            rjson::value v = calculate_value(f._parameters[0], caller, previous_item);
-            return to_bool_json(!v.IsNull());
-        }
-    },
-    {"attribute_not_exists", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
-            if (caller != calculate_value_caller::ConditionExpressionAlone) {
-                throw api_error("ValidationException",
-                        format("{}: attribute_not_exists() not allowed here", caller));
-            }
-            if (f._parameters.size() != 1) {
-                throw api_error("ValidationException",
-                        format("{}: attribute_not_exists() accepts 1 parameter, got {}", caller, f._parameters.size()));
-            }
-            if (!std::holds_alternative<parsed::path>(f._parameters[0]._value)) {
-                throw api_error("ValidationException",
-                        format("{}: attribute_not_exists()'s parameter must be a path", caller));
-            }
-            rjson::value v = calculate_value(f._parameters[0], caller, previous_item);
-            return to_bool_json(v.IsNull());
-        }
-    },
-    {"attribute_type", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
-            if (caller != calculate_value_caller::ConditionExpressionAlone) {
-                throw api_error("ValidationException",
-                        format("{}: attribute_type() not allowed here", caller));
-            }
-            if (f._parameters.size() != 2) {
-                throw api_error("ValidationException",
-                        format("{}: attribute_type() accepts 2 parameters, got {}", caller, f._parameters.size()));
-            }
-            // There is no real reason for the following check (not
-            // allowing the type to come from a document attribute), but
-            // DynamoDB does this check, so we do too...
-            if (!f._parameters[1].is_constant()) {
-                throw api_error("ValidationException",
-                        format("{}: attribute_types()'s first parameter must be an expression attribute", caller));
-            }
-            rjson::value v0 = calculate_value(f._parameters[0], caller, previous_item);
-            rjson::value v1 = calculate_value(f._parameters[1], caller, previous_item);
-            if (v1.IsObject() && v1.MemberCount() == 1 && v1.MemberBegin()->name == "S") {
-                // If the type parameter is not one of the legal types
-                // we should generate an error, not a failed condition:
-                if (!known_type(rjson::to_string_view(v1.MemberBegin()->value))) {
-                    throw api_error("ValidationException",
-                            format("{}: attribute_types()'s second parameter, {}, is not a known type",
-                                    caller, v1.MemberBegin()->value));
-                }
-                if (v0.IsObject() && v0.MemberCount() == 1) {
-                    return to_bool_json(v1.MemberBegin()->value == v0.MemberBegin()->name);
-                } else {
-                    return to_bool_json(false);
-                }
-            } else {
-                throw api_error("ValidationException",
-                        format("{}: attribute_type() second parameter must refer to a string, got {}", caller, v1));
-            }
-        }
-    },
-    {"begins_with", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
-            if (caller != calculate_value_caller::ConditionExpressionAlone) {
-                throw api_error("ValidationException",
-                        format("{}: begins_with() not allowed here", caller));
-            }
-            if (f._parameters.size() != 2) {
-                throw api_error("ValidationException",
-                        format("{}: begins_with() accepts 2 parameters, got {}", caller, f._parameters.size()));
-            }
-            rjson::value v1 = calculate_value(f._parameters[0], caller, previous_item);
-            rjson::value v2 = calculate_value(f._parameters[1], caller, previous_item);
-            return to_bool_json(check_BEGINS_WITH(v1.IsNull() ? nullptr : &v1,  v2,
-                                    f._parameters[0].is_constant(), f._parameters[1].is_constant()));
-        }
-    },
-    {"contains", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
-            if (caller != calculate_value_caller::ConditionExpressionAlone) {
-                throw api_error("ValidationException",
-                        format("{}: contains() not allowed here", caller));
-            }
-            if (f._parameters.size() != 2) {
-                throw api_error("ValidationException",
-                        format("{}: contains() accepts 2 parameters, got {}", caller, f._parameters.size()));
-            }
-            rjson::value v1 = calculate_value(f._parameters[0], caller, previous_item);
-            rjson::value v2 = calculate_value(f._parameters[1], caller, previous_item);
-            return to_bool_json(check_CONTAINS(v1.IsNull() ? nullptr : &v1,  v2));
-        }
-    },
-};
-
-// Given a parsed::value, which can refer either to a constant value from
-// ExpressionAttributeValues, to the value of some attribute, or to a function
-// of other values, this function calculates the resulting value.
-// "caller" determines which expression - ConditionExpression or
-// UpdateExpression - is asking for this value. We need to know this because
-// DynamoDB allows a different choice of functions for different expressions.
-rjson::value calculate_value(const parsed::value& v,
-        calculate_value_caller caller,
-        const rjson::value* previous_item) {
-    return std::visit(overloaded_functor {
-        [&] (const parsed::constant& c) -> rjson::value {
-            return rjson::copy(calculate_value(c));
-        },
-        [&] (const parsed::value::function_call& f) -> rjson::value {
-            auto function_it = function_handlers.find(std::string_view(f._function_name));
-            if (function_it == function_handlers.end()) {
-                throw api_error("ValidationException",
-                        format("UpdateExpression: unknown function '{}' called.", f._function_name));
-            }
-            return function_it->second(caller, previous_item, f);
-        },
-        [&] (const parsed::path& p) -> rjson::value {
-            if (!previous_item) {
-                return rjson::null_value();
-            }
-            std::string update_path = p.root();
-            if (p.has_operators()) {
-                // FIXME: support this
-                throw api_error("ValidationException", "Reading attribute paths not yet implemented");
-            }
-            const rjson::value* previous_value = rjson::find(*previous_item, update_path);
-            return previous_value ? rjson::copy(*previous_value) : rjson::null_value();
-        }
-    }, v._value);
-}
-
-// Same as calculate_value() above, except takes a set_rhs, which may be
-// either a single value, or v1+v2 or v1-v2.
-rjson::value calculate_value(const parsed::set_rhs& rhs,
-        const rjson::value* previous_item) {
-    switch(rhs._op) {
-    case 'v':
-        return calculate_value(rhs._v1, calculate_value_caller::UpdateExpression, previous_item);
-    case '+': {
-        rjson::value v1 = calculate_value(rhs._v1, calculate_value_caller::UpdateExpression, previous_item);
-        rjson::value v2 = calculate_value(rhs._v2, calculate_value_caller::UpdateExpression, previous_item);
-        return number_add(v1, v2);
-    }
-    case '-': {
-        rjson::value v1 = calculate_value(rhs._v1, calculate_value_caller::UpdateExpression, previous_item);
-        rjson::value v2 = calculate_value(rhs._v2, calculate_value_caller::UpdateExpression, previous_item);
-        return number_subtract(v1, v2);
-    }
-    }
-    // Can't happen
-    return rjson::null_value();
-}
-
 } // namespace alternator
--- a/alternator/expressions.g
+++ b/alternator/expressions.g
@@ -145,12 +145,6 @@ REMOVE: R E M O V E;
 ADD: A D D;
 DELETE: D E L E T E;

-AND: A N D;
-OR: O R;
-NOT: N O T;
-BETWEEN: B E T W E E N;
-IN: I N;
-
 fragment ALPHA: 'A'..'Z' | 'a'..'z';
 fragment DIGIT: '0'..'9';
 fragment ALNUM: ALPHA | DIGIT | '_';
@@ -171,19 +165,19 @@ path returns [parsed::path p]:
      | '[' INTEGER ']'           { $p.add_index(std::stoi($INTEGER.text)); }
    )*;

-value returns [parsed::value v]:
-      VALREF       { $v.set_valref($VALREF.text); }
-    | path         { $v.set_path($path.p); }
-    | NAME         { $v.set_func_name($NAME.text); }
-     '(' x=value   { $v.add_func_parameter($x.v); }
-     (',' x=value  { $v.add_func_parameter($x.v); })*
+update_expression_set_value returns [parsed::value v]:
+      VALREF                             { $v.set_valref($VALREF.text); }
+    | path                               { $v.set_path($path.p); }
+    | NAME                               { $v.set_func_name($NAME.text); }
+     '(' x=update_expression_set_value   { $v.add_func_parameter($x.v); }
+     (',' x=update_expression_set_value  { $v.add_func_parameter($x.v); })*
     ')'
    ;

 update_expression_set_rhs returns [parsed::set_rhs rhs]:
-    v=value  { $rhs.set_value(std::move($v.v)); }
-    (   '+' v=value  { $rhs.set_plus(std::move($v.v)); }
-      | '-' v=value  { $rhs.set_minus(std::move($v.v)); }
+    v=update_expression_set_value  { $rhs.set_value(std::move($v.v)); }
+    (   '+' v=update_expression_set_value  { $rhs.set_plus(std::move($v.v)); }
+      | '-' v=update_expression_set_value  { $rhs.set_minus(std::move($v.v)); }
    )?
    ;

@@ -218,48 +212,3 @@ update_expression returns [parsed::update_expression e]:
 projection_expression returns [std::vector<parsed::path> v]:
    p=path      { $v.push_back(std::move($p.p)); }
    (',' p=path { $v.push_back(std::move($p.p)); } )* EOF;
-
-
-primitive_condition returns [parsed::primitive_condition c]:
-      v=value         { $c.add_value(std::move($v.v));
-                        $c.set_operator(parsed::primitive_condition::type::VALUE); }
-      (  (  '='       { $c.set_operator(parsed::primitive_condition::type::EQ); }
-          | '<' '>'   { $c.set_operator(parsed::primitive_condition::type::NE); }
-          | '<'       { $c.set_operator(parsed::primitive_condition::type::LT); }
-          | '<' '='   { $c.set_operator(parsed::primitive_condition::type::LE); }
-          | '>'       { $c.set_operator(parsed::primitive_condition::type::GT); }
-          | '>' '='   { $c.set_operator(parsed::primitive_condition::type::GE); }
-         )
-         v=value      { $c.add_value(std::move($v.v)); }
-       | BETWEEN      { $c.set_operator(parsed::primitive_condition::type::BETWEEN); }
-         v=value      { $c.add_value(std::move($v.v)); }
-         AND
-         v=value      { $c.add_value(std::move($v.v)); }
-       | IN '('       { $c.set_operator(parsed::primitive_condition::type::IN); }
-         v=value      { $c.add_value(std::move($v.v)); }
-         (',' v=value { $c.add_value(std::move($v.v)); })*
-         ')'
-      )?
-    ;
-
-// The following rules for parsing boolean expressions are verbose and
-// somewhat strange because of Antlr 3's limitations on recursive rules,
-// common rule prefixes, and (lack of) support for operator precedence.
-// These rules could have been written more clearly using a more powerful
-// parser generator - such as Yacc.
-boolean_expression returns [parsed::condition_expression e]:
-	  b=boolean_expression_1       { $e.append(std::move($b.e), '|'); }
-	  (OR b=boolean_expression_1   { $e.append(std::move($b.e), '|'); } )*
-	;
-boolean_expression_1 returns [parsed::condition_expression e]:
-	  b=boolean_expression_2       { $e.append(std::move($b.e), '&'); }
-	  (AND b=boolean_expression_2  { $e.append(std::move($b.e), '&'); } )*
-	;
-boolean_expression_2 returns [parsed::condition_expression e]:
-	  p=primitive_condition        { $e.set_primitive(std::move($p.c)); }
-	| NOT b=boolean_expression_2   { $e = std::move($b.e); $e.apply_not(); }
-	| '(' b=boolean_expression ')' { $e = std::move($b.e); }
-    ;
-
-condition_expression returns [parsed::condition_expression e]:
-    boolean_expression { e=std::move($boolean_expression.e); } EOF;
--- a/alternator/expressions.hh
+++ b/alternator/expressions.hh
@@ -24,13 +24,8 @@
 #include <string>
 #include <stdexcept>
 #include <vector>
-#include <unordered_set>
-#include <string_view>
-
-#include <seastar/util/noncopyable_function.hh>

 #include "expressions_types.hh"
-#include "rjson.hh"

 namespace alternator {

@@ -41,62 +36,6 @@ public:

 parsed::update_expression parse_update_expression(std::string query);
 std::vector<parsed::path> parse_projection_expression(std::string query);
-parsed::condition_expression parse_condition_expression(std::string query);
-
-void resolve_update_expression(parsed::update_expression& ue,
-        const rjson::value* expression_attribute_names,
-        const rjson::value* expression_attribute_values,
-        std::unordered_set<std::string>& used_attribute_names,
-        std::unordered_set<std::string>& used_attribute_values);
-void resolve_projection_expression(std::vector<parsed::path>& pe,
-        const rjson::value* expression_attribute_names,
-        std::unordered_set<std::string>& used_attribute_names);
-void resolve_condition_expression(parsed::condition_expression& ce,
-        const rjson::value* expression_attribute_names,
-        const rjson::value* expression_attribute_values,
-        std::unordered_set<std::string>& used_attribute_names,
-        std::unordered_set<std::string>& used_attribute_values);
-
-void validate_value(const rjson::value& v, const char* caller);
-
-bool condition_expression_on(const parsed::condition_expression& ce, std::string_view attribute);
-
-// for_condition_expression_on() runs the given function on the attributes
-// that the expression uses. It may run for the same attribute more than once
-// if the same attribute is used more than once in the expression.
-void for_condition_expression_on(const parsed::condition_expression& ce, const noncopyable_function<void(std::string_view)>& func);
-
-// calculate_value() behaves slightly different (especially, different
-// functions supported) when used in different types of expressions, as
-// enumerated in this enum:
-enum class calculate_value_caller {
-    UpdateExpression, ConditionExpression, ConditionExpressionAlone
-};
-
-inline std::ostream& operator<<(std::ostream& out, calculate_value_caller caller) {
-    switch (caller) {
-        case calculate_value_caller::UpdateExpression:
-            out << "UpdateExpression";
-            break;
-        case calculate_value_caller::ConditionExpression:
-            out << "ConditionExpression";
-            break;
-        case calculate_value_caller::ConditionExpressionAlone:
-            out << "ConditionExpression";
-            break;
-        default:
-            out << "unknown type of expression";
-            break;
-    }
-    return out;
-}
-
-rjson::value calculate_value(const parsed::value& v,
-        calculate_value_caller caller,
-        const rjson::value* previous_item);
-
-rjson::value calculate_value(const parsed::set_rhs& rhs,
-        const rjson::value* previous_item);


 } /* namespace alternator */
--- a/alternator/expressions_types.hh
+++ b/alternator/expressions_types.hh
@@ -25,10 +25,6 @@
 #include <string>
 #include <variant>

-#include <seastar/core/shared_ptr.hh>
-
-#include "rjson.hh"
-
 /*
 * Parsed representation of expressions and their components.
 *
@@ -67,27 +63,10 @@ public:
    }
 };

-// When an expression is first parsed, all constants are references, like
-// ":val1", into ExpressionAttributeValues. This uses std::string() variant.
-// The resolve_value() function replaces these constants by the JSON item
-// extracted from the ExpressionAttributeValues.
-struct constant {
-    // We use lw_shared_ptr<rjson::value> just to make rjson::value copyable,
-    // to make this entire object copyable as ANTLR needs.
-    using literal = lw_shared_ptr<rjson::value>;
-    std::variant<std::string, literal> _value;
-    void set(const rjson::value& v) {
-        _value = make_lw_shared<rjson::value>(rjson::copy(v));
-    }
-    void set(std::string& s) {
-        _value = s;
-    }
-};
-
 // "value" is is a value used in the right hand side of an assignment
-// expression, "SET a = ...". It can be a constant (a reference to a value
-// included in the request, e.g., ":val"), a path to an attribute from the
-// existing item (e.g., "a.b[3].c"), or a function of other such values.
+// expression, "SET a = ...". It can be a reference to a value included in
+// the request (":val"), a path to an attribute from the existing item
+// (e.g., "a.b[3].c"), or a function of other such values.
 // Note that the real right-hand-side of an assignment is actually a bit
 // more general - it allows either a value, or a value+value or value-value -
 // see class set_rhs below.
@@ -96,12 +75,9 @@ struct value {
        std::string _function_name;
        std::vector<value> _parameters;
    };
-    std::variant<constant, path, function_call> _value;
-    void set_constant(constant c) {
-        _value = std::move(c);
-    }
+    std::variant<std::string, path, function_call> _value;
    void set_valref(std::string s) {
-        _value = constant { std::move(s) };
+        _value = std::move(s);
    }
    void set_path(path p) {
        _value = std::move(p);
@@ -112,15 +88,6 @@ struct value {
    void add_func_parameter(value v) {
        std::get<function_call>(_value)._parameters.emplace_back(std::move(v));
    }
-    bool is_constant() const {
-        return std::holds_alternative<constant>(_value);
-    }
-    bool is_path() const {
-        return std::holds_alternative<path>(_value);
-    }
-    bool is_func() const {
-        return std::holds_alternative<function_call>(_value);
-    }
 };

 // The right-hand-side of a SET in an update expression can be either a
@@ -154,10 +121,10 @@ public:
        struct remove {
        };
        struct add {
-            constant _valref;
+            std::string _valref;
        };
        struct del {
-            constant _valref;
+            std::string _valref;
        };
        std::variant<set, remove, add, del> _action;

@@ -171,11 +138,11 @@ public:
        }
        void assign_add(path p, std::string v) {
            _path = std::move(p);
-            _action = add { constant { std::move(v) } };
+            _action = add { std::move(v) };
        }
        void assign_del(path p, std::string v) {
            _path = std::move(p);
-            _action = del { constant { std::move(v) } };
+            _action = del { std::move(v) };
        }
    };
 private:
@@ -193,62 +160,6 @@ public:
    const std::vector<action>& actions() const {
        return _actions;
    }
-    std::vector<action>& actions() {
-        return _actions;
-    }
-};
-
-// A primitive_condition is a condition expression involving one condition,
-// while the full condition_expression below adds boolean logic over these
-// primitive conditions.
-// The supported primitive conditions are:
-// 1. Binary operators - v1 OP v2, where OP is =, <>, <, <=, >, or >= and
-//    v1 and v2 are values - from the item (an attribute path), the query
-//    (a ":val" reference), or a function of the the above (only the size()
-//    function is supported).
-// 2. Ternary operator - v1 BETWEEN v2 and v3 (means v1 >= v2 AND v1 <= v3).
-// 3. N-ary operator - v1 IN ( v2, v3, ... )
-// 4. A single function call (attribute_exists etc.). The parser actually
-//    accepts a more general "value" here but later stages reject a value
-//    which is not a function call (because DynamoDB does it too).
-class primitive_condition {
-public:
-    enum class type {
-        UNDEFINED, VALUE, EQ, NE, LT, LE, GT, GE, BETWEEN, IN
-    };
-    type _op = type::UNDEFINED;
-    std::vector<value> _values;
-    void set_operator(type op) {
-        _op = op;
-    }
-    void add_value(value&& v) {
-        _values.push_back(std::move(v));
-    }
-    bool empty() const {
-        return _op == type::UNDEFINED;
-    }
-};
-
-class condition_expression {
-public:
-    bool _negated = false; // If true, the entire condition is negated
-    struct condition_list {
-        char op = '|'; // '&' or '|'
-        std::vector<condition_expression> conditions;
-    };
-    std::variant<primitive_condition, condition_list> _expression = condition_list();
-
-    void set_primitive(primitive_condition&& p) {
-        _expression = std::move(p);
-    }
-    void append(condition_expression&& c, char op);
-    void apply_not() {
-        _negated = !_negated;
-    }
-    bool empty() const {
-        return std::holds_alternative<condition_list>(_expression) &&
-               std::get<condition_list>(_expression).conditions.empty();
-    }
 };

 } // namespace parsed
--- a/alternator/rjson.cc
+++ b/alternator/rjson.cc
@@ -22,108 +22,14 @@
 #include "rjson.hh"
 #include "error.hh"
 #include <seastar/core/print.hh>
-#include <seastar/core/thread.hh>

 namespace rjson {

 static allocator the_allocator;

-/*
- * This wrapper class adds nested level checks to rapidjson's handlers.
- * Each rapidjson handler implements functions for accepting JSON values,
- * which includes strings, numbers, objects, arrays, etc.
- * Parsing objects and arrays needs to be performed carefully with regard
- * to stack overflow - each object/array layer adds another stack frame
- * to parsing, printing and destroying the parent JSON document.
- * To prevent stack overflow, a rapidjson handler can be wrapped with
- * guarded_json_handler, which accepts an additional max_nested_level parameter.
- * After trying to exceed the max nested level, a proper rjson::error will be thrown.
- */
-template<typename Handler, bool EnableYield>
-struct guarded_yieldable_json_handler : public Handler {
-    size_t _nested_level = 0;
-    size_t _max_nested_level;
-public:
-    using handler_base = Handler;
-
-    explicit guarded_yieldable_json_handler(size_t max_nested_level) : _max_nested_level(max_nested_level) {}
-    guarded_yieldable_json_handler(string_buffer& buf, size_t max_nested_level)
-            : handler_base(buf), _max_nested_level(max_nested_level) {}
-
-    void Parse(const char* str, size_t length) {
-        rapidjson::MemoryStream ms(static_cast<const char*>(str), length * sizeof(typename encoding::Ch));
-        rapidjson::EncodedInputStream<encoding, rapidjson::MemoryStream> is(ms);
-        rapidjson::GenericReader<encoding, encoding, allocator> reader(&the_allocator);
-        reader.Parse(is, *this);
-        if (reader.HasParseError()) {
-            throw rjson::error(format("Parsing JSON failed: {}", rapidjson::GetParseError_En(reader.GetParseErrorCode())));
-        }
-        //NOTICE: The handler has parsed the string, but in case of rapidjson::GenericDocument
-        // the data now resides in an internal stack_ variable, which is private instead of
-        // protected... which means we cannot simply access its data. Fortunately, another
-        // function for populating documents from SAX events can be abused to extract the data
-        // from the stack via gadget-oriented programming - we use an empty event generator
-        // which does nothing, and use it to call Populate(), which assumes that the generator
-        // will fill the stack with something. It won't, but our stack is already filled with
-        // data we want to steal, so once Populate() ends, our document will be properly parsed.
-        // A proper solution could be programmed once rapidjson declares this stack_ variable
-        // as protected instead of private, so that this class can access it.
-        auto dummy_generator = [](handler_base&){return true;};
-        handler_base::Populate(dummy_generator);
-    }
-
-    bool StartObject() {
-        ++_nested_level;
-        check_nested_level();
-        maybe_yield();
-        return handler_base::StartObject();
-    }
-
-    bool EndObject(rapidjson::SizeType elements_count = 0) {
-        --_nested_level;
-        return handler_base::EndObject(elements_count);
-    }
-
-    bool StartArray() {
-        ++_nested_level;
-        check_nested_level();
-        maybe_yield();
-        return handler_base::StartArray();
-    }
-
-    bool EndArray(rapidjson::SizeType elements_count = 0) {
-        --_nested_level;
-        return handler_base::EndArray(elements_count);
-    }
-
-    bool Null()                 { maybe_yield(); return handler_base::Null(); }
-    bool Bool(bool b)           { maybe_yield(); return handler_base::Bool(b); }
-    bool Int(int i)             { maybe_yield(); return handler_base::Int(i); }
-    bool Uint(unsigned u)       { maybe_yield(); return handler_base::Uint(u); }
-    bool Int64(int64_t i64)     { maybe_yield(); return handler_base::Int64(i64); }
-    bool Uint64(uint64_t u64)   { maybe_yield(); return handler_base::Uint64(u64); }
-    bool Double(double d)       { maybe_yield(); return handler_base::Double(d); }
-    bool String(const value::Ch* str, size_t length, bool copy = false) { maybe_yield(); return handler_base::String(str, length, copy); }
-    bool Key(const value::Ch* str, size_t length, bool copy = false) { maybe_yield(); return handler_base::Key(str, length, copy); }
-
-
-protected:
-    static void maybe_yield() {
-        if constexpr (EnableYield) {
-            thread::maybe_yield();
-        }
-    }
-
-    void check_nested_level() const {
-        if (RAPIDJSON_UNLIKELY(_nested_level > _max_nested_level)) {
-            throw rjson::error(format("Max nested level reached: {}", _max_nested_level));
-        }
-    }
-};
-
 std::string print(const rjson::value& value) {
    string_buffer buffer;
-    guarded_yieldable_json_handler<writer, false> writer(buffer, 78);
+    writer writer(buffer);
    value.Accept(writer);
    return std::string(buffer.GetString());
 }
@@ -132,9 +38,13 @@ rjson::value copy(const rjson::value& value) {
    return rjson::value(value, the_allocator);
 }

-rjson::value parse(std::string_view str) {
-    guarded_yieldable_json_handler<document, false> d(78);
-    d.Parse(str.data(), str.size());
+rjson::value parse(const std::string& str) {
+    return parse_raw(str.c_str(), str.size());
+}
+
+rjson::value parse_raw(const char* c_str, size_t size) {
+    rjson::document d;
+    d.Parse(c_str, size);
    if (d.HasParseError()) {
        throw rjson::error(format("Parsing JSON failed: {}", GetParseError_En(d.GetParseError())));
    }
@@ -142,22 +52,8 @@ rjson::value parse(std::string_view str) {
    return std::move(v);
 }

-rjson::value parse_yieldable(std::string_view str) {
-    guarded_yieldable_json_handler<document, true> d(78);
-    d.Parse(str.data(), str.size());
-    if (d.HasParseError()) {
-        throw rjson::error(format("Parsing JSON failed: {}", GetParseError_En(d.GetParseError())));
-    }
-    rjson::value& v = d;
-    return std::move(v);
-}
-
-rjson::value& get(rjson::value& value, std::string_view name) {
-    // Although FindMember() has a variant taking a StringRef, it ignores the
-    // given length (see https://github.com/Tencent/rapidjson/issues/1649).
-    // Luckily, the variant taking a GenericValue doesn't share this bug,
-    // and we can create a string GenericValue without copying the string.
-    auto member_it = value.FindMember(rjson::value(name.data(), name.size()));
+rjson::value& get(rjson::value& value, rjson::string_ref_type name) {
+    auto member_it = value.FindMember(name);
    if (member_it != value.MemberEnd())
        return member_it->value;
    else {
@@ -165,8 +61,8 @@ rjson::value& get(rjson::value& value, std::string_view name) {
    }
 }

-const rjson::value& get(const rjson::value& value, std::string_view name) {
-    auto member_it = value.FindMember(rjson::value(name.data(), name.size()));
+const rjson::value& get(const rjson::value& value, rjson::string_ref_type name) {
+    auto member_it = value.FindMember(name);
    if (member_it != value.MemberEnd())
        return member_it->value;
    else {
@@ -186,48 +82,24 @@ rjson::value from_string(const char* str, size_t size) {
    return rjson::value(str, size, the_allocator);
 }

-rjson::value from_string(std::string_view view) {
-    return rjson::value(view.data(), view.size(), the_allocator);
-}
-
-const rjson::value* find(const rjson::value& value, std::string_view name) {
-    // Although FindMember() has a variant taking a StringRef, it ignores the
-    // given length (see https://github.com/Tencent/rapidjson/issues/1649).
-    // Luckily, the variant taking a GenericValue doesn't share this bug,
-    // and we can create a string GenericValue without copying the string.
-    auto member_it = value.FindMember(rjson::value(name.data(), name.size()));
+const rjson::value* find(const rjson::value& value, string_ref_type name) {
+    auto member_it = value.FindMember(name);
    return member_it != value.MemberEnd() ? &member_it->value : nullptr;
 }

-rjson::value* find(rjson::value& value, std::string_view name) {
-    auto member_it = value.FindMember(rjson::value(name.data(), name.size()));
+rjson::value* find(rjson::value& value, string_ref_type name) {
+    auto member_it = value.FindMember(name);
    return member_it != value.MemberEnd() ? &member_it->value : nullptr;
 }

-bool remove_member(rjson::value& value, std::string_view name) {
-    // Although RemoveMember() has a variant taking a StringRef, it ignores
-    // given length (see https://github.com/Tencent/rapidjson/issues/1649).
-    // Luckily, the variant taking a GenericValue doesn't share this bug,
-    // and we can create a string GenericValue without copying the string.
-    return value.RemoveMember(rjson::value(name.data(), name.size()));
-}
-
 void set_with_string_name(rjson::value& base, const std::string& name, rjson::value&& member) {
    base.AddMember(rjson::value(name.c_str(), name.size(), the_allocator), std::move(member), the_allocator);
 }

-void set_with_string_name(rjson::value& base, std::string_view name, rjson::value&& member) {
-    base.AddMember(rjson::value(name.data(), name.size(), the_allocator), std::move(member), the_allocator);
-}
-
 void set_with_string_name(rjson::value& base, const std::string& name, rjson::string_ref_type member) {
    base.AddMember(rjson::value(name.c_str(), name.size(), the_allocator), rjson::value(member), the_allocator);
 }

-void set_with_string_name(rjson::value& base, std::string_view name, rjson::string_ref_type member) {
-    base.AddMember(rjson::value(name.data(), name.size(), the_allocator), rjson::value(member), the_allocator);
-}
-
 void set(rjson::value& base, rjson::string_ref_type name, rjson::value&& member) {
    base.AddMember(name, std::move(member), the_allocator);
 }
--- a/alternator/rjson.hh
+++ b/alternator/rjson.hh
@@ -104,49 +104,38 @@ inline rjson::value empty_string() {
 // The representation is dense - without any redundant indentation.
 std::string print(const rjson::value& value);

-// Returns a string_view to the string held in a JSON value (which is
-// assumed to hold a string, i.e., v.IsString() == true). This is a view
-// to the existing data - no copying is done.
-inline std::string_view to_string_view(const rjson::value& v) {
-    return std::string_view(v.GetString(), v.GetStringLength());
-}
-
 // Copies given JSON value - involves allocation
 rjson::value copy(const rjson::value& value);

 // Parses a JSON value from given string or raw character array.
 // The string/char array liveness does not need to be persisted,
-// as parse() will allocate member names and values.
+// as both parse() and parse_raw() will allocate member names and values.
 // Throws rjson::error if parsing failed.
-rjson::value parse(std::string_view str);
-// Needs to be run in thread context
-rjson::value parse_yieldable(std::string_view str);
+rjson::value parse(const std::string& str);
+rjson::value parse_raw(const char* c_str, size_t size);

 // Creates a JSON value (of JSON string type) out of internal string representations.
 // The string value is copied, so str's liveness does not need to be persisted.
 rjson::value from_string(const std::string& str);
 rjson::value from_string(const sstring& str);
 rjson::value from_string(const char* str, size_t size);
-rjson::value from_string(std::string_view view);

 // Returns a pointer to JSON member if it exists, nullptr otherwise
-rjson::value* find(rjson::value& value, std::string_view name);
-const rjson::value* find(const rjson::value& value, std::string_view name);
+rjson::value* find(rjson::value& value, rjson::string_ref_type name);
+const rjson::value* find(const rjson::value& value, rjson::string_ref_type name);

 // Returns a reference to JSON member if it exists, throws otherwise
-rjson::value& get(rjson::value& value, std::string_view name);
-const rjson::value& get(const rjson::value& value, std::string_view name);
+rjson::value& get(rjson::value& value, rjson::string_ref_type name);
+const rjson::value& get(const rjson::value& value, rjson::string_ref_type name);

 // Sets a member in given JSON object by moving the member - allocates the name.
 // Throws if base is not a JSON object.
 void set_with_string_name(rjson::value& base, const std::string& name, rjson::value&& member);
-void set_with_string_name(rjson::value& base, std::string_view name, rjson::value&& member);

 // Sets a string member in given JSON object by assigning its reference - allocates the name.
 // NOTICE: member string liveness must be ensured to be at least as long as base's.
 // Throws if base is not a JSON object.
 void set_with_string_name(rjson::value& base, const std::string& name, rjson::string_ref_type member);
-void set_with_string_name(rjson::value& base, std::string_view name, rjson::string_ref_type member);

 // Sets a member in given JSON object by moving the member.
 // NOTICE: name liveness must be ensured to be at least as long as base's.
@@ -163,9 +152,6 @@ void set(rjson::value& base, rjson::string_ref_type name, rjson::string_ref_type
 // Throws if base_array is not a JSON array.
 void push_back(rjson::value& base_array, rjson::value&& item);

-// Remove a member from a JSON object. Throws if value isn't an object.
-bool remove_member(rjson::value& value, std::string_view name);
-
 struct single_value_comp {
    bool operator()(const rjson::value& r1, const rjson::value& r2) const;
 };
--- a/alternator/rmw_operation.hh
+++ b/alternator/rmw_operation.hh
@@ -1,128 +0,0 @@
-/*
- * Copyright 2020 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "seastarx.hh"
-#include "service/storage_proxy.hh"
-#include "service/storage_proxy.hh"
-#include "rjson.hh"
-#include "executor.hh"
-
-namespace alternator {
-
-// An rmw_operation encapsulates the common logic of all the item update
-// operations which may involve a read of the item before the write
-// (so-called Read-Modify-Write operations). These operations include PutItem,
-// UpdateItem and DeleteItem: All of these may be conditional operations (the
-// "Expected" parameter) which requir a read before the write, and UpdateItem
-// may also have an update expression which refers to the item's old value.
-//
-// The code below supports running the read and the write together as one
-// transaction using LWT (this is why rmw_operation is a subclass of
-// cas_request, as required by storage_proxy::cas()), but also has optional
-// modes not using LWT.
-class rmw_operation : public service::cas_request, public enable_shared_from_this<rmw_operation> {
-public:
-    // The following options choose which mechanism to use for isolating
-    // parallel write operations:
-    // * The FORBID_RMW option forbids RMW (read-modify-write) operations
-    //   such as conditional updates. For the remaining write-only
-    //   operations, ordinary quorum writes are isolated enough.
-    // * The LWT_ALWAYS option always uses LWT (lightweight transactions)
-    //   for any write operation - whether or not it also has a read.
-    // * The LWT_RMW_ONLY option uses LWT only for RMW operations, and uses
-    //   ordinary quorum writes for write-only operations.
-    //   This option is not safe if the user may send both RMW and write-only
-    //   operations on the same item.
-    // * The UNSAFE_RMW option does read-modify-write operations as separate
-    //   read and write. It is unsafe - concurrent RMW operations are not
-    //   isolated at all. This option will likely be removed in the future.
-    enum class write_isolation {
-        FORBID_RMW, LWT_ALWAYS, LWT_RMW_ONLY, UNSAFE_RMW
-    };
-    static constexpr auto WRITE_ISOLATION_TAG_KEY = "system:write_isolation";
-
-    static write_isolation get_write_isolation_for_schema(schema_ptr schema);
-
-    static write_isolation default_write_isolation;
-public:
-    static void set_default_write_isolation(std::string_view mode);
-
-protected:
-    // The full request JSON
-    rjson::value _request;
-    // All RMW operations involve a single item with a specific partition
-    // and optional clustering key, in a single table, so the following
-    // information is common to all of them:
-    schema_ptr _schema;
-    partition_key _pk = partition_key::make_empty();
-    clustering_key _ck = clustering_key::make_empty();
-    write_isolation _write_isolation;
-
-    // All RMW operations can have a ReturnValues parameter from the following
-    // choices. But note that only UpdateItem actually supports all of them:
-    enum class returnvalues {
-        NONE, ALL_OLD, UPDATED_OLD, ALL_NEW, UPDATED_NEW
-    } _returnvalues;
-    static returnvalues parse_returnvalues(const rjson::value& request);
-    // When _returnvalues != NONE, apply() should store here, in JSON form,
-    // the values which are to be returned in the "Attributes" field.
-    // The default null JSON means do not return an Attributes field at all.
-    // This field is marked "mutable" so that the const apply() can modify
-    // it (see explanation below), but note that because apply() may be
-    // called more than once, if apply() will sometimes set this field it
-    // must set it (even if just to the default empty value) every time.
-    mutable rjson::value _return_attributes;
-public:
-    // The constructor of a rmw_operation subclass should parse the request
-    // and try to discover as many input errors as it can before really
-    // attempting the read or write operations.
-    rmw_operation(service::storage_proxy& proxy, rjson::value&& request);
-    // rmw_operation subclasses (update_item_operation, put_item_operation
-    // and delete_item_operation) shall implement an apply() function which
-    // takes the previous value of the item (if it was read) and creates the
-    // write mutation. If the previous value of item does not pass the needed
-    // conditional expression, apply() should return an empty optional.
-    // apply() may throw if it encounters input errors not discovered during
-    // the constructor.
-    // apply() may be called more than once in case of contention, so it must
-    // not change the state saved in the object (issue #7218 was caused by
-    // violating this). We mark apply() "const" to let the compiler validate
-    // this for us. The output-only field _return_attributes is marked
-    // "mutable" above so that apply() can still write to it.
-    virtual std::optional<mutation> apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) const = 0;
-    // Convert the above apply() into the signature needed by cas_request:
-    virtual std::optional<mutation> apply(foreign_ptr<lw_shared_ptr<query::result>> qr, const query::partition_slice& slice, api::timestamp_type ts) override;
-    virtual ~rmw_operation() = default;
-    schema_ptr schema() const { return _schema; }
-    const rjson::value& request() const { return _request; }
-    rjson::value&& move_request() && { return std::move(_request); }
-    future<executor::request_return_type> execute(service::storage_proxy& proxy,
-            service::client_state& client_state,
-            tracing::trace_state_ptr trace_state,
-            service_permit permit,
-            bool needs_read_before_write,
-            stats& stats);
-    std::optional<shard_id> shard_for_execute(bool needs_read_before_write);
-};
-
-} // namespace alternator
--- a/alternator/serialization.cc
+++ b/alternator/serialization.cc
@@ -31,8 +31,8 @@ static logging::logger slogger("alternator-serialization");

 namespace alternator {

-type_info type_info_from_string(std::string_view type) {
-    static thread_local const std::unordered_map<std::string_view, type_info> type_infos = {
+type_info type_info_from_string(std::string type) {
+    static thread_local const std::unordered_map<std::string, type_info> type_infos = {
        {"S", {alternator_type::S, utf8_type}},
        {"B", {alternator_type::B, bytes_type}},
        {"BOOL", {alternator_type::BOOL, boolean_type}},
@@ -87,7 +87,7 @@ bytes serialize_item(const rjson::value& item) {
        throw api_error("ValidationException", format("An item can contain only one attribute definition: {}", item));
    }
    auto it = item.MemberBegin();
-    type_info type_info = type_info_from_string(rjson::to_string_view(it->name)); // JSON keys are guaranteed to be strings
+    type_info type_info = type_info_from_string(it->name.GetString()); // JSON keys are guaranteed to be strings

    if (type_info.atype == alternator_type::NOT_SUPPORTED_YET) {
        slogger.trace("Non-optimal serialization of type {}", it->name.GetString());
@@ -121,7 +121,7 @@ struct to_json_visitor {
    }
    // default
    void operator()(const abstract_type& t) const {
-        rjson::set_with_string_name(deserialized, type_ident, rjson::parse(to_json_string(t, bytes(bv))));
+        rjson::set_with_string_name(deserialized, type_ident, rjson::parse(t.to_string(bytes(bv))));
    }
 };

@@ -136,7 +136,7 @@ rjson::value deserialize_item(bytes_view bv) {

    if (atype == alternator_type::NOT_SUPPORTED_YET) {
        slogger.trace("Non-optimal deserialization of alternator type {}", int8_t(atype));
-        return rjson::parse(std::string_view(reinterpret_cast<const char *>(bv.data()), bv.size()));
+        return rjson::parse_raw(reinterpret_cast<const char *>(bv.data()), bv.size());
    }
    type_representation type_representation = represent_type(atype);
    visit(*type_representation.dtype, to_json_visitor{deserialized, type_representation.ident, bv});
@@ -153,48 +153,34 @@ std::string type_to_string(data_type type) {
    };
    auto it = types.find(type);
    if (it == types.end()) {
-        // fall back to string, in order to be able to present
-        // internal Scylla types in a human-readable way
-        return "S";
+        throw std::runtime_error(format("Unknown type {}", type->name()));
    }
    return it->second;
 }

 bytes get_key_column_value(const rjson::value& item, const column_definition& column) {
    std::string column_name = column.name_as_text();
-    const rjson::value* key_typed_value = rjson::find(item, column_name);
-    if (!key_typed_value) {
-        throw api_error("ValidationException", format("Key column {} not found", column_name));
+    std::string expected_type = type_to_string(column.type);
+
+    const rjson::value& key_typed_value = rjson::get(item, rjson::value::StringRefType(column_name.c_str()));
+    if (!key_typed_value.IsObject() || key_typed_value.MemberCount() != 1) {
+        throw api_error("ValidationException",
+                format("Missing or invalid value object for key column {}: {}", column_name, item));
    }
-    return get_key_from_typed_value(*key_typed_value, column);
+    return get_key_from_typed_value(key_typed_value, column, expected_type);
 }

-// Parses the JSON encoding for a key value, which is a map with a single
-// entry, whose key is the type (expected to match the key column's type)
-// and the value is the encoded value.
-bytes get_key_from_typed_value(const rjson::value& key_typed_value, const column_definition& column) {
-    if (!key_typed_value.IsObject() || key_typed_value.MemberCount() != 1 ||
-            !key_typed_value.MemberBegin()->value.IsString()) {
-        throw api_error("ValidationException",
-                format("Malformed value object for key column {}: {}",
-                        column.name_as_text(), key_typed_value));
-    }
-
+bytes get_key_from_typed_value(const rjson::value& key_typed_value, const column_definition& column, const std::string& expected_type) {
    auto it = key_typed_value.MemberBegin();
-    if (it->name != type_to_string(column.type)) {
+    if (it->name.GetString() != expected_type) {
        throw api_error("ValidationException",
                format("Type mismatch: expected type {} for key column {}, got type {}",
-                        type_to_string(column.type), column.name_as_text(), it->name.GetString()));
-    }
-    std::string_view value_view = rjson::to_string_view(it->value);
-    if (value_view.empty()) {
-        throw api_error("ValidationException",
-                format("The AttributeValue for a key attribute cannot contain an empty string value. Key: {}", column.name_as_text()));
+                        expected_type, column.name_as_text(), it->name.GetString()));
    }
    if (column.type == bytes_type) {
        return base64_decode(it->value);
    } else {
-        return column.type->from_string(rjson::to_string_view(it->value));
+        return column.type->from_string(it->value.GetString());
    }

 }
@@ -212,11 +198,8 @@ rjson::value json_key_column_value(bytes_view cell, const column_definition& col
        auto s = to_json_string(*decimal_type, bytes(cell));
        return rjson::from_string(s);
    } else {
-        // Support for arbitrary key types is useful for parsing values of virtual tables,
-        // which can involve any type supported by Scylla.
-        // In order to guarantee that the returned type is parsable by alternator clients,
-        // they are represented simply as strings.
-        return rjson::from_string(column.type->to_string(bytes(cell)));
+        // We shouldn't get here, we shouldn't see such key columns.
+        throw std::runtime_error(format("Unexpected key type: {}", column.type->name()));
    }
 }

@@ -275,93 +258,4 @@ const std::pair<std::string, const rjson::value*> unwrap_set(const rjson::value&
    return std::make_pair(it_key, &(it->value));
 }

-const rjson::value* unwrap_list(const rjson::value& v) {
-    if (!v.IsObject() || v.MemberCount() != 1) {
-        return nullptr;
-    }
-    auto it = v.MemberBegin();
-    if (it->name != std::string("L")) {
-        return nullptr;
-    }
-    return &(it->value);
-}
-
-// Take two JSON-encoded numeric values ({"N": "thenumber"}) and return the
-// sum, again as a JSON-encoded number.
-rjson::value number_add(const rjson::value& v1, const rjson::value& v2) {
-    auto n1 = unwrap_number(v1, "UpdateExpression");
-    auto n2 = unwrap_number(v2, "UpdateExpression");
-    rjson::value ret = rjson::empty_object();
-    std::string str_ret = std::string((n1 + n2).to_string());
-    rjson::set(ret, "N", rjson::from_string(str_ret));
-    return ret;
-}
-
-rjson::value number_subtract(const rjson::value& v1, const rjson::value& v2) {
-    auto n1 = unwrap_number(v1, "UpdateExpression");
-    auto n2 = unwrap_number(v2, "UpdateExpression");
-    rjson::value ret = rjson::empty_object();
-    std::string str_ret = std::string((n1 - n2).to_string());
-    rjson::set(ret, "N", rjson::from_string(str_ret));
-    return ret;
-}
-
-// Take two JSON-encoded set values (e.g. {"SS": [...the actual set]}) and
-// return the sum of both sets, again as a set value.
-rjson::value set_sum(const rjson::value& v1, const rjson::value& v2) {
-    auto [set1_type, set1] = unwrap_set(v1);
-    auto [set2_type, set2] = unwrap_set(v2);
-    if (set1_type != set2_type) {
-        throw api_error("ValidationException", format("Mismatched set types: {} and {}", set1_type, set2_type));
-    }
-    if (!set1 || !set2) {
-        throw api_error("ValidationException", "UpdateExpression: ADD operation for sets must be given sets as arguments");
-    }
-    rjson::value sum = rjson::copy(*set1);
-    std::set<rjson::value, rjson::single_value_comp> set1_raw;
-    for (auto it = sum.Begin(); it != sum.End(); ++it) {
-        set1_raw.insert(rjson::copy(*it));
-    }
-    for (const auto& a : set2->GetArray()) {
-        if (set1_raw.count(a) == 0) {
-            rjson::push_back(sum, rjson::copy(a));
-        }
-    }
-    rjson::value ret = rjson::empty_object();
-    rjson::set_with_string_name(ret, set1_type, std::move(sum));
-    return ret;
-}
-
-// Take two JSON-encoded set values (e.g. {"SS": [...the actual list]}) and
-// return the difference of s1 - s2, again as a set value.
-// DynamoDB does not allow empty sets, so if resulting set is empty, return
-// an unset optional instead.
-std::optional<rjson::value> set_diff(const rjson::value& v1, const rjson::value& v2) {
-    auto [set1_type, set1] = unwrap_set(v1);
-    auto [set2_type, set2] = unwrap_set(v2);
-    if (set1_type != set2_type) {
-        throw api_error("ValidationException", format("Mismatched set types: {} and {}", set1_type, set2_type));
-    }
-    if (!set1 || !set2) {
-        throw api_error("ValidationException", "UpdateExpression: DELETE operation can only be performed on a set");
-    }
-    std::set<rjson::value, rjson::single_value_comp> set1_raw;
-    for (auto it = set1->Begin(); it != set1->End(); ++it) {
-        set1_raw.insert(rjson::copy(*it));
-    }
-    for (const auto& a : set2->GetArray()) {
-        set1_raw.erase(a);
-    }
-    if (set1_raw.empty()) {
-        return std::nullopt;
-    }
-    rjson::value ret = rjson::empty_object();
-    rjson::set_with_string_name(ret, set1_type, rjson::empty_array());
-    rjson::value& result_set = ret[set1_type];
-    for (const auto& a : set1_raw) {
-        rjson::push_back(result_set, rjson::copy(a));
-    }
-    return ret;
-}
-
 }
--- a/alternator/serialization.hh
+++ b/alternator/serialization.hh
@@ -24,7 +24,7 @@
 #include <string>
 #include <string_view>
 #include "types.hh"
-#include "schema_fwd.hh"
+#include "schema.hh"
 #include "keys.hh"
 #include "rjson.hh"
 #include "utils/big_decimal.hh"
@@ -45,7 +45,7 @@ struct type_representation {
    data_type dtype;
 };

-type_info type_info_from_string(std::string_view type);
+type_info type_info_from_string(std::string type);
 type_representation represent_type(alternator_type atype);

 bytes serialize_item(const rjson::value& item);
@@ -54,7 +54,7 @@ rjson::value deserialize_item(bytes_view bv);
 std::string type_to_string(data_type type);

 bytes get_key_column_value(const rjson::value& item, const column_definition& column);
-bytes get_key_from_typed_value(const rjson::value& key_typed_value, const column_definition& column);
+bytes get_key_from_typed_value(const rjson::value& key_typed_value, const column_definition& column, const std::string& expected_type);
 rjson::value json_key_column_value(bytes_view cell, const column_definition& column);

 partition_key pk_from_json(const rjson::value& item, schema_ptr schema);
@@ -69,21 +69,4 @@ big_decimal unwrap_number(const rjson::value& v, std::string_view diagnostic);
 // returned value is {"", nullptr}
 const std::pair<std::string, const rjson::value*> unwrap_set(const rjson::value& v);

-// Check if a given JSON object encodes a list (i.e., it is a {"L": [...]}
-// and returns a pointer to that list.
-const rjson::value* unwrap_list(const rjson::value& v);
-
-// Take two JSON-encoded numeric values ({"N": "thenumber"}) and return the
-// sum, again as a JSON-encoded number.
-rjson::value number_add(const rjson::value& v1, const rjson::value& v2);
-rjson::value number_subtract(const rjson::value& v1, const rjson::value& v2);
-// Take two JSON-encoded set values (e.g. {"SS": [...the actual set]}) and
-// return the sum of both sets, again as a set value.
-rjson::value set_sum(const rjson::value& v1, const rjson::value& v2);
-// Take two JSON-encoded set values (e.g. {"SS": [...the actual list]}) and
-// return the difference of s1 - s2, again as a set value.
-// DynamoDB does not allow empty sets, so if resulting set is empty, return
-// an unset optional instead.
-std::optional<rjson::value> set_diff(const rjson::value& v1, const rjson::value& v2);
-
 }
--- a/alternator/server.cc
+++ b/alternator/server.cc
@@ -23,14 +23,12 @@
 #include "log.hh"
 #include <seastar/http/function_handlers.hh>
 #include <seastar/json/json_elements.hh>
-#include "seastarx.hh"
+#include <seastarx.hh>
 #include "error.hh"
 #include "rjson.hh"
 #include "auth.hh"
 #include <cctype>
 #include "cql3/query_processor.hh"
-#include "service/storage_service.hh"
-#include "utils/overloaded_functor.hh"

 static logging::logger slogger("alternator-server");

@@ -67,9 +65,9 @@ inline std::vector<std::string_view> split(std::string_view text, char separator
 // Internal Server Error.
 class api_handler : public handler_base {
 public:
-    api_handler(const std::function<future<executor::request_return_type>(std::unique_ptr<request> req)>& _handle) : _f_handle(
-         [this, _handle](std::unique_ptr<request> req, std::unique_ptr<reply> rep) {
-         return seastar::futurize_invoke(_handle, std::move(req)).then_wrapped([this, rep = std::move(rep)](future<executor::request_return_type> resf) mutable {
+    api_handler(const future_json_function& _handle) : _f_handle(
+         [_handle](std::unique_ptr<request> req, std::unique_ptr<reply> rep) {
+         return seastar::futurize_apply(_handle, std::move(req)).then_wrapped([rep = std::move(rep)](future<json::json_return_type> resf) mutable {
             if (resf.failed()) {
                 // Exceptions of type api_error are wrapped as JSON and
                 // returned to the client as expected. Other types of
@@ -88,24 +86,20 @@ public:
                             format("Internal server error: {}", std::current_exception()),
                             reply::status_type::internal_server_error);
                 }
-                 generate_error_reply(*rep, ret);
+                 // FIXME: what is this version number?
+                 rep->_content += "{\"__type\":\"com.amazonaws.dynamodb.v20120810#" + ret._type + "\"," +
+                         "\"message\":\"" + ret._msg + "\"}";
+                 rep->_status = ret._http_code;
+                 slogger.trace("api_handler error case: {}", rep->_content);
                 return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
             }
+             slogger.trace("api_handler success case");
             auto res = resf.get0();
-             std::visit(overloaded_functor {
-                 [&] (const json::json_return_type& json_return_value) {
-                     slogger.trace("api_handler success case");
-                     if (json_return_value._body_writer) {
-                         rep->write_body("json", std::move(json_return_value._body_writer));
-                     } else {
-                         rep->_content += json_return_value._res;
-                     }
-                 },
-                 [&] (const api_error& err) {
-                     generate_error_reply(*rep, err);
-                 }
-             }, res);
-
+             if (res._body_writer) {
+                 rep->write_body("json", std::move(res._body_writer));
+             } else {
+                 rep->_content += res._res;
+             }
             return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
         });
    }), _type("json") { }
@@ -121,66 +115,18 @@ public:
    }

 protected:
-    void generate_error_reply(reply& rep, const api_error& err) {
-        rep._content += "{\"__type\":\"com.amazonaws.dynamodb.v20120810#" + err._type + "\"," +
-                "\"message\":\"" + err._msg + "\"}";
-        rep._status = err._http_code;
-        slogger.trace("api_handler error case: {}", rep._content);
-    }
-
    future_handler_function _f_handle;
    sstring _type;
 };

-class gated_handler : public handler_base {
-    seastar::gate& _gate;
-public:
-    gated_handler(seastar::gate& gate) : _gate(gate) {}
-    virtual future<std::unique_ptr<reply>> do_handle(const sstring& path, std::unique_ptr<request> req, std::unique_ptr<reply> rep) = 0;
-    virtual future<std::unique_ptr<reply>> handle(const sstring& path, std::unique_ptr<request> req, std::unique_ptr<reply> rep) final override {
-        return with_gate(_gate, [this, &path, req = std::move(req), rep = std::move(rep)] () mutable {
-            return do_handle(path, std::move(req), std::move(rep));
-        });
-    }
-};
-
-class health_handler : public gated_handler {
-public:
-    health_handler(seastar::gate& pending_requests) : gated_handler(pending_requests) {}
-protected:
-    virtual future<std::unique_ptr<reply>> do_handle(const sstring& path, std::unique_ptr<request> req, std::unique_ptr<reply> rep) override {
+class health_handler : public handler_base {
+    virtual future<std::unique_ptr<reply>> handle(const sstring& path, std::unique_ptr<request> req, std::unique_ptr<reply> rep) override {
        rep->set_status(reply::status_type::ok);
        rep->write_body("txt", format("healthy: {}", req->get_header("Host")));
        return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
    }
 };

-class local_nodelist_handler : public gated_handler {
-public:
-    local_nodelist_handler(seastar::gate& pending_requests) : gated_handler(pending_requests) {}
-protected:
-    virtual future<std::unique_ptr<reply>> do_handle(const sstring& path, std::unique_ptr<request> req, std::unique_ptr<reply> rep) override {
-        rjson::value results = rjson::empty_array();
-        // It's very easy to get a list of all live nodes on the cluster,
-        // using gms::get_local_gossiper().get_live_members(). But getting
-        // just the list of live nodes in this DC needs more elaborate code:
-        sstring local_dc = locator::i_endpoint_snitch::get_local_snitch_ptr()->get_datacenter(
-                utils::fb_utilities::get_broadcast_address());
-        std::unordered_set<gms::inet_address> local_dc_nodes =
-                service::get_local_storage_service().get_token_metadata().
-                get_topology().get_datacenter_endpoints().at(local_dc);
-        for (auto& ip : local_dc_nodes) {
-            if (gms::get_local_gossiper().is_alive(ip)) {
-                rjson::push_back(results, rjson::from_string(ip.to_sstring()));
-            }
-        }
-        rep->set_status(reply::status_type::ok);
-        rep->set_content_type("json");
-        rep->_content = rjson::print(results);
-        return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
-    }
-};
-
 future<> server::verify_signature(const request& req) {
    if (!_enforce_authorization) {
        slogger.debug("Skipping authorization");
@@ -191,7 +137,7 @@ future<> server::verify_signature(const request& req) {
        throw api_error("InvalidSignatureException", "Host header is mandatory for signature verification");
    }
    auto authorization_it = req._headers.find("Authorization");
-    if (authorization_it == req._headers.end()) {
+    if (authorization_it == req._headers.end()) {
        throw api_error("InvalidSignatureException", "Authorization header is mandatory for signature verification");
    }
    std::string host = host_it->second;
@@ -268,8 +214,8 @@ future<> server::verify_signature(const request& req) {
    });
 }

-future<executor::request_return_type> server::handle_api_request(std::unique_ptr<request>&& req) {
-    _executor._stats.total_operations++;
+future<json::json_return_type> server::handle_api_request(std::unique_ptr<request>&& req) {
+    _executor.local()._stats.total_operations++;
    sstring target = req->get_header(TARGET);
    std::vector<std::string_view> split_target = split(target, '.');
    //NOTICE(sarna): Target consists of Dynamo API version followed by a dot '.' and operation type (e.g. CreateTable)
@@ -278,32 +224,17 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
    return verify_signature(*req).then([this, op, req = std::move(req)] () mutable {
        auto callback_it = _callbacks.find(op);
        if (callback_it == _callbacks.end()) {
-            _executor._stats.unsupported_operations++;
+            _executor.local()._stats.unsupported_operations++;
            throw api_error("UnknownOperationException",
                    format("Unsupported operation {}", op));
        }
-        return with_gate(_pending_requests, [this, callback_it = std::move(callback_it), op = std::move(op), req = std::move(req)] () mutable {
-            //FIXME: Client state can provide more context, e.g. client's endpoint address
-            // We use unique_ptr because client_state cannot be moved or copied
-            return do_with(std::make_unique<executor::client_state>(executor::client_state::internal_tag()),
-                    [this, callback_it = std::move(callback_it), op = std::move(op), req = std::move(req)] (std::unique_ptr<executor::client_state>& client_state) mutable {
-                tracing::trace_state_ptr trace_state = executor::maybe_trace_query(*client_state, op, req->content);
-                tracing::trace(trace_state, op);
-                // JSON parsing can allocate up to roughly 2x the size of the raw document, + a couple of bytes for maintenance.
-                // FIXME: by this time, the whole HTTP request was already read, so some memory is already occupied.
-                // Once HTTP allows working on streams, we should grab the permit *before* reading the HTTP payload.
-                size_t mem_estimate = req->content.size() * 3 + 8000;
-                auto units_fut = get_units(*_memory_limiter, mem_estimate);
-                if (_memory_limiter->waiters()) {
-                    ++_executor._stats.requests_blocked_memory;
-                }
-                return units_fut.then([this, callback_it = std::move(callback_it), &client_state, trace_state, req = std::move(req)] (semaphore_units<> units) mutable {
-                    return _json_parser.parse(req->content).then([this, callback_it = std::move(callback_it), &client_state, trace_state,
-                            units = std::move(units), req = std::move(req)] (rjson::value json_request) mutable {
-                        return callback_it->second(_executor, *client_state, trace_state, make_service_permit(std::move(units)), std::move(json_request), std::move(req)).finally([trace_state] {});
-                    });
-                });
-            });
+        //FIXME: Client state can provide more context, e.g. client's endpoint address
+        // We use unique_ptr because client_state cannot be moved or copied
+        return do_with(std::make_unique<executor::client_state>(executor::client_state::internal_tag()), [this, callback_it = std::move(callback_it), op = std::move(op), req = std::move(req)] (std::unique_ptr<executor::client_state>& client_state) mutable {
+            client_state->set_raw_keyspace(executor::KEYSPACE_NAME);
+            tracing::trace_state_ptr trace_state = executor::maybe_trace_query(*client_state, op, req->content);
+            tracing::trace(trace_state, op);
+            return callback_it->second(_executor.local(), *client_state, trace_state, std::move(req)).finally([trace_state] {});
        });
    });
 }
@@ -313,88 +244,35 @@ void server::set_routes(routes& r) {
        return handle_api_request(std::move(req));
    });

-    r.put(operation_type::POST, "/", req_handler);
-    r.put(operation_type::GET, "/", new health_handler(_pending_requests));
-    // The "/localnodes" request is a new Alternator feature, not supported by
-    // DynamoDB and not required for DynamoDB compatibility. It allows a
-    // client to enquire - using a trivial HTTP request without requiring
-    // authentication - the list of all live nodes in the same data center of
-    // the Alternator cluster. The client can use this list to balance its
-    // request load to all the nodes in the same geographical region.
-    // Note that this API exposes - openly without authentication - the
-    // information on the cluster's members inside one data center. We do not
-    // consider this to be a security risk, because an attacker can already
-    // scan an entire subnet for nodes responding to the health request,
-    // or even just scan for open ports.
-    r.put(operation_type::GET, "/localnodes", new local_nodelist_handler(_pending_requests));
+    r.add(operation_type::POST, url("/"), req_handler);
+    r.add(operation_type::GET, url("/"), new health_handler);
 }

 //FIXME: A way to immediately invalidate the cache should be considered,
 // e.g. when the system table which stores the keys is changed.
 // For now, this propagation may take up to 1 minute.
-server::server(executor& exec)
-        : _http_server("http-alternator")
-        , _https_server("https-alternator")
-        , _executor(exec)
-        , _key_cache(1024, 1min, slogger)
-        , _enforce_authorization(false)
-        , _enabled_servers{}
-        , _pending_requests{}
+server::server(seastar::sharded<executor>& e)
+        : _executor(e), _key_cache(1024, 1min, slogger), _enforce_authorization(false)
      , _callbacks{
-        {"CreateTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.create_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
-        }},
-        {"DescribeTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.describe_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
-        }},
-        {"DeleteTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.delete_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
-        }},
-        {"PutItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.put_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
-        }},
-        {"UpdateItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.update_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
-        }},
-        {"GetItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.get_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
-        }},
-        {"DeleteItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.delete_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
-        }},
-        {"ListTables", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.list_tables(client_state, std::move(permit), std::move(json_request));
-        }},
-        {"Scan", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.scan(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
-        }},
-        {"DescribeEndpoints", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.describe_endpoints(client_state, std::move(permit), std::move(json_request), req->get_header("Host"));
-        }},
-        {"BatchWriteItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.batch_write_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
-        }},
-        {"BatchGetItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.batch_get_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
-        }},
-        {"Query", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.query(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
-        }},
-        {"TagResource", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.tag_resource(client_state, std::move(permit), std::move(json_request));
-        }},
-        {"UntagResource", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.untag_resource(client_state, std::move(permit), std::move(json_request));
-        }},
-        {"ListTagsOfResource", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.list_tags_of_resource(client_state, std::move(permit), std::move(json_request));
-        }},
+        {"CreateTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) {
+            return e.maybe_create_keyspace().then([&e, &client_state, req = std::move(req), trace_state = std::move(trace_state)] () mutable { return e.create_table(client_state, std::move(trace_state), req->content); }); }
+        },
+        {"DescribeTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.describe_table(client_state, std::move(trace_state), req->content); }},
+        {"DeleteTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.delete_table(client_state, std::move(trace_state), req->content); }},
+        {"PutItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.put_item(client_state, std::move(trace_state), req->content); }},
+        {"UpdateItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.update_item(client_state, std::move(trace_state), req->content); }},
+        {"GetItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.get_item(client_state, std::move(trace_state), req->content); }},
+        {"DeleteItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.delete_item(client_state, std::move(trace_state), req->content); }},
+        {"ListTables", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.list_tables(client_state, req->content); }},
+        {"Scan", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.scan(client_state, std::move(trace_state), req->content); }},
+        {"DescribeEndpoints", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.describe_endpoints(client_state, req->content, req->get_header("Host")); }},
+        {"BatchWriteItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.batch_write_item(client_state, std::move(trace_state), req->content); }},
+        {"BatchGetItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.batch_get_item(client_state, std::move(trace_state), req->content); }},
+        {"Query", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.query(client_state, std::move(trace_state), req->content); }},
    } {
 }

-future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds,
-        bool enforce_authorization, semaphore* memory_limiter) {
-    _memory_limiter = memory_limiter;
+future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds, bool enforce_authorization) {
    _enforce_authorization = enforce_authorization;
    if (!port && !https_port) {
        return make_exception_future<>(std::runtime_error("Either regular port or TLS port"
@@ -402,26 +280,25 @@ future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std:
    }
    return seastar::async([this, addr, port, https_port, creds] {
        try {
-            _executor.start().get();
+            _executor.invoke_on_all([] (executor& e) {
+                return e.start();
+            }).get();

            if (port) {
-                set_routes(_http_server._routes);
-                _http_server.set_content_length_limit(server::content_length_limit);
-                _http_server.listen(socket_address{addr, *port}).get();
-                _enabled_servers.push_back(std::ref(_http_server));
+                _control.start().get();
+                _control.set_routes(std::bind(&server::set_routes, this, std::placeholders::_1)).get();
+                _control.listen(socket_address{addr, *port}).get();
+                slogger.info("Alternator HTTP server listening on {} port {}", addr, *port);
            }
            if (https_port) {
-                set_routes(_https_server._routes);
-                _https_server.set_content_length_limit(server::content_length_limit);
-                _https_server.set_tls_credentials(creds->build_reloadable_server_credentials([](const std::unordered_set<sstring>& files, std::exception_ptr ep) {
-                    if (ep) {
-                        slogger.warn("Exception loading {}: {}", files, ep);
-                    } else {
-                        slogger.info("Reloaded {}", files);
-                    }
-                }).get0());
-                _https_server.listen(socket_address{addr, *https_port}).get();
-                _enabled_servers.push_back(std::ref(_https_server));
+                _https_control.start().get();
+                _https_control.set_routes(std::bind(&server::set_routes, this, std::placeholders::_1)).get();
+                _https_control.server().invoke_on_all([creds] (http_server& serv) {
+                    return serv.set_tls_credentials(creds->build_server_credentials());
+                }).get();
+
+                _https_control.listen(socket_address{addr, *https_port}).get();
+                slogger.info("Alternator HTTPS server listening on {} port {}", addr, *https_port);
            }
        } catch (...) {
            slogger.error("Failed to set up Alternator HTTP server on {} port {}, TLS port {}: {}",
@@ -433,55 +310,5 @@ future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std:
    });
 }

-future<> server::stop() {
-    return parallel_for_each(_enabled_servers, [] (http_server& server) {
-        return server.stop();
-    }).then([this] {
-        return _pending_requests.close();
-    }).then([this] {
-        return _json_parser.stop();
-    });
-}
-
-server::json_parser::json_parser() : _run_parse_json_thread(async([this] {
-        while (true) {
-            _document_waiting.wait().get();
-            if (_as.abort_requested()) {
-                return;
-            }
-            try {
-                _parsed_document = rjson::parse_yieldable(_raw_document);
-                _current_exception = nullptr;
-            } catch (...) {
-                _current_exception = std::current_exception();
-            }
-            _document_parsed.signal();
-        }
-    })) {
-}
-
-future<rjson::value> server::json_parser::parse(std::string_view content) {
-    if (content.size() < yieldable_parsing_threshold) {
-        return make_ready_future<rjson::value>(rjson::parse(content));
-    }
-    return with_semaphore(_parsing_sem, 1, [this, content] {
-        _raw_document = content;
-        _document_waiting.signal();
-        return _document_parsed.wait().then([this] {
-            if (_current_exception) {
-                return make_exception_future<rjson::value>(_current_exception);
-            }
-            return make_ready_future<rjson::value>(std::move(_parsed_document));
-        });
-    });
-}
-
-future<> server::json_parser::stop() {
-    _as.request_abort();
-    _document_waiting.signal();
-    _document_parsed.broken();
-    return std::move(_run_parse_json_thread);
-}
-
 }

--- a/alternator/server.hh
+++ b/alternator/server.hh
@@ -26,57 +26,28 @@
 #include <seastar/http/httpd.hh>
 #include <seastar/net/tls.hh>
 #include <optional>
-#include "alternator/auth.hh"
-#include "utils/small_vector.hh"
-#include <seastar/core/units.hh>
+#include <alternator/auth.hh>

 namespace alternator {

 class server {
-    static constexpr size_t content_length_limit = 16*MB;
-    using alternator_callback = std::function<future<executor::request_return_type>(executor&, executor::client_state&,
-            tracing::trace_state_ptr, service_permit, rjson::value, std::unique_ptr<request>)>;
+    using alternator_callback = std::function<future<json::json_return_type>(executor&, executor::client_state&, tracing::trace_state_ptr, std::unique_ptr<request>)>;
    using alternator_callbacks_map = std::unordered_map<std::string_view, alternator_callback>;

-    http_server _http_server;
-    http_server _https_server;
-    executor& _executor;
-
+    seastar::httpd::http_server_control _control;
+    seastar::httpd::http_server_control _https_control;
+    seastar::sharded<executor>& _executor;
    key_cache _key_cache;
    bool _enforce_authorization;
-    utils::small_vector<std::reference_wrapper<seastar::httpd::http_server>, 2> _enabled_servers;
-    gate _pending_requests;
    alternator_callbacks_map _callbacks;
-
-    semaphore* _memory_limiter;
-
-    class json_parser {
-        static constexpr size_t yieldable_parsing_threshold = 16*KB;
-        std::string_view _raw_document;
-        rjson::value _parsed_document;
-        std::exception_ptr _current_exception;
-        semaphore _parsing_sem{1};
-        condition_variable _document_waiting;
-        condition_variable _document_parsed;
-        abort_source _as;
-        future<> _run_parse_json_thread;
-    public:
-        json_parser();
-        future<rjson::value> parse(std::string_view content);
-        future<> stop();
-    };
-    json_parser _json_parser;
-
 public:
-    server(executor& executor);
+    server(seastar::sharded<executor>& executor);

-    future<> init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds,
-            bool enforce_authorization, semaphore* memory_limiter);
-    future<> stop();
+    seastar::future<> init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds, bool enforce_authorization);
 private:
    void set_routes(seastar::httpd::routes& r);
    future<> verify_signature(const seastar::httpd::request& r);
-    future<executor::request_return_type> handle_api_request(std::unique_ptr<request>&& req);
+    future<json::json_return_type> handle_api_request(std::unique_ptr<request>&& req);
 };

 }
--- a/alternator/stats.cc
+++ b/alternator/stats.cc
@@ -85,12 +85,6 @@ stats::stats() : api_operations{} {
                    seastar::metrics::description("number of total operations via Alternator API")),
            seastar::metrics::make_total_operations("reads_before_write", reads_before_write,
                    seastar::metrics::description("number of performed read-before-write operations")),
-            seastar::metrics::make_total_operations("write_using_lwt", write_using_lwt,
-                    seastar::metrics::description("number of writes that used LWT")),
-            seastar::metrics::make_total_operations("shard_bounce_for_lwt", shard_bounce_for_lwt,
-                    seastar::metrics::description("number writes that had to be bounced from this shard because of LWT requirements")),
-            seastar::metrics::make_total_operations("requests_blocked_memory", requests_blocked_memory,
-                    seastar::metrics::description("Counts a number of requests blocked due to memory pressure.")),
            seastar::metrics::make_total_operations("filtered_rows_read_total", cql_stats.filtered_rows_read_total,
                    seastar::metrics::description("number of rows read during filtering operations")),
            seastar::metrics::make_total_operations("filtered_rows_matched_total", cql_stats.filtered_rows_matched_total,
--- a/alternator/stats.hh
+++ b/alternator/stats.hh
@@ -84,9 +84,6 @@ public:
    uint64_t total_operations = 0;
    uint64_t unsupported_operations = 0;
    uint64_t reads_before_write = 0;
-    uint64_t write_using_lwt = 0;
-    uint64_t shard_bounce_for_lwt = 0;
-    uint64_t requests_blocked_memory = 0;
    // CQL-derived stats
    cql3::cql_stats cql_stats;
 private:
--- a/alternator/tags_extension.hh
+++ b/alternator/tags_extension.hh
@@ -1,53 +0,0 @@
-/*
- * Copyright 2019 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "serializer.hh"
-#include "schema.hh"
-#include "db/extensions.hh"
-
-namespace alternator {
-
-class tags_extension : public schema_extension {
-public:
-    static constexpr auto NAME = "scylla_tags";
-
-    tags_extension() = default;
-    explicit tags_extension(const std::map<sstring, sstring>& tags) : _tags(std::move(tags)) {}
-    explicit tags_extension(bytes b) : _tags(tags_extension::deserialize(b)) {}
-    explicit tags_extension(const sstring& s) {
-        throw std::logic_error("Cannot create tags from string");
-    }
-    bytes serialize() const override {
-        return ser::serialize_to_buffer<bytes>(_tags);
-    }
-    static std::map<sstring, sstring> deserialize(bytes_view buffer) {
-        return ser::deserialize_from_buffer(buffer, boost::type<std::map<sstring, sstring>>());
-    }
-    const std::map<sstring, sstring>& tags() const {
-        return _tags;
-    }
-private:
-    std::map<sstring, sstring> _tags;
-};
-
-}
--- a/api/api-doc/column_family.json
+++ b/api/api-doc/column_family.json
@@ -70,7 +70,7 @@
            {
               "method":"POST",
               "summary":"Force a major compaction of this column family",
-               "type":"void",
+               "type":"string",
               "nickname":"force_major_compaction",
               "produces":[
                  "application/json"
@@ -380,54 +380,16 @@
         "operations":[
            {
               "method":"GET",
-               "summary":"check if the auto_compaction property is enabled for a given table",
+               "summary":"check if the auto compaction disabled",
               "type":"boolean",
-               "nickname":"get_auto_compaction",
+               "nickname":"is_auto_compaction_disabled",
               "produces":[
                  "application/json"
               ],
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The table name in keyspace:name format",
-                     "required":true,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"path"
-                  }
-               ]
-            },
-            {
-               "method":"POST",
-               "summary":"Enable table auto compaction",
-               "type":"void",
-               "nickname":"enable_auto_compaction",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"name",
-                     "description":"The table name in keyspace:name format",
-                     "required":true,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"path"
-                  }
-               ]
-            },
-            {
-               "method":"DELETE",
-               "summary":"Disable table auto compaction",
-               "type":"void",
-               "nickname":"disable_auto_compaction",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"name",
-                     "description":"The table name in keyspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
--- a/api/api-doc/error_injection.json
+++ b/api/api-doc/error_injection.json
@@ -1,90 +0,0 @@
-{
-   "apiVersion":"0.0.1",
-   "swaggerVersion":"1.2",
-   "basePath":"{{Protocol}}://{{Host}}",
-   "resourcePath":"/error_injection",
-   "produces":[
-      "application/json"
-   ],
-   "apis":[
-      {
-         "path":"/v2/error_injection/injection/{injection}",
-         "operations":[
-            {
-               "method":"POST",
-               "summary":"Activate an injection that triggers an error in code",
-               "type":"void",
-               "nickname":"enable_injection",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"injection",
-                     "description":"injection name, should correspond to an injection added in code",
-                     "required":true,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"path"
-                  },
-                  {
-                     "name":"one_shot",
-                     "description":"boolean flag indicating whether the injection should be enabled to trigger only once",
-                     "required":false,
-                     "allowMultiple":false,
-                     "type":"boolean",
-                     "paramType":"query"
-                  }
-               ]
-            },
-            {
-               "method":"DELETE",
-               "summary":"Deactivate an injection previously activated by the API",
-               "type":"void",
-               "nickname":"disable_injection",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"injection",
-                     "description":"injection name",
-                     "required":true,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"path"
-                  }
-               ]
-            }
-         ]
-      },
-      {
-         "path":"/v2/error_injection/injection",
-         "operations":[
-            {
-               "method":"GET",
-               "summary":"List all enabled injections on all shards, i.e. injections that will trigger an error in the code",
-               "type":"array",
-               "items":{
-                  "type":"string"
-               },
-               "nickname":"get_enabled_injections_on_all",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[]
-            },
-            {
-               "method":"DELETE",
-               "summary":"Deactivate all injections previously activated on all shards by the API",
-               "type":"void",
-               "nickname":"disable_on_all",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[]
-            }
-         ]
-      }
-   ]
-}
--- a/api/api-doc/storage_proxy.json
+++ b/api/api-doc/storage_proxy.json
@@ -641,21 +641,6 @@
        }
      ]
    },
-    {
-      "path": "/storage_proxy/metrics/cas_write/failed_read_round_optimization",
-      "operations": [
-        {
-          "method": "GET",
-          "summary": "Get cas write metrics",
-          "type": "long",
-          "nickname": "get_cas_write_metrics_failed_read_round_optimization",
-          "produces": [
-            "application/json"
-          ],
-          "parameters": []
-        }
-      ]
-    },
    {
      "path": "/storage_proxy/metrics/cas_read/unfinished_commit",
      "operations": [
--- a/api/api-doc/storage_service.json
+++ b/api/api-doc/storage_service.json
@@ -511,21 +511,6 @@
            }
         ]
      },
-      {
-         "path":"/storage_service/cdc_streams_check_and_repair",
-         "operations":[
-            {
-               "method":"POST",
-               "summary":"Checks that CDC streams reflect current cluster topology and regenerates them if not.",
-               "type":"void",
-               "nickname":"cdc_streams_check_and_repair",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[]
-            }
-         ]
-      },
      {
         "path":"/storage_service/snapshots",
         "operations":[
@@ -597,15 +582,7 @@
                  },
                  {
                     "name":"kn",
-                     "description":"Comma seperated keyspaces name that their snapshot will be deleted",
-                     "required":false,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"query"
-                  },
-                  {
-                     "name":"cf",
-                     "description":"an optional table name that its snapshot will be deleted",
+                     "description":"Comma seperated keyspaces name to snapshot",
                     "required":false,
                     "allowMultiple":false,
                     "type":"string",
--- a/api/api.cc
+++ b/api/api.cc
@@ -36,7 +36,6 @@
 #include "endpoint_snitch.hh"
 #include "compaction_manager.hh"
 #include "hinted_handoff.hh"
-#include "error_injection.hh"
 #include <seastar/http/exception.hh>
 #include "stream_manager.hh"
 #include "system.hh"
@@ -69,19 +68,13 @@ future<> set_server_init(http_context& ctx) {
        rb->set_api_doc(r);
        rb02->set_api_doc(r);
        rb02->register_api_file(r, "swagger20_header");
+        set_config(rb02, ctx, r);
        rb->register_function(r, "system",
                "The system related API");
        set_system(ctx, r);
    });
 }

-future<> set_server_config(http_context& ctx) {
-    auto rb02 = std::make_shared < api_registry_builder20 > (ctx.api_doc, "/v2");
-    return ctx.http_server.set_routes([&ctx, rb02](routes& r) {
-        set_config(rb02, ctx, r);
-    });
-}
-
 static future<> register_api(http_context& ctx, const sstring& api_name,
        const sstring api_desc,
        std::function<void(http_context& ctx, routes& r)> f) {
@@ -93,30 +86,10 @@ static future<> register_api(http_context& ctx, const sstring& api_name,
    });
 }

-future<> set_transport_controller(http_context& ctx, cql_transport::controller& ctl) {
-    return ctx.http_server.set_routes([&ctx, &ctl] (routes& r) { set_transport_controller(ctx, r, ctl); });
-}
-
-future<> unset_transport_controller(http_context& ctx) {
-    return ctx.http_server.set_routes([&ctx] (routes& r) { unset_transport_controller(ctx, r); });
-}
-
-future<> set_rpc_controller(http_context& ctx, thrift_controller& ctl) {
-    return ctx.http_server.set_routes([&ctx, &ctl] (routes& r) { set_rpc_controller(ctx, r, ctl); });
-}
-
-future<> unset_rpc_controller(http_context& ctx) {
-    return ctx.http_server.set_routes([&ctx] (routes& r) { unset_rpc_controller(ctx, r); });
-}
-
 future<> set_server_storage_service(http_context& ctx) {
    return register_api(ctx, "storage_service", "The storage service API", set_storage_service);
 }

-future<> set_server_snapshot(http_context& ctx) {
-    return ctx.http_server.set_routes([&ctx] (routes& r) { set_snapshot(ctx, r); });
-}
-
 future<> set_server_snitch(http_context& ctx) {
    return register_api(ctx, "endpoint_snitch_info", "The endpoint snitch info API", set_endpoint_snitch);
 }
@@ -180,9 +153,6 @@ future<> set_server_done(http_context& ctx) {
        rb->register_function(r, "collectd",
                "The collectd API");
        set_collectd(ctx, r);
-        rb->register_function(r, "error_injection",
-                "The error injection API");
-        set_error_injection(ctx, r);
    });
 }

--- a/api/api_init.hh
+++ b/api/api_init.hh
@@ -24,9 +24,6 @@
 #include <seastar/http/httpd.hh>

 namespace service { class load_meter; }
-namespace locator { class token_metadata; }
-namespace cql_transport { class controller; }
-class thrift_controller;

 namespace api {

@@ -37,24 +34,16 @@ struct http_context {
    distributed<database>& db;
    distributed<service::storage_proxy>& sp;
    service::load_meter& lmeter;
-    sharded<locator::token_metadata>& token_metadata;
-
    http_context(distributed<database>& _db,
            distributed<service::storage_proxy>& _sp,
-            service::load_meter& _lm, sharded<locator::token_metadata>& _tm)
-            : db(_db), sp(_sp), lmeter(_lm), token_metadata(_tm) {
+            service::load_meter& _lm)
+            : db(_db), sp(_sp), lmeter(_lm) {
    }
 };

 future<> set_server_init(http_context& ctx);
-future<> set_server_config(http_context& ctx);
 future<> set_server_snitch(http_context& ctx);
 future<> set_server_storage_service(http_context& ctx);
-future<> set_transport_controller(http_context& ctx, cql_transport::controller& ctl);
-future<> unset_transport_controller(http_context& ctx);
-future<> set_rpc_controller(http_context& ctx, thrift_controller& ctl);
-future<> unset_rpc_controller(http_context& ctx);
-future<> set_server_snapshot(http_context& ctx);
 future<> set_server_gossip(http_context& ctx);
 future<> set_server_load_sstable(http_context& ctx);
 future<> set_server_messaging_service(http_context& ctx);
--- a/api/cache_service.cc
+++ b/api/cache_service.cc
@@ -208,11 +208,9 @@ void set_cache_service(http_context& ctx, routes& r) {
    });

    cs::get_row_capacity.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return ctx.db.map_reduce0([](database& db) -> uint64_t {
-            return db.row_cache_tracker().region().occupancy().used_space();
-        }, uint64_t(0), std::plus<uint64_t>()).then([](const int64_t& res) {
-            return make_ready_future<json::json_return_type>(res);
-        });
+        return map_reduce_cf(ctx, uint64_t(0), [](const column_family& cf) {
+            return cf.get_row_cache().get_cache_tracker().region().occupancy().used_space();
+        }, std::plus<uint64_t>());
    });

    cs::get_row_hits.set(r, [&ctx] (std::unique_ptr<request> req) {
@@ -253,19 +251,15 @@ void set_cache_service(http_context& ctx, routes& r) {
    cs::get_row_size.set(r, [&ctx] (std::unique_ptr<request> req) {
        // In origin row size is the weighted size.
        // We currently do not support weights, so we use num entries instead
-        return ctx.db.map_reduce0([](database& db) -> uint64_t {
-            return db.row_cache_tracker().partitions();
-        }, uint64_t(0), std::plus<uint64_t>()).then([](const int64_t& res) {
-            return make_ready_future<json::json_return_type>(res);
-        });
+        return map_reduce_cf(ctx, uint64_t(0), [](const column_family& cf) {
+            return cf.get_row_cache().partitions();
+        }, std::plus<uint64_t>());
    });

    cs::get_row_entries.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return ctx.db.map_reduce0([](database& db) -> uint64_t {
-            return db.row_cache_tracker().partitions();
-        }, uint64_t(0), std::plus<uint64_t>()).then([](const int64_t& res) {
-            return make_ready_future<json::json_return_type>(res);
-        });
+        return map_reduce_cf(ctx, uint64_t(0), [](const column_family& cf) {
+            return cf.get_row_cache().partitions();
+        }, std::plus<uint64_t>());
    });

    cs::get_counter_capacity.set(r, [] (std::unique_ptr<request> req) {
--- a/api/collectd.cc
+++ b/api/collectd.cc
@@ -64,7 +64,7 @@ static const char* str_to_regex(const sstring& v) {
 void set_collectd(http_context& ctx, routes& r) {
    cd::get_collectd.set(r, [&ctx](std::unique_ptr<request> req) {

-        auto id = ::make_shared<scollectd::type_instance_id>(req->param["pluginid"],
+        auto id = make_shared<scollectd::type_instance_id>(req->param["pluginid"],
                req->get_query_param("instance"), req->get_query_param("type"),
                req->get_query_param("type_instance"));

--- a/api/column_family.cc
+++ b/api/column_family.cc
@@ -650,7 +650,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return s + sst->filter_size();
+                return s + sst->filter_size();
            });
        }, std::plus<uint64_t>());
    });
@@ -658,7 +658,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_all_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return s + sst->filter_size();
+                return s + sst->filter_size();
            });
        }, std::plus<uint64_t>());
    });
@@ -666,7 +666,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return s + sst->filter_memory_size();
+                return s + sst->filter_memory_size();
            });
        }, std::plus<uint64_t>());
    });
@@ -674,7 +674,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_all_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return s + sst->filter_memory_size();
+                return s + sst->filter_memory_size();
            });
        }, std::plus<uint64_t>());
    });
@@ -682,7 +682,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return s + sst->get_summary().memory_footprint();
+                return s + sst->get_summary().memory_footprint();
            });
        }, std::plus<uint64_t>());
    });
@@ -690,7 +690,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_all_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return s + sst->get_summary().memory_footprint();
+                return s + sst->get_summary().memory_footprint();
            });
        }, std::plus<uint64_t>());
    });
@@ -804,14 +804,14 @@ void set_column_family(http_context& ctx, routes& r) {

    cf::get_cas_propose.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) {
-            return cf.get_stats().estimated_cas_accept;
+            return cf.get_stats().estimated_cas_propose;
        },
        utils::estimated_histogram_merge, utils_json::estimated_histogram());
    });

    cf::get_cas_commit.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) {
-            return cf.get_stats().estimated_cas_learn;
+            return cf.get_stats().estimated_cas_commit;
        },
        utils::estimated_histogram_merge, utils_json::estimated_histogram());
    });
@@ -839,26 +839,11 @@ void set_column_family(http_context& ctx, routes& r) {
        return make_ready_future<json::json_return_type>(res);
    });

-    cf::get_auto_compaction.set(r, [&ctx] (const_req req) {
-        const utils::UUID& uuid = get_uuid(req.param["name"], ctx.db.local());
-        column_family& cf = ctx.db.local().find_column_family(uuid);
-        return !cf.is_auto_compaction_disabled_by_user();
-    });
-
-    cf::enable_auto_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
-        return foreach_column_family(ctx, req->param["name"], [](column_family &cf) {
-            cf.enable_auto_compaction();
-        }).then([] {
-            return make_ready_future<json::json_return_type>(json_void());
-        });
-    });
-
-    cf::disable_auto_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
-        return foreach_column_family(ctx, req->param["name"], [](column_family &cf) {
-            cf.disable_auto_compaction();
-        }).then([] {
-            return make_ready_future<json::json_return_type>(json_void());
-        });
+    cf::is_auto_compaction_disabled.set(r, [] (const_req req) {
+        // FIXME
+        // Auto compaction is currently always reported as disabled.
+        // This should be changed once there is an API to query it.
+        return true;
    });

    cf::get_built_indexes.set(r, [&ctx](std::unique_ptr<request> req) {
@@ -1009,15 +994,5 @@ void set_column_family(http_context& ctx, routes& r) {
        });
    });

-    cf::force_major_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
-        if (req->get_query_param("split_output") != "") {
-            fail(unimplemented::cause::API);
-        }
-        return foreach_column_family(ctx, req->param["name"], [](column_family &cf) {
-            return cf.compact_all_sstables();
-        }).then([] {
-            return make_ready_future<json::json_return_type>(json_void());
-        });
-    });
 }
 }
--- a/api/commitlog.cc
+++ b/api/commitlog.cc
@@ -20,7 +20,7 @@
 */

 #include "commitlog.hh"
-#include "db/commitlog/commitlog.hh"
+#include <db/commitlog/commitlog.hh>
 #include "api/api-doc/commitlog.json.hh"
 #include "database.hh"
 #include <vector>
--- a/api/error_injection.cc
+++ b/api/error_injection.cc
@@ -1,69 +0,0 @@
-/*
- * Copyright (C) 2020 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "api/api-doc/error_injection.json.hh"
-#include "api/api.hh"
-
-#include <seastar/http/exception.hh>
-#include "log.hh"
-#include "utils/error_injection.hh"
-#include "seastar/core/future-util.hh"
-
-namespace api {
-
-namespace hf = httpd::error_injection_json;
-
-void set_error_injection(http_context& ctx, routes& r) {
-
-    hf::enable_injection.set(r, [](std::unique_ptr<request> req) {
-        sstring injection = req->param["injection"];
-        bool one_shot = req->get_query_param("one_shot") == "True";
-        auto& errinj = utils::get_local_injector();
-        return errinj.enable_on_all(injection, one_shot).then([] {
-            return make_ready_future<json::json_return_type>(json::json_void());
-        });
-    });
-
-    hf::get_enabled_injections_on_all.set(r, [](std::unique_ptr<request> req) {
-        auto& errinj = utils::get_local_injector();
-        auto ret = errinj.enabled_injections_on_all();
-        return make_ready_future<json::json_return_type>(ret);
-    });
-
-    hf::disable_injection.set(r, [](std::unique_ptr<request> req) {
-        sstring injection = req->param["injection"];
-
-        auto& errinj = utils::get_local_injector();
-        return errinj.disable_on_all(injection).then([] {
-            return make_ready_future<json::json_return_type>(json::json_void());
-        });
-    });
-
-    hf::disable_on_all.set(r, [](std::unique_ptr<request> req) {
-        auto& errinj = utils::get_local_injector();
-        return errinj.disable_on_all().then([] {
-            return make_ready_future<json::json_return_type>(json::json_void());
-        });
-    });
-
-}
-
-} // namespace api
--- a/api/error_injection.hh
+++ b/api/error_injection.hh
@@ -1,30 +0,0 @@
-/*
- * Copyright (C) 2019 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "api.hh"
-
-namespace api {
-
-void set_error_injection(http_context& ctx, routes& r);
-
-}
--- a/api/gossiper.cc
+++ b/api/gossiper.cc
@@ -21,7 +21,7 @@

 #include "gossiper.hh"
 #include "api/api-doc/gossiper.json.hh"
-#include "gms/gossiper.hh"
+#include <gms/gossiper.hh>

 namespace api {
 using namespace json;
--- a/api/storage_proxy.cc
+++ b/api/storage_proxy.cc
@@ -27,7 +27,6 @@
 #include "db/config.hh"
 #include "utils/histogram.hh"
 #include "database.hh"
-#include "seastar/core/scheduling_specific.hh"

 namespace api {

@@ -35,70 +34,12 @@ namespace sp = httpd::storage_proxy_json;
 using proxy = service::storage_proxy;
 using namespace json;

-
-/**
- * This function implement a two dimentional map reduce where
- * the first level is a distributed storage_proxy class and the
- * second level is the stats per scheduling group class.
- * @param d -  a reference to the storage_proxy distributed class.
- * @param mapper -  the internal mapper that is used to map the internal
- * stat class into a value of type `V`.
- * @param reducer - the reducer that is used in both outer and inner
- * aggregations.
- * @param initial_value - the initial value to use for both aggregations
- * @return A future that resolves to the result of the aggregation.
- */
-template<typename V, typename Reducer, typename InnerMapper>
-future<V> two_dimensional_map_reduce(distributed<service::storage_proxy>& d,
-        InnerMapper mapper, Reducer reducer, V initial_value) {
-    return d.map_reduce0( [mapper, reducer, initial_value] (const service::storage_proxy& sp) {
-        return map_reduce_scheduling_group_specific<service::storage_proxy_stats::stats>(
-                mapper, reducer, initial_value, sp.get_stats_key());
-    }, initial_value, reducer);
+static future<utils::rate_moving_average>  sum_timed_rate(distributed<proxy>& d, utils::timed_rate_moving_average proxy::stats::*f) {
+    return d.map_reduce0([f](const proxy& p) {return (p.get_stats().*f).rate();}, utils::rate_moving_average(),
+            std::plus<utils::rate_moving_average>());
 }

-/**
- * This function implement a two dimentional map reduce where
- * the first level is a distributed storage_proxy class and the
- * second level is the stats per scheduling group class.
- * @param d -  a reference to the storage_proxy distributed class.
- * @param f - a field pointer which is the implicit internal reducer.
- * @param reducer - the reducer that is used in both outer and inner
- * aggregations.
- * @param initial_value - the initial value to use for both aggregations* @return
- * @return A future that resolves to the result of the aggregation.
- */
-template<typename V, typename Reducer, typename F>
-future<V> two_dimensional_map_reduce(distributed<service::storage_proxy>& d,
-        V F::*f, Reducer reducer, V initial_value) {
-    return two_dimensional_map_reduce(d, [f] (F& stats) {
-        return stats.*f;
-    }, reducer, initial_value);
-}
-
-/**
- * A partial Specialization of sum_stats for the storage proxy
- * case where the get stats function doesn't return a
- * stats object with fields but a per scheduling group
- * stats object, the name was also changed since functions
- * partial specialization is not supported in C++.
- *
- */
-template<typename V, typename F>
-future<json::json_return_type>  sum_stats_storage_proxy(distributed<proxy>& d, V F::*f) {
-    return two_dimensional_map_reduce(d, [f] (F& stats) { return stats.*f; }, std::plus<V>(), V(0)).then([] (V val) {
-        return make_ready_future<json::json_return_type>(val);
-    });
-}
-
-
-static future<utils::rate_moving_average>  sum_timed_rate(distributed<proxy>& d, utils::timed_rate_moving_average service::storage_proxy_stats::stats::*f) {
-    return two_dimensional_map_reduce(d, [f] (service::storage_proxy_stats::stats& stats) {
-        return (stats.*f).rate();
-    }, std::plus<utils::rate_moving_average>(), utils::rate_moving_average());
-}
-
-static future<json::json_return_type>  sum_timed_rate_as_obj(distributed<proxy>& d, utils::timed_rate_moving_average service::storage_proxy_stats::stats::*f) {
+static future<json::json_return_type>  sum_timed_rate_as_obj(distributed<proxy>& d, utils::timed_rate_moving_average proxy::stats::*f) {
    return sum_timed_rate(d, f).then([](const utils::rate_moving_average& val) {
        httpd::utils_json::rate_moving_average m;
        m = val;
@@ -110,89 +51,29 @@ httpd::utils_json::rate_moving_average_and_histogram get_empty_moving_average()
    return timer_to_json(utils::rate_moving_average_and_histogram());
 }

-static future<json::json_return_type>  sum_timed_rate_as_long(distributed<proxy>& d, utils::timed_rate_moving_average service::storage_proxy_stats::stats::*f) {
+static future<json::json_return_type>  sum_timed_rate_as_long(distributed<proxy>& d, utils::timed_rate_moving_average proxy::stats::*f) {
    return sum_timed_rate(d, f).then([](const utils::rate_moving_average& val) {
        return make_ready_future<json::json_return_type>(val.count);
    });
 }

-utils_json::estimated_histogram time_to_json_histogram(const utils::time_estimated_histogram& val) {
-    utils_json::estimated_histogram res;
-    for (size_t i = 0; i < val.size(); i++) {
-        res.buckets.push(val.get(i));
-        res.bucket_offsets.push(val.get_bucket_lower_limit(i));
-    }
-    return res;
-}
-
-static future<json::json_return_type>  sum_estimated_histogram(http_context& ctx, utils::time_estimated_histogram service::storage_proxy_stats::stats::*f) {
-
-    return two_dimensional_map_reduce(ctx.sp, f, utils::time_estimated_histogram_merge,
-            utils::time_estimated_histogram()).then([](const utils::time_estimated_histogram& val) {
-        return make_ready_future<json::json_return_type>(time_to_json_histogram(val));
-    });
-}
-
-static future<json::json_return_type>  sum_estimated_histogram(http_context& ctx, utils::estimated_histogram service::storage_proxy_stats::stats::*f) {
-
-    return two_dimensional_map_reduce(ctx.sp, f, utils::estimated_histogram_merge,
-            utils::estimated_histogram()).then([](const utils::estimated_histogram& val) {
+static future<json::json_return_type>  sum_estimated_histogram(http_context& ctx, utils::estimated_histogram proxy::stats::*f) {
+    return ctx.sp.map_reduce0([f](const proxy& p) {return p.get_stats().*f;}, utils::estimated_histogram(),
+            utils::estimated_histogram_merge).then([](const utils::estimated_histogram& val) {
        utils_json::estimated_histogram res;
        res = val;
        return make_ready_future<json::json_return_type>(res);
    });
 }

-static future<json::json_return_type>  total_latency(http_context& ctx, utils::timed_rate_moving_average_and_histogram service::storage_proxy_stats::stats::*f) {
-    return two_dimensional_map_reduce(ctx.sp, [f] (service::storage_proxy_stats::stats& stats) {
-            return (stats.*f).hist.mean * (stats.*f).hist.count;
-        }, std::plus<double>(), 0.0).then([](double val) {
+static future<json::json_return_type>  total_latency(http_context& ctx, utils::timed_rate_moving_average_and_histogram proxy::stats::*f) {
+    return ctx.sp.map_reduce0([f](const proxy& p) {return (p.get_stats().*f).hist.mean * (p.get_stats().*f).hist.count;}, 0.0,
+            std::plus<double>()).then([](double val) {
        int64_t res = val;
        return make_ready_future<json::json_return_type>(res);
    });
 }

-/**
- * A partial Specialization of sum_histogram_stats
- * for the storage proxy case where the get stats
- * function doesn't return a stats object with
- * fields but a per scheduling group stats object,
- * the name was also changed since function partial
- * specialization is not supported in C++.
- */
-template<typename F>
-future<json::json_return_type>
-sum_histogram_stats_storage_proxy(distributed<proxy>& d,
-        utils::timed_rate_moving_average_and_histogram F::*f) {
-    return two_dimensional_map_reduce(d, [f] (service::storage_proxy_stats::stats& stats) {
-        return (stats.*f).hist;
-    }, std::plus<utils::ihistogram>(), utils::ihistogram()).
-            then([](const utils::ihistogram& val) {
-        return make_ready_future<json::json_return_type>(to_json(val));
-    });
-}
-
-/**
- * A partial Specialization of sum_timer_stats for the
- * storage proxy case where the get stats function
- * doesn't return a stats object with fields but a
- * per scheduling group stats object, the name
- * was also changed since partial function specialization
- * is not supported in C++.
- */
-template<typename F>
-future<json::json_return_type>
-sum_timer_stats_storage_proxy(distributed<proxy>& d,
-        utils::timed_rate_moving_average_and_histogram F::*f) {
-
-    return two_dimensional_map_reduce(d, [f] (service::storage_proxy_stats::stats& stats) {
-        return (stats.*f).rate();
-    }, std::plus<utils::rate_moving_average_and_histogram>(),
-            utils::rate_moving_average_and_histogram()).then([](const utils::rate_moving_average_and_histogram& val) {
-        return make_ready_future<json::json_return_type>(timer_to_json(val));
-    });
-}
-
 void set_storage_proxy(http_context& ctx, routes& r) {
    sp::get_total_hints.set(r, [](std::unique_ptr<request> req)  {
        //TBD
@@ -342,15 +223,15 @@ void set_storage_proxy(http_context& ctx, routes& r) {
    });

    sp::get_read_repair_attempted.set(r, [&ctx](std::unique_ptr<request> req)  {
-        return sum_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read_repair_attempts);
+        return sum_stats(ctx.sp, &proxy::stats::read_repair_attempts);
    });

    sp::get_read_repair_repaired_blocking.set(r, [&ctx](std::unique_ptr<request> req)  {
-        return sum_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read_repair_repaired_blocking);
+        return sum_stats(ctx.sp, &proxy::stats::read_repair_repaired_blocking);
    });

    sp::get_read_repair_repaired_background.set(r, [&ctx](std::unique_ptr<request> req)  {
-        return sum_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read_repair_repaired_background);
+        return sum_stats(ctx.sp, &proxy::stats::read_repair_repaired_background);
    });

    sp::get_schema_versions.set(r, [](std::unique_ptr<request> req)  {
@@ -394,10 +275,6 @@ void set_storage_proxy(http_context& ctx, routes& r) {
        return sum_stats(ctx.sp, &proxy::stats::cas_write_condition_not_met);
    });

-    sp::get_cas_write_metrics_failed_read_round_optimization.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_stats(ctx.sp, &proxy::stats::cas_failed_read_round_optimization);
-    });
-
    sp::get_cas_read_metrics_unfinished_commit.set(r, [&ctx](std::unique_ptr<request> req) {
        return sum_stats(ctx.sp, &proxy::stats::cas_read_unfinished_commit);
    });
@@ -407,71 +284,71 @@ void set_storage_proxy(http_context& ctx, routes& r) {
    });

    sp::get_read_metrics_timeouts.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::read_timeouts);
+        return sum_timed_rate_as_long(ctx.sp, &proxy::stats::read_timeouts);
    });

    sp::get_read_metrics_unavailables.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::read_unavailables);
+        return sum_timed_rate_as_long(ctx.sp, &proxy::stats::read_unavailables);
    });

    sp::get_range_metrics_timeouts.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::range_slice_timeouts);
+        return sum_timed_rate_as_long(ctx.sp, &proxy::stats::range_slice_timeouts);
    });

    sp::get_range_metrics_unavailables.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::range_slice_unavailables);
+        return sum_timed_rate_as_long(ctx.sp, &proxy::stats::range_slice_unavailables);
    });

    sp::get_write_metrics_timeouts.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::write_timeouts);
+        return sum_timed_rate_as_long(ctx.sp, &proxy::stats::write_timeouts);
    });

    sp::get_write_metrics_unavailables.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::write_unavailables);
+        return sum_timed_rate_as_long(ctx.sp, &proxy::stats::write_unavailables);
    });

    sp::get_read_metrics_timeouts_rates.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::read_timeouts);
+        return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::read_timeouts);
    });

    sp::get_read_metrics_unavailables_rates.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::read_unavailables);
+        return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::read_unavailables);
    });

    sp::get_range_metrics_timeouts_rates.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::range_slice_timeouts);
+        return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::range_slice_timeouts);
    });

    sp::get_range_metrics_unavailables_rates.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::range_slice_unavailables);
+        return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::range_slice_unavailables);
    });

    sp::get_write_metrics_timeouts_rates.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::write_timeouts);
+        return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::write_timeouts);
    });

    sp::get_write_metrics_unavailables_rates.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::write_unavailables);
+        return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::write_unavailables);
    });

    sp::get_range_metrics_latency_histogram_depricated.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_histogram_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::range);
+        return sum_histogram_stats(ctx.sp, &proxy::stats::range);
    });

    sp::get_write_metrics_latency_histogram_depricated.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_histogram_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::write);
+        return sum_histogram_stats(ctx.sp, &proxy::stats::write);
    });

    sp::get_read_metrics_latency_histogram_depricated.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_histogram_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read);
+        return sum_histogram_stats(ctx.sp, &proxy::stats::read);
    });

    sp::get_range_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_timer_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::range);
+        return sum_timer_stats(ctx.sp, &proxy::stats::range);
    });

    sp::get_write_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_timer_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::write);
+        return sum_timer_stats(ctx.sp, &proxy::stats::write);
    });
    sp::get_cas_write_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
        return sum_timer_stats(ctx.sp, &proxy::stats::cas_write);
@@ -490,30 +367,30 @@ void set_storage_proxy(http_context& ctx, routes& r) {
    });

    sp::get_read_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_timer_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read);
+        return sum_timer_stats(ctx.sp, &proxy::stats::read);
    });

    sp::get_read_estimated_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_estimated_histogram(ctx, &service::storage_proxy_stats::stats::estimated_read);
+        return sum_estimated_histogram(ctx, &proxy::stats::estimated_read);
    });

    sp::get_read_latency.set(r, [&ctx](std::unique_ptr<request> req) {
-        return total_latency(ctx, &service::storage_proxy_stats::stats::read);
+        return total_latency(ctx, &proxy::stats::read);
    });
    sp::get_write_estimated_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_estimated_histogram(ctx, &service::storage_proxy_stats::stats::estimated_write);
+        return sum_estimated_histogram(ctx, &proxy::stats::estimated_write);
    });

    sp::get_write_latency.set(r, [&ctx](std::unique_ptr<request> req) {
-        return total_latency(ctx, &service::storage_proxy_stats::stats::write);
+        return total_latency(ctx, &proxy::stats::write);
    });

    sp::get_range_estimated_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_timer_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::range);
+        return sum_timer_stats(ctx.sp, &proxy::stats::range);
    });

    sp::get_range_latency.set(r, [&ctx](std::unique_ptr<request> req) {
-        return total_latency(ctx, &service::storage_proxy_stats::stats::range);
+        return total_latency(ctx, &proxy::stats::range);
    });
 }

--- a/api/storage_service.cc
+++ b/api/storage_service.cc
@@ -41,8 +41,8 @@
 #include "sstables/sstables.hh"
 #include "database.hh"
 #include "db/extensions.hh"
-#include "transport/controller.hh"
-#include "thrift/controller.hh"
+
+sstables::sstable::version_types get_highest_supported_format();

 namespace api {

@@ -74,97 +74,35 @@ static ss::token_range token_range_endpoints_to_json(const dht::token_range_endp
    return r;
 }

-using ks_cf_func = std::function<future<json::json_return_type>(http_context&, std::unique_ptr<request>, sstring, std::vector<sstring>)>;
-
-static auto wrap_ks_cf(http_context &ctx, ks_cf_func f) {
-    return [&ctx, f = std::move(f)](std::unique_ptr<request> req) {
-        auto keyspace = validate_keyspace(ctx, req->param);
-        auto column_families = split_cf(req->get_query_param("cf"));
-        if (column_families.empty()) {
-            column_families = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data());
-        }
-        return f(ctx, std::move(req), std::move(keyspace), std::move(column_families));
-    };
-}
-
-future<json::json_return_type> set_tables_autocompaction(http_context& ctx, const sstring &keyspace, std::vector<sstring> tables, bool enabled) {
-    if (tables.empty()) {
-        tables = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data());
-    }
-
-    return service::get_local_storage_service().set_tables_autocompaction(keyspace, tables, enabled).then([]{
-        return make_ready_future<json::json_return_type>(json_void());
-    });
-}
-
-void set_transport_controller(http_context& ctx, routes& r, cql_transport::controller& ctl) {
-    ss::start_native_transport.set(r, [&ctl](std::unique_ptr<request> req) {
-        return ctl.start_server().then([] {
-            return make_ready_future<json::json_return_type>(json_void());
-        });
-    });
-
-    ss::stop_native_transport.set(r, [&ctl](std::unique_ptr<request> req) {
-        return ctl.stop_server().then([] {
-            return make_ready_future<json::json_return_type>(json_void());
-        });
-    });
-
-    ss::is_native_transport_running.set(r, [&ctl] (std::unique_ptr<request> req) {
-        return ctl.is_server_running().then([] (bool running) {
-            return make_ready_future<json::json_return_type>(running);
-        });
-    });
-}
-
-void unset_transport_controller(http_context& ctx, routes& r) {
-    ss::start_native_transport.unset(r);
-    ss::stop_native_transport.unset(r);
-    ss::is_native_transport_running.unset(r);
-}
-
-void set_rpc_controller(http_context& ctx, routes& r, thrift_controller& ctl) {
-    ss::stop_rpc_server.set(r, [&ctl](std::unique_ptr<request> req) {
-        return ctl.stop_server().then([] {
-            return make_ready_future<json::json_return_type>(json_void());
-        });
-    });
-
-    ss::start_rpc_server.set(r, [&ctl](std::unique_ptr<request> req) {
-        return ctl.start_server().then([] {
-            return make_ready_future<json::json_return_type>(json_void());
-        });
-    });
-
-    ss::is_rpc_server_running.set(r, [&ctl] (std::unique_ptr<request> req) {
-        return ctl.is_server_running().then([] (bool running) {
-            return make_ready_future<json::json_return_type>(running);
-        });
-    });
-}
-
-void unset_rpc_controller(http_context& ctx, routes& r) {
-    ss::stop_rpc_server.unset(r);
-    ss::start_rpc_server.unset(r);
-    ss::is_rpc_server_running.unset(r);
-}
-
 void set_storage_service(http_context& ctx, routes& r) {
+    using ks_cf_func = std::function<future<json::json_return_type>(std::unique_ptr<request>, sstring, std::vector<sstring>)>;
+
+    auto wrap_ks_cf = [&ctx](ks_cf_func f) {
+        return [&ctx, f = std::move(f)](std::unique_ptr<request> req) {
+            auto keyspace = validate_keyspace(ctx, req->param);
+            auto column_families = split_cf(req->get_query_param("cf"));
+            if (column_families.empty()) {
+                column_families = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data());
+            }
+            return f(std::move(req), std::move(keyspace), std::move(column_families));
+        };
+    };
+
    ss::local_hostid.set(r, [](std::unique_ptr<request> req) {
        return db::system_keyspace::get_local_host_id().then([](const utils::UUID& id) {
            return make_ready_future<json::json_return_type>(id.to_sstring());
        });
    });

-    ss::get_tokens.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.token_metadata.local().sorted_tokens(), [](const dht::token& i) {
+    ss::get_tokens.set(r, [] (std::unique_ptr<request> req) {
+        return make_ready_future<json::json_return_type>(stream_range_as_array(service::get_local_storage_service().get_token_metadata().sorted_tokens(), [](const dht::token& i) {
           return boost::lexical_cast<std::string>(i);
        }));
    });

-    ss::get_node_tokens.set(r, [&ctx] (std::unique_ptr<request> req) {
+    ss::get_node_tokens.set(r, [] (std::unique_ptr<request> req) {
        gms::inet_address addr(req->param["endpoint"]);
-        return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.token_metadata.local().get_tokens(addr), [](const dht::token& i) {
+        return make_ready_future<json::json_return_type>(stream_range_as_array(service::get_local_storage_service().get_token_metadata().get_tokens(addr), [](const dht::token& i) {
           return boost::lexical_cast<std::string>(i);
       }));
    });
@@ -182,8 +120,8 @@ void set_storage_service(http_context& ctx, routes& r) {
        }));
    });

-    ss::get_leaving_nodes.set(r, [&ctx](const_req req) {
-        return container_to_vec(ctx.token_metadata.local().get_leaving_endpoints());
+    ss::get_leaving_nodes.set(r, [](const_req req) {
+        return container_to_vec(service::get_local_storage_service().get_token_metadata().get_leaving_endpoints());
    });

    ss::get_moving_nodes.set(r, [](const_req req) {
@@ -191,8 +129,8 @@ void set_storage_service(http_context& ctx, routes& r) {
        return container_to_vec(addr);
    });

-    ss::get_joining_nodes.set(r, [&ctx](const_req req) {
-        auto points = ctx.token_metadata.local().get_bootstrap_tokens();
+    ss::get_joining_nodes.set(r, [](const_req req) {
+        auto points = service::get_local_storage_service().get_token_metadata().get_bootstrap_tokens();
        std::unordered_set<sstring> addr;
        for (auto i: points) {
            addr.insert(boost::lexical_cast<std::string>(i.second));
@@ -244,9 +182,10 @@ void set_storage_service(http_context& ctx, routes& r) {
        return make_ready_future<json::json_return_type>(stream_range_as_array(service::get_local_storage_service().describe_ring(keyspace), token_range_endpoints_to_json));
    });

-    ss::get_host_id_map.set(r, [&ctx](const_req req) {
+    ss::get_host_id_map.set(r, [](const_req req) {
        std::vector<ss::mapper> res;
-        return map_to_key_value(ctx.token_metadata.local().get_endpoint_to_host_id_map_for_reading(), res);
+        return map_to_key_value(service::get_local_storage_service().
+                get_token_metadata().get_endpoint_to_host_id_map_for_reading(), res);
    });

    ss::get_load.set(r, [&ctx](std::unique_ptr<request> req) {
@@ -279,12 +218,67 @@ void set_storage_service(http_context& ctx, routes& r) {
                req.get_query_param("key")));
    });

-    ss::cdc_streams_check_and_repair.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return service::get_local_storage_service().check_and_repair_cdc_streams().then([] {
+    ss::get_snapshot_details.set(r, [](std::unique_ptr<request> req) {
+        return service::get_local_storage_service().get_snapshot_details().then([] (auto result) {
+            std::vector<ss::snapshots> res;
+            for (auto& map: result) {
+                ss::snapshots all_snapshots;
+                all_snapshots.key = map.first;
+
+                std::vector<ss::snapshot> snapshot;
+                for (auto& cf: map.second) {
+                    ss::snapshot s;
+                    s.ks = cf.ks;
+                    s.cf = cf.cf;
+                    s.live = cf.live;
+                    s.total = cf.total;
+                    snapshot.push_back(std::move(s));
+                }
+                all_snapshots.value = std::move(snapshot);
+                res.push_back(std::move(all_snapshots));
+            }
+            return make_ready_future<json::json_return_type>(std::move(res));
+        });
+    });
+
+    ss::take_snapshot.set(r, [](std::unique_ptr<request> req) {
+        auto tag = req->get_query_param("tag");
+        auto column_family = req->get_query_param("cf");
+
+        std::vector<sstring> keynames = split(req->get_query_param("kn"), ",");
+
+        auto resp = make_ready_future<>();
+        if (column_family.empty()) {
+            resp = service::get_local_storage_service().take_snapshot(tag, keynames);
+        } else {
+            if (keynames.empty()) {
+                throw httpd::bad_param_exception("The keyspace of column families must be specified");
+            }
+            if (keynames.size() > 1) {
+                throw httpd::bad_param_exception("Only one keyspace allowed when specifying a column family");
+            }
+            resp = service::get_local_storage_service().take_column_family_snapshot(keynames[0], column_family, tag);
+        }
+        return resp.then([] {
            return make_ready_future<json::json_return_type>(json_void());
        });
    });

+    ss::del_snapshot.set(r, [](std::unique_ptr<request> req) {
+        auto tag = req->get_query_param("tag");
+
+        std::vector<sstring> keynames = split(req->get_query_param("kn"), ",");
+        return service::get_local_storage_service().clear_snapshot(tag, keynames).then([] {
+            return make_ready_future<json::json_return_type>(json_void());
+        });
+    });
+
+    ss::true_snapshots_size.set(r, [](std::unique_ptr<request> req) {
+        return service::get_local_storage_service().true_snapshots_size().then([] (int64_t size) {
+            return make_ready_future<json::json_return_type>(size);
+        });
+    });
+
    ss::force_keyspace_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
        auto keyspace = validate_keyspace(ctx, req->param);
        auto column_families = split_cf(req->get_query_param("cf"));
@@ -322,8 +316,8 @@ void set_storage_service(http_context& ctx, routes& r) {
                for (auto cf : column_families) {
                    column_families_vec.push_back(&db.find_column_family(keyspace, cf));
                }
-                return parallel_for_each(column_families_vec, [&cm, &db] (column_family* cf) {
-                    return cm.perform_cleanup(db, cf);
+                return parallel_for_each(column_families_vec, [&cm] (column_family* cf) {
+                    return cm.perform_cleanup(cf);
                });
            }).then([]{
                return make_ready_future<json::json_return_type>(0);
@@ -331,7 +325,32 @@ void set_storage_service(http_context& ctx, routes& r) {
        });
    });

-    ss::upgrade_sstables.set(r, wrap_ks_cf(ctx, [] (http_context& ctx, std::unique_ptr<request> req, sstring keyspace, std::vector<sstring> column_families) {
+    ss::scrub.set(r, wrap_ks_cf([&ctx](std::unique_ptr<request> req, sstring keyspace, std::vector<sstring> column_families) {
+        // TODO: respect this
+        auto skip_corrupted = req->get_query_param("skip_corrupted");
+
+        auto f = make_ready_future<>();
+        if (!req_param<bool>(*req, "disable_snapshot", false)) {
+            auto tag = format("pre-scrub-{:d}", db_clock::now().time_since_epoch().count());
+            f = parallel_for_each(column_families, [keyspace, tag](sstring cf) {
+                return service::get_local_storage_service().take_column_family_snapshot(keyspace, cf, tag);
+            });
+        }
+
+        return f.then([&ctx, keyspace, column_families] {
+            return ctx.db.invoke_on_all([=] (database& db) {
+                return do_for_each(column_families, [=, &db](sstring cfname) {
+                    auto& cm = db.get_compaction_manager();
+                    auto& cf = db.find_column_family(keyspace, cfname);
+                    return cm.perform_sstable_scrub(&cf);
+                });
+            });
+        }).then([]{
+            return make_ready_future<json::json_return_type>(0);
+        });
+    }));
+
+    ss::upgrade_sstables.set(r, wrap_ks_cf([&ctx](std::unique_ptr<request> req, sstring keyspace, std::vector<sstring> column_families) {
        bool exclude_current_version = req_param<bool>(*req, "exclude_current_version", false);

        return ctx.db.invoke_on_all([=] (database& db) {
@@ -549,6 +568,42 @@ void set_storage_service(http_context& ctx, routes& r) {
        });
    });

+    ss::stop_rpc_server.set(r, [](std::unique_ptr<request> req) {
+        return service::get_local_storage_service().stop_rpc_server().then([] {
+            return make_ready_future<json::json_return_type>(json_void());
+        });
+    });
+
+    ss::start_rpc_server.set(r, [](std::unique_ptr<request> req) {
+        return service::get_local_storage_service().start_rpc_server().then([] {
+            return make_ready_future<json::json_return_type>(json_void());
+        });
+    });
+
+    ss::is_rpc_server_running.set(r, [] (std::unique_ptr<request> req) {
+        return service::get_local_storage_service().is_rpc_server_running().then([] (bool running) {
+            return make_ready_future<json::json_return_type>(running);
+        });
+    });
+
+    ss::start_native_transport.set(r, [](std::unique_ptr<request> req) {
+        return service::get_local_storage_service().start_native_transport().then([] {
+            return make_ready_future<json::json_return_type>(json_void());
+        });
+    });
+
+    ss::stop_native_transport.set(r, [](std::unique_ptr<request> req) {
+        return service::get_local_storage_service().stop_native_transport().then([] {
+            return make_ready_future<json::json_return_type>(json_void());
+        });
+    });
+
+    ss::is_native_transport_running.set(r, [] (std::unique_ptr<request> req) {
+        return service::get_local_storage_service().is_native_transport_running().then([] (bool running) {
+            return make_ready_future<json::json_return_type>(running);
+        });
+    });
+
    ss::join_ring.set(r, [](std::unique_ptr<request> req) {
        return make_ready_future<json::json_return_type>(json_void());
    });
@@ -678,7 +733,7 @@ void set_storage_service(http_context& ctx, routes& r) {

    ss::set_trace_probability.set(r, [](std::unique_ptr<request> req) {
        auto probability = req->get_query_param("probability");
-        return futurize_invoke([probability] {
+        return futurize<json::json_return_type>::apply([probability] {
            double real_prob = std::stod(probability.c_str());
            return tracing::tracing::tracing_instance().invoke_on_all([real_prob] (auto& local_tracing) {
                local_tracing.set_trace_probability(real_prob);
@@ -733,17 +788,19 @@ void set_storage_service(http_context& ctx, routes& r) {
    });

    ss::enable_auto_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
+        //TBD
+        unimplemented();
        auto keyspace = validate_keyspace(ctx, req->param);
-        auto tables = split_cf(req->get_query_param("cf"));
-
-        return set_tables_autocompaction(ctx, keyspace, tables, true);
+        auto column_family = req->get_query_param("cf");
+        return make_ready_future<json::json_return_type>(json_void());
    });

    ss::disable_auto_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
+        //TBD
+        unimplemented();
        auto keyspace = validate_keyspace(ctx, req->param);
-        auto tables = split_cf(req->get_query_param("cf"));
-
-        return set_tables_autocompaction(ctx, keyspace, tables, false);
+        auto column_family = req->get_query_param("cf");
+        return make_ready_future<json::json_return_type>(json_void());
    });

    ss::deliver_hints.set(r, [](std::unique_ptr<request> req) {
@@ -979,107 +1036,4 @@ void set_storage_service(http_context& ctx, routes& r) {

 }

-void set_snapshot(http_context& ctx, routes& r) {
-    ss::get_snapshot_details.set(r, [](std::unique_ptr<request> req) {
-        std::function<future<>(output_stream<char>&&)> f = [](output_stream<char>&& s) {
-            return do_with(output_stream<char>(std::move(s)), true, [] (output_stream<char>& s, bool& first){
-                return s.write("[").then([&s, &first] {
-                    return service::get_local_storage_service().get_snapshot_details().then([&s, &first] (std::unordered_map<sstring, std::vector<service::storage_service::snapshot_details>>&& result) {
-                        return do_with(std::move(result), [&s, &first](const std::unordered_map<sstring, std::vector<service::storage_service::snapshot_details>>& result) {
-                            return do_for_each(result, [&s, &result,&first](std::tuple<sstring, std::vector<service::storage_service::snapshot_details>>&& map){
-                                return do_with(ss::snapshots(), [&s, &first, &result, &map](ss::snapshots& all_snapshots) {
-                                    all_snapshots.key = std::get<0>(map);
-                                    future<> f = first ? make_ready_future<>() : s.write(", ");
-                                    first = false;
-                                    std::vector<ss::snapshot> snapshot;
-                                    for (auto& cf: std::get<1>(map)) {
-                                        ss::snapshot snp;
-                                        snp.ks = cf.ks;
-                                        snp.cf = cf.cf;
-                                        snp.live = cf.live;
-                                        snp.total = cf.total;
-                                        snapshot.push_back(std::move(snp));
-                                    }
-                                    all_snapshots.value = std::move(snapshot);
-                                    return f.then([&s, &all_snapshots] {
-                                        return all_snapshots.write(s);
-                                    });
-                                });
-                            });
-                        });
-                    }).then([&s] {
-                        return s.write("]").then([&s] {
-                            return s.close();
-                        });
-                    });
-                });
-            });
-        };
-        return make_ready_future<json::json_return_type>(std::move(f));
-    });
-
-    ss::take_snapshot.set(r, [](std::unique_ptr<request> req) {
-        auto tag = req->get_query_param("tag");
-        auto column_families = split(req->get_query_param("cf"), ",");
-
-        std::vector<sstring> keynames = split(req->get_query_param("kn"), ",");
-
-        auto resp = make_ready_future<>();
-        if (column_families.empty()) {
-            resp = service::get_local_storage_service().take_snapshot(tag, keynames);
-        } else {
-            if (keynames.empty()) {
-                throw httpd::bad_param_exception("The keyspace of column families must be specified");
-            }
-            if (keynames.size() > 1) {
-                throw httpd::bad_param_exception("Only one keyspace allowed when specifying a column family");
-            }
-            resp = service::get_local_storage_service().take_column_family_snapshot(keynames[0], column_families, tag);
-        }
-        return resp.then([] {
-            return make_ready_future<json::json_return_type>(json_void());
-        });
-    });
-
-    ss::del_snapshot.set(r, [](std::unique_ptr<request> req) {
-        auto tag = req->get_query_param("tag");
-        auto column_family = req->get_query_param("cf");
-
-        std::vector<sstring> keynames = split(req->get_query_param("kn"), ",");
-        return service::get_local_storage_service().clear_snapshot(tag, keynames, column_family).then([] {
-            return make_ready_future<json::json_return_type>(json_void());
-        });
-    });
-
-    ss::true_snapshots_size.set(r, [](std::unique_ptr<request> req) {
-        return service::get_local_storage_service().true_snapshots_size().then([] (int64_t size) {
-            return make_ready_future<json::json_return_type>(size);
-        });
-    });
-
-    ss::scrub.set(r, wrap_ks_cf(ctx, [] (http_context& ctx, std::unique_ptr<request> req, sstring keyspace, std::vector<sstring> column_families) {
-        const auto skip_corrupted = req_param<bool>(*req, "skip_corrupted", false);
-
-        auto f = make_ready_future<>();
-        if (!req_param<bool>(*req, "disable_snapshot", false)) {
-            auto tag = format("pre-scrub-{:d}", db_clock::now().time_since_epoch().count());
-            f = parallel_for_each(column_families, [keyspace, tag](sstring cf) {
-                return service::get_local_storage_service().take_column_family_snapshot(keyspace, cf, tag);
-            });
-        }
-
-        return f.then([&ctx, keyspace, column_families, skip_corrupted] {
-            return ctx.db.invoke_on_all([=] (database& db) {
-                return do_for_each(column_families, [=, &db](sstring cfname) {
-                    auto& cm = db.get_compaction_manager();
-                    auto& cf = db.find_column_family(keyspace, cfname);
-                    return cm.perform_sstable_scrub(&cf, skip_corrupted);
-                });
-            });
-        }).then([]{
-            return make_ready_future<json::json_return_type>(0);
-        });
-    }));
-}
-
 }
--- a/api/storage_service.hh
+++ b/api/storage_service.hh
@@ -23,16 +23,8 @@

 #include "api.hh"

-namespace cql_transport { class controller; }
-class thrift_controller;
-
 namespace api {

 void set_storage_service(http_context& ctx, routes& r);
-void set_transport_controller(http_context& ctx, routes& r, cql_transport::controller& ctl);
-void unset_transport_controller(http_context& ctx, routes& r);
-void set_rpc_controller(http_context& ctx, routes& r, thrift_controller& ctl);
-void unset_rpc_controller(http_context& ctx, routes& r);
-void set_snapshot(http_context& ctx, routes& r);

 }
--- a/api/system.cc
+++ b/api/system.cc
@@ -22,7 +22,6 @@
 #include "api/api-doc/system.json.hh"
 #include "api/api.hh"

-#include <seastar/core/reactor.hh>
 #include <seastar/http/exception.hh>
 #include "log.hh"

--- a/atomic_cell.hh
+++ b/atomic_cell.hh
@@ -29,6 +29,7 @@
 #include <seastar/net//byteorder.hh>
 #include <cstdint>
 #include <iosfwd>
+#include <seastar/util/gcc6-concepts.hh>
 #include "data/cell.hh"
 #include "data/schema_info.hh"
 #include "imr/utils.hh"
--- a/auth/allow_all_authenticator.hh
+++ b/auth/allow_all_authenticator.hh
@@ -52,7 +52,7 @@ public:
        return make_ready_future<>();
    }

-    virtual std::string_view qualified_java_name() const override {
+    virtual const sstring& qualified_java_name() const override {
        return allow_all_authenticator_name();
    }

--- a/auth/allow_all_authorizer.hh
+++ b/auth/allow_all_authorizer.hh
@@ -49,7 +49,7 @@ public:
        return make_ready_future<>();
    }

-    virtual std::string_view qualified_java_name() const override {
+    virtual const sstring& qualified_java_name() const override {
        return allow_all_authorizer_name();
    }

--- a/auth/authenticator.hh
+++ b/auth/authenticator.hh
@@ -96,7 +96,7 @@ public:
    ///
    /// A fully-qualified (class with package) Java-like name for this implementation.
    ///
-    virtual std::string_view qualified_java_name() const = 0;
+    virtual const sstring& qualified_java_name() const = 0;

    virtual bool require_authentication() const = 0;

--- a/auth/authorizer.hh
+++ b/auth/authorizer.hh
@@ -100,7 +100,7 @@ public:
    ///
    /// A fully-qualified (class with package) Java-like name for this implementation.
    ///
-    virtual std::string_view qualified_java_name() const = 0;
+    virtual const sstring& qualified_java_name() const = 0;

    ///
    /// Query for the permissions granted directly to a role for a particular \ref resource (and not any of its
--- a/auth/common.cc
+++ b/auth/common.cc
@@ -59,22 +59,22 @@ future<> do_after_system_ready(seastar::abort_source& as, seastar::noncopyable_f
    }).discard_result();
 }

-static future<> create_metadata_table_if_missing_impl(
+future<> create_metadata_table_if_missing(
        std::string_view table_name,
        cql3::query_processor& qp,
        std::string_view cql,
        ::service::migration_manager& mm) {
    static auto ignore_existing = [] (seastar::noncopyable_function<future<>()> func) {
-        return futurize_invoke(std::move(func)).handle_exception_type([] (exceptions::already_exists_exception& ignored) { });
+        return futurize_apply(std::move(func)).handle_exception_type([] (exceptions::already_exists_exception& ignored) { });
    };
    auto& db = qp.db();
-    auto parsed_statement = cql3::query_processor::parse_statement(cql);
-    auto& parsed_cf_statement = static_cast<cql3::statements::raw::cf_statement&>(*parsed_statement);
+    auto parsed_statement = static_pointer_cast<cql3::statements::raw::cf_statement>(
+            cql3::query_processor::parse_statement(cql));

-    parsed_cf_statement.prepare_keyspace(meta::AUTH_KS);
+    parsed_statement->prepare_keyspace(meta::AUTH_KS);

    auto statement = static_pointer_cast<cql3::statements::create_table_statement>(
-            parsed_cf_statement.prepare(db, qp.get_cql_stats())->statement);
+            parsed_statement->prepare(db, qp.get_cql_stats())->statement);

    const auto schema = statement->get_cf_meta_data(qp.db());
    const auto uuid = generate_legacy_id(schema->ks_name(), schema->cf_name());
@@ -85,14 +85,7 @@ static future<> create_metadata_table_if_missing_impl(
    return ignore_existing([&mm, table = std::move(table)] () {
        return mm.announce_new_column_family(table, false);
    });
-}

-future<> create_metadata_table_if_missing(
-        std::string_view table_name,
-        cql3::query_processor& qp,
-        std::string_view cql,
-        ::service::migration_manager& mm) noexcept {
-    return futurize_invoke(create_metadata_table_if_missing_impl, table_name, qp, cql, mm);
 }

 future<> wait_for_schema_agreement(::service::migration_manager& mm, const database& db, seastar::abort_source& as) {
--- a/auth/common.hh
+++ b/auth/common.hh
@@ -27,10 +27,9 @@
 #include <seastar/core/future.hh>
 #include <seastar/core/abort_source.hh>
 #include <seastar/util/noncopyable_function.hh>
-#include <seastar/core/seastar.hh>
+#include <seastar/core/reactor.hh>
 #include <seastar/core/resource.hh>
 #include <seastar/core/sstring.hh>
-#include <seastar/core/smp.hh>

 #include "log.hh"
 #include "seastarx.hh"
@@ -62,7 +61,7 @@ extern const sstring AUTH_PACKAGE_NAME;

 template <class Task>
 future<> once_among_shards(Task&& f) {
-    if (this_shard_id() == 0u) {
+    if (engine().cpu_id() == 0u) {
        return f();
    }

@@ -80,7 +79,7 @@ future<> create_metadata_table_if_missing(
        std::string_view table_name,
        cql3::query_processor&,
        std::string_view cql,
-        ::service::migration_manager&) noexcept;
+        ::service::migration_manager&);

 future<> wait_for_schema_agreement(::service::migration_manager&, const database&, seastar::abort_source&);

--- a/auth/default_authorizer.cc
+++ b/auth/default_authorizer.cc
@@ -51,7 +51,7 @@ extern "C" {

 #include <boost/algorithm/string/join.hpp>
 #include <boost/range.hpp>
-#include <seastar/core/seastar.hh>
+#include <seastar/core/reactor.hh>

 #include "auth/authenticated_user.hh"
 #include "auth/common.hh"
@@ -101,7 +101,7 @@ bool default_authorizer::legacy_metadata_exists() const {
 future<bool> default_authorizer::any_granted() const {
    static const sstring query = format("SELECT * FROM {}.{} LIMIT 1", meta::AUTH_KS, PERMISSIONS_CF);

-    return _qp.execute_internal(
+    return _qp.process(
            query,
            db::consistency_level::LOCAL_ONE,
            infinite_timeout_config,
@@ -115,7 +115,7 @@ future<> default_authorizer::migrate_legacy_metadata() const {
    alogger.info("Starting migration of legacy permissions metadata.");
    static const sstring query = format("SELECT * FROM {}.{}", meta::AUTH_KS, legacy_table_name);

-    return _qp.execute_internal(
+    return _qp.process(
            query,
            db::consistency_level::LOCAL_ONE,
            infinite_timeout_config).then([this](::shared_ptr<cql3::untyped_result_set> results) {
@@ -195,7 +195,7 @@ default_authorizer::authorize(const role_or_anonymous& maybe_role, const resourc
            ROLE_NAME,
            RESOURCE_NAME);

-    return _qp.execute_internal(
+    return _qp.process(
            query,
            db::consistency_level::LOCAL_ONE,
            infinite_timeout_config,
@@ -224,7 +224,7 @@ default_authorizer::modify(
                    ROLE_NAME,
                    RESOURCE_NAME),
            [this, &role_name, set, &resource](const auto& query) {
-        return _qp.execute_internal(
+        return _qp.process(
                query,
                db::consistency_level::ONE,
                internal_distributed_timeout_config(),
@@ -249,7 +249,7 @@ future<std::vector<permission_details>> default_authorizer::list_all() const {
            meta::AUTH_KS,
            PERMISSIONS_CF);

-    return _qp.execute_internal(
+    return _qp.process(
            query,
            db::consistency_level::ONE,
            internal_distributed_timeout_config(),
@@ -276,7 +276,7 @@ future<> default_authorizer::revoke_all(std::string_view role_name) const {
            PERMISSIONS_CF,
            ROLE_NAME);

-    return _qp.execute_internal(
+    return _qp.process(
            query,
            db::consistency_level::ONE,
            internal_distributed_timeout_config(),
@@ -296,7 +296,7 @@ future<> default_authorizer::revoke_all(const resource& resource) const {
            PERMISSIONS_CF,
            RESOURCE_NAME);

-    return _qp.execute_internal(
+    return _qp.process(
            query,
            db::consistency_level::LOCAL_ONE,
            infinite_timeout_config,
@@ -313,7 +313,7 @@ future<> default_authorizer::revoke_all(const resource& resource) const {
                        ROLE_NAME,
                        RESOURCE_NAME);

-                return _qp.execute_internal(
+                return _qp.process(
                        query,
                        db::consistency_level::LOCAL_ONE,
                        infinite_timeout_config,
--- a/auth/default_authorizer.hh
+++ b/auth/default_authorizer.hh
@@ -71,7 +71,7 @@ public:

    virtual future<> stop() override;

-    virtual std::string_view qualified_java_name() const override {
+    virtual const sstring& qualified_java_name() const override {
        return default_authorizer_name();
    }

--- a/auth/password_authenticator.cc
+++ b/auth/password_authenticator.cc
@@ -48,7 +48,7 @@
 #include <optional>

 #include <boost/algorithm/cxx11/all_of.hpp>
-#include <seastar/core/seastar.hh>
+#include <seastar/core/reactor.hh>

 #include "auth/authenticated_user.hh"
 #include "auth/common.hh"
@@ -96,13 +96,10 @@ static bool has_salted_hash(const cql3::untyped_result_set_row& row) {
    return !row.get_or<sstring>(SALTED_HASH, "").empty();
 }

-static const sstring& update_row_query() {
-    static const sstring update_row_query = format("UPDATE {} SET {} = ? WHERE {} = ?",
-            meta::roles_table::qualified_name(),
-            SALTED_HASH,
-            meta::roles_table::role_col_name);
-    return update_row_query;
-}
+static const sstring update_row_query = format("UPDATE {} SET {} = ? WHERE {} = ?",
+        meta::roles_table::qualified_name(),
+        SALTED_HASH,
+        meta::roles_table::role_col_name);

 static const sstring legacy_table_name{"credentials"};

@@ -114,7 +111,7 @@ future<> password_authenticator::migrate_legacy_metadata() const {
    plogger.info("Starting migration of legacy authentication metadata.");
    static const sstring query = format("SELECT * FROM {}.{}", meta::AUTH_KS, legacy_table_name);

-    return _qp.execute_internal(
+    return _qp.process(
            query,
            db::consistency_level::QUORUM,
            internal_distributed_timeout_config()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
@@ -122,8 +119,8 @@ future<> password_authenticator::migrate_legacy_metadata() const {
            auto username = row.get_as<sstring>("username");
            auto salted_hash = row.get_as<sstring>(SALTED_HASH);

-            return _qp.execute_internal(
-                    update_row_query(),
+            return _qp.process(
+                    update_row_query,
                    consistency_for_user(username),
                    internal_distributed_timeout_config(),
                    {std::move(salted_hash), username}).discard_result();
@@ -139,8 +136,8 @@ future<> password_authenticator::migrate_legacy_metadata() const {
 future<> password_authenticator::create_default_if_missing() const {
    return default_role_row_satisfies(_qp, &has_salted_hash).then([this](bool exists) {
        if (!exists) {
-            return _qp.execute_internal(
-                    update_row_query(),
+            return _qp.process(
+                    update_row_query,
                    db::consistency_level::QUORUM,
                    internal_distributed_timeout_config(),
                    {passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt), DEFAULT_USER_NAME}).then([](auto&&) {
@@ -197,7 +194,7 @@ db::consistency_level password_authenticator::consistency_for_user(std::string_v
    return db::consistency_level::LOCAL_ONE;
 }

-std::string_view password_authenticator::qualified_java_name() const {
+const sstring& password_authenticator::qualified_java_name() const {
    return password_authenticator_name();
 }

@@ -230,13 +227,13 @@ future<authenticated_user> password_authenticator::authenticate(
    // obsolete prepared statements pretty quickly.
    // Rely on query processing caching statements instead, and lets assume
    // that a map lookup string->statement is not gonna kill us much.
-    return futurize_invoke([this, username, password] {
+    return futurize_apply([this, username, password] {
        static const sstring query = format("SELECT {} FROM {} WHERE {} = ?",
                SALTED_HASH,
                meta::roles_table::qualified_name(),
                meta::roles_table::role_col_name);

-        return _qp.execute_internal(
+        return _qp.process(
                query,
                consistency_for_user(username),
                internal_distributed_timeout_config(),
@@ -270,8 +267,8 @@ future<> password_authenticator::create(std::string_view role_name, const authen
        return make_ready_future<>();
    }

-    return _qp.execute_internal(
-            update_row_query(),
+    return _qp.process(
+            update_row_query,
            consistency_for_user(role_name),
            internal_distributed_timeout_config(),
            {passwords::hash(*options.password, rng_for_salt), sstring(role_name)}).discard_result();
@@ -287,7 +284,7 @@ future<> password_authenticator::alter(std::string_view role_name, const authent
            SALTED_HASH,
            meta::roles_table::role_col_name);

-    return _qp.execute_internal(
+    return _qp.process(
            query,
            consistency_for_user(role_name),
            internal_distributed_timeout_config(),
@@ -300,7 +297,7 @@ future<> password_authenticator::drop(std::string_view name) const {
            meta::roles_table::qualified_name(),
            meta::roles_table::role_col_name);

-    return _qp.execute_internal(
+    return _qp.process(
            query, consistency_for_user(name),
            internal_distributed_timeout_config(),
            {sstring(name)}).discard_result();
--- a/auth/password_authenticator.hh
+++ b/auth/password_authenticator.hh
@@ -71,7 +71,7 @@ public:

    virtual future<> stop() override;

-    virtual std::string_view qualified_java_name() const override;
+    virtual const sstring& qualified_java_name() const override;

    virtual bool require_authentication() const override;

--- a/auth/roles-metadata.cc
+++ b/auth/roles-metadata.cc
@@ -68,14 +68,14 @@ future<bool> default_role_row_satisfies(
            meta::roles_table::role_col_name);

    return do_with(std::move(p), [&qp](const auto& p) {
-        return qp.execute_internal(
+        return qp.process(
                query,
                db::consistency_level::ONE,
                infinite_timeout_config,
                {meta::DEFAULT_SUPERUSER_NAME},
                true).then([&qp, &p](::shared_ptr<cql3::untyped_result_set> results) {
            if (results->empty()) {
-                return qp.execute_internal(
+                return qp.process(
                        query,
                        db::consistency_level::QUORUM,
                        internal_distributed_timeout_config(),
@@ -100,7 +100,7 @@ future<bool> any_nondefault_role_row_satisfies(
    static const sstring query = format("SELECT * FROM {}", meta::roles_table::qualified_name());

    return do_with(std::move(p), [&qp](const auto& p) {
-        return qp.execute_internal(
+        return qp.process(
                query,
                db::consistency_level::QUORUM,
                internal_distributed_timeout_config()).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
--- a/auth/service.cc
+++ b/auth/service.cc
@@ -178,7 +178,7 @@ future<> service::start(::service::migration_manager& mm) {
        return create_keyspace_if_missing(mm);
    }).then([this] {
        return _role_manager->start().then([this] {
-            return when_all_succeed(_authorizer->start(), _authenticator->start()).discard_result();
+            return when_all_succeed(_authorizer->start(), _authenticator->start());
        });
    }).then([this] {
        _permissions_cache = std::make_unique<permissions_cache>(_permissions_cache_config, *this, log);
@@ -194,12 +194,9 @@ future<> service::stop() {
    // Only one of the shards has the listener registered, but let's try to
    // unregister on each one just to make sure.
    return _mnotifier.unregister_listener(_migration_listener.get()).then([this] {
-        if (_permissions_cache) {
-            return _permissions_cache->stop();
-        }
-        return make_ready_future<>();
+        return _permissions_cache->stop();
    }).then([this] {
-        return when_all_succeed(_role_manager->stop(), _authorizer->stop(), _authenticator->stop()).discard_result();
+        return when_all_succeed(_role_manager->stop(), _authorizer->stop(), _authenticator->stop());
    });
 }

@@ -220,7 +217,7 @@ future<bool> service::has_existing_legacy_users() const {
    // This logic is borrowed directly from Apache Cassandra. By first checking for the presence of the default user, we
    // can potentially avoid doing a range query with a high consistency level.

-    return _qp.execute_internal(
+    return _qp.process(
            default_user_query,
            db::consistency_level::ONE,
            infinite_timeout_config,
@@ -230,7 +227,7 @@ future<bool> service::has_existing_legacy_users() const {
            return make_ready_future<bool>(true);
        }

-        return _qp.execute_internal(
+        return _qp.process(
                default_user_query,
                db::consistency_level::QUORUM,
                infinite_timeout_config,
@@ -240,7 +237,7 @@ future<bool> service::has_existing_legacy_users() const {
                return make_ready_future<bool>(true);
            }

-            return _qp.execute_internal(
+            return _qp.process(
                    all_users_query,
                    db::consistency_level::QUORUM,
                    infinite_timeout_config).then([](auto results) {
@@ -419,7 +416,7 @@ future<> create_role(
            return make_ready_future<>();
        }

-        return futurize_invoke(
+        return futurize_apply(
                &validate_authentication_options_are_supported,
                options,
                ser.underlying_authenticator().supported_options()).then([&ser, name, &options] {
@@ -443,7 +440,7 @@ future<> alter_role(
            return make_ready_future<>();
        }

-        return futurize_invoke(
+        return futurize_apply(
                &validate_authentication_options_are_supported,
                options,
                ser.underlying_authenticator().supported_options()).then([&ser, name, &options] {
@@ -458,9 +455,7 @@ future<> drop_role(const service& ser, std::string_view name) {

        return when_all_succeed(
                a.revoke_all(name),
-                a.revoke_all(r))
-                    .discard_result()
-                    .handle_exception_type([](const unsupported_authorization_operation&) {
+                a.revoke_all(r)).handle_exception_type([](const unsupported_authorization_operation&) {
            // Nothing.
        });
    }).then([&ser, name] {
@@ -473,7 +468,7 @@ future<> drop_role(const service& ser, std::string_view name) {
 future<bool> has_role(const service& ser, std::string_view grantee, std::string_view name) {
    return when_all_succeed(
            validate_role_exists(ser, name),
-            ser.get_roles(grantee)).then_unpack([name](role_set all_roles) {
+            ser.get_roles(grantee)).then([name](role_set all_roles) {
        return make_ready_future<bool>(all_roles.count(sstring(name)) != 0);
    });
 }
--- a/auth/standard_role_manager.cc
+++ b/auth/standard_role_manager.cc
@@ -35,7 +35,6 @@
 #include "auth/common.hh"
 #include "auth/roles-metadata.hh"
 #include "cql3/query_processor.hh"
-#include "cql3/untyped_result_set.hh"
 #include "db/consistency_level_type.hh"
 #include "exceptions/exceptions.hh"
 #include "log.hh"
@@ -87,7 +86,7 @@ static future<std::optional<record>> find_record(cql3::query_processor& qp, std:
            meta::roles_table::qualified_name(),
            meta::roles_table::role_col_name);

-    return qp.execute_internal(
+    return qp.process(
            query,
            consistency_for_role(role_name),
            internal_distributed_timeout_config(),
@@ -161,7 +160,7 @@ future<> standard_role_manager::create_metadata_tables_if_missing() const {
                    meta::role_members_table::name,
                    _qp,
                    create_role_members_query,
-                    _migration_manager)).discard_result();
+                    _migration_manager));
 }

 future<> standard_role_manager::create_default_role_if_missing() const {
@@ -171,7 +170,7 @@ future<> standard_role_manager::create_default_role_if_missing() const {
                    meta::roles_table::qualified_name(),
                    meta::roles_table::role_col_name);

-            return _qp.execute_internal(
+            return _qp.process(
                    query,
                    db::consistency_level::QUORUM,
                    internal_distributed_timeout_config(),
@@ -198,7 +197,7 @@ future<> standard_role_manager::migrate_legacy_metadata() const {
    log.info("Starting migration of legacy user metadata.");
    static const sstring query = format("SELECT * FROM {}.{}", meta::AUTH_KS, legacy_table_name);

-    return _qp.execute_internal(
+    return _qp.process(
            query,
            db::consistency_level::QUORUM,
            internal_distributed_timeout_config()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
@@ -259,7 +258,7 @@ future<> standard_role_manager::create_or_replace(std::string_view role_name, co
            meta::roles_table::qualified_name(),
            meta::roles_table::role_col_name);

-    return _qp.execute_internal(
+    return _qp.process(
            query,
            consistency_for_role(role_name),
            internal_distributed_timeout_config(),
@@ -299,7 +298,7 @@ standard_role_manager::alter(std::string_view role_name, const role_config_updat
            return make_ready_future<>();
        }

-        return _qp.execute_internal(
+        return _qp.process(
                format("UPDATE {} SET {} WHERE {} = ?",
                        meta::roles_table::qualified_name(),
                        build_column_assignments(u),
@@ -321,7 +320,7 @@ future<> standard_role_manager::drop(std::string_view role_name) const {
            static const sstring query = format("SELECT member FROM {} WHERE role = ?",
                    meta::role_members_table::qualified_name());

-            return _qp.execute_internal(
+            return _qp.process(
                    query,
                    consistency_for_role(role_name),
                    internal_distributed_timeout_config(),
@@ -360,14 +359,14 @@ future<> standard_role_manager::drop(std::string_view role_name) const {
                    meta::roles_table::qualified_name(),
                    meta::roles_table::role_col_name);

-            return _qp.execute_internal(
+            return _qp.process(
                    query,
                    consistency_for_role(role_name),
                    internal_distributed_timeout_config(),
                    {sstring(role_name)}).discard_result();
        };

-        return when_all_succeed(revoke_from_members(), revoke_members_of()).then_unpack([delete_role = std::move(delete_role)] {
+        return when_all_succeed(revoke_from_members(), revoke_members_of()).then([delete_role = std::move(delete_role)] {
            return delete_role();
        });
    });
@@ -387,7 +386,7 @@ standard_role_manager::modify_membership(
                (ch == membership_change::add ? '+' : '-'),
                meta::roles_table::role_col_name);

-        return _qp.execute_internal(
+        return _qp.process(
                query,
                consistency_for_role(grantee_name),
                internal_distributed_timeout_config(),
@@ -397,7 +396,7 @@ standard_role_manager::modify_membership(
    const auto modify_role_members = [this, role_name, grantee_name, ch] {
        switch (ch) {
            case membership_change::add:
-                return _qp.execute_internal(
+                return _qp.process(
                        format("INSERT INTO {} (role, member) VALUES (?, ?)",
                                meta::role_members_table::qualified_name()),
                        consistency_for_role(role_name),
@@ -405,7 +404,7 @@ standard_role_manager::modify_membership(
                        {sstring(role_name), sstring(grantee_name)}).discard_result();

            case membership_change::remove:
-                return _qp.execute_internal(
+                return _qp.process(
                        format("DELETE FROM {} WHERE role = ? AND member = ?",
                                meta::role_members_table::qualified_name()),
                        consistency_for_role(role_name),
@@ -416,7 +415,7 @@ standard_role_manager::modify_membership(
        return make_ready_future<>();
    };

-    return when_all_succeed(modify_roles(), modify_role_members()).discard_result();
+    return when_all_succeed(modify_roles(), modify_role_members());
 }

 future<>
@@ -445,7 +444,7 @@ standard_role_manager::grant(std::string_view grantee_name, std::string_view rol
        });
    };

-   return when_all_succeed(check_redundant(), check_cycle()).then_unpack([this, role_name, grantee_name] {
+   return when_all_succeed(check_redundant(), check_cycle()).then([this, role_name, grantee_name] {
       return this->modify_membership(grantee_name, role_name, membership_change::add);
   });
 }
@@ -509,7 +508,7 @@ future<role_set> standard_role_manager::query_all() const {
    // To avoid many copies of a view.
    static const auto role_col_name_string = sstring(meta::roles_table::role_col_name);

-    return _qp.execute_internal(
+    return _qp.process(
            query,
            db::consistency_level::QUORUM,
            internal_distributed_timeout_config()).then([](::shared_ptr<cql3::untyped_result_set> results) {
--- a/auth/transitional.cc
+++ b/auth/transitional.cc
@@ -82,7 +82,7 @@ public:
        return _authenticator->stop();
    }

-    virtual std::string_view qualified_java_name() const override {
+    virtual const sstring& qualified_java_name() const override {
        return transitional_authenticator_name();
    }

@@ -158,7 +158,7 @@ public:
            }

            virtual future<authenticated_user> get_authenticated_user() const {
-                return futurize_invoke([this] {
+                return futurize_apply([this] {
                    return _sasl->get_authenticated_user().handle_exception([](auto ep) {
                        try {
                            std::rethrow_exception(ep);
@@ -201,7 +201,7 @@ public:
        return _authorizer->stop();
    }

-    virtual std::string_view qualified_java_name() const override {
+    virtual const sstring& qualified_java_name() const override {
        return transitional_authorizer_name();
    }

--- a/backlog_controller.hh
+++ b/backlog_controller.hh
@@ -23,11 +23,7 @@
 #include <seastar/core/scheduling.hh>
 #include <seastar/core/timer.hh>
 #include <seastar/core/gate.hh>
-#include <seastar/core/file.hh>
 #include <chrono>
-#include <cmath>
-
-#include "seastarx.hh"

 // Simple proportional controller to adjust shares for processes for which a backlog can be clearly
 // defined.
--- a/build_id.cc
+++ b/build_id.cc
@@ -7,7 +7,6 @@
 #include <link.h>
 #include <seastar/core/align.hh>
 #include <sstream>
-#include <cassert>

 using namespace seastar;

--- a/bytes.cc
+++ b/bytes.cc
@@ -64,7 +64,7 @@ bytes from_hex(sstring_view s) {

 sstring to_hex(bytes_view b) {
    static char digits[] = "0123456789abcdef";
-    sstring out = uninitialized_string(b.size() * 2);
+    sstring out(sstring::initialized_later(), b.size() * 2);
    unsigned end = b.size();
    for (unsigned i = 0; i != end; ++i) {
        uint8_t x = b[i];
--- a/cache_flat_mutation_reader.hh
+++ b/cache_flat_mutation_reader.hh
@@ -176,7 +176,7 @@ public:
        return make_ready_future<>();
    }
    virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override {
-        return make_exception_future<>(make_backtraced_exception_ptr<std::bad_function_call>());
+        throw std::bad_function_call();
    }
 };

--- a/caching_options.hh
+++ b/caching_options.hh
@@ -39,10 +39,7 @@ class caching_options {

    sstring _key_cache;
    sstring _row_cache;
-    bool _enabled = true;
-    caching_options(sstring k, sstring r, bool enabled)
-        : _key_cache(k), _row_cache(r), _enabled(enabled)
-    {
+    caching_options(sstring k, sstring r) : _key_cache(k), _row_cache(r) {
        if ((k != "ALL") && (k != "NONE")) {
            throw exceptions::configuration_exception("Invalid key value: " + k); 
        }
@@ -62,53 +59,36 @@ class caching_options {
    caching_options() : _key_cache(default_key), _row_cache(default_row) {}
 public:

-    bool enabled() const {
-        return _enabled;
-    }
-
    std::map<sstring, sstring> to_map() const {
-        std::map<sstring, sstring> res = {{ "keys", _key_cache },
-                { "rows_per_partition", _row_cache }};
-        if (!_enabled) {
-            res.insert({"enabled", "false"});
-        }
-        return res;
+        return {{ "keys", _key_cache }, { "rows_per_partition", _row_cache }};
    }

    sstring to_sstring() const {
        return json::to_json(to_map());
    }

-    static caching_options get_disabled_caching_options() {
-        return caching_options("NONE", "NONE", false);
-    }
-
    template<typename Map>
    static caching_options from_map(const Map & map) {
        sstring k = default_key;
        sstring r = default_row;
-        bool e = true;

        for (auto& p : map) {
            if (p.first == "keys") {
                k = p.second;
            } else if (p.first == "rows_per_partition") {
                r = p.second;
-            } else if (p.first == "enabled") {
-                e = p.second == "true";
            } else {
                throw exceptions::configuration_exception("Invalid caching option: " + p.first);
            }
        }
-        return caching_options(k, r, e);
+        return caching_options(k, r);
    }
    static caching_options from_sstring(const sstring& str) {
        return from_map(json::to_map(str));
    }

    bool operator==(const caching_options& other) const {
-        return _key_cache == other._key_cache && _row_cache == other._row_cache
-            && _enabled == other._enabled;
+        return _key_cache == other._key_cache && _row_cache == other._row_cache;
    }
    bool operator!=(const caching_options& other) const {
        return !(*this == other);
--- a/canonical_mutation.cc
+++ b/canonical_mutation.cc
@@ -92,7 +92,7 @@ mutation canonical_mutation::to_mutation(schema_ptr s) const {
 }

 static sstring bytes_to_text(bytes_view bv) {
-    sstring ret = uninitialized_string(bv.size());
+    sstring ret(sstring::initialized_later(), bv.size());
    std::copy_n(reinterpret_cast<const char*>(bv.data()), bv.size(), ret.data());
    return ret;
 }
--- a/canonical_mutation.hh
+++ b/canonical_mutation.hh
@@ -22,7 +22,7 @@
 #pragma once

 #include "bytes.hh"
-#include "schema_fwd.hh"
+#include "schema.hh"
 #include "database_fwd.hh"
 #include "mutation_partition_visitor.hh"
 #include "mutation_partition_serializer.hh"
--- a/cartesian_product.hh
+++ b/cartesian_product.hh
@@ -22,9 +22,6 @@

 #pragma once

-#include <vector>
-#include <sys/types.h>
-
 // Single-pass range over cartesian product of vectors.

 // Note:
--- a/cdc/cdc.cc
+++ b/cdc/cdc.cc
@@ -0,0 +1,835 @@
+/*
+ * Copyright (C) 2019 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <algorithm>
+#include <stdexcept>
+#include <utility>
+
+#include <boost/range/irange.hpp>
+#include <seastar/util/defer.hh>
+#include <seastar/core/thread.hh>
+
+#include "cdc/cdc.hh"
+#include "bytes.hh"
+#include "database.hh"
+#include "db/config.hh"
+#include "dht/murmur3_partitioner.hh"
+#include "partition_slice_builder.hh"
+#include "schema.hh"
+#include "schema_builder.hh"
+#include "service/migration_listener.hh"
+#include "service/storage_service.hh"
+#include "types/tuple.hh"
+#include "cql3/statements/select_statement.hh"
+#include "cql3/multi_column_relation.hh"
+#include "cql3/tuples.hh"
+#include "log.hh"
+#include "json.hh"
+
+using locator::snitch_ptr;
+using locator::token_metadata;
+using locator::topology;
+using seastar::sstring;
+using service::migration_notifier;
+using service::storage_proxy;
+
+namespace std {
+
+// std::hash specialization for (net::inet_address, unsigned int) pairs.
+// The result is the XOR of the hashes of the two members.
+template<> struct hash<std::pair<net::inet_address, unsigned int>> {
+    std::size_t operator()(const std::pair<net::inet_address, unsigned int> &p) const {
+        // Hash the second member with the hasher for its own type; going through
+        // std::hash<int> would force an unsigned -> int narrowing conversion first.
+        return std::hash<net::inet_address>{}(p.first) ^ std::hash<unsigned int>{}(p.second);
+    }
+};
+
+}
+
+using namespace std::chrono_literals;
+
+static logging::logger cdc_log("cdc");
+
+namespace cdc {
+static schema_ptr create_log_schema(const schema&, std::optional<utils::UUID> = {});
+static schema_ptr create_stream_description_table_schema(const schema&, std::optional<utils::UUID> = {});
+static future<> populate_desc(db_context ctx, const schema& s);
+}
+
+// Implementation object of cdc_service. It is a migration listener which, for tables that
+// have CDC enabled, adds the mutations that create, update or drop the accompanying CDC log
+// and stream description tables to the schema change of the base table.
+class cdc::cdc_service::impl : service::migration_listener::empty_listener {
+    friend cdc_service;
+    db_context _ctxt;
+    // Set once stop() has completed; checked by the destructor.
+    bool _stopped = false;
+public:
+    // Takes ownership of the context and registers this object as a migration listener.
+    impl(db_context ctxt)
+        : _ctxt(std::move(ctxt))
+    {
+        _ctxt._migration_notifier.register_listener(this);
+    }
+    // stop() must have completed before the object is destroyed.
+    ~impl() {
+        assert(_stopped);
+    }
+
+    // Unregisters this listener and marks the object as stopped once that has finished.
+    future<> stop() {
+        return _ctxt._migration_notifier.unregister_listener(this).then([this] {
+            _stopped = true;
+        });
+    }
+
+    // When a CDC-enabled table is about to be created and its log table does not exist yet,
+    // appends the create-table mutations for the log and stream description tables to `mutations`.
+    void on_before_create_column_family(const schema& schema, std::vector<mutation>& mutations, api::timestamp_type timestamp) override {
+        if (schema.cdc_options().enabled()) {
+            auto& db = _ctxt._proxy.get_db().local();
+            auto logname = log_name(schema.cf_name());
+            if (!db.has_schema(schema.ks_name(), logname)) {
+                // in seastar thread
+                auto log_schema = create_log_schema(schema);
+                auto stream_desc_schema = create_stream_description_table_schema(schema);
+                auto& keyspace = db.find_keyspace(schema.ks_name());
+
+                auto log_mut = db::schema_tables::make_create_table_mutations(keyspace.metadata(), log_schema, timestamp);
+                auto stream_mut = db::schema_tables::make_create_table_mutations(keyspace.metadata(), stream_desc_schema, timestamp);
+
+                mutations.insert(mutations.end(), std::make_move_iterator(log_mut.begin()), std::make_move_iterator(log_mut.end()));
+                mutations.insert(mutations.end(), std::make_move_iterator(stream_mut.begin()), std::make_move_iterator(stream_mut.end()));
+            }
+        }
+    }
+
+    // Appends to `mutations` whatever is needed to keep the log and stream description tables
+    // in line with an update of the base table: drop, update or create mutations.
+    void on_before_update_column_family(const schema& new_schema, const schema& old_schema, std::vector<mutation>& mutations, api::timestamp_type timestamp) override {
+        bool is_cdc = new_schema.cdc_options().enabled();
+        bool was_cdc = old_schema.cdc_options().enabled();
+
+        // We need to create, update or drop the log & stream description schemas whenever CDC
+        // was or is enabled: either the CDC status changed (was != is), or CDC stays on, in
+        // which case both schemas are rebuilt from the new base schema.
+        if (was_cdc || is_cdc) {
+            auto logname = log_name(old_schema.cf_name());
+            auto descname = desc_name(old_schema.cf_name());
+            auto& db = _ctxt._proxy.get_db().local();
+            auto& keyspace = db.find_keyspace(old_schema.ks_name());
+            // The existing schemas are only looked up when CDC was enabled before; otherwise null.
+            auto log_schema = was_cdc ? db.find_column_family(old_schema.ks_name(), logname).schema() : nullptr;
+            auto stream_desc_schema = was_cdc ? db.find_column_family(old_schema.ks_name(), descname).schema() : nullptr;
+
+            // CDC was switched off (was_cdc is necessarily true here): drop both tables.
+            if (!is_cdc) {
+                auto log_mut = db::schema_tables::make_drop_table_mutations(keyspace.metadata(), log_schema, timestamp);
+                auto stream_mut = db::schema_tables::make_drop_table_mutations(keyspace.metadata(), stream_desc_schema, timestamp);
+
+                mutations.insert(mutations.end(), std::make_move_iterator(log_mut.begin()), std::make_move_iterator(log_mut.end()));
+                mutations.insert(mutations.end(), std::make_move_iterator(stream_mut.begin()), std::make_move_iterator(stream_mut.end()));
+                return;
+            }
+
+            // CDC is enabled now: build the new schemas, passing on the id of the existing table if any.
+            auto new_log_schema = create_log_schema(new_schema, log_schema ? std::make_optional(log_schema->id()) : std::nullopt);
+            auto new_stream_desc_schema = create_stream_description_table_schema(new_schema, stream_desc_schema ? std::make_optional(stream_desc_schema->id()) : std::nullopt);
+
+            // Update the table when it already exists, create it otherwise.
+            auto log_mut = log_schema 
+                ? db::schema_tables::make_update_table_mutations(keyspace.metadata(), log_schema, new_log_schema, timestamp, false)
+                : db::schema_tables::make_create_table_mutations(keyspace.metadata(), new_log_schema, timestamp)
+                ;
+            auto stream_mut = stream_desc_schema 
+                ? db::schema_tables::make_update_table_mutations(keyspace.metadata(), stream_desc_schema, new_stream_desc_schema, timestamp, false)
+                : db::schema_tables::make_create_table_mutations(keyspace.metadata(), new_stream_desc_schema, timestamp)
+                ;
+
+            mutations.insert(mutations.end(), std::make_move_iterator(log_mut.begin()), std::make_move_iterator(log_mut.end()));
+            mutations.insert(mutations.end(), std::make_move_iterator(stream_mut.begin()), std::make_move_iterator(stream_mut.end()));
+        }
+    }
+
+    // When a CDC-enabled table is about to be dropped, appends the drop-table mutations for its
+    // log and stream description tables to `mutations`.
+    void on_before_drop_column_family(const schema& schema, std::vector<mutation>& mutations, api::timestamp_type timestamp) override {
+        if (schema.cdc_options().enabled()) {
+            auto logname = log_name(schema.cf_name());
+            auto descname = desc_name(schema.cf_name());
+            auto& db = _ctxt._proxy.get_db().local();
+            auto& keyspace = db.find_keyspace(schema.ks_name());
+            auto log_schema = db.find_column_family(schema.ks_name(), logname).schema();
+            auto stream_desc_schema = db.find_column_family(schema.ks_name(), descname).schema();
+
+            auto log_mut = db::schema_tables::make_drop_table_mutations(keyspace.metadata(), log_schema, timestamp);
+            auto stream_mut = db::schema_tables::make_drop_table_mutations(keyspace.metadata(), stream_desc_schema, timestamp);
+
+            mutations.insert(mutations.end(), std::make_move_iterator(log_mut.begin()), std::make_move_iterator(log_mut.end()));
+            mutations.insert(mutations.end(), std::make_move_iterator(stream_mut.begin()), std::make_move_iterator(stream_mut.end()));
+        }
+    }
+
+    // After a table has been created: if it has CDC enabled, runs populate_desc() for it.
+    void on_create_column_family(const sstring& ks_name, const sstring& cf_name) override {
+        // This callback is invoked on all shards; do the work only on shard 0.
+        if (engine().cpu_id() != 0) {
+            return; 
+        }
+        auto& db = _ctxt._proxy.get_db().local();
+        auto& cf = db.find_column_family(ks_name, cf_name);
+        auto schema = cf.schema();
+        if (schema->cdc_options().enabled()) {
+            // get() waits for the future to resolve.
+            // NOTE(review): presumably this requires running inside a seastar thread - confirm.
+            populate_desc(_ctxt, *schema).get();
+        }
+    }
+
+    // An updated table is handled exactly like a newly created one; `columns_changed` is not used.
+    void on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool columns_changed) override {
+        on_create_column_family(ks_name, cf_name);
+    }
+
+    // Nothing to do after a table has been dropped.
+    void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) override {}
+
+    // Declared here only; the definition is not part of the class body.
+    future<std::tuple<std::vector<mutation>, result_callback>> augment_mutation_call(
+        lowres_clock::time_point timeout,
+        std::vector<mutation>&& mutations
+    );
+
+    // Declared here only; the definition is not part of the class body.
+    template<typename Iter>
+    future<> append_mutations(Iter i, Iter e, schema_ptr s, lowres_clock::time_point, std::vector<mutation>&);
+};
+
+// Convenience constructor: builds a db_context from `proxy` through db_context::builder and
+// delegates to the constructor taking a db_context.
+cdc::cdc_service::cdc_service(service::storage_proxy& proxy)
+    : cdc_service(db_context::builder(proxy).build())
+{}
+
+// Creates the implementation object, which registers itself as a migration listener in its
+// constructor, and then hands a pointer to this service to the storage proxy of the context.
+cdc::cdc_service::cdc_service(db_context ctxt)
+    : _impl(std::make_unique<impl>(std::move(ctxt)))
+{
+    _impl->_ctxt._proxy.set_cdc_service(this);
+}
+
+// Stops the service by stopping the implementation object, which unregisters its migration
+// listener. This has to complete before destruction: impl's destructor asserts that it did.
+future<> cdc::cdc_service::stop() {
+    return _impl->stop();
+}
+
+// Defaulted here rather than in the header.
+// NOTE(review): presumably so that `impl` is a complete type where `_impl` is destroyed - confirm against cdc.hh.
+cdc::cdc_service::~cdc_service() = default;
+
+// Builds CDC options from a string map of option names to values.
+//
+// If the map has no "enabled" key it is ignored altogether and no member is assigned.
+// Otherwise "enabled", "preimage" and "postimage" are set to true only when their value is
+// exactly "true", and "ttl" is parsed with std::stoi.
+//
+// Throws exceptions::configuration_exception for an unknown key and for a "ttl" value that
+// std::stoi cannot convert (not a number, or out of the range of int).
+cdc::options::options(const std::map<sstring, sstring>& map) {
+    if (map.find("enabled") == std::end(map)) {
+        return;
+    }
+
+    for (auto& p : map) {
+        if (p.first == "enabled") {
+            _enabled = p.second == "true";
+        } else if (p.first == "preimage") {
+            _preimage = p.second == "true";
+        } else if (p.first == "postimage") {
+            _postimage = p.second == "true";
+        } else if (p.first == "ttl") {
+            // std::stoi reports bad input with std::invalid_argument or std::out_of_range, both
+            // derived from std::logic_error. Translate them so that a bad option value is
+            // reported the same way as a bad option name.
+            try {
+                _ttl = std::stoi(p.second);
+            } catch (const std::logic_error&) {
+                throw exceptions::configuration_exception("Invalid value for CDC option ttl: " + p.second);
+            }
+        } else {
+            throw exceptions::configuration_exception("Invalid CDC option: " + p.first);
+        }
+    }
+}
+
+// Converts the options to a string map. Disabled options yield an empty map; otherwise
+// the map holds the four entries "enabled", "preimage", "postimage" and "ttl".
+std::map<sstring, sstring> cdc::options::to_map() const {
+    std::map<sstring, sstring> result;
+    if (_enabled) {
+        const auto flag_text = [] (bool flag) -> sstring { return flag ? "true" : "false"; };
+        result.emplace("enabled", flag_text(_enabled));
+        result.emplace("preimage", flag_text(_preimage));
+        result.emplace("postimage", flag_text(_postimage));
+        result.emplace("ttl", std::to_string(_ttl));
+    }
+    return result;
+}
+
+// Returns the JSON rendering of to_map(), as produced by json::to_json().
+sstring cdc::options::to_sstring() const {
+    return json::to_json(to_map());
+}
+
+// Two option sets are equal when the enabled, preimage and postimage flags and
+// the ttl all match.
+bool cdc::options::operator==(const options& o) const {
+    return std::tie(_enabled, _preimage, _postimage, _ttl)
+        == std::tie(o._enabled, o._preimage, o._postimage, o._ttl);
+}
+// Negation of operator==.
+bool cdc::options::operator!=(const options& o) const {
+    return !(*this == o);
+}
+
+namespace cdc {
+
+using operation_native_type = std::underlying_type_t<operation>;
+using column_op_native_type = std::underlying_type_t<column_op>;
+
+// Name of the CDC log table for `table_name`: the base name followed by the
+// suffix "_scylla_cdc_log".
+sstring log_name(const sstring& table_name) {
+    return table_name + sstring("_scylla_cdc_log");
+}
+
+// Name of the CDC stream description table for `table_name`: the base name
+// followed by the suffix "_scylla_cdc_desc".
+sstring desc_name(const sstring& table_name) {
+    return table_name + sstring("_scylla_cdc_desc");
+}
+
+// Builds the schema of the CDC log table that accompanies base table `s`.
+//
+// The log table lives in the keyspace of `s` under the name
+// log_name(s.cf_name()). It is partitioned by "stream_id", clustered by
+// ("time", "batch_seq_no") and carries "operation" and "ttl" columns plus one
+// "_"-prefixed column per base-table column: key columns keep their type,
+// while static and regular columns are wrapped in an (op, value, ttl) tuple.
+// When `uuid` is engaged it is passed to the builder's set_uuid().
+static schema_ptr create_log_schema(const schema& s, std::optional<utils::UUID> uuid) {
+    schema_builder builder(s.ks_name(), log_name(s.cf_name()));
+    builder.set_comment(sprint("CDC log for %s.%s", s.ks_name(), s.cf_name()));
+
+    // Columns that every log table has regardless of the base schema.
+    builder.with_column("stream_id", uuid_type, column_kind::partition_key);
+    builder.with_column("time", timeuuid_type, column_kind::clustering_key);
+    builder.with_column("batch_seq_no", int32_type, column_kind::clustering_key);
+    builder.with_column("operation", data_type_for<operation_native_type>());
+    builder.with_column("ttl", long_type);
+
+    // Mirrors one group of base-table columns into the log schema.
+    const auto mirror_columns = [&builder] (const schema::const_iterator_range_type& base_columns, bool wrap_in_tuple) {
+        for (const auto& base_column : base_columns) {
+            auto log_type = base_column.type;
+            if (wrap_in_tuple) {
+                log_type = tuple_type_impl::get_instance({
+                    /* op */ data_type_for<column_op_native_type>(),
+                    /* value */ base_column.type,
+                    /* ttl */ long_type});
+            }
+            builder.with_column("_" + base_column.name(), log_type);
+        }
+    };
+    mirror_columns(s.partition_key_columns(), false);
+    mirror_columns(s.clustering_key_columns(), false);
+    mirror_columns(s.static_columns(), true);
+    mirror_columns(s.regular_columns(), true);
+
+    if (uuid) {
+        builder.set_uuid(*uuid);
+    }
+
+    return builder.build();
+}
+
+// Builds the schema of the CDC stream description table for base table `s`.
+//
+// The table lives in the keyspace of `s` under the name
+// desc_name(s.cf_name()). Its partition key is ("node_ip", "shard_id"), its
+// clustering key is "created_at", and it has a single "stream_id" column.
+// When `uuid` is engaged it is passed to the builder's set_uuid().
+static schema_ptr create_stream_description_table_schema(const schema& s, std::optional<utils::UUID> uuid) {
+    schema_builder builder(s.ks_name(), desc_name(s.cf_name()));
+    builder.set_comment(sprint("CDC description for %s.%s", s.ks_name(), s.cf_name()));
+
+    // Key: which shard of which node the stream belongs to, and when it was created.
+    builder.with_column("node_ip", inet_addr_type, column_kind::partition_key);
+    builder.with_column("shard_id", int32_type, column_kind::partition_key);
+    builder.with_column("created_at", timestamp_type, column_kind::clustering_key);
+    // Payload: the stream id assigned to that (node, shard).
+    builder.with_column("stream_id", uuid_type);
+
+    if (uuid) {
+        builder.set_uuid(*uuid);
+    }
+
+    return builder.build();
+}
+
+// Writes one row per (node, shard) into the stream description table of `s`,
+// assigning a freshly generated stream id to each shard of each known endpoint.
+//
+// This function assumes setup_stream_description_table was called on |s| before the call to this
+// function.
+//
+// All rows share a single new timestamp, used both as the "created_at"
+// clustering key and as the cell write timestamp. The mutations are applied
+// through the storage proxy at QUORUM with no timeout.
+//
+// NOTE(review): stream ids are found by drawing random UUIDs until one maps to
+// the wanted endpoint and shard. The loop has no iteration bound and does not
+// yield; confirm this is acceptable for large clusters and for endpoints that
+// own no token.
+static future<> populate_desc(db_context ctx, const schema& s) {
+    auto& db = ctx._proxy.get_db().local();
+    auto desc_schema =
+        db.find_schema(s.ks_name(), desc_name(s.cf_name()));
+    auto log_schema =
+        db.find_schema(s.ks_name(), log_name(s.cf_name()));
+    // True when the log partition keyed by `stream_id` is owned by `endpoint`
+    // (according to the token metadata) and its token maps to `shard_id` under
+    // a murmur3 partitioner configured with that node's shard count and
+    // ignore_msb_bits.
+    auto belongs_to = [&](const gms::inet_address& endpoint,
+                          const unsigned int shard_id,
+                          const int shard_count,
+                          const unsigned int ignore_msb_bits,
+                          const utils::UUID& stream_id) {
+        const auto log_pk = partition_key::from_singular(*log_schema,
+                                                         data_value(stream_id));
+        const auto token = ctx._partitioner.decorate_key(*log_schema, log_pk).token();
+        if (ctx._token_metadata.get_endpoint(ctx._token_metadata.first_token(token)) != endpoint) {
+            return false;
+        }
+        const auto owning_shard_id = dht::murmur3_partitioner(shard_count, ignore_msb_bits).shard_of(token);
+        return owning_shard_id == shard_id;
+    };
+
+    std::vector<mutation> mutations;
+    const auto ts = api::new_timestamp();
+    const auto ck = clustering_key::from_single_value(
+            *desc_schema, timestamp_type->decompose(ts));
+    auto cdef = desc_schema->get_column_definition(to_bytes("stream_id"));
+
+    // Visit every endpoint of every datacenter known to the topology.
+    for (const auto& dc : ctx._token_metadata.get_topology().get_datacenter_endpoints()) {
+        for (const auto& endpoint : dc.second) {
+            const auto decomposed_ip = inet_addr_type->decompose(endpoint.addr());
+            // Shard layout of the remote node, as reported by the snitch.
+            const unsigned int shard_count = ctx._snitch->get_shard_count(endpoint);
+            const unsigned int ignore_msb_bits = ctx._snitch->get_ignore_msb_bits(endpoint);
+            for (unsigned int shard_id = 0; shard_id < shard_count; ++shard_id) {
+                const auto pk = partition_key::from_exploded(
+                        *desc_schema, { decomposed_ip, int32_type->decompose(static_cast<int>(shard_id)) });
+                mutations.emplace_back(desc_schema, pk);
+
+                // Rejection sampling: retry until the random id lands on this node and shard.
+                auto stream_id = utils::make_random_uuid();
+                while (!belongs_to(endpoint, shard_id, shard_count, ignore_msb_bits, stream_id)) {
+                    stream_id = utils::make_random_uuid();
+                }
+                auto value = atomic_cell::make_live(*uuid_type,
+                                                    ts,
+                                                    uuid_type->decompose(stream_id));
+                mutations.back().set_cell(ck, *cdef, std::move(value));
+            }
+        }
+    }
+    return ctx._proxy.mutate(std::move(mutations),
+                             db::consistency_level::QUORUM,
+                             db::no_timeout,
+                             nullptr,
+                             empty_service_permit());
+}
+
+// Starts a builder bound to `proxy`; every other dependency is left unset and
+// is resolved by build() unless a with_*() call overrides it.
+db_context::builder::builder(service::storage_proxy& proxy) 
+    : _proxy(proxy) 
+{}
+
+// The with_*() setters each record a reference to the given dependency, so
+// that build() uses it instead of the global default, and return the builder
+// to allow chaining. The referenced objects are not copied.
+
+// Overrides the migration notifier used by the resulting db_context.
+db_context::builder& db_context::builder::with_migration_notifier(service::migration_notifier& migration_notifier) {
+    _migration_notifier = migration_notifier;
+    return *this;
+}
+
+// Overrides the token metadata used by the resulting db_context.
+db_context::builder& db_context::builder::with_token_metadata(locator::token_metadata& token_metadata) {
+    _token_metadata = token_metadata;
+    return *this;
+}
+
+// Overrides the snitch used by the resulting db_context.
+db_context::builder& db_context::builder::with_snitch(locator::snitch_ptr& snitch) {
+    _snitch = snitch;
+    return *this;
+}
+
+// Overrides the partitioner used by the resulting db_context.
+db_context::builder& db_context::builder::with_partitioner(dht::i_partitioner& partitioner) {
+    _partitioner = partitioner;
+    return *this;
+}
+
+// Assembles the db_context. Each dependency that was not supplied through a
+// with_*() call falls back to the process-wide instance: the local storage
+// service's migration notifier and token metadata, the local snitch pointer,
+// and the global partitioner.
+db_context db_context::builder::build() {
+    return db_context{
+        _proxy,
+        _migration_notifier ? _migration_notifier->get() : service::get_local_storage_service().get_migration_notifier(),
+        _token_metadata ? _token_metadata->get() : service::get_local_storage_service().get_token_metadata(),
+        _snitch ? _snitch->get() : locator::i_endpoint_snitch::get_local_snitch_ptr(),
+        _partitioner ? _partitioner->get() : dht::global_partitioner()
+    };
+}
+
+class transformer final {
+public:
+    using streams_type = std::unordered_map<std::pair<net::inet_address, unsigned int>, utils::UUID>;
+private:
+    db_context _ctx;
+    schema_ptr _schema;
+    schema_ptr _log_schema;
+    utils::UUID _time;
+    bytes _decomposed_time;
+    ::shared_ptr<const transformer::streams_type> _streams;
+    const column_definition& _op_col;
+    ttl_opt _cdc_ttl_opt;
+
+    // Starts log row number `batch_no` in log mutation `m` and fills in the
+    // "_"-prefixed copies of the base table's partition key columns taken
+    // from `pk`.
+    //
+    // Returns the clustering key of the log row, (time, batch_no), so that the
+    // caller can add further cells to the same row.
+    clustering_key set_pk_columns(const partition_key& pk, int batch_no, mutation& m) const {
+        const auto log_ck = clustering_key::from_exploded(
+                *m.schema(), { _decomposed_time, int32_type->decompose(batch_no) });
+        auto pk_value = pk.explode(*_schema);
+        size_t pos = 0;
+        for (const auto& column : _schema->partition_key_columns()) {
+            assert (pos < pk_value.size());
+            // The log column for base column X is named "_X".
+            auto cdef = m.schema()->get_column_definition(to_bytes("_" + column.name()));
+            auto value = atomic_cell::make_live(*column.type,
+                                                _time.timestamp(),
+                                                bytes_view(pk_value[pos]),
+                                                _cdc_ttl_opt);
+            m.set_cell(log_ck, *cdef, std::move(value));
+            ++pos;
+        }
+        return log_ck;
+    }
+
+    // Writes `op` into the "operation" column of log row `ck` in `m`, using the
+    // transformer's timestamp and the configured CDC TTL.
+    void set_operation(const clustering_key& ck, operation op, mutation& m) const {
+        m.set_cell(ck, _op_col, atomic_cell::make_live(*_op_col.type, _time.timestamp(), _op_col.type->decompose(operation_native_type(op)), _cdc_ttl_opt));
+    }
+
+    // Looks up the stream assigned to (`ip`, `shard_id`) and returns it as a
+    // partition key of the log table.
+    // Throws std::runtime_error when no stream is known for that pair.
+    partition_key stream_id(const net::inet_address& ip, unsigned int shard_id) const {
+        auto it = _streams->find(std::make_pair(ip, shard_id));
+        if (it == std::end(*_streams)) {
+                throw std::runtime_error(format("No stream found for node {} and shard {}", ip, shard_id));
+        }
+        return partition_key::from_exploded(*_log_schema, { uuid_type->decompose(it->second) });
+    }
+public:
+    // Prepares a transformer for base schema `s`.
+    //
+    // Resolves the matching log table schema from the local database, draws a
+    // fresh time UUID that is shared by every log row this instance produces,
+    // and caches the "operation" column definition. `streams` maps
+    // (node ip, shard id) to stream id. A non-zero ttl in the schema's CDC
+    // options becomes the TTL of all generated log cells.
+    //
+    // Note: the initialisers of _log_schema and _op_col read members declared
+    // before them, so they rely on the member declaration order.
+    transformer(db_context ctx, schema_ptr s, ::shared_ptr<const transformer::streams_type> streams)
+        : _ctx(ctx)
+        , _schema(std::move(s))
+        , _log_schema(ctx._proxy.get_db().local().find_schema(_schema->ks_name(), log_name(_schema->cf_name())))
+        , _time(utils::UUID_gen::get_time_UUID())
+        , _decomposed_time(timeuuid_type->decompose(_time))
+        , _streams(std::move(streams))
+        , _op_col(*_log_schema->get_column_definition(to_bytes("operation")))
+    {
+        if (_schema->cdc_options().ttl()) {
+            _cdc_ttl_opt = std::chrono::seconds(_schema->cdc_options().ttl());
+        }
+    }
+
+    // Translates base-table mutation `m` into the corresponding CDC log mutation.
+    //
+    // The log partition is the stream assigned to the node and shard that own
+    // the token of `m`. Exactly one of three shapes is produced:
+    //  - partition tombstone present: a single partition_delete row;
+    //  - otherwise, range tombstones present: a range_delete_start and a
+    //    range_delete_end row per tombstone;
+    //  - otherwise: one update row per clustered row, preceded by a pre_image
+    //    row when `rs` holds a row with the same clustering key.
+    // Rows are numbered with consecutive batch_seq_no values starting at 0.
+    //
+    // `rs` is the optional pre-image query result (see pre_image_select()).
+    // Throws std::runtime_error when the token has no owner or no stream is
+    // known for the owning node and shard.
+    //
+    // TODO: is pre-image data based on query enough. We only have actual column data. Do we need
+    // more details like tombstones/ttl? Probably not but keep in mind.
+    mutation transform(const mutation& m, const cql3::untyped_result_set* rs = nullptr) const {
+        auto& t = m.token();
+        auto&& ep = _ctx._token_metadata.get_endpoint(
+                _ctx._token_metadata.first_token(t));
+        if (!ep) {
+            throw std::runtime_error(format("No owner found for key {}", m.decorated_key()));
+        }
+        // Shard of the owning node, computed with that node's own shard configuration.
+        auto shard_id = dht::murmur3_partitioner(_ctx._snitch->get_shard_count(*ep), _ctx._snitch->get_ignore_msb_bits(*ep)).shard_of(t);
+        mutation res(_log_schema, stream_id(ep->addr(), shard_id));
+        auto& p = m.partition();
+        if (p.partition_tombstone()) {
+            // Partition deletion
+            auto log_ck = set_pk_columns(m.key(), 0, res);
+            set_operation(log_ck, operation::partition_delete, res);
+        } else if (!p.row_tombstones().empty()) {
+            // range deletion
+            int batch_no = 0;
+            for (auto& rt : p.row_tombstones()) {
+                // Copies the components of clustering prefix `ckp` into log row
+                // `log_ck`; columns beyond the prefix length are left unset.
+                auto set_bound = [&] (const clustering_key& log_ck, const clustering_key_prefix& ckp) {
+                    auto exploded = ckp.explode(*_schema);
+                    size_t pos = 0;
+                    for (const auto& column : _schema->clustering_key_columns()) {
+                        if (pos >= exploded.size()) {
+                            break;
+                        }
+                        auto cdef = _log_schema->get_column_definition(to_bytes("_" + column.name()));
+                        auto value = atomic_cell::make_live(*column.type,
+                                                            _time.timestamp(),
+                                                            bytes_view(exploded[pos]),
+                                                            _cdc_ttl_opt);
+                        res.set_cell(log_ck, *cdef, std::move(value));
+                        ++pos;
+                    }
+                };
+                {
+                    auto log_ck = set_pk_columns(m.key(), batch_no, res);
+                    set_bound(log_ck, rt.start);
+                    // TODO: separate inclusive/exclusive range
+                    set_operation(log_ck, operation::range_delete_start, res);
+                    ++batch_no;
+                }
+                {
+                    auto log_ck = set_pk_columns(m.key(), batch_no, res);
+                    set_bound(log_ck, rt.end);
+                    // TODO: separate inclusive/exclusive range
+                    set_operation(log_ck, operation::range_delete_end, res);
+                    ++batch_no;
+                }
+            }
+        } else {
+            // should be update or deletion
+            int batch_no = 0;
+            for (const rows_entry& r : p.clustered_rows()) {
+                auto ck_value = r.key().explode(*_schema);
+
+                // Clustering key of the pre-image log row and the matching
+                // pre-image result row, if one exists for this clustered row.
+                std::optional<clustering_key> pikey;
+                const cql3::untyped_result_set_row * pirow = nullptr;
+
+                if (rs) {
+                    // Find the pre-image row whose clustering columns all equal this row's key.
+                    for (auto& utr : *rs) {
+                        bool match = true;
+                        for (auto& c : _schema->clustering_key_columns()) {
+                            auto rv = utr.get_view(c.name_as_text());
+                            auto cv = r.key().get_component(*_schema, c.component_index());
+                            if (rv != cv) {
+                                match = false;
+                                break;
+                            }
+                        }
+                        if (match) {
+                            // The pre-image row takes the current batch number; the
+                            // update row created below takes the next one.
+                            pikey = set_pk_columns(m.key(), batch_no, res);
+                            set_operation(*pikey, operation::pre_image, res);
+                            pirow = &utr;
+                            ++batch_no;
+                            break;
+                        }
+                    }
+                }
+
+                auto log_ck = set_pk_columns(m.key(), batch_no, res);
+
+                // Copy the clustering key into the update row and, if present, the pre-image row.
+                size_t pos = 0;
+                for (const auto& column : _schema->clustering_key_columns()) {
+                    assert (pos < ck_value.size());
+                    auto cdef = _log_schema->get_column_definition(to_bytes("_" + column.name()));
+                    res.set_cell(log_ck, *cdef, atomic_cell::make_live(*column.type, _time.timestamp(), bytes_view(ck_value[pos]), _cdc_ttl_opt));
+
+                    if (pirow) {
+                        assert(pirow->has(column.name_as_text()));
+                        res.set_cell(*pikey, *cdef, atomic_cell::make_live(*column.type, _time.timestamp(), bytes_view(ck_value[pos]), _cdc_ttl_opt));
+                    }
+
+                    ++pos;
+                }
+
+                // Scratch (op, value, ttl) tuple, reused for every data cell.
+                std::vector<bytes_opt> values(3);
+
+                // Logs every atomic cell of `r` as an (op, value, ttl) tuple in
+                // the update row, and the old value as a `set` tuple in the
+                // pre-image row when the pre-image result has that column.
+                auto process_cells = [&](const row& r, column_kind ckind) {
+                    r.for_each_cell([&](column_id id, const atomic_cell_or_collection& cell) {
+                        auto& cdef = _schema->column_at(ckind, id);
+                        auto* dst = _log_schema->get_column_definition(to_bytes("_" + cdef.name()));
+                        // todo: collections.
+                        if (cdef.is_atomic()) {
+                            column_op op;
+
+                            values[1] = values[2] = std::nullopt;
+                            auto view = cell.as_atomic_cell(cdef);
+                            if (view.is_live()) {
+                                op = column_op::set;
+                                values[1] = view.value().linearize();
+                                if (view.is_live_and_has_ttl()) {
+                                    values[2] = long_type->decompose(data_value(view.ttl().count()));
+                                }
+                            } else {
+                                // A dead cell is logged as a delete with no value.
+                                op = column_op::del;
+                            }
+
+                            values[0] = data_type_for<column_op_native_type>()->decompose(data_value(static_cast<column_op_native_type>(op)));
+                            res.set_cell(log_ck, *dst, atomic_cell::make_live(*dst->type, _time.timestamp(), tuple_type_impl::build_value(values), _cdc_ttl_opt));
+
+                            if (pirow && pirow->has(cdef.name_as_text())) {
+                                values[0] = data_type_for<column_op_native_type>()->decompose(data_value(static_cast<column_op_native_type>(column_op::set)));
+                                values[1] = pirow->get_blob(cdef.name_as_text());
+                                values[2] = std::nullopt;
+
+                                // The pre-image row and the update row must be distinct log rows.
+                                assert(std::addressof(res.partition().clustered_row(*_log_schema, *pikey)) != std::addressof(res.partition().clustered_row(*_log_schema, log_ck)));
+                                assert(pikey->explode() != log_ck.explode());
+                                res.set_cell(*pikey, *dst, atomic_cell::make_live(*dst->type, _time.timestamp(), tuple_type_impl::build_value(values), _cdc_ttl_opt));
+                            }
+                        } else {
+                            cdc_log.warn("Non-atomic cell ignored {}.{}:{}", _schema->ks_name(), _schema->cf_name(), cdef.name_as_text());
+                        }
+                    });
+                };
+
+                process_cells(r.row().cells(), column_kind::regular_column);
+                // Static cells are logged once per clustered row; a mutation with
+                // no clustered rows therefore produces no static-cell entries.
+                process_cells(p.static_row().get(), column_kind::static_column);
+
+                set_operation(log_ck, operation::update, res);
+                ++batch_no;
+            }
+        }
+
+        return res;
+    }
+
+    // Deadline used for the pre-image query: ten seconds from now.
+    static db::timeout_clock::time_point default_timeout() {
+        return db::timeout_clock::now() + 10s;
+    }
+
+    // Reads the current values ("pre-image") of the rows and columns touched by `m`.
+    //
+    // Resolves to a null pointer when `m` carries a partition tombstone or range
+    // tombstones, has no clustered rows, or when the query returns nothing.
+    // Otherwise resolves to a result set containing the key columns plus the
+    // static and regular columns written by `m`, read at consistency level
+    // `cl` with the deadline from default_timeout().
+    future<lw_shared_ptr<cql3::untyped_result_set>> pre_image_select(
+            service::client_state& client_state,
+            db::consistency_level cl,
+            const mutation& m)
+    {
+        auto& p = m.partition();
+        if (p.partition_tombstone() || !p.row_tombstones().empty() || p.clustered_rows().empty()) {
+            return make_ready_future<lw_shared_ptr<cql3::untyped_result_set>>();
+        }
+
+        dht::partition_range_vector partition_ranges{dht::partition_range(m.decorated_key())};
+
+        auto&& pc = _schema->partition_key_columns();
+        auto&& cc = _schema->clustering_key_columns();
+
+        // One singular clustering range per modified row, or the full range
+        // when the table has no clustering columns.
+        std::vector<query::clustering_range> bounds;
+        if (cc.empty()) {
+            bounds.push_back(query::clustering_range::make_open_ended_both_sides());
+        } else {
+            for (const rows_entry& r : p.clustered_rows()) {
+                auto& ck = r.key();
+                bounds.push_back(query::clustering_range::make_singular(ck));
+            }
+        }
+
+        // Selected columns: all key columns first, then the touched data columns.
+        std::vector<const column_definition*> columns;
+        columns.reserve(_schema->all_columns().size());
+
+        std::transform(pc.begin(), pc.end(), std::back_inserter(columns), [](auto& c) { return &c; });
+        std::transform(cc.begin(), cc.end(), std::back_inserter(columns), [](auto& c) { return &c; });
+
+        query::column_id_vector static_columns, regular_columns;
+
+        auto sk = column_kind::static_column;
+        auto rk = column_kind::regular_column;
+        // Only the static row and the FIRST clustered row are inspected to
+        // decide which data columns to read.
+        // TODO: this assumes all mutations touch the same set of columns. This might not be true, and we may need to do more horrible set operation here.
+        for (auto& [r, cids, kind] : { std::tie(p.static_row().get(), static_columns, sk), std::tie(p.clustered_rows().begin()->row().cells(), regular_columns, rk) }) {
+            r.for_each_cell([&](column_id id, const atomic_cell_or_collection&) {
+                auto& cdef =_schema->column_at(kind, id);
+                cids.emplace_back(id);
+                columns.emplace_back(&cdef);
+            });
+        }
+
+        auto selection = cql3::selection::selection::for_columns(_schema, std::move(columns));
+        auto partition_slice = query::partition_slice(std::move(bounds), std::move(static_columns), std::move(regular_columns), selection->get_query_options());
+        auto command = ::make_lw_shared<query::read_command>(_schema->id(), _schema->version(), partition_slice, query::max_partitions);
+
+        // The continuation owns copies of everything it needs (schema, slice,
+        // selection), so it does not depend on this transformer staying alive.
+        return _ctx._proxy.query(_schema, std::move(command), std::move(partition_ranges), cl, service::storage_proxy::coordinator_query_options(default_timeout(), empty_service_permit(), client_state)).then(
+                [s = _schema, partition_slice = std::move(partition_slice), selection = std::move(selection)] (service::storage_proxy::coordinator_query_result qr) -> lw_shared_ptr<cql3::untyped_result_set> {
+                    cql3::selection::result_set_builder builder(*selection, gc_clock::now(), cql_serialization_format::latest());
+                    query::result_view::consume(*qr.query_result, partition_slice, cql3::selection::result_set_builder::visitor(builder, *s, *selection));
+                    auto result_set = builder.build();
+                    if (!result_set || result_set->empty()) {
+                        return {};
+                    }
+                    return make_lw_shared<cql3::untyped_result_set>(*result_set);
+        });
+    }
+};
+
+// This class is used to build a mapping from <node ip, shard id> to stream_id
+// It is used as a consumer for rows returned by the query to CDC Description Table
+//
+// For each partition (one per node/shard pair) it keeps the stream id of the
+// row with the greatest "created_at" clustering value and records it when the
+// partition ends.
+class streams_builder {
+    const schema& _schema;
+    // Result under construction.
+    transformer::streams_type _streams;
+    // Key of the partition currently being consumed.
+    net::inet_address _node_ip = net::inet_address();
+    unsigned int _shard_id = 0;
+    // Newest row seen so far in the current partition.
+    api::timestamp_type _latest_row_timestamp = api::min_timestamp;
+    utils::UUID _latest_row_stream_id = utils::UUID();
+public:
+    // `s` is the schema of the description table; it is held by reference and
+    // must outlive the builder.
+    streams_builder(const schema& s) : _schema(s) {}
+
+    // Starts a partition: decodes (node_ip, shard_id) from the partition key
+    // and resets the per-partition "latest row" state.
+    void accept_new_partition(const partition_key& key, uint32_t row_count) {
+        auto exploded = key.explode(_schema);
+        _node_ip = value_cast<net::inet_address>(inet_addr_type->deserialize(exploded[0]));
+        _shard_id = static_cast<unsigned int>(value_cast<int>(int32_type->deserialize(exploded[1])));
+        _latest_row_timestamp = api::min_timestamp;
+        _latest_row_stream_id = utils::UUID();
+    }
+
+    // Key-less variant of the consumer interface; not expected to be called.
+    void accept_new_partition(uint32_t row_count) {
+        assert(false);
+    }
+
+    // Consumes one row. Rows whose "created_at" value is not newer than the
+    // newest one seen so far in this partition are ignored; otherwise the
+    // row's "stream_id" value becomes the partition's current stream id.
+    void accept_new_row(
+            const clustering_key& key,
+            const query::result_row_view& static_row,
+            const query::result_row_view& row) {
+        auto row_iterator = row.iterator();
+        api::timestamp_type timestamp = value_cast<db_clock::time_point>(
+                timestamp_type->deserialize(key.explode(_schema)[0])).time_since_epoch().count();
+        if (timestamp <= _latest_row_timestamp) {
+            return;
+        }
+        _latest_row_timestamp = timestamp;
+        // Walk the regular columns in schema order, skipping everything except "stream_id".
+        for (auto&& cdef : _schema.regular_columns()) {
+            if (cdef.name_as_text() != "stream_id") {
+                row_iterator.skip(cdef);
+                continue;
+            }
+            auto val_opt = row_iterator.next_atomic_cell();
+            // A row without a stream_id cell trips this assertion.
+            assert(val_opt);
+            val_opt->value().with_linearized([&] (bytes_view bv) {
+                _latest_row_stream_id = value_cast<utils::UUID>(uuid_type->deserialize(bv));
+            });
+        }
+    }
+
+    // Key-less variant of the consumer interface; not expected to be called.
+    void accept_new_row(const query::result_row_view& static_row, const query::result_row_view& row) {
+        assert(false);
+    }
+
+    // Ends a partition: records the newest stream id for (node_ip, shard_id).
+    // emplace() keeps an existing entry if the pair was already recorded.
+    void accept_partition_end(const query::result_row_view& static_row) {
+        _streams.emplace(std::make_pair(_node_ip, _shard_id), _latest_row_stream_id);
+    }
+
+    // Hands over the accumulated mapping; the builder's own map is moved from.
+    transformer::streams_type build() {
+        return std::move(_streams);
+    }
+};
+
+// Reads the whole stream description table of `ks_name`.`cf_name` at QUORUM
+// and resolves to the (node ip, shard id) -> stream id mapping built from it
+// by streams_builder.
+//
+// `timeout` is the query deadline; the permit and client state are taken from `qs`.
+static future<::shared_ptr<transformer::streams_type>> get_streams(
+        db_context ctx,
+        const sstring& ks_name,
+        const sstring& cf_name,
+        lowres_clock::time_point timeout,
+        service::query_state& qs) {
+    auto s =
+        ctx._proxy.get_db().local().find_schema(ks_name, desc_name(cf_name));
+    query::read_command cmd(
+            s->id(),
+            s->version(),
+            partition_slice_builder(*s).with_no_static_columns().build());
+    return ctx._proxy.query(
+            s,
+            make_lw_shared(std::move(cmd)),
+            {dht::partition_range::make_open_ended_both_sides()},
+            db::consistency_level::QUORUM,
+            {timeout, qs.get_permit(), qs.get_client_state()}).then([s = std::move(s)] (auto qr) mutable {
+        return query::result_view::do_with(*qr.query_result,
+                [s = std::move(s)] (query::result_view v) {
+            // Rebuilt with the same builder calls as the slice used for the read command above.
+            auto slice = partition_slice_builder(*s)
+                    .with_no_static_columns()
+                    .build();
+            streams_builder builder{ *s };
+            v.consume(slice, builder);
+            return ::make_shared<transformer::streams_type>(builder.build());
+        });
+    });
+}
+
+// Invokes `f` for the indices 0, batch_size, 2 * batch_size, ... below
+// muts.size(), running the calls through parallel_for_each, and resolves to
+// the contents of `muts` (moved out of the caller's vector) once all of them
+// have completed.
+//
+// `muts` is captured by reference and must stay alive until the returned
+// future resolves. `f` receives only the first index of each step.
+template <typename Func>
+future<std::vector<mutation>>
+transform_mutations(std::vector<mutation>& muts, decltype(muts.size()) batch_size, Func&& f) {
+    return parallel_for_each(
+            boost::irange(static_cast<decltype(muts.size())>(0), muts.size(), batch_size),
+            // `f` is a forwarding reference: forward it so that an lvalue
+            // callable supplied by the caller is not silently moved from.
+            std::forward<Func>(f))
+        .then([&muts] () mutable { return std::move(muts); });
+}
+
+} // namespace cdc
+
+// Appends a CDC log mutation for every mutation whose schema has CDC enabled.
+//
+// Resolves to the (possibly extended) mutation vector together with a
+// result_callback, which is currently always empty. When no mutation is
+// CDC-enabled the input is returned untouched. For each CDC-enabled mutation
+// the stream description table is read (bounded by `timeout`) and, if the
+// table asks for a pre-image, the current row values are read first.
+future<std::tuple<std::vector<mutation>, cdc::result_callback>>
+cdc::cdc_service::impl::augment_mutation_call(lowres_clock::time_point timeout, std::vector<mutation>&& mutations) {
+    // we do all this because in the case of batches, we can have mixed schemas.
+    const bool any_cdc_enabled = std::any_of(mutations.begin(), mutations.end(), [](const mutation& m) {
+        return m.schema()->cdc_options().enabled();
+    });
+
+    if (!any_cdc_enabled) {
+        return make_ready_future<std::tuple<std::vector<mutation>, cdc::result_callback>>(std::make_tuple(std::move(mutations), result_callback{}));
+    }
+
+    // At most one log mutation is appended per input mutation, so reserving
+    // twice the size is meant to keep the appends below from reallocating
+    // while other continuations still refer to elements of the vector.
+    mutations.reserve(2 * mutations.size());
+
+    // No iterator into `mutations` is carried into the continuation: reserve()
+    // and the move into do_with() would have invalidated it.
+    return do_with(std::move(mutations), service::query_state(service::client_state::for_internal_calls(), empty_service_permit()), [this, timeout](std::vector<mutation>& mutations, service::query_state& qs) {
+        return transform_mutations(mutations, 1, [this, &mutations, timeout, &qs] (size_t idx) {
+            auto& m = mutations[idx];
+            auto s = m.schema();
+
+            if (!s->cdc_options().enabled()) {
+                return make_ready_future<>();
+            }
+            // for batches/multiple mutations this is super inefficient. either partition the mutation set by schema
+            // and re-use streams, or probably better: add a cache so this lookup is a noop on second mutation
+            return get_streams(_ctxt, s->ks_name(), s->cf_name(), timeout, qs).then([this, s = std::move(s), &qs, &mutations, idx](::shared_ptr<transformer::streams_type> streams) mutable {
+                auto& m = mutations[idx]; // should not really need because of reserve, but lets be conservative
+                transformer trans(_ctxt, s, streams);
+
+                if (!s->cdc_options().preimage()) {
+                    mutations.emplace_back(trans.transform(m));
+                    return make_ready_future<>();
+                }
+
+                // Note: further improvement here would be to coalesce the pre-image selects into one
+                // iff a batch contains several modifications to the same table. Otoh, batch is rare(?)
+                // so this is premature.
+                auto f = trans.pre_image_select(qs.get_client_state(), db::consistency_level::LOCAL_QUORUM, m);
+                return f.then([trans = std::move(trans), &mutations, idx] (lw_shared_ptr<cql3::untyped_result_set> rs) mutable {
+                    mutations.push_back(trans.transform(mutations[idx], rs.get()));
+                });
+            });
+        }).then([](std::vector<mutation> mutations) {
+            return make_ready_future<std::tuple<std::vector<mutation>, cdc::result_callback>>(std::make_tuple(std::move(mutations), result_callback{}));
+        });
+    });
+}
+
+// Returns true when at least one of `mutations` targets a table whose schema
+// has CDC enabled; false for an empty vector.
+bool cdc::cdc_service::needs_cdc_augmentation(const std::vector<mutation>& mutations) const {
+    for (const mutation& candidate : mutations) {
+        if (candidate.schema()->cdc_options().enabled()) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Public entry point; forwards to the implementation object.
+future<std::tuple<std::vector<mutation>, cdc::result_callback>>
+cdc::cdc_service::augment_mutation_call(lowres_clock::time_point timeout, std::vector<mutation>&& mutations) {
+    return _impl->augment_mutation_call(timeout, std::move(mutations));
+}
--- a/cdc/cdc.hh
+++ b/cdc/cdc.hh
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2019 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <functional>
+#include <optional>
+#include <map>
+#include <string>
+#include <vector>
+
+#include <seastar/core/future.hh>
+#include <seastar/core/lowres_clock.hh>
+#include <seastar/core/shared_ptr.hh>
+#include <seastar/core/sstring.hh>
+
+#include "exceptions/exceptions.hh"
+#include "timestamp.hh"
+#include "cdc_options.hh"
+
+class schema;
+using schema_ptr = seastar::lw_shared_ptr<const schema>;
+
+namespace locator {
+
+class snitch_ptr;
+class token_metadata;
+
+} // namespace locator
+
+namespace service {
+
+class migration_notifier;
+class storage_proxy;
+class query_state;
+
+} // namespace service
+
+namespace dht {
+
+class i_partitioner;
+
+} // namespace dht
+
+class mutation;
+class partition_key;
+
+namespace cdc {
+
+class db_context;
+
+// Callback to be invoked on mutation finish to fix
+// the whole bit about post-image.
+// TODO: decide on what the parameters are to be for this.
+using result_callback = std::function<future<>()>;
+
+/// \brief CDC service, responsible for schema listeners
+///
+/// CDC service will listen for schema changes and iff CDC is enabled/changed
+/// create/modify/delete corresponding log tables etc as part of the schema change. 
+///
+class cdc_service {
+    // Implementation is hidden behind a pointer; `impl` is defined in the .cc file.
+    class impl;
+    std::unique_ptr<impl> _impl;
+public:
+    // Stops the service; forwards to the implementation.
+    future<> stop();
+    // Builds the service with a default db_context derived from the proxy.
+    cdc_service(service::storage_proxy&);
+    // Builds the service with an explicitly supplied db_context.
+    cdc_service(db_context);
+    ~cdc_service();
+
+    // If any of the mutations are cdc enabled, optionally selects preimage, and adds the
+    // appropriate augments to set the log entries.
+    // Iff post-image is enabled for any of these, a non-empty callback is also
+    // returned to be invoked post the mutation query.
+    // NOTE(review): the implementation in cdc.cc currently always returns an
+    // empty callback; confirm whether post-image support is still pending.
+    future<std::tuple<std::vector<mutation>, result_callback>> augment_mutation_call(
+        lowres_clock::time_point timeout,
+        std::vector<mutation>&& mutations
+        );
+    // True when at least one of the mutations targets a CDC-enabled table.
+    bool needs_cdc_augmentation(const std::vector<mutation>&) const;
+};
+
+// Bundle of references to the services CDC depends on. The struct does not
+// own any of them; the referenced objects must outlive every copy of it.
+struct db_context final {
+    service::storage_proxy& _proxy;
+    service::migration_notifier& _migration_notifier;
+    locator::token_metadata& _token_metadata;
+    locator::snitch_ptr& _snitch;
+    dht::i_partitioner& _partitioner;
+
+    // Fluent builder for db_context. Only the storage proxy is mandatory;
+    // each optional dependency left unset is filled in by build().
+    class builder final {
+        service::storage_proxy& _proxy;
+        // Optional overrides; empty means "use the default in build()".
+        std::optional<std::reference_wrapper<service::migration_notifier>> _migration_notifier;
+        std::optional<std::reference_wrapper<locator::token_metadata>> _token_metadata;
+        std::optional<std::reference_wrapper<locator::snitch_ptr>> _snitch;
+        std::optional<std::reference_wrapper<dht::i_partitioner>> _partitioner;
+    public:
+        builder(service::storage_proxy& proxy);
+
+        // Each setter stores a reference to its argument and returns *this for chaining.
+        builder& with_migration_notifier(service::migration_notifier& migration_notifier);
+        builder& with_token_metadata(locator::token_metadata& token_metadata);
+        builder& with_snitch(locator::snitch_ptr& snitch);
+        builder& with_partitioner(dht::i_partitioner& partitioner);
+
+        // Produces the db_context from the proxy and the configured dependencies.
+        db_context build();
+    };
+};
+
+// cdc log table operation
+enum class operation : int8_t {
+    // note: these values will eventually be read by a third party, probably not privy to this
+    // enum decl, so don't change the constant values (or the datatype).
+    pre_image = 0, update = 1, row_delete = 2, range_delete_start = 3, range_delete_end = 4, partition_delete = 5
+};
+
+// cdc log data column operation
+enum class column_op : int8_t {
+    // same as "operation". Do not edit values or the underlying type unless you _really_ want to.
+    set = 0, del = 1, add = 2,
+};
+
+// Name of the CDC log table that accompanies `table_name`.
+seastar::sstring log_name(const seastar::sstring& table_name);
+
+// Name of the CDC stream description table that accompanies `table_name`.
+seastar::sstring desc_name(const seastar::sstring& table_name);
+
+} // namespace cdc
--- a/cdc/cdc_extension.hh
+++ b/cdc/cdc_extension.hh
@@ -1,52 +0,0 @@
-/*
- * Copyright 2020 ScyllaDB
- */
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "serializer.hh"
-#include "db/extensions.hh"
-#include "cdc/cdc_options.hh"
-#include "schema.hh"
-
-namespace cdc {
-
-class cdc_extension : public schema_extension {
-    cdc::options _cdc_options;
-public:
-    static constexpr auto NAME = "cdc";
-
-    cdc_extension() = default;
-    explicit cdc_extension(std::map<sstring, sstring> tags) : _cdc_options(std::move(tags)) {}
-    explicit cdc_extension(const bytes& b) : _cdc_options(cdc_extension::deserialize(b)) {}
-    explicit cdc_extension(const sstring& s) {
-        throw std::logic_error("Cannot create cdc info from string");
-    }
-    bytes serialize() const override {
-        return ser::serialize_to_buffer<bytes>(_cdc_options.to_map());
-    }
-    static std::map<sstring, sstring> deserialize(const bytes_view& buffer) {
-        return ser::deserialize_from_buffer(buffer, boost::type<std::map<sstring, sstring>>());
-    }
-    const options& get_options() const {
-        return _cdc_options;
-    }
-};
-
-}
--- a/cdc/cdc_partitioner.cc
+++ b/cdc/cdc_partitioner.cc
@@ -1,65 +0,0 @@
-/*
- * Copyright (C) 2020 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "cdc_partitioner.hh"
-#include "dht/token.hh"
-#include "schema.hh"
-#include "sstables/key.hh"
-#include "utils/class_registrator.hh"
-#include "cdc/generation.hh"
-#include "keys.hh"
-
-static const sstring cdc_partitioner_name = "com.scylladb.dht.CDCPartitioner";
-
-namespace cdc {
-
-const sstring cdc_partitioner::name() const {
-    return cdc_partitioner_name;
-}
-
-static dht::token to_token(int64_t value) {
-    return dht::token(dht::token::kind::key, value);
-}
-
-static dht::token to_token(bytes_view key) {
-    // Key should be 16 B long, of which first 8 B are used for token calculation
-    if (key.size() != 2*sizeof(int64_t)) {
-        return dht::minimum_token();
-    }
-    return to_token(stream_id::token_from_bytes(key));
-}
-
-dht::token
-cdc_partitioner::get_token(const sstables::key_view& key) const {
-    return to_token(bytes_view(key));
-}
-
-dht::token
-cdc_partitioner::get_token(const schema& s, partition_key_view key) const {
-    auto exploded_key = key.explode(s);
-    return to_token(exploded_key[0]);
-}
-
-using registry = class_registrator<dht::i_partitioner, cdc_partitioner>;
-static registry registrator(cdc_partitioner_name);
-static registry registrator_short_name("CDCPartitioner");
-
-}
--- a/cdc/cdc_partitioner.hh
+++ b/cdc/cdc_partitioner.hh
@@ -1,48 +0,0 @@
-/*
- * Copyright (C) 2020 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include <seastar/core/sstring.hh>
-
-#include "bytes.hh"
-#include "dht/i_partitioner.hh"
-
-class schema;
-class partition_key_view;
-
-namespace sstables {
-
-class key_view;
-
-}
-
-namespace cdc {
-
-struct cdc_partitioner final : public dht::i_partitioner {
-    cdc_partitioner() = default;
-    virtual const sstring name() const override;
-    virtual dht::token get_token(const schema& s, partition_key_view key) const override;
-    virtual dht::token get_token(const sstables::key_view& key) const override;
-};
-
-
-}
--- a/cdc/generation.cc
+++ b/cdc/generation.cc
@@ -1,331 +0,0 @@
-/*
- * Copyright (C) 2019 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <boost/type.hpp>
-#include <random>
-#include <unordered_set>
-#include <seastar/core/sleep.hh>
-
-#include "keys.hh"
-#include "schema_builder.hh"
-#include "db/config.hh"
-#include "db/system_keyspace.hh"
-#include "db/system_distributed_keyspace.hh"
-#include "dht/token-sharding.hh"
-#include "locator/token_metadata.hh"
-#include "gms/application_state.hh"
-#include "gms/inet_address.hh"
-#include "gms/gossiper.hh"
-
-#include "cdc/generation.hh"
-
-extern logging::logger cdc_log;
-
-static int get_shard_count(const gms::inet_address& endpoint, const gms::gossiper& g) {
-    auto ep_state = g.get_application_state_ptr(endpoint, gms::application_state::SHARD_COUNT);
-    return ep_state ? std::stoi(ep_state->value) : -1;
-}
-
-static unsigned get_sharding_ignore_msb(const gms::inet_address& endpoint, const gms::gossiper& g) {
-    auto ep_state = g.get_application_state_ptr(endpoint, gms::application_state::IGNORE_MSB_BITS);
-    return ep_state ? std::stoi(ep_state->value) : 0;
-}
-
-namespace cdc {
-
-extern const api::timestamp_clock::duration generation_leeway =
-    std::chrono::duration_cast<api::timestamp_clock::duration>(std::chrono::seconds(5));
-
-static void copy_int_to_bytes(int64_t i, size_t offset, bytes& b) {
-    i = net::hton(i);
-    std::copy_n(reinterpret_cast<int8_t*>(&i), sizeof(int64_t), b.begin() + offset);
-}
-
-stream_id::stream_id(int64_t first, int64_t second)
-    : _value(bytes::initialized_later(), 2 * sizeof(int64_t))
-{
-    copy_int_to_bytes(first, 0, _value);
-    copy_int_to_bytes(second, sizeof(int64_t), _value);
-}
-
-stream_id::stream_id(bytes b) : _value(std::move(b)) { }
-
-bool stream_id::is_set() const {
-    return !_value.empty();
-}
-
-bool stream_id::operator==(const stream_id& o) const {
-    return _value == o._value;
-}
-
-bool stream_id::operator<(const stream_id& o) const {
-    return _value < o._value;
-}
-
-static int64_t bytes_to_int64(bytes_view b, size_t offset) {
-    assert(b.size() >= offset + sizeof(int64_t));
-    int64_t res;
-    std::copy_n(b.begin() + offset, sizeof(int64_t), reinterpret_cast<int8_t *>(&res));
-    return net::ntoh(res);
-}
-
-int64_t stream_id::first() const {
-    return token_from_bytes(_value);
-}
-
-int64_t stream_id::second() const {
-    return bytes_to_int64(_value, sizeof(int64_t));
-}
-
-int64_t stream_id::token_from_bytes(bytes_view b) {
-    return bytes_to_int64(b, 0);
-}
-
-const bytes& stream_id::to_bytes() const {
-    return _value;
-}
-
-partition_key stream_id::to_partition_key(const schema& log_schema) const {
-    return partition_key::from_single_value(log_schema, _value);
-}
-
-bool token_range_description::operator==(const token_range_description& o) const {
-    return token_range_end == o.token_range_end && streams == o.streams
-        && sharding_ignore_msb == o.sharding_ignore_msb;
-}
-
-topology_description::topology_description(std::vector<token_range_description> entries)
-    : _entries(std::move(entries)) {}
-
-bool topology_description::operator==(const topology_description& o) const {
-    return _entries == o._entries;
-}
-
-const std::vector<token_range_description>& topology_description::entries() const {
-    return _entries;
-}
-
-static stream_id create_stream_id(dht::token t) {
-    static thread_local std::mt19937_64 rand_gen(std::random_device().operator()());
-    static thread_local std::uniform_int_distribution<int64_t> rand_dist(std::numeric_limits<int64_t>::min());
-
-    return {dht::token::to_int64(t), rand_dist(rand_gen)};
-}
-
-class topology_description_generator final {
-    const db::config& _cfg;
-    const std::unordered_set<dht::token>& _bootstrap_tokens;
-    const locator::token_metadata& _token_metadata;
-    const gms::gossiper& _gossiper;
-
-    // Compute a set of tokens that split the token ring into vnodes
-    auto get_tokens() const {
-        auto tokens = _token_metadata.sorted_tokens();
-        auto it = tokens.insert(
-                tokens.end(), _bootstrap_tokens.begin(), _bootstrap_tokens.end());
-        std::sort(it, tokens.end());
-        std::inplace_merge(tokens.begin(), it, tokens.end());
-        tokens.erase(std::unique(tokens.begin(), tokens.end()), tokens.end());
-        return tokens;
-    }
-
-    // Fetch sharding parameters for a node that owns vnode ending with this.end
-    // Returns <shard_count, ignore_msb> pair.
-    std::pair<size_t, uint8_t> get_sharding_info(dht::token end) const {
-        if (_bootstrap_tokens.count(end) > 0) {
-            return {smp::count, _cfg.murmur3_partitioner_ignore_msb_bits()};
-        } else {
-            auto endpoint = _token_metadata.get_endpoint(end);
-            if (!endpoint) {
-                throw std::runtime_error(
-                        format("Can't find endpoint for token {}", end));
-            }
-            auto sc = get_shard_count(*endpoint, _gossiper);
-            return {sc > 0 ? sc : 1, get_sharding_ignore_msb(*endpoint, _gossiper)};
-        }
-    }
-
-    token_range_description create_description(dht::token start, dht::token end) const {
-        token_range_description desc;
-
-        desc.token_range_end = end;
-
-        auto [shard_count, ignore_msb] = get_sharding_info(end);
-        desc.streams.reserve(shard_count);
-        desc.sharding_ignore_msb = ignore_msb;
-
-        dht::sharder sharder(shard_count, ignore_msb);
-        for (size_t shard_idx = 0; shard_idx < shard_count; ++shard_idx) {
-            auto t = dht::find_first_token_for_shard(sharder, start, end, shard_idx);
-            desc.streams.push_back(create_stream_id(t));
-        }
-
-        return desc;
-    }
-public:
-    topology_description_generator(
-            const db::config& cfg,
-            const std::unordered_set<dht::token>& bootstrap_tokens,
-            const locator::token_metadata& token_metadata,
-            const gms::gossiper& gossiper)
-        : _cfg(cfg)
-        , _bootstrap_tokens(bootstrap_tokens)
-        , _token_metadata(token_metadata)
-        , _gossiper(gossiper)
-    {}
-
-    /*
-     * Generate a set of CDC stream identifiers such that for each shard
-     * and vnode pair there exists a stream whose token falls into this vnode
-     * and is owned by this shard. It is sometimes not possible to generate
-     * a CDC stream identifier for some (vnode, shard) pair because not all
-     * shards have to own tokens in a vnode. Small vnode can be totally owned
-     * by a single shard. In such case, a stream identifier that maps to
-     * end of the vnode is generated.
-     *
-     * Then build a cdc::topology_description which maps tokens to generated
-     * stream identifiers, such that if token T is owned by shard S in vnode V,
-     * it gets mapped to the stream identifier generated for (S, V).
-     */
-    // Run in seastar::async context.
-    topology_description generate() const {
-        const auto tokens = get_tokens();
-
-        std::vector<token_range_description> vnode_descriptions;
-        vnode_descriptions.reserve(tokens.size());
-
-        vnode_descriptions.push_back(
-                create_description(tokens.back(), tokens.front()));
-        for (size_t idx = 1; idx < tokens.size(); ++idx) {
-            vnode_descriptions.push_back(
-                    create_description(tokens[idx - 1], tokens[idx]));
-        }
-
-        return {std::move(vnode_descriptions)};
-    }
-};
-
-bool should_propose_first_generation(const gms::inet_address& me, const gms::gossiper& g) {
-    auto my_host_id = g.get_host_id(me);
-    auto& eps = g.get_endpoint_states();
-    return std::none_of(eps.begin(), eps.end(),
-            [&] (const std::pair<gms::inet_address, gms::endpoint_state>& ep) {
-        return my_host_id < g.get_host_id(ep.first);
-    });
-}
-
-future<db_clock::time_point> get_local_streams_timestamp() {
-    return db::system_keyspace::get_saved_cdc_streams_timestamp().then([] (std::optional<db_clock::time_point> ts) {
-        if (!ts) {
-            auto err = format("get_local_streams_timestamp: tried to retrieve streams timestamp after bootstrapping, but it's not present");
-            cdc_log.error("{}", err);
-            throw std::runtime_error(err);
-        }
-        return *ts;
-    });
-}
-
-// Run inside seastar::async context.
-db_clock::time_point make_new_cdc_generation(
-        const db::config& cfg,
-        const std::unordered_set<dht::token>& bootstrap_tokens,
-        const locator::token_metadata& tm,
-        const gms::gossiper& g,
-        db::system_distributed_keyspace& sys_dist_ks,
-        std::chrono::milliseconds ring_delay,
-        bool for_testing) {
-    auto gen = topology_description_generator(cfg, bootstrap_tokens, tm, g).generate();
-
-    // Begin the race.
-    auto ts = db_clock::now() + (
-            for_testing ? std::chrono::milliseconds(0) : (
-                2 * ring_delay + std::chrono::duration_cast<std::chrono::milliseconds>(generation_leeway)));
-    sys_dist_ks.insert_cdc_topology_description(ts, std::move(gen), { tm.count_normal_token_owners() }).get();
-
-    return ts;
-}
-
-std::optional<db_clock::time_point> get_streams_timestamp_for(const gms::inet_address& endpoint, const gms::gossiper& g) {
-    auto streams_ts_string = g.get_application_state_value(endpoint, gms::application_state::CDC_STREAMS_TIMESTAMP);
-    cdc_log.trace("endpoint={}, streams_ts_string={}", endpoint, streams_ts_string);
-    return gms::versioned_value::cdc_streams_timestamp_from_string(streams_ts_string);
-}
-
-// Run inside seastar::async context.
-static void do_update_streams_description(
-        db_clock::time_point streams_ts,
-        db::system_distributed_keyspace& sys_dist_ks,
-        db::system_distributed_keyspace::context ctx) {
-    if (sys_dist_ks.cdc_desc_exists(streams_ts, ctx).get0()) {
-        cdc_log.debug("update_streams_description: description of generation {} already inserted", streams_ts);
-        return;
-    }
-
-    // We might race with another node also inserting the description, but that's ok. It's an idempotent operation.
-
-    auto topo = sys_dist_ks.read_cdc_topology_description(streams_ts, ctx).get0();
-    if (!topo) {
-        throw std::runtime_error(format("could not find streams data for timestamp {}", streams_ts));
-    }
-
-    std::set<cdc::stream_id> streams_set;
-    for (auto& entry: topo->entries()) {
-        streams_set.insert(entry.streams.begin(), entry.streams.end());
-    }
-
-    std::vector<cdc::stream_id> streams_vec(streams_set.begin(), streams_set.end());
-
-    sys_dist_ks.create_cdc_desc(streams_ts, streams_vec, ctx).get();
-    cdc_log.info("CDC description table successfully updated with generation {}.", streams_ts);
-}
-
-void update_streams_description(
-        db_clock::time_point streams_ts,
-        shared_ptr<db::system_distributed_keyspace> sys_dist_ks,
-        noncopyable_function<unsigned()> get_num_token_owners,
-        abort_source& abort_src) {
-    try {
-        do_update_streams_description(streams_ts, *sys_dist_ks, { get_num_token_owners() });
-    } catch(...) {
-        cdc_log.warn(
-            "Could not update CDC description table with generation {}: {}. Will retry in the background.",
-            streams_ts, std::current_exception());
-
-        // It is safe to discard this future: we keep system distributed keyspace alive.
-        (void)seastar::async([
-            streams_ts, sys_dist_ks, get_num_token_owners = std::move(get_num_token_owners), &abort_src
-        ] {
-            while (true) {
-                sleep_abortable(std::chrono::seconds(60), abort_src).get();
-                try {
-                    do_update_streams_description(streams_ts, *sys_dist_ks, { get_num_token_owners() });
-                    return;
-                } catch (...) {
-                    cdc_log.warn(
-                        "Could not update CDC description table with generation {}: {}. Will try again.",
-                        streams_ts, std::current_exception());
-                }
-            }
-        });
-    }
-}
-
-} // namespace cdc
--- a/cdc/generation.hh
+++ b/cdc/generation.hh
@@ -1,177 +0,0 @@
-/*
- * Copyright (C) 2019 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-/* This module contains classes and functions used to manage CDC generations:
- * sets of CDC stream identifiers used by the cluster to choose partition keys for CDC log writes.
- * Each CDC generation begins operating at a specific time point, called the generation's timestamp
- * (`cdc_streams_timpestamp` or `streams_timestamp` in the code).
- * The generation is used by all nodes in the cluster to pick CDC streams until superseded by a new generation.
- *
- * Functions from this module are used by the node joining procedure to introduce new CDC generations to the cluster
- * (which is necessary due to new tokens being inserted into the token ring), or during rolling upgrade
- * if CDC is enabled for the first time.
- */
-
-#pragma once
-
-#include <vector>
-#include <unordered_set>
-#include <seastar/util/noncopyable_function.hh>
-
-#include "database_fwd.hh"
-#include "db_clock.hh"
-#include "dht/token.hh"
-
-namespace seastar {
-    class abort_source;
-} // namespace seastar
-
-namespace db {
-    class config;
-    class system_distributed_keyspace;
-} // namespace db
-
-namespace gms {
-    class inet_address;
-    class gossiper;
-} // namespace gms
-
-namespace locator {
-    class token_metadata;
-} // namespace locator
-
-namespace cdc {
-
-class stream_id final {
-    bytes _value;
-public:
-    stream_id() = default;
-    stream_id(int64_t, int64_t);
-    stream_id(bytes);
-    bool is_set() const;
-    bool operator==(const stream_id&) const;
-    bool operator<(const stream_id&) const;
-
-    int64_t first() const;
-    int64_t second() const;
-
-    const bytes& to_bytes() const;
-
-    partition_key to_partition_key(const schema& log_schema) const;
-    static int64_t token_from_bytes(bytes_view);
-};
-
-/* Describes a mapping of tokens to CDC streams in a token range.
- *
- * The range ends with `token_range_end`. A vector of `token_range_description`s defines the ranges entirely
- * (the end of the `i`th range is the beginning of the `i+1 % size()`th range). Ranges are left-opened, right-closed.
- *
- * Tokens in the range ending with `token_range_end` are mapped to streams in the `streams` vector as follows:
- * token `T` is mapped to `streams[j]` if and only if the used partitioner maps `T` to the `j`th shard,
- * assuming that the partitioner is configured for `streams.size()` shards and (partitioner's) `sharding_ignore_msb`
- * equals to the given `sharding_ignore_msb`.
-*/
-struct token_range_description {
-    dht::token token_range_end;
-    std::vector<stream_id> streams;
-    uint8_t sharding_ignore_msb;
-
-    bool operator==(const token_range_description&) const;
-};
-
-
-/* Describes a mapping of tokens to CDC streams in a whole token ring.
- *
- * Division of the ring to token ranges is defined in terms of `token_range_end`s
- * in the `_entries` vector. See the comment above `token_range_description` for explanation.
- */
-class topology_description {
-    std::vector<token_range_description> _entries;
-public:
-    topology_description(std::vector<token_range_description> entries);
-    bool operator==(const topology_description&) const;
-
-    const std::vector<token_range_description>& entries() const;
-};
-
-/* Should be called when we're restarting and we noticed that we didn't save any streams timestamp in our local tables,
- * which means that we're probably upgrading from a non-CDC/old CDC version (another reason could be
- * that there's a bug, or the user messed with our local tables).
- *
- * It checks whether we should be the node to propose the first generation of CDC streams.
- * The chosen condition is arbitrary, it only tries to make sure that no two nodes propose a generation of streams
- * when upgrading, and nothing bad happens if they for some reason do (it's mostly an optimization).
- */
-bool should_propose_first_generation(const gms::inet_address& me, const gms::gossiper&);
-
-/*
- * Read this node's streams generation timestamp stored in the LOCAL table.
- * Assumes that the node has successfully bootstrapped, and we're not upgrading from a non-CDC version,
- * so the timestamp is present.
- */
-future<db_clock::time_point> get_local_streams_timestamp();
-
-/* Generate a new set of CDC streams and insert it into the distributed cdc_generations table.
- * Returns the timestamp of this new generation.
- *
- * Should be called when starting the node for the first time (i.e., joining the ring).
- *
- * Assumes that the system_distributed keyspace is initialized.
- *
- * The caller of this function is expected to insert this timestamp into the gossiper as fast as possible,
- * so that other nodes learn about the generation before their clocks cross the timestmap
- * (not guaranteed in the current implementation, but expected to be the common case;
- *  we assume that `ring_delay` is enough for other nodes to learn about the new generation).
- */
-db_clock::time_point make_new_cdc_generation(
-        const db::config& cfg,
-        const std::unordered_set<dht::token>& bootstrap_tokens,
-        const locator::token_metadata& tm,
-        const gms::gossiper& g,
-        db::system_distributed_keyspace& sys_dist_ks,
-        std::chrono::milliseconds ring_delay,
-        bool for_testing);
-
-/* Retrieves CDC streams generation timestamp from the given endpoint's application state (broadcasted through gossip).
- * We might be during a rolling upgrade, so the timestamp might not be there (if the other node didn't upgrade yet),
- * but if the cluster already supports CDC, then every newly joining node will propose a new CDC generation,
- * which means it will gossip the generation's timestamp.
- */
-std::optional<db_clock::time_point> get_streams_timestamp_for(const gms::inet_address& endpoint, const gms::gossiper&);
-
-/* Inform CDC users about a generation of streams (identified by the given timestamp)
- * by inserting it into the cdc_streams table.
- *
- * Assumes that the cdc_generations table contains this generation.
- *
- * Returning from this function does not mean that the table update was successful: the function
- * might run an asynchronous task in the background.
- *
- * Run inside seastar::async context.
- */
-void update_streams_description(
-        db_clock::time_point,
-        shared_ptr<db::system_distributed_keyspace>,
-        noncopyable_function<unsigned()> get_num_token_owners,
-        abort_source&);
-
-} // namespace cdc
--- a/cdc/log.cc
+++ b/cdc/log.cc
--- a/Show More
+++ b/Show More