gossip: Fix tokens assignment in assassinate_endpoint

The tokens vector is defined a few lines above and is needed outside the if block. Do not redefine it again in the if block, otherwise the tokens will be empty. Found by code inspection. Fixes #3551. Message-Id: <c7a06375c65c950e94236571127f533e5a60cbfd.1530002177.git.asias@scylladb.com> (cherry picked from commit c3b5a2ecd5)
locator::ec2_multi_region_snitch: don't call for ec2_snitch::gossiper_starting()
2018-06-27 12:01:19 +03:00 · 2018-06-12 19:02:48 +03:00 · 2018-05-24 12:02:15 +03:00 · 2018-05-24 11:14:20 +03:00 · 2018-05-24 11:08:13 +03:00 · 2018-05-24 15:24:29 +08:00
768 changed files with 66843 additions and 18158 deletions
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,6 @@
 [submodule "seastar"]
 	path = seastar
-	url = ../seastar
+	url = ../scylla-seastar
 	ignore = dirty
 [submodule "swagger-ui"]
 	path = swagger-ui
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -0,0 +1,140 @@
+##
+## For best results, first compile the project using the Ninja build-system.
+##
+
+cmake_minimum_required(VERSION 3.7)
+project(scylla)
+
+if (NOT DEFINED ENV{CLION_IDE})
+    message(FATAL_ERROR "This CMakeLists.txt file is only valid for use in CLion")
+endif()
+
+# Default value. A more accurate list is populated through `pkg-config` below if `seastar.pc` is available.
+set(SEASTAR_INCLUDE_DIRS "seastar")
+
+# These paths are always available, since they're included in the repository. Additional DPDK headers are placed while
+# Seastar is built, and are captured in `SEASTAR_INCLUDE_DIRS` through parsing the Seastar pkg-config file (below).
+set(SEASTAR_DPDK_INCLUDE_DIRS
+        seastar/dpdk/lib/librte_eal/common/include
+        seastar/dpdk/lib/librte_eal/common/include/generic
+        seastar/dpdk/lib/librte_eal/common/include/x86
+        seastar/dpdk/lib/librte_ether)
+
+find_package(PkgConfig REQUIRED)
+
+set(ENV{PKG_CONFIG_PATH} "${CMAKE_SOURCE_DIR}/seastar/build/release:$ENV{PKG_CONFIG_PATH}")
+pkg_check_modules(SEASTAR seastar)
+
+find_package(Boost COMPONENTS filesystem program_options system thread)
+
+##
+## Collect every `*.cc` and `*.hh` file found in the given directories and store the list in the
+## caller's variable named by VAR.
+##
+## Directories are searched non-recursively unless the RECURSIVE flag is passed.
+##
+## Use: scan_scylla_source_directories(VAR my_result_var [RECURSIVE] PATHS [path1 path2 ...])
+##
+function (scan_scylla_source_directories)
+    # Keywords: RECURSIVE (flag), VAR (single value), PATHS (list of directories).
+    cmake_parse_arguments(args "RECURSIVE" "VAR" "PATHS" "${ARGN}")
+
+    # Pick the `file()` sub-command: GLOB_RECURSE descends into sub-directories, GLOB does not.
+    set(glob_kind GLOB)
+    if (args_RECURSIVE)
+        set(glob_kind GLOB_RECURSE)
+    endif()
+
+    # Each directory contributes one pattern for sources and one for headers.
+    set(patterns "")
+    foreach (path ${args_PATHS})
+        list(APPEND patterns "${path}/*.cc")
+        list(APPEND patterns "${path}/*.hh")
+    endforeach()
+
+    file(${glob_kind} matched_files ${patterns})
+
+    # A function has its own variable scope, so the result must be set in the caller's scope.
+    set(${args_VAR} ${matched_files} PARENT_SCOPE)
+endfunction()
+
+## Although Seastar is an external project, it is common enough to explore the sources while doing
+## Scylla development that we'll treat the Seastar sources as part of this project for easier navigation.
+scan_scylla_source_directories(
+        VAR SEASTAR_SOURCE_FILES
+        RECURSIVE
+
+        PATHS
+          seastar/core
+          seastar/http
+          seastar/json
+          seastar/net
+          seastar/rpc
+          seastar/tests
+          seastar/util)
+
+scan_scylla_source_directories(
+        VAR SCYLLA_ROOT_SOURCE_FILES
+        PATHS .)
+
+scan_scylla_source_directories(
+        VAR SCYLLA_SUB_SOURCE_FILES
+        RECURSIVE
+
+        PATHS
+          api
+          auth
+          cql3
+          db
+          dht
+          exceptions
+          gms
+          index
+          io
+          locator
+          message
+          repair
+          service
+          sstables
+          streaming
+          tests
+          thrift
+          tracing
+          transport
+          utils)
+
+scan_scylla_source_directories(
+        VAR SCYLLA_GEN_SOURCE_FILES
+        RECURSIVE
+        PATHS build/release/gen)
+
+set(SCYLLA_SOURCE_FILES
+        ${SCYLLA_ROOT_SOURCE_FILES}
+        ${SCYLLA_GEN_SOURCE_FILES}
+        ${SCYLLA_SUB_SOURCE_FILES})
+
+add_executable(scylla
+        ${SEASTAR_SOURCE_FILES}
+        ${SCYLLA_SOURCE_FILES})
+
+# Note that since CLion does not understand GCC6 concepts, we always disable them (even if users configure otherwise).
+# CLion seems to have trouble with `-U` (macro undefinition), so we do it this way instead.
+list(REMOVE_ITEM SEASTAR_CFLAGS "-DHAVE_GCC6_CONCEPTS")
+
+# If the Seastar pkg-config information is available, append to the default flags.
+#
+# For ease of browsing the source code, we always pretend that DPDK is enabled.
+target_compile_options(scylla PUBLIC
+        -std=gnu++14
+        -DHAVE_DPDK
+        -DHAVE_HWLOC
+        "${SEASTAR_CFLAGS}")
+
+# The order matters here: prefer the "static" DPDK directories to any dynamic paths from pkg-config. Some files are only
+# available dynamically, though.
+target_include_directories(scylla PUBLIC
+        .
+        ${SEASTAR_DPDK_INCLUDE_DIRS}
+        ${SEASTAR_INCLUDE_DIRS}
+        ${Boost_INCLUDE_DIRS}
+        build/release/gen)
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -0,0 +1,11 @@
+# Asking questions or requesting help
+
+Use the [ScyllaDB user mailing list](https://groups.google.com/forum/#!forum/scylladb-users) for general questions and help.
+
+# Reporting an issue
+
+Please use the [Issue Tracker](https://github.com/scylladb/scylla/issues/) to report issues.  Fill in as much information as you can in the issue template, especially for performance problems.
+
+# Contributing Code to Scylla
+
+To contribute code to Scylla, you need to sign the [Contributor License Agreement](http://www.scylladb.com/opensource/cla/) and send your changes as [patches](https://github.com/scylladb/scylla/wiki/Formatting-and-sending-patches) to the [mailing list](https://groups.google.com/forum/#!forum/scylladb-dev). We don't accept pull requests on GitHub.
--- a/README.md
+++ b/README.md
@@ -83,14 +83,6 @@ Run the image with:
 docker run -p $(hostname -i):9042:9042 -i -t <image name>
 ```

-
 ## Contributing to Scylla

-Do not send pull requests.
-
-Send patches to the mailing list address scylladb-dev@googlegroups.com.
-Be sure to subscribe.
-
-In order for your patches to be merged, you must sign the Contributor's
-License Agreement, protecting your rights and ours.  See
-http://www.scylladb.com/opensource/cla/.
+[Guidelines for contributing](CONTRIBUTING.md)
--- a/9
+++ b/9
@@ -1,6 +1,6 @@
 #!/bin/sh

-VERSION=666.development
+VERSION=2.0.4

 if test -f version
 then
@@ -10,7 +10,12 @@ else
 	DATE=$(date +%Y%m%d)
 	GIT_COMMIT=$(git log --pretty=format:'%h' -n 1)
 	SCYLLA_VERSION=$VERSION
-	SCYLLA_RELEASE=$DATE.$GIT_COMMIT
+	# For custom package builds, replace "0" with "counter.your_name",
+	# where counter starts at 1 and increments for successive versions.
+	# This ensures that the package manager will select your custom
+	# package over the standard release.
+	SCYLLA_BUILD=0
+	SCYLLA_RELEASE=$SCYLLA_BUILD.$DATE.$GIT_COMMIT
 fi

 echo "$SCYLLA_VERSION-$SCYLLA_RELEASE"
--- a/api/api-doc/cache_service.json
+++ b/api/api-doc/cache_service.json
@@ -397,6 +397,36 @@
        }
      ]
    },
+    {
+      "path": "/cache_service/metrics/key/hits_moving_avrage",
+      "operations": [
+        {
+          "method": "GET",
+          "summary": "Get key hits moving avrage",
+          "type": "#/utils/rate_moving_average",
+          "nickname": "get_key_hits_moving_avrage",
+          "produces": [
+            "application/json"
+          ],
+          "parameters": []
+        }
+      ]
+    },
+    {
+      "path": "/cache_service/metrics/key/requests_moving_avrage",
+      "operations": [
+        {
+          "method": "GET",
+          "summary": "Get key requests moving avrage",
+          "type": "#/utils/rate_moving_average",
+          "nickname": "get_key_requests_moving_avrage",
+          "produces": [
+            "application/json"
+          ],
+          "parameters": []
+        }
+      ]
+    },
    {
      "path": "/cache_service/metrics/key/size",
      "operations": [
@@ -607,6 +637,36 @@
        }
      ]
    },
+    {
+      "path": "/cache_service/metrics/counter/hits_moving_avrage",
+      "operations": [
+        {
+          "method": "GET",
+          "summary": "Get counter hits moving avrage",
+          "type": "#/utils/rate_moving_average",
+          "nickname": "get_counter_hits_moving_avrage",
+          "produces": [
+            "application/json"
+          ],
+          "parameters": []
+        }
+      ]
+    },
+    {
+      "path": "/cache_service/metrics/counter/requests_moving_avrage",
+      "operations": [
+        {
+          "method": "GET",
+          "summary": "Get counter requests moving avrage",
+          "type": "#/utils/rate_moving_average",
+          "nickname": "get_counter_requests_moving_avrage",
+          "produces": [
+            "application/json"
+          ],
+          "parameters": []
+        }
+      ]
+    },
    {
      "path": "/cache_service/metrics/counter/size",
      "operations": [
--- a/api/api-doc/column_family.json
+++ b/api/api-doc/column_family.json
@@ -78,11 +78,19 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
                     "paramType":"path"
+                  },
+                  {
+                     "name":"split_output",
+                     "description":"true if the output of the major compaction should be split in several sstables",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"bool",
+                     "paramType":"query"
                  }
               ]
            }
@@ -102,7 +110,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -129,7 +137,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -153,7 +161,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -180,7 +188,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -204,7 +212,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -244,7 +252,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -271,7 +279,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -298,7 +306,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -317,7 +325,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -349,7 +357,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -381,7 +389,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -405,7 +413,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -432,7 +440,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -459,7 +467,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -491,7 +499,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -518,7 +526,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -545,7 +553,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -569,7 +577,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -593,7 +601,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -633,7 +641,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -673,7 +681,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -713,7 +721,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -753,7 +761,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -793,7 +801,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -833,7 +841,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -873,7 +881,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -916,7 +924,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -943,7 +951,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -970,7 +978,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -994,7 +1002,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1034,7 +1042,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1058,7 +1066,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1101,7 +1109,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1144,7 +1152,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1203,7 +1211,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1243,7 +1251,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1267,7 +1275,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1310,7 +1318,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1353,7 +1361,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1412,7 +1420,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1452,7 +1460,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1492,7 +1500,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1532,7 +1540,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1572,7 +1580,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1612,7 +1620,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1652,7 +1660,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1692,7 +1700,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1732,7 +1740,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1772,7 +1780,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1812,7 +1820,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1852,7 +1860,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1892,7 +1900,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1932,7 +1940,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1972,7 +1980,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2012,7 +2020,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2052,7 +2060,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2092,7 +2100,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2116,7 +2124,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2156,7 +2164,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2196,7 +2204,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2236,7 +2244,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2276,7 +2284,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2300,7 +2308,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2324,7 +2332,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2351,7 +2359,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2378,7 +2386,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2405,7 +2413,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2432,7 +2440,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2501,7 +2509,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2525,7 +2533,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2549,7 +2557,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2573,7 +2581,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2597,7 +2605,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2621,7 +2629,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2645,7 +2653,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2669,7 +2677,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2693,7 +2701,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2717,7 +2725,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2741,7 +2749,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2765,7 +2773,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
--- a/api/api-doc/endpoint_snitch_info.json
+++ b/api/api-doc/endpoint_snitch_info.json
@@ -21,8 +21,8 @@
               "parameters":[
                  {
                     "name":"host",
-                     "description":"The host name",
-                     "required":true,
+                     "description":"The host name. If absent, the local server broadcast/listen address is used",
+                     "required":false,
                     "allowMultiple":false,
                     "type":"string",
                     "paramType":"query"
@@ -45,8 +45,8 @@
               "parameters":[
                  {
                     "name":"host",
-                     "description":"The host name",
-                     "required":true,
+                     "description":"The host name. If absent, the local server broadcast/listen address is used",
+                     "required":false,
                     "allowMultiple":false,
                     "type":"string",
                     "paramType":"query"
--- a/api/api-doc/failure_detector.json
+++ b/api/api-doc/failure_detector.json
@@ -42,6 +42,25 @@
            }
         ]
      },
+      {
+         "path":"/failure_detector/endpoint_phi_values",
+         "operations":[
+            {
+               "method":"GET",
+               "summary":"Get end point phi values",
+               "type":"array",
+               "items":{
+                  "type":"endpoint_phi_value"
+               },
+               "nickname":"get_endpoint_phi_values",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+               ]
+            }
+         ]
+      },
      {
         "path":"/failure_detector/endpoints/",
         "operations":[
@@ -202,6 +221,20 @@
                    "description": "The application state version"
                }
            }
+        },
+        "endpoint_phi_value": {
+            "id" : "endpoint_phi_value",
+            "description": "Holds phi value for a single end point",
+            "properties": {
+                "phi": {
+                    "type": "double",
+                    "description": "Phi value"
+                },
+                "endpoint": {
+                    "type": "string",
+                    "description": "end point address"
+                }
+            }
        }
    }
 }
--- a/api/api-doc/storage_proxy.json
+++ b/api/api-doc/storage_proxy.json
@@ -777,7 +777,7 @@
      ]
    },
    {
-      "path": "/storage_proxy/metrics/read/moving_avrage_histogram",
+      "path": "/storage_proxy/metrics/read/moving_average_histogram",
      "operations": [
        {
          "method": "GET",
@@ -792,7 +792,7 @@
      ]
    },
    {
-      "path": "/storage_proxy/metrics/range/moving_avrage_histogram",
+      "path": "/storage_proxy/metrics/range/moving_average_histogram",
      "operations": [
        {
          "method": "GET",
@@ -942,7 +942,7 @@
      ]
    },
    {
-      "path": "/storage_proxy/metrics/write/moving_avrage_histogram",
+      "path": "/storage_proxy/metrics/write/moving_average_histogram",
      "operations": [
        {
          "method": "GET",
--- a/api/api-doc/storage_service.json
+++ b/api/api-doc/storage_service.json
@@ -1201,11 +1201,12 @@
               ],
               "parameters":[
                  {
-                     "name":"non_system",
-                     "description":"When set to true limit to non system",
+                     "name":"type",
+                     "description":"Which keyspaces to return",
                     "required":false,
                     "allowMultiple":false,
-                     "type":"boolean",
+                     "type":"string",
+                     "enum": [ "all", "user", "non_local_strategy" ],
                     "paramType":"query"
                  }
               ]
--- a/api/api.hh
+++ b/api/api.hh
@@ -29,6 +29,7 @@
 #include "utils/histogram.hh"
 #include "http/exception.hh"
 #include "api_init.hh"
+#include "seastarx.hh"

 namespace api {

@@ -166,33 +167,36 @@ inline int64_t max_int64(int64_t a, int64_t b) {
 * It combine total and the sub set for the ratio and its
 * to_json method return the ration sub/total
 */
-struct ratio_holder : public json::jsonable {
-    double total = 0;
-    double sub = 0;
+template<typename T>
+struct basic_ratio_holder : public json::jsonable {
+    T total = 0;
+    T sub = 0;
    virtual std::string to_json() const {
        if (total == 0) {
            return "0";
        }
        return std::to_string(sub/total);
    }
-    ratio_holder() = default;
-    ratio_holder& add(double _total, double _sub) {
+    basic_ratio_holder() = default;
+    basic_ratio_holder& add(T _total, T _sub) {
        total += _total;
        sub += _sub;
        return *this;
    }
-    ratio_holder(double _total, double _sub) {
+    basic_ratio_holder(T _total, T _sub) {
        total = _total;
        sub = _sub;
    }
-    ratio_holder& operator+=(const ratio_holder& a) {
+    basic_ratio_holder<T>& operator+=(const basic_ratio_holder<T>& a) {
        return add(a.total, a.sub);
    }
-    friend ratio_holder operator+(ratio_holder a, const ratio_holder& b) {
+    friend basic_ratio_holder<T> operator+(basic_ratio_holder a, const basic_ratio_holder<T>& b) {
        return a += b;
    }
 };

+typedef basic_ratio_holder<double>  ratio_holder;
+typedef basic_ratio_holder<int64_t> integral_ratio_holder;

 class unimplemented_exception : public base_exception {
 public:
--- a/api/cache_service.cc
+++ b/api/cache_service.cc
@@ -177,6 +177,20 @@ void set_cache_service(http_context& ctx, routes& r) {
        return make_ready_future<json::json_return_type>(0);
    });

+    cs::get_key_hits_moving_avrage.set(r, [&ctx] (std::unique_ptr<request> req) {
+        // TBD
+        // FIXME
+        // See above
+        return make_ready_future<json::json_return_type>(meter_to_json(utils::rate_moving_average()));
+    });
+
+    cs::get_key_requests_moving_avrage.set(r, [&ctx] (std::unique_ptr<request> req) {
+        // TBD
+        // FIXME
+        // See above
+        return make_ready_future<json::json_return_type>(meter_to_json(utils::rate_moving_average()));
+    });
+
    cs::get_key_size.set(r, [] (std::unique_ptr<request> req) {
        // TBD
        // FIXME
@@ -194,7 +208,7 @@ void set_cache_service(http_context& ctx, routes& r) {
    });

    cs::get_row_capacity.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, 0, [](const column_family& cf) {
+        return map_reduce_cf(ctx, uint64_t(0), [](const column_family& cf) {
            return cf.get_row_cache().get_cache_tracker().region().occupancy().used_space();
        }, std::plus<uint64_t>());
    });
@@ -238,13 +252,13 @@ void set_cache_service(http_context& ctx, routes& r) {
        // In origin row size is the weighted size.
        // We currently do not support weights, so we use num entries instead
        return map_reduce_cf(ctx, 0, [](const column_family& cf) {
-            return cf.get_row_cache().num_entries();
+            return cf.get_row_cache().partitions();
        }, std::plus<uint64_t>());
    });

    cs::get_row_entries.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, 0, [](const column_family& cf) {
-            return cf.get_row_cache().num_entries();
+            return cf.get_row_cache().partitions();
        }, std::plus<uint64_t>());
    });

@@ -280,6 +294,20 @@ void set_cache_service(http_context& ctx, routes& r) {
        return make_ready_future<json::json_return_type>(0);
    });

+    cs::get_counter_hits_moving_avrage.set(r, [&ctx] (std::unique_ptr<request> req) {
+        // TBD
+        // FIXME
+        // See above
+        return make_ready_future<json::json_return_type>(meter_to_json(utils::rate_moving_average()));
+    });
+
+    cs::get_counter_requests_moving_avrage.set(r, [&ctx] (std::unique_ptr<request> req) {
+        // TBD
+        // FIXME
+        // See above
+        return make_ready_future<json::json_return_type>(meter_to_json(utils::rate_moving_average()));
+    });
+
    cs::get_counter_size.set(r, [] (std::unique_ptr<request> req) {
        // TBD
        // FIXME
--- a/api/collectd.cc
+++ b/api/collectd.cc
@@ -40,13 +40,13 @@ static auto transformer(const std::vector<collectd_value>& values) {
    for (auto v: values) {
        switch (v._type) {
        case scollectd::data_type::GAUGE:
-            collected_value.values.push(v.u._d);
+            collected_value.values.push(v.d());
            break;
        case scollectd::data_type::DERIVE:
-            collected_value.values.push(v.u._i);
+            collected_value.values.push(v.i());
            break;
        default:
-            collected_value.values.push(v.u._ui);
+            collected_value.values.push(v.ui());
            break;
        }
    }
--- a/api/column_family.cc
+++ b/api/column_family.cc
@@ -182,17 +182,8 @@ static int64_t max_row_size(column_family& cf) {
    return res;
 }

-static double update_ratio(double acc, double f, double total) {
-    if (f && !total) {
-        throw bad_param_exception("total should include all elements");
-    } else if (total) {
-        acc += f / total;
-    }
-    return acc;
-}
-
-static ratio_holder mean_row_size(column_family& cf) {
-    ratio_holder res;
+static integral_ratio_holder mean_row_size(column_family& cf) {
+    integral_ratio_holder res;
    for (auto i: *cf.get_sstables() ) {
        auto c = i->get_stats_metadata().estimated_row_size.count();
        res.sub += i->get_stats_metadata().estimated_row_size.mean() * c;
@@ -283,6 +274,16 @@ static std::vector<uint64_t> concat_sstable_count_per_level(std::vector<uint64_t
    return a;
 }

+ratio_holder filter_false_positive_as_ratio_holder(const sstables::shared_sstable& sst) {
+    double f = sst->filter_get_false_positive();
+    return ratio_holder(f + sst->filter_get_true_positive(), f);
+}
+
+ratio_holder filter_recent_false_positive_as_ratio_holder(const sstables::shared_sstable& sst) {
+    double f = sst->filter_get_recent_false_positive();
+    return ratio_holder(f + sst->filter_get_recent_true_positive(), f);
+}
+
 void set_column_family(http_context& ctx, routes& r) {
    cf::get_column_family_name.set(r, [&ctx] (const_req req){
        vector<sstring> res;
@@ -562,11 +563,13 @@ void set_column_family(http_context& ctx, routes& r) {
    });

    cf::get_mean_row_size.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], ratio_holder(), mean_row_size, std::plus<ratio_holder>());
+        // Cassandra 3.x mean values are truncated as integrals.
+        return map_reduce_cf(ctx, req->param["name"], integral_ratio_holder(), mean_row_size, std::plus<integral_ratio_holder>());
    });

    cf::get_all_mean_row_size.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, ratio_holder(), mean_row_size, std::plus<ratio_holder>());
+        // Cassandra 3.x mean values are truncated as integrals.
+        return map_reduce_cf(ctx, integral_ratio_holder(), mean_row_size, std::plus<integral_ratio_holder>());
    });

    cf::get_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr<request> req) {
@@ -602,39 +605,27 @@ void set_column_family(http_context& ctx, routes& r) {
    });

    cf::get_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], double(0), [] (column_family& cf) {
-            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), double(0), [](double s, auto& sst) {
-                double f = sst->filter_get_false_positive();
-                return update_ratio(s, f, f + sst->filter_get_true_positive());
-            });
-        }, std::plus<double>());
+        return map_reduce_cf(ctx, req->param["name"], ratio_holder(), [] (column_family& cf) {
+            return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_false_positive_as_ratio_holder), ratio_holder());
+        }, std::plus<>());
    });

    cf::get_all_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, double(0), [] (column_family& cf) {
-            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), double(0), [](double s, auto& sst) {
-                double f = sst->filter_get_false_positive();
-                return update_ratio(s, f, f + sst->filter_get_true_positive());
-            });
-        }, std::plus<double>());
+        return map_reduce_cf(ctx, ratio_holder(), [] (column_family& cf) {
+            return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_false_positive_as_ratio_holder), ratio_holder());
+        }, std::plus<>());
    });

    cf::get_recent_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], double(0), [] (column_family& cf) {
-            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), double(0), [](double s, auto& sst) {
-                double f = sst->filter_get_recent_false_positive();
-                return update_ratio(s, f, f + sst->filter_get_recent_true_positive());
-            });
-        }, std::plus<double>());
+        return map_reduce_cf(ctx, req->param["name"], ratio_holder(), [] (column_family& cf) {
+            return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_recent_false_positive_as_ratio_holder), ratio_holder());
+        }, std::plus<>());
    });

    cf::get_all_recent_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, double(0), [] (column_family& cf) {
-            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), double(0), [](double s, auto& sst) {
-                double f = sst->filter_get_recent_false_positive();
-                return update_ratio(s, f, f + sst->filter_get_recent_true_positive());
-            });
-        }, std::plus<double>());
+        return map_reduce_cf(ctx, ratio_holder(), [] (column_family& cf) {
+            return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_recent_false_positive_as_ratio_holder), ratio_holder());
+        }, std::plus<>());
    });

    cf::get_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
--- a/api/compaction_manager.cc
+++ b/api/compaction_manager.cc
@@ -26,7 +26,6 @@

 namespace api {

-using namespace scollectd;
 namespace cm = httpd::compaction_manager_json;
 using namespace json;

--- a/api/endpoint_snitch.cc
+++ b/api/endpoint_snitch.cc
@@ -22,16 +22,22 @@
 #include "locator/snitch_base.hh"
 #include "endpoint_snitch.hh"
 #include "api/api-doc/endpoint_snitch_info.json.hh"
+#include "utils/fb_utilities.hh"

 namespace api {

 void set_endpoint_snitch(http_context& ctx, routes& r) {
-    httpd::endpoint_snitch_info_json::get_datacenter.set(r, [] (const_req req) {
-        return locator::i_endpoint_snitch::get_local_snitch_ptr()->get_datacenter(req.get_query_param("host"));
+    static auto host_or_broadcast = [](const_req req) {
+        auto host = req.get_query_param("host");
+        return host.empty() ? gms::inet_address(utils::fb_utilities::get_broadcast_address()) : gms::inet_address(host);
+    };
+
+    httpd::endpoint_snitch_info_json::get_datacenter.set(r, [](const_req req) {
+        return locator::i_endpoint_snitch::get_local_snitch_ptr()->get_datacenter(host_or_broadcast(req));
    });

-    httpd::endpoint_snitch_info_json::get_rack.set(r, [] (const_req req) {
-        return locator::i_endpoint_snitch::get_local_snitch_ptr()->get_rack(req.get_query_param("host"));
+    httpd::endpoint_snitch_info_json::get_rack.set(r, [](const_req req) {
+        return locator::i_endpoint_snitch::get_local_snitch_ptr()->get_rack(host_or_broadcast(req));
    });

    httpd::endpoint_snitch_info_json::get_snitch_name.set(r, [] (const_req req) {
--- a/api/failure_detector.cc
+++ b/api/failure_detector.cc
@@ -88,6 +88,20 @@ void set_failure_detector(http_context& ctx, routes& r) {
            return make_ready_future<json::json_return_type>(state);
        });
    });
+
+    fd::get_endpoint_phi_values.set(r, [](std::unique_ptr<request> req) {
+        return gms::get_arrival_samples().then([](std::map<gms::inet_address, gms::arrival_window> map) {
+            std::vector<fd::endpoint_phi_value> res;
+            auto now = gms::arrival_window::clk::now();
+            for (auto& p : map) {
+                fd::endpoint_phi_value val;
+                val.endpoint = p.first.to_sstring();
+                val.phi = p.second.phi(now);
+                res.emplace_back(std::move(val));
+            }
+            return make_ready_future<json::json_return_type>(res);
+        });
+    });
 }

 }
--- a/api/hinted_handoff.cc
+++ b/api/hinted_handoff.cc
@@ -24,7 +24,6 @@

 namespace api {

-using namespace scollectd;
 using namespace json;
 namespace hh = httpd::hinted_handoff_json;

--- a/api/lsa.cc
+++ b/api/lsa.cc
@@ -29,11 +29,11 @@

 namespace api {

-static logging::logger logger("lsa-api");
+static logging::logger alogger("lsa-api");

 void set_lsa(http_context& ctx, routes& r) {
    httpd::lsa_json::lsa_compact.set(r, [&ctx](std::unique_ptr<request> req) {
-        logger.info("Triggering compaction");
+        alogger.info("Triggering compaction");
        return ctx.db.invoke_on_all([] (database&) {
            logalloc::shard_tracker().reclaim(std::numeric_limits<size_t>::max());
        }).then([] {
--- a/api/messaging_service.cc
+++ b/api/messaging_service.cc
@@ -27,7 +27,7 @@
 #include <sstream>

 using namespace httpd::messaging_service_json;
-using namespace net;
+using namespace netw;

 namespace api {

@@ -120,13 +120,13 @@ void set_messaging_service(http_context& ctx, routes& r) {
    }));

    get_version.set(r, [](const_req req) {
-        return net::get_local_messaging_service().get_raw_version(req.get_query_param("addr"));
+        return netw::get_local_messaging_service().get_raw_version(req.get_query_param("addr"));
    });

    get_dropped_messages_by_ver.set(r, [](std::unique_ptr<request> req) {
        shared_ptr<std::vector<uint64_t>> map = make_shared<std::vector<uint64_t>>(num_verb);

-        return net::get_messaging_service().map_reduce([map](const uint64_t* local_map) mutable {
+        return netw::get_messaging_service().map_reduce([map](const uint64_t* local_map) mutable {
            for (auto i = 0; i < num_verb; i++) {
                (*map)[i]+= local_map[i];
            }
--- a/api/storage_service.cc
+++ b/api/storage_service.cc
@@ -22,6 +22,8 @@
 #include "storage_service.hh"
 #include "api/api-doc/storage_service.json.hh"
 #include "db/config.hh"
+#include <boost/range/adaptor/map.hpp>
+#include <boost/range/adaptor/filtered.hpp>
 #include <service/storage_service.hh>
 #include <db/commitlog/commitlog.hh>
 #include <gms/gossiper.hh>
@@ -457,8 +459,15 @@ void set_storage_service(http_context& ctx, routes& r) {
    });

    ss::get_keyspaces.set(r, [&ctx](const_req req) {
-        auto non_system = req.get_query_param("non_system");
-        return map_keys(ctx.db.local().keyspaces());
+        auto type = req.get_query_param("type");
+        if (type == "user") {
+            return ctx.db.local().get_non_system_keyspaces();
+        } else if (type == "non_local_strategy") {
+            return map_keys(ctx.db.local().get_keyspaces() | boost::adaptors::filtered([](const auto& p) {
+                return p.second.get_replication_strategy().get_type() != locator::replication_strategy_type::local;
+            }));
+        }
+        return map_keys(ctx.db.local().get_keyspaces());
    });

    ss::update_snitch.set(r, [](std::unique_ptr<request> req) {
@@ -542,9 +551,7 @@ void set_storage_service(http_context& ctx, routes& r) {
    });

    ss::is_joined.set(r, [] (std::unique_ptr<request> req) {
-        return service::get_local_storage_service().is_joined().then([] (bool is_joined) {
-            return make_ready_future<json::json_return_type>(is_joined);
-        });
+        return make_ready_future<json::json_return_type>(service::get_local_storage_service().is_joined());
    });

    ss::set_stream_throughput_mb_per_sec.set(r, [](std::unique_ptr<request> req) {
@@ -664,17 +671,23 @@ void set_storage_service(http_context& ctx, routes& r) {

    ss::set_trace_probability.set(r, [](std::unique_ptr<request> req) {
        auto probability = req->get_query_param("probability");
-        try {
+        return futurize<json::json_return_type>::apply([probability] {
            double real_prob = std::stod(probability.c_str());
            return tracing::tracing::tracing_instance().invoke_on_all([real_prob] (auto& local_tracing) {
                local_tracing.set_trace_probability(real_prob);
            }).then([] {
                return make_ready_future<json::json_return_type>(json_void());
            });
-        } catch (...) {
-            throw httpd::bad_param_exception(sprint("Bad format of a probability value: \"%s\"", probability.c_str()));
-        }
-
+        }).then_wrapped([probability] (auto&& f) {
+            try {
+                f.get();
+                return make_ready_future<json::json_return_type>(json_void());
+            } catch (std::out_of_range& e) {
+                throw httpd::bad_param_exception(e.what());
+            } catch (std::invalid_argument&){
+                throw httpd::bad_param_exception(sprint("Bad format in a probability value: \"%s\"", probability.c_str()));
+            }
+        });
    });

    ss::get_trace_probability.set(r, [](std::unique_ptr<request> req) {
@@ -684,8 +697,8 @@ void set_storage_service(http_context& ctx, routes& r) {
    ss::get_slow_query_info.set(r, [](const_req req) {
        ss::slow_query_info res;
        res.enable = tracing::tracing::get_local_tracing_instance().slow_query_tracing_enabled();
-        res.ttl = std::chrono::duration_cast<std::chrono::microseconds>(tracing::tracing::get_local_tracing_instance().slow_query_record_ttl()).count() ;
-        res.threshold = std::chrono::duration_cast<std::chrono::microseconds>(tracing::tracing::get_local_tracing_instance().slow_query_threshold()).count();
+        res.ttl = tracing::tracing::get_local_tracing_instance().slow_query_record_ttl().count() ;
+        res.threshold = tracing::tracing::get_local_tracing_instance().slow_query_threshold().count();
        return res;
    });

@@ -789,10 +802,8 @@ void set_storage_service(http_context& ctx, routes& r) {
        return make_ready_future<json::json_return_type>(json_void());
    });

-    ss::get_metrics_load.set(r, [](std::unique_ptr<request> req) {
-        //TBD
-        unimplemented();
-        return make_ready_future<json::json_return_type>(0);
+    ss::get_metrics_load.set(r, [&ctx](std::unique_ptr<request> req) {
+        return get_cf_stats(ctx, &column_family::stats::live_disk_space_used);
    });

    ss::get_exceptions.set(r, [](const_req req) {
--- a/atomic_cell.hh
+++ b/atomic_cell.hh
@@ -28,11 +28,12 @@
 #include "utils/managed_bytes.hh"
 #include "net/byteorder.hh"
 #include <cstdint>
-#include <iostream>
+#include <iosfwd>
+#include <seastar/util/gcc6-concepts.hh>

-template<typename T>
+template<typename T, typename Input>
 static inline
-void set_field(managed_bytes& v, unsigned offset, T val) {
+void set_field(Input& v, unsigned offset, T val) {
    reinterpret_cast<net::packed<T>*>(v.begin() + offset)->raw = net::hton(val);
 }

@@ -57,6 +58,8 @@ private:
    static constexpr int8_t LIVE_FLAG = 0x01;
    static constexpr int8_t EXPIRY_FLAG = 0x02; // When present, expiry field is present. Set only for live cells
    static constexpr int8_t REVERT_FLAG = 0x04; // transient flag used to efficiently implement ReversiblyMergeable for atomic cells.
+    static constexpr int8_t COUNTER_UPDATE_FLAG = 0x08; // Cell is a counter update.
+    static constexpr int8_t COUNTER_IN_PLACE_REVERT = 0x10;
    static constexpr unsigned flags_size = 1;
    static constexpr unsigned timestamp_offset = flags_size;
    static constexpr unsigned timestamp_size = 8;
@@ -66,14 +69,25 @@ private:
    static constexpr unsigned deletion_time_size = 4;
    static constexpr unsigned ttl_offset = expiry_offset + expiry_size;
    static constexpr unsigned ttl_size = 4;
+    friend class counter_cell_builder;
 private:
+    static bool is_counter_update(bytes_view cell) {
+        return cell[0] & COUNTER_UPDATE_FLAG;
+    }
    static bool is_revert_set(bytes_view cell) {
        return cell[0] & REVERT_FLAG;
    }
+    static bool is_counter_in_place_revert_set(bytes_view cell) {
+        return cell[0] & COUNTER_IN_PLACE_REVERT;
+    }
    template<typename BytesContainer>
    static void set_revert(BytesContainer& cell, bool revert) {
        cell[0] = (cell[0] & ~REVERT_FLAG) | (revert * REVERT_FLAG);
    }
+    template<typename BytesContainer>
+    static void set_counter_in_place_revert(BytesContainer& cell, bool flag) {
+        cell[0] = (cell[0] & ~COUNTER_IN_PLACE_REVERT) | (flag * COUNTER_IN_PLACE_REVERT);
+    }
    static bool is_live(const bytes_view& cell) {
        return cell[0] & LIVE_FLAG;
    }
@@ -87,13 +101,30 @@ private:
    static api::timestamp_type timestamp(const bytes_view& cell) {
        return get_field<api::timestamp_type>(cell, timestamp_offset);
    }
+    template<typename BytesContainer>
+    static void set_timestamp(BytesContainer& cell, api::timestamp_type ts) {
+        set_field(cell, timestamp_offset, ts);
+    }
    // Can be called on live cells only
-    static bytes_view value(bytes_view cell) {
+private:
+    template<typename BytesView>
+    static BytesView do_get_value(BytesView cell) {
        auto expiry_field_size = bool(cell[0] & EXPIRY_FLAG) * (expiry_size + ttl_size);
        auto value_offset = flags_size + timestamp_size + expiry_field_size;
        cell.remove_prefix(value_offset);
        return cell;
    }
+public:
+    static bytes_view value(bytes_view cell) {
+        return do_get_value(cell);
+    }
+    static bytes_mutable_view value(bytes_mutable_view cell) {
+        return do_get_value(cell);
+    }
+    // Can be called on live counter update cells only
+    static int64_t counter_update_value(bytes_view cell) {
+        return get_field<int64_t>(cell, flags_size + timestamp_size);
+    }
    // Can be called only when is_dead() is true.
    static gc_clock::time_point deletion_time(const bytes_view& cell) {
        assert(is_dead(cell));
@@ -126,6 +157,14 @@ private:
        std::copy_n(value.begin(), value.size(), b.begin() + value_offset);
        return b;
    }
+    static managed_bytes make_live_counter_update(api::timestamp_type timestamp, int64_t value) {
+        auto value_offset = flags_size + timestamp_size;
+        managed_bytes b(managed_bytes::initialized_later(), value_offset + sizeof(value));
+        b[0] = LIVE_FLAG | COUNTER_UPDATE_FLAG;
+        set_field(b, timestamp_offset, timestamp);
+        set_field(b, value_offset, value);
+        return b;
+    }
    static managed_bytes make_live(api::timestamp_type timestamp, bytes_view value, gc_clock::time_point expiry, gc_clock::duration ttl) {
        auto value_offset = flags_size + timestamp_size + expiry_size + ttl_size;
        managed_bytes b(managed_bytes::initialized_later(), value_offset + value.size());
@@ -136,6 +175,31 @@ private:
        std::copy_n(value.begin(), value.size(), b.begin() + value_offset);
        return b;
    }
+    // make_live_from_serializer() is intended for users that need to serialise
+    // some object or objects to the format used in atomic_cell::value().
+    // With just make_live() the pattern would look as follows:
+    // 1. allocate a buffer and write to it serialised objects
+    // 2. pass that buffer to make_live()
+    // 3. make_live() needs to prepend some metadata to the cell value so it
+    //    allocates a new buffer and copies the content of the original one
+    //
+    // The allocation and copy of a buffer can be avoided.
+    // make_live_from_serializer() allows the user code to specify the timestamp
+    // and size of the cell value as well as provide the serialiser function
+    // object, which would write the serialised value of the cell to the buffer
+    // given to it by make_live_from_serializer().
+    template<typename Serializer>
+    GCC6_CONCEPT(requires requires(Serializer serializer, bytes::iterator it) {
+        serializer(it);
+    })
+    static managed_bytes make_live_from_serializer(api::timestamp_type timestamp, size_t size, Serializer&& serializer) {
+        auto value_offset = flags_size + timestamp_size;
+        managed_bytes b(managed_bytes::initialized_later(), value_offset + size);
+        b[0] = LIVE_FLAG;
+        set_field(b, timestamp_offset, timestamp);
+        serializer(b.begin() + value_offset);
+        return b;
+    }
    template<typename ByteContainer>
    friend class atomic_cell_base;
    friend class atomic_cell;
@@ -149,17 +213,23 @@ protected:
    atomic_cell_base(ByteContainer&& data) : _data(std::forward<ByteContainer>(data)) { }
    friend class atomic_cell_or_collection;
 public:
+    bool is_counter_update() const {
+        return atomic_cell_type::is_counter_update(_data);
+    }
    bool is_revert_set() const {
        return atomic_cell_type::is_revert_set(_data);
    }
+    bool is_counter_in_place_revert_set() const {
+        return atomic_cell_type::is_counter_in_place_revert_set(_data);
+    }
    bool is_live() const {
        return atomic_cell_type::is_live(_data);
    }
-    bool is_live(tombstone t) const {
-        return is_live() && !is_covered_by(t);
+    bool is_live(tombstone t, bool is_counter) const {
+        return is_live() && !is_covered_by(t, is_counter);
    }
-    bool is_live(tombstone t, gc_clock::time_point now) const {
-        return is_live() && !is_covered_by(t) && !has_expired(now);
+    bool is_live(tombstone t, gc_clock::time_point now, bool is_counter) const {
+        return is_live() && !is_covered_by(t, is_counter) && !has_expired(now);
    }
    bool is_live_and_has_ttl() const {
        return atomic_cell_type::is_live_and_has_ttl(_data);
@@ -167,17 +237,24 @@ public:
    bool is_dead(gc_clock::time_point now) const {
        return atomic_cell_type::is_dead(_data) || has_expired(now);
    }
-    bool is_covered_by(tombstone t) const {
-        return timestamp() <= t.timestamp;
+    bool is_covered_by(tombstone t, bool is_counter) const {
+        return timestamp() <= t.timestamp || (is_counter && t.timestamp != api::missing_timestamp);
    }
    // Can be called on live and dead cells
    api::timestamp_type timestamp() const {
        return atomic_cell_type::timestamp(_data);
    }
+    void set_timestamp(api::timestamp_type ts) {
+        atomic_cell_type::set_timestamp(_data, ts);
+    }
    // Can be called on live cells only
-    bytes_view value() const {
+    auto value() const {
        return atomic_cell_type::value(_data);
    }
+    // Can be called on live counter update cells only
+    int64_t counter_update_value() const {
+        return atomic_cell_type::counter_update_value(_data);
+    }
    // Can be called only when is_dead(gc_clock::time_point)
    gc_clock::time_point deletion_time() const {
        return !is_live() ? atomic_cell_type::deletion_time(_data) : expiry() - ttl();
@@ -200,6 +277,9 @@ public:
    void set_revert(bool revert) {
        atomic_cell_type::set_revert(_data, revert);
    }
+    void set_counter_in_place_revert(bool flag) {
+        atomic_cell_type::set_counter_in_place_revert(_data, flag);
+    }
 };

 class atomic_cell_view final : public atomic_cell_base<bytes_view> {
@@ -211,6 +291,14 @@ public:
    friend std::ostream& operator<<(std::ostream& os, const atomic_cell_view& acv);
 };

+class atomic_cell_mutable_view final : public atomic_cell_base<bytes_mutable_view> {
+    atomic_cell_mutable_view(bytes_mutable_view data) : atomic_cell_base(std::move(data)) {}
+public:
+    static atomic_cell_mutable_view from_bytes(bytes_mutable_view data) { return atomic_cell_mutable_view(data); }
+
+    friend class atomic_cell;
+};
+
 class atomic_cell_ref final : public atomic_cell_base<managed_bytes&> {
 public:
    atomic_cell_ref(managed_bytes& buf) : atomic_cell_base(buf) {}
@@ -239,6 +327,9 @@ public:
    static atomic_cell make_live(api::timestamp_type timestamp, const bytes& value) {
        return make_live(timestamp, bytes_view(value));
    }
+    static atomic_cell make_live_counter_update(api::timestamp_type timestamp, int64_t value) {
+        return atomic_cell_type::make_live_counter_update(timestamp, value);
+    }
    static atomic_cell make_live(api::timestamp_type timestamp, bytes_view value,
        gc_clock::time_point expiry, gc_clock::duration ttl)
    {
@@ -256,6 +347,10 @@ public:
            return atomic_cell_type::make_live(timestamp, value, gc_clock::now() + *ttl, *ttl);
        }
    }
+    template<typename Serializer>
+    static atomic_cell make_live_from_serializer(api::timestamp_type timestamp, size_t size, Serializer&& serializer) {
+        return atomic_cell_type::make_live_from_serializer(timestamp, size, std::forward<Serializer>(serializer));
+    }
    friend class atomic_cell_or_collection;
    friend std::ostream& operator<<(std::ostream& os, const atomic_cell& ac);
 };
@@ -293,11 +388,6 @@ collection_mutation::operator collection_mutation_view() const {
    return { data };
 }

-namespace db {
-template<typename T>
-class serializer;
-}
-
 class column_definition;

 int compare_atomic_cell_for_merge(atomic_cell_view left, atomic_cell_view right);
--- a/atomic_cell_hash.hh
+++ b/atomic_cell_hash.hh
@@ -26,16 +26,17 @@
 #include "types.hh"
 #include "atomic_cell.hh"
 #include "hashing.hh"
+#include "counters.hh"

 template<>
 struct appending_hash<collection_mutation_view> {
    template<typename Hasher>
-    void operator()(Hasher& h, collection_mutation_view cell) const {
+    void operator()(Hasher& h, collection_mutation_view cell, const column_definition& cdef) const {
        auto m_view = collection_type_impl::deserialize_mutation_form(cell);
        ::feed_hash(h, m_view.tomb);
        for (auto&& key_and_value : m_view.cells) {
            ::feed_hash(h, key_and_value.first);
-            ::feed_hash(h, key_and_value.second);
+            ::feed_hash(h, key_and_value.second, cdef);
        }
    }
 };
@@ -43,10 +44,14 @@ struct appending_hash<collection_mutation_view> {
 template<>
 struct appending_hash<atomic_cell_view> {
    template<typename Hasher>
-    void operator()(Hasher& h, atomic_cell_view cell) const {
+    void operator()(Hasher& h, atomic_cell_view cell, const column_definition& cdef) const {
        feed_hash(h, cell.is_live());
        feed_hash(h, cell.timestamp());
        if (cell.is_live()) {
+            if (cdef.is_counter()) {
+                ::feed_hash(h, counter_cell_view(cell));
+                return;
+            }
            if (cell.is_live_and_has_ttl()) {
                feed_hash(h, cell.expiry());
                feed_hash(h, cell.ttl());
@@ -61,15 +66,15 @@ struct appending_hash<atomic_cell_view> {
 template<>
 struct appending_hash<atomic_cell> {
    template<typename Hasher>
-    void operator()(Hasher& h, const atomic_cell& cell) const {
-        feed_hash(h, static_cast<atomic_cell_view>(cell));
+    void operator()(Hasher& h, const atomic_cell& cell, const column_definition& cdef) const {
+        feed_hash(h, static_cast<atomic_cell_view>(cell), cdef);
    }
 };

 template<>
 struct appending_hash<collection_mutation> {
    template<typename Hasher>
-    void operator()(Hasher& h, const collection_mutation& cm) const {
-        feed_hash(h, static_cast<collection_mutation_view>(cm));
+    void operator()(Hasher& h, const collection_mutation& cm, const column_definition& cdef) const {
+        feed_hash(h, static_cast<collection_mutation_view>(cm), cdef);
    }
 };
--- a/atomic_cell_or_collection.hh
+++ b/atomic_cell_or_collection.hh
@@ -39,10 +39,14 @@ public:
    static atomic_cell_or_collection from_atomic_cell(atomic_cell data) { return { std::move(data._data) }; }
    atomic_cell_view as_atomic_cell() const { return atomic_cell_view::from_bytes(_data); }
    atomic_cell_ref as_atomic_cell_ref() { return { _data }; }
+    atomic_cell_mutable_view as_mutable_atomic_cell() { return atomic_cell_mutable_view::from_bytes(_data); }
    atomic_cell_or_collection(collection_mutation cm) : _data(std::move(cm.data)) {}
    explicit operator bool() const {
        return !_data.empty();
    }
+    bool can_use_mutable_view() const {
+        return !_data.is_fragmented();
+    }
    static atomic_cell_or_collection from_collection_mutation(collection_mutation data) {
        return std::move(data.data);
    }
@@ -58,13 +62,13 @@ public:
    template<typename Hasher>
    void feed_hash(Hasher& h, const column_definition& def) const {
        if (def.is_atomic()) {
-            ::feed_hash(h, as_atomic_cell());
+            ::feed_hash(h, as_atomic_cell(), def);
        } else {
-            ::feed_hash(as_collection_mutation(), h, def.type);
+            ::feed_hash(h, as_collection_mutation(), def);
        }
    }
-    size_t memory_usage() const {
-        return _data.memory_usage();
+    size_t external_memory_usage() const {
+        return _data.external_memory_usage();
    }
    friend std::ostream& operator<<(std::ostream&, const atomic_cell_or_collection&);
 };
--- a/auth/auth.cc
+++ b/auth/auth.cc
@@ -61,7 +61,7 @@ const sstring auth::auth::USERS_CF("users");
 static const sstring USER_NAME("name");
 static const sstring SUPER("super");

-static logging::logger logger("auth");
+static logging::logger alogger("auth");

 // TODO: configurable
 using namespace std::chrono_literals;
@@ -73,12 +73,14 @@ class auth_migration_listener : public service::migration_listener {
    void on_create_user_type(const sstring& ks_name, const sstring& type_name) override {}
    void on_create_function(const sstring& ks_name, const sstring& function_name) override {}
    void on_create_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
+    void on_create_view(const sstring& ks_name, const sstring& view_name) override {}

    void on_update_keyspace(const sstring& ks_name) override {}
    void on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool) override {}
    void on_update_user_type(const sstring& ks_name, const sstring& type_name) override {}
    void on_update_function(const sstring& ks_name, const sstring& function_name) override {}
    void on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
+    void on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed) override {}

    void on_drop_keyspace(const sstring& ks_name) override {
        auth::authorizer::get().revoke_all(auth::data_resource(ks_name));
@@ -89,6 +91,7 @@ class auth_migration_listener : public service::migration_listener {
    void on_drop_user_type(const sstring& ks_name, const sstring& type_name) override {}
    void on_drop_function(const sstring& ks_name, const sstring& function_name) override {}
    void on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
+    void on_drop_view(const sstring& ks_name, const sstring& view_name) override {}
 };

 static auth_migration_listener auth_migration;
@@ -111,7 +114,7 @@ struct hash<auth::authenticated_user> {

 class auth::auth::permissions_cache {
 public:
-    typedef utils::loading_cache<std::pair<authenticated_user, data_resource>, permission_set, utils::tuple_hash> cache_type;
+    typedef utils::loading_cache<std::pair<authenticated_user, data_resource>, permission_set, utils::loading_cache_reload_enabled::yes, utils::simple_entry_size<permission_set>, utils::tuple_hash> cache_type;
    typedef typename cache_type::key_type key_type;

    permissions_cache()
@@ -120,25 +123,14 @@ public:
    }

    permissions_cache(const db::config& cfg)
-                    : _cache(cfg.permissions_cache_max_entries(), expiry(cfg),
-                                    std::chrono::milliseconds(
-                                                    cfg.permissions_validity_in_ms()),
-                                    [](const key_type& k) {
-                                        logger.debug("Refreshing permissions for {}", k.first.name());
-                                        return authorizer::get().authorize(::make_shared<authenticated_user>(k.first), k.second);
-                                    }) {
-    }
-
-    static std::chrono::milliseconds expiry(const db::config& cfg) {
-        auto exp = cfg.permissions_update_interval_in_ms();
-        if (exp == 0 || exp == std::numeric_limits<uint32_t>::max()) {
-            exp = cfg.permissions_validity_in_ms();
-        }
-        return std::chrono::milliseconds(exp);
-    }
+                    : _cache(cfg.permissions_cache_max_entries(), std::chrono::milliseconds(cfg.permissions_validity_in_ms()), std::chrono::milliseconds(cfg.permissions_update_interval_in_ms()), alogger,
+                        [] (const key_type& k) {
+                            alogger.debug("Refreshing permissions for {}", k.first.name());
+                            return authorizer::get().authorize(::make_shared<authenticated_user>(k.first), k.second);
+                        }) {}

    future<> stop() {
-        return make_ready_future<>();
+        return _cache.stop();
    }

    future<permission_set> get(::shared_ptr<authenticated_user> user, data_resource resource) {
@@ -149,6 +141,15 @@ private:
    cache_type _cache;
 };

+namespace std { // for ADL, yuch
+
+std::ostream& operator<<(std::ostream& os, const std::pair<auth::authenticated_user, auth::data_resource>& p) {
+    os << "{user: " << p.first.name() << ", data_resource: " << p.second << "}";
+    return os;
+}
+
+}
+
 static distributed<auth::auth::permissions_cache> perm_cache;

 /**
@@ -175,7 +176,7 @@ struct waiter {
            tmr.cancel();
            done.set_exception(std::runtime_error("shutting down"));
        }
-        logger.trace("Deleting scheduled task");
+        alogger.trace("Deleting scheduled task");
    }
    void kill() {
    }
@@ -189,7 +190,7 @@ static std::vector<waiter_ptr> & thread_waiters() {
 }

 void auth::auth::schedule_when_up(scheduled_func f) {
-    logger.trace("Adding scheduled task");
+    alogger.trace("Adding scheduled task");

    auto & waiters = thread_waiters();

@@ -205,7 +206,7 @@ void auth::auth::schedule_when_up(scheduled_func f) {
            waiters.erase(i);
        }
    }).then([f = std::move(f)] {
-        logger.trace("Running scheduled task");
+        alogger.trace("Running scheduled task");
        return f();
    }).handle_exception([](auto ep) {
        return make_ready_future();
@@ -243,7 +244,8 @@ future<> auth::auth::setup() {
        std::map<sstring, sstring> opts;
        opts["replication_factor"] = "1";
        auto ksm = keyspace_metadata::new_keyspace(AUTH_KS, "org.apache.cassandra.locator.SimpleStrategy", opts, true);
-        f = service::get_local_migration_manager().announce_new_keyspace(ksm, false);
+        // We use min_timestamp so that the default keyspace metadata will lose to any manual adjustments. See issue #2129.
+        f = service::get_local_migration_manager().announce_new_keyspace(ksm, api::min_timestamp, false);
    }

    return f.then([] {
@@ -264,12 +266,12 @@ future<> auth::auth::setup() {
                    auto query = sprint("INSERT INTO %s.%s (%s, %s) VALUES (?, ?) USING TIMESTAMP 0",
                                    AUTH_KS, USERS_CF, USER_NAME, SUPER);
                    cql3::get_local_query_processor().process(query, db::consistency_level::ONE, {DEFAULT_SUPERUSER_NAME, true}).then([](auto) {
-                        logger.info("Created default superuser '{}'", DEFAULT_SUPERUSER_NAME);
+                        alogger.info("Created default superuser '{}'", DEFAULT_SUPERUSER_NAME);
                    }).handle_exception([](auto ep) {
                        try {
                            std::rethrow_exception(ep);
                        } catch (exceptions::request_execution_exception&) {
-                            logger.warn("Skipped default superuser setup: some nodes were not ready");
+                            alogger.warn("Skipped default superuser setup: some nodes were not ready");
                        }
                    });
                }
@@ -327,14 +329,13 @@ future<bool> auth::auth::is_super_user(const sstring& username) {
                    });
 }

-future<> auth::auth::insert_user(const sstring& username, bool is_super)
-                throw (exceptions::request_execution_exception) {
+future<> auth::auth::insert_user(const sstring& username, bool is_super) {
    return cql3::get_local_query_processor().process(sprint("INSERT INTO %s.%s (%s, %s) VALUES (?, ?)",
                    AUTH_KS, USERS_CF, USER_NAME, SUPER),
                    consistency_for_user(username), { username, is_super }).discard_result();
 }

-future<> auth::auth::delete_user(const sstring& username) throw(exceptions::request_execution_exception) {
+future<> auth::auth::delete_user(const sstring& username) {
    return cql3::get_local_query_processor().process(sprint("DELETE FROM %s.%s WHERE %s = ?",
                    AUTH_KS, USERS_CF, USER_NAME),
                    consistency_for_user(username), { username }).discard_result();
@@ -353,7 +354,7 @@ future<> auth::auth::setup_table(const sstring& name, const sstring& cql) {
    parsed->prepare_keyspace(AUTH_KS);
    ::shared_ptr<cql3::statements::create_table_statement> statement =
                    static_pointer_cast<cql3::statements::create_table_statement>(
-                                    parsed->prepare(db)->statement);
+                                    parsed->prepare(db, qp.get_cql_stats())->statement);
    auto schema = statement->get_cf_meta_data();
    auto uuid = generate_legacy_id(schema->ks_name(), schema->cf_name());

--- a/auth/auth.hh
+++ b/auth/auth.hh
@@ -50,11 +50,10 @@
 #include "exceptions/exceptions.hh"
 #include "permission.hh"
 #include "data_resource.hh"
+#include "authenticated_user.hh"

 namespace auth {

-class authenticated_user;
-
 class auth {
 public:
    class permissions_cache;
@@ -91,7 +90,7 @@ public:
     * @param isSuper User's new status.
     * @throws RequestExecutionException
     */
-    static future<> insert_user(const sstring& username, bool is_super) throw(exceptions::request_execution_exception);
+    static future<> insert_user(const sstring& username, bool is_super);

    /**
     * Deletes the user from AUTH_KS.USERS_CF.
@@ -99,7 +98,7 @@ public:
     * @param username Username to delete.
     * @throws RequestExecutionException
     */
-    static future<> delete_user(const sstring& username) throw(exceptions::request_execution_exception);
+    static future<> delete_user(const sstring& username);

    /**
     * Sets up Authenticator and Authorizer.
@@ -122,3 +121,5 @@ public:
    static void schedule_when_up(scheduled_func);
 };
 }
+
+std::ostream& operator<<(std::ostream& os, const std::pair<auth::authenticated_user, auth::data_resource>& p);
--- a/auth/authenticated_user.hh
+++ b/auth/authenticated_user.hh
@@ -43,6 +43,7 @@

 #include <seastar/core/sstring.hh>
 #include <seastar/core/future.hh>
+#include "seastarx.hh"

 namespace auth {

--- a/auth/authenticator.cc
+++ b/auth/authenticator.cc
@@ -72,7 +72,7 @@ sstring auth::authenticator::option_to_string(option opt) {
 static std::unique_ptr<auth::authenticator> global_authenticator;

 future<>
-auth::authenticator::setup(const sstring& type) throw (exceptions::configuration_exception) {
+auth::authenticator::setup(const sstring& type) {
    if (auth::auth::is_class_type(type, ALLOW_ALL_AUTHENTICATOR_NAME)) {
        class allow_all_authenticator : public authenticator {
        public:
@@ -88,16 +88,16 @@ auth::authenticator::setup(const sstring& type) throw (exceptions::configuration
            option_set alterable_options() const override {
                return option_set();
            }
-            future<::shared_ptr<authenticated_user>> authenticate(const credentials_map& credentials) const throw(exceptions::authentication_exception) override {
+            future<::shared_ptr<authenticated_user>> authenticate(const credentials_map& credentials) const override {
                return make_ready_future<::shared_ptr<authenticated_user>>(::make_shared<authenticated_user>());
            }
-            future<> create(sstring username, const option_map& options) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) override {
+            future<> create(sstring username, const option_map& options) override {
                return make_ready_future();
            }
-            future<> alter(sstring username, const option_map& options) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) override {
+            future<> alter(sstring username, const option_map& options) override {
                return make_ready_future();
            }
-            future<> drop(sstring username) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) override {
+            future<> drop(sstring username) override {
                return make_ready_future();
            }
            const resource_ids& protected_resources() const override {
--- a/auth/authenticator.hh
+++ b/auth/authenticator.hh
@@ -92,7 +92,7 @@ public:
     * For example, use this method to create any required keyspaces/column families.
     * Note: Only call from main thread.
     */
-    static future<> setup(const sstring& type) throw(exceptions::configuration_exception);
+    static future<> setup(const sstring& type);

    /**
     * Returns the system authenticator. Must have called setup before calling this.
@@ -129,7 +129,7 @@ public:
     *
     * @throws authentication_exception if credentials don't match any known user.
     */
-    virtual future<::shared_ptr<authenticated_user>> authenticate(const credentials_map& credentials) const throw(exceptions::authentication_exception) = 0;
+    virtual future<::shared_ptr<authenticated_user>> authenticate(const credentials_map& credentials) const = 0;

    /**
     * Called during execution of CREATE USER query (also may be called on startup, see seedSuperuserOptions method).
@@ -141,7 +141,7 @@ public:
     * @throws exceptions::request_validation_exception
     * @throws exceptions::request_execution_exception
     */
-    virtual future<> create(sstring username, const option_map& options) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) = 0;
+    virtual future<> create(sstring username, const option_map& options) = 0;

    /**
     * Called during execution of ALTER USER query.
@@ -154,7 +154,7 @@ public:
     * @throws exceptions::request_validation_exception
     * @throws exceptions::request_execution_exception
     */
-    virtual future<> alter(sstring username, const option_map& options) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) = 0;
+    virtual future<> alter(sstring username, const option_map& options) = 0;


    /**
@@ -164,7 +164,7 @@ public:
     * @throws exceptions::request_validation_exception
     * @throws exceptions::request_execution_exception
     */
-    virtual future<> drop(sstring username) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) = 0;
+    virtual future<> drop(sstring username) = 0;

     /**
     * Set of resources that should be made inaccessible to users and only accessible internally.
@@ -177,9 +177,9 @@ public:
    class sasl_challenge {
    public:
        virtual ~sasl_challenge() {}
-        virtual bytes evaluate_response(bytes_view client_response) throw(exceptions::authentication_exception) = 0;
+        virtual bytes evaluate_response(bytes_view client_response) = 0;
        virtual bool is_complete() const = 0;
-        virtual future<::shared_ptr<authenticated_user>> get_authenticated_user() const throw(exceptions::authentication_exception) = 0;
+        virtual future<::shared_ptr<authenticated_user>> get_authenticated_user() const = 0;
    };

    /**
--- a/auth/authorizer.hh
+++ b/auth/authorizer.hh
@@ -51,6 +51,8 @@
 #include "permission.hh"
 #include "data_resource.hh"

+#include "seastarx.hh"
+
 namespace auth {

 class authenticated_user;
--- a/auth/data_resource.cc
+++ b/auth/data_resource.cc
@@ -47,11 +47,8 @@
 const sstring auth::data_resource::ROOT_NAME("data");

 auth::data_resource::data_resource(level l, const sstring& ks, const sstring& cf)
-    : _ks(ks), _cf(cf)
+    : _level(l), _ks(ks), _cf(cf)
 {
-    if (l != get_level()) {
-        throw std::invalid_argument("level/keyspace/column mismatch");
-    }
 }

 auth::data_resource::data_resource()
@@ -67,14 +64,7 @@ auth::data_resource::data_resource(const sstring& ks, const sstring& cf)
 {}

 auth::data_resource::level auth::data_resource::get_level() const {
-    if (!_cf.empty()) {
-        assert(!_ks.empty());
-        return level::COLUMN_FAMILY;
-    }
-    if (!_ks.empty()) {
-        return level::KEYSPACE;
-    }
-    return level::ROOT;
+    return _level;
 }

 auth::data_resource auth::data_resource::from_name(
@@ -125,16 +115,14 @@ auth::data_resource auth::data_resource::get_parent() const {
    }
 }

-const sstring& auth::data_resource::keyspace() const
-                throw (std::invalid_argument) {
+const sstring& auth::data_resource::keyspace() const {
    if (is_root_level()) {
        throw std::invalid_argument("ROOT data resource has no keyspace");
    }
    return _ks;
 }

-const sstring& auth::data_resource::column_family() const
-                throw (std::invalid_argument) {
+const sstring& auth::data_resource::column_family() const {
    if (!is_column_family_level()) {
        throw std::invalid_argument(sprint("%s data resource has no column family", name()));
    }
--- a/auth/data_resource.hh
+++ b/auth/data_resource.hh
@@ -45,6 +45,7 @@
 #include <iosfwd>
 #include <set>
 #include <seastar/core/sstring.hh>
+#include "seastarx.hh"

 namespace auth {

@@ -56,6 +57,7 @@ private:

    static const sstring ROOT_NAME;

+    level _level;
    sstring _ks;
    sstring _cf;

@@ -116,13 +118,13 @@ public:
     * @return keyspace of the resource.
     * @throws std::invalid_argument if it's the root-level resource.
     */
-    const sstring& keyspace() const throw(std::invalid_argument);
+    const sstring& keyspace() const;

    /**
     * @return column family of the resource.
     * @throws std::invalid_argument if it's not a cf-level resource.
     */
-    const sstring& column_family() const throw(std::invalid_argument);
+    const sstring& column_family() const;

    /**
     * @return Whether or not the resource has a parent in the hierarchy.
--- a/auth/default_authorizer.cc
+++ b/auth/default_authorizer.cc
@@ -62,7 +62,7 @@ static const sstring RESOURCE_NAME = "resource";
 static const sstring PERMISSIONS_NAME = "permissions";
 static const sstring PERMISSIONS_CF = "permissions";

-static logging::logger logger("default_authorizer");
+static logging::logger alogger("default_authorizer");

 auth::default_authorizer::default_authorizer() {
 }
@@ -107,7 +107,7 @@ future<auth::permission_set> auth::default_authorizer::authorize(
                }
                return make_ready_future<permission_set>(permissions::from_strings(res->one().get_set<sstring>(PERMISSIONS_NAME)));
            } catch (exceptions::request_execution_exception& e) {
-                logger.warn("CassandraAuthorizer failed to authorize {} for {}", user->name(), resource);
+                alogger.warn("CassandraAuthorizer failed to authorize {} for {}", user->name(), resource);
                return make_ready_future<permission_set>(permissions::NONE);
            }
        });
@@ -196,7 +196,7 @@ future<> auth::default_authorizer::revoke_all(sstring dropped_user) {
                        try {
                            std::rethrow_exception(ep);
                        } catch (exceptions::request_execution_exception& e) {
-                            logger.warn("CassandraAuthorizer failed to revoke all permissions of {}: {}", dropped_user, e);
+                            alogger.warn("CassandraAuthorizer failed to revoke all permissions of {}: {}", dropped_user, e);
                        }
                    });
 }
@@ -217,13 +217,13 @@ future<> auth::default_authorizer::revoke_all(data_resource resource) {
                    try {
                        std::rethrow_exception(ep);
                    } catch (exceptions::request_execution_exception& e) {
-                        logger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
+                        alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
                    }

                });
            });
        } catch (exceptions::request_execution_exception& e) {
-            logger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
+            alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
            return make_ready_future();
        }
    });
--- a/auth/password_authenticator.cc
+++ b/auth/password_authenticator.cc
@@ -61,7 +61,7 @@ static const sstring DEFAULT_USER_NAME = auth::auth::DEFAULT_SUPERUSER_NAME;
 static const sstring DEFAULT_USER_PASSWORD = auth::auth::DEFAULT_SUPERUSER_NAME;
 static const sstring CREDENTIALS_CF = "credentials";

-static logging::logger logger("password_authenticator");
+static logging::logger plogger("password_authenticator");

 auth::password_authenticator::~password_authenticator()
 {}
@@ -169,7 +169,7 @@ future<> auth::password_authenticator::init() {
                                                    USER_NAME, SALTED_HASH
                                    ),
                                    db::consistency_level::ONE, {DEFAULT_USER_NAME, hashpw(DEFAULT_USER_PASSWORD)}).then([](auto) {
-                                        logger.info("Created default user '{}'", DEFAULT_USER_NAME);
+                                        plogger.info("Created default user '{}'", DEFAULT_USER_NAME);
                                    });
                }
            });
@@ -201,8 +201,7 @@ auth::authenticator::option_set auth::password_authenticator::alterable_options(
 }

 future<::shared_ptr<auth::authenticated_user> > auth::password_authenticator::authenticate(
-                const credentials_map& credentials) const
-                                throw (exceptions::authentication_exception) {
+                const credentials_map& credentials) const {
    if (!credentials.count(USERNAME_KEY)) {
        throw exceptions::authentication_exception(sprint("Required key '%s' is missing", USERNAME_KEY));
    }
@@ -218,12 +217,12 @@ future<::shared_ptr<auth::authenticated_user> > auth::password_authenticator::au
    // obsolete prepared statements pretty quickly.
    // Rely on query processing caching statements instead, and lets assume
    // that a map lookup string->statement is not gonna kill us much.
-    auto& qp = cql3::get_local_query_processor();
-    return qp.process(
-                    sprint("SELECT %s FROM %s.%s WHERE %s = ?", SALTED_HASH,
-                                    auth::AUTH_KS, CREDENTIALS_CF, USER_NAME),
-                    consistency_for_user(username), { username }, true).then_wrapped(
-                    [=](future<::shared_ptr<cql3::untyped_result_set>> f) {
+    return futurize_apply([this, username, password] {
+        auto& qp = cql3::get_local_query_processor();
+        return qp.process(sprint("SELECT %s FROM %s.%s WHERE %s = ?", SALTED_HASH,
+                                        auth::AUTH_KS, CREDENTIALS_CF, USER_NAME),
+                        consistency_for_user(username), {username}, true);
+    }).then_wrapped([=](future<::shared_ptr<cql3::untyped_result_set>> f) {
        try {
            auto res = f.get0();
            if (res->empty() || !checkpw(password, res->one().get_as<sstring>(SALTED_HASH))) {
@@ -234,14 +233,14 @@ future<::shared_ptr<auth::authenticated_user> > auth::password_authenticator::au
            std::throw_with_nested(exceptions::authentication_exception("Could not verify password"));
        } catch (exceptions::request_execution_exception& e) {
            std::throw_with_nested(exceptions::authentication_exception(e.what()));
+        } catch (...) {
+            std::throw_with_nested(exceptions::authentication_exception("authentication failed"));
        }
    });
 }

 future<> auth::password_authenticator::create(sstring username,
-                const option_map& options)
-                                throw (exceptions::request_validation_exception,
-                                exceptions::request_execution_exception) {
+                const option_map& options) {
    try {
        auto password = boost::any_cast<sstring>(options.at(option::PASSWORD));
        auto query = sprint("INSERT INTO %s.%s (%s, %s) VALUES (?, ?)",
@@ -254,9 +253,7 @@ future<> auth::password_authenticator::create(sstring username,
 }

 future<> auth::password_authenticator::alter(sstring username,
-                const option_map& options)
-                                throw (exceptions::request_validation_exception,
-                                exceptions::request_execution_exception) {
+                const option_map& options) {
    try {
        auto password = boost::any_cast<sstring>(options.at(option::PASSWORD));
        auto query = sprint("UPDATE %s.%s SET %s = ? WHERE %s = ?",
@@ -268,9 +265,7 @@ future<> auth::password_authenticator::alter(sstring username,
    }
 }

-future<> auth::password_authenticator::drop(sstring username)
-                throw (exceptions::request_validation_exception,
-                exceptions::request_execution_exception) {
+future<> auth::password_authenticator::drop(sstring username) {
    try {
        auto query = sprint("DELETE FROM %s.%s WHERE %s = ?",
                        auth::AUTH_KS, CREDENTIALS_CF, USER_NAME);
@@ -306,9 +301,8 @@ const auth::resource_ids& auth::password_authenticator::protected_resources() co
         * would expect
         * @throws javax.security.sasl.SaslException
         */
-        bytes evaluate_response(bytes_view client_response)
-                        throw (exceptions::authentication_exception) override {
-            logger.debug("Decoding credentials from client token");
+        bytes evaluate_response(bytes_view client_response) override {
+            plogger.debug("Decoding credentials from client token");

            sstring username, password;

@@ -345,8 +339,7 @@ const auth::resource_ids& auth::password_authenticator::protected_resources() co
        bool is_complete() const override {
            return _complete;
        }
-        future<::shared_ptr<authenticated_user>> get_authenticated_user() const
-                        throw (exceptions::authentication_exception) override {
+        future<::shared_ptr<authenticated_user>> get_authenticated_user() const override {
            return _authenticator.authenticate(_credentials);
        }
    private:
--- a/auth/password_authenticator.hh
+++ b/auth/password_authenticator.hh
@@ -58,10 +58,10 @@ public:
    bool require_authentication() const override;
    option_set supported_options() const override;
    option_set alterable_options() const override;
-    future<::shared_ptr<authenticated_user>> authenticate(const credentials_map& credentials) const throw(exceptions::authentication_exception) override;
-    future<> create(sstring username, const option_map& options) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) override;
-    future<> alter(sstring username, const option_map& options) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) override;
-    future<> drop(sstring username) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) override;
+    future<::shared_ptr<authenticated_user>> authenticate(const credentials_map& credentials) const override;
+    future<> create(sstring username, const option_map& options) override;
+    future<> alter(sstring username, const option_map& options) override;
+    future<> drop(sstring username) override;
    const resource_ids& protected_resources() const override;
    ::shared_ptr<sasl_challenge> new_sasl_challenge() const override;

--- a/auth/permission.cc
+++ b/auth/permission.cc
@@ -40,6 +40,7 @@
 */

 #include <unordered_map>
+#include <boost/algorithm/string.hpp>
 #include "permission.hh"

 const auth::permission_set auth::permissions::ALL_DATA =
@@ -75,7 +76,9 @@ const sstring& auth::permissions::to_string(permission p) {
 }

 auth::permission auth::permissions::from_string(const sstring& s) {
-    return permission_names.at(s);
+    sstring upper(s);
+    boost::to_upper(upper);
+    return permission_names.at(upper);
 }

 std::unordered_set<sstring> auth::permissions::to_strings(const permission_set& set) {
--- a/auth/permission.hh
+++ b/auth/permission.hh
@@ -44,6 +44,7 @@
 #include <unordered_set>
 #include <seastar/core/sstring.hh>

+#include "seastarx.hh"
 #include "enum_set.hh"

 namespace auth {
--- a/bytes.hh
+++ b/bytes.hh
@@ -21,14 +21,17 @@

 #pragma once

+#include "seastarx.hh"
 #include "core/sstring.hh"
 #include "hashing.hh"
 #include <experimental/optional>
 #include <iosfwd>
 #include <functional>
+#include "utils/mutable_view.hh"

 using bytes = basic_sstring<int8_t, uint32_t, 31>;
 using bytes_view = std::experimental::basic_string_view<int8_t>;
+using bytes_mutable_view = basic_mutable_view<bytes_view::value_type>;
 using bytes_opt = std::experimental::optional<bytes>;
 using sstring_view = std::experimental::string_view;

--- a/bytes_ostream.hh
+++ b/bytes_ostream.hh
@@ -38,7 +38,7 @@ class bytes_ostream {
 public:
    using size_type = bytes::size_type;
    using value_type = bytes::value_type;
-    static constexpr size_type max_chunk_size = 16 * 1024;
+    static constexpr size_type max_chunk_size() { return 16 * 1024; }
 private:
    static_assert(sizeof(value_type) == 1, "value_type is assumed to be one byte long");
    struct chunk {
@@ -59,7 +59,6 @@ private:
    };
    // FIXME: consider increasing chunk size as the buffer grows
    static constexpr size_type chunk_size{512};
-    static constexpr size_type usable_chunk_size{chunk_size - sizeof(chunk)};
 private:
    std::unique_ptr<chunk> _begin;
    chunk* _current;
@@ -100,6 +99,19 @@ private:
        }
        return _current->size - _current->offset;
    }
+    // Chooses the size of the next chunk allocation:
+    //   - start from chunk_size, or from twice the current chunk's size,
+    //   - cap that growth at max_chunk_size(),
+    //   - but never return less than data_size + sizeof(chunk), so a large
+    //     request is still satisfied by a single chunk.
+    size_type next_alloc_size(size_t data_size) const {
+        size_type grown = chunk_size;
+        if (_current) {
+            grown = _current->size * 2;
+        }
+        // FIXME: check for overflow?
+        return std::max<size_type>(std::min(grown, max_chunk_size()), data_size + sizeof(chunk));
+    }
    // Makes room for a contiguous region of given size.
    // The region is accounted for as already written.
    // size must not be zero.
@@ -110,7 +122,7 @@ private:
            _size += size;
            return ret;
        } else {
-            auto alloc_size = size <= usable_chunk_size ? chunk_size : (size + sizeof(chunk));
+            auto alloc_size = next_alloc_size(size);
            auto space = malloc(alloc_size);
            if (!space) {
                throw std::bad_alloc();
@@ -205,7 +217,7 @@ public:
        }

        while (!v.empty()) {
-            auto this_size = std::min(v.size(), size_t(max_chunk_size));
+            auto this_size = std::min(v.size(), size_t(max_chunk_size()));
            std::copy_n(v.begin(), this_size, alloc(this_size));
            v.remove_prefix(this_size);
        }
@@ -329,7 +341,7 @@ public:
        // if its size is below max_chunk_size. We probably could also gain
        // some read performance by doing "real" reduction, i.e. merging
        // all chunks until all but the last one is max_chunk_size.
-        if (size() < max_chunk_size) {
+        if (size() < max_chunk_size()) {
            linearize();
        }
    }
--- a/cache_streamed_mutation.hh
+++ b/cache_streamed_mutation.hh
@@ -0,0 +1,538 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <vector>
+#include "row_cache.hh"
+#include "mutation_reader.hh"
+#include "streamed_mutation.hh"
+#include "partition_version.hh"
+#include "utils/logalloc.hh"
+#include "query-request.hh"
+#include "partition_snapshot_reader.hh"
+#include "partition_snapshot_row_cursor.hh"
+#include "read_context.hh"
+
+namespace cache {
+
+// Runs callables against a row_cache's tracker region inside the cache's allocating sections.
+class lsa_manager {
+    row_cache& _cache;
+public:
+    lsa_manager(row_cache& cache) : _cache(cache) { }
+    // Accessors for the cache tracker's region and the cache's read section.
+    logalloc::region& region() { return _cache._tracker.region(); }
+    logalloc::allocating_section& read_section() { return _cache._read_section; }
+    // Calls func() through the cache's _read_section, wrapped in with_linearized_managed_bytes().
+    template<typename Func>
+    decltype(auto) run_in_read_section(const Func& func) {
+        return _cache._read_section(region(), [&func] {
+            return with_linearized_managed_bytes([&func] { return func(); });
+        });
+    }
+    // Calls func() through the cache's _update_section, wrapped in with_linearized_managed_bytes().
+    template<typename Func>
+    decltype(auto) run_in_update_section(const Func& func) {
+        return _cache._update_section(region(), [&func] {
+            return with_linearized_managed_bytes([&func] { return func(); });
+        });
+    }
+    // Same nesting as run_in_update_section(), with func() additionally invoked through
+    // with_allocator() using the region's allocator. func() is expected to return void.
+    template<typename Func>
+    void run_in_update_section_with_allocator(Func&& func) {
+        return _cache._update_section(region(), [this, &func] {
+            return with_linearized_managed_bytes([this, &func] {
+                return with_allocator(region().allocator(), [&func] { return func(); });
+            });
+        });
+    }
+};
+
+// Streamed mutation for one partition: emits fragments from a cache snapshot and reads through _read_context where the cache is not continuous.
+class cache_streamed_mutation final : public streamed_mutation::impl {
+    enum class state {
+        before_static_row,
+
+        // Invariants:
+        //  - position_range(_lower_bound, _upper_bound) covers all not yet emitted positions from current range
+        //  - _next_row points to the nearest row in cache >= _lower_bound
+        //  - _next_row_in_range = _next_row.position() < _upper_bound
+        reading_from_cache,
+
+        // Starts reading from underlying reader.
+        // The range to read is position_range(_lower_bound, min(_next_row.position(), _upper_bound)).
+        // Invariants:
+        //  - _next_row_in_range = _next_row.position() < _upper_bound
+        move_to_underlying,
+
+        // Invariants:
+        // - Upper bound of the read is min(_next_row.position(), _upper_bound)
+        // - _next_row_in_range = _next_row.position() < _upper_bound
+        // - _last_row_key contains the key of last emitted clustering_row
+        reading_from_underlying,
+
+        end_of_stream
+    };
+    lw_shared_ptr<partition_snapshot> _snp;
+    position_in_partition::tri_compare _position_cmp;
+
+    query::clustering_key_filter_ranges _ck_ranges;
+    query::clustering_row_ranges::const_iterator _ck_ranges_curr;
+    query::clustering_row_ranges::const_iterator _ck_ranges_end;
+
+    lsa_manager _lsa_manager;
+    stdx::optional<clustering_key> _last_row_key; // reset whenever a new range is entered
+
+    // We need to be prepared that we may get overlapping and out of order
+    // range tombstones. We must emit fragments with strictly monotonic positions,
+    // so we can't just trim such tombstones to the position of the last fragment.
+    // To solve that, range tombstones are accumulated first in a range_tombstone_stream
+    // and emitted once we have a fragment with a larger position.
+    range_tombstone_stream _tombstones;
+
+    // Holds the lower bound of a position range which hasn't been processed yet.
+    // Only fragments with positions < _lower_bound have been emitted.
+    position_in_partition _lower_bound;
+    position_in_partition_view _upper_bound;
+
+    state _state = state::before_static_row;
+    lw_shared_ptr<read_context> _read_context;
+    partition_snapshot_row_cursor _next_row;
+    bool _next_row_in_range = false;
+
+    future<> do_fill_buffer();
+    void copy_from_cache_to_buffer();
+    future<> process_static_row();
+    void move_to_end();
+    void move_to_next_range();
+    void move_to_current_range();
+    void move_to_next_entry();
+    // Emits all delayed range tombstones with positions smaller than upper_bound.
+    void drain_tombstones(position_in_partition_view upper_bound);
+    // Emits all delayed range tombstones.
+    void drain_tombstones();
+    void add_to_buffer(const partition_snapshot_row_cursor&);
+    void add_clustering_row_to_buffer(mutation_fragment&&);
+    void add_to_buffer(range_tombstone&&);
+    void add_to_buffer(mutation_fragment&&);
+    future<> read_from_underlying();
+    future<> start_reading_from_underlying();
+    bool after_current_range(position_in_partition_view position);
+    bool can_populate() const;
+    void maybe_update_continuity();
+    void maybe_add_to_cache(const mutation_fragment& mf);
+    void maybe_add_to_cache(const clustering_row& cr);
+    void maybe_add_to_cache(const range_tombstone& rt);
+    void maybe_add_to_cache(const static_row& sr);
+    void maybe_set_static_row_continuous();
+public:
+    cache_streamed_mutation(schema_ptr s,
+                            dht::decorated_key dk,
+                            query::clustering_key_filter_ranges&& crr,
+                            lw_shared_ptr<read_context> ctx,
+                            lw_shared_ptr<partition_snapshot> snp,
+                            row_cache& cache)
+        : streamed_mutation::impl(std::move(s), std::move(dk), snp->partition_tombstone()) // dk is a by-value parameter: move, don't copy
+        , _snp(std::move(snp))
+        , _position_cmp(*_schema)
+        , _ck_ranges(std::move(crr))
+        , _ck_ranges_curr(_ck_ranges.begin())
+        , _ck_ranges_end(_ck_ranges.end())
+        , _lsa_manager(cache)
+        , _tombstones(*_schema)
+        , _lower_bound(position_in_partition::before_all_clustered_rows())
+        , _upper_bound(position_in_partition_view::before_all_clustered_rows())
+        , _read_context(std::move(ctx))
+        , _next_row(*_schema, cache._tracker.region(), *_snp)
+    { }
+    cache_streamed_mutation(const cache_streamed_mutation&) = delete;
+    cache_streamed_mutation(cache_streamed_mutation&&) = delete;
+    virtual future<> fill_buffer() override;
+    virtual ~cache_streamed_mutation() { // hands the snapshot to maybe_merge_versions() on destruction
+        maybe_merge_versions(_snp, _lsa_manager.region(), _lsa_manager.read_section());
+    }
+};
+
+// Emits the static row. If the snapshot's static row is marked continuous it is served from the
+// snapshot; otherwise the next fragment is fetched through _read_context and offered to the cache.
+inline future<> cache_streamed_mutation::process_static_row() {
+    if (!_snp->version()->partition().static_row_continuous()) {
+        _read_context->cache().on_row_miss();
+        return _read_context->get_next_fragment().then([this] (mutation_fragment_opt&& fragment) {
+            if (fragment) {
+                assert(fragment->is_static_row());
+                maybe_add_to_cache(fragment->as_static_row());
+                push_mutation_fragment(std::move(*fragment));
+            }
+            maybe_set_static_row_continuous();
+        });
+    }
+    _read_context->cache().on_row_hit();
+    row cached = _lsa_manager.run_in_read_section([this] {
+        return _snp->static_row();
+    });
+    if (!cached.empty()) {
+        push_mutation_fragment(mutation_fragment(static_row(std::move(cached))));
+    }
+    return make_ready_future<>();
+}
+
+// Fills the fragment buffer. On the first call it also emits the static row (if the
+// schema has static columns) and positions the cursor on the first clustering range.
+inline future<> cache_streamed_mutation::fill_buffer() {
+    if (_state != state::before_static_row) {
+        return do_until([this] { return _end_of_stream || is_buffer_full(); },
+                        [this] { return do_fill_buffer(); });
+    }
+    auto begin_clustered_part = [this] {
+        if (_ck_ranges_curr == _ck_ranges_end) {
+            _end_of_stream = true;
+            _state = state::end_of_stream;
+            return make_ready_future<>();
+        }
+        _state = state::reading_from_cache;
+        _lsa_manager.run_in_read_section([this] {
+            move_to_current_range();
+        });
+        // Re-enter: _state is no longer before_static_row, so this takes the loop above.
+        return fill_buffer();
+    };
+    if (!_schema->has_static_columns()) {
+        return begin_clustered_part();
+    }
+    return process_static_row().then(std::move(begin_clustered_part));
+}
+
+// Performs one buffer-filling step, chosen by the current _state.
+inline future<> cache_streamed_mutation::do_fill_buffer() {
+    if (_state == state::reading_from_underlying) {
+        return read_from_underlying();
+    }
+    if (_state == state::move_to_underlying) {
+        _state = state::reading_from_underlying;
+        auto read_end = _next_row_in_range ? position_in_partition(_next_row.position())
+                                           : position_in_partition(_upper_bound);
+        return _read_context->fast_forward_to(position_range{_lower_bound, std::move(read_end)}).then([this] {
+            return read_from_underlying();
+        });
+    }
+    // assert(_state == state::reading_from_cache)
+    return _lsa_manager.run_in_read_section([this] {
+        auto cursor_valid = _next_row.maybe_refresh();
+        // FIXME: If continuity changed anywhere between _lower_bound and _next_row.position()
+        // we need to redo the lookup with _lower_bound. There is no eviction yet, so not yet a problem.
+        assert(cursor_valid);
+        while (!is_buffer_full() && _state == state::reading_from_cache) {
+            copy_from_cache_to_buffer();
+            if (need_preempt()) {
+                break;
+            }
+        }
+        return make_ready_future<>();
+    });
+}
+
+// Feeds fragments from _read_context's streamed mutation into the cache and the buffer; the last callback (presumably run at end of that stream - confirm) resumes reading from cache.
+inline future<> cache_streamed_mutation::read_from_underlying() {
+    return consume_mutation_fragments_until(_read_context->get_streamed_mutation(),
+        [this] { return _state != state::reading_from_underlying || is_buffer_full(); },
+        [this] (mutation_fragment mf) {
+            _read_context->cache().on_row_miss();
+            maybe_add_to_cache(mf);
+            add_to_buffer(std::move(mf));
+        },
+        [this] {
+            _state = state::reading_from_cache;
+            _lsa_manager.run_in_update_section([this] {
+                auto cursor_valid = _next_row.maybe_refresh();
+                assert(cursor_valid); // FIXME: handle eviction
+                if (_next_row_in_range) {
+                    maybe_update_continuity();
+                    add_to_buffer(_next_row);
+                    move_to_next_entry();
+                    return;
+                }
+                if (no_clustering_row_between(*_schema, _upper_bound, _next_row.position())) {
+                    maybe_update_continuity();
+                } else {
+                    // FIXME: Insert dummy entry at _upper_bound.
+                    _read_context->cache().on_mispopulate();
+                }
+                move_to_next_range();
+            });
+            return make_ready_future<>();
+        });
+}
+
+// Marks _next_row continuous when population is allowed and nothing can lie between it and the last emitted row (or the unbounded range start).
+inline void cache_streamed_mutation::maybe_update_continuity() {
+    if (!can_populate() || !_next_row.is_in_latest_version()) {
+        _read_context->cache().on_mispopulate();
+        return;
+    }
+    if (_last_row_key) {
+        if (_next_row.previous_row_in_latest_version_has_key(*_last_row_key)) {
+            _next_row.set_continuous(true);
+        }
+    } else if (!_ck_ranges_curr->start()) {
+        _next_row.set_continuous(true);
+    }
+}
+
+// Offers a fragment to the cache, dispatching to the range-tombstone or clustering-row overload.
+inline void cache_streamed_mutation::maybe_add_to_cache(const mutation_fragment& mf) {
+    if (mf.is_range_tombstone()) {
+        maybe_add_to_cache(mf.as_range_tombstone());
+        return;
+    }
+    // Anything that is not a range tombstone must be a clustering row here.
+    assert(mf.is_clustering_row());
+    maybe_add_to_cache(mf.as_clustering_row());
+}
+
+// Inserts a copy of cr into the latest version's clustered rows (when population is allowed) and marks it continuous if it directly follows the last emitted row.
+inline void cache_streamed_mutation::maybe_add_to_cache(const clustering_row& cr) {
+    if (!can_populate()) {
+        _read_context->cache().on_mispopulate();
+        return;
+    }
+    _lsa_manager.run_in_update_section_with_allocator([this, &cr] {
+        mutation_partition& partition = _snp->version()->partition();
+        rows_entry::compare row_less(*_schema);
+
+        // FIXME: If _next_row is up to date, but latest version doesn't have iterator in
+        // current row (could be far away, so we'd do this often), then this will do
+        // the lookup in mp. This is not necessary, because _next_row has iterators for
+        // next rows in each version, even if they're not part of the current row.
+        // They're currently buried in the heap, but you could keep a vector of
+        // iterators per each version in addition to the heap.
+        auto candidate = alloc_strategy_unique_ptr<rows_entry>(
+            current_allocator().construct<rows_entry>(cr.key(), cr.tomb(), cr.marker(), cr.cells()));
+        candidate->set_continuous(false);
+        auto pos = _next_row.has_valid_row_from_latest_version()
+                   ? _next_row.get_iterator_in_latest_version() : partition.clustered_rows().lower_bound(cr.key(), row_less);
+        auto inserted = partition.clustered_rows().insert_check(pos, *candidate, row_less);
+        if (inserted.second) {
+            _read_context->cache().on_row_insert();
+            candidate.release(); // inserted: give up ownership so the entry is not destroyed here
+        }
+        pos = inserted.first;
+
+        rows_entry& entry = *pos;
+        if (_last_row_key) {
+            if (pos == partition.clustered_rows().begin()) {
+                // FIXME: check whether entry for _last_row_key is in older versions and if so set
+                // continuity to true.
+                _read_context->cache().on_mispopulate();
+            } else {
+                auto prev = pos;
+                --prev;
+                clustering_key_prefix::equality key_eq(*_schema);
+                if (key_eq(*_last_row_key, prev->key())) {
+                    entry.set_continuous(true);
+                }
+            }
+        } else if (!_ck_ranges_curr->start()) {
+            entry.set_continuous(true);
+        } else {
+            // FIXME: Insert dummy entry at _ck_ranges_curr->start()
+            _read_context->cache().on_mispopulate();
+        }
+    });
+}
+
+// True when p does not compare less than _upper_bound, i.e. p lies at or past the end of the current range.
+inline bool cache_streamed_mutation::after_current_range(position_in_partition_view p) {
+    return !(_position_cmp(p, _upper_bound) < 0);
+}
+
+// Only records the state switch; the actual read is started by do_fill_buffer() when it sees move_to_underlying.
+inline future<> cache_streamed_mutation::start_reading_from_underlying() {
+    _state = state::move_to_underlying;
+    return make_ready_future<>();
+}
+
+// Buffers the snapshot's range tombstones up to the next row (or the range end), then the row itself; stops early once the buffer is full.
+inline void cache_streamed_mutation::copy_from_cache_to_buffer() {
+    position_in_partition_view next_lower_bound = _next_row.dummy()
+        ? _next_row.position() : position_in_partition_view::after_key(_next_row.key());
+    for (auto&& rt : _snp->range_tombstones(*_schema, _lower_bound, _next_row_in_range ? next_lower_bound : _upper_bound)) {
+        add_to_buffer(std::move(rt));
+        if (is_buffer_full()) {
+            return;
+        }
+    }
+    if (!_next_row_in_range) {
+        return move_to_next_range();
+    }
+    add_to_buffer(_next_row);
+    move_to_next_entry();
+}
+
+// Flushes every delayed range tombstone, then marks the stream as finished.
+inline void cache_streamed_mutation::move_to_end() {
+    drain_tombstones();
+    _state = state::end_of_stream;
+    _end_of_stream = true;
+}
+
+// Advances to the next clustering range, or ends the stream when the last range is done.
+inline void cache_streamed_mutation::move_to_next_range() {
+    ++_ck_ranges_curr;
+    if (_ck_ranges_curr != _ck_ranges_end) {
+        move_to_current_range();
+    } else {
+        move_to_end();
+    }
+}
+
+// Resets the bounds and cursor for *_ck_ranges_curr; schedules an underlying read unless the cursor has a row at _lower_bound or is continuous.
+inline void cache_streamed_mutation::move_to_current_range() {
+    _last_row_key = std::experimental::nullopt;
+    _lower_bound = position_in_partition::for_range_start(*_ck_ranges_curr);
+    _upper_bound = position_in_partition_view::for_range_end(*_ck_ranges_curr);
+    const bool needs_underlying = !(_next_row.advance_to(_lower_bound) || _next_row.continuous());
+    _next_row_in_range = !after_current_range(_next_row.position());
+    if (needs_underlying) {
+        start_reading_from_underlying();
+    }
+}
+
+// Steps past the current cursor row. _next_row must be inside the range.
+// Moves to the next range, ends the stream, or schedules an underlying read as needed.
+inline void cache_streamed_mutation::move_to_next_entry() {
+    if (no_clustering_row_between(*_schema, _next_row.position(), _upper_bound)) {
+        move_to_next_range();
+        return;
+    }
+    if (!_next_row.next()) {
+        move_to_end();
+        return;
+    }
+    _next_row_in_range = !after_current_range(_next_row.position());
+    if (!_next_row.continuous()) {
+        start_reading_from_underlying();
+    }
+}
+
+// Pushes every fragment that _tombstones.get_next(pos) yields for the given position.
+inline void cache_streamed_mutation::drain_tombstones(position_in_partition_view pos) {
+    while (auto pending = _tombstones.get_next(pos)) {
+        push_mutation_fragment(std::move(*pending));
+    }
+}
+
+// Pushes every remaining delayed range tombstone, with no position limit.
+inline void cache_streamed_mutation::drain_tombstones() {
+    while (auto pending = _tombstones.get_next()) {
+        push_mutation_fragment(std::move(*pending));
+    }
+}
+
+// Buffers a fragment, dispatching on whether it is a clustering row or a range tombstone.
+inline void cache_streamed_mutation::add_to_buffer(mutation_fragment&& mf) {
+    if (!mf.is_clustering_row()) {
+        assert(mf.is_range_tombstone());
+        add_to_buffer(std::move(mf).as_range_tombstone());
+        return;
+    }
+    add_clustering_row_to_buffer(std::move(mf));
+}
+
+// Buffers the row the cursor points at and counts a cache hit.
+// Dummy entries are skipped: nothing is emitted or counted for them.
+inline void cache_streamed_mutation::add_to_buffer(const partition_snapshot_row_cursor& row) {
+    if (row.dummy()) { return; }
+    _read_context->cache().on_row_hit();
+    add_clustering_row_to_buffer(row.row());
+}
+
+// Emits tombstones delayed before the row, records its key, moves _lower_bound just past it, then pushes the row.
+inline void cache_streamed_mutation::add_clustering_row_to_buffer(mutation_fragment&& mf) {
+    auto& cr = mf.as_clustering_row();
+    drain_tombstones(cr.position());
+    _lower_bound = position_in_partition::after_key(cr.key());
+    _last_row_key = cr.key();
+    push_mutation_fragment(std::move(mf));
+}
+
+// Queues a range tombstone in _tombstones after trimming its front to _lower_bound.
+inline void cache_streamed_mutation::add_to_buffer(range_tombstone&& rt) {
+    // This guarantees that rt starts after any emitted clustering_row
+    // If trim_front() returns false the tombstone is dropped and nothing changes.
+    if (rt.trim_front(*_schema, _lower_bound)) {
+        _lower_bound = position_in_partition(rt.position());
+        _tombstones.apply(std::move(rt));
+        drain_tombstones(_lower_bound);
+    }
+}
+
+// Applies rt to the latest version's row tombstones if population is allowed; otherwise counts a mispopulation.
+inline void cache_streamed_mutation::maybe_add_to_cache(const range_tombstone& rt) {
+    if (!can_populate()) {
+        _read_context->cache().on_mispopulate();
+        return;
+    }
+    _lsa_manager.run_in_update_section_with_allocator([this, &rt] {
+        _snp->version()->partition().row_tombstones().apply_monotonically(*_schema, rt);
+    });
+}
+
+// Applies sr's cells to the latest version's static row if population is allowed; otherwise counts a mispopulation.
+inline void cache_streamed_mutation::maybe_add_to_cache(const static_row& sr) {
+    if (!can_populate()) {
+        _read_context->cache().on_mispopulate();
+        return;
+    }
+    _read_context->cache().on_row_insert();
+    _lsa_manager.run_in_update_section_with_allocator([this, &sr] {
+        _snp->version()->partition().static_row().apply(*_schema, column_kind::static_column, sr.cells());
+    });
+}
+
+// Flags the latest version's static row as continuous if population is allowed; otherwise counts a mispopulation.
+inline void cache_streamed_mutation::maybe_set_static_row_continuous() {
+    if (!can_populate()) {
+        _read_context->cache().on_mispopulate();
+        return;
+    }
+    _snp->version()->partition().set_static_row_continuous(true);
+}
+
+// Population requires the snapshot to be at the latest version and the cache's phase for this key to equal the read's phase.
+inline bool cache_streamed_mutation::can_populate() const {
+    return _snp->at_latest_version() && _read_context->cache().phase_of(_read_context->key()) == _read_context->phase();
+}
+
+} // namespace cache
+
+// Wraps a cache::cache_streamed_mutation built from the given arguments in a streamed_mutation.
+inline streamed_mutation make_cache_streamed_mutation(schema_ptr s,
+                                                      dht::decorated_key dk,
+                                                      query::clustering_key_filter_ranges crr,
+                                                      row_cache& cache,
+                                                      lw_shared_ptr<cache::read_context> ctx,
+                                                      lw_shared_ptr<partition_snapshot> snp) {
+    return make_streamed_mutation<cache::cache_streamed_mutation>(
+        std::move(s), std::move(dk), std::move(crr), std::move(ctx), std::move(snp), cache);
+}
--- a/caching_options.hh
+++ b/caching_options.hh
@@ -24,6 +24,7 @@
 #include <boost/lexical_cast.hpp>
 #include "exceptions/exceptions.hh"
 #include "json.hh"
+#include "seastarx.hh"

 class schema;

@@ -58,30 +59,34 @@ class caching_options {
    caching_options() : _key_cache(default_key), _row_cache(default_row) {}
 public:

-    sstring to_sstring() const {
-        return json::to_json(std::map<sstring, sstring>({{ "keys", _key_cache }, { "rows_per_partition", _row_cache }}));
+    std::map<sstring, sstring> to_map() const {
+        return {{ "keys", _key_cache }, { "rows_per_partition", _row_cache }};
    }

-    static caching_options from_sstring(const sstring& str) {
-        auto map = json::to_map(str);
-        if (map.size() > 2) {
-            throw exceptions::configuration_exception("Invalid map: " + str); 
-        }
-        sstring k;
-        sstring r;
-        if (map.count("keys")) {
-            k = map.at("keys");
-        } else {
-            k = default_key;
-        }
+    sstring to_sstring() const {
+        return json::to_json(to_map());
+    }

-        if (map.count("rows_per_partition")) {
-            r = map.at("rows_per_partition");
-        } else {
-            r = default_row;
+    template<typename Map>
+    static caching_options from_map(const Map & map) {
+        sstring k = default_key;
+        sstring r = default_row;
+
+        for (auto& p : map) {
+            if (p.first == "keys") {
+                k = p.second;
+            } else if (p.first == "rows_per_partition") {
+                r = p.second;
+            } else {
+                throw exceptions::configuration_exception("Invalid caching option: " + p.first);
+            }
        }
        return caching_options(k, r);
    }
+    static caching_options from_sstring(const sstring& str) {
+        return from_map(json::to_map(str));
+    }
+
    bool operator==(const caching_options& other) const {
        return _key_cache == other._key_cache && _row_cache == other._row_cache;
    }
--- a/canonical_mutation.cc
+++ b/canonical_mutation.cc
@@ -22,6 +22,7 @@
 #include "canonical_mutation.hh"
 #include "mutation.hh"
 #include "mutation_partition_serializer.hh"
+#include "counters.hh"
 #include "converting_mutation_partition_applier.hh"
 #include "hashing_partition_visitor.hh"
 #include "utils/UUID.hh"
@@ -44,7 +45,7 @@ canonical_mutation::canonical_mutation(const mutation& m)
    mutation_partition_serializer part_ser(*m.schema(), m.partition());

    bytes_ostream out;
-    ser::writer_of_canonical_mutation wr(out);
+    ser::writer_of_canonical_mutation<bytes_ostream> wr(out);
    std::move(wr).write_table_id(m.schema()->id())
                 .write_schema_version(m.schema()->version())
                 .write_key(m.key())
--- a/cell_locking.hh
+++ b/cell_locking.hh
@@ -0,0 +1,566 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <boost/intrusive/unordered_set.hpp>
+
+#if __has_include(<boost/container/small_vector.hpp>)
+
+#include <boost/container/small_vector.hpp>
+
+template <typename T, size_t N>
+using small_vector = boost::container::small_vector<T, N>;
+
+#else
+
+#include <vector>
+template <typename T, size_t N>
+using small_vector = std::vector<T>;
+
+#endif
+
+#include "fnv1a_hasher.hh"
+#include "streamed_mutation.hh"
+#include "mutation_partition.hh"
+
+// A position within a partition together with the ids of the columns
+// collected from one row at that position.
+class cells_range {
+    // The inline capacity of 5 only has an effect when small_vector aliases
+    // boost::container::small_vector; the std::vector fallback ignores it.
+    using ids_vector_type = small_vector<column_id, 5>;
+
+    position_in_partition_view _position;
+    ids_vector_type _ids;
+public:
+    using iterator = ids_vector_type::iterator;
+    using const_iterator = ids_vector_type::const_iterator;
+
+    // Creates an empty range positioned at the static row.
+    cells_range()
+        : _position(position_in_partition_view(position_in_partition_view::static_row_tag_t())) { }
+
+    // Records the id of every cell that `cells.for_each_cell()` visits.
+    explicit cells_range(position_in_partition_view pos, const row& cells)
+        : _position(pos)
+    {
+        _ids.reserve(cells.size());
+        cells.for_each_cell([this] (auto id, auto&&) {
+            _ids.emplace_back(id);
+        });
+    }
+
+    position_in_partition_view position() const { return _position; }
+    // True when no column ids were collected.
+    bool empty() const { return _ids.empty(); }
+
+    // Iteration yields the collected column ids.
+    auto begin() const { return _ids.begin(); }
+    auto end() const { return _ids.end(); }
+};
+
+// Presents a mutation_partition as a sequence of cells_range objects: first
+// the static row, then each clustered row in the order of clustered_rows().
+// Only a reference to the partition is stored.
+class partition_cells_range {
+    const mutation_partition& _mp;
+public:
+    class iterator {
+        const mutation_partition& _mp;
+        // Disengaged while the iterator points at the static row; otherwise
+        // the current position within clustered_rows().
+        stdx::optional<mutation_partition::rows_type::const_iterator> _position;
+        // The cells_range returned by operator* / operator->.
+        cells_range _current;
+    public:
+        // Begin iterator: points at the static row.
+        explicit iterator(const mutation_partition& mp)
+            : _mp(mp)
+            , _current(position_in_partition_view(position_in_partition_view::static_row_tag_t()), mp.static_row())
+        { }
+
+        // Iterator at a given clustered-row position (used for end()).
+        // _current is left default-constructed.
+        iterator(const mutation_partition& mp, mutation_partition::rows_type::const_iterator it)
+            : _mp(mp)
+            , _position(it)
+        { }
+
+        iterator& operator++() {
+            if (!_position) {
+                // Leaving the static row: move to the first clustered row.
+                _position = _mp.clustered_rows().begin();
+            } else {
+                ++(*_position);
+            }
+            // _current is refreshed only while a row remains; at the end it
+            // keeps its previous value.
+            if (_position != _mp.clustered_rows().end()) {
+                auto it = *_position;
+                _current = cells_range(position_in_partition_view(position_in_partition_view::clustering_row_tag_t(), it->key()),
+                        it->row().cells());
+            }
+            return *this;
+        }
+
+        iterator operator++(int) {
+            iterator it(*this);
+            operator++();
+            return it;
+        }
+
+        cells_range& operator*() {
+            return _current;
+        }
+
+        cells_range* operator->() {
+            return &_current;
+        }
+
+        // Iterators compare by _position only; _mp and _current are ignored.
+        bool operator==(const iterator& other) const {
+            return _position == other._position;
+        }
+        bool operator!=(const iterator& other) const {
+            return !(*this == other);
+        }
+    };
+public:
+    explicit partition_cells_range(const mutation_partition& mp) : _mp(mp) { }
+
+    iterator begin() const {
+        return iterator(_mp);
+    }
+    iterator end() const {
+        return iterator(_mp, _mp.clustered_rows().end());
+    }
+};
+
+class locked_cell;
+
+// Counters updated by cell_locker through a reference handed to its constructor.
+struct cell_locker_stats {
+    // Incremented once for every cell lock obtained (see cell_locker::lock_cells
+    // and cell_locker::locker::lock_next).
+    uint64_t lock_acquisitions = 0;
+    // Incremented when an operation starts waiting for a cell that is already
+    // locked (see cell_locker::locker::lock_next).
+    uint64_t operations_waiting_for_lock = 0;
+};
+
+class cell_locker {
+public:
+    using timeout_clock = lowres_clock;
+private:
+    using semaphore_type = basic_semaphore<default_timeout_exception_factory, timeout_clock>;
+
+    class partition_entry;
+
+    struct cell_address {
+        position_in_partition position;
+        column_id id;
+    };
+
+    class cell_entry : public bi::unordered_set_base_hook<bi::link_mode<bi::auto_unlink>>,
+                       public enable_lw_shared_from_this<cell_entry> {
+        partition_entry& _parent;
+        cell_address _address;
+        semaphore_type _semaphore { 0 };
+
+        friend class cell_locker;
+    public:
+        cell_entry(partition_entry& parent, position_in_partition position, column_id id)
+            : _parent(parent)
+            , _address { std::move(position), id }
+        { }
+
+        // Upgrades cell_entry to another schema.
+        // Changes the value of cell_address, so cell_entry has to be
+        // temporarily removed from its parent partition_entry.
+        // Returns true if the cell_entry still exists in the new schema and
+        // should be reinserted.
+        //
+        // The column is looked up in `from` by (kind, current id) and then
+        // matched by name in `to`; only the column id of the address changes.
+        bool upgrade(const schema& from, const schema& to, column_kind kind) noexcept {
+            auto& old_column_mapping = from.get_column_mapping();
+            auto& column = old_column_mapping.column_at(kind, _address.id);
+            auto cdef = to.get_column_definition(column.name());
+            if (!cdef) {
+                // No definition with that name in the new schema.
+                return false;
+            }
+            _address.id = cdef->id;
+            return true;
+        }
+
+        const position_in_partition& position() const {
+            return _address.position;
+        }
+
+        future<> lock(timeout_clock::time_point _timeout) {
+            return _semaphore.wait(_timeout);
+        }
+        void unlock() {
+            _semaphore.signal();
+        }
+
+        // Removes the entry from its parent's cell set and, if it was the last
+        // counted cell, destroys the parent partition_entry as well.
+        ~cell_entry() {
+            // An entry that is not linked into any set (e.g. dropped by
+            // partition_entry::upgrade()) has nothing to clean up.
+            if (!is_linked()) {
+                return;
+            }
+            unlink();
+            // The partition_entry is heap-allocated and released from its
+            // unique_ptr in cell_locker::lock_cells(); the last cell deletes it.
+            if (!--_parent._cell_count) {
+                delete &_parent;
+            }
+        }
+
+        class hasher {
+            const schema* _schema; // pointer instead of reference for default assignment
+        public:
+            explicit hasher(const schema& s) : _schema(&s) { }
+
+            size_t operator()(const cell_address& ca) const {
+                fnv1a_hasher hasher;
+                ca.position.feed_hash(hasher, *_schema);
+                ::feed_hash(hasher, ca.id);
+                return hasher.finalize();
+            }
+            size_t operator()(const cell_entry& ce) const {
+                return operator()(ce._address);
+            }
+        };
+
+        class equal_compare {
+            position_in_partition::equal_compare _cmp;
+        private:
+            bool do_compare(const cell_address& a, const cell_address& b) const {
+                return a.id == b.id && _cmp(a.position, b.position);
+            }
+        public:
+            explicit equal_compare(const schema& s) : _cmp(s) { }
+            bool operator()(const cell_address& ca, const cell_entry& ce) const {
+                return do_compare(ca, ce._address);
+            }
+            bool operator()(const cell_entry& ce, const cell_address& ca) const {
+                return do_compare(ca, ce._address);
+            }
+            bool operator()(const cell_entry& a, const cell_entry& b) const {
+                return do_compare(a._address, b._address);
+            }
+        };
+    };
+
+    class partition_entry : public bi::unordered_set_base_hook<bi::link_mode<bi::auto_unlink>> {
+        using cells_type = bi::unordered_set<cell_entry,
+                                             bi::equal<cell_entry::equal_compare>,
+                                             bi::hash<cell_entry::hasher>,
+                                             bi::constant_time_size<false>>;
+
+        static constexpr size_t initial_bucket_count = 16;
+        using max_load_factor = std::ratio<3, 4>;
+        dht::decorated_key _key;
+        cell_locker& _parent;
+        size_t _rehash_at_size = compute_rehash_at_size(initial_bucket_count);
+        std::unique_ptr<cells_type::bucket_type[]> _buckets; // TODO: start with internal storage?
+        size_t _cell_count = 0; // cells_type::empty() is not O(1) if the hook is auto-unlink
+        cells_type::bucket_type _internal_buckets[initial_bucket_count];
+        cells_type _cells;
+        schema_ptr _schema;
+
+        friend class cell_entry;
+    private:
+        static constexpr size_t compute_rehash_at_size(size_t bucket_count) {
+            return bucket_count * max_load_factor::num / max_load_factor::den;
+        }
+        // Grows the bucket array once the number of cells reaches the
+        // threshold computed by compute_rehash_at_size().
+        void maybe_rehash() {
+            if (_cell_count >= _rehash_at_size) {
+                // Double the bucket count, but add at most 1024 buckets per step.
+                auto new_bucket_count = std::min(_cells.bucket_count() * 2, _cells.bucket_count() + 1024);
+                auto buckets = std::make_unique<cells_type::bucket_type[]>(new_bucket_count);
+
+                // Rehash into the new array first; only then replace _buckets,
+                // which releases whatever array it owned before.
+                _cells.rehash(cells_type::bucket_traits(buckets.get(), new_bucket_count));
+                _buckets = std::move(buckets);
+
+                _rehash_at_size = compute_rehash_at_size(new_bucket_count);
+            }
+        }
+    public:
+        partition_entry(schema_ptr s, cell_locker& parent, const dht::decorated_key& dk)
+            : _key(dk)
+            , _parent(parent)
+            , _cells(cells_type::bucket_traits(_internal_buckets, initial_bucket_count),
+                     cell_entry::hasher(*s), cell_entry::equal_compare(*s))
+            , _schema(s)
+        { }
+
+        // Keeps cell_locker::_partition_count in sync: only an entry that is
+        // still linked into the parent's set is counted there. An entry that
+        // was never inserted, or was already erased, must not decrement it.
+        ~partition_entry() {
+            if (is_linked()) {
+                _parent._partition_count--;
+            }
+        }
+
+        // Upgrades partition entry to new schema. Returns false if all
+        // cell_entries have been removed during the upgrade.
+        bool upgrade(schema_ptr new_schema);
+
+        // Links `cell` into this partition's cell set and counts it.
+        // The set is intrusive: it stores the object itself, not the
+        // lw_shared_ptr, so the caller must keep its own reference.
+        void insert(lw_shared_ptr<cell_entry> cell) {
+            _cells.insert(*cell);
+            _cell_count++;
+            maybe_rehash();
+        }
+
+        cells_type& cells() {
+            return _cells;
+        }
+
+        struct hasher {
+            size_t operator()(const dht::decorated_key& dk) const {
+                return std::hash<dht::decorated_key>()(dk);
+            }
+            size_t operator()(const partition_entry& pe) const {
+                return operator()(pe._key);
+            }
+        };
+
+        class equal_compare {
+            dht::decorated_key_equals_comparator _cmp;
+        public:
+            explicit equal_compare(const schema& s) : _cmp(s) { }
+            bool operator()(const dht::decorated_key& dk, const partition_entry& pe) {
+                return _cmp(dk, pe._key);
+            }
+            bool operator()(const partition_entry& pe, const dht::decorated_key& dk) {
+                return _cmp(dk, pe._key);
+            }
+            bool operator()(const partition_entry& a, const partition_entry& b) {
+                return _cmp(a._key, b._key);
+            }
+        };
+    };
+
+    using partitions_type = bi::unordered_set<partition_entry,
+                                              bi::equal<partition_entry::equal_compare>,
+                                              bi::hash<partition_entry::hasher>,
+                                              bi::constant_time_size<false>>;
+
+    static constexpr size_t initial_bucket_count = 4 * 1024;
+    using max_load_factor = std::ratio<3, 4>;
+
+    std::unique_ptr<partitions_type::bucket_type[]> _buckets;
+    partitions_type _partitions;
+    size_t _partition_count = 0;
+    size_t _rehash_at_size = compute_rehash_at_size(initial_bucket_count);
+    schema_ptr _schema;
+
+    // partitions_type uses equality comparator which keeps a reference to the
+    // original schema, we must ensure that it doesn't die.
+    schema_ptr _original_schema;
+    cell_locker_stats& _stats;
+
+    friend class locked_cell;
+private:
+    struct locker;
+
+    static constexpr size_t compute_rehash_at_size(size_t bucket_count) {
+        return bucket_count * max_load_factor::num / max_load_factor::den;
+    }
+    // Grows the partition bucket array once _partition_count reaches the
+    // threshold computed by compute_rehash_at_size().
+    void maybe_rehash() {
+        if (_partition_count >= _rehash_at_size) {
+            // Double the bucket count, but add at most 64 * 1024 buckets per step.
+            auto new_bucket_count = std::min(_partitions.bucket_count() * 2, _partitions.bucket_count() + 64 * 1024);
+            auto buckets = std::make_unique<partitions_type::bucket_type[]>(new_bucket_count);
+
+            // Rehash into the new array before replacing _buckets, which
+            // releases the previously owned array.
+            _partitions.rehash(partitions_type::bucket_traits(buckets.get(), new_bucket_count));
+            _buckets = std::move(buckets);
+
+            _rehash_at_size = compute_rehash_at_size(new_bucket_count);
+        }
+    }
+public:
+    // Creates an empty locker for schema `s`, reporting into `stats`
+    // (held by reference).
+    // `s` is stored twice: _schema may later be replaced via set_schema(),
+    // while _original_schema is kept so that the equality comparator built
+    // from `*s` below always refers to a live schema.
+    explicit cell_locker(schema_ptr s, cell_locker_stats& stats)
+        : _buckets(std::make_unique<partitions_type::bucket_type[]>(initial_bucket_count))
+        , _partitions(partitions_type::bucket_traits(_buckets.get(), initial_bucket_count),
+                      partition_entry::hasher(), partition_entry::equal_compare(*s))
+        , _schema(s)
+        , _original_schema(std::move(s))
+        , _stats(stats)
+    { }
+
+    ~cell_locker() {
+        assert(_partitions.empty());
+    }
+
+    void set_schema(schema_ptr s) {
+        _schema = s;
+    }
+    schema_ptr schema() const {
+        return _schema;
+    }
+
+    // partition_cells_range is required to be in cell_locker::schema()
+    future<std::vector<locked_cell>> lock_cells(const dht::decorated_key& dk, partition_cells_range&& range,
+                                                timeout_clock::time_point timeout);
+};
+
+
+// Move-only handle for a held cell lock. Destroying a handle that still owns
+// an entry signals that entry's semaphore (cell_entry::unlock()) and drops
+// the handle's reference to the entry.
+class locked_cell {
+    lw_shared_ptr<cell_locker::cell_entry> _entry;
+public:
+    explicit locked_cell(lw_shared_ptr<cell_locker::cell_entry> entry)
+        : _entry(std::move(entry)) { }
+
+    locked_cell(const locked_cell&) = delete;
+    locked_cell(locked_cell&&) = default;
+
+    ~locked_cell() {
+        // Skip handles without an entry (presumably moved-from ones).
+        if (_entry) {
+            _entry->unlock();
+        }
+    }
+};
+
+// State of one lock_cells() call against an existing partition_entry: a
+// cursor over the requested cells plus the locks collected so far.
+// Continuations capture `this`, hence the type is neither copyable nor movable.
+struct cell_locker::locker {
+    cell_entry::hasher _hasher;
+    cell_entry::equal_compare _eq_cmp;
+    partition_entry& _partition_entry;
+
+    partition_cells_range _range;
+    // Row currently being processed.
+    partition_cells_range::iterator _current_ck;
+    // Next column id to lock within *_current_ck.
+    cells_range::const_iterator _current_cell;
+
+    timeout_clock::time_point _timeout;
+    std::vector<locked_cell> _locks;
+    cell_locker_stats& _stats;
+private:
+    // Repositions _current_cell at the first cell of the current row, unless
+    // the whole range has been consumed.
+    void update_ck() {
+        if (!is_done()) {
+            _current_cell = _current_ck->begin();
+        }
+    }
+
+    future<> lock_next();
+
+    bool is_done() const { return _current_ck == _range.end(); }
+public:
+    explicit locker(const ::schema& s, cell_locker_stats& st, partition_entry& pe, partition_cells_range&& range, timeout_clock::time_point timeout)
+        : _hasher(s)
+        , _eq_cmp(s)
+        , _partition_entry(pe)
+        , _range(std::move(range))
+        , _current_ck(_range.begin())
+        , _timeout(timeout)
+        , _stats(st)
+    {
+        update_ck();
+    }
+
+    locker(const locker&) = delete;
+    locker(locker&&) = delete;
+
+    // Locks every cell of the range; the returned future resolves when all
+    // of them are held.
+    future<> lock_all() {
+        // Cannot defer before first call to lock_next().
+        return lock_next().then([this] {
+            return do_until([this] { return is_done(); }, [this] {
+                return lock_next();
+            });
+        });
+    }
+
+    // Hands over the collected locks; callable on rvalues only.
+    std::vector<locked_cell> get() && { return std::move(_locks); }
+};
+
+// Acquires a lock on every cell listed in `range` for partition `dk`.
+//
+// Uncontended path: when there is no usable partition_entry (none was found,
+// or the existing one lost all its cells in upgrade()), every cell_entry is
+// created here. A new cell_entry starts with a semaphore count of 0, so it is
+// held by its creator and the result is returned as a ready future.
+//
+// Contended path: a heap-allocated locker walks the range and waits, until
+// `timeout`, on cells that already exist. The locker is kept alive by the
+// continuation that extracts the result.
+inline
+future<std::vector<locked_cell>> cell_locker::lock_cells(const dht::decorated_key& dk, partition_cells_range&& range, timeout_clock::time_point timeout) {
+    partition_entry::hasher pe_hash;
+    partition_entry::equal_compare pe_eq(*_schema);
+
+    auto it = _partitions.find(dk, pe_hash, pe_eq);
+    std::unique_ptr<partition_entry> partition;
+    if (it == _partitions.end()) {
+        partition = std::make_unique<partition_entry>(_schema, *this, dk);
+    } else if (!it->upgrade(_schema)) {
+        // The existing entry has no cells left after the schema upgrade: take
+        // ownership of it and remove it from the set. The count is adjusted
+        // by hand because the entry is no longer linked afterwards, so its
+        // destructor will not do it.
+        partition = std::unique_ptr<partition_entry>(&*it);
+        _partition_count--;
+        _partitions.erase(it);
+    }
+
+    if (partition) {
+        // NOTE(review): if something in this loop throws after a cell was
+        // inserted, destroying `locks` appears to run ~cell_entry, which
+        // deletes the partition_entry while `partition` still owns it.
+        // Confirm whether allocation failure needs handling here.
+        std::vector<locked_cell> locks;
+        for (auto&& r : range) {
+            if (r.empty()) {
+                continue;
+            }
+            for (auto&& c : r) {
+                auto cell = make_lw_shared<cell_entry>(*partition, position_in_partition(r.position()), c);
+                _stats.lock_acquisitions++;
+                partition->insert(cell);
+                locks.emplace_back(std::move(cell));
+            }
+        }
+
+        if (!locks.empty()) {
+            // Ownership passes to the cells: the last cell_entry destroyed
+            // deletes the partition_entry.
+            _partitions.insert(*partition.release());
+            _partition_count++;
+            maybe_rehash();
+        }
+        // With no locks taken, `partition` is simply destroyed on return.
+        return make_ready_future<std::vector<locked_cell>>(std::move(locks));
+    }
+
+    auto l = std::make_unique<locker>(*_schema, _stats, *it, std::move(range), timeout);
+    auto f = l->lock_all();
+    return f.then([l = std::move(l)] {
+        return std::move(*l).get();
+    });
+}
+
+// Locks cells starting at the current cursor until either the range is
+// exhausted (returns a ready future) or a cell that already exists in the
+// partition entry is reached (returns a future that resolves once that one
+// cell has been acquired, or fails if the wait fails, e.g. on timeout).
+// lock_all() calls this repeatedly until is_done().
+inline
+future<> cell_locker::locker::lock_next() {
+    while (!is_done()) {
+        if (_current_cell == _current_ck->end()) {
+            // Current row is finished; advance to the next one, if any.
+            ++_current_ck;
+            update_ck();
+            continue;
+        }
+
+        auto cid = *_current_cell++;
+
+        cell_address ca { position_in_partition(_current_ck->position()), cid };
+        auto it = _partition_entry.cells().find(ca, _hasher, _eq_cmp);
+        if (it != _partition_entry.cells().end()) {
+            // The cell already has an entry: wait on its semaphore. Keep a
+            // reference so the entry stays alive for the duration of the wait.
+            auto ce = it->shared_from_this();
+            _stats.operations_waiting_for_lock++;
+            return it->lock(_timeout).finally([this] {
+                // Runs on success and on failure, so a failed wait (e.g. a
+                // timeout) does not leave the waiters counter inflated.
+                _stats.operations_waiting_for_lock--;
+            }).then([this, ce = std::move(ce)] () mutable {
+                _stats.lock_acquisitions++;
+                _locks.emplace_back(std::move(ce));
+            });
+        }
+
+        // No entry yet: create one. Its semaphore starts at 0, so the new
+        // entry is held by this operation from the moment it is created.
+        auto cell = make_lw_shared<cell_entry>(_partition_entry, position_in_partition(_current_ck->position()), cid);
+        _stats.lock_acquisitions++;
+        _partition_entry.insert(cell);
+        _locks.emplace_back(std::move(cell));
+    }
+    return make_ready_future<>();
+}
+
+// Re-keys every cell of this partition entry for `new_schema`.
+// Returns true immediately when the schema is unchanged; otherwise returns
+// whether any cells remain after the upgrade (_cell_count converted to bool).
+inline
+bool cell_locker::partition_entry::upgrade(schema_ptr new_schema) {
+    if (_schema == new_schema) {
+        return true;
+    }
+
+    // Build a replacement set with the same bucket count, using a hasher and
+    // comparator bound to the new schema.
+    auto buckets = std::make_unique<cells_type::bucket_type[]>(_cells.bucket_count());
+    auto cells = cells_type(cells_type::bucket_traits(buckets.get(), _cells.bucket_count()),
+                            cell_entry::hasher(*new_schema), cell_entry::equal_compare(*new_schema));
+
+    // The disposer does not destroy cells: each one is either moved into the
+    // new set or, if its column has no definition in the new schema, left
+    // unlinked and no longer counted.
+    _cells.clear_and_dispose([&] (cell_entry* cell_ptr) noexcept {
+        auto& cell = *cell_ptr;
+        auto kind = cell.position().is_static_row() ? column_kind::static_column
+                                                    : column_kind::regular_column;
+        auto reinsert = cell.upgrade(*_schema, *new_schema, kind);
+        if (reinsert) {
+            cells.insert(cell);
+        } else {
+            _cell_count--;
+        }
+    });
+
+    // bi::unordered_set move assignment is actually a swap.
+    // Original _buckets cannot be destroyed before the container using them is
+    // so we need to explicitly make sure that the original _cells is no more.
+    _cells = std::move(cells);
+    // Passing the swapped-out container by value destroys it right here.
+    auto destroy = [] (auto) { };
+    destroy(std::move(cells));
+
+    _buckets = std::move(buckets);
+    _schema = new_schema;
+    return _cell_count;
+}
--- a/checked-file-impl.hh
+++ b/checked-file-impl.hh
@@ -27,125 +27,136 @@
 class checked_file_impl : public file_impl {
 public:

-    checked_file_impl(disk_error_signal_type& s, file f)
-            : _signal(s) , _file(f) {
+    checked_file_impl(const io_error_handler& error_handler, file f)
+            : _error_handler(error_handler), _file(f) {
        _memory_dma_alignment = f.memory_dma_alignment();
        _disk_read_dma_alignment = f.disk_read_dma_alignment();
        _disk_write_dma_alignment = f.disk_write_dma_alignment();
    }

    virtual future<size_t> write_dma(uint64_t pos, const void* buffer, size_t len, const io_priority_class& pc) override {
-        return do_io_check(_signal, [&] {
+        return do_io_check(_error_handler, [&] {
            return get_file_impl(_file)->write_dma(pos, buffer, len, pc);
        });
    }

    virtual future<size_t> write_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc) override {
-        return do_io_check(_signal, [&] {
+        return do_io_check(_error_handler, [&] {
            return get_file_impl(_file)->write_dma(pos, iov, pc);
        });
    }

    virtual future<size_t> read_dma(uint64_t pos, void* buffer, size_t len, const io_priority_class& pc) override {
-        return do_io_check(_signal, [&] {
+        return do_io_check(_error_handler, [&] {
            return get_file_impl(_file)->read_dma(pos, buffer, len, pc);
        });
    }

    virtual future<size_t> read_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc) override {
-        return do_io_check(_signal, [&] {
+        return do_io_check(_error_handler, [&] {
            return get_file_impl(_file)->read_dma(pos, iov, pc);
        });
    }

    virtual future<> flush(void) override {
-        return do_io_check(_signal, [&] {
+        return do_io_check(_error_handler, [&] {
            return get_file_impl(_file)->flush();
        });
    }

    virtual future<struct stat> stat(void) override {
-        return do_io_check(_signal, [&] {
+        return do_io_check(_error_handler, [&] {
            return get_file_impl(_file)->stat();
        });
    }

    virtual future<> truncate(uint64_t length) override {
-        return do_io_check(_signal, [&] {
+        return do_io_check(_error_handler, [&] {
            return get_file_impl(_file)->truncate(length);
        });
    }

    virtual future<> discard(uint64_t offset, uint64_t length) override {
-        return do_io_check(_signal, [&] {
+        return do_io_check(_error_handler, [&] {
            return get_file_impl(_file)->discard(offset, length);
        });
    }

    virtual future<> allocate(uint64_t position, uint64_t length) override {
-        return do_io_check(_signal, [&] {
+        return do_io_check(_error_handler, [&] {
            return get_file_impl(_file)->allocate(position, length);
        });
    }

    virtual future<uint64_t> size(void) override {
-        return do_io_check(_signal, [&] {
+        return do_io_check(_error_handler, [&] {
            return get_file_impl(_file)->size();
        });
    }

    virtual future<> close() override {
-        return do_io_check(_signal, [&] {
+        return do_io_check(_error_handler, [&] {
            return get_file_impl(_file)->close();
        });
    }

+    // returns a handle for plain file, so make_checked_file() should be called
+    // on file returned by handle.
+    virtual std::unique_ptr<seastar::file_handle_impl> dup() override {
+        return get_file_impl(_file)->dup();
+    }
+
    virtual subscription<directory_entry> list_directory(std::function<future<> (directory_entry de)> next) override {
-        return do_io_check(_signal, [&] {
+        return do_io_check(_error_handler, [&] {
            return get_file_impl(_file)->list_directory(next);
        });
    }

+    virtual future<temporary_buffer<uint8_t>> dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc) override {
+        return do_io_check(_error_handler, [&] {
+            return get_file_impl(_file)->dma_read_bulk(offset, range_size, pc);
+        });
+    }
 private:
-    disk_error_signal_type &_signal;
+    const io_error_handler& _error_handler;
    file _file;
 };

-inline file make_checked_file(disk_error_signal_type& signal, file& f)
+inline file make_checked_file(const io_error_handler& error_handler, file f)
 {
-    return file(::make_shared<checked_file_impl>(signal, f));
+    return file(::make_shared<checked_file_impl>(error_handler, f));
 }

 future<file>
-inline open_checked_file_dma(disk_error_signal_type& signal,
+inline open_checked_file_dma(const io_error_handler& error_handler,
                             sstring name, open_flags flags,
                             file_open_options options)
 {
-    return do_io_check(signal, [&] {
+    return do_io_check(error_handler, [&] {
        return open_file_dma(name, flags, options).then([&] (file f) {
-            return make_ready_future<file>(make_checked_file(signal, f));
+            return make_ready_future<file>(make_checked_file(error_handler, f));
        });
    });
 }

 future<file>
-inline open_checked_file_dma(disk_error_signal_type& signal,
+inline open_checked_file_dma(const io_error_handler& error_handler,
                             sstring name, open_flags flags)
 {
-    return do_io_check(signal, [&] {
+    return do_io_check(error_handler, [&] {
        return open_file_dma(name, flags).then([&] (file f) {
-            return make_ready_future<file>(make_checked_file(signal, f));
+            return make_ready_future<file>(make_checked_file(error_handler, f));
        });
    });
 }

 future<file>
-inline open_checked_directory(disk_error_signal_type& signal,
+inline open_checked_directory(const io_error_handler& error_handler,
                              sstring name)
 {
-    return do_io_check(signal, [&] {
+    return do_io_check(error_handler, [&] {
        return engine().open_directory(name).then([&] (file f) {
-            return make_ready_future<file>(make_checked_file(signal, f));
+            return make_ready_future<file>(make_checked_file(error_handler, f));
        });
    });
 }
--- a/clocks-impl.cc
+++ b/clocks-impl.cc
@@ -19,6 +19,6 @@
 * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include "gc_clock.hh"
+#include "clocks-impl.hh"

 std::atomic<int64_t> clocks_offset;
--- a/clocks-impl.hh
+++ b/clocks-impl.hh
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <cstdint>
+
+extern std::atomic<int64_t> clocks_offset;
+
+// Adds `delta`, converted to whole seconds with duration_cast, to the global
+// clocks_offset.
+template<typename Duration>
+static inline void forward_jump_clocks(Duration delta)
+{
+    const auto whole_seconds = std::chrono::duration_cast<std::chrono::seconds>(delta);
+    clocks_offset.fetch_add(whole_seconds.count(), std::memory_order_relaxed);
+}
+
+// Reads the current value of the global clocks_offset as a number of seconds.
+static inline std::chrono::seconds get_clocks_offset()
+{
+    return std::chrono::seconds(clocks_offset.load(std::memory_order_relaxed));
+}
+
+// Returns a time point which is earlier than t by d, or the minimum time point if it cannot be represented.
+// The clamp is applied before the subtraction: t is first raised to at least
+// min() + d, so the final `- d` cannot go below min().
+// NOTE(review): min() + d itself looks like it would overflow for a negative d;
+// presumably callers only pass non-negative durations. Confirm.
+template<typename Clock, typename Duration, typename Rep, typename Period>
+inline
+auto saturating_subtract(std::chrono::time_point<Clock, Duration> t, std::chrono::duration<Rep, Period> d) -> decltype(t) {
+    return std::max(t, decltype(t)::min() + d) - d;
+}
--- a/clustering_bounds_comparator.hh
+++ b/clustering_bounds_comparator.hh
@@ -54,35 +54,62 @@ static inline bound_kind flip_bound_kind(bound_kind bk)
 }

 class bound_view {
-    const static thread_local clustering_key empty_prefix;
 public:
+    const static thread_local clustering_key empty_prefix;
    const clustering_key_prefix& prefix;
    bound_kind kind;
    bound_view(const clustering_key_prefix& prefix, bound_kind kind)
        : prefix(prefix)
        , kind(kind)
    { }
-    struct compare {
+    bound_view(const bound_view& other) noexcept = default;
+    // Copy assignment. bound_view holds a reference member (prefix), which
+    // cannot be re-seated by member-wise assignment, so the object is
+    // destroyed and copy-constructed again in place.
+    bound_view& operator=(const bound_view& other) noexcept {
+        // Self-assignment must be skipped: destroying *this first would
+        // also destroy the source of the copy.
+        if (this != &other) {
+            this->~bound_view();
+            new (this) bound_view(other);
+        }
+        return *this;
+    }
+    struct tri_compare {
        // To make it assignable and to avoid taking a schema_ptr, we
        // wrap the schema reference.
        std::reference_wrapper<const schema> _s;
-        compare(const schema& s) : _s(s)
+        tri_compare(const schema& s) : _s(s)
        { }
-        bool operator()(const clustering_key_prefix& p1, int32_t w1, const clustering_key_prefix& p2, int32_t w2) const {
+        int operator()(const clustering_key_prefix& p1, int32_t w1, const clustering_key_prefix& p2, int32_t w2) const {
            auto type = _s.get().clustering_key_prefix_type();
            auto res = prefix_equality_tri_compare(type->types().begin(),
                type->begin(p1), type->end(p1),
                type->begin(p2), type->end(p2),
-                tri_compare);
+                ::tri_compare);
            if (res) {
-                return res < 0;
+                return res;
            }
            auto d1 = p1.size(_s);
            auto d2 = p2.size(_s);
            if (d1 == d2) {
-                return w1 < w2;
+                return w1 - w2;
            }
-            return d1 < d2 ? w1 <= 0 : w2 > 0;
+            return d1 < d2 ? w1 - (w1 <= 0) : -(w2 - (w2 <= 0));
+        }
+        int operator()(const bound_view b, const clustering_key_prefix& p) const {
+            return operator()(b.prefix, weight(b.kind), p, 0);
+        }
+        int operator()(const clustering_key_prefix& p, const bound_view b) const {
+            return operator()(p, 0, b.prefix, weight(b.kind));
+        }
+        int operator()(const bound_view b1, const bound_view b2) const {
+            return operator()(b1.prefix, weight(b1.kind), b2.prefix, weight(b2.kind));
+        }
+    };
+    struct compare {
+        // Less-than comparator implemented on top of tri_compare, which
+        // wraps the schema reference.
+        tri_compare _cmp;
+        compare(const schema& s) : _cmp(s)
+        { }
+        bool operator()(const clustering_key_prefix& p1, int32_t w1, const clustering_key_prefix& p2, int32_t w2) const {
+            return _cmp(p1, w1, p2, w2) < 0;
        }
        bool operator()(const bound_view b, const clustering_key_prefix& p) const {
            return operator()(b.prefix, weight(b.kind), p, 0);
@@ -106,20 +133,33 @@ public:
    static bound_view top() {
        return {empty_prefix, bound_kind::incl_end};
    }
-    /*
-    template<template<typename> typename T, typename U>
-    concept bool Range() {
-        return requires (T<U> range) {
-            { range.start() } -> stdx::optional<U>;
-            { range.end() } -> stdx::optional<U>;
-        };
-    };*/
-    template<template<typename> typename Range>
-    static std::pair<bound_view, bound_view> from_range(const Range<clustering_key_prefix>& range) {
-        return {
-            range.start() ? bound_view(range.start()->value(), range.start()->is_inclusive() ? bound_kind::incl_start : bound_kind::excl_start) : bottom(),
-            range.end() ? bound_view(range.end()->value(), range.end()->is_inclusive() ? bound_kind::incl_end : bound_kind::excl_end) : top(),
-        };
+    // Returns the bound_view for the start of the given range: the start
+    // bound's value with kind incl_start or excl_start depending on its
+    // inclusiveness, or bottom() when the range has no start bound.
+    // The constraint is expressed over clustering_key_prefix, the element
+    // type of the range actually accepted, matching from_range_end() and
+    // from_range().
+    template<template<typename> typename R>
+    GCC6_CONCEPT( requires Range<R, clustering_key_prefix> )
+    static bound_view from_range_start(const R<clustering_key_prefix>& range) {
+        return range.start()
+               ? bound_view(range.start()->value(), range.start()->is_inclusive() ? bound_kind::incl_start : bound_kind::excl_start)
+               : bottom();
+    }
+    // Returns the bound_view for the end of the given range: top() when the
+    // range has no end bound, otherwise the end bound's value with kind
+    // incl_end or excl_end depending on its inclusiveness.
+    template<template<typename> typename R>
+    GCC6_CONCEPT( requires Range<R, clustering_key_prefix> )
+    static bound_view from_range_end(const R<clustering_key_prefix>& range) {
+        if (!range.end()) {
+            return top();
+        }
+        const bool inclusive = range.end()->is_inclusive();
+        return bound_view(range.end()->value(), inclusive ? bound_kind::incl_end : bound_kind::excl_end);
+    }
+    // Returns the pair {start bound, end bound} for the given range, as
+    // produced by from_range_start() and from_range_end().
+    template<template<typename> typename R>
+    GCC6_CONCEPT( requires Range<R, clustering_key_prefix> )
+    static std::pair<bound_view, bound_view> from_range(const R<clustering_key_prefix>& range) {
+        return {from_range_start(range), from_range_end(range)};
+    }
+    // Converts a bound_view into an optional range bound over
+    // clustering_key_prefix_view.
+    // Returns a disengaged optional when bv.prefix is the empty_prefix object
+    // itself (compared by address; top() is built on it). Otherwise the bound
+    // carries a view of the prefix and is inclusive unless the kind is
+    // excl_start or excl_end.
+    template<template<typename> typename R>
+    GCC6_CONCEPT( requires Range<R, clustering_key_prefix_view> )
+    static stdx::optional<typename R<clustering_key_prefix_view>::bound> to_range_bound(const bound_view& bv) {
+        if (&bv.prefix == &empty_prefix) {
+            return {};
+        }
+        bool inclusive = bv.kind != bound_kind::excl_end && bv.kind != bound_kind::excl_start;
+        return {typename R<clustering_key_prefix_view>::bound(bv.prefix.view(), inclusive)};
    }
    friend std::ostream& operator<<(std::ostream& out, const bound_view& b) {
        return out << "{bound: prefix=" << b.prefix << ", kind=" << b.kind << "}";
--- a/clustering_key_filter.hh
+++ b/clustering_key_filter.hh
@@ -54,6 +54,7 @@ public:
    auto end() const { return _ref.end(); }
    bool empty() const { return _ref.empty(); }
    size_t size() const { return _ref.size(); }
+    // Gives read-only access to the underlying clustering row ranges (_ref).
+    const clustering_row_ranges& ranges() const { return _ref; }

    static clustering_key_filter_ranges get_ranges(const schema& schema, const query::partition_slice& slice, const partition_key& key) {
        const query::clustering_row_ranges& ranges = slice.row_ranges(schema, key);
--- a/clustering_ranges_walker.hh
+++ b/clustering_ranges_walker.hh
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ *
+ * Modified by ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "schema.hh"
+#include "query-request.hh"
+#include "streamed_mutation.hh"
+
+// Utility for in-order checking of overlap with position ranges.
+//
+// Walks the given clustering row ranges front to back while being fed
+// positions through advance_to(). The walker stores references to the schema
+// and to the ranges, so both must outlive it.
+class clustering_ranges_walker {
+    const schema& _schema;
+    const query::clustering_row_ranges& _ranges;
+    // Range currently being walked and the end of _ranges.
+    query::clustering_row_ranges::const_iterator _current;
+    query::clustering_row_ranges::const_iterator _end;
+    bool _in_current; // next position is known to be >= _current_start
+    bool _with_static_row;
+    // Bounds of the span currently being walked. When constructed with
+    // with_static_row, the first span starts at the static row position and
+    // ends before all clustered rows.
+    position_in_partition_view _current_start;
+    position_in_partition_view _current_end;
+    // Position set by trim_front(); kept so that reset() can re-apply it.
+    stdx::optional<position_in_partition> _trim;
+    // Incremented when _current_start is changed; see lower_bound_change_counter().
+    size_t _change_counter = 1;
+private:
+    // Moves on to the next range. Returns false when there is no further range.
+    // While the current span is the static row one, _current already points at
+    // the first clustering range, so the iterator is not incremented.
+    bool advance_to_next_range() {
+        _in_current = false;
+        if (!_current_start.is_static_row()) {
+            if (_current == _end) {
+                return false;
+            }
+            ++_current;
+        }
+        ++_change_counter;
+        if (_current == _end) {
+            _current_end = _current_start = position_in_partition_view::after_all_clustered_rows();
+            return false;
+        }
+        _current_start = position_in_partition_view::for_range_start(*_current);
+        _current_end = position_in_partition_view::for_range_end(*_current);
+        return true;
+    }
+public:
+    // s and ranges are stored by reference.
+    // with_static_row: when true the walk starts at the static row position,
+    // before the first clustering range; when false it starts directly at the
+    // first range (or with an empty span if there are no ranges).
+    clustering_ranges_walker(const schema& s, const query::clustering_row_ranges& ranges, bool with_static_row = true)
+        : _schema(s)
+        , _ranges(ranges)
+        , _current(ranges.begin())
+        , _end(ranges.end())
+        , _in_current(with_static_row)
+        , _with_static_row(with_static_row)
+        , _current_start(position_in_partition_view::for_static_row())
+        , _current_end(position_in_partition_view::before_all_clustered_rows())
+    {
+        if (!with_static_row) {
+            if (_current == _end) {
+                _current_start = position_in_partition_view::before_all_clustered_rows();
+            } else {
+                _current_start = position_in_partition_view::for_range_start(*_current);
+                _current_end = position_in_partition_view::for_range_end(*_current);
+            }
+        }
+    }
+    // NOTE(review): trim_front() assigns _current_start from *_trim. If
+    // position_in_partition_view refers to storage owned by _trim, the copied
+    // _current_start would keep referring to o's moved-from _trim here —
+    // confirm against position_in_partition_view.
+    clustering_ranges_walker(clustering_ranges_walker&& o) noexcept
+        : _schema(o._schema)
+        , _ranges(o._ranges)
+        , _current(o._current)
+        , _end(o._end)
+        , _in_current(o._in_current)
+        , _with_static_row(o._with_static_row)
+        , _current_start(o._current_start)
+        , _current_end(o._current_end)
+        , _trim(std::move(o._trim))
+        , _change_counter(o._change_counter)
+    { }
+    // The class has reference members, which cannot be re-seated by
+    // assignment, so the object is destroyed and move-constructed in place.
+    clustering_ranges_walker& operator=(clustering_ranges_walker&& o) {
+        if (this != &o) {
+            this->~clustering_ranges_walker();
+            new (this) clustering_ranges_walker(std::move(o));
+        }
+        return *this;
+    }
+
+    // Excludes positions smaller than pos from the ranges.
+    // pos should be monotonic.
+    // No constraints between pos and positions passed to advance_to().
+    //
+    // After the invocation, when !out_of_range(), lower_bound() returns the smallest position still contained.
+    void trim_front(position_in_partition pos) {
+        position_in_partition::less_compare less(_schema);
+
+        do {
+            // The current span already starts at or after pos: nothing to trim.
+            if (!less(_current_start, pos)) {
+                break;
+            }
+            // pos falls inside the current span: make pos its new start.
+            if (less(pos, _current_end)) {
+                _trim = std::move(pos);
+                _current_start = *_trim;
+                _in_current = false;
+                ++_change_counter;
+                break;
+            }
+        } while (advance_to_next_range());
+    }
+
+    // Returns true if given position is contained.
+    // Must be called with monotonic positions.
+    // Idempotent.
+    bool advance_to(position_in_partition_view pos) {
+        position_in_partition::less_compare less(_schema);
+
+        do {
+            if (!_in_current && less(pos, _current_start)) {
+                break;
+            }
+            // All subsequent clustering keys are larger than the start of this
+            // range so there is no need to check that again.
+            _in_current = true;
+
+            if (less(pos, _current_end)) {
+                return true;
+            }
+        } while (advance_to_next_range());
+
+        return false;
+    }
+
+    // Returns true if the range expressed by start and end (as in position_range) overlaps
+    // with clustering ranges.
+    // Must be called with monotonic start position. That position must also be greater than
+    // the last position passed to the other advance_to() overload.
+    // Idempotent.
+    bool advance_to(position_in_partition_view start, position_in_partition_view end) {
+        position_in_partition::less_compare less(_schema);
+
+        do {
+            // The current span starts at or after end: no overlap with it.
+            if (!less(_current_start, end)) {
+                break;
+            }
+            if (less(start, _current_end)) {
+                return true;
+            }
+        } while (advance_to_next_range());
+
+        return false;
+    }
+
+    // Returns true if the range tombstone expressed by start and end (as in position_range) overlaps
+    // with clustering ranges.
+    // No monotonicity restrictions on argument values across calls.
+    // Does not affect lower_bound().
+    // Idempotent.
+    bool contains_tombstone(position_in_partition_view start, position_in_partition_view end) const {
+        position_in_partition::less_compare less(_schema);
+
+        // Entirely before the trimmed-off front: not contained.
+        if (_trim && less(end, *_trim)) {
+            return false;
+        }
+
+        // Scan from the current range onwards using a local iterator, so the
+        // walker state is left untouched.
+        auto i = _current;
+        while (i != _end) {
+            auto range_start = position_in_partition_view::for_range_start(*i);
+            if (less(end, range_start)) {
+                return false;
+            }
+            auto range_end = position_in_partition_view::for_range_end(*i);
+            if (less(start, range_end)) {
+                return true;
+            }
+            ++i;
+        }
+
+        return false;
+    }
+
+    // Returns true if advanced past all contained positions. Any later advance_to() until reset() will return false.
+    bool out_of_range() const {
+        return !_in_current && _current == _end;
+    }
+
+    // Resets the state of the walker so that advance_to() can be now called for new sequence of positions.
+    // Any range trimmings still hold after this.
+    void reset() {
+        // Save the trim position and the counter: the re-construction below
+        // discards both.
+        auto trim = std::move(_trim);
+        auto ctr = _change_counter;
+        *this = clustering_ranges_walker(_schema, _ranges, _with_static_row);
+        _change_counter = ctr + 1;
+        if (trim) {
+            trim_front(std::move(*trim));
+        }
+    }
+
+    // Can be called only when !out_of_range()
+    position_in_partition_view lower_bound() const {
+        return _current_start;
+    }
+
+    // When lower_bound() changes, this also does
+    // Always > 0.
+    size_t lower_bound_change_counter() const {
+        return _change_counter;
+    }
+};
--- a/compaction_strategy.hh
+++ b/compaction_strategy.hh
@@ -39,6 +39,7 @@ class compaction_strategy_impl;
 class sstable;
 class sstable_set;
 struct compaction_descriptor;
+struct resharding_descriptor;

 class compaction_strategy {
    ::shared_ptr<compaction_strategy_impl> _compaction_strategy_impl;
@@ -54,6 +55,12 @@ public:
    // Return a list of sstables to be compacted after applying the strategy.
    compaction_descriptor get_sstables_for_compaction(column_family& cfs, std::vector<lw_shared_ptr<sstable>> candidates);

+    std::vector<resharding_descriptor> get_resharding_jobs(column_family& cf, std::vector<lw_shared_ptr<sstable>> candidates);
+
+    // Some strategies may look at the compacted and resulting sstables to
+    // get some useful information for subsequent compactions.
+    void notify_completion(const std::vector<lw_shared_ptr<sstable>>& removed, const std::vector<lw_shared_ptr<sstable>>& added);
+
    // Return if parallel compaction is allowed by strategy.
    bool parallel_compaction() const;

--- a/compatible_ring_position.hh
+++ b/compatible_ring_position.hh
@@ -39,6 +39,9 @@ public:
    compatible_ring_position(const schema& s, dht::ring_position&& rp)
            : _schema(&s), _rp(std::move(rp)) {
    }
+    // Returns the token of the wrapped ring position (_rp).
+    const dht::token& token() const {
+        return _rp->token();
+    }
    friend int tri_compare(const compatible_ring_position& x, const compatible_ring_position& y) {
        return x._rp->tri_compare(*x._schema, *y._rp);
    }
--- a/compound.hh
+++ b/compound.hh
@@ -22,7 +22,7 @@
 #pragma once

 #include "types.hh"
-#include <iostream>
+#include <iosfwd>
 #include <algorithm>
 #include <vector>
 #include <boost/range/iterator_range.hpp>
@@ -130,10 +130,10 @@ public:
    bytes decompose_value(const value_type& values) {
        return serialize_value(values);
    }
-    class iterator : public std::iterator<std::input_iterator_tag, bytes_view> {
+    class iterator : public std::iterator<std::input_iterator_tag, const bytes_view> {
    private:
        bytes_view _v;
-        value_type _current;
+        bytes_view _current;
    private:
        void read_current() {
            size_type len;
@@ -220,6 +220,9 @@ public:
        assert(AllowPrefixes == allow_prefixes::yes);
        return std::distance(begin(v), end(v)) == (ssize_t)_types.size();
    }
+    // Returns true when iterating over v yields nothing, i.e. begin(v)
+    // already equals end(v).
+    bool is_empty(bytes_view v) const {
+        return begin(v) == end(v);
+    }
    void validate(bytes_view v) {
        // FIXME: implement
        warn(unimplemented::cause::VALIDATION);
--- a/compound_compat.hh
+++ b/compound_compat.hh
@@ -184,6 +184,8 @@ bytes to_legacy(CompoundType& type, bytes_view packed) {
    return legacy_form;
 }

+class composite_view;
+
 // Represents a value serialized according to Origin's CompositeType.
 // If is_compound is true, then the value is one or more components encoded as:
 //
@@ -202,7 +204,7 @@ public:
            , _is_compound(is_compound)
    { }

-    composite(bytes&& b)
+    explicit composite(bytes&& b)
            : _bytes(std::move(b))
            , _is_compound(true)
    { }
@@ -239,7 +241,7 @@ public:
    using component_view = std::pair<bytes_view, eoc>;
 private:
    template<typename Value, typename = std::enable_if_t<!std::is_same<const data_value, std::decay_t<Value>>::value>>
-    static size_t size(Value& val) {
+    static size_t size(const Value& val) {
        return val.size();
    }
    static size_t size(const data_value& val) {
@@ -304,23 +306,36 @@ public:
        return f(const_cast<bytes&>(_bytes));
    }

+    // marker is ignored if !is_compound
    template<typename RangeOfSerializedComponents>
-    static bytes serialize_value(RangeOfSerializedComponents&& values, bool is_compound = true) {
+    static composite serialize_value(RangeOfSerializedComponents&& values, bool is_compound = true, eoc marker = eoc::none) {
        auto size = serialized_size(values, is_compound);
        bytes b(bytes::initialized_later(), size);
        auto i = b.begin();
        serialize_value(std::forward<decltype(values)>(values), i, is_compound);
-        return b;
+        if (is_compound && !b.empty()) {
+            b.back() = eoc_type(marker);
+        }
+        return composite(std::move(b), is_compound);
+    }
+
+    // Builds the composite for a static value: two 0xff bytes followed by the
+    // compound serialization of s.clustering_key_size() empty components
+    // joined with the given values.
+    template<typename RangeOfSerializedComponents>
+    static composite serialize_static(const schema& s, RangeOfSerializedComponents&& values) {
+        // FIXME: Optimize
+        auto b = bytes(size_t(2), bytes::value_type(0xff));
+        // One default-constructed (empty) bytes_view per clustering key component.
+        std::vector<bytes_view> sv(s.clustering_key_size());
+        b += composite::serialize_value(boost::range::join(sv, std::forward<RangeOfSerializedComponents>(values)), true).release_bytes();
+        return composite(std::move(b));
+    }
+
+    // Maps a raw end-of-component byte to eoc: zero is none, negative is
+    // start, positive is end.
+    static eoc to_eoc(int8_t eoc_byte) {
+        if (eoc_byte == 0) {
+            return eoc::none;
+        }
+        return eoc_byte < 0 ? eoc::start : eoc::end;
+    }

    class iterator : public std::iterator<std::input_iterator_tag, const component_view> {
        bytes_view _v;
        component_view _current;
    private:
-        eoc to_eoc(int8_t eoc_byte) {
-            return eoc_byte == 0 ? eoc::none : (eoc_byte < 0 ? eoc::start : eoc::end);
-        }
-
        void read_current() {
            size_type len;
            {
@@ -406,6 +421,10 @@ public:
        return _bytes;
    }

+    // Moves the underlying bytes out of this composite. Rvalue-qualified, so
+    // it can only be called on a composite that is being given up.
+    bytes release_bytes() && {
+        return std::move(_bytes);
+    }
+
    size_t size() const {
        return _bytes.size();
    }
@@ -426,26 +445,20 @@ public:
        return _is_compound;
    }

-    // The following factory functions assume this composite is a compound value.
    template <typename ClusteringElement>
    static composite from_clustering_element(const schema& s, const ClusteringElement& ce) {
-        return serialize_value(ce.components(s));
+        return serialize_value(ce.components(s), s.is_compound());
    }

-    static composite from_exploded(const std::vector<bytes_view>& v, eoc marker = eoc::none) {
+    static composite from_exploded(const std::vector<bytes_view>& v, bool is_compound, eoc marker = eoc::none) {
        if (v.size() == 0) {
-            return bytes(size_t(1), bytes::value_type(marker));
+            return composite(bytes(size_t(1), bytes::value_type(marker)), is_compound);
        }
-        auto b = serialize_value(v);
-        b.back() = eoc_type(marker);
-        return composite(std::move(b));
+        return serialize_value(v, is_compound, marker);
    }

    static composite static_prefix(const schema& s) {
-        static bytes static_marker(size_t(2), bytes::value_type(0xff));
-
-        std::vector<bytes_view> sv(s.clustering_key_size());
-        return static_marker + serialize_value(sv);
+        return serialize_static(s, std::vector<bytes_view>());
    }

    explicit operator bytes_view() const {
@@ -456,6 +469,15 @@ public:
    friend inline std::ostream& operator<<(std::ostream& os, const std::pair<Component, eoc>& c) {
        return os << "{value=" << c.first << "; eoc=" << sprint("0x%02x", eoc_type(c.second) & 0xff) << "}";
    }
+
+    friend std::ostream& operator<<(std::ostream& os, const composite& v);
+
+    // Three-way comparator for composites. Stores a reference to the vector
+    // of component types, so that vector must outlive the comparator.
+    struct tri_compare {
+        const std::vector<data_type>& _types;
+        tri_compare(const std::vector<data_type>& types) : _types(types) {}
+        int operator()(const composite&, const composite&) const;
+        int operator()(composite_view, composite_view) const;
+    };
 };

 class composite_view final {
@@ -476,14 +498,15 @@ public:
            , _is_compound(true)
    { }

-    std::vector<bytes> explode() const {
+    std::vector<bytes_view> explode() const {
        if (!_is_compound) {
-            return { to_bytes(_bytes) };
+            return { _bytes };
        }

-        std::vector<bytes> ret;
+        std::vector<bytes_view> ret;
+        ret.reserve(8);
        for (auto it = begin(), e = end(); it != e; ) {
-            ret.push_back(to_bytes(it->first));
+            ret.push_back(it->first);
            auto marker = it->second;
            ++it;
            if (it != e && marker != composite::eoc::none) {
@@ -505,6 +528,15 @@ public:
        return { begin(), end() };
    }

+    // Returns the end-of-component marker decoded from the last byte, or
+    // eoc::none when the value is not compound or has no bytes.
+    composite::eoc last_eoc() const {
+        if (_is_compound && !_bytes.empty()) {
+            bytes_view tail(_bytes);
+            // Keep only the final byte, which holds the marker.
+            tail.remove_prefix(tail.size() - 1);
+            return composite::to_eoc(read_simple<composite::eoc_type>(tail));
+        }
+        return composite::eoc::none;
+    }
+
    auto values() const {
        return components() | boost::adaptors::transformed([](auto&& c) { return c.first; });
    }
@@ -527,4 +559,46 @@ public:

    bool operator==(const composite_view& k) const { return k._bytes == _bytes && k._is_compound == _is_compound; }
    bool operator!=(const composite_view& k) const { return !(k == *this); }
+
+    // Prints the components followed by the compound and static flags.
+    friend inline std::ostream& operator<<(std::ostream& os, composite_view v) {
+        return os << "{" << ::join(", ", v.components()) << ", compound=" << v._is_compound << ", static=" << v.is_static() << "}";
+    }
 };
+
+// Prints a composite by streaming a composite_view constructed from it.
+inline
+std::ostream& operator<<(std::ostream& os, const composite& v) {
+    return os << composite_view(v);
+}
+
+// Compares two composites by delegating to the composite_view overload.
+inline
+int composite::tri_compare::operator()(const composite& v1, const composite& v2) const {
+    return (*this)(composite_view(v1), composite_view(v2));
+}
+
+// Three-way comparison of two composite views.
+// Order of checks: an empty value sorts before a non-empty one (two empty
+// values are equal); then a static value sorts before a non-static one;
+// otherwise the components are compared with lexicographical_tri_compare,
+// each pair first by value using the corresponding type and then by EOC.
+inline
+int composite::tri_compare::operator()(composite_view v1, composite_view v2) const {
+    // See org.apache.cassandra.db.composites.AbstractCType#compare
+    if (v1.empty()) {
+        return v2.empty() ? 0 : -1;
+    }
+    if (v2.empty()) {
+        return 1;
+    }
+    if (v1.is_static() != v2.is_static()) {
+        return v1.is_static() ? -1 : 1;
+    }
+    auto a_values = v1.components();
+    auto b_values = v2.components();
+    auto cmp = [&](const data_type& t, component_view c1, component_view c2) {
+        // First by value, then by EOC
+        auto r = t->compare(c1.first, c2.first);
+        if (r) {
+            return r;
+        }
+        // Equal values: the difference of the EOC markers decides.
+        return static_cast<int>(c1.second) - static_cast<int>(c2.second);
+    };
+    return lexicographical_tri_compare(_types.begin(), _types.end(),
+        a_values.begin(), a_values.end(),
+        b_values.begin(), b_values.end(),
+        cmp);
+}
--- a/compress.hh
+++ b/compress.hh
@@ -39,17 +39,17 @@ public:
    static constexpr auto CHUNK_LENGTH_KB = "chunk_length_kb";
    static constexpr auto CRC_CHECK_CHANCE = "crc_check_chance";
 private:
-    compressor _compressor = compressor::none;
+    compressor _compressor;
    std::experimental::optional<int> _chunk_length;
    std::experimental::optional<double> _crc_check_chance;
 public:
-    compression_parameters() = default;
-    compression_parameters(compressor c) : _compressor(c) { }
+    compression_parameters(compressor c = compressor::lz4) : _compressor(c) { }
    compression_parameters(const std::map<sstring, sstring>& options) {
        validate_options(options);

        auto it = options.find(SSTABLE_COMPRESSION);
        if (it == options.end() || it->second.empty()) {
+            _compressor = compressor::none;
            return;
        }
        const auto& compressor_class = it->second;
--- a/conf/scylla.yaml
+++ b/conf/scylla.yaml
@@ -89,6 +89,15 @@ listen_address: localhost
 # For security reasons, you should not expose this port to the internet.  Firewall it if needed.
 native_transport_port: 9042

+# Enabling native transport encryption in client_encryption_options allows you to either use
+# encryption for the standard port or to use a dedicated, additional port along with the unencrypted
+# standard native_transport_port.
+# Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption
+# for native_transport_port. Setting native_transport_port_ssl to a different value
+# from native_transport_port will use encryption for native_transport_port_ssl while
+# keeping native_transport_port unencrypted.
+#native_transport_port_ssl: 9142
+
 # Throttles all outbound streaming file transfers on this node to the
 # given total throughput in Mbps. This is necessary because Scylla does
 # mostly sequential IO when streaming data during bootstrap or repair, which
@@ -192,6 +201,9 @@ api_address: 127.0.0.1
 # Caution should be taken on increasing the size of this threshold as it can lead to node instability.
 batch_size_warn_threshold_in_kb: 5

+# Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default.
+batch_size_fail_threshold_in_kb: 50
+
 # Authentication backend, identifying users
 # Out of the box, Scylla provides org.apache.cassandra.auth.{AllowAllAuthenticator,
 # PasswordAuthenticator}.
@@ -217,6 +229,15 @@ batch_size_warn_threshold_in_kb: 5
 # that do not have vnodes enabled.
 # initial_token:

+# RPC address to broadcast to drivers and other Scylla nodes. This cannot
+# be set to 0.0.0.0. If left blank, this will be set to the value of
+# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must
+# be set.
+# broadcast_rpc_address: 1.2.3.4
+
+# Uncomment to enable experimental features
+# experimental: true
+
 ###################################################
 ## Not currently supported, reserved for future use
 ###################################################
@@ -273,28 +294,6 @@ batch_size_warn_threshold_in_kb: 5
 #
 partitioner: org.apache.cassandra.dht.Murmur3Partitioner

-
-# policy for data disk failures:
-# die: shut down gossip and Thrift and kill the JVM for any fs errors or
-#      single-sstable errors, so the node can be replaced.
-# stop_paranoid: shut down gossip and Thrift even for single-sstable errors.
-# stop: shut down gossip and Thrift, leaving the node effectively dead, but
-#       can still be inspected via JMX.
-# best_effort: stop using the failed disk and respond to requests based on
-#              remaining available sstables.  This means you WILL see obsolete
-#              data at CL.ONE!
-# ignore: ignore fatal errors and let requests fail, as in pre-1.2 Scylla
-# disk_failure_policy: stop
-
-# policy for commit disk failures:
-# die: shut down gossip and Thrift and kill the JVM, so the node can be replaced.
-# stop: shut down gossip and Thrift, leaving the node effectively dead, but
-#       can still be inspected via JMX.
-# stop_commit: shutdown the commit log, letting writes collect but
-#              continuing to service reads, as in pre-2.0.5 Scylla
-# ignore: ignore fatal errors and let the batches fail
-# commit_failure_policy: stop
-
 # Maximum size of the key cache in memory.
 #
 # Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the
@@ -409,29 +408,6 @@ partitioner: org.apache.cassandra.dht.Murmur3Partitioner
 # the smaller of 1/4 of heap or 512MB.
 # file_cache_size_in_mb: 512

-# Total permitted memory to use for memtables. Scylla will stop 
-# accepting writes when the limit is exceeded until a flush completes,
-# and will trigger a flush based on memtable_cleanup_threshold
-# If omitted, Scylla will set both to 1/4 the size of the heap.
-# memtable_heap_space_in_mb: 2048
-# memtable_offheap_space_in_mb: 2048
-
-# Ratio of occupied non-flushing memtable size to total permitted size
-# that will trigger a flush of the largest memtable.  Lager mct will
-# mean larger flushes and hence less compaction, but also less concurrent
-# flush activity which can make it difficult to keep your disks fed
-# under heavy write load.
-#
-# memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1)
-# memtable_cleanup_threshold: 0.11
-
-# Specify the way Scylla allocates and manages memtable memory.
-# Options are:
-#   heap_buffers:    on heap nio buffers
-#   offheap_buffers: off heap (direct) nio buffers
-#   offheap_objects: native memory, eliminating nio buffer heap overhead
-# memtable_allocation_type: heap_buffers
-
 # Total space to use for commitlogs.
 #
 # If space gets above this value (it will round up to the next nearest
@@ -443,17 +419,6 @@ partitioner: org.apache.cassandra.dht.Murmur3Partitioner
 # available for Scylla.
 commitlog_total_space_in_mb: -1

-# This sets the amount of memtable flush writer threads.  These will
-# be blocked by disk io, and each one will hold a memtable in memory
-# while blocked. 
-#
-# memtable_flush_writers defaults to the smaller of (number of disks,
-# number of cores), with a minimum of 2 and a maximum of 8.
-# 
-# If your data directories are backed by SSD, you should increase this
-# to the number of cores.
-#memtable_flush_writers: 8
-
 # A fixed memory pool size in MB for for SSTable index summaries. If left
 # empty, this will default to 5% of the heap size. If the memory usage of
 # all index summaries exceeds this limit, SSTables with low read rates will
@@ -518,13 +483,6 @@ commitlog_total_space_in_mb: -1
 # Whether to start the thrift rpc server.
 # start_rpc: true

-
-# RPC address to broadcast to drivers and other Scylla nodes. This cannot
-# be set to 0.0.0.0. If left blank, this will be set to the value of
-# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must
-# be set.
-# broadcast_rpc_address: 1.2.3.4
-
 # enable or disable keepalive on rpc/native connections
 # rpc_keepalive: true

@@ -762,22 +720,17 @@ commitlog_total_space_in_mb: -1
 #    certificate: conf/scylla.crt
 #    keyfile: conf/scylla.key
 #    truststore: <none, use system trust>
+#    require_client_auth: False
+#    priority_string: <none, use default>

 # enable or disable client/server encryption.
 # client_encryption_options:
 #    enabled: false
 #    certificate: conf/scylla.crt
 #    keyfile: conf/scylla.key
-
-    # require_client_auth: false
-    # Set trustore and truststore_password if require_client_auth is true
-    # truststore: conf/.truststore
-    # truststore_password: cassandra
-    # More advanced defaults below:
-    # protocol: TLS
-    # algorithm: SunX509
-    # store_type: JKS
-    # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA]
+#    truststore: <none, use system trust>
+#    require_client_auth: False
+#    priority_string: <none, use default>

 # internode_compression controls whether traffic between nodes is
 # compressed.
@@ -823,3 +776,23 @@ commitlog_total_space_in_mb: -1
 # By default, Scylla binds all interfaces to the prometheus API
 # It is possible to restrict the listening address to a specific one
 # prometheus_address: 0.0.0.0
+
+# Distribution of data among cores (shards) within a node
+#
+# Scylla distributes data within a node among shards, using a round-robin
+# strategy:
+#  [shard0] [shard1] ... [shardN-1] [shard0] [shard1] ... [shardN-1] ...
+#
+# Scylla versions 1.6 and below used just one repetition of the pattern;
+# this interfered with data placement among nodes (vnodes).
+#
+# Scylla versions 1.7 and above use 4096 repetitions of the pattern; this
+# provides for better data distribution.
+#
+# The value below is log (base 2) of the number of repetitions.
+#
+# Set to 0 to avoid rewriting all data when upgrading from Scylla 1.6 and
+# below.
+#
+# Keep at 12 for new clusters.
+murmur3_partitioner_ignore_msb_bits: 12
--- a/configure.py
+++ b/configure.py
@@ -34,7 +34,7 @@ for line in open('/etc/os-release'):
        os_ids += value.split(' ')

 # distribution "internationalization", converting package names.
-# Fedora name is key, values is distro -> package name dict. 
+# Fedora name is key, values is distro -> package name dict.
 i18n_xlat = {
    'boost-devel': {
        'debian': 'libboost-dev',
@@ -48,7 +48,7 @@ def pkgname(name):
        for id in os_ids:
            if id in dict:
                return dict[id]
-    return name 
+    return name

 def get_flags():
    with open('/proc/cpuinfo') as f:
@@ -93,7 +93,7 @@ def try_compile(compiler, source = '', flags = []):
 def warning_supported(warning, compiler):
    # gcc ignores -Wno-x even if it is not supported
    adjusted = re.sub('^-Wno-', '-W', warning)
-    return try_compile(flags = [adjusted], compiler = compiler)
+    return try_compile(flags = ['-Werror', adjusted], compiler = compiler)

 def debug_flag(compiler):
    src_with_auto = textwrap.dedent('''\
@@ -108,6 +108,11 @@ def debug_flag(compiler):
        print('Note: debug information disabled; upgrade your compiler')
        return ''

+def maybe_static(flag, libs):
+    if flag and not args.static:
+        libs = '-Wl,-Bstatic {} -Wl,-Bdynamic'.format(libs)
+    return libs
+
 class Thrift(object):
    def __init__(self, source, service):
        self.source = source
@@ -170,6 +175,8 @@ scylla_tests = [
    'tests/keys_test',
    'tests/partitioner_test',
    'tests/frozen_mutation_test',
+    'tests/serialized_action_test',
+    'tests/clustering_ranges_walker_test',
    'tests/perf/perf_mutation',
    'tests/lsa_async_eviction_test',
    'tests/lsa_sync_eviction_test',
@@ -178,18 +185,21 @@ scylla_tests = [
    'tests/perf/perf_hash',
    'tests/perf/perf_cql_parser',
    'tests/perf/perf_simple_query',
+    'tests/perf/perf_fast_forward',
+    'tests/cache_streamed_mutation_test',
+    'tests/row_cache_stress_test',
    'tests/memory_footprint',
    'tests/perf/perf_sstable',
    'tests/cql_query_test',
    'tests/storage_proxy_test',
    'tests/schema_change_test',
    'tests/mutation_reader_test',
-    'tests/key_reader_test',
    'tests/mutation_query_test',
    'tests/row_cache_test',
    'tests/test-serialization',
    'tests/sstable_test',
    'tests/sstable_mutation_test',
+    'tests/sstable_resharding_test',
    'tests/memtable_test',
    'tests/commitlog_test',
    'tests/cartesian_product_test',
@@ -211,6 +221,7 @@ scylla_tests = [
    'tests/murmur_hash_test',
    'tests/allocation_strategy_test',
    'tests/logalloc_test',
+    'tests/log_histogram_test',
    'tests/managed_vector_test',
    'tests/crc_test',
    'tests/flush_queue_test',
@@ -222,6 +233,12 @@ scylla_tests = [
    'tests/database_test',
    'tests/nonwrapping_range_test',
    'tests/input_stream_test',
+    'tests/sstable_atomic_deletion_test',
+    'tests/virtual_reader_test',
+    'tests/view_schema_test',
+    'tests/counter_test',
+    'tests/cell_locker_test',
+    'tests/loading_cache_test',
 ]

 apps = [
@@ -252,6 +269,8 @@ arg_parser.add_argument('--ldflags', action = 'store', dest = 'user_ldflags', de
                        help = 'Extra flags for the linker')
 arg_parser.add_argument('--compiler', action = 'store', dest = 'cxx', default = 'g++',
                        help = 'C++ compiler path')
+arg_parser.add_argument('--c-compiler', action='store', dest='cc', default='gcc',
+                        help='C compiler path')
 arg_parser.add_argument('--with-osv', action = 'store', dest = 'with_osv', default = '',
                        help = 'Shortcut for compile for OSv')
 arg_parser.add_argument('--enable-dpdk', action = 'store_true', dest = 'dpdk', default = False,
@@ -263,13 +282,19 @@ arg_parser.add_argument('--debuginfo', action = 'store', dest = 'debuginfo', typ
 arg_parser.add_argument('--static-stdc++', dest = 'staticcxx', action = 'store_true',
 			help = 'Link libgcc and libstdc++ statically')
 arg_parser.add_argument('--static-thrift', dest = 'staticthrift', action = 'store_true',
-			help = 'Link libthrift statically')
+            help = 'Link libthrift statically')
+arg_parser.add_argument('--static-boost', dest = 'staticboost', action = 'store_true',
+            help = 'Link boost statically')
 arg_parser.add_argument('--tests-debuginfo', action = 'store', dest = 'tests_debuginfo', type = int, default = 0,
                        help = 'Enable(1)/disable(0)compiler debug information generation for tests')
 arg_parser.add_argument('--python', action = 'store', dest = 'python', default = 'python3',
                        help = 'Python3 path')
 add_tristate(arg_parser, name = 'hwloc', dest = 'hwloc', help = 'hwloc support')
 add_tristate(arg_parser, name = 'xen', dest = 'xen', help = 'Xen support')
+arg_parser.add_argument('--enable-gcc6-concepts', dest='gcc6_concepts', action='store_true', default=False,
+                        help='enable experimental support for C++ Concepts as implemented in GCC 6')
+arg_parser.add_argument('--enable-alloc-failure-injector', dest='alloc_failure_injector', action='store_true', default=False,
+                        help='enable allocation failure injection')
 args = arg_parser.parse_args()

 defines = []
@@ -292,6 +317,7 @@ scylla_core = (['database.cc',
                 'memtable.cc',
                 'schema_mutations.cc',
                 'release.cc',
+                 'supervisor.cc',
                 'utils/logalloc.cc',
                 'utils/large_bitset.cc',
                 'mutation_partition.cc',
@@ -299,8 +325,8 @@ scylla_core = (['database.cc',
                 'mutation_partition_serializer.cc',
                 'mutation_reader.cc',
                 'mutation_query.cc',
-                 'key_reader.cc',
                 'keys.cc',
+                 'counters.cc',
                 'sstables/sstables.cc',
                 'sstables/compress.cc',
                 'sstables/row.cc',
@@ -309,6 +335,7 @@ scylla_core = (['database.cc',
                 'sstables/compaction.cc',
                 'sstables/compaction_strategy.cc',
                 'sstables/compaction_manager.cc',
+                 'sstables/atomic_deletion.cc',
                 'transport/event.cc',
                 'transport/event_notifier.cc',
                 'transport/server.cc',
@@ -328,10 +355,13 @@ scylla_core = (['database.cc',
                 'cql3/statements/authentication_statement.cc',
                 'cql3/statements/create_keyspace_statement.cc',
                 'cql3/statements/create_table_statement.cc',
+                 'cql3/statements/create_view_statement.cc',
                 'cql3/statements/create_type_statement.cc',
                 'cql3/statements/create_user_statement.cc',
+                 'cql3/statements/drop_index_statement.cc',
                 'cql3/statements/drop_keyspace_statement.cc',
                 'cql3/statements/drop_table_statement.cc',
+                 'cql3/statements/drop_view_statement.cc',
                 'cql3/statements/drop_type_statement.cc',
                 'cql3/statements/schema_altering_statement.cc',
                 'cql3/statements/ks_prop_defs.cc',
@@ -348,6 +378,7 @@ scylla_core = (['database.cc',
                 'cql3/statements/create_index_statement.cc',
                 'cql3/statements/truncate_statement.cc',
                 'cql3/statements/alter_table_statement.cc',
+                 'cql3/statements/alter_view_statement.cc',
                 'cql3/statements/alter_user_statement.cc',
                 'cql3/statements/drop_user_statement.cc',
                 'cql3/statements/list_users_statement.cc',
@@ -393,16 +424,22 @@ scylla_core = (['database.cc',
                 'cql3/selection/selector.cc',
                 'cql3/restrictions/statement_restrictions.cc',
                 'cql3/result_set.cc',
+                 'cql3/variable_specifications.cc',
                 'db/consistency_level.cc',
                 'db/system_keyspace.cc',
                 'db/schema_tables.cc',
+                 'db/cql_type_parser.cc',
+                 'db/legacy_schema_migrator.cc',
                 'db/commitlog/commitlog.cc',
                 'db/commitlog/commitlog_replayer.cc',
                 'db/commitlog/commitlog_entry.cc',
                 'db/config.cc',
+                 'db/heat_load_balance.cc',
                 'db/index/secondary_index.cc',
                 'db/marshal/type_parser.cc',
                 'db/batchlog_manager.cc',
+                 'db/view/view.cc',
+                 'index/secondary_index_manager.cc',
                 'io/io.cc',
                 'utils/utils.cc',
                 'utils/UUID_gen.cc',
@@ -423,6 +460,7 @@ scylla_core = (['database.cc',
                 'gms/gossip_digest_ack2.cc',
                 'gms/endpoint_state.cc',
                 'gms/application_state.cc',
+                 'gms/inet_address.cc',
                 'dht/i_partitioner.cc',
                 'dht/murmur3_partitioner.cc',
                 'dht/byte_ordered_partitioner.cc',
@@ -450,7 +488,7 @@ scylla_core = (['database.cc',
                 'service/client_state.cc',
                 'service/migration_task.cc',
                 'service/storage_service.cc',
-                 'service/load_broadcaster.cc',
+                 'service/misc_services.cc',
                 'service/pager/paging_state.cc',
                 'service/pager/query_pagers.cc',
                 'streaming/stream_task.cc',
@@ -466,12 +504,12 @@ scylla_core = (['database.cc',
                 'streaming/stream_manager.cc',
                 'streaming/stream_result_future.cc',
                 'streaming/stream_session_state.cc',
-                 'gc_clock.cc',
+                 'clocks-impl.cc',
                 'partition_slice_builder.cc',
                 'init.cc',
+                 'lister.cc',
                 'repair/repair.cc',
                 'exceptions/exceptions.cc',
-                 'dns.cc',
                 'auth/auth.cc',
                 'auth/authenticated_user.cc',
                 'auth/authenticator.cc',
@@ -485,7 +523,7 @@ scylla_core = (['database.cc',
                 'tracing/trace_state.cc',
                 'range_tombstone.cc',
                 'range_tombstone_list.cc',
-                 'db/size_estimates_recorder.cc'
+                 'disk-error-handler.cc'
                 ]
                + [Antlr3Grammar('cql3/Cql.g')]
                + [Thrift('interface/cassandra.thrift', 'Cassandra')]
@@ -546,6 +584,8 @@ idls = ['idl/gossip_digest.idl.hh',
        'idl/idl_test.idl.hh',
        'idl/commitlog.idl.hh',
        'idl/tracing.idl.hh',
+        'idl/consistency_level.idl.hh',
+        'idl/cache_temperature.idl.hh',
        ]

 scylla_tests_dependencies = scylla_core + api + idls + [
@@ -564,63 +604,80 @@ deps = {
    'scylla': idls + ['main.cc'] + scylla_core + api,
 }

-tests_not_using_seastar_test_framework = set([
-    'tests/keys_test',
+pure_boost_tests = set([
    'tests/partitioner_test',
    'tests/map_difference_test',
+    'tests/keys_test',
+    'tests/compound_test',
+    'tests/range_tombstone_list_test',
+    'tests/anchorless_list_test',
+    'tests/nonwrapping_range_test',
+    'tests/test-serialization',
+    'tests/range_test',
+    'tests/crc_test',
+    'tests/managed_vector_test',
+    'tests/dynamic_bitset_test',
+    'tests/idl_test',
+    'tests/cartesian_product_test',
+])
+
+tests_not_using_seastar_test_framework = set([
    'tests/perf/perf_mutation',
    'tests/lsa_async_eviction_test',
    'tests/lsa_sync_eviction_test',
    'tests/row_cache_alloc_stress',
    'tests/perf_row_cache_update',
-    'tests/cartesian_product_test',
    'tests/perf/perf_hash',
    'tests/perf/perf_cql_parser',
    'tests/message',
    'tests/perf/perf_simple_query',
+    'tests/perf/perf_fast_forward',
+    'tests/row_cache_stress_test',
    'tests/memory_footprint',
-    'tests/test-serialization',
    'tests/gossip',
-    'tests/compound_test',
-    'tests/range_test',
-    'tests/crc_test',
    'tests/perf/perf_sstable',
-    'tests/managed_vector_test',
-    'tests/dynamic_bitset_test',
-    'tests/idl_test',
-    'tests/range_tombstone_list_test',
-    'tests/anchorless_list_test',
-    'tests/nonwrapping_range_test',
-])
+]) | pure_boost_tests

 for t in tests_not_using_seastar_test_framework:
    if not t in scylla_tests:
        raise Exception("Test %s not found in scylla_tests" % (t))

 for t in scylla_tests:
-    deps[t] = scylla_tests_dependencies + [t + '.cc']
+    deps[t] = [t + '.cc']
    if t not in tests_not_using_seastar_test_framework:
+        deps[t] += scylla_tests_dependencies
        deps[t] += scylla_tests_seastar_deps
+    else:
+        deps[t] += scylla_core + api + idls + ['tests/cql_test_env.cc']

 deps['tests/sstable_test'] += ['tests/sstable_datafile_test.cc']

-deps['tests/bytes_ostream_test'] = ['tests/bytes_ostream_test.cc']
+deps['tests/bytes_ostream_test'] = ['tests/bytes_ostream_test.cc', 'utils/managed_bytes.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
 deps['tests/input_stream_test'] = ['tests/input_stream_test.cc']
-deps['tests/UUID_test'] = ['utils/UUID_gen.cc', 'tests/UUID_test.cc']
+deps['tests/UUID_test'] = ['utils/UUID_gen.cc', 'tests/UUID_test.cc', 'utils/uuid.cc', 'utils/managed_bytes.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
 deps['tests/murmur_hash_test'] = ['bytes.cc', 'utils/murmur_hash.cc', 'tests/murmur_hash_test.cc']
 deps['tests/allocation_strategy_test'] = ['tests/allocation_strategy_test.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
+deps['tests/log_histogram_test'] = ['tests/log_histogram_test.cc']
 deps['tests/anchorless_list_test'] = ['tests/anchorless_list_test.cc']

 warnings = [
    '-Wno-mismatched-tags',  # clang-only
    '-Wno-maybe-uninitialized', # false positives on gcc 5
+    '-Wno-tautological-compare',
+    '-Wno-parentheses-equality',
+    '-Wno-c++11-narrowing',
+    '-Wno-c++1z-extensions',
+    '-Wno-sometimes-uninitialized',
+    '-Wno-return-stack-address',
+    '-Wno-missing-braces',
+    '-Wno-unused-lambda-capture',
    ]

 warnings = [w
            for w in warnings
            if warning_supported(warning = w, compiler = args.cxx)]

-warnings = ' '.join(warnings)
+warnings = ' '.join(warnings + ['-Wno-error=deprecated-declarations'])

 dbgflag = debug_flag(args.cxx) if args.debuginfo else ''
 tests_link_rule = 'link' if args.tests_debuginfo else 'link_stripped'
@@ -674,6 +731,9 @@ if not try_compile(compiler=args.cxx, source='''\
    print('Installed boost version too old.  Please update {}.'.format(pkgname("boost-devel")))
    sys.exit(1)

+
+has_sanitize_address_use_after_scope = try_compile(compiler=args.cxx, flags=['-fsanitize-address-use-after-scope'], source='int f() {}')
+
 defines = ' '.join(['-D' + d for d in defines])

 globals().update(vars(args))
@@ -696,7 +756,7 @@ scylla_release = file.read().strip()

 extra_cxxflags["release.cc"] = "-DSCYLLA_VERSION=\"\\\"" + scylla_version + "\\\"\" -DSCYLLA_RELEASE=\"\\\"" + scylla_release + "\\\"\""

-seastar_flags = ['--disable-xen']
+seastar_flags = []
 if args.dpdk:
    # fake dependencies on dpdk, so that it is built before anything else
    seastar_flags += ['--enable-dpdk']
@@ -704,9 +764,15 @@ elif args.dpdk_target:
    seastar_flags += ['--dpdk-target', args.dpdk_target]
 if args.staticcxx:
    seastar_flags += ['--static-stdc++']
+if args.staticboost:
+    seastar_flags += ['--static-boost']
+if args.gcc6_concepts:
+    seastar_flags += ['--enable-gcc6-concepts']
+if args.alloc_failure_injector:
+    seastar_flags += ['--enable-alloc-failure-injector']

 seastar_cflags = args.user_cflags + " -march=nehalem"
-seastar_flags += ['--compiler', args.cxx, '--cflags=%s' % (seastar_cflags)]
+seastar_flags += ['--compiler', args.cxx, '--c-compiler', args.cc, '--cflags=%s' % (seastar_cflags)]

 status = subprocess.call([python, './configure.py'] + seastar_flags, cwd = 'seastar')

@@ -737,7 +803,14 @@ for mode in build_modes:
 seastar_deps = 'practically_anything_can_change_so_lets_run_it_every_time_and_restat.'

 args.user_cflags += " " + pkg_config("--cflags", "jsoncpp")
-libs = "-lyaml-cpp -llz4 -lz -lsnappy " + pkg_config("--libs", "jsoncpp") + ' -lboost_filesystem' + ' -lcrypt' + ' -lboost_date_time'
+libs = ' '.join(['-lyaml-cpp', '-llz4', '-lz', '-lsnappy', pkg_config("--libs", "jsoncpp"),
+                 maybe_static(args.staticboost, '-lboost_filesystem'), ' -lcrypt',
+                 maybe_static(args.staticboost, '-lboost_date_time'),
+                ])
+
+if not args.staticboost:
+    args.user_cflags += ' -DBOOST_TEST_DYN_LINK'
+
 for pkg in pkgs:
    args.user_cflags += ' ' + pkg_config('--cflags', pkg)
    libs += ' ' + pkg_config('--libs', pkg)
@@ -767,6 +840,8 @@ with open(buildfile, 'w') as f:
        libs = {libs}
        pool link_pool
            depth = {link_pool_depth}
+        pool seastar_pool
+            depth = 1
        rule ragel
            command = ragel -G2 -o $out $in
            description = RAGEL $out
@@ -792,7 +867,7 @@ with open(buildfile, 'w') as f:
        f.write(textwrap.dedent('''\
            cxxflags_{mode} = -I. -I $builddir/{mode}/gen -I seastar -I seastar/build/{mode}/gen
            rule cxx.{mode}
-              command = $cxx -MMD -MT $out -MF $out.d {seastar_cflags} $cxxflags $cxxflags_{mode} -c -o $out $in
+              command = $cxx -MD -MT $out -MF $out.d {seastar_cflags} $cxxflags $cxxflags_{mode} $obj_cxxflags -c -o $out $in
              description = CXX $out
              depfile = $out.d
            rule link.{mode}
@@ -810,7 +885,16 @@ with open(buildfile, 'w') as f:
                command = thrift -gen cpp:cob_style -out $builddir/{mode}/gen $in
                description = THRIFT $in
            rule antlr3.{mode}
-                command = sed -e '/^#if 0/,/^#endif/d' $in > $builddir/{mode}/gen/$in && antlr3 $builddir/{mode}/gen/$in && sed -i 's/^\\( *\)\\(ImplTraits::CommonTokenType\\* [a-zA-Z0-9_]* = NULL;\\)$$/\\1const \\2/' build/{mode}/gen/${{stem}}Parser.cpp
+                # We replace many local `ExceptionBaseType* ex` variables with a single function-scope one.
+                # Because we add such a variable to every function, and because `ExceptionBaseType` is not a global
+                # name, we also add a global typedef to avoid compilation errors. 
+                command = sed -e '/^#if 0/,/^#endif/d' $in > $builddir/{mode}/gen/$in $
+                     && antlr3 $builddir/{mode}/gen/$in $
+                     && sed -i -e 's/^\\( *\)\\(ImplTraits::CommonTokenType\\* [a-zA-Z0-9_]* = NULL;\\)$$/\\1const \\2/' $
+                        -e '1i using ExceptionBaseType = int;' $
+                        -e 's/^{{/{{ ExceptionBaseType\* ex = nullptr;/; $
+                            s/ExceptionBaseType\* ex = new/ex = new/' $
+                        build/{mode}/gen/${{stem}}Parser.cpp
                description = ANTLR3 $in
            ''').format(mode = mode, **modeval))
        f.write('build {mode}: phony {artifacts}\n'.format(mode = mode,
@@ -851,6 +935,11 @@ with open(buildfile, 'w') as f:
                f.write('build $builddir/{}/{}: ar.{} {}\n'.format(mode, binary, mode, str.join(' ', objs)))
            else:
                if binary.startswith('tests/'):
+                    local_libs = '$libs'
+                    if binary not in tests_not_using_seastar_test_framework or binary in pure_boost_tests:
+                        local_libs += ' ' + maybe_static(args.staticboost, '-lboost_unit_test_framework')
+                    if has_thrift:
+                        local_libs += ' ' + thrift_libs + ' ' + maybe_static(args.staticboost, '-lboost_system')
                    # Our code's debugging information is huge, and multiplied
                    # by many tests yields ridiculous amounts of disk space.
                    # So we strip the tests by default; The user can very
@@ -858,15 +947,15 @@ with open(buildfile, 'w') as f:
                    # to the test name, e.g., "ninja build/release/testname_g"
                    f.write('build $builddir/{}/{}: {}.{} {} {}\n'.format(mode, binary, tests_link_rule, mode, str.join(' ', objs),
                                                                                     'seastar/build/{}/libseastar.a'.format(mode)))
-                    if has_thrift:
-                        f.write('   libs =  {} -lboost_system $libs\n'.format(thrift_libs))
+                    f.write('   libs = {}\n'.format(local_libs))
                    f.write('build $builddir/{}/{}_g: link.{} {} {}\n'.format(mode, binary, mode, str.join(' ', objs),
                                                                              'seastar/build/{}/libseastar.a'.format(mode)))
+                    f.write('   libs = {}\n'.format(local_libs))
                else:
                    f.write('build $builddir/{}/{}: link.{} {} {}\n'.format(mode, binary, mode, str.join(' ', objs),
                                                                            'seastar/build/{}/libseastar.a'.format(mode)))
-                if has_thrift:
-                    f.write('   libs =  {} -lboost_system $libs\n'.format(thrift_libs))
+                    if has_thrift:
+                        f.write('   libs =  {} {} $libs\n'.format(thrift_libs, maybe_static(args.staticboost, '-lboost_system')))
            for src in srcs:
                if src.endswith('.cc'):
                    obj = '$builddir/' + mode + '/' + src.replace('.cc', '.o')
@@ -905,7 +994,7 @@ with open(buildfile, 'w') as f:
            f.write('build {}: ragel {}\n'.format(hh, src))
        for hh in swaggers:
            src = swaggers[hh]
-            f.write('build {}: swagger {}\n'.format(hh,src))
+            f.write('build {}: swagger {} | seastar/json/json2code.py\n'.format(hh,src))
        for hh in serializers:
            src = serializers[hh]
            f.write('build {}: serializer {} | idl-compiler.py\n'.format(hh,src))
@@ -922,8 +1011,12 @@ with open(buildfile, 'w') as f:
            for cc in grammar.sources('$builddir/{}/gen'.format(mode)):
                obj = cc.replace('.cpp', '.o')
                f.write('build {}: cxx.{} {} || {}\n'.format(obj, mode, cc, ' '.join(serializers)))
+                if cc.endswith('Parser.cpp') and has_sanitize_address_use_after_scope:
+                    # Parsers end up using huge amounts of stack space and overflowing their stack.
+                    f.write('  obj_cxxflags = -fno-sanitize-address-use-after-scope\n')
        f.write('build seastar/build/{mode}/libseastar.a seastar/build/{mode}/apps/iotune/iotune seastar/build/{mode}/gen/http/request_parser.hh seastar/build/{mode}/gen/http/http_response_parser.hh: ninja {seastar_deps}\n'
                .format(**locals()))
+        f.write('  pool = seastar_pool\n')
        f.write('  subdir = seastar\n')
        f.write('  target = build/{mode}/libseastar.a build/{mode}/apps/iotune/iotune build/{mode}/gen/http/request_parser.hh build/{mode}/gen/http/http_response_parser.hh\n'.format(**locals()))
        f.write(textwrap.dedent('''\
--- a/converting_mutation_partition_applier.hh
+++ b/converting_mutation_partition_applier.hh
@@ -22,6 +22,7 @@
 #pragma once

 #include "mutation_partition_view.hh"
+#include "mutation_partition.hh"
 #include "schema.hh"

 // Mutation partition visitor which applies visited data into
@@ -37,12 +38,12 @@ private:
    static bool is_compatible(const column_definition& new_def, const data_type& old_type, column_kind kind) {
        return ::is_compatible(new_def.kind, kind) && new_def.type->is_value_compatible_with(*old_type);
    }
-    void accept_cell(row& dst, column_kind kind, const column_definition& new_def, const data_type& old_type, atomic_cell_view cell) {
+    static void accept_cell(row& dst, column_kind kind, const column_definition& new_def, const data_type& old_type, atomic_cell_view cell) {
        if (is_compatible(new_def, old_type, kind) && cell.timestamp() > new_def.dropped_at()) {
            dst.apply(new_def, atomic_cell_or_collection(cell));
        }
    }
-    void accept_cell(row& dst, column_kind kind, const column_definition& new_def, const data_type& old_type, collection_mutation_view cell) {
+    static void accept_cell(row& dst, column_kind kind, const column_definition& new_def, const data_type& old_type, collection_mutation_view cell) {
        if (!is_compatible(new_def, old_type, kind)) {
            return;
        }
@@ -94,8 +95,8 @@ public:
        _p.apply_row_tombstone(_p_schema, rt);
    }

-    virtual void accept_row(clustering_key_view key, tombstone deleted_at, const row_marker& rm) override {
-        deletable_row& r = _p.clustered_row(_p_schema, key);
+    virtual void accept_row(position_in_partition_view key, const row_tombstone& deleted_at, const row_marker& rm, is_dummy dummy, is_continuous continuous) override {
+        deletable_row& r = _p.clustered_row(_p_schema, key, dummy, continuous);
        r.apply(rm);
        r.apply(deleted_at);
        _current_row = &r;
@@ -116,4 +117,14 @@ public:
            accept_cell(_current_row->cells(), column_kind::regular_column, *def, col.type(), collection);
        }
    }
+
+    // Appends the cell to dst upgrading it to the new schema.
+    // Cells must have monotonic names.
+    static void append_cell(row& dst, column_kind kind, const column_definition& new_def, const data_type& old_type, const atomic_cell_or_collection& cell) {
+        if (new_def.is_atomic()) {
+            accept_cell(dst, kind, new_def, old_type, cell.as_atomic_cell());
+        } else {
+            accept_cell(dst, kind, new_def, old_type, cell.as_collection_mutation());
+        }
+    }
 };
--- a/counters.cc
+++ b/counters.cc
@@ -0,0 +1,332 @@
+/*
+ * Copyright (C) 2016 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "service/storage_service.hh"
+#include "counters.hh"
+#include "mutation.hh"
+#include "combine.hh"
+
+counter_id counter_id::local()
+{
+    return counter_id(service::get_local_storage_service().get_local_id());
+}
+
+bool counter_id::less_compare_1_7_4::operator()(const counter_id& a, const counter_id& b) const
+{
+    if (a._most_significant != b._most_significant) {
+        return a._most_significant < b._most_significant;
+    } else {
+        return a._least_significant < b._least_significant;
+    }
+}
+
+std::ostream& operator<<(std::ostream& os, const counter_id& id) {
+    return os << id.to_uuid();
+}
+
+std::ostream& operator<<(std::ostream& os, counter_shard_view csv) {
+    return os << "{global_shard id: " << csv.id() << " value: " << csv.value()
+              << " clock: " << csv.logical_clock() << "}";
+}
+
+std::ostream& operator<<(std::ostream& os, counter_cell_view ccv) {
+    return os << "{counter_cell timestamp: " << ccv.timestamp() << " shards: {" << ::join(", ", ccv.shards()) << "}}";
+}
+
+void counter_cell_builder::do_sort_and_remove_duplicates()
+{
+    boost::range::sort(_shards, [] (auto& a, auto& b) { return a.id() < b.id(); });
+
+    std::vector<counter_shard> new_shards;
+    new_shards.reserve(_shards.size());
+    for (auto& cs : _shards) {
+        if (new_shards.empty() || new_shards.back().id() != cs.id()) {
+            new_shards.emplace_back(cs);
+        } else {
+            new_shards.back().apply(cs);
+        }
+    }
+    _shards = std::move(new_shards);
+    _sorted = true;
+}
+
+std::vector<counter_shard> counter_cell_view::shards_compatible_with_1_7_4() const
+{
+    auto sorted_shards = boost::copy_range<std::vector<counter_shard>>(shards());
+    counter_id::less_compare_1_7_4 cmp;
+    boost::range::sort(sorted_shards, [&] (auto& a, auto& b) {
+        return cmp(a.id(), b.id());
+    });
+    return sorted_shards;
+}
+
+static bool apply_in_place(atomic_cell_or_collection& dst, atomic_cell_or_collection& src) // merges src's shards into dst's existing buffer; returns false (after undoing its changes) if some src shard id is missing from dst
+{
+    auto dst_ccmv = counter_cell_mutable_view(dst.as_mutable_atomic_cell());
+    auto src_ccmv = counter_cell_mutable_view(src.as_mutable_atomic_cell());
+    auto dst_shards = dst_ccmv.shards();
+    auto src_shards = src_ccmv.shards();
+
+    auto dst_it = dst_shards.begin();
+    auto src_it = src_shards.begin();
+
+    while (src_it != src_shards.end()) {
+        while (dst_it != dst_shards.end() && dst_it->id() < src_it->id()) { // skip dst shards whose id is smaller than the current src id
+            ++dst_it;
+        }
+        if (dst_it == dst_shards.end() || dst_it->id() != src_it->id()) { // current src shard has no counterpart in dst
+            // Fast-path failed. Revert and fall back to the slow path.
+            if (dst_it == dst_shards.end()) {
+                --dst_it; // step back from end() before dst_it is dereferenced below
+            }
+            while (src_it != src_shards.begin()) { // walk back over the src shards that were already processed
+                --src_it;
+                while (dst_it->id() != src_it->id()) {
+                    --dst_it;
+                }
+                src_it->swap_value_and_clock(*dst_it); // swap back; a no-op where the forward pass copied dst into src
+            }
+            return false;
+        }
+        if (dst_it->logical_clock() < src_it->logical_clock()) {
+            dst_it->swap_value_and_clock(*src_it); // dst takes the newer value/clock, src holds dst's previous ones
+        } else {
+            src_it->set_value_and_clock(*dst_it); // dst is kept as is; src is overwritten with a copy of dst's value/clock
+        }
+        ++src_it;
+    }
+
+    auto dst_ts = dst_ccmv.timestamp();
+    auto src_ts = src_ccmv.timestamp();
+    dst_ccmv.set_timestamp(std::max(dst_ts, src_ts));
+    src_ccmv.set_timestamp(dst_ts); // src now carries dst's previous timestamp
+    src.as_mutable_atomic_cell().set_counter_in_place_revert(true); // revert_apply() tests is_counter_in_place_revert_set() on src
+    return true;
+}
+
+static void revert_in_place_apply(atomic_cell_or_collection& dst, atomic_cell_or_collection& src)
+{
+    assert(dst.can_use_mutable_view() && src.can_use_mutable_view());
+    auto dst_ccmv = counter_cell_mutable_view(dst.as_mutable_atomic_cell());
+    auto src_ccmv = counter_cell_mutable_view(src.as_mutable_atomic_cell());
+    auto dst_shards = dst_ccmv.shards();
+    auto src_shards = src_ccmv.shards();
+
+    auto dst_it = dst_shards.begin();
+    auto src_it = src_shards.begin();
+
+    while (src_it != src_shards.end()) {
+        while (dst_it != dst_shards.end() && dst_it->id() < src_it->id()) {
+            ++dst_it;
+        }
+        assert(dst_it != dst_shards.end() && dst_it->id() == src_it->id());
+        dst_it->swap_value_and_clock(*src_it);
+        ++src_it;
+    }
+
+    auto dst_ts = dst_ccmv.timestamp();
+    auto src_ts = src_ccmv.timestamp();
+    dst_ccmv.set_timestamp(src_ts);
+    src_ccmv.set_timestamp(dst_ts);
+    src.as_mutable_atomic_cell().set_counter_in_place_revert(false);
+}
+
+bool counter_cell_view::apply_reversibly(atomic_cell_or_collection& dst, atomic_cell_or_collection& src)
+{
+    auto dst_ac = dst.as_atomic_cell();
+    auto src_ac = src.as_atomic_cell();
+
+    if (!dst_ac.is_live() || !src_ac.is_live()) {
+        if (dst_ac.is_live() || (!src_ac.is_live() && compare_atomic_cell_for_merge(dst_ac, src_ac) < 0)) {
+            std::swap(dst, src);
+            return true;
+        }
+        return false;
+    }
+
+    if (dst_ac.is_counter_update() && src_ac.is_counter_update()) {
+        auto src_v = src_ac.counter_update_value();
+        auto dst_v = dst_ac.counter_update_value();
+        dst = atomic_cell::make_live_counter_update(std::max(dst_ac.timestamp(), src_ac.timestamp()),
+                                                    src_v + dst_v);
+        return true;
+    }
+
+    assert(!dst_ac.is_counter_update());
+    assert(!src_ac.is_counter_update());
+
+    if (counter_cell_view(dst_ac).shard_count() >= counter_cell_view(src_ac).shard_count()
+        && dst.can_use_mutable_view() && src.can_use_mutable_view()) {
+        if (apply_in_place(dst, src)) {
+            return true;
+        }
+    }
+
+    src.as_mutable_atomic_cell().set_counter_in_place_revert(false);
+    auto dst_shards = counter_cell_view(dst_ac).shards();
+    auto src_shards = counter_cell_view(src_ac).shards();
+
+    counter_cell_builder result;
+    combine(dst_shards.begin(), dst_shards.end(), src_shards.begin(), src_shards.end(),
+            result.inserter(), counter_shard_view::less_compare_by_id(), [] (auto& x, auto& y) {
+                return x.logical_clock() < y.logical_clock() ? y : x;
+            });
+
+    auto cell = result.build(std::max(dst_ac.timestamp(), src_ac.timestamp()));
+    src = std::exchange(dst, atomic_cell_or_collection(cell));
+    return true;
+}
+
+void counter_cell_view::revert_apply(atomic_cell_or_collection& dst, atomic_cell_or_collection& src)
+{
+    if (dst.as_atomic_cell().is_counter_update()) {
+        auto src_v = src.as_atomic_cell().counter_update_value();
+        auto dst_v = dst.as_atomic_cell().counter_update_value();
+        // Subtract src's delta and rebuild dst as a counter update, the same cell form apply_reversibly() creates.
+        dst = atomic_cell::make_live_counter_update(dst.as_atomic_cell().timestamp(), dst_v - src_v);
+    } else if (src.as_atomic_cell().is_counter_in_place_revert_set()) {
+        revert_in_place_apply(dst, src);
+    } else {
+        std::swap(dst, src);
+    }
+}
+
+stdx::optional<atomic_cell> counter_cell_view::difference(atomic_cell_view a, atomic_cell_view b)
+{
+    assert(!a.is_counter_update());
+    assert(!b.is_counter_update());
+
+    if (!b.is_live() || !a.is_live()) {
+        if (b.is_live() || (!a.is_live() && compare_atomic_cell_for_merge(b, a) < 0)) {
+            return atomic_cell(a);
+        }
+        return { };
+    }
+
+    auto a_shards = counter_cell_view(a).shards();
+    auto b_shards = counter_cell_view(b).shards();
+
+    auto a_it = a_shards.begin();
+    auto a_end = a_shards.end();
+    auto b_it = b_shards.begin();
+    auto b_end = b_shards.end();
+
+    counter_cell_builder result;
+    while (a_it != a_end) {
+        while (b_it != b_end && (*b_it).id() < (*a_it).id()) {
+            ++b_it;
+        }
+        if (b_it == b_end || (*a_it).id() != (*b_it).id() || (*a_it).logical_clock() > (*b_it).logical_clock()) {
+            result.add_shard(counter_shard(*a_it));
+        }
+        ++a_it;
+    }
+
+    stdx::optional<atomic_cell> diff;
+    if (!result.empty()) {
+        diff = result.build(std::max(a.timestamp(), b.timestamp()));
+    } else if (a.timestamp() > b.timestamp()) {
+        diff = atomic_cell::make_live(a.timestamp(), bytes_view());
+    }
+    return diff;
+}
+
+
+void transform_counter_updates_to_shards(mutation& m, const mutation* current_state, uint64_t clock_offset) {
+    // FIXME: allow current_state to be frozen_mutation
+
+    auto transform_new_row_to_shards = [clock_offset] (auto& cells) {
+        cells.for_each_cell([clock_offset] (auto, atomic_cell_or_collection& ac_o_c) {
+            auto acv = ac_o_c.as_atomic_cell();
+            if (!acv.is_live()) {
+                return; // continue -- we are in lambda
+            }
+            auto delta = acv.counter_update_value();
+            auto cs = counter_shard(counter_id::local(), delta, clock_offset + 1);
+            ac_o_c = counter_cell_builder::from_single_shard(acv.timestamp(), cs);
+        });
+    };
+
+    if (!current_state) {
+        transform_new_row_to_shards(m.partition().static_row());
+        for (auto& cr : m.partition().clustered_rows()) {
+            transform_new_row_to_shards(cr.row().cells());
+        }
+        return;
+    }
+
+    clustering_key::less_compare cmp(*m.schema());
+
+    auto transform_row_to_shards = [clock_offset] (auto& transformee, auto& state) {
+        std::deque<std::pair<column_id, counter_shard>> shards;
+        state.for_each_cell([&] (column_id id, const atomic_cell_or_collection& ac_o_c) {
+            auto acv = ac_o_c.as_atomic_cell();
+            if (!acv.is_live()) {
+                return; // continue -- we are in lambda
+            }
+            counter_cell_view ccv(acv);
+            auto cs = ccv.local_shard();
+            if (!cs) {
+                return; // continue
+            }
+            shards.emplace_back(std::make_pair(id, counter_shard(*cs)));
+        });
+
+        transformee.for_each_cell([&] (column_id id, atomic_cell_or_collection& ac_o_c) {
+            auto acv = ac_o_c.as_atomic_cell();
+            if (!acv.is_live()) {
+                return; // continue -- we are in lambda
+            }
+            while (!shards.empty() && shards.front().first < id) {
+                shards.pop_front();
+            }
+
+            auto delta = acv.counter_update_value();
+
+            if (shards.empty() || shards.front().first > id) {
+                auto cs = counter_shard(counter_id::local(), delta, clock_offset + 1);
+                ac_o_c = counter_cell_builder::from_single_shard(acv.timestamp(), cs);
+            } else {
+                auto& cs = shards.front().second;
+                cs.update(delta, clock_offset + 1);
+                ac_o_c = counter_cell_builder::from_single_shard(acv.timestamp(), cs);
+                shards.pop_front();
+            }
+        });
+    };
+
+    transform_row_to_shards(m.partition().static_row(), current_state->partition().static_row());
+
+    auto& cstate = current_state->partition();
+    auto it = cstate.clustered_rows().begin();
+    auto end = cstate.clustered_rows().end();
+    for (auto& cr : m.partition().clustered_rows()) {
+        while (it != end && cmp(it->key(), cr.key())) {
+            ++it;
+        }
+        if (it == end || cmp(cr.key(), it->key())) {
+            transform_new_row_to_shards(cr.row().cells());
+            continue;
+        }
+
+        transform_row_to_shards(cr.row().cells(), it->row().cells());
+    }
+}
--- a/counters.hh
+++ b/counters.hh
@@ -0,0 +1,435 @@
+/*
+ * Copyright (C) 2016 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <boost/range/algorithm/find_if.hpp>
+
+#include "atomic_cell_or_collection.hh"
+#include "types.hh"
+
+#include "stdx.hh"
+
+class mutation;
+
+class mutation;
+
+class counter_id {
+    int64_t _least_significant;
+    int64_t _most_significant;
+public:
+    static_assert(std::is_same<decltype(std::declval<utils::UUID>().get_least_significant_bits()), int64_t>::value
+            &&  std::is_same<decltype(std::declval<utils::UUID>().get_most_significant_bits()), int64_t>::value,
+        "utils::UUID is expected to work with two signed 64-bit integers");
+
+    counter_id() = default;
+    explicit counter_id(utils::UUID uuid) noexcept
+        : _least_significant(uuid.get_least_significant_bits())
+        , _most_significant(uuid.get_most_significant_bits())
+    { }
+
+    utils::UUID to_uuid() const {
+        return utils::UUID(_most_significant, _least_significant);
+    }
+
+    bool operator<(const counter_id& other) const {
+        return to_uuid() < other.to_uuid();
+    }
+    bool operator>(const counter_id& other) const {
+        return other.to_uuid() < to_uuid();
+    }
+    bool operator==(const counter_id& other) const {
+        return to_uuid() == other.to_uuid();
+    }
+    bool operator!=(const counter_id& other) const {
+        return !(*this == other);
+    }
+public:
+    // (Wrong) Counter ID ordering used by Scylla 1.7.4 and earlier.
+    struct less_compare_1_7_4 {
+        bool operator()(const counter_id& a, const counter_id& b) const;
+    };
+public:
+    static counter_id local();
+
+    // For tests.
+    static counter_id generate_random() {
+        return counter_id(utils::make_random_uuid());
+    }
+};
+static_assert(std::is_pod<counter_id>::value, "counter_id should be a POD type");
+
+std::ostream& operator<<(std::ostream& os, const counter_id& id);
+
+template<typename View>
+class basic_counter_shard_view {
+    enum class offset : unsigned {
+        id = 0u,
+        value = unsigned(id) + sizeof(counter_id),
+        logical_clock = unsigned(value) + sizeof(int64_t),
+        total_size = unsigned(logical_clock) + sizeof(int64_t),
+    };
+private:
+    typename View::pointer _base;
+private:
+    template<typename T>
+    T read(offset off) const {
+        T value;
+        std::copy_n(_base + static_cast<unsigned>(off), sizeof(T), reinterpret_cast<signed char*>(&value));
+        return value;
+    }
+public:
+    static constexpr auto size = size_t(offset::total_size);
+public:
+    basic_counter_shard_view() = default;
+    explicit basic_counter_shard_view(typename View::pointer ptr) noexcept
+        : _base(ptr) { }
+
+    counter_id id() const { return read<counter_id>(offset::id); }
+    int64_t value() const { return read<int64_t>(offset::value); }
+    int64_t logical_clock() const { return read<int64_t>(offset::logical_clock); }
+
+    void swap_value_and_clock(basic_counter_shard_view& other) noexcept {
+        static constexpr size_t off = size_t(offset::value);
+        static constexpr size_t size = size_t(offset::total_size) - off;
+
+        typename View::value_type tmp[size];
+        std::copy_n(_base + off, size, tmp);
+        std::copy_n(other._base + off, size, _base + off);
+        std::copy_n(tmp, size, other._base + off);
+    }
+
+    void set_value_and_clock(const basic_counter_shard_view& other) noexcept {
+        static constexpr size_t off = size_t(offset::value);
+        static constexpr size_t size = size_t(offset::total_size) - off;
+        std::copy_n(other._base + off, size, _base + off);
+    }
+
+    bool operator==(const basic_counter_shard_view& other) const {
+        return id() == other.id() && value() == other.value()
+               && logical_clock() == other.logical_clock();
+    }
+    bool operator!=(const basic_counter_shard_view& other) const {
+        return !(*this == other);
+    }
+
+    struct less_compare_by_id {
+        bool operator()(const basic_counter_shard_view& x, const basic_counter_shard_view& y) const {
+            return x.id() < y.id();
+        }
+    };
+};
+
+using counter_shard_view = basic_counter_shard_view<bytes_view>;
+
+std::ostream& operator<<(std::ostream& os, counter_shard_view csv);
+
+class counter_shard {
+    counter_id _id;
+    int64_t _value;
+    int64_t _logical_clock;
+private:
+    template<typename T>
+    static void write(const T& value, bytes::iterator& out) {
+        out = std::copy_n(reinterpret_cast<const signed char*>(&value), sizeof(T), out);
+    }
+private:
+    // Shared logic for applying counter_shards and counter_shard_views.
+    // T is either counter_shard or basic_counter_shard_view<U>.
+    template<typename T>
+    GCC6_CONCEPT(requires requires(T shard) {
+        { shard.value() } -> int64_t;
+        { shard.logical_clock() } -> int64_t;
+    })
+    counter_shard& do_apply(T&& other) noexcept {
+        auto other_clock = other.logical_clock();
+        if (_logical_clock < other_clock) {
+            _logical_clock = other_clock;
+            _value = other.value();
+        }
+        return *this;
+    }
+public:
+    counter_shard(counter_id id, int64_t value, int64_t logical_clock) noexcept
+        : _id(id)
+        , _value(value)
+        , _logical_clock(logical_clock)
+    { }
+
+    explicit counter_shard(counter_shard_view csv) noexcept
+        : _id(csv.id())
+        , _value(csv.value())
+        , _logical_clock(csv.logical_clock())
+    { }
+
+    counter_id id() const { return _id; }
+    int64_t value() const { return _value; }
+    int64_t logical_clock() const { return _logical_clock; }
+
+    counter_shard& update(int64_t value_delta, int64_t clock_increment) noexcept {
+        _value += value_delta;
+        _logical_clock += clock_increment;
+        return *this;
+    }
+
+    counter_shard& apply(counter_shard_view other) noexcept {
+        return do_apply(other);
+    }
+
+    counter_shard& apply(const counter_shard& other) noexcept {
+        return do_apply(other);
+    }
+
+    static size_t serialized_size() {
+        return counter_shard_view::size;
+    }
+    void serialize(bytes::iterator& out) const {
+        write(_id, out);
+        write(_value, out);
+        write(_logical_clock, out);
+    }
+};
+
+class counter_cell_builder {
+    std::vector<counter_shard> _shards;
+    bool _sorted = true;
+private:
+    void do_sort_and_remove_duplicates();
+public:
+    counter_cell_builder() = default;
+    counter_cell_builder(size_t shard_count) {
+        _shards.reserve(shard_count);
+    }
+
+    void add_shard(const counter_shard& cs) {
+        _shards.emplace_back(cs);
+    }
+
+    void add_maybe_unsorted_shard(const counter_shard& cs) {
+        add_shard(cs);
+        if (_sorted && _shards.size() > 1) {
+            auto current = _shards.rbegin();
+            auto previous = std::next(current);
+            _sorted = current->id() > previous->id();
+        }
+    }
+
+    void sort_and_remove_duplicates() {
+        if (!_sorted) {
+            do_sort_and_remove_duplicates();
+        }
+    }
+
+    size_t serialized_size() const {
+        return _shards.size() * counter_shard::serialized_size();
+    }
+    void serialize(bytes::iterator& out) const {
+        for (auto&& cs : _shards) {
+            cs.serialize(out);
+        }
+    }
+
+    bool empty() const {
+        return _shards.empty();
+    }
+
+    atomic_cell build(api::timestamp_type timestamp) const {
+        return atomic_cell::make_live_from_serializer(timestamp, serialized_size(), [this] (bytes::iterator out) {
+            serialize(out);
+        });
+    }
+
+    static atomic_cell from_single_shard(api::timestamp_type timestamp, const counter_shard& cs) {
+        return atomic_cell::make_live_from_serializer(timestamp, counter_shard::serialized_size(), [&cs] (bytes::iterator out) {
+            cs.serialize(out);
+        });
+    }
+
+    class inserter_iterator : public std::iterator<std::output_iterator_tag, counter_shard> {
+        counter_cell_builder* _builder;
+    public:
+        explicit inserter_iterator(counter_cell_builder& b) : _builder(&b) { }
+        inserter_iterator& operator=(const counter_shard& cs) {
+            _builder->add_shard(cs);
+            return *this;
+        }
+        inserter_iterator& operator=(const counter_shard_view& csv) {
+            return operator=(counter_shard(csv));
+        }
+        inserter_iterator& operator++() { return *this; }
+        inserter_iterator& operator++(int) { return *this; }
+        inserter_iterator& operator*() { return *this; };
+    };
+
+    inserter_iterator inserter() {
+        return inserter_iterator(*this);
+    }
+};
+
+// <counter_id>   := <int64_t><int64_t>
+// <shard>        := <counter_id><int64_t:value><int64_t:logical_clock>
+// <counter_cell> := <shard>*
+template<typename View>
+class basic_counter_cell_view {
+protected:
+    atomic_cell_base<View> _cell;
+private:
+    class shard_iterator : public std::iterator<std::input_iterator_tag, basic_counter_shard_view<View>> {
+        typename View::pointer _current;
+        basic_counter_shard_view<View> _current_view;
+    public:
+        shard_iterator() = default;
+        shard_iterator(typename View::pointer ptr) noexcept
+            : _current(ptr), _current_view(ptr) { }
+
+        basic_counter_shard_view<View>& operator*() noexcept {
+            return _current_view;
+        }
+        basic_counter_shard_view<View>* operator->() noexcept {
+            return &_current_view;
+        }
+        shard_iterator& operator++() noexcept {
+            _current += counter_shard_view::size;
+            _current_view = basic_counter_shard_view<View>(_current);
+            return *this;
+        }
+        shard_iterator operator++(int) noexcept {
+            auto it = *this;
+            operator++();
+            return it;
+        }
+        shard_iterator& operator--() noexcept {
+            _current -= counter_shard_view::size;
+            _current_view = basic_counter_shard_view<View>(_current);
+            return *this;
+        }
+        shard_iterator operator--(int) noexcept {
+            auto it = *this;
+            operator--();
+            return it;
+        }
+        bool operator==(const shard_iterator& other) const noexcept {
+            return _current == other._current;
+        }
+        bool operator!=(const shard_iterator& other) const noexcept {
+            return !(*this == other);
+        }
+    };
+public:
+    boost::iterator_range<shard_iterator> shards() const {
+        auto bv = _cell.value();
+        auto begin = shard_iterator(bv.data());
+        auto end = shard_iterator(bv.data() + bv.size());
+        return boost::make_iterator_range(begin, end);
+    }
+
+    size_t shard_count() const {
+        return _cell.value().size() / counter_shard_view::size;
+    }
+public:
+    // ac must be a live counter cell
+    explicit basic_counter_cell_view(atomic_cell_base<View> ac) noexcept : _cell(ac) {
+        assert(_cell.is_live());
+        assert(!_cell.is_counter_update());
+    }
+
+    api::timestamp_type timestamp() const { return _cell.timestamp(); }
+
+    static data_type total_value_type() { return long_type; }
+
+    int64_t total_value() const {
+        return boost::accumulate(shards(), int64_t(0), [] (int64_t v, counter_shard_view cs) {
+            return v + cs.value();
+        });
+    }
+
+    stdx::optional<counter_shard_view> get_shard(const counter_id& id) const {
+        auto it = boost::range::find_if(shards(), [&id] (counter_shard_view csv) {
+            return csv.id() == id;
+        });
+        if (it == shards().end()) {
+            return { };
+        }
+        return *it;
+    }
+
+    stdx::optional<counter_shard_view> local_shard() const {
+        // TODO: consider caching local shard position
+        return get_shard(counter_id::local());
+    }
+
+    bool operator==(const basic_counter_cell_view& other) const {
+        return timestamp() == other.timestamp() && boost::equal(shards(), other.shards());
+    }
+};
+
+struct counter_cell_view : basic_counter_cell_view<bytes_view> {
+    using basic_counter_cell_view::basic_counter_cell_view;
+
+    // Returns counter shards in an order that is compatible with Scylla 1.7.4.
+    std::vector<counter_shard> shards_compatible_with_1_7_4() const;
+
+    // Reversibly applies two counter cells, at least one of them must be live.
+    // Returns true iff dst was modified.
+    static bool apply_reversibly(atomic_cell_or_collection& dst, atomic_cell_or_collection& src);
+
+    // Reverts apply performed by apply_reversibly().
+    static void revert_apply(atomic_cell_or_collection& dst, atomic_cell_or_collection& src);
+
+    // Computes a counter cell containing minimal amount of data which, when
+    // applied to 'b' returns the same cell as 'a' and 'b' applied together.
+    static stdx::optional<atomic_cell> difference(atomic_cell_view a, atomic_cell_view b);
+
+    friend std::ostream& operator<<(std::ostream& os, counter_cell_view ccv);
+};
+
+struct counter_cell_mutable_view : basic_counter_cell_view<bytes_mutable_view> {
+    using basic_counter_cell_view::basic_counter_cell_view;
+
+    void set_timestamp(api::timestamp_type ts) { _cell.set_timestamp(ts); }
+};
+
+// Transforms mutation dst from counter updates to counter shards using state
+// stored in current_state.
+// If current_state is present it has to be in the same schema as dst.
+void transform_counter_updates_to_shards(mutation& dst, const mutation* current_state, uint64_t clock_offset);
+
+template<>
+struct appending_hash<counter_shard_view> {
+    template<typename Hasher>
+    void operator()(Hasher& h, const counter_shard_view& cshard) const {
+        ::feed_hash(h, cshard.id().to_uuid());
+        ::feed_hash(h, cshard.value());
+        ::feed_hash(h, cshard.logical_clock());
+    }
+};
+
+template<>
+struct appending_hash<counter_cell_view> {
+    template<typename Hasher>
+    void operator()(Hasher& h, const counter_cell_view& cell) const {
+        ::feed_hash(h, true); // is_live
+        ::feed_hash(h, cell.timestamp());
+        for (auto&& csv : cell.shards()) {
+            ::feed_hash(h, csv);
+        }
+    }
+};
--- a/cpu_controller.hh
+++ b/cpu_controller.hh
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+#include <seastar/core/thread.hh>
+#include <seastar/core/timer.hh>
+#include <chrono>
+
+// Simple proportional controller to adjust shares of memtable/streaming flushes.
+//
+// Goal is to flush as fast as we can, but not so fast that we steal all the CPU from incoming
+// requests, and at the same time minimize user-visible fluctuations in the flush quota.
+//
+// What that translates to is we'll try to keep virtual dirty's first derivative at 0 (IOW, we keep
+// virtual dirty constant), which means that the rate of incoming writes is equal to the rate of
+// flushed bytes.
+//
+// The exact point at which the controller stops determines the desired flush CPU usage. As we
+// approach the hard dirty limit, we need to be more aggressive. We will therefore define two
+// thresholds, and increase the constant as we cross them.
+//
+//  1) the soft limit line
+//  2) halfway between soft limit and dirty limit
+//
+// The constants q1 and q2 are used to determine the proportional factor at each stage.
+//
+// Below the soft limit, we are in no particular hurry to flush, since it means we're set to
+// complete flushing before a new memtable is ready. The quota is dirty * q1, and q1 is set to a
+// low number.
+//
+// The first half of the virtual dirty region is where we expect to be usually, so we have a low
+// slope corresponding to a sluggish response between q1 * soft_limit and q2.
+//
+// In the second half, we're getting close to the hard dirty limit so we increase the slope and
+// become more responsive, up to a maximum quota of qmax.
+//
+// For now we'll just set them in the structure not to complicate the constructor. But q1, q2 and
+// qmax can easily become parameters if we find another user.
+class flush_cpu_controller {
+    static constexpr float hard_dirty_limit = 0.50;
+    static constexpr float q1 = 0.01;
+    static constexpr float q2 = 0.2;
+    static constexpr float qmax = 1;
+
+    float _current_quota = 0.0f;
+    float _goal;
+    std::function<float()> _current_dirty;
+    std::chrono::milliseconds _interval;
+    timer<> _update_timer;
+
+    seastar::thread_scheduling_group _scheduling_group;
+    seastar::thread_scheduling_group *_current_scheduling_group = nullptr;
+
+    void adjust();
+public:
+    seastar::thread_scheduling_group* scheduling_group() {
+        return _current_scheduling_group;
+    }
+    float current_quota() const {
+        return _current_quota;
+    }
+
+    struct disabled {
+        seastar::thread_scheduling_group *backup;
+    };
+    flush_cpu_controller(disabled d) : _scheduling_group(std::chrono::nanoseconds(0), 0), _current_scheduling_group(d.backup) {}
+    flush_cpu_controller(std::chrono::milliseconds interval, float soft_limit, std::function<float()> current_dirty);
+    flush_cpu_controller(flush_cpu_controller&&) = default;
+};
+
+
--- a/cql3/Cql.g
+++ b/cql3/Cql.g
@@ -36,15 +36,19 @@ options {
 #include "cql3/statements/raw/select_statement.hh"
 #include "cql3/statements/alter_keyspace_statement.hh"
 #include "cql3/statements/alter_table_statement.hh"
+#include "cql3/statements/alter_view_statement.hh"
 #include "cql3/statements/create_keyspace_statement.hh"
 #include "cql3/statements/drop_keyspace_statement.hh"
 #include "cql3/statements/create_index_statement.hh"
 #include "cql3/statements/create_table_statement.hh"
+#include "cql3/statements/create_view_statement.hh"
 #include "cql3/statements/create_type_statement.hh"
 #include "cql3/statements/drop_type_statement.hh"
 #include "cql3/statements/alter_type_statement.hh"
 #include "cql3/statements/property_definitions.hh"
+#include "cql3/statements/drop_index_statement.hh"
 #include "cql3/statements/drop_table_statement.hh"
+#include "cql3/statements/drop_view_statement.hh"
 #include "cql3/statements/truncate_statement.hh"
 #include "cql3/statements/raw/update_statement.hh"
 #include "cql3/statements/raw/insert_statement.hh"
@@ -315,9 +319,7 @@ cqlStatement returns [shared_ptr<raw::parsed_statement> stmt]
    | st10=createIndexStatement        { $stmt = st10; }
    | st11=dropKeyspaceStatement       { $stmt = st11; }
    | st12=dropTableStatement          { $stmt = st12; }
-#if 0
    | st13=dropIndexStatement          { $stmt = st13; }
-#endif
    | st14=alterTableStatement         { $stmt = st14; }
    | st15=alterKeyspaceStatement      { $stmt = st15; }
    | st16=grantStatement              { $stmt = st16; }
@@ -340,6 +342,9 @@ cqlStatement returns [shared_ptr<raw::parsed_statement> stmt]
    | st30=createAggregateStatement    { $stmt = st30; }
    | st31=dropAggregateStatement      { $stmt = st31; }
 #endif
+    | st32=createViewStatement         { $stmt = st32; }
+    | st33=alterViewStatement          { $stmt = st33; }
+    | st34=dropViewStatement           { $stmt = st34; }
    ;

 /*
@@ -716,7 +721,7 @@ createTableStatement returns [shared_ptr<cql3::statements::create_table_statemen

 cfamDefinition[shared_ptr<cql3::statements::create_table_statement::raw_statement> expr]
    : '(' cfamColumns[expr] ( ',' cfamColumns[expr]? )* ')'
-      ( K_WITH cfamProperty[expr] ( K_AND cfamProperty[expr] )*)?
+      ( K_WITH cfamProperty[$expr->properties()] ( K_AND cfamProperty[$expr->properties()] )*)?
    ;

 cfamColumns[shared_ptr<cql3::statements::create_table_statement::raw_statement> expr]
@@ -732,15 +737,15 @@ pkDef[shared_ptr<cql3::statements::create_table_statement::raw_statement> expr]
    | '(' k1=ident { l.push_back(k1); } ( ',' kn=ident { l.push_back(kn); } )* ')' { $expr->add_key_aliases(l); }
    ;

-cfamProperty[shared_ptr<cql3::statements::create_table_statement::raw_statement> expr]
-    : property[expr->properties]
-    | K_COMPACT K_STORAGE { $expr->set_compact_storage(); }
+cfamProperty[cql3::statements::cf_properties& expr] // takes the property holder itself; used by both cfamDefinition and createViewStatement
+    : property[$expr.properties()]
+    | K_COMPACT K_STORAGE { $expr.set_compact_storage(); }
    | K_CLUSTERING K_ORDER K_BY '(' cfamOrdering[expr] (',' cfamOrdering[expr])* ')'
    ;

-cfamOrdering[shared_ptr<cql3::statements::create_table_statement::raw_statement> expr]
+cfamOrdering[cql3::statements::cf_properties& expr]
    @init{ bool reversed=false; }
-    : k=ident (K_ASC | K_DESC { reversed=true;} ) { $expr->set_ordering(k, reversed); }
+    : k=ident (K_ASC | K_DESC { reversed=true;} ) { $expr.set_ordering(k, reversed); } // only K_DESC flips the flag; a direction keyword is mandatory
    ;


@@ -772,12 +777,13 @@ createIndexStatement returns [::shared_ptr<create_index_statement> expr]
        auto props = make_shared<index_prop_defs>();
        bool if_not_exists = false;
        auto name = ::make_shared<cql3::index_name>();
+        std::vector<::shared_ptr<index_target::raw>> targets;
    }
    : K_CREATE (K_CUSTOM { props->is_custom = true; })? K_INDEX (K_IF K_NOT K_EXISTS { if_not_exists = true; } )?
-        (idxName[name])? K_ON cf=columnFamilyName '(' id=indexIdent ')'
+        (idxName[name])? K_ON cf=columnFamilyName '(' (target1=indexIdent { targets.emplace_back(target1); } (',' target2=indexIdent { targets.emplace_back(target2); } )*)? ')'
        (K_USING cls=STRING_LITERAL { props->custom_class = sstring{$cls.text}; })?
        (K_WITH properties[props])?
-      { $expr = ::make_shared<create_index_statement>(cf, name, id, props, if_not_exists); }
+      { $expr = ::make_shared<create_index_statement>(cf, name, targets, props, if_not_exists); }
    ;

 indexIdent returns [::shared_ptr<index_target::raw> id]
@@ -787,6 +793,39 @@ indexIdent returns [::shared_ptr<index_target::raw> id]
    | K_FULL '(' c=cident ')'    { $id = index_target::raw::full_collection(c); }
    ;

+/**
+ * CREATE MATERIALIZED VIEW <viewName> AS
+ *  SELECT <columns>
+ *  FROM <CF>
+ *  WHERE <pkColumns> IS NOT NULL
+ *  PRIMARY KEY (<pkColumns>)
+ *  WITH <property> = <value> AND ...;
+ */
+createViewStatement returns [::shared_ptr<create_view_statement> expr]
+    @init {
+        bool if_not_exists = false;
+        std::vector<::shared_ptr<cql3::column_identifier::raw>> partition_keys; // columns listed in the partition key position
+        std::vector<::shared_ptr<cql3::column_identifier::raw>> composite_keys; // remaining PRIMARY KEY columns, in declaration order
+    }
+    : K_CREATE K_MATERIALIZED K_VIEW (K_IF K_NOT K_EXISTS { if_not_exists = true; })? cf=columnFamilyName K_AS
+        K_SELECT sclause=selectClause K_FROM basecf=columnFamilyName
+        (K_WHERE wclause=whereClause)? // the grammar itself does not require a WHERE clause
+        K_PRIMARY K_KEY (
+        '(' '(' k1=cident { partition_keys.push_back(k1); } ( ',' kn=cident { partition_keys.push_back(kn); } )* ')' ( ',' c1=cident { composite_keys.push_back(c1); } )* ')' // form 1: PRIMARY KEY ((pk1, pk2, ...), ck1, ...)
+    |   '(' k1=cident { partition_keys.push_back(k1); } ( ',' cn=cident { composite_keys.push_back(cn); } )* ')' // form 2: PRIMARY KEY (pk, ck1, ...)
+        ) // the statement is built right after the key list so the optional WITH properties can be applied to it
+        {
+             $expr = ::make_shared<create_view_statement>(
+                std::move(cf),
+                std::move(basecf),
+                std::move(sclause),
+                std::move(wclause),
+                std::move(partition_keys),
+                std::move(composite_keys),
+                if_not_exists);
+        }
+        ( K_WITH cfamProperty[{ $expr->properties() }] ( K_AND cfamProperty[{ $expr->properties() }] )*)?
+    ;

 #if 0
 /**
@@ -833,7 +872,7 @@ alterKeyspaceStatement returns [shared_ptr<cql3::statements::alter_keyspace_stat
 alterTableStatement returns [shared_ptr<alter_table_statement> expr]
    @init {
        alter_table_statement::type type;
-        auto props = make_shared<cql3::statements::cf_prop_defs>();;
+        auto props = make_shared<cql3::statements::cf_prop_defs>();
        std::vector<std::pair<shared_ptr<cql3::column_identifier::raw>, shared_ptr<cql3::column_identifier::raw>>> renames;
        bool is_static = false;
    }
@@ -867,6 +906,18 @@ alterTypeStatement returns [::shared_ptr<alter_type_statement> expr]
          )
    ;

+/**
+ * ALTER MATERIALIZED VIEW <CF> WITH <property> = <value>;
+ */
+alterViewStatement returns [::shared_ptr<alter_view_statement> expr]
+    @init {
+        auto props = make_shared<cql3::statements::cf_prop_defs>(); // handed to the properties rule below
+    }
+    : K_ALTER K_MATERIALIZED K_VIEW cf=columnFamilyName K_WITH properties[props] // K_WITH is not optional here
+    {
+        $expr = ::make_shared<alter_view_statement>(std::move(cf), std::move(props));
+    }
+    ;

 renames[::shared_ptr<alter_type_statement::renames> expr]
    : fromId=ident K_TO toId=ident { $expr->add_rename(fromId, toId); }
@@ -897,16 +948,23 @@ dropTypeStatement returns [::shared_ptr<drop_type_statement> stmt]
    : K_DROP K_TYPE (K_IF K_EXISTS { if_exists = true; } )? name=userTypeName { $stmt = ::make_shared<drop_type_statement>(name, if_exists); }
    ;

-#if 0
+/**
+ * DROP MATERIALIZED VIEW [IF EXISTS] <view_name>
+ */
+dropViewStatement returns [::shared_ptr<drop_view_statement> stmt]
+    @init { bool if_exists = false; } // becomes true only when IF EXISTS is present
+    : K_DROP K_MATERIALIZED K_VIEW (K_IF K_EXISTS { if_exists = true; } )? cf=columnFamilyName
+      { $stmt = ::make_shared<drop_view_statement>(cf, if_exists); }
+    ;
+
 /**
 * DROP INDEX [IF EXISTS] <INDEX_NAME>
 */
-dropIndexStatement returns [DropIndexStatement expr]
-    @init { boolean ifExists = false; }
-    : K_DROP K_INDEX (K_IF K_EXISTS { ifExists = true; } )? index=indexName
-      { $expr = new DropIndexStatement(index, ifExists); }
+dropIndexStatement returns [::shared_ptr<drop_index_statement> expr]
+    @init { bool if_exists = false; } // becomes true only when IF EXISTS is present
+    : K_DROP K_INDEX (K_IF K_EXISTS { if_exists = true; } )? index=indexName
+      { $expr = ::make_shared<drop_index_statement>(index, if_exists); }
    ;
-#endif

 /**
  * TRUNCATE <CF>;
@@ -1243,6 +1301,10 @@ normalColumnOperation[operations_type& operations, ::shared_ptr<cql3::column_ide
          }
          add_raw_update(operations, key, make_shared<cql3::operation::addition>(cql3::constants::literal::integer($i.text)));
      }
+    | K_SCYLLA_COUNTER_SHARD_LIST '(' t=term ')'
+      {
+          add_raw_update(operations, key, ::make_shared<cql3::operation::set_counter_value_from_tuple_list>(t));      
+      }
    ;

 specializedColumnOperation[std::vector<std::pair<shared_ptr<cql3::column_identifier::raw>,
@@ -1304,7 +1366,8 @@ relation[std::vector<cql3::relation_ptr>& clauses]

    | K_TOKEN l=tupleOfIdentifiers type=relationType t=term
        { $clauses.emplace_back(::make_shared<cql3::token_relation>(std::move(l), *type, std::move(t))); }
-
+    | name=cident K_IS K_NOT K_NULL {
+          $clauses.emplace_back(make_shared<cql3::single_column_relation>(std::move(name), cql3::operator_type::IS_NOT, cql3::constants::NULL_LITERAL)); }
    | name=cident K_IN marker=inMarker
        { $clauses.emplace_back(make_shared<cql3::single_column_relation>(std::move(name), cql3::operator_type::IN, std::move(marker))); }
    | name=cident K_IN in_values=singleColumnInValues
@@ -1404,12 +1467,16 @@ native_type returns [shared_ptr<cql3_type> t]
    | K_FLOAT     { $t = cql3_type::float_; }
    | K_INET      { $t = cql3_type::inet; }
    | K_INT       { $t = cql3_type::int_; }
+    | K_SMALLINT  { $t = cql3_type::smallint; }
    | K_TEXT      { $t = cql3_type::text; }
    | K_TIMESTAMP { $t = cql3_type::timestamp; }
+    | K_TINYINT   { $t = cql3_type::tinyint; }
    | K_UUID      { $t = cql3_type::uuid; }
    | K_VARCHAR   { $t = cql3_type::varchar; }
    | K_VARINT    { $t = cql3_type::varint; }
    | K_TIMEUUID  { $t = cql3_type::timeuuid; }
+    | K_DATE      { $t = cql3_type::date; }
+    | K_TIME      { $t = cql3_type::time; }
    ;

 collection_type returns [shared_ptr<cql3::cql3_type::raw> pt]
@@ -1483,6 +1550,8 @@ basic_unreserved_keyword returns [sstring str]
        | K_DISTINCT
        | K_CONTAINS
        | K_STATIC
+        | K_FROZEN
+        | K_TUPLE
        | K_FUNCTION
        | K_AGGREGATE
        | K_SFUNC
@@ -1528,6 +1597,8 @@ K_KEYSPACE:    ( K E Y S P A C E
 K_KEYSPACES:   K E Y S P A C E S;
 K_COLUMNFAMILY:( C O L U M N F A M I L Y
                 | T A B L E );
+K_MATERIALIZED:M A T E R I A L I Z E D;
+K_VIEW:        V I E W;
 K_INDEX:       I N D E X;
 K_CUSTOM:      C U S T O M;
 K_ON:          O N;
@@ -1551,6 +1622,7 @@ K_DESC:        D E S C;
 K_ALLOW:       A L L O W;
 K_FILTERING:   F I L T E R I N G;
 K_IF:          I F;
+K_IS:          I S;
 K_CONTAINS:    C O N T A I N S;

 K_GRANT:       G R A N T;
@@ -1580,6 +1652,8 @@ K_DOUBLE:      D O U B L E;
 K_FLOAT:       F L O A T;
 K_INET:        I N E T;
 K_INT:         I N T;
+K_SMALLINT:    S M A L L I N T;
+K_TINYINT:     T I N Y I N T;
 K_TEXT:        T E X T;
 K_UUID:        U U I D;
 K_VARCHAR:     V A R C H A R;
@@ -1587,6 +1661,8 @@ K_VARINT:      V A R I N T;
 K_TIMEUUID:    T I M E U U I D;
 K_TOKEN:       T O K E N;
 K_WRITETIME:   W R I T E T I M E;
+K_DATE:        D A T E;
+K_TIME:        T I M E;

 K_NULL:        N U L L;
 K_NOT:         N O T;
@@ -1616,6 +1692,7 @@ K_REPLACE:     R E P L A C E;
 K_DETERMINISTIC: D E T E R M I N I S T I C;

 K_SCYLLA_TIMEUUID_LIST_INDEX: S C Y L L A '_' T I M E U U I D '_' L I S T '_' I N D E X;
+K_SCYLLA_COUNTER_SHARD_LIST: S C Y L L A '_' C O U N T E R '_' S H A R D '_' L I S T; 

 // Case-insensitive alpha characters
 fragment A: ('a'|'A');
--- a/cql3/attributes.cc
+++ b/cql3/attributes.cc
@@ -71,10 +71,12 @@ int64_t attributes::get_timestamp(int64_t now, const query_options& options) {
    }

    auto tval = _timestamp->bind_and_get(options);
-    if (!tval) {
+    if (tval.is_null()) {
        throw exceptions::invalid_request_exception("Invalid null value of timestamp");
    }
-
+    if (tval.is_unset_value()) {
+        return now;
+    }
    try {
        data_type_for<int64_t>()->validate(*tval);
    } catch (marshal_exception e) {
@@ -88,10 +90,12 @@ int32_t attributes::get_time_to_live(const query_options& options) {
        return 0;

    auto tval = _time_to_live->bind_and_get(options);
-    if (!tval) {
+    if (tval.is_null()) {
        throw exceptions::invalid_request_exception("Invalid null value of TTL");
    }
-
+    if (tval.is_unset_value()) {
+        return 0;
+    }
    try {
        data_type_for<int32_t>()->validate(*tval);
    }
--- a/cql3/column_identifier.cc
+++ b/cql3/column_identifier.cc
@@ -23,6 +23,8 @@
 #include "exceptions/exceptions.hh"
 #include "cql3/selection/simple_selector.hh"

+#include <regex>
+
 namespace cql3 {

 column_identifier::column_identifier(sstring raw_text, bool keep_case) {
@@ -59,6 +61,17 @@ sstring column_identifier::to_string() const {
    return _text;
 }

+sstring column_identifier::to_cql_string() const {
+    static const std::regex unquoted_identifier_re("[a-z][a-z0-9_]*");
+    if (std::regex_match(_text.begin(), _text.end(), unquoted_identifier_re)) {
+        return _text; // already a valid unquoted identifier: emit it as is
+    }
+    static const std::regex double_quote_re("\"");
+    std::string result = _text;
+    result = std::regex_replace(result, double_quote_re, "\"\""); // regex_replace returns the new string and leaves its argument untouched, so the result must be kept
+    return '"' + result + '"';
+}
+
 column_identifier::raw::raw(sstring raw_text, bool keep_case)
    : _raw_text{raw_text}
    , _text{raw_text}
--- a/cql3/column_identifier.hh
+++ b/cql3/column_identifier.hh
@@ -47,7 +47,7 @@

 #include <algorithm>
 #include <functional>
-#include <iostream>
+#include <iosfwd>

 namespace cql3 {

@@ -80,6 +80,8 @@ public:

    sstring to_string() const;

+    sstring to_cql_string() const;
+
    friend std::ostream& operator<<(std::ostream& out, const column_identifier& i) {
        return out << i._text;
    }
--- a/cql3/constants.cc
+++ b/cql3/constants.cc
@@ -44,6 +44,7 @@

 namespace cql3 {

+thread_local const ::shared_ptr<constants::value> constants::UNSET_VALUE = ::make_shared<constants::value>(cql3::raw_value::make_unset_value());
 thread_local const ::shared_ptr<term::raw> constants::NULL_LITERAL = ::make_shared<constants::null_literal>();
 thread_local const ::shared_ptr<terminal> constants::null_literal::NULL_VALUE = ::make_shared<constants::null_literal::null_value>();

@@ -97,7 +98,9 @@ constants::literal::test_assignment(database& db, const sstring& keyspace, ::sha
                    cql3_type::kind::TEXT,
                    cql3_type::kind::INET,
                    cql3_type::kind::VARCHAR,
-                    cql3_type::kind::TIMESTAMP>::contains(kind)) {
+                    cql3_type::kind::TIMESTAMP,
+                    cql3_type::kind::DATE,
+                    cql3_type::kind::TIME>::contains(kind)) {
                return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
            }
            break;
@@ -109,7 +112,10 @@ constants::literal::test_assignment(database& db, const sstring& keyspace, ::sha
                    cql3_type::kind::DOUBLE,
                    cql3_type::kind::FLOAT,
                    cql3_type::kind::INT,
+                    cql3_type::kind::SMALLINT,
                    cql3_type::kind::TIMESTAMP,
+                    cql3_type::kind::DATE,
+                    cql3_type::kind::TINYINT,
                    cql3_type::kind::VARINT>::contains(kind)) {
                return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
            }
@@ -150,10 +156,10 @@ constants::literal::prepare(database& db, const sstring& keyspace, ::shared_ptr<
        throw exceptions::invalid_request_exception(sprint("Invalid %s constant (%s) for \"%s\" of type %s",
            _type, _text, *receiver->name, receiver->type->as_cql3_type()->to_string()));
    }
-    return ::make_shared<value>(std::experimental::make_optional(parsed_value(receiver->type)));
+    return ::make_shared<value>(cql3::raw_value::make_value(parsed_value(receiver->type)));
 }

-void constants::deleter::execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) {
+void constants::deleter::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
    if (column.type->is_multi_cell()) {
        collection_type_impl::mutation coll_m;
        coll_m.tomb = params.make_tombstone();
--- a/cql3/constants.hh
+++ b/cql3/constants.hh
@@ -44,6 +44,7 @@
 #include "cql3/abstract_marker.hh"
 #include "cql3/update_parameters.hh"
 #include "cql3/operation.hh"
+#include "cql3/values.hh"
 #include "cql3/term.hh"
 #include "core/shared_ptr.hh"

@@ -67,18 +68,20 @@ public:
    */
    class value : public terminal {
    public:
-        bytes_opt _bytes;
-        value(bytes_opt bytes_) : _bytes(std::move(bytes_)) {}
-        virtual bytes_opt get(const query_options& options) override { return _bytes; }
-        virtual bytes_view_opt bind_and_get(const query_options& options) override { return as_bytes_view_opt(_bytes); }
+        cql3::raw_value _bytes; // NOTE(review): to_string() dereferences this unconditionally - confirm that is safe when it holds null or unset
+        value(cql3::raw_value bytes_) : _bytes(std::move(bytes_)) {}
+        virtual cql3::raw_value get(const query_options& options) override { return _bytes; }
+        virtual cql3::raw_value_view bind_and_get(const query_options& options) override { return _bytes.to_view(); } // options is not consulted; the stored value is returned as a view
        virtual sstring to_string() const override { return to_hex(*_bytes); }
    };

+    static thread_local const ::shared_ptr<value> UNSET_VALUE;
+
    class null_literal final : public term::raw {
    private:
        class null_value final : public value {
        public:
-            null_value() : value({}) {}
+            null_value() : value(cql3::raw_value::make_null()) {}
            virtual ::shared_ptr<terminal> bind(const query_options& options) override { return {}; }
            virtual sstring to_string() const override { return "null"; }
        };
@@ -169,14 +172,13 @@ public:
            assert(!_receiver->type->is_collection());
        }

-        virtual bytes_view_opt bind_and_get(const query_options& options) override {
+        virtual cql3::raw_value_view bind_and_get(const query_options& options) override {
            try {
                auto value = options.get_value_at(_bind_index);
                if (value) {
                    _receiver->type->validate(*value);
-                    return *value;
                }
-                return std::experimental::nullopt;
+                return value;
            } catch (const marshal_exception& e) {
                throw exceptions::invalid_request_exception(e.what());
            }
@@ -187,7 +189,7 @@ public:
            if (!bytes) {
                return ::shared_ptr<terminal>{};
            }
-            return ::make_shared<constants::value>(std::move(to_bytes_opt(*bytes)));
+            return ::make_shared<constants::value>(std::move(cql3::raw_value::make_value(to_bytes(*bytes))));
        }
    };

@@ -195,54 +197,48 @@ public:
    public:
        using operation::operation;

-        virtual void execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) override {
+        virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override {
            auto value = _t->bind_and_get(params._options);
-            auto cell = value ? make_cell(*value, params) : make_dead_cell(params);
-            m.set_cell(prefix, column, std::move(cell));
+            if (value.is_null()) { // explicit null: write a dead cell
+                m.set_cell(prefix, column, std::move(make_dead_cell(params)));
+            } else if (value.is_value()) { // real value: write a live cell; any other state (e.g. unset) writes nothing
+                m.set_cell(prefix, column, std::move(make_cell(*value, params)));
+            }
        }
    };

-#if 0
-    public static class Adder extends Operation
-    {
-        public Adder(ColumnDefinition column, Term t)
-        {
-            super(column, t);
+    struct adder final : operation {
+        using operation::operation;
+
+        virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override {
+            auto value = _t->bind_and_get(params._options);
+            if (value.is_null()) { // a null increment is rejected outright
+                throw exceptions::invalid_request_exception("Invalid null value for counter increment");
+            } else if (value.is_unset_value()) { // unset: leave the mutation untouched
+                return;
+            }
+            auto increment = value_cast<int64_t>(long_type->deserialize_value(*value)); // the bound bytes are decoded through long_type
+            m.set_cell(prefix, column, make_counter_update_cell(increment, params));
        }
+    };

-        public void execute(ByteBuffer rowKey, ColumnFamily cf, Composite prefix, UpdateParameters params) throws InvalidRequestException
-        {
-            ByteBuffer bytes = t.bindAndGet(params.options);
-            if (bytes == null)
-                throw new InvalidRequestException("Invalid null value for counter increment");
-            long increment = ByteBufferUtil.toLong(bytes);
-            CellName cname = cf.getComparator().create(prefix, column);
-            cf.addColumn(params.makeCounter(cname, increment));
+    struct subtracter final : operation {
+        using operation::operation;
+
+        virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override {
+            auto value = _t->bind_and_get(params._options);
+            if (value.is_null()) { // a null operand is rejected outright
+                throw exceptions::invalid_request_exception("Invalid null value for counter increment");
+            } else if (value.is_unset_value()) { // unset: leave the mutation untouched
+                return;
+            }
+            auto increment = value_cast<int64_t>(long_type->deserialize_value(*value)); // the bound bytes are decoded through long_type
+            if (increment == std::numeric_limits<int64_t>::min()) { // -min() is not representable in int64_t, so refuse it before negating
+                throw exceptions::invalid_request_exception(sprint("The negation of %d overflows supported counter precision (signed 8 bytes integer)", increment));
+            }
+            m.set_cell(prefix, column, make_counter_update_cell(-increment, params)); // subtraction is stored as an update by the negated amount
        }
-    }
-
-    public static class Substracter extends Operation
-    {
-        public Substracter(ColumnDefinition column, Term t)
-        {
-            super(column, t);
-        }
-
-        public void execute(ByteBuffer rowKey, ColumnFamily cf, Composite prefix, UpdateParameters params) throws InvalidRequestException
-        {
-            ByteBuffer bytes = t.bindAndGet(params.options);
-            if (bytes == null)
-                throw new InvalidRequestException("Invalid null value for counter increment");
-
-            long increment = ByteBufferUtil.toLong(bytes);
-            if (increment == Long.MIN_VALUE)
-                throw new InvalidRequestException("The negation of " + increment + " overflows supported counter precision (signed 8 bytes integer)");
-
-            CellName cname = cf.getComparator().create(prefix, column);
-            cf.addColumn(params.makeCounter(cname, -increment));
-        }
-    }
-#endif
+    };

    class deleter : public operation {
    public:
@@ -250,7 +246,7 @@ public:
            : operation(column, {})
        { }

-        virtual void execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) override;
+        virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;
    };
 };

--- a/cql3/cql3_type.cc
+++ b/cql3/cql3_type.cc
@@ -19,11 +19,39 @@
 * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
 */

+#include <iostream>
+#include <iterator>
+#include <regex>
+
 #include "cql3_type.hh"
+#include "cql3/util.hh"
 #include "ut_name.hh"

 namespace cql3 {

+sstring cql3_type::to_string() const {
+    if (_type->is_user_type()) {
+        return "frozen<" + util::maybe_quote(_name) + ">"; // user type names go through maybe_quote before being wrapped
+    }
+    if (_type->is_tuple()) {
+        return "frozen<" + _name + ">"; // tuples are wrapped as well, but _name is used verbatim
+    }
+    return _name; // every other type prints its stored name unchanged
+}
+
+shared_ptr<cql3_type> cql3_type::raw::prepare(database& db, const sstring& keyspace) {
+    try {
+        auto&& ks = db.find_keyspace(keyspace);
+        return prepare_internal(keyspace, ks.metadata()->user_types()); // the actual resolution is delegated to prepare_internal
+    } catch (no_such_keyspace& nsk) { // also covers a no_such_keyspace escaping from prepare_internal
+        throw exceptions::invalid_request_exception("Unknown keyspace " + keyspace);
+    }
+}
+
+bool cql3_type::raw::references_user_type(const sstring& name) const {
+    return false; // base-class default; the argument is not inspected
+}
+
 class cql3_type::raw_type : public raw {
 private:
    shared_ptr<cql3_type> _type;
@@ -35,6 +63,9 @@ public:
    virtual shared_ptr<cql3_type> prepare(database& db, const sstring& keyspace) {
        return _type;
    }
+    shared_ptr<cql3_type> prepare_internal(const sstring&, lw_shared_ptr<user_types_metadata>) override {
+        return _type; // both arguments are ignored; the stored _type is returned as is
+    }

    virtual bool supports_freezing() const {
        return false;
@@ -76,7 +107,7 @@ public:
        return true;
    }

-    virtual shared_ptr<cql3_type> prepare(database& db, const sstring& keyspace) override {
+    virtual shared_ptr<cql3_type> prepare_internal(const sstring& keyspace, lw_shared_ptr<user_types_metadata> user_types) override {
        assert(_values); // "Got null values type for a collection";

        if (!_frozen && _values->supports_freezing() && !_values->_frozen) {
@@ -93,16 +124,20 @@ public:
        }

        if (_kind == &collection_type_impl::kind::list) {
-            return make_shared(cql3_type(to_string(), list_type_impl::get_instance(_values->prepare(db, keyspace)->get_type(), !_frozen), false));
+            return make_shared(cql3_type(to_string(), list_type_impl::get_instance(_values->prepare_internal(keyspace, user_types)->get_type(), !_frozen), false));
        } else if (_kind == &collection_type_impl::kind::set) {
-            return make_shared(cql3_type(to_string(), set_type_impl::get_instance(_values->prepare(db, keyspace)->get_type(), !_frozen), false));
+            return make_shared(cql3_type(to_string(), set_type_impl::get_instance(_values->prepare_internal(keyspace, user_types)->get_type(), !_frozen), false));
        } else if (_kind == &collection_type_impl::kind::map) {
            assert(_keys); // "Got null keys type for a collection";
-            return make_shared(cql3_type(to_string(), map_type_impl::get_instance(_keys->prepare(db, keyspace)->get_type(), _values->prepare(db, keyspace)->get_type(), !_frozen), false));
+            return make_shared(cql3_type(to_string(), map_type_impl::get_instance(_keys->prepare_internal(keyspace, user_types)->get_type(), _values->prepare_internal(keyspace, user_types)->get_type(), !_frozen), false));
        }
        abort();
    }

+    bool references_user_type(const sstring& name) const override {
+        return (_keys && _keys->references_user_type(name)) || _values->references_user_type(name); // _keys is null-checked; _values is dereferenced unconditionally
+    }
+
    virtual sstring to_string() const override {
        sstring start = _frozen ? "frozen<" : "";
        sstring end = _frozen ? ">" : "";
@@ -132,7 +167,7 @@ public:
        _frozen = true;
    }

-    virtual shared_ptr<cql3_type> prepare(database& db, const sstring& keyspace) override {
+    virtual shared_ptr<cql3_type> prepare_internal(const sstring& keyspace, lw_shared_ptr<user_types_metadata> user_types) override {
        if (_name.has_keyspace()) {
            // The provided keyspace is the one of the current statement this is part of. If it's different from the keyspace of
            // the UTName, we reject since we want to limit user types to their own keyspace (see #6643)
@@ -144,23 +179,23 @@ public:
        } else {
            _name.set_keyspace(keyspace);
        }
-
+        if (!user_types) {
+            // bootstrap mode.
+            throw exceptions::invalid_request_exception(sprint("Unknown type %s", _name));
+        }
        try {
-            auto&& ks = db.find_keyspace(_name.get_keyspace());
-            try {
-                auto&& type = ks.metadata()->user_types()->get_type(_name.get_user_type_name());
-                if (!_frozen) {
-                    throw exceptions::invalid_request_exception("Non-frozen User-Defined types are not supported, please use frozen<>");
-                }
-                return make_shared<cql3_type>(_name.to_string(), std::move(type));
-            } catch (std::out_of_range& e) {
-                throw exceptions::invalid_request_exception(sprint("Unknown type %s", _name));
+            auto&& type = user_types->get_type(_name.get_user_type_name());
+            if (!_frozen) {
+                throw exceptions::invalid_request_exception("Non-frozen User-Defined types are not supported, please use frozen<>");
            }
-        } catch (no_such_keyspace& nsk) {
-            throw exceptions::invalid_request_exception("Unknown keyspace " + _name.get_keyspace());
+            return make_shared<cql3_type>(_name.to_string(), std::move(type));
+        } catch (std::out_of_range& e) {
+            throw exceptions::invalid_request_exception(sprint("Unknown type %s", _name));
        }
    }
-
+    bool references_user_type(const sstring& name) const override {
+        return _name.get_string_type_name() == name; // NOTE(review): presumably compares the bare type name without the keyspace - confirm against ut_name
+    }
    virtual bool supports_freezing() const override {
        return true;
    }
@@ -191,7 +226,7 @@ public:
        }
        _frozen = true;
    }
-    virtual shared_ptr<cql3_type> prepare(database& db, const sstring& keyspace) override {
+    virtual shared_ptr<cql3_type> prepare_internal(const sstring& keyspace, lw_shared_ptr<user_types_metadata> user_types) override {
        if (!_frozen) {
            freeze();
        }
@@ -200,10 +235,17 @@ public:
            if (t->is_counter()) {
                throw exceptions::invalid_request_exception("Counters are not allowed inside tuples");
            }
-            ts.push_back(t->prepare(db, keyspace)->get_type());
+            ts.push_back(t->prepare_internal(keyspace, user_types)->get_type());
        }
        return make_cql3_tuple_type(tuple_type_impl::get_instance(std::move(ts)));
    }
+
+    bool references_user_type(const sstring& name) const override {
+        return std::any_of(_types.begin(), _types.end(), [&name](const auto& t) { // by reference: no per-element pointer copy
+            return t->references_user_type(name); // true as soon as one component references the type
+        });
+    }
+
    virtual sstring to_string() const override {
        return sprint("tuple<%s>", join(", ", _types));
    }
@@ -271,13 +313,18 @@ thread_local shared_ptr<cql3_type> cql3_type::bigint = make("bigint", long_type,
 thread_local shared_ptr<cql3_type> cql3_type::blob = make("blob", bytes_type, cql3_type::kind::BLOB);
 thread_local shared_ptr<cql3_type> cql3_type::boolean = make("boolean", boolean_type, cql3_type::kind::BOOLEAN);
 thread_local shared_ptr<cql3_type> cql3_type::double_ = make("double", double_type, cql3_type::kind::DOUBLE);
+thread_local shared_ptr<cql3_type> cql3_type::empty = make("empty", empty_type, cql3_type::kind::EMPTY);
 thread_local shared_ptr<cql3_type> cql3_type::float_ = make("float", float_type, cql3_type::kind::FLOAT);
 thread_local shared_ptr<cql3_type> cql3_type::int_ = make("int", int32_type, cql3_type::kind::INT);
+thread_local shared_ptr<cql3_type> cql3_type::smallint = make("smallint", short_type, cql3_type::kind::SMALLINT);
 thread_local shared_ptr<cql3_type> cql3_type::text = make("text", utf8_type, cql3_type::kind::TEXT);
 thread_local shared_ptr<cql3_type> cql3_type::timestamp = make("timestamp", timestamp_type, cql3_type::kind::TIMESTAMP);
+thread_local shared_ptr<cql3_type> cql3_type::tinyint = make("tinyint", byte_type, cql3_type::kind::TINYINT);
 thread_local shared_ptr<cql3_type> cql3_type::uuid = make("uuid", uuid_type, cql3_type::kind::UUID);
 thread_local shared_ptr<cql3_type> cql3_type::varchar = make("varchar", utf8_type, cql3_type::kind::TEXT);
 thread_local shared_ptr<cql3_type> cql3_type::timeuuid = make("timeuuid", timeuuid_type, cql3_type::kind::TIMEUUID);
+thread_local shared_ptr<cql3_type> cql3_type::date = make("date", simple_date_type, cql3_type::kind::DATE);
+thread_local shared_ptr<cql3_type> cql3_type::time = make("time", time_type, cql3_type::kind::TIME);
 thread_local shared_ptr<cql3_type> cql3_type::inet = make("inet", inet_addr_type, cql3_type::kind::INET);
 thread_local shared_ptr<cql3_type> cql3_type::varint = make("varint", varint_type, cql3_type::kind::VARINT);
 thread_local shared_ptr<cql3_type> cql3_type::decimal = make("decimal", decimal_type, cql3_type::kind::DECIMAL);
@@ -293,15 +340,20 @@ cql3_type::values() {
        cql3_type::counter,
        cql3_type::decimal,
        cql3_type::double_,
+        cql3_type::empty,
        cql3_type::float_,
-        cql3_type:inet,
+        cql3_type::inet,
        cql3_type::int_,
+        cql3_type::smallint,
        cql3_type::text,
        cql3_type::timestamp,
+        cql3_type::tinyint,
        cql3_type::uuid,
        cql3_type::varchar,
        cql3_type::varint,
        cql3_type::timeuuid,
+        cql3_type::date,
+        cql3_type::time,
    };
    return v;
 }
@@ -321,5 +373,23 @@ operator<<(std::ostream& os, const cql3_type::raw& r) {
    return os << r.to_string();
 }

+namespace util {
+
+// Returns s unchanged when it is safe as an unquoted CQL identifier; otherwise double-quotes it, doubling embedded quotes.
+sstring maybe_quote(const sstring& s) {
+    static const std::regex unquoted("[a-z][a-z0-9_]*"); // "\\w*" also passed upper case, leading digits and ""
+    static const std::regex double_quote("\"");
+    if (std::regex_match(s.begin(), s.end(), unquoted)) {
+        return s;
+    }
+    std::ostringstream ss;
+    ss << "\"";
+    std::regex_replace(std::ostreambuf_iterator<char>(ss), s.begin(), s.end(), double_quote, "\"\"");
+    ss << "\"";
+    return ss.str();
+}
+
+}
+
 }

--- a/cql3/cql3_type.hh
+++ b/cql3/cql3_type.hh
@@ -47,6 +47,7 @@
 #include "enum_set.hh"

 class database;
+class user_types_metadata;

 namespace cql3 {

@@ -63,19 +64,22 @@ public:
    bool is_counter() const { return _type->is_counter(); }
    bool is_native() const { return _native; }
    data_type get_type() const { return _type; }
-    sstring to_string() const { return _name; }
+    sstring to_string() const;

    // For UserTypes, we need to know the current keyspace to resolve the
    // actual type used, so Raw is a "not yet prepared" CQL3Type.
    class raw {
    public:
+        virtual ~raw() {}
        bool _frozen = false;
        virtual bool supports_freezing() const = 0;
        virtual bool is_collection() const;
        virtual bool is_counter() const;
+        virtual bool references_user_type(const sstring&) const;
        virtual std::experimental::optional<sstring> keyspace() const;
        virtual void freeze();
-        virtual shared_ptr<cql3_type> prepare(database& db, const sstring& keyspace) = 0;
+        virtual shared_ptr<cql3_type> prepare_internal(const sstring& keyspace, lw_shared_ptr<user_types_metadata>) = 0;
+        virtual shared_ptr<cql3_type> prepare(database& db, const sstring& keyspace);
        static shared_ptr<raw> from(shared_ptr<cql3_type> type);
        static shared_ptr<raw> user_type(ut_name name);
        static shared_ptr<raw> map(shared_ptr<raw> t1, shared_ptr<raw> t2);
@@ -98,7 +102,7 @@ private:

 public:
    enum class kind : int8_t {
-        ASCII, BIGINT, BLOB, BOOLEAN, COUNTER, DECIMAL, DOUBLE, FLOAT, INT, INET, TEXT, TIMESTAMP, UUID, VARCHAR, VARINT, TIMEUUID
+        ASCII, BIGINT, BLOB, BOOLEAN, COUNTER, DECIMAL, DOUBLE, EMPTY, FLOAT, INT, SMALLINT, TINYINT, INET, TEXT, TIMESTAMP, UUID, VARCHAR, VARINT, TIMEUUID, DATE, TIME
    };
    using kind_enum = super_enum<kind,
        kind::ASCII,
@@ -108,15 +112,20 @@ public:
        kind::COUNTER,
        kind::DECIMAL,
        kind::DOUBLE,
+        kind::EMPTY,
        kind::FLOAT,
        kind::INET,
        kind::INT,
+        kind::SMALLINT,
+        kind::TINYINT,
        kind::TEXT,
        kind::TIMESTAMP,
        kind::UUID,
        kind::VARCHAR,
        kind::VARINT,
-        kind::TIMEUUID>;
+        kind::TIMEUUID,
+        kind::DATE,
+        kind::TIME>;
    using kind_enum_set = enum_set<kind_enum>;
 private:
    std::experimental::optional<kind_enum_set::prepared> _kind;
@@ -129,13 +138,18 @@ public:
    static thread_local shared_ptr<cql3_type> blob;
    static thread_local shared_ptr<cql3_type> boolean;
    static thread_local shared_ptr<cql3_type> double_;
+    static thread_local shared_ptr<cql3_type> empty;
    static thread_local shared_ptr<cql3_type> float_;
    static thread_local shared_ptr<cql3_type> int_;
+    static thread_local shared_ptr<cql3_type> smallint;
    static thread_local shared_ptr<cql3_type> text;
    static thread_local shared_ptr<cql3_type> timestamp;
+    static thread_local shared_ptr<cql3_type> tinyint;
    static thread_local shared_ptr<cql3_type> uuid;
    static thread_local shared_ptr<cql3_type> varchar;
    static thread_local shared_ptr<cql3_type> timeuuid;
+    static thread_local shared_ptr<cql3_type> date;
+    static thread_local shared_ptr<cql3_type> time;
    static thread_local shared_ptr<cql3_type> inet;
    static thread_local shared_ptr<cql3_type> varint;
    static thread_local shared_ptr<cql3_type> decimal;
--- a/cql3/cql_statement.hh
+++ b/cql3/cql_statement.hh
@@ -46,7 +46,7 @@
 #include "service/storage_proxy.hh"
 #include "cql3/query_options.hh"

-namespace transport {
+namespace cql_transport {

 namespace messages {

@@ -89,7 +89,7 @@ public:
     * @param state the current query state
     * @param options options for this query (consistency, variables, pageSize, ...)
     */
-    virtual future<::shared_ptr<transport::messages::result_message>>
+    virtual future<::shared_ptr<cql_transport::messages::result_message>>
        execute(distributed<service::storage_proxy>& proxy, service::query_state& state, const query_options& options) = 0;

    /**
@@ -97,7 +97,7 @@ public:
     *
     * @param state the current query state
     */
-    virtual future<::shared_ptr<transport::messages::result_message>>
+    virtual future<::shared_ptr<cql_transport::messages::result_message>>
        execute_internal(distributed<service::storage_proxy>& proxy, service::query_state& state, const query_options& options) = 0;

    virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const = 0;
--- a/cql3/error_listener.hh
+++ b/cql3/error_listener.hh
@@ -41,6 +41,7 @@

 #pragma once

+#include "seastarx.hh"
 #include <seastar/core/sstring.hh>
 #include <antlr3.hpp>

--- a/cql3/functions/abstract_function.hh
+++ b/cql3/functions/abstract_function.hh
@@ -43,7 +43,7 @@

 #include "types.hh"
 #include <vector>
-#include <iostream>
+#include <iosfwd>
 #include <boost/functional/hash.hpp>

 namespace cql3 {
--- a/cql3/functions/function_call.hh
+++ b/cql3/functions/function_call.hh
@@ -59,13 +59,13 @@ public:
    virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override;
    virtual void collect_marker_specification(shared_ptr<variable_specifications> bound_names) override;
    virtual shared_ptr<terminal> bind(const query_options& options) override;
-    virtual bytes_view_opt bind_and_get(const query_options& options) override;
+    virtual cql3::raw_value_view bind_and_get(const query_options& options) override;
 private:
    static bytes_opt execute_internal(cql_serialization_format sf, scalar_function& fun, std::vector<bytes_opt> params);
 public:
    virtual bool contains_bind_marker() const override;
 private:
-    static shared_ptr<terminal> make_terminal(shared_ptr<function> fun, bytes_opt result, cql_serialization_format sf);
+    static shared_ptr<terminal> make_terminal(shared_ptr<function> fun, cql3::raw_value result, cql_serialization_format sf);
 public:
    class raw : public term::raw {
        function_name _name;
--- a/cql3/functions/function_name.hh
+++ b/cql3/functions/function_name.hh
@@ -43,7 +43,7 @@

 #include "core/sstring.hh"
 #include "db/system_keyspace.hh"
-#include <iostream>
+#include <iosfwd>
 #include <functional>

 namespace cql3 {
--- a/cql3/functions/functions.cc
+++ b/cql3/functions/functions.cc
@@ -67,6 +67,18 @@ functions::init() {
    declare(aggregate_fcts::make_max_function<int64_t>());
    declare(aggregate_fcts::make_min_function<int64_t>());

+    declare(aggregate_fcts::make_count_function<float>());
+    declare(aggregate_fcts::make_max_function<float>());
+    declare(aggregate_fcts::make_min_function<float>());
+
+    declare(aggregate_fcts::make_count_function<double>());
+    declare(aggregate_fcts::make_max_function<double>());
+    declare(aggregate_fcts::make_min_function<double>());
+
+    declare(aggregate_fcts::make_count_function<sstring>());
+    declare(aggregate_fcts::make_max_function<sstring>());
+    declare(aggregate_fcts::make_min_function<sstring>());
+
    //FIXME:
    //declare(aggregate_fcts::make_count_function<bytes>());
    //declare(aggregate_fcts::make_max_function<bytes>());
@@ -78,15 +90,17 @@ functions::init() {
    declare(make_blob_as_varchar_fct());
    declare(aggregate_fcts::make_sum_function<int32_t>());
    declare(aggregate_fcts::make_sum_function<int64_t>());
-    declare(aggregate_fcts::make_avg_function<int32_t>());
-    declare(aggregate_fcts::make_avg_function<int64_t>());
+    declare(aggregate_fcts::make_sum_function<float>());
+    declare(aggregate_fcts::make_sum_function<double>());
 #if 0
-    declare(AggregateFcts.sumFunctionForFloat);
-    declare(AggregateFcts.sumFunctionForDouble);
    declare(AggregateFcts.sumFunctionForDecimal);
    declare(AggregateFcts.sumFunctionForVarint);
-    declare(AggregateFcts.avgFunctionForFloat);
-    declare(AggregateFcts.avgFunctionForDouble);
+#endif
+    declare(aggregate_fcts::make_avg_function<int32_t>());
+    declare(aggregate_fcts::make_avg_function<int64_t>());
+    declare(aggregate_fcts::make_avg_function<float>());
+    declare(aggregate_fcts::make_avg_function<double>());
+#if 0
    declare(AggregateFcts.avgFunctionForVarint);
    declare(AggregateFcts.avgFunctionForDecimal);
 #endif
@@ -299,10 +313,10 @@ function_call::collect_marker_specification(shared_ptr<variable_specifications>

 shared_ptr<terminal>
 function_call::bind(const query_options& options) {
-    return make_terminal(_fun, to_bytes_opt(bind_and_get(options)), options.get_cql_serialization_format());
+    return make_terminal(_fun, cql3::raw_value::make_value(bind_and_get(options)), options.get_cql_serialization_format());
 }

-bytes_view_opt
+cql3::raw_value_view
 function_call::bind_and_get(const query_options& options) {
    std::vector<bytes_opt> buffers;
    buffers.reserve(_terms.size());
@@ -316,7 +330,7 @@ function_call::bind_and_get(const query_options& options) {
        buffers.push_back(std::move(to_bytes_opt(val)));
    }
    auto result = execute_internal(options.get_cql_serialization_format(), *_fun, std::move(buffers));
-    return options.make_temporary(result);
+    return options.make_temporary(cql3::raw_value::make_value(result));
 }

 bytes_opt
@@ -347,7 +361,7 @@ function_call::contains_bind_marker() const {
 }

 shared_ptr<terminal>
-function_call::make_terminal(shared_ptr<function> fun, bytes_opt result, cql_serialization_format sf)  {
+function_call::make_terminal(shared_ptr<function> fun, cql3::raw_value result, cql_serialization_format sf)  {
    if (!dynamic_pointer_cast<const collection_type_impl>(fun->return_type())) {
        return ::make_shared<constants::value>(std::move(result));
    }
@@ -413,7 +427,7 @@ function_call::raw::prepare(database& db, const sstring& keyspace, ::shared_ptr<
    // If all parameters are terminal and the function is pure, we can
    // evaluate it now, otherwise we'd have to wait execution time
    if (all_terminal && scalar_fun->is_pure()) {
-        return make_terminal(scalar_fun, execute(*scalar_fun, parameters), query_options::DEFAULT.get_cql_serialization_format());
+        return make_terminal(scalar_fun, cql3::raw_value::make_value(execute(*scalar_fun, parameters)), query_options::DEFAULT.get_cql_serialization_format());
    } else {
        return ::make_shared<function_call>(scalar_fun, parameters);
    }
@@ -426,7 +440,7 @@ function_call::raw::execute(scalar_function& fun, std::vector<shared_ptr<term>>
    for (auto&& t : parameters) {
        assert(dynamic_cast<terminal*>(t.get()));
        auto&& param = static_cast<terminal*>(t.get())->get(query_options::DEFAULT);
-        buffers.push_back(std::move(param));
+        buffers.push_back(std::move(to_bytes_opt(param)));
    }

    return execute_internal(cql_serialization_format::internal(), fun, buffers);
--- a/cql3/keyspace_element_name.hh
+++ b/cql3/keyspace_element_name.hh
@@ -42,6 +42,7 @@
 #pragma once

 #include "core/sstring.hh"
+#include "seastarx.hh"

 #include <experimental/optional>

--- a/cql3/lists.cc
+++ b/cql3/lists.cc
@@ -111,7 +111,7 @@ lists::literal::test_assignment(database& db, const sstring& keyspace, shared_pt

 sstring
 lists::literal::to_string() const {
-    return ::to_string(_elements);
+    return std::to_string(_elements);
 }

 lists::value
@@ -133,9 +133,9 @@ lists::value::from_serialized(bytes_view v, list_type type, cql_serialization_fo
    }
 }

-bytes_opt
+cql3::raw_value
 lists::value::get(const query_options& options) {
-    return get_with_protocol_version(options.get_cql_serialization_format());
+    return cql3::raw_value::make_value(get_with_protocol_version(options.get_cql_serialization_format()));
 }

 bytes
@@ -196,10 +196,12 @@ lists::delayed_value::bind(const query_options& options) {
    for (auto&& t : _elements) {
        auto bo = t->bind_and_get(options);

-        if (!bo) {
+        if (bo.is_null()) {
            throw exceptions::invalid_request_exception("null is not supported inside collections");
        }
-
+        if (bo.is_unset_value()) {
+            return constants::UNSET_VALUE;
+        }
        // We don't support value > 64K because the serialization format encode the length as an unsigned short.
        if (bo->size() > std::numeric_limits<uint16_t>::max()) {
            throw exceptions::invalid_request_exception(sprint("List value is too long. List values are limited to %d bytes but %d bytes value provided",
@@ -216,8 +218,10 @@ lists::delayed_value::bind(const query_options& options) {
 lists::marker::bind(const query_options& options) {
    const auto& value = options.get_value_at(_bind_index);
    auto ltype = static_pointer_cast<const list_type_impl>(_receiver->type);
-    if (!value) {
+    if (value.is_null()) {
        return nullptr;
+    } else if (value.is_unset_value()) {
+        return constants::UNSET_VALUE;
    } else {
        return make_shared(value::from_serialized(*value, std::move(ltype), options.get_cql_serialization_format()));
    }
@@ -238,7 +242,11 @@ lists::precision_time::get_next(db_clock::time_point millis) {
 }

 void
-lists::setter::execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) {
+lists::setter::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
+    const auto& value = _t->bind(params._options);
+    if (value == constants::UNSET_VALUE) {
+        return;
+    }
    if (column.type->is_multi_cell()) {
        // delete + append
        collection_type_impl::mutation mut;
@@ -247,7 +255,7 @@ lists::setter::execute(mutation& m, const exploded_clustering_prefix& prefix, co
        auto col_mut = ctype->serialize_mutation_form(std::move(mut));
        m.set_cell(prefix, column, std::move(col_mut));
    }
-    do_append(_t, m, prefix, column, params);
+    do_append(value, m, prefix, column, params);
 }

 bool
@@ -262,24 +270,24 @@ lists::setter_by_index::collect_marker_specification(shared_ptr<variable_specifi
 }

 void
-lists::setter_by_index::execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) {
+lists::setter_by_index::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
    // we should not get here for frozen lists
    assert(column.type->is_multi_cell()); // "Attempted to set an individual element on a frozen list";

-    std::experimental::optional<clustering_key> row_key;
-    if (!column.is_static()) {
-        row_key = clustering_key::from_clustering_prefix(*params._schema, prefix);
-    }
-
    auto index = _idx->bind_and_get(params._options);
-    auto value = _t->bind_and_get(params._options);
-
-    if (!index) {
+    if (index.is_null()) {
        throw exceptions::invalid_request_exception("Invalid null value for list index");
    }
+    if (index.is_unset_value()) {
+        throw exceptions::invalid_request_exception("Invalid unset value for list index");
+    }
+    auto value = _t->bind_and_get(params._options);
+    if (value.is_unset_value()) {
+        return;
+    }

    auto idx = net::ntoh(int32_t(*unaligned_cast<int32_t>(index->begin())));
-    auto&& existing_list_opt = params.get_prefetched_list(m.key(), std::move(row_key), column);
+    auto&& existing_list_opt = params.get_prefetched_list(m.key().view(), prefix.view(), column);
    if (!existing_list_opt) {
        throw exceptions::invalid_request_exception("Attempted to set an element on a list which is null");
    }
@@ -314,15 +322,10 @@ lists::setter_by_uuid::requires_read() {
 }

 void
-lists::setter_by_uuid::execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) {
+lists::setter_by_uuid::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
    // we should not get here for frozen lists
    assert(column.type->is_multi_cell()); // "Attempted to set an individual element on a frozen list";

-    std::experimental::optional<clustering_key> row_key;
-    if (!column.is_static()) {
-        row_key = clustering_key::from_clustering_prefix(*params._schema, prefix);
-    }
-
    auto index = _idx->bind_and_get(params._options);
    auto value = _t->bind_and_get(params._options);

@@ -342,24 +345,27 @@ lists::setter_by_uuid::execute(mutation& m, const exploded_clustering_prefix& pr
 }

 void
-lists::appender::execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) {
+lists::appender::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
+    const auto& value = _t->bind(params._options);
+    if (value == constants::UNSET_VALUE) {
+        return;
+    }
    assert(column.type->is_multi_cell()); // "Attempted to append to a frozen list";
-    do_append(_t, m, prefix, column, params);
+    do_append(value, m, prefix, column, params);
 }

 void
-lists::do_append(shared_ptr<term> t,
+lists::do_append(shared_ptr<term> value,
        mutation& m,
-        const exploded_clustering_prefix& prefix,
+        const clustering_key_prefix& prefix,
        const column_definition& column,
        const update_parameters& params) {
-    auto&& value = t->bind(params._options);
    auto&& list_value = dynamic_pointer_cast<lists::value>(value);
    auto&& ltype = dynamic_pointer_cast<const list_type_impl>(column.type);
    if (column.type->is_multi_cell()) {
        // If we append null, do nothing. Note that for Setter, we've
        // already removed the previous value so we're good here too
-        if (!value) {
+        if (!value || value == constants::UNSET_VALUE) {
            return;
        }

@@ -385,10 +391,10 @@ lists::do_append(shared_ptr<term> t,
 }

 void
-lists::prepender::execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) {
+lists::prepender::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
    assert(column.type->is_multi_cell()); // "Attempted to prepend to a frozen list";
    auto&& value = _t->bind(params._options);
-    if (!value) {
+    if (!value || value == constants::UNSET_VALUE) {
        return;
    }

@@ -417,15 +423,10 @@ lists::discarder::requires_read() {
 }

 void
-lists::discarder::execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) {
+lists::discarder::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
    assert(column.type->is_multi_cell()); // "Attempted to delete from a frozen list";

-    std::experimental::optional<clustering_key> row_key;
-    if (!column.is_static()) {
-        row_key = clustering_key::from_clustering_prefix(*params._schema, prefix);
-    }
-
-    auto&& existing_list = params.get_prefetched_list(m.key(), std::move(row_key), column);
+    auto&& existing_list = params.get_prefetched_list(m.key().view(), prefix.view(), column);
    // We want to call bind before possibly returning to reject queries where the value provided is not a list.
    auto&& value = _t->bind(params._options);

@@ -441,7 +442,7 @@ lists::discarder::execute(mutation& m, const exploded_clustering_prefix& prefix,
        return;
    }

-    if (!value) {
+    if (!value || value == constants::UNSET_VALUE) {
        return;
    }

@@ -474,22 +475,21 @@ lists::discarder_by_index::requires_read() {
 }

 void
-lists::discarder_by_index::execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) {
+lists::discarder_by_index::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
    assert(column.type->is_multi_cell()); // "Attempted to delete an item by index from a frozen list";
    auto&& index = _t->bind(params._options);
    if (!index) {
        throw exceptions::invalid_request_exception("Invalid null value for list index");
    }
+    if (index == constants::UNSET_VALUE) {
+        return;
+    }

    auto ltype = static_pointer_cast<const list_type_impl>(column.type);
    auto cvalue = dynamic_pointer_cast<constants::value>(index);
    assert(cvalue);

-    std::experimental::optional<clustering_key> row_key;
-    if (!column.is_static()) {
-        row_key = clustering_key::from_clustering_prefix(*params._schema, prefix);
-    }
-    auto&& existing_list_opt = params.get_prefetched_list(m.key(), std::move(row_key), column);
+    auto&& existing_list_opt = params.get_prefetched_list(m.key().view(), prefix.view(), column);
    int32_t idx = read_simple_exactly<int32_t>(*cvalue->_bytes);
    if (!existing_list_opt) {
        throw exceptions::invalid_request_exception("Attempted to delete an element from a list which is null");
--- a/cql3/lists.hh
+++ b/cql3/lists.hh
@@ -80,7 +80,7 @@ public:
            : _elements(std::move(elements)) {
        }
        static value from_serialized(bytes_view v, list_type type, cql_serialization_format sf);
-        virtual bytes_opt get(const query_options& options) override;
+        virtual cql3::raw_value get(const query_options& options) override;
        virtual bytes get_with_protocol_version(cql_serialization_format sf) override;
        bool equals(shared_ptr<list_type_impl> lt, const value& v);
        virtual std::vector<bytes_opt> get_elements() override;
@@ -146,7 +146,7 @@ public:
        setter(const column_definition& column, shared_ptr<term> t)
                : operation(column, std::move(t)) {
        }
-        virtual void execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) override;
+        virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;
    };

    class setter_by_index : public operation {
@@ -158,7 +158,7 @@ public:
        }
        virtual bool requires_read() override;
        virtual void collect_marker_specification(shared_ptr<variable_specifications> bound_names);
-        virtual void execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) override;
+        virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;
    };

    class setter_by_uuid : public setter_by_index {
@@ -167,25 +167,25 @@ public:
            : setter_by_index(column, std::move(idx), std::move(t)) {
        }
        virtual bool requires_read() override;
-        virtual void execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) override;
+        virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;
    };

    class appender : public operation {
    public:
        using operation::operation;
-        virtual void execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) override;
+        virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;
    };

-    static void do_append(shared_ptr<term> t,
+    static void do_append(shared_ptr<term> value,
            mutation& m,
-            const exploded_clustering_prefix& prefix,
+            const clustering_key_prefix& prefix,
            const column_definition& column,
            const update_parameters& params);

    class prepender : public operation {
    public:
        using operation::operation;
-        virtual void execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) override;
+        virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;
    };

    class discarder : public operation {
@@ -194,7 +194,7 @@ public:
                : operation(column, std::move(t)) {
        }
        virtual bool requires_read() override;
-        virtual void execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) override;
+        virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;
    };

    class discarder_by_index : public operation {
@@ -203,7 +203,7 @@ public:
                : operation(column, std::move(idx)) {
        }
        virtual bool requires_read() override;
-        virtual void execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params);
+        virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params);
    };
 };

--- a/cql3/maps.cc
+++ b/cql3/maps.cc
@@ -169,9 +169,9 @@ maps::value::from_serialized(bytes_view value, map_type type, cql_serialization_
    }
 }

-bytes_opt
+cql3::raw_value
 maps::value::get(const query_options& options) {
-    return get_with_protocol_version(options.get_cql_serialization_format());
+    return cql3::raw_value::make_value(get_with_protocol_version(options.get_cql_serialization_format()));
 }

 bytes
@@ -227,18 +227,24 @@ maps::delayed_value::bind(const query_options& options) {

        // We don't support values > 64K because the serialization format encode the length as an unsigned short.
        auto key_bytes = key->bind_and_get(options);
-        if (!key_bytes) {
+        if (key_bytes.is_null()) {
            throw exceptions::invalid_request_exception("null is not supported inside collections");
        }
+        if (key_bytes.is_unset_value()) {
+            throw exceptions::invalid_request_exception("unset value is not supported inside collections");
+        }
        if (key_bytes->size() > std::numeric_limits<uint16_t>::max()) {
            throw exceptions::invalid_request_exception(sprint("Map key is too long. Map keys are limited to %d bytes but %d bytes keys provided",
                                                   std::numeric_limits<uint16_t>::max(),
                                                   key_bytes->size()));
        }
        auto value_bytes = value->bind_and_get(options);
-        if (!value_bytes) {
+        if (value_bytes.is_null()) {
            throw exceptions::invalid_request_exception("null is not supported inside collections");\
        }
+        if (value_bytes.is_unset_value()) {
+            return constants::UNSET_VALUE;
+        }
        if (value_bytes->size() > std::numeric_limits<uint16_t>::max()) {
            throw exceptions::invalid_request_exception(sprint("Map value is too long. Map values are limited to %d bytes but %d bytes value provided",
                                                    std::numeric_limits<uint16_t>::max(),
@@ -252,17 +258,22 @@ maps::delayed_value::bind(const query_options& options) {
 ::shared_ptr<terminal>
 maps::marker::bind(const query_options& options) {
    auto val = options.get_value_at(_bind_index);
-    return val ?
-            ::make_shared<maps::value>(
-                    maps::value::from_serialized(*val,
-                            static_pointer_cast<const map_type_impl>(
-                                    _receiver->type),
-                            options.get_cql_serialization_format())) :
-            nullptr;
+    if (val.is_null()) {
+        return nullptr;
+    }
+    if (val.is_unset_value()) {
+        return constants::UNSET_VALUE;
+    }
+    return ::make_shared<maps::value>(maps::value::from_serialized(*val, static_pointer_cast<const map_type_impl>(_receiver->type),
+                                      options.get_cql_serialization_format()));
 }

 void
-maps::setter::execute(mutation& m, const exploded_clustering_prefix& row_key, const update_parameters& params) {
+maps::setter::execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params) {
+    auto value = _t->bind(params._options);
+    if (value == constants::UNSET_VALUE) {
+        return;
+    }
    if (column.type->is_multi_cell()) {
        // delete + put
        collection_type_impl::mutation mut;
@@ -271,7 +282,7 @@ maps::setter::execute(mutation& m, const exploded_clustering_prefix& row_key, co
        auto col_mut = ctype->serialize_mutation_form(std::move(mut));
        m.set_cell(row_key, column, std::move(col_mut));
    }
-    do_put(m, row_key, params, _t, column);
+    do_put(m, row_key, params, value, column);
 }

 void
@@ -281,7 +292,7 @@ maps::setter_by_key::collect_marker_specification(shared_ptr<variable_specificat
 }

 void
-maps::setter_by_key::execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) {
+maps::setter_by_key::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
    using exceptions::invalid_request_exception;
    assert(column.type->is_multi_cell()); // "Attempted to set a value for a single key on a frozen map"m
    auto key = _k->bind_and_get(params._options);
@@ -304,15 +315,17 @@ maps::setter_by_key::execute(mutation& m, const exploded_clustering_prefix& pref
 }

 void
-maps::putter::execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) {
+maps::putter::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
    assert(column.type->is_multi_cell()); // "Attempted to add items to a frozen map";
-    do_put(m, prefix, params, _t, column);
+    auto value = _t->bind(params._options);
+    if (value != constants::UNSET_VALUE) {
+        do_put(m, prefix, params, value, column);
+    }
 }

 void
-maps::do_put(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params,
-        shared_ptr<term> t, const column_definition& column) {
-    auto value = t->bind(params._options);
+maps::do_put(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params,
+        shared_ptr<term> value, const column_definition& column) {
    auto map_value = dynamic_pointer_cast<maps::value>(value);
    if (column.type->is_multi_cell()) {
        collection_type_impl::mutation mut;
@@ -340,12 +353,15 @@ maps::do_put(mutation& m, const exploded_clustering_prefix& prefix, const update
 }

 void
-maps::discarder_by_key::execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) {
+maps::discarder_by_key::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
    assert(column.type->is_multi_cell()); // "Attempted to delete a single key in a frozen map";
    auto&& key = _t->bind(params._options);
    if (!key) {
        throw exceptions::invalid_request_exception("Invalid null map key");
    }
+    if (key == constants::UNSET_VALUE) {
+        throw exceptions::invalid_request_exception("Invalid unset map key");
+    }
    collection_type_impl::mutation mut;
    mut.cells.emplace_back(*key->get(params._options), params.make_dead_cell());
    auto mtype = static_cast<const map_type_impl*>(column.type.get());
--- a/cql3/maps.hh
+++ b/cql3/maps.hh
@@ -82,7 +82,7 @@ public:
            : map(std::move(map)) {
        }
        static value from_serialized(bytes_view value, map_type type, cql_serialization_format sf);
-        virtual bytes_opt get(const query_options& options) override;
+        virtual cql3::raw_value get(const query_options& options) override;
        virtual bytes get_with_protocol_version(cql_serialization_format sf);
        bool equals(map_type mt, const value& v);
        virtual sstring to_string() const;
@@ -116,7 +116,7 @@ public:
                : operation(column, std::move(t)) {
        }

-        virtual void execute(mutation& m, const exploded_clustering_prefix& row_key, const update_parameters& params) override;
+        virtual void execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params) override;
    };

    class setter_by_key : public operation {
@@ -126,7 +126,7 @@ public:
            : operation(column, std::move(t)), _k(std::move(k)) {
        }
        virtual void collect_marker_specification(shared_ptr<variable_specifications> bound_names) override;
-        virtual void execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) override;
+        virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;
    };

    class putter : public operation {
@@ -134,18 +134,18 @@ public:
        putter(const column_definition& column, shared_ptr<term> t)
            : operation(column, std::move(t)) {
        }
-        virtual void execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) override;
+        virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;
    };

-    static void do_put(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params,
-            shared_ptr<term> t, const column_definition& column);
+    static void do_put(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params,
+            shared_ptr<term> value, const column_definition& column);

    class discarder_by_key : public operation {
    public:
        discarder_by_key(const column_definition& column, shared_ptr<term> k)
                : operation(column, std::move(k)) {
        }
-        virtual void execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) override;
+        virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;
    };
 };

--- a/cql3/multi_column_relation.hh
+++ b/cql3/multi_column_relation.hh
@@ -184,6 +184,13 @@ protected:
        throw exceptions::invalid_request_exception(sprint("%s cannot be used for Multi-column relations", get_operator()));
    }

+    virtual ::shared_ptr<relation> maybe_rename_identifier(const column_identifier::raw& from, column_identifier::raw to) override {
+        auto new_entities = boost::copy_range<decltype(_entities)>(_entities | boost::adaptors::transformed([&] (auto&& entity) {
+            return *entity == from ? ::make_shared<column_identifier::raw>(to) : entity;
+        }));
+        return ::make_shared(multi_column_relation(std::move(new_entities), _relation_type, _values_or_marker, _in_values, _in_marker));
+    }
+
    virtual shared_ptr<term> to_term(const std::vector<shared_ptr<column_specification>>& receivers,
                                     ::shared_ptr<term::raw> raw, database& db, const sstring& keyspace,
                                     ::shared_ptr<variable_specifications> bound_names) override {
--- a/cql3/operation.cc
+++ b/cql3/operation.cc
@@ -36,6 +36,7 @@
 * You should have received a copy of the GNU General Public License
 * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
 */
+#include <utility>

 #include "operation.hh"
 #include "operation_impl.hh"
@@ -88,13 +89,10 @@ operation::addition::prepare(database& db, const sstring& keyspace, const column

    auto ctype = dynamic_pointer_cast<const collection_type_impl>(receiver.type);
    if (!ctype) {
-        fail(unimplemented::cause::COUNTERS);
-        // FIXME: implelement
-#if 0
-        if (!(receiver.type instanceof CounterColumnType))
-            throw new InvalidRequestException(String.format("Invalid operation (%s) for non counter column %s", toString(receiver), receiver.name));
-        return new Constants.Adder(receiver, v);
-#endif
+        if (!receiver.is_counter()) {
+            throw exceptions::invalid_request_exception(sprint("Invalid operation (%s) for non counter column %s", receiver, receiver.name()));
+        }
+        return make_shared<constants::adder>(receiver, v);
    } else if (!ctype->is_multi_cell()) {
        throw exceptions::invalid_request_exception(sprint("Invalid operation (%s) for frozen collection column %s", receiver, receiver.name()));
    }
@@ -119,12 +117,11 @@ shared_ptr<operation>
 operation::subtraction::prepare(database& db, const sstring& keyspace, const column_definition& receiver) {
    auto ctype = dynamic_pointer_cast<const collection_type_impl>(receiver.type);
    if (!ctype) {
-        fail(unimplemented::cause::COUNTERS);
-#if 0
-        if (!(receiver.type instanceof CounterColumnType))
-            throw new InvalidRequestException(String.format("Invalid operation (%s) for non counter column %s", toString(receiver), receiver.name));
-        return new Constants.Substracter(receiver, value.prepare(keyspace, receiver));
-#endif
+        if (!receiver.is_counter()) {
+            throw exceptions::invalid_request_exception(sprint("Invalid operation (%s) for non counter column %s", receiver, receiver.name()));
+        }
+        auto v = _value->prepare(db, keyspace, receiver.column_specification);
+        return make_shared<constants::subtracter>(receiver, v);
    }
    if (!ctype->is_multi_cell()) {
        throw exceptions::invalid_request_exception(
@@ -196,6 +193,78 @@ operation::set_value::prepare(database& db, const sstring& keyspace, const colum
    }
 }

+// Prepares an operation that overwrites a counter column from a list of raw shard tuples.
+// Each tuple is (int32 shard kind, uuid counter id, long logical clock, long value).
+::shared_ptr <operation>
+operation::set_counter_value_from_tuple_list::prepare(database& db, const sstring& keyspace, const column_definition& receiver) {
+    static thread_local const data_type counter_tuple_type = tuple_type_impl::get_instance({int32_type, uuid_type, long_type, long_type});
+    static thread_local const data_type counter_tuple_list_type = list_type_impl::get_instance(counter_tuple_type, true);
+
+    if (!receiver.type->is_counter()) {
+        throw exceptions::invalid_request_exception(sprint("Column %s is not a counter", receiver.name_as_text()));
+    }
+
+    // We need to fake a column of list<tuple<...>> to prepare the value term.
+    auto & os = receiver.column_specification;
+    auto spec = make_shared<cql3::column_specification>(os->ks_name, os->cf_name, os->name, counter_tuple_list_type);
+    auto v = _value->prepare(db, keyspace, spec);
+
+    // Will not be used elsewhere, so make it local.
+    class counter_setter : public operation {
+    public:
+        using operation::operation;
+
+        bool is_raw_counter_shard_write() const override {
+            return true;
+        }
+        void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override {
+            const auto& value = _t->bind(params._options);
+            auto&& list_value = dynamic_pointer_cast<lists::value>(value);
+
+            if (!list_value) {
+                throw std::invalid_argument("Invalid input data to counter set");
+            }
+
+            counter_id last(utils::UUID(0, 0)); // ids must be strictly increasing, starting above the zero UUID
+            counter_cell_builder ccb(list_value->_elements.size());
+            for (auto& bo : list_value->_elements) {
+                // A failure in the deserialization/casts below should be enough type checking here.
+                auto tuple = value_cast<tuple_type_impl::native_type>(counter_tuple_type->deserialize(*bo));
+                auto shard = value_cast<int>(tuple[0]);
+                auto id = counter_id(value_cast<utils::UUID>(tuple[1]));
+                auto clock = value_cast<int64_t>(tuple[2]);
+                auto shard_value = value_cast<int64_t>(tuple[3]); // named so it does not shadow the bound term `value`
+
+                using namespace std::rel_ops;
+
+                if (id <= last) {
+                    throw marshal_exception(sprint("invalid counter id order, %s <= %s",
+                                                    id.to_uuid().to_sstring(), last.to_uuid().to_sstring()));
+                }
+                last = id;
+                // TODO: maybe allow more than global values to propagate,
+                // though we don't (yet at least) in sstable::partition so...
+                switch (shard) {
+                case 'g':
+                    ccb.add_shard(counter_shard(id, shard_value, clock));
+                    break;
+                case 'l':
+                    throw marshal_exception("encountered a local shard in a counter cell");
+                case 'r':
+                    throw marshal_exception("encountered remote shards in a counter cell");
+                default:
+                    throw marshal_exception(sprint("encountered unknown shard %d in a counter cell", shard));
+                }
+            }
+            // Note: this is a counter value cell, not an update.
+            // See counters.cc; we need to detect this.
+            m.set_cell(prefix, column, ccb.build(params.timestamp()));
+        }
+    };
+
+    return make_shared<counter_setter>(receiver, v);
+}
+
 bool
 operation::set_value::is_compatible_with(::shared_ptr <raw_update> other) {
    // We don't allow setting multiple time the same column, because 1)
--- a/cql3/operation.hh
+++ b/cql3/operation.hh
@@ -95,10 +95,18 @@ public:
        return params.make_cell(value);
    }

+    atomic_cell make_counter_update_cell(int64_t delta, const update_parameters& params) const {
+        return params.make_counter_update_cell(delta);
+    }
+
    virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const {
        return _t && _t->uses_function(ks_name, function_name);
    }

+    virtual bool is_raw_counter_shard_write() const {
+        return false;
+    }
+
    /**
    * @return whether the operation requires a read of the previous value to be executed
    * (only lists setterByIdx, discard and discardByIdx requires that).
@@ -122,7 +130,7 @@ public:
    /**
     * Execute the operation.
     */
-    virtual void execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) = 0;
+    virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) = 0;

    /**
     * A parsed raw UPDATE operation.
@@ -189,6 +197,7 @@ public:
    };

    class set_value;
+    class set_counter_value_from_tuple_list;

    class set_element : public raw_update {
        const shared_ptr<term::raw> _selector;
--- a/cql3/operation_impl.hh
+++ b/cql3/operation_impl.hh
@@ -50,7 +50,7 @@
 namespace cql3 {

 class operation::set_value : public raw_update {
-private:
+protected:
    ::shared_ptr<term::raw> _value;
 public:
    set_value(::shared_ptr<term::raw> value) : _value(std::move(value)) {}
@@ -67,6 +67,12 @@ public:
    virtual bool is_compatible_with(::shared_ptr <raw_update> other) override;
 };

+class operation::set_counter_value_from_tuple_list : public set_value {
+public:
+    using set_value::set_value;
+    ::shared_ptr <operation> prepare(database& db, const sstring& keyspace, const column_definition& receiver) override;
+};
+
 class operation::column_deletion : public raw_deletion {
 private:
    ::shared_ptr<column_identifier::raw> _id;
--- a/cql3/operator.cc
+++ b/cql3/operator.cc
@@ -52,5 +52,6 @@ const operator_type operator_type::IN(7, operator_type::IN, "IN");
 const operator_type operator_type::CONTAINS(5, operator_type::CONTAINS, "CONTAINS");
 const operator_type operator_type::CONTAINS_KEY(6, operator_type::CONTAINS_KEY, "CONTAINS_KEY");
 const operator_type operator_type::NEQ(8, operator_type::NEQ, "!=");
+const operator_type operator_type::IS_NOT(9, operator_type::IS_NOT, "IS NOT");

 }
--- a/cql3/operator.hh
+++ b/cql3/operator.hh
@@ -42,8 +42,9 @@
 #pragma once

 #include <cstddef>
-#include <iostream>
+#include <iosfwd>
 #include "core/sstring.hh"
+#include "seastarx.hh"

 namespace cql3 {

@@ -58,6 +59,7 @@ public:
    static const operator_type CONTAINS;
    static const operator_type CONTAINS_KEY;
    static const operator_type NEQ;
+    static const operator_type IS_NOT;
 private:
    int32_t _b;
    const operator_type& _reverse;
--- a/cql3/prepared_statements_cache.hh
+++ b/cql3/prepared_statements_cache.hh
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ *
+ * Modified by ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "utils/loading_cache.hh"
+#include "cql3/statements/prepared_statement.hh"
+
+namespace cql3 {
+
+using prepared_cache_entry = std::unique_ptr<statements::prepared_statement>;
+
+struct prepared_cache_entry_size {
+    size_t operator()(const prepared_cache_entry& val) {
+        // TODO: improve the size approximation
+        return 10000;
+    }
+};
+
+typedef bytes cql_prepared_id_type;
+typedef int32_t thrift_prepared_id_type;
+
+/// \brief The key of the prepared statements cache
+///
+/// We are going to store the CQL and Thrift prepared statements in the same cache, therefore we need to generate a key
+/// that is going to be unique in both cases. Thrift uses int32_t as a prepared statement ID, CQL uses an MD5 digest.
+///
+/// We are going to use a std::pair<CQL_PREP_ID_TYPE, int64_t> as a key. For CQL statements we will use {CQL_PREP_ID, std::numeric_limits<int64_t>::max()} as a key
+/// and for Thrift - {CQL_PREP_ID_TYPE(), THRIFT_PREP_ID}. This way the CQL and Thrift keys' values will never collide.
+class prepared_cache_key_type {
+public:
+    using cache_key_type = std::pair<cql_prepared_id_type, int64_t>;
+
+private:
+    cache_key_type _key;
+
+public:
+    prepared_cache_key_type() = default;
+    explicit prepared_cache_key_type(cql_prepared_id_type cql_id) : _key(std::move(cql_id), std::numeric_limits<int64_t>::max()) {}
+    explicit prepared_cache_key_type(thrift_prepared_id_type thrift_id) : _key(cql_prepared_id_type(), thrift_id) {}
+
+    cache_key_type& key() { return _key; }
+    const cache_key_type& key() const { return _key; }
+
+    static const cql_prepared_id_type& cql_id(const prepared_cache_key_type& key) {
+        return key.key().first;
+    }
+    static thrift_prepared_id_type thrift_id(const prepared_cache_key_type& key) {
+        return key.key().second;
+    }
+};
+
+class prepared_statements_cache {
+public:
+    struct stats {
+        uint64_t prepared_cache_evictions = 0;
+    };
+
+    static stats& shard_stats() {
+        static thread_local stats _stats;
+        return _stats;
+    }
+
+    struct prepared_cache_stats_updater {
+        static void inc_hits() noexcept {}
+        static void inc_misses() noexcept {}
+        static void inc_blocks() noexcept {}
+        static void inc_evictions() noexcept {
+            ++shard_stats().prepared_cache_evictions;
+        }
+    };
+
+private:
+    using cache_key_type = typename prepared_cache_key_type::cache_key_type;
+    using cache_type = utils::loading_cache<cache_key_type, prepared_cache_entry, utils::loading_cache_reload_enabled::no, prepared_cache_entry_size, utils::tuple_hash, std::equal_to<cache_key_type>, prepared_cache_stats_updater>;
+    using cache_value_ptr = typename cache_type::value_ptr;
+    using cache_iterator = typename cache_type::iterator;
+    using checked_weak_ptr = typename statements::prepared_statement::checked_weak_ptr;
+    struct value_extractor_fn {
+        checked_weak_ptr operator()(prepared_cache_entry& e) const {
+            return e->checked_weak_from_this();
+        }
+    };
+
+    static const std::chrono::minutes entry_expiry;
+
+public:
+    using key_type = prepared_cache_key_type;
+    using value_type = checked_weak_ptr;
+    using statement_is_too_big = typename cache_type::entry_is_too_big;
+    /// \note both iterator::reference and iterator::value_type are checked_weak_ptr
+    using iterator = boost::transform_iterator<value_extractor_fn, cache_iterator>;
+
+private:
+    cache_type _cache;
+    value_extractor_fn _value_extractor_fn;
+
+public:
+    prepared_statements_cache(logging::logger& logger)
+        : _cache(memory::stats().total_memory() / 256, entry_expiry, logger)
+    {}
+
+    template <typename LoadFunc>
+    future<value_type> get(const key_type& key, LoadFunc&& load) {
+        return _cache.get_ptr(key.key(), [load = std::forward<LoadFunc>(load)] (const cache_key_type&) { return load(); }).then([] (cache_value_ptr v_ptr) {
+            return make_ready_future<value_type>((*v_ptr)->checked_weak_from_this());
+        });
+    }
+
+    iterator find(const key_type& key) {
+        return boost::make_transform_iterator(_cache.find(key.key()), _value_extractor_fn);
+    }
+
+    iterator end() {
+        return boost::make_transform_iterator(_cache.end(), _value_extractor_fn);
+    }
+
+    iterator begin() {
+        return boost::make_transform_iterator(_cache.begin(), _value_extractor_fn);
+    }
+
+    template <typename Pred>
+    void remove_if(Pred&& pred) {
+        static_assert(std::is_same<bool, std::result_of_t<Pred(::shared_ptr<cql_statement>)>>::value, "Bad Pred signature");
+
+        _cache.remove_if([&pred] (const prepared_cache_entry& e) {
+            return pred(e->statement);
+        });
+    }
+
+    size_t size() const {
+        return _cache.size();
+    }
+
+    size_t memory_footprint() const {
+        return _cache.memory_footprint();
+    }
+};
+}
+
+namespace std { // for prepared_statements_cache log printouts
+inline std::ostream& operator<<(std::ostream& os, const typename cql3::prepared_cache_key_type::cache_key_type& p) {
+    os << "{cql_id: " << p.first << ", thrift_id: " << p.second << "}";
+    return os;
+}
+
+inline std::ostream& operator<<(std::ostream& os, const cql3::prepared_cache_key_type& p) {
+    os << p.key();
+    return os;
+}
+}
--- a/cql3/query_options.cc
+++ b/cql3/query_options.cc
@@ -47,11 +47,11 @@ namespace cql3 {
 thread_local const query_options::specific_options query_options::specific_options::DEFAULT{-1, {}, {}, api::missing_timestamp};

 thread_local query_options query_options::DEFAULT{db::consistency_level::ONE, std::experimental::nullopt,
-    std::vector<bytes_view_opt>(), false, query_options::specific_options::DEFAULT, cql_serialization_format::latest()};
+    std::vector<cql3::raw_value_view>(), false, query_options::specific_options::DEFAULT, cql_serialization_format::latest()};

 query_options::query_options(db::consistency_level consistency,
                             std::experimental::optional<std::vector<sstring_view>> names,
-                             std::vector<bytes_opt> values,
+                             std::vector<cql3::raw_value> values,
                             bool skip_metadata,
                             specific_options options,
                             cql_serialization_format sf)
@@ -68,7 +68,7 @@ query_options::query_options(db::consistency_level consistency,

 query_options::query_options(db::consistency_level consistency,
                             std::experimental::optional<std::vector<sstring_view>> names,
-                             std::vector<bytes_view_opt> value_views,
+                             std::vector<cql3::raw_value_view> value_views,
                             bool skip_metadata,
                             specific_options options,
                             cql_serialization_format sf)
@@ -82,18 +82,7 @@ query_options::query_options(db::consistency_level consistency,
 {
 }

-query_options::query_options(query_options&& o, std::vector<std::vector<bytes_view_opt>> value_views)
-    : query_options(std::move(o))
-{
-    std::vector<query_options> tmp;
-    tmp.reserve(value_views.size());
-    std::transform(value_views.begin(), value_views.end(), std::back_inserter(tmp), [this](auto& vals) {
-        return query_options(_consistency, {}, vals, _skip_metadata, _options, _cql_serialization_format);
-    });
-    _batch_options = std::move(tmp);
-}
-
-query_options::query_options(db::consistency_level cl, std::vector<bytes_opt> values)
+query_options::query_options(db::consistency_level cl, std::vector<cql3::raw_value> values)
    : query_options(
          cl,
          {},
@@ -105,7 +94,7 @@ query_options::query_options(db::consistency_level cl, std::vector<bytes_opt> va
 {
 }

-query_options::query_options(std::vector<bytes_opt> values)
+query_options::query_options(std::vector<cql3::raw_value> values)
    : query_options(
          db::consistency_level::ONE, std::move(values))
 {}
@@ -115,7 +104,7 @@ db::consistency_level query_options::get_consistency() const
    return _consistency;
 }

-bytes_view_opt query_options::get_value_at(size_t idx) const
+cql3::raw_value_view query_options::get_value_at(size_t idx) const
 {
    return _value_views.at(idx);
 }
@@ -125,14 +114,14 @@ size_t query_options::get_values_count() const
    return _value_views.size();
 }

-bytes_view_opt query_options::make_temporary(bytes_opt value) const
+cql3::raw_value_view query_options::make_temporary(cql3::raw_value value) const
 {
    if (value) {
        _temporaries.emplace_back(value->begin(), value->end());
        auto& temporary = _temporaries.back();
-        return bytes_view{temporary.data(), temporary.size()};
+        return cql3::raw_value_view::make_value(bytes_view{temporary.data(), temporary.size()});
    }
-    return std::experimental::nullopt;
+    return cql3::raw_value_view::make_null();
 }

 bool query_options::skip_metadata() const
@@ -192,7 +181,7 @@ void query_options::prepare(const std::vector<::shared_ptr<column_specification>
    }

    auto& names = *_names;
-    std::vector<bytes_opt> ordered_values;
+    std::vector<cql3::raw_value> ordered_values;
    ordered_values.reserve(specs.size());
    for (auto&& spec : specs) {
        auto& spec_name = spec->name->text();
@@ -211,9 +200,9 @@ void query_options::fill_value_views()
 {
    for (auto&& value : _values) {
        if (value) {
-            _value_views.emplace_back(bytes_view{*value});
+            _value_views.emplace_back(cql3::raw_value_view::make_value(bytes_view{*value}));
        } else {
-            _value_views.emplace_back(std::experimental::nullopt);
+            _value_views.emplace_back(cql3::raw_value_view::make_null());
        }
    }
 }
--- a/cql3/query_options.hh
+++ b/cql3/query_options.hh
@@ -41,6 +41,7 @@

 #pragma once

+#include <seastar/util/gcc6-concepts.hh>
 #include "timestamp.hh"
 #include "bytes.hh"
 #include "db/consistency_level.hh"
@@ -48,6 +49,7 @@
 #include "service/pager/paging_state.hh"
 #include "cql3/column_specification.hh"
 #include "cql3/column_identifier.hh"
+#include "cql3/values.hh"
 #include "cql_serialization_format.hh"

 namespace cql3 {
@@ -69,43 +71,80 @@ public:
 private:
    const db::consistency_level _consistency;
    const std::experimental::optional<std::vector<sstring_view>> _names;
-    std::vector<bytes_opt> _values;
-    std::vector<bytes_view_opt> _value_views;
+    std::vector<cql3::raw_value> _values;
+    std::vector<cql3::raw_value_view> _value_views;
    mutable std::vector<std::vector<int8_t>> _temporaries;
    const bool _skip_metadata;
    const specific_options _options;
    cql_serialization_format _cql_serialization_format;
    std::experimental::optional<std::vector<query_options>> _batch_options;
+
+private:
+    /**
+     * @brief Batch query_options constructor.
+     *
+     * Requirements:
+     *   - @tparam OneMutationDataRange has begin() and end() iterators.
+     *   - The values of @tparam OneMutationDataRange are of either raw_value_view or raw_value types.
+     *
+     * @param o Base query_options object. query_options objects for each statement in the batch will derive the values from it.
+     * @param values_ranges a vector of values ranges for each statement in the batch.
+     */
+    template<typename OneMutationDataRange>
+    GCC6_CONCEPT( requires requires (OneMutationDataRange range) {
+         std::begin(range);
+         std::end(range);
+    } && ( requires (OneMutationDataRange range) { { *range.begin() } -> raw_value_view; } ||
+           requires (OneMutationDataRange range) { { *range.begin() } -> raw_value; } ) )
+    explicit query_options(query_options&& o, std::vector<OneMutationDataRange> values_ranges);
+
 public:
    query_options(query_options&&) = default;
    query_options(const query_options&) = delete;

    explicit query_options(db::consistency_level consistency,
                           std::experimental::optional<std::vector<sstring_view>> names,
-                           std::vector<bytes_opt> values,
+                           std::vector<cql3::raw_value> values,
                           bool skip_metadata,
                           specific_options options,
                           cql_serialization_format sf);
    explicit query_options(db::consistency_level consistency,
                           std::experimental::optional<std::vector<sstring_view>> names,
-                           std::vector<bytes_view_opt> value_views,
+                           std::vector<cql3::raw_value_view> value_views,
                           bool skip_metadata,
                           specific_options options,
                           cql_serialization_format sf);

-    // Batch query_options constructor
-    explicit query_options(query_options&&, std::vector<std::vector<bytes_view_opt>> value_views);
+    /**
+     * @brief Batch query_options factory.
+     *
+     * Requirements:
+     *   - @tparam OneMutationDataRange has begin() and end() iterators.
+     *   - The values of @tparam OneMutationDataRange are of either raw_value_view or raw_value types.
+     *
+     * @param o Base query_options object. query_options objects for each statement in the batch will derive the values from it.
+     * @param values_ranges a vector of values ranges for each statement in the batch.
+     */
+    template<typename OneMutationDataRange>
+    GCC6_CONCEPT( requires requires (OneMutationDataRange range) {
+         std::begin(range);
+         std::end(range);
+    } && ( requires (OneMutationDataRange range) { { *range.begin() } -> raw_value_view; } ||
+           requires (OneMutationDataRange range) { { *range.begin() } -> raw_value; } ) )
+    static query_options make_batch_options(query_options&& o, std::vector<OneMutationDataRange> values_ranges) {
+        return query_options(std::move(o), std::move(values_ranges));
+    }

    // It can't be const because of prepare()
    static thread_local query_options DEFAULT;

    // forInternalUse
-    explicit query_options(std::vector<bytes_opt> values);
-    explicit query_options(db::consistency_level, std::vector<bytes_opt> values);
+    explicit query_options(std::vector<cql3::raw_value> values);
+    explicit query_options(db::consistency_level, std::vector<cql3::raw_value> values);

    db::consistency_level get_consistency() const;
-    bytes_view_opt get_value_at(size_t idx) const;
-    bytes_view_opt make_temporary(bytes_opt value) const;
+    cql3::raw_value_view get_value_at(size_t idx) const;
+    cql3::raw_value_view make_temporary(cql3::raw_value value) const;
    size_t get_values_count() const;
    bool skip_metadata() const;
    /**  The pageSize for this query. Will be <= 0 if not relevant for the query.  */
@@ -129,4 +168,21 @@ private:
    void fill_value_views();
 };

+template<typename OneMutationDataRange>
+GCC6_CONCEPT( requires requires (OneMutationDataRange range) {
+     std::begin(range);
+     std::end(range);
+} && ( requires (OneMutationDataRange range) { { *range.begin() } -> raw_value_view; } ||
+       requires (OneMutationDataRange range) { { *range.begin() } -> raw_value; } ) )
+query_options::query_options(query_options&& o, std::vector<OneMutationDataRange> values_ranges)
+    : query_options(std::move(o))
+{
+    std::vector<query_options> tmp;
+    tmp.reserve(values_ranges.size());
+    std::transform(values_ranges.begin(), values_ranges.end(), std::back_inserter(tmp), [this](auto& values_range) {
+        return query_options(_consistency, {}, std::move(values_range), _skip_metadata, _options, _cql_serialization_format);
+    });
+    _batch_options = std::move(tmp);
+}
+
 }
--- a/cql3/query_processor.cc
+++ b/cql3/query_processor.cc
@@ -38,11 +38,13 @@
 * You should have received a copy of the GNU General Public License
 * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
 */
+#include <seastar/core/metrics.hh>

 #include "cql3/query_processor.hh"
 #include "cql3/CqlParser.hpp"
 #include "cql3/error_collector.hh"
 #include "cql3/statements/batch_statement.hh"
+#include "cql3/util.hh"

 #include "transport/messages/result_message.hh"

@@ -52,13 +54,16 @@
 namespace cql3 {

 using namespace statements;
-using namespace transport::messages;
+using namespace cql_transport::messages;

 logging::logger log("query_processor");
+logging::logger prep_cache_log("prepared_statements_cache");

 distributed<query_processor> _the_query_processor;

-const sstring query_processor::CQL_VERSION = "3.2.1";
+const sstring query_processor::CQL_VERSION = "3.3.1";
+
+const std::chrono::minutes prepared_statements_cache::entry_expiry = std::chrono::minutes(60);

 class query_processor::internal_state {
    service::query_state _qs;
@@ -93,12 +98,53 @@ query_processor::query_processor(distributed<service::storage_proxy>& proxy,
    , _proxy(proxy)
    , _db(db)
    , _internal_state(new internal_state())
+    , _prepared_cache(prep_cache_log)
 {
-    _collectd_regs.push_back(
-        scollectd::add_polled_metric(scollectd::type_instance_id("query_processor"
-                , scollectd::per_cpu_plugin_instance
-                , "total_operations", "statements_prepared")
-                , scollectd::make_typed(scollectd::data_type::DERIVE, _stats.prepare_invocations)));
+    namespace sm = seastar::metrics;
+
+    _metrics.add_group("query_processor", {
+        sm::make_derive("statements_prepared", _stats.prepare_invocations,
+                        sm::description("Counts a total number of parsed CQL requests.")),
+    });
+
+    _metrics.add_group("cql", {
+        sm::make_derive("reads", _cql_stats.reads,
+                        sm::description("Counts a total number of CQL read requests.")),
+
+        sm::make_derive("inserts", _cql_stats.inserts,
+                        sm::description("Counts a total number of CQL INSERT requests.")),
+
+        sm::make_derive("updates", _cql_stats.updates,
+                        sm::description("Counts a total number of CQL UPDATE requests.")),
+
+        sm::make_derive("deletes", _cql_stats.deletes,
+                        sm::description("Counts a total number of CQL DELETE requests.")),
+
+        sm::make_derive("batches", _cql_stats.batches,
+                        sm::description("Counts a total number of CQL BATCH requests.")),
+
+        sm::make_derive("statements_in_batches", _cql_stats.statements_in_batches,
+                        sm::description("Counts a total number of sub-statements in CQL BATCH requests.")),
+
+        sm::make_derive("batches_pure_logged", _cql_stats.batches_pure_logged,
+                        sm::description("Counts a total number of LOGGED batches that were executed as LOGGED batches.")),
+
+        sm::make_derive("batches_pure_unlogged", _cql_stats.batches_pure_unlogged,
+                        sm::description("Counts a total number of UNLOGGED batches that were executed as UNLOGGED batches.")),
+
+        sm::make_derive("batches_unlogged_from_logged", _cql_stats.batches_unlogged_from_logged,
+                        sm::description("Counts a total number of LOGGED batches that were executed as UNLOGGED batches.")),
+
+        sm::make_derive("prepared_cache_evictions", [] { return prepared_statements_cache::shard_stats().prepared_cache_evictions; },
+                        sm::description("Counts a number of prepared statements cache entries evictions.")),
+
+        sm::make_gauge("prepared_cache_size", [this] { return _prepared_cache.size(); },
+                        sm::description("A number of entries in the prepared statements cache.")),
+
+        sm::make_gauge("prepared_cache_memory_footprint", [this] { return _prepared_cache.memory_footprint(); },
+                        sm::description("Size (in bytes) of the prepared statements cache.")),
+    });
+
    service::get_local_migration_manager().register_listener(_migration_subscriber.get());
 }

@@ -146,7 +192,7 @@ query_processor::process_statement(::shared_ptr<cql_statement> statement,

        statement->validate(_proxy, client_state);

-        auto fut = make_ready_future<::shared_ptr<transport::messages::result_message>>();
+        auto fut = make_ready_future<::shared_ptr<cql_transport::messages::result_message>>();
        if (client_state.is_internal()) {
            fut = statement->execute_internal(_proxy, query_state, options);
        } else  {
@@ -163,80 +209,34 @@ query_processor::process_statement(::shared_ptr<cql_statement> statement,
    });
 }

-future<::shared_ptr<transport::messages::result_message::prepared>>
-query_processor::prepare(const std::experimental::string_view& query_string, service::query_state& query_state)
+future<::shared_ptr<cql_transport::messages::result_message::prepared>>
+query_processor::prepare(sstring query_string, service::query_state& query_state)
 {
    auto& client_state = query_state.get_client_state();
-    return prepare(query_string, client_state, client_state.is_thrift());
+    return prepare(std::move(query_string), client_state, client_state.is_thrift());
 }

-future<::shared_ptr<transport::messages::result_message::prepared>>
-query_processor::prepare(const std::experimental::string_view& query_string,
-                         const service::client_state& client_state,
-                         bool for_thrift)
+future<::shared_ptr<cql_transport::messages::result_message::prepared>>
+query_processor::prepare(sstring query_string, const service::client_state& client_state, bool for_thrift)
 {
-    auto existing = get_stored_prepared_statement(query_string, client_state.get_raw_keyspace(), for_thrift);
-    if (existing) {
-        return make_ready_future<::shared_ptr<transport::messages::result_message::prepared>>(existing);
+    using namespace cql_transport::messages;
+    if (for_thrift) {
+        return prepare_one<result_message::prepared::thrift>(std::move(query_string), client_state, compute_thrift_id, prepared_cache_key_type::thrift_id);
+    } else {
+        return prepare_one<result_message::prepared::cql>(std::move(query_string), client_state, compute_id, prepared_cache_key_type::cql_id);
    }
-    auto prepared = get_statement(query_string, client_state);
-    auto bound_terms = prepared->statement->get_bound_terms();
-    if (bound_terms > std::numeric_limits<uint16_t>::max()) {
-        throw exceptions::invalid_request_exception(sprint("Too many markers(?). %d markers exceed the allowed maximum of %d", bound_terms, std::numeric_limits<uint16_t>::max()));
-    }
-    assert(bound_terms == prepared->bound_names.size());
-    return store_prepared_statement(query_string, client_state.get_raw_keyspace(), std::move(prepared), for_thrift);
 }

-::shared_ptr<transport::messages::result_message::prepared>
+::shared_ptr<cql_transport::messages::result_message::prepared>
 query_processor::get_stored_prepared_statement(const std::experimental::string_view& query_string,
                                               const sstring& keyspace,
                                               bool for_thrift)
 {
+    using namespace cql_transport::messages;
    if (for_thrift) {
-        auto statement_id = compute_thrift_id(query_string, keyspace);
-        auto it = _thrift_prepared_statements.find(statement_id);
-        if (it == _thrift_prepared_statements.end()) {
-            return ::shared_ptr<result_message::prepared>();
-        }
-        return ::make_shared<result_message::prepared::thrift>(statement_id, it->second);
+        return get_stored_prepared_statement_one<result_message::prepared::thrift>(query_string, keyspace, compute_thrift_id, prepared_cache_key_type::thrift_id);
    } else {
-        auto statement_id = compute_id(query_string, keyspace);
-        auto it = _prepared_statements.find(statement_id);
-        if (it == _prepared_statements.end()) {
-            return ::shared_ptr<result_message::prepared>();
-        }
-        return ::make_shared<result_message::prepared::cql>(statement_id, it->second);
-    }
-}
-
-future<::shared_ptr<transport::messages::result_message::prepared>>
-query_processor::store_prepared_statement(const std::experimental::string_view& query_string,
-                                          const sstring& keyspace,
-                                          ::shared_ptr<statements::prepared_statement> prepared,
-                                          bool for_thrift)
-{
-#if 0
-    // Concatenate the current keyspace so we don't mix prepared statements between keyspace (#5352).
-    // (if the keyspace is null, queryString has to have a fully-qualified keyspace so it's fine.
-    long statementSize = measure(prepared.statement);
-    // don't execute the statement if it's bigger than the allowed threshold
-    if (statementSize > MAX_CACHE_PREPARED_MEMORY)
-        throw new InvalidRequestException(String.format("Prepared statement of size %d bytes is larger than allowed maximum of %d bytes.",
-                                                        statementSize,
-                                                        MAX_CACHE_PREPARED_MEMORY));
-#endif
-    prepared->raw_cql_statement = query_string.data();
-    if (for_thrift) {
-        auto statement_id = compute_thrift_id(query_string, keyspace);
-        _thrift_prepared_statements.emplace(statement_id, prepared);
-        auto msg = ::make_shared<result_message::prepared::thrift>(statement_id, prepared);
-        return make_ready_future<::shared_ptr<result_message::prepared>>(std::move(msg));
-    } else {
-        auto statement_id = compute_id(query_string, keyspace);
-        _prepared_statements.emplace(statement_id, prepared);
-        auto msg = ::make_shared<result_message::prepared::cql>(statement_id, prepared);
-        return make_ready_future<::shared_ptr<result_message::prepared>>(std::move(msg));
+        return get_stored_prepared_statement_one<result_message::prepared::cql>(query_string, keyspace, compute_id, prepared_cache_key_type::cql_id);
    }
 }

@@ -253,22 +253,22 @@ static sstring hash_target(const std::experimental::string_view& query_string, c
    return keyspace + query_string.to_string();
 }

-bytes query_processor::compute_id(const std::experimental::string_view& query_string, const sstring& keyspace)
+prepared_cache_key_type query_processor::compute_id(const std::experimental::string_view& query_string, const sstring& keyspace)
 {
-    return md5_calculate(hash_target(query_string, keyspace));
+    return prepared_cache_key_type(md5_calculate(hash_target(query_string, keyspace)));
 }

-int32_t query_processor::compute_thrift_id(const std::experimental::string_view& query_string, const sstring& keyspace)
+prepared_cache_key_type query_processor::compute_thrift_id(const std::experimental::string_view& query_string, const sstring& keyspace)
 {
    auto target = hash_target(query_string, keyspace);
    uint32_t h = 0;
    for (auto&& c : hash_target(query_string, keyspace)) {
        h = 31*h + c;
    }
-    return static_cast<int32_t>(h);
+    return prepared_cache_key_type(static_cast<int32_t>(h));
 }

-::shared_ptr<prepared_statement>
+std::unique_ptr<prepared_statement>
 query_processor::get_statement(const sstring_view& query, const service::client_state& client_state)
 {
 #if 0
@@ -285,31 +285,18 @@ query_processor::get_statement(const sstring_view& query, const service::client_
        Tracing.trace("Preparing statement");
 #endif
    ++_stats.prepare_invocations;
-    return statement->prepare(_db.local());
+    return statement->prepare(_db.local(), _cql_stats);
 }

 ::shared_ptr<raw::parsed_statement>
 query_processor::parse_statement(const sstring_view& query)
 {
    try {
-        cql3_parser::CqlLexer::collector_type lexer_error_collector(query);
-        cql3_parser::CqlParser::collector_type parser_error_collector(query);
-        cql3_parser::CqlLexer::InputStreamType input{reinterpret_cast<const ANTLR_UINT8*>(query.begin()), ANTLR_ENC_UTF8, static_cast<ANTLR_UINT32>(query.size()), nullptr};
-        cql3_parser::CqlLexer lexer{&input};
-        lexer.set_error_listener(lexer_error_collector);
-        cql3_parser::CqlParser::TokenStreamType tstream(ANTLR_SIZE_HINT, lexer.get_tokSource());
-        cql3_parser::CqlParser parser{&tstream};
-        parser.set_error_listener(parser_error_collector);
-
-        auto statement = parser.query();
-
-        lexer_error_collector.throw_first_syntax_error();
-        parser_error_collector.throw_first_syntax_error();
-
+        auto statement = util::do_with_parser(query,  std::mem_fn(&cql3_parser::CqlParser::query));
        if (!statement) {
            throw exceptions::syntax_exception("Parsing failed");
        }
-        return std::move(statement);
+        return statement;
    } catch (const exceptions::recognition_exception& e) {
        throw exceptions::syntax_exception(sprint("Invalid or malformed CQL query string: %s", e.what()));
    } catch (const exceptions::cassandra_exception& e) {
@@ -320,7 +307,7 @@ query_processor::parse_statement(const sstring_view& query)
    }
 }

-query_options query_processor::make_internal_options(::shared_ptr<statements::prepared_statement> p,
+query_options query_processor::make_internal_options(const statements::prepared_statement::checked_weak_ptr& p,
                                                     const std::initializer_list<data_value>& values,
                                                     db::consistency_level cl)
 {
@@ -328,29 +315,29 @@ query_options query_processor::make_internal_options(::shared_ptr<statements::pr
        throw std::invalid_argument(sprint("Invalid number of values. Expecting %d but got %d", p->bound_names.size(), values.size()));
    }
    auto ni = p->bound_names.begin();
-    std::vector<bytes_opt> bound_values;
+    std::vector<cql3::raw_value> bound_values;
    for (auto& v : values) {
        auto& n = *ni++;
        if (v.type() == bytes_type) {
-            bound_values.push_back({value_cast<bytes>(v)});
+            bound_values.push_back(cql3::raw_value::make_value(value_cast<bytes>(v)));
        } else if (v.is_null()) {
-            bound_values.push_back({});
+            bound_values.push_back(cql3::raw_value::make_null());
        } else {
-            bound_values.push_back({n->type->decompose(v)});
+            bound_values.push_back(cql3::raw_value::make_value(n->type->decompose(v)));
        }
    }
    return query_options(cl, bound_values);
 }

-::shared_ptr<statements::prepared_statement> query_processor::prepare_internal(const sstring& query_string)
+statements::prepared_statement::checked_weak_ptr query_processor::prepare_internal(const sstring& query_string)
 {
    auto& p = _internal_statements[query_string];
    if (p == nullptr) {
-        auto np = parse_statement(query_string)->prepare(_db.local());
+        auto np = parse_statement(query_string)->prepare(_db.local(), _cql_stats);
        np->statement->validate(_proxy, *_internal_state);
        p = std::move(np); // inserts it into map
    }
-    return p;
+    return p->checked_weak_from_this();
 }

 future<::shared_ptr<untyped_result_set>>
@@ -360,17 +347,16 @@ query_processor::execute_internal(const sstring& query_string,
    if (log.is_enabled(logging::log_level::trace)) {
        log.trace("execute_internal: \"{}\" ({})", query_string, ::join(", ", values));
    }
-    auto p = prepare_internal(query_string);
-    return execute_internal(p, values);
+    return execute_internal(prepare_internal(query_string), values);
 }

 future<::shared_ptr<untyped_result_set>>
-query_processor::execute_internal(::shared_ptr<statements::prepared_statement> p,
+query_processor::execute_internal(statements::prepared_statement::checked_weak_ptr p,
                                  const std::initializer_list<data_value>& values)
 {
    auto opts = make_internal_options(p, values);
    return do_with(std::move(opts), [this, p = std::move(p)](auto& opts) {
-        return p->statement->execute_internal(_proxy, *_internal_state, opts).then([p](auto msg) {
+        return p->statement->execute_internal(_proxy, *_internal_state, opts).then([stmt = p->statement](auto msg) {
            return make_ready_future<::shared_ptr<untyped_result_set>>(::make_shared<untyped_result_set>(msg));
        });
    });
@@ -382,27 +368,30 @@ query_processor::process(const sstring& query_string,
                         const std::initializer_list<data_value>& values,
                         bool cache)
 {
-    auto p = cache ? prepare_internal(query_string) : parse_statement(query_string)->prepare(_db.local());
-    if (!cache) {
+    if (cache) {
+        return process(prepare_internal(query_string), cl, values);
+    } else {
+        auto p = parse_statement(query_string)->prepare(_db.local(), _cql_stats);
        p->statement->validate(_proxy, *_internal_state);
+        auto checked_weak_ptr = p->checked_weak_from_this();
+        return process(std::move(checked_weak_ptr), cl, values).finally([p = std::move(p)] {});
    }
-    return process(p, cl, values);
 }

 future<::shared_ptr<untyped_result_set>>
-query_processor::process(::shared_ptr<statements::prepared_statement> p,
+query_processor::process(statements::prepared_statement::checked_weak_ptr p,
                         db::consistency_level cl,
                         const std::initializer_list<data_value>& values)
 {
    auto opts = make_internal_options(p, values, cl);
    return do_with(std::move(opts), [this, p = std::move(p)](auto & opts) {
-        return p->statement->execute(_proxy, *_internal_state, opts).then([p](auto msg) {
+        return p->statement->execute(_proxy, *_internal_state, opts).then([](auto msg) {
            return make_ready_future<::shared_ptr<untyped_result_set>>(::make_shared<untyped_result_set>(msg));
        });
    });
 }

-future<::shared_ptr<transport::messages::result_message>>
+future<::shared_ptr<cql_transport::messages::result_message>>
 query_processor::process_batch(::shared_ptr<statements::batch_statement> batch,
                               service::query_state& query_state,
                               query_options& options)
@@ -441,6 +430,10 @@ void query_processor::migration_subscriber::on_create_aggregate(const sstring& k
    log.warn("{} event ignored", __func__);
 }

+void query_processor::migration_subscriber::on_create_view(const sstring& ks_name, const sstring& view_name)
+{
+}
+
 void query_processor::migration_subscriber::on_update_keyspace(const sstring& ks_name)
 {
 }
@@ -464,6 +457,10 @@ void query_processor::migration_subscriber::on_update_aggregate(const sstring& k
 {
 }

+void query_processor::migration_subscriber::on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed)
+{
+}
+
 void query_processor::migration_subscriber::on_drop_keyspace(const sstring& ks_name)
 {
    remove_invalid_prepared_statements(ks_name, std::experimental::nullopt);
@@ -488,9 +485,13 @@ void query_processor::migration_subscriber::on_drop_aggregate(const sstring& ks_
    log.warn("{} event ignored", __func__);
 }

+void query_processor::migration_subscriber::on_drop_view(const sstring& ks_name, const sstring& view_name)
+{
+}
+
 void query_processor::migration_subscriber::remove_invalid_prepared_statements(sstring ks_name, std::experimental::optional<sstring> cf_name)
 {
-    _qp->invalidate_prepared_statements([&] (::shared_ptr<cql_statement> stmt) {
+    _qp->_prepared_cache.remove_if([&] (::shared_ptr<cql_statement> stmt) {
        return this->should_invalidate(ks_name, cf_name, stmt);
    });
 }
--- a/cql3/query_processor.hh
+++ b/cql3/query_processor.hh
@@ -43,6 +43,7 @@

 #include <experimental/string_view>
 #include <unordered_map>
+#include <seastar/core/metrics_registration.hh>

 #include "core/shared_ptr.hh"
 #include "exceptions/exceptions.hh"
@@ -56,6 +57,7 @@
 #include "statements/prepared_statement.hh"
 #include "transport/messages/result_message.hh"
 #include "untyped_result_set.hh"
+#include "prepared_statements_cache.hh"

 namespace cql3 {

@@ -63,9 +65,32 @@ namespace statements {
 class batch_statement;
 }

+class prepared_statement_is_too_big : public std::exception {
+public:
+    static constexpr int max_query_prefix = 100;
+
+private:
+    sstring _msg;
+
+public:
+    prepared_statement_is_too_big(const sstring& query_string)
+        : _msg(seastar::format("Prepared statement is too big: {}", query_string.substr(0, max_query_prefix)))
+    {
+        // mark that we clipped the query string
+        if (query_string.size() > max_query_prefix) {
+            _msg += "...";
+        }
+    }
+
+    virtual const char* what() const noexcept override {
+        return _msg.c_str();
+    }
+};
+
 class query_processor {
 public:
    class migration_subscriber;
+
 private:
    std::unique_ptr<migration_subscriber> _migration_subscriber;
    distributed<service::storage_proxy>& _proxy;
@@ -75,7 +100,9 @@ private:
        uint64_t prepare_invocations = 0;
    } _stats;

-    std::vector<scollectd::registration> _collectd_regs;
+    cql_stats _cql_stats;
+
+    seastar::metrics::metric_groups _metrics;

    class internal_state;
    std::unique_ptr<internal_state> _internal_state;
@@ -92,6 +119,11 @@ public:
    distributed<service::storage_proxy>& proxy() {
        return _proxy;
    }
+
+    cql_stats& get_cql_stats() {
+        return _cql_stats;
+    }
+
 #if 0
    public static final QueryProcessor instance = new QueryProcessor();
 #endif
@@ -119,10 +151,8 @@ private:
        }
    };
 #endif
-
-    std::unordered_map<bytes, ::shared_ptr<statements::prepared_statement>> _prepared_statements;
-    std::unordered_map<int32_t, ::shared_ptr<statements::prepared_statement>> _thrift_prepared_statements;
-    std::unordered_map<sstring, ::shared_ptr<statements::prepared_statement>> _internal_statements;
+    prepared_statements_cache _prepared_cache;
+    std::unordered_map<sstring, std::unique_ptr<statements::prepared_statement>> _internal_statements;
 #if 0

    // A map for prepared statements used internally (which we don't want to mix with user statement, in particular we don't
@@ -213,21 +243,14 @@ private:
    }
 #endif
 public:
-    ::shared_ptr<statements::prepared_statement> get_prepared(const bytes& id) {
-        auto it = _prepared_statements.find(id);
-        if (it == _prepared_statements.end()) {
-            return ::shared_ptr<statements::prepared_statement>{};
+    statements::prepared_statement::checked_weak_ptr get_prepared(const prepared_cache_key_type& key) {
+        auto it = _prepared_cache.find(key);
+        if (it == _prepared_cache.end()) {
+            return statements::prepared_statement::checked_weak_ptr();
        }
-        return it->second;
+        return *it;
    }

-    ::shared_ptr<statements::prepared_statement> get_prepared_for_thrift(int32_t id) {
-        auto it = _thrift_prepared_statements.find(id);
-        if (it == _thrift_prepared_statements.end()) {
-            return ::shared_ptr<statements::prepared_statement>{};
-        }
-        return it->second;
-    }
 #if 0
    public static void validateKey(ByteBuffer key) throws InvalidRequestException
    {
@@ -267,7 +290,7 @@ public:
    }
 #endif
 public:
-    future<::shared_ptr<transport::messages::result_message>> process_statement(::shared_ptr<cql_statement> statement,
+    future<::shared_ptr<cql_transport::messages::result_message>> process_statement(::shared_ptr<cql_statement> statement,
            service::query_state& query_state, const query_options& options);

 #if 0
@@ -278,7 +301,7 @@ public:
    }
 #endif

-    future<::shared_ptr<transport::messages::result_message>> process(const std::experimental::string_view& query_string,
+    future<::shared_ptr<cql_transport::messages::result_message>> process(const std::experimental::string_view& query_string,
            service::query_state& query_state, query_options& options);

 #if 0
@@ -332,23 +355,23 @@ public:
    }
 #endif
 private:
-    query_options make_internal_options(::shared_ptr<statements::prepared_statement>, const std::initializer_list<data_value>&, db::consistency_level = db::consistency_level::ONE);
+    query_options make_internal_options(const statements::prepared_statement::checked_weak_ptr& p, const std::initializer_list<data_value>&, db::consistency_level = db::consistency_level::ONE);
 public:
    future<::shared_ptr<untyped_result_set>> execute_internal(
            const sstring& query_string,
            const std::initializer_list<data_value>& = { });

-    ::shared_ptr<statements::prepared_statement> prepare_internal(const sstring& query);
+    statements::prepared_statement::checked_weak_ptr prepare_internal(const sstring& query);

    future<::shared_ptr<untyped_result_set>> execute_internal(
-            ::shared_ptr<statements::prepared_statement>,
+            statements::prepared_statement::checked_weak_ptr p,
            const std::initializer_list<data_value>& = { });

    future<::shared_ptr<untyped_result_set>> process(
                    const sstring& query_string,
                    db::consistency_level, const std::initializer_list<data_value>& = { }, bool cache = false);
    future<::shared_ptr<untyped_result_set>> process(
-                    ::shared_ptr<statements::prepared_statement>,
+                    statements::prepared_statement::checked_weak_ptr p,
                    db::consistency_level, const std::initializer_list<data_value>& = { });

    /*
@@ -426,43 +449,62 @@ public:
    }
 #endif

-    future<::shared_ptr<transport::messages::result_message::prepared>>
-    prepare(const std::experimental::string_view& query_string, service::query_state& query_state);
+    future<::shared_ptr<cql_transport::messages::result_message::prepared>>
+    prepare(sstring query_string, service::query_state& query_state);

-    future<::shared_ptr<transport::messages::result_message::prepared>>
-    prepare(const std::experimental::string_view& query_string, const service::client_state& client_state, bool for_thrift);
+    future<::shared_ptr<cql_transport::messages::result_message::prepared>>
+    prepare(sstring query_string, const service::client_state& client_state, bool for_thrift);

-    static bytes compute_id(const std::experimental::string_view& query_string, const sstring& keyspace);
-    static int32_t compute_thrift_id(const std::experimental::string_view& query_string, const sstring& keyspace);
+    static prepared_cache_key_type compute_id(const std::experimental::string_view& query_string, const sstring& keyspace);
+    static prepared_cache_key_type compute_thrift_id(const std::experimental::string_view& query_string, const sstring& keyspace);

 private:
-    ::shared_ptr<transport::messages::result_message::prepared>
-    get_stored_prepared_statement(const std::experimental::string_view& query_string, const sstring& keyspace, bool for_thrift);
+    ///
+    /// \tparam ResultMsgType type of the returned result message (CQL or Thrift)
+    /// \tparam PreparedKeyGenerator a function that generates the prepared statement cache key for given query and keyspace
+    /// \tparam IdGetter a function that returns the corresponding prepared statement ID (CQL or Thrift) for a given prepared statement cache key
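+    /// \param query_string text of the query to prepare; it is moved into the deferred context, which keeps it alive
+    /// \param client_state client state used to prepare the statement, passed to deferred context by reference. The caller must ensure its liveness.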
+    /// \param id_gen prepared ID generator, called before the first deferring
+    /// \param id_getter prepared ID getter, passed to deferred context by reference. The caller must ensure its liveness.
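+    /// \return a future holding a ResultMsgType message built from the prepared statement ID and the cached statement; fails with prepared_statement_is_too_big if the cache reports the statement as too big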
+    template <typename ResultMsgType, typename PreparedKeyGenerator, typename IdGetter>
+    future<::shared_ptr<cql_transport::messages::result_message::prepared>>
+    prepare_one(sstring query_string, const service::client_state& client_state, PreparedKeyGenerator&& id_gen, IdGetter&& id_getter) {
+        return do_with(id_gen(query_string, client_state.get_raw_keyspace()), std::move(query_string), [this, &client_state, &id_getter] (const prepared_cache_key_type& key, const sstring& query_string) {
+            return _prepared_cache.get(key, [this, &query_string, &client_state] {
+                auto prepared = get_statement(query_string, client_state);
+                auto bound_terms = prepared->statement->get_bound_terms();
+                if (bound_terms > std::numeric_limits<uint16_t>::max()) {
+                    throw exceptions::invalid_request_exception(sprint("Too many markers(?). %d markers exceed the allowed maximum of %d", bound_terms, std::numeric_limits<uint16_t>::max()));
+                }
+                assert(bound_terms == prepared->bound_names.size());
+                prepared->raw_cql_statement = query_string;
+                return make_ready_future<std::unique_ptr<statements::prepared_statement>>(std::move(prepared));
+            }).then([&key, &id_getter] (auto prep_ptr) {
+                return make_ready_future<::shared_ptr<cql_transport::messages::result_message::prepared>>(::make_shared<ResultMsgType>(id_getter(key), std::move(prep_ptr)));
+            }).handle_exception_type([&query_string] (typename prepared_statements_cache::statement_is_too_big&) {
+                return make_exception_future<::shared_ptr<cql_transport::messages::result_message::prepared>>(prepared_statement_is_too_big(query_string));
+            });
+        });
+    };

-    future<::shared_ptr<transport::messages::result_message::prepared>>
-    store_prepared_statement(const std::experimental::string_view& query_string, const sstring& keyspace, ::shared_ptr<statements::prepared_statement> prepared, bool for_thrift);
+    template <typename ResultMsgType, typename KeyGenerator, typename IdGetter>
+    ::shared_ptr<cql_transport::messages::result_message::prepared>
+    get_stored_prepared_statement_one(const std::experimental::string_view& query_string, const sstring& keyspace, KeyGenerator&& key_gen, IdGetter&& id_getter)
+    {
+        auto cache_key = key_gen(query_string, keyspace);
+        auto it = _prepared_cache.find(cache_key);
+        if (it == _prepared_cache.end()) {
+            return ::shared_ptr<cql_transport::messages::result_message::prepared>();
+        }

-    // Erases the statements for which filter returns true.
-    template <typename Pred>
-    void invalidate_prepared_statements(Pred filter) {
-        static_assert(std::is_same<bool, std::result_of_t<Pred(::shared_ptr<cql_statement>)>>::value,
-                      "bad Pred signature");
-        for (auto it = _prepared_statements.begin(); it != _prepared_statements.end(); ) {
-            if (filter(it->second->statement)) {
-                it = _prepared_statements.erase(it);
-            } else {
-                ++it;
-            }
-        }
-        for (auto it = _thrift_prepared_statements.begin(); it != _thrift_prepared_statements.end(); ) {
-            if (filter(it->second->statement)) {
-                it = _thrift_prepared_statements.erase(it);
-            } else {
-                ++it;
-            }
-        }
+        return ::make_shared<ResultMsgType>(id_getter(cache_key), *it);
    }

+    ::shared_ptr<cql_transport::messages::result_message::prepared>
+    get_stored_prepared_statement(const std::experimental::string_view& query_string, const sstring& keyspace, bool for_thrift);
+
 #if 0
    public ResultMessage processPrepared(CQLStatement statement, QueryState queryState, QueryOptions options)
    throws RequestExecutionException, RequestValidationException
@@ -489,10 +531,10 @@ private:
 #endif

 public:
-    future<::shared_ptr<transport::messages::result_message>> process_batch(::shared_ptr<statements::batch_statement>,
+    future<::shared_ptr<cql_transport::messages::result_message>> process_batch(::shared_ptr<statements::batch_statement>,
            service::query_state& query_state, query_options& options);

-    ::shared_ptr<statements::prepared_statement> get_statement(const std::experimental::string_view& query,
+    std::unique_ptr<statements::prepared_statement> get_statement(const std::experimental::string_view& query,
            const service::client_state& client_state);
    static ::shared_ptr<statements::raw::parsed_statement> parse_statement(const std::experimental::string_view& query);

@@ -518,18 +560,21 @@ public:
    virtual void on_create_user_type(const sstring& ks_name, const sstring& type_name) override;
    virtual void on_create_function(const sstring& ks_name, const sstring& function_name) override;
    virtual void on_create_aggregate(const sstring& ks_name, const sstring& aggregate_name) override;
+    virtual void on_create_view(const sstring& ks_name, const sstring& view_name) override;

    virtual void on_update_keyspace(const sstring& ks_name) override;
    virtual void on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool columns_changed) override;
    virtual void on_update_user_type(const sstring& ks_name, const sstring& type_name) override;
    virtual void on_update_function(const sstring& ks_name, const sstring& function_name) override;
    virtual void on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) override;
+    virtual void on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed) override;

    virtual void on_drop_keyspace(const sstring& ks_name) override;
    virtual void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) override;
    virtual void on_drop_user_type(const sstring& ks_name, const sstring& type_name) override;
    virtual void on_drop_function(const sstring& ks_name, const sstring& function_name) override;
    virtual void on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) override;
+    virtual void on_drop_view(const sstring& ks_name, const sstring& view_name) override;
 private:
    void remove_invalid_prepared_statements(sstring ks_name, std::experimental::optional<sstring> cf_name);
    bool should_invalidate(sstring ks_name, std::experimental::optional<sstring> cf_name, ::shared_ptr<cql_statement> statement);
--- a/cql3/relation.hh
+++ b/cql3/relation.hh
@@ -156,6 +156,10 @@ public:
            return new_contains_restriction(db, schema, bound_names, false);
        } else if (_relation_type == operator_type::CONTAINS_KEY) {
            return new_contains_restriction(db, schema, bound_names, true);
+        } else if (_relation_type == operator_type::IS_NOT) {
+            // This case is not supposed to happen: statement_restrictions
+            // constructor does not call this function for views' IS_NOT.
+            throw exceptions::invalid_request_exception(sprint("Unsupported \"IS NOT\" relation: %s", to_string()));
        } else {
            throw exceptions::invalid_request_exception(sprint("Unsupported \"!=\" relation: %s", to_string()));
        }
@@ -216,6 +220,15 @@ public:
    virtual ::shared_ptr<restrictions::restriction> new_contains_restriction(database& db, schema_ptr schema,
        ::shared_ptr<variable_specifications> bound_names, bool isKey) = 0;

+    /**
+     * Renames an identifier in this Relation, if applicable.
+     * @param from the old identifier
+     * @param to the new identifier
+     * @return a pointer to this object, if the old identifier is not in the set of entities that this relation covers;
+     *         otherwise a new Relation with "from" replaced by "to" is returned.
+     */
+    virtual ::shared_ptr<relation> maybe_rename_identifier(const column_identifier::raw& from, column_identifier::raw to) = 0;
+
 protected:

    /**
--- a/cql3/restrictions/abstract_restriction.hh
+++ b/cql3/restrictions/abstract_restriction.hh
@@ -94,6 +94,26 @@ public:
        return true;
    }

+    /**
+     * Whether the specified row satisfies this restriction. Assumes the row
+     * is live, but not necessarily all of its cells. If a cell isn't live and
+     * there's a restriction on its column, then the function returns false.
+     *
+     * @param schema the schema the row belongs to
+     * @param key the partition key
+     * @param ckey the clustering key
+     * @param cells the remaining row columns
+     * @param options the query options the restriction's values are bound with
+     * @param now the time point used to decide whether a cell is live
+     * @return <code>true</code> if the row satisfies this restriction, <code>false</code> otherwise
+     */
+    virtual bool is_satisfied_by(const schema& schema,
+                                 const partition_key& key,
+                                 const clustering_key_prefix& ckey,
+                                 const row& cells,
+                                 const query_options& options,
+                                 gc_clock::time_point now) const = 0;
+
 protected:
 #if 0
    protected static ByteBuffer validateIndexedValue(ColumnSpecification columnSpec,
@@ -113,7 +133,7 @@ protected:
     * @param function_name the function name
     * @return <code>true</code> if the specified term is using the specified function, <code>false</code> otherwise.
     */
-    static bool uses_function(::shared_ptr<term> term, const sstring& ks_name, const sstring& function_name) {
+    static bool term_uses_function(::shared_ptr<term> term, const sstring& ks_name, const sstring& function_name) {
        return bool(term) && term->uses_function(ks_name, function_name);
    }

@@ -125,9 +145,9 @@ protected:
     * @param function_name the function name
     * @return <code>true</code> if one of the specified term is using the specified function, <code>false</code> otherwise.
     */
-    static bool uses_function(const std::vector<::shared_ptr<term>>& terms, const sstring& ks_name, const sstring& function_name) {
+    static bool term_uses_function(const std::vector<::shared_ptr<term>>& terms, const sstring& ks_name, const sstring& function_name) {
        for (auto&& value : terms) {
-            if (uses_function(value, ks_name, function_name)) {
+            if (term_uses_function(value, ks_name, function_name)) {
                return true;
            }
        }
--- a/cql3/restrictions/multi_column_restriction.hh
+++ b/cql3/restrictions/multi_column_restriction.hh
@@ -85,6 +85,20 @@ public:
        do_merge_with(as_pkr);
    }

+    bool is_satisfied_by(const schema& schema, // row-level check: is ckey inside the ranges this restriction selects?
+                         const partition_key& key,
+                         const clustering_key_prefix& ckey,
+                         const row& cells,
+                         const query_options& options,
+                         gc_clock::time_point now) const override { // key, cells and now are not consulted here
+        for (auto&& range : bounds_ranges(options)) {
+            if (!range.contains(ckey, clustering_key_prefix::prefix_equal_tri_compare(schema))) { // NOTE(review): ckey must be in *every* range; if bounds_ranges() can return several disjoint ranges (e.g. for IN) this probably wants "any" - confirm
+                return false;
+            }
+        }
+        return true; // also reached when bounds_ranges() is empty
+    }
+
 protected:
    virtual void do_merge_with(::shared_ptr<primary_key_restrictions<clustering_key_prefix>> other) = 0;

@@ -155,7 +169,7 @@ public:
    { }

    virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override {
-        return abstract_restriction::uses_function(_value, ks_name, function_name);
+        return abstract_restriction::term_uses_function(_value, ks_name, function_name);
    }

    virtual sstring to_string() const override {
@@ -304,11 +318,11 @@ public:
    { }

    virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override  {
-        return abstract_restriction::uses_function(_values, ks_name, function_name);
+        return abstract_restriction::term_uses_function(_values, ks_name, function_name);
    }

    virtual sstring to_string() const override  {
-        return sprint("IN(%s)", ::to_string(_values));
+        return sprint("IN(%s)", std::to_string(_values));
    }

 protected:
@@ -428,8 +442,8 @@ public:
    }

    virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override {
-        return (_slice.has_bound(statements::bound::START) && abstract_restriction::uses_function(_slice.bound(statements::bound::START), ks_name, function_name))
-                || (_slice.has_bound(statements::bound::END) && abstract_restriction::uses_function(_slice.bound(statements::bound::END), ks_name, function_name));
+        return (_slice.has_bound(statements::bound::START) && abstract_restriction::term_uses_function(_slice.bound(statements::bound::START), ks_name, function_name))
+                || (_slice.has_bound(statements::bound::END) && abstract_restriction::term_uses_function(_slice.bound(statements::bound::END), ks_name, function_name));
    }

    virtual bool is_inclusive(statements::bound b) const override {
--- a/cql3/restrictions/primary_key_restrictions.hh
+++ b/cql3/restrictions/primary_key_restrictions.hh
@@ -67,7 +67,7 @@ template<typename ValueType>
 struct range_type_for;

 template<>
-struct range_type_for<partition_key> : public std::remove_reference<query::partition_range> {};
+struct range_type_for<partition_key> : public std::remove_reference<dht::partition_range> {};
 template<>
 struct range_type_for<clustering_key_prefix> : public std::remove_reference<query::clustering_range> {};

--- a/cql3/restrictions/single_column_primary_key_restrictions.hh
+++ b/cql3/restrictions/single_column_primary_key_restrictions.hh
@@ -46,6 +46,7 @@
 #include "cartesian_product.hh"
 #include "cql3/restrictions/primary_key_restrictions.hh"
 #include "cql3/restrictions/single_column_restrictions.hh"
+#include <boost/algorithm/cxx11/all_of.hpp>
 #include <boost/range/adaptor/transformed.hpp>
 #include <boost/range/adaptor/filtered.hpp>

@@ -96,6 +97,14 @@ public:
        return _in;
    }

+    virtual bool has_bound(statements::bound b) const override {
+        return boost::algorithm::all_of(_restrictions->restrictions(), [b] (auto&& r) { return r.second->has_bound(b); });
+    }
+
+    virtual bool is_inclusive(statements::bound b) const override {
+        return boost::algorithm::all_of(_restrictions->restrictions(), [b] (auto&& r) { return r.second->is_inclusive(b); });
+    }
+
    virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override {
        return _restrictions->uses_function(ks_name, function_name);
    }
@@ -115,7 +124,7 @@ public:
                if (restriction->is_slice()) {
                    throw exceptions::invalid_request_exception(sprint(
                        "PRIMARY KEY column \"%s\" cannot be restricted (preceding column \"%s\" is restricted by a non-EQ relation)",
-                        _restrictions->next_column(new_column)->name_as_text(), new_column.name_as_text()));
+                        last_column.name_as_text(), new_column.name_as_text()));
                }
            }

@@ -331,12 +340,23 @@ public:
    sstring to_string() const override {
        return sprint("Restrictions(%s)", join(", ", get_column_defs()));
    }
+
+    virtual bool is_satisfied_by(const schema& schema,
+                                 const partition_key& key,
+                                 const clustering_key_prefix& ckey,
+                                 const row& cells,
+                                 const query_options& options,
+                                 gc_clock::time_point now) const override {
+        return boost::algorithm::all_of(
+            _restrictions->restrictions() | boost::adaptors::map_values,
+            [&] (auto&& r) { return r->is_satisfied_by(schema, key, ckey, cells, options, now); });
+    }
 };

 template<>
-std::vector<query::partition_range>
+dht::partition_range_vector
 single_column_primary_key_restrictions<partition_key>::bounds_ranges(const query_options& options) const {
-    std::vector<query::partition_range> ranges;
+    dht::partition_range_vector ranges;
    ranges.reserve(size());
    for (query::range<partition_key>& r : compute_bounds(options)) {
        if (!r.is_singular()) {
--- a/cql3/restrictions/single_column_restriction.hh
+++ b/cql3/restrictions/single_column_restriction.hh
@@ -49,6 +49,8 @@
 #include "schema.hh"
 #include "to_string.hh"
 #include "exceptions/exceptions.hh"
+#include "keys.hh"
+#include "mutation_partition.hh"

 namespace cql3 {

@@ -105,6 +107,13 @@ public:

    class slice;
    class contains;
+
+protected:
+    bytes_view_opt get_value(const schema& schema,
+            const partition_key& key,
+            const clustering_key_prefix& ckey,
+            const row& cells,
+            gc_clock::time_point now) const;
 };

 class single_column_restriction::EQ final : public single_column_restriction {
@@ -117,7 +126,7 @@ public:
    { }

    virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override {
-        return abstract_restriction::uses_function(_value, ks_name, function_name);
+        return abstract_restriction::term_uses_function(_value, ks_name, function_name);
    }

    virtual bool is_EQ() const override {
@@ -143,6 +152,13 @@ public:
            "%s cannot be restricted by more than one relation if it includes an Equal", _column_def.name_as_text()));
    }

+    virtual bool is_satisfied_by(const schema& schema,
+                                 const partition_key& key,
+                                 const clustering_key_prefix& ckey,
+                                 const row& cells,
+                                 const query_options& options,
+                                 gc_clock::time_point now) const override;
+
 #if 0
        @Override
        protected boolean isSupportedBy(SecondaryIndex index)
@@ -167,6 +183,13 @@ public:
            "%s cannot be restricted by more than one relation if it includes a IN", _column_def.name_as_text()));
    }

+    virtual bool is_satisfied_by(const schema& schema,
+                                 const partition_key& key,
+                                 const clustering_key_prefix& ckey,
+                                 const row& cells,
+                                 const query_options& options,
+                                 gc_clock::time_point now) const override;
+
 #if 0
    @Override
    protected final boolean isSupportedBy(SecondaryIndex index)
@@ -186,7 +209,7 @@ public:
    { }

    virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override {
-        return abstract_restriction::uses_function(_values, ks_name, function_name);
+        return abstract_restriction::term_uses_function(_values, ks_name, function_name);
    }

    virtual std::vector<bytes_opt> values(const query_options& options) const override {
@@ -198,7 +221,7 @@ public:
    }

    virtual sstring to_string() const override {
-        return sprint("IN(%s)", ::to_string(_values));
+        return sprint("IN(%s)", std::to_string(_values));
    }
 };

@@ -237,8 +260,8 @@ public:
    { }

    virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override {
-        return (_slice.has_bound(statements::bound::START) && abstract_restriction::uses_function(_slice.bound(statements::bound::START), ks_name, function_name))
-                || (_slice.has_bound(statements::bound::END) && abstract_restriction::uses_function(_slice.bound(statements::bound::END), ks_name, function_name));
+        return (_slice.has_bound(statements::bound::START) && abstract_restriction::term_uses_function(_slice.bound(statements::bound::START), ks_name, function_name))
+                || (_slice.has_bound(statements::bound::END) && abstract_restriction::term_uses_function(_slice.bound(statements::bound::END), ks_name, function_name));
    }

    virtual bool is_slice() const override {
@@ -310,6 +333,13 @@ public:
    virtual sstring to_string() const override {
        return sprint("SLICE%s", _slice);
    }
+
+    virtual bool is_satisfied_by(const schema& schema,
+                                 const partition_key& key,
+                                 const clustering_key_prefix& ckey,
+                                 const row& cells,
+                                 const query_options& options,
+                                 gc_clock::time_point now) const override;
 };

 // This holds CONTAINS, CONTAINS_KEY, and map[key] = value restrictions because we might want to have any combination of them.
@@ -403,15 +433,15 @@ public:
    }

    virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override {
-        return abstract_restriction::uses_function(_values, ks_name, function_name)
-            || abstract_restriction::uses_function(_keys, ks_name, function_name)
-            || abstract_restriction::uses_function(_entry_keys, ks_name, function_name)
-            || abstract_restriction::uses_function(_entry_values, ks_name, function_name);
+        return abstract_restriction::term_uses_function(_values, ks_name, function_name)
+            || abstract_restriction::term_uses_function(_keys, ks_name, function_name)
+            || abstract_restriction::term_uses_function(_entry_keys, ks_name, function_name)
+            || abstract_restriction::term_uses_function(_entry_values, ks_name, function_name);
    }

    virtual sstring to_string() const override {
        return sprint("CONTAINS(values=%s, keys=%s, entryKeys=%s, entryValues=%s)",
-            ::to_string(_values), ::to_string(_keys), ::to_string(_entry_keys), ::to_string(_entry_values));
+            std::to_string(_values), std::to_string(_keys), std::to_string(_entry_keys), std::to_string(_entry_values));
    }

    virtual bool has_bound(statements::bound b) const override {
@@ -426,6 +456,13 @@ public:
        throw exceptions::unsupported_operation_exception();
    }

+    virtual bool is_satisfied_by(const schema& schema,
+                                 const partition_key& key,
+                                 const clustering_key_prefix& ckey,
+                                 const row& cells,
+                                 const query_options& options,
+                                 gc_clock::time_point now) const override;
+
 #if 0
        private List<ByteBuffer> keys(const query_options& options) {
            return bindAndGet(keys, options);
--- a/cql3/restrictions/single_column_restrictions.hh
+++ b/cql3/restrictions/single_column_restrictions.hh
@@ -75,7 +75,7 @@ private:
     * The _restrictions per column.
     */
 public:
-    using restrictions_map = std::map<const column_definition*, ::shared_ptr<restriction>, column_definition_comparator>;
+    using restrictions_map = std::map<const column_definition*, ::shared_ptr<single_column_restriction>, column_definition_comparator>;
 private:
    restrictions_map _restrictions;
    bool _is_all_eq = true;
--- a/cql3/restrictions/statement_restrictions.cc
+++ b/cql3/restrictions/statement_restrictions.cc
@@ -28,6 +28,11 @@
 #include "single_column_primary_key_restrictions.hh"
 #include "token_restriction.hh"

+#include "cql3/single_column_relation.hh"
+#include "cql3/constants.hh"
+
+#include "stdx.hh"
+
 namespace cql3 {
 namespace restrictions {

@@ -85,6 +90,14 @@ public:
    sstring to_string() const override {
        return "Initial restrictions";
    }
+    virtual bool is_satisfied_by(const schema& schema,
+                                 const partition_key& key,
+                                 const clustering_key_prefix& ckey,
+                                 const row& cells,
+                                 const query_options& options,
+                                 gc_clock::time_point now) const override {
+        return true;
+    }
 };

 template<>
@@ -131,13 +144,22 @@ statement_restrictions::statement_restrictions(schema_ptr schema)
    , _clustering_columns_restrictions(get_initial_key_restrictions<clustering_key_prefix>())
    , _nonprimary_key_restrictions(::make_shared<single_column_restrictions>(schema))
 { }
+#if 0
+static const column_definition*
+to_column_definition(const schema_ptr& schema, const ::shared_ptr<column_identifier::raw>& entity) {
+    return get_column_definition(schema,
+            *entity->prepare_column_identifier(schema));
+}
+#endif

 statement_restrictions::statement_restrictions(database& db,
        schema_ptr schema,
+        statements::statement_type type,
        const std::vector<::shared_ptr<relation>>& where_clause,
        ::shared_ptr<variable_specifications> bound_names,
        bool selects_only_static_columns,
-        bool select_a_collection)
+        bool select_a_collection,
+        bool for_view)
    : statement_restrictions(schema)
 {
    /*
@@ -149,7 +171,31 @@ statement_restrictions::statement_restrictions(database& db,
     */
    if (!where_clause.empty()) {
        for (auto&& relation : where_clause) {
-            add_restriction(relation->to_restriction(db, schema, bound_names));
+            if (relation->get_operator() == cql3::operator_type::IS_NOT) {
+                single_column_relation* r =
+                        dynamic_cast<single_column_relation*>(relation.get());
+                // The "IS NOT NULL" restriction is only supported (and
+                // mandatory) for materialized view creation:
+                if (!r) {
+                    throw exceptions::invalid_request_exception("IS NOT only supports single column");
+                }
+                // currently, the grammar only allows NULL as the argument to
+                // "IS NOT", so this assertion should not be able to fail
+                assert(r->get_value() == cql3::constants::NULL_LITERAL);
+
+                auto col_id = r->get_entity()->prepare_column_identifier(schema);
+                const auto *cd = get_column_definition(schema, *col_id);
+                if (!cd) {
+                    throw exceptions::invalid_request_exception(sprint("restriction '%s' unknown column %s", relation->to_string(), r->get_entity()->to_string()));
+                }
+                _not_null_columns.insert(cd);
+
+                if (!for_view) {
+                    throw exceptions::invalid_request_exception(sprint("restriction '%s' is only supported in materialized view creation", relation->to_string()));
+                }
+            } else {
+                add_restriction(relation->to_restriction(db, schema, bound_names));
+            }
        }
    }

@@ -164,7 +210,7 @@ statement_restrictions::statement_restrictions(database& db,
            || nonprimary_key_restrictions->has_supporting_index(secondaryIndexManager);*/

    // At this point, the select statement if fully constructed, but we still have a few things to validate
-    process_partition_key_restrictions(has_queriable_index);
+    process_partition_key_restrictions(has_queriable_index, for_view);

    // Some but not all of the partition key columns have been specified;
    // hence we need turn these restrictions into index expressions.
@@ -173,11 +219,18 @@ statement_restrictions::statement_restrictions(database& db,
    }

    if (selects_only_static_columns && has_clustering_columns_restriction()) {
-        throw exceptions::invalid_request_exception(
-            "Cannot restrict clustering columns when selecting only static columns");
+        if (type.is_update() || type.is_delete()) {
+            throw exceptions::invalid_request_exception(sprint(
+                "Invalid restrictions on clustering columns since the %s statement modifies only static columns", type));
+        }
+
+        if (type.is_select()) {
+            throw exceptions::invalid_request_exception(
+                "Cannot restrict clustering columns when selecting only static columns");
+        }
    }

-    process_clustering_columns_restrictions(has_queriable_index, select_a_collection);
+    process_clustering_columns_restrictions(has_queriable_index, select_a_collection, for_view);

    // Covers indexes on the first clustering column (among others).
    if (_is_key_range && has_queriable_clustering_column_index)
@@ -219,7 +272,7 @@ statement_restrictions::statement_restrictions(database& db,
        _index_restrictions.push_back(_nonprimary_key_restrictions);
    }

-    if (_uses_secondary_indexing) {
+    if (_uses_secondary_indexing && !for_view) {
        fail(unimplemented::cause::INDEXES);
 #if 0
        validate_secondary_index_selections(selects_only_static_columns);
@@ -254,7 +307,7 @@ bool statement_restrictions::uses_function(const sstring& ks_name, const sstring
            || _nonprimary_key_restrictions->uses_function(ks_name, function_name);
 }

-void statement_restrictions::process_partition_key_restrictions(bool has_queriable_index) {
+void statement_restrictions::process_partition_key_restrictions(bool has_queriable_index, bool for_view) {
    // If there is a queriable index, no special condition are required on the other restrictions.
    // But we still need to know 2 things:
    // - If we don't have a queriable index, is the query ok
@@ -264,7 +317,7 @@ void statement_restrictions::process_partition_key_restrictions(bool has_queriab
    if (_partition_key_restrictions->is_on_token()) {
        _is_key_range = true;
    } else if (has_partition_key_unrestricted_components()) {
-        if (!_partition_key_restrictions->empty()) {
+        if (!_partition_key_restrictions->empty() && !for_view) {
            if (!has_queriable_index) {
                throw exceptions::invalid_request_exception(sprint("Partition key parts: %s must be restricted as other parts are",
                    join(", ", get_partition_key_unrestricted_components())));
@@ -280,7 +333,11 @@ bool statement_restrictions::has_partition_key_unrestricted_components() const {
    return _partition_key_restrictions->size() < _schema->partition_key_size();
 }

-void statement_restrictions::process_clustering_columns_restrictions(bool has_queriable_index, bool select_a_collection) {
+bool statement_restrictions::has_unrestricted_clustering_columns() const {
+    return _clustering_columns_restrictions->size() < _schema->clustering_key_size();
+}
+
+void statement_restrictions::process_clustering_columns_restrictions(bool has_queriable_index, bool select_a_collection, bool for_view) {
    if (!has_clustering_columns_restriction()) {
        return;
    }
@@ -300,7 +357,7 @@ void statement_restrictions::process_clustering_columns_restrictions(bool has_qu
        const column_definition* clustering_column = &(*clustering_columns_iter);
        ++clustering_columns_iter;

-        if (clustering_column != restricted_column) {
+        if (clustering_column != restricted_column && !for_view) {
            if (!has_queriable_index) {
                throw exceptions::invalid_request_exception(sprint(
                    "PRIMARY KEY column \"%s\" cannot be restricted as preceding column \"%s\" is not restricted",
@@ -317,9 +374,9 @@ void statement_restrictions::process_clustering_columns_restrictions(bool has_qu
    }
 }

-std::vector<query::partition_range> statement_restrictions::get_partition_key_ranges(const query_options& options) const {
+dht::partition_range_vector statement_restrictions::get_partition_key_ranges(const query_options& options) const {
    if (_partition_key_restrictions->empty()) {
-        return {query::partition_range::make_open_ended_both_sides()};
+        return {dht::partition_range::make_open_ended_both_sides()};
    }
    return _partition_key_restrictions->bounds_ranges(options);
 }
@@ -357,5 +414,274 @@ void statement_restrictions::validate_secondary_index_selections(bool selects_on
    }
 }

+// Returns the value of column `cdef` for the given row: key columns are read from `key`/`ckey`,
+// any other column from `cells`, yielding nullopt when the cell is absent or dead at `now`.
+static bytes_view_opt do_get_value(const schema& schema,
+        const column_definition& cdef,
+        const partition_key& key,
+        const clustering_key_prefix& ckey,
+        const row& cells,
+        gc_clock::time_point now) {
+    if (cdef.kind == column_kind::partition_key) {
+        return key.get_component(schema, cdef.component_index());
+    } else if (cdef.kind == column_kind::clustering_key) {
+        return ckey.get_component(schema, cdef.component_index());
+    }
+    auto found = cells.find_cell(cdef.id);
+    if (!found) {
+        return stdx::nullopt;
+    }
+    assert(cdef.is_atomic());
+    auto atomic = found->as_atomic_cell();
+    return atomic.is_dead(now) ? stdx::nullopt : bytes_view_opt(atomic.value());
+}
+
+// Fetches the row's value for this restriction's own column (_column_def) via do_get_value().
+bytes_view_opt single_column_restriction::get_value(const schema& schema,
+        const partition_key& key, const clustering_key_prefix& ckey,
+        const row& cells, gc_clock::time_point now) const {
+    auto row_value = do_get_value(schema, _column_def, key, ckey, cells, std::move(now));
+    return row_value;
+}
+
+// An EQ restriction holds when the bound operand is non-null and the row has a value
+// for the column that compares equal to it under the column's type.
+bool single_column_restriction::EQ::is_satisfied_by(const schema& schema,
+        const partition_key& key, const clustering_key_prefix& ckey,
+        const row& cells, const query_options& options,
+        gc_clock::time_point now) const {
+    if (_column_def.type->is_counter()) {
+        fail(unimplemented::cause::COUNTERS);
+    }
+    auto bound_value = value(options);
+    if (!bound_value) {
+        return false;
+    }
+    auto row_value = get_value(schema, key, ckey, cells, now);
+    return row_value && _column_def.type->compare(*bound_value, *row_value) == 0;
+}
+
+// IN holds when the row's value for the column equals at least one non-null bound operand.
+bool single_column_restriction::IN::is_satisfied_by(const schema& schema,
+        const partition_key& key, const clustering_key_prefix& ckey,
+        const row& cells, const query_options& options,
+        gc_clock::time_point now) const {
+    if (_column_def.type->is_counter()) {
+        fail(unimplemented::cause::COUNTERS);
+    }
+    auto row_value = get_value(schema, key, ckey, cells, now);
+    if (!row_value) {
+        return false;
+    }
+    auto candidates = values(options);
+    auto matches_row = [&] (auto&& candidate) {
+        return candidate && _column_def.type->compare(*candidate, *row_value) == 0;
+    };
+    return std::any_of(candidates.begin(), candidates.end(), matches_row);
+}
+
+// Builds the value range described by `slice` after binding its START/END terms with `options`.
+// A side the slice has no bound for, or whose term binds to null, gets an empty optional.
+static query::range<bytes_view> to_range(const term_slice& slice, const query_options& options) {
+    using range_type = query::range<bytes_view>;
+    auto bound_of = [&] (statements::bound side) -> stdx::optional<range_type::bound> {
+        if (!slice.has_bound(side)) {
+            return { };
+        }
+        auto bound_value = slice.bound(side)->bind_and_get(options);
+        if (!bound_value) {
+            return { };
+        }
+        return { range_type::bound(*bound_value, slice.is_inclusive(side)) };
+    };
+    return range_type(bound_of(statements::bound::START), bound_of(statements::bound::END));
+}
+
+// A slice restriction holds when the row has a value for the column that lies within the slice's bounds.
+bool single_column_restriction::slice::is_satisfied_by(const schema& schema,
+        const partition_key& key, const clustering_key_prefix& ckey,
+        const row& cells, const query_options& options,
+        gc_clock::time_point now) const {
+    if (_column_def.type->is_counter()) {
+        fail(unimplemented::cause::COUNTERS);
+    }
+    auto row_value = get_value(schema, key, ckey, cells, now);
+    if (row_value) {
+        auto allowed = to_range(_slice, options);
+        return allowed.contains(*row_value, _column_def.type->as_tri_comparator());
+    }
+    return false;
+}
+
+// Checks this CONTAINS / CONTAINS KEY / map-entry restriction against the column's collection
+// in the given row. Operands that bind to null are skipped; a missing collection never matches.
+bool single_column_restriction::contains::is_satisfied_by(const schema& schema,
+        const partition_key& key, const clustering_key_prefix& ckey,
+        const row& cells, const query_options& options,
+        gc_clock::time_point now) const {
+    if (_column_def.type->is_counter()) {
+        fail(unimplemented::cause::COUNTERS);
+    }
+    if (!_column_def.type->is_collection()) {
+        return false;
+    }
+
+    auto col_type = static_pointer_cast<const collection_type_impl>(_column_def.type);
+    if ((!_keys.empty() || !_entry_keys.empty()) && !col_type->is_map()) {
+        return false;
+    }
+    assert(_entry_keys.size() == _entry_values.size());
+
+    auto&& map_key_type = col_type->name_comparator();
+    auto&& element_type = col_type->is_set() ? col_type->name_comparator() : col_type->value_comparator();
+    if (_column_def.type->is_multi_cell()) {
+        auto cell = cells.find_cell(_column_def.id);
+        if (!cell) { return false; } // find_cell() can return null (do_get_value checks it too): no collection, no match
+        auto&& elements = col_type->deserialize_mutation_form(cell->as_collection_mutation()).cells;
+        auto end = std::remove_if(elements.begin(), elements.end(),
+                [now] (auto&& element) { return element.second.is_dead(now); }); // only [begin, end) - the live elements - is searched below
+        for (auto&& value : _values) {
+            auto val = value->bind_and_get(options);
+            if (!val) {
+                continue;
+            }
+            auto found = std::find_if(elements.begin(), end, [&] (auto&& element) {
+                return element_type->compare(element.second.value(), *val) == 0; // NOTE(review): compares the cell value even when is_set() picked the name comparator - confirm
+            });
+            if (found == end) {
+                return false;
+            }
+        }
+        for (auto&& key : _keys) {
+            auto k = key->bind_and_get(options);
+            if (!k) {
+                continue;
+            }
+            auto found = std::find_if(elements.begin(), end, [&] (auto&& element) {
+                return map_key_type->compare(element.first, *k) == 0;
+            });
+            if (found == end) {
+                return false;
+            }
+        }
+        for (uint32_t i = 0; i < _entry_keys.size(); ++i) {
+            auto map_key = _entry_keys[i]->bind_and_get(options);
+            auto map_value = _entry_values[i]->bind_and_get(options);
+            if (!map_key || !map_value) {
+                continue;
+            }
+            auto found = std::find_if(elements.begin(), end, [&] (auto&& element) {
+                return map_key_type->compare(element.first, *map_key) == 0;
+            });
+            if (found == end || element_type->compare(found->second.value(), *map_value) != 0) {
+                return false;
+            }
+        }
+    } else {
+        auto cell_value = get_value(schema, key, ckey, cells, now);
+        if (!cell_value) {
+            return false;
+        }
+        auto deserialized = _column_def.type->deserialize(*cell_value);
+        for (auto&& value : _values) {
+            auto val = value->bind_and_get(options);
+            if (!val) {
+                continue;
+            }
+            auto exists_in = [&](auto&& range) {
+                auto found = std::find_if(range.begin(), range.end(), [&] (auto&& element) {
+                    return element_type->compare(element.serialize(), *val) == 0;
+                });
+                return found != range.end();
+            };
+            if (col_type->is_list()) {
+                if (!exists_in(value_cast<list_type_impl::native_type>(deserialized))) {
+                    return false;
+                }
+            } else if (col_type->is_set()) {
+                if (!exists_in(value_cast<set_type_impl::native_type>(deserialized))) {
+                    return false;
+                }
+            } else {
+                auto data_map = value_cast<map_type_impl::native_type>(deserialized);
+                if (!exists_in(data_map | boost::adaptors::transformed([] (auto&& p) { return p.second; }))) {
+                    return false;
+                }
+            }
+        }
+        if (col_type->is_map()) {
+            auto& data_map = value_cast<map_type_impl::native_type>(deserialized);
+            for (auto&& key : _keys) {
+                auto k = key->bind_and_get(options);
+                if (!k) {
+                    continue;
+                }
+                auto found = std::find_if(data_map.begin(), data_map.end(), [&] (auto&& element) {
+                    return map_key_type->compare(element.first.serialize(), *k) == 0;
+                });
+                if (found == data_map.end()) {
+                    return false;
+                }
+            }
+            for (uint32_t i = 0; i < _entry_keys.size(); ++i) {
+                auto map_key = _entry_keys[i]->bind_and_get(options);
+                auto map_value = _entry_values[i]->bind_and_get(options);
+                if (!map_key || !map_value) {
+                    continue;
+                }
+                auto found = std::find_if(data_map.begin(), data_map.end(), [&] (auto&& element) {
+                    return map_key_type->compare(element.first.serialize(), *map_key) == 0;
+                });
+                if (found == data_map.end() || element_type->compare(found->second.serialize(), *map_value) != 0) {
+                    return false;
+                }
+            }
+        }
+    }
+
+    return true;
+}
+
+// Row-level check for a token EQ restriction: walks the bound operands and the restricted columns
+// in lockstep; each non-null operand must equal the row's value for the corresponding column.
+bool token_restriction::EQ::is_satisfied_by(const schema& schema,
+        const partition_key& key, const clustering_key_prefix& ckey,
+        const row& cells, const query_options& options,
+        gc_clock::time_point now) const {
+    bool satisfied = false;
+    auto cdef = _column_definitions.begin();
+    for (auto&& operand : values(options)) {
+        if (cdef == _column_definitions.end()) { break; } // more operands than columns: nothing left to compare
+        if (operand) {
+            auto cell_value = do_get_value(schema, **cdef, key, ckey, cells, now);
+            satisfied = cell_value && (*cdef)->type->compare(*operand, *cell_value) == 0;
+        }
+        if (!satisfied) { break; }
+        ++cdef; // advance with the operands; otherwise every operand is compared against the first column
+    }
+    return satisfied;
+}
+
+// Row-level check for a token slice restriction: every restricted column must have a value in
+// the row that lies inside the slice's range; with no restricted columns the result is false.
+bool token_restriction::slice::is_satisfied_by(const schema& schema,
+        const partition_key& key, const clustering_key_prefix& ckey,
+        const row& cells, const query_options& options,
+        gc_clock::time_point now) const {
+    auto allowed = to_range(_slice, options);
+    bool checked_any = false;
+    for (auto* column : _column_definitions) {
+        auto row_value = do_get_value(schema, *column, key, ckey, cells, now);
+        if (!row_value) {
+            return false;
+        }
+        if (!allowed.contains(*row_value, column->type->as_tri_comparator())) {
+            return false;
+        }
+        checked_any = true;
+    }
+    return checked_any;
+}
+
 }
 }
--- a/cql3/restrictions/statement_restrictions.hh
+++ b/cql3/restrictions/statement_restrictions.hh
@@ -49,6 +49,7 @@
 #include "cql3/restrictions/single_column_restrictions.hh"
 #include "cql3/relation.hh"
 #include "cql3/variable_specifications.hh"
+#include "cql3/statements/statement_type.hh"

 namespace cql3 {

@@ -83,6 +84,8 @@ private:
     */
    ::shared_ptr<single_column_restrictions> _nonprimary_key_restrictions;

+    std::unordered_set<const column_definition*> _not_null_columns;
+
    /**
     * The restrictions used to build the index expressions
     */
@@ -109,10 +112,12 @@ public:

    statement_restrictions(database& db,
        schema_ptr schema,
+        statements::statement_type type,
        const std::vector<::shared_ptr<relation>>& where_clause,
        ::shared_ptr<variable_specifications> bound_names,
        bool selects_only_static_columns,
-        bool select_a_collection);
+        bool select_a_collection,
+        bool for_view = false);
 private:
    void add_restriction(::shared_ptr<restriction> restriction);
    void add_single_column_restriction(::shared_ptr<single_column_restriction> restriction);
@@ -147,8 +152,13 @@ public:
        return _uses_secondary_indexing;
    }

-private:
-    void process_partition_key_restrictions(bool has_queriable_index);
+    /**
+     * @return the restrictions held in <code>_partition_key_restrictions</code>.
+     */
+    ::shared_ptr<primary_key_restrictions<partition_key>> get_partition_key_restrictions() const {
+        return _partition_key_restrictions;
+    }
+
+    /**
+     * @return the restrictions held in <code>_clustering_columns_restrictions</code>.
+     */
+    ::shared_ptr<primary_key_restrictions<clustering_key_prefix>> get_clustering_columns_restrictions() const {
+        return _clustering_columns_restrictions;
+    }

    /**
     * Checks if the partition key has some unrestricted components.
@@ -156,6 +166,14 @@ private:
     */
    bool has_partition_key_unrestricted_components() const;

+    /**
+     * Checks if the clustering key has some unrestricted components.
+     * @return <code>true</code> if the clustering key has some unrestricted components, <code>false</code> otherwise.
+     */
+    bool has_unrestricted_clustering_columns() const;
+private:
+    void process_partition_key_restrictions(bool has_queriable_index, bool for_view);
+
    /**
     * Returns the partition key components that are not restricted.
     * @return the partition key components that are not restricted.
@@ -169,7 +187,21 @@ private:
     * @param select_a_collection <code>true</code> if the query should return a collection column
     * @throws InvalidRequestException if the request is invalid
     */
-    void process_clustering_columns_restrictions(bool has_queriable_index, bool select_a_collection);
+    void process_clustering_columns_restrictions(bool has_queriable_index, bool select_a_collection, bool for_view);
+
+    /**
+     * Selects the restrictions object that covers columns of the given kind.
+     *
+     * @param kind the kind of column being looked up
+     * @return the partition key restrictions for <code>column_kind::partition_key</code>,
+     *         the clustering columns restrictions for <code>column_kind::clustering_key</code>,
+     *         and the non-primary-key restrictions for every other kind
+     */
+    ::shared_ptr<restrictions> get_restrictions(column_kind kind) const {
+        if (kind == column_kind::partition_key) {
+            return _partition_key_restrictions;
+        }
+        if (kind == column_kind::clustering_key) {
+            return _clustering_columns_restrictions;
+        }
+        return _nonprimary_key_restrictions;
+    }

 #if 0
    std::vector<::shared_ptr<index_expression>> get_index_expressions(const query_options& options) {
@@ -208,7 +240,7 @@ public:
     * @return the specified bound of the partition key
     * @throws InvalidRequestException if the boundary cannot be retrieved
     */
-    std::vector<query::partition_range> get_partition_key_ranges(const query_options& options) const;
+    dht::partition_range_vector get_partition_key_ranges(const query_options& options) const;

 #if 0
    /**
@@ -346,9 +378,28 @@ public:
     * @return <code>true</code> if the query has some restrictions on the clustering columns,
     * <code>false</code> otherwise.
     */
-    bool has_clustering_columns_restriction() {
+    bool has_clustering_columns_restriction() const {
        return !_clustering_columns_restrictions->empty();
    }
+
+    /**
+     * Tells whether the given column is restricted.
+     *
+     * A column counts as restricted when it is recorded in
+     * <code>_not_null_columns</code>, or when it appears among the column
+     * definitions reported by the restrictions for the column's kind.
+     *
+     * @param cdef the column to look up
+     * @return true if the column is restricted, false otherwise
+     */
+    bool is_restricted(const column_definition* cdef) const {
+        if (_not_null_columns.count(cdef) != 0) {
+            return true;
+        }
+
+        auto&& candidates = get_restrictions(cdef->kind).get()->get_column_defs();
+        for (auto&& candidate : candidates) {
+            if (candidate == cdef) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * @return a const reference to the restrictions map exposed by
+     *         <code>_nonprimary_key_restrictions</code>.
+     */
+    const single_column_restrictions::restrictions_map& get_non_pk_restriction() const {
+        return _nonprimary_key_restrictions->restrictions();
+    }
 };

 }
--- a/Show More
+++ b/Show More