gossip: Fix tokens assignment in assassinate_endpoint

The tokens vector is defined a few lines above and is needed outside the if block. Do not redefine it again in the if block, otherwise the tokens will be empty. Found by code inspection. Fixes #3551. Message-Id: <c7a06375c65c950e94236571127f533e5a60cbfd.1530002177.git.asias@scylladb.com> (cherry picked from commit c3b5a2ecd5)
locator::ec2_multi_region_snitch: don't call for ec2_snitch::gossiper_starting()
2018-06-27 12:01:19 +03:00 · 2018-06-12 19:02:48 +03:00 · 2018-05-24 12:02:15 +03:00 · 2018-05-24 11:14:20 +03:00 · 2018-05-24 11:08:13 +03:00 · 2018-05-24 15:24:29 +08:00
1114 changed files with 104716 additions and 21877 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -0,0 +1,9 @@
+*Installation details*
+Scylla version (or git commit hash):
+Cluster size:
+OS (RHEL/CentOS/Ubuntu/AWS AMI):
+
+*Hardware details (for performance issues)*          Delete if unneeded
+Platform (physical/VM/cloud instance type/docker):
+Hardware: sockets= cores= hyperthreading= memory=
+Disks: (SSD/HDD, count)
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,6 @@
 [submodule "seastar"]
 	path = seastar
-	url = ../seastar
+	url = ../scylla-seastar
 	ignore = dirty
 [submodule "swagger-ui"]
 	path = swagger-ui
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -0,0 +1,140 @@
+##
+## For best results, first compile the project using the Ninja build-system.
+##
+
+cmake_minimum_required(VERSION 3.7)
+project(scylla)
+
+if (NOT DEFINED ENV{CLION_IDE})
+    message(FATAL_ERROR "This CMakeLists.txt file is only valid for use in CLion")
+endif()
+
+# Default value. A more accurate list is populated through `pkg-config` below if `seastar.pc` is available.
+set(SEASTAR_INCLUDE_DIRS "seastar")
+
+# These paths are always available, since they're included in the repository. Additional DPDK headers are placed while
+# Seastar is built, and are captured in `SEASTAR_INCLUDE_DIRS` through parsing the Seastar pkg-config file (below).
+set(SEASTAR_DPDK_INCLUDE_DIRS
+        seastar/dpdk/lib/librte_eal/common/include
+        seastar/dpdk/lib/librte_eal/common/include/generic
+        seastar/dpdk/lib/librte_eal/common/include/x86
+        seastar/dpdk/lib/librte_ether)
+
+find_package(PkgConfig REQUIRED)
+
+set(ENV{PKG_CONFIG_PATH} "${CMAKE_SOURCE_DIR}/seastar/build/release:$ENV{PKG_CONFIG_PATH}")
+pkg_check_modules(SEASTAR seastar)
+
+find_package(Boost COMPONENTS filesystem program_options system thread)
+
+##
+## Populate the names of all source and header files in the indicated paths in a designated variable.
+##
+## When RECURSIVE is specified, directories are traversed recursively.
+##
+## Use: scan_scylla_source_directories(VAR my_result_var [RECURSIVE] PATHS [path1 path2 ...])
+##
+function (scan_scylla_source_directories)
+    # Keywords: RECURSIVE (flag), VAR (name of the result variable), PATHS (directories to scan).
+    cmake_parse_arguments(args "RECURSIVE" "VAR" "PATHS" "${ARGN}")
+
+    # Without VAR the final set() would use the first matched file name as the variable to assign.
+    if ("${args_VAR}" STREQUAL "")
+        message(FATAL_ERROR "scan_scylla_source_directories: VAR <result-variable> is required")
+    endif()
+
+    # Each directory contributes one pattern for sources (*.cc) and one for headers (*.hh).
+    set(globs "")
+    foreach (dir ${args_PATHS})
+        list(APPEND globs "${dir}/*.cc" "${dir}/*.hh")
+    endforeach()
+
+    if (args_RECURSIVE)
+        file(GLOB_RECURSE found_files ${globs})
+    else()
+        file(GLOB found_files ${globs})
+    endif()
+
+    set(${args_VAR} ${found_files} PARENT_SCOPE)
+endfunction()
+
+## Although Seastar is an external project, it is common enough to explore the sources while doing
+## Scylla development that we'll treat the Seastar sources as part of this project for easier navigation.
+scan_scylla_source_directories(
+        VAR SEASTAR_SOURCE_FILES
+        RECURSIVE
+
+        PATHS
+          seastar/core
+          seastar/http
+          seastar/json
+          seastar/net
+          seastar/rpc
+          seastar/tests
+          seastar/util)
+
+scan_scylla_source_directories(
+        VAR SCYLLA_ROOT_SOURCE_FILES
+        PATHS .)
+
+scan_scylla_source_directories(
+        VAR SCYLLA_SUB_SOURCE_FILES
+        RECURSIVE
+
+        PATHS
+          api
+          auth
+          cql3
+          db
+          dht
+          exceptions
+          gms
+          index
+          io
+          locator
+          message
+          repair
+          service
+          sstables
+          streaming
+          tests
+          thrift
+          tracing
+          transport
+          utils)
+
+scan_scylla_source_directories(
+        VAR SCYLLA_GEN_SOURCE_FILES
+        RECURSIVE
+        PATHS build/release/gen)
+
+set(SCYLLA_SOURCE_FILES
+        ${SCYLLA_ROOT_SOURCE_FILES}
+        ${SCYLLA_GEN_SOURCE_FILES}
+        ${SCYLLA_SUB_SOURCE_FILES})
+
+add_executable(scylla
+        ${SEASTAR_SOURCE_FILES}
+        ${SCYLLA_SOURCE_FILES})
+
+# Note that since CLion does not understand GCC6 concepts, we always disable them (even if users configure otherwise).
+# CLion seems to have trouble with `-U` (macro undefinition), so we do it this way instead.
+list(REMOVE_ITEM SEASTAR_CFLAGS "-DHAVE_GCC6_CONCEPTS")
+
+# If the Seastar pkg-config information is available, append to the default flags.
+#
+# For ease of browsing the source code, we always pretend that DPDK is enabled.
+target_compile_options(scylla PUBLIC
+        -std=gnu++14
+        -DHAVE_DPDK
+        -DHAVE_HWLOC
+        "${SEASTAR_CFLAGS}")
+
+# The order matters here: prefer the "static" DPDK directories to any dynamic paths from pkg-config. Some files are only
+# available dynamically, though.
+target_include_directories(scylla PUBLIC
+        .
+        ${SEASTAR_DPDK_INCLUDE_DIRS}
+        ${SEASTAR_INCLUDE_DIRS}
+        ${Boost_INCLUDE_DIRS}
+        build/release/gen)
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -0,0 +1,11 @@
+# Asking questions or requesting help
+
+Use the [ScyllaDB user mailing list](https://groups.google.com/forum/#!forum/scylladb-users) for general questions and help.
+
+# Reporting an issue
+
+Please use the [Issue Tracker](https://github.com/scylladb/scylla/issues/) to report issues.  Fill in as much information as you can in the issue template, especially for performance problems.
+
+# Contributing Code to Scylla
+
+To contribute code to Scylla, you need to sign the [Contributor License Agreement](http://www.scylladb.com/opensource/cla/) and send your changes as [patches](https://github.com/scylladb/scylla/wiki/Formatting-and-sending-patches) to the [mailing list](https://groups.google.com/forum/#!forum/scylladb-dev). We don't accept pull requests on GitHub.
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
-#Scylla
+# Scylla

-##Building Scylla
+## Building Scylla

 In addition to required packages by Seastar, the following packages are required by Scylla.

@@ -8,14 +8,14 @@ In addition to required packages by Seastar, the following packages are required
 Scylla uses submodules, so make sure you pull the submodules first by doing:
 ```
 git submodule init
-git submodule update --recursive
+git submodule update --init --recursive
 ```

 ### Building and Running Scylla on Fedora
 * Installing required packages:

 ```
-sudo yum install yaml-cpp-devel lz4-devel zlib-devel snappy-devel jsoncpp-devel thrift-devel antlr3-tool antlr3-C++-devel libasan libubsan gcc-c++ gnutls-devel ninja-build ragel libaio-devel cryptopp-devel xfsprogs-devel numactl-devel hwloc-devel libpciaccess-devel libxml2-devel python3-pyparsing
+sudo dnf install yaml-cpp-devel lz4-devel zlib-devel snappy-devel jsoncpp-devel thrift-devel antlr3-tool antlr3-C++-devel libasan libubsan gcc-c++ gnutls-devel ninja-build ragel libaio-devel cryptopp-devel xfsprogs-devel numactl-devel hwloc-devel libpciaccess-devel libxml2-devel python3-pyparsing lksctp-tools-devel protobuf-devel protobuf-compiler systemd-devel libunwind-devel
 ```

 * Build Scylla
@@ -83,14 +83,6 @@ Run the image with:
 docker run -p $(hostname -i):9042:9042 -i -t <image name>
 ```

-
 ## Contributing to Scylla

-Do not send pull requests.
-
-Send patches to the mailing list address scylladb-dev@googlegroups.com.
-Be sure to subscribe.
-
-In order for your patches to be merged, you must sign the Contributor's
-License Agreement, protecting your rights and ours.  See
-http://www.scylladb.com/opensource/cla/.
+[Guidelines for contributing](CONTRIBUTING.md)
--- a/9
+++ b/9
@@ -1,6 +1,6 @@
 #!/bin/sh

-VERSION=666.development
+VERSION=2.0.4

 if test -f version
 then
@@ -10,7 +10,12 @@ else
 	DATE=$(date +%Y%m%d)
 	GIT_COMMIT=$(git log --pretty=format:'%h' -n 1)
 	SCYLLA_VERSION=$VERSION
-	SCYLLA_RELEASE=$DATE.$GIT_COMMIT
+	# For custom package builds, replace "0" with "counter.your_name",
+	# where counter starts at 1 and increments for successive versions.
+	# This ensures that the package manager will select your custom
+	# package over the standard release.
+	SCYLLA_BUILD=0
+	SCYLLA_RELEASE=$SCYLLA_BUILD.$DATE.$GIT_COMMIT
 fi

 echo "$SCYLLA_VERSION-$SCYLLA_RELEASE"
--- a/api/api-doc/cache_service.json
+++ b/api/api-doc/cache_service.json
@@ -397,6 +397,36 @@
        }
      ]
    },
+    {
+      "path": "/cache_service/metrics/key/hits_moving_avrage",
+      "operations": [
+        {
+          "method": "GET",
+          "summary": "Get key hits moving average",
+          "type": "#/utils/rate_moving_average",
+          "nickname": "get_key_hits_moving_avrage",
+          "produces": [
+            "application/json"
+          ],
+          "parameters": []
+        }
+      ]
+    },
+    {
+      "path": "/cache_service/metrics/key/requests_moving_avrage",
+      "operations": [
+        {
+          "method": "GET",
+          "summary": "Get key requests moving average",
+          "type": "#/utils/rate_moving_average",
+          "nickname": "get_key_requests_moving_avrage",
+          "produces": [
+            "application/json"
+          ],
+          "parameters": []
+        }
+      ]
+    },
    {
      "path": "/cache_service/metrics/key/size",
      "operations": [
@@ -487,6 +517,36 @@
        }
      ]
    },
+    {
+      "path": "/cache_service/metrics/row/hits_moving_avrage",
+      "operations": [
+        {
+          "method": "GET",
+          "summary": "Get row hits moving average",
+          "type": "#/utils/rate_moving_average",
+          "nickname": "get_row_hits_moving_avrage",
+          "produces": [
+            "application/json"
+          ],
+          "parameters": []
+        }
+      ]
+    },
+    {
+      "path": "/cache_service/metrics/row/requests_moving_avrage",
+      "operations": [
+        {
+          "method": "GET",
+          "summary": "Get row requests moving average",
+          "type": "#/utils/rate_moving_average",
+          "nickname": "get_row_requests_moving_avrage",
+          "produces": [
+            "application/json"
+          ],
+          "parameters": []
+        }
+      ]
+    },
    {
      "path": "/cache_service/metrics/row/size",
      "operations": [
@@ -577,6 +637,36 @@
        }
      ]
    },
+    {
+      "path": "/cache_service/metrics/counter/hits_moving_avrage",
+      "operations": [
+        {
+          "method": "GET",
+          "summary": "Get counter hits moving average",
+          "type": "#/utils/rate_moving_average",
+          "nickname": "get_counter_hits_moving_avrage",
+          "produces": [
+            "application/json"
+          ],
+          "parameters": []
+        }
+      ]
+    },
+    {
+      "path": "/cache_service/metrics/counter/requests_moving_avrage",
+      "operations": [
+        {
+          "method": "GET",
+          "summary": "Get counter requests moving average",
+          "type": "#/utils/rate_moving_average",
+          "nickname": "get_counter_requests_moving_avrage",
+          "produces": [
+            "application/json"
+          ],
+          "parameters": []
+        }
+      ]
+    },
    {
      "path": "/cache_service/metrics/counter/size",
      "operations": [
--- a/api/api-doc/collectd.json
+++ b/api/api-doc/collectd.json
@@ -55,6 +55,57 @@
                     "paramType":"query"
                  }
               ]
+            },
+            {
+               "method":"POST",
+               "summary":"Start reporting on one or more collectd metric",
+               "type":"void",
+               "nickname":"enable_collectd",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+                  {
+                     "name":"pluginid",
+                     "description":"The plugin ID, describes the component the metric belongs to. Examples are: cache, thrift, etc. Regular expressions are supported",
+                     "required":true,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"path"
+                  },
+                  {
+                     "name":"instance",
+                     "description":"The plugin instance typically #CPU indicating per CPU metric. Regex are supported. Omit for all",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query"
+                  },
+                  {
+                     "name":"type",
+                     "description":"The plugin type, the type of the information. Examples are total_operations, bytes, etc. Regular expressions are supported. Omit for all",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query"
+                  },
+                  {
+                     "name":"type_instance",
+                     "description":"The plugin type instance, the specific metric. Examples are total_writes, total_size, zones, etc. Regular expressions are supported. Omit for all",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query"
+                  },
+                  {
+                     "name":"enable",
+                     "description":"set to true to enable all, anything else or omit to disable",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"boolean",
+                     "paramType":"query"
+                  }
+               ]
            }
         ]
      },
@@ -63,10 +114,10 @@
         "operations":[
            {
               "method":"GET",
-               "summary":"Get a collectd value",
+               "summary":"Get a list of all collectd metrics and their status",
               "type":"array",
               "items":{
-                  "type":"type_instance_id"
+                  "type":"collectd_metric_status"
               },
               "nickname":"get_collectd_items",
               "produces":[
@@ -74,6 +125,25 @@
               ],
               "parameters":[
               ]
+            },
+            {
+               "method":"POST",
+               "summary":"Enable or disable all collectd metrics",
+               "type":"void",
+               "nickname":"enable_all_collectd",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+                  {
+                     "name":"enable",
+                     "description":"set to true to enable all, anything else or omit to disable",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"boolean",
+                     "paramType":"query"
+                  }
+               ]
            }
         ]
      }
@@ -113,6 +183,20 @@
               }
            }
         }
+      },
+      "collectd_metric_status":{
+         "id":"collectd_metric_status",
+         "description":"Holds a collectd id and an enable flag",
+         "properties":{
+            "id":{
+               "description":"The metric ID",
+               "type":"type_instance_id"
+            },
+            "enable":{
+               "description":"Is the metric enabled",
+               "type":"boolean"
+            }
+         }
      }
   }
 }
--- a/api/api-doc/column_family.json
+++ b/api/api-doc/column_family.json
@@ -78,11 +78,19 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
                     "paramType":"path"
+                  },
+                  {
+                     "name":"split_output",
+                     "description":"true if the output of the major compaction should be split in several sstables",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"bool",
+                     "paramType":"query"
                  }
               ]
            }
@@ -102,7 +110,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -129,7 +137,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -153,7 +161,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -180,7 +188,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -204,7 +212,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -244,7 +252,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -271,7 +279,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -298,7 +306,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -317,7 +325,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -349,7 +357,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -381,7 +389,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -405,7 +413,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -432,7 +440,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -459,7 +467,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -491,7 +499,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -518,7 +526,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -545,7 +553,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -569,7 +577,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -593,7 +601,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -633,7 +641,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -673,7 +681,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -713,7 +721,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -753,7 +761,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -793,7 +801,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -833,7 +841,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -873,7 +881,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -916,7 +924,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -943,7 +951,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -970,7 +978,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -994,7 +1002,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1034,7 +1042,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1058,7 +1066,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1094,14 +1102,14 @@
               "method":"GET",
               "summary":"Get read latency histogram",
               "$ref": "#/utils/histogram",
-               "nickname":"get_read_latency_histogram",
+               "nickname":"get_read_latency_histogram_depricated",
               "produces":[
                  "application/json"
               ],
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1121,6 +1129,49 @@
               "items":{
                  "$ref": "#/utils/histogram"
               },
+               "nickname":"get_all_read_latency_histogram_depricated",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+               ]
+            }
+         ]
+      },
+      {
+         "path":"/column_family/metrics/read_latency/moving_average_histogram/{name}",
+         "operations":[
+            {
+               "method":"GET",
+               "summary":"Get read latency moving average histogram",
+               "$ref": "#/utils/rate_moving_average_and_histogram",
+               "nickname":"get_read_latency_histogram",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+                  {
+                     "name":"name",
+                     "description":"The column family name in keyspace:name format",
+                     "required":true,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"path"
+                  }
+               ]
+            }
+         ]
+      },
+      {
+         "path":"/column_family/metrics/read_latency/moving_average_histogram/",
+         "operations":[
+            {
+               "method":"GET",
+               "summary":"Get read latency moving average histogram from all column family",
+               "type":"array",
+               "items":{
+                  "$ref": "#/utils/rate_moving_average_and_histogram"
+               },
               "nickname":"get_all_read_latency_histogram",
               "produces":[
                  "application/json"
@@ -1160,7 +1211,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1200,7 +1251,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1224,7 +1275,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1260,14 +1311,14 @@
               "method":"GET",
               "summary":"Get write latency histogram",
               "$ref": "#/utils/histogram",
-               "nickname":"get_write_latency_histogram",
+               "nickname":"get_write_latency_histogram_depricated",
               "produces":[
                  "application/json"
               ],
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1287,6 +1338,49 @@
               "items":{
                  "$ref": "#/utils/histogram"
               },
+               "nickname":"get_all_write_latency_histogram_depricated",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+               ]
+            }
+         ]
+      },
+      {
+         "path":"/column_family/metrics/write_latency/moving_average_histogram/{name}",
+         "operations":[
+            {
+               "method":"GET",
+               "summary":"Get write latency moving average histogram",
+               "$ref": "#/utils/rate_moving_average_and_histogram",
+               "nickname":"get_write_latency_histogram",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+                  {
+                     "name":"name",
+                     "description":"The column family name in keyspace:name format",
+                     "required":true,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"path"
+                  }
+               ]
+            }
+         ]
+      },
+      {
+         "path":"/column_family/metrics/write_latency/moving_average_histogram/",
+         "operations":[
+            {
+               "method":"GET",
+               "summary":"Get write latency moving average histogram of all column families",
+               "type":"array",
+               "items":{
+                  "$ref": "#/utils/rate_moving_average_and_histogram"
+               },
               "nickname":"get_all_write_latency_histogram",
               "produces":[
                  "application/json"
@@ -1326,7 +1420,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1366,7 +1460,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1406,7 +1500,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1446,7 +1540,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1486,7 +1580,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1526,7 +1620,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1566,7 +1660,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1606,7 +1700,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1646,7 +1740,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1686,7 +1780,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1726,7 +1820,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1766,7 +1860,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1806,7 +1900,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1846,7 +1940,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1886,7 +1980,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1926,7 +2020,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -1966,7 +2060,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2006,7 +2100,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2030,7 +2124,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2070,7 +2164,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2110,7 +2204,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2150,7 +2244,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2190,7 +2284,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2214,7 +2308,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2238,7 +2332,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2265,7 +2359,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2292,7 +2386,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2319,7 +2413,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2346,7 +2440,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2415,7 +2509,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2439,7 +2533,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2463,7 +2557,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2487,7 +2581,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2511,7 +2605,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2535,7 +2629,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2559,7 +2653,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2583,7 +2677,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2607,7 +2701,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2631,7 +2725,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2655,7 +2749,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
@@ -2679,7 +2773,7 @@
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
+                     "description":"The column family name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
--- a/api/api-doc/endpoint_snitch_info.json
+++ b/api/api-doc/endpoint_snitch_info.json
@@ -21,8 +21,8 @@
               "parameters":[
                  {
                     "name":"host",
-                     "description":"The host name",
-                     "required":true,
+                     "description":"The host name. If absent, the local server broadcast/listen address is used",
+                     "required":false,
                     "allowMultiple":false,
                     "type":"string",
                     "paramType":"query"
@@ -45,8 +45,8 @@
               "parameters":[
                  {
                     "name":"host",
-                     "description":"The host name",
-                     "required":true,
+                     "description":"The host name. If absent, the local server broadcast/listen address is used",
+                     "required":false,
                     "allowMultiple":false,
                     "type":"string",
                     "paramType":"query"
--- a/api/api-doc/failure_detector.json
+++ b/api/api-doc/failure_detector.json
@@ -42,6 +42,25 @@
            }
         ]
      },
+      {
+         "path":"/failure_detector/endpoint_phi_values",
+         "operations":[
+            {
+               "method":"GET",
+               "summary":"Get end point phi values",
+               "type":"array",
+               "items":{
+                  "type":"endpoint_phi_value"
+               },
+               "nickname":"get_endpoint_phi_values",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+               ]
+            }
+         ]
+      },
      {
         "path":"/failure_detector/endpoints/",
         "operations":[
@@ -202,6 +221,20 @@
                    "description": "The application state version"
                }
            }
+        },
+        "endpoint_phi_value": {
+            "id" : "endpoint_phi_value",
+            "description": "Holds phi value for a single end point",
+            "properties": {
+                "phi": {
+                    "type": "double",
+                    "description": "Phi value"
+                },
+                "endpoint": {
+                    "type": "string",
+                    "description": "end point address"
+                }
+            }
        }
    }
 }
--- a/api/api-doc/storage_proxy.json
+++ b/api/api-doc/storage_proxy.json
@@ -716,6 +716,36 @@
        }
      ]
    },
+    {
+      "path": "/storage_proxy/metrics/read/timeouts_rates",
+      "operations": [
+        {
+          "method": "GET",
+          "summary": "Get read timeouts rates",
+          "type": "#/utils/rate_moving_average",
+          "nickname": "get_read_metrics_timeouts_rates",
+          "produces": [
+            "application/json"
+          ],
+          "parameters": []
+        }
+      ]
+    },
+    {
+      "path": "/storage_proxy/metrics/read/unavailables_rates",
+      "operations": [
+        {
+          "method": "GET",
+          "summary": "Get read unavailables rates",
+          "type": "#/utils/rate_moving_average",
+          "nickname": "get_read_metrics_unavailables_rates",
+          "produces": [
+            "application/json"
+          ],
+          "parameters": []
+        }
+      ]
+    },
    {
      "path": "/storage_proxy/metrics/read/histogram",
      "operations": [
@@ -723,7 +753,7 @@
          "method": "GET",
          "summary": "Get read metrics",
          "$ref": "#/utils/histogram",
-          "nickname": "get_read_metrics_latency_histogram",
+          "nickname": "get_read_metrics_latency_histogram_depricated",
          "produces": [
            "application/json"
          ],
@@ -738,6 +768,36 @@
          "method": "GET",
          "summary": "Get range metrics",
          "$ref": "#/utils/histogram",
+          "nickname": "get_range_metrics_latency_histogram_depricated",
+          "produces": [
+            "application/json"
+          ],
+          "parameters": []
+        }
+      ]
+    },
+    {
+      "path": "/storage_proxy/metrics/read/moving_average_histogram",
+      "operations": [
+        {
+          "method": "GET",
+          "summary": "Get read metrics rate and histogram",
+          "$ref": "#/utils/rate_moving_average_and_histogram",
+          "nickname": "get_read_metrics_latency_histogram",
+          "produces": [
+            "application/json"
+          ],
+          "parameters": []
+        }
+      ]
+    },
+    {
+      "path": "/storage_proxy/metrics/range/moving_average_histogram",
+      "operations": [
+        {
+          "method": "GET",
+          "summary": "Get range metrics rate and histogram",
+          "$ref": "#/utils/rate_moving_average_and_histogram",
          "nickname": "get_range_metrics_latency_histogram",
          "produces": [
            "application/json"
@@ -776,6 +836,36 @@
        }
      ]
    },
+    {
+      "path": "/storage_proxy/metrics/range/timeouts_rates",
+      "operations": [
+        {
+          "method": "GET",
+          "summary": "Get range timeouts rates",
+          "type": "#/utils/rate_moving_average",
+          "nickname": "get_range_metrics_timeouts_rates",
+          "produces": [
+            "application/json"
+          ],
+          "parameters": []
+        }
+      ]
+    },
+    {
+      "path": "/storage_proxy/metrics/range/unavailables_rates",
+      "operations": [
+        {
+          "method": "GET",
+          "summary": "Get range unavailables rates",
+          "type": "#/utils/rate_moving_average",
+          "nickname": "get_range_metrics_unavailables_rates",
+          "produces": [
+            "application/json"
+          ],
+          "parameters": []
+        }
+      ]
+    },
    {
      "path": "/storage_proxy/metrics/write/timeouts",
      "operations": [
@@ -806,6 +896,36 @@
        }
      ]
    },
+    {
+      "path": "/storage_proxy/metrics/write/timeouts_rates",
+      "operations": [
+        {
+          "method": "GET",
+          "summary": "Get write timeouts rates",
+          "type": "#/utils/rate_moving_average",
+          "nickname": "get_write_metrics_timeouts_rates",
+          "produces": [
+            "application/json"
+          ],
+          "parameters": []
+        }
+      ]
+    },
+    {
+      "path": "/storage_proxy/metrics/write/unavailables_rates",
+      "operations": [
+        {
+          "method": "GET",
+          "summary": "Get write unavailables rates",
+          "type": "#/utils/rate_moving_average",
+          "nickname": "get_write_metrics_unavailables_rates",
+          "produces": [
+            "application/json"
+          ],
+          "parameters": []
+        }
+      ]
+    },
    {
      "path": "/storage_proxy/metrics/write/histogram",
      "operations": [
@@ -813,6 +933,21 @@
          "method": "GET",
          "summary": "Get write metrics",
          "$ref": "#/utils/histogram",
+          "nickname": "get_write_metrics_latency_histogram_depricated",
+          "produces": [
+            "application/json"
+          ],
+          "parameters": []
+        }
+      ]
+    },
+    {
+      "path": "/storage_proxy/metrics/write/moving_average_histogram",
+      "operations": [
+        {
+          "method": "GET",
+          "summary": "Get write metrics rate and histogram",
+          "$ref": "#/utils/rate_moving_average_and_histogram",
          "nickname": "get_write_metrics_latency_histogram",
          "produces": [
            "application/json"
--- a/api/api-doc/storage_service.json
+++ b/api/api-doc/storage_service.json
@@ -177,6 +177,22 @@
            }
         ]
      },
+      {
+         "path":"/storage_service/scylla_release_version",
+         "operations":[
+            {
+               "method":"GET",
+               "summary":"Fetch a string representation of the Scylla version.",
+               "type":"string",
+               "nickname":"get_scylla_release_version",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+               ]
+            }
+         ]
+      },
      {
         "path":"/storage_service/schema_version",
         "operations":[
@@ -1185,11 +1201,12 @@
               ],
               "parameters":[
                  {
-                     "name":"non_system",
-                     "description":"When set to true limit to non system",
+                     "name":"type",
+                     "description":"Which keyspaces to return",
                     "required":false,
                     "allowMultiple":false,
-                     "type":"boolean",
+                     "type":"string",
+                     "enum": [ "all", "user", "non_local_strategy" ],
                     "paramType":"query"
                  }
               ]
@@ -1720,6 +1737,57 @@
            }
         ]
      },
+      {
+         "path":"/storage_service/slow_query",
+         "operations":[
+            {
+               "method":"POST",
+               "summary":"Set slow query parameter",
+               "type":"void",
+               "nickname":"set_slow_query",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+                  {
+                     "name":"enable",
+                     "description":"set it to true to enable, anything else to disable",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"boolean",
+                     "paramType":"query"
+                  },
+                  {
+                     "name":"ttl",
+                     "description":"TTL in seconds",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"long",
+                     "paramType":"query"
+                  },
+                  {
+                     "name":"threshold",
+                     "description":"Slow query record threshold in microseconds",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"long",
+                     "paramType":"query"
+                  }
+               ]
+            },
+            {
+               "method":"GET",
+               "summary":"Returns the slow query record configuration.",
+               "type":"slow_query_info",
+               "nickname":"get_slow_query_info",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+               ]
+            }
+         ]
+      },
      {
         "path":"/storage_service/auto_compaction/{keyspace}",
         "operations":[
@@ -2117,6 +2185,24 @@
            }
         }
      },
+      "slow_query_info": {
+         "id":"slow_query_info",
+         "description":"Slow query triggering information",
+         "properties":{
+            "enable":{
+               "type":"boolean",
+               "description":"Whether slow query logging is enabled or disabled"
+            },
+            "ttl":{
+               "type":"long",
+               "description":"The slow query TTL in seconds"
+            },
+            "threshold":{
+               "type":"long",
+               "description":"The slow query logging threshold in microseconds. Queries that take longer will be logged"
+            }
+         }
+      },
      "endpoint_detail":{
         "id":"endpoint_detail",
         "description":"Endpoint detail",
--- a/api/api-doc/utils.json
+++ b/api/api-doc/utils.json
@@ -65,6 +65,41 @@
               "description":"The series of values to which the counts in `buckets` correspond"
            }
         }
-      }
-   }
+      },
+    "rate_moving_average": {
+         "id":"rate_moving_average",
+         "description":"A meter metric which measures mean throughput and one, five, and fifteen-minute exponentially-weighted moving average throughputs",
+         "properties":{
+             "rates": {
+               "type":"array",
+               "items":{
+                  "type":"double"
+               },
+               "description":"One, five and fifteen minute rates"
+            },
+            "mean_rate": {
+               "type":"double",
+               "description":"The mean rate from startup"
+            },
+            "count": {
+               "type":"long",
+               "description":"Total number of events from startup"
+            }
+         }
+    },
+    "rate_moving_average_and_histogram": {
+         "id":"rate_moving_average_and_histogram",
+         "description":"A timer metric which aggregates timing durations and provides duration statistics, plus throughput statistics",
+         "properties":{
+            "meter": {
+               "type":"rate_moving_average",
+               "description":"The metric rate moving average"
+            },
+            "hist": {
+               "type":"histogram",
+               "description":"The metric histogram"
+            }
+         }
+    }
+  }
 }
--- a/api/api.cc
+++ b/api/api.cc
@@ -61,10 +61,10 @@ future<> set_server_init(http_context& ctx) {
                new content_replace("html")));
        r.add(GET, url("/ui").remainder("path"), new httpd::directory_handler(ctx.api_dir,
                new content_replace("html")));
+        rb->set_api_doc(r);
        rb->register_function(r, "system",
                "The system related API");
        set_system(ctx, r);
-        rb->set_api_doc(r);
    });
 }

@@ -83,6 +83,10 @@ future<> set_server_storage_service(http_context& ctx) {
    return register_api(ctx, "storage_service", "The storage service API", set_storage_service);
 }

+future<> set_server_snitch(http_context& ctx) {
+    return register_api(ctx, "endpoint_snitch_info", "The endpoint snitch info API", set_endpoint_snitch);
+}
+
 future<> set_server_gossip(http_context& ctx) {
    return register_api(ctx, "gossiper",
                "The gossiper API", set_gossiper);
@@ -118,10 +122,6 @@ future<> set_server_gossip_settle(http_context& ctx) {
        rb->register_function(r, "cache_service",
                "The cache service API");
        set_cache_service(ctx,r);
-
-        rb->register_function(r, "endpoint_snitch_info",
-                "The endpoint snitch info API");
-        set_endpoint_snitch(ctx, r);
    });
 }

--- a/api/api.hh
+++ b/api/api.hh
@@ -29,6 +29,7 @@
 #include "utils/histogram.hh"
 #include "http/exception.hh"
 #include "api_init.hh"
+#include "seastarx.hh"

 namespace api {

@@ -110,61 +111,49 @@ future<json::json_return_type>  sum_stats(distributed<T>& d, V F::*f) {
    });
 }

-inline double pow2(double a) {
-    return a * a;
-}

-// FIXME: Move to utils::ihistogram::operator+=()
-inline utils::ihistogram add_histogram(utils::ihistogram res,
-        const utils::ihistogram& val) {
-    if (res.count == 0) {
-        return val;
-    }
-    if (val.count == 0) {
-        return std::move(res);
-    }
-    if (res.min > val.min) {
-        res.min = val.min;
-    }
-    if (res.max < val.max) {
-        res.max = val.max;
-    }
-    double ncount = res.count + val.count;
-    // To get an estimated sum we take the estimated mean
-    // and multiply it by the true count
-    res.sum = res.sum + val.mean * val.count;
-    double a = res.count/ncount;
-    double b = val.count/ncount;
-
-    double mean =  a * res.mean + b * val.mean;
-
-    res.variance = (res.variance + pow2(res.mean - mean) )* a +
-            (val.variance + pow2(val.mean -mean))* b;
-
-    res.mean = mean;
-    res.count = res.count + val.count;
-    for (auto i : val.sample) {
-        res.sample.push_back(i);
-    }
-    return res;
-}

 inline
 httpd::utils_json::histogram to_json(const utils::ihistogram& val) {
    httpd::utils_json::histogram h;
    h = val;
+    h.sum = val.estimated_sum();
+    return h;
+}
+
+inline
+httpd::utils_json::rate_moving_average meter_to_json(const utils::rate_moving_average& val) {
+    httpd::utils_json::rate_moving_average m;
+    m = val;
+    return m;
+}
+
+inline
+httpd::utils_json::rate_moving_average_and_histogram timer_to_json(const utils::rate_moving_average_and_histogram& val) {
+    httpd::utils_json::rate_moving_average_and_histogram h;
+    h.hist = to_json(val.hist);
+    h.meter = meter_to_json(val.rate);
    return h;
 }

 template<class T, class F>
-future<json::json_return_type>  sum_histogram_stats(distributed<T>& d, utils::ihistogram F::*f) {
+future<json::json_return_type>  sum_histogram_stats(distributed<T>& d, utils::timed_rate_moving_average_and_histogram F::*f) {

-    return d.map_reduce0([f](const T& p) {return p.get_stats().*f;}, utils::ihistogram(),
-            add_histogram).then([](const utils::ihistogram& val) {
+    return d.map_reduce0([f](const T& p) {return (p.get_stats().*f).hist;}, utils::ihistogram(),
+            std::plus<utils::ihistogram>()).then([](const utils::ihistogram& val) {
        return make_ready_future<json::json_return_type>(to_json(val));
    });
 }

+template<class T, class F>
+future<json::json_return_type>  sum_timer_stats(distributed<T>& d, utils::timed_rate_moving_average_and_histogram F::*f) {
+
+    return d.map_reduce0([f](const T& p) {return (p.get_stats().*f).rate();}, utils::rate_moving_average_and_histogram(),
+            std::plus<utils::rate_moving_average_and_histogram>()).then([](const utils::rate_moving_average_and_histogram& val) {
+        return make_ready_future<json::json_return_type>(timer_to_json(val));
+    });
+}
+
 inline int64_t min_int64(int64_t a, int64_t b) {
    return std::min(a,b);
 }
@@ -178,33 +167,36 @@ inline int64_t max_int64(int64_t a, int64_t b) {
 * It combine total and the sub set for the ratio and its
 * to_json method return the ration sub/total
 */
-struct ratio_holder : public json::jsonable {
-    double total = 0;
-    double sub = 0;
+template<typename T>
+struct basic_ratio_holder : public json::jsonable {
+    T total = 0;
+    T sub = 0;
    virtual std::string to_json() const {
        if (total == 0) {
            return "0";
        }
        return std::to_string(sub/total);
    }
-    ratio_holder() = default;
-    ratio_holder& add(double _total, double _sub) {
+    basic_ratio_holder() = default;
+    basic_ratio_holder& add(T _total, T _sub) {
        total += _total;
        sub += _sub;
        return *this;
    }
-    ratio_holder(double _total, double _sub) {
+    basic_ratio_holder(T _total, T _sub) {
        total = _total;
        sub = _sub;
    }
-    ratio_holder& operator+=(const ratio_holder& a) {
+    basic_ratio_holder<T>& operator+=(const basic_ratio_holder<T>& a) {
        return add(a.total, a.sub);
    }
-    friend ratio_holder operator+(ratio_holder a, const ratio_holder& b) {
+    friend basic_ratio_holder<T> operator+(basic_ratio_holder a, const basic_ratio_holder<T>& b) {
        return a += b;
    }
 };

+typedef basic_ratio_holder<double>  ratio_holder;
+typedef basic_ratio_holder<int64_t> integral_ratio_holder;

 class unimplemented_exception : public base_exception {
 public:
--- a/api/api_init.hh
+++ b/api/api_init.hh
@@ -38,6 +38,7 @@ struct http_context {
 };

 future<> set_server_init(http_context& ctx);
+future<> set_server_snitch(http_context& ctx);
 future<> set_server_storage_service(http_context& ctx);
 future<> set_server_gossip(http_context& ctx);
 future<> set_server_load_sstable(http_context& ctx);
--- a/api/cache_service.cc
+++ b/api/cache_service.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -177,6 +177,20 @@ void set_cache_service(http_context& ctx, routes& r) {
        return make_ready_future<json::json_return_type>(0);
    });

+    cs::get_key_hits_moving_avrage.set(r, [&ctx] (std::unique_ptr<request> req) {
+        // TBD
+        // FIXME
+        // See above
+        return make_ready_future<json::json_return_type>(meter_to_json(utils::rate_moving_average()));
+    });
+
+    cs::get_key_requests_moving_avrage.set(r, [&ctx] (std::unique_ptr<request> req) {
+        // TBD
+        // FIXME
+        // See above
+        return make_ready_future<json::json_return_type>(meter_to_json(utils::rate_moving_average()));
+    });
+
    cs::get_key_size.set(r, [] (std::unique_ptr<request> req) {
        // TBD
        // FIXME
@@ -194,41 +208,57 @@ void set_cache_service(http_context& ctx, routes& r) {
    });

    cs::get_row_capacity.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, 0, [](const column_family& cf) {
+        return map_reduce_cf(ctx, uint64_t(0), [](const column_family& cf) {
            return cf.get_row_cache().get_cache_tracker().region().occupancy().used_space();
        }, std::plus<uint64_t>());
    });

    cs::get_row_hits.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, 0, [](const column_family& cf) {
-            return cf.get_row_cache().stats().hits;
-        }, std::plus<int64_t>());
+        return map_reduce_cf(ctx, uint64_t(0), [](const column_family& cf) {
+            return cf.get_row_cache().stats().hits.count();
+        }, std::plus<uint64_t>());
    });

    cs::get_row_requests.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, 0, [](const column_family& cf) {
-            return cf.get_row_cache().stats().hits + cf.get_row_cache().stats().misses;
-        }, std::plus<int64_t>());
+        return map_reduce_cf(ctx, uint64_t(0), [](const column_family& cf) {
+            return cf.get_row_cache().stats().hits.count() + cf.get_row_cache().stats().misses.count();
+        }, std::plus<uint64_t>());
    });

    cs::get_row_hit_rate.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, ratio_holder(), [](const column_family& cf) {
-            return ratio_holder(cf.get_row_cache().stats().hits + cf.get_row_cache().stats().misses,
-                    cf.get_row_cache().stats().hits);
+            return ratio_holder(cf.get_row_cache().stats().hits.count() + cf.get_row_cache().stats().misses.count(),
+                    cf.get_row_cache().stats().hits.count());
        }, std::plus<ratio_holder>());
    });

+    cs::get_row_hits_moving_avrage.set(r, [&ctx] (std::unique_ptr<request> req) {
+        return map_reduce_cf_raw(ctx, utils::rate_moving_average(), [](const column_family& cf) {
+            return cf.get_row_cache().stats().hits.rate();
+        }, std::plus<utils::rate_moving_average>()).then([](const utils::rate_moving_average& m) {
+            return make_ready_future<json::json_return_type>(meter_to_json(m));
+        });
+    });
+
+    cs::get_row_requests_moving_avrage.set(r, [&ctx] (std::unique_ptr<request> req) {
+        return map_reduce_cf_raw(ctx, utils::rate_moving_average(), [](const column_family& cf) {
+            return cf.get_row_cache().stats().hits.rate() + cf.get_row_cache().stats().misses.rate();
+        }, std::plus<utils::rate_moving_average>()).then([](const utils::rate_moving_average& m) {
+            return make_ready_future<json::json_return_type>(meter_to_json(m));
+        });
+    });
+
    cs::get_row_size.set(r, [&ctx] (std::unique_ptr<request> req) {
        // In origin row size is the weighted size.
        // We currently do not support weights, so we use num entries instead
        return map_reduce_cf(ctx, 0, [](const column_family& cf) {
-            return cf.get_row_cache().num_entries();
+            return cf.get_row_cache().partitions();
        }, std::plus<uint64_t>());
    });

    cs::get_row_entries.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, 0, [](const column_family& cf) {
-            return cf.get_row_cache().num_entries();
+            return cf.get_row_cache().partitions();
        }, std::plus<uint64_t>());
    });

@@ -264,6 +294,20 @@ void set_cache_service(http_context& ctx, routes& r) {
        return make_ready_future<json::json_return_type>(0);
    });

+    cs::get_counter_hits_moving_avrage.set(r, [&ctx] (std::unique_ptr<request> req) {
+        // TBD
+        // FIXME
+        // See above
+        return make_ready_future<json::json_return_type>(meter_to_json(utils::rate_moving_average()));
+    });
+
+    cs::get_counter_requests_moving_avrage.set(r, [&ctx] (std::unique_ptr<request> req) {
+        // TBD
+        // FIXME
+        // See above
+        return make_ready_future<json::json_return_type>(meter_to_json(utils::rate_moving_average()));
+    });
+
    cs::get_counter_size.set(r, [] (std::unique_ptr<request> req) {
        // TBD
        // FIXME
--- a/api/cache_service.hh
+++ b/api/cache_service.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
--- a/api/collectd.cc
+++ b/api/collectd.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -25,10 +25,14 @@
 #include "core/scollectd_api.hh"
 #include "endian.h"
 #include <boost/range/irange.hpp>
+#include <regex>

 namespace api {

 using namespace scollectd;
+using namespace httpd;
+
+using namespace json;
 namespace cd = httpd::collectd_json;

 static auto transformer(const std::vector<collectd_value>& values) {
@@ -36,19 +40,27 @@ static auto transformer(const std::vector<collectd_value>& values) {
    for (auto v: values) {
        switch (v._type) {
        case scollectd::data_type::GAUGE:
-            collected_value.values.push(v.u._d);
+            collected_value.values.push(v.d());
            break;
        case scollectd::data_type::DERIVE:
-            collected_value.values.push(v.u._i);
+            collected_value.values.push(v.i());
            break;
        default:
-            collected_value.values.push(v.u._ui);
+            collected_value.values.push(v.ui());
            break;
        }
    }
    return collected_value;
 }

+
+static const char* str_to_regex(const sstring& v) {
+    if (v != "") {
+        return v.c_str();
+    }
+    return ".*";
+}
+
 void set_collectd(http_context& ctx, routes& r) {
    cd::get_collectd.set(r, [&ctx](std::unique_ptr<request> req) {

@@ -72,7 +84,7 @@ void set_collectd(http_context& ctx, routes& r) {
    });

    cd::get_collectd_items.set(r, [](const_req req) {
-        std::vector<cd::type_instance_id> res;
+        std::vector<cd::collectd_metric_status> res;
        auto ids = scollectd::get_collectd_ids();
        for (auto i: ids) {
            cd::type_instance_id id;
@@ -80,10 +92,44 @@ void set_collectd(http_context& ctx, routes& r) {
            id.plugin_instance = i.plugin_instance();
            id.type = i.type();
            id.type_instance = i.type_instance();
-            res.push_back(id);
+            cd::collectd_metric_status it;
+            it.id = id;
+            it.enable = scollectd::is_enabled(i);
+            res.push_back(it);
        }
        return res;
    });
+
+    cd::enable_collectd.set(r, [](std::unique_ptr<request> req) -> future<json::json_return_type> {
+        std::regex plugin(req->param["pluginid"].c_str());
+        std::regex instance(str_to_regex(req->get_query_param("instance")));
+        std::regex type(str_to_regex(req->get_query_param("type")));
+        std::regex type_instance(str_to_regex(req->get_query_param("type_instance")));
+        bool enable = strcasecmp(req->get_query_param("enable").c_str(), "true") == 0;
+        return smp::invoke_on_all([enable, plugin, instance, type, type_instance]() {
+            for (auto id: scollectd::get_collectd_ids()) {
+                if (std::regex_match(std::string(id.plugin()), plugin) &&
+                        std::regex_match(std::string(id.plugin_instance()), instance) &&
+                        std::regex_match(std::string(id.type()), type) &&
+                        std::regex_match(std::string(id.type_instance()), type_instance)) {
+                    scollectd::enable(id, enable);
+                }
+            }
+        }).then([] {
+            return json::json_return_type(json_void());
+        });
+    });
+
+    cd::enable_all_collectd.set(r, [](std::unique_ptr<request> req) -> future<json::json_return_type> {
+        bool enable = strcasecmp(req->get_query_param("enable").c_str(), "true") == 0;
+        return smp::invoke_on_all([enable] {
+            for (auto id: scollectd::get_collectd_ids()) {
+                scollectd::enable(id, enable);
+            }
+        }).then([] {
+            return json::json_return_type(json_void());
+        });
+    });
 }

 }
--- a/api/collectd.hh
+++ b/api/collectd.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
--- a/api/column_family.cc
+++ b/api/column_family.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -24,7 +24,7 @@
 #include <vector>
 #include "http/exception.hh"
 #include "sstables/sstables.hh"
-#include "sstables/estimated_histogram.hh"
+#include "utils/estimated_histogram.hh"
 #include <algorithm>

 namespace api {
@@ -40,7 +40,7 @@ const utils::UUID& get_uuid(const sstring& name, const database& db) {
    if (pos == sstring::npos) {
        pos  = name.find(":");
        if (pos == sstring::npos) {
-            throw bad_param_exception("Column family name should be in keyspace::column_family format");
+            throw bad_param_exception("Column family name should be in keyspace:column_family format");
        }
        end = pos + 1;
    } else {
@@ -77,14 +77,14 @@ future<json::json_return_type>  get_cf_stats(http_context& ctx,
 }

 static future<json::json_return_type>  get_cf_stats_count(http_context& ctx, const sstring& name,
-        utils::ihistogram column_family::stats::*f) {
+        utils::timed_rate_moving_average_and_histogram column_family::stats::*f) {
    return map_reduce_cf(ctx, name, int64_t(0), [f](const column_family& cf) {
-        return (cf.get_stats().*f).count;
+        return (cf.get_stats().*f).hist.count;
    }, std::plus<int64_t>());
 }

 static future<json::json_return_type>  get_cf_stats_sum(http_context& ctx, const sstring& name,
-        utils::ihistogram column_family::stats::*f) {
+        utils::timed_rate_moving_average_and_histogram column_family::stats::*f) {
    auto uuid = get_uuid(name, ctx.db.local());
    return ctx.db.map_reduce0([uuid, f](database& db) {
        // Histograms information is sample of the actual load
@@ -92,7 +92,7 @@ static future<json::json_return_type>  get_cf_stats_sum(http_context& ctx, const
        // with count. The information is gather in nano second,
        // but reported in micro
        column_family& cf = db.find_column_family(uuid);
-        return ((cf.get_stats().*f).count/1000.0) * (cf.get_stats().*f).mean;
+        return ((cf.get_stats().*f).hist.count/1000.0) * (cf.get_stats().*f).hist.mean;
    }, 0.0, std::plus<double>()).then([](double res) {
        return make_ready_future<json::json_return_type>((int64_t)res);
    });
@@ -100,28 +100,29 @@ static future<json::json_return_type>  get_cf_stats_sum(http_context& ctx, const


 static future<json::json_return_type>  get_cf_stats_count(http_context& ctx,
-        utils::ihistogram column_family::stats::*f) {
+        utils::timed_rate_moving_average_and_histogram column_family::stats::*f) {
    return map_reduce_cf(ctx, int64_t(0), [f](const column_family& cf) {
-        return (cf.get_stats().*f).count;
+        return (cf.get_stats().*f).hist.count;
    }, std::plus<int64_t>());
 }

 static future<json::json_return_type>  get_cf_histogram(http_context& ctx, const sstring& name,
-        utils::ihistogram column_family::stats::*f) {
+        utils::timed_rate_moving_average_and_histogram column_family::stats::*f) {
    utils::UUID uuid = get_uuid(name, ctx.db.local());
-    return ctx.db.map_reduce0([f, uuid](const database& p) {return p.find_column_family(uuid).get_stats().*f;},
+    return ctx.db.map_reduce0([f, uuid](const database& p) {
+        return (p.find_column_family(uuid).get_stats().*f).hist;},
            utils::ihistogram(),
-            add_histogram)
+            std::plus<utils::ihistogram>())
            .then([](const utils::ihistogram& val) {
                return make_ready_future<json::json_return_type>(to_json(val));
    });
 }

-static future<json::json_return_type> get_cf_histogram(http_context& ctx, utils::ihistogram column_family::stats::*f) {
+static future<json::json_return_type> get_cf_histogram(http_context& ctx, utils::timed_rate_moving_average_and_histogram column_family::stats::*f) {
    std::function<utils::ihistogram(const database&)> fun = [f] (const database& db)  {
        utils::ihistogram res;
        for (auto i : db.get_column_families()) {
-            res = add_histogram(res, i.second->get_stats().*f);
+            res += (i.second->get_stats().*f).hist;
        }
        return res;
    };
@@ -132,6 +133,33 @@ static future<json::json_return_type> get_cf_histogram(http_context& ctx, utils:
    });
 }

+static future<json::json_return_type>  get_cf_rate_and_histogram(http_context& ctx, const sstring& name,
+        utils::timed_rate_moving_average_and_histogram column_family::stats::*f) {
+    utils::UUID uuid = get_uuid(name, ctx.db.local());
+    return ctx.db.map_reduce0([f, uuid](const database& p) {
+        return (p.find_column_family(uuid).get_stats().*f).rate();},
+            utils::rate_moving_average_and_histogram(),
+            std::plus<utils::rate_moving_average_and_histogram>())
+            .then([](const utils::rate_moving_average_and_histogram& val) {
+                return make_ready_future<json::json_return_type>(timer_to_json(val));
+    });
+}
+
+static future<json::json_return_type> get_cf_rate_and_histogram(http_context& ctx, utils::timed_rate_moving_average_and_histogram column_family::stats::*f) {
+    std::function<utils::rate_moving_average_and_histogram(const database&)> fun = [f] (const database& db)  {
+        utils::rate_moving_average_and_histogram res;
+        for (auto i : db.get_column_families()) {
+            res += (i.second->get_stats().*f).rate();
+        }
+        return res;
+    };
+    return ctx.db.map(fun).then([](const std::vector<utils::rate_moving_average_and_histogram> &res) {
+        std::vector<httpd::utils_json::rate_moving_average_and_histogram> r;
+        boost::copy(res | boost::adaptors::transformed(timer_to_json), std::back_inserter(r));
+        return make_ready_future<json::json_return_type>(r);
+    });
+}
+
 static future<json::json_return_type> get_cf_unleveled_sstables(http_context& ctx, const sstring& name) {
    return map_reduce_cf(ctx, name, int64_t(0), [](const column_family& cf) {
        return cf.get_unleveled_sstables();
@@ -141,7 +169,7 @@ static future<json::json_return_type> get_cf_unleveled_sstables(http_context& ct
 static int64_t min_row_size(column_family& cf) {
    int64_t res = INT64_MAX;
    for (auto i: *cf.get_sstables() ) {
-        res = std::min(res, i.second->get_stats_metadata().estimated_row_size.min());
+        res = std::min(res, i->get_stats_metadata().estimated_row_size.min());
    }
    return (res == INT64_MAX) ? 0 : res;
 }
@@ -149,30 +177,113 @@ static int64_t min_row_size(column_family& cf) {
 static int64_t max_row_size(column_family& cf) {
    int64_t res = 0;
    for (auto i: *cf.get_sstables() ) {
-        res = std::max(i.second->get_stats_metadata().estimated_row_size.max(), res);
+        res = std::max(i->get_stats_metadata().estimated_row_size.max(), res);
    }
    return res;
 }

-static double update_ratio(double acc, double f, double total) {
-    if (f && !total) {
-        throw bad_param_exception("total should include all elements");
-    } else if (total) {
-        acc += f / total;
-    }
-    return acc;
-}
-
-static ratio_holder mean_row_size(column_family& cf) {
-    ratio_holder res;
+static integral_ratio_holder mean_row_size(column_family& cf) {
+    integral_ratio_holder res;
    for (auto i: *cf.get_sstables() ) {
-        auto c = i.second->get_stats_metadata().estimated_row_size.count();
-        res.sub += i.second->get_stats_metadata().estimated_row_size.mean() * c;
+        auto c = i->get_stats_metadata().estimated_row_size.count();
+        res.sub += i->get_stats_metadata().estimated_row_size.mean() * c;
        res.total += c;
    }
    return res;
 }

+static std::unordered_map<sstring, uint64_t> merge_maps(std::unordered_map<sstring, uint64_t> a,
+        const std::unordered_map<sstring, uint64_t>& b) {
+    a.insert(b.begin(), b.end());
+    return a;
+}
+
+static json::json_return_type sum_map(const std::unordered_map<sstring, uint64_t>& val) {
+    uint64_t res = 0;
+    for (auto i : val) {
+        res += i.second;
+    }
+    return res;
+}
+
+static future<json::json_return_type>  sum_sstable(http_context& ctx, const sstring name, bool total) {
+    auto uuid = get_uuid(name, ctx.db.local());
+    return ctx.db.map_reduce0([uuid, total](database& db) {
+        std::unordered_map<sstring, uint64_t> m;
+        auto sstables = (total) ? db.find_column_family(uuid).get_sstables_including_compacted_undeleted() :
+                db.find_column_family(uuid).get_sstables();
+        for (auto t : *sstables) {
+            m[t->get_filename()] = t->bytes_on_disk();
+        }
+        return m;
+    }, std::unordered_map<sstring, uint64_t>(), merge_maps).
+            then([](const std::unordered_map<sstring, uint64_t>& val) {
+        return sum_map(val);
+    });
+}
+
+
+static future<json::json_return_type> sum_sstable(http_context& ctx, bool total) {
+    return map_reduce_cf_raw(ctx, std::unordered_map<sstring, uint64_t>(), [total](column_family& cf) {
+        std::unordered_map<sstring, uint64_t> m;
+        auto sstables = (total) ? cf.get_sstables_including_compacted_undeleted() :
+                cf.get_sstables();
+        for (auto t : *sstables) {
+            m[t->get_filename()] = t->bytes_on_disk();
+        }
+        return m;
+    },merge_maps).then([](const std::unordered_map<sstring, uint64_t>& val) {
+        return sum_map(val);
+    });
+}
+
+template <typename T>
+class sum_ratio {
+    uint64_t _n = 0;
+    T _total = 0;
+public:
+    future<> operator()(T value) {
+        if (value > 0) {
+            _total += value;
+            _n++;
+        }
+        return make_ready_future<>();
+    }
+    // Returns average value of all registered ratios.
+    T get() && {
+        return _n ? (_total / _n) : T(0);
+    }
+};
+
+static double get_compression_ratio(column_family& cf) {
+    sum_ratio<double> result;
+    for (auto i : *cf.get_sstables()) {
+        auto compression_ratio = i->get_compression_ratio();
+        if (compression_ratio != sstables::metadata_collector::NO_COMPRESSION_RATIO) {
+            result(compression_ratio);
+        }
+    }
+    return std::move(result).get();
+}
+
+static std::vector<uint64_t> concat_sstable_count_per_level(std::vector<uint64_t> a, std::vector<uint64_t>&& b) {
+    a.resize(std::max(a.size(), b.size()), 0UL);
+    for (auto i = 0U; i < b.size(); i++) {
+        a[i] += b[i];
+    }
+    return a;
+}
+
+ratio_holder filter_false_positive_as_ratio_holder(const sstables::shared_sstable& sst) {
+    double f = sst->filter_get_false_positive();
+    return ratio_holder(f + sst->filter_get_true_positive(), f);
+}
+
+ratio_holder filter_recent_false_positive_as_ratio_holder(const sstables::shared_sstable& sst) {
+    double f = sst->filter_get_recent_false_positive();
+    return ratio_holder(f + sst->filter_get_recent_true_positive(), f);
+}
+
 void set_column_family(http_context& ctx, routes& r) {
    cf::get_column_family_name.set(r, [&ctx] (const_req req){
        vector<sstring> res;
@@ -293,21 +404,21 @@ void set_column_family(http_context& ctx, routes& r) {
    });

    cf::get_estimated_row_size_histogram.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], sstables::estimated_histogram(0), [](column_family& cf) {
-            sstables::estimated_histogram res(0);
+        return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) {
+            utils::estimated_histogram res(0);
            for (auto i: *cf.get_sstables() ) {
-                res.merge(i.second->get_stats_metadata().estimated_row_size);
+                res.merge(i->get_stats_metadata().estimated_row_size);
            }
            return res;
        },
-        sstables::merge, utils_json::estimated_histogram());
+        utils::estimated_histogram_merge, utils_json::estimated_histogram());
    });

    cf::get_estimated_row_count.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](column_family& cf) {
            uint64_t res = 0;
            for (auto i: *cf.get_sstables() ) {
-                res += i.second->get_stats_metadata().estimated_row_size.count();
+                res += i->get_stats_metadata().estimated_row_size.count();
            }
            return res;
        },
@@ -315,14 +426,14 @@ void set_column_family(http_context& ctx, routes& r) {
    });

    cf::get_estimated_column_count_histogram.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], sstables::estimated_histogram(0), [](column_family& cf) {
-            sstables::estimated_histogram res(0);
+        return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) {
+            utils::estimated_histogram res(0);
            for (auto i: *cf.get_sstables() ) {
-                res.merge(i.second->get_stats_metadata().estimated_column_count);
+                res.merge(i->get_stats_metadata().estimated_column_count);
            }
            return res;
        },
-        sstables::merge, utils_json::estimated_histogram());
+        utils::estimated_histogram_merge, utils_json::estimated_histogram());
    });

    cf::get_all_compression_ratio.set(r, [] (std::unique_ptr<request> req) {
@@ -355,10 +466,14 @@ void set_column_family(http_context& ctx, routes& r) {
        return get_cf_stats_count(ctx, &column_family::stats::writes);
    });

-    cf::get_read_latency_histogram.set(r, [&ctx] (std::unique_ptr<request> req) {
+    cf::get_read_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<request> req) {
        return get_cf_histogram(ctx, req->param["name"], &column_family::stats::reads);
    });

+    cf::get_read_latency_histogram.set(r, [&ctx] (std::unique_ptr<request> req) {
+        return get_cf_rate_and_histogram(ctx, req->param["name"], &column_family::stats::reads);
+    });
+
    cf::get_read_latency.set(r, [&ctx] (std::unique_ptr<request> req) {
        return get_cf_stats_sum(ctx,req->param["name"] ,&column_family::stats::reads);
    });
@@ -367,24 +482,40 @@ void set_column_family(http_context& ctx, routes& r) {
        return get_cf_stats_sum(ctx, req->param["name"] ,&column_family::stats::writes);
    });

-    cf::get_all_read_latency_histogram.set(r, [&ctx] (std::unique_ptr<request> req) {
+    cf::get_all_read_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return get_cf_histogram(ctx, &column_family::stats::writes);
+        return get_cf_histogram(ctx, &column_family::stats::reads); // read-latency endpoint: aggregate the reads histogram, not writes
    });

-    cf::get_write_latency_histogram.set(r, [&ctx] (std::unique_ptr<request> req) {
+    cf::get_all_read_latency_histogram.set(r, [&ctx] (std::unique_ptr<request> req) {
+        return get_cf_rate_and_histogram(ctx, &column_family::stats::reads); // read-latency endpoint: aggregate stats::reads across all column families (was stats::writes)
+    });
+
+    cf::get_write_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<request> req) {
        return get_cf_histogram(ctx, req->param["name"], &column_family::stats::writes);
    });

-    cf::get_all_write_latency_histogram.set(r, [&ctx] (std::unique_ptr<request> req) {
+    cf::get_write_latency_histogram.set(r, [&ctx] (std::unique_ptr<request> req) {
+        return get_cf_rate_and_histogram(ctx, req->param["name"], &column_family::stats::writes);
+    });
+
+    cf::get_all_write_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<request> req) {
        return get_cf_histogram(ctx, &column_family::stats::writes);
    });

+    cf::get_all_write_latency_histogram.set(r, [&ctx] (std::unique_ptr<request> req) {
+        return get_cf_rate_and_histogram(ctx, &column_family::stats::writes);
+    });
+
    cf::get_pending_compactions.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return get_cf_stats(ctx, req->param["name"], &column_family::stats::pending_compactions);
+        return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](column_family& cf) {
+            return cf.get_compaction_strategy().estimated_pending_compactions(cf);
+        }, std::plus<int64_t>());
    });

    cf::get_all_pending_compactions.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return get_cf_stats(ctx, &column_family::stats::pending_compactions);
+        return map_reduce_cf(ctx, int64_t(0), [](column_family& cf) {
+            return cf.get_compaction_strategy().estimated_pending_compactions(cf);
+        }, std::plus<int64_t>());
    });

    cf::get_live_ss_table_count.set(r, [&ctx] (std::unique_ptr<request> req) {
@@ -400,19 +531,19 @@ void set_column_family(http_context& ctx, routes& r) {
    });

    cf::get_live_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return get_cf_stats(ctx, req->param["name"], &column_family::stats::live_disk_space_used);
+        return sum_sstable(ctx, req->param["name"], false);
    });

    cf::get_all_live_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return get_cf_stats(ctx, &column_family::stats::live_disk_space_used);
+        return sum_sstable(ctx, false);
    });

    cf::get_total_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return get_cf_stats(ctx, req->param["name"], &column_family::stats::total_disk_space_used);
+        return sum_sstable(ctx, req->param["name"], true);
    });

    cf::get_all_total_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return get_cf_stats(ctx, &column_family::stats::total_disk_space_used);
+        return sum_sstable(ctx, true);
    });

    cf::get_min_row_size.set(r, [&ctx] (std::unique_ptr<request> req) {
@@ -432,17 +563,19 @@ void set_column_family(http_context& ctx, routes& r) {
    });

    cf::get_mean_row_size.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], ratio_holder(), mean_row_size, std::plus<ratio_holder>());
+        // Cassandra 3.x mean values are truncated as integrals.
+        return map_reduce_cf(ctx, req->param["name"], integral_ratio_holder(), mean_row_size, std::plus<integral_ratio_holder>());
    });

    cf::get_all_mean_row_size.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, ratio_holder(), mean_row_size, std::plus<ratio_holder>());
+        // Cassandra 3.x mean values are truncated as integrals.
+        return map_reduce_cf(ctx, integral_ratio_holder(), mean_row_size, std::plus<integral_ratio_holder>());
    });

    cf::get_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return s + sst.second->filter_get_false_positive();
+                return s + sst->filter_get_false_positive();
            });
        }, std::plus<uint64_t>());
    });
@@ -450,7 +583,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_all_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return s + sst.second->filter_get_false_positive();
+                return s + sst->filter_get_false_positive();
            });
        }, std::plus<uint64_t>());
    });
@@ -458,7 +591,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_recent_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return s + sst.second->filter_get_recent_false_positive();
+                return s + sst->filter_get_recent_false_positive();
            });
        }, std::plus<uint64_t>());
    });
@@ -466,51 +599,39 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_all_recent_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return s + sst.second->filter_get_recent_false_positive();
+                return s + sst->filter_get_recent_false_positive();
            });
        }, std::plus<uint64_t>());
    });

    cf::get_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], double(0), [] (column_family& cf) {
-            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), double(0), [](double s, auto& sst) {
-                double f = sst.second->filter_get_false_positive();
-                return update_ratio(s, f, f + sst.second->filter_get_true_positive());
-            });
-        }, std::plus<double>());
+        return map_reduce_cf(ctx, req->param["name"], ratio_holder(), [] (column_family& cf) {
+            return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_false_positive_as_ratio_holder), ratio_holder());
+        }, std::plus<>());
    });

    cf::get_all_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, double(0), [] (column_family& cf) {
-            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), double(0), [](double s, auto& sst) {
-                double f = sst.second->filter_get_false_positive();
-                return update_ratio(s, f, f + sst.second->filter_get_true_positive());
-            });
-        }, std::plus<double>());
+        return map_reduce_cf(ctx, ratio_holder(), [] (column_family& cf) {
+            return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_false_positive_as_ratio_holder), ratio_holder());
+        }, std::plus<>());
    });

    cf::get_recent_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], double(0), [] (column_family& cf) {
-            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), double(0), [](double s, auto& sst) {
-                double f = sst.second->filter_get_recent_false_positive();
-                return update_ratio(s, f, f + sst.second->filter_get_recent_true_positive());
-            });
-        }, std::plus<double>());
+        return map_reduce_cf(ctx, req->param["name"], ratio_holder(), [] (column_family& cf) {
+            return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_recent_false_positive_as_ratio_holder), ratio_holder());
+        }, std::plus<>());
    });

    cf::get_all_recent_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, double(0), [] (column_family& cf) {
-            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), double(0), [](double s, auto& sst) {
-                double f = sst.second->filter_get_recent_false_positive();
-                return update_ratio(s, f, f + sst.second->filter_get_recent_true_positive());
-            });
-        }, std::plus<double>());
+        return map_reduce_cf(ctx, ratio_holder(), [] (column_family& cf) {
+            return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_recent_false_positive_as_ratio_holder), ratio_holder());
+        }, std::plus<>());
    });

    cf::get_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst.second->filter_size();
+                return s + sst->filter_size(); // add to the running total s; returning only the element value would drop every earlier sstable
            });
        }, std::plus<uint64_t>());
    });
@@ -518,7 +639,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_all_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst.second->filter_size();
+                return s + sst->filter_size(); // add to the running total s; returning only the element value would drop every earlier sstable
            });
        }, std::plus<uint64_t>());
    });
@@ -526,7 +647,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst.second->filter_memory_size();
+                return s + sst->filter_memory_size(); // add to the running total s; returning only the element value would drop every earlier sstable
            });
        }, std::plus<uint64_t>());
    });
@@ -534,7 +655,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_all_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst.second->filter_memory_size();
+                return s + sst->filter_memory_size(); // add to the running total s; returning only the element value would drop every earlier sstable
            });
        }, std::plus<uint64_t>());
    });
@@ -542,7 +663,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst.second->get_summary().memory_footprint();
+                return s + sst->get_summary().memory_footprint(); // add to the running total s; returning only the element value would drop every earlier sstable
            });
        }, std::plus<uint64_t>());
    });
@@ -550,7 +671,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_all_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst.second->get_summary().memory_footprint();
+                return s + sst->get_summary().memory_footprint(); // add to the running total s; returning only the element value would drop every earlier sstable
            });
        }, std::plus<uint64_t>());
    });
@@ -623,27 +744,35 @@ void set_column_family(http_context& ctx, routes& r) {
    });

    cf::get_row_cache_hit.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](const column_family& cf) {
-            return cf.get_row_cache().stats().hits;
-        }, std::plus<int64_t>());
+        return map_reduce_cf_raw(ctx, req->param["name"], utils::rate_moving_average(), [](const column_family& cf) {
+            return cf.get_row_cache().stats().hits.rate();
+        }, std::plus<utils::rate_moving_average>()).then([](const utils::rate_moving_average& m) {
+            return make_ready_future<json::json_return_type>(meter_to_json(m));
+        });
    });

    cf::get_all_row_cache_hit.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, int64_t(0), [](const column_family& cf) {
-            return cf.get_row_cache().stats().hits;
-        }, std::plus<int64_t>());
+        return map_reduce_cf_raw(ctx, utils::rate_moving_average(), [](const column_family& cf) {
+            return cf.get_row_cache().stats().hits.rate();
+        }, std::plus<utils::rate_moving_average>()).then([](const utils::rate_moving_average& m) {
+            return make_ready_future<json::json_return_type>(meter_to_json(m));
+        });
    });

    cf::get_row_cache_miss.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](const column_family& cf) {
-            return cf.get_row_cache().stats().misses;
-        }, std::plus<int64_t>());
+        return map_reduce_cf_raw(ctx, req->param["name"], utils::rate_moving_average(), [](const column_family& cf) {
+            return cf.get_row_cache().stats().misses.rate();
+        }, std::plus<utils::rate_moving_average>()).then([](const utils::rate_moving_average& m) {
+            return make_ready_future<json::json_return_type>(meter_to_json(m));
+        });
    });

    cf::get_all_row_cache_miss.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, int64_t(0), [](const column_family& cf) {
-            return cf.get_row_cache().stats().misses;
-        }, std::plus<int64_t>());
+        return map_reduce_cf_raw(ctx, utils::rate_moving_average(), [](const column_family& cf) {
+            return cf.get_row_cache().stats().misses.rate();
+        }, std::plus<utils::rate_moving_average>()).then([](const utils::rate_moving_average& m) {
+            return make_ready_future<json::json_return_type>(meter_to_json(m));
+        });

    });

@@ -669,10 +798,10 @@ void set_column_family(http_context& ctx, routes& r) {
    });

    cf::get_sstables_per_read_histogram.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], sstables::estimated_histogram(0), [](column_family& cf) {
+        return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) {
            return cf.get_stats().estimated_sstable_per_read;
        },
-        sstables::merge, utils_json::estimated_histogram());
+        utils::estimated_histogram_merge, utils_json::estimated_histogram());
    });

    cf::get_tombstone_scanned_histogram.set(r, [&ctx] (std::unique_ptr<request> req) {
@@ -719,25 +848,29 @@ void set_column_family(http_context& ctx, routes& r) {
        return std::vector<sstring>();
    });

-    cf::get_compression_ratio.set(r, [](const_req) {
-        // FIXME
-        // Currently there are no compression information
-        // so we return 0 as the ratio
-        return 0;
+    cf::get_compression_ratio.set(r, [&ctx](std::unique_ptr<request> req) {
+        auto uuid = get_uuid(req->param["name"], ctx.db.local());
+
+        return ctx.db.map_reduce(sum_ratio<double>(), [uuid](database& db) {
+            column_family& cf = db.find_column_family(uuid);
+            return make_ready_future<double>(get_compression_ratio(cf));
+        }).then([] (const double& result) {
+            return make_ready_future<json::json_return_type>(result);
+        });
    });

    cf::get_read_latency_estimated_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], sstables::estimated_histogram(0), [](column_family& cf) {
+        return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) {
            return cf.get_stats().estimated_read;
        },
-        sstables::merge, utils_json::estimated_histogram());
+        utils::estimated_histogram_merge, utils_json::estimated_histogram());
    });

    cf::get_write_latency_estimated_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], sstables::estimated_histogram(0), [](column_family& cf) {
+        return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) {
            return cf.get_stats().estimated_write;
        },
-        sstables::merge, utils_json::estimated_histogram());
+        utils::estimated_histogram_merge, utils_json::estimated_histogram());
    });

    cf::set_compaction_strategy_class.set(r, [&ctx](std::unique_ptr<request> req) {
@@ -766,12 +899,11 @@ void set_column_family(http_context& ctx, routes& r) {
    });

    cf::get_sstable_count_per_level.set(r, [&ctx](std::unique_ptr<request> req) {
-        // TBD
-        // FIXME
-        // This is a workaround, until there will be an API to return the count
-        // per level, we return an empty array
-        vector<uint64_t> res;
-        return make_ready_future<json::json_return_type>(res);
+        return map_reduce_cf_raw(ctx, req->param["name"], std::vector<uint64_t>(), [](const column_family& cf) {
+            return cf.sstable_count_per_level();
+        }, concat_sstable_count_per_level).then([](const std::vector<uint64_t>& res) {
+            return make_ready_future<json::json_return_type>(res);
+        });
    });
 }
 }
--- a/api/column_family.hh
+++ b/api/column_family.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -34,31 +34,44 @@ future<> foreach_column_family(http_context& ctx, const sstring& name, std::func


 template<class Mapper, class I, class Reducer>
-future<json::json_return_type> map_reduce_cf(http_context& ctx, const sstring& name, I init,
+future<I> map_reduce_cf_raw(http_context& ctx, const sstring& name, I init,
        Mapper mapper, Reducer reducer) {
    auto uuid = get_uuid(name, ctx.db.local());
    return ctx.db.map_reduce0([mapper, uuid](database& db) {
        return mapper(db.find_column_family(uuid));
-    }, init, reducer).then([](const I& res) {
+    }, init, reducer);
+}
+
+
+template<class Mapper, class I, class Reducer>
+future<json::json_return_type> map_reduce_cf(http_context& ctx, const sstring& name, I init,
+        Mapper mapper, Reducer reducer) {
+    return map_reduce_cf_raw(ctx, name, init, mapper, reducer).then([](const I& res) {
        return make_ready_future<json::json_return_type>(res);
    });
 }

 template<class Mapper, class I, class Reducer, class Result>
-future<json::json_return_type> map_reduce_cf(http_context& ctx, const sstring& name, I init,
+future<I> map_reduce_cf_raw(http_context& ctx, const sstring& name, I init,
        Mapper mapper, Reducer reducer, Result result) {
    auto uuid = get_uuid(name, ctx.db.local());
    return ctx.db.map_reduce0([mapper, uuid](database& db) {
        return mapper(db.find_column_family(uuid));
-    }, init, reducer).then([result](const I& res) mutable {
+    }, init, reducer);
+}
+
+
+template<class Mapper, class I, class Reducer, class Result>
+future<json::json_return_type> map_reduce_cf(http_context& ctx, const sstring& name, I init,
+        Mapper mapper, Reducer reducer, Result result) {
+    return map_reduce_cf_raw(ctx, name, init, mapper, reducer, result).then([result](const I& res) mutable {
        result = res;
        return make_ready_future<json::json_return_type>(result);
    });
 }

-
 template<class Mapper, class I, class Reducer>
-future<json::json_return_type> map_reduce_cf(http_context& ctx, I init,
+future<I> map_reduce_cf_raw(http_context& ctx, I init,
        Mapper mapper, Reducer reducer) {
    return ctx.db.map_reduce0([mapper, init, reducer](database& db) {
        auto res = init;
@@ -66,10 +79,18 @@ future<json::json_return_type> map_reduce_cf(http_context& ctx, I init,
            res = reducer(res, mapper(*i.second.get()));
        }
        return res;
-    }, init, reducer).then([](const I& res) {
+    }, init, reducer);
+}
+
+
+template<class Mapper, class I, class Reducer>
+future<json::json_return_type> map_reduce_cf(http_context& ctx, I init,
+        Mapper mapper, Reducer reducer) {
+    return map_reduce_cf_raw(ctx, init, mapper, reducer).then([](const I& res) {
        return make_ready_future<json::json_return_type>(res);
    });
 }
+
 future<json::json_return_type>  get_cf_stats(http_context& ctx, const sstring& name,
        int64_t column_family::stats::*f);

--- a/api/commitlog.cc
+++ b/api/commitlog.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
--- a/api/commitlog.hh
+++ b/api/commitlog.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
--- a/api/compaction_manager.cc
+++ b/api/compaction_manager.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -22,10 +22,10 @@
 #include "compaction_manager.hh"
 #include "api/api-doc/compaction_manager.json.hh"
 #include "db/system_keyspace.hh"
+#include "column_family.hh"

 namespace api {

-using namespace scollectd;
 namespace cm = httpd::compaction_manager_json;
 using namespace json;

@@ -78,7 +78,9 @@ void set_compaction_manager(http_context& ctx, routes& r) {
    });

    cm::get_pending_tasks.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return get_cm_stats(ctx, &compaction_manager::stats::pending_tasks);
+        return map_reduce_cf(ctx, int64_t(0), [](column_family& cf) {
+            return cf.get_compaction_strategy().estimated_pending_compactions(cf);
+        }, std::plus<int64_t>());
    });

    cm::get_completed_tasks.set(r, [&ctx] (std::unique_ptr<request> req) {
--- a/api/compaction_manager.hh
+++ b/api/compaction_manager.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
--- a/api/endpoint_snitch.cc
+++ b/api/endpoint_snitch.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -22,16 +22,22 @@
 #include "locator/snitch_base.hh"
 #include "endpoint_snitch.hh"
 #include "api/api-doc/endpoint_snitch_info.json.hh"
+#include "utils/fb_utilities.hh"

 namespace api {

 void set_endpoint_snitch(http_context& ctx, routes& r) {
-    httpd::endpoint_snitch_info_json::get_datacenter.set(r, [] (const_req req) {
-        return locator::i_endpoint_snitch::get_local_snitch_ptr()->get_datacenter(req.get_query_param("host"));
+    static auto host_or_broadcast = [](const_req req) { // Maps the request's "host" query parameter to a gms::inet_address.
+        auto host = req.get_query_param("host");
+        return host.empty() ? gms::inet_address(utils::fb_utilities::get_broadcast_address()) : gms::inet_address(host); // empty "host" falls back to fb_utilities::get_broadcast_address()
+    };
+
+    httpd::endpoint_snitch_info_json::get_datacenter.set(r, [](const_req req) {
+        return locator::i_endpoint_snitch::get_local_snitch_ptr()->get_datacenter(host_or_broadcast(req));
    });

-    httpd::endpoint_snitch_info_json::get_rack.set(r, [] (const_req req) {
-        return locator::i_endpoint_snitch::get_local_snitch_ptr()->get_rack(req.get_query_param("host"));
+    httpd::endpoint_snitch_info_json::get_rack.set(r, [](const_req req) {
+        return locator::i_endpoint_snitch::get_local_snitch_ptr()->get_rack(host_or_broadcast(req));
    });

    httpd::endpoint_snitch_info_json::get_snitch_name.set(r, [] (const_req req) {
--- a/api/endpoint_snitch.hh
+++ b/api/endpoint_snitch.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
--- a/api/failure_detector.cc
+++ b/api/failure_detector.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -88,6 +88,20 @@ void set_failure_detector(http_context& ctx, routes& r) {
            return make_ready_future<json::json_return_type>(state);
        });
    });
+
+    fd::get_endpoint_phi_values.set(r, [](std::unique_ptr<request> req) { // Returns one endpoint_phi_value per entry of gms::get_arrival_samples().
+        return gms::get_arrival_samples().then([](std::map<gms::inet_address, gms::arrival_window> map) {
+            std::vector<fd::endpoint_phi_value> res;
+            auto now = gms::arrival_window::clk::now(); // sampled once so every phi below is evaluated at the same time point
+            for (auto& p : map) {
+                fd::endpoint_phi_value val;
+                val.endpoint = p.first.to_sstring(); // map key: the endpoint address, rendered as a string
+                val.phi = p.second.phi(now); // map value: the endpoint's arrival window, asked for its phi at `now`
+                res.emplace_back(std::move(val));
+            }
+            return make_ready_future<json::json_return_type>(res);
+        });
+    });
 }

 }
--- a/api/failure_detector.hh
+++ b/api/failure_detector.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
--- a/api/gossiper.cc
+++ b/api/gossiper.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
--- a/api/gossiper.hh
+++ b/api/gossiper.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
--- a/api/hinted_handoff.cc
+++ b/api/hinted_handoff.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -24,7 +24,6 @@

 namespace api {

-using namespace scollectd;
 using namespace json;
 namespace hh = httpd::hinted_handoff_json;

--- a/api/hinted_handoff.hh
+++ b/api/hinted_handoff.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
--- a/api/lsa.cc
+++ b/api/lsa.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -29,11 +29,11 @@

 namespace api {

-static logging::logger logger("lsa-api");
+static logging::logger alogger("lsa-api");

 void set_lsa(http_context& ctx, routes& r) {
    httpd::lsa_json::lsa_compact.set(r, [&ctx](std::unique_ptr<request> req) {
-        logger.info("Triggering compaction");
+        alogger.info("Triggering compaction");
        return ctx.db.invoke_on_all([] (database&) {
            logalloc::shard_tracker().reclaim(std::numeric_limits<size_t>::max());
        }).then([] {
--- a/api/lsa.hh
+++ b/api/lsa.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
--- a/api/messaging_service.cc
+++ b/api/messaging_service.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -27,7 +27,7 @@
 #include <sstream>

 using namespace httpd::messaging_service_json;
-using namespace net;
+using namespace netw;

 namespace api {

@@ -120,13 +120,13 @@ void set_messaging_service(http_context& ctx, routes& r) {
    }));

    get_version.set(r, [](const_req req) {
-        return net::get_local_messaging_service().get_raw_version(req.get_query_param("addr"));
+        return netw::get_local_messaging_service().get_raw_version(req.get_query_param("addr"));
    });

    get_dropped_messages_by_ver.set(r, [](std::unique_ptr<request> req) {
        shared_ptr<std::vector<uint64_t>> map = make_shared<std::vector<uint64_t>>(num_verb);

-        return net::get_messaging_service().map_reduce([map](const uint64_t* local_map) mutable {
+        return netw::get_messaging_service().map_reduce([map](const uint64_t* local_map) mutable {
            for (auto i = 0; i < num_verb; i++) {
                (*map)[i]+= local_map[i];
            }
--- a/api/messaging_service.hh
+++ b/api/messaging_service.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
--- a/api/storage_proxy.cc
+++ b/api/storage_proxy.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -33,17 +33,36 @@ namespace sp = httpd::storage_proxy_json;
 using proxy = service::storage_proxy;
 using namespace json;

-static future<json::json_return_type>  sum_estimated_histogram(http_context& ctx, sstables::estimated_histogram proxy::stats::*f) {
-    return ctx.sp.map_reduce0([f](const proxy& p) {return p.get_stats().*f;}, sstables::estimated_histogram(),
-            sstables::merge).then([](const sstables::estimated_histogram& val) {
+static future<utils::rate_moving_average>  sum_timed_rate(distributed<proxy>& d, utils::timed_rate_moving_average proxy::stats::*f) { // Combines rate() of the stats member selected by f over the distributed proxy d.
+    return d.map_reduce0([f](const proxy& p) {return (p.get_stats().*f).rate();}, utils::rate_moving_average(), // map: rate() of each proxy's member; initial value: default-constructed rate_moving_average
+            std::plus<utils::rate_moving_average>()); // reduce: combine partial results with operator+
+}
+
+static future<json::json_return_type>  sum_timed_rate_as_obj(distributed<proxy>& d, utils::timed_rate_moving_average proxy::stats::*f) { // Like sum_timed_rate(), but returns the combined value as a utils_json::rate_moving_average object.
+    return sum_timed_rate(d, f).then([](const utils::rate_moving_average& val) {
+        httpd::utils_json::rate_moving_average m;
+        m = val; // converts via the JSON type's assignment from utils::rate_moving_average
+        return make_ready_future<json::json_return_type>(m);
+    });
+}
+
+static future<json::json_return_type>  sum_timed_rate_as_long(distributed<proxy>& d, utils::timed_rate_moving_average proxy::stats::*f) { // Like sum_timed_rate(), but returns only the `count` field of the combined value.
+    return sum_timed_rate(d, f).then([](const utils::rate_moving_average& val) {
+        return make_ready_future<json::json_return_type>(val.count); // the other fields of val are discarded
+    });
+}
+
+static future<json::json_return_type>  sum_estimated_histogram(http_context& ctx, utils::estimated_histogram proxy::stats::*f) {
+    return ctx.sp.map_reduce0([f](const proxy& p) {return p.get_stats().*f;}, utils::estimated_histogram(),
+            utils::estimated_histogram_merge).then([](const utils::estimated_histogram& val) {
        utils_json::estimated_histogram res;
        res = val;
        return make_ready_future<json::json_return_type>(res);
    });
 }

-static future<json::json_return_type>  total_latency(http_context& ctx, utils::ihistogram proxy::stats::*f) {
-    return ctx.sp.map_reduce0([f](const proxy& p) {return (p.get_stats().*f).mean * (p.get_stats().*f).count;}, 0.0,
+static future<json::json_return_type>  total_latency(http_context& ctx, utils::timed_rate_moving_average_and_histogram proxy::stats::*f) {
+    return ctx.sp.map_reduce0([f](const proxy& p) {return (p.get_stats().*f).hist.mean * (p.get_stats().*f).hist.count;}, 0.0,
            std::plus<double>()).then([](double val) {
        int64_t res = val;
        return make_ready_future<json::json_return_type>(res);
@@ -291,41 +310,77 @@ void set_storage_proxy(http_context& ctx, routes& r) {
    });

    sp::get_read_metrics_timeouts.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_stats(ctx.sp, &proxy::stats::read_timeouts);
+        return sum_timed_rate_as_long(ctx.sp, &proxy::stats::read_timeouts);
    });

    sp::get_read_metrics_unavailables.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_stats(ctx.sp, &proxy::stats::read_unavailables);
+        return sum_timed_rate_as_long(ctx.sp, &proxy::stats::read_unavailables);
    });

    sp::get_range_metrics_timeouts.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_stats(ctx.sp, &proxy::stats::range_slice_timeouts);
+        return sum_timed_rate_as_long(ctx.sp, &proxy::stats::range_slice_timeouts);
    });

    sp::get_range_metrics_unavailables.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_stats(ctx.sp, &proxy::stats::range_slice_unavailables);
+        return sum_timed_rate_as_long(ctx.sp, &proxy::stats::range_slice_unavailables);
    });

    sp::get_write_metrics_timeouts.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_stats(ctx.sp, &proxy::stats::write_timeouts);
+        return sum_timed_rate_as_long(ctx.sp, &proxy::stats::write_timeouts);
    });

    sp::get_write_metrics_unavailables.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_stats(ctx.sp, &proxy::stats::write_unavailables);
+        return sum_timed_rate_as_long(ctx.sp, &proxy::stats::write_unavailables);
    });

-    sp::get_range_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
+    sp::get_read_metrics_timeouts_rates.set(r, [&ctx](std::unique_ptr<request> req) {
+        return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::read_timeouts);
+    });
+
+    sp::get_read_metrics_unavailables_rates.set(r, [&ctx](std::unique_ptr<request> req) {
+        return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::read_unavailables);
+    });
+
+    sp::get_range_metrics_timeouts_rates.set(r, [&ctx](std::unique_ptr<request> req) {
+        return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::range_slice_timeouts);
+    });
+
+    sp::get_range_metrics_unavailables_rates.set(r, [&ctx](std::unique_ptr<request> req) {
+        return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::range_slice_unavailables);
+    });
+
+    sp::get_write_metrics_timeouts_rates.set(r, [&ctx](std::unique_ptr<request> req) {
+        return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::write_timeouts);
+    });
+
+    sp::get_write_metrics_unavailables_rates.set(r, [&ctx](std::unique_ptr<request> req) {
+        return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::write_unavailables);
+    });
+
+    sp::get_range_metrics_latency_histogram_depricated.set(r, [&ctx](std::unique_ptr<request> req) {
        return sum_histogram_stats(ctx.sp, &proxy::stats::range);
    });

-    sp::get_write_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
+    sp::get_write_metrics_latency_histogram_depricated.set(r, [&ctx](std::unique_ptr<request> req) {
        return sum_histogram_stats(ctx.sp, &proxy::stats::write);
    });

-    sp::get_read_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
+    sp::get_read_metrics_latency_histogram_depricated.set(r, [&ctx](std::unique_ptr<request> req) {
        return sum_histogram_stats(ctx.sp, &proxy::stats::read);
    });

+    sp::get_range_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
+        return sum_timer_stats(ctx.sp, &proxy::stats::range);
+    });
+
+    sp::get_write_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
+        return sum_timer_stats(ctx.sp, &proxy::stats::write);
+    });
+
+    sp::get_read_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
+        return sum_timer_stats(ctx.sp, &proxy::stats::read);
+    });
+
    sp::get_read_estimated_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
        return sum_estimated_histogram(ctx, &proxy::stats::estimated_read);
    });
@@ -342,7 +397,7 @@ void set_storage_proxy(http_context& ctx, routes& r) {
    });

    sp::get_range_estimated_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_histogram_stats(ctx.sp, &proxy::stats::read);
+        return sum_timer_stats(ctx.sp, &proxy::stats::read);
    });

    sp::get_range_latency.set(r, [&ctx](std::unique_ptr<request> req) {
--- a/api/storage_proxy.hh
+++ b/api/storage_proxy.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
--- a/api/storage_service.cc
+++ b/api/storage_service.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -22,6 +22,8 @@
 #include "storage_service.hh"
 #include "api/api-doc/storage_service.json.hh"
 #include "db/config.hh"
+#include <boost/range/adaptor/map.hpp>
+#include <boost/range/adaptor/filtered.hpp>
 #include <service/storage_service.hh>
 #include <db/commitlog/commitlog.hh>
 #include <gms/gossiper.hh>
@@ -31,6 +33,7 @@
 #include "locator/snitch_base.hh"
 #include "column_family.hh"
 #include "log.hh"
+#include "release.hh"

 namespace api {

@@ -121,6 +124,9 @@ void set_storage_service(http_context& ctx, routes& r) {
        return service::get_local_storage_service().get_release_version();
    });

+    ss::get_scylla_release_version.set(r, [](const_req req) {
+        return scylla_version();
+    });
    ss::get_schema_version.set(r, [](const_req req) {
        return service::get_local_storage_service().get_schema_version();
    });
@@ -382,21 +388,21 @@ void set_storage_service(http_context& ctx, routes& r) {

    ss::remove_node.set(r, [](std::unique_ptr<request> req) {
        auto host_id = req->get_query_param("host_id");
-        return service::get_local_storage_service().remove_node(host_id).then([] {
+        return service::get_local_storage_service().removenode(host_id).then([] {
            return make_ready_future<json::json_return_type>(json_void());
        });
    });

    ss::get_removal_status.set(r, [](std::unique_ptr<request> req) {
-        //TBD
-        unimplemented();
-        return make_ready_future<json::json_return_type>("");
+        return service::get_local_storage_service().get_removal_status().then([] (auto status) {
+            return make_ready_future<json::json_return_type>(status);
+        });
    });

    ss::force_remove_completion.set(r, [](std::unique_ptr<request> req) {
-        //TBD
-        unimplemented();
-        return make_ready_future<json::json_return_type>(json_void());
+        return service::get_local_storage_service().force_remove_completion().then([] {
+            return make_ready_future<json::json_return_type>(json_void());
+        });
    });

    ss::set_logging_level.set(r, [](std::unique_ptr<request> req) {
@@ -453,8 +459,15 @@ void set_storage_service(http_context& ctx, routes& r) {
    });

    ss::get_keyspaces.set(r, [&ctx](const_req req) {
-        auto non_system = req.get_query_param("non_system");
-        return map_keys(ctx.db.local().keyspaces());
+        auto type = req.get_query_param("type");
+        if (type == "user") {
+            return ctx.db.local().get_non_system_keyspaces();
+        } else if (type == "non_local_strategy") {
+            return map_keys(ctx.db.local().get_keyspaces() | boost::adaptors::filtered([](const auto& p) {
+                return p.second.get_replication_strategy().get_type() != locator::replication_strategy_type::local;
+            }));
+        }
+        return map_keys(ctx.db.local().get_keyspaces());
    });

    ss::update_snitch.set(r, [](std::unique_ptr<request> req) {
@@ -538,9 +551,7 @@ void set_storage_service(http_context& ctx, routes& r) {
    });

    ss::is_joined.set(r, [] (std::unique_ptr<request> req) {
-        return service::get_local_storage_service().is_joined().then([] (bool is_joined) {
-            return make_ready_future<json::json_return_type>(is_joined);
-        });
+        return make_ready_future<json::json_return_type>(service::get_local_storage_service().is_joined());
    });

    ss::set_stream_throughput_mb_per_sec.set(r, [](std::unique_ptr<request> req) {
@@ -659,16 +670,59 @@ void set_storage_service(http_context& ctx, routes& r) {
    });

    ss::set_trace_probability.set(r, [](std::unique_ptr<request> req) {
-        //TBD
-        unimplemented();
        auto probability = req->get_query_param("probability");
-        return make_ready_future<json::json_return_type>(json_void());
+        return futurize<json::json_return_type>::apply([probability] {
+            double real_prob = std::stod(probability.c_str());
+            return tracing::tracing::tracing_instance().invoke_on_all([real_prob] (auto& local_tracing) {
+                local_tracing.set_trace_probability(real_prob);
+            }).then([] {
+                return make_ready_future<json::json_return_type>(json_void());
+            });
+        }).then_wrapped([probability] (auto&& f) {
+            try {
+                f.get();
+                return make_ready_future<json::json_return_type>(json_void());
+            } catch (std::out_of_range& e) {
+                throw httpd::bad_param_exception(e.what());
+            } catch (std::invalid_argument&){
+                throw httpd::bad_param_exception(sprint("Bad format in a probability value: \"%s\"", probability.c_str()));
+            }
+        });
    });

    ss::get_trace_probability.set(r, [](std::unique_ptr<request> req) {
-        //TBD
-        unimplemented();
-        return make_ready_future<json::json_return_type>(0);
+        return make_ready_future<json::json_return_type>(tracing::tracing::get_local_tracing_instance().get_trace_probability());
+    });
+
+    ss::get_slow_query_info.set(r, [](const_req req) {
+        // Report the slow-query settings of the local tracing instance (ttl/threshold as raw duration counts).
+        auto&& local_tracing = tracing::tracing::get_local_tracing_instance();
+        ss::slow_query_info res;
+        res.enable = local_tracing.slow_query_tracing_enabled();
+        res.ttl = local_tracing.slow_query_record_ttl().count();
+        res.threshold = local_tracing.slow_query_threshold().count();
+        return res;
+    });
+
+    ss::set_slow_query.set(r, [](std::unique_ptr<request> req) {
+        auto enable = req->get_query_param("enable");
+        auto ttl = req->get_query_param("ttl");
+        auto threshold = req->get_query_param("threshold");
+        // Parse up front: a throw inside the invoke_on_all() callback becomes a failed future that this try/catch never sees.
+        long threshold_us = 0, ttl_sec = 0;
+        try {
+            threshold_us = threshold != "" ? std::stol(threshold.c_str()) : 0;
+            ttl_sec = ttl != "" ? std::stol(ttl.c_str()) : 0;
+        } catch (...) {
+            throw httpd::bad_param_exception(sprint("Bad format value: threshold=\"%s\" ttl=\"%s\"", threshold.c_str(), ttl.c_str()));
+        }
+        return tracing::tracing::tracing_instance().invoke_on_all([enable, ttl, threshold, threshold_us, ttl_sec] (auto& local_tracing) {
+            if (threshold != "") { local_tracing.set_slow_query_threshold(std::chrono::microseconds(threshold_us)); }
+            if (ttl != "") { local_tracing.set_slow_query_record_ttl(std::chrono::seconds(ttl_sec)); }
+            if (enable != "") { local_tracing.set_slow_query_enabled(strcasecmp(enable.c_str(), "true") == 0); }
+        }).then([] {
+            return make_ready_future<json::json_return_type>(json_void());
+        });
+    });

    ss::enable_auto_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
@@ -748,10 +802,8 @@ void set_storage_service(http_context& ctx, routes& r) {
        return make_ready_future<json::json_return_type>(json_void());
    });

-    ss::get_metrics_load.set(r, [](std::unique_ptr<request> req) {
-        //TBD
-        unimplemented();
-        return make_ready_future<json::json_return_type>(0);
+    ss::get_metrics_load.set(r, [&ctx](std::unique_ptr<request> req) {
+        return get_cf_stats(ctx, &column_family::stats::live_disk_space_used);
    });

    ss::get_exceptions.set(r, [](const_req req) {
--- a/api/storage_service.hh
+++ b/api/storage_service.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
--- a/api/stream_manager.cc
+++ b/api/stream_manager.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
--- a/api/stream_manager.hh
+++ b/api/stream_manager.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
--- a/api/system.cc
+++ b/api/system.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
--- a/api/system.hh
+++ b/api/system.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
--- a/atomic_cell.hh
+++ b/atomic_cell.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015 Cloudius Systems, Ltd.
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -28,11 +28,12 @@
 #include "utils/managed_bytes.hh"
 #include "net/byteorder.hh"
 #include <cstdint>
-#include <iostream>
+#include <iosfwd>
+#include <seastar/util/gcc6-concepts.hh>

-template<typename T>
+template<typename T, typename Input>
 static inline
-void set_field(managed_bytes& v, unsigned offset, T val) {
+void set_field(Input& v, unsigned offset, T val) {
    reinterpret_cast<net::packed<T>*>(v.begin() + offset)->raw = net::hton(val);
 }

@@ -54,9 +55,11 @@ class atomic_cell_or_collection;
 */
 class atomic_cell_type final {
 private:
-    static constexpr int8_t DEAD_FLAGS = 0;
    static constexpr int8_t LIVE_FLAG = 0x01;
    static constexpr int8_t EXPIRY_FLAG = 0x02; // When present, expiry field is present. Set only for live cells
+    static constexpr int8_t REVERT_FLAG = 0x04; // transient flag used to efficiently implement ReversiblyMergeable for atomic cells.
+    static constexpr int8_t COUNTER_UPDATE_FLAG = 0x08; // Cell is a counter update.
+    static constexpr int8_t COUNTER_IN_PLACE_REVERT = 0x10;
    static constexpr unsigned flags_size = 1;
    static constexpr unsigned timestamp_offset = flags_size;
    static constexpr unsigned timestamp_size = 8;
@@ -66,27 +69,62 @@ private:
    static constexpr unsigned deletion_time_size = 4;
    static constexpr unsigned ttl_offset = expiry_offset + expiry_size;
    static constexpr unsigned ttl_size = 4;
+    friend class counter_cell_builder;
 private:
+    static bool is_counter_update(bytes_view cell) {
+        return cell[0] & COUNTER_UPDATE_FLAG;
+    }
+    static bool is_revert_set(bytes_view cell) {
+        return cell[0] & REVERT_FLAG;
+    }
+    static bool is_counter_in_place_revert_set(bytes_view cell) {
+        return cell[0] & COUNTER_IN_PLACE_REVERT;
+    }
+    template<typename BytesContainer>
+    static void set_revert(BytesContainer& cell, bool revert) {
+        cell[0] = (cell[0] & ~REVERT_FLAG) | (revert * REVERT_FLAG);
+    }
+    template<typename BytesContainer>
+    static void set_counter_in_place_revert(BytesContainer& cell, bool flag) {
+        cell[0] = (cell[0] & ~COUNTER_IN_PLACE_REVERT) | (flag * COUNTER_IN_PLACE_REVERT);
+    }
    static bool is_live(const bytes_view& cell) {
-        return cell[0] != DEAD_FLAGS;
+        return cell[0] & LIVE_FLAG;
    }
    static bool is_live_and_has_ttl(const bytes_view& cell) {
        return cell[0] & EXPIRY_FLAG;
    }
    static bool is_dead(const bytes_view& cell) {
-        return cell[0] == DEAD_FLAGS;
+        return !is_live(cell);
    }
    // Can be called on live and dead cells
    static api::timestamp_type timestamp(const bytes_view& cell) {
        return get_field<api::timestamp_type>(cell, timestamp_offset);
    }
+    template<typename BytesContainer>
+    static void set_timestamp(BytesContainer& cell, api::timestamp_type ts) {
+        set_field(cell, timestamp_offset, ts);
+    }
    // Can be called on live cells only
-    static bytes_view value(bytes_view cell) {
+private:
+    template<typename BytesView>
+    static BytesView do_get_value(BytesView cell) {
        auto expiry_field_size = bool(cell[0] & EXPIRY_FLAG) * (expiry_size + ttl_size);
        auto value_offset = flags_size + timestamp_size + expiry_field_size;
        cell.remove_prefix(value_offset);
        return cell;
    }
+public:
+    static bytes_view value(bytes_view cell) {
+        return do_get_value(cell);
+    }
+    static bytes_mutable_view value(bytes_mutable_view cell) {
+        return do_get_value(cell);
+    }
+    // Can be called on live counter update cells only
+    static int64_t counter_update_value(bytes_view cell) {
+        return get_field<int64_t>(cell, flags_size + timestamp_size);
+    }
    // Can be called only when is_dead() is true.
    static gc_clock::time_point deletion_time(const bytes_view& cell) {
        assert(is_dead(cell));
@@ -106,7 +144,7 @@ private:
    }
    static managed_bytes make_dead(api::timestamp_type timestamp, gc_clock::time_point deletion_time) {
        managed_bytes b(managed_bytes::initialized_later(), flags_size + timestamp_size + deletion_time_size);
-        b[0] = DEAD_FLAGS;
+        b[0] = 0;
        set_field(b, timestamp_offset, timestamp);
        set_field(b, deletion_time_offset, deletion_time.time_since_epoch().count());
        return b;
@@ -119,6 +157,14 @@ private:
        std::copy_n(value.begin(), value.size(), b.begin() + value_offset);
        return b;
    }
+    static managed_bytes make_live_counter_update(api::timestamp_type timestamp, int64_t value) {
+        auto value_offset = flags_size + timestamp_size;
+        managed_bytes b(managed_bytes::initialized_later(), value_offset + sizeof(value));
+        b[0] = LIVE_FLAG | COUNTER_UPDATE_FLAG;
+        set_field(b, timestamp_offset, timestamp);
+        set_field(b, value_offset, value);
+        return b;
+    }
    static managed_bytes make_live(api::timestamp_type timestamp, bytes_view value, gc_clock::time_point expiry, gc_clock::duration ttl) {
        auto value_offset = flags_size + timestamp_size + expiry_size + ttl_size;
        managed_bytes b(managed_bytes::initialized_later(), value_offset + value.size());
@@ -129,6 +175,31 @@ private:
        std::copy_n(value.begin(), value.size(), b.begin() + value_offset);
        return b;
    }
+    // make_live_from_serializer() is intended for users that need to serialise
+    // some object or objects to the format used in atomic_cell::value().
+    // With just make_live() the pattern would look as follows:
+    // 1. allocate a buffer and write the serialised objects to it
+    // 2. pass that buffer to make_live()
+    // 3. make_live() needs to prepend some metadata to the cell value, so it
+    //    allocates a new buffer and copies the content of the original one
+    //
+    // That extra allocation and copy can be avoided.
+    // make_live_from_serializer() lets the caller specify the timestamp and the
+    // size of the cell value, and provide a serialiser function object that is
+    // called once with an iterator to the start of the value area (just past
+    // the flags and timestamp) and is expected to write the `size` value bytes there.
+    template<typename Serializer>
+    GCC6_CONCEPT(requires requires(Serializer serializer, bytes::iterator it) {
+        serializer(it);
+    })
+    static managed_bytes make_live_from_serializer(api::timestamp_type timestamp, size_t size, Serializer&& serializer) {
+        auto value_offset = flags_size + timestamp_size;
+        managed_bytes b(managed_bytes::initialized_later(), value_offset + size);
+        b[0] = LIVE_FLAG;
+        set_field(b, timestamp_offset, timestamp);
+        serializer(b.begin() + value_offset);
+        return b;
+    }
    template<typename ByteContainer>
    friend class atomic_cell_base;
    friend class atomic_cell;
@@ -140,16 +211,25 @@ protected:
    ByteContainer _data;
 protected:
    atomic_cell_base(ByteContainer&& data) : _data(std::forward<ByteContainer>(data)) { }
-    atomic_cell_base(const ByteContainer& data) : _data(data) { }
+    friend class atomic_cell_or_collection;
 public:
+    bool is_counter_update() const {
+        return atomic_cell_type::is_counter_update(_data);
+    }
+    bool is_revert_set() const {
+        return atomic_cell_type::is_revert_set(_data);
+    }
+    bool is_counter_in_place_revert_set() const {
+        return atomic_cell_type::is_counter_in_place_revert_set(_data);
+    }
    bool is_live() const {
        return atomic_cell_type::is_live(_data);
    }
-    bool is_live(tombstone t) const {
-        return is_live() && !is_covered_by(t);
+    bool is_live(tombstone t, bool is_counter) const {
+        return is_live() && !is_covered_by(t, is_counter);
    }
-    bool is_live(tombstone t, gc_clock::time_point now) const {
-        return is_live() && !is_covered_by(t) && !has_expired(now);
+    bool is_live(tombstone t, gc_clock::time_point now, bool is_counter) const {
+        return is_live() && !is_covered_by(t, is_counter) && !has_expired(now);
    }
    bool is_live_and_has_ttl() const {
        return atomic_cell_type::is_live_and_has_ttl(_data);
@@ -157,17 +237,24 @@ public:
    bool is_dead(gc_clock::time_point now) const {
        return atomic_cell_type::is_dead(_data) || has_expired(now);
    }
-    bool is_covered_by(tombstone t) const {
-        return timestamp() <= t.timestamp;
+    bool is_covered_by(tombstone t, bool is_counter) const {
+        return timestamp() <= t.timestamp || (is_counter && t.timestamp != api::missing_timestamp);
    }
    // Can be called on live and dead cells
    api::timestamp_type timestamp() const {
        return atomic_cell_type::timestamp(_data);
    }
+    void set_timestamp(api::timestamp_type ts) {
+        atomic_cell_type::set_timestamp(_data, ts);
+    }
    // Can be called on live cells only
-    bytes_view value() const {
+    auto value() const {
        return atomic_cell_type::value(_data);
    }
+    // Can be called on live counter update cells only
+    int64_t counter_update_value() const {
+        return atomic_cell_type::counter_update_value(_data);
+    }
    // Can be called only when is_dead(gc_clock::time_point)
    gc_clock::time_point deletion_time() const {
        return !is_live() ? atomic_cell_type::deletion_time(_data) : expiry() - ttl();
@@ -187,10 +274,16 @@ public:
    bytes_view serialize() const {
        return _data;
    }
+    void set_revert(bool revert) {
+        atomic_cell_type::set_revert(_data, revert);
+    }
+    void set_counter_in_place_revert(bool flag) {
+        atomic_cell_type::set_counter_in_place_revert(_data, flag);
+    }
 };

 class atomic_cell_view final : public atomic_cell_base<bytes_view> {
-    atomic_cell_view(bytes_view data) : atomic_cell_base(data) {}
+    atomic_cell_view(bytes_view data) : atomic_cell_base(std::move(data)) {}
 public:
    static atomic_cell_view from_bytes(bytes_view data) { return atomic_cell_view(data); }

@@ -198,6 +291,19 @@ public:
    friend std::ostream& operator<<(std::ostream& os, const atomic_cell_view& acv);
 };

+class atomic_cell_mutable_view final : public atomic_cell_base<bytes_mutable_view> {
+    atomic_cell_mutable_view(bytes_mutable_view data) : atomic_cell_base(std::move(data)) {}
+public:
+    static atomic_cell_mutable_view from_bytes(bytes_mutable_view data) { return atomic_cell_mutable_view(data); }
+
+    friend class atomic_cell;
+};
+
+class atomic_cell_ref final : public atomic_cell_base<managed_bytes&> {
+public:
+    atomic_cell_ref(managed_bytes& buf) : atomic_cell_base(buf) {}
+};
+
 class atomic_cell final : public atomic_cell_base<managed_bytes> {
    atomic_cell(managed_bytes b) : atomic_cell_base(std::move(b)) {}
 public:
@@ -218,11 +324,22 @@ public:
    static atomic_cell make_live(api::timestamp_type timestamp, bytes_view value) {
        return atomic_cell_type::make_live(timestamp, value);
    }
+    static atomic_cell make_live(api::timestamp_type timestamp, const bytes& value) {
+        return make_live(timestamp, bytes_view(value));
+    }
+    static atomic_cell make_live_counter_update(api::timestamp_type timestamp, int64_t value) {
+        return atomic_cell_type::make_live_counter_update(timestamp, value);
+    }
    static atomic_cell make_live(api::timestamp_type timestamp, bytes_view value,
        gc_clock::time_point expiry, gc_clock::duration ttl)
    {
        return atomic_cell_type::make_live(timestamp, value, expiry, ttl);
    }
+    static atomic_cell make_live(api::timestamp_type timestamp, const bytes& value,
+                                 gc_clock::time_point expiry, gc_clock::duration ttl)
+    {
+        return make_live(timestamp, bytes_view(value), expiry, ttl);
+    }
    static atomic_cell make_live(api::timestamp_type timestamp, bytes_view value, ttl_opt ttl) {
        if (!ttl) {
            return atomic_cell_type::make_live(timestamp, value);
@@ -230,6 +347,10 @@ public:
            return atomic_cell_type::make_live(timestamp, value, gc_clock::now() + *ttl, *ttl);
        }
    }
+    template<typename Serializer>
+    static atomic_cell make_live_from_serializer(api::timestamp_type timestamp, size_t size, Serializer&& serializer) {
+        return atomic_cell_type::make_live_from_serializer(timestamp, size, std::forward<Serializer>(serializer));
+    }
    friend class atomic_cell_or_collection;
    friend std::ostream& operator<<(std::ostream& os, const atomic_cell& ac);
 };
@@ -267,11 +388,6 @@ collection_mutation::operator collection_mutation_view() const {
    return { data };
 }

-namespace db {
-template<typename T>
-class serializer;
-}
-
 class column_definition;

 int compare_atomic_cell_for_merge(atomic_cell_view left, atomic_cell_view right);
--- a/atomic_cell_hash.hh
+++ b/atomic_cell_hash.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015 Cloudius Systems, Ltd.
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -26,16 +26,17 @@
 #include "types.hh"
 #include "atomic_cell.hh"
 #include "hashing.hh"
+#include "counters.hh"

 template<>
 struct appending_hash<collection_mutation_view> {
    template<typename Hasher>
-    void operator()(Hasher& h, collection_mutation_view cell) const {
+    void operator()(Hasher& h, collection_mutation_view cell, const column_definition& cdef) const {
        auto m_view = collection_type_impl::deserialize_mutation_form(cell);
        ::feed_hash(h, m_view.tomb);
        for (auto&& key_and_value : m_view.cells) {
            ::feed_hash(h, key_and_value.first);
-            ::feed_hash(h, key_and_value.second);
+            ::feed_hash(h, key_and_value.second, cdef);
        }
    }
 };
@@ -43,10 +44,14 @@ struct appending_hash<collection_mutation_view> {
 template<>
 struct appending_hash<atomic_cell_view> {
    template<typename Hasher>
-    void operator()(Hasher& h, atomic_cell_view cell) const {
+    void operator()(Hasher& h, atomic_cell_view cell, const column_definition& cdef) const {
        feed_hash(h, cell.is_live());
        feed_hash(h, cell.timestamp());
        if (cell.is_live()) {
+            if (cdef.is_counter()) {
+                ::feed_hash(h, counter_cell_view(cell));
+                return;
+            }
            if (cell.is_live_and_has_ttl()) {
                feed_hash(h, cell.expiry());
                feed_hash(h, cell.ttl());
@@ -57,3 +62,19 @@ struct appending_hash<atomic_cell_view> {
        }
    }
 };
+
+// Hashes an owning atomic_cell by feeding its atomic_cell_view conversion, plus the column definition, to feed_hash().
+template<> struct appending_hash<atomic_cell> {
+    template<typename Hasher>
+    void operator()(Hasher& hasher, const atomic_cell& owned_cell, const column_definition& cdef) const {
+        feed_hash(hasher, static_cast<atomic_cell_view>(owned_cell), cdef);
+    }
+};
+
+// Hashes an owning collection_mutation by feeding its collection_mutation_view conversion, plus the column definition, to feed_hash().
+template<> struct appending_hash<collection_mutation> {
+    template<typename Hasher>
+    void operator()(Hasher& hasher, const collection_mutation& owned_cm, const column_definition& cdef) const {
+        feed_hash(hasher, static_cast<collection_mutation_view>(owned_cm), cdef);
+    }
+};
--- a/atomic_cell_or_collection.hh
+++ b/atomic_cell_or_collection.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015 Cloudius Systems, Ltd.
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -27,6 +27,8 @@

 // A variant type that can hold either an atomic_cell, or a serialized collection.
 // Which type is stored is determined by the schema.
+// Has an "empty" state.
+// Objects moved-from are left in an empty state.
 class atomic_cell_or_collection final {
    managed_bytes _data;
 private:
@@ -36,10 +38,15 @@ public:
    atomic_cell_or_collection(atomic_cell ac) : _data(std::move(ac._data)) {}
    static atomic_cell_or_collection from_atomic_cell(atomic_cell data) { return { std::move(data._data) }; }
    atomic_cell_view as_atomic_cell() const { return atomic_cell_view::from_bytes(_data); }
+    atomic_cell_ref as_atomic_cell_ref() { return { _data }; }
+    atomic_cell_mutable_view as_mutable_atomic_cell() { return atomic_cell_mutable_view::from_bytes(_data); }
    atomic_cell_or_collection(collection_mutation cm) : _data(std::move(cm.data)) {}
    explicit operator bool() const {
        return !_data.empty();
    }
+    bool can_use_mutable_view() const {
+        return !_data.is_fragmented();
+    }
    static atomic_cell_or_collection from_collection_mutation(collection_mutation data) {
        return std::move(data.data);
    }
@@ -55,10 +62,13 @@ public:
    template<typename Hasher>
    void feed_hash(Hasher& h, const column_definition& def) const {
        if (def.is_atomic()) {
-            ::feed_hash(h, as_atomic_cell());
+            ::feed_hash(h, as_atomic_cell(), def);
        } else {
-            ::feed_hash(as_collection_mutation(), h, def.type);
+            ::feed_hash(h, as_collection_mutation(), def);
        }
    }
+    size_t external_memory_usage() const {
+        return _data.external_memory_usage();
+    }
    friend std::ostream& operator<<(std::ostream&, const atomic_cell_or_collection&);
 };
--- a/auth/auth.cc
+++ b/auth/auth.cc
@@ -17,9 +17,9 @@
 */

 /*
- * Copyright 2016 Cloudius Systems
+ * Copyright (C) 2016 ScyllaDB
 *
- * Modified by Cloudius Systems
+ * Modified by ScyllaDB
 */

 /*
@@ -40,14 +40,19 @@
 */
 #include <seastar/core/sleep.hh>

+#include <seastar/core/distributed.hh>
+
 #include "auth.hh"
 #include "authenticator.hh"
+#include "authorizer.hh"
 #include "database.hh"
 #include "cql3/query_processor.hh"
-#include "cql3/statements/cf_statement.hh"
+#include "cql3/statements/raw/cf_statement.hh"
 #include "cql3/statements/create_table_statement.hh"
 #include "db/config.hh"
 #include "service/migration_manager.hh"
+#include "utils/loading_cache.hh"
+#include "utils/hash.hh"

 const sstring auth::auth::DEFAULT_SUPERUSER_NAME("cassandra");
 const sstring auth::auth::AUTH_KS("system_auth");
@@ -56,7 +61,7 @@ const sstring auth::auth::USERS_CF("users");
 static const sstring USER_NAME("name");
 static const sstring SUPER("super");

-static logging::logger logger("auth");
+static logging::logger alogger("auth");

 // TODO: configurable
 using namespace std::chrono_literals;
@@ -68,29 +73,85 @@ class auth_migration_listener : public service::migration_listener {
    void on_create_user_type(const sstring& ks_name, const sstring& type_name) override {}
    void on_create_function(const sstring& ks_name, const sstring& function_name) override {}
    void on_create_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
+    void on_create_view(const sstring& ks_name, const sstring& view_name) override {}

    void on_update_keyspace(const sstring& ks_name) override {}
    void on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool) override {}
    void on_update_user_type(const sstring& ks_name, const sstring& type_name) override {}
    void on_update_function(const sstring& ks_name, const sstring& function_name) override {}
    void on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
+    void on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed) override {}

    void on_drop_keyspace(const sstring& ks_name) override {
-        // TODO:
-        //DatabaseDescriptor.getAuthorizer().revokeAll(DataResource.keyspace(ksName));
-
+        auth::authorizer::get().revoke_all(auth::data_resource(ks_name));
    }
    void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) override {
-        // TODO:
-        //DatabaseDescriptor.getAuthorizer().revokeAll(DataResource.columnFamily(ksName, cfName));
+        auth::authorizer::get().revoke_all(auth::data_resource(ks_name, cf_name));
    }
    void on_drop_user_type(const sstring& ks_name, const sstring& type_name) override {}
    void on_drop_function(const sstring& ks_name, const sstring& function_name) override {}
    void on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
+    void on_drop_view(const sstring& ks_name, const sstring& view_name) override {}
 };

 static auth_migration_listener auth_migration;

+namespace std { // std::hash specializations for the two halves of the permissions-cache key
+template <>
+struct hash<auth::data_resource> { // delegates to the resource's own hash_value()
+    size_t operator()(const auth::data_resource & v) const {
+        return v.hash_value();
+    }
+};
+
+template <>
+struct hash<auth::authenticated_user> { // combines the reported name with the anonymous flag
+    size_t operator()(const auth::authenticated_user & v) const {
+        return utils::tuple_hash()(v.name(), v.is_anonymous());
+    }
+};
+}
+
+class auth::auth::permissions_cache { // caches a permission_set per (user, resource) pair; entries are loaded through the global authorizer
+public:
+    typedef utils::loading_cache<std::pair<authenticated_user, data_resource>, permission_set, utils::loading_cache_reload_enabled::yes, utils::simple_entry_size<permission_set>, utils::tuple_hash> cache_type;
+    typedef typename cache_type::key_type key_type;
+
+    permissions_cache() // default: read the settings from the local query processor's database config
+                    : permissions_cache(
+                                    cql3::get_local_query_processor().db().local().get_config()) {
+    }
+
+    permissions_cache(const db::config& cfg) // max entries, validity and update interval (both in ms) come from cfg
+                    : _cache(cfg.permissions_cache_max_entries(), std::chrono::milliseconds(cfg.permissions_validity_in_ms()), std::chrono::milliseconds(cfg.permissions_update_interval_in_ms()), alogger,
+                        [] (const key_type& k) { // loader: asks the authorizer for this user's permissions on the resource
+                            alogger.debug("Refreshing permissions for {}", k.first.name());
+                            return authorizer::get().authorize(::make_shared<authenticated_user>(k.first), k.second);
+                        }) {}
+
+    future<> stop() { // forwards to the underlying cache's stop()
+        return _cache.stop();
+    }
+
+    future<permission_set> get(::shared_ptr<authenticated_user> user, data_resource resource) { // NOTE(review): dereferences user without a null check - confirm callers never pass null
+        return _cache.get(key_type(*user, std::move(resource)));
+    }
+
+private:
+    cache_type _cache;
+};
+
+namespace std { // for ADL, yuch. NOTE(review): auth.hh declares this operator at global scope, not in std - confirm which is intended
+// Streams a permissions-cache key as "{user: <name>, data_resource: <resource>}".
+std::ostream& operator<<(std::ostream& os, const std::pair<auth::authenticated_user, auth::data_resource>& p) {
+    os << "{user: " << p.first.name() << ", data_resource: " << p.second << "}";
+    return os;
+}
+
+}
+
+static distributed<auth::auth::permissions_cache> perm_cache;
+
 /**
 * Poor mans job schedule. For maximum 2 jobs. Sic.
 * Still does nothing more clever than waiting 10 seconds
@@ -115,7 +176,7 @@ struct waiter {
            tmr.cancel();
            done.set_exception(std::runtime_error("shutting down"));
        }
-        logger.trace("Deleting scheduled task");
+        alogger.trace("Deleting scheduled task");
    }
    void kill() {
    }
@@ -129,7 +190,7 @@ static std::vector<waiter_ptr> & thread_waiters() {
 }

 void auth::auth::schedule_when_up(scheduled_func f) {
-    logger.trace("Adding scheduled task");
+    alogger.trace("Adding scheduled task");

    auto & waiters = thread_waiters();

@@ -145,7 +206,7 @@ void auth::auth::schedule_when_up(scheduled_func f) {
            waiters.erase(i);
        }
    }).then([f = std::move(f)] {
-        logger.trace("Running scheduled task");
+        alogger.trace("Running scheduled task");
        return f();
    }).handle_exception([](auto ep) {
        return make_ready_future();
@@ -163,29 +224,38 @@ bool auth::auth::is_class_type(const sstring& type, const sstring& classname) {
 future<> auth::auth::setup() {
    auto& db = cql3::get_local_query_processor().db().local();
    auto& cfg = db.get_config();
-    auto type = cfg.authenticator();

-    if (is_class_type(type, authenticator::ALLOW_ALL_AUTHENTICATOR_NAME)) {
-        return authenticator::setup(type).discard_result(); // just create the object
+    future<> f = perm_cache.start();
+
+    if (is_class_type(cfg.authenticator(),
+                    authenticator::ALLOW_ALL_AUTHENTICATOR_NAME)
+                    && is_class_type(cfg.authorizer(),
+                                    authorizer::ALLOW_ALL_AUTHORIZER_NAME)
+                                    ) {
+        // just create the objects
+        return f.then([&cfg] {
+            return authenticator::setup(cfg.authenticator());
+        }).then([&cfg] {
+            return authorizer::setup(cfg.authorizer());
+        });
    }

-    future<> f = make_ready_future();
-
    if (!db.has_keyspace(AUTH_KS)) {
        std::map<sstring, sstring> opts;
        opts["replication_factor"] = "1";
        auto ksm = keyspace_metadata::new_keyspace(AUTH_KS, "org.apache.cassandra.locator.SimpleStrategy", opts, true);
-        f = service::get_local_migration_manager().announce_new_keyspace(ksm, false);
+        // We use min_timestamp so that default keyspace metadata will lose to any manual adjustments. See issue #2129.
+        f = service::get_local_migration_manager().announce_new_keyspace(ksm, api::min_timestamp, false);
    }

    return f.then([] {
        return setup_table(USERS_CF, sprint("CREATE TABLE %s.%s (%s text, %s boolean, PRIMARY KEY(%s)) WITH gc_grace_seconds=%d",
                                        AUTH_KS, USERS_CF, USER_NAME, SUPER, USER_NAME,
                                        90 * 24 * 60 * 60)); // 3 months.
-    }).then([type] {
-        return authenticator::setup(type).discard_result();
-    }).then([] {
-        // TODO authorizer
+    }).then([&cfg] {
+        return authenticator::setup(cfg.authenticator());
+    }).then([&cfg] {
+        return authorizer::setup(cfg.authorizer());
    }).then([] {
        service::get_local_migration_manager().register_listener(&auth_migration); // again, only one shard...
        // instead of once-timer, just schedule this later
@@ -196,12 +266,12 @@ future<> auth::auth::setup() {
                    auto query = sprint("INSERT INTO %s.%s (%s, %s) VALUES (?, ?) USING TIMESTAMP 0",
                                    AUTH_KS, USERS_CF, USER_NAME, SUPER);
                    cql3::get_local_query_processor().process(query, db::consistency_level::ONE, {DEFAULT_SUPERUSER_NAME, true}).then([](auto) {
-                        logger.info("Created default superuser '{}'", DEFAULT_SUPERUSER_NAME);
+                        alogger.info("Created default superuser '{}'", DEFAULT_SUPERUSER_NAME);
                    }).handle_exception([](auto ep) {
                        try {
                            std::rethrow_exception(ep);
                        } catch (exceptions::request_execution_exception&) {
-                            logger.warn("Skipped default superuser setup: some nodes were not ready");
+                            alogger.warn("Skipped default superuser setup: some nodes were not ready");
                        }
                    });
                }
@@ -216,9 +286,15 @@ future<> auth::auth::shutdown() {
    // db-env-shutdown != process shutdown
    return smp::invoke_on_all([] {
        thread_waiters().clear();
+    }).then([] {
+        return perm_cache.stop();
    });
 }

+future<auth::permission_set> auth::auth::get_permissions(::shared_ptr<authenticated_user> user, data_resource resource) { // served from the local instance of the distributed permissions cache
+    return perm_cache.local().get(std::move(user), std::move(resource));
+}
+
 static db::consistency_level consistency_for_user(const sstring& username) {
    if (username == auth::auth::DEFAULT_SUPERUSER_NAME) {
        return db::consistency_level::QUORUM;
@@ -253,14 +329,13 @@ future<bool> auth::auth::is_super_user(const sstring& username) {
                    });
 }

-future<> auth::auth::insert_user(const sstring& username, bool is_super)
-                throw (exceptions::request_execution_exception) {
+future<> auth::auth::insert_user(const sstring& username, bool is_super) {
    return cql3::get_local_query_processor().process(sprint("INSERT INTO %s.%s (%s, %s) VALUES (?, ?)",
                    AUTH_KS, USERS_CF, USER_NAME, SUPER),
                    consistency_for_user(username), { username, is_super }).discard_result();
 }

-future<> auth::auth::delete_user(const sstring& username) throw(exceptions::request_execution_exception) {
+future<> auth::auth::delete_user(const sstring& username) {
    return cql3::get_local_query_processor().process(sprint("DELETE FROM %s.%s WHERE %s = ?",
                    AUTH_KS, USERS_CF, USER_NAME),
                    consistency_for_user(username), { username }).discard_result();
@@ -274,15 +349,18 @@ future<> auth::auth::setup_table(const sstring& name, const sstring& cql) {
        return make_ready_future();
    }

-    ::shared_ptr<cql3::statements::cf_statement> parsed = static_pointer_cast<
-                    cql3::statements::cf_statement>(cql3::query_processor::parse_statement(cql));
+    ::shared_ptr<cql3::statements::raw::cf_statement> parsed = static_pointer_cast<
+                    cql3::statements::raw::cf_statement>(cql3::query_processor::parse_statement(cql));
    parsed->prepare_keyspace(AUTH_KS);
    ::shared_ptr<cql3::statements::create_table_statement> statement =
                    static_pointer_cast<cql3::statements::create_table_statement>(
-                                    parsed->prepare(db)->statement);
-    // Origin sets "Legacy Cf Id" for the new table. We have no need to be
-    // pre-2.1 compatible (afaik), so lets skip a whole lotta hoolaballo
-    return statement->announce_migration(qp.proxy(), false).then([statement](bool) {});
+                                    parsed->prepare(db, qp.get_cql_stats())->statement);
+    auto schema = statement->get_cf_meta_data();
+    auto uuid = generate_legacy_id(schema->ks_name(), schema->cf_name());
+
+    schema_builder b(schema);
+    b.set_uuid(uuid);
+    return service::get_local_migration_manager().announce_new_column_family(b.build(), false);
 }

 future<bool> auth::auth::has_existing_users(const sstring& cfname, const sstring& def_user_name, const sstring& name_column) {
--- a/auth/auth.hh
+++ b/auth/auth.hh
@@ -17,9 +17,9 @@
 */

 /*
- * Copyright 2016 Cloudius Systems
+ * Copyright (C) 2016 ScyllaDB
 *
- * Modified by Cloudius Systems
+ * Modified by ScyllaDB
 */

 /*
@@ -44,13 +44,20 @@
 #include <chrono>
 #include <seastar/core/sstring.hh>
 #include <seastar/core/future.hh>
+#include <seastar/core/shared_ptr.hh>
+

 #include "exceptions/exceptions.hh"
+#include "permission.hh"
+#include "data_resource.hh"
+#include "authenticated_user.hh"

 namespace auth {

 class auth {
 public:
+    class permissions_cache;
+
    static const sstring DEFAULT_SUPERUSER_NAME;
    static const sstring AUTH_KS;
    static const sstring USERS_CF;
@@ -58,12 +65,7 @@ public:

    static bool is_class_type(const sstring& type, const sstring& classname);

-#if 0
-    public static Set<Permission> getPermissions(AuthenticatedUser user, IResource resource)
-    {
-        return permissionsCache.getPermissions(user, resource);
-    }
-#endif
+    static future<permission_set> get_permissions(::shared_ptr<authenticated_user>, data_resource);

    /**
     * Checks if the username is stored in AUTH_KS.USERS_CF.
@@ -88,7 +90,7 @@ public:
     * @param isSuper User's new status.
     * @throws RequestExecutionException
     */
-    static future<> insert_user(const sstring& username, bool is_super) throw(exceptions::request_execution_exception);
+    static future<> insert_user(const sstring& username, bool is_super);

    /**
     * Deletes the user from AUTH_KS.USERS_CF.
@@ -96,7 +98,7 @@ public:
     * @param username Username to delete.
     * @throws RequestExecutionException
     */
-    static future<> delete_user(const sstring& username) throw(exceptions::request_execution_exception);
+    static future<> delete_user(const sstring& username);

    /**
     * Sets up Authenticator and Authorizer.
@@ -119,3 +121,5 @@ public:
    static void schedule_when_up(scheduled_func);
 };
 }
+
+std::ostream& operator<<(std::ostream& os, const std::pair<auth::authenticated_user, auth::data_resource>& p);
--- a/auth/authenticated_user.cc
+++ b/auth/authenticated_user.cc
@@ -17,9 +17,9 @@
 */

 /*
- * Copyright 2016 Cloudius Systems
+ * Copyright (C) 2016 ScyllaDB
 *
- * Modified by Cloudius Systems
+ * Modified by ScyllaDB
 */

 /*
@@ -41,6 +41,7 @@


 #include "authenticated_user.hh"
+#include "auth.hh"

 const sstring auth::authenticated_user::ANONYMOUS_USERNAME("anonymous");

@@ -52,10 +53,20 @@ auth::authenticated_user::authenticated_user(sstring name)
                : _name(name), _anon(false)
 {}

+auth::authenticated_user::authenticated_user(authenticated_user&&) = default;
+auth::authenticated_user::authenticated_user(const authenticated_user&) = default;
+
 const sstring& auth::authenticated_user::name() const {
    return _anon ? ANONYMOUS_USERNAME : _name;
 }

+future<bool> auth::authenticated_user::is_super() const { // resolves to whether this user is a superuser
+    if (is_anonymous()) { // an anonymous user is never a superuser; answer without a lookup
+        return make_ready_future<bool>(false);
+    }
+    return auth::auth::is_super_user(_name); // named users: defer to auth::is_super_user()
+}
+
 bool auth::authenticated_user::operator==(const authenticated_user& v) const {
    return _anon ? v._anon : _name == v._name;
 }
--- a/auth/authenticated_user.hh
+++ b/auth/authenticated_user.hh
@@ -17,9 +17,9 @@
 */

 /*
- * Copyright 2016 Cloudius Systems
+ * Copyright (C) 2016 ScyllaDB
 *
- * Modified by Cloudius Systems
+ * Modified by ScyllaDB
 */

 /*
@@ -42,6 +42,8 @@
 #pragma once

 #include <seastar/core/sstring.hh>
+#include <seastar/core/future.hh>
+#include "seastarx.hh"

 namespace auth {

@@ -51,6 +53,8 @@ public:

    authenticated_user();
    authenticated_user(sstring name);
+    authenticated_user(authenticated_user&&);
+    authenticated_user(const authenticated_user&);

    const sstring& name() const;

@@ -60,7 +64,7 @@ public:
     * Im most cased, though not necessarily, a superuser will have Permission.ALL on every resource
     * (depends on IAuthorizer implementation).
     */
-    bool is_super() const;
+    future<bool> is_super() const;

    /**
     * If IAuthenticator doesn't require authentication, this method may return true.
--- a/auth/authenticator.cc
+++ b/auth/authenticator.cc
@@ -17,9 +17,9 @@
 */

 /*
- * Copyright 2016 Cloudius Systems
+ * Copyright (C) 2016 ScyllaDB
 *
- * Modified by Cloudius Systems
+ * Modified by ScyllaDB
 */

 /*
@@ -49,6 +49,22 @@ const sstring auth::authenticator::USERNAME_KEY("username");
 const sstring auth::authenticator::PASSWORD_KEY("password");
 const sstring auth::authenticator::ALLOW_ALL_AUTHENTICATOR_NAME("org.apache.cassandra.auth.AllowAllAuthenticator");

+auth::authenticator::option auth::authenticator::string_to_option(const sstring& name) { // parses an option name; throws std::invalid_argument(name) if unknown
+    if (strcasecmp(name.c_str(), "password") == 0) { // comparison ignores case
+        return option::PASSWORD;
+    }
+    throw std::invalid_argument(name);
+}
+
+sstring auth::authenticator::option_to_string(option opt) { // returns the option's upper-case name; throws std::invalid_argument for an unknown value
+    switch (opt) {
+    case option::PASSWORD:
+        return "PASSWORD";
+    default: // sprint() is printf-style ("{}" is no placeholder); print the raw value, as streaming opt would re-enter this function
+        throw std::invalid_argument(sprint("Unknown option %d", static_cast<int>(opt)));
+    }
+}
+
 /**
 * Authenticator is assumed to be a fully state-less immutable object (note all the const).
 * We thus store a single instance globally, since it should be safe/ok.
@@ -56,7 +72,7 @@ const sstring auth::authenticator::ALLOW_ALL_AUTHENTICATOR_NAME("org.apache.cass
 static std::unique_ptr<auth::authenticator> global_authenticator;

 future<>
-auth::authenticator::setup(const sstring& type) throw (exceptions::configuration_exception) {
+auth::authenticator::setup(const sstring& type) {
    if (auth::auth::is_class_type(type, ALLOW_ALL_AUTHENTICATOR_NAME)) {
        class allow_all_authenticator : public authenticator {
        public:
@@ -72,20 +88,21 @@ auth::authenticator::setup(const sstring& type) throw (exceptions::configuration
            option_set alterable_options() const override {
                return option_set();
            }
-            future<::shared_ptr<authenticated_user>> authenticate(const credentials_map& credentials) const throw(exceptions::authentication_exception) override {
+            future<::shared_ptr<authenticated_user>> authenticate(const credentials_map& credentials) const override {
                return make_ready_future<::shared_ptr<authenticated_user>>(::make_shared<authenticated_user>());
            }
-            future<> create(sstring username, const option_map& options) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) override {
+            future<> create(sstring username, const option_map& options) override {
                return make_ready_future();
            }
-            future<> alter(sstring username, const option_map& options) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) override {
+            future<> alter(sstring username, const option_map& options) override {
                return make_ready_future();
            }
-            future<> drop(sstring username) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) override {
+            future<> drop(sstring username) override {
                return make_ready_future();
            }
-            resource_ids protected_resources() const override {
-                return resource_ids();
+            const resource_ids& protected_resources() const override {
+                static const resource_ids ids;
+                return ids;
            }
            ::shared_ptr<sasl_challenge> new_sasl_challenge() const override {
                throw std::runtime_error("Should not reach");
--- a/auth/authenticator.hh
+++ b/auth/authenticator.hh
@@ -17,9 +17,9 @@
 */

 /*
- * Copyright 2016 Cloudius Systems
+ * Copyright (C) 2016 ScyllaDB
 *
- * Modified by Cloudius Systems
+ * Modified by ScyllaDB
 */

 /*
@@ -79,22 +79,20 @@ public:
        PASSWORD
    };

+    static option string_to_option(const sstring&);
+    static sstring option_to_string(option);
+
    using option_set = enum_set<super_enum<option, option::PASSWORD>>;
    using option_map = std::unordered_map<option, boost::any, enum_hash<option>>;
    using credentials_map = std::unordered_map<sstring, sstring>;

-    /**
-     * Resource id mappings, i.e. keyspace and/or column families.
-     */
-    using resource_ids = std::set<data_resource>;
-
    /**
     * Setup is called once upon system startup to initialize the IAuthenticator.
     *
     * For example, use this method to create any required keyspaces/column families.
     * Note: Only call from main thread.
     */
-    static future<> setup(const sstring& type) throw(exceptions::configuration_exception);
+    static future<> setup(const sstring& type);

    /**
     * Returns the system authenticator. Must have called setup before calling this.
@@ -131,7 +129,7 @@ public:
     *
     * @throws authentication_exception if credentials don't match any known user.
     */
-    virtual future<::shared_ptr<authenticated_user>> authenticate(const credentials_map& credentials) const throw(exceptions::authentication_exception) = 0;
+    virtual future<::shared_ptr<authenticated_user>> authenticate(const credentials_map& credentials) const = 0;

    /**
     * Called during execution of CREATE USER query (also may be called on startup, see seedSuperuserOptions method).
@@ -143,7 +141,7 @@ public:
     * @throws exceptions::request_validation_exception
     * @throws exceptions::request_execution_exception
     */
-    virtual future<> create(sstring username, const option_map& options) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) = 0;
+    virtual future<> create(sstring username, const option_map& options) = 0;

    /**
     * Called during execution of ALTER USER query.
@@ -156,7 +154,7 @@ public:
     * @throws exceptions::request_validation_exception
     * @throws exceptions::request_execution_exception
     */
-    virtual future<> alter(sstring username, const option_map& options) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) = 0;
+    virtual future<> alter(sstring username, const option_map& options) = 0;


    /**
@@ -166,7 +164,7 @@ public:
     * @throws exceptions::request_validation_exception
     * @throws exceptions::request_execution_exception
     */
-    virtual future<> drop(sstring username) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) = 0;
+    virtual future<> drop(sstring username) = 0;

     /**
     * Set of resources that should be made inaccessible to users and only accessible internally.
@@ -174,14 +172,14 @@ public:
     * @return Keyspaces, column families that will be unmodifiable by users; other resources.
     * @see resource_ids
     */
-    virtual resource_ids protected_resources() const = 0;
+    virtual const resource_ids& protected_resources() const = 0;

    class sasl_challenge {
    public:
        virtual ~sasl_challenge() {}
-        virtual bytes evaluate_response(bytes_view client_response) throw(exceptions::authentication_exception) = 0;
+        virtual bytes evaluate_response(bytes_view client_response) = 0;
        virtual bool is_complete() const = 0;
-        virtual future<::shared_ptr<authenticated_user>> get_authenticated_user() const throw(exceptions::authentication_exception) = 0;
+        virtual future<::shared_ptr<authenticated_user>> get_authenticated_user() const = 0;
    };

    /**
@@ -194,5 +192,9 @@ public:
    virtual ::shared_ptr<sasl_challenge> new_sasl_challenge() const = 0;
 };

+inline std::ostream& operator<<(std::ostream& os, authenticator::option opt) { // streams the name produced by option_to_string()
+    return os << authenticator::option_to_string(opt);
+}
+
 }

--- a/auth/authorizer.cc
+++ b/auth/authorizer.cc
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (C) 2016 ScyllaDB
+ *
+ * Modified by ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "authorizer.hh"
+#include "authenticated_user.hh"
+#include "default_authorizer.hh"
+#include "auth.hh"
+#include "db/config.hh"
+
+const sstring auth::authorizer::ALLOW_ALL_AUTHORIZER_NAME("org.apache.cassandra.auth.AllowAllAuthorizer");
+
+/**
+ * Authorizer is assumed to be a fully state-less immutable object (note all the const).
+ * We thus store a single instance globally, since it should be safe/ok.
+ */
+static std::unique_ptr<auth::authorizer> global_authorizer;
+
+future<>
+auth::authorizer::setup(const sstring& type) { // installs the global authorizer selected by the class name in type
+    if (auth::auth::is_class_type(type, ALLOW_ALL_AUTHORIZER_NAME)) {
+        class allow_all_authorizer : public authorizer { // grants every permission; GRANT/REVOKE/LIST are rejected
+        public:
+            future<permission_set> authorize(::shared_ptr<authenticated_user>, data_resource) const override {
+                return make_ready_future<permission_set>(permissions::ALL);
+            }
+            future<> grant(::shared_ptr<authenticated_user>, permission_set, data_resource, sstring) override { // NOTE: throws synchronously rather than returning a failed future
+                throw exceptions::invalid_request_exception("GRANT operation is not supported by AllowAllAuthorizer");
+            }
+            future<> revoke(::shared_ptr<authenticated_user>, permission_set, data_resource, sstring) override { // NOTE: throws synchronously, like grant()
+                throw exceptions::invalid_request_exception("REVOKE operation is not supported by AllowAllAuthorizer");
+            }
+            future<std::vector<permission_details>> list(::shared_ptr<authenticated_user> performer, permission_set, optional<data_resource>, optional<sstring>) const override {
+                throw exceptions::invalid_request_exception("LIST PERMISSIONS operation is not supported by AllowAllAuthorizer");
+            }
+            future<> revoke_all(sstring dropped_user) override { // nothing is stored, so there is nothing to revoke
+                return make_ready_future();
+            }
+            future<> revoke_all(data_resource) override { // likewise a no-op
+                return make_ready_future();
+            }
+            const resource_ids& protected_resources() override { // always an empty set
+                static const resource_ids ids;
+                return ids;
+            }
+            future<> validate_configuration() const override {
+                return make_ready_future();
+            }
+        };
+
+        global_authorizer = std::make_unique<allow_all_authorizer>();
+    } else if (auth::auth::is_class_type(type, default_authorizer::DEFAULT_AUTHORIZER_NAME)) {
+        auto da = std::make_unique<default_authorizer>();
+        auto f = da->init(); // start initialisation before the instance is moved into the continuation
+        return f.then([da = std::move(da)]() mutable { // publish the instance only once init() has resolved
+            global_authorizer = std::move(da);
+        });
+    } else { // unrecognised class name: reported by throwing synchronously
+        throw exceptions::configuration_exception("Invalid authorizer type: " + type);
+    }
+    return make_ready_future();
+}
+
+auth::authorizer& auth::authorizer::get() { // returns the instance installed by setup(); asserts that one has been set
+    assert(global_authorizer);
+    return *global_authorizer;
+}
--- a/auth/authorizer.hh
+++ b/auth/authorizer.hh
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (C) 2016 ScyllaDB
+ *
+ * Modified by ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <vector>
+#include <tuple>
+
+#include <experimental/optional>
+#include <seastar/core/future.hh>
+#include <seastar/core/shared_ptr.hh>
+
+#include "permission.hh"
+#include "data_resource.hh"
+
+#include "seastarx.hh"
+
+namespace auth {
+
+class authenticated_user;
+
+struct permission_details { // one (user, resource, permissions) entry, the element type returned by authorizer::list()
+    sstring user;
+    data_resource resource;
+    permission_set permissions;
+
+    bool operator<(const permission_details& v) const { // lexicographic order: user, then resource, then permissions
+        return std::tie(user, resource, permissions) < std::tie(v.user, v.resource, v.permissions);
+    }
+};
+
+using std::experimental::optional;
+
+class authorizer {
+public:
+    static const sstring ALLOW_ALL_AUTHORIZER_NAME;
+
+    virtual ~authorizer() {}
+
+    /**
+     * The primary Authorizer method. Returns a set of permissions of a user on a resource.
+     *
+     * @param user Authenticated user requesting authorization.
+     * @param resource Resource for which the authorization is being requested. @see DataResource.
+     * @return Set of permissions of the user on the resource. Should never return empty. Use permission.NONE instead.
+     */
+    virtual future<permission_set> authorize(::shared_ptr<authenticated_user>, data_resource) const = 0;
+
+    /**
+     * Grants a set of permissions on a resource to a user.
+     * The opposite of revoke().
+     *
+     * @param performer User who grants the permissions.
+     * @param permissions Set of permissions to grant.
+     * @param to Grantee of the permissions.
+     * @param resource Resource on which to grant the permissions.
+     *
+     * @throws RequestValidationException
+     * @throws RequestExecutionException
+     */
+    virtual future<> grant(::shared_ptr<authenticated_user> performer, permission_set, data_resource, sstring to) = 0;
+
+    /**
+     * Revokes a set of permissions on a resource from a user.
+     * The opposite of grant().
+     *
+     * @param performer User who revokes the permissions.
+     * @param permissions Set of permissions to revoke.
+     * @param from Revokee of the permissions.
+     * @param resource Resource on which to revoke the permissions.
+     *
+     * @throws RequestValidationException
+     * @throws RequestExecutionException
+     */
+    virtual future<> revoke(::shared_ptr<authenticated_user> performer, permission_set, data_resource, sstring from) = 0;
+
+    /**
+     * Returns a list of permissions on a resource of a user.
+     *
+     * @param performer User who wants to see the permissions.
+     * @param permissions Set of Permission values the user is interested in. The result should only include the matching ones.
+     * @param resource The resource on which permissions are requested. Can be null, in which case permissions on all resources
+     *                 should be returned.
+     * @param of The user whose permissions are requested. Can be null, in which case permissions of every user should be returned.
+     *
+     * @return All of the matching permissions that the requesting user is authorized to know about.
+     *
+     * @throws RequestValidationException
+     * @throws RequestExecutionException
+     */
+    virtual future<std::vector<permission_details>> list(::shared_ptr<authenticated_user> performer, permission_set, optional<data_resource>, optional<sstring>) const = 0;
+
+    /**
+     * This method is called before deleting a user with DROP USER query so that a new user with the same
+     * name wouldn't inherit permissions of the deleted user in the future.
+     *
+     * @param dropped_user The user to revoke all permissions from.
+     */
+    virtual future<> revoke_all(sstring dropped_user) = 0;
+
+    /**
+     * This method is called after a resource is removed (i.e. keyspace or a table is dropped).
+     *
+     * @param droppedResource The resource to revoke all permissions on.
+     */
+    virtual future<> revoke_all(data_resource) = 0;
+
+    /**
+     * Set of resources that should be made inaccessible to users and only accessible internally.
+     *
+     * @return Keyspaces, column families that will be unmodifiable by users; other resources.
+     */
+    virtual const resource_ids& protected_resources() = 0;
+
+    /**
+     * Validates configuration of IAuthorizer implementation (if configurable).
+     *
+     * @throws ConfigurationException when there is a configuration error.
+     */
+    virtual future<> validate_configuration() const = 0;
+
+    /**
+     * Setup is called once upon system startup to initialize the IAuthorizer.
+     *
+     * For example, use this method to create any required keyspaces/column families.
+     */
+    static future<> setup(const sstring& type);
+
+    /**
+     * Returns the system authorizer. Must have called setup before calling this.
+     */
+    static authorizer& get();
+};
+
+}
--- a/auth/data_resource.cc
+++ b/auth/data_resource.cc
@@ -17,9 +17,9 @@
 */

 /*
- * Copyright 2016 Cloudius Systems
+ * Copyright (C) 2016 ScyllaDB
 *
- * Modified by Cloudius Systems
+ * Modified by ScyllaDB
 */

 /*
@@ -47,11 +47,8 @@
 const sstring auth::data_resource::ROOT_NAME("data");

 auth::data_resource::data_resource(level l, const sstring& ks, const sstring& cf)
-    : _ks(ks), _cf(cf)
+    : _level(l), _ks(ks), _cf(cf)
 {
-    if (l != get_level()) {
-        throw std::invalid_argument("level/keyspace/column mismatch");
-    }
 }

 auth::data_resource::data_resource()
@@ -67,14 +64,7 @@ auth::data_resource::data_resource(const sstring& ks, const sstring& cf)
 {}

 auth::data_resource::level auth::data_resource::get_level() const {
-    if (!_cf.empty()) {
-        assert(!_ks.empty());
-        return level::COLUMN_FAMILY;
-    }
-    if (!_ks.empty()) {
-        return level::KEYSPACE;
-    }
-    return level::ROOT;
+    return _level;
 }

 auth::data_resource auth::data_resource::from_name(
@@ -125,16 +115,14 @@ auth::data_resource auth::data_resource::get_parent() const {
    }
 }

-const sstring& auth::data_resource::keyspace() const
-                throw (std::invalid_argument) {
+const sstring& auth::data_resource::keyspace() const {
    if (is_root_level()) {
        throw std::invalid_argument("ROOT data resource has no keyspace");
    }
    return _ks;
 }

-const sstring& auth::data_resource::column_family() const
-                throw (std::invalid_argument) {
+const sstring& auth::data_resource::column_family() const {
    if (!is_column_family_level()) {
        throw std::invalid_argument(sprint("%s data resource has no column family", name()));
    }
@@ -158,7 +146,15 @@ bool auth::data_resource::exists() const {
 }

 sstring auth::data_resource::to_string() const {
-    return name();
+    switch (get_level()) {
+        case level::ROOT:
+            return "<all keyspaces>";
+        case level::KEYSPACE:
+            return sprint("<keyspace %s>", _ks);
+        case level::COLUMN_FAMILY:
+        default:
+            return sprint("<table %s.%s>", _ks, _cf);
+    }
 }

 bool auth::data_resource::operator==(const data_resource& v) const {
@@ -170,6 +166,6 @@ bool auth::data_resource::operator<(const data_resource& v) const {
 }

 std::ostream& auth::operator<<(std::ostream& os, const data_resource& r) {
-    return os << r.name();
+    return os << r.to_string();
 }

--- a/auth/data_resource.hh
+++ b/auth/data_resource.hh
@@ -17,9 +17,9 @@
 */

 /*
- * Copyright 2016 Cloudius Systems
+ * Copyright (C) 2016 ScyllaDB
 *
- * Modified by Cloudius Systems
+ * Modified by ScyllaDB
 */

 /*
@@ -41,8 +41,11 @@

 #pragma once

+#include "utils/hash.hh"
 #include <iosfwd>
+#include <set>
 #include <seastar/core/sstring.hh>
+#include "seastarx.hh"

 namespace auth {

@@ -54,6 +57,7 @@ private:

    static const sstring ROOT_NAME;

+    level _level;
    sstring _ks;
    sstring _cf;

@@ -114,13 +118,13 @@ public:
     * @return keyspace of the resource.
     * @throws std::invalid_argument if it's the root-level resource.
     */
-    const sstring& keyspace() const throw(std::invalid_argument);
+    const sstring& keyspace() const;

    /**
     * @return column family of the resource.
     * @throws std::invalid_argument if it's not a cf-level resource.
     */
-    const sstring& column_family() const throw(std::invalid_argument);
+    const sstring& column_family() const;

    /**
     * @return Whether or not the resource has a parent in the hierarchy.
@@ -136,8 +140,17 @@ public:

    bool operator==(const data_resource&) const;
    bool operator<(const data_resource&) const;
+
+    size_t hash_value() const {
+        return utils::tuple_hash()(_ks, _cf);
+    }
 };

+/**
+ * Resource id mappings, i.e. keyspace and/or column families.
+ */
+using resource_ids = std::set<data_resource>;
+
 std::ostream& operator<<(std::ostream&, const data_resource&);

 }
--- a/auth/default_authorizer.cc
+++ b/auth/default_authorizer.cc
@@ -0,0 +1,240 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (C) 2016 ScyllaDB
+ *
+ * Modified by ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <unistd.h>
+#include <crypt.h>
+#include <random>
+#include <chrono>
+
+#include <seastar/core/reactor.hh>
+
+#include "auth.hh"
+#include "default_authorizer.hh"
+#include "authenticated_user.hh"
+#include "permission.hh"
+#include "cql3/query_processor.hh"
+#include "exceptions/exceptions.hh"
+#include "log.hh"
+
+const sstring auth::default_authorizer::DEFAULT_AUTHORIZER_NAME(
+                "org.apache.cassandra.auth.CassandraAuthorizer");
+
+static const sstring USER_NAME = "username";
+static const sstring RESOURCE_NAME = "resource";
+static const sstring PERMISSIONS_NAME = "permissions";
+static const sstring PERMISSIONS_CF = "permissions";
+
+static logging::logger alogger("default_authorizer");
+
+auth::default_authorizer::default_authorizer() { // Empty body: the permissions table is set up separately, in init().
+}
+auth::default_authorizer::~default_authorizer() { // Empty body: nothing is released here.
+}
+
+future<> auth::default_authorizer::init() { // Builds the CREATE TABLE statement for the permissions table and hands it to auth::setup_table().
+    sstring create_table = sprint("CREATE TABLE %s.%s ("
+                    "%s text,"
+                    "%s text,"
+                    "%s set<text>,"
+                    "PRIMARY KEY(%s, %s)"
+                    ") WITH gc_grace_seconds=%d", auth::auth::AUTH_KS,
+                    PERMISSIONS_CF, USER_NAME, RESOURCE_NAME, PERMISSIONS_NAME,
+                    USER_NAME, RESOURCE_NAME, 90 * 24 * 60 * 60); // 3 months.
+    // Resulting columns: username text, resource text, permissions set<text>; PRIMARY KEY(username, resource).
+    return auth::setup_table(PERMISSIONS_CF, create_table); // NOTE(review): presumably creates the table only if missing -- confirm in auth.cc.
+}
+
+// Permissions `user` holds on `resource`: ALL for superusers, otherwise the set stored for (user, resource); NONE if nothing is stored or the query fails.
+future<auth::permission_set> auth::default_authorizer::authorize(
+                ::shared_ptr<authenticated_user> user, data_resource resource) const {
+    return user->is_super().then([this, user, resource = std::move(resource)](bool is_super) {
+        if (is_super) {
+            return make_ready_future<permission_set>(permissions::ALL);
+        }
+
+        /**
+         * TODO: could create actual data type for permission (translating string<->perm),
+         * but this seems overkill right now. We still must store strings so...
+         */
+        auto& qp = cql3::get_local_query_processor();
+        auto query = sprint("SELECT %s FROM %s.%s WHERE %s = ? AND %s = ?"
+                        , PERMISSIONS_NAME, auth::AUTH_KS, PERMISSIONS_CF, USER_NAME, RESOURCE_NAME);
+        return qp.process(query, db::consistency_level::LOCAL_ONE, {user->name(), resource.name() })
+                        .then_wrapped([=](future<::shared_ptr<cql3::untyped_result_set>> f) {
+            try {
+                auto res = f.get0();
+                // No row, or a row without the permissions column, means nothing was granted.
+                if (res->empty() || !res->one().has(PERMISSIONS_NAME)) {
+                    return make_ready_future<permission_set>(permissions::NONE);
+                }
+                return make_ready_future<permission_set>(permissions::from_strings(res->one().get_set<sstring>(PERMISSIONS_NAME)));
+            } catch (exceptions::request_execution_exception& e) { // only this type is caught; it is logged with its cause and mapped to NONE
+                alogger.warn("CassandraAuthorizer failed to authorize {} for {}: {}", user->name(), resource, e);
+                return make_ready_future<permission_set>(permissions::NONE);
+            }
+        });
+    });
+}
+
+#include <boost/range.hpp>
+
+future<> auth::default_authorizer::modify(
+                ::shared_ptr<authenticated_user> performer, permission_set set,
+                data_resource resource, sstring user, sstring op) { // op is "+" when called from grant() and "-" from revoke(); performer is not used in this body.
+    // TODO: why does this not check super user?
+    auto& qp = cql3::get_local_query_processor();
+    auto query = sprint("UPDATE %s.%s SET %s = %s %s ? WHERE %s = ? AND %s = ?", // i.e. SET permissions = permissions <op> ?
+                    auth::AUTH_KS, PERMISSIONS_CF, PERMISSIONS_NAME,
+                    PERMISSIONS_NAME, op, USER_NAME, RESOURCE_NAME);
+    return qp.process(query, db::consistency_level::ONE, { // bound values, in order: permission names, user, resource name
+                    permissions::to_strings(set), user, resource.name() }).discard_result();
+}
+
+
+future<> auth::default_authorizer::grant(
+                ::shared_ptr<authenticated_user> performer, permission_set set,
+                data_resource resource, sstring to) { // Thin wrapper: forwards to modify() with the "+" operator.
+    return modify(std::move(performer), std::move(set), std::move(resource), std::move(to), "+"); // "+" yields "permissions = permissions + ?" in modify()'s UPDATE
+}
+
+future<> auth::default_authorizer::revoke(
+                ::shared_ptr<authenticated_user> performer, permission_set set,
+                data_resource resource, sstring from) { // Thin wrapper: forwards to modify() with the "-" operator.
+    return modify(std::move(performer), std::move(set), std::move(resource), std::move(from), "-"); // "-" yields "permissions = permissions - ?" in modify()'s UPDATE
+}
+
+future<std::vector<auth::permission_details>> auth::default_authorizer::list(
+                ::shared_ptr<authenticated_user> performer, permission_set set,
+                optional<data_resource> resource, optional<sstring> user) const { // Lists stored grants, optionally filtered by resource and/or user, restricted to the permissions in `set`.
+    return performer->is_super().then([this, performer, set = std::move(set), resource = std::move(resource), user = std::move(user)](bool is_super) {
+        if (!is_super && (!user || performer->name() != *user)) { // non-superusers may only list their own permissions
+            throw exceptions::unauthorized_exception(sprint("You are not authorized to view %s's permissions", user ? *user : "everyone"));
+        }
+        // Base query; a WHERE clause is appended below depending on which of resource/user were supplied.
+        auto query = sprint("SELECT %s, %s, %s FROM %s.%s", USER_NAME, RESOURCE_NAME, PERMISSIONS_NAME, auth::AUTH_KS, PERMISSIONS_CF);
+        auto& qp = cql3::get_local_query_processor();
+
+        // Oh, look, it is a case where it does not pay off to have
+        // parameters to process in an initializer list.
+        future<::shared_ptr<cql3::untyped_result_set>> f = make_ready_future<::shared_ptr<cql3::untyped_result_set>>();
+
+        if (resource && user) {
+            query += sprint(" WHERE %s = ? AND %s = ?", USER_NAME, RESOURCE_NAME);
+            f = qp.process(query, db::consistency_level::ONE, {*user, resource->name()});
+        } else if (resource) {
+            query += sprint(" WHERE %s = ? ALLOW FILTERING", RESOURCE_NAME); // resource alone is not the first primary-key column, hence ALLOW FILTERING
+            f = qp.process(query, db::consistency_level::ONE, {resource->name()});
+        } else if (user) {
+            query += sprint(" WHERE %s = ?", USER_NAME);
+            f = qp.process(query, db::consistency_level::ONE, {*user});
+        } else {
+            f = qp.process(query, db::consistency_level::ONE, {});
+        }
+
+        return f.then([set](::shared_ptr<cql3::untyped_result_set> res) {
+            std::vector<permission_details> result;
+            // Rows without a permissions column are skipped; the rest are intersected with the requested set.
+            for (auto& row : *res) {
+                if (row.has(PERMISSIONS_NAME)) {
+                    auto username = row.get_as<sstring>(USER_NAME);
+                    auto resource = data_resource::from_name(row.get_as<sstring>(RESOURCE_NAME));
+                    auto ps = permissions::from_strings(row.get_set<sstring>(PERMISSIONS_NAME));
+                    ps = permission_set::from_mask(ps.mask() & set.mask()); // keep only the permissions the caller asked about
+
+                    result.emplace_back(permission_details {username, resource, ps});
+                }
+            }
+            return make_ready_future<std::vector<permission_details>>(std::move(result));
+        });
+    });
+}
+
+future<> auth::default_authorizer::revoke_all(sstring dropped_user) { // Deletes every permissions row stored under this user name.
+    auto& qp = cql3::get_local_query_processor();
+    auto query = sprint("DELETE FROM %s.%s WHERE %s = ?", auth::AUTH_KS,
+                    PERMISSIONS_CF, USER_NAME);
+    return qp.process(query, db::consistency_level::ONE, { dropped_user }).discard_result().handle_exception(
+                    [dropped_user](auto ep) {
+                        try {
+                            std::rethrow_exception(ep); // rethrow so the exception can be matched by type
+                        } catch (exceptions::request_execution_exception& e) { // only this type is logged and swallowed; others leave this handler
+                            alogger.warn("CassandraAuthorizer failed to revoke all permissions of {}: {}", dropped_user, e);
+                        }
+                    });
+}
+
+future<> auth::default_authorizer::revoke_all(data_resource resource) { // Two steps: select every user with a row for this resource, then delete each (user, resource) row.
+    auto& qp = cql3::get_local_query_processor();
+    auto query = sprint("SELECT %s FROM %s.%s WHERE %s = ? ALLOW FILTERING",
+                    USER_NAME, auth::AUTH_KS, PERMISSIONS_CF, RESOURCE_NAME);
+    return qp.process(query, db::consistency_level::LOCAL_ONE, { resource.name() })
+                    .then_wrapped([resource, &qp](future<::shared_ptr<cql3::untyped_result_set>> f) {
+        try {
+            auto res = f.get0();
+            return parallel_for_each(res->begin(), res->end(), [&qp, res, resource](const cql3::untyped_result_set::row& r) { // NOTE(review): res is presumably captured to keep the rows alive -- confirm
+                auto query = sprint("DELETE FROM %s.%s WHERE %s = ? AND %s = ?"
+                                , auth::AUTH_KS, PERMISSIONS_CF, USER_NAME, RESOURCE_NAME);
+                return qp.process(query, db::consistency_level::LOCAL_ONE, { r.get_as<sstring>(USER_NAME), resource.name() })
+                                .discard_result().handle_exception([resource](auto ep) {
+                    try {
+                        std::rethrow_exception(ep);
+                    } catch (exceptions::request_execution_exception& e) { // a failed per-user delete is logged and swallowed
+                        alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
+                    }
+                    // Exception types other than request_execution_exception are not caught by this handler.
+                });
+            });
+        } catch (exceptions::request_execution_exception& e) { // the initial SELECT failed: log and resolve successfully
+            alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
+            return make_ready_future();
+        }
+    });
+}
+
+
+const auth::resource_ids& auth::default_authorizer::protected_resources() { // Returns a reference to a function-local static set.
+    static const resource_ids ids({ data_resource(auth::AUTH_KS, PERMISSIONS_CF) }); // single entry: the permissions table in the auth keyspace
+    return ids;
+}
+
+future<> auth::default_authorizer::validate_configuration() const { // Performs no checks.
+    return make_ready_future(); // already-resolved future
+}
--- a/auth/default_authorizer.hh
+++ b/auth/default_authorizer.hh
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (C) 2016 ScyllaDB
+ *
+ * Modified by ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "authorizer.hh"
+
+namespace auth {
+
+class default_authorizer : public authorizer { // authorizer implementation that stores grants in a CQL permissions table (see default_authorizer.cc)
+public:
+    static const sstring DEFAULT_AUTHORIZER_NAME; // "org.apache.cassandra.auth.CassandraAuthorizer", defined in default_authorizer.cc
+
+    default_authorizer();
+    ~default_authorizer();
+    // Builds the CREATE TABLE statement for the permissions table and passes it to auth::setup_table(). Not an override.
+    future<> init();
+    // Permissions the user holds on the resource: ALL for superusers, otherwise the stored set (NONE if none stored).
+    future<permission_set> authorize(::shared_ptr<authenticated_user>, data_resource) const override;
+    // Adds the given permissions to the set stored for (user name, resource).
+    future<> grant(::shared_ptr<authenticated_user>, permission_set, data_resource, sstring) override;
+    // Removes the given permissions from the set stored for (user name, resource).
+    future<> revoke(::shared_ptr<authenticated_user>, permission_set, data_resource, sstring) override;
+    // Lists stored grants, optionally filtered by resource and/or user; non-superusers may only list their own.
+    future<std::vector<permission_details>> list(::shared_ptr<authenticated_user>, permission_set, optional<data_resource>, optional<sstring>) const override;
+    // Deletes every permissions row stored for the named user.
+    future<> revoke_all(sstring) override;
+    // Deletes every user's permissions row for the given resource.
+    future<> revoke_all(data_resource) override;
+    // Resources reported as protected: the permissions table itself.
+    const resource_ids& protected_resources() override;
+    // Performs no checks; returns an already-resolved future.
+    future<> validate_configuration() const override;
+
+private:
+    future<> modify(::shared_ptr<authenticated_user>, permission_set, data_resource, sstring, sstring); // shared by grant() ("+") and revoke() ("-"); last argument is the operator
+};
+
+} /* namespace auth */
+
--- a/auth/password_authenticator.cc
+++ b/auth/password_authenticator.cc
@@ -17,9 +17,9 @@
 */

 /*
- * Copyright 2016 Cloudius Systems
+ * Copyright (C) 2016 ScyllaDB
 *
- * Modified by Cloudius Systems
+ * Modified by ScyllaDB
 */

 /*
@@ -61,7 +61,7 @@ static const sstring DEFAULT_USER_NAME = auth::auth::DEFAULT_SUPERUSER_NAME;
 static const sstring DEFAULT_USER_PASSWORD = auth::auth::DEFAULT_SUPERUSER_NAME;
 static const sstring CREDENTIALS_CF = "credentials";

-static logging::logger logger("password_authenticator");
+static logging::logger plogger("password_authenticator");

 auth::password_authenticator::~password_authenticator()
 {}
@@ -169,7 +169,7 @@ future<> auth::password_authenticator::init() {
                                                    USER_NAME, SALTED_HASH
                                    ),
                                    db::consistency_level::ONE, {DEFAULT_USER_NAME, hashpw(DEFAULT_USER_PASSWORD)}).then([](auto) {
-                                        logger.info("Created default user '{}'", DEFAULT_USER_NAME);
+                                        plogger.info("Created default user '{}'", DEFAULT_USER_NAME);
                                    });
                }
            });
@@ -201,8 +201,7 @@ auth::authenticator::option_set auth::password_authenticator::alterable_options(
 }

 future<::shared_ptr<auth::authenticated_user> > auth::password_authenticator::authenticate(
-                const credentials_map& credentials) const
-                                throw (exceptions::authentication_exception) {
+                const credentials_map& credentials) const {
    if (!credentials.count(USERNAME_KEY)) {
        throw exceptions::authentication_exception(sprint("Required key '%s' is missing", USERNAME_KEY));
    }
@@ -218,12 +217,12 @@ future<::shared_ptr<auth::authenticated_user> > auth::password_authenticator::au
    // obsolete prepared statements pretty quickly.
    // Rely on query processing caching statements instead, and lets assume
    // that a map lookup string->statement is not gonna kill us much.
-    auto& qp = cql3::get_local_query_processor();
-    return qp.process(
-                    sprint("SELECT %s FROM %s.%s WHERE %s = ?", SALTED_HASH,
-                                    auth::AUTH_KS, CREDENTIALS_CF, USER_NAME),
-                    consistency_for_user(username), { username }, true).then_wrapped(
-                    [=](future<::shared_ptr<cql3::untyped_result_set>> f) {
+    return futurize_apply([this, username, password] {
+        auto& qp = cql3::get_local_query_processor();
+        return qp.process(sprint("SELECT %s FROM %s.%s WHERE %s = ?", SALTED_HASH,
+                                        auth::AUTH_KS, CREDENTIALS_CF, USER_NAME),
+                        consistency_for_user(username), {username}, true);
+    }).then_wrapped([=](future<::shared_ptr<cql3::untyped_result_set>> f) {
        try {
            auto res = f.get0();
            if (res->empty() || !checkpw(password, res->one().get_as<sstring>(SALTED_HASH))) {
@@ -234,14 +233,14 @@ future<::shared_ptr<auth::authenticated_user> > auth::password_authenticator::au
            std::throw_with_nested(exceptions::authentication_exception("Could not verify password"));
        } catch (exceptions::request_execution_exception& e) {
            std::throw_with_nested(exceptions::authentication_exception(e.what()));
+        } catch (...) {
+            std::throw_with_nested(exceptions::authentication_exception("authentication failed"));
        }
    });
 }

 future<> auth::password_authenticator::create(sstring username,
-                const option_map& options)
-                                throw (exceptions::request_validation_exception,
-                                exceptions::request_execution_exception) {
+                const option_map& options) {
    try {
        auto password = boost::any_cast<sstring>(options.at(option::PASSWORD));
        auto query = sprint("INSERT INTO %s.%s (%s, %s) VALUES (?, ?)",
@@ -254,9 +253,7 @@ future<> auth::password_authenticator::create(sstring username,
 }

 future<> auth::password_authenticator::alter(sstring username,
-                const option_map& options)
-                                throw (exceptions::request_validation_exception,
-                                exceptions::request_execution_exception) {
+                const option_map& options) {
    try {
        auto password = boost::any_cast<sstring>(options.at(option::PASSWORD));
        auto query = sprint("UPDATE %s.%s SET %s = ? WHERE %s = ?",
@@ -268,9 +265,7 @@ future<> auth::password_authenticator::alter(sstring username,
    }
 }

-future<> auth::password_authenticator::drop(sstring username)
-                throw (exceptions::request_validation_exception,
-                exceptions::request_execution_exception) {
+future<> auth::password_authenticator::drop(sstring username) {
    try {
        auto query = sprint("DELETE FROM %s.%s WHERE %s = ?",
                        auth::AUTH_KS, CREDENTIALS_CF, USER_NAME);
@@ -281,8 +276,9 @@ future<> auth::password_authenticator::drop(sstring username)
    }
 }

-auth::authenticator::resource_ids auth::password_authenticator::protected_resources() const {
-    return { data_resource(auth::AUTH_KS, CREDENTIALS_CF) };
+const auth::resource_ids& auth::password_authenticator::protected_resources() const {
+    static const resource_ids ids({ data_resource(auth::AUTH_KS, CREDENTIALS_CF) });
+    return ids;
 }

 ::shared_ptr<auth::authenticator::sasl_challenge> auth::password_authenticator::new_sasl_challenge() const {
@@ -305,9 +301,8 @@ auth::authenticator::resource_ids auth::password_authenticator::protected_resour
         * would expect
         * @throws javax.security.sasl.SaslException
         */
-        bytes evaluate_response(bytes_view client_response)
-                        throw (exceptions::authentication_exception) override {
-            logger.debug("Decoding credentials from client token");
+        bytes evaluate_response(bytes_view client_response) override {
+            plogger.debug("Decoding credentials from client token");

            sstring username, password;

@@ -344,8 +339,7 @@ auth::authenticator::resource_ids auth::password_authenticator::protected_resour
        bool is_complete() const override {
            return _complete;
        }
-        future<::shared_ptr<authenticated_user>> get_authenticated_user() const
-                        throw (exceptions::authentication_exception) override {
+        future<::shared_ptr<authenticated_user>> get_authenticated_user() const override {
            return _authenticator.authenticate(_credentials);
        }
    private:
--- a/auth/password_authenticator.hh
+++ b/auth/password_authenticator.hh
@@ -17,9 +17,9 @@
 */

 /*
- * Copyright 2016 Cloudius Systems
+ * Copyright (C) 2016 ScyllaDB
 *
- * Modified by Cloudius Systems
+ * Modified by ScyllaDB
 */

 /*
@@ -58,11 +58,11 @@ public:
    bool require_authentication() const override;
    option_set supported_options() const override;
    option_set alterable_options() const override;
-    future<::shared_ptr<authenticated_user>> authenticate(const credentials_map& credentials) const throw(exceptions::authentication_exception) override;
-    future<> create(sstring username, const option_map& options) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) override;
-    future<> alter(sstring username, const option_map& options) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) override;
-    future<> drop(sstring username) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) override;
-    resource_ids protected_resources() const override;
+    future<::shared_ptr<authenticated_user>> authenticate(const credentials_map& credentials) const override;
+    future<> create(sstring username, const option_map& options) override;
+    future<> alter(sstring username, const option_map& options) override;
+    future<> drop(sstring username) override;
+    const resource_ids& protected_resources() const override;
    ::shared_ptr<sasl_challenge> new_sasl_challenge() const override;


--- a/auth/permission.cc
+++ b/auth/permission.cc
@@ -17,9 +17,9 @@
 */

 /*
- * Copyright 2016 Cloudius Systems
+ * Copyright (C) 2016 ScyllaDB
 *
- * Modified by Cloudius Systems
+ * Modified by ScyllaDB
 */

 /*
@@ -39,11 +39,66 @@
 * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
 */

+#include <unordered_map>
+#include <boost/algorithm/string.hpp>
 #include "permission.hh"

-const auth::permission_set auth::ALL_DATA = auth::permission_set::of
-                < auth::permission::CREATE, auth::permission::ALTER,
-                auth::permission::DROP, auth::permission::SELECT,
-                auth::permission::MODIFY, auth::permission::AUTHORIZE>();
-const auth::permission_set auth::ALL = auth::ALL_DATA;
-const auth::permission_set auth::NONE;
+const auth::permission_set auth::permissions::ALL_DATA =
+                auth::permission_set::of<auth::permission::CREATE,
+                                auth::permission::ALTER, auth::permission::DROP,
+                                auth::permission::SELECT,
+                                auth::permission::MODIFY,
+                                auth::permission::AUTHORIZE>();
+const auth::permission_set auth::permissions::ALL = auth::permissions::ALL_DATA;
+const auth::permission_set auth::permissions::NONE;
+const auth::permission_set auth::permissions::ALTERATIONS =
+                auth::permission_set::of<auth::permission::CREATE,
+                                auth::permission::ALTER, auth::permission::DROP>();
+
+static const std::unordered_map<sstring, auth::permission> permission_names({
+    { "READ", auth::permission::READ },
+    { "WRITE", auth::permission::WRITE  },
+    { "CREATE", auth::permission::CREATE },
+    { "ALTER", auth::permission::ALTER },
+    { "DROP", auth::permission::DROP },
+    { "SELECT", auth::permission::SELECT  },
+    { "MODIFY", auth::permission::MODIFY   },
+    { "AUTHORIZE", auth::permission::AUTHORIZE },
+});
+
+const sstring& auth::permissions::to_string(permission p) { // Reverse lookup: linear scan of permission_names for the entry whose value is p.
+    for (auto& v : permission_names) {
+        if (v.second == p) {
+            return v.first; // reference into the static map
+        }
+    }
+    throw std::out_of_range("unknown permission"); // no name is registered for p
+}
+
+auth::permission auth::permissions::from_string(const sstring& s) { // Case-insensitive name lookup: a copy of s is upper-cased first.
+    sstring upper(s);
+    boost::to_upper(upper);
+    return permission_names.at(upper); // at() throws std::out_of_range for an unknown name
+}
+
+std::unordered_set<sstring> auth::permissions::to_strings(const permission_set& set) { // Collects the name of every permission_names entry contained in `set`.
+    std::unordered_set<sstring> res;
+    for (auto& v : permission_names) {
+        if (set.contains(v.second)) {
+            res.emplace(v.first);
+        }
+    }
+    return res; // empty when `set` contains none of the named permissions
+}
+
+auth::permission_set auth::permissions::from_strings(const std::unordered_set<sstring>& set) { // Builds a permission_set from permission names.
+    permission_set res = auth::permissions::NONE; // start from the empty set
+    for (auto& s : set) {
+        res.set(from_string(s)); // an unknown name makes from_string() throw, which propagates to the caller
+    }
+    return res;
+}
+
+bool auth::operator<(const permission_set& p1, const permission_set& p2) { // Orders permission sets by comparing their mask() values.
+    return p1.mask() < p2.mask();
+}
--- a/auth/permission.hh
+++ b/auth/permission.hh
@@ -17,9 +17,9 @@
 */

 /*
- * Copyright 2016 Cloudius Systems
+ * Copyright (C) 2016 ScyllaDB
 *
- * Modified by Cloudius Systems
+ * Modified by ScyllaDB
 */

 /*
@@ -41,6 +41,10 @@

 #pragma once

+#include <unordered_set>
+#include <seastar/core/sstring.hh>
+
+#include "seastarx.hh"
 #include "enum_set.hh"

 namespace auth {
@@ -74,8 +78,22 @@ typedef enum_set<super_enum<permission,
                permission::MODIFY,
                permission::AUTHORIZE>> permission_set;

+bool operator<(const permission_set&, const permission_set&);
+
+namespace permissions {
+
 extern const permission_set ALL_DATA;
 extern const permission_set ALL;
 extern const permission_set NONE;
+extern const permission_set ALTERATIONS;
+
+const sstring& to_string(permission);
+permission from_string(const sstring&);
+
+std::unordered_set<sstring> to_strings(const permission_set&);
+permission_set from_strings(const std::unordered_set<sstring>&);
+
+
+}

 }
--- a/bytes.cc
+++ b/bytes.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 Cloudius Systems, Ltd.
+ * Copyright (C) 2014 ScyllaDB
 */

 /*
--- a/bytes.hh
+++ b/bytes.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015 Cloudius Systems, Ltd.
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -21,14 +21,17 @@

 #pragma once

+#include "seastarx.hh"
 #include "core/sstring.hh"
 #include "hashing.hh"
 #include <experimental/optional>
 #include <iosfwd>
 #include <functional>
+#include "utils/mutable_view.hh"

 using bytes = basic_sstring<int8_t, uint32_t, 31>;
 using bytes_view = std::experimental::basic_string_view<int8_t>;
+using bytes_mutable_view = basic_mutable_view<bytes_view::value_type>;
 using bytes_opt = std::experimental::optional<bytes>;
 using sstring_view = std::experimental::string_view;

--- a/bytes_ostream.hh
+++ b/bytes_ostream.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -38,6 +38,7 @@ class bytes_ostream {
 public:
    using size_type = bytes::size_type;
    using value_type = bytes::value_type;
+    static constexpr size_type max_chunk_size() { return 16 * 1024; }
 private:
    static_assert(sizeof(value_type) == 1, "value_type is assumed to be one byte long");
    struct chunk {
@@ -58,7 +59,6 @@ private:
    };
    // FIXME: consider increasing chunk size as the buffer grows
    static constexpr size_type chunk_size{512};
-    static constexpr size_type usable_chunk_size{chunk_size - sizeof(chunk)};
 private:
    std::unique_ptr<chunk> _begin;
    chunk* _current;
@@ -99,6 +99,19 @@ private:
        }
        return _current->size - _current->offset;
    }
+    // Computes the allocation size (chunk header included) for the next chunk:
+    //   - starts from chunk_size when there is no current chunk
+    //   - otherwise doubles the current chunk's size to limit allocation count
+    //   - the doubled value is capped at max_chunk_size
+    //   - the result is always large enough for data_size plus the header,
+    //     even if that exceeds the cap
+    size_type next_alloc_size(size_t data_size) const {
+        size_type grown = chunk_size;
+        if (_current) {
+            grown = _current->size * 2;
+        }
+        const size_type capped = std::min(grown, max_chunk_size());
+        // FIXME: check for overflow?
+        const size_t required = data_size + sizeof(chunk);
+        return std::max<size_type>(capped, required);
+    }
    // Makes room for a contiguous region of given size.
    // The region is accounted for as already written.
    // size must not be zero.
@@ -109,7 +122,7 @@ private:
            _size += size;
            return ret;
        } else {
-            auto alloc_size = size <= usable_chunk_size ? chunk_size : (size + sizeof(chunk));
+            auto alloc_size = next_alloc_size(size);
            auto space = malloc(alloc_size);
            if (!space) {
                throw std::bad_alloc();
@@ -153,19 +166,18 @@ public:
    }

    bytes_ostream& operator=(const bytes_ostream& o) {
-        _size = 0;
-        _current = nullptr;
-        _begin = {};
-        append(o);
+        // Copy first, then move the copy into place via the move-assignment
+        // operator. Self-assignment is a no-op.
+        if (this == &o) {
+            return *this;
+        }
+        bytes_ostream copy(o);
+        *this = std::move(copy);
        return *this;
    }

    bytes_ostream& operator=(bytes_ostream&& o) noexcept {
-        _size = o._size;
-        _begin = std::move(o._begin);
-        _current = o._current;
-        o._current = nullptr;
-        o._size = 0;
+        // Destroy the current object and move-construct from `o` in its place.
+        // Self-assignment must be skipped: destroying *this would destroy `o` too.
+        if (this == &o) {
+            return *this;
+        }
+        this->~bytes_ostream();
+        new (this) bytes_ostream(std::move(o));
        return *this;
    }

@@ -174,7 +186,7 @@ public:
        value_type* ptr;
        // makes the place_holder looks like a stream
        seastar::simple_output_stream get_stream() {
-            return seastar::simple_output_stream{reinterpret_cast<char*>(ptr)};
+            return seastar::simple_output_stream(reinterpret_cast<char*>(ptr), sizeof(T));
        }
    };

@@ -195,19 +207,19 @@ public:
        if (v.empty()) {
            return;
        }
-        auto space_left = current_space_left();
-        if (v.size() <= space_left) {
-            memcpy(_current->data + _current->offset, v.begin(), v.size());
-            _current->offset += v.size();
-            _size += v.size();
-        } else {
-            if (space_left) {
-                memcpy(_current->data + _current->offset, v.begin(), space_left);
-                _current->offset += space_left;
-                _size += space_left;
-                v.remove_prefix(space_left);
-            }
-            memcpy(alloc(v.size()), v.begin(), v.size());
+
+        auto this_size = std::min(v.size(), size_t(current_space_left()));
+        if (this_size) {
+            memcpy(_current->data + _current->offset, v.begin(), this_size);
+            _current->offset += this_size;
+            _size += this_size;
+            v.remove_prefix(this_size);
+        }
+
+        while (!v.empty()) {
+            auto this_size = std::min(v.size(), size_t(max_chunk_size()));
+            std::copy_n(v.begin(), this_size, alloc(this_size));
+            v.remove_prefix(this_size);
        }
    }

@@ -272,13 +284,8 @@ public:
    }

    void append(const bytes_ostream& o) {
-        if (o.size() > 0) {
-            auto dst = alloc(o.size());
-            auto r = o._begin.get();
-            while (r) {
-                dst = std::copy_n(r->data, r->offset, dst);
-                r = r->next.get();
-            }
+        // Appends every fragment of `o`, in order, through write().
+        for (auto&& fragment : o.fragments()) {
+            write(fragment);
        }
    }

@@ -328,6 +335,45 @@ public:
        _current->next = nullptr;
        _current->offset = pos._offset;
    }
+
+    void reduce_chunk_count() {
+        // FIXME: This is a simplified version. It linearizes the whole buffer
+        // if its size is below max_chunk_size. We probably could also gain
+        // some read performance by doing "real" reduction, i.e. merging
+        // all chunks until all but the last one is max_chunk_size.
+        if (size() >= max_chunk_size()) {
+            // Too large to linearize; leave the chunks as they are.
+            return;
+        }
+        linearize();
+    }
+
+    // Compares the logical byte content of two streams, independently of how
+    // that content is split into fragments.
+    //
+    // Streams of different total size are never equal. This check also
+    // guarantees termination: without it, when one stream is a strict prefix
+    // of the other, the exhausted side stays empty, `now` is 0 on every
+    // iteration and the loop never ends.
+    bool operator==(const bytes_ostream& other) const {
+        if (size() != other.size()) {
+            return false;
+        }
+        if (size() == 0) {
+            // Both empty; there may be no fragment to dereference.
+            return true;
+        }
+
+        auto as = fragments().begin();
+        auto as_end = fragments().end();
+        auto bs = other.fragments().begin();
+        auto bs_end = other.fragments().end();
+
+        // Both streams are non-empty here, so each has at least one fragment.
+        auto a = *as++;
+        auto b = *bs++;
+        while (true) {
+            // Skip over exhausted (or empty) fragments on both sides.
+            while (a.empty() && as != as_end) {
+                a = *as++;
+            }
+            while (b.empty() && bs != bs_end) {
+                b = *bs++;
+            }
+            if (a.empty() || b.empty()) {
+                // At least one side ran out of data; equal only if both did.
+                return a.empty() && b.empty();
+            }
+            // Compare the overlapping part of the two current fragments.
+            auto now = std::min(a.size(), b.size());
+            if (!std::equal(a.begin(), a.begin() + now, b.begin(), b.begin() + now)) {
+                return false;
+            }
+            a.remove_prefix(now);
+            b.remove_prefix(now);
+        }
+    }
+
+    // Negation of operator==.
+    bool operator!=(const bytes_ostream& other) const {
+        const bool same = (*this == other);
+        return !same;
+    }
 };

 template<>
--- a/cache_streamed_mutation.hh
+++ b/cache_streamed_mutation.hh
@@ -0,0 +1,538 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <vector>
+#include "row_cache.hh"
+#include "mutation_reader.hh"
+#include "streamed_mutation.hh"
+#include "partition_version.hh"
+#include "utils/logalloc.hh"
+#include "query-request.hh"
+#include "partition_snapshot_reader.hh"
+#include "partition_snapshot_row_cursor.hh"
+#include "read_context.hh"
+
+namespace cache {
+
+// Helper which runs callables through the row_cache's allocating sections
+// (_read_section / _update_section) on the cache tracker's region, wrapped in
+// with_linearized_managed_bytes().
+class lsa_manager {
+    row_cache& _cache;
+public:
+    lsa_manager(row_cache& cache) : _cache(cache) { }
+
+    // Runs func through the cache's read section and returns its result.
+    template<typename Func>
+    decltype(auto) run_in_read_section(const Func& func) {
+        return read_section()(region(), [&func] () {
+            return with_linearized_managed_bytes([&func] () {
+                return func();
+            });
+        });
+    }
+
+    // Runs func through the cache's update section and returns its result.
+    template<typename Func>
+    decltype(auto) run_in_update_section(const Func& func) {
+        return _cache._update_section(region(), [&func] () {
+            return with_linearized_managed_bytes([&func] () {
+                return func();
+            });
+        });
+    }
+
+    // Same as run_in_update_section(), but func is additionally invoked via
+    // with_allocator() using the region's allocator.
+    template<typename Func>
+    void run_in_update_section_with_allocator(Func&& func) {
+        return _cache._update_section(region(), [this, &func] () {
+            return with_linearized_managed_bytes([this, &func] () {
+                return with_allocator(region().allocator(), [&func] () {
+                    return func();
+                });
+            });
+        });
+    }
+
+    logalloc::region& region() { return _cache._tracker.region(); }
+    logalloc::allocating_section& read_section() { return _cache._read_section; }
+};
+
+// streamed_mutation implementation which serves a single partition from a
+// cache snapshot (_snp). Rows are taken from cache while _next_row reports
+// continuity; otherwise the reader switches to the underlying reader obtained
+// through _read_context, emits what it reads and, when can_populate() allows,
+// inserts it into the cache.
+class cache_streamed_mutation final : public streamed_mutation::impl {
+    enum class state {
+        before_static_row,
+
+        // Invariants:
+        //  - position_range(_lower_bound, _upper_bound) covers all not yet emitted positions from current range
+        //  - _next_row points to the nearest row in cache >= _lower_bound
+        //  - _next_row_in_range = _next_row.position() < _upper_bound
+        reading_from_cache,
+
+        // Starts reading from underlying reader.
+        // The range to read is position_range(_lower_bound, min(_next_row.position(), _upper_bound)).
+        // Invariants:
+        //  - _next_row_in_range = _next_row.position() < _upper_bound
+        move_to_underlying,
+
+        // Invariants:
+        // - Upper bound of the read is min(_next_row.position(), _upper_bound)
+        // - _next_row_in_range = _next_row.position() < _upper_bound
+        // - _last_row_key contains the key of last emitted clustering_row
+        reading_from_underlying,
+
+        end_of_stream
+    };
+    lw_shared_ptr<partition_snapshot> _snp;
+    position_in_partition::tri_compare _position_cmp;
+
+    query::clustering_key_filter_ranges _ck_ranges;
+    query::clustering_row_ranges::const_iterator _ck_ranges_curr;
+    query::clustering_row_ranges::const_iterator _ck_ranges_end;
+
+    lsa_manager _lsa_manager;
+
+    // Key of the last clustering row emitted in the current range, if any.
+    stdx::optional<clustering_key> _last_row_key;
+
+    // We need to be prepared that we may get overlapping and out of order
+    // range tombstones. We must emit fragments with strictly monotonic positions,
+    // so we can't just trim such tombstones to the position of the last fragment.
+    // To solve that, range tombstones are accumulated first in a range_tombstone_stream
+    // and emitted once we have a fragment with a larger position.
+    range_tombstone_stream _tombstones;
+
+    // Holds the lower bound of a position range which hasn't been processed yet.
+    // Only fragments with positions < _lower_bound have been emitted.
+    position_in_partition _lower_bound;
+    position_in_partition_view _upper_bound;
+
+    state _state = state::before_static_row;
+    lw_shared_ptr<read_context> _read_context;
+    partition_snapshot_row_cursor _next_row;
+    bool _next_row_in_range = false;
+
+    future<> do_fill_buffer();
+    void copy_from_cache_to_buffer();
+    future<> process_static_row();
+    void move_to_end();
+    void move_to_next_range();
+    void move_to_current_range();
+    void move_to_next_entry();
+    // Emits all delayed range tombstones with positions smaller than upper_bound.
+    void drain_tombstones(position_in_partition_view upper_bound);
+    // Emits all delayed range tombstones.
+    void drain_tombstones();
+    void add_to_buffer(const partition_snapshot_row_cursor&);
+    void add_clustering_row_to_buffer(mutation_fragment&&);
+    void add_to_buffer(range_tombstone&&);
+    void add_to_buffer(mutation_fragment&&);
+    future<> read_from_underlying();
+    future<> start_reading_from_underlying();
+    bool after_current_range(position_in_partition_view position);
+    bool can_populate() const;
+    void maybe_update_continuity();
+    void maybe_add_to_cache(const mutation_fragment& mf);
+    void maybe_add_to_cache(const clustering_row& cr);
+    void maybe_add_to_cache(const range_tombstone& rt);
+    void maybe_add_to_cache(const static_row& sr);
+    void maybe_set_static_row_continuous();
+public:
+    // s    - schema of the partition
+    // dk   - key of the partition being read
+    // crr  - clustering ranges to read; iterated in order
+    // ctx  - read context giving access to the underlying reader and cache
+    // snp  - snapshot of the cached partition
+    // cache - the row_cache the snapshot belongs to
+    cache_streamed_mutation(schema_ptr s,
+                            dht::decorated_key dk,
+                            query::clustering_key_filter_ranges&& crr,
+                            lw_shared_ptr<read_context> ctx,
+                            lw_shared_ptr<partition_snapshot> snp,
+                            row_cache& cache)
+        // dk is taken by value and not used again, so move it into the base
+        // instead of copying the key.
+        : streamed_mutation::impl(std::move(s), std::move(dk), snp->partition_tombstone())
+        , _snp(std::move(snp))
+        , _position_cmp(*_schema)
+        , _ck_ranges(std::move(crr))
+        , _ck_ranges_curr(_ck_ranges.begin())
+        , _ck_ranges_end(_ck_ranges.end())
+        , _lsa_manager(cache)
+        , _tombstones(*_schema)
+        , _lower_bound(position_in_partition::before_all_clustered_rows())
+        , _upper_bound(position_in_partition_view::before_all_clustered_rows())
+        , _read_context(std::move(ctx))
+        , _next_row(*_schema, cache._tracker.region(), *_snp)
+    { }
+    cache_streamed_mutation(const cache_streamed_mutation&) = delete;
+    cache_streamed_mutation(cache_streamed_mutation&&) = delete;
+    virtual future<> fill_buffer() override;
+    virtual ~cache_streamed_mutation() {
+        maybe_merge_versions(_snp, _lsa_manager.region(), _lsa_manager.read_section());
+    }
+};
+
+// Emits the static row, either from the snapshot (when the snapshot marks the
+// static row as continuous) or from the read context's next fragment, in
+// which case the row is also offered to the cache.
+inline
+future<> cache_streamed_mutation::process_static_row() {
+    if (!_snp->version()->partition().static_row_continuous()) {
+        _read_context->cache().on_row_miss();
+        return _read_context->get_next_fragment().then([this] (mutation_fragment_opt&& sr) {
+            if (sr) {
+                assert(sr->is_static_row());
+                maybe_add_to_cache(sr->as_static_row());
+                push_mutation_fragment(std::move(*sr));
+            }
+            maybe_set_static_row_continuous();
+        });
+    }
+
+    _read_context->cache().on_row_hit();
+    row sr = _lsa_manager.run_in_read_section([this] {
+        return _snp->static_row();
+    });
+    if (!sr.empty()) {
+        push_mutation_fragment(mutation_fragment(static_row(std::move(sr))));
+    }
+    return make_ready_future<>();
+}
+
+// Fills the buffer. On the first call the static row is handled (if the
+// schema has static columns) and the reader is positioned at the first
+// clustering range; afterwards do_fill_buffer() is repeated until the stream
+// ends or the buffer is full.
+inline
+future<> cache_streamed_mutation::fill_buffer() {
+    if (_state != state::before_static_row) {
+        return do_until([this] { return _end_of_stream || is_buffer_full(); }, [this] {
+            return do_fill_buffer();
+        });
+    }
+
+    auto after_static_row = [this] {
+        if (_ck_ranges_curr == _ck_ranges_end) {
+            // No clustering ranges requested; nothing more to emit.
+            _end_of_stream = true;
+            _state = state::end_of_stream;
+            return make_ready_future<>();
+        }
+        _state = state::reading_from_cache;
+        _lsa_manager.run_in_read_section([this] {
+            move_to_current_range();
+        });
+        return fill_buffer();
+    };
+
+    if (!_schema->has_static_columns()) {
+        return after_static_row();
+    }
+    return process_static_row().then(std::move(after_static_row));
+}
+
+// Performs one step of buffer filling according to the current state.
+inline
+future<> cache_streamed_mutation::do_fill_buffer() {
+    switch (_state) {
+    case state::move_to_underlying: {
+        _state = state::reading_from_underlying;
+        // Read up to the next cached row if it is in range, otherwise up to
+        // the end of the current range.
+        auto read_end = _next_row_in_range ? position_in_partition(_next_row.position())
+                                           : position_in_partition(_upper_bound);
+        return _read_context->fast_forward_to(position_range{_lower_bound, std::move(read_end)}).then([this] {
+            return read_from_underlying();
+        });
+    }
+    case state::reading_from_underlying:
+        return read_from_underlying();
+    default:
+        break;
+    }
+    // assert(_state == state::reading_from_cache)
+    return _lsa_manager.run_in_read_section([this] {
+        auto same_pos = _next_row.maybe_refresh();
+        // FIXME: If continuity changed anywhere between _lower_bound and _next_row.position()
+        // we need to redo the lookup with _lower_bound. There is no eviction yet, so not yet a problem.
+        assert(same_pos);
+        for (;;) {
+            if (is_buffer_full() || _state != state::reading_from_cache) {
+                break;
+            }
+            copy_from_cache_to_buffer();
+            if (need_preempt()) {
+                break;
+            }
+        }
+        return make_ready_future<>();
+    });
+}
+
+// Consumes fragments from the underlying streamed mutation until the state
+// changes or the buffer fills up. Each fragment is offered to the cache and
+// added to the buffer. When the underlying stream is exhausted the reader
+// switches back to reading from cache.
+inline
+future<> cache_streamed_mutation::read_from_underlying() {
+    return consume_mutation_fragments_until(_read_context->get_streamed_mutation(),
+        [this] { return _state != state::reading_from_underlying || is_buffer_full(); },
+        [this] (mutation_fragment mf) {
+            _read_context->cache().on_row_miss();
+            maybe_add_to_cache(mf);
+            add_to_buffer(std::move(mf));
+        },
+        [this] {
+            _state = state::reading_from_cache;
+            _lsa_manager.run_in_update_section([this] {
+                auto same_pos = _next_row.maybe_refresh();
+                assert(same_pos); // FIXME: handle eviction
+                if (!_next_row_in_range) {
+                    if (no_clustering_row_between(*_schema, _upper_bound, _next_row.position())) {
+                        maybe_update_continuity();
+                    } else {
+                        // FIXME: Insert dummy entry at _upper_bound.
+                        _read_context->cache().on_mispopulate();
+                    }
+                    move_to_next_range();
+                    return;
+                }
+                maybe_update_continuity();
+                add_to_buffer(_next_row);
+                move_to_next_entry();
+            });
+            return make_ready_future<>();
+        });
+}
+
+// Marks _next_row as continuous when population is allowed, the row is in the
+// latest version, and either the previous row in the latest version is the
+// last emitted row or nothing was emitted and the range has no start bound.
+inline
+void cache_streamed_mutation::maybe_update_continuity() {
+    if (!can_populate() || !_next_row.is_in_latest_version()) {
+        _read_context->cache().on_mispopulate();
+        return;
+    }
+    if (!_last_row_key) {
+        if (!_ck_ranges_curr->start()) {
+            _next_row.set_continuous(true);
+        }
+        return;
+    }
+    if (_next_row.previous_row_in_latest_version_has_key(*_last_row_key)) {
+        _next_row.set_continuous(true);
+    }
+}
+
+// Dispatches a fragment to the range-tombstone or clustering-row overload.
+inline
+void cache_streamed_mutation::maybe_add_to_cache(const mutation_fragment& mf) {
+    if (mf.is_range_tombstone()) {
+        maybe_add_to_cache(mf.as_range_tombstone());
+        return;
+    }
+    assert(mf.is_clustering_row());
+    const clustering_row& row = mf.as_clustering_row();
+    maybe_add_to_cache(row);
+}
+
+// Inserts a clustering row read from the underlying reader into the latest
+// partition version (when population is allowed) and updates the continuity
+// flag of the resulting entry.
+inline
+void cache_streamed_mutation::maybe_add_to_cache(const clustering_row& cr) {
+    if (!can_populate()) {
+        _read_context->cache().on_mispopulate();
+        return;
+    }
+    _lsa_manager.run_in_update_section_with_allocator([this, &cr] {
+        mutation_partition& partition = _snp->version()->partition();
+        rows_entry::compare cmp(*_schema);
+
+        // FIXME: If _next_row is up to date, but latest version doesn't have iterator in
+        // current row (could be far away, so we'd do this often), then this will do
+        // the lookup in mp. This is not necessary, because _next_row has iterators for
+        // next rows in each version, even if they're not part of the current row.
+        // They're currently buried in the heap, but you could keep a vector of
+        // iterators per each version in addition to the heap.
+        auto candidate = alloc_strategy_unique_ptr<rows_entry>(
+            current_allocator().construct<rows_entry>(cr.key(), cr.tomb(), cr.marker(), cr.cells()));
+        candidate->set_continuous(false);
+        auto row_it = _next_row.has_valid_row_from_latest_version()
+                  ? _next_row.get_iterator_in_latest_version() : partition.clustered_rows().lower_bound(cr.key(), cmp);
+        auto inserted = partition.clustered_rows().insert_check(row_it, *candidate, cmp);
+        if (inserted.second) {
+            // The tree now references the entry; give up ownership.
+            _read_context->cache().on_row_insert();
+            candidate.release();
+        }
+        row_it = inserted.first;
+
+        rows_entry& entry = *row_it;
+        if (!_last_row_key) {
+            if (!_ck_ranges_curr->start()) {
+                entry.set_continuous(true);
+            } else {
+                // FIXME: Insert dummy entry at _ck_ranges_curr->start()
+                _read_context->cache().on_mispopulate();
+            }
+            return;
+        }
+        if (row_it == partition.clustered_rows().begin()) {
+            // FIXME: check whether entry for _last_row_key is in older versions and if so set
+            // continuity to true.
+            _read_context->cache().on_mispopulate();
+            return;
+        }
+        auto predecessor = row_it;
+        --predecessor;
+        clustering_key_prefix::equality same_key(*_schema);
+        if (same_key(*_last_row_key, predecessor->key())) {
+            entry.set_continuous(true);
+        }
+    });
+}
+
+// True when p is at or past _upper_bound of the current range.
+inline
+bool cache_streamed_mutation::after_current_range(position_in_partition_view p) {
+    const auto order = _position_cmp(p, _upper_bound);
+    return order >= 0;
+}
+
+// Requests a switch to the underlying reader; the actual fast-forward happens
+// in do_fill_buffer() when it observes state::move_to_underlying.
+inline
+future<> cache_streamed_mutation::start_reading_from_underlying() {
+    _state = state::move_to_underlying;
+    return make_ready_future<>();
+}
+
+// Emits range tombstones from the snapshot between _lower_bound and the next
+// stop position, then either the next cached row (if in range) or moves on to
+// the next range. Returns early if the buffer fills up while adding tombstones.
+inline
+void cache_streamed_mutation::copy_from_cache_to_buffer() {
+    position_in_partition_view next_lower_bound = _next_row.dummy() ? _next_row.position() : position_in_partition_view::after_key(_next_row.key());
+    for (auto&& tombstone : _snp->range_tombstones(*_schema, _lower_bound, _next_row_in_range ? next_lower_bound : _upper_bound)) {
+        add_to_buffer(std::move(tombstone));
+        if (is_buffer_full()) {
+            return;
+        }
+    }
+    if (!_next_row_in_range) {
+        move_to_next_range();
+        return;
+    }
+    add_to_buffer(_next_row);
+    move_to_next_entry();
+}
+
+// Flushes all delayed range tombstones and marks the stream as finished.
+inline
+void cache_streamed_mutation::move_to_end() {
+    drain_tombstones();
+    _state = state::end_of_stream;
+    _end_of_stream = true;
+}
+
+// Advances to the next clustering range, or ends the stream if there is none.
+inline
+void cache_streamed_mutation::move_to_next_range() {
+    ++_ck_ranges_curr;
+    if (_ck_ranges_curr != _ck_ranges_end) {
+        move_to_current_range();
+        return;
+    }
+    move_to_end();
+}
+
+// Resets per-range state for *_ck_ranges_curr and positions _next_row at the
+// start of the range. Switches to the underlying reader when the cursor is
+// neither positioned exactly at the lower bound nor marked continuous.
+inline
+void cache_streamed_mutation::move_to_current_range() {
+    _last_row_key = std::experimental::nullopt;
+    _lower_bound = position_in_partition::for_range_start(*_ck_ranges_curr);
+    _upper_bound = position_in_partition_view::for_range_end(*_ck_ranges_curr);
+    const bool needs_underlying = !(_next_row.advance_to(_lower_bound) || _next_row.continuous());
+    _next_row_in_range = !after_current_range(_next_row.position());
+    if (needs_underlying) {
+        start_reading_from_underlying();
+    }
+}
+
+// Advances past the current cached row. _next_row must be inside the range.
+inline
+void cache_streamed_mutation::move_to_next_entry() {
+    if (no_clustering_row_between(*_schema, _next_row.position(), _upper_bound)) {
+        // Nothing can follow in this range.
+        move_to_next_range();
+        return;
+    }
+    if (!_next_row.next()) {
+        move_to_end();
+        return;
+    }
+    _next_row_in_range = !after_current_range(_next_row.position());
+    if (_next_row.continuous()) {
+        return;
+    }
+    start_reading_from_underlying();
+}
+
+// Pushes every delayed range tombstone that _tombstones yields for pos.
+inline
+void cache_streamed_mutation::drain_tombstones(position_in_partition_view pos) {
+    for (;;) {
+        auto fragment = _tombstones.get_next(pos);
+        if (!fragment) {
+            break;
+        }
+        push_mutation_fragment(std::move(*fragment));
+    }
+}
+
+// Pushes all remaining delayed range tombstones.
+inline
+void cache_streamed_mutation::drain_tombstones() {
+    for (;;) {
+        auto fragment = _tombstones.get_next();
+        if (!fragment) {
+            break;
+        }
+        push_mutation_fragment(std::move(*fragment));
+    }
+}
+
+// Routes a fragment to the clustering-row or range-tombstone handler.
+inline
+void cache_streamed_mutation::add_to_buffer(mutation_fragment&& mf) {
+    if (!mf.is_clustering_row()) {
+        assert(mf.is_range_tombstone());
+        add_to_buffer(std::move(mf).as_range_tombstone());
+        return;
+    }
+    add_clustering_row_to_buffer(std::move(mf));
+}
+
+// Emits the row under the cursor, counting it as a cache hit. Dummy entries
+// are skipped.
+inline
+void cache_streamed_mutation::add_to_buffer(const partition_snapshot_row_cursor& row) {
+    if (row.dummy()) {
+        return;
+    }
+    _read_context->cache().on_row_hit();
+    add_clustering_row_to_buffer(row.row());
+}
+
+// Emits delayed tombstones preceding the row, records the row's key as the
+// last emitted key, moves _lower_bound right after it and pushes the row.
+inline
+void cache_streamed_mutation::add_clustering_row_to_buffer(mutation_fragment&& mf) {
+    auto& cr = mf.as_clustering_row();
+    drain_tombstones(cr.position());
+    _last_row_key = cr.key();
+    _lower_bound = position_in_partition::after_key(cr.key());
+    push_mutation_fragment(std::move(mf));
+}
+
+// Trims the tombstone to start at _lower_bound, queues it in _tombstones and
+// emits whatever has become emittable. Does nothing if trim_front() reports
+// that nothing is left of the tombstone.
+inline
+void cache_streamed_mutation::add_to_buffer(range_tombstone&& rt) {
+    // This guarantees that rt starts after any emitted clustering_row
+    if (rt.trim_front(*_schema, _lower_bound)) {
+        _lower_bound = position_in_partition(rt.position());
+        _tombstones.apply(std::move(rt));
+        drain_tombstones(_lower_bound);
+    }
+}
+
+// Applies a range tombstone to the latest partition version when population
+// is allowed; otherwise records a mispopulation.
+inline
+void cache_streamed_mutation::maybe_add_to_cache(const range_tombstone& rt) {
+    if (!can_populate()) {
+        _read_context->cache().on_mispopulate();
+        return;
+    }
+    _lsa_manager.run_in_update_section_with_allocator([&] {
+        _snp->version()->partition().row_tombstones().apply_monotonically(*_schema, rt);
+    });
+}
+
+// Applies the static row's cells to the latest partition version when
+// population is allowed; otherwise records a mispopulation.
+inline
+void cache_streamed_mutation::maybe_add_to_cache(const static_row& sr) {
+    if (!can_populate()) {
+        _read_context->cache().on_mispopulate();
+        return;
+    }
+    _read_context->cache().on_row_insert();
+    _lsa_manager.run_in_update_section_with_allocator([&] {
+        _snp->version()->partition().static_row().apply(*_schema, column_kind::static_column, sr.cells());
+    });
+}
+
+// Flags the static row of the latest version as continuous when population
+// is allowed; otherwise records a mispopulation.
+inline
+void cache_streamed_mutation::maybe_set_static_row_continuous() {
+    if (!can_populate()) {
+        _read_context->cache().on_mispopulate();
+        return;
+    }
+    _snp->version()->partition().set_static_row_continuous(true);
+}
+
+// Population is allowed only when the snapshot is at the latest version and
+// the cache's phase for this key equals the phase of the read context.
+inline
+bool cache_streamed_mutation::can_populate() const {
+    if (!_snp->at_latest_version()) {
+        return false;
+    }
+    return _read_context->cache().phase_of(_read_context->key()) == _read_context->phase();
+}
+
+} // namespace cache
+
+// Creates a streamed_mutation backed by cache::cache_streamed_mutation.
+// Note the argument order differs from the implementation's constructor.
+inline streamed_mutation make_cache_streamed_mutation(schema_ptr s,
+                                                      dht::decorated_key dk,
+                                                      query::clustering_key_filter_ranges crr,
+                                                      row_cache& cache,
+                                                      lw_shared_ptr<cache::read_context> ctx,
+                                                      lw_shared_ptr<partition_snapshot> snp)
+{
+    using impl_type = cache::cache_streamed_mutation;
+    return make_streamed_mutation<impl_type>(
+        std::move(s),
+        std::move(dk),
+        std::move(crr),
+        std::move(ctx),
+        std::move(snp),
+        cache);
+}
--- a/caching_options.hh
+++ b/caching_options.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015 Cloudius Systems, Ltd.
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -24,6 +24,7 @@
 #include <boost/lexical_cast.hpp>
 #include "exceptions/exceptions.hh"
 #include "json.hh"
+#include "seastarx.hh"

 class schema;

@@ -58,30 +59,34 @@ class caching_options {
    caching_options() : _key_cache(default_key), _row_cache(default_row) {}
 public:

-    sstring to_sstring() const {
-        return json::to_json(std::map<sstring, sstring>({{ "keys", _key_cache }, { "rows_per_partition", _row_cache }}));
+    std::map<sstring, sstring> to_map() const {
+        return {{ "keys", _key_cache }, { "rows_per_partition", _row_cache }};
    }

-    static caching_options from_sstring(const sstring& str) {
-        auto map = json::to_map(str);
-        if (map.size() > 2) {
-            throw exceptions::configuration_exception("Invalid map: " + str); 
-        }
-        sstring k;
-        sstring r;
-        if (map.count("keys")) {
-            k = map.at("keys");
-        } else {
-            k = default_key;
-        }
+    sstring to_sstring() const {
+        return json::to_json(to_map());
+    }

-        if (map.count("rows_per_partition")) {
-            r = map.at("rows_per_partition");
-        } else {
-            r = default_row;
+    // Builds caching_options from a map-like container of option name/value
+    // pairs. Missing options keep their defaults; any name other than
+    // "keys" or "rows_per_partition" raises configuration_exception.
+    template<typename Map>
+    static caching_options from_map(const Map & map) {
+        sstring k = default_key;
+        sstring r = default_row;
+
+        for (auto& option : map) {
+            if (option.first == "keys") {
+                k = option.second;
+                continue;
+            }
+            if (option.first == "rows_per_partition") {
+                r = option.second;
+                continue;
+            }
+            throw exceptions::configuration_exception("Invalid caching option: " + option.first);
        }
        return caching_options(k, r);
    }
+    static caching_options from_sstring(const sstring& str) {
+        return from_map(json::to_map(str));
+    }
+
    bool operator==(const caching_options& other) const {
        return _key_cache == other._key_cache && _row_cache == other._row_cache;
    }
--- a/canonical_mutation.cc
+++ b/canonical_mutation.cc
@@ -22,6 +22,7 @@
 #include "canonical_mutation.hh"
 #include "mutation.hh"
 #include "mutation_partition_serializer.hh"
+#include "counters.hh"
 #include "converting_mutation_partition_applier.hh"
 #include "hashing_partition_visitor.hh"
 #include "utils/UUID.hh"
@@ -44,7 +45,7 @@ canonical_mutation::canonical_mutation(const mutation& m)
    mutation_partition_serializer part_ser(*m.schema(), m.partition());

    bytes_ostream out;
-    ser::writer_of_canonical_mutation wr(out);
+    ser::writer_of_canonical_mutation<bytes_ostream> wr(out);
    std::move(wr).write_table_id(m.schema()->id())
                 .write_schema_version(m.schema()->version())
                 .write_key(m.key())
--- a/cartesian_product.hh
+++ b/cartesian_product.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 *
 */

--- a/cell_locking.hh
+++ b/cell_locking.hh
@@ -0,0 +1,566 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <boost/intrusive/unordered_set.hpp>
+
+#if __has_include(<boost/container/small_vector.hpp>)
+
+#include <boost/container/small_vector.hpp>
+
+template <typename T, size_t N>
+using small_vector = boost::container::small_vector<T, N>;
+
+#else
+
+#include <vector>
+template <typename T, size_t N>
+using small_vector = std::vector<T>;
+
+#endif
+
+#include "fnv1a_hasher.hh"
+#include "streamed_mutation.hh"
+#include "mutation_partition.hh"
+
+class cells_range { // the ids of the cells present in one row, paired with that row's position
+    using ids_vector_type = small_vector<column_id, 5>;
+
+    position_in_partition_view _position;
+    ids_vector_type _ids;
+public:
+    using iterator = ids_vector_type::iterator;
+    using const_iterator = ids_vector_type::const_iterator;
+
+    // An empty range positioned at the static row.
+    cells_range() : _position(position_in_partition_view(position_in_partition_view::static_row_tag_t())) { }
+
+    // Records the id of every cell that for_each_cell() reports for `cells`.
+    explicit cells_range(position_in_partition_view pos, const row& cells) : _position(pos) {
+        _ids.reserve(cells.size());
+        cells.for_each_cell([&ids = _ids] (auto column, auto&&) {
+            ids.emplace_back(column);
+        });
+    }
+
+    position_in_partition_view position() const { return _position; }
+    bool empty() const { return _ids.empty(); }
+
+    // Read-only iteration over the collected column ids.
+    const_iterator begin() const { return _ids.begin(); }
+    const_iterator end() const { return _ids.end(); }
+};
+
+class partition_cells_range { // the rows of a mutation_partition exposed as a sequence of cells_range objects
+    const mutation_partition& _mp;
+public:
+    class iterator {
+        const mutation_partition& _mp;
+        // Disengaged while the iterator stands on the static row, otherwise the current clustered row.
+        stdx::optional<mutation_partition::rows_type::const_iterator> _position;
+        cells_range _current;
+    public:
+        // Starts at the static row.
+        explicit iterator(const mutation_partition& mp)
+            : _mp(mp), _current(position_in_partition_view(position_in_partition_view::static_row_tag_t()), mp.static_row()) { }
+
+        // Starts at the clustered row `it`; _current is left default-constructed.
+        iterator(const mutation_partition& mp, mutation_partition::rows_type::const_iterator it)
+            : _mp(mp), _position(it) { }
+
+        // Steps from the static row to the first clustered row, or from one clustered row to the next.
+        iterator& operator++() {
+            if (_position) {
+                ++(*_position);
+            } else {
+                _position = _mp.clustered_rows().begin();
+            }
+            if (_position != _mp.clustered_rows().end()) {
+                auto row_it = *_position;
+                position_in_partition_view row_pos(position_in_partition_view::clustering_row_tag_t(), row_it->key());
+                _current = cells_range(row_pos, row_it->row().cells());
+            }
+            return *this;
+        }
+
+        // Post-increment: advances and returns the iterator as it was before the step.
+        iterator operator++(int) {
+            iterator previous(*this);
+            ++(*this);
+            return previous;
+        }
+
+        // Cells of the row the iterator currently stands on.
+        cells_range& operator*() {
+            return _current;
+        }
+
+        cells_range* operator->() {
+            return &_current;
+        }
+
+        // Iterators are equal when they stand on the same row; _mp is not compared.
+        bool operator==(const iterator& other) const {
+            return _position == other._position;
+        }
+        bool operator!=(const iterator& other) const {
+            return !operator==(other);
+        }
+    };
+
+    explicit partition_cells_range(const mutation_partition& mp) : _mp(mp) { }
+
+    // Iteration starts at the static row and stops past the last clustered row.
+    iterator begin() const { return iterator(_mp); }
+    iterator end() const { return iterator(_mp, _mp.clustered_rows().end()); }
+};
+
+class locked_cell;
+
+struct cell_locker_stats {
+    uint64_t lock_acquisitions = 0; // incremented once for every cell lock that cell_locker hands out
+    uint64_t operations_waiting_for_lock = 0; // incremented when a wait on an already locked cell starts, decremented when the lock is obtained
+};
+
+class cell_locker { // hands out per-cell locks (see lock_cells()); locked cells are tracked per partition key
+public:
+    using timeout_clock = lowres_clock;
+private:
+    using semaphore_type = basic_semaphore<default_timeout_exception_factory, timeout_clock>;
+
+    class partition_entry;
+
+    struct cell_address { // identifies one cell: the position of its row plus the column id
+        position_in_partition position;
+        column_id id;
+    };
+
+    class cell_entry : public bi::unordered_set_base_hook<bi::link_mode<bi::auto_unlink>>,
+                       public enable_lw_shared_from_this<cell_entry> {
+        partition_entry& _parent;
+        cell_address _address;
+        semaphore_type _semaphore { 0 }; // zero units initially, so lock() on a fresh entry has to wait for an unlock()
+
+        friend class cell_locker;
+    public:
+        cell_entry(partition_entry& parent, position_in_partition position, column_id id)
+            : _parent(parent)
+            , _address { std::move(position), id }
+        { }
+
+        // Upgrades cell_entry to another schema.
+        // Changes the value of cell_address, so cell_entry has to be
+        // temporarily removed from its parent partition_entry.
+        // Returns true if the cell_entry still exists in the new schema and
+        // should be reinserted.
+        bool upgrade(const schema& from, const schema& to, column_kind kind) noexcept {
+            auto& old_column_mapping = from.get_column_mapping();
+            auto& column = old_column_mapping.column_at(kind, _address.id);
+            auto cdef = to.get_column_definition(column.name()); // the column is looked up by name in the new schema
+            if (!cdef) {
+                return false;
+            }
+            _address.id = cdef->id;
+            return true;
+        }
+
+        const position_in_partition& position() const {
+            return _address.position;
+        }
+
+        future<> lock(timeout_clock::time_point _timeout) { // waits for one semaphore unit, up to _timeout
+            return _semaphore.wait(_timeout);
+        }
+        void unlock() { // releases one semaphore unit
+            _semaphore.signal();
+        }
+
+        ~cell_entry() {
+            if (!is_linked()) {
+                return; // not in any partition_entry's set, e.g. dropped by partition_entry::upgrade()
+            }
+            unlink();
+            if (!--_parent._cell_count) {
+                delete &_parent; // the last linked cell takes its partition_entry down with it
+            }
+        }
+
+        class hasher {
+            const schema* _schema; // pointer instead of reference for default assignment
+        public:
+            explicit hasher(const schema& s) : _schema(&s) { }
+
+            size_t operator()(const cell_address& ca) const { // FNV-1a over the position and the column id
+                fnv1a_hasher hasher;
+                ca.position.feed_hash(hasher, *_schema);
+                ::feed_hash(hasher, ca.id);
+                return hasher.finalize();
+            }
+            size_t operator()(const cell_entry& ce) const {
+                return operator()(ce._address);
+            }
+        };
+
+        class equal_compare { // equality on cell_address; the overloads allow lookups by address without building an entry
+            position_in_partition::equal_compare _cmp;
+        private:
+            bool do_compare(const cell_address& a, const cell_address& b) const {
+                return a.id == b.id && _cmp(a.position, b.position);
+            }
+        public:
+            explicit equal_compare(const schema& s) : _cmp(s) { }
+            bool operator()(const cell_address& ca, const cell_entry& ce) const {
+                return do_compare(ca, ce._address);
+            }
+            bool operator()(const cell_entry& ce, const cell_address& ca) const {
+                return do_compare(ca, ce._address);
+            }
+            bool operator()(const cell_entry& a, const cell_entry& b) const {
+                return do_compare(a._address, b._address);
+            }
+        };
+    };
+
+    class partition_entry : public bi::unordered_set_base_hook<bi::link_mode<bi::auto_unlink>> {
+        using cells_type = bi::unordered_set<cell_entry,
+                                             bi::equal<cell_entry::equal_compare>,
+                                             bi::hash<cell_entry::hasher>,
+                                             bi::constant_time_size<false>>;
+
+        static constexpr size_t initial_bucket_count = 16;
+        using max_load_factor = std::ratio<3, 4>;
+        dht::decorated_key _key;
+        cell_locker& _parent;
+        size_t _rehash_at_size = compute_rehash_at_size(initial_bucket_count);
+        std::unique_ptr<cells_type::bucket_type[]> _buckets; // heap buckets, set by maybe_rehash() and upgrade(); _internal_buckets is used until then
+        size_t _cell_count = 0; // cells_type::empty() is not O(1) if the hook is auto-unlink
+        cells_type::bucket_type _internal_buckets[initial_bucket_count]; // inline storage for the initial bucket array
+        cells_type _cells;
+        schema_ptr _schema;
+
+        friend class cell_entry;
+    private:
+        static constexpr size_t compute_rehash_at_size(size_t bucket_count) {
+            return bucket_count * max_load_factor::num / max_load_factor::den;
+        }
+        void maybe_rehash() {
+            if (_cell_count >= _rehash_at_size) {
+                auto new_bucket_count = std::min(_cells.bucket_count() * 2, _cells.bucket_count() + 1024); // double, but grow by at most 1024 buckets
+                auto buckets = std::make_unique<cells_type::bucket_type[]>(new_bucket_count);
+
+                _cells.rehash(cells_type::bucket_traits(buckets.get(), new_bucket_count));
+                _buckets = std::move(buckets); // the old array is released only after the set has moved to the new one
+
+                _rehash_at_size = compute_rehash_at_size(new_bucket_count);
+            }
+        }
+    public:
+        partition_entry(schema_ptr s, cell_locker& parent, const dht::decorated_key& dk)
+            : _key(dk)
+            , _parent(parent)
+            , _cells(cells_type::bucket_traits(_internal_buckets, initial_bucket_count),
+                     cell_entry::hasher(*s), cell_entry::equal_compare(*s))
+            , _schema(s)
+        { }
+
+        ~partition_entry() {
+            if (is_linked()) {
+                _parent._partition_count--; // keep the owning cell_locker's element count in sync
+            }
+        }
+
+        // Upgrades partition entry to new schema. Returns false if all
+        // cell_entries have been removed during the upgrade.
+        bool upgrade(schema_ptr new_schema);
+
+        void insert(lw_shared_ptr<cell_entry> cell) { // links the entry into the set; ownership stays with the lw_shared_ptr holders
+            _cells.insert(*cell);
+            _cell_count++;
+            maybe_rehash();
+        }
+
+        cells_type& cells() {
+            return _cells;
+        }
+
+        struct hasher {
+            size_t operator()(const dht::decorated_key& dk) const {
+                return std::hash<dht::decorated_key>()(dk);
+            }
+            size_t operator()(const partition_entry& pe) const {
+                return operator()(pe._key);
+            }
+        };
+
+        class equal_compare { // equality on the partition key; the overloads allow lookups by decorated_key
+            dht::decorated_key_equals_comparator _cmp;
+        public:
+            explicit equal_compare(const schema& s) : _cmp(s) { }
+            bool operator()(const dht::decorated_key& dk, const partition_entry& pe) {
+                return _cmp(dk, pe._key);
+            }
+            bool operator()(const partition_entry& pe, const dht::decorated_key& dk) {
+                return _cmp(dk, pe._key);
+            }
+            bool operator()(const partition_entry& a, const partition_entry& b) {
+                return _cmp(a._key, b._key);
+            }
+        };
+    };
+
+    using partitions_type = bi::unordered_set<partition_entry,
+                                              bi::equal<partition_entry::equal_compare>,
+                                              bi::hash<partition_entry::hasher>,
+                                              bi::constant_time_size<false>>;
+
+    static constexpr size_t initial_bucket_count = 4 * 1024;
+    using max_load_factor = std::ratio<3, 4>;
+
+    std::unique_ptr<partitions_type::bucket_type[]> _buckets;
+    partitions_type _partitions;
+    size_t _partition_count = 0; // maintained by hand: the set is declared with constant_time_size<false>
+    size_t _rehash_at_size = compute_rehash_at_size(initial_bucket_count);
+    schema_ptr _schema; // current schema, replaced by set_schema()
+
+    // partitions_type uses equality comparator which keeps a reference to the
+    // original schema, we must ensure that it doesn't die.
+    schema_ptr _original_schema;
+    cell_locker_stats& _stats;
+
+    friend class locked_cell;
+private:
+    struct locker;
+
+    static constexpr size_t compute_rehash_at_size(size_t bucket_count) {
+        return bucket_count * max_load_factor::num / max_load_factor::den;
+    }
+    void maybe_rehash() {
+        if (_partition_count >= _rehash_at_size) {
+            auto new_bucket_count = std::min(_partitions.bucket_count() * 2, _partitions.bucket_count() + 64 * 1024); // double, but grow by at most 64k buckets
+            auto buckets = std::make_unique<partitions_type::bucket_type[]>(new_bucket_count);
+
+            _partitions.rehash(partitions_type::bucket_traits(buckets.get(), new_bucket_count));
+            _buckets = std::move(buckets); // the old array is released only after the set has moved to the new one
+
+            _rehash_at_size = compute_rehash_at_size(new_bucket_count);
+        }
+    }
+public:
+    explicit cell_locker(schema_ptr s, cell_locker_stats& stats)
+        : _buckets(std::make_unique<partitions_type::bucket_type[]>(initial_bucket_count))
+        , _partitions(partitions_type::bucket_traits(_buckets.get(), initial_bucket_count),
+                      partition_entry::hasher(), partition_entry::equal_compare(*s))
+        , _schema(s)
+        , _original_schema(std::move(s))
+        , _stats(stats)
+    { }
+
+    ~cell_locker() {
+        assert(_partitions.empty()); // every partition entry has to be gone before the locker is destroyed
+    }
+
+    void set_schema(schema_ptr s) { // only stores the schema; entries are upgraded by lock_cells() when it next finds them
+        _schema = s;
+    }
+    schema_ptr schema() const {
+        return _schema;
+    }
+
+    // partition_cells_range is required to be in cell_locker::schema()
+    future<std::vector<locked_cell>> lock_cells(const dht::decorated_key& dk, partition_cells_range&& range,
+                                                timeout_clock::time_point timeout);
+};
+
+
+class locked_cell { // move-only handle for one held cell lock; destroying the handle releases the lock
+    lw_shared_ptr<cell_locker::cell_entry> _entry;
+public:
+    explicit locked_cell(lw_shared_ptr<cell_locker::cell_entry> entry) : _entry(std::move(entry)) { }
+
+    locked_cell(const locked_cell&) = delete;
+    locked_cell(locked_cell&&) = default;
+
+    ~locked_cell() {
+        if (!_entry) {
+            return; // empty handle (presumably a moved-from one): nothing to release
+        }
+        _entry->unlock();
+    }
+};
+
+struct cell_locker::locker { // state of one lock_cells() call that may have to wait for cells of an existing partition_entry
+    cell_entry::hasher _hasher;
+    cell_entry::equal_compare _eq_cmp;
+    partition_entry& _partition_entry;
+
+    partition_cells_range _range;
+    partition_cells_range::iterator _current_ck; // row currently being processed
+    cells_range::const_iterator _current_cell; // next column id to lock within that row
+
+    timeout_clock::time_point _timeout;
+    std::vector<locked_cell> _locks; // locks obtained so far; handed over by get()
+    cell_locker_stats& _stats;
+private:
+    void update_ck() { // points _current_cell at the first cell of the current row, unless the range is exhausted
+        if (!is_done()) {
+            _current_cell = _current_ck->begin();
+        }
+    }
+
+    future<> lock_next();
+
+    bool is_done() const { return _current_ck == _range.end(); }
+public:
+    explicit locker(const ::schema& s, cell_locker_stats& st, partition_entry& pe, partition_cells_range&& range, timeout_clock::time_point timeout)
+        : _hasher(s)
+        , _eq_cmp(s)
+        , _partition_entry(pe)
+        , _range(std::move(range))
+        , _current_ck(_range.begin())
+        , _timeout(timeout)
+        , _stats(st)
+    {
+        update_ck();
+    }
+
+    locker(const locker&) = delete; // the continuations capture `this`, so the object must stay where it is
+    locker(locker&&) = delete;
+
+    future<> lock_all() { // keeps calling lock_next() until the whole range has been processed
+        // Cannot defer before first call to lock_next().
+        return lock_next().then([this] {
+            return do_until([this] { return is_done(); }, [this] {
+                return lock_next();
+            });
+        });
+    }
+
+    std::vector<locked_cell> get() && { return std::move(_locks); }
+};
+
+inline
+future<std::vector<locked_cell>> cell_locker::lock_cells(const dht::decorated_key& dk, partition_cells_range&& range, timeout_clock::time_point timeout) {
+    partition_entry::hasher pe_hash;
+    partition_entry::equal_compare pe_eq(*_schema);
+    // `partition` becomes non-null when no cell of dk is tracked any more: dk has no entry, or upgrade() dropped all of its cells.
+    auto it = _partitions.find(dk, pe_hash, pe_eq);
+    std::unique_ptr<partition_entry> partition;
+    if (it == _partitions.end()) {
+        partition = std::make_unique<partition_entry>(_schema, *this, dk);
+    } else if (!it->upgrade(_schema)) {
+        partition = std::unique_ptr<partition_entry>(&*it); // take the now empty entry out of the table and reuse it below
+        _partition_count--;
+        _partitions.erase(it);
+    }
+    // NOTE(review): if the loop below throws after a cell was inserted, ~cell_entry may delete the partition that `partition` still owns - confirm.
+    if (partition) { // fast path: nothing can be contended, every requested cell is locked immediately
+        std::vector<locked_cell> locks;
+        for (auto&& r : range) {
+            if (r.empty()) {
+                continue;
+            }
+            for (auto&& c : r) {
+                auto cell = make_lw_shared<cell_entry>(*partition, position_in_partition(r.position()), c);
+                _stats.lock_acquisitions++;
+                partition->insert(cell);
+                locks.emplace_back(std::move(cell));
+            }
+        }
+        // Publish the partition only if it received at least one cell; otherwise the unique_ptr simply frees it.
+        if (!locks.empty()) {
+            _partitions.insert(*partition.release()); // from here on the entry is deleted by its last cell_entry
+            _partition_count++;
+            maybe_rehash();
+        }
+        return make_ready_future<std::vector<locked_cell>>(std::move(locks));
+    }
+    // Slow path: the partition already tracks locked cells, so some of the requested ones may have to be waited for.
+    auto l = std::make_unique<locker>(*_schema, _stats, *it, std::move(range), timeout);
+    auto f = l->lock_all();
+    return f.then([l = std::move(l)] { // the continuation keeps the locker alive until all locks are taken
+        return std::move(*l).get();
+    });
+}
+
+inline
+future<> cell_locker::locker::lock_next() { // locks cells in order until one is contended (returns its wait) or none remain
+    while (!is_done()) {
+        if (_current_cell == _current_ck->end()) {
+            ++_current_ck; // current row exhausted, continue with the next one
+            update_ck();
+            continue;
+        }
+
+        auto cid = *_current_cell++;
+        // A cell that already has an entry is locked by someone else and must be waited for; otherwise a new entry is created.
+        cell_address ca { position_in_partition(_current_ck->position()), cid };
+        auto it = _partition_entry.cells().find(ca, _hasher, _eq_cmp);
+        if (it != _partition_entry.cells().end()) {
+            _stats.operations_waiting_for_lock++;
+            // finally(): the waiter gauge must also drop when the wait fails (e.g. times out), not only when it succeeds.
+            return it->lock(_timeout).finally([this] { _stats.operations_waiting_for_lock--; })
+                    .then([this, ce = it->shared_from_this()] () mutable {
+                _stats.lock_acquisitions++;
+                _locks.emplace_back(std::move(ce));
+            });
+        }
+        auto cell = make_lw_shared<cell_entry>(_partition_entry, position_in_partition(_current_ck->position()), cid);
+        _stats.lock_acquisitions++; // uncontended: the fresh entry starts out locked and belongs to this operation
+        _partition_entry.insert(cell);
+        _locks.emplace_back(std::move(cell));
+    }
+    return make_ready_future<>();
+}
+
+inline
+bool cell_locker::partition_entry::upgrade(schema_ptr new_schema) { // re-keys every cell for new_schema; false when no cell is left
+    if (_schema == new_schema) {
+        return true;
+    }
+    // Build a second set over fresh buckets that hashes and compares with the new schema.
+    auto buckets = std::make_unique<cells_type::bucket_type[]>(_cells.bucket_count());
+    auto cells = cells_type(cells_type::bucket_traits(buckets.get(), _cells.bucket_count()),
+                            cell_entry::hasher(*new_schema), cell_entry::equal_compare(*new_schema));
+    // Drain the old set: cells whose column still exists go into the new set, the others are left unlinked.
+    _cells.clear_and_dispose([&] (cell_entry* cell_ptr) noexcept {
+        auto& cell = *cell_ptr;
+        auto kind = cell.position().is_static_row() ? column_kind::static_column
+                                                    : column_kind::regular_column;
+        auto reinsert = cell.upgrade(*_schema, *new_schema, kind);
+        if (reinsert) {
+            cells.insert(cell);
+        } else {
+            _cell_count--;
+        }
+    });
+
+    // bi::unordered_set move assignment is actually a swap.
+    // Original _buckets cannot be destroyed before the container using them is
+    // so we need to explicitly make sure that the original _cells is no more.
+    _cells = std::move(cells);
+    auto destroy = [] (auto) { }; // taking the argument by value destroys the swapped-out set when the call returns
+    destroy(std::move(cells));
+
+    _buckets = std::move(buckets);
+    _schema = new_schema;
+    return _cell_count; // a non-zero count converts to true
+}
--- a/checked-file-impl.hh
+++ b/checked-file-impl.hh
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2016 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "seastar/core/file.hh"
+#include "disk-error-handler.hh"
+
+class checked_file_impl : public file_impl { // file_impl that forwards to another file through do_io_check()
+    const io_error_handler& _error_handler;
+    file _file;
+public:
+    // The DMA alignment requirements are copied from the wrapped file.
+    checked_file_impl(const io_error_handler& error_handler, file f)
+            : _error_handler(error_handler), _file(f) {
+        _memory_dma_alignment = f.memory_dma_alignment();
+        _disk_read_dma_alignment = f.disk_read_dma_alignment();
+        _disk_write_dma_alignment = f.disk_write_dma_alignment();
+    }
+
+    // Each operation below calls the wrapped file's implementation from inside do_io_check().
+    virtual future<size_t> write_dma(uint64_t pos, const void* buffer, size_t len, const io_priority_class& pc) override {
+        return do_io_check(_error_handler, [this, pos, buffer, len, &pc] {
+            return get_file_impl(_file)->write_dma(pos, buffer, len, pc);
+        });
+    }
+
+    virtual future<size_t> write_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc) override {
+        return do_io_check(_error_handler, [this, pos, &iov, &pc] {
+            return get_file_impl(_file)->write_dma(pos, iov, pc);
+        });
+    }
+
+    virtual future<size_t> read_dma(uint64_t pos, void* buffer, size_t len, const io_priority_class& pc) override {
+        return do_io_check(_error_handler, [this, pos, buffer, len, &pc] {
+            return get_file_impl(_file)->read_dma(pos, buffer, len, pc);
+        });
+    }
+
+    virtual future<size_t> read_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc) override {
+        return do_io_check(_error_handler, [this, pos, &iov, &pc] {
+            return get_file_impl(_file)->read_dma(pos, iov, pc);
+        });
+    }
+
+    virtual future<> flush() override {
+        return do_io_check(_error_handler, [this] {
+            return get_file_impl(_file)->flush();
+        });
+    }
+
+    virtual future<struct stat> stat() override {
+        return do_io_check(_error_handler, [this] {
+            return get_file_impl(_file)->stat();
+        });
+    }
+
+    virtual future<> truncate(uint64_t length) override {
+        return do_io_check(_error_handler, [this, length] {
+            return get_file_impl(_file)->truncate(length);
+        });
+    }
+
+    virtual future<> discard(uint64_t offset, uint64_t length) override {
+        return do_io_check(_error_handler, [this, offset, length] {
+            return get_file_impl(_file)->discard(offset, length);
+        });
+    }
+
+    virtual future<> allocate(uint64_t position, uint64_t length) override {
+        return do_io_check(_error_handler, [this, position, length] {
+            return get_file_impl(_file)->allocate(position, length);
+        });
+    }
+
+    virtual future<uint64_t> size() override {
+        return do_io_check(_error_handler, [this] {
+            return get_file_impl(_file)->size();
+        });
+    }
+
+    virtual future<> close() override {
+        return do_io_check(_error_handler, [this] {
+            return get_file_impl(_file)->close();
+        });
+    }
+
+    // The handle returned here belongs to the plain file, so a file opened
+    // from it has to be passed through make_checked_file() again.
+    virtual std::unique_ptr<seastar::file_handle_impl> dup() override {
+        return get_file_impl(_file)->dup();
+    }
+
+    virtual subscription<directory_entry> list_directory(std::function<future<> (directory_entry de)> next) override {
+        return do_io_check(_error_handler, [this, &next] {
+            return get_file_impl(_file)->list_directory(next);
+        });
+    }
+
+    virtual future<temporary_buffer<uint8_t>> dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc) override {
+        return do_io_check(_error_handler, [this, offset, range_size, &pc] {
+            return get_file_impl(_file)->dma_read_bulk(offset, range_size, pc);
+        });
+    }
+};
+
+inline file make_checked_file(const io_error_handler& error_handler, file f) {
+    file checked(::make_shared<checked_file_impl>(error_handler, f)); // f wrapped in a checked_file_impl bound to error_handler
+    return checked;
+}
+
+// Opens `name` with open_file_dma() and yields the result wrapped by make_checked_file().
+inline future<file> open_checked_file_dma(const io_error_handler& error_handler,
+                                          sstring name, open_flags flags,
+                                          file_open_options options) {
+    return do_io_check(error_handler, [&error_handler, &name, &flags, &options] {
+        auto wrap = [&error_handler] (file opened) {
+            return make_ready_future<file>(make_checked_file(error_handler, opened));
+        };
+        return open_file_dma(name, flags, options).then(wrap);
+    });
+}
+
+// Same as the overload taking file_open_options, but opens the file without explicit options.
+inline future<file> open_checked_file_dma(const io_error_handler& error_handler,
+                                          sstring name, open_flags flags) {
+    return do_io_check(error_handler, [&error_handler, &name, &flags] {
+        auto wrap = [&error_handler] (file opened) {
+            return make_ready_future<file>(make_checked_file(error_handler, opened));
+        };
+        return open_file_dma(name, flags).then(wrap);
+    });
+}
+
+// Opens the directory `name` via engine().open_directory() and yields it wrapped by make_checked_file().
+inline future<file> open_checked_directory(const io_error_handler& error_handler,
+                                           sstring name) {
+    return do_io_check(error_handler, [&error_handler, &name] {
+        auto wrap = [&error_handler] (file opened) {
+            return make_ready_future<file>(make_checked_file(error_handler, opened));
+        };
+        return engine().open_directory(name).then(wrap);
+    });
+}
--- a/clocks-impl.cc
+++ b/clocks-impl.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -19,6 +19,6 @@
 * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include "gc_clock.hh"
+#include "clocks-impl.hh"

 std::atomic<int64_t> clocks_offset;
--- a/clocks-impl.hh
+++ b/clocks-impl.hh
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <cstdint>
+
+extern std::atomic<int64_t> clocks_offset;
+
+template<typename Duration>
+static inline void forward_jump_clocks(Duration delta)
+{
+    const auto whole_seconds = std::chrono::duration_cast<std::chrono::seconds>(delta); // the offset is kept in whole seconds
+    clocks_offset.fetch_add(whole_seconds.count(), std::memory_order_relaxed);
+}
+
+static inline std::chrono::seconds get_clocks_offset()
+{
+    // The accumulated offset is stored as a count of seconds.
+    return std::chrono::seconds(clocks_offset.load(std::memory_order_relaxed));
+}
+
+// Returns the time point that is d earlier than t, or the minimum time point if that one cannot be represented.
+template<typename Clock, typename Duration, typename Rep, typename Period>
+inline auto saturating_subtract(std::chrono::time_point<Clock, Duration> t, std::chrono::duration<Rep, Period> d) -> decltype(t) {
+    const decltype(t) lowest_safe = decltype(t)::min() + d; // earliest time point from which d can still be subtracted
+    return (t < lowest_safe ? lowest_safe : t) - d;
+}
--- a/clustering_bounds_comparator.hh
+++ b/clustering_bounds_comparator.hh
@@ -0,0 +1,167 @@
+
+/*
+ * Copyright (C) 2016 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "keys.hh"
+#include "schema.hh"
+#include "range.hh"
+
+/**
+ * Represents the kind of bound in a range tombstone.
+ */
+enum class bound_kind : uint8_t {
+    excl_end = 0, // end of a range, the bound value itself excluded
+    incl_start = 1, // start of a range, the bound value itself included
+    // values 2 to 5 are reserved for forward Origin compatibility
+    incl_end = 6, // end of a range, the bound value itself included
+    excl_start = 7, // start of a range, the bound value itself excluded
+};
+
+std::ostream& operator<<(std::ostream& out, const bound_kind k);
+
+bound_kind invert_kind(bound_kind k);
+int32_t weight(bound_kind k);
+
+static inline bound_kind flip_bound_kind(bound_kind bk)
+{
+    // Start and end are swapped; inclusiveness is kept as it is.
+    if (bk == bound_kind::excl_end) { return bound_kind::excl_start; }
+    if (bk == bound_kind::incl_end) { return bound_kind::incl_start; }
+    if (bk == bound_kind::excl_start) { return bound_kind::excl_end; }
+    if (bk == bound_kind::incl_start) { return bound_kind::incl_end; }
+    // Reached only for a value that is not one of the four enumerators.
+    abort();
+}
+
+class bound_view { // one end of a clustering range: a reference to a key prefix plus the kind of bound
+public:
+    const static thread_local clustering_key empty_prefix; // the prefix that bottom() and top() refer to
+    const clustering_key_prefix& prefix; // not owned: the referenced prefix has to outlive the view
+    bound_kind kind;
+    bound_view(const clustering_key_prefix& prefix, bound_kind kind)
+        : prefix(prefix)
+        , kind(kind)
+    { }
+    bound_view(const bound_view& other) noexcept = default;
+    bound_view& operator=(const bound_view& other) noexcept { // a reference member cannot be rebound, hence destroy + construct in place
+        if (this != &other) {
+            this->~bound_view();
+            new (this) bound_view(other);
+        }
+        return *this;
+    }
+    struct tri_compare {
+        // To make it assignable and to avoid taking a schema_ptr, we
+        // wrap the schema reference.
+        std::reference_wrapper<const schema> _s;
+        tri_compare(const schema& s) : _s(s)
+        { }
+        int operator()(const clustering_key_prefix& p1, int32_t w1, const clustering_key_prefix& p2, int32_t w2) const { // w1/w2: weights of the two bounds, 0 for a plain prefix
+            auto type = _s.get().clustering_key_prefix_type();
+            auto res = prefix_equality_tri_compare(type->types().begin(),
+                type->begin(p1), type->end(p1),
+                type->begin(p2), type->end(p2),
+                ::tri_compare);
+            if (res) {
+                return res; // prefix_equality_tri_compare already found a difference
+            }
+            auto d1 = p1.size(_s);
+            auto d2 = p2.size(_s);
+            if (d1 == d2) {
+                return w1 - w2; // same number of components: the weights alone decide
+            }
+            return d1 < d2 ? w1 - (w1 <= 0) : -(w2 - (w2 <= 0)); // the shorter prefix decides: it sorts first if its weight is <= 0, last otherwise
+        }
+        int operator()(const bound_view b, const clustering_key_prefix& p) const {
+            return operator()(b.prefix, weight(b.kind), p, 0);
+        }
+        int operator()(const clustering_key_prefix& p, const bound_view b) const {
+            return operator()(p, 0, b.prefix, weight(b.kind));
+        }
+        int operator()(const bound_view b1, const bound_view b2) const {
+            return operator()(b1.prefix, weight(b1.kind), b2.prefix, weight(b2.kind));
+        }
+    };
+    struct compare {
+        // Less-than comparator built on tri_compare: true exactly when
+        // tri_compare yields a negative value for the same arguments.
+        tri_compare _cmp;
+        compare(const schema& s) : _cmp(s)
+        { }
+        bool operator()(const clustering_key_prefix& p1, int32_t w1, const clustering_key_prefix& p2, int32_t w2) const {
+            return _cmp(p1, w1, p2, w2) < 0;
+        }
+        bool operator()(const bound_view b, const clustering_key_prefix& p) const {
+            return operator()(b.prefix, weight(b.kind), p, 0);
+        }
+        bool operator()(const clustering_key_prefix& p, const bound_view b) const {
+            return operator()(p, 0, b.prefix, weight(b.kind));
+        }
+        bool operator()(const bound_view b1, const bound_view b2) const {
+            return operator()(b1.prefix, weight(b1.kind), b2.prefix, weight(b2.kind));
+        }
+    };
+    bool equal(const schema& s, const bound_view other) const { // same kind and equal prefixes
+        return kind == other.kind && prefix.equal(s, other.prefix);
+    }
+    bool adjacent(const schema& s, const bound_view other) const { // equal prefixes, and this kind is invert_kind() of the other
+        return invert_kind(other.kind) == kind && prefix.equal(s, other.prefix);
+    }
+    static bound_view bottom() { // inclusive start at the empty prefix; used for a range without a start bound
+        return {empty_prefix, bound_kind::incl_start};
+    }
+    static bound_view top() { // inclusive end at the empty prefix; used for a range without an end bound
+        return {empty_prefix, bound_kind::incl_end};
+    }
+    template<template<typename> typename R>
+    GCC6_CONCEPT( requires Range<R, clustering_key_prefix_view> )
+    static bound_view from_range_start(const R<clustering_key_prefix>& range) { // the returned view refers to the prefix stored inside `range`
+        return range.start()
+               ? bound_view(range.start()->value(), range.start()->is_inclusive() ? bound_kind::incl_start : bound_kind::excl_start)
+               : bottom();
+    }
+    template<template<typename> typename R>
+    GCC6_CONCEPT( requires Range<R, clustering_key_prefix> )
+    static bound_view from_range_end(const R<clustering_key_prefix>& range) { // the returned view refers to the prefix stored inside `range`
+        return range.end()
+               ? bound_view(range.end()->value(), range.end()->is_inclusive() ? bound_kind::incl_end : bound_kind::excl_end)
+               : top();
+    }
+    template<template<typename> typename R>
+    GCC6_CONCEPT( requires Range<R, clustering_key_prefix> )
+    static std::pair<bound_view, bound_view> from_range(const R<clustering_key_prefix>& range) { // (start, end) views of the same range
+        return {from_range_start(range), from_range_end(range)};
+    }
+    template<template<typename> typename R>
+    GCC6_CONCEPT( requires Range<R, clustering_key_prefix_view> )
+    static stdx::optional<typename R<clustering_key_prefix_view>::bound> to_range_bound(const bound_view& bv) {
+        if (&bv.prefix == &empty_prefix) { // identity check: only views referring to empty_prefix itself mean "no bound"
+            return {};
+        }
+        bool inclusive = bv.kind != bound_kind::excl_end && bv.kind != bound_kind::excl_start;
+        return {typename R<clustering_key_prefix_view>::bound(bv.prefix.view(), inclusive)};
+    }
+    friend std::ostream& operator<<(std::ostream& out, const bound_view& b) {
+        return out << "{bound: prefix=" << b.prefix << ", kind=" << b.kind << "}";
+    }
+};
--- a/clustering_key_filter.hh
+++ b/clustering_key_filter.hh
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2016 ScyllaDB
+ *
+ * Modified by ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "schema.hh"
+#include "query-request.hh"
+
+namespace query {
+
+// The clustering ranges to visit for a query, in iteration order.
+// Either refers to ranges owned elsewhere, or (when built with the `reversed` tag)
+// owns a copy of them stored in reverse order.
+class clustering_key_filter_ranges {
+    // Backing storage; only populated by the `reversed` constructor (or moved in from one).
+    clustering_row_ranges _storage;
+    // What the accessors expose: either _storage or the ranges passed by the caller.
+    const clustering_row_ranges& _ref;
+public:
+    // Tag type selecting the constructor that stores the ranges in reverse order.
+    struct reversed { };
+
+    // Non-owning: `ranges` is referenced, not copied, so it must outlive this object.
+    clustering_key_filter_ranges(const clustering_row_ranges& ranges)
+        : _ref(ranges)
+    { }
+
+    // Owning: keeps a reversed copy of `ranges` in _storage.
+    clustering_key_filter_ranges(reversed, const clustering_row_ranges& ranges)
+        : _storage(ranges.rbegin(), ranges.rend())
+        , _ref(_storage)
+    { }
+
+    // If `other` exposed its own storage, expose ours; otherwise share the same external ranges.
+    clustering_key_filter_ranges(clustering_key_filter_ranges&& other) noexcept
+        : _storage(std::move(other._storage))
+        , _ref(&other._ref != &other._storage ? other._ref : _storage)
+    { }
+
+    clustering_key_filter_ranges& operator=(clustering_key_filter_ranges&& other) noexcept {
+        if (this == &other) {
+            return *this;
+        }
+        // _ref cannot be reseated, so the object is rebuilt in place from `other`.
+        this->~clustering_key_filter_ranges();
+        new (this) clustering_key_filter_ranges(std::move(other));
+        return *this;
+    }
+
+    const clustering_row_ranges& ranges() const { return _ref; }
+    auto begin() const { return ranges().begin(); }
+    auto end() const { return ranges().end(); }
+    bool empty() const { return ranges().empty(); }
+    size_t size() const { return ranges().size(); }
+
+    // Builds the filter for `key` from the slice's row ranges, reversing them when the
+    // slice has the `reversed` option set.
+    static clustering_key_filter_ranges get_ranges(const schema& schema, const query::partition_slice& slice, const partition_key& key) {
+        const query::clustering_row_ranges& ranges = slice.row_ranges(schema, key);
+        const bool is_reversed = slice.options.contains(query::partition_slice::option::reversed);
+        if (!is_reversed) {
+            return clustering_key_filter_ranges(ranges);
+        }
+        return clustering_key_filter_ranges(clustering_key_filter_ranges::reversed{}, ranges);
+    }
+};
+
+}
--- a/clustering_ranges_walker.hh
+++ b/clustering_ranges_walker.hh
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ *
+ * Modified by ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "schema.hh"
+#include "query-request.hh"
+#include "streamed_mutation.hh"
+
+// Utility for in-order checking of overlap with position ranges.
+class clustering_ranges_walker {
+    const schema& _schema;
+    const query::clustering_row_ranges& _ranges;
+    query::clustering_row_ranges::const_iterator _current;
+    query::clustering_row_ranges::const_iterator _end;
+    bool _in_current; // next position is known to be >= _current_start
+    bool _with_static_row;
+    position_in_partition_view _current_start;
+    position_in_partition_view _current_end;
+    stdx::optional<position_in_partition> _trim;
+    size_t _change_counter = 1;
+private:
+    bool advance_to_next_range() { // Moves on to the next range; returns false when there is none left.
+        _in_current = false;
+        if (!_current_start.is_static_row()) { // Leaving the static row consumes no range: _current is still the first one.
+            if (_current == _end) {
+                return false; // Already past the last range; nothing changes.
+            }
+            ++_current;
+        }
+        ++_change_counter; // The lower bound is about to change.
+        if (_current == _end) {
+            _current_end = _current_start = position_in_partition_view::after_all_clustered_rows(); // Collapse to an empty range at the very end.
+            return false;
+        }
+        _current_start = position_in_partition_view::for_range_start(*_current);
+        _current_end = position_in_partition_view::for_range_end(*_current);
+        return true;
+    }
+public:
+    clustering_ranges_walker(const schema& s, const query::clustering_row_ranges& ranges, bool with_static_row = true) // Keeps references to `s` and `ranges`; neither is copied.
+        : _schema(s)
+        , _ranges(ranges)
+        , _current(ranges.begin())
+        , _end(ranges.end())
+        , _in_current(with_static_row) // Starts inside the static-row range only when that range is included.
+        , _with_static_row(with_static_row)
+        , _current_start(position_in_partition_view::for_static_row()) // Initial range: [static row, before all clustered rows).
+        , _current_end(position_in_partition_view::before_all_clustered_rows())
+    {
+        if (!with_static_row) { // No static row: start directly at the first clustering range, if any.
+            if (_current == _end) {
+                _current_start = position_in_partition_view::before_all_clustered_rows(); // Start now equals end, i.e. an empty range.
+            } else {
+                _current_start = position_in_partition_view::for_range_start(*_current);
+                _current_end = position_in_partition_view::for_range_end(*_current);
+            }
+        }
+    }
+    clustering_ranges_walker(clustering_ranges_walker&& o) noexcept // Member-wise move; the schema/ranges references and the iterators are simply copied.
+        : _schema(o._schema)
+        , _ranges(o._ranges)
+        , _current(o._current)
+        , _end(o._end)
+        , _in_current(o._in_current)
+        , _with_static_row(o._with_static_row)
+        , _current_start(o._current_start) // NOTE(review): after trim_front() this view was built from *o._trim; confirm it remains valid once _trim is moved below.
+        , _current_end(o._current_end)
+        , _trim(std::move(o._trim))
+        , _change_counter(o._change_counter)
+    { }
+    // Move assignment. The class holds reference members, which cannot be reseated, so the
+    // object is destroyed and re-created in place from `o`. Both steps are non-throwing (the
+    // move constructor is declared noexcept), so the operator is declared noexcept as well.
+    clustering_ranges_walker& operator=(clustering_ranges_walker&& o) noexcept {
+        if (this != &o) {
+            this->~clustering_ranges_walker();
+            new (this) clustering_ranges_walker(std::move(o));
+        }
+        return *this;
+    }
+
+    // Excludes positions smaller than pos from the ranges.
+    // pos should be monotonic.
+    // No constraints between pos and positions passed to advance_to().
+    //
+    // After the invocation, when !out_of_range(), lower_bound() returns the smallest position still contained.
+    void trim_front(position_in_partition pos) {
+        position_in_partition::less_compare less(_schema);
+
+        for (;;) {
+            // The current range already starts at or after pos: nothing to cut.
+            if (!less(_current_start, pos)) {
+                return;
+            }
+            // pos falls inside the current range: make it the new start.
+            if (less(pos, _current_end)) {
+                _trim = std::move(pos);
+                _current_start = *_trim;
+                ++_change_counter;
+                _in_current = false;
+                return;
+            }
+            // The whole current range lies before pos: drop it and look at the next one.
+            if (!advance_to_next_range()) {
+                return;
+            }
+        }
+    }
+
+    // Returns true if given position is contained.
+    // Must be called with monotonic positions.
+    // Idempotent.
+    bool advance_to(position_in_partition_view pos) {
+        position_in_partition::less_compare less(_schema);
+
+        for (;;) {
+            if (!_in_current) {
+                if (less(pos, _current_start)) {
+                    // pos lies in the gap before the current range.
+                    return false;
+                }
+                // Positions are monotonic, so the start of this range never needs
+                // to be compared against again.
+                _in_current = true;
+            }
+
+            if (less(pos, _current_end)) {
+                return true;
+            }
+            if (!advance_to_next_range()) {
+                return false;
+            }
+        }
+    }
+
+    // Returns true if the range expressed by start and end (as in position_range) overlaps
+    // with clustering ranges.
+    // Must be called with monotonic start position. That position must also be greater than
+    // the last position passed to the other advance_to() overload.
+    // Idempotent.
+    bool advance_to(position_in_partition_view start, position_in_partition_view end) {
+        position_in_partition::less_compare less(_schema);
+
+        // Keep going while the current range starts before `end`.
+        while (less(_current_start, end)) {
+            if (less(start, _current_end)) {
+                // [start, end) and the current range overlap.
+                return true;
+            }
+            if (!advance_to_next_range()) {
+                break;
+            }
+        }
+
+        return false;
+    }
+
+    // Returns true if the range tombstone expressed by start and end (as in position_range) overlaps
+    // with clustering ranges.
+    // No monotonicity restrictions on argument values across calls.
+    // Does not affect lower_bound().
+    // Idempotent.
+    bool contains_tombstone(position_in_partition_view start, position_in_partition_view end) const {
+        position_in_partition::less_compare less(_schema);
+
+        // Anything ending before the trim position has been excluded.
+        if (_trim && less(end, *_trim)) {
+            return false;
+        }
+
+        // Scan from the current range onwards without touching the walker's state.
+        for (auto it = _current; it != _end; ++it) {
+            if (less(end, position_in_partition_view::for_range_start(*it))) {
+                // The tombstone ends before this range starts.
+                return false;
+            }
+            if (less(start, position_in_partition_view::for_range_end(*it))) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    // Returns true if advanced past all contained positions. Any later advance_to() until reset() will return false.
+    bool out_of_range() const {
+        return !_in_current && _current == _end;
+    }
+
+    // Resets the state of the walker so that advance_to() can be now called for new sequence of positions.
+    // Any range trimmings still hold after this.
+    void reset() {
+        auto trim = std::move(_trim); // Saved first: the reassignment below rebuilds every member, _trim included.
+        auto ctr = _change_counter;
+        *this = clustering_ranges_walker(_schema, _ranges, _with_static_row);
+        _change_counter = ctr + 1; // Continue the old count rather than restarting from the initial value.
+        if (trim) {
+            trim_front(std::move(*trim)); // Re-apply the previous trimming to the fresh state.
+        }
+    }
+
+    // Can be called only when !out_of_range()
+    position_in_partition_view lower_bound() const {
+        return _current_start;
+    }
+
+    // When lower_bound() changes, this also does
+    // Always > 0.
+    size_t lower_bound_change_counter() const {
+        return _change_counter;
+    }
+};
--- a/combine.hh
+++ b/combine.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015 Cloudius Systems, Ltd.
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
--- a/compaction_strategy.hh
+++ b/compaction_strategy.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -22,6 +22,8 @@
 #pragma once

 class column_family;
+class schema;
+using schema_ptr = lw_shared_ptr<const schema>;

 namespace sstables {

@@ -30,12 +32,14 @@ enum class compaction_strategy_type {
    major,
    size_tiered,
    leveled,
-    // FIXME: Add support to DateTiered.
+    date_tiered,
 };

 class compaction_strategy_impl;
 class sstable;
+class sstable_set;
 struct compaction_descriptor;
+struct resharding_descriptor;

 class compaction_strategy {
    ::shared_ptr<compaction_strategy_impl> _compaction_strategy_impl;
@@ -51,6 +55,21 @@ public:
    // Return a list of sstables to be compacted after applying the strategy.
    compaction_descriptor get_sstables_for_compaction(column_family& cfs, std::vector<lw_shared_ptr<sstable>> candidates);

+    std::vector<resharding_descriptor> get_resharding_jobs(column_family& cf, std::vector<lw_shared_ptr<sstable>> candidates);
+
+    // Some strategies may look at the compacted and resulting sstables to
+    // get some useful information for subsequent compactions.
+    void notify_completion(const std::vector<lw_shared_ptr<sstable>>& removed, const std::vector<lw_shared_ptr<sstable>>& added);
+
+    // Returns whether the strategy allows parallel compaction.
+    bool parallel_compaction() const;
+
+    // Returns whether the optimization that rules out sstables based on the clustering key filter should be applied.
+    bool use_clustering_key_filter() const;
+
+    // An estimate of the number of compactions needed for the strategy to be satisfied.
+    int64_t estimated_pending_compactions(column_family& cf) const;
+
    static sstring name(compaction_strategy_type type) {
        switch (type) {
        case compaction_strategy_type::null:
@@ -61,6 +80,8 @@ public:
            return "SizeTieredCompactionStrategy";
        case compaction_strategy_type::leveled:
            return "LeveledCompactionStrategy";
+        case compaction_strategy_type::date_tiered:
+            return "DateTieredCompactionStrategy";
        default:
            throw std::runtime_error("Invalid Compaction Strategy");
        }
@@ -77,6 +98,8 @@ public:
            return compaction_strategy_type::size_tiered;
        } else if (short_name == "LeveledCompactionStrategy") {
            return compaction_strategy_type::leveled;
+        } else if (short_name == "DateTieredCompactionStrategy") {
+            return compaction_strategy_type::date_tiered;
        } else {
            throw exceptions::configuration_exception(sprint("Unable to find compaction strategy class '%s'", name));
        }
@@ -87,6 +110,8 @@ public:
    sstring name() const {
        return name(type());
    }
+
+    sstable_set make_sstable_set(schema_ptr schema) const;
 };

 // Creates a compaction_strategy object from one of the strategies available.
--- a/compatible_ring_position.hh
+++ b/compatible_ring_position.hh
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2016 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#pragma once
+
+#include "query-request.hh"
+#include <experimental/optional>
+
+// Wraps ring_position for code that needs a default constructor and stateless comparators:
+// the schema used for comparison is carried inside the object rather than by the comparator.
+class compatible_ring_position {
+    const schema* _schema = nullptr;
+    // Optional only so that the class can be default-constructed; it serves no other purpose.
+    std::experimental::optional<dht::ring_position> _rp;
+public:
+    compatible_ring_position() noexcept = default;
+    compatible_ring_position(const schema& s, const dht::ring_position& rp)
+            : _schema(&s), _rp(rp) {
+    }
+    compatible_ring_position(const schema& s, dht::ring_position&& rp)
+            : _schema(&s), _rp(std::move(rp)) {
+    }
+    const dht::token& token() const { // Dereferences _rp, so the object must hold a position.
+        return _rp->token();
+    }
+    friend int tri_compare(const compatible_ring_position& x, const compatible_ring_position& y) { // Uses x's schema; both operands must hold a position.
+        return x._rp->tri_compare(*x._schema, *y._rp);
+    }
+    friend bool operator<(const compatible_ring_position& x, const compatible_ring_position& y) {
+        return tri_compare(x, y) < 0;
+    }
+    friend bool operator<=(const compatible_ring_position& x, const compatible_ring_position& y) {
+        return tri_compare(x, y) <= 0;
+    }
+    friend bool operator>(const compatible_ring_position& x, const compatible_ring_position& y) {
+        return tri_compare(x, y) > 0;
+    }
+    friend bool operator>=(const compatible_ring_position& x, const compatible_ring_position& y) {
+        return tri_compare(x, y) >= 0;
+    }
+    friend bool operator==(const compatible_ring_position& x, const compatible_ring_position& y) {
+        return tri_compare(x, y) == 0;
+    }
+    friend bool operator!=(const compatible_ring_position& x, const compatible_ring_position& y) {
+        return tri_compare(x, y) != 0;
+    }
+};
+
--- a/compound.hh
+++ b/compound.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015 Cloudius Systems, Ltd.
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -22,7 +22,7 @@
 #pragma once

 #include "types.hh"
-#include <iostream>
+#include <iosfwd>
 #include <algorithm>
 #include <vector>
 #include <boost/range/iterator_range.hpp>
@@ -130,10 +130,10 @@ public:
    bytes decompose_value(const value_type& values) {
        return serialize_value(values);
    }
-    class iterator : public std::iterator<std::input_iterator_tag, bytes_view> {
+    class iterator : public std::iterator<std::input_iterator_tag, const bytes_view> {
    private:
        bytes_view _v;
-        value_type _current;
+        bytes_view _current;
    private:
        void read_current() {
            size_type len;
@@ -220,6 +220,9 @@ public:
        assert(AllowPrefixes == allow_prefixes::yes);
        return std::distance(begin(v), end(v)) == (ssize_t)_types.size();
    }
+    bool is_empty(bytes_view v) const {
+        return begin(v) == end(v);
+    }
    void validate(bytes_view v) {
        // FIXME: implement
        warn(unimplemented::cause::VALIDATION);
--- a/compound_compat.hh
+++ b/compound_compat.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -21,7 +21,10 @@

 #pragma once

+#include <boost/range/algorithm/copy.hpp>
+#include <boost/range/adaptor/transformed.hpp>
 #include "compound.hh"
+#include "schema.hh"

 //
 // This header provides adaptors between the representation used by our compound_type<>
@@ -180,3 +183,422 @@ bytes to_legacy(CompoundType& type, bytes_view packed) {
    std::copy(lv.begin(), lv.end(), legacy_form.begin());
    return legacy_form;
 }
+
+class composite_view;
+
+// Represents a value serialized according to Origin's CompositeType.
+// If is_compound is true, then the value is one or more components encoded as:
+//
+//   <representation> ::= ( <component> )+
+//   <component>      ::= <length> <value> <EOC>
+//   <length>         ::= <uint16_t>
+//   <EOC>            ::= <uint8_t>
+//
+// If false, then it encodes a single value, without a prefix length or a suffix EOC.
+class composite final {
+    bytes _bytes;
+    bool _is_compound;
+public:
+    composite(bytes&& b, bool is_compound)
+            : _bytes(std::move(b))
+            , _is_compound(is_compound)
+    { }
+
+    explicit composite(bytes&& b)
+            : _bytes(std::move(b))
+            , _is_compound(true)
+    { }
+
+    composite()
+            : _bytes()
+            , _is_compound(true)
+    { }
+
+    using size_type = uint16_t;
+    using eoc_type = int8_t;
+
+    /*
+     * The 'end-of-component' byte should always be 0 for an actual column name.
+     * However, it can be set to 1 for query bounds. This allows querying for the
+     * equivalent of 'give me the full range'. That is, if a slice query is:
+     *   start = <3><"foo".getBytes()><0>
+     *   end   = <3><"foo".getBytes()><1>
+     * then we'll return *all* the columns whose first component is "foo".
+     * If for a component, the 'end-of-component' is != 0, there should not be any
+     * following component. The end-of-component can also be -1 to allow
+     * non-inclusive query. For instance:
+     *   end = <3><"foo".getBytes()><-1>
+     * allows to query everything that is smaller than <3><"foo".getBytes()>, but
+     * not <3><"foo".getBytes()> itself.
+     */
+    enum class eoc : eoc_type {
+        start = -1,
+        none = 0,
+        end = 1
+    };
+
+    using component = std::pair<bytes, eoc>;
+    using component_view = std::pair<bytes_view, eoc>;
+private:
+    // Size in bytes of an already-serialized component (anything exposing size()).
+    // data_value arguments are left to the dedicated overload. std::decay_t strips
+    // cv-qualifiers, so the exclusion has to name plain data_value (as write_value() does);
+    // comparing against `const data_value` could never match.
+    template<typename Value, typename = std::enable_if_t<!std::is_same<data_value, std::decay_t<Value>>::value>>
+    static size_t size(const Value& val) {
+        return val.size();
+    }
+    static size_t size(const data_value& val) {
+        return val.serialized_size();
+    }
+    template<typename Value, typename = std::enable_if_t<!std::is_same<data_value, std::decay_t<Value>>::value>>
+    static void write_value(Value&& val, bytes::iterator& out) {
+        out = std::copy(val.begin(), val.end(), out);
+    }
+    static void write_value(const data_value& val, bytes::iterator& out) {
+        val.serialize(out);
+    }
+    // Writes `values` to `out`, advancing it.
+    // Non-compound: only the first value is written, raw, with no length prefix or EOC byte.
+    // Compound: every value is written as <length><value><EOC>, with EOC set to eoc::none.
+    // `out` must have room for serialized_size(values, is_compound) bytes.
+    template<typename RangeOfSerializedComponents>
+    static void serialize_value(RangeOfSerializedComponents&& values, bytes::iterator& out, bool is_compound) {
+        if (!is_compound) {
+            auto it = values.begin();
+            // An empty range has nothing to write (serialized_size() reports 0 bytes for it);
+            // begin() must not be dereferenced in that case.
+            if (it != values.end()) {
+                write_value(std::forward<decltype(*it)>(*it), out);
+            }
+            return;
+        }
+
+        for (auto&& val : values) {
+            write<size_type>(out, static_cast<size_type>(size(val)));
+            write_value(std::forward<decltype(val)>(val), out);
+            // Range tombstones are not keys. For collections, only frozen
+            // values can be keys. Therefore, for as long as it is safe to
+            // assume that this code will be used to create keys, it is safe
+            // to assume the trailing byte is always zero.
+            write<eoc_type>(out, eoc_type(eoc::none));
+        }
+    }
+    template <typename RangeOfSerializedComponents>
+    static size_t serialized_size(RangeOfSerializedComponents&& values, bool is_compound) { // Byte count needed to serialize `values`; throws std::runtime_error if a component is too large.
+        size_t len = 0;
+        auto it = values.begin();
+        if (it != values.end()) {
+            // CQL3 uses a specific prefix (0xFFFF) to encode "static columns"
+            // (CASSANDRA-6561). This means the maximum size of the first component of a
+            // composite is 65534, not 65535 (otherwise we could not tell whether the first 2
+            // bytes are the static prefix or a length).
+            auto value_size = size(*it);
+            if (value_size > static_cast<size_type>(std::numeric_limits<size_type>::max() - uint8_t(is_compound))) { // Limit is one lower when compound.
+                throw std::runtime_error(sprint("First component size too large: %d > %d", value_size, std::numeric_limits<size_type>::max() - is_compound));
+            }
+            if (!is_compound) {
+                return value_size; // Non-compound: just the first value, with no length prefix or EOC byte.
+            }
+            len += sizeof(size_type) + value_size + sizeof(eoc_type);
+            ++it;
+        }
+        for ( ; it != values.end(); ++it) { // Remaining components: <length><value><EOC> each.
+            auto value_size = size(*it);
+            if (value_size > std::numeric_limits<size_type>::max()) {
+                throw std::runtime_error(sprint("Component size too large: %d > %d", value_size, std::numeric_limits<size_type>::max()));
+            }
+            len += sizeof(size_type) + value_size + sizeof(eoc_type);
+        }
+        return len;
+    }
+public:
+    template <typename Describer>
+    auto describe_type(Describer f) const {
+        return f(const_cast<bytes&>(_bytes));
+    }
+
+    // Serializes `values` into a new composite.
+    // For a non-empty compound result the final EOC byte is overwritten with `marker`;
+    // `marker` is ignored if !is_compound.
+    template<typename RangeOfSerializedComponents>
+    static composite serialize_value(RangeOfSerializedComponents&& values, bool is_compound = true, eoc marker = eoc::none) {
+        bytes buffer(bytes::initialized_later(), serialized_size(values, is_compound));
+        auto out = buffer.begin();
+        serialize_value(std::forward<decltype(values)>(values), out, is_compound);
+        const bool has_trailing_eoc = is_compound && !buffer.empty();
+        if (has_trailing_eoc) {
+            buffer.back() = eoc_type(marker);
+        }
+        return composite(std::move(buffer), is_compound);
+    }
+
+    template<typename RangeOfSerializedComponents>
+    static composite serialize_static(const schema& s, RangeOfSerializedComponents&& values) {
+        // FIXME: Optimize
+        auto b = bytes(size_t(2), bytes::value_type(0xff));
+        std::vector<bytes_view> sv(s.clustering_key_size());
+        b += composite::serialize_value(boost::range::join(sv, std::forward<RangeOfSerializedComponents>(values)), true).release_bytes();
+        return composite(std::move(b));
+    }
+
+    // Maps a raw end-of-component byte to the enum by sign:
+    // negative -> start, zero -> none, positive -> end.
+    static eoc to_eoc(int8_t eoc_byte) {
+        if (eoc_byte < 0) {
+            return eoc::start;
+        }
+        if (eoc_byte > 0) {
+            return eoc::end;
+        }
+        return eoc::none;
+    }
+
+    class iterator : public std::iterator<std::input_iterator_tag, const component_view> {
+        bytes_view _v;
+        component_view _current;
+    private:
+        void read_current() {
+            size_type len;
+            {
+                if (_v.empty()) {
+                    _v = bytes_view(nullptr, 0);
+                    return;
+                }
+                len = read_simple<size_type>(_v);
+                if (_v.size() < len) {
+                    throw marshal_exception();
+                }
+            }
+            auto value = bytes_view(_v.begin(), len);
+            _v.remove_prefix(len);
+            _current = component_view(std::move(value), to_eoc(read_simple<eoc_type>(_v)));
+        }
+    public:
+        struct end_iterator_tag {};
+
+        iterator(const bytes_view& v, bool is_compound, bool is_static)
+                : _v(v) {
+            if (is_static) {
+                _v.remove_prefix(2);
+            }
+            if (is_compound) {
+                read_current();
+            } else {
+                _current = component_view(_v, eoc::none);
+                _v.remove_prefix(_v.size());
+            }
+        }
+
+        iterator(end_iterator_tag) : _v(nullptr, 0) {}
+
+        iterator& operator++() {
+            read_current();
+            return *this;
+        }
+
+        iterator operator++(int) {
+            iterator i(*this);
+            ++(*this);
+            return i;
+        }
+
+        const value_type& operator*() const { return _current; }
+        const value_type* operator->() const { return &_current; }
+        bool operator!=(const iterator& i) const { return _v.begin() != i._v.begin(); }
+        bool operator==(const iterator& i) const { return _v.begin() == i._v.begin(); }
+    };
+
+    iterator begin() const {
+        return iterator(_bytes, _is_compound, is_static());
+    }
+
+    iterator end() const {
+        return iterator(iterator::end_iterator_tag());
+    }
+
+    boost::iterator_range<iterator> components() const & {
+        return { begin(), end() };
+    }
+
+    auto values() const & {
+        return components() | boost::adaptors::transformed([](auto&& c) { return c.first; });
+    }
+
+    std::vector<component> components() const && {
+        std::vector<component> result;
+        std::transform(begin(), end(), std::back_inserter(result), [](auto&& p) {
+            return component(bytes(p.first.begin(), p.first.end()), p.second);
+        });
+        return result;
+    }
+
+    std::vector<bytes> values() const && {
+        std::vector<bytes> result;
+        boost::copy(components() | boost::adaptors::transformed([](auto&& c) { return to_bytes(c.first); }), std::back_inserter(result));
+        return result;
+    }
+
+    const bytes& get_bytes() const {
+        return _bytes;
+    }
+
+    bytes release_bytes() && {
+        return std::move(_bytes);
+    }
+
+    size_t size() const {
+        return _bytes.size();
+    }
+
+    bool empty() const {
+        return _bytes.empty();
+    }
+
+    static bool is_static(bytes_view bytes, bool is_compound) {
+        return is_compound && bytes.size() > 2 && (bytes[0] & bytes[1] & 0xff) == 0xff;
+    }
+
+    bool is_static() const {
+        return is_static(_bytes, _is_compound);
+    }
+
+    bool is_compound() const {
+        return _is_compound;
+    }
+
+    template <typename ClusteringElement>
+    static composite from_clustering_element(const schema& s, const ClusteringElement& ce) {
+        return serialize_value(ce.components(s), s.is_compound());
+    }
+
+    // Builds a composite from already-split component values.
+    // With no components the result consists of the marker byte alone.
+    static composite from_exploded(const std::vector<bytes_view>& v, bool is_compound, eoc marker = eoc::none) {
+        if (!v.empty()) {
+            return serialize_value(v, is_compound, marker);
+        }
+        bytes marker_only(size_t(1), bytes::value_type(marker));
+        return composite(std::move(marker_only), is_compound);
+    }
+
+    static composite static_prefix(const schema& s) {
+        return serialize_static(s, std::vector<bytes_view>());
+    }
+
+    explicit operator bytes_view() const {
+        return _bytes;
+    }
+
+    template <typename Component>
+    friend inline std::ostream& operator<<(std::ostream& os, const std::pair<Component, eoc>& c) {
+        return os << "{value=" << c.first << "; eoc=" << sprint("0x%02x", eoc_type(c.second) & 0xff) << "}";
+    }
+
+    friend std::ostream& operator<<(std::ostream& os, const composite& v);
+
+    struct tri_compare {
+        const std::vector<data_type>& _types;
+        tri_compare(const std::vector<data_type>& types) : _types(types) {}
+        int operator()(const composite&, const composite&) const;
+        int operator()(composite_view, composite_view) const;
+    };
+};
+
+class composite_view final {
+    bytes_view _bytes;
+    bool _is_compound;
+public:
+    composite_view(bytes_view b, bool is_compound = true)
+            : _bytes(b)
+            , _is_compound(is_compound)
+    { }
+
+    composite_view(const composite& c)
+            : composite_view(static_cast<bytes_view>(c), c.is_compound())
+    { }
+
+    composite_view()
+            : _bytes(nullptr, 0)
+            , _is_compound(true)
+    { }
+
+    // Splits the composite into views of its component values.
+    // A non-compound composite yields its bytes as a single component.
+    // Throws runtime_exception if any component other than the last carries a non-zero
+    // end-of-component marker.
+    std::vector<bytes_view> explode() const {
+        if (!_is_compound) {
+            return { _bytes };
+        }
+
+        std::vector<bytes_view> ret;
+        ret.reserve(8);
+        for (auto it = begin(), e = end(); it != e; ) {
+            ret.push_back(it->first);
+            auto marker = it->second;
+            ++it;
+            // Only the last component may carry a marker other than eoc::none.
+            if (it != e && marker != composite::eoc::none) {
+                // The argument is the pre-formatted hex string, hence %s rather than %d.
+                throw runtime_exception(sprint("non-zero component divider found (%s) mid", sprint("0x%02x", composite::eoc_type(marker) & 0xff)));
+            }
+        }
+        return ret;
+    }
+
+    composite::iterator begin() const {
+        return composite::iterator(_bytes, _is_compound, is_static());
+    }
+
+    composite::iterator end() const {
+        return composite::iterator(composite::iterator::end_iterator_tag());
+    }
+
+    boost::iterator_range<composite::iterator> components() const {
+        return { begin(), end() };
+    }
+
+    // End-of-component marker stored in the final byte; eoc::none for a non-compound
+    // or empty composite.
+    composite::eoc last_eoc() const {
+        const bool has_marker = _is_compound && !_bytes.empty();
+        if (!has_marker) {
+            return composite::eoc::none;
+        }
+        bytes_view tail(_bytes);
+        tail.remove_prefix(tail.size() - 1);
+        const auto raw = read_simple<composite::eoc_type>(tail);
+        return composite::to_eoc(raw);
+    }
+
+    auto values() const {
+        return components() | boost::adaptors::transformed([](auto&& c) { return c.first; });
+    }
+
+    size_t size() const {
+        return _bytes.size();
+    }
+
+    bool empty() const {
+        return _bytes.empty();
+    }
+
+    bool is_static() const {
+        return composite::is_static(_bytes, _is_compound);
+    }
+
+    explicit operator bytes_view() const {
+        return _bytes;
+    }
+
+    bool operator==(const composite_view& k) const { return k._bytes == _bytes && k._is_compound == _is_compound; }
+    bool operator!=(const composite_view& k) const { return !(k == *this); }
+
+    friend inline std::ostream& operator<<(std::ostream& os, composite_view v) {
+        return os << "{" << ::join(", ", v.components()) << ", compound=" << v._is_compound << ", static=" << v.is_static() << "}";
+    }
+};
+
+inline
+std::ostream& operator<<(std::ostream& os, const composite& v) {
+    return os << composite_view(v);
+}
+
+inline
+int composite::tri_compare::operator()(const composite& v1, const composite& v2) const {
+    return (*this)(composite_view(v1), composite_view(v2));
+}
+
+inline
+int composite::tri_compare::operator()(composite_view v1, composite_view v2) const {
+    // See org.apache.cassandra.db.composites.AbstractCType#compare
+    const bool lhs_empty = v1.empty();
+    const bool rhs_empty = v2.empty();
+    if (lhs_empty || rhs_empty) {
+        // An empty composite sorts before any non-empty one; two empty ones are equal.
+        return int(rhs_empty) - int(lhs_empty);
+    }
+    const bool lhs_static = v1.is_static();
+    if (lhs_static != v2.is_static()) {
+        // Static composites sort before non-static ones.
+        return lhs_static ? -1 : 1;
+    }
+    auto compare_components = [](const data_type& type, component_view lhs, component_view rhs) {
+        // The value decides first; the EOC marker breaks ties.
+        auto by_value = type->compare(lhs.first, rhs.first);
+        if (by_value != 0) {
+            return by_value;
+        }
+        return static_cast<int>(lhs.second) - static_cast<int>(rhs.second);
+    };
+    auto lhs_components = v1.components();
+    auto rhs_components = v2.components();
+    return lexicographical_tri_compare(_types.begin(), _types.end(),
+        lhs_components.begin(), lhs_components.end(),
+        rhs_components.begin(), rhs_components.end(),
+        compare_components);
+}
--- a/compress.hh
+++ b/compress.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015 Cloudius Systems, Ltd.
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -32,24 +32,24 @@ enum class compressor {

 class compression_parameters {
 public:
-    static constexpr int32_t DEFAULT_CHUNK_LENGTH = 64 * 1024;
+    static constexpr int32_t DEFAULT_CHUNK_LENGTH = 4 * 1024;
    static constexpr double DEFAULT_CRC_CHECK_CHANCE = 1.0;

    static constexpr auto SSTABLE_COMPRESSION = "sstable_compression";
    static constexpr auto CHUNK_LENGTH_KB = "chunk_length_kb";
    static constexpr auto CRC_CHECK_CHANCE = "crc_check_chance";
 private:
-    compressor _compressor = compressor::none;
+    compressor _compressor;
    std::experimental::optional<int> _chunk_length;
    std::experimental::optional<double> _crc_check_chance;
 public:
-    compression_parameters() = default;
-    compression_parameters(compressor c) : _compressor(c) { }
+    compression_parameters(compressor c = compressor::lz4) : _compressor(c) { }
    compression_parameters(const std::map<sstring, sstring>& options) {
        validate_options(options);

        auto it = options.find(SSTABLE_COMPRESSION);
        if (it == options.end() || it->second.empty()) {
+            _compressor = compressor::none;
            return;
        }
        const auto& compressor_class = it->second;
--- a/conf/housekeeping.cfg
+++ b/conf/housekeeping.cfg
@@ -0,0 +1,2 @@
+[housekeeping]
+check-version: True
--- a/conf/scylla.yaml
+++ b/conf/scylla.yaml
@@ -89,6 +89,15 @@ listen_address: localhost
 # For security reasons, you should not expose this port to the internet.  Firewall it if needed.
 native_transport_port: 9042

+# Enabling native transport encryption in client_encryption_options allows you to either use
+# encryption for the standard port or to use a dedicated, additional port along with the unencrypted
+# standard native_transport_port.
+# Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption
+# for native_transport_port. Setting native_transport_port_ssl to a different value
+# from native_transport_port will use encryption for native_transport_port_ssl while
+# keeping native_transport_port unencrypted.
+#native_transport_port_ssl: 9142
+
 # Throttles all outbound streaming file transfers on this node to the
 # given total throughput in Mbps. This is necessary because Scylla does
 # mostly sequential IO when streaming data during bootstrap or repair, which
@@ -106,6 +115,19 @@ write_request_timeout_in_ms: 2000
 # most users should never need to adjust this.
 # phi_convict_threshold: 8

+# IEndpointSnitch.  The snitch has two functions:
+# - it teaches Scylla enough about your network topology to route
+#   requests efficiently
+# - it allows Scylla to spread replicas around your cluster to avoid
+#   correlated failures. It does this by grouping machines into
+#   "datacenters" and "racks."  Scylla will do its best not to have
+#   more than one replica on the same "rack" (which may not actually
+#   be a physical location)
+#
+# IF YOU CHANGE THE SNITCH AFTER DATA IS INSERTED INTO THE CLUSTER,
+# YOU MUST RUN A FULL REPAIR, SINCE THE SNITCH AFFECTS WHERE REPLICAS
+# ARE PLACED.
+#
 # Out of the box, Scylla provides
 #  - SimpleSnitch:
 #    Treats Strategy order as proximity. This can improve cache
@@ -179,10 +201,27 @@ api_address: 127.0.0.1
 # Caution should be taken on increasing the size of this threshold as it can lead to node instability.
 batch_size_warn_threshold_in_kb: 5

+# Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default.
+batch_size_fail_threshold_in_kb: 50

-###################################################
-## Not currently supported, reserved for future use
-###################################################
+# Authentication backend, identifying users
+# Out of the box, Scylla provides org.apache.cassandra.auth.{AllowAllAuthenticator,
+# PasswordAuthenticator}.
+#
+# - AllowAllAuthenticator performs no checks - set it to disable authentication.
+# - PasswordAuthenticator relies on username/password pairs to authenticate
+#   users. It keeps usernames and hashed passwords in system_auth.credentials table.
+#   Please increase system_auth keyspace replication factor if you use this authenticator.
+# authenticator: AllowAllAuthenticator
+
+# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions
+# Out of the box, Scylla provides org.apache.cassandra.auth.{AllowAllAuthorizer,
+# CassandraAuthorizer}.
+#
+# - AllowAllAuthorizer allows any action to any user - set it to disable authorization.
+# - CassandraAuthorizer stores permissions in system_auth.permissions table. Please
+#   increase system_auth keyspace replication factor if you use this authorizer.
+# authorizer: AllowAllAuthorizer

 # initial_token allows you to specify tokens manually.  While you can use # it with
 # vnodes (num_tokens > 1, above) -- in which case you should provide a 
@@ -190,6 +229,19 @@ batch_size_warn_threshold_in_kb: 5
 # that do not have vnodes enabled.
 # initial_token:

+# RPC address to broadcast to drivers and other Scylla nodes. This cannot
+# be set to 0.0.0.0. If left blank, this will be set to the value of
+# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must
+# be set.
+# broadcast_rpc_address: 1.2.3.4
+
+# Uncomment to enable experimental features
+# experimental: true
+
+###################################################
+## Not currently supported, reserved for future use
+###################################################
+
 # See http://wiki.apache.org/cassandra/HintedHandoff
 # May either be "true" or "false" to enable globally, or contain a list
 # of data centers to enable per-datacenter.
@@ -216,25 +268,6 @@ batch_size_warn_threshold_in_kb: 5
 # reduced proportionally to the number of nodes in the cluster.
 # batchlog_replay_throttle_in_kb: 1024

-# Authentication backend, identifying users
-# Out of the box, Scylla provides org.apache.cassandra.auth.{AllowAllAuthenticator,
-# PasswordAuthenticator}.
-#
-# - AllowAllAuthenticator performs no checks - set it to disable authentication.
-# - PasswordAuthenticator relies on username/password pairs to authenticate
-#   users. It keeps usernames and hashed passwords in system_auth.credentials table.
-#   Please increase system_auth keyspace replication factor if you use this authenticator.
-# authenticator: AllowAllAuthenticator
-
-# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions
-# Out of the box, Scylla provides org.apache.cassandra.auth.{AllowAllAuthorizer,
-# CassandraAuthorizer}.
-#
-# - AllowAllAuthorizer allows any action to any user - set it to disable authorization.
-# - CassandraAuthorizer stores permissions in system_auth.permissions table. Please
-#   increase system_auth keyspace replication factor if you use this authorizer.
-# authorizer: AllowAllAuthorizer
-
 # Validity period for permissions cache (fetching permissions can be an
 # expensive operation depending on the authorizer, CassandraAuthorizer is
 # one example). Defaults to 2000, set to 0 to disable.
@@ -261,28 +294,6 @@ batch_size_warn_threshold_in_kb: 5
 #
 partitioner: org.apache.cassandra.dht.Murmur3Partitioner

-
-# policy for data disk failures:
-# die: shut down gossip and Thrift and kill the JVM for any fs errors or
-#      single-sstable errors, so the node can be replaced.
-# stop_paranoid: shut down gossip and Thrift even for single-sstable errors.
-# stop: shut down gossip and Thrift, leaving the node effectively dead, but
-#       can still be inspected via JMX.
-# best_effort: stop using the failed disk and respond to requests based on
-#              remaining available sstables.  This means you WILL see obsolete
-#              data at CL.ONE!
-# ignore: ignore fatal errors and let requests fail, as in pre-1.2 Scylla
-# disk_failure_policy: stop
-
-# policy for commit disk failures:
-# die: shut down gossip and Thrift and kill the JVM, so the node can be replaced.
-# stop: shut down gossip and Thrift, leaving the node effectively dead, but
-#       can still be inspected via JMX.
-# stop_commit: shutdown the commit log, letting writes collect but
-#              continuing to service reads, as in pre-2.0.5 Scylla
-# ignore: ignore fatal errors and let the batches fail
-# commit_failure_policy: stop
-
 # Maximum size of the key cache in memory.
 #
 # Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the
@@ -397,29 +408,6 @@ partitioner: org.apache.cassandra.dht.Murmur3Partitioner
 # the smaller of 1/4 of heap or 512MB.
 # file_cache_size_in_mb: 512

-# Total permitted memory to use for memtables. Scylla will stop 
-# accepting writes when the limit is exceeded until a flush completes,
-# and will trigger a flush based on memtable_cleanup_threshold
-# If omitted, Scylla will set both to 1/4 the size of the heap.
-# memtable_heap_space_in_mb: 2048
-# memtable_offheap_space_in_mb: 2048
-
-# Ratio of occupied non-flushing memtable size to total permitted size
-# that will trigger a flush of the largest memtable.  Lager mct will
-# mean larger flushes and hence less compaction, but also less concurrent
-# flush activity which can make it difficult to keep your disks fed
-# under heavy write load.
-#
-# memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1)
-# memtable_cleanup_threshold: 0.11
-
-# Specify the way Scylla allocates and manages memtable memory.
-# Options are:
-#   heap_buffers:    on heap nio buffers
-#   offheap_buffers: off heap (direct) nio buffers
-#   offheap_objects: native memory, eliminating nio buffer heap overhead
-# memtable_allocation_type: heap_buffers
-
 # Total space to use for commitlogs.
 #
 # If space gets above this value (it will round up to the next nearest
@@ -431,17 +419,6 @@ partitioner: org.apache.cassandra.dht.Murmur3Partitioner
 # available for Scylla.
 commitlog_total_space_in_mb: -1

-# This sets the amount of memtable flush writer threads.  These will
-# be blocked by disk io, and each one will hold a memtable in memory
-# while blocked. 
-#
-# memtable_flush_writers defaults to the smaller of (number of disks,
-# number of cores), with a minimum of 2 and a maximum of 8.
-# 
-# If your data directories are backed by SSD, you should increase this
-# to the number of cores.
-#memtable_flush_writers: 8
-
 # A fixed memory pool size in MB for for SSTable index summaries. If left
 # empty, this will default to 5% of the heap size. If the memory usage of
 # all index summaries exceeds this limit, SSTables with low read rates will
@@ -506,13 +483,6 @@ commitlog_total_space_in_mb: -1
 # Whether to start the thrift rpc server.
 # start_rpc: true

-
-# RPC address to broadcast to drivers and other Scylla nodes. This cannot
-# be set to 0.0.0.0. If left blank, this will be set to the value of
-# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must
-# be set.
-# broadcast_rpc_address: 1.2.3.4
-
 # enable or disable keepalive on rpc/native connections
 # rpc_keepalive: true

@@ -680,58 +650,6 @@ commitlog_total_space_in_mb: -1
 # Default value is 0, which never timeout streams.
 # streaming_socket_timeout_in_ms: 0

-
-# endpoint_snitch -- Set this to a class that implements
-# IEndpointSnitch.  The snitch has two functions:
-# - it teaches Scylla enough about your network topology to route
-#   requests efficiently
-# - it allows Scylla to spread replicas around your cluster to avoid
-#   correlated failures. It does this by grouping machines into
-#   "datacenters" and "racks."  Scylla will do its best not to have
-#   more than one replica on the same "rack" (which may not actually
-#   be a physical location)
-#
-# IF YOU CHANGE THE SNITCH AFTER DATA IS INSERTED INTO THE CLUSTER,
-# YOU MUST RUN A FULL REPAIR, SINCE THE SNITCH AFFECTS WHERE REPLICAS
-# ARE PLACED.
-#
-# Out of the box, Scylla provides
-#  - SimpleSnitch:
-#    Treats Strategy order as proximity. This can improve cache
-#    locality when disabling read repair.  Only appropriate for
-#    single-datacenter deployments.
-#  - GossipingPropertyFileSnitch
-#    This should be your go-to snitch for production use.  The rack
-#    and datacenter for the local node are defined in
-#    cassandra-rackdc.properties and propagated to other nodes via
-#    gossip.  If cassandra-topology.properties exists, it is used as a
-#    fallback, allowing migration from the PropertyFileSnitch.
-#  - PropertyFileSnitch:
-#    Proximity is determined by rack and data center, which are
-#    explicitly configured in cassandra-topology.properties.
-#  - Ec2Snitch:
-#    Appropriate for EC2 deployments in a single Region. Loads Region
-#    and Availability Zone information from the EC2 API. The Region is
-#    treated as the datacenter, and the Availability Zone as the rack.
-#    Only private IPs are used, so this will not work across multiple
-#    Regions.
-#  - Ec2MultiRegionSnitch:
-#    Uses public IPs as broadcast_address to allow cross-region
-#    connectivity.  (Thus, you should set seed addresses to the public
-#    IP as well.) You will need to open the storage_port or
-#    ssl_storage_port on the public IP firewall.  (For intra-Region
-#    traffic, Scylla will switch to the private IP after
-#    establishing a connection.)
-#  - RackInferringSnitch:
-#    Proximity is determined by rack and data center, which are
-#    assumed to correspond to the 3rd and 2nd octet of each node's IP
-#    address, respectively.  Unless this happens to match your
-#    deployment conventions, this is best used as an example of
-#    writing a custom Snitch class and is provided in that spirit.
-#
-# You can use a custom Snitch by setting this to the full class name
-# of the snitch, which will be assumed to be on your classpath.
-
 # controls how often to perform the more expensive part of host score
 # calculation
 # dynamic_snitch_update_interval_in_ms: 100 
@@ -802,29 +720,24 @@ commitlog_total_space_in_mb: -1
 #    certificate: conf/scylla.crt
 #    keyfile: conf/scylla.key
 #    truststore: <none, use system trust>
+#    require_client_auth: False
+#    priority_string: <none, use default>

 # enable or disable client/server encryption.
 # client_encryption_options:
 #    enabled: false
 #    certificate: conf/scylla.crt
 #    keyfile: conf/scylla.key
-
-    # require_client_auth: false
-    # Set trustore and truststore_password if require_client_auth is true
-    # truststore: conf/.truststore
-    # truststore_password: cassandra
-    # More advanced defaults below:
-    # protocol: TLS
-    # algorithm: SunX509
-    # store_type: JKS
-    # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA]
+#    truststore: <none, use system trust>
+#    require_client_auth: False
+#    priority_string: <none, use default>

 # internode_compression controls whether traffic between nodes is
 # compressed.
 # can be:  all  - all traffic is compressed
 #          dc   - traffic between different datacenters is compressed
 #          none - nothing is compressed.
-# internode_compression: all
+# internode_compression: none

 # Enable or disable tcp_nodelay for inter-dc communication.
 # Disabling it will result in larger (but fewer) network packets being sent,
@@ -845,3 +758,41 @@ commitlog_total_space_in_mb: -1
 # true: relaxed environment checks; performance and reliability may degraade.
 #
 # developer_mode: false
+
+
+# Idle-time background processing
+#
+# Scylla can perform certain jobs in the background while the system is otherwise idle,
+# freeing processor resources when there is other work to be done.
+#
+# defragment_memory_on_idle: true
+#
+# prometheus port
+# By default, Scylla opens prometheus API port on port 9180
+# setting the port to 0 will disable the prometheus API.
+# prometheus_port: 9180
+#
+# prometheus address
+# By default, Scylla binds the prometheus API to all interfaces
+# It is possible to restrict the listening address to a specific one
+# prometheus_address: 0.0.0.0
+
+# Distribution of data among cores (shards) within a node
+#
+# Scylla distributes data within a node among shards, using a round-robin
+# strategy:
+#  [shard0] [shard1] ... [shardN-1] [shard0] [shard1] ... [shardN-1] ...
+#
+# Scylla versions 1.6 and below used just one repetition of the pattern;
+# this interfered with data placement among nodes (vnodes).
+#
+# Scylla versions 1.7 and above use 4096 repetitions of the pattern; this
+# provides for better data distribution.
+#
+# the value below is log (base 2) of the number of repetitions.
+#
+# Set to 0 to avoid rewriting all data when upgrading from Scylla 1.6 and
+# below.
+#
+# Keep at 12 for new clusters.
+murmur3_partitioner_ignore_msb_bits: 12
--- a/configure.py
+++ b/configure.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python3
 #
-# Copyright 2015 Cloudius Systems
+# Copyright (C) 2015 ScyllaDB
 #

 #
@@ -34,7 +34,7 @@ for line in open('/etc/os-release'):
        os_ids += value.split(' ')

 # distribution "internationalization", converting package names.
-# Fedora name is key, values is distro -> package name dict. 
+# Fedora name is key, values is distro -> package name dict.
 i18n_xlat = {
    'boost-devel': {
        'debian': 'libboost-dev',
@@ -48,7 +48,7 @@ def pkgname(name):
        for id in os_ids:
            if id in dict:
                return dict[id]
-    return name 
+    return name

 def get_flags():
    with open('/proc/cpuinfo') as f:
@@ -93,7 +93,7 @@ def try_compile(compiler, source = '', flags = []):
 def warning_supported(warning, compiler):
    # gcc ignores -Wno-x even if it is not supported
    adjusted = re.sub('^-Wno-', '-W', warning)
-    return try_compile(flags = [adjusted], compiler = compiler)
+    return try_compile(flags = ['-Werror', adjusted], compiler = compiler)

 def debug_flag(compiler):
    src_with_auto = textwrap.dedent('''\
@@ -108,6 +108,11 @@ def debug_flag(compiler):
        print('Note: debug information disabled; upgrade your compiler')
        return ''

+def maybe_static(flag, libs):
+    if flag and not args.static:
+        libs = '-Wl,-Bstatic {} -Wl,-Bdynamic'.format(libs)
+    return libs
+
 class Thrift(object):
    def __init__(self, source, service):
        self.source = source
@@ -162,12 +167,16 @@ modes = {

 scylla_tests = [
    'tests/mutation_test',
+    'tests/streamed_mutation_test',
+    'tests/schema_registry_test',
    'tests/canonical_mutation_test',
    'tests/range_test',
    'tests/types_test',
    'tests/keys_test',
    'tests/partitioner_test',
    'tests/frozen_mutation_test',
+    'tests/serialized_action_test',
+    'tests/clustering_ranges_walker_test',
    'tests/perf/perf_mutation',
    'tests/lsa_async_eviction_test',
    'tests/lsa_sync_eviction_test',
@@ -176,18 +185,21 @@ scylla_tests = [
    'tests/perf/perf_hash',
    'tests/perf/perf_cql_parser',
    'tests/perf/perf_simple_query',
+    'tests/perf/perf_fast_forward',
+    'tests/cache_streamed_mutation_test',
+    'tests/row_cache_stress_test',
    'tests/memory_footprint',
    'tests/perf/perf_sstable',
    'tests/cql_query_test',
    'tests/storage_proxy_test',
    'tests/schema_change_test',
    'tests/mutation_reader_test',
-    'tests/key_reader_test',
    'tests/mutation_query_test',
    'tests/row_cache_test',
    'tests/test-serialization',
    'tests/sstable_test',
    'tests/sstable_mutation_test',
+    'tests/sstable_resharding_test',
    'tests/memtable_test',
    'tests/commitlog_test',
    'tests/cartesian_product_test',
@@ -209,12 +221,24 @@ scylla_tests = [
    'tests/murmur_hash_test',
    'tests/allocation_strategy_test',
    'tests/logalloc_test',
+    'tests/log_histogram_test',
    'tests/managed_vector_test',
    'tests/crc_test',
    'tests/flush_queue_test',
    'tests/dynamic_bitset_test',
    'tests/auth_test',
    'tests/idl_test',
+    'tests/range_tombstone_list_test',
+    'tests/anchorless_list_test',
+    'tests/database_test',
+    'tests/nonwrapping_range_test',
+    'tests/input_stream_test',
+    'tests/sstable_atomic_deletion_test',
+    'tests/virtual_reader_test',
+    'tests/view_schema_test',
+    'tests/counter_test',
+    'tests/cell_locker_test',
+    'tests/loading_cache_test',
 ]

 apps = [
@@ -245,6 +269,8 @@ arg_parser.add_argument('--ldflags', action = 'store', dest = 'user_ldflags', de
                        help = 'Extra flags for the linker')
 arg_parser.add_argument('--compiler', action = 'store', dest = 'cxx', default = 'g++',
                        help = 'C++ compiler path')
+arg_parser.add_argument('--c-compiler', action='store', dest='cc', default='gcc',
+                        help='C compiler path')
 arg_parser.add_argument('--with-osv', action = 'store', dest = 'with_osv', default = '',
                        help = 'Shortcut for compile for OSv')
 arg_parser.add_argument('--enable-dpdk', action = 'store_true', dest = 'dpdk', default = False,
@@ -255,12 +281,20 @@ arg_parser.add_argument('--debuginfo', action = 'store', dest = 'debuginfo', typ
                        help = 'Enable(1)/disable(0)compiler debug information generation')
 arg_parser.add_argument('--static-stdc++', dest = 'staticcxx', action = 'store_true',
 			help = 'Link libgcc and libstdc++ statically')
+arg_parser.add_argument('--static-thrift', dest = 'staticthrift', action = 'store_true',
+            help = 'Link libthrift statically')
+arg_parser.add_argument('--static-boost', dest = 'staticboost', action = 'store_true',
+            help = 'Link boost statically')
 arg_parser.add_argument('--tests-debuginfo', action = 'store', dest = 'tests_debuginfo', type = int, default = 0,
                        help = 'Enable(1)/disable(0)compiler debug information generation for tests')
 arg_parser.add_argument('--python', action = 'store', dest = 'python', default = 'python3',
                        help = 'Python3 path')
 add_tristate(arg_parser, name = 'hwloc', dest = 'hwloc', help = 'hwloc support')
 add_tristate(arg_parser, name = 'xen', dest = 'xen', help = 'Xen support')
+arg_parser.add_argument('--enable-gcc6-concepts', dest='gcc6_concepts', action='store_true', default=False,
+                        help='enable experimental support for C++ Concepts as implemented in GCC 6')
+arg_parser.add_argument('--enable-alloc-failure-injector', dest='alloc_failure_injector', action='store_true', default=False,
+                        help='enable allocation failure injection')
 args = arg_parser.parse_args()

 defines = []
@@ -275,12 +309,15 @@ scylla_core = (['database.cc',
                 'schema_registry.cc',
                 'bytes.cc',
                 'mutation.cc',
+                 'streamed_mutation.cc',
+                 'partition_version.cc',
                 'row_cache.cc',
                 'canonical_mutation.cc',
                 'frozen_mutation.cc',
                 'memtable.cc',
                 'schema_mutations.cc',
                 'release.cc',
+                 'supervisor.cc',
                 'utils/logalloc.cc',
                 'utils/large_bitset.cc',
                 'mutation_partition.cc',
@@ -288,17 +325,17 @@ scylla_core = (['database.cc',
                 'mutation_partition_serializer.cc',
                 'mutation_reader.cc',
                 'mutation_query.cc',
-                 'key_reader.cc',
                 'keys.cc',
+                 'counters.cc',
                 'sstables/sstables.cc',
                 'sstables/compress.cc',
                 'sstables/row.cc',
-                 'sstables/key.cc',
                 'sstables/partition.cc',
                 'sstables/filter.cc',
                 'sstables/compaction.cc',
+                 'sstables/compaction_strategy.cc',
                 'sstables/compaction_manager.cc',
-                 'log.cc',
+                 'sstables/atomic_deletion.cc',
                 'transport/event.cc',
                 'transport/event_notifier.cc',
                 'transport/server.cc',
@@ -315,11 +352,17 @@ scylla_core = (['database.cc',
                 'cql3/functions/functions.cc',
                 'cql3/statements/cf_prop_defs.cc',
                 'cql3/statements/cf_statement.cc',
+                 'cql3/statements/authentication_statement.cc',
                 'cql3/statements/create_keyspace_statement.cc',
                 'cql3/statements/create_table_statement.cc',
+                 'cql3/statements/create_view_statement.cc',
                 'cql3/statements/create_type_statement.cc',
+                 'cql3/statements/create_user_statement.cc',
+                 'cql3/statements/drop_index_statement.cc',
                 'cql3/statements/drop_keyspace_statement.cc',
                 'cql3/statements/drop_table_statement.cc',
+                 'cql3/statements/drop_view_statement.cc',
+                 'cql3/statements/drop_type_statement.cc',
                 'cql3/statements/schema_altering_statement.cc',
                 'cql3/statements/ks_prop_defs.cc',
                 'cql3/statements/modification_statement.cc',
@@ -335,8 +378,20 @@ scylla_core = (['database.cc',
                 'cql3/statements/create_index_statement.cc',
                 'cql3/statements/truncate_statement.cc',
                 'cql3/statements/alter_table_statement.cc',
+                 'cql3/statements/alter_view_statement.cc',
+                 'cql3/statements/alter_user_statement.cc',
+                 'cql3/statements/drop_user_statement.cc',
+                 'cql3/statements/list_users_statement.cc',
+                 'cql3/statements/authorization_statement.cc',
+                 'cql3/statements/permission_altering_statement.cc',
+                 'cql3/statements/list_permissions_statement.cc',
+                 'cql3/statements/grant_statement.cc',
+                 'cql3/statements/revoke_statement.cc',
+                 'cql3/statements/alter_type_statement.cc',
+                 'cql3/statements/alter_keyspace_statement.cc',
                 'cql3/update_parameters.cc',
                 'cql3/ut_name.cc',
+                 'cql3/user_options.cc',
                 'thrift/handler.cc',
                 'thrift/server.cc',
                 'thrift/thrift_validation.cc',
@@ -352,6 +407,7 @@ scylla_core = (['database.cc',
                 'cql3/operator.cc',
                 'cql3/relation.cc',
                 'cql3/column_identifier.cc',
+                 'cql3/column_specification.cc',
                 'cql3/constants.cc',
                 'cql3/query_processor.cc',
                 'cql3/query_options.cc',
@@ -367,16 +423,23 @@ scylla_core = (['database.cc',
                 'cql3/selection/selection.cc',
                 'cql3/selection/selector.cc',
                 'cql3/restrictions/statement_restrictions.cc',
+                 'cql3/result_set.cc',
+                 'cql3/variable_specifications.cc',
                 'db/consistency_level.cc',
                 'db/system_keyspace.cc',
                 'db/schema_tables.cc',
+                 'db/cql_type_parser.cc',
+                 'db/legacy_schema_migrator.cc',
                 'db/commitlog/commitlog.cc',
                 'db/commitlog/commitlog_replayer.cc',
                 'db/commitlog/commitlog_entry.cc',
                 'db/config.cc',
+                 'db/heat_load_balance.cc',
                 'db/index/secondary_index.cc',
                 'db/marshal/type_parser.cc',
                 'db/batchlog_manager.cc',
+                 'db/view/view.cc',
+                 'index/secondary_index_manager.cc',
                 'io/io.cc',
                 'utils/utils.cc',
                 'utils/UUID_gen.cc',
@@ -387,6 +450,7 @@ scylla_core = (['database.cc',
                 'utils/file_lock.cc',
                 'utils/dynamic_bitset.cc',
                 'utils/managed_bytes.cc',
+                 'utils/exceptions.cc',
                 'gms/version_generator.cc',
                 'gms/versioned_value.cc',
                 'gms/gossiper.cc',
@@ -396,9 +460,11 @@ scylla_core = (['database.cc',
                 'gms/gossip_digest_ack2.cc',
                 'gms/endpoint_state.cc',
                 'gms/application_state.cc',
+                 'gms/inet_address.cc',
                 'dht/i_partitioner.cc',
                 'dht/murmur3_partitioner.cc',
                 'dht/byte_ordered_partitioner.cc',
+                 'dht/random_partitioner.cc',
                 'dht/boot_strapper.cc',
                 'dht/range_streamer.cc',
                 'unimplemented.cc',
@@ -422,7 +488,7 @@ scylla_core = (['database.cc',
                 'service/client_state.cc',
                 'service/migration_task.cc',
                 'service/storage_service.cc',
-                 'service/load_broadcaster.cc',
+                 'service/misc_services.cc',
                 'service/pager/paging_state.cc',
                 'service/pager/query_pagers.cc',
                 'streaming/stream_task.cc',
@@ -438,18 +504,26 @@ scylla_core = (['database.cc',
                 'streaming/stream_manager.cc',
                 'streaming/stream_result_future.cc',
                 'streaming/stream_session_state.cc',
-                 'gc_clock.cc',
+                 'clocks-impl.cc',
                 'partition_slice_builder.cc',
                 'init.cc',
+                 'lister.cc',
                 'repair/repair.cc',
                 'exceptions/exceptions.cc',
-                 'dns.cc',
                 'auth/auth.cc',
                 'auth/authenticated_user.cc',
                 'auth/authenticator.cc',
+                 'auth/authorizer.cc',
+                 'auth/default_authorizer.cc',
                 'auth/data_resource.cc',
                 'auth/password_authenticator.cc',
                 'auth/permission.cc',
+                 'tracing/tracing.cc',
+                 'tracing/trace_keyspace_helper.cc',
+                 'tracing/trace_state.cc',
+                 'range_tombstone.cc',
+                 'range_tombstone_list.cc',
+                 'disk-error-handler.cc'
                 ]
                + [Antlr3Grammar('cql3/Cql.g')]
                + [Thrift('interface/cassandra.thrift', 'Cassandra')]
@@ -509,6 +583,9 @@ idls = ['idl/gossip_digest.idl.hh',
        'idl/query.idl.hh',
        'idl/idl_test.idl.hh',
        'idl/commitlog.idl.hh',
+        'idl/tracing.idl.hh',
+        'idl/consistency_level.idl.hh',
+        'idl/cache_temperature.idl.hh',
        ]

 scylla_tests_dependencies = scylla_core + api + idls + [
@@ -527,60 +604,80 @@ deps = {
    'scylla': idls + ['main.cc'] + scylla_core + api,
 }

-tests_not_using_seastar_test_framework = set([
-    'tests/keys_test',
+pure_boost_tests = set([
    'tests/partitioner_test',
    'tests/map_difference_test',
-    'tests/frozen_mutation_test',
-    'tests/canonical_mutation_test',
+    'tests/keys_test',
+    'tests/compound_test',
+    'tests/range_tombstone_list_test',
+    'tests/anchorless_list_test',
+    'tests/nonwrapping_range_test',
+    'tests/test-serialization',
+    'tests/range_test',
+    'tests/crc_test',
+    'tests/managed_vector_test',
+    'tests/dynamic_bitset_test',
+    'tests/idl_test',
+    'tests/cartesian_product_test',
+])
+
+tests_not_using_seastar_test_framework = set([
    'tests/perf/perf_mutation',
    'tests/lsa_async_eviction_test',
    'tests/lsa_sync_eviction_test',
    'tests/row_cache_alloc_stress',
    'tests/perf_row_cache_update',
-    'tests/cartesian_product_test',
    'tests/perf/perf_hash',
    'tests/perf/perf_cql_parser',
    'tests/message',
    'tests/perf/perf_simple_query',
+    'tests/perf/perf_fast_forward',
+    'tests/row_cache_stress_test',
    'tests/memory_footprint',
-    'tests/test-serialization',
    'tests/gossip',
-    'tests/compound_test',
-    'tests/range_test',
-    'tests/crc_test',
    'tests/perf/perf_sstable',
-    'tests/managed_vector_test',
-    'tests/dynamic_bitset_test',
-    'tests/idl_test',
-])
+]) | pure_boost_tests

 for t in tests_not_using_seastar_test_framework:
    if not t in scylla_tests:
        raise Exception("Test %s not found in scylla_tests" % (t))

 for t in scylla_tests:
-    deps[t] = scylla_tests_dependencies + [t + '.cc']
+    deps[t] = [t + '.cc']
    if t not in tests_not_using_seastar_test_framework:
+        deps[t] += scylla_tests_dependencies
        deps[t] += scylla_tests_seastar_deps
+    else:
+        deps[t] += scylla_core + api + idls + ['tests/cql_test_env.cc']

 deps['tests/sstable_test'] += ['tests/sstable_datafile_test.cc']

-deps['tests/bytes_ostream_test'] = ['tests/bytes_ostream_test.cc']
-deps['tests/UUID_test'] = ['utils/UUID_gen.cc', 'tests/UUID_test.cc']
+deps['tests/bytes_ostream_test'] = ['tests/bytes_ostream_test.cc', 'utils/managed_bytes.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
+deps['tests/input_stream_test'] = ['tests/input_stream_test.cc']
+deps['tests/UUID_test'] = ['utils/UUID_gen.cc', 'tests/UUID_test.cc', 'utils/uuid.cc', 'utils/managed_bytes.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
 deps['tests/murmur_hash_test'] = ['bytes.cc', 'utils/murmur_hash.cc', 'tests/murmur_hash_test.cc']
-deps['tests/allocation_strategy_test'] = ['tests/allocation_strategy_test.cc', 'utils/logalloc.cc', 'log.cc', 'utils/dynamic_bitset.cc']
+deps['tests/allocation_strategy_test'] = ['tests/allocation_strategy_test.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
+deps['tests/log_histogram_test'] = ['tests/log_histogram_test.cc']
+deps['tests/anchorless_list_test'] = ['tests/anchorless_list_test.cc']

 warnings = [
    '-Wno-mismatched-tags',  # clang-only
    '-Wno-maybe-uninitialized', # false positives on gcc 5
+    '-Wno-tautological-compare',
+    '-Wno-parentheses-equality',
+    '-Wno-c++11-narrowing',
+    '-Wno-c++1z-extensions',
+    '-Wno-sometimes-uninitialized',
+    '-Wno-return-stack-address',
+    '-Wno-missing-braces',
+    '-Wno-unused-lambda-capture',
    ]

 warnings = [w
            for w in warnings
            if warning_supported(warning = w, compiler = args.cxx)]

-warnings = ' '.join(warnings)
+warnings = ' '.join(warnings + ['-Wno-error=deprecated-declarations'])

 dbgflag = debug_flag(args.cxx) if args.debuginfo else ''
 tests_link_rule = 'link' if args.tests_debuginfo else 'link_stripped'
@@ -634,6 +731,9 @@ if not try_compile(compiler=args.cxx, source='''\
    print('Installed boost version too old.  Please update {}.'.format(pkgname("boost-devel")))
    sys.exit(1)

+
+has_sanitize_address_use_after_scope = try_compile(compiler=args.cxx, flags=['-fsanitize-address-use-after-scope'], source='int f() {}')
+
 defines = ' '.join(['-D' + d for d in defines])

 globals().update(vars(args))
@@ -656,7 +756,7 @@ scylla_release = file.read().strip()

 extra_cxxflags["release.cc"] = "-DSCYLLA_VERSION=\"\\\"" + scylla_version + "\\\"\" -DSCYLLA_RELEASE=\"\\\"" + scylla_release + "\\\"\""

-seastar_flags = ['--disable-xen']
+seastar_flags = []
 if args.dpdk:
    # fake dependencies on dpdk, so that it is built before anything else
    seastar_flags += ['--enable-dpdk']
@@ -664,9 +764,15 @@ elif args.dpdk_target:
    seastar_flags += ['--dpdk-target', args.dpdk_target]
 if args.staticcxx:
    seastar_flags += ['--static-stdc++']
+if args.staticboost:
+    seastar_flags += ['--static-boost']
+if args.gcc6_concepts:
+    seastar_flags += ['--enable-gcc6-concepts']
+if args.alloc_failure_injector:
+    seastar_flags += ['--enable-alloc-failure-injector']

 seastar_cflags = args.user_cflags + " -march=nehalem"
-seastar_flags += ['--compiler', args.cxx, '--cflags=%s' % (seastar_cflags)]
+seastar_flags += ['--compiler', args.cxx, '--c-compiler', args.cc, '--cflags=%s' % (seastar_cflags)]

 status = subprocess.call([python, './configure.py'] + seastar_flags, cwd = 'seastar')

@@ -697,7 +803,14 @@ for mode in build_modes:
 seastar_deps = 'practically_anything_can_change_so_lets_run_it_every_time_and_restat.'

 args.user_cflags += " " + pkg_config("--cflags", "jsoncpp")
-libs = "-lyaml-cpp -llz4 -lz -lsnappy " + pkg_config("--libs", "jsoncpp") + ' -lboost_filesystem' + ' -lcrypt' + ' -lboost_date_time'
+libs = ' '.join(['-lyaml-cpp', '-llz4', '-lz', '-lsnappy', pkg_config("--libs", "jsoncpp"),
+                 maybe_static(args.staticboost, '-lboost_filesystem'), ' -lcrypt',
+                 maybe_static(args.staticboost, '-lboost_date_time'),
+                ])
+
+if not args.staticboost:
+    args.user_cflags += ' -DBOOST_TEST_DYN_LINK'
+
 for pkg in pkgs:
    args.user_cflags += ' ' + pkg_config('--cflags', pkg)
    libs += ' ' + pkg_config('--libs', pkg)
@@ -705,6 +818,10 @@ user_cflags = args.user_cflags
 user_ldflags = args.user_ldflags
 if args.staticcxx:
    user_ldflags += " -static-libgcc -static-libstdc++"
+if args.staticthrift:
+    thrift_libs = "-Wl,-Bstatic -lthrift -Wl,-Bdynamic"
+else:
+    thrift_libs = "-lthrift"

 outdir = 'build'
 buildfile = 'build.ninja'
@@ -723,6 +840,8 @@ with open(buildfile, 'w') as f:
        libs = {libs}
        pool link_pool
            depth = {link_pool_depth}
+        pool seastar_pool
+            depth = 1
        rule ragel
            command = ragel -G2 -o $out $in
            description = RAGEL $out
@@ -748,7 +867,7 @@ with open(buildfile, 'w') as f:
        f.write(textwrap.dedent('''\
            cxxflags_{mode} = -I. -I $builddir/{mode}/gen -I seastar -I seastar/build/{mode}/gen
            rule cxx.{mode}
-              command = $cxx -MMD -MT $out -MF $out.d {seastar_cflags} $cxxflags $cxxflags_{mode} -c -o $out $in
+              command = $cxx -MD -MT $out -MF $out.d {seastar_cflags} $cxxflags $cxxflags_{mode} $obj_cxxflags -c -o $out $in
              description = CXX $out
              depfile = $out.d
            rule link.{mode}
@@ -766,7 +885,16 @@ with open(buildfile, 'w') as f:
                command = thrift -gen cpp:cob_style -out $builddir/{mode}/gen $in
                description = THRIFT $in
            rule antlr3.{mode}
-                command = sed -e '/^#if 0/,/^#endif/d' $in > $builddir/{mode}/gen/$in && antlr3 $builddir/{mode}/gen/$in && sed -i 's/^\\( *\)\\(ImplTraits::CommonTokenType\\* [a-zA-Z0-9_]* = NULL;\\)$$/\\1const \\2/' build/{mode}/gen/${{stem}}Parser.cpp
+                # We replace many local `ExceptionBaseType* ex` variables with a single function-scope one.
+                # Because we add such a variable to every function, and because `ExceptionBaseType` is not a global
+                # name, we also add a global typedef to avoid compilation errors. 
+                command = sed -e '/^#if 0/,/^#endif/d' $in > $builddir/{mode}/gen/$in $
+                     && antlr3 $builddir/{mode}/gen/$in $
+                     && sed -i -e 's/^\\( *\)\\(ImplTraits::CommonTokenType\\* [a-zA-Z0-9_]* = NULL;\\)$$/\\1const \\2/' $
+                        -e '1i using ExceptionBaseType = int;' $
+                        -e 's/^{{/{{ ExceptionBaseType\* ex = nullptr;/; $
+                            s/ExceptionBaseType\* ex = new/ex = new/' $
+                        build/{mode}/gen/${{stem}}Parser.cpp
                description = ANTLR3 $in
            ''').format(mode = mode, **modeval))
        f.write('build {mode}: phony {artifacts}\n'.format(mode = mode,
@@ -807,6 +935,11 @@ with open(buildfile, 'w') as f:
                f.write('build $builddir/{}/{}: ar.{} {}\n'.format(mode, binary, mode, str.join(' ', objs)))
            else:
                if binary.startswith('tests/'):
+                    local_libs = '$libs'
+                    if binary not in tests_not_using_seastar_test_framework or binary in pure_boost_tests:
+                        local_libs += ' ' + maybe_static(args.staticboost, '-lboost_unit_test_framework')
+                    if has_thrift:
+                        local_libs += ' ' + thrift_libs + ' ' + maybe_static(args.staticboost, '-lboost_system')
                    # Our code's debugging information is huge, and multiplied
                    # by many tests yields ridiculous amounts of disk space.
                    # So we strip the tests by default; The user can very
@@ -814,15 +947,15 @@ with open(buildfile, 'w') as f:
                    # to the test name, e.g., "ninja build/release/testname_g"
                    f.write('build $builddir/{}/{}: {}.{} {} {}\n'.format(mode, binary, tests_link_rule, mode, str.join(' ', objs),
                                                                                     'seastar/build/{}/libseastar.a'.format(mode)))
-                    if has_thrift:
-                        f.write('   libs =  -lthrift -lboost_system $libs\n')
+                    f.write('   libs = {}\n'.format(local_libs))
                    f.write('build $builddir/{}/{}_g: link.{} {} {}\n'.format(mode, binary, mode, str.join(' ', objs),
                                                                              'seastar/build/{}/libseastar.a'.format(mode)))
+                    f.write('   libs = {}\n'.format(local_libs))
                else:
                    f.write('build $builddir/{}/{}: link.{} {} {}\n'.format(mode, binary, mode, str.join(' ', objs),
                                                                            'seastar/build/{}/libseastar.a'.format(mode)))
-                if has_thrift:
-                    f.write('   libs =  -lthrift -lboost_system $libs\n')
+                    if has_thrift:
+                        f.write('   libs =  {} {} $libs\n'.format(thrift_libs, maybe_static(args.staticboost, '-lboost_system')))
            for src in srcs:
                if src.endswith('.cc'):
                    obj = '$builddir/' + mode + '/' + src.replace('.cc', '.o')
@@ -845,8 +978,8 @@ with open(buildfile, 'w') as f:
        for obj in compiles:
            src = compiles[obj]
            gen_headers = list(ragels.keys())
-            gen_headers += ['seastar/build/{}/http/request_parser.hh'.format(mode)]
-            gen_headers += ['seastar/build/{}/http/http_response_parser.hh'.format(mode)]
+            gen_headers += ['seastar/build/{}/gen/http/request_parser.hh'.format(mode)]
+            gen_headers += ['seastar/build/{}/gen/http/http_response_parser.hh'.format(mode)]
            for th in thrifts:
                gen_headers += th.headers('$builddir/{}/gen'.format(mode))
            for g in antlr3_grammars:
@@ -861,7 +994,7 @@ with open(buildfile, 'w') as f:
            f.write('build {}: ragel {}\n'.format(hh, src))
        for hh in swaggers:
            src = swaggers[hh]
-            f.write('build {}: swagger {}\n'.format(hh,src))
+            f.write('build {}: swagger {} | seastar/json/json2code.py\n'.format(hh,src))
        for hh in serializers:
            src = serializers[hh]
            f.write('build {}: serializer {} | idl-compiler.py\n'.format(hh,src))
@@ -878,10 +1011,14 @@ with open(buildfile, 'w') as f:
            for cc in grammar.sources('$builddir/{}/gen'.format(mode)):
                obj = cc.replace('.cpp', '.o')
                f.write('build {}: cxx.{} {} || {}\n'.format(obj, mode, cc, ' '.join(serializers)))
-        f.write('build seastar/build/{mode}/libseastar.a seastar/build/{mode}/apps/iotune/iotune: ninja {seastar_deps}\n'
+                if cc.endswith('Parser.cpp') and has_sanitize_address_use_after_scope:
+                    # Parsers end up using huge amounts of stack space and overflowing their stack 
+                    f.write('  obj_cxxflags = -fno-sanitize-address-use-after-scope\n')
+        f.write('build seastar/build/{mode}/libseastar.a seastar/build/{mode}/apps/iotune/iotune seastar/build/{mode}/gen/http/request_parser.hh seastar/build/{mode}/gen/http/http_response_parser.hh: ninja {seastar_deps}\n'
                .format(**locals()))
+        f.write('  pool = seastar_pool\n')
        f.write('  subdir = seastar\n')
-        f.write('  target = build/{mode}/libseastar.a build/{mode}/apps/iotune/iotune\n'.format(**locals()))
+        f.write('  target = build/{mode}/libseastar.a build/{mode}/apps/iotune/iotune build/{mode}/gen/http/request_parser.hh build/{mode}/gen/http/http_response_parser.hh\n'.format(**locals()))
        f.write(textwrap.dedent('''\
            build build/{mode}/iotune: copy seastar/build/{mode}/apps/iotune/iotune
            ''').format(**locals()))
@@ -895,14 +1032,6 @@ with open(buildfile, 'w') as f:
            command = find -name '*.[chS]' -o -name "*.cc" -o -name "*.hh" | cscope -bq -i-
            description = CSCOPE
        build cscope: cscope
-        rule request_parser_hh
-           command = {ninja} -C seastar build/release/gen/http/request_parser.hh build/debug/gen/http/request_parser.hh
-           description = GEN seastar/http/request_parser.hh
-        build seastar/build/release/http/request_parser.hh seastar/build/debug/http/request_parser.hh: request_parser_hh
-        rule http_response_parser_hh
-           command = {ninja} -C seastar build/release/gen/http/http_response_parser.hh build/debug/gen/http/http_response_parser.hh
-           description = GEN seastar/http/http_response_parser.hh
-        build seastar/build/release/http/http_response_parser.hh seastar/build/debug/http/http_response_parser.hh: http_response_parser_hh
        rule clean
            command = rm -rf build
            description = CLEAN
--- a/converting_mutation_partition_applier.hh
+++ b/converting_mutation_partition_applier.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015 Cloudius Systems, Ltd.
+ * Copyright (C) 2015 ScyllaDB
 */

 /*
@@ -22,6 +22,7 @@
 #pragma once

 #include "mutation_partition_view.hh"
+#include "mutation_partition.hh"
 #include "schema.hh"

 // Mutation partition visitor which applies visited data into
@@ -35,14 +36,14 @@ class converting_mutation_partition_applier : public mutation_partition_visitor
    deletable_row* _current_row;
 private:
    static bool is_compatible(const column_definition& new_def, const data_type& old_type, column_kind kind) {
-        return new_def.kind == kind && new_def.type->is_value_compatible_with(*old_type);
+        return ::is_compatible(new_def.kind, kind) && new_def.type->is_value_compatible_with(*old_type);
    }
-    void accept_cell(row& dst, column_kind kind, const column_definition& new_def, const data_type& old_type, atomic_cell_view cell) {
+    static void accept_cell(row& dst, column_kind kind, const column_definition& new_def, const data_type& old_type, atomic_cell_view cell) {
        if (is_compatible(new_def, old_type, kind) && cell.timestamp() > new_def.dropped_at()) {
            dst.apply(new_def, atomic_cell_or_collection(cell));
        }
    }
-    void accept_cell(row& dst, column_kind kind, const column_definition& new_def, const data_type& old_type, collection_mutation_view cell) {
+    static void accept_cell(row& dst, column_kind kind, const column_definition& new_def, const data_type& old_type, collection_mutation_view cell) {
        if (!is_compatible(new_def, old_type, kind)) {
            return;
        }
@@ -90,12 +91,12 @@ public:
        }
    }

-    virtual void accept_row_tombstone(clustering_key_prefix_view prefix, tombstone t) override {
-        _p.apply_row_tombstone(_p_schema, prefix, t);
+    virtual void accept_row_tombstone(const range_tombstone& rt) override {
+        _p.apply_row_tombstone(_p_schema, rt);
    }

-    virtual void accept_row(clustering_key_view key, tombstone deleted_at, const row_marker& rm) override {
-        deletable_row& r = _p.clustered_row(_p_schema, key);
+    virtual void accept_row(position_in_partition_view key, const row_tombstone& deleted_at, const row_marker& rm, is_dummy dummy, is_continuous continuous) override {
+        deletable_row& r = _p.clustered_row(_p_schema, key, dummy, continuous);
        r.apply(rm);
        r.apply(deleted_at);
        _current_row = &r;
@@ -116,4 +117,14 @@ public:
            accept_cell(_current_row->cells(), column_kind::regular_column, *def, col.type(), collection);
        }
    }
+
+    // Appends `cell` to dst, upgrading it to the new schema through the accept_cell()
+    // overload matching new_def (atomic or collection). Cells must have monotonic names.
+    static void append_cell(row& dst, column_kind kind, const column_definition& new_def, const data_type& old_type, const atomic_cell_or_collection& cell) {
+        if (!new_def.is_atomic()) {
+            accept_cell(dst, kind, new_def, old_type, cell.as_collection_mutation());
+            return;
+        }
+        accept_cell(dst, kind, new_def, old_type, cell.as_atomic_cell());
+    }
 };
--- a/counters.cc
+++ b/counters.cc
@@ -0,0 +1,332 @@
+/*
+ * Copyright (C) 2016 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "service/storage_service.hh"
+#include "counters.hh"
+#include "mutation.hh"
+#include "combine.hh"
+
+// Returns the counter_id built from the local storage service's get_local_id().
+counter_id counter_id::local() {
+    return counter_id(service::get_local_storage_service().get_local_id());
+}
+
+// Strict ordering on counter ids: the most significant halves are compared first
+// (as signed 64-bit integers) and the least significant halves break ties.
+// NOTE(review): the name suggests this reproduces the ordering used by version
+// 1.7.4 -- confirm against the callers.
+bool counter_id::less_compare_1_7_4::operator()(const counter_id& a, const counter_id& b) const {
+    const bool high_differs = a._most_significant != b._most_significant;
+    return high_differs ? a._most_significant < b._most_significant : a._least_significant < b._least_significant;
+}
+
+std::ostream& operator<<(std::ostream& os, const counter_id& id) { // Prints the id in its utils::UUID form.
+    return os << id.to_uuid();
+}
+
+std::ostream& operator<<(std::ostream& os, counter_shard_view csv) { // Prints the shard's id, value and logical clock.
+    return os << "{global_shard id: " << csv.id() << " value: " << csv.value()
+              << " clock: " << csv.logical_clock() << "}";
+}
+
+std::ostream& operator<<(std::ostream& os, counter_cell_view ccv) { // Prints the cell timestamp followed by its shards joined with ", ".
+    return os << "{counter_cell timestamp: " << ccv.timestamp() << " shards: {" << ::join(", ", ccv.shards()) << "}}";
+}
+
+// Sorts _shards by id and merges consecutive shards sharing an id via apply().
+void counter_cell_builder::do_sort_and_remove_duplicates() {
+    boost::range::sort(_shards, [] (auto& lhs, auto& rhs) { return lhs.id() < rhs.id(); });
+
+    std::vector<counter_shard> merged;
+    merged.reserve(_shards.size());
+    for (auto& shard : _shards) {
+        if (merged.empty() || merged.back().id() != shard.id()) {
+            merged.emplace_back(shard);   // first shard seen with this id
+            continue;
+        }
+        merged.back().apply(shard);
+    }
+    _shards = std::move(merged);
+    _sorted = true;
+}
+
+// Returns a copy of this cell's shards sorted with counter_id::less_compare_1_7_4.
+std::vector<counter_shard> counter_cell_view::shards_compatible_with_1_7_4() const {
+    auto result = boost::copy_range<std::vector<counter_shard>>(shards());
+    auto legacy_less = [order = counter_id::less_compare_1_7_4()] (auto& lhs, auto& rhs) {
+        return order(lhs.id(), rhs.id());
+    };
+    boost::range::sort(result, legacy_less);
+    return result;
+}
+
+static bool apply_in_place(atomic_cell_or_collection& dst, atomic_cell_or_collection& src)
+{ // Merges src's shards into dst through mutable views; returns false if some src shard id has no match in dst.
+    auto dst_ccmv = counter_cell_mutable_view(dst.as_mutable_atomic_cell());
+    auto src_ccmv = counter_cell_mutable_view(src.as_mutable_atomic_cell());
+    auto dst_shards = dst_ccmv.shards();
+    auto src_shards = src_ccmv.shards();
+    // Both ranges are walked forward together; this assumes shards are ordered by id.
+    auto dst_it = dst_shards.begin();
+    auto src_it = src_shards.begin();
+    // For every src shard, locate the dst shard carrying the same id.
+    while (src_it != src_shards.end()) {
+        while (dst_it != dst_shards.end() && dst_it->id() < src_it->id()) {
+            ++dst_it;
+        }
+        if (dst_it == dst_shards.end() || dst_it->id() != src_it->id()) {
+            // Fast-path failed: no dst shard has this id. Swap back what was processed so far; the caller falls back to the slow path.
+            if (dst_it == dst_shards.end()) {
+                --dst_it; // step back onto the last dst shard so that it can be dereferenced below
+            }
+            while (src_it != src_shards.begin()) {
+                --src_it;
+                while (dst_it->id() != src_it->id()) {
+                    --dst_it;
+                }
+                src_it->swap_value_and_clock(*dst_it);
+            }
+            return false;
+        }
+        if (dst_it->logical_clock() < src_it->logical_clock()) {
+            dst_it->swap_value_and_clock(*src_it); // src is newer: dst takes src's state, src keeps dst's previous one
+        } else {
+            src_it->set_value_and_clock(*dst_it); // dst is at least as new: src receives a copy of dst's value and clock
+        }
+        ++src_it;
+    }
+    // Every src shard was matched: dst gets the larger timestamp, src keeps dst's previous one.
+    auto dst_ts = dst_ccmv.timestamp();
+    auto src_ts = src_ccmv.timestamp();
+    dst_ccmv.set_timestamp(std::max(dst_ts, src_ts));
+    src_ccmv.set_timestamp(dst_ts);
+    src.as_mutable_atomic_cell().set_counter_in_place_revert(true); // makes revert_apply() choose revert_in_place_apply()
+    return true;
+}
+
+// Undoes a successful apply_in_place(dst, src): swaps value and clock of each
+// src shard with the dst shard of the same id, exchanges the two timestamps
+// and clears the in-place-revert flag on src.
+static void revert_in_place_apply(atomic_cell_or_collection& dst, atomic_cell_or_collection& src) {
+    assert(dst.can_use_mutable_view() && src.can_use_mutable_view());
+    auto dst_view = counter_cell_mutable_view(dst.as_mutable_atomic_cell());
+    auto src_view = counter_cell_mutable_view(src.as_mutable_atomic_cell());
+    auto dst_range = dst_view.shards();
+    auto src_range = src_view.shards();
+
+    auto d = dst_range.begin();
+    for (auto s = src_range.begin(); s != src_range.end(); ++s) {
+        // Skip dst shards with smaller ids (assumes both ranges are ordered by id).
+        while (d != dst_range.end() && d->id() < s->id()) {
+            ++d;
+        }
+        assert(d != dst_range.end() && d->id() == s->id());
+        d->swap_value_and_clock(*s);
+    }
+
+    const auto old_dst_ts = dst_view.timestamp();
+    const auto old_src_ts = src_view.timestamp();
+    dst_view.set_timestamp(old_src_ts);
+    src_view.set_timestamp(old_dst_ts);
+    src.as_mutable_atomic_cell().set_counter_in_place_revert(false);
+}
+
+bool counter_cell_view::apply_reversibly(atomic_cell_or_collection& dst, atomic_cell_or_collection& src)
+{ // Merges counter cell src into dst; returns false only when dst is left untouched in the dead-cell case below.
+    auto dst_ac = dst.as_atomic_cell();
+    auto src_ac = src.as_atomic_cell();
+    // Dead cells: a dead src replaces a live dst; two dead cells are ordered by compare_atomic_cell_for_merge(); a dead dst is kept over a live src.
+    if (!dst_ac.is_live() || !src_ac.is_live()) {
+        if (dst_ac.is_live() || (!src_ac.is_live() && compare_atomic_cell_for_merge(dst_ac, src_ac) < 0)) {
+            std::swap(dst, src);
+            return true;
+        }
+        return false;
+    }
+    // Two counter updates (deltas) are merged by adding their values under the larger timestamp.
+    if (dst_ac.is_counter_update() && src_ac.is_counter_update()) {
+        auto src_v = src_ac.counter_update_value();
+        auto dst_v = dst_ac.counter_update_value();
+        dst = atomic_cell::make_live_counter_update(std::max(dst_ac.timestamp(), src_ac.timestamp()),
+                                                    src_v + dst_v);
+        return true;
+    }
+    // Past this point both cells are expected to hold shards rather than update deltas.
+    assert(!dst_ac.is_counter_update());
+    assert(!src_ac.is_counter_update());
+    // Fast path: mutate both cells in place when dst has at least as many shards as src and both allow mutable views.
+    if (counter_cell_view(dst_ac).shard_count() >= counter_cell_view(src_ac).shard_count()
+        && dst.can_use_mutable_view() && src.can_use_mutable_view()) {
+        if (apply_in_place(dst, src)) {
+            return true;
+        }
+    }
+    // Slow path: rebuild dst with combine(); the lambda keeps the shard with the higher logical clock (assuming it is invoked for equal ids).
+    src.as_mutable_atomic_cell().set_counter_in_place_revert(false);
+    auto dst_shards = counter_cell_view(dst_ac).shards();
+    auto src_shards = counter_cell_view(src_ac).shards();
+
+    counter_cell_builder result;
+    combine(dst_shards.begin(), dst_shards.end(), src_shards.begin(), src_shards.end(),
+            result.inserter(), counter_shard_view::less_compare_by_id(), [] (auto& x, auto& y) {
+                return x.logical_clock() < y.logical_clock() ? y : x;
+            });
+
+    auto cell = result.build(std::max(dst_ac.timestamp(), src_ac.timestamp()));
+    src = std::exchange(dst, atomic_cell_or_collection(cell)); // src now holds the previous dst, which revert_apply() swaps back
+    return true;
+}
+
+// Undoes apply_reversibly(dst, src). A counter-update dst gets src's delta subtracted again and
+// must remain a counter update, hence make_live_counter_update() rather than a plain make_live().
+void counter_cell_view::revert_apply(atomic_cell_or_collection& dst, atomic_cell_or_collection& src) {
+    if (dst.as_atomic_cell().is_counter_update()) {
+        auto src_v = src.as_atomic_cell().counter_update_value();
+        auto dst_v = dst.as_atomic_cell().counter_update_value();
+        dst = atomic_cell::make_live_counter_update(dst.as_atomic_cell().timestamp(), dst_v - src_v);
+    } else if (src.as_atomic_cell().is_counter_in_place_revert_set()) {
+        revert_in_place_apply(dst, src);   // apply_in_place() succeeded: swap the shards back
+    } else {
+        std::swap(dst, src);               // src holds the previous dst
+    }
+}
+
+// Computes what counter cell `a` has that `b` lacks. If either cell is dead, `a`
+// is returned whole or not at all. Otherwise the result holds the shards of `a`
+// whose id is absent from `b` or whose logical clock is greater than b's; with no
+// such shard it is an empty live cell if `a` is newer, else an empty optional.
+stdx::optional<atomic_cell> counter_cell_view::difference(atomic_cell_view a, atomic_cell_view b) {
+    assert(!a.is_counter_update());
+    assert(!b.is_counter_update());
+
+    if (!b.is_live() || !a.is_live()) {
+        if (b.is_live() || (!a.is_live() && compare_atomic_cell_for_merge(b, a) < 0)) {
+            return atomic_cell(a);
+        }
+        return { };
+    }
+
+    auto lhs = counter_cell_view(a).shards();
+    auto rhs = counter_cell_view(b).shards();
+    auto lhs_end = lhs.end();
+    auto rhs_it = rhs.begin();
+    auto rhs_end = rhs.end();
+
+    counter_cell_builder result;
+    for (auto lhs_it = lhs.begin(); lhs_it != lhs_end; ++lhs_it) {
+        while (rhs_it != rhs_end && (*rhs_it).id() < (*lhs_it).id()) {
+            ++rhs_it;
+        }
+        if (rhs_it == rhs_end || (*lhs_it).id() != (*rhs_it).id() || (*lhs_it).logical_clock() > (*rhs_it).logical_clock()) {
+            result.add_shard(counter_shard(*lhs_it));
+        }
+    }
+
+    stdx::optional<atomic_cell> diff;
+    if (!result.empty()) {
+        diff = result.build(std::max(a.timestamp(), b.timestamp()));
+    } else if (a.timestamp() > b.timestamp()) {
+        diff = atomic_cell::make_live(a.timestamp(), bytes_view());
+    }
+    return diff;
+}
+
+
+void transform_counter_updates_to_shards(mutation& m, const mutation* current_state, uint64_t clock_offset) {
+    // Rewrites every live counter update (delta) cell of m into a single-shard counter cell for counter_id::local().
+    // FIXME: allow current_state to be frozen_mutation
+    auto transform_new_row_to_shards = [clock_offset] (auto& cells) {
+        cells.for_each_cell([clock_offset] (auto, atomic_cell_or_collection& ac_o_c) {
+            auto acv = ac_o_c.as_atomic_cell();
+            if (!acv.is_live()) {
+                return; // continue -- we are in lambda
+            }
+            auto delta = acv.counter_update_value();
+            auto cs = counter_shard(counter_id::local(), delta, clock_offset + 1); // fresh local shard built from the delta
+            ac_o_c = counter_cell_builder::from_single_shard(acv.timestamp(), cs);
+        });
+    };
+    // Without a current state every row of m is handled as a new row.
+    if (!current_state) {
+        transform_new_row_to_shards(m.partition().static_row());
+        for (auto& cr : m.partition().clustered_rows()) {
+            transform_new_row_to_shards(cr.row().cells());
+        }
+        return;
+    }
+
+    clustering_key::less_compare cmp(*m.schema());
+    // Like transform_new_row_to_shards, but continues from the local shard found in `state` when the column has one.
+    auto transform_row_to_shards = [clock_offset] (auto& transformee, auto& state) {
+        std::deque<std::pair<column_id, counter_shard>> shards; // (column id, local shard) pairs collected from state
+        state.for_each_cell([&] (column_id id, const atomic_cell_or_collection& ac_o_c) {
+            auto acv = ac_o_c.as_atomic_cell();
+            if (!acv.is_live()) {
+                return; // continue -- we are in lambda
+            }
+            counter_cell_view ccv(acv);
+            auto cs = ccv.local_shard();
+            if (!cs) {
+                return; // continue
+            }
+            shards.emplace_back(std::make_pair(id, counter_shard(*cs)));
+        });
+        // shards is consumed front to back, which assumes both rows visit their cells in increasing column id order.
+        transformee.for_each_cell([&] (column_id id, atomic_cell_or_collection& ac_o_c) {
+            auto acv = ac_o_c.as_atomic_cell();
+            if (!acv.is_live()) {
+                return; // continue -- we are in lambda
+            }
+            while (!shards.empty() && shards.front().first < id) {
+                shards.pop_front();
+            }
+
+            auto delta = acv.counter_update_value();
+
+            if (shards.empty() || shards.front().first > id) { // no local shard recorded for this column
+                auto cs = counter_shard(counter_id::local(), delta, clock_offset + 1);
+                ac_o_c = counter_cell_builder::from_single_shard(acv.timestamp(), cs);
+            } else {
+                auto& cs = shards.front().second;
+                cs.update(delta, clock_offset + 1); // NOTE(review): presumably folds delta into the existing shard -- confirm in counter_shard::update()
+                ac_o_c = counter_cell_builder::from_single_shard(acv.timestamp(), cs);
+                shards.pop_front();
+            }
+        });
+    };
+
+    transform_row_to_shards(m.partition().static_row(), current_state->partition().static_row());
+    // Clustered rows: step through current_state's rows alongside m's rows and match them by key.
+    auto& cstate = current_state->partition();
+    auto it = cstate.clustered_rows().begin();
+    auto end = cstate.clustered_rows().end();
+    for (auto& cr : m.partition().clustered_rows()) {
+        while (it != end && cmp(it->key(), cr.key())) {
+            ++it;
+        }
+        if (it == end || cmp(cr.key(), it->key())) { // current_state has no row with this key
+            transform_new_row_to_shards(cr.row().cells());
+            continue;
+        }
+
+        transform_row_to_shards(cr.row().cells(), it->row().cells());
+    }
+}
--- a/counters.hh
+++ b/counters.hh
@@ -0,0 +1,435 @@
+/*
+ * Copyright (C) 2016 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <boost/range/algorithm/find_if.hpp>
+
+#include "atomic_cell_or_collection.hh"
+#include "types.hh"
+
+#include "stdx.hh"
+
+class mutation;
+
+class mutation;
+
+// Identifier attached to every counter shard.
+//
+// Stores the two signed 64-bit halves of a utils::UUID as plain integers and
+// converts back with to_uuid(). All comparisons are delegated to utils::UUID.
+class counter_id {
+    int64_t _least_significant;
+    int64_t _most_significant;
+public:
+    static_assert(std::is_same<decltype(std::declval<utils::UUID>().get_least_significant_bits()), int64_t>::value
+            &&  std::is_same<decltype(std::declval<utils::UUID>().get_most_significant_bits()), int64_t>::value,
+        "utils::UUID is expected to work with two signed 64-bit integers");
+
+    // Defaulted: performs no explicit initialization of the two halves.
+    counter_id() = default;
+    // Captures both halves of 'uuid'.
+    explicit counter_id(utils::UUID uuid) noexcept
+        : _least_significant(uuid.get_least_significant_bits())
+        , _most_significant(uuid.get_most_significant_bits())
+    { }
+
+    // Rebuilds the UUID; note the (most, least) argument order.
+    utils::UUID to_uuid() const {
+        return utils::UUID(_most_significant, _least_significant);
+    }
+
+    // Ordering and equality are those of the corresponding UUIDs.
+    bool operator<(const counter_id& other) const {
+        return to_uuid() < other.to_uuid();
+    }
+    // Expressed through UUID's operator< with the operands swapped.
+    bool operator>(const counter_id& other) const {
+        return other.to_uuid() < to_uuid();
+    }
+    bool operator==(const counter_id& other) const {
+        return to_uuid() == other.to_uuid();
+    }
+    bool operator!=(const counter_id& other) const {
+        return !(*this == other);
+    }
+public:
+    // (Wrong) Counter ID ordering used by Scylla 1.7.4 and earlier.
+    // Only declared here; the comparison itself is defined out of line.
+    struct less_compare_1_7_4 {
+        bool operator()(const counter_id& a, const counter_id& b) const;
+    };
+public:
+    // Declared here, defined out of line.
+    // NOTE(review): presumably the ID identifying the local node -- confirm.
+    static counter_id local();
+
+    // For tests.
+    // Wraps a UUID obtained from utils::make_random_uuid().
+    static counter_id generate_random() {
+        return counter_id(utils::make_random_uuid());
+    }
+};
+static_assert(std::is_pod<counter_id>::value, "counter_id should be a POD type");
+
+std::ostream& operator<<(std::ostream& os, const counter_id& id);
+
+// View over a single serialized counter shard.
+//
+// Wraps a View::pointer to the first element of the shard. The fields are
+// laid out back to back as described by the 'offset' enum:
+//   counter_id | int64_t value | int64_t logical_clock
+template<typename View>
+class basic_counter_shard_view {
+    // Position of each field relative to the start of the shard; total_size
+    // is the length of one complete serialized shard.
+    enum class offset : unsigned {
+        id = 0u,
+        value = unsigned(id) + sizeof(counter_id),
+        logical_clock = unsigned(value) + sizeof(int64_t),
+        total_size = unsigned(logical_clock) + sizeof(int64_t),
+    };
+private:
+    typename View::pointer _base;
+private:
+    // Copies sizeof(T) elements starting at _base + off into a local T and
+    // returns that T by value.
+    template<typename T>
+    T read(offset off) const {
+        T value;
+        std::copy_n(_base + static_cast<unsigned>(off), sizeof(T), reinterpret_cast<signed char*>(&value));
+        return value;
+    }
+public:
+    // Length of one serialized shard (offset::total_size).
+    static constexpr auto size = size_t(offset::total_size);
+public:
+    // Defaulted: _base is not explicitly initialized.
+    basic_counter_shard_view() = default;
+    // 'ptr' must point at the first element of a serialized shard.
+    explicit basic_counter_shard_view(typename View::pointer ptr) noexcept
+        : _base(ptr) { }
+
+    // Field accessors; each one decodes the field from the buffer on every call.
+    counter_id id() const { return read<counter_id>(offset::id); }
+    int64_t value() const { return read<int64_t>(offset::value); }
+    int64_t logical_clock() const { return read<int64_t>(offset::logical_clock); }
+
+    // Exchanges the value and logical clock regions of *this and 'other'
+    // through a temporary buffer. The id region of both shards is untouched.
+    // Writes through both _base pointers.
+    void swap_value_and_clock(basic_counter_shard_view& other) noexcept {
+        static constexpr size_t off = size_t(offset::value);
+        // Local 'size' (length of value + clock) shadows the class-level 'size'.
+        static constexpr size_t size = size_t(offset::total_size) - off;
+
+        typename View::value_type tmp[size];
+        std::copy_n(_base + off, size, tmp);
+        std::copy_n(other._base + off, size, _base + off);
+        std::copy_n(tmp, size, other._base + off);
+    }
+
+    // Overwrites this shard's value and logical clock with those of 'other'.
+    // The id is not copied.
+    void set_value_and_clock(const basic_counter_shard_view& other) noexcept {
+        static constexpr size_t off = size_t(offset::value);
+        // Local 'size' (length of value + clock) shadows the class-level 'size'.
+        static constexpr size_t size = size_t(offset::total_size) - off;
+        std::copy_n(other._base + off, size, _base + off);
+    }
+
+    // Compares the decoded id, value and logical clock, not the pointers.
+    bool operator==(const basic_counter_shard_view& other) const {
+        return id() == other.id() && value() == other.value()
+               && logical_clock() == other.logical_clock();
+    }
+    bool operator!=(const basic_counter_shard_view& other) const {
+        return !(*this == other);
+    }
+
+    // Strict weak ordering of shard views by their counter_id.
+    struct less_compare_by_id {
+        bool operator()(const basic_counter_shard_view& x, const basic_counter_shard_view& y) const {
+            return x.id() < y.id();
+        }
+    };
+};
+
+using counter_shard_view = basic_counter_shard_view<bytes_view>;
+
+std::ostream& operator<<(std::ostream& os, counter_shard_view csv);
+
+// A counter shard held by value: the owner's counter_id together with the
+// shard's value and logical clock.
+class counter_shard {
+    counter_id _id;
+    int64_t _value;
+    int64_t _logical_clock;
+private:
+    // Copies the sizeof(T) bytes of 'field' to 'out' and advances 'out' past
+    // the bytes just written.
+    template<typename T>
+    static void write(const T& field, bytes::iterator& out) {
+        auto first = reinterpret_cast<const signed char*>(&field);
+        out = std::copy_n(first, sizeof(T), out);
+    }
+private:
+    // Common implementation behind both apply() overloads.
+    // T is either counter_shard or basic_counter_shard_view<U>.
+    // The incoming shard wins (its clock and value are taken) only when its
+    // logical clock is strictly greater than ours; otherwise nothing changes.
+    template<typename T>
+    GCC6_CONCEPT(requires requires(T shard) {
+        { shard.value() } -> int64_t;
+        { shard.logical_clock() } -> int64_t;
+    })
+    counter_shard& do_apply(T&& other) noexcept {
+        const auto incoming_clock = other.logical_clock();
+        if (!(_logical_clock < incoming_clock)) {
+            return *this;
+        }
+        _logical_clock = incoming_clock;
+        _value = other.value();
+        return *this;
+    }
+public:
+    // Builds a shard from its three components.
+    counter_shard(counter_id id, int64_t value, int64_t logical_clock) noexcept
+        : _id(id), _value(value), _logical_clock(logical_clock) { }
+
+    // Decodes a serialized shard into an owning copy.
+    explicit counter_shard(counter_shard_view csv) noexcept
+        : _id(csv.id()), _value(csv.value()), _logical_clock(csv.logical_clock()) { }
+
+    counter_id id() const {
+        return _id;
+    }
+    int64_t value() const {
+        return _value;
+    }
+    int64_t logical_clock() const {
+        return _logical_clock;
+    }
+
+    // Adds the given deltas to the value and the logical clock.
+    counter_shard& update(int64_t value_delta, int64_t clock_increment) noexcept {
+        _value = _value + value_delta;
+        _logical_clock = _logical_clock + clock_increment;
+        return *this;
+    }
+
+    // Merges 'other' into this shard; see do_apply() for the rule.
+    counter_shard& apply(counter_shard_view other) noexcept {
+        return do_apply(other);
+    }
+    counter_shard& apply(const counter_shard& other) noexcept {
+        return do_apply(other);
+    }
+
+    // Size of one serialized shard; identical to counter_shard_view::size.
+    static size_t serialized_size() {
+        return counter_shard_view::size;
+    }
+
+    // Writes id, value and logical clock, in that order, advancing 'out'.
+    void serialize(bytes::iterator& out) const {
+        write(_id, out);
+        write(_value, out);
+        write(_logical_clock, out);
+    }
+};
+
+// Accumulates counter shards and turns them into a serialized counter cell.
+// Tracks whether the shards added through add_maybe_unsorted_shard() arrived
+// in strictly increasing counter_id order.
+class counter_cell_builder {
+    std::vector<counter_shard> _shards;
+    bool _sorted = true;
+private:
+    void do_sort_and_remove_duplicates();
+public:
+    counter_cell_builder() = default;
+    // Pre-allocates room for 'shard_count' shards.
+    counter_cell_builder(size_t shard_count) {
+        _shards.reserve(shard_count);
+    }
+
+    // Appends a copy of 'cs' without checking the ordering.
+    void add_shard(const counter_shard& cs) {
+        _shards.push_back(cs);
+    }
+
+    // Appends 'cs' and, while the sequence is still considered sorted, checks
+    // that the new shard's id is greater than that of its predecessor.
+    void add_maybe_unsorted_shard(const counter_shard& cs) {
+        add_shard(cs);
+        const auto count = _shards.size();
+        if (!_sorted || count < 2) {
+            return;
+        }
+        _sorted = _shards[count - 1].id() > _shards[count - 2].id();
+    }
+
+    // Does the (out-of-line) sort and deduplication only if an out-of-order
+    // shard has been recorded.
+    void sort_and_remove_duplicates() {
+        if (_sorted) {
+            return;
+        }
+        do_sort_and_remove_duplicates();
+    }
+
+    // Total serialized size of all shards collected so far.
+    size_t serialized_size() const {
+        return counter_shard::serialized_size() * _shards.size();
+    }
+    // Serializes every shard in insertion order, advancing 'out'.
+    void serialize(bytes::iterator& out) const {
+        for (const counter_shard& shard : _shards) {
+            shard.serialize(out);
+        }
+    }
+
+    bool empty() const {
+        return _shards.empty();
+    }
+
+    // Creates a live atomic cell whose value is the serialized shards.
+    atomic_cell build(api::timestamp_type timestamp) const {
+        return atomic_cell::make_live_from_serializer(timestamp, serialized_size(), [this] (bytes::iterator dest) {
+            serialize(dest);
+        });
+    }
+
+    // Creates a live atomic cell holding exactly one shard.
+    static atomic_cell from_single_shard(api::timestamp_type timestamp, const counter_shard& cs) {
+        return atomic_cell::make_live_from_serializer(timestamp, counter_shard::serialized_size(), [&cs] (bytes::iterator dest) {
+            cs.serialize(dest);
+        });
+    }
+
+    // Output iterator that appends every assigned shard to the builder via
+    // add_shard(). Increment and dereference are no-ops returning the iterator.
+    class inserter_iterator : public std::iterator<std::output_iterator_tag, counter_shard> {
+        counter_cell_builder* _builder;
+    public:
+        explicit inserter_iterator(counter_cell_builder& b) : _builder(&b) { }
+        inserter_iterator& operator=(const counter_shard& cs) {
+            _builder->add_shard(cs);
+            return *this;
+        }
+        // Shard views are first converted to owning shards.
+        inserter_iterator& operator=(const counter_shard_view& csv) {
+            _builder->add_shard(counter_shard(csv));
+            return *this;
+        }
+        inserter_iterator& operator++() {
+            return *this;
+        }
+        inserter_iterator& operator++(int) {
+            return *this;
+        }
+        inserter_iterator& operator*() {
+            return *this;
+        }
+    };
+
+    // Returns an output iterator bound to this builder.
+    inserter_iterator inserter() {
+        return inserter_iterator(*this);
+    }
+};
+
+// <counter_id>   := <int64_t><int64_t>
+// <shard>        := <counter_id><int64_t:value><int64_t:logical_clock>
+// <counter_cell> := <shard>*
+// View over a live counter cell: exposes the cell value as a sequence of
+// serialized shards, each counter_shard_view::size long.
+template<typename View>
+class basic_counter_cell_view {
+protected:
+    atomic_cell_base<View> _cell;
+private:
+    // Iterator over the shards stored in the cell value.
+    // _current is the position in the underlying buffer; _current_view is a
+    // shard view built from that position and is what operator* / operator->
+    // hand out. Declared with input_iterator_tag, although it also provides
+    // operator--.
+    class shard_iterator : public std::iterator<std::input_iterator_tag, basic_counter_shard_view<View>> {
+        typename View::pointer _current;
+        basic_counter_shard_view<View> _current_view;
+    public:
+        shard_iterator() = default;
+        shard_iterator(typename View::pointer ptr) noexcept
+            : _current(ptr), _current_view(ptr) { }
+
+        basic_counter_shard_view<View>& operator*() noexcept {
+            return _current_view;
+        }
+        basic_counter_shard_view<View>* operator->() noexcept {
+            return &_current_view;
+        }
+        // Steps forward by one serialized shard and rebuilds the cached view.
+        shard_iterator& operator++() noexcept {
+            _current += counter_shard_view::size;
+            _current_view = basic_counter_shard_view<View>(_current);
+            return *this;
+        }
+        shard_iterator operator++(int) noexcept {
+            auto it = *this;
+            operator++();
+            return it;
+        }
+        // Steps backward by one serialized shard and rebuilds the cached view.
+        shard_iterator& operator--() noexcept {
+            _current -= counter_shard_view::size;
+            _current_view = basic_counter_shard_view<View>(_current);
+            return *this;
+        }
+        shard_iterator operator--(int) noexcept {
+            auto it = *this;
+            operator--();
+            return it;
+        }
+        // Iterators are equal when they refer to the same buffer position.
+        bool operator==(const shard_iterator& other) const noexcept {
+            return _current == other._current;
+        }
+        bool operator!=(const shard_iterator& other) const noexcept {
+            return !(*this == other);
+        }
+    };
+public:
+    // Range covering the whole cell value, from data() to data() + size().
+    boost::iterator_range<shard_iterator> shards() const {
+        auto bv = _cell.value();
+        auto begin = shard_iterator(bv.data());
+        auto end = shard_iterator(bv.data() + bv.size());
+        return boost::make_iterator_range(begin, end);
+    }
+
+    // Size of the cell value divided by the size of one serialized shard.
+    size_t shard_count() const {
+        return _cell.value().size() / counter_shard_view::size;
+    }
+public:
+    // ac must be a live counter cell
+    // (both conditions are checked with assert()).
+    explicit basic_counter_cell_view(atomic_cell_base<View> ac) noexcept : _cell(ac) {
+        assert(_cell.is_live());
+        assert(!_cell.is_counter_update());
+    }
+
+    api::timestamp_type timestamp() const { return _cell.timestamp(); }
+
+    // Data type of the value returned by total_value().
+    static data_type total_value_type() { return long_type; }
+
+    // Sum of value() over all shards, accumulated as int64_t.
+    int64_t total_value() const {
+        return boost::accumulate(shards(), int64_t(0), [] (int64_t v, counter_shard_view cs) {
+            return v + cs.value();
+        });
+    }
+
+    // Linear search for the shard whose id equals 'id'.
+    // Returns an empty optional when there is no such shard.
+    stdx::optional<counter_shard_view> get_shard(const counter_id& id) const {
+        auto it = boost::range::find_if(shards(), [&id] (counter_shard_view csv) {
+            return csv.id() == id;
+        });
+        if (it == shards().end()) {
+            return { };
+        }
+        return *it;
+    }
+
+    // Shard whose id is counter_id::local(), if present.
+    stdx::optional<counter_shard_view> local_shard() const {
+        // TODO: consider caching local shard position
+        return get_shard(counter_id::local());
+    }
+
+    // Equal when the timestamps match and the shard sequences compare equal
+    // element by element.
+    bool operator==(const basic_counter_cell_view& other) const {
+        return timestamp() == other.timestamp() && boost::equal(shards(), other.shards());
+    }
+};
+
+// Counter cell view over a bytes_view. Inherits the constructors of
+// basic_counter_cell_view; the operations below are declared here and
+// defined out of line.
+struct counter_cell_view : basic_counter_cell_view<bytes_view> {
+    using basic_counter_cell_view::basic_counter_cell_view;
+
+    // Returns counter shards in an order that is compatible with Scylla 1.7.4.
+    std::vector<counter_shard> shards_compatible_with_1_7_4() const;
+
+    // Reversibly applies two counter cells, at least one of them must be live.
+    // Returns true iff dst was modified.
+    static bool apply_reversibly(atomic_cell_or_collection& dst, atomic_cell_or_collection& src);
+
+    // Reverts apply performed by apply_reversibly().
+    static void revert_apply(atomic_cell_or_collection& dst, atomic_cell_or_collection& src);
+
+    // Computes a counter cell containing minimal amount of data which, when
+    // applied to 'b' returns the same cell as 'a' and 'b' applied together.
+    static stdx::optional<atomic_cell> difference(atomic_cell_view a, atomic_cell_view b);
+
+    // Stream output; granted friendship so it can reach the protected cell.
+    friend std::ostream& operator<<(std::ostream& os, counter_cell_view ccv);
+};
+
+// Counter cell view over a bytes_mutable_view. Inherits the constructors of
+// basic_counter_cell_view and additionally allows changing the timestamp.
+struct counter_cell_mutable_view : basic_counter_cell_view<bytes_mutable_view> {
+    using basic_counter_cell_view::basic_counter_cell_view;
+
+    // Forwards to set_timestamp() of the underlying cell.
+    void set_timestamp(api::timestamp_type ts) { _cell.set_timestamp(ts); }
+};
+
+// Transforms mutation dst from counter updates to counter shards using state
+// stored in current_state.
+// If current_state is present it has to be in the same schema as dst.
+void transform_counter_updates_to_shards(mutation& dst, const mutation* current_state, uint64_t clock_offset);
+
+// Hashing of a single counter shard: feeds the id (as a UUID), the value and
+// the logical clock to the hasher, in that order.
+template<>
+struct appending_hash<counter_shard_view> {
+    template<typename Hasher>
+    void operator()(Hasher& h, const counter_shard_view& cshard) const {
+        ::feed_hash(h, cshard.id().to_uuid());
+        ::feed_hash(h, cshard.value());
+        ::feed_hash(h, cshard.logical_clock());
+    }
+};
+
+// Hashing of a whole counter cell: feeds a constant 'true' liveness marker,
+// the cell timestamp, and then every shard in iteration order.
+template<>
+struct appending_hash<counter_cell_view> {
+    template<typename Hasher>
+    void operator()(Hasher& h, const counter_cell_view& cell) const {
+        ::feed_hash(h, true); // is_live
+        ::feed_hash(h, cell.timestamp());
+        // Each shard goes through appending_hash<counter_shard_view>.
+        for (auto&& csv : cell.shards()) {
+            ::feed_hash(h, csv);
+        }
+    }
+};
--- a/cpu_controller.hh
+++ b/cpu_controller.hh
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+#include <seastar/core/thread.hh>
+#include <seastar/core/timer.hh>
+#include <chrono>
+
+// Simple proportional controller to adjust shares of memtable/streaming flushes.
+//
+// Goal is to flush as fast as we can, but not so fast that we steal all the CPU from incoming
+// requests, and at the same time minimize user-visible fluctuations in the flush quota.
+//
+// What that translates to is we'll try to keep virtual dirty's first derivative at 0 (IOW, we keep
+// virtual dirty constant), which means that the rate of incoming writes is equal to the rate of
+// flushed bytes.
+//
+// The exact point at which the controller stops determines the desired flush CPU usage. As we
+// approach the hard dirty limit, we need to be more aggressive. We will therefore define two
+// thresholds, and increase the constant as we cross them.
+//
+//  1) the soft limit line
+//  2) halfway between soft limit and dirty limit
+//
+// The constants q1 and q2 are used to determine the proportional factor at each stage.
+//
+// Below the soft limit, we are in no particular hurry to flush, since it means we're set to
+// complete flushing before a new memtable is ready. The quota is dirty * q1, and q1 is set to a
+// low number.
+//
+// The first half of the virtual dirty region is where we expect to be usually, so we have a low
+// slope corresponding to a sluggish response between q1 * soft_limit and q2.
+//
+// In the second half, we're getting close to the hard dirty limit so we increase the slope and
+// become more responsive, up to a maximum quota of qmax.
+//
+// For now we'll just set them in the structure not to complicate the constructor. But q1, q2 and
+// qmax can easily become parameters if we find another user.
+// Controller state for adjusting the CPU quota of memtable/streaming flushes.
+//
+// An instance is either built from an update interval, a soft limit and a
+// callback reporting the current dirty ratio (constructor defined out of
+// line), or built "disabled", in which case it only hands out the backup
+// scheduling group it was given.
+class flush_cpu_controller {
+    // Controller constants; they are consumed by the out-of-line code.
+    static constexpr float hard_dirty_limit = 0.50;
+    static constexpr float q1 = 0.01;
+    static constexpr float q2 = 0.2;
+    static constexpr float qmax = 1;
+
+    float _current_quota = 0.0f;
+    // _goal and _interval carry default initializers because the 'disabled'
+    // constructor does not set them; without the defaults they would hold
+    // indeterminate values that the defaulted move constructor then copies.
+    // A constructor that names them in its initializer list is unaffected.
+    float _goal = 0.0f;
+    std::function<float()> _current_dirty;
+    std::chrono::milliseconds _interval = std::chrono::milliseconds::zero();
+    timer<> _update_timer;
+
+    seastar::thread_scheduling_group _scheduling_group;
+    // Group handed out by scheduling_group(); nullptr unless a constructor
+    // sets it.
+    // NOTE(review): if the out-of-line constructor points this at
+    // _scheduling_group, the defaulted move constructor would leave the new
+    // object pointing into the moved-from one -- confirm.
+    seastar::thread_scheduling_group *_current_scheduling_group = nullptr;
+
+    // Defined out of line.
+    void adjust();
+public:
+    // Scheduling group flushes should currently run in (may be nullptr).
+    seastar::thread_scheduling_group* scheduling_group() {
+        return _current_scheduling_group;
+    }
+    // Most recently stored quota; 0 until something updates it.
+    float current_quota() const {
+        return _current_quota;
+    }
+
+    // Tag type selecting the disabled controller; 'backup' is the group that
+    // scheduling_group() will return.
+    struct disabled {
+        seastar::thread_scheduling_group *backup;
+    };
+    // Disabled controller: owns a zero-period, zero-usage scheduling group and
+    // exposes d.backup through scheduling_group().
+    flush_cpu_controller(disabled d) : _scheduling_group(std::chrono::nanoseconds(0), 0), _current_scheduling_group(d.backup) {}
+    // Defined out of line.
+    flush_cpu_controller(std::chrono::milliseconds interval, float soft_limit, std::function<float()> current_dirty);
+    flush_cpu_controller(flush_cpu_controller&&) = default;
+};
+
+
--- a/cql3/Cql.g
+++ b/cql3/Cql.g
@@ -26,26 +26,43 @@ options {
@parser::namespace{cql3_parser}

@lexer::includes {
+#include "cql3/error_collector.hh"
 #include "cql3/error_listener.hh"
 }

@parser::includes {
 #include "cql3/selection/writetime_or_ttl.hh"
+#include "cql3/statements/raw/parsed_statement.hh"
+#include "cql3/statements/raw/select_statement.hh"
+#include "cql3/statements/alter_keyspace_statement.hh"
 #include "cql3/statements/alter_table_statement.hh"
+#include "cql3/statements/alter_view_statement.hh"
 #include "cql3/statements/create_keyspace_statement.hh"
 #include "cql3/statements/drop_keyspace_statement.hh"
 #include "cql3/statements/create_index_statement.hh"
 #include "cql3/statements/create_table_statement.hh"
+#include "cql3/statements/create_view_statement.hh"
 #include "cql3/statements/create_type_statement.hh"
+#include "cql3/statements/drop_type_statement.hh"
+#include "cql3/statements/alter_type_statement.hh"
 #include "cql3/statements/property_definitions.hh"
+#include "cql3/statements/drop_index_statement.hh"
 #include "cql3/statements/drop_table_statement.hh"
+#include "cql3/statements/drop_view_statement.hh"
 #include "cql3/statements/truncate_statement.hh"
-#include "cql3/statements/select_statement.hh"
-#include "cql3/statements/update_statement.hh"
-#include "cql3/statements/delete_statement.hh"
+#include "cql3/statements/raw/update_statement.hh"
+#include "cql3/statements/raw/insert_statement.hh"
+#include "cql3/statements/raw/delete_statement.hh"
 #include "cql3/statements/index_prop_defs.hh"
-#include "cql3/statements/use_statement.hh"
-#include "cql3/statements/batch_statement.hh"
+#include "cql3/statements/raw/use_statement.hh"
+#include "cql3/statements/raw/batch_statement.hh"
+#include "cql3/statements/create_user_statement.hh"
+#include "cql3/statements/alter_user_statement.hh"
+#include "cql3/statements/drop_user_statement.hh"
+#include "cql3/statements/list_users_statement.hh"
+#include "cql3/statements/grant_statement.hh"
+#include "cql3/statements/revoke_statement.hh"
+#include "cql3/statements/list_permissions_statement.hh"
 #include "cql3/statements/index_target.hh"
 #include "cql3/statements/ks_prop_defs.hh"
 #include "cql3/selection/raw_selector.hh"
@@ -108,10 +125,13 @@ struct uninitialized {
 }

@context {
-    using listener_type = cql3::error_listener<RecognizerType>;
+    using collector_type = cql3::error_collector<ComponentType, ExceptionBaseType::TokenType, ExceptionBaseType>;
+    using listener_type = cql3::error_listener<ComponentType, ExceptionBaseType>;
+
    listener_type* listener;

    std::vector<::shared_ptr<cql3::column_identifier>> _bind_variables;
+    std::vector<std::unique_ptr<TokenType>> _missing_tokens;

    // Can't use static variable, since it needs to be defined out-of-line
    static const std::unordered_set<sstring>& _reserved_type_names() {
@@ -161,15 +181,26 @@ struct uninitialized {

    void displayRecognitionError(ANTLR_UINT8** token_names, ExceptionBaseType* ex)
    {
-        std::stringstream msg;
-        ex->displayRecognitionError(token_names, msg);
-        listener->syntax_error(*this, msg.str());
+        listener->syntax_error(*this, token_names, ex);
    }

    void add_recognition_error(const sstring& msg) {
        listener->syntax_error(*this, msg);
    }

+    bool is_eof_token(CommonTokenType token) const
+    {
+        return token == CommonTokenType::TOKEN_EOF;
+    }
+
+    std::string token_text(const TokenType* token)
+    {
+        if (!token) {
+            return "";
+        }
+        return token->getText();
+    }
+
    std::map<sstring, sstring> convert_property_map(shared_ptr<cql3::maps::literal> map) {
        if (!map || map->entries.empty()) {
            return std::map<sstring, sstring>{};
@@ -216,6 +247,13 @@ struct uninitialized {
        }
        operations.emplace_back(std::move(key), std::move(update));
    }
+
+    TokenType* getMissingSymbol(IntStreamType* istream, ExceptionBaseType* e,
+                                ANTLR_UINT32 expectedTokenType, BitsetListType* follow) {
+        auto token = BaseType::getMissingSymbol(istream, e, expectedTokenType, follow);
+        _missing_tokens.emplace_back(token);
+        return token;
+    }
 }

@lexer::namespace{cql3_parser}
@@ -233,7 +271,8 @@ struct uninitialized {
 }

@lexer::context {
-    using listener_type = cql3::error_listener<RecognizerType>;
+    using collector_type = cql3::error_collector<ComponentType, ExceptionBaseType::TokenType, ExceptionBaseType>;
+    using listener_type = cql3::error_listener<ComponentType, ExceptionBaseType>;

    listener_type* listener;

@@ -243,19 +282,30 @@ struct uninitialized {

    void displayRecognitionError(ANTLR_UINT8** token_names, ExceptionBaseType* ex)
    {
-        std::stringstream msg;
-        ex->displayRecognitionError(token_names, msg);
-        listener->syntax_error(*this, msg.str());
+        listener->syntax_error(*this, token_names, ex);
+    }
+
+    bool is_eof_token(CommonTokenType token) const
+    {
+        return token == CommonTokenType::TOKEN_EOF;
+    }
+
+    std::string token_text(const TokenType* token) const
+    {
+        if (!token) {
+            return "";
+        }
+        return std::to_string(int(*token));
    }
 }

 /** STATEMENTS **/

-query returns [shared_ptr<parsed_statement> stmnt]
+query returns [shared_ptr<raw::parsed_statement> stmnt]
    : st=cqlStatement (';')* EOF { $stmnt = st; }
    ;

-cqlStatement returns [shared_ptr<parsed_statement> stmt]
+cqlStatement returns [shared_ptr<raw::parsed_statement> stmt]
    @after{ if (stmt) { stmt->set_bound_variables(_bind_variables); } }
    : st1= selectStatement             { $stmt = st1; }
    | st2= insertStatement             { $stmt = st2; }
@@ -269,11 +319,8 @@ cqlStatement returns [shared_ptr<parsed_statement> stmt]
    | st10=createIndexStatement        { $stmt = st10; }
    | st11=dropKeyspaceStatement       { $stmt = st11; }
    | st12=dropTableStatement          { $stmt = st12; }
-#if 0
    | st13=dropIndexStatement          { $stmt = st13; }
-#endif
    | st14=alterTableStatement         { $stmt = st14; }
-#if 0
    | st15=alterKeyspaceStatement      { $stmt = st15; }
    | st16=grantStatement              { $stmt = st16; }
    | st17=revokeStatement             { $stmt = st17; }
@@ -282,25 +329,29 @@ cqlStatement returns [shared_ptr<parsed_statement> stmt]
    | st20=alterUserStatement          { $stmt = st20; }
    | st21=dropUserStatement           { $stmt = st21; }
    | st22=listUsersStatement          { $stmt = st22; }
+#if 0
    | st23=createTriggerStatement      { $stmt = st23; }
    | st24=dropTriggerStatement        { $stmt = st24; }
 #endif
    | st25=createTypeStatement         { $stmt = st25; }
-#if 0
    | st26=alterTypeStatement          { $stmt = st26; }
    | st27=dropTypeStatement           { $stmt = st27; }
+#if 0
    | st28=createFunctionStatement     { $stmt = st28; }
    | st29=dropFunctionStatement       { $stmt = st29; }
    | st30=createAggregateStatement    { $stmt = st30; }
    | st31=dropAggregateStatement      { $stmt = st31; }
 #endif
+    | st32=createViewStatement         { $stmt = st32; }
+    | st33=alterViewStatement          { $stmt = st33; }
+    | st34=dropViewStatement           { $stmt = st34; }
    ;

 /*
 * USE <KEYSPACE>;
 */
-useStatement returns [::shared_ptr<use_statement> stmt]
-    : K_USE ks=keyspaceName { $stmt = ::make_shared<use_statement>(ks); }
+useStatement returns [::shared_ptr<raw::use_statement> stmt]
+    : K_USE ks=keyspaceName { $stmt = ::make_shared<raw::use_statement>(ks); }
    ;

 /**
@@ -309,11 +360,11 @@ useStatement returns [::shared_ptr<use_statement> stmt]
 * WHERE KEY = "key1" AND COL > 1 AND COL < 100
 * LIMIT <NUMBER>;
 */
-selectStatement returns [shared_ptr<select_statement::raw_statement> expr]
+selectStatement returns [shared_ptr<raw::select_statement> expr]
    @init {
        bool is_distinct = false;
        ::shared_ptr<cql3::term::raw> limit;
-        select_statement::parameters::orderings_type orderings;
+        raw::select_statement::parameters::orderings_type orderings;
        bool allow_filtering = false;
    }
    : K_SELECT ( ( K_DISTINCT { is_distinct = true; } )?
@@ -326,8 +377,8 @@ selectStatement returns [shared_ptr<select_statement::raw_statement> expr]
      ( K_LIMIT rows=intValue { limit = rows; } )?
      ( K_ALLOW K_FILTERING  { allow_filtering = true; } )?
      {
-          auto params = ::make_shared<select_statement::parameters>(std::move(orderings), is_distinct, allow_filtering);
-          $expr = ::make_shared<select_statement::raw_statement>(std::move(cf), std::move(params),
+          auto params = ::make_shared<raw::select_statement::parameters>(std::move(orderings), is_distinct, allow_filtering);
+          $expr = ::make_shared<raw::select_statement>(std::move(cf), std::move(params),
            std::move(sclause), std::move(wclause), std::move(limit));
      }
    ;
@@ -381,7 +432,7 @@ whereClause returns [std::vector<cql3::relation_ptr> clause]
    : relation[$clause] (K_AND relation[$clause])*
    ;

-orderByClause[select_statement::parameters::orderings_type& orderings]
+orderByClause[raw::select_statement::parameters::orderings_type& orderings]
    @init{
        bool reversed = false;
    }
@@ -394,7 +445,7 @@ orderByClause[select_statement::parameters::orderings_type& orderings]
 * USING TIMESTAMP <long>;
 *
 */
-insertStatement returns [::shared_ptr<update_statement::parsed_insert> expr]
+insertStatement returns [::shared_ptr<raw::insert_statement> expr]
    @init {
        auto attrs = ::make_shared<cql3::attributes::raw>();
        std::vector<::shared_ptr<cql3::column_identifier::raw>> column_names;
@@ -409,7 +460,7 @@ insertStatement returns [::shared_ptr<update_statement::parsed_insert> expr]
        ( K_IF K_NOT K_EXISTS { if_not_exists = true; } )?
        ( usingClause[attrs] )?
      {
-          $expr = ::make_shared<update_statement::parsed_insert>(std::move(cf),
+          $expr = ::make_shared<raw::insert_statement>(std::move(cf),
                                                   std::move(attrs),
                                                   std::move(column_names),
                                                   std::move(values),
@@ -432,7 +483,7 @@ usingClauseObjective[::shared_ptr<cql3::attributes::raw> attrs]
 * SET name1 = value1, name2 = value2
 * WHERE key = value;
 */
-updateStatement returns [::shared_ptr<update_statement::parsed_update> expr]
+updateStatement returns [::shared_ptr<raw::update_statement> expr]
    @init {
        auto attrs = ::make_shared<cql3::attributes::raw>();
        std::vector<std::pair<::shared_ptr<cql3::column_identifier::raw>, ::shared_ptr<cql3::operation::raw_update>>> operations;
@@ -443,7 +494,7 @@ updateStatement returns [::shared_ptr<update_statement::parsed_update> expr]
      K_WHERE wclause=whereClause
      ( K_IF conditions=updateConditions )?
      {
-          return ::make_shared<update_statement::parsed_update>(std::move(cf),
+          return ::make_shared<raw::update_statement>(std::move(cf),
                                                  std::move(attrs),
                                                  std::move(operations),
                                                  std::move(wclause),
@@ -462,7 +513,7 @@ updateConditions returns [conditions_type conditions]
 * WHERE KEY = keyname
   [IF (EXISTS | name = value, ...)];
 */
-deleteStatement returns [::shared_ptr<delete_statement::parsed> expr]
+deleteStatement returns [::shared_ptr<raw::delete_statement> expr]
    @init {
        auto attrs = ::make_shared<cql3::attributes::raw>();
        std::vector<::shared_ptr<cql3::operation::raw_deletion>> column_deletions;
@@ -474,7 +525,7 @@ deleteStatement returns [::shared_ptr<delete_statement::parsed> expr]
      K_WHERE wclause=whereClause
      ( K_IF ( K_EXISTS { if_exists = true; } | conditions=updateConditions ))?
      {
-          return ::make_shared<delete_statement::parsed>(cf,
+          return ::make_shared<raw::delete_statement>(cf,
                                            std::move(attrs),
                                            std::move(column_deletions),
                                            std::move(wclause),
@@ -521,11 +572,11 @@ usingClauseDelete[::shared_ptr<cql3::attributes::raw> attrs]
 *   ...
 * APPLY BATCH
 */
-batchStatement returns [shared_ptr<cql3::statements::batch_statement::parsed> expr]
+batchStatement returns [shared_ptr<cql3::statements::raw::batch_statement> expr]
    @init {
-        using btype = cql3::statements::batch_statement::type; 
+        using btype = cql3::statements::raw::batch_statement::type; 
        btype type = btype::LOGGED;
-        std::vector<shared_ptr<cql3::statements::modification_statement::parsed>> statements;
+        std::vector<shared_ptr<cql3::statements::raw::modification_statement>> statements;
        auto attrs = make_shared<cql3::attributes::raw>();
    }
    : K_BEGIN
@@ -534,11 +585,11 @@ batchStatement returns [shared_ptr<cql3::statements::batch_statement::parsed> ex
          ( s=batchStatementObjective ';'? { statements.push_back(std::move(s)); } )*
      K_APPLY K_BATCH
      {
-          $expr = ::make_shared<cql3::statements::batch_statement::parsed>(type, std::move(attrs), std::move(statements));
+          $expr = ::make_shared<cql3::statements::raw::batch_statement>(type, std::move(attrs), std::move(statements));
      }
    ;

-batchStatementObjective returns [shared_ptr<cql3::statements::modification_statement::parsed> statement]
+batchStatementObjective returns [shared_ptr<cql3::statements::raw::modification_statement> statement]
    : i=insertStatement  { $statement = i; }
    | u=updateStatement  { $statement = u; }
    | d=deleteStatement  { $statement = d; }
@@ -670,7 +721,7 @@ createTableStatement returns [shared_ptr<cql3::statements::create_table_statemen

 cfamDefinition[shared_ptr<cql3::statements::create_table_statement::raw_statement> expr]
    : '(' cfamColumns[expr] ( ',' cfamColumns[expr]? )* ')'
-      ( K_WITH cfamProperty[expr] ( K_AND cfamProperty[expr] )*)?
+      ( K_WITH cfamProperty[$expr->properties()] ( K_AND cfamProperty[$expr->properties()] )*)?
    ;

 cfamColumns[shared_ptr<cql3::statements::create_table_statement::raw_statement> expr]
@@ -686,15 +737,15 @@ pkDef[shared_ptr<cql3::statements::create_table_statement::raw_statement> expr]
    | '(' k1=ident { l.push_back(k1); } ( ',' kn=ident { l.push_back(kn); } )* ')' { $expr->add_key_aliases(l); }
    ;

-cfamProperty[shared_ptr<cql3::statements::create_table_statement::raw_statement> expr]
-    : property[expr->properties]
-    | K_COMPACT K_STORAGE { $expr->set_compact_storage(); }
+cfamProperty[cql3::statements::cf_properties& expr]
+    : property[$expr.properties()]
+    | K_COMPACT K_STORAGE { $expr.set_compact_storage(); }
    | K_CLUSTERING K_ORDER K_BY '(' cfamOrdering[expr] (',' cfamOrdering[expr])* ')'
    ;

-cfamOrdering[shared_ptr<cql3::statements::create_table_statement::raw_statement> expr]
+cfamOrdering[cql3::statements::cf_properties& expr]
    @init{ bool reversed=false; }
-    : k=ident (K_ASC | K_DESC { reversed=true;} ) { $expr->set_ordering(k, reversed); }
+    : k=ident (K_ASC | K_DESC { reversed=true;} ) { $expr.set_ordering(k, reversed); }
    ;


@@ -726,12 +777,13 @@ createIndexStatement returns [::shared_ptr<create_index_statement> expr]
        auto props = make_shared<index_prop_defs>();
        bool if_not_exists = false;
        auto name = ::make_shared<cql3::index_name>();
+        std::vector<::shared_ptr<index_target::raw>> targets;
    }
    : K_CREATE (K_CUSTOM { props->is_custom = true; })? K_INDEX (K_IF K_NOT K_EXISTS { if_not_exists = true; } )?
-        (idxName[name])? K_ON cf=columnFamilyName '(' id=indexIdent ')'
+        (idxName[name])? K_ON cf=columnFamilyName '(' (target1=indexIdent { targets.emplace_back(target1); } (',' target2=indexIdent { targets.emplace_back(target2); } )*)? ')'
        (K_USING cls=STRING_LITERAL { props->custom_class = sstring{$cls.text}; })?
        (K_WITH properties[props])?
-      { $expr = ::make_shared<create_index_statement>(cf, name, id, props, if_not_exists); }
+      { $expr = ::make_shared<create_index_statement>(cf, name, targets, props, if_not_exists); }
    ;

 indexIdent returns [::shared_ptr<index_target::raw> id]
@@ -741,6 +793,39 @@ indexIdent returns [::shared_ptr<index_target::raw> id]
    | K_FULL '(' c=cident ')'    { $id = index_target::raw::full_collection(c); }
    ;

+/**
+ * CREATE MATERIALIZED VIEW [IF NOT EXISTS] <viewName> AS
+ *  SELECT <columns>
+ *  FROM <CF>
+ *  [WHERE <pkColumns> IS NOT NULL]
+ *  PRIMARY KEY (<pkColumns>)
+ *  [WITH <property> = <value> AND ...];
+ */
+createViewStatement returns [::shared_ptr<create_view_statement> expr]
+    @init {
+        bool if_not_exists = false;
+        std::vector<::shared_ptr<cql3::column_identifier::raw>> partition_keys;
+        std::vector<::shared_ptr<cql3::column_identifier::raw>> composite_keys; // key columns listed after the partition key
+    }
+    : K_CREATE K_MATERIALIZED K_VIEW (K_IF K_NOT K_EXISTS { if_not_exists = true; })? cf=columnFamilyName K_AS
+        K_SELECT sclause=selectClause K_FROM basecf=columnFamilyName
+        (K_WHERE wclause=whereClause)? // the grammar accepts a view definition without a WHERE clause
+        K_PRIMARY K_KEY (
+        '(' '(' k1=cident { partition_keys.push_back(k1); } ( ',' kn=cident { partition_keys.push_back(kn); } )* ')' ( ',' c1=cident { composite_keys.push_back(c1); } )* ')' // nested parentheses: every column inside them is a partition key column
+    |   '(' k1=cident { partition_keys.push_back(k1); } ( ',' cn=cident { composite_keys.push_back(cn); } )* ')' // flat list: only the first column is a partition key column
+        )
+        { // the statement is built before the optional WITH clause so the cfamProperty calls below can update its properties
+             $expr = ::make_shared<create_view_statement>(
+                std::move(cf),
+                std::move(basecf),
+                std::move(sclause),
+                std::move(wclause),
+                std::move(partition_keys),
+                std::move(composite_keys),
+                if_not_exists);
+        }
+        ( K_WITH cfamProperty[{ $expr->properties() }] ( K_AND cfamProperty[{ $expr->properties() }] )*)?
+    ;

 #if 0
 /**
@@ -764,15 +849,18 @@ dropTriggerStatement returns [DropTriggerStatement expr]
      { $expr = new DropTriggerStatement(cf, name.toString(), ifExists); }
    ;

+#endif
+
 /**
 * ALTER KEYSPACE <KS> WITH <property> = <value>;
 */
-alterKeyspaceStatement returns [AlterKeyspaceStatement expr]
-    @init { KSPropDefs attrs = new KSPropDefs(); }
+alterKeyspaceStatement returns [shared_ptr<cql3::statements::alter_keyspace_statement> expr]
+    @init {
+        auto attrs = make_shared<cql3::statements::ks_prop_defs>();
+    }
    : K_ALTER K_KEYSPACE ks=keyspaceName
-        K_WITH properties[attrs] { $expr = new AlterKeyspaceStatement(ks, attrs); }
+        K_WITH properties[attrs] { $expr = make_shared<cql3::statements::alter_keyspace_statement>(ks, attrs); }
    ;
-#endif

 /**
 * ALTER COLUMN FAMILY <CF> ALTER <column> TYPE <newtype>;
@@ -784,7 +872,7 @@ alterKeyspaceStatement returns [AlterKeyspaceStatement expr]
 alterTableStatement returns [shared_ptr<alter_table_statement> expr]
    @init {
        alter_table_statement::type type;
-        auto props = make_shared<cql3::statements::cf_prop_defs>();;
+        auto props = make_shared<cql3::statements::cf_prop_defs>();
        std::vector<std::pair<shared_ptr<cql3::column_identifier::raw>, shared_ptr<cql3::column_identifier::raw>>> renames;
        bool is_static = false;
    }
@@ -803,25 +891,38 @@ alterTableStatement returns [shared_ptr<alter_table_statement> expr]
    }
    ;

-#if 0
 /**
 * ALTER TYPE <name> ALTER <field> TYPE <newtype>;
 * ALTER TYPE <name> ADD <field> <newtype>;
 * ALTER TYPE <name> RENAME <field> TO <newtype> AND ...;
 */
-alterTypeStatement returns [AlterTypeStatement expr]
+alterTypeStatement returns [::shared_ptr<alter_type_statement> expr]
    : K_ALTER K_TYPE name=userTypeName
-          ( K_ALTER f=ident K_TYPE v=comparatorType { $expr = AlterTypeStatement.alter(name, f, v); }
-          | K_ADD   f=ident v=comparatorType        { $expr = AlterTypeStatement.addition(name, f, v); }
+          ( K_ALTER f=ident K_TYPE v=comparatorType { $expr = ::make_shared<alter_type_statement::add_or_alter>(name, false, f, v); }
+          | K_ADD   f=ident v=comparatorType        { $expr = ::make_shared<alter_type_statement::add_or_alter>(name, true, f, v); }
          | K_RENAME
-               { Map<ColumnIdentifier, ColumnIdentifier> renames = new HashMap<ColumnIdentifier, ColumnIdentifier>(); }
-                 id1=ident K_TO toId1=ident { renames.put(id1, toId1); }
-                 ( K_AND idn=ident K_TO toIdn=ident { renames.put(idn, toIdn); } )*
-               { $expr = AlterTypeStatement.renames(name, renames); }
+               { $expr = ::make_shared<alter_type_statement::renames>(name); }
+               renames[{ static_pointer_cast<alter_type_statement::renames>($expr) }]
          )
    ;
-#endif

+/**
+ * ALTER MATERIALIZED VIEW <CF> WITH <property> = <value>;
+ */
+alterViewStatement returns [::shared_ptr<alter_view_statement> expr]
+    @init {
+        auto props = make_shared<cql3::statements::cf_prop_defs>(); // handed to the properties rule, which presumably fills it in (rule not shown here)
+    }
+    : K_ALTER K_MATERIALIZED K_VIEW cf=columnFamilyName K_WITH properties[props] // unlike CREATE MATERIALIZED VIEW, the WITH clause is mandatory
+    {
+        $expr = ::make_shared<alter_view_statement>(std::move(cf), std::move(props));
+    }
+    ;
+
+renames[::shared_ptr<alter_type_statement::renames> expr] // helper for ALTER TYPE ... RENAME: <from> TO <to> pairs joined by AND
+    : fromId=ident K_TO toId=ident { $expr->add_rename(fromId, toId); }
+      ( K_AND renames[$expr] )? // right-recursive: each further pair re-enters this rule with the same statement
+    ;

 /**
 * DROP KEYSPACE [IF EXISTS] <KSP>;
@@ -839,24 +940,31 @@ dropTableStatement returns [::shared_ptr<drop_table_statement> stmt]
    : K_DROP K_COLUMNFAMILY (K_IF K_EXISTS { if_exists = true; } )? cf=columnFamilyName { $stmt = ::make_shared<drop_table_statement>(cf, if_exists); }
    ;

-#if 0
 /**
 * DROP TYPE <name>;
 */
-dropTypeStatement returns [DropTypeStatement stmt]
-    @init { boolean ifExists = false; }
-    : K_DROP K_TYPE (K_IF K_EXISTS { ifExists = true; } )? name=userTypeName { $stmt = new DropTypeStatement(name, ifExists); }
+dropTypeStatement returns [::shared_ptr<drop_type_statement> stmt]
+    @init { bool if_exists = false; }
+    : K_DROP K_TYPE (K_IF K_EXISTS { if_exists = true; } )? name=userTypeName { $stmt = ::make_shared<drop_type_statement>(name, if_exists); }
+    ;
+
+/**
+ * DROP MATERIALIZED VIEW [IF EXISTS] <view_name>
+ */
+dropViewStatement returns [::shared_ptr<drop_view_statement> stmt]
+    @init { bool if_exists = false; }
+    : K_DROP K_MATERIALIZED K_VIEW (K_IF K_EXISTS { if_exists = true; } )? cf=columnFamilyName // the view name is parsed by columnFamilyName, the same rule dropTableStatement uses
+      { $stmt = ::make_shared<drop_view_statement>(cf, if_exists); }
     ;

 /**
 * DROP INDEX [IF EXISTS] <INDEX_NAME>
 */
-dropIndexStatement returns [DropIndexStatement expr]
-    @init { boolean ifExists = false; }
-    : K_DROP K_INDEX (K_IF K_EXISTS { ifExists = true; } )? index=indexName
-      { $expr = new DropIndexStatement(index, ifExists); }
+dropIndexStatement returns [::shared_ptr<drop_index_statement> expr]
+    @init { bool if_exists = false; }
+    : K_DROP K_INDEX (K_IF K_EXISTS { if_exists = true; } )? index=indexName
+      { $expr = ::make_shared<drop_index_statement>(index, if_exists); }
    ;
-#endif

 /**
  * TRUNCATE <CF>;
@@ -865,120 +973,118 @@ truncateStatement returns [::shared_ptr<truncate_statement> stmt]
    : K_TRUNCATE (K_COLUMNFAMILY)? cf=columnFamilyName { $stmt = ::make_shared<truncate_statement>(cf); }
    ;

-#if 0
 /**
 * GRANT <permission> ON <resource> TO <username>
 */
-grantStatement returns [GrantStatement stmt]
+grantStatement returns [::shared_ptr<grant_statement> stmt]
    : K_GRANT
          permissionOrAll
      K_ON
          resource
      K_TO
          username
-      { $stmt = new GrantStatement($permissionOrAll.perms, $resource.res, $username.text); }
+      { $stmt = ::make_shared<grant_statement>($permissionOrAll.perms, $resource.res, $username.text); } 
    ;

 /**
 * REVOKE <permission> ON <resource> FROM <username>
 */
-revokeStatement returns [RevokeStatement stmt]
+revokeStatement returns [::shared_ptr<revoke_statement> stmt]
    : K_REVOKE
          permissionOrAll
      K_ON
          resource
      K_FROM
          username
-      { $stmt = new RevokeStatement($permissionOrAll.perms, $resource.res, $username.text); }
+      { $stmt = ::make_shared<revoke_statement>($permissionOrAll.perms, $resource.res, $username.text); } 
    ;

-listPermissionsStatement returns [ListPermissionsStatement stmt]
+listPermissionsStatement returns [::shared_ptr<list_permissions_statement> stmt]
    @init {
-        IResource resource = null;
-        String username = null;
-        boolean recursive = true;
+		std::experimental::optional<auth::data_resource> r;
+		std::experimental::optional<sstring> u;
+		bool recursive = true;
    }
    : K_LIST
          permissionOrAll
-      ( K_ON resource { resource = $resource.res; } )?
-      ( K_OF username { username = $username.text; } )?
+      ( K_ON resource { r = $resource.res; } )?
+      ( K_OF username { u = sstring($username.text); } )?
      ( K_NORECURSIVE { recursive = false; } )?
-      { $stmt = new ListPermissionsStatement($permissionOrAll.perms, resource, username, recursive); }
+      { $stmt = ::make_shared<list_permissions_statement>($permissionOrAll.perms, std::move(r), std::move(u), recursive); } 
    ;

-permission returns [Permission perm]
+permission returns [auth::permission perm]
    : p=(K_CREATE | K_ALTER | K_DROP | K_SELECT | K_MODIFY | K_AUTHORIZE)
-    { $perm = Permission.valueOf($p.text.toUpperCase()); }
+    { $perm = auth::permissions::from_string($p.text); }
    ;

-permissionOrAll returns [Set<Permission> perms]
-    : K_ALL ( K_PERMISSIONS )?       { $perms = Permission.ALL_DATA; }
-    | p=permission ( K_PERMISSION )? { $perms = EnumSet.of($p.perm); }
+permissionOrAll returns [auth::permission_set perms]
+    : K_ALL ( K_PERMISSIONS )?       { $perms = auth::permissions::ALL_DATA; }
+    | p=permission ( K_PERMISSION )? { $perms = auth::permission_set::from_mask(auth::permission_set::mask_for($p.perm)); }
    ;

-resource returns [IResource res]
+resource returns [auth::data_resource res]
    : r=dataResource { $res = $r.res; }
    ;

-dataResource returns [DataResource res]
-    : K_ALL K_KEYSPACES { $res = DataResource.root(); }
-    | K_KEYSPACE ks = keyspaceName { $res = DataResource.keyspace($ks.id); }
+dataResource returns [auth::data_resource res]
+    : K_ALL K_KEYSPACES { $res = auth::data_resource(); }
+    | K_KEYSPACE ks = keyspaceName { $res = auth::data_resource($ks.id); }
    | ( K_COLUMNFAMILY )? cf = columnFamilyName
-      { $res = DataResource.columnFamily($cf.name.getKeyspace(), $cf.name.getColumnFamily()); }
+      { $res = auth::data_resource($cf.name->get_keyspace(), $cf.name->get_column_family()); }
    ;

 /**
 * CREATE USER [IF NOT EXISTS] <username> [WITH PASSWORD <password>] [SUPERUSER|NOSUPERUSER]
 */
-createUserStatement returns [CreateUserStatement stmt]
+createUserStatement returns [::shared_ptr<create_user_statement> stmt]
    @init {
-        UserOptions opts = new UserOptions();
-        boolean superuser = false;
-        boolean ifNotExists = false;
+    	auto opts = ::make_shared<cql3::user_options>();
+        bool superuser = false;
+        bool ifNotExists = false;
    }
    : K_CREATE K_USER (K_IF K_NOT K_EXISTS { ifNotExists = true; })? username
      ( K_WITH userOptions[opts] )?
      ( K_SUPERUSER { superuser = true; } | K_NOSUPERUSER { superuser = false; } )?
-      { $stmt = new CreateUserStatement($username.text, opts, superuser, ifNotExists); }
+      { $stmt = ::make_shared<create_user_statement>($username.text, std::move(opts), superuser, ifNotExists); }
    ;

 /**
 * ALTER USER <username> [WITH PASSWORD <password>] [SUPERUSER|NOSUPERUSER]
 */
-alterUserStatement returns [AlterUserStatement stmt]
+alterUserStatement returns [::shared_ptr<alter_user_statement> stmt]
    @init {
-        UserOptions opts = new UserOptions();
-        Boolean superuser = null;
+    	auto opts = ::make_shared<cql3::user_options>();
+    	std::experimental::optional<bool> superuser;
    }
    : K_ALTER K_USER username
      ( K_WITH userOptions[opts] )?
      ( K_SUPERUSER { superuser = true; } | K_NOSUPERUSER { superuser = false; } )?
-      { $stmt = new AlterUserStatement($username.text, opts, superuser); }
+      { $stmt = ::make_shared<alter_user_statement>($username.text, std::move(opts), std::move(superuser)); }
    ;

 /**
 * DROP USER [IF EXISTS] <username>
 */
-dropUserStatement returns [DropUserStatement stmt]
-    @init { boolean ifExists = false; }
-    : K_DROP K_USER (K_IF K_EXISTS { ifExists = true; })? username { $stmt = new DropUserStatement($username.text, ifExists); }
+dropUserStatement returns [::shared_ptr<drop_user_statement> stmt]
+    @init { bool ifExists = false; }
+    : K_DROP K_USER (K_IF K_EXISTS { ifExists = true; })? username { $stmt = ::make_shared<drop_user_statement>($username.text, ifExists); }
    ;

 /**
 * LIST USERS
 */
-listUsersStatement returns [ListUsersStatement stmt]
-    : K_LIST K_USERS { $stmt = new ListUsersStatement(); }
+listUsersStatement returns [::shared_ptr<list_users_statement> stmt]
+    : K_LIST K_USERS { $stmt = ::make_shared<list_users_statement>(); }
    ;

-userOptions[UserOptions opts]
+userOptions[::shared_ptr<cql3::user_options> opts]
    : userOption[opts]
    ;

-userOption[UserOptions opts]
-    : k=K_PASSWORD v=STRING_LITERAL { opts.put($k.text, $v.text); }
+userOption[::shared_ptr<cql3::user_options> opts]
+    : k=K_PASSWORD v=STRING_LITERAL { opts->put($k.text, $v.text); }
    ;
-#endif

 /** DEFINITIONS **/

@@ -1157,7 +1263,8 @@ columnOperation[operations_type& operations]

 columnOperationDifferentiator[operations_type& operations, ::shared_ptr<cql3::column_identifier::raw> key]
    : '=' normalColumnOperation[operations, key]
-    | '[' k=term ']' specializedColumnOperation[operations, key, k]
+    | '[' k=term ']' specializedColumnOperation[operations, key, k, false]
+    | '[' K_SCYLLA_TIMEUUID_LIST_INDEX '(' k=term ')' ']' specializedColumnOperation[operations, key, k, true]
    ;

 normalColumnOperation[operations_type& operations, ::shared_ptr<cql3::column_identifier::raw> key]
@@ -1194,16 +1301,21 @@ normalColumnOperation[operations_type& operations, ::shared_ptr<cql3::column_ide
          }
          add_raw_update(operations, key, make_shared<cql3::operation::addition>(cql3::constants::literal::integer($i.text)));
      }
+    | K_SCYLLA_COUNTER_SHARD_LIST '(' t=term ')'
+      {
+          add_raw_update(operations, key, ::make_shared<cql3::operation::set_counter_value_from_tuple_list>(t));      
+      }
    ;

 specializedColumnOperation[std::vector<std::pair<shared_ptr<cql3::column_identifier::raw>,
                                                 shared_ptr<cql3::operation::raw_update>>>& operations,
                           shared_ptr<cql3::column_identifier::raw> key,
-                           shared_ptr<cql3::term::raw> k]
+                           shared_ptr<cql3::term::raw> k,
+                           bool by_uuid]

    : '=' t=term
      {
-          add_raw_update(operations, key, make_shared<cql3::operation::set_element>(k, t));
+          add_raw_update(operations, key, make_shared<cql3::operation::set_element>(k, t, by_uuid));
      }
    ;

@@ -1254,7 +1366,8 @@ relation[std::vector<cql3::relation_ptr>& clauses]

    | K_TOKEN l=tupleOfIdentifiers type=relationType t=term
        { $clauses.emplace_back(::make_shared<cql3::token_relation>(std::move(l), *type, std::move(t))); }
-
+    | name=cident K_IS K_NOT K_NULL {
+          $clauses.emplace_back(make_shared<cql3::single_column_relation>(std::move(name), cql3::operator_type::IS_NOT, cql3::constants::NULL_LITERAL)); }
    | name=cident K_IN marker=inMarker
        { $clauses.emplace_back(make_shared<cql3::single_column_relation>(std::move(name), cql3::operator_type::IN, std::move(marker))); }
    | name=cident K_IN in_values=singleColumnInValues
@@ -1354,12 +1467,16 @@ native_type returns [shared_ptr<cql3_type> t]
    | K_FLOAT     { $t = cql3_type::float_; }
    | K_INET      { $t = cql3_type::inet; }
    | K_INT       { $t = cql3_type::int_; }
+    | K_SMALLINT  { $t = cql3_type::smallint; }
    | K_TEXT      { $t = cql3_type::text; }
    | K_TIMESTAMP { $t = cql3_type::timestamp; }
+    | K_TINYINT   { $t = cql3_type::tinyint; }
    | K_UUID      { $t = cql3_type::uuid; }
    | K_VARCHAR   { $t = cql3_type::varchar; }
    | K_VARINT    { $t = cql3_type::varint; }
    | K_TIMEUUID  { $t = cql3_type::timeuuid; }
+    | K_DATE      { $t = cql3_type::date; }
+    | K_TIME      { $t = cql3_type::time; }
    ;

 collection_type returns [shared_ptr<cql3::cql3_type::raw> pt]
@@ -1383,12 +1500,10 @@ tuple_type returns [shared_ptr<cql3::cql3_type::raw> t]
      '>' { $t = cql3::cql3_type::raw::tuple(std::move(types)); }
    ;

-#if 0
 username
    : IDENT
    | STRING_LITERAL
    ;
-#endif

 // Basically the same as cident, but we need to exlude existing CQL3 types
 // (which for some reason are not reserved otherwise)
@@ -1435,6 +1550,8 @@ basic_unreserved_keyword returns [sstring str]
        | K_DISTINCT
        | K_CONTAINS
        | K_STATIC
+        | K_FROZEN
+        | K_TUPLE
        | K_FUNCTION
        | K_AGGREGATE
        | K_SFUNC
@@ -1480,6 +1597,8 @@ K_KEYSPACE:    ( K E Y S P A C E
 K_KEYSPACES:   K E Y S P A C E S;
 K_COLUMNFAMILY:( C O L U M N F A M I L Y
                 | T A B L E );
+K_MATERIALIZED:M A T E R I A L I Z E D;
+K_VIEW:        V I E W;
 K_INDEX:       I N D E X;
 K_CUSTOM:      C U S T O M;
 K_ON:          O N;
@@ -1503,6 +1622,7 @@ K_DESC:        D E S C;
 K_ALLOW:       A L L O W;
 K_FILTERING:   F I L T E R I N G;
 K_IF:          I F;
+K_IS:          I S;
 K_CONTAINS:    C O N T A I N S;

 K_GRANT:       G R A N T;
@@ -1532,6 +1652,8 @@ K_DOUBLE:      D O U B L E;
 K_FLOAT:       F L O A T;
 K_INET:        I N E T;
 K_INT:         I N T;
+K_SMALLINT:    S M A L L I N T;
+K_TINYINT:     T I N Y I N T;
 K_TEXT:        T E X T;
 K_UUID:        U U I D;
 K_VARCHAR:     V A R C H A R;
@@ -1539,6 +1661,8 @@ K_VARINT:      V A R I N T;
 K_TIMEUUID:    T I M E U U I D;
 K_TOKEN:       T O K E N;
 K_WRITETIME:   W R I T E T I M E;
+K_DATE:        D A T E;
+K_TIME:        T I M E;

 K_NULL:        N U L L;
 K_NOT:         N O T;
@@ -1567,6 +1691,9 @@ K_OR:          O R;
 K_REPLACE:     R E P L A C E;
 K_DETERMINISTIC: D E T E R M I N I S T I C;

+K_SCYLLA_TIMEUUID_LIST_INDEX: S C Y L L A '_' T I M E U U I D '_' L I S T '_' I N D E X; // wraps the subscript term in columnOperationDifferentiator; selects set_element with by_uuid = true
+K_SCYLLA_COUNTER_SHARD_LIST: S C Y L L A '_' C O U N T E R '_' S H A R D '_' L I S T; // used by normalColumnOperation to build set_counter_value_from_tuple_list
+
 // Case-insensitive alpha characters
 fragment A: ('a'|'A');
 fragment B: ('b'|'B');
@@ -1612,20 +1739,17 @@ STRING_LITERAL
        setText(txt);
    }
    :
-// FIXME:
-#if 0
      /* pg-style string literal */
      (
-        '\$' '\$'
-        ( /* collect all input until '$$' is reached again */
-          {  (input.size() - input.index() > 1)
-               && !"$$".equals(input.substring(input.index(), input.index() + 1)) }?
-             => c=. { txt.appendCodePoint(c); }
+        '$' '$'
+        (
+          (c=~('$') { txt.push_back(c); })
+          |
+          ('$' (c=~('$') { txt.push_back('$'); txt.push_back(c); }))
        )*
-        '\$' '\$'
+        '$' '$'
      )
      |
-#endif
      /* conventional quoted string literal */
      (
        '\'' (c=~('\'') { txt.push_back(c);} | '\'' '\'' { txt.push_back('\''); })* '\''
--- a/cql3/abstract_marker.cc
+++ b/cql3/abstract_marker.cc
@@ -17,9 +17,9 @@
 */

 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 *
- * Modified by Cloudius Systems
+ * Modified by ScyllaDB
 */

 /*
--- a/cql3/abstract_marker.hh
+++ b/cql3/abstract_marker.hh
@@ -17,9 +17,9 @@
 */

 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 *
- * Modified by Cloudius Systems
+ * Modified by ScyllaDB
 */

 /*
--- a/cql3/assignment_testable.hh
+++ b/cql3/assignment_testable.hh
@@ -17,9 +17,9 @@
 */

 /*
- * Copyright 2014 Cloudius Systems
+ * Copyright (C) 2014 ScyllaDB
 *
- * Modified by Cloudius Systems
+ * Modified by ScyllaDB
 */

 /*
--- a/cql3/attributes.cc
+++ b/cql3/attributes.cc
@@ -17,9 +17,9 @@
 */

 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 *
- * Modified by Cloudius Systems
+ * Modified by ScyllaDB
 */

 /*
@@ -71,10 +71,12 @@ int64_t attributes::get_timestamp(int64_t now, const query_options& options) {
    }

    auto tval = _timestamp->bind_and_get(options);
-    if (!tval) {
+    if (tval.is_null()) {
        throw exceptions::invalid_request_exception("Invalid null value of timestamp");
    }
-
+    if (tval.is_unset_value()) {
+        return now;
+    }
    try {
        data_type_for<int64_t>()->validate(*tval);
    } catch (marshal_exception e) {
@@ -88,10 +90,12 @@ int32_t attributes::get_time_to_live(const query_options& options) {
        return 0;

    auto tval = _time_to_live->bind_and_get(options);
-    if (!tval) {
+    if (tval.is_null()) {
        throw exceptions::invalid_request_exception("Invalid null value of TTL");
    }
-
+    if (tval.is_unset_value()) {
+        return 0;
+    }
    try {
        data_type_for<int32_t>()->validate(*tval);
    }
--- a/cql3/attributes.hh
+++ b/cql3/attributes.hh
@@ -17,9 +17,9 @@
 */

 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 *
- * Modified by Cloudius Systems
+ * Modified by ScyllaDB
 */

 /*
--- a/cql3/cf_name.cc
+++ b/cql3/cf_name.cc
@@ -17,9 +17,9 @@
 */

 /*
- * Copyright 2015 Cloudius Systems
+ * Copyright (C) 2015 ScyllaDB
 *
- * Modified by Cloudius Systems
+ * Modified by ScyllaDB
 */

 /*
--- a/Show More
+++ b/Show More