release: prepare for 2.2.2 by hagitsegev

Update seastar submodule
* seastar 6f61d74...88cb58c (2): > reactor: disable nowait aio due to a kernel bug > configure.py: Enhance detection for gcc -fvisibility=hidden bug Fixes #3996.
2019-01-12 18:28:25 +02:00 · 2018-12-17 15:57:58 +02:00 · 2018-12-02 13:32:59 +02:00 · 2018-11-21 12:18:25 +02:00 · 2018-10-21 19:04:08 +03:00 · 2018-10-21 18:21:18 +03:00
641 changed files with 51645 additions and 24656 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,12 @@ dist/ami/files/*.rpm
 dist/ami/variables.json
 dist/ami/scylla_deploy.sh
 *.pyc
+Cql.tokens
+.kdev4
+*.kdev4
+CMakeLists.txt.user
+.cache
+.tox
+*.egg-info
+__pycache__CMakeLists.txt.user
+.gdbinit
--- a/.gitmodules
+++ b/.gitmodules
@@ -9,3 +9,6 @@
 [submodule "dist/ami/files/scylla-ami"]
 	path = dist/ami/files/scylla-ami
 	url = ../scylla-ami
+[submodule "xxHash"]
+	path = xxHash
+	url = ../xxHash
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,8 +5,8 @@
 cmake_minimum_required(VERSION 3.7)
 project(scylla)

-if (NOT DEFINED ENV{CLION_IDE})
-    message(FATAL_ERROR "This CMakeLists.txt file is only valid for use in CLion")
+if (NOT DEFINED FOR_IDE AND NOT DEFINED ENV{FOR_IDE} AND NOT DEFINED ENV{CLION_IDE})
+    message(FATAL_ERROR "This CMakeLists.txt file is only valid for use in IDEs, please define FOR_IDE to acknowledge this.")
 endif()

 # Default value. A more accurate list is populated through `pkg-config` below if `seastar.pc` is available.
@@ -125,7 +125,7 @@ list(REMOVE_ITEM SEASTAR_CFLAGS "-DHAVE_GCC6_CONCEPTS")
 #
 # For ease of browsing the source code, we always pretend that DPDK is enabled.
 target_compile_options(scylla PUBLIC
-        -std=gnu++14
+        -std=gnu++1z
        -DHAVE_DPDK
        -DHAVE_HWLOC
        "${SEASTAR_CFLAGS}")
@@ -137,4 +137,5 @@ target_include_directories(scylla PUBLIC
        ${SEASTAR_DPDK_INCLUDE_DIRS}
        ${SEASTAR_INCLUDE_DIRS}
        ${Boost_INCLUDE_DIRS}
+        xxhash
        build/release/gen)
--- a/HACKING.md
+++ b/HACKING.md
@@ -0,0 +1,279 @@
+# Guidelines for developing Scylla
+
+This document is intended to help developers and contributors to Scylla get started. The first part consists of general guidelines that make no assumptions about a development environment or tooling. The second part describes a particular environment and work-flow for exemplary purposes.
+
+## Overview
+
+This section covers some high-level information about the Scylla source code and work-flow.
+
+### Getting the source code
+
+Scylla uses [Git submodules](https://git-scm.com/book/en/v2/Git-Tools-Submodules) to manage its dependency on Seastar and other tools. Be sure that all submodules are correctly initialized when cloning the project:
+
+```bash
+$ git clone https://github.com/scylladb/scylla
+$ cd scylla
+$ git submodule update --init --recursive
+```
+
+### Dependencies
+
+Scylla depends on the system package manager for its development dependencies.
+
+Running `./install-dependencies.sh` (as root) installs the appropriate packages based on your Linux distribution.
+
+### Build system
+
+**Note**: Compiling Scylla requires, conservatively, 2 GB of memory per native thread, and up to 3 GB per native thread while linking.
+
+Scylla is built with [Ninja](https://ninja-build.org/), a low-level rule-based system. A Python script, `configure.py`, generates a Ninja file (`build.ninja`) based on configuration options.
+
+To build for the first time:
+
+```bash
+$ ./configure.py
+$ ninja-build
+```
+
+Afterwards, it is sufficient to just execute Ninja.
+
+The full suite of options for project configuration is available via
+
+```bash
+$ ./configure.py --help
+```
+
+The most important options are:
+
+- `--mode={release,debug,all}`: Debug mode enables [AddressSanitizer](https://github.com/google/sanitizers/wiki/AddressSanitizer) and allows for debugging with tools like GDB. Debugging builds are generally slower and generate much larger object files than release builds.
+
+- `--{enable,disable}-dpdk`: [DPDK](http://dpdk.org/) is a set of libraries and drivers for fast packet processing. During development, it's not necessary to enable support even if it is supported by your platform.
+
+Source files and build targets are tracked manually in `configure.py`, so the script needs to be updated when new files or targets are added or removed.
+
+To save time -- for instance, to avoid compiling all unit tests -- you can also specify specific targets to Ninja. For example,
+
+```bash
+$ ninja-build build/release/tests/schema_change_test
+```
+
+### Unit testing
+
+Unit tests live in the `/tests` directory. Like with application source files, test sources and executables are specified manually in `configure.py` and need to be updated when changes are made.
+
+A test target can be any executable. A non-zero return code indicates test failure.
+
+Most tests in the Scylla repository are built using the [Boost.Test](http://www.boost.org/doc/libs/1_64_0/libs/test/doc/html/index.html) library. Utilities for writing tests with Seastar futures are also included.
+
+Run all tests through the test execution wrapper with
+
+```bash
+$ ./test.py --mode={debug,release}
+```
+
+The `--name` argument can be specified to run a particular test.
+
+Alternatively, you can execute the test executable directly. For example,
+
+```bash
+$ build/release/tests/row_cache_test -- -c1 -m1G
+```
+
+The `-c1 -m1G` arguments limit this Seastar-based test to a single system thread and 1 GB of memory.
+
+### Preparing patches
+
+All changes to Scylla are submitted as patches to the public mailing list. Once a patch is approved by one of the maintainers of the project, it is committed to the maintainers' copy of the repository at https://github.com/scylladb/scylla.
+
+Detailed instructions for formatting patches for the mailing list and advice on preparing good patches are available at the [ScyllaDB website](http://docs.scylladb.com/contribute/). There are also some guidelines that can help you make the patch review process smoother:
+
+1. Before generating patches, make sure your Git configuration points to `.gitorderfile`. You can do it by running
+
+```bash
+$ git config diff.orderfile .gitorderfile
+```
+
+2. If you are sending more than a single patch, push your changes into a new branch of your fork of Scylla on GitHub and add a URL pointing to this branch to your cover letter.
+
+3. If you are sending a new revision of an earlier patchset, add a brief summary of changes in this version, for example:
+```
+In v3:
+    - declared move constructor and move assignment operator as noexcept
+    - used std::variant instead of a union
+    ...
+```
+
+4. Add information about the tests run with this fix. It can look like
+```
+"Tests: unit ({mode}), dtest ({smp})"
+```
+
+The usual is "Tests: unit (release)", although running debug tests is encouraged.
+
+5. When answering review comments, prefer inline quotes as they make it easier to track the conversation across multiple e-mails.
+
+### Finding a person to review and merge your patches
+
+You can use the `scripts/find-maintainer` script to find a subsystem maintainer and/or reviewer for your patches. The script accepts a filename in the git source tree as an argument and outputs a list of subsystems the file belongs to and their respective maintainers and reviewers. For example, if you changed the `cql3/statements/create_view_statement.hh` file, run the script as follows:
+
+```bash
+$ ./scripts/find-maintainer cql3/statements/create_view_statement.hh
+```
+
+and you will get output like this:
+
+```
+CQL QUERY LANGUAGE
+  Tomasz Grabiec <tgrabiec@scylladb.com>   [maintainer]
+  Pekka Enberg <penberg@scylladb.com>      [maintainer]
+MATERIALIZED VIEWS
+  Pekka Enberg <penberg@scylladb.com>      [maintainer]
+  Duarte Nunes <duarte@scylladb.com>       [maintainer]
+  Nadav Har'El <nyh@scylladb.com>          [reviewer]
+  Duarte Nunes <duarte@scylladb.com>       [reviewer]
+```
+
+### Running Scylla
+
+Once Scylla has been compiled, executing the (`debug` or `release`) target will start a running instance in the foreground:
+
+```bash
+$ build/release/scylla
+```
+
+The `scylla` executable requires a configuration file, `scylla.yaml`. By default, this is read from `$SCYLLA_HOME/conf/scylla.yaml`. A good starting point for development is located in the repository at `/conf/scylla.yaml`.
+
+For development, a directory at `$HOME/scylla` can be used for all Scylla-related files:
+
+```bash
+$ mkdir -p $HOME/scylla $HOME/scylla/conf
+$ cp conf/scylla.yaml $HOME/scylla/conf/scylla.yaml
+$ # Edit configuration options as appropriate
+$ SCYLLA_HOME=$HOME/scylla build/release/scylla
+```
+
+The `scylla.yaml` file in the repository by default writes all database data to `/var/lib/scylla`, which likely requires root access. Change the `data_file_directories` and `commitlog_directory` fields as appropriate.
+
+Scylla has a number of requirements for the file-system and operating system to operate ideally and at peak performance. However, during development, these requirements can be relaxed with the `--developer-mode` flag.
+
+Additionally, when running on under-powered platforms like portable laptops, the `--overprovisioned` flag is useful.
+
+On a development machine, one might run Scylla as
+
+```bash
+$ SCYLLA_HOME=$HOME/scylla build/release/scylla --overprovisioned --developer-mode=yes
+```
+
+### Branches and tags
+
+Multiple release branches are maintained on the Git repository at https://github.com/scylladb/scylla. Release 1.5, for instance, is tracked on the `branch-1.5` branch.
+
+Similarly, tags are used to pin-point precise release versions, including hot-fix versions like 1.5.4. These are named `scylla-1.5.4`, for example.
+
+Most development happens on the `master` branch. Release branches are cut from `master` based on time and/or features. When a patch against `master` fixes a serious issue like a node crash or data loss, it is backported to a particular release branch with `git cherry-pick` by the project maintainers.
+
+## Example: development on Fedora 25
+
+This section describes one possible work-flow for developing Scylla on a Fedora 25 system. It is presented as an example to help you to develop a work-flow and tools that you are comfortable with.
+
+### Preface
+
+This guide will be written from the perspective of a fictitious developer, Taylor Smith.
+
+### Git work-flow
+
+Having two Git remotes is useful:
+
+- A public clone of Scylla (`"public"`)
+- A private clone of Scylla (`"private"`) for in-progress work or work that is not yet ready to share
+
+The first step to contributing a change to Scylla is to create a local branch dedicated to it. For example, a feature that fixes a bug in the CQL statement for creating tables could be called `ts/cql_create_table_error/v1`. The branch name is prefaced by the developer's initials and has a suffix indicating that this is the first version. The version suffix is useful when branches are shared publicly and changes are requested on the mailing list. Having a branch for each version of the patch (or patch set) shared publicly makes it easier to reference and compare the history of a change.
+
+Setting the upstream branch of your development branch to `master` is a useful way to track your changes. You can do this with
+
+```bash
+$ git branch -u master ts/cql_create_table_error/v1
+```
+
+As a patch set is developed, you can periodically push the branch to the private remote to back-up work.
+
+Once the patch set is ready to be reviewed, push the branch to the public remote and prepare an email to the `scylladb-dev` mailing list. Including a link to the branch on your public remote allows for reviewers to quickly test and explore your changes.
+
+### Development environment and source code navigation
+
+Scylla includes a [CMake](https://cmake.org/) file, `CMakeLists.txt`, for use only with development environments (not for building) so that they can properly analyze the source code.
+
+[CLion](https://www.jetbrains.com/clion/) is a commercial IDE that offers reasonably good source code navigation and advice for code hygiene, though its C++ parser sometimes makes errors and flags false issues.
+
+Other good options that directly parse CMake files are [KDevelop](https://www.kdevelop.org/) and [QtCreator](https://wiki.qt.io/Qt_Creator).
+
+To use the `CMakeLists.txt` file with these programs, define the `FOR_IDE` CMake variable or shell environmental variable.
+
+[Eclipse](https://eclipse.org/cdt/) is another open-source option. It doesn't natively work with CMake projects, and its C++ parser has many of the same issues as CLion's.
+
+### Distributed compilation: `distcc` and `ccache`
+
+Scylla's compilation times can be long. Two tools help somewhat:
+
+- [ccache](https://ccache.samba.org/) caches compiled object files on disk and re-uses them when possible
+- [distcc](https://github.com/distcc/distcc) distributes compilation jobs to remote machines
+
+A reasonably-powered laptop acts as the coordinator for compilation. A second, more powerful, machine acts as a passive compilation server.
+
+Having a direct wired connection between the machines ensures that object files can be transmitted quickly and limits the overhead of remote compilation.
+The coordinator has been assigned the static IP address `10.0.0.1` and the passive compilation machine has been assigned `10.0.0.2`.
+
+On Fedora, installing the `ccache` package places symbolic links for `gcc` and `g++` in the `PATH`. This allows normal compilation to transparently invoke `ccache` for compilation and cache object files on the local file-system.
+
+Next, set `CCACHE_PREFIX` so that `ccache` is responsible for invoking `distcc` as necessary:
+
+```bash
+export CCACHE_PREFIX="distcc"
+```
+
+On each host, edit `/etc/sysconfig/distccd` to include the allowed coordinators and the total number of jobs that the machine should accept.
+This example is for the laptop, which has 2 physical cores (4 logical cores with hyper-threading):
+
+```
+OPTIONS="--allow 10.0.0.2 --allow 127.0.0.1 --jobs 4"
+```
+
+`10.0.0.2` has 8 physical cores (16 logical cores) and 64 GB of memory.
+
+As a rule of thumb, the number of jobs that a machine is configured to accept should be equal to the number of its native threads.
+
+Restart the `distccd` service on all machines.
+
+On the coordinator machine, edit `$HOME/.distcc/hosts` with the available hosts for compilation. Order of the hosts indicates preference.
+
+```
+10.0.0.2/16 localhost/2
+```
+
+In this example, `10.0.0.2` will be sent up to 16 jobs and the local machine will be sent up to 2. Allowing for two extra threads on the host machine for coordination, we run compilation with `16 + 2 + 2 = 20` jobs in total: `ninja-build -j20`.
+
+When a compilation is in progress, the status of jobs on all remote machines can be visualized in the terminal with `distccmon-text` or graphically as a GTK application with `distccmon-gnome`.
+
+One thing to keep in mind is that linking object files happens on the coordinating machine, which can be a bottleneck. See the next section for a way to speed up this process.
+
+### Using the `gold` linker
+
+Linking Scylla can be slow. The gold linker can replace GNU ld and often speeds the linking process. On Fedora, you can switch the system linker using
+
+```bash
+$ sudo alternatives --config ld
+```
+
+### Testing changes in Seastar with Scylla
+
+Sometimes Scylla development is closely tied with a feature being developed in Seastar. It can be useful to compile Scylla with a particular check-out of Seastar.
+
+One way to do this is to create a local remote for the Seastar submodule in the Scylla repository:
+
+```bash
+$ cd $HOME/src/scylla
+$ cd seastar
+$ git remote add local /home/tsmith/src/seastar
+$ git remote update
+$ git checkout -t local/my_local_seastar_branch
+```
--- a/131
+++ b/131
@@ -0,0 +1,131 @@
+M: Maintainer with commit access
+R: Reviewer with subsystem expertise
+F: Filename, directory, or pattern for the subsystem
+
+---
+
+AUTH
+M: Paweł Dziepak <pdziepak@scylladb.com>
+M: Duarte Nunes <duarte@scylladb.com>
+R: Calle Wilund <calle@scylladb.com>
+R: Vlad Zolotarov <vladz@scylladb.com>
+R: Jesse Haber-Kucharsky <jhaberku@scylladb.com>
+F: auth/*
+
+CACHE
+M: Tomasz Grabiec <tgrabiec@scylladb.com>
+M: Paweł Dziepak <pdziepak@scylladb.com>
+R: Piotr Jastrzebski <piotr@scylladb.com>
+F: row_cache*
+F: *mutation*
+F: tests/mvcc*
+
+COMMITLOG / BATCHLOG
+M: Paweł Dziepak <pdziepak@scylladb.com>
+M: Duarte Nunes <duarte@scylladb.com>
+R: Calle Wilund <calle@scylladb.com>
+F: db/commitlog/*
+F: db/batch*
+
+COORDINATOR
+M: Paweł Dziepak <pdziepak@scylladb.com>
+M: Duarte Nunes <duarte@scylladb.com>
+R: Gleb Natapov <gleb@scylladb.com>
+F: service/storage_proxy*
+
+COMPACTION
+R: Raphael S. Carvalho <raphaelsc@scylladb.com>
+R: Glauber Costa <glauber@scylladb.com>
+R: Nadav Har'El <nyh@scylladb.com>
+F: sstables/compaction*
+
+CQL TRANSPORT LAYER
+M: Pekka Enberg <penberg@scylladb.com>
+F: transport/*
+
+CQL QUERY LANGUAGE
+M: Tomasz Grabiec <tgrabiec@scylladb.com>
+M: Pekka Enberg <penberg@scylladb.com>
+F: cql3/*
+
+COUNTERS
+M: Paweł Dziepak <pdziepak@scylladb.com>
+F: counters*
+F: tests/counter_test*
+
+GOSSIP
+M: Duarte Nunes <duarte@scylladb.com>
+M: Tomasz Grabiec <tgrabiec@scylladb.com>
+R: Asias He <asias@scylladb.com>
+F: gms/*
+
+DOCKER
+M: Pekka Enberg <penberg@scylladb.com>
+F: dist/docker/*
+
+LSA
+M: Tomasz Grabiec <tgrabiec@scylladb.com>
+M: Paweł Dziepak <pdziepak@scylladb.com>
+F: utils/logalloc*
+
+MATERIALIZED VIEWS
+M: Duarte Nunes <duarte@scylladb.com>
+M: Pekka Enberg <penberg@scylladb.com>
+R: Nadav Har'El <nyh@scylladb.com>
+R: Duarte Nunes <duarte@scylladb.com>
+F: db/view/*
+F: cql3/statements/*view*
+
+PACKAGING
+R: Takuya ASADA <syuu@scylladb.com>
+F: dist/*
+
+REPAIR
+M: Tomasz Grabiec <tgrabiec@scylladb.com>
+M: Duarte Nunes <duarte@scylladb.com>
+R: Asias He <asias@scylladb.com>
+R: Nadav Har'El <nyh@scylladb.com>
+F: repair/*
+
+SCHEMA MANAGEMENT
+M: Tomasz Grabiec <tgrabiec@scylladb.com>
+M: Duarte Nunes <duarte@scylladb.com>
+M: Pekka Enberg <penberg@scylladb.com>
+F: db/schema_tables*
+F: db/legacy_schema_migrator*
+F: service/migration*
+F: schema*
+
+SECONDARY INDEXES
+M: Pekka Enberg <penberg@scylladb.com>
+M: Duarte Nunes <duarte@scylladb.com>
+R: Nadav Har'El <nyh@scylladb.com>
+R: Pekka Enberg <penberg@scylladb.com>
+F: db/index/*
+F: cql3/statements/*index*
+
+SSTABLES
+M: Tomasz Grabiec <tgrabiec@scylladb.com>
+M: Duarte Nunes <duarte@scylladb.com>
+R: Raphael S. Carvalho <raphaelsc@scylladb.com>
+R: Glauber Costa <glauber@scylladb.com>
+R: Nadav Har'El <nyh@scylladb.com>
+F: sstables/*
+
+STREAMING
+M: Tomasz Grabiec <tgrabiec@scylladb.com>
+M: Duarte Nunes <duarte@scylladb.com>
+R: Asias He <asias@scylladb.com>
+F: streaming/*
+F: service/storage_service.*
+
+THRIFT TRANSPORT LAYER
+M: Duarte Nunes <duarte@scylladb.com>
+F: thrift/*
+
+THE REST
+M: Avi Kivity <avi@scylladb.com>
+M: Paweł Dziepak <pdziepak@scylladb.com>
+M: Duarte Nunes <duarte@scylladb.com>
+M: Tomasz Grabiec <tgrabiec@scylladb.com>
+F: *
--- a/NOTICE.txt
+++ b/NOTICE.txt
@@ -1,2 +1,5 @@
 This project includes code developed by the Apache Software Foundation (http://www.apache.org/),
 especially Apache Cassandra.
+
+It also includes files from https://github.com/antonblanchard/crc32-vpmsum (author Anton Blanchard <anton@au.ibm.com>, IBM).
+These files are located in utils/arch/powerpc/crc32-vpmsum. Their license may be found in licenses/LICENSE-crc32-vpmsum.TXT.
--- a/README.md
+++ b/README.md
@@ -1,29 +1,19 @@
 # Scylla

-## Building Scylla
+## Quick-start

-In addition to required packages by Seastar, the following packages are required by Scylla.
-
-### Submodules
-Scylla uses submodules, so make sure you pull the submodules first by doing:
-```
-git submodule init
-git submodule update --init --recursive
+```bash
+$ git submodule update --init --recursive
+$ sudo ./install-dependencies.sh
+$ ./configure.py --mode=release
+$ ninja-build -j4 # Assuming 4 system threads.
+$ ./build/release/scylla
+$ # Rejoice!
 ```

-### Building and Running Scylla on Fedora
-* Installing required packages:
+Please see [HACKING.md](HACKING.md) for detailed information on building and developing Scylla.

-```
-sudo dnf install yaml-cpp-devel lz4-devel zlib-devel snappy-devel jsoncpp-devel thrift-devel antlr3-tool antlr3-C++-devel libasan libubsan gcc-c++ gnutls-devel ninja-build ragel libaio-devel cryptopp-devel xfsprogs-devel numactl-devel hwloc-devel libpciaccess-devel libxml2-devel python3-pyparsing lksctp-tools-devel protobuf-devel protobuf-compiler systemd-devel libunwind-devel
-```
-
-* Build Scylla
-```
-./configure.py --mode=release --with=scylla --disable-xen
-ninja-build build/release/scylla -j2 # you can use more cpus if you have tons of RAM
-
-```
+## Running Scylla

 * Run Scylla
 ```
--- a/2
+++ b/2
@@ -1,6 +1,6 @@
 #!/bin/sh

-VERSION=2.0.4
+VERSION=2.2.2

 if test -f version
 then
--- a/api/api-doc/storage_service.json
+++ b/api/api-doc/storage_service.json
@@ -792,6 +792,24 @@
            }
         ]
      },
+      {
+         "path":"/storage_service/active_repair/",
+         "operations":[
+            {
+               "method":"GET",
+               "summary":"Return an array with the ids of the currently active repairs",
+               "type":"array",
+               "items":{
+                  "type":"int"
+               },
+               "nickname":"get_active_repair_async",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[]
+            }
+         ]
+      },
      {
         "path":"/storage_service/repair_async/{keyspace}",
         "operations":[
@@ -952,6 +970,22 @@
            }
         ]
      },
+      {
+         "path":"/storage_service/force_terminate_repair",
+         "operations":[
+            {
+               "method":"POST",
+               "summary":"Force terminate all repair sessions",
+               "type":"void",
+               "nickname":"force_terminate_all_repair_sessions_new",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+               ]
+            }
+         ]
+      },
      {
         "path":"/storage_service/decommission",
         "operations":[
@@ -2159,11 +2193,11 @@
               "description":"The column family"
            },
            "total":{
-               "type":"int",
+               "type":"long",
               "description":"The total snapshot size"
            },
            "live":{
-               "type":"int",
+               "type":"long",
               "description":"The live snapshot size"
            }
         }
--- a/api/api-doc/swagger20_header.json
+++ b/api/api-doc/swagger20_header.json
@@ -0,0 +1,29 @@
+{
+  "swagger": "2.0",
+  "info": {
+    "version": "1.0.0",
+    "title": "Scylla API",
+    "description": "The scylla API version 2.0",
+    "termsOfService": "http://www.scylladb.com/tos/",
+    "contact": {
+      "name": "Scylla Team",
+      "email": "info@scylladb.com",
+      "url": "http://scylladb.com"
+    },
+    "license": {
+      "name": "AGPL",
+      "url": "https://github.com/scylladb/scylla/blob/master/LICENSE.AGPL"
+    }
+  },
+  "host": "{{Host}}",
+  "basePath": "/v2",
+  "schemes": [
+    "http"
+  ],
+  "consumes": [
+    "application/json"
+  ],
+  "produces": [
+    "application/json"
+  ],
+  "paths": {
--- a/api/api.cc
+++ b/api/api.cc
@@ -49,19 +49,22 @@ static std::unique_ptr<reply> exception_reply(std::exception_ptr eptr) {
        throw bad_param_exception(ex.what());
    }
    // We never going to get here
-    return std::make_unique<reply>();
+    throw std::runtime_error("exception_reply");
 }

 future<> set_server_init(http_context& ctx) {
    auto rb = std::make_shared < api_registry_builder > (ctx.api_doc);
+    auto rb02 = std::make_shared < api_registry_builder20 > (ctx.api_doc, "/v2");

-    return ctx.http_server.set_routes([rb, &ctx](routes& r) {
+    return ctx.http_server.set_routes([rb, &ctx, rb02](routes& r) {
        r.register_exeption_handler(exception_reply);
        r.put(GET, "/ui", new httpd::file_handler(ctx.api_dir + "/index.html",
                new content_replace("html")));
        r.add(GET, url("/ui").remainder("path"), new httpd::directory_handler(ctx.api_dir,
                new content_replace("html")));
        rb->set_api_doc(r);
+        rb02->set_api_doc(r);
+        rb02->register_api_file(r, "swagger20_header");
        rb->register_function(r, "system",
                "The system related API");
        set_system(ctx, r);
@@ -112,6 +115,11 @@ future<> set_server_stream_manager(http_context& ctx) {
                "The stream manager API", set_stream_manager);
 }

+future<> set_server_cache(http_context& ctx) {
+    return register_api(ctx, "cache_service",
+            "The cache service API", set_cache_service);
+}
+
 future<> set_server_gossip_settle(http_context& ctx) {
    auto rb = std::make_shared < api_registry_builder > (ctx.api_doc);

@@ -119,9 +127,6 @@ future<> set_server_gossip_settle(http_context& ctx) {
        rb->register_function(r, "failure_detector",
                "The failure detector API");
        set_failure_detector(ctx,r);
-        rb->register_function(r, "cache_service",
-                "The cache service API");
-        set_cache_service(ctx,r);
    });
 }

--- a/api/api_init.hh
+++ b/api/api_init.hh
@@ -46,7 +46,7 @@ future<> set_server_messaging_service(http_context& ctx);
 future<> set_server_storage_proxy(http_context& ctx);
 future<> set_server_stream_manager(http_context& ctx);
 future<> set_server_gossip_settle(http_context& ctx);
+future<> set_server_cache(http_context& ctx);
 future<> set_server_done(http_context& ctx);

-
 }
--- a/api/compaction_manager.cc
+++ b/api/compaction_manager.cc
@@ -20,6 +20,7 @@
 */

 #include "compaction_manager.hh"
+#include "sstables/compaction_manager.hh"
 #include "api/api-doc/compaction_manager.json.hh"
 #include "db/system_keyspace.hh"
 #include "column_family.hh"
--- a/api/storage_proxy.cc
+++ b/api/storage_proxy.cc
@@ -397,7 +397,7 @@ void set_storage_proxy(http_context& ctx, routes& r) {
    });

    sp::get_range_estimated_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
-        return sum_timer_stats(ctx.sp, &proxy::stats::read);
+        return sum_timer_stats(ctx.sp, &proxy::stats::range);
    });

    sp::get_range_latency.set(r, [&ctx](std::unique_ptr<request> req) {
--- a/api/storage_service.cc
+++ b/api/storage_service.cc
@@ -34,6 +34,7 @@
 #include "column_family.hh"
 #include "log.hh"
 #include "release.hh"
+#include "sstables/compaction_manager.hh"

 namespace api {

@@ -92,10 +93,13 @@ void set_storage_service(http_context& ctx, routes& r) {
        return ctx.db.local().commitlog()->active_config().commit_log_location;
    });

-    ss::get_token_endpoint.set(r, [] (const_req req) {
-        auto token_to_ep = service::get_local_storage_service().get_token_to_endpoint_map();
-        std::vector<storage_service_json::mapper> res;
-        return map_to_key_value(token_to_ep, res);
+    ss::get_token_endpoint.set(r, [] (std::unique_ptr<request> req) {
+        return make_ready_future<json::json_return_type>(stream_range_as_array(service::get_local_storage_service().get_token_to_endpoint_map(), [](const auto& i) {
+            storage_service_json::mapper val;
+            val.key = boost::lexical_cast<std::string>(i.first);
+            val.value = boost::lexical_cast<std::string>(i.second);
+            return val;
+        }));
    });

    ss::get_leaving_nodes.set(r, [](const_req req) {
@@ -354,6 +358,12 @@ void set_storage_service(http_context& ctx, routes& r) {
                });
    });

+    ss::get_active_repair_async.set(r, [&ctx](std::unique_ptr<request> req) {
+        return get_active_repairs(ctx.db).then([] (std::vector<int> res){
+            return make_ready_future<json::json_return_type>(res);
+        });
+    });
+
    ss::repair_async_status.set(r, [&ctx](std::unique_ptr<request> req) {
        return repair_get_status(ctx.db, boost::lexical_cast<int>( req->get_query_param("id")))
                .then_wrapped([] (future<repair_status>&& fut) {
@@ -361,16 +371,22 @@ void set_storage_service(http_context& ctx, routes& r) {
            try {
                res = fut.get0();
            } catch(std::runtime_error& e) {
-                return make_ready_future<json::json_return_type>(json_exception(httpd::bad_param_exception(e.what())));
+                throw httpd::bad_param_exception(e.what());
            }
            return make_ready_future<json::json_return_type>(json::json_return_type(res));
        });
    });

    ss::force_terminate_all_repair_sessions.set(r, [](std::unique_ptr<request> req) {
-        //TBD
-        unimplemented();
-        return make_ready_future<json::json_return_type>(json_void());
+        return repair_abort_all(service::get_local_storage_service().db()).then([] {
+            return make_ready_future<json::json_return_type>(json_void());
+        });
+    });
+
+    ss::force_terminate_all_repair_sessions_new.set(r, [](std::unique_ptr<request> req) {
+        return repair_abort_all(service::get_local_storage_service().db()).then([] {
+            return make_ready_future<json::json_return_type>(json_void());
+        });
    });

    ss::decommission.set(r, [](std::unique_ptr<request> req) {
--- a/atomic_cell.hh
+++ b/atomic_cell.hh
@@ -57,7 +57,6 @@ class atomic_cell_type final {
 private:
    static constexpr int8_t LIVE_FLAG = 0x01;
    static constexpr int8_t EXPIRY_FLAG = 0x02; // When present, expiry field is present. Set only for live cells
-    static constexpr int8_t REVERT_FLAG = 0x04; // transient flag used to efficiently implement ReversiblyMergeable for atomic cells.
    static constexpr int8_t COUNTER_UPDATE_FLAG = 0x08; // Cell is a counter update.
    static constexpr int8_t COUNTER_IN_PLACE_REVERT = 0x10;
    static constexpr unsigned flags_size = 1;
@@ -74,17 +73,10 @@ private:
    static bool is_counter_update(bytes_view cell) {
        return cell[0] & COUNTER_UPDATE_FLAG;
    }
-    static bool is_revert_set(bytes_view cell) {
-        return cell[0] & REVERT_FLAG;
-    }
    static bool is_counter_in_place_revert_set(bytes_view cell) {
        return cell[0] & COUNTER_IN_PLACE_REVERT;
    }
    template<typename BytesContainer>
-    static void set_revert(BytesContainer& cell, bool revert) {
-        cell[0] = (cell[0] & ~REVERT_FLAG) | (revert * REVERT_FLAG);
-    }
-    template<typename BytesContainer>
    static void set_counter_in_place_revert(BytesContainer& cell, bool flag) {
        cell[0] = (cell[0] & ~COUNTER_IN_PLACE_REVERT) | (flag * COUNTER_IN_PLACE_REVERT);
    }
@@ -216,9 +208,6 @@ public:
    bool is_counter_update() const {
        return atomic_cell_type::is_counter_update(_data);
    }
-    bool is_revert_set() const {
-        return atomic_cell_type::is_revert_set(_data);
-    }
    bool is_counter_in_place_revert_set() const {
        return atomic_cell_type::is_counter_in_place_revert_set(_data);
    }
@@ -269,14 +258,11 @@ public:
    }
    // Can be called on live and dead cells
    bool has_expired(gc_clock::time_point now) const {
-        return is_live_and_has_ttl() && expiry() < now;
+        return is_live_and_has_ttl() && expiry() <= now;
    }
    bytes_view serialize() const {
        return _data;
    }
-    void set_revert(bool revert) {
-        atomic_cell_type::set_revert(_data, revert);
-    }
    void set_counter_in_place_revert(bool flag) {
        atomic_cell_type::set_counter_in_place_revert(_data, flag);
    }
--- a/atomic_cell_hash.hh
+++ b/atomic_cell_hash.hh
@@ -25,6 +25,7 @@

 #include "types.hh"
 #include "atomic_cell.hh"
+#include "atomic_cell_or_collection.hh"
 #include "hashing.hh"
 #include "counters.hh"

@@ -78,3 +79,15 @@ struct appending_hash<collection_mutation> {
        feed_hash(h, static_cast<collection_mutation_view>(cm), cdef);
    }
 };
+
+template<>
+struct appending_hash<atomic_cell_or_collection> { // hashes a cell through the view selected by the column definition
+    template<typename Hasher>
+    void operator()(Hasher& hasher, const atomic_cell_or_collection& cell, const column_definition& def) const {
+        if (!def.is_atomic()) { // non-atomic column: hash through the collection-mutation view
+            feed_hash(hasher, cell.as_collection_mutation(), def);
+        } else { // atomic column: hash through the atomic-cell view
+            feed_hash(hasher, cell.as_atomic_cell(), def);
+        }
+    }
+};
--- a/atomic_cell_or_collection.hh
+++ b/atomic_cell_or_collection.hh
@@ -59,14 +59,6 @@ public:
    bool operator==(const atomic_cell_or_collection& other) const {
        return _data == other._data;
    }
-    template<typename Hasher>
-    void feed_hash(Hasher& h, const column_definition& def) const {
-        if (def.is_atomic()) {
-            ::feed_hash(h, as_atomic_cell(), def);
-        } else {
-            ::feed_hash(h, as_collection_mutation(), def);
-        }
-    }
    size_t external_memory_usage() const {
        return _data.external_memory_usage();
    }
--- a/auth/allow_all_authenticator.cc
+++ b/auth/allow_all_authenticator.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "auth/allow_all_authenticator.hh"
+
+#include "service/migration_manager.hh"
+#include "utils/class_registrator.hh"
+
+namespace auth {
+
+const sstring& allow_all_authenticator_name() {
+    static const sstring qualified_name = meta::AUTH_PACKAGE_NAME + "AllowAllAuthenticator"; // function-local static: initialized on first call
+    return qualified_name;
+}
+
+// The registered name is spelled as a string literal (presumably instead of calling allow_all_authenticator_name()) to ensure correct initialization order.
+static const class_registrator<
+        authenticator,
+        allow_all_authenticator,
+        cql3::query_processor&, // this type and the next mirror the allow_all_authenticator constructor parameters
+        ::service::migration_manager&> registration("org.apache.cassandra.auth.AllowAllAuthenticator"); // NOTE(review): presumably must stay equal to allow_all_authenticator_name() — confirm
+
+}
--- a/auth/allow_all_authenticator.hh
+++ b/auth/allow_all_authenticator.hh
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <stdexcept>
+
+#include "auth/authenticated_user.hh"
+#include "auth/authenticator.hh"
+#include "auth/common.hh"
+
+namespace cql3 {
+class query_processor;
+}
+
+namespace service {
+class migration_manager;
+}
+
+namespace auth {
+
+const sstring& allow_all_authenticator_name();
+
+class allow_all_authenticator final : public authenticator { // authenticator that requires no authentication and resolves every login to the anonymous user
+public:
+    allow_all_authenticator(cql3::query_processor&, ::service::migration_manager&) { // both arguments are accepted but unused
+    }
+
+    virtual future<> start() override { // nothing to set up: completes immediately
+        return make_ready_future<>();
+    }
+
+    virtual future<> stop() override { // nothing to tear down: completes immediately
+        return make_ready_future<>();
+    }
+
+    virtual const sstring& qualified_java_name() const override {
+        return allow_all_authenticator_name();
+    }
+
+    virtual bool require_authentication() const override { // this authenticator never asks for credentials
+        return false;
+    }
+
+    virtual authentication_option_set supported_options() const override { // default-constructed option set
+        return authentication_option_set();
+    }
+
+    virtual authentication_option_set alterable_options() const override { // default-constructed option set
+        return authentication_option_set();
+    }
+
+    virtual future<authenticated_user> authenticate(const credentials_map& credentials) const override { // credentials are ignored; the result is always anonymous_user()
+        return make_ready_future<authenticated_user>(anonymous_user());
+    }
+
+    virtual future<> create(stdx::string_view, const authentication_options& options) const override { // no-op: arguments are ignored
+        return make_ready_future<>();
+    }
+
+    virtual future<> alter(stdx::string_view, const authentication_options& options) const override { // no-op: arguments are ignored
+        return make_ready_future<>();
+    }
+
+    virtual future<> drop(stdx::string_view) const override { // no-op: argument is ignored
+        return make_ready_future<>();
+    }
+
+    virtual future<custom_options> query_custom_options(stdx::string_view role_name) const override { // role_name is ignored; yields default-constructed custom_options
+        return make_ready_future<custom_options>();
+    }
+
+    virtual const resource_set& protected_resources() const override { // one default-constructed set shared by all calls
+        static const resource_set resources;
+        return resources;
+    }
+
+    virtual ::shared_ptr<sasl_challenge> new_sasl_challenge() const override { // always throws; NOTE(review): presumably unreachable because require_authentication() is false — confirm
+        throw std::runtime_error("Should not reach");
+    }
+};
+
+}
--- a/auth/allow_all_authorizer.cc
+++ b/auth/allow_all_authorizer.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "auth/allow_all_authorizer.hh"
+
+#include "auth/common.hh"
+#include "utils/class_registrator.hh"
+
+namespace auth {
+
+const sstring& allow_all_authorizer_name() {
+    static const sstring qualified_name = meta::AUTH_PACKAGE_NAME + "AllowAllAuthorizer"; // function-local static: initialized on first call
+    return qualified_name;
+}
+
+// The registered name is spelled as a string literal (presumably instead of calling allow_all_authorizer_name()) to ensure correct initialization order.
+static const class_registrator<
+    authorizer,
+    allow_all_authorizer,
+    cql3::query_processor&, // this type and the next mirror the allow_all_authorizer constructor parameters
+    ::service::migration_manager&> registration("org.apache.cassandra.auth.AllowAllAuthorizer"); // NOTE(review): presumably must stay equal to allow_all_authorizer_name() — confirm
+
+}
--- a/auth/allow_all_authorizer.hh
+++ b/auth/allow_all_authorizer.hh
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "auth/authorizer.hh"
+#include "exceptions/exceptions.hh"
+#include "stdx.hh"
+
+namespace cql3 {
+class query_processor;
+}
+
+namespace service {
+class migration_manager;
+}
+
+namespace auth {
+
+const sstring& allow_all_authorizer_name();
+
+class allow_all_authorizer final : public authorizer { // authorizer that grants every permission and rejects grant/revoke/list requests
+public:
+    allow_all_authorizer(cql3::query_processor&, ::service::migration_manager&) { // both arguments are accepted but unused
+    }
+
+    virtual future<> start() override { // nothing to set up: completes immediately
+        return make_ready_future<>();
+    }
+
+    virtual future<> stop() override { // nothing to tear down: completes immediately
+        return make_ready_future<>();
+    }
+
+    virtual const sstring& qualified_java_name() const override {
+        return allow_all_authorizer_name();
+    }
+
+    virtual future<permission_set> authorize(const role_or_anonymous&, const resource&) const override { // always permissions::ALL, whatever the role or resource
+        return make_ready_future<permission_set>(permissions::ALL);
+    }
+
+    virtual future<> grant(stdx::string_view, permission_set, const resource&) const override { // always fails with unsupported_authorization_operation
+        return make_exception_future<>(
+                unsupported_authorization_operation("GRANT operation is not supported by AllowAllAuthorizer"));
+    }
+
+    virtual future<> revoke(stdx::string_view, permission_set, const resource&) const override { // always fails with unsupported_authorization_operation
+        return make_exception_future<>(
+                unsupported_authorization_operation("REVOKE operation is not supported by AllowAllAuthorizer"));
+    }
+
+    virtual future<std::vector<permission_details>> list_all() const override { // always fails with unsupported_authorization_operation
+        return make_exception_future<std::vector<permission_details>>(
+                unsupported_authorization_operation(
+                        "LIST PERMISSIONS operation is not supported by AllowAllAuthorizer"));
+    }
+
+    virtual future<> revoke_all(stdx::string_view) const override { // always fails; explicit `<>` matches grant()/revoke()
+        return make_exception_future<>(
+                unsupported_authorization_operation("REVOKE operation is not supported by AllowAllAuthorizer"));
+    }
+
+    virtual future<> revoke_all(const resource&) const override { // always fails; explicit `<>` matches grant()/revoke()
+        return make_exception_future<>(
+                unsupported_authorization_operation("REVOKE operation is not supported by AllowAllAuthorizer"));
+    }
+
+    virtual const resource_set& protected_resources() const override { // one default-constructed set shared by all calls
+        static const resource_set resources;
+        return resources;
+    }
+};
+
+}
--- a/auth/auth.cc
+++ b/auth/auth.cc
@@ -1,384 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2016 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-#include <seastar/core/sleep.hh>
-
-#include <seastar/core/distributed.hh>
-
-#include "auth.hh"
-#include "authenticator.hh"
-#include "authorizer.hh"
-#include "database.hh"
-#include "cql3/query_processor.hh"
-#include "cql3/statements/raw/cf_statement.hh"
-#include "cql3/statements/create_table_statement.hh"
-#include "db/config.hh"
-#include "service/migration_manager.hh"
-#include "utils/loading_cache.hh"
-#include "utils/hash.hh"
-
-const sstring auth::auth::DEFAULT_SUPERUSER_NAME("cassandra");
-const sstring auth::auth::AUTH_KS("system_auth");
-const sstring auth::auth::USERS_CF("users");
-
-static const sstring USER_NAME("name");
-static const sstring SUPER("super");
-
-static logging::logger alogger("auth");
-
-// TODO: configurable
-using namespace std::chrono_literals;
-const std::chrono::milliseconds auth::auth::SUPERUSER_SETUP_DELAY = 10000ms;
-
-class auth_migration_listener : public service::migration_listener {
-    void on_create_keyspace(const sstring& ks_name) override {}
-    void on_create_column_family(const sstring& ks_name, const sstring& cf_name) override {}
-    void on_create_user_type(const sstring& ks_name, const sstring& type_name) override {}
-    void on_create_function(const sstring& ks_name, const sstring& function_name) override {}
-    void on_create_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
-    void on_create_view(const sstring& ks_name, const sstring& view_name) override {}
-
-    void on_update_keyspace(const sstring& ks_name) override {}
-    void on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool) override {}
-    void on_update_user_type(const sstring& ks_name, const sstring& type_name) override {}
-    void on_update_function(const sstring& ks_name, const sstring& function_name) override {}
-    void on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
-    void on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed) override {}
-
-    void on_drop_keyspace(const sstring& ks_name) override {
-        auth::authorizer::get().revoke_all(auth::data_resource(ks_name));
-    }
-    void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) override {
-        auth::authorizer::get().revoke_all(auth::data_resource(ks_name, cf_name));
-    }
-    void on_drop_user_type(const sstring& ks_name, const sstring& type_name) override {}
-    void on_drop_function(const sstring& ks_name, const sstring& function_name) override {}
-    void on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
-    void on_drop_view(const sstring& ks_name, const sstring& view_name) override {}
-};
-
-static auth_migration_listener auth_migration;
-
-namespace std {
-template <>
-struct hash<auth::data_resource> {
-    size_t operator()(const auth::data_resource & v) const {
-        return v.hash_value();
-    }
-};
-
-template <>
-struct hash<auth::authenticated_user> {
-    size_t operator()(const auth::authenticated_user & v) const {
-        return utils::tuple_hash()(v.name(), v.is_anonymous());
-    }
-};
-}
-
-class auth::auth::permissions_cache {
-public:
-    typedef utils::loading_cache<std::pair<authenticated_user, data_resource>, permission_set, utils::loading_cache_reload_enabled::yes, utils::simple_entry_size<permission_set>, utils::tuple_hash> cache_type;
-    typedef typename cache_type::key_type key_type;
-
-    permissions_cache()
-                    : permissions_cache(
-                                    cql3::get_local_query_processor().db().local().get_config()) {
-    }
-
-    permissions_cache(const db::config& cfg)
-                    : _cache(cfg.permissions_cache_max_entries(), std::chrono::milliseconds(cfg.permissions_validity_in_ms()), std::chrono::milliseconds(cfg.permissions_update_interval_in_ms()), alogger,
-                        [] (const key_type& k) {
-                            alogger.debug("Refreshing permissions for {}", k.first.name());
-                            return authorizer::get().authorize(::make_shared<authenticated_user>(k.first), k.second);
-                        }) {}
-
-    future<> stop() {
-        return _cache.stop();
-    }
-
-    future<permission_set> get(::shared_ptr<authenticated_user> user, data_resource resource) {
-        return _cache.get(key_type(*user, std::move(resource)));
-    }
-
-private:
-    cache_type _cache;
-};
-
-namespace std { // for ADL, yuch
-
-std::ostream& operator<<(std::ostream& os, const std::pair<auth::authenticated_user, auth::data_resource>& p) {
-    os << "{user: " << p.first.name() << ", data_resource: " << p.second << "}";
-    return os;
-}
-
-}
-
-static distributed<auth::auth::permissions_cache> perm_cache;
-
-/**
- * Poor mans job schedule. For maximum 2 jobs. Sic.
- * Still does nothing more clever than waiting 10 seconds
- * like origin, then runs the submitted tasks.
- *
- * Only difference compared to sleep (from which this
- * borrows _heavily_) is that if tasks have not run by the time
- * we exit (and do static clean up) we delete the promise + cont
- *
- * Should be abstracted to some sort of global server function
- * probably.
- */
-struct waiter {
-    promise<> done;
-    timer<> tmr;
-    waiter() : tmr([this] {done.set_value();})
-    {
-        tmr.arm(auth::auth::SUPERUSER_SETUP_DELAY);
-    }
-    ~waiter() {
-        if (tmr.armed()) {
-            tmr.cancel();
-            done.set_exception(std::runtime_error("shutting down"));
-        }
-        alogger.trace("Deleting scheduled task");
-    }
-    void kill() {
-    }
-};
-
-typedef std::unique_ptr<waiter> waiter_ptr;
-
-static std::vector<waiter_ptr> & thread_waiters() {
-    static thread_local std::vector<waiter_ptr> the_waiters;
-    return the_waiters;
-}
-
-void auth::auth::schedule_when_up(scheduled_func f) {
-    alogger.trace("Adding scheduled task");
-
-    auto & waiters = thread_waiters();
-
-    waiters.emplace_back(std::make_unique<waiter>());
-    auto* w = waiters.back().get();
-
-    w->done.get_future().finally([w] {
-        auto & waiters = thread_waiters();
-        auto i = std::find_if(waiters.begin(), waiters.end(), [w](const waiter_ptr& p) {
-                            return p.get() == w;
-                        });
-        if (i != waiters.end()) {
-            waiters.erase(i);
-        }
-    }).then([f = std::move(f)] {
-        alogger.trace("Running scheduled task");
-        return f();
-    }).handle_exception([](auto ep) {
-        return make_ready_future();
-    });
-}
-
-bool auth::auth::is_class_type(const sstring& type, const sstring& classname) {
-    if (type == classname) {
-        return true;
-    }
-    auto i = classname.find_last_of('.');
-    return classname.compare(i + 1, sstring::npos, type) == 0;
-}
-
-future<> auth::auth::setup() {
-    auto& db = cql3::get_local_query_processor().db().local();
-    auto& cfg = db.get_config();
-
-    future<> f = perm_cache.start();
-
-    if (is_class_type(cfg.authenticator(),
-                    authenticator::ALLOW_ALL_AUTHENTICATOR_NAME)
-                    && is_class_type(cfg.authorizer(),
-                                    authorizer::ALLOW_ALL_AUTHORIZER_NAME)
-                                    ) {
-        // just create the objects
-        return f.then([&cfg] {
-            return authenticator::setup(cfg.authenticator());
-        }).then([&cfg] {
-            return authorizer::setup(cfg.authorizer());
-        });
-    }
-
-    if (!db.has_keyspace(AUTH_KS)) {
-        std::map<sstring, sstring> opts;
-        opts["replication_factor"] = "1";
-        auto ksm = keyspace_metadata::new_keyspace(AUTH_KS, "org.apache.cassandra.locator.SimpleStrategy", opts, true);
-        // We use min_timestamp so that default keyspace metadata will loose with any manual adjustments. See issue #2129.
-        f = service::get_local_migration_manager().announce_new_keyspace(ksm, api::min_timestamp, false);
-    }
-
-    return f.then([] {
-        return setup_table(USERS_CF, sprint("CREATE TABLE %s.%s (%s text, %s boolean, PRIMARY KEY(%s)) WITH gc_grace_seconds=%d",
-                                        AUTH_KS, USERS_CF, USER_NAME, SUPER, USER_NAME,
-                                        90 * 24 * 60 * 60)); // 3 months.
-    }).then([&cfg] {
-        return authenticator::setup(cfg.authenticator());
-    }).then([&cfg] {
-        return authorizer::setup(cfg.authorizer());
-    }).then([] {
-        service::get_local_migration_manager().register_listener(&auth_migration); // again, only one shard...
-        // instead of once-timer, just schedule this later
-        schedule_when_up([] {
-            // setup default super user
-            return has_existing_users(USERS_CF, DEFAULT_SUPERUSER_NAME, USER_NAME).then([](bool exists) {
-                if (!exists) {
-                    auto query = sprint("INSERT INTO %s.%s (%s, %s) VALUES (?, ?) USING TIMESTAMP 0",
-                                    AUTH_KS, USERS_CF, USER_NAME, SUPER);
-                    cql3::get_local_query_processor().process(query, db::consistency_level::ONE, {DEFAULT_SUPERUSER_NAME, true}).then([](auto) {
-                        alogger.info("Created default superuser '{}'", DEFAULT_SUPERUSER_NAME);
-                    }).handle_exception([](auto ep) {
-                        try {
-                            std::rethrow_exception(ep);
-                        } catch (exceptions::request_execution_exception&) {
-                            alogger.warn("Skipped default superuser setup: some nodes were not ready");
-                        }
-                    });
-                }
-            });
-        });
-    });
-}
-
-future<> auth::auth::shutdown() {
-    // just make sure we don't have pending tasks.
-    // this is mostly relevant for test cases where
-    // db-env-shutdown != process shutdown
-    return smp::invoke_on_all([] {
-        thread_waiters().clear();
-    }).then([] {
-        return perm_cache.stop();
-    });
-}
-
-future<auth::permission_set> auth::auth::get_permissions(::shared_ptr<authenticated_user> user, data_resource resource) {
-    return perm_cache.local().get(std::move(user), std::move(resource));
-}
-
-static db::consistency_level consistency_for_user(const sstring& username) {
-    if (username == auth::auth::DEFAULT_SUPERUSER_NAME) {
-        return db::consistency_level::QUORUM;
-    }
-    return db::consistency_level::LOCAL_ONE;
-}
-
-static future<::shared_ptr<cql3::untyped_result_set>> select_user(const sstring& username) {
-    // Here was a thread local, explicit cache of prepared statement. In normal execution this is
-    // fine, but since we in testing set up and tear down system over and over, we'd start using
-    // obsolete prepared statements pretty quickly.
-    // Rely on query processing caching statements instead, and lets assume
-    // that a map lookup string->statement is not gonna kill us much.
-    return cql3::get_local_query_processor().process(
-                    sprint("SELECT * FROM %s.%s WHERE %s = ?",
-                                    auth::auth::AUTH_KS, auth::auth::USERS_CF,
-                                    USER_NAME), consistency_for_user(username),
-                    { username }, true);
-}
-
-future<bool> auth::auth::is_existing_user(const sstring& username) {
-    return select_user(username).then(
-                    [](::shared_ptr<cql3::untyped_result_set> res) {
-                        return make_ready_future<bool>(!res->empty());
-                    });
-}
-
-future<bool> auth::auth::is_super_user(const sstring& username) {
-    return select_user(username).then(
-                    [](::shared_ptr<cql3::untyped_result_set> res) {
-                        return make_ready_future<bool>(!res->empty() && res->one().get_as<bool>(SUPER));
-                    });
-}
-
-future<> auth::auth::insert_user(const sstring& username, bool is_super) {
-    return cql3::get_local_query_processor().process(sprint("INSERT INTO %s.%s (%s, %s) VALUES (?, ?)",
-                    AUTH_KS, USERS_CF, USER_NAME, SUPER),
-                    consistency_for_user(username), { username, is_super }).discard_result();
-}
-
-future<> auth::auth::delete_user(const sstring& username) {
-    return cql3::get_local_query_processor().process(sprint("DELETE FROM %s.%s WHERE %s = ?",
-                    AUTH_KS, USERS_CF, USER_NAME),
-                    consistency_for_user(username), { username }).discard_result();
-}
-
-future<> auth::auth::setup_table(const sstring& name, const sstring& cql) {
-    auto& qp = cql3::get_local_query_processor();
-    auto& db = qp.db().local();
-
-    if (db.has_schema(AUTH_KS, name)) {
-        return make_ready_future();
-    }
-
-    ::shared_ptr<cql3::statements::raw::cf_statement> parsed = static_pointer_cast<
-                    cql3::statements::raw::cf_statement>(cql3::query_processor::parse_statement(cql));
-    parsed->prepare_keyspace(AUTH_KS);
-    ::shared_ptr<cql3::statements::create_table_statement> statement =
-                    static_pointer_cast<cql3::statements::create_table_statement>(
-                                    parsed->prepare(db, qp.get_cql_stats())->statement);
-    auto schema = statement->get_cf_meta_data();
-    auto uuid = generate_legacy_id(schema->ks_name(), schema->cf_name());
-
-    schema_builder b(schema);
-    b.set_uuid(uuid);
-    return service::get_local_migration_manager().announce_new_column_family(b.build(), false);
-}
-
-future<bool> auth::auth::has_existing_users(const sstring& cfname, const sstring& def_user_name, const sstring& name_column) {
-    auto default_user_query = sprint("SELECT * FROM %s.%s WHERE %s = ?", AUTH_KS, cfname, name_column);
-    auto all_users_query = sprint("SELECT * FROM %s.%s LIMIT 1", AUTH_KS, cfname);
-
-    return cql3::get_local_query_processor().process(default_user_query, db::consistency_level::ONE, { def_user_name }).then([=](::shared_ptr<cql3::untyped_result_set> res) {
-        if (!res->empty()) {
-            return make_ready_future<bool>(true);
-        }
-        return cql3::get_local_query_processor().process(default_user_query, db::consistency_level::QUORUM, { def_user_name }).then([all_users_query](::shared_ptr<cql3::untyped_result_set> res) {
-            if (!res->empty()) {
-                return make_ready_future<bool>(true);
-            }
-            return cql3::get_local_query_processor().process(all_users_query, db::consistency_level::QUORUM).then([](::shared_ptr<cql3::untyped_result_set> res) {
-                return make_ready_future<bool>(!res->empty());
-            });
-        });
-    });
-}
-
--- a/auth/auth.hh
+++ b/auth/auth.hh
@@ -1,125 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2016 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include <chrono>
-#include <seastar/core/sstring.hh>
-#include <seastar/core/future.hh>
-#include <seastar/core/shared_ptr.hh>
-
-
-#include "exceptions/exceptions.hh"
-#include "permission.hh"
-#include "data_resource.hh"
-#include "authenticated_user.hh"
-
-namespace auth {
-
-class auth {
-public:
-    class permissions_cache;
-
-    static const sstring DEFAULT_SUPERUSER_NAME;
-    static const sstring AUTH_KS;
-    static const sstring USERS_CF;
-    static const std::chrono::milliseconds SUPERUSER_SETUP_DELAY;
-
-    static bool is_class_type(const sstring& type, const sstring& classname);
-
-    static future<permission_set> get_permissions(::shared_ptr<authenticated_user>, data_resource);
-
-    /**
-     * Checks if the username is stored in AUTH_KS.USERS_CF.
-     *
-     * @param username Username to query.
-     * @return whether or not Cassandra knows about the user.
-     */
-    static future<bool> is_existing_user(const sstring& username);
-
-    /**
-     * Checks if the user is a known superuser.
-     *
-     * @param username Username to query.
-     * @return true is the user is a superuser, false if they aren't or don't exist at all.
-     */
-    static future<bool> is_super_user(const sstring& username);
-
-    /**
-     * Inserts the user into AUTH_KS.USERS_CF (or overwrites their superuser status as a result of an ALTER USER query).
-     *
-     * @param username Username to insert.
-     * @param isSuper User's new status.
-     * @throws RequestExecutionException
-     */
-    static future<> insert_user(const sstring& username, bool is_super);
-
-    /**
-     * Deletes the user from AUTH_KS.USERS_CF.
-     *
-     * @param username Username to delete.
-     * @throws RequestExecutionException
-     */
-    static future<> delete_user(const sstring& username);
-
-    /**
-     * Sets up Authenticator and Authorizer.
-     */
-    static future<> setup();
-    static future<> shutdown();
-
-    /**
-     * Set up table from given CREATE TABLE statement under system_auth keyspace, if not already done so.
-     *
-     * @param name name of the table
-     * @param cql CREATE TABLE statement
-     */
-    static future<> setup_table(const sstring& name, const sstring& cql);
-
-    static future<bool> has_existing_users(const sstring& cfname, const sstring& def_user_name, const sstring& name_column_name);
-
-    // For internal use. Run function "when system is up".
-    typedef std::function<future<>()> scheduled_func;
-    static void schedule_when_up(scheduled_func);
-};
-}
-
-std::ostream& operator<<(std::ostream& os, const std::pair<auth::authenticated_user, auth::data_resource>& p);
--- a/auth/authenticated_user.cc
+++ b/auth/authenticated_user.cc
@@ -39,34 +39,30 @@
 * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
 */

+#include "auth/authenticated_user.hh"

-#include "authenticated_user.hh"
-#include "auth.hh"
+#include <iostream>

-const sstring auth::authenticated_user::ANONYMOUS_USERNAME("anonymous");
+namespace auth {

-auth::authenticated_user::authenticated_user()
-                : _anon(true)
-{}
-
-auth::authenticated_user::authenticated_user(sstring name)
-                : _name(name), _anon(false)
-{}
-
-auth::authenticated_user::authenticated_user(authenticated_user&&) = default;
-auth::authenticated_user::authenticated_user(const authenticated_user&) = default;
-
-const sstring& auth::authenticated_user::name() const {
-    return _anon ? ANONYMOUS_USERNAME : _name;
+authenticated_user::authenticated_user(stdx::string_view name)
+        : name(sstring(name)) { // the `name` member is initialized from the same-named parameter, copied into an sstring
 }

-future<bool> auth::authenticated_user::is_super() const {
-    if (is_anonymous()) {
-        return make_ready_future<bool>(false);
+std::ostream& operator<<(std::ostream& os, const authenticated_user& u) {
+    if (!u.name) { // no name set: print the fixed placeholder text
+        os << "anonymous";
+    } else { // otherwise print the stored name
+        os << *u.name;
    }
-    return auth::auth::is_super_user(_name);
+
+    return os;
+}
+
+static const authenticated_user the_anonymous_user{}; // value-initialized, so its optional name is empty
+
+const authenticated_user& anonymous_user() noexcept { // returns a reference to the single file-local anonymous instance
+    return the_anonymous_user;
 }

-bool auth::authenticated_user::operator==(const authenticated_user& v) const {
-    return _anon ? v._anon : _name == v._name;
 }
--- a/auth/authenticated_user.hh
+++ b/auth/authenticated_user.hh
@@ -41,43 +41,63 @@

 #pragma once

+#include <experimental/string_view>
+#include <functional>
+#include <iosfwd>
+#include <optional>
+
 #include <seastar/core/sstring.hh>
-#include <seastar/core/future.hh>
+
 #include "seastarx.hh"
+#include "stdx.hh"

 namespace auth {

-class authenticated_user {
+///
+/// A type-safe wrapper for the name of a logged-in user, or a nameless (anonymous) user.
+///
+class authenticated_user final {
 public:
-    static const sstring ANONYMOUS_USERNAME;
+    ///
+    /// An anonymous user has no name.
+    ///
+    std::optional<sstring> name{};

-    authenticated_user();
-    authenticated_user(sstring name);
-    authenticated_user(authenticated_user&&);
-    authenticated_user(const authenticated_user&);
-
-    const sstring& name() const;
-
-    /**
-     * Checks the user's superuser status.
-     * Only a superuser is allowed to perform CREATE USER and DROP USER queries.
-     * Im most cased, though not necessarily, a superuser will have Permission.ALL on every resource
-     * (depends on IAuthorizer implementation).
-     */
-    future<bool> is_super() const;
-
-    /**
-     * If IAuthenticator doesn't require authentication, this method may return true.
-     */
-    bool is_anonymous() const {
-        return _anon;
-    }
-
-    bool operator==(const authenticated_user&) const;
-private:
-    sstring _name;
-    bool _anon;
+    ///
+    /// An anonymous user.
+    ///
+    authenticated_user() = default;
+    explicit authenticated_user(stdx::string_view name);
 };

+///
+/// The user name, or "anonymous".
+///
+std::ostream& operator<<(std::ostream&, const authenticated_user&);
+
+inline bool operator==(const authenticated_user& u1, const authenticated_user& u2) noexcept {
+    return u1.name == u2.name;
+}
+
+inline bool operator!=(const authenticated_user& u1, const authenticated_user& u2) noexcept {
+    return !(u1 == u2);
+}
+
+const authenticated_user& anonymous_user() noexcept;
+
+inline bool is_anonymous(const authenticated_user& u) noexcept {
+    return u == anonymous_user();
+}
+
 }

+namespace std {
+
+template <>
+struct hash<auth::authenticated_user> final {
+    size_t operator()(const auth::authenticated_user &u) const {
+        return std::hash<std::optional<sstring>>()(u.name);
+    }
+};
+
+}
--- a/auth/authentication_options.cc
+++ b/auth/authentication_options.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015 ScyllaDB
+ * Copyright (C) 2018 ScyllaDB
 */

 /*
@@ -19,8 +19,19 @@
 * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
 */

-// Used to ensure that all .hh files build, as well as a place to put
-// out-of-line implementations.
+#include "auth/authentication_options.hh"

-#include "io/i_versioned_serializer.hh"
-#include "io/i_serializer.hh"
+#include <iostream>
+
+namespace auth {
+
+std::ostream& operator<<(std::ostream& os, authentication_option a) {
+    switch (a) {
+        case authentication_option::password: os << "PASSWORD"; break;
+        case authentication_option::options: os << "OPTIONS"; break;
+    }
+
+    return os;
+}
+
+}
--- a/auth/authentication_options.hh
+++ b/auth/authentication_options.hh
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2018 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <iosfwd>
+#include <optional>
+#include <stdexcept>
+#include <unordered_map>
+#include <unordered_set>
+
+#include <seastar/core/print.hh>
+#include <seastar/core/sstring.hh>
+
+#include "seastarx.hh"
+
+namespace auth {
+
+enum class authentication_option {
+    password,
+    options
+};
+
+std::ostream& operator<<(std::ostream&, authentication_option);
+
+using authentication_option_set = std::unordered_set<authentication_option>;
+
+using custom_options = std::unordered_map<sstring, sstring>;
+
+struct authentication_options final {
+    std::optional<sstring> password;
+    std::optional<custom_options> options;
+};
+
+inline bool any_authentication_options(const authentication_options& aos) noexcept {
+    return aos.password || aos.options;
+}
+
+class unsupported_authentication_option : public std::invalid_argument {
+public:
+    explicit unsupported_authentication_option(authentication_option k)
+            : std::invalid_argument(sprint("The %s option is not supported.", k)) {
+    }
+};
+
+}
--- a/auth/authenticator.cc
+++ b/auth/authenticator.cc
@@ -39,89 +39,14 @@
 * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include "authenticator.hh"
-#include "authenticated_user.hh"
-#include "password_authenticator.hh"
-#include "auth.hh"
+#include "auth/authenticator.hh"
+
+#include "auth/authenticated_user.hh"
+#include "auth/common.hh"
+#include "auth/password_authenticator.hh"
+#include "cql3/query_processor.hh"
 #include "db/config.hh"
+#include "utils/class_registrator.hh"

 const sstring auth::authenticator::USERNAME_KEY("username");
 const sstring auth::authenticator::PASSWORD_KEY("password");
-const sstring auth::authenticator::ALLOW_ALL_AUTHENTICATOR_NAME("org.apache.cassandra.auth.AllowAllAuthenticator");
-
-auth::authenticator::option auth::authenticator::string_to_option(const sstring& name) {
-    if (strcasecmp(name.c_str(), "password") == 0) {
-        return option::PASSWORD;
-    }
-    throw std::invalid_argument(name);
-}
-
-sstring auth::authenticator::option_to_string(option opt) {
-    switch (opt) {
-    case option::PASSWORD:
-        return "PASSWORD";
-    default:
-        throw std::invalid_argument(sprint("Unknown option {}", opt));
-    }
-}
-
-/**
- * Authenticator is assumed to be a fully state-less immutable object (note all the const).
- * We thus store a single instance globally, since it should be safe/ok.
- */
-static std::unique_ptr<auth::authenticator> global_authenticator;
-
-future<>
-auth::authenticator::setup(const sstring& type) {
-    if (auth::auth::is_class_type(type, ALLOW_ALL_AUTHENTICATOR_NAME)) {
-        class allow_all_authenticator : public authenticator {
-        public:
-            const sstring& class_name() const override {
-                return ALLOW_ALL_AUTHENTICATOR_NAME;
-            }
-            bool require_authentication() const override {
-                return false;
-            }
-            option_set supported_options() const override {
-                return option_set();
-            }
-            option_set alterable_options() const override {
-                return option_set();
-            }
-            future<::shared_ptr<authenticated_user>> authenticate(const credentials_map& credentials) const override {
-                return make_ready_future<::shared_ptr<authenticated_user>>(::make_shared<authenticated_user>());
-            }
-            future<> create(sstring username, const option_map& options) override {
-                return make_ready_future();
-            }
-            future<> alter(sstring username, const option_map& options) override {
-                return make_ready_future();
-            }
-            future<> drop(sstring username) override {
-                return make_ready_future();
-            }
-            const resource_ids& protected_resources() const override {
-                static const resource_ids ids;
-                return ids;
-            }
-            ::shared_ptr<sasl_challenge> new_sasl_challenge() const override {
-                throw std::runtime_error("Should not reach");
-            }
-        };
-        global_authenticator = std::make_unique<allow_all_authenticator>();
-    } else if (auth::auth::is_class_type(type, password_authenticator::PASSWORD_AUTHENTICATOR_NAME)) {
-        auto pwa = std::make_unique<password_authenticator>();
-        auto f = pwa->init();
-        return f.then([pwa = std::move(pwa)]() mutable {
-            global_authenticator = std::move(pwa);
-        });
-    } else {
-        throw exceptions::configuration_exception("Invalid authenticator type: " + type);
-    }
-    return make_ready_future();
-}
-
-auth::authenticator& auth::authenticator::get() {
-    assert(global_authenticator);
-    return *global_authenticator;
-}
--- a/auth/authenticator.hh
+++ b/auth/authenticator.hh
@@ -41,21 +41,24 @@

 #pragma once

+#include <experimental/string_view>
 #include <memory>
-#include <unordered_map>
 #include <set>
 #include <stdexcept>
+#include <unordered_map>
+
 #include <boost/any.hpp>
-
-#include <seastar/core/sstring.hh>
-#include <seastar/core/future.hh>
-#include <seastar/core/shared_ptr.hh>
 #include <seastar/core/enum.hh>
+#include <seastar/core/future.hh>
+#include <seastar/core/sstring.hh>
+#include <seastar/core/shared_ptr.hh>

+#include "auth/authentication_options.hh"
+#include "auth/resource.hh"
 #include "bytes.hh"
-#include "data_resource.hh"
 #include "enum_set.hh"
 #include "exceptions/exceptions.hh"
+#include "stdx.hh"

 namespace db {
    class config;
@@ -65,136 +68,104 @@ namespace auth {

 class authenticated_user;

+///
+/// Abstract client for authenticating role identity.
+///
+/// All state necessary to authenticate a role is stored externally to the client instance.
+///
 class authenticator {
 public:
+    ///
+    /// The name of the key to be used for the user-name part of password authentication with \ref authenticate.
+    ///
    static const sstring USERNAME_KEY;
+
+    ///
+    /// The name of the key to be used for the password part of password authentication with \ref authenticate.
+    ///
    static const sstring PASSWORD_KEY;
-    static const sstring ALLOW_ALL_AUTHENTICATOR_NAME;

-    /**
-     * Supported CREATE USER/ALTER USER options.
-     * Currently only PASSWORD is available.
-     */
-    enum class option {
-        PASSWORD
-    };
-
-    static option string_to_option(const sstring&);
-    static sstring option_to_string(option);
-
-    using option_set = enum_set<super_enum<option, option::PASSWORD>>;
-    using option_map = std::unordered_map<option, boost::any, enum_hash<option>>;
    using credentials_map = std::unordered_map<sstring, sstring>;

-    /**
-     * Setup is called once upon system startup to initialize the IAuthenticator.
-     *
-     * For example, use this method to create any required keyspaces/column families.
-     * Note: Only call from main thread.
-     */
-    static future<> setup(const sstring& type);
+    virtual ~authenticator() = default;

-    /**
-     * Returns the system authenticator. Must have called setup before calling this.
-     */
-    static authenticator& get();
+    virtual future<> start() = 0;

-    virtual ~authenticator()
-    {}
+    virtual future<> stop() = 0;

-    virtual const sstring& class_name() const = 0;
+    ///
+    /// A fully-qualified (class with package) Java-like name for this implementation.
+    ///
+    virtual const sstring& qualified_java_name() const = 0;

-    /**
-     * Whether or not the authenticator requires explicit login.
-     * If false will instantiate user with AuthenticatedUser.ANONYMOUS_USER.
-     */
    virtual bool require_authentication() const = 0;

-    /**
-     * Set of options supported by CREATE USER and ALTER USER queries.
-     * Should never return null - always return an empty set instead.
-     */
-    virtual option_set supported_options() const = 0;
+    virtual authentication_option_set supported_options() const = 0;

-    /**
-     * Subset of supportedOptions that users are allowed to alter when performing ALTER USER [themselves].
-     * Should never return null - always return an empty set instead.
-     */
-    virtual option_set alterable_options() const = 0;
+    ///
+    /// A subset of `supported_options()` that users are permitted to alter for themselves.
+    ///
+    virtual authentication_option_set alterable_options() const = 0;

-    /**
-     * Authenticates a user given a Map<String, String> of credentials.
-     * Should never return null - always throw AuthenticationException instead.
-     * Returning AuthenticatedUser.ANONYMOUS_USER is an option as well if authentication is not required.
-     *
-     * @throws authentication_exception if credentials don't match any known user.
-     */
-    virtual future<::shared_ptr<authenticated_user>> authenticate(const credentials_map& credentials) const = 0;
+    ///
+    /// Authenticate a user given implementation-specific credentials.
+    ///
+    /// If this implementation does not require authentication (\ref require_authentication), an anonymous user may
+    /// result.
+    ///
+    /// \returns an exceptional future with \ref exceptions::authentication_exception if given invalid credentials.
+    ///
+    virtual future<authenticated_user> authenticate(const credentials_map& credentials) const = 0;

-    /**
-     * Called during execution of CREATE USER query (also may be called on startup, see seedSuperuserOptions method).
-     * If authenticator is static then the body of the method should be left blank, but don't throw an exception.
-     * options are guaranteed to be a subset of supportedOptions().
-     *
-     * @param username Username of the user to create.
-     * @param options Options the user will be created with.
-     * @throws exceptions::request_validation_exception
-     * @throws exceptions::request_execution_exception
-     */
-    virtual future<> create(sstring username, const option_map& options) = 0;
+    ///
+    /// Create an authentication record for a new user. This is required before the user can log in.
+    ///
+    /// The options provided must be a subset of `supported_options()`.
+    ///
+    virtual future<> create(stdx::string_view role_name, const authentication_options& options) const = 0;

-    /**
-     * Called during execution of ALTER USER query.
-     * options are always guaranteed to be a subset of supportedOptions(). Furthermore, if the user performing the query
-     * is not a superuser and is altering himself, then options are guaranteed to be a subset of alterableOptions().
-     * Keep the body of the method blank if your implementation doesn't support any options.
-     *
-     * @param username Username of the user that will be altered.
-     * @param options Options to alter.
-     * @throws exceptions::request_validation_exception
-     * @throws exceptions::request_execution_exception
-     */
-    virtual future<> alter(sstring username, const option_map& options) = 0;
+    ///
+    /// Alter the authentication record of an existing user.
+    ///
+    /// The options provided must be a subset of `supported_options()`.
+    ///
+    /// Callers must ensure that the specification of `alterable_options()` is adhered to.
+    ///
+    virtual future<> alter(stdx::string_view role_name, const authentication_options& options) const = 0;

+    ///
+    /// Delete the authentication record for a user. This will disallow the user from logging in.
+    ///
+    virtual future<> drop(stdx::string_view role_name) const = 0;

-    /**
-     * Called during execution of DROP USER query.
-     *
-     * @param username Username of the user that will be dropped.
-     * @throws exceptions::request_validation_exception
-     * @throws exceptions::request_execution_exception
-     */
-    virtual future<> drop(sstring username) = 0;
+    ///
+    /// Query for custom options (those corresponding to \ref authentication_options::options).
+    ///
+    /// If no options are set the result is an empty container.
+    ///
+    virtual future<custom_options> query_custom_options(stdx::string_view role_name) const = 0;

-     /**
-     * Set of resources that should be made inaccessible to users and only accessible internally.
-     *
-     * @return Keyspaces, column families that will be unmodifiable by users; other resources.
-     * @see resource_ids
-     */
-    virtual const resource_ids& protected_resources() const = 0;
+    ///
+    /// System resources used internally as part of the implementation. These are made inaccessible to users.
+    ///
+    virtual const resource_set& protected_resources() const = 0;

+    ///
+    /// A stateful SASL challenge which supports many authentication schemes (depending on the implementation).
+    ///
    class sasl_challenge {
    public:
-        virtual ~sasl_challenge() {}
+        virtual ~sasl_challenge() = default;
+
        virtual bytes evaluate_response(bytes_view client_response) = 0;
+
        virtual bool is_complete() const = 0;
-        virtual future<::shared_ptr<authenticated_user>> get_authenticated_user() const = 0;
+
+        virtual future<authenticated_user> get_authenticated_user() const = 0;
    };

-    /**
-     * Provide a sasl_challenge to be used by the CQL binary protocol server. If
-     * the configured authenticator requires authentication but does not implement this
-     * interface we refuse to start the binary protocol server as it will have no way
-     * of authenticating clients.
-     * @return sasl_challenge implementation
-     */
    virtual ::shared_ptr<sasl_challenge> new_sasl_challenge() const = 0;
 };

-inline std::ostream& operator<<(std::ostream& os, authenticator::option opt) {
-    return os << authenticator::option_to_string(opt);
-}
-
 }

--- a/auth/authorizer.cc
+++ b/auth/authorizer.cc
@@ -1,104 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2016 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "authorizer.hh"
-#include "authenticated_user.hh"
-#include "default_authorizer.hh"
-#include "auth.hh"
-#include "db/config.hh"
-
-const sstring auth::authorizer::ALLOW_ALL_AUTHORIZER_NAME("org.apache.cassandra.auth.AllowAllAuthorizer");
-
-/**
- * Authenticator is assumed to be a fully state-less immutable object (note all the const).
- * We thus store a single instance globally, since it should be safe/ok.
- */
-static std::unique_ptr<auth::authorizer> global_authorizer;
-
-future<>
-auth::authorizer::setup(const sstring& type) {
-    if (auth::auth::is_class_type(type, ALLOW_ALL_AUTHORIZER_NAME)) {
-        class allow_all_authorizer : public authorizer {
-        public:
-            future<permission_set> authorize(::shared_ptr<authenticated_user>, data_resource) const override {
-                return make_ready_future<permission_set>(permissions::ALL);
-            }
-            future<> grant(::shared_ptr<authenticated_user>, permission_set, data_resource, sstring) override {
-                throw exceptions::invalid_request_exception("GRANT operation is not supported by AllowAllAuthorizer");
-            }
-            future<> revoke(::shared_ptr<authenticated_user>, permission_set, data_resource, sstring) override {
-                throw exceptions::invalid_request_exception("REVOKE operation is not supported by AllowAllAuthorizer");
-            }
-            future<std::vector<permission_details>> list(::shared_ptr<authenticated_user> performer, permission_set, optional<data_resource>, optional<sstring>) const override {
-                throw exceptions::invalid_request_exception("LIST PERMISSIONS operation is not supported by AllowAllAuthorizer");
-            }
-            future<> revoke_all(sstring dropped_user) override {
-                return make_ready_future();
-            }
-            future<> revoke_all(data_resource) override {
-                return make_ready_future();
-            }
-            const resource_ids& protected_resources() override {
-                static const resource_ids ids;
-                return ids;
-            }
-            future<> validate_configuration() const override {
-                return make_ready_future();
-            }
-        };
-
-        global_authorizer = std::make_unique<allow_all_authorizer>();
-    } else if (auth::auth::is_class_type(type, default_authorizer::DEFAULT_AUTHORIZER_NAME)) {
-        auto da = std::make_unique<default_authorizer>();
-        auto f = da->init();
-        return f.then([da = std::move(da)]() mutable {
-            global_authorizer = std::move(da);
-        });
-    } else {
-        throw exceptions::configuration_exception("Invalid authorizer type: " + type);
-    }
-    return make_ready_future();
-}
-
-auth::authorizer& auth::authorizer::get() {
-    assert(global_authorizer);
-    return *global_authorizer;
-}
--- a/auth/authorizer.hh
+++ b/auth/authorizer.hh
@@ -41,133 +41,116 @@

 #pragma once

-#include <vector>
+#include <experimental/string_view>
+#include <functional>
+#include <optional>
+#include <stdexcept>
 #include <tuple>
+#include <vector>

-#include <experimental/optional>
 #include <seastar/core/future.hh>
 #include <seastar/core/shared_ptr.hh>

-#include "permission.hh"
-#include "data_resource.hh"
-
+#include "auth/permission.hh"
+#include "auth/resource.hh"
 #include "seastarx.hh"
+#include "stdx.hh"

 namespace auth {

-class authenticated_user;
+class role_or_anonymous;

 struct permission_details {
-    sstring user;
-    data_resource resource;
+    sstring role_name;
+    ::auth::resource resource;
    permission_set permissions;
-
-    bool operator<(const permission_details& v) const {
-        return std::tie(user, resource, permissions) < std::tie(v.user, v.resource, v.permissions);
-    }
 };

-using std::experimental::optional;
+inline bool operator==(const permission_details& pd1, const permission_details& pd2) {
+    return std::forward_as_tuple(pd1.role_name, pd1.resource, pd1.permissions.mask())
+            == std::forward_as_tuple(pd2.role_name, pd2.resource, pd2.permissions.mask());
+}

+inline bool operator!=(const permission_details& pd1, const permission_details& pd2) {
+    return !(pd1 == pd2);
+}
+
+inline bool operator<(const permission_details& pd1, const permission_details& pd2) {
+    return std::forward_as_tuple(pd1.role_name, pd1.resource, pd1.permissions)
+            < std::forward_as_tuple(pd2.role_name, pd2.resource, pd2.permissions);
+}
+
+class unsupported_authorization_operation : public std::invalid_argument {
+public:
+    using std::invalid_argument::invalid_argument;
+};
+
+///
+/// Abstract client for authorizing roles to access resources.
+///
+/// All state necessary to authorize a role is stored externally to the client instance.
+///
 class authorizer {
 public:
-    static const sstring ALLOW_ALL_AUTHORIZER_NAME;
+    virtual ~authorizer() = default;

-    virtual ~authorizer() {}
+    virtual future<> start() = 0;

-    /**
-     * The primary Authorizer method. Returns a set of permissions of a user on a resource.
-     *
-     * @param user Authenticated user requesting authorization.
-     * @param resource Resource for which the authorization is being requested. @see DataResource.
-     * @return Set of permissions of the user on the resource. Should never return empty. Use permission.NONE instead.
-     */
-    virtual future<permission_set> authorize(::shared_ptr<authenticated_user>, data_resource) const = 0;
+    virtual future<> stop() = 0;

-    /**
-     * Grants a set of permissions on a resource to a user.
-     * The opposite of revoke().
-     *
-     * @param performer User who grants the permissions.
-     * @param permissions Set of permissions to grant.
-     * @param to Grantee of the permissions.
-     * @param resource Resource on which to grant the permissions.
-     *
-     * @throws RequestValidationException
-     * @throws RequestExecutionException
-     */
-    virtual future<> grant(::shared_ptr<authenticated_user> performer, permission_set, data_resource, sstring to) = 0;
+    ///
+    /// A fully-qualified (class with package) Java-like name for this implementation.
+    ///
+    virtual const sstring& qualified_java_name() const = 0;

-    /**
-     * Revokes a set of permissions on a resource from a user.
-     * The opposite of grant().
-     *
-     * @param performer User who revokes the permissions.
-     * @param permissions Set of permissions to revoke.
-     * @param from Revokee of the permissions.
-     * @param resource Resource on which to revoke the permissions.
-     *
-     * @throws RequestValidationException
-     * @throws RequestExecutionException
-     */
-    virtual future<> revoke(::shared_ptr<authenticated_user> performer, permission_set, data_resource, sstring from) = 0;
+    ///
+    /// Query for the permissions granted directly to a role for a particular \ref resource (and not any of its
+    /// parents).
+    ///
+    /// The optional role name is empty when an anonymous user is authorized. Some implementations may still wish to
+    /// grant default permissions in this case.
+    ///
+    virtual future<permission_set> authorize(const role_or_anonymous&, const resource&) const = 0;

-    /**
-     * Returns a list of permissions on a resource of a user.
-     *
-     * @param performer User who wants to see the permissions.
-     * @param permissions Set of Permission values the user is interested in. The result should only include the matching ones.
-     * @param resource The resource on which permissions are requested. Can be null, in which case permissions on all resources
-     *                 should be returned.
-     * @param of The user whose permissions are requested. Can be null, in which case permissions of every user should be returned.
-     *
-     * @return All of the matching permission that the requesting user is authorized to know about.
-     *
-     * @throws RequestValidationException
-     * @throws RequestExecutionException
-     */
-    virtual future<std::vector<permission_details>> list(::shared_ptr<authenticated_user> performer, permission_set, optional<data_resource>, optional<sstring>) const = 0;
+    ///
+    /// Grant a set of permissions to a role for a particular \ref resource.
+    ///
+    /// \throws \ref unsupported_authorization_operation if granting permissions is not supported.
+    ///
+    virtual future<> grant(stdx::string_view role_name, permission_set, const resource&) const = 0;

-    /**
-     * This method is called before deleting a user with DROP USER query so that a new user with the same
-     * name wouldn't inherit permissions of the deleted user in the future.
-     *
-     * @param droppedUser The user to revoke all permissions from.
-     */
-    virtual future<> revoke_all(sstring dropped_user) = 0;
+    ///
+    /// Revoke a set of permissions from a role for a particular \ref resource.
+    ///
+    /// \throws \ref unsupported_authorization_operation if revoking permissions is not supported.
+    ///
+    virtual future<> revoke(stdx::string_view role_name, permission_set, const resource&) const = 0;

-    /**
-     * This method is called after a resource is removed (i.e. keyspace or a table is dropped).
-     *
-     * @param droppedResource The resource to revoke all permissions on.
-     */
-    virtual future<> revoke_all(data_resource) = 0;
+    ///
+    /// Query for all directly granted permissions.
+    ///
+    /// \throws \ref unsupported_authorization_operation if listing permissions is not supported.
+    ///
+    virtual future<std::vector<permission_details>> list_all() const = 0;

-    /**
-     * Set of resources that should be made inaccessible to users and only accessible internally.
-     *
-     * @return Keyspaces, column families that will be unmodifiable by users; other resources.
-     */
-    virtual const resource_ids& protected_resources() = 0;
+    ///
+    /// Revoke all permissions granted directly to a particular role.
+    ///
+    /// \throws \ref unsupported_authorization_operation if revoking permissions is not supported.
+    ///
+    virtual future<> revoke_all(stdx::string_view role_name) const = 0;

-    /**
-     * Validates configuration of IAuthorizer implementation (if configurable).
-     *
-     * @throws ConfigurationException when there is a configuration error.
-     */
-    virtual future<> validate_configuration() const = 0;
+    ///
+    /// Revoke all permissions granted to any role for a particular resource.
+    ///
+    /// \throws \ref unsupported_authorization_operation if revoking permissions is not supported.
+    ///
+    virtual future<> revoke_all(const resource&) const = 0;

-    /**
-     * Setup is called once upon system startup to initialize the IAuthorizer.
-     *
-     * For example, use this method to create any required keyspaces/column families.
-     */
-    static future<> setup(const sstring& type);
-
-    /**
-     * Returns the system authorizer. Must have called setup before calling this.
-     */
-    static authorizer& get();
+    ///
+    /// System resources used internally as part of the implementation. These are made inaccessible to users.
+    ///
+    virtual const resource_set& protected_resources() const = 0;
 };

 }
--- a/auth/common.cc
+++ b/auth/common.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "auth/common.hh"
+
+#include <seastar/core/shared_ptr.hh>
+
+#include "cql3/query_processor.hh"
+#include "cql3/statements/create_table_statement.hh"
+#include "database.hh"
+#include "schema_builder.hh"
+#include "service/migration_manager.hh"
+
+namespace auth {
+
+namespace meta {
+
+const sstring DEFAULT_SUPERUSER_NAME("cassandra");
+const sstring AUTH_KS("system_auth");
+const sstring USERS_CF("users");
+const sstring AUTH_PACKAGE_NAME("org.apache.cassandra.auth.");
+
+}
+
+static logging::logger auth_log("auth");
+
+// Runs `func` after the start-up delay, re-running it on failure. Func must support being invoked more than once.
+future<> do_after_system_ready(seastar::abort_source& as, seastar::noncopyable_function<future<>()> func) {
+    struct empty_state { };  // Placeholder payload: presumably an engaged optional tells do_until_value() to stop.
+    return delay_until_system_ready(as).then([&as, func = std::move(func)] () mutable {
+        return exponential_backoff_retry::do_until_value(1s, 1min, as, [func = std::move(func)] {
+            return func().then_wrapped([] (auto&& f) -> stdx::optional<empty_state> {
+                if (f.failed()) {
+                    auth_log.info("Auth task failed with error, rescheduling: {}", f.get_exception());
+                    return { };  // Disengaged optional: presumably makes do_until_value() back off and retry.
+                }
+                return { empty_state() };
+            });
+        });
+    }).discard_result();
+}
+
+// Announces the table defined by `cql` in the auth keyspace, unless `table_name` already has a schema there.
+future<> create_metadata_table_if_missing(
+        stdx::string_view table_name,
+        cql3::query_processor& qp,
+        stdx::string_view cql,
+        ::service::migration_manager& mm) {
+    auto& local_db = qp.db().local();
+    if (local_db.has_schema(meta::AUTH_KS, sstring(table_name))) {
+        return make_ready_future<>();
+    }
+
+    // Parse the CQL as a table statement and pin it to the auth keyspace before preparing it.
+    auto raw = static_pointer_cast<cql3::statements::raw::cf_statement>(
+            cql3::query_processor::parse_statement(cql));
+    raw->prepare_keyspace(meta::AUTH_KS);
+
+    auto table_stmt = static_pointer_cast<cql3::statements::create_table_statement>(
+            raw->prepare(local_db, qp.get_cql_stats())->statement);
+    const auto prepared_schema = table_stmt->get_cf_meta_data(local_db);
+    const auto legacy_id = generate_legacy_id(prepared_schema->ks_name(), prepared_schema->cf_name());
+
+    // Rebuild the schema under the ID that generate_legacy_id() derives from the keyspace and table names.
+    schema_builder builder(prepared_schema);
+    builder.set_uuid(legacy_id);
+
+    return mm.announce_new_column_family(builder.build(), false);
+}
+
+future<> wait_for_schema_agreement(::service::migration_manager& mm, const database& db) {
+    static const auto pause = [] { return sleep(std::chrono::milliseconds(500)); };
+    // Two polling phases, 500ms apart: until `db` reports a non-empty version, then until `mm` reports schema agreement.
+    return do_until([&db] { return db.get_version() != database::empty_version; }, pause).then([&mm] {
+        return do_until([&mm] { return mm.have_schema_agreement(); }, pause);
+    });
+}
+
+}
--- a/auth/common.hh
+++ b/auth/common.hh
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <chrono>
+#include <experimental/string_view>
+
+#include <seastar/core/future.hh>
+#include <seastar/core/abort_source.hh>
+#include <seastar/util/noncopyable_function.hh>
+#include <seastar/core/reactor.hh>
+#include <seastar/core/resource.hh>
+#include <seastar/core/sstring.hh>
+
+#include "log.hh"
+#include "seastarx.hh"
+#include "utils/exponential_backoff_retry.hh"
+
+using namespace std::chrono_literals;
+
+class database;
+
+namespace service {
+class migration_manager;
+}
+
+namespace cql3 {
+class query_processor;
+}
+
+namespace auth {
+
+namespace meta {
+
+extern const sstring DEFAULT_SUPERUSER_NAME;
+extern const sstring AUTH_KS;
+extern const sstring USERS_CF;
+extern const sstring AUTH_PACKAGE_NAME;
+
+}
+
+// Invokes `f` only where engine().cpu_id() is 0; everywhere else yields an already-resolved future.
+template <class Task>
+future<> once_among_shards(Task&& f) {
+    if (engine().cpu_id() != 0u) {
+        return make_ready_future<>();
+    }
+    return f();
+}
+
+inline future<> delay_until_system_ready(seastar::abort_source& as) {
+    return sleep_abortable(15s, as);  // Fixed 15-second wait; `as` is handed to sleep_abortable() so the wait can be aborted.
+}
+
+// Func must support being invoked more than once.
+future<> do_after_system_ready(seastar::abort_source& as, seastar::noncopyable_function<future<>()> func);
+
+future<> create_metadata_table_if_missing(
+        stdx::string_view table_name,
+        cql3::query_processor&,
+        stdx::string_view cql,
+        ::service::migration_manager&);
+
+future<> wait_for_schema_agreement(::service::migration_manager&, const database&);
+
+}
--- a/auth/data_resource.cc
+++ b/auth/data_resource.cc
@@ -1,171 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2016 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "data_resource.hh"
-
-#include <regex>
-#include "service/storage_proxy.hh"
-
-const sstring auth::data_resource::ROOT_NAME("data");
-
-auth::data_resource::data_resource(level l, const sstring& ks, const sstring& cf)
-    : _level(l), _ks(ks), _cf(cf)
-{
-}
-
-auth::data_resource::data_resource()
-    : data_resource(level::ROOT)
-{}
-
-auth::data_resource::data_resource(const sstring& ks)
-    : data_resource(level::KEYSPACE, ks)
-{}
-
-auth::data_resource::data_resource(const sstring& ks, const sstring& cf)
-    : data_resource(level::COLUMN_FAMILY, ks, cf)
-{}
-
-auth::data_resource::level auth::data_resource::get_level() const {
-    return _level;
-}
-
-auth::data_resource auth::data_resource::from_name(
-                const sstring& s) {
-
-    static std::regex slash_regex("/");
-
-    auto i = std::regex_token_iterator<sstring::const_iterator>(s.begin(),
-                    s.end(), slash_regex, -1);
-    auto e = std::regex_token_iterator<sstring::const_iterator>();
-    auto n = std::distance(i, e);
-
-    if (n > 3 || ROOT_NAME != sstring(*i++)) {
-        throw std::invalid_argument(sprint("%s is not a valid data resource name", s));
-    }
-
-    if (n == 1) {
-        return data_resource();
-    }
-    auto ks = *i++;
-    if (n == 2) {
-        return data_resource(ks.str());
-    }
-    auto cf = *i++;
-    return data_resource(ks.str(), cf.str());
-}
-
-sstring auth::data_resource::name() const {
-    switch (get_level()) {
-        case level::ROOT:
-            return ROOT_NAME;
-        case level::KEYSPACE:
-            return sprint("%s/%s", ROOT_NAME, _ks);
-        case level::COLUMN_FAMILY:
-        default:
-            return sprint("%s/%s/%s", ROOT_NAME, _ks, _cf);
-    }
-}
-
-auth::data_resource auth::data_resource::get_parent() const {
-    switch (get_level()) {
-    case level::KEYSPACE:
-        return data_resource();
-    case level::COLUMN_FAMILY:
-        return data_resource(_ks);
-    default:
-        throw std::invalid_argument("Root-level resource can't have a parent");
-    }
-}
-
-const sstring& auth::data_resource::keyspace() const {
-    if (is_root_level()) {
-        throw std::invalid_argument("ROOT data resource has no keyspace");
-    }
-    return _ks;
-}
-
-const sstring& auth::data_resource::column_family() const {
-    if (!is_column_family_level()) {
-        throw std::invalid_argument(sprint("%s data resource has no column family", name()));
-    }
-    return _cf;
-}
-
-bool auth::data_resource::has_parent() const {
-    return !is_root_level();
-}
-
-bool auth::data_resource::exists() const {
-    switch (get_level()) {
-        case level::ROOT:
-            return true;
-        case level::KEYSPACE:
-            return service::get_local_storage_proxy().get_db().local().has_keyspace(_ks);
-        case level::COLUMN_FAMILY:
-        default:
-            return service::get_local_storage_proxy().get_db().local().has_schema(_ks, _cf);
-    }
-}
-
-sstring auth::data_resource::to_string() const {
-    switch (get_level()) {
-        case level::ROOT:
-            return "<all keyspaces>";
-        case level::KEYSPACE:
-            return sprint("<keyspace %s>", _ks);
-        case level::COLUMN_FAMILY:
-        default:
-            return sprint("<table %s.%s>", _ks, _cf);
-    }
-}
-
-bool auth::data_resource::operator==(const data_resource& v) const {
-    return _ks == v._ks && _cf == v._cf;
-}
-
-bool auth::data_resource::operator<(const data_resource& v) const {
-    return _ks < v._ks ? true : (v._ks < _ks ? false : _cf < v._cf);
-}
-
-std::ostream& auth::operator<<(std::ostream& os, const data_resource& r) {
-    return os << r.to_string();
-}
-
--- a/auth/data_resource.hh
+++ b/auth/data_resource.hh
@@ -1,159 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2016 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "utils/hash.hh"
-#include <iosfwd>
-#include <set>
-#include <seastar/core/sstring.hh>
-#include "seastarx.hh"
-
-namespace auth {
-
-class data_resource {
-private:
-    enum class level {
-        ROOT, KEYSPACE, COLUMN_FAMILY
-    };
-
-    static const sstring ROOT_NAME;
-
-    level _level;
-    sstring _ks;
-    sstring _cf;
-
-    data_resource(level, const sstring& ks = {}, const sstring& cf = {});
-
-    level get_level() const;
-public:
-    /**
-     * Creates a DataResource representing the root-level resource.
-     * @return the root-level resource.
-     */
-    data_resource();
-    /**
-     * Creates a DataResource representing a keyspace.
-     *
-     * @param keyspace Name of the keyspace.
-     */
-    data_resource(const sstring& ks);
-    /**
-     * Creates a DataResource instance representing a column family.
-     *
-     * @param keyspace Name of the keyspace.
-     * @param columnFamily Name of the column family.
-     */
-    data_resource(const sstring& ks, const sstring& cf);
-
-    /**
-     * Parses a data resource name into a DataResource instance.
-     *
-     * @param name Name of the data resource.
-     * @return DataResource instance matching the name.
-     */
-    static data_resource from_name(const sstring&);
-
-    /**
-     * @return Printable name of the resource.
-     */
-    sstring name() const;
-
-    /**
-     * @return Parent of the resource, if any. Throws IllegalStateException if it's the root-level resource.
-     */
-    data_resource get_parent() const;
-
-    bool is_root_level() const {
-        return get_level() == level::ROOT;
-    }
-
-    bool is_keyspace_level() const {
-        return get_level() == level::KEYSPACE;
-    }
-
-    bool is_column_family_level() const {
-        return get_level() == level::COLUMN_FAMILY;
-    }
-
-    /**
-     * @return keyspace of the resource.
-     * @throws std::invalid_argument if it's the root-level resource.
-     */
-    const sstring& keyspace() const;
-
-    /**
-     * @return column family of the resource.
-     * @throws std::invalid_argument if it's not a cf-level resource.
-     */
-    const sstring& column_family() const;
-
-    /**
-     * @return Whether or not the resource has a parent in the hierarchy.
-     */
-    bool has_parent() const;
-
-    /**
-     * @return Whether or not the resource exists in scylla.
-     */
-    bool exists() const;
-
-    sstring to_string() const;
-
-    bool operator==(const data_resource&) const;
-    bool operator<(const data_resource&) const;
-
-    size_t hash_value() const {
-        return utils::tuple_hash()(_ks, _cf);
-    }
-};
-
-/**
- * Resource id mappings, i.e. keyspace and/or column families.
- */
-using resource_ids = std::set<data_resource>;
-
-std::ostream& operator<<(std::ostream&, const data_resource&);
-
-}
-
-
-
--- a/auth/default_authorizer.cc
+++ b/auth/default_authorizer.cc
@@ -39,181 +39,283 @@
 * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include <unistd.h>
-#include <crypt.h>
-#include <random>
-#include <chrono>
+#include "auth/default_authorizer.hh"

+extern "C" {
+#include <crypt.h>
+#include <unistd.h>
+}
+
+#include <chrono>
+#include <random>
+
+#include <boost/algorithm/string/join.hpp>
+#include <boost/range.hpp>
 #include <seastar/core/reactor.hh>

-#include "auth.hh"
-#include "default_authorizer.hh"
-#include "authenticated_user.hh"
-#include "permission.hh"
+#include "auth/authenticated_user.hh"
+#include "auth/common.hh"
+#include "auth/permission.hh"
+#include "auth/role_or_anonymous.hh"
 #include "cql3/query_processor.hh"
+#include "cql3/untyped_result_set.hh"
 #include "exceptions/exceptions.hh"
 #include "log.hh"

-const sstring auth::default_authorizer::DEFAULT_AUTHORIZER_NAME(
-                "org.apache.cassandra.auth.CassandraAuthorizer");
+namespace auth {

-static const sstring USER_NAME = "username";
+const sstring& default_authorizer_name() {
+    static const sstring name = meta::AUTH_PACKAGE_NAME + "CassandraAuthorizer";
+    return name;
+}
+
+static const sstring ROLE_NAME = "role";
 static const sstring RESOURCE_NAME = "resource";
 static const sstring PERMISSIONS_NAME = "permissions";
-static const sstring PERMISSIONS_CF = "permissions";
+static const sstring PERMISSIONS_CF = "role_permissions";

 static logging::logger alogger("default_authorizer");

-auth::default_authorizer::default_authorizer() {
-}
-auth::default_authorizer::~default_authorizer() {
+// To ensure correct initialization order, we unfortunately need to use a string literal.
+static const class_registrator<
+        authorizer,
+        default_authorizer,
+        cql3::query_processor&,
+        ::service::migration_manager&> password_auth_reg("org.apache.cassandra.auth.CassandraAuthorizer");
+
+default_authorizer::default_authorizer(cql3::query_processor& qp, ::service::migration_manager& mm)
+        : _qp(qp)
+        , _migration_manager(mm) {
 }

-future<> auth::default_authorizer::init() {
-    sstring create_table = sprint("CREATE TABLE %s.%s ("
-                    "%s text,"
-                    "%s text,"
-                    "%s set<text>,"
-                    "PRIMARY KEY(%s, %s)"
-                    ") WITH gc_grace_seconds=%d", auth::auth::AUTH_KS,
-                    PERMISSIONS_CF, USER_NAME, RESOURCE_NAME, PERMISSIONS_NAME,
-                    USER_NAME, RESOURCE_NAME, 90 * 24 * 60 * 60); // 3 months.
-
-    return auth::setup_table(PERMISSIONS_CF, create_table);
+default_authorizer::~default_authorizer() {
 }

+static const sstring legacy_table_name{"permissions"};

-future<auth::permission_set> auth::default_authorizer::authorize(
-                ::shared_ptr<authenticated_user> user, data_resource resource) const {
-    return user->is_super().then([this, user, resource = std::move(resource)](bool is_super) {
-        if (is_super) {
-            return make_ready_future<permission_set>(permissions::ALL);
-        }
+bool default_authorizer::legacy_metadata_exists() const {
+    return _qp.db().local().has_schema(meta::AUTH_KS, legacy_table_name);
+}

-        /**
-         * TOOD: could create actual data type for permission (translating string<->perm),
-         * but this seems overkill right now. We still must store strings so...
-         */
-        auto& qp = cql3::get_local_query_processor();
-        auto query = sprint("SELECT %s FROM %s.%s WHERE %s = ? AND %s = ?"
-                        , PERMISSIONS_NAME, auth::AUTH_KS, PERMISSIONS_CF, USER_NAME, RESOURCE_NAME);
-        return qp.process(query, db::consistency_level::LOCAL_ONE, {user->name(), resource.name() })
-                        .then_wrapped([=](future<::shared_ptr<cql3::untyped_result_set>> f) {
-            try {
-                auto res = f.get0();
+future<bool> default_authorizer::any_granted() const {
+    static const sstring query = sprint("SELECT * FROM %s.%s LIMIT 1", meta::AUTH_KS, PERMISSIONS_CF);

-                if (res->empty() || !res->one().has(PERMISSIONS_NAME)) {
-                    return make_ready_future<permission_set>(permissions::NONE);
-                }
-                return make_ready_future<permission_set>(permissions::from_strings(res->one().get_set<sstring>(PERMISSIONS_NAME)));
-            } catch (exceptions::request_execution_exception& e) {
-                alogger.warn("CassandraAuthorizer failed to authorize {} for {}", user->name(), resource);
-                return make_ready_future<permission_set>(permissions::NONE);
-            }
-        });
+    return _qp.process(
+            query,
+            db::consistency_level::LOCAL_ONE,
+            {},
+            true).then([this](::shared_ptr<cql3::untyped_result_set> results) {
+        return !results->empty();
    });
 }

-#include <boost/range.hpp>
+future<> default_authorizer::migrate_legacy_metadata() const {
+    alogger.info("Starting migration of legacy permissions metadata.");
+    static const sstring query = sprint("SELECT * FROM %s.%s", meta::AUTH_KS, legacy_table_name);

-future<> auth::default_authorizer::modify(
-                ::shared_ptr<authenticated_user> performer, permission_set set,
-                data_resource resource, sstring user, sstring op) {
-    // TODO: why does this not check super user?
-    auto& qp = cql3::get_local_query_processor();
-    auto query = sprint("UPDATE %s.%s SET %s = %s %s ? WHERE %s = ? AND %s = ?",
-                    auth::AUTH_KS, PERMISSIONS_CF, PERMISSIONS_NAME,
-                    PERMISSIONS_NAME, op, USER_NAME, RESOURCE_NAME);
-    return qp.process(query, db::consistency_level::ONE, {
-                    permissions::to_strings(set), user, resource.name() }).discard_result();
-}
-
-
-future<> auth::default_authorizer::grant(
-                ::shared_ptr<authenticated_user> performer, permission_set set,
-                data_resource resource, sstring to) {
-    return modify(std::move(performer), std::move(set), std::move(resource), std::move(to), "+");
-}
-
-future<> auth::default_authorizer::revoke(
-                ::shared_ptr<authenticated_user> performer, permission_set set,
-                data_resource resource, sstring from) {
-    return modify(std::move(performer), std::move(set), std::move(resource), std::move(from), "-");
-}
-
-future<std::vector<auth::permission_details>> auth::default_authorizer::list(
-                ::shared_ptr<authenticated_user> performer, permission_set set,
-                optional<data_resource> resource, optional<sstring> user) const {
-    return performer->is_super().then([this, performer, set = std::move(set), resource = std::move(resource), user = std::move(user)](bool is_super) {
-        if (!is_super && (!user || performer->name() != *user)) {
-            throw exceptions::unauthorized_exception(sprint("You are not authorized to view %s's permissions", user ? *user : "everyone"));
-        }
-
-        auto query = sprint("SELECT %s, %s, %s FROM %s.%s", USER_NAME, RESOURCE_NAME, PERMISSIONS_NAME, auth::AUTH_KS, PERMISSIONS_CF);
-        auto& qp = cql3::get_local_query_processor();
-
-        // Oh, look, it is a case where it does not pay off to have
-        // parameters to process in an initializer list.
-        future<::shared_ptr<cql3::untyped_result_set>> f = make_ready_future<::shared_ptr<cql3::untyped_result_set>>();
-
-        if (resource && user) {
-            query += sprint(" WHERE %s = ? AND %s = ?", USER_NAME, RESOURCE_NAME);
-            f = qp.process(query, db::consistency_level::ONE, {*user, resource->name()});
-        } else if (resource) {
-            query += sprint(" WHERE %s = ? ALLOW FILTERING", RESOURCE_NAME);
-            f = qp.process(query, db::consistency_level::ONE, {resource->name()});
-        } else if (user) {
-            query += sprint(" WHERE %s = ?", USER_NAME);
-            f = qp.process(query, db::consistency_level::ONE, {*user});
-        } else {
-            f = qp.process(query, db::consistency_level::ONE, {});
-        }
-
-        return f.then([set](::shared_ptr<cql3::untyped_result_set> res) {
-            std::vector<permission_details> result;
-
-            for (auto& row : *res) {
-                if (row.has(PERMISSIONS_NAME)) {
-                    auto username = row.get_as<sstring>(USER_NAME);
-                    auto resource = data_resource::from_name(row.get_as<sstring>(RESOURCE_NAME));
-                    auto ps = permissions::from_strings(row.get_set<sstring>(PERMISSIONS_NAME));
-                    ps = permission_set::from_mask(ps.mask() & set.mask());
-
-                    result.emplace_back(permission_details {username, resource, ps});
-                }
-            }
-            return make_ready_future<std::vector<permission_details>>(std::move(result));
-        });
+    return _qp.process(
+            query,
+            db::consistency_level::LOCAL_ONE).then([this](::shared_ptr<cql3::untyped_result_set> results) {
+        return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
+            return do_with(
+                    row.get_as<sstring>("username"),
+                    parse_resource(row.get_as<sstring>(RESOURCE_NAME)),
+                    [this, &row](const auto& username, const auto& r) {
+                const permission_set perms = permissions::from_strings(row.get_set<sstring>(PERMISSIONS_NAME));
+                return grant(username, perms, r);
+            });
+        }).finally([results] {});
+    }).then([] {
+        alogger.info("Finished migrating legacy permissions metadata.");
+    }).handle_exception([](std::exception_ptr ep) {
+        alogger.error("Encountered an error during migration!");
+        std::rethrow_exception(ep);
    });
 }

-future<> auth::default_authorizer::revoke_all(sstring dropped_user) {
-    auto& qp = cql3::get_local_query_processor();
-    auto query = sprint("DELETE FROM %s.%s WHERE %s = ?", auth::AUTH_KS,
-                    PERMISSIONS_CF, USER_NAME);
-    return qp.process(query, db::consistency_level::ONE, { dropped_user }).discard_result().handle_exception(
-                    [dropped_user](auto ep) {
-                        try {
-                            std::rethrow_exception(ep);
-                        } catch (exceptions::request_execution_exception& e) {
-                            alogger.warn("CassandraAuthorizer failed to revoke all permissions of {}: {}", dropped_user, e);
+future<> default_authorizer::start() {
+    static const sstring create_table = sprint(
+            "CREATE TABLE %s.%s ("
+            "%s text,"
+            "%s text,"
+            "%s set<text>,"
+            "PRIMARY KEY(%s, %s)"
+            ") WITH gc_grace_seconds=%d",
+            meta::AUTH_KS,
+            PERMISSIONS_CF,
+            ROLE_NAME,
+            RESOURCE_NAME,
+            PERMISSIONS_NAME,
+            ROLE_NAME,
+            RESOURCE_NAME,
+            90 * 24 * 60 * 60); // 3 months.
+
+    return once_among_shards([this] {
+        return create_metadata_table_if_missing(
+                PERMISSIONS_CF,
+                _qp,
+                create_table,
+                _migration_manager).then([this] {
+            _finished = do_after_system_ready(_as, [this] {
+                return async([this] {
+                    wait_for_schema_agreement(_migration_manager, _qp.db().local()).get0();
+
+                    if (legacy_metadata_exists()) {
+                        if (!any_granted().get0()) {
+                            migrate_legacy_metadata().get0();
+                            return;
                        }
-                    });
+
+                        alogger.warn("Ignoring legacy permissions metadata since role permissions exist.");
+                    }
+                });
+            });
+        });
+    });
 }

-future<> auth::default_authorizer::revoke_all(data_resource resource) {
-    auto& qp = cql3::get_local_query_processor();
-    auto query = sprint("SELECT %s FROM %s.%s WHERE %s = ? ALLOW FILTERING",
-                    USER_NAME, auth::AUTH_KS, PERMISSIONS_CF, RESOURCE_NAME);
-    return qp.process(query, db::consistency_level::LOCAL_ONE, { resource.name() })
-                    .then_wrapped([resource, &qp](future<::shared_ptr<cql3::untyped_result_set>> f) {
+future<> default_authorizer::stop() {
+    _as.request_abort();  // Signals the abort source that start() handed to do_after_system_ready().
+    return _finished.handle_exception_type([](const sleep_aborted&) {});  // An aborted sleep is the expected result of the abort, not an error.
+}
+
+// Returns the permissions stored for the given role on resource `r`.
+// An anonymous caller gets permissions::NONE without any query being issued.
+future<permission_set>
+default_authorizer::authorize(const role_or_anonymous& maybe_role, const resource& r) const {
+    if (is_anonymous(maybe_role)) {
+        return make_ready_future<permission_set>(permissions::NONE);
+    }
+
+    static const sstring query = sprint(
+            "SELECT %s FROM %s.%s WHERE %s = ? AND %s = ?",
+            PERMISSIONS_NAME,
+            meta::AUTH_KS, PERMISSIONS_CF,
+            ROLE_NAME, RESOURCE_NAME);
+
+    return _qp.process(
+            query,
+            db::consistency_level::LOCAL_ONE,
+            {*maybe_role.name, r.name()}).then([](::shared_ptr<cql3::untyped_result_set> rows) {
+        // A matching row carries the stored set; no row means nothing is stored for this pair.
+        if (!rows->empty()) {
+            return permissions::from_strings(rows->one().get_set<sstring>(PERMISSIONS_NAME));
+        }
+        return permissions::NONE;
+    });
+}
+
+// Applies `op` to the permission set stored for (`role_name`, `resource`), with `set` as the
+// operand. grant() passes "+" and revoke() passes "-".
+future<>
+default_authorizer::modify(
+        stdx::string_view role_name,
+        permission_set set,
+        const resource& resource,
+        stdx::string_view op) const {
+    auto update_cql = sprint(
+            "UPDATE %s.%s SET %s = %s %s ? WHERE %s = ? AND %s = ?",
+            meta::AUTH_KS, PERMISSIONS_CF,
+            PERMISSIONS_NAME, PERMISSIONS_NAME, op,
+            ROLE_NAME, RESOURCE_NAME);
+
+    // The statement text is built per call (it embeds `op`), so it is handed to do_with(),
+    // which owns it while the request is in flight.
+    return do_with(std::move(update_cql), [this, &role_name, set, &resource](const auto& query) {
+        return _qp.process(
+                query,
+                db::consistency_level::ONE,
+                {permissions::to_strings(set), sstring(role_name), resource.name()}).discard_result();
+    });
+}
+
+// grant() adds `set` to, and revoke() removes it from, the permissions of `role_name` on `resource`; both delegate to modify().
+future<> default_authorizer::grant(stdx::string_view role_name, permission_set set, const resource& resource) const {
+    return modify(role_name, std::move(set), resource, "+");
+}
+
+future<> default_authorizer::revoke(stdx::string_view role_name, permission_set set, const resource& resource) const {
+    return modify(role_name, std::move(set), resource, "-");
+}
+
+// Reads every row of the PERMISSIONS_CF table into a list of permission_details.
+// Rows that have no value in the permissions column are skipped.
+future<std::vector<permission_details>> default_authorizer::list_all() const {
+    static const sstring query = sprint(
+            "SELECT %s, %s, %s FROM %s.%s",
+            ROLE_NAME, RESOURCE_NAME, PERMISSIONS_NAME, meta::AUTH_KS, PERMISSIONS_CF);
+
+    return _qp.process(
+            query,
+            db::consistency_level::ONE,
+            {},
+            true).then([](::shared_ptr<cql3::untyped_result_set> rows) {
+        std::vector<permission_details> granted;
+
+        for (const auto& row : *rows) {
+            if (!row.has(PERMISSIONS_NAME)) {
+                continue;
+            }
+
+            auto role = row.get_as<sstring>(ROLE_NAME);
+            auto target = parse_resource(row.get_as<sstring>(RESOURCE_NAME));
+            auto perms = permissions::from_strings(row.get_set<sstring>(PERMISSIONS_NAME));
+            granted.push_back(permission_details{std::move(role), std::move(target), std::move(perms)});
+        }
+
+        return granted;
+    });
+}
+
+// Deletes every permission row stored for `role_name`. A request_execution_exception from the
+// query is logged as a warning and swallowed; any other exception is rethrown by the handler.
+future<> default_authorizer::revoke_all(stdx::string_view role_name) const {
+    static const sstring query = sprint(
+            "DELETE FROM %s.%s WHERE %s = ?", meta::AUTH_KS, PERMISSIONS_CF, ROLE_NAME);
+    // The error handler may run after this call has returned, when the buffer behind the
+    // non-owning `role_name` view can already be gone, so it captures an owning copy instead.
+    return _qp.process(
+            query,
+            db::consistency_level::ONE,
+            {sstring(role_name)}).discard_result().handle_exception([name = sstring(role_name)](auto ep) {
+        try {
+            std::rethrow_exception(ep);
+        } catch (exceptions::request_execution_exception& e) {
+            alogger.warn("CassandraAuthorizer failed to revoke all permissions of {}: {}", name, e);
+        }
+    });
+}
+
+future<> default_authorizer::revoke_all(const resource& resource) const {
+    static const sstring query = sprint(
+            "SELECT %s FROM %s.%s WHERE %s = ? ALLOW FILTERING",
+            ROLE_NAME,
+            meta::AUTH_KS,
+            PERMISSIONS_CF,
+            RESOURCE_NAME);
+
+    return _qp.process(
+            query,
+            db::consistency_level::LOCAL_ONE,
+            {resource.name()}).then_wrapped([this, resource](future<::shared_ptr<cql3::untyped_result_set>> f) {
        try {
            auto res = f.get0();
-            return parallel_for_each(res->begin(), res->end(), [&qp, res, resource](const cql3::untyped_result_set::row& r) {
-                auto query = sprint("DELETE FROM %s.%s WHERE %s = ? AND %s = ?"
-                                , auth::AUTH_KS, PERMISSIONS_CF, USER_NAME, RESOURCE_NAME);
-                return qp.process(query, db::consistency_level::LOCAL_ONE, { r.get_as<sstring>(USER_NAME), resource.name() })
-                                .discard_result().handle_exception([resource](auto ep) {
+            return parallel_for_each(
+                    res->begin(),
+                    res->end(),
+                    [this, res, resource](const cql3::untyped_result_set::row& r) {
+                static const sstring query = sprint(
+                        "DELETE FROM %s.%s WHERE %s = ? AND %s = ?",
+                        meta::AUTH_KS,
+                        PERMISSIONS_CF,
+                        ROLE_NAME,
+                        RESOURCE_NAME);
+
+                return _qp.process(
+                        query,
+                        db::consistency_level::LOCAL_ONE,
+                        {r.get_as<sstring>(ROLE_NAME), resource.name()}).discard_result().handle_exception(
+                                [resource](auto ep) {
                    try {
                        std::rethrow_exception(ep);
                    } catch (exceptions::request_execution_exception& e) {
@@ -229,12 +331,9 @@ future<> auth::default_authorizer::revoke_all(data_resource resource) {
    });
 }

-
-const auth::resource_ids& auth::default_authorizer::protected_resources() {
-    static const resource_ids ids({ data_resource(auth::AUTH_KS, PERMISSIONS_CF) });
-    return ids;
+const resource_set& default_authorizer::protected_resources() const { // Returns the set holding the data resource for meta::AUTH_KS / PERMISSIONS_CF.
+    static const resource_set resources({ make_data_resource(meta::AUTH_KS, PERMISSIONS_CF) }); // built once, on first call
+    return resources;
 }

-future<> auth::default_authorizer::validate_configuration() const {
-    return make_ready_future();
 }
--- a/auth/default_authorizer.hh
+++ b/auth/default_authorizer.hh
@@ -41,37 +41,62 @@

 #pragma once

-#include "authorizer.hh"
+#include <functional>
+
+#include <seastar/core/abort_source.hh>
+
+#include "auth/authorizer.hh"
+#include "cql3/query_processor.hh"
+#include "service/migration_manager.hh"

 namespace auth {

-class default_authorizer : public authorizer {
-public:
-    static const sstring DEFAULT_AUTHORIZER_NAME;
+const sstring& default_authorizer_name(); // Name returned by default_authorizer::qualified_java_name().
+
+class default_authorizer : public authorizer { // Authorizer implementation that runs its CQL through a query_processor.
+    cql3::query_processor& _qp; // executes the CQL statements issued by this authorizer
+
+    ::service::migration_manager& _migration_manager; // NOTE(review): presumably used for schema/table setup in start() - confirm
+
+    abort_source _as{}; // NOTE(review): presumably aborts background start-up work from stop() - confirm
+
+    future<> _finished{make_ready_future<>()}; // starts out as an already-resolved future
+
+public:
+    default_authorizer(cql3::query_processor&, ::service::migration_manager&);

-    default_authorizer();
    ~default_authorizer();

-    future<> init();
+    virtual future<> start() override;

-    future<permission_set> authorize(::shared_ptr<authenticated_user>, data_resource) const override;
+    virtual future<> stop() override;

-    future<> grant(::shared_ptr<authenticated_user>, permission_set, data_resource, sstring) override;
+    virtual const sstring& qualified_java_name() const override {
+        return default_authorizer_name(); // same string as the free function declared above the class
+    }

-    future<> revoke(::shared_ptr<authenticated_user>, permission_set, data_resource, sstring) override;
+    virtual future<permission_set> authorize(const role_or_anonymous&, const resource&) const override;

-    future<std::vector<permission_details>> list(::shared_ptr<authenticated_user>, permission_set, optional<data_resource>, optional<sstring>) const override;
+    virtual future<> grant(stdx::string_view, permission_set, const resource&) const override;

-    future<> revoke_all(sstring) override;
+    virtual future<> revoke( stdx::string_view, permission_set, const resource&) const override;

-    future<> revoke_all(data_resource) override;
+    virtual future<std::vector<permission_details>> list_all() const override;

-    const resource_ids& protected_resources() override;
+    virtual future<> revoke_all(stdx::string_view) const override;

-    future<> validate_configuration() const override;
+    virtual future<> revoke_all(const resource&) const override; // selects the roles recorded for the resource, then deletes each (role, resource) row
+
+    virtual const resource_set& protected_resources() const override; // the permissions table in the auth keyspace

 private:
-    future<> modify(::shared_ptr<authenticated_user>, permission_set, data_resource, sstring, sstring);
+    bool legacy_metadata_exists() const;
+
+    future<bool> any_granted() const;
+
+    future<> migrate_legacy_metadata() const;
+
+    future<> modify(stdx::string_view, permission_set, const resource&, stdx::string_view) const;
 };

 } /* namespace auth */
--- a/auth/password_authenticator.cc
+++ b/auth/password_authenticator.cc
@@ -39,35 +39,57 @@
 * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include <unistd.h>
-#include <crypt.h>
-#include <random>
-#include <chrono>
+#include "auth/password_authenticator.hh"

+extern "C" {
+#include <crypt.h>
+#include <unistd.h>
+}
+
+#include <algorithm>
+#include <chrono>
+#include <random>
+
+#include <boost/algorithm/cxx11/all_of.hpp>
 #include <seastar/core/reactor.hh>

-#include "auth.hh"
-#include "password_authenticator.hh"
-#include "authenticated_user.hh"
-#include "cql3/query_processor.hh"
+#include "auth/authenticated_user.hh"
+#include "auth/common.hh"
+#include "auth/roles-metadata.hh"
+#include "cql3/untyped_result_set.hh"
 #include "log.hh"
+#include "service/migration_manager.hh"
+#include "utils/class_registrator.hh"

-const sstring auth::password_authenticator::PASSWORD_AUTHENTICATOR_NAME("org.apache.cassandra.auth.PasswordAuthenticator");
+namespace auth {
+
+const sstring& password_authenticator_name() { // Qualified name: meta::AUTH_PACKAGE_NAME followed by "PasswordAuthenticator".
+    static const sstring qualified = meta::AUTH_PACKAGE_NAME + "PasswordAuthenticator"; // computed once, on first use
+    return qualified;
+}

 // name of the hash column.
 static const sstring SALTED_HASH = "salted_hash";
-static const sstring USER_NAME = "username";
-static const sstring DEFAULT_USER_NAME = auth::auth::DEFAULT_SUPERUSER_NAME;
-static const sstring DEFAULT_USER_PASSWORD = auth::auth::DEFAULT_SUPERUSER_NAME;
-static const sstring CREDENTIALS_CF = "credentials";
+static const sstring DEFAULT_USER_NAME = meta::DEFAULT_SUPERUSER_NAME; // role that receives the default credentials
+static const sstring DEFAULT_USER_PASSWORD = meta::DEFAULT_SUPERUSER_NAME; // the default password is the same string as the default superuser name

 static logging::logger plogger("password_authenticator");

-auth::password_authenticator::~password_authenticator()
-{}
+// To ensure correct initialization order, we unfortunately need to use a string literal.
+static const class_registrator< // NOTE(review): presumably registers password_authenticator under the name below - confirm in utils/class_registrator.hh
+        authenticator,
+        password_authenticator,
+        cql3::query_processor&, // these two reference types match the constructor's parameters
+        ::service::migration_manager&> password_auth_reg("org.apache.cassandra.auth.PasswordAuthenticator");

-auth::password_authenticator::password_authenticator()
-{}
+password_authenticator::~password_authenticator() { // no explicit clean-up is performed here
+}
+
+password_authenticator::password_authenticator(cql3::query_processor& qp, ::service::migration_manager& mm)
+    : _qp(qp)
+    , _migration_manager(mm)
+    , _stopped(make_ready_future<>()) { // already resolved, so stop() has a future to return before start() assigns one
+}

 // TODO: blowfish
 // Origin uses Java bcrypt library, i.e. blowfish salt
@@ -88,12 +110,10 @@ auth::password_authenticator::password_authenticator()
 // and some old-fashioned random salt generation.

 static constexpr size_t rand_bytes = 16;
+static thread_local crypt_data tlcrypt = { 0, }; // per-thread crypt_r() state; replaces the per-call heap allocation removed below

 static sstring hashpw(const sstring& pass, const sstring& salt) {
-    // crypt_data is huge. should this be a thread_local static?
-    auto tmp = std::make_unique<crypt_data>();
-    tmp->initialized = 0;
-    auto res = crypt_r(pass.c_str(), salt.c_str(), tmp.get());
+    auto res = crypt_r(pass.c_str(), salt.c_str(), &tlcrypt);
    if (res == nullptr) {
        throw std::system_error(errno, std::system_category());
    }
@@ -122,17 +142,16 @@ static sstring gensalt() {
    sstring salt;

    if (!prefix.empty()) {
-        return prefix + salt;
+        return prefix + input;
    }

-    auto tmp = std::make_unique<crypt_data>();
-    tmp->initialized = 0;
-
    // Try in order:
    // blowfish 2011 fix, blowfish, sha512, sha256, md5
    for (sstring pfx : { "$2y$", "$2a$", "$6$", "$5$", "$1$" }) {
        salt = pfx + input;
-        if (crypt_r("fisk", salt.c_str(), tmp.get())) {
+        const char* e = crypt_r("fisk", salt.c_str(), &tlcrypt);
+
+        if (e && (e[0] != '*')) {
            prefix = pfx;
            return salt;
        }
@@ -144,63 +163,125 @@ static sstring hashpw(const sstring& pass) {
    return hashpw(pass, gensalt());
 }

-future<> auth::password_authenticator::init() {
-    gensalt(); // do this once to determine usable hashing
+static bool has_salted_hash(const cql3::untyped_result_set_row& row) { // Predicate: does this row's salted_hash cell hold a non-null text value?
+    return utf8_type->deserialize(row.get_blob(SALTED_HASH)) != data_value::make_null(utf8_type); // compares the deserialized cell against a null utf8 value
+}

-    sstring create_table = sprint(
-                    "CREATE TABLE %s.%s ("
-                                    "%s text,"
-                                    "%s text," // salt + hash + number of rounds
-                                    "options map<text,text>,"// for future extensions
-                                    "PRIMARY KEY(%s)"
-                                    ") WITH gc_grace_seconds=%d",
-                    auth::auth::AUTH_KS,
-                    CREDENTIALS_CF, USER_NAME, SALTED_HASH, USER_NAME,
-                    90 * 24 * 60 * 60); // 3 months.
+static const sstring update_row_query = sprint( // CQL that sets a role's salted hash; shared by migration, default-user creation and create()
+        "UPDATE %s SET %s = ? WHERE %s = ?",
+        meta::roles_table::qualified_name(),
+        SALTED_HASH, // first bind marker: the new hash
+        meta::roles_table::role_col_name); // second bind marker: the role name

-    return auth::setup_table(CREDENTIALS_CF, create_table).then([this] {
-        // instead of once-timer, just schedule this later
-        auth::schedule_when_up([] {
-            return auth::has_existing_users(CREDENTIALS_CF, DEFAULT_USER_NAME, USER_NAME).then([](bool exists) {
-                if (!exists) {
-                    cql3::get_local_query_processor().process(sprint("INSERT INTO %s.%s (%s, %s) VALUES (?, ?) USING TIMESTAMP 0",
-                                                    auth::AUTH_KS,
-                                                    CREDENTIALS_CF,
-                                                    USER_NAME, SALTED_HASH
-                                    ),
-                                    db::consistency_level::ONE, {DEFAULT_USER_NAME, hashpw(DEFAULT_USER_PASSWORD)}).then([](auto) {
-                                        plogger.info("Created default user '{}'", DEFAULT_USER_NAME);
-                                    });
-                }
-            });
-        });
+static const sstring legacy_table_name{"credentials"}; // table name used by the code this patch removes (CREDENTIALS_CF)
+
+bool password_authenticator::legacy_metadata_exists() const { // True when the local database has a schema for <auth keyspace>.credentials.
+    return _qp.db().local().has_schema(meta::AUTH_KS, legacy_table_name);
+}
+
+future<> password_authenticator::migrate_legacy_metadata() const { // Copies every (username, salted_hash) row of the legacy table into the roles table.
+    plogger.info("Starting migration of legacy authentication metadata.");
+    static const sstring query = sprint("SELECT * FROM %s.%s", meta::AUTH_KS, legacy_table_name);
+
+    return _qp.process(
+            query,
+            db::consistency_level::QUORUM).then([this](::shared_ptr<cql3::untyped_result_set> results) {
+        return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) { // rows are processed one after another
+            auto username = row.get_as<sstring>("username");
+            auto salted_hash = row.get_as<sstring>(SALTED_HASH);
+
+            return _qp.process(
+                    update_row_query,
+                    consistency_for_user(username),
+                    {std::move(salted_hash), username}).discard_result();
+        }).finally([results] {}); // the capture keeps the result set alive until iteration has finished
+    }).then([] {
+        plogger.info("Finished migrating legacy authentication metadata.");
+    }).handle_exception([](std::exception_ptr ep) {
+        plogger.error("Encountered an error during migration: {}", ep); // log the cause here; the rethrown exception may only surface much later
+        std::rethrow_exception(ep);
    });
 }

-db::consistency_level auth::password_authenticator::consistency_for_user(const sstring& username) {
-    if (username == DEFAULT_USER_NAME) {
+future<> password_authenticator::create_default_if_missing() const { // Stores a hash of the default password unless the default role row already has one.
+    return default_role_row_satisfies(_qp, &has_salted_hash).then([this](bool already_present) {
+        if (already_present) {
+            return make_ready_future<>(); // nothing to create
+        }
+
+        return _qp.process(
+                update_row_query,
+                db::consistency_level::QUORUM,
+                {hashpw(DEFAULT_USER_PASSWORD), DEFAULT_USER_NAME}).then([](auto&&) {
+            plogger.info("Created default superuser authentication record.");
+        });
+    });
+}
+
+future<> password_authenticator::start() { // Creates the roles table if missing and schedules the one-time credential set-up.
+    return once_among_shards([this] {
+        gensalt(); // do this once to determine usable hashing
+
+        auto f = create_metadata_table_if_missing(
+                meta::roles_table::name,
+                _qp,
+                meta::roles_table::creation_query(),
+                _migration_manager);
+
+        _stopped = do_after_system_ready(_as, [this] { // background work; stop() aborts via _as and then waits on _stopped
+            return async([this] {
+                wait_for_schema_agreement(_migration_manager, _qp.db().local()).get0();
+
+                if (any_nondefault_role_row_satisfies(_qp, &has_salted_hash).get0()) { // existing new-style data wins over legacy data
+                    if (legacy_metadata_exists()) {
+                        plogger.warn("Ignoring legacy authentication metadata since nondefault data already exist.");
+                    }
+
+                    return;
+                }
+
+                if (legacy_metadata_exists()) {
+                    migrate_legacy_metadata().get0();
+                    return; // no default record is created after a migration
+                }
+
+                create_default_if_missing().get0();
+            });
+        });
+
+        return f; // the returned future tracks table creation only, not the background work above
+    });
+}
+
+future<> password_authenticator::stop() { // Aborts the work scheduled by start() and returns the future tracking it.
+    _as.request_abort(); // _as is the abort source that start() handed to do_after_system_ready()
+    return _stopped.handle_exception_type([] (const sleep_aborted&) { }); // sleep_aborted is swallowed; NOTE(review): presumably the expected result of the abort above - confirm
+}
+
+db::consistency_level password_authenticator::consistency_for_user(stdx::string_view role_name) { // QUORUM for the default superuser, LOCAL_ONE for every other role.
+    if (role_name == DEFAULT_USER_NAME) {
        return db::consistency_level::QUORUM;
    }
    return db::consistency_level::LOCAL_ONE;
 }

-const sstring& auth::password_authenticator::class_name() const {
-    return PASSWORD_AUTHENTICATOR_NAME;
+const sstring& password_authenticator::qualified_java_name() const { // Same string as the free function password_authenticator_name().
+    return password_authenticator_name();
 }

-bool auth::password_authenticator::require_authentication() const {
+bool password_authenticator::require_authentication() const { // This authenticator always requires authentication.
    return true;
 }

-auth::authenticator::option_set auth::password_authenticator::supported_options() const {
-    return option_set::of<option::PASSWORD>();
+authentication_option_set password_authenticator::supported_options() const { // Only the password option is supported.
+    return authentication_option_set{authentication_option::password};
 }

-auth::authenticator::option_set auth::password_authenticator::alterable_options() const {
-    return option_set::of<option::PASSWORD>();
+authentication_option_set password_authenticator::alterable_options() const { // Only the password option can be altered.
+    return authentication_option_set{authentication_option::password};
 }

-future<::shared_ptr<auth::authenticated_user> > auth::password_authenticator::authenticate(
+future<authenticated_user> password_authenticator::authenticate(
                const credentials_map& credentials) const {
    if (!credentials.count(USERNAME_KEY)) {
        throw exceptions::authentication_exception(sprint("Required key '%s' is missing", USERNAME_KEY));
@@ -218,17 +299,24 @@ future<::shared_ptr<auth::authenticated_user> > auth::password_authenticator::au
    // Rely on query processing caching statements instead, and lets assume
    // that a map lookup string->statement is not gonna kill us much.
    return futurize_apply([this, username, password] {
-        auto& qp = cql3::get_local_query_processor();
-        return qp.process(sprint("SELECT %s FROM %s.%s WHERE %s = ?", SALTED_HASH,
-                                        auth::AUTH_KS, CREDENTIALS_CF, USER_NAME),
-                        consistency_for_user(username), {username}, true);
+        static const sstring query = sprint(
+                "SELECT %s FROM %s WHERE %s = ?",
+                SALTED_HASH,
+                meta::roles_table::qualified_name(),
+                meta::roles_table::role_col_name);
+
+        return _qp.process(
+                query,
+                consistency_for_user(username),
+                {username},
+                true);
    }).then_wrapped([=](future<::shared_ptr<cql3::untyped_result_set>> f) {
        try {
            auto res = f.get0();
            if (res->empty() || !checkpw(password, res->one().get_as<sstring>(SALTED_HASH))) {
                throw exceptions::authentication_exception("Username and/or password are incorrect");
            }
-            return make_ready_future<::shared_ptr<authenticated_user>>(::make_shared<authenticated_user>(username));
+            return make_ready_future<authenticated_user>(username);
        } catch (std::system_error &) {
            std::throw_with_nested(exceptions::authentication_exception("Could not verify password"));
        } catch (exceptions::request_execution_exception& e) {
@@ -239,54 +327,60 @@ future<::shared_ptr<auth::authenticated_user> > auth::password_authenticator::au
    });
 }

-future<> auth::password_authenticator::create(sstring username,
-                const option_map& options) {
-    try {
-        auto password = boost::any_cast<sstring>(options.at(option::PASSWORD));
-        auto query = sprint("INSERT INTO %s.%s (%s, %s) VALUES (?, ?)",
-                        auth::AUTH_KS, CREDENTIALS_CF, USER_NAME, SALTED_HASH);
-        auto& qp = cql3::get_local_query_processor();
-        return qp.process(query, consistency_for_user(username), { username, hashpw(password) }).discard_result();
-    } catch (std::out_of_range&) {
-        throw exceptions::invalid_request_exception("PasswordAuthenticator requires PASSWORD option");
+future<> password_authenticator::create(stdx::string_view role_name, const authentication_options& options) const { // Stores the hashed password for role_name, if one was supplied.
+    if (!options.password) { // no password option: nothing is written (the removed code threw here instead)
+        return make_ready_future<>();
    }
+
+    return _qp.process(
+            update_row_query, // an UPDATE of the salted_hash column, not an INSERT
+            consistency_for_user(role_name),
+            {hashpw(*options.password), sstring(role_name)}).discard_result();
 }

-future<> auth::password_authenticator::alter(sstring username,
-                const option_map& options) {
-    try {
-        auto password = boost::any_cast<sstring>(options.at(option::PASSWORD));
-        auto query = sprint("UPDATE %s.%s SET %s = ? WHERE %s = ?",
-                        auth::AUTH_KS, CREDENTIALS_CF, SALTED_HASH, USER_NAME);
-        auto& qp = cql3::get_local_query_processor();
-        return qp.process(query, consistency_for_user(username), { hashpw(password), username }).discard_result();
-    } catch (std::out_of_range&) {
-        throw exceptions::invalid_request_exception("PasswordAuthenticator requires PASSWORD option");
+future<> password_authenticator::alter(stdx::string_view role_name, const authentication_options& options) const { // Replaces the stored hash for role_name, if a password was supplied.
+    if (!options.password) { // no password option: nothing is written
+        return make_ready_future<>();
    }
+
+    static const sstring query = sprint( // NOTE(review): same text and arguments as the file-level update_row_query
+            "UPDATE %s SET %s = ? WHERE %s = ?",
+            meta::roles_table::qualified_name(),
+            SALTED_HASH,
+            meta::roles_table::role_col_name);
+
+    return _qp.process(
+            query,
+            consistency_for_user(role_name),
+            {hashpw(*options.password), sstring(role_name)}).discard_result();
 }

-future<> auth::password_authenticator::drop(sstring username) {
-    try {
-        auto query = sprint("DELETE FROM %s.%s WHERE %s = ?",
-                        auth::AUTH_KS, CREDENTIALS_CF, USER_NAME);
-        auto& qp = cql3::get_local_query_processor();
-        return qp.process(query, consistency_for_user(username), { username }).discard_result();
-    } catch (std::out_of_range&) {
-        throw exceptions::invalid_request_exception("PasswordAuthenticator requires PASSWORD option");
-    }
+future<> password_authenticator::drop(stdx::string_view name) const { // Removes the stored salted hash for the named role.
+    static const sstring query = sprint( // column-level DELETE: only salted_hash is cleared, the role row itself is not deleted
+            "DELETE %s FROM %s WHERE %s = ?",
+            SALTED_HASH,
+            meta::roles_table::qualified_name(),
+            meta::roles_table::role_col_name);
+
+    return _qp.process(query, consistency_for_user(name), {sstring(name)}).discard_result();
 }

-const auth::resource_ids& auth::password_authenticator::protected_resources() const {
-    static const resource_ids ids({ data_resource(auth::AUTH_KS, CREDENTIALS_CF) });
-    return ids;
+future<custom_options> password_authenticator::query_custom_options(stdx::string_view role_name) const { // role_name is unused here.
+    return make_ready_future<custom_options>(); // always a default-constructed custom_options
 }

-::shared_ptr<auth::authenticator::sasl_challenge> auth::password_authenticator::new_sasl_challenge() const {
-    class plain_text_password_challenge: public sasl_challenge {
+const resource_set& password_authenticator::protected_resources() const { // Returns the set holding the data resource for the roles table.
+    static const resource_set resources({make_data_resource(meta::AUTH_KS, meta::roles_table::name)}); // built once, on first call
+    return resources;
+}
+
+::shared_ptr<authenticator::sasl_challenge> password_authenticator::new_sasl_challenge() const {
+    class plain_text_password_challenge : public sasl_challenge {
+        const password_authenticator& _self;
+
    public:
-        plain_text_password_challenge(const password_authenticator& a)
-                        : _authenticator(a)
-        {}
+        plain_text_password_challenge(const password_authenticator& self) : _self(self) {
+        }

        /**
         * SASL PLAIN mechanism specifies that credentials are encoded in a
@@ -336,16 +430,19 @@ const auth::resource_ids& auth::password_authenticator::protected_resources() co
            _complete = true;
            return {};
        }
+
        bool is_complete() const override {
            return _complete;
        }
-        future<::shared_ptr<authenticated_user>> get_authenticated_user() const override {
-            return _authenticator.authenticate(_credentials);
+
+        future<authenticated_user> get_authenticated_user() const override {
+            return _self.authenticate(_credentials);
        }
    private:
-        const password_authenticator& _authenticator;
        credentials_map _credentials;
        bool _complete = false;
    };
    return ::make_shared<plain_text_password_challenge>(*this);
 }
+
+}
--- a/auth/password_authenticator.hh
+++ b/auth/password_authenticator.hh
@@ -41,32 +41,64 @@

 #pragma once

-#include "authenticator.hh"
+#include <seastar/core/abort_source.hh>
+
+#include "auth/authenticator.hh"
+#include "cql3/query_processor.hh"
+
+namespace service {
+class migration_manager; // forward declaration; this header only holds and passes references to it
+}

 namespace auth {

-class password_authenticator : public authenticator {
-public:
-    static const sstring PASSWORD_AUTHENTICATOR_NAME;
+const sstring& password_authenticator_name(); // Qualified Java-style class name of this authenticator.
+
+class password_authenticator : public authenticator { // Authenticator that checks passwords against salted hashes stored in the roles table.
+    cql3::query_processor& _qp; // executes all CQL issued by this authenticator
+    ::service::migration_manager& _migration_manager; // used by start() for table creation and schema agreement
+    future<> _stopped; // tracks the background work launched by start(); returned from stop()
+    seastar::abort_source _as; // handed to do_after_system_ready() in start(); aborted by stop()
+
+public:
+    static db::consistency_level consistency_for_user(stdx::string_view role_name); // QUORUM for the default superuser, LOCAL_ONE otherwise
+
+    password_authenticator(cql3::query_processor&, ::service::migration_manager&);

-    password_authenticator();
    ~password_authenticator();

-    future<> init();
+    virtual future<> start() override;

-    const sstring& class_name() const override;
-    bool require_authentication() const override;
-    option_set supported_options() const override;
-    option_set alterable_options() const override;
-    future<::shared_ptr<authenticated_user>> authenticate(const credentials_map& credentials) const override;
-    future<> create(sstring username, const option_map& options) override;
-    future<> alter(sstring username, const option_map& options) override;
-    future<> drop(sstring username) override;
-    const resource_ids& protected_resources() const override;
-    ::shared_ptr<sasl_challenge> new_sasl_challenge() const override;
+    virtual future<> stop() override;

+    virtual const sstring& qualified_java_name() const override;

-    static db::consistency_level consistency_for_user(const sstring& username);
+    virtual bool require_authentication() const override;
+
+    virtual authentication_option_set supported_options() const override;
+
+    virtual authentication_option_set alterable_options() const override;
+
+    virtual future<authenticated_user> authenticate(const credentials_map& credentials) const override;
+
+    virtual future<> create(stdx::string_view role_name, const authentication_options& options) const override; // no-op without a password option
+
+    virtual future<> alter(stdx::string_view role_name, const authentication_options& options) const override; // no-op without a password option
+
+    virtual future<> drop(stdx::string_view role_name) const override; // clears only the stored salted hash
+
+    virtual future<custom_options> query_custom_options(stdx::string_view role_name) const override;
+
+    virtual const resource_set& protected_resources() const override;
+
+    virtual ::shared_ptr<sasl_challenge> new_sasl_challenge() const override;
+
+private:
+    bool legacy_metadata_exists() const; // is there a schema for the legacy "credentials" table?
+
+    future<> migrate_legacy_metadata() const; // copies legacy (username, salted_hash) rows into the roles table
+
+    future<> create_default_if_missing() const; // gives the default superuser a hash if it has none
 };

 }
--- a/auth/permission.cc
+++ b/auth/permission.cc
@@ -39,32 +39,33 @@
 * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include <unordered_map>
-#include <boost/algorithm/string.hpp>
-#include "permission.hh"
+#include "auth/permission.hh"
+
+#include <boost/algorithm/string.hpp>
+
+#include <unordered_map>
+
+const auth::permission_set auth::permissions::ALL = auth::permission_set::of< // every permission listed below; READ and WRITE are not included
+        auth::permission::CREATE,
+        auth::permission::ALTER,
+        auth::permission::DROP,
+        auth::permission::SELECT,
+        auth::permission::MODIFY,
+        auth::permission::AUTHORIZE,
+        auth::permission::DESCRIBE>();

-const auth::permission_set auth::permissions::ALL_DATA =
-                auth::permission_set::of<auth::permission::CREATE,
-                                auth::permission::ALTER, auth::permission::DROP,
-                                auth::permission::SELECT,
-                                auth::permission::MODIFY,
-                                auth::permission::AUTHORIZE>();
-const auth::permission_set auth::permissions::ALL = auth::permissions::ALL_DATA;
 const auth::permission_set auth::permissions::NONE;
-const auth::permission_set auth::permissions::ALTERATIONS =
-                auth::permission_set::of<auth::permission::CREATE,
-                                auth::permission::ALTER, auth::permission::DROP>();

 static const std::unordered_map<sstring, auth::permission> permission_names({
-    { "READ", auth::permission::READ },
-    { "WRITE", auth::permission::WRITE  },
-    { "CREATE", auth::permission::CREATE },
-    { "ALTER", auth::permission::ALTER },
-    { "DROP", auth::permission::DROP },
-    { "SELECT", auth::permission::SELECT  },
-    { "MODIFY", auth::permission::MODIFY   },
-    { "AUTHORIZE", auth::permission::AUTHORIZE },
-});
+        {"READ", auth::permission::READ}, // each entry maps a permission's textual name to its enum value
+        {"WRITE", auth::permission::WRITE},
+        {"CREATE", auth::permission::CREATE},
+        {"ALTER", auth::permission::ALTER},
+        {"DROP", auth::permission::DROP},
+        {"SELECT", auth::permission::SELECT},
+        {"MODIFY", auth::permission::MODIFY},
+        {"AUTHORIZE", auth::permission::AUTHORIZE},
+        {"DESCRIBE", auth::permission::DESCRIBE}});

 const sstring& auth::permissions::to_string(permission p) {
    for (auto& v : permission_names) {
--- a/auth/permission.hh
+++ b/auth/permission.hh
@@ -42,10 +42,11 @@
 #pragma once

 #include <unordered_set>
+
 #include <seastar/core/sstring.hh>

-#include "seastarx.hh"
 #include "enum_set.hh"
+#include "seastarx.hh"

 namespace auth {

@@ -66,9 +67,13 @@ enum class permission {

    // permission management
    AUTHORIZE, // required for GRANT and REVOKE.
+    DESCRIBE, // required on the root-level role resource to list all roles.
+
 };

-typedef enum_set<super_enum<permission,
+typedef enum_set<
+        super_enum<
+                permission,
                permission::READ,
                permission::WRITE,
                permission::CREATE,
@@ -76,16 +81,15 @@ typedef enum_set<super_enum<permission,
                permission::DROP,
                permission::SELECT,
                permission::MODIFY,
-                permission::AUTHORIZE>> permission_set;
+                permission::AUTHORIZE,
+                permission::DESCRIBE>> permission_set;

 bool operator<(const permission_set&, const permission_set&);

 namespace permissions {

-extern const permission_set ALL_DATA;
 extern const permission_set ALL;
 extern const permission_set NONE;
-extern const permission_set ALTERATIONS;

 const sstring& to_string(permission);
 permission from_string(const sstring&);
@@ -93,7 +97,6 @@ permission from_string(const sstring&);
 std::unordered_set<sstring> to_strings(const permission_set&);
 permission_set from_strings(const std::unordered_set<sstring>&);

-
 }

 }
--- a/auth/permissions_cache.cc
+++ b/auth/permissions_cache.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "auth/permissions_cache.hh"
+
+#include "auth/authorizer.hh"
+#include "auth/common.hh"
+#include "auth/service.hh"
+#include "db/config.hh"
+
+namespace auth {
+
+permissions_cache_config permissions_cache_config::from_db_config(const db::config& dc) { // Builds the cache settings from the matching db::config options.
+    using std::chrono::milliseconds; // both period options are wrapped as millisecond durations
+    permissions_cache_config settings;
+    settings.max_entries = dc.permissions_cache_max_entries();
+    settings.validity_period = milliseconds(dc.permissions_validity_in_ms());
+    settings.update_period = milliseconds(dc.permissions_update_interval_in_ms());
+    return settings;
+}
+
+permissions_cache::permissions_cache(const permissions_cache_config& c, service& ser, logging::logger& log) // Builds the loading cache from the config and a loader callback.
+        : _cache(c.max_entries, c.validity_period, c.update_period, log, [&ser, &log](const key_type& k) { // loader captures ser and log by reference, so both must outlive the cache
+              log.debug("Refreshing permissions for {}", k.first);
+              return ser.get_uncached_permissions(k.first, k.second); // k.first is the role (or anonymous), k.second the resource
+          }) {
+}
+
+future<permission_set> permissions_cache::get(const role_or_anonymous& maybe_role, const resource& r) { // Looks up the permissions for the (role, resource) pair.
+    return do_with(key_type(maybe_role, r), [this](const auto& cache_key) { // do_with owns the key for the duration of the lookup
+        return _cache.get(cache_key);
+    });
+}
+
+}
--- a/auth/permissions_cache.hh
+++ b/auth/permissions_cache.hh
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <chrono>
+#include <experimental/string_view>
+#include <functional>
+#include <iostream>
+#include <optional>
+#include <utility>
+
+#include <seastar/core/future.hh>
+#include <seastar/core/shared_ptr.hh>
+#include <seastar/core/sstring.hh>
+
+#include "auth/authenticated_user.hh"
+#include "auth/permission.hh"
+#include "auth/resource.hh"
+#include "auth/role_or_anonymous.hh"
+#include "log.hh"
+#include "stdx.hh"
+#include "utils/hash.hh"
+#include "utils/loading_cache.hh"
+
+namespace std { // NOTE(review): the C++ standard does not permit adding new overloads to namespace std - confirm this is intentional
+
+inline std::ostream& operator<<(std::ostream& os, const pair<auth::role_or_anonymous, auth::resource>& p) { // Prints a cache key as "{role: ..., resource: ...}".
+    os << "{role: " << p.first << ", resource: " << p.second << "}";
+    return os;
+}
+
+}
+
+namespace db {
+class config; // forward declaration; this header only takes it by const reference
+}
+
+namespace auth {
+
+class service;
+
+struct permissions_cache_config final { // Settings consumed by the permissions_cache constructor.
+    static permissions_cache_config from_db_config(const db::config&); // fills the fields from the corresponding db::config options
+
+    std::size_t max_entries; // passed as the first argument of the underlying loading cache
+    std::chrono::milliseconds validity_period; // passed as the second argument
+    std::chrono::milliseconds update_period; // passed as the third argument
+};
+
+class permissions_cache final { // Cache of permission sets keyed by (role or anonymous, resource).
+    using cache_type = utils::loading_cache<
+            std::pair<role_or_anonymous, resource>, // key
+            permission_set, // cached value
+            utils::loading_cache_reload_enabled::yes,
+            utils::simple_entry_size<permission_set>,
+            utils::tuple_hash>;
+
+    using key_type = typename cache_type::key_type;
+
+    cache_type _cache;
+
+public:
+    explicit permissions_cache(const permissions_cache_config&, service&, logging::logger&); // the service and logger are captured by reference by the loader
+
+    future <> stop() { // forwards to the underlying cache's stop()
+        return _cache.stop();
+    }
+
+    future<permission_set> get(const role_or_anonymous&, const resource&); // forwards to _cache.get() with the pair as key
+};
+
+}
--- a/auth/resource.cc
+++ b/auth/resource.cc
@@ -0,0 +1,296 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (C) 2016 ScyllaDB
+ *
+ * Modified by ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "auth/resource.hh"
+
+#include <algorithm>
+#include <iterator>
+#include <unordered_map>
+
+#include <boost/algorithm/string/join.hpp>
+#include <boost/algorithm/string/split.hpp>
+
+#include "service/storage_proxy.hh"
+
+namespace auth {
+
+// Print the lower-case name of a resource kind ("data" or "role").
+// A value that matches no enumerator prints nothing.
+std::ostream& operator<<(std::ostream& os, resource_kind kind) {
+    switch (kind) {
+        case resource_kind::data:
+            return os << "data";
+        case resource_kind::role:
+            return os << "role";
+    }
+
+    return os;
+}
+
+// The root prefix of each kind of resource. This is the first part of every resource name of that kind
+// (see the `resource(resource_kind)` constructor and `parse_resource`).
+static const std::unordered_map<resource_kind, stdx::string_view> roots{
+        {resource_kind::data, "data"},
+        {resource_kind::role, "roles"}};
+
+// The maximum number of parts that may follow the root prefix for each kind of resource.
+// `parse_resource` rejects names with more parts than this.
+static const std::unordered_map<resource_kind, std::size_t> max_parts{
+        {resource_kind::data, 2},
+        {resource_kind::role, 1}};
+
+// The permissions that can be granted on a data resource.
+//
+// A resource without a table part (the root, or a keyspace) additionally admits CREATE.
+static permission_set applicable_permissions(const data_resource_view& dv) {
+    if (!dv.table()) {
+        return permission_set::of<
+                permission::CREATE,
+                permission::ALTER,
+                permission::DROP,
+                permission::SELECT,
+                permission::MODIFY,
+                permission::AUTHORIZE>();
+    }
+
+    // A specific table: everything above except CREATE.
+    return permission_set::of<
+            permission::ALTER,
+            permission::DROP,
+            permission::SELECT,
+            permission::MODIFY,
+            permission::AUTHORIZE>();
+}
+
+// The permissions that can be granted on a role resource.
+//
+// The root of all roles additionally admits CREATE and DESCRIBE.
+static permission_set applicable_permissions(const role_resource_view& rv) {
+    if (!rv.role()) {
+        return permission_set::of<
+                permission::CREATE,
+                permission::ALTER,
+                permission::DROP,
+                permission::AUTHORIZE,
+                permission::DESCRIBE>();
+    }
+
+    // A specific role.
+    return permission_set::of<permission::ALTER, permission::DROP, permission::AUTHORIZE>();
+}
+
+// Construct the root resource of a kind: its only part is the root prefix looked up in `roots`.
+resource::resource(resource_kind kind) : _kind(kind), _parts{sstring(roots.at(kind))}  {
+}
+
+// Construct a resource from its kind and the parts that follow the root prefix.
+//
+// Delegates to the root constructor (which supplies the root prefix) and then appends `parts`, moving each element.
+resource::resource(resource_kind kind, std::vector<sstring> parts) : resource(kind) {
+    _parts.reserve(parts.size() + 1);
+
+    for (auto& part : parts) {
+        _parts.push_back(std::move(part));
+    }
+}
+
+// A keyspace: the data root followed by the keyspace name.
+resource::resource(data_resource_t, stdx::string_view keyspace)
+        : resource(resource_kind::data, std::vector<sstring>{sstring(keyspace)}) {
+}
+
+// A table: the data root followed by the keyspace name and the table name.
+resource::resource(data_resource_t, stdx::string_view keyspace, stdx::string_view table)
+        : resource(resource_kind::data, std::vector<sstring>{sstring(keyspace), sstring(table)}) {
+}
+
+// A role: the role root followed by the role name.
+resource::resource(role_resource_t, stdx::string_view role)
+        : resource(resource_kind::role, std::vector<sstring>{sstring(role)}) {
+}
+
+// The parts of the resource (root prefix first) joined with '/'. `parse_resource` splits on the same separator.
+sstring resource::name() const {
+    return boost::algorithm::join(_parts, "/");
+}
+
+// The resource obtained by dropping the last part, or an empty optional for a root resource
+// (one consisting only of the root prefix).
+std::optional<resource> resource::parent() const {
+    if (_parts.size() != 1) {
+        resource without_last_part = *this;
+        without_last_part._parts.pop_back();
+        return without_last_part;
+    }
+
+    return std::nullopt;
+}
+
+// The permissions that can be granted on this resource, determined through the view that matches its kind.
+permission_set resource::applicable_permissions() const {
+    switch (_kind) {
+        case resource_kind::data:
+            return ::auth::applicable_permissions(data_resource_view(*this));
+        case resource_kind::role:
+            return ::auth::applicable_permissions(role_resource_view(*this));
+    }
+
+    // Only reached when `_kind` matches no enumerator: a default-constructed set.
+    permission_set none;
+    return none;
+}
+
+// Order resources first by kind, then lexicographically by the parts that follow the root prefix.
+bool operator<(const resource& r1, const resource& r2) {
+    if (r1._kind == r2._kind) {
+        // The first part is the root prefix, which is a function of the kind, so it is skipped.
+        return std::lexicographical_compare(
+                std::next(r1._parts.cbegin()),
+                r1._parts.cend(),
+                std::next(r2._parts.cbegin()),
+                r2._parts.cend());
+    }
+
+    return r1._kind < r2._kind;
+}
+
+// Print a resource in human-readable form by delegating to the view that matches its kind.
+std::ostream& operator<<(std::ostream& os, const resource& r) {
+    switch (r.kind()) {
+        case resource_kind::data:
+            os << data_resource_view(r);
+            break;
+        case resource_kind::role:
+            os << role_resource_view(r);
+            break;
+    }
+
+    return os;
+}
+
+// Bind a "data" view to `r`. Throws `resource_kind_mismatch` when `r` is not a data resource.
+data_resource_view::data_resource_view(const resource& r) : _resource(r) {
+    const resource_kind actual = r._kind;
+
+    if (actual != resource_kind::data) {
+        throw resource_kind_mismatch(resource_kind::data, actual);
+    }
+}
+
+// The keyspace name (the part right after the root prefix), or an empty optional for the root resource.
+std::optional<stdx::string_view> data_resource_view::keyspace() const {
+    const auto& parts = _resource._parts;
+
+    if (parts.size() != 1) {
+        return parts[1];
+    }
+
+    return {};
+}
+
+// The table name (the second part after the root prefix), or an empty optional when the resource has no table part.
+std::optional<stdx::string_view> data_resource_view::table() const {
+    const auto& parts = _resource._parts;
+
+    if (parts.size() > 2) {
+        return parts[2];
+    }
+
+    return {};
+}
+
+// Print a data resource as `<all keyspaces>`, `<keyspace K>` or `<table K.T>`, depending on which parts are present.
+std::ostream& operator<<(std::ostream& os, const data_resource_view& v) {
+    const auto ks = v.keyspace();
+    if (!ks) {
+        return os << "<all keyspaces>";
+    }
+
+    const auto tbl = v.table();
+    if (!tbl) {
+        return os << "<keyspace " << *ks << '>';
+    }
+
+    return os << "<table " << *ks << '.' << *tbl << '>';
+}
+
+// Bind a "role" view to `r`. Throws `resource_kind_mismatch` when `r` is not a role resource.
+role_resource_view::role_resource_view(const resource& r) : _resource(r) {
+    const resource_kind actual = r._kind;
+
+    if (actual != resource_kind::role) {
+        throw resource_kind_mismatch(resource_kind::role, actual);
+    }
+}
+
+// The role name (the part right after the root prefix), or an empty optional for the root of all roles.
+std::optional<stdx::string_view> role_resource_view::role() const {
+    const auto& parts = _resource._parts;
+
+    if (parts.size() != 1) {
+        return parts[1];
+    }
+
+    return {};
+}
+
+// Print a role resource as `<all roles>` for the root, or `<role R>` for a specific role.
+std::ostream& operator<<(std::ostream& os, const role_resource_view& v) {
+    const auto name = v.role();
+
+    if (name) {
+        return os << "<role " << *name << '>';
+    }
+
+    return os << "<all roles>";
+}
+
+// Parse a resource from its '/'-separated name (the inverse of `resource::name`).
+//
+// Throws `invalid_resource_name` when the first part is not a known root prefix, or when more parts follow the
+// root than `max_parts` allows for that kind.
+resource parse_resource(stdx::string_view name) {
+    // The inverse of `roots` (root prefix -> kind), built once on first use.
+    static const std::unordered_map<stdx::string_view, resource_kind> reverse_roots = [] {
+        std::unordered_map<stdx::string_view, resource_kind> inverted;
+
+        for (const auto& entry : roots) {
+            inverted.emplace(entry.second, entry.first);
+        }
+
+        return inverted;
+    }();
+
+    std::vector<sstring> segments;
+    boost::split(segments, name, [](char c) { return c == '/'; });
+
+    if (segments.empty()) {
+        throw invalid_resource_name(name);
+    }
+
+    const auto root = reverse_roots.find(segments.front());
+    if (root == reverse_roots.end()) {
+        throw invalid_resource_name(name);
+    }
+
+    const resource_kind kind = root->second;
+
+    // Drop the root prefix: the constructor below adds it back based on `kind`.
+    segments.erase(segments.begin());
+
+    if (segments.size() > max_parts.at(kind)) {
+        throw invalid_resource_name(name);
+    }
+
+    return resource(kind, std::move(segments));
+}
+
+// The root of all data resources. Its constructor reads `roots`, which is defined earlier in this file.
+static const resource the_root_data_resource{resource_kind::data};
+
+// Accessor for the shared root data resource.
+const resource& root_data_resource() {
+    return the_root_data_resource;
+}
+
+// The root of all role resources. Its constructor reads `roots`, which is defined earlier in this file.
+static const resource the_root_role_resource{resource_kind::role};
+
+// Accessor for the shared root role resource.
+const resource& root_role_resource() {
+    return the_root_role_resource;
+}
+
+// Collect a resource together with all of its ancestors (its parent, its parent's parent, and so on up to the root
+// of its kind) into a set.
+resource_set expand_resource_family(const resource& rr) {
+    resource r = rr;
+    resource_set rs;
+
+    while (true) {
+        // The parent must be computed before `r` is moved into the set.
+        // Deliberately not `const`: moving out of a const optional would silently copy the parent instead.
+        auto pr = r.parent();
+        rs.insert(std::move(r));
+
+        if (!pr) {
+            break;
+        }
+
+        // `r` was moved from above; assigning to it gives it a well-defined value again.
+        r = std::move(*pr);
+    }
+
+    return rs;
+}
+
+}
--- a/auth/resource.hh
+++ b/auth/resource.hh
@@ -0,0 +1,254 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (C) 2016 ScyllaDB
+ *
+ * Modified by ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <experimental/string_view>
+#include <iostream>
+#include <optional>
+#include <stdexcept>
+#include <tuple>
+#include <vector>
+#include <unordered_set>
+
+#include <seastar/core/print.hh>
+#include <seastar/core/sstring.hh>
+
+#include "auth/permission.hh"
+#include "seastarx.hh"
+#include "stdx.hh"
+#include "utils/hash.hh"
+
+namespace auth {
+
+///
+/// Thrown when a resource name cannot be parsed (see \ref parse_resource). The message includes the offending name.
+///
+class invalid_resource_name : public std::invalid_argument {
+public:
+    explicit invalid_resource_name(stdx::string_view name)
+            : std::invalid_argument(sprint("The resource name '%s' is invalid.", name)) {
+    }
+};
+
+///
+/// The kinds of resources that exist: data (keyspaces and tables) and roles.
+///
+enum class resource_kind {
+    data, role
+};
+
+///
+/// Print the name of the kind.
+///
+std::ostream& operator<<(std::ostream&, resource_kind);
+
+///
+/// Type tag for constructing data resources.
+///
+/// Selects the \ref resource constructors that take a keyspace, and optionally a table.
+///
+struct data_resource_t final {};
+
+///
+/// Type tag for constructing role resources.
+///
+/// Selects the \ref resource constructor that takes a role name.
+///
+struct role_resource_t final {};
+
+///
+/// Resources are entities that users can be granted permissions on.
+///
+/// There are data (keyspaces and tables) and role resources. There may be other kinds of resources in the future.
+///
+/// When they are stored as system metadata, resources have the form `root/part_0/part_1/.../part_n`. Each kind of
+/// resource has a specific root prefix, followed by a maximum of `n` parts (where `n` is distinct for each kind of
+/// resource as well). In this code, this form is called the "name".
+///
+/// Since all resources have this same structure, all the different kinds are stored in instances of the same class:
+/// \ref resource. When we wish to query a resource for kind-specific data (like the table of a "data" resource), we
+/// create a kind-specific "view" of the resource.
+///
+class resource final {
+    resource_kind _kind;
+
+    // The parts of the name. The first element is the root prefix for `_kind`; kind-specific parts follow.
+    std::vector<sstring> _parts;
+
+public:
+    ///
+    /// A root resource of a particular kind.
+    ///
+    explicit resource(resource_kind);
+
+    ///
+    /// A keyspace.
+    ///
+    resource(data_resource_t, stdx::string_view keyspace);
+
+    ///
+    /// A table in a keyspace.
+    ///
+    resource(data_resource_t, stdx::string_view keyspace, stdx::string_view table);
+
+    ///
+    /// A role.
+    ///
+    resource(role_resource_t, stdx::string_view role);
+
+    resource_kind kind() const noexcept {
+        return _kind;
+    }
+
+    ///
+    /// A machine-friendly identifier unique to each resource.
+    ///
+    sstring name() const;
+
+    ///
+    /// The resource with the last part removed, or an empty optional for a root resource.
+    ///
+    std::optional<resource> parent() const;
+
+    ///
+    /// The permissions that can be granted on this resource.
+    ///
+    permission_set applicable_permissions() const;
+
+private:
+    // Construct from a kind and the parts that follow the root prefix.
+    resource(resource_kind, std::vector<sstring> parts);
+
+    // `struct`, to agree with the class-key used by the `std::hash<auth::resource>` specialization later in this
+    // file (a mismatched key is legal but triggers -Wmismatched-tags).
+    friend struct std::hash<resource>;
+    friend class data_resource_view;
+    friend class role_resource_view;
+
+    friend bool operator<(const resource&, const resource&);
+    friend bool operator==(const resource&, const resource&);
+    friend resource parse_resource(stdx::string_view);
+};
+
+bool operator<(const resource&, const resource&);
+
+///
+/// Two resources are equal when they have the same kind and the same parts.
+///
+inline bool operator==(const resource& r1, const resource& r2) {
+    if (r1._kind != r2._kind) {
+        return false;
+    }
+
+    return r1._parts == r2._parts;
+}
+
+inline bool operator!=(const resource& r1, const resource& r2) {
+    const bool same = (r1 == r2);
+    return !same;
+}
+
+std::ostream& operator<<(std::ostream&, const resource&);
+
+///
+/// Thrown when a kind-specific view is created over a resource of a different kind.
+///
+/// The message names both the actual and the expected kind.
+///
+class resource_kind_mismatch : public std::invalid_argument {
+public:
+    explicit resource_kind_mismatch(resource_kind expected, resource_kind actual)
+        : std::invalid_argument(
+            sprint("This resource has kind '%s', but was expected to have kind '%s'.", actual, expected)) {
+    }
+};
+
+/// A "data" view of \ref resource.
+///
+/// If neither `keyspace` nor `table` is present, this is the root resource.
+class data_resource_view final {
+    // Held by reference: the view does not own the resource, which must outlive it.
+    const resource& _resource;
+
+public:
+    ///
+    /// \throws \ref resource_kind_mismatch if the argument is not a "data" resource.
+    ///
+    explicit data_resource_view(const resource& r);
+
+    ///
+    /// The keyspace name, or an empty optional for the root resource.
+    ///
+    std::optional<stdx::string_view> keyspace() const;
+
+    ///
+    /// The table name, or an empty optional when the resource has no table part.
+    ///
+    std::optional<stdx::string_view> table() const;
+};
+
+std::ostream& operator<<(std::ostream&, const data_resource_view&);
+
+///
+/// A "role" view of \ref resource.
+///
+/// If `role` is not present, this is the root resource.
+///
+class role_resource_view final {
+    // Held by reference: the view does not own the resource, which must outlive it.
+    const resource& _resource;
+
+public:
+    ///
+    /// \throws \ref resource_kind_mismatch if the argument is not a "role" resource.
+    ///
+    explicit role_resource_view(const resource&);
+
+    ///
+    /// The role name, or an empty optional for the root resource.
+    ///
+    std::optional<stdx::string_view> role() const;
+};
+
+std::ostream& operator<<(std::ostream&, const role_resource_view&);
+
+///
+/// Parse a resource from its name.
+///
+/// \throws \ref invalid_resource_name when the name is malformed.
+///
+resource parse_resource(stdx::string_view name);
+
+const resource& root_data_resource();
+
+///
+/// Convenience factory for the data resource of a keyspace.
+///
+inline resource make_data_resource(stdx::string_view keyspace) {
+    return resource(data_resource_t{}, keyspace);
+}
+
+///
+/// Convenience factory for the data resource of a table in a keyspace.
+///
+inline resource make_data_resource(stdx::string_view keyspace, stdx::string_view table) {
+    return resource(data_resource_t{}, keyspace, table);
+}
+
+const resource& root_role_resource();
+
+///
+/// Convenience factory for the resource of a single role.
+///
+inline resource make_role_resource(stdx::string_view role) {
+    return resource(role_resource_t{}, role);
+}
+
+}
+
+namespace std {
+
+///
+/// Hash support for `auth::resource`, as required by `std::unordered_set` and similar containers.
+///
+/// The hash combines the kind with the optional parts exposed by the view that matches that kind.
+///
+template <>
+struct hash<auth::resource> {
+    static size_t hash_data(const auth::data_resource_view& dv) {
+        return utils::tuple_hash()(std::make_tuple(auth::resource_kind::data, dv.keyspace(), dv.table()));
+    }
+
+    static size_t hash_role(const auth::role_resource_view& rv) {
+        return utils::tuple_hash()(std::make_tuple(auth::resource_kind::role, rv.role()));
+    }
+
+    size_t operator()(const auth::resource& r) const {
+        // Initialized so that a `_kind` matching no enumerator cannot lead to returning an indeterminate value.
+        std::size_t value = 0;
+
+        switch (r._kind) {
+        case auth::resource_kind::data: value = hash_data(auth::data_resource_view(r)); break;
+        case auth::resource_kind::role: value = hash_role(auth::role_resource_view(r)); break;
+        }
+
+        return value;
+    }
+};
+
+}
+
+namespace auth {
+
+// An unordered set of resources. Uses the `std::hash<auth::resource>` specialization defined above.
+using resource_set = std::unordered_set<resource>;
+
+//
+// A resource and all of its parents.
+//
+resource_set expand_resource_family(const resource&);
+
+}
--- a/auth/role_manager.hh
+++ b/auth/role_manager.hh
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <experimental/string_view>
+#include <memory>
+#include <optional>
+#include <stdexcept>
+#include <unordered_set>
+
+#include <seastar/core/future.hh>
+#include <seastar/core/print.hh>
+#include <seastar/core/sstring.hh>
+
+#include "auth/resource.hh"
+#include "seastarx.hh"
+#include "stdx.hh"
+
+namespace auth {
+
+///
+/// Attributes of a role supplied when it is created (see \ref role_manager::create). Both default to `false`.
+///
+struct role_config final {
+    bool is_superuser{false};
+    bool can_login{false};
+};
+
+///
+/// Differential update for altering existing roles.
+///
+struct role_config_update final {
+    // NOTE(review): presumably an empty optional means "leave this attribute unchanged" -- confirm in the
+    // implementations of `role_manager::alter`.
+    std::optional<bool> is_superuser{};
+    std::optional<bool> can_login{};
+};
+
+///
+/// A logical argument error for a role-management operation.
+///
+class roles_argument_exception : public std::invalid_argument {
+public:
+    // Inherit the constructors of `std::invalid_argument` (construction from a message).
+    using std::invalid_argument::invalid_argument;
+};
+
+///
+/// Error for creating a role that already exists (see \ref role_manager::create).
+///
+class role_already_exists : public roles_argument_exception {
+public:
+    explicit role_already_exists(stdx::string_view role_name)
+            : roles_argument_exception(sprint("Role %s already exists.", role_name)) {
+    }
+};
+
+///
+/// Error for an operation that names a role which does not exist.
+///
+class nonexistant_role : public roles_argument_exception {
+public:
+    explicit nonexistant_role(stdx::string_view role_name)
+            : roles_argument_exception(sprint("Role %s doesn't exist.", role_name)) {
+    }
+};
+
+///
+/// Error for granting a role to a grantee that already includes it (see \ref role_manager::grant).
+///
+class role_already_included : public roles_argument_exception {
+public:
+    role_already_included(stdx::string_view grantee_name, stdx::string_view role_name)
+            : roles_argument_exception(
+                      sprint("%s already includes role %s.", grantee_name, role_name)) {
+    }
+};
+
+///
+/// Error for revoking a role that was never granted to the revokee (see \ref role_manager::revoke).
+///
+class revoke_ungranted_role : public roles_argument_exception {
+public:
+    revoke_ungranted_role(stdx::string_view revokee_name, stdx::string_view role_name)
+            : roles_argument_exception(
+                      sprint("%s was not granted role %s, so it cannot be revoked.", revokee_name, role_name)) {
+    }
+};
+
+///
+/// A set of role names.
+///
+using role_set = std::unordered_set<sstring>;
+
+///
+/// Selects the behavior of \ref role_manager::query_granted.
+///
+/// NOTE(review): presumably `yes` includes roles granted transitively and `no` only those granted directly -- confirm
+/// in the implementations.
+///
+enum class recursive_role_query { yes, no };
+
+///
+/// Abstract client for managing roles.
+///
+/// All state necessary for managing roles is stored externally to the client instance.
+///
+/// All implementations should throw role-related exceptions as documented. Authorization is not addressed here, and
+/// access-control should never be enforced in implementations.
+///
+class role_manager {
+public:
+    virtual ~role_manager() = default;
+
+    ///
+    /// NOTE(review): presumably the fully-qualified Java class name of the corresponding implementation, used to
+    /// identify this role-manager by name -- confirm with the implementations.
+    ///
+    virtual stdx::string_view qualified_java_name() const noexcept = 0;
+
+    ///
+    /// NOTE(review): presumably the resources that this role-manager relies on and that should therefore be guarded
+    /// against modification -- confirm with the callers.
+    ///
+    virtual const resource_set& protected_resources() const = 0;
+
+    virtual future<> start() = 0;
+
+    virtual future<> stop() = 0;
+
+    ///
+    /// \returns an exceptional future with \ref role_already_exists for a role that has previously been created.
+    ///
+    virtual future<> create(stdx::string_view role_name, const role_config&) const = 0;
+
+    ///
+    /// \returns an exceptional future with \ref nonexistant_role if the role does not exist.
+    ///
+    virtual future<> drop(stdx::string_view role_name) const = 0;
+
+    ///
+    /// \returns an exceptional future with \ref nonexistant_role if the role does not exist.
+    ///
+    virtual future<> alter(stdx::string_view role_name, const role_config_update&) const = 0;
+
+    ///
+    /// Grant `role_name` to `grantee_name`.
+    ///
+    /// \returns an exceptional future with \ref nonexistant_role if either the role or the grantee do not exist.
+    ///
+    /// \returns an exceptional future with \ref role_already_included if granting the role would be redundant, or
+    /// create a cycle.
+    ///
+    virtual future<> grant(stdx::string_view grantee_name, stdx::string_view role_name) const = 0;
+
+    ///
+    /// Revoke `role_name` from `revokee_name`.
+    ///
+    /// \returns an exceptional future with \ref nonexistant_role if either the role or the revokee do not exist.
+    ///
+    /// \returns an exceptional future with \ref revoke_ungranted_role if the role was not granted.
+    ///
+    virtual future<> revoke(stdx::string_view revokee_name, stdx::string_view role_name) const = 0;
+
+    ///
+    /// The roles granted to `grantee`.
+    ///
+    /// \returns an exceptional future with \ref nonexistant_role if the role does not exist.
+    ///
+    virtual future<role_set> query_granted(stdx::string_view grantee, recursive_role_query) const = 0;
+
+    ///
+    /// NOTE(review): presumably the names of all existing roles -- confirm with the implementations.
+    ///
+    virtual future<role_set> query_all() const = 0;
+
+    ///
+    /// Unlike the other queries, no exceptional result is documented here for a role that does not exist.
+    ///
+    virtual future<bool> exists(stdx::string_view role_name) const = 0;
+
+    ///
+    /// \returns an exceptional future with \ref nonexistant_role if the role does not exist.
+    ///
+    virtual future<bool> is_superuser(stdx::string_view role_name) const = 0;
+
+    ///
+    /// \returns an exceptional future with \ref nonexistant_role if the role does not exist.
+    ///
+    virtual future<bool> can_login(stdx::string_view role_name) const = 0;
+};
+
+}
--- a/auth/role_or_anonymous.cc
+++ b/auth/role_or_anonymous.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2018 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "auth/role_or_anonymous.hh"
+
+#include <iostream>
+
+namespace auth {
+
+// Print the role name, or "<anonymous>" when there is none.
+std::ostream& operator<<(std::ostream& os, const role_or_anonymous& mr) {
+    return os << mr.name.value_or("<anonymous>");
+}
+
+// Equal when both are anonymous, or both name the same role (comparison of the optional names).
+bool operator==(const role_or_anonymous& mr1, const role_or_anonymous& mr2) noexcept {
+    return mr1.name == mr2.name;
+}
+
+// True when no role name is present.
+bool is_anonymous(const role_or_anonymous& mr) noexcept {
+    if (mr.name) {
+        return false;
+    }
+
+    return true;
+}
+
+}
--- a/auth/role_or_anonymous.hh
+++ b/auth/role_or_anonymous.hh
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2018 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <experimental/string_view>
+#include <functional>
+#include <iosfwd>
+#include <optional>
+
+#include <seastar/core/sstring.hh>
+
+#include "seastarx.hh"
+#include "stdx.hh"
+
+namespace auth {
+
+///
+/// Either a named role or the anonymous user.
+///
+/// A default-constructed instance has no name and is anonymous (see \ref is_anonymous).
+///
+class role_or_anonymous final {
+public:
+    // Empty for the anonymous user.
+    std::optional<sstring> name{};
+
+    role_or_anonymous() = default;
+
+    // Not `explicit`: a role name converts implicitly to a `role_or_anonymous`.
+    role_or_anonymous(stdx::string_view name) : name(name) {
+    }
+};
+
+///
+/// Print the role name, or a placeholder for the anonymous user.
+///
+std::ostream& operator<<(std::ostream&, const role_or_anonymous&);
+
+bool operator==(const role_or_anonymous&, const role_or_anonymous&) noexcept;
+
+// Defined in terms of `operator==`.
+inline bool operator!=(const role_or_anonymous& mr1, const role_or_anonymous& mr2) noexcept {
+    return !(mr1 == mr2);
+}
+
+///
+/// Whether the argument has no role name.
+///
+bool is_anonymous(const role_or_anonymous&) noexcept;
+
+}
+
+namespace std {
+
+///
+/// Hash support for `auth::role_or_anonymous`: the hash of the optional role name.
+///
+template <>
+struct hash<auth::role_or_anonymous> {
+    size_t operator()(const auth::role_or_anonymous& mr) const {
+        const hash<std::optional<sstring>> name_hasher{};
+        return name_hasher(mr.name);
+    }
+};
+
+}
--- a/auth/roles-metadata.cc
+++ b/auth/roles-metadata.cc
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2018 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "auth/roles-metadata.hh"
+
+#include <boost/algorithm/cxx11/any_of.hpp>
+#include <seastar/core/print.hh>
+#include <seastar/core/shared_ptr.hh>
+#include <seastar/core/sstring.hh>
+
+#include "auth/common.hh"
+#include "cql3/query_processor.hh"
+#include "cql3/untyped_result_set.hh"
+
+namespace auth {
+
+namespace meta {
+
+namespace roles_table {
+
+// The CQL statement that creates the roles table.
+//
+// The text is formatted once, on first call; the returned view refers to that function-local static string.
+stdx::string_view creation_query() {
+    static const sstring statement = sprint(
+            "CREATE TABLE %s ("
+            "  %s text PRIMARY KEY,"
+            "  can_login boolean,"
+            "  is_superuser boolean,"
+            "  member_of set<text>,"
+            "  salted_hash text"
+            ")",
+            qualified_name(),
+            role_col_name);
+
+    return statement;
+}
+
+// The name of the roles table qualified with the auth keyspace: `<AUTH_KS>.<name>`.
+//
+// The string is built once, on first call; the returned view refers to that function-local static string.
+stdx::string_view qualified_name() noexcept {
+    static const sstring full_name(AUTH_KS + "." + sstring(name));
+    return full_name;
+}
+
+}
+
+}
+
+// Evaluate `p` on the row of the default superuser role in the roles table.
+//
+// The row is first read at consistency level ONE. If that read finds nothing, it is read again at QUORUM.
+// Resolves to `false` when both reads come back empty.
+future<bool> default_role_row_satisfies(
+        cql3::query_processor& qp,
+        std::function<bool(const cql3::untyped_result_set_row&)> p) {
+    static const sstring query = sprint(
+            "SELECT * FROM %s WHERE %s = ?",
+            meta::roles_table::qualified_name(),
+            meta::roles_table::role_col_name);
+
+    // `do_with` keeps the predicate alive until the returned future resolves.
+    return do_with(std::move(p), [&qp](const auto& pred) {
+        return qp.process(
+                query,
+                db::consistency_level::ONE,
+                {meta::DEFAULT_SUPERUSER_NAME},
+                true).then([&qp, &pred](::shared_ptr<cql3::untyped_result_set> first_rows) {
+            if (!first_rows->empty()) {
+                return make_ready_future<bool>(pred(first_rows->one()));
+            }
+
+            return qp.process(
+                    query,
+                    db::consistency_level::QUORUM,
+                    {meta::DEFAULT_SUPERUSER_NAME},
+                    true).then([&pred](::shared_ptr<cql3::untyped_result_set> quorum_rows) {
+                const bool satisfied = !quorum_rows->empty() && pred(quorum_rows->one());
+                return make_ready_future<bool>(satisfied);
+            });
+        });
+    });
+}
+
+// Evaluate `p` on every row of the roles table other than the default superuser's, reading at QUORUM.
+//
+// Resolves to `true` as soon as one such row satisfies `p`, and to `false` when none does or the table is empty.
+future<bool> any_nondefault_role_row_satisfies(
+        cql3::query_processor& qp,
+        std::function<bool(const cql3::untyped_result_set_row&)> p) {
+    static const sstring query = sprint("SELECT * FROM %s", meta::roles_table::qualified_name());
+
+    // `do_with` keeps the predicate alive until the returned future resolves.
+    return do_with(std::move(p), [&qp](const auto& pred) {
+        return qp.process(
+                query,
+                db::consistency_level::QUORUM).then([&pred](::shared_ptr<cql3::untyped_result_set> rows) {
+            if (rows->empty()) {
+                return false;
+            }
+
+            static const sstring col_name = sstring(meta::roles_table::role_col_name);
+
+            const auto nondefault_and_satisfies = [&pred](const cql3::untyped_result_set_row& row) {
+                if (row.get_as<sstring>(col_name) != meta::DEFAULT_SUPERUSER_NAME) {
+                    return pred(row);
+                }
+
+                return false;
+            };
+
+            return boost::algorithm::any_of(*rows, nondefault_and_satisfies);
+        });
+    });
+}
+
+}
--- a/auth/roles-metadata.hh
+++ b/auth/roles-metadata.hh
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <experimental/string_view>
+#include <functional>
+
+#include <seastar/core/future.hh>
+
+#include "seastarx.hh"
+#include "stdx.hh"
+
+// Forward declarations: the declarations below only refer to these types by reference.
+namespace cql3 {
+class query_processor;
+class untyped_result_set_row;
+}
+
+namespace auth {
+
+namespace meta {
+
+namespace roles_table {
+
+///
+/// The CQL statement that creates the roles table.
+///
+stdx::string_view creation_query();
+
+///
+/// The unqualified name of the roles table.
+///
+constexpr stdx::string_view name{"roles", 5};
+
+///
+/// The name of the roles table qualified with its keyspace.
+///
+stdx::string_view qualified_name() noexcept;
+
+///
+/// The name of the column holding the role name.
+///
+constexpr stdx::string_view role_col_name{"role", 4};
+
+}
+
+}
+
+///
+/// Check that the default role satisfies a predicate, or `false` if the default role does not exist.
+///
+/// The predicate is applied to the default role's row in the roles table.
+///
+future<bool> default_role_row_satisfies(
+        cql3::query_processor&,
+        std::function<bool(const cql3::untyped_result_set_row&)>);
+
+///
+/// Check that any nondefault role satisfies a predicate. `false` if no nondefault roles exist.
+///
+/// The predicate is applied to the rows of the roles table, one role at a time.
+///
+future<bool> any_nondefault_role_row_satisfies(
+        cql3::query_processor&,
+        std::function<bool(const cql3::untyped_result_set_row&)>);
+
+}
--- a/auth/service.cc
+++ b/auth/service.cc
@@ -0,0 +1,580 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "auth/service.hh"
+
+#include <algorithm>
+#include <map>
+
+#include <seastar/core/future-util.hh>
+#include <seastar/core/sharded.hh>
+#include <seastar/core/shared_ptr.hh>
+
+#include "auth/allow_all_authenticator.hh"
+#include "auth/allow_all_authorizer.hh"
+#include "auth/common.hh"
+#include "auth/password_authenticator.hh"
+#include "auth/role_or_anonymous.hh"
+#include "auth/standard_role_manager.hh"
+#include "cql3/query_processor.hh"
+#include "cql3/untyped_result_set.hh"
+#include "db/config.hh"
+#include "db/consistency_level.hh"
+#include "exceptions/exceptions.hh"
+#include "log.hh"
+#include "service/migration_listener.hh"
+#include "utils/class_registrator.hh"
+
+namespace auth {
+
+namespace meta {
+
+static const sstring user_name_col_name("name");
+static const sstring superuser_col_name("super");
+
+}
+
+static logging::logger log("auth_service");
+
+class auth_migration_listener final : public ::service::migration_listener {
+    authorizer& _authorizer;
+
+public:
+    explicit auth_migration_listener(authorizer& a) : _authorizer(a) {
+    }
+
+private:
+    void on_create_keyspace(const sstring& ks_name) override {}
+    void on_create_column_family(const sstring& ks_name, const sstring& cf_name) override {}
+    void on_create_user_type(const sstring& ks_name, const sstring& type_name) override {}
+    void on_create_function(const sstring& ks_name, const sstring& function_name) override {}
+    void on_create_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
+    void on_create_view(const sstring& ks_name, const sstring& view_name) override {}
+
+    void on_update_keyspace(const sstring& ks_name) override {}
+    void on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool) override {}
+    void on_update_user_type(const sstring& ks_name, const sstring& type_name) override {}
+    void on_update_function(const sstring& ks_name, const sstring& function_name) override {}
+    void on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
+    void on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed) override {}
+
+    void on_drop_keyspace(const sstring& ks_name) override {
+        _authorizer.revoke_all(
+                auth::make_data_resource(ks_name)).handle_exception_type([](const unsupported_authorization_operation&) {
+            // Nothing.
+        });
+    }
+
+    void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) override {
+        _authorizer.revoke_all(
+                auth::make_data_resource(
+                        ks_name, cf_name)).handle_exception_type([](const unsupported_authorization_operation&) {
+            // Nothing.
+        });
+    }
+
+    void on_drop_user_type(const sstring& ks_name, const sstring& type_name) override {}
+    void on_drop_function(const sstring& ks_name, const sstring& function_name) override {}
+    void on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
+    void on_drop_view(const sstring& ks_name, const sstring& view_name) override {}
+};
+
+static future<> validate_role_exists(const service& ser, stdx::string_view role_name) {
+    return ser.underlying_role_manager().exists(role_name).then([role_name](bool exists) {
+        if (!exists) {
+            throw nonexistant_role(role_name);
+        }
+    });
+}
+
+service_config service_config::from_db_config(const db::config& dc) {
+    const qualified_name qualified_authorizer_name(meta::AUTH_PACKAGE_NAME, dc.authorizer());
+    const qualified_name qualified_authenticator_name(meta::AUTH_PACKAGE_NAME, dc.authenticator());
+    const qualified_name qualified_role_manager_name(meta::AUTH_PACKAGE_NAME, dc.role_manager());
+
+    service_config c;
+    c.authorizer_java_name = qualified_authorizer_name;
+    c.authenticator_java_name = qualified_authenticator_name;
+    c.role_manager_java_name = qualified_role_manager_name;
+
+    return c;
+}
+
+service::service(
+        permissions_cache_config c,
+        cql3::query_processor& qp,
+        ::service::migration_manager& mm,
+        std::unique_ptr<authorizer> z,
+        std::unique_ptr<authenticator> a,
+        std::unique_ptr<role_manager> r)
+            : _permissions_cache_config(std::move(c))
+            , _permissions_cache(nullptr)
+            , _qp(qp)
+            , _migration_manager(mm)
+            , _authorizer(std::move(z))
+            , _authenticator(std::move(a))
+            , _role_manager(std::move(r))
+            , _migration_listener(std::make_unique<auth_migration_listener>(*_authorizer)) {
+    // The password authenticator requires that the `standard_role_manager` is running so that the roles metadata table
+    // it manages is created and updated. This cross-module dependency is rather gross, but we have to maintain it for
+    // the sake of compatibility with Apache Cassandra and its choice of auth. schema.
+    if ((_authenticator->qualified_java_name() == password_authenticator_name())
+            && (_role_manager->qualified_java_name() != standard_role_manager_name())) {
+        throw incompatible_module_combination(
+                sprint(
+                        "The %s authenticator must be loaded alongside the %s role-manager.",
+                        password_authenticator_name(),
+                        standard_role_manager_name()));
+    }
+}
+
+service::service(
+        permissions_cache_config c,
+        cql3::query_processor& qp,
+        ::service::migration_manager& mm,
+        const service_config& sc)
+            : service(
+                      std::move(c),
+                      qp,
+                      mm,
+                      create_object<authorizer>(sc.authorizer_java_name, qp, mm),
+                      create_object<authenticator>(sc.authenticator_java_name, qp, mm),
+                      create_object<role_manager>(sc.role_manager_java_name, qp, mm)) {
+}
+
+future<> service::create_keyspace_if_missing() const {
+    auto& db = _qp.db().local();
+
+    if (!db.has_keyspace(meta::AUTH_KS)) {
+        std::map<sstring, sstring> opts{{"replication_factor", "1"}};
+
+        auto ksm = keyspace_metadata::new_keyspace(
+                meta::AUTH_KS,
+                "org.apache.cassandra.locator.SimpleStrategy",
+                opts,
+                true);
+
+        // We use min_timestamp so that default keyspace metadata will lose to any manual adjustments.
+        // See issue #2129.
+        return _migration_manager.announce_new_keyspace(ksm, api::min_timestamp, false);
+    }
+
+    return make_ready_future<>();
+}
+
+future<> service::start() {
+    return once_among_shards([this] {
+        return create_keyspace_if_missing();
+    }).then([this] {
+        return when_all_succeed(_role_manager->start(), _authorizer->start(), _authenticator->start());
+    }).then([this] {
+        _permissions_cache = std::make_unique<permissions_cache>(_permissions_cache_config, *this, log);
+    }).then([this] {
+        return once_among_shards([this] {
+            _migration_manager.register_listener(_migration_listener.get());
+            return make_ready_future<>();
+        });
+    });
+}
+
+future<> service::stop() {
+    return _permissions_cache->stop().then([this] {
+        return when_all_succeed(_role_manager->stop(), _authorizer->stop(), _authenticator->stop());
+    });
+}
+
+future<bool> service::has_existing_legacy_users() const {
+    if (!_qp.db().local().has_schema(meta::AUTH_KS, meta::USERS_CF)) {
+        return make_ready_future<bool>(false);
+    }
+
+    static const sstring default_user_query = sprint(
+            "SELECT * FROM %s.%s WHERE %s = ?",
+            meta::AUTH_KS,
+            meta::USERS_CF,
+            meta::user_name_col_name);
+
+    static const sstring all_users_query = sprint(
+            "SELECT * FROM %s.%s LIMIT 1",
+            meta::AUTH_KS,
+            meta::USERS_CF);
+
+    // This logic is borrowed directly from Apache Cassandra. By first checking for the presence of the default user, we
+    // can potentially avoid doing a range query with a high consistency level.
+
+    return _qp.process(
+            default_user_query,
+            db::consistency_level::ONE,
+            {meta::DEFAULT_SUPERUSER_NAME},
+            true).then([this](auto results) {
+        if (!results->empty()) {
+            return make_ready_future<bool>(true);
+        }
+
+        return _qp.process(
+                default_user_query,
+                db::consistency_level::QUORUM,
+                {meta::DEFAULT_SUPERUSER_NAME},
+                true).then([this](auto results) {
+            if (!results->empty()) {
+                return make_ready_future<bool>(true);
+            }
+
+            return _qp.process(
+                    all_users_query,
+                    db::consistency_level::QUORUM).then([](auto results) {
+                return make_ready_future<bool>(!results->empty());
+            });
+        });
+    });
+}
+
+future<permission_set>
+service::get_uncached_permissions(const role_or_anonymous& maybe_role, const resource& r) const {
+    if (is_anonymous(maybe_role)) {
+        return _authorizer->authorize(maybe_role, r);
+    }
+
+    const stdx::string_view role_name = *maybe_role.name;
+
+    return has_superuser(role_name).then([this, role_name, &r](bool superuser) {
+        if (superuser) {
+            return make_ready_future<permission_set>(r.applicable_permissions());
+        }
+
+        //
+        // Aggregate the permissions from all granted roles.
+        //
+
+        return do_with(permission_set(), [this, role_name, &r](auto& all_perms) {
+            return get_roles(role_name).then([this, &r, &all_perms](role_set all_roles) {
+                return do_with(std::move(all_roles), [this, &r, &all_perms](const auto& all_roles) {
+                    return parallel_for_each(all_roles, [this, &r, &all_perms](stdx::string_view role_name) {
+                        return _authorizer->authorize(role_name, r).then([&all_perms](permission_set perms) {
+                            all_perms = permission_set::from_mask(all_perms.mask() | perms.mask());
+                        });
+                    });
+                });
+            }).then([&all_perms] {
+                return all_perms;
+            });
+        });
+    });
+}
+
+future<permission_set> service::get_permissions(const role_or_anonymous& maybe_role, const resource& r) const {
+    return _permissions_cache->get(maybe_role, r);
+}
+
+future<bool> service::has_superuser(stdx::string_view role_name) const {
+    return this->get_roles(std::move(role_name)).then([this](role_set roles) {
+        return do_with(std::move(roles), [this](const role_set& roles) {
+            return do_with(false, roles.begin(), [this, &roles](bool& any_super, auto& iter) {
+                return do_until(
+                        [&roles, &any_super, &iter] { return any_super || (iter == roles.end()); },
+                        [this, &any_super, &iter] {
+                    return _role_manager->is_superuser(*iter++).then([&any_super](bool super) {
+                        any_super = super;
+                    });
+                }).then([&any_super] {
+                    return any_super;
+                });
+            });
+        });
+    });
+}
+
+future<role_set> service::get_roles(stdx::string_view role_name) const {
+    //
+    // We may wish to cache this information in the future (as Apache Cassandra does).
+    //
+
+    return _role_manager->query_granted(role_name, recursive_role_query::yes);
+}
+
+future<bool> service::exists(const resource& r) const {
+    switch (r.kind()) {
+        case resource_kind::data: {
+            const auto& db = _qp.db().local();
+
+            data_resource_view v(r);
+            const auto keyspace = v.keyspace();
+            const auto table = v.table();
+
+            if (table) {
+                return make_ready_future<bool>(db.has_schema(sstring(*keyspace), sstring(*table)));
+            }
+
+            if (keyspace) {
+                return make_ready_future<bool>(db.has_keyspace(sstring(*keyspace)));
+            }
+
+            return make_ready_future<bool>(true);
+        }
+
+        case resource_kind::role: {
+            role_resource_view v(r);
+            const auto role = v.role();
+
+            if (role) {
+                return _role_manager->exists(*role);
+            }
+
+            return make_ready_future<bool>(true);
+        }
+    }
+
+    return make_ready_future<bool>(false);
+}
+
+//
+// Free functions.
+//
+
+future<bool> has_superuser(const service& ser, const authenticated_user& u) {
+    if (is_anonymous(u)) {
+        return make_ready_future<bool>(false);
+    }
+
+    return ser.has_superuser(*u.name);
+}
+
+future<role_set> get_roles(const service& ser, const authenticated_user& u) {
+    if (is_anonymous(u)) {
+        return make_ready_future<role_set>();
+    }
+
+    return ser.get_roles(*u.name);
+}
+
+future<permission_set> get_permissions(const service& ser, const authenticated_user& u, const resource& r) {
+    return do_with(role_or_anonymous(), [&ser, &u, &r](auto& maybe_role) {
+        maybe_role.name = u.name;
+        return ser.get_permissions(maybe_role, r);
+    });
+}
+
+bool is_enforcing(const service& ser)  {
+    const bool enforcing_authorizer = ser.underlying_authorizer().qualified_java_name() != allow_all_authorizer_name();
+
+    const bool enforcing_authenticator = ser.underlying_authenticator().qualified_java_name()
+            != allow_all_authenticator_name();
+
+    return enforcing_authorizer || enforcing_authenticator;
+}
+
+bool is_protected(const service& ser, const resource& r) noexcept {
+    return ser.underlying_role_manager().protected_resources().count(r)
+            || ser.underlying_authenticator().protected_resources().count(r)
+            || ser.underlying_authorizer().protected_resources().count(r);
+}
+
+static void validate_authentication_options_are_supported(
+        const authentication_options& options,
+        const authentication_option_set& supported) {
+    const auto check = [&supported](authentication_option k) {
+        if (supported.count(k) == 0) {
+            throw unsupported_authentication_option(k);
+        }
+    };
+
+    if (options.password) {
+        check(authentication_option::password);
+    }
+
+    if (options.options) {
+        check(authentication_option::options);
+    }
+}
+
+
+future<> create_role(
+        const service& ser,
+        stdx::string_view name,
+        const role_config& config,
+        const authentication_options& options) {
+    return ser.underlying_role_manager().create(name, config).then([&ser, name, &options] {
+        if (!auth::any_authentication_options(options)) {
+            return make_ready_future<>();
+        }
+
+        return futurize_apply(
+                &validate_authentication_options_are_supported,
+                options,
+                ser.underlying_authenticator().supported_options()).then([&ser, name, &options] {
+            return ser.underlying_authenticator().create(name, options);
+        }).handle_exception([&ser, name](std::exception_ptr ep) {
+            // Roll-back. `name` is captured by value: the enclosing lambda may be destroyed before this runs.
+            return ser.underlying_role_manager().drop(name).then([ep = std::move(ep)] {
+                std::rethrow_exception(ep);
+            });
+        });
+    });
+}
+
+future<> alter_role(
+        const service& ser,
+        stdx::string_view name,
+        const role_config_update& config_update,
+        const authentication_options& options) {
+    return ser.underlying_role_manager().alter(name, config_update).then([&ser, name, &options] {
+        if (!any_authentication_options(options)) {
+            return make_ready_future<>();
+        }
+
+        return futurize_apply(
+                &validate_authentication_options_are_supported,
+                options,
+                ser.underlying_authenticator().supported_options()).then([&ser, name, &options] {
+            return ser.underlying_authenticator().alter(name, options);
+        });
+    });
+}
+
+future<> drop_role(const service& ser, stdx::string_view name) {
+    return do_with(make_role_resource(name), [&ser, name](const resource& r) {
+        auto& a = ser.underlying_authorizer();
+
+        return when_all_succeed(
+                a.revoke_all(name),
+                a.revoke_all(r)).handle_exception_type([](const unsupported_authorization_operation&) {
+            // Nothing.
+        });
+    }).then([&ser, name] {
+        return ser.underlying_authenticator().drop(name);
+    }).then([&ser, name] {
+        return ser.underlying_role_manager().drop(name);
+    });
+}
+
+future<bool> has_role(const service& ser, stdx::string_view grantee, stdx::string_view name) {
+    return when_all_succeed(
+            validate_role_exists(ser, name),
+            ser.get_roles(grantee)).then([name](role_set all_roles) {
+        return make_ready_future<bool>(all_roles.count(sstring(name)) != 0);
+    });
+}
+future<bool> has_role(const service& ser, const authenticated_user& u, stdx::string_view name) {
+    if (is_anonymous(u)) {
+        return make_ready_future<bool>(false);
+    }
+
+    return has_role(ser, *u.name, name);
+}
+
+future<> grant_permissions(
+        const service& ser,
+        stdx::string_view role_name,
+        permission_set perms,
+        const resource& r) {
+    return validate_role_exists(ser, role_name).then([&ser, role_name, perms, &r] {
+        return ser.underlying_authorizer().grant(role_name, perms, r);
+    });
+}
+
+future<> grant_applicable_permissions(const service& ser, stdx::string_view role_name, const resource& r) {
+    return grant_permissions(ser, role_name, r.applicable_permissions(), r);
+}
+future<> grant_applicable_permissions(const service& ser, const authenticated_user& u, const resource& r) {
+    if (is_anonymous(u)) {
+        return make_ready_future<>();
+    }
+
+    return grant_applicable_permissions(ser, *u.name, r);
+}
+
+future<> revoke_permissions(
+        const service& ser,
+        stdx::string_view role_name,
+        permission_set perms,
+        const resource& r) {
+    return validate_role_exists(ser, role_name).then([&ser, role_name, perms, &r] {
+        return ser.underlying_authorizer().revoke(role_name, perms, r);
+    });
+}
+
+future<std::vector<permission_details>> list_filtered_permissions(
+        const service& ser,
+        permission_set perms,
+        std::optional<stdx::string_view> role_name,
+        const std::optional<std::pair<resource, recursive_permissions>>& resource_filter) {
+    return ser.underlying_authorizer().list_all().then([&ser, perms, role_name, &resource_filter](
+            std::vector<permission_details> all_details) {
+
+        if (resource_filter) {
+            const resource r = resource_filter->first;
+
+            const auto resources = resource_filter->second
+                    ? auth::expand_resource_family(r)
+                    : auth::resource_set{r};
+
+            all_details.erase(
+                    std::remove_if(
+                            all_details.begin(),
+                            all_details.end(),
+                            [&resources](const permission_details& pd) {
+                        return resources.count(pd.resource) == 0;
+                    }),
+                    all_details.end());
+        }
+
+        std::transform(
+                std::make_move_iterator(all_details.begin()),
+                std::make_move_iterator(all_details.end()),
+                all_details.begin(),
+                [perms](permission_details pd) {
+                    pd.permissions = permission_set::from_mask(pd.permissions.mask() & perms.mask());
+                    return pd;
+                });
+
+        // Eliminate rows with an empty permission set.
+        all_details.erase(
+                std::remove_if(all_details.begin(), all_details.end(), [](const permission_details& pd) {
+                    return pd.permissions.mask() == 0;
+                }),
+                all_details.end());
+
+        if (!role_name) {
+            return make_ready_future<std::vector<permission_details>>(std::move(all_details));
+        }
+
+        //
+        // Filter out rows based on whether permissions have been granted to this role (directly or indirectly).
+        //
+
+        return do_with(std::move(all_details), [&ser, role_name](auto& all_details) {
+            return ser.get_roles(*role_name).then([&all_details](role_set all_roles) {
+                all_details.erase(
+                        std::remove_if(
+                                all_details.begin(),
+                                all_details.end(),
+                                [&all_roles](const permission_details& pd) {
+                            return all_roles.count(pd.role_name) == 0;
+                        }),
+                        all_details.end());
+
+                return make_ready_future<std::vector<permission_details>>(std::move(all_details));
+            });
+        });
+    });
+}
+
+}
--- a/auth/service.hh
+++ b/auth/service.hh
@@ -0,0 +1,296 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <experimental/string_view>
+#include <memory>
+#include <optional>
+
+#include <seastar/core/future.hh>
+#include <seastar/core/sstring.hh>
+#include <seastar/util/bool_class.hh>
+
+#include "auth/authenticator.hh"
+#include "auth/authorizer.hh"
+#include "auth/permission.hh"
+#include "auth/permissions_cache.hh"
+#include "auth/role_manager.hh"
+#include "seastarx.hh"
+#include "stdx.hh"
+
+namespace cql3 {
+class query_processor;
+}
+
+namespace db {
+class config;
+}
+
+namespace service {
+class migration_manager;
+class migration_listener;
+}
+
+namespace auth {
+
+class role_or_anonymous;
+
+struct service_config final {
+    static service_config from_db_config(const db::config&);
+
+    sstring authorizer_java_name;
+    sstring authenticator_java_name;
+    sstring role_manager_java_name;
+};
+
+///
+/// Due to poor (in this author's opinion) decisions of Apache Cassandra, certain choices of one role-manager,
+/// authenticator, or authorizer imply restrictions on the rest.
+///
+/// This exception is thrown when an invalid combination of modules is selected, with a message explaining the
+/// incompatibility.
+///
+class incompatible_module_combination : public std::invalid_argument {
+public:
+    using std::invalid_argument::invalid_argument;
+};
+
+///
+/// Client for access-control in the system.
+///
+/// Access control encompasses user/role management, authentication, and authorization. This client provides access to
+/// the dynamically-loaded implementations of these modules (through the `underlying_*` member functions), but also
+/// builds on their functionality with caching and abstractions for common operations.
+///
+/// All state associated with access-control is stored externally to any particular instance of this class.
+///
+class service final {
+    permissions_cache_config _permissions_cache_config;
+    std::unique_ptr<permissions_cache> _permissions_cache;
+
+    cql3::query_processor& _qp;
+
+    ::service::migration_manager& _migration_manager;
+
+    std::unique_ptr<authorizer> _authorizer;
+
+    std::unique_ptr<authenticator> _authenticator;
+
+    std::unique_ptr<role_manager> _role_manager;
+
+    // Only one of these should be registered, so we end up with some unused instances. Not the end of the world.
+    std::unique_ptr<::service::migration_listener> _migration_listener;
+
+public:
+    service(
+            permissions_cache_config,
+            cql3::query_processor&,
+            ::service::migration_manager&,
+            std::unique_ptr<authorizer>,
+            std::unique_ptr<authenticator>,
+            std::unique_ptr<role_manager>);
+
+    ///
+    /// This constructor is intended to be used when the class is sharded via \ref seastar::sharded. In that case, the
+    /// arguments must be copyable, which is why we delay construction with instance-construction instructions instead
+    /// of the instances themselves.
+    ///
+    service(
+            permissions_cache_config,
+            cql3::query_processor&,
+            ::service::migration_manager&,
+            const service_config&);
+
+    future<> start();
+
+    future<> stop();
+
+    ///
+    /// \returns an exceptional future with \ref nonexistant_role if the named role does not exist.
+    ///
+    future<permission_set> get_permissions(const role_or_anonymous&, const resource&) const;
+
+    ///
+    /// Like \ref get_permissions, but never returns cached permissions.
+    ///
+    future<permission_set> get_uncached_permissions(const role_or_anonymous&, const resource&) const;
+
+    ///
+    /// Query whether the named role has been granted a role that is a superuser.
+    ///
+    /// A role is always granted to itself. Therefore, a role that "is" a superuser also "has" superuser.
+    ///
+    /// \returns an exceptional future with \ref nonexistant_role if the role does not exist.
+    ///
+    future<bool> has_superuser(stdx::string_view role_name) const;
+
+    ///
+    /// Return the set of all roles granted to the given role, including itself and roles granted through other roles.
+    ///
+    /// \returns an exceptional future with \ref nonexistant_role if the role does not exist.
+    future<role_set> get_roles(stdx::string_view role_name) const;
+
+    future<bool> exists(const resource&) const;
+
+    const authenticator& underlying_authenticator() const {
+        return *_authenticator;
+    }
+
+    const authorizer& underlying_authorizer() const {
+        return *_authorizer;
+    }
+
+    const role_manager& underlying_role_manager() const {
+        return *_role_manager;
+    }
+
+private:
+    future<bool> has_existing_legacy_users() const;
+
+    future<> create_keyspace_if_missing() const;
+};
+
+future<bool> has_superuser(const service&, const authenticated_user&);
+
+future<role_set> get_roles(const service&, const authenticated_user&);
+
+future<permission_set> get_permissions(const service&, const authenticated_user&, const resource&);
+
+///
+/// Access-control is "enforcing" when either the authenticator or the authorizer are not their "allow-all" variants.
+///
+/// Put differently, when access control is not enforcing, all operations on resources will be allowed and users do not
+/// need to authenticate themselves.
+///
+bool is_enforcing(const service&);
+
+///
+/// Protected resources cannot be modified even if the performer has permissions to do so.
+///
+bool is_protected(const service&, const resource&) noexcept;
+
+///
+/// Create a role with optional authentication information.
+///
+/// \returns an exceptional future with \ref role_already_exists if the user or role exists.
+///
+/// \returns an exceptional future with \ref unsupported_authentication_option if an unsupported option is included.
+///
+future<> create_role(
+        const service&,
+        stdx::string_view name,
+        const role_config&,
+        const authentication_options&);
+
+///
+/// Alter an existing role and its authentication information.
+///
+/// \returns an exceptional future with \ref nonexistant_role if the named role does not exist.
+///
+/// \returns an exceptional future with \ref unsupported_authentication_option if an unsupported option is included.
+///
+future<> alter_role(
+        const service&,
+        stdx::string_view name,
+        const role_config_update&,
+        const authentication_options&);
+
+///
+/// Drop a role from the system, including all permissions and authentication information.
+///
+/// \returns an exceptional future with \ref nonexistant_role if the named role does not exist.
+///
+future<> drop_role(const service&, stdx::string_view name);
+
+///
+/// Check if `grantee` has been granted the named role.
+///
+/// \returns an exceptional future with \ref nonexistant_role if `grantee` or `name` do not exist.
+///
+future<bool> has_role(const service&, stdx::string_view grantee, stdx::string_view name);
+///
+/// Check if the authenticated user has been granted the named role.
+///
+/// \returns an exceptional future with \ref nonexistant_role if the user or `name` do not exist.
+///
+future<bool> has_role(const service&, const authenticated_user&, stdx::string_view name);
+
+///
+/// \returns an exceptional future with \ref nonexistant_role if the named role does not exist.
+///
+/// \returns an exceptional future with \ref unsupported_authorization_operation if granting permissions is not
+/// supported.
+///
+future<> grant_permissions(
+        const service&,
+        stdx::string_view role_name,
+        permission_set,
+        const resource&);
+
+///
+/// Like \ref grant_permissions, but grants all applicable permissions on the resource.
+///
+/// \returns an exceptional future with \ref nonexistant_role if the named role does not exist.
+///
+/// \returns an exceptional future with \ref unsupported_authorization_operation if granting permissions is not
+/// supported.
+///
+future<> grant_applicable_permissions(const service&, stdx::string_view role_name, const resource&);
+future<> grant_applicable_permissions(const service&, const authenticated_user&, const resource&);
+
+///
+/// \returns an exceptional future with \ref nonexistant_role if the named role does not exist.
+///
+/// \returns an exceptional future with \ref unsupported_authorization_operation if revoking permissions is not
+/// supported.
+///
+future<> revoke_permissions(
+        const service&,
+        stdx::string_view role_name,
+        permission_set,
+        const resource&);
+
+using recursive_permissions = bool_class<struct recursive_permissions_tag>;
+
+///
+/// Query for all granted permissions according to filtering criteria.
+///
+/// Only permissions included in the provided set are included.
+///
+/// If a role name is provided, only permissions granted (directly or recursively) to the role are included.
+///
+/// If a resource filter is provided, only permissions granted on the resource are included. When \ref
+/// recursive_permissions is `true`, permissions on a parent resource are included.
+///
+/// \returns an exceptional future with \ref nonexistant_role if a role name is included which refers to a role that
+/// does not exist.
+///
+/// \returns an exceptional future with \ref unsupported_authorization_operation if listing permissions is not
+/// supported.
+///
+future<std::vector<permission_details>> list_filtered_permissions(
+        const service&,
+        permission_set,
+        std::optional<stdx::string_view> role_name,
+        const std::optional<std::pair<resource, recursive_permissions>>& resource_filter);
+
+}
--- a/auth/standard_role_manager.cc
+++ b/auth/standard_role_manager.cc
@@ -0,0 +1,542 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "auth/standard_role_manager.hh"
+
+#include <experimental/optional>
+#include <unordered_set>
+#include <vector>
+
+#include <boost/algorithm/string/join.hpp>
+#include <seastar/core/future-util.hh>
+#include <seastar/core/print.hh>
+#include <seastar/core/sleep.hh>
+#include <seastar/core/sstring.hh>
+#include <seastar/core/thread.hh>
+
+#include "auth/common.hh"
+#include "auth/roles-metadata.hh"
+#include "cql3/query_processor.hh"
+#include "db/consistency_level_type.hh"
+#include "exceptions/exceptions.hh"
+#include "log.hh"
+#include "utils/class_registrator.hh"
+
+namespace auth {
+
+namespace meta {
+
+namespace role_members_table {
+
+// Unqualified name of the table that records, for each role, the roles which are its members.
+constexpr stdx::string_view name{"role_members", 12};
+
+// Keyspace-qualified table name. The string is assembled on first use and then reused.
+static stdx::string_view qualified_name() noexcept {
+    static const sstring full_name = AUTH_KS + "." + sstring(name);
+    return full_name;
+}
+
+} // namespace role_members_table
+
+} // namespace meta
+
+// Logger used by all functions in this translation unit.
+static logging::logger log("standard_role_manager");
+
+// Registers `standard_role_manager` as a `role_manager` implementation under the Cassandra-compatible class name.
+// The trailing template arguments are the constructor argument types.
+static const class_registrator<
+        role_manager,
+        standard_role_manager,
+        cql3::query_processor&,
+        ::service::migration_manager&> registration("org.apache.cassandra.auth.CassandraRoleManager");
+
+// In-memory image of a single row of the roles table, as read by `find_record`.
+struct record final {
+    // Name of the role (the value of the role column).
+    sstring name;
+    // Value of the `is_superuser` column.
+    bool is_superuser;
+    // Value of the `can_login` column.
+    bool can_login;
+    // Roles this role has been granted directly; empty when the `member_of` column is absent.
+    role_set member_of;
+};
+
+// Selects the consistency level for queries about `role_name`: QUORUM for the default superuser, LOCAL_ONE for
+// every other role.
+static db::consistency_level consistency_for_role(stdx::string_view role_name) noexcept {
+    const bool is_default_superuser = (role_name == meta::DEFAULT_SUPERUSER_NAME);
+
+    return is_default_superuser
+            ? db::consistency_level::QUORUM
+            : db::consistency_level::LOCAL_ONE;
+}
+
+// Looks up the roles-table row for `role_name`.
+//
+// Resolves to an empty optional when no row matches, otherwise to a `record` built from the row. A missing
+// `member_of` column is treated as an empty set of memberships.
+static future<stdx::optional<record>> find_record(cql3::query_processor& qp, stdx::string_view role_name) {
+    static const sstring select_by_name = sprint(
+            "SELECT * FROM %s WHERE %s = ?",
+            meta::roles_table::qualified_name(),
+            meta::roles_table::role_col_name);
+
+    const auto to_record = [](::shared_ptr<cql3::untyped_result_set> rows) {
+        if (rows->empty()) {
+            return stdx::optional<record>();
+        }
+
+        const cql3::untyped_result_set_row& first = rows->one();
+
+        record found{
+                first.get_as<sstring>(sstring(meta::roles_table::role_col_name)),
+                first.get_as<bool>("is_superuser"),
+                first.get_as<bool>("can_login"),
+                (first.has("member_of")
+                         ? first.get_set<sstring>("member_of")
+                         : role_set())};
+
+        return stdx::make_optional(std::move(found));
+    };
+
+    return qp.process(
+            select_by_name,
+            consistency_for_role(role_name),
+            {sstring(role_name)},
+            true).then(to_record);
+}
+
+// Like `find_record`, but a missing role is an error: the returned future fails with `nonexistant_role`.
+//
+// `role_name` is captured as a view, so the caller must keep the underlying string alive until the future resolves.
+static future<record> require_record(cql3::query_processor& qp, stdx::string_view role_name) {
+    return find_record(qp, role_name).then([role_name](stdx::optional<record> maybe_record) {
+        if (maybe_record) {
+            return make_ready_future<record>(*maybe_record);
+        }
+
+        throw nonexistant_role(role_name);
+    });
+}
+
+// True when the row has a `can_login` column whose deserialized value is not null.
+static bool has_can_login(const cql3::untyped_result_set_row& row) {
+    if (!row.has("can_login")) {
+        return false;
+    }
+
+    const bool is_null = boolean_type->deserialize(row.get_blob("can_login")).is_null();
+    return !is_null;
+}
+
+// Fully-qualified Java-style class name of this role manager. The string is built on first use and then reused.
+stdx::string_view standard_role_manager_name() noexcept {
+    static const sstring java_name = meta::AUTH_PACKAGE_NAME + "CassandraRoleManager";
+    return java_name;
+}
+
+// Returns the same name as the free function `standard_role_manager_name()`.
+stdx::string_view standard_role_manager::qualified_java_name() const noexcept {
+    return standard_role_manager_name();
+}
+
+// The data resources backing this role manager: the roles table and the role-members table in the auth keyspace.
+const resource_set& standard_role_manager::protected_resources() const {
+    static const resource_set backing_tables({
+            make_data_resource(meta::AUTH_KS, meta::roles_table::name),
+            make_data_resource(meta::AUTH_KS, meta::role_members_table::name)});
+
+    return backing_tables;
+}
+
+// Ensures that both metadata tables (roles and role members) exist. The two creations are started together and the
+// returned future resolves once both have succeeded.
+future<> standard_role_manager::create_metadata_tables_if_missing() const {
+    static const sstring role_members_ddl = sprint(
+            "CREATE TABLE %s ("
+            "  role text,"
+            "  member text,"
+            "  PRIMARY KEY (role, member)"
+            ")",
+            meta::role_members_table::qualified_name());
+
+    auto roles_ready = create_metadata_table_if_missing(
+            meta::roles_table::name,
+            _qp,
+            meta::roles_table::creation_query(),
+            _migration_manager);
+
+    auto role_members_ready = create_metadata_table_if_missing(
+            meta::role_members_table::name,
+            _qp,
+            role_members_ddl,
+            _migration_manager);
+
+    return when_all_succeed(std::move(roles_ready), std::move(role_members_ready));
+}
+
+// Inserts the default superuser role (superuser, able to log in) unless the default role's row already satisfies
+// `has_can_login`.
+//
+// An `unavailable_exception` is logged as a warning and then propagated to the caller unchanged.
+future<> standard_role_manager::create_default_role_if_missing() const {
+    return default_role_row_satisfies(_qp, &has_can_login).then([this](bool exists) {
+        if (!exists) {
+            static const sstring query = sprint(
+                    "INSERT INTO %s (%s, is_superuser, can_login) VALUES (?, true, true)",
+                    meta::roles_table::qualified_name(),
+                    meta::roles_table::role_col_name);
+
+            return _qp.process(
+                    query,
+                    db::consistency_level::QUORUM,
+                    {meta::DEFAULT_SUPERUSER_NAME}).then([](auto&&) {
+                log.info("Created default superuser role '{}'.", meta::DEFAULT_SUPERUSER_NAME);
+                return make_ready_future<>();
+            });
+        }
+
+        return make_ready_future<>();
+    }).handle_exception_type([](const exceptions::unavailable_exception& e) {
+        log.warn("Skipped default role setup: some nodes were not ready; will retry");
+        // Re-raise so the caller still observes the failure.
+        return make_exception_future<>(e);
+    });
+}
+
+// Name of the legacy table (in the auth keyspace) that `migrate_legacy_metadata` reads users from.
+static const sstring legacy_table_name{"users"};
+
+// True when the local database has a schema for the legacy users table in the auth keyspace.
+bool standard_role_manager::legacy_metadata_exists() const {
+    return _qp.db().local().has_schema(meta::AUTH_KS, legacy_table_name);
+}
+
+// Copies every row of the legacy users table into the roles table.
+//
+// Each legacy user becomes a role that can log in, with `is_superuser` taken from the legacy `super` column. Rows are
+// processed one after another. Any failure is logged and then propagated to the caller.
+future<> standard_role_manager::migrate_legacy_metadata() const {
+    log.info("Starting migration of legacy user metadata.");
+    static const sstring query = sprint("SELECT * FROM %s.%s", meta::AUTH_KS, legacy_table_name);
+
+    return _qp.process(
+            query,
+            db::consistency_level::QUORUM).then([this](::shared_ptr<cql3::untyped_result_set> results) {
+        return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
+            role_config config;
+            config.is_superuser = row.get_as<bool>("super");
+            config.can_login = true;
+
+            // `do_with` keeps the name and the configuration alive until `create_or_replace` has completed.
+            return do_with(
+                    row.get_as<sstring>("name"),
+                    std::move(config),
+                    [this](const auto& name, const auto& config) {
+                return this->create_or_replace(name, config);
+            });
+        // The capture keeps the result set alive for the duration of the iteration.
+        }).finally([results] {});
+    }).then([] {
+        log.info("Finished migrating legacy user metadata.");
+    }).handle_exception([](std::exception_ptr ep) {
+        log.error("Encountered an error during migration!");
+        std::rethrow_exception(ep);
+    });
+}
+
+// Starts the role manager.
+//
+// The work is wrapped in `once_among_shards`. The metadata tables are created first; the remaining setup is handed
+// to `do_after_system_ready` and its future is stored in `_stopped` so that `stop()` can wait for it.
+//
+// The deferred setup, in order:
+//  1. Wait for schema agreement.
+//  2. If any non-default role row already satisfies `has_can_login`, do nothing (legacy metadata, if present, is
+//     ignored with a warning).
+//  3. Otherwise, if legacy metadata exists, migrate it.
+//  4. Otherwise, create the default superuser role if it is missing.
+future<> standard_role_manager::start() {
+    return once_among_shards([this] {
+        return this->create_metadata_tables_if_missing().then([this] {
+            _stopped = auth::do_after_system_ready(_as, [this] {
+                return seastar::async([this] {
+                    wait_for_schema_agreement(_migration_manager, _qp.db().local()).get0();
+
+                    if (any_nondefault_role_row_satisfies(_qp, &has_can_login).get0()) {
+                        if (this->legacy_metadata_exists()) {
+                            log.warn("Ignoring legacy user metadata since nondefault roles already exist.");
+                        }
+
+                        return;
+                    }
+
+                    if (this->legacy_metadata_exists()) {
+                        this->migrate_legacy_metadata().get0();
+                        return;
+                    }
+
+                    create_default_role_if_missing().get0();
+                });
+            });
+        });
+    });
+}
+
+// Requests an abort of the deferred setup started by `start()` and waits for it to finish. A `sleep_aborted`
+// failure of that setup is swallowed; any other failure is propagated.
+future<> standard_role_manager::stop() {
+    _as.request_abort();
+    return _stopped.handle_exception_type([] (const sleep_aborted&) { });
+}
+
+// Writes the role's row (name, `is_superuser`, `can_login`) with a plain INSERT, without first checking whether the
+// role already exists.
+future<> standard_role_manager::create_or_replace(stdx::string_view role_name, const role_config& c) const {
+    static const sstring insert_role = sprint(
+            "INSERT INTO %s (%s, is_superuser, can_login) VALUES (?, ?, ?)",
+            meta::roles_table::qualified_name(),
+            meta::roles_table::role_col_name);
+
+    auto inserted = _qp.process(
+            insert_role,
+            consistency_for_role(role_name),
+            {sstring(role_name), c.is_superuser, c.can_login},
+            true);
+
+    return inserted.discard_result();
+}
+
+// Creates a new role; the returned future fails with `role_already_exists` when the name is already taken.
+//
+// `c` is captured by reference, so the caller must keep it alive until the returned future resolves.
+future<>
+standard_role_manager::create(stdx::string_view role_name, const role_config& c) const {
+    return this->exists(role_name).then([this, role_name, &c](bool already_present) {
+        if (!already_present) {
+            return this->create_or_replace(role_name, c);
+        }
+
+        throw role_already_exists(role_name);
+    });
+}
+
+// Updates `is_superuser` and/or `can_login` of an existing role.
+//
+// The returned future fails with `nonexistant_role` (via `require_record`) when the role does not exist. When the
+// update sets neither field, no query is issued.
+//
+// `u` is captured by reference, so the caller must keep it alive until the returned future resolves.
+future<>
+standard_role_manager::alter(stdx::string_view role_name, const role_config_update& u) const {
+    // Builds the comma-separated `col = value` list for the fields that are set in the update.
+    static const auto build_column_assignments = [](const role_config_update& u) -> sstring {
+        std::vector<sstring> assignments;
+
+        if (u.is_superuser) {
+            assignments.push_back(sstring("is_superuser = ") + (*u.is_superuser ? "true" : "false"));
+        }
+
+        if (u.can_login) {
+            assignments.push_back(sstring("can_login = ") + (*u.can_login ? "true" : "false"));
+        }
+
+        return boost::algorithm::join(assignments, ", ");
+    };
+
+    return require_record(_qp, role_name).then([this, role_name, &u](record) {
+        // Nothing to change: avoid issuing an UPDATE with an empty SET clause.
+        if (!u.is_superuser && !u.can_login) {
+            return make_ready_future<>();
+        }
+
+        return _qp.process(
+                sprint(
+                        "UPDATE %s SET %s WHERE %s = ?",
+                        meta::roles_table::qualified_name(),
+                        build_column_assignments(u),
+                        meta::roles_table::role_col_name),
+                consistency_for_role(role_name),
+                {sstring(role_name)}).discard_result();
+    });
+}
+
+// Drops a role.
+//
+// The returned future fails with `nonexistant_role` when the role does not exist. Otherwise all membership edges
+// touching the role are removed (both directions, concurrently) and only then is the role's own row deleted.
+future<> standard_role_manager::drop(stdx::string_view role_name) const {
+    return this->exists(role_name).then([this, role_name](bool role_exists) {
+        if (!role_exists) {
+            throw nonexistant_role(role_name);
+        }
+
+        // First, revoke this role from all roles that are members of it.
+        const auto revoke_from_members = [this, role_name] {
+            static const sstring query = sprint(
+                    "SELECT member FROM %s WHERE role = ?",
+                    meta::role_members_table::qualified_name());
+
+            return _qp.process(
+                    query,
+                    consistency_for_role(role_name),
+                    {sstring(role_name)}).then([this, role_name](::shared_ptr<cql3::untyped_result_set> members) {
+                return parallel_for_each(
+                        members->begin(),
+                        members->end(),
+                        [this, role_name](const cql3::untyped_result_set_row& member_row) {
+                    const sstring member = member_row.template get_as<sstring>("member");
+                    return this->modify_membership(member, role_name, membership_change::remove);
+                // The capture keeps the result set alive while its rows are being processed.
+                }).finally([members] {});
+            });
+        };
+
+        // In parallel, revoke all roles that this role is a member of.
+        const auto revoke_members_of = [this, grantee = role_name] {
+            return this->query_granted(
+                    grantee,
+                    recursive_role_query::no).then([this, grantee](role_set granted_roles) {
+                // `do_with` keeps the set alive until every removal has completed.
+                return do_with(
+                        std::move(granted_roles),
+                        [this, grantee](const role_set& granted_roles) {
+                    return parallel_for_each(
+                            granted_roles.begin(),
+                            granted_roles.end(),
+                            [this, grantee](const sstring& role_name) {
+                        return this->modify_membership(grantee, role_name, membership_change::remove);
+                    });
+                });
+            });
+        };
+
+        // Finally, delete the role itself.
+        auto delete_role = [this, role_name] {
+            static const sstring query = sprint(
+                    "DELETE FROM %s WHERE %s = ?",
+                    meta::roles_table::qualified_name(),
+                    meta::roles_table::role_col_name);
+
+            return _qp.process(
+                    query,
+                    consistency_for_role(role_name),
+                    {sstring(role_name)}).discard_result();
+        };
+
+        return when_all_succeed(revoke_from_members(), revoke_members_of()).then([delete_role = std::move(delete_role)] {
+            return delete_role();
+        });
+    });
+}
+
+// Adds or removes the membership edge "`grantee_name` is a member of `role_name`".
+//
+// The edge is stored in two places, and both are updated concurrently:
+//  - the `member_of` set in the grantee's row of the roles table, and
+//  - a (role, member) row in the role-members table.
+//
+// No existence or redundancy checks are performed here; callers are responsible for them.
+future<>
+standard_role_manager::modify_membership(
+        stdx::string_view grantee_name,
+        stdx::string_view role_name,
+        membership_change ch) const {
+
+
+    // Adds `role_name` to, or removes it from, the grantee's `member_of` set.
+    const auto modify_roles = [this, role_name, grantee_name, ch] {
+        const auto query = sprint(
+                "UPDATE %s SET member_of = member_of %s ? WHERE %s = ?",
+                meta::roles_table::qualified_name(),
+                (ch == membership_change::add ? '+' : '-'),
+                meta::roles_table::role_col_name);
+
+        return _qp.process(
+                query,
+                consistency_for_role(grantee_name),
+                {role_set{sstring(role_name)}, sstring(grantee_name)}).discard_result();
+    };
+
+    // Inserts or deletes the (role, member) row in the role-members table.
+    const auto modify_role_members = [this, role_name, grantee_name, ch] {
+        switch (ch) {
+            case membership_change::add:
+                return _qp.process(
+                        sprint(
+                                "INSERT INTO %s (role, member) VALUES (?, ?)",
+                                meta::role_members_table::qualified_name()),
+                        consistency_for_role(role_name),
+                        {sstring(role_name), sstring(grantee_name)}).discard_result();
+
+            case membership_change::remove:
+                return _qp.process(
+                        sprint(
+                                "DELETE FROM %s WHERE role = ? AND member = ?",
+                                meta::role_members_table::qualified_name()),
+                        consistency_for_role(role_name),
+                        {sstring(role_name), sstring(grantee_name)}).discard_result();
+        }
+
+        // Only reached if `ch` holds a value that is not one of the two enumerators handled above.
+        return make_ready_future<>();
+    };
+
+    return when_all_succeed(modify_roles(), modify_role_members());
+}
+
+// Grants `role_name` to `grantee_name`.
+//
+// Two checks run concurrently before the membership is recorded; either failing makes the returned future fail with
+// `role_already_included`:
+//  - redundancy: the grantee already has the role, directly or transitively;
+//  - cycle: the role already has the grantee, directly or transitively, so the grant would create a cycle.
+//
+// `query_granted` fails with `nonexistant_role` (via `require_record`) if either role does not exist.
+future<>
+standard_role_manager::grant(stdx::string_view grantee_name, stdx::string_view role_name) const {
+    const auto check_redundant = [this, role_name, grantee_name] {
+        return this->query_granted(
+                grantee_name,
+                recursive_role_query::yes).then([role_name, grantee_name](role_set roles) {
+            if (roles.count(sstring(role_name)) != 0) {
+                throw role_already_included(grantee_name, role_name);
+            }
+
+            return make_ready_future<>();
+        });
+    };
+
+    const auto check_cycle = [this, role_name, grantee_name] {
+        return this->query_granted(
+                role_name,
+                recursive_role_query::yes).then([role_name, grantee_name](role_set roles) {
+            if (roles.count(sstring(grantee_name)) != 0) {
+                throw role_already_included(role_name, grantee_name);
+            }
+
+            return make_ready_future<>();
+        });
+    };
+
+   return when_all_succeed(check_redundant(), check_cycle()).then([this, role_name, grantee_name] {
+       return this->modify_membership(grantee_name, role_name, membership_change::add);
+   });
+}
+
+// Revokes `role_name` from `revokee_name`.
+//
+// The returned future fails with `nonexistant_role` when `role_name` does not exist, and with
+// `revoke_ungranted_role` when the role is not among the revokee's directly granted roles.
+future<>
+standard_role_manager::revoke(stdx::string_view revokee_name, stdx::string_view role_name) const {
+    return this->exists(role_name).then([role_name](bool role_exists) {
+        if (!role_exists) {
+            throw nonexistant_role(sstring(role_name));
+        }
+    }).then([this, revokee_name] {
+        return this->query_granted(revokee_name, recursive_role_query::no);
+    }).then([revokee_name, role_name](role_set directly_granted) {
+        const bool is_granted = (directly_granted.count(sstring(role_name)) != 0);
+
+        if (!is_granted) {
+            throw revoke_ungranted_role(revokee_name, role_name);
+        }
+    }).then([this, revokee_name, role_name] {
+        return this->modify_membership(revokee_name, role_name, membership_change::remove);
+    });
+}
+
+// Accumulates into `roles` the roles granted to `grantee_name`.
+//
+// With `recurse == false` only the direct memberships of the grantee are added. With `recurse == true` the
+// memberships are followed transitively.
+//
+// A role is expanded only the first time it is inserted into `roles`. This bounds the traversal when the stored
+// membership data contains a cycle, and avoids re-querying a role that is reachable through several paths. A role
+// the caller has already placed in `roles` is therefore not expanded again.
+//
+// The returned future fails with `nonexistant_role` (via `require_record`) when a visited role has no row.
+//
+// `roles` and the string behind `grantee_name` must stay alive until the returned future resolves.
+static future<> collect_roles(
+        cql3::query_processor& qp,
+        stdx::string_view grantee_name,
+        bool recurse,
+        role_set& roles) {
+    return require_record(qp, grantee_name).then([&qp, &roles, recurse](record r) {
+        // `do_with` keeps the membership set alive while it is iterated asynchronously.
+        return do_with(std::move(r.member_of), [&qp, &roles, recurse](const role_set& memberships) {
+            return do_for_each(memberships.begin(), memberships.end(), [&qp, &roles, recurse](const sstring& role_name) {
+                const bool newly_seen = roles.insert(role_name).second;
+
+                if (recurse && newly_seen) {
+                    return collect_roles(qp, role_name, true, roles);
+                }
+
+                return make_ready_future<>();
+            });
+        });
+    });
+}
+
+// Resolves to the set of roles granted to `grantee_name`, directly or (for `recursive_role_query::yes`)
+// transitively. The grantee itself is always part of the result.
+future<role_set> standard_role_manager::query_granted(stdx::string_view grantee_name, recursive_role_query m) const {
+    return do_with(
+            role_set{sstring(grantee_name)},
+            [this, grantee_name, m](role_set& collected) {
+        const bool follow_transitively = (m == recursive_role_query::yes);
+
+        return collect_roles(_qp, grantee_name, follow_transitively, collected).then([&collected] {
+            return collected;
+        });
+    });
+}
+
+// Resolves to the names of all roles in the roles table, read at QUORUM.
+future<role_set> standard_role_manager::query_all() const {
+    static const sstring select_all_names = sprint(
+            "SELECT %s FROM %s",
+            meta::roles_table::role_col_name,
+            meta::roles_table::qualified_name());
+
+    // To avoid many copies of a view.
+    static const auto role_col_name_string = sstring(meta::roles_table::role_col_name);
+
+    return _qp.process(
+            select_all_names,
+            db::consistency_level::QUORUM).then([](::shared_ptr<cql3::untyped_result_set> rows) {
+        role_set names;
+
+        for (auto it = rows->begin(); it != rows->end(); ++it) {
+            const cql3::untyped_result_set_row& row = *it;
+            names.insert(row.get_as<sstring>(role_col_name_string));
+        }
+
+        return names;
+    });
+}
+
+// Resolves to whether a row for `role_name` exists in the roles table.
+future<bool> standard_role_manager::exists(stdx::string_view role_name) const  {
+    return find_record(_qp, role_name).then([](stdx::optional<record> maybe_record) {
+        const bool found = static_cast<bool>(maybe_record);
+        return found;
+    });
+}
+
+// Resolves to the role's `is_superuser` flag; fails with `nonexistant_role` when the role does not exist.
+future<bool> standard_role_manager::is_superuser(stdx::string_view role_name) const {
+    return require_record(_qp, role_name).then([](record found) {
+        const bool superuser = found.is_superuser;
+        return superuser;
+    });
+}
+
+// Resolves to the role's `can_login` flag; fails with `nonexistant_role` when the role does not exist.
+future<bool> standard_role_manager::can_login(stdx::string_view role_name) const {
+    return require_record(_qp, role_name).then([](record found) {
+        const bool login_allowed = found.can_login;
+        return login_allowed;
+    });
+}
+
+}
--- a/auth/standard_role_manager.hh
+++ b/auth/standard_role_manager.hh
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "auth/role_manager.hh"
+
+#include <experimental/string_view>
+#include <unordered_set>
+
+#include <seastar/core/abort_source.hh>
+#include <seastar/core/future.hh>
+#include <seastar/core/sstring.hh>
+
+#include "stdx.hh"
+#include "seastarx.hh"
+
+namespace cql3 {
+class query_processor;
+}
+
+namespace service {
+class migration_manager;
+}
+
+namespace auth {
+
+/// Fully-qualified Java-style class name under which \ref standard_role_manager is known.
+stdx::string_view standard_role_manager_name() noexcept;
+
+///
+/// Role manager that stores roles and role memberships in tables of the auth keyspace, accessed through the query
+/// processor.
+///
+class standard_role_manager final : public role_manager {
+    cql3::query_processor& _qp;
+    ::service::migration_manager& _migration_manager;
+    // Future of the deferred setup work launched by `start()`; awaited by `stop()`.
+    future<> _stopped;
+    // Used by `stop()` to abort the deferred setup work.
+    seastar::abort_source _as;
+
+public:
+    standard_role_manager(cql3::query_processor& qp, ::service::migration_manager& mm)
+            : _qp(qp)
+            , _migration_manager(mm)
+            , _stopped(make_ready_future<>()) {
+    }
+
+    virtual stdx::string_view qualified_java_name() const noexcept override;
+
+    virtual const resource_set& protected_resources() const override;
+
+    virtual future<> start() override;
+
+    virtual future<> stop() override;
+
+    virtual future<> create(stdx::string_view role_name, const role_config&) const override;
+
+    virtual future<> drop(stdx::string_view role_name) const override;
+
+    virtual future<> alter(stdx::string_view role_name, const role_config_update&) const override;
+
+    virtual future<> grant(stdx::string_view grantee_name, stdx::string_view role_name) const override;
+
+    virtual future<> revoke(stdx::string_view revokee_name, stdx::string_view role_name) const override;
+
+    virtual future<role_set> query_granted(stdx::string_view grantee_name, recursive_role_query) const override;
+
+    virtual future<role_set> query_all() const override;
+
+    virtual future<bool> exists(stdx::string_view role_name) const override;
+
+    virtual future<bool> is_superuser(stdx::string_view role_name) const override;
+
+    virtual future<bool> can_login(stdx::string_view role_name) const override;
+
+private:
+    enum class membership_change { add, remove };
+
+    future<> create_metadata_tables_if_missing() const;
+
+    bool legacy_metadata_exists() const;
+
+    future<> migrate_legacy_metadata() const;
+
+    future<> create_default_role_if_missing() const;
+
+    // Writes the role's row unconditionally, overwriting an existing role of the same name.
+    future<> create_or_replace(stdx::string_view role_name, const role_config&) const;
+
+    // Adds or removes the membership "`grantee_name` is a member of `role_name`". The parameter order matches the
+    // definition and all call sites: grantee first, role second.
+    future<> modify_membership(stdx::string_view grantee_name, stdx::string_view role_name, membership_change) const;
+};
+
+}
--- a/auth/transitional.cc
+++ b/auth/transitional.cc
@@ -0,0 +1,262 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (C) 2017 ScyllaDB
+ *
+ * Modified by ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "auth/authenticated_user.hh"
+#include "auth/authenticator.hh"
+#include "auth/authorizer.hh"
+#include "auth/default_authorizer.hh"
+#include "auth/password_authenticator.hh"
+#include "auth/permission.hh"
+#include "db/config.hh"
+#include "utils/class_registrator.hh"
+
+namespace auth {
+
+// Java-style package prefix (including the trailing dot) shared by the transitional authenticator and authorizer
+// class names.
+static const sstring PACKAGE_NAME("com.scylladb.auth.");
+
+// Fully-qualified class name of the transitional authenticator. The string is built on first use and then reused.
+static const sstring& transitional_authenticator_name() {
+    static const sstring qualified = PACKAGE_NAME + "TransitionalAuthenticator";
+    return qualified;
+}
+
+// Fully-qualified class name of the transitional authorizer. The string is built on first use and then reused.
+static const sstring& transitional_authorizer_name() {
+    static const sstring qualified = PACKAGE_NAME + "TransitionalAuthorizer";
+    return qualified;
+}
+
+// Authenticator that wraps another authenticator (a `password_authenticator` by default) and never rejects a login:
+// when the wrapped authenticator fails with an `authentication_exception`, or when both user name and password are
+// missing or empty, the client is treated as the anonymous user.
+//
+// All role-management operations are forwarded to the wrapped authenticator unchanged.
+class transitional_authenticator : public authenticator {
+    std::unique_ptr<authenticator> _authenticator;
+
+public:
+    static const sstring PASSWORD_AUTHENTICATOR_NAME;
+
+    transitional_authenticator(cql3::query_processor& qp, ::service::migration_manager& mm)
+            : transitional_authenticator(std::make_unique<password_authenticator>(qp, mm)) {
+    }
+    transitional_authenticator(std::unique_ptr<authenticator> a)
+            : _authenticator(std::move(a)) {
+    }
+
+    virtual future<> start() override {
+        return _authenticator->start();
+    }
+
+    virtual future<> stop() override {
+        return _authenticator->stop();
+    }
+
+    virtual const sstring& qualified_java_name() const override {
+        return transitional_authenticator_name();
+    }
+
+    virtual bool require_authentication() const override {
+        return true;
+    }
+
+    virtual authentication_option_set supported_options() const override {
+        return _authenticator->supported_options();
+    }
+
+    virtual authentication_option_set alterable_options() const override {
+        return _authenticator->alterable_options();
+    }
+
+    // Authenticates with the wrapped authenticator, falling back to the anonymous user when no credentials were
+    // supplied or when the wrapped authenticator reports an `authentication_exception`. Other failures propagate.
+    //
+    // `credentials` is captured by reference, so the caller must keep it alive until the returned future resolves.
+    virtual future<authenticated_user> authenticate(const credentials_map& credentials) const override {
+        auto i = credentials.find(authenticator::USERNAME_KEY);
+        if ((i == credentials.end() || i->second.empty())
+                && (!credentials.count(PASSWORD_KEY) || credentials.at(PASSWORD_KEY).empty())) {
+            // return anon user
+            return make_ready_future<authenticated_user>(anonymous_user());
+        }
+        // Running the call inside a continuation turns a synchronous throw into a failed future, so that it reaches
+        // the handler below.
+        return make_ready_future().then([this, &credentials] {
+            return _authenticator->authenticate(credentials);
+        }).handle_exception([](auto ep) {
+            try {
+                std::rethrow_exception(ep);
+            } catch (exceptions::authentication_exception&) {
+                // return anon user
+                return make_ready_future<authenticated_user>(anonymous_user());
+            }
+        });
+    }
+
+    virtual future<> create(stdx::string_view role_name, const authentication_options& options) const override {
+        return _authenticator->create(role_name, options);
+    }
+
+    virtual future<> alter(stdx::string_view role_name, const authentication_options& options) const override {
+        return _authenticator->alter(role_name, options);
+    }
+
+    virtual future<> drop(stdx::string_view role_name) const override {
+        return _authenticator->drop(role_name);
+    }
+
+    virtual future<custom_options> query_custom_options(stdx::string_view role_name) const override {
+        return _authenticator->query_custom_options(role_name);
+    }
+
+    virtual const resource_set& protected_resources() const override {
+        return _authenticator->protected_resources();
+    }
+
+    // Returns a SASL challenge that wraps the one from the wrapped authenticator and converts authentication
+    // failures into a completed exchange that yields the anonymous user.
+    virtual ::shared_ptr<sasl_challenge> new_sasl_challenge() const override {
+        class sasl_wrapper : public sasl_challenge {
+        public:
+            sasl_wrapper(::shared_ptr<sasl_challenge> sasl)
+                    : _sasl(std::move(sasl)) {
+            }
+
+            virtual bytes evaluate_response(bytes_view client_response) override {
+                try {
+                    return _sasl->evaluate_response(client_response);
+                } catch (exceptions::authentication_exception&) {
+                    // Mark the exchange as finished so that the caller proceeds to `get_authenticated_user()`.
+                    _complete = true;
+                    return {};
+                }
+            }
+
+            virtual bool is_complete() const override {
+                return _complete || _sasl->is_complete();
+            }
+
+            virtual future<authenticated_user> get_authenticated_user() const override {
+                return futurize_apply([this] {
+                    return _sasl->get_authenticated_user().handle_exception([](auto ep) {
+                        try {
+                            std::rethrow_exception(ep);
+                        } catch (exceptions::authentication_exception&) {
+                            // return anon user
+                            return make_ready_future<authenticated_user>(anonymous_user());
+                        }
+                    });
+                });
+            }
+
+        private:
+            ::shared_ptr<sasl_challenge> _sasl;
+
+            // Set when the wrapped challenge rejected a response.
+            bool _complete = false;
+        };
+        return ::make_shared<sasl_wrapper>(_authenticator->new_sasl_challenge());
+    }
+};
+
+// Authorizer that wraps another authorizer (a `default_authorizer` by default). Every `authorize` request is
+// answered with the same fixed permission set, whatever the role or resource; all other operations are forwarded to
+// the wrapped authorizer.
+class transitional_authorizer : public authorizer {
+    std::unique_ptr<authorizer> _authorizer;
+
+public:
+    transitional_authorizer(cql3::query_processor& qp, ::service::migration_manager& mm)
+            : transitional_authorizer(std::make_unique<default_authorizer>(qp, mm)) {
+    }
+    transitional_authorizer(std::unique_ptr<authorizer> a)
+            : _authorizer(std::move(a)) {
+    }
+
+    ~transitional_authorizer() {
+    }
+
+    virtual future<> start() override {
+        return _authorizer->start();
+    }
+
+    virtual future<> stop() override {
+        return _authorizer->stop();
+    }
+
+    virtual const sstring& qualified_java_name() const override {
+        return transitional_authorizer_name();
+    }
+
+    // Always resolves to CREATE, ALTER, DROP, SELECT and MODIFY; the wrapped authorizer is not consulted.
+    virtual future<permission_set> authorize(const role_or_anonymous&, const resource&) const override {
+        static const permission_set granted_to_everyone =
+                permission_set::of<
+                        permission::CREATE,
+                        permission::ALTER,
+                        permission::DROP,
+                        permission::SELECT,
+                        permission::MODIFY>();
+
+        return make_ready_future<permission_set>(granted_to_everyone);
+    }
+
+    virtual future<> grant(stdx::string_view role_name, permission_set permissions, const resource& target) const override {
+        return _authorizer->grant(role_name, std::move(permissions), target);
+    }
+
+    virtual future<> revoke(stdx::string_view role_name, permission_set permissions, const resource& target) const override {
+        return _authorizer->revoke(role_name, std::move(permissions), target);
+    }
+
+    virtual future<std::vector<permission_details>> list_all() const override {
+        return _authorizer->list_all();
+    }
+
+    virtual future<> revoke_all(stdx::string_view role_name) const override {
+        return _authorizer->revoke_all(role_name);
+    }
+
+    virtual future<> revoke_all(const resource& target) const override {
+        return _authorizer->revoke_all(target);
+    }
+
+    virtual const resource_set& protected_resources() const override {
+        return _authorizer->protected_resources();
+    }
+};
+
+}
+
+//
+// To ensure correct initialization order, we unfortunately need to use string literals.
+//
+// The registered names below are assembled from `auth::PACKAGE_NAME` and a literal suffix rather than taken from
+// the function-local statics behind `transitional_authenticator_name()` / `transitional_authorizer_name()`.
+// `auth::PACKAGE_NAME` is defined earlier in this translation unit.
+//
+
+static const class_registrator<
+        auth::authenticator,
+        auth::transitional_authenticator,
+        cql3::query_processor&,
+        ::service::migration_manager&> transitional_authenticator_reg(auth::PACKAGE_NAME + "TransitionalAuthenticator");
+
+static const class_registrator<
+        auth::authorizer,
+        auth::transitional_authorizer,
+        cql3::query_processor&,
+        ::service::migration_manager&> transitional_authorizer_reg(auth::PACKAGE_NAME + "TransitionalAuthorizer");
--- a/backlog_controller.hh
+++ b/backlog_controller.hh
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+#include <seastar/core/scheduling.hh>
+#include <seastar/core/timer.hh>
+#include <seastar/core/gate.hh>
+#include <chrono>
+
+// Simple proportional controller to adjust shares for processes for which a backlog can be clearly
+// defined.
+//
+// Goal is to consume the backlog as fast as we can, but not so fast that we steal all the CPU from
+// incoming requests, and at the same time minimize user-visible fluctuations in the quota.
+//
+// What that translates to is we'll try to keep the backlog's first derivative at 0 (IOW, we keep
+// backlog constant). As the backlog grows we increase CPU usage, decreasing CPU usage as the
+// backlog diminishes.
+//
+// The exact point at which the controller stops determines the desired CPU usage. As the backlog
+// grows and approaches the desired maximum, we need to be more aggressive. We will therefore define two
+// thresholds, and increase the constant as we cross them.
+//
+// Doing that divides the range in three (before the first, between first and second, and after
+// second threshold), and we'll be slow to grow in the first region, grow normally in the second
+// region, and aggressively in the third region.
+//
+// The constants q1 and q2 are used to determine the proportional factor at each stage.
+class backlog_controller {
+public:
+    future<> shutdown() { // cancels the periodic timer and hands the stored in-flight update future to the caller
+        _update_timer.cancel();
+        return std::move(_inflight_update);
+    }
+protected:
+    struct control_point { // presumably input is a backlog level and output the matching quota - confirm in adjust()
+        float input;
+        float output;
+    };
+
+    seastar::scheduling_group _scheduling_group;
+    const ::io_priority_class& _io_priority;
+    std::chrono::milliseconds _interval{}; // zero-initialized so the static-shares constructor leaves no indeterminate value
+    timer<> _update_timer;
+
+    std::vector<control_point> _control_points;
+
+    std::function<float()> _current_backlog;
+    // updating shares for an I/O class may contact another shard and returns a future.
+    future<> _inflight_update;
+
+    virtual void update_controller(float quota);
+
+    void adjust();
+
+    backlog_controller(seastar::scheduling_group sg, const ::io_priority_class& iop, std::chrono::milliseconds interval,
+                       std::vector<control_point> control_points, std::function<float()> backlog)
+        : _scheduling_group(sg)
+        , _io_priority(iop)
+        , _interval(interval)
+        , _update_timer([this] { adjust(); }) // the timer callback runs adjust()
+        , _control_points({{0,0}}) // the point list always starts with (0, 0)
+        , _current_backlog(std::move(backlog))
+        , _inflight_update(make_ready_future<>())
+    {
+        _control_points.insert(_control_points.end(), control_points.begin(), control_points.end());
+        _update_timer.arm_periodic(_interval);
+    }
+
+    // Used when the controllers are disabled and a static share is used
+    // When that option is deprecated we should remove this.
+    backlog_controller(seastar::scheduling_group sg, const ::io_priority_class& iop, float static_shares)
+        : _scheduling_group(sg)
+        , _io_priority(iop)
+        , _inflight_update(make_ready_future<>())
+    {
+        update_controller(static_shares); // called from the constructor, so overrides in derived classes are not used here
+    }
+
+    virtual ~backlog_controller() {}
+};
+
+// memtable flush CPU controller.
+//
+// - First threshold is the soft limit line,
+// - Maximum is the point at which we'd stop consuming requests,
+// - Second threshold is halfway between them.
+//
+// Below the soft limit, we are in no particular hurry to flush, since it means we're set to
+// complete flushing before a new memtable is ready. The quota is dirty * q1, and q1 is set to a
+// low number.
+//
+// The first half of the virtual dirty region is where we expect to be usually, so we have a low
+// slope corresponding to a sluggish response between q1 * soft_limit and q2.
+//
+// In the second half, we're getting close to the hard dirty limit so we increase the slope and
+// become more responsive, up to a maximum quota of qmax.
+class flush_controller : public backlog_controller {
+    static constexpr float hard_dirty_limit = 1.0f;
+public:
+    // Static variant: hands static_shares straight to the static-shares base constructor.
+    flush_controller(seastar::scheduling_group sg, const ::io_priority_class& iop, float static_shares) : backlog_controller(sg, iop, static_shares) {}
+    // Dynamic variant: output 100 at soft_limit, 200 halfway to hard_dirty_limit, 1000 at hard_dirty_limit.
+    flush_controller(seastar::scheduling_group sg, const ::io_priority_class& iop, std::chrono::milliseconds interval, float soft_limit, std::function<float()> current_dirty)
+        : backlog_controller(sg, iop, interval,
+              std::vector<backlog_controller::control_point>({{soft_limit, 100}, {soft_limit + (hard_dirty_limit - soft_limit) / 2, 200} , {hard_dirty_limit, 1000}}),
+              std::move(current_dirty)) {}
+};
+
+class compaction_controller : public backlog_controller {
+public:
+    static constexpr unsigned normalization_factor = 30;
+    // Static variant: hands static_shares straight to the static-shares base constructor.
+    compaction_controller(seastar::scheduling_group sg, const ::io_priority_class& iop, float static_shares) : backlog_controller(sg, iop, static_shares) {}
+    // Dynamic variant: control points (0.5, 10), (1.5, 100) and (normalization_factor, 1000).
+    compaction_controller(seastar::scheduling_group sg, const ::io_priority_class& iop, std::chrono::milliseconds interval, std::function<float()> current_backlog)
+        : backlog_controller(sg, iop, interval,
+              std::vector<backlog_controller::control_point>({{0.5, 10}, {1.5, 100} , {normalization_factor, 1000}}),
+              std::move(current_backlog)) {}
+};
--- a/cache_flat_mutation_reader.hh
+++ b/cache_flat_mutation_reader.hh
@@ -0,0 +1,664 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <vector>
+#include "row_cache.hh"
+#include "mutation_reader.hh"
+#include "mutation_fragment.hh"
+#include "partition_version.hh"
+#include "utils/logalloc.hh"
+#include "query-request.hh"
+#include "partition_snapshot_reader.hh"
+#include "partition_snapshot_row_cursor.hh"
+#include "read_context.hh"
+#include "flat_mutation_reader.hh"
+
+namespace cache {
+
+extern logging::logger clogger;
+
+class cache_flat_mutation_reader final : public flat_mutation_reader::impl {
+    enum class state {
+        before_static_row, // initial state (see the _state member initializer)
+
+        // Invariants:
+        //  - position_range(_lower_bound, _upper_bound) covers all not yet emitted positions from current range
+        //  - if _next_row has valid iterators:
+        //    - _next_row points to the nearest row in cache >= _lower_bound
+        //    - _next_row_in_range = _next_row.position() < _upper_bound
+        //  - if _next_row doesn't have valid iterators, it has no meaning.
+        reading_from_cache,
+
+        // Starts reading from underlying reader.
+        // The range to read is position_range(_lower_bound, min(_next_row.position(), _upper_bound)).
+        // Invariants:
+        //  - _next_row_in_range = _next_row.position() < _upper_bound
+        move_to_underlying,
+
+        // Invariants:
+        // - Upper bound of the read is min(_next_row.position(), _upper_bound)
+        // - _next_row_in_range = _next_row.position() < _upper_bound
+        // - _last_row points at a direct predecessor of the next row which is going to be read.
+        //   Used for populating continuity.
+        // - _population_range_starts_before_all_rows is set accordingly
+        reading_from_underlying,
+
+        end_of_stream // entered through finish_reader()
+    };
+    lw_shared_ptr<partition_snapshot> _snp;
+    position_in_partition::tri_compare _position_cmp;
+
+    query::clustering_key_filter_ranges _ck_ranges;
+    query::clustering_row_ranges::const_iterator _ck_ranges_curr; // range currently being read
+    query::clustering_row_ranges::const_iterator _ck_ranges_end;
+
+    lsa_manager _lsa_manager;
+
+    partition_snapshot_row_weakref _last_row;
+
+    // Holds the lower bound of a position range which hasn't been processed yet.
+    // Only rows with positions < _lower_bound have been emitted, and only
+    // range_tombstones with positions <= _lower_bound.
+    position_in_partition _lower_bound;
+    position_in_partition_view _upper_bound;
+
+    state _state = state::before_static_row;
+    lw_shared_ptr<read_context> _read_context;
+    partition_snapshot_row_cursor _next_row;
+    bool _next_row_in_range = false;
+
+    // True iff current population interval, since the previous clustering row, starts before all clustered rows.
+    // We cannot just look at _lower_bound, because emission of range tombstones changes _lower_bound and
+    // because we mark clustering intervals as continuous when consuming a clustering_row, it would prevent
+    // us from marking the interval as continuous.
+    // Valid when _state == reading_from_underlying.
+    bool _population_range_starts_before_all_rows; // not initialized at construction; assigned in do_fill_buffer()
+
+    future<> do_fill_buffer(db::timeout_clock::time_point);
+    void copy_from_cache_to_buffer();
+    future<> process_static_row(db::timeout_clock::time_point);
+    void move_to_end();
+    void move_to_next_range();
+    void move_to_range(query::clustering_row_ranges::const_iterator);
+    void move_to_next_entry();
+    void add_to_buffer(const partition_snapshot_row_cursor&);
+    void add_clustering_row_to_buffer(mutation_fragment&&);
+    void add_to_buffer(range_tombstone&&);
+    void add_to_buffer(mutation_fragment&&);
+    future<> read_from_underlying(db::timeout_clock::time_point);
+    void start_reading_from_underlying();
+    bool after_current_range(position_in_partition_view position);
+    bool can_populate() const;
+    // Marks the range between _last_row (exclusive) and _next_row (exclusive) as continuous,
+    // provided that the underlying reader still matches the latest version of the partition.
+    void maybe_update_continuity();
+    // Tries to ensure that the lower bound of the current population range exists.
+    // Returns false if it failed and range cannot be populated.
+    // Assumes can_populate().
+    bool ensure_population_lower_bound();
+    void maybe_add_to_cache(const mutation_fragment& mf);
+    void maybe_add_to_cache(const clustering_row& cr);
+    void maybe_add_to_cache(const range_tombstone& rt);
+    void maybe_add_to_cache(const static_row& sr);
+    void maybe_set_static_row_continuous();
+    void finish_reader() { // emits partition_end and switches to the terminal state
+        push_mutation_fragment(partition_end());
+        _end_of_stream = true;
+        _state = state::end_of_stream;
+    }
+    void touch_partition();
+public:
+    cache_flat_mutation_reader(schema_ptr s,
+                               dht::decorated_key dk,
+                               query::clustering_key_filter_ranges&& crr,
+                               lw_shared_ptr<read_context> ctx,
+                               lw_shared_ptr<partition_snapshot> snp,
+                               row_cache& cache)
+        : flat_mutation_reader::impl(std::move(s))
+        , _snp(std::move(snp))
+        , _position_cmp(*_schema)
+        , _ck_ranges(std::move(crr))
+        , _ck_ranges_curr(_ck_ranges.begin())
+        , _ck_ranges_end(_ck_ranges.end())
+        , _lsa_manager(cache)
+        , _lower_bound(position_in_partition::before_all_clustered_rows())
+        , _upper_bound(position_in_partition_view::before_all_clustered_rows())
+        , _read_context(std::move(ctx))
+        , _next_row(*_schema, *_snp)
+    {
+        clogger.trace("csm {}: table={}.{}", this, _schema->ks_name(), _schema->cf_name());
+        push_mutation_fragment(partition_start(std::move(dk), _snp->partition_tombstone())); // partition_start is buffered at construction
+    }
+    cache_flat_mutation_reader(const cache_flat_mutation_reader&) = delete;
+    cache_flat_mutation_reader(cache_flat_mutation_reader&&) = delete;
+    virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override;
+    virtual ~cache_flat_mutation_reader() {
+        maybe_merge_versions(_snp, _lsa_manager.region(), _lsa_manager.read_section()); // passes the snapshot to maybe_merge_versions() on destruction
+    }
+    virtual void next_partition() override {
+        clear_buffer_to_next_partition();
+        if (is_buffer_empty()) { // nothing buffered remains, so the stream is over
+            _end_of_stream = true;
+        }
+    }
+    virtual future<> fast_forward_to(const dht::partition_range&, db::timeout_clock::time_point timeout) override { // the range argument is ignored
+        clear_buffer();
+        _end_of_stream = true;
+        return make_ready_future<>();
+    }
+    virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override {
+        throw std::bad_function_call(); // not supported by this reader: always throws
+    }
+};
+
+inline
+future<> cache_flat_mutation_reader::process_static_row(db::timeout_clock::time_point timeout) {
+    if (!_snp->static_row_continuous()) {
+        // Miss: take the next fragment from the read context, offer it to the cache, then emit it.
+        _read_context->cache().on_row_miss();
+        return _read_context->get_next_fragment(timeout).then([this] (mutation_fragment_opt&& fragment) {
+            if (fragment) {
+                assert(fragment->is_static_row());
+                maybe_add_to_cache(fragment->as_static_row());
+                push_mutation_fragment(std::move(*fragment));
+            }
+            maybe_set_static_row_continuous();
+        });
+    }
+    _read_context->cache().on_row_hit(); // static row is marked continuous in the snapshot: serve it from there
+    static_row cached = _lsa_manager.run_in_read_section([this] {
+        return _snp->static_row(_read_context->digest_requested());
+    });
+    if (!cached.empty()) {
+        push_mutation_fragment(mutation_fragment(std::move(cached)));
+    }
+    return make_ready_future<>();
+}
+
+inline
+void cache_flat_mutation_reader::touch_partition() {
+    if (!_snp->at_latest_version()) {
+        return;
+    }
+    _snp->tracker()->touch(*_snp->version()->partition().clustered_rows().rbegin()); // touches the last clustered-rows entry
+}
+
+inline
+future<> cache_flat_mutation_reader::fill_buffer(db::timeout_clock::time_point timeout) {
+    if (_state == state::before_static_row) { // first call: deal with the static row, then enter the first range
+        auto after_static_row = [this, timeout] {
+            if (_ck_ranges_curr == _ck_ranges_end) { // no clustering ranges to read at all
+                touch_partition();
+                finish_reader();
+                return make_ready_future<>();
+            }
+            _state = state::reading_from_cache;
+            _lsa_manager.run_in_read_section([this] {
+                move_to_range(_ck_ranges_curr);
+            });
+            return fill_buffer(timeout); // re-enter: _state is no longer before_static_row
+        };
+        if (_schema->has_static_columns()) {
+            return process_static_row(timeout).then(std::move(after_static_row));
+        } else {
+            return after_static_row();
+        }
+    }
+    clogger.trace("csm {}: fill_buffer(), range={}, lb={}", this, *_ck_ranges_curr, _lower_bound);
+    return do_until([this] { return _end_of_stream || is_buffer_full(); }, [this, timeout] { // repeat until the stream ends or the buffer is full
+        return do_fill_buffer(timeout);
+    });
+}
+
+inline
+future<> cache_flat_mutation_reader::do_fill_buffer(db::timeout_clock::time_point timeout) {
+    if (_state == state::move_to_underlying) { // state set by start_reading_from_underlying()
+        _state = state::reading_from_underlying;
+        _population_range_starts_before_all_rows = _lower_bound.is_before_all_clustered_rows(*_schema);
+        auto end = _next_row_in_range ? position_in_partition(_next_row.position()) // stop at the next cached row when it is in range,
+                                      : position_in_partition(_upper_bound);        // otherwise at the end of the current range
+        return _read_context->fast_forward_to(position_range{_lower_bound, std::move(end)}, timeout).then([this, timeout] {
+            return read_from_underlying(timeout);
+        });
+    }
+    if (_state == state::reading_from_underlying) {
+        return read_from_underlying(timeout);
+    }
+    // assert(_state == state::reading_from_cache)
+    return _lsa_manager.run_in_read_section([this] {
+        auto next_valid = _next_row.iterators_valid();
+        clogger.trace("csm {}: reading_from_cache, range=[{}, {}), next={}, valid={}", this, _lower_bound,
+            _upper_bound, _next_row.position(), next_valid);
+        // We assume that if there was eviction, and thus the range may
+        // no longer be continuous, the cursor was invalidated.
+        if (!next_valid) {
+            auto adjacent = _next_row.advance_to(_lower_bound); // re-position the cursor at _lower_bound
+            _next_row_in_range = !after_current_range(_next_row.position());
+            if (!adjacent && !_next_row.continuous()) {
+                _last_row = nullptr; // We could insert a dummy here, but this path is unlikely.
+                start_reading_from_underlying();
+                return make_ready_future<>();
+            }
+        }
+        _next_row.maybe_refresh();
+        clogger.trace("csm {}: next={}, cont={}", this, _next_row.position(), _next_row.continuous());
+        while (!is_buffer_full() && _state == state::reading_from_cache) { // copy until the buffer fills or the state changes
+            copy_from_cache_to_buffer();
+            if (need_preempt()) { // stop early when need_preempt() asks for it
+                break;
+            }
+        }
+        return make_ready_future<>();
+    });
+}
+
+inline
+future<> cache_flat_mutation_reader::read_from_underlying(db::timeout_clock::time_point timeout) {
+    return consume_mutation_fragments_until(_read_context->underlying().underlying(),
+        [this] { return _state != state::reading_from_underlying || is_buffer_full(); }, // stop condition
+        [this] (mutation_fragment mf) { // per-fragment consumer: count a miss, offer to the cache, emit
+            _read_context->cache().on_row_miss();
+            maybe_add_to_cache(mf);
+            add_to_buffer(std::move(mf));
+        },
+        [this] { // NOTE(review): presumably invoked once the underlying reader has no more fragments for the range - confirm
+            _state = state::reading_from_cache;
+            _lsa_manager.run_in_update_section([this] {
+                auto same_pos = _next_row.maybe_refresh();
+                if (!same_pos) { // the cursor no longer points at the same position
+                    _read_context->cache().on_mispopulate(); // FIXME: Insert dummy entry at _upper_bound.
+                    _next_row_in_range = !after_current_range(_next_row.position());
+                    if (!_next_row.continuous()) {
+                        start_reading_from_underlying();
+                    }
+                    return;
+                }
+                if (_next_row_in_range) { // the next cached row is inside the range: emit it and advance
+                    maybe_update_continuity();
+                    _last_row = _next_row;
+                    add_to_buffer(_next_row);
+                    try {
+                        move_to_next_entry();
+                    } catch (const std::bad_alloc&) {
+                        // We cannot reenter the section, since we may have moved to the new range, and
+                        // because add_to_buffer() should not be repeated.
+                        _snp->region().allocator().invalidate_references(); // Invalidates _next_row
+                    }
+                } else { // the current range is exhausted: record what can be recorded, then move on
+                    if (no_clustering_row_between(*_schema, _upper_bound, _next_row.position())) {
+                        this->maybe_update_continuity();
+                    } else if (can_populate()) {
+                        rows_entry::compare less(*_schema);
+                        auto& rows = _snp->version()->partition().clustered_rows();
+                        if (query::is_single_row(*_schema, *_ck_ranges_curr)) {
+                            with_allocator(_snp->region().allocator(), [&] {
+                                auto e = alloc_strategy_unique_ptr<rows_entry>(
+                                    current_allocator().construct<rows_entry>(_ck_ranges_curr->start()->value())); // entry keyed by the range's start value
+                                // Use _next_row iterator only as a hint, because there could be insertions after _upper_bound.
+                                auto insert_result = rows.insert_check(_next_row.get_iterator_in_latest_version(), *e, less);
+                                auto inserted = insert_result.second;
+                                auto it = insert_result.first;
+                                if (inserted) {
+                                    _snp->tracker()->insert(*e);
+                                    e.release(); // ownership now rests with the rows container
+                                    auto next = std::next(it);
+                                    it->set_continuous(next->continuous()); // inherit the continuity flag of the following entry
+                                    clogger.trace("csm {}: inserted dummy at {}, cont={}", this, it->position(), it->continuous());
+                                }
+                            });
+                        } else if (ensure_population_lower_bound()) {
+                            with_allocator(_snp->region().allocator(), [&] {
+                                auto e = alloc_strategy_unique_ptr<rows_entry>(
+                                    current_allocator().construct<rows_entry>(*_schema, _upper_bound, is_dummy::yes, is_continuous::yes)); // continuous dummy at _upper_bound
+                                // Use _next_row iterator only as a hint, because there could be insertions after _upper_bound.
+                                auto insert_result = rows.insert_check(_next_row.get_iterator_in_latest_version(), *e, less);
+                                auto inserted = insert_result.second;
+                                if (inserted) {
+                                    clogger.trace("csm {}: inserted dummy at {}", this, _upper_bound);
+                                    _snp->tracker()->insert(*e);
+                                    e.release(); // ownership now rests with the rows container
+                                } else {
+                                    clogger.trace("csm {}: mark {} as continuous", this, insert_result.first->position());
+                                    insert_result.first->set_continuous(true); // an entry already exists there: just flag it
+                                }
+                            });
+                        }
+                    } else {
+                        _read_context->cache().on_mispopulate();
+                    }
+                    try {
+                        move_to_next_range();
+                    } catch (const std::bad_alloc&) {
+                        // We cannot reenter the section, since we may have moved to the new range
+                        _snp->region().allocator().invalidate_references(); // Invalidates _next_row
+                    }
+                }
+            });
+            return make_ready_future<>();
+        });
+}
+
+inline
+bool cache_flat_mutation_reader::ensure_population_lower_bound() {
+    if (_population_range_starts_before_all_rows) { // no preceding row is needed in this case
+        return true;
+    }
+    if (!_last_row.refresh(*_snp)) { // _last_row could not be refreshed against the snapshot
+        return false;
+    }
+    // Continuity flag we will later set for the upper bound extends to the previous row in the same version,
+    // so we need to ensure we have an entry in the latest version.
+    if (!_last_row.is_in_latest_version()) {
+        with_allocator(_snp->region().allocator(), [&] {
+            auto& rows = _snp->version()->partition().clustered_rows();
+            rows_entry::compare less(*_schema);
+            // FIXME: Avoid the copy by inserting an incomplete clustering row
+            auto e = alloc_strategy_unique_ptr<rows_entry>(
+                current_allocator().construct<rows_entry>(*_last_row)); // copy of the entry _last_row refers to
+            e->set_continuous(false);
+            auto insert_result = rows.insert_check(rows.end(), *e, less);
+            auto inserted = insert_result.second;
+            if (inserted) {
+                clogger.trace("csm {}: inserted lower bound dummy at {}", this, e->position()); // NOTE(review): the entry is a copy of *_last_row; "dummy" in the message may be imprecise
+                _snp->tracker()->insert(*e);
+                e.release(); // ownership now rests with the rows container
+            }
+        });
+    }
+    return true;
+}
+
+inline
+void cache_flat_mutation_reader::maybe_update_continuity() {
+    if (!can_populate() || !ensure_population_lower_bound()) {
+        _read_context->cache().on_mispopulate(); // population not possible: only count it
+        return;
+    }
+    // Flag the entry for _next_row, obtained in the latest version, as continuous.
+    with_allocator(_snp->region().allocator(), [&] {
+        _next_row.ensure_entry_in_latest().row.set_continuous(true);
+    });
+}
+
+inline
+void cache_flat_mutation_reader::maybe_add_to_cache(const mutation_fragment& mf) {
+    if (mf.is_range_tombstone()) {
+        maybe_add_to_cache(mf.as_range_tombstone());
+        return;
+    }
+    // Anything that is not a range tombstone must be a clustering row here.
+    assert(mf.is_clustering_row());
+    maybe_add_to_cache(mf.as_clustering_row());
+}
+
+inline
+void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) {
+    if (!can_populate()) { // cannot populate: forget the population anchor and count it
+        _last_row = nullptr;
+        _population_range_starts_before_all_rows = false;
+        _read_context->cache().on_mispopulate();
+        return;
+    }
+    clogger.trace("csm {}: populate({})", this, cr);
+    _lsa_manager.run_in_update_section_with_allocator([this, &cr] {
+        mutation_partition& mp = _snp->version()->partition();
+        rows_entry::compare less(*_schema);
+
+        if (_read_context->digest_requested()) {
+            cr.cells().prepare_hash(*_schema, column_kind::regular_column);
+        }
+        auto new_entry = alloc_strategy_unique_ptr<rows_entry>(
+            current_allocator().construct<rows_entry>(cr.key(), cr.tomb(), cr.marker(), cr.cells()));
+        new_entry->set_continuous(false);
+        auto it = _next_row.iterators_valid() ? _next_row.get_iterator_in_latest_version() // insertion hint from the cursor when it is valid,
+                                              : mp.clustered_rows().lower_bound(cr.key(), less); // otherwise from a lookup by key
+        auto insert_result = mp.clustered_rows().insert_check(it, *new_entry, less);
+        if (insert_result.second) {
+            _snp->tracker()->insert(*new_entry);
+            new_entry.release(); // ownership now rests with the rows container
+        }
+        it = insert_result.first; // the inserted entry, or the one already present
+
+        rows_entry& e = *it;
+        if (ensure_population_lower_bound()) {
+            clogger.trace("csm {}: set_continuous({})", this, e.position());
+            e.set_continuous(true);
+        } else {
+            _read_context->cache().on_mispopulate();
+        }
+        with_allocator(standard_allocator(), [&] { // the weak reference is created under the standard allocator
+            _last_row = partition_snapshot_row_weakref(*_snp, it, true);
+        });
+        _population_range_starts_before_all_rows = false;
+    });
+}
+
+inline
+bool cache_flat_mutation_reader::after_current_range(position_in_partition_view p) {
+    return !(_position_cmp(p, _upper_bound) < 0); // true unless p compares below _upper_bound
+}
+
+inline
+void cache_flat_mutation_reader::start_reading_from_underlying() {
+    clogger.trace("csm {}: start_reading_from_underlying(), range=[{}, {})", this, _lower_bound, _next_row_in_range ? _next_row.position() : _upper_bound);
+    _state = state::move_to_underlying; // do_fill_buffer() performs the actual fast-forward on its next run
+    _next_row.touch();
+}
+
+inline
+void cache_flat_mutation_reader::copy_from_cache_to_buffer() {
+    clogger.trace("csm {}: copy_from_cache, next={}, next_row_in_range={}", this, _next_row.position(), _next_row_in_range);
+    _next_row.touch();
+    position_in_partition_view next_lower_bound = _next_row.dummy() ? _next_row.position() : position_in_partition_view::after_key(_next_row.key());
+    for (auto &&rts : _snp->range_tombstones(_lower_bound, _next_row_in_range ? next_lower_bound : _upper_bound)) { // tombstones up to the next row, or to the range end
+        // This guarantees that rts starts after any emitted clustering_row
+        // and not before any emitted range tombstone.
+        if (rts.trim_front(*_schema, _lower_bound)) {
+            _lower_bound = position_in_partition(rts.position()); // advanced before the buffer-full check below
+            if (is_buffer_full()) {
+                return; // rts has not been pushed at this point
+            }
+            push_mutation_fragment(std::move(rts));
+        }
+    }
+    // We add the row to the buffer even when it's full.
+    // This simplifies the code. For more info see #3139.
+    if (_next_row_in_range) {
+        _last_row = _next_row;
+        add_to_buffer(_next_row);
+        move_to_next_entry();
+    } else {
+        move_to_next_range();
+    }
+}
+
+inline
+void cache_flat_mutation_reader::move_to_end() {
+    finish_reader(); // pushes partition_end and marks the stream as finished
+    clogger.trace("csm {}: eos", this);
+}
+
+inline
+void cache_flat_mutation_reader::move_to_next_range() {
+    const auto following = std::next(_ck_ranges_curr);
+    if (following != _ck_ranges_end) {
+        move_to_range(following);
+        return;
+    }
+    move_to_end(); // no ranges left: finish the stream, then leave the range iterator at the end
+    _ck_ranges_curr = following;
+}
+
+inline
+void cache_flat_mutation_reader::move_to_range(query::clustering_row_ranges::const_iterator next_it) {
+    auto lb = position_in_partition::for_range_start(*next_it);
+    auto ub = position_in_partition_view::for_range_end(*next_it);
+    _last_row = nullptr; // the population anchor is dropped when the range changes
+    _lower_bound = std::move(lb);
+    _upper_bound = std::move(ub);
+    _ck_ranges_curr = next_it;
+    auto adjacent = _next_row.advance_to(_lower_bound); // position the cursor at the start of the new range
+    _next_row_in_range = !after_current_range(_next_row.position());
+    clogger.trace("csm {}: move_to_range(), range={}, lb={}, ub={}, next={}", this, *_ck_ranges_curr, _lower_bound, _upper_bound, _next_row.position());
+    if (!adjacent && !_next_row.continuous()) { // same test do_fill_buffer() uses before calling start_reading_from_underlying()
+        // FIXME: We don't insert a dummy for singular range to avoid allocating 3 entries
+        // for a hit (before, at and after). If we supported the concept of an incomplete row,
+        // we could insert such a row for the lower bound if it's full instead, for both singular and
+        // non-singular ranges.
+        if (_ck_ranges_curr->start() && !query::is_single_row(*_schema, *_ck_ranges_curr)) {
+            // Insert dummy for lower bound
+            if (can_populate()) {
+                // FIXME: _lower_bound could be adjacent to the previous row, in which case we could skip this
+                clogger.trace("csm {}: insert dummy at {}", this, _lower_bound);
+                auto it = with_allocator(_lsa_manager.region().allocator(), [&] {
+                    auto& rows = _snp->version()->partition().clustered_rows();
+                    auto new_entry = current_allocator().construct<rows_entry>(*_schema, _lower_bound, is_dummy::yes, is_continuous::no); // non-continuous dummy at _lower_bound
+                    return rows.insert_before(_next_row.get_iterator_in_latest_version(), *new_entry);
+                });
+                _snp->tracker()->insert(*it);
+                _last_row = partition_snapshot_row_weakref(*_snp, it, true); // the new dummy becomes the population anchor
+            } else {
+                _read_context->cache().on_mispopulate();
+            }
+        }
+        start_reading_from_underlying();
+    }
+}
+
+// Precondition: _next_row must be inside the range.
+inline
+void cache_flat_mutation_reader::move_to_next_entry() {
+    clogger.trace("csm {}: move_to_next_entry(), curr={}", this, _next_row.position());
+    if (no_clustering_row_between(*_schema, _next_row.position(), _upper_bound)) {
+        move_to_next_range(); // no clustering row can lie between the cursor and _upper_bound
+        return;
+    }
+    if (!_next_row.next()) {
+        move_to_end();
+        return;
+    }
+    _next_row_in_range = !after_current_range(_next_row.position());
+    clogger.trace("csm {}: next={}, cont={}, in_range={}", this, _next_row.position(), _next_row.continuous(), _next_row_in_range);
+    if (!_next_row.continuous()) {
+        start_reading_from_underlying();
+    }
+}
+
+inline
+void cache_flat_mutation_reader::add_to_buffer(mutation_fragment&& mf) {
+    clogger.trace("csm {}: add_to_buffer({})", this, mf);
+    if (!mf.is_clustering_row()) {
+        assert(mf.is_range_tombstone());
+        add_to_buffer(std::move(mf).as_range_tombstone());
+        return;
+    }
+    add_clustering_row_to_buffer(std::move(mf));
+}
+
+inline
+void cache_flat_mutation_reader::add_to_buffer(const partition_snapshot_row_cursor& row) {
+    // Dummy entries produce no fragment and are not counted as hits.
+    if (row.dummy()) { return; }
+    _read_context->cache().on_row_hit();
+    add_clustering_row_to_buffer(row.row(_read_context->digest_requested()));
+}
+
+// Invariants kept by this function, including when an exception is thrown:
+//   (1) no fragment with position >= _lower_bound was pushed yet
+//   (2) If _lower_bound > mf.position(), mf was emitted
+inline
+void cache_flat_mutation_reader::add_clustering_row_to_buffer(mutation_fragment&& mf) {
+    clogger.trace("csm {}: add_clustering_row_to_buffer({})", this, mf);
+    // Compute the new bound while mf is still intact; assign it only after the push.
+    auto bound_after_row = position_in_partition::after_key(mf.as_clustering_row().key());
+    push_mutation_fragment(std::move(mf));
+    _lower_bound = std::move(bound_after_row);
+}
+
+inline
+void cache_flat_mutation_reader::add_to_buffer(range_tombstone&& rt) {
+    clogger.trace("csm {}: add_to_buffer({})", this, rt);
+    // Trimming against _lower_bound guarantees that rt starts after any emitted clustering_row
+    // and not before any emitted range tombstone; if trim_front() returns false, rt is dropped.
+    const bool keep = rt.trim_front(*_schema, _lower_bound);
+    if (keep) {
+        _lower_bound = position_in_partition(rt.position());
+        push_mutation_fragment(std::move(rt));
+    }
+}
+
+inline
+void cache_flat_mutation_reader::maybe_add_to_cache(const range_tombstone& rt) {
+    if (!can_populate()) {
+        _read_context->cache().on_mispopulate();
+        return;
+    }
+    clogger.trace("csm {}: maybe_add_to_cache({})", this, rt);
+    _lsa_manager.run_in_update_section_with_allocator([&] {
+        _snp->version()->partition().row_tombstones().apply_monotonically(*_schema, rt);
+    });
+}
+
+inline
+void cache_flat_mutation_reader::maybe_add_to_cache(const static_row& sr) {
+    if (!can_populate()) {
+        _read_context->cache().on_mispopulate();
+        return;
+    }
+    clogger.trace("csm {}: populate({})", this, sr);
+    _read_context->cache().on_static_row_insert();
+    _lsa_manager.run_in_update_section_with_allocator([&] {
+        if (_read_context->digest_requested()) {
+            sr.cells().prepare_hash(*_schema, column_kind::static_column);
+        }
+        _snp->version()->partition().static_row().apply(*_schema, column_kind::static_column, sr.cells());
+    });
+}
+
+inline
+void cache_flat_mutation_reader::maybe_set_static_row_continuous() {
+    if (!can_populate()) {
+        _read_context->cache().on_mispopulate();
+        return;
+    }
+    clogger.trace("csm {}: set static row continuous", this);
+    _snp->version()->partition().set_static_row_continuous(true);
+}
+
+inline
+bool cache_flat_mutation_reader::can_populate() const {
+    return _snp->at_latest_version() && _read_context->phase() == _read_context->cache().phase_of(_read_context->key()); // latest version and matching phase
+}
+
+} // namespace cache
+
+// Builds a flat_mutation_reader whose implementation is cache::cache_flat_mutation_reader.
+// Note the argument order: cache precedes ctx and snp here, but is passed last to the constructor.
+inline flat_mutation_reader make_cache_flat_mutation_reader(schema_ptr s,
+        dht::decorated_key dk, query::clustering_key_filter_ranges crr, row_cache& cache,
+        lw_shared_ptr<cache::read_context> ctx, lw_shared_ptr<partition_snapshot> snp)
+{
+    using reader_impl = cache::cache_flat_mutation_reader;
+    return make_flat_mutation_reader<reader_impl>(
+        std::move(s), std::move(dk), std::move(crr), std::move(ctx), std::move(snp), cache);
+}
--- a/cache_streamed_mutation.hh
+++ b/cache_streamed_mutation.hh
@@ -1,538 +0,0 @@
-/*
- * Copyright (C) 2017 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include <vector>
-#include "row_cache.hh"
-#include "mutation_reader.hh"
-#include "streamed_mutation.hh"
-#include "partition_version.hh"
-#include "utils/logalloc.hh"
-#include "query-request.hh"
-#include "partition_snapshot_reader.hh"
-#include "partition_snapshot_row_cursor.hh"
-#include "read_context.hh"
-
-namespace cache {
-
-class lsa_manager {
-    row_cache& _cache;
-public:
-    lsa_manager(row_cache& cache) : _cache(cache) { }
-    template<typename Func>
-    decltype(auto) run_in_read_section(const Func& func) {
-        return _cache._read_section(_cache._tracker.region(), [&func] () {
-            return with_linearized_managed_bytes([&func] () {
-                return func();
-            });
-        });
-    }
-    template<typename Func>
-    decltype(auto) run_in_update_section(const Func& func) {
-        return _cache._update_section(_cache._tracker.region(), [&func] () {
-            return with_linearized_managed_bytes([&func] () {
-                return func();
-            });
-        });
-    }
-    template<typename Func>
-    void run_in_update_section_with_allocator(Func&& func) {
-        return _cache._update_section(_cache._tracker.region(), [this, &func] () {
-            return with_linearized_managed_bytes([this, &func] () {
-                return with_allocator(_cache._tracker.region().allocator(), [this, &func] () mutable {
-                    return func();
-                });
-            });
-        });
-    }
-    logalloc::region& region() { return _cache._tracker.region(); }
-    logalloc::allocating_section& read_section() { return _cache._read_section; }
-};
-
-class cache_streamed_mutation final : public streamed_mutation::impl {
-    enum class state {
-        before_static_row,
-
-        // Invariants:
-        //  - position_range(_lower_bound, _upper_bound) covers all not yet emitted positions from current range
-        //  - _next_row points to the nearest row in cache >= _lower_bound
-        //  - _next_row_in_range = _next.position() < _upper_bound
-        reading_from_cache,
-
-        // Starts reading from underlying reader.
-        // The range to read is position_range(_lower_bound, min(_next_row.position(), _upper_bound)).
-        // Invariants:
-        //  - _next_row_in_range = _next.position() < _upper_bound
-        move_to_underlying,
-
-        // Invariants:
-        // - Upper bound of the read is min(_next_row.position(), _upper_bound)
-        // - _next_row_in_range = _next.position() < _upper_bound
-        // - _last_row_key contains the key of last emitted clustering_row
-        reading_from_underlying,
-
-        end_of_stream
-    };
-    lw_shared_ptr<partition_snapshot> _snp;
-    position_in_partition::tri_compare _position_cmp;
-
-    query::clustering_key_filter_ranges _ck_ranges;
-    query::clustering_row_ranges::const_iterator _ck_ranges_curr;
-    query::clustering_row_ranges::const_iterator _ck_ranges_end;
-
-    lsa_manager _lsa_manager;
-
-    stdx::optional<clustering_key> _last_row_key;
-
-    // We need to be prepared that we may get overlapping and out of order
-    // range tombstones. We must emit fragments with strictly monotonic positions,
-    // so we can't just trim such tombstones to the position of the last fragment.
-    // To solve that, range tombstones are accumulated first in a range_tombstone_stream
-    // and emitted once we have a fragment with a larger position.
-    range_tombstone_stream _tombstones;
-
-    // Holds the lower bound of a position range which hasn't been processed yet.
-    // Only fragments with positions < _lower_bound have been emitted.
-    position_in_partition _lower_bound;
-    position_in_partition_view _upper_bound;
-
-    state _state = state::before_static_row;
-    lw_shared_ptr<read_context> _read_context;
-    partition_snapshot_row_cursor _next_row;
-    bool _next_row_in_range = false;
-
-    future<> do_fill_buffer();
-    void copy_from_cache_to_buffer();
-    future<> process_static_row();
-    void move_to_end();
-    void move_to_next_range();
-    void move_to_current_range();
-    void move_to_next_entry();
-    // Emits all delayed range tombstones with positions smaller than upper_bound.
-    void drain_tombstones(position_in_partition_view upper_bound);
-    // Emits all delayed range tombstones.
-    void drain_tombstones();
-    void add_to_buffer(const partition_snapshot_row_cursor&);
-    void add_clustering_row_to_buffer(mutation_fragment&&);
-    void add_to_buffer(range_tombstone&&);
-    void add_to_buffer(mutation_fragment&&);
-    future<> read_from_underlying();
-    future<> start_reading_from_underlying();
-    bool after_current_range(position_in_partition_view position);
-    bool can_populate() const;
-    void maybe_update_continuity();
-    void maybe_add_to_cache(const mutation_fragment& mf);
-    void maybe_add_to_cache(const clustering_row& cr);
-    void maybe_add_to_cache(const range_tombstone& rt);
-    void maybe_add_to_cache(const static_row& sr);
-    void maybe_set_static_row_continuous();
-public:
-    cache_streamed_mutation(schema_ptr s,
-                            dht::decorated_key dk,
-                            query::clustering_key_filter_ranges&& crr,
-                            lw_shared_ptr<read_context> ctx,
-                            lw_shared_ptr<partition_snapshot> snp,
-                            row_cache& cache)
-        : streamed_mutation::impl(std::move(s), dk, snp->partition_tombstone())
-        , _snp(std::move(snp))
-        , _position_cmp(*_schema)
-        , _ck_ranges(std::move(crr))
-        , _ck_ranges_curr(_ck_ranges.begin())
-        , _ck_ranges_end(_ck_ranges.end())
-        , _lsa_manager(cache)
-        , _tombstones(*_schema)
-        , _lower_bound(position_in_partition::before_all_clustered_rows())
-        , _upper_bound(position_in_partition_view::before_all_clustered_rows())
-        , _read_context(std::move(ctx))
-        , _next_row(*_schema, cache._tracker.region(), *_snp)
-    { }
-    cache_streamed_mutation(const cache_streamed_mutation&) = delete;
-    cache_streamed_mutation(cache_streamed_mutation&&) = delete;
-    virtual future<> fill_buffer() override;
-    virtual ~cache_streamed_mutation() {
-        maybe_merge_versions(_snp, _lsa_manager.region(), _lsa_manager.read_section());
-    }
-};
-
-inline
-future<> cache_streamed_mutation::process_static_row() {
-    if (_snp->version()->partition().static_row_continuous()) {
-        _read_context->cache().on_row_hit();
-        row sr = _lsa_manager.run_in_read_section([this] {
-            return _snp->static_row();
-        });
-        if (!sr.empty()) {
-            push_mutation_fragment(mutation_fragment(static_row(std::move(sr))));
-        }
-        return make_ready_future<>();
-    } else {
-        _read_context->cache().on_row_miss();
-        return _read_context->get_next_fragment().then([this] (mutation_fragment_opt&& sr) {
-            if (sr) {
-                assert(sr->is_static_row());
-                maybe_add_to_cache(sr->as_static_row());
-                push_mutation_fragment(std::move(*sr));
-            }
-            maybe_set_static_row_continuous();
-        });
-    }
-}
-
-inline
-future<> cache_streamed_mutation::fill_buffer() {
-    if (_state == state::before_static_row) {
-        auto after_static_row = [this] {
-            if (_ck_ranges_curr == _ck_ranges_end) {
-                _end_of_stream = true;
-                _state = state::end_of_stream;
-                return make_ready_future<>();
-            }
-            _state = state::reading_from_cache;
-            _lsa_manager.run_in_read_section([this] {
-                move_to_current_range();
-            });
-            return fill_buffer();
-        };
-        if (_schema->has_static_columns()) {
-            return process_static_row().then(std::move(after_static_row));
-        } else {
-            return after_static_row();
-        }
-    }
-    return do_until([this] { return _end_of_stream || is_buffer_full(); }, [this] {
-        return do_fill_buffer();
-    });
-}
-
-inline
-future<> cache_streamed_mutation::do_fill_buffer() {
-    if (_state == state::move_to_underlying) {
-        _state = state::reading_from_underlying;
-        auto end = _next_row_in_range ? position_in_partition(_next_row.position())
-                                      : position_in_partition(_upper_bound);
-        return _read_context->fast_forward_to(position_range{_lower_bound, std::move(end)}).then([this] {
-            return read_from_underlying();
-        });
-    }
-    if (_state == state::reading_from_underlying) {
-        return read_from_underlying();
-    }
-    // assert(_state == state::reading_from_cache)
-    return _lsa_manager.run_in_read_section([this] {
-        auto same_pos = _next_row.maybe_refresh();
-        // FIXME: If continuity changed anywhere between _lower_bound and _next_row.position()
-        // we need to redo the lookup with _lower_bound. There is no eviction yet, so not yet a problem.
-        assert(same_pos);
-        while (!is_buffer_full() && _state == state::reading_from_cache) {
-            copy_from_cache_to_buffer();
-            if (need_preempt()) {
-                break;
-            }
-        }
-        return make_ready_future<>();
-    });
-}
-
-inline
-future<> cache_streamed_mutation::read_from_underlying() {
-    return consume_mutation_fragments_until(_read_context->get_streamed_mutation(),
-        [this] { return _state != state::reading_from_underlying || is_buffer_full(); },
-        [this] (mutation_fragment mf) {
-            _read_context->cache().on_row_miss();
-            maybe_add_to_cache(mf);
-            add_to_buffer(std::move(mf));
-        },
-        [this] {
-            _state = state::reading_from_cache;
-            _lsa_manager.run_in_update_section([this] {
-                auto same_pos = _next_row.maybe_refresh();
-                assert(same_pos); // FIXME: handle eviction
-                if (_next_row_in_range) {
-                    maybe_update_continuity();
-                    add_to_buffer(_next_row);
-                    move_to_next_entry();
-                } else {
-                    if (no_clustering_row_between(*_schema, _upper_bound, _next_row.position())) {
-                        this->maybe_update_continuity();
-                    } else {
-                        // FIXME: Insert dummy entry at _upper_bound.
-                        _read_context->cache().on_mispopulate();
-                    }
-                    move_to_next_range();
-                }
-            });
-            return make_ready_future<>();
-        });
-}
-
-inline
-void cache_streamed_mutation::maybe_update_continuity() {
-    if (can_populate() && _next_row.is_in_latest_version()) {
-        if (_last_row_key) {
-            if (_next_row.previous_row_in_latest_version_has_key(*_last_row_key)) {
-                _next_row.set_continuous(true);
-            }
-        } else if (!_ck_ranges_curr->start()) {
-            _next_row.set_continuous(true);
-        }
-    } else {
-        _read_context->cache().on_mispopulate();
-    }
-}
-
-inline
-void cache_streamed_mutation::maybe_add_to_cache(const mutation_fragment& mf) {
-    if (mf.is_range_tombstone()) {
-        maybe_add_to_cache(mf.as_range_tombstone());
-    } else {
-        assert(mf.is_clustering_row());
-        const clustering_row& cr = mf.as_clustering_row();
-        maybe_add_to_cache(cr);
-    }
-}
-
-inline
-void cache_streamed_mutation::maybe_add_to_cache(const clustering_row& cr) {
-    if (!can_populate()) {
-        _read_context->cache().on_mispopulate();
-        return;
-    }
-    _lsa_manager.run_in_update_section_with_allocator([this, &cr] {
-        mutation_partition& mp = _snp->version()->partition();
-        rows_entry::compare less(*_schema);
-
-        // FIXME: If _next_row is up to date, but latest version doesn't have iterator in
-        // current row (could be far away, so we'd do this often), then this will do
-        // the lookup in mp. This is not necessary, because _next_row has iterators for
-        // next rows in each version, even if they're not part of the current row.
-        // They're currently buried in the heap, but you could keep a vector of
-        // iterators per each version in addition to the heap.
-        auto new_entry = alloc_strategy_unique_ptr<rows_entry>(
-            current_allocator().construct<rows_entry>(cr.key(), cr.tomb(), cr.marker(), cr.cells()));
-        new_entry->set_continuous(false);
-        auto it = _next_row.has_valid_row_from_latest_version()
-                  ? _next_row.get_iterator_in_latest_version() : mp.clustered_rows().lower_bound(cr.key(), less);
-        auto insert_result = mp.clustered_rows().insert_check(it, *new_entry, less);
-        if (insert_result.second) {
-            _read_context->cache().on_row_insert();
-            new_entry.release();
-        }
-        it = insert_result.first;
-
-        rows_entry& e = *it;
-        if (_last_row_key) {
-            if (it == mp.clustered_rows().begin()) {
-                // FIXME: check whether entry for _last_row_key is in older versions and if so set
-                // continuity to true.
-                _read_context->cache().on_mispopulate();
-            } else {
-                auto prev_it = it;
-                --prev_it;
-                clustering_key_prefix::equality eq(*_schema);
-                if (eq(*_last_row_key, prev_it->key())) {
-                    e.set_continuous(true);
-                }
-            }
-        } else if (!_ck_ranges_curr->start()) {
-            e.set_continuous(true);
-        } else {
-            // FIXME: Insert dummy entry at _ck_ranges_curr->start()
-            _read_context->cache().on_mispopulate();
-        }
-    });
-}
-
-inline
-bool cache_streamed_mutation::after_current_range(position_in_partition_view p) {
-    return _position_cmp(p, _upper_bound) >= 0;
-}
-
-inline
-future<> cache_streamed_mutation::start_reading_from_underlying() {
-    _state = state::move_to_underlying;
-    return make_ready_future<>();
-}
-
-inline
-void cache_streamed_mutation::copy_from_cache_to_buffer() {
-    position_in_partition_view next_lower_bound = _next_row.dummy() ? _next_row.position() : position_in_partition_view::after_key(_next_row.key());
-    for (auto&& rts : _snp->range_tombstones(*_schema, _lower_bound, _next_row_in_range ? next_lower_bound : _upper_bound)) {
-        add_to_buffer(std::move(rts));
-        if (is_buffer_full()) {
-            return;
-        }
-    }
-    if (_next_row_in_range) {
-        add_to_buffer(_next_row);
-        move_to_next_entry();
-    } else {
-        move_to_next_range();
-    }
-}
-
-inline
-void cache_streamed_mutation::move_to_end() {
-    drain_tombstones();
-    _end_of_stream = true;
-    _state = state::end_of_stream;
-}
-
-inline
-void cache_streamed_mutation::move_to_next_range() {
-    ++_ck_ranges_curr;
-    if (_ck_ranges_curr == _ck_ranges_end) {
-        move_to_end();
-    } else {
-        move_to_current_range();
-    }
-}
-
-inline
-void cache_streamed_mutation::move_to_current_range() {
-    _last_row_key = std::experimental::nullopt;
-    _lower_bound = position_in_partition::for_range_start(*_ck_ranges_curr);
-    _upper_bound = position_in_partition_view::for_range_end(*_ck_ranges_curr);
-    auto complete_until_next = _next_row.advance_to(_lower_bound) || _next_row.continuous();
-    _next_row_in_range = !after_current_range(_next_row.position());
-    if (!complete_until_next) {
-        start_reading_from_underlying();
-    }
-}
-
-// _next_row must be inside the range.
-inline
-void cache_streamed_mutation::move_to_next_entry() {
-    if (no_clustering_row_between(*_schema, _next_row.position(), _upper_bound)) {
-        move_to_next_range();
-    } else {
-        if (!_next_row.next()) {
-            move_to_end();
-            return;
-        }
-        _next_row_in_range = !after_current_range(_next_row.position());
-        if (!_next_row.continuous()) {
-            start_reading_from_underlying();
-        }
-    }
-}
-
-inline
-void cache_streamed_mutation::drain_tombstones(position_in_partition_view pos) {
-    while (auto mfo = _tombstones.get_next(pos)) {
-        push_mutation_fragment(std::move(*mfo));
-    }
-}
-
-inline
-void cache_streamed_mutation::drain_tombstones() {
-    while (auto mfo = _tombstones.get_next()) {
-        push_mutation_fragment(std::move(*mfo));
-    }
-}
-
-inline
-void cache_streamed_mutation::add_to_buffer(mutation_fragment&& mf) {
-    if (mf.is_clustering_row()) {
-        add_clustering_row_to_buffer(std::move(mf));
-    } else {
-        assert(mf.is_range_tombstone());
-        add_to_buffer(std::move(mf).as_range_tombstone());
-    }
-}
-
-inline
-void cache_streamed_mutation::add_to_buffer(const partition_snapshot_row_cursor& row) {
-    if (!row.dummy()) {
-        _read_context->cache().on_row_hit();
-        add_clustering_row_to_buffer(row.row());
-    }
-}
-
-inline
-void cache_streamed_mutation::add_clustering_row_to_buffer(mutation_fragment&& mf) {
-    auto& row = mf.as_clustering_row();
-    drain_tombstones(row.position());
-    _last_row_key = row.key();
-    _lower_bound = position_in_partition::after_key(row.key());
-    push_mutation_fragment(std::move(mf));
-}
-
-inline
-void cache_streamed_mutation::add_to_buffer(range_tombstone&& rt) {
-    // This guarantees that rt starts after any emitted clustering_row
-    if (!rt.trim_front(*_schema, _lower_bound)) {
-        return;
-    }
-    _lower_bound = position_in_partition(rt.position());
-    _tombstones.apply(std::move(rt));
-    drain_tombstones(_lower_bound);
-}
-
-inline
-void cache_streamed_mutation::maybe_add_to_cache(const range_tombstone& rt) {
-    if (can_populate()) {
-        _lsa_manager.run_in_update_section_with_allocator([&] {
-            _snp->version()->partition().row_tombstones().apply_monotonically(*_schema, rt);
-        });
-    } else {
-        _read_context->cache().on_mispopulate();
-    }
-}
-
-inline
-void cache_streamed_mutation::maybe_add_to_cache(const static_row& sr) {
-    if (can_populate()) {
-        _read_context->cache().on_row_insert();
-        _lsa_manager.run_in_update_section_with_allocator([&] {
-            _snp->version()->partition().static_row().apply(*_schema, column_kind::static_column, sr.cells());
-        });
-    } else {
-        _read_context->cache().on_mispopulate();
-    }
-}
-
-inline
-void cache_streamed_mutation::maybe_set_static_row_continuous() {
-    if (can_populate()) {
-        _snp->version()->partition().set_static_row_continuous(true);
-    } else {
-        _read_context->cache().on_mispopulate();
-    }
-}
-
-inline
-bool cache_streamed_mutation::can_populate() const {
-    return _snp->at_latest_version() && _read_context->cache().phase_of(_read_context->key()) == _read_context->phase();
-}
-
-} // namespace cache
-
-inline streamed_mutation make_cache_streamed_mutation(schema_ptr s,
-                                                      dht::decorated_key dk,
-                                                      query::clustering_key_filter_ranges crr,
-                                                      row_cache& cache,
-                                                      lw_shared_ptr<cache::read_context> ctx,
-                                                      lw_shared_ptr<partition_snapshot> snp)
-{
-    return make_streamed_mutation<cache::cache_streamed_mutation>(
-        std::move(s), std::move(dk), std::move(crr), std::move(ctx), std::move(snp), cache);
-}
--- a/canonical_mutation.cc
+++ b/canonical_mutation.cc
@@ -75,7 +75,7 @@ mutation canonical_mutation::to_mutation(schema_ptr s) const {
    auto version = mv.schema_version();
    auto pk = mv.key();

-    mutation m(std::move(pk), std::move(s));
+    mutation m(std::move(s), std::move(pk));

    if (version == m.schema()->version()) {
        auto partition_view = mutation_partition_view::from_view(mv.partition());
--- a/cell_locking.hh
+++ b/cell_locking.hh
@@ -39,9 +39,11 @@ using small_vector = std::vector<T>;
 #endif

 #include "fnv1a_hasher.hh"
-#include "streamed_mutation.hh"
+#include "mutation_fragment.hh"
 #include "mutation_partition.hh"

+#include "db/timeout_clock.hh"
+
 class cells_range {
    using ids_vector_type = small_vector<column_id, 5>;

@@ -142,11 +144,7 @@ struct cell_locker_stats {
 };

 class cell_locker {
-public:
-    using timeout_clock = lowres_clock;
 private:
-    using semaphore_type = basic_semaphore<default_timeout_exception_factory, timeout_clock>;
-
    class partition_entry;

    struct cell_address {
@@ -158,7 +156,7 @@ private:
                       public enable_lw_shared_from_this<cell_entry> {
        partition_entry& _parent;
        cell_address _address;
-        semaphore_type _semaphore { 0 };
+        db::timeout_semaphore _semaphore { 0 };

        friend class cell_locker;
    public:
@@ -187,7 +185,7 @@ private:
            return _address.position;
        }

-        future<> lock(timeout_clock::time_point _timeout) {
+        future<> lock(db::timeout_clock::time_point _timeout) {
            return _semaphore.wait(_timeout);
        }
        void unlock() {
@@ -387,7 +385,7 @@ public:

    // partition_cells_range is required to be in cell_locker::schema()
    future<std::vector<locked_cell>> lock_cells(const dht::decorated_key& dk, partition_cells_range&& range,
-                                                timeout_clock::time_point timeout);
+                                                db::timeout_clock::time_point timeout);
 };


@@ -416,7 +414,7 @@ struct cell_locker::locker {
    partition_cells_range::iterator _current_ck;
    cells_range::const_iterator _current_cell;

-    timeout_clock::time_point _timeout;
+    db::timeout_clock::time_point _timeout;
    std::vector<locked_cell> _locks;
    cell_locker_stats& _stats;
 private:
@@ -430,7 +428,7 @@ private:

    bool is_done() const { return _current_ck == _range.end(); }
 public:
-    explicit locker(const ::schema& s, cell_locker_stats& st, partition_entry& pe, partition_cells_range&& range, timeout_clock::time_point timeout)
+    explicit locker(const ::schema& s, cell_locker_stats& st, partition_entry& pe, partition_cells_range&& range, db::timeout_clock::time_point timeout)
        : _hasher(s)
        , _eq_cmp(s)
        , _partition_entry(pe)
@@ -458,7 +456,7 @@ public:
 };

 inline
-future<std::vector<locked_cell>> cell_locker::lock_cells(const dht::decorated_key& dk, partition_cells_range&& range, timeout_clock::time_point timeout) {
+future<std::vector<locked_cell>> cell_locker::lock_cells(const dht::decorated_key& dk, partition_cells_range&& range, db::timeout_clock::time_point timeout) {
    partition_entry::hasher pe_hash;
    partition_entry::equal_compare pe_eq(*_schema);

--- a/checked-file-impl.hh
+++ b/checked-file-impl.hh
@@ -130,7 +130,7 @@ inline file make_checked_file(const io_error_handler& error_handler, file f)
 future<file>
 inline open_checked_file_dma(const io_error_handler& error_handler,
                             sstring name, open_flags flags,
-                             file_open_options options)
+                             file_open_options options = {})
 {
    return do_io_check(error_handler, [&] {
        return open_file_dma(name, flags, options).then([&] (file f) {
@@ -139,17 +139,6 @@ inline open_checked_file_dma(const io_error_handler& error_handler,
    });
 }

-future<file>
-inline open_checked_file_dma(const io_error_handler& error_handler,
-                             sstring name, open_flags flags)
-{
-    return do_io_check(error_handler, [&] {
-        return open_file_dma(name, flags).then([&] (file f) {
-            return make_ready_future<file>(make_checked_file(error_handler, f));
-        });
-    });
-}
-
 future<file>
 inline open_checked_directory(const io_error_handler& error_handler,
                              sstring name)
--- a/clustering_bounds_comparator.hh
+++ b/clustering_bounds_comparator.hh
@@ -42,17 +42,6 @@ std::ostream& operator<<(std::ostream& out, const bound_kind k);
 bound_kind invert_kind(bound_kind k);
 int32_t weight(bound_kind k);

-static inline bound_kind flip_bound_kind(bound_kind bk)
-{
-    switch (bk) {
-    case bound_kind::excl_end: return bound_kind::excl_start;
-    case bound_kind::incl_end: return bound_kind::incl_start;
-    case bound_kind::excl_start: return bound_kind::excl_end;
-    case bound_kind::incl_start: return bound_kind::incl_end;
-    }
-    abort();
-}
-
 class bound_view {
 public:
    const static thread_local clustering_key empty_prefix;
--- a/clustering_ranges_walker.hh
+++ b/clustering_ranges_walker.hh
@@ -25,7 +25,7 @@

 #include "schema.hh"
 #include "query-request.hh"
-#include "streamed_mutation.hh"
+#include "mutation_fragment.hh"

 // Utility for in-order checking of overlap with position ranges.
 class clustering_ranges_walker {
@@ -169,14 +169,14 @@ public:
    bool contains_tombstone(position_in_partition_view start, position_in_partition_view end) const {
        position_in_partition::less_compare less(_schema);

-        if (_trim && less(end, *_trim)) {
+        if (_trim && !less(*_trim, end)) {
            return false;
        }

        auto i = _current;
        while (i != _end) {
            auto range_start = position_in_partition_view::for_range_start(*i);
-            if (less(end, range_start)) {
+            if (!less(range_start, end)) {
                return false;
            }
            auto range_end = position_in_partition_view::for_range_end(*i);
--- a/coding-style.md
+++ b/coding-style.md
@@ -0,0 +1,3 @@
+# Scylla Coding Style
+
+Please see the [Seastar style document](https://github.com/scylladb/seastar/blob/master/coding-style.md).
--- a/compaction_strategy.hh
+++ b/compaction_strategy.hh
@@ -21,6 +21,10 @@

 #pragma once

+#include "sstables/shared_sstable.hh"
+#include "exceptions/exceptions.hh"
+#include "sstables/compaction_backlog_manager.hh"
+
 class column_family;
 class schema;
 using schema_ptr = lw_shared_ptr<const schema>;
@@ -33,6 +37,7 @@ enum class compaction_strategy_type {
    size_tiered,
    leveled,
    date_tiered,
+    time_window,
 };

 class compaction_strategy_impl;
@@ -53,13 +58,13 @@ public:
    compaction_strategy& operator=(compaction_strategy&&);

    // Return a list of sstables to be compacted after applying the strategy.
-    compaction_descriptor get_sstables_for_compaction(column_family& cfs, std::vector<lw_shared_ptr<sstable>> candidates);
+    compaction_descriptor get_sstables_for_compaction(column_family& cfs, std::vector<shared_sstable> candidates);

-    std::vector<resharding_descriptor> get_resharding_jobs(column_family& cf, std::vector<lw_shared_ptr<sstable>> candidates);
+    std::vector<resharding_descriptor> get_resharding_jobs(column_family& cf, std::vector<shared_sstable> candidates);

    // Some strategies may look at the compacted and resulting sstables to
    // get some useful information for subsequent compactions.
-    void notify_completion(const std::vector<lw_shared_ptr<sstable>>& removed, const std::vector<lw_shared_ptr<sstable>>& added);
+    void notify_completion(const std::vector<shared_sstable>& removed, const std::vector<shared_sstable>& added);

    // Return if parallel compaction is allowed by strategy.
    bool parallel_compaction() const;
@@ -82,6 +87,8 @@ public:
            return "LeveledCompactionStrategy";
        case compaction_strategy_type::date_tiered:
            return "DateTieredCompactionStrategy";
+        case compaction_strategy_type::time_window:
+            return "TimeWindowCompactionStrategy";
        default:
            throw std::runtime_error("Invalid Compaction Strategy");
        }
@@ -100,6 +107,8 @@ public:
            return compaction_strategy_type::leveled;
        } else if (short_name == "DateTieredCompactionStrategy") {
            return compaction_strategy_type::date_tiered;
+        } else if (short_name == "TimeWindowCompactionStrategy") {
+            return compaction_strategy_type::time_window;
        } else {
            throw exceptions::configuration_exception(sprint("Unable to find compaction strategy class '%s'", name));
        }
@@ -112,6 +121,8 @@ public:
    }

    sstable_set make_sstable_set(schema_ptr schema) const;
+
+    compaction_backlog_tracker& get_backlog_tracker();
 };

 // Creates a compaction_strategy object from one of the strategies available.
--- a/compound.hh
+++ b/compound.hh
@@ -28,6 +28,7 @@
 #include <boost/range/iterator_range.hpp>
 #include <boost/range/adaptor/transformed.hpp>
 #include "utils/serialization.hh"
+#include "util/backtrace.hh"
 #include "unimplemented.hh"

 enum class allow_prefixes { no, yes };
@@ -144,7 +145,7 @@ public:
                }
                len = read_simple<size_type>(_v);
                if (_v.size() < len) {
-                    throw marshal_exception();
+                    throw_with_backtrace<marshal_exception>(sprint("compound_type iterator - not enough bytes, expected %d, got %d", len, _v.size()));
                }
            }
            _current = bytes_view(_v.begin(), len);
--- a/compound_compat.hh
+++ b/compound_compat.hh
@@ -345,7 +345,7 @@ public:
                }
                len = read_simple<size_type>(_v);
                if (_v.size() < len) {
-                    throw marshal_exception();
+                    throw_with_backtrace<marshal_exception>(sprint("composite iterator - not enough bytes, expected %d, got %d", len, _v.size()));
                }
            }
            auto value = bytes_view(_v.begin(), len);
--- a/compress.cc
+++ b/compress.cc
@@ -0,0 +1,345 @@
+/*
+ * Copyright (C) 2016 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <lz4.h>
+#include <zlib.h>
+#include <snappy-c.h>
+
+#include "compress.hh"
+#include "utils/class_registrator.hh"
+
+const sstring compressor::namespace_prefix = "org.apache.cassandra.io.compress.";
+
+class lz4_processor: public compressor {
+public:
+    using compressor::compressor;
+
+    size_t uncompress(const char* input, size_t input_len, char* output,
+                    size_t output_len) const override;
+    size_t compress(const char* input, size_t input_len, char* output,
+                    size_t output_len) const override;
+    size_t compress_max_size(size_t input_len) const override;
+};
+
+class snappy_processor: public compressor {
+public:
+    using compressor::compressor;
+
+    size_t uncompress(const char* input, size_t input_len, char* output,
+                    size_t output_len) const override;
+    size_t compress(const char* input, size_t input_len, char* output,
+                    size_t output_len) const override;
+    size_t compress_max_size(size_t input_len) const override;
+};
+
+class deflate_processor: public compressor {
+public:
+    using compressor::compressor;
+
+    size_t uncompress(const char* input, size_t input_len, char* output,
+                    size_t output_len) const override;
+    size_t compress(const char* input, size_t input_len, char* output,
+                    size_t output_len) const override;
+    size_t compress_max_size(size_t input_len) const override;
+};
+
+compressor::compressor(sstring name)
+    : _name(std::move(name))
+{}
+
+std::set<sstring> compressor::option_names() const {
+    return {};
+}
+
+std::map<sstring, sstring> compressor::options() const {
+    return {};
+}
+
+shared_ptr<compressor> compressor::create(const sstring& name, const opt_getter& opts) {
+    if (name.empty()) {
+        return {};
+    }
+    // Non-empty name: build the lookup key from namespace_prefix and name.
+    qualified_name qn(namespace_prefix, name);
+    // Prefer the built-in thread-local instances when the name matches one.
+    for (auto& c : { lz4, snappy, deflate }) {
+        if (c->name() == qn) {
+            return c;
+        }
+    }
+    // No built-in matched: defer to compressor_registry, passing the option getter.
+    return compressor_registry::create(qn, opts);
+}
+
+shared_ptr<compressor> compressor::create(const std::map<sstring, sstring>& options) {
+    auto i = options.find(compression_parameters::SSTABLE_COMPRESSION);
+    if (i != options.end() && !i->second.empty()) {
+        return create(i->second, [&options](const sstring& key) -> opt_string {
+            auto i = options.find(key);
+            if (i == options.end()) {
+                return std::experimental::nullopt;
+            }
+            return { i->second };
+        });
+    }
+    return {};
+}
+
+thread_local const shared_ptr<compressor> compressor::lz4 = make_shared<lz4_processor>(namespace_prefix + "LZ4Compressor");
+thread_local const shared_ptr<compressor> compressor::snappy = make_shared<snappy_processor>(namespace_prefix + "SnappyCompressor");
+thread_local const shared_ptr<compressor> compressor::deflate = make_shared<deflate_processor>(namespace_prefix + "DeflateCompressor");
+
+const sstring compression_parameters::SSTABLE_COMPRESSION = "sstable_compression";
+const sstring compression_parameters::CHUNK_LENGTH_KB = "chunk_length_kb";
+const sstring compression_parameters::CRC_CHECK_CHANCE = "crc_check_chance";
+
+compression_parameters::compression_parameters()
+    : compression_parameters(nullptr)
+{}
+
+compression_parameters::~compression_parameters()
+{}
+
+compression_parameters::compression_parameters(compressor_ptr c)
+    : _compressor(std::move(c))
+{}
+
+compression_parameters::compression_parameters(const std::map<sstring, sstring>& options) {
+    _compressor = compressor::create(options);
+    // Create the compressor first: validate_options() asks it for extra option names.
+    validate_options(options);
+    // Optional numeric settings; unparsable values are reported as syntax_exception.
+    auto chunk_length = options.find(CHUNK_LENGTH_KB);
+    if (chunk_length != options.end()) {
+        try {
+            _chunk_length = std::stoi(chunk_length->second) * 1024;
+        } catch (const std::exception& e) {
+            throw exceptions::syntax_exception(sstring("Invalid integer value ") + chunk_length->second + " for " + CHUNK_LENGTH_KB);
+        }
+    }
+    auto crc_chance = options.find(CRC_CHECK_CHANCE);
+    if (crc_chance != options.end()) {
+        try {
+            _crc_check_chance = std::stod(crc_chance->second);
+        } catch (const std::exception& e) {
+            throw exceptions::syntax_exception(sstring("Invalid double value ") + crc_chance->second + " for " + CRC_CHECK_CHANCE);
+        }
+    }
+}
+
+void compression_parameters::validate() {
+    if (_chunk_length) {
+        auto chunk_length = _chunk_length.value();
+        if (chunk_length <= 0) {
+            throw exceptions::configuration_exception(sstring("Invalid negative or null ") + CHUNK_LENGTH_KB);
+        }
+        // _chunk_length must be a power of two
+        if (chunk_length & (chunk_length - 1)) {
+            throw exceptions::configuration_exception(sstring(CHUNK_LENGTH_KB) + " must be a power of 2.");
+        }
+    }
+    if (_crc_check_chance && (_crc_check_chance.value() < 0.0 || _crc_check_chance.value() > 1.0)) {
+        throw exceptions::configuration_exception(sstring(CRC_CHECK_CHANCE) + " must be between 0.0 and 1.0.");
+    }
+}
+
+std::map<sstring, sstring> compression_parameters::get_options() const {
+    if (!_compressor) {
+        return std::map<sstring, sstring>();
+    }
+    auto opts = _compressor->options();
+
+    opts.emplace(compression_parameters::SSTABLE_COMPRESSION, _compressor->name());
+    if (_chunk_length) {
+        opts.emplace(sstring(CHUNK_LENGTH_KB), std::to_string(_chunk_length.value() / 1024));
+    }
+    if (_crc_check_chance) {
+        opts.emplace(sstring(CRC_CHECK_CHANCE), std::to_string(_crc_check_chance.value()));
+    }
+    return opts;
+}
+
+bool compression_parameters::operator==(const compression_parameters& other) const {
+    return _compressor == other._compressor
+           && _chunk_length == other._chunk_length
+           && _crc_check_chance == other._crc_check_chance;
+}
+
+bool compression_parameters::operator!=(const compression_parameters& other) const {
+    return !(*this == other);
+}
+
+void compression_parameters::validate_options(const std::map<sstring, sstring>& options) {
+    // Accept the generic keywords below plus any option names the compressor itself reports.
+    static std::set<sstring> keywords({
+        sstring(SSTABLE_COMPRESSION),
+        sstring(CHUNK_LENGTH_KB),
+        sstring(CRC_CHECK_CHANCE),
+    });
+    std::set<sstring> ckw;
+    if (_compressor) {
+        ckw = _compressor->option_names();
+    }
+    for (auto&& opt : options) {
+        if (!keywords.count(opt.first) && !ckw.count(opt.first)) {
+            throw exceptions::configuration_exception(sprint("Unknown compression option '%s'.", opt.first));
+        }
+    }
+}
+
+size_t lz4_processor::uncompress(const char* input, size_t input_len,
+                char* output, size_t output_len) const {
+    // We use LZ4_decompress_safe(). According to the documentation, the
+    // function LZ4_decompress_fast() is slightly faster, but maliciously
+    // crafted compressed data can cause it to overflow the output buffer.
+    // Theoretically, our compressed data is created by us so is not malicious
+    // (and accidental corruption is avoided by the compressed-data checksum),
+    // but let's not take that chance for now, until we've actually measured
+    // the performance benefit that LZ4_decompress_fast() would bring.
+
+    // Cassandra's LZ4Compressor prepends to the chunk its uncompressed length
+    // in 4 bytes little-endian (!) order. We don't need this information - we
+    // already know the uncompressed data is at most the given chunk size (and
+    // usually is exactly that, except in the last chunk), so skip the prefix.
+    // A chunk shorter than the prefix is corrupt; reject it before the
+    // unsigned input_len can wrap around.
+    if (input_len < 4) {
+        throw std::runtime_error("LZ4 uncompression failure: input too short");
+    }
+    auto ret = LZ4_decompress_safe(input + 4, output, input_len - 4, output_len);
+    if (ret < 0) {
+        throw std::runtime_error("LZ4 uncompression failure");
+    }
+    return ret;
+}
+
+size_t lz4_processor::compress(const char* input, size_t input_len,
+                char* output, size_t output_len) const {
+    if (output_len < LZ4_COMPRESSBOUND(input_len) + 4) {
+        throw std::runtime_error("LZ4 compression failure: length of output is too small");
+    }
+    // Write input_len (32-bit data) to beginning of output in little-endian representation.
+    output[0] = input_len & 0xFF;
+    output[1] = (input_len >> 8) & 0xFF;
+    output[2] = (input_len >> 16) & 0xFF;
+    output[3] = (input_len >> 24) & 0xFF;
+#ifdef HAVE_LZ4_COMPRESS_DEFAULT
+    auto ret = LZ4_compress_default(input, output + 4, input_len, LZ4_compressBound(input_len));
+#else
+    auto ret = LZ4_compress(input, output + 4, input_len);
+#endif
+    if (ret == 0) {
+        throw std::runtime_error("LZ4 compression failure: LZ4_compress() failed");
+    }
+    return ret + 4;
+}
+
+size_t lz4_processor::compress_max_size(size_t input_len) const {
+    return LZ4_COMPRESSBOUND(input_len) + 4;
+}
+
+size_t deflate_processor::uncompress(const char* input,
+                size_t input_len, char* output, size_t output_len) const {
+    z_stream zs;
+    zs.zalloc = Z_NULL;
+    zs.zfree = Z_NULL;
+    zs.opaque = Z_NULL;
+    zs.avail_in = 0;
+    zs.next_in = Z_NULL;
+    if (inflateInit(&zs) != Z_OK) {
+        throw std::runtime_error("deflate uncompression init failure");
+    }
+    // yuck, zlib is not const-correct, and also uses unsigned char while we use char :-(
+    zs.next_in = reinterpret_cast<unsigned char*>(const_cast<char*>(input));
+    zs.avail_in = input_len;
+    zs.next_out = reinterpret_cast<unsigned char*>(output);
+    zs.avail_out = output_len;
+    auto res = inflate(&zs, Z_FINISH);
+    inflateEnd(&zs);
+    if (res == Z_STREAM_END) {
+        return output_len - zs.avail_out;
+    } else {
+        throw std::runtime_error("deflate uncompression failure");
+    }
+}
+
+size_t deflate_processor::compress(const char* input,
+                size_t input_len, char* output, size_t output_len) const {
+    z_stream zs;
+    zs.zalloc = Z_NULL;
+    zs.zfree = Z_NULL;
+    zs.opaque = Z_NULL;
+    zs.avail_in = 0;
+    zs.next_in = Z_NULL;
+    if (deflateInit(&zs, Z_DEFAULT_COMPRESSION) != Z_OK) {
+        throw std::runtime_error("deflate compression init failure");
+    }
+    zs.next_in = reinterpret_cast<unsigned char*>(const_cast<char*>(input));
+    zs.avail_in = input_len;
+    zs.next_out = reinterpret_cast<unsigned char*>(output);
+    zs.avail_out = output_len;
+    auto res = ::deflate(&zs, Z_FINISH);
+    deflateEnd(&zs);
+    if (res == Z_STREAM_END) {
+        return output_len - zs.avail_out;
+    } else {
+        throw std::runtime_error("deflate compression failure");
+    }
+}
+
+size_t deflate_processor::compress_max_size(size_t input_len) const {
+    z_stream zs;
+    zs.zalloc = Z_NULL;
+    zs.zfree = Z_NULL;
+    zs.opaque = Z_NULL;
+    zs.avail_in = 0;
+    zs.next_in = Z_NULL;
+    if (deflateInit(&zs, Z_DEFAULT_COMPRESSION) != Z_OK) {
+        throw std::runtime_error("deflate compression init failure");
+    }
+    auto res = deflateBound(&zs, input_len);
+    deflateEnd(&zs);
+    return res;
+}
+
+size_t snappy_processor::uncompress(const char* input, size_t input_len,
+                char* output, size_t output_len) const {
+    if (snappy_uncompress(input, input_len, output, &output_len)
+            == SNAPPY_OK) {
+        return output_len;
+    } else {
+        throw std::runtime_error("snappy uncompression failure");
+    }
+}
+
+size_t snappy_processor::compress(const char* input, size_t input_len,
+                char* output, size_t output_len) const {
+    auto ret = snappy_compress(input, input_len, output, &output_len);
+    if (ret != SNAPPY_OK) {
+        throw std::runtime_error("snappy compression failure: snappy_compress() failed");
+    }
+    return output_len;
+}
+
+size_t snappy_processor::compress_max_size(size_t input_len) const {
+    return snappy_max_compressed_length(input_len);
+}
+
--- a/compress.hh
+++ b/compress.hh
@@ -21,135 +21,103 @@

 #pragma once

-#include "exceptions/exceptions.hh"
+#include <map>
+#include <set>

-enum class compressor {
-    none,
-    lz4,
-    snappy,
-    deflate,
+#include <seastar/core/future.hh>
+#include <seastar/core/shared_ptr.hh>
+#include <seastar/core/sstring.hh>
+
+#include "exceptions/exceptions.hh"
+#include "stdx.hh"
+
+
+class compressor {
+    sstring _name;
+public:
+    compressor(sstring);
+
+    virtual ~compressor() {}
+
+    /**
+     * Unpacks data in "input" to output. If output_len is of insufficient size,
+     * an exception is thrown, i.e. you should keep track of the uncompressed size.
+     */
+    virtual size_t uncompress(const char* input, size_t input_len, char* output,
+                    size_t output_len) const = 0;
+    /**
+     * Packs data in "input" to output. If output_len is of insufficient size, an
+     * exception is thrown. Maximum required size is obtained via "compress_max_size".
+     */
+    virtual size_t compress(const char* input, size_t input_len, char* output,
+                    size_t output_len) const = 0;
+    /**
+     * Returns the maximum output size for compressing data of "input_len" size.
+     */
+    virtual size_t compress_max_size(size_t input_len) const = 0;
+
+    /**
+     * Returns accepted option names for this compressor
+     */
+    virtual std::set<sstring> option_names() const;
+    /**
+     * Returns original options used in instantiating this compressor
+     */
+    virtual std::map<sstring, sstring> options() const;
+
+    /**
+     * Compressor class name.
+     */
+    const sstring& name() const {
+        return _name;
+    }
+
+    // to cheaply bridge sstable compression options / maps
+    using opt_string = stdx::optional<sstring>;
+    using opt_getter = std::function<opt_string(const sstring&)>;
+
+    static shared_ptr<compressor> create(const sstring& name, const opt_getter&);
+    static shared_ptr<compressor> create(const std::map<sstring, sstring>&);
+
+    static thread_local const shared_ptr<compressor> lz4;
+    static thread_local const shared_ptr<compressor> snappy;
+    static thread_local const shared_ptr<compressor> deflate;
+
+    static const sstring namespace_prefix;
 };

+template<typename BaseType, typename... Args>
+class class_registry;
+
+using compressor_ptr = shared_ptr<compressor>;
+using compressor_registry = class_registry<compressor_ptr, const typename compressor::opt_getter&>;
+
 class compression_parameters {
 public:
    static constexpr int32_t DEFAULT_CHUNK_LENGTH = 4 * 1024;
    static constexpr double DEFAULT_CRC_CHECK_CHANCE = 1.0;

-    static constexpr auto SSTABLE_COMPRESSION = "sstable_compression";
-    static constexpr auto CHUNK_LENGTH_KB = "chunk_length_kb";
-    static constexpr auto CRC_CHECK_CHANCE = "crc_check_chance";
+    static const sstring SSTABLE_COMPRESSION;
+    static const sstring CHUNK_LENGTH_KB;
+    static const sstring CRC_CHECK_CHANCE;
 private:
-    compressor _compressor;
+    compressor_ptr _compressor;
    std::experimental::optional<int> _chunk_length;
    std::experimental::optional<double> _crc_check_chance;
 public:
-    compression_parameters(compressor c = compressor::lz4) : _compressor(c) { }
-    compression_parameters(const std::map<sstring, sstring>& options) {
-        validate_options(options);
+    compression_parameters();
+    compression_parameters(compressor_ptr);
+    compression_parameters(const std::map<sstring, sstring>& options);
+    ~compression_parameters();

-        auto it = options.find(SSTABLE_COMPRESSION);
-        if (it == options.end() || it->second.empty()) {
-            _compressor = compressor::none;
-            return;
-        }
-        const auto& compressor_class = it->second;
-        if (is_compressor_class(compressor_class, "LZ4Compressor")) {
-            _compressor = compressor::lz4;
-        } else if (is_compressor_class(compressor_class, "SnappyCompressor")) {
-            _compressor = compressor::snappy;
-        } else if (is_compressor_class(compressor_class, "DeflateCompressor")) {
-            _compressor = compressor::deflate;
-        } else {
-            throw exceptions::configuration_exception(sstring("Unsupported compression class '") + compressor_class + "'.");
-        }
-        auto chunk_length = options.find(CHUNK_LENGTH_KB);
-        if (chunk_length != options.end()) {
-            try {
-                _chunk_length = std::stoi(chunk_length->second) * 1024;
-            } catch (const std::exception& e) {
-                throw exceptions::syntax_exception(sstring("Invalid integer value ") + chunk_length->second + " for " + CHUNK_LENGTH_KB);
-            }
-        }
-        auto crc_chance = options.find(CRC_CHECK_CHANCE);
-        if (crc_chance != options.end()) {
-            try {
-                _crc_check_chance = std::stod(crc_chance->second);
-            } catch (const std::exception& e) {
-                throw exceptions::syntax_exception(sstring("Invalid double value ") + crc_chance->second + "for " + CRC_CHECK_CHANCE);
-            }
-        }
-    }
-
-    compressor get_compressor() const { return _compressor; }
+    compressor_ptr get_compressor() const { return _compressor; }
    int32_t chunk_length() const { return _chunk_length.value_or(int(DEFAULT_CHUNK_LENGTH)); }
    double crc_check_chance() const { return _crc_check_chance.value_or(double(DEFAULT_CRC_CHECK_CHANCE)); }

-    void validate() {
-        if (_chunk_length) {
-            auto chunk_length = _chunk_length.value();
-            if (chunk_length <= 0) {
-                throw exceptions::configuration_exception(sstring("Invalid negative or null ") + CHUNK_LENGTH_KB);
-            }
-            // _chunk_length must be a power of two
-            if (chunk_length & (chunk_length - 1)) {
-                throw exceptions::configuration_exception(sstring(CHUNK_LENGTH_KB) + " must be a power of 2.");
-            }
-        }
-        if (_crc_check_chance && (_crc_check_chance.value() < 0.0 || _crc_check_chance.value() > 1.0)) {
-            throw exceptions::configuration_exception(sstring(CRC_CHECK_CHANCE) + " must be between 0.0 and 1.0.");
-        }
-    }
-
-    std::map<sstring, sstring> get_options() const {
-        if (_compressor == compressor::none) {
-            return std::map<sstring, sstring>();
-        }
-        std::map<sstring, sstring> opts;
-        opts.emplace(sstring(SSTABLE_COMPRESSION), compressor_name());
-        if (_chunk_length) {
-            opts.emplace(sstring(CHUNK_LENGTH_KB), std::to_string(_chunk_length.value() / 1024));
-        }
-        if (_crc_check_chance) {
-            opts.emplace(sstring(CRC_CHECK_CHANCE), std::to_string(_crc_check_chance.value()));
-        }
-        return opts;
-    }
-    bool operator==(const compression_parameters& other) const {
-        return _compressor == other._compressor
-               && _chunk_length == other._chunk_length
-               && _crc_check_chance == other._crc_check_chance;
-    }
-    bool operator!=(const compression_parameters& other) const {
-        return !(*this == other);
-    }
+    void validate();
+    std::map<sstring, sstring> get_options() const;
+    bool operator==(const compression_parameters& other) const;
+    bool operator!=(const compression_parameters& other) const;
 private:
-    void validate_options(const std::map<sstring, sstring>& options) {
-        // currently, there are no options specific to a particular compressor
-        static std::set<sstring> keywords({
-            sstring(SSTABLE_COMPRESSION),
-            sstring(CHUNK_LENGTH_KB),
-            sstring(CRC_CHECK_CHANCE),
-        });
-        for (auto&& opt : options) {
-            if (!keywords.count(opt.first)) {
-                throw exceptions::configuration_exception(sprint("Unknown compression option '%s'.", opt.first));
-            }
-        }
-    }
-    bool is_compressor_class(const sstring& value, const sstring& class_name) {
-        static const sstring namespace_prefix = "org.apache.cassandra.io.compress.";
-        return value == class_name || value == namespace_prefix + class_name;
-    }
-    sstring compressor_name() const {
-        switch (_compressor) {
-        case compressor::lz4:
-             return "org.apache.cassandra.io.compress.LZ4Compressor";
-        case compressor::snappy:
-            return "org.apache.cassandra.io.compress.SnappyCompressor";
-        case compressor::deflate:
-            return "org.apache.cassandra.io.compress.DeflateCompressor";
-        default:
-            abort();
-        }
-    }
+    void validate_options(const std::map<sstring, sstring>&);
 };
--- a/conf/scylla.yaml
+++ b/conf/scylla.yaml
@@ -12,7 +12,9 @@

 # The name of the cluster. This is mainly used to prevent machines in
 # one logical cluster from joining another.
-cluster_name: 'Test Cluster'
+# It is recommended to change the default value when creating a new cluster.
+# You can NOT modify this value for an existing cluster.
+#cluster_name: 'Test Cluster'

 # This defines the number of tokens randomly assigned to this node on the ring
 # The more tokens, relative to other nodes, the larger the proportion of data
@@ -85,6 +87,13 @@ listen_address: localhost
 # Leaving this blank will set it to the same value as listen_address
 # broadcast_address: 1.2.3.4

+
+# When using multiple physical network interfaces, set this to true to listen on broadcast_address
+# in addition to the listen_address, allowing nodes to communicate in both interfaces.
+# Ignore this property if the network configuration automatically routes between the public and private networks, as on EC2.
+#
+# listen_on_broadcast_address: false
+
 # port for the CQL native transport to listen for clients on
 # For security reasons, you should not expose this port to the internet.  Firewall it if needed.
 native_transport_port: 9042
@@ -98,13 +107,6 @@ native_transport_port: 9042
 # keeping native_transport_port unencrypted.
 #native_transport_port_ssl: 9142

-# Throttles all outbound streaming file transfers on this node to the
-# given total throughput in Mbps. This is necessary because Scylla does
-# mostly sequential IO when streaming data during bootstrap or repair, which
-# can lead to saturating the network connection and degrading rpc performance.
-# When unset, the default is 200 Mbps or 25 MB/s.
-# stream_throughput_outbound_megabits_per_sec: 200
-
 # How long the coordinator should wait for read operations to complete
 read_request_timeout_in_ms: 5000

@@ -238,9 +240,8 @@ batch_size_fail_threshold_in_kb: 50
 # Uncomment to enable experimental features
 # experimental: true

-###################################################
-## Not currently supported, reserved for future use
-###################################################
+# The directory where hints files are stored if hinted handoff is enabled.
+# hints_directory: /var/lib/scylla/hints

 # See http://wiki.apache.org/cassandra/HintedHandoff
 # May either be "true" or "false" to enable globally, or contain a list
@@ -264,23 +265,27 @@ batch_size_fail_threshold_in_kb: 50
 # cross-dc handoff tends to be slower
 # max_hints_delivery_threads: 2

+###################################################
+## Not currently supported, reserved for future use
+###################################################
+
 # Maximum throttle in KBs per second, total. This will be
 # reduced proportionally to the number of nodes in the cluster.
 # batchlog_replay_throttle_in_kb: 1024

 # Validity period for permissions cache (fetching permissions can be an
 # expensive operation depending on the authorizer, CassandraAuthorizer is
-# one example). Defaults to 2000, set to 0 to disable.
+# one example). Defaults to 10000, set to 0 to disable.
 # Will be disabled automatically for AllowAllAuthorizer.
-# permissions_validity_in_ms: 2000
+# permissions_validity_in_ms: 10000

 # Refresh interval for permissions cache (if enabled).
 # After this interval, cache entries become eligible for refresh. Upon next
 # access, an async reload is scheduled and the old value returned until it
-# completes. If permissions_validity_in_ms is non-zero, then this must be
-# also.
-# Defaults to the same value as permissions_validity_in_ms.
-# permissions_update_interval_in_ms: 1000
+# completes. If permissions_validity_in_ms is non-zero, then this also must have
+# a non-zero value. Defaults to 2000. It's recommended to set this value to
+# be at least 3 times smaller than the permissions_validity_in_ms.
+# permissions_update_interval_in_ms: 2000

 # The partitioner is responsible for distributing groups of rows (by
 # partition key) across nodes in the cluster.  You should leave this
--- a/configure.py
+++ b/configure.py
@@ -20,9 +20,11 @@
 # along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
 #

-import os, os.path, textwrap, argparse, sys, shlex, subprocess, tempfile, re
+import os, os.path, textwrap, argparse, sys, shlex, subprocess, tempfile, re, platform
 from distutils.spawn import find_executable

+tempfile.tempdir = "./build/tmp"
+
 configure_args = str.join(' ', [shlex.quote(x) for x in sys.argv[1:]])

 for line in open('/etc/os-release'):
@@ -34,7 +36,7 @@ for line in open('/etc/os-release'):
        os_ids += value.split(' ')

 # distribution "internationalization", converting package names.
-# Fedora name is key, values is distro -> package name dict.
+# Fedora name is key, values is distro -> package name dict. 
 i18n_xlat = {
    'boost-devel': {
        'debian': 'libboost-dev',
@@ -48,7 +50,7 @@ def pkgname(name):
        for id in os_ids:
            if id in dict:
                return dict[id]
-    return name
+    return name 

 def get_flags():
    with open('/proc/cpuinfo') as f:
@@ -83,17 +85,33 @@ def pkg_config(option, package):
    return output.decode('utf-8').strip()

 def try_compile(compiler, source = '', flags = []):
-    with tempfile.NamedTemporaryFile() as sfile:
-        sfile.file.write(bytes(source, 'utf-8'))
-        sfile.file.flush()
-        return subprocess.call([compiler, '-x', 'c++', '-o', '/dev/null', '-c', sfile.name] + flags,
-                               stdout = subprocess.DEVNULL,
-                               stderr = subprocess.DEVNULL) == 0
+    return try_compile_and_link(compiler, source, flags = flags + ['-c'])

-def warning_supported(warning, compiler):
+def ensure_tmp_dir_exists():
+    if not os.path.exists(tempfile.tempdir):
+        os.makedirs(tempfile.tempdir)
+
+def try_compile_and_link(compiler, source = '', flags = []):
+    ensure_tmp_dir_exists()
+    with tempfile.NamedTemporaryFile() as sfile:
+        ofile = tempfile.mktemp()
+        try:
+            sfile.file.write(bytes(source, 'utf-8'))
+            sfile.file.flush()
+            # We can't write to /dev/null, since in some cases (-ftest-coverage) gcc will create an auxiliary
+            # output file based on the name of the output file, and "/dev/null.gcsa" is not a good name
+            return subprocess.call([compiler, '-x', 'c++', '-o', ofile, sfile.name] + args.user_cflags.split() + flags,
+                                   stdout = subprocess.DEVNULL,
+                                   stderr = subprocess.DEVNULL) == 0
+        finally:
+            if os.path.exists(ofile):
+                os.unlink(ofile)
+
+def flag_supported(flag, compiler):
    # gcc ignores -Wno-x even if it is not supported
-    adjusted = re.sub('^-Wno-', '-W', warning)
-    return try_compile(flags = ['-Werror', adjusted], compiler = compiler)
+    adjusted = re.sub('^-Wno-', '-W', flag)
+    split = adjusted.split(' ')
+    return try_compile(flags = ['-Werror'] + split, compiler = compiler)

 def debug_flag(compiler):
    src_with_auto = textwrap.dedent('''\
@@ -108,6 +126,14 @@ def debug_flag(compiler):
        print('Note: debug information disabled; upgrade your compiler')
        return ''

+def gold_supported(compiler):
+    src_main = 'int main(int argc, char **argv) { return 0; }'
+    if try_compile_and_link(source = src_main, flags = ['-fuse-ld=gold'], compiler = compiler):
+        return '-fuse-ld=gold'
+    else:
+        print('Note: gold not found; using default system linker')
+        return ''
+
 def maybe_static(flag, libs):
    if flag and not args.static:
        libs = '-Wl,-Bstatic {} -Wl,-Bdynamic'.format(libs)
@@ -133,6 +159,13 @@ class Thrift(object):
    def endswith(self, end):
        return self.source.endswith(end)

+def default_target_arch():
+    # Default for --target (-march): 'nehalem' on x86 machines, '' otherwise.
+    mach = platform.machine()
+    if mach in ['i386', 'i686', 'x86_64']:
+        return 'nehalem'
+    return ''
+
 class Antlr3Grammar(object):
    def __init__(self, source):
        self.source = source
@@ -154,20 +187,22 @@ modes = {
    'debug': {
        'sanitize': '-fsanitize=address -fsanitize=leak -fsanitize=undefined',
        'sanitize_libs': '-lasan -lubsan',
-        'opt': '-O0 -DDEBUG -DDEBUG_SHARED_PTR -DDEFAULT_ALLOCATOR',
+        'opt': '-O0 -DDEBUG -DDEBUG_SHARED_PTR -DDEFAULT_ALLOCATOR -DDEBUG_LSA_SANITIZER',
        'libs': '',
    },
    'release': {
        'sanitize': '',
        'sanitize_libs': '',
-        'opt': '-O2',
+        'opt': '-O3',
        'libs': '',
    },
 }

 scylla_tests = [
    'tests/mutation_test',
-    'tests/streamed_mutation_test',
+    'tests/mvcc_test',
+    'tests/mutation_fragment_test',
+    'tests/flat_mutation_reader_test',
    'tests/schema_registry_test',
    'tests/canonical_mutation_test',
    'tests/range_test',
@@ -176,6 +211,7 @@ scylla_tests = [
    'tests/partitioner_test',
    'tests/frozen_mutation_test',
    'tests/serialized_action_test',
+    'tests/hint_test',
    'tests/clustering_ranges_walker_test',
    'tests/perf/perf_mutation',
    'tests/lsa_async_eviction_test',
@@ -186,7 +222,8 @@ scylla_tests = [
    'tests/perf/perf_cql_parser',
    'tests/perf/perf_simple_query',
    'tests/perf/perf_fast_forward',
-    'tests/cache_streamed_mutation_test',
+    'tests/perf/perf_cache_eviction',
+    'tests/cache_flat_mutation_reader_test',
    'tests/row_cache_stress_test',
    'tests/memory_footprint',
    'tests/perf/perf_sstable',
@@ -212,6 +249,7 @@ scylla_tests = [
    'tests/config_test',
    'tests/gossiping_property_file_snitch_test',
    'tests/ec2_snitch_test',
+    'tests/gce_snitch_test',
    'tests/snitch_reset_test',
    'tests/network_topology_strategy_test',
    'tests/query_processor_test',
@@ -221,7 +259,7 @@ scylla_tests = [
    'tests/murmur_hash_test',
    'tests/allocation_strategy_test',
    'tests/logalloc_test',
-    'tests/log_histogram_test',
+    'tests/log_heap_test',
    'tests/managed_vector_test',
    'tests/crc_test',
    'tests/flush_queue_test',
@@ -233,19 +271,40 @@ scylla_tests = [
    'tests/database_test',
    'tests/nonwrapping_range_test',
    'tests/input_stream_test',
-    'tests/sstable_atomic_deletion_test',
    'tests/virtual_reader_test',
    'tests/view_schema_test',
    'tests/counter_test',
    'tests/cell_locker_test',
+    'tests/row_locker_test',
+    'tests/streaming_histogram_test',
+    'tests/duration_test',
+    'tests/vint_serialization_test',
+    'tests/compress_test',
+    'tests/chunked_vector_test',
    'tests/loading_cache_test',
+    'tests/castas_fcts_test',
+    'tests/big_decimal_test',
+    'tests/aggregate_fcts_test',
+    'tests/role_manager_test',
+    'tests/caching_options_test',
+    'tests/auth_resource_test',
+    'tests/cql_auth_query_test',
+    'tests/enum_set_test',
+    'tests/extensions_test',
+    'tests/cql_auth_syntax_test',
+    'tests/querier_cache',
+    'tests/querier_cache_resource_based_eviction',
+]
+
+perf_tests = [
+    'tests/perf/perf_mutation_readers'
 ]

 apps = [
    'scylla',
    ]

-tests = scylla_tests
+tests = scylla_tests + perf_tests

 other = [
    'iotune',
@@ -267,6 +326,8 @@ arg_parser.add_argument('--cflags', action = 'store', dest = 'user_cflags', defa
                        help = 'Extra flags for the C++ compiler')
 arg_parser.add_argument('--ldflags', action = 'store', dest = 'user_ldflags', default = '',
                        help = 'Extra flags for the linker')
+arg_parser.add_argument('--target', action = 'store', dest = 'target', default = default_target_arch(),
+                        help = 'Target architecture (-march)')
 arg_parser.add_argument('--compiler', action = 'store', dest = 'cxx', default = 'g++',
                        help = 'C++ compiler path')
 arg_parser.add_argument('--c-compiler', action='store', dest='cc', default='gcc',
@@ -285,6 +346,8 @@ arg_parser.add_argument('--static-thrift', dest = 'staticthrift', action = 'stor
            help = 'Link libthrift statically')
 arg_parser.add_argument('--static-boost', dest = 'staticboost', action = 'store_true',
            help = 'Link boost statically')
+arg_parser.add_argument('--static-yaml-cpp', dest = 'staticyamlcpp', action = 'store_true',
+            help = 'Link libyaml-cpp statically')
 arg_parser.add_argument('--tests-debuginfo', action = 'store', dest = 'tests_debuginfo', type = int, default = 0,
                        help = 'Enable(1)/disable(0)compiler debug information generation for tests')
 arg_parser.add_argument('--python', action = 'store', dest = 'python', default = 'python3',
@@ -309,7 +372,7 @@ scylla_core = (['database.cc',
                 'schema_registry.cc',
                 'bytes.cc',
                 'mutation.cc',
-                 'streamed_mutation.cc',
+                 'mutation_fragment.cc',
                 'partition_version.cc',
                 'row_cache.cc',
                 'canonical_mutation.cc',
@@ -320,22 +383,25 @@ scylla_core = (['database.cc',
                 'supervisor.cc',
                 'utils/logalloc.cc',
                 'utils/large_bitset.cc',
+                 'utils/buffer_input_stream.cc',
                 'mutation_partition.cc',
                 'mutation_partition_view.cc',
                 'mutation_partition_serializer.cc',
                 'mutation_reader.cc',
+                 'flat_mutation_reader.cc',
                 'mutation_query.cc',
                 'keys.cc',
-                 'counters.cc',
+                 'counters.cc',                 
+                 'compress.cc',
                 'sstables/sstables.cc',
                 'sstables/compress.cc',
                 'sstables/row.cc',
                 'sstables/partition.cc',
-                 'sstables/filter.cc',
                 'sstables/compaction.cc',
                 'sstables/compaction_strategy.cc',
                 'sstables/compaction_manager.cc',
-                 'sstables/atomic_deletion.cc',
+                 'sstables/integrity_checked_file_impl.cc',
+                 'sstables/prepended_input_stream.cc',
                 'transport/event.cc',
                 'transport/event_notifier.cc',
                 'transport/server.cc',
@@ -350,6 +416,7 @@ scylla_core = (['database.cc',
                 'cql3/sets.cc',
                 'cql3/maps.cc',
                 'cql3/functions/functions.cc',
+                 'cql3/functions/castas_fcts.cc',
                 'cql3/statements/cf_prop_defs.cc',
                 'cql3/statements/cf_statement.cc',
                 'cql3/statements/authentication_statement.cc',
@@ -357,7 +424,6 @@ scylla_core = (['database.cc',
                 'cql3/statements/create_table_statement.cc',
                 'cql3/statements/create_view_statement.cc',
                 'cql3/statements/create_type_statement.cc',
-                 'cql3/statements/create_user_statement.cc',
                 'cql3/statements/drop_index_statement.cc',
                 'cql3/statements/drop_keyspace_statement.cc',
                 'cql3/statements/drop_table_statement.cc',
@@ -379,8 +445,6 @@ scylla_core = (['database.cc',
                 'cql3/statements/truncate_statement.cc',
                 'cql3/statements/alter_table_statement.cc',
                 'cql3/statements/alter_view_statement.cc',
-                 'cql3/statements/alter_user_statement.cc',
-                 'cql3/statements/drop_user_statement.cc',
                 'cql3/statements/list_users_statement.cc',
                 'cql3/statements/authorization_statement.cc',
                 'cql3/statements/permission_altering_statement.cc',
@@ -389,9 +453,10 @@ scylla_core = (['database.cc',
                 'cql3/statements/revoke_statement.cc',
                 'cql3/statements/alter_type_statement.cc',
                 'cql3/statements/alter_keyspace_statement.cc',
+                 'cql3/statements/role-management-statements.cc',
                 'cql3/update_parameters.cc',
                 'cql3/ut_name.cc',
-                 'cql3/user_options.cc',
+                 'cql3/role_name.cc',
                 'thrift/handler.cc',
                 'thrift/server.cc',
                 'thrift/thrift_validation.cc',
@@ -433,15 +498,16 @@ scylla_core = (['database.cc',
                 'db/commitlog/commitlog.cc',
                 'db/commitlog/commitlog_replayer.cc',
                 'db/commitlog/commitlog_entry.cc',
+                 'db/hints/manager.cc',
                 'db/config.cc',
+                 'db/extensions.cc',
                 'db/heat_load_balance.cc',
                 'db/index/secondary_index.cc',
                 'db/marshal/type_parser.cc',
                 'db/batchlog_manager.cc',
                 'db/view/view.cc',
+                 'db/view/row_locking.cc',
                 'index/secondary_index_manager.cc',
-                 'io/io.cc',
-                 'utils/utils.cc',
                 'utils/UUID_gen.cc',
                 'utils/i_filter.cc',
                 'utils/bloom_filter.cc',
@@ -451,6 +517,7 @@ scylla_core = (['database.cc',
                 'utils/dynamic_bitset.cc',
                 'utils/managed_bytes.cc',
                 'utils/exceptions.cc',
+                 'utils/config_file.cc',
                 'gms/version_generator.cc',
                 'gms/versioned_value.cc',
                 'gms/gossiper.cc',
@@ -476,7 +543,6 @@ scylla_core = (['database.cc',
                 'locator/network_topology_strategy.cc',
                 'locator/everywhere_replication_strategy.cc',
                 'locator/token_metadata.cc',
-                 'locator/locator.cc',
                 'locator/snitch_base.cc',
                 'locator/simple_snitch.cc',
                 'locator/rack_inferring_snitch.cc',
@@ -484,6 +550,7 @@ scylla_core = (['database.cc',
                 'locator/production_snitch_base.cc',
                 'locator/ec2_snitch.cc',
                 'locator/ec2_multi_region_snitch.cc',
+                 'locator/gce_snitch.cc',
                 'message/messaging_service.cc',
                 'service/client_state.cc',
                 'service/migration_task.cc',
@@ -510,20 +577,33 @@ scylla_core = (['database.cc',
                 'lister.cc',
                 'repair/repair.cc',
                 'exceptions/exceptions.cc',
-                 'auth/auth.cc',
+                 'auth/allow_all_authenticator.cc',
+                 'auth/allow_all_authorizer.cc',
                 'auth/authenticated_user.cc',
                 'auth/authenticator.cc',
-                 'auth/authorizer.cc',
+                 'auth/common.cc',
                 'auth/default_authorizer.cc',
-                 'auth/data_resource.cc',
+                 'auth/resource.cc',
+                 'auth/roles-metadata.cc',
                 'auth/password_authenticator.cc',
                 'auth/permission.cc',
+                 'auth/permissions_cache.cc',
+                 'auth/service.cc',
+                 'auth/standard_role_manager.cc',
+                 'auth/transitional.cc',
+                 'auth/authentication_options.cc',
+                 'auth/role_or_anonymous.cc',
                 'tracing/tracing.cc',
                 'tracing/trace_keyspace_helper.cc',
                 'tracing/trace_state.cc',
+                 'table_helper.cc',
                 'range_tombstone.cc',
                 'range_tombstone_list.cc',
-                 'disk-error-handler.cc'
+                 'disk-error-handler.cc',
+                 'duration.cc',
+                 'vint-serialization.cc',
+                 'utils/arch/powerpc/crc32-vpmsum/crc32_wrapper.cc',
+                 'querier.cc',
                 ]
                + [Antlr3Grammar('cql3/Cql.g')]
                + [Thrift('interface/cassandra.thrift', 'Cassandra')]
@@ -619,6 +699,16 @@ pure_boost_tests = set([
    'tests/dynamic_bitset_test',
    'tests/idl_test',
    'tests/cartesian_product_test',
+    'tests/streaming_histogram_test',
+    'tests/duration_test',
+    'tests/vint_serialization_test',
+    'tests/compress_test',
+    'tests/chunked_vector_test',
+    'tests/big_decimal_test',
+    'tests/caching_options_test',
+    'tests/auth_resource_test',
+    'tests/enum_set_test',
+    'tests/cql_auth_syntax_test',
 ])

 tests_not_using_seastar_test_framework = set([
@@ -632,10 +722,12 @@ tests_not_using_seastar_test_framework = set([
    'tests/message',
    'tests/perf/perf_simple_query',
    'tests/perf/perf_fast_forward',
+    'tests/perf/perf_cache_eviction',
    'tests/row_cache_stress_test',
    'tests/memory_footprint',
    'tests/gossip',
    'tests/perf/perf_sstable',
+    'tests/querier_cache_resource_based_eviction',
 ]) | pure_boost_tests

 for t in tests_not_using_seastar_test_framework:
@@ -645,19 +737,27 @@ for t in tests_not_using_seastar_test_framework:
 for t in scylla_tests:
    deps[t] = [t + '.cc']
    if t not in tests_not_using_seastar_test_framework:
-        deps[t] += scylla_tests_dependencies
+        deps[t] += scylla_tests_dependencies 
        deps[t] += scylla_tests_seastar_deps
    else:
        deps[t] += scylla_core + api + idls + ['tests/cql_test_env.cc']

-deps['tests/sstable_test'] += ['tests/sstable_datafile_test.cc']
+perf_tests_seastar_deps = [
+    'seastar/tests/perf/perf_tests.cc'
+]
+
+for t in perf_tests:
+    deps[t] = [t + '.cc'] + scylla_tests_dependencies + perf_tests_seastar_deps
+
+deps['tests/sstable_test'] += ['tests/sstable_datafile_test.cc', 'tests/sstable_utils.cc']
+deps['tests/mutation_reader_test'] += ['tests/sstable_utils.cc']

 deps['tests/bytes_ostream_test'] = ['tests/bytes_ostream_test.cc', 'utils/managed_bytes.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
 deps['tests/input_stream_test'] = ['tests/input_stream_test.cc']
 deps['tests/UUID_test'] = ['utils/UUID_gen.cc', 'tests/UUID_test.cc', 'utils/uuid.cc', 'utils/managed_bytes.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
 deps['tests/murmur_hash_test'] = ['bytes.cc', 'utils/murmur_hash.cc', 'tests/murmur_hash_test.cc']
 deps['tests/allocation_strategy_test'] = ['tests/allocation_strategy_test.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
-deps['tests/log_histogram_test'] = ['tests/log_histogram_test.cc']
+deps['tests/log_heap_test'] = ['tests/log_heap_test.cc']
 deps['tests/anchorless_list_test'] = ['tests/anchorless_list_test.cc']

 warnings = [
@@ -671,14 +771,28 @@ warnings = [
    '-Wno-return-stack-address',
    '-Wno-missing-braces',
    '-Wno-unused-lambda-capture',
+    '-Wno-misleading-indentation',
+    '-Wno-overflow',
+    '-Wno-noexcept-type',
+    '-Wno-nonnull-compare'
    ]

 warnings = [w
            for w in warnings
-            if warning_supported(warning = w, compiler = args.cxx)]
+            if flag_supported(flag = w, compiler = args.cxx)]

 warnings = ' '.join(warnings + ['-Wno-error=deprecated-declarations'])

+optimization_flags = [
+    '--param inline-unit-growth=300',
+]
+optimization_flags = [o
+                      for o in optimization_flags
+                      if flag_supported(flag = o, compiler = args.cxx)]
+modes['release']['opt'] += ' ' + ' '.join(optimization_flags)
+
+gold_linker_flag = gold_supported(compiler = args.cxx)
+
 dbgflag = debug_flag(args.cxx) if args.debuginfo else ''
 tests_link_rule = 'link' if args.tests_debuginfo else 'link_stripped'

@@ -766,13 +880,20 @@ if args.staticcxx:
    seastar_flags += ['--static-stdc++']
 if args.staticboost:
    seastar_flags += ['--static-boost']
+if args.staticyamlcpp:
+    seastar_flags += ['--static-yaml-cpp']
 if args.gcc6_concepts:
    seastar_flags += ['--enable-gcc6-concepts']
 if args.alloc_failure_injector:
    seastar_flags += ['--enable-alloc-failure-injector']

-seastar_cflags = args.user_cflags + " -march=nehalem"
-seastar_flags += ['--compiler', args.cxx, '--c-compiler', args.cc, '--cflags=%s' % (seastar_cflags)]
+seastar_cflags = args.user_cflags
+if args.target != '':
+    seastar_cflags += ' -march=' + args.target
+seastar_ldflags = args.user_ldflags
+seastar_flags += ['--compiler', args.cxx, '--c-compiler', args.cc, '--cflags=%s' % (seastar_cflags), '--ldflags=%s' %(seastar_ldflags),
+                  '--c++-dialect=gnu++1z', '--optflags=%s' % (modes['release']['opt']),
+                 ]

 status = subprocess.call([python, './configure.py'] + seastar_flags, cwd = 'seastar')

@@ -803,11 +924,16 @@ for mode in build_modes:
 seastar_deps = 'practically_anything_can_change_so_lets_run_it_every_time_and_restat.'

 args.user_cflags += " " + pkg_config("--cflags", "jsoncpp")
-libs = ' '.join(['-lyaml-cpp', '-llz4', '-lz', '-lsnappy', pkg_config("--libs", "jsoncpp"),
-                 maybe_static(args.staticboost, '-lboost_filesystem'), ' -lcrypt',
+libs = ' '.join([maybe_static(args.staticyamlcpp, '-lyaml-cpp'), '-llz4', '-lz', '-lsnappy', pkg_config("--libs", "jsoncpp"),
+                 maybe_static(args.staticboost, '-lboost_filesystem'), ' -lcrypt', ' -lcryptopp',
                 maybe_static(args.staticboost, '-lboost_date_time'),
                ])

+xxhash_dir = 'xxHash'
+
+if not os.path.exists(xxhash_dir) or not os.listdir(xxhash_dir):
+    raise Exception(xxhash_dir + ' is empty. Run "git submodule update --init".')
+
 if not args.staticboost:
    args.user_cflags += ' -DBOOST_TEST_DYN_LINK'

@@ -830,13 +956,14 @@ os.makedirs(outdir, exist_ok = True)
 do_sanitize = True
 if args.static:
    do_sanitize = False
+
 with open(buildfile, 'w') as f:
    f.write(textwrap.dedent('''\
        configure_args = {configure_args}
        builddir = {outdir}
        cxx = {cxx}
        cxxflags = {user_cflags} {warnings} {defines}
-        ldflags = {user_ldflags}
+        ldflags = {gold_linker_flag} {user_ldflags}
        libs = {libs}
        pool link_pool
            depth = {link_pool_depth}
@@ -865,7 +992,7 @@ with open(buildfile, 'w') as f:
    for mode in build_modes:
        modeval = modes[mode]
        f.write(textwrap.dedent('''\
-            cxxflags_{mode} = -I. -I $builddir/{mode}/gen -I seastar -I seastar/build/{mode}/gen
+            cxxflags_{mode} = {opt} -DXXH_PRIVATE_API -I. -I $builddir/{mode}/gen -I seastar -I seastar/build/{mode}/gen
            rule cxx.{mode}
              command = $cxx -MD -MT $out -MF $out.d {seastar_cflags} $cxxflags $cxxflags_{mode} $obj_cxxflags -c -o $out $in
              description = CXX $out
@@ -893,7 +1020,8 @@ with open(buildfile, 'w') as f:
                     && sed -i -e 's/^\\( *\)\\(ImplTraits::CommonTokenType\\* [a-zA-Z0-9_]* = NULL;\\)$$/\\1const \\2/' $
                        -e '1i using ExceptionBaseType = int;' $
                        -e 's/^{{/{{ ExceptionBaseType\* ex = nullptr;/; $
-                            s/ExceptionBaseType\* ex = new/ex = new/' $
+                            s/ExceptionBaseType\* ex = new/ex = new/; $
+                            s/exceptions::syntax_exception e/exceptions::syntax_exception\& e/' $
                        build/{mode}/gen/${{stem}}Parser.cpp
                description = ANTLR3 $in
            ''').format(mode = mode, **modeval))
@@ -912,6 +1040,7 @@ with open(buildfile, 'w') as f:
            objs = ['$builddir/' + mode + '/' + src.replace('.cc', '.o')
                    for src in srcs
                    if src.endswith('.cc')]
+            objs.append('$builddir/../utils/arch/powerpc/crc32-vpmsum/crc32.S')
            has_thrift = False
            for dep in deps[binary]:
                if isinstance(dep, Thrift):
@@ -919,25 +1048,13 @@ with open(buildfile, 'w') as f:
                    objs += dep.objects('$builddir/' + mode + '/gen')
                if isinstance(dep, Antlr3Grammar):
                    objs += dep.objects('$builddir/' + mode + '/gen')
-            if binary.endswith('.pc'):
-                vars = modeval.copy()
-                vars.update(globals())
-                pc = textwrap.dedent('''\
-                        Name: Seastar
-                        URL: http://seastar-project.org/
-                        Description: Advanced C++ framework for high-performance server applications on modern hardware.
-                        Version: 1.0
-                        Libs: -L{srcdir}/{builddir} -Wl,--whole-archive -lseastar -Wl,--no-whole-archive {dbgflag} -Wl,--no-as-needed {static} {pie} -fvisibility=hidden -pthread {user_ldflags} {libs} {sanitize_libs}
-                        Cflags: -std=gnu++1y {dbgflag} {fpie} -Wall -Werror -fvisibility=hidden -pthread -I{srcdir} -I{srcdir}/{builddir}/gen {user_cflags} {warnings} {defines} {sanitize} {opt}
-                        ''').format(builddir = 'build/' + mode, srcdir = os.getcwd(), **vars)
-                f.write('build $builddir/{}/{}: gen\n  text = {}\n'.format(mode, binary, repr(pc)))
-            elif binary.endswith('.a'):
+            if binary.endswith('.a'):
                f.write('build $builddir/{}/{}: ar.{} {}\n'.format(mode, binary, mode, str.join(' ', objs)))
            else:
                if binary.startswith('tests/'):
                    local_libs = '$libs'
                    if binary not in tests_not_using_seastar_test_framework or binary in pure_boost_tests:
-                        local_libs += ' ' + maybe_static(args.staticboost, '-lboost_unit_test_framework')
+                        local_libs += ' ' + maybe_static(args.staticboost, '-lboost_unit_test_framework') 
                    if has_thrift:
                        local_libs += ' ' + thrift_libs + ' ' + maybe_static(args.staticboost, '-lboost_system')
                    # Our code's debugging information is huge, and multiplied
@@ -1027,7 +1144,7 @@ with open(buildfile, 'w') as f:
        rule configure
          command = {python} configure.py $configure_args
          generator = 1
-        build build.ninja: configure | configure.py
+        build build.ninja: configure | configure.py seastar/configure.py
        rule cscope
            command = find -name '*.[chS]' -o -name "*.cc" -o -name "*.hh" | cscope -bq -i-
            description = CSCOPE
--- a/cpu_controller.hh
+++ b/cpu_controller.hh
@@ -1,89 +0,0 @@
-/*
- * Copyright (C) 2017 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-#include <seastar/core/thread.hh>
-#include <seastar/core/timer.hh>
-#include <chrono>
-
-// Simple proportional controller to adjust shares of memtable/streaming flushes.
-//
-// Goal is to flush as fast as we can, but not so fast that we steal all the CPU from incoming
-// requests, and at the same time minimize user-visible fluctuations in the flush quota.
-//
-// What that translates to is we'll try to keep virtual dirty's firt derivative at 0 (IOW, we keep
-// virtual dirty constant), which means that the rate of incoming writes is equal to the rate of
-// flushed bytes.
-//
-// The exact point at which the controller stops determines the desired flush CPU usage. As we
-// approach the hard dirty limit, we need to be more aggressive. We will therefore define two
-// thresholds, and increase the constant as we cross them.
-//
-//  1) the soft limit line
-//  2) halfway between soft limit and dirty limit
-//
-// The constants q1 and q2 are used to determine the proportional factor at each stage.
-//
-// Below the soft limit, we are in no particular hurry to flush, since it means we're set to
-// complete flushing before we a new memtable is ready. The quota is dirty * q1, and q1 is set to a
-// low number.
-//
-// The first half of the virtual dirty region is where we expect to be usually, so we have a low
-// slope corresponding to a sluggish response between q1 * soft_limit and q2.
-//
-// In the second half, we're getting close to the hard dirty limit so we increase the slope and
-// become more responsive, up to a maximum quota of qmax.
-//
-// For now we'll just set them in the structure not to complicate the constructor. But q1, q2 and
-// qmax can easily become parameters if we find another user.
-class flush_cpu_controller {
-    static constexpr float hard_dirty_limit = 0.50;
-    static constexpr float q1 = 0.01;
-    static constexpr float q2 = 0.2;
-    static constexpr float qmax = 1;
-
-    float _current_quota = 0.0f;
-    float _goal;
-    std::function<float()> _current_dirty;
-    std::chrono::milliseconds _interval;
-    timer<> _update_timer;
-
-    seastar::thread_scheduling_group _scheduling_group;
-    seastar::thread_scheduling_group *_current_scheduling_group = nullptr;
-
-    void adjust();
-public:
-    seastar::thread_scheduling_group* scheduling_group() {
-        return _current_scheduling_group;
-    }
-    float current_quota() const {
-        return _current_quota;
-    }
-
-    struct disabled {
-        seastar::thread_scheduling_group *backup;
-    };
-    flush_cpu_controller(disabled d) : _scheduling_group(std::chrono::nanoseconds(0), 0), _current_scheduling_group(d.backup) {}
-    flush_cpu_controller(std::chrono::milliseconds interval, float soft_limit, std::function<float()> current_dirty);
-    flush_cpu_controller(flush_cpu_controller&&) = default;
-};
-
-
--- a/cql3/Cql.g
+++ b/cql3/Cql.g
@@ -56,13 +56,16 @@ options {
 #include "cql3/statements/index_prop_defs.hh"
 #include "cql3/statements/raw/use_statement.hh"
 #include "cql3/statements/raw/batch_statement.hh"
-#include "cql3/statements/create_user_statement.hh"
-#include "cql3/statements/alter_user_statement.hh"
-#include "cql3/statements/drop_user_statement.hh"
 #include "cql3/statements/list_users_statement.hh"
 #include "cql3/statements/grant_statement.hh"
 #include "cql3/statements/revoke_statement.hh"
 #include "cql3/statements/list_permissions_statement.hh"
+#include "cql3/statements/alter_role_statement.hh"
+#include "cql3/statements/list_roles_statement.hh"
+#include "cql3/statements/grant_role_statement.hh"
+#include "cql3/statements/revoke_role_statement.hh"
+#include "cql3/statements/drop_role_statement.hh"
+#include "cql3/statements/create_role_statement.hh"
 #include "cql3/statements/index_target.hh"
 #include "cql3/statements/ks_prop_defs.hh"
 #include "cql3/selection/raw_selector.hh"
@@ -80,6 +83,8 @@ options {
 #include "cql3/maps.hh"
 #include "cql3/sets.hh"
 #include "cql3/lists.hh"
+#include "cql3/role_name.hh"
+#include "cql3/role_options.hh"
 #include "cql3/type_cast.hh"
 #include "cql3/tuples.hh"
 #include "cql3/user_types.hh"
@@ -89,6 +94,7 @@ options {
 #include "core/sstring.hh"
 #include "CqlLexer.hpp"

+#include <algorithm>
 #include <unordered_map>
 #include <map>
 }
@@ -236,6 +242,12 @@ struct uninitialized {
        return res;
    }

+    bool convert_boolean_literal(stdx::string_view s) { // Returns true iff s is the word true, compared case-insensitively; any other text yields false.
+        std::string lower_s(s.size(), '\0');
+        std::transform(s.cbegin(), s.cend(), lower_s.begin(), [] (unsigned char c) { return static_cast<char>(::tolower(c)); }); // tolower() is undefined for negative char values, so each character is widened through unsigned char first
+        return lower_s == "true";
+    }
+
    void add_raw_update(std::vector<std::pair<::shared_ptr<cql3::column_identifier::raw>,::shared_ptr<cql3::operation::raw_update>>>& operations,
        ::shared_ptr<cql3::column_identifier::raw> key, ::shared_ptr<cql3::operation::raw_update> update)
    {
@@ -345,6 +357,12 @@ cqlStatement returns [shared_ptr<raw::parsed_statement> stmt]
    | st32=createViewStatement         { $stmt = st32; }
    | st33=alterViewStatement          { $stmt = st33; }
    | st34=dropViewStatement           { $stmt = st34; }
+    | st35=listRolesStatement          { $stmt = st35; }
+    | st36=grantRoleStatement          { $stmt = st36; }
+    | st37=revokeRoleStatement         { $stmt = st37; }
+    | st38=dropRoleStatement           { $stmt = st38; }
+    | st39=createRoleStatement         { $stmt = st39; }
+    | st40=alterRoleStatement          { $stmt = st40; }
    ;

 /*
@@ -369,7 +387,6 @@ selectStatement returns [shared_ptr<raw::select_statement> expr]
    }
    : K_SELECT ( ( K_DISTINCT { is_distinct = true; } )?
                 sclause=selectClause
-               | sclause=selectCountClause
               )
      K_FROM cf=columnFamilyName
      ( K_WHERE wclause=whereClause )?
@@ -396,9 +413,11 @@ selector returns [shared_ptr<raw_selector> s]
 unaliasedSelector returns [shared_ptr<selectable::raw> s]
    @init { shared_ptr<selectable::raw> tmp; }
    :  ( c=cident                                  { tmp = c; }
+       | K_COUNT '(' countArgument ')'             { tmp = selectable::with_function::raw::make_count_rows_function(); }
       | K_WRITETIME '(' c=cident ')'              { tmp = make_shared<selectable::writetime_or_ttl::raw>(c, true); }
       | K_TTL       '(' c=cident ')'              { tmp = make_shared<selectable::writetime_or_ttl::raw>(c, false); }
       | f=functionName args=selectionFunctionArgs { tmp = ::make_shared<selectable::with_function::raw>(std::move(f), std::move(args)); }
+       | K_CAST      '(' arg=unaliasedSelector K_AS t=native_type ')'  { tmp = ::make_shared<selectable::with_cast::raw>(std::move(arg), std::move(t)); }
       )
       ( '.' fi=cident { tmp = make_shared<selectable::with_field_selection::raw>(std::move(tmp), std::move(fi)); } )*
    { $s = tmp; }
@@ -411,16 +430,6 @@ selectionFunctionArgs returns [std::vector<shared_ptr<selectable::raw>> a]
      ')'
    ;

-selectCountClause returns [std::vector<shared_ptr<raw_selector>> expr]
-    @init{ auto alias = make_shared<cql3::column_identifier>("count", false); }
-    : K_COUNT '(' countArgument ')' (K_AS c=ident { alias = c; })? {
-        auto&& with_fn = ::make_shared<cql3::selection::selectable::with_function::raw>(
-     	    cql3::functions::function_name::native_function("countRows"),
-     	        std::vector<shared_ptr<cql3::selection::selectable::raw>>()); 
-     	$expr.push_back(make_shared<cql3::selection::raw_selector>(with_fn, alias));
-     }
-    ;
-
 countArgument
    : '*'
    | i=INTEGER { if (i->getText() != "1") {
@@ -974,7 +983,7 @@ truncateStatement returns [::shared_ptr<truncate_statement> stmt]
    ;

 /**
- * GRANT <permission> ON <resource> TO <username>
+ * GRANT <permission> ON <resource> TO <grantee>
 */
 grantStatement returns [::shared_ptr<grant_statement> stmt]
    : K_GRANT
@@ -982,12 +991,12 @@ grantStatement returns [::shared_ptr<grant_statement> stmt]
      K_ON
          resource
      K_TO
-          username
-      { $stmt = ::make_shared<grant_statement>($permissionOrAll.perms, $resource.res, $username.text); } 
+          grantee=userOrRoleName
+      { $stmt = ::make_shared<grant_statement>($permissionOrAll.perms, $resource.res, std::move(grantee)); } 
    ;

 /**
- * REVOKE <permission> ON <resource> FROM <username>
+ * REVOKE <permission> ON <resource> FROM <revokee>
 */
 revokeStatement returns [::shared_ptr<revoke_statement> stmt]
    : K_REVOKE
@@ -995,80 +1004,104 @@ revokeStatement returns [::shared_ptr<revoke_statement> stmt]
      K_ON
          resource
      K_FROM
-          username
-      { $stmt = ::make_shared<revoke_statement>($permissionOrAll.perms, $resource.res, $username.text); } 
+          revokee=userOrRoleName
+      { $stmt = ::make_shared<revoke_statement>($permissionOrAll.perms, $resource.res, std::move(revokee)); } 
+    ;
+
+/**
+ * GRANT <rolename> TO <grantee>
+ */
+grantRoleStatement returns [::shared_ptr<grant_role_statement> stmt] // yields the statement built from the two parsed names
+    : K_GRANT role=userOrRoleName K_TO grantee=userOrRoleName // both names are parsed by userOrRoleName
+      { $stmt = ::make_shared<grant_role_statement>(std::move(role), std::move(grantee));  }
+    ;
+
+/**
+ * REVOKE <rolename> FROM <revokee>
+ */
+revokeRoleStatement returns [::shared_ptr<revoke_role_statement> stmt] // yields the statement built from the two parsed names
+    : K_REVOKE role=userOrRoleName K_FROM revokee=userOrRoleName // both names are parsed by userOrRoleName
+      { $stmt = ::make_shared<revoke_role_statement>(std::move(role), std::move(revokee)); }
    ;

 listPermissionsStatement returns [::shared_ptr<list_permissions_statement> stmt]
    @init {
-		std::experimental::optional<auth::data_resource> r;
-		std::experimental::optional<sstring> u;
+		std::optional<auth::resource> r;
+		std::optional<sstring> role;
 		bool recursive = true;
    }
    : K_LIST
          permissionOrAll
      ( K_ON resource { r = $resource.res; } )?
-      ( K_OF username { u = sstring($username.text); } )?
+      ( K_OF rn=userOrRoleName { role = sstring(static_cast<cql3::role_name>(rn).to_string()); } )?
      ( K_NORECURSIVE { recursive = false; } )?
-      { $stmt = ::make_shared<list_permissions_statement>($permissionOrAll.perms, std::move(r), std::move(u), recursive); } 
+      { $stmt = ::make_shared<list_permissions_statement>($permissionOrAll.perms, std::move(r), std::move(role), recursive); } 
    ;

 permission returns [auth::permission perm]
-    : p=(K_CREATE | K_ALTER | K_DROP | K_SELECT | K_MODIFY | K_AUTHORIZE)
+    : p=(K_CREATE | K_ALTER | K_DROP | K_SELECT | K_MODIFY | K_AUTHORIZE | K_DESCRIBE) // the matched keyword text is converted by from_string in the action
    { $perm = auth::permissions::from_string($p.text); }
    ;

 permissionOrAll returns [auth::permission_set perms]
-    : K_ALL ( K_PERMISSIONS )?       { $perms = auth::permissions::ALL_DATA; }
+    : K_ALL ( K_PERMISSIONS )?       { $perms = auth::permissions::ALL; } // ALL [PERMISSIONS] selects the predefined auth::permissions::ALL set
    | p=permission ( K_PERMISSION )? { $perms = auth::permission_set::from_mask(auth::permission_set::mask_for($p.perm)); }
    ;

-resource returns [auth::data_resource res]
-    : r=dataResource { $res = $r.res; }
+resource returns [uninitialized<auth::resource> res] // a resource is either a data resource or a role resource
+    : d=dataResource { $res = std::move(d); } // forwards the value produced by dataResource
+    | r=roleResource { $res = std::move(r); } // forwards the value produced by roleResource
    ;

-dataResource returns [auth::data_resource res]
-    : K_ALL K_KEYSPACES { $res = auth::data_resource(); }
-    | K_KEYSPACE ks = keyspaceName { $res = auth::data_resource($ks.id); }
+dataResource returns [uninitialized<auth::resource> res] // ALL KEYSPACES, one keyspace, or one table
+    : K_ALL K_KEYSPACES { $res = auth::resource(auth::resource_kind::data); } // built from the data kind alone, no keyspace given
+    | K_KEYSPACE ks = keyspaceName { $res = auth::make_data_resource($ks.id); } // single-argument form: keyspace only
    | ( K_COLUMNFAMILY )? cf = columnFamilyName
-      { $res = auth::data_resource($cf.name->get_keyspace(), $cf.name->get_column_family()); }
+      { $res = auth::make_data_resource($cf.name->get_keyspace(), $cf.name->get_column_family()); } // two-argument form: keyspace and column family
+    ;
+
+roleResource returns [uninitialized<auth::resource> res] // ALL ROLES, or one named role
+    : K_ALL K_ROLES { $res = auth::resource(auth::resource_kind::role); } // built from the role kind alone, no role name given
+    | K_ROLE role = userOrRoleName { $res = auth::make_role_resource(static_cast<const cql3::role_name&>(role).to_string()); } // the parsed name is passed on as its string form
    ;

 /**
 * CREATE USER [IF NOT EXISTS] <username> [WITH PASSWORD <password>] [SUPERUSER|NOSUPERUSER]
 */
-createUserStatement returns [::shared_ptr<create_user_statement> stmt]
+createUserStatement returns [::shared_ptr<create_role_statement> stmt]
    @init {
-    	auto opts = ::make_shared<cql3::user_options>();
-        bool superuser = false;
+        cql3::role_options opts;
+        opts.is_superuser = false;
+        opts.can_login = true;
+
        bool ifNotExists = false;
    }
    : K_CREATE K_USER (K_IF K_NOT K_EXISTS { ifNotExists = true; })? username
-      ( K_WITH userOptions[opts] )?
-      ( K_SUPERUSER { superuser = true; } | K_NOSUPERUSER { superuser = false; } )?
-      { $stmt = ::make_shared<create_user_statement>($username.text, std::move(opts), superuser, ifNotExists); }
+      ( K_WITH K_PASSWORD v=STRING_LITERAL { opts.password = $v.text; })?
+      ( K_SUPERUSER { opts.is_superuser = true; } | K_NOSUPERUSER { opts.is_superuser = false; } )?
+      { $stmt = ::make_shared<create_role_statement>(cql3::role_name($username.text, cql3::preserve_role_case::yes), std::move(opts), ifNotExists); }
    ;

 /**
 * ALTER USER <username> [WITH PASSWORD <password>] [SUPERUSER|NOSUPERUSER]
 */
-alterUserStatement returns [::shared_ptr<alter_user_statement> stmt]
+alterUserStatement returns [::shared_ptr<alter_role_statement> stmt]
    @init {
-    	auto opts = ::make_shared<cql3::user_options>();
-    	std::experimental::optional<bool> superuser;
+        cql3::role_options opts;
    }
    : K_ALTER K_USER username
-      ( K_WITH userOptions[opts] )?
-      ( K_SUPERUSER { superuser = true; } | K_NOSUPERUSER { superuser = false; } )?
-      { $stmt = ::make_shared<alter_user_statement>($username.text, std::move(opts), std::move(superuser)); }
+      ( K_WITH K_PASSWORD v=STRING_LITERAL { opts.password = $v.text; })?
+      ( K_SUPERUSER { opts.is_superuser = true; } | K_NOSUPERUSER { opts.is_superuser = false; } )?
+      { $stmt = ::make_shared<alter_role_statement>(cql3::role_name($username.text, cql3::preserve_role_case::yes), std::move(opts)); }
    ;

 /**
 * DROP USER [IF EXISTS] <username>
 */
-dropUserStatement returns [::shared_ptr<drop_user_statement> stmt]
+dropUserStatement returns [::shared_ptr<drop_role_statement> stmt] // DROP USER is expressed as a drop_role_statement
    @init { bool ifExists = false; }
-    : K_DROP K_USER (K_IF K_EXISTS { ifExists = true; })? username { $stmt = ::make_shared<drop_user_statement>($username.text, ifExists); }
+    : K_DROP K_USER (K_IF K_EXISTS { ifExists = true; })? username // the name comes from the username rule, not userOrRoleName
+      { $stmt = ::make_shared<drop_role_statement>(cql3::role_name($username.text, cql3::preserve_role_case::yes), ifExists); }
    ;

 /**
@@ -1078,12 +1111,67 @@ listUsersStatement returns [::shared_ptr<list_users_statement> stmt]
    : K_LIST K_USERS { $stmt = ::make_shared<list_users_statement>(); }
    ;

-userOptions[::shared_ptr<cql3::user_options> opts]
-    : userOption[opts]
+/**
+ * CREATE ROLE [IF NOT EXISTS] <role_name> [WITH <roleOption> [AND <roleOption>]*]
+ */
+createRoleStatement returns [::shared_ptr<create_role_statement> stmt]
+    @init {
+        cql3::role_options opts;
+        opts.is_superuser = false; // default unless a SUPERUSER option overrides it
+        opts.can_login = false; // default unless a LOGIN option overrides it (createUserStatement starts from true)
+        bool if_not_exists = false; // set when IF NOT EXISTS is present
+    }
+    : K_CREATE K_ROLE (K_IF K_NOT K_EXISTS { if_not_exists = true; })? name=userOrRoleName
+      (K_WITH roleOptions[opts])? // roleOptions fills opts in place
+      { $stmt = ::make_shared<create_role_statement>(name, std::move(opts), if_not_exists); }
    ;

-userOption[::shared_ptr<cql3::user_options> opts]
-    : k=K_PASSWORD v=STRING_LITERAL { opts->put($k.text, $v.text); }
+/**
+ * ALTER ROLE <rolename> [WITH <roleOption> [AND <roleOption>]*]
+ */
+alterRoleStatement returns [::shared_ptr<alter_role_statement> stmt]
+    @init {
+        cql3::role_options opts; // no defaults are assigned here, unlike createRoleStatement
+    }
+    : K_ALTER K_ROLE name=userOrRoleName
+      (K_WITH roleOptions[opts])? // roleOptions fills opts in place
+      { $stmt = ::make_shared<alter_role_statement>(name, std::move(opts)); }
+    ;
+
+/**
+ * DROP ROLE [IF EXISTS] <rolename>
+ */
+dropRoleStatement returns [::shared_ptr<drop_role_statement> stmt]
+    @init {
+        bool if_exists = false; // set when IF EXISTS is present
+    }
+    : K_DROP K_ROLE (K_IF K_EXISTS { if_exists = true; })? name=userOrRoleName
+      { $stmt = ::make_shared<drop_role_statement>(name, if_exists); }
+    ;
+
+/**
+ * LIST ROLES [OF <rolename>] [NORECURSIVE]
+ */
+listRolesStatement returns [::shared_ptr<list_roles_statement> stmt]
+    @init {
+        bool recursive = true; // cleared by NORECURSIVE
+        std::optional<cql3::role_name> grantee; // stays empty when no OF clause is given
+    }
+    : K_LIST K_ROLES
+        (K_OF g=userOrRoleName { grantee = std::move(g); })?
+        (K_NORECURSIVE { recursive = false; })?
+        { $stmt = ::make_shared<list_roles_statement>(grantee, recursive); }
+    ;
+
+roleOptions[cql3::role_options& opts] // opts is taken by reference and filled by each roleOption
+    : roleOption[opts] (K_AND roleOption[opts])* // one option, then any number of AND-separated options
+    ;
+
+roleOption[cql3::role_options& opts] // each alternative assigns exactly one field of opts
+    : K_PASSWORD '=' v=STRING_LITERAL { opts.password = $v.text; }
+    | K_OPTIONS '=' m=mapLiteral { opts.options = convert_property_map(m); } // map literal converted by convert_property_map
+    | K_SUPERUSER '=' b=BOOLEAN { opts.is_superuser = convert_boolean_literal($b.text); }
+    | K_LOGIN '=' b=BOOLEAN { opts.can_login = convert_boolean_literal($b.text); }
    ;

 /** DEFINITIONS **/
@@ -1124,12 +1212,13 @@ userTypeName returns [uninitialized<cql3::ut_name> name]
    : (ks=ident '.')? ut=non_type_ident { $name = cql3::ut_name(ks, ut); }
    ;

-#if 0
-userOrRoleName returns [RoleName name]
-    @init { $name = new RoleName(); }
-    : roleName[name] {return $name;}
+userOrRoleName returns [uninitialized<cql3::role_name> name] // unquoted forms pass preserve_role_case::no, quoted forms pass ::yes
+    : t=IDENT              { $name = cql3::role_name($t.text, cql3::preserve_role_case::no); }
+    | t=STRING_LITERAL     { $name = cql3::role_name($t.text, cql3::preserve_role_case::yes); }
+    | t=QUOTED_NAME        { $name = cql3::role_name($t.text, cql3::preserve_role_case::yes); }
+    | k=unreserved_keyword { $name = cql3::role_name(k, cql3::preserve_role_case::no); } // unreserved keywords are accepted as role names
+    | QMARK {add_recognition_error("Bind variables cannot be used for role names");} // a bind marker is reported as a recognition error
    ;
-#endif

 ksName[::shared_ptr<cql3::keyspace_element_name> name]
    : t=IDENT              { $name->set_keyspace($t.text, false);}
@@ -1152,21 +1241,13 @@ idxName[::shared_ptr<cql3::index_name> name]
    | QMARK {add_recognition_error("Bind variables cannot be used for index names");}
    ;

-#if 0
-roleName[RoleName name]
-    : t=IDENT              { $name.setName($t.text, false); }
-    | t=QUOTED_NAME        { $name.setName($t.text, true); }
-    | k=unreserved_keyword { $name.setName(k, false); }
-    | QMARK {addRecognitionError("Bind variables cannot be used for role names");}
-    ;
-#endif
-
 constant returns [shared_ptr<cql3::constants::literal> constant]
    @init{std::string sign;}
    : t=STRING_LITERAL { $constant = cql3::constants::literal::string(sstring{$t.text}); }
    | t=INTEGER        { $constant = cql3::constants::literal::integer(sstring{$t.text}); }
    | t=FLOAT          { $constant = cql3::constants::literal::floating_point(sstring{$t.text}); }
    | t=BOOLEAN        { $constant = cql3::constants::literal::bool_(sstring{$t.text}); }
+    | t=DURATION       { $constant = cql3::constants::literal::duration(sstring{$t.text}); }
    | t=UUID           { $constant = cql3::constants::literal::uuid(sstring{$t.text}); }
    | t=HEXNUMBER      { $constant = cql3::constants::literal::hex(sstring{$t.text}); }
    | { sign=""; } ('-' {sign = "-"; } )? t=(K_NAN | K_INFINITY) { $constant = cql3::constants::literal::floating_point(sstring{sign + $t.text}); }
@@ -1464,6 +1545,7 @@ native_type returns [shared_ptr<cql3_type> t]
    | K_COUNTER   { $t = cql3_type::counter; }
    | K_DECIMAL   { $t = cql3_type::decimal; }
    | K_DOUBLE    { $t = cql3_type::double_; }
+    | K_DURATION  { $t = cql3_type::duration; }
    | K_FLOAT     { $t = cql3_type::float_; }
    | K_INET      { $t = cql3_type::inet; }
    | K_INT       { $t = cql3_type::int_; }
@@ -1503,6 +1585,7 @@ tuple_type returns [shared_ptr<cql3::cql3_type::raw> t]
 username
    : IDENT
    | STRING_LITERAL
+    | QUOTED_NAME { add_recognition_error("Quoted strings are not supported for user names"); } // matched only so that a specific error can be reported
    ;

 // Basically the same as cident, but we need to exlude existing CQL3 types
@@ -1541,8 +1624,13 @@ basic_unreserved_keyword returns [sstring str]
        | K_ALL
        | K_USER
        | K_USERS
+        | K_ROLE
+        | K_ROLES
        | K_SUPERUSER
        | K_NOSUPERUSER
+        | K_LOGIN
+        | K_NOLOGIN
+        | K_OPTIONS
        | K_PASSWORD
        | K_EXISTS
        | K_CUSTOM
@@ -1569,6 +1657,7 @@ basic_unreserved_keyword returns [sstring str]
 K_SELECT:      S E L E C T;
 K_FROM:        F R O M;
 K_AS:          A S;
+K_CAST:        C A S T;
 K_WHERE:       W H E R E;
 K_AND:         A N D;
 K_KEY:         K E Y;
@@ -1633,13 +1722,19 @@ K_OF:          O F;
 K_REVOKE:      R E V O K E;
 K_MODIFY:      M O D I F Y;
 K_AUTHORIZE:   A U T H O R I Z E;
+K_DESCRIBE:    D E S C R I B E;
 K_NORECURSIVE: N O R E C U R S I V E;

 K_USER:        U S E R;
 K_USERS:       U S E R S;
+K_ROLE:        R O L E;
+K_ROLES:       R O L E S;
 K_SUPERUSER:   S U P E R U S E R;
 K_NOSUPERUSER: N O S U P E R U S E R;
 K_PASSWORD:    P A S S W O R D;
+K_LOGIN:       L O G I N;
+K_NOLOGIN:     N O L O G I N;
+K_OPTIONS:     O P T I O N S;

 K_CLUSTERING:  C L U S T E R I N G;
 K_ASCII:       A S C I I;
@@ -1649,6 +1744,7 @@ K_BOOLEAN:     B O O L E A N;
 K_COUNTER:     C O U N T E R;
 K_DECIMAL:     D E C I M A L;
 K_DOUBLE:      D O U B L E;
+K_DURATION:    D U R A T I O N;
 K_FLOAT:       F L O A T;
 K_INET:        I N E T;
 K_INT:         I N T;
@@ -1778,6 +1874,20 @@ fragment EXPONENT
    : E ('+' | '-')? DIGIT+
    ;

+fragment DURATION_UNIT // unit suffix that follows a digit run in the first DURATION alternative
+    : Y
+    | M O // presumably months, as opposed to the bare M below -- confirm against the duration parser
+    | W
+    | D
+    | H
+    | M
+    | S
+    | M S
+    | U S
+    | '\u00B5' S // U+00B5 (micro sign) followed by S, accepted next to the U S spelling
+    | N S
+    ;
+
 INTEGER
    : '-'? DIGIT+
    ;
@@ -1802,6 +1912,13 @@ BOOLEAN
    : T R U E | F A L S E
    ;

+DURATION // four accepted spellings, each with an optional leading minus sign
+    : '-'? DIGIT+ DURATION_UNIT (DIGIT+ DURATION_UNIT)* // one or more <digits><unit> pairs
+    | '-'? 'P' (DIGIT+ 'Y')? (DIGIT+ 'M')? (DIGIT+ 'D')? ('T' (DIGIT+ 'H')? (DIGIT+ 'M')? (DIGIT+ 'S')?)? // ISO 8601 "format with designators"
+    | '-'? 'P' DIGIT+ 'W' // P<digits>W form; designators in the P forms are literal upper-case characters
+    | '-'? 'P' DIGIT DIGIT DIGIT DIGIT '-' DIGIT DIGIT '-' DIGIT DIGIT 'T' DIGIT DIGIT ':' DIGIT DIGIT ':' DIGIT DIGIT // ISO 8601 "alternative format"
+    ;
+
 IDENT
    : LETTER (LETTER | DIGIT | '_')*
    ;
--- a/cql3/abstract_marker.cc
+++ b/cql3/abstract_marker.cc
@@ -79,6 +79,7 @@ abstract_marker::raw::raw(int32_t bind_index)
        return ::make_shared<maps::marker>(_bind_index, receiver);
    }
    assert(0);
+    return shared_ptr<term>();
 }

 assignment_testable::test_result abstract_marker::raw::test_assignment(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) {
--- a/cql3/attributes.cc
+++ b/cql3/attributes.cc
@@ -79,7 +79,7 @@ int64_t attributes::get_timestamp(int64_t now, const query_options& options) {
    }
    try {
        data_type_for<int64_t>()->validate(*tval);
-    } catch (marshal_exception e) {
+    } catch (marshal_exception& e) {
        throw exceptions::invalid_request_exception("Invalid timestamp value");
    }
    return value_cast<int64_t>(data_type_for<int64_t>()->deserialize(*tval));
@@ -99,7 +99,7 @@ int32_t attributes::get_time_to_live(const query_options& options) {
    try {
        data_type_for<int32_t>()->validate(*tval);
    }
-    catch (marshal_exception e) {
+    catch (marshal_exception& e) {
        throw exceptions::invalid_request_exception("Invalid TTL value");
    }

--- a/cql3/column_condition.cc
+++ b/cql3/column_condition.cc
@@ -40,11 +40,29 @@
 */

 #include "cql3/column_condition.hh"
+#include "statements/request_validations.hh"
 #include "unimplemented.hh"
 #include "lists.hh"
 #include "maps.hh"
 #include <boost/range/algorithm_ext/push_back.hpp>

+namespace {
+
+void validate_operation_on_durations(const abstract_type& type, const cql3::operator_type& op) { // Rejects slice operators on any type that references a duration; other combinations pass through.
+    using cql3::statements::request_validations::check_false;
+
+    if (op.is_slice() && type.references_duration()) {
+        check_false(type.is_collection(), "Slice conditions are not supported on collections containing durations"); // presumably raises when the first argument is true -- confirm in request_validations.hh
+        check_false(type.is_tuple(), "Slice conditions are not supported on tuples containing durations");
+        check_false(type.is_user_type(), "Slice conditions are not supported on UDTs containing durations");
+
+        // We're a duration.
+        throw exceptions::invalid_request_exception("Slice conditions are not supported on durations"); // constant message: no printf-style formatting pass needed
+    }
+}
+
+}
+
 namespace cql3 {

 bool
@@ -95,6 +113,7 @@ column_condition::raw::prepare(database& db, const sstring& keyspace, const colu
            }
            return column_condition::in_condition(receiver, std::move(terms));
        } else {
+            validate_operation_on_durations(*receiver.type, _op);
            return column_condition::condition(receiver, _value->prepare(db, keyspace, receiver.column_specification), _op);
        }
    }
@@ -129,6 +148,8 @@ column_condition::raw::prepare(database& db, const sstring& keyspace, const colu
                                | boost::adaptors::transformed(std::bind(&term::raw::prepare, std::placeholders::_1, std::ref(db), std::ref(keyspace), value_spec)));
        return column_condition::in_condition(receiver, _collection_element->prepare(db, keyspace, element_spec), terms);
    } else {
+        validate_operation_on_durations(*receiver.type, _op);
+
        return column_condition::condition(receiver,
                _collection_element->prepare(db, keyspace, element_spec),
                _value->prepare(db, keyspace, value_spec),
--- a/cql3/constants.cc
+++ b/cql3/constants.cc
@@ -52,14 +52,15 @@ std::ostream&
 operator<<(std::ostream&out, constants::type t)
 {
    switch (t) {
-        case constants::type::STRING:  return out << "STRING";
-        case constants::type::INTEGER: return out << "INTEGER";
-        case constants::type::UUID:    return out << "UUID";
-        case constants::type::FLOAT:   return out << "FLOAT";
-        case constants::type::BOOLEAN: return out << "BOOLEAN";
-        case constants::type::HEX:     return out << "HEX";
-    };
-    assert(0);
+        case constants::type::STRING:   return out << "STRING";
+        case constants::type::INTEGER:  return out << "INTEGER";
+        case constants::type::UUID:     return out << "UUID";
+        case constants::type::FLOAT:    return out << "FLOAT";
+        case constants::type::BOOLEAN:  return out << "BOOLEAN";
+        case constants::type::HEX:      return out << "HEX";
+        case constants::type::DURATION: return out << "DURATION";
+    }
+    abort();
 }

 bytes
@@ -145,6 +146,11 @@ constants::literal::test_assignment(database& db, const sstring& keyspace, ::sha
                return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
            }
            break;
+        case type::DURATION:
+            if (kind == cql3_type::kind_enum_set::prepare<cql3_type::kind::DURATION>()) {
+                return assignment_testable::test_result::EXACT_MATCH;
+            }
+            break;
    }
    return assignment_testable::test_result::NOT_ASSIGNABLE;
 }
--- a/cql3/constants.hh
+++ b/cql3/constants.hh
@@ -60,7 +60,7 @@ public:
 #endif
 public:
    enum class type {
-        STRING, INTEGER, UUID, FLOAT, BOOLEAN, HEX
+        STRING, INTEGER, UUID, FLOAT, BOOLEAN, HEX, DURATION
    };

    /**
@@ -123,7 +123,7 @@ public:
            // This is a workaround for antlr3 not distinguishing between
            // calling in lexer setText() with an empty string and not calling
            // setText() at all.
-            if (text.size() == 1 && text[0] == -1) {
+            if (text.size() == 1 && text[0] == '\xFF') {
                text.reset();
            }
            return ::make_shared<literal>(type::STRING, text);
@@ -149,6 +149,10 @@ public:
            return ::make_shared<literal>(type::HEX, text);
        }

+        static ::shared_ptr<literal> duration(sstring text) { // Wraps the raw token text in a literal tagged type::DURATION.
+            return ::make_shared<literal>(type::DURATION, text);
+        }
+
        virtual ::shared_ptr<term> prepare(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver);
    private:
        bytes parsed_value(data_type validator);
--- a/cql3/cql3_type.cc
+++ b/cql3/cql3_type.cc
@@ -48,6 +48,10 @@ shared_ptr<cql3_type> cql3_type::raw::prepare(database& db, const sstring& keysp
    }
 }

+bool cql3_type::raw::is_duration() const { // Base-class default; subclasses override it where the answer can differ.
+    return false;
+}
+
 bool cql3_type::raw::references_user_type(const sstring& name) const {
    return false;
 }
@@ -78,6 +82,10 @@ public:
    virtual sstring to_string() const {
        return _type->to_string();
    }
+
+    virtual bool is_duration() const override { // True when the wrapped type compares equal to duration_type.
+        return _type->get_type()->equals(duration_type);
+    }
 };

 class cql3_type::raw_collection : public raw {
@@ -126,9 +134,15 @@ public:
        if (_kind == &collection_type_impl::kind::list) {
            return make_shared(cql3_type(to_string(), list_type_impl::get_instance(_values->prepare_internal(keyspace, user_types)->get_type(), !_frozen), false));
        } else if (_kind == &collection_type_impl::kind::set) {
+            if (_values->is_duration()) {
+                throw exceptions::invalid_request_exception(sprint("Durations are not allowed inside sets: %s", *this));
+            }
            return make_shared(cql3_type(to_string(), set_type_impl::get_instance(_values->prepare_internal(keyspace, user_types)->get_type(), !_frozen), false));
        } else if (_kind == &collection_type_impl::kind::map) {
            assert(_keys); // "Got null keys type for a collection";
+            if (_keys->is_duration()) {
+                throw exceptions::invalid_request_exception(sprint("Durations are not allowed as map keys: %s", *this));
+            }
            return make_shared(cql3_type(to_string(), map_type_impl::get_instance(_keys->prepare_internal(keyspace, user_types)->get_type(), _values->prepare_internal(keyspace, user_types)->get_type(), !_frozen), false));
        }
        abort();
@@ -138,6 +152,10 @@ public:
        return (_keys && _keys->references_user_type(name)) || _values->references_user_type(name);
    }

+    bool is_duration() const override { // A collection itself never reports as a duration; element types are checked separately in prepare_internal.
+        return false;
+    }
+
    virtual sstring to_string() const override {
        sstring start = _frozen ? "frozen<" : "";
        sstring end = _frozen ? ">" : "";
@@ -329,6 +347,7 @@ thread_local shared_ptr<cql3_type> cql3_type::inet = make("inet", inet_addr_type
 thread_local shared_ptr<cql3_type> cql3_type::varint = make("varint", varint_type, cql3_type::kind::VARINT);
 thread_local shared_ptr<cql3_type> cql3_type::decimal = make("decimal", decimal_type, cql3_type::kind::DECIMAL);
 thread_local shared_ptr<cql3_type> cql3_type::counter = make("counter", counter_type, cql3_type::kind::COUNTER);
+thread_local shared_ptr<cql3_type> cql3_type::duration = make("duration", duration_type, cql3_type::kind::DURATION);

 const std::vector<shared_ptr<cql3_type>>&
 cql3_type::values() {
@@ -354,6 +373,7 @@ cql3_type::values() {
        cql3_type::timeuuid,
        cql3_type::date,
        cql3_type::time,
+        cql3_type::duration,
    };
    return v;
 }
--- a/cql3/cql3_type.hh
+++ b/cql3/cql3_type.hh
@@ -75,6 +75,7 @@ public:
        virtual bool supports_freezing() const = 0;
        virtual bool is_collection() const;
        virtual bool is_counter() const;
+        virtual bool is_duration() const;
        virtual bool references_user_type(const sstring&) const;
        virtual std::experimental::optional<sstring> keyspace() const;
        virtual void freeze();
@@ -102,7 +103,7 @@ private:

 public:
    enum class kind : int8_t {
-        ASCII, BIGINT, BLOB, BOOLEAN, COUNTER, DECIMAL, DOUBLE, EMPTY, FLOAT, INT, SMALLINT, TINYINT, INET, TEXT, TIMESTAMP, UUID, VARCHAR, VARINT, TIMEUUID, DATE, TIME
+        ASCII, BIGINT, BLOB, BOOLEAN, COUNTER, DECIMAL, DOUBLE, EMPTY, FLOAT, INT, SMALLINT, TINYINT, INET, TEXT, TIMESTAMP, UUID, VARCHAR, VARINT, TIMEUUID, DATE, TIME, DURATION
    };
    using kind_enum = super_enum<kind,
        kind::ASCII,
@@ -125,7 +126,8 @@ public:
        kind::VARINT,
        kind::TIMEUUID,
        kind::DATE,
-        kind::TIME>;
+        kind::TIME,
+        kind::DURATION>;
    using kind_enum_set = enum_set<kind_enum>;
 private:
    std::experimental::optional<kind_enum_set::prepared> _kind;
@@ -154,6 +156,7 @@ public:
    static thread_local shared_ptr<cql3_type> varint;
    static thread_local shared_ptr<cql3_type> decimal;
    static thread_local shared_ptr<cql3_type> counter;
+    static thread_local shared_ptr<cql3_type> duration;

    static const std::vector<shared_ptr<cql3_type>>& values();
 public:
--- a/cql3/error_collector.hh
+++ b/cql3/error_collector.hh
@@ -68,9 +68,11 @@ class error_collector : public error_listener<RecognizerType, ExceptionBaseType>
    const sstring_view _query;

    /**
-     * The error messages.
+     * An empty bitset to be used as a workaround for AntLR null dereference
+     * bug.
     */
-    std::vector<sstring> _error_msgs;
+    static typename ExceptionBaseType::BitsetListType _empty_bit_list;
+
 public:

    /**
@@ -81,7 +83,10 @@ public:
     */
    error_collector(const sstring_view& query) : _query(query) {}

-    virtual void syntax_error(RecognizerType& recognizer, ANTLR_UINT8** token_names, ExceptionBaseType* ex) override {
+    /**
+     * Format and throw a new \c exceptions::syntax_exception.
+     */
+    [[noreturn]] virtual void syntax_error(RecognizerType& recognizer, ANTLR_UINT8** token_names, ExceptionBaseType* ex) override {
        auto hdr = get_error_header(ex);
        auto msg = get_error_message(recognizer, ex, token_names);
        std::stringstream result;
@@ -90,22 +95,15 @@ public:
        if (recognizer instanceof Parser)
            appendQuerySnippet((Parser) recognizer, builder);
 #endif
-        _error_msgs.emplace_back(result.str());
-    }

-    virtual void syntax_error(RecognizerType& recognizer, const sstring& msg) override {
-        _error_msgs.emplace_back(msg);
+        throw exceptions::syntax_exception(result.str());
    }

    /**
-     * Throws the first syntax error found by the lexer or the parser if it exists.
-     *
-     * @throws SyntaxException the syntax error.
+     * Throw a new \c exceptions::syntax_exception.
     */
-    void throw_first_syntax_error() {
-        if (!_error_msgs.empty()) {
-            throw exceptions::syntax_exception(_error_msgs[0]);
-        }
+    [[noreturn]] virtual void syntax_error(RecognizerType&, const sstring& msg) override {
+        throw exceptions::syntax_exception(msg);
    }

 private:
@@ -152,6 +150,14 @@ private:
            break;
        }
        default:
+            // AntLR Exception class has a bug of dereferencing a null
+            // pointer in the displayRecognitionError. The following
+            // if statement makes sure it will not be null before the
+            // call to that function (displayRecognitionError).
+            // bug reference: https://github.com/antlr/antlr3/issues/191
+            if (!ex->get_expectingSet()) {
+                ex->set_expectingSet(&_empty_bit_list);
+            }
            ex->displayRecognitionError(token_names, msg);
        }
        return msg.str();
@@ -353,4 +359,8 @@ private:
 #endif
 };

+template<typename RecognizerType, typename TokenType, typename ExceptionBaseType> // Out-of-class definition of the static member declared in error_collector.
+typename ExceptionBaseType::BitsetListType
+error_collector<RecognizerType,TokenType,ExceptionBaseType>::_empty_bit_list = typename ExceptionBaseType::BitsetListType(); // value-initialized; installed as the expecting-set when the exception has none
+
 }
--- a/cql3/error_listener.hh
+++ b/cql3/error_listener.hh
@@ -53,6 +53,7 @@ namespace cql3 {
 template<typename RecognizerType, typename ExceptionBaseType>
 class error_listener {
 public:
+    virtual ~error_listener() = default;

    /**
     * Invoked when a syntax error occurs.
--- a/cql3/functions/abstract_function.hh
+++ b/cql3/functions/abstract_function.hh
@@ -90,6 +90,10 @@ public:
        return false;
    }

+    virtual sstring column_name(const std::vector<sstring>& column_names) override { // Renders "<function name>(<col1>, <col2>, ...)".
+        return sprint("%s(%s)", _name, join(", ", column_names));
+    }
+
    virtual void print(std::ostream& os) const override;
 };

--- a/cql3/functions/aggregate_fcts.hh
+++ b/cql3/functions/aggregate_fcts.hh
@@ -41,6 +41,7 @@

 #pragma once

+#include "utils/big_decimal.hh"
 #include "aggregate_function.hh"
 #include "native_aggregate_function.hh"

@@ -66,6 +67,19 @@ public:
    }
 };

+static const sstring COUNT_ROWS_FUNCTION_NAME = "countRows"; // name handed to the native_aggregate_function base below
+
+class count_rows_function final : public native_aggregate_function { // aggregate returned by make_count_rows_function
+public:
+    count_rows_function() : native_aggregate_function(COUNT_ROWS_FUNCTION_NAME, long_type, {}) {} // long_type result, empty argument list
+    virtual std::unique_ptr<aggregate> new_aggregate() override {
+        return std::make_unique<impl_count_function>(); // counting itself is delegated to impl_count_function
+    }
+    virtual sstring column_name(const std::vector<sstring>& column_names) override {
+        return "count"; // fixed label; the argument column names are ignored
+    }
+};
+
    /**
     * The function used to count the number of rows of a result set. This function is called when COUNT(*) or COUNT(1)
     * is specified.
@@ -73,7 +87,7 @@ public:
 inline
 shared_ptr<aggregate_function>
 make_count_rows_function() {
-    return make_native_aggregate_function_using<impl_count_function>("countRows", long_type);
+    return make_shared<count_rows_function>();
 }

 template <typename Type>
@@ -111,9 +125,70 @@ make_sum_function() {
    return make_shared<sum_function_for<Type>>();
 }

+template <typename Type> // Divides an avg() accumulator by the row count and yields a value of the column type Type.
+class impl_div_for_avg {
+public:
+    template <typename Acc> static Type div(const Acc& x, const int64_t y) { // Acc is deduced so a wide sum is not narrowed to Type before the division
+        return static_cast<Type>(x / y); // divide at accumulator width, then narrow only the quotient
+    }
+};
+
+template <> // big_decimal divides through its own div() member instead of operator/
+class impl_div_for_avg<big_decimal> {
+public:
+    static big_decimal div(const big_decimal& x, const int64_t y) {
+        return x.div(y, big_decimal::rounding_mode::HALF_EVEN); // rounding mode is fixed to HALF_EVEN
+    }
+};
+
+// We need a wider accumulator for average, since summing the inputs can overflow
+// the input type
+template <typename T>
+struct accumulator_for; // primary template left undefined: only the listed types are supported
+
+template <>
+struct accumulator_for<int8_t> {
+    using type = __int128; // all fixed-width integer inputs accumulate in __int128
+};
+
+template <>
+struct accumulator_for<int16_t> {
+    using type = __int128;
+};
+
+template <>
+struct accumulator_for<int32_t> {
+    using type = __int128;
+};
+
+template <>
+struct accumulator_for<int64_t> {
+    using type = __int128;
+};
+
+template <>
+struct accumulator_for<float> {
+    using type = float; // not widened: float sums stay in float
+};
+
+template <>
+struct accumulator_for<double> {
+    using type = double; // not widened
+};
+
+template <>
+struct accumulator_for<boost::multiprecision::cpp_int> {
+    using type = boost::multiprecision::cpp_int; // accumulates in its own type
+};
+
+template <>
+struct accumulator_for<big_decimal> {
+    using type = big_decimal; // accumulates in its own type
+};
+
 template <typename Type>
 class impl_avg_function_for final : public aggregate_function::aggregate {
-   Type _sum{};
+   typename accumulator_for<Type>::type _sum{};
   int64_t _count = 0;
 public:
    virtual void reset() override {
@@ -121,9 +196,9 @@ public:
        _count = 0;
    }
    virtual opt_bytes compute(cql_serialization_format sf) override {
-        Type ret = 0;
+        Type ret{};
        if (_count) {
-            ret = _sum / _count;
+            ret = impl_div_for_avg<Type>::div(_sum, _count);
        }
        return data_type_for<Type>()->decompose(ret);
    }
@@ -152,9 +227,29 @@ make_avg_function() {
    return make_shared<avg_function_for<Type>>();
 }

+template <typename T> // Maps a native type to the type in which min/max keep their running value.
+struct aggregate_type_for {
+    using type = T; // default: the type itself
+};
+
+template<>
+struct aggregate_type_for<simple_date_native_type> {
+    using type = simple_date_native_type::primary_type; // the running value is held as the wrapper's primary_type
+};
+
+template<>
+struct aggregate_type_for<timestamp_native_type> {
+    using type = timestamp_native_type::primary_type; // the running value is held as the wrapper's primary_type
+};
+
+template<>
+struct aggregate_type_for<timeuuid_native_type> {
+    using type = timeuuid_native_type::primary_type; // the running value is held as the wrapper's primary_type
+};
+
 template <typename Type>
 class impl_max_function_for final : public aggregate_function::aggregate {
-   std::experimental::optional<Type> _max{};
+   std::experimental::optional<typename aggregate_type_for<Type>::type> _max{};
 public:
    virtual void reset() override {
        _max = {};
@@ -163,13 +258,13 @@ public:
        if (!_max) {
            return {};
        }
-        return data_type_for<Type>()->decompose(*_max);
+        return data_type_for<Type>()->decompose(Type{*_max});
    }
    virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
        if (!values[0]) {
            return;
        }
-        auto val = value_cast<Type>(data_type_for<Type>()->deserialize(*values[0]));
+        auto val = value_cast<typename aggregate_type_for<Type>::type>(data_type_for<Type>()->deserialize(*values[0]));
        if (!_max) {
            _max = val;
        } else {
@@ -201,7 +296,7 @@ make_max_function() {

 template <typename Type>
 class impl_min_function_for final : public aggregate_function::aggregate {
-   std::experimental::optional<Type> _min{};
+   std::experimental::optional<typename aggregate_type_for<Type>::type> _min{};
 public:
    virtual void reset() override {
        _min = {};
@@ -210,13 +305,13 @@ public:
        if (!_min) {
            return {};
        }
-        return data_type_for<Type>()->decompose(*_min);
+        return data_type_for<Type>()->decompose(Type{*_min});
    }
    virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
        if (!values[0]) {
            return;
        }
-        auto val = value_cast<Type>(data_type_for<Type>()->deserialize(*values[0]));
+        auto val = value_cast<typename aggregate_type_for<Type>::type>(data_type_for<Type>()->deserialize(*values[0]));
        if (!_min) {
            _min = val;
        } else {
--- a/cql3/functions/castas_fcts.cc
+++ b/cql3/functions/castas_fcts.cc
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "castas_fcts.hh"
+#include "cql3/functions/native_scalar_function.hh"
+
+namespace cql3 {
+namespace functions {
+
+namespace {
+
+using bytes_opt = std::experimental::optional<bytes>;
+
+class castas_function_for : public cql3::functions::native_scalar_function { // scalar function performing one "cast(<from> as <to>)"
+    castas_fctn _func; // conversion applied to the deserialized argument in execute()
+public:
+    castas_function_for(data_type to_type,
+                        data_type from_type,
+                        castas_fctn func)
+            : native_scalar_function("castas" + to_type->as_cql3_type()->to_string(), to_type, {from_type})
+            , _func(std::move(func)) { // func is a by-value sink parameter: move it rather than copy it
+    }
+    virtual bool is_pure() override {
+        return true;
+    }
+    virtual void print(std::ostream& os) const override {
+        os << "cast(" << _arg_types[0]->name() << " as " << _return_type->name() << ")";
+    }
+    virtual bytes_opt execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) override {
+        auto from_type = arg_types()[0];
+        auto to_type = return_type();
+
+        auto&& val = parameters[0];
+        if (!val) {
+            return val; // a disengaged (null) argument is returned unchanged
+        }
+        auto val_from = from_type->deserialize(*val); // bytes -> value of the argument type
+        auto val_to = _func(val_from); // run the conversion
+        return to_type->decompose(val_to); // value -> bytes of the return type
+    }
+};
+
+shared_ptr<function> make_castas_function(data_type to_type, data_type from_type, castas_fctn func) { // file-local factory (anonymous namespace)
+    return ::make_shared<castas_function_for>(std::move(to_type), std::move(from_type), std::move(func)); // all three arguments are moved into the constructor
+}
+
+} /* Anonymous Namespace */
+
+shared_ptr<function> castas_functions::get(data_type to_type, const std::vector<shared_ptr<cql3::selection::selector>>& provided_args, schema_ptr s) { // NOTE: s is not used in this body
+    if (provided_args.size() != 1) { // exactly one argument is accepted
+        throw exceptions::invalid_request_exception("Invalid CAST expression");
+    }
+    auto from_type = provided_args[0]->get_type();
+    auto from_type_key = from_type; // type used only for the get_castas_fctn() lookup below
+    if (from_type_key->is_reversed()) {
+        from_type_key = dynamic_cast<const reversed_type_impl&>(*from_type).underlying_type(); // look up by the underlying type of a reversed type
+    }
+
+    auto f = get_castas_fctn(to_type, from_type_key);
+    return make_castas_function(to_type, from_type, f); // the function object is built with the original from_type, not the lookup key
+}
+
+}
+}
--- a/cql3/functions/castas_fcts.hh
+++ b/cql3/functions/castas_fcts.hh
@@ -15,10 +15,11 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
+
 /*
 * Modified by ScyllaDB
 *
- * Copyright 2016 ScyllaDB
+ * Copyright (C) 2017 ScyllaDB
 */

 /*
@@ -38,26 +39,25 @@
 * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include <string.h>
+#pragma once

-#include <boost/range/adaptor/map.hpp>
+#include <tuple>
+#include <unordered_map>

-#include "auth/authenticator.hh"
-#include "user_options.hh"
+#include "cql3/functions/function.hh"
+#include "cql3/functions/abstract_function.hh"
+#include "exceptions/exceptions.hh"
+#include "core/print.hh"
+#include "cql3/cql3_type.hh"
+#include "cql3/selection/selector.hh"
+
+namespace cql3 {
+namespace functions {
+
+class castas_functions { // exposes a single static lookup, get()
+public:
+    static shared_ptr<function> get(data_type to_type, const std::vector<shared_ptr<cql3::selection::selector>>& provided_args, schema_ptr s); // function casting the selector in provided_args to to_type
+};

-void cql3::user_options::put(const sstring& name, const sstring& value) {
-    _options[auth::authenticator::string_to_option(name)] = value;
 }
-
-void cql3::user_options::validate() const {
-    auto& a = auth::authenticator::get();
-    for (auto o : _options | boost::adaptors::map_keys) {
-        if (!a.supported_options().contains(o)) {
-            throw exceptions::invalid_request_exception(
-                            sprint("%s doesn't support %s option",
-                                            a.class_name(),
-                                            a.option_to_string(o)));
-        }
-    }
 }
-
--- a/cql3/functions/function.hh
+++ b/cql3/functions/function.hh
@@ -81,6 +81,15 @@ public:
    virtual void print(std::ostream& os) const = 0;
    virtual bool uses_function(const sstring& ks_name, const sstring& function_name) = 0;
    virtual bool has_reference_to(function& f) = 0;
+
+    /**
+     * Returns the name of the function to use within a ResultSet.
+     *
+     * @param column_names the names of the columns used to call the function
+     * @return the name of the function to use within a ResultSet
+     */
+    virtual sstring column_name(const std::vector<sstring>& column_names) = 0;
+
    friend class function_call;
    friend std::ostream& operator<<(std::ostream& os, const function& f);
 };
--- a/cql3/functions/function_name.hh
+++ b/cql3/functions/function_name.hh
@@ -42,10 +42,16 @@
 #pragma once

 #include "core/sstring.hh"
-#include "db/system_keyspace.hh"
+#include "seastarx.hh"
 #include <iosfwd>
 #include <functional>

+namespace db {
+
+sstring system_keyspace_name();
+
+}
+
 namespace cql3 {

 namespace functions {
@@ -56,7 +62,7 @@ public:
    sstring name;

    static function_name native_function(sstring name) {
-        return function_name(db::system_keyspace::NAME, name);
+        return function_name(db::system_keyspace_name(), name);
    }

    function_name() = default; // for ANTLR
--- a/cql3/functions/functions.cc
+++ b/cql3/functions/functions.cc
@@ -59,6 +59,14 @@ functions::init() {
        declare(make_to_blob_function(type->get_type()));
        declare(make_from_blob_function(type->get_type()));
    }
+    declare(aggregate_fcts::make_count_function<int8_t>());
+    declare(aggregate_fcts::make_max_function<int8_t>());
+    declare(aggregate_fcts::make_min_function<int8_t>());
+
+    declare(aggregate_fcts::make_count_function<int16_t>());
+    declare(aggregate_fcts::make_max_function<int16_t>());
+    declare(aggregate_fcts::make_min_function<int16_t>());
+
    declare(aggregate_fcts::make_count_function<int32_t>());
    declare(aggregate_fcts::make_max_function<int32_t>());
    declare(aggregate_fcts::make_min_function<int32_t>());
@@ -67,6 +75,14 @@ functions::init() {
    declare(aggregate_fcts::make_max_function<int64_t>());
    declare(aggregate_fcts::make_min_function<int64_t>());

+    declare(aggregate_fcts::make_count_function<boost::multiprecision::cpp_int>());
+    declare(aggregate_fcts::make_max_function<boost::multiprecision::cpp_int>());
+    declare(aggregate_fcts::make_min_function<boost::multiprecision::cpp_int>());
+
+    declare(aggregate_fcts::make_count_function<big_decimal>());
+    declare(aggregate_fcts::make_max_function<big_decimal>());
+    declare(aggregate_fcts::make_min_function<big_decimal>());
+
    declare(aggregate_fcts::make_count_function<float>());
    declare(aggregate_fcts::make_max_function<float>());
    declare(aggregate_fcts::make_min_function<float>());
@@ -79,6 +95,15 @@ functions::init() {
    declare(aggregate_fcts::make_max_function<sstring>());
    declare(aggregate_fcts::make_min_function<sstring>());

+    declare(aggregate_fcts::make_max_function<simple_date_native_type>());
+    declare(aggregate_fcts::make_min_function<simple_date_native_type>());
+
+    declare(aggregate_fcts::make_max_function<timestamp_native_type>());
+    declare(aggregate_fcts::make_min_function<timestamp_native_type>());
+
+    declare(aggregate_fcts::make_max_function<timeuuid_native_type>());
+    declare(aggregate_fcts::make_min_function<timeuuid_native_type>());
+
    //FIXME:
    //declare(aggregate_fcts::make_count_function<bytes>());
    //declare(aggregate_fcts::make_max_function<bytes>());
@@ -88,22 +113,22 @@ functions::init() {

    declare(make_varchar_as_blob_fct());
    declare(make_blob_as_varchar_fct());
+    declare(aggregate_fcts::make_sum_function<int8_t>());
+    declare(aggregate_fcts::make_sum_function<int16_t>());
    declare(aggregate_fcts::make_sum_function<int32_t>());
    declare(aggregate_fcts::make_sum_function<int64_t>());
    declare(aggregate_fcts::make_sum_function<float>());
    declare(aggregate_fcts::make_sum_function<double>());
-#if 0
-    declare(AggregateFcts.sumFunctionForDecimal);
-    declare(AggregateFcts.sumFunctionForVarint);
-#endif
+    declare(aggregate_fcts::make_sum_function<boost::multiprecision::cpp_int>());
+    declare(aggregate_fcts::make_sum_function<big_decimal>());
+    declare(aggregate_fcts::make_avg_function<int8_t>());
+    declare(aggregate_fcts::make_avg_function<int16_t>());
    declare(aggregate_fcts::make_avg_function<int32_t>());
    declare(aggregate_fcts::make_avg_function<int64_t>());
    declare(aggregate_fcts::make_avg_function<float>());
    declare(aggregate_fcts::make_avg_function<double>());
-#if 0
-    declare(AggregateFcts.avgFunctionForVarint);
-    declare(AggregateFcts.avgFunctionForDecimal);
-#endif
+    declare(aggregate_fcts::make_avg_function<boost::multiprecision::cpp_int>());
+    declare(aggregate_fcts::make_avg_function<big_decimal>());

    // also needed for smp:
 #if 0
@@ -342,7 +367,7 @@ function_call::execute_internal(cql_serialization_format sf, scalar_function& fu
            fun.return_type()->validate(*result);
        }
        return result;
-    } catch (marshal_exception e) {
+    } catch (marshal_exception& e) {
        throw runtime_exception(sprint("Return of function %s (%s) is not a valid value for its declared return type %s",
                                       fun, to_hex(result),
                                       *fun.return_type()->as_cql3_type()
--- a/cql3/functions/native_aggregate_function.hh
+++ b/cql3/functions/native_aggregate_function.hh
@@ -64,23 +64,5 @@ public:
    }
 };

-template <class Aggregate>
-class native_aggregate_function_using : public native_aggregate_function {
-public:
-    native_aggregate_function_using(sstring name, data_type type)
-            : native_aggregate_function(std::move(name), type, {}) {
-    }
-    virtual std::unique_ptr<aggregate> new_aggregate() override {
-        return std::make_unique<Aggregate>();
-    }
-};
-
-template <class Aggregate>
-shared_ptr<native_aggregate_function>
-make_native_aggregate_function_using(sstring name, data_type type) {
-    return ::make_shared<native_aggregate_function_using<Aggregate>>(name, type);
-}
-
-
 }
 }
--- a/cql3/lists.cc
+++ b/cql3/lists.cc
@@ -202,12 +202,6 @@ lists::delayed_value::bind(const query_options& options) {
        if (bo.is_unset_value()) {
            return constants::UNSET_VALUE;
        }
-        // We don't support value > 64K because the serialization format encode the length as an unsigned short.
-        if (bo->size() > std::numeric_limits<uint16_t>::max()) {
-            throw exceptions::invalid_request_exception(sprint("List value is too long. List values are limited to %d bytes but %d bytes value provided",
-                    std::numeric_limits<uint16_t>::max(),
-                    bo->size()));
-        }

        buffers.push_back(std::move(to_bytes(*bo)));
    }
@@ -305,11 +299,6 @@ lists::setter_by_index::execute(mutation& m, const clustering_key_prefix& prefix
    if (!value) {
        mut.cells.emplace_back(eidx, params.make_dead_cell());
    } else {
-        if (value->size() > std::numeric_limits<uint16_t>::max()) {
-            throw exceptions::invalid_request_exception(
-                    sprint("List value is too long. List values are limited to %d bytes but %d bytes value provided",
-                            std::numeric_limits<uint16_t>::max(), value->size()));
-        }
        mut.cells.emplace_back(eidx, params.make_cell(*value));
    }
    auto smut = ltype->serialize_mutation_form(mut);
--- a/cql3/maps.cc
+++ b/cql3/maps.cc
@@ -245,11 +245,6 @@ maps::delayed_value::bind(const query_options& options) {
        if (value_bytes.is_unset_value()) {
            return constants::UNSET_VALUE;
        }
-        if (value_bytes->size() > std::numeric_limits<uint16_t>::max()) {
-            throw exceptions::invalid_request_exception(sprint("Map value is too long. Map values are limited to %d bytes but %d bytes value provided",
-                                                    std::numeric_limits<uint16_t>::max(),
-                                                    value_bytes->size()));
-        }
        buffers.emplace(std::move(to_bytes(*key_bytes)), std::move(to_bytes(*value_bytes)));
    }
    return ::make_shared<value>(std::move(buffers));
@@ -300,12 +295,6 @@ maps::setter_by_key::execute(mutation& m, const clustering_key_prefix& prefix, c
    if (!key) {
        throw invalid_request_exception("Invalid null map key");
    }
-    if (value && value->size() >= std::numeric_limits<uint16_t>::max()) {
-        throw invalid_request_exception(
-                sprint("Map value is too long. Map values are limited to %d bytes but %d bytes value provided",
-                       std::numeric_limits<uint16_t>::max(),
-                       value->size()));
-    }
    auto avalue = value ? params.make_cell(*value) : params.make_dead_cell();
    map_type_impl::mutation update = { {}, { { std::move(to_bytes(*key)), std::move(avalue) } } };
    // should have been verified as map earlier?
--- a/cql3/operation.cc
+++ b/cql3/operation.cc
@@ -46,15 +46,19 @@

 namespace cql3 {

+sstring
+operation::set_element::to_string(const column_definition& receiver) const { // text of this update, used in prepare()'s error messages
+    return format("{}[{}] = {}", receiver.name_as_text(), *_selector, *_value); // "<column>[<selector>] = <value>"
+}

 shared_ptr<operation>
 operation::set_element::prepare(database& db, const sstring& keyspace, const column_definition& receiver) {
    using exceptions::invalid_request_exception;
    auto rtype = dynamic_pointer_cast<const collection_type_impl>(receiver.type);
    if (!rtype) {
-        throw invalid_request_exception(sprint("Invalid operation (%s) for non collection column %s", receiver, receiver.name()));
+        throw invalid_request_exception(sprint("Invalid operation (%s) for non collection column %s", to_string(receiver), receiver.name()));
    } else if (!rtype->is_multi_cell()) {
-        throw invalid_request_exception(sprint("Invalid operation (%s) for frozen collection column %s", receiver, receiver.name()));
+        throw invalid_request_exception(sprint("Invalid operation (%s) for frozen collection column %s", to_string(receiver), receiver.name()));
    }

    if (&rtype->_kind == &collection_type_impl::kind::list) {
@@ -67,7 +71,7 @@ operation::set_element::prepare(database& db, const sstring& keyspace, const col
            return make_shared<lists::setter_by_index>(receiver, idx, lval);
        }
    } else if (&rtype->_kind == &collection_type_impl::kind::set) {
-        throw invalid_request_exception(sprint("Invalid operation (%s) for set column %s", receiver, receiver.name()));
+        throw invalid_request_exception(sprint("Invalid operation (%s) for set column %s", to_string(receiver), receiver.name()));
    } else if (&rtype->_kind == &collection_type_impl::kind::map) {
        auto key = _selector->prepare(db, keyspace, maps::key_spec_of(*receiver.column_specification));
        auto mval = _value->prepare(db, keyspace, maps::value_spec_of(*receiver.column_specification));
@@ -83,6 +87,11 @@ operation::set_element::is_compatible_with(shared_ptr<raw_update> other) {
    return !dynamic_pointer_cast<set_value>(std::move(other));
 }

+sstring
+operation::addition::to_string(const column_definition& receiver) const { // text of this update, used in prepare()'s error messages
+    return format("{} = {} + {}", receiver.name_as_text(), receiver.name_as_text(), *_value); // "<column> = <column> + <value>"
+}
+
 shared_ptr<operation>
 operation::addition::prepare(database& db, const sstring& keyspace, const column_definition& receiver) {
    auto v = _value->prepare(db, keyspace, receiver.column_specification);
@@ -90,11 +99,11 @@ operation::addition::prepare(database& db, const sstring& keyspace, const column
    auto ctype = dynamic_pointer_cast<const collection_type_impl>(receiver.type);
    if (!ctype) {
        if (!receiver.is_counter()) {
-            throw exceptions::invalid_request_exception(sprint("Invalid operation (%s) for non counter column %s", receiver, receiver.name()));
+            throw exceptions::invalid_request_exception(sprint("Invalid operation (%s) for non counter column %s", to_string(receiver), receiver.name()));
        }
        return make_shared<constants::adder>(receiver, v);
    } else if (!ctype->is_multi_cell()) {
-        throw exceptions::invalid_request_exception(sprint("Invalid operation (%s) for frozen collection column %s", receiver, receiver.name()));
+        throw exceptions::invalid_request_exception(sprint("Invalid operation (%s) for frozen collection column %s", to_string(receiver), receiver.name()));
    }

    if (&ctype->_kind == &collection_type_impl::kind::list) {
@@ -113,19 +122,24 @@ operation::addition::is_compatible_with(shared_ptr<raw_update> other) {
    return !dynamic_pointer_cast<set_value>(other);
 }

+sstring
+operation::subtraction::to_string(const column_definition& receiver) const { // text of this update, used in prepare()'s error messages
+    return format("{} = {} - {}", receiver.name_as_text(), receiver.name_as_text(), *_value); // "<column> = <column> - <value>"
+}
+
 shared_ptr<operation>
 operation::subtraction::prepare(database& db, const sstring& keyspace, const column_definition& receiver) {
    auto ctype = dynamic_pointer_cast<const collection_type_impl>(receiver.type);
    if (!ctype) {
        if (!receiver.is_counter()) {
-            throw exceptions::invalid_request_exception(sprint("Invalid operation (%s) for non counter column %s", receiver, receiver.name()));
+            throw exceptions::invalid_request_exception(sprint("Invalid operation (%s) for non counter column %s", to_string(receiver), receiver.name()));
        }
        auto v = _value->prepare(db, keyspace, receiver.column_specification);
        return make_shared<constants::subtracter>(receiver, v);
    }
    if (!ctype->is_multi_cell()) {
        throw exceptions::invalid_request_exception(
-                sprint("Invalid operation (%s) for frozen collection column %s", receiver, receiver.name()));
+                sprint("Invalid operation (%s) for frozen collection column %s", to_string(receiver), receiver.name()));
    }

    if (&ctype->_kind == &collection_type_impl::kind::list) {
@@ -150,14 +164,19 @@ operation::subtraction::is_compatible_with(shared_ptr<raw_update> other) {
    return !dynamic_pointer_cast<set_value>(other);
 }

+sstring
+operation::prepend::to_string(const column_definition& receiver) const { // text of this update, used in prepare()'s error messages
+    return format("{} = {} + {}", receiver.name_as_text(), *_value, receiver.name_as_text()); // "<column> = <value> + <column>": the value comes first
+}
+
 shared_ptr<operation>
 operation::prepend::prepare(database& db, const sstring& keyspace, const column_definition& receiver) {
    auto v = _value->prepare(db, keyspace, receiver.column_specification);

    if (!dynamic_cast<const list_type_impl*>(receiver.type.get())) {
-        throw exceptions::invalid_request_exception(sprint("Invalid operation (%s) for non list column %s", receiver, receiver.name()));
+        throw exceptions::invalid_request_exception(sprint("Invalid operation (%s) for non list column %s", to_string(receiver), receiver.name()));
    } else if (!receiver.type->is_multi_cell()) {
-        throw exceptions::invalid_request_exception(sprint("Invalid operation (%s) for frozen list column %s", receiver, receiver.name()));
+        throw exceptions::invalid_request_exception(sprint("Invalid operation (%s) for frozen list column %s", to_string(receiver), receiver.name()));
    }

    return make_shared<lists::prepender>(receiver, std::move(v));
--- a/cql3/operation.hh
+++ b/cql3/operation.hh
@@ -203,6 +203,8 @@ public:
        const shared_ptr<term::raw> _selector;
        const shared_ptr<term::raw> _value;
        const bool _by_uuid;
+    private:
+        sstring to_string(const column_definition& receiver) const;
    public:
        set_element(shared_ptr<term::raw> selector, shared_ptr<term::raw> value, bool by_uuid = false)
            : _selector(std::move(selector)), _value(std::move(value)), _by_uuid(by_uuid) {
@@ -215,6 +217,8 @@ public:

    class addition : public raw_update {
        const shared_ptr<term::raw> _value;
+    private:
+        sstring to_string(const column_definition& receiver) const;
    public:
        addition(shared_ptr<term::raw> value)
                : _value(value) {
@@ -227,6 +231,8 @@ public:

    class subtraction : public raw_update {
        const shared_ptr<term::raw> _value;
+    private:
+        sstring to_string(const column_definition& receiver) const;
    public:
        subtraction(shared_ptr<term::raw> value)
                : _value(value) {
@@ -239,6 +245,8 @@ public:

    class prepend : public raw_update {
        shared_ptr<term::raw> _value;
+    private:
+        sstring to_string(const column_definition& receiver) const;
    public:
        prepend(shared_ptr<term::raw> value)
                : _value(std::move(value)) {
--- a/cql3/operator.hh
+++ b/cql3/operator.hh
@@ -71,7 +71,12 @@ private:
        , _text(std::move(text))
    {}
 public:
+    operator_type(const operator_type&) = delete;
+    operator_type& operator=(const operator_type&) = delete;
    const operator_type& reverse() const { return _reverse; }
+    bool is_slice() const { // true iff this operator is one of LT, LTE, GT, GTE
+        return (*this == LT) || (*this == LTE) || (*this == GT) || (*this == GTE); // operator== compares object addresses
+    }
    sstring to_string() const { return _text; }
    bool operator==(const operator_type& other) const { return this == &other; }
    bool operator!=(const operator_type& other) const { return this != &other; }
--- a/cql3/query_options.cc
+++ b/cql3/query_options.cc
@@ -49,6 +49,23 @@ thread_local const query_options::specific_options query_options::specific_optio
 thread_local query_options query_options::DEFAULT{db::consistency_level::ONE, std::experimental::nullopt,
    std::vector<cql3::raw_value_view>(), false, query_options::specific_options::DEFAULT, cql_serialization_format::latest()};

+query_options::query_options(db::consistency_level consistency, // stores every argument as given; the body is empty
+                           std::experimental::optional<std::vector<sstring_view>> names,
+                           std::vector<cql3::raw_value> values,
+                           std::vector<cql3::raw_value_view> value_views,
+                           bool skip_metadata,
+                           specific_options options,
+                           cql_serialization_format sf)
+   : _consistency(consistency)
+   , _names(std::move(names))
+   , _values(std::move(values))
+   , _value_views(std::move(value_views)) // by-value sink parameter: move it like the other arguments instead of copying the vector
+   , _skip_metadata(skip_metadata)
+   , _options(std::move(options))
+   , _cql_serialization_format(sf)
+{
+}
+
 query_options::query_options(db::consistency_level consistency,
                             std::experimental::optional<std::vector<sstring_view>> names,
                             std::vector<cql3::raw_value> values,
@@ -82,18 +99,29 @@ query_options::query_options(db::consistency_level consistency,
 {
 }

-query_options::query_options(db::consistency_level cl, std::vector<cql3::raw_value> values)
+query_options::query_options(db::consistency_level cl, std::vector<cql3::raw_value> values, specific_options options)
    : query_options(
          cl,
          {},
          std::move(values),
          false,
-          query_options::specific_options::DEFAULT,
+          std::move(options),
          cql_serialization_format::latest()
      )
 {
 }

+query_options::query_options(std::unique_ptr<query_options> qo, ::shared_ptr<service::pager::paging_state> paging_state)
+        : query_options(qo->_consistency, // delegates to the (values + value_views) constructor, taking over the contents of *qo
+        std::move(qo->_names),
+        std::move(qo->_values),
+        std::move(qo->_value_views),
+        qo->_skip_metadata,
+        query_options::specific_options{qo->_options.page_size, std::move(paging_state), qo->_options.serial_consistency, qo->_options.timestamp}, // prvalue passed directly (no std::move needed); reuses qo's page_size, serial_consistency and timestamp
+        qo->_cql_serialization_format) {
+
+}
+
 query_options::query_options(std::vector<cql3::raw_value> values)
    : query_options(
          db::consistency_level::ONE, std::move(values))
@@ -181,19 +209,18 @@ void query_options::prepare(const std::vector<::shared_ptr<column_specification>
    }

    auto& names = *_names;
-    std::vector<cql3::raw_value> ordered_values;
+    std::vector<cql3::raw_value_view> ordered_values;
    ordered_values.reserve(specs.size());
    for (auto&& spec : specs) {
        auto& spec_name = spec->name->text();
        for (size_t j = 0; j < names.size(); j++) {
            if (names[j] == spec_name) {
-                ordered_values.emplace_back(_values[j]);
+                ordered_values.emplace_back(_value_views[j]);
                break;
            }
        }
    }
-    _values = std::move(ordered_values);
-    fill_value_views();
+    _value_views = std::move(ordered_values);
 }

 void query_options::fill_value_views()
--- a/cql3/query_options.hh
+++ b/cql3/query_options.hh
@@ -108,6 +108,13 @@ public:
                           bool skip_metadata,
                           specific_options options,
                           cql_serialization_format sf);
+    explicit query_options(db::consistency_level consistency,
+                           std::experimental::optional<std::vector<sstring_view>> names,
+                           std::vector<cql3::raw_value> values,
+                           std::vector<cql3::raw_value_view> value_views,
+                           bool skip_metadata,
+                           specific_options options,
+                           cql_serialization_format sf);
    explicit query_options(db::consistency_level consistency,
                           std::experimental::optional<std::vector<sstring_view>> names,
                           std::vector<cql3::raw_value_view> value_views,
@@ -140,7 +147,8 @@ public:

    // forInternalUse
    explicit query_options(std::vector<cql3::raw_value> values);
-    explicit query_options(db::consistency_level, std::vector<cql3::raw_value> values);
+    explicit query_options(db::consistency_level, std::vector<cql3::raw_value> values, specific_options options = specific_options::DEFAULT);
+    explicit query_options(std::unique_ptr<query_options>, ::shared_ptr<service::pager::paging_state> paging_state);

    db::consistency_level get_consistency() const;
    cql3::raw_value_view get_value_at(size_t idx) const;
--- a/cql3/query_processor.cc
+++ b/cql3/query_processor.cc
@@ -38,19 +38,19 @@
 * You should have received a copy of the GNU General Public License
 * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
 */
-#include <seastar/core/metrics.hh>
+
+#define CRYPTOPP_ENABLE_NAMESPACE_WEAK 1

 #include "cql3/query_processor.hh"
+
+#include <cryptopp/md5.h>
+#include <seastar/core/metrics.hh>
+
 #include "cql3/CqlParser.hpp"
 #include "cql3/error_collector.hh"
 #include "cql3/statements/batch_statement.hh"
 #include "cql3/util.hh"

-#include "transport/messages/result_message.hh"
-
-#define CRYPTOPP_ENABLE_NAMESPACE_WEAK 1
-#include <cryptopp/md5.h>
-
 namespace cql3 {

 using namespace statements;
@@ -68,9 +68,8 @@ const std::chrono::minutes prepared_statements_cache::entry_expiry = std::chrono
 class query_processor::internal_state {
    service::query_state _qs;
 public:
-    internal_state()
-        : _qs(service::client_state{service::client_state::internal_tag()})
-    { }
+    internal_state() : _qs(service::client_state{service::client_state::internal_tag()}) {
+    }
    operator service::query_state&() {
        return _qs;
    }
@@ -92,74 +91,102 @@ api::timestamp_type query_processor::next_timestamp() {
    return _internal_state->next_timestamp();
 }

-query_processor::query_processor(distributed<service::storage_proxy>& proxy,
-                                 distributed<database>& db)
-    : _migration_subscriber{std::make_unique<migration_subscriber>(this)}
-    , _proxy(proxy)
-    , _db(db)
-    , _internal_state(new internal_state())
-    , _prepared_cache(prep_cache_log)
-{
+query_processor::query_processor(distributed<service::storage_proxy>& proxy, distributed<database>& db)
+        : _migration_subscriber{std::make_unique<migration_subscriber>(this)}
+        , _proxy(proxy)
+        , _db(db)
+        , _internal_state(new internal_state())
+        , _prepared_cache(prep_cache_log) {
    namespace sm = seastar::metrics;

-    _metrics.add_group("query_processor", {
-        sm::make_derive("statements_prepared", _stats.prepare_invocations,
-                        sm::description("Counts a total number of parsed CQL requests.")),
-    });
+    _metrics.add_group(
+            "query_processor",
+            {
+                    sm::make_derive(
+                            "statements_prepared",
+                            _stats.prepare_invocations,
+                            sm::description("Counts a total number of parsed CQL requests."))});

-    _metrics.add_group("cql", {
-        sm::make_derive("reads", _cql_stats.reads,
-                        sm::description("Counts a total number of CQL read requests.")),
+    _metrics.add_group(
+            "cql",
+            {
+                    sm::make_derive(
+                            "reads",
+                            _cql_stats.reads,
+                            sm::description("Counts a total number of CQL read requests.")),

-        sm::make_derive("inserts", _cql_stats.inserts,
-                        sm::description("Counts a total number of CQL INSERT requests.")),
+                    sm::make_derive(
+                            "inserts",
+                            _cql_stats.inserts,
+                            sm::description("Counts a total number of CQL INSERT requests.")),

-        sm::make_derive("updates", _cql_stats.updates,
-                        sm::description("Counts a total number of CQL UPDATE requests.")),
+                    sm::make_derive(
+                            "updates",
+                            _cql_stats.updates,
+                            sm::description("Counts a total number of CQL UPDATE requests.")),

-        sm::make_derive("deletes", _cql_stats.deletes,
-                        sm::description("Counts a total number of CQL DELETE requests.")),
+                    sm::make_derive(
+                            "deletes",
+                            _cql_stats.deletes,
+                            sm::description("Counts a total number of CQL DELETE requests.")),

-        sm::make_derive("batches", _cql_stats.batches,
-                        sm::description("Counts a total number of CQL BATCH requests.")),
+                    sm::make_derive(
+                            "batches",
+                            _cql_stats.batches,
+                            sm::description("Counts a total number of CQL BATCH requests.")),

-        sm::make_derive("statements_in_batches", _cql_stats.statements_in_batches,
-                        sm::description("Counts a total number of sub-statements in CQL BATCH requests.")),
+                    sm::make_derive(
+                            "statements_in_batches",
+                            _cql_stats.statements_in_batches,
+                            sm::description("Counts a total number of sub-statements in CQL BATCH requests.")),

-        sm::make_derive("batches_pure_logged", _cql_stats.batches_pure_logged,
-                        sm::description("Counts a total number of LOGGED batches that were executed as LOGGED batches.")),
+                    sm::make_derive(
+                            "batches_pure_logged",
+                            _cql_stats.batches_pure_logged,
+                            sm::description(
+                                    "Counts a total number of LOGGED batches that were executed as LOGGED batches.")),

-        sm::make_derive("batches_pure_unlogged", _cql_stats.batches_pure_unlogged,
-                        sm::description("Counts a total number of UNLOGGED batches that were executed as UNLOGGED batches.")),
+                    sm::make_derive(
+                            "batches_pure_unlogged",
+                            _cql_stats.batches_pure_unlogged,
+                            sm::description(
+                                    "Counts a total number of UNLOGGED batches that were executed as UNLOGGED "
+                                    "batches.")),

-        sm::make_derive("batches_unlogged_from_logged", _cql_stats.batches_unlogged_from_logged,
-                        sm::description("Counts a total number of LOGGED batches that were executed as UNLOGGED batches.")),
+                    sm::make_derive(
+                            "batches_unlogged_from_logged",
+                            _cql_stats.batches_unlogged_from_logged,
+                            sm::description("Counts a total number of LOGGED batches that were executed as UNLOGGED "
+                                            "batches.")),

-        sm::make_derive("prepared_cache_evictions", [] { return prepared_statements_cache::shard_stats().prepared_cache_evictions; },
-                        sm::description("Counts a number of prepared statements cache entries evictions.")),
+                    sm::make_derive(
+                            "prepared_cache_evictions",
+                            [] { return prepared_statements_cache::shard_stats().prepared_cache_evictions; },
+                            sm::description("Counts a number of prepared statements cache entries evictions.")),

-        sm::make_gauge("prepared_cache_size", [this] { return _prepared_cache.size(); },
-                        sm::description("A number of entries in the prepared statements cache.")),
+                    sm::make_gauge(
+                            "prepared_cache_size",
+                            [this] { return _prepared_cache.size(); },
+                            sm::description("A number of entries in the prepared statements cache.")),

-        sm::make_gauge("prepared_cache_memory_footprint", [this] { return _prepared_cache.memory_footprint(); },
-                        sm::description("Size (in bytes) of the prepared statements cache.")),
-    });
+                    sm::make_gauge(
+                            "prepared_cache_memory_footprint",
+                            [this] { return _prepared_cache.memory_footprint(); },
+                            sm::description("Size (in bytes) of the prepared statements cache."))});

    service::get_local_migration_manager().register_listener(_migration_subscriber.get());
 }

-query_processor::~query_processor()
-{}
+query_processor::~query_processor() {
+}

-future<> query_processor::stop()
-{
+future<> query_processor::stop() {
    service::get_local_migration_manager().unregister_listener(_migration_subscriber.get());
    return make_ready_future<>();
 }

 future<::shared_ptr<result_message>>
-query_processor::process(const sstring_view& query_string, service::query_state& query_state, query_options& options)
-{
+query_processor::process(const sstring_view& query_string, service::query_state& query_state, query_options& options) {
    log.trace("process: \"{}\"", query_string);
    tracing::trace(query_state.get_trace_state(), "Parsing a statement");
    auto p = get_statement(query_string, query_state.get_client_state());
@@ -179,14 +206,10 @@ query_processor::process(const sstring_view& query_string, service::query_state&
 }

 future<::shared_ptr<result_message>>
-query_processor::process_statement(::shared_ptr<cql_statement> statement,
-                                   service::query_state& query_state,
-                                   const query_options& options)
-{
-#if 0
-        logger.trace("Process {} @CL.{}", statement, options.getConsistency());
-#endif
-
+query_processor::process_statement(
+        ::shared_ptr<cql_statement> statement,
+        service::query_state& query_state,
+        const query_options& options) {
    return statement->check_access(query_state.get_client_state()).then([this, statement, &query_state, &options]() {
        auto& client_state = query_state.get_client_state();

@@ -210,38 +233,50 @@ query_processor::process_statement(::shared_ptr<cql_statement> statement,
 }

 future<::shared_ptr<cql_transport::messages::result_message::prepared>>
-query_processor::prepare(sstring query_string, service::query_state& query_state)
-{
+query_processor::prepare(sstring query_string, service::query_state& query_state) {
    auto& client_state = query_state.get_client_state();
    return prepare(std::move(query_string), client_state, client_state.is_thrift());
 }

 future<::shared_ptr<cql_transport::messages::result_message::prepared>>
-query_processor::prepare(sstring query_string, const service::client_state& client_state, bool for_thrift)
-{
+query_processor::prepare(sstring query_string, const service::client_state& client_state, bool for_thrift) {
    using namespace cql_transport::messages;
    if (for_thrift) {
-        return prepare_one<result_message::prepared::thrift>(std::move(query_string), client_state, compute_thrift_id, prepared_cache_key_type::thrift_id);
+        return prepare_one<result_message::prepared::thrift>(
+                std::move(query_string),
+                client_state,
+                compute_thrift_id, prepared_cache_key_type::thrift_id);
    } else {
-        return prepare_one<result_message::prepared::cql>(std::move(query_string), client_state, compute_id, prepared_cache_key_type::cql_id);
+        return prepare_one<result_message::prepared::cql>(
+                std::move(query_string),
+                client_state,
+                compute_id,
+                prepared_cache_key_type::cql_id);
    }
 }

 ::shared_ptr<cql_transport::messages::result_message::prepared>
-query_processor::get_stored_prepared_statement(const std::experimental::string_view& query_string,
-                                               const sstring& keyspace,
-                                               bool for_thrift)
-{
+query_processor::get_stored_prepared_statement(
+        const std::experimental::string_view& query_string,
+        const sstring& keyspace,
+        bool for_thrift) {
    using namespace cql_transport::messages;
    if (for_thrift) {
-        return get_stored_prepared_statement_one<result_message::prepared::thrift>(query_string, keyspace, compute_thrift_id, prepared_cache_key_type::thrift_id);
+        return get_stored_prepared_statement_one<result_message::prepared::thrift>(
+                query_string,
+                keyspace,
+                compute_thrift_id,
+                prepared_cache_key_type::thrift_id);
    } else {
-        return get_stored_prepared_statement_one<result_message::prepared::cql>(query_string, keyspace, compute_id, prepared_cache_key_type::cql_id);
+        return get_stored_prepared_statement_one<result_message::prepared::cql>(
+                query_string,
+                keyspace,
+                compute_id,
+                prepared_cache_key_type::cql_id);
    }
 }

-static bytes md5_calculate(const std::experimental::string_view& s)
-{
+static bytes md5_calculate(const std::experimental::string_view& s) {
    constexpr size_t size = CryptoPP::Weak1::MD5::DIGESTSIZE;
    CryptoPP::Weak::MD5 hash;
    unsigned char digest[size];
@@ -253,13 +288,15 @@ static sstring hash_target(const std::experimental::string_view& query_string, c
    return keyspace + query_string.to_string();
 }

-prepared_cache_key_type query_processor::compute_id(const std::experimental::string_view& query_string, const sstring& keyspace)
-{
+prepared_cache_key_type query_processor::compute_id(
+        const std::experimental::string_view& query_string,
+        const sstring& keyspace) {
    return prepared_cache_key_type(md5_calculate(hash_target(query_string, keyspace)));
 }

-prepared_cache_key_type query_processor::compute_thrift_id(const std::experimental::string_view& query_string, const sstring& keyspace)
-{
+prepared_cache_key_type query_processor::compute_thrift_id(
+        const std::experimental::string_view& query_string,
+        const sstring& keyspace) {
    auto target = hash_target(query_string, keyspace);
    uint32_t h = 0;
    for (auto&& c : hash_target(query_string, keyspace)) {
@@ -269,11 +306,7 @@ prepared_cache_key_type query_processor::compute_thrift_id(const std::experiment
 }

 std::unique_ptr<prepared_statement>
-query_processor::get_statement(const sstring_view& query, const service::client_state& client_state)
-{
-#if 0
-        Tracing.trace("Parsing {}", queryStr);
-#endif
+query_processor::get_statement(const sstring_view& query, const service::client_state& client_state) {
    ::shared_ptr<raw::parsed_statement> statement = parse_statement(query);

    // Set keyspace for statement that require login
@@ -281,16 +314,12 @@ query_processor::get_statement(const sstring_view& query, const service::client_
    if (cf_stmt) {
        cf_stmt->prepare_keyspace(client_state);
    }
-#if 0
-        Tracing.trace("Preparing statement");
-#endif
    ++_stats.prepare_invocations;
    return statement->prepare(_db.local(), _cql_stats);
 }

 ::shared_ptr<raw::parsed_statement>
-query_processor::parse_statement(const sstring_view& query)
-{
+query_processor::parse_statement(const sstring_view& query) {
    try {
        auto statement = util::do_with_parser(query,  std::mem_fn(&cql3_parser::CqlParser::query));
        if (!statement) {
@@ -307,12 +336,14 @@ query_processor::parse_statement(const sstring_view& query)
    }
 }

-query_options query_processor::make_internal_options(const statements::prepared_statement::checked_weak_ptr& p,
-                                                     const std::initializer_list<data_value>& values,
-                                                     db::consistency_level cl)
-{
+query_options query_processor::make_internal_options(
+        const statements::prepared_statement::checked_weak_ptr& p,
+        const std::initializer_list<data_value>& values,
+        db::consistency_level cl,
+        int32_t page_size) {
    if (p->bound_names.size() != values.size()) {
-        throw std::invalid_argument(sprint("Invalid number of values. Expecting %d but got %d", p->bound_names.size(), values.size()));
+        throw std::invalid_argument(
+                sprint("Invalid number of values. Expecting %d but got %d", p->bound_names.size(), values.size()));
    }
    auto ni = p->bound_names.begin();
    std::vector<cql3::raw_value> bound_values;
@@ -326,11 +357,19 @@ query_options query_processor::make_internal_options(const statements::prepared_
            bound_values.push_back(cql3::raw_value::make_value(n->type->decompose(v)));
        }
    }
+    if (page_size > 0) {
+        ::shared_ptr<service::pager::paging_state> paging_state;
+        db::consistency_level serial_consistency = db::consistency_level::SERIAL;
+        api::timestamp_type ts = api::missing_timestamp;
+        return query_options(
+                cl,
+                bound_values,
+                cql3::query_options::specific_options{page_size, std::move(paging_state), serial_consistency, ts});
+    }
    return query_options(cl, bound_values);
 }

-statements::prepared_statement::checked_weak_ptr query_processor::prepare_internal(const sstring& query_string)
-{
+statements::prepared_statement::checked_weak_ptr query_processor::prepare_internal(const sstring& query_string) {
    auto& p = _internal_statements[query_string];
    if (p == nullptr) {
        auto np = parse_statement(query_string)->prepare(_db.local(), _cql_stats);
@@ -341,33 +380,128 @@ statements::prepared_statement::checked_weak_ptr query_processor::prepare_intern
 }

 future<::shared_ptr<untyped_result_set>>
-query_processor::execute_internal(const sstring& query_string,
-                                  const std::initializer_list<data_value>& values)
-{
+query_processor::execute_internal(const sstring& query_string, const std::initializer_list<data_value>& values) {
    if (log.is_enabled(logging::log_level::trace)) {
        log.trace("execute_internal: \"{}\" ({})", query_string, ::join(", ", values));
    }
    return execute_internal(prepare_internal(query_string), values);
 }

+struct internal_query_state { // per-query bookkeeping shared between successive paged executions
+    sstring query_string; // query text; used to obtain the prepared statement again between pages
+    std::unique_ptr<query_options> opts; // options for the next execution; replaced when a new paging state arrives
+    statements::prepared_statement::checked_weak_ptr p; // prepared statement obtained from query_string
+    bool more_results = true; // set to false once a result indicates there are no further pages
+};
+
+::shared_ptr<internal_query_state> query_processor::create_paged_state(
+        const sstring& query_string,
+        const std::initializer_list<data_value>& values,
+        int32_t page_size) {
+    // Prepare (or reuse) the internal statement, then bind the values at CL=ONE, passing page_size through.
+    auto prepared = prepare_internal(query_string);
+    auto options = std::make_unique<cql3::query_options>(
+            make_internal_options(prepared, values, db::consistency_level::ONE, page_size));
+    return ::make_shared<internal_query_state>(
+            internal_query_state{query_string, std::move(options), std::move(prepared), true});
+}
+
+bool query_processor::has_more_results(::shared_ptr<cql3::internal_query_state> state) const {
+    // Tells whether another page can still be fetched for the given paged query.
+    // A null state has nothing left to fetch; otherwise the answer is the flag
+    // stored in the state itself.
+    return state && state->more_results;
+}
+
+future<> query_processor::for_each_cql_result(
+        ::shared_ptr<cql3::internal_query_state> state,
+        std::function<stop_iteration(const cql3::untyped_result_set::row&)>&& f) {
+    // Fetches the paged query described by `state` one page at a time and invokes `f` on every row until
+    // a page comes back empty, the last page has been consumed, or `f` returns stop_iteration::yes.
+    // The callback and the termination flag are owned by do_with() for the whole iteration, so the
+    // continuations can refer to them by reference: `f` is moved exactly once instead of being copied
+    // for every page, and any state it holds by value is therefore preserved across pages.
+    return do_with(std::move(f), false, [this, state](auto& f, bool& is_done) {
+        auto stop_when = [&is_done] {
+            return is_done;
+        };
+        auto fetch_page = [this, state, &f, &is_done] {
+            return this->execute_paged_internal(state).then(
+                    [this, state, &f, &is_done](::shared_ptr<cql3::untyped_result_set> msg) {
+                // An empty page or the absence of further pages ends the iteration after this page.
+                if (msg->empty() || !this->has_more_results(state)) {
+                    is_done = true;
+                }
+                for (auto& row : *msg) {
+                    if (f(row) == stop_iteration::yes) {
+                        is_done = true;
+                        break;
+                    }
+                }
+            });
+        };
+        return do_until(std::move(stop_when), std::move(fetch_page));
+    });
+}
+
 future<::shared_ptr<untyped_result_set>>
-query_processor::execute_internal(statements::prepared_statement::checked_weak_ptr p,
-                                  const std::initializer_list<data_value>& values)
-{
-    auto opts = make_internal_options(p, values);
+query_processor::execute_paged_internal(::shared_ptr<internal_query_state> state) { // executes the query held in state once and updates its paging bookkeeping
+    return state->p->statement->execute_internal(_proxy, *_internal_state, *state->opts).then(
+            [state, this](::shared_ptr<cql_transport::messages::result_message> msg) mutable {
+        class visitor : public result_message::visitor_base { // updates the shared state from a rows result
+            ::shared_ptr<internal_query_state> _state;
+            query_processor& _qp;
+        public:
+            visitor(::shared_ptr<internal_query_state> state, query_processor& qp) : _state(state), _qp(qp) {
+            }
+            virtual ~visitor() = default;
+            void visit(const result_message::rows& rmrs) override {
+                auto& rs = rmrs.rs();
+                if (rs.get_metadata().paging_state()) { // the result carries a paging state
+                    bool done = !rs.get_metadata().flags().contains<cql3::metadata::flag::HAS_MORE_PAGES>();
+
+                    if (done) {
+                        _state->more_results = false; // paging state present but HAS_MORE_PAGES not set: last page
+                    } else {
+                        const service::pager::paging_state& st = *rs.get_metadata().paging_state();
+                        shared_ptr<service::pager::paging_state> shrd = ::make_shared<service::pager::paging_state>(st); // own copy of the returned paging state
+                        _state->opts = std::make_unique<query_options>(std::move(_state->opts), shrd); // new options built from the previous ones plus the copied paging state
+                        _state->p = _qp.prepare_internal(_state->query_string); // obtain the prepared statement again for the next execution
+                    }
+                } else {
+                    _state->more_results = false; // no paging state at all: nothing further to fetch
+                }
+            }
+        };
+        visitor v(state, *this);
+        if (msg != nullptr) { // a null message is not visited, so the state is left as it was
+            msg->accept(v);
+        }
+        return make_ready_future<::shared_ptr<untyped_result_set>>(::make_shared<untyped_result_set>(msg)); // wrap the raw message for the caller
+    });
+}
+
+future<::shared_ptr<untyped_result_set>>
+query_processor::execute_internal(
+        statements::prepared_statement::checked_weak_ptr p,
+        const std::initializer_list<data_value>& values) { // executes an already prepared internal statement with the given bound values
+    query_options opts = make_internal_options(p, values); // throws std::invalid_argument when the value count does not match
     return do_with(std::move(opts), [this, p = std::move(p)](auto& opts) {
-        return p->statement->execute_internal(_proxy, *_internal_state, opts).then([stmt = p->statement](auto msg) {
+        return p->statement->execute_internal(
+                _proxy,
+                *_internal_state, // opts stays valid for the whole call because do_with() owns it
+                opts).then([stmt = p->statement](auto msg) { // stmt is unused in the body; presumably held to keep the statement alive
             return make_ready_future<::shared_ptr<untyped_result_set>>(::make_shared<untyped_result_set>(msg));
         });
     });
 }

 future<::shared_ptr<untyped_result_set>>
-query_processor::process(const sstring& query_string,
-                         db::consistency_level cl,
-                         const std::initializer_list<data_value>& values,
-                         bool cache)
-{
+query_processor::process(
+        const sstring& query_string,
+        db::consistency_level cl,
+        const std::initializer_list<data_value>& values,
+        bool cache) {
    if (cache) {
        return process(prepare_internal(query_string), cl, values);
    } else {
@@ -379,10 +513,10 @@ query_processor::process(const sstring& query_string,
 }

 future<::shared_ptr<untyped_result_set>>
-query_processor::process(statements::prepared_statement::checked_weak_ptr p,
-                         db::consistency_level cl,
-                         const std::initializer_list<data_value>& values)
-{
+query_processor::process(
+        statements::prepared_statement::checked_weak_ptr p,
+        db::consistency_level cl,
+        const std::initializer_list<data_value>& values) {
    auto opts = make_internal_options(p, values, cl);
    return do_with(std::move(opts), [this, p = std::move(p)](auto & opts) {
        return p->statement->execute(_proxy, *_internal_state, opts).then([](auto msg) {
@@ -392,10 +526,10 @@ query_processor::process(statements::prepared_statement::checked_weak_ptr p,
 }

 future<::shared_ptr<cql_transport::messages::result_message>>
-query_processor::process_batch(::shared_ptr<statements::batch_statement> batch,
-                               service::query_state& query_state,
-                               query_options& options)
-{
+query_processor::process_batch(
+        ::shared_ptr<statements::batch_statement> batch,
+        service::query_state& query_state,
+        query_options& options) {
    return batch->check_access(query_state.get_client_state()).then([this, &query_state, &options, batch] {
        batch->validate();
        batch->validate(_proxy, query_state.get_client_state());
@@ -403,101 +537,90 @@ query_processor::process_batch(::shared_ptr<statements::batch_statement> batch,
    });
 }

-query_processor::migration_subscriber::migration_subscriber(query_processor* qp)
-    : _qp{qp}
-{
+query_processor::migration_subscriber::migration_subscriber(query_processor* qp) : _qp{qp} {
 }

-void query_processor::migration_subscriber::on_create_keyspace(const sstring& ks_name)
-{
+void query_processor::migration_subscriber::on_create_keyspace(const sstring& ks_name) {
 }

-void query_processor::migration_subscriber::on_create_column_family(const sstring& ks_name, const sstring& cf_name)
-{
+void query_processor::migration_subscriber::on_create_column_family(const sstring& ks_name, const sstring& cf_name) {
 }

-void query_processor::migration_subscriber::on_create_user_type(const sstring& ks_name, const sstring& type_name)
-{
+void query_processor::migration_subscriber::on_create_user_type(const sstring& ks_name, const sstring& type_name) {
 }

-void query_processor::migration_subscriber::on_create_function(const sstring& ks_name, const sstring& function_name)
-{
+void query_processor::migration_subscriber::on_create_function(const sstring& ks_name, const sstring& function_name) {
    log.warn("{} event ignored", __func__);
 }

-void query_processor::migration_subscriber::on_create_aggregate(const sstring& ks_name, const sstring& aggregate_name)
-{
+void query_processor::migration_subscriber::on_create_aggregate(const sstring& ks_name, const sstring& aggregate_name) {
    log.warn("{} event ignored", __func__);
 }

-void query_processor::migration_subscriber::on_create_view(const sstring& ks_name, const sstring& view_name)
-{
+void query_processor::migration_subscriber::on_create_view(const sstring& ks_name, const sstring& view_name) {
 }

-void query_processor::migration_subscriber::on_update_keyspace(const sstring& ks_name)
-{
+void query_processor::migration_subscriber::on_update_keyspace(const sstring& ks_name) {
 }

-void query_processor::migration_subscriber::on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool columns_changed)
-{
+void query_processor::migration_subscriber::on_update_column_family(
+        const sstring& ks_name,
+        const sstring& cf_name,
+        bool columns_changed) {
    // #1255: Ignoring columns_changed deliberately.
    log.info("Column definitions for {}.{} changed, invalidating related prepared statements", ks_name, cf_name);
    remove_invalid_prepared_statements(ks_name, cf_name);
 }

-void query_processor::migration_subscriber::on_update_user_type(const sstring& ks_name, const sstring& type_name)
-{
+void query_processor::migration_subscriber::on_update_user_type(const sstring& ks_name, const sstring& type_name) {
 }

-void query_processor::migration_subscriber::on_update_function(const sstring& ks_name, const sstring& function_name)
-{
+void query_processor::migration_subscriber::on_update_function(const sstring& ks_name, const sstring& function_name) {
 }

-void query_processor::migration_subscriber::on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name)
-{
+void query_processor::migration_subscriber::on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) {
 }

-void query_processor::migration_subscriber::on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed)
-{
+void query_processor::migration_subscriber::on_update_view(
+        const sstring& ks_name,
+        const sstring& view_name, bool columns_changed) {
 }

-void query_processor::migration_subscriber::on_drop_keyspace(const sstring& ks_name)
-{
+void query_processor::migration_subscriber::on_drop_keyspace(const sstring& ks_name) {
    remove_invalid_prepared_statements(ks_name, std::experimental::nullopt);
 }

-void query_processor::migration_subscriber::on_drop_column_family(const sstring& ks_name, const sstring& cf_name)
-{
+void query_processor::migration_subscriber::on_drop_column_family(const sstring& ks_name, const sstring& cf_name) {
    remove_invalid_prepared_statements(ks_name, cf_name);
 }

-void query_processor::migration_subscriber::on_drop_user_type(const sstring& ks_name, const sstring& type_name)
-{
+void query_processor::migration_subscriber::on_drop_user_type(const sstring& ks_name, const sstring& type_name) {
 }

-void query_processor::migration_subscriber::on_drop_function(const sstring& ks_name, const sstring& function_name)
-{
+void query_processor::migration_subscriber::on_drop_function(const sstring& ks_name, const sstring& function_name) {
    log.warn("{} event ignored", __func__);
 }

-void query_processor::migration_subscriber::on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name)
-{
+void query_processor::migration_subscriber::on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) {
    log.warn("{} event ignored", __func__);
 }

-void query_processor::migration_subscriber::on_drop_view(const sstring& ks_name, const sstring& view_name)
-{
+void query_processor::migration_subscriber::on_drop_view(const sstring& ks_name, const sstring& view_name) {
+    remove_invalid_prepared_statements(ks_name, view_name);
 }

-void query_processor::migration_subscriber::remove_invalid_prepared_statements(sstring ks_name, std::experimental::optional<sstring> cf_name)
-{
+void query_processor::migration_subscriber::remove_invalid_prepared_statements(
+        sstring ks_name,
+        std::experimental::optional<sstring> cf_name) {
    _qp->_prepared_cache.remove_if([&] (::shared_ptr<cql_statement> stmt) {
        return this->should_invalidate(ks_name, cf_name, stmt);
    });
 }

-bool query_processor::migration_subscriber::should_invalidate(sstring ks_name, std::experimental::optional<sstring> cf_name, ::shared_ptr<cql_statement> statement)
-{
+bool query_processor::migration_subscriber::should_invalidate(
+        sstring ks_name,
+        std::experimental::optional<sstring> cf_name,
+        ::shared_ptr<cql_statement> statement) {
    return statement->depends_on_keyspace(ks_name) && (!cf_name || statement->depends_on_column_family(*cf_name));
 }

--- a/cql3/query_processor.hh
+++ b/cql3/query_processor.hh
@@ -43,21 +43,22 @@

 #include <experimental/string_view>
 #include <unordered_map>
-#include <seastar/core/metrics_registration.hh>

-#include "core/shared_ptr.hh"
-#include "exceptions/exceptions.hh"
+#include <seastar/core/distributed.hh>
+#include <seastar/core/metrics_registration.hh>
+#include <seastar/core/shared_ptr.hh>
+
+#include "cql3/prepared_statements_cache.hh"
 #include "cql3/query_options.hh"
+#include "cql3/statements/prepared_statement.hh"
 #include "cql3/statements/raw/parsed_statement.hh"
 #include "cql3/statements/raw/cf_statement.hh"
+#include "cql3/untyped_result_set.hh"
+#include "exceptions/exceptions.hh"
+#include "log.hh"
 #include "service/migration_manager.hh"
 #include "service/query_state.hh"
-#include "log.hh"
-#include "core/distributed.hh"
-#include "statements/prepared_statement.hh"
 #include "transport/messages/result_message.hh"
-#include "untyped_result_set.hh"
-#include "prepared_statements_cache.hh"

 namespace cql3 {

@@ -65,14 +66,22 @@ namespace statements {
 class batch_statement;
 }

-class prepared_statement_is_too_big : public std::exception {
-public:
-    static constexpr int max_query_prefix = 100;
+class untyped_result_set;
+class untyped_result_set_row;

-private:
+/*!
+ * \brief Holds the internal state that must be passed to each successive
+ * execution of a statement in order to support paging.
+ *
+ */
+struct internal_query_state;
+
+class prepared_statement_is_too_big : public std::exception {
    sstring _msg;

 public:
+    static constexpr int max_query_prefix = 100;
+
    prepared_statement_is_too_big(const sstring& query_string)
        : _msg(seastar::format("Prepared statement is too big: {}", query_string.substr(0, max_query_prefix)))
    {
@@ -107,15 +116,33 @@ private:
    class internal_state;
    std::unique_ptr<internal_state> _internal_state;

-public:
-    query_processor(distributed<service::storage_proxy>& proxy, distributed<database>& db);
-    ~query_processor();
+    prepared_statements_cache _prepared_cache;

+    // A map for prepared statements used internally (which we don't want to mix with user statements; in particular,
+    // we don't bother with expiration on those).
+    std::unordered_map<sstring, std::unique_ptr<statements::prepared_statement>> _internal_statements;
+
+public:
    static const sstring CQL_VERSION;

+    static prepared_cache_key_type compute_id(
+            const std::experimental::string_view& query_string,
+            const sstring& keyspace);
+
+    static prepared_cache_key_type compute_thrift_id(
+            const std::experimental::string_view& query_string,
+            const sstring& keyspace);
+
+    static ::shared_ptr<statements::raw::parsed_statement> parse_statement(const std::experimental::string_view& query);
+
+    query_processor(distributed<service::storage_proxy>& proxy, distributed<database>& db);
+
+    ~query_processor();
+
    distributed<database>& db() {
        return _db;
    }
+
    distributed<service::storage_proxy>& proxy() {
        return _proxy;
    }
@@ -124,125 +151,6 @@ public:
        return _cql_stats;
    }

-#if 0
-    public static final QueryProcessor instance = new QueryProcessor();
-#endif
-private:
-#if 0
-    private static final Logger logger = LoggerFactory.getLogger(QueryProcessor.class);
-    private static final MemoryMeter meter = new MemoryMeter().withGuessing(MemoryMeter.Guess.FALLBACK_BEST).ignoreKnownSingletons();
-    private static final long MAX_CACHE_PREPARED_MEMORY = Runtime.getRuntime().maxMemory() / 256;
-
-    private static EntryWeigher<MD5Digest, ParsedStatement.Prepared> cqlMemoryUsageWeigher = new EntryWeigher<MD5Digest, ParsedStatement.Prepared>()
-    {
-        @Override
-        public int weightOf(MD5Digest key, ParsedStatement.Prepared value)
-        {
-            return Ints.checkedCast(measure(key) + measure(value.statement) + measure(value.boundNames));
-        }
-    };
-
-    private static EntryWeigher<Integer, ParsedStatement.Prepared> thriftMemoryUsageWeigher = new EntryWeigher<Integer, ParsedStatement.Prepared>()
-    {
-        @Override
-        public int weightOf(Integer key, ParsedStatement.Prepared value)
-        {
-            return Ints.checkedCast(measure(key) + measure(value.statement) + measure(value.boundNames));
-        }
-    };
-#endif
-    prepared_statements_cache _prepared_cache;
-    std::unordered_map<sstring, std::unique_ptr<statements::prepared_statement>> _internal_statements;
-#if 0
-
-    // A map for prepared statements used internally (which we don't want to mix with user statement, in particular we don't
-    // bother with expiration on those.
-    private static final ConcurrentMap<String, ParsedStatement.Prepared> internalStatements = new ConcurrentHashMap<>();
-
-    // Direct calls to processStatement do not increment the preparedStatementsExecuted/regularStatementsExecuted
-    // counters. Callers of processStatement are responsible for correctly notifying metrics
-    public static final CQLMetrics metrics = new CQLMetrics();
-
-    private static final AtomicInteger lastMinuteEvictionsCount = new AtomicInteger(0);
-
-    static
-    {
-        preparedStatements = new ConcurrentLinkedHashMap.Builder<MD5Digest, ParsedStatement.Prepared>()
-                             .maximumWeightedCapacity(MAX_CACHE_PREPARED_MEMORY)
-                             .weigher(cqlMemoryUsageWeigher)
-                             .listener(new EvictionListener<MD5Digest, ParsedStatement.Prepared>()
-                             {
-                                 public void onEviction(MD5Digest md5Digest, ParsedStatement.Prepared prepared)
-                                 {
-                                     metrics.preparedStatementsEvicted.inc();
-                                     lastMinuteEvictionsCount.incrementAndGet();
-                                 }
-                             }).build();
-
-        thriftPreparedStatements = new ConcurrentLinkedHashMap.Builder<Integer, ParsedStatement.Prepared>()
-                                   .maximumWeightedCapacity(MAX_CACHE_PREPARED_MEMORY)
-                                   .weigher(thriftMemoryUsageWeigher)
-                                   .listener(new EvictionListener<Integer, ParsedStatement.Prepared>()
-                                   {
-                                       public void onEviction(Integer integer, ParsedStatement.Prepared prepared)
-                                       {
-                                           metrics.preparedStatementsEvicted.inc();
-                                           lastMinuteEvictionsCount.incrementAndGet();
-                                       }
-                                   })
-                                   .build();
-
-        ScheduledExecutors.scheduledTasks.scheduleAtFixedRate(new Runnable()
-        {
-            public void run()
-            {
-                long count = lastMinuteEvictionsCount.getAndSet(0);
-                if (count > 0)
-                    logger.info("{} prepared statements discarded in the last minute because cache limit reached ({} bytes)",
-                                count,
-                                MAX_CACHE_PREPARED_MEMORY);
-            }
-        }, 1, 1, TimeUnit.MINUTES);
-    }
-
-    public static int preparedStatementsCount()
-    {
-        return preparedStatements.size() + thriftPreparedStatements.size();
-    }
-
-    // Work around initialization dependency
-    private static enum InternalStateInstance
-    {
-        INSTANCE;
-
-        private final QueryState queryState;
-
-        InternalStateInstance()
-        {
-            ClientState state = ClientState.forInternalCalls();
-            try
-            {
-                state.setKeyspace(SystemKeyspace.NAME);
-            }
-            catch (InvalidRequestException e)
-            {
-                throw new RuntimeException();
-            }
-            this.queryState = new QueryState(state);
-        }
-    }
-
-    private static QueryState internalQueryState()
-    {
-        return InternalStateInstance.INSTANCE.queryState;
-    }
-
-    private QueryProcessor()
-    {
-        MigrationManager.instance.register(new MigrationSubscriber());
-    }
-#endif
-public:
    statements::prepared_statement::checked_weak_ptr get_prepared(const prepared_cache_key_type& key) {
        auto it = _prepared_cache.find(key);
        if (it == _prepared_cache.end()) {
@@ -251,128 +159,69 @@ public:
        return *it;
    }

-#if 0
-    public static void validateKey(ByteBuffer key) throws InvalidRequestException
-    {
-        if (key == null || key.remaining() == 0)
-        {
-            throw new InvalidRequestException("Key may not be empty");
-        }
+    future<::shared_ptr<cql_transport::messages::result_message>>
+    process_statement(
+            ::shared_ptr<cql_statement> statement,
+            service::query_state& query_state,
+            const query_options& options);

-        // check that key can be handled by FBUtilities.writeShortByteArray
-        if (key.remaining() > FBUtilities.MAX_UNSIGNED_SHORT)
-        {
-            throw new InvalidRequestException("Key length of " + key.remaining() +
-                                              " is longer than maximum of " + FBUtilities.MAX_UNSIGNED_SHORT);
-        }
-    }
+    future<::shared_ptr<cql_transport::messages::result_message>>
+    process(
+            const std::experimental::string_view& query_string,
+            service::query_state& query_state,
+            query_options& options);

-    public static void validateCellNames(Iterable<CellName> cellNames, CellNameType type) throws InvalidRequestException
-    {
-        for (CellName name : cellNames)
-            validateCellName(name, type);
-    }
-
-    public static void validateCellName(CellName name, CellNameType type) throws InvalidRequestException
-    {
-        validateComposite(name, type);
-        if (name.isEmpty())
-            throw new InvalidRequestException("Invalid empty value for clustering column of COMPACT TABLE");
-    }
-
-    public static void validateComposite(Composite name, CType type) throws InvalidRequestException
-    {
-        long serializedSize = type.serializer().serializedSize(name, TypeSizes.NATIVE);
-        if (serializedSize > Cell.MAX_NAME_LENGTH)
-            throw new InvalidRequestException(String.format("The sum of all clustering columns is too long (%s > %s)",
-                                                            serializedSize,
-                                                            Cell.MAX_NAME_LENGTH));
-    }
-#endif
-public:
-    future<::shared_ptr<cql_transport::messages::result_message>> process_statement(::shared_ptr<cql_statement> statement,
-            service::query_state& query_state, const query_options& options);
-
-#if 0
-    public static ResultMessage process(String queryString, ConsistencyLevel cl, QueryState queryState)
-    throws RequestExecutionException, RequestValidationException
-    {
-        return instance.process(queryString, queryState, QueryOptions.forInternalCalls(cl, Collections.<ByteBuffer>emptyList()));
-    }
-#endif
-
-    future<::shared_ptr<cql_transport::messages::result_message>> process(const std::experimental::string_view& query_string,
-            service::query_state& query_state, query_options& options);
-
-#if 0
-    public static ParsedStatement.Prepared parseStatement(String queryStr, QueryState queryState) throws RequestValidationException
-    {
-        return getStatement(queryStr, queryState.getClientState());
-    }
-
-    public static UntypedResultSet process(String query, ConsistencyLevel cl) throws RequestExecutionException
-    {
-        try
-        {
-            ResultMessage result = instance.process(query, QueryState.forInternalCalls(), QueryOptions.forInternalCalls(cl, Collections.<ByteBuffer>emptyList()));
-            if (result instanceof ResultMessage.Rows)
-                return UntypedResultSet.create(((ResultMessage.Rows)result).result);
-            else
-                return null;
-        }
-        catch (RequestValidationException e)
-        {
-            throw new RuntimeException(e);
-        }
-    }
-
-    private static QueryOptions makeInternalOptions(ParsedStatement.Prepared prepared, Object[] values)
-    {
-        if (prepared.boundNames.size() != values.length)
-            throw new IllegalArgumentException(String.format("Invalid number of values. Expecting %d but got %d", prepared.boundNames.size(), values.length));
-
-        List<ByteBuffer> boundValues = new ArrayList<ByteBuffer>(values.length);
-        for (int i = 0; i < values.length; i++)
-        {
-            Object value = values[i];
-            AbstractType type = prepared.boundNames.get(i).type;
-            boundValues.add(value instanceof ByteBuffer || value == null ? (ByteBuffer)value : type.decompose(value));
-        }
-        return QueryOptions.forInternalCalls(boundValues);
-    }
-
-    private static ParsedStatement.Prepared prepareInternal(String query) throws RequestValidationException
-    {
-        ParsedStatement.Prepared prepared = internalStatements.get(query);
-        if (prepared != null)
-            return prepared;
-
-        // Note: if 2 threads prepare the same query, we'll live so don't bother synchronizing
-        prepared = parseStatement(query, internalQueryState());
-        prepared.statement.validate(internalQueryState().getClientState());
-        internalStatements.putIfAbsent(query, prepared);
-        return prepared;
-    }
-#endif
-private:
-    query_options make_internal_options(const statements::prepared_statement::checked_weak_ptr& p, const std::initializer_list<data_value>&, db::consistency_level = db::consistency_level::ONE);
-public:
-    future<::shared_ptr<untyped_result_set>> execute_internal(
-            const sstring& query_string,
-            const std::initializer_list<data_value>& = { });
+    future<::shared_ptr<untyped_result_set>>
+    execute_internal(const sstring& query_string, const std::initializer_list<data_value>& = { });

    statements::prepared_statement::checked_weak_ptr prepare_internal(const sstring& query);

-    future<::shared_ptr<untyped_result_set>> execute_internal(
-            statements::prepared_statement::checked_weak_ptr p,
-            const std::initializer_list<data_value>& = { });
+    future<::shared_ptr<untyped_result_set>>
+    execute_internal(statements::prepared_statement::checked_weak_ptr p, const std::initializer_list<data_value>& = { });
+
+    /*!
+     * \brief Iterate over all CQL results of a query using paging.
+     *
+     * Pass a statement, optionally with parameters, and a function
+     * that is applied to the result rows.
+     *
+     * The passed function is called for each result row; return stop_iteration::yes
+     * from it to stop the iteration early.
+     *
+     * For example:
+            return query("SELECT * from system.compaction_history",
+                         [&history] (const cql3::untyped_result_set::row& row) mutable {
+                ....
+                ....
+                return stop_iteration::no;
+            });
+
+     * You can use placeholders in the query; the statement will only be prepared once.
+     *
+     *
+     * query_string - the CQL string, may contain placeholders
+     * f - a function run on each result row; returning stop_iteration::yes stops the iteration
+     * args - arbitrary number of query parameters, each converted to a data_value
+     */
+    template<typename... Args>
+    future<> query(
+            const sstring& query_string,
+            std::function<stop_iteration(const cql3::untyped_result_set_row&)>&& f,
+            Args&&... args) {
+        return for_each_cql_result(
+                create_paged_state(query_string, { data_value(std::forward<Args>(args))... }), std::move(f));
+    }

    future<::shared_ptr<untyped_result_set>> process(
-                    const sstring& query_string,
-                    db::consistency_level, const std::initializer_list<data_value>& = { }, bool cache = false);
+            const sstring& query_string,
+            db::consistency_level,
+            const std::initializer_list<data_value>& = { },
+            bool cache = false);
+
    future<::shared_ptr<untyped_result_set>> process(
-                    statements::prepared_statement::checked_weak_ptr p,
-                    db::consistency_level, const std::initializer_list<data_value>& = { });
+            statements::prepared_statement::checked_weak_ptr p,
+            db::consistency_level,
+            const std::initializer_list<data_value>& = { });

    /*
     * This function provides a timestamp that is guaranteed to be higher than any timestamp
@@ -384,115 +233,110 @@ public:
     */
    api::timestamp_type next_timestamp();

-#if 0
-    public static UntypedResultSet executeInternalWithPaging(String query, int pageSize, Object... values)
-    {
-        try
-        {
-            ParsedStatement.Prepared prepared = prepareInternal(query);
-            if (!(prepared.statement instanceof SelectStatement))
-                throw new IllegalArgumentException("Only SELECTs can be paged");
-
-            SelectStatement select = (SelectStatement)prepared.statement;
-            QueryPager pager = QueryPagers.localPager(select.getPageableCommand(makeInternalOptions(prepared, values)));
-            return UntypedResultSet.create(select, pager, pageSize);
-        }
-        catch (RequestValidationException e)
-        {
-            throw new RuntimeException("Error validating query" + e);
-        }
-    }
-
-    /**
-     * Same than executeInternal, but to use for queries we know are only executed once so that the
-     * created statement object is not cached.
-     */
-    public static UntypedResultSet executeOnceInternal(String query, Object... values)
-    {
-        try
-        {
-            ParsedStatement.Prepared prepared = parseStatement(query, internalQueryState());
-            prepared.statement.validate(internalQueryState().getClientState());
-            ResultMessage result = prepared.statement.executeInternal(internalQueryState(), makeInternalOptions(prepared, values));
-            if (result instanceof ResultMessage.Rows)
-                return UntypedResultSet.create(((ResultMessage.Rows)result).result);
-            else
-                return null;
-        }
-        catch (RequestExecutionException e)
-        {
-            throw new RuntimeException(e);
-        }
-        catch (RequestValidationException e)
-        {
-            throw new RuntimeException("Error validating query " + query, e);
-        }
-    }
-
-    public static UntypedResultSet resultify(String query, Row row)
-    {
-        return resultify(query, Collections.singletonList(row));
-    }
-
-    public static UntypedResultSet resultify(String query, List<Row> rows)
-    {
-        try
-        {
-            SelectStatement ss = (SelectStatement) getStatement(query, null).statement;
-            ResultSet cqlRows = ss.process(rows);
-            return UntypedResultSet.create(cqlRows);
-        }
-        catch (RequestValidationException e)
-        {
-            throw new AssertionError(e);
-        }
-    }
-#endif
-
    future<::shared_ptr<cql_transport::messages::result_message::prepared>>
    prepare(sstring query_string, service::query_state& query_state);

    future<::shared_ptr<cql_transport::messages::result_message::prepared>>
    prepare(sstring query_string, const service::client_state& client_state, bool for_thrift);

-    static prepared_cache_key_type compute_id(const std::experimental::string_view& query_string, const sstring& keyspace);
-    static prepared_cache_key_type compute_thrift_id(const std::experimental::string_view& query_string, const sstring& keyspace);
+    future<> stop();
+
+    future<::shared_ptr<cql_transport::messages::result_message>>
+    process_batch(::shared_ptr<statements::batch_statement>, service::query_state& query_state, query_options& options);
+
+    std::unique_ptr<statements::prepared_statement> get_statement(
+            const std::experimental::string_view& query,
+            const service::client_state& client_state);
+
+    friend class migration_subscriber;

 private:
+    query_options make_internal_options(
+            const statements::prepared_statement::checked_weak_ptr& p,
+            const std::initializer_list<data_value>&,
+            db::consistency_level = db::consistency_level::ONE,
+            int32_t page_size = -1);
+
+    /*!
+     * \brief create a state object for paging
+     *
+     * When using paging internally, a state object is needed.
+     */
+    ::shared_ptr<internal_query_state> create_paged_state(
+            const sstring& query_string,
+            const std::initializer_list<data_value>& = { },
+            int32_t page_size = 1000);
+
+    /*!
+     * \brief run a query using paging
+     */
+    future<::shared_ptr<untyped_result_set>> execute_paged_internal(::shared_ptr<internal_query_state> state);
+
+    /*!
+     * \brief iterate over all results using paging
+     */
+    future<> for_each_cql_result(
+            ::shared_ptr<cql3::internal_query_state> state,
+            std::function<stop_iteration(const cql3::untyped_result_set_row&)>&& f);
+
+    /*!
+     * \brief check, based on the state, whether there are additional results.
+     * Users of paging should not use the internal_query_state directly.
+     */
+    bool has_more_results(::shared_ptr<cql3::internal_query_state> state) const;
+
    ///
    /// \tparam ResultMsgType type of the returned result message (CQL or Thrift)
-    /// \tparam PreparedKeyGenerator a function that generates the prepared statement cache key for given query and keyspace
-    /// \tparam IdGetter a function that returns the corresponding prepared statement ID (CQL or Thrift) for a given prepared statement cache key
+    /// \tparam PreparedKeyGenerator a function that generates the prepared statement cache key for given query and
+    ///         keyspace
+    /// \tparam IdGetter a function that returns the corresponding prepared statement ID (CQL or Thrift) for a given
+    ///         prepared statement cache key
    /// \param query_string
    /// \param client_state
    /// \param id_gen prepared ID generator, called before the first deferring
-    /// \param id_getter prepared ID getter, passed to deferred context by reference. The caller must ensure its liveness.
+    /// \param id_getter prepared ID getter, passed to deferred context by reference. The caller must ensure its
+    ///        liveness.
    /// \return
    template <typename ResultMsgType, typename PreparedKeyGenerator, typename IdGetter>
    future<::shared_ptr<cql_transport::messages::result_message::prepared>>
-    prepare_one(sstring query_string, const service::client_state& client_state, PreparedKeyGenerator&& id_gen, IdGetter&& id_getter) {
-        return do_with(id_gen(query_string, client_state.get_raw_keyspace()), std::move(query_string), [this, &client_state, &id_getter] (const prepared_cache_key_type& key, const sstring& query_string) {
+    prepare_one(
+            sstring query_string,
+            const service::client_state& client_state,
+            PreparedKeyGenerator&& id_gen,
+            IdGetter&& id_getter) {
+        return do_with(
+                id_gen(query_string, client_state.get_raw_keyspace()),
+                std::move(query_string),
+                [this, &client_state, &id_getter](const prepared_cache_key_type& key, const sstring& query_string) {
            return _prepared_cache.get(key, [this, &query_string, &client_state] {
                auto prepared = get_statement(query_string, client_state);
                auto bound_terms = prepared->statement->get_bound_terms();
                if (bound_terms > std::numeric_limits<uint16_t>::max()) {
-                    throw exceptions::invalid_request_exception(sprint("Too many markers(?). %d markers exceed the allowed maximum of %d", bound_terms, std::numeric_limits<uint16_t>::max()));
+                    throw exceptions::invalid_request_exception(
+                            sprint("Too many markers(?). %d markers exceed the allowed maximum of %d",
+                                   bound_terms,
+                                   std::numeric_limits<uint16_t>::max()));
                }
                assert(bound_terms == prepared->bound_names.size());
                prepared->raw_cql_statement = query_string;
                return make_ready_future<std::unique_ptr<statements::prepared_statement>>(std::move(prepared));
            }).then([&key, &id_getter] (auto prep_ptr) {
-                return make_ready_future<::shared_ptr<cql_transport::messages::result_message::prepared>>(::make_shared<ResultMsgType>(id_getter(key), std::move(prep_ptr)));
+                return make_ready_future<::shared_ptr<cql_transport::messages::result_message::prepared>>(
+                        ::make_shared<ResultMsgType>(id_getter(key), std::move(prep_ptr)));
            }).handle_exception_type([&query_string] (typename prepared_statements_cache::statement_is_too_big&) {
-                return make_exception_future<::shared_ptr<cql_transport::messages::result_message::prepared>>(prepared_statement_is_too_big(query_string));
+                return make_exception_future<::shared_ptr<cql_transport::messages::result_message::prepared>>(
+                        prepared_statement_is_too_big(query_string));
            });
        });
    };

    template <typename ResultMsgType, typename KeyGenerator, typename IdGetter>
    ::shared_ptr<cql_transport::messages::result_message::prepared>
-    get_stored_prepared_statement_one(const std::experimental::string_view& query_string, const sstring& keyspace, KeyGenerator&& key_gen, IdGetter&& id_getter)
-    {
+    get_stored_prepared_statement_one(
+            const std::experimental::string_view& query_string,
+            const sstring& keyspace,
+            KeyGenerator&& key_gen,
+            IdGetter&& id_getter) {
        auto cache_key = key_gen(query_string, keyspace);
        auto it = _prepared_cache.find(cache_key);
        if (it == _prepared_cache.end()) {
@@ -503,55 +347,15 @@ private:
    }

    ::shared_ptr<cql_transport::messages::result_message::prepared>
-    get_stored_prepared_statement(const std::experimental::string_view& query_string, const sstring& keyspace, bool for_thrift);
-
-#if 0
-    public ResultMessage processPrepared(CQLStatement statement, QueryState queryState, QueryOptions options)
-    throws RequestExecutionException, RequestValidationException
-    {
-        List<ByteBuffer> variables = options.getValues();
-        // Check to see if there are any bound variables to verify
-        if (!(variables.isEmpty() && (statement.getBoundTerms() == 0)))
-        {
-            if (variables.size() != statement.getBoundTerms())
-                throw new InvalidRequestException(String.format("there were %d markers(?) in CQL but %d bound variables",
-                                                                statement.getBoundTerms(),
-                                                                variables.size()));
-
-            // at this point there is a match in count between markers and variables that is non-zero
-
-            if (logger.isTraceEnabled())
-                for (int i = 0; i < variables.size(); i++)
-                    logger.trace("[{}] '{}'", i+1, variables.get(i));
-        }
-
-        metrics.preparedStatementsExecuted.inc();
-        return processStatement(statement, queryState, options);
-    }
-#endif
-
-public:
-    future<::shared_ptr<cql_transport::messages::result_message>> process_batch(::shared_ptr<statements::batch_statement>,
-            service::query_state& query_state, query_options& options);
-
-    std::unique_ptr<statements::prepared_statement> get_statement(const std::experimental::string_view& query,
-            const service::client_state& client_state);
-    static ::shared_ptr<statements::raw::parsed_statement> parse_statement(const std::experimental::string_view& query);
-
-#if 0
-    private static long measure(Object key)
-    {
-        return meter.measureDeep(key);
-    }
-#endif
-public:
-    future<> stop();
-
-    friend class migration_subscriber;
+    get_stored_prepared_statement(
+            const std::experimental::string_view& query_string,
+            const sstring& keyspace,
+            bool for_thrift);
 };

 class query_processor::migration_subscriber : public service::migration_listener {
    query_processor* _qp;
+
 public:
    migration_subscriber(query_processor* qp);

@@ -575,9 +379,14 @@ public:
    virtual void on_drop_function(const sstring& ks_name, const sstring& function_name) override;
    virtual void on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) override;
    virtual void on_drop_view(const sstring& ks_name, const sstring& view_name) override;
+
 private:
    void remove_invalid_prepared_statements(sstring ks_name, std::experimental::optional<sstring> cf_name);
-    bool should_invalidate(sstring ks_name, std::experimental::optional<sstring> cf_name, ::shared_ptr<cql_statement> statement);
+
+    bool should_invalidate(
+            sstring ks_name,
+            std::experimental::optional<sstring> cf_name,
+            ::shared_ptr<cql_statement> statement);
 };

 extern distributed<query_processor> _the_query_processor;
--- a/Show More
+++ b/Show More