"
Currently affects only counter tables.
Introduced in 27014a2.
mutation_partition(s, mp) is incorrect because it uses s to interpret
mp, while it should use mp_schema.
We may hit this if the current node has a newer schema than the
incoming mutation. This can happen while a table's schema is being
altered, when we receive the mutation from a node which hasn't
processed the schema change yet.
This is undefined behavior in general. If the alter was adding or
removing columns, this may result in corruption of the write where
values of one column are inserted into a different column.
Fixes #5095.
"
* 'fix-schema-alter-counter-tables' of https://github.com/tgrabiec/scylla:
mvcc: Fix incorrect schema version being used to copy the mutation when applying
mutation_partition: Track and validate schema version in debug builds
tests: Use the correct schema to access mutation_partition
(cherry picked from commit 83bc59a89f)
Propagate the timeout to `consume_mutation_fragments_until()` and hence
to the underlying reader, to ensure queued sstable reads that belong
to timed-out requests are dropped from the queue, instead of
pointlessly serving them.
consume_mutation_fragments_until() gains a `timeout` parameter, as it
previously didn't have one.
Fixes: #1068
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20190906135629.67342-1-bdenes@scylladb.com>
row::append_cell() has a precondition that the new cell column id needs
to be larger than that of any other already existing cell. If this
precondition is violated, the row will end up in an invalid state. This
patch adds an assertion to make sure we fail early in such cases.
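A minimal sketch of such a guard; the member names (_cells, id()) are assumptions for illustration, not the actual code:
    void row::append_cell(column_id id, atomic_cell_or_collection cell) {
        // Precondition: cells are appended in strictly increasing column id
        // order. Fail early instead of silently producing an invalid row.
        assert(_cells.empty() || _cells.back().id() < id);
        _cells.emplace_back(id, std::move(cell));
    }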
(cherry picked from commit 060e3f8ac2)
Fixes a segfault when querying for an empty keyspace.
Also, fixes an infinite loop on smp > 1. Queries to
system.size_estimates table which are not single-partition queries
caused Scylla to go into an infinite loop inside
multishard_combining_reader::fill_buffer. This happened because
multishard_combining_reader assumes that shards return rows belonging
to separate partitions, which was not the case for
size_estimates_mutation_reader.
Fixes #4689
Queries to the system.size_estimates table which are not single-partition queries
caused Scylla to go into an infinite loop inside multishard_combining_reader::fill_buffer.
This happened because multishard_combining_reader assumes that shards return rows belonging
to separate partitions, which was not the case for size_estimates_mutation_reader.
This commit fixes the issue and closes #4689.
Move the implementation of size_estimates_mutation_reader
to a separate compilation unit to speed up compilation times
and increase readability.
Refactor tests to use seastar::thread.
It shouldn't rely on argument evaluation order, which is UB.
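For illustration only (a hypothetical call, not the code from this patch): the two arguments below may be evaluated in either order, so the second may observe a moved-from `s`:
    // Broken: unspecified whether std::move(s) or s->version() runs first.
    consume(make_reader(std::move(s)), s->version());
    // Fixed: sequence the evaluation explicitly.
    auto version = s->version();
    consume(make_reader(std::move(s)), version);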
Fixes #4718.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
(cherry-picked from commit 0e732ed1cf)
Currently, if there is a fragment in _ready and _out_of_range was set
after the row end was consumed, push_ready_fragments() would return
without emitting partition_end.
This is problematic once we make consume_row_start() emit
partition_start directly, because we will want to assume that all
fragments for the previous partition are emitted by then. If they're
not, then we'd emit partition_start before partition_end for the
previous partition. The fix is to make sure that
push_ready_fragments() emits everything.
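A sketch of the intended behavior; all member names here are assumptions:
    void push_ready_fragments() {
        // Drain everything that is ready...
        while (!_ready.empty()) {
            push_fragment(std::move(_ready.front()));
            _ready.pop_front();
        }
        // ...and, if we already ran out of range, also emit partition_end so
        // no fragment of the previous partition is left pending.
        if (_out_of_range) {
            push_fragment(mutation_fragment(partition_end()));
            _out_of_range = false;
        }
    }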
Fixes #4786
(cherry picked from commit 9b8ac5ecbc)
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
The view builder is started only if it's enabled in config,
via the view_building=true variable. Unfortunately, stopping
the builder was unconditional, which may result in failed
assertions during shutdown. To remedy this, view building
is stopped only if it was previously started.
Fixes #4589
(cherry picked from commit efa7951ea5)
There is no guarantee that rpc streaming makes progress in some time
period. Remove the keep alive timer in streaming to avoid killing the
session when the rpc streaming is just slow.
The keep alive timer is used to close the session in the following case:
n2 (the rpc streaming sender) streams to n1 (the rpc streaming receiver)
kill -9 n2
We need this because we do not kill the session when gossip thinks a
node is down: the node being down might only be temporary, and it would
be a waste to drop the work already done, especially when the stream
session takes a long time.
However, range_streamer does not stream all data in a single stream
session; it streams 10% of the data at a time, and we have retry logic.
So it is fine to kill a stream session when gossip thinks a node is
down. This patch changes the code to close all stream sessions with a
node that gossip considers down.
Message-Id: <bdbb9486a533eee25fcaf4a23a946629ba946537.1551773823.git.asias@scylladb.com>
(cherry picked from commit b8158dd65d)
Message-Id: <4ebc544c85261873591fd5ac30043e693d74434a.1555466551.git.asias@scylladb.com>
When --abort-on-lsa-bad-alloc is enabled we want to abort whenever
we think we can be out of memory.
We covered failures due to bad_alloc thrown from inside of the
allocation section, but did not cover failures from reservations done
at the beginning of with_reserve(). Fix by moving the trap into
reserve().
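A sketch of the shape of the change; the names (segment_pool, _abort_on_bad_alloc) are assumptions, the point being only that the trap now fires inside reserve() as well:
    void segment_pool::reserve(size_t segments) {
        while (_free_segments < segments) {
            if (!try_allocate_more_segments()) {
                if (_abort_on_bad_alloc) {
                    abort(); // now also covers with_reserve() reservations
                }
                throw std::bad_alloc();
            }
        }
    }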
Message-Id: <1553258915-27929-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 3356a085d2)
allocate_segment() can fail even though we're not out of memory, when
it's invoked inside an allocating section with the cache region
locked. That section may later succeed when retried after memory
reclamation.
We should ignore bad_alloc thrown inside allocating section body and
fail only when the whole section fails.
Fixes #2924
Message-Id: <1550597493-22500-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit dafe22dd83)
Don't hold reference to sstables cleaned up, so that file descriptors
for their index and data files will be closed and consequently disk
space released.
Fixes #3735.
Backport note:
To reduce risk considerably, we'll not backport the mechanism to
release sstables introduced in the incremental compaction work.
Instead, only one sstable is passed to table::cleanup_sstables() at a
time (it won't affect performance because the operation is serialized
anyway), to make it easy to release the reference to the cleaned
sstable held by the compaction manager.
tests: release mode.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20180914194047.26288-1-raphaelsc@scylladb.com>
(cherry picked from commit 5bc028f78b)
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20190416025801.15048-1-raphaelsc@scylladb.com>
All schema changes made to the node locally are serialized on a
semaphore which lives on shard 0. For historical reasons, they don't
queue but rather try to take the lock without blocking and retry on
failure with a random delay from the range [0, 100 us]. Contenders
which do not originate on shard 0 will have an extra disadvantage as
each lock attempt will be longer by the across-shard round trip
latency. If there is constant contention on shard 0, contenders
originating from other shards may keep losing the race to take the lock.
Schema merge executed on behalf of a DDL statement may originate on
any shard. Same for the schema merge which is coming from a push
notification. Schema merge executed as part of the background schema
pull will originate on shard 0 only, where the application state
change listeners run. So if there are constant schema pulls, DDL
statements may take a long time to get through.
The fix is to serialize merge requests fairly, by using the blocking
semaphore::wait(), which is fair.
We don't have to back-off any more, since submit_to() no longer has a
global concurrency limit.
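In outline, using Seastar primitives (the lock accessor and merge function names are approximations):
    // Before: semaphore::try_wait() plus a retry with a random delay, which
    // starves contenders from other shards. After: queue fairly on shard 0.
    return smp::submit_to(0, [] {
        return with_semaphore(schema_merge_lock(), 1, [] {
            return do_merge_schema();
        });
    });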
Fixes #4436.
Message-Id: <1555349915-27703-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 3fd82021b1)
The problem happens after a schema change because we fail to properly
remove an ongoing compaction, which stopped being tracked, from the
list that is used to calculate backlog. As a result, a compaction read
monitor (which ceases to exist after compaction ends) may be used after
it is freed.
Fixes #4410.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20190409024936.23775-1-raphaelsc@scylladb.com>
(cherry-picked from commit 8a117c338a)
Varint and decimal type serialization did not update the output
iterator after generating a value, which may lead to corrupted
sstables: variable-length integers were properly serialized,
but if anything followed them directly in the buffer (e.g. in a tuple),
their value would be overwritten.
Fixes #4348
Tests: unit (dev)
dtest: json_test.FromJsonUpdateTests.complex_data_types_test
json_test.FromJsonInsertTests.complex_data_types_test
json_test.ToJsonSelectTests.complex_data_types_test
Note that dtests still do not succeed 100% due to formatting differences
in compared results (e.g. 1.0e+07 vs 1.0E7), but it's no longer a query
correctness issue.
(cherry picked from commit 287a02dc05)
When we're populating a partition range and the population range ends
with a partition key (not a token) which is present in sstables and
there was a concurrent memtable flush, we would abort on the following
assert in cache::autoupdating_underlying_reader:
utils::phased_barrier::phase_type creation_phase() const {
    assert(_reader);
    return _reader_creation_phase;
}
That's because autoupdating_underlying_reader::move_to_next_partition()
clears the _reader field when it tries to recreate a reader but it finds
the new range to be empty:
if (!_reader || _reader_creation_phase != phase) {
    if (_last_key) {
        auto cmp = dht::ring_position_comparator(*_cache._schema);
        auto&& new_range = _range.split_after(*_last_key, cmp);
        if (!new_range) {
            _reader = {};
            return make_ready_future<mutation_fragment_opt>();
        }
Fix by not asserting on _reader. creation_phase() will now be
meaningful even after we clear the _reader. The meaning of
creation_phase() is now "the phase in which the reader was last
created or 0", which makes it valid in more cases than before.
If the reader was never created we will return 0, which is smaller
than any phase returned by cache::phase_of(), since cache starts from
phase 1. This shouldn't affect current behavior, since we'd abort() if
called for this case, it just makes the value more appropriate for the
new semantics.
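With the fix, the accessor from the excerpt above reduces to:
    utils::phased_barrier::phase_type creation_phase() const {
        // The phase in which the reader was last created, or 0 if it never was.
        return _reader_creation_phase;
    }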
Tests:
- unit.row_cache_test (debug)
Fixes #4236
Message-Id: <1553107389-16214-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 69775c5721)
Introduced in 2a437ab427.
regular_compaction::select_sstable_writer() creates the sstable writer
when the first partition is consumed from the combined mutation
fragment stream. It gets the schema directly from the table
object. That may be a different schema than the one used by the
readers if there was a concurrent schema alter during that small time
window. As a result, the writing consumer attached to readers will
interpret fragments using the wrong version of the schema.
One effect of this is storing values of some columns under a different
column.
This patch replaces all column_family::schema() accesses with accesses
to the _schema member, which is obtained once per compaction and is
the same schema which the readers use.
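Schematically (the writer-creation call shown is an illustrative assumption, not the actual API):
    // Before: re-reads the table's current schema, which may have changed
    // since the readers were created:
    //   auto writer = create_sstable_writer(*cf.schema(), sst);
    // After: use the schema captured once per compaction, shared with readers:
    auto writer = create_sstable_writer(*_schema, sst);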
Fixes #4304.
Tests:
- manual tests with hard-coded schema change injection to reproduce the bug
- build/dev/scylla boot
- tests/sstable_mutation_test
Message-Id: <1551698056-23386-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 58e7ad20eb)
A race condition takes place when one of the sstables selected by a
snapshot is deleted by compaction. The snapshot fails because it tries
to link an sstable that was previously unlinked by compaction's sstable
deletion.
Refs #4051.
(master commit 1b7cad3531)
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20190110194048.26051-1-raphaelsc@scylladb.com>
When bootstrapping, a node should wait to reach schema agreement
with its peers before it can join the ring. This is to ensure it can
immediately accept writes. Failing to reach schema agreement before
joining is not fatal, as the node can pull unknown schemas on writes
on-demand. However, if such a schema contains references to UDFs, the
node will reject writes using it, due to #3760.
To ensure that schema agreement is reached before joining the ring,
`storage_service::join_token_ring()` has two checks. First it checks that
at least one peer was connected previously. For this it compares
`database::get_version()` with `database::empty_version`. The (implied)
assumption is that this will become something other than
`database::empty_version` only after having connected (and pulled
schemas from) at least one peer. This assumption doesn't hold anymore,
as we now set the version earlier in the boot process.
The second check verifies that we have the same schema version as all
known, live peers. This check assumes (since 3e415e2) that we have
already "met" all (or at least some) of our peers and if there is just
one known node (us) it concludes that this is a single-node cluster,
which automatically has schema agreement.
It's easy to see how these two checks will fail. The first fails to
ensure that we have met our peers, and the second wrongfully concludes
that we are a one-node cluster, and hence have schema agreement.
To fix this, modify the first check. Instead of relying on the presence
of a non-empty database version, supposedly implying that we already
talked to our peers, explicitly make sure that we have really talked to
*at least* one other node before proceeding to the second check, which
will now do the correct thing, actually checking the schema versions.
Fixes: #4196
Branches: 3.0, 2.3
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <40b95b18e09c787e31ba6c5519fb64d68b4ca32e.1550228389.git.bdenes@scylladb.com>
(cherry picked from commit 2125e99531)
In case salted_hash was NULL, we'd access uninitialized memory when dereferencing
the optional in get_as<>().
Protect against that by using get_opt() and failing authentication if we see a NULL.
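A sketch of the safer lookup; the row API and error message are illustrative assumptions:
    // Before: get_as<sstring>() dereferences a disengaged optional when the
    // column is NULL. After: treat NULL as an authentication failure.
    auto salted_hash = row.get_opt<sstring>("salted_hash");
    if (!salted_hash) {
        throw exceptions::authentication_exception("Bad credentials");
    }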
Fixes #4168.
Tests: unit (release)
Branches: 3.0, 2.3
Message-Id: <20190211173820.8053-1-avi@scylladb.com>
(cherry picked from commit da9628c6dc)
"
The code reading counter cells from sstables verifies that there are no
unsupported local or remote shards. The latter are detected by checking
if all shards are present in the counter cell header (only remote shards
do not have entries there). However, the logic responsible for doing
that was incorrectly computing the total number of counter shards in a
cell if the header was larger than a single counter shard. This resulted
in incorrect complaints that remote shards are present.
Fixes #4206
Tests: unit(release)
"
* tag 'counter-header-fix/v1' of https://github.com/pdziepak/scylla:
tests/sstables: test counter cell header with large number of shards
sstables/counters: fix remote counter shard detection
(cherry picked from commit d2d885fb93)
"uuid" was ref:ed in a continuation. Works 99.9% of the time because
the continuation is not actually delayed (and assuming we begin the
checks with non-truncated (system) cf:s it works).
But if we do delay continuation, the resulting cf map will be
borked.
Fixes #4187.
Message-Id: <20190204141831.3387-1-calle@scylladb.com>
(cherry picked from commit 9cadbaa96f)
Change the test so that services are correctly torn down, in the
correct order (e.g., storage_service accesses the messaging_service
when stopping).
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180814112111.8521-2-duarte@scylladb.com>
(cherry picked from commit 495a92c5b6)
The original reference points to a thread-local storage object that is
guaranteed to outlive the continuation, but copying it makes the
subsequent calls point to a local object and introduces a
use-after-free bug.
Fixes #3948
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
(cherry picked from commit 68458148e7)
Currently, when advance_and_await() fails to allocate the new gate
object, it will throw bad_alloc and leave the phased_barrier object in
an invalid state. Calling advance_and_await() again on it will result
in undefined behavior (typically SIGSEGV) because _gate will be
disengaged.
One place affected by this is table::seal_active_memtable(), which
calls _flush_barrier.advance_and_await(). If this throws, subsequent
flush attempts will SIGSEGV.
This patch rearranges the code so that advance_and_await() provides the
strong exception guarantee.
Message-Id: <1542645562-20932-1-git-send-email-tgrabiec@scylladb.com>
Fixes #3931.
(cherry picked from commit 57e25fa0f8)
In (almost) all SSTable write paths, we need to inform the monitor that
the write has failed as well. The monitor will remove the SSTable from
controller's tracking at that point.
Except there is one place where we are not doing that: streaming of big
mutations. Streaming of big mutations is an interesting use case, in
which it is done in 2 parts: if the writing of the SSTable fails right
away, then we do the correct thing.
But the SSTables are not committed at that point and the monitors are
still kept around with the SSTables until a later time, when they are
finally committed. Between those two points in time, it is possible that
the streaming code will detect a failure and manually call
fail_streaming_mutations(), which marks the SSTable for deletion. At
that point we should propagate that information to the monitor as well,
but we don't.
Fixes #3732 (hopefully)
Tests: unit (release)
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20181114213618.16789-1-glauber@scylladb.com>
(cherry picked from commit 9f403334c8)
In commit a33f0d6, we changed the way we handle arrays during the write
and parse code to avoid reactor stalls. Some potentially big loops were
transformed into futurized loops, and also some calls to vector resizes
were replaced by a reserve + push_back idiom.
The latter broke parsing of the estimated histogram. The reason being
that the vectors that are used here are already initialized internally
by the estimated_histogram object. Therefore, when we push_back, we
don't fill the array all the way from index 0, but end up with a zeroed
beginning and only push back some of the elements we need.
We could revert this array to a resize() call. After all, the reason we
are using reserve + push_back is to avoid calling the constructor
for each element, but we don't really expect the integer specialization
to do any of that.
However, to avoid confusing future developers who may feel tempted
to convert this as well for the sake of consistency, it is safer to
just make sure these arrays are zeroed.
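The bug in a nutshell (illustrative values):
    std::vector<int64_t> buckets(4); // constructed with 4 zeroed elements
    buckets.reserve(4);              // no-op: capacity is already 4
    buckets.push_back(1);            // appends at index 4, not index 0
    buckets.push_back(2);            // -> {0, 0, 0, 0, 1, 2}
    // indices 0..3 stay zero; the parsed values land past them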
Fixes #3918
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20181116130853.10473-1-glauber@scylladb.com>
(cherry picked from commit c6811bd877)
get_ranges() is supposed to return ranges in sorted order. However, a35136533d
broke this and returned the range that was supposed to be last in the second
position (e.g. [0, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9]). This broke cleanup, which
relied on the sort order to perform a binary search. Other users of the
get_ranges() family did not rely on the sort order.
Fixes #3872.
Message-Id: <20181019113613.1895-1-avi@scylladb.com>
(cherry picked from commit 1ce52d5432)
Fixes #3798. Fixes #3694.
Tests:
unit(release), dtest([new] cql_tests.py:TruncateTester.truncate_after_restart_test)
* tag 'fix-gossip-shard-replication-v1' of github.com:tgrabiec/scylla:
gms/gossiper: Replicate endpoint states in add_saved_endpoint()
gms/gossiper: Make reset_endpoint_state_map() have effect on all shards
gms/gossiper: Replicate STATUS change from mark_as_shutdown() to other shards
gms/gossiper: Always override states from older generations
(cherry picked from commit 48ebe6552c)
Int types in JSON will be serialized to int types in C++. They will then
only be able to handle 4 GB, and we tend to store more data than that.
Without this patch, listsnapshots is broken in all versions.
Fixes: #3845
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20181012155902.7573-1-glauber@scylladb.com>
(cherry picked from commit 98332de268)
The Antlr3 exception class has a null dereference bug that crashes
the system when trying to extract the exception message using the
ANTLR_Exception<...>::displayRecognitionError(...) function. When
a parsing error occurs, the CqlParser throws an exception which is in
turn processed for some special cases in Scylla to generate a custom
message. The default case, however, creates the message using
displayRecognitionError, causing the system to crash.
The fix is a simple workaround, making sure the pointer is not null
before the call to the function. A "proper" fix can't be implemented
because the exception class itself is implemented outside Scylla,
in antlr headers that reside on the host machine OS.
Manually tested 2 test cases: a typo causing Scylla to crash, and
a CQL comment without a newline at the end, which also caused Scylla
to crash. Ran unit tests (release).
Fixes #3740. Fixes #3764.
Signed-off-by: Eliran Sinvani <eliransin@scylladb.com>
Message-Id: <cfc7e0d758d7a855d113bb7c8191b0fd7d2e8921.1538566542.git.eliransin@scylladb.com>
(cherry picked from commit 20f49566a2)
The linker uses an opt-in system for non-executable stack: if all object files
opt into a non-executable stack, the binary will have a non-executable stack,
which is very desirable for security. The compiler cooperates by opting into
a non-executable stack whenever possible (always for our code).
However, we also have an assembly file (for fast power crc32 computations).
Since it doesn't opt into a non-executable stack, we get a binary with
executable stack, which Gentoo's build system rightly complains about.
Fix by adding the correct incantation to the file.
Fixes #3799.
Reported-by: Alexys Jacob <ultrabug@gmail.com>
Message-Id: <20181002151251.26383-1-avi@scylladb.com>
(cherry picked from commit aaab8a3f46)
When validating assignment between two types, it's possible one of
them is wrapped in a reversed_type if it comes, for example, from the
type associated with a clustering column. When checking for weak
assignment the types are correctly unwrapped, but not when checking
for an exact match, which this patch fixes.
Technically, the receiver is never a reversed_type for the current
callers, but this is the morally correct implementation, as the type
being reversed or not plays no role in assignment.
Tests: unit(release)
Fixes #3789
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180927223201.28152-1-duarte@scylladb.com>
(cherry picked from commit 5e7bb20c8a)
We need to validate before calling query_options::prepare() whether
the set of prepared statement values sent in the query matches the
number of names we need to bind; otherwise we risk an out-of-bounds
access if the client also specified names together with the values.
Refs #3688
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180814225607.14215-1-duarte@scylladb.com>
(cherry picked from commit 805ce6e019)
Currently, both scylla-housekeeping-daily/-restart services mistakenly
specify the repo file path as "@@REPOFILES@@", which is copied from the
.in template and needs to be replaced with the actual path.
Fixes #3776
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180921031605.9330-1-syuu@scylladb.com>
(cherry picked from commit 21a12aa458)
The non-TLS RPC server has an rpc::resource_limits configuration that limits
its memory consumption, but the TLS server does not. That means a many-node
TLS configuration can OOM if all nodes gang up on a single replica.
Fix by passing the limits to the TLS server too.
Fixes #3757.
Message-Id: <20180907192607.19802-1-avi@scylladb.com>
(cherry picked from commit 4553238653)
Secondary index queries do not work correctly when multiple
restrictions are present - the rest of the restrictions are simply
ignored, which results in too many rows returned to the client.
This 2.3 fix makes these unsafe queries return an error instead.
Refs #3754
Message-Id: <7e470052d8ffc5bd8dc12e0d7f2705f0754afdbb.1536243391.git.sarna@scylladb.com>
When measuring_output_stream is used to calculate a result element's
size, it incorrectly takes into account not only the serialized element
size, but also a placeholder that the
ser::qr_partition__rows/qr_partition__static_row__cells
constructors put in the beginning. Fix it by taking the starting point
in the stream before element serialization and subtracting it
afterwards.
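Conceptually (names here are assumptions):
    // Measure just the element by diffing the stream position, so the
    // constructor's placeholder bytes are not counted.
    auto start = out.size();
    serialize(out, element);
    auto element_size = out.size() - start;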
Fixes #3755
Message-Id: <20180906153609.GJ2326@scylladb.com>
(cherry picked from commit d7674288a9)
An incorrect column_kind was passed, which may cause the wrong type to
be used for comparison if the schema contains static columns. Affects
only tests.
Spotted during code review.
Message-Id: <1531144991-2658-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 1336744a05)
The reloading flow may hold items in the underlying
loading_shared_values after they have been removed (e.g. via the
remove(key) API), so loading_shared_values.size() doesn't represent the
correct size of the loading_cache. lru_list.size(), on the other hand,
does.
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
(cherry picked from commit 1e56c7dd58)
Reloading may hold value in the underlying loading_shared_values while
the corresponding cache values have already been deleted.
This may create weird situations like this:
<populate cache with 10 entries>
cache.remove(key1);
for (auto& e : cache) {
    std::cout << e << std::endl;
}
<all 10 entries are printed, including the one for "key1">
In order to avoid such situations we are going to make
loading_cache::iterator a transform_iterator over lru_list::iterator
instead of loading_shared_values::iterator, because lru_list contains
entries only for cached items.
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
(cherry picked from commit 945d26e4ee)
The code uses the incorrect output stream in case only a digest is
requested, and thus gets an incorrect data size. Failing to correctly
account for the static row size while calculating the digest may cause
a mismatch between the digest and data queries.
Fixes #3753.
Message-Id: <20180905131219.GD2326@scylladb.com>
(cherry picked from commit 98092353df)
Change the validity timeout from 1s to 1h in order to avoid false
alarms on busy systems: with a short value there is a chance that the
(loading_cache.size() == num_loaders) check is going to run after some
elements of the cache have already been evicted.
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
Message-Id: <20180904193026.7304-1-vladz@scylladb.com>
(cherry picked from commit dae70e1166)
Commit e664f9b0c6 transitioned internal
CQL queries in the auth. sub-system to be executed with finite time-outs
instead of infinite ones.
It should have also modified the functions in `auth/roles-metadata.cc`
to have finite time-outs.
This change fixes some previously failing dtests, particularly around
repair. Without this change, the QUORUM query fails to terminate when
the necessary consistency level cannot be achieved.
Fixes #3736.
Signed-off-by: Jesse Haber-Kucharsky <jhaberku@scylladb.com>
Message-Id: <e244dc3e731b4019f3be72c52a91f23ee4bb68d1.1536163859.git.jhaberku@scylladb.com>
(cherry picked from commit 682805b22c)
When a joining node announces its join status through gossip, other
existing nodes will send writes to the joining node. At this time, it
is possible the joining node hasn't learnt the tokens of other nodes,
which causes errors like the ones below:
token_metadata - sorted_tokens is empty in first_token_index!
storage_proxy - Failed to apply mutation from 127.0.4.1#0:
std::runtime_error (sorted_tokens is empty in first_token_index!)
To fix, wait for the token range setup before announcing the join
status.
Fixes: #3382
Tests: 60 run of materialized_views_test.py:TestMaterializedViews.add_dc_during_mv_update_test
Message-Id: <01abb21ae3315ae275297e507c5956e5774557ef.1536128531.git.asias@scylladb.com>
(cherry picked from commit 89b769a073)
When test.py is run with --jenkins flag Boost UTF is asked to generate
an XML file with the test results. This automatically disables the
human-readable output printed to stdout. There is no real reason to do
so and it is actually less confusing when the Boost UTF messages are in
the test output together with Scylla logger messages.
Message-Id: <20180704172913.23462-1-pdziepak@scylladb.com>
(cherry picked from commit 07a429e837)
When /etc/systemd/system/scylla-server.service.d/capabilities.conf is
not installed, we don't have /etc/systemd/system/scylla-server.service.d/,
so we need to create it.
Fixes #3738
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180904015841.18433-1-syuu@scylladb.com>
(cherry picked from commit bd8a5664b8)
This ensures that row::external_memory_usage() is invariant to
insertion order of cells.
It should be, so that the accounting of a clustering_row, merged from
multiple MVCC versions by the partition_snapshot_flat_reader on behalf
of a memtable flush, doesn't give a greater result than what is used
by the memtable region. Overaccounting leads to assertion failure in
~flush_memory_accounter.
Fixes #3625 (hopefully).
Message-Id: <1535982513-19922-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 4fb3f7e8eb)
"This series introduces a few improvements related to a reload flow.
From now on the callback may assume that the "key" parameter value
is kept alive till the end of its execution in the reloading flow.
It may also safely evict as many items from the cache as needed."
Fixes #3606
* 'loading_cache_improve_reload-v1' of https://github.com/vladzcloudius/scylla:
utils::loading_cache: hold a shared_value_ptr to the value when we reload
utils::loading_cache::on_timer(): remove not needed capture of "this"
utils::loading_cache::on_timer(): use chunked_vector for storing elements we want to reload
(cherry picked from commit f6aadd8077)
When periodically reloading the values in the loading_cache, we would
iterate over the list of entries and call the load() function for
those which need to be reloaded.
For some concrete caches, load() can remove the entry from the LRU set,
and can be executed inline from the parallel_for_each(). This means we
could potentially keep iterating using an invalidated iterator.
Fix this by using a temporary container to hold those entries to be
reloaded.
Spotted when reading the code.
Also use if constexpr and fix the comment in the function containing
the changes.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180712124143.13638-1-duarte@scylladb.com>
(cherry picked from commit 63b63b0461)
The continuation attached to _load() needs the key of the loaded entry
to check whether it was disposed during the load. However, if _load()
invalidates the entry, the continuation's capture line will access
invalid memory while trying to obtain the key.
To avoid this save a copy of the key before calling _load() and pass it
to both _load() and the continuation.
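Roughly (entry/value names are assumptions):
    // Copy the key before _load() can invalidate the entry, and hand the
    // copy to both _load() and the continuation.
    auto key = entry.key();
    return _load(key).then([this, key] (value_type v) {
        on_loaded(key, std::move(v)); // safe: 'key' is our own copy
    });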
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <b571b73076ca863690f907fbd3fb4ff54e597b28.1531393608.git.bdenes@scylladb.com>
(cherry picked from commit 2e7bf9c6f9)
This error is transient, since as soon as the node is up we will be able
to send the migration request. Downgrade it to a warning to reduce anxiety
among people who actually read the logs (like QA).
The message is also badly worded as no one can guess what a migration
request is, but that is left to another patch.
Fixes #3706.
Message-Id: <20180821070200.18691-1-avi@scylladb.com>
(cherry picked from commit 5792a59c96)
memtable flushes for system and regular region groups run under the
memtable_scheduling_group, but the controller adjusts shares based on
the occupancy of the regular region group.
It can happen that regular is not under pressure, but system is. In
this case the controller will incorrectly assign low shares to the
memtable flush of system. This may result in high latency and low
throughput for writes in the system group.
I observed writes to the system keyspace timing out (on scylla-2.3-rc2)
in the dtest: limits_test.py:TestLimits.max_cells_test, which went
away after this.
Fixes #3717.
Message-Id: <1535016026-28006-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 10f6b125c8)
There could be soft pressure, but the soft-pressure flusher may not be
able to make progress (Refs #3716). It will keep trying to flush empty
memtables, which block on earlier flushes to complete, and thus
allocate continuations in memory. Those continuations accumulate in
memory and can cause OOM.
Meanwhile, each flush will take longer to complete. Due to scheduling
group isolation, the soft-pressure flusher will keep getting the CPU.
This causes bad_alloc and crashes of dtest:
limits_test.py:TestLimits.max_cells_test
Fixes #3717
Message-Id: <1535102520-23039-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 2afce13967)
The flusher picks the memtable list which contains the largest region
according to region_impl::evictable_occupancy().total_space(), which
follows region::occupancy().total_space(). But only the latest
memtable in the list can start flushing. It can happen that the
memtable corresponding to the largest region was already flushed to an
sstable (flush permit released), but not yet fsynced or moved to
cache, so it's still in the memtable list.
The latest memtable in the winning list may be small, or empty, in
which case the soft pressure flusher will not be able to make much
progress. There could be other memtable lists with non-empty
(flushable) latest memtables. This can lead to writes unnecessarily
blocking on dirty.
I observed this for the system memtable group, where it's easy for the
memtables to overshoot small soft pressure limits. The flusher kept
trying to flush empty memtables, while the previous non-empty memtable
was still in the group.
The CPU scheduler makes this worse, because it runs memtable_to_cache
in a separate scheduling group, so it further defers in time the
removal of the flushed memtable from the memtable list.
This patch fixes the problem by making regions corresponding to
memtables which started flushing report evictable_occupancy() as 0, so
that they're picked by the flusher last.
Fixes #3716.
Message-Id: <1535040132-11153-2-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 1e50f85288)
When the list of values in the IN list of a single column contains
duplicates, multiple executors are activated, since the assumption
is that each value in the IN list corresponds to a different partition.
This results in the same row appearing in the result a number of times
corresponding to the duplication of the partition value.
Added queries to the IN restriction unit test and fixed a bad result
check.
Fixes #2837
Tests: queries as in the use case from the GitHub issue, in both
prepared and plain forms (using the Python driver); unit tests.
Signed-off-by: Eliran Sinvani <eliransin@scylladb.com>
Message-Id: <ad88b7218fa55466be7bc4303dc50326a3d59733.1534322238.git.eliransin@scylladb.com>
(cherry picked from commit d734d316a6)
* dist/ami/files/scylla-ami c7e5a70...b7db861 (2):
> scylla-ami-setup.service: run only on first startup
> Use fstab to mount RAID volume on every reboot
(cherry picked from commit 54ac334f4b)
Since the Linux system aborts booting when it fails to mount fstab
entries, the user may not be able to see an error message when we use
fstab to mount /var/lib/scylla on the AMI.
Instead of aborting boot, we can just refuse to start
scylla-server.service when the RAID volume is not mounted, using the
RequiresMountsFor directive of the systemd unit file.
See #3640
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180824185511.17557-1-syuu@scylladb.com>
(cherry picked from commit ff55e3c247)
Fixes a regression introduced in
9e88b60ef5, which broke the lookup for
prefetched values of lists when a clustering key is specified.
This is the code that was removed from some list operations:
std::experimental::optional<clustering_key> row_key;
if (!column.is_static()) {
    row_key = clustering_key::from_clustering_prefix(*params._schema, prefix);
}
...
auto&& existing_list = params.get_prefetched_list(m.key().view(), row_key, column);
Put it back, in the form of common code in the update_parameters class.
Fixes #3703
* https://github.com/duarten/scylla cql-list-fixes/v1:
tests/cql_query_test: Test multi-cell static list updates with ckeys
cql3/lists: Fix multi-cell static list updates in the presence of ckeys
keys: Add factory for an empty clustering_key_prefix_view
(cherry picked from commit 6937cc2d1c)
_value_views is the authoritative data structure for the
client-specified values. Indeed, the ctor called by
transport::request::read_options() leaves _values completely empty.
In query_options::prepare(), however, we were using _values to
associate values with the client-specified column names, and not
_value_views. Fix this by using _value_views instead.
As for the reasons we didn't see this bug earlier, I assume it's
because very few drivers set the 0x04 query options flag, which means
column names are omitted. This is the right thing to do since most
drivers have enough information to correctly position the values.
Fixes #3688
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180814234605.14775-1-duarte@scylladb.com>
(cherry picked from commit a4355fe7e7)
After ac27d1c93b, if a read executor has just enough targets to
achieve the request's CL and a connection to one of them is dropped
during execution, a ReadFailed error will be returned immediately and
the client will not have a chance to issue a speculative read (retry).
The patch changes the code to not return the ReadFailed error
immediately, but to wait for the timeout instead and give the client a
chance to issue a speculative read, in case the read executor does not
have additional targets to send speculative reads to by itself.
Fixes #3699.
Message-Id: <20180819131646.GK2326@scylladb.com>
(cherry picked from commit 7277ee2939)
When emplace_back() fails, value is already moved-from into a
temporary, which breaks monotonicity expected from
apply_monotonically(). As a result, writes to that cell will be lost.
The fix is to avoid the temporary by in-place construction of
cell_and_hash. To do that, appropriate cell_and_hash constructor was
added.
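Schematically (the container and type names are illustrative):
    // Before: the cell is moved into a temporary first; if emplace_back()
    // then fails, the original value is already gone:
    //   _cells.emplace_back(cell_and_hash{std::move(c), std::move(h)});
    // After: with the new cell_and_hash constructor, construct in place:
    _cells.emplace_back(std::move(c), std::move(h));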
Found by mutation_test.cc::test_apply_monotonically_is_monotonic with
some modifications to the random mutation generator.
Introduced in 99a3e3a.
Fixes #3678.
Message-Id: <1533816965-27328-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 024b3c9fd9)
In a previous commit we moved debian/scylla-server.service to
debian/scylla-server.scylla-server.service to explicitly specify the
subpackage name, but that doesn't work for dh_installinit without the
'--name' option.
As a result, the current scylla-server .deb package is missing
scylla-server.service, so we need to rename the service back to the
original file name.
Fixes #3675
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180810221944.24837-1-syuu@scylladb.com>
(cherry picked from commit f30b701872)
"
This series addresses SELECT/INSERT JSON support issues, namely
handling null values properly and parsing decimals from strings.
It also comes with updated cql tests.
Tests: unit (release)
"
Fixes #3666. Fixes #3664. Fixes #3667.
* 'json_fixes_3' of https://github.com/psarna/scylla:
cql3: remove superfluous null conversions in to_json_string
tests: update JSON cql tests
cql3: enable parsing decimal JSON values from string
cql3: add missing return for dead cells
cql3: simplify parsing optional JSON values
cql3: add handling null value in to_json
cql3: provide to_json_string for optional bytes argument
(cherry picked from commit 95677877c2)
When we use str.format() to pass variables into the message, it always
causes an exception like "KeyError: 'red'", since the message contains
color variables that are not passed to str.format().
To avoid the error we need to pass all format variables to colorprint()
and run str.format() inside the function.
Fixes #3649
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180803015216.14328-1-syuu@scylladb.com>
(cherry picked from commit ad7bc313f7)
Currently scylla_ec2_check exits silently when the EC2 instance is
optimized for Scylla, so the result of the check is unclear; we need to
output a message.
Note that this change affects the AMI login prompt too.
Fixes #3655
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180808024256.9601-1-syuu@scylladb.com>
(cherry picked from commit 15825d8bf1)
Since our scripts were converted to Python, we can no longer
source them from a shell. Execute them directly instead. Also,
we now need to import configuration variables ourselves, since
scylla_prepare, being an independent process, won't do it for
us.
Fixes #3647
Message-Id: <20180802153017.11112-1-avi@scylladb.com>
(cherry picked from commit c9caaa8e6e)
In previous versions of Fedora, the `crypt_r` function returned
`nullptr` when a requested hashing algorithm was not supported.
This is consistent with the documentation of the function in its man
page.
As of Fedora 28, the function's behavior changes so that the encrypted
text is not `nullptr` on error, but instead the string "*0".
The info pages for `crypt_r` clarify somewhat (and contradict the man
pages):
Some implementations return `NULL` on failure, and others return an
_invalid_ hashed passphrase, which will begin with a `*` and will
not be the same as SALT.
Because of this change of behavior, users running Scylla on a Fedora 28
machine which was upgraded from a previous release would not be able to
authenticate: an unsupported hashing algorithm would be selected,
producing encrypted text that did not match the entry in the table.
With this change, unsupported algorithms are correctly detected and
users should be able to continue to authenticate themselves.
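A sketch of the detection logic (simplified; the helper is hypothetical):
    // Both NULL and an invalid "*"-prefixed result from crypt_r() mean the
    // requested hashing algorithm is unsupported.
    static bool hash_ok(const char* hash) {
        return hash != nullptr && hash[0] != '*';
    }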
Fixes #3637.
Signed-off-by: Jesse Haber-Kucharsky <jhaberku@scylladb.com>
Message-Id: <bcd708f3ec195870fa2b0d147c8910fb63db7e0e.1533322594.git.jhaberku@scylladb.com>
(cherry picked from commit fce10f2c6e)
"
There is an exception safety problem in imr::utils::object. If multiple
memory allocations are needed and one of them fails the main object is
going to be freed (as expected). However, at this stage it is not
constructed yet, so when LSA asks its migrator for the size it may get
a meaningless value. The solution is to remember the size until object
is fully created and use sized deallocation in case of failures.
Fixes #3618.
Tests: unit(release, debug/imr_test)
"
(cherry picked from commit 3b42fcfeb2)
Currently rpc::closed_error is not counted towards replica failure
during read and thus read operation waits for timeout even if one
of the nodes dies. Fix this by counting rpc::closed_error towards
failed attempts.
Fixes #3590.
Message-Id: <20180708123522.GC28899@scylladb.com>
(cherry picked from commit ac27d1c93b)
The calculation consists of several parts with preemption points
between them, so a table can be added while the calculation is ongoing.
Do not assume that the table exists in the intermediate data structure.
Fixes #3636
Message-Id: <20180801093147.GD23569@scylladb.com>
(cherry picked from commit 44a6afad8c)
"
This series replaces infinite time-outs in internal distributed
(non-local) CQL queries with finite ones.
The implementation of tracing, which also performs internal queries,
already has finite time-outs, so it is unchanged.
Fixes #3603.
"
* 'jhk/finite_time_outs/v2' of https://github.com/hakuch/scylla:
Use finite time-outs for internal auth. queries
Use finite query time-outs for `system_distributed`
(cherry picked from commit 620e950fc8)
mock outputs files owned by root. This causes attempts
by scripts that want to junk the working directory (typically
continuous integration) to fail on permission errors.
Fixup those permissions after the fact.
Message-Id: <20180719163553.5186-1-avi@scylladb.com>
(cherry picked from commit b167647bf6)
"
This mini-series covers a regression caused by newest versions
of jsoncpp library, which changed the way of quoting UTF-8 strings.
Tests: unit (release)
"
* 'add_json_quoting_3' of https://github.com/psarna/scylla:
tests: add JSON unit test
types: use value_to_quoted_string in JSON quoting
json: add value_to_quoted_string helper function
Ref #3622.
Reviewed-by: Nadav Har'El <nyh@scylladb.com>
(cherry picked from commit d6ef74fe36)
Previously the CQL grammar wrongfully required INSERT JSON queries
to provide a list of columns, even though they are already
present in the JSON itself.
Unfortunately, tests were written with this false assumption as well,
so they are updated too.
Message-Id: <33b496cba523f0f27b6cbf5539a90b6feb20269e.1532514111.git.sarna@scylladb.com>
Fixes #3631.
(cherry picked from commit f66aace685)
"
The problem happens under the following circumstances:
- we have a partially populated partition in cache, with a gap in the middle
- a read with no clustering restrictions trying to populate that gap
- eviction of the entry for the lower bound of the gap concurrent with population
The population may incorrectly mark the range before the gap as continuous.
This may result in temporary loss of writes in that clustering range. The
problem heals by clearing cache.
Caught by row_cache_test::test_concurrent_reads_and_eviction, which has been
failing sporadically.
The problem is in ensure_population_lower_bound(), which returns true if
current clustering range covers all rows, which means that the populator has a
right to set continuity flag to true on the row it inserts. This is correct
only if the current population range actually starts before all
clustering rows. Otherwise, we're populating starting from _last_row
and should consult it.
Fixes #3608.
"
* 'tgrabiec/fix-violation-of-continuity-on-concurrent-read-and-eviction' of github.com:tgrabiec/scylla:
row_cache: Fix violation of continuity on concurrent eviction and population
position_in_partition: Introduce is_before_all_clustered_rows()
(cherry picked from commit 31151cadd4)
In case population of the vector throws, the vector object would not
be destroyed. It's a managed object, so in addition to causing a leak,
it would corrupt memory if later moved by the LSA, because it would
try to fixup forward references to itself.
Caused sporadic failures and crashes of row_cache_test, especially
with allocation failure injector enabled.
Introduced in 27014a23d7.
Message-Id: <1531757764-7638-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 3f509ee3a2)
`query_partition_key_range()` does the final result merging and trimming
(if necessary) to make sure we don't send more rows to the client than
requested. This merging and trimming is done by a continuation attached
to the `query_partition_key_range_concurrent()` which does the actual
querying. The continuations captures via value the `row_limit` and
`partition_limit` fields of the `query::read_command` object of the
query. This has an unexpected consequence. The lambda object is
constructed after the call to `query_partition_key_range_concurrent()`
returns. If this call doesn't defer, any modifications made to the read
command object by `query_partition_key_range_concurrent()` will be
visible to the lambda. This is undesirable because
`query_partition_key_range_concurrent()` updates the read command object
directly as the vnodes are traversed which in turn will result in the
lambda doing the final trimming according to a decremented `row_limits`,
which will cause the paging logic to declare the query as exhausted
prematurely because the page will not be full.
To avoid all this make a copy of the relevant limit fields before
`query_partition_key_range_concurrent()` is called and pass these copies
to the continuation, thus ensuring that the final trimming will be done
according to the original page limits.
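In outline (variable names and the trimming helper are assumptions):
    // Snapshot the limits now: query_partition_key_range_concurrent()
    // mutates cmd->row_limit and cmd->partition_limit as it walks the vnodes.
    auto row_limit = cmd->row_limit;
    auto partition_limit = cmd->partition_limit;
    return query_partition_key_range_concurrent(cmd, ranges).then(
            [row_limit, partition_limit] (auto results) {
        return trim(std::move(results), row_limit, partition_limit);
    });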
Spotted while investigating a dtest failure on my 1865/range-scans/v2
branch. On that branch the way range scans are executed on replicas is
completely refactored. These changes apparently reduce the number of
continuations in the read path to the point where an entire page can be
filled without deferring, thus causing the problem to surface.
Fixes #3605.
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <f11e80a6bf8089d49ba3c112b25a69edf1a92231.1531743940.git.bdenes@scylladb.com>
(cherry picked from commit cc4acb6e26)
Since some AMIs use consistent network device naming, the primary NIC
ifname is not 'eth0'.
But we hardcoded the NIC name as 'eth0' in scylla_ec2_check, so we need
to add a --nic option to specify a custom NIC ifname.
Fixes #3584
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180712142446.15909-1-syuu@scylladb.com>
(cherry picked from commit ee61660b76)
Drop scylla_lib.sh, since all bash scripts depending on the library
have already been converted to python3, and all scylla_lib.sh features
are implemented in scylla_util.py.
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180711114756.21823-1-syuu@scylladb.com>
(cherry picked from commit 58e6ad22b2)
"Converted more scripts to python3."
* 'script_python_conversion2_v2' of https://github.com/syuu1228/scylla:
dist/common/scripts/scylla_util.py: make run()/out() functions shorter
dist/ami: install python34 to run scylla_install_ami
dist/common/scripts/scylla_ec2_check: move ec2 related code to class aws_instance
dist/common/scripts: drop class concolor, use colorprint()
dist/ami/files/.bash_profile: convert almost all lines to python3
dist/common/scripts: convert node_exporter_install to python3
dist/common/scripts: convert scylla_stop to python3
dist/common/scripts: convert scylla_prepare to python3
(cherry picked from commit 693cf77022)
In the removenode operation, if message servicing is stopped, e.g. due
to disk I/O error isolation, the node can keep retrying the
REPLICATION_FINISHED verb infinitely.
A Scylla log full of messages like the following was observed:
[shard 0] storage_service - Fail to send REPLICATION_FINISHED to $IP:0:
seastar::rpc::closed_error (connection is closed)
To fix, limit the number of retries.
Tests: update_cluster_layout_tests.py
Fixes #3542
Message-Id: <638d392d6b39cc2dd2b175d7f000e7fb1d474f87.1529927816.git.asias@scylladb.com>
(cherry picked from commit bb4d361cf6)
drop_column_family now waits for both writes and reads in progress.
It solves possible liveness issues with row cache, when column_family
could be dropped prematurely, before the read request was finished.
The phaser operation is passed inside the database::query() call.
There are other places where reading logic is applied (e.g. view
replicas), but these are guarded with different synchronization
mechanisms, while _pending_reads_phaser applies to regular reads only.
Fixes #3357
Reported-by: Duarte Nunes <duarte@scylladb.com>
Signed-off-by: Piotr Sarna <sarna@scylladb.com>
Message-Id: <d58a5ee10596d0d62c765ee2114ac171b6f087d2.1529928323.git.sarna@scylladb.com>
(cherry picked from commit 03753cc431)
Currently sysconfig_parser.get() returns the parameter including double
quotes, which causes problems when appending text using
sysconfig_parser.set().
Fixes #3587
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180706172219.16859-1-syuu@scylladb.com>
(cherry picked from commit 929ba016ed)
In bash, a local variable declaration is a separate operation with its
own exit status (always 0), therefore constructs like
local var=`cmd`
will always result in the 0 exit status ($? value) regardless of the actual
result of "cmd" invocation.
To overcome this we should split the declaration and the assignment to be like this:
local var
var=`cmd`
Fixes #3508
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
Message-Id: <1529702903-24909-3-git-send-email-vladz@scylladb.com>
(cherry picked from commit 7495c8e56d)
We broke build_ami.sh when we dropped Ubuntu support: the
scylla_current_repo command does not finish because of a missing
argument ('--target' with no distribution name, since $TARGET is always
blank now). It needs to be hardcoded as centos.
Fixes #3577
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180705035251.29160-1-syuu@scylladb.com>
(cherry picked from commit 3bcc123000)
Use is_debian()/is_ubuntu() to detect target distribution, also install
pystache by path since package name is different between Fedora and
CentOS.
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180703193224.4773-1-syuu@scylladb.com>
(cherry picked from commit 3cb7ddaf68)
Gentoo Linux was not supported by the node_health_check script
which resulted in the following error message displayed:
"This s a Non-Supported OS, Please Review the Support Matrix"
This patch adds support for Gentoo Linux while adding a TODO note
to add support for authenticated clusters which the script does
not support yet.
Signed-off-by: Alexys Jacob <ultrabug@gentoo.org>
Message-Id: <20180703124458.3788-1-ultrabug@gentoo.org>
(cherry picked from commit 8c03c1e2ce)
"
In the same way that drivers can route requests to a coordinator that
is also a replica of the data used by the request, we can allow
drivers to route requests directly to the shard. This patchset
adds and documents a way for drivers to know which shard a connection
is connected to, and how to perform this routing.
"
* tag 'shard-info-alt/v1' of https://github.com/avikivity/scylla:
doc: documented protocol extension for exposing sharding
transport: expose more information about sharding via the OPTIONS/SUPPORTED messages
dht: add i_partitioner::sharding_ignore_msb()
(cherry picked from commit 33d7de0805)
Use query::is_single_partition() to check whether the queried ranges are
singular or not. The current method of using
`dht::partition_range::is_singular()` is incorrect, as it is possible to
build a singular range that doesn't represent a single partition.
`query::is_single_partition()` correctly checks for this so use it
instead.
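Illustratively:
    // A singular range is not necessarily a single partition: it can be
    // built around a token, which many partition keys may share.
    if (query::is_single_partition(range)) {
        // single-partition read path
    }
    // rather than: if (range.is_singular()) { ... }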
Found during code-review.
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <f671f107e8069910a2f84b14c8d22638333d571c.1530675889.git.bdenes@scylladb.com>
(cherry picked from commit 8084ce3a8e)
After the transition to the new in-memory representation in
aab6b0ee27 'Merge "Introduce new in-memory
representation for cells" from Paweł'
atomic_cell_or_collection::external_memory_usage() stopped accounting
for the externally stored data. Since it wasn't covered by the unit
tests, the bug remained unnoticed until now.
This series fixes the memory usage calculation and adds proper unit
tests.
* https://github.com/pdziepak/scylla.git fix-external-memory-usage/v1:
tests/mutation: properly mark atomic_cells that are collection members
imr::utils::object: expose size overhead
data::cell: expose size overhead of external chunks
atomic_cell: add external chunks and overheads to
external_memory_usage()
tests/mutation: test external_memory_usage()
(cherry picked from commit 2ffb621271)
do_fetch_page() checks in the beginning whether there is a saved query
state already, meaning this is not the first page. If there is not, it
checks whether the query is for singular partitions or a range scan,
to decide whether to enable stateful queries or not. This check
assumed that there is at least one range in _ranges, which will not
hold under some circumstances. Add a check for _ranges being empty.
Fixes: #3564
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <cbe64473f8013967a93ef7b2104c7ca0507afac9.1530610709.git.bdenes@scylladb.com>
(cherry picked from commit 59a30f0684)
"
Added a NIC / disk existence check and a --force-raid mode to
scylla_raid_setup.
"
* 'scylla_setup_fix4' of https://github.com/syuu1228/scylla:
dist/common/scripts/scylla_raid_setup: verify specified disks are unused
dist/common/scripts/scylla_raid_setup: add --force-raid to construct raid even only one disk is specified
dist/common/scripts/scylla_setup: don't accept disk path if it's not block device
dist/common/scripts/scylla_raid_setup: verify specified disk paths are block device
dist/common/scripts/scylla_sysconfig_setup: verify NIC existence
(cherry picked from commit a36b1f1967)
scylla_install_pkg was initially written for the one-liner installer,
but now it is only used for creating the AMI, and it is just a few
lines of code, so it should be merged into the scylla_install_ami
script.
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180612150106.26573-2-syuu@scylladb.com>
(cherry picked from commit 084c824d12)
"
I found problems in the previously submitted patchsets 'scylla_setup
fixes' and 'more fixes for scylla_setup', so I fixed them and merged
them into one patchset.
Also added a few more patches.
"
* 'scylla_setup_fix3' of https://github.com/syuu1228/scylla:
dist/common/scripts/scylla_setup: allow input multiple disk paths on RAID disk prompt
dist/common/scripts/scylla_raid_setup: skip constructing RAID0 when only one disk specified
dist/common/scripts/scylla_raid_setup: fix module import
dist/common/scripts/scylla_setup: check disk is used in MDRAID
dist/common/scripts/scylla_setup: move unmasking scylla-fstrim.timer on scylla_fstrim_setup
dist/common/scripts/scylla_setup: use print() instead of logging.error()
dist/common/scripts/scylla_setup: implement do_verify_package() for Gentoo Linux
dist/common/scripts/scylla_coredump_setup: run os.remove() when the directory being deleted is a symlink
dist/common/scripts/scylla_setup: don't include the disk on unused list when it contains partitions
dist/common/scripts/scylla_setup: skip running rest of the check when the disk detected as used
dist/common/scripts/scylla_setup: add a disk to selected list correctly
dist/common/scripts/scylla_setup: fix wrong indent
dist/common/scripts: sync instance type list for detecting NIC type to the latest one
dist/common/scripts: verify systemd unit existence using 'systemctl cat'
(cherry picked from commit 0b148d0070)
"
If a coordinator sends write requests with ID=X and restarts, it may get a reply to
the request after it restarts and sends another request with the same ID (but to
different replicas). This condition will trigger an assert in a coordinator. Drop
the assertion in favor of a warning and initialize handler id in a way to make
this situation less likely.
Fixes: #3153
"
* 'gleb/write-handler-id' of github.com:scylladb/seastar-dev:
storage_proxy: initialize write response id counter from wall clock value
storage_proxy: drop virtual from signal(gms::inet_address)
storage_proxy: do not assert on getting an unexpected write reply
(cherry picked from commit a45c3aa8c7)
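A hedged sketch of the initialization idea (names and granularity assumed): seeding the counter from the wall clock makes it unlikely that a restarted coordinator reuses an ID that is still in flight on a replica.
    #include <chrono>
    #include <cstdint>
    // Seed the write-response id counter from wall-clock time instead of
    // zero, so ids rarely collide across coordinator restarts.
    static uint64_t initial_response_id() {
        using namespace std::chrono;
        return duration_cast<milliseconds>(
                system_clock::now().time_since_epoch()).count();
    }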
When nodetool repair is used with the combination of the "-pr" (primary
range) and "-local" (only repair with nodes in the same DC) options,
Scylla needs to define the "primary ranges" differently: Rather than
assign one node in the entire cluster to be the primary owner of every
token, we need one node in each data-center - so that a "-local"
repair will cover all the tokens.
Fixes#3557.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180701132445.21685-1-nyh@scylladb.com>
(cherry picked from commit 3194ce16b3)
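A sketch of the "-local" primary-range idea on a simplified ring model (types invented for illustration): instead of one global primary owner per token, each DC gets its own primary owner - the first node in ring order from the token that belongs to that DC.
    #include <cstdint>
    #include <optional>
    #include <string>
    #include <vector>

    struct ring_node { int64_t token; std::string endpoint; std::string dc; };

    // Walk the ring starting at `token` and return the first node that
    // belongs to `dc`; `nodes` must be sorted by token.
    std::optional<std::string> primary_owner_in_dc(
            const std::vector<ring_node>& nodes, int64_t token,
            const std::string& dc) {
        size_t n = nodes.size();
        size_t start = 0;
        while (start < n && nodes[start].token < token) {
            ++start;
        }
        for (size_t i = 0; i < n; ++i) {
            const ring_node& node = nodes[(start + i) % n];
            if (node.dc == dc) {
                return node.endpoint;
            }
        }
        return std::nullopt;
    }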
Introduced in 5b59df3761.
It is incorrect to erase entries from the memtable being moved to
cache if the partition update can be preempted, because a later memtable
read may create a snapshot in the memtable before memtable writes for
that partition are made visible through cache. As a result the read
may miss some of the writes which were in the memtable. The code was
checking for the presence of snapshots when entering the partition, but
this condition may change if the update is preempted. The fix is to not
allow erasing if the update is preemptible.
This also caused SIGSEGVs because we were assuming that no such
snapshots will be created and hence were not invalidating iterators on
removal of the entries, which results in undefined behavior when such
snapshots are actually created.
Fixes SIGSEGV in dtest: limits_test.py:TestLimits.max_cells_test
Fixes#3532
Message-Id: <1530129009-13716-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit b464b66e90)
"
With DateTiered and TimeWindow, there is a read optimization enabled
which excludes sstables based on overlap with recorded min/max values
of clustering key components. The problem is that it doesn't take into
account partition tombstones and static rows, which should still be
returned by the reader even if there is no overlap in the query's
clustering range. A read which returns no clustering rows can
mispopulate the cache, which will appear as the partition deletion or
writes to the static row being lost, until node restart or eviction of
the partition entry.
There is also a bad interaction between cache population on read and
that optimization. When the clustering range of the query doesn't
overlap with any sstable, the reader will return no partition markers
for the read, which leads cache populator to assume there is no
partition in sstables and it will cache an empty partition. This will
cause later reads of that partition to miss prior writes to that
partition until it is evicted from cache or node is restarted.
Disable until a more elaborate fix is implemented.
Fixes#3552. Fixes#3553.
"
* tag 'tgrabiec/disable-min-max-sstable-filtering-v1' of github.com:tgrabiec/scylla:
tests: Add test for slicing a mutation source with date tiered compaction strategy
tests: Check that database conforms to mutation source
database: Disable sstable filtering based on min/max clustering key components
(cherry picked from commit e1efda8b0c)
Fixes#3546
Both older Origin and Scylla write "known" compressor names (i.e. those
in the Origin namespace) unqualified (i.e. LZ4Compressor).
This behaviour was not preserved in the virtualization change, but it
probably should be.
Message-Id: <20180627110930.1619-1-calle@scylladb.com>
(cherry picked from commit 054514a47a)
"
Cache tracker is a thread-local global object that indirectly depends on
the lifetimes of other objects. In particular, a member of
cache_tracker, mutation_cleaner, may extend the lifetime of a
mutation_partition until the cleaner is destroyed. The
mutation_partition itself depends on LSA migrators, which are
thread-local objects. Since there is no direct dependency between
LSA migrators and cache_tracker, it is not guaranteed that the former
won't be destroyed before the latter. The easiest (barring some unit
tests that repeat the same code several billion times) solution is to
stop using globals.
This series also improves the part of LSA sanitiser that deals with
migrators.
Fixes#3526.
Tests: unit(release)
"
* tag 'deglobalise-cache-tracker/v1-rebased' of https://github.com/pdziepak/scylla:
mutation_cleaner: add disclaimer about mutation_partition lifetime
lsa: enhance sanitizer for migrators
lsa: formalise migrator id requirements
row_cache: deglobalise row cache tracker
This works around a problem of std::terminate() being called in a
debug-mode build if initialization of _current throws.
Backtrace:
Thread 2 "row_cache_test_" received signal SIGABRT, Aborted.
0x00007ffff17ce9fb in raise () from /lib64/libc.so.6
(gdb) bt
#0 0x00007ffff17ce9fb in raise () from /lib64/libc.so.6
#1 0x00007ffff17d077d in abort () from /lib64/libc.so.6
#2 0x00007ffff5773025 in __gnu_cxx::__verbose_terminate_handler() () from /lib64/libstdc++.so.6
#3 0x00007ffff5770c16 in ?? () from /lib64/libstdc++.so.6
#4 0x00007ffff576fb19 in ?? () from /lib64/libstdc++.so.6
#5 0x00007ffff5770508 in __gxx_personality_v0 () from /lib64/libstdc++.so.6
#6 0x00007ffff3ce4ee3 in ?? () from /lib64/libgcc_s.so.1
#7 0x00007ffff3ce570e in _Unwind_Resume () from /lib64/libgcc_s.so.1
#8 0x0000000003633602 in reader::reader (this=0x60e0001160c0, r=...) at flat_mutation_reader.cc:214
#9 0x0000000003655864 in std::make_unique<make_forwardable(flat_mutation_reader)::reader, flat_mutation_reader>(flat_mutation_reader &&) (__args#0=...)
at /usr/include/c++/7/bits/unique_ptr.h:825
#10 0x0000000003649a63 in make_flat_mutation_reader<make_forwardable(flat_mutation_reader)::reader, flat_mutation_reader>(flat_mutation_reader &&) (args#0=...)
at flat_mutation_reader.hh:440
#11 0x000000000363565d in make_forwardable (m=...) at flat_mutation_reader.cc:270
#12 0x000000000303f962 in memtable::make_flat_reader (this=0x61300001d540, s=..., range=..., slice=..., pc=..., trace_state_ptr=..., fwd=..., fwd_mr=...)
at memtable.cc:592
Message-Id: <1528792447-13336-1-git-send-email-tgrabiec@scylladb.com>
"
The read path on coordinator involves a lot of passing around buffers
and some occasional processing. We start with query::result obtained
from the storage_proxy which is then transformed into a
cql3::result_set, which is then used to write a response. Buffers are
copied and linearised quite excessively.
This series attempts to remedy that by using views of fragmented buffers
as much as possible. The first part deals with reading from
query::result. ser::buffer_view is introduced which enables the IDL
infrastructure to read a buffer without copying or linearising it.
The second part is switching native protocol layer to use bytes_ostream
instead of std::vector<char> to hold the generated response to the
client. The last part introduces cql3::result_generator which is an
alternative to cql3::result_set that passes buffer views without copying
or linearising anything from query::result to the native protocol layer
(or Thrift). It is only used in simple cases, when no processing at the
CQL layer is required, except for paged queries which require some
simple interpretation of the results and are supported by the result
generator.
Tests: unit(release), dtests(paging_test.py paging_additional_test.py
cql_additional_tests.py cql_tracing_test.py cql_prepared_test.py
cql_cast_test.py cql_tests.py)
"
* tag 'buffer-views-query-result/v2' of https://github.com/pdziepak/scylla: (34 commits)
cql3: select_statement: use fetch_page_generator() if possible
pager: add fetch_page_generator()
pager: make the visitor handle_result() accept a template parameter
pager: make query_result_visitor base class a template parameter
pager: make myvistor a member class of query_pager
pager: make shared pointers to selection constant
pager: merge query_pager and query_pagers::impl
cql3: select_statement: use result_generator if possible
cql3: selection: add is_trivial()
cql3: result: support result_generator
cql3: add lazy result_generator
cql3: add result class
cql3::result_set: fix encapsulation
thrift: use cql3::result_set visiting interface
transport: use cql3::result_set visiting interface
cql3::result_set: add visit()
transport: response: add write_int_placeholder()
transport: steal response buffers and make send zero-copy
transport: use reusable_buffer for compression
transport: response: use bytes_ostream
...
mutation_cleaner has already caused problems by extending the lifetime of
mutation_partition past the lifetime of the LSA migrators that it uses (due
to the fact that both the cleaner and migrators were thread-local
globals). Since the long-term goal is to make the mutation_partition
internal representation depend more and more on the schema, that lifetime
extension may again cause problems in the future, so let's add a
disclaimer that will hopefully help avoid them.
The current LSA sanitizer performs only basic checks on migrator use,
without doing any additional reporting in case an error is detected. This
patch enhances it so that when a problem is detected the relevant stack
traces get printed.
object_descriptor uses special encoding for migrator ids which assumes
that the valid ones are in a range smaller than uint32_t. Let's add some
static asserts that make this fact more visible.
The row cache tracker has numerous implicit dependencies on other objects
(e.g. LSA migrators for data held by mutation_cleaner). The fact that
both the cache tracker and some of those dependencies are thread-local
objects makes it hard to guarantee a correct destruction order.
Let's deglobalise the cache tracker and put it in the database class.
So far query_result_visitor was tied to result_set_builder. The goal is
to enable result_generator to work with paged queries as well so we need
to decouple them.
Shared pointers make code harder to reason about. It is not easy to get
rid of them in this piece of the code, but we can restore at least a bit
of sanity by adding consts.
There is just a single implementation of query_pager and there is no
reason to make anything virtual. Devirtualising this code will allow
higher layers to pass visitors via templates.
cql3::result can now hold either a result_set or a result_generator.
Some code that is not performance critical expects to get a result_set,
so a way of converting the result_generator to a result_set is added.
result_generator is a restricted alternative to result_set. It supports
only the simplest cases, but is much cheaper as it passes data almost
directly from query::result to its visitor bypassing much of the CQL
layer.
So far the only way of returning the result of a CQL query was to build a
result_set. An alternative lazy result generator is going to be
introduced for the simple cases when no transformations at the CQL layer
are needed. To do that we need to hide from the users the fact that there
are going to be multiple representations of CQL results.
This visiting interface for result_set satisfies most of its users (at
least all of those which are in the hot path). It will allow having an
alternative to result_set (i.e. the lazy result generator) which would
provide exactly the same interface.
This allows the response writer to defer writing integers until a later
time. It will be used by the lazy response generator which will know the
number of rows in the response only after they are all written.
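A toy illustration of the placeholder technique with a plain byte vector (the real code uses bytes_ostream and its own helpers): reserve the bytes for the integer up front, then patch them once the count is known.
    #include <cstdint>
    #include <cstring>
    #include <vector>
    // Toy: reserve 4 bytes for a big-endian row count, patch them once
    // the rows have been written and counted.
    std::vector<char> write_rows_with_count() {
        std::vector<char> buf;
        size_t hole = buf.size();
        buf.insert(buf.end(), 4, 0);     // placeholder for the count
        uint32_t rows = 0;
        // ... append serialized rows here, incrementing rows ...
        uint32_t be = __builtin_bswap32(rows);
        std::memcpy(buf.data() + hole, &be, sizeof(be));
        return buf;
    }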
Compression algorithms require us to linearise bytes_ostream. This may
cause an excessive number of large allocations. Using reusable_buffers
can avoid that.
std::vector<char> is not a very good container for incrementally
building a response. It may cause excessive copies and allocations. If
the response is large it will put more pressure on the memory allocator
by requiring the buffer to be contiguous.
We already have bytes_ostream which avoids all of these problems, so
let's use it.
So far cql_server::response was passed around using shared pointers.
They come at the significant cost of making the code hard to reason
about. All that is not necessary, and we can easily switch to using the
much more sensible std::unique_ptr.
There are some other translation units which right now are satisfied
with the response being an incomplete type. This means that
std::unique_ptr can't be used for it. Let's move the class declaration
to a header that can be included where needed.
This commit adds a helper class reusable_buffer which can be used to
avoid excessive memory allocations of large buffers when bytes_ostream
needs to be linearised. The idea is that reusable_buffer in most cases
is going to be thread local so that multiple continuation chains can
reuse the same large buffer.
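A minimal sketch of the idea, heavily simplified relative to the real class: one thread-local buffer that only ever grows and is handed out for each linearisation, so large temporary allocations are not repeated.
    #include <cstddef>
    #include <vector>
    // Simplified reusable buffer: grows to the largest size requested so
    // far and hands out the same storage on every call.
    struct reusable_buffer {
        std::vector<char> storage;
        char* get(size_t size) {
            if (storage.size() < size) {
                storage.resize(size);
            }
            return storage.data();
        }
    };
    thread_local reusable_buffer linearisation_buffer;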
query::result_view already operates on views of a serialised
query::result. However, until now the value of a cell was always
linearised and copied. This patch makes use of ser::buffer_view to avoid
that.
ser::buffer_view is a view of a fragmented buffer in a stream of
IDL-serialised data. It can be used to deserialise IDL objects without
needless copying and linearisation of large blobs.
"
Converted all setup scripts from bash to python3.
"
* 'scripts_python_conversion_v1' of https://github.com/syuu1228/scylla:
dist/common/scripts: convert scylla_kernel_check to python3
dist/common/scripts: convert scylla_ec2_check to python3
dist/common/scripts: convert scylla_sysconfig_setup to python3
dist/common/scripts: convert scylla_setup to python3
dist/common/scripts: convert scylla_selinux_setup to python3
dist/common/scripts: convert scylla_raid_setup to python3
dist/common/scripts: convert scylla_ntp_setup to python3
dist/common/scripts: convert scylla_fstrim_setup to python3
dist/common/scripts: convert scylla_dev_mode_setup to python3
dist/common/scripts: convert scylla_cpuset_setup to python3
dist/common/scripts: convert scylla_cpuscaling_setup to python3
dist/common/scripts: convert scylla_coredump_setup to python3
dist/common/scripts: convert scylla_bootparam_setup to python3
dist/common/scripts: extend scylla_util.py to convert setup scripts to python3
dist/common/scripts: convert scylla_io_setup and scylla_util.py to python3
The name "column_family" is both awkward and obsolete. Rename to
the modern and accurate "table".
An alias is kept to avoid huge code churn.
To prevent a One Definition Rule violation, a preexisting "table"
type is moved to a new namespace row_cache_stress_test.
Tests: unit (release)
Message-Id: <20180624065238.26481-1-avi@scylladb.com>
This patchset brings support for writing range tombstones to SSTables
3.x. ('mc' format).
In SSTables 3.x, range tombstones are represented by so-called range
tombstone markers (hereafter RT markers) that denote range tombstone
start and end bounds. So each range tombstone is represented in data
file by two ordered RT markers.
There are also markers that both close the previous range tombstone and
open the new one in case two range tombstones are adjacent. This is
done to consume less disk space on such occasions.
Range tombstones written as RT markers are naturally non-overlapping.
* github.com:argenet/scylla projects/sstables-30/write-range-tombstones/v6
range_tombstone_stream: Remove an unused boolean flag.
Revert "Add missing enum values to bound_kind."
sstables: Move to_deletion_time helper up and make it static.
sstables: Write end-of-partition byte before flushing the last index
block.
sstables: Add support for writing range tombstones in SSTables 3.x
format.
tests: Add unit test covering simple range tombstone.
tests: Add unit test covering adjacent range tombstones.
tests: Add test to cover non-adjacent RTs.
tests: Add test covering mixed rows and range tombstones.
tests: Add test covering SSTables 3.x with many RTs.
tests: Add unit test covering overlapping RTs and rows.
tests: Add tests writing a range tombstone and a row overlapping with
its start.
tests: Add tests writing a range tombstone and a row overlapping with
its end.
tests: Add function that writes from multiple memtable into SSTables.
tests: Add test where 2nd range tombstone covers the remainder of the
1st one.
tests: Add test writing two non-adjacent range tombstones with same
clustering key prefix at their bounds.
tests: Add test covering overlapped range tombstones.
"
This series addresses issue #3516 and enhances space watchdog to make it
device-aware. It's needed because, since the last MV-related changes, the
space watchdog can be responsible for multiple hints managers, which means
multiple directories, which may mean multiple devices.
Hence, having a single static space size limit is not enough anymore,
and the watchdog should take into account that different managers
may work on different disks, while other managers can share
the same device.
Tests: unit (release)
"
* 'enhance_space_watchdog_4' of https://github.com/psarna/scylla:
hints: reserve more space for dedicated storage
hints: add is_mountpoint function
hints: make space_watchdog device-aware
hints: add device_id to manager
hints: add get_device_id function
Reserving 10% of space for hints managers makes sense if the device
is shared with other components (like /data or /commitlog).
But if the hints directory is mounted on dedicated storage, it makes
sense to reserve much more - 90% was chosen as a sane limit.
Whether storage is 'dedicated' or not is determined by a simple check
of whether the given hints directory is a mount point.
Fixes#3516
Signed-off-by: Piotr Sarna <sarna@scylladb.com>
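A hedged sketch of such a mount-point check (error handling omitted, and the assumption is the classic one: a directory is a mount point when it resides on a different device than its parent, which stat(2)'s st_dev exposes):
    #include <string>
    #include <sys/stat.h>
    // A directory is a mount point if it lives on a different device
    // (st_dev) than its parent directory.
    bool is_mountpoint(const std::string& path) {
        struct stat self{}, parent{};
        ::stat(path.c_str(), &self);
        ::stat((path + "/..").c_str(), &parent);
        return self.st_dev != parent.st_dev;
    }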
Instead of having one static space limit for all directories,
space_watchdog now keeps a per-device limit, shared among
hints managers residing on the same disks.
References #3516
Signed-off-by: Piotr Sarna <sarna@scylladb.com>
In order to make space_watchdog device-aware, a device_id field
is added to the hints manager. It's an equivalent of stat.st_dev,
and it identifies the disk that contains the manager's root directory.
Signed-off-by: Piotr Sarna <sarna@scylladb.com>
In order to distinguish which directories reside on which devices,
a get_device_id function is added to the resource manager.
Signed-off-by: Piotr Sarna <sarna@scylladb.com>
To port the setup scripts to python3, the following utility functions/classes
are introduced:
- run(): execute a command line, returns the return code
- out(): execute a command line, returns stdout as a string
- is_debian_variant() / is_redhat_variant() / is_gentoo_variant()
/ is_ec2() / is_systemd(): detect a specific environment
- hex2list(): implement the hex2list.py code as a function
- makedirs(): same as os.makedirs() but does nothing when the dir already exists
- dist_name() / dist_ver(): alias of platform.dist()
- class systemd_unit: a utility to control a systemd unit using systemctl
- class sysconfig_parser: reader/writer of /etc/sysconfig files
- class concolor: a list of ANSI color escape sequences
Very often people use the issue tracker to just ask questions. We have
been telling them to close the bug and move the discussion somewhere
else but it would be better if people were already directed to the right
place before they even get it wrong.
This would be easier for everybody.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20180621135051.3254-1-glauber@scylladb.com>
This comes in handy when we want to test overlapping range tombstones
because memtable would otherwise de-overlap them internally.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
Tests three cases:
- a row lying inside a range tombstone
- a row that has the same clustering key as range tombstone start
- a row that has the same clustering key as range tombstone end
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
These are two RTs where one's end bound has the same clustering as the
other one's start bound, but both bounds are exclusive.
In this case those bounds should not (and cannot) be merged into a
single RT boundary when writing RT markers.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
For SSTables 3.x. ('mc' format), range tombstones are represented by
their bounds that are written to the data file as so-called RT markers.
For adjacent range tombstones, an RT marker can be of a 'boundary' type
which means it closes the previous range tombstone and opens the new
one.
Internally, sstable_writer_m relies on range_tombstone_stream to both
de-overlap incoming range tombstones and order them so that when they
are drained they can be easily thought of as just pairs of their bounds.
By default the Scylla Docker image runs without security features.
This patch adds support for the user to supply different parameter values
for the authenticator and authorizer classes, allowing a secure Scylla
setup in Docker.
For example if you want to run a secure Scylla with password and authorization:
docker run --name some-scylla -d scylladb/scylla --authenticator
PasswordAuthenticator --authorizer CassandraAuthorizer
Update the Docker documentation with the new command line options.
Signed-off-by: Noam Hasson <noam@scylladb.com>
Message-Id: <20180620122340.30394-1-noam@scylladb.com>
With the current .bash_profile, "Constructing RAID volume..." is printed while
scylla_ami_setup is still running, even when it is running on unsupported
instance types.
To avoid that we need to run the instance type check first; only then can
we run the rest of the script.
Fixes#2739
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180613111539.30517-1-syuu@scylladb.com>
"
Make sure we properly handle row marker and row tombstone
when reading a row.
Tests: unit {release}
"
* 'haaawk/sstables3/read-liveness-info-v4' of ssh://github.com/scylladb/seastar-dev:
sstable: consume row marker in data_consume_rows_context_m
sstable: Add consumer_m::consume_row_marker_and_tombstone
sstable: add is_set and to_row_marker to liveness_info
* https://github.com/vladzcloudius/scylla.git tracing_prepared_parameters-v6:
cql3::query_options: add get_names() method
tracing::trace_state: hide the internals of params_values
tracing: store queries statements for BATCH
tracing: store the prepared statements parameters values
"
A few fixes for problems in scripts that were found when debugging #3508.
This series fixes that issue.
"
Fixes#3508
* 'ami_scripts_fixes-v1' of https://github.com/vladzcloudius/scylla:
scylla_io_setup: properly define the disk_properties YAML hierarchy
scylla_io_setup: fix a typo: s/write_bandwdith/write_bandwidth/
scylla_io_setup: hardcode the "mountpoint" YAML node to "/var/lib/scylla" for AMIs
scylla_io_setup: print the io_properties.yaml file name and not its handle info
scylla_lib.sh: tolerate perftune.py errors
"
We are seeing some workloads with large datasets where the compaction
controller ends up with a lot of shares. Regardless of whether or not
we'll change the algorithm, this patchset handles a more basic issue,
which is the fact that the current controller doesn't set a maximum
explicitly, so if the input is larger than the maximum it will keep
growing without bounds.
It also pushes the maximum input point of the compaction controller from
10 to 30, allowing us to err on the side of caution for the 2.2 release.
"
* 'tame-controller' of github.com:glommer/scylla:
controller: do not increase shares of controllers for inputs higher than the maximum
controller: adjust constants for compaction controller
"
This mini series fixes some querier-cache related issues discovered
while working on stateful range-scans.
1) A problem in the memory based cache eviction test that is as yet
unexposed (#3529).
2) Possible usage of invalidated iterators in querier_cache (#3424).
3) lookup() possibly returning a querier with the wrong read range
(#3530).
Tests: unit(release)
"
* 'fix-querier-cache-invalid-iterators-master' of https://github.com/denesb/scylla:
querier: find_querier(): return end() when no querier matches the range
querier_cache: restructure entries storage
tests/querier_cache: fix memory based eviction test
batch_statement::verify_batch_size() verifies that the total size of
mutations generated by the batch statement is smaller than certain
configurable thresholds. This is done by a custom mutation_partition
visitor, which violates atomic_cell_view::value() preconditions by
calling it even for dead cells.
The simplest solution is to use
mutation_partition::external_memory_usage() instead.
Message-Id: <20180619131405.12601-1-pdziepak@scylladb.com>
When dropping a table, wait for the column family to quiesce so that
no pending writes compete with the truncate operation, possibly
allowing data to be left on disk.
Fixes#2562
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180618193134.31971-1-duarte@scylladb.com>
Patch f39891a999 fixed #3443,
but also introduced a regression in dtest - a new column
was unconditionally added to the view during ALTER TABLE ADD,
while that should only be the case for "include all columns" views.
This patch fixes the regression (spotted by query_new_column_test).
References #3443
Message-Id: <7410d965255a514d78cf0ce941a3236b9d8ddbbd.1529399135.git.sarna@scylladb.com>
When none of the queriers found for the lookup key match the lookup
range `_entries.end()` should be returned as the search failed. Instead
the iterator returned from the failed `std::find_if()` is returned
which, if the find failed, will be the end iterator returned by the
previous call to `_entries.equal_range()`. This is incorrect because as
long as `equal_range()`'s end iterator is not also `_entries.end()` the
search will always return an iterator to a querier regardless of whether
any of them actually matches the read range.
Fix by returning `_entries.end()` when it is detected that no queriers
match the range.
Fixes: #3530
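The shape of the fix, as a hedged sketch over a hypothetical multimap-like `_entries` and a hypothetical `matches_read_range` predicate:
    #include <algorithm>
    // On a failed find, return the container's end(), not the end
    // iterator that equal_range() happened to produce.
    template <typename Entries, typename Pred>
    auto find_querier(Entries& _entries,
                      const typename Entries::key_type& key,
                      Pred matches_read_range) {
        auto [begin, end] = _entries.equal_range(key);
        auto it = std::find_if(begin, end, matches_read_range);
        return it == end ? _entries.end() : it;
    }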
Currently querier_cache uses a `std::unordered_map<utils::UUID, querier>`
to store cache entries and an `std::list<meta_entry>` to store meta
information about the querier entries, like insertion order, expiry
time, etc.
All cache eviction algorithms use the meta-entry list to evict entries
in reverse insertion order (LRU order). To make this possible
meta-entries keep an iterator into the entry map so that given a
meta-entry one can easily erase the querier entry. This however poses a
problem as std::unordered_map can possibly invalidate all its iterators
when new items are inserted. This is use-after-free waiting to happen.
Another disadvantage of the current solution is that it requires the
meta-entry to use a weak pointer to the querier entry so that in case
the entry is removed (as a result of a successful lookup) it doesn't try to
access it. This has an impact on all cache eviction algorithms as they
have to be prepared to deal with stale meta-entries. Stale meta-entries
also unnecessarily consume memory.
To solve these problems redesign how querier_cache stores entries
completely. Instead of storing the entries in an `std::unordered_map`
and storing the meta-entries in an `std::list`, store the entries in an
`std::list` and an intrusive-map (index) for lookups. This new design
has several advantages over the old one:
* The entries will now be in insert order, so eviction strategies can
work on the entry list itself, no need to involve additional data
structures for this.
* All data related to an entry is stored in one place, no data
duplication.
* Removing an entry automatically removes it from the index, as intrusive
containers support auto-unlink. This means there is no need to store
iterators long-term, risking use-after-free when the container
invalidates its iterators.
Additional changes:
* Modify eviction strategies so that they work with the `entry`
interface rather than the stored value directly.
Ref #3424
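A condensed sketch of the new layout (types simplified, an int standing in for utils::UUID): the std::list owns the entries in insertion order, and a non-owning boost::intrusive index with auto-unlink hooks serves lookups, so erasing an entry from the list automatically drops it from the index.
    #include <list>
    #include <boost/intrusive/set.hpp>
    namespace bi = boost::intrusive;

    using index_hook = bi::set_member_hook<bi::link_mode<bi::auto_unlink>>;

    struct entry {
        int key;           // stands in for utils::UUID
        // querier q;      // the cached querier would live here too
        index_hook link;   // unlinks itself from the index on destruction
        friend bool operator<(const entry& a, const entry& b) {
            return a.key < b.key;
        }
    };

    // auto_unlink hooks require constant_time_size<false>.
    using entry_index = bi::multiset<entry,
            bi::member_hook<entry, index_hook, &entry::link>,
            bi::constant_time_size<false>>;

    std::list<entry> entries;  // insertion (LRU) order; owns the entries
    entry_index index;         // non-owning lookup index into `entries`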
Increment the key counter after inserting the first querier into the
cache. Otherwise two queriers with the same key will be inserted and
will fail the test. This problem is exposed by the changes the next
patches make to the querier-cache, but is fixed beforehand to maintain
bisectability of the code.
Fixes: #3529
This is to stay compliant with Origin for SSTables 3.x.
It differs from SSTables 2.x (ka/la), where the last promoted
index block is pushed first and the end-of-partition byte is written
after it.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
Right now there is no limit to how much the shares of the controllers
can grow. That is not a big problem for the memtable flush controller,
since it has a natural maximum in the dirty limit.
But the compaction controller, the way it's written today, can grow
forever and end up with a very large value for shares. We'll cap that at
adjust() time by not allowing shares to grow indefinitely.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Right now the controller adjusts its shares based on how big the backlog
is in comparison to shard memory. We have seen in some tests that if the
dataset becomes too big, this may cause compactions to dominate.
While we may change the input altogether in future versions, I'd like to
propose a quick change for the time being: move the high point from 10x
memory size to 30x memory size. This will cause compactions to increase
in shares more slowly.
While this is as magic as the 10 before, it will allow us to err on the
side of caution, with compactions not becoming aggressive enough to
overly disrupt workloads.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
"
This patchset runs the protocol servers under the "statement" scheduling
group, and makes all execution_stages in that path scheduling aware.
I used inheriting_concrete_execution_stage instead of passing the
scheduling group to concrete_execution_stage's constructor for two
reasons:
1. For cql statements, there is no easily accessible object that
can host the concrete_execution_stage and be reached from both
main.cc and the statements,
2. In the future, we will want to assign users to different
scheduling_groups, thus providing performance isolation for
service-level agreements (SLAs). Using an inheriting
execution_stage allows us to make the scheduling_group decision
in one place.
Depends on two unmerged patches in seastar, one fixing
inheriting_concrete_execution_stage compilation with reference parameters,
and one making smp::submit_to() scheduling aware.
"
* tag 'cql-sched/v1' of https://github.com/avikivity/scylla:
cql: make modification_statement execution_stage scheduling aware
cql: make batch_statement execution_stage scheduling aware
cql: make select_statement execution_stage scheduling aware
transport: make native protocol request processing execution_stage scheduling aware
main: start client protocol servers under the statement scheduling group
* seastar e7275e4...6422ece (7):
> build: enable concepts whenever they are supported by compiler
> shared_ptr: Enable releasing ownership of the object stored in lw_shared_ptr
> reactor: change way of calculating task quota violations
> Merge "Add metrics for steal time and task quota violations" from Glauber
> bitops.hh/log2ceil(): add special case for n == 1
> circular_buffer: add clear()
> build: add core/execution_stage.{cc,hh} to core_files
"
Tests: unit (release)
Before merging the LCS controller, we merged patches that would
guarantee that LCS would move towards zero backlog - otherwise the
backlog could get too high.
We didn't do the same for STCS, our first controlled strategy. So we may
end up with a situation where there are many SSTables inducing a large
backlog, but they are not yet meeting the minimum criteria for
compaction. The backlog, then, never goes down.
This patch changes the SSTable selection criteria so that if there is
nothing to do, we'll keep pushing towards reaching a state of zero
backlog. Very similar to what we did for LCS.
"
* 'stcs-min-threshold-v4' of github.com:glommer/scylla:
STCS: bypass min_threshold unless configured to enforce it strictly
compaction_strategy: allow the user to tell us if min_threshold has to be strict
If we fail to produce a SizeTiered compaction with the configured
min_threshold, we can try again to compact any two - unless there is a
global bypass telling us not to.
This will still favor doing larger compactions in size buckets where
that is possible, but if we are idle we will try to compact any two.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Now that we have the controller, we would like to take min_threshold as
a hint. If there is nothing to compact, we can ignore that and start
compacting less than min_threshold SSTables so that the backlog keeps
reducing.
But there are cases in which we don't want min_threshold to be a hint
and we want to enforce it strictly. For instance, if write amplification
is more of a concern than space amplification.
This patch adds a YAML option that allows the user to tell us that. We will
default to false, meaning min_threshold is not strictly enforced.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
"
Implement and test support for reading counters in SSTables 3.
"
* 'haaawk/sstables3/read-counters-v2' of ssh://github.com/scylladb/seastar-dev:
sstable_3_x_test: add test for counters
data_consume_rows_context_m: support reading counters
Add consumer_m::consume_counter_column
Extract make_counter_cell
row.hh & mp_row_consumer.hh: Add required includes
Use serialization_header::adjust in read_statistics
sstables 3: add serialization_header::adjust
data_consume_rows_context_m: add is_column_counter
data_consume_rows_context_m: Remove unused CELL_PATH_SIZE state
column_translation: add is_counter
Currently the SSTable test is failing (at least for me and Raphael),
complaining about the file it tries to write already existing. We have
helpers now to generate temporary directories, so we should use them.
The test passes after that.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20180614210036.16662-1-glauber@scylladb.com>
In SSTables 3, min timestamp and min deletion time in serialization
header are not stored directly; instead, the difference between
their value and the Cassandra "epoch" is stored.
This is supposed to make SSTables smaller. As a consequence, we have
to add the "epoch" after reading the values to obtain the actual
values of min timestamp and min deletion time.
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
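A hedged sketch of the adjustment; the constants assume Cassandra's EncodingStats epoch of 2015-09-22 00:00:00 UTC (1442880000 seconds), which should be verified against the format spec:
    #include <cstdint>
    // Stored values are deltas from fixed epochs; reading adds them back.
    constexpr int64_t timestamp_epoch_us = 1442880000LL * 1000000; // microseconds
    constexpr int64_t deletion_time_epoch_s = 1442880000;          // seconds

    int64_t adjust_min_timestamp(int64_t stored) {
        return stored + timestamp_epoch_us;
    }
    int64_t adjust_min_deletion_time(int64_t stored) {
        return stored + deletion_time_epoch_s;
    }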
Memtable entries should be cleaned using memtable cleaner, which
unlike the cache's cleaner, is not associated with the cache
tracker. It's an error to clean a snapshot using a tracker which doesn't
own the entries. This will corrupt cache tracker's row counter.
Fixes failure of test_exception_safety_of_update_from_memtable from
row_cache.cc in debug mode and with allocation failure injection
enabled.
Introduce in "cache: Defer during partition merging"
(70c72773be).
Message-Id: <1528988256-20578-1-git-send-email-tgrabiec@scylladb.com>
Previously max_shard_disk_space_size was unconditionally initialized
with the capacity of hints_directory. But it's likely that
hints_directory doesn't exist at all if hinted handoff is not enabled,
which results in Scylla failing to boot.
So, max_shard_disk_space_size is now initialized with the capacity
of hints_for_views directory, which is always present.
This commit also moves max_shard_disk_space_size to the .cc file
where it belongs - resource_manager.cc.
Tests: unit (release)
Message-Id: <9f7b86b6452af328c05c5c6c55bfad3382e12445.1528977363.git.sarna@scylladb.com>
"
This series adds the following datetime functions to CQL:
- currentTimestamp
- currentDate
- currentTime
- currentTimeUUID
- timeUUIDToDate
- timestampToDate
- timeUUIDToTimestamp
- dateToTimestamp
- timeUUIDToUnixTimestamp
- timestampToUnixTimestamp
- dateToUnixTimestamp
It also comes with datetime conversions test added to cql_query_test.
Note: issue #2949 also mentioned queries like:
$ SELECT * FROM myTable WHERE date >= currentDate() - 2d;
but it's a broader topic of supporting arithmetic operations in general,
so it's moved to #3499.
Tests: unit (release)
"
* 'support_datetime_functions_3' of https://github.com/psarna/scylla:
tests: add datetime conversions to cql_query_tests
cql3: add time conversion functions
cql3: add current* time functions
types: add time_native_type
CentOS 7.4 supports using ambient capabilities in the systemd unit
file, but some other RHEL7-compatible environments don't, which causes
Scylla startup failure.
To avoid the issue, move the AmbientCapabilities line to
/etc/systemd/system/scylla.server.service.d/, and install the .conf only when
both systemd and the kernel support the feature.
Fixes#3486
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180613232327.7839-1-syuu@scylladb.com>
There is a bug in incremental_selector for partitioned_sstable_set, so
until it is found, stop using it.
This degrades scan performance of Leveled Compaction Strategy tables.
Fixes#3513. (as a workaround)
Introduced: 2.1
Message-Id: <20180613131547.19084-1-avi@scylladb.com>
"
After issue 3501 it turned out that IDL generates incorrect
serialization code for fragmented buffers. This series addresses
the problem by:
* providing serialization code for FragmentRange
* changing IDL generation rules for fragmented buffers, so they
expect a lower layer to iterate over fragments
* adding a test to cql_query_test suite that covers #3501
* adding a test to idl_tests suite that covers fragmented serialization
"
* 'fix_fragmented_serialization_3' of https://github.com/psarna/scylla:
tests: add fragmented serialization test to idl_tests
tests: add long text value test
idl: remove for_each from fragmented serialization
serializer: add FragmentRange serialization
Previously fragmented buffers of bytes were serialized
with a for_each loop. Since serializing bytes involves writing
size first and then data, only the first fragment (and its size)
would be taken into account.
This commit changes the fragmented code generation so it expects
the serialized range to have a serialize(output, T) specification
and expects it to iterate over the fragments on its own (just like
the serializer for basic_value_view does).
Fixes#3501
Serialization for FragmentRange classes is added to the serialization
suite. It first serializes the total length into a 32-bit field and then
writes each fragment to the output.
References #3501
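A hedged sketch of that scheme over a generic output stream and fragment range (both hypothetical interfaces): the total size is written once, then every fragment's bytes, so multi-fragment values round-trip intact.
    #include <cstdint>
    // Write the total length as a 32-bit field, then each fragment's data.
    template <typename Output, typename FragmentRange>
    void serialize_fragments(Output& out, const FragmentRange& fragments) {
        uint32_t total = 0;
        for (auto&& frag : fragments) {
            total += frag.size();
        }
        out.write(reinterpret_cast<const char*>(&total), sizeof(total));
        for (auto&& frag : fragments) {
            out.write(frag.data(), frag.size());
        }
    }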
disk_properties map should be an entry in the 'disk' list hierarchy.
Currently this list is going to contain a single element.
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
In order to get a file name from the given file() handle one should use
the file_handle.name property.
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
When we check the currently configured tuning mode, perftune.py is allowed
to return errors. get_tune_mode() has to be able to tolerate them.
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
Store the prepared statement positional parameters values in the
corresponding system_traces.sessions entry in the 'parameters' column
(which has a map<text,text> type).
Parameters are stored as a pair of "param[X]" : "value", where X is
the index of the parameter starting from 0 and the "value" is the first
64 characters of the parameter's value string representation.
If parameters were given with their names attached (see the description
on bit 0x40 of QUERY flags in the CQL binary protocol specification) then
parameters are going to be stored in the "param[X](<bound variable name>)" : "value"
form.
If the value's string representation is longer than 64 characters, the "value" will
contain only its first 64 characters and will have "..." appended at
the end.
For a BATCH of prepared statements the parameter "name" will have a form of
param[Y][X] where Y is the index of the corresponding prepared statement
in the BATCH and X is the index of the parameter. Both X and Y start from
0.
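A hypothetical illustration of the resulting 'parameters' map entries (values invented), first for a single prepared statement, then for a BATCH of two prepared statements:
    { 'param[0]' : 'some-key', 'param[1](name)' : 'a-string-value-longer-than-sixty-four-characters-gets-cut-off-li...' }
    { 'param[0][0]' : 'key-a', 'param[0][1]' : '42', 'param[1][0]' : 'key-b' }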
Note:
Had to switch to boost::range::find() in sstables::big_sstable_set in order to
address the "ambiguous overload" compilation error.
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
Similarly to a regular QUERY or EXECUTE, we want to see the actual
query statements that were part of the BATCH.
If a traced query has only a single statement to execute, its statement
will be stored in the form 'query':'<statement>'.
If there are two or more queries (BATCH), the statement of each query in
the BATCH will be stored in the form 'query[X]':'<statement>', where X is
the index of the query in the BATCH, starting from 0.
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
Hide it inside the trace_state.cc in order to avoid future circular
dependencies with other .hh files.
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
"
Many components limit their internal memory pools/caches/queues depending
on the amount of memory present in the system. Each of them uses the seastar
memory interface to get the information about memory availability,
which makes it harder to 1) test the components with various memory
configurations and 2) see which components reserve memory and how
much each one reserves.
The patch changes all the components that rely on the memory size to get this
information through a configuration parameter during creation instead of
checking it directly with seastar, so only main interacts with the seastar
allocator.
"
* 'gleb/memory-config-v2' of github.com:scylladb/seastar-dev:
Provide available memory size to compaction_manager object during creation
Configure authorized_prepared_statment_cache memory limit during object creation
Configure logalloc memory size during initialization
Provide cql max request limit to cql server object during creation
Configure query result memory limiter size limit during object creation
Configure querier_cache size limit during object creation
Provide available memory size to messaging_service object during creation
Provide available memory size to hinted handoff resource manager during creation
Provide available memory size to storage_proxy object during creation
Provide available memory size to commitlog during creation
Provide available memory size to database object during creation
Configure prepared_statements_cache memory limit from outside
The test uses random mutations. We saw it failing with bad_alloc from time to time.
Reduce concurrency to reduce memory footprint.
Message-Id: <20180611090304.16681-1-tgrabiec@scylladb.com>
It compares only timestamps, but it should use intrinsic ordering of
the tombstone, which takes deletion time into consideration as well.
If we have two range tombstones with the same timestamp but different
deletion time (odd case, but still), then the one with the higher
deletion time should win. That's what all other parts of the system
use to resolve merges, in particular range_tombstone_list and
compact_mutation_state (the fragment stream compactor).
Not respecting this ordering violates the following equality:
do_compact(do_compact(m1) + m2) == do_compact(m1 + m2)
which may result in some clustered rows being missing in the
right-hand side, but not in the left-hand side, due to differences in
range tombstones.
This impacts only tests currently.
Message-Id: <1528705602-7218-1-git-send-email-tgrabiec@scylladb.com>
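A minimal sketch of the intrinsic ordering in question (field names assumed to mirror the tombstone struct): timestamps compare first, and deletion time breaks ties, so merge resolution agrees everywhere.
    #include <cstdint>
    #include <tuple>
    // Intrinsic tombstone ordering: the newer timestamp wins; on a
    // timestamp tie, the higher deletion time wins.
    struct tombstone_like {
        int64_t timestamp;
        int64_t deletion_time;
    };
    bool operator<(const tombstone_like& a, const tombstone_like& b) {
        return std::tie(a.timestamp, a.deletion_time)
             < std::tie(b.timestamp, b.deletion_time);
    }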
When I came across db/legacy_schema_migrator.cc, I had no idea what it
does and though I had obvious guesses (it somehow migrates old schemas,
right?) I didn't know what it really does. So after I figured this out,
I wrote this comment so the next person doesn't need to guess.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180605120225.25173-1-nyh@scylladb.com>
* 'systemd-coredump-debian9' of https://github.com/syuu1228/scylla:
dist/debian: fix pystache package name on Debian / Ubuntu
dist/debian: switch to systemd-coredump on Debian 9
dist/debian: rename 99-scylla.conf to 99-scylla-coredump.conf
It is faster than gossiper::is_normal because it avoids doing a search in
the std::map<application_state, versioned_value>. It is useful for the
code in the fast path which needs to query if a node is in NORMAL
status.
Fixes#3500
Message-Id: <42db91fa4108f9f4fcf94fed3ec403ccf35d15e9.1528354644.git.asias@scylladb.com>
"
Implement and test support for reading collections in SSTables 3.
Tests: unit {release}
"
* 'haaawk/sstables3/read-collections-v1' of ssh://github.com/scylladb/seastar-dev:
sstables 3: Add tests for reading collections
flat_mutation_reader_assertions: add more flexible asserts
data_consume_rows_context_m: add support for collections
mp_row_consumer_m: Add support for collections
data_consume_rows_context_m: introduce cell_path
Use column_translation::*_is_collection in reading
column_translation: add *_column_is_collection()
column_flags_m: add HAS_COMPLEX_DELETION
Use read_unsigned_vint_length_bytes for COLUMN_VALUE
Use read_unsigned_vint_length_bytes for CK_BLOCKS
Implement read_unsigned_vint_length_bytes
"
This series is for nodetool getsstables.
This patch is based on:
8daaf9833a
With some minor adjustments because of the code change in sstables.
The idea is to allow searching for all the sstables that contain a
given key.
After this patch, if there is a table t1 in keyspace k1 that has a key
called aa, then
curl -X GET "http://localhost:10000/column_family/sstables/by_key/k1%3At1?key=aa"
will return the list of sstable file names that contain that key.
"
* 'amnon/sstable_for_key_v4' of github.com:scylladb/seastar-dev:
Add the API implementation to get_sstables_by_key
api: column_family.json make the get_sstables_for_key doc clearer
column_family: Add the get_sstables_by_partition_key method
sstable test: add has_partition_key test
sstable: Add has_partition_key method
keys_test: add a test for nodetool_style string
keys: Add from_nodetool_style_string factory method
The get_sstables_by_partition_key method is used by the API to return a set
of sstable names that hold a given partition key.
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
This patch adds a test for the has_partition_key method: it creates an
sstable with a partition key and then uses that key in the
has_partition_key method to verify that it is there.
It also creates a different key and uses that to verify that a
non-existent key returns false.
This reverts part of commit 364c2551c8. I mistakenly
changed the scylla-ami submodule in addition to applying the patch. The revert
keeps the intended part of the patch and undoes the scylla-ami change.
In 4b1034b (storage_service: Remove the stream_hints), we removed the
only user of the api with the column_families parameter.
std::vector column_families = { db::system_keyspace::HINTS };
streamer->add_tx_ranges(keyspace, std::move(ranges_per_endpoint),
column_families);
We can simplify the range_streamer code a bit by removing it.
Fixes#3476
Tests: dtest update_cluster_layout_tests.py
Message-Id: <c81d79c5e6dbc8dd78c1242837de892e39d6abd2.1528356342.git.asias@scylladb.com>
It is useful for the client driver to know which shard is serving a
particular connection, so it can only send requests through that connection
which will be served by the same shard, eliminating a hop.
Support that by advertising a "SCYLLA_SHARD" option, with a value
corresponding to the shard number.
Acked-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20180606203437.1198-1-avi@scylladb.com>
* seastar 12cffef...e7275e4 (9):
> tests: execution_stage_test: capture sg by value
> Merge "Add in-path parameter suport to the code generation" from Amnon
> Merge "Add scheduling_group inheritance to execution_stage" from Avi
> tutorial: explain how to find origin of exception
> tls: Ensure handshake always drains output before return/throw
> build: cmake: correct stdc++fs library name once more
> perftune.py: make sure config file existing before write
> Update travis-ci integration
> build: fix compilation issues on cmake. missing stdc++-fs
"
The IndexInfo table tracks the secondary indexes that have already
been populated. Since our secondary index implementation is backed by
materialized views, we can virtualize that table so queries are
actually answered by built_views.
Fixes#3483
"
* 'built-indexes-virtual-reader/v2' of github.com:duarten/scylla:
tests/virtual_reader_test: Add test for built indexes virtual reader
db/system_keyspace: Add virtual reader for IndexInfo table
db/system_keyspace: Explain that table_name is the keyspace in IndexInfo
index/secondary_index_manager: Expose index_table_name()
db/legacy_schema_migrator: Don't migrate indexes
If the reader's buffer is small enough, or preemption happens often
enough, fill_buffer() may not make enough progress to advance
_lower_bound. If, in addition, iterators are constantly invalidated across
fill_buffer() calls, the reader will not be able to make progress.
See row_cache_test.cc::test_reading_progress_with_small_buffer_and_invalidation()
for an example scenario.
Also reproduced in debug-mode row_cache_test.cc::test_concurrent_reads_and_eviction
Message-Id: <1528283957-16696-1-git-send-email-tgrabiec@scylladb.com>
There is no reason to use an std::set for it since we don't care about
the ordering - only about the existence of a particular entry.
A hash table will be more efficient for this use case.
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
Message-Id: <1528220892-5784-2-git-send-email-vladz@scylladb.com>
"
As in #3423, ensuring token order on secondary index queries can be done
by adding an additional column to views that back secondary indexes.
This column is the first clustering column and contains the token value,
computed on updates.
This series also updates tests and comments referring to issue 3423.
Tests: unit (release, debug)
"
* 'order_by_token_in_si_5' of https://github.com/psarna/scylla:
cql3: update token order comments
index, tests: add token column to secondary index schema
view: add handling of a token column for secondary indexes
view: add is_index method
ec2_snitch::gossiper_starting() calls the base class (default) method,
which sets _gossip_started to TRUE and thereby prevents the following
reconnectable_snitch_helper registration.
Fixes#3454
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
Message-Id: <1528208520-28046-1-git-send-email-vladz@scylladb.com>
In 455d5a5 (streaming memtables: coalesce incoming writes), we
introduced the delayed flush to coalesce incoming streaming mutations
from different stream_plan.
However, most of the time there will be one stream plan at a time, the
next stream plan won't start until the previous one is finished. So, the
current coalescing does not really work.
The delayed flush adds 2s of dealy for each stream session. If we have lots
of table to stream, we will waste a lot of time.
We stream a keyspace in around 10 stream plans, i.e., 10% of ranges a
time. If we have 5000 tables, even if the tables are almost empty, the
delay will waste 5000 * 10 * 2 = 27 hours.
To stream a keyspace with 4 tables, each table has 1000 rows.
Before:
[shard 0] stream_session - [Stream #944373d0-5d9c-11e8-9cdb-000000000000] Executing streaming plan for Bootstrap-ks-index-0 with peers={127.0.0.1}, master
[shard 0] stream_session - [Stream #944373d0-5d9c-11e8-9cdb-000000000000] Streaming plan for Bootstrap-ks-index-0 succeeded, peers={127.0.0.1}, tx=0 KiB, 0.00 KiB/s, rx=1030 KiB, 125.21 KiB/s
[shard 0] range_streamer - Bootstrap with 127.0.0.1 for keyspace=ks succeeded, took 8.233 seconds
After:
[shard 0] stream_session - [Stream #e00bf6a0-5d99-11e8-a7b8-000000000000] Executing streaming plan for Bootstrap-ks-index-0 with peers={127.0.0.1}, master
[shard 0] stream_session - [Stream #e00bf6a0-5d99-11e8-a7b8-000000000000] Streaming plan for Bootstrap-ks-index-0 succeeded, peers={127.0.0.1}, tx=0 KiB, 0.00 KiB/s, rx=1030 KiB, 4772.32 KiB/s
[shard 0] range_streamer - Bootstrap with 127.0.0.1 for keyspace=ks succeeded, took 0.216 seconds
Fixes#3436
Message-Id: <cb2dde263782d2a2915ddfe678c74f9637ffd65b.1526979175.git.asias@scylladb.com>
An additional token column is now present in every view schema
that backs a secondary index. This column is always the first part
of the clustering key, so it forces token order on queries.
The column's name is ideally idx_token, but it can be suffixed
with a number to ensure its uniqueness.
It also updates tests to make them acknowledge the new token order.
Fixes#3423
In order to ensure token order on secondary index queries, the
first clustering column of each view that backs a secondary index
is going to store a token computed from the base table's partition keys.
After this commit, if there exists a column that is not present
in the base schema, it will be filled with the computed token.
After 70c72773be it's possible that
open_version() is called with a phase which is smaller than the phase
of the latest version, because the latest version belongs to the
in-progress cache update. In such a case we must return the existing
non-latest snapshot and not create a new version on top of the
in-progress update. Not doing this violates several invariants, and
may lead to inconsistencies, including violation of write atomicity or
temporary loss of writes.
partition_entry::read() was already adjusted by the aforementioned
commit. Do a similar adjustment for open_version().
Fixes sporadic failures of row_cache_test.cc::test_concurrent_reads_and_eviction
Message-Id: <1528211847-22825-1-git-send-email-tgrabiec@scylladb.com>
We mistakenly only added network-online.target, which doesn't promise to
wait for the /var/lib/scylla mount.
For that we need local-fs.target.
Fixes#3441
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180521083349.8970-1-syuu@scylladb.com>
"
It turns out that compression just works for SSTables 3.x,
thanks to the previous work done on the write path.
This series cleans up tests a bit and introduces test for compression
on the read path.
"
* 'haaawk/sstables3/read-compression-v1' of ssh://github.com/scylladb/seastar-dev:
Add test for compression in sstables 3.x
Extract test_partition_key_with_values_of_different_types_read
sstable_3_x_test: use SEASTAR_THREAD_TEST_CASE
Drop UNCOMPRESSD_ when code will be used for compressed too
"
This patch adds nr_shards, msb_ignore, and the actual sharding algorithm to the
system.local table. Drivers and other tools can then make use of this
information to talk to Scylla in an optimal way.
"
* 'system_tables-v3' of github.com:glommer/scylla:
system_keyspace: add sharding information to local table
partitioner: export the name of the algorithm used to do intra-node sharding
We would like the clients to be able to route work directly to the right
shards. To do that, they need to know the sharding algorithm and its
parameters.
The algorithm can be copied into the client, but the parameters need to
be exported somewhere. Let's use the local table for that.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
---
v2: force msb to zero on non-murmur
We will export this on system tables. To avoid hard-coding it in the system
table level, keep it at least in the dht layer where it belongs.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
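A hedged sketch of the kind of computation a client can reproduce from the exported parameters (nr_shards and msb_ignore); it follows the commonly described Murmur3 scheme of unbiasing the signed token, discarding the ignored high bits, and scaling into the shard count - treat the details as an assumption rather than the authoritative algorithm.
    #include <cstdint>
    // Map a signed 64-bit token to a shard given the exported parameters.
    // Uses gcc/clang's __uint128_t for the widening multiply.
    unsigned shard_of(int64_t token, unsigned nr_shards, unsigned msb_ignore) {
        uint64_t biased = uint64_t(token) + (uint64_t(1) << 63); // make unsigned
        biased <<= msb_ignore;                                   // drop ignored msb
        return unsigned((__uint128_t(biased) * nr_shards) >> 64);
    }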
Currently, build_deb.sh looks very complicated because each distribution
requires different parameters, and we apply them with sed commands one by one.
This patch replaces them with Mustache, a template language with a simple
and easy syntax.
Both .rpm distributions and .deb distributions have pystache (a Python
implementation of Mustache), so we will use it.
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180604104026.22765-1-syuu@scylladb.com>
"
This series introduces a separate hinted handoff manager for materialized views.
Steps:
* decouple resource limits from hinted handoff, so multiple instances can share space
and throughput limits in order to avoid internal fragmentation for every instance's
reservations
* add a subdirectory to data/, responsible for storing materialized view hints
* decouple registering global metrics from hinted handoff constructor, now that there
can be more than one instance - otherwise 'registering metrics twice' errors are going to occur
* add a hints_for_views_manager to storage proxy and route failed view updates to use it
instead of the original hints_manager
* restore previous semantics for enabling/disabling hinted handoff - regular hinted handoff
can be disabled or enabled just for specific datacenters without influencing materialized
views flow
"
* 'separate_hh_for_mv_4' of https://github.com/psarna/scylla:
storage_proxy: restore optional hinted handoff
storage_proxy: add hints manager for views
hints: decouple hints manager metrics from constructor
db, config: add view_pending_updates directory
hints: move space_watchdog to resource manager
hints: move send limiter to resource manager
hints: move constants to resource_manager
The IndexInfo table tracks the secondary indexes that have already
been populated. Since our secondary index implementation is backed by
materialized views, we can virtualize that table so queries are
actually answered by built_views.
Fixes#3483
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
This patch adds the same comment that exists in Apache Cassandra,
explaining that the table_name column in the IndexInfo system table
actually refers to the keyspace name. Don't be fooled.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Expose secondary_index::index_table_name() so knowledge on how to
built an index name can remain centralized.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Because authorized_prepared_statements_cache caches information that comes from
the permissions cache and from the prepared statements cache, it should have its
entry expiration period set to the minimum of the expiration periods of these caches.
The same goes for the entry refresh period, but since the prepared statements cache
does not have a refresh period, authorized_prepared_statements_cache's entry refresh
period is simply equal to that of the permissions cache.
Fixes#3473
Tests: dtest{release} auth_test.py
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
Message-Id: <1527789716-6206-1-git-send-email-vladz@scylladb.com>
Now that more than one instance of hints manager can be present
at the same time, registering metrics is moved out of the constructor
to prevent 'registering metrics twice' errors.
Hints for materialized view updates need to be kept somewhere,
because their dedicated hints manager has to have a root directory.
view_pending_updates directory resides in /data and is used
for that purpose.
Constants related to managing resources are moved to newly created
resource_manager class. Later, this class will be used to manage
(potentially shared) resources of hints managers.
"
In preparation, we change LCS so that it tries harder to push data
to the last level, where the backlog is supposed to be zero.
The backlog is defined as:
backlog_of_stcs_in_l0 + Sum(L in levels) sizeof(L) * (max_levels - L) * fan_out
where:
* the fan_out is the number of SSTables we usually compact with the
next level (usually 10).
* max_levels is the number of levels currently populated
* sizeof(L) is the total amount of data in a particular level.
Tests: unit (release)
"
* 'lcs-backlog-v2' of github.com:glommer/scylla:
LCS: implement backlog tracker for compaction controller
LCS: don't construct property in the body of constructor
LCS: try harder to move SSTables to highest levels.
leveled manifest: turn 10 into a constant
backlog: add level to write progress monitor
This is the last missing tracker among the major strategies. After
this, only DTCS is left.
To calculate the backlog, we will define the point of zero-backlog
as having all data in the last level. The backlog is then:
Sum(L in levels) sizeof(L) * (max_levels - L) * fan_out,
where:
* the fan_out is the number of SSTables we usually compact with the
next level (usually 10).
* max_levels is the number of levels currently populated
* sizeof(L) is the total amount of data in a particular level.
Care is taken for the backlog not to jump when a new level has been just
recently created.
Aside from that, SSTables that accumulate in L0 can be subject to STCS.
We will then add a STCS backlog in those SSTables to represent that.
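A minimal standalone sketch of this definition (names assumed, not the
actual tracker code; levels are numbered 1..max_levels so the last level
contributes zero, and L0 is covered by the STCS term):

    #include <cstdint>
    #include <vector>

    double lcs_backlog(const std::vector<uint64_t>& level_sizes, // bytes in L1..Lmax
                       double stcs_backlog_in_l0,
                       unsigned fan_out = 10) {
        const unsigned max_levels = level_sizes.size();
        double backlog = stcs_backlog_in_l0;
        for (unsigned l = 1; l <= max_levels; ++l) {
            // The further a level is from the last one, the more work remains.
            backlog += double(level_sizes[l - 1]) * (max_levels - l) * fan_out;
        }
        return backlog;
    }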
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Right now we are constructing the _max_sstable_size_in_mb property in
the body of the constructor, which makes it hard for us to use from
other properties.
We are doing that because we'd like to test for bounds of that value. So
a cleaner way is to have a helper function for that.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Our current implementation of LCS can end up with situations in which
just a bit of data is in the highest levels, with the majority in the
lowest levels. That happens because we will only promote things to
highest levels if the amount of data in the current level is higher than
the maximum.
This is a pre-existing problem in itself, but became even clearer when
we started trying to define what is the backlog for LCS.
We have discussed ways to fix this by redefining the criteria on when
to move data to the next levels. That would require us to change the way
things are today considerably, allowing parallel compactions, etc. There
is significant risk that we'll increase write amplification and we would
need to carefully validate that.
For now I will propose a simpler change that essentially solves the
"inverted pyramid" problem of current LCS without major disruption:
keep selecting compaction candidates with the same criteria that we use
today, which helps make sure we are not compacting high levels for no
reason; but if there is nothing to do, use the idle time to push data to
higher levels. As an added benefit, old data that is in the higher levels
can also be compacted away faster.
With this patch we see that in an idle, post-load system all data is
eventually pushed to the last level. Systems under constant writes keep
behaving the same way they did before.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
We increase levels in powers of 10 but that is a parameter
of the algorithm. At least make it into a constant so that we can
reuse it somewhere else.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
"
SSTables 3.x format ('m') stores the size of the previous row or RT marker
inside each row/marker. That potentially allows traversing rows/markers
in reverse order.
The previous code calculating those sizes appeared to produce invalid
values for all rows except the first one. The problem with detecting
this bug was that neither Cassandra itself nor the sstabledump tool use
those values; they are simply skipped on reading.
From the UnfilteredSerializer.deserializeRowBody() method,
https://github.com/apache/cassandra/blob/cassandra-3.11/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java#L562 :

if (header.isForSSTable())
{
    in.readUnsignedVInt(); // Skip row size
    in.readUnsignedVInt(); // previous unfiltered size
}
So while the previous test files were technically correct in that they
contained valid data readable by Cassandra/sstabledump, they didn't
follow the format specification.
This patchset fixes the code to produce correct values and replaces
incorrect data files with correct ones. The newly generated data files
have been validated to be identical to files generated with Cassandra
using the same data and timestamps as the unit tests.
Tests: Unit {release}
"
* 'projects/sstables-30/fix-prev-row_size/v1' of https://github.com/argenet/scylla:
tests: Fix test files to use correct previous row sizes.
sstables: Fix calculation of previous row size for SSTables 3.x
sstables: Factor out code building promoted index blocks into separate helpers.
"
This patchset contains two fixes to the clustering key prefixes
serialization logic for SSTables 3.x.
First, it fixes a vexing typo: a bitwise-and (&) has been used instead
of a remainder operator (%) for truncating the shift value.
This did not show up in existing tests because they all had non-empty
clustering columns values.
Added tests to cover empty clustering columns values.
Second, it fixes the logic of serialization to write values up to the
prefix length, not the length of the clustering key as defined by
schema. This matches the way it is done by the Origin.
There is, however, a special case where the prefix size is smaller than
that of a clustering key but we still need to serialize up to the full
size. This is the case when a compact table is being used and some
rows in it are added using incomplete clustering keys (containing null
for trailing columns).
In Cassandra, these prefixes still have a full length and missing
columns are just set to 'null'. In our code those prefixes have their
real length, but since we need to serialize beyond it, we pass a flag to
indicate this.
"
* 'projects/sstables-30/fix-clustering-blocks/v1' of https://github.com/argenet/scylla:
tests: Add test covering compact table with non-full clustering key.
sstables: Improve clustering blocks writing, use logical clustering prefix size.
tests: Add test covering large clustering keys (>32 columns) for SSTables 3.x
tests: Add unit test covering empty values in clustering key.
sstables: Fix typo in clustering blocks write helper.
"
Add handling for missing columns and tests for it.
There are 3 cases:
1. Number of columns in a table is smaller than 64
2. Number of columns in a table is greater than 64
2a. and less than half of all possible columns are present in sstable
2b. and at least half of all possible columns are present in sstable
Case 1 is implemented using a bit mask; a column is present if mask & (1 << <column number>) == 0.
Case 2a is implemented by storing the list of column numbers of the present columns.
Case 2b is implemented by storing the list of column numbers of the absent columns.
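A minimal sketch of the case 1 test (names assumed; per the description
above, a set bit in the mask marks an absent column):

    #include <cstdint>

    bool is_column_present(uint64_t missing_columns_mask, unsigned column_number) {
        return (missing_columns_mask & (uint64_t(1) << column_number)) == 0;
    }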
"
* 'haaawk/sstables3/read-missing-columns-v3' of ssh://github.com/scylladb/seastar-dev:
sstables 3: add test for reading big dense subset of columns
sstables 3: support reading big dense subsets of columns
sstables 3: add test for reading big sparse subset of columns
sstables 3: support reading big sparse subsets of columns
sstables 3: add test for reading small subset of columns
sstables 3: support reading small subsets of columns
Debug mode view_schema_test sometimes complains that a bool member
doesn't contain in-range values, apparently in the move constructor.
Initialize them for its benefit to avoid false-positive test
failures.
Message-Id: <20180602184934.31258-1-avi@scylladb.com>
untyped_result_set_row's cell data type is bytes_opt, and the
get_blob() accessor accesses the value assuming it's engaged
(relying on the caller to call has()).
has_unsalted_hash() calls get_blob() without calling has() beforehand,
potentially triggering undefined behavior.
Fix by using get_or() instead, which also simplifies the caller.
I observed failures in Jenkins in this area. It's hard to be sure
this is the root cause, since the failures triggered an internal
consistency assertion in asan rather than an asan report. However,
the error is hard to reproduce and the fix makes sense even if it
doesn't prevent the error.
See #3480 for the asan error.
Fixes#3480 (hopefully).
Message-Id: <20180602181919.29204-1-avi@scylladb.com>
A small subset contains no more than 63 elements.
Support for large subsets will come in the following
patches.
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
For SSTables being written, we don't know their level yet. Add that
information to the write monitor. New SSTables will always be at L0.
Compacted SSTables will have their level determined by the compaction
process.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
In the Origin, the size of the clustering key prefix used during
serialization is the actual length of the prefix and not the full size
as defined in schema. So the code is fixed to align with that logic.
This, in particular, is needed to write clustering blocks for RT
markers.
There is, however, a special case where the prefix size is smaller than
that of a clustering key but we still need to serialize up to the full
size. This is the case when a compact table is being used and some
rows in it are added using incomplete clustering keys (containing null
for trailing columns).
In Cassandra, these prefixes still have a full length and missing
columns are just set to 'null'. In our code those prefixes have their
real length, but since we need to serialize beyond it, we pass a flag to
indicate this.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
What was supposed to be a remainder operation turned out to be a
bitwise 'and'. This didn't show up in existing tests only because they
all had non-empty clustering values.
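To illustrate the difference (the variable names and the truncation bound
here are assumptions, not the actual code):

    unsigned shift = pos % bound;  // intended: truncate with a remainder
    unsigned bad   = pos & bound;  // the typo: tests a single bit of pos,
                                   // wrong for almost every value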
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
"
This is the first part of the first step of switching Scylla. It covers
converting cells to the new serialisation format. The actual structure
of the cells doesn't differ much from the original one with a notable
exception of the fact that large values are now fragmented and
linearisation needs to be explicit. Counters and collections still
partially rely on their old, custom serialisation code and their
handling is not optimal (although not significantly worse than it used
to be).
The new in-memory representation allows objects to be of varying size
and makes it possible to provide deserialisation context so that we
don't need to keep in each instance of an IMR type all the information
needed to interpret it. The structure of IMR types is described in C++
using some metaprogramming, with the hope of making it much easier to
modify the serialisation format than it would be in the case of open-coded
serialisation functions.
Moreover, IMR types can own memory thanks to a limited support for
destructors and movers (the latter are not exactly the same thing as C++
move constructors, hence a different name). This makes it (relatively)
easy to ensure that there is an upper bound on the size of all allocations.
For now the only thing that is converted to the IMR are atomic_cells
and collections which means that the reduction in the memory footprint
is not as big as it can be, but introducing the IMR is a big step on its
own and also paves the way towards complete elimination of unbounded
memory allocations.
The first part of this patchset contains miscellaneous preparatory
changes to various parts of the Scylla codebase. They are followed by
introduction of the IMR infrastructure. Then structure of cells is
defined and all helper functions are implemented. Next are several
treewide patches that mostly deal with propagating type information to
the cell-related operations. Finally, atomic_cell and collections are
switched to use the new IMR-based cell implementation.
The IMR is described in much more detail in imr/IMR.md added in "imr:
add IMR documentation".
Refs #2031.
Refs #2409.
perf_simple_query -c4, medians of 30 results:
       ./perf_base   ./perf_imr    diff
read   308790.08     309775.35     0.3%
write  402127.32     417729.18     3.9%
The same with 1 byte values:
       ./perf_base1  ./perf_imr1   diff
read   314107.26     314648.96     0.2%
write  463801.40     433255.96    -6.6%
The memory footprint is reduced, but that is partially due to removal of
the small buffer optimisation (whether it will be restored depends on the
exact measurements of the performance impact). Generally, this series was
not expected to make a huge difference as this would require converting
whole rows to the IMR.
Memory footprint:
Before:
mutation footprint:
- in cache: 1264
- in memtable: 986
After:
mutation footprint:
- in cache: 1104
- in memtable: 866
Tests: unit (release, debug)
"
* tag 'imr-cells/v3' of https://github.com/pdziepak/scylla: (37 commits)
tests/mutation: add test for changing column type
atomic_cell: switch to new IMR-based cell representation
atomic_cell: explicitly state when atomic_cell is a collection member
treewide: require type for creating collection_mutation_view
treewide: require type for comparing cells
atomic_cell: introduce fragmented buffer value interface
treewide: require type to compute cell memory usage
treewide: require type to copy atomic_cell
treewide: require type info for copying atomic_cell_or_collection
treewide: require type for creating atomic_cell
atomic_cell: require column_definition for creating atomic_cell views
tests: test imr representation of cells
types: provide information for IMR
data: introduce cell
data: introduce type_info
imr/utils: add imr object holder
imr: introduce concepts
imr: add helper for allocating objects
imr: allow creating lsa migrators for IMR objects
imr: introduce placeholders
...
Scylla now exposes the Prometheus API by default. This patch changes
scyllatop to use the Prometheus API; the collectd API is still available.
The main changes in the patch:
* Move collectd specific logic inside collectd.
* Add support for help information.
* Add command line options to configure the Prometheus endpoint and to
enable collectd.
* Add a prometheus class that collects information from Prometheus.
Fixes: #1541
Message-Id: <20180531124156.26336-1-amnon@scylladb.com>
Only libjsoncpp >= 1.6.0 offers a safe name() method for value
iterators. For older versions, deprecated memberName() is used
instead. Note that memberName() was deprecated because of its
inability to deal with embedded null characters.
Fixes#3471
Message-Id: <e64a62bfc24ef06daee238d79d557fe6ec8979d3.1527758708.git.sarna@scylladb.com>
With the introduction of the new in-memory representation changing
column type has become a more complex operation since it needs to handle
switch from fixed-size to variable-size types. This commit adds an
explicit test for such cases.
This patch changes the implementation of atomic_cell and
atomic_cell_or_collection to use the data::cell implementation which is
based on the new in-memory representation infrastructure.
Collections are not going to be fully converted to the IMR just yet and
still use the old serialisation format. This means that they still don't
support fragmented values very well. This patch passes the information
when an atomic_cell is created as a member of a collection so that later
we can avoid fragmenting the value in such cases.
As preparation for the switch to the new cell representation this
patch changes the type returned by atomic_cell_view::value() to one that
requires explicit linearisation of the cell value. Even though the value
is still implicitly linearised (and only when managed by the LSA) the
new interface is the same as the target one so that no more changes to
its users will be needed.
This commit introduces cell serializers and views based on the in-memory
representation infrastructure. The code doesn't assume anything about
how the cells are stored: they can be either a part of another IMR
object (once the rows are converted to the IMR) or separate objects
(just like the current atomic_cell).
A view schema's view_info contains the id of the base regular column
that view includes in its primary key. Since the column id of a
particular column can potentially change with a new schema version, we
need to refresh the stored column id. We weren't doing that when
unselected base columns are added, and this patch fixes it by
triggering an update of the view schema when base columns are added
and the view contains a base regular column in its PK.
Fixes#3443
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180530194536.51202-1-duarte@scylladb.com>
IMR objects may own memory. object_allocator takes care of allocating
memory for all owned objects during the serialisation of their owner.
In practice a writer of the parent object would accept a helper object
created by object_allocator. That helper object would either compute
the size of the buffers that have to be allocated or perform the actual
serialisation, in the same two-phase manner as is done for the parent
IMR object.
In some cases the actual value of an IMR object is not known at
serialisation time. If the type is fixed-size we can use a placeholder
to defer writing it to a more convenient moment.
This patch introduces destructors and movers for IMR objects, which
enables them to own memory. Custom destructors and movers can be
defined by specialising the appropriate classes.
This patch adds a new way of serialising bytes and sstring objects in the
IDL. Using write_fragmented_<field-name>() the caller can pass a range
of fragments that would be serialised without linearising the buffer.
Since sstabledump and Cassandra do not use row size values, the new
files have been validated to be identical to files generated by
Cassandra with the same data inserted at the same timestamps.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
The previous code incorrectly calculated sizes of previous rows while
writing SSTables in 3.x ('m') format.
The problem with detecting this issue was that neither sstabledump nor
Cassandra 3.x itself use those values, as of today, they are simply
ignored when data is read from files.
Still, we want to be compatible and write correct values as they may be
of use in the future.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
tests/view_complex_test.cc contained a #ifdef'ed-out test claiming to
be a reproducer for issue #3362. Unfortunately, it is not - after
earlier commits the only reason this test still fails is a mistake in
the test, which expects 0 rows in a case where the real result is 1 row.
Issue #3362 does *not* have to be fixed to fix this test.
So this patch fixes the broken test, and enables it. It also adds comments
explaining what this test is supposed to do, and why it works the way it
does.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180530142214.29398-1-nyh@scylladb.com>
"
Add handling for static rows and tests for it.
"
* 'haaawk/sstables3/read-static-v1' of ssh://github.com/scylladb/seastar-dev:
sstable_3_x_test: Add test_uncompressed_compound_static_row_read
sstable_3_x_test: add test_uncompressed_static_row_read
flat_mutation_reader_assertions: improve static row assertions
data_consume_rows_context_m: Implement support for static rows
mp_row_consumer_m: Implement support for static rows
mp_row_consumer_m: Extract fill_cells
"
We currently suffer from reactor stalls caused by non-preemptible processing
of large partitions in the following places:
(1) dropping partition entries from cache or memtables does not defer
(2) dropping partition versions abandoned by detached snapshots does not defer
(3) merging of partition versions when snapshots go away does not defer
(4) cache update from memtable processes partition entries without deferring (#2578)
(5) partition entries are upgraded to new schema atomically
This series fixes problems (1), (2) and (4), but not (3) and (5).
(1) and (2) are fixed by introducing mutation_cleaner objects which are
containers for garbage partition versions which are delaying actual freeing.
Freeing happens from memory reclaimers and is incremental.
(3) and (5) are not solved yet.
(4) is solved by having partition merging process partitions with row
granularity and defer in the middle of a partition. In order to preserve update
atomicity on the partition level as perceived by reads, when an update starts we
create a snapshot of the current version of the partition and process the memtable
entry by inserting data into a separate partition version. This way, if the upgrade
defers in the middle of a partition, reads can still go to the old version and
not see partial writes.
will use the previous phase until whole partition is upgraded. When partition
is finally merged, the snapshots go away and the new version will eventually
be merged to the old version. Due to (3) however, this merging may still add
latency to the upgrade path.
Remaining work:
- Solving problem (3). I think the approach to take here would be to
move the task of merging versions to the background, maybe into mutation_cleaner.
- Merging range tombstones incrementally.
Performance
===========
Performance improvements were evaluated using tests/perf_row_cache_update -c1 -m1G,
which measures time it takes to update cache from memtable for various workloads
and schemas.
For a large partition with lots of small rows we see a significant reduction of
scheduling latency, from ~550ms to ~23ms. The cause of the remaining latency is
problem (3) stated above. The run time is reduced by 70%.
For small partition case without clustering columns we see no degradation.
For small partition case with clustering key, but only 3 small rows per partition,
we see a 30% degradation in run time.
For large partition with lots of range tombstones we see degradation of 15% in
run time and scheduling latency.
Below you can see full statistics for cache update run time:
=== Small partitions, no overwrites:
Before:
avg = 433.965155
stdev = 35.958024
min = 340.093201
max = 468.564514
After:
avg = 436.929447 (+1%)
stdev = 37.130237
min = 349.410339
max = 489.953400
=== Small partition with a few rows:
Before:
avg = 315.379316
stdev = 30.059120
min = 240.340561
max = 342.408295
After:
avg = 407.232691 (+30%)
stdev = 53.918717
min = 269.514648
max = 444.846649
=== Large partition, lots of small rows:
Before:
avg = 412.870689
stdev = 227.411317
min = 286.990631
max = 1263.417847
After:
avg = 124.351705 (-70%)
stdev = 4.705762
min = 110.063255
max = 129.643387
=== Large partition, lots of range tombstones:
Before:
avg = 601.172644
stdev = 121.376866
min = 223.502136
max = 874.111572
After:
avg = 695.627588 (+15%)
stdev = 135.057004
min = 337.173950
max = 784.838745
"
* tag 'tgrabiec/clear-gently-all-partitions-v3' of github.com:tgrabiec/scylla:
mvcc: Use small_vector<> in partition_snapshot_row_cursor
utils: Extract small_vector.hh
mvcc: Erase rows gradually in apply_to_incomplete()
mvcc: partition_snapshot_row_cursor: Avoid row copying in consume() when possible
cache: real_dirty_memory_accounter: Move unpinning out of the hot path
mvcc: partition_snapshot_row_cursor: Reduce lookups in ensure_entry_if_complete()
mutation_partition: Reduce row lookups in apply_monotonically()
cache: Release dirty memory with row granularity
cache: Defer during partition merging
mvcc: partition_snapshot_row_cursor: Introduce consume_row()
mvcc: partition_snapshot_row_cursor: Introduce maybe_refresh_static()
mvcc: Make apply_to_incomplete() work with attached versions
cache: Propagate phase to apply_to_incomplete()
cache: Prepare for incremental apply_to_incomplete()
Introduce a coroutine wrapper
tests: mvcc: Encapsulate memory management details
tests: cache: Take into account that update() may defer
cache: real_dirty_memory_accounter: Allow construction without memtable
cache: Extract real_dirty_memory_accounter
mvcc: Destroy memtable partition versions gently
memtable: Destroy partitions incrementally from clear_gently()
mvcc: Remove rows from tracker gently
cache: Destroy partition versions incrementally
Introduce mutation_cleaner
mvcc: Introduce partition_version_list
mvcc: Fix move constructor of partition_version_ref() not preserving _unique_owner
database: Add API for incremental clearing of partition entries
cache: Define trivial methods inline
tests: Improve perf_row_cache_update
mutation_reader: Make empty mutation source advertize no partitions
Leverage the fact that it is called with monotonically increasing
positions, and avoid lookups in case the current target entry is the
successor of desired position. Reduces cache update latency by 40%
for large partition in a time-series workload.
This change speeds up merging of partition versions with many rows in
case the merged version has many rows which fall between existing rows
in the target version. This is often the case for time-series
workloads, which insert rows at the front. Lookup can be avoided for
all but the first row in the stride because we already have a
reference to the successor in the target tree, we only need to check
that the current entry in the target tree is still the successor.
This change greatly reduces amount of lookups per row during version
merging of large partitions in time-series workloads.
Incremental merging will be implemented by means of resumable
functions, which return stop_iteration::no when not yet
finished. We're not using futures, so that the caller can do work
around preemption points as well.
Represents a deferring operation which defers cooperatively with the caller.
The operation is started and resumed by calling run(), which returns
with stop_iteration::no whenever the operation defers and is not
completed yet. When the operation is finally complete, run() returns
with stop_iteration::yes.
This allows the caller to:
1) execute some post-defer and pre-resume actions atomically
2) have control over when the operation is resumed and in which context,
in particular the caller can cancel the operation at deferring points.
It will be used to implement deferring partition_version::apply_to_incomplete().
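A minimal usage sketch (the factory and the loop body here are
assumptions, not actual code):

    // `op` is the deferring operation wrapper described above.
    auto op = make_apply_to_incomplete_op();  // hypothetical factory
    while (op.run() == stop_iteration::no) {
        // The operation deferred. The caller can execute post-defer /
        // pre-resume actions here, choose the context in which to resume,
        // or drop `op` to cancel at this preemption point.
    }
    // run() returned stop_iteration::yes: the operation completed.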
Currently tests have a single LSA region lock around construction of
managed objects, their manipulation, and access. This way we avoid the
complexity of dealing with allocating sections. That will not be
possible once apply_to_incomplete() is changed to enter an allocating
section itself because this requires the region to be unlocked at
entry. The tests will have to take more fine-grained locks. That is
somewhat tricky and would add a lot of noise to tests. This patch will
make things easier by abstracting LSA management, among other things,
inside mvcc_container and mvcc_partition classes.
The test incorrectly assumed that once update() is started the
cache will return only versions from last_generation. This will not
hold once we start to defer during partition merging.
Now all snapshots will have a mutation_cleaner which they will use to
gently destroy freed partition_version objects.
Destruction of memtable entries during cache update is also using the
gentle cleaner now. We need to have a separate cleaner for memtable
objects even though they're owned by cache's region, because memtable
versions must be cleared without a cache_tracker.
Each memtable will have its own cleaner, which will be merged with the
cache's cleaner when memtable is merged into cache.
Fixes some sources of reactor stalls on cache update when there are
large partition entries in memtables.
Instead of destroying whole partition_versions at once, we will do that
gently using mutation_cleaner to avoid reactor stalls.
Large deletions could happen when a large partition gets invalidated,
upgraded to a new schema, or when it's abandoned by a detached snapshot.
Refs #3289.
Partitions can get very large. Destroying them all at once can stall
the reactor for a significant amount of time. We want to avoid that by
doing destruction incrementally, deferring in between. A new API is
added for that at various levels:
stop_iteration clear_gently() noexcept;
It returns stop_iteration::yes when the object is fully cleared and
can be now destroyed quickly. So a deferring destruction can look like
this:
return repeat([this] { return clear_gently(); });
The reason why clear_gently() doesn't return a future<> itself is that some
contexts cannot defer, like memory reclamation.
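For example, a container can implement the pattern like this (a sketch
with assumed names; seastar::need_preempt() is the usual preemption check):

    stop_iteration partition_list::clear_gently() noexcept {
        while (!_entries.empty()) {
            _entries.pop_back();            // destroy one entry at a time
            if (seastar::need_preempt()) {
                return stop_iteration::no;  // not done yet; caller may defer
            }
        }
        return stop_iteration::yes;         // fully cleared
    }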
"
This series provides reasoning and clarification for the current
structure of mutate_MV(), and how we handle some scenarios related to
range movements.
"
* 'materialized-views/clarifications/v3' of github.com:duarten/scylla:
db/view: Remove ifdef'd Java code
db/view: Ignore scenario where base replica hasn't joined the ring
db/view: Handle case when base has no paired view replica
"
Add handling for clustering columns and tests for it.
"
* 'haaawk/sstables3/read-ck-v3' of ssh://github.com/scylladb/seastar-dev:
Add test_uncompressed_compound_ck_read for SSTables 3.x
Add test_uncompressed_simple_read for SSTables 3.x
Implement reading clustering key from SSTables 3.x
column_translation: cache fixed value lengths for ck
data_consume_rows_context_m: use cached fixed column value lenghts
column_translation: store fix lengths of column values
consume_row_start: change type of clustering key
Rename ROW_BODY state to CLUSTERING_ROW
We don't need to parse the type every time.
It's better to cache the fixed lengths of column values
for an sstable.
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
Clustering key in 3.x format is stored differently
so it's easier to create a vector of temporary buffers
instead of a single block of concatenated bytes.
Each temporary buffer stores a value of a single
clustering column.
This is because the way clustering key is stored on disk
in SSTables 3.x is not the same as the way we store it
internally.
This means that we have to first read a value of every
clustering column into temporary_buffer and only then
we can create clustering key using a vector of those
temporary buffers.
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
Based on:
8daaf9833a
This patch adds a from_nodetool_style_string factory method to partition_key.
The string format follows the nodetool format, where the columns in the
partition key are separated by ':'.
For example, if a partition key has two columns col1 and col2, to get the
partition key that has col1 = val1 and col2 = val2:
val1:val2
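A hypothetical usage sketch (only from_nodetool_style_string comes from
this patch; the signature and surrounding names are assumed):

    // s is a schema_ptr whose partition key columns are (col1, col2).
    partition_key pk = partition_key::from_nodetool_style_string(s, "val1:val2");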
execute_internal() duplicates several code paths, especially in
the select path, for no good reason. It boils down to timeout and
consistency level selection which can be done based on
client_state::is_internal().
This patchset eliminates the duplication and execute_internal(),
simplifying the code.
* github.com:avikivity/scylla cql-no-execute_internal/v2:
cql: schema_altering_statement: make execute() and execute_internal()
equivalent
cql: select_statement: make execute() and execute_internal()
equivalent
cql: query_processor: don't call cql_statement::execute_internal() any
more
cql: cql_statement: remove execute_internal()
As each test completes, report it. This prevents a long-running
test in the beginning of the list from stalling output.
Message-Id: <20180526173517.23078-1-avi@scylladb.com>
"
This series introduces frozen_mutation_fragment which can be used to
send mutation_fragments over the wire to a remote node. The main
intended user is going to be the new streaming implementation.
The first part of the series fixes some IDL issues related to empty
structures and variant being the first member of a structure. Both these
problems make the generated code fail to build and they do not, in any
way, affect the existing on-wire protocol.
Logic responsible for freezing and unfreezing of mutation_fragments is
heavily based on the existing code for freezing mutations and shares the
same drawbacks (for example, unnecessary copy during unfreezing). These
preexisting performance problems can be fixed incrementally.
Another performance problem (which affects frozen_mutations as well, but
to a lesser extent) is that since the batching is done at a different
layer each frozen mutation fragment is a separate bytes_ostream object
owning at least one memory buffer. If the mutation fragments are small
this will cause an excessive number of allocations. This could be solved
either by freezing fragments in batches (though it goes against the RPC
layer doing its own batching) or using bytes_ostream or an equivalent
object with a buffer allocation policy more suitable for such use cases.
This also is something that probably could be an incremental fix.
Tests: unit (release)
"
* tag 'frozen_mutation_fragment/v1-rebased' of https://github.com/pdziepak/scylla:
idl: add idl description of frozen_mutation_fragments
tests: add test for frozen_mutation_fragments
frozen_mutation: introduce frozen_mutation_fragment
tests/idl: test variant being the first member of a structure
idl: create variant state in root node
tests/idl: test serialising and deserialising empty structures
idl-compiler: avoid unused variable in empty struct deserialisers
tests/mutation_reader: disambiguate freeze() overload
Apache Cassandra handles a case where the node hasn't joined the ring
and may consequently have an outdated view of it. Following the same
reasoning as with the previous patch, we ignore this scenario. It
happens when there are range movements, and this node is bootstrapping,
but there are already other mechanisms in the cluster, such as hinted
handoff and dual-writing to replicas during range movements, that
contribute to this update eventually making its way to the view.
This patch doesn't change any behavior, but it provides the reasoning
why we won't use the batchlog as Cassandra does, or the hinted handoff
log as we will, to later send the update when the node is joined (note
that Cassandra just sends the mutations "later", and doesn't check
again for any condition or change).
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
If no view replica is paired with the current base replica, it means
there's a range movement going on (decommission or move), such that
this base replica is gaining new token ranges. The current node is
thus a pending_endpoint from the POV of the coordinator that sent the
request.
Sending view updates to the view replica this base will eventually be
paired with only makes a difference when the base update didn't make
it to the node which is currently being decommissioned or moved-from.
The update will, however, make it to that node if HH is enabled at the
coordinator, before the range movement finishes, or later to this node
when it becomes a natural endpoint for the token.
We still ensure we send to any pending view endpoints though, at least
until we handle that case more optimally.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
All cql_statement::execute_internal() overrides now either throw or
call execute(). Since we shouldn't be calling the throwing overrides
internally, we can safely call execute() instead. This allows us to
get rid of execute_internal().
execute_internal(), for some code paths, differs from execute by the
following:
1. it uses CL_ONE unconditionally
2. it has no query timeout
3. it doesn't use execution stages
for other code paths, it just calls execute.
As preparation for getting rid of execute_internal(), unify the two
code paths.
Commit 4859b759b9 caused the consistency level and timeouts
to be provided by the caller, so using the caller provided parameters
instead of overriding them does not change behavior.
"
This patchset makes all users of query_processor specify their timeouts
explicitly, in preparation for the removal of
cql_statement::execute_internal() (whose main function was to override
timeouts).
"
* tag 'cql-explicit-timeouts/v1' of https://github.com/avikivity/scylla:
query_processor: require clients to specify timeout configuration
query_processor: un-default consistency level in make_internal_options
"
Firstly, this patchset removes the is_fixed_length() function of
abstract_type in favour of value_length_if_fixed().
Secondly, it fixes the byte_type to be compatible with Cassandra, which
erroneously treats it as a variable-length data type.
Lastly, it adds a unit test covering all non-composite CQL data types
for writing.
Tests: unit {release}
"
* 'projects/sstables-30/different-data-types/v1' of https://github.com/argenet/scylla:
tests: Add a unit test for writing different data types to SSTables 3.x format.
types: Treat byte_type as a variable-length type for compatibility reasons.
types: Remove is_value_fixed() and use value_length_if_fixed() instead.
Although values of the byte_type, which corresponds to the CQL TINYINT
type, always occupy only a single byte, Cassandra treats it as a
variable-length type for SSTables 3.0 reading and writing.
While it is clearly a mistake at Cassandra side, we have to stay
compatible.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
This patch introduces IDL definition as well as serialisers and
deserialisers for freezing mutation_fragment so that they can be
transferred between nodes in a cluster.
Each non-final IDL object is preceded by a frame containing its size.
In case of boost::variant there is a frame for the variant itself, an
integer determining the active alternative of the variant and a frame of
that active alternative.
However, if a variant was the first member of a writable stub object the
IDL would generate code that would not write the frame for the variant.
This is not a very severe issue since there are no such cases right now,
as the C++ type system would not allow such generated code to compile.
Deserialisers generated by IDL compiler first create a substream
covering the deserialised structure and then skip and read appropriate
members. If there are no members the substream will be unused and prompt
the compiler to emit a warning.
"
This patch series fixes#3405: secondary-index search only provided
correct results in certain cases, where entire partitions or contiguous
partition slices matched the query. When this was not the case, and
individual clustering rows match or do not match the query, the wrong
results were returned.
To fix this bug, we need to fix the two stages of secondary-index search:
1. In the first stage, we read from the index MV a list of row keys
(i.e., primary keys) matching the query. We can no longer remember
just the partition keys, and need to keep the list of full primary keys.
2. In the second stage, we have a list of rows (not partitions) and need
to read their selected contents to return to the user. Since CQL queries
do not have a syntax to select an arbitrary list of rows, we have to
add new code to do such a selection.
Because we provide an ad-hoc, inefficient, implementation for the row
selection described in stage 2, these patches leave two paths in the code:
The old path, efficiently selecting entire partitions, and the new path,
selecting individual rows. The old path is still used when it is applicable,
which is when a partition key column or the first clustering key column
is searched.
"
* 'si-fix-v4' of http://github.com/nyh/scylla:
secondary index: test multiple clustering column
secondary index: fix wrong results returned in certain cases
secondary index: method for fetching list of rows from base table
secondary index: method for fetching list of rows from index
select_statement.cc: refactor find_index_partition_ranges()
select_statement.cc: fix variable lifetime errors
This patch adds a test for secondary indexes on a table which has many
columns - two partition key column, two clustering key columns, and two
regular columns. We add a bunch of data in various rows and partitions,
index all columns and search on this data and verify the results.
This test exposed various bugs in secondary index search, including
issue #3405. After we fixed those bugs, the test now passes.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
The current secondary-index search code, in
indexed_table_select_statement::do_execute(), begins by fetching a list
of partitions, and then the content of these partitions from the base
table. However, in some cases, when the table has clustering columns and
we are not searching on the first one of them, doing this work at partition
granularity is wrong and yields wrong results, as demonstrated in
issue #3405.
So in this patch, we recognize the cases where we need to work in
clustering row granularity, and in those cases use the new functions
introduced in the previous patches - find_index_clustering_rows() and
the execute() variant taking a list of primary-keys of rows.
Fixes#3405.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
We add a new variant of select_statement::execute() which allows selecting
an arbitrary list of clustering rows. The existing execute() variant can't
do that - it can only take a list of *partitions*, and read the same
clustering rows from all of them.
The new select variant is not needed for regular CQL queries (which do
not have a syntax allowing reading a list of rows with arbitrary primary
keys), but we will need it for secondary index search, for solving
issue #3405.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
We already have a method find_index_partition_ranges(), to fetch a list
of partition keys from the secondary index. However, as we shall see in
the following patches (and see also issue #3405), getting a list of entire
partitions is not always enough - the secondary index actually holds a list
of primary keys, which includes clustering keys, and in some queries we
can't just ignore them.
So this patch provides a new method find_index_clustering_rows(), to
query the secondary index and get a list of matching clustering keys.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
The function find_index_partition_ranges() is used in secondary index
searches for fetching a list of matching partitions. In a following patch,
we want to add a similar function for getting a list of *rows*. To avoid
duplicate code, in this patch we split parts of find_index_partition_ranges()
into two new functions:
1. get_index_schema() returns a pointer to the index view's schema.
2. read_posting_list() reads from this view the posting list (i.e., list
of keys) for the current searched value.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
do_with() provides code a *reference* to an object which will be kept
alive. It is a mistake to make a copy of this object or of parts of it,
because then the lifetime of this copy will have to be maintained as well.
In particular, it is a mistake to do do_with(..., [] (auto x) { ... }) -
note how "auto x" appears instead of the correct "auto& x". This causes
the object to be copied, and its lifetime not maintained.
This patch fixes several cases where this rule was broken in
select_statement.cc. I could not reproduce actual crashes caused by
these mistakes, but in theory they could have happened.
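For illustration (make_state() and use() are stand-ins, not real code):

    // Wrong: `auto x` copies the object do_with keeps alive; the copy's
    // lifetime is no longer managed.
    do_with(make_state(), [] (auto x) { return use(x); });

    // Right: take a reference to the kept-alive object.
    do_with(make_state(), [] (auto& x) { return use(x); });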
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
"
This patchset implements reading row columns from SSTable 3 format data file.
Tests: units (release)
"
* 'haaawk/sstables3/read-columns-v4' of ssh://github.com/scylladb/seastar-dev: (21 commits)
Add test for reading column values of different types.
Support all fixed size column types from SSTable 3.x
Add abstract_type::value_length_if_fixed
Add test for simple table with value
flat_reader_assertions: Add produces_row taking column values
Implement reading rows and columns in data_consume_rows_context_m
Introduce column_flags_m
Add column_translation to data_consume_rows_context_m
Pass schema to data_consume_context
Add column_translation.hh
consumer_m: Add consume methods for consuming rows and columns
Extract make_atomic_cell from mp_row_consumer_k_l
Rename NON_STATIC_ROW_* states to ROW_BODY_*
Add liveness_info and use it in reading sstables
Add helper methods for parsing simple types.
Add unfiltered_flags_m::has_all_columns
data_consume_context: use make_unique instead of new
Pass serialization_header to data_consume_rows_context*
Use disk_string_vint_size for bytes_array_vint_size
Introduce disk_string_vint_size type
...
We need to specify --configfile on pdebuild too, otherwise we will
always fail to build .deb on a newly created build environment.
The only reason we are still able to build .deb is that we already copied
.pbuilderrc to the home directory on existing build environments.
Fixes#3456
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180523204112.24669-1-syuu@scylladb.com>
It will be needed to obtain column_translation that will
be added to data_consume_context in the next patch.
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
The new name describes the states better, as those states
will be used both for static and non-static rows.
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
"
This series introduces a cache of already authenticated prepared statements which
is meant to optimize the prepared statement lookup when authentication is enabled.
This cache allows performing a single cache lookup per EXECUTE operation as opposed
to at least 2 lookups: one in the prepared statements cache and one in the authentication
cache.
Tests:
- cql_query_test {debug, release}.
- cassandra-stress with authentication enabled and with short eviction timeout.
- Manual (with printouts) checks:
- Tested the eviction due to eviction in the prepared_statements_cache:
- Artificially decreased the prepared_statements_cache size and ran c-s with different keyspaces.
- Verified that the corresponding authorized_prepared_statements_cache entry is evicted and re-populated.
- Tested the BATCH of prepared statements (with dtest infrastructure):
- Verified that for each prepared statement authorized_prepared_statements_cache is updated only once:
- The batch contained a few entries of the same prepared statement.
"
* 'authorized_prepared_statements_cache-v3' of https://github.com/vladzcloudius/scylla:
cql3: use authorized_prepared_statements_cache in the BATCH processing
cql3::statements::batch_statement: introduce a single_statement class
cql3: introduce the authorized_prepared_statements_cache class
loading_shared_values: introduce the templated find() overload
tests: loading_cache_test: add a tests for a loading_cache::remove(key)/remove(iterator)
utils::loading_cache: add remove(key)/remove(iterator) methods
cql3::query_processor: properly stop() prepared_statements_cache object
Since Seastar no longer (1f005fb434) requires libunwind, we can
drop it from our dependency list. This helps the power build, for
which no libunwind is available.
Fixes#3453.
Message-Id: <20180523114750.10753-1-avi@scylladb.com>
"
This series implements the backlog tracker for TWCS, allowing it to
be controlled. The backlog for a TWCS column family is just the sum of
the SizeTiered backlogs for all the windows that we know about.
A possible optimization for this is to stop tracking windows after
they become old enough and revert to zero backlog. I reverted that
last minute, though, since this will probably cause the backlog to
completely misrepresent reality if we import SSTables into old buckets
with things like repairs or nodetool refresh.
"
* 'twcs-backlog-v4.1' of github.com:glommer/scylla:
backlog: implement backlog tracker for the TWCS
STCS_backlog: allow users to query for the total bytes managed
backlog: keep track of maximum timestamp in write monitor
memtable: also keep track of max timestamp
The TWCS backlog is relatively simple: we just need to keep track of
which SSTable belong to which time window (and actually as usual,
just their sizes). That is an easy thing to do since we can statically
calculate the time bound from the timestamp.
Once we do that we can just sum the backlogs for each individual window.
Time windows that are well enough into the past can be at some point
discarded when their backlogs become zero.
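A minimal sketch of that summation (types and names assumed):

    #include <map>

    // One SizeTiered backlog per time window; the TWCS backlog is their sum.
    double twcs_backlog(const std::map<int64_t, double>& backlog_per_window) {
        double total = 0;
        for (auto& entry : backlog_per_window) {
            total += entry.second;
        }
        return total;
    }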
Signed-off-by: Glauber Costa <glauber@scylladb.com>
The exploded_clustering_prefix type has a convenient is_empty() method
and an even more convenient "operator bool" shortcut. Unfortunately,
the other clustering prefix types (clustering_key_prefix,
clustering_key_prefix_view) have, for historic reasons, an is_empty
method which takes a schema parameter. That also means they can't
have an "operator bool" shortcut.
But checking whether a prefix is empty doesn't really need the schema - all we need to
check is whether the byte representation is empty. The result is simpler
and more efficient code, and easier to use. It is also more consistent -
all clustering-key-related types will have an "operator bool" instead of
just some of them.
To avoid massive code changes, we leave an is_empty(schema) variant, which
simply calls is_empty(). There's already precedent for that - various
methods which have a variant taking schema (and ignoring it) and one
taking nothing.
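A minimal sketch of the resulting interface (member names assumed):

    // Emptiness needs only the byte representation, not the schema.
    bool is_empty() const { return _bytes.empty(); }
    explicit operator bool() const { return !is_empty(); }
    // Kept to avoid massive code changes; the schema is ignored.
    bool is_empty(const schema&) const { return is_empty(); }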
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180521174220.13262-1-nyh@scylladb.com>
Like with the EXECUTE command avoid authorizing the same prepared
statement twice - this time in the context of processing the BATCH
command.
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
This is a helper class needed to control the handling process of a single
statement in the current batch. In particular it has the boolean defining
if the authorization is needed for this statement.
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
Add a cache that stores a checked weak pointer to already authorized prepared
statements and whose key is a tuple of an authenticated_user and the key of the
prepared_statements_cache.
The entries will be held as long as the corresponding prepared statement is valid (cached)
and will be discarded with the period equal to the refresh period of the permissions cache.
Entries are also going to be discarded after 60 minutes if not used.
The purpose of this new cache is to save the lookup in the permissions cache for an
already authenticated resource (whatever needs to be authenticated for the particular
prepared statement).
This is meant to improve the cache coherency as well (since we are going to look in a single cache
instead of two).
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
This overload allows searching the elements by an arbitrary key, as long as it
hashes to the same values as the default key and there is a comparator for
this new key.
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
remove(key): removes the entry with the given key if it exists, otherwise does nothing.
remove(iterator): removes an entry by a given iterator (returned from loading_cache::find()).
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
This commit makes sure that the hints manager is always initialized,
including creating hints directories and starting it.
This is needed because the hints manager is internally used
to store failed materialized view updates.
Fixes#3451
Message-Id: <44532fd3704e20cabeb9c4985dace5650fd22d2c.1527018865.git.sarna@scylladb.com>
"
This series addresses issue #3202 about dropping a table with secondary
indexes present. Previously dropping such tables was impossible due to
materialized view restrictions (which is an implementation detail
of Scylla's secondary indexes).
Implemented:
* fixing 'DROP KEYSPACE' with active materialized views
* adapting schema_builder to make it easy to drop indexes
* dropping all dependent SI before dropping a table
* a test case for dropping a table with secondary indexes
"
* 'drop_si_before_drop_table_3' of https://github.com/psarna/scylla:
tests: add test for dropping a table with secondary indexes
migration_manager: allow dropping table with secondary indexes
schema: add clearing indexes to schema builder
database: do not truncate already removed views
This commit adds a test case for dropping a table with dependent
secondary indexes. Dependent materialized views prohibit the table
from being dropped, but dropping a table with dependent SI is legal.
References #3202
Previously dropping a table with secondary indexes failed, because
SI are internally backed by materialized views.
This commit triggers dropping dependent secondary indexes before
dropping a table.
Fixes#3202
This commit clears the table's views before truncating it
in the drop_column_family function. The only case when
views are not empty during drop is when they're backing secondary
indexes of a base table and they are all atomically dropped
in the same go as the base table itself.
This change will prevent trying to truncate views that were
already dropped, which used to result in no_such_column_family error.
References #3202
"
This series introduces materialized view statistics, as stated in issue #3385:
- updates pushed
- updates failed
- row lock stats
It also addresses issue #3416 by decoupling user write stats from view
update stats.
"
* 'materialized_view_metrics_9' of https://github.com/psarna/scylla:
view: adapt view_stats to act as write stats
storage_proxy: decouple write_stats from stats
db: add row locking metrics
view: add view metrics
We would like to know whether there is still backlog at rest in a
particular STCS object. This is useful, for instance, in the TWCS
backlog, that uses STCS so it can delete old windows that are no longer
used.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
For sealed SSTables we can get the maximum timestamp from the statistics
component. But for partially written SSTables, the metadata is not yet
available.
One way to solve this would be to make the SSTable statistics available
earlier. But we would end up with a maximum timestamp that potentially
changes all the time as we write more cells.
A better approach is to take note of what's the maximum timestamp in a
memtable before we start to flush, and when time comes for us to flush
we will use the progress manager to inform the consumers about the
maximum timestamp.
For SSTables being compacted, we can't know for sure what is the maximum
timestamp as some entries could be TTLd already. But the maximum of all
SSTables present in the compaction is a good enough estimation for this
purposes.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
We are now keeping track of the minimum timestamp in a memtable. Also
keep track of the max timestamp so we can know what it is before we
finish flushing the entire memtable to an SSTable. Will be used by
partially written SSTables undergoing TWCS.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
"
This was sent before as two separate patchsets. It is now unified
because it has a lot of common infrastructure.
In this patchset I am aiming at two goals:
1) Provide a minimum amount of shares for user-initiated operations like
nodetool compact and nodetool cleanup
2) Be more robust with exceptions in the backlog tracker
For the first, the main difference is that I now made the compaction
controller a part of the compaction manager. It then becomes easy to
consult with the compaction controller for the correct amount of shares
those operations should have.
In compaction_strategy.cc, the major_compaction_strategy object was
actually already unused before. So instead of making use of it, which
would require some form of information flow downwards about the backlog
we need to export, I am creating a user-initiated backlog type inside
the compaction manager.
With the two changes described above everything is very well
self-contained within the compaction manager and the implementation
becomes trivial.
For the second, I am now handling exceptions in two places:
1) the backlog computation. Those are const functions, so if we just have
a transient exception when computing the backlog, all we need to do is
return some fixed amount of shares and try again in the next adjustment
window.
2) the process of adding / removing SSTables. Those are harder, since if
we fail to manipulate the list we'll be left in an inconsistent state.
The best approach is then to disable the backlog tracker and return a
fixed amount of shares globally.
Tests: unit (release)
"
* 'backlog-improvements-v3' of github.com:glommer/scylla:
compaction_manager: disable backlog tracker if we see an exception
backlog tracker: protect against exceptions in backlog calculation.
STCS_backlog: protect against negative backlog
STCS_backlog: remove unused attribute
compaction strategy: move size tiered backlog to a header
compaction_strategy: delete major_compaction_strategy class
compaction: make sure that user-initiated compactions always have a minimum priority
backlog_controller: add constants to represent a globally disabled controller
backlog_controller: move compaction controller to the compaction manager
backlog_controller: allow users to compute inverse function of shares
This commit adapts the view_stats structure so it can be passed
to storage_proxy as write stats. Thanks to that, mv replica updates
will not interfere with user write metrics. As a side effect it also
provides more stats for replica view updates.
Closes#3385
Closes#3416
This commit extracts metrics related to writes from the stats structure,
so it can be easily replaced later, e.g. for materialized view metrics.
References #3385
References #3416
This commit adds statistics to the row_locker class. Metrics are
independently counted for all lock types: row<->partition and
exclusive<->shared.
Metrics gathered:
- total acquisitions
- operations that wait on the lock
- histogram of the time spent on waiting on this type of lock
References #3385
References #3416
This commit introduces view statistics:
- updates pushed to local/remote replicas
- updates failed to be pushed to local/remote replicas
Metrics are kept on a per-table basis, i.e. updates_pushed_remote
shows the total number of updates (mutations) pushed to all paired
mv replicas that this particular table has.
Every single update is taken into consideration, so if a view update
requires removing a row from one view and adding a row to another,
it will be counted as 2 updates.
References #3385
References #3416
The compactor collects all currently active memtables and later replaces
them with the merged result. The problem is that active memtable
belongs to the input set during compaction and as a result mutations
applied concurrently with compaction could be lost once compaction
replaces the memtables. The fix is to open a new active memtable when
compaction starts.
Caused sporadic failures of row_cache_test.cc:test_continuity_is_populated_when_read_overlaps_with_older_version()
Message-Id: <1526997724-13037-1-git-send-email-tgrabiec@scylladb.com>
If we see an exception when adding or removing SSTables from the backlog
tracker, the backlog tracker can be inconsistent forever. It would be
best if we act before that happens and disable the backlog tracker. Once
the backlog tracker is disabled it will default to returning a fixed
number of shares.
We can either disable the backlog tracker or remove it. But if we remove
it we can end up with a backlog of zero if that's the only tracker with
a backlog. So we keep it registered but mark it as disabled. This also
leaves room for recovery in some situations: we can recover the backlog
by doing a schema change in the column family that had the backlog
disabled, for instance.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Backlog calculations should be exception free, but there are cases in
which I can see them happening. One example is if some backlog tracker
that uses temporary objects fails an allocation.
Memory shortages can be especially pernicious: if we leave the
responsibility of catching those to the individual backlog tracker, we
will keep trying to make more allocations in the other backlog trackers
if we have many column families. By handling it here we can stop that.
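A minimal sketch of that central handling (the function shape and the
fixed_backlog value are hypothetical):

    #include <functional>

    // Poll a tracker's const backlog computation in one central place;
    // any transient exception (e.g. a failed allocation) degrades to a
    // fixed backlog, retried at the next adjustment window, instead of
    // being handled by every individual tracker.
    double backlog_or_fallback(const std::function<double()>& compute,
                               double fixed_backlog) {
        try {
            return compute();
        } catch (...) {
            return fixed_backlog;
        }
    }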
Signed-off-by: Glauber Costa <glauber@scylladb.com>
A negative backlog can be interpreted as a very large backlog.
Part of that is because we keep the total_size as an unsigned type,
which is what we expect. But if there is an issue -- like an
exception that causes some SSTable not to be tracked -- then this size
can effectively become negative. Returning a zero backlog is better than
allowing it to be interpreted as a giant number.
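A minimal sketch of the guard, assuming unsigned sizes as described
(names hypothetical):

    #include <cstdint>

    // total_size is unsigned, so if an untracked SSTable makes the
    // compacted bytes exceed it, a naive subtraction would wrap around
    // into a giant backlog. Clamp to zero instead.
    double remaining_backlog(uint64_t total_size, uint64_t compacted) {
        if (compacted >= total_size) {
            return 0.0; // a zero backlog beats a bogus huge one
        }
        return double(total_size - compacted);
    }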
Signed-off-by: Glauber Costa <glauber@scylladb.com>
This attribute ended up being unused in the final version.
Spotted now while reading the code for other purposes.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
It's very common for other strategies to include a SizeTiered
step somewhere inside their algorithms: LCS will do SizeTiered on
L0, TWCS will do SizeTiered within a window, etc.
To make it easier for those strategies to consume the SizeTiered
backlog tracker, we will move it to its own file.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
It was already unused before this series. In an earlier version I had
used it to provide an ad-hoc backlog for major compactions. But now that
this is done by the compaction manager, this class really isn't being
used.
And it is likely it won't be: major compaction is not a compaction
strategy a user can choose, unlike the others that need to be built
through make_compaction_strategy.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
We have observed the following behavior with user initiated compactions,
like major compactions:
- if there are no writes, the backlog doesn't increase.
- as compaction progresses the backlog decreases.
- at some point, the backlog is so low that compaction barely makes any
progress.
Going forward, we should allow one to read from the generated partial
SSTables, in which case this doesn't matter that much. But for
user-initiated compactions we would like to guarantee a minimum baseline.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
There are situations in which we want the controllers to stop working
altogether. Usually that's when we have an unimplemented controller or
some exception.
We want to return fixed shares in this case, but this is a very
different situation from when we want fixed shares for *one* backlog
tracker: we want to return fixed shares, yes, but if we disable 200
backlog trackers (because they all failed, for instance), we don't want
that fixed number x 200 to be our backlog.
So a mechanism to globally disable the controller is still warranted,
and infinity is a good way to represent that. It's a float that the
controller can easily test against. But actually using infinity in the
code is confusing. People reading it may interpret it as the other way
around from what it means, just meaning "a very large backlog".
Let's turn that into a constant instead. It will help us convey meaning.
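A sketch of the idea, with a hypothetical constant name:

    #include <limits>

    // Give the sentinel a name so readers don't mistake it for merely
    // "a very large backlog".
    constexpr double disabled_backlog = std::numeric_limits<double>::infinity();

    inline bool controller_globally_disabled(double backlog) {
        return backlog == disabled_backlog; // comparing against inf is exact
    }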
Signed-off-by: Glauber Costa <glauber@scylladb.com>
There was recently an attempt to add minimum shares to major compactions
which ended up being harder than it should be due to all the plumbing
necessary to call the compaction controller from inside the compaction
manager -- since it is currently a database object. We had this problem
again when trying to return fixed shares in case of an exception.
Taking a step back, all of those problems stem from the fact that the
compaction controller really shouldn't be a part of the database: as it
deals with compactions and its consequences it is a lot more natural to
have it inside the compaction manager to begin with.
Once we do that, all the aforementioned problems go away. So let's move
it there, where it belongs.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Fixes#3446
Previously, only shutdown-synced objects were actually closed,
which is wrong.
This introduces yet another queue, processed together with the
deletion objects, which ensures we explicitly close all objects
that have been discarded.
Message-Id: <20180521140456.32100-1-calle@scylladb.com>
"This series leverages hinted handoff for failed view replica
updates."
* 'materialized_view_updates_with_hh_5' of https://github.com/psarna/scylla:
storage_proxy: enable hinted handoff for materialized views
storage_proxy: make view updates use consistency_level::ANY
row::find_cell() may be called for cells that do not exist in that row.
In such a case nullptr is returned; this patch makes sure that
it is not dereferenced.
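A generic analogue of the pattern being guarded (std::map stands in for
the real row type):

    #include <cstdio>
    #include <map>

    // Stand-in for row::find_cell(): returns nullptr when the column has
    // no cell in this row; callers must check before dereferencing.
    const int* find_cell(const std::map<int, int>& row, int column_id) {
        auto it = row.find(column_id);
        return it == row.end() ? nullptr : &it->second;
    }

    int main() {
        std::map<int, int> row{{1, 42}};
        if (const int* cell = find_cell(row, 2)) { // absent: nullptr, skipped
            std::printf("%d\n", *cell);
        }
    }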
Message-Id: <20180522091726.24396-1-pdziepak@scylladb.com>
There are some situations in which we want to force a specific amount of
shares and don't have a backlog. We can provide a function to get that
from the controller.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
* seastar a6cb005...5da5d4e (6):
> append_challenged_posix_file_impl: Ensure continuation uses non-stale object
> utils: make make_visitor() public
> tcp: Adjust receive window
> tcp: Fix allowed sending size calculation in can_send
> tcp: Fix assert in tcp::tcb::output_one
> be more descriptive with failed syscalls for filesystem operations
Contains alternative fix for #3446 (will also be fixed directly).
This commit initializes and enables hinted handoff for materialized
views, even if HH is not explicitly turned on in config.
User writes still use hinted handoff only if it is explicitly enabled,
while materialized views are allowed to use it unconditionally
in order to store failed replica updates somewhere.
Fixes#3383
This commit makes view replica updates internally use consistency
level ANY, so in case an update fails it will fall back to hinted
handoff.
References #3383
install(1) creates missing directories on recent Fedora, but not
on CentOS 7. This causes the RPM build (which installs to a pristine
tree, without an existing /etc) to fail.
Fix by setting up /etc.
Tests: rpm (Fedora, CentOS)
Message-Id: <20180520124937.20466-1-avi@scylladb.com>
There is no need to call dht::split_ranges_to_shards to split the token
range into <shard> : <a lot of small ranges> mapping and create a flat
mutation reader with a lot of small ranges.
Because:
1) The flat mutation reader on each shard only returns data that belongs
to the local shard, so there is no correctness issue if we do not split
the range and feed only the sub-ranges that belong to this local shard.
2) With murmur3_partitioner_ignore_msb_bits = 12, it is almost certain
that given a token range, all the shards will have data for the range
anyway. Even if we ask all the shards to work on the token range and
some of the shards have no data for it, it is fine. We simply send no
data from this shard.
Tests: update_cluster_layout_tests.py
Message-Id: <ac00cd21d6156c47b74451dd415d627481e48212.1526864222.git.asias@scylladb.com>
In streaming, the sender sends the mutations on all of its local shards
in parallel, so it is possible that the receiver handles more than one
such connection on the same shard. This is determined by where the tcp
connection goes. The current rpc ignores the destination shard id when
sending the rpc message.
For instance, say node1 has 2 shards, node2 has 2 shards. Currently, we
can end up with like this:
Node 1 shard 0 -> Node 2 shard 1
Node 1 shard 1 -> Node 2 shard 1
It is better if we do:
Node 1 shard 0 -> Node 2 shard 0
Node 1 shard 1 -> Node 2 shard 1
This patch solves this problem by letting the handler always run on
shard = src_cpu_id % smp::count.
If sender and receiver have the same shard config, the work is
distributed completely evenly.
If sender and receiver do not have the same shard config, it is
unavoidable that some of the shards will do more work than others.
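The routing rule itself is a one-liner; a small sketch of how it maps
connections, assuming only the sender's shard id and the receiver's
shard count:

    #include <cstdio>

    // shard = src_cpu_id % smp::count, evaluated on the receiver.
    unsigned destination_shard(unsigned src_cpu_id, unsigned smp_count) {
        return src_cpu_id % smp_count;
    }

    int main() {
        // Sender and receiver both have 2 shards: a 1:1 mapping.
        for (unsigned src = 0; src < 2; ++src) {
            std::printf("Node 1 shard %u -> Node 2 shard %u\n",
                        src, destination_shard(src, 2));
        }
    }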
Tests: dtest update_cluster_layout_tests.py
Message-Id: <911827bcf67459a07ec92623a9ed4c4fbba195ca.1524622375.git.asias@scylladb.com>
Fixes#2793
Prints error handle class (commitlog or "other/disk") + exception
type and message. While not exhaustive, at least gives a correlation
point to (hopefully) other log printouts.
Message-Id: <20180509081040.7676-1-calle@scylladb.com>
"
For compression, SSTables 3.x format uses CRC32 for checksumming
compressed chunks as well as for calculating the full file checksum.
Also, while for older formats the "full checksum" of a compressed data file
means a combination of checksums of its compressed chunks, in SSTables
3.x this now reads literally and means the checksum of all bytes
written, including per-chunk digests.
Tests: unit {debug, release}
"
* 'projects/sstables-30/write-compression/v3' of https://github.com/argenet/scylla:
tests: Add unit tests for writing compressed SSTables 3.x.
tests: Validate Digest32.crc for SSTables 3.x write tests.
tests: Fix invalid Digest file for write_counter_table test.
sstables: Support writing compressed SSTables 3.0.
sstables: Make compressed streams customizable on checksumming.
sstables: Move checksum calculation logic to compressed_output_stream.
Previously, compressed_output_stream used to calculate checksum of the
supplied chunk and pass it to the 'compression' object to combine with
the full checksum calculated on prior writes.
Now, all the checksum calculation happens inside
compressed_output_stream and 'compression' only stores the result.
This is done to loosen ties between the two classes and to simplify
compressed_output_stream customisation with various checksum algorithms.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
We currently move the pointer we acquired to the segment into
the lambda in which we'll handle the cycle.
The problem is that we also use that same pointer inside the exception
handler. If an exception happens we'll access the moved-from pointer and crash.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20180518125820.10726-1-glauber@scylladb.com>
* tag 'tgrabiec/fixes-and-improvements-for-gdb-scripts-v1' of github.com:tgrabiec/scylla:
gdb: Print live object size from 'scylla lsa-segment'
gdb: Extend 'scylla segment-descs' output with full occupancy info
gdb: Print allocated object's type name instead of full LSA migrator
gdb: Fix LSA migrator discovery
gdb: Drop code related to LSA zones
gdb: Fix uses of removed segment_descriptor::_lsa_managed
lsa: Add use for debug::static_migrators
Move code to a traditional install.sh script (more traditional would be
a "make install", but this is close enough).
This allows testing installation independently of packaging. In addition,
non-Red Hat-packaging can share much of the code in install.sh.
Ref #3243.
Tests: build+install rpm
Message-Id: <20180517114147.30863-1-avi@scylladb.com>
This parameter is not available on recent Red Hat kernels or on
non-Red Hat kernels (it was removed on 3.10.0-772.el7,
RHBZ 1455932). The presence of the parameter on kernels that don't
support it causes the module load to fail, with the result that the
storage is not available.
Fix by removing the parameter. For someone running an older Red Hat
kernel the effect will be that discard is disabled, but they can fix
that by updating the kernel. For someone running a newer kernel, the
effect will be that they can access their data.
Fixes#3437.
Message-Id: <20180516134913.6540-1-avi@scylladb.com>
"
Main optimization is in the patch titled "lsa: Reduce amount of segment compactions".
I measured a 50% reduction of cache update run time in a steady state for an
append-only workload with a large partition, in the perf_row_cache_update version from:
c3f9e6ce1f/tests/perf_row_cache_update.cc
Other workloads and other allocation sites could probably also see the
improvement.
"
* tag 'tgrabiec/reduce-lsa-segment-compactions-v1' of github.com:tgrabiec/scylla:
lsa: Expose counters for allocation and compaction throughput
lsa: Reduce amount of segment compactions
lsa: Avoid the call to segment_pool::descriptor() in compact()
lsa: Make reclamation on reserve refill more efficient
Fixes#3339
* seastar 840002c...0a1a327 (7):
> Merge "fix perftune.py issues with cpu-masks on big machines" from Vlad
> Merge 'Handle Intel's NICs in a special way' from Vlad
> reactor: fix calculation of idle ticks
> log: streamline logging internals a little
> Merge "CMake imrovements and compatibility" from Jesse
> iotune: fix typo in property name
> cmake: do not find_package(Boost ...) if Boost is a target
"
This patchset adds support for writing counter cells in SSTables 3.x
format ('m'). The logic of writing counters is almost identical to that
used for the old 2.x format ('k'/'l') with the only difference that the
data length preceding serialised shards is written as a vint.
Tests: unit {release}.
Generated SSTables are verified to be processed fine by sstabledump
(note that sstabledump only outputs the binary data for counters, not
their actual values, same as sstable2json).
Verified with Cassandra 3.11 to get the expected values from the
counters table:
cqlsh> SELECT * from sst3.counter_table;
pk | ck | rc1 | rc2
-----+-----+-----+-----
key | ck1 | 10 | 1
(1 rows)
Verified that the deleted counter can no longer be updated:
cqlsh> use sst3 ;
cqlsh:sst3> UPDATE counter_table SET rc1 = rc1 + 2 WHERE pk = 'key' AND ck = 'ck2';
cqlsh:sst3> SELECT * from sst3.counter_table;
pk | ck | rc1 | rc2
-----+-----+-----+-----
key | ck1 | 10 | 1
(1 rows)
"
* 'projects/sstables-30/write_counters/v1' of https://github.com/argenet/scylla:
tests: Unit tests to cover writing counters in SSTables 3.x format.
sstables: Support writing counters for SSTables 3.x.
sstables: Move code writing counter value into a separate helper.
The sstable test fails when running concurrently (for example, in release
and debug mode) because it uses a static temporary dir in lots of tests.
Let's fix it by switching to a dynamic temporary dir, created using
mkdtemp(). Also, the sstable tests will now run in /tmp, which makes them
much faster.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20180516042044.15336-1-raphaelsc@scylladb.com>
Reclaiming memory through segment compaction is expensive. For
occupancy of 85%, in order to reclaim one free segment, we need to
compact 7 segments, by migrating 6 segments worth of data. This results
in significant amplification. Compaction involves moving objects,
which in some cases is expensive in itself as well
(See https://github.com/scylladb/scylla/issues/3247).
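(A back-of-the-envelope check of those numbers, not taken from the
patch: compacting n segments at occupancy f leaves about n*f segments of
live data, freeing n*(1-f) segments. To free one segment we need
n >= 1/(1-f), which for f = 0.85 gives n >= 6.67, i.e. 7 segments, of
which roughly 6 segments' worth of data (7 * 0.85 ~= 6) must be
migrated.)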
This patch reduces the amount of segment compaction in favor of doing
more eviction. It especially helps workloads in which LRU order
matches allocation order, in which case there will be no segment
compaction, just eviction.
In the perf_row_cache_update test case for a large partition with lots of
rows, which simulates an appending workload, I measured that, before the
patch, for each new object allocated 2 needed to be migrated. After the
patch, only 0.003 objects are migrated. This reduces the run time of the
cache update part by 50%.
"
The memory estimations we have when using the chunked vector
are usually slightly wrong. We can make them more accurate by
exporting the memory usage directly as a chunked_vector API.
"
* 'chunked_memory-v2' of github.com:glommer/scylla:
large_bitset: be more accurate with memory usage
chunked_vector: exports its current memory usage
We are slightly underestimating the amount of memory we use. Now that
the chunked vector can export its internal memory usage we can use that
directly.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
There are times in which we would like to estimate how much memory
a chunked_vector is using. We have two strategies to do it:
1) multiply the size by the size of the elements. That is wrong, because
the chunked_vector can allocate larger chunks in anticipation of more
elements to come.
2) multiply the number of chunks by 128kB. That is also wrong, because
the chunked_vector will not always allocate the entire chunk if there are
only a few elements in it.
The best way to deal with it is to allow the chunked_vector to export
its current memory usage.
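A minimal sketch of the exported-usage idea (the chunk layout below is a
stand-in, not the real chunked_vector internals):

    #include <cstddef>
    #include <vector>

    // The container reports the memory it actually reserved, chunk by
    // chunk, instead of callers guessing from size() or from a fixed
    // 128kB chunk size.
    template <typename T>
    class chunked_vector_like {
        std::vector<std::vector<T>> _chunks; // stand-in for the chunk list
    public:
        size_t memory_usage() const {
            size_t bytes = sizeof(*this);
            for (const auto& chunk : _chunks) {
                bytes += chunk.capacity() * sizeof(T); // reserved, not just used
            }
            return bytes;
        }
    };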
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Commit 9eb8ea8b11 installed
scylla_blocktune.py as part of preparing the rpm, but forgot
to add it to the installed file list, breaking the rpm build.
Fix by listing the file in the %files section.
Message-Id: <20180506202807.5719-1-avi@scylladb.com>
"
SSTables 3.x (format 'm') use CRC32 instead of Adler32 for calculating
checksums. This patchset introduces support for CRC32 along with Adler32
in checksummed_file_writer to be used for SSTables written in 'mc'
format.
Structures and helpers introduced for CRC32 will be later used for
calculating checksums for compressed files as well (not a part of this
patchset).
Tests: unit {release}
"
* 'projects/sstables-30/write-digest-crc/v3' of https://github.com/argenet/scylla:
tests: Add test covering checksumming SSTables 3.0 with CRC32.
sstables: Support CRC32 checksum for SSTables 3.x.
sstables: Move adler32 routines under the scope of a class.
sstables: Move checksum utils into separate header.
sstables: Remove unused 'checksum_file' flag from checksummed_file_writer.
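For illustration only: both digests are available from zlib, so a writer
can be parameterized on the checksum routine per SSTable format (the
wrapper names below are hypothetical):

    #include <zlib.h>
    #include <cstdint>

    // CRC32 for SSTables 3.x ('mc'); Adler32 for the older formats.
    uint32_t checksum_crc32(const unsigned char* buf, uint32_t len) {
        return ::crc32(::crc32(0L, Z_NULL, 0), buf, len);
    }

    uint32_t checksum_adler32(const unsigned char* buf, uint32_t len) {
        return ::adler32(::adler32(0L, Z_NULL, 0), buf, len);
    }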
"
The native protocol server generates many reactor tasks that
can be easily eliminated. I measured a read workload with 100%
cache hit rate, seeing the number of tasks per request drop
from ~31 to ~27, and an increase of 3% in throughput.
"
* tag 'transport-optimize-1/v1' of https://github.com/avikivity/scylla:
transport: remove unused capture of flags variable
transport: merge response write and error handling continuations
transport: make write_response() return void
transport: de-template a lambda
transport: merge memory-management and logging continuations
transport: remove gate continuation
transport: merge two response processing continuations
transport: simplify response processing continuation
transport: remove gratuitous continuation from process_request_one()
Remove implicit timeouts and replace with caller-specified timeouts.
This allows removing the ambiguity about what timeout a statement is
executed with, and allows removing cql_statement::execute_internal(),
which mostly overrode timeouts and consistency levels.
Timeout selection is now as follows:
query_processor::*_internal: infinite timeout, CL=ONE
query_processor::process(), execute(): user-specified consistency level and timeout
All callers were adjusted to specify an infinite timeout. This can be
further adjusted later to use the "other" timeout for DCL and the
read or write timeout (as needed) for authentication in the normal
query path.
Note that infinite timeouts don't mean that the query will hang; as
soon as the failure detector decides that the node is down, RPC
responses will terminate with a failure and the query will fail.
Make the consistency level explicit in the caller in order to clarify
what is going on.
An "internal" query used to mean that it was accessing local tables,
so infinite timeouts and a consistency level of ONE were indicated,
but authentication accesses non-local tables so explicit consistency
level and timeouts are needed.
It just schedules the response, and returns immediately.
(I thought about calling it schedule_response(), but usually it will
write the response immediately, since waiting for network writes is
rare in a local network).
with_gate() generates a continuation if the protected function defers.
Avoid that by merging a gate::leave() call with another, preexisting,
continuation.
We have one continuation transforming the result, and another shutting
down tracing. Since the first cannot defer, we can merge the two, reducing
the number of tasks processed by the reactor.
This patch fixes a bug where queries using a secondary index would, in
some cases, produce the same rows multiple times.
The problem was that the code begins by finding a list of primary keys
that match the search, and then works on the partitions containing them.
If multiple rows matched in the same partition, the partition was considered
multiple times, and the same rows were output multiple times.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180510203141.17157-1-nyh@scylladb.com>
If we're given a single reader (can be common in a low-write-rate table,
where most of the data will be in a single large sstable, or in leveled
tables) then we can avoid the overhead of the combining reader by returning
the single input.
Tests: unit (release)
Message-Id: <20180513130333.15424-1-avi@scylladb.com>
"
This patchset implements separate timeouts for range queries, and lays
the foundations for separate timeouts for other query types.
While the feature in itself is worthy, the real motivation is to have
the timeouts decided by the caller, instead of storage_proxy. This in
turn is required to disentangle each layer behaving differently
depending on whether the query is internal or not; instead, the goal
is to have each caller declare its needs in terms of consistency level
and timeouts, and have the lower layers implement its requirements
instead of making their own decisions.
Fixes#3013.
Tests: unit (release)
"
* tag '3013/v1.1' of https://github.com/avikivity/scylla:
storage_proxy: remove default_query_timeout()
storage_proxy: don't use default timeouts
query_options: augment with timeout_config
thrift: configure thrift transport and handler with a timeout_config
transport: configure native transport with a timeout_config
cql3: define and populate timeout_config_selector
timeout_config: introduce timeout configuration
Before we accept running while not in developer mode, we verify that
the I/O Scheduler is properly configured. Up until now, that meant
verifying that --max-io-requests is properly set and that the number
of I/O Queues is enough to leave at least 4 requests per I/O Queue.
Systems that move to newer versions of Scylla may continue doing that,
so we need to be backwards compatible and keep testing for that.
However, newer systems will not set that option, but pass a YAML
property file (or string) instead. So we need to make sure that
either one of those is set.
If the property file is set, I am deciding here not to test for
number of I/O queues. scylla_io_setup will usually configure that
anyway, plus we plan on soon moving to all-shards-dispatch making
that less important.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20180509163737.5907-1-glauber@scylladb.com>
There is an ongoing discussion in issue 2678 about the right time to
release permits. Right now we are releasing the permit after we flush
all data for the memtable plus the SSTables accompanying components -
plus flushing them, closing them, etc.
During all that time, we are increasing virtual dirty by adding more
data to the buffers but we are not able to decrease it-- until we
release the permit we can't start flushing the next memtable. This is
much more of a concern than I/O overlapping as described in the issue.
We have a hook in the SSTable write process that is (should be) called
as soon as data is written. We should move the permit release there.
We aren't, though, calling that as early as we could. The call to the
data written hook is writing after the Index is closed, summary is
sealed, etc.
This patch fixes that.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20180508182746.28310-2-glauber@scylladb.com>
Currently reserve refill allocates segments repeatedly until the
reserve threshold is met. If a single segment allocation needs to
reclaim memory, it will ask the reclaimer for one segment. The
reclaimer could make better decisions if it knew the total number of
segments we are trying to allocate. In particular, it would not attempt
to compact any segment until it has first evicted the total amount of
memory, which may reduce the total amount of segment compaction during
refill.
This patch changes refill to increase reclamation step used by
allocate_segment() so that it matches the total amount of memory we
refill.
We have a conflict between scylla-libgcc72/scylla-libstdc++72 and
scylla-libgcc73/scylla-libstdc++73; we need to replace the *72 packages
with the scylla-2.2 metapackage to prevent it.
Fixes#3373
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180510081246.17928-1-syuu@scylladb.com>
"Previously, partition tombstone was not written for partitions with no
rows causing corrupted data files.
This is now fixed and covered with tests.
In addition, we now track partition tombstones while collecting encoding
statistics."
* 'projects/sstables-30/fix-partition-tombstone/v3' of https://github.com/argenet/scylla:
tests: Don't use deprecated schema constructor.
tests: Add tests to cover partitions consisting only of partition keys.
sstables: Make sure partition level tombstone is written for partitions with no rows.
memtable: Collect statistics from partition-level tombstone.
"
This is preparatory cleanup series with fixes/cleanup of miscellaneous
issues that I discovered while working on the stateful range-scans.
Since the stateful range-scans series, even without these patches, is a
20+ patches strong series I'd like to fast-track this, to ease reviewing
the former.
Most of the changes here are related to code hygiene and effectiveness,
and there is a patch that is correctness-related ("querier: check only
the end bound of ranges when matching them") and one that is related to
ease-of-use ("range: clean the deduced transformed type").
Note that although these changes were made in the context of working on
the stateful range-scans they make sense on their own as well.
Tests: unit(release, debug)
"
* '1865/pre-range-scans-cleanup/v1' of https://github.com/denesb/scylla:
multishard_combining_reader: use optimized optional for the shard reader
Use dht::token_range alias for last/preferred replicas
storage_proxy::coordinator_query_result: merge constructors into one w/ default params
querier: check only the end bound of ranges when matching them
querier: take range and slice by value
querier: remove const params from make_compaction_state()
querier: make _range and _slice const
flat_multi_range_mutation_reader: optimize for non-plural range vectors
range: clean the deduced transformed type
"
Fixes#3420.
Tests: dtest (`auth_test.py`), unit (release)
"
* 'jhk/fix_3420/v2' of https://github.com/hakuch/scylla:
cql3: Include custom options in LIST ROLES
auth: Query custom options from the `authenticator`
auth: Add type alias for custom auth. options
"
These patches were extracted from a much larger series that introduces a new
in-memory representation of cells. They contain various enhancements and
fixes that, to a varying degree, make sense on their own. Sending them
separately will hopefully ease the review and merging process of the whole
IMR effort.
Tests: unit(release).
"
* tag 'pre-imr/v1' of https://github.com/pdziepak/scylla:
tests/perf: add microbenchmarks for basic row operations
tests: simple_schema: add make_row_from_serialized_value()
row: add clear_hash()
types: move compare_unsigned() to bytes.hh
lsa: provide migrator with the object size
lsa: add free() that does not require object size
db/view/build_progress: avoid copying mutation fragment
mutation_partition: enable ADL for cell swap
types: make some collection_type_impl functions non-static
counters: drop revertability of apply()
mutable_view: add default constructor and const_iterator
tests/mutation_reader: do not apply mutations created on another shard
sstables: do not call atomic_cell::value() for dead cells
lsa: sanitize use of migrators
lsa: reuse registered migrator ids
lsa: make migrators table thread-local
The querier provides a `matches(const nonwrapping_range&)` member to
allow for checking whether a range matches that with which the querier
was originally created. The check for a match is more lax than a strict
equality check, as ranges are shrunk as the query progresses.
Because of this, the above member only checked that one of the bounds of
the examined ranges matches. This is adequate for this purpose
because, in the context of a single query, it is guaranteed that no
two read requests to the same replica will have overlapping ranges.
However, Avi pointed out in a recent, related review that this check can
be made a little stricter by requiring that the end-bounds of the
two ranges *always* match, instead of allowing either of the bounds to
match.
Don't create a flat_multi_range_mutation_reader when the range vector
has 0 or 1 element. In the former case create an empty reader and in the
latter just create a reader from the mutation source with the only range
in the vector.
wrapping_range and nonwrapping_range offer a transform() member function
which allows creating a new range by applying a transformer function to
the bounds of the current range. The type of bounds of the new range is
deduced from the return type of this transformer function. However the
return type is used as-is, with any CV or reference qualifiers attached
to it. Since it doesn't make sense to create a range of references or of
cv-qualified types, strip these off the deduced type.
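A minimal sketch of the deduction fix (alias name hypothetical):

    #include <type_traits>

    // Strip references and cv-qualifiers from the transformer's return
    // type before using it as the bound type of the new range.
    template <typename Transformer, typename Bound>
    using transformed_bound_t =
        std::remove_cv_t<std::remove_reference_t<
            std::invoke_result_t<Transformer, Bound>>>;

    // A transformer returning `const int&` yields a range of plain `int`.
    static_assert(std::is_same_v<
        transformed_bound_t<const int& (*)(long), long>, int>);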
An implementation of `authenticator` can support custom options for
each role.
If, to make up an example, the authenticator supported the `region` key,
then a role would be created as follows:
CREATE ROLE jsmith WITH OPTIONS = { 'region': 'north_america' }
AND PASSWORD = 'super_secure';
LIST ROLES will now print this custom option map as an additional column
with the heading "options".
However, none of the implementations of `authenticator` in Scylla
currently support OPTIONS, so LIST ROLES will in practice, for now,
print the empty set:
role | super | login | options
-----------+-------+-------+---------
cassandra | True | True | {}
None of the `authenticator` implementations we have support custom
options, but we should support this operation to support the relevant
CQL statements.
simple_schema::make_row() is not very well suited for performance tests
of row and cell creation since it serialises the value. This patch
introduces a new function that performs only minimal actions.
compare_unsigned() is a general utility function that compares two
bytes_views byte-by-byte. There is no need to include the whole types.hh
in order to make it available.
While the migration function should have enough information to obtain
the object size itself, the LSA logic needs to compute it as well.
IMR is going to make calculating object sizes more expensive, so by
providing the information to the migrator we can avoid some needless
operations.
It is non-trivial to get the size of an IMR object. However, the
standard allocator doesn't really need it and LSA can compute it itself
by asking the migrator.
Calling fully qualified std::swap() prohibits the cell objects from
using their own swap implementations. This patch invokes swap() in
the usual ADL-friendly way.
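The idiom in question, for reference:

    #include <utility>

    // Bring std::swap into scope as a fallback, then call swap()
    // unqualified so ADL can pick a type's own overload if one exists.
    template <typename T>
    void swap_values(T& a, T& b) {
        using std::swap;
        swap(a, b); // T's own swap via ADL, or std::swap otherwise
    }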
The switch to the new in-memory representation will require larger
parts of the logic to be aware of the types of the values they are
dealing with. In most cases it is not a significant burden for the users.
Scylla uses a shared-nothing architecture and communication between the
shards is supposed to be very restricted. Applying mutations created on
another shard to a memtable is far too complex an operation to be
allowed. Using frozen mutations is a much safer option.
Having migrators dynamically registered and deregistered opens a new
class of bugs. This patch adds some additional checks in debug mode
with the hope of catching any misuse early.
With the introduction of the new in-memory representation we will get
type- and schema-dependent migrators. Since there is no bound on how many
times they can be created and destroyed, it is better to be safe and
reuse registered migrator ids.
"
SSTable 3.0 format introduces a serialization header which is used when reading SSTables in that format.
This patchset implements loading of this new component of Statistics.db.
Tests: units (release)
"
* 'haaawk/sstables3/load_serialization_header_v2' of ssh://github.com/scylladb/seastar-dev:
Load serialization_header from statistics
Add parse for disk_array_vint_size
Add helpers to read/parse vints
Add signed_vint::serialized_size_from_first_byte
Add sstable::get_serialization_header
Move random_access_reader to separate header
250ms is too long a period for the memtable controller. Since memtable
flushes are relatively efficient, especially in comparison to
compactions, if the shares are high we can flush a lot of data down with
the high shares - so in the next adjustment period our shares will be
minuscule and we won't flush much at all.
This leads to oscillating behavior that is mitigated by adjusting
faster.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20180508182746.28310-3-glauber@scylladb.com>
"Fixes a bug in partition_snapshot::merge_partition_versions(), which would not
attempt merging if the snapshot is attached to the latest version (in which
case _version is nullptr and _entry is != nullptr). This would cause
partition_version objects to accumulate if there was an older snapshot and it
went away before the latest snapshot. Versions will be removed when the whole
entry goes away (flush or eviction).
May cause performance problems.
Fixes #3402."
* 'tgrabiec/fix-merge_partition_versions' of github.com:tgrabiec/scylla:
mvcc: Test version merging when snapshots go away
anchorless_list: Make ranges conform to SinglePassRange
anchorless_list: Drop deprecated use of std::iterator
mvcc: Fix partition_snapshot::merge_partition_versions() to not leave latest versions unmerged
The newer version of iotune, recently merged to Seastar, accepts
a new parameter that tells us where we should store the properties
of the disk.
We are already generating that properties file for the AMI case.
Let's also pass that parameter when calling iotune.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20180507175757.9144-1-glauber@scylladb.com>
Starting from 2018.1 and 2.2 there was a change in the repository path.
It was made to support multiple products (like manager) and to place the
enterprise version in a different path.
As a result, the regular expression that looks for the repository fails.
This patch changes the way the path is searched: both rpm and debian
variations are combined, and both options of the repository path are
unified.
See scylladb/scylla-enterprise#527
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
Message-Id: <20180429151926.20431-1-amnon@scylladb.com>
"
In SSTables 3.0, the base and increment fields have been swapped in
Bloom filters to reduce collisions (see CASSANDRA-8413). This affects
the resulting values written to Filter.db.
This patchset adds support for reading/writing Filter.db in the format
corresponding to the version of SSTables.
Tests: unit {release}
Filter.db files have been generated using Cassandra 3.11 with the same data
as in unit tests and are validated to match those generated by Scylla.
"
* 'projects/sstables-30/write-filter/v1-2' of https://github.com/argenet/scylla:
Fix mistakes and typos in comments (minor clean-up)
Check Filter.db in SSTables 3.x write tests.
Support Bloom filter format used in SSTables 3.0.
Remove unused overload of i_filter::get_filter().
The two hash values, base and increment, used to produce indices for
setting bits in the filter, have been swapped in SSTables 3.0.
See CASSANDRA-8413 for details.
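A hedged sketch of the double-hashing scheme involved (index arithmetic
only, with assumed roles for the two hashes; the real hash and modulo
handling live in the filter code):

    #include <cstdint>

    // Each of the k probes derives its bit index from a "base" hash
    // advanced by an "increment" hash. Swapping which hash plays which
    // role changes every index produced, hence the format difference.
    uint64_t probe_index(uint64_t base, uint64_t increment,
                         unsigned i, uint64_t num_bits) {
        return (base + i * increment) % num_bits;
    }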
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
Fixes crash in cql_tests.StorageProxyCQLTester.table_test
"avoid race condition when deleting sstable on behalf..." changed
discard_sstables behaviour to only return rp:s for sstables owned
and submitted for deletion (not all matching time stamp),
which can in some cases cause zero rp returned.
Message-Id: <20180508070003.1110-1-calle@scylladb.com>
When a node is decommissioned/removed it will drain all its hints, and all
remote nodes that have hints for it will drain their hints to this node.
What does "drain" mean? The node that "drains" hints to a specific
destination will ignore failures and will continue sending hints till the end
of the current segment, erase it and move on to the next one till there are
no more segments left.
After all hints are drained the corresponding hints directory is removed.
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
Returning a future with an exception from end_point_manager::stop()
is practically useless: the best the caller can do is log
it and continue as if it didn't happen, because it has other things
to shut down.
Therefore in order to simplify the caller we will log the exception
if it happens and will always return a non-exceptional future.
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
This ensures we respect the write timeout set by the client when
applying base writes, in case a write takes too long to acquire the
row lock for the read-before-write phase of a materialized view
update.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180507132755.8751-1-duarte@scylladb.com>
* seastar ac02df7...840002c (20):
> dpdk: protect against missing statistics
> alien: make visible in documentation
> Merge "rewrite iotune to conform to the new ioscheduler" from Glauber
> app_template: Correct outdated comment
> apps, tests: Catch polymorphic exceptions by reference
> configure.py: Enhance detection for gcc -fvisibility=hidden bug
> reactor: add rudimentary task histogram reporting
> Revert "Merge "rewrite iotune to conform to the new ioscheduler" from Glauber"
> Merge "rewrite iotune to conform to the new ioscheduler" from Glauber
> build: Use the same warning name for Clang and GCC
> core/rwlock: Add support for timeouts
> fs qualification: protect against EINTR
> Docker: Fix failing build due to missing GNU make
> reactor: move optional to experimental so we compile with c++14
> future: remove allocation from future::get() thread context switch
> Merge "rpc streaming" from Gleb
> reactor: put mountpoint_params in seastar namespace
> Tutorial: in PDF version of tutorial, better backtick typesetting
> tutorial: support, and start using, links to other sections
> tutorial: improve second half of semaphores section
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
This patch adds a simple and naive mechanism to ensure a base replica
doesn't overwhelm a potentially overloaded view replica by sending too
many concurrent view updates. We add a semaphore to limit to 100 the
number of outstanding view updates. We limit globally per shard, and
not per destination view replica. We also limit statically.
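A minimal sketch of that throttle with a Seastar semaphore (the limit of
100 comes from the description above; the surrounding names are
hypothetical):

    #include <seastar/core/future.hh>
    #include <seastar/core/semaphore.hh>

    // One per-shard semaphore shared by all outgoing view updates.
    static thread_local seastar::semaphore view_update_sem{100};

    seastar::future<> send_view_update(/* mutation, target replica, ... */) {
        // Waits if 100 updates are already in flight on this shard.
        return seastar::with_semaphore(view_update_sem, 1, [] {
            return seastar::make_ready_future<>(); // stand-in for the RPC
        });
    }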
Refs #2538
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180426134457.21290-2-duarte@scylladb.com>
The functions in cql_assertions.hh are very convenient, but have one
frustrating drawback: When you have many of those assertions in one
test, it's very hard to know *which* of the similar assertions failed.
The problem is that an error often looks like this:
unknown location(0): fatal error: in "test_many_columns":
std::runtime_error: Expected 2 row(s) but got 0
tests/cql_assertions.cc(131): last checkpoint
Which of the many similar checks in "test_many_columns" failed? Note the
unhelpful "unknown location" and also the "last checkpoint" points to code
in cql_assertions.cc, not in the actual test, so it is useless.
The root cause of these problems is that the Boost macros use the C
preprocessor __FILE__ and __LINE__, which, inside actual C++ functions like
is_rows(), record that function's location instead of the caller's. Fixing
this will not be simple. But this patch has a much simpler solution - fixing
the "last checkpoint". What ruins the last checkpoint is the use of BOOST_REQUIRE
inside the cql_assertions.cc is_rows() - when that succeeds, it records
the location inside cql_assertions.cc (!) as the last success.
If we just replace BOOST_REQUIRE by our own test (just like in the rest of
the cql_assertions.cc code), this code will not override the last checkpoint.
The user can see the last real successful BOOST_REQUIRE, or use
BOOST_TEST_PASSPOINT() to set their own checkpoints between different parts of
the same test.
After this patch, and with adding BOOST_TEST_PASSPOINT() calls between
different parts of my test, the failure above now looks like:
unknown location(0): fatal error: in "test_many_columns":
std::runtime_error: Expected 2 row(s) but got 0
tests/secondary_index_test.cc(299): last checkpoint
The "last checkpoint" now shows me exactly where my failing check was.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180501152638.26238-1-nyh@scylladb.com>
"This patchset adds ostream operators to result_message and uses them
in cql_assertions."
* tag 'result_message-print/v1.1' of https://github.com/avikivity/scylla:
tests: cql_assertions: improve error message when a row is not found
transport: add ostream support to result_message
transport: const correctness for result_message::accept()
Use utf8_type where warranted.
Fixes view_schema_test failure where the rows did not match. I don't
understand exactly why the failure happened (using the wrong type
should not cause a failure here), but the change fixes the problem.
Tests: view_schema_test (release)
Message-Id: <20180506130015.7450-1-avi@scylladb.com>
The visitor does not alter the result_message it is visiting (and
its signature indicates that) so accept() should be const-qualified
to indicate that and to allow visiting const result_message:s.
"
This patchset adds support for writing Statistics.db in the SSTables
'mc' (3.x) format. This file is essential for reading data stored in
Data.db as it contains base values used for delta encoding and types of
columns.
This patchset also fixes several bugs found in writing data and index
files as well as bugs in a statistics-related structure definition.
Tests: unit {debug, release}
All SSTables files for write unit tests are validated to be processed by
sstabledump and output is verified to show the expected data.
"
* 'projects/sstables-30/write-statistics/v1' of https://github.com/argenet/scylla:
Add test covering the composite partition key case.
Add Statistics.db files to write tests for SSTables 3.0.
Do not check rows and cells for expiration when writing them to the data file.
Fix promoted index serialization.
Fix the order of items in stats_metadata.
Fix timestamp_epoch value which was truncated on exceeding int32_t type limit.
Write serialization header to Statistics.db for SSTables 3.x.
Do not pass schema to metadata_collector::update(column_stats)
Collect metadata statistics when writing SSTables 3.0.
Call get_metadata_collector() instead of referencing sstable::_collector directly.
Fix logic of writing TTLed cells in SSTable 3.0 format.
Separate statistics for count of cells, columns and rows in column_stats.
Deserialize collection in a way that doesn't incur shared_ptr counter increment and is generally shorter.
Track both min & max values for timestamp, TTL and local deletion time in metadata_collector.
Add class for tracking both extremum values (min and max) on updates.
Mainly to check that the composite type is properly serialized when
writing serialization header to Statistics.db.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
For these tests to work, all time-related values are now fixed as these
are stored in Statistics.db files.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
Although this logic may be seen as a useful optimization, it hinders
unit tests writing SSTables 3.0 as those need to have fixed time-related
values to produce Statistics.db files with the same content on each run.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
There is a new field introduced in the SSTables 3.0 index file format
named 'partition_header_length' that can be used to skip over to the
first clustering row in a wide partition. This one has not been
previously written and caused malformed indices.
Updated the corresponding test to include a static row and write
multiple wide partitions.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
The serialization header is a new component in Statistics.db introduced in
the SSTables 3.0 ('ma') format. It is essential for reading the data file as it
contains the base values used for delta-encoded values (timestamps,
TTLs, local deletion times) and description of column types.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
After reboot, all existing sstables are considered shared. That's a safe default.
Reader used by compaction decides to use filtering reader (filters out data that
doesn't belong to this shard) if sstable is considered shared even though it may
actually be unshared.
By avoiding filtering reader we're avoiding an extra check for each key, and that
may be meaningful for compaction of tons of small partitions and even range
reads of such. We do so by fixing sstable::_shared, which is now set properly for
existing sstables at start.
quick check using microbenchmark which extends perf_sstable with compaction mode:
before: 69407.61 +- 37.03 partitions / sec (30 runs, 1 concurrent ops)
after: 70161.09 +- 40.35 partitions / sec (30 runs, 1 concurrent ops)
Fixes#3042.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20180504182158.21130-1-raphaelsc@scylladb.com>
"
This series introduces a system.large_partitions table,
used to gather information on the largest partitions in the cluster.
The schema below allows easy extraction of the most offending keys and removal
by sstable name, which happens when a table is compacted away.
Schema: (
keyspace_name text,
table_name text,
sstable_name text,
partition_size bigint,
key text,
compaction_time timestamp,
PRIMARY KEY((keyspace_name, table_name), sstable_name, partition_size, key)
) WITH CLUSTERING ORDER BY (partition_size DESC);
"
Closes#3292.
* 'large_partition_table_3' of https://github.com/psarna/scylla:
database, sstables, tests: add large_partition_handler
db: add large_partition_handler interface with implementations
docs: init system_keyspace entry with system.large_partitions
db: add system.large_partitions table
This commit makes the database, sstables and tests aware
of which large_partition_handler they use.
The proper large_partition_handler is retrievable from config information
and is based on the existing compaction_large_partition_warning_threshold_mb
entry. Right now the CQL TABLE variant of large_partition_handler is used
in the database.
Tests use a NOP version of large_partition_handler, which does not
depend on CQL queries at all.
This commit introduces large_partition_handler class, which can be used
to take additional action when large partitions are written.
It comes with two implementations:
* NOP, used in tests, which does nothing on large partition
update/delete
* CQL TABLE, which inserts/deletes information on particular sstable
to system.large_partitions table, in order to be retrievable from
cqlsh later.
References #3292
This commit adds a system.large_partitions table, which can be used
to trace the largest partitions of a cluster.
Schema: (
keyspace_name text,
table_name text,
sstable_name text,
partition_size bigint,
key text,
compaction_time timestamp,
PRIMARY KEY((keyspace_name, table_name), sstable_name, partition_size, key)
) WITH CLUSTERING ORDER BY (partition_size DESC);
References #3292
After removal of the deletion manager, the caller is now responsible for
properly submitting the deletion of a shared sstable. That's because the
deletion manager was responsible for holding the deletion until all owners
agreed on it.
Resharding, for example, was changed to delete the shared sstables at the end,
but truncate wasn't changed, so a race condition could happen when deleting the
same sstable on more than one shard in parallel. Change the operation to
submit a shared sstable for deletion on only one owner.
Fixes dtest migration_test.TestMigration.migrate_sstable_with_schema_change_test
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20180503193427.24049-1-raphaelsc@scylladb.com>
A step to untie classes sstable_writer_m and sstable so that eventually
we could stop them being friends.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
SSTables 3.0 format makes a distinction between count of cells and count
of columns. In that sense, a column of a collection type counts as one
column but every atomic cell in it counts as a separate cell.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
test_multishard_combining_reader_destroyed_with_pending_create_reader
was failing because it relied on smp == 3, and thus on the shard on which
the reader creation is blocked being shard 2. Since the test requires
being run with smp >= 3, we can hardcode this shard to 2, because if the
test runs at all we are guaranteed to have smp >= 3.
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <38883a1f4c18ca0cd065aa13826a4f1858353289.1525328233.git.bdenes@scylladb.com>
These tests are quite complicated and require intimate knowledge of how
foreign_reader and multishard_combining_reader operate. Knowing these
two objects is still required to understand the tests, but this patch makes
it that much easier by explaining how they were designed to test what they test.
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <8de580131a8652924de920c2bc68a98e579398ee.1525328226.git.bdenes@scylladb.com>
'shard' is a short-lived on-stack variable that gets captured by
reference by continuation that gets executed on another shard.
Fixes a race condition that leads to an heap-use-after-free.
Message-Id: <20180502150507.2776-1-pdziepak@scylladb.com>
The test_foreign_reader_destroyed_with_pending_read_ahead test currently
doesn't ensure that the objects in its scope are destroyed in the
correct order. This is necessary as there are several foreign pointers
to objects that live on remote shards and use each other. Since
foreign pointers destroy their managed object in the background we
cannot rely on them to reliably destroy objects in order, nor can we be
sure when the object they manage is actually destroyed.
So to work around that, ensure that the puppet_reader is destroyed before
the remote_control it references even has a chance of being destroyed.
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <232eaa899878b03fb2a765c2916e4f05841472a3.1525269726.git.bdenes@scylladb.com>
Test for Scylla's default choice of secondary index name (we found one
small problem, see issue #3403, and left it commented out). Also test
the ability to give indices non-default names.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180501153439.26619-1-nyh@scylladb.com>
Add a test that adding a secondary-index for an only partition key column
is not allowed (it would be redundant), but indexing one of several partition
key columns *is* allowed. This reproduced issue #3404, and verifies that
it was fixed.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180501121544.22869-2-nyh@scylladb.com>
Indexing an only partition key component is not allowed (because it would
be redundant), but it should be allowed to index one of several partition
key components. We had a bug in that case: the underlying materialized view
we created had the same column as both a partition key and a clustering
key, which resulted in an assertion failure. This patch fixes that.
Fixes#3404.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180501121544.22869-1-nyh@scylladb.com>
The db/index directory contains just a few lines of code that exists
there for historical reasons. It's confusing that we have both db/index
and index/ directory related to secondary-indexing.
This patch moves what little is still in db/index/ to index/. In the
future we should probably get rid of the "secondary_index" class we had
there, but for now, let's at least not have a whole new directory for it.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180501101246.21143-1-nyh@scylladb.com>
Fixes a bug in partition_snapshot::merge_partition_versions(), which
would not attempt merging if the snapshot is attached to the latest
version (in which case _version is nullptr and _entry != nullptr).
This would cause partition_version objects to accumulate if
there was an older snapshot and it went away before the latest
snapshot. Versions will be removed when the whole entry goes away
(flush or eviction).
May have caused performance problems.
Fixes #3402.
"
Both multishard_combining_reader and foreign_reader use read-ahead in the
background to avoid blocking consumers. These read-aheads can still be
pending when the reader is destroyed and hence extra attention is needed
to avoid memory errors. Recent manual testing, done in the context of
testing code that is using the multishard reader, proved that these
cases were not handled correctly in the initial series introducing it
(2d126a79b).
This series introduces fixes and comprehensive tests for all problematic
scenarios:
1) multishard_combining_reader is destroyed with pending reader creation
on a remote shard.
2) foreign_reader is destroyed with pending read-ahead.
3) multishard_combining_reader is destroyed with pending read-ahead.
"
* 'multishard-reader-read-ahead-fixes/v2' of https://github.com/denesb/scylla:
test.py: add custom seastar flags for mutation_reader_test
test.py: move custom seastar flags for tests declarative
mutation_reader_test: add read-ahead related multishard reader tests
tests/mutation_reader_test: change recommended smp to 3
mutation_reader_test: fix name of existing multishard reader tests
simple_schema: add global_simple_schema
simple_schema.hh: remove unused include
multishard_combining_reader: prepare for read-ahead outliving the reader
foreign_reader: prepare for read-ahead outliving the reader
multishard_combining_reader: avoid creating the shard reader twice
multishard_combining_reader: read_ahead: don't assume reader is created
multishard_combining_reader: move read-ahead related methods
multishard_combining_reader: avoid looking up the shard reader twice
multishard_combining_reader: use optional for maybe created reader
Add tests for foreign_reader and multishard_combining_reader that check
that readers destroyed while there is a pending read-ahead will not result
in use-after-free.
Specifically check that:
* multishard_combining_reader destroyed with pending reader creation
* foreign_reader destroyed with pending read-ahead
* multishard_combining_reader destroyed with pending read-ahead
does not result in use-after-free or SEGFAULT.
These tests try to do their best to check for correct behaviour with
various BOOST_REQUIRE* checks but they still heavily rely on ASAN to
detect any use-after-free, SEGFAULT or similar errors.
Of the test_multishard_combining_reader_reading_empty_table test.
Running this test with smp=3 instead of smp=2 helps detect additional
read-ahead related memory problems.
Which allows a simple_schema instance to be transferred to another
shard. In fact a new simple_schema instance will be created on the
remote shard, but it will use the same schema instance as the original
one.
When the multishard reader is destroyed there might be several pending
read-aheads running in the background. These read-aheads need their
associated reader to stay alive until after the read-ahead completes.
To solve this move the flat_mutation_reader into a struct and manage
this struct's lifetime through a shared pointer. Fibers associated with
read-aheads that might outlive the multishard reader will hold on to a
copy of the shared pointer, keeping the underlying reader alive until they
complete. To avoid doing any extra work, a flag is added to this state
which is set when the multishard reader is destroyed. When this flag is
set, pending continuations will return early. All this is encapsulated
in multishard_combining_reader::shard_reader, so the multishard reader
code itself need not be changed.
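A condensed sketch of that scheme; member names are illustrative, not the
actual Scylla code:

    // the shared state that may outlive the multishard reader itself
    struct reader_state {
        flat_mutation_reader reader;
        bool stopped = false;               // set when the owner goes away
    };

    struct shard_reader {
        lw_shared_ptr<reader_state> _state;

        future<> read_ahead() {
            auto state = _state;            // the fiber keeps its own copy
            return state->reader.fill_buffer().then([state] {
                if (state->stopped) {
                    return;                 // owner is gone: skip all work
                }
                // ... hand the filled buffer over to the merging logic ...
            });
        }

        ~shard_reader() {
            _state->stopped = true;         // pending fibers return early
        }
    };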
The foreign reader keeps track of ongoing read-aheads via a
foreign_ptr to the read-ahead's future on the remote shard. This pointer
is overwritten after each "remote call" to the remote reader with a
pointer to the new read-ahead's future.
There are several problems with the current implementation:
1) There is a new read-ahead launched after each "remote call"
unconditionally, even if the remote reader is at EOS. This will start an
unnecessary read-ahead when the reader is already finished and may be
soon destroyed (legally) by the client.
2) The pointer to the remote read-ahead future is not set to nullptr
when a remote call is issued. Thus in the destructor, where we
attach a continuation to the read-ahead's future to extend the
reader's lifetime until after the read-ahead finishes, we might attach
a continuation to a future that already has one and run into a failed
assert().
To fix these issues, reset the read-ahead pointer to nullptr each time a
remote call is issued and don't start a new read-ahead if the remote
reader is at EOS. This way we can ensure that when the reader is
destroyed we either have a valid and non-stale read-ahead future or none
at all and can reliably make a decision about whether we need to extend
the lifetime of the remote reader or not.
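A sketch of the resulting invariant (hypothetical member names): the
read-ahead slot is always either empty or holds a live, unconsumed future.

    #include <optional>
    #include <seastar/core/future.hh>

    using namespace seastar;

    struct read_ahead_slot {
        std::optional<future<>> _read_ahead;    // at most one in flight

        // consume the pending read-ahead, leaving the slot empty so that
        // nobody can chain a second continuation onto the same future
        future<> take() {
            if (!_read_ahead) {
                return make_ready_future<>();
            }
            auto f = std::move(*_read_ahead);
            _read_ahead = std::nullopt;         // reset on every remote call
            return f;
        }
    };
    // a new read-ahead is armed only while the remote reader is not at
    // end-of-stream, so a finished reader carries no stale futures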
The multishard reader creates its shard readers on demand, when they
are first used. However, at that time the reader might already be in the
process of being created, initiated by a previous read-ahead.
To avoid creating the shard reader twice, check whether there are any
read-aheads in progress before creating the reader. If there are, the
read-ahead has already created (is creating, or will create) the reader,
so synchronise with it instead. Synchronisation happens via a promise:
the read-ahead creates a promise which will be fulfilled when the reader
is created, and a concurrent create_reader() call will wait on this
promise instead of attempting to create a new reader.
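A sketch of that synchronisation, with hypothetical members (a seastar
shared_promise stands in for the promise, so multiple waiters are also
safe; make_reader() is an assumed factory):

    struct shard_reader_sketch {
        std::optional<flat_mutation_reader> _reader;
        std::optional<shared_promise<>> _creation;   // set by the creator

        future<> get_or_create_reader() {
            if (_reader) {
                return make_ready_future<>();        // already created
            }
            if (_creation) {                         // a read-ahead got
                return _creation->get_shared_future(); // here first: wait
            }
            _creation.emplace();                     // claim the creation
            return make_reader().then([this] (flat_mutation_reader r) {
                _reader.emplace(std::move(r));
                _creation->set_value();              // release any waiters
            });
        }

        future<flat_mutation_reader> make_reader(); // hypothetical factory
    };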
Currently it is assumed that when read_ahead is called the reader is
already created. Under most circumstances this will not be true. It was
blind (bad) luck that we didn't hit this before (during testing).
Move them to the group of methods that do not assume the reader is
already created. A patch will follow that updates read_ahead() to not
assume that the reader is created.
After a little "research" [1] it turns out my initial fears were
completely groundless: std::optional::operator->() and
std::optional::operator*() don't involve an unnecessary branch and
thus there is no need to hand-roll an optional with a separate bool.
[1] http://en.cppreference.com/w/cpp/utility/optional/operator*
Determine which timeout we need to apply at prepare time. We
don't know the numerical value (since it depends on whoever is
executing the query, not just the statement type), but we know
which member of timeout_config we need, so determine and remember
that.
The mutation forwarding intermediary (src_addr) may not always know
about the schema which was used by the original coordinator. I think
this may be the cause of the "Schema version ... not found" error seen
in one of the clusters which entered some pathological state:
storage_proxy - Failed to apply mutation from 1.1.1.1#5: std::_Nested_exception<schema_version_loading_failed> (Failed to load schema version 32893223-a911-3a01-ad70-df1eb2a15db1): std::runtime_error (Schema version 32893223-a911-3a01-ad70-df1eb2a15db1 not found)
Fixes #3393.
Message-Id: <1524639030-1696-1-git-send-email-tgrabiec@scylladb.com>
Confirm that issue #2991 is indeed fixed - creating a secondary index
with IF NOT EXISTS ignores an already existing index, and dropping with
IF EXISTS ignores a non-existent index.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180430071714.10154-1-nyh@scylladb.com>
The existing test_secondary_index_case_sensitive only tested the
case-sensitive case of the column being indexed, and only in some
scenarios. Further testing exposed more bugs - issue #3388, issue #3391,
issue #3401. This patch adds tests which reproduced those bugs, and now
verifies their fix.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180429221857.6248-9-nyh@scylladb.com>
test_case_sensitivity from tests/view_schema_test.cc was well-intentioned,
aiming to test from different angles the issue of non-lowercase (quoted)
column names and their interaction with materialized views.
But unfortunately, it didn't test anything! This is because the quotation
marks were forgotten, so all the identifiers in this test were folded to
lowercase, and the test didn't test non-lowercase identifiers like it
intended.
So this patch adds the missing quotes, to make this test great again.
After the patches for issues #3388 and #3391 which I sent earlier, the
test *passes* (before those patches, the fixed test did not pass -
the unfixed test trivially passed).
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180429221857.6248-8-nyh@scylladb.com>
When the secondary index code builds a "%s IS NOT NULL" clause for a
CQL statement, it needs to quote the column name if quoting is needed
(i.e., the name contains anything other than lowercase letters, digits
and _).
Fixes #3401.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180429221857.6248-7-nyh@scylladb.com>
We had another case-sensitivity bug in materialized views, where if
a case-sensitive (quoted) column name was listed explicitly on "SELECT"
(instead of implicitly, e.g., in "SELECT *") the column name was
incorrectly folded to lower-case and inserts would fail.
This patch fixes the code, where a "SELECT" statement was built using
the desired column names, but column names that needed quoting were
not being quoted. The bug was in a helper function build_select_statement()
which took column name strings and failed to quote them. We clean up this
function to take column definitions instead of strings - and take care
of the quoting itself. It also needs to quote the table's name in the
select statement being built.
Fixes #3391.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180429221857.6248-6-nyh@scylladb.com>
Before this patch, if a materialized view is defined with the restriction
IS NOT NULL on a case-sensitive (quoted) column name, inserts fail with
a "restriction 'foobar IS NOT null' unknown column foobar" error, where
foobar is the lowercased version of the case-sensitive column name.
The problem is that the code uses single_column_relation::to_string()
to convert the relation into a CQL where clause. And indeed, this method
generates a CQL expression; But it calls column_identifier::raw::to_string()
to print identifiers. This is the wrong function - it doesn't quote
identifiers that need quoting because they are not lowercase.
So this patch uses column_identifier::raw::to_cql_string() (a method we
added in the previous patch) to generate the properly quoted CQL relation.
Fixes #3388
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180429221857.6248-5-nyh@scylladb.com>
Implement a method column_identifier::raw::to_cql_string(). Exactly like
the one without "raw", this method quotes the identifier name as needed
for CQL. We'll need this method in a later patch.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180429221857.6248-4-nyh@scylladb.com>
There is no reason for to_cql_string() and maybe_quote() to both
implement the same quoting algorithm. Use the latter to implement the
former.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180429221857.6248-3-nyh@scylladb.com>
The utility function maybe_quote() is supposed to quote identifier names
(name of keyspace, table, or column) according to CQL rules, e.g., if the
name has any uppercase or non-alphanumeric characters, it needs to be
quoted. Unfortunately, it didn't quite do the right thing, so this patch
fixes that. This patch also adds a comment explaining what maybe_quote()
is supposed to do (until now, users could only guess).
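A minimal sketch of that rule (using std::string for brevity; the real
function must also quote reserved CQL keywords and a few more edge cases):

    #include <string>

    // quote unless the name is a plain lowercase identifier
    std::string maybe_quote(const std::string& name) {
        bool plain = !name.empty();
        for (char c : name) {
            if (!((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')
                    || c == '_')) {
                plain = false;
                break;
            }
        }
        if (plain) {
            return name;
        }
        std::string quoted = "\"";
        for (char c : name) {
            quoted += c;
            if (c == '"') {
                quoted += c;   // a quote inside a quoted name is doubled
            }
        }
        return quoted + "\"";
    }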
Fixes #3400.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180429221857.6248-2-nyh@scylladb.com>
In commit d674b6f672, I fixed a case-sensitive column name bug by
avoiding CQL quoting of a column name
in create_index_statement.cc when building a "targets" option string.
However, there is also matching code in target_parser.hh to unquote
that option string. So this unquoting code is no longer necessary, and
should be dropped.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180429221857.6248-1-nyh@scylladb.com>
Different request types have different timeouts (for example,
read requests have shorter timeouts than truncate requests), and
also different request sources have different timeouts (for example,
an internal local query wants infinite timeout while a user query
has a user-defined timeout).
To allow for this, define two types: timeout_config represents the
timeout configuration for a source (e.g. user), while
timeout_config_selector represents the request type, and is used
to select a timeout within a timeout configuration. The latter is
implemented as a pointer-to-member.
Also introduce an infinite timeout configuration for internal
queries.
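A sketch of the shape of these two types (member names illustrative; the
actual config has more entries and uses the database's timeout clock):

    #include <chrono>

    struct timeout_config {
        std::chrono::milliseconds read_timeout;
        std::chrono::milliseconds write_timeout;
        std::chrono::milliseconds truncate_timeout;
        // ...
    };

    // selects *which* timeout a statement needs, independent of the
    // configuration it will eventually be applied to
    using timeout_config_selector =
        std::chrono::milliseconds timeout_config::*;

    // determined once, at prepare time, e.g. for a read statement:
    constexpr timeout_config_selector stmt_timeout =
        &timeout_config::read_timeout;

    // resolved at execution time against the caller's configuration
    // (user config, or an "infinite" config for internal queries)
    inline std::chrono::milliseconds resolve(const timeout_config& cfg,
                                             timeout_config_selector sel) {
        return cfg.*sel;
    }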
In the current code, if the base table has a compound partition key (i.e.,
multiple partition-key columns), searching its secondary indexes didn't work.
There was no real reason for this; it was just a bug in preparing the
second query:
Every SI query is converted to two queries. The first queries the associated
materialized view, to find a list of primary keys. Those we need to use in a
second query, of the base table. The second query needs to list, as
restrictions, the keys found above. When a partition key is compound, its
components build one key and one restriction. But in the buggy code, we
incorrectly used each component as a separate (improperly formatted) key
and restriction, and obviously this didn't work.
This patch also adds a test that reproduces this problem and confirms its fix.
In the fixed code I also found another incorrect use of to_cql_string() (which
could break case-sensitive primary key column names) and changed it to
to_string().
Fixes #3210.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180429124138.24406-1-nyh@scylladb.com>
We make multiple attempts to mark a node as alive. We do that by
sending an EchoMessage, and marking the node as alive upon receiving a
successful answer. In case there's a network partition and the nodes
can't reach each other, multiple messages may be delivered and
processed.
We can avoid processing duplicate EchoMessage replies by checking
whether we had already marked the node as alive.
Fixes #1184
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180428191942.31990-1-duarte@scylladb.com>
"
Recently many changes have landed in seastar for the I/O Scheduler. We
can now describe the I/O storage of a machine by its visible properties
like throughput and bandwidth instead of relying on an indirect
calculation.
For the instances we support, we can just measure that and start using
them right away.
A version of iotune that computes those properties is not yet ready, but
while writing it I noticed that we aren't really setting the nomerges
and scheduler properties of the disks under testing. We definitely
should, since that can influence the results. So this patchset also
starts doing that.
The commandline for iotunev2 shouldn't change much. When it is ready we
will just adjust this script once more.
"
* 'scylla_io_setup' of github.com:glommer/scylla:
scylla_io_setup: preconfigure i3 and i2 instances with new I/O scheduler properties
scylla_lib: drop support for m3 and c3 AWS instance types
io_setup: call blocktune before tuning I/O
blocktune: allow it to be called as a library.
scripts: move scylla-blocktune to scripts location
* seastar 70aecca...ac02df7 (5):
> Merge "Prefix preprocessor definitions" from Jesse
> cmake: Do not enable warnings transitively
> posix: prevent unused variable warning
> build: Adjust DPDK options to fix compilation
> io_scheduler: adjust property names
DEBUG, DEFAULT_ALLOCATOR, and HAVE_LZ4_COMPRESS_DEFAULT macro
references are now prefixed with SEASTAR_. Some may need to become
Scylla macros.
An iterator was incorrectly dereferenced when the timestamp resolution
was not explicitly specified.
The following dtests are fixed:
compaction_additional_test.CompactionAdditionalStrategyTests_with_TimeWindowCompactionStrategy.compaction_is_started_on_boot_test
compaction_additional_test.CompactionAdditionalTest.compact_data_by_time_window_test
compaction_additional_test.CompactionAdditionalTest.compaction_removes_ttld_data_by_time_windows_test
compaction_test.TestCompaction_with_DateTieredCompactionStrategy.compaction_strategy_switching_test
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20180427192545.17440-1-raphaelsc@scylladb.com>
We can use iotunev2 (or any other I/O generator) to test for the limits
of the disks for the i2 and i3 instance classes. The values here are
the ones I got from ~5 invocations of the (yet to be upstreamed)
iotune v2, with the IOPS numbers rounded for convenience of reading.
During the execution, I verified that the disks were saturated so we
can trust these numbers even if iotunev2 is merged in a different form.
The numbers are very consistent, unlike what we usually saw with the
first version of iotune.
Previously, we were just multiplying the concurrency number by the
number of disks. Now that we have better infrastructure, we will
manually test i3.large and i3.xlarge, since their disks are smaller
and slower.
For the other i3 instances, and all instances in the i2 family, storage
scales up
by adding more disks. So we can keep multiplying the characteristics of
one known disk by the number of disks and assuming perfect scaling.
Example for i3, obtained with i3.2xlarge:
read_iops = 411k
read_bandwidth = 1.9GB/s
So for i3.16xlarge, we would have read_iops = 3.28M and 15GB/s - very
close to the numbers advertised by AWS.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
m3 has 80GB SSDs in its largest form and I doubt anybody has ever
used it with Scylla.
I am also not aware of any c3 deployments. Since it is past generation,
it doesn't even show up in the default instance selector anymore.
I propose we drop AMI support for it. In practice, what that means is
that we won't auto-tune its I/O properties and people that want to use
it will have to run scylla_io_setup - like they do today with the EBS
instances.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
We are not configuring the disks the way we want them with respect to
scheduler and nomerges. This is an oversight that became clear now that
I have started rewriting iotune -- since I will explicitly test for that.
But since this can affect the results, it should have been here all along.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
This patch makes the functions in scylla-blocktune available as a
library for other scripts - namely scylla_io_setup.
The filename, scylla-blocktune, is not the most convenient thing to call
from python, so instead of just wrapping it in the usual test for
__main__, I am just splitting the file into two.
Another option would be to patch all callers to call
scylla_blocktune.py, but because we are usually not using extensions in
scripts that are meant to be called directly I decided for the split.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
scylla-blocktune currently lives in the top level but this is mostly
historical. When time comes for us to install it, the packaging systems
will copy it to /usr/lib/scylla with the others.
So for consistency let's make sure that it also lives in the scripts
directory.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
After upgrade from 1.7 to 2.0, nodes will record a per-table schema
version which matches that on 1.7 to support the rolling upgrade. Any
later schema change (after the upgrade is done) will drop this record
from affected tables so that the per-table schema version is
recalculated. If nodes perform a schema pull (they detect schema
mismatch), then the merge will affect all tables and will wipe the
per-table schema version record from all tables, even if their schema
did not change. If then only some nodes get restarted, the restarted
nodes will load tables with the new (recalculated) per-table schema
version, while not restarted nodes will still use the 1.7 per-table
schema version. Until all nodes are restarted, writes or reads between
nodes from different groups will involve a needless exchange of schema
definition.
This will manifest in logs with repeated messages indicating schema
merge with no effect, triggered by writes:
database - Schema version changed to 85ab46cd-771d-36c9-bc37-db6d61bfa31f
database - Schema version changed to 85ab46cd-771d-36c9-bc37-db6d61bfa31f
database - Schema version changed to 85ab46cd-771d-36c9-bc37-db6d61bfa31f
The sync will be performed if the receiving shard forgets the foreign
version, which happens if it doesn't process any request referencing
it for more than 1 second.
This may impact latency of writes and reads.
The fix is to treat schema changes which drop the 1.7 per-table schema
version marker as an alter, which will switch in-memory data
structures to use the new per-table schema version immediately,
without the need for a restart.
Fixes #3394
Tests:
- dtest: schema_test.py, schema_management_test.py
- reproduced and validated the fix with run_upgrade_tests.sh from git@github.com:tgrabiec/scylla-dtest.git
- unit (release)
Message-Id: <1524764211-12868-1-git-send-email-tgrabiec@scylladb.com>
"
This patch series introduces initial support for writing SSTables in
'mc' format (aka SSTables 3.0).
Currently, the following components are written in 3.0 format:
- Data.db
- Index.db
- Summary.db
(there were no changes to the summary file format compared to ka/la)
Other SSTables components are written in the old format for now as they
still need to exist to satisfy post-flush processing.
For now, only rows are written to the data file and indexed. Range
tombstones are not supported.
Writing rows is supported in full with the only exception being counter
cells. All the other features (TTLed data, row/cell level tombstones,
collections, etc) are supported.
Unit tests rely on producing files and binary-comparing them with
'golden' copies that are produced using Cassandra 3.11. This is done so
as not to block until reading the SSTables 3.0 format is implemented.
=======================================
Implementation notes
=======================================
Internally, sstable_writer has been refactored to support multiple
implementations that are instantiated in its constructor based on the
sstable version. Little to no code is shared among sstable_writer_v2 and
sstable_writer_v3 as we only intend to support sstable_writer_v2
alongside sstable_writer_v3 for a single release (to be able to do
rollback on rolling upgrade failure) and then plan to get rid of it
entirely and switch to always writing SSTables in the new format.
The design of sstable_writer_v3 mostly follows that of its precursors
sstable_writer(_v2) and components_writer. Some refactoring and further
code rearrangements are expected in the future but the main code is
there.
"
* 'projects/sstables-30/write-rows/v2' of https://github.com/argenet/scylla:
Add tests for writing data and index files in SSTables 3.0 ('mc') format.
Support for writing SSTables 3.0 ('mc') Data.db and Index.db files - rows only.
Add missing enum values to bound_kind.
Add building blocks for writing data in SSTables 3.0 format.
Refactor sstable_writer to support various internal implementations.
Add is_fixed_length() to data types.
Add mutation_partition::apply_insert() overload that accepts TTL and expiry for row marker.
bound_kind::clustering, bound_kind::excl_end_incl_start and
bound_kind::incl_end_excl_start are used during SSTables 3.0 writing.
bound_kind::static_clustering is not used yet but added for completeness
and parity with the Origin.
For #1969.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
For any given CQL data type, this member returns whether its values are
of fixed or variable length. This is used by SSTables 3.0 format to only
store the length value for variable-length cells.
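A sketch of how a writer consumes this flag (hypothetical writer API, not
the actual sstable code):

    // fixed-length values (e.g. int, uuid) are written bare; variable-
    // length ones (e.g. text, blob) are prefixed with their size
    void write_cell_value(file_writer& out, const abstract_type& type,
                          bytes_view value) {
        if (!type.is_fixed_length()) {
            write_unsigned_vint(out, value.size()); // length only if needed
        }
        out.write(value.data(), value.size());
    }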
For #1969.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
"
This patchset prepares everything for support of both 2.x and 3.x
formats and implements reading, from an sstable in 3.x format, of a
very simple table with just partition keys.
Tests: units (release)
"
* 'haaawk/sstables3/read_only_partitions_v4' of ssh://github.com/scylladb/seastar-dev: (22 commits)
Test for reading sstable in MC format with no columns
Use new mp_row_consumer_m and data_consume_rows_context_m
Introduce mp_row_consumer_m
Rename mp_row_consumer to mp_row_consumer_k_l
Introduce consumer_m and data_consume_rows_context_m
Use read_short_length_bytes in RANGE_TOMBSTONE
Use read_short_length_bytes in ATOM_START
Use read_short_length_bytes in ROW_START
Add continuous_data_consumer::read_short_length_bytes
Reduce duplication with continuous_data_consumer::read_partial_int
Add test for a simple table with just partition key
Add test for reading index
Extract mp_row_consumer to separate header
Make sstable_mutation_reader independent from mp_row_consumer
Make sstable_mutation_reader a template
Make data_consume_context a template
Move data_consume_rows_context from row.cc to row.hh
Decouple sstable.hh and row.hh
Reduce visibility of sstable::data_consume_*
Move data_consume_context to separate header
...
Take DataConsumeRowsContext type as parameter.
This will allow us to implement a different context
for reading 3.x files.
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
Parametrize it with the type of data consume rows context.
There will be different implementations used for different
sstable file formats.
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
It will be used as a template parameter for sstable_mutation_reader
once it's turned into a template. This means the definition has
to be accessible.
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
They are used just in partition.cc, row.cc and sstables_test.cc
so it is useful to cut their scope by moving them
to data_consume_context.hh.
This will make it much easier to turn data_consume_context into
a template.
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
It's used only in row.cc, partition.cc and sstables_test.cc
so it's better to reduce the dependency just to those files.
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
On some build environments we may want to limit the number of parallel
jobs: ninja-build runs ncpus jobs by default, which may be too many
since g++ consumes a huge amount of memory.
So support --jobs <njobs>, just like the rpm build script does.
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180425205439.30053-1-syuu@scylladb.com>
"
This patchset brings in a statistics collector that tracks minimal
values for timestamps, TTLs and local deletion times for all the updates
made to a given memtable.
These statistics are later used when flushing memtables into SSTables
using 3.x ('mc') format to delta-encode corresponding values using
collected minimums as bases (that is why it is called encoding
statistics).
This patchset is sent out separately from other changes that introduce
writing SSTables 3.x, to facilitate the read path implementation that also
needs the encoding_stats structure.
The tests for the write path implicitly cover this functionality, as any
rows written to an SSTable 3.0 file make use of delta-encoding.
"
* 'projects/sstables-30/collect-encoding-statistics-v4' of https://github.com/argenet/scylla:
Collect encoding statistics for memtable updates.
Factor out min_tracker and max_tracker as common helpers.
Always pass mutation_partitions to partition_entry::apply()
We keep track of all updates and store the minimal values of timestamps,
TTLs and local deletion times across all the inserted data.
These values are written as a part of serialization_header for
Statistics.db and used for delta-encoding values when writing Data.db
file in SSTables 3.0 (mc) format.
For #1969.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
They will be re-used for collecting encoding statistics which is needed
to write SSTables 3.0.
Part of #1969.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
Previously it was also possible to pass a frozen_mutation to it.
Now we de-serialize frozen mutations at the calling side.
This is a pre-requisite for collecting memtable statistics needed for
writing into the SSTables 3.0 format.
For #1969.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
When provisioning a Scylla docker image with --developer-mode 0 (disabled)
scylla_raid_setup is not invoked. As a consequence the "data" directory is not
created and scylla_io_setup fails (steps to reproduce and error message provided
at the end).
This patch adds the same verifications present in scylla_io_setup to docker's
scyllasetup.py and creates the data directory in the case it is not present.
--
Steps to reproduce on AWS i3.2xlarge with Ubuntu 16.04:
sudo -s
apt update && apt upgrade -y && apt-get install docker.io -y
mdadm --create --verbose --force --run /dev/md0 --level=0 -c1024 --raid-devices=1 /dev/nvme0n1
mkfs.xfs /dev/md0 -f -K
mkdir /var/lib/scylla
mount -t xfs /dev/md0 /var/lib/scylla
docker run --name some-scylla \
--volume /var/lib/scylla:/var/lib/scylla \
-p 9042:9042 -p 7000:7000 -p 7001:7001 -p 7199:7199 \
-p 9160:9160 -p 9180:9180 -p 10000:10000 \
-d scylladb/scylla --overprovisioned 1 --developer-mode 0
docker logs some-scylla
running: (['/usr/lib/scylla/scylla_dev_mode_setup', '--developer-mode', '0'],)
running: (['/usr/lib/scylla/scylla_io_setup'],)
terminate called after throwing an instance of 'std::system_error'
what(): open: No such file or directory
ERROR:root:/var/lib/scylla/data did not pass validation tests, it may not be on XFS and/or has limited disk space.
This is a non-supported setup, and performance is expected to be very bad.
For better performance, placing your data on XFS-formatted directories is required.
To override this error, enable developer mode as follow:
sudo /usr/lib/scylla/scylla_dev_mode_setup --developer-mode 1
failed!
Traceback (most recent call last):
File "/docker-entrypoint.py", line 15, in <module>
setup.io()
File "/scyllasetup.py", line 34, in io
self._run(['/usr/lib/scylla/scylla_io_setup'])
File "/scyllasetup.py", line 23, in _run
subprocess.check_call(*args, **kwargs)
File "/usr/lib64/python3.4/subprocess.py", line 558, in check_call
raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['/usr/lib/scylla/scylla_io_setup']' returned non-zero exit status 1
ls -latr /var/lib/scylla
total 4
drwxr-xr-x 44 root root 4096 Abr 24 13:02 ..
drwxr-xr-x 2 root root 6 Abr 24 13:10 .
Signed-off-by: Moreno Garcia <moreno@scylladb.com>
Message-Id: <20180424173729.22151-1-moreno@scylladb.com>
Fixes #3187
Requires seastar "inet_address: Add constructor and conversion function
from/to IPv4"
Implements IPv6 support for the CQL inet data type. The actual data
stored will now vary between 4 and 16 bytes. gms::inet_address has been
augmented to interop with seastar::inet_address, though of course
actually trying to use an IPv6 address there or in any of its tables
will throw badly.
Tests assuming IPv4 were changed. Storing an ipv4_address should be
transparent, as it now "widens". However, since every ipv4 is an
inet_address, but not vice versa, there is no implicit overloading on
the read paths. I.e. tests and system_keyspace (where we read IP
addresses from tables explicitly) are modified to use the proper type.
Message-Id: <20180424161817.26316-1-calle@scylladb.com>
CQL normally folds identifiers such as column names to lowercase. However,
if the column name is quoted, case-sensitive column names and other strange
characters can be used. We had a bug where such columns could be indexed,
but then, when trying to use the index in a SELECT statement, it was not
found.
The existing code remembered the index's column after converting it to CQL
format (adding quotes). But such conversion was unnecessary, and wrong,
because the rest of the code works with bare strings and does not involve
actual CQL statements. So the fix avoids this mistaken conversion.
This patch also includes a test to reproduce this problem.
Fixes #3154.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180424154920.15924-1-nyh@scylladb.com>
This commit fixes two closely related issues with handling
case-sensitive column names in JSON:
* according to doc, case-sensitive names should be wrapped with
additional pair of double quotes during JSON SELECT
* logic error in parse_json() prevented INSERT JSON from working
properly on case-sensitive column names
This commit is followed by updated cql_query_test, which checks
case-sensitive cases as well.
Message-Id: <82d9d5e193a656e99bc86b297c00662a6fb808a0.1524576066.git.sarna@scylladb.com>
"
Pass sstable version to parse, write and describe_type methods to make it possible to handle different versions.
For now, the serialization header from the 3.x format is ignored.
Tests: units (release)
"
* 'haaawk/sstables3/loading_v4' of ssh://github.com/scylladb/seastar-dev:
Add test for loading the whole sstable
Add test for loading statistics
Add support for 3_x stats metadata
Pass sstable version to describe_type
Pass sstable version to write methods
metadata_type: add Serialization type
Pass sstable_version_types to parse methods
Add test for reading filter
Add test for read_summary
sstables 3.x: Add test for reading TOC
sstable: Make component_map version dependent
sstable::component_type: add operator<<
Extract sstable::component_type to separate header
Remove unused sstable::get_shared_components
sstable_version_types: add mc version
Introduce sstable_version_constants that will be a proxy
serving correct constants depending on the format version.
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
I forgot that I also need to update test.py for the new test.
It's unfortunate that this script doesn't pick up the list of
tests automatically (perhaps with a black-list of tests we don't
want to run). I wonder if there are additional tests we are
forgetting to run.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180424085911.29732-1-nyh@scylladb.com>
Move the two tests we have for the secondary indexing feature from the
huge tests/cql_query_test.cc to a new file, secondary_index_test.cc.
Having these tests in a separate file will make it easier and faster to
write more tests for this feature, and to run these tests together.
This patch doesn't change anything in the tests' code - it's just a code
move.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180424084700.28816-1-nyh@scylladb.com>
Old versions of JsonCpp declare the following typedefs for internally
used aliases:
typedef long long int Int64;
typedef unsigned long long int UInt64;
In newer versions (1.8.x), those are declared as:
typedef int64_t Int64;
typedef uint64_t UInt64;
Those base types are not identical so in cases when a type has
constructors overloaded only for specific integral types (such as
Json::Value in JsonCpp or data_value in Scylla), an attempt to
pack/unpack an integer from/to a JSON object causes ambiguous calls.
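A reduced illustration (not the actual JsonCpp or Scylla declarations):
on an LP64 platform int64_t is long, so a long long argument now converts
equally well to more than one overload.

    #include <cstdint>

    struct value {                  // stand-in for Json::Value / data_value
        value(int32_t) {}
        value(int64_t) {}           // 'long' on LP64 once Int64 is int64_t
    };

    void pack(long long x) {
        // value v(x);              // ambiguous: both constructors need an
        //                          // integral conversion of the same rank
        value v(int64_t(x));        // an explicit cast resolves the call
    }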
Fixes #3208
Tests: unit {release}.
Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
Message-Id: <e9fff9f41e0f34b15afc90b5439be03e4295623e.1524556258.git.vladimir@scylladb.com>
We were feeding the total estimated partition count of an input shared
sstable to the output unshared ones.
So the sstable writer thinks, *from estimation*, that each sstable created
by resharding will have the same data amount as the shared sstable it
is being created from. That's a problem because the estimation is fed to
bloom filter creation, which directly influences the filter's size.
So if we're resharding all sstables that belong to all shards, the
disk usage taken by filter components will be multiplied by the number
of shards. That becomes more of a problem with #3302.
Partition count estimation for a shard S will now be done as follows:
//
// TE, the total estimated partition count for a shard S, is defined as
// TE = Sum(i = 0...N) { Ei / Si }.
//
// where i is an input sstable that belongs to shard S,
// Ei is the estimated partition count for sstable i,
// Si is the total number of shards that own sstable i.
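A direct transcription of that formula (a sketch, not the actual
resharding code):

    #include <cstdint>
    #include <utility>
    #include <vector>

    // each input sstable i contributes Ei / Si to shard S's estimate
    uint64_t estimated_partitions_for_shard(
            const std::vector<std::pair<uint64_t, unsigned>>& inputs) {
        uint64_t te = 0;
        for (auto& [ei, si] : inputs) {   // (Ei, Si) per input sstable
            te += ei / si;
        }
        return te;
    }
    // e.g. two inputs {E=1000, S=4} and {E=2000, S=2}
    // give 250 + 1000 = 1250 partitions for shard S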
Fixes #2672.
Refs #3302.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20180423151001.9995-1-raphaelsc@scylladb.com>
"
Fixes to several issues around view update generation, pertaining to
timestamp and TTL management.
Fixes #3361. Fixes #3360. Fixes #3140.
Refs #3362
Tests: unit(release, debug), dtest(materialized_views.py)
"
Reviewed-by: Nadav Har'El <nyh@scylladb.com>
* 'materialized-views/fixes-galore/v2' of http://github.com/duarten/scylla:
mutation_partition: Clarify comment about emptiness
tests: Add view_complex_test
tests/view_schema_test: Complete test
db/view: Move cells instead of copying in add_cells_to_view()
db/view: Handle unselected base columns and corner cases
mutation_partition: Regular base column in view determines row liveness
db/view: Don't avoid read-before-write when view PK matches base
db/view: Process base updates to column unselected by its views
db/view: Consider partition tombstone when generating updates
tests/view_schema_test: Remove unneeded test
mutation_fragment: Allow querying if row is live
view_info: Add view_column() overload
view_info: Explicitly initialize base-dependent fields
cql3/alter_table_statement: Forbid dropping columns of MV base tables
This patch fixes several cases where it was disallowed to create
a materialized view with a filter ("where ..."), for no good reason.
After this patch, these cases will be allowed. Fixes #2367.
In ordinary SELECT queries, certain types of filtering which is known to
be deceptively inefficient is now allowed. For example, trying to query
a range of partition keys cannot be done without reading the entire
database (because the murmur3 tokenizer randomizes the order of partitions).
Restricting two partition key components also cannot be done without
reading excessive amount of the entire partition. So Scylla, following
Cassandra, chooses to disallow such SELECT queries, and give an error
message.
However, the same SELECT statements *should* be allowed when defining a
materialized view. In this case, the filter is just used to check an
individual row - not to search for one - so there is no performance
concern.
Unfortunately the existing code did these validations while building the
SELECT statement's "restrictions", in code shared by both uses of SELECT
(query and MV definition). It was easy to move one of the validations
to later code which runs after the restriction has already been built (and
knows if it is working for query or MV), but because of the way the
"restrictions" objects (translated from Cassandra 2's code) hide what they
contain, many of the checks are harder to perform after having built the
restrictions object. So instead, we add in strategic places in the
restriction-handling code a new "allow_filtering" flag. If restrictions
are built with allow_filtering=true, the extra performance-oriented tests
on the filtering restrictions are not done. Materialized views set
allow_filtering=true.
The allow_filtering flag will also be useful later when we want to support
the "ALLOW FILTERING" query option which is currently not supported properly
(we have several open issues on that). However note that this patch doesn't
complete that support: I left a FIXME in the spot where we set
allow_filtering in the Materialized Views case, but in the future we also
need to set it if the user specified "ALLOW FILTERING" in the query.
This patch also enables several unit tests written by Duarte which used to
fail because of this bug, and now pass. These tests verify that the
restrictions are now allowed and filter the view as desired; but I also
added test code to verify that the same restrictions are still forbidden,
as before, when used in ordinary SELECT queries.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180423124343.17591-1-nyh@scylladb.com>
"
Make sure install_dependencies.sh installs all the right dependencies
and that the example `configure.py` invocation can just be copy-pasted
into the terminal and will "just work".
Ref: #3208
"
* 'fix_centos_compile/v2' of https://github.com/denesb/scylla:
install_dependencies.sh: update centos package list and example
configure.py: add --with-ragel option
configure.py: add --with-antlr3
configure.py: check compiler version first
Add missing packages to `yum install` list:
* scylla-boost163-static
* scylla-python34-pyparsing20
Update the configure.py example so that it just works:
* Change g++ to 7.3
* Add --with-antlr3 pointing to antlr3 installed from scylla 3rdparty
Before checking anything else (presence of boost, its version, etc.)
check that the compiler is present and can compile and link a simple c++
program.
Previously, if the compiler was not set up correctly, configure.py would
fail at one of the other try_compile checks, whichever came first (usually
the one checking for boost). This led the user into chasing some
false-positive error when in fact the compiler wasn't working.
Debian 8 produces "Invalid argument" when we use AmbientCapabilities in the
systemd unit file, so drop the line when we build the .deb package for
Debian 8.
For other distributions, keep using the feature.
Fixes#3344
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180423102041.2138-1-syuu@scylladb.com>
"
This series complements JSON support with INSERT JSON and fromJson
cql function.
The INSERT JSON implementation tries hard to interfere as little as possible
with the regular INSERT path. So, after being parsed, insertJsonStatement
exists as a separate statement and is handled in a special way.
Overridden add_update_for_key extracts values from JSON map and applies
them to columns.
Converting from insert_json_statement to insert_statement uses auxiliary
from_json_object methods to convert JSON-encoded types to bytes.
Then, terms are matched to appropriate column names and cells are
updated.
fromJson CQL function uses the same from_json_object helper methods,
but applies them to single arguments, not whole rows.
Existing json handling functions from json.hh and libjsoncpp were used
where possible.
Things implemented:
* expanding CQL grammar to accept INSERT JSON
* converting JSON representation of cql values to cql terms
* serving 'INSERT INTO xxx JSON yyy' clause
* tests for INSERT JSON and fromJson()
"
* 'json_ops_2' of https://github.com/psarna/scylla:
tests: add cql unit tests for INSERT JSON
cql3: add fromJson() function
cql3: add INSERT JSON parsing to CQL grammar
cql3: add support for INSERT JSON clause
cql3: decouple execute from term binding in setters
cql3: change operation::make_* functions to static
cql3: add from_json_object function to types
cql3: Make literals::NULL_VALUE public
This commit adds tests for INSERT JSON clause, which is expected
to accept JSON strings and insert appropriate values to columns
defined there.
The tests also cover fromJson function calls and inserting prepared
batch statements with INSERT JSON inside.
References #2058
This commit extends JSON support with the fromJson() function,
which can be used in an UPDATE clause to transform a JSON value
into a value with the proper CQL type.
fromJson() accepts strings and may return any type, so its instances,
like toJson(), are generated during calls.
This commit also extends functions::get() with additional
'receiver' parameter. This parameter is used to extract receiver type
information needed to generate a proper fromJson instance.
Receiver is known only during insert/update, so functions::get() also
accepts a nullptr if receiver is not known (e.g. during selection).
References #2058
This commit adds the implementation of INSERT JSON clause
which accepts JSON object as parameter and inserts appropriate
values into appropriate columns, as defined in given JSON.
Example:
INSERT INTO testme JSON '{
"id" : 77,
"name" : "Jones",
"ranking" : 8.5
}'
References #2058
This commit makes it possible to pass values to setters,
instead of having to pass cql3::term instances.
Thanks to that, previously prepared terminals can be directly
used in setter execution.
References #2058
This commit makes operation::make* functions static, because they
don't access any instance-specific data anyway. It is later needed
to decouple setter execution from binding a cql3::term.
This commit adds a 'from_json_object' method which will be used
for converting JSON representation of a value to raw bytes representing
the same value. This functionality will be needed by 'INSERT JSON'
clause implementation, which can turn these raw bytes into cql3::term.
References #2058
continuous_data_consumer_test takes an unreasonable amount of
time to run, especially in debug mode. Reduce the run time by
reducing the number of loops.
Message-Id: <20180422150938.29143-1-avi@scylladb.com>
This patch introduces view_complex_test and adds more test coverage
for materialized views.
A new file was introduced to avoid making view_schema_test slower.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
When a view's PK only contains the columns that form the base's PK,
then the liveness of a particular view row is determined not only by
the base row's marker, but also by the selected and, more importantly,
unselected columns.
This patch ensures that unselected columns are considered as much as
possible, even though some limitations will still exist. In
particular, we need to represent multiple timestamps (from all the
unselected columns), but only have mechanisms to record a single
timestamp.
We also have some issues when dealing with selected columns, and the
way we currently delete them. Consider the following:
create table cf (p int, c int, a int, b int, primary key (p, c))
create materialized view vcf as select a, b
from cf where p is not null and c is not null
primary key (p, c)
1) update cf using timestamp 10 set a = 1 where p = 1 and c = 1
2) delete a from cf using timestamp 11 where p = 1 and c = 1
3) update cf using timestamp 1 set a = 2 where p = 1 and c = 1
After 1), the MV should include a row with row marker @ ts10,
p = 1, c = 1, a = 1. After 2), this row should be removed.
At 3), we should add a row with row marker @ ts1, p = 1, c = 1, a = 2,
with a lower timestamp. This means that the delete should not
insert a row tombstone with timestamp @ 11, as we do now, but should
just delete the view's row marker (which exists) with ts1.
Refs #3362. Fixes #3140. Fixes #3361.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
When views contain a primary key column that is not part of the base
table primary key, that column determines whether the row is live or
not. We need to ensure that when that cell is dead (and thus the
derived row marker), whether by normal deletion or by TTL, so is the
rest of the row.
This patch introduces the idea of a shadowing row marker. We map the
status of the regular base column in the view's PK to the view row's
marker. If this marker is dead, so is that cell in the base table, and
so should the view row become. To enforce that, a view row's dead
marker shadows the whole row if that view includes a base regular
column in its PK.
Fixes #3360
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
When a view's PK only contains the columns that form the base's PK,
then the liveness of a particular view row is determined not only by
the base row's marker, but also by the selected and, more importantly,
unselected columns. When calculating the view's row marker we need
to access those unselected columns, so we can't avoid the
read-before-write as we were doing.
Refs #3362
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
When a view's PK only contains the columns that form the base's PK,
then the liveness of a particular view row is determined not only by
the base row's marker, but also by the selected and, more importantly,
unselected columns. So, process base updates to columns unselected by
any of its views.
Refs #3362
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Not adding the partition tombstone to the current list of tombstones
may cause updates to be incorrectly generated.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Instead of lazily-initializing the regular base column in the view's
PK field, explicitly initialize it. This will be used by future
patches that don't have access to the schema when wanting to obtain
that column.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
When a view's PK only contains the columns that form the base's PK,
then the liveness of a particular view row is determined not only by
the base row's marker, but also by the selected and, more importantly,
unselected columns.
The fact that unselected columns can keep a view row alive also
requires that users cannot drop columns of base tables with
materialized views, which this patch implements.
Refs #3362
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
"
Pass sstable version to parse, write and describe_type methods to make it possible to handle different versions.
For now serialization header from 3.x format is ignored.
Tests: units (release)
"
* 'haaawk/sstables3/loading_v3' of ssh://github.com/scylladb/seastar-dev:
Add test for loading the whole sstable
Add test for loading statistics
Add support for 3_x stats metadata
Pass sstable version to describe_type
Pass sstable version to write methods
metadata_type: add Serialization type
Pass sstable_version_types to parse methods
Add test for reading filter
Add test for read_summary
sstables 3.x: Add test for reading TOC
sstable: Make component_map version dependent
sstable::component_type: add operator<<
Extract sstable::component_type to separate header
Remove unused sstable::get_shared_components
sstable_version_types: add mc version
Introduce sstable_version_constants that will be a proxy
serving correct constants depending on the format version.
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
The compression_parameter constructor is called with an extra level of
parentheses. Presumably this caused a temporary object to be constructed
and then moved into the argument being initialized, but gcc 8 complains
about ambiguity.
Make it happy by stripping off the redundant parentheses.
Message-Id: <20180421121854.12314-1-avi@scylladb.com>
The token constructor is called with an extra level of parentheses. Presumably
this caused a temporary object to be constructed and then moved into the
variable being initialized, but gcc 8 complains about ambiguity.
Make it happy by stripping off the redundant parentheses.
Message-Id: <20180421121736.12136-1-avi@scylladb.com>
The parameters to the MutationFragmentConsumer concept must be concrete
types, not decltype(auto).
Reported by gcc 8.
Message-Id: <20180421110738.7574-1-avi@scylladb.com>
"
Prints info about sstables used by readers
Example:
(gdb) scylla active-sstables
sstable "keyspace1"."standard1"#5, readers=3 data_file_size=39393952
sstable "keyspace1"."standard1"#6, readers=3 data_file_size=127513304
sstable_count=2, total_index_lists_size=0
"
* 'tgrabiec/gdb-scylla-active-sstables' of github.com:tgrabiec/scylla:
gdb: Introduce "scylla active-sstables" command
gdb: Make list_unordered_map() more general
gdb: Improve compatibility with python2.7
Prints info about sstables used by readers
Example:
(gdb) scylla active-sstables
sstable "keyspace1"."standard1"#5, readers=3 data_file_size=39393952
sstable "keyspace1"."standard1"#6, readers=3 data_file_size=127513304
sstable_count=2, total_index_lists_size=0
1) vt.name returns None for some types, use str() instead
2) some unordered_maps use 'false' as the second Hash_node template parameter
3) some consumers will prefer a reference to the value instead of its address
"
Enhance continuous_data_consumer to use the existing vint serialization for
reading variable-length integers from SSTables.
Also available at:
https://github.com/scylladb/seastar-dev/commits/haaawk/sstables3/unsigned-vint-v6
Tests: units (release)
"
* 'haaawk/sstables3/unsigned-vint-v6' of ssh://github.com/scylladb/seastar-dev:
sstables: add test for continuous_data_consumer::read_unsigned_vint
buffer_input_stream: make it possible to specify chunk size
Add tests for make_limiting_data_source
Introduce make_limiting_data_source
sstables: add continuous_data_consumer::read_unsigned_vint
Cover serialized_size_from_first_byte in tests
core: add unsigned_vint::serialized_size_from_first_byte
sstables: add all dependant headers to consumer.hh
sstables: add all dependant headers to exceptions.hh
core: add #pragma once to vint-serialization.hh
When 'always_set_home' is specified in /etc/sudoers, pbuilder won't read
.pbuilderrc from the current user's home directory, and we don't have a way
to change that behavior via a sudo command parameter.
So let's use ~root/.pbuilderrc and switch to HOME=/root when sudo is
executed; this works both in environments which specify always_set_home
and in those which don't.
Fixes#3366
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1523926024-3937-1-git-send-email-syuu@scylladb.com>
This method takes a data_source and returns another data_source
that returns data from the input source but in chunks of limited
size.
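A hedged sketch of such a wrapper over seastar's data_source_impl (the
actual implementation may differ):

    #include <algorithm>
    #include <memory>
    #include <seastar/core/iostream.hh>
    #include <seastar/core/temporary_buffer.hh>

    using namespace seastar;

    class limiting_data_source_impl : public data_source_impl {
        data_source _src;
        temporary_buffer<char> _pending;
        size_t _limit;
    public:
        limiting_data_source_impl(data_source src, size_t limit)
            : _src(std::move(src)), _limit(limit) {}
        future<temporary_buffer<char>> get() override {
            if (!_pending.empty()) {
                auto n = std::min(_limit, _pending.size());
                auto chunk = _pending.share(0, n); // hand out <= limit bytes
                _pending.trim_front(n);
                return make_ready_future<temporary_buffer<char>>(
                        std::move(chunk));
            }
            return _src.get().then([this] (temporary_buffer<char> buf) {
                if (buf.empty()) {                 // end-of-stream
                    return make_ready_future<temporary_buffer<char>>(
                            std::move(buf));
                }
                _pending = std::move(buf);
                return get();                      // serve from _pending
            });
        }
    };

    data_source make_limiting_data_source(data_source src, size_t limit) {
        return data_source(std::make_unique<limiting_data_source_impl>(
                std::move(src), limit));
    }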
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
This method takes the first byte and determines how many bytes
are used to represent an unsigned variable-length integer.
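A sketch assuming Cassandra's vint rule, where the number of leading 1
bits in the first byte equals the number of extra bytes that follow
(9 bytes at most):

    #include <cstddef>
    #include <cstdint>

    size_t serialized_size_from_first_byte(uint8_t first) {
        if (first == 0xff) {
            return 9;              // all ones: 8 extra bytes follow
        }
        // leading ones of 'first' == leading zeros of its complement
        uint32_t inverted = uint32_t(uint8_t(~first)) << 24;
        return 1 + __builtin_clz(inverted);
    }
    // 0b0xxxxxxx -> 1 byte, 0b10xxxxxx -> 2 bytes, ..., 0xff -> 9 bytes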
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
Before, it depended on byteorder.hh, which just happened
to be included in all compilation units that were using consumer.hh.
This change makes the header compile when used in new compilation units.
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
Before, it depended on print.hh, which just happened
to be included in all compilation units that were using
exceptions.hh. This change makes the header compile
when used in new compilation units.
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
Since storage_proxy provides access to the entire cluster, a local shard
reference is sufficient. Adjust query_processor to store a reference to
just the local shard, rather than a seastar::sharded<storage_proxy> and
adjust callers.
This simplifies the code a little.
Message-Id: <20180415142656.25370-3-avi@scylladb.com>
The storage_proxy represents the entire cluster, so there's never a need
to access it on a remote shard; the local shard instance will contact
remote shard or remote nodes as needed.
Simplify the API by passing storage_proxy references instead of
seastar::sharded<storage_proxy> references. query_processor and
other callers are adjusted to call seastar::sharded::local() first.
Message-Id: <20180415142656.25370-2-avi@scylladb.com>
build_deb.sh relies on pbuilder picking up a ~/.pbuilderrc which we
copy from the script. According to the pbuilder manual, "~" will refer
to the root directory (since pbuilder is run via sudo). In practice
we've observed this working with "~" referring to the current user's
home directory, but also sometimes failing, while complaining
about /root/.pbuilderrc failing. When it fails, it fails to set
the correct distribution.
To be extra sure, also copy .pbuilderrc to root's home directory. This
way, whatever behavior pbuilder chooses to follow, it will have a
configuration file to read.
Message-Id: <20180410134508.9415-1-avi@scylladb.com>
The patch fixes a bug introduced by commit
089b54f2d2.
When sstable files are stored in the .../upload directory
and refresh is initiated with `nodetool`, it fails
because Scylla doesn't expect .../upload to be a part of the path.
Fixes #3334.
Signed-off-by: Daniel Fiala <daniel@scylladb.com>
Message-Id: <20180413132019.17779-1-daniel@scylladb.com>
This commit extends JSON support with the toJson() function,
which can be used in a SELECT clause to transform a single argument
to JSON form.
toJson() accepts any type including nested collection types,
so instead of being declared with concrete types,
proper toJson() instances are generated during calls.
This commit also supplements JSON CQL query tests with toJson calls.
Finally, it refactors JSON tests so they use do_with_cql_env_thread.
References #2058
Message-Id: <a7833650428e9ef590765a14e91c4d42532588f4.1523528698.git.sarna@scylladb.com>
There is a race between cql connection closure and notifier
registration. If a connection is closed before notification registration
is complete, a stale pointer to the connection will remain in the
notification list, since the attempt to unregister the connection happens
too early.
The fix is to move notifier unregistration to after the connection's gate
is closed, which ensures that there is no outstanding registration
request. But this means that a connection with a closed gate can now be in
the notifier list, so with_gate() may throw and abort the notifier loop. Fix
that by replacing with_gate() with a call to is_closed().
Fixes: #3355
Tests: unit(release)
Message-Id: <20180412134744.GB22593@scylladb.com>
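A hedged sketch of the notifier loop after the fix (the connection list and gate accessor are invented names): with_gate() would throw gate_closed_exception for a closed connection and abort the loop, whereas checking is_closed() lets the loop simply skip it:
```
void notify_all() {
    for (auto& conn : _connections) {
        if (conn.gate().is_closed()) {
            continue; // shutting down; it will unregister itself shortly
        }
        notify(conn);
    }
}
```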
That's blocking KairosDB users because it uses TWCS with millisecond
timestamp resolution.
Also older drivers use millisecond instead of the default microsecond.
Fixes#3152.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20180411171244.19958-1-raphaelsc@scylladb.com>
In my well-intentioned attempt to use fewer magic numbers in the loading
code I replaced "64" with something calculated automatically from the
type being used.
Except I did it wrong, because sizeof(uint64_t) is 8, not 64.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20180411155903.27665-1-glauber@scylladb.com>
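The fix in miniature; these lines are a generic restatement, not the actual patch:
```
#include <climits>
#include <cstdint>

static_assert(sizeof(uint64_t) == 8, "sizeof() counts bytes, not bits");
constexpr unsigned bits_per_word = sizeof(uint64_t) * CHAR_BIT; // == 64
```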
"
This series introduces 'SELECT JSON' clause support for CQL.
Things implemented:
* expanding CQL grammar with JSON keyword
* converting values to JSON format
* serving 'SELECT JSON *' clauses
* tests for 'SELECT JSON'
"
* 'json_ops' of https://github.com/psarna/scylla:
tests: add cql unit tests for SELECT JSON
cql3: Add JSON token to CQL grammar
cql3: add support for SELECT JSON clause
cql3: add to_json_string function to types
This commit adds JSON keyword to CQL grammar and allows parsing
'SELECT JSON' command in CQL. Additionally, it will be useful
in implementing 'INSERT JSON(...)'.
References #2058
This commit adds the implementation of SELECT JSON clause
which returns rows in JSON format. Each returned row has a single
'[json]' column.
References #2058
"
The multishard combined reader provides a convenient
flat_mutation_reader implementation that takes care of efficiently
reading a range from all shards that own data belonging to the range.
All this happens transparently, the user of the reader need only pass a
factory function to the multishard reader which it uses to create
remote readers when needed. These remote readers will then be managed
through foreign reader which abstracts away the fact that the reader is
located on a remote shard.
Sub readers are created for the entire read range, meaning they are free
to cross shard-range limits to fill their buffer. The output of these
sub readers is merged in a round-robin manner, the same way data is
distributed among shards. The multishard reader will move to the next
shard's reader whenever it encounters a partition whose token is after
the delimiter token.
To improve throughput and latency, two levels of read-ahead are employed.
One in foreign_reader, which will try to fill the remote shard reader's
buffer in the background, in parallel to processing the results on the
local shard. And one in the multishard reader itself which will
exponentially increase concurrency whenever a sub-reader's buffer
becomes empty. But only if this happened after crossing a shard
boundary. This is important because there is no point in increasing
concurrency if a single sub-reader can fill the multishard reader's
buffer.
"
* 'multishard-reader/v3' of https://github.com/denesb/scylla:
Add unit tests for multishard_combined_reader
Add multishard_combined_reader
flat_mutation_reader: add peek_buffer()
Add unit tests for foreign_reader
forwardable reader: implement fast_forward_to(position_in_partition)
Add foreign_reader
flat_mutation_reader: add detach_buffer()
Asias reported in issue #3351 that a floating point exception was seen
while loading SSTables. Looking at the trace, that seems to be because
we tried to issue a modulo operation with something that was likely 0.
That field comes from the nr_bits attribute in the large bitset, and our
current code should set it to whatever we read from the Filter file -
something that has been working for ages.
The difference is that after the patch that Asias identified as culprit,
we are moving the array from which we compute the size in the same
parameter list where we are computing the size.
This works for me and passed all my tests - likely because my compiler
was doing left-to-right evaluation as I would expect it to do. But the
standard doesn't guarantee that at all, and it reads:
"Order of evaluation of the operands of almost all C++ operators
(including the order of evaluation of function arguments in a
function-call expression and the order of evaluation of the
subexpressions within any expression) is unspecified. The compiler can
evaluate operands in any order, and may choose another order when the
same expression is evaluated again."
This likely fixes the bug, but even if it doesn't we should patch it,
since we currently have something that is technically UB.
Fixes#3351.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20180411144036.24748-1-glauber@scylladb.com>
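A hedged sketch of the hazard with invented names; the point is only that the two arguments may be evaluated in either order:
```
#include <cstdint>
#include <vector>

void init_bitset(std::vector<uint64_t> storage, size_t nr_bits);

void broken(std::vector<uint64_t> v) {
    // Unspecified order: v.size() may run after v has been moved from.
    init_bitset(std::move(v), v.size() * 64);
}

void fixed(std::vector<uint64_t> v) {
    auto nr_bits = v.size() * 64; // evaluate before the move
    init_bitset(std::move(v), nr_bits);
}
```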
The patch fixes a bug introduced by commit 089b54f2d2.
This bug manifested when master was deployed in an attempt to populate
materialised views. The nodes restarted in the middle and were not able
to come back.
The fix is to remember formats and versions of sstables for every generation.
Fixes: #3324.
Signed-off-by: Daniel Fiala <daniel@scylladb.com>
Message-Id: <20180410083114.17315-1-daniel@scylladb.com>
This commit adds a 'to_json_string' method which will be used
for converting values to JSON strings. In several cases it's not
sufficient to use 'to_string', e.g. actual strings need to be
surrounded with double quotes.
References #2058
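A minimal sketch of why a dedicated method is needed; real code must also escape embedded quotes and control characters:
```
#include <string>

// Illustrative only: a text value rendered as JSON must be quoted,
// while to_string()-style output yields the bare characters.
std::string to_json_string(const std::string& s) {
    return "\"" + s + "\""; // plus escaping, omitted here
}
```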
Some tests escaped the --overprovisioned flag, causing them to
compete over cpu 0. Add the flag to all tests.
Message-Id: <20180410181606.8341-1-avi@scylladb.com>
Takes care of reading a range from all shards that own a subrange in the
range. The read happens sequentially, reading from one shard at a time.
Under the hood it uses combined_mutation_reader and foreign_reader,
the former providing the merging logic and the latter taking care of
transferring the output of the remote readers to the local shard.
Readers are created on-demand by a reader-selector implementation that
creates readers for yet unvisited shards as the read progresses.
The read starts with a concurrency of one, that is the reader reads from
a single shard at a time. The concurrency is exponentially increased (to
a maximum of the number of shards) when a reader's buffer is empty after
moving to the next shard. This condition is important as we only want to
increase concurrency for sparse tables that have little data and the
reader has to move between shards often. When concurrency is > 1, the
reader issues background read-aheads to the next shards so that by the
time it needs to move to them they have the data ready.
For dense tables (where we rarely cross shards) we rely on the
foreign_reader to issue sufficient read-aheads on its own to avoid
blocking.
Allows peeking at the next mutation fragment in the buffer. As opposed
to the existing `peek()` it assumes there's at least one fragment in the
buffer. Useful for code that already ensured that the buffer is not
empty and doesn't want to introduce a continuation (via `peek()`).
Instead of throwing std::bad_function_call. Needed by the foreign_reader
unit test. Not sure how other tests didn't hit this before as the test
is using `run_mutation_source_tests()`.
Local representative of a reader located on a remote shard. Manages the
lifecycle and takes care of seamlessly transferring fragments produced
by the remote reader. Fragments are *copied* between the shards in
batches, a bufferful at a time.
To maximize throughput read-ahead is used. After each fill_buffer() or
fast_forward_to() a read-ahead (a fill_buffer() on the remote reader) is
issued. This read-ahead runs in the background and is brought back to
foreground on the next fill_buffer() or fast_forward_to() call.
Allows for detaching the internal buffer of the reader. Enables
convenient transferring of buffered fragments in a single batch but
will force the reader to reallocate its buffer on the next
fill_buffer() call.
Introduced for foreign_reader which favours quick transferring of the
fragments between shards in a single batch, over minimizing allocations,
which can be amortized by background read-aheads.
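A hedged sketch of the operation, assuming the reader keeps its fragments in a circular_buffer; the body is illustrative:
```
// Hand the whole buffer to the caller in one batch; the next
// fill_buffer() starts from an empty, freshly allocated buffer.
seastar::circular_buffer<mutation_fragment> detach_buffer() {
    return std::exchange(_buffer, {}); // std::exchange from <utility>
}
```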
After the change to serialize compaction on compaction weight (eff62bc61e),
the LCS invariant may break because parallel compaction can start, which is
not currently supported for LCS.
The condition is that the weight is deregistered right before the last
sstable for a leveled compaction is sealed, so a new compaction may
meanwhile start for the same column family and promote an sstable to an
overlapping token range.
That leads to the strategy restoring the invariant when it finds the
overlap, which means wasted resources.
The fix removes a fast-path check which is now incorrect because we release
the weight early, and also fixes a check for ongoing compaction which
prevented compaction from starting for LCS whenever the weight tracker
was not empty.
Fixes#3279.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20180410034538.30486-1-raphaelsc@scylladb.com>
_lsa_managed is always 1:1 with _region, so we can remove it, saving
some space in the segment descriptor vector.
Tests: unit (release), logalloc_test (debug)
Message-Id: <20180410122606.10671-1-avi@scylladb.com>
Problem:
Start node 1 2 3
Shutdown node2
Shutdown node1 node3
Start node1 node3
Try to replace_address for node2
The replace operation fails with the error:
seastar - Exiting on unhandled exception: std::runtime_error
(Cannot replace_address node2 because it doesn't exist in gossip)
This is because after all nodes shutdown, the other nodes do not have the
tokens and host_id info of node2 until node2 boots up and talks to the cluster.
If node2 cannot boot up for whatever reason, currently the only way to
recover node2 is to `nodetool removenode` and bootstrap node2 again. This will
change tokens in the cluster and cause more data movement than just replacing
node2.
To fix, we add the tokens and host_id gossip application state in add_saved_endpoint
during boot up.
This is pretty safe because the generation for application state added by
add_saved_endpoint is zero; if node2 actually boots, other nodes will update
with node2's version.
Before:
```
$ curl -X GET --header "Accept: application/json" "http://127.0.0.1:10000/failure_detector/endpoints/" | python -mjson.tool
{
    "addrs": "127.0.0.2",
    "generation": 0,
    "is_alive": false,
    "update_time": 1523344828953,
    "version": 0
}
```
Node 2 can not be replaced.
After:
```
$ curl -X GET --header "Accept: application/json" "http://127.0.0.1:10000/failure_detector/endpoints/" | python -mjson.tool
{
    "addrs": "127.0.0.2",
    "application_state": [
        {
            "application_state": 12,
            "value": "31284090-2557-4036-9367-7bb4ef49c35a",
            "version": 2
        },
        {
            "application_state": 13,
            "value": "... a lot of tokens ...",
            "version": 1
        }
    ],
    "generation": 0,
    "is_alive": false,
    "update_time": 1523344828953,
    "version": 0
}
```
Node 2 can be replaced.
Tests: dtest/replace_address_test.py
Fixes: #3347
Message-Id: <117fd6649939e0505847335791be8d7a96e7d273.1523346805.git.asias@scylladb.com>
save and load functions for the large_bitset were introduced by Avi with
d590e327c0.
In that commit, Avi says:
"... providing iterator-based load() and save() methods. The methods
support partial load/save so that access to very large bitmaps can be
split over multiple tasks."
The only user of this interface is SSTables. And it turns out we don't
really split the access like that. What we do instead is to create a
chunked vector and then pass its begin() method with position = 0 and let
it write everything.
The problem here is that this requires the chunked vector to be fully
initialized, not just reserved. If the bitmap is large enough, that in
itself can take a long time without yielding (up to 16ms seen in my setup).
We can simplify things considerably by moving the large_bitset to use a
chunked vector internally: it already uses a poor man's version of it
by allocating chunks internally (it predates the chunked_vector).
By doing that, we can turn save() into a simple copy operation, and do
away with load altogether by adding a new constructor that will just
copy an existing chunked_vector.
Fixes#3341
Tests: unit (release)
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20180409234726.28219-1-glauber@scylladb.com>
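A hedged sketch of the resulting shape (simplified; the real class carries more state):
```
#include <cstdint>

class large_bitset {
    utils::chunked_vector<uint64_t> _storage;
public:
    // load() is replaced by adopting existing storage wholesale, and
    // save() degenerates to copying _storage out.
    explicit large_bitset(utils::chunked_vector<uint64_t> storage)
        : _storage(std::move(storage)) {}
    const utils::chunked_vector<uint64_t>& storage() const { return _storage; }
};
```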
"This patchset removes zones and replaces them with a simpler system. LSA tries
to allocate segments at higher addresses, so that we'll end up with the standard
allocator using lower addresses and LSA using higher addresses, allowing for easier
allocation from std."
* tag 'lsa-no-zones/v6' of https://github.com/avikivity/scylla:
tests: add logalloc_test for large contiguous allocations in a challenging environemnt
logalloc: limit std segment allocations in debug mode
logalloc: introduce prime_segment_pool()
logalloc: limit non-contiguous reclaims
logalloc: pre-allocate all memory as lsa on startup
tests: add random test for dynamic_bitset
dynamic_bitset: optimize for large sets
dynamic_bitset: get rid of resize()
dynamic_bitset: remove find_*_clear() variants
logalloc: reduce segment size to 128k
logalloc: get rid of the emergency reserve stack
logalloc: replace zones with segment-at-a-time alloc/free
Commit 1671d9c433 (not on any release branch)
accidentally bumped the idle memtable flush cpu shares to 100 (representing
10%), causing flushes to be too aggressive even when they shouldn't
consume much cpu.
Fixes#3243.
Message-Id: <20180408104601.9607-1-avi@scylladb.com>
* seastar 33d8f74...2da7d46 (4):
> http routes: Add parameters to path when adding alias
> future: compile-time optimize futurize<void>::apply()
> memory: remove unneeded union 'pla'
> queue: not_empty()/not_full() should throw when called after abort
This state does not read any data and is used only to perform an
action when finishing reading a primitive type.
According to comment on continuous_data_consumer::non_consuming
such states should be marked as non_consuming.
Tests: units (release)
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
Message-Id: <55a5c9b76268b50312ecd044291f28dcd8179a22.1523005293.git.piotr@scylladb.com>
Test large std allocations in an environment that has seen many persistent
std allocations interspersed with lsa allocations, causing memory fragmentation.
Address Sanitizer has a global limit on the number of allocations
(note: not number of allocations less number of frees, but cumulative
number of allocations). Running some tests in debug mode on a machine
with sufficient memory can break that limit.
Work around that limit by restricting the amount of memory the
debug mode segment_pool can allocate. It's also nicer for running
the test on a workstation.
To segregate std and lsa allocations, we prime the segment pool
during initialization so that lsa will release lower-addressed
memory to std, rather than lsa and std competing for memory at
random addresses.
However, tests often evict all of lsa memory for their own
purposes, which defeats this priming.
Extract the functionality into a new prime_segment_pool()
function for use in tests that rely on allocation segregation.
We may fail to reclaim because a region has reclaim disabled (usually because
it is in an allocating_section). Failed reclaims can cause high CPU usage
if all of the lower addresses happen to be in a reclaim-disabled region (this
is somewhat mitigated by the fact that checking for reclaim disabled is very
cheap), but worse, failing a segment reclaim can lead to reclaimed memory
being fragmented. This results in the original allocation continuing to fail.
To combat that, we limit the number of failed reclaims. If we reach the limit,
we fail the reclaim. The surrounding allocating_section will release the
reclaim_lock, and increase reserves, which will result in reclaim being
retried with all regions being reclaimable, and succeed in allocating
contiguous memory.
Since lsa tries to keep some non-lsa memory as reserve, we end up
with three blocks of memory: at low addresses, non-lsa memory that was
allocated during startup or subsequently freed by lsa; at middle addresses,
lsa; and at the top addresses, memory that lsa left alone during initial
cache population due to the reserve.
After time passes, both std and lsa will allocate from the top section,
causing a mix of lsa and non-lsa memory. Since lsa tries to free from
lower addresses, this mix will stay there forever, increasing fragmentation.
Fix that by disabling the reserve during startup and allocating all of memory
for lsa. Any further allocation will then have to be satisfied by lsa first
freeing memory from the low addresses, so we will now have just two sections
of memory: low addresses for std, and top addresses for lsa.
Note that this startup allocation does not page in lsa segments, since the
segment constructor does not touch memory.
They are no longer used, and cannot be efficiently implemented
for large bitsets using a summary vector approach without slowing
down the find_*_set() variants, which are used.
Also remove find_previous_set() for the same reason.
Reducing the segment size reduces the time needed to compact segments,
and increases the number of segments that can be compacted (and so
the probability of finding low-occupancy segments).
128k is the size of I/O buffers and of thread stacks, so we can't
go lower than that without more significant changes.
This patch replaces the zones mechanism with something simpler: a
single segment is moved from the standard allocator to lsa and vice
versa, at a time. Fragmentation resistance is (hopefully) achieved
by having lsa prefer high addresses for lsa data, and return segments
at low address to the standard allocator. Over time, the two will move
apart.
Moving just one segment at a time reduces the latency costs of
transferring memory between free and std.
"Together with the already merged patch, we reduce the object file
from 114MB to 81MB."
* tag 'api-diet-1/v1' of https://github.com/avikivity/scylla:
api: type-erase all-column_family map_reduce variant
api: simplify 6-argument map_reduce_cf() variant
* seastar 7328d17...33d8f74 (3):
> memory: switch to buddy allocation
> tls: Ensure we always pass through semaphores on shutdown
> memory: replace placement-new in unions with member construction
See scylladb/seastar#426.
After f59f423f3c, an sstable is loaded only on shards
that own it so as to reduce the sstable load overhead.
The problem is that an sstable may no longer be forwarded to a shard that
needs to be aware of its existence, which would result in that sstable
generation being
reallocated for a write request.
That would result in a failure as follow:
"SSTable write failed due to existence of TOC file for generation..."
This can be fixed by forwarding any sstable at load to all its owner shards
*and* the shard responsible for its generation, which is determined as
follows (see the sketch below):
s = generation % smp::count
Fixes#3273.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20180405035245.30194-1-raphaelsc@scylladb.com>
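The forwarding rule restated as code (helper name invented):
```
#include <seastar/core/smp.hh>

// Besides its owner shards, an sstable is also forwarded to the shard
// accountable for its generation.
seastar::shard_id shard_for_generation(int64_t generation) {
    return generation % seastar::smp::count;
}
```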
"
Fixes to the view building process, discovered from field experience.
Tests: dtest(materialized_view_tests.py, smp=2)
"
* 'views/view-build-fixes/v1' of https://github.com/duarten/scylla:
db/view: Start view building after schema agreement
db/system_keyspace: scylla_views_builds_in_progress writes are user mem
db/view: Require configuration option to enable view building
Empty partition keys are not supported on normal tables - they cannot
be inserted or queried (surprisingly, the rules for composite
partition keys are different: all components are then allowed to be
empty). However, the (non-composite) partition key of a view could end
up being empty if that column is a base table regular column, a
base table clustering key column, or a base table partition key column
that is part of a composite key.
Fixes#3262
Refs CASSANDRA-14345
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180403122244.10626-1-duarte@scylladb.com>
If a base table or view has been dropped in one node, but another
one hasn't yet learned about it, it starts the view build process
immediately on boot, possibly calculating unneeded view updates and
causing errors at the view replica, if that replica has already
processed the schema changes. We should thus wait for schema
agreement, even if the node is a seed.
Fixes#3328
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Treat writes to scylla_views_builds_in_progress as user memory, as the
number of writes is dependent on the amount of user data on views
(times the number of views, divided by the view building batch size).
Fixes#3325
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
View building, enabled by default, can contain or expose issues that
prevent the node from starting. In those cases, it is necessary to
disable view building such that the node can be submitted to
maintenance operations.
Fixes#3329
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
The 6-argument map_reduce_cf function is identical to the 5-argument
version, except that it performs an extra cast (by calling
the 6th argument's operator=()).
Simplify the code by calling the 5-argument version from the 6-argument
version.
Reduces binary size by ~10%.
map_reduce_cf() is called with varying template parameters which each
have to be compiled separately. Unifying the internals to use types based
on std::any reduced the object size by 15% (115MB->99MB) with presumably
a commensurate decrease in compile time.
A version that used "I" instead of "std::any" (and thus merged the
internals only for callers that used the same result type) delivered
a 10% decrease in object size. While std::any is less safe, in this
case it is completely encapsulated.
Message-Id: <20180402213732.432-1-avi@scylladb.com>
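A hedged sketch of the type-erasure idea, independent of the real api/ code; column_family and the registry are placeholders:
```
#include <any>
#include <functional>
#include <utility>
#include <vector>

struct column_family {};                 // placeholder type
std::vector<column_family> all_tables;   // placeholder registry

// Compiled once, shared by every result type.
std::any map_reduce_erased(std::function<std::any (column_family&)> map,
                           std::function<std::any (std::any, std::any)> reduce,
                           std::any acc) {
    for (auto& cf : all_tables) {
        acc = reduce(std::move(acc), map(cf));
    }
    return acc;
}

// Thin typed shim; each instantiation is now trivial.
template <typename T, typename Map, typename Reduce>
T map_reduce_cf(Map map, Reduce reduce, T initial) {
    auto res = map_reduce_erased(
        [map] (column_family& cf) { return std::any(map(cf)); },
        [reduce] (std::any a, std::any b) {
            return std::any(reduce(std::any_cast<T>(std::move(a)),
                                   std::any_cast<T>(std::move(b))));
        },
        std::any(std::move(initial)));
    return std::any_cast<T>(std::move(res));
}
```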
test_large_allocation attempts to allocate almost half of memory.
With a buddy allocator, even if more than half of memory is free,
and even if it is contiguous, it is unlikely to be available as a
single allocation because the allocator inserts boundaries at powers-
of-two addresses.
Relax the test by allocating smaller chunks (but still the same amount,
and still with challenging sizes); allocating half of memory contiguously
is not a goal.
Also use a vector instead of a deque, and reserve it, so we don't get
intervening non-lsa allocations. I'm not sure there's a problem there
but let's not depend on the allocation patterns.
Message-Id: <20180401150828.13921-1-avi@scylladb.com>
While building with -O1, I saw that the linker could not find
the vtable for named_value<log_level>. Rather than fixing up the
includes (and likely lengthening build time), fix by defining
the class as an extern template, preventing it from being
instantiated at the call site.
Message-Id: <20180401150235.13451-1-avi@scylladb.com>
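The mechanism in brief; named_value and log_level here are self-contained stand-ins for the real types in the options framework:
```
enum class log_level { info, warn };

template <typename T>
class named_value {
public:
    virtual ~named_value() = default; // the vtable the linker looks for
    T value{};
};

// In the header: suppress implicit instantiation at call sites, so they
// reference the vtable instead of emitting it per translation unit.
extern template class named_value<log_level>;

// In exactly one .cc file: the single explicit instantiation definition.
template class named_value<log_level>;
```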
-O1 complains that client_state::_remote_addr is not initialized
(and it is right). The call site is tracing, which likely won't be
invoked for internal queries, but still.
Message-Id: <20180401150410.13651-1-avi@scylladb.com>
Since bytes is used to encapsulate blobs, not strings, there's no
need for a NUL terminator. It will never be passed to a function
that expects a C string.
Message-Id: <20180401151009.14108-1-avi@scylladb.com>
* seastar a66cc34...7328d17 (5):
> sstring: add support for non-nul-terminated sstrings
> core/sharded: Make async_sharded_service dtor virtual
> reactor: pass naked pointer to submit_io
> Merge http: "Add alias support to the API" from Amnon
> systemwide_memory_barrier: use madvise(MADV_DONTNEED) instead of mprotect()
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
By default, overprovisioned is not enabled on docker unless it is
explicitly set. I have come to believe that this is a mistake.
If the user is running alone in the machine, and there are no other
processes pinned anywhere - including interrupts - not running
overprovisioned is the best choice.
But everywhere else, it is not: even if a user runs 2 docker containers
in the same machine and statically partitions CPUs with --smp (but
without cpuset) the docker containers will pin themselves to the same
sets of CPU, as they are totally unaware of each other.
It is also very common, especially in some virtualized environments, for
interrupts not to be properly distributed - being particularly keen on
being delivered on CPU0, a CPU which Scylla will pin by default.
Lastly, environments like Kubernetes simply don't support pinning at the
moment.
This patch enables the overprovisioned flag if it is explicitly set -
like we did before - but also by default unless --cpuset is set.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20180331142131.842-1-glauber@scylladb.com>
Unused options are not exposed as command line options and will prevent
Scylla from booting when present, although they can still be passed via
YAML, for Cassandra compatibility.
That has never been a problem, but we have been adding options to i3
(and others) that are now deprecated, but were previously marked as
Used. Systems with those options may have issues upgrading.
While this problem is common to all Unused options, the likelihood for
any other unused option to appear in the command line is near zero,
except for those two - since we put them there ourselves.
There are two ways to handle this issue:
1) Mark them as Used, and just ignore them.
2) Add them explicitly to boost program options, and then ignore them.
The second option is preferred here, because we can add them as hidden
options in program_options, meaning they won't show up in the help. We
can then just print a discreet message saying that those options are,
from now on, ignored.
v2: mark set as const (Botond)
v3: rebase on top of master, indentation suggested by Duarte.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20180329145517.8462-1-glauber@scylladb.com>
This reverts commit 3b53f922a3. It's broken
in two ways:
1. concrete_allocating_function::allocate()'s caller,
region_group::start_releaser() loop, will delete the object
as soon as it returns; however we scheduled some work depending
on `this` in a separate continuation (via with_scheduling_group())
2. the calling loop's termination condition depends on the work being
done immediately, not later.
start node 1 2 3
shutdown node2
shutdown node1 and node3
start node1 and node3
nodetool removenode node2
clean up all scylla data on node2
bootstrap node2 as a new node
I saw node2 could not bootstrap, stuck waiting for schema information to complete forever:
On node1, node3
[shard 0] gossip - received an invalid gossip generation for peer 127.0.0.2; local generation = 2, received generation = 1521779704
On node2
[shard 0] storage_service - JOINING: waiting for schema information to complete
This is because in the nodetool removenode operation, the generation of node2 was increased from 0 to 2.
gossiper::advertise_removing () calls eps.get_heart_beat_state().force_newer_generation_unsafe();
gossiper::advertise_token_removed() calls eps.get_heart_beat_state().force_newer_generation_unsafe();
Each force_newer_generation_unsafe increases the generation by 1.
Here is an example,
Before nodetool removenode:
```
curl -X GET --header "Accept: application/json" "http://127.0.0.1:10000/failure_detector/endpoints/" | python -mjson.tool
{
    "addrs": "127.0.0.2",
    "generation": 0,
    "is_alive": false,
    "update_time": 1521778757334,
    "version": 0
},
```
After nodetool removenode:
```
curl -X GET --header "Accept: application/json" "http://127.0.0.1:10000/failure_detector/endpoints/" | python -mjson.tool
{
    "addrs": "127.0.0.2",
    "application_state": [
        {
            "application_state": 0,
            "value": "removed,146b52d5-dc94-4e35-b7d4-4f64be0d2672,1522038476246",
            "version": 214
        },
        {
            "application_state": 6,
            "value": "REMOVER,14ecc9b0-4b88-4ff3-9c96-38505fb4968a",
            "version": 153
        }
    ],
    "generation": 2,
    "is_alive": false,
    "update_time": 1521779276246,
    "version": 0
},
```
In gossiper::apply_state_locally, we have this check:
```
if (local_generation != 0 && remote_generation > local_generation + MAX_GENERATION_DIFFERENCE) {
    // assume some peer has corrupted memory and is broadcasting an unbelievable generation about another peer (or itself)
    logger.warn("received an invalid gossip generation for peer {}; local generation = {}, received generation = {}", ep, local_generation, remote_generation);
}
```
to skip the gossip update.
To fix, we relax the generation max difference check to allow the generation
of a removed node.
After this patch, the removed node bootstraps successfully.
Tests: dtest:update_cluster_layout_tests.py
Fixes#3331
Message-Id: <678fb60f6b370d3ca050c768f705a8f2fd4b1287.1522289822.git.asias@scylladb.com>
Just saw this today during a crash when creating Materialized Views.
It is still unclear why this happened. But the message says:
Mar 28 15:55:58 ip-172-31-24-9 scylla[14055]: scylla: sstables/sstables.cc:2973: sstables::sstable::remove_sstable_with_temp_toc(seastar::sstring, seastar::sstring, seastar::sstring, int64_t, sstables::sstable::version_types, sstables::sstable::format_types)::<lambda()>: Assertion `tmptoc == true' failed.
Mar 28 15:55:58 ip-172-31-24-9 scylla[14055]: Aborting on shard 0.
Mar 28 15:55:58 ip-172-31-24-9 scylla[14055]: Backtrace:
Mar 28 15:55:58 ip-172-31-24-9 scylla[14055]: 0x00000000005b4b4c
Mar 28 15:55:58 ip-172-31-24-9 scylla[14055]: 0x00000000005b4df5
Mar 28 15:55:58 ip-172-31-24-9 scylla[14055]: 0x00000000005b4ea3
Mar 28 15:55:58 ip-172-31-24-9 scylla[14055]: /lib64/libpthread.so.0+0x000000000000f0ff
Mar 28 15:55:58 ip-172-31-24-9 scylla[14055]: /lib64/libc.so.6+0x00000000000355f6
Mar 28 15:55:58 ip-172-31-24-9 scylla[14055]: /lib64/libc.so.6+0x0000000000036ce7
Mar 28 15:55:58 ip-172-31-24-9 scylla[14055]: /lib64/libc.so.6+0x000000000002e565
Mar 28 15:55:58 ip-172-31-24-9 scylla[14055]: /lib64/libc.so.6+0x000000000002e611
Mar 28 15:55:58 ip-172-31-24-9 scylla[14055]: 0x00000000015969d0
Mar 28 15:55:58 ip-172-31-24-9 scylla[14055]: 0x0000000001596f7a
Mar 28 15:55:58 ip-172-31-24-9 scylla[14055]: 0x000000000051ca8d
I can't even guess which table caused the problem, let alone which SSTable.
That's because those asserts are the very first thing we do. We can discuss
whether or not assert is the right behaviour (usually we can't guarantee the
state is sane if that is missing, so I don't see a problem).
But it would be nice to see which SSTable we are processing before we assert.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20180328160856.10717-1-glauber@scylladb.com>
"
The configuration API is part of scylla v2 configuration.
It uses the new definition capabilities of the API to dynamically create
the swagger definition for the configuration.
This means that the swagger will contain an entry with description and
type for each of the config values.
To get the v2 of the swagger file:
http://localhost:10000/v2
If using with swagger ui, change http://localhost:10000/api-doc to http://localhost:10000/v2
It takes longer to load because the file is much bigger now.
"
* 'amnon/config_api_v5' of github.com:scylladb/seastar-dev:
Explanation about the API V2
API: add the config API as part of the v2 API.
Defining the config api
The config API is created dynamically from the config. This means that
the swagger definition file will contain the description and types based on the
configuration.
The config.json file is used by the code generator to define a path that is
used to register the handler function.
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
"
This series introduces the view_builder class, a sharded service
responsible for building all defined materialized views. This process
entails walking over the existing data in a given base table, and using
it to calculate and insert the respective entries for one or more views.
The view_builder uses the migration_manager to subscribe to schema
change events, and update its bookkeeping accordingly. We prefer this
to having the database call into the view_builder, as that would
create a cyclic dependency.
We serialize changes to the views of a particular base table, such
that schema changes do not interfere with the view building process.
We employ a flat_mutation_reader for each base table for which we're
building views.
We consume from the reader associated with each base table until all
its views are built. If the reader reaches the end and there are
incomplete views, then a view was added while others were being built.
In such cases, when the view is added we restart the reader at the
beginning of the current token, but not at the beginning of the token
range. Then, when we exhaust the reader, we simply create a new one
for the whole token range, and resume building the pending views.
We aim to be resource-conscious. On a given shard, at any given moment,
we consume at most from one reader. We also strive for fairness, in that
each build step inserts entries for the views of a different base. Each
build step reads and generates updates for batch_size rows. We lack a
controller, which could potentially allow us to go faster (to execute
multiple steps at the same time, or consume more rows per batch), and
which would also apply backpressure, so we could, for example, delay
executing a build step.
Interaction with the system tables:
- When we start building a view, we add an entry to the
scylla_views_builds_in_progress system table. If the node restarts
at this point, we'll consider these newly inserted views as having
made no progress, and we'll treat them as new views;
- When we finish a build step, we update the progress of the views
that we built during this step by writing the next token to the
scylla_views_builds_in_progress table. If the node restarts here,
we'll start building the views at the token in the next_token
column.
- When we finish building a view, we mark it as completed in the
built views system table, and remove it from the in-progress system
table. Under failure, the following can happen:
* When we fail to mark the view as built, we'll redo the last
step upon node reboot;
* When we fail to delete the in-progress record, upon reboot
we'll remove this record.
A view is marked as completed only when all shards have finished
their share of the work, that is, if a view is not built, then all
shards will still have an entry in the in-progress system table;
- A view that a shard finished building, but not all other shards,
remains in the in-progress system table, with first_token ==
next_token.
Interaction with the distributed system tables:
- When we start building a view, we mark the view build as being
in-progress;
- When we finish building a view, we mark the view as being built.
Upon failure, we ensure that if the view is in the in-progress
system table, then it may not have been written to this table. We
don't load the built views from this table when starting. When
starting, the following happens:
* If the view is in the system.built_views table and not the
in-progress system table, then it will be in this one;
* If the view is in the system.built_views table and not in
this one, it will still be in the in-progress system table -
we detect this and mark it as built in this table too,
keeping the invariant;
* If the view is in this table but not in system.built_views,
then it will also be in the in-progress system table - we
don't detect this and will redo the missing step, for
simplicity.
View building is necessarily a sharded process. That means that on
restart, if the number of shards has changed, we need to calculate
the most conservative token range that has been built, and build
the remainder.
When building view updates, we consider that everything is new and
nothing pre-existing is there (which means no tombstones will be sent
out to the paired view replicas).
Tests:
unit (debug)
dtest (materialized_view_test.py(smp=1, smp=2))
"
* 'view-building/v4' of https://github.com/duarten/scylla: (22 commits)
tests/view_build_test: Add tests for view building
tests/cql_test_env: Move eventually() to this file
tests/cql_assertions: Assert result set is not empty
tests/cql_test_env: Start the view_builder
db/view/view_builder: Allow synchronizing with the end of a build
db/view/view_builder: Actually build views
flat_mutation_reader: Make reader from mutation fragments
db/view/view_builder: React to schema changes
service/migration_listener: Add class for view notifications
db/view: Introduce view_builder
column_family: Add function to populate views
column_family: Allow synchronizing with in-progress writes
database: Compare view id instead of name in find_views()
database: Add get_views() function
db/view: Return a future when sending view updates
service/storage_service: Allow querying the view build status
db: Introduce system_distributed_keyspace
tests: Add unit test for build_progress_virtual_reader
db/system_keyspace: Add API for MV-related system tables
db/system_keyspace: Add virtual reader for MV in-progress build status
...
This is a separate file from view_schema_test because that one is
already becoming too long to run; also, having multiple test files
means they can be executed in parallel.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Intended for use by unit tests, this patch allows synchronizing with
the end of a build for a particular view.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
This patch adds the missing view building code to the eponymous class.
We consume from the reader associated with each base table until all
its views are built. If the reader reaches the end and there are
incomplete views, then a view was added while others were being built.
In such cases, when the view is added we restart the reader at the
beginning of the current token, but not at the beginning of the token
range. Then, when we exhaust the reader, we simply create a new one
for the whole token range, and resume building the pending views.
We aim to be resource-conscious. On a given shard, at any given moment,
we consume at most from one reader. We also strive for fairness, in that
each build step inserts entries for the views of a different base. Each
build step reads and generates updates for batch_size rows. We lack a
controller, which could potentially allow us to go faster (to execute
multiple steps at the same time, or consume more rows per batch), and
which would also apply backpressure, so we could, for example, delay
executing a build step.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Builds a reader from a set of ordered mutation fragments. This is
useful for building a reader out of a subset of segments returned by a
different reader. It is equivalent to building a mutation out of the
set of mutation fragments, and calling
make_flat_mutation_reader_from_mutations, except that it does not yet
support fast-forwarding.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
The view_builder now uses the migration_manager to subscribe to schema
change events, and update its bookkeeping accordingly. We prefer this
to having the database call into the view_builder, as that would
create a cyclic dependency.
We serialize changes to the views of a particular base table, such
that schema changes do not interfere with the upcoming view building
code.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Add a convenience base class for view notifications, which provides
a default implementation for all other types of notifications.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
This patch introduces the view_builder class, a sharded service
responsible for building all defined materialized views. This process
entails walking over the existing data in a given base table, and using
it to calculate and insert the respective entries for one or more views.
This patch introduces only the bootstrap functionality, which is
responsible for loading the data stored in the system tables and
filling the in-memory data structures with the relevant information,
to be used in subsequent patches for the actual view building. The
interaction with the system tables is as follows.
Interaction with the tables in system_keyspace:
- When we start building a view, we add an entry to the
scylla_views_builds_in_progress system table. If the node restarts
at this point, we'll consider these newly inserted views as having
made no progress, and we'll treat them as new views;
- When we finish a build step, we update the progress of the views
that we built during this step by writing the next token to the
scylla_views_builds_in_progress table. If the node restarts here,
we'll start building the views at the token in the next_token
column.
- When we finish building a view, we mark it as completed in the
built views system table, and remove it from the in-progress system
table. Under failure, the following can happen:
* When we fail to mark the view as built, we'll redo the last
step upon node reboot;
* When we fail to delete the in-progress record, upon reboot
we'll remove this record.
A view is marked as completed only when all shards have finished
their share of the work, that is, if a view is not built, then all
shards will still have an entry in the in-progress system table;
- A view that a shard finished building, but not all other shards,
remains in the in-progress system table, with first_token ==
next_token.
Interaction with the distributed system table (view_build_status):
- When we start building a view, we mark the view build as being
in-progress;
- When we finish building a view, we mark the view as being built.
Upon failure, we ensure that if the view is in the in-progress
system table, then it may not have been written to this table. We
don't load the built views from this table when starting. When
starting, the following happens:
* If the view is in the system.built_views table and not the
in-progress system table, then it will be in view_build_status;
* If the view is in the system.built_views table and not in
this one, it will still be in the in-progress system table -
we detect this and mark it as built in this table too,
keeping the invariant;
* If the view is in this table but not in system.built_views,
then it will also be in the in-progress system table - we
don't detect this and will redo the missing step, for
simplicity.
View building is necessarily a sharded process. That means that on
restart, if the number of shards has changed, we need to calculate
the most conservative token range that has been built, and build
the remainder.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
The populate_views() function takes a set of views to update, a
token to select base table partitions, and the set of sstables to
query. This lays the foundation for a view building mechanism to exist,
which walks over a given base table, reads data token-by-token,
calculates view updates (in a simplified way, compared to the existing
functions that push view updates), and sends them to the paired view
replicas.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
This patch adds a mechanism to class column_family through which we
can synchronize with in-progress writes. This is useful for code that,
after some modification, needs to ensure that new writes will see it
before it can proceed.
In particular, this will be used by the view building code, which needs
to wait until in-progress writes, which may have missed that there
is now a view, are observable to the view building code.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
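A hedged sketch of one way to provide such synchronization, in the spirit of a phased barrier (Scylla has its own primitive; all names here are illustrative): writers hold an operation while they run, and await_pending_writes() resolves once every operation started before the call has finished:
```
#include <seastar/core/future.hh>
#include <memory>
#include <utility>

class write_phaser {
    struct phase {
        size_t count = 0;
        bool sealed = false;
        seastar::promise<> done;
    };
    std::shared_ptr<phase> _current = std::make_shared<phase>();
public:
    class operation {
        std::shared_ptr<phase> _p;
    public:
        explicit operation(std::shared_ptr<phase> p) : _p(std::move(p)) { ++_p->count; }
        operation(operation&&) = default;
        ~operation() {
            if (_p && --_p->count == 0 && _p->sealed) {
                _p->done.set_value(); // last straggler of a sealed phase
            }
        }
    };
    // Writers hold the returned object for the duration of the write.
    operation start_write() { return operation(_current); }
    // Seal the current phase and wait for its outstanding writes.
    seastar::future<> await_pending_writes() {
        auto prev = std::exchange(_current, std::make_shared<phase>());
        prev->sealed = true;
        if (prev->count == 0) {
            return seastar::make_ready_future<>();
        }
        return prev->done.get_future();
    }
};
```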
While we now send view mutations asynchronously in the normal view
write path, other processes interested in sending view updates, such
as streaming or view building, may wish to do it synchronously.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
This patch adds support for the nodetool viewbuildstatus command,
which shows the progress of a materialized view build across the
cluster.
A view can be absent from the result, successfully built, or
currently being built.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
This patch introduces a distributed system keyspace, used to hold
system tables that need to be replicated across a set of replicas
(that is, can't use the LocalStrategy).
In following patches, we will use this keyspace to hold a table
containing view building status updates for each node, used to support
range movements and a new nodetool command.
Fixes#3237
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
This patch implements an API to access the MV-related system tables,
which pertain to the view building process.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Provide a virtual reader so users can query the in-progress view table
in a way compatible with Apache Cassandra.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
When building a materialized view, we divide our work by shard, so we
need to register which shard did what work in the in-progress system
table. We also add the token we started at, which will enable some
optimizations in the view building code.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
"
Additional extension points.
* Allows wrapping commitlog file io (including hinted handoff).
* Allows system schema modification on boot, allowing extensions
to inject extensions into hardcoded schemas.
Note: to make commitlog file extensions work, we need to ensure
we can be notified on segment delete, and thus need to fix the
old issue of the hard ::unlink call in the segment destructor.
Segment delete is therefore moved to a batch routine, run at
intervals/flush. Replay segments and hints are also deleted via
the commitlog object, ensuring an extension is notified (metadata).
Configurable listeners are now allowed to inject configuration
object into the main config. I.e. a local object can, either
by becoming a "configurable" or manually, add references to
self-describing values that will be parsed from the scylla.yaml
file, effectively extending it.
All these wonderful abstractions courtesy of encryption of course.
But super generalized!
"
* 'calle/commitlog_ext' of github.com:scylladb/seastar-dev:
db::extensions: Allow extensions to modify (system) schemas
db::commitlog: Add commitlog/hints file io extension
db::commitlog: Do segment delete async + force replay delete go via CL
main/init: Change configurable callbacks and calls to allow adding opts
util::config_file: Add "add" config item overload
Allows extensions/config listeners to potentially augment
(system) schemas at boot time. This is only useful for schemas
that do not pass through system_schema tables.
Refs #2858
Push segment files to be deleted to a pending list, and process at
intervals or flush-requests (or shutdown). Note that we do _not_
indiscriminately do deletes in non-anchored tasks, because we need
to guarantee that finished segments are fully deleted and gone on CL
shutdown, not to be mistaken for replayables.
Also make sure we delete segments replayed via commitlog call,
so if we add metadata processing for CL, we can clear it out.
Since we just keep retrying, this can cause Scylla to not shut down for
a while.
The data will be safe in the commit log.
Note that this patch doesn't fix the issue when shutdown goes through
storage_service::drain_on_shutdown - more work is required to handle
that case.
Ref #3318.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180324140822.3743-3-duarte@scylladb.com>
In column_family::try_flush_memtable_to_sstable, the handle_exception()
block is on the inside of the continuations to
write_memtable_to_sstable(), which, if it fails, will leave the
sstable in the compaction_backlog_tracker::_ongoing_writes map, which
will waste disk space, and that sstable will map to a dangling pointer
to a destroyed database_sstable_write_monitor, which causes a seg
fault when accessed (for example, through the backlog_controller,
which accounts the _ongoing_writes when calculating the backlog).
Fix this by increasing the scope of handle_exception().
Fixes#3315
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180324140822.3743-2-duarte@scylladb.com>
* Don't dump output of failed tests immediately, print the output
for failed tests in the end instead.
* Fix exception printing in run_test(): don't assume passed in error
object is a `bytes` (or bytes-like) object, call the object's str
operator instead and let callers encode bytes objects instead.
* Don't assume Exception object has an `out` member, use operator str
instead to convert it to string.
* Don't print progress in run_test() directly because it results in
incomprehensible output as the executors race to print to stdout. Leave
progress report to the caller who can serialize progress prints.
* Automatically detect non-tty stdout and don't try to edit already
printed text.
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <7bb7e0003ded9b28710250bff851ea849bb99f7d.1522062795.git.bdenes@scylladb.com>
"
This series does not add or change any features of access-control and
roles, but addresses some bugs and finalizes the switch to roles.
"auth: Wait for schema agreement" and the patch prior help avoid false
negatives for integration tests and error messages in logs.
"auth: Remove ordering dependence" fixes an important bug in `auth` that
could leave the default superuser in a corrupted state when it is first
created.
Since roles are feature-complete (to the best of the author's knowledge
as of this writing), the final patch in the series removes any warnings
about them being unimplemented.
Tests: unit (release), dtest (PENDING)
"
* 'jhk/auth_fixes/v1' of https://github.com/hakuch/scylla:
Roles are implemented
auth: Increase delay before background tasks start
auth: Remove ordering dependence
auth: Don't warn on rescheduled task
auth: Wait for schema agreement
Single-node clusters can agree on schema
I've observed failures due to "missing" the peer nodes by about 1
second. Adding 5 seconds to the existing delay should cover most false
negative test results.
Fixes#3320.
If `auth::password_authenticator` also creates `system_auth.roles` and
we fix the existence check for the default superuser in
`auth::standard_role_manager` to only search for the columns that it
owns (instead of the column itself), then both modules' initialization
are independent of one another.
Fixes#3319.
Apache Cassandra also prints at the `info` level. This change prevents
tasks which we expect to be rescheduled from failing tests and scaring
users.
A good example of the importance of this change is when queries with a
quorum consistency level (for the default superuser) fail because a
quorum is not available. We will try again in this case, and this should
not cause integration tests to fail.
Some modules of `auth` create a default superuser if it does not already
exist.
The existence check is through a SELECT query with quorum consistency
level. If the schema for the applicable tables has not yet propagated to
a peer node at the time that it processes this query, then the
`storage_proxy` will print an error message to the log and the query
will be retried.
Eventually, the schema will propagate and the default superuser will be
created. However, the error message in the log causes integration tests
to fail (and is somewhat annoying).
Now, prior to querying for existing data, we wait for all gossip peers
to have the same schema version as we do.
Fixes#2852.
At some points while bootstrapping [1], new non-seed Scylla nodes wait
for schema agreement among all known endpoints in the cluster.
The check for schema agreement was in
`service::migration_manager::is_ready_for_bootstrap`. This function
would return `true` if, at the time of its invocation, the node was
aware of at least one `UP` peer (not itself) and that all `UP` peers had
the same schema version as the node.
We wish to re-use this check in the `auth` sub-system to ensure that
the schema for internal system tables used for access-control have
propagated to the entire cluster.
Unlike in `service/storage_service.cc`, where `is_ready_for_bootstrap`
was only invoked for seed nodes, we wish to wait for schema agreement
for all nodes regardless of whether or not they are seeds.
For a single-node cluster with itself as a seed,
`is_ready_for_bootstrap` would always return `false`.
We therefore change the conditions for schema agreement. Schema
agreement is now reached when there are no known peers (so the endpoint
map of the gossiper consists only of ourselves), or when there is at
least one `UP` peer and all `UP` peers have the same schema version as
us.
This change should not impact any bootstrap behavior in
`storage_service` because seed nodes do not invoke the function and
non-seed nodes wait for peer visibility before checking for schema
agreement.
Since this function is no longer checking for schema agreement only in
the context of bootstrapping non-seed nodes, we rename it to reflect its
generality.
[1] http://thelastpickle.com/blog/2017/05/23/auto-bootstrapping-part1.html
I see the following error:
seastar/core/future-util.hh:597:10: note: constraints not satisfied
seastar/core/future-util.hh:597:10: note: with ‘sstables::sstable_version_types* c’
seastar/core/future-util.hh:597:10: note: with ‘sub_partitions_read::run_test_case()::<lambda(sstables::sstable::version_types)> aa’
seastar/core/future-util.hh:597:10: note: the required expression ‘seastar::futurize_apply(aa, (* c.begin()))’ would be ill-formed
seastar/core/future-util.hh:597:10: note: ‘seastar::futurize_apply(aa, (* c.begin()))’ is not implicitly convertible to ‘seastar::future<>’
The C array all_sstable_versions decayed to a pointer (see second gcc note)
and of course doesn't support std::begin().
Fix by replacing the C array with an std::array<>, which supports std::begin().
Not clear what made this break again, or why it worked before.
Message-Id: <20180325095239.12407-1-avi@scylladb.com>
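The decay problem distilled (a standalone example, not the test code):
```
#include <array>
#include <iterator>

int demo() {
    int c_array[] = {1, 2, 3};
    auto p = c_array;        // decays to int*: the extent is lost
    (void)p;
    // std::begin(p);        // ill-formed: pointers have no begin()
    std::array<int, 3> arr{1, 2, 3};
    return *std::begin(arr); // fine: std::array keeps its size
}
```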
"
This patchset removes unneeded object files from the test link,
reducing unnecessary links and reducing link time and executable
size.
Tests: build (release)
"
* tag 'test-link/v1' of https://github.com/avikivity/scylla:
build: link release.o into scylla and perf_fast_forward binaries only
build: don't link api/ into tests
release.o depends on the release date and git hash, and therefore changes
every time ./configure.py is executed. In turn, this causes all tests to
relink.
Improve the situation by only linking release.o into binaries that require
it.
This helps continuous integration scripts, which call configure.py
unconditionally. Developers usually won't, so they will not see significant
savings.
Tests: build (release)
"
This fixes an abort in an sstable reader when querying a partition with no
clustering ranges (happens on counter table mutation with no live rows) which
also doesn't have any static columns. In such case, the
sstable_mutation_reader will setup the data_consume_context such that it only
covers the static row of the partition, knowing that there is no need to read
any clustered rows. See partition.cc::advance_to_upper_bound(). Later when
the reader is done with the range for the static row, it will try to skip to
the first clustering range (missing in this case). If clustering_ranges_walker
tells us to skip to after_all_clustering_rows(), we will hit an assert inside
continuous_data_consumer::fast_forward_to() due to attempt to skip past the
original data file range. If clustering_ranges_walker returns
before_all_clustering_rows() instead, all is fine because we're still at the
same data file position.
Fixes#3304.
"
* 'tgrabiec/fix-counter-read-no-static-columns' of github.com:scylladb/seastar-dev:
tests: mutation_source_test: Test reads with no clustering ranges and no static columns
tests: simple_schema: Allow creating schema with no static column
clustering_ranges_walker: Stop after static row in case no clustering ranges
When there are no clustering ranges, stop at position which is right
after the static row instead of position which is after all clustered
rows.
This fixes an abort in sstable reader when querying a partition with
no clustering ranges (happens with counter tables) which also doesn't
have any static columns. In such case, the sstable_mutation_reader
will setup the data_consume_context such that it only covers the
static row of the partition, knowing that there is no need to read
any clustering row. See partition.cc::advance_to_upper_bound(). Later
when we're done with reading the static row (which is absent), we will
try to skip to the first clustering range, which in this case is
missing. If clustering_ranges_walker tells us to skip to
after_all_clustering_rows(), we will hit an assert inside
continuous_data_consumer::fast_forward_to() due to an attempt to skip
past the original data file range. If clustering_ranges_walker returns
before_all_clustering_rows() instead, all is fine, because we end up
at the same data file position.
Fixes #3304.
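A hedged, simplified sketch of the fix's idea, using the position names from the message (the types are hypothetical; the real clustering_ranges_walker and position_in_partition differ):

    #include <cassert>

    // Hypothetical stand-in for position_in_partition bounds.
    enum class stop_position { before_all_clustering_rows, after_all_clustering_rows };

    stop_position position_after_static_row(bool has_clustering_ranges) {
        if (!has_clustering_ranges) {
            // Same data file position as the end of the static row, so
            // fast_forward_to() is never asked to skip past the original range.
            return stop_position::before_all_clustering_rows;
        }
        return stop_position::after_all_clustering_rows;
    }

    int main() {
        assert(position_after_static_row(false) == stop_position::before_all_clustering_rows);
    }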
"
There are a lot of things that we should be grouping in scheduling
groups that we aren't yet. The write path is not tagged at all,
and mutation_query isn't either. Some groups, like streaming's, are used - but not in
all places where they are needed.
Tests: unit (release)
"
* 'split-scheduling-groups-v2' of github.com:glommer/scylla:
database: group statements in their own scheduling group
database: apply streaming mutations with streaming priority
logalloc: capture current scheduling group for deferring function
Add a unit test for reproducing issue #2720 (and verifying its fix)
If a user tries to create a view whose primary key is missing any of the
base table's primary key columns, the creation should fail.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180320161121.13392-3-nyh@scylladb.com>
Changed the order to check a couple of error conditions *after* checking
for too many or missing primary key columns. This order (showing the
too many or missing key columns first) is more useful, and is the order
in Cassandra's code.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180320161121.13392-2-nyh@scylladb.com>
A view's primary key must include all the columns of the base's primary
key. If we don't check this and fail the table's creation, we can discover
problems later on when using the table, as demonstrated in issue #2720.
We had such checking code (translated from the same code in Java) but it
had an extra "else" which caused nothing to be put in "missing_pk_columns"
so the error was never recognized.
Also, when the error does happen, we should print the column's name_as_text(),
not name() which is (surprisingly) just a number.
Fixes #2720.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180320161121.13392-1-nyh@scylladb.com>
One of the tests created a base table with 5 primary key columns, but
put only 4 of them in the view. This is not allowed, but prior to fixing
issue #2720 this error was silently ignored. Let's fix the error instead
of relying on this silence.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180321094352.22329-1-nyh@scylladb.com>
When we introduced the CPU scheduler, we also introduced a group
for commitlog - but never used it. There is also doubtful value in
separating reads from writes, since they are often part of the same
workload.
To accommodate that, let's rename the query group to "statement"
(query is not incorrect, just confusing), and move the write path,
currently ungrouped, inside it.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
We are flushing the streaming memtables with streaming priority, but
applying the mutations themselves is still done with normal priorities.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
When we call run_when_memory_available, it is entirely possible that
the caller is doing that inside a scheduling_group. If we don't defer,
we execute in the correct group. But if we do defer, the current code will
execute - in the future - with the default scheduling group.
This patch fixes that by capturing the caller scheduling group and
making sure the function is executed later using it.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
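A hedged sketch of the capture idiom described here (the deferral wrapper is hypothetical, not the actual logalloc code; seastar::current_scheduling_group() and seastar::with_scheduling_group() are real Seastar primitives, though their header placement varies by version):

    #include <utility>
    #include <seastar/core/scheduling.hh>
    #include <seastar/core/future-util.hh>  // with_scheduling_group(); location varies

    // Capture the caller's scheduling group now, re-enter it when the
    // deferred work finally runs, instead of inheriting the default group.
    template <typename Func>
    auto run_later_in_callers_group(Func func) {
        auto sg = seastar::current_scheduling_group();
        return [sg, func = std::move(func)] () mutable {
            return seastar::with_scheduling_group(sg, std::move(func));
        };
    }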
"
Since f8613a8415 we have reader-caching
on replicas for single-partition queries. This caching works best when
all pages of a query are sent to the same replicas consistently and thus
they can reuse the cached readers there.
The probability-based nature of read-repair works against this, as on any
given page a read-repair will be attempted or not based on probability.
This will cause high drop-rates on the replicas used for read-repair, as
the cached reader will not be reusable if the replica was skipped for
one or more pages.
To fix this make the repair-decision once, on the first page of the
query and store the decision in the paging-state. On all remaining
pages of the query use this stored decision.
Tests: unit-tests(release, debug), dtest(paging_advanced_tests.py)
Refs: #1865
"
* 'per_query_repair_decision/v2' of https://github.com/denesb/scylla:
Make the read-repair decision only once
storage_proxy: add coordinator_query_options and coordinator_query_result
Add query_read_repair_decision to paging-state
For several reasons that I cannot fit in the margin, when a view is
created, at most ONE regular column from the base table may be added
to the view's key.
This small new test verifies that if we try to add two columns, the
view creation fails.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180319235453.1613-1-nyh@scylladb.com>
We had a unit test, test_primary_key_is_not_null, for testing that
we correctly complain - or don't complain - on missing "IS NOT NULL"
restrictions, as expected.
However, this test missed the actual bug we had regarding IS NOT NULL
checking - see issue #2628 - because it mistook a silly syntax error,
which caused an exception, for the exception we expected to see :-)
So in this patch, I rewrote this test. It fixes the test's bug and
demonstrates issue #2628 (and verifies its fix), and also tests a few
more corner cases.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180319235000.1399-1-nyh@scylladb.com>
When creating a materialized view, the user must provide a "IS NOT NULL"
restriction for each of the created view's primary columns. If such a
restriction is missing, the view creation should fail. In #2628 we noticed
that sometimes it wasn't failing, but later updates to such a table would fail,
which is a bug.
There is actually one special case where "IS NOT NULL" is optional:
It is optional on the base's partition key column (when there is just
one of these) because it is already assumed that the partition key in
its entirety can never be null.
Our "IS NOT NULL" test, validate_primary_key(), had two logic errors
which caused it to miss some cases of missing "IS NOT NULL":
1. Instead of checking whether a certain column is the base's only
partition-key column, and avoid testing IS NOT NULL just for that
specific column, the code tested whether the schema *has* such a
column, and if it did, the test was skipped for all columns.
2. When the code found the one new column in the view's primary key, it
was so happy to find it that it immediately returned, and forgot to
test the IS NOT NULL on that column :-)
Both errors are fixed by this patch.
See the next patch for a unit test.
Fixes#2628.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180319233657.522-1-nyh@scylladb.com>
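A hedged, simplified illustration of the two fixes (the types and helper are hypothetical, not Scylla's actual validate_primary_key()):

    #include <stdexcept>
    #include <string>
    #include <vector>

    struct column { std::string name; bool is_partition_key; bool has_is_not_null; };

    void validate_primary_key(const std::vector<column>& view_pk, size_t base_pk_size) {
        for (const column& c : view_pk) {
            // Fix 1: exempt only the base's single partition-key column itself,
            // not every column merely because such a column exists somewhere.
            if (base_pk_size == 1 && c.is_partition_key) {
                continue;
            }
            // Fix 2: no early return upon finding the newly-added key column;
            // its IS NOT NULL restriction must be checked too.
            if (!c.has_is_not_null) {
                throw std::runtime_error("Primary key column '" + c.name +
                                         "' is required to be filtered by 'IS NOT NULL'");
            }
        }
    }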
Right now we have yield points between partition processing guaranteed
by the fact that there are .get()s around the code, and those include
a yield point.
We have been discussing removing the implicit yield point from get and
pushing that to the caller. In that spirit, let's yield explicitly here
if needed.
It should be the loop's responsibility not to hurt
latency, either by being bounded to a small number of
iterations or by yielding. In other words, that loop should have a yield
point on every iteration (like the non-thread variant does).
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20180319173051.8918-1-glauber@scylladb.com>
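A hedged sketch of the explicit-yield idiom (the partition loop is a stand-in; seastar::need_preempt() and seastar::thread::yield() are real Seastar calls, assuming we run inside a seastar::thread):

    #include <seastar/core/thread.hh>
    #include <seastar/core/preempt.hh>

    void process_all_partitions(int n_partitions) {
        for (int p = 0; p < n_partitions; ++p) {
            // ... process one partition; no implicit yield points here ...
            if (seastar::need_preempt()) {
                seastar::thread::yield();  // yield only when the scheduler asks
            }
        }
    }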
"
These patches add support for C* 2.2 file(name) format.
Namely:
* It forces Scylla to write files in la format.
* Adds storage-service feature for them.
* cf and ks are determined from directory, not from file-name (for 2.2 format).
* Adds some other fixes to make dtest happy.
* Unit tests work with la format or with both formats.
"
* 'danfiala/filename-format-2.2-v4' of https://github.com/hagrid-the-developer/scylla:
tests/sstables: Tests use la format or iterate over both formats.
tests/sstables: Helper functions support 2.2 format directory structure.
sstables: Use 2.2 (la) format as the default format to store sstables if it is enabled by feature-bits.
storage_service: Support la sstable storage format as a feature.
sstables: make_descriptor accepts sstable-directory, because it is necessary to determine cf and ks in 2.2 format.
sstables: Throw a more detailed exception for an unknown item in reverse_map.
sstables/compaction: Suppress NaN in the throughput report.
Make the read-repair decision on the first page of a paged-query and use
it for all the remaining pages. This helps querier-cache hit-rates as
reads will be sent to the same nodes consistently throughout the query.
"
Fixes to gossip pertaining to the shadow round.
In particular, an issue preventing a node from being marked as alive is
fixed: After the shadow round and the feature checking, we remove any
endpoints from the state - namely, those that contacted us -, before
re-adding them again. This is because those nodes that replied would
have been marked as alive in the endpoint state map (but not fully,
they'd be absent from the live endpoints list), and re-adding them marks
them as dead.
If the shadow round failed, after doing the feature checking against the
system tables, we were not clearing the state map and re-adding the
endpoints. This left the alive marker set, and prevented
real_mark_alive() from eventually being called.
Fixes #3301
"
* 'gossip/shadow-round-fixes/v3' of https://github.com/duarten/scylla:
gms/gossiper: Remove superfluous check
service/storage_service: Always re-add loaded endpoints
gms/gossiper: Check for shadow round completion before throwing
As yet more parameters and return-values are about to be added to all
storage_proxy::query_* methods we need a way that scales better than
changing the signatures every time. To this end we aggregate all
non-mandatory query parameters into `coordinator_query_options` and all
return values into `coordinator_query_result`.
This way new fields can be simply added to the respective structs while
the signatures of the methods themselves and their client code can
remain unchanged.
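A hedged, self-contained sketch of the aggregation pattern (the field names are illustrative, not Scylla's actual members):

    #include <optional>
    #include <string>

    struct coordinator_query_options {
        std::optional<bool> read_repair_decision;  // absent unless already decided
        // new optional parameters go here, without touching any signature
    };

    struct coordinator_query_result {
        std::string rows;  // stand-in for the real query result type
        // new return values go here, likewise
    };

    coordinator_query_result query(const std::string& request,
                                   coordinator_query_options opts = {}) {
        (void)opts;  // consulted by the real implementation
        return {request};
    }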
This new field will store the repair-decision made on the first page of
the query. This decision will be sticky to all pages of the query.
In mixed clusters the decision might not happen on the first page and it
might even change during the query, as old coordinators will neither store
nor respect the decision.
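A hedged sketch of the sticky decision (all names hypothetical): decide on the first page, then reuse the decision carried in the paging state for every later page.

    #include <optional>
    #include <random>

    struct paging_state {
        std::optional<bool> read_repair_decision;  // empty on the first page
    };

    bool should_read_repair(paging_state& ps, double chance) {
        if (!ps.read_repair_decision) {
            // First page: make the probability-based decision once.
            static std::mt19937 rng{std::random_device{}()};
            ps.read_repair_decision = std::bernoulli_distribution(chance)(rng);
        }
        return *ps.read_repair_decision;  // sticky for the rest of the query
    }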
After the shadow round and the feature checking, we remove any
endpoints from the state - namely, those that contacted us - before
re-adding them. This is because those nodes that replied would
have been marked as alive in the endpoint state map (but not fully,
they'd be absent from the live endpoints list), and re-adding them
marks them as dead.
If the shadow round failed, after doing the feature checking against
the system tables, we were not clearing the state map and re-adding
the endpoints. This left the alive marker set, and prevented
real_mark_alive() from eventually being called.
Fixes #3301
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
This patchset reduces the time required to run the tests, mostly by
running them in parallel.
I measured a reduction of 3.5X on a 1s4c4t desktop (release mode).
Tests: unit (release)
* tag 'faster-tests/v2' of https://github.com/avikivity/scylla:
tests: run tests in parallel
tests: simplify timeout handling
tests: don't require crash integrity
tests: allow sharing the machine with other tests
tests: extract seastar options to a separate variable
tests: reduce memory for tests
tests: add "--" unconditionally for boost tests
tests: start cql_test_env without binding to messaging port
storage_service: allow starting gossiper without binding to messaging port
gms: allow gossiper to start_gossiping() without binding to the port
tests: close file correctly in loading_file_test
By using the overprovisioned flag, we reduce polling and pinning, so
less CPU time is wasted and the scheduler has more options to schedule
reactor threads.
Now that we have a minimum boost version, we don't need to check whether
boost requires "--" before test-specific command line arguments. Removing
the check speeds up the test a little.
This is useful in tests, which don't communicate. Binding to a port can
fail if the system is running something else.
It would be better to prevent even more of the gossiper from starting up,
but that is more difficult.
In gossiper::handle_major_state_change() we set the endpoint_state for
a particular endpoint and replicate the changes to other cores.
This is totally unsynchronized with the execution of
gossiper::evict_from_membership(), which can happen concurrently, and
can remove the very same endpoint from the map (in all cores).
Replicating the changes to other cores in handle_major_state_change()
can interleave with replicating the changes to other cores in
evict_from_membership(), and result in an undefined final state.
Another issue happened in debug mode dtests, where a fiber executes
handle_major_state_change(), calls into the subscribers, of which
storage_service is one, and ultimately lands on
storage_service::update_peer_info(), which iterates over the
endpoint's application state with deferring points in between (to
update a system table). gossiper::evict_from_membership() was executed
concurrently by another fiber, which freed the state the first one was
iterating over.
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180318123211.3366-1-duarte@scylladb.com>
Summary has a function, memory_size(), that estimates the amount of
memory the summary takes. It is my understanding that this is called
to serve information to tooling.
First, this function is inaccurate because it doesn't take into account
the tokens of each entry, just the keys. But more importantly, it has
to iterate over all keys, which can be pretty expensive if the entries
list is long. We now keep the key data in memory areas, with just
pointers in each entry. So instead of iterating through the entries, we
can iterate through the memory areas, which is much cheaper.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20180316120915.16809-1-glauber@scylladb.com>
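A hedged, simplified sketch of the cheaper estimate (the types are hypothetical, not the actual summary): sum the few large backing chunks instead of walking every entry's key.

    #include <cstddef>
    #include <vector>

    struct summary {
        std::vector<std::vector<char>> key_chunks;  // stable storage for key bytes
        size_t n_entries = 0;

        size_t memory_size() const {
            size_t s = n_entries * sizeof(void*);   // per-entry pointer overhead
            for (const auto& chunk : key_chunks) {
                s += chunk.capacity();              // a handful of chunks, not n_entries keys
            }
            return s;
        }
    };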
When the cluster is changed (nodes added or removed), ranges of tokens
are moved between nodes. Scylla initiates a streaming process between an
old and a new owner of the range, which can take a long time. During
that streaming time, the new owner of the range is known as a "pending node"
for this range, and all updates must go to both the old owner (in case the
movement fails!) and the pending node (in case the movement succeeds).
For materialized views, because they are ordinary tables, streaming moves
all the view's data that existed before the streaming started. But we did
not send updates done to the view *during* the streaming. A dtest
demonstrates that the new node will miss some of the view updates, and will
require a repair of the view tables immediately after the cluster change
ends, which is not good. To fix that, we need to send every new update
that happens during the streaming also to the "pending node". We already
did this properly for base-table updates, but not to the view updates:
Each base table replica wrote to only one paired view table replica,
and nobody wrote to the new pending node (in the case where there is one,
for the particular view token involved).
In this patch, we make sure that all view updates go also to the "pending
nodes" when there are any. We do the same thing that Cassandra does, which
is - *all* base replicas write the update to the pending node(s).
Arguably, it is inefficient that all replicas send the update to the same
node. In most cases it is enough to send it from just one base replica -
the one who is slated to be the new node's pair. I opened
https://issues.apache.org/jira/browse/CASSANDRA-14262 about this idea.
But that is an optimization. The patch as-is already fixes the bug.
Fixes #3211
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20180313171853.17283-1-nyh@scylladb.com>
The functional change in this series is in the last patch
("auth: Grant all permissions to object creator").
The first patch addresses `const` correctness in `auth`. This change
allowed the new code added in the last patch to be written with the
correct `const` specifiers, and also some code to be removed.
The second-to-last patch addresses error-handling in the authorizer for
unsupported operations and is a prerequisite for the last patch (since
we now always grant permissions for new database objects).
Tests: unit (release)
* 'jhk/default_permissions/v3' of https://github.com/hakuch/scylla:
auth: Grant all permissions to object creator
auth: Unify handling for unsupported errors
auth: Fix life-time issue with parameter
auth: Fix `const` correctness
"
This is an improvement on my latest series. Instead of just
dealing with the problem of destroying the Summary that I
identified in a previous test, I have tried to find other sources
of stalls.
Some of them are on readers and would affect early processes and
operations like nodetool refresh.
Others are on writers, which can affect any SSTable being written.
Two of those stalls (on large filter, on summary read) I saw in a
synthetic benchmark where I used very small values + nodetool compact
to generate one SSTable with many keys. They were 80ms and 20ms
respectively, and now they are totally gone.
For others, I just tried to be safe (for instance, if we know
reading/writing large vectors can be costly, just always insert
preemption points in them).
With all of these patches applied, I no longer see stalls coming from
the SSTable code in those tests (although given enough time, I am sure I
can find more).
Tests: unit (release)
Fixes: #3282, Fixes #3281, Fixes #3269
"
* 'sstables-stalls-v3-updated' of github.com:glommer/scylla:
large_bitset/bloom filter: add preemption points in loops
sstables: read filter in a thread
abstract summary entry version of the token with a token view
add a token_view
sstables: rework summary entries reading
sstables: avoid calls to resize for vectors
sstables: replace potentially large for loop with do_until
summary_entry: do not store key bytes in each summary entry
tests: change tests to make summary non-copyable
chunked_vector: do not iterate to destruct trivially destructible types
SSTables that contain many keys - a common case with small partitions in
long-lived nodes - can generate filters that are quite large.
I have seen stalls over 80ms when reading a filter that was the result
of a 6h write load of very small keys after nodetool compact (filter was
in the 100s of MB).
Similar care should be taken when creating the filter, as if the
estimated number of partitions is big, the resulting large_bitset can be
quite big as well.
If we treat the i_filter.hh and large_bitset.hh interfaces as truly
generic, then maybe we should have an in_thread version along with a
common version. But the bloom filter is the only user for both and even
if that changes in the future, it is still a good idea to run something
with a massive loop in a thread.
So for simplicity, I am just asserting that we are on a thread to avoid
surprises, and inserting preemption points in the loops.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
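A hedged sketch of the guard described here (a hypothetical bitset clear, not the actual large_bitset code): fail fast when not on a seastar::thread, and keep the big loop preemptible.

    #include <cassert>
    #include <cstdint>
    #include <vector>
    #include <seastar/core/thread.hh>
    #include <seastar/core/preempt.hh>

    void clear_in_thread(std::vector<uint64_t>& words) {
        assert(seastar::thread::running_in_thread());  // avoid surprises
        for (auto& w : words) {
            w = 0;
            if (seastar::need_preempt()) {
                seastar::thread::yield();              // preemption point per chunk of work
            }
        }
    }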
Constructing filter objects can be quite expensive. We will insert some
yield points around, and that is made a lot easier if we are calling
things from a thread.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
dht::token doesn't have a trivial destructor, so destroying an array
full of those can be quite expensive. If we use the same trick as we
used for the summary - storing the token data in a stable memory
location - we can leave the entries with a trivial destructor and destroy
the chunks themselves. Those being larger, they will be more efficient
to delete.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Ideally we would like tokens to be trivially destructible, so that we
can easily dispose of giant vectors holding them. While that is hard to
do with our current infrastructure, we can introduce a token_view, which
holds a bytes_view instead of the real data - making it
trivially destructible.
The comparators are then changed to take a token_view, and an implicit
conversion function is provided from tokens so they get compared.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
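A hedged, simplified illustration of the token_view idea (std::string/std::string_view stand in for bytes/bytes_view; the real dht::token differs): a view type referencing stable storage is trivially destructible, so huge vectors of it can be dropped without visiting every element.

    #include <string>
    #include <string_view>
    #include <type_traits>

    struct token {
        std::string data;                               // owning, non-trivial destructor
    };

    struct token_view {
        std::string_view data;                          // non-owning view
        token_view(const token& t) : data(t.data) {}    // implicit conversion from token
    };

    static_assert(std::is_trivially_destructible_v<token_view>);

    inline bool operator<(token_view a, token_view b) { // comparators take views
        return a.data < b.data;
    }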
Currently Ubuntu 18.04 uses distribution provided g++ and boost, but it's easier
to maintain Scylla package to build with same version toolchain/libraries, so
switch to them.
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1521075576-12064-1-git-send-email-syuu@scylladb.com>
* 'debian-ubuntu-build-fixes-v2' of https://github.com/syuu1228/scylla:
dist/debian: build only scylla, iotune
dist/debian: switch to boost-1.65
dist/debian: switch to gcc-7.3
Like we did for generic arrays, let's move away from resize() in trying
to read summary entries and move to a reserve/push pattern.
I have tested this patch reading a summary file that is a couple of MB big.
Stalls up to 20ms were seen. After applying this patch, no such stalls
are present.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
resize is considered harmful, since it will attempt to allocate memory
and initialize each element of the vector. This can cause reactor stalls
that correlate with latency peaks.
A better idiom is reserve first - so we know we will have enough memory
and won't have to move contents - and push_back/emplace_back each
individual member.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
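A hedged illustration of the idiom (read_entry is a hypothetical stand-in for the real parser):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    uint64_t read_entry(size_t i) { return i; }  // stand-in for the real parser

    std::vector<uint64_t> read_entries(size_t count) {
        std::vector<uint64_t> entries;
        entries.reserve(count);                  // one allocation up front, no moves later
        for (size_t i = 0; i < count; ++i) {
            entries.push_back(read_entry(i));    // initialize each element exactly once
        }
        return entries;
        // By contrast, entries.resize(count) would value-initialize all `count`
        // elements in one uninterruptible pass before any real data is written.
    }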
We are pushing ints here, so it shouldn't be that bad in practice.
But a potentially gigantic for loop is just asking for a stall since we won't
need_preempt() it.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
If we store a bytes_view, which has a trivial destructor, instead of bytes,
then we don't need to destroy each element individually. To do that,
we allocate the data in a couple of large arrays which can be disposed of
easily and point to it.
We still can't destroy trivially because of the token.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
Right now the summary can be copied, but in real life there is no reason
for this to be a requirement. Tests want it so that we can destroy a summary,
load another, and compare the two. We can achieve the same by allowing the first
summary to be moved, and then we can still have a reference to the second.
I am about to make a change that will make the summary not copyable as a
requirement, so we need to do this first.
Signed-off-by: Glauber Costa <glauber@scylladb.com>
We get the following compile error on Debian/Ubuntu with boost-1.63:
/opt/scylladb/include/boost/intrusive/pointer_plus_bits.hpp:76:48: error: '*((void*)& __tmp +136)' is used uninitialized in this function [-Werror=uninitialized]
n = pointer(uintptr_t(p) | (uintptr_t(n) & Mask));
~~~~~~~~~~~~~~^~~~~~~
This is a known issue (https://github.com/boostorg/intrusive/issues/29), fixed
in boost-1.65.
Switch to boost-1.65 to fix the issue.
Fixes #3090
* seastar bcfbe0c...a66cc34 (3):
> reactor: fix sleep mode
> cpu scheduler: don't penalize first group to run
> Simple shellscript to find out which logical CPU's shards are mapped to
When a table, keyspace, or role is created, the creator is now
automatically granted all applicable permissions on the object.
This behavior is consistent with Apache Cassandra.
Fixes #3216.
Instead of some functions in `allow_all_authorizer` throwing exceptions
and others silently passing through, we consistently return exception
futures with `auth::unsupported_authorization_operation`. These errors
are converted to `invalid_request_exception` at the CQL layer and
ignored where appropriate in the auth subsystem.
This patch came about because of an important (and obvious, in
hindsight) realization: instances of the authorizer, role manager, and
authenticator are clients for access-control state and not the state
itself. This is reflected directly in Scylla: `auth::service` is
sharded across cores and this is possible because each instance queries
and modifies the same global state.
To give more examples, the value of an instance of `std::vector<int>` is
the structure of the container and its contents. The value of `int
file_descriptor` is an identifier for state maintained elsewhere.
Having watched an excellent talk by Herb Sutter [1] and having read an
informative blog post [2], it's clear that a member function marked
`const` communicates that the observable state of the instance is not
modified.
Thus, the member functions of the role-manager, authenticator, and
authorizer clients should be left non-`const` only if the state of the
client itself is observably changed.
which do not change the state of the client, but which mutate the global
state the client is associated with (for example, by creating a role)
are marked `const`.
The `start` (and `stop`) functions of the client have the dual role of
initializing (finalizing) both the local client state and the
external state; they are not marked `const`.
[1] https://herbsutter.com/2013/01/01/video-you-dont-know-const-and-mutable/
[2] http://talesofcpp.fusionfenix.com/post-2/episode-one-to-be-or-not-to-be-const