Compare commits

...

71 Commits

Author SHA1 Message Date
Avi Kivity
240b9f122b Merge "Backport empty partition range scan fixes" from Botond
"
This mini-series lumps together the fix for the empty partition range
scan crash (#3564) and the two follow-up patches.
"

* 'paging-fix-backport-2.2/v1' of https://github.com/denesb/scylla:
  query_pager: use query::is_single_partition() to check for singular range
  tests/cql_query_test: add unit test for querying empty ranges
  query_pager: be prepared for _ranges being empty
2018-07-05 10:29:31 +03:00
Botond Dénes
cb16cd7724 query_pager: use query::is_single_partition() to check for singular range
Use query::is_single_partition() to check whether the queried ranges are
singular or not. The current method of using
`dht::partition_range::is_singular()` is incorrect, as it is possible to
build a singular range that doesn't represent a single partition.
`query::is_single_partition()` correctly checks for this so use it
instead.

Found during code-review.

Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <f671f107e8069910a2f84b14c8d22638333d571c.1530675889.git.bdenes@scylladb.com>
(cherry picked from commit 8084ce3a8e)
2018-07-04 12:57:45 +03:00
Botond Dénes
c864d198fc tests/cql_query_test: add unit test for querying empty ranges
A bug was found recently (#3564) in the paging logic, where the code
assumed the queried ranges list is non-empty. This assumption is
incorrect, as there are valid (if rare) queries that result in the
ranges list being empty. Add a unit test that executes such a query with
paging enabled, to detect any future bugs related to assumptions about
the ranges list being non-empty.

Refs: #3564
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <f5ba308c4014c24bb392060a7e72e7521ff021fa.1530618836.git.bdenes@scylladb.com>
(cherry picked from commit c236a96d7d)
2018-07-04 09:52:54 +03:00
Botond Dénes
25125e9c4f query_pager: be prepared for _ranges being empty
do_fetch_page() checks at the beginning whether there is a saved query
state already, meaning this is not the first page. If there is not, it
checks whether the query is for a singular partition or a range scan,
to decide whether to enable stateful queries. This check assumed that
there is at least one range in _ranges, which does not hold under some
circumstances. Add a check for _ranges being empty.

Fixes: #3564
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <cbe64473f8013967a93ef7b2104c7ca0507afac9.1530610709.git.bdenes@scylladb.com>
(cherry picked from commit 59a30f0684)
2018-07-04 09:52:54 +03:00
Shlomi Livne
faf10fe6aa release: prepare for 2.2.0
Signed-off-by: Shlomi Livne <shlomi@scylladb.com>
2018-07-01 22:40:42 +03:00
Calle Wilund
f76269cdcf sstables::compress: Ensure unqualified compressor name if possible
Fixes #3546

Both older Origin and Scylla write "known" compressor names (i.e. those
in the Origin namespace) unqualified (i.e. LZ4Compressor).

This behaviour was not preserved in the virtualization change, but
probably should be.

Message-Id: <20180627110930.1619-1-calle@scylladb.com>
(cherry picked from commit 054514a47a)
2018-06-28 18:55:15 +03:00
Avi Kivity
a9b0ccf116 Merge "Disable sstable filtering based on min/max clustering key components" from Tomasz
"
With DateTiered and TimeWindow, there is a read optimization enabled
which excludes sstables based on overlap with recorded min/max values
of clustering key components. The problem is that it doesn't take into
account partition tombstones and static rows, which should still be
returned by the reader even if there is no overlap in the query's
clustering range. A read which returns no clustering rows can
mispopulate the cache, which will appear as partition deletion or writes
to the static row being lost, until node restart or eviction of the
partition entry.

There is also a bad interaction between cache population on read and
that optimization. When the clustering range of the query doesn't
overlap with any sstable, the reader will return no partition markers
for the read, which leads cache populator to assume there is no
partition in sstables and it will cache an empty partition. This will
cause later reads of that partition to miss prior writes to that
partition until it is evicted from cache or node is restarted.

Disable until a more elaborate fix is implemented.

Fixes #3552
Fixes #3553
"

* tag 'tgrabiec/disable-min-max-sstable-filtering-v1' of github.com:tgrabiec/scylla:
  tests: Add test for slicing a mutation source with date tiered compaction strategy
  tests: Check that database conforms to mutation source
  database: Disable sstable filtering based on min/max clustering key components

(cherry picked from commit e1efda8b0c)
2018-06-28 18:55:15 +03:00
Tomasz Grabiec
abc5941f87 flat_mutation_reader: Move field initialization to initializer list
This works around a problem of std::terminate() being called in debug
mode build if initialization of _current throws.

Backtrace:

Thread 2 "row_cache_test_" received signal SIGABRT, Aborted.
0x00007ffff17ce9fb in raise () from /lib64/libc.so.6
(gdb) bt
  #0  0x00007ffff17ce9fb in raise () from /lib64/libc.so.6
  #1  0x00007ffff17d077d in abort () from /lib64/libc.so.6
  #2  0x00007ffff5773025 in __gnu_cxx::__verbose_terminate_handler() () from /lib64/libstdc++.so.6
  #3  0x00007ffff5770c16 in ?? () from /lib64/libstdc++.so.6
  #4  0x00007ffff576fb19 in ?? () from /lib64/libstdc++.so.6
  #5  0x00007ffff5770508 in __gxx_personality_v0 () from /lib64/libstdc++.so.6
  #6  0x00007ffff3ce4ee3 in ?? () from /lib64/libgcc_s.so.1
  #7  0x00007ffff3ce570e in _Unwind_Resume () from /lib64/libgcc_s.so.1
  #8  0x0000000003633602 in reader::reader (this=0x60e0001160c0, r=...) at flat_mutation_reader.cc:214
  #9  0x0000000003655864 in std::make_unique<make_forwardable(flat_mutation_reader)::reader, flat_mutation_reader>(flat_mutation_reader &&) (__args#0=...)
    at /usr/include/c++/7/bits/unique_ptr.h:825
  #10 0x0000000003649a63 in make_flat_mutation_reader<make_forwardable(flat_mutation_reader)::reader, flat_mutation_reader>(flat_mutation_reader &&) (args#0=...)
    at flat_mutation_reader.hh:440
  #11 0x000000000363565d in make_forwardable (m=...) at flat_mutation_reader.cc:270
  #12 0x000000000303f962 in memtable::make_flat_reader (this=0x61300001d540, s=..., range=..., slice=..., pc=..., trace_state_ptr=..., fwd=..., fwd_mr=...)
    at memtable.cc:592

Message-Id: <1528792447-13336-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 6d6b93d1e7)
2018-06-28 18:55:15 +03:00
Asias He
a152ac12af gossip: Fix tokens assignment in assassinate_endpoint
The tokens vector is defined a few lines above and is needed outside the
if block.

Do not redefine it in the if block; otherwise the outer tokens vector
will remain empty.

Found by code inspection.

Fixes #3551.

Message-Id: <c7a06375c65c950e94236571127f533e5a60cbfd.1530002177.git.asias@scylladb.com>
(cherry picked from commit c3b5a2ecd5)
2018-06-28 18:55:15 +03:00
Botond Dénes
c274fdf2ec querier: find_querier(): return end() when no querier matches the range
When none of the queriers found for the lookup key match the lookup
range, `_entries.end()` should be returned, as the search failed.
Instead, the iterator returned from the failed `std::find_if()` is
returned, which, if the find failed, is the end iterator returned by the
previous call to `_entries.equal_range()`. This is incorrect because, as
long as `equal_range()`'s end iterator is not also `_entries.end()`, the
search will always return an iterator to a querier, regardless of
whether any of them actually matches the read range.
Fix by returning `_entries.end()` when it is detected that no queriers
match the range.

Fixes: #3530
(cherry picked from commit 2609a17a23)
2018-06-28 18:55:15 +03:00
Botond Dénes
5b88d6b4d6 querier_cache: restructure entries storage
Currently querier_cache uses a `std::unordered_map<utils::UUID, querier>`
to store cache entries and an `std::list<meta_entry>` to store meta
information about the querier entries, like insertion order, expiry
time, etc.

All cache eviction algorithms use the meta-entry list to evict entries
in reverse insertion order (LRU order). To make this possible
meta-entries keep an iterator into the entry map so that given a
meta-entry one can easily erase the querier entry. This however poses a
problem, as std::unordered_map can invalidate all its iterators
when new items are inserted. This is a use-after-free waiting to happen.

Another disadvantage of the current solution is that it requires the
meta-entry to use a weak pointer to the querier entry, so that in case
the entry is removed (as a result of a successful lookup) it doesn't try
to access it. This has an impact on all cache eviction algorithms, as
they have to be prepared to deal with stale meta-entries. Stale
meta-entries also unnecessarily consume memory.

To solve these problems, completely redesign how querier_cache stores
entries. Instead of storing the entries in an `std::unordered_map`
and the meta-entries in an `std::list`, store the entries in an
`std::list` and maintain an intrusive map (index) for lookups. This new
design has several advantages over the old one:
* The entries are now in insertion order, so eviction strategies can
  work on the entry list itself; no need to involve additional data
  structures for this.
* All data related to an entry is stored in one place; no data
  duplication.
* Removing an entry automatically removes it from the index, as
  intrusive containers support auto-unlink. This means there is no need
  to store iterators long-term, risking use-after-free when the
  container invalidates its iterators.

Additional changes:
* Modify eviction strategies so that they work with the `entry`
  interface rather than the stored value directly.

Ref #3424

(cherry picked from commit 7ce7f3f0cc)
2018-06-28 18:55:15 +03:00
Botond Dénes
2d626e1cf8 tests/querier_cache: fix memory based eviction test
Do increment the key counter after inserting the first querier into the
cache. Otherwise two queriers with the same key will be inserted and
the test will fail. This problem is exposed by the changes the next
patches make to the querier-cache, but is fixed beforehand to maintain
bisectability of the code.

Fixes: #3529
(cherry picked from commit b9d51b4c08)
2018-06-28 18:55:15 +03:00
Avi Kivity
c11bd3e1cf Merge "Do not allow compaction controller shares to grow indefinitely" from Glauber
"
We are seeing some workloads with large datasets where the compaction
controller ends up with a lot of shares. Regardless of whether or not
we'll change the algorithm, this patchset handles a more basic issue,
which is the fact that the current controller doesn't set a maximum
explicitly, so if the input is larger than the maximum it will keep
growing without bounds.

It also pushes the maximum input point of the compaction controller from
10 to 30, allowing us to err on the side of caution for the 2.2 release.
"

* 'tame-controller' of github.com:glommer/scylla:
  controller: do not increase shares of controllers for inputs higher than the maximum
  controller: adjust constants for compaction controller

(cherry picked from commit e0eb66af6b)
2018-06-20 10:58:20 +03:00
Avi Kivity
9df3df92bc Merge "Try harder to move STCS towards zero-backlog" from Glauber
"
Tests: unit (release)

Before merging the LCS controller, we merged patches that would
guarantee that LCS would move towards zero backlog - otherwise the
backlog could get too high.

We didn't do the same for STCS, our first controlled strategy. So we may
end up with a situation where there are many SSTables inducing a large
backlog, but they are not yet meeting the minimum criteria for
compaction. The backlog, then, never goes down.

This patch changes the SSTable selection criteria so that if there is
nothing to do, we'll keep pushing towards reaching a state of zero
backlog. Very similar to what we did for LCS.
"

* 'stcs-min-threshold-v4' of github.com:glommer/scylla:
  STCS: bypass min_threshold unless configured to enforce strictly
  compaction_strategy: allow the user to tell us if min_threshold has to be strict

(cherry picked from commit f0fc888381)
2018-06-18 14:21:52 +03:00
Takuya ASADA
8ad9578a6c dist/debian: add --jobs <njobs> option just like build_rpm.sh
On some build environments we may want to limit the number of parallel
jobs: ninja-build runs ncpus jobs by default, which may be too many
since g++ consumes a lot of memory.
So support --jobs <njobs>, just like the rpm build script.

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180425205439.30053-1-syuu@scylladb.com>
(cherry picked from commit 782ebcece4)
2018-06-14 15:04:50 +03:00
Tomasz Grabiec
4cb6061a9f tests: row_cache: Reduce concurrency limit to avoid bad_alloc
The test uses random mutations. We saw it failing with bad_alloc from time to time.
Reduce concurrency to reduce memory footprint.

Message-Id: <20180611090304.16681-1-tgrabiec@scylladb.com>
(cherry picked from commit a91974af7a)
2018-06-14 13:40:00 +02:00
Tomasz Grabiec
1940e6bd95 tests: row_cache: Do not hang when only one of the readers throws
Message-Id: <20180531122729.3314-1-tgrabiec@scylladb.com>
(cherry picked from commit b5e42bc6a0)
2018-06-14 13:40:00 +02:00
Avi Kivity
044cfde5f3 database: stop using incremental selectors
There is a bug in incremental_selector for partitioned_sstable_set, so
until it is found, stop using it.

This degrades scan performance of Leveled Compaction Strategy tables.

Fixes #3513. (as a workaround)
Introduced: 2.1
Message-Id: <20180613131547.19084-1-avi@scylladb.com>

(cherry picked from commit aeffbb6732)
2018-06-13 21:04:56 +03:00
Vlad Zolotarov
262a246436 locator::ec2_multi_region_snitch: don't call for ec2_snitch::gossiper_starting()
ec2_snitch::gossiper_starting() calls the base class (default) method,
which sets _gossip_started to true and thereby prevents the following
reconnectable_snitch_helper registration.

Fixes #3454

Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
Message-Id: <1528208520-28046-1-git-send-email-vladz@scylladb.com>
(cherry picked from commit 2dde372ae6)
2018-06-12 19:02:19 +03:00
Botond Dénes
799dbb4f2e forwardable reader: implement fast_forward_to(position_in_partition)
Instead of throwing std::bad_function_call. Needed by the foreign_reader
unit test. Not sure how other tests didn't hit this before as the test
is using `run_mutation_source_tests()`.

(cherry picked from commit 50b67232e5)
Fixes #3491.
2018-06-05 12:34:15 +03:00
Shlomi Livne
a2fe669dd3 dist/docker: Switch to Scylla 2.2 repository
Signed-off-by: Shlomi Livne <shlomi@scylladb.com>
Message-Id: <83b4ff801b283ade512a7035ecea9057a864dcdd.1526995747.git.shlomi@scylladb.com>
2018-06-05 12:34:15 +03:00
Avi Kivity
56de761daf Update seastar submodule
* seastar 7c6ba3a...6f61d74 (1):
  > tls: Ensure handshake always drains output before return/throw

Fixes #3461.
2018-06-05 12:34:15 +03:00
Shlomi Livne
c3187093a3 release: prepare for 2.2.rc2
Signed-off-by: Shlomi Livne <shlomi@scylladb.com>
2018-05-30 17:32:16 +03:00
Avi Kivity
111c2ecf5d Update scylla-ami submodule
* dist/ami/files/scylla-ami 49896ec...6ed71a3 (1):
  > scylla_install_ami: Update CentOS to latest version
2018-05-28 14:02:43 +03:00
Takuya ASADA
a6ecdbbba6 Revert "dist/ami: update CentOS base image to latest version"
This reverts commit 69d226625a.
Since ami-4bf3d731 is a Marketplace AMI, it is not possible to publish a public AMI based on it.

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180523112414.27307-1-syuu@scylladb.com>
(cherry picked from commit 6b1b9f9e602c570bbc96692d30046117e7d31ea7)
2018-05-28 13:40:15 +03:00
Glauber Costa
17cc62d0b3 commitlog: don't move pointer to segment
We currently move the pointer we acquired to the segment into the
lambda in which we handle the cycle.

The problem is that we also use that same pointer inside the exception
handler. If an exception happens, we'll access the moved-from pointer
and crash.

Probably #3440.

Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20180518125820.10726-1-glauber@scylladb.com>
(cherry picked from commit 596a525950)
2018-05-19 19:12:26 +03:00
Shlomi Livne
eb646c61ed release: prepare for 2.2.rc1
Signed-off-by: Shlomi Livne <shlomi@scylladb.com>
2018-05-16 21:31:50 +03:00
Avi Kivity
782d817e84 dist: redhat: get rid of raid0.devices_discard_performance
This parameter is not available on recent Red Hat kernels or on
non-Red Hat kernels (it was removed on 3.10.0-772.el7,
RHBZ 1455932). The presence of the parameter on kernels that don't
support it causes the module load to fail, with the result that the
storage is not available.

Fix by removing the parameter. For someone running an older Red Hat
kernel the effect will be that discard is disabled, but they can fix
that by updating the kernel. For someone running a newer kernel, the
effect will be that they can access their data.

Fixes #3437.
Message-Id: <20180516134913.6540-1-avi@scylladb.com>

(cherry picked from commit 3b8118d4e5)
2018-05-16 20:13:59 +03:00
Avi Kivity
3ed5e63e8a Update scylla-ami submodule
* dist/ami/files/scylla-ami 02b1853...49896ec (1):
  > Merge "AMI build fix" from Takuya
2018-05-16 12:37:03 +03:00
Tomasz Grabiec
d17ce46983 Update seastar submodule
Fixes #3339.

* seastar 491f994...7c6ba3a (2):
  > Merge "fix perftune.py issues with cpu-masks on big machines" from Vlad
  > Merge 'Handle Intel's NICs in a special way'  from Vlad
2018-05-16 09:37:41 +02:00
Takuya ASADA
7ca5e7e993 dist/redhat: replace scylla-libgcc72/scylla-libstdc++72 with scylla-2.2 metapackage
We have a conflict between scylla-libgcc72/scylla-libstdc++72 and
scylla-libgcc73/scylla-libstdc++73; we need to replace the *72 packages
with the scylla-2.2 metapackage to prevent it.

Fixes #3373

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180510081246.17928-1-syuu@scylladb.com>
(cherry picked from commit 6fa3c4dcad)
2018-05-11 09:42:28 +03:00
Duarte Nunes
07b0ce27fa Merge 'Include OPTIONS with LIST ROLES' from Jesse
"
Fixes #3420.

Tests: dtest (`auth_test.py`), unit (release)
"

* 'jhk/fix_3420/v2' of https://github.com/hakuch/scylla:
  cql3: Include custom options in LIST ROLES
  auth: Query custom options from the `authenticator`
  auth: Add type alias for custom auth. options

(cherry picked from commit d49348b0e1)
2018-05-10 13:22:49 +03:00
Amnon Heiman
27be3cd242 scylla-housekeeping: support new 2018.1 path variation
Starting from 2018.1 and 2.2 there was a change in the repository path.
It was made to support multiple products (like manager) and to place the
enterprise version in a different path.

As a result, the regular expression that looks for the repository fails.

This patch changes the way the path is searched: both rpm and debian
variations are combined, and both options of the repository path are
unified.

See scylladb/scylla-enterprise#527

Signed-off-by: Amnon Heiman <amnon@scylladb.com>
Message-Id: <20180429151926.20431-1-amnon@scylladb.com>
(cherry picked from commit 6bf759128b)
2018-05-09 15:22:55 +03:00
Calle Wilund
abf50aafef database: Fix assert in truncate
Fixes crash in cql_tests.StorageProxyCQLTester.table_test
"avoid race condition when deleting sstable on behalf..." changed
discard_sstables behaviour to only return rp:s for sstables owned
and submitted for deletion (not all matching time stamp),
which can in some cases cause zero rp returned.
Message-Id: <20180508070003.1110-1-calle@scylladb.com>
2018-05-09 10:02:09 +01:00
Duarte Nunes
dfe5b38a43 db/view: Limit number of pending view updates
This patch adds a simple and naive mechanism to ensure a base replica
doesn't overwhelm a potentially overloaded view replica by sending too
many concurrent view updates. We add a semaphore to limit to 100 the
number of outstanding view updates. We limit globally per shard, and
not per destination view replica. We also limit statically.

Refs #2538

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180426134457.21290-2-duarte@scylladb.com>
(cherry picked from commit 4b3562c3f5)
2018-05-08 00:46:33 +01:00
Duarte Nunes
9bdc8c25f5 db/view: Return a future when sending view updates
While we now send view mutations asynchronously in the normal view
write path, other processes interested in sending view updates, such
as streaming or view building, may wish to do it synchronously.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
(cherry picked from commit dc44a08370)
2018-05-08 00:46:19 +01:00
Duarte Nunes
e75c55b2db db/timeout_clock: Properly scope type names
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180426134457.21290-1-duarte@scylladb.com>
(cherry picked from commit 2be75bdfc9)
2018-05-07 19:29:48 +01:00
Botond Dénes
756feae052 database: when dropping a table evict all relevant queriers
Queriers shouldn't outlive the table they read from as that could lead
to use-after-free problems when they are destroyed.

Fixes: #3414

Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <3d7172cef79bb52b7097596e1d4ebba3a6ff757e.1525716986.git.bdenes@scylladb.com>
(cherry picked from commit 6f7d919470)
2018-05-07 21:20:42 +03:00
Tomasz Grabiec
202b4e6797 storage_proxy: Request schema from the coordinator in the original DC
The mutation forwarding intermediary (src_addr) may not always know
about the schema which was used by the original coordinator. I think
this may be the cause of the "Schema version ... not found" error seen
in one of the clusters which entered some pathological state:

  storage_proxy - Failed to apply mutation from 1.1.1.1#5: std::_Nested_exception<schema_version_loading_failed> (Failed to load schema version 32893223-a911-3a01-ad70-df1eb2a15db1): std::runtime_error (Schema version 32893223-a911-3a01-ad70-df1eb2a15db1 not found)

Fixes #3393.

Message-Id: <1524639030-1696-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 423712f1fe)
2018-05-07 13:08:40 +03:00
Raphael S. Carvalho
76ac200eff database: avoid race condition when deleting sstable on behalf of cf truncate
After removal of deletion manager, caller is now responsible for properly
submitting the deletion of a shared sstable. That's because deletion manager
was responsible for holding deletion until all owners agreed on it.
Resharding for example was changed to delete the shared sstables at the end,
but truncate wasn't changed and so race condition could happen when deleting
same sstable at more than one shard in parallel. Change the operation to only
submit a shared sstable for deletion in only one owner.

Fixes dtest migration_test.TestMigration.migrate_sstable_with_schema_change_test

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20180503193427.24049-1-raphaelsc@scylladb.com>
2018-05-04 13:10:12 +01:00
Tomasz Grabiec
9aa172fe8e db: schema_tables: Treat drop of scylla_tables.version as an alter
After upgrade from 1.7 to 2.0, nodes will record a per-table schema
version which matches that on 1.7 to support the rolling upgrade. Any
later schema change (after the upgrade is done) will drop this record
from affected tables so that the per-table schema version is
recalculated. If nodes perform a schema pull (they detect schema
mismatch), then the merge will affect all tables and will wipe the
per-table schema version record from all tables, even if their schema
did not change. If then only some nodes get restarted, the restarted
nodes will load tables with the new (recalculated) per-table schema
version, while not restarted nodes will still use the 1.7 per-table
schema version. Until all nodes are restarted, writes or reads between
nodes from different groups will involve a needless exchange of schema
definition.

This will manifest in logs with repeated messages indicating schema
merge with no effect, triggered by writes:

  database - Schema version changed to 85ab46cd-771d-36c9-bc37-db6d61bfa31f
  database - Schema version changed to 85ab46cd-771d-36c9-bc37-db6d61bfa31f
  database - Schema version changed to 85ab46cd-771d-36c9-bc37-db6d61bfa31f

The sync will be performed if the receiving shard forgets the foreign
version, which happens if it doesn't process any request referencing
it for more than 1 second.

This may impact latency of writes and reads.

The fix is to treat schema changes which drop the 1.7 per-table schema
version marker as an alter, which will switch in-memory data
structures to use the new per-table schema version immediately,
without the need for a restart.

Fixes #3394

Tests:
    - dtest: schema_test.py, schema_management_test.py
    - reproduced and validated the fix with run_upgrade_tests.sh from git@github.com:tgrabiec/scylla-dtest.git
    - unit (release)

Message-Id: <1524764211-12868-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit b1465291cf)
2018-05-03 10:51:19 +03:00
Takuya ASADA
c4af043ef7 dist/common/scripts/scylla_raid_setup: prevent 'device or resource busy' on creating mdraid device
According to this web site, there is a possibility of a race condition
between mdraid creation and udev:
http://dev.bizo.com/2012/07/mdadm-device-or-resource-busy.html
And it looks like it can happen on our AMI, too (see #2784).

To initialize RAID safely, we should wait for udev events to finish
before and after mdadm is executed.

Fixes #2784

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1505898196-28389-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit 4a8ed4cc6f)
2018-04-24 12:53:34 +03:00
Raphael S. Carvalho
06b25320be sstables: Fix bloom filter size after resharding by properly estimating partition count
We were feeding the total estimated partition count of an input shared
sstable to the output unshared ones.

So the sstable writer thinks, *from estimation*, that each sstable
created by resharding will have the same amount of data as the shared
sstable it is being created from. That's a problem because the
estimation is fed to bloom filter creation, which directly influences
its size. So if we're resharding all sstables that belong to all shards,
the disk usage taken by filter components will be multiplied by the
number of shards. That becomes more of a problem with #3302.

Partition count estimation for a shard S will now be done as follow:
    //
    // TE, the total estimated partition count for a shard S, is defined as
    // TE = Sum(i = 0...N) { Ei / Si }.
    //
    // where i is an input sstable that belongs to shard S,
    //       Ei is the estimated partition count for sstable i,
    //       Si is the total number of shards that own sstable i.

Fixes #2672.
Refs #3302.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20180423151001.9995-1-raphaelsc@scylladb.com>
(cherry picked from commit 11940ca39e)
2018-04-24 12:53:34 +03:00
Takuya ASADA
ff70d9f15c dist: Drop AmbientCapabilities from scylla-server.service for Debian 8
Debian 8 reports "Invalid argument" when AmbientCapabilities is used in
a systemd unit file, so drop the line when we build the .deb package for
Debian 8. For other distributions, keep using the feature.

Fixes #3344

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180423102041.2138-1-syuu@scylladb.com>
(cherry picked from commit 7b92c3fd3f)
2018-04-24 12:53:34 +03:00
Avi Kivity
9bbd5821a2 Update scylla-ami submodule
* dist/ami/files/scylla-ami 9b4be70...02b1853 (1):
  > scylla_install_ami: remove the host id file after scylla_setup
2018-04-24 12:53:34 +03:00
Avi Kivity
a7841f1f2e release: prepare for 2.2.rc0
2018-04-18 11:08:43 +03:00
Takuya ASADA
84859e0745 dist/debian: use ~root as HOME to place .pbuilderrc
When 'always_set_home' is specified on /etc/sudoers pbuilder won't read
.pbuilderrc from current user home directory, and we don't have a way to change
the behavor from sudo command parameter.

So let's use ~root/.pbuilderrc and switch to HOME=/root when sudo executed,
this can work both environment which does specified always_set_home and doesn't
specified.

Fixes #3366

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1523926024-3937-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit ace44784e8)
2018-04-17 09:38:43 +03:00
Avi Kivity
6b74e1f02d Update seastar submodule
* seastar bcfbe0c...491f994 (3):
  > tls: Ensure we always pass through semaphores on shutdown
  > cpu scheduler: don't penalize first group to run
  > reactor: fix sleep mode

Fixes #3350.
2018-04-14 20:44:11 +03:00
Avi Kivity
520f17b315 Point seastar submodule at scylla-seastar.git
This allows backporting seastar patches.
2018-04-14 20:43:28 +03:00
Gleb Natapov
9fe3d04f31 cql_server: fix a race between closing of a connection and notifier registration
There is a race between cql connection closure and notifier
registration. If a connection is closed before notification registration
is complete, a stale pointer to the connection will remain in the
notification list, since the attempt to unregister the connection
happens too early.
The fix is to move notifier unregistration to after the connection's
gate is closed, which ensures that there is no outstanding registration
request. But this means that now a connection with a closed gate can be
in the notifier list, so with_gate() may throw and abort a notifier
loop. Fix that by replacing with_gate() with a call to is_closed().

Fixes: #3355
Tests: unit(release)

Message-Id: <20180412134744.GB22593@scylladb.com>
(cherry picked from commit 1a9aaece3e)
2018-04-12 16:57:07 +03:00
Raphael S. Carvalho
a74183eb1e sstables/compaction_manager: do not break lcs invariant by not allowing parallel compaction for it
After change to serialize compaction on compaction weight (eff62bc61e),
LCS invariant may break because parallel compaction can start, and it's
not currently supported for LCS.

The condition is that weight is deregistered right before last sstable
for a leveled compaction is sealed, so it may happen that a new compaction
starts for the same column family meanwhile that will promote a sstable to
an overlapping token range.

That leads to strategy restoring invariant when it finds the overlapping,
and that means wasted resources.
The fix is about removing a fast path check which is incorrect now because
we release weight early and also fixing a check for ongoing compaction
which prevented compaction from starting for LCS whenever weight tracker
was not empty.

Fixes #3279.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20180410034538.30486-1-raphaelsc@scylladb.com>
(cherry picked from commit 638a647b7d)
2018-04-10 20:59:48 +03:00
Raphael S. Carvalho
e059f17bf2 database: make sure sstable is also forwarded to shard responsible for its generation
After f59f423f3c, an sstable is loaded only at the shards that own it,
so as to reduce the sstable load overhead.

The problem is that an sstable may no longer be forwarded to a shard
that needs to be aware of its existence, which would result in that
sstable's generation being reallocated for a write request.
That would result in a failure as follows:
"SSTable write failed due to existence of TOC file for generation..."

This can be fixed by forwarding any sstable at load time to all its
owner shards *and* the shard responsible for its generation, which is
determined as follows:
s = generation % smp::count

Fixes #3273.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20180405035245.30194-1-raphaelsc@scylladb.com>
(cherry picked from commit 30b6c9b4cd)
2018-04-05 10:58:29 +03:00
Duarte Nunes
0e8e005357 db/view: Reject view entries with non-composite, empty partition key
Empty partition keys are not supported on normal tables - they cannot
be inserted or queried (surprisingly, the rules for composite
partition keys are different: all components are then allowed to be
empty). However, the (non-composite) partition key of a view could end
up being empty if that column is a base table regular column, a base
table clustering key column, or a base table partition key column that
is part of a composite key.

Fixes #3262
Refs CASSANDRA-14345

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180403122244.10626-1-duarte@scylladb.com>
(cherry picked from commit ec8960df45)
2018-04-03 17:20:33 +03:00
Glauber Costa
8bf6f39392 docker: default docker to overprovisioned mode.
By default, overprovisioned is not enabled on docker unless it is
explicitly set. I have come to believe that this is a mistake.

If the user is running alone in the machine, and there are no other
processes pinned anywhere - including interrupts - not running
overprovisioned is the best choice.

But everywhere else, it is not: even if a user runs 2 docker containers
on the same machine and statically partitions CPUs with --smp (but
without cpuset), the docker containers will pin themselves to the same
set of CPUs, as they are totally unaware of each other.

It is also very common, especially in some virtualized environments, for
interrupts not to be properly distributed - they are particularly keen on
being delivered to CPU0, a CPU which Scylla will pin by default.

Lastly, environments like Kubernetes simply don't support pinning at the
moment.

This patch enables the overprovisioned flag when it is explicitly set -
like we did before - but also enables it by default unless --cpuset is set.

Fixes #3336.

Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20180331142131.842-1-glauber@scylladb.com>
(cherry picked from commit ef84780c27)
2018-04-02 17:07:20 +03:00
Glauber Costa
04ba51986e parse and ignore background writer controller
Unused options are not exposed as command-line options and will prevent
Scylla from booting when present there, although they can still be passed
via YAML, for Cassandra compatibility.

That has never been a problem, but we have been adding options to i3
(and others) that are now deprecated but were previously marked as
Used. Systems with those options may have issues upgrading.

While this problem is common to all Unused options, the likelihood of
any other unused option appearing on the command line is near zero,
except for those two - since we put them there ourselves.

There are two ways to handle this issue:

1) Mark them as Used, and just ignore them.
2) Add them explicitly to boost program options, and then ignore them.

The second option is preferred here, because we can add them as hidden
options in program_options, meaning they won't show up in the help. We
can then just print a discreet message saying that those options are,
from now on, ignored.

v2: mark set as const (Botond)
v3: rebase on top of master, indentation suggested by Duarte.

Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20180329145517.8462-1-glauber@scylladb.com>
(cherry picked from commit a9ef72537f)
2018-03-29 17:57:43 +03:00
Asias He
1d5379c462 gossip: Relax generation max difference check
start node 1 2 3
shutdown node2
shutdown node1 and node3
start node1 and node3
nodetool removenode node2
clean up all scylla data on node2
bootstrap node2 as a new node

I saw that node2 could not bootstrap; it was stuck waiting for schema information to complete forever:

On node1, node3

    [shard 0] gossip - received an invalid gossip generation for peer 127.0.0.2; local generation = 2, received generation = 1521779704

On node2

    [shard 0] storage_service - JOINING: waiting for schema information to complete

This is because during the nodetool removenode operation, the generation of the removed node (node2) was increased from 0 to 2.

   gossiper::advertise_removing() calls eps.get_heart_beat_state().force_newer_generation_unsafe();
   gossiper::advertise_token_removed() calls eps.get_heart_beat_state().force_newer_generation_unsafe();

Each force_newer_generation_unsafe increases the generation by 1.

Here is an example,

Before nodetool removenode:
```
curl -X GET --header "Accept: application/json" "http://127.0.0.1:10000/failure_detector/endpoints/" | python -mjson.tool
   {
   "addrs": "127.0.0.2",
   "generation": 0,
   "is_alive": false,
   "update_time": 1521778757334,
   "version": 0
   },
```

After nodetool removenode:
```
curl -X GET --header "Accept: application/json" "http://127.0.0.1:10000/failure_detector/endpoints/" | python -mjson.tool
 {
     "addrs": "127.0.0.2",
     "application_state": [
         {
             "application_state": 0,
             "value": "removed,146b52d5-dc94-4e35-b7d4-4f64be0d2672,1522038476246",
             "version": 214
         },
         {
             "application_state": 6,
             "value": "REMOVER,14ecc9b0-4b88-4ff3-9c96-38505fb4968a",
             "version": 153
            }
     ],
     "generation": 2,
     "is_alive": false,
     "update_time": 1521779276246,
     "version": 0
 },
```

In gossiper::apply_state_locally, we have this check:

```
if (local_generation != 0 && remote_generation > local_generation + MAX_GENERATION_DIFFERENCE) {
    // assume some peer has corrupted memory and is broadcasting an unbelievable generation about another peer (or itself)
    logger.warn("received an invalid gossip generation for peer {}; local generation = {}, received generation = {}", ep, local_generation, remote_generation);
}
```
to skip the gossip update.

To fix, we relax the generation max-difference check to allow the
generation of a removed node.

After this patch, the removed node bootstraps successfully.

Tests: dtest:update_cluster_layout_tests.py
Fixes #3331

Message-Id: <678fb60f6b370d3ca050c768f705a8f2fd4b1287.1522289822.git.asias@scylladb.com>
(cherry picked from commit f539e993d3)
2018-03-29 12:10:09 +03:00
Avi Kivity
cb5dc56bfd Update scylla-ami submodule
Ref #3332.
2018-03-29 10:35:54 +03:00
Duarte Nunes
b578b492cd column_family: Don't retry flushing memtable if shutdown is requested
Since we just keep retrying, this can cause Scylla to not shut down for
a while.

The data will be safe in the commit log.

Note that this patch doesn't fix the issue when shutdown goes through
storage_service::drain_on_shutdown - more work is required to handle
that case.

Ref #3318.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180324140822.3743-3-duarte@scylladb.com>
(cherry picked from commit a985ea0fcb)
2018-03-26 15:26:56 +03:00
Duarte Nunes
30c950a7f6 column_family: Increase scope of exception handling when flushing a memtable
In column_family::try_flush_memtable_to_sstable, the handle_exception()
block sits inside the continuations of write_memtable_to_sstable(). If
the write fails, the sstable is left in the
compaction_backlog_tracker::_ongoing_writes map, which wastes disk
space, and that sstable maps to a dangling pointer to a destroyed
database_sstable_write_monitor, causing a segfault when accessed (for
example through the backlog_controller, which accounts for
_ongoing_writes when calculating the backlog).

Fix this by increasing the scope of handle_exception().

Fixes #3315

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180324140822.3743-2-duarte@scylladb.com>
(cherry picked from commit 50ad37d39b)
2018-03-26 15:26:54 +03:00
Duarte Nunes
f0d1e9c518 backlog_controller: Stop update timer
On database shutdown, this timer can cause use-after-free errors if
not stopped.

Refs #3315

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180324140822.3743-1-duarte@scylladb.com>
(cherry picked from commit b7bd9b8058)
2018-03-26 15:26:52 +03:00
Avi Kivity
597aeca93d Merge "Bug fixes for access-control, and finalizing roles" from Jesse
"
This series does not add or change any features of access-control and
roles, but addresses some bugs and finalizes the switch to roles.

"auth: Wait for schema agreement" and the patch prior help avoid false
negatives for integration tests and error messages in logs.

"auth: Remove ordering dependence" fixes an important bug in `auth` that
could leave the default superuser in a corrupted state when it is first
created.

Since roles are feature-complete (to the best of the author's knowledge
as of this writing), the final patch in the series removes any warnings
about them being unimplemented.

Tests: unit (release), dtest (PENDING)
"

* 'jhk/auth_fixes/v1' of https://github.com/hakuch/scylla:
  Roles are implemented
  auth: Increase delay before background tasks start
  auth: Remove ordering dependence
  auth: Don't warn on rescheduled task
  auth: Wait for schema agreement
  Single-node clusters can agree on schema

(cherry picked from commit 999df41a49)
2018-03-26 12:37:41 +03:00
Duarte Nunes
1a94b90a4d Merge 'Grant default permissions' from Jesse
The functional change in this series is in the last patch
("auth: Grant all permissions to object creator").

The first patch addresses `const` correctness in `auth`. This change
allowed the new code added in the last patch to be written with the
correct `const` specifiers, and also some code to be removed.

The second-to-last patch addresses error-handling in the authorizer for
unsupported operations and is a prerequisite for the last patch (since
we now always grant permissions for new database objects).

Tests: unit (release)

* 'jhk/default_permissions/v3' of https://github.com/hakuch/scylla:
  auth: Grant all permissions to object creator
  auth: Unify handling for unsupported errors
  auth: Fix life-time issue with parameter
  auth: Fix `const` correctness

(cherry picked from commit 934d805b4b)
2018-03-26 12:37:35 +03:00
Avi Kivity
acdd42c7c8 Merge "Fix abort during counter table read-on-delete" from Tomasz
"
This fixes an abort in an sstable reader when querying a partition with no
clustering ranges (happens on counter table mutation with no live rows) which
also doesn't have any static columns. In such a case, the
sstable_mutation_reader will set up the data_consume_context so that it only
covers the static row of the partition, knowing that there is no need to read
any clustered rows. See partition.cc::advance_to_upper_bound(). Later when
the reader is done with the range for the static row, it will try to skip to
the first clustering range (missing in this case). If clustering_ranges_walker
tells us to skip to after_all_clustering_rows(), we will hit an assert inside
continuous_data_consumer::fast_forward_to() due to attempt to skip past the
original data file range. If clustering_ranges_walker returns
before_all_clustering_rows() instead, all is fine because we're still at the
same data file position.

Fixes #3304.
"

* 'tgrabiec/fix-counter-read-no-static-columns' of github.com:scylladb/seastar-dev:
  tests: mutation_source_test: Test reads with no clustering ranges and no static columns
  tests: simple_schema: Allow creating schema with no static column
  clustering_ranges_walker: Stop after static row in case no clustering ranges

(cherry picked from commit 054854839a)
2018-03-22 18:13:29 +02:00
Takuya ASADA
bd4f658555 scripts/scylla_install_pkg: follow redirection of specified repo URL
We should follow redirections with curl, just like a normal web browser does.
Fixes #3312

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1521712056-301-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit bef08087e1)
2018-03-22 12:56:58 +02:00
Vladimir Krivopalov
a983ba7aad perf_fast_forward: fix error in date formatting
'minutes' was used instead of 'month'.

Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
Message-Id: <1e005ecaa992d8205ca44ea4eebbca4621ad9886.1521659341.git.vladimir@scylladb.com>
(cherry picked from commit 3010b637c9)
2018-03-22 12:56:56 +02:00
Duarte Nunes
0a561fc326 gms/gossiper: Synchronize endpoint state destruction
In gossiper::handle_major_state_change() we set the endpoint_state for
a particular endpoint and replicate the changes to other cores.

This is totally unsynchronized with the execution of
gossiper::evict_from_membership(), which can happen concurrently, and
can remove the very same endpoint from the map (on all cores).

Replicating the changes to other cores in handle_major_state_change()
can interleave with replicating the changes to other cores in
evict_from_membership(), and result in an undefined final state.

Another issue happened in debug mode dtests, where a fiber executes
handle_major_state_change(), calls into the subscribers, of which
storage_service is one, and ultimately lands on
storage_service::update_peer_info(), which iterates over the
endpoint's application state with deferring points in between (to
update a system table). gossiper::evict_from_membership() was executed
concurrently by another fiber, which freed the state the first one was
iterating over.

Fixes #3299.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180318123211.3366-1-duarte@scylladb.com>
(cherry picked from commit 810db425a5)
2018-03-18 14:54:54 +02:00
Takuya ASADA
1f10549056 dist/redhat: build only scylla, iotune
Since we don't package the tests, we don't need to build them.
This reduces package build time.

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1521066363-4859-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit 1bb3531b90)
2018-03-15 10:48:36 +02:00
Takuya ASADA
c2a2560ea3 dist/debian: use 3rdparty ppa on Ubuntu 18.04
Currently Ubuntu 18.04 uses the distribution-provided g++ and boost, but it's
easier to maintain the Scylla package when it builds with the same toolchain
and library versions everywhere, so switch to the 3rdparty PPA.

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1521075576-12064-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit 945e6ec4f6)
2018-03-15 10:48:31 +02:00
Takuya ASADA
237e36a0b4 dist/ami: update CentOS base image to latest version
Since we require an updated version of systemd, we need to update the CentOS
base image.

Fixes #3184

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1518118694-23770-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit 69d226625a)
2018-03-15 10:47:54 +02:00
Takuya ASADA
e78c137bfc dist/redhat: switch to gcc-7.3
We have hit the following bug with the debug-mode binary:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82560
Since it's fixed on gcc-7.3, we need to upgrade our gcc package.

See: https://groups.google.com/d/topic/scylladb-dev/RIdIpqMeTog/discussion
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1521064473-17906-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit 856dc0a636)
2018-03-15 10:39:40 +02:00
Avi Kivity
fb99a7c902 Merge "Ubuntu/Debian build error fixes" from Takuya
* 'debian-ubuntu-build-fixes-v2' of https://github.com/syuu1228/scylla:
  dist/debian: build only scylla, iotune
  dist/debian: switch to boost-1.65
  dist/debian: switch to gcc-7.3

(cherry picked from commit bb4b1f0e91)
2018-03-14 22:51:44 +02:00
89 changed files with 1066 additions and 506 deletions

.gitmodules vendored
View File

@@ -1,6 +1,6 @@
[submodule "seastar"]
path = seastar
url = ../seastar
url = ../scylla-seastar
ignore = dirty
[submodule "swagger-ui"]
path = swagger-ui

View File

@@ -1,6 +1,6 @@
#!/bin/sh
VERSION=666.development
VERSION=2.2.0
if test -f version
then

View File

@@ -72,18 +72,22 @@ public:
return make_ready_future<authenticated_user>(anonymous_user());
}
virtual future<> create(stdx::string_view, const authentication_options& options) override {
virtual future<> create(stdx::string_view, const authentication_options& options) const override {
return make_ready_future();
}
virtual future<> alter(stdx::string_view, const authentication_options& options) override {
virtual future<> alter(stdx::string_view, const authentication_options& options) const override {
return make_ready_future();
}
virtual future<> drop(stdx::string_view) override {
virtual future<> drop(stdx::string_view) const override {
return make_ready_future();
}
virtual future<custom_options> query_custom_options(stdx::string_view role_name) const override {
return make_ready_future<custom_options>();
}
virtual const resource_set& protected_resources() const override {
static const resource_set resources;
return resources;

View File

@@ -58,24 +58,30 @@ public:
return make_ready_future<permission_set>(permissions::ALL);
}
virtual future<> grant(stdx::string_view, permission_set, const resource&) override {
throw exceptions::invalid_request_exception("GRANT operation is not supported by AllowAllAuthorizer");
virtual future<> grant(stdx::string_view, permission_set, const resource&) const override {
return make_exception_future<>(
unsupported_authorization_operation("GRANT operation is not supported by AllowAllAuthorizer"));
}
virtual future<> revoke(stdx::string_view, permission_set, const resource&) override {
throw exceptions::invalid_request_exception("REVOKE operation is not supported by AllowAllAuthorizer");
virtual future<> revoke(stdx::string_view, permission_set, const resource&) const override {
return make_exception_future<>(
unsupported_authorization_operation("REVOKE operation is not supported by AllowAllAuthorizer"));
}
virtual future<std::vector<permission_details>> list_all() const override {
throw exceptions::invalid_request_exception("LIST PERMISSIONS operation is not supported by AllowAllAuthorizer");
return make_exception_future<std::vector<permission_details>>(
unsupported_authorization_operation(
"LIST PERMISSIONS operation is not supported by AllowAllAuthorizer"));
}
virtual future<> revoke_all(stdx::string_view) override {
return make_ready_future();
virtual future<> revoke_all(stdx::string_view) const override {
return make_exception_future(
unsupported_authorization_operation("REVOKE operation is not supported by AllowAllAuthorizer"));
}
virtual future<> revoke_all(const resource&) override {
return make_ready_future();
virtual future<> revoke_all(const resource&) const override {
return make_exception_future(
unsupported_authorization_operation("REVOKE operation is not supported by AllowAllAuthorizer"));
}
virtual const resource_set& protected_resources() const override {

View File

@@ -43,9 +43,11 @@ std::ostream& operator<<(std::ostream&, authentication_option);
using authentication_option_set = std::unordered_set<authentication_option>;
using custom_options = std::unordered_map<sstring, sstring>;
struct authentication_options final {
std::optional<sstring> password;
std::optional<std::unordered_map<sstring, sstring>> options;
std::optional<custom_options> options;
};
inline bool any_authentication_options(const authentication_options& aos) noexcept {

View File

@@ -69,7 +69,9 @@ namespace auth {
class authenticated_user;
///
/// Abstract interface for authenticating users.
/// Abstract client for authenticating role identity.
///
/// All state necessary to authorize a role is stored externally to the client instance.
///
class authenticator {
public:
@@ -120,7 +122,7 @@ public:
///
/// The options provided must be a subset of `supported_options()`.
///
virtual future<> create(stdx::string_view role_name, const authentication_options& options) = 0;
virtual future<> create(stdx::string_view role_name, const authentication_options& options) const = 0;
///
/// Alter the authentication record of an existing user.
@@ -129,12 +131,19 @@ public:
///
/// Callers must ensure that the specification of `alterable_options()` is adhered to.
///
virtual future<> alter(stdx::string_view role_name, const authentication_options& options) = 0;
virtual future<> alter(stdx::string_view role_name, const authentication_options& options) const = 0;
///
/// Delete the authentication record for a user. This will disallow the user from logging in.
///
virtual future<> drop(stdx::string_view role_name) = 0;
virtual future<> drop(stdx::string_view role_name) const = 0;
///
/// Query for custom options (those corresponding to \ref authentication_options::options).
///
/// If no options are set the result is an empty container.
///
virtual future<custom_options> query_custom_options(stdx::string_view role_name) const = 0;
///
/// System resources used internally as part of the implementation. These are made inaccessible to users.

View File

@@ -44,6 +44,7 @@
#include <experimental/string_view>
#include <functional>
#include <optional>
#include <stdexcept>
#include <tuple>
#include <vector>
@@ -79,8 +80,15 @@ inline bool operator<(const permission_details& pd1, const permission_details& p
< std::forward_as_tuple(pd2.role_name, pd2.resource, pd2.permissions);
}
class unsupported_authorization_operation : public std::invalid_argument {
public:
using std::invalid_argument::invalid_argument;
};
///
/// Abstract interface for authorizing users to access resources.
/// Abstract client for authorizing roles to access resources.
///
/// All state necessary to authorize a role is stored externally to the client instance.
///
class authorizer {
public:
@@ -107,27 +115,37 @@ public:
///
/// Grant a set of permissions to a role for a particular \ref resource.
///
virtual future<> grant(stdx::string_view role_name, permission_set, const resource&) = 0;
/// \throws \ref unsupported_authorization_operation if granting permissions is not supported.
///
virtual future<> grant(stdx::string_view role_name, permission_set, const resource&) const = 0;
///
/// Revoke a set of permissions from a role for a particular \ref resource.
///
virtual future<> revoke(stdx::string_view role_name, permission_set, const resource&) = 0;
/// \throws \ref unsupported_authorization_operation if revoking permissions is not supported.
///
virtual future<> revoke(stdx::string_view role_name, permission_set, const resource&) const = 0;
///
/// Query for all directly granted permissions.
///
/// \throws \ref unsupported_authorization_operation if listing permissions is not supported.
///
virtual future<std::vector<permission_details>> list_all() const = 0;
///
/// Revoke all permissions granted directly to a particular role.
///
virtual future<> revoke_all(stdx::string_view role_name) = 0;
/// \throws \ref unsupported_authorization_operation if revoking permissions is not supported.
///
virtual future<> revoke_all(stdx::string_view role_name) const = 0;
///
/// Revoke all permissions granted to any role for a particular resource.
///
virtual future<> revoke_all(const resource&) = 0;
/// \throws \ref unsupported_authorization_operation if revoking permissions is not supported.
///
virtual future<> revoke_all(const resource&) const = 0;
///
/// System resources used internally as part of the implementation. These are made inaccessible to users.

View File

@@ -25,6 +25,7 @@
#include "cql3/query_processor.hh"
#include "cql3/statements/create_table_statement.hh"
#include "database.hh"
#include "schema_builder.hh"
#include "service/migration_manager.hh"
@@ -48,7 +49,7 @@ future<> do_after_system_ready(seastar::abort_source& as, seastar::noncopyable_f
return exponential_backoff_retry::do_until_value(1s, 1min, as, [func = std::move(func)] {
return func().then_wrapped([] (auto&& f) -> stdx::optional<empty_state> {
if (f.failed()) {
auth_log.warn("Auth task failed with error, rescheduling: {}", f.get_exception());
auth_log.info("Auth task failed with error, rescheduling: {}", f.get_exception());
return { };
}
return { empty_state() };
@@ -58,13 +59,13 @@ future<> do_after_system_ready(seastar::abort_source& as, seastar::noncopyable_f
}
future<> create_metadata_table_if_missing(
const sstring& table_name,
stdx::string_view table_name,
cql3::query_processor& qp,
const sstring& cql,
stdx::string_view cql,
::service::migration_manager& mm) {
auto& db = qp.db().local();
if (db.has_schema(meta::AUTH_KS, table_name)) {
if (db.has_schema(meta::AUTH_KS, sstring(table_name))) {
return make_ready_future<>();
}
@@ -85,4 +86,12 @@ future<> create_metadata_table_if_missing(
return mm.announce_new_column_family(b.build(), false);
}
future<> wait_for_schema_agreement(::service::migration_manager& mm, const database& db) {
static const auto pause = [] { return sleep(std::chrono::milliseconds(500)); };
return do_until([&db] { return db.get_version() != database::empty_version; }, pause).then([&mm] {
return do_until([&mm] { return mm.have_schema_agreement(); }, pause);
});
}
}

View File

@@ -22,6 +22,7 @@
#pragma once
#include <chrono>
#include <experimental/string_view>
#include <seastar/core/future.hh>
#include <seastar/core/abort_source.hh>
@@ -36,6 +37,8 @@
using namespace std::chrono_literals;
class database;
namespace service {
class migration_manager;
}
@@ -65,16 +68,18 @@ future<> once_among_shards(Task&& f) {
}
inline future<> delay_until_system_ready(seastar::abort_source& as) {
return sleep_abortable(10s, as);
return sleep_abortable(15s, as);
}
// Func must support being invoked more than once.
future<> do_after_system_ready(seastar::abort_source& as, seastar::noncopyable_function<future<>()> func);
future<> create_metadata_table_if_missing(
const sstring& table_name,
stdx::string_view table_name,
cql3::query_processor&,
const sstring& cql,
stdx::string_view cql,
::service::migration_manager&);
future<> wait_for_schema_agreement(::service::migration_manager&, const database&);
}

View File

@@ -109,7 +109,7 @@ future<bool> default_authorizer::any_granted() const {
});
}
future<> default_authorizer::migrate_legacy_metadata() {
future<> default_authorizer::migrate_legacy_metadata() const {
alogger.info("Starting migration of legacy permissions metadata.");
static const sstring query = sprint("SELECT * FROM %s.%s", meta::AUTH_KS, legacy_table_name);
@@ -157,18 +157,18 @@ future<> default_authorizer::start() {
create_table,
_migration_manager).then([this] {
_finished = do_after_system_ready(_as, [this] {
if (legacy_metadata_exists()) {
return any_granted().then([this](bool any) {
if (!any) {
return migrate_legacy_metadata();
}
return async([this] {
wait_for_schema_agreement(_migration_manager, _qp.db().local()).get0();
alogger.warn("Ignoring legacy permissions metadata since role permissions exist.");
return make_ready_future<>();
});
}
if (legacy_metadata_exists()) {
if (!any_granted().get0()) {
migrate_legacy_metadata().get0();
return;
}
return make_ready_future<>();
alogger.warn("Ignoring legacy permissions metadata since role permissions exist.");
}
});
});
});
});
@@ -210,7 +210,7 @@ default_authorizer::modify(
stdx::string_view role_name,
permission_set set,
const resource& resource,
stdx::string_view op) {
stdx::string_view op) const {
return do_with(
sprint(
"UPDATE %s.%s SET %s = %s %s ? WHERE %s = ? AND %s = ?",
@@ -230,11 +230,11 @@ default_authorizer::modify(
}
future<> default_authorizer::grant(stdx::string_view role_name, permission_set set, const resource& resource) {
future<> default_authorizer::grant(stdx::string_view role_name, permission_set set, const resource& resource) const {
return modify(role_name, std::move(set), resource, "+");
}
future<> default_authorizer::revoke(stdx::string_view role_name, permission_set set, const resource& resource) {
future<> default_authorizer::revoke(stdx::string_view role_name, permission_set set, const resource& resource) const {
return modify(role_name, std::move(set), resource, "-");
}
@@ -267,7 +267,7 @@ future<std::vector<permission_details>> default_authorizer::list_all() const {
});
}
future<> default_authorizer::revoke_all(stdx::string_view role_name) {
future<> default_authorizer::revoke_all(stdx::string_view role_name) const {
static const sstring query = sprint(
"DELETE FROM %s.%s WHERE %s = ?",
meta::AUTH_KS,
@@ -286,7 +286,7 @@ future<> default_authorizer::revoke_all(stdx::string_view role_name) {
});
}
future<> default_authorizer::revoke_all(const resource& resource) {
future<> default_authorizer::revoke_all(const resource& resource) const {
static const sstring query = sprint(
"SELECT %s FROM %s.%s WHERE %s = ? ALLOW FILTERING",
ROLE_NAME,

View File

@@ -77,15 +77,15 @@ public:
virtual future<permission_set> authorize(const role_or_anonymous&, const resource&) const override;
virtual future<> grant(stdx::string_view, permission_set, const resource&) override;
virtual future<> grant(stdx::string_view, permission_set, const resource&) const override;
virtual future<> revoke( stdx::string_view, permission_set, const resource&) override;
virtual future<> revoke( stdx::string_view, permission_set, const resource&) const override;
virtual future<std::vector<permission_details>> list_all() const override;
virtual future<> revoke_all(stdx::string_view) override;
virtual future<> revoke_all(stdx::string_view) const override;
virtual future<> revoke_all(const resource&) override;
virtual future<> revoke_all(const resource&) const override;
virtual const resource_set& protected_resources() const override;
@@ -94,9 +94,9 @@ private:
future<bool> any_granted() const;
future<> migrate_legacy_metadata();
future<> migrate_legacy_metadata() const;
future<> modify(stdx::string_view, permission_set, const resource&, stdx::string_view);
future<> modify(stdx::string_view, permission_set, const resource&, stdx::string_view) const;
};
} /* namespace auth */

View File

@@ -177,7 +177,7 @@ bool password_authenticator::legacy_metadata_exists() const {
return _qp.db().local().has_schema(meta::AUTH_KS, legacy_table_name);
}
future<> password_authenticator::migrate_legacy_metadata() {
future<> password_authenticator::migrate_legacy_metadata() const {
plogger.info("Starting migration of legacy authentication metadata.");
static const sstring query = sprint("SELECT * FROM %s.%s", meta::AUTH_KS, legacy_table_name);
@@ -201,7 +201,7 @@ future<> password_authenticator::migrate_legacy_metadata() {
});
}
future<> password_authenticator::create_default_if_missing() {
future<> password_authenticator::create_default_if_missing() const {
return default_role_row_satisfies(_qp, &has_salted_hash).then([this](bool exists) {
if (!exists) {
return _qp.process(
@@ -220,8 +220,16 @@ future<> password_authenticator::start() {
return once_among_shards([this] {
gensalt(); // do this once to determine usable hashing
auto f = create_metadata_table_if_missing(
meta::roles_table::name,
_qp,
meta::roles_table::creation_query(),
_migration_manager);
_stopped = do_after_system_ready(_as, [this] {
return async([this] {
wait_for_schema_agreement(_migration_manager, _qp.db().local()).get0();
if (any_nondefault_role_row_satisfies(_qp, &has_salted_hash).get0()) {
if (legacy_metadata_exists()) {
plogger.warn("Ignoring legacy authentication metadata since nondefault data already exist.");
@@ -239,7 +247,7 @@ future<> password_authenticator::start() {
});
});
return make_ready_future<>();
return f;
});
}
@@ -317,7 +325,7 @@ future<authenticated_user> password_authenticator::authenticate(
});
}
future<> password_authenticator::create(stdx::string_view role_name, const authentication_options& options) {
future<> password_authenticator::create(stdx::string_view role_name, const authentication_options& options) const {
if (!options.password) {
return make_ready_future<>();
}
@@ -328,7 +336,7 @@ future<> password_authenticator::create(stdx::string_view role_name, const authe
{hashpw(*options.password), sstring(role_name)}).discard_result();
}
future<> password_authenticator::alter(stdx::string_view role_name, const authentication_options& options) {
future<> password_authenticator::alter(stdx::string_view role_name, const authentication_options& options) const {
if (!options.password) {
return make_ready_future<>();
}
@@ -345,7 +353,7 @@ future<> password_authenticator::alter(stdx::string_view role_name, const authen
{hashpw(*options.password), sstring(role_name)}).discard_result();
}
future<> password_authenticator::drop(stdx::string_view name) {
future<> password_authenticator::drop(stdx::string_view name) const {
static const sstring query = sprint(
"DELETE %s FROM %s WHERE %s = ?",
SALTED_HASH,
@@ -355,6 +363,10 @@ future<> password_authenticator::drop(stdx::string_view name) {
return _qp.process(query, consistency_for_user(name), {sstring(name)}).discard_result();
}
future<custom_options> password_authenticator::query_custom_options(stdx::string_view role_name) const {
return make_ready_future<custom_options>();
}
const resource_set& password_authenticator::protected_resources() const {
static const resource_set resources({make_data_resource(meta::AUTH_KS, meta::roles_table::name)});
return resources;

@@ -81,11 +81,13 @@ public:
virtual future<authenticated_user> authenticate(const credentials_map& credentials) const override;
virtual future<> create(stdx::string_view role_name, const authentication_options& options) override;
virtual future<> create(stdx::string_view role_name, const authentication_options& options) const override;
virtual future<> alter(stdx::string_view role_name, const authentication_options& options) override;
virtual future<> alter(stdx::string_view role_name, const authentication_options& options) const override;
virtual future<> drop(stdx::string_view role_name) override;
virtual future<> drop(stdx::string_view role_name) const override;
virtual future<custom_options> query_custom_options(stdx::string_view role_name) const override;
virtual const resource_set& protected_resources() const override;
@@ -94,9 +96,9 @@ public:
private:
bool legacy_metadata_exists() const;
future<> migrate_legacy_metadata();
future<> migrate_legacy_metadata() const;
future<> create_default_if_missing();
future<> create_default_if_missing() const;
};
}

@@ -93,10 +93,12 @@ using role_set = std::unordered_set<sstring>;
enum class recursive_role_query { yes, no };
///
/// Abstract role manager.
/// Abstract client for managing roles.
///
/// All implementations should throw role-related exceptions as documented, but authorization-related checking is
/// handled by the CQL layer, and not here.
/// All state necessary for managing roles is stored externally to the client instance.
///
/// All implementations should throw role-related exceptions as documented. Authorization is not addressed here, and
/// access-control should never be enforced in implementations.
///
class role_manager {
public:
@@ -113,17 +115,17 @@ public:
///
/// \returns an exceptional future with \ref role_already_exists for a role that has previously been created.
///
virtual future<> create(stdx::string_view role_name, const role_config&) = 0;
virtual future<> create(stdx::string_view role_name, const role_config&) const = 0;
///
/// \returns an exceptional future with \ref nonexistant_role if the role does not exist.
///
virtual future<> drop(stdx::string_view role_name) = 0;
virtual future<> drop(stdx::string_view role_name) const = 0;
///
/// \returns an exceptional future with \ref nonexistant_role if the role does not exist.
///
virtual future<> alter(stdx::string_view role_name, const role_config_update&) = 0;
virtual future<> alter(stdx::string_view role_name, const role_config_update&) const = 0;
///
/// Grant `role_name` to `grantee_name`.
@@ -133,7 +135,7 @@ public:
/// \returns an exceptional future with \ref role_already_included if granting the role would be redundant, or
/// create a cycle.
///
virtual future<> grant(stdx::string_view grantee_name, stdx::string_view role_name) = 0;
virtual future<> grant(stdx::string_view grantee_name, stdx::string_view role_name) const = 0;
///
/// Revoke `role_name` from `revokee_name`.
@@ -142,7 +144,7 @@ public:
///
/// \returns an exceptional future with \ref revoke_ungranted_role if the role was not granted.
///
virtual future<> revoke(stdx::string_view revokee_name, stdx::string_view role_name) = 0;
virtual future<> revoke(stdx::string_view revokee_name, stdx::string_view role_name) const = 0;
///
/// \returns an exceptional future with \ref nonexistant_role if the role does not exist.
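The interface change above makes every role-mutating entry point a `const` member function, which the new doc comment justifies: all role state is stored externally to the client instance. A minimal synchronous sketch of that idea (names are hypothetical; the real implementation issues CQL queries through `cql3::query_processor`):

```cpp
#include <cassert>
#include <string>
#include <unordered_map>

// Hypothetical in-memory store standing in for the system_auth tables;
// all role state lives here, outside the client object.
std::unordered_map<std::string, bool> g_roles; // role name -> is_superuser

// Because the client itself holds no role state, even the "mutating"
// operations can be const member functions, as in the interface above:
// they modify the external store, not *this.
class role_client {
public:
    void create(const std::string& name, bool super) const {
        g_roles.emplace(name, super);
    }
    void drop(const std::string& name) const {
        g_roles.erase(name);
    }
    bool exists(const std::string& name) const {
        return g_roles.count(name) != 0;
    }
};
```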

@@ -36,6 +36,21 @@ namespace meta {
namespace roles_table {
stdx::string_view creation_query() {
static const sstring instance = sprint(
"CREATE TABLE %s ("
" %s text PRIMARY KEY,"
" can_login boolean,"
" is_superuser boolean,"
" member_of set<text>,"
" salted_hash text"
")",
qualified_name(),
role_col_name);
return instance;
}
stdx::string_view qualified_name() noexcept {
static const sstring instance = AUTH_KS + "." + sstring(name);
return instance;
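The `qualified_name()` accessor above uses a function-local static to build the qualified table name once and hand out a stable view into it. A standalone sketch of the same pattern, with plain `std::string` in place of `sstring`/`stdx::string_view` and hypothetical constants standing in for `meta::AUTH_KS` and the table name:

```cpp
#include <cassert>
#include <string>

// Hypothetical stand-ins for meta::AUTH_KS and roles_table::name.
static const std::string k_auth_ks = "system_auth";
static const char* k_roles_name = "roles";

// Mirrors the qualified_name() accessor above: the string is built on
// first use and cached in a function-local static (thread-safe since
// C++11), so callers may keep a reference or view into it for the
// lifetime of the program.
const std::string& qualified_name() {
    static const std::string instance = k_auth_ks + "." + k_roles_name;
    return instance;
}
```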

@@ -40,6 +40,8 @@ namespace meta {
namespace roles_table {
stdx::string_view creation_query();
constexpr stdx::string_view name{"roles", 5};
stdx::string_view qualified_name() noexcept;

@@ -77,11 +77,18 @@ private:
void on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed) override {}
void on_drop_keyspace(const sstring& ks_name) override {
_authorizer.revoke_all(auth::make_data_resource(ks_name));
_authorizer.revoke_all(
auth::make_data_resource(ks_name)).handle_exception_type([](const unsupported_authorization_operation&) {
// Nothing.
});
}
void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) override {
_authorizer.revoke_all(auth::make_data_resource(ks_name, cf_name));
_authorizer.revoke_all(
auth::make_data_resource(
ks_name, cf_name)).handle_exception_type([](const unsupported_authorization_operation&) {
// Nothing.
});
}
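Both listener hooks above attach `handle_exception_type` so that authorizer backends which do not support revocation (and throw `unsupported_authorization_operation`) no longer break the keyspace- and table-drop paths. A synchronous, Seastar-free sketch of that type-filtered swallow (names are hypothetical):

```cpp
#include <cassert>
#include <stdexcept>

// Stand-in for auth::unsupported_authorization_operation.
struct unsupported_operation : std::runtime_error {
    using std::runtime_error::runtime_error;
};

// Synchronous analogue of future::handle_exception_type as used in the
// hunk above: run the operation, swallow only the named exception type
// (the "// Nothing." handlers), and let anything else propagate.
template <typename Exc, typename Op>
void ignore_only(Op op) {
    try {
        op();
    } catch (const Exc&) {
        // Nothing.
    }
}
```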
void on_drop_user_type(const sstring& ks_name, const sstring& type_name) override {}
@@ -177,9 +184,7 @@ future<> service::start() {
return once_among_shards([this] {
return create_keyspace_if_missing();
}).then([this] {
return _role_manager->start();
}).then([this] {
return when_all_succeed(_authorizer->start(), _authenticator->start());
return when_all_succeed(_role_manager->start(), _authorizer->start(), _authenticator->start());
}).then([this] {
_permissions_cache = std::make_unique<permissions_cache>(_permissions_cache_config, *this, log);
}).then([this] {
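The start-up change above folds `_role_manager->start()` into the same `when_all_succeed` as the authorizer and authenticator, so all three services start concurrently instead of the role manager being sequenced first. A rough thread-based sketch of the combinator's contract (a stand-in, not Seastar's actual implementation):

```cpp
#include <cassert>
#include <functional>
#include <future>
#include <vector>

// Rough stand-in for seastar's when_all_succeed as used above: start
// every task concurrently, then wait for all of them, so total latency
// is the slowest task rather than the sum. If any task throws, get()
// rethrows and the whole start-up fails.
int start_all(std::vector<std::function<int()>> tasks) {
    std::vector<std::future<int>> futs;
    futs.reserve(tasks.size());
    for (auto& t : tasks) {
        futs.push_back(std::async(std::launch::async, t));
    }
    int started = 0;
    for (auto& f : futs) {
        started += f.get();
    }
    return started;
}
```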
@@ -402,7 +407,7 @@ static void validate_authentication_options_are_supported(
future<> create_role(
service& ser,
const service& ser,
stdx::string_view name,
const role_config& config,
const authentication_options& options) {
@@ -415,7 +420,7 @@ future<> create_role(
&validate_authentication_options_are_supported,
options,
ser.underlying_authenticator().supported_options()).then([&ser, name, &options] {
return ser.underlying_authenticator().create(sstring(name), options);
return ser.underlying_authenticator().create(name, options);
}).handle_exception([&ser, &name](std::exception_ptr ep) {
// Roll-back.
return ser.underlying_role_manager().drop(name).then([ep = std::move(ep)] {
@@ -426,7 +431,7 @@ future<> create_role(
}
future<> alter_role(
service& ser,
const service& ser,
stdx::string_view name,
const role_config_update& config_update,
const authentication_options& options) {
@@ -444,10 +449,15 @@ future<> alter_role(
});
}
future<> drop_role(service& ser, stdx::string_view name) {
future<> drop_role(const service& ser, stdx::string_view name) {
return do_with(make_role_resource(name), [&ser, name](const resource& r) {
auto& a = ser.underlying_authorizer();
return when_all_succeed(a.revoke_all(name), a.revoke_all(r));
return when_all_succeed(
a.revoke_all(name),
a.revoke_all(r)).handle_exception_type([](const unsupported_authorization_operation&) {
// Nothing.
});
}).then([&ser, name] {
return ser.underlying_authenticator().drop(name);
}).then([&ser, name] {
@@ -471,7 +481,7 @@ future<bool> has_role(const service& ser, const authenticated_user& u, stdx::str
}
future<> grant_permissions(
service& ser,
const service& ser,
stdx::string_view role_name,
permission_set perms,
const resource& r) {
@@ -480,8 +490,19 @@ future<> grant_permissions(
});
}
future<> grant_applicable_permissions(const service& ser, stdx::string_view role_name, const resource& r) {
return grant_permissions(ser, role_name, r.applicable_permissions(), r);
}
future<> grant_applicable_permissions(const service& ser, const authenticated_user& u, const resource& r) {
if (is_anonymous(u)) {
return make_ready_future<>();
}
return grant_applicable_permissions(ser, *u.name, r);
}
future<> revoke_permissions(
service& ser,
const service& ser,
stdx::string_view role_name,
permission_set perms,
const resource& r) {

@@ -75,12 +75,14 @@ public:
};
///
/// Central interface into access-control for the system.
/// Client for access-control in the system.
///
/// Access control encompasses user/role management, authentication, and authorization. This class provides access to
/// Access control encompasses user/role management, authentication, and authorization. This client provides access to
/// the dynamically-loaded implementations of these modules (through the `underlying_*` member functions), but also
/// builds on their functionality with caching and abstractions for common operations.
///
/// All state associated with access-control is stored externally to any particular instance of this class.
///
class service final {
permissions_cache_config _permissions_cache_config;
std::unique_ptr<permissions_cache> _permissions_cache;
@@ -149,26 +151,14 @@ public:
future<bool> exists(const resource&) const;
authenticator& underlying_authenticator() {
return *_authenticator;
}
const authenticator& underlying_authenticator() const {
return *_authenticator;
}
authorizer& underlying_authorizer() {
return *_authorizer;
}
const authorizer& underlying_authorizer() const {
return *_authorizer;
}
role_manager& underlying_role_manager() {
return *_role_manager;
}
const role_manager& underlying_role_manager() const {
return *_role_manager;
}
@@ -206,7 +196,7 @@ bool is_protected(const service&, const resource&) noexcept;
/// \returns an exceptional future with \ref unsupported_authentication_option if an unsupported option is included.
///
future<> create_role(
service&,
const service&,
stdx::string_view name,
const role_config&,
const authentication_options&);
@@ -219,7 +209,7 @@ future<> create_role(
/// \returns an exceptional future with \ref unsupported_authentication_option if an unsupported option is included.
///
future<> alter_role(
service&,
const service&,
stdx::string_view name,
const role_config_update&,
const authentication_options&);
@@ -229,7 +219,7 @@ future<> alter_role(
///
/// \returns an exceptional future with \ref nonexistant_role if the named role does not exist.
///
future<> drop_role(service&, stdx::string_view name);
future<> drop_role(const service&, stdx::string_view name);
///
/// Check if `grantee` has been granted the named role.
@@ -247,17 +237,34 @@ future<bool> has_role(const service&, const authenticated_user&, stdx::string_vi
///
/// \returns an exceptional future with \ref nonexistent_role if the named role does not exist.
///
/// \returns an exceptional future with \ref unsupported_authorization_operation if granting permissions is not
/// supported.
///
future<> grant_permissions(
service&,
const service&,
stdx::string_view role_name,
permission_set,
const resource&);
///
/// Like \ref grant_permissions, but grants all applicable permissions on the resource.
///
/// \returns an exceptional future with \ref nonexistent_role if the named role does not exist.
///
/// \returns an exceptional future with \ref unsupported_authorization_operation if granting permissions is not
/// supported.
///
future<> grant_applicable_permissions(const service&, stdx::string_view role_name, const resource&);
future<> grant_applicable_permissions(const service&, const authenticated_user&, const resource&);
///
/// \returns an exceptional future with \ref nonexistent_role if the named role does not exist.
///
/// \returns an exceptional future with \ref unsupported_authorization_operation if revoking permissions is not
/// supported.
///
future<> revoke_permissions(
service&,
const service&,
stdx::string_view role_name,
permission_set,
const resource&);
@@ -277,6 +284,9 @@ using recursive_permissions = bool_class<struct recursive_permissions_tag>;
/// \returns an exceptional future with \ref nonexistent_role if a role name is included which refers to a role that
/// does not exist.
///
/// \returns an exceptional future with \ref unsupported_authorization_operation if listing permissions is not
/// supported.
///
future<std::vector<permission_details>> list_filtered_permissions(
const service&,
permission_set,

@@ -118,6 +118,10 @@ static future<record> require_record(cql3::query_processor& qp, stdx::string_vie
});
}
static bool has_can_login(const cql3::untyped_result_set_row& row) {
return row.has("can_login") && !(boolean_type->deserialize(row.get_blob("can_login")).is_null());
}
stdx::string_view standard_role_manager_name() noexcept {
static const sstring instance = meta::AUTH_PACKAGE_NAME + "CassandraRoleManager";
return instance;
@@ -135,18 +139,7 @@ const resource_set& standard_role_manager::protected_resources() const {
return resources;
}
future<> standard_role_manager::create_metadata_tables_if_missing() {
static const sstring create_roles_query = sprint(
"CREATE TABLE %s ("
" %s text PRIMARY KEY,"
" can_login boolean,"
" is_superuser boolean,"
" member_of set<text>,"
" salted_hash text"
")",
meta::roles_table::qualified_name(),
meta::roles_table::role_col_name);
future<> standard_role_manager::create_metadata_tables_if_missing() const {
static const sstring create_role_members_query = sprint(
"CREATE TABLE %s ("
" role text,"
@@ -158,19 +151,19 @@ future<> standard_role_manager::create_metadata_tables_if_missing() {
return when_all_succeed(
create_metadata_table_if_missing(
sstring(meta::roles_table::name),
meta::roles_table::name,
_qp,
create_roles_query,
meta::roles_table::creation_query(),
_migration_manager),
create_metadata_table_if_missing(
sstring(meta::role_members_table::name),
meta::role_members_table::name,
_qp,
create_role_members_query,
_migration_manager));
}
future<> standard_role_manager::create_default_role_if_missing() {
return default_role_row_satisfies(_qp, [](auto&&) { return true; }).then([this](bool exists) {
future<> standard_role_manager::create_default_role_if_missing() const {
return default_role_row_satisfies(_qp, &has_can_login).then([this](bool exists) {
if (!exists) {
static const sstring query = sprint(
"INSERT INTO %s (%s, is_superuser, can_login) VALUES (?, true, true)",
@@ -199,7 +192,7 @@ bool standard_role_manager::legacy_metadata_exists() const {
return _qp.db().local().has_schema(meta::AUTH_KS, legacy_table_name);
}
future<> standard_role_manager::migrate_legacy_metadata() {
future<> standard_role_manager::migrate_legacy_metadata() const {
log.info("Starting migration of legacy user metadata.");
static const sstring query = sprint("SELECT * FROM %s.%s", meta::AUTH_KS, legacy_table_name);
@@ -231,7 +224,9 @@ future<> standard_role_manager::start() {
return this->create_metadata_tables_if_missing().then([this] {
_stopped = auth::do_after_system_ready(_as, [this] {
return seastar::async([this] {
if (any_nondefault_role_row_satisfies(_qp, [](auto&&) { return true; }).get0()) {
wait_for_schema_agreement(_migration_manager, _qp.db().local()).get0();
if (any_nondefault_role_row_satisfies(_qp, &has_can_login).get0()) {
if (this->legacy_metadata_exists()) {
log.warn("Ignoring legacy user metadata since nondefault roles already exist.");
}
@@ -256,7 +251,7 @@ future<> standard_role_manager::stop() {
return _stopped.handle_exception_type([] (const sleep_aborted&) { });
}
future<> standard_role_manager::create_or_replace(stdx::string_view role_name, const role_config& c) {
future<> standard_role_manager::create_or_replace(stdx::string_view role_name, const role_config& c) const {
static const sstring query = sprint(
"INSERT INTO %s (%s, is_superuser, can_login) VALUES (?, ?, ?)",
meta::roles_table::qualified_name(),
@@ -270,7 +265,7 @@ future<> standard_role_manager::create_or_replace(stdx::string_view role_name, c
}
future<>
standard_role_manager::create(stdx::string_view role_name, const role_config& c) {
standard_role_manager::create(stdx::string_view role_name, const role_config& c) const {
return this->exists(role_name).then([this, role_name, &c](bool role_exists) {
if (role_exists) {
throw role_already_exists(role_name);
@@ -281,7 +276,7 @@ standard_role_manager::create(stdx::string_view role_name, const role_config& c)
}
future<>
standard_role_manager::alter(stdx::string_view role_name, const role_config_update& u) {
standard_role_manager::alter(stdx::string_view role_name, const role_config_update& u) const {
static const auto build_column_assignments = [](const role_config_update& u) -> sstring {
std::vector<sstring> assignments;
@@ -312,7 +307,7 @@ standard_role_manager::alter(stdx::string_view role_name, const role_config_upda
});
}
future<> standard_role_manager::drop(stdx::string_view role_name) {
future<> standard_role_manager::drop(stdx::string_view role_name) const {
return this->exists(role_name).then([this, role_name](bool role_exists) {
if (!role_exists) {
throw nonexistant_role(role_name);
@@ -379,7 +374,7 @@ future<>
standard_role_manager::modify_membership(
stdx::string_view grantee_name,
stdx::string_view role_name,
membership_change ch) {
membership_change ch) const {
const auto modify_roles = [this, role_name, grantee_name, ch] {
@@ -421,7 +416,7 @@ standard_role_manager::modify_membership(
}
future<>
standard_role_manager::grant(stdx::string_view grantee_name, stdx::string_view role_name) {
standard_role_manager::grant(stdx::string_view grantee_name, stdx::string_view role_name) const {
const auto check_redundant = [this, role_name, grantee_name] {
return this->query_granted(
grantee_name,
@@ -452,7 +447,7 @@ standard_role_manager::grant(stdx::string_view grantee_name, stdx::string_view r
}
future<>
standard_role_manager::revoke(stdx::string_view revokee_name, stdx::string_view role_name) {
standard_role_manager::revoke(stdx::string_view revokee_name, stdx::string_view role_name) const {
return this->exists(role_name).then([this, revokee_name, role_name](bool role_exists) {
if (!role_exists) {
throw nonexistant_role(sstring(role_name));

@@ -66,15 +66,15 @@ public:
virtual future<> stop() override;
virtual future<> create(stdx::string_view role_name, const role_config&) override;
virtual future<> create(stdx::string_view role_name, const role_config&) const override;
virtual future<> drop(stdx::string_view role_name) override;
virtual future<> drop(stdx::string_view role_name) const override;
virtual future<> alter(stdx::string_view role_name, const role_config_update&) override;
virtual future<> alter(stdx::string_view role_name, const role_config_update&) const override;
virtual future<> grant(stdx::string_view grantee_name, stdx::string_view role_name) override;
virtual future<> grant(stdx::string_view grantee_name, stdx::string_view role_name) const override;
virtual future<> revoke(stdx::string_view revokee_name, stdx::string_view role_name) override;
virtual future<> revoke(stdx::string_view revokee_name, stdx::string_view role_name) const override;
virtual future<role_set> query_granted(stdx::string_view grantee_name, recursive_role_query) const override;
@@ -89,17 +89,17 @@ public:
private:
enum class membership_change { add, remove };
future<> create_metadata_tables_if_missing();
future<> create_metadata_tables_if_missing() const;
bool legacy_metadata_exists() const;
future<> migrate_legacy_metadata();
future<> migrate_legacy_metadata() const;
future<> create_default_role_if_missing();
future<> create_default_role_if_missing() const;
future<> create_or_replace(stdx::string_view role_name, const role_config&);
future<> create_or_replace(stdx::string_view role_name, const role_config&) const;
future<> modify_membership(stdx::string_view role_name, stdx::string_view grantee_name, membership_change);
future<> modify_membership(stdx::string_view role_name, stdx::string_view grantee_name, membership_change) const;
};
}

@@ -118,18 +118,22 @@ public:
});
}
virtual future<> create(stdx::string_view role_name, const authentication_options& options) override {
virtual future<> create(stdx::string_view role_name, const authentication_options& options) const override {
return _authenticator->create(role_name, options);
}
virtual future<> alter(stdx::string_view role_name, const authentication_options& options) override {
virtual future<> alter(stdx::string_view role_name, const authentication_options& options) const override {
return _authenticator->alter(role_name, options);
}
virtual future<> drop(stdx::string_view role_name) override {
virtual future<> drop(stdx::string_view role_name) const override {
return _authenticator->drop(role_name);
}
virtual future<custom_options> query_custom_options(stdx::string_view role_name) const override {
return _authenticator->query_custom_options(role_name);
}
virtual const resource_set& protected_resources() const override {
return _authenticator->protected_resources();
}
@@ -214,11 +218,11 @@ public:
return make_ready_future<permission_set>(transitional_permissions);
}
virtual future<> grant(stdx::string_view s, permission_set ps, const resource& r) override {
virtual future<> grant(stdx::string_view s, permission_set ps, const resource& r) const override {
return _authorizer->grant(s, std::move(ps), r);
}
virtual future<> revoke(stdx::string_view s, permission_set ps, const resource& r) override {
virtual future<> revoke(stdx::string_view s, permission_set ps, const resource& r) const override {
return _authorizer->revoke(s, std::move(ps), r);
}
@@ -226,11 +230,11 @@ public:
return _authorizer->list_all();
}
virtual future<> revoke_all(stdx::string_view s) override {
virtual future<> revoke_all(stdx::string_view s) const override {
return _authorizer->revoke_all(s);
}
virtual future<> revoke_all(const resource& r) override {
virtual future<> revoke_all(const resource& r) const override {
return _authorizer->revoke_all(r);
}

@@ -47,6 +47,7 @@
class backlog_controller {
public:
future<> shutdown() {
_update_timer.cancel();
return std::move(_inflight_update);
}
protected:
@@ -126,7 +127,7 @@ public:
class compaction_controller : public backlog_controller {
public:
static constexpr unsigned normalization_factor = 10;
static constexpr unsigned normalization_factor = 30;
compaction_controller(seastar::scheduling_group sg, const ::io_priority_class& iop, float static_shares) : backlog_controller(sg, iop, static_shares) {}
compaction_controller(seastar::scheduling_group sg, const ::io_priority_class& iop, std::chrono::milliseconds interval, std::function<float()> current_backlog)
: backlog_controller(sg, iop, std::move(interval),

@@ -70,7 +70,7 @@ public:
{
if (!with_static_row) {
if (_current == _end) {
_current_start = _current_end = position_in_partition_view::after_all_clustered_rows();
_current_start = position_in_partition_view::before_all_clustered_rows();
} else {
_current_start = position_in_partition_view::for_range_start(*_current);
_current_end = position_in_partition_view::for_range_end(*_current);

@@ -128,6 +128,17 @@ cql3::statements::create_keyspace_statement::prepare(database& db, cql_stats& st
return std::make_unique<prepared_statement>(make_shared<create_keyspace_statement>(*this));
}
future<> cql3::statements::create_keyspace_statement::grant_permissions_to_creator(const service::client_state& cs) {
return do_with(auth::make_data_resource(keyspace()), [&cs](const auth::resource& r) {
return auth::grant_applicable_permissions(
*cs.get_auth_service(),
*cs.user(),
r).handle_exception_type([](const auth::unsupported_authorization_operation&) {
// Nothing.
});
});
}
}
}
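`grant_permissions_to_creator` above wraps the `auth::resource` in `do_with` so it stays alive for the whole asynchronous grant chain. A simplified, Seastar-free sketch of that lifetime-extension pattern (`do_with_sketch` is a hypothetical stand-in, not Seastar's API):

```cpp
#include <cassert>
#include <future>
#include <memory>
#include <string>

// Simplified analogue of seastar::do_with as used above: move the
// object into storage that is guaranteed to outlive the asynchronous
// continuation, and hand the continuation a stable reference to it.
template <typename T, typename F>
auto do_with_sketch(T obj, F func) {
    auto holder = std::make_shared<T>(std::move(obj));
    return std::async(std::launch::async, [holder, func] {
        return func(*holder); // *holder lives until the task completes
    });
}
```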

@@ -84,6 +84,8 @@ public:
virtual future<shared_ptr<cql_transport::event::schema_change>> announce_migration(distributed<service::storage_proxy>& proxy, bool is_local_only) override;
virtual std::unique_ptr<prepared> prepare(database& db, cql_stats& stats) override;
virtual future<> grant_permissions_to_creator(const service::client_state&) override;
};
}

@@ -70,6 +70,8 @@ public:
, _if_not_exists(if_not_exists) {
}
future<> grant_permissions_to_creator(const service::client_state&) const;
void validate(distributed<service::storage_proxy>&, const service::client_state&) override;
virtual future<> check_access(const service::client_state&) override;

@@ -49,6 +49,8 @@
#include "cql3/statements/create_table_statement.hh"
#include "cql3/statements/prepared_statement.hh"
#include "auth/resource.hh"
#include "auth/service.hh"
#include "schema_builder.hh"
#include "service/storage_service.hh"
@@ -162,6 +164,16 @@ create_table_statement::prepare(database& db, cql_stats& stats) {
abort();
}
future<> create_table_statement::grant_permissions_to_creator(const service::client_state& cs) {
return do_with(auth::make_data_resource(keyspace(), column_family()), [&cs](const auth::resource& r) {
return auth::grant_applicable_permissions(
*cs.get_auth_service(),
*cs.user(),
r).handle_exception_type([](const auth::unsupported_authorization_operation&) {
// Nothing.
});
});
}
create_table_statement::raw_statement::raw_statement(::shared_ptr<cf_name> name, bool if_not_exists)
: cf_statement{std::move(name)}

@@ -106,6 +106,8 @@ public:
virtual std::unique_ptr<prepared> prepare(database& db, cql_stats& stats) override;
virtual future<> grant_permissions_to_creator(const service::client_state&) override;
schema_ptr get_cf_meta_data(const database&);
class raw_statement;

@@ -51,5 +51,8 @@ cql3::statements::grant_statement::execute(distributed<service::storage_proxy>&
}).handle_exception_type([](const auth::nonexistant_role& e) {
return make_exception_future<::shared_ptr<cql_transport::messages::result_message>>(
exceptions::invalid_request_exception(e.what()));
}).handle_exception_type([](const auth::unsupported_authorization_operation& e) {
return make_exception_future<::shared_ptr<cql_transport::messages::result_message>>(
exceptions::invalid_request_exception(e.what()));
});
}

@@ -171,6 +171,9 @@ cql3::statements::list_permissions_statement::execute(
}).handle_exception_type([](const auth::nonexistant_role& e) {
return make_exception_future<::shared_ptr<cql_transport::messages::result_message>>(
exceptions::invalid_request_exception(e.what()));
}).handle_exception_type([](const auth::unsupported_authorization_operation& e) {
return make_exception_future<::shared_ptr<cql_transport::messages::result_message>>(
exceptions::invalid_request_exception(e.what()));
});
});
}

@@ -70,7 +70,7 @@ cql3::statements::list_users_statement::execute(distributed<service::storage_pro
make_column_spec("name", utf8_type),
make_column_spec("super", boolean_type)});
static const auto make_results = [](auth::service& as, std::unordered_set<sstring>&& roles) {
static const auto make_results = [](const auth::service& as, std::unordered_set<sstring>&& roles) {
using cql_transport::messages::result_message;
auto results = std::make_unique<result_set>(metadata);
@@ -98,8 +98,8 @@ cql3::statements::list_users_statement::execute(distributed<service::storage_pro
});
};
auto& cs = state.get_client_state();
auto& as = *cs.get_auth_service();
const auto& cs = state.get_client_state();
const auto& as = *cs.get_auth_service();
const auto user = cs.user();
return auth::has_superuser(as, *user).then([&cs, &as, user](bool has_superuser) {

@@ -51,5 +51,8 @@ cql3::statements::revoke_statement::execute(distributed<service::storage_proxy>&
}).handle_exception_type([](const auth::nonexistant_role& e) {
return make_exception_future<::shared_ptr<cql_transport::messages::result_message>>(
exceptions::invalid_request_exception(e.what()));
}).handle_exception_type([](const auth::unsupported_authorization_operation& e) {
return make_exception_future<::shared_ptr<cql_transport::messages::result_message>>(
exceptions::invalid_request_exception(e.what()));
});
}

@@ -93,6 +93,17 @@ void validate_cluster_support() {
// `create_role_statement`
//
future<> create_role_statement::grant_permissions_to_creator(const service::client_state& cs) const {
return do_with(auth::make_role_resource(_role), [&cs](const auth::resource& r) {
return auth::grant_applicable_permissions(
*cs.get_auth_service(),
*cs.user(),
r).handle_exception_type([](const auth::unsupported_authorization_operation&) {
// Nothing.
});
});
}
void create_role_statement::validate(distributed<service::storage_proxy>&, const service::client_state&) {
validate_cluster_support();
}
@@ -123,9 +134,12 @@ create_role_statement::execute(distributed<service::storage_proxy>&,
std::move(config),
extract_authentication_options(_options),
[this, &state](const auth::role_config& config, const auth::authentication_options& authen_options) {
auto& as = *state.get_client_state().get_auth_service();
const auto& cs = state.get_client_state();
auto& as = *cs.get_auth_service();
return auth::create_role(as, _role, config, authen_options).then([] {
return auth::create_role(as, _role, config, authen_options).then([this, &cs] {
return grant_permissions_to_creator(cs);
}).then([] {
return void_result_message();
}).handle_exception_type([this](const auth::role_already_exists& e) {
if (!_if_not_exists) {
@@ -300,8 +314,6 @@ future<> list_roles_statement::check_access(const service::client_state& state)
future<result_message_ptr>
list_roles_statement::execute(distributed<service::storage_proxy>&, service::query_state& state, const query_options&) {
unimplemented::warn(unimplemented::cause::ROLES);
static const sstring virtual_table_name("roles");
static const auto make_column_spec = [](const sstring& name, const ::shared_ptr<const abstract_type>& ty) {
@@ -312,14 +324,19 @@ list_roles_statement::execute(distributed<service::storage_proxy>&, service::que
ty);
};
static const thread_local auto custom_options_type = map_type_impl::get_instance(utf8_type, utf8_type, true);
static const thread_local auto metadata = ::make_shared<cql3::metadata>(
std::vector<::shared_ptr<column_specification>>{
make_column_spec("role", utf8_type),
make_column_spec("super", boolean_type),
make_column_spec("login", boolean_type)});
make_column_spec("login", boolean_type),
make_column_spec("options", custom_options_type)});
static const auto make_results = [](auth::role_manager& rm, auth::role_set&& roles)
-> future<result_message_ptr> {
static const auto make_results = [](
const auth::role_manager& rm,
const auth::authenticator& a,
auth::role_set&& roles) -> future<result_message_ptr> {
auto results = std::make_unique<result_set>(metadata);
if (roles.empty()) {
@@ -333,14 +350,26 @@ list_roles_statement::execute(distributed<service::storage_proxy>&, service::que
return do_with(
std::move(sorted_roles),
std::move(results),
[&rm](const std::vector<sstring>& sorted_roles, std::unique_ptr<result_set>& results) {
return do_for_each(sorted_roles, [&results, &rm](const sstring& role) {
[&rm, &a](const std::vector<sstring>& sorted_roles, std::unique_ptr<result_set>& results) {
return do_for_each(sorted_roles, [&results, &rm, &a](const sstring& role) {
return when_all_succeed(
rm.can_login(role),
rm.is_superuser(role)).then([&results, &role](bool login, bool super) {
rm.is_superuser(role),
a.query_custom_options(role)).then([&results, &role](
bool login,
bool super,
auth::custom_options os) {
results->add_column_value(utf8_type->decompose(role));
results->add_column_value(boolean_type->decompose(super));
results->add_column_value(boolean_type->decompose(login));
results->add_column_value(
custom_options_type->decompose(
make_map_value(
custom_options_type,
map_type_impl::native_type(
std::make_move_iterator(os.begin()),
std::make_move_iterator(os.end())))));
});
}).then([&results] {
return make_ready_future<result_message_ptr>(::make_shared<result_message::rows>(std::move(results)));
@@ -348,12 +377,13 @@ list_roles_statement::execute(distributed<service::storage_proxy>&, service::que
});
};
auto& cs = state.get_client_state();
auto& as = *cs.get_auth_service();
const auto& cs = state.get_client_state();
const auto& as = *cs.get_auth_service();
const auto user = cs.user();
return auth::has_superuser(as, *user).then([this, &state, &cs, &as, user](bool super) {
auto& rm = as.underlying_role_manager();
const auto& rm = as.underlying_role_manager();
const auto& a = as.underlying_authenticator();
const auto query_mode = _recursive ? auth::recursive_role_query::yes : auth::recursive_role_query::no;
if (!_grantee) {
@@ -361,19 +391,21 @@ list_roles_statement::execute(distributed<service::storage_proxy>&, service::que
// only the roles granted to them.
return cs.check_has_permission(
auth::permission::DESCRIBE,
auth::root_role_resource()).then([&cs, &rm, user, query_mode](bool has_describe) {
auth::root_role_resource()).then([&cs, &rm, &a, user, query_mode](bool has_describe) {
if (has_describe) {
return rm.query_all().then([&rm](auto&& roles) { return make_results(rm, std::move(roles)); });
return rm.query_all().then([&rm, &a](auto&& roles) {
return make_results(rm, a, std::move(roles));
});
}
return rm.query_granted(*user->name, query_mode).then([&rm](auth::role_set roles) {
return make_results(rm, std::move(roles));
return rm.query_granted(*user->name, query_mode).then([&rm, &a](auth::role_set roles) {
return make_results(rm, a, std::move(roles));
});
});
}
return rm.query_granted(*_grantee, query_mode).then([&rm](auth::role_set roles) {
return make_results(rm, std::move(roles));
return rm.query_granted(*_grantee, query_mode).then([&rm, &a](auth::role_set roles) {
return make_results(rm, a, std::move(roles));
});
}).handle_exception_type([](const auth::nonexistant_role& e) {
return make_exception_future<result_message_ptr>(exceptions::invalid_request_exception(e.what()));
@@ -394,8 +426,6 @@ future<> grant_role_statement::check_access(const service::client_state& state)
future<result_message_ptr>
grant_role_statement::execute(distributed<service::storage_proxy>&, service::query_state& state, const query_options&) {
unimplemented::warn(unimplemented::cause::ROLES);
auto& as = *state.get_client_state().get_auth_service();
return as.underlying_role_manager().grant(_grantee, _role).then([] {
@@ -421,8 +451,6 @@ future<result_message_ptr> revoke_role_statement::execute(
distributed<service::storage_proxy>&,
service::query_state& state,
const query_options&) {
unimplemented::warn(unimplemented::cause::ROLES);
auto& rm = state.get_client_state().get_auth_service()->underlying_role_manager();
return rm.revoke(_revokee, _role).then([] {

View File

@@ -59,6 +59,10 @@ schema_altering_statement::schema_altering_statement(::shared_ptr<cf_name> name)
{
}
future<> schema_altering_statement::grant_permissions_to_creator(const service::client_state&) {
return make_ready_future<>();
}
bool schema_altering_statement::uses_function(const sstring& ks_name, const sstring& function_name) const
{
return cf_statement::uses_function(ks_name, function_name);
@@ -103,7 +107,11 @@ schema_altering_statement::execute0(distributed<service::storage_proxy>& proxy,
future<::shared_ptr<messages::result_message>>
schema_altering_statement::execute(distributed<service::storage_proxy>& proxy, service::query_state& state, const query_options& options) {
return execute0(proxy, state, options, false);
return execute0(proxy, state, options, false).then([this, &state](::shared_ptr<messages::result_message> result) {
return grant_permissions_to_creator(state.get_client_state()).then([result = std::move(result)] {
return result;
});
});
}
future<::shared_ptr<messages::result_message>>

View File

@@ -71,6 +71,14 @@ protected:
schema_altering_statement(::shared_ptr<cf_name> name);
/**
* When a new database object (keyspace, table) is created, the creator needs to be granted all applicable
* permissions on it.
*
* By default, this function does nothing.
*/
virtual future<> grant_permissions_to_creator(const service::client_state&);
virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override;
virtual bool depends_on_keyspace(const sstring& ks_name) const override;

View File

@@ -361,9 +361,13 @@ filter_sstable_for_reader(std::vector<sstables::shared_sstable>&& sstables, colu
};
sstables.erase(boost::remove_if(sstables, sstable_has_not_key), sstables.end());
// FIXME: Workaround for https://github.com/scylladb/scylla/issues/3552
// and https://github.com/scylladb/scylla/issues/3553
const bool filtering_broken = true;
// No clustering filtering is applied if the schema defines no clustering key or the
// compaction strategy thinks it will not benefit from such an optimization.
if (!schema->clustering_key_size() || !cf.get_compaction_strategy().use_clustering_key_filter()) {
if (filtering_broken || !schema->clustering_key_size() || !cf.get_compaction_strategy().use_clustering_key_filter()) {
return sstables;
}
::cf_stats* stats = cf.cf_stats();
@@ -1053,30 +1057,31 @@ column_family::try_flush_memtable_to_sstable(lw_shared_ptr<memtable> old, sstabl
database_sstable_write_monitor monitor(std::move(permit), newtab, _compaction_manager, _compaction_strategy);
return do_with(std::move(monitor), [this, old, newtab] (auto& monitor) {
auto&& priority = service::get_local_memtable_flush_priority();
return write_memtable_to_sstable(*old, newtab, monitor, incremental_backups_enabled(), priority, false).then([this, newtab, old, &monitor] {
// Switch back to default scheduling group for post-flush actions, to avoid them being starved by the memtable flush
// controller. Cache update does not affect the input of the memtable cpu controller, so it can be subject to
// priority inversion.
return with_scheduling_group(default_scheduling_group(), [this, &monitor, old = std::move(old), newtab = std::move(newtab)] () mutable {
return newtab->open_data().then([this, old, newtab] () {
dblog.debug("Flushing to {} done", newtab->get_filename());
return with_scheduling_group(_config.memtable_to_cache_scheduling_group, [this, old, newtab] {
return update_cache(old, newtab);
auto f = write_memtable_to_sstable(*old, newtab, monitor, incremental_backups_enabled(), priority, false);
// Switch back to default scheduling group for post-flush actions, to avoid them being starved by the memtable flush
// controller. Cache update does not affect the input of the memtable cpu controller, so it can be subject to
// priority inversion.
return with_scheduling_group(default_scheduling_group(), [this, &monitor, old = std::move(old), newtab = std::move(newtab), f = std::move(f)] () mutable {
return f.then([this, newtab, old, &monitor] {
return newtab->open_data().then([this, old, newtab] () {
dblog.debug("Flushing to {} done", newtab->get_filename());
return with_scheduling_group(_config.memtable_to_cache_scheduling_group, [this, old, newtab] {
return update_cache(old, newtab);
});
}).then([this, old, newtab] () noexcept {
_memtables->erase(old);
dblog.debug("Memtable for {} replaced", newtab->get_filename());
return stop_iteration::yes;
});
}).handle_exception([this, old, newtab, &monitor] (auto e) {
monitor.write_failed();
newtab->mark_for_deletion();
dblog.error("failed to write sstable {}: {}", newtab->get_filename(), e);
// If we failed this write we will try it again, and the retry will create a new flush
// reader that will decrease dirty memory again, so we need to reset the accounting.
old->revert_flushed_memory();
return stop_iteration(_async_gate.is_closed());
});
}).then([this, old, newtab] () noexcept {
_memtables->erase(old);
dblog.debug("Memtable for {} replaced", newtab->get_filename());
return stop_iteration::yes;
}).handle_exception([this, old, newtab, &monitor] (auto e) {
monitor.write_failed();
newtab->mark_for_deletion();
dblog.error("failed to write sstable {}: {}", newtab->get_filename(), e);
// If we failed this write we will try it again, and the retry will create a new flush
// reader that will decrease dirty memory again, so we need to reset the accounting.
old->revert_flushed_memory();
return stop_iteration::no;
});
});
});
});
});
@@ -1614,7 +1619,7 @@ inline bool column_family::manifest_json_filter(const lister::path&, const direc
// TODO: possibly move it to seastar
template <typename Service, typename PtrType, typename Func>
static future<> invoke_shards_with_ptr(std::vector<shard_id> shards, distributed<Service>& s, PtrType ptr, Func&& func) {
static future<> invoke_shards_with_ptr(std::unordered_set<shard_id> shards, distributed<Service>& s, PtrType ptr, Func&& func) {
return parallel_for_each(std::move(shards), [&s, &func, ptr] (shard_id id) {
return s.invoke_on(id, [func, foreign = make_foreign(ptr)] (Service& s) mutable {
return func(s, std::move(foreign));
@@ -1641,7 +1646,14 @@ future<> distributed_loader::open_sstable(distributed<database>& db, sstables::e
return f.then([&db, comps = std::move(comps), func = std::move(func)] (sstables::sstable_open_info info) {
// shared components loaded, now opening sstable in all shards that own it with shared components
return do_with(std::move(info), [&db, comps = std::move(comps), func = std::move(func)] (auto& info) {
return invoke_shards_with_ptr(info.owners, db, std::move(info.components),
// All shards that own the sstable are interested in it, in addition to the shard that
// is responsible for its generation. We may need to add this shard manually
// because the sstable may not contain data that belongs to it.
auto shards_interested_in_this_sstable = boost::copy_range<std::unordered_set<shard_id>>(info.owners);
shard_id shard_responsible_for_generation = column_family::calculate_shard_from_sstable_generation(comps.generation);
shards_interested_in_this_sstable.insert(shard_responsible_for_generation);
return invoke_shards_with_ptr(std::move(shards_interested_in_this_sstable), db, std::move(info.components),
[owners = info.owners, data = info.data.dup(), index = info.index.dup(), comps, func] (database& db, auto components) {
auto& cf = db.find_column_family(comps.ks, comps.cf);
return func(cf, sstables::foreign_sstable_open_info{std::move(components), owners, data, index});
@@ -2151,6 +2163,11 @@ database::database(const db::config& cfg, database_config dbcfg)
void backlog_controller::adjust() {
auto backlog = _current_backlog();
if (backlog >= _control_points.back().input) {
update_controller(_control_points.back().output);
return;
}
// interpolate to find out which region we are in. This runs infrequently and there is a fixed
// number of points, so a simple loop will do.
size_t idx = 1;
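The interpolation the hunk above begins can be sketched as a standalone piecewise-linear map over control points. This is an illustrative reconstruction, not the controller's actual code; `control_point` and `interpolate` are hypothetical names, and the early clamp mirrors the `_control_points.back()` check added to `backlog_controller::adjust()`.

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

struct control_point {
    float input;   // measured backlog level
    float output;  // controller output (e.g. shares) at that level
};

// Piecewise-linear interpolation over a sorted list of control points.
// Backlogs at or above the last point clamp to the last output.
inline float interpolate(const std::vector<control_point>& cp, float backlog) {
    if (backlog >= cp.back().input) {
        return cp.back().output;
    }
    // Few fixed points, so a simple linear scan is fine.
    std::size_t idx = 1;
    while (cp[idx].input <= backlog) {
        ++idx;
    }
    const control_point& lo = cp[idx - 1];
    const control_point& hi = cp[idx];
    float frac = (backlog - lo.input) / (hi.input - lo.input);
    return lo.output + frac * (hi.output - lo.output);
}
```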
@@ -2650,6 +2667,7 @@ bool database::update_column_family(schema_ptr new_schema) {
void database::remove(const column_family& cf) {
auto s = cf.schema();
auto& ks = find_keyspace(s->ks_name());
_querier_cache.evict_all_for_table(s->id());
_column_families.erase(s->id());
ks.metadata()->remove_column_family(s);
_ks_cf_to_uuid.erase(std::make_pair(s->ks_name(), s->cf_name()));
@@ -2799,6 +2817,7 @@ keyspace::make_column_family_config(const schema& s, const db::config& db_config
cfg.enable_disk_writes = _config.enable_disk_writes;
cfg.enable_commitlog = _config.enable_commitlog;
cfg.enable_cache = _config.enable_cache;
cfg.compaction_enforce_min_threshold = _config.compaction_enforce_min_threshold;
cfg.dirty_memory_manager = _config.dirty_memory_manager;
cfg.streaming_dirty_memory_manager = _config.streaming_dirty_memory_manager;
cfg.read_concurrency_semaphore = _config.read_concurrency_semaphore;
@@ -2813,6 +2832,7 @@ keyspace::make_column_family_config(const schema& s, const db::config& db_config
cfg.commitlog_scheduling_group = _config.commitlog_scheduling_group;
cfg.enable_metrics_reporting = db_config.enable_keyspace_column_family_metrics();
cfg.large_partition_warning_threshold_bytes = db_config.compaction_large_partition_warning_threshold_mb()*1024*1024;
cfg.view_update_concurrency_semaphore = _config.view_update_concurrency_semaphore;
return cfg;
}
@@ -3497,7 +3517,7 @@ future<> database::do_apply(schema_ptr s, const frozen_mutation& m, db::timeout_
if (cf.views().empty()) {
return apply_with_commitlog(std::move(s), cf, std::move(uuid), m, timeout);
}
future<row_locker::lock_holder> f = cf.push_view_replica_updates(s, m);
future<row_locker::lock_holder> f = cf.push_view_replica_updates(s, m, timeout);
return f.then([this, s = std::move(s), uuid = std::move(uuid), &m, timeout] (row_locker::lock_holder lock) {
auto& cf = find_column_family(uuid);
return apply_with_commitlog(std::move(s), cf, std::move(uuid), m, timeout).finally(
@@ -3567,6 +3587,7 @@ database::make_keyspace_config(const keyspace_metadata& ksm) {
cfg.enable_commitlog = false;
cfg.enable_cache = false;
}
cfg.compaction_enforce_min_threshold = _cfg->compaction_enforce_min_threshold();
cfg.dirty_memory_manager = &_dirty_memory_manager;
cfg.streaming_dirty_memory_manager = &_streaming_dirty_memory_manager;
cfg.read_concurrency_semaphore = &_read_concurrency_sem;
@@ -3581,6 +3602,7 @@ database::make_keyspace_config(const keyspace_metadata& ksm) {
cfg.query_scheduling_group = _dbcfg.query_scheduling_group;
cfg.commitlog_scheduling_group = _dbcfg.commitlog_scheduling_group;
cfg.enable_metrics_reporting = _cfg->enable_keyspace_column_family_metrics();
cfg.view_update_concurrency_semaphore = &_view_update_concurrency_sem;
return cfg;
}
@@ -3757,7 +3779,10 @@ future<> database::truncate(const keyspace& ks, column_family& cf, timestamp_fun
return f.then([this, &cf, truncated_at, low_mark, should_flush] {
return cf.discard_sstables(truncated_at).then([this, &cf, truncated_at, low_mark, should_flush](db::replay_position rp) {
// TODO: indexes.
assert(low_mark <= rp);
// Note: since discard_sstables was changed to only count tables owned by this shard,
// we can get a zero rp back. The assert was changed accordingly, and we ensure we save at least low_mark.
assert(low_mark <= rp || rp == db::replay_position());
rp = std::max(low_mark, rp);
return truncate_views(cf, truncated_at, should_flush).then([&cf, truncated_at, rp] {
return db::system_keyspace::save_truncation_record(cf, truncated_at, rp);
});
@@ -4165,8 +4190,11 @@ future<db::replay_position> column_family::discard_sstables(db_clock::time_point
for (auto& p : *cf._sstables->all()) {
if (p->max_data_age() <= gc_trunc) {
rp = std::max(p->get_stats_metadata().position, rp);
remove.emplace_back(p);
// Only one shard that owns the sstable will submit it for deletion, to avoid a race condition in the delete procedure.
if (*boost::min_element(p->get_shards_for_this_sstable()) == engine().cpu_id()) {
rp = std::max(p->get_stats_metadata().position, rp);
remove.emplace_back(p);
}
continue;
}
pruned->insert(p);
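The deletion-ownership rule in the hunk above (only the lowest-numbered shard among an sstable's owners submits it for deletion, so the delete runs exactly once) boils down to a one-line predicate. A sketch over plain shard-id vectors, with a hypothetical `should_delete` name:

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

// True only on the shard with the minimal id among the sstable's owners,
// mirroring the boost::min_element() check against engine().cpu_id().
inline bool should_delete(const std::vector<unsigned>& owner_shards, unsigned this_shard) {
    return *std::min_element(owner_shards.begin(), owner_shards.end()) == this_shard;
}
```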
@@ -4312,13 +4340,17 @@ std::vector<view_ptr> column_family::affected_views(const schema_ptr& base, cons
future<> column_family::generate_and_propagate_view_updates(const schema_ptr& base,
std::vector<view_ptr>&& views,
mutation&& m,
flat_mutation_reader_opt existings) const {
flat_mutation_reader_opt existings,
db::timeout_clock::time_point timeout) const {
auto base_token = m.token();
return db::view::generate_view_updates(base,
std::move(views),
flat_mutation_reader_from_mutations({std::move(m)}),
std::move(existings)).then([base_token = std::move(base_token)] (auto&& updates) {
db::view::mutate_MV(std::move(base_token), std::move(updates));
std::move(existings)).then([this, timeout, base_token = std::move(base_token)] (auto&& updates) mutable {
return seastar::get_units(*_config.view_update_concurrency_semaphore, 1, timeout).then(
[base_token = std::move(base_token), updates = std::move(updates)] (auto units) mutable {
db::view::mutate_MV(std::move(base_token), std::move(updates)).handle_exception([units = std::move(units)] (auto ignored) { });
});
});
}
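The `seastar::get_units(*_config.view_update_concurrency_semaphore, 1, timeout)` call above caps how many view-update batches are in flight, failing if no unit frees up before the deadline. A minimal standard-C++ sketch of that idea follows; this is not Seastar's API, and all names here are illustrative:

```cpp
#include <cassert>
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <optional>

// A counting semaphore whose acquisition can time out, with an RAII grant
// that returns the unit when destroyed (like the `units` Seastar hands back).
class timed_semaphore {
    std::mutex _m;
    std::condition_variable _cv;
    long _count;
public:
    explicit timed_semaphore(long count) : _count(count) {}

    class units {
        timed_semaphore* _sem;
    public:
        explicit units(timed_semaphore* s) : _sem(s) {}
        units(units&& o) noexcept : _sem(o._sem) { o._sem = nullptr; }
        ~units() { if (_sem) { _sem->signal(); } }
    };

    // Try to take one unit, giving up at the timeout (like a semaphore
    // wait that fails its future with a timeout exception).
    std::optional<units> try_get_unit(std::chrono::milliseconds timeout) {
        std::unique_lock<std::mutex> lk(_m);
        if (!_cv.wait_for(lk, timeout, [this] { return _count > 0; })) {
            return std::nullopt;
        }
        --_count;
        return units(this);
    }

    void signal() {
        { std::lock_guard<std::mutex> lk(_m); ++_count; }
        _cv.notify_one();
    }

    long available() { std::lock_guard<std::mutex> lk(_m); return _count; }
};
```

Holding the grant across the asynchronous `mutate_MV` call, as the hunk does with `units = std::move(units)`, is what turns the semaphore into back-pressure on update generation.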
@@ -4326,7 +4358,7 @@ future<> column_family::generate_and_propagate_view_updates(const schema_ptr& ba
* Given an update for the base table, calculates the set of potentially affected views,
* generates the relevant updates, and sends them to the paired view replicas.
*/
future<row_locker::lock_holder> column_family::push_view_replica_updates(const schema_ptr& s, const frozen_mutation& fm) const {
future<row_locker::lock_holder> column_family::push_view_replica_updates(const schema_ptr& s, const frozen_mutation& fm, db::timeout_clock::time_point timeout) const {
//FIXME: Avoid unfreezing here.
auto m = fm.unfreeze(s);
auto& base = schema();
@@ -4337,7 +4369,7 @@ future<row_locker::lock_holder> column_family::push_view_replica_updates(const s
}
auto cr_ranges = db::view::calculate_affected_clustering_ranges(*base, m.decorated_key(), m.partition(), views);
if (cr_ranges.empty()) {
return generate_and_propagate_view_updates(base, std::move(views), std::move(m), { }).then([] {
return generate_and_propagate_view_updates(base, std::move(views), std::move(m), { }, timeout).then([] {
// In this case we are not doing a read-before-write, just a
// write, so no lock is needed.
return make_ready_future<row_locker::lock_holder>();
@@ -4359,18 +4391,18 @@ future<row_locker::lock_holder> column_family::push_view_replica_updates(const s
// We'll return this lock to the caller, which will release it after
// writing the base-table update.
future<row_locker::lock_holder> lockf = local_base_lock(base, m.decorated_key(), slice.default_row_ranges());
return lockf.then([m = std::move(m), slice = std::move(slice), views = std::move(views), base, this] (row_locker::lock_holder lock) {
return lockf.then([m = std::move(m), slice = std::move(slice), views = std::move(views), base, this, timeout] (row_locker::lock_holder lock) {
return do_with(
dht::partition_range::make_singular(m.decorated_key()),
std::move(slice),
std::move(m),
[base, views = std::move(views), lock = std::move(lock), this] (auto& pk, auto& slice, auto& m) mutable {
[base, views = std::move(views), lock = std::move(lock), this, timeout] (auto& pk, auto& slice, auto& m) mutable {
auto reader = this->as_mutation_source().make_reader(
base,
pk,
slice,
service::get_local_sstable_query_read_priority());
return this->generate_and_propagate_view_updates(base, std::move(views), std::move(m), std::move(reader)).then([lock = std::move(lock)] () mutable {
return this->generate_and_propagate_view_updates(base, std::move(views), std::move(m), std::move(reader), timeout).then([lock = std::move(lock)] () mutable {
// return the local partition/row lock we have taken so it
// remains locked until the caller is done modifying this
// partition/row and destroys the lock object.
@@ -4516,16 +4548,14 @@ flat_mutation_reader make_local_shard_sstable_reader(schema_ptr s,
}
return reader;
};
return make_combined_reader(s, std::make_unique<incremental_reader_selector>(s,
std::move(sstables),
pr,
slice,
pc,
std::move(resource_tracker),
std::move(trace_state),
fwd,
fwd_mr,
std::move(reader_factory_fn)),
auto all_readers = boost::copy_range<std::vector<flat_mutation_reader>>(
*sstables->all()
| boost::adaptors::transformed([&] (sstables::shared_sstable sst) -> flat_mutation_reader {
return reader_factory_fn(sst, pr);
})
);
return make_combined_reader(s,
std::move(all_readers),
fwd,
fwd_mr);
}
@@ -4544,16 +4574,14 @@ flat_mutation_reader make_range_sstable_reader(schema_ptr s,
auto reader_factory_fn = [s, &slice, &pc, resource_tracker, fwd, fwd_mr, &monitor_generator] (sstables::shared_sstable& sst, const dht::partition_range& pr) {
return sst->read_range_rows_flat(s, pr, slice, pc, resource_tracker, fwd, fwd_mr, monitor_generator(sst));
};
return make_combined_reader(s, std::make_unique<incremental_reader_selector>(s,
std::move(sstables),
pr,
slice,
pc,
std::move(resource_tracker),
std::move(trace_state),
fwd,
fwd_mr,
std::move(reader_factory_fn)),
auto sstable_readers = boost::copy_range<std::vector<flat_mutation_reader>>(
*sstables->all()
| boost::adaptors::transformed([&] (sstables::shared_sstable sst) {
return reader_factory_fn(sst, pr);
})
);
return make_combined_reader(s,
std::move(sstable_readers),
fwd,
fwd_mr);
}

View File

@@ -297,6 +297,7 @@ public:
bool enable_cache = true;
bool enable_commitlog = true;
bool enable_incremental_backups = false;
bool compaction_enforce_min_threshold = false;
::dirty_memory_manager* dirty_memory_manager = &default_dirty_memory_manager;
::dirty_memory_manager* streaming_dirty_memory_manager = &default_dirty_memory_manager;
reader_concurrency_semaphore* read_concurrency_semaphore;
@@ -310,6 +311,7 @@ public:
seastar::scheduling_group streaming_scheduling_group;
bool enable_metrics_reporting = false;
uint64_t large_partition_warning_threshold_bytes = std::numeric_limits<uint64_t>::max();
db::timeout_semaphore* view_update_concurrency_semaphore;
};
struct no_commitlog {};
struct stats {
@@ -734,6 +736,10 @@ public:
_config.enable_incremental_backups = val;
}
bool compaction_enforce_min_threshold() const {
return _config.compaction_enforce_min_threshold;
}
const sstables::sstable_set& get_sstable_set() const;
lw_shared_ptr<sstable_list> get_sstables() const;
lw_shared_ptr<sstable_list> get_sstables_including_compacted_undeleted() const;
@@ -787,7 +793,7 @@ public:
void add_or_update_view(view_ptr v);
void remove_view(view_ptr v);
const std::vector<view_ptr>& views() const;
future<row_locker::lock_holder> push_view_replica_updates(const schema_ptr& s, const frozen_mutation& fm) const;
future<row_locker::lock_holder> push_view_replica_updates(const schema_ptr& s, const frozen_mutation& fm, db::timeout_clock::time_point timeout) const;
void add_coordinator_read_latency(utils::estimated_histogram::duration latency);
std::chrono::milliseconds get_coordinator_read_latency_percentile(double percentile);
@@ -803,7 +809,8 @@ private:
future<> generate_and_propagate_view_updates(const schema_ptr& base,
std::vector<view_ptr>&& views,
mutation&& m,
flat_mutation_reader_opt existings) const;
flat_mutation_reader_opt existings,
db::timeout_clock::time_point timeout) const;
mutable row_locker _row_locker;
future<row_locker::lock_holder> local_base_lock(const schema_ptr& s, const dht::decorated_key& pk, const query::clustering_row_ranges& rows) const;
@@ -977,6 +984,7 @@ public:
bool enable_disk_writes = true;
bool enable_cache = true;
bool enable_incremental_backups = false;
bool compaction_enforce_min_threshold = false;
::dirty_memory_manager* dirty_memory_manager = &default_dirty_memory_manager;
::dirty_memory_manager* streaming_dirty_memory_manager = &default_dirty_memory_manager;
reader_concurrency_semaphore* read_concurrency_semaphore;
@@ -989,6 +997,7 @@ public:
seastar::scheduling_group query_scheduling_group;
seastar::scheduling_group streaming_scheduling_group;
bool enable_metrics_reporting = false;
db::timeout_semaphore* view_update_concurrency_semaphore = nullptr;
};
private:
std::unique_ptr<locator::abstract_replication_strategy> _replication_strategy;
@@ -1119,6 +1128,8 @@ private:
semaphore _sstable_load_concurrency_sem{max_concurrent_sstable_loads()};
db::timeout_semaphore _view_update_concurrency_sem{100}; // Stand-in hack for #2538
concrete_execution_stage<future<lw_shared_ptr<query::result>>,
column_family*,
schema_ptr,

View File

@@ -723,7 +723,7 @@ public:
*/
auto me = shared_from_this();
auto fp = _file_pos;
return _pending_ops.wait_for_pending(timeout).then([me = std::move(me), fp, timeout] {
return _pending_ops.wait_for_pending(timeout).then([me, fp, timeout] {
if (fp != me->_file_pos) {
// some other request already wrote this buffer.
// If so, wait for the operation at our intended file offset

View File

@@ -125,6 +125,9 @@ public:
val(compaction_static_shares, float, 0, Used, \
"If set to higher than 0, ignore the controller's output and set the compaction shares statically. Do not set this unless you know what you are doing and suspect a problem in the controller. This option will be retired when the controller reaches more maturity" \
) \
val(compaction_enforce_min_threshold, bool, false, Used, \
"If set to true, enforce the min_threshold option for compactions strictly. If false (default), Scylla may decide to compact even if below min_threshold" \
) \
/* Initialization properties */ \
/* The minimal properties needed for configuring a cluster. */ \
val(cluster_name, sstring, "", Used, \

View File

@@ -827,15 +827,6 @@ static future<> do_merge_schema(distributed<service::storage_proxy>& proxy, std:
/*auto& old_aggregates = */read_schema_for_keyspaces(proxy, AGGREGATES, keyspaces).get0();
#endif
// Incoming mutations have the version field deleted. Delete here as well so that
// schemas which are otherwise equal don't appear as differing.
for (auto&& e : old_column_families) {
schema_mutations& sm = e.second;
if (sm.scylla_tables()) {
delete_schema_version(*sm.scylla_tables());
}
}
proxy.local().mutate_locally(std::move(mutations)).get0();
if (do_flush) {

View File

@@ -27,6 +27,6 @@
namespace db {
using timeout_clock = seastar::lowres_clock;
using timeout_semaphore = basic_semaphore<default_timeout_exception_factory, timeout_clock>;
using timeout_semaphore = seastar::basic_semaphore<seastar::default_timeout_exception_factory, timeout_clock>;
static constexpr timeout_clock::time_point no_timeout = timeout_clock::time_point::max();
}

View File

@@ -175,6 +175,31 @@ static bool update_requires_read_before_write(const schema& base,
return false;
}
static bool is_partition_key_empty(
const schema& base,
const schema& view_schema,
const partition_key& base_key,
const clustering_row& update) {
// Empty partition keys are not supported on normal tables - they cannot
// be inserted or queried, so enforce those rules here.
if (view_schema.partition_key_columns().size() > 1) {
// Composite partition keys are different: all components
// are then allowed to be empty.
return false;
}
auto* base_col = base.get_column_definition(view_schema.partition_key_columns().front().name());
switch (base_col->kind) {
case column_kind::partition_key:
return base_key.get_component(base, base_col->position()).empty();
case column_kind::clustering_key:
return update.key().get_component(base, base_col->position()).empty();
default:
// No multi-cell columns in the view's partition key
auto& c = update.cells().cell_at(base_col->id);
return c.as_atomic_cell().value().empty();
}
}
bool matches_view_filter(const schema& base, const view_info& view, const partition_key& key, const clustering_row& update, gc_clock::time_point now) {
return clustering_prefix_matches(base, view, key, update.key())
&& boost::algorithm::all_of(
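The rule `is_partition_key_empty()` encodes above (a single-component view partition key may not be empty, while any component of a composite key may be) reduces to a small sketch over simplified types. The real code resolves the component's value through the base schema; here a plain vector of component values stands in, and the function name is reused only for illustration:

```cpp
#include <cassert>
#include <string>
#include <vector>

// Empty partition keys are not supported on normal tables, so a view row
// whose single-component partition key would be empty must be skipped.
// Composite partition keys are exempt: all of their components are allowed
// to be empty.
inline bool is_partition_key_empty(const std::vector<std::string>& key_components) {
    if (key_components.size() > 1) {
        // Composite partition key: empty components are allowed.
        return false;
    }
    return key_components.empty() || key_components.front().empty();
}
```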
@@ -330,7 +355,7 @@ static void add_cells_to_view(const schema& base, const schema& view, const row&
* This method checks that the base row does match the view filter before applying anything.
*/
void view_updates::create_entry(const partition_key& base_key, const clustering_row& update, gc_clock::time_point now) {
if (!matches_view_filter(*_base, _view_info, base_key, update, now)) {
if (is_partition_key_empty(*_base, *_view, base_key, update) || !matches_view_filter(*_base, _view_info, base_key, update, now)) {
return;
}
deletable_row& r = get_view_row(base_key, update);
@@ -346,7 +371,7 @@ void view_updates::create_entry(const partition_key& base_key, const clustering_
void view_updates::delete_old_entry(const partition_key& base_key, const clustering_row& existing, const row_tombstone& t, gc_clock::time_point now) {
// Before deleting an old entry, make sure it was matching the view filter
// (otherwise there is nothing to delete)
if (matches_view_filter(*_base, _view_info, base_key, existing, now)) {
if (!is_partition_key_empty(*_base, *_view, base_key, existing) && matches_view_filter(*_base, _view_info, base_key, existing, now)) {
do_delete_old_entry(base_key, existing, t, now);
}
}
@@ -391,11 +416,11 @@ void view_updates::do_delete_old_entry(const partition_key& base_key, const clus
void view_updates::update_entry(const partition_key& base_key, const clustering_row& update, const clustering_row& existing, gc_clock::time_point now) {
// While we know update and existing correspond to the same view entry,
// they may not match the view filter.
if (!matches_view_filter(*_base, _view_info, base_key, existing, now)) {
if (is_partition_key_empty(*_base, *_view, base_key, existing) || !matches_view_filter(*_base, _view_info, base_key, existing, now)) {
create_entry(base_key, update, now);
return;
}
if (!matches_view_filter(*_base, _view_info, base_key, update, now)) {
if (is_partition_key_empty(*_base, *_view, base_key, update) || !matches_view_filter(*_base, _view_info, base_key, update, now)) {
do_delete_old_entry(base_key, existing, row_tombstone(), now);
return;
}
@@ -791,7 +816,7 @@ get_view_natural_endpoint(const sstring& keyspace_name,
// for the writes to complete.
// FIXME: I dropped a lot of parameters the Cassandra version had,
// we may need them back: writeCommitLog, baseComplete, queryStartNanoTime.
void mutate_MV(const dht::token& base_token,
future<> mutate_MV(const dht::token& base_token,
std::vector<mutation> mutations)
{
#if 0
@@ -823,6 +848,7 @@ void mutate_MV(const dht::token& base_token,
() -> asyncRemoveFromBatchlog(batchlogEndpoints, batchUUID));
// add a handler for each mutation - includes checking availability, but doesn't initiate any writes, yet
#endif
auto fs = std::make_unique<std::vector<future<>>>();
for (auto& mut : mutations) {
auto view_token = mut.token();
auto keyspace_name = mut.schema()->ks_name();
@@ -838,9 +864,10 @@ void mutate_MV(const dht::token& base_token,
// do not wait for it to complete.
// Note also that mutate_locally(mut) copies mut (in
// frozen form) so we don't need to increase its lifetime.
service::get_local_storage_proxy().mutate_locally(mut).handle_exception([] (auto ep) {
fs->push_back(service::get_local_storage_proxy().mutate_locally(mut).handle_exception([] (auto ep) {
vlogger.error("Error applying local view update: {}", ep);
});
return make_exception_future<>(std::move(ep));
}));
} else {
#if 0
wrappers.add(wrapViewBatchResponseHandler(mutation,
@@ -856,9 +883,10 @@ void mutate_MV(const dht::token& base_token,
// without a batchlog, and without checking for success
// Note we don't wait for the asynchronous operation to complete
// FIXME: need to extend mut's lifetime???
service::get_local_storage_proxy().send_to_endpoint(mut, *paired_endpoint, db::write_type::VIEW).handle_exception([paired_endpoint] (auto ep) {
fs->push_back(service::get_local_storage_proxy().send_to_endpoint(mut, *paired_endpoint, db::write_type::VIEW).handle_exception([paired_endpoint] (auto ep) {
vlogger.error("Error applying view update to {}: {}", *paired_endpoint, ep);
});;
return make_exception_future<>(std::move(ep));
}));
}
} else {
#if 0
@@ -901,6 +929,8 @@ void mutate_MV(const dht::token& base_token,
viewWriteMetrics.addNano(System.nanoTime() - startTime);
}
#endif
auto f = seastar::when_all_succeed(fs->begin(), fs->end());
return f.finally([fs = std::move(fs)] { });
}
} // namespace view
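The reworked `mutate_MV()` above collects one future per mutation and resolves only when all of them complete, instead of firing each write and forgetting it. The same pattern, sketched with standard C++ futures standing in for Seastar's (`run_all_and_wait` is a hypothetical name):

```cpp
#include <cassert>
#include <future>
#include <vector>

// Launch one task per item, keep every future, and only return once all
// have finished -- the moral equivalent of collecting futures into `fs`
// and calling seastar::when_all_succeed(fs->begin(), fs->end()).
template <typename Fn>
int run_all_and_wait(const std::vector<int>& items, Fn apply) {
    std::vector<std::future<int>> fs;
    fs.reserve(items.size());
    for (int item : items) {
        fs.push_back(std::async(std::launch::async, apply, item));
    }
    int completed = 0;
    for (auto& f : fs) {
        completed += f.get();  // rethrows if any task failed
    }
    return completed;
}
```

Returning a future that covers all the per-mutation writes is what lets the caller (via the semaphore above it) throttle view-update generation instead of queueing unboundedly.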

View File

@@ -92,7 +92,7 @@ query::clustering_row_ranges calculate_affected_clustering_ranges(
const mutation_partition& mp,
const std::vector<view_ptr>& views);
void mutate_MV(const dht::token& base_token,
future<> mutate_MV(const dht::token& base_token,
std::vector<mutation> mutations);
}

View File

@@ -1 +0,0 @@
options raid0 devices_discard_performance=Y

View File

@@ -96,7 +96,9 @@ elif is_gentoo_variant; then
emerge -uq sys-fs/mdadm sys-fs/xfsprogs
fi
if [ "$ID" = "ubuntu" ] && [ "$VERSION_ID" = "14.04" ]; then
udevadm settle
mdadm --create --verbose --force --run $RAID --level=0 -c1024 --raid-devices=$NR_DISK $DISKS
udevadm settle
mkfs.xfs $RAID -f
else
for dsk in $DISKS; do
@@ -107,7 +109,9 @@ else
fi
done
wait
udevadm settle
mdadm --create --verbose --force --run $RAID --level=0 -c1024 --raid-devices=$NR_DISK $DISKS
udevadm settle
mkfs.xfs $RAID -f -K
fi
if is_debian_variant; then

View File

@@ -2,10 +2,11 @@
. /etc/os-release
print_usage() {
echo "build_deb.sh -target <codename> --dist --rebuild-dep"
echo "build_deb.sh --target <codename> --dist --rebuild-dep --jobs 2"
echo " --target target distribution codename"
echo " --dist create a public distribution package"
echo " --no-clean don't rebuild pbuilder tgz"
echo " --jobs specify number of jobs"
exit 1
}
install_deps() {
@@ -19,6 +20,7 @@ install_deps() {
DIST=0
TARGET=
NO_CLEAN=0
JOBS=0
while [ $# -gt 0 ]; do
case "$1" in
"--dist")
@@ -33,6 +35,10 @@ while [ $# -gt 0 ]; do
NO_CLEAN=1
shift 1
;;
"--jobs")
JOBS=$2
shift 2
;;
*)
print_usage
;;
@@ -131,7 +137,7 @@ if [ "$TARGET" = "jessie" ]; then
sed -i -e "s/@@INSTALL_FSTRIM@@/dh_installinit --no-start --name scylla-fstrim/g" debian/rules
sed -i -e "s/@@INSTALL_NODE_EXPORTER@@/dh_installinit --no-start --name node-exporter/g" debian/rules
sed -i -e "s#@@COMPILER@@#/opt/scylladb/bin/g++-7#g" debian/rules
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, scylla-gcc72-g++-7, libunwind-dev, scylla-antlr35, scylla-libthrift010-dev, scylla-antlr35-c++-dev, scylla-libboost-program-options163-dev, scylla-libboost-filesystem163-dev, scylla-libboost-system163-dev, scylla-libboost-thread163-dev, scylla-libboost-test163-dev/g" debian/control
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, scylla-gcc73-g++-7, libunwind-dev, scylla-antlr35, scylla-libthrift010-dev, scylla-antlr35-c++-dev, scylla-libboost-program-options165-dev, scylla-libboost-filesystem165-dev, scylla-libboost-system165-dev, scylla-libboost-thread165-dev, scylla-libboost-test165-dev/g" debian/control
sed -i -e "s/@@DEPENDS@@//g" debian/control
sed -i -e "s#@@INSTALL@@##g" debian/scylla-server.install
sed -i -e "s#@@HKDOTTIMER_D@@#dist/common/systemd/scylla-housekeeping-daily.timer /lib/systemd/system#g" debian/scylla-server.install
@@ -148,7 +154,7 @@ elif [ "$TARGET" = "stretch" ]; then
sed -i -e "s/@@INSTALL_FSTRIM@@/dh_installinit --no-start --name scylla-fstrim/g" debian/rules
sed -i -e "s/@@INSTALL_NODE_EXPORTER@@/dh_installinit --no-start --name node-exporter/g" debian/rules
sed -i -e "s#@@COMPILER@@#/opt/scylladb/bin/g++-7#g" debian/rules
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, scylla-gcc72-g++-7, libunwind-dev, antlr3, scylla-libthrift010-dev, scylla-antlr35-c++-dev, libboost-program-options1.62-dev, libboost-filesystem1.62-dev, libboost-system1.62-dev, libboost-thread1.62-dev, libboost-test1.62-dev/g" debian/control
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, scylla-gcc73-g++-7, libunwind-dev, antlr3, scylla-libthrift010-dev, scylla-antlr35-c++-dev, libboost-program-options1.62-dev, libboost-filesystem1.62-dev, libboost-system1.62-dev, libboost-thread1.62-dev, libboost-test1.62-dev/g" debian/control
sed -i -e "s/@@DEPENDS@@//g" debian/control
sed -i -e "s#@@INSTALL@@##g" debian/scylla-server.install
sed -i -e "s#@@HKDOTTIMER_D@@#dist/common/systemd/scylla-housekeeping-daily.timer /lib/systemd/system#g" debian/scylla-server.install
@@ -166,7 +172,7 @@ elif [ "$TARGET" = "trusty" ]; then
sed -i -e "s/@@INSTALL_FSTRIM@@//g" debian/rules
sed -i -e "s/@@INSTALL_NODE_EXPORTER@@//g" debian/rules
sed -i -e "s#@@COMPILER@@#/opt/scylladb/bin/g++-7#g" debian/rules
sed -i -e "s/@@BUILD_DEPENDS@@/scylla-gcc72-g++-7, libunwind8-dev, scylla-antlr35, scylla-libthrift010-dev, scylla-antlr35-c++-dev, scylla-libboost-program-options163-dev, scylla-libboost-filesystem163-dev, scylla-libboost-system163-dev, scylla-libboost-thread163-dev, scylla-libboost-test163-dev/g" debian/control
sed -i -e "s/@@BUILD_DEPENDS@@/scylla-gcc73-g++-7, libunwind8-dev, scylla-antlr35, scylla-libthrift010-dev, scylla-antlr35-c++-dev, scylla-libboost-program-options165-dev, scylla-libboost-filesystem165-dev, scylla-libboost-system165-dev, scylla-libboost-thread165-dev, scylla-libboost-test165-dev/g" debian/control
sed -i -e "s/@@DEPENDS@@/hugepages, num-utils/g" debian/control
sed -i -e "s#@@INSTALL@@#dist/debian/sudoers.d/scylla etc/sudoers.d#g" debian/scylla-server.install
sed -i -e "s#@@HKDOTTIMER_D@@##g" debian/scylla-server.install
@@ -183,7 +189,7 @@ elif [ "$TARGET" = "xenial" ]; then
sed -i -e "s/@@INSTALL_FSTRIM@@/dh_installinit --no-start --name scylla-fstrim/g" debian/rules
sed -i -e "s/@@INSTALL_NODE_EXPORTER@@/dh_installinit --no-start --name node-exporter/g" debian/rules
sed -i -e "s#@@COMPILER@@#/opt/scylladb/bin/g++-7#g" debian/rules
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, scylla-gcc72-g++-7, libunwind-dev, antlr3, scylla-libthrift010-dev, scylla-antlr35-c++-dev, scylla-libboost-program-options163-dev, scylla-libboost-filesystem163-dev, scylla-libboost-system163-dev, scylla-libboost-thread163-dev, scylla-libboost-test163-dev/g" debian/control
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, scylla-gcc73-g++-7, libunwind-dev, antlr3, scylla-libthrift010-dev, scylla-antlr35-c++-dev, scylla-libboost-program-options165-dev, scylla-libboost-filesystem165-dev, scylla-libboost-system165-dev, scylla-libboost-thread165-dev, scylla-libboost-test165-dev/g" debian/control
sed -i -e "s/@@DEPENDS@@/hugepages, /g" debian/control
sed -i -e "s#@@INSTALL@@##g" debian/scylla-server.install
sed -i -e "s#@@HKDOTTIMER_D@@#dist/common/systemd/scylla-housekeeping-daily.timer /lib/systemd/system#g" debian/scylla-server.install
@@ -200,7 +206,7 @@ elif [ "$TARGET" = "bionic" ]; then
sed -i -e "s/@@INSTALL_FSTRIM@@/dh_installinit --no-start --name scylla-fstrim/g" debian/rules
sed -i -e "s/@@INSTALL_NODE_EXPORTER@@/dh_installinit --no-start --name node-exporter/g" debian/rules
sed -i -e "s#@@COMPILER@@#g++-7#g" debian/rules
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, g++, libunwind-dev, antlr3, scylla-libthrift010-dev, scylla-antlr35-c++-dev, libboost-program-options-dev, libboost-filesystem-dev, libboost-system-dev, libboost-thread-dev, libboost-test-dev/g" debian/control
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, scylla-gcc73-g++-7, libunwind-dev, antlr3, scylla-libthrift010-dev, scylla-antlr35-c++-dev, scylla-libboost-program-options165-dev, scylla-libboost-filesystem165-dev, scylla-libboost-system165-dev, scylla-libboost-thread165-dev, scylla-libboost-test165-dev/g" debian/control
sed -i -e "s/@@DEPENDS@@/hugepages, /g" debian/control
sed -i -e "s#@@INSTALL@@##g" debian/scylla-server.install
sed -i -e "s#@@HKDOTTIMER_D@@#dist/common/systemd/scylla-housekeeping-daily.timer /lib/systemd/system#g" debian/scylla-server.install
@@ -237,6 +243,9 @@ fi
if [ "$TARGET" != "trusty" ]; then
cp dist/common/systemd/scylla-server.service.in debian/scylla-server.service
sed -i -e "s#@@SYSCONFDIR@@#/etc/default#g" debian/scylla-server.service
if [ "$TARGET" = "jessie" ]; then
sed -i -e "s#AmbientCapabilities=CAP_SYS_NICE##g" debian/scylla-server.service
fi
cp dist/common/systemd/scylla-housekeeping-daily.service.in debian/scylla-server.scylla-housekeeping-daily.service
sed -i -e "s#@@REPOFILES@@#'/etc/apt/sources.list.d/scylla*.list'#g" debian/scylla-server.scylla-housekeeping-daily.service
cp dist/common/systemd/scylla-housekeeping-restart.service.in debian/scylla-server.scylla-housekeeping-restart.service
@@ -245,16 +254,19 @@ if [ "$TARGET" != "trusty" ]; then
cp dist/common/systemd/node-exporter.service debian/scylla-server.node-exporter.service
fi
cp ./dist/debian/pbuilderrc ~/.pbuilderrc
sudo cp ./dist/debian/pbuilderrc ~root/.pbuilderrc
if [ $NO_CLEAN -eq 0 ]; then
sudo rm -fv /var/cache/pbuilder/scylla-server-$TARGET.tgz
sudo -E DIST=$TARGET /usr/sbin/pbuilder clean
sudo -E DIST=$TARGET /usr/sbin/pbuilder create --allow-untrusted
sudo -H DIST=$TARGET /usr/sbin/pbuilder clean
sudo -H DIST=$TARGET /usr/sbin/pbuilder create --allow-untrusted
fi
sudo -E DIST=$TARGET /usr/sbin/pbuilder update --allow-untrusted
if [ $JOBS -ne 0 ]; then
DEB_BUILD_OPTIONS="parallel=$JOBS"
fi
sudo -H DIST=$TARGET /usr/sbin/pbuilder update --allow-untrusted
if [ "$TARGET" = "trusty" ] || [ "$TARGET" = "xenial" ] || [ "$TARGET" = "yakkety" ] || [ "$TARGET" = "zesty" ] || [ "$TARGET" = "artful" ] || [ "$TARGET" = "bionic" ]; then
sudo -E DIST=$TARGET /usr/sbin/pbuilder execute --save-after-exec dist/debian/ubuntu_enable_ppa.sh
sudo -H DIST=$TARGET /usr/sbin/pbuilder execute --save-after-exec dist/debian/ubuntu_enable_ppa.sh
elif [ "$TARGET" = "jessie" ] || [ "$TARGET" = "stretch" ]; then
sudo -E DIST=$TARGET /usr/sbin/pbuilder execute --save-after-exec dist/debian/debian_install_gpgkey.sh
sudo -H DIST=$TARGET /usr/sbin/pbuilder execute --save-after-exec dist/debian/debian_install_gpgkey.sh
fi
sudo -E DIST=$TARGET pdebuild --buildresult build/debs
sudo -H DIST=$TARGET DEB_BUILD_OPTIONS=$DEB_BUILD_OPTIONS pdebuild --buildresult build/debs


@@ -1,12 +1,13 @@
#!/usr/bin/make -f
export PYBUILD_DISABLE=1
jobs := $(shell echo $$DEB_BUILD_OPTIONS | sed -r "s/.*parallel=([0-9]+).*/-j\1/")
override_dh_auto_configure:
./configure.py --enable-dpdk --mode=release --static-thrift --static-boost --static-yaml-cpp --compiler=@@COMPILER@@ --cflags="-I/opt/scylladb/include -L/opt/scylladb/lib/x86-linux-gnu/" --ldflags="-Wl,-rpath=/opt/scylladb/lib"
./configure.py --with=scylla --with=iotune --enable-dpdk --mode=release --static-thrift --static-boost --static-yaml-cpp --compiler=@@COMPILER@@ --cflags="-I/opt/scylladb/include -L/opt/scylladb/lib/x86-linux-gnu/" --ldflags="-Wl,-rpath=/opt/scylladb/lib"
override_dh_auto_build:
PATH="/opt/scylladb/bin:$$PATH" ninja
PATH="/opt/scylladb/bin:$$PATH" ninja $(jobs)
override_dh_auto_clean:
rm -rf build/release seastar/build


@@ -26,7 +26,7 @@ ADD commandlineparser.py /commandlineparser.py
ADD docker-entrypoint.py /docker-entrypoint.py
# Install Scylla:
RUN curl http://downloads.scylladb.com/rpm/unstable/centos/master/latest/scylla.repo -o /etc/yum.repos.d/scylla.repo && \
RUN curl http://downloads.scylladb.com/rpm/centos/scylla-2.2.repo -o /etc/yum.repos.d/scylla.repo && \
yum -y install epel-release && \
yum -y clean expire-cache && \
yum -y update && \


@@ -9,7 +9,8 @@ def parse():
parser.add_argument('--cpuset', default=None, help="e.g. --cpuset 0-3 for the first four CPUs")
parser.add_argument('--smp', default=None, help="e.g --smp 2 to use two CPUs")
parser.add_argument('--memory', default=None, help="e.g. --memory 1G to use 1 GB of RAM")
parser.add_argument('--overprovisioned', default='0', choices=['0', '1'], help="run in overprovisioned environment")
parser.add_argument('--overprovisioned', default=None, choices=['0', '1'],
help="run in overprovisioned environment. By default it will run in overprovisioned mode unless --cpuset is specified")
parser.add_argument('--listen-address', default=None, dest='listenAddress')
parser.add_argument('--broadcast-address', default=None, dest='broadcastAddress')
parser.add_argument('--broadcast-rpc-address', default=None, dest='broadcastRpcAddress')


@@ -53,7 +53,7 @@ class ScyllaSetup:
args += [ "--memory %s" % self._memory ]
if self._smp is not None:
args += [ "--smp %s" % self._smp ]
if self._overprovisioned == "1":
if self._overprovisioned == "1" or (self._overprovisioned is None and self._cpuset is None):
args += [ "--overprovisioned" ]
if self._listenAddress is None:

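The diff above changes the entrypoint's default: overprovisioned mode is now on unless the user explicitly passes `--overprovisioned 0` or pins CPUs with `--cpuset`. A minimal Python sketch of that decision (the function name is illustrative, not part of the entrypoint):

```python
def overprovisioned_enabled(overprovisioned, cpuset):
    """Mirror of the patched docker-entrypoint default: an explicit '1'
    enables the flag, an explicit '0' disables it, and when unspecified
    (None) it is enabled unless --cpuset pins the container to CPUs."""
    return overprovisioned == "1" or (overprovisioned is None and cpuset is None)
```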

@@ -7,8 +7,12 @@ Group: Applications/Databases
License: AGPLv3
URL: http://www.scylladb.com/
Source0: %{name}-@@VERSION@@-@@RELEASE@@.tar
Requires: scylla-server = @@VERSION@@ scylla-jmx = @@VERSION@@ scylla-tools = @@VERSION@@ scylla-tools-core = @@VERSION@@ scylla-kernel-conf = @@VERSION@@ scylla-libgcc72 scylla-libstdc++72
Requires: scylla-server = @@VERSION@@ scylla-jmx = @@VERSION@@ scylla-tools = @@VERSION@@ scylla-tools-core = @@VERSION@@ scylla-kernel-conf = @@VERSION@@ scylla-libgcc73 scylla-libstdc++73
Obsoletes: scylla-server < 1.1
Obsoletes: scylla-libgcc72
Obsoletes: scylla-libstdc++72
Provides: scylla-libgcc72
Provides: scylla-libstdc++72
%description
Scylla is a highly scalable, eventually consistent, distributed,
@@ -52,7 +56,7 @@ License: AGPLv3
URL: http://www.scylladb.com/
BuildRequires: libaio-devel libstdc++-devel cryptopp-devel hwloc-devel numactl-devel libpciaccess-devel libxml2-devel zlib-devel thrift-devel yaml-cpp-devel yaml-cpp-static lz4-devel snappy-devel jsoncpp-devel systemd-devel xz-devel pcre-devel elfutils-libelf-devel bzip2-devel keyutils-libs-devel xfsprogs-devel make gnutls-devel systemd-devel lksctp-tools-devel protobuf-devel protobuf-compiler libunwind-devel systemtap-sdt-devel ninja-build cmake python ragel grep kernel-headers
%{?fedora:BuildRequires: boost-devel antlr3-tool antlr3-C++-devel python3 gcc-c++ libasan libubsan python3-pyparsing dnf-yum}
%{?rhel:BuildRequires: scylla-libstdc++72-static scylla-boost163-devel scylla-boost163-static scylla-antlr35-tool scylla-antlr35-C++-devel python34 scylla-gcc72-c++, scylla-python34-pyparsing20}
%{?rhel:BuildRequires: scylla-libstdc++73-static scylla-boost163-devel scylla-boost163-static scylla-antlr35-tool scylla-antlr35-C++-devel python34 scylla-gcc73-c++, scylla-python34-pyparsing20}
Requires: scylla-conf systemd-libs hwloc collectd PyYAML python-urwid pciutils pyparsing python-requests curl util-linux python-setuptools pciutils python3-pyudev mdadm xfsprogs
%{?rhel:Requires: python34 python34-PyYAML kernel >= 3.10.0-514}
%{?fedora:Requires: python3 python3-PyYAML}
@@ -86,11 +90,11 @@ cflags="--cflags=${defines[*]}"
%define is_housekeeping_conf %( if @@HOUSEKEEPING_CONF@@; then echo "1" ; else echo "0"; fi )
%if 0%{?fedora}
./configure.py %{?configure_opt} --mode=release "$cflags"
./configure.py %{?configure_opt} --with=scylla --with=iotune --mode=release "$cflags"
%endif
%if 0%{?rhel}
. /etc/profile.d/scylla.sh
python3.4 ./configure.py %{?configure_opt} --mode=release "$cflags" --static-boost --static-yaml-cpp --compiler=/opt/scylladb/bin/g++-7.2 --python python3.4 --ldflag=-Wl,-rpath=/opt/scylladb/lib64
python3.4 ./configure.py %{?configure_opt} --with=scylla --with=iotune --mode=release "$cflags" --static-boost --static-yaml-cpp --compiler=/opt/scylladb/bin/g++-7.3 --python python3.4 --ldflag=-Wl,-rpath=/opt/scylladb/lib64
%endif
ninja-build %{?_smp_mflags} build/release/scylla build/release/iotune
cp dist/common/systemd/scylla-server.service.in build/scylla-server.service
@@ -109,9 +113,6 @@ mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/security/limits.d/
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/collectd.d/
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/scylla/
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/scylla.d/
%if 0%{?rhel}
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/
%endif
mkdir -p $RPM_BUILD_ROOT%{_sysctldir}/
mkdir -p $RPM_BUILD_ROOT%{_docdir}/scylla/
mkdir -p $RPM_BUILD_ROOT%{_unitdir}
@@ -122,9 +123,6 @@ install -m644 dist/common/limits.d/scylla.conf $RPM_BUILD_ROOT%{_sysconfdir}/sec
install -m644 dist/common/collectd.d/scylla.conf $RPM_BUILD_ROOT%{_sysconfdir}/collectd.d/
install -m644 dist/common/scylla.d/*.conf $RPM_BUILD_ROOT%{_sysconfdir}/scylla.d/
install -m644 dist/common/sysctl.d/*.conf $RPM_BUILD_ROOT%{_sysctldir}/
%if 0%{?rhel}
install -m644 dist/common/modprobe.d/*.conf $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/
%endif
install -d -m755 $RPM_BUILD_ROOT%{_sysconfdir}/scylla
install -m644 conf/scylla.yaml $RPM_BUILD_ROOT%{_sysconfdir}/scylla/
install -m644 conf/cassandra-rackdc.properties $RPM_BUILD_ROOT%{_sysconfdir}/scylla/
@@ -317,18 +315,9 @@ if Scylla is the main application on your server and you wish to optimize its la
# We cannot use the sysctl_apply rpm macro because it is not present in 7.0
# following is a "manual" expansion
/usr/lib/systemd/systemd-sysctl 99-scylla-sched.conf >/dev/null 2>&1 || :
# Write modprobe.d params when module already loaded
%if 0%{?rhel}
if [ -e /sys/module/raid0/parameters/devices_discard_performance ]; then
echo Y > /sys/module/raid0/parameters/devices_discard_performance
fi
%endif
%files kernel-conf
%defattr(-,root,root)
%if 0%{?rhel}
%config(noreplace) %{_sysconfdir}/modprobe.d/*.conf
%endif
%{_sysctldir}/*.conf
%changelog


@@ -77,10 +77,9 @@ $ docker run --name some-scylla --volume /var/lib/scylla:/var/lib/scylla -d scyl
## Configuring resource limits
Scylla utilizes all CPUs and all memory by default.
To configure resource limits for your Docker container, you can use the `--smp`, `--memory`, and `--cpuset` command line options documented in the section "Command-line options".
If you run multiple Scylla instances on the same machine, it is highly recommended that you enable the `--overprovisioned` command line option, which enables certain optimizations for Scylla to run efficiently in an overprovisioned environment.
The Scylla docker image defaults to running in overprovisioned mode and won't apply any CPU pinning optimizations, which it normally does in non-containerized environments.
For better performance, it is recommended to configure resource limits for your Docker container using the `--smp`, `--memory`, and `--cpuset` command line options, as well as
disabling the overprovisioned flag as documented in the section "Command-line options".
## Restart Scylla
@@ -163,12 +162,13 @@ $ docker run --name some-scylla -d scylladb/scylla --memory 4G
### `--overprovisioned ENABLE`
The `--overprovisioned` command line option enables or disables optimizations for running Scylla in an overprovisioned environment.
If no `--overprovisioned` option is specified, Scylla defaults to running with optimizations *disabled*.
If no `--overprovisioned` option is specified, Scylla defaults to running with optimizations *enabled*. If `--overprovisioned` is
not specified and is left at its default, specifying `--cpuset` will automatically disable `--overprovisioned`
For example, to enable optimizations for running in an overprovisioned environment:
For example, to enable optimizations for running in a statically partitioned environment:
```console
$ docker run --name some-scylla -d scylladb/scylla --overprovisioned 1
$ docker run --name some-scylla -d scylladb/scylla --overprovisioned 0
```
### `--cpuset CPUSET`


@@ -183,10 +183,7 @@ flat_mutation_reader make_delegating_reader(flat_mutation_reader& r) {
flat_mutation_reader make_forwardable(flat_mutation_reader m) {
class reader : public flat_mutation_reader::impl {
flat_mutation_reader _underlying;
position_range _current = {
position_in_partition(position_in_partition::partition_start_tag_t()),
position_in_partition(position_in_partition::after_static_row_tag_t())
};
position_range _current;
mutation_fragment_opt _next;
// When resolves, _next is engaged or _end_of_stream is set.
future<> ensure_next() {
@@ -201,7 +198,10 @@ flat_mutation_reader make_forwardable(flat_mutation_reader m) {
});
}
public:
reader(flat_mutation_reader r) : impl(r.schema()), _underlying(std::move(r)) { }
reader(flat_mutation_reader r) : impl(r.schema()), _underlying(std::move(r)), _current({
position_in_partition(position_in_partition::partition_start_tag_t()),
position_in_partition(position_in_partition::after_static_row_tag_t())
}) { }
virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override {
return repeat([this] {
if (is_buffer_full()) {


@@ -487,7 +487,9 @@ flat_mutation_reader transform(flat_mutation_reader r, T t) {
return _reader.fast_forward_to(pr);
}
virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override {
throw std::bad_function_call();
forward_buffer_to(pr.start());
_end_of_stream = false;
return _reader.fast_forward_to(std::move(pr), timeout);
}
virtual size_t buffer_size() const override {
return flat_mutation_reader::impl::buffer_size() + _reader.buffer_size();


@@ -478,7 +478,8 @@ future<> gossiper::apply_state_locally(std::map<inet_address, endpoint_state> ma
int local_generation = local_ep_state_ptr.get_heart_beat_state().get_generation();
int remote_generation = remote_state.get_heart_beat_state().get_generation();
logger.trace("{} local generation {}, remote generation {}", ep, local_generation, remote_generation);
if (local_generation != 0 && remote_generation > local_generation + MAX_GENERATION_DIFFERENCE) {
// A node that was removed with nodetool removenode can have a generation of 2
if (local_generation > 2 && remote_generation > local_generation + MAX_GENERATION_DIFFERENCE) {
// assume some peer has corrupted memory and is broadcasting an unbelievable generation about another peer (or itself)
logger.warn("received an invalid gossip generation for peer {}; local generation = {}, received generation = {}",
ep, local_generation, remote_generation);
@@ -853,6 +854,7 @@ int gossiper::get_max_endpoint_state_version(endpoint_state state) {
// Runs inside seastar::async context
void gossiper::evict_from_membership(inet_address endpoint) {
auto permit = lock_endpoint(endpoint).get0();
_unreachable_endpoints.erase(endpoint);
container().invoke_on_all([endpoint] (auto& g) {
g.endpoint_state_map.erase(endpoint);
@@ -1003,7 +1005,7 @@ future<> gossiper::assassinate_endpoint(sstring address) {
logger.warn("Assassinating {} via gossip", endpoint);
if (es) {
auto& ss = service::get_local_storage_service();
auto tokens = ss.get_token_metadata().get_tokens(endpoint);
tokens = ss.get_token_metadata().get_tokens(endpoint);
if (tokens.empty()) {
logger.warn("Unable to calculate tokens for {}. Will use a random one", address);
throw std::runtime_error(sprint("Unable to calculate tokens for %s", endpoint));

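The gossip change above relaxes the corrupted-generation heuristic: a node freshly removed via `nodetool removenode` can legitimately report a generation of 2, so the check now only fires when the local generation exceeds 2. A hedged Python sketch (the constant's value here is illustrative; the real one is defined in the gossiper):

```python
MAX_GENERATION_DIFFERENCE = 86400 * 365  # illustrative value

def looks_corrupted(local_generation, remote_generation):
    """Sketch of the patched sanity check: treat a wildly larger remote
    generation as a corrupt broadcast, but only when the local
    generation is above 2, since a node removed via `nodetool
    removenode` can legitimately have a generation of 2."""
    return (local_generation > 2
            and remote_generation > local_generation + MAX_GENERATION_DIFFERENCE)
```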

@@ -100,7 +100,6 @@ future<> ec2_multi_region_snitch::gossiper_starting() {
// Note: currently gossiper "main" instance always runs on CPU0 therefore
// this function will be executed on CPU0 only.
//
ec2_snitch::gossiper_starting();
using namespace gms;
auto& g = get_local_gossiper();

main.cc

@@ -305,6 +305,12 @@ int main(int ac, char** av) {
return 0;
}
bpo::options_description deprecated("Deprecated options - ignored");
deprecated.add_options()
("background-writer-scheduling-quota", bpo::value<float>())
("auto-adjust-flush-quota", bpo::value<bool>());
app.get_options_description().add(deprecated);
// TODO : default, always read?
init("options-file", bpo::value<sstring>(), "configuration file (i.e. <SCYLLA_HOME>/conf/scylla.yaml)");
cfg->add_options(init);
@@ -331,6 +337,13 @@ int main(int ac, char** av) {
sm::make_gauge("current_version", sm::description("Current ScyllaDB version."), { sm::label_instance("version", scylla_version()), sm::shard_label("") }, [] { return 0; })
});
const std::unordered_set<sstring> ignored_options = { "auto-adjust-flush-quota", "background-writer-scheduling-quota" };
for (auto& opt: ignored_options) {
if (opts.count(opt)) {
print("%s option ignored (deprecated)\n", opt);
}
}
// Check developer mode before even reading the config file, because we may not be
// able to read it if we need to disable strict dma mode.
// We'll redo this later and apply it to all reactors.


@@ -119,7 +119,7 @@ bool querier::matches(const dht::partition_range& range) const {
bound_eq(qr.start(), range.start()) || bound_eq(qr.end(), range.end());
}
querier::can_use querier::can_be_used_for_page(emit_only_live_rows only_live, const schema& s,
querier::can_use querier::can_be_used_for_page(emit_only_live_rows only_live, const ::schema& s,
const dht::partition_range& range, const query::partition_slice& slice) const {
if (only_live != emit_only_live_rows(std::holds_alternative<lw_shared_ptr<compact_for_data_query_state>>(_compaction_state))) {
return can_use::no_emit_only_live_rows_mismatch;
@@ -152,34 +152,33 @@ const size_t querier_cache::max_queriers_memory_usage = memory::stats().total_me
void querier_cache::scan_cache_entries() {
const auto now = lowres_clock::now();
auto it = _meta_entries.begin();
const auto end = _meta_entries.end();
auto it = _entries.begin();
const auto end = _entries.end();
while (it != end && it->is_expired(now)) {
if (*it) {
++_stats.time_based_evictions;
}
it = _meta_entries.erase(it);
_stats.population = _entries.size();
++_stats.time_based_evictions;
--_stats.population;
it = _entries.erase(it);
}
}
querier_cache::entries::iterator querier_cache::find_querier(utils::UUID key, const dht::partition_range& range, tracing::trace_state_ptr trace_state) {
const auto queriers = _entries.equal_range(key);
const auto queriers = _index.equal_range(key);
if (queriers.first == _entries.end()) {
if (queriers.first == _index.end()) {
tracing::trace(trace_state, "Found no cached querier for key {}", key);
return _entries.end();
}
const auto it = std::find_if(queriers.first, queriers.second, [&] (const std::pair<const utils::UUID, entry>& elem) {
return elem.second.get().matches(range);
const auto it = std::find_if(queriers.first, queriers.second, [&] (const entry& e) {
return e.value().matches(range);
});
if (it == queriers.second) {
tracing::trace(trace_state, "Found cached querier(s) for key {} but none matches the query range {}", key, range);
return _entries.end();
}
tracing::trace(trace_state, "Found cached querier for key {} and range {}", key, range);
return it;
return it->pos();
}
querier_cache::querier_cache(std::chrono::seconds entry_ttl)
@@ -199,8 +198,7 @@ void querier_cache::insert(utils::UUID key, querier&& q, tracing::trace_state_pt
tracing::trace(trace_state, "Caching querier with key {}", key);
auto memory_usage = boost::accumulate(
_entries | boost::adaptors::map_values | boost::adaptors::transformed(std::mem_fn(&querier_cache::entry::memory_usage)), size_t(0));
auto memory_usage = boost::accumulate(_entries | boost::adaptors::transformed(std::mem_fn(&entry::memory_usage)), size_t(0));
// We add the memory-usage of the to-be added querier to the memory-usage
// of all the cached queriers. We now need to make sure this number is
@@ -210,20 +208,20 @@ void querier_cache::insert(utils::UUID key, querier&& q, tracing::trace_state_pt
memory_usage += q.memory_usage();
if (memory_usage >= max_queriers_memory_usage) {
auto it = _meta_entries.begin();
const auto end = _meta_entries.end();
auto it = _entries.begin();
const auto end = _entries.end();
while (it != end && memory_usage >= max_queriers_memory_usage) {
if (*it) {
++_stats.memory_based_evictions;
memory_usage -= it->get_entry().memory_usage();
}
it = _meta_entries.erase(it);
++_stats.memory_based_evictions;
memory_usage -= it->memory_usage();
--_stats.population;
it = _entries.erase(it);
}
}
const auto it = _entries.emplace(key, entry::param{std::move(q), _entry_ttl}).first;
_meta_entries.emplace_back(_entries, it);
_stats.population = _entries.size();
auto& e = _entries.emplace_back(key, std::move(q), lowres_clock::now() + _entry_ttl);
e.set_pos(--_entries.end());
_index.insert(e);
++_stats.population;
}
querier querier_cache::lookup(utils::UUID key,
@@ -240,9 +238,9 @@ querier querier_cache::lookup(utils::UUID key,
return create_fun();
}
auto q = std::move(it->second).get();
auto q = std::move(*it).value();
_entries.erase(it);
_stats.population = _entries.size();
--_stats.population;
const auto can_be_used = q.can_be_used_for_page(only_live, s, range, slice);
if (can_be_used == querier::can_use::yes) {
@@ -265,18 +263,24 @@ bool querier_cache::evict_one() {
return false;
}
auto it = _meta_entries.begin();
const auto end = _meta_entries.end();
++_stats.resource_based_evictions;
--_stats.population;
_entries.pop_front();
return true;
}
void querier_cache::evict_all_for_table(const utils::UUID& schema_id) {
auto it = _entries.begin();
const auto end = _entries.end();
while (it != end) {
const auto is_live = bool(*it);
it = _meta_entries.erase(it);
_stats.population = _entries.size();
if (is_live) {
++_stats.resource_based_evictions;
return true;
if (it->schema().id() == schema_id) {
--_stats.population;
it = _entries.erase(it);
} else {
++it;
}
}
return false;
}
querier_cache_context::querier_cache_context(querier_cache& cache, utils::UUID key, bool is_first_page)


@@ -24,7 +24,8 @@
#include "mutation_compactor.hh"
#include "mutation_reader.hh"
#include <seastar/core/weak_ptr.hh>
#include <boost/intrusive/set.hpp>
#include <variant>
/// One-stop object for serving queries.
@@ -207,6 +208,9 @@ public:
return _reader.buffer_size();
}
schema_ptr schema() const {
return _schema;
}
};
/// Special-purpose cache for saving queriers between pages.
@@ -261,75 +265,65 @@ public:
};
private:
class entry : public weakly_referencable<entry> {
querier _querier;
lowres_clock::time_point _expires;
public:
// Since entry cannot be moved and unordered_map::emplace can pass only
// a single param to it's mapped-type we need to force a single-param
// constructor for entry. Oh C++...
struct param {
querier q;
std::chrono::seconds ttl;
};
class entry : public boost::intrusive::set_base_hook<boost::intrusive::link_mode<boost::intrusive::auto_unlink>> {
// Self reference so that we can remove the entry given an `entry&`.
std::list<entry>::iterator _pos;
const utils::UUID _key;
const lowres_clock::time_point _expires;
querier _value;
explicit entry(param p)
: _querier(std::move(p.q))
, _expires(lowres_clock::now() + p.ttl) {
public:
entry(utils::UUID key, querier q, lowres_clock::time_point expires)
: _key(key)
, _expires(expires)
, _value(std::move(q)) {
}
std::list<entry>::iterator pos() const {
return _pos;
}
void set_pos(std::list<entry>::iterator pos) {
_pos = pos;
}
const utils::UUID& key() const {
return _key;
}
const ::schema& schema() const {
return *_value.schema();
}
bool is_expired(const lowres_clock::time_point& now) const {
return _expires <= now;
}
const querier& get() const & {
return _querier;
}
querier&& get() && {
return std::move(_querier);
}
size_t memory_usage() const {
return _querier.memory_usage();
return _value.memory_usage();
}
const querier& value() const & {
return _value;
}
querier value() && {
return std::move(_value);
}
};
using entries = std::unordered_map<utils::UUID, entry>;
class meta_entry {
entries& _entries;
weak_ptr<entry> _entry_ptr;
entries::iterator _entry_it;
public:
meta_entry(entries& e, entries::iterator it)
: _entries(e)
, _entry_ptr(it->second.weak_from_this())
, _entry_it(it) {
}
~meta_entry() {
if (_entry_ptr) {
_entries.erase(_entry_it);
}
}
bool is_expired(const lowres_clock::time_point& now) const {
return !_entry_ptr || _entry_ptr->is_expired(now);
}
explicit operator bool() const {
return bool(_entry_ptr);
}
const entry& get_entry() const {
return *_entry_ptr;
}
struct key_of_entry {
using type = utils::UUID;
const type& operator()(const entry& e) { return e.key(); }
};
using entries = std::list<entry>;
using index = boost::intrusive::multiset<entry, boost::intrusive::key_of_value<key_of_entry>,
boost::intrusive::constant_time_size<false>>;
private:
entries _entries;
std::list<meta_entry> _meta_entries;
index _index;
timer<lowres_clock> _expiry_timer;
std::chrono::seconds _entry_ttl;
stats _stats;
@@ -382,6 +376,11 @@ public:
/// is empty).
bool evict_one();
/// Evict all queriers that belong to a table.
///
/// Should be used when dropping a table.
void evict_all_for_table(const utils::UUID& schema_id);
const stats& get_stats() const {
return _stats;
}
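The header diff above replaces the `unordered_map` + `meta_entry` list pair with a single insertion-ordered `std::list` that owns the entries, plus a non-owning `boost::intrusive::multiset` index over the same objects: eviction (TTL- or memory-based) walks the list from the oldest end, while lookups go through the index. A toy Python analog of that shape, with illustrative names and a plain dict standing in for the intrusive multiset:

```python
class Entry:
    """One cached querier: its lookup key, the querier itself, an
    absolute expiry time, and its memory footprint."""
    def __init__(self, key, value, expires, size):
        self.key, self.value, self.expires, self.size = key, value, expires, size

class QuerierCache:
    """Sketch of the reworked layout: one insertion-ordered list owns
    the entries; a separate key index references the same objects."""
    def __init__(self, max_memory):
        self._entries = []   # oldest first, like the std::list
        self._index = {}     # key -> [entries], like the intrusive multiset
        self._max_memory = max_memory

    def _drop(self, e):
        self._entries.remove(e)
        bucket = self._index[e.key]
        bucket.remove(e)
        if not bucket:
            del self._index[e.key]

    def insert(self, key, value, expires, size):
        # Memory-based eviction: drop oldest entries until the new one fits.
        while self._entries and sum(x.size for x in self._entries) + size > self._max_memory:
            self._drop(self._entries[0])
        e = Entry(key, value, expires, size)
        self._entries.append(e)
        self._index.setdefault(key, []).append(e)

    def lookup(self, key, now):
        for e in list(self._index.get(key, [])):
            if e.expires > now:
                self._drop(e)  # a matched querier is handed back, not kept
                return e.value
        return None

    def scan_expired(self, now):
        # Time-based eviction, as in scan_cache_entries().
        for e in [x for x in self._entries if x.expires <= now]:
            self._drop(e)
```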


@@ -55,7 +55,7 @@ if [ -f /etc/debian_version ]; then
cp /etc/hosts /etc/hosts.orig
echo 127.0.0.1 `hostname` >> /etc/hosts
if [ "$REPO_FOR_INSTALL" != "" ]; then
curl -o /etc/apt/sources.list.d/scylla_install.list $REPO_FOR_INSTALL
curl -L -o /etc/apt/sources.list.d/scylla_install.list $REPO_FOR_INSTALL
fi
apt-get -o Acquire::AllowInsecureRepositories=true \
-o Acquire::AllowDowngradeToInsecureRepositories=true update
@@ -78,13 +78,13 @@ if [ -f /etc/debian_version ]; then
rm /usr/sbin/policy-rc.d
rm /etc/apt/sources.list.d/scylla_install.list
if [ "$REPO_FOR_UPDATE" != "" ]; then
curl -o /etc/apt/sources.list.d/scylla.list $REPO_FOR_UPDATE
curl -L -o /etc/apt/sources.list.d/scylla.list $REPO_FOR_UPDATE
fi
apt-get -o Acquire::AllowInsecureRepositories=true \
-o Acquire::AllowDowngradeToInsecureRepositories=true update
else
if [ "$REPO_FOR_INSTALL" != "" ]; then
curl -o /etc/yum.repos.d/scylla_install.repo $REPO_FOR_INSTALL
curl -L -o /etc/yum.repos.d/scylla_install.repo $REPO_FOR_INSTALL
fi
if [ "$ID" = "centos" ]; then
@@ -104,6 +104,6 @@ else
rm /etc/yum.repos.d/scylla_install.repo
if [ "$REPO_FOR_UPDATE" != "" ]; then
curl -o /etc/yum.repos.d/scylla.repo $REPO_FOR_UPDATE
curl -L -o /etc/yum.repos.d/scylla.repo $REPO_FOR_UPDATE
fi
fi


@@ -87,10 +87,7 @@ def get_repo_file(dir):
for name in files:
with open(name, 'r') as myfile:
for line in myfile:
match = re.search(".*http.?://.*/scylladb/([^/\s]+)/deb/([^/\s]+)[\s/].*", line)
if match:
return match.group(2), match.group(1)
match = re.search(".*http.?://.*/scylladb/([^/]+)/rpm/[^/]+/([^/\s]+)/.*", line)
match = re.search(".*http.?://repositories.*/scylladb/([^/\s]+)/.*/([^/\s]+)/scylladb-.*", line)
if match:
return match.group(2), match.group(1)
return None, None

Submodule seastar updated: bcfbe0c3f7...6f61d7456e


@@ -181,12 +181,9 @@ public:
, _is_thrift(false)
{}
// `nullptr` for internal instances.
auth::service* get_auth_service() {
return _auth_service;
}
// See above.
///
/// `nullptr` for internal instances.
///
const auth::service* get_auth_service() const {
return _auth_service;
}


@@ -132,10 +132,15 @@ future<> migration_manager::schedule_schema_pull(const gms::inet_address& endpoi
return make_ready_future<>();
}
bool migration_manager::is_ready_for_bootstrap() {
bool migration_manager::have_schema_agreement() {
const auto known_endpoints = gms::get_local_gossiper().endpoint_state_map;
if (known_endpoints.size() == 1) {
// Us.
return true;
}
auto our_version = get_local_storage_proxy().get_db().local().get_version();
bool match = false;
for (auto& x : gms::get_local_gossiper().endpoint_state_map) {
for (auto& x : known_endpoints) {
auto& endpoint = x.first;
auto& eps = x.second;
if (endpoint == utils::fb_utilities::get_broadcast_address() || !eps.is_alive()) {


@@ -144,7 +144,10 @@ public:
future<> stop();
bool is_ready_for_bootstrap();
/**
* Known peers in the cluster have the same schema version as us.
*/
bool have_schema_agreement();
void init_messaging_service();
private:


@@ -83,7 +83,7 @@ private:
_last_replicas = state->get_last_replicas();
} else {
// Reusing readers is currently only supported for singular queries.
if (_ranges.front().is_singular()) {
if (!_ranges.empty() && query::is_single_partition(_ranges.front())) {
_cmd->query_uuid = utils::make_random_uuid();
}
_cmd->is_first_page = true;


@@ -3817,9 +3817,9 @@ void storage_proxy::init_messaging_service() {
p->_stats.forwarded_mutations += forward.size();
return when_all(
// mutate_locally() may throw, putting it into apply() converts exception to a future.
futurize<void>::apply([timeout, &p, &m, reply_to, src_addr = std::move(src_addr)] () mutable {
futurize<void>::apply([timeout, &p, &m, reply_to, shard, src_addr = std::move(src_addr)] () mutable {
// FIXME: get_schema_for_write() doesn't timeout
return get_schema_for_write(m.schema_version(), std::move(src_addr)).then([&m, &p, timeout] (schema_ptr s) {
return get_schema_for_write(m.schema_version(), netw::messaging_service::msg_addr{reply_to, shard}).then([&m, &p, timeout] (schema_ptr s) {
return p->mutate_locally(std::move(s), m, timeout);
});
}).then([reply_to, shard, response_id, trace_state_ptr] () {


@@ -408,7 +408,7 @@ void storage_service::join_token_ring(int delay) {
}
// if our schema hasn't matched yet, keep sleeping until it does
// (post CASSANDRA-1391 we don't expect this to be necessary very often, but it doesn't hurt to be careful)
while (!get_local_migration_manager().is_ready_for_bootstrap()) {
while (!get_local_migration_manager().have_schema_agreement()) {
set_mode(mode::JOINING, "waiting for schema information to complete", true);
sleep(std::chrono::seconds(1)).get();
}
@@ -437,7 +437,7 @@ void storage_service::join_token_ring(int delay) {
}
// Check the schema and pending range again
while (!get_local_migration_manager().is_ready_for_bootstrap()) {
while (!get_local_migration_manager().have_schema_agreement()) {
set_mode(mode::JOINING, "waiting for schema information to complete", true);
sleep(std::chrono::seconds(1)).get();
}


@@ -609,6 +609,27 @@ class resharding_compaction final : public compaction {
shard_id _shard; // shard of current sstable writer
std::function<shared_sstable(shard_id)> _sstable_creator;
compaction_backlog_tracker _resharding_backlog_tracker;
// Partition count estimation for a shard S:
//
// TE, the total estimated partition count for a shard S, is defined as
// TE = Sum(i = 0...N) { Ei / Si }.
//
// where i is an input sstable that belongs to shard S,
// Ei is the estimated partition count for sstable i,
// Si is the total number of shards that own sstable i.
//
struct estimated_values {
uint64_t estimated_size = 0;
uint64_t estimated_partitions = 0;
};
std::vector<estimated_values> _estimation_per_shard;
private:
// return estimated partitions per sstable for a given shard
uint64_t partitions_per_sstable(shard_id s) const {
uint64_t estimated_sstables = std::max(uint64_t(1), uint64_t(ceil(double(_estimation_per_shard[s].estimated_size) / _max_sstable_size)));
return ceil(double(_estimation_per_shard[s].estimated_partitions) / estimated_sstables);
}
public:
resharding_compaction(std::vector<shared_sstable> sstables, column_family& cf, std::function<shared_sstable(shard_id)> creator,
uint64_t max_sstable_size, uint32_t sstable_level)
@@ -616,10 +637,19 @@ public:
, _output_sstables(smp::count)
, _sstable_creator(std::move(creator))
, _resharding_backlog_tracker(std::make_unique<resharding_backlog_tracker>())
, _estimation_per_shard(smp::count)
{
cf.get_compaction_manager().register_backlog_tracker(_resharding_backlog_tracker);
for (auto& s : _sstables) {
_resharding_backlog_tracker.add_sstable(s);
for (auto& sst : _sstables) {
_resharding_backlog_tracker.add_sstable(sst);
const auto& shards = sst->get_shards_for_this_sstable();
auto size = sst->bytes_on_disk();
auto estimated_partitions = sst->get_estimated_key_count();
for (auto& s : shards) {
_estimation_per_shard[s].estimated_size += std::max(uint64_t(1), uint64_t(ceil(double(size) / shards.size())));
_estimation_per_shard[s].estimated_partitions += std::max(uint64_t(1), uint64_t(ceil(double(estimated_partitions) / shards.size())));
}
}
_info->type = compaction_type::Reshard;
}
@@ -665,7 +695,7 @@ public:
sstable_writer_config cfg;
cfg.max_sstable_size = _max_sstable_size;
auto&& priority = service::get_local_compaction_priority();
writer.emplace(sst->get_writer(*_cf.schema(), partitions_per_sstable(), cfg, priority, _shard));
writer.emplace(sst->get_writer(*_cf.schema(), partitions_per_sstable(_shard), cfg, priority, _shard));
}
return &*writer;
}


@@ -26,7 +26,7 @@
#include <seastar/core/metrics.hh>
#include "exceptions.hh"
#include <cmath>
#include <boost/algorithm/cxx11/any_of.hpp>
#include <boost/range/algorithm/count_if.hpp>
static logging::logger cmlog("compaction_manager");
@@ -156,14 +156,13 @@ int compaction_manager::trim_to_compact(column_family* cf, sstables::compaction_
}
bool compaction_manager::can_register_weight(column_family* cf, int weight) {
if (_weight_tracker.empty()) {
return true;
}
auto has_cf_ongoing_compaction = [&] {
return boost::algorithm::any_of(_tasks, [&] (const lw_shared_ptr<task>& task) {
auto ret = boost::range::count_if(_tasks, [&] (const lw_shared_ptr<task>& task) {
return task->compacting_cf == cf;
});
// compaction task trying to proceed is already registered in task list,
// so we must check for an additional one.
return ret >= 2;
};
// Only one weight is allowed if parallel compaction is disabled.


@@ -33,6 +33,7 @@
#include "unimplemented.hh"
#include "stdx.hh"
#include "segmented_compress_params.hh"
#include "utils/class_registrator.hh"
namespace sstables {
@@ -299,7 +300,8 @@ size_t local_compression::compress_max_size(size_t input_len) const {
void compression::set_compressor(compressor_ptr c) {
if (c) {
auto& cn = c->name();
unqualified_name uqn(compressor::namespace_prefix, c->name());
const sstring& cn = uqn;
name.value = bytes(cn.begin(), cn.end());
for (auto& p : c->options()) {
if (p.first != compression_parameters::SSTABLE_COMPRESSION) {


@@ -294,6 +294,12 @@ size_tiered_compaction_strategy::get_sstables_for_compaction(column_family& cfs,
return sstables::compaction_descriptor(std::move(most_interesting));
}
// If we are not enforcing min_threshold explicitly, try any pair of SStables in the same tier.
if (!cfs.compaction_enforce_min_threshold() && is_any_bucket_interesting(buckets, 2)) {
std::vector<sstables::shared_sstable> most_interesting = most_interesting_bucket(std::move(buckets), 2, max_threshold);
return sstables::compaction_descriptor(std::move(most_interesting));
}
// if there is no sstable to compact in standard way, try compacting single sstable whose droppable tombstone
// ratio is greater than threshold.
// prefer oldest sstables from biggest size tiers because they will be easier to satisfy conditions for


@@ -28,6 +28,8 @@
#include <seastar/util/defer.hh>
#include "auth/authenticated_user.hh"
#include "auth/permission.hh"
#include "auth/service.hh"
#include "cql3/query_processor.hh"
#include "cql3/role_name.hh"
#include "cql3/role_options.hh"
@@ -262,3 +264,67 @@ SEASTAR_TEST_CASE(revoke_role_restrictions) {
});
}, db_config_with_auth());
}
//
// The creator of a database object is granted all applicable permissions on it.
//
///
/// Grant a user appropriate permissions (with `grant_query`) then create a new database object (with `creation_query`).
/// Verify that the user has been granted all applicable permissions on the new object.
///
static void verify_default_permissions(
cql_test_env& env,
stdx::string_view user,
stdx::string_view grant_query,
stdx::string_view creation_query,
const auth::resource& r) {
create_user_if_not_exists(env, user);
env.execute_cql(sstring(grant_query)).get0();
with_user(env, user, [&env, creation_query] {
env.execute_cql(sstring(creation_query)).get0();
});
const auto default_permissions = auth::get_permissions(
env.local_auth_service(),
auth::authenticated_user(user),
r).get0();
BOOST_REQUIRE_EQUAL(
auth::permissions::to_strings(default_permissions),
auth::permissions::to_strings(r.applicable_permissions()));
}
SEASTAR_TEST_CASE(create_role_default_permissions) {
return do_with_cql_env_thread([](auto&& env) {
verify_default_permissions(
env,
alice,
"GRANT CREATE ON ALL ROLES TO alice",
"CREATE ROLE lord",
auth::make_role_resource("lord"));
}, db_config_with_auth());
}
SEASTAR_TEST_CASE(create_keyspace_default_permissions) {
return do_with_cql_env_thread([](auto&& env) {
verify_default_permissions(
env,
alice,
"GRANT CREATE ON ALL KEYSPACES TO alice",
"CREATE KEYSPACE armies WITH REPLICATION = { 'class': 'SimpleStrategy', 'replication_factor': 1 }",
auth::make_data_resource("armies"));
}, db_config_with_auth());
}
SEASTAR_TEST_CASE(create_table_default_permissions) {
return do_with_cql_env_thread([](auto&& env) {
verify_default_permissions(
env,
alice,
"GRANT CREATE ON KEYSPACE ks TO alice",
"CREATE TABLE orcs (id int PRIMARY KEY, strength int)",
auth::make_data_resource("ks", "orcs"));
}, db_config_with_auth());
}


@@ -2607,3 +2607,18 @@ SEASTAR_TEST_CASE(test_insert_large_collection_values) {
});
});
}
// Corner-case test that checks for the paging code's preparedness for an empty
// range list.
SEASTAR_TEST_CASE(test_empty_partition_range_scan) {
return do_with_cql_env_thread([] (cql_test_env& e) {
e.execute_cql("create keyspace empty_partition_range_scan with replication = {'class': 'SimpleStrategy', 'replication_factor': 1};").get();
e.execute_cql("create table empty_partition_range_scan.tb (a int, b int, c int, val int, PRIMARY KEY ((a,b),c) );").get();
auto qo = std::make_unique<cql3::query_options>(db::consistency_level::LOCAL_ONE, std::vector<cql3::raw_value>{},
cql3::query_options::specific_options{1, nullptr, {}, api::new_timestamp()});
auto res = e.execute_cql("select * from empty_partition_range_scan.tb where token (a,b) > 1 and token(a,b) <= 1;", std::move(qo)).get0();
assert_that(res).is_rows().is_empty();
});
}


@@ -29,6 +29,9 @@
#include "database.hh"
#include "partition_slice_builder.hh"
#include "frozen_mutation.hh"
#include "mutation_source_test.hh"
#include "schema_registry.hh"
#include "service/migration_manager.hh"
SEASTAR_TEST_CASE(test_querying_with_limits) {
return do_with_cql_env([](cql_test_env& e) {
@@ -74,3 +77,33 @@ SEASTAR_TEST_CASE(test_querying_with_limits) {
});
});
}
SEASTAR_THREAD_TEST_CASE(test_database_with_data_in_sstables_is_a_mutation_source) {
do_with_cql_env([] (cql_test_env& e) {
run_mutation_source_tests([&] (schema_ptr s, const std::vector<mutation>& partitions) -> mutation_source {
try {
e.local_db().find_column_family(s->ks_name(), s->cf_name());
service::get_local_migration_manager().announce_column_family_drop(s->ks_name(), s->cf_name(), true).get();
} catch (const no_such_column_family&) {
// expected
}
service::get_local_migration_manager().announce_new_column_family(s, true).get();
column_family& cf = e.local_db().find_column_family(s);
for (auto&& m : partitions) {
e.local_db().apply(cf.schema(), freeze(m)).get();
}
cf.flush().get();
cf.get_row_cache().invalidate([] {}).get();
return mutation_source([&] (schema_ptr s,
const dht::partition_range& range,
const query::partition_slice& slice,
const io_priority_class& pc,
tracing::trace_state_ptr trace_state,
streamed_mutation::forwarding fwd,
mutation_reader::forwarding fwd_mr) {
return cf.make_reader(s, range, slice, pc, std::move(trace_state), fwd, fwd_mr);
});
});
return make_ready_future<>();
}).get();
}


@@ -659,6 +659,46 @@ void test_mutation_reader_fragments_have_monotonic_positions(populate_fn populat
});
}
static void test_date_tiered_clustering_slicing(populate_fn populate) {
BOOST_TEST_MESSAGE(__PRETTY_FUNCTION__);
simple_schema ss;
auto s = schema_builder(ss.schema())
.set_compaction_strategy(sstables::compaction_strategy_type::date_tiered)
.build();
auto pkey = ss.make_pkey();
mutation m1(s, pkey);
ss.add_static_row(m1, "s");
m1.partition().apply(ss.new_tombstone());
ss.add_row(m1, ss.make_ckey(0), "v1");
mutation_source ms = populate(s, {m1});
// query row outside the range of existing rows to exercise sstable clustering key filter
{
auto slice = partition_slice_builder(*s)
.with_range(ss.make_ckey_range(1, 2))
.build();
auto prange = dht::partition_range::make_singular(pkey);
assert_that(ms.make_reader(s, prange, slice))
.produces(m1, slice.row_ranges(*s, pkey.key()))
.produces_end_of_stream();
}
{
auto slice = partition_slice_builder(*s)
.with_range(query::clustering_range::make_singular(ss.make_ckey(0)))
.build();
auto prange = dht::partition_range::make_singular(pkey);
assert_that(ms.make_reader(s, prange, slice))
.produces(m1)
.produces_end_of_stream();
}
}
static void test_clustering_slices(populate_fn populate) {
BOOST_TEST_MESSAGE(__PRETTY_FUNCTION__);
auto s = schema_builder("ks", "cf")
@@ -822,6 +862,7 @@ static void test_query_only_static_row(populate_fn populate) {
auto pkeys = s.make_pkeys(1);
mutation m1(s.schema(), pkeys[0]);
m1.partition().apply(s.new_tombstone());
s.add_static_row(m1, "s1");
s.add_row(m1, s.make_ckey(0), "v1");
s.add_row(m1, s.make_ckey(1), "v2");
@@ -846,6 +887,59 @@ static void test_query_only_static_row(populate_fn populate) {
.produces(m1, slice.row_ranges(*s.schema(), m1.key()))
.produces_end_of_stream();
}
// query just a static row, single-partition case
{
auto slice = partition_slice_builder(*s.schema())
.with_ranges({})
.build();
auto prange = dht::partition_range::make_singular(m1.decorated_key());
assert_that(ms.make_reader(s.schema(), prange, slice))
.produces(m1, slice.row_ranges(*s.schema(), m1.key()))
.produces_end_of_stream();
}
}
static void test_query_no_clustering_ranges_no_static_columns(populate_fn populate) {
simple_schema s(simple_schema::with_static::no);
auto pkeys = s.make_pkeys(1);
mutation m1(s.schema(), pkeys[0]);
m1.partition().apply(s.new_tombstone());
s.add_row(m1, s.make_ckey(0), "v1");
s.add_row(m1, s.make_ckey(1), "v2");
mutation_source ms = populate(s.schema(), {m1});
{
auto prange = dht::partition_range::make_ending_with(dht::ring_position(m1.decorated_key()));
assert_that(ms.make_reader(s.schema(), prange, s.schema()->full_slice()))
.produces(m1)
.produces_end_of_stream();
}
// multi-partition case
{
auto slice = partition_slice_builder(*s.schema())
.with_ranges({})
.build();
auto prange = dht::partition_range::make_ending_with(dht::ring_position(m1.decorated_key()));
assert_that(ms.make_reader(s.schema(), prange, slice))
.produces(m1, slice.row_ranges(*s.schema(), m1.key()))
.produces_end_of_stream();
}
// single-partition case
{
auto slice = partition_slice_builder(*s.schema())
.with_ranges({})
.build();
auto prange = dht::partition_range::make_singular(m1.decorated_key());
assert_that(ms.make_reader(s.schema(), prange, slice))
.produces(m1, slice.row_ranges(*s.schema(), m1.key()))
.produces_end_of_stream();
}
}
void test_streamed_mutation_forwarding_succeeds_with_no_data(populate_fn populate) {
@@ -958,6 +1052,7 @@ void test_slicing_with_overlapping_range_tombstones(populate_fn populate) {
}
void run_mutation_reader_tests(populate_fn populate) {
test_date_tiered_clustering_slicing(populate);
test_fast_forwarding_across_partitions_to_empty_range(populate);
test_clustering_slices(populate);
test_mutation_reader_fragments_have_monotonic_positions(populate);
@@ -967,6 +1062,7 @@ void run_mutation_reader_tests(populate_fn populate) {
test_streamed_mutation_forwarding_is_consistent_with_slicing(populate);
test_range_queries(populate);
test_query_only_static_row(populate);
test_query_no_clustering_ranges_no_static_columns(populate);
}
void test_next_partition(populate_fn populate) {


@@ -202,7 +202,7 @@ std::string get_run_date_time() {
using namespace boost::posix_time;
const ptime current_time = second_clock::local_time();
auto facet = std::make_unique<time_facet>();
facet->format("%Y-%M-%d %H:%M:%S");
facet->format("%Y-%m-%d %H:%M:%S");
std::stringstream stream;
stream.imbue(std::locale(std::locale::classic(), facet.release()));
stream << current_time;


@@ -518,7 +518,7 @@ SEASTAR_THREAD_TEST_CASE(test_memory_based_cache_eviction) {
}, 24h);
size_t i = 0;
const auto entry = t.produce_first_page_and_save_querier(i);
const auto entry = t.produce_first_page_and_save_querier(i++);
const size_t queriers_needed_to_fill_cache = floor(querier_cache::max_queriers_memory_usage / entry.memory_usage);


@@ -3012,11 +3012,13 @@ SEASTAR_TEST_CASE(test_concurrent_reads_and_eviction) {
slice, actual, ::join(",\n", possible_versions)));
}
}
}).finally([&, id] {
done = true;
});
});
int n_updates = 100;
while (!readers.available() && n_updates--) {
while (!done && n_updates--) {
auto m2 = gen();
m2.partition().make_fully_continuous();
@@ -3034,8 +3036,8 @@ SEASTAR_TEST_CASE(test_concurrent_reads_and_eviction) {
tracker.region().evict_some();
// Don't allow backlog to grow too much to avoid bad_alloc
const auto max_active_versions = 10;
while (versions.size() > max_active_versions) {
const auto max_active_versions = 7;
while (!done && versions.size() > max_active_versions) {
later().get();
}
}


@@ -249,11 +249,9 @@ SEASTAR_TEST_CASE(test_merging_does_not_alter_tables_which_didnt_change) {
auto&& keyspace = e.db().local().find_keyspace("ks").metadata();
auto legacy_version = utils::UUID_gen::get_time_UUID();
auto s0 = schema_builder("ks", "table1")
.with_column("pk", bytes_type, column_kind::partition_key)
.with_column("v1", bytes_type)
.with_version(legacy_version)
.build();
auto find_table = [&] () -> column_family& {
@@ -261,14 +259,11 @@ SEASTAR_TEST_CASE(test_merging_does_not_alter_tables_which_didnt_change) {
};
auto muts1 = db::schema_tables::make_create_table_mutations(keyspace, s0, api::new_timestamp()).get0();
service::get_storage_proxy().local().mutate_locally(muts1).get();
e.db().invoke_on_all([gs = global_schema_ptr(s0)] (database& db) {
return db.add_column_family_and_make_directory(gs);
}).get();
mm.announce(muts1).get();
auto s1 = find_table().schema();
BOOST_REQUIRE_EQUAL(legacy_version, s1->version());
auto legacy_version = s1->version();
mm.announce(muts1).get();


@@ -47,11 +47,12 @@ public:
return {new_timestamp(), gc_clock::now()};
}
public:
simple_schema()
using with_static = bool_class<class static_tag>;
simple_schema(with_static ws = with_static::yes)
: _s(schema_builder("ks", "cf")
.with_column("pk", utf8_type, column_kind::partition_key)
.with_column("ck", utf8_type, column_kind::clustering_key)
.with_column("s1", utf8_type, column_kind::static_column)
.with_column("s1", utf8_type, ws ? column_kind::static_column : column_kind::regular_column)
.with_column("v", utf8_type)
.build())
, _v_def(*_s->get_column_definition(to_bytes("v")))


@@ -68,7 +68,8 @@ void run_sstable_resharding_test() {
auto cl_stats = make_lw_shared<cell_locker_stats>();
auto cf = make_lw_shared<column_family>(s, column_family::config(), column_family::no_commitlog(), *cm, *cl_stats);
cf->mark_ready_for_writes();
std::unordered_map<shard_id, mutation> muts;
std::unordered_map<shard_id, std::vector<mutation>> muts;
static constexpr auto keys_per_shard = 1000u;
// create sst shared by all shards
{
@@ -80,18 +81,29 @@ void run_sstable_resharding_test() {
return m;
};
for (auto i : boost::irange(0u, smp::count)) {
auto key_token_pair = token_generation_for_shard(i, 1);
BOOST_REQUIRE(key_token_pair.size() == 1);
auto m = get_mutation(key_token_pair[0].first, i);
muts.emplace(i, m);
mt->apply(std::move(m));
auto key_token_pair = token_generation_for_shard(i, keys_per_shard);
BOOST_REQUIRE(key_token_pair.size() == keys_per_shard);
muts[i].reserve(keys_per_shard);
for (auto k : boost::irange(0u, keys_per_shard)) {
auto m = get_mutation(key_token_pair[k].first, i);
muts[i].push_back(m);
mt->apply(std::move(m));
}
}
auto sst = sstables::make_sstable(s, tmp->path, 0, sstables::sstable::version_types::ka, sstables::sstable::format_types::big);
write_memtable_to_sstable(*mt, sst).get();
}
auto sst = sstables::make_sstable(s, tmp->path, 0, sstables::sstable::version_types::ka, sstables::sstable::format_types::big);
sst->load().get();
sst->set_unshared();
// FIXME: sstable write has a limitation in which it will generate sharding metadata only
// for a single shard. workaround that by setting shards manually. from this test perspective,
// it doesn't matter because we check each partition individually of each sstable created
// for a shard that owns the shared input sstable.
sstables::test(sst).set_shards(boost::copy_range<std::vector<unsigned>>(boost::irange(0u, smp::count)));
auto filter_fname = sstables::test(sst).filename(sstable::component_type::Filter);
uint64_t bloom_filter_size_before = file_size(filter_fname).get0();
auto creator = [&cf, tmp] (shard_id shard) mutable {
// we need generation calculated by instance of cf at requested shard,
@@ -108,19 +120,27 @@ void run_sstable_resharding_test() {
auto new_sstables = sstables::reshard_sstables({ sst }, *cf, creator, std::numeric_limits<uint64_t>::max(), 0).get0();
BOOST_REQUIRE(new_sstables.size() == smp::count);
uint64_t bloom_filter_size_after = 0;
for (auto& sstable : new_sstables) {
auto new_sst = sstables::make_sstable(s, tmp->path, sstable->generation(),
sstables::sstable::version_types::ka, sstables::sstable::format_types::big);
new_sst->load().get();
filter_fname = sstables::test(new_sst).filename(sstable::component_type::Filter);
bloom_filter_size_after += file_size(filter_fname).get0();
auto shards = new_sst->get_shards_for_this_sstable();
BOOST_REQUIRE(shards.size() == 1); // check sstable is unshared.
auto shard = shards.front();
BOOST_REQUIRE(column_family_test::calculate_shard_from_sstable_generation(new_sst->generation()) == shard);
assert_that(new_sst->as_mutation_source().make_reader(s))
.produces(muts.at(shard))
.produces_end_of_stream();
auto rd = assert_that(new_sst->as_mutation_source().make_reader(s));
BOOST_REQUIRE(muts[shard].size() == keys_per_shard);
for (auto k : boost::irange(0u, keys_per_shard)) {
rd.produces(muts[shard][k]);
}
rd.produces_end_of_stream();
}
BOOST_REQUIRE_CLOSE_FRACTION(float(bloom_filter_size_before), float(bloom_filter_size_after), 0.1);
}
SEASTAR_TEST_CASE(sstable_resharding_test) {


@@ -201,6 +201,14 @@ public:
future<> remove_component(sstable::component_type c) {
return remove_file(_sst->filename(c));
}
const sstring filename(sstable::component_type c) const {
return _sst->filename(c);
}
void set_shards(std::vector<unsigned> shards) {
_sst->_shards = std::move(shards);
}
};
inline future<sstable_ptr> reusable_sst(schema_ptr schema, sstring dir, unsigned long generation) {


@@ -66,12 +66,12 @@ void cql_server::event_notifier::on_create_keyspace(const sstring& ks_name)
{
for (auto&& conn : _schema_change_listeners) {
using namespace cql_transport;
with_gate(conn->_pending_requests_gate, [&] {
return conn->write_response(conn->make_schema_change_event(event::schema_change{
if (!conn->_pending_requests_gate.is_closed()) {
conn->write_response(conn->make_schema_change_event(event::schema_change{
event::schema_change::change_type::CREATED,
ks_name
}));
});
};
}
}
@@ -79,14 +79,14 @@ void cql_server::event_notifier::on_create_column_family(const sstring& ks_name,
{
for (auto&& conn : _schema_change_listeners) {
using namespace cql_transport;
with_gate(conn->_pending_requests_gate, [&] {
return conn->write_response(conn->make_schema_change_event(event::schema_change{
if (!conn->_pending_requests_gate.is_closed()) {
conn->write_response(conn->make_schema_change_event(event::schema_change{
event::schema_change::change_type::CREATED,
event::schema_change::target_type::TABLE,
ks_name,
cf_name
}));
});
};
}
}
@@ -94,14 +94,14 @@ void cql_server::event_notifier::on_create_user_type(const sstring& ks_name, con
{
for (auto&& conn : _schema_change_listeners) {
using namespace cql_transport;
with_gate(conn->_pending_requests_gate, [&] {
return conn->write_response(conn->make_schema_change_event(event::schema_change{
if (!conn->_pending_requests_gate.is_closed()) {
conn->write_response(conn->make_schema_change_event(event::schema_change{
event::schema_change::change_type::CREATED,
event::schema_change::target_type::TYPE,
ks_name,
type_name
}));
});
};
}
}
@@ -124,12 +124,12 @@ void cql_server::event_notifier::on_update_keyspace(const sstring& ks_name)
{
for (auto&& conn : _schema_change_listeners) {
using namespace cql_transport;
with_gate(conn->_pending_requests_gate, [&] {
return conn->write_response(conn->make_schema_change_event(event::schema_change{
if (!conn->_pending_requests_gate.is_closed()) {
conn->write_response(conn->make_schema_change_event(event::schema_change{
event::schema_change::change_type::UPDATED,
ks_name
}));
});
};
}
}
@@ -137,14 +137,14 @@ void cql_server::event_notifier::on_update_column_family(const sstring& ks_name,
{
for (auto&& conn : _schema_change_listeners) {
using namespace cql_transport;
with_gate(conn->_pending_requests_gate, [&] {
return conn->write_response(conn->make_schema_change_event(event::schema_change{
if (!conn->_pending_requests_gate.is_closed()) {
conn->write_response(conn->make_schema_change_event(event::schema_change{
event::schema_change::change_type::UPDATED,
event::schema_change::target_type::TABLE,
ks_name,
cf_name
}));
});
};
}
}
@@ -152,14 +152,14 @@ void cql_server::event_notifier::on_update_user_type(const sstring& ks_name, con
{
for (auto&& conn : _schema_change_listeners) {
using namespace cql_transport;
with_gate(conn->_pending_requests_gate, [&] {
return conn->write_response(conn->make_schema_change_event(event::schema_change{
if (!conn->_pending_requests_gate.is_closed()) {
conn->write_response(conn->make_schema_change_event(event::schema_change{
event::schema_change::change_type::UPDATED,
event::schema_change::target_type::TYPE,
ks_name,
type_name
}));
});
};
}
}
@@ -182,12 +182,12 @@ void cql_server::event_notifier::on_drop_keyspace(const sstring& ks_name)
{
for (auto&& conn : _schema_change_listeners) {
using namespace cql_transport;
with_gate(conn->_pending_requests_gate, [&] {
return conn->write_response(conn->make_schema_change_event(event::schema_change{
if (!conn->_pending_requests_gate.is_closed()) {
conn->write_response(conn->make_schema_change_event(event::schema_change{
event::schema_change::change_type::DROPPED,
ks_name
}));
});
};
}
}
@@ -195,14 +195,14 @@ void cql_server::event_notifier::on_drop_column_family(const sstring& ks_name, c
{
for (auto&& conn : _schema_change_listeners) {
using namespace cql_transport;
with_gate(conn->_pending_requests_gate, [&] {
return conn->write_response(conn->make_schema_change_event(event::schema_change{
if (!conn->_pending_requests_gate.is_closed()) {
conn->write_response(conn->make_schema_change_event(event::schema_change{
event::schema_change::change_type::DROPPED,
event::schema_change::target_type::TABLE,
ks_name,
cf_name
}));
});
};
}
}
@@ -210,14 +210,14 @@ void cql_server::event_notifier::on_drop_user_type(const sstring& ks_name, const
{
for (auto&& conn : _schema_change_listeners) {
using namespace cql_transport;
with_gate(conn->_pending_requests_gate, [&] {
return conn->write_response(conn->make_schema_change_event(event::schema_change{
if (!conn->_pending_requests_gate.is_closed()) {
conn->write_response(conn->make_schema_change_event(event::schema_change{
event::schema_change::change_type::DROPPED,
event::schema_change::target_type::TYPE,
ks_name,
type_name
}));
});
};
}
}
@@ -240,9 +240,9 @@ void cql_server::event_notifier::on_join_cluster(const gms::inet_address& endpoi
{
for (auto&& conn : _topology_change_listeners) {
using namespace cql_transport;
with_gate(conn->_pending_requests_gate, [&] {
return conn->write_response(conn->make_topology_change_event(event::topology_change::new_node(endpoint, conn->_server_addr.port)));
});
if (!conn->_pending_requests_gate.is_closed()) {
conn->write_response(conn->make_topology_change_event(event::topology_change::new_node(endpoint, conn->_server_addr.port)));
};
}
}
@@ -250,9 +250,9 @@ void cql_server::event_notifier::on_leave_cluster(const gms::inet_address& endpo
{
for (auto&& conn : _topology_change_listeners) {
using namespace cql_transport;
with_gate(conn->_pending_requests_gate, [&] {
return conn->write_response(conn->make_topology_change_event(event::topology_change::removed_node(endpoint, conn->_server_addr.port)));
});
if (!conn->_pending_requests_gate.is_closed()) {
conn->write_response(conn->make_topology_change_event(event::topology_change::removed_node(endpoint, conn->_server_addr.port)));
};
}
}
@@ -260,9 +260,9 @@ void cql_server::event_notifier::on_move(const gms::inet_address& endpoint)
{
for (auto&& conn : _topology_change_listeners) {
using namespace cql_transport;
with_gate(conn->_pending_requests_gate, [&] {
return conn->write_response(conn->make_topology_change_event(event::topology_change::moved_node(endpoint, conn->_server_addr.port)));
});
if (!conn->_pending_requests_gate.is_closed()) {
conn->write_response(conn->make_topology_change_event(event::topology_change::moved_node(endpoint, conn->_server_addr.port)));
};
}
}
@@ -273,9 +273,9 @@ void cql_server::event_notifier::on_up(const gms::inet_address& endpoint)
if (!was_up) {
for (auto&& conn : _status_change_listeners) {
using namespace cql_transport;
with_gate(conn->_pending_requests_gate, [&] {
return conn->write_response(conn->make_status_change_event(event::status_change::node_up(endpoint, conn->_server_addr.port)));
});
if (!conn->_pending_requests_gate.is_closed()) {
conn->write_response(conn->make_status_change_event(event::status_change::node_up(endpoint, conn->_server_addr.port)));
};
}
}
}
@@ -287,9 +287,9 @@ void cql_server::event_notifier::on_down(const gms::inet_address& endpoint)
if (!was_down) {
for (auto&& conn : _status_change_listeners) {
using namespace cql_transport;
with_gate(conn->_pending_requests_gate, [&] {
return conn->write_response(conn->make_status_change_event(event::status_change::node_down(endpoint, conn->_server_addr.port)));
});
if (!conn->_pending_requests_gate.is_closed()) {
conn->write_response(conn->make_status_change_event(event::status_change::node_down(endpoint, conn->_server_addr.port)));
};
}
}
}


@@ -611,8 +611,8 @@ future<> cql_server::connection::process()
return write_response(make_error(0, exceptions::exception_code::SERVER_ERROR, "unknown error", tracing::trace_state_ptr()));
}
}).finally([this] {
_server._notifier->unregister_connection(this);
return _pending_requests_gate.close().then([this] {
_server._notifier->unregister_connection(this);
return _ready_to_respond.finally([this] {
return _write_buf.close();
});


@@ -61,7 +61,6 @@ std::ostream& operator<<(std::ostream& out, cause c) {
case cause::API: return out << "API";
case cause::SCHEMA_CHANGE: return out << "SCHEMA_CHANGE";
case cause::MIXED_CF: return out << "MIXED_CF";
case cause::ROLES: return out << "ROLES";
}
abort();
}


@@ -56,7 +56,6 @@ enum class cause {
STORAGE_SERVICE,
SCHEMA_CHANGE,
MIXED_CF,
ROLES,
};
[[noreturn]] void fail(cause what);


@@ -164,7 +164,7 @@ class unqualified_name {
public:
// can be optimized with string_views etc.
unqualified_name(const sstring& pkg_pfx, const sstring& name)
: _qname(name.compare(0, pkg_pfx.size(), pkg_pfx) == 0 ? name.substr(pkg_pfx.size() + 1) : name)
: _qname(name.compare(0, pkg_pfx.size(), pkg_pfx) == 0 ? name.substr(pkg_pfx.size()) : name)
{}
operator const sstring&() const {
return _qname;