Compare commits

47 Commits

Author SHA1 Message Date
Jenkins
d27eb734a7 release: prepare for 2.2.2 by hagitsegev 2019-01-12 18:28:25 +02:00
Avi Kivity
e6aeb490b5 Update seastar submodule
* seastar 6f61d74...88cb58c (2):
  > reactor: disable nowait aio due to a kernel bug
  > configure.py: Enhance detection for gcc -fvisibility=hidden bug

Fixes #3996.
2018-12-17 15:57:58 +02:00
Vladimir Krivopalov
2e3b09b593 database: Capture io_priority_class by reference to avoid dangling ref.
The original reference points to a thread-local storage object that is
guaranteed to outlive the continuation, but copying it makes the
subsequent calls point to a local object and introduces a use-after-free
bug.

Fixes #3948

Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
(cherry picked from commit 68458148e7)
2018-12-02 13:32:59 +02:00
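A minimal sketch (hypothetical names, not Scylla's actual code) of why the patch captures by reference: the real object lives in thread-local storage and outlives the continuation, so holding a reference is safe, while a copy is a distinct short-lived local, which the buggy code went on to dereference after it was gone.

```cpp
#include <cassert>

// Stand-in for the per-shard io_priority_class registry: returns a
// reference to a long-lived thread-local object.
struct io_priority_class { int id; };

io_priority_class& get_priority_class() {
    static thread_local io_priority_class pc{42};
    return pc;
}

// Capturing the reference keeps pointing at the thread-local instance,
// which outlives the continuation.
const io_priority_class* capture_by_reference() {
    auto& pc = get_priority_class();
    auto continuation = [&pc] { return &pc; };
    return continuation();
}

// Copying produces a different, short-lived object: the root of the bug.
bool copy_is_a_different_object() {
    auto pc = get_priority_class();  // copy, not a reference
    return &pc != &get_priority_class();
}
```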
Tomasz Grabiec
92c74f4e0b utils: phased_barrier: Make advance_and_await() have strong exception guarantees
Currently, when advance_and_await() fails to allocate the new gate
object, it will throw bad_alloc and leave the phased_barrier object in
an invalid state. Calling advance_and_await() again on it will result
in undefined behavior (typically SIGSEGV) because _gate will be
disengaged.

One place affected by this is table::seal_active_memtable(), which
calls _flush_barrier.advance_and_await(). If this throws, subsequent
flush attempts will SIGSEGV.

This patch rearranges the code so that advance_and_await() has strong
exception guarantees.
Message-Id: <1542645562-20932-1-git-send-email-tgrabiec@scylladb.com>

Fixes #3931.

(cherry picked from commit 57e25fa0f8)
2018-11-21 12:18:25 +02:00
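A sketch (simplified types, not the real phased_barrier) of the strong-exception-guarantee pattern the patch applies: allocate the replacement gate before touching the old state, so a bad_alloc leaves the barrier exactly as it was instead of disengaged.

```cpp
#include <cassert>
#include <memory>

struct gate {};

struct barrier {
    std::unique_ptr<gate> _gate = std::make_unique<gate>();

    void advance() {
        auto new_gate = std::make_unique<gate>();  // may throw bad_alloc;
                                                   // _gate still intact
        // ... close and wait on the old gate here ...
        _gate = std::move(new_gate);               // no-throw commit
    }

    bool engaged() const { return _gate != nullptr; }
};
```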
Avi Kivity
89d835e9e3 tests: fix network_topology_test timing out in debug mode
In 2.2, SEASTAR_DEBUG is just DEBUG.
2018-10-21 19:04:08 +03:00
Takuya ASADA
263a740084 dist/debian: use --configfile to specify pbuilderrc
Use --configfile to specify pbuilderrc, instead of copying it to home directory.

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180420024624.9661-1-syuu@scylladb.com>
(cherry picked from commit 01c36556bf)
2018-10-21 18:21:18 +03:00
Avi Kivity
7f24b5319e release: prepare for 2.2.1 2018-10-19 21:16:14 +03:00
Avi Kivity
fe16c0e985 locator: fix abstract_replication_strategy::get_ranges() and friends violating sort order
get_ranges() is supposed to return ranges in sorted order. However, a35136533d
broke this and returned the range that was supposed to be last in the second
position (e.g. [0, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9]). This broke cleanup, which
relied on the sort order to perform a binary search. Other users of the
get_ranges() family did not rely on the sort order.

Fixes #3872.
Message-Id: <20181019113613.1895-1-avi@scylladb.com>

(cherry picked from commit 1ce52d5432)
2018-10-19 21:16:12 +03:00
Glauber Costa
f85badaaac api: use longs instead of ints for snapshot sizes
Int types in json will be serialized to int types in C++. They will then
only be able to handle 4GB, and we tend to store more data than that.

Without this patch, listsnapshots is broken in all versions.

Fixes: #3845

Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20181012155902.7573-1-glauber@scylladb.com>
(cherry picked from commit 98332de268)
2018-10-12 22:02:56 +03:00
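Why the snapshot-size fields must be "long": a snapshot over 4 GiB does not fit in a 32-bit int, so serializing it through int32_t mangles the value. The helpers below are illustrative, not Scylla's actual JSON serializer.

```cpp
#include <cassert>
#include <cstdint>

// A >4 GiB size survives a 64-bit field but wraps in a 32-bit one.
int32_t size_as_int(uint64_t bytes)  { return static_cast<int32_t>(bytes); }
int64_t size_as_long(uint64_t bytes) { return static_cast<int64_t>(bytes); }
```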
Eliran Sinvani
2193d41683 cql3 : add workaround to antlr3 null dereference bug
The Antlr3 exception class has a null dereference bug that crashes
the system when trying to extract the exception message using the
ANTLR_Exception<...>::displayRecognitionError(...) function. When
a parsing error occurs the CqlParser throws an exception which is in
turn processed for some special cases in Scylla to generate a custom
message. The default case, however, creates the message using
displayRecognitionError, causing the system to crash.
The fix is a simple workaround, making sure the pointer is not null
before the call to the function. A "proper" fix can't be implemented
because the exception class itself is implemented outside Scylla,
in antlr headers that reside on the host machine's OS.

Manually tested two cases: a typo causing Scylla to crash, and
a CQL comment without a newline at the end, which also caused a crash.
Ran unit tests (release).

Fixes #3740
Fixes #3764

Signed-off-by: Eliran Sinvani <eliransin@scylladb.com>
Message-Id: <cfc7e0d758d7a855d113bb7c8191b0fd7d2e8921.1538566542.git.eliransin@scylladb.com>
(cherry picked from commit 20f49566a2)
2018-10-08 11:02:16 +03:00
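The workaround in miniature (a hypothetical helper, not antlr's actual API): guard the pointer that displayRecognitionError() would dereference and fall back to a generic message instead of crashing.

```cpp
#include <cassert>
#include <string>

// Guard the message pointer before use; fall back rather than crash.
std::string safe_error_message(const char* antlr_message) {
    if (antlr_message == nullptr) {
        return "syntax error";  // safe fallback, avoids the null dereference
    }
    return std::string(antlr_message);
}
```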
Avi Kivity
1e1f0c29bf utils: crc32: mark power crc32 assembly as not requiring an executable stack
The linker uses an opt-in system for non-executable stack: if all object files
opt into a non-executable stack, the binary will have a non-executable stack,
which is very desirable for security. The compiler cooperates by opting into
a non-executable stack whenever possible (always for our code).

However, we also have an assembly file (for fast power crc32 computations).
Since it doesn't opt into a non-executable stack, we get a binary with
executable stack, which Gentoo's build system rightly complains about.

Fix by adding the correct incantation to the file.

Fixes #3799.

Reported-by: Alexys Jacob <ultrabug@gmail.com>
Message-Id: <20181002151251.26383-1-avi@scylladb.com>
(cherry picked from commit aaab8a3f46)
2018-10-08 11:02:16 +03:00
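For reference, the usual "incantation" for GNU as is an empty `.note.GNU-stack` section, which opts the object file into a non-executable stack (shown here as the generic form; the exact spelling in the patched file may differ, and some targets such as ARM write `%progbits` instead of `@progbits`):

```asm
/* Opt this object into a non-executable stack; without it the linker
   conservatively gives the whole binary an executable stack. */
.section .note.GNU-stack,"",@progbits
```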
Calle Wilund
84d4588b5f storage_proxy: Add missing re-throw in truncate_blocking
If truncation times out, we want to log it, but the exception should
not be swallowed; it should be re-thrown.

Fixes #3796.

Message-Id: <20181001112325.17809-1-calle@scylladb.com>
(cherry picked from commit 2996b8154f)
2018-10-08 11:02:16 +03:00
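The shape of the fix, in a simplified synchronous stand-in for the future-based code: log the timeout, then re-throw rather than swallow the exception.

```cpp
#include <cassert>
#include <stdexcept>
#include <string>

std::string logged;

void truncate_like() {
    try {
        throw std::runtime_error("truncate timed out");
    } catch (const std::exception& e) {
        logged = e.what();  // log it...
        throw;              // ...but keep propagating (the missing re-throw)
    }
}

// Helper to observe that the exception still reaches the caller.
bool exception_propagates() {
    try {
        truncate_like();
        return false;
    } catch (const std::exception&) {
        return true;
    }
}
```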
Duarte Nunes
7b43b26709 tests/aggregate_fcts_test: Add test case for wrapped types
Provide a test case which checks that a type being wrapped in a
reverse_type plays no role in assignment.

Refs #3789

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180927223201.28152-2-duarte@scylladb.com>
(cherry picked from commit 17578c3579)
2018-10-08 11:02:16 +03:00
Duarte Nunes
0ed01acf15 cql3/selection/selector: Unwrap types when validating assignment
When validating assignment between two types, it's possible one of
them is wrapped in a reverse_type, if it comes, for example, from the
type associated with a clustering column. When checking for weak
assignment the types are correctly unwrapped, but not when checking
for an exact match, which this patch fixes.

Technically, the receiver is never a reversed_type for the current
callers, but this is the morally correct implementation, as the type
being reversed or not plays no role in assignment.

Tests: unit(release)

Fixes #3789

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180927223201.28152-1-duarte@scylladb.com>
(cherry picked from commit 5e7bb20c8a)
2018-10-08 11:02:16 +03:00
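A sketch of the fix with simplified stand-ins (not Scylla's real type system): strip a reversed_type wrapper from both sides before testing for an exact match, since reversedness plays no role in assignment.

```cpp
#include <cassert>

struct type {
    const type* underlying = nullptr;  // non-null when this is a reversed type
    const type* unwrap() const { return underlying ? underlying : this; }
};

// Exact match must compare the unwrapped types on both sides.
bool is_exact_match(const type& receiver, const type& value) {
    return receiver.unwrap() == value.unwrap();
}
```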
Gleb Natapov
7ce160f408 mutation_query_test: add test for result size calculation
Check that digest only and digest+data query calculate result size to be
the same.

Message-Id: <20180906153800.GK2326@scylladb.com>
(cherry picked from commit 9e438933a2)

Message-Id: <20181008075901.GC2380@scylladb.com>
2018-10-08 11:02:09 +03:00
Gleb Natapov
5017d9b46a mutation_partition: accurately account for result size in digest only queries
When measuring_output_stream is used to calculate a result element's size,
it incorrectly takes into account not only the serialized element size, but
also a placeholder that the ser::qr_partition__rows/qr_partition__static_row__cells
constructors put at the beginning. Fix it by taking the starting point in the
stream before element serialization and subtracting it afterwards.

Fixes #3755

Message-Id: <20180906153609.GJ2326@scylladb.com>
(cherry picked from commit d7674288a9)
2018-10-07 18:16:19 +03:00
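The fix in essence, with a simplified stand-in for measuring_output_stream: compute an element's size as the stream position after serialization minus the position before it, so placeholder bytes written earlier are not attributed to the element.

```cpp
#include <cassert>
#include <cstddef>

struct measuring_stream {
    std::size_t pos = 0;
    void write(std::size_t n) { pos += n; }
};

std::size_t measure_element(measuring_stream& out, std::size_t element_bytes) {
    std::size_t before = out.pos;  // take the starting point first
    out.write(element_bytes);      // "serialize" the element
    return out.pos - before;       // subtract afterwards: placeholder excluded
}
```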
Gleb Natapov
50b6ab3552 mutation_partition: correctly measure static row size when doing digest calculation
The code uses the wrong output stream when only a digest is requested,
and thus computes an incorrect data size. Failing to correctly account
for the static row size while calculating the digest may cause a mismatch
between digest and data queries.

Fixes #3753.

Message-Id: <20180905131219.GD2326@scylladb.com>
(cherry picked from commit 98092353df)
2018-09-06 16:51:19 +03:00
Eliran Sinvani
b1652823aa cql3: ensure repeated values in IN clauses don't return repeated rows
When the list of values in the IN list of a single column contains
duplicates, multiple executors are activated, since the assumption
is that each value in the IN list corresponds to a different partition.
This results in the same row appearing in the result as many times as
the partition value is duplicated.

Added queries to the IN restriction unit test, together with a check for
the bad result.

Fixes #2837
Tests: queries as in the use case from the GitHub issue, in both
prepared and plain form (using the Python driver); unit tests.

Signed-off-by: Eliran Sinvani <eliransin@scylladb.com>
Message-Id: <ad88b7218fa55466be7bc4303dc50326a3d59733.1534322238.git.eliransin@scylladb.com>
(cherry picked from commit d734d316a6)
2018-08-26 15:51:17 +03:00
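The essence of the fix can be sketched as deduplicating the IN-list partition values before fanning out one read per value, so a repeated value cannot yield a repeated row. (A sketch on plain ints; the real code operates on typed CQL values.)

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

// Sort + unique: each partition value triggers at most one read.
std::vector<int> dedupe_in_values(std::vector<int> values) {
    std::sort(values.begin(), values.end());
    values.erase(std::unique(values.begin(), values.end()), values.end());
    return values;
}
```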
Tomasz Grabiec
02b24aec34 Merge 'Fix multi-cell static list updates in the presence of ckeys' from Duarte
Fixes a regression introduced in
9e88b60ef5, which broke the lookup for
prefetched values of lists when a clustering key is specified.

This is the code that was removed from some list operations:

 std::experimental::optional<clustering_key> row_key;
 if (!column.is_static()) {
   row_key = clustering_key::from_clustering_prefix(*params._schema, prefix);
 }
 ...
 auto&& existing_list = params.get_prefetched_list(m.key().view(), row_key, column);

Put it back, in the form of common code in the update_parameters class.

Fixes #3703

* https://github.com/duarten/scylla cql-list-fixes/v1:
  tests/cql_query_test: Test multi-cell static list updates with ckeys
  cql3/lists: Fix multi-cell static list updates in the presence of ckeys
  keys: Add factory for an empty clustering_key_prefix_view

(cherry picked from commit 6937cc2d1c)
2018-08-21 21:39:22 +01:00
Duarte Nunes
22eea4d8cf cql3/query_options: Use _value_views in prepare()
_value_views is the authoritative data structure for the
client-specified values. Indeed, the ctor called by
transport::request::read_options() leaves _values completely empty.

In query_options::prepare() we were, however, using _values to
associate values with the client-specified column names, and not
_value_views. Fix this by using _value_views instead.

As for the reasons we didn't see this bug earlier, I assume it's
because very few drivers set the 0x40 ("with names") query options flag,
which means column names are omitted. This is the right thing to do since most
drivers have enough information to correctly position the values.

Fixes #3688

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <20180814234605.14775-1-duarte@scylladb.com>
(cherry picked from commit a4355fe7e7)
2018-08-21 21:39:22 +01:00
Tomasz Grabiec
d257f6d57c mutation_partition: Fix exception safety of row::apply_monotonically()
When emplace_back() fails, the value has already been moved into a
temporary, which breaks the monotonicity expected from
apply_monotonically(). As a result, writes to that cell will be lost.

The fix is to avoid the temporary by in-place construction of
cell_and_hash. To do that, appropriate cell_and_hash constructor was
added.

Found by mutation_test.cc::test_apply_monotonically_is_monotonic with
some modifications to the random mutation generator.

Introduced in 99a3e3a.

Fixes #3678.

Message-Id: <1533816965-27328-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 024b3c9fd9)
2018-08-21 21:39:18 +01:00
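Why the added cell_and_hash constructor matters, with simplified stand-ins: the buggy path built a temporary from a moved-from value before insertion, so a throw during insertion lost the write. Constructing the element in place, after any step that can throw, avoids the early move.

```cpp
#include <cassert>
#include <string>
#include <utility>
#include <vector>

// Simplified stand-in for cell_and_hash with an emplace-friendly constructor.
struct cell_and_hash {
    std::string cell;
    long hash;
    cell_and_hash(std::string c, long h) : cell(std::move(c)), hash(h) {}
};

// Do the allocation that may throw first, then move the value in place.
void append(std::vector<cell_and_hash>& v, std::string& value, long hash) {
    v.reserve(v.size() + 1);                // may throw; value untouched
    v.emplace_back(std::move(value), hash); // in-place construction
}
```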
Takuya ASADA
6fca92ac3c dist/common/scripts/scylla_ec2_check: support custom NIC ifname on EC2
This is the bash version of commit 88fe3c2694.

Since some AMIs use consistent network device naming, the primary NIC
ifname is not 'eth0'.
But scylla_ec2_check hardcodes the NIC name as 'eth0', so we need to add
a --nic option to specify a custom NIC ifname.

Fixes #3658

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180807231650.13697-1-syuu@scylladb.com>
2018-08-08 09:16:57 +03:00
Jesse Haber-Kucharsky
26e3917046 auth: Don't use unsupported hashing algorithms
In previous versions of Fedora, the `crypt_r` function returned
`nullptr` when a requested hashing algorithm was not supported.

This is consistent with the documentation of the function in its man
page.

As of Fedora 28, the function's behavior changes so that the encrypted
text is not `nullptr` on error, but instead the string "*0".

The info pages for `crypt_r` clarify somewhat (and contradict the man
pages):

    Some implementations return `NULL` on failure, and others return an
    _invalid_ hashed passphrase, which will begin with a `*` and will
    not be the same as SALT.

Because of this change of behavior, users running Scylla on a Fedora 28
machine which was upgraded from a previous release would not be able to
authenticate: an unsupported hashing algorithm would be selected,
producing encrypted text that did not match the entry in the table.

With this change, unsupported algorithms are correctly detected and
users should be able to continue to authenticate themselves.

Fixes #3637.

Signed-off-by: Jesse Haber-Kucharsky <jhaberku@scylladb.com>
Message-Id: <bcd708f3ec195870fa2b0d147c8910fb63db7e0e.1533322594.git.jhaberku@scylladb.com>
(cherry picked from commit fce10f2c6e)
2018-08-05 10:30:47 +03:00
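The detection the fix needs, per crypt_r's two documented failure modes: older libcs return NULL for an unsupported hashing algorithm, while newer ones (e.g. Fedora 28's libxcrypt) return an invalid hash beginning with '*' (such as "*0"). The helper name below is hypothetical.

```cpp
#include <cassert>

// A hash is usable only if it is non-null and not an invalid "*..." marker.
bool hashed_password_is_valid(const char* hashed) {
    return hashed != nullptr && hashed[0] != '*';
}
```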
Gleb Natapov
3892594a93 cache_hitrate_calculator: fix race when new table is added during calculations
The calculation consists of several parts with preemption points between
them, so a table can be added while the calculation is ongoing. Do not
assume that the table exists in the intermediate data structure.

Fixes #3636

Message-Id: <20180801093147.GD23569@scylladb.com>
(cherry picked from commit 44a6afad8c)
2018-08-01 14:34:08 +03:00
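The defensive pattern the fix applies, with illustrative types: since a table can appear at a preemption point mid-calculation, look it up with find() instead of assuming it is present in the intermediate data structure.

```cpp
#include <cassert>
#include <string>
#include <unordered_map>

// A newly added table simply gets a default rate instead of crashing.
double hit_rate_or_zero(const std::unordered_map<std::string, double>& rates,
                        const std::string& table) {
    auto it = rates.find(table);
    return it == rates.end() ? 0.0 : it->second;
}
```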
Amos Kong
4b24439841 scylla_setup: fix conditional statement of silent mode
Commit 300af65555 introduced a problem in a
conditional statement: the script will always abort in silent mode,
regardless of the return value.

Fixes #3485

Signed-off-by: Amos Kong <amos@scylladb.com>
Message-Id: <1c12ab04651352964a176368f8ee28f19ae43c68.1528077114.git.amos@scylladb.com>
(cherry picked from commit 364c2551c8)
2018-07-25 09:36:32 +03:00
Takuya ASADA
a02a4592d8 dist/common/scripts/scylla_setup: abort running script when one of setup failed in silent mode
The current script silently continues even if one of the setup steps
fails; it needs to abort.

Fixes #3433

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180522180355.1648-1-syuu@scylladb.com>
(cherry picked from commit 300af65555)
2018-07-25 09:36:29 +03:00
Avi Kivity
b6e1c08451 Merge "row_cache: Fix violation of continuity on concurrent eviction and population" from Tomasz
"
The problem happens under the following circumstances:

  - we have a partially populated partition in cache, with a gap in the middle

  - a read with no clustering restrictions trying to populate that gap

  - eviction of the entry for the lower bound of the gap concurrent with population

The population may incorrectly mark the range before the gap as continuous.
This may result in temporary loss of writes in that clustering range. The
problem heals by clearing cache.

Caught by row_cache_test::test_concurrent_reads_and_eviction, which has been
failing sporadically.

The problem is in ensure_population_lower_bound(), which returns true if
the current clustering range covers all rows, which means that the populator
has a right to set the continuity flag to true on the row it inserts. This is
correct only if the current population range actually starts before all
clustering rows. Otherwise, we're populating starting from _last_row and
should consult it.

Fixes #3608.
"

* 'tgrabiec/fix-violation-of-continuity-on-concurrent-read-and-eviction' of github.com:tgrabiec/scylla:
  row_cache: Fix violation of continuity on concurrent eviction and population
  position_in_partition: Introduce is_before_all_clustered_rows()

(cherry picked from commit 31151cadd4)
2018-07-18 12:07:01 +02:00
Botond Dénes
9469afcd27 storage_proxy: use the original row limits for the final results merging
`query_partition_key_range()` does the final result merging and trimming
(if necessary) to make sure we don't send more rows to the client than
requested. This merging and trimming is done by a continuation attached
to the `query_partition_key_range_concurrent()` which does the actual
querying. The continuations captures via value the `row_limit` and
`partition_limit` fields of the `query::read_command` object of the
query. This has an unexpected consequence. The lambda object is
constructed after the call to `query_partition_key_range_concurrent()`
returns. If this call doesn't defer, any modifications done to the read
command object done by `query_partition_key_range_concurrent()` will be
visible to the lambda. This is undesirable because
`query_partition_key_range_concurrent()` updates the read command object
directly as the vnodes are traversed which in turn will result in the
lambda doing the final trimming according to a decremented `row_limits`,
which will cause the paging logic to declare the query as exhausted
prematurely because the page will not be full.
To avoid all this make a copy of the relevant limit fields before
`query_partition_key_range_concurrent()` is called and pass these copies
to the continuation, thus ensuring that the final trimming will be done
according to the original page limits.

Spotted while investigating a dtest failure on my 1865/range-scans/v2
branch. On that branch the way range scans are executed on replicas is
completely refactored. These changes apparently reduce the number of
continuations in the read path to the point where an entire page can be
filled without deferring, thus causing the problem to surface.

Fixes #3605.

Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <f11e80a6bf8089d49ba3c112b25a69edf1a92231.1531743940.git.bdenes@scylladb.com>
(cherry picked from commit cc4acb6e26)
2018-07-16 17:51:06 +03:00
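The essence of the fix, heavily simplified: copy the limits before the querying call and hand the copies to the continuation, instead of letting a lambda built after the call read the (possibly already decremented) fields of the read command.

```cpp
#include <cassert>

struct read_command { int row_limit; };

// Stands in for the per-vnode decrements done while querying.
void query_concurrent(read_command& cmd) {
    cmd.row_limit -= 7;
}

int final_trim_limit(read_command& cmd) {
    int row_limit = cmd.row_limit;  // copy taken before the call
    query_concurrent(cmd);          // may complete without deferring
    auto trim = [row_limit] { return row_limit; };
    return trim();                  // sees the original page limit
}
```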
Avi Kivity
240b9f122b Merge "Backport empty partition range scan fixes" from Botond
"
This mini-series lumps together the fix for the empty partition range
scan crash (#3564) and the two follow-up patches.
"

* 'paging-fix-backport-2.2/v1' of https://github.com/denesb/scylla:
  query_pager: use query::is_single_partition() to check for singular range
  tests/cql_query_test: add unit test for querying empty ranges test
  query_pager: be prepared to _ranges being empty
2018-07-05 10:29:31 +03:00
Botond Dénes
cb16cd7724 query_pager: use query::is_single_partition() to check for singular range
Use query::is_single_partition() to check whether the queried ranges are
singular or not. The current method of using
`dht::partition_range::is_singular()` is incorrect, as it is possible to
build a singular range that doesn't represent a single partition.
`query::is_single_partition()` correctly checks for this so use it
instead.

Found during code-review.

Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <f671f107e8069910a2f84b14c8d22638333d571c.1530675889.git.bdenes@scylladb.com>
(cherry picked from commit 8084ce3a8e)
2018-07-04 12:57:45 +03:00
Botond Dénes
c864d198fc tests/cql_query_test: add unit test for querying empty ranges test
A bug was found recently (#3564) in the paging logic, where the code
assumed the queried ranges list is non-empty. This assumption is
incorrect, as there can be valid (if rare) queries that result in the
ranges list being empty. Add a unit test that executes such a query with
paging enabled to detect any future bugs related to assumptions about
the ranges list being non-empty.

Refs: #3564
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <f5ba308c4014c24bb392060a7e72e7521ff021fa.1530618836.git.bdenes@scylladb.com>
(cherry picked from commit c236a96d7d)
2018-07-04 09:52:54 +03:00
Botond Dénes
25125e9c4f query_pager: be prepared to _ranges being empty
do_fetch_page() checks at the beginning whether there is a saved query
state already, meaning this is not the first page. If there is not, it
checks whether the query is for a singular partition or a range scan,
to decide whether to enable stateful queries or not. This check
assumed that there is at least one range in _ranges, which will not hold
under some circumstances. Add a check for _ranges being empty.

Fixes: #3564
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <cbe64473f8013967a93ef7b2104c7ca0507afac9.1530610709.git.bdenes@scylladb.com>
(cherry picked from commit 59a30f0684)
2018-07-04 09:52:54 +03:00
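The shape of the guard the fix adds, with illustrative types: check for an empty ranges list before inspecting the first range to classify the query.

```cpp
#include <cassert>
#include <vector>

struct partition_range { bool singular; };

bool first_range_is_singular(const std::vector<partition_range>& ranges) {
    if (ranges.empty()) {  // the check the fix adds
        return false;      // an empty range list is a rare but valid query
    }
    return ranges.front().singular;
}
```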
Shlomi Livne
faf10fe6aa release: prepare for 2.2.0
Signed-off-by: Shlomi Livne <shlomi@scylladb.com>
2018-07-01 22:40:42 +03:00
Calle Wilund
f76269cdcf sstables::compress: Ensure unqualified compressor name if possible
Fixes #3546

Both older Origin and Scylla write "known" compressor names (i.e. those
in the Origin namespace) unqualified (e.g. LZ4Compressor).

This behaviour was not preserved in the virtualization change, but
probably should be.

Message-Id: <20180627110930.1619-1-calle@scylladb.com>
(cherry picked from commit 054514a47a)
2018-06-28 18:55:15 +03:00
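A sketch of the restored behaviour (illustrative helper; the Cassandra package prefix shown is the conventional one, but the exact matching logic in the patch may differ): record known compressors without the qualifying prefix.

```cpp
#include <cassert>
#include <string>

// Strip the Cassandra package prefix so known compressors are written
// unqualified, e.g. "LZ4Compressor".
std::string unqualified_compressor_name(const std::string& name) {
    static const std::string prefix = "org.apache.cassandra.io.compress.";
    if (name.compare(0, prefix.size(), prefix) == 0) {
        return name.substr(prefix.size());
    }
    return name;
}
```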
Avi Kivity
a9b0ccf116 Merge "Disable sstable filtering based on min/max clustering key components" from Tomasz
"
With DateTiered and TimeWindow, there is a read optimization enabled
which excludes sstables based on overlap with recorded min/max values
of clustering key components. The problem is that it doesn't take into
account partition tombstones and static rows, which should still be
returned by the reader even if there is no overlap in the query's
clustering range. A read which returns no clustering rows can
mispopulate the cache, which will appear as partition deletion or writes
to the static row being lost, until node restart or eviction of the
partition entry.

There is also a bad interaction between cache population on read and
that optimization. When the clustering range of the query doesn't
overlap with any sstable, the reader will return no partition markers
for the read, which leads the cache populator to assume there is no
partition in sstables, so it will cache an empty partition. This will
cause later reads of that partition to miss prior writes to that
partition until it is evicted from the cache or the node is restarted.

Disable until a more elaborate fix is implemented.

Fixes #3552
Fixes #3553
"

* tag 'tgrabiec/disable-min-max-sstable-filtering-v1' of github.com:tgrabiec/scylla:
  tests: Add test for slicing a mutation source with date tiered compaction strategy
  tests: Check that database conforms to mutation source
  database: Disable sstable filtering based on min/max clustering key components

(cherry picked from commit e1efda8b0c)
2018-06-28 18:55:15 +03:00
Tomasz Grabiec
abc5941f87 flat_mutation_reader: Move field initialization to initializer list
This works around a problem of std::terminate() being called in debug
mode build if initialization of _current throws.

Backtrace:

Thread 2 "row_cache_test_" received signal SIGABRT, Aborted.
0x00007ffff17ce9fb in raise () from /lib64/libc.so.6
(gdb) bt
  #0  0x00007ffff17ce9fb in raise () from /lib64/libc.so.6
  #1  0x00007ffff17d077d in abort () from /lib64/libc.so.6
  #2  0x00007ffff5773025 in __gnu_cxx::__verbose_terminate_handler() () from /lib64/libstdc++.so.6
  #3  0x00007ffff5770c16 in ?? () from /lib64/libstdc++.so.6
  #4  0x00007ffff576fb19 in ?? () from /lib64/libstdc++.so.6
  #5  0x00007ffff5770508 in __gxx_personality_v0 () from /lib64/libstdc++.so.6
  #6  0x00007ffff3ce4ee3 in ?? () from /lib64/libgcc_s.so.1
  #7  0x00007ffff3ce570e in _Unwind_Resume () from /lib64/libgcc_s.so.1
  #8  0x0000000003633602 in reader::reader (this=0x60e0001160c0, r=...) at flat_mutation_reader.cc:214
  #9  0x0000000003655864 in std::make_unique<make_forwardable(flat_mutation_reader)::reader, flat_mutation_reader>(flat_mutation_reader &&) (__args#0=...)
    at /usr/include/c++/7/bits/unique_ptr.h:825
  #10 0x0000000003649a63 in make_flat_mutation_reader<make_forwardable(flat_mutation_reader)::reader, flat_mutation_reader>(flat_mutation_reader &&) (args#0=...)
    at flat_mutation_reader.hh:440
  #11 0x000000000363565d in make_forwardable (m=...) at flat_mutation_reader.cc:270
  #12 0x000000000303f962 in memtable::make_flat_reader (this=0x61300001d540, s=..., range=..., slice=..., pc=..., trace_state_ptr=..., fwd=..., fwd_mr=...)
    at memtable.cc:592

Message-Id: <1528792447-13336-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 6d6b93d1e7)
2018-06-28 18:55:15 +03:00
Asias He
a152ac12af gossip: Fix tokens assignment in assassinate_endpoint
The tokens vector is defined a few lines above and is needed outside the
if block.

Do not redefine it again in the if block, otherwise the tokens will be empty.

Found by code inspection.

Fixes #3551.

Message-Id: <c7a06375c65c950e94236571127f533e5a60cbfd.1530002177.git.asias@scylladb.com>
(cherry picked from commit c3b5a2ecd5)
2018-06-28 18:55:15 +03:00
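The bug in miniature: re-declaring `tokens` inside the if block creates a new, shadowed vector, so the outer one (used after the block) stays empty. The fix assigns to the existing variable instead. (Simplified sketch, not the gossip code.)

```cpp
#include <cassert>
#include <vector>

std::vector<int> gather_tokens(bool endpoint_known) {
    std::vector<int> tokens;
    if (endpoint_known) {
        // Buggy: `std::vector<int> tokens = {1, 2, 3};` shadows the outer
        // vector. Fixed: assign to the existing one.
        tokens = {1, 2, 3};
    }
    return tokens;  // non-empty when endpoint_known, as intended
}
```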
Botond Dénes
c274fdf2ec querier: find_querier(): return end() when no querier matches the range
When none of the queriers found for the lookup key match the lookup
range `_entries.end()` should be returned as the search failed. Instead
the iterator returned from the failed `std::find_if()` is returned
which, if the find failed, will be the end iterator returned by the
previous call to `_entries.equal_range()`. This is incorrect because as
long as `equal_range()`'s end iterator is not also `_entries.end()` the
search will always return an iterator to a querier regardless of whether
any of them actually matches the read range.
Fix by returning `_entries.end()` when it is detected that no queriers
match the range.

Fixes: #3530
(cherry picked from commit 2609a17a23)
2018-06-28 18:55:15 +03:00
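The fix in miniature, using standard containers as stand-ins: when no entry in the equal_range matches, return the container's end(), not the sub-range's end iterator, which can point at a live but non-matching entry.

```cpp
#include <cassert>
#include <map>
#include <string>

using entry_map = std::multimap<int, std::string>;

entry_map::iterator find_matching(entry_map& entries, int key,
                                  const std::string& wanted_range) {
    auto [first, last] = entries.equal_range(key);
    for (auto it = first; it != last; ++it) {
        if (it->second == wanted_range) {
            return it;  // matches both the key and the read range
        }
    }
    return entries.end();  // search failed: return end(), not `last`
}
```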
Botond Dénes
5b88d6b4d6 querier_cache: restructure entries storage
Currently querier_cache uses a `std::unordered_map<utils::UUID, querier>`
to store cache entries and an `std::list<meta_entry>` to store meta
information about the querier entries, like insertion order, expiry
time, etc.

All cache eviction algorithms use the meta-entry list to evict entries
in reverse insertion order (LRU order). To make this possible
meta-entries keep an iterator into the entry map so that given a
meta-entry one can easily erase the querier entry. This however poses a
problem as std::unordered_map can possibly invalidate all its iterators
when new items are inserted. This is use-after-free waiting to happen.

Another disadvantage of the current solution is that it requires the
meta-entry to use a weak pointer to the querier entry so that, in case
that is removed (as a result of a successful lookup), it doesn't try to
access it. This has an impact on all cache eviction algorithms as they
have to be prepared to deal with stale meta-entries. Stale meta-entries
also unnecessarily consume memory.

To solve these problems redesign how querier_cache stores entries
completely. Instead of storing the entries in an `std::unordered_map`
and storing the meta-entries in an `std::list`, store the entries in an
`std::list` and an intrusive map (index) for lookups. This new design
has several advantages over the old one:
* The entries will now be in insert order, so eviction strategies can
  work on the entry list itself, no need to involve additional data
  structures for this.
* All data related to an entry is stored in one place, no data
  duplication.
* Removing an entry automatically removes it from the index as intrusive
  containers support auto unlink. This means there is no need to store
  iterators for long periods, risking use-after-free when the container
  invalidates its iterators.

Additional changes:
* Modify eviction strategies so that they work with the `entry`
  interface rather than the stored value directly.

Ref #3424

(cherry picked from commit 7ce7f3f0cc)
2018-06-28 18:55:15 +03:00
Botond Dénes
2d626e1cf8 tests/querier_cache: fix memory based eviction test
Do increment the key counter after inserting the first querier into the
cache. Otherwise two queriers with the same key will be inserted and
will fail the test. This problem is exposed by the changes the next
patches make to the querier cache, but is fixed beforehand to maintain
bisectability of the code.

Fixes: #3529
(cherry picked from commit b9d51b4c08)
2018-06-28 18:55:15 +03:00
Avi Kivity
c11bd3e1cf Merge "Do not allow compaction controller shares to grow indefinitely" from Glauber
"
We are seeing some workloads with large datasets where the compaction
controller ends up with a lot of shares. Regardless of whether or not
we'll change the algorithm, this patchset handles a more basic issue,
which is the fact that the current controller doesn't set a maximum
explicitly, so if the input is larger than the maximum it will keep
growing without bounds.

It also pushes the maximum input point of the compaction controller from
10 to 30, allowing us to err on the side of caution for the 2.2 release.
"

* 'tame-controller' of github.com:glommer/scylla:
  controller: do not increase shares of controllers for inputs higher than the maximum
  controller: adjust constants for compaction controller

(cherry picked from commit e0eb66af6b)
2018-06-20 10:58:20 +03:00
Avi Kivity
9df3df92bc Merge "Try harder to move STCS towards zero-backlog" from Glauber
"
Tests: unit (release)

Before merging the LCS controller, we merged patches that would
guarantee that LCS would move towards zero backlog - otherwise the
backlog could get too high.

We didn't do the same for STCS, our first controlled strategy. So we may
end up with a situation where there are many SSTables inducing a large
backlog, but they are not yet meeting the minimum criteria for
compaction. The backlog, then, never goes down.

This patch changes the SSTable selection criteria so that if there is
nothing to do, we'll keep pushing towards reaching a state of zero
backlog. Very similar to what we did for LCS.
"

* 'stcs-min-threshold-v4' of github.com:glommer/scylla:
  STCS: bypass min_threshold unless configure to enforce strictly
  compaction_strategy: allow the user to tell us if min_threshold has to be strict

(cherry picked from commit f0fc888381)
2018-06-18 14:21:52 +03:00
Takuya ASADA
8ad9578a6c dist/debian: add --jobs <njobs> option just like build_rpm.sh
On some build environments we may want to limit the number of parallel jobs:
ninja-build runs ncpus jobs by default, which may be too many, since g++ uses
a lot of memory.
So support --jobs <njobs>, just like the rpm build script.

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <20180425205439.30053-1-syuu@scylladb.com>
(cherry picked from commit 782ebcece4)
2018-06-14 15:04:50 +03:00
Tomasz Grabiec
4cb6061a9f tests: row_cache: Reduce concurrency limit to avoid bad_alloc
The test uses random mutations. We saw it failing with bad_alloc from time to time.
Reduce concurrency to reduce memory footprint.

Message-Id: <20180611090304.16681-1-tgrabiec@scylladb.com>
(cherry picked from commit a91974af7a)
2018-06-14 13:40:00 +02:00
Tomasz Grabiec
1940e6bd95 tests: row_cache: Do not hang when only one of the readers throws
Message-Id: <20180531122729.3314-1-tgrabiec@scylladb.com>
(cherry picked from commit b5e42bc6a0)
2018-06-14 13:40:00 +02:00
Avi Kivity
044cfde5f3 database: stop using incremental selectors
There is a bug in incremental_selector for partitioned_sstable_set, so
until it is found, stop using it.

This degrades scan performance of Leveled Compaction Strategy tables.

Fixes #3513. (as a workaround)
Introduced: 2.1
Message-Id: <20180613131547.19084-1-avi@scylladb.com>

(cherry picked from commit aeffbb6732)
2018-06-13 21:04:56 +03:00
Vlad Zolotarov
262a246436 locator::ec2_multi_region_snitch: don't call for ec2_snitch::gossiper_starting()
ec2_snitch::gossiper_starting() calls the base class (default) method,
which sets _gossip_started to TRUE and thereby prevents the following
reconnectable_snitch_helper registration.

Fixes #3454

Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
Message-Id: <1528208520-28046-1-git-send-email-vladz@scylladb.com>
(cherry picked from commit 2dde372ae6)
2018-06-12 19:02:19 +03:00
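
The ordering bug described in this commit message can be sketched as follows — a minimal, self-contained illustration with hypothetical simplified class and member names (the real snitch classes live in locator/ and are far larger):

```cpp
#include <cassert>

// Minimal sketch (hypothetical, simplified names) of the bug described
// above: the base snitch's default gossiper_starting() sets
// _gossip_started, which makes a later helper registration a no-op, so
// the derived EC2 multi-region snitch must perform its registration
// before (or instead of) delegating to the base implementation.
struct snitch_base {
    bool _gossip_started = false;
    bool helper_registered = false;
    virtual void gossiper_starting() { _gossip_started = true; }
    void try_register_helper() {
        if (_gossip_started) {
            return;  // too late: registration is silently skipped
        }
        helper_registered = true;
        _gossip_started = true;
    }
    virtual ~snitch_base() = default;
};

struct ec2_multi_region_snitch : snitch_base {
    void gossiper_starting() override {
        // Calling snitch_base::gossiper_starting() first (the buggy
        // behavior) would flip _gossip_started and skip registration.
        try_register_helper();
    }
};
```

With this ordering, gossiper_starting() on the derived snitch leaves the helper registered; delegating to the base first would not.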
111 changed files with 1574 additions and 5108 deletions

View File

@@ -1,6 +1,6 @@
#!/bin/sh
VERSION=2.2.rc2
VERSION=2.2.2
if test -f version
then
@@ -14,7 +14,7 @@ else
# where counter starts at 1 and increments for successive versions.
# This ensures that the package manager will select your custom
# package over the standard release.
SCYLLA_BUILD=1.mv
SCYLLA_BUILD=0
SCYLLA_RELEASE=$SCYLLA_BUILD.$DATE.$GIT_COMMIT
fi

View File

@@ -2129,41 +2129,6 @@
]
}
]
},
{
"path":"/storage_service/view_build_statuses/{keyspace}/{view}",
"operations":[
{
"method":"GET",
"summary":"Gets the progress of a materialized view build",
"type":"array",
"items":{
"type":"mapper"
},
"nickname":"view_build_statuses",
"produces":[
"application/json"
],
"parameters":[
{
"name":"keyspace",
"description":"The keyspace",
"required":true,
"allowMultiple":false,
"type":"string",
"paramType":"path"
},
{
"name":"view",
"description":"View name",
"required":true,
"allowMultiple":false,
"type":"string",
"paramType":"path"
}
]
}
]
}
],
"models":{
@@ -2228,11 +2193,11 @@
"description":"The column family"
},
"total":{
"type":"int",
"type":"long",
"description":"The total snapshot size"
},
"live":{
"type":"int",
"type":"long",
"description":"The live snapshot size"
}
}

View File

@@ -852,15 +852,6 @@ void set_storage_service(http_context& ctx, routes& r) {
return make_ready_future<json::json_return_type>(map_to_key_value(ownership, res));
});
});
ss::view_build_statuses.set(r, [&ctx] (std::unique_ptr<request> req) {
auto keyspace = validate_keyspace(ctx, req->param);
auto view = req->param["view"];
return service::get_local_storage_service().view_build_statuses(std::move(keyspace), std::move(view)).then([] (std::unordered_map<sstring, sstring> status) {
std::vector<storage_service_json::mapper> res;
return make_ready_future<json::json_return_type>(map_to_key_value(std::move(status), res));
});
});
}
}

View File

@@ -149,7 +149,9 @@ static sstring gensalt() {
// blowfish 2011 fix, blowfish, sha512, sha256, md5
for (sstring pfx : { "$2y$", "$2a$", "$6$", "$5$", "$1$" }) {
salt = pfx + input;
if (crypt_r("fisk", salt.c_str(), &tlcrypt)) {
const char* e = crypt_r("fisk", salt.c_str(), &tlcrypt);
if (e && (e[0] != '*')) {
prefix = pfx;
return salt;
}

View File

@@ -127,7 +127,7 @@ public:
class compaction_controller : public backlog_controller {
public:
static constexpr unsigned normalization_factor = 10;
static constexpr unsigned normalization_factor = 30;
compaction_controller(seastar::scheduling_group sg, const ::io_priority_class& iop, float static_shares) : backlog_controller(sg, iop, static_shares) {}
compaction_controller(seastar::scheduling_group sg, const ::io_priority_class& iop, std::chrono::milliseconds interval, std::function<float()> current_backlog)
: backlog_controller(sg, iop, std::move(interval),

View File

@@ -60,6 +60,7 @@ class cache_flat_mutation_reader final : public flat_mutation_reader::impl {
// - _next_row_in_range = _next.position() < _upper_bound
// - _last_row points at a direct predecessor of the next row which is going to be read.
// Used for populating continuity.
// - _population_range_starts_before_all_rows is set accordingly
reading_from_underlying,
end_of_stream
@@ -86,6 +87,13 @@ class cache_flat_mutation_reader final : public flat_mutation_reader::impl {
partition_snapshot_row_cursor _next_row;
bool _next_row_in_range = false;
// True iff current population interval, since the previous clustering row, starts before all clustered rows.
// We cannot just look at _lower_bound, because emission of range tombstones changes _lower_bound and
// because we mark clustering intervals as continuous when consuming a clustering_row, it would prevent
// us from marking the interval as continuous.
// Valid when _state == reading_from_underlying.
bool _population_range_starts_before_all_rows;
future<> do_fill_buffer(db::timeout_clock::time_point);
void copy_from_cache_to_buffer();
future<> process_static_row(db::timeout_clock::time_point);
@@ -226,6 +234,7 @@ inline
future<> cache_flat_mutation_reader::do_fill_buffer(db::timeout_clock::time_point timeout) {
if (_state == state::move_to_underlying) {
_state = state::reading_from_underlying;
_population_range_starts_before_all_rows = _lower_bound.is_before_all_clustered_rows(*_schema);
auto end = _next_row_in_range ? position_in_partition(_next_row.position())
: position_in_partition(_upper_bound);
return _read_context->fast_forward_to(position_range{_lower_bound, std::move(end)}, timeout).then([this, timeout] {
@@ -351,7 +360,7 @@ future<> cache_flat_mutation_reader::read_from_underlying(db::timeout_clock::tim
inline
bool cache_flat_mutation_reader::ensure_population_lower_bound() {
if (!_ck_ranges_curr->start()) {
if (_population_range_starts_before_all_rows) {
return true;
}
if (!_last_row.refresh(*_snp)) {
@@ -406,6 +415,7 @@ inline
void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) {
if (!can_populate()) {
_last_row = nullptr;
_population_range_starts_before_all_rows = false;
_read_context->cache().on_mispopulate();
return;
}
@@ -439,6 +449,7 @@ void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) {
with_allocator(standard_allocator(), [&] {
_last_row = partition_snapshot_row_weakref(*_snp, it, true);
});
_population_range_starts_before_all_rows = false;
});
}

View File

@@ -228,7 +228,6 @@ scylla_tests = [
'tests/memory_footprint',
'tests/perf/perf_sstable',
'tests/cql_query_test',
'tests/secondary_index_test',
'tests/storage_proxy_test',
'tests/schema_change_test',
'tests/mutation_reader_test',
@@ -274,8 +273,6 @@ scylla_tests = [
'tests/input_stream_test',
'tests/virtual_reader_test',
'tests/view_schema_test',
'tests/view_build_test',
'tests/view_complex_test',
'tests/counter_test',
'tests/cell_locker_test',
'tests/row_locker_test',
@@ -495,7 +492,6 @@ scylla_core = (['database.cc',
'cql3/variable_specifications.cc',
'db/consistency_level.cc',
'db/system_keyspace.cc',
'db/system_distributed_keyspace.cc',
'db/schema_tables.cc',
'db/cql_type_parser.cc',
'db/legacy_schema_migrator.cc',
@@ -506,12 +502,12 @@ scylla_core = (['database.cc',
'db/config.cc',
'db/extensions.cc',
'db/heat_load_balance.cc',
'db/index/secondary_index.cc',
'db/marshal/type_parser.cc',
'db/batchlog_manager.cc',
'db/view/view.cc',
'db/view/row_locking.cc',
'index/secondary_index_manager.cc',
'index/secondary_index.cc',
'utils/UUID_gen.cc',
'utils/i_filter.cc',
'utils/bloom_filter.cc',

View File

@@ -22,7 +22,6 @@
#include "cql3/column_identifier.hh"
#include "exceptions/exceptions.hh"
#include "cql3/selection/simple_selector.hh"
#include "cql3/util.hh"
#include <regex>
@@ -63,11 +62,14 @@ sstring column_identifier::to_string() const {
}
sstring column_identifier::to_cql_string() const {
return util::maybe_quote(_text);
}
sstring column_identifier::raw::to_cql_string() const {
return util::maybe_quote(_text);
static const std::regex unquoted_identifier_re("[a-z][a-z0-9_]*");
if (std::regex_match(_text.begin(), _text.end(), unquoted_identifier_re)) {
return _text;
}
static const std::regex double_quote_re("\"");
std::string result = _text;
std::regex_replace(result, double_quote_re, "\"\"");
return '"' + result + '"';
}
column_identifier::raw::raw(sstring raw_text, bool keep_case)

View File

@@ -123,7 +123,6 @@ public:
bool operator!=(const raw& other) const;
virtual sstring to_string() const;
sstring to_cql_string() const;
friend std::hash<column_identifier::raw>;
friend std::ostream& operator<<(std::ostream& out, const column_identifier::raw& id);

View File

@@ -395,15 +395,18 @@ operator<<(std::ostream& os, const cql3_type::raw& r) {
namespace util {
sstring maybe_quote(const sstring& identifier) {
static const std::regex unquoted_identifier_re("[a-z][a-z0-9_]*");
if (std::regex_match(identifier.begin(), identifier.end(), unquoted_identifier_re)) {
return identifier;
sstring maybe_quote(const sstring& s) {
static const std::regex unquoted("\\w*");
static const std::regex double_quote("\"");
if (std::regex_match(s.begin(), s.end(), unquoted)) {
return s;
}
static const std::regex double_quote_re("\"");
std::string result = identifier;
std::regex_replace(result, double_quote_re, "\"\"");
return '"' + result + '"';
std::ostringstream ss;
ss << "\"";
std::regex_replace(std::ostreambuf_iterator<char>(ss), s.begin(), s.end(), double_quote, "\"\"");
ss << "\"";
return ss.str();
}
}
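
The quoting rule this diff consolidates into util::maybe_quote can be sketched standalone — a hedged approximation (not the committed implementation, which streams through std::regex_replace with an ostreambuf_iterator): identifiers matching [a-z][a-z0-9_]* are emitted bare, anything else is wrapped in double quotes with embedded quotes doubled, per CQL escaping:

```cpp
#include <regex>
#include <string>

// Sketch of the CQL identifier quoting rule implemented above.
// Identifiers that match [a-z][a-z0-9_]* may be emitted unquoted;
// anything else is wrapped in double quotes, and any embedded double
// quote is doubled.
std::string maybe_quote(const std::string& identifier) {
    static const std::regex unquoted_identifier_re("[a-z][a-z0-9_]*");
    if (std::regex_match(identifier, unquoted_identifier_re)) {
        return identifier;
    }
    std::string result;
    result += '"';
    for (char c : identifier) {
        if (c == '"') {
            result += '"';  // escape by doubling
        }
        result += c;
    }
    result += '"';
    return result;
}
```

Note that std::regex_replace over a pre-existing string returns the replaced copy; a call whose return value is discarded (as in the code being removed here) has no effect, which is why the new version writes through an output iterator instead.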

View File

@@ -67,6 +67,12 @@ class error_collector : public error_listener<RecognizerType, ExceptionBaseType>
*/
const sstring_view _query;
/**
* An empty bitset to be used as a workaround for AntLR null dereference
* bug.
*/
static typename ExceptionBaseType::BitsetListType _empty_bit_list;
public:
/**
@@ -144,6 +150,14 @@ private:
break;
}
default:
// AntLR Exception class has a bug of dereferencing a null
// pointer in the displayRecognitionError. The following
// if statement makes sure it will not be null before the
// call to that function (displayRecognitionError).
// bug reference: https://github.com/antlr/antlr3/issues/191
if (!ex->get_expectingSet()) {
ex->set_expectingSet(&_empty_bit_list);
}
ex->displayRecognitionError(token_names, msg);
}
return msg.str();
@@ -345,4 +359,8 @@ private:
#endif
};
template<typename RecognizerType, typename TokenType, typename ExceptionBaseType>
typename ExceptionBaseType::BitsetListType
error_collector<RecognizerType,TokenType,ExceptionBaseType>::_empty_bit_list = typename ExceptionBaseType::BitsetListType();
}

View File

@@ -209,19 +209,18 @@ void query_options::prepare(const std::vector<::shared_ptr<column_specification>
}
auto& names = *_names;
std::vector<cql3::raw_value> ordered_values;
std::vector<cql3::raw_value_view> ordered_values;
ordered_values.reserve(specs.size());
for (auto&& spec : specs) {
auto& spec_name = spec->name->text();
for (size_t j = 0; j < names.size(); j++) {
if (names[j] == spec_name) {
ordered_values.emplace_back(_values[j]);
ordered_values.emplace_back(_value_views[j]);
break;
}
}
}
_values = std::move(ordered_values);
fill_value_views();
_value_views = std::move(ordered_values);
}
void query_options::fill_value_views()

View File

@@ -172,30 +172,7 @@ query_processor::query_processor(distributed<service::storage_proxy>& proxy, dis
sm::make_gauge(
"prepared_cache_memory_footprint",
[this] { return _prepared_cache.memory_footprint(); },
sm::description("Size (in bytes) of the prepared statements cache.")),
sm::make_derive(
"secondary_index_creates",
_cql_stats.secondary_index_creates,
sm::description("Counts a total number of CQL CREATE INDEX requests.")),
sm::make_derive(
"secondary_index_drops",
_cql_stats.secondary_index_drops,
sm::description("Counts a total number of CQL DROP INDEX requests.")),
// secondary_index_reads total count is also included in all cql reads
sm::make_derive(
"secondary_index_reads",
_cql_stats.secondary_index_reads,
sm::description("Counts a total number of CQL read requests performed using secondary indexes.")),
// secondary_index_rows_read total count is also included in all cql rows read
sm::make_derive(
"secondary_index_rows_read",
_cql_stats.secondary_index_rows_read,
sm::description("Counts a total number of rows read during CQL requests performed using secondary indexes."))
});
sm::description("Size (in bytes) of the prepared statements cache."))});
service::get_local_migration_manager().register_listener(_migration_subscriber.get());
}

View File

@@ -64,15 +64,13 @@ class single_column_primary_key_restrictions : public primary_key_restrictions<V
using bounds_range_type = typename primary_key_restrictions<ValueType>::bounds_range_type;
private:
schema_ptr _schema;
bool _allow_filtering;
::shared_ptr<single_column_restrictions> _restrictions;
bool _slice;
bool _contains;
bool _in;
public:
single_column_primary_key_restrictions(schema_ptr schema, bool allow_filtering)
single_column_primary_key_restrictions(schema_ptr schema)
: _schema(schema)
, _allow_filtering(allow_filtering)
, _restrictions(::make_shared<single_column_restrictions>(schema))
, _slice(false)
, _contains(false)
@@ -112,7 +110,7 @@ public:
}
void do_merge_with(::shared_ptr<single_column_restriction> restriction) {
if (!_restrictions->empty() && !_allow_filtering) {
if (!_restrictions->empty()) {
auto last_column = *_restrictions->last_column();
auto new_column = restriction->get_column_def();

View File

@@ -202,6 +202,14 @@ public:
const query_options& options,
gc_clock::time_point now) const override;
virtual std::vector<bytes_opt> values_raw(const query_options& options) const = 0;
virtual std::vector<bytes_opt> values(const query_options& options) const override {
std::vector<bytes_opt> ret = values_raw(options);
std::sort(ret.begin(),ret.end());
ret.erase(std::unique(ret.begin(),ret.end()),ret.end());
return ret;
}
#if 0
@Override
protected final boolean isSupportedBy(SecondaryIndex index)
@@ -224,7 +232,7 @@ public:
return abstract_restriction::term_uses_function(_values, ks_name, function_name);
}
virtual std::vector<bytes_opt> values(const query_options& options) const override {
virtual std::vector<bytes_opt> values_raw(const query_options& options) const override {
std::vector<bytes_opt> ret;
for (auto&& v : _values) {
ret.emplace_back(to_bytes_opt(v->bind_and_get(options)));
@@ -249,7 +257,7 @@ public:
return false;
}
virtual std::vector<bytes_opt> values(const query_options& options) const override {
virtual std::vector<bytes_opt> values_raw(const query_options& options) const override {
auto&& lval = dynamic_pointer_cast<multi_item_terminal>(_marker->bind(options));
if (!lval) {
throw exceptions::invalid_request_exception("Invalid null value for IN restriction");
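
The values() override introduced above deduplicates bound IN values with the standard sort/unique/erase idiom; a standalone sketch over int (the real code operates on bytes_opt) looks like this:

```cpp
#include <algorithm>
#include <vector>

// Illustration of the sort/unique/erase idiom used by the values()
// override above: std::unique only collapses adjacent duplicates, so
// the range must be sorted first, and erase() trims the leftover tail.
std::vector<int> dedup(std::vector<int> v) {
    std::sort(v.begin(), v.end());
    v.erase(std::unique(v.begin(), v.end()), v.end());
    return v;
}
```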

View File

@@ -41,17 +41,14 @@ using boost::adaptors::transformed;
template<typename T>
class statement_restrictions::initial_key_restrictions : public primary_key_restrictions<T> {
bool _allow_filtering;
public:
initial_key_restrictions(bool allow_filtering)
: _allow_filtering(allow_filtering) {}
using bounds_range_type = typename primary_key_restrictions<T>::bounds_range_type;
::shared_ptr<primary_key_restrictions<T>> do_merge_to(schema_ptr schema, ::shared_ptr<restriction> restriction) const {
if (restriction->is_multi_column()) {
throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
}
return ::make_shared<single_column_primary_key_restrictions<T>>(schema, _allow_filtering)->merge_to(schema, restriction);
return ::make_shared<single_column_primary_key_restrictions<T>>(schema)->merge_to(schema, restriction);
}
::shared_ptr<primary_key_restrictions<T>> merge_to(schema_ptr schema, ::shared_ptr<restriction> restriction) override {
if (restriction->is_multi_column()) {
@@ -60,7 +57,7 @@ public:
if (restriction->is_on_token()) {
return static_pointer_cast<token_restriction>(restriction);
}
return ::make_shared<single_column_primary_key_restrictions<T>>(schema, _allow_filtering)->merge_to(restriction);
return ::make_shared<single_column_primary_key_restrictions<T>>(schema)->merge_to(restriction);
}
void merge_with(::shared_ptr<restriction> restriction) override {
throw exceptions::unsupported_operation_exception();
@@ -125,10 +122,9 @@ statement_restrictions::initial_key_restrictions<clustering_key_prefix>::merge_t
}
template<typename T>
::shared_ptr<primary_key_restrictions<T>> statement_restrictions::get_initial_key_restrictions(bool allow_filtering) {
static thread_local ::shared_ptr<primary_key_restrictions<T>> initial_kr_true = ::make_shared<initial_key_restrictions<T>>(true);
static thread_local ::shared_ptr<primary_key_restrictions<T>> initial_kr_false = ::make_shared<initial_key_restrictions<T>>(false);
return allow_filtering ? initial_kr_true : initial_kr_false;
::shared_ptr<primary_key_restrictions<T>> statement_restrictions::get_initial_key_restrictions() {
static thread_local ::shared_ptr<primary_key_restrictions<T>> initial_kr = ::make_shared<initial_key_restrictions<T>>();
return initial_kr;
}
std::vector<::shared_ptr<column_identifier>>
@@ -145,10 +141,10 @@ statement_restrictions::get_partition_key_unrestricted_components() const {
return r;
}
statement_restrictions::statement_restrictions(schema_ptr schema, bool allow_filtering)
statement_restrictions::statement_restrictions(schema_ptr schema)
: _schema(schema)
, _partition_key_restrictions(get_initial_key_restrictions<partition_key>(allow_filtering))
, _clustering_columns_restrictions(get_initial_key_restrictions<clustering_key_prefix>(allow_filtering))
, _partition_key_restrictions(get_initial_key_restrictions<partition_key>())
, _clustering_columns_restrictions(get_initial_key_restrictions<clustering_key_prefix>())
, _nonprimary_key_restrictions(::make_shared<single_column_restrictions>(schema))
{ }
#if 0
@@ -166,9 +162,8 @@ statement_restrictions::statement_restrictions(database& db,
::shared_ptr<variable_specifications> bound_names,
bool selects_only_static_columns,
bool select_a_collection,
bool for_view,
bool allow_filtering)
: statement_restrictions(schema, allow_filtering)
bool for_view)
: statement_restrictions(schema)
{
/*
* WHERE clause. For a given entity, rules are: - EQ relation conflicts with anything else (including a 2nd EQ)
@@ -332,17 +327,6 @@ void statement_restrictions::process_partition_key_restrictions(bool has_queriab
_is_key_range = true;
_uses_secondary_indexing = has_queriable_index;
}
if (_partition_key_restrictions->is_slice() && !_partition_key_restrictions->is_on_token() && !for_view) {
// A SELECT query may not request a slice (range) of partition keys
// without using token(). This is because there is no way to do this
// query efficiently: mumur3 turns a contiguous range of partition
// keys into tokens all over the token space.
// However, in a SELECT statement used to define a materialized view,
// such a slice is fine - it is used to check whether individual
// partitions, match, and does not present a performance problem.
throw exceptions::invalid_request_exception(
"Only EQ and IN relation are supported on the partition key (unless you use the token() function)");
}
}
bool statement_restrictions::has_partition_key_unrestricted_components() const {

View File

@@ -67,7 +67,7 @@ private:
class initial_key_restrictions;
template<typename T>
static ::shared_ptr<primary_key_restrictions<T>> get_initial_key_restrictions(bool allow_filtering);
static ::shared_ptr<primary_key_restrictions<T>> get_initial_key_restrictions();
/**
* Restrictions on partitioning columns
@@ -108,7 +108,7 @@ public:
* @param cfm the column family meta data
* @return a new empty <code>StatementRestrictions</code>.
*/
statement_restrictions(schema_ptr schema, bool allow_filtering);
statement_restrictions(schema_ptr schema);
statement_restrictions(database& db,
schema_ptr schema,
@@ -117,8 +117,7 @@ public:
::shared_ptr<variable_specifications> bound_names,
bool selects_only_static_columns,
bool select_a_collection,
bool for_view = false,
bool allow_filtering = false);
bool for_view = false);
private:
void add_restriction(::shared_ptr<restriction> restriction);
void add_single_column_restriction(::shared_ptr<single_column_restriction> restriction);

View File

@@ -105,9 +105,11 @@ public:
virtual void reset() = 0;
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) override {
if (receiver->type == get_type()) {
auto t1 = receiver->type->underlying_type();
auto t2 = get_type()->underlying_type();
if (t1 == t2) {
return assignment_testable::test_result::EXACT_MATCH;
} else if (receiver->type->is_value_compatible_with(*get_type())) {
} else if (t1->is_value_compatible_with(*t2)) {
return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
} else {
return assignment_testable::test_result::NOT_ASSIGNABLE;

View File

@@ -116,6 +116,18 @@ single_column_relation::to_receivers(schema_ptr schema, const column_definition&
throw exceptions::invalid_request_exception(sprint(
"IN predicates on non-primary-key columns (%s) is not yet supported", column_def.name_as_text()));
}
} else if (is_slice()) {
// Non EQ relation is not supported without token(), even if we have a 2ndary index (since even those
// are ordered by partitioner).
// Note: In theory we could allow it for 2ndary index queries with ALLOW FILTERING, but that would
// probably require some special casing
// Note bis: This is also why we don't bother handling the 'tuple' notation of #4851 for keys. If we
// lift the limitation for 2ndary
// index with filtering, we'll need to handle it though.
if (column_def.is_partition_key()) {
throw exceptions::invalid_request_exception(
"Only EQ and IN relation are supported on the partition key (unless you use the token() function)");
}
}
if (is_contains() && !receiver->type->is_collection()) {

View File

@@ -134,7 +134,7 @@ protected:
#endif
virtual sstring to_string() const override {
auto entity_as_string = _entity->to_cql_string();
auto entity_as_string = _entity->to_string();
if (_map_key) {
entity_as_string = sprint("%s[%s]", std::move(entity_as_string), _map_key->to_string());
}

View File

@@ -247,11 +247,10 @@ future<shared_ptr<cql_transport::event::schema_change>> alter_table_statement::a
cfm.with_column(column_name->name(), type, _is_static ? column_kind::static_column : column_kind::regular_column);
// Adding a column to a table which has an include all view requires the column to be added to the view
// as well. If the view has a regular base column in its PK, then the column ID needs to be updated in
// view_info; for that, rebuild the schema.
// as well
if (!_is_static) {
for (auto&& view : cf.views()) {
if (view->view_info()->include_all_columns() || view->view_info()->base_non_pk_column_in_view_pk()) {
if (view->view_info()->include_all_columns()) {
schema_builder builder(view);
builder.with_column(column_name->name(), type);
view_updates.push_back(view_ptr(builder.build()));
@@ -306,10 +305,14 @@ future<shared_ptr<cql_transport::event::schema_change>> alter_table_statement::a
}
}
if (!cf.views().empty()) {
// If a column is dropped which is included in a view, we don't allow the drop to take place.
auto view_names = ::join(", ", cf.views()
| boost::adaptors::filtered([&] (auto&& v) { return bool(v->get_column_definition(column_name->name())); })
| boost::adaptors::transformed([] (auto&& v) { return v->cf_name(); }));
if (!view_names.empty()) {
throw exceptions::invalid_request_exception(sprint(
"Cannot drop column %s on base table %s.%s with materialized views",
column_name, keyspace(), column_family()));
"Cannot drop column %s, depended on by materialized views (%s.{%s})",
column_name, keyspace(), view_names));
}
break;
}

View File

@@ -252,7 +252,6 @@ create_index_statement::announce_migration(distributed<service::storage_proxy>&
sprint("Index %s is a duplicate of existing index %s", index.name(), existing_index.value().name()));
}
}
++_cql_stats->secondary_index_creates;
schema_builder builder{schema};
builder.with_index(index);
return service::get_local_migration_manager().announce_column_family_update(
@@ -268,7 +267,6 @@ create_index_statement::announce_migration(distributed<service::storage_proxy>&
std::unique_ptr<cql3::statements::prepared_statement>
create_index_statement::prepare(database& db, cql_stats& stats) {
_cql_stats = &stats;
return std::make_unique<prepared_statement>(make_shared<create_index_statement>(*this));
}
@@ -281,7 +279,7 @@ index_metadata create_index_statement::make_index_metadata(schema_ptr schema,
index_options_map new_options = options;
auto target_option = boost::algorithm::join(targets | boost::adaptors::transformed(
[schema](const auto &target) -> sstring {
return target->as_string();
return target->as_cql_string(schema);
}), ",");
new_options.emplace(index_target::target_option_name, target_option);
return index_metadata{name, new_options, kind};

View File

@@ -70,7 +70,7 @@ class create_index_statement : public schema_altering_statement {
const std::vector<::shared_ptr<index_target::raw>> _raw_targets;
const ::shared_ptr<index_prop_defs> _properties;
const bool _if_not_exists;
cql_stats* _cql_stats = nullptr;
public:
create_index_statement(::shared_ptr<cf_name> name, ::shared_ptr<index_name> index_name,

View File

@@ -127,25 +127,22 @@ static bool validate_primary_key(
"Cannot use Static column '%s' in PRIMARY KEY of materialized view", def->name_as_text()));
}
bool new_non_pk_column = false;
if (base_pk.find(def) == base_pk.end()) {
if (has_non_pk_column) {
throw exceptions::invalid_request_exception(sprint(
"Cannot include more than one non-primary key column '%s' in materialized view primary key", def->name_as_text()));
}
new_non_pk_column = true;
return true;
}
// We don't need to include the "IS NOT NULL" filter on a non-composite partition key
// because we will never allow a single partition key to be NULL
bool is_non_composite_partition_key = def->is_partition_key() &&
schema->partition_key_columns().size() == 1;
if (!is_non_composite_partition_key && !restrictions.is_restricted(def)) {
if (schema->partition_key_columns().size() > 1 && !restrictions.is_restricted(def)) {
throw exceptions::invalid_request_exception(sprint(
"Primary key column '%s' is required to be filtered by 'IS NOT NULL'", def->name_as_text()));
}
return new_non_pk_column;
return false;
}
future<shared_ptr<cql_transport::event::schema_change>> create_view_statement::announce_migration(distributed<service::storage_proxy>& proxy, bool is_local_only) {
@@ -250,6 +247,13 @@ future<shared_ptr<cql_transport::event::schema_change>> create_view_statement::a
boost::range::join(schema->partition_key_columns(), schema->clustering_key_columns())
| boost::adaptors::transformed([](auto&& def) { return &def; }));
if (_partition_keys.empty()) {
throw exceptions::invalid_request_exception(sprint("Must select at least a column for a Materialized View"));
}
if (_clustering_keys.empty()) {
throw exceptions::invalid_request_exception(sprint("No columns are defined for Materialized View other than primary key"));
}
// Validate the primary key clause, ensuring only one non-PK base column is used in the view's PK.
bool has_non_pk_column = false;
std::unordered_set<const column_definition*> target_primary_keys;
@@ -291,26 +295,18 @@ future<shared_ptr<cql_transport::event::schema_change>> create_view_statement::a
bool def_in_target_pk = std::find(target_primary_keys.begin(), target_primary_keys.end(), &def) != target_primary_keys.end();
if (included_def && !def_in_target_pk) {
target_non_pk_columns.push_back(&def);
}
if (def.is_primary_key() && !def_in_target_pk) {
} else if (def.is_primary_key() && !def_in_target_pk) {
missing_pk_columns.push_back(&def);
}
}
if (!missing_pk_columns.empty()) {
auto column_names = ::join(", ", missing_pk_columns | boost::adaptors::transformed(std::mem_fn(&column_definition::name_as_text)));
auto column_names = ::join(", ", missing_pk_columns | boost::adaptors::transformed(std::mem_fn(&column_definition::name)));
throw exceptions::invalid_request_exception(sprint(
"Cannot create Materialized View %s without primary key columns from base %s (%s)",
column_family(), _base_name->get_column_family(), column_names));
}
if (_partition_keys.empty()) {
throw exceptions::invalid_request_exception(sprint("Must select at least a column for a Materialized View"));
}
if (_clustering_keys.empty()) {
throw exceptions::invalid_request_exception(sprint("No columns are defined for Materialized View other than primary key"));
}
schema_builder builder{keyspace(), column_family()};
auto add_columns = [this, &builder] (std::vector<const column_definition*>& defs, column_kind kind) mutable {
for (auto* def : defs) {

View File

@@ -86,7 +86,6 @@ future<shared_ptr<cql_transport::event::schema_change>> drop_index_statement::an
if (!cfm) {
return make_ready_future<::shared_ptr<cql_transport::event::schema_change>>(nullptr);
}
++_cql_stats->secondary_index_drops;
auto builder = schema_builder(cfm);
builder.without_index(_index_name);
return service::get_local_migration_manager().announce_column_family_update(builder.build(), false, {}, is_local_only).then([cfm] {
@@ -103,7 +102,6 @@ future<shared_ptr<cql_transport::event::schema_change>> drop_index_statement::an
std::unique_ptr<cql3::statements::prepared_statement>
drop_index_statement::prepare(database& db, cql_stats& stats) {
_cql_stats = &stats;
return std::make_unique<prepared_statement>(make_shared<drop_index_statement>(*this));
}

View File

@@ -56,7 +56,6 @@ namespace statements {
class drop_index_statement : public schema_altering_statement {
sstring _index_name;
bool _if_exists;
cql_stats* _cql_stats = nullptr;
public:
drop_index_statement(::shared_ptr<index_name> index_name, bool if_exists);
@@ -75,4 +74,4 @@ private:
}
}
}

View File

@@ -41,7 +41,7 @@
#include <set>
#include "index_prop_defs.hh"
#include "index/secondary_index.hh"
#include "db/index/secondary_index.hh"
void cql3::statements::index_prop_defs::validate() {
static std::set<sstring> keywords({ sstring(KW_OPTIONS) });

View File

@@ -41,7 +41,7 @@
#include <stdexcept>
#include "index_target.hh"
#include "index/secondary_index.hh"
#include "db/index/secondary_index.hh"
namespace cql3 {
@@ -59,10 +59,6 @@ sstring index_target::as_cql_string(schema_ptr schema) const {
return sprint("%s(%s)", to_sstring(type), column->to_cql_string());
}
sstring index_target::as_string() const {
return column->to_string();
}
index_target::target_type index_target::from_sstring(const sstring& s)
{
if (s == "keys") {

View File

@@ -43,6 +43,7 @@
#include "core/shared_ptr.hh"
#include "cql3/column_identifier.hh"
#include "db/index/secondary_index.hh"
namespace cql3 {
@@ -64,7 +65,6 @@ struct index_target {
}
sstring as_cql_string(schema_ptr schema) const;
sstring as_string() const;
static sstring index_option(target_type type);
static target_type from_column_definition(const column_definition& cd);

View File

@@ -276,14 +276,13 @@ select_statement::do_execute(distributed<service::storage_proxy>& proxy,
return do_with(
cql3::selection::result_set_builder(*_selection, now,
options.get_cql_serialization_format()),
[this, p, page_size, now](auto& builder) {
[p, page_size, now](auto& builder) {
return do_until([p] {return p->is_exhausted();},
[p, &builder, page_size, now] {
return p->fetch_page(builder, page_size, now);
}
).then([this, &builder] {
).then([&builder] {
auto rs = builder.build();
update_stats_rows_read(rs->size());
auto msg = ::make_shared<cql_transport::messages::result_message::rows>(std::move(rs));
return make_ready_future<shared_ptr<cql_transport::messages::result_message>>(std::move(msg));
});
@@ -303,7 +302,6 @@ select_statement::do_execute(distributed<service::storage_proxy>& proxy,
rs->get_metadata().set_has_more_pages(p->state());
}
update_stats_rows_read(rs->size());
auto msg = ::make_shared<cql_transport::messages::result_message::rows>(std::move(rs));
return make_ready_future<shared_ptr<cql_transport::messages::result_message>>(std::move(msg));
});
@@ -322,10 +320,10 @@ select_statement::execute(distributed<service::storage_proxy>& proxy,
// is no way to tell which of these rows belong to the query result before
// doing post-query ordering.
if (needs_post_query_ordering() && _limit) {
return do_with(std::forward<dht::partition_range_vector>(partition_ranges), [this, &proxy, &state, &options, cmd](auto& prs) {
return do_with(std::forward<dht::partition_range_vector>(partition_ranges), [this, &proxy, &state, &options, cmd](auto prs) {
assert(cmd->partition_limit == query::max_partitions);
query::result_merger merger(cmd->row_limit * prs.size(), query::max_partitions);
return map_reduce(prs.begin(), prs.end(), [this, &proxy, &state, &options, cmd] (auto& pr) {
return map_reduce(prs.begin(), prs.end(), [this, &proxy, &state, &options, cmd] (auto pr) {
dht::partition_range_vector prange { pr };
auto command = ::make_lw_shared<query::read_command>(*cmd);
return proxy.local().query(_schema,
@@ -347,54 +345,6 @@ select_statement::execute(distributed<service::storage_proxy>& proxy,
}
}
// Function for fetching the selected columns from a list of clustering rows.
// It is currently used only in our Secondary Index implementation - ordinary
// CQL SELECT statements do not have the syntax to request a list of rows.
// FIXME: The current implementation is very inefficient - it requests each
// row separately (and all in parallel). Even multiple rows from a single
// partition are requested separately. This last case can be easily improved,
// but to implement the general case (multiple rows from multiple partitions)
// efficiently, we will need more support from other layers.
// Note that currently we do not make any assumptions on the order of the keys
// given to this function; for a more efficient implementation with a large
// list, we should probably require that the keys be ordered in token order
// (see also issue #3423).
future<shared_ptr<cql_transport::messages::result_message>>
select_statement::execute(distributed<service::storage_proxy>& proxy,
lw_shared_ptr<query::read_command> cmd,
std::vector<primary_key>&& primary_keys,
service::query_state& state,
const query_options& options,
gc_clock::time_point now)
{
return do_with(std::move(primary_keys), [this, &proxy, &state, &options, cmd] (auto& keys) {
assert(cmd->partition_limit == query::max_partitions);
query::result_merger merger(cmd->row_limit, query::max_partitions);
// there is no point to produce rows beyond the first row_limit:
auto end = keys.size() <= cmd->row_limit ? keys.end() : keys.begin() + cmd->row_limit;
return map_reduce(keys.begin(), end, [this, &proxy, &state, &options, cmd] (auto& key) {
auto command = ::make_lw_shared<query::read_command>(*cmd);
// for each partition, read just one clustering row (TODO: can
// get all needed rows of one partition at once.)
command->slice._row_ranges.clear();
if (key.clustering) {
command->slice._row_ranges.push_back(query::clustering_range::make_singular(key.clustering));
}
return proxy.local().query(_schema,
command,
{dht::partition_range::make_singular(key.partition)},
options.get_consistency(),
state.get_trace_state()).then([] (foreign_ptr<lw_shared_ptr<query::result>>&& result, service::replicas_per_token_range) {
return std::move(result);
});
}, std::move(merger));
}).then([this, &options, now, cmd] (auto result) {
// note that cmd here still has the garbage clustering range in slice,
// but process_results() ignores this part of the slice setting.
return this->process_results(std::move(result), cmd, options, now);
});
}
future<::shared_ptr<cql_transport::messages::result_message>>
select_statement::execute_internal(distributed<service::storage_proxy>& proxy,
service::query_state& state,
@@ -415,10 +365,10 @@ select_statement::execute_internal(distributed<service::storage_proxy>& proxy,
++_stats.reads;
if (needs_post_query_ordering() && _limit) {
return do_with(std::move(partition_ranges), [this, &proxy, &state, command] (auto& prs) {
return do_with(std::move(partition_ranges), [this, &proxy, &state, command] (auto prs) {
assert(command->partition_limit == query::max_partitions);
query::result_merger merger(command->row_limit * prs.size(), query::max_partitions);
return map_reduce(prs.begin(), prs.end(), [this, &proxy, &state, command] (auto& pr) {
return map_reduce(prs.begin(), prs.end(), [this, &proxy, &state, command] (auto pr) {
dht::partition_range_vector prange { pr };
auto cmd = ::make_lw_shared<query::read_command>(*command);
return proxy.local().query(_schema, cmd, std::move(prange), db::consistency_level::ONE, state.get_trace_state(),
@@ -457,7 +407,6 @@ select_statement::process_results(foreign_ptr<lw_shared_ptr<query::result>> resu
}
rs->trim(cmd->row_limit);
}
update_stats_rows_read(rs->size());
return ::make_shared<cql_transport::messages::result_message::rows>(std::move(rs));
}
@@ -549,47 +498,10 @@ indexed_table_select_statement::do_execute(distributed<service::storage_proxy>&
auto now = gc_clock::now();
++_stats.reads;
++_stats.secondary_index_reads;
assert(_restrictions->uses_secondary_indexing());
// Secondary index search has two steps: 1. use the index table to find a
// list of primary keys matching the query. 2. read the rows matching
// these primary keys from the base table and return the selected columns.
// In "whole_partitions" case, we can do the above in whole partition
// granularity. "partition_slices" is similar, but we fetch the same
// clustering prefix (make_partition_slice()) from a list of partitions.
// In other cases we need to list, and retrieve, individual rows and
// not entire partitions. See issue #3405 for more details.
bool whole_partitions = false;
bool partition_slices = false;
if (_schema->clustering_key_size() == 0) {
// Obviously, if there are no clustering columns, then we can work at
// the granularity of whole partitions.
whole_partitions = true;
} else {
if (_index.depends_on(*(_schema->clustering_key_columns().begin()))) {
// Searching on the *first* clustering column means in each of
// matching partition, we can take the same contiguous clustering
// slice (clustering prefix).
partition_slices = true;
} else {
// A search on any partition key column means that either all rows in a
// partition match or none do, so we can work with whole partitions.
for (auto& cdef : _schema->partition_key_columns()) {
if (_index.depends_on(cdef)) {
whole_partitions = true;
break;
}
}
}
}
if (whole_partitions || partition_slices) {
// In this case, can use our normal query machinery, which retrieves
// entire partitions or the same slice for many partitions.
return find_index_partition_ranges(proxy, state, options).then([limit, now, &state, &options, &proxy, this] (dht::partition_range_vector partition_ranges) {
auto command = ::make_lw_shared<query::read_command>(
return find_index_partition_ranges(proxy, state, options).then([limit, now, &state, &options, &proxy, this] (dht::partition_range_vector partition_ranges) {
auto command = ::make_lw_shared<query::read_command>(
_schema->id(),
_schema->version(),
make_partition_slice(options),
@@ -599,70 +511,35 @@ indexed_table_select_statement::do_execute(distributed<service::storage_proxy>&
query::max_partitions,
utils::UUID(),
options.get_timestamp(state));
return this->execute(proxy, command, std::move(partition_ranges), state, options, now);
});
} else {
// In this case, we need to retrieve a list of rows (not entire
// partitions) and then retrieve those specific rows.
return find_index_clustering_rows(proxy, state, options).then([limit, now, &state, &options, &proxy, this] (std::vector<primary_key> primary_keys) {
auto command = ::make_lw_shared<query::read_command>(
_schema->id(),
_schema->version(),
// Note: the "clustering bounds" set in make_partition_slice()
// here is garbage, and will be overridden by execute() anyway
make_partition_slice(options),
limit,
now,
tracing::make_trace_info(state.get_trace_state()),
query::max_partitions,
utils::UUID(),
options.get_timestamp(state));
return this->execute(proxy, command, std::move(primary_keys), state, options, now);
});
}
return this->execute(proxy, command, std::move(partition_ranges), state, options, now);
});
}
// Utility function for getting the schema of the materialized view used for
// the secondary index implementation.
static schema_ptr
get_index_schema(distributed<service::storage_proxy>& proxy,
const secondary_index::index& index,
const schema_ptr& schema,
tracing::trace_state_ptr& trace_state)
{
const auto& im = index.metadata();
sstring index_table_name = im.name() + "_index";
tracing::add_table_name(trace_state, schema->ks_name(), index_table_name);
return proxy.local().get_db().local().find_schema(schema->ks_name(), index_table_name);
}
// Utility function for reading from the index view (get_index_view())
// the posting-list for a particular value of the indexed column.
// Remember a secondary index can only be created on a single column.
//static future<service::storage_proxy::coordinator_query_result>
static future<foreign_ptr<lw_shared_ptr<query::result>>, std::unordered_map<nonwrapping_range<dht::token>, std::vector<utils::UUID>>>
read_posting_list(distributed<service::storage_proxy>& proxy,
schema_ptr view_schema,
const std::vector<::shared_ptr<restrictions::restrictions>>& index_restrictions,
const query_options& options,
int32_t limit,
service::query_state& state,
gc_clock::time_point now)
future<dht::partition_range_vector>
indexed_table_select_statement::find_index_partition_ranges(distributed<service::storage_proxy>& proxy,
service::query_state& state,
const query_options& options)
{
const auto& im = _index.metadata();
sstring index_table_name = sprint("%s_index", im.name());
tracing::add_table_name(state.get_trace_state(), keyspace(), index_table_name);
auto& db = proxy.local().get_db().local();
const auto& view = db.find_column_family(_schema->ks_name(), index_table_name);
dht::partition_range_vector partition_ranges;
// FIXME: there should be only one index restriction for this index!
// Perhaps even one index restriction entirely (do we support
// intersection queries?).
for (const auto& restriction : index_restrictions) {
auto pk = partition_key::from_optional_exploded(*view_schema, restriction->values(options));
auto dk = dht::global_partitioner().decorate_key(*view_schema, pk);
for (const auto& restriction : _restrictions->index_restrictions()) {
auto pk = partition_key::from_optional_exploded(*view.schema(), restriction->values(options));
auto dk = dht::global_partitioner().decorate_key(*view.schema(), pk);
auto range = dht::partition_range::make_singular(dk);
partition_ranges.emplace_back(range);
}
partition_slice_builder partition_slice_builder{*view_schema};
auto now = gc_clock::now();
int32_t limit = get_limit(options);
partition_slice_builder partition_slice_builder{*view.schema()};
auto cmd = ::make_lw_shared<query::read_command>(
view_schema->id(),
view_schema->version(),
view.schema()->id(),
view.schema()->version(),
partition_slice_builder.build(),
limit,
now,
@@ -670,111 +547,35 @@ read_posting_list(distributed<service::storage_proxy>& proxy,
query::max_partitions,
utils::UUID(),
options.get_timestamp(state));
return proxy.local().query(view_schema,
return proxy.local().query(view.schema(),
cmd,
std::move(partition_ranges),
options.get_consistency(),
state.get_trace_state());
}
// Note: the partition keys returned by this function will be sorted in
// lexicographical order of the partition key columns (in the way that
// clustering keys are sorted) - NOT in token order. See issue #3423.
future<dht::partition_range_vector>
indexed_table_select_statement::find_index_partition_ranges(distributed<service::storage_proxy>& proxy,
service::query_state& state,
const query_options& options)
{
schema_ptr view = get_index_schema(proxy, _index, _schema, state.get_trace_state());
auto now = gc_clock::now();
return read_posting_list(proxy, view, _restrictions->index_restrictions(), options, get_limit(options), state, now).then(
[this, now, &options, view] (foreign_ptr<lw_shared_ptr<query::result>> result, service::replicas_per_token_range) {
state.get_trace_state()).then([cmd, this, &options, now, &view] (foreign_ptr<lw_shared_ptr<query::result>> result,
service::replicas_per_token_range) {
std::vector<const column_definition*> columns;
for (const column_definition& cdef : _schema->partition_key_columns()) {
columns.emplace_back(view->get_column_definition(cdef.name()));
columns.emplace_back(view.schema()->get_column_definition(cdef.name()));
}
auto selection = selection::selection::for_columns(view, columns);
auto selection = selection::selection::for_columns(view.schema(), columns);
cql3::selection::result_set_builder builder(*selection, now, options.get_cql_serialization_format());
// FIXME: read_posting_list already asks to read primary keys only.
// why do we need to specify this again?
auto slice = partition_slice_builder(*view).build();
query::result_view::consume(*result,
slice,
cql3::selection::result_set_builder::visitor(builder, *view, *selection));
cmd->slice,
cql3::selection::result_set_builder::visitor(builder, *view.schema(), *selection));
auto rs = cql3::untyped_result_set(::make_shared<cql_transport::messages::result_message::rows>(std::move(builder.build())));
dht::partition_range_vector partition_ranges;
partition_ranges.reserve(rs.size());
// We are reading the list of primary keys as rows of a single
// partition (in the index view), so they are sorted in
// lexicographical order (N.B. this is NOT token order!). We need
// to avoid outputting the same partition key twice, but luckily in
// the sorted order, these will be adjacent.
stdx::optional<dht::decorated_key> last_dk;
for (size_t i = 0; i < rs.size(); i++) {
const auto& row = rs.at(i);
std::vector<bytes> pk_columns;
for (const auto& column : row.get_columns()) {
pk_columns.push_back(row.get_blob(column->name->to_string()));
auto blob = row.get_blob(column->name->to_cql_string());
auto pk = partition_key::from_exploded(*_schema, { blob });
auto dk = dht::global_partitioner().decorate_key(*_schema, pk);
auto range = dht::partition_range::make_singular(dk);
partition_ranges.emplace_back(range);
}
auto pk = partition_key::from_exploded(*_schema, pk_columns);
auto dk = dht::global_partitioner().decorate_key(*_schema, pk);
if (last_dk && last_dk->equal(*_schema, dk)) {
// Another row of the same partition, no need to output the
// same partition key again.
continue;
}
last_dk = dk;
auto range = dht::partition_range::make_singular(dk);
partition_ranges.emplace_back(range);
}
return partition_ranges;
});
}
// Note: the partition keys returned by this function will be sorted in
// lexicographical order of the partition key columns (in the way that
// clustering keys are sorted) - NOT in token order. See issue #3423.
future<std::vector<indexed_table_select_statement::primary_key>>
indexed_table_select_statement::find_index_clustering_rows(distributed<service::storage_proxy>& proxy, service::query_state& state, const query_options& options)
{
schema_ptr view = get_index_schema(proxy, _index, _schema, state.get_trace_state());
auto now = gc_clock::now();
return read_posting_list(proxy, view, _restrictions->index_restrictions(), options, get_limit(options), state, now).then(
[this, now, &options, view] (foreign_ptr<lw_shared_ptr<query::result>> result, service::replicas_per_token_range) {
std::vector<const column_definition*> columns;
for (const column_definition& cdef : _schema->partition_key_columns()) {
columns.emplace_back(view->get_column_definition(cdef.name()));
}
for (const column_definition& cdef : _schema->clustering_key_columns()) {
columns.emplace_back(view->get_column_definition(cdef.name()));
}
auto selection = selection::selection::for_columns(view, columns);
cql3::selection::result_set_builder builder(*selection, now, options.get_cql_serialization_format());
// FIXME: read_posting_list already asks to read primary keys only.
// why do we need to specify this again?
auto slice = partition_slice_builder(*view).build();
query::result_view::consume(*result,
slice,
cql3::selection::result_set_builder::visitor(builder, *view, *selection));
auto rs = cql3::untyped_result_set(::make_shared<cql_transport::messages::result_message::rows>(std::move(builder.build())));
std::vector<primary_key> primary_keys;
primary_keys.reserve(rs.size());
for (size_t i = 0; i < rs.size(); i++) {
const auto& row = rs.at(i);
auto pk_columns = _schema->partition_key_columns() | boost::adaptors::transformed([&] (auto& cdef) {
return row.get_blob(cdef.name_as_text());
});
auto pk = partition_key::from_range(pk_columns);
auto dk = dht::global_partitioner().decorate_key(*_schema, pk);
auto ck_columns = _schema->clustering_key_columns() | boost::adaptors::transformed([&] (auto& cdef) {
return row.get_blob(cdef.name_as_text());
});
auto ck = clustering_key::from_range(ck_columns);
primary_keys.emplace_back(primary_key{std::move(dk), std::move(ck)});
}
return primary_keys;
});
return make_ready_future<dht::partition_range_vector>(partition_ranges);
}).finally([cmd] {});
}
namespace raw {
@@ -856,10 +657,8 @@ select_statement::prepare_restrictions(database& db,
bool for_view)
{
try {
// FIXME: this method should take a separate allow_filtering parameter
// and pass it on. Currently we pass "for_view" as allow_filtering.
return ::make_shared<restrictions::statement_restrictions>(db, schema, statement_type::SELECT, std::move(_where_clause), bound_names,
selection->contains_only_static_columns(), selection->contains_a_collection(), for_view, for_view);
selection->contains_only_static_columns(), selection->contains_a_collection(), for_view);
} catch (const exceptions::unrecognized_entity_exception& e) {
if (contains_alias(e.entity)) {
throw exceptions::invalid_request_exception(sprint("Aliases aren't allowed in the where clause ('%s')", e.relation->to_string()));
@@ -1060,22 +859,15 @@ namespace util {
shared_ptr<cql3::statements::raw::select_statement> build_select_statement(
const sstring_view& cf_name,
const sstring_view& where_clause,
bool select_all_columns,
const std::vector<column_definition>& selected_columns) {
std::vector<sstring_view> included_columns) {
std::ostringstream out;
out << "SELECT ";
if (select_all_columns) {
if (included_columns.empty()) {
out << "*";
} else {
// If the column name is not entirely lowercase (or digits or _),
// when output to CQL it must be quoted to preserve case as well
// as non-alphanumeric characters.
auto cols = boost::copy_range<std::vector<sstring>>(selected_columns
| boost::adaptors::transformed(std::mem_fn(&column_definition::name_as_cql_string)));
out << join(", ", cols);
out << join(", ", included_columns);
}
// Note that cf_name may need to be quoted, just like column names above.
out << " FROM " << util::maybe_quote(cf_name.to_string()) << " WHERE " << where_clause << " ALLOW FILTERING";
out << " FROM " << cf_name << " WHERE " << where_clause << " ALLOW FILTERING";
return do_with_parser(out.str(), std::mem_fn(&cql3_parser::CqlParser::selectStatement));
}


@@ -124,19 +124,6 @@ public:
lw_shared_ptr<query::read_command> cmd, dht::partition_range_vector&& partition_ranges, service::query_state& state,
const query_options& options, gc_clock::time_point now);
struct primary_key {
dht::decorated_key partition;
clustering_key_prefix clustering;
};
future<::shared_ptr<cql_transport::messages::result_message>> execute(
distributed<service::storage_proxy>& proxy,
lw_shared_ptr<query::read_command> cmd,
std::vector<primary_key>&& primary_keys,
service::query_state& state,
const query_options& options,
gc_clock::time_point now);
shared_ptr<cql_transport::messages::result_message> process_results(foreign_ptr<lw_shared_ptr<query::result>> results,
lw_shared_ptr<query::read_command> cmd, const query_options& options, gc_clock::time_point now);
@@ -151,9 +138,6 @@ public:
protected:
int32_t get_limit(const query_options& options) const;
bool needs_post_query_ordering() const;
virtual void update_stats_rows_read(int64_t rows_read) {
_stats.rows_read += rows_read;
}
};
class primary_key_select_statement : public select_statement {
@@ -205,15 +189,6 @@ private:
future<dht::partition_range_vector> find_index_partition_ranges(distributed<service::storage_proxy>& proxy,
service::query_state& state,
const query_options& options);
future<std::vector<primary_key>> find_index_clustering_rows(distributed<service::storage_proxy>& proxy,
service::query_state& state,
const query_options& options);
virtual void update_stats_rows_read(int64_t rows_read) override {
_stats.rows_read += rows_read;
_stats.secondary_index_rows_read += rows_read;
}
};
}


@@ -35,12 +35,6 @@ struct cql_stats {
uint64_t batches_pure_logged = 0;
uint64_t batches_pure_unlogged = 0;
uint64_t batches_unlogged_from_logged = 0;
uint64_t rows_read = 0;
int64_t secondary_index_creates = 0;
int64_t secondary_index_drops = 0;
int64_t secondary_index_reads = 0;
int64_t secondary_index_rows_read = 0;
};
}


@@ -142,7 +142,6 @@ public:
using row = untyped_result_set_row;
typedef std::vector<row> rows_type;
using const_iterator = rows_type::const_iterator;
using iterator = rows_type::const_iterator;
untyped_result_set(::shared_ptr<cql_transport::messages::result_message>);
untyped_result_set(untyped_result_set&&) = default;


@@ -53,6 +53,9 @@ update_parameters::get_prefetched_list(
return {};
}
if (column.is_static()) {
ckey = clustering_key_view::make_empty();
}
auto i = _prefetched->rows.find(std::make_pair(std::move(pkey), std::move(ckey)));
if (i == _prefetched->rows.end()) {
return {};


@@ -72,23 +72,11 @@ inline sstring rename_column_in_where_clause(const sstring_view& where_clause, c
return relations_to_where_clause(std::move(new_relations));
}
/// build a CQL "select" statement with the desired parameters.
/// If select_all_columns==true, all columns are selected and the value of
/// selected_columns is ignored.
shared_ptr<cql3::statements::raw::select_statement> build_select_statement(
const sstring_view& cf_name,
const sstring_view& where_clause,
bool select_all_columns,
const std::vector<column_definition>& selected_columns);
std::vector<sstring_view> included_columns);
/// maybe_quote() takes an identifier - the name of a column, table or
/// keyspace name - and transforms it to a string which can be used in CQL
/// commands. Namely, if the identifier is not entirely lower-case (including
/// digits and underscores), it needs to be quoted to be represented in CQL.
/// Without this quoting, CQL folds uppercase letters to lower case, and
/// forbids non-alpha-numeric characters in identifier names.
/// Quoting involves wrapping the string in double-quotes ("). A double-quote
/// character itself is quoted by doubling it.
sstring maybe_quote(const sstring& s);
} // namespace util


@@ -361,9 +361,13 @@ filter_sstable_for_reader(std::vector<sstables::shared_sstable>&& sstables, colu
};
sstables.erase(boost::remove_if(sstables, sstable_has_not_key), sstables.end());
// FIXME: Workaround for https://github.com/scylladb/scylla/issues/3552
// and https://github.com/scylladb/scylla/issues/3553
const bool filtering_broken = true;
// no clustering filtering is applied if schema defines no clustering key or
// compaction strategy thinks it will not benefit from such an optimization.
if (!schema->clustering_key_size() || !cf.get_compaction_strategy().use_clustering_key_filter()) {
if (filtering_broken || !schema->clustering_key_size() || !cf.get_compaction_strategy().use_clustering_key_filter()) {
return sstables;
}
::cf_stats* stats = cf.cf_stats();
@@ -1247,17 +1251,6 @@ void column_family::set_metrics() {
ms::make_gauge("live_sstable", ms::description("Live sstable count"), _stats.live_sstable_count)(cf)(ks),
ms::make_gauge("pending_compaction", ms::description("Estimated number of compactions pending for this column family"), _stats.pending_compactions)(cf)(ks)
});
// View metrics are created only for base tables, so there's no point in adding them to views (which cannot act as base tables for other views)
if (!_schema->is_view()) {
_metrics.add_group("column_family", {
ms::make_total_operations("view_updates_pushed_remote", _view_stats.view_updates_pushed_remote, ms::description("Number of updates (mutations) pushed to remote view replicas"))(cf)(ks),
ms::make_total_operations("view_updates_failed_remote", _view_stats.view_updates_failed_remote, ms::description("Number of updates (mutations) that failed to be pushed to remote view replicas"))(cf)(ks),
ms::make_total_operations("view_updates_pushed_local", _view_stats.view_updates_pushed_local, ms::description("Number of updates (mutations) pushed to local view replicas"))(cf)(ks),
ms::make_total_operations("view_updates_failed_local", _view_stats.view_updates_failed_local, ms::description("Number of updates (mutations) that failed to be pushed to local view replicas"))(cf)(ks),
});
}
if (_schema->ks_name() != db::system_keyspace::NAME && _schema->ks_name() != db::schema_tables::v3::NAME && _schema->ks_name() != "system_traces") {
_metrics.add_group("column_family", {
ms::make_histogram("read_latency", ms::description("Read latency histogram"), [this] {return _stats.estimated_read.get_histogram(std::chrono::microseconds(100));})(cf)(ks),
@@ -1644,9 +1637,9 @@ future<> distributed_loader::open_sstable(distributed<database>& db, sstables::e
// to distribute evenly the resource usage among all shards.
return db.invoke_on(column_family::calculate_shard_from_sstable_generation(comps.generation),
[&db, comps = std::move(comps), func = std::move(func), pc] (database& local) {
[&db, comps = std::move(comps), func = std::move(func), &pc] (database& local) {
return with_semaphore(local.sstable_load_concurrency_sem(), 1, [&db, &local, comps = std::move(comps), func = std::move(func), pc] {
return with_semaphore(local.sstable_load_concurrency_sem(), 1, [&db, &local, comps = std::move(comps), func = std::move(func), &pc] {
auto& cf = local.find_column_family(comps.ks, comps.cf);
auto f = sstables::sstable::load_shared_components(cf.schema(), cf._config.datadir, comps.generation, comps.version, comps.format, pc);
@@ -1981,31 +1974,31 @@ future<> distributed_loader::populate_column_family(distributed<database>& db, s
// case is still an invalid case, but it is way easier for us to treat it
// by waiting for all files to be loaded, and then checking if we saw a
// file during scan_dir, without its corresponding TOC.
enum class component_status {
enum class status {
has_some_file,
has_toc_file,
has_temporary_toc_file,
};
struct sstable_descriptor {
component_status status;
sstables::sstable::version_types version;
sstables::sstable::format_types format;
std::experimental::optional<sstables::sstable::version_types> version;
std::experimental::optional<sstables::sstable::format_types> format;
};
auto verifier = make_lw_shared<std::unordered_map<unsigned long, sstable_descriptor>>();
auto verifier = make_lw_shared<std::unordered_map<unsigned long, status>>();
auto descriptor = make_lw_shared<sstable_descriptor>();
return do_with(std::vector<future<>>(), [&db, sstdir = std::move(sstdir), verifier, ks, cf] (std::vector<future<>>& futures) {
return lister::scan_dir(sstdir, { directory_entry_type::regular }, [&db, verifier, &futures] (lister::path sstdir, directory_entry de) {
return do_with(std::vector<future<>>(), [&db, sstdir = std::move(sstdir), verifier, descriptor, ks, cf] (std::vector<future<>>& futures) {
return lister::scan_dir(sstdir, { directory_entry_type::regular }, [&db, verifier, descriptor, &futures] (lister::path sstdir, directory_entry de) {
// FIXME: The secondary indexes are in this level, but with a directory type (starting with ".")
auto f = distributed_loader::probe_file(db, sstdir.native(), de.name).then([verifier, sstdir, de] (auto entry) {
auto f = distributed_loader::probe_file(db, sstdir.native(), de.name).then([verifier, descriptor, sstdir, de] (auto entry) {
if (entry.component == sstables::sstable::component_type::TemporaryStatistics) {
return remove_file(sstables::sstable::filename(sstdir.native(), entry.ks, entry.cf, entry.version, entry.generation,
entry.format, sstables::sstable::component_type::TemporaryStatistics));
}
if (verifier->count(entry.generation)) {
if (verifier->at(entry.generation).status == component_status::has_toc_file) {
if (verifier->at(entry.generation) == status::has_toc_file) {
lister::path file_path(sstdir / de.name.c_str());
if (entry.component == sstables::sstable::component_type::TOC) {
throw sstables::malformed_sstable_exception("Invalid State encountered. TOC file already processed", file_path.native());
@@ -2013,19 +2006,27 @@ future<> distributed_loader::populate_column_family(distributed<database>& db, s
throw sstables::malformed_sstable_exception("Invalid State encountered. Temporary TOC file found after TOC file was processed", file_path.native());
}
} else if (entry.component == sstables::sstable::component_type::TOC) {
verifier->at(entry.generation).status = component_status::has_toc_file;
verifier->at(entry.generation) = status::has_toc_file;
} else if (entry.component == sstables::sstable::component_type::TemporaryTOC) {
verifier->at(entry.generation).status = component_status::has_temporary_toc_file;
verifier->at(entry.generation) = status::has_temporary_toc_file;
}
} else {
if (entry.component == sstables::sstable::component_type::TOC) {
verifier->emplace(entry.generation, sstable_descriptor{component_status::has_toc_file, entry.version, entry.format});
verifier->emplace(entry.generation, status::has_toc_file);
} else if (entry.component == sstables::sstable::component_type::TemporaryTOC) {
verifier->emplace(entry.generation, sstable_descriptor{component_status::has_temporary_toc_file, entry.version, entry.format});
verifier->emplace(entry.generation, status::has_temporary_toc_file);
} else {
verifier->emplace(entry.generation, sstable_descriptor{component_status::has_some_file, entry.version, entry.format});
verifier->emplace(entry.generation, status::has_some_file);
}
}
// Retrieve both version and format used for this column family.
if (!descriptor->version) {
descriptor->version = entry.version;
}
if (!descriptor->format) {
descriptor->format = entry.format;
}
return make_ready_future<>();
});
@@ -2056,12 +2057,14 @@ future<> distributed_loader::populate_column_family(distributed<database>& db, s
}
return make_ready_future<>();
});
}).then([verifier, sstdir, ks = std::move(ks), cf = std::move(cf)] {
return do_for_each(*verifier, [sstdir = std::move(sstdir), ks = std::move(ks), cf = std::move(cf), verifier] (auto v) {
if (v.second.status == component_status::has_temporary_toc_file) {
}).then([verifier, sstdir, descriptor, ks = std::move(ks), cf = std::move(cf)] {
return do_for_each(*verifier, [sstdir = std::move(sstdir), ks = std::move(ks), cf = std::move(cf), descriptor, verifier] (auto v) {
if (v.second == status::has_temporary_toc_file) {
unsigned long gen = v.first;
sstables::sstable::version_types version = v.second.version;
sstables::sstable::format_types format = v.second.format;
assert(descriptor->version);
sstables::sstable::version_types version = descriptor->version.value();
assert(descriptor->format);
sstables::sstable::format_types format = descriptor->format.value();
if (engine().cpu_id() != 0) {
dblog.debug("At directory: {}, partial SSTable with generation {} not relevant for this shard, ignoring", sstdir, v.first);
@@ -2069,7 +2072,7 @@ future<> distributed_loader::populate_column_family(distributed<database>& db, s
}
// shard 0 is the responsible for removing a partial sstable.
return sstables::sstable::remove_sstable_with_temp_toc(ks, cf, sstdir, gen, version, format);
} else if (v.second.status != component_status::has_toc_file) {
} else if (v.second != status::has_toc_file) {
throw sstables::malformed_sstable_exception(sprint("At directory: %s: no TOC found for SSTable with generation %d!. Refusing to boot", sstdir, v.first));
}
return make_ready_future<>();
@@ -2160,6 +2163,11 @@ database::database(const db::config& cfg, database_config dbcfg)
void backlog_controller::adjust() {
auto backlog = _current_backlog();
if (backlog >= _control_points.back().input) {
update_controller(_control_points.back().output);
return;
}
// interpolate to find out which region we are in. This runs infrequently and there is a fixed
// number of points, so a simple loop will do.
size_t idx = 1;
@@ -2676,7 +2684,6 @@ future<> database::drop_column_family(const sstring& ks_name, const sstring& cf_
auto uuid = find_uuid(ks_name, cf_name);
auto cf = _column_families.at(uuid);
remove(*cf);
cf->clear_views();
auto& ks = find_keyspace(ks_name);
return truncate(ks, *cf, std::move(tsf), snapshot).finally([this, cf] {
return cf->stop();
@@ -2810,6 +2817,7 @@ keyspace::make_column_family_config(const schema& s, const db::config& db_config
cfg.enable_disk_writes = _config.enable_disk_writes;
cfg.enable_commitlog = _config.enable_commitlog;
cfg.enable_cache = _config.enable_cache;
cfg.compaction_enforce_min_threshold = _config.compaction_enforce_min_threshold;
cfg.dirty_memory_manager = _config.dirty_memory_manager;
cfg.streaming_dirty_memory_manager = _config.streaming_dirty_memory_manager;
cfg.read_concurrency_semaphore = _config.read_concurrency_semaphore;
@@ -2928,11 +2936,6 @@ bool database::has_schema(const sstring& ks_name, const sstring& cf_name) const
return _ks_cf_to_uuid.count(std::make_pair(ks_name, cf_name)) > 0;
}
std::vector<view_ptr> database::get_views() const {
return boost::copy_range<std::vector<view_ptr>>(get_non_system_column_families()
| boost::adaptors::filtered([] (auto& cf) { return cf->schema()->is_view(); })
| boost::adaptors::transformed([] (auto& cf) { return view_ptr(cf->schema()); }));
}
void database::create_in_memory_keyspace(const lw_shared_ptr<keyspace_metadata>& ksm) {
keyspace ks(ksm, std::move(make_keyspace_config(*ksm)));
@@ -3278,7 +3281,7 @@ future<mutation> database::do_apply_counter_update(column_family& cf, const froz
std::move(regular_columns), { }, { }, cql_serialization_format::internal(), query::max_rows);
return do_with(std::move(slice), std::move(m), std::vector<locked_cell>(),
[this, &cf, timeout, trace_state = std::move(trace_state), op = cf.write_in_progress()] (const query::partition_slice& slice, mutation& m, std::vector<locked_cell>& locks) mutable {
[this, &cf, timeout, trace_state = std::move(trace_state)] (const query::partition_slice& slice, mutation& m, std::vector<locked_cell>& locks) mutable {
tracing::trace(trace_state, "Acquiring counter locks");
return cf.lock_counter_cells(m, timeout).then([&, m_schema = cf.schema(), trace_state = std::move(trace_state), timeout, this] (std::vector<locked_cell> lcs) mutable {
locks = std::move(lcs);
@@ -3511,19 +3514,16 @@ future<> database::do_apply(schema_ptr s, const frozen_mutation& m, db::timeout_
throw std::runtime_error(sprint("attempted to mutate using not synced schema of %s.%s, version=%s",
s->ks_name(), s->cf_name(), s->version()));
}
// Signal to view building code that a write is in progress,
// so it knows when new writes start being sent to a new view.
auto op = cf.write_in_progress();
if (cf.views().empty()) {
return apply_with_commitlog(std::move(s), cf, std::move(uuid), m, timeout).finally([op = std::move(op)] { });
return apply_with_commitlog(std::move(s), cf, std::move(uuid), m, timeout);
}
future<row_locker::lock_holder> f = cf.push_view_replica_updates(s, m, timeout);
return f.then([this, s = std::move(s), uuid = std::move(uuid), &m, timeout, &cf, op = std::move(op)] (row_locker::lock_holder lock) mutable {
return f.then([this, s = std::move(s), uuid = std::move(uuid), &m, timeout] (row_locker::lock_holder lock) {
auto& cf = find_column_family(uuid);
return apply_with_commitlog(std::move(s), cf, std::move(uuid), m, timeout).finally(
// Hold the local lock on the base-table partition or row
// taken before the read, until the update is done.
[lock = std::move(lock), op = std::move(op)] { });
[lock = std::move(lock)] { });
});
}
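The write-phaser protocol described by the comment being removed here can be modeled with a toy, single-threaded stand-in. This is an assumption-laden sketch: the real `utils::phased_barrier` is asynchronous and built on seastar gates and futures; only the phase/operation contract is shown:

```cpp
#include <functional>
#include <memory>
#include <utility>
#include <vector>

// Toy model of utils::phased_barrier. Writers hold an operation token;
// advance_and_await() opens a new phase and runs its callback once every
// token from the old phase has been released.
class phased_barrier {
    struct phase { int pending = 0; std::vector<std::function<void()>> waiters; };
    std::shared_ptr<phase> _current = std::make_shared<phase>();
public:
    class operation {
        std::shared_ptr<phase> _p;
    public:
        explicit operation(std::shared_ptr<phase> p) : _p(std::move(p)) { _p->pending++; }
        operation(const operation& o) : _p(o._p) { if (_p) _p->pending++; }
        operation(operation&& o) noexcept = default;
        ~operation() {
            if (_p && --_p->pending == 0) {
                for (auto& w : _p->waiters) w();   // old phase fully drained
            }
        }
    };
    operation start() { return operation(_current); }
    // Begin a new phase; cb fires once all operations of the previous phase end.
    void advance_and_await(std::function<void()> cb) {
        auto old = std::exchange(_current, std::make_shared<phase>());
        if (old->pending == 0) { cb(); } else { old->waiters.push_back(std::move(cb)); }
    }
};
```

In the view-building scenario the comment describes, the builder would call `advance_and_await()` after a modification and only proceed once every write started before that point (each holding an `operation`) has finished.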
@@ -3587,6 +3587,7 @@ database::make_keyspace_config(const keyspace_metadata& ksm) {
cfg.enable_commitlog = false;
cfg.enable_cache = false;
}
cfg.compaction_enforce_min_threshold = _cfg->compaction_enforce_min_threshold();
cfg.dirty_memory_manager = &_dirty_memory_manager;
cfg.streaming_dirty_memory_manager = &_streaming_dirty_memory_manager;
cfg.read_concurrency_semaphore = &_read_concurrency_sem;
@@ -4293,12 +4294,10 @@ void column_family::set_schema(schema_ptr s) {
static std::vector<view_ptr>::iterator find_view(std::vector<view_ptr>& views, const view_ptr& v) {
return std::find_if(views.begin(), views.end(), [&v] (auto&& e) {
return e->id() == v->id();
return e->cf_name() == v->cf_name();
});
}
void column_family::add_or_update_view(view_ptr v) {
v->view_info()->initialize_base_dependent_fields(*schema());
auto existing = find_view(_views, v);
if (existing != _views.end()) {
*existing = std::move(v);
@@ -4314,10 +4313,6 @@ void column_family::remove_view(view_ptr v) {
}
}
void column_family::clear_views() {
_views.clear();
}
const std::vector<view_ptr>& column_family::views() const {
return _views;
}
@@ -4353,8 +4348,8 @@ future<> column_family::generate_and_propagate_view_updates(const schema_ptr& ba
flat_mutation_reader_from_mutations({std::move(m)}),
std::move(existings)).then([this, timeout, base_token = std::move(base_token)] (auto&& updates) mutable {
return seastar::get_units(*_config.view_update_concurrency_semaphore, 1, timeout).then(
[this, base_token = std::move(base_token), updates = std::move(updates)] (auto units) mutable {
db::view::mutate_MV(std::move(base_token), std::move(updates), _view_stats).handle_exception([units = std::move(units)] (auto ignored) { });
[base_token = std::move(base_token), updates = std::move(updates)] (auto units) mutable {
db::view::mutate_MV(std::move(base_token), std::move(updates)).handle_exception([units = std::move(units)] (auto ignored) { });
});
});
}
@@ -4402,7 +4397,7 @@ future<row_locker::lock_holder> column_family::push_view_replica_updates(const s
std::move(slice),
std::move(m),
[base, views = std::move(views), lock = std::move(lock), this, timeout] (auto& pk, auto& slice, auto& m) mutable {
auto reader = this->make_reader(
auto reader = this->as_mutation_source().make_reader(
base,
pk,
slice,
@@ -4494,31 +4489,6 @@ column_family::local_base_lock(const schema_ptr& s, const dht::decorated_key& pk
}
}
/**
* Given some updates on the base table and assuming there are no pre-existing, overlapping updates,
* generates the mutations to be applied to the base table's views, and sends them to the paired
* view replicas. The future resolves when the updates have been acknowledged by the replicas, i.e.,
* propagating the view updates to the view replicas happens synchronously.
*
* @param views the affected views which need to be updated.
* @param base_token The token to use to match the base replica with the paired replicas.
* @param reader the base table updates being applied, which all correspond to the base token.
* @return a future that resolves when the updates have been acknowledged by the view replicas
*/
future<> column_family::populate_views(
std::vector<view_ptr> views,
dht::token base_token,
flat_mutation_reader&& reader) {
auto& schema = reader.schema();
return db::view::generate_view_updates(
schema,
std::move(views),
std::move(reader),
{ }).then([base_token = std::move(base_token), this] (auto&& updates) {
return db::view::mutate_MV(std::move(base_token), std::move(updates), _view_stats);
});
}
void column_family::set_hit_rate(gms::inet_address addr, cache_temperature rate) {
auto& e = _cluster_cache_hit_rates[addr];
e.rate = rate;


@@ -297,6 +297,7 @@ public:
bool enable_cache = true;
bool enable_commitlog = true;
bool enable_incremental_backups = false;
bool compaction_enforce_min_threshold = false;
::dirty_memory_manager* dirty_memory_manager = &default_dirty_memory_manager;
::dirty_memory_manager* streaming_dirty_memory_manager = &default_dirty_memory_manager;
reader_concurrency_semaphore* read_concurrency_semaphore;
@@ -345,7 +346,6 @@ private:
schema_ptr _schema;
config _config;
mutable stats _stats;
mutable db::view::stats _view_stats;
uint64_t _failed_counter_applies_to_memtable = 0;
@@ -463,11 +463,6 @@ private:
double _cached_percentile = -1;
lowres_clock::time_point _percentile_cache_timestamp;
std::chrono::milliseconds _percentile_cache_value;
// Phaser used to synchronize with in-progress writes. This is useful for code that,
// after some modification, needs to ensure that new writes will see it before
// it can proceed, such as the view building code.
utils::phased_barrier _pending_writes_phaser;
private:
void update_stats_for_new_sstable(uint64_t disk_space_used_by_sstable, const std::vector<unsigned>& shards_for_the_sstable) noexcept;
// Adds new sstable to the set of sstables
@@ -741,6 +736,10 @@ public:
_config.enable_incremental_backups = val;
}
bool compaction_enforce_min_threshold() const {
return _config.compaction_enforce_min_threshold;
}
const sstables::sstable_set& get_sstable_set() const;
lw_shared_ptr<sstable_list> get_sstables() const;
lw_shared_ptr<sstable_list> get_sstables_including_compacted_undeleted() const;
@@ -791,17 +790,8 @@ public:
future<> run_with_compaction_disabled(std::function<future<> ()> func);
utils::phased_barrier::operation write_in_progress() {
return _pending_writes_phaser.start();
}
future<> await_pending_writes() {
return _pending_writes_phaser.advance_and_await();
}
void add_or_update_view(view_ptr v);
void remove_view(view_ptr v);
void clear_views();
const std::vector<view_ptr>& views() const;
future<row_locker::lock_holder> push_view_replica_updates(const schema_ptr& s, const frozen_mutation& fm, db::timeout_clock::time_point timeout) const;
void add_coordinator_read_latency(utils::estimated_histogram::duration latency);
@@ -814,12 +804,6 @@ public:
uint64_t large_partition_warning_threshold_bytes() const {
return _config.large_partition_warning_threshold_bytes;
}
future<> populate_views(
std::vector<view_ptr>,
dht::token base_token,
flat_mutation_reader&&);
private:
std::vector<view_ptr> affected_views(const schema_ptr& base, const mutation& update) const;
future<> generate_and_propagate_view_updates(const schema_ptr& base,
@@ -1000,6 +984,7 @@ public:
bool enable_disk_writes = true;
bool enable_cache = true;
bool enable_incremental_backups = false;
bool compaction_enforce_min_threshold = false;
::dirty_memory_manager* dirty_memory_manager = &default_dirty_memory_manager;
::dirty_memory_manager* streaming_dirty_memory_manager = &default_dirty_memory_manager;
reader_concurrency_semaphore* read_concurrency_semaphore;
@@ -1299,8 +1284,6 @@ public:
std::vector<lw_shared_ptr<column_family>> get_non_system_column_families() const;
std::vector<view_ptr> get_views() const;
const std::unordered_map<std::pair<sstring, sstring>, utils::UUID, utils::tuple_hash>&
get_column_families_mapping() const {
return _ks_cf_to_uuid;


@@ -125,6 +125,9 @@ public:
val(compaction_static_shares, float, 0, Used, \
"If set to higher than 0, ignore the controller's output and set the compaction shares statically. Do not set this unless you know what you are doing and suspect a problem in the controller. This option will be retired when the controller reaches more maturity" \
) \
val(compaction_enforce_min_threshold, bool, false, Used, \
"If set to true, enforce the min_threshold option for compactions strictly. If false (default), Scylla may decide to compact even if below min_threshold" \
) \
/* Initialization properties */ \
/* The minimal properties needed for configuring a cluster. */ \
val(cluster_name, sstring, "", Used, \
@@ -735,7 +738,6 @@ public:
val(enable_sstable_data_integrity_check, bool, false, Used, "Enable interposer which checks for integrity of every sstable write." \
" Performance is affected to some extent as a result. Useful to help debug problems that may arise at other layers.") \
val(cpu_scheduler, bool, true, Used, "Enable cpu scheduling") \
val(view_building, bool, true, Used, "Enable view building; should only be set to false when the node is experiencing issues due to view building") \
/* done! */
#define _make_value_member(name, type, deflt, status, desc, ...) \
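The new `val(compaction_enforce_min_threshold, ...)` entry above surfaces as an ordinary scylla.yaml setting. A sketch of how an operator would enable it (the surrounding comment text is illustrative, not from the source):

```yaml
# scylla.yaml sketch: strictly honor min_threshold when picking compaction
# candidates. Defaults to false, which lets Scylla compact below
# min_threshold when it judges that to be beneficial.
compaction_enforce_min_threshold: true
```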


@@ -42,8 +42,10 @@ const std::string manager::FILENAME_PREFIX("HintsLog" + commitlog::descriptor::S
const std::chrono::seconds manager::hint_file_write_timeout = std::chrono::seconds(2);
const std::chrono::seconds manager::hints_flush_period = std::chrono::seconds(10);
const std::chrono::seconds manager::space_watchdog::_watchdog_period = std::chrono::seconds(1);
// TODO: remove this when we switch to C++17
constexpr size_t manager::_max_hints_send_queue_length;
size_t db::hints::resource_manager::max_shard_disk_space_size;
size_t db::hints::manager::max_shard_disk_space_size;
manager::manager(sstring hints_directory, std::vector<sstring> hinted_dcs, int64_t max_hint_window_ms, distributed<database>& db)
: _hints_dir(boost::filesystem::path(hints_directory) / format("{:d}", engine().cpu_id()).c_str())
@@ -51,8 +53,8 @@ manager::manager(sstring hints_directory, std::vector<sstring> hinted_dcs, int64
, _local_snitch_ptr(locator::i_endpoint_snitch::get_local_snitch_ptr())
, _max_hint_window_us(max_hint_window_ms * 1000)
, _local_db(db.local())
, _max_send_in_flight_memory(std::max(memory::stats().total_memory() / 10, resource_manager::max_hints_send_queue_length))
, _min_send_hint_budget(_max_send_in_flight_memory / resource_manager::max_hints_send_queue_length)
, _max_send_in_flight_memory(std::max(memory::stats().total_memory() / 10, _max_hints_send_queue_length))
, _min_send_hint_budget(_max_send_in_flight_memory / _max_hints_send_queue_length)
, _send_limiter(_max_send_in_flight_memory)
, _space_watchdog(*this)
{
@@ -254,8 +256,8 @@ future<db::commitlog> manager::end_point_hints_manager::add_store() noexcept {
commitlog::config cfg;
cfg.commit_log_location = _hints_dir.c_str();
cfg.commitlog_segment_size_in_mb = resource_manager::hint_segment_size_in_mb;
cfg.commitlog_total_space_in_mb = resource_manager::max_hints_per_ep_size_mb;
cfg.commitlog_segment_size_in_mb = _hint_segment_size_in_mb;
cfg.commitlog_total_space_in_mb = _max_hints_per_ep_size_mb;
cfg.fname_prefix = manager::FILENAME_PREFIX;
return commitlog::create_commitlog(std::move(cfg)).then([this] (commitlog l) {
@@ -333,7 +335,7 @@ future<> manager::end_point_hints_manager::sender::do_send_one_mutation(mutation
// to be generated as a result of hints sending.
if (boost::range::find(natural_endpoints, end_point_key()) != natural_endpoints.end()) {
manager_logger.trace("Sending directly to {}", end_point_key());
return _proxy.send_to_endpoint(std::move(m), end_point_key(), { }, write_type::SIMPLE);
return _proxy.send_to_endpoint(std::move(m), end_point_key(), write_type::SIMPLE);
} else {
manager_logger.trace("Endpoints set has changed and {} is no longer a replica. Mutating from scratch...", end_point_key());
return _proxy.mutate({std::move(m)}, consistency_level::ALL, nullptr);
@@ -468,9 +470,9 @@ void manager::space_watchdog::on_timer() {
}).then([this] {
// Adjust the quota to take into account the space we guarantee to every end point manager
size_t adjusted_quota = 0;
size_t delta = _shard_manager._ep_managers.size() * resource_manager::hint_segment_size_in_mb * 1024 * 1024;
if (resource_manager::max_shard_disk_space_size > delta) {
adjusted_quota = resource_manager::max_shard_disk_space_size - delta;
size_t delta = _shard_manager._ep_managers.size() * _hint_segment_size_in_mb * 1024 * 1024;
if (max_shard_disk_space_size > delta) {
adjusted_quota = max_shard_disk_space_size - delta;
}
bool can_hint = _total_size < adjusted_quota;
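The quota adjustment above is plain arithmetic and can be factored out as a sketch (the free function and its name are assumptions; the constants match the ones in this diff: 32 MB segments, a per-shard disk budget):

```cpp
#include <cstddef>

// Sketch of the space watchdog's quota math: every end point manager is
// guaranteed one hint segment, so that space is subtracted from the
// per-shard disk budget before deciding whether hinting may continue.
size_t adjusted_quota(size_t max_shard_disk_space_size,
                      size_t ep_managers,
                      size_t hint_segment_size_in_mb) {
    size_t delta = ep_managers * hint_segment_size_in_mb * 1024 * 1024;
    // Guard against underflow when the guaranteed segments already
    // exceed the budget: the quota then collapses to zero.
    return max_shard_disk_space_size > delta ? max_shard_disk_space_size - delta : 0;
}
```

For example, with a 1 GiB shard budget and 4 end point managers at 32 MB per segment, 128 MiB is reserved and the remaining quota is 939524096 bytes.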
@@ -508,7 +510,7 @@ void manager::space_watchdog::on_timer() {
bool manager::too_many_in_flight_hints_for(ep_key_type ep) const noexcept {
// There is no need to check the DC here because if there is an in-flight hint for this end point then this means that
// its DC has already been checked and found to be ok.
return _stats.size_of_hints_in_progress > resource_manager::max_size_of_hints_in_progress && !utils::fb_utilities::is_me(ep) && hints_in_progress_for(ep) > 0 && local_gossiper().get_endpoint_downtime(ep) <= _max_hint_window_us;
return _stats.size_of_hints_in_progress > _max_size_of_hints_in_progress && !utils::fb_utilities::is_me(ep) && hints_in_progress_for(ep) > 0 && local_gossiper().get_endpoint_downtime(ep) <= _max_hint_window_us;
}
bool manager::can_hint_for(ep_key_type ep) const noexcept {
@@ -525,7 +527,7 @@ bool manager::can_hint_for(ep_key_type ep) const noexcept {
// hints is more than the maximum allowed value.
//
// In the worst case there's going to be (_max_size_of_hints_in_progress + N - 1) in-flight hints, where N is the total number Nodes in the cluster.
if (_stats.size_of_hints_in_progress > resource_manager::max_size_of_hints_in_progress && hints_in_progress_for(ep) > 0) {
if (_stats.size_of_hints_in_progress > _max_size_of_hints_in_progress && hints_in_progress_for(ep) > 0) {
manager_logger.trace("size_of_hints_in_progress {} hints_in_progress_for({}) {}", _stats.size_of_hints_in_progress, ep, hints_in_progress_for(ep));
return false;
}
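The overload check shared by `too_many_in_flight_hints_for()` and `can_hint_for()` reduces to one predicate; a stand-alone sketch (function and parameter names are assumptions):

```cpp
#include <cstdint>

// Sketch of the hints back-pressure check: once the shard-wide in-flight
// hint payload exceeds the cap, refuse new hints for endpoints that already
// have hints in flight. As the comment above notes, in the worst case each
// of N nodes can still sneak in one more hint, bounding the total at
// (max_size + N - 1).
bool can_accept_hint(uint64_t size_of_hints_in_progress,
                     uint64_t max_size_of_hints_in_progress,
                     unsigned hints_in_progress_for_ep) {
    return !(size_of_hints_in_progress > max_size_of_hints_in_progress
             && hints_in_progress_for_ep > 0);
}
```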


@@ -34,7 +34,6 @@
#include "gms/gossiper.hh"
#include "db/commitlog/commitlog.hh"
#include "utils/loading_shared_values.hh"
#include "db/hints/resource_manager.hh"
namespace db {
namespace hints {
@@ -379,8 +378,13 @@ public:
static const std::string FILENAME_PREFIX;
static const std::chrono::seconds hints_flush_period;
static const std::chrono::seconds hint_file_write_timeout;
static size_t max_shard_disk_space_size;
private:
static constexpr uint64_t _max_size_of_hints_in_progress = 10 * 1024 * 1024; // 10MB
static constexpr size_t _hint_segment_size_in_mb = 32;
static constexpr size_t _max_hints_per_ep_size_mb = 128; // 4 files 32MB each
static constexpr size_t _max_hints_send_queue_length = 128;
const boost::filesystem::path _hints_dir;
node_to_hint_store_factory_type _store_factory;


@@ -1,47 +0,0 @@
/*
* Copyright (C) 2018 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <cstdint>
#include <seastar/core/semaphore.hh>
#include <seastar/core/gate.hh>
#include <seastar/core/memory.hh>
#include <seastar/core/future.hh>
#include "seastarx.hh"
#include <unordered_set>
#include <boost/filesystem.hpp>
#include <gms/inet_address.hh>
namespace db {
namespace hints {
class resource_manager {
public:
static constexpr uint64_t max_size_of_hints_in_progress = 10 * 1024 * 1024; // 10MB
static constexpr size_t hint_segment_size_in_mb = 32;
static constexpr size_t max_hints_per_ep_size_mb = 128; // 4 files 32MB each
static constexpr size_t max_hints_send_queue_length = 128;
static size_t max_shard_disk_space_size;
};
}
}

db/index/secondary_index.hh (new file, 389 lines)

@@ -0,0 +1,389 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Copyright (C) 2015 ScyllaDB
*
* Modified by ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "core/sstring.hh"
#include "seastarx.hh"
namespace db {
namespace index {
/**
* Abstract base class for different types of secondary indexes.
*
* Do not extend this directly, please pick from PerColumnSecondaryIndex or PerRowSecondaryIndex
*/
class secondary_index {
public:
static const sstring custom_index_option_name;
/**
* The name of the option used to specify that the index is on the collection keys.
*/
static const sstring index_keys_option_name;
/**
* The name of the option used to specify that the index is on the collection values.
*/
static const sstring index_values_option_name;
/**
* The name of the option used to specify that the index is on the collection (map) entries.
*/
static const sstring index_entries_option_name;
#if 0 // TODO:
public static final AbstractType<?> keyComparator = StorageService.getPartitioner().preservesOrder()
? BytesType.instance
: new LocalByPartionerType(StorageService.getPartitioner());
/**
* Base CF that has many indexes
*/
protected ColumnFamilyStore baseCfs;
/**
* The column definitions which this index is responsible for
*/
protected final Set<ColumnDefinition> columnDefs = Collections.newSetFromMap(new ConcurrentHashMap<ColumnDefinition,Boolean>());
/**
* Perform any initialization work
*/
public abstract void init();
/**
* Reload an existing index following a change to its configuration,
* or that of the indexed column(s). Differs from init() in that we expect
* new resources (such as CFS for a KEYS index) to be created by
* init() but not here
*/
public abstract void reload();
/**
* Validates the index_options passed in the ColumnDef
* @throws ConfigurationException
*/
public abstract void validateOptions() throws ConfigurationException;
/**
* @return The name of the index
*/
abstract public String getIndexName();
/**
* All internal 2ndary indexes will return "_internal_" for this. Custom
* 2ndary indexes will return their class name. This only matters for
* SecondaryIndexManager.groupByIndexType.
*/
String indexTypeForGrouping()
{
// Our internal indexes overwrite this
return getClass().getCanonicalName();
}
/**
* Return the unique name for this index and column
* to be stored in the SystemKeyspace that tracks if each column is built
*
* @param columnName the name of the column
* @return the unique name
*/
abstract public String getNameForSystemKeyspace(ByteBuffer columnName);
/**
* Checks if the index for specified column is fully built
*
* @param columnName the column
* @return true if the index is fully built
*/
public boolean isIndexBuilt(ByteBuffer columnName)
{
return SystemKeyspace.isIndexBuilt(baseCfs.keyspace.getName(), getNameForSystemKeyspace(columnName));
}
public void setIndexBuilt()
{
for (ColumnDefinition columnDef : columnDefs)
SystemKeyspace.setIndexBuilt(baseCfs.keyspace.getName(), getNameForSystemKeyspace(columnDef.name.bytes));
}
public void setIndexRemoved()
{
for (ColumnDefinition columnDef : columnDefs)
SystemKeyspace.setIndexRemoved(baseCfs.keyspace.getName(), getNameForSystemKeyspace(columnDef.name.bytes));
}
/**
* Called at query time
* Creates an implementation-specific searcher instance for this index type
* @param columns the list of columns which belong to this index type
* @return the secondary index search impl
*/
protected abstract SecondaryIndexSearcher createSecondaryIndexSearcher(Set<ByteBuffer> columns);
/**
* Forces this index's in-memory data to disk
*/
public abstract void forceBlockingFlush();
/**
* Allow access to the underlying column family store if there is one
* @return the underlying column family store or null
*/
public abstract ColumnFamilyStore getIndexCfs();
/**
* Delete all files and references to this index
* @param columnName the indexed column to remove
*/
public abstract void removeIndex(ByteBuffer columnName);
/**
* Remove the index and unregisters this index's mbean if one exists
*/
public abstract void invalidate();
/**
* Truncate all the data from the current index
*
* @param truncatedAt The truncation timestamp, all data before that timestamp should be rejected.
*/
public abstract void truncateBlocking(long truncatedAt);
/**
* Builds the index using the data in the underlying CFS
* Blocks till it's complete
*/
protected void buildIndexBlocking()
{
logger.info(String.format("Submitting index build of %s for data in %s",
getIndexName(), StringUtils.join(baseCfs.getSSTables(), ", ")));
try (Refs<SSTableReader> sstables = baseCfs.selectAndReference(ColumnFamilyStore.CANONICAL_SSTABLES).refs)
{
SecondaryIndexBuilder builder = new SecondaryIndexBuilder(baseCfs,
Collections.singleton(getIndexName()),
new ReducingKeyIterator(sstables));
Future<?> future = CompactionManager.instance.submitIndexBuild(builder);
FBUtilities.waitOnFuture(future);
forceBlockingFlush();
setIndexBuilt();
}
logger.info("Index build of {} complete", getIndexName());
}
/**
* Builds the index using the data in the underlying CF, non blocking
*
*
* @return A future object which the caller can block on (optional)
*/
public Future<?> buildIndexAsync()
{
// if we're just linking in the index to indexedColumns on an already-built index post-restart, we're done
boolean allAreBuilt = true;
for (ColumnDefinition cdef : columnDefs)
{
if (!SystemKeyspace.isIndexBuilt(baseCfs.keyspace.getName(), getNameForSystemKeyspace(cdef.name.bytes)))
{
allAreBuilt = false;
break;
}
}
if (allAreBuilt)
return null;
// build it asynchronously; addIndex gets called by CFS open and schema update, neither of which
// we want to block for a long period. (actual build is serialized on CompactionManager.)
Runnable runnable = new Runnable()
{
public void run()
{
baseCfs.forceBlockingFlush();
buildIndexBlocking();
}
};
FutureTask<?> f = new FutureTask<Object>(runnable, null);
new Thread(f, "Creating index: " + getIndexName()).start();
return f;
}
public ColumnFamilyStore getBaseCfs()
{
return baseCfs;
}
private void setBaseCfs(ColumnFamilyStore baseCfs)
{
this.baseCfs = baseCfs;
}
public Set<ColumnDefinition> getColumnDefs()
{
return columnDefs;
}
void addColumnDef(ColumnDefinition columnDef)
{
columnDefs.add(columnDef);
}
void removeColumnDef(ByteBuffer name)
{
Iterator<ColumnDefinition> it = columnDefs.iterator();
while (it.hasNext())
{
if (it.next().name.bytes.equals(name))
it.remove();
}
}
/** Returns true if the index supports lookups for the given operator, false otherwise. */
public boolean supportsOperator(Operator operator)
{
return operator == Operator.EQ;
}
/**
* Returns the decoratedKey for a column value. Assumes an index CFS is present.
* @param value column value
* @return decorated key
*/
public DecoratedKey getIndexKeyFor(ByteBuffer value)
{
return getIndexCfs().partitioner.decorateKey(value);
}
/**
* Returns true if the provided cell name is indexed by this secondary index.
*/
public abstract boolean indexes(CellName name);
/**
* This is the primary way to create a secondary index instance for a CF column.
* It will validate the index_options before initializing.
*
* @param baseCfs the source of data for the Index
* @param cdef the meta information about this column (index_type, index_options, name, etc...)
*
* @return The secondary index instance for this column
* @throws ConfigurationException
*/
public static SecondaryIndex createInstance(ColumnFamilyStore baseCfs, ColumnDefinition cdef) throws ConfigurationException
{
SecondaryIndex index;
switch (cdef.getIndexType())
{
case KEYS:
index = new KeysIndex();
break;
case COMPOSITES:
index = CompositesIndex.create(cdef);
break;
case CUSTOM:
assert cdef.getIndexOptions() != null;
String class_name = cdef.getIndexOptions().get(CUSTOM_INDEX_OPTION_NAME);
assert class_name != null;
try
{
index = (SecondaryIndex) Class.forName(class_name).newInstance();
}
catch (Exception e)
{
throw new RuntimeException(e);
}
break;
default:
throw new RuntimeException("Unknown index type: " + cdef.getIndexName());
}
index.addColumnDef(cdef);
index.validateOptions();
index.setBaseCfs(baseCfs);
return index;
}
public abstract boolean validate(ByteBuffer rowKey, Cell cell);
public abstract long estimateResultRows();
/**
* Returns the index comparator for index backed by CFS, or null.
*
* Note: it would be cleaner to have this be a member method. However we need this when opening indexes
* sstables, but by then the CFS won't be fully initiated, so the SecondaryIndex object won't be accessible.
*/
public static CellNameType getIndexComparator(CFMetaData baseMetadata, ColumnDefinition cdef)
{
switch (cdef.getIndexType())
{
case KEYS:
return new SimpleDenseCellNameType(keyComparator);
case COMPOSITES:
return CompositesIndex.getIndexComparator(baseMetadata, cdef);
case CUSTOM:
return null;
}
throw new AssertionError();
}
@Override
public String toString()
{
return Objects.toStringHelper(this).add("columnDefs", columnDefs).toString();
}
#endif
};
}
}


@@ -1,143 +0,0 @@
/*
* Copyright (C) 2018 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include "db/system_distributed_keyspace.hh"
#include "cql3/untyped_result_set.hh"
#include "database.hh"
#include "db/consistency_level_type.hh"
#include "db/system_keyspace.hh"
#include "schema_builder.hh"
#include "types.hh"
#include <seastar/core/reactor.hh>
#include <seastar/core/shared_ptr.hh>
#include <boost/range/adaptor/transformed.hpp>
#include <optional>
#include <vector>
#include <experimental/optional>
namespace db {
schema_ptr view_build_status() {
static thread_local auto schema = [] {
auto id = generate_legacy_id(system_distributed_keyspace::NAME, system_distributed_keyspace::VIEW_BUILD_STATUS);
return schema_builder(system_distributed_keyspace::NAME, system_distributed_keyspace::VIEW_BUILD_STATUS, std::experimental::make_optional(id))
.with_column("keyspace_name", utf8_type, column_kind::partition_key)
.with_column("view_name", utf8_type, column_kind::partition_key)
.with_column("host_id", uuid_type, column_kind::clustering_key)
.with_column("status", utf8_type)
.with_version(system_keyspace::generate_schema_version(id))
.build();
}();
return schema;
}
static std::vector<schema_ptr> all_tables() {
return {
view_build_status(),
};
}
system_distributed_keyspace::system_distributed_keyspace(cql3::query_processor& qp, service::migration_manager& mm)
: _qp(qp)
, _mm(mm) {
}
future<> system_distributed_keyspace::start() {
if (engine().cpu_id() != 0) {
return make_ready_future<>();
}
static auto ignore_existing = [] (seastar::noncopyable_function<future<>()> func) {
return futurize_apply(std::move(func)).handle_exception_type([] (exceptions::already_exists_exception& ignored) { });
};
// We use min_timestamp so that the default keyspace metadata will lose to any manual adjustments.
// See issue #2129.
return ignore_existing([this] {
auto ksm = keyspace_metadata::new_keyspace(
NAME,
"org.apache.cassandra.locator.SimpleStrategy",
{{"replication_factor", "3"}},
true);
return _mm.announce_new_keyspace(ksm, api::min_timestamp, false);
}).then([this] {
return do_with(all_tables(), [this] (std::vector<schema_ptr>& tables) {
return do_for_each(tables, [this] (schema_ptr table) {
return ignore_existing([this, table = std::move(table)] {
return _mm.announce_new_column_family(std::move(table), false);
});
});
});
});
}
future<> system_distributed_keyspace::stop() {
return make_ready_future<>();
}
future<std::unordered_map<utils::UUID, sstring>> system_distributed_keyspace::view_status(sstring ks_name, sstring view_name) const {
return _qp.process(
sprint("SELECT host_id, status FROM %s.%s WHERE keyspace_name = ? AND view_name = ?", NAME, VIEW_BUILD_STATUS),
db::consistency_level::ONE,
{ std::move(ks_name), std::move(view_name) },
false).then([this] (::shared_ptr<cql3::untyped_result_set> cql_result) {
return boost::copy_range<std::unordered_map<utils::UUID, sstring>>(*cql_result
| boost::adaptors::transformed([] (const cql3::untyped_result_set::row& row) {
auto host_id = row.get_as<utils::UUID>("host_id");
auto status = row.get_as<sstring>("status");
return std::pair(std::move(host_id), std::move(status));
}));
});
}
future<> system_distributed_keyspace::start_view_build(sstring ks_name, sstring view_name) const {
return db::system_keyspace::get_local_host_id().then([this, ks_name = std::move(ks_name), view_name = std::move(view_name)] (utils::UUID host_id) {
return _qp.process(
sprint("INSERT INTO %s.%s (keyspace_name, view_name, host_id, status) VALUES (?, ?, ?, ?)", NAME, VIEW_BUILD_STATUS),
db::consistency_level::ONE,
{ std::move(ks_name), std::move(view_name), std::move(host_id), "STARTED" },
false).discard_result();
});
}
future<> system_distributed_keyspace::finish_view_build(sstring ks_name, sstring view_name) const {
return db::system_keyspace::get_local_host_id().then([this, ks_name = std::move(ks_name), view_name = std::move(view_name)] (utils::UUID host_id) {
return _qp.process(
sprint("UPDATE %s.%s SET status = ? WHERE keyspace_name = ? AND view_name = ? AND host_id = ?", NAME, VIEW_BUILD_STATUS),
db::consistency_level::ONE,
{ "SUCCESS", std::move(ks_name), std::move(view_name), std::move(host_id) },
false).discard_result();
});
}
future<> system_distributed_keyspace::remove_view(sstring ks_name, sstring view_name) const {
return _qp.process(
sprint("DELETE FROM %s.%s WHERE keyspace_name = ? AND view_name = ?", NAME, VIEW_BUILD_STATUS),
db::consistency_level::ONE,
{ std::move(ks_name), std::move(view_name) },
false).discard_result();
}
}
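The ignore_existing() helper above is what makes start() safe to re-run: creation steps that already succeeded on a previous attempt (or were raced by another node) are treated as no-ops. A minimal synchronous sketch of the same idea, with `already_exists` as an illustrative stand-in for `exceptions::already_exists_exception` (the real helper wraps a future and uses `handle_exception_type()`, but the control flow is the same):

```cpp
#include <cassert>
#include <functional>
#include <stdexcept>

// Illustrative stand-in for exceptions::already_exists_exception.
struct already_exists : std::runtime_error {
    using std::runtime_error::runtime_error;
};

// Run one creation step, treating "already exists" as success, so a
// sequence of such steps is idempotent and can be retried after a
// partial failure. Any other exception type still propagates.
inline void ignore_existing_sync(const std::function<void()>& step) {
    try {
        step();
    } catch (const already_exists&) {
        // Created by an earlier run or another node; nothing to do.
    }
}
```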


@@ -1,58 +0,0 @@
/*
* Copyright (C) 2018 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "bytes.hh"
#include "cql3/query_processor.hh"
#include "schema.hh"
#include "service/migration_manager.hh"
#include "utils/UUID.hh"
#include <seastar/core/future.hh>
#include <seastar/core/sstring.hh>
#include <unordered_map>
namespace db {
class system_distributed_keyspace {
public:
static constexpr auto NAME = "system_distributed";
static constexpr auto VIEW_BUILD_STATUS = "view_build_status";
private:
cql3::query_processor& _qp;
service::migration_manager& _mm;
public:
system_distributed_keyspace(cql3::query_processor&, service::migration_manager&);
future<> start();
future<> stop();
future<std::unordered_map<utils::UUID, sstring>> view_status(sstring ks_name, sstring view_name) const;
future<> start_view_build(sstring ks_name, sstring view_name) const;
future<> finish_view_build(sstring ks_name, sstring view_name) const;
future<> remove_view(sstring ks_name, sstring view_name) const;
};
}


@@ -74,7 +74,6 @@
#include "db/size_estimates_virtual_reader.hh"
#include "db/timeout_clock.hh"
#include "sstables/sstables.hh"
#include "db/view/build_progress_virtual_reader.hh"
#include "db/schema_tables.hh"
using days = std::chrono::duration<int, std::ratio<24 * 3600>>;
@@ -643,22 +642,6 @@ schema_ptr built_views() {
return schema;
}
schema_ptr scylla_views_builds_in_progress() {
static thread_local auto schema = [] {
auto id = generate_legacy_id(NAME, SCYLLA_VIEWS_BUILDS_IN_PROGRESS);
return schema_builder(NAME, SCYLLA_VIEWS_BUILDS_IN_PROGRESS, stdx::make_optional(id))
.with_column("keyspace_name", utf8_type, column_kind::partition_key)
.with_column("view_name", utf8_type, column_kind::clustering_key)
.with_column("cpu_id", int32_type, column_kind::clustering_key)
.with_column("next_token", utf8_type)
.with_column("generation_number", int32_type)
.with_column("first_token", utf8_type)
.with_version(generate_schema_version(id))
.build();
}();
return schema;
}
} //</v3>
namespace legacy {
@@ -1558,8 +1541,7 @@ std::vector<schema_ptr> all_tables() {
r.insert(r.end(), { built_indexes(), hints(), batchlog(), paxos(), local(),
peers(), peer_events(), range_xfers(),
compactions_in_progress(), compaction_history(),
sstable_activity(), size_estimates(), v3::views_builds_in_progress(), v3::built_views(),
v3::scylla_views_builds_in_progress(),
sstable_activity(), size_estimates(),
});
// legacy schema
r.insert(r.end(), {
@@ -1576,14 +1558,10 @@ static void maybe_add_virtual_reader(schema_ptr s, database& db) {
if (s.get() == size_estimates().get()) {
db.find_column_family(s).set_virtual_reader(mutation_source(db::size_estimates::virtual_reader()));
}
if (s.get() == v3::views_builds_in_progress().get()) {
db.find_column_family(s).set_virtual_reader(mutation_source(db::view::build_progress_virtual_reader(db)));
}
}
static bool maybe_write_in_user_memory(schema_ptr s, database& db) {
return (s.get() == batchlog().get())
|| s == v3::scylla_views_builds_in_progress();
return (s.get() == batchlog().get());
}
void make(database& db, bool durable, bool volatile_testing_only) {
@@ -1805,85 +1783,6 @@ mutation make_size_estimates_mutation(const sstring& ks, std::vector<range_estim
return m_to_apply;
}
future<> register_view_for_building(sstring ks_name, sstring view_name, const dht::token& token) {
sstring req = sprint("INSERT INTO system.%s (keyspace_name, view_name, generation_number, cpu_id, first_token) VALUES (?, ?, ?, ?, ?)",
v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS);
return execute_cql(
std::move(req),
std::move(ks_name),
std::move(view_name),
0,
int32_t(engine().cpu_id()),
dht::global_partitioner().to_sstring(token)).discard_result();
}
future<> update_view_build_progress(sstring ks_name, sstring view_name, const dht::token& token) {
sstring req = sprint("INSERT INTO system.%s (keyspace_name, view_name, next_token, cpu_id) VALUES (?, ?, ?, ?)",
v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS);
return execute_cql(
std::move(req),
std::move(ks_name),
std::move(view_name),
dht::global_partitioner().to_sstring(token),
int32_t(engine().cpu_id())).discard_result();
}
future<> remove_view_build_progress_across_all_shards(sstring ks_name, sstring view_name) {
return execute_cql(
sprint("DELETE FROM system.%s WHERE keyspace_name = ? AND view_name = ?", v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS),
std::move(ks_name),
std::move(view_name)).discard_result();
}
future<> mark_view_as_built(sstring ks_name, sstring view_name) {
return execute_cql(
sprint("INSERT INTO system.%s (keyspace_name, view_name) VALUES (?, ?)", v3::BUILT_VIEWS),
std::move(ks_name),
std::move(view_name)).discard_result();
}
future<> remove_built_view(sstring ks_name, sstring view_name) {
return execute_cql(
sprint("DELETE FROM system.%s WHERE keyspace_name = ? AND view_name = ?", v3::BUILT_VIEWS),
std::move(ks_name),
std::move(view_name)).discard_result();
}
future<std::vector<view_name>> load_built_views() {
return execute_cql(sprint("SELECT * FROM system.%s", v3::BUILT_VIEWS)).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
return boost::copy_range<std::vector<view_name>>(*cql_result
| boost::adaptors::transformed([] (const cql3::untyped_result_set::row& row) {
auto ks_name = row.get_as<sstring>("keyspace_name");
auto cf_name = row.get_as<sstring>("view_name");
return std::pair(std::move(ks_name), std::move(cf_name));
}));
});
}
future<std::vector<view_build_progress>> load_view_build_progress() {
return execute_cql(sprint("SELECT keyspace_name, view_name, first_token, next_token, cpu_id FROM system.%s",
v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS)).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
std::vector<view_build_progress> progress;
for (auto& row : *cql_result) {
auto ks_name = row.get_as<sstring>("keyspace_name");
auto cf_name = row.get_as<sstring>("view_name");
auto first_token = dht::global_partitioner().from_sstring(row.get_as<sstring>("first_token"));
auto next_token_sstring = row.get_opt<sstring>("next_token");
std::optional<dht::token> next_token;
if (next_token_sstring) {
next_token = dht::global_partitioner().from_sstring(std::move(next_token_sstring).value());
}
auto cpu_id = row.get_as<int32_t>("cpu_id");
progress.emplace_back(view_build_progress{
view_name(std::move(ks_name), std::move(cf_name)),
std::move(first_token),
std::move(next_token),
static_cast<shard_id>(cpu_id)});
}
return progress;
});
}
} // namespace system_keyspace
sstring system_keyspace_name() {

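The progress tables above persist tokens as text, converting with `dht::global_partitioner().to_sstring()`/`from_sstring()`, so the only requirement on the representation is that it round-trips through a string column. A sketch of that contract, with `int64_t` as an illustrative stand-in for `dht::token`:

```cpp
#include <cassert>
#include <cstdint>
#include <string>

// Tokens are stored as strings in the progress tables; the encoding only
// needs to round-trip, not be ordered or binary. int64_t here is an
// illustrative stand-in for dht::token.
inline std::string token_to_sstring(int64_t token) {
    return std::to_string(token);
}

inline int64_t token_from_sstring(const std::string& s) {
    return std::stoll(s);
}
```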

@@ -40,10 +40,8 @@
#pragma once
#include <optional>
#include <unordered_map>
#include <utility>
#include <vector>
#include "schema.hh"
#include "utils/UUID.hh"
#include "gms/inet_address.hh"
@@ -101,7 +99,6 @@ static constexpr auto SIZE_ESTIMATES = "size_estimates";
static constexpr auto AVAILABLE_RANGES = "available_ranges";
static constexpr auto VIEWS_BUILDS_IN_PROGRESS = "views_builds_in_progress";
static constexpr auto BUILT_VIEWS = "built_views";
static constexpr auto SCYLLA_VIEWS_BUILDS_IN_PROGRESS = "scylla_views_builds_in_progress";
}
namespace legacy {
@@ -125,14 +122,6 @@ struct range_estimates {
int64_t mean_partition_size;
};
using view_name = std::pair<sstring, sstring>;
struct view_build_progress {
view_name view;
dht::token first_token;
std::optional<dht::token> next_token;
shard_id cpu_id;
};
extern schema_ptr hints();
extern schema_ptr batchlog();
extern schema_ptr built_indexes(); // TODO (from Cassandra): make private
@@ -662,13 +651,5 @@ future<> set_bootstrap_state(bootstrap_state state);
*/
mutation make_size_estimates_mutation(const sstring& ks, std::vector<range_estimates> estimates);
future<> register_view_for_building(sstring ks_name, sstring view_name, const dht::token& token);
future<> update_view_build_progress(sstring ks_name, sstring view_name, const dht::token& token);
future<> remove_view_build_progress_across_all_shards(sstring ks_name, sstring view_name);
future<> mark_view_as_built(sstring ks_name, sstring view_name);
future<> remove_built_view(sstring ks_name, sstring view_name);
future<std::vector<view_name>> load_built_views();
future<std::vector<view_build_progress>> load_view_build_progress();
} // namespace system_keyspace
} // namespace db


@@ -1,195 +0,0 @@
/*
* Copyright (C) 2018 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include "database.hh"
#include "db/system_keyspace.hh"
#include "db/timeout_clock.hh"
#include "dht/i_partitioner.hh"
#include "flat_mutation_reader.hh"
#include "mutation_fragment.hh"
#include "mutation_reader.hh"
#include "query-request.hh"
#include "schema.hh"
#include "tracing/tracing.hh"
#include <boost/range/iterator_range.hpp>
#include <iterator>
#include <memory>
namespace db::view {
// Allows a user to query the views_builds_in_progress system table
// in terms of the scylla_views_builds_in_progress one, which is
// a superset of the former. When querying, we don't have to adjust
// the clustering key, but we have to adjust the requested regular
// columns. When reading the results from the scylla_views_builds_in_progress
// table, we adjust the clustering key (we shed the cpu_id column) and map
// back the regular columns.
class build_progress_virtual_reader {
database& _db;
struct build_progress_reader : flat_mutation_reader::impl {
column_id _scylla_next_token_col;
column_id _scylla_generation_number_col;
column_id _legacy_last_token_col;
column_id _legacy_generation_number_col;
const query::partition_slice& _legacy_slice;
query::partition_slice _slice;
flat_mutation_reader _underlying;
build_progress_reader(
schema_ptr legacy_schema,
column_family& scylla_views_build_progress,
const dht::partition_range& range,
const query::partition_slice& slice,
const io_priority_class& pc,
tracing::trace_state_ptr trace_state,
streamed_mutation::forwarding fwd,
mutation_reader::forwarding fwd_mr)
: flat_mutation_reader::impl(std::move(legacy_schema))
, _scylla_next_token_col(scylla_views_build_progress.schema()->get_column_definition("next_token")->id)
, _scylla_generation_number_col(scylla_views_build_progress.schema()->get_column_definition("generation_number")->id)
, _legacy_last_token_col(_schema->get_column_definition("last_token")->id)
, _legacy_generation_number_col(_schema->get_column_definition("generation_number")->id)
, _legacy_slice(slice)
, _slice(adjust_partition_slice())
, _underlying(scylla_views_build_progress.make_reader(
scylla_views_build_progress.schema(),
range,
slice,
pc,
std::move(trace_state),
fwd,
fwd_mr)) {
}
const schema& underlying_schema() const {
return *_underlying.schema();
}
query::partition_slice adjust_partition_slice() {
auto slice = _legacy_slice;
std::vector<column_id> adjusted_columns;
for (auto col_id : slice.regular_columns) {
if (col_id == _legacy_last_token_col) {
adjusted_columns.push_back(_scylla_next_token_col);
} else if (col_id == _legacy_generation_number_col) {
adjusted_columns.push_back(_scylla_generation_number_col);
}
}
slice.regular_columns = std::move(adjusted_columns);
return slice;
}
clustering_key adjust_ckey(clustering_key& ck) {
if (ck.size(underlying_schema()) < 3) {
return std::move(ck);
}
// Drop the cpu_id from the clustering key
auto end = ck.begin(*_schema);
std::advance(end, 1);
auto r = boost::make_iterator_range(ck.begin(*_schema), std::move(end));
return clustering_key_prefix::from_exploded(r);
}
virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override {
return _underlying.fill_buffer(timeout).then([this] {
_end_of_stream = _underlying.is_end_of_stream();
while (!_underlying.is_buffer_empty()) {
auto mf = _underlying.pop_mutation_fragment();
if (mf.is_clustering_row()) {
auto scylla_in_progress_row = std::move(mf).as_clustering_row();
auto legacy_in_progress_row = row();
// Drop the first_token from the regular columns
scylla_in_progress_row.cells().for_each_cell([&, this] (column_id id, atomic_cell_or_collection& c) {
if (id == _scylla_next_token_col) {
legacy_in_progress_row.append_cell(_legacy_last_token_col, std::move(c));
} else if (id == _scylla_generation_number_col) {
legacy_in_progress_row.append_cell(_legacy_generation_number_col, std::move(c));
}
});
mf = clustering_row(
adjust_ckey(scylla_in_progress_row.key()),
std::move(scylla_in_progress_row.tomb()),
std::move(scylla_in_progress_row.marker()),
std::move(legacy_in_progress_row));
} else if (mf.is_range_tombstone()) {
auto scylla_in_progress_rt = std::move(mf).as_range_tombstone();
mf = range_tombstone(
adjust_ckey(scylla_in_progress_rt.start),
scylla_in_progress_rt.start_kind,
scylla_in_progress_rt.end,
scylla_in_progress_rt.end_kind,
scylla_in_progress_rt.tomb);
}
push_mutation_fragment(std::move(mf));
}
});
}
virtual void next_partition() override {
_end_of_stream = false;
clear_buffer_to_next_partition();
if (is_buffer_empty()) {
_underlying.next_partition();
}
}
virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override {
clear_buffer();
_end_of_stream = false;
return _underlying.fast_forward_to(pr, timeout);
}
virtual future<> fast_forward_to(position_range range, db::timeout_clock::time_point timeout) override {
forward_buffer_to(range.start());
_end_of_stream = false;
return _underlying.fast_forward_to(std::move(range), timeout);
}
};
public:
build_progress_virtual_reader(database& db)
: _db(db) {
}
flat_mutation_reader operator()(
schema_ptr s,
const dht::partition_range& range,
const query::partition_slice& slice,
const io_priority_class& pc,
tracing::trace_state_ptr trace_state,
streamed_mutation::forwarding fwd,
mutation_reader::forwarding fwd_mr) {
return flat_mutation_reader(std::make_unique<build_progress_reader>(
std::move(s),
_db.find_column_family(s->ks_name(), system_keyspace::v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS),
range,
slice,
pc,
std::move(trace_state),
fwd,
fwd_mr));
}
};
}
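The reader above performs two mechanical translations: it renames regular columns (next_token to last_token, keeping generation_number) while dropping columns the legacy schema lacks, and it sheds the trailing cpu_id component from the clustering key. A self-contained sketch of both steps, using a string map and a string vector as illustrative stand-ins for the real row and clustering-key types:

```cpp
#include <cassert>
#include <map>
#include <string>
#include <vector>

// Illustrative stand-ins (not the real Scylla types): a row is a map
// from column name to value; a clustering key is a vector of components.
using row = std::map<std::string, std::string>;
using clustering_key = std::vector<std::string>;

// Map scylla_views_builds_in_progress regular columns back to the legacy
// names, dropping columns the legacy schema does not have (first_token).
inline row remap_regular_columns(const row& scylla_row) {
    static const std::map<std::string, std::string> mapping = {
        {"next_token", "last_token"},
        {"generation_number", "generation_number"},
    };
    row legacy_row;
    for (const auto& [col, value] : scylla_row) {
        if (auto it = mapping.find(col); it != mapping.end()) {
            legacy_row[it->second] = value;
        }
    }
    return legacy_row;
}

// Shed the trailing cpu_id component, mirroring adjust_ckey():
// (view_name, cpu_id) -> (view_name).
inline clustering_key shed_cpu_id(clustering_key ck) {
    if (ck.size() > 1) {
        ck.pop_back();
    }
    return ck;
}
```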

File diff suppressed because it is too large.


@@ -33,13 +33,6 @@ namespace db {
namespace view {
struct stats {
int64_t view_updates_pushed_local = 0;
int64_t view_updates_pushed_remote = 0;
int64_t view_updates_failed_local = 0;
int64_t view_updates_failed_remote = 0;
};
/**
* Whether the view filter considers the specified partition key.
*
@@ -99,7 +92,8 @@ query::clustering_row_ranges calculate_affected_clustering_ranges(
const mutation_partition& mp,
const std::vector<view_ptr>& views);
future<> mutate_MV(const dht::token& base_token, std::vector<mutation> mutations, db::view::stats& stats);
future<> mutate_MV(const dht::token& base_token,
std::vector<mutation> mutations);
}


@@ -1,197 +0,0 @@
/*
* Copyright (C) 2018 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "database_fwd.hh"
#include "db/system_keyspace.hh"
#include "db/system_distributed_keyspace.hh"
#include "dht/i_partitioner.hh"
#include "keys.hh"
#include "query-request.hh"
#include "service/migration_listener.hh"
#include "service/migration_manager.hh"
#include "sstables/sstable_set.hh"
#include "utils/exponential_backoff_retry.hh"
#include "utils/serialized_action.hh"
#include "utils/UUID.hh"
#include <seastar/core/abort_source.hh>
#include <seastar/core/future.hh>
#include <seastar/core/lowres_clock.hh>
#include <seastar/core/semaphore.hh>
#include <seastar/core/sharded.hh>
#include <seastar/core/shared_future.hh>
#include <seastar/core/shared_ptr.hh>
#include <optional>
#include <unordered_map>
#include <vector>
namespace db::view {
/**
* The view_builder is a sharded service responsible for building all defined materialized views.
* This process entails walking over the existing data in a given base table, and using it to
* calculate and insert the respective entries for one or more views.
*
* We employ a flat_mutation_reader for each base table for which we're building views.
*
* We aim to be resource-conscious. On a given shard, at any given moment, we consume at most
* from one reader. We also strive for fairness, in that each build step inserts entries for
* the views of a different base. Each build step reads and generates updates for batch_size rows.
*
* We lack a controller, which could potentially allow us to go faster (to execute multiple steps at
* the same time, or consume more rows per batch), and which would also apply backpressure, so that
* we could, for example, delay executing a build step.
*
* View building is necessarily a sharded process. That means that on restart, if the number of shards
* has changed, we need to calculate the most conservative token range that has been built, and build
* the remainder.
*
* Interaction with the system tables:
* - When we start building a view, we add an entry to the scylla_views_builds_in_progress
* system table. If the node restarts at this point, we'll consider these newly inserted
* views as having made no progress, and we'll treat them as new views;
* - When we finish a build step, we update the progress of the views that we built during
* this step by writing the next token to the scylla_views_builds_in_progress table. If
* the node restarts here, we'll start building the views at the token in the next_token column.
* - When we finish building a view, we mark it as completed in the built views system table, and
* remove it from the in-progress system table. Under failure, the following can happen:
* * When we fail to mark the view as built, we'll redo the last step upon node reboot;
* * When we fail to delete the in-progress record, upon reboot we'll remove this record.
* A view is marked as completed only when all shards have finished their share of the work; that is,
* if a view is not yet built, then every shard still has an entry for it in the in-progress system table.
* - A view that one shard has finished building, but other shards have not, remains in the in-progress
* system table, with first_token == next_token.
* Interaction with the distributed system table (view_build_status):
* - When we start building a view, we mark the view build as being in-progress;
* - When we finish building a view, we mark the view as being built. Upon failure,
* we ensure that if the view is in the in-progress system table, then it may not
* have been written to this table. We don't load the built views from this table
* on startup; instead, the following happens:
* * If the view is in the system.built_views table and not the in-progress
* system table, then it will be in view_build_status;
* * If the view is in the system.built_views table and not in this one, it
* will still be in the in-progress system table - we detect this and mark
* it as built in this table too, keeping the invariant;
* * If the view is in this table but not in system.built_views, then it will
* also be in the in-progress system table - we don't detect this and will
* redo the missing step, for simplicity.
*/
class view_builder final : public service::migration_listener::only_view_notifications, public seastar::peering_sharded_service<view_builder> {
/**
* Keeps track of the build progress for a particular view.
* When the view is built, next_token == first_token.
*/
struct view_build_status final {
view_ptr view;
dht::token first_token;
std::optional<dht::token> next_token;
};
/**
* Keeps track of the build progress for all the views of a particular
* base table. Each execution of the build step comprises a query of
* the base table for the selected range.
*
* We pin the set of sstables that potentially contain data that should be added to a
* view (they are pinned by the flat_mutation_reader). Adding a view v' overwrites the
* set of pinned sstables, regardless of whether another view v'' is being built. The
* new set will potentially contain new data already in v'', written as part of the write
* path. We assume this case is rare and optimize for less disk space at the expense of
* network bandwidth.
*/
struct build_step final {
// Ensure we pin the column_family. It may happen that all views are removed,
// and that the base table is removed too, before we can detect it.
lw_shared_ptr<column_family> base;
query::partition_slice pslice;
dht::partition_range prange;
flat_mutation_reader reader{nullptr};
dht::decorated_key current_key{dht::minimum_token(), partition_key::make_empty()};
std::vector<view_build_status> build_status;
const dht::token& current_token() const {
return current_key.token();
}
};
using base_to_build_step_type = std::unordered_map<utils::UUID, build_step>;
database& _db;
db::system_distributed_keyspace& _sys_dist_ks;
service::migration_manager& _mm;
base_to_build_step_type _base_to_build_step;
base_to_build_step_type::iterator _current_step = _base_to_build_step.end();
serialized_action _build_step{std::bind(&view_builder::do_build_step, this)};
// Ensures bookkeeping operations are serialized, meaning that while we execute
// a build step we don't consider newly added or removed views. This simplifies
// the algorithms. Also synchronizes an operation wrt. a call to stop().
seastar::semaphore _sem{1};
seastar::abort_source _as;
future<> _started = make_ready_future<>();
// Used to coordinate between shards the conclusion of the build process for a particular view.
std::unordered_set<utils::UUID> _built_views;
// Used for testing.
std::unordered_map<std::pair<sstring, sstring>, seastar::shared_promise<>, utils::tuple_hash> _build_notifiers;
public:
static constexpr size_t batch_size = 128;
public:
view_builder(database&, db::system_distributed_keyspace&, service::migration_manager&);
view_builder(view_builder&&) = delete;
/**
* Loads the state stored in the system tables to resume building the existing views.
* Requires that all views have been loaded from the system tables and are accessible
* through the database, and that the commitlog has been replayed.
*/
future<> start();
/**
* Stops the view building process.
*/
future<> stop();
virtual void on_create_view(const sstring& ks_name, const sstring& view_name) override;
virtual void on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed) override;
virtual void on_drop_view(const sstring& ks_name, const sstring& view_name) override;
// For tests
future<> wait_until_built(const sstring& ks_name, const sstring& view_name, lowres_clock::time_point timeout);
private:
build_step& get_or_create_build_step(utils::UUID);
void initialize_reader_at_current_token(build_step&);
void load_view_status(view_build_status, std::unordered_set<utils::UUID>&);
void reshard(std::vector<std::vector<view_build_status>>, std::unordered_set<utils::UUID>&);
future<> calculate_shard_build_step(std::vector<system_keyspace::view_name>, std::vector<system_keyspace::view_build_progress>);
future<> add_new_view(view_ptr, build_step&);
future<> do_build_step();
void execute(build_step&, exponential_backoff_retry);
future<> maybe_mark_view_as_built(view_ptr, dht::token);
struct consumer;
};
}
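The docstring above notes that after a restart with a different shard count, the builder must fall back to "the most conservative token range that has been built". The safe resume point is the smallest token any previous shard had reached, so some rows may be re-read but none are skipped. A minimal sketch of that choice, with integer tokens standing in for dht::token and a field layout that is illustrative only:

```cpp
#include <cassert>
#include <cstdint>
#include <limits>
#include <optional>
#include <vector>

// Illustrative per-shard progress for one view: an unset next_token
// means the shard recorded no progress yet (resume from first_token).
struct shard_progress {
    int64_t first_token;
    std::optional<int64_t> next_token;
};

// When the shard count changes across a restart, old per-shard progress
// rows can no longer be mapped one-to-one onto the new shards. The
// conservative resume point is the minimum token any old shard reached:
// re-reading is safe, skipping is not. Callers pass at least one entry.
inline int64_t conservative_resume_token(const std::vector<shard_progress>& old_shards) {
    int64_t resume = std::numeric_limits<int64_t>::max();
    for (const auto& s : old_shards) {
        resume = std::min(resume, s.next_token.value_or(s.first_token));
    }
    return resume;
}
```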


@@ -120,7 +120,7 @@ else
fi
fi
echo -n " "
/usr/lib/scylla/scylla_ec2_check
/usr/lib/scylla/scylla_ec2_check --nic eth0
if [ $? -eq 0 ]; then
echo
fi


@@ -2,6 +2,12 @@
. /usr/lib/scylla/scylla_lib.sh
print_usage() {
echo "scylla_ec2_check --nic eth0"
echo " --nic specify NIC"
exit 1
}
get_en_interface_type() {
TYPE=`curl -s http://169.254.169.254/latest/meta-data/instance-type|cut -d . -f 1`
SUBTYPE=`curl -s http://169.254.169.254/latest/meta-data/instance-type|cut -d . -f 2`
@@ -18,7 +24,7 @@ get_en_interface_type() {
}
is_vpc_enabled() {
MAC=`cat /sys/class/net/eth0/address`
MAC=`cat /sys/class/net/$1/address`
VPC_AVAIL=`curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/$MAC/|grep vpc-id`
[ -n "$VPC_AVAIL" ]
}
@@ -27,9 +33,27 @@ if ! is_ec2; then
exit 0
fi
if [ $# -eq 0 ]; then
print_usage
fi
while [ $# -gt 0 ]; do
case "$1" in
"--nic")
verify_args $@
NIC="$2"
shift 2
;;
esac
done
if ! is_valid_nic $NIC; then
echo "NIC $NIC doesn't exist."
exit 1
fi
TYPE=`curl -s http://169.254.169.254/latest/meta-data/instance-type`
EN=`get_en_interface_type`
DRIVER=`ethtool -i eth0|awk '/^driver:/ {print $2}'`
DRIVER=`ethtool -i $NIC|awk '/^driver:/ {print $2}'`
if [ "$EN" = "" ]; then
tput setaf 1
tput bold
@@ -39,7 +63,7 @@ if [ "$EN" = "" ]; then
echo "More documentation available at: "
echo "http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/enhanced-networking.html#enabling_enhanced_networking"
exit 1
elif ! is_vpc_enabled; then
elif ! is_vpc_enabled $NIC; then
tput setaf 1
tput bold
echo "VPC is not enabled!"


@@ -91,6 +91,10 @@ create_perftune_conf() {
/usr/lib/scylla/perftune.py --tune net --nic "$nic" $mode --dump-options-file > /etc/scylla.d/perftune.yaml
}
is_valid_nic() {
[ -d /sys/class/net/$1 ]
}
. /etc/os-release
if is_debian_variant || is_gentoo_variant; then
SYSCONFIG=/etc/default


@@ -39,6 +39,27 @@ print_usage() {
exit 1
}
interactive_choose_nic() {
NICS=$(for i in /sys/class/net/*;do nic=`basename $i`; if [ "$nic" != "lo" ]; then echo $nic; fi; done)
NR_NICS=`echo $NICS|wc -w`
if [ $NR_NICS -eq 0 ]; then
echo "NIC not found."
exit 1
elif [ $NR_NICS -eq 1 ]; then
NIC=$NICS
else
echo "Please select NIC from following list: "
while true; do
echo $NICS
echo -n "> "
read NIC
if is_valid_nic $NIC; then
break
fi
done
fi
}
interactive_ask_service() {
echo $1
echo $2
@@ -112,14 +133,20 @@ run_setup_script() {
name=$1
shift 1
$* &&:
if [ $? -ne 0 ] && [ $INTERACTIVE -eq 1 ]; then
printf "${RED}$name setup failed. press any key to continue...${NO_COLOR}\n"
read
return 1
if [ $? -ne 0 ]; then
if [ $INTERACTIVE -eq 1 ]; then
printf "${RED}$name setup failed. press any key to continue...${NO_COLOR}\n"
read
return 1
else
printf "$name setup failed.\n"
exit 1
fi
fi
return 0
}
NIC="eth0"
AMI=0
SET_NIC=0
DEV_MODE=0
@@ -260,7 +287,8 @@ if is_ec2; then
EC2_CHECK=$?
fi
if [ $EC2_CHECK -eq 1 ]; then
/usr/lib/scylla/scylla_ec2_check
interactive_choose_nic
/usr/lib/scylla/scylla_ec2_check --nic $NIC
fi
fi
@@ -447,24 +475,6 @@ if [ $INTERACTIVE -eq 1 ]; then
interactive_ask_service "Do you want to setup sysconfig?" "Answer yes to do system wide configuration customized for Scylla. Answer no to do nothing." "yes" &&:
SYSCONFIG_SETUP=$?
if [ $SYSCONFIG_SETUP -eq 1 ]; then
NICS=$(for i in /sys/class/net/*;do nic=`basename $i`; if [ "$nic" != "lo" ]; then echo $nic; fi; done)
NR_NICS=`echo $NICS|wc -w`
if [ $NR_NICS -eq 0 ]; then
echo "NIC not found."
exit 1
elif [ $NR_NICS -eq 1 ]; then
NIC=$NICS
else
echo "Please select NIC from following list: "
while true; do
echo $NICS
echo -n "> "
read NIC
if [ -e /sys/class/net/$NIC ]; then
break
fi
done
fi
interactive_ask_service "Do you want to optimize NIC queue settings?" "Answer yes to enable network card optimization and improve performance. Answer no to skip this optimization." "yes" &&:
SET_NIC=$?
fi
@@ -474,6 +484,7 @@ if [ $SYSCONFIG_SETUP -eq 1 ]; then
if [ $SET_NIC -eq 1 ]; then
SETUP_ARGS="--setup-nic"
fi
interactive_choose_nic
run_setup_script "NIC queue" /usr/lib/scylla/scylla_sysconfig_setup --nic $NIC $SETUP_ARGS
fi


@@ -254,19 +254,18 @@ if [ "$TARGET" != "trusty" ]; then
cp dist/common/systemd/node-exporter.service debian/scylla-server.node-exporter.service
fi
sudo cp ./dist/debian/pbuilderrc ~root/.pbuilderrc
if [ $NO_CLEAN -eq 0 ]; then
sudo rm -fv /var/cache/pbuilder/scylla-server-$TARGET.tgz
sudo -H DIST=$TARGET /usr/sbin/pbuilder clean
sudo -H DIST=$TARGET /usr/sbin/pbuilder create --allow-untrusted
sudo DIST=$TARGET /usr/sbin/pbuilder clean --configfile ./dist/debian/pbuilderrc
sudo DIST=$TARGET /usr/sbin/pbuilder create --configfile ./dist/debian/pbuilderrc --allow-untrusted
fi
if [ $JOBS -ne 0 ]; then
DEB_BUILD_OPTIONS="parallel=$JOBS"
fi
sudo -H DIST=$TARGET /usr/sbin/pbuilder update --allow-untrusted
sudo -H DIST=$TARGET /usr/sbin/pbuilder update --configfile ./dist/debian/pbuilderrc --allow-untrusted
if [ "$TARGET" = "trusty" ] || [ "$TARGET" = "xenial" ] || [ "$TARGET" = "yakkety" ] || [ "$TARGET" = "zesty" ] || [ "$TARGET" = "artful" ] || [ "$TARGET" = "bionic" ]; then
sudo -H DIST=$TARGET /usr/sbin/pbuilder execute --save-after-exec dist/debian/ubuntu_enable_ppa.sh
sudo DIST=$TARGET /usr/sbin/pbuilder execute --configfile ./dist/debian/pbuilderrc --save-after-exec dist/debian/ubuntu_enable_ppa.sh
elif [ "$TARGET" = "jessie" ] || [ "$TARGET" = "stretch" ]; then
sudo -H DIST=$TARGET /usr/sbin/pbuilder execute --save-after-exec dist/debian/debian_install_gpgkey.sh
sudo DIST=$TARGET /usr/sbin/pbuilder execute --configfile ./dist/debian/pbuilderrc --save-after-exec dist/debian/debian_install_gpgkey.sh
fi
sudo -H DIST=$TARGET DEB_BUILD_OPTIONS=$DEB_BUILD_OPTIONS pdebuild --buildresult build/debs
sudo -H DIST=$TARGET DEB_BUILD_OPTIONS=$DEB_BUILD_OPTIONS pdebuild --configfile ./dist/debian/pbuilderrc --buildresult build/debs


@@ -183,10 +183,7 @@ flat_mutation_reader make_delegating_reader(flat_mutation_reader& r) {
flat_mutation_reader make_forwardable(flat_mutation_reader m) {
class reader : public flat_mutation_reader::impl {
flat_mutation_reader _underlying;
position_range _current = {
position_in_partition(position_in_partition::partition_start_tag_t()),
position_in_partition(position_in_partition::after_static_row_tag_t())
};
position_range _current;
mutation_fragment_opt _next;
// When resolves, _next is engaged or _end_of_stream is set.
future<> ensure_next() {
@@ -201,7 +198,10 @@ flat_mutation_reader make_forwardable(flat_mutation_reader m) {
});
}
public:
reader(flat_mutation_reader r) : impl(r.schema()), _underlying(std::move(r)) { }
reader(flat_mutation_reader r) : impl(r.schema()), _underlying(std::move(r)), _current({
position_in_partition(position_in_partition::partition_start_tag_t()),
position_in_partition(position_in_partition::after_static_row_tag_t())
}) { }
virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override {
return repeat([this] {
if (is_buffer_full()) {
@@ -621,37 +621,3 @@ make_flat_multi_range_reader(schema_ptr s, mutation_source source, const dht::pa
return make_flat_mutation_reader<flat_multi_range_mutation_reader>(std::move(s), std::move(source), ranges,
slice, pc, std::move(trace_state), fwd_mr);
}
flat_mutation_reader
make_flat_mutation_reader_from_fragments(schema_ptr schema, std::deque<mutation_fragment> fragments) {
class reader : public flat_mutation_reader::impl {
std::deque<mutation_fragment> _fragments;
public:
reader(schema_ptr schema, std::deque<mutation_fragment> fragments)
: flat_mutation_reader::impl(std::move(schema))
, _fragments(std::move(fragments)) {
}
virtual future<> fill_buffer(db::timeout_clock::time_point) override {
while (!(_end_of_stream = _fragments.empty()) && !is_buffer_full()) {
push_mutation_fragment(std::move(_fragments.front()));
_fragments.pop_front();
}
return make_ready_future<>();
}
virtual void next_partition() override {
clear_buffer_to_next_partition();
if (is_buffer_empty()) {
while (!(_end_of_stream = _fragments.empty()) && !_fragments.front().is_partition_start()) {
_fragments.pop_front();
}
}
}
virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override {
throw std::runtime_error("This reader can't be fast forwarded to another range.");
}
virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override {
throw std::runtime_error("This reader can't be fast forwarded to another position.");
}
};
return make_flat_mutation_reader<reader>(std::move(schema), std::move(fragments));
}


@@ -32,8 +32,6 @@
#include <seastar/util/gcc6-concepts.hh>
#include "db/timeout_clock.hh"
#include <deque>
using seastar::future;
class mutation_source;
@@ -557,9 +555,6 @@ make_flat_multi_range_reader(schema_ptr s, mutation_source source, const dht::pa
tracing::trace_state_ptr trace_state = nullptr,
flat_mutation_reader::partition_range_forwarding fwd_mr = flat_mutation_reader::partition_range_forwarding::yes);
flat_mutation_reader
make_flat_mutation_reader_from_fragments(schema_ptr, std::deque<mutation_fragment>);
// Calls the consumer for each element of the reader's stream until end of stream
// is reached or the consumer requests iteration to stop by returning stop_iteration::yes.
// The consumer should accept mutation as the argument and return stop_iteration.


@@ -1005,7 +1005,7 @@ future<> gossiper::assassinate_endpoint(sstring address) {
logger.warn("Assassinating {} via gossip", endpoint);
if (es) {
auto& ss = service::get_local_storage_service();
auto tokens = ss.get_token_metadata().get_tokens(endpoint);
tokens = ss.get_token_metadata().get_tokens(endpoint);
if (tokens.empty()) {
logger.warn("Unable to calculate tokens for {}. Will use a random one", address);
throw std::runtime_error(sprint("Unable to calculate tokens for %s", endpoint));


@@ -1,77 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Copyright (C) 2015 ScyllaDB
*
* Modified by ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "core/sstring.hh"
#include "seastarx.hh"
namespace db {
namespace index {
/**
* Abstract base class for different types of secondary indexes.
*
* Do not extend this directly, please pick from PerColumnSecondaryIndex or PerRowSecondaryIndex
*/
class secondary_index {
public:
static const sstring custom_index_option_name;
/**
* The name of the option used to specify that the index is on the collection keys.
*/
static const sstring index_keys_option_name;
/**
* The name of the option used to specify that the index is on the collection values.
*/
static const sstring index_values_option_name;
/**
* The name of the option used to specify that the index is on the collection (map) entries.
*/
static const sstring index_entries_option_name;
};
}
}


@@ -42,7 +42,6 @@
#include "index/secondary_index_manager.hh"
#include "cql3/statements/index_target.hh"
#include "cql3/util.hh"
#include "index/target_parser.hh"
#include "db/query_context.hh"
#include "schema_builder.hh"
@@ -94,14 +93,11 @@ void secondary_index_manager::add_index(const index_metadata& im) {
_indices.emplace(im.name(), index{index_target_name, im});
}
static sstring index_table_name(const sstring& index_name) {
return sprint("%s_index", index_name);
}
view_ptr secondary_index_manager::create_view_for_index(const index_metadata& im) const {
auto schema = _cf.schema();
sstring index_table_name = sprint("%s_index", im.name());
sstring index_target_name = im.options().at(cql3::statements::index_target::target_option_name);
schema_builder builder{schema->ks_name(), index_table_name(im.name())};
schema_builder builder{schema->ks_name(), index_table_name};
auto target = target_parser::parse(schema, im);
const auto* index_target = std::get<const column_definition*>(target);
auto target_type = std::get<cql3::statements::index_target::target_type>(target);
@@ -110,9 +106,6 @@ view_ptr secondary_index_manager::create_view_for_index(const index_metadata& im
}
builder.with_column(index_target->name(), index_target->type, column_kind::partition_key);
for (auto& col : schema->partition_key_columns()) {
if (col == *index_target) {
continue;
}
builder.with_column(col.name(), col.type, column_kind::clustering_key);
}
for (auto& col : schema->clustering_key_columns()) {
@@ -121,7 +114,7 @@ view_ptr secondary_index_manager::create_view_for_index(const index_metadata& im
}
builder.with_column(col.name(), col.type, column_kind::clustering_key);
}
const sstring where_clause = sprint("%s IS NOT NULL", cql3::util::maybe_quote(index_target_name));
const sstring where_clause = sprint("%s IS NOT NULL", index_target_name);
builder.with_view_info(*schema, false, where_clause);
return view_ptr{builder.build()};
}
@@ -136,14 +129,4 @@ std::vector<index_metadata> secondary_index_manager::get_dependent_indices(const
std::vector<index> secondary_index_manager::list_indexes() const {
return boost::copy_range<std::vector<index>>(_indices | boost::adaptors::map_values);
}
bool secondary_index_manager::is_index(view_ptr view) const {
for (auto& i : list_indexes()) {
if (view->cf_name() == index_table_name(i.metadata().name())) {
return true;
}
}
return false;
}
}


@@ -70,7 +70,6 @@ public:
view_ptr create_view_for_index(const index_metadata& index) const;
std::vector<index_metadata> get_dependent_indices(const column_definition& cdef) const;
std::vector<index> list_indexes() const;
bool is_index(view_ptr) const;
private:
void add_index(const index_metadata& im);
};


@@ -79,6 +79,18 @@ struct target_parser {
target_type = index_target::target_type::values;
}
// in the case of a quoted column name the name in the target string
// will be enclosed in quotes, which we need to unwrap. It may also
// include quote characters internally, escaped like so:
// abc"def -> abc""def.
// Because the target string is stored in a CQL compatible form, we
// need to un-escape any such quotes to get the actual column name
static const sstring quote{"\""};
if (boost::starts_with(target, quote)) {
column_name = column_name.substr(1, column_name.length()-2);
static const std::regex two_quotes("\"\"");
column_name = std::regex_replace(std::string{column_name}, two_quotes, std::string{quote});
}
auto column = schema->get_column_definition(utf8_type->decompose(column_name));
if (!column) {
return stdx::nullopt;
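The quote un-escaping described in the comment above can be sketched as a standalone helper. This is a hedged illustration with an invented function name, not the actual `target_parser` code (which uses `boost::starts_with` and `std::regex_replace`):

```cpp
#include <cassert>
#include <string>

// Undo CQL identifier quoting: a quoted name is wrapped in double quotes,
// and internal quotes are doubled, e.g. the column abc"def is stored as
// "abc""def". Unquoted identifiers are returned unchanged.
std::string unquote_cql_identifier(const std::string& target) {
    if (target.size() < 2 || target.front() != '"') {
        return target; // not quoted, nothing to unwrap
    }
    // Drop the surrounding quotes.
    std::string name = target.substr(1, target.size() - 2);
    // Collapse every escaped "" back to a single ".
    std::string out;
    for (size_t i = 0; i < name.size(); ++i) {
        out += name[i];
        if (name[i] == '"' && i + 1 < name.size() && name[i + 1] == '"') {
            ++i; // skip the second quote of the escaped pair
        }
    }
    return out;
}
```

The patch itself achieves the same effect with a `std::regex_replace` of `""` by `"`; the explicit loop here just avoids the regex dependency.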


@@ -34,8 +34,8 @@ logging::logger startlog("init");
// duplicated in cql_test_env.cc
// until proper shutdown is done.
void init_storage_service(distributed<database>& db, sharded<auth::service>& auth_service, sharded<db::system_distributed_keyspace>& sys_dist_ks) {
service::init_storage_service(db, auth_service, sys_dist_ks).get();
void init_storage_service(distributed<database>& db, sharded<auth::service>& auth_service) {
service::init_storage_service(db, auth_service).get();
// #293 - do not stop anything
//engine().at_exit([] { return service::deinit_storage_service(); });
}


@@ -25,7 +25,6 @@
#include <seastar/core/distributed.hh>
#include "auth/service.hh"
#include "db/config.hh"
#include "db/system_distributed_keyspace.hh"
#include "database.hh"
#include "log.hh"
@@ -37,7 +36,7 @@ extern logging::logger startlog;
class bad_configuration_error : public std::exception {};
void init_storage_service(distributed<database>& db, sharded<auth::service>&, sharded<db::system_distributed_keyspace>&);
void init_storage_service(distributed<database>& db, sharded<auth::service>&);
void init_ms_fd_gossiper(sstring listen_address
, uint16_t storage_port
, uint16_t ssl_storage_port

keys.hh

@@ -146,19 +146,6 @@ public:
auto components(const schema& s) const {
return components();
}
bool is_empty() const {
return _bytes.empty();
}
explicit operator bool() const {
return !is_empty();
}
// For backward compatibility with existing code.
bool is_empty(const schema& s) const {
return is_empty();
}
};
template <typename TopLevel, typename TopLevelView>
@@ -317,19 +304,6 @@ public:
return get_compound_type(s)->end(_bytes);
}
bool is_empty() const {
return _bytes.empty();
}
explicit operator bool() const {
return !is_empty();
}
// For backward compatibility with existing code.
bool is_empty(const schema& s) const {
return is_empty();
}
// Returns a range of bytes_view
auto components() const {
return TopLevelView::compound::element_type::components(representation());
@@ -542,6 +516,10 @@ public:
bool is_full(const schema& s) const {
return TopLevel::get_compound_type(s)->is_full(base::_bytes);
}
bool is_empty(const schema& s) const {
return TopLevel::get_compound_type(s)->is_empty(base::_bytes);
}
};
template <typename TopLevel, typename TopLevelView, typename FullTopLevel>
@@ -560,6 +538,10 @@ public:
return TopLevel::get_compound_type(s)->is_full(base::_bytes);
}
bool is_empty(const schema& s) const {
return TopLevel::get_compound_type(s)->is_empty(base::_bytes);
}
// Can be called only if is_full()
FullTopLevel to_full(const schema& s) const {
return FullTopLevel::from_exploded(s, base::explode(s));
@@ -739,6 +721,10 @@ public:
static const compound& get_compound_type(const schema& s) {
return s.clustering_key_prefix_type();
}
static clustering_key_prefix_view make_empty() {
return { bytes_view() };
}
};
class clustering_key_prefix : public prefix_compound_wrapper<clustering_key_prefix, clustering_key_prefix_view, clustering_key> {
@@ -817,4 +803,4 @@ struct appending_hash<clustering_key_prefix> {
void operator()(Hasher& h, const clustering_key_prefix& ck, const schema& s) const {
appending_hash<clustering_key_prefix_view>()(h, ck.view(), s);
}
};
};


@@ -119,9 +119,17 @@ insert_token_range_to_sorted_container_while_unwrapping(
const dht::token& tok,
dht::token_range_vector& ret) {
if (prev_tok < tok) {
ret.emplace_back(
dht::token_range::bound(prev_tok, false),
dht::token_range::bound(tok, true));
auto pos = ret.end();
if (!ret.empty() && !std::prev(pos)->end()) {
// We inserted a wrapped range (a, b] previously as
// (-inf, b], (a, +inf). So now we insert in the next-to-last
// position to keep the last range (a, +inf) at the end.
pos = std::prev(pos);
}
ret.insert(pos,
dht::token_range{
dht::token_range::bound(prev_tok, false),
dht::token_range::bound(tok, true)});
} else {
ret.emplace_back(
dht::token_range::bound(prev_tok, false),
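The next-to-last insertion rule from the comment above can be illustrated with plain `int` tokens. This is a hedged sketch: `range` and `insert_unwrapped` are invented stand-ins for `dht::token_range` and the patched function, with `std::nullopt` playing the role of an unbounded end:

```cpp
#include <cassert>
#include <optional>
#include <vector>

// Minimal stand-in for a token range; nullopt means -inf (start) or +inf (end).
struct range {
    std::optional<int> start;
    std::optional<int> end;
};

// Insert (prev_tok, tok]. If the container already ends with an open-ended
// range (a, +inf) -- the tail of a previously unwrapped wrapped range --
// insert at the next-to-last position so that tail stays last.
void insert_unwrapped(std::vector<range>& ret, int prev_tok, int tok) {
    auto pos = ret.end();
    if (!ret.empty() && !std::prev(pos)->end) {
        pos = std::prev(pos);
    }
    ret.insert(pos, range{prev_tok, tok});
}
```

With a wrapped range (10, 5] already stored as (-inf, 5], (10, +inf), inserting (6, 8] lands between the two and the container stays sorted.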

main.cc

@@ -35,12 +35,10 @@
#include "service/load_broadcaster.hh"
#include "streaming/stream_session.hh"
#include "db/system_keyspace.hh"
#include "db/system_distributed_keyspace.hh"
#include "db/batchlog_manager.hh"
#include "db/commitlog/commitlog.hh"
#include "db/hints/manager.hh"
#include "db/commitlog/commitlog_replayer.hh"
#include "db/view/view_builder.hh"
#include "utils/runtime.hh"
#include "utils/file_lock.hh"
#include "log.hh"
@@ -471,11 +469,9 @@ int main(int ac, char** av) {
ctx.http_server.listen(ipv4_addr{ip, api_port}).get();
startlog.info("Scylla API server listening on {}:{} ...", api_address, api_port);
static sharded<auth::service> auth_service;
static sharded<db::system_distributed_keyspace> sys_dist_ks;
supervisor::notify("initializing storage service");
init_storage_service(db, auth_service, sys_dist_ks);
init_storage_service(db, auth_service);
supervisor::notify("starting per-shard database core");
// Note: changed from using a move here, because we want the config object intact.
database_config dbcfg;
auto make_sched_group = [&] (sstring name, unsigned shares) {
@@ -514,24 +510,18 @@ int main(int ac, char** av) {
db.local().get_config().data_file_directories().cend());
directories.insert(db.local().get_config().commitlog_directory());
supervisor::notify("creating hints directories");
if (hinted_handoff_enabled) {
supervisor::notify("creating hints directories");
using namespace boost::filesystem;
boost::filesystem::path hints_base_dir(db.local().get_config().hints_directory());
dirs.touch_and_lock(db.local().get_config().hints_directory()).get();
directories.insert(db.local().get_config().hints_directory());
for (unsigned i = 0; i < smp::count; ++i) {
sstring shard_dir((hints_base_dir / seastar::to_sstring(i).c_str()).native());
dirs.touch_and_lock(shard_dir).get();
directories.insert(std::move(shard_dir));
}
boost::filesystem::path view_pending_updates_base_dir = boost::filesystem::path(db.local().get_config().data_file_directories()[0]) / "view_pending_updates";
sstring view_pending_updates_base_dir_str = view_pending_updates_base_dir.native();
dirs.touch_and_lock(view_pending_updates_base_dir_str).get();
directories.insert(view_pending_updates_base_dir_str);
for (unsigned i = 0; i < smp::count; ++i) {
sstring shard_dir((view_pending_updates_base_dir / seastar::to_sstring(i).c_str()).native());
dirs.touch_and_lock(shard_dir).get();
directories.insert(std::move(shard_dir));
path hints_base_dir(db.local().get_config().hints_directory());
dirs.touch_and_lock(db.local().get_config().hints_directory()).get();
directories.insert(db.local().get_config().hints_directory());
for (unsigned i = 0; i < smp::count; ++i) {
sstring shard_dir((hints_base_dir / seastar::to_sstring(i).c_str()).native());
dirs.touch_and_lock(shard_dir).get();
directories.insert(std::move(shard_dir));
}
}
supervisor::notify("verifying directories");
@@ -639,7 +629,7 @@ int main(int ac, char** av) {
}
// If the same sstable is shared by several shards, it cannot be
// deleted until all shards decide to compact it. So we want to
// start these compactions now. Note we start compacting only after
// start thse compactions now. Note we start compacting only after
// all sstables in this CF were loaded on all shards - otherwise
// we will have races between the compaction and loading processes
// We also want to trigger regular compaction on boot.
@@ -707,17 +697,10 @@ int main(int ac, char** av) {
gms::get_local_gossiper().wait_for_gossip_to_settle().get();
api::set_server_gossip_settle(ctx).get();
supervisor::notify("starting hinted handoff manager");
db::hints::manager::rebalance().get();
proxy.invoke_on_all([] (service::storage_proxy& local_proxy) {
local_proxy.start_hints_manager(gms::get_local_gossiper().shared_from_this());
}).get();
static sharded<db::view::view_builder> view_builder;
if (cfg->view_building()) {
supervisor::notify("starting the view builder");
view_builder.start(std::ref(db), std::ref(sys_dist_ks), std::ref(mm)).get();
view_builder.invoke_on_all(&db::view::view_builder::start).get();
if (hinted_handoff_enabled) {
supervisor::notify("starting hinted handoff manager");
db::hints::manager::rebalance().get();
proxy.invoke_on_all([] (service::storage_proxy& local_proxy) { local_proxy.start_hints_manager(gms::get_local_gossiper().shared_from_this()); }).get();
}
supervisor::notify("starting native transport");
@@ -750,10 +733,6 @@ int main(int ac, char** av) {
return service::get_local_storage_service().drain_on_shutdown();
});
engine().at_exit([] {
return view_builder.stop();
});
engine().at_exit([&db] {
return db.invoke_on_all([](auto& db) {
return db.get_compaction_manager().stop();


@@ -215,7 +215,7 @@ public:
}
t.apply(current_tombstone);
bool is_live = cr.marker().compact_and_expire(t.tomb(), _query_time, _can_gc, _gc_before);
is_live |= cr.cells().compact_and_expire(_schema, column_kind::regular_column, t, _query_time, _can_gc, _gc_before, cr.marker());
is_live |= cr.cells().compact_and_expire(_schema, column_kind::regular_column, t, _query_time, _can_gc, _gc_before);
if (only_live() && is_live) {
partition_is_not_empty(consumer);
auto stop = consumer.consume(std::move(cr), t, true);


@@ -75,11 +75,6 @@ public:
return !_t && _marker.is_missing() && _cells.empty();
}
bool is_live(const schema& s, tombstone base_tombstone = tombstone(), gc_clock::time_point now = gc_clock::time_point::min()) const {
base_tombstone.apply(_t.tomb());
return _marker.is_live(base_tombstone, now) || _cells.is_live(s, column_kind::regular_column, base_tombstone, now);
}
void apply(const schema& s, clustering_row&& cr) {
_marker.apply(std::move(cr._marker));
_t.apply(cr._t, _marker);
@@ -143,10 +138,6 @@ public:
return _cells.empty();
}
bool is_live(const schema& s, gc_clock::time_point now = gc_clock::time_point::min()) const {
return _cells.is_live(s, column_kind::static_column, tombstone(), now);
}
void apply(const schema& s, const row& r) {
_cells.apply(s, column_kind::static_column, r);
}


@@ -35,7 +35,6 @@
#include "intrusive_set_external_comparator.hh"
#include "counters.hh"
#include "row_cache.hh"
#include "view_info.hh"
#include <seastar/core/execution_stage.hh>
template<bool reversed>
@@ -1090,7 +1089,7 @@ row::apply_monotonically(const column_definition& column, atomic_cell_or_collect
if (_type == storage_type::vector && id < max_vector_size) {
if (id >= _storage.vector.v.size()) {
_storage.vector.v.resize(id);
_storage.vector.v.emplace_back(cell_and_hash{std::move(value), std::move(hash)});
_storage.vector.v.emplace_back(std::move(value), std::move(hash));
_storage.vector.present.set(id);
_size++;
} else if (auto& cell_and_hash = _storage.vector.v[id]; !bool(cell_and_hash.cell)) {
@@ -1265,8 +1264,8 @@ uint32_t mutation_partition::do_compact(const schema& s,
deletable_row& row = e.row();
row_tombstone tomb = tombstone_for_row(s, e);
bool is_live = row.marker().compact_and_expire(tomb.tomb(), query_time, can_gc, gc_before);
is_live |= row.cells().compact_and_expire(s, column_kind::regular_column, tomb, query_time, can_gc, gc_before, row.marker());
bool is_live = row.cells().compact_and_expire(s, column_kind::regular_column, tomb, query_time, can_gc, gc_before);
is_live |= row.marker().compact_and_expire(tomb.tomb(), query_time, can_gc, gc_before);
if (should_purge_row_tombstone(row.deleted_at())) {
row.remove_tombstone();
@@ -1336,12 +1335,8 @@ deletable_row::is_live(const schema& s, tombstone base_tombstone, gc_clock::time
// row is live. Otherwise, a row is considered live if it has any cell
// which is live.
base_tombstone.apply(_deleted_at.tomb());
return _marker.is_live(base_tombstone, query_time) || _cells.is_live(s, column_kind::regular_column, base_tombstone, query_time);
}
bool
row::is_live(const schema& s, column_kind kind, tombstone base_tombstone, gc_clock::time_point query_time) const {
return has_any_live_data(s, kind, *this, base_tombstone, query_time);
return _marker.is_live(base_tombstone, query_time)
|| has_any_live_data(s, column_kind::regular_column, _cells, base_tombstone, query_time);
}
bool
@@ -1553,30 +1548,9 @@ void row::apply_monotonically(const schema& s, column_kind kind, row&& other) {
});
}
// When views contain a primary key column that is not part of the base table primary key,
// that column determines whether the row is live or not. We need to ensure that when that
// cell is dead, and thus the derived row marker, either by normal deletion of by TTL, so
// is the rest of the row. To ensure that none of the regular columns keep the row alive,
// we erase the live cells according to the shadowable_tombstone rules.
static bool dead_marker_shadows_row(const schema& s, column_kind kind, const row_marker& marker) {
return s.is_view()
&& s.view_info()->base_non_pk_column_in_view_pk()
&& !marker.is_live()
&& kind == column_kind::regular_column; // not applicable to static rows
}
bool row::compact_and_expire(
const schema& s,
column_kind kind,
row_tombstone tomb,
gc_clock::time_point query_time,
can_gc_fn& can_gc,
gc_clock::time_point gc_before,
const row_marker& marker)
bool row::compact_and_expire(const schema& s, column_kind kind, row_tombstone tomb, gc_clock::time_point query_time,
can_gc_fn& can_gc, gc_clock::time_point gc_before)
{
if (dead_marker_shadows_row(s, kind, marker)) {
tomb.apply(shadowable_tombstone(api::max_timestamp, gc_clock::time_point::max()), row_marker());
}
bool any_live = false;
remove_if([&] (column_id id, atomic_cell_or_collection& c) {
bool erase = false;
@@ -1618,17 +1592,6 @@ bool row::compact_and_expire(
return any_live;
}
bool row::compact_and_expire(
const schema& s,
column_kind kind,
row_tombstone tomb,
gc_clock::time_point query_time,
can_gc_fn& can_gc,
gc_clock::time_point gc_before) {
row_marker m;
return compact_and_expire(s, kind, tomb, query_time, can_gc, gc_before, m);
}
deletable_row deletable_row::difference(const schema& s, column_kind kind, const deletable_row& other) const
{
deletable_row dr;
@@ -1790,9 +1753,10 @@ void mutation_querier::query_static_row(const row& r, tombstone current_tombston
} else if (_short_reads_allowed) {
seastar::measuring_output_stream stream;
ser::qr_partition__static_row__cells<seastar::measuring_output_stream> out(stream, { });
auto start = stream.size();
get_compacted_row_slice(_schema, slice, column_kind::static_column,
r, slice.static_columns, _static_cells_wr);
_memory_accounter.update(stream.size());
r, slice.static_columns, out);
_memory_accounter.update(stream.size() - start);
}
if (_pw.requested_digest()) {
max_timestamp max_ts{_pw.last_modified()};
@@ -1853,8 +1817,9 @@ stop_iteration mutation_querier::consume(clustering_row&& cr, row_tombstone curr
} else if (_short_reads_allowed) {
seastar::measuring_output_stream stream;
ser::qr_partition__rows<seastar::measuring_output_stream> out(stream, { });
auto start = stream.size();
write_row(out);
stop = _memory_accounter.update_and_check(stream.size());
stop = _memory_accounter.update_and_check(stream.size() - start);
}
_live_clustering_rows++;


@@ -74,6 +74,17 @@ using cell_hash_opt = seastar::optimized_optional<cell_hash>;
struct cell_and_hash {
atomic_cell_or_collection cell;
mutable cell_hash_opt hash;
cell_and_hash() = default;
cell_and_hash(cell_and_hash&&) noexcept = default;
cell_and_hash& operator=(cell_and_hash&&) noexcept = default;
cell_and_hash(const cell_and_hash&) = default;
cell_and_hash& operator=(const cell_and_hash&) = default;
cell_and_hash(atomic_cell_or_collection&& cell, cell_hash_opt hash)
: cell(std::move(cell))
, hash(hash)
{ }
};
//
@@ -314,22 +325,8 @@ public:
// Expires cells based on query_time. Expires tombstones based on gc_before
// and max_purgeable. Removes cells covered by tomb.
// Returns true iff there are any live cells left.
bool compact_and_expire(
const schema& s,
column_kind kind,
row_tombstone tomb,
gc_clock::time_point query_time,
can_gc_fn&,
gc_clock::time_point gc_before,
const row_marker& marker);
bool compact_and_expire(
const schema& s,
column_kind kind,
row_tombstone tomb,
gc_clock::time_point query_time,
can_gc_fn&,
gc_clock::time_point gc_before);
bool compact_and_expire(const schema& s, column_kind kind, row_tombstone tomb, gc_clock::time_point query_time,
can_gc_fn&, gc_clock::time_point gc_before);
row difference(const schema&, column_kind, const row& other) const;
@@ -341,8 +338,6 @@ public:
void prepare_hash(const schema& s, column_kind kind) const;
bool is_live(const schema&, column_kind kind, tombstone tomb = tombstone(), gc_clock::time_point now = gc_clock::time_point::min()) const;
friend std::ostream& operator<<(std::ostream& os, const row& r);
};


@@ -273,6 +273,11 @@ public:
return is_partition_end() || (_ck && _ck->is_empty(s) && _bound_weight > 0);
}
bool is_before_all_clustered_rows(const schema& s) const {
return _type < partition_region::clustered
|| (_type == partition_region::clustered && _ck->is_empty(s) && _bound_weight < 0);
}
template<typename Hasher>
void feed_hash(Hasher& hasher, const schema& s) const {
::feed_hash(hasher, _bound_weight);


@@ -152,34 +152,33 @@ const size_t querier_cache::max_queriers_memory_usage = memory::stats().total_me
void querier_cache::scan_cache_entries() {
const auto now = lowres_clock::now();
auto it = _meta_entries.begin();
const auto end = _meta_entries.end();
auto it = _entries.begin();
const auto end = _entries.end();
while (it != end && it->is_expired(now)) {
if (*it) {
++_stats.time_based_evictions;
}
it = _meta_entries.erase(it);
_stats.population = _entries.size();
++_stats.time_based_evictions;
--_stats.population;
it = _entries.erase(it);
}
}
querier_cache::entries::iterator querier_cache::find_querier(utils::UUID key, const dht::partition_range& range, tracing::trace_state_ptr trace_state) {
const auto queriers = _entries.equal_range(key);
const auto queriers = _index.equal_range(key);
if (queriers.first == _entries.end()) {
if (queriers.first == _index.end()) {
tracing::trace(trace_state, "Found no cached querier for key {}", key);
return _entries.end();
}
const auto it = std::find_if(queriers.first, queriers.second, [&] (const std::pair<const utils::UUID, entry>& elem) {
return elem.second.get().matches(range);
const auto it = std::find_if(queriers.first, queriers.second, [&] (const entry& e) {
return e.value().matches(range);
});
if (it == queriers.second) {
tracing::trace(trace_state, "Found cached querier(s) for key {} but none matches the query range {}", key, range);
return _entries.end();
}
tracing::trace(trace_state, "Found cached querier for key {} and range {}", key, range);
return it;
return it->pos();
}
querier_cache::querier_cache(std::chrono::seconds entry_ttl)
@@ -199,8 +198,7 @@ void querier_cache::insert(utils::UUID key, querier&& q, tracing::trace_state_pt
tracing::trace(trace_state, "Caching querier with key {}", key);
auto memory_usage = boost::accumulate(
_entries | boost::adaptors::map_values | boost::adaptors::transformed(std::mem_fn(&querier_cache::entry::memory_usage)), size_t(0));
auto memory_usage = boost::accumulate(_entries | boost::adaptors::transformed(std::mem_fn(&entry::memory_usage)), size_t(0));
// We add the memory-usage of the to-be added querier to the memory-usage
// of all the cached queriers. We now need to makes sure this number is
@@ -210,20 +208,20 @@ void querier_cache::insert(utils::UUID key, querier&& q, tracing::trace_state_pt
memory_usage += q.memory_usage();
if (memory_usage >= max_queriers_memory_usage) {
auto it = _meta_entries.begin();
const auto end = _meta_entries.end();
auto it = _entries.begin();
const auto end = _entries.end();
while (it != end && memory_usage >= max_queriers_memory_usage) {
if (*it) {
++_stats.memory_based_evictions;
memory_usage -= it->get_entry().memory_usage();
}
it = _meta_entries.erase(it);
++_stats.memory_based_evictions;
memory_usage -= it->memory_usage();
--_stats.population;
it = _entries.erase(it);
}
}
const auto it = _entries.emplace(key, entry::param{std::move(q), _entry_ttl}).first;
_meta_entries.emplace_back(_entries, it);
_stats.population = _entries.size();
auto& e = _entries.emplace_back(key, std::move(q), lowres_clock::now() + _entry_ttl);
e.set_pos(--_entries.end());
_index.insert(e);
++_stats.population;
}
querier querier_cache::lookup(utils::UUID key,
@@ -240,9 +238,9 @@ querier querier_cache::lookup(utils::UUID key,
return create_fun();
}
auto q = std::move(it->second).get();
auto q = std::move(*it).value();
_entries.erase(it);
_stats.population = _entries.size();
--_stats.population;
const auto can_be_used = q.can_be_used_for_page(only_live, s, range, slice);
if (can_be_used == querier::can_use::yes) {
@@ -265,25 +263,24 @@ bool querier_cache::evict_one() {
return false;
}
auto it = _meta_entries.begin();
const auto end = _meta_entries.end();
while (it != end) {
const auto is_live = bool(*it);
it = _meta_entries.erase(it);
_stats.population = _entries.size();
if (is_live) {
++_stats.resource_based_evictions;
return true;
}
}
return false;
++_stats.resource_based_evictions;
--_stats.population;
_entries.pop_front();
return true;
}
void querier_cache::evict_all_for_table(const utils::UUID& schema_id) {
_meta_entries.remove_if([&] (const meta_entry& me) {
return !me || me.get_entry().get().schema()->id() == schema_id;
});
_stats.population = _entries.size();
auto it = _entries.begin();
const auto end = _entries.end();
while (it != end) {
if (it->schema().id() == schema_id) {
--_stats.population;
it = _entries.erase(it);
} else {
++it;
}
}
}
querier_cache_context::querier_cache_context(querier_cache& cache, utils::UUID key, bool is_first_page)
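The layout the patch above moves to — entries stored once in a list ordered by expiry, with a separate by-key index into that list — can be sketched with standard containers. This is a hedged illustration only: the real code uses a `boost::intrusive::set` index and Scylla's `querier`/UUID types, while the names and `std::unordered_multimap` here are invented stand-ins:

```cpp
#include <cassert>
#include <list>
#include <string>
#include <unordered_map>

// Entries live in a std::list in insertion order (and therefore expiry
// order, given a fixed TTL); an index maps the lookup key to the entry's
// list position so lookups need not scan the list.
class lru_ttl_cache {
    struct entry {
        std::string key;
        int value;
        long expires; // stand-in for a lowres_clock::time_point
    };
    std::list<entry> _entries; // oldest, i.e. soonest to expire, first
    std::unordered_multimap<std::string, std::list<entry>::iterator> _index;
public:
    void insert(std::string key, int value, long expires) {
        _entries.push_back({key, value, expires});
        _index.emplace(std::move(key), std::prev(_entries.end()));
    }
    // Remove and return one matching entry's value, or -1 if absent.
    int take(const std::string& key) {
        auto it = _index.find(key);
        if (it == _index.end()) {
            return -1;
        }
        int v = it->second->value;
        _entries.erase(it->second);
        _index.erase(it);
        return v;
    }
    // Drop expired entries; they are contiguous at the front of the list.
    void scan(long now) {
        while (!_entries.empty() && _entries.front().expires <= now) {
            auto rng = _index.equal_range(_entries.front().key);
            for (auto i = rng.first; i != rng.second; ++i) {
                if (i->second == _entries.begin()) {
                    _index.erase(i);
                    break;
                }
            }
            _entries.pop_front();
        }
    }
    size_t size() const { return _entries.size(); }
};
```

Time-based eviction then only ever pops from the front, which is what makes the `scan_cache_entries()` loop in the patch a simple walk from `_entries.begin()`.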


@@ -24,7 +24,8 @@
#include "mutation_compactor.hh"
#include "mutation_reader.hh"
#include <seastar/core/weak_ptr.hh>
#include <boost/intrusive/set.hpp>
#include <variant>
/// One-stop object for serving queries.
@@ -264,75 +265,65 @@ public:
};
private:
class entry : public weakly_referencable<entry> {
querier _querier;
lowres_clock::time_point _expires;
public:
// Since entry cannot be moved and unordered_map::emplace can pass only
// a single param to it's mapped-type we need to force a single-param
// constructor for entry. Oh C++...
struct param {
querier q;
std::chrono::seconds ttl;
};
class entry : public boost::intrusive::set_base_hook<boost::intrusive::link_mode<boost::intrusive::auto_unlink>> {
// Self reference so that we can remove the entry given an `entry&`.
std::list<entry>::iterator _pos;
const utils::UUID _key;
const lowres_clock::time_point _expires;
querier _value;
explicit entry(param p)
: _querier(std::move(p.q))
, _expires(lowres_clock::now() + p.ttl) {
public:
entry(utils::UUID key, querier q, lowres_clock::time_point expires)
: _key(key)
, _expires(expires)
, _value(std::move(q)) {
}
std::list<entry>::iterator pos() const {
return _pos;
}
void set_pos(std::list<entry>::iterator pos) {
_pos = pos;
}
const utils::UUID& key() const {
return _key;
}
const ::schema& schema() const {
return *_value.schema();
}
bool is_expired(const lowres_clock::time_point& now) const {
return _expires <= now;
}
const querier& get() const & {
return _querier;
}
querier&& get() && {
return std::move(_querier);
}
size_t memory_usage() const {
return _querier.memory_usage();
return _value.memory_usage();
}
const querier& value() const & {
return _value;
}
querier value() && {
return std::move(_value);
}
};
using entries = std::unordered_map<utils::UUID, entry>;
class meta_entry {
entries& _entries;
weak_ptr<entry> _entry_ptr;
entries::iterator _entry_it;
public:
meta_entry(entries& e, entries::iterator it)
: _entries(e)
, _entry_ptr(it->second.weak_from_this())
, _entry_it(it) {
}
~meta_entry() {
if (_entry_ptr) {
_entries.erase(_entry_it);
}
}
bool is_expired(const lowres_clock::time_point& now) const {
return !_entry_ptr || _entry_ptr->is_expired(now);
}
explicit operator bool() const {
return bool(_entry_ptr);
}
const entry& get_entry() const {
return *_entry_ptr;
}
struct key_of_entry {
using type = utils::UUID;
const type& operator()(const entry& e) { return e.key(); }
};
using entries = std::list<entry>;
using index = boost::intrusive::multiset<entry, boost::intrusive::key_of_value<key_of_entry>,
boost::intrusive::constant_time_size<false>>;
private:
entries _entries;
std::list<meta_entry> _meta_entries;
index _index;
timer<lowres_clock> _expiry_timer;
std::chrono::seconds _entry_ttl;
stats _stats;
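The hunk above replaces the `unordered_map` plus `weak_ptr`-based `meta_entry` bookkeeping with a `std::list` of entries plus an intrusive multiset index. A minimal, std-only sketch of the same ownership pattern — a list that defines eviction order with stable iterators, and a separate key index of those iterators — using hypothetical names, and `std::multimap` where the patch uses `boost::intrusive::multiset` with an auto-unlink hook:

```cpp
#include <cassert>
#include <cstddef>
#include <iterator>
#include <list>
#include <map>
#include <string>

// Hypothetical stand-in for the querier cache's entry type.
struct entry {
    int key;
    std::string value;
};

// Entries live in a std::list (stable iterators, FIFO eviction order);
// an index maps key -> list iterator for O(log n) lookup. The real code
// keeps the index intrusively inside the entry instead.
class tiny_cache {
    std::list<entry> _entries;
    std::multimap<int, std::list<entry>::iterator> _index;
public:
    void insert(int key, std::string value) {
        _entries.push_back(entry{key, std::move(value)});
        _index.emplace(key, std::prev(_entries.end()));
    }
    // Remove and return the first entry matching `key`, if any.
    bool take(int key, std::string& out) {
        auto it = _index.find(key);
        if (it == _index.end()) {
            return false;
        }
        out = std::move(it->second->value);
        _entries.erase(it->second);   // stable iterator: O(1) unlink
        _index.erase(it);
        return true;
    }
    // Evict the oldest entry (front of the list) and drop its index node.
    bool evict_one() {
        if (_entries.empty()) {
            return false;
        }
        auto pos = _entries.begin();
        for (auto it = _index.begin(); it != _index.end(); ++it) {
            if (it->second == pos) {
                _index.erase(it);
                break;
            }
        }
        _entries.pop_front();
        return true;
    }
    std::size_t size() const { return _entries.size(); }
};
```

The intrusive auto-unlink hook in the actual patch removes the need for the linear scan in `evict_one()`: destroying the entry unlinks it from the index automatically.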


@@ -871,11 +871,6 @@ schema_builder& schema_builder::without_index(const sstring& name) {
return *this;
}
schema_builder& schema_builder::without_indexes() {
_raw._indices_by_name.clear();
return *this;
}
schema_ptr schema_builder::build() {
schema::raw_schema new_raw = _raw; // Copy so that build() remains idempotent.


@@ -261,7 +261,6 @@ public:
schema_builder& with_index(const index_metadata& im);
schema_builder& without_index(const sstring& name);
schema_builder& without_indexes();
default_names get_default_names() const {
return default_names(_raw);

Submodule seastar updated: a9812cc5a9...88cb58cfbf


@@ -73,26 +73,6 @@ public:
virtual void on_drop_function(const sstring& ks_name, const sstring& function_name) = 0;
virtual void on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) = 0;
virtual void on_drop_view(const sstring& ks_name, const sstring& view_name) = 0;
class only_view_notifications;
};
class migration_listener::only_view_notifications : public migration_listener {
virtual void on_create_keyspace(const sstring& ks_name) { }
virtual void on_create_column_family(const sstring& ks_name, const sstring& cf_name) { }
virtual void on_create_user_type(const sstring& ks_name, const sstring& type_name) { }
virtual void on_create_function(const sstring& ks_name, const sstring& function_name) { }
virtual void on_create_aggregate(const sstring& ks_name, const sstring& aggregate_name) { }
virtual void on_update_keyspace(const sstring& ks_name) { }
virtual void on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool columns_changed) { }
virtual void on_update_user_type(const sstring& ks_name, const sstring& type_name) { }
virtual void on_update_function(const sstring& ks_name, const sstring& function_name) { }
virtual void on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) { }
virtual void on_drop_keyspace(const sstring& ks_name) { }
virtual void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) { }
virtual void on_drop_user_type(const sstring& ks_name, const sstring& type_name) { }
virtual void on_drop_function(const sstring& ks_name, const sstring& function_name) { }
virtual void on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) { }
};
}


@@ -48,8 +48,6 @@
#include "service/migration_task.hh"
#include "utils/runtime.hh"
#include "gms/gossiper.hh"
#include "view_info.hh"
#include "schema_builder.hh"
namespace service {
@@ -666,33 +664,19 @@ future<> migration_manager::announce_column_family_drop(const sstring& ks_name,
throw exceptions::invalid_request_exception("Cannot use DROP TABLE on Materialized View");
}
auto&& views = old_cfm.views();
if (views.size() > schema->all_indices().size()) {
auto explicit_view_names = views
| boost::adaptors::filtered([&old_cfm](const view_ptr& v) { return !old_cfm.get_index_manager().is_index(v); })
| boost::adaptors::transformed([](const view_ptr& v) { return v->cf_name(); });
if (!views.empty()) {
throw exceptions::invalid_request_exception(sprint(
"Cannot drop table when materialized views still depend on it (%s.{%s})",
ks_name, ::join(", ", explicit_view_names)));
ks_name, ::join(", ", views | boost::adaptors::transformed([](auto&& v) { return v->cf_name(); }))));
}
mlogger.info("Drop table '{}.{}'", schema->ks_name(), schema->cf_name());
auto maybe_drop_secondary_indexes = make_ready_future<std::vector<mutation>>();
if (!schema->all_indices().empty()) {
auto builder = schema_builder(schema).without_indexes();
maybe_drop_secondary_indexes = db::schema_tables::make_update_table_mutations(db.find_keyspace(ks_name).metadata(), schema, builder.build(), api::new_timestamp(), false);
}
return maybe_drop_secondary_indexes.then([announce_locally, ks_name, schema, &db, &old_cfm] (auto&& drop_si_mutations) {
return db::schema_tables::make_drop_table_mutations(db.find_keyspace(ks_name).metadata(), schema, api::new_timestamp())
.then([drop_si_mutations = std::move(drop_si_mutations), announce_locally] (auto&& mutations) mutable {
mutations.insert(mutations.end(), std::make_move_iterator(drop_si_mutations.begin()), std::make_move_iterator(drop_si_mutations.end()));
return announce(std::move(mutations), announce_locally);
});
});
return db::schema_tables::make_drop_table_mutations(db.find_keyspace(ks_name).metadata(), schema, api::new_timestamp())
.then([announce_locally] (auto&& mutations) {
return announce(std::move(mutations), announce_locally);
});
} catch (const no_such_column_family& e) {
throw exceptions::configuration_exception(sprint("Cannot drop non existing table '%s' in keyspace '%s'.", cf_name, ks_name));
}
}
future<> migration_manager::announce_type_drop(user_type dropped_type, bool announce_locally)
@@ -763,9 +747,6 @@ future<> migration_manager::announce_view_drop(const sstring& ks_name,
if (!view->is_view()) {
throw exceptions::invalid_request_exception("Cannot use DROP MATERIALIZED VIEW on Table");
}
if (db.find_column_family(view->view_info()->base_id()).get_index_manager().is_index(view_ptr(view))) {
throw exceptions::invalid_request_exception("Cannot use DROP MATERIALIZED VIEW on Index");
}
auto keyspace = db.find_keyspace(ks_name).metadata();
mlogger.info("Drop view '{}.{}'", view->ks_name(), view->cf_name());
return db::schema_tables::make_drop_view_mutations(std::move(keyspace), view_ptr(std::move(view)), api::new_timestamp())


@@ -144,7 +144,11 @@ future<lowres_clock::duration> cache_hitrate_calculator::recalculate_hitrates()
return _db.invoke_on_all([this, rates = std::move(rates), cpuid = engine().cpu_id()] (database& db) {
sstring gstate;
for (auto& cf : db.get_column_families() | boost::adaptors::filtered(non_system_filter)) {
stat s = rates.at(cf.first);
auto it = rates.find(cf.first);
if (it == rates.end()) { // a table may be added before map/reduce completes and this code runs
continue;
}
stat s = it->second;
float rate = 0;
if (s.h) {
rate = s.h / (s.h + s.m);
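The fix above replaces `rates.at()` with `find()` because a table created after the map/reduce snapshot was taken is simply absent from the map, and `at()` would throw. A compact sketch of the same defensive lookup plus the hit-rate formula (`stat` and `rates` mirror the hunk; the surrounding sharded machinery is omitted):

```cpp
#include <cassert>
#include <string>
#include <unordered_map>

struct stat { float h = 0, m = 0; };   // cache hits and misses

// Returns h / (h + m) for a known table, 0 for a table that raced with
// the snapshot (or has no hits yet) -- instead of throwing like at().
float hit_rate_or_zero(const std::unordered_map<std::string, stat>& rates,
                       const std::string& table) {
    auto it = rates.find(table);
    if (it == rates.end()) {
        return 0;   // table added before map/reduce completed; skip it
    }
    const stat& s = it->second;
    return s.h ? s.h / (s.h + s.m) : 0;
}
```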


@@ -83,7 +83,7 @@ private:
_last_replicas = state->get_last_replicas();
} else {
// Reusing readers is currently only supported for singular queries.
if (_ranges.front().is_singular()) {
if (!_ranges.empty() && query::is_single_partition(_ranges.front())) {
_cmd->query_uuid = utils::make_random_uuid();
}
_cmd->is_first_page = true;


@@ -457,7 +457,7 @@ storage_proxy::response_id_type storage_proxy::register_response_handler(shared_
// we are here because either cl was achieved, but targets left in the handler are not
// responding, so a hint should be written for them, or cl == any in which case
// hints are counted towards consistency, so we need to write hints and count how much was written
auto hints = hint_to_dead_endpoints(e.handler->_mutation_holder, e.handler->get_targets(), e.handler->_type, e.handler->get_trace_state());
auto hints = hint_to_dead_endpoints(e.handler->_mutation_holder, e.handler->get_targets(), e.handler->get_trace_state());
e.handler->signal(hints);
if (e.handler->_cl == db::consistency_level::ANY && hints) {
slogger.trace("Wrote hint to satisfy CL.ANY after no replicas acknowledged the write");
@@ -665,17 +665,15 @@ storage_proxy::storage_proxy(distributed<database>& db, stdx::optional<std::vect
});
_hints_enabled_for_user_writes = bool(hinted_handoff_enabled);
if (!hinted_handoff_enabled) {
hinted_handoff_enabled.emplace();
}
supervisor::notify("creating hints manager");
slogger.trace("hinted DCs: {}", *hinted_handoff_enabled);
if (hinted_handoff_enabled) {
supervisor::notify("creating hints manager");
slogger.trace("hinted DCs: {}", *hinted_handoff_enabled);
const db::config& cfg = _db.local().get_config();
// Give each hints manager 10% of the available disk space. Give each shard an equal share of the available space.
db::hints::resource_manager::max_shard_disk_space_size = boost::filesystem::space(cfg.hints_directory().c_str()).capacity / (10 * smp::count);
_hints_manager.emplace(cfg.hints_directory(), *hinted_handoff_enabled, cfg.max_hint_window_in_ms(), _db);
const db::config& cfg = _db.local().get_config();
// Give each hints manager 10% of the available disk space. Give each shard an equal share of the available space.
db::hints::manager::max_shard_disk_space_size = boost::filesystem::space(cfg.hints_directory().c_str()).capacity / (10 * smp::count);
_hints_manager.emplace(cfg.hints_directory(), *hinted_handoff_enabled, cfg.max_hint_window_in_ms(), _db);
}
}
storage_proxy::rh_entry::rh_entry(shared_ptr<abstract_write_response_handler>&& h, std::function<void()>&& cb) : handler(std::move(h)), expire_timer(std::move(cb)) {}
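The sizing comment in the constructor above works out to `capacity / (10 * smp::count)` per shard: 10% of the disk for hints overall, split evenly across shards. A trivial sketch of that arithmetic, with `smp_count` standing in for `smp::count`:

```cpp
#include <cassert>
#include <cstdint>

// 10% of total disk capacity for hints, divided equally among shards.
uint64_t max_shard_hint_disk_space(uint64_t disk_capacity, unsigned smp_count) {
    return disk_capacity / (10ull * smp_count);
}
```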
@@ -1160,7 +1158,7 @@ storage_proxy::create_write_response_handler(const mutation& m, db::consistency_
auto all = boost::range::join(natural_endpoints, pending_endpoints);
if (cannot_hint(all, type)) {
if (std::find_if(all.begin(), all.end(), std::bind1st(std::mem_fn(&storage_proxy::cannot_hint), this)) != all.end()) {
// avoid OOMing due to excess hints. we need to do this check even for "live" nodes, since we can
// still generate hints for those if it's overloaded or simply dead but not yet known-to-be-dead.
// The idea is that if we have over maxHintsInProgress hints in flight, this is probably due to
@@ -1204,7 +1202,7 @@ void
storage_proxy::hint_to_dead_endpoints(response_id_type id, db::consistency_level cl) {
auto& h = *get_write_response_handler(id);
size_t hints = hint_to_dead_endpoints(h._mutation_holder, h.get_dead_endpoints(), h._type, h.get_trace_state());
size_t hints = hint_to_dead_endpoints(h._mutation_holder, h.get_dead_endpoints(), h.get_trace_state());
if (cl == db::consistency_level::ANY) {
// for cl==ANY hints are counted towards consistency
@@ -1560,41 +1558,21 @@ storage_proxy::mutate_atomically(std::vector<mutation> mutations, db::consistenc
});
}
template<typename Range>
bool storage_proxy::cannot_hint(const Range& targets, db::write_type type) {
bool storage_proxy::cannot_hint(gms::inet_address target) {
// if hints are disabled we "can always hint" since there's going to be no hint generated in this case
return hints_enabled(type) && boost::algorithm::any_of(targets, std::bind(&db::hints::manager::too_many_in_flight_hints_for, &*_hints_manager, std::placeholders::_1));
return hints_enabled() && _hints_manager->too_many_in_flight_hints_for(target);
}
future<> storage_proxy::send_to_endpoint(
mutation m,
gms::inet_address target,
std::vector<gms::inet_address> pending_endpoints,
db::write_type type) {
future<> storage_proxy::send_to_endpoint(mutation m, gms::inet_address target, db::write_type type) {
utils::latency_counter lc;
lc.start();
// View updates use consistency level ANY in order to fall back to hinted handoff in case of a failed update
db::consistency_level cl = (type == db::write_type::VIEW) ? db::consistency_level::ANY : db::consistency_level::ONE;
std::unordered_set<gms::inet_address> targets(pending_endpoints.begin(), pending_endpoints.end());
targets.insert(std::move(target));
return mutate_prepare(std::array<mutation, 1>{std::move(m)}, cl, type,
[this, targets = std::move(targets), pending_endpoints = std::move(pending_endpoints)] (
const mutation& m,
db::consistency_level cl,
db::write_type type) mutable {
return mutate_prepare(std::array<mutation, 1>{std::move(m)}, db::consistency_level::ONE, type,
[this, target] (const mutation& m, db::consistency_level cl, db::write_type type) {
auto& ks = _db.local().find_keyspace(m.schema()->ks_name());
return create_write_response_handler(
ks,
cl,
type,
std::make_unique<shared_mutation>(m),
std::move(targets),
pending_endpoints,
{ },
nullptr);
}).then([this, cl] (std::vector<unique_response_handler> ids) {
return mutate_begin(std::move(ids), cl);
return create_write_response_handler(ks, cl, type, std::make_unique<shared_mutation>(m), {target}, {}, {}, nullptr);
}).then([this] (std::vector<unique_response_handler> ids) {
return mutate_begin(std::move(ids), db::consistency_level::ONE);
}).then_wrapped([p = shared_from_this(), lc] (future<>&& f) {
return p->mutate_end(std::move(f), lc, nullptr);
});
@@ -1715,9 +1693,9 @@ void storage_proxy::send_to_live_endpoints(storage_proxy::response_id_type respo
// returns number of hints stored
template<typename Range>
size_t storage_proxy::hint_to_dead_endpoints(std::unique_ptr<mutation_holder>& mh, const Range& targets, db::write_type type, tracing::trace_state_ptr tr_state) noexcept
size_t storage_proxy::hint_to_dead_endpoints(std::unique_ptr<mutation_holder>& mh, const Range& targets, tracing::trace_state_ptr tr_state) noexcept
{
if (hints_enabled(type)) {
if (hints_enabled()) {
return boost::count_if(targets, [this, &mh, tr_state = std::move(tr_state)] (gms::inet_address target) mutable -> bool {
return _hints_manager->store_hint(target, mh->schema(), mh->get_mutation_for(target), tr_state);
});
@@ -3242,9 +3220,22 @@ storage_proxy::query_partition_key_range(lw_shared_ptr<query::read_command> cmd,
slogger.debug("Estimated result rows per range: {}; requested rows: {}, ranges.size(): {}; concurrent range requests: {}",
result_rows_per_range, cmd->row_limit, ranges.size(), concurrency_factor);
// The call to `query_partition_key_range_concurrent()` below
// updates `cmd` directly when processing the results. Under
// some circumstances, when the query executes without deferring,
// this updating will happen before the lambda object is constructed
// and hence the updates will be visible to the lambda. This will
// result in the merger below trimming the results according to the
// updated (decremented) limits and causing the paging logic to
// declare the query exhausted due to the non-full page. To avoid
// this save the original values of the limits here and pass these
// to the lambda below.
const auto row_limit = cmd->row_limit;
const auto partition_limit = cmd->partition_limit;
return query_partition_key_range_concurrent(timeout, std::move(results), cmd, cl, ranges.begin(), std::move(ranges), concurrency_factor,
std::move(trace_state), cmd->row_limit, cmd->partition_limit)
.then([row_limit = cmd->row_limit, partition_limit = cmd->partition_limit](std::vector<foreign_ptr<lw_shared_ptr<query::result>>> results) {
.then([row_limit, partition_limit](std::vector<foreign_ptr<lw_shared_ptr<query::result>>> results) {
query::result_merger merger(row_limit, partition_limit);
merger.reserve(results.size());
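The comment in this hunk describes a capture-timing hazard: when the query completes without deferring, `cmd`'s limits are already decremented by the time the continuation's capture-initializers run, so capturing `cmd->row_limit` in the lambda picks up the decremented value. A self-contained sketch of the synchronous case, with hypothetical `future_like`/`run_query` stand-ins (relies on the C++17 rule that the postfix expression is evaluated before the call's arguments):

```cpp
#include <cassert>

struct command { int row_limit; };

struct future_like {
    int value;
    template <typename F>
    auto then(F&& f) { return f(value); }
};

// Stand-in for query_partition_key_range_concurrent(): consumes budget
// by decrementing the limit while producing results (synchronously here).
future_like run_query(command& cmd) {
    cmd.row_limit -= 7;
    return future_like{42};
}

int merged_limit_buggy(command& cmd) {
    // run_query(cmd) is sequenced before the lambda's capture-initializer,
    // so `limit` captures the already-decremented value.
    return run_query(cmd).then([limit = cmd.row_limit](int) { return limit; });
}

int merged_limit_fixed(command& cmd) {
    const int limit = cmd.row_limit;   // snapshot first, as the patch does
    return run_query(cmd).then([limit](int) { return limit; });
}
```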
@@ -3573,8 +3564,8 @@ get_restricted_ranges(locator::token_metadata& tm, const schema& s, dht::partiti
return ranges;
}
bool storage_proxy::hints_enabled(db::write_type type) noexcept {
return _hints_enabled_for_user_writes || (type == db::write_type::VIEW && bool(_hints_manager));
bool storage_proxy::hints_enabled() noexcept {
return bool(_hints_manager);
}
future<> storage_proxy::truncate_blocking(sstring keyspace, sstring cfname) {
@@ -3607,6 +3598,7 @@ future<> storage_proxy::truncate_blocking(sstring keyspace, sstring cfname) {
std::rethrow_exception(ep);
} catch (rpc::timeout_error& e) {
slogger.trace("Truncation of {} timed out: {}", cfname, e.what());
throw;
} catch (...) {
throw;
}


@@ -221,7 +221,6 @@ private:
// just skip an entry if request no longer exists.
circular_buffer<response_id_type> _throttled_writes;
stdx::optional<db::hints::manager> _hints_manager;
bool _hints_enabled_for_user_writes = false;
stats _stats;
static constexpr float CONCURRENT_SUBREQUESTS_MARGIN = 0.10;
// for read repair chance calculation
@@ -248,11 +247,10 @@ private:
response_id_type create_write_response_handler(const std::unordered_map<gms::inet_address, std::experimental::optional<mutation>>&, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state);
void send_to_live_endpoints(response_id_type response_id, clock_type::time_point timeout);
template<typename Range>
size_t hint_to_dead_endpoints(std::unique_ptr<mutation_holder>& mh, const Range& targets, db::write_type type, tracing::trace_state_ptr tr_state) noexcept;
size_t hint_to_dead_endpoints(std::unique_ptr<mutation_holder>& mh, const Range& targets, tracing::trace_state_ptr tr_state) noexcept;
void hint_to_dead_endpoints(response_id_type, db::consistency_level);
template<typename Range>
bool cannot_hint(const Range& targets, db::write_type type);
bool hints_enabled(db::write_type type) noexcept;
bool cannot_hint(gms::inet_address target);
bool hints_enabled() noexcept;
std::vector<gms::inet_address> get_live_endpoints(keyspace& ks, const dht::token& token);
std::vector<gms::inet_address> get_live_sorted_endpoints(keyspace& ks, const dht::token& token);
db::read_repair_decision new_read_repair_decision(const schema& s);
@@ -382,7 +380,7 @@ public:
// Inspired by Cassandra's StorageProxy.sendToHintedEndpoints but without
// hinted handoff support, and just one target. See also
// send_to_live_endpoints() - another take on the same original function.
future<> send_to_endpoint(mutation m, gms::inet_address target, std::vector<gms::inet_address> pending_endpoints, db::write_type type);
future<> send_to_endpoint(mutation m, gms::inet_address target, db::write_type type);
/**
* Performs the truncate operation, which effectively deletes all data from


@@ -104,12 +104,11 @@ int get_generation_number() {
return generation_number;
}
storage_service::storage_service(distributed<database>& db, sharded<auth::service>& auth_service, sharded<db::system_distributed_keyspace>& sys_dist_ks)
storage_service::storage_service(distributed<database>& db, sharded<auth::service>& auth_service)
: _db(db)
, _auth_service(auth_service)
, _replicate_action([this] { return do_replicate_to_all_cores(); })
, _update_pending_ranges_action([this] { return do_update_pending_ranges(); })
, _sys_dist_ks(sys_dist_ks) {
, _update_pending_ranges_action([this] { return do_update_pending_ranges(); }) {
sstable_read_error.connect([this] { isolate_on_error(); });
sstable_write_error.connect([this] { isolate_on_error(); });
general_disk_error.connect([this] { isolate_on_error(); });
@@ -547,12 +546,6 @@ void storage_service::join_token_ring(int delay) {
supervisor::notify("starting tracing");
tracing::tracing::start_tracing().get();
supervisor::notify("starting system distributed keyspace");
_sys_dist_ks.start(
std::ref(cql3::get_query_processor()),
std::ref(service::get_migration_manager())).get();
_sys_dist_ks.invoke_on_all(&db::system_distributed_keyspace::start).get();
} else {
slogger.info("Startup complete, but write survey mode is active, not becoming an active ring member. Use JMX (StorageService->joinRing()) to finalize ring joining.");
}
@@ -1261,9 +1254,6 @@ future<> storage_service::drain_on_shutdown() {
tracing::tracing::tracing_instance().stop().get();
slogger.info("Drain on shutdown: tracing is stopped");
ss._sys_dist_ks.invoke_on_all(&db::system_distributed_keyspace::stop).get();
slogger.info("Drain on shutdown: system distributed keyspace stopped");
get_storage_proxy().invoke_on_all([] (storage_proxy& local_proxy) {
return local_proxy.stop_hints_manager();
}).get();
@@ -3428,18 +3418,5 @@ storage_service::get_natural_endpoints(const sstring& keyspace, const token& pos
return _db.local().find_keyspace(keyspace).get_replication_strategy().get_natural_endpoints(pos);
}
future<std::unordered_map<sstring, sstring>>
storage_service::view_build_statuses(sstring keyspace, sstring view_name) const {
return _sys_dist_ks.local().view_status(std::move(keyspace), std::move(view_name)).then([this] (std::unordered_map<utils::UUID, sstring> status) {
auto& endpoint_to_host_id = get_token_metadata().get_endpoint_to_host_id_map_for_reading();
return boost::copy_range<std::unordered_map<sstring, sstring>>(endpoint_to_host_id
| boost::adaptors::transformed([&status] (const std::pair<inet_address, utils::UUID>& p) {
auto it = status.find(p.second);
auto s = it != status.end() ? std::move(it->second) : "UNKNOWN";
return std::pair(p.first.to_sstring(), std::move(s));
}));
});
}
} // namespace service


@@ -51,7 +51,6 @@
#include "dht/token_range_endpoints.hh"
#include "core/sleep.hh"
#include "gms/application_state.hh"
#include "db/system_distributed_keyspace.hh"
#include "core/semaphore.hh"
#include "utils/fb_utilities.hh"
#include "utils/serialized_action.hh"
@@ -131,7 +130,7 @@ private:
bool _ms_stopped = false;
bool _stream_manager_stopped = false;
public:
storage_service(distributed<database>& db, sharded<auth::service>&, sharded<db::system_distributed_keyspace>&);
storage_service(distributed<database>& db, sharded<auth::service>&);
void isolate_on_error();
void isolate_on_commit_error();
@@ -728,7 +727,6 @@ private:
future<> do_replicate_to_all_cores();
serialized_action _replicate_action;
serialized_action _update_pending_ranges_action;
sharded<db::system_distributed_keyspace>& _sys_dist_ks;
private:
/**
* Replicates token_metadata contents on shard0 instance to other shards.
@@ -2023,8 +2021,6 @@ public:
}
#endif
future<std::unordered_map<sstring, sstring>> view_build_statuses(sstring keyspace, sstring view_name) const;
private:
/**
* Seed data to the endpoints that will be responsible for it at the future
@@ -2302,8 +2298,8 @@ public:
}
};
inline future<> init_storage_service(distributed<database>& db, sharded<auth::service>& auth_service, sharded<db::system_distributed_keyspace>& sys_dist_ks) {
return service::get_storage_service().start(std::ref(db), std::ref(auth_service), std::ref(sys_dist_ks));
inline future<> init_storage_service(distributed<database>& db, sharded<auth::service>& auth_service) {
return service::get_storage_service().start(std::ref(db), std::ref(auth_service));
}
inline future<> deinit_storage_service() {


@@ -33,6 +33,7 @@
#include "unimplemented.hh"
#include "stdx.hh"
#include "segmented_compress_params.hh"
#include "utils/class_registrator.hh"
namespace sstables {
@@ -299,7 +300,8 @@ size_t local_compression::compress_max_size(size_t input_len) const {
void compression::set_compressor(compressor_ptr c) {
if (c) {
auto& cn = c->name();
unqualified_name uqn(compressor::namespace_prefix, c->name());
const sstring& cn = uqn;
name.value = bytes(cn.begin(), cn.end());
for (auto& p : c->options()) {
if (p.first != compression_parameters::SSTABLE_COMPRESSION) {

View File

@@ -294,6 +294,12 @@ size_tiered_compaction_strategy::get_sstables_for_compaction(column_family& cfs,
return sstables::compaction_descriptor(std::move(most_interesting));
}
// If we are not enforcing min_threshold explicitly, try any pair of SStables in the same tier.
if (!cfs.compaction_enforce_min_threshold() && is_any_bucket_interesting(buckets, 2)) {
std::vector<sstables::shared_sstable> most_interesting = most_interesting_bucket(std::move(buckets), 2, max_threshold);
return sstables::compaction_descriptor(std::move(most_interesting));
}
// if there is no sstable to compact in standard way, try compacting single sstable whose droppable tombstone
// ratio is greater than threshold.
// prefer oldest sstables from biggest size tiers because they will be easier to satisfy conditions for
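The hunk above lowers the selection bar when `min_threshold` is not enforced: any size-tier bucket holding at least two SSTables becomes compactable. A hypothetical sketch of that selection step (names mirror the hunk, but the real implementation buckets by size ratio and prefers buckets differently):

```cpp
#include <cassert>
#include <algorithm>
#include <cstddef>
#include <vector>

// A bucket groups SSTables of similar size; here a bucket is just the
// list of sstable sizes in one tier.
using bucket = std::vector<long>;

// A bucket is "interesting" once it holds at least min_threshold tables;
// the patch calls this with min_threshold == 2 when not enforcing it.
bool is_any_bucket_interesting(const std::vector<bucket>& buckets, std::size_t min_threshold) {
    return std::any_of(buckets.begin(), buckets.end(), [&](const bucket& b) {
        return b.size() >= min_threshold;
    });
}

// Pick the fullest interesting bucket, truncated to max_threshold tables.
bucket most_interesting_bucket(std::vector<bucket> buckets, std::size_t min_threshold,
                               std::size_t max_threshold) {
    bucket best;
    for (auto& b : buckets) {
        if (b.size() >= min_threshold && b.size() > best.size()) {
            best = std::move(b);
        }
    }
    if (best.size() > max_threshold) {
        best.resize(max_threshold);
    }
    return best;
}
```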


@@ -40,7 +40,6 @@ boost_tests = [
'mutation_reader_test',
'serialized_action_test',
'cql_query_test',
'secondary_index_test',
'storage_proxy_test',
'schema_change_test',
'sstable_mutation_test',
@@ -87,8 +86,6 @@ boost_tests = [
'counter_test',
'cell_locker_test',
'view_schema_test',
'view_build_test',
'view_complex_test',
'clustering_ranges_walker_test',
'vint_serialization_test',
'duration_test',


@@ -215,3 +215,22 @@ SEASTAR_TEST_CASE(test_aggregate_count) {
}
});
}
SEASTAR_TEST_CASE(test_reverse_type_aggregation) {
return do_with_cql_env_thread([&] (auto& e) {
e.execute_cql("CREATE TABLE test(p int, c timestamp, v int, primary key (p, c)) with clustering order by (c desc)").get();
e.execute_cql("INSERT INTO test(p, c, v) VALUES (1, 1, 1)").get();
e.execute_cql("INSERT INTO test(p, c, v) VALUES (1, 2, 1)").get();
{
auto tp = db_clock::from_time_t({ 0 }) + std::chrono::milliseconds(1);
auto msg = e.execute_cql("SELECT min(c) FROM test").get0();
assert_that(msg).is_rows().with_size(1).with_row({{timestamp_type->decompose(tp)}});
}
{
auto tp = db_clock::from_time_t({ 0 }) + std::chrono::milliseconds(2);
auto msg = e.execute_cql("SELECT max(c) FROM test").get0();
assert_that(msg).is_rows().with_size(1).with_row({{timestamp_type->decompose(tp)}});
}
});
}


@@ -54,15 +54,6 @@ rows_assertions::is_empty() {
return {*this};
}
rows_assertions
rows_assertions::is_not_empty() {
auto row_count = _rows->rs().size();
if (row_count == 0) {
fail("Expected some rows, but was result was empty");
}
return {*this};
}
rows_assertions
rows_assertions::with_row(std::initializer_list<bytes_opt> values) {
std::vector<bytes_opt> expected_row(values);


@@ -33,7 +33,6 @@ public:
rows_assertions(shared_ptr<cql_transport::messages::result_message::rows> rows);
rows_assertions with_size(size_t size);
rows_assertions is_empty();
rows_assertions is_not_empty();
rows_assertions with_row(std::initializer_list<bytes_opt> values);
// Verifies that the result has the following rows and only that rows, in that order.


@@ -81,29 +81,6 @@ SEASTAR_TEST_CASE(test_create_table_with_id_statement) {
});
}
SEASTAR_TEST_CASE(test_drop_table_with_si_and_mv) {
return do_with_cql_env([](cql_test_env& e) {
return seastar::async([&e] {
e.execute_cql("CREATE TABLE tbl (a int, b int, c float, PRIMARY KEY (a))").get();
e.execute_cql("CREATE INDEX idx1 ON tbl (b)").get();
e.execute_cql("CREATE INDEX idx2 ON tbl (c)").get();
e.execute_cql("CREATE MATERIALIZED VIEW tbl_view AS SELECT c FROM tbl WHERE c IS NOT NULL PRIMARY KEY (c, a)").get();
// dropping a table with materialized views is prohibited
assert_that_failed(e.execute_cql("DROP TABLE tbl"));
e.execute_cql("DROP MATERIALIZED VIEW tbl_view").get();
// dropping a table with secondary indexes is fine
e.execute_cql("DROP TABLE tbl").get();
e.execute_cql("CREATE TABLE tbl (a int, b int, c float, PRIMARY KEY (a))").get();
e.execute_cql("CREATE INDEX idx1 ON tbl (b)").get();
e.execute_cql("CREATE INDEX idx2 ON tbl (c)").get();
e.execute_cql("CREATE MATERIALIZED VIEW tbl_view AS SELECT c FROM tbl WHERE c IS NOT NULL PRIMARY KEY (c, a)").get();
// dropping whole keyspace with MV and SI is fine too
e.execute_cql("DROP KEYSPACE ks").get();
});
});
}
SEASTAR_TEST_CASE(test_insert_statement) {
return do_with_cql_env([] (cql_test_env& e) {
return e.execute_cql("create table cf (p1 varchar, c1 int, r1 int, PRIMARY KEY (p1, c1));").discard_result().then([&e] {
@@ -2070,10 +2047,9 @@ SEASTAR_TEST_CASE(test_in_restriction) {
assert_that(msg).is_rows().with_size(0);
return e.execute_cql("select r1 from tir where p1 in (2, 0, 2, 1);");
}).then([&e] (shared_ptr<cql_transport::messages::result_message> msg) {
assert_that(msg).is_rows().with_rows({
assert_that(msg).is_rows().with_rows_ignore_order({
{int32_type->decompose(4)},
{int32_type->decompose(0)},
{int32_type->decompose(4)},
{int32_type->decompose(1)},
{int32_type->decompose(2)},
{int32_type->decompose(3)},
@@ -2095,6 +2071,22 @@ SEASTAR_TEST_CASE(test_in_restriction) {
{int32_type->decompose(2)},
{int32_type->decompose(1)},
});
return e.prepare("select r1 from tir where p1 in ?");
}).then([&e] (cql3::prepared_cache_key_type prepared_id){
auto my_list_type = list_type_impl::get_instance(int32_type, true);
std::vector<cql3::raw_value> raw_values;
auto in_values_list = my_list_type->decompose(make_list_value(my_list_type,
list_type_impl::native_type{{int(2), int(0), int(2), int(1)}}));
raw_values.emplace_back(cql3::raw_value::make_value(in_values_list));
return e.execute_prepared(prepared_id,raw_values);
}).then([&e] (shared_ptr<cql_transport::messages::result_message> msg) {
assert_that(msg).is_rows().with_rows_ignore_order({
{int32_type->decompose(4)},
{int32_type->decompose(0)},
{int32_type->decompose(1)},
{int32_type->decompose(2)},
{int32_type->decompose(3)},
});
});
});
}
@@ -2538,6 +2530,54 @@ SEASTAR_TEST_CASE(test_pg_style_string_literal) {
});
}
SEASTAR_TEST_CASE(test_secondary_index_regular_column_query) {
return do_with_cql_env([] (cql_test_env& e) {
return e.execute_cql("CREATE TABLE users (userid int, name text, email text, country text, PRIMARY KEY (userid));").discard_result().then([&e] {
return e.execute_cql("CREATE INDEX ON users (email);").discard_result();
}).then([&e] {
return e.execute_cql("CREATE INDEX ON users (country);").discard_result();
}).then([&e] {
return e.execute_cql("INSERT INTO users (userid, name, email, country) VALUES (0, 'Bondie Easseby', 'beassebyv@house.gov', 'France');").discard_result();
}).then([&e] {
return e.execute_cql("INSERT INTO users (userid, name, email, country) VALUES (1, 'Demetri Curror', 'dcurrorw@techcrunch.com', 'France');").discard_result();
}).then([&e] {
return e.execute_cql("INSERT INTO users (userid, name, email, country) VALUES (2, 'Langston Paulisch', 'lpaulischm@reverbnation.com', 'United States');").discard_result();
}).then([&e] {
return e.execute_cql("INSERT INTO users (userid, name, email, country) VALUES (3, 'Channa Devote', 'cdevote14@marriott.com', 'Denmark');").discard_result();
}).then([&e] {
return e.execute_cql("SELECT email FROM users WHERE country = 'France';");
}).then([&e] (shared_ptr<cql_transport::messages::result_message> msg) {
assert_that(msg).is_rows().with_rows({
{ utf8_type->decompose(sstring("beassebyv@house.gov")) },
{ utf8_type->decompose(sstring("dcurrorw@techcrunch.com")) },
});
});
});
}
SEASTAR_TEST_CASE(test_secondary_index_clustering_key_query) {
return do_with_cql_env([] (cql_test_env& e) {
return e.execute_cql("CREATE TABLE users (userid int, name text, email text, country text, PRIMARY KEY (userid, country));").discard_result().then([&e] {
return e.execute_cql("CREATE INDEX ON users (country);").discard_result();
}).then([&e] {
return e.execute_cql("INSERT INTO users (userid, name, email, country) VALUES (0, 'Bondie Easseby', 'beassebyv@house.gov', 'France');").discard_result();
}).then([&e] {
return e.execute_cql("INSERT INTO users (userid, name, email, country) VALUES (1, 'Demetri Curror', 'dcurrorw@techcrunch.com', 'France');").discard_result();
}).then([&e] {
return e.execute_cql("INSERT INTO users (userid, name, email, country) VALUES (2, 'Langston Paulisch', 'lpaulischm@reverbnation.com', 'United States');").discard_result();
}).then([&e] {
return e.execute_cql("INSERT INTO users (userid, name, email, country) VALUES (3, 'Channa Devote', 'cdevote14@marriott.com', 'Denmark');").discard_result();
}).then([&e] {
return e.execute_cql("SELECT email FROM users WHERE country = 'France';");
}).then([&e] (auto msg) {
assert_that(msg).is_rows().with_rows({
{ utf8_type->decompose(sstring("beassebyv@house.gov")) },
{ utf8_type->decompose(sstring("dcurrorw@techcrunch.com")) },
});
});
});
}
SEASTAR_TEST_CASE(test_insert_large_collection_values) {
return do_with_cql_env([] (cql_test_env& e) {
return seastar::async([&e] {
@@ -2582,3 +2622,81 @@ SEASTAR_TEST_CASE(test_insert_large_collection_values) {
});
});
}
// Corner-case test that checks for the paging code's preparedness for an empty
// range list.
SEASTAR_TEST_CASE(test_empty_partition_range_scan) {
return do_with_cql_env_thread([] (cql_test_env& e) {
e.execute_cql("create keyspace empty_partition_range_scan with replication = {'class': 'SimpleStrategy', 'replication_factor': 1};").get();
e.execute_cql("create table empty_partition_range_scan.tb (a int, b int, c int, val int, PRIMARY KEY ((a,b),c) );").get();
auto qo = std::make_unique<cql3::query_options>(db::consistency_level::LOCAL_ONE, std::vector<cql3::raw_value>{},
cql3::query_options::specific_options{1, nullptr, {}, api::new_timestamp()});
auto res = e.execute_cql("select * from empty_partition_range_scan.tb where token (a,b) > 1 and token(a,b) <= 1;", std::move(qo)).get0();
assert_that(res).is_rows().is_empty();
});
}
SEASTAR_TEST_CASE(test_static_multi_cell_static_lists_with_ckey) {
return do_with_cql_env_thread([] (cql_test_env& e) {
e.execute_cql("CREATE TABLE t (p int, c int, slist list<int> static, v int, PRIMARY KEY (p, c));").get();
e.execute_cql("INSERT INTO t (p, c, slist, v) VALUES (1, 1, [1], 1); ").get();
{
e.execute_cql("UPDATE t SET slist[0] = 3, v = 3 WHERE p = 1 AND c = 1;").get();
auto msg = e.execute_cql("SELECT slist, v FROM t WHERE p = 1 AND c = 1;").get0();
auto slist_type = list_type_impl::get_instance(int32_type, true);
assert_that(msg).is_rows().with_row({
{ slist_type->decompose(make_list_value(slist_type, list_type_impl::native_type({{3}}))) },
{ int32_type->decompose(3) }
});
}
{
e.execute_cql("UPDATE t SET slist = [4], v = 4 WHERE p = 1 AND c = 1;").get();
auto msg = e.execute_cql("SELECT slist, v FROM t WHERE p = 1 AND c = 1;").get0();
auto slist_type = list_type_impl::get_instance(int32_type, true);
assert_that(msg).is_rows().with_row({
{ slist_type->decompose(make_list_value(slist_type, list_type_impl::native_type({{4}}))) },
{ int32_type->decompose(4) }
});
}
{
e.execute_cql("UPDATE t SET slist = [3] + slist , v = 5 WHERE p = 1 AND c = 1;").get();
auto msg = e.execute_cql("SELECT slist, v FROM t WHERE p = 1 AND c = 1;").get0();
auto slist_type = list_type_impl::get_instance(int32_type, true);
assert_that(msg).is_rows().with_row({
{ slist_type->decompose(make_list_value(slist_type, list_type_impl::native_type({3, 4}))) },
{ int32_type->decompose(5) }
});
}
{
e.execute_cql("UPDATE t SET slist = slist + [5] , v = 6 WHERE p = 1 AND c = 1;").get();
auto msg = e.execute_cql("SELECT slist, v FROM t WHERE p = 1 AND c = 1;").get0();
auto slist_type = list_type_impl::get_instance(int32_type, true);
assert_that(msg).is_rows().with_row({
{ slist_type->decompose(make_list_value(slist_type, list_type_impl::native_type({3, 4, 5}))) },
{ int32_type->decompose(6) }
});
}
{
e.execute_cql("DELETE slist[2] from t WHERE p = 1;").get();
auto msg = e.execute_cql("SELECT slist, v FROM t WHERE p = 1 AND c = 1;").get0();
auto slist_type = list_type_impl::get_instance(int32_type, true);
assert_that(msg).is_rows().with_row({
{ slist_type->decompose(make_list_value(slist_type, list_type_impl::native_type({3, 4}))) },
{ int32_type->decompose(6) }
});
}
{
e.execute_cql("UPDATE t SET slist = slist - [4] , v = 7 WHERE p = 1 AND c = 1;").get();
auto msg = e.execute_cql("SELECT slist, v FROM t WHERE p = 1 AND c = 1;").get0();
auto slist_type = list_type_impl::get_instance(int32_type, true);
assert_that(msg).is_rows().with_row({
{ slist_type->decompose(make_list_value(slist_type, list_type_impl::native_type({3}))) },
{ int32_type->decompose(7) }
});
}
});
}


@@ -39,7 +39,6 @@
#include "tmpdir.hh"
#include "db/query_context.hh"
#include "test_services.hh"
#include "db/view/view_builder.hh"
// TODO: remove (#293)
#include "message/messaging_service.hh"
@@ -48,7 +47,6 @@
#include "service/storage_service.hh"
#include "auth/service.hh"
#include "db/system_keyspace.hh"
#include "db/system_distributed_keyspace.hh"
namespace sstables {
@@ -90,7 +88,6 @@ public:
private:
::shared_ptr<distributed<database>> _db;
::shared_ptr<sharded<auth::service>> _auth_service;
::shared_ptr<sharded<db::view::view_builder>> _view_builder;
lw_shared_ptr<tmpdir> _data_dir;
private:
struct core_local_state {
@@ -115,13 +112,7 @@ private:
return ::make_shared<service::query_state>(_core_local.local().client_state);
}
public:
single_node_cql_env(
::shared_ptr<distributed<database>> db,
::shared_ptr<sharded<auth::service>> auth_service,
::shared_ptr<sharded<db::view::view_builder>> view_builder)
: _db(db)
, _auth_service(std::move(auth_service))
, _view_builder(std::move(view_builder))
single_node_cql_env(::shared_ptr<distributed<database>> db, ::shared_ptr<sharded<auth::service>> auth_service) : _db(db), _auth_service(std::move(auth_service))
{ }
virtual future<::shared_ptr<cql_transport::messages::result_message>> execute_cql(const sstring& text) override {
@@ -264,10 +255,6 @@ public:
return _auth_service->local();
}
virtual db::view::view_builder& local_view_builder() override {
return _view_builder->local();
}
future<> start() {
return _core_local.start(std::ref(*_auth_service));
}
@@ -307,14 +294,12 @@ public:
cfg->data_file_directories() = {data_dir.path};
}
cfg->commitlog_directory() = data_dir.path + "/commitlog.dir";
cfg->hints_directory() = data_dir.path + "/hints.dir";
cfg->num_tokens() = 256;
cfg->ring_delay_ms() = 500;
cfg->experimental() = true;
cfg->shutdown_announce_in_ms() = 0;
boost::filesystem::create_directories((data_dir.path + "/system").c_str());
boost::filesystem::create_directories(cfg->commitlog_directory().c_str());
boost::filesystem::create_directories(cfg->hints_directory().c_str());
const gms::inet_address listen("127.0.0.1");
auto& ms = netw::get_messaging_service();
@@ -322,11 +307,9 @@ public:
auto stop_ms = defer([&ms] { ms.stop().get(); });
auto auth_service = ::make_shared<sharded<auth::service>>();
auto sys_dist_ks = seastar::sharded<db::system_distributed_keyspace>();
auto stop_sys_dist_ks = defer([&sys_dist_ks] { sys_dist_ks.stop().get(); });
auto& ss = service::get_storage_service();
ss.start(std::ref(*db), std::ref(*auth_service), std::ref(sys_dist_ks)).get();
ss.start(std::ref(*db), std::ref(*auth_service)).get();
auto stop_storage_service = defer([&ss] { ss.stop().get(); });
db->start(std::move(*cfg), database_config()).get();
@@ -385,13 +368,6 @@ public:
auth_service->stop().get();
});
auto view_builder = ::make_shared<seastar::sharded<db::view::view_builder>>();
view_builder->start(std::ref(*db), std::ref(sys_dist_ks), std::ref(mm)).get();
view_builder->invoke_on_all(&db::view::view_builder::start).get();
auto stop_view_builder = defer([view_builder] {
view_builder->stop().get();
});
// Create the testing user.
try {
auth::role_config config;
@@ -407,7 +383,7 @@ public:
// The default user may already exist if this `cql_test_env` is starting with previously populated data.
}
single_node_cql_env env(db, auth_service, view_builder);
single_node_cql_env env(db, auth_service);
env.start().get();
auto stop_env = defer([&env] { env.stop().get(); });
@@ -446,13 +422,12 @@ future<> do_with_cql_env_thread(std::function<void(cql_test_env&)> func) {
class storage_service_for_tests::impl {
distributed<database> _db;
sharded<auth::service> _auth_service;
sharded<db::system_distributed_keyspace> _sys_dist_ks;
public:
impl() {
auto thread = seastar::thread_impl::get();
assert(thread);
netw::get_messaging_service().start(gms::inet_address("127.0.0.1"), 7000, false).get();
service::get_storage_service().start(std::ref(_db), std::ref(_auth_service), std::ref(_sys_dist_ks)).get();
netw::get_messaging_service().start(gms::inet_address("127.0.0.1")).get();
service::get_storage_service().start(std::ref(_db), std::ref(_auth_service)).get();
service::get_storage_service().invoke_on_all([] (auto& ss) {
ss.enable_all_features();
}).get();


@@ -38,10 +38,6 @@
class database;
namespace db::view {
class view_builder;
}
namespace auth {
class service;
}
@@ -99,28 +95,9 @@ public:
virtual distributed<cql3::query_processor> & qp() = 0;
virtual auth::service& local_auth_service() = 0;
virtual db::view::view_builder& local_view_builder() = 0;
};
future<> do_with_cql_env(std::function<future<>(cql_test_env&)> func);
future<> do_with_cql_env(std::function<future<>(cql_test_env&)> func, const db::config&);
future<> do_with_cql_env_thread(std::function<void(cql_test_env&)> func);
future<> do_with_cql_env_thread(std::function<void(cql_test_env&)> func, const db::config&);
template<typename EventuallySucceedingFunction>
static void eventually(EventuallySucceedingFunction&& f, size_t max_attempts = 10) {
size_t attempts = 0;
while (true) {
try {
f();
break;
} catch (...) {
if (++attempts < max_attempts) {
sleep(std::chrono::milliseconds(1 << attempts)).get0();
} else {
throw;
}
}
}
}


@@ -29,6 +29,9 @@
#include "database.hh"
#include "partition_slice_builder.hh"
#include "frozen_mutation.hh"
#include "mutation_source_test.hh"
#include "schema_registry.hh"
#include "service/migration_manager.hh"
SEASTAR_TEST_CASE(test_querying_with_limits) {
return do_with_cql_env([](cql_test_env& e) {
@@ -74,3 +77,33 @@ SEASTAR_TEST_CASE(test_querying_with_limits) {
});
});
}
SEASTAR_THREAD_TEST_CASE(test_database_with_data_in_sstables_is_a_mutation_source) {
do_with_cql_env([] (cql_test_env& e) {
run_mutation_source_tests([&] (schema_ptr s, const std::vector<mutation>& partitions) -> mutation_source {
try {
e.local_db().find_column_family(s->ks_name(), s->cf_name());
service::get_local_migration_manager().announce_column_family_drop(s->ks_name(), s->cf_name(), true).get();
} catch (const no_such_column_family&) {
// expected
}
service::get_local_migration_manager().announce_new_column_family(s, true).get();
column_family& cf = e.local_db().find_column_family(s);
for (auto&& m : partitions) {
e.local_db().apply(cf.schema(), freeze(m)).get();
}
cf.flush().get();
cf.get_row_cache().invalidate([] {}).get();
return mutation_source([&] (schema_ptr s,
const dht::partition_range& range,
const query::partition_slice& slice,
const io_priority_class& pc,
tracing::trace_state_ptr trace_state,
streamed_mutation::forwarding fwd,
mutation_reader::forwarding fwd_mr) {
return cf.make_reader(s, range, slice, pc, std::move(trace_state), fwd, fwd_mr);
});
});
return make_ready_future<>();
}).get();
}


@@ -22,7 +22,6 @@
#include "core/reactor.hh"
#include "core/app-template.hh"
#include "db/system_distributed_keyspace.hh"
#include "message/messaging_service.hh"
#include "gms/failure_detector.hh"
#include "gms/gossiper.hh"
@@ -70,8 +69,7 @@ int main(int ac, char ** av) {
utils::fb_utilities::set_broadcast_rpc_address(listen);
auto vv = std::make_shared<gms::versioned_value::factory>();
locator::i_endpoint_snitch::create_snitch("SimpleSnitch").then([&auth_service, &db] {
sharded<db::system_distributed_keyspace> sys_dist_ks;
return service::init_storage_service(db, auth_service, sys_dist_ks);
return service::init_storage_service(db, auth_service);
}).then([vv, listen, config] {
return netw::get_messaging_service().start(listen);
}).then([config] {


@@ -31,16 +31,14 @@
#include "service/storage_service.hh"
#include "core/distributed.hh"
#include "database.hh"
#include "db/system_distributed_keyspace.hh"
SEASTAR_TEST_CASE(test_boot_shutdown){
return seastar::async([] {
distributed<database> db;
sharded<auth::service> auth_service;
sharded<db::system_distributed_keyspace> sys_dist_ks;
utils::fb_utilities::set_broadcast_address(gms::inet_address("127.0.0.1"));
locator::i_endpoint_snitch::create_snitch("SimpleSnitch").get();
service::get_storage_service().start(std::ref(db), std::ref(auth_service), std::ref(sys_dist_ks)).get();
service::get_storage_service().start(std::ref(db), std::ref(auth_service)).get();
db.start().get();
netw::get_messaging_service().start(gms::inet_address("127.0.0.1")).get();
gms::get_failure_detector().start().get();


@@ -26,11 +26,13 @@
#include <boost/test/unit_test.hpp>
#include <query-result-set.hh>
#include <query-result-writer.hh>
#include "tests/test_services.hh"
#include "tests/test-utils.hh"
#include "tests/mutation_assertions.hh"
#include "tests/result_set_assertions.hh"
#include "tests/mutation_source_test.hh"
#include "mutation_query.hh"
#include "core/do_with.hh"
@@ -525,3 +527,22 @@ SEASTAR_TEST_CASE(test_partition_limit) {
}
});
}
SEASTAR_THREAD_TEST_CASE(test_result_size_calculation) {
random_mutation_generator gen(random_mutation_generator::generate_counters::no);
std::vector<mutation> mutations = gen(1);
schema_ptr s = gen.schema();
mutation_source source = make_source(std::move(mutations));
query::result_memory_limiter l;
query::partition_slice slice = make_full_slice(*s);
slice.options.set<query::partition_slice::option::allow_short_read>();
query::result::builder digest_only_builder(slice, query::result_options{query::result_request::only_digest, query::digest_algorithm::xxHash}, l.new_digest_read(query::result_memory_limiter::maximum_result_size).get0());
data_query(s, source, query::full_partition_range, slice, std::numeric_limits<uint32_t>::max(), std::numeric_limits<uint32_t>::max(), gc_clock::now(), digest_only_builder).get0();
query::result::builder result_and_digest_builder(slice, query::result_options{query::result_request::result_and_digest, query::digest_algorithm::xxHash}, l.new_data_read(query::result_memory_limiter::maximum_result_size).get0());
data_query(s, source, query::full_partition_range, slice, std::numeric_limits<uint32_t>::max(), std::numeric_limits<uint32_t>::max(), gc_clock::now(), result_and_digest_builder).get0();
BOOST_REQUIRE_EQUAL(digest_only_builder.memory_accounter().used_memory(), result_and_digest_builder.memory_accounter().used_memory());
}


@@ -659,6 +659,46 @@ void test_mutation_reader_fragments_have_monotonic_positions(populate_fn populat
});
}
static void test_date_tiered_clustering_slicing(populate_fn populate) {
BOOST_TEST_MESSAGE(__PRETTY_FUNCTION__);
simple_schema ss;
auto s = schema_builder(ss.schema())
.set_compaction_strategy(sstables::compaction_strategy_type::date_tiered)
.build();
auto pkey = ss.make_pkey();
mutation m1(s, pkey);
ss.add_static_row(m1, "s");
m1.partition().apply(ss.new_tombstone());
ss.add_row(m1, ss.make_ckey(0), "v1");
mutation_source ms = populate(s, {m1});
// query row outside the range of existing rows to exercise sstable clustering key filter
{
auto slice = partition_slice_builder(*s)
.with_range(ss.make_ckey_range(1, 2))
.build();
auto prange = dht::partition_range::make_singular(pkey);
assert_that(ms.make_reader(s, prange, slice))
.produces(m1, slice.row_ranges(*s, pkey.key()))
.produces_end_of_stream();
}
{
auto slice = partition_slice_builder(*s)
.with_range(query::clustering_range::make_singular(ss.make_ckey(0)))
.build();
auto prange = dht::partition_range::make_singular(pkey);
assert_that(ms.make_reader(s, prange, slice))
.produces(m1)
.produces_end_of_stream();
}
}
static void test_clustering_slices(populate_fn populate) {
BOOST_TEST_MESSAGE(__PRETTY_FUNCTION__);
auto s = schema_builder("ks", "cf")
@@ -1012,6 +1052,7 @@ void test_slicing_with_overlapping_range_tombstones(populate_fn populate) {
}
void run_mutation_reader_tests(populate_fn populate) {
test_date_tiered_clustering_slicing(populate);
test_fast_forwarding_across_partitions_to_empty_range(populate);
test_clustering_slices(populate);
test_mutation_reader_fragments_have_monotonic_positions(populate);

Some files were not shown because too many files have changed in this diff.