Compare commits

191 Commits

Author SHA1 Message Date
Pekka Enberg
c4bd4e89ae release: prepare for 1.3.5 2016-11-29 09:47:38 +02:00
Raphael S. Carvalho
c5f43ecd0e main: fix exception handling when initializing data or commitlog dirs
Exception handling was broken because, after the introduction of the io
checker, system error exceptions are wrapped in storage_io_error. Also,
the message emitted when handling an exception wasn't precise enough for
all cases, for example lack of permission to write to an existing data
directory.

Fixes #883.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <b2dc75010a06f16ab1b676ce905ae12e930a700a.1478542388.git.raphaelsc@scylladb.com>
(cherry picked from commit 9a9f0d3a0f)
2016-11-16 15:13:44 +02:00
Paweł Dziepak
c923b6e20c row_cache: touch entries read during range queries
Fixes #1847.

Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
Message-Id: <1479230809-27547-1-git-send-email-pdziepak@scylladb.com>
(cherry picked from commit 999dafbe57)
2016-11-16 13:08:41 +00:00
Amnon Heiman
e9811897d2 API: cache_capacity should use uint for summing
Using an integer as the type for the map_reduce causes numeric overflow.

Fixes #1801

Signed-off-by: Amnon Heiman <amnon@scylladb.com>
Message-Id: <1479299425-782-1-git-send-email-amnon@scylladb.com>
(cherry picked from commit a4be7afbb0)
2016-11-16 15:04:24 +02:00
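The overflow in the commit above can be reproduced in miniature; this is an illustrative sketch (not the actual Scylla API), summing per-shard byte counts with a too-narrow accumulator versus a 64-bit one:

```cpp
#include <cassert>
#include <cstdint>
#include <numeric>
#include <vector>

// Wrong: a 32-bit accumulator wraps around once the per-shard cache
// capacities sum past 4 GiB (values here are illustrative).
uint32_t sum_capacity_narrow(const std::vector<uint64_t>& per_shard) {
    uint32_t total = 0;
    for (auto v : per_shard) {
        total += static_cast<uint32_t>(v);  // unsigned wraparound
    }
    return total;
}

// Fixed: accumulate into a 64-bit unsigned total.
uint64_t sum_capacity(const std::vector<uint64_t>& per_shard) {
    return std::accumulate(per_shard.begin(), per_shard.end(), uint64_t{0});
}
```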
Paweł Dziepak
54c338d785 partition_version: make sure that snapshot is destroyed under LSA
The snapshot destructor may free some objects managed by the LSA. That's
why the partition_snapshot_reader destructor explicitly destroys the
snapshot it uses. However, it was possible for an exception thrown by
_read_section to prevent that from happening, leaving the snapshot to be
destroyed implicitly without the current allocator set to the LSA.

Refs #1831.

Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
Message-Id: <1478778570-2795-1-git-send-email-pdziepak@scylladb.com>
(cherry picked from commit f16d6f9c40)
2016-11-16 12:54:16 +00:00
Glauber Costa
f705be3518 histogram: moving averages: fix inverted parameters
moving_averages constructor is defined like this:

    moving_average(latency_counter::duration interval, latency_counter::duration tick_interval)

But when it is time to initialize them, we do this:

	... {tick_interval(), std::chrono::minutes(1)} ...

As can be seen, the interval and tick interval are inverted. This
leads to the metrics being assigned bogus values.

Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <d83f09eed20ea2ea007d120544a003b2e0099732.1478798595.git.glauber@scylladb.com>
(cherry picked from commit d3f11fbabf)
2016-11-11 10:16:14 +02:00
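A minimal stand-in for the bug pattern in this commit: when both constructor parameters share a type, swapped arguments compile silently and the fields end up inverted (illustrative, not the actual histogram class):

```cpp
#include <cassert>
#include <chrono>

using duration = std::chrono::seconds;

// Both parameters have the same type, so nothing stops a caller from
// passing them in the wrong order -- exactly the bug described above.
struct moving_average {
    duration interval;
    duration tick_interval;
    moving_average(duration i, duration t) : interval(i), tick_interval(t) {}
};
```

A named-field struct or strong duration types would make the inversion a compile error; here the only guard is passing arguments in declared order.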
Calle Wilund
16a9027552 auth::password_authenticator: Ensure exceptions are processed in continuation
Fixes #1718 (even more)
Message-Id: <1475497389-27016-1-git-send-email-calle@scylladb.com>

(cherry picked from commit 5b815b81b4)
2016-11-07 09:25:29 +02:00
Calle Wilund
23e792c9ea auth::password_authenticator: "authenticate" should not throw undeclared excpt
Fixes #1718

Message-Id: <1475487331-25927-1-git-send-email-calle@scylladb.com>
(cherry picked from commit d24d0f8f90)
2016-11-07 09:25:24 +02:00
Pekka Enberg
5294dd9eb2 Merge seastar submodule
* seastar b62d7a5...5adb964 (2):
  > file: make close() more robust against concurrent calls
  > rpc: Do not close client connection on error response for a timed out request
2016-11-07 09:25:02 +02:00
Raphael S. Carvalho
11323582d6 lcs: fix starvation at higher levels
When the max sstable size is increased, higher levels suffer from
starvation because we decide to compact a given level if the following
calculation results in a number greater than 1.001:
level_size(L) / max_size_for_level_l(L)

Fixes #1720.

For this backport, I needed to add schema as parameter to sstable
functions that return first and last decorated keys.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
(cherry picked from commit a8ab4b8f37)
2016-11-05 11:26:01 +02:00
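The compaction trigger described above can be sketched as follows (the 1.001 threshold comes from the message; the function names are illustrative, not the actual compaction-strategy code):

```cpp
#include <cassert>
#include <cstdint>

// Score for a level: how far its current size exceeds its target size.
inline double level_score(uint64_t level_size, uint64_t max_size_for_level) {
    return static_cast<double>(level_size) /
           static_cast<double>(max_size_for_level);
}

// A level becomes a compaction candidate when it is more than 0.1%
// over its target size.
inline bool level_needs_compaction(uint64_t level_size,
                                   uint64_t max_size_for_level) {
    return level_score(level_size, max_size_for_level) > 1.001;
}
```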
Raphael S. Carvalho
69948778c6 lcs: fix broken token range distribution at higher levels
Uniform token range distribution across sstables in a level > 1 was broken,
because we were only choosing the sstable with the lowest first key when
compacting a level > 0. This resulted in a performance problem because,
for example, L1->L2 may develop a huge overlap over time.
The last compacted key will now be stored for each level to ensure a sort
of "round-robin" selection of sstables for compaction at levels >= 1.
That's also done by C*, which was once affected by this, as described in
https://issues.apache.org/jira/browse/CASSANDRA-6284.

Fixes #1719.

For this backport, I added schema parameter to compaction_strategy::
notify_completion() because sstable doesn't store schema here.
Most conflicts were that some interfaces take schema parameter at
this version.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
(cherry picked from commit a3bf7558f2)
2016-11-05 11:26:01 +02:00
Pekka Enberg
ef4332ab09 release: prepare for 1.3.4 2016-11-03 12:09:13 +02:00
Pekka Enberg
18a99c00b0 cql3: Fix selecting same column multiple times
Under the hood, the selectable::add_and_get_index() function
deliberately filters out duplicate columns. This causes
simple_selector::get_output_row() to return a row with all duplicate
columns filtered out, which triggers an assertion because of a row
mismatch with the metadata (which contains the duplicate columns).

The fix is rather simple: just make selection::from_selectors() use
selection_with_processing if the number of selectors and column
definitions doesn't match -- like Apache Cassandra does.

Fixes #1367
Message-Id: <1477989740-6485-1-git-send-email-penberg@scylladb.com>

(cherry picked from commit e1e8ca2788)
2016-11-01 09:34:49 +00:00
Pekka Enberg
cc3a4173f6 release: prepare for 1.3.3 2016-10-28 09:54:41 +03:00
Pekka Enberg
4dc196164d auth: Fix resource level handling
We use the `data_resource` class in the CQL parser, which lets users refer
to a table resource without specifying a keyspace. This asserts out in
get_level() for no good reason, as we already know the intended level
based on the constructor. Therefore, change `data_resource` to track the
level like upstream Cassandra does and use that.

Fixes #1790

Message-Id: <1477599169-2945-1-git-send-email-penberg@scylladb.com>
(cherry picked from commit b54870764f)
2016-10-27 23:38:01 +03:00
Glauber Costa
31ba6325ef auth: always convert string to upper case before comparing
We store all auth perm strings in upper case, but the user might very
well pass them in lower case.

We could use a standard key comparator / hash here, but since the
strings tend to be small, the new sstring will likely be allocated on
the stack and this approach yields significantly less code.

Fixes #1791.

Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <51df92451e6e0a6325a005c19c95eaa55270da61.1477594199.git.glauber@scylladb.com>
(cherry picked from commit ef3c7ab38e)
2016-10-27 22:10:04 +03:00
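A sketch of the fix under the assumption that stored permission strings are upper case (helper names are illustrative, not the actual auth code):

```cpp
#include <algorithm>
#include <cassert>
#include <cctype>
#include <string>

// Normalize user input to upper case before comparing it against the
// stored, already-upper-case permission strings.
inline std::string to_upper(std::string s) {
    std::transform(s.begin(), s.end(), s.begin(),
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });
    return s;
}

inline bool permission_equal(const std::string& stored_upper,
                             const std::string& user_input) {
    return stored_upper == to_upper(user_input);
}
```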
Tomasz Grabiec
dcd8b87eb8 Merge seastar upstream
* seastar b62d7a5...0fd8792 (1):
  > rpc: Do not close client connection on error response for a timed out request

Refs #1778
2016-10-25 13:56:36 +02:00
Tomasz Grabiec
6b53aad9fb partition_version: Fix corruption of partition_version list
The move constructor of partition_version was not invoking move
constructor of anchorless_list_base_hook. As a result, when
partition_version objects were moved, e.g. during LSA compaction, they
were unlinked from their lists.

This can make readers return invalid data, because not all versions
will be reachable.

It also causes leaks of the versions which are not directly attached
to a memtable entry. This will trigger an assertion failure in the LSA
region destructor. This assertion triggers with the row cache disabled.
With the cache enabled (the default), all segments are merged into the
cache region, which currently is not destroyed on shutdown, so this
problem would go unnoticed. With the cache disabled, the memtable region
is destroyed after the memtable is flushed and after all readers stop
using that memtable.

Fixes #1753.
Message-Id: <1476778472-5711-1-git-send-email-tgrabiec@scylladb.com>

(cherry picked from commit fe387f8ba0)
2016-10-18 11:00:19 +02:00
Pekka Enberg
762b156809 database: Fix io_priority_class related compilation error
Commit e6ef49e ("db: Do not timeout streaming readers") breaks compilation of database.cc:

  database.cc: In lambda function:
  database.cc:282:62: error: ‘const class io_priority_class’ has no member named ‘id’
               if (service::get_local_streaming_read_priority().id() == pc.id()) {
                                                              ^~
  database.cc:282:73: error: ‘const class io_priority_class’ has no member named ‘id’
               if (service::get_local_streaming_read_priority().id() == pc.id()) {

...because we don't have Seastar commit 823a404 ("io_priority_class:
remove non-explicit operator unsigned") backported.

Fix the issue by using the non-explicit operator instead of explicit id().

Acked-by: Tomasz Grabiec <tgrabiec@scylladb.com>
Message-Id: <1476425276-17171-1-git-send-email-penberg@scylladb.com>
2016-10-14 13:28:32 +03:00
Pekka Enberg
f3ed1b4763 release: prepare for 1.3.2 2016-10-13 20:34:46 +03:00
Paweł Dziepak
6618236fff query_pagers: fix clustering key range calculation
The paging code assumes that the clustering row range [a, a] contains only
one row, which may not be true. Another problem is that it tries to use
the range<> interface for dealing with clustering key ranges, which
doesn't work because of the lack of a correct comparator.

Refs #1446.
Fixes #1684.

Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
Message-Id: <1475236805-16223-1-git-send-email-pdziepak@scylladb.com>
(cherry picked from commit eb1fcf3ecc)
2016-10-10 16:08:09 +03:00
Asias He
ecb0e44480 gossip: Switch to use system_clock
The expire time, which is used to decide when to remove a node from
gossip membership, is gossiped around the cluster. We switched to the
steady clock in the past. In order to have a consistent time_point on
all the nodes in the cluster, we have to use a wall clock. Switch to
system_clock for gossip.

Fixes #1704

(cherry picked from commit f0d3084c8b)
2016-10-09 18:12:46 +03:00
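The clock distinction matters because steady_clock's epoch is per-process (often boot time), so its time_points cannot be meaningfully gossiped between nodes, while system_clock time_points share the Unix epoch cluster-wide. A hedged sketch (not the actual gossiper code):

```cpp
#include <cassert>
#include <chrono>

// A wall-clock expire time is meaningful on every node in the cluster;
// a steady_clock time_point would only be meaningful to the process
// that produced it.
using expire_point = std::chrono::system_clock::time_point;

inline expire_point expire_after(std::chrono::seconds ttl) {
    return std::chrono::system_clock::now() + ttl;
}
```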
Tomasz Grabiec
e6ef49e366 db: Do not timeout streaming readers
There is a limit to concurrency of sstable readers on each shard. When
this limit is exhausted (currently 100 readers) readers queue. There
is a timeout after which queued readers are failed, equal to
read_request_timeout_in_ms (5s by default). The reason we have the
timeout here is primarily because the readers created for the purpose
of serving a CQL request no longer need to execute after waiting
longer than read_request_timeout_in_ms. The coordinator no longer
waits for the result so there is no point in proceeding with the read.

This timeout should not apply for readers created for streaming. The
streaming client currently times out after 10 minutes, so we could
wait at least that long. Timing out sooner makes streaming unreliable,
which under high load may prevent streaming from completing.

The change sets no timeout for streaming readers at replica level,
similarly as we do for system tables readers.

Fixes #1741.

Message-Id: <1475840678-25606-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 2a5a90f391)
2016-10-09 10:33:39 +03:00
Tomasz Grabiec
7d24a3ed56 transport: Extend request memory footprint accounting to also cover execution
CQL server is supposed to throttle requests so that they don't
overflow memory. The problem is that it currently accounts for
request's memory only around reading of its frame from the connection
and not actual request execution. As a result too many requests may be
allowed to execute and we may run out of memory.

Fixes #1708.
Message-Id: <1475149302-11517-1-git-send-email-tgrabiec@scylladb.com>

(cherry picked from commit 7e25b958ac)
2016-10-03 14:15:21 +03:00
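The accounting idea can be sketched with an RAII guard that holds the reserved memory for the request's entire lifetime rather than only while its frame is read (a simplification; Seastar actually uses semaphore units for this):

```cpp
#include <cassert>
#include <cstddef>

// Memory reserved for a request is counted on construction and released
// only on destruction -- so the permit must live across both frame
// parsing and execution, which is the essence of the fix above.
struct memory_permit {
    std::size_t& in_use;
    std::size_t bytes;
    memory_permit(std::size_t& counter, std::size_t b)
        : in_use(counter), bytes(b) {
        in_use += bytes;  // admission control sees this before execution
    }
    ~memory_permit() { in_use -= bytes; }  // released after execution ends
};
```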
Avi Kivity
ae2a1158d7 Update seastar submodule
* seastar 9b541ef...b62d7a5 (1):
  > semaphore: Introduce get_units()
2016-10-03 14:11:26 +03:00
Asias He
f110c2456a gossip: Do not remove failure_detector history on remove_endpoint
Otherwise a node could wrongly think the decommissioned node is still
alive and not evict it from the gossip membership.

Backport: CASSANDRA-10371

7877d6f Don't remove FailureDetector history on removeEndpoint

Fixes #1714
Message-Id: <f7f6f1eec2aab1b97a2e568acfd756cca7fc463a.1475112303.git.asias@scylladb.com>

(cherry picked from commit 511f8aeb91)
2016-09-29 13:27:34 +03:00
Raphael S. Carvalho
37d27b1144 api: implement api to return sstable count per level
'nodetool cfstats' wasn't showing per-level sstable count because
the API wasn't implemented.

Fixes #1119.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <0dcdf9196eaec1692003fcc8ef18c77d0834b2c6.1474410770.git.raphaelsc@scylladb.com>
(cherry picked from commit 67343798cf)
2016-09-26 09:51:33 +03:00
Tomasz Grabiec
ff674391c2 Merge seastar upstream
Refs #1622
Refs #1690

* seastar b58a287...9b541ef (2):
  > input_stream: Fix possible infinite recursion in consume()
  > iostream: Fix stack overflow in output_stream::split_and_put()
2016-09-22 14:58:44 +02:00
Asias He
55c9279354 gossip: Fix std::out_of_range in setup_collectd
It is possible that endpoint_state_map does not contain the entry for
the node itself when collectd accesses it.

Fixes the issue:

Sep 18 11:33:16 XXX scylla[19483]: [shard 0] seastar - Exceptional
future ignored: std::out_of_range (_Map_base::at)

Fixes #1656

Message-Id: <8ffe22a542ff71e8c121b06ad62f94db54cc388f.1474377722.git.asias@scylladb.com>
(cherry picked from commit aa47265381)
2016-09-20 21:10:55 +03:00
Tomasz Grabiec
ef7b4c61ff tests: Add test for UUID type ordering
Message-Id: <1473956716-5209-2-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 2282599394)
2016-09-20 12:22:03 +02:00
Tomasz Grabiec
b9e169ead9 types: fix uuid_type_impl::less
timeuuid_type_impl::compare_bytes is a "trichotomic" comparator (-1,
0, 1) while less() is a "less" comparator (false, true). The code
incorrectly returns c1 instead of c1 < 0 which breaks the ordering.

Fixes #1196.
Message-Id: <1473956716-5209-1-git-send-email-tgrabiec@scylladb.com>

(cherry picked from commit 804fe50b7f)
2016-09-20 12:22:00 +02:00
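The bug pattern in miniature (simplified to ints, not the actual uuid_type_impl code): a trichotomic comparator returns -1/0/1, and returning that raw value from less() converts both -1 and 1 to true:

```cpp
#include <cassert>

// Three-way comparator: -1 if a < b, 0 if equal, 1 if a > b.
inline int compare3(int a, int b) { return a < b ? -1 : a > b ? 1 : 0; }

// Buggy: implicit int-to-bool conversion makes any nonzero result "true",
// so less_buggy(2, 1) also claims 2 < 1 and breaks the ordering.
inline bool less_buggy(int a, int b) { return compare3(a, b); }

// Fixed: explicitly test for a negative comparison result.
inline bool less_fixed(int a, int b) { return compare3(a, b) < 0; }
```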
Shlomi Livne
dabbadcf39 release: prepare for 1.3.1
Signed-off-by: Shlomi Livne <shlomi@scylladb.com>
2016-09-18 13:18:20 +03:00
Duarte Nunes
c9cb14e160 thrift: Correctly detect clustering range wrap around
This patch uses the clustering bounds comparator to correctly detect
wrap around of a clustering range in the thrift handler.

Refs #1446

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1473952155-14886-1-git-send-email-duarte@scylladb.com>
(cherry picked from commit bc3cbb7009)
2016-09-15 16:52:43 +01:00
Shlomi Livne
985352298d ami: Fix instructions how to run scylla_io_setup on non ephemeral instances
On instances different from i2/m3/c3 we provide instructions to run
scylla_io_setup. Running scylla_io_setup requires access to
/var/lib/scylla to create a temporary file. To gain access to that
directory, the user should run 'sudo scylla_io_setup'.

refs: #1645

Signed-off-by: Shlomi Livne <shlomi@scylladb.com>
Message-Id: <4ce90ca1ba4da8f07cf8aa15e755675463a22933.1473935778.git.shlomi@scylladb.com>
(cherry picked from commit acb83073e2)
2016-09-15 15:27:02 +01:00
Paweł Dziepak
c2d347efc7 Merge "Fix abort when querying with contradicting clustering restrictions" from Tomek
"This series backports fixes for #1670 on top of 1.3 branch.

Fixes abort when querying with contradicting clustering column
restrictions, for example:

   SELECT * FROM test WHERE k = 0 AND ck < 1 and ck > 2"
2016-09-15 14:55:28 +01:00
Tomasz Grabiec
78c7408927 Fix abort when querying with contradicting clustering restrictions
Example of affected query:

  SELECT * FROM test WHERE k = 0 AND ck < 1 and ck > 2

Refs #1670.

This commit brings back the backport of "Don't allow CK wrapping
ranges" by Duarte by reverting commit 11d7f83d52.

It also has the following fix, which is introduced by the
aforementioned commit, squashed to improve bisectability:

"cql3: Consider bound type when detecting wrap around

 This patch uses the clustering bounds comparator to correctly detect
 wrap around of a clustering range. This fixes a manifestation of #1446,
 introduced by b1f9688432, where a query
 such as select * from cf where k = 0x00 and c0 = 0x02 and c1 > 0x02
 would result in a range containing a clustering key and a prefix,
 incorrectly ordered by the prefix equality or lexicographical
 comparators.

 Refs #1446

 Signed-off-by: Duarte Nunes <duarte@scylladb.com>
 (cherry picked from commit ee2694e27d)"
2016-09-14 19:50:49 +02:00
Duarte Nunes
5716decf60 bounds_view: Create from nonwrapping_range
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
(cherry picked from commit 084b931457)
2016-09-14 18:28:55 +02:00
Duarte Nunes
f60eb3958a range_tombstone: Extract out bounds_view
This patch extracts bounds_view from range_tombstone so its comparator
can be reused elsewhere.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
(cherry picked from commit 878927d9d2)
2016-09-14 18:28:55 +02:00
Tomasz Grabiec
7848781e5f database: Ignore spaces in initial_token list
Currently we get a boost::lexical_cast error on startup if initial_token
has a list which contains spaces after commas, e.g.:

  initial_token: -1100081313741479381, -1104041856484663086, ...

Fixes #1664.
Message-Id: <1473840915-5682-1-git-send-email-tgrabiec@scylladb.com>

(cherry picked from commit a498da1987)
2016-09-14 12:03:06 +03:00
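A sketch of the fix: trim surrounding whitespace from each element of the comma-separated list before numeric conversion (illustrative, not the actual config-parsing code):

```cpp
#include <cassert>
#include <sstream>
#include <string>
#include <vector>

// Split a comma-separated token list, trimming spaces/tabs around each
// element so " -123" converts cleanly instead of failing.
std::vector<long long> parse_tokens(const std::string& csv) {
    std::vector<long long> out;
    std::istringstream in(csv);
    std::string item;
    while (std::getline(in, item, ',')) {
        auto b = item.find_first_not_of(" \t");
        auto e = item.find_last_not_of(" \t");
        out.push_back(std::stoll(item.substr(b, e - b + 1)));
    }
    return out;
}
```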
Pekka Enberg
195994ea4b Update scylla-ami submodule
* dist/ami/files/scylla-ami 14c1666...e1e3919 (1):
  > scylla_ami_setup: remove scylla_cpuset_setup
2016-09-07 21:05:33 +03:00
Takuya ASADA
183910b8b4 dist/common/scripts/scylla_sysconfig_setup: sync cpuset parameters with rps_cpus settings when posix_net_conf.sh is enabled and NIC is single queue
In posix_net_conf.sh's single-queue NIC mode (which means RPS-enabled mode), we exclude cpu0 and its sibling from the network stack processing cpus, and assign the NIC IRQ to cpu0.
So the network stack never runs on cpu0 and its sibling; to get better performance we need to exclude these cpus from Scylla too.
To do this, we need to get the RPS cpu mask from posix_net_conf.sh and pass it to scylla_cpuset_setup to construct /etc/scylla.d/cpuset.conf when scylla_setup is executed.

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1472544875-2033-2-git-send-email-syuu@scylladb.com>
(cherry picked from commit 533dc0485d)
2016-09-07 21:00:30 +03:00
Takuya ASADA
f4746d2a46 dist/common/scripts/scylla_prepare: drop unnecesarry multiqueue NIC detection code on scylla_prepare
Right now scylla_prepare passes the -mq option to posix_net_conf.sh when the number of RX queues is > 1, but posix_net_conf.sh itself sets the NIC mode to sq when queues < ncpus / 2.
So the logic differs; in fact posix_net_conf.sh no longer needs -sq/-mq to be specified, as it autodetects the queue mode.
So we need to drop the detection logic from scylla_prepare and let posix_net_conf.sh detect it.

Fixes #1406

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1472544875-2033-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit 0c3bb2ee63)
2016-09-07 20:59:10 +03:00
Pekka Enberg
fa7f990407 Update seastar submodule
* seastar e6571c4...b58a287 (3):
  > scripts/posix_net_conf.sh: supress 'ls: cannot access /sys/class/net/<NIC>/device/msi_irqs/' error message
  > scripts/posix_net_conf.sh: fix 'command not found' error when specifies --cpu-mask
  > scripts/posix_net_conf.sh: add support --cpu-mask mode
2016-09-07 20:58:09 +03:00
Pekka Enberg
86dbcf093b systemd: Don't start Scylla service until network is up
Alexandr Porunov reports that Scylla fails to start up after reboot as follows:

  Aug 25 19:44:51 scylla1 scylla[637]: Exiting on unhandled exception of type 'std::system_error': Error system:99 (Cannot assign requested address)

The problem is that because there's no dependency to network service,
Scylla simply attempts to start up too soon in the boot sequence and
fails.

Fixes #1618.

Message-Id: <1472212447-21445-1-git-send-email-penberg@scylladb.com>
(cherry picked from commit 2d3aee73a6)
2016-08-29 13:26:11 +03:00
Takuya ASADA
db89811fcc dist/common/scripts/scylla_setup: support enabling services on Ubuntu 15.10/16.04
Right now it ignores Ubuntu, but we share .service files between Fedora/CentOS and Ubuntu >= 15.10, so support it.

Fixes #1556.

Message-Id: <1471932814-17347-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit 74d994f6a1)
2016-08-29 13:26:08 +03:00
Duarte Nunes
9ec939f6a3 thrift: Avoid always recording size estimates
Size estimates for a particular column family are recorded every 5
minutes. However, when a user calls the describe_splits(_ex) verbs,
they may want to see estimates for a recently created and updated
column family; this is legitimate and common in testing. However, a
client may also call describe_splits(_ex) very frequently and
recording the estimates on every call is wasteful and, worse, can
cause clients to give up. This patch fixes this by only recording
estimates if the first attempt to query them produces no results.

Refs #1139

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1471900595-4715-1-git-send-email-duarte@scylladb.com>
(cherry picked from commit 440c1b2189)
2016-08-29 12:08:53 +03:00
Pekka Enberg
d40f586839 dist/docker: Clean up Scylla description for Docker image
Message-Id: <1472145307-3399-1-git-send-email-penberg@scylladb.com>
(cherry picked from commit c5e5e7bb40)
2016-08-29 10:49:09 +03:00
Raphael S. Carvalho
d55c55efec api: use estimation of pending tasks in compaction manager too
We have API for getting pending compaction tasks both in column
family and compaction manager. Column family is already returning
pending tasks properly.
Compaction manager's one is used by 'nodetool compactionstats', and
was returning a value which doesn't reflect pending compaction.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <a20b88938ad39e95f98bfd7f93e4d1666d1c6f95.1471641211.git.raphaelsc@scylladb.com>
(cherry picked from commit d8be32d93a)
2016-08-29 10:20:20 +03:00
Raphael S. Carvalho
dc79761c17 sstables: Fix estimation of pending tasks for leveled strategy
There were two underflow bugs.

1) in variable i, causing get_level() to see an invalid level and
throw an exception as a result.
2) when estimating number of pending tasks for a level.

Fixes #1603.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <cce993863d9de4d1f49b3aabe981c475700595fc.1471636164.git.raphaelsc@scylladb.com>
(cherry picked from commit 77d4cd21d7)
2016-08-29 10:19:30 +03:00
Paweł Dziepak
bbffb51811 mutation_partition: fix iterator invalidation in trim_rows
Reversed iterators are adaptors for 'normal' iterators. These underlying
iterators point to different objects than the reversed iterators
themselves.

The consequence of this is that removing an element pointed to by a
reversed iterator may invalidate a reversed iterator which points to a
completely different object.

This is what happens in trim_rows for reversed queries. Erasing a row
can invalidate the end iterator and the loop would fail to stop.

The solution is to introduce the
reversal_traits::erase_dispose_and_update_end() function, which erases
and disposes the object pointed to by a given iterator but also takes a
reference to an end iterator and updates it if necessary to make sure
that it stays valid.

Fixes #1609.

Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
Message-Id: <1472080609-11642-1-git-send-email-pdziepak@scylladb.com>
(cherry picked from commit 6012a7e733)
2016-08-25 17:40:37 +03:00
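The underlying-iterator relationship can be shown with a safe erase-through-reverse-iterator helper (a simplified illustration of the idea behind erase_dispose_and_update_end, not the Scylla code):

```cpp
#include <cassert>
#include <iterator>
#include <list>

// A reverse_iterator physically wraps an iterator that points one element
// *past* the element it designates. Erasing through it must therefore go
// through base(), and the caller gets back a recomputed reverse iterator
// instead of keeping a possibly-invalidated one.
std::list<int>::reverse_iterator
erase_via_reverse(std::list<int>& l, std::list<int>::reverse_iterator rit) {
    // rit designates *std::prev(rit.base()); erase that node and rebuild
    // a reverse iterator to the next element in reverse order.
    return std::list<int>::reverse_iterator(l.erase(std::prev(rit.base())));
}
```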
Amnon Heiman
ec3ace5aa3 housekeeping: Silently ignore check version if Scylla is not available
Normally, the version check should start and stop with the scylla-server
service.

If it fails to find the scylla server, there is no need to check the
version, nor to report it, so it can stop silently.

Signed-off-by: Amnon Heiman <amnon@scylladb.com>
(cherry picked from commit 2b98335da4)
2016-08-23 18:11:44 +03:00
Amnon Heiman
1f2d1012be housekeeping: Use curl instead of Python's libraries
There is a problem with Python SSL's in Ubuntu 14.04:

  ubuntu@ip-10-81-165-156:~$ /usr/lib/scylla/scylla-housekeeping -q version
  Traceback (most recent call last):
    File "/usr/lib/scylla/scylla-housekeeping", line 94, in <module>
      args.func(args)
    File "/usr/lib/scylla/scylla-housekeeping", line 71, in check_version
      latest_version = get_json_from_url(version_url + "?version=" + current_version)["version"]
    File "/usr/lib/scylla/scylla-housekeeping", line 50, in get_json_from_url
      response = urllib2.urlopen(req)
    File "/usr/lib/python2.7/urllib2.py", line 127, in urlopen
      return _opener.open(url, data, timeout)
    File "/usr/lib/python2.7/urllib2.py", line 404, in open
      response = self._open(req, data)
    File "/usr/lib/python2.7/urllib2.py", line 422, in _open
      '_open', req)
    File "/usr/lib/python2.7/urllib2.py", line 382, in _call_chain
      result = func(*args)
    File "/usr/lib/python2.7/urllib2.py", line 1222, in https_open
      return self.do_open(httplib.HTTPSConnection, req)
    File "/usr/lib/python2.7/urllib2.py", line 1184, in do_open
      raise URLError(err)
  urllib2.URLError: <urlopen error [Errno 1] _ssl.c:510: error:14077410:SSL routines:SSL23_GET_SERVER_HELLO:sslv3 alert handshake failure>

Instead of using Python libraries to connect to the check version
server, we will use curl for that.

Fixes #1600

Signed-off-by: Amnon Heiman <amnon@scylladb.com>
(cherry picked from commit 4598674673)
2016-08-23 18:11:38 +03:00
Amnon Heiman
9c8cfd3c0e housekeeping: Add curl as a dependency
To work around an SSL problem with Python on Ubuntu 14.04, we need to
use curl. Add it as a dependency so that it's available on the host.

Signed-off-by: Amnon Heiman <amnon@scylladb.com>
(cherry picked from commit 91944b736e)
2016-08-23 18:11:13 +03:00
Takuya ASADA
d9e4ab38f6 dist/ubuntu: support scylla-housekeeping service on all Ubuntu versions
The current scylla-housekeeping support on Ubuntu has a bug: it does not install the .service/.timer files for Ubuntu 16.04.
So fix it to make it work.

Fixes #1502

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Tested-by: Amos Kong <amos@scylladb.com>
Message-Id: <1471607903-14889-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit 80f7449095)
2016-08-23 13:50:43 +03:00
Takuya ASADA
5d72e96ccc dist/common/systemd: don't use .in for scylla-housekeeping.*, since these are not template file
.in is the suffix for template files which require rewriting at build time, but these systemd unit files do not require a rewrite, so don't name them .in; reference them directly from the .spec.

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1471607533-3821-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit aac60082ae)
2016-08-23 13:50:36 +03:00
Paweł Dziepak
a072df0e09 sstables: do not call consume_end_partition() after proceed::no
After state_processor().process_state() returns proceed::no the upper
layer should have a chance to act before more data is pushed to the
consumer. This means that in case of proceed::no verify_end_state()
should not be called immediately since it may invoke
consume_end_partition().

Fixes #1605.

Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
Message-Id: <1471943032-7290-1-git-send-email-pdziepak@scylladb.com>
(cherry picked from commit 5feed84e32)
2016-08-23 12:24:59 +03:00
Pekka Enberg
8291ec13fa dist/docker: Separate supervisord config files
Move scylla-server and scylla-jmx supervisord config files to separate
files and make the main supervisord.conf scan /etc/supervisord.conf.d/
directory. This makes it easier for people to extend the Docker image
and add their own services.

Message-Id: <1471588406-25444-1-git-send-email-penberg@scylladb.com>
(cherry picked from commit 9d1d8baf37)
2016-08-23 11:56:24 +03:00
Vlad Zolotarov
c485551488 tracing::trace_state: fix a compilation error with gcc 4.9
See #1602.

Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Message-Id: <1471774784-26266-1-git-send-email-vladz@cloudius-systems.com>
2016-08-21 16:39:50 +03:00
Paweł Dziepak
b85164bc1d sstables: optimise clustering rows filtering
Clustering rows in the sstables are sorted in the ascending order so we
can use that to minimise number of comparisons when checking if a row is
in the requested range.

Refs #1544.

Paweł further explains the backport rationale for 1.3:

"Apart from making sense on its own, this patch has a very curious property
of working around #1544 in a way that doesn't make #1446 hit us harder than
usual.
So, in the branch-1.3 we can:
 - revert 85376ce555
   'Merge "Don't allow CK wrapping ranges" from Duarte' -- the previous,
   insufficient workaround for #1544
 - apply this patch
 - rejoice as cql_query_test passes and #1544 is no longer a problem

The scenario above assumes that this patch doesn't introduce any
regressions."

Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
Reviewed-by: Piotr Jastrzebski <piotr@scylladb.com>
Message-Id: <1471608921-30818-1-git-send-email-pdziepak@scylladb.com>
(cherry picked from commit e60bb83688)
2016-08-19 18:11:49 +03:00
Pekka Enberg
11d7f83d52 Revert "Merge "Don't allow CK wrapping ranges" from Duarte"
This reverts commit 85376ce555, reversing
changes made to 3f54e0c28e.

The change breaks CQL range queries.
2016-08-19 18:11:31 +03:00
Pekka Enberg
5c4a24c1c0 dist/docker: Use Scylla mascot as the logo
Glauber "eagle eyes" Costa pointed out that the Scylla logo used in our
Docker image documentation looks broken because it's missing the Scylla
text.

Fix the problem by using the Scylla mascot instead.
Message-Id: <1471525154-2800-1-git-send-email-penberg@scylladb.com>

(cherry picked from commit 2bf5e8de6e)
2016-08-19 12:50:46 +03:00
Pekka Enberg
306eeedf3e dist/docker: Fix bug tracker URL in the documentation
The bug tracker URL in our Docker image documentation is not clickable
because the URL Markdown extracts automatically is broken.

Fix that and add some more links on how to get help and report issues.
Message-Id: <1471524880-2501-1-git-send-email-penberg@scylladb.com>

(cherry picked from commit 4d90e1b4d4)
2016-08-19 12:50:42 +03:00
Pekka Enberg
9eada540d9 release: prepare for 1.3.0 2016-08-18 16:20:21 +03:00
Yoav Kleinberger
a662765087 docker: extend supervisor capabilities
Allow the user to use the `supervisorctl' program to start and stop
services. `exec` needed to be added to the scylla and scylla-jmx starter
scripts - otherwise supervisord loses track of the actual process we
want to manage.

Signed-off-by: Yoav Kleinberger <yoav@scylladb.com>
Message-Id: <1471442960-110914-1-git-send-email-yoav@scylladb.com>
(cherry picked from commit 25fb5e831e)
2016-08-18 15:41:08 +03:00
Pekka Enberg
192f89bc6f dist/docker: Documentation cleanups
- Fix invisible characters to be space so that Markdown to PDF
  conversion works.

- Fix formatting of examples to be consistent.

- Spellcheck.

Message-Id: <1471514924-29361-1-git-send-email-penberg@scylladb.com>
(cherry picked from commit 1553bec57a)
2016-08-18 13:14:21 +03:00
Pekka Enberg
b16bb0c299 dist/docker: Document image command line options
This patch documents all the command line options Scylla's Docker image supports.

Message-Id: <1471513755-27518-1-git-send-email-penberg@scylladb.com>
(cherry picked from commit 4ca260a526)
2016-08-18 13:14:16 +03:00
Amos Kong
cd9d967c44 systemd: have the first housekeeping check right after start
Issue: https://github.com/scylladb/scylla/issues/1594

Currently systemd runs the first housekeeping check at the end of the
first timer period. We expect it to run right after start.

This patch makes systemd consistent with upstart.

Signed-off-by: Amos Kong <amos@scylladb.com>
Message-Id: <4cc880d509b0a7b283278122a70856e21e5f1649.1471433388.git.amos@scylladb.com>
(cherry picked from commit 9d53305475)
2016-08-17 16:02:25 +03:00
Avi Kivity
236b089b03 Merge "Fixes for streamed_mutation_from_mutation" from Paweł
"This series contains fixes for two memory leaks in
streamed_mutation_from_mutation.

Fixes #1557."

(cherry picked from commit 4871b19337)
2016-08-17 13:26:25 +03:00
Avi Kivity
9d54b33644 Merge 2016-08-17 13:25:49 +03:00
Benoit Canet
4ef6c3155e systemd: Remove WorkingDirectory directive
The WorkingDirectory directive does not support environment variables on
the systemd version shipped with Ubuntu 16.04. Fortunately, not
setting WorkingDirectory implicitly sets it to the user's home directory,
which is the same thing (i.e. /var/lib/scylla).

Fixes #1319

Signed-off-by: Benoit Canet <benoit@scylladb.com>
Message-Id: <1470053876-1019-1-git-send-email-benoit@scylladb.com>
(cherry picked from commit 90ef150ee9)
2016-08-17 12:34:44 +03:00
Takuya ASADA
fe529606ae dist/common/scripts: mkdir -p /var/lib/scylla/coredump before symlinking
We are creating this dir in scylla_raid_setup, but a user may create the XFS volume without using that command; scylla_coredump_setup should work in that condition too.

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1470638615-17262-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit 60ce16cd54)
2016-08-16 12:35:15 +03:00
Avi Kivity
85376ce555 Merge "Don't allow CK wrapping ranges" from Duarte
"This pathset ensures user-specified clustering key ranges are never
wrapping, as those types of ranges are not defined for CQL3.

Fixes #1544"
2016-08-16 10:09:31 +03:00
Duarte Nunes
5e8ac82614 cql3: Discard wrap around ranges.
Wrapping ranges are not supported in CQL3. If one is specified,
this patch converts it to an empty range.

Fixes #1544

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
2016-08-15 15:22:44 +00:00
Duarte Nunes
22c8520d61 storage_proxy: Short circuit query without clustering ranges
This patch makes the storage_proxy return an empty result when the
query doesn't define any clustering ranges (default or specific).

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
2016-08-15 15:05:23 +00:00
Duarte Nunes
e7355c9b60 thrift: Don't always validate clustering range
This patch makes make_clustering_range not enforce that the range be
non-wrapping, so that it can be validated differently if needed. A
make_clustering_range_and_validate function is introduced that keeps
the old behavior.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
2016-08-15 15:05:18 +00:00
Paweł Dziepak
3f54e0c28e partition_version: handle errors during version merge
Currently, the partition snapshot destructor can throw, which is a big no-no.
The solution is to ignore the exception, leave the versions unmerged, and
hope that subsequent reads will succeed at merging.

However, another problem is that the merge doesn't use allocating
sections which means that memory won't be reclaimed to satisfy its
needs. If the cache is full this may result in partition versions not
being merged for a very long time.

This patch introduces partition_snapshot::merge_partition_versions()
which contains all the version merging logic that was previously present
in the snapshot destructor. This function may throw so that it can be
used with allocating sections.

The actual merging and handling of potential errors is done from the
partition_snapshot_reader destructor. It tries to merge versions under
the allocating section. Only if that fails does it give up and leave them
unmerged.

Fixes #1578
Fixes #1579.

Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
Message-Id: <1471265544-23579-1-git-send-email-pdziepak@scylladb.com>
(cherry picked from commit 5cae44114f)
2016-08-15 15:57:10 +03:00
Asias He
4c6f8f9d85 gossip: Add heart_beat_version to collectd
$ tools/scyllatop/scyllatop.py '*gossip*'

node-1/gossip-0/gauge-heart_beat_version 1.0
node-2/gossip-0/gauge-heart_beat_version 1.0
node-3/gossip-0/gauge-heart_beat_version 1.0

Gossip heart beat version changes every second. If everything is working
correctly, the gauge-heart_beat_version output should be 1.0. If not,
the gauge-heart_beat_version output should be less than 1.0.

Message-Id: <cbdaa1397cdbcd0dc6a67987f8af8038fd9b2d08.1470712861.git.asias@scylladb.com>
(cherry picked from commit ef782f0335)
2016-08-15 12:32:17 +03:00
Nadav Har'El
7a76157cb9 sstables: don't forget to read static row
[v2: fix check for static column (don't check if the schema is not compound)
 and move want-static-columns flag inside the filtering context to avoid
 changing all the callers.]

When a CQL request asks to read only a range of clustering keys inside
a partition, we actually need to read not just these clustering rows, but
also the static columns and add them to the response (as explained by Tomek
in issue #1568).

With the current code, that CQL request is translated into an
sstable::read_row() with a clustering-key filter. But this currently
only reads the requested clustering keys - NOT the static columns.

We don't want sstable::read_row() to unconditionally read the static
columns from disk because if, for example, they are already cached, we
might not want to read them from disk. We don't have such a partial-partition
cache yet, but we are likely to have one in the future.

This patch adds to the clustering key filter object a flag indicating whether we
need to read the static columns (actually, it's a function, returning this
flag per partition, to match the API for the clustering-key filtering).

When sstable::read_row() sees the flag for this partition is true, it also
requests to read the static columns.
Currently, the code always passes "true" for this flag - because we don't
have the logic to cache partially-read partitions.

The current find_disk_ranges() code does not yet support returning a non-
contiguous byte range, so this patch, if it notices that this partition
really has static columns in addition to the range it needs to read,
falls back to reading the entire partition. This is a correct solution
(and fixes #1568) but not the most efficient one. Because static
columns are relatively rare, let's start with this solution (correct
but less efficient when there are static columns); providing the non-
contiguous reading support is left as a FIXME.

Fixes #1568

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <1471124536-19471-1-git-send-email-nyh@scylladb.com>
(cherry picked from commit 0d00da7f7f)
2016-08-15 12:30:36 +03:00
Amnon Heiman
b2e6a52461 scylla.spec: conditionally include the housekeeping.cfg in the conf package
When the housekeeping configuration file name was changed from conf to cfg,
it was no longer included as part of the conf rpm.

This change adds a macro that determines whether the file should be
included and uses that macro to conditionally add the
configuration file to the rpm.

Signed-off-by: Amnon Heiman <amnon@scylladb.com>
Message-Id: <1471169042-19099-1-git-send-email-amnon@scylladb.com>
(cherry picked from commit 612f677283)
2016-08-14 13:26:25 +03:00
Tomasz Grabiec
b1376fef9b partition_version: Add missing linearization context
Snapshot removal merges partitions, and cell merging must be done
inside linearization context.

Fixes #1574

Reviewed-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1471010625-18019-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 1b2ea14d0e)
2016-08-12 17:56:21 +03:00
Piotr Jastrzebski
23f4813a48 Fix after free access bug in storage proxy
Due to speculative reads we can't guarantee that all
fibers started by storage_proxy::query will be finished
by the time the method returns a result.

We need to make sure that no parameter passed to this
method ever changes.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
Message-Id: <31952e323e599905814b7f378aafdf779f7072b8.1471005642.git.piotr@scylladb.com>
(cherry picked from commit f212a6cfcb)
2016-08-12 16:35:45 +02:00
Duarte Nunes
c16c3127fe docker: If set, broadcast address is seed
This patch configures the broadcast address to be the seed if it is
configured, otherwise Scylla complains about it and aborts.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1470863058-1011-1-git-send-email-duarte@scylladb.com>
(cherry picked from commit 918a2939ff)
2016-08-12 11:47:08 +03:00
Tomasz Grabiec
48fdeb47e2 Merge branch 'raphael/fix_min_max_metadata_v2' from git@github.com:raphaelsc/scylla.git
Fix for generation of sstables min/max clustering metadata from Raphael.

(cherry picked from commit d7f8ce7722)
2016-08-11 17:53:01 +03:00
Avi Kivity
9ef4006d67 Update seastar submodule
* seastar 36a8ebe...e6571c4 (1):
  > reactor: Do not test for poll mode default
2016-08-11 14:45:52 +03:00
Amnon Heiman
75c53e4f24 scylla-housekeeping: rename configuration file from conf to cfg
Files with a conf extension are run by scylla_prepare on the AMI.
The scylla-housekeeping configuration file is not a bash script and
should not be run.

This patch changes its extension to cfg, which is more Python-like.

Signed-off-by: Amnon Heiman <amnon@scylladb.com>
Message-Id: <1470896759-22651-2-git-send-email-amnon@scylladb.com>
(cherry picked from commit 5a4fc9c503)
2016-08-11 14:45:11 +03:00
Tomasz Grabiec
66292c0ef0 sstables: Fix bug in promoted index generation
maybe_flush_pi_block, which is called for each cell, assumes that
block_first_colname will be empty when the first cell is encountered
for each partition.

This didn't hold after writing a partition which generated no index
entry, because block_first_colname was cleared only when there was any
data written into the promoted index. Fix by always clearing the name.

The effect was that the promoted index entry for the next partition
would be flushed sooner than necessary (still counting since the start
of the previous partition) and with offset pointing to the start of
the current partition. This will cause a parsing error when such an sstable
is read through a promoted index entry because the offset is assumed to
point to a cell, not to a partition start.

Fixes #1567

Message-Id: <1470909915-4400-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit f1c2481040)
2016-08-11 13:09:05 +03:00
Amnon Heiman
84f7d9a49c build_deb: Add dist flag
The dist flag marks the debian package as a distributed package.
As such, the housekeeping configuration file will be included in the
package and will not need to be created by scylla_setup.

Signed-off-by: Amnon Heiman <amnon@scylladb.com>
Message-Id: <1470907208-502-2-git-send-email-amnon@scylladb.com>
(cherry picked from commit a24941cc5f)
2016-08-11 12:25:28 +03:00
Pekka Enberg
f0535eae9b dist/docker: Fix typo in "--overprovisioned" help text
Reported by Mathias Bogaert (@analytically).
Message-Id: <1470904395-4614-1-git-send-email-penberg@scylladb.com>

(cherry picked from commit d1a052237d)
2016-08-11 11:49:42 +03:00
Avi Kivity
4f096b60df Update seastar submodule
* seastar de789f1...36a8ebe (1):
  > reactor: fix I/O queue pending requests collectd metric

Fixes #1558.
2016-08-10 15:28:09 +03:00
Pekka Enberg
4ac160f2fe release: prepare for 1.3.rc3 2016-08-10 13:53:53 +03:00
Avi Kivity
395edc4361 Merge 2016-08-10 13:34:48 +03:00
Avi Kivity
e2c9feafa3 Merge "Add configuration file to scylla-housekeeping" from Amnon
"The series adds an optional configuration file to scylla-housekeeping. The
file acts as a way to prevent scylla-housekeeping from running. A missing
configuration file will make scylla-housekeeping exit immediately.

The series adds a flag to build_rpm that differentiates between public
distributions, which contain the configuration file, and private
distributions, which do not; in the latter case the setup script
creates it."

(cherry picked from commit da4d33802e)
2016-08-10 13:34:04 +03:00
Avi Kivity
f4dea17c19 Merge "housekeeping: Switch to python2 and handle version" from Amnon
This series handles two issues:
* Moving to python2: though python3 is supported, there are modules that we
  need that are not rpm installable; python3 will have to wait until it is
  more mature.

* Check version should send the current version when it checks for a new one,
  and a simple string compare is the wrong way to compare versions.

(cherry picked from commit ec62f0d321)
2016-08-10 13:31:50 +03:00
Amnon Heiman
a45b72b66f scylla-housekeeping: check version should use the current version
This patch handles two issues with check version:
* When checking for a version, the script sends the current version.
* Instead of a string compare, it uses parse_version to compare the
versions.

Signed-off-by: Amnon Heiman <amnon@scylladb.com>
(cherry picked from commit 406fa11cc5)
2016-08-10 13:29:53 +03:00
Amnon Heiman
be1c2a875b scylla-housekeeping: Switching to python2
There is a problem with python module installation in python3,
especially on CentOS. Though python34 has a normal package, a lot of the
modules are missing yum installation and can only be installed by pip.

This patch switches the scylla-housekeeping implementation to use
python2; we should switch back to python3 when CentOS python3 is
more mature.

Signed-off-by: Amnon Heiman <amnon@scylladb.com>
(cherry picked from commit 641e5dc57c)
2016-08-10 13:23:47 +03:00
Nadav Har'El
0b9f83c6b6 sstable: avoid copying non-existent value
The promoted-index reading code contained a bug where it copied the value
of a disengaged optional (this non-value was never used, but it was still
copied). Fix it by keeping the optional<> as such for longer.

This bug caused tests/sstable_test in the debug build to crash (the release
build somehow worked).

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <1470742418-8813-1-git-send-email-nyh@scylladb.com>
(cherry picked from commit e005762271)
2016-08-10 13:14:49 +03:00
Pekka Enberg
0d77615b80 cql3: Filter compaction strategy class from compaction options
Cassandra 2.x does not store the compaction strategy class in compaction
options so neither should we to avoid confusing the drivers.

Fixes #1538.
Message-Id: <1470722615-29106-1-git-send-email-penberg@scylladb.com>

(cherry picked from commit 9ff242d339)
2016-08-10 12:44:50 +03:00
Pekka Enberg
8771220745 dist/docker: Add '--smp', '--memory', and '--overprovisioned' options
Add '--smp', '--memory', and '--overprovisioned' options to the Docker
image. The options are written to /etc/scylla.d/docker.conf file, which
is picked up by the Scylla startup scripts.

You can now, for example, restrict your Docker container to 1 CPU and 1
GB of memory with:

   $ docker run --name some-scylla penberg/scylla --smp 1 --memory 1G --overprovisioned 1

Needed by folks who want to run Scylla on Docker in production.

Cc: Sasha Levin <alexander.levin@verizon.com>
Message-Id: <1470680445-25731-1-git-send-email-penberg@scylladb.com>
(cherry picked from commit 6a5ab6bff4)
2016-08-10 11:54:01 +03:00
Avi Kivity
f552a62169 Update seastar submodule
* seastar ee1ecc5...de789f1 (1):
  > Merge "Fix the SMP queue poller" from Tomasz

Fixes #1553.
2016-08-10 09:54:15 +03:00
Avi Kivity
696a978611 Update seastar submodule
* seastar 0b53ab2...ee1ecc5 (1):
  > byteorder: add missing cpu_to_be(), be_to_cpu() functions

Fixes build failure.
2016-08-10 09:51:35 +03:00
Nadav Har'El
0475a98de1 Avoid some warnings in debug build
The sanitizer of the debug build warns when a "bool" variable is read while
containing a value that is not 0 or 1. In particular, if a class has an
uninitialized bool field, which the class logic allows to be set only later,
then "move"ing such an object will read the uninitialized value and produce
this warning.

This patch fixes four of these warnings seen in sstable_test by initializing
some bool fields to false, even though the code doesn't strictly need this
initialization.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <1470744318-10230-1-git-send-email-nyh@scylladb.com>
(cherry picked from commit c2e4f5ba16)
2016-08-09 17:54:54 +03:00
Nadav Har'El
0b69e37065 Fix failing tests
Commit 0d8463aba5 broke some of the tests with an assertion
failure about local_is_initialized(). It turns out that there is more than
one level of local_is_initialized() we need to check... For some tests,
neither local was initialized, but for others, one was and the other
wasn't, and the wrong one was tested.

With this patch, all unit tests except "flush_queue_test.cc" pass on my
machine. I doubt this test is relevant to the promoted index patches,
but I'll continue to investigate it.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <1470695199-32649-1-git-send-email-nyh@scylladb.com>
(cherry picked from commit bce020efbd)
2016-08-09 17:54:49 +03:00
Avi Kivity
dc6be68852 Merge "promoted index for reading partial partitions" from Nadav
"The goal of this patch series is to support reading and writing of a
"promoted index" - the Cassandra 2.* SSTable feature which allows reading
only a part of the partition without needing to read an entire partition
when it is very long. To make a long story short, a "promoted index" is
a sample of each partition's column names, written to the SSTable Index
file with that partition's entry. See a longer explanation of the index
file format, and the promoted index, here:

     https://github.com/scylladb/scylla/wiki/SSTables-Index-File

There are two main features in this series - first enabling reading of
parts of partitions (using the promoted index stored in an sstable),
and then enable writing promoted indexes to new sstables. These two
features are broken up into smaller stand-alone pieces to facilitate the
review.

Three features are still missing from this series and are planned to be
developed later:

1. When we fail to parse a partition's promoted index, we silently fall back
   to reading the entire partition. We should log (with rate limiting) and
   count these errors, to help in debugging sstable problems.

2. The current code only uses the promoted index when looking for a single
   contiguous clustering-key range. If the ck range is non-contiguous, we
   fall back to reading the entire partition. We should use the promoted
   index in that case too.

3. The current code only uses the promoted index when reading a single
   partition, via sstable::read_row(). When scanning through all or a
   range of partitions (read_rows() or read_range_rows()), we do not yet
   use the promoted index; We read contiguously from data file (we do not
   even read from the index file, so unsurprisingly we can't use it)."

(cherry picked from commit 700feda0db)
2016-08-09 17:54:15 +03:00
Avi Kivity
8c20741150 Revert "sstables: promoted index write support"
This reverts commit c0e387e1ac.  The full
patchset needs to be backported instead.
2016-08-09 17:53:24 +03:00
Avi Kivity
3e3eaa693c Revert "Fix failing tests"
This reverts commit 8d542221eb.  It is needed,
but prevents another revert from taking place.  Will be reinstated later.
2016-08-09 17:52:57 +03:00
Avi Kivity
03ef0a9231 Revert "Avoid some warnings in debug build"
This reverts commit 47bf8181af.  It is needed,
but prevents another revert from taking place.  Will be reinstated later.
2016-08-09 17:52:09 +03:00
Nadav Har'El
47bf8181af Avoid some warnings in debug build
The sanitizer of the debug build warns when a "bool" variable is read while
containing a value that is not 0 or 1. In particular, if a class has an
uninitialized bool field, which the class logic allows to be set only later,
then "move"ing such an object will read the uninitialized value and produce
this warning.

This patch fixes four of these warnings seen in sstable_test by initializing
some bool fields to false, even though the code doesn't strictly need this
initialization.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <1470744318-10230-1-git-send-email-nyh@scylladb.com>
(cherry picked from commit c2e4f5ba16)
2016-08-09 16:58:27 +03:00
Nadav Har'El
8d542221eb Fix failing tests
Commit 0d8463aba5 broke some of the tests with an assertion
failure about local_is_initialized(). It turns out that there is more than
one level of local_is_initialized() we need to check... For some tests,
neither local was initialized, but for others, one was and the other
wasn't, and the wrong one was tested.

With this patch, all unit tests except "flush_queue_test.cc" pass on my
machine. I doubt this test is relevant to the promoted index patches,
but I'll continue to investigate it.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <1470695199-32649-1-git-send-email-nyh@scylladb.com>
(cherry picked from commit bce020efbd)
2016-08-09 16:58:27 +03:00
Nadav Har'El
c0e387e1ac sstables: promoted index write support
This patch adds writing of promoted index to sstables.

The promoted index is basically a sample of columns and their positions
for large partitions: The promoted index appears in the sstable's index
file for partitions which are larger than 64 KB, and divides the partition
to 64 KB blocks (as in Cassandra, this interval is configurable through
the column_index_size_in_kb config parameter). Beyond modifying the index
file, having a promoted index may also modify the data file: Since each
of blocks may be read independently, we need to add in the beginning of
each block the list of range tombstones that are still open at that
position.

See also https://github.com/scylladb/scylla/wiki/SSTables-Index-File

Fixes #959

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
(cherry picked from commit 0d8463aba5)
2016-08-09 16:58:27 +03:00
Duarte Nunes
57d3dc5c66 thrift: Set default validator
This patch sets the default validator for dynamic column families.
Doing so has no consequences in terms of behavior, but it causes the
correct type to be shown when describing the column family through
cassandra-cli.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1470739773-30497-1-git-send-email-duarte@scylladb.com>
(cherry picked from commit 0ed19ec64d)
2016-08-09 13:56:43 +02:00
Duarte Nunes
2daee0b62d thrift: Send empty col metadata when describing ks
This patch ensures we always send the column metadata, even when the
column family is dynamic and the metadata is empty, as some clients
like cassandra-cli always assume its presence.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1470740971-31169-1-git-send-email-duarte@scylladb.com>
(cherry picked from commit f63886b32e)
2016-08-09 14:34:14 +03:00
Pekka Enberg
3eddf5ac54 dist/docker: Document data volume and cpuset configuration
Message-Id: <1470649675-5648-1-git-send-email-penberg@scylladb.com>
(cherry picked from commit 3b31d500c8)
2016-08-09 11:15:57 +03:00
Pekka Enberg
42d6f389f9 dist/docker: Add '--broadcast-rpc-address' command line option
We already have a '--broadcast-address' command line option so let's add
the same thing for RPC broadcast address configuration.

Message-Id: <1470656449-11038-1-git-send-email-penberg@scylladb.com>
(cherry picked from commit 4372da426c)
2016-08-09 11:15:53 +03:00
Pekka Enberg
1a6f6f1605 Update scylla-ami submodule
* dist/ami/files/scylla-ami 2e599a3...14c1666 (1):
  > setup coredump on first startup
2016-08-09 11:10:20 +03:00
Avi Kivity
8d8e997f5a Update scylla-ami submodule
* dist/ami/files/scylla-ami 863cc45...2e599a3 (1):
  > Do not set developer-mode on unsupported instance types
2016-08-07 17:52:13 +03:00
Takuya ASADA
50ee889679 dist/ami/files: add a message for privacy policy agreement on login prompt
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1470212054-351-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit 3d45d6579b)
2016-08-07 17:40:56 +03:00
Duarte Nunes
325f917d8a system_keyspace: Correctly deal with wrapped ranges
This patch ensures we correctly deal with ranges that wrap around when
querying the size_estimates system table.

Ref #693

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1470412433-7767-1-git-send-email-duarte@scylladb.com>
(cherry picked from commit e0a43a82c6)
2016-08-07 17:21:58 +03:00
Takuya ASADA
b088dd7d9e dist/ami/files: show warning message for unsupported instance types
Notify users to run scylla_io_setup before launching scylla on unsupported instance types.

Fixes #1511

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1470090415-8632-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit bd1ab3a0ad)
2016-08-05 09:51:27 +03:00
Takuya ASADA
a42b2bb0d6 dist/ami: Install scylla metapackage and debuginfo on AMI
Install the scylla metapackage and debuginfo on the AMI to make it easier to report bugs from the AMI.
Fixes #1496

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1469635071-16821-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit 9b59bb59f2)
2016-08-05 09:48:19 +03:00
Takuya ASADA
aecda01f8e dist/common/scripts: disable coredump compression by default, add an argument to enable compression on scylla_coredump_setup
On large memory machine compression takes too long, so disable it by default.
Also provide a way to enable it again.

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1469706934-6280-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit 89b790358e)
2016-08-05 09:47:17 +03:00
Takuya ASADA
f9b0a29def dist/ami: setup correct repository when --localrpm specified
There was no way to set up the correct repo when the AMI is built with the --localrpm option, since the AMI does not have access to the 'version' file and we did not pass the repo URL to the AMI.
So detect the optimal repo path when starting the AMI build, pass the repo URL to the AMI, and set it up correctly.

Note: this changes the behavior of build_ami.sh/scylla_install_pkg's --repo option.
It was a repository URL, but now becomes a .repo/.list file URL.
This is optimal for distributions which require 3rd-party packages to install scylla, like CentOS7.
Existing shell scripts which invoke build_ami.sh need to change accordingly, such as our Jenkins jobs.

Fixes #1414

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1469636377-17828-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit d3746298ae)
2016-08-05 09:45:22 +03:00
Pekka Enberg
192e935832 dist/docker: Use Scylla 1.3 RPM repository 2016-08-05 09:13:08 +03:00
Avi Kivity
436ff3488a Merge "Docker image fixes" from Pekka
"Kubernetes is unhappy with our Docker image because we start systemd
under the hood. Fix that by switching to use "supervisord" to manage the
two processes -- "scylla" and "scylla-jmx":

  http://blog.kunicki.org/blog/2016/02/12/multiple-entrypoints-in-docker/

While at it, fix up "docker logs" and "docker exec cqlsh" to work
out-of-the-box, and update our documentation to match what we have.

Further work is needed to ensure Scylla production configuration works
as expected and is documented accordingly."

(cherry picked from commit 28ee2bdbd2)
2016-08-05 09:10:51 +03:00
Benoît Canet
b91712fc36 docker: Add documentation page for Docker Hub
Signed-off-by: Benoît Canet <benoit@scylladb.com>
Message-Id: <1466438296-5593-1-git-send-email-benoit@scylladb.com>
(cherry picked from commit 4ce7bced27)
2016-08-05 09:10:48 +03:00
Yoav Kleinberger
be954ccaec docker: bring docker image closer to a more 'standard' scylla installation
Previously, the Docker image could only be run interactively, which is
not conducive to running clusters. This patch makes the docker image
run in the background (using systemd). This makes the docker workflow
similar to working with virtual machines, i.e. the user launches a
container, and once it is running they can connect to it with

       docker exec -it <container_name> bash

and immediately use `cqlsh` to control it.

In addition, the configuration of scylla is done using established
scripts, such as `scylla_dev_mode_setup`, `scylla_cpuset_setup` and
`scylla_io_setup`, whereas previously code from these scripts was
duplicated into the docker startup file.

To specify seeds for making a cluster, use the --seeds command line
argument, e.g.

    docker run -d --privileged scylladb/scylla
    docker run -d --privileged scylladb/scylla --seeds 172.17.0.2

other options include --developer-mode, --cpuset, --broadcast-address

The --developer-mode option is on by default - so that we don't fail users
who just want to play with this.

The Dockerfile entrypoint script was rewritten as a few Python modules.
The move to Python is merited because:

    * Using `sed` to manipulate YAML is fragile
    * Lack of proper command line parsing resulted in introducing ad-hoc environment variables
    * Shell scripts don't throw exceptions, and it's easy to forget to check exit codes for every single command

I've made an effort to make the entrypoint `go' script very simple and readable.
The gory details are hidden inside the other Python modules.

Signed-off-by: Yoav Kleinberger <yoav@scylladb.com>
Message-Id: <1468938693-32168-1-git-send-email-yoav@scylladb.com>
(cherry picked from commit d1d1be4c1a)
2016-08-05 09:10:35 +03:00
Glauber Costa
2bffa8af74 logalloc: make sure allocations in release_requests don't recurse back into the allocator
Calls like later() and with_gate() may allocate memory, although that is not
very common. This can create a problem in the sense that it will potentially
recurse and bring us back to the allocator during free - which is the very thing
we are trying to avoid with the call to later().

This patch wraps the relevant calls in the reclaimer lock. This does mean that the
allocation may fail if we are under severe pressure - which includes having
exhausted all reserved space - but at least we won't recurse back to the
allocator.

To make sure we do this as early as possible, we just fold both release_requests
and do_release_requests into a single function.

Thanks Tomek for the suggestion.

Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <980245ccc17960cf4fcbbfedb29d1878a98d85d8.1470254846.git.glauber@scylladb.com>
(cherry picked from commit fe6a0d97d1)
2016-08-04 11:17:54 +02:00
Glauber Costa
4a6d0d503f logalloc: make sure blocked requests memory allocations are served from the standard allocator
Issue 1510 describes a scenario in which, under load, we allocate memory within
release_requests() leading to a reentry into an invalid state in our
blocked requests' shared_promise.

This is not easy to trigger since not all allocations will actually get to the
point in which they need a new segment, let alone have that happening during
another allocator call.

Having those kinds of reentry is something we have always sought to avoid with
release_requests(): this is the reason why most of the actual routine is
deferred after a call to later().

However, that is a trick we cannot use for updating the state of the blocked
requests' shared_promise: we can't guarantee when is that going to run, and we
always need a valid shared_promise, in a valid state, waiting for new requests
to hook into.

The solution employed by this patch is to make sure that no allocation
operations whatsoever happen during the initial part of release_requests on
behalf of the shared promise.  Allocation is now deferred to first use, which
relieves release_requests() from all allocation duties. All it needs to do is
free the old object and signal to its user that an allocation is needed (by
storing {} into the shared_promise).

Fixes #1510

Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <49771e51426f972ddbd4f3eeea3cdeef9cc3b3c6.1470238168.git.glauber@scylladb.com>
(cherry picked from commit ad58691afb)
2016-08-04 11:17:49 +02:00
Avi Kivity
fa81385469 conf: synchronize internode_compression between scylla.yaml and code
Our default is "none", to give reasonable performance, so have scylla.yaml
reflect that.

(cherry picked from commit 9df4ac53e5)
2016-08-04 12:10:03 +03:00
Duarte Nunes
93981aaa93 schema_builder: Ensure dense tables have compact col
This patch ensures that when the schema is dense, regardless of
compact_storage being set, the single regular column is translated
into a compact column.

This fixes an issue where Thrift dynamic column families are
translated to a dense schema with a regular column, instead of a
compact one.

Since a compact column is also a regular column (e.g., for purposes of
querying), no further changes are required.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1470062410-1414-1-git-send-email-duarte@scylladb.com>
(cherry picked from commit 5995aebf39)

Fixes #1535.
2016-08-03 13:49:51 +02:00
Duarte Nunes
89b40f54db schema: Dense schemas are correctly upgraded
When upgrading a dense schema, we would drop the cells of the regular
(compact) column. This patch fixes this by making the regular and
compact column kinds compatible.

Fixes #1536

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1470172097-7719-1-git-send-email-duarte@scylladb.com>
2016-08-03 13:37:57 +02:00
Paweł Dziepak
99dfbedf36 sstables: extend sstable life until reader is fully closed
data_consume_rows_context needs to have close() called and the returned
future waited for before it can be destroyed. data_consume_context::impl
does that in the background upon its destruction.

However, it is possible that the sstable is removed before
data_consume_rows_context::close() completes in which case EBADF may
happen. The solution is to make data_consume_context::impl keep a
reference to the sstable and extend its life time until closing of
data_consume_rows_context (which is performed in the background)
completes.

Side effect of this change is also that data_consume_context no longer
requires its user to make sure that the sstable exists as long as it is
in use since it owns its own reference to it.

Fixes #1537.

Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
Message-Id: <1470222225-19948-1-git-send-email-pdziepak@scylladb.com>
(cherry picked from commit 02ffc28f0d)
2016-08-03 13:19:50 +02:00
Paweł Dziepak
e95f4eaee4 Merge "partition_limit: Don't count dead partitions" from Duarte
"This patch series ensures we don't count dead partitions (i.e.,
partitions with no live rows) towards the partition_limit. We also
enforce the partition limit at the storage_proxy level, so that
limits with smp > 1 work correctly."

(cherry picked from commit 5f11a727c9)
2016-08-03 12:44:32 +03:00
Avi Kivity
2570da2006 Update seastar submodule
* seastar f603f88...0b53ab2 (2):
  > reactor: limit task backlog
  > reactor: make sure a poll cycle always happens when later is called

Fix runaway task queue growth on cpu-bound loads.
2016-08-03 12:33:54 +03:00
Tomasz Grabiec
b224ff6ede Merge 'pdziepak/row-cache-wide-entries/v4' from seastar-dev.git
This series adds the ability for the partition cache to keep information on
whether a partition's size makes it uncacheable. During reads, these
entries save us IO operations, since we already know that the partition
is too big to be put in the cache.

The first part of the patchset makes all mutation_readers allow the
streamed_mutations they produce to outlive them, which is a guarantee
used later by the code handling reading large partitions.

(cherry picked from commit d2ed75c9ff)
2016-08-02 20:24:29 +02:00
Piotr Jastrzebski
6960fce9b2 Use continuity flag correctly with concurrent invalidations
Between reading a cache entry and actually using it, invalidations can
happen, so we have to check that no flag was cleared; if one was, we
need to read the entry again.

Fixes #1464.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
Message-Id: <7856b0ded45e42774ccd6f402b5ee42175bd73cf.1469701026.git.piotr@scylladb.com>
(cherry picked from commit fdfd1af694)
2016-08-02 20:24:22 +02:00
Avi Kivity
a556265ccd checked_file: preserve DMA alignment
Inherit the alignment parameters from the underlying file instead of
defaulting to 4096.  This gives better read performance on disks with 512-byte
sectors.

Fixes #1532.
Message-Id: <1470122188-25548-1-git-send-email-avi@scylladb.com>

(cherry picked from commit 9f35e4d328)
2016-08-02 12:22:37 +03:00
Duarte Nunes
8243d3d1e0 storage_service: Fix get_range_to_address_map_in_local_dc
This patch fixes a couple of bugs in
get_range_to_address_map_in_local_dc.

Fixes #1517

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1469782666-21320-1-git-send-email-duarte@scylladb.com>
(cherry picked from commit 7d1b7e8da3)
2016-07-29 11:24:12 +02:00
Pekka Enberg
2665bfdc93 Update seastar submodule
* seastar 103543a...f603f88 (1):
  > iotune: Fix SIGFPE with some executions
2016-07-29 11:11:56 +03:00
Tomasz Grabiec
3a1e8fffde Merge branch 'sstables/static-1.3/v1' from git@github.com:duarten/scylla.git into branch-1.3
The current code assumes cell names are always compound and may
wrongly report a non-static row as such. This patch addresses this
and adds a test case to catch regressions.

Backports the fix to #1495.
2016-07-28 15:07:41 +02:00
Gleb Natapov
23c340bed8 api: fix use after free in sum_sstable
get_sstables_including_compacted_undeleted() may return temporary shared
ptr which will be destroyed before the loop if not stored locally.

Fixes #1514

Message-Id: <20160728100504.GD2502@scylladb.com>
(cherry picked from commit 3531dd8d71)
2016-07-28 14:28:25 +03:00
Duarte Nunes
ff8a795021 sstables: Validate static cell is on static column
This patch enforces compatibility between a cell and the
corresponding column definition with regards to them being
static.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
2016-07-28 12:11:46 +02:00
Duarte Nunes
d11b0cac3b sstable_mutation_test: Test non-compound cell name
This patch adds a test case for reading non-compound cell names,
validating that such a cell is not incorrectly marked as static.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1469616205-4550-5-git-send-email-duarte@scylladb.com>
2016-07-28 12:11:37 +02:00
Duarte Nunes
5ad0448cc9 sstables: Don't assume cell name is compound
The current code assumes cell names are always compound and may
wrongly report a non-static row as such, since it looks at the first
bytes of the name assuming they are the component's length.

Tables with compact storage (which cannot contain static rows) may not
have a compound comparator, so we check for the table's compoundness
before checking for the static marker. We do this by delegating to
composite_view::is_static.

Fixes #1495

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1469616205-4550-4-git-send-email-duarte@scylladb.com>
2016-07-28 12:11:27 +02:00
Duarte Nunes
35ab2cadc2 sstables: Remove duplication in extract_clustering_key
This patch removes some duplicated code in extract_clustering_key(),
which is already handled in composite_view.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1469397806-8067-1-git-send-email-duarte@scylladb.com>
2016-07-28 12:11:22 +02:00
Duarte Nunes
a1cee9f97c sstables: Remove superfluous call to check_static()
When building a column we're calling check_static() twice;
refactor things a bit so that this doesn't happen and we reuse the
previous calculation.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1469397748-7987-1-git-send-email-duarte@scylladb.com>
2016-07-28 12:11:15 +02:00
Duarte Nunes
0ae7347d8e composite: Use operator[] instead of at()
Since we already do bounds checking in is_static(), we can use
bytes_view::operator[] instead of bytes_view::at() to avoid repeating
the bounds checking.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1469616205-4550-3-git-send-email-duarte@scylladb.com>
2016-07-28 12:10:14 +02:00
Duarte Nunes
b04168c015 composite_view: Fix is_static
composite_view's is_static function is wrong because:

1) It doesn't guard against the composite being a compound;
2) It doesn't deal with widening due to integral promotions and
   consequent sign extension.

This patch fixes this by ensuring there's only one correct
implementation of is_static, to avoid code duplication and
enforce test coverage.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1469616205-4550-2-git-send-email-duarte@scylladb.com>
2016-07-28 12:10:06 +02:00
Duarte Nunes
4e13853cbc compound_compat: Only compound values can be static
If a composite is not a compound, then it doesn't carry a length
prefix where static information is encoded. In its absence, a
non-compound composite can never be static.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1469397561-7748-1-git-send-email-duarte@scylladb.com>
2016-07-28 12:09:59 +02:00
Pekka Enberg
503f6c6755 release: prepare for 1.3.rc2 2016-07-28 10:57:11 +03:00
Tomasz Grabiec
7d73599acd tests: lsa_async_eviction_test: Use chunked_fifo<>
To protect against large reallocations during push() which are done
under reclaim lock and may fail.
2016-07-28 09:43:51 +02:00
Piotr Jastrzebski
bf27379583 Add tests for wide partition handling in cache.
They shouldn't be cached.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
(cherry picked from commit 7d29cdf81f)
2016-07-27 14:09:45 +03:00
Piotr Jastrzebski
02cf5a517a Add collectd counter for uncached wide partitions.
Keep track of every read of a wide partition that's
not cached.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
(cherry picked from commit 37a7d49676)
2016-07-27 14:09:40 +03:00
Piotr Jastrzebski
ec3d59bf13 Add flag to configure
max size of a cached partition.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
(cherry picked from commit 636a4acfd0)
2016-07-27 14:09:34 +03:00
Piotr Jastrzebski
30c72ef3b4 Try to read whole streamed_mutation up to limit
If the limit is exceeded, return the streamed_mutation
and don't cache it.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
(cherry picked from commit 98c12dc2e2)
2016-07-27 14:09:29 +03:00
Piotr Jastrzebski
15e69a32ba Implement mutation_from_streamed_mutation_with_limit
If a mutation is bigger than this limit,
it won't be read and mutation_from_streamed_mutation
will return an empty optional.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
(cherry picked from commit 0d39bb1ad0)
2016-07-27 14:09:23 +03:00
Paweł Dziepak
4e43cb84ff tests/sstables: test reading sstable with duplicated range tombstones
Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
(cherry picked from commit b405ff8ad2)
2016-07-27 14:09:02 +03:00
Paweł Dziepak
07d5e939be sstables: avoid recursion in sstable_streamed_mutation::read_next()
Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
(cherry picked from commit 04f2c278c2)
2016-07-27 14:06:03 +03:00
Paweł Dziepak
a2a5a22504 sstables: protect against duplicated range tombstones
The promoted index may cause an sstable to have range tombstones duplicated
several times. These duplicates appear in the "wrong" place, since they
are smaller than the entity preceding them.

This patch ignores such duplicates by skipping range tombstones that are
smaller than previously read ones.

Moreover, these duplicated range tombstones may appear in the middle of a
clustering row, so the sstable reader has also gained the ability to
merge parts of the row in such cases.

Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
(cherry picked from commit 08032db269)
2016-07-27 14:05:58 +03:00
Paweł Dziepak
a39bec0e24 tests: extract streamed_mutation assertions
Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
(cherry picked from commit 50469e5ef3)
2016-07-27 14:05:43 +03:00
Duarte Nunes
f0af5719d5 thrift: Preserve partition order when accumulating
This patch changes the column_visitor so that it preserves the order
of the partitions it visits when building the accumulation result.

This is required by verbs such as get_range_slice, on top of which
users can implement paging. In such cases, the last key returned by
the query will be that start of the range for the next query. If
that key is not actually the last in the partitioner's order, then
the new request will likely result in duplicate values being sent.

Ref #693

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1469568135-19644-1-git-send-email-duarte@scylladb.com>
(cherry picked from commit 5aaf43d1bc)
2016-07-27 12:11:41 +03:00
Avi Kivity
0523000af5 size_estimates_recorder: unwrap ranges before searching for sstables
column_family::select_sstables() requires unwrapped ranges, so unwrap
them.  Fixes crash with Leveled Compaction Strategy.

Fixes #1507.

Reviewed-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1469563488-14869-1-git-send-email-avi@scylladb.com>
(cherry picked from commit 64d0cf58ea)
2016-07-27 10:07:13 +03:00
Paweł Dziepak
69a0e6e002 sstables: fix skipping partitions with no rows
If a partition contains no static or clustering rows and no range
tombstones, mp_row_consumer will return a disengaged mutation_fragment_opt
with the is_mutation_end flag set to mark the end of this partition.

Currently, the mutation_reader::impl code incorrectly recognizes a
disengaged mutation fragment as the end of the stream of all mutations.
This patch fixes that by using the is_mutation_end flag to determine
whether the end of a partition or the end of the stream was reached.

Fixes #1503.

Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
Message-Id: <1469525449-15525-1-git-send-email-pdziepak@scylladb.com>
(cherry picked from commit efa690ce8c)
2016-07-26 13:10:31 +03:00
Amos Kong
58d4de295c scylla-housekeeping: fix typo of script path
I tried to start scylla-housekeeping service by:
 # sudo systemctl restart scylla-housekeeping.service

But it failed due to a wrong script path; error detail:
 systemd[5605]: Failed at step EXEC spawning
 /usr/lib/scylla/scylla-Housekeeping: No such file or directory

The right script name is 'scylla-housekeeping'

Signed-off-by: Amos Kong <amos@scylladb.com>
Message-Id: <c11319a3c7d3f22f613f5f6708699be0aa6bd740.1469506477.git.amos@scylladb.com>
(cherry picked from commit 64530e9686)
2016-07-26 09:19:15 +03:00
Vlad Zolotarov
026061733f tracing: set a default TTL for system_traces tables when they are created
Fixes #1482

Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Message-Id: <1469104164-4452-1-git-send-email-vladz@cloudius-systems.com>
(cherry picked from commit 4647ad9d8a)
2016-07-25 13:50:43 +03:00
Vlad Zolotarov
1d7ed190f8 SELECT tracing instrumentation: improve inter-nodes communication stages messages
Add/fix "sending to"/"received from" messages.

With this patch the single key select trace with a data on an external node
looks as follows:

Tracing session: 65dbfcc0-4f51-11e6-8dd2-000000000001

 activity                                                                                                                        | timestamp                  | source    | source_elapsed
---------------------------------------------------------------------------------------------------------------------------------+----------------------------+-----------+----------------
                                                                                                              Execute CQL3 query | 2016-07-21 17:42:50.124000 | 127.0.0.2 |              0
                                                                                                   Parsing a statement [shard 1] | 2016-07-21 17:42:50.124127 | 127.0.0.2 |             --
                                                                                                Processing a statement [shard 1] | 2016-07-21 17:42:50.124190 | 127.0.0.2 |             64
 Creating read executor for token 2309717968349690594 with all: {127.0.0.1} targets: {127.0.0.1} repair decision: NONE [shard 1] | 2016-07-21 17:42:50.124229 | 127.0.0.2 |            103
                                                                            read_data: sending a message to /127.0.0.1 [shard 1] | 2016-07-21 17:42:50.124234 | 127.0.0.2 |            108
                                                                           read_data: message received from /127.0.0.2 [shard 1] | 2016-07-21 17:42:50.124358 | 127.0.0.1 |             14
                                                          read_data handling is done, sending a response to /127.0.0.2 [shard 1] | 2016-07-21 17:42:50.124434 | 127.0.0.1 |             89
                                                                               read_data: got response from /127.0.0.1 [shard 1] | 2016-07-21 17:42:50.124662 | 127.0.0.2 |            536
                                                                                  Done processing - preparing a result [shard 1] | 2016-07-21 17:42:50.124695 | 127.0.0.2 |            569
                                                                                                                Request complete | 2016-07-21 17:42:50.124580 | 127.0.0.2 |            580

Fixes #1481

Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Message-Id: <1469112271-22818-1-git-send-email-vladz@cloudius-systems.com>
(cherry picked from commit 57b58cad8e)
2016-07-25 13:50:39 +03:00
Raphael S. Carvalho
2d66a4621a compaction: do not convert timestamp resolution to uppercase
C* only allows timestamp resolution in uppercase, so we shouldn't
be forgiving about it, otherwise migration to C* will not work.
Note that timestamp resolution is stored in the schema's compaction
strategy options.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <d64878fc9bbcf40fd8de3d0f08cce9f6c2fde717.1469133851.git.raphaelsc@scylladb.com>
(cherry picked from commit c4f34f5038)
2016-07-25 13:47:23 +03:00
Duarte Nunes
aaa9b5ace8 system_keyspace: Add query_size_estimates() function
The query_size_estimates() function queries the size_estimates system
table for a given keyspace and table, filtering out the token ranges
according to the specified tokens.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
(cherry picked from commit ecfa04da77)
2016-07-25 13:43:16 +03:00
Duarte Nunes
8d491e9879 size_estimates_recorder: Fix stop()
This patch fixes stop() by checking the current CPU instead of
whether the service is active (which it won't be at the time stop() is
called).

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
(cherry picked from commit d984cc30bf)
2016-07-25 13:43:08 +03:00
Duarte Nunes
b63c9fb84b system_keyspace: Avoid pointers in range_estimates
This patch makes range_estimates a proper struct, where tokens are
represented as dht::tokens rather than dht::ring_position*.

We also pass other arguments to update_ and clear_size_estimates by
copy, since one will already be required.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
(cherry picked from commit e16f3f2969)
2016-07-25 13:42:53 +03:00
Duarte Nunes
b229f03198 thrift: Fail when creating mixed CF
This patch ensures we fail when creating a mixed column family, either
when adding columns to a dynamic CF through updated_column_family() or
when adding a dynamic column upon insertion.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1469378658-19853-1-git-send-email-duarte@scylladb.com>
(cherry picked from commit 5c4a2044d5)
2016-07-25 13:42:05 +03:00
Duarte Nunes
6caa59560b thrift: Correctly translate no_such_column_family
The no_such_column_family exception is translated to
InvalidRequestException instead of to NotFoundException.

8991d35231 exposed this problem.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1469376674-14603-1-git-send-email-duarte@scylladb.com>
(cherry picked from commit 560cc12fd7)
2016-07-25 13:41:58 +03:00
Duarte Nunes
79196af9fb thrift: Implement describe_splits verb
This patch implements the describe_splits verb on top of
describe_splits_ex.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
(cherry picked from commit ab08561b89)
2016-07-25 13:41:54 +03:00
Duarte Nunes
afe09da858 thrift: Implement describe_splits_ex verb
This patch implements the describe_splits_ex verb by querying the
size_estimates system table for all the estimates in the specified
token range.

If the keys_per_split argument is bigger than the
estimated partition count, we merge ranges until keys_per_split
is met. Note that since the tokens can't be split any further,
keys_per_split might be less than the reported number of keys in one
or more ranges.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
(cherry picked from commit 472c23d7d2)
2016-07-25 13:41:46 +03:00
Duarte Nunes
d6cb41ff24 thrift: Handle and convert invalid_request_exception
This patch converts an exceptions::invalid_request_exception
into a Thrift InvalidRequestException instead of into a generic one.

This makes TitanDB work correctly, which expects an
InvalidRequestException when setting a non-existent keyspace.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1469362086-1013-1-git-send-email-duarte@scylladb.com>
(cherry picked from commit 2be45c4806)
2016-07-24 16:46:18 +03:00
Duarte Nunes
6bf77c7b49 thrift: Use database::find_schema directly
This patch changes lookup_schema() so it directly calls
database::find_schema() instead of going through
database::find_column_family(). It also drops conversion of the
no_such_column_family exception, as that is already handled at a higher
layer.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
(cherry picked from commit 8991d35231)
2016-07-24 16:46:05 +03:00
Duarte Nunes
6d34b4dab7 thrift: Remove hardcoded version constant
...and use the one in thrift_server.hh instead.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
(cherry picked from commit 038d42c589)
2016-07-24 16:45:46 +03:00
Duarte Nunes
d367f1e9ab thrift: Remove unused with_cob_dereference function
Signed-off-by: Duarte Nunes <duarte@scylladb.com>
(cherry picked from commit 8bb43d09b1)
2016-07-24 16:45:22 +03:00
Avi Kivity
75a36ae453 bloom_filter: fix overflow for large filters
We use ::abs(), which has an int parameter, on long arguments, resulting
in incorrect results.

Switch to std::abs() instead, which has the correct overloads.

Fixes #1494.

Message-Id: <1469347802-28933-1-git-send-email-avi@scylladb.com>
(cherry picked from commit 900639915d)
2016-07-24 11:32:28 +03:00
Tomasz Grabiec
35c1781913 schema_tables: Fix hang during keyspace drop
Fixes #1484.

We drop tables as part of keyspace drop. Table drop starts with
creating a snapshot on all shards. All shards must use the same
snapshot timestamp which, among other things, is part of the snapshot
name. The timestamp is generated using supplied timestamp generating
function (joinpoint object). The joinpoint object will wait for all
shards to arrive and then generate and return the timestamp.

However, we drop tables in parallel, using the same joinpoint
instance. So joinpoint may be contacted by snapshotting shards of
tables A and B concurrently, generating timestamp t1 for some shards
of table A and some shards of table B. Later the remaining shards of
table A will get a different timestamp. As a result, different shards
may use different snapshot names for the same table. The snapshot
creation will never complete because the sealing fiber waits for all
shards to signal it, on the same name.

The fix is to give each table a separate joinpoint instance.

Message-Id: <1469117228-17879-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 5e8f0efc85)
2016-07-22 15:36:45 +02:00
Vlad Zolotarov
1489b28ffd cql_server::connection::process_prepare(): don't std::move() a shared_ptr captured by reference in value_of() lambda
A seastar::value_of() lambda used in a trace point was doing the unthinkable:
it called std::move() on a value captured by reference. Not only did it compile(!!!),
it also actually std::move()d the shared_ptr before it was used in make_result(),
which naturally caused a SIGSEGV crash.

Fixes #1491

Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Message-Id: <1469193763-27631-1-git-send-email-vladz@cloudius-systems.com>
(cherry picked from commit 9423c13419)
2016-07-22 16:33:17 +03:00
Avi Kivity
f975653c94 Update seastar submodule to point at scylla-seastar 2016-07-21 12:31:09 +03:00
Duarte Nunes
96f5cbb604 thrift: Omit regular columns for dynamic CFs
This patch skips adding the auto-generated regular column when
describing a dynamic Column family for the describe_keyspace(s) verbs.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1469091720-10113-1-git-send-email-duarte@scylladb.com>
(cherry picked from commit a436cf945c)
2016-07-21 12:06:29 +03:00
Raphael S. Carvalho
66ebef7d10 tests: add new test for date tiered strategy
This test sets the time window to 1 hour and checks that the strategy
works accordingly.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
(cherry picked from commit cf54af9e58)
2016-07-21 12:00:26 +03:00
Raphael S. Carvalho
789fb0db97 compaction: implement date tiered compaction strategy options
Now date tiered compaction strategy will take into account the
strategy options which are defined in the schema.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
(cherry picked from commit eaa6e281a2)
2016-07-21 12:00:18 +03:00
Pekka Enberg
af7c0f6433 Revert "Merge seastar upstream"
This reverts commit aaf6786997.

We should backport the iotune fixes for 1.3 and not pull everything.
2016-07-21 11:19:50 +03:00
Pekka Enberg
aaf6786997 Merge seastar upstream
* seastar 103543a...9d1db3f (8):
  > reactor: limit task backlog
  > iotune: Fix SIGFPE with some executions
  > Merge "Preparation for protobuf" from Amnon
  > byteorder: add missing cpu_to_be(), be_to_cpu() functions
  > rpc: fix gcc-7 compilation error
  > reactor: Register the smp metrics disabled
  > scollectd: Allow creating metric that is disabled
  > Merge "Propagate timeout to a server" from Gleb
2016-07-21 11:04:31 +03:00
Pekka Enberg
e8cb163cdf db/config: Start Thrift server by default
We have Thrift support now so start the server by default.
Message-Id: <1469002000-26767-1-git-send-email-penberg@scylladb.com>

(cherry picked from commit aff8cf319d)
2016-07-20 11:29:24 +03:00
Duarte Nunes
2d7c322805 thrift: Actually concatenate strings
This patch fixes concatenating a char[] with an int by using sprint
instead of just increasing the pointer.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
Message-Id: <1468971542-9600-1-git-send-email-duarte@scylladb.com>
(cherry picked from commit 64dff69077)
2016-07-20 11:09:15 +03:00
Tomasz Grabiec
13f18c6445 database: Add table name to log message about sealing
Message-Id: <1468917744-2539-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 0d26294fac)
2016-07-20 10:13:32 +03:00
Tomasz Grabiec
9c430c2cff schema_tables: Add more logging
Message-Id: <1468917771-2592-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit a0832f08d2)
2016-07-20 10:13:28 +03:00
Pekka Enberg
c84e030fe9 release: prepare for 1.3.rc1 2016-07-19 20:15:26 +03:00
192 changed files with 2452 additions and 6443 deletions

View File

@@ -8,14 +8,14 @@ In addition to required packages by Seastar, the following packages are required
Scylla uses submodules, so make sure you pull the submodules first by doing:
```
git submodule init
-git submodule update --init --recursive
+git submodule update --recursive
```
### Building and Running Scylla on Fedora
* Installing required packages:
```
-sudo dnf install yaml-cpp-devel lz4-devel zlib-devel snappy-devel jsoncpp-devel thrift-devel antlr3-tool antlr3-C++-devel libasan libubsan gcc-c++ gnutls-devel ninja-build ragel libaio-devel cryptopp-devel xfsprogs-devel numactl-devel hwloc-devel libpciaccess-devel libxml2-devel python3-pyparsing lksctp-tools-devel protobuf-devel protobuf-compiler systemd-devel libunwind-devel
+sudo yum install yaml-cpp-devel lz4-devel zlib-devel snappy-devel jsoncpp-devel thrift-devel antlr3-tool antlr3-C++-devel libasan libubsan gcc-c++ gnutls-devel ninja-build ragel libaio-devel cryptopp-devel xfsprogs-devel numactl-devel hwloc-devel libpciaccess-devel libxml2-devel python3-pyparsing lksctp-tools-devel
```
* Build Scylla

View File

@@ -1,6 +1,6 @@
#!/bin/sh
-VERSION=1.4.3
+VERSION=1.3.5
if test -f version
then

View File

@@ -777,7 +777,7 @@
]
},
{
-"path": "/storage_proxy/metrics/read/moving_average_histogram",
+"path": "/storage_proxy/metrics/read/moving_avrage_histogram",
"operations": [
{
"method": "GET",
@@ -792,7 +792,7 @@
]
},
{
-"path": "/storage_proxy/metrics/range/moving_average_histogram",
+"path": "/storage_proxy/metrics/range/moving_avrage_histogram",
"operations": [
{
"method": "GET",
@@ -942,7 +942,7 @@
]
},
{
-"path": "/storage_proxy/metrics/write/moving_average_histogram",
+"path": "/storage_proxy/metrics/write/moving_avrage_histogram",
"operations": [
{
"method": "GET",

View File

@@ -1736,57 +1736,6 @@
}
]
},
-{
-"path":"/storage_service/slow_query",
-"operations":[
-{
-"method":"POST",
-"summary":"Set slow query parameter",
-"type":"void",
-"nickname":"set_slow_query",
-"produces":[
-"application/json"
-],
-"parameters":[
-{
-"name":"enable",
-"description":"set it to true to enable, anything else to disable",
-"required":false,
-"allowMultiple":false,
-"type":"boolean",
-"paramType":"query"
-},
-{
-"name":"ttl",
-"description":"TTL in seconds",
-"required":false,
-"allowMultiple":false,
-"type":"long",
-"paramType":"query"
-},
-{
-"name":"threshold",
-"description":"Slow query record threshold in microseconds",
-"required":false,
-"allowMultiple":false,
-"type":"long",
-"paramType":"query"
-}
-]
-},
-{
-"method":"GET",
-"summary":"Returns the slow query record configuration.",
-"type":"slow_query_info",
-"nickname":"get_slow_query_info",
-"produces":[
-"application/json"
-],
-"parameters":[
-]
-}
-]
-},
{
"path":"/storage_service/auto_compaction/{keyspace}",
"operations":[
@@ -2184,24 +2133,6 @@
}
}
},
-"slow_query_info": {
-"id":"slow_query_info",
-"description":"Slow query triggering information",
-"properties":{
-"enable":{
-"type":"boolean",
-"description":"Is slow query logging enable or disable"
-},
-"ttl":{
-"type":"long",
-"description":"The slow query TTL in seconds"
-},
-"threshold":{
-"type":"long",
-"description":"The slow query logging threshold in microseconds. Queries that takes longer, will be logged"
-}
-}
-},
"endpoint_detail":{
"id":"endpoint_detail",
"description":"Endpoint detail",

View File

@@ -116,7 +116,6 @@ inline
httpd::utils_json::histogram to_json(const utils::ihistogram& val) {
httpd::utils_json::histogram h;
h = val;
-h.sum = val.estimated_sum();
return h;
}
@@ -130,7 +129,7 @@ httpd::utils_json::rate_moving_average meter_to_json(const utils::rate_moving_av
inline
httpd::utils_json::rate_moving_average_and_histogram timer_to_json(const utils::rate_moving_average_and_histogram& val) {
httpd::utils_json::rate_moving_average_and_histogram h;
-h.hist = to_json(val.hist);
+h.hist = val.hist;
h.meter = meter_to_json(val.rate);
return h;
}

View File

@@ -24,7 +24,7 @@
#include <vector>
#include "http/exception.hh"
#include "sstables/sstables.hh"
-#include "utils/estimated_histogram.hh"
+#include "sstables/estimated_histogram.hh"
#include <algorithm>
namespace api {
@@ -403,14 +403,14 @@ void set_column_family(http_context& ctx, routes& r) {
});
cf::get_estimated_row_size_histogram.set(r, [&ctx] (std::unique_ptr<request> req) {
-return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) {
-utils::estimated_histogram res(0);
+return map_reduce_cf(ctx, req->param["name"], sstables::estimated_histogram(0), [](column_family& cf) {
+sstables::estimated_histogram res(0);
for (auto i: *cf.get_sstables() ) {
res.merge(i->get_stats_metadata().estimated_row_size);
}
return res;
},
-utils::estimated_histogram_merge, utils_json::estimated_histogram());
+sstables::merge, utils_json::estimated_histogram());
});
cf::get_estimated_row_count.set(r, [&ctx] (std::unique_ptr<request> req) {
@@ -425,14 +425,14 @@ void set_column_family(http_context& ctx, routes& r) {
});
cf::get_estimated_column_count_histogram.set(r, [&ctx] (std::unique_ptr<request> req) {
-return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) {
-utils::estimated_histogram res(0);
+return map_reduce_cf(ctx, req->param["name"], sstables::estimated_histogram(0), [](column_family& cf) {
+sstables::estimated_histogram res(0);
for (auto i: *cf.get_sstables() ) {
res.merge(i->get_stats_metadata().estimated_column_count);
}
return res;
},
-utils::estimated_histogram_merge, utils_json::estimated_histogram());
+sstables::merge, utils_json::estimated_histogram());
});
cf::get_all_compression_ratio.set(r, [] (std::unique_ptr<request> req) {
@@ -807,10 +807,10 @@ void set_column_family(http_context& ctx, routes& r) {
});
cf::get_sstables_per_read_histogram.set(r, [&ctx] (std::unique_ptr<request> req) {
-return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) {
+return map_reduce_cf(ctx, req->param["name"], sstables::estimated_histogram(0), [](column_family& cf) {
return cf.get_stats().estimated_sstable_per_read;
},
-utils::estimated_histogram_merge, utils_json::estimated_histogram());
+sstables::merge, utils_json::estimated_histogram());
});
cf::get_tombstone_scanned_histogram.set(r, [&ctx] (std::unique_ptr<request> req) {
@@ -869,17 +869,17 @@ void set_column_family(http_context& ctx, routes& r) {
});
cf::get_read_latency_estimated_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
-return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) {
+return map_reduce_cf(ctx, req->param["name"], sstables::estimated_histogram(0), [](column_family& cf) {
return cf.get_stats().estimated_read;
},
-utils::estimated_histogram_merge, utils_json::estimated_histogram());
+sstables::merge, utils_json::estimated_histogram());
});
cf::get_write_latency_estimated_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
-return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) {
+return map_reduce_cf(ctx, req->param["name"], sstables::estimated_histogram(0), [](column_family& cf) {
return cf.get_stats().estimated_write;
},
-utils::estimated_histogram_merge, utils_json::estimated_histogram());
+sstables::merge, utils_json::estimated_histogram());
});
cf::set_compaction_strategy_class.set(r, [&ctx](std::unique_ptr<request> req) {


@@ -52,9 +52,9 @@ static future<json::json_return_type> sum_timed_rate_as_long(distributed<proxy>
});
}
-static future<json::json_return_type> sum_estimated_histogram(http_context& ctx, utils::estimated_histogram proxy::stats::*f) {
-return ctx.sp.map_reduce0([f](const proxy& p) {return p.get_stats().*f;}, utils::estimated_histogram(),
-utils::estimated_histogram_merge).then([](const utils::estimated_histogram& val) {
+static future<json::json_return_type> sum_estimated_histogram(http_context& ctx, sstables::estimated_histogram proxy::stats::*f) {
+return ctx.sp.map_reduce0([f](const proxy& p) {return p.get_stats().*f;}, sstables::estimated_histogram(),
+sstables::merge).then([](const sstables::estimated_histogram& val) {
utils_json::estimated_histogram res;
res = val;
return make_ready_future<json::json_return_type>(res);


@@ -681,37 +681,6 @@ void set_storage_service(http_context& ctx, routes& r) {
return make_ready_future<json::json_return_type>(tracing::tracing::get_local_tracing_instance().get_trace_probability());
});
-ss::get_slow_query_info.set(r, [](const_req req) {
-ss::slow_query_info res;
-res.enable = tracing::tracing::get_local_tracing_instance().slow_query_tracing_enabled();
-res.ttl = tracing::tracing::get_local_tracing_instance().slow_query_record_ttl().count() ;
-res.threshold = tracing::tracing::get_local_tracing_instance().slow_query_threshold().count();
-return res;
-});
-ss::set_slow_query.set(r, [](std::unique_ptr<request> req) {
-auto enable = req->get_query_param("enable");
-auto ttl = req->get_query_param("ttl");
-auto threshold = req->get_query_param("threshold");
-try {
-return tracing::tracing::tracing_instance().invoke_on_all([enable, ttl, threshold] (auto& local_tracing) {
-if (threshold != "") {
-local_tracing.set_slow_query_threshold(std::chrono::microseconds(std::stol(threshold.c_str())));
-}
-if (ttl != "") {
-local_tracing.set_slow_query_record_ttl(std::chrono::seconds(std::stol(ttl.c_str())));
-}
-if (enable != "") {
-local_tracing.set_slow_query_enabled(strcasecmp(enable.c_str(), "true") == 0);
-}
-}).then([] {
-return make_ready_future<json::json_return_type>(json_void());
-});
-} catch (...) {
-throw httpd::bad_param_exception(sprint("Bad format value: "));
-}
-});
ss::enable_auto_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
//TBD
unimplemented();


@@ -236,19 +236,11 @@ public:
static atomic_cell make_live(api::timestamp_type timestamp, bytes_view value) {
return atomic_cell_type::make_live(timestamp, value);
}
-static atomic_cell make_live(api::timestamp_type timestamp, const bytes& value) {
-return make_live(timestamp, bytes_view(value));
-}
static atomic_cell make_live(api::timestamp_type timestamp, bytes_view value,
gc_clock::time_point expiry, gc_clock::duration ttl)
{
return atomic_cell_type::make_live(timestamp, value, expiry, ttl);
}
-static atomic_cell make_live(api::timestamp_type timestamp, const bytes& value,
-gc_clock::time_point expiry, gc_clock::duration ttl)
-{
-return make_live(timestamp, bytes_view(value), expiry, ttl);
-}
static atomic_cell make_live(api::timestamp_type timestamp, bytes_view value, ttl_opt ttl) {
if (!ttl) {
return atomic_cell_type::make_live(timestamp, value);


@@ -38,7 +38,6 @@ class bytes_ostream {
public:
using size_type = bytes::size_type;
using value_type = bytes::value_type;
-static constexpr size_type max_chunk_size = 16 * 1024;
private:
static_assert(sizeof(value_type) == 1, "value_type is assumed to be one byte long");
struct chunk {
@@ -154,18 +153,19 @@ public:
}
bytes_ostream& operator=(const bytes_ostream& o) {
-if (this != &o) {
-auto x = bytes_ostream(o);
-*this = std::move(x);
-}
+_size = 0;
+_current = nullptr;
+_begin = {};
+append(o);
return *this;
}
bytes_ostream& operator=(bytes_ostream&& o) noexcept {
-if (this != &o) {
-this->~bytes_ostream();
-new (this) bytes_ostream(std::move(o));
-}
+_size = o._size;
+_begin = std::move(o._begin);
+_current = o._current;
+o._current = nullptr;
+o._size = 0;
return *this;
}
@@ -174,7 +174,7 @@ public:
value_type* ptr;
// makes the place_holder look like a stream
seastar::simple_output_stream get_stream() {
-return seastar::simple_output_stream(reinterpret_cast<char*>(ptr), sizeof(T));
+return seastar::simple_output_stream{reinterpret_cast<char*>(ptr)};
}
};
@@ -195,19 +195,19 @@ public:
if (v.empty()) {
return;
}
-auto this_size = std::min(v.size(), size_t(current_space_left()));
-if (this_size) {
-memcpy(_current->data + _current->offset, v.begin(), this_size);
-_current->offset += this_size;
-_size += this_size;
-v.remove_prefix(this_size);
-}
-while (!v.empty()) {
-auto this_size = std::min(v.size(), size_t(max_chunk_size));
-std::copy_n(v.begin(), this_size, alloc(this_size));
-v.remove_prefix(this_size);
-}
+auto space_left = current_space_left();
+if (v.size() <= space_left) {
+memcpy(_current->data + _current->offset, v.begin(), v.size());
+_current->offset += v.size();
+_size += v.size();
+} else {
+if (space_left) {
+memcpy(_current->data + _current->offset, v.begin(), space_left);
+_current->offset += space_left;
+_size += space_left;
+v.remove_prefix(space_left);
+}
+memcpy(alloc(v.size()), v.begin(), v.size());
+}
}
@@ -272,8 +272,13 @@ public:
}
void append(const bytes_ostream& o) {
-for (auto&& bv : o.fragments()) {
-write(bv);
-}
+if (o.size() > 0) {
+auto dst = alloc(o.size());
+auto r = o._begin.get();
+while (r) {
+dst = std::copy_n(r->data, r->offset, dst);
+r = r->next.get();
+}
+}
}
@@ -323,45 +328,6 @@ public:
_current->next = nullptr;
_current->offset = pos._offset;
}
-void reduce_chunk_count() {
-// FIXME: This is a simplified version. It linearizes the whole buffer
-// if its size is below max_chunk_size. We probably could also gain
-// some read performance by doing "real" reduction, i.e. merging
-// all chunks until all but the last one is max_chunk_size.
-if (size() < max_chunk_size) {
-linearize();
-}
-}
-bool operator==(const bytes_ostream& other) const {
-auto as = fragments().begin();
-auto as_end = fragments().end();
-auto bs = other.fragments().begin();
-auto bs_end = other.fragments().end();
-auto a = *as++;
-auto b = *bs++;
-while (!a.empty() || !b.empty()) {
-auto now = std::min(a.size(), b.size());
-if (!std::equal(a.begin(), a.begin() + now, b.begin(), b.begin() + now)) {
-return false;
-}
-a.remove_prefix(now);
-if (a.empty() && as != as_end) {
-a = *as++;
-}
-b.remove_prefix(now);
-if (b.empty() && bs != bs_end) {
-b = *bs++;
-}
-}
-return true;
-}
-bool operator!=(const bytes_ostream& other) const {
-return !(*this == other);
-}
};
template<>

clustering_key_filter.cc Normal file

@@ -0,0 +1,138 @@
/*
* Copyright (C) 2016 ScyllaDB
*
* Modified by ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include "clustering_key_filter.hh"
#include "keys.hh"
#include "query-request.hh"
#include "range.hh"
namespace query {
const clustering_row_ranges&
clustering_key_filtering_context::get_ranges(const partition_key& key) const {
static thread_local clustering_row_ranges full_range = {{}};
return _factory ? _factory->get_ranges(key) : full_range;
}
clustering_key_filtering_context clustering_key_filtering_context::create_no_filtering() {
return clustering_key_filtering_context{};
}
const clustering_key_filtering_context no_clustering_key_filtering =
clustering_key_filtering_context::create_no_filtering();
class stateless_clustering_key_filter_factory : public clustering_key_filter_factory {
clustering_key_filter _filter;
clustering_row_ranges _ranges;
public:
stateless_clustering_key_filter_factory(clustering_row_ranges&& ranges,
clustering_key_filter&& filter)
: _filter(std::move(filter)), _ranges(std::move(ranges)) {}
virtual clustering_key_filter get_filter(const partition_key& key) override {
return _filter;
}
virtual clustering_key_filter get_filter_for_sorted(const partition_key& key) override {
return _filter;
}
virtual const clustering_row_ranges& get_ranges(const partition_key& key) override {
return _ranges;
}
virtual bool want_static_columns(const partition_key& key) override {
return true;
}
};
class partition_slice_clustering_key_filter_factory : public clustering_key_filter_factory {
schema_ptr _schema;
const partition_slice& _slice;
clustering_key_prefix::prefix_equal_tri_compare _cmp;
clustering_row_ranges _ck_ranges;
public:
partition_slice_clustering_key_filter_factory(schema_ptr s, const partition_slice& slice)
: _schema(std::move(s)), _slice(slice), _cmp(*_schema) {}
virtual clustering_key_filter get_filter(const partition_key& key) override {
const clustering_row_ranges& ranges = _slice.row_ranges(*_schema, key);
return [this, &ranges] (const clustering_key& key) {
return std::any_of(std::begin(ranges), std::end(ranges),
[this, &key] (const clustering_range& r) { return r.contains(key, _cmp); });
};
}
virtual clustering_key_filter get_filter_for_sorted(const partition_key& key) override {
const clustering_row_ranges& ranges = _slice.row_ranges(*_schema, key);
return [this, &ranges] (const clustering_key& key) {
return std::any_of(std::begin(ranges), std::end(ranges),
[this, &key] (const clustering_range& r) { return r.contains(key, _cmp); });
};
}
virtual const clustering_row_ranges& get_ranges(const partition_key& key) override {
if (_slice.options.contains(query::partition_slice::option::reversed)) {
_ck_ranges = _slice.row_ranges(*_schema, key);
std::reverse(_ck_ranges.begin(), _ck_ranges.end());
return _ck_ranges;
}
return _slice.row_ranges(*_schema, key);
}
virtual bool want_static_columns(const partition_key& key) override {
return true;
}
};
static const shared_ptr<clustering_key_filter_factory>
create_partition_slice_filter(schema_ptr s, const partition_slice& slice) {
return ::make_shared<partition_slice_clustering_key_filter_factory>(std::move(s), slice);
}
const clustering_key_filtering_context
clustering_key_filtering_context::create(schema_ptr schema, const partition_slice& slice) {
static thread_local clustering_key_filtering_context accept_all = clustering_key_filtering_context(
::make_shared<stateless_clustering_key_filter_factory>(clustering_row_ranges{{}},
[](const clustering_key&) { return true; }));
static thread_local clustering_key_filtering_context reject_all = clustering_key_filtering_context(
::make_shared<stateless_clustering_key_filter_factory>(clustering_row_ranges{},
[](const clustering_key&) { return false; }));
if (slice.get_specific_ranges()) {
return clustering_key_filtering_context(create_partition_slice_filter(schema, slice));
}
const clustering_row_ranges& ranges = slice.default_row_ranges();
if (ranges.empty()) {
return reject_all;
}
if (ranges.size() == 1 && ranges[0].is_full()) {
return accept_all;
}
return clustering_key_filtering_context(create_partition_slice_filter(schema, slice));
}
}


@@ -22,46 +22,61 @@
*/
#pragma once
#include <functional>
#include <vector>
#include "core/shared_ptr.hh"
#include "database_fwd.hh"
#include "schema.hh"
#include "query-request.hh"
template<typename T> class range;
namespace query {
class clustering_key_filter_ranges {
clustering_row_ranges _storage;
const clustering_row_ranges& _ref;
class partition_slice;
// A predicate that tells if a clustering key should be accepted.
using clustering_key_filter = std::function<bool(const clustering_key&)>;
// A factory for clustering key filter which can be reused for multiple clustering keys.
class clustering_key_filter_factory {
public:
clustering_key_filter_ranges(const clustering_row_ranges& ranges) : _ref(ranges) { }
struct reversed { };
clustering_key_filter_ranges(reversed, const clustering_row_ranges& ranges)
: _storage(ranges.rbegin(), ranges.rend()), _ref(_storage) { }
// Create a clustering key filter that can be used for multiple clustering keys with no restrictions.
virtual clustering_key_filter get_filter(const partition_key&) = 0;
// Create a clustering key filter that can be used for multiple clustering keys but they have to be sorted.
virtual clustering_key_filter get_filter_for_sorted(const partition_key&) = 0;
virtual const std::vector<range<clustering_key_prefix>>& get_ranges(const partition_key&) = 0;
// Whether we want to get the static row, in addition to the desired clustering rows
virtual bool want_static_columns(const partition_key&) = 0;
clustering_key_filter_ranges(clustering_key_filter_ranges&& other) noexcept
: _storage(std::move(other._storage))
, _ref(&other._ref == &other._storage ? _storage : other._ref)
{ }
clustering_key_filter_ranges& operator=(clustering_key_filter_ranges&& other) noexcept {
if (this != &other) {
this->~clustering_key_filter_ranges();
new (this) clustering_key_filter_ranges(std::move(other));
}
return *this;
}
auto begin() const { return _ref.begin(); }
auto end() const { return _ref.end(); }
bool empty() const { return _ref.empty(); }
size_t size() const { return _ref.size(); }
static clustering_key_filter_ranges get_ranges(const schema& schema, const query::partition_slice& slice, const partition_key& key) {
const query::clustering_row_ranges& ranges = slice.row_ranges(schema, key);
if (slice.options.contains(query::partition_slice::option::reversed)) {
return clustering_key_filter_ranges(clustering_key_filter_ranges::reversed{}, ranges);
}
return clustering_key_filter_ranges(ranges);
}
virtual ~clustering_key_filter_factory() = default;
};
class clustering_key_filtering_context {
private:
shared_ptr<clustering_key_filter_factory> _factory;
clustering_key_filtering_context() {};
clustering_key_filtering_context(shared_ptr<clustering_key_filter_factory> factory) : _factory(factory) {}
public:
// Create a clustering key filter that can be used for multiple clustering keys with no restrictions.
clustering_key_filter get_filter(const partition_key& key) const {
return _factory ? _factory->get_filter(key) : [] (const clustering_key&) { return true; };
}
// Create a clustering key filter that can be used for multiple clustering keys but they have to be sorted.
clustering_key_filter get_filter_for_sorted(const partition_key& key) const {
return _factory ? _factory->get_filter_for_sorted(key) : [] (const clustering_key&) { return true; };
}
const std::vector<range<clustering_key_prefix>>& get_ranges(const partition_key& key) const;
bool want_static_columns(const partition_key& key) const {
return _factory ? _factory->want_static_columns(key) : true;
}
static const clustering_key_filtering_context create(schema_ptr, const partition_slice&);
static clustering_key_filtering_context create_no_filtering();
};
extern const clustering_key_filtering_context no_clustering_key_filtering;
}


@@ -56,14 +56,11 @@ public:
// Some strategies may look at the compacted and resulting sstables to
// get some useful information for subsequent compactions.
-void notify_completion(const std::vector<lw_shared_ptr<sstable>>& removed, const std::vector<lw_shared_ptr<sstable>>& added);
+void notify_completion(schema_ptr schema, const std::vector<lw_shared_ptr<sstable>>& removed, const std::vector<lw_shared_ptr<sstable>>& added);
// Return if parallel compaction is allowed by strategy.
bool parallel_compaction() const;
-// Return if optimization to rule out sstables based on clustering key filter should be applied.
-bool use_clustering_key_filter() const;
// An estimation of number of compaction for strategy to be satisfied.
int64_t estimated_pending_compactions(column_family& cf) const;


@@ -211,16 +211,16 @@ batch_size_warn_threshold_in_kb: 5
# increase system_auth keyspace replication factor if you use this authorizer.
# authorizer: AllowAllAuthorizer
-###################################################
-## Not currently supported, reserved for future use
-###################################################
# initial_token allows you to specify tokens manually. While you can use it with
# vnodes (num_tokens > 1, above) -- in which case you should provide a
# comma-separated list -- it's primarily used when adding nodes to legacy clusters
# that do not have vnodes enabled.
# initial_token:
+###################################################
+## Not currently supported, reserved for future use
+###################################################
# See http://wiki.apache.org/cassandra/HintedHandoff
# May either be "true" or "false" to enable globally, or contain a list
# of data centers to enable per-datacenter.
@@ -813,13 +813,3 @@ commitlog_total_space_in_mb: -1
# freeing processor resources when there is other work to be done.
#
# defragment_memory_on_idle: true
-#
-# prometheus port
-# By default, Scylla opens prometheus API port on port 9180
-# setting the port to 0 will disable the prometheus API.
-# prometheus_port: 9180
-#
-# prometheus address
-# By default, Scylla binds all interfaces to the prometheus API
-# It is possible to restrict the listening address to a specific one
-# prometheus_address: 0.0.0.0


@@ -220,9 +220,6 @@ scylla_tests = [
'tests/range_tombstone_list_test',
'tests/anchorless_list_test',
'tests/database_test',
-'tests/nonwrapping_range_test',
-'tests/input_stream_test',
-'tests/sstable_atomic_deletion_test',
]
apps = [
@@ -302,6 +299,7 @@ scylla_core = (['database.cc',
'mutation_query.cc',
'key_reader.cc',
'keys.cc',
+'clustering_key_filter.cc',
'sstables/sstables.cc',
'sstables/compress.cc',
'sstables/row.cc',
@@ -310,7 +308,6 @@ scylla_core = (['database.cc',
'sstables/compaction.cc',
'sstables/compaction_strategy.cc',
'sstables/compaction_manager.cc',
-'sstables/atomic_deletion.cc',
'transport/event.cc',
'transport/event_notifier.cc',
'transport/server.cc',
@@ -428,7 +425,6 @@ scylla_core = (['database.cc',
'dht/i_partitioner.cc',
'dht/murmur3_partitioner.cc',
'dht/byte_ordered_partitioner.cc',
-'dht/random_partitioner.cc',
'dht/boot_strapper.cc',
'dht/range_streamer.cc',
'unimplemented.cc',
@@ -592,7 +588,6 @@ tests_not_using_seastar_test_framework = set([
'tests/idl_test',
'tests/range_tombstone_list_test',
'tests/anchorless_list_test',
-'tests/nonwrapping_range_test',
])
for t in tests_not_using_seastar_test_framework:
@@ -607,7 +602,6 @@ for t in scylla_tests:
deps['tests/sstable_test'] += ['tests/sstable_datafile_test.cc']
deps['tests/bytes_ostream_test'] = ['tests/bytes_ostream_test.cc']
-deps['tests/input_stream_test'] = ['tests/input_stream_test.cc']
deps['tests/UUID_test'] = ['utils/UUID_gen.cc', 'tests/UUID_test.cc']
deps['tests/murmur_hash_test'] = ['bytes.cc', 'utils/murmur_hash.cc', 'tests/murmur_hash_test.cc']
deps['tests/allocation_strategy_test'] = ['tests/allocation_strategy_test.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']


@@ -93,6 +93,12 @@ public:
specific_options options,
cql_serialization_format sf);
+explicit query_options(db::consistency_level consistency,
+std::vector<std::vector<bytes_view_opt>> value_views,
+bool skip_metadata,
+specific_options options,
+cql_serialization_format sf);
// Batch query_options constructor
explicit query_options(query_options&&, std::vector<std::vector<bytes_view_opt>> value_views);


@@ -64,8 +64,8 @@ class query_processor::internal_state {
service::query_state _qs;
public:
internal_state()
-: _qs(service::client_state{service::client_state::internal_tag()})
-{ }
+: _qs(service::client_state{service::client_state::internal_tag()}) {
+}
operator service::query_state&() {
return _qs;
}
@@ -88,7 +88,7 @@ api::timestamp_type query_processor::next_timestamp() {
}
query_processor::query_processor(distributed<service::storage_proxy>& proxy,
distributed<database>& db)
distributed<database>& db)
: _migration_subscriber{std::make_unique<migration_subscriber>(this)}
, _proxy(proxy)
, _db(db)
@@ -133,9 +133,8 @@ query_processor::process(const sstring_view& query_string, service::query_state&
}
future<::shared_ptr<result_message>>
-query_processor::process_statement(::shared_ptr<cql_statement> statement,
-service::query_state& query_state,
-const query_options& options)
+query_processor::process_statement(::shared_ptr<cql_statement> statement, service::query_state& query_state,
+const query_options& options)
{
#if 0
logger.trace("Process {} @CL.{}", statement, options.getConsistency());
@@ -146,7 +145,7 @@ query_processor::process_statement(::shared_ptr<cql_statement> statement,
statement->validate(_proxy, client_state);
-auto fut = make_ready_future<::shared_ptr<transport::messages::result_message>>();
+future<::shared_ptr<transport::messages::result_message>> fut = make_ready_future<::shared_ptr<transport::messages::result_message>>();
if (client_state.is_internal()) {
fut = statement->execute_internal(_proxy, query_state, options);
} else {
@@ -171,9 +170,7 @@ query_processor::prepare(const std::experimental::string_view& query_string, ser
}
future<::shared_ptr<transport::messages::result_message::prepared>>
-query_processor::prepare(const std::experimental::string_view& query_string,
-const service::client_state& client_state,
-bool for_thrift)
+query_processor::prepare(const std::experimental::string_view& query_string, const service::client_state& client_state, bool for_thrift)
{
auto existing = get_stored_prepared_statement(query_string, client_state.get_raw_keyspace(), for_thrift);
if (existing) {
@@ -189,9 +186,7 @@ query_processor::prepare(const std::experimental::string_view& query_string,
}
::shared_ptr<transport::messages::result_message::prepared>
-query_processor::get_stored_prepared_statement(const std::experimental::string_view& query_string,
-const sstring& keyspace,
-bool for_thrift)
+query_processor::get_stored_prepared_statement(const std::experimental::string_view& query_string, const sstring& keyspace, bool for_thrift)
{
if (for_thrift) {
auto statement_id = compute_thrift_id(query_string, keyspace);
@@ -211,10 +206,8 @@ query_processor::get_stored_prepared_statement(const std::experimental::string_v
}
future<::shared_ptr<transport::messages::result_message::prepared>>
-query_processor::store_prepared_statement(const std::experimental::string_view& query_string,
-const sstring& keyspace,
-::shared_ptr<statements::prepared_statement> prepared,
-bool for_thrift)
+query_processor::store_prepared_statement(const std::experimental::string_view& query_string, const sstring& keyspace,
+::shared_ptr<statements::prepared_statement> prepared, bool for_thrift)
{
#if 0
// Concatenate the current keyspace so we don't mix prepared statements between keyspace (#5352).
@@ -308,7 +301,7 @@ query_processor::parse_statement(const sstring_view& query)
if (!statement) {
throw exceptions::syntax_exception("Parsing failed");
}
-};
+return std::move(statement);
} catch (const exceptions::recognition_exception& e) {
throw exceptions::syntax_exception(sprint("Invalid or malformed CQL query string: %s", e.what()));
@@ -320,10 +313,10 @@ query_processor::parse_statement(const sstring_view& query)
}
}
-query_options query_processor::make_internal_options(::shared_ptr<statements::prepared_statement> p,
-const std::initializer_list<data_value>& values,
-db::consistency_level cl)
-{
+query_options query_processor::make_internal_options(
+::shared_ptr<statements::prepared_statement> p,
+const std::initializer_list<data_value>& values,
+db::consistency_level cl) {
if (p->bound_names.size() != values.size()) {
throw std::invalid_argument(sprint("Invalid number of values. Expecting %d but got %d", p->bound_names.size(), values.size()));
}
@@ -342,8 +335,8 @@ query_options query_processor::make_internal_options(::shared_ptr<statements::pr
return query_options(cl, bound_values);
}
-::shared_ptr<statements::prepared_statement> query_processor::prepare_internal(const sstring& query_string)
-{
+::shared_ptr<statements::prepared_statement> query_processor::prepare_internal(
+const sstring& query_string) {
auto& p = _internal_statements[query_string];
if (p == nullptr) {
auto np = parse_statement(query_string)->prepare(_db.local());
@@ -353,10 +346,9 @@ query_options query_processor::make_internal_options(::shared_ptr<statements::pr
return p;
}
-future<::shared_ptr<untyped_result_set>>
-query_processor::execute_internal(const sstring& query_string,
-const std::initializer_list<data_value>& values)
-{
+future<::shared_ptr<untyped_result_set>> query_processor::execute_internal(
+const sstring& query_string,
+const std::initializer_list<data_value>& values) {
if (log.is_enabled(logging::log_level::trace)) {
log.trace("execute_internal: \"{}\" ({})", query_string, ::join(", ", values));
}
@@ -364,23 +356,22 @@ query_processor::execute_internal(const sstring& query_string,
return execute_internal(p, values);
}
-future<::shared_ptr<untyped_result_set>>
-query_processor::execute_internal(::shared_ptr<statements::prepared_statement> p,
-const std::initializer_list<data_value>& values)
-{
+future<::shared_ptr<untyped_result_set>> query_processor::execute_internal(
+::shared_ptr<statements::prepared_statement> p,
+const std::initializer_list<data_value>& values) {
auto opts = make_internal_options(p, values);
-return do_with(std::move(opts), [this, p = std::move(p)](auto& opts) {
-return p->statement->execute_internal(_proxy, *_internal_state, opts).then([p](auto msg) {
-return make_ready_future<::shared_ptr<untyped_result_set>>(::make_shared<untyped_result_set>(msg));
-});
-});
+return do_with(std::move(opts),
+[this, p = std::move(p)](query_options & opts) {
+return p->statement->execute_internal(_proxy, *_internal_state, opts).then(
+[p](::shared_ptr<transport::messages::result_message> msg) {
+return make_ready_future<::shared_ptr<untyped_result_set>>(::make_shared<untyped_result_set>(msg));
+});
+});
}
-future<::shared_ptr<untyped_result_set>>
-query_processor::process(const sstring& query_string,
-db::consistency_level cl,
-const std::initializer_list<data_value>& values,
-bool cache)
+future<::shared_ptr<untyped_result_set>> query_processor::process(
+const sstring& query_string,
+db::consistency_level cl, const std::initializer_list<data_value>& values, bool cache)
{
auto p = cache ? prepare_internal(query_string) : parse_statement(query_string)->prepare(_db.local());
if (!cache) {
@@ -389,24 +380,23 @@ query_processor::process(const sstring& query_string,
return process(p, cl, values);
}
-future<::shared_ptr<untyped_result_set>>
-query_processor::process(::shared_ptr<statements::prepared_statement> p,
-db::consistency_level cl,
-const std::initializer_list<data_value>& values)
+future<::shared_ptr<untyped_result_set>> query_processor::process(
+::shared_ptr<statements::prepared_statement> p,
+db::consistency_level cl, const std::initializer_list<data_value>& values)
{
auto opts = make_internal_options(p, values, cl);
-return do_with(std::move(opts), [this, p = std::move(p)](auto & opts) {
-return p->statement->execute(_proxy, *_internal_state, opts).then([p](auto msg) {
-return make_ready_future<::shared_ptr<untyped_result_set>>(::make_shared<untyped_result_set>(msg));
-});
-});
+return do_with(std::move(opts),
+[this, p = std::move(p)](query_options & opts) {
+return p->statement->execute(_proxy, *_internal_state, opts).then(
+[p](::shared_ptr<transport::messages::result_message> msg) {
+return make_ready_future<::shared_ptr<untyped_result_set>>(::make_shared<untyped_result_set>(msg));
+});
+});
}
future<::shared_ptr<transport::messages::result_message>>
-query_processor::process_batch(::shared_ptr<statements::batch_statement> batch,
-service::query_state& query_state,
-query_options& options)
-{
+query_processor::process_batch(::shared_ptr<statements::batch_statement> batch, service::query_state& query_state, query_options& options) {
return batch->check_access(query_state.get_client_state()).then([this, &query_state, &options, batch] {
-batch->validate();
+batch->validate(_proxy, query_state.get_client_state());


@@ -393,12 +393,12 @@ public:
auto prefix = clustering_key_prefix::from_optional_exploded(*_schema, vals);
return bounds_range_type::bound(prefix, is_inclusive(b));
};
-auto range = wrapping_range<clustering_key_prefix>(read_bound(statements::bound::START), read_bound(statements::bound::END));
+auto range = bounds_range_type(read_bound(statements::bound::START), read_bound(statements::bound::END));
auto bounds = bound_view::from_range(range);
if (bound_view::compare(*_schema)(bounds.second, bounds.first)) {
return { };
}
-return { bounds_range_type(std::move(range)) };
+return { std::move(range) };
}
#if 0
@Override


@@ -207,7 +207,7 @@ protected:
auto& columns = schema->all_columns_in_select_order();
cds.reserve(columns.size());
for (auto& c : columns) {
-if (!schema->is_dense() || !c.is_regular() || !c.name().empty()) {
+if (!c.is_compact_value() || !c.name().empty()) {
cds.emplace_back(&c);
}
}
@@ -393,6 +393,9 @@ void result_set_builder::visitor::accept_new_row(
case column_kind::regular_column:
add_value(*def, row_iterator);
break;
+case column_kind::compact_column:
+add_value(*def, row_iterator);
+break;
case column_kind::static_column:
add_value(*def, static_row_iterator);
break;


@@ -95,7 +95,7 @@ single_column_relation::to_receivers(schema_ptr schema, const column_definition&
using namespace statements::request_validations;
auto receiver = column_def.column_specification;
-if (schema->is_dense() && column_def.is_regular()) {
+if (column_def.is_compact_value()) {
throw exceptions::invalid_request_exception(sprint(
"Predicates on the non-primary-key column (%s) of a COMPACT table are not yet supported", column_def.name_as_text()));
}


@@ -40,7 +40,6 @@
*/
#include "alter_keyspace_statement.hh"
-#include "prepared_statement.hh"
#include "service/migration_manager.hh"
#include "db/system_keyspace.hh"
#include "database.hh"
@@ -101,9 +100,3 @@ shared_ptr<transport::event::schema_change> cql3::statements::alter_keyspace_sta
transport::event::schema_change::change_type::UPDATED,
keyspace());
}
-shared_ptr<cql3::statements::prepared_statement>
-cql3::statements::alter_keyspace_statement::prepare(database& db) {
-return make_shared<prepared_statement>(make_shared<alter_keyspace_statement>(*this));
-}


@@ -63,7 +63,6 @@ public:
void validate(distributed<service::storage_proxy>& proxy, const service::client_state& state) override;
future<bool> announce_migration(distributed<service::storage_proxy>& proxy, bool is_local_only) override;
shared_ptr<transport::event::schema_change> change_event() override;
virtual shared_ptr<prepared> prepare(database& db) override;
};
}


@@ -40,7 +40,6 @@
*/
#include "cql3/statements/alter_table_statement.hh"
#include "prepared_statement.hh"
#include "service/migration_manager.hh"
#include "validation.hh"
#include "db/config.hh"
@@ -181,6 +180,7 @@ future<bool> alter_table_statement::announce_migration(distributed<service::stor
}
break;
case column_kind::compact_column:
case column_kind::regular_column:
case column_kind::static_column:
// Thrift allows to change a column validator so CFMetaData.validateCompatibility will let it slide
@@ -273,11 +273,6 @@ shared_ptr<transport::event::schema_change> alter_table_statement::change_event(
transport::event::schema_change::target_type::TABLE, keyspace(), column_family());
}
shared_ptr<cql3::statements::prepared_statement>
cql3::statements::alter_table_statement::prepare(database& db) {
return make_shared<prepared_statement>(make_shared<alter_table_statement>(*this));
}
}
}


@@ -80,7 +80,6 @@ public:
virtual void validate(distributed<service::storage_proxy>& proxy, const service::client_state& state) override;
virtual future<bool> announce_migration(distributed<service::storage_proxy>& proxy, bool is_local_only) override;
virtual shared_ptr<transport::event::schema_change> change_event() override;
virtual shared_ptr<prepared> prepare(database& db) override;
};
}


@@ -39,7 +39,6 @@
#include "cql3/statements/alter_type_statement.hh"
#include "cql3/statements/create_type_statement.hh"
#include "prepared_statement.hh"
#include "schema_builder.hh"
#include "service/migration_manager.hh"
#include "boost/range/adaptor/map.hpp"
@@ -228,16 +227,6 @@ user_type alter_type_statement::renames::make_updated_type(database& db, user_ty
return updated;
}
shared_ptr<cql3::statements::prepared_statement>
alter_type_statement::add_or_alter::prepare(database& db) {
return make_shared<prepared_statement>(make_shared<alter_type_statement::add_or_alter>(*this));
}
shared_ptr<cql3::statements::prepared_statement>
alter_type_statement::renames::prepare(database& db) {
return make_shared<prepared_statement>(make_shared<alter_type_statement::renames>(*this));
}
}
}


@@ -84,7 +84,6 @@ public:
const shared_ptr<column_identifier> field_name,
const shared_ptr<cql3_type::raw> field_type);
virtual user_type make_updated_type(database& db, user_type to_update) const override;
virtual shared_ptr<prepared> prepare(database& db) override;
private:
user_type do_add(database& db, user_type to_update) const;
user_type do_alter(database& db, user_type to_update) const;
@@ -101,7 +100,6 @@ public:
void add_rename(shared_ptr<column_identifier> previous_name, shared_ptr<column_identifier> new_name);
virtual user_type make_updated_type(database& db, user_type to_update) const override;
virtual shared_ptr<prepared> prepare(database& db) override;
};
}


@@ -58,7 +58,7 @@ bool batch_statement::depends_on_column_family(const sstring& cf_name) const
}
void batch_statement::verify_batch_size(const std::vector<mutation>& mutations) {
size_t warn_threshold = service::get_local_storage_proxy().get_db().local().get_config().batch_size_warn_threshold_in_kb() * 1024;
size_t warn_threshold = service::get_local_storage_proxy().get_db().local().get_config().batch_size_warn_threshold_in_kb();
class my_partition_visitor : public mutation_partition_visitor {
public:
@@ -87,15 +87,17 @@ void batch_statement::verify_batch_size(const std::vector<mutation>& mutations)
m.partition().accept(*m.schema(), v);
}
if (v.size > warn_threshold) {
auto size = v.size / 1024;
if (size > warn_threshold) {
std::unordered_set<sstring> ks_cf_pairs;
for (auto&& m : mutations) {
ks_cf_pairs.insert(m.schema()->ks_name() + "." + m.schema()->cf_name());
}
_logger.warn(
"Batch of prepared statements for {} is of size {}, exceeding specified threshold of {} by {}.{}",
join(", ", ks_cf_pairs), v.size, warn_threshold,
v.size - warn_threshold, "");
join(", ", ks_cf_pairs), size, warn_threshold,
size - warn_threshold, "");
}
}
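
The fix in this hunk is a unit bug: the old code compared a raw byte count against a threshold the config expresses in KiB. A minimal sketch of the corrected comparison (the function name here is hypothetical, for illustration only):

```cpp
#include <cassert>
#include <cstddef>

// Both sides of the comparison are in KiB: convert the batch size down
// instead of multiplying the configured threshold up into bytes.
bool exceeds_warn_threshold(size_t batch_size_bytes, size_t warn_threshold_kb) {
    size_t size_kb = batch_size_bytes / 1024;
    return size_kb > warn_threshold_kb;
}
```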


@@ -40,7 +40,6 @@
*/
#include "create_index_statement.hh"
#include "prepared_statement.hh"
#include "validation.hh"
#include "service/storage_proxy.hh"
#include "service/migration_manager.hh"
@@ -206,9 +205,4 @@ cql3::statements::create_index_statement::announce_migration(distributed<service
});
}
shared_ptr<cql3::statements::prepared_statement>
cql3::statements::create_index_statement::prepare(database& db) {
return make_shared<prepared_statement>(make_shared<create_index_statement>(*this));
}


@@ -87,7 +87,6 @@ public:
transport::event::schema_change::target_type::TABLE, keyspace(),
column_family());
}
virtual shared_ptr<prepared> prepare(database& db) override;
};
}


@@ -40,7 +40,6 @@
*/
#include "cql3/statements/create_keyspace_statement.hh"
#include "prepared_statement.hh"
#include "service/migration_manager.hh"
@@ -123,11 +122,6 @@ shared_ptr<transport::event::schema_change> create_keyspace_statement::change_ev
return make_shared<transport::event::schema_change>(transport::event::schema_change::change_type::CREATED, keyspace());
}
shared_ptr<cql3::statements::prepared_statement>
cql3::statements::create_keyspace_statement::prepare(database& db) {
return make_shared<prepared_statement>(make_shared<create_keyspace_statement>(*this));
}
}
}


@@ -84,7 +84,6 @@ public:
virtual future<bool> announce_migration(distributed<service::storage_proxy>& proxy, bool is_local_only) override;
virtual shared_ptr<transport::event::schema_change> change_event() override;
virtual shared_ptr<prepared> prepare(database& db) override;
};
}


@@ -156,14 +156,6 @@ void create_table_statement::add_column_metadata_from_aliases(schema_builder& bu
}
}
shared_ptr<prepared_statement>
create_table_statement::prepare(database& db) {
// Cannot happen; create_table_statement is never instantiated as a raw statement
// (instead we instantiate create_table_statement::raw_statement)
abort();
}
create_table_statement::raw_statement::raw_statement(::shared_ptr<cf_name> name, bool if_not_exists)
: cf_statement{std::move(name)}
, _if_not_exists{if_not_exists}


@@ -103,8 +103,6 @@ public:
virtual shared_ptr<transport::event::schema_change> change_event() override;
virtual shared_ptr<prepared> prepare(database& db) override;
schema_ptr get_cf_meta_data();
class raw_statement;


@@ -38,7 +38,6 @@
*/
#include "cql3/statements/create_type_statement.hh"
#include "prepared_statement.hh"
#include "service/migration_manager.hh"
@@ -156,11 +155,6 @@ future<bool> create_type_statement::announce_migration(distributed<service::stor
return service::get_local_migration_manager().announce_new_type(type, is_local_only).then([] { return true; });
}
shared_ptr<cql3::statements::prepared_statement>
create_type_statement::prepare(database& db) {
return make_shared<prepared_statement>(make_shared<create_type_statement>(*this));
}
}
}


@@ -69,8 +69,6 @@ public:
virtual future<bool> announce_migration(distributed<service::storage_proxy>& proxy, bool is_local_only) override;
virtual shared_ptr<prepared> prepare(database& db) override;
static void check_for_duplicate_names(user_type type);
private:
bool type_exists_in(::keyspace& ks);


@@ -40,7 +40,6 @@
*/
#include "cql3/statements/drop_keyspace_statement.hh"
#include "cql3/statements/prepared_statement.hh"
#include "service/migration_manager.hh"
#include "transport/event.hh"
@@ -98,11 +97,6 @@ shared_ptr<transport::event::schema_change> drop_keyspace_statement::change_even
}
shared_ptr<cql3::statements::prepared_statement>
drop_keyspace_statement::prepare(database& db) {
return make_shared<prepared_statement>(make_shared<drop_keyspace_statement>(*this));
}
}
}


@@ -62,8 +62,6 @@ public:
virtual future<bool> announce_migration(distributed<service::storage_proxy>& proxy, bool is_local_only) override;
virtual shared_ptr<transport::event::schema_change> change_event() override;
virtual shared_ptr<prepared> prepare(database& db) override;
};
}


@@ -40,7 +40,6 @@
*/
#include "cql3/statements/drop_table_statement.hh"
#include "cql3/statements/prepared_statement.hh"
#include "service/migration_manager.hh"
@@ -99,11 +98,6 @@ shared_ptr<transport::event::schema_change> drop_table_statement::change_event()
column_family());
}
shared_ptr<cql3::statements::prepared_statement>
drop_table_statement::prepare(database& db) {
return make_shared<prepared_statement>(make_shared<drop_table_statement>(*this));
}
}
}


@@ -61,8 +61,6 @@ public:
virtual future<bool> announce_migration(distributed<service::storage_proxy>& proxy, bool is_local_only) override;
virtual shared_ptr<transport::event::schema_change> change_event() override;
virtual shared_ptr<prepared> prepare(database& db) override;
};
}


@@ -38,7 +38,6 @@
*/
#include "cql3/statements/drop_type_statement.hh"
#include "cql3/statements/prepared_statement.hh"
#include "boost/range/adaptor/map.hpp"
@@ -117,11 +116,6 @@ future<bool> drop_type_statement::announce_migration(distributed<service::storag
return service::get_local_migration_manager().announce_type_drop(to_drop->second, is_local_only).then([] { return true; });
}
shared_ptr<cql3::statements::prepared_statement>
drop_type_statement::prepare(database& db) {
return make_shared<prepared_statement>(make_shared<drop_type_statement>(*this));
}
}
}


@@ -64,8 +64,6 @@ public:
virtual const sstring& keyspace() const override;
virtual future<bool> announce_migration(distributed<service::storage_proxy>& proxy, bool is_local_only) override;
virtual shared_ptr<prepared> prepare(database& db) override;
};
}


@@ -447,8 +447,6 @@ modification_statement::execute(distributed<service::storage_proxy>& proxy, serv
throw exceptions::invalid_request_exception("Conditional updates are not supported by the protocol version in use. You need to upgrade to a driver using the native protocol v2.");
}
tracing::add_table_name(qs.get_trace_state(), keyspace(), column_family());
if (has_conditions()) {
return execute_with_condition(proxy, qs, options);
}
@@ -510,9 +508,6 @@ modification_statement::execute_internal(distributed<service::storage_proxy>& pr
if (has_conditions()) {
throw exceptions::unsupported_operation_exception();
}
tracing::add_table_name(qs.get_trace_state(), keyspace(), column_family());
return get_mutations(proxy, options, true, options.get_timestamp(qs), qs.get_trace_state()).then(
[&proxy] (auto mutations) {
return proxy.local().mutate_locally(std::move(mutations));


@@ -86,6 +86,11 @@ void schema_altering_statement::prepare_keyspace(const service::client_state& st
}
}
::shared_ptr<prepared_statement> schema_altering_statement::prepare(database& db)
{
return ::make_shared<prepared>(this->shared_from_this());
}
future<::shared_ptr<messages::result_message>>
schema_altering_statement::execute0(distributed<service::storage_proxy>& proxy, service::query_state& state, const query_options& options, bool is_local_only) {
// If an IF [NOT] EXISTS clause was used, this may not result in an actual schema change. To avoid doing


@@ -60,7 +60,7 @@ namespace messages = transport::messages;
/**
* Abstract class for statements that alter the schema.
*/
class schema_altering_statement : public raw::cf_statement, public cql_statement_no_metadata {
class schema_altering_statement : public raw::cf_statement, public cql_statement_no_metadata, public ::enable_shared_from_this<schema_altering_statement> {
private:
const bool _is_column_family_level;
@@ -81,6 +81,8 @@ protected:
virtual void prepare_keyspace(const service::client_state& state) override;
virtual ::shared_ptr<prepared> prepare(database& db) override;
virtual shared_ptr<transport::event::schema_change> change_event() = 0;
/**


@@ -218,8 +218,6 @@ select_statement::execute(distributed<service::storage_proxy>& proxy,
service::query_state& state,
const query_options& options)
{
tracing::add_table_name(state.get_trace_state(), keyspace(), column_family());
auto cl = options.get_consistency();
validate_for_read(_schema->ks_name(), cl);
@@ -329,8 +327,6 @@ select_statement::execute_internal(distributed<service::storage_proxy>& proxy,
make_partition_slice(options), limit, to_gc_clock(now), std::experimental::nullopt, query::max_partitions, options.get_timestamp(state));
auto partition_ranges = _restrictions->get_partition_key_ranges(options);
tracing::add_table_name(state.get_trace_state(), keyspace(), column_family());
if (needs_post_query_ordering() && _limit) {
return do_with(std::move(partition_ranges), [this, &proxy, &state, command] (auto prs) {
query::result_merger merger;


@@ -64,16 +64,16 @@ void update_statement::add_update_for_key(mutation& m, const exploded_clustering
throw exceptions::invalid_request_exception(sprint("Missing PRIMARY KEY part %s", s->clustering_key_columns().begin()->name_as_text()));
}
// An empty name for the value is what we use to recognize the case where there is no column
// An empty name for the compact value is what we use to recognize the case where there is no column
// outside the PK, see CreateStatement.
if (s->regular_begin()->name().empty()) {
if (s->compact_column().name().empty()) {
// There is no column outside the PK. So no operation could have passed through validation
assert(_column_operations.empty());
constants::setter(*s->regular_begin(), make_shared(constants::value(bytes()))).execute(m, prefix, params);
constants::setter(s->compact_column(), make_shared(constants::value(bytes()))).execute(m, prefix, params);
} else {
// dense means we don't have a row marker, so don't accept setting only the PK. See CASSANDRA-5648.
if (_column_operations.empty()) {
throw exceptions::invalid_request_exception(sprint("Column %s is mandatory for this COMPACT STORAGE table", s->regular_begin()->name_as_text()));
throw exceptions::invalid_request_exception(sprint("Column %s is mandatory for this COMPACT STORAGE table", s->compact_column().name_as_text()));
}
}
} else {


@@ -43,7 +43,6 @@
#include <boost/range/adaptor/map.hpp>
#include "locator/simple_snitch.hh"
#include <boost/algorithm/cxx11/all_of.hpp>
#include <boost/algorithm/cxx11/any_of.hpp>
#include <boost/function_output_iterator.hpp>
#include <boost/range/algorithm/heap_algorithm.hpp>
#include <boost/range/algorithm/remove_if.hpp>
@@ -155,10 +154,9 @@ mutation_source
column_family::sstables_as_mutation_source() {
return mutation_source([this] (schema_ptr s,
const query::partition_range& r,
const query::partition_slice& slice,
const io_priority_class& pc,
tracing::trace_state_ptr trace_state) {
return make_sstable_reader(std::move(s), r, slice, pc, std::move(trace_state));
query::clustering_key_filtering_context ck_filtering,
const io_priority_class& pc) {
return make_sstable_reader(std::move(s), r, ck_filtering, pc);
});
}
@@ -188,138 +186,6 @@ bool belongs_to_current_shard(const streamed_mutation& m) {
return dht::shard_of(m.decorated_key().token()) == engine().cpu_id();
}
// Stores ranges for all components of the same clustering key, index 0 referring to component
// range 0, and so on.
using ck_filter_clustering_key_components = std::vector<nonwrapping_range<bytes_view>>;
// Stores an entry for each clustering key range specified by the filter.
using ck_filter_clustering_key_ranges = std::vector<ck_filter_clustering_key_components>;
// Used to split a clustering key range into a range for each component.
// If a range in ck_filtering_all_ranges is composite, a range will be created
// for each component. If it's not composite, a single range is created.
// This split is needed to check for overlap in each component individually.
static ck_filter_clustering_key_ranges
ranges_for_clustering_key_filter(const schema_ptr& schema, const query::clustering_row_ranges& ck_filtering_all_ranges) {
ck_filter_clustering_key_ranges ranges;
for (auto& r : ck_filtering_all_ranges) {
// this vector stores a range for each component of a key, only one if not composite.
ck_filter_clustering_key_components composite_ranges;
if (r.is_full()) {
ranges.push_back({ nonwrapping_range<bytes_view>::make_open_ended_both_sides() });
continue;
}
auto start = r.start() ? r.start()->value().components() : clustering_key_prefix::make_empty().components();
auto end = r.end() ? r.end()->value().components() : clustering_key_prefix::make_empty().components();
auto start_it = start.begin();
auto end_it = end.begin();
// This test is enough because equal bounds in nonwrapping_range are inclusive.
auto is_singular = [&schema] (const auto& type_it, const bytes_view& b1, const bytes_view& b2) {
if (type_it == schema->clustering_key_type()->types().end()) {
throw std::runtime_error(sprint("clustering key filter passed more components than defined in schema of %s.%s",
schema->ks_name(), schema->cf_name()));
}
return (*type_it)->compare(b1, b2) == 0;
};
auto type_it = schema->clustering_key_type()->types().begin();
composite_ranges.reserve(schema->clustering_key_size());
// the rule is to ignore any component cn if another component ck (k < n) is not of the form [v, v].
// If we have [v1, v1], [v2, v2], ... {vl3, vr3}, ....
// then we generate [v1, v1], [v2, v2], ... {vl3, vr3}. Where { = '(' or '[', etc.
while (start_it != start.end() && end_it != end.end() && is_singular(type_it++, *start_it, *end_it)) {
composite_ranges.push_back(nonwrapping_range<bytes_view>({{ std::move(*start_it++), true }},
{{ std::move(*end_it++), true }}));
}
// handle a single non-singular tail element, if present
if (start_it != start.end() && end_it != end.end()) {
composite_ranges.push_back(nonwrapping_range<bytes_view>({{ std::move(*start_it), r.start()->is_inclusive() }},
{{ std::move(*end_it), r.end()->is_inclusive() }}));
} else if (start_it != start.end()) {
composite_ranges.push_back(nonwrapping_range<bytes_view>({{ std::move(*start_it), r.start()->is_inclusive() }}, {}));
} else if (end_it != end.end()) {
composite_ranges.push_back(nonwrapping_range<bytes_view>({}, {{ std::move(*end_it), r.end()->is_inclusive() }}));
}
ranges.push_back(std::move(composite_ranges));
}
return ranges;
}
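
The splitting rule described in the comments above — emit a singular range per equal leading component, then at most one non-singular tail, ignoring anything after it — can be sketched in isolation. The `component_range`/string types below are simplified stand-ins for the real `nonwrapping_range<bytes_view>` and per-type comparators:

```cpp
#include <algorithm>
#include <string>
#include <utility>
#include <vector>

// Simplified model of a per-component range as an inclusive [lo, hi] pair.
using component_range = std::pair<std::string, std::string>;

// Split a composite clustering-key bound pair into per-component ranges:
// singular [v, v] ranges for the equal leading components, then a single
// non-singular (or open-ended) tail range.
std::vector<component_range>
split_composite(const std::vector<std::string>& start,
                const std::vector<std::string>& end) {
    std::vector<component_range> out;
    size_t i = 0;
    size_t n = std::min(start.size(), end.size());
    while (i < n && start[i] == end[i]) {   // singular prefix: [v, v]
        out.emplace_back(start[i], end[i]);
        ++i;
    }
    if (i < n) {                            // one non-singular tail element
        out.emplace_back(start[i], end[i]);
    } else if (i < start.size()) {          // open-ended tail on one side
        out.emplace_back(start[i], "+inf");
    } else if (i < end.size()) {
        out.emplace_back("-inf", end[i]);
    }
    return out;
}
```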
// Return true if this sstable possibly stores clustering row(s) specified by ranges.
static inline bool
contains_rows(const sstables::sstable& sst, const schema_ptr& schema, const ck_filter_clustering_key_ranges& ranges) {
auto& clustering_key_types = schema->clustering_key_type()->types();
auto& clustering_components_ranges = sst.clustering_components_ranges();
if (!schema->clustering_key_size() || clustering_components_ranges.empty()) {
return true;
}
return boost::algorithm::any_of(ranges, [&] (const ck_filter_clustering_key_components& range) {
auto s = std::min(range.size(), clustering_components_ranges.size());
return boost::algorithm::all_of(boost::irange<unsigned>(0, s), [&] (unsigned i) {
auto& type = clustering_key_types[i];
return range[i].is_full() || range[i].overlaps(clustering_components_ranges[i], type->as_tri_comparator());
});
});
}
// Filter out sstables for reader using bloom filter and sstable metadata that keeps track
// of a range for each clustering component.
static std::vector<sstables::shared_sstable>
filter_sstable_for_reader(std::vector<sstables::shared_sstable>&& sstables, column_family& cf, const schema_ptr& schema,
const sstables::key& key, const query::partition_slice& slice) {
auto sstable_has_not_key = [&] (const sstables::shared_sstable& sst) {
return !sst->filter_has_key(key);
};
sstables.erase(boost::remove_if(sstables, sstable_has_not_key), sstables.end());
// no clustering filtering is applied if schema defines no clustering key or
// compaction strategy thinks it will not benefit from such an optimization.
if (!schema->clustering_key_size() || !cf.get_compaction_strategy().use_clustering_key_filter()) {
return sstables;
}
::cf_stats* stats = cf.cf_stats();
stats->clustering_filter_count++;
stats->sstables_checked_by_clustering_filter += sstables.size();
auto ck_filtering_all_ranges = slice.get_all_ranges();
// fast path to include all sstables if only one full range was specified.
// For example, this happens if query only specifies a partition key.
if (ck_filtering_all_ranges.size() == 1 && ck_filtering_all_ranges[0].is_full()) {
stats->clustering_filter_fast_path_count++;
stats->surviving_sstables_after_clustering_filter += sstables.size();
return sstables;
}
auto ranges = ranges_for_clustering_key_filter(schema, ck_filtering_all_ranges);
if (ranges.empty()) {
return {};
}
int64_t min_timestamp = std::numeric_limits<int64_t>::max();
auto sstable_has_clustering_key = [&min_timestamp, &schema, &ranges] (const sstables::shared_sstable& sst) {
if (!contains_rows(*sst, schema, ranges)) {
return false; // ordered after sstables that contain clustering rows.
} else {
min_timestamp = std::min(min_timestamp, sst->get_stats_metadata().min_timestamp);
return true;
}
};
auto sstable_has_relevant_tombstone = [&min_timestamp] (const sstables::shared_sstable& sst) {
const auto& stats = sst->get_stats_metadata();
// re-add sstable as candidate if it contains a tombstone that may cover a row in an included sstable.
return (stats.max_timestamp > min_timestamp && stats.estimated_tombstone_drop_time.bin.map.size());
};
auto skipped = std::partition(sstables.begin(), sstables.end(), sstable_has_clustering_key);
auto actually_skipped = std::partition(skipped, sstables.end(), sstable_has_relevant_tombstone);
sstables.erase(actually_skipped, sstables.end());
stats->surviving_sstables_after_clustering_filter += sstables.size();
return sstables;
}
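
The two-pass `std::partition` idea in `filter_sstable_for_reader` above — keep sstables that match the clustering filter, then re-admit skipped ones whose tombstones may shadow a kept row — can be sketched with a hypothetical simplified `sst` struct standing in for real sstable metadata:

```cpp
#include <algorithm>
#include <cstdint>
#include <limits>
#include <vector>

// Hypothetical stand-in for an sstable: whether it passes the clustering
// filter, plus the timestamp metadata used for the tombstone re-check.
struct sst {
    bool matches;           // contains rows for the filtered ranges
    int64_t min_ts;         // minimum write timestamp in the sstable
    int64_t max_ts;         // maximum write timestamp in the sstable
    bool has_tombstones;
};

// First pass keeps matching sstables while tracking their minimum
// timestamp; second pass re-admits skipped sstables whose tombstones are
// newer than that minimum and so may cover a row in the kept set.
std::vector<sst> filter_for_reader(std::vector<sst> v) {
    int64_t min_timestamp = std::numeric_limits<int64_t>::max();
    auto kept = std::partition(v.begin(), v.end(), [&](const sst& s) {
        if (!s.matches) {
            return false;
        }
        min_timestamp = std::min(min_timestamp, s.min_ts);
        return true;
    });
    auto last = std::partition(kept, v.end(), [&](const sst& s) {
        return s.has_tombstones && s.max_ts > min_timestamp;
    });
    v.erase(last, v.end());
    return v;
}
```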
class range_sstable_reader final : public mutation_reader::impl {
const query::partition_range& _pr;
lw_shared_ptr<sstables::sstable_set> _sstables;
@@ -327,25 +193,23 @@ class range_sstable_reader final : public mutation_reader::impl {
// Use a pointer instead of copying, so we don't need to regenerate the reader if
// the priority changes.
const io_priority_class& _pc;
tracing::trace_state_ptr _trace_state;
query::clustering_key_filtering_context _ck_filtering;
public:
range_sstable_reader(schema_ptr s,
lw_shared_ptr<sstables::sstable_set> sstables,
const query::partition_range& pr,
const query::partition_slice& slice,
const io_priority_class& pc,
tracing::trace_state_ptr trace_state)
query::clustering_key_filtering_context ck_filtering,
const io_priority_class& pc)
: _pr(pr)
, _sstables(std::move(sstables))
, _pc(pc)
, _trace_state(std::move(trace_state))
, _ck_filtering(ck_filtering)
{
std::vector<mutation_reader> readers;
for (const lw_shared_ptr<sstables::sstable>& sst : _sstables->select(pr)) {
tracing::trace(_trace_state, "Reading partition range {} from sstable {}", _pr, seastar::value_of([&sst] { return sst->get_filename(); }));
// FIXME: make sstable::read_range_rows() return ::mutation_reader so that we can drop this wrapper.
mutation_reader reader =
make_mutation_reader<sstable_range_wrapping_reader>(sst, s, pr, slice, _pc);
make_mutation_reader<sstable_range_wrapping_reader>(sst, s, pr, _ck_filtering, _pc);
if (sst->is_shared()) {
reader = make_filtering_reader(std::move(reader), belongs_to_current_shard);
}
@@ -362,48 +226,37 @@ public:
};
class single_key_sstable_reader final : public mutation_reader::impl {
column_family* _cf;
schema_ptr _schema;
dht::ring_position _rp;
sstables::key _key;
std::vector<streamed_mutation> _mutations;
bool _done = false;
lw_shared_ptr<sstables::sstable_set> _sstables;
utils::estimated_histogram& _sstable_histogram;
// Use a pointer instead of copying, so we don't need to regenerate the reader if
// the priority changes.
const io_priority_class& _pc;
const query::partition_slice& _slice;
tracing::trace_state_ptr _trace_state;
query::clustering_key_filtering_context _ck_filtering;
public:
single_key_sstable_reader(column_family* cf,
schema_ptr schema,
single_key_sstable_reader(schema_ptr schema,
lw_shared_ptr<sstables::sstable_set> sstables,
utils::estimated_histogram& sstable_histogram,
const partition_key& key,
const query::partition_slice& slice,
const io_priority_class& pc,
tracing::trace_state_ptr trace_state)
: _cf(cf)
, _schema(std::move(schema))
query::clustering_key_filtering_context ck_filtering,
const io_priority_class& pc)
: _schema(std::move(schema))
, _rp(dht::global_partitioner().decorate_key(*_schema, key))
, _key(sstables::key::from_partition_key(*_schema, key))
, _sstables(std::move(sstables))
, _sstable_histogram(sstable_histogram)
, _pc(pc)
, _slice(slice)
, _trace_state(std::move(trace_state))
, _ck_filtering(ck_filtering)
{ }
virtual future<streamed_mutation_opt> operator()() override {
if (_done) {
return make_ready_future<streamed_mutation_opt>();
}
auto candidates = filter_sstable_for_reader(_sstables->select(query::partition_range(_rp)), *_cf, _schema, _key, _slice);
return parallel_for_each(std::move(candidates),
return parallel_for_each(_sstables->select(query::partition_range(_rp)),
[this](const lw_shared_ptr<sstables::sstable>& sstable) {
tracing::trace(_trace_state, "Reading key {} from sstable {}", *_rp.key(), seastar::value_of([&sstable] { return sstable->get_filename(); }));
return sstable->read_row(_schema, _key, _slice, _pc).then([this](auto smo) {
return sstable->read_row(_schema, _key, _ck_filtering, _pc).then([this](auto smo) {
if (smo) {
_mutations.emplace_back(std::move(*smo));
}
@@ -413,7 +266,6 @@ public:
if (_mutations.empty()) {
return { };
}
_sstable_histogram.add(_mutations.size());
return merge_mutations(std::move(_mutations));
});
}
@@ -422,13 +274,12 @@ public:
mutation_reader
column_family::make_sstable_reader(schema_ptr s,
const query::partition_range& pr,
const query::partition_slice& slice,
const io_priority_class& pc,
tracing::trace_state_ptr trace_state) const {
query::clustering_key_filtering_context ck_filtering,
const io_priority_class& pc) const {
// restricts a reader's concurrency if the configuration specifies it
auto restrict_reader = [&] (mutation_reader&& in) {
auto&& config = [this, &pc] () -> const restricted_mutation_reader_config& {
if (service::get_local_streaming_read_priority().id() == pc.id()) {
if (service::get_local_streaming_read_priority() == pc) {
return _config.streaming_read_concurrency_config;
}
return _config.read_concurrency_config;
@@ -445,11 +296,10 @@ column_family::make_sstable_reader(schema_ptr s,
if (dht::shard_of(pos.token()) != engine().cpu_id()) {
return make_empty_reader(); // range doesn't belong to this shard
}
return restrict_reader(make_mutation_reader<single_key_sstable_reader>(const_cast<column_family*>(this), std::move(s), _sstables,
_stats.estimated_sstable_per_read, *pos.key(), slice, pc, std::move(trace_state)));
return restrict_reader(make_mutation_reader<single_key_sstable_reader>(std::move(s), _sstables, *pos.key(), ck_filtering, pc));
} else {
// range_sstable_reader is not movable so we need to wrap it
return restrict_reader(make_mutation_reader<range_sstable_reader>(std::move(s), _sstables, pr, slice, pc, std::move(trace_state)));
return restrict_reader(make_mutation_reader<range_sstable_reader>(std::move(s), _sstables, pr, ck_filtering, pc));
}
}
@@ -511,9 +361,8 @@ column_family::find_row(schema_ptr s, const dht::decorated_key& partition_key, c
mutation_reader
column_family::make_reader(schema_ptr s,
const query::partition_range& range,
const query::partition_slice& slice,
const io_priority_class& pc,
tracing::trace_state_ptr trace_state) const {
const query::clustering_key_filtering_context& ck_filtering,
const io_priority_class& pc) const {
if (query::is_wrap_around(range, *s)) {
// make_combined_reader() can't handle streams that wrap around yet.
fail(unimplemented::cause::WRAP_AROUND);
@@ -543,40 +392,18 @@ column_family::make_reader(schema_ptr s,
// https://github.com/scylladb/scylla/issues/185
for (auto&& mt : *_memtables) {
readers.emplace_back(mt->make_reader(s, range, slice, pc));
readers.emplace_back(mt->make_reader(s, range, ck_filtering, pc));
}
if (_config.enable_cache) {
readers.emplace_back(_cache.make_reader(s, range, slice, pc, std::move(trace_state)));
readers.emplace_back(_cache.make_reader(s, range, ck_filtering, pc));
} else {
readers.emplace_back(make_sstable_reader(s, range, slice, pc, std::move(trace_state)));
readers.emplace_back(make_sstable_reader(s, range, ck_filtering, pc));
}
return make_combined_reader(std::move(readers));
}
mutation_reader
column_family::make_streaming_reader(schema_ptr s,
const query::partition_range& range) const {
auto& slice = query::full_slice;
auto& pc = service::get_local_streaming_read_priority();
if (query::is_wrap_around(range, *s)) {
// make_combined_reader() can't handle streams that wrap around yet.
fail(unimplemented::cause::WRAP_AROUND);
}
std::vector<mutation_reader> readers;
readers.reserve(_memtables->size() + 1);
for (auto&& mt : *_memtables) {
readers.emplace_back(mt->make_reader(s, range, slice, pc));
}
readers.emplace_back(make_sstable_reader(s, range, slice, pc, nullptr));
return make_combined_reader(std::move(readers));
}
// Not performance critical. Currently used for testing only.
template <typename Func>
future<bool>
@@ -666,11 +493,7 @@ protected:
});
}
future<> done() {
return _listing.done().then([this] {
return _f.close();
});
}
future<> done() { return _listing.done(); }
private:
future<directory_entry> guarantee_type(directory_entry de) {
if (de.type) {
@@ -798,7 +621,7 @@ future<sstables::entry_descriptor> column_family::probe_file(sstring sstdir, sst
}
return load_sstable(sstables::sstable(
_schema, sstdir, comps.generation,
_schema->ks_name(), _schema->cf_name(), sstdir, comps.generation,
comps.version, comps.format)).then_wrapped([fname, comps] (future<> f) {
try {
f.get();
@@ -891,7 +714,7 @@ column_family::seal_active_streaming_memtable_immediate() {
_config.streaming_dirty_memory_manager->serialize_flush([this, old] {
return with_lock(_sstables_lock.for_read(), [this, old] {
auto newtab = make_lw_shared<sstables::sstable>(_schema,
auto newtab = make_lw_shared<sstables::sstable>(_schema->ks_name(), _schema->cf_name(),
_config.datadir, calculate_generation_for_new_table(),
sstables::sstable::version_types::ka,
sstables::sstable::format_types::big);
@@ -946,7 +769,7 @@ future<> column_family::seal_active_streaming_memtable_big(streaming_memtable_bi
return with_gate(_streaming_flush_gate, [this, old, &smb] {
return with_gate(smb.flush_in_progress, [this, old, &smb] {
return with_lock(_sstables_lock.for_read(), [this, old, &smb] {
auto newtab = make_lw_shared<sstables::sstable>(_schema,
auto newtab = make_lw_shared<sstables::sstable>(_schema->ks_name(), _schema->cf_name(),
_config.datadir, calculate_generation_for_new_table(),
sstables::sstable::version_types::ka,
sstables::sstable::format_types::big);
@@ -982,11 +805,6 @@ column_family::seal_active_memtable(memtable_list::flush_behavior ignored) {
_highest_flushed_rp = old->replay_position();
return _flush_queue->run_cf_flush(old->replay_position(), [old, this] {
auto memtable_size = old->occupancy().total_space();
_config.cf_stats->pending_memtables_flushes_count++;
_config.cf_stats->pending_memtables_flushes_bytes += memtable_size;
return _config.dirty_memory_manager->serialize_flush([this, old] {
return repeat([this, old] {
return with_lock(_sstables_lock.for_read(), [this, old] {
@@ -994,9 +812,6 @@ column_family::seal_active_memtable(memtable_list::flush_behavior ignored) {
return try_flush_memtable_to_sstable(old);
});
});
}).then([this, memtable_size] {
_config.cf_stats->pending_memtables_flushes_count--;
_config.cf_stats->pending_memtables_flushes_bytes -= memtable_size;
});
}, [old, this] {
if (_commitlog) {
@@ -1011,11 +826,15 @@ future<stop_iteration>
column_family::try_flush_memtable_to_sstable(lw_shared_ptr<memtable> old) {
auto gen = calculate_generation_for_new_table();
auto newtab = make_lw_shared<sstables::sstable>(_schema,
auto newtab = make_lw_shared<sstables::sstable>(_schema->ks_name(), _schema->cf_name(),
_config.datadir, gen,
sstables::sstable::version_types::ka,
sstables::sstable::format_types::big);
auto memtable_size = old->occupancy().total_space();
_config.cf_stats->pending_memtables_flushes_count++;
_config.cf_stats->pending_memtables_flushes_bytes += memtable_size;
newtab->set_unshared();
dblog.debug("Flushing to {}", newtab->get_filename());
// Note that due to our sharded architecture, it is possible that
@@ -1032,7 +851,9 @@ column_family::try_flush_memtable_to_sstable(lw_shared_ptr<memtable> old) {
auto&& priority = service::get_local_memtable_flush_priority();
return newtab->write_components(*old, incremental_backups_enabled(), priority).then([this, newtab, old] {
return newtab->open_data();
}).then_wrapped([this, old, newtab] (future<> ret) {
}).then_wrapped([this, old, newtab, memtable_size] (future<> ret) {
_config.cf_stats->pending_memtables_flushes_count--;
_config.cf_stats->pending_memtables_flushes_bytes -= memtable_size;
dblog.debug("Flushing to {} done", newtab->get_filename());
try {
ret.get();
@@ -1100,7 +921,7 @@ future<std::vector<sstables::entry_descriptor>> column_family::flush_upload_dir(
if (comps.component != sstables::sstable::component_type::TOC) {
return make_ready_future<>();
}
auto sst = make_lw_shared<sstables::sstable>(_schema,
auto sst = make_lw_shared<sstables::sstable>(_schema->ks_name(), _schema->cf_name(),
_config.datadir + "/upload", comps.generation,
comps.version, comps.format);
work.sstables.emplace(comps.generation, std::move(sst));
@@ -1155,7 +976,7 @@ column_family::reshuffle_sstables(std::set<int64_t> all_generations, int64_t sta
if (work.all_generations.count(comps.generation) != 0) {
return make_ready_future<>();
}
auto sst = make_lw_shared<sstables::sstable>(_schema,
auto sst = make_lw_shared<sstables::sstable>(_schema->ks_name(), _schema->cf_name(),
_config.datadir, comps.generation,
comps.version, comps.format);
work.sstables.emplace(comps.generation, std::move(sst));
@@ -1247,17 +1068,10 @@ column_family::rebuild_sstable_list(const std::vector<sstables::shared_sstable>&
// Second, delete the old sstables. This is done in the background, so we can
// consider this compaction completed.
seastar::with_gate(_sstable_deletion_gate, [this, sstables_to_remove] {
return sstables::delete_atomically(sstables_to_remove).then_wrapped([this, sstables_to_remove] (future<> f) {
std::exception_ptr eptr;
try {
f.get();
} catch(...) {
eptr = std::current_exception();
}
return sstables::delete_atomically(sstables_to_remove).then([this, sstables_to_remove] {
auto current_sstables = _sstables;
auto new_sstable_list = make_lw_shared<sstable_list>();
// unconditionally remove compacted sstables from _sstables_compacted_but_not_deleted,
// or they could stay forever in the set, resulting in deleted files remaining
// opened and disk space not being released until shutdown.
std::unordered_set<sstables::shared_sstable> s(
sstables_to_remove.begin(), sstables_to_remove.end());
auto e = boost::range::remove_if(_sstables_compacted_but_not_deleted, [&] (sstables::shared_sstable sst) -> bool {
@@ -1265,11 +1079,6 @@ column_family::rebuild_sstable_list(const std::vector<sstables::shared_sstable>&
});
_sstables_compacted_but_not_deleted.erase(e, _sstables_compacted_but_not_deleted.end());
rebuild_statistics();
if (eptr) {
return make_exception_future<>(eptr);
}
return make_ready_future<>();
}).handle_exception([] (std::exception_ptr e) {
try {
std::rethrow_exception(e);
@@ -1293,7 +1102,7 @@ column_family::compact_sstables(sstables::compaction_descriptor descriptor, bool
auto create_sstable = [this] {
auto gen = this->calculate_generation_for_new_table();
// FIXME: use "tmp" marker in names of incomplete sstable
auto sst = make_lw_shared<sstables::sstable>(_schema, _config.datadir, gen,
auto sst = make_lw_shared<sstables::sstable>(_schema->ks_name(), _schema->cf_name(), _config.datadir, gen,
sstables::sstable::version_types::ka,
sstables::sstable::format_types::big);
sst->set_unshared();
@@ -1301,7 +1110,7 @@ column_family::compact_sstables(sstables::compaction_descriptor descriptor, bool
};
return sstables::compact_sstables(*sstables_to_compact, *this, create_sstable, descriptor.max_sstable_bytes, descriptor.level,
cleanup).then([this, sstables_to_compact] (auto new_sstables) {
_compaction_strategy.notify_completion(*sstables_to_compact, new_sstables);
_compaction_strategy.notify_completion(_schema, *sstables_to_compact, new_sstables);
return this->rebuild_sstable_list(new_sstables, *sstables_to_compact);
});
});
@@ -1310,8 +1119,8 @@ column_family::compact_sstables(sstables::compaction_descriptor descriptor, bool
static bool needs_cleanup(const lw_shared_ptr<sstables::sstable>& sst,
const lw_shared_ptr<std::vector<range<dht::token>>>& owned_ranges,
schema_ptr s) {
auto first = sst->get_first_partition_key();
auto last = sst->get_last_partition_key();
auto first = sst->get_first_partition_key(*s);
auto last = sst->get_last_partition_key(*s);
auto first_token = dht::global_partitioner().get_token(*s, first);
auto last_token = dht::global_partitioner().get_token(*s, last);
range<dht::token> sst_token_range = range<dht::token>::make(first_token, last_token);
@@ -1344,7 +1153,7 @@ future<>
column_family::load_new_sstables(std::vector<sstables::entry_descriptor> new_tables) {
return parallel_for_each(new_tables, [this] (auto comps) {
return this->load_sstable(sstables::sstable(
_schema, _config.datadir,
_schema->ks_name(), _schema->cf_name(), _config.datadir,
comps.generation, comps.version, comps.format), true);
}).then([this] {
start_rewrite();
@@ -1637,34 +1446,6 @@ database::setup_collectd() {
, scollectd::make_typed(scollectd::data_type::GAUGE, _cf_stats.pending_memtables_flushes_bytes)
));
_collectd.push_back(
scollectd::add_polled_metric(scollectd::type_instance_id("database"
, scollectd::per_cpu_plugin_instance
, "total_operations", "clustering_filter")
, scollectd::make_typed(scollectd::data_type::DERIVE, _cf_stats.clustering_filter_count)
));
_collectd.push_back(
scollectd::add_polled_metric(scollectd::type_instance_id("database"
, scollectd::per_cpu_plugin_instance
, "total_operations", "clustering_filter")
, scollectd::make_typed(scollectd::data_type::DERIVE, _cf_stats.sstables_checked_by_clustering_filter)
));
_collectd.push_back(
scollectd::add_polled_metric(scollectd::type_instance_id("database"
, scollectd::per_cpu_plugin_instance
, "total_operations", "clustering_filter")
, scollectd::make_typed(scollectd::data_type::DERIVE, _cf_stats.clustering_filter_fast_path_count)
));
_collectd.push_back(
scollectd::add_polled_metric(scollectd::type_instance_id("database"
, scollectd::per_cpu_plugin_instance
, "total_operations", "clustering_filter")
, scollectd::make_typed(scollectd::data_type::DERIVE, _cf_stats.surviving_sstables_after_clustering_filter)
));
_collectd.push_back(
scollectd::add_polled_metric(scollectd::type_instance_id("database"
, scollectd::per_cpu_plugin_instance
@@ -2264,15 +2045,15 @@ struct query_state {
};
future<lw_shared_ptr<query::result>>
column_family::query(schema_ptr s, const query::read_command& cmd, query::result_request request, const std::vector<query::partition_range>& partition_ranges, tracing::trace_state_ptr trace_state) {
column_family::query(schema_ptr s, const query::read_command& cmd, query::result_request request, const std::vector<query::partition_range>& partition_ranges) {
utils::latency_counter lc;
_stats.reads.set_latency(lc);
auto qs_ptr = std::make_unique<query_state>(std::move(s), cmd, request, partition_ranges);
auto& qs = *qs_ptr;
{
return do_until(std::bind(&query_state::done, &qs), [this, &qs, trace_state = std::move(trace_state)] {
return do_until(std::bind(&query_state::done, &qs), [this, &qs] {
auto&& range = *qs.current_partition_range++;
return data_query(qs.schema, as_mutation_source(trace_state), range, qs.cmd.slice, qs.limit, qs.partition_limit,
return data_query(qs.schema, as_mutation_source(), range, qs.cmd.slice, qs.limit, qs.partition_limit,
qs.cmd.timestamp, qs.builder).then([&qs] (auto&& r) {
qs.limit -= r.live_rows;
qs.partition_limit -= r.partitions;
@@ -2290,28 +2071,28 @@ column_family::query(schema_ptr s, const query::read_command& cmd, query::result
}
mutation_source
column_family::as_mutation_source(tracing::trace_state_ptr trace_state) const {
return mutation_source([this, trace_state = std::move(trace_state)] (schema_ptr s,
column_family::as_mutation_source() const {
return mutation_source([this] (schema_ptr s,
const query::partition_range& range,
const query::partition_slice& slice,
query::clustering_key_filtering_context ck_filtering,
const io_priority_class& pc) {
return this->make_reader(std::move(s), range, slice, pc, std::move(trace_state));
return this->make_reader(std::move(s), range, ck_filtering, pc);
});
}
future<lw_shared_ptr<query::result>>
database::query(schema_ptr s, const query::read_command& cmd, query::result_request request, const std::vector<query::partition_range>& ranges, tracing::trace_state_ptr trace_state) {
database::query(schema_ptr s, const query::read_command& cmd, query::result_request request, const std::vector<query::partition_range>& ranges) {
column_family& cf = find_column_family(cmd.cf_id);
return cf.query(std::move(s), cmd, request, ranges, std::move(trace_state)).then([this, s = _stats] (auto&& res) {
return cf.query(std::move(s), cmd, request, ranges).then([this, s = _stats] (auto&& res) {
++s->total_reads;
return std::move(res);
});
}
future<reconcilable_result>
database::query_mutations(schema_ptr s, const query::read_command& cmd, const query::partition_range& range, tracing::trace_state_ptr trace_state) {
database::query_mutations(schema_ptr s, const query::read_command& cmd, const query::partition_range& range) {
column_family& cf = find_column_family(cmd.cf_id);
return mutation_query(std::move(s), cf.as_mutation_source(std::move(trace_state)), range, cmd.slice, cmd.row_limit, cmd.partition_limit,
return mutation_query(std::move(s), cf.as_mutation_source(), range, cmd.slice, cmd.row_limit, cmd.partition_limit,
cmd.timestamp).then([this, s = _stats] (auto&& res) {
++s->total_reads;
return std::move(res);
@@ -3012,23 +2793,15 @@ future<std::unordered_map<sstring, column_family::snapshot_details>> column_fami
}
future<> column_family::flush() {
// FIXME: this will synchronously wait for this write to finish, but doesn't guarantee
// anything about previous writes.
_stats.pending_flushes++;
auto fut = _memtables->seal_active_memtable(memtable_list::flush_behavior::immediate);
// this rp is either:
// a.) Done - no-op
// b.) Ours
// c.) The last active flush not finished. If our latest memtable is
// empty it still makes sense for this api call to wait for this.
auto high_rp = _highest_flushed_rp;
return fut.finally([this, high_rp] {
return _memtables->seal_active_memtable(memtable_list::flush_behavior::immediate).finally([this]() mutable {
_stats.pending_flushes--;
// In origin memtable_switch_count is incremented inside
// ColumnFamilyMeetrics Flush.run
_stats.memtable_switch_count++;
// wait for all up until us.
return _flush_queue->wait_for_pending(high_rp);
return make_ready_future<>();
});
}


@@ -67,13 +67,12 @@
#include "sstables/compaction_manager.hh"
#include "utils/exponential_backoff_retry.hh"
#include "utils/histogram.hh"
#include "utils/estimated_histogram.hh"
#include "sstables/estimated_histogram.hh"
#include "sstables/compaction.hh"
#include "sstables/sstable_set.hh"
#include "key_reader.hh"
#include <seastar/core/rwlock.hh>
#include <seastar/core/shared_future.hh>
#include "tracing/trace_state.hh"
class frozen_mutation;
class reconcilable_result;
@@ -300,15 +299,6 @@ using sstable_list = sstables::sstable_list;
struct cf_stats {
int64_t pending_memtables_flushes_count = 0;
int64_t pending_memtables_flushes_bytes = 0;
// number of time the clustering filter was executed
int64_t clustering_filter_count = 0;
// sstables considered by the filter (so dividing this by the previous one we get average sstables per read)
int64_t sstables_checked_by_clustering_filter = 0;
// number of times the filter passed the fast-path checks
int64_t clustering_filter_fast_path_count = 0;
// how many sstables survived the clustering key checks
int64_t surviving_sstables_after_clustering_filter = 0;
};
class column_family {
@@ -342,9 +332,9 @@ public:
int64_t pending_compactions = 0;
utils::timed_rate_moving_average_and_histogram reads{256};
utils::timed_rate_moving_average_and_histogram writes{256};
utils::estimated_histogram estimated_read;
utils::estimated_histogram estimated_write;
utils::estimated_histogram estimated_sstable_per_read{35};
sstables::estimated_histogram estimated_read;
sstables::estimated_histogram estimated_write;
sstables::estimated_histogram estimated_sstable_per_read;
utils::timed_rate_moving_average_and_histogram tombstone_scanned;
utils::timed_rate_moving_average_and_histogram live_scanned;
};
@@ -356,7 +346,7 @@ public:
private:
schema_ptr _schema;
config _config;
mutable stats _stats;
stats _stats;
lw_shared_ptr<memtable_list> _memtables;
@@ -478,9 +468,8 @@ private:
// Mutations returned by the reader will all have given schema.
mutation_reader make_sstable_reader(schema_ptr schema,
const query::partition_range& range,
const query::partition_slice& slice,
const io_priority_class& pc,
tracing::trace_state_ptr trace_state) const;
query::clustering_key_filtering_context ck_filtering,
const io_priority_class& pc) const;
mutation_source sstables_as_mutation_source();
key_source sstables_as_key_source() const;
@@ -518,18 +507,10 @@ public:
// will be scheduled under the priority class given by pc.
mutation_reader make_reader(schema_ptr schema,
const query::partition_range& range = query::full_partition_range,
const query::partition_slice& slice = query::full_slice,
const io_priority_class& pc = default_priority_class(),
tracing::trace_state_ptr trace_state = nullptr) const;
const query::clustering_key_filtering_context& ck_filtering = query::no_clustering_key_filtering,
const io_priority_class& pc = default_priority_class()) const;
// The streaming mutation reader differs from the regular mutation reader in that:
// - Reflects all writes accepted by replica prior to creation of the
// reader and a _bounded_ amount of writes which arrive later.
// - Does not populate the cache
mutation_reader make_streaming_reader(schema_ptr schema,
const query::partition_range& range = query::full_partition_range) const;
mutation_source as_mutation_source(tracing::trace_state_ptr trace_state) const;
mutation_source as_mutation_source() const;
// Queries can be satisfied from multiple data sources, so they are returned
// as temporaries.
@@ -571,8 +552,7 @@ public:
// Returns at most "cmd.limit" rows
future<lw_shared_ptr<query::result>> query(schema_ptr,
const query::read_command& cmd, query::result_request request,
const std::vector<query::partition_range>& ranges,
tracing::trace_state_ptr trace_state);
const std::vector<query::partition_range>& ranges);
future<> populate(sstring datadir);
@@ -693,10 +673,6 @@ public:
return _stats;
}
::cf_stats* cf_stats() {
return _config.cf_stats;
}
compaction_manager& get_compaction_manager() const {
return _compaction_manager;
}
@@ -1069,8 +1045,8 @@ public:
unsigned shard_of(const dht::token& t);
unsigned shard_of(const mutation& m);
unsigned shard_of(const frozen_mutation& m);
future<lw_shared_ptr<query::result>> query(schema_ptr, const query::read_command& cmd, query::result_request request, const std::vector<query::partition_range>& ranges, tracing::trace_state_ptr trace_state);
future<reconcilable_result> query_mutations(schema_ptr, const query::read_command& cmd, const query::partition_range& range, tracing::trace_state_ptr trace_state);
future<lw_shared_ptr<query::result>> query(schema_ptr, const query::read_command& cmd, query::result_request request, const std::vector<query::partition_range>& ranges);
future<reconcilable_result> query_mutations(schema_ptr, const query::read_command& cmd, const query::partition_range& range);
future<> apply(schema_ptr, const frozen_mutation&);
future<> apply_streaming_mutation(schema_ptr, utils::UUID plan_id, const frozen_mutation&, bool fragmented);
keyspace::config make_keyspace_config(const keyspace_metadata& ksm);


@@ -163,7 +163,7 @@ public:
bool _shutdown = false;
semaphore _new_segment_semaphore {1};
semaphore _new_segment_semaphore;
semaphore _write_semaphore;
semaphore _flush_semaphore;
@@ -453,7 +453,7 @@ public:
segment(::shared_ptr<segment_manager> m, const descriptor& d, file && f, bool active)
: _segment_manager(std::move(m)), _desc(std::move(d)), _file(std::move(f)),
_file_name(_segment_manager->cfg.commit_log_location + "/" + _desc.filename()), _sync_time(
clock_type::now()), _pending_ops(true) // want exception propagation
clock_type::now())
{
++_segment_manager->totals.segments_created;
logger.debug("Created new {} segment {}", active ? "active" : "reserve", *this);
@@ -762,9 +762,9 @@ public:
auto me = shared_from_this();
++_segment_manager->totals.pending_allocations;
logger.trace("Previous allocation is blocking. Must wait.");
return _pending_ops.wait_for_pending().then_wrapped([me](auto f) { // TODO: do we need a finally?
return _pending_ops.wait_for_pending().then([me] { // TODO: do we need a finally?
--me->_segment_manager->totals.pending_allocations;
return f.failed() ? me->_segment_manager->active_segment() : make_ready_future<sseg_ptr>(me);
return make_ready_future<sseg_ptr>(me);
});
}
@@ -793,13 +793,6 @@ public:
});
}
return me->sync();
}).handle_exception([me, fp](auto p) {
// If we get an IO exception (which we assume this is)
// we should close the segment.
// TODO: should we also trunctate away any partial write
// we did?
me->_closed = true; // just mark segment as closed, no writes will be done.
return make_exception_future<sseg_ptr>(p);
});
}
/**
@@ -1557,15 +1550,6 @@ db::commitlog::read_log_file(const sstring& filename, commit_load_reader_func ne
subscription<temporary_buffer<char>, db::replay_position>
db::commitlog::read_log_file(file f, commit_load_reader_func next, position_type off) {
struct work {
private:
file_input_stream_options make_file_input_stream_options() {
file_input_stream_options fo;
fo.buffer_size = db::commitlog::segment::default_size;
fo.read_ahead = 10;
fo.io_priority_class = service::get_local_commitlog_priority();
return fo;
}
public:
file f;
stream<temporary_buffer<char>, replay_position> s;
input_stream<char> fin;
@@ -1581,7 +1565,7 @@ db::commitlog::read_log_file(file f, commit_load_reader_func next, position_type
bool header = true;
work(file f, position_type o = 0)
: f(f), fin(make_file_input_stream(f, o, make_file_input_stream_options())), start_off(o) {
: f(f), fin(make_file_input_stream(f)), start_off(o) {
}
work(work&&) = default;
@@ -1764,8 +1748,6 @@ db::commitlog::read_log_file(file f, commit_load_reader_func next, position_type
throw segment_data_corruption_error("Data corruption", corrupt_size);
}
});
}).finally([this] {
return fin.close();
});
}
};


@@ -73,7 +73,7 @@ void commitlog_entry_writer::compute_size() {
}
void commitlog_entry_writer::write(data_output& out) const {
seastar::simple_output_stream str(out.reserve(size()), size());
seastar::simple_output_stream str(out.reserve(size()));
ser::serialize(str, get_entry());
}


@@ -399,8 +399,6 @@ public:
"The IP address a node tells other nodes in the cluster to contact it by. It allows public and private address to be different. For example, use the broadcast_address parameter in topologies where not all nodes have access to other nodes by their private IP addresses.\n" \
"If your Scylla cluster is deployed across multiple Amazon EC2 regions and you use the EC2MultiRegionSnitch , set the broadcast_address to public IP address of the node and the listen_address to the private IP." \
) \
val(listen_on_broadcast_address, bool, false, Used, "When using multiple physical network interfaces, set this to true to listen on broadcast_address in addition to the listen_address, allowing nodes to communicate in both interfaces. Ignore this property if the network configuration automatically routes between the public and private networks such as EC2." \
)\
val(initial_token, sstring, /* N/A */, Used, \
"Used in the single-node-per-token architecture, where a node owns exactly one contiguous range in the ring space. Setting this property overrides num_tokens.\n" \
"If you not using vnodes or have num_tokens set it to 1 or unspecified (#num_tokens), you should always specify this parameter when setting up a production cluster for the first time and when adding capacity. For more information, see this parameter in the Cassandra 1.1 Node and Cluster Configuration documentation.\n" \
@@ -734,9 +732,6 @@ public:
val(skip_wait_for_gossip_to_settle, int32_t, -1, Used, "An integer to configure the wait for gossip to settle. -1: wait normally, 0: do not wait at all, n: wait for at most n polls. Same as -Dcassandra.skip_wait_for_gossip_to_settle in cassandra.") \
val(experimental, bool, false, Used, "Set to true to unlock experimental features.") \
val(lsa_reclamation_step, size_t, 1, Used, "Minimum number of segments to reclaim in a single step") \
val(prometheus_port, uint16_t, 9180, Used, "Prometheus port, set to zero to disable") \
val(prometheus_address, sstring, "0.0.0.0", Used, "Prometheus listening address") \
val(abort_on_lsa_bad_alloc, bool, false, Used, "Abort when allocation in LSA region fails") \
/* done! */
#define _make_value_member(name, type, deflt, status, desc, ...) \


@@ -494,7 +494,7 @@ read_schema_partition_for_table(distributed<service::storage_proxy>& proxy, cons
return query_partition_mutation(proxy.local(), std::move(schema), std::move(cmd), std::move(keyspace_key));
}
static semaphore the_merge_lock {1};
static semaphore the_merge_lock;
future<> merge_lock() {
// ref: #1088
@@ -1213,8 +1213,8 @@ schema_mutations make_table_mutations(schema_ptr table, api::timestamp_type time
m.set_clustered_cell(ckey, "key_aliases", alias(table->partition_key_columns()), timestamp);
m.set_clustered_cell(ckey, "column_aliases", alias(table->clustering_key_columns()), timestamp);
if (table->is_dense()) {
m.set_clustered_cell(ckey, "value_alias", table->regular_begin()->name_as_text(), timestamp);
if (table->compact_columns_count() == 1) {
m.set_clustered_cell(ckey, "value_alias", table->compact_column().name_as_text(), timestamp);
} // null if none
map_type_impl::mutation dropped_columns;
@@ -1593,6 +1593,7 @@ sstring serialize_kind(column_kind kind)
case column_kind::clustering_key: return "clustering_key";
case column_kind::static_column: return "static";
case column_kind::regular_column: return "regular";
case column_kind::compact_column: return "compact_value";
default: throw std::invalid_argument("unknown column kind");
}
}
@@ -1606,8 +1607,8 @@ column_kind deserialize_kind(sstring kind) {
return column_kind::static_column;
} else if (kind == "regular") {
return column_kind::regular_column;
} else if (kind == "compact_value") { // backward compatibility
return column_kind::regular_column;
} else if (kind == "compact_value") {
return column_kind::compact_column;
} else {
throw std::invalid_argument("unknown column kind: " + kind);
}


@@ -75,7 +75,7 @@ static std::vector<db::system_keyspace::range_estimates> estimates_for(const col
// Each range defines both bounds.
for (auto& range : local_ranges) {
int64_t count{0};
utils::estimated_histogram hist{0};
sstables::estimated_histogram hist{0};
unwrapped.clear();
if (range.is_wrap_around(dht::ring_position_comparator(*cf.schema()))) {
auto uw = range.unwrap();
@@ -85,10 +85,10 @@ static std::vector<db::system_keyspace::range_estimates> estimates_for(const col
unwrapped.push_back(range);
}
for (auto&& uwr : unwrapped) {
for (auto&& sstable : cf.select_sstables(uwr)) {
count += sstable->get_estimated_key_count();
hist.merge(sstable->get_stats_metadata().estimated_row_size);
}
for (auto&& sstable : cf.select_sstables(uwr)) {
count += sstable->get_estimated_key_count();
hist.merge(sstable->get_stats_metadata().estimated_row_size);
}
}
estimates.emplace_back(db::system_keyspace::range_estimates{
range.start()->value().token(),


@@ -1,234 +0,0 @@
/*
* Copyright (C) 2016 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include "md5_hasher.hh"
#include "random_partitioner.hh"
#include "utils/class_registrator.hh"
#include <boost/multiprecision/cpp_int.hpp>
namespace dht {
static const boost::multiprecision::uint128_t cppint_one{1};
static const boost::multiprecision::uint128_t cppint127_max = cppint_one << 127;
// Convert token's byte array to integer value.
static boost::multiprecision::uint128_t token_to_cppint(const token& t) {
boost::multiprecision::uint128_t ret{0};
// If the token is minimum token, token._data will be empty,
// zero will be returned
for (uint8_t d : t._data) {
ret = (ret << 8) + d;
}
return ret;
}
// Store integer value for the token into token's byte array. The value must be within [0, 2 ^ 127].
static token cppint_to_token(boost::multiprecision::uint128_t i) {
if (i == 0) {
return minimum_token();
}
if (i > cppint127_max) {
throw std::runtime_error(sprint("RandomPartitioner value %s must be within [0, 2 ^ 127]", i));
}
std::vector<int8_t> t;
while (i) {
static boost::multiprecision::uint128_t byte_mask = 0xFF;
auto data = (i & byte_mask).convert_to<uint8_t>();
t.push_back(data);
i >>= 8;
}
std::reverse(t.begin(), t.end());
return token(token::kind::key, managed_bytes(t.data(), t.size()));
}
// Convert a 16 bytes long raw byte array to token. Byte 0 is the most significant byte.
static token bytes_to_token(bytes digest) {
if (digest.size() != 16) {
throw std::runtime_error(sprint("RandomPartitioner digest should be 16 bytes, it is %d", digest.size()));
}
// Translates the bytes array to signed integer i,
// abs(i) is stored in token's _data array.
if (digest[0] & 0x80) {
boost::multiprecision::uint128_t i = 0;
for (uint8_t d : digest) {
i = (i << 8) + d;
}
// i = abs(i) = ~i + 1
i = ~i + 1;
return cppint_to_token(i);
} else {
return token(token::kind::key, std::move(digest));
}
}
static float ratio_helper(boost::multiprecision::uint128_t a, boost::multiprecision::uint128_t b) {
boost::multiprecision::uint128_t val;
if (a >= b) {
val = a - b;
} else {
val = cppint127_max - (b - a);
}
return static_cast<float>(val.convert_to<double>() * 0x1p-127);
}
token random_partitioner::get_token(bytes data) {
md5_hasher h;
h.update(reinterpret_cast<const char*>(data.c_str()), data.size());
return bytes_to_token(h.finalize());
}
token random_partitioner::get_token(const schema& s, partition_key_view key) {
auto&& legacy = key.legacy_form(s);
return get_token(bytes(legacy.begin(), legacy.end()));
}
token random_partitioner::get_token(const sstables::key_view& key) {
auto v = bytes_view(key);
if (v.empty()) {
return minimum_token();
}
return get_token(bytes(v.begin(), v.end()));
}
int random_partitioner::tri_compare(const token& t1, const token& t2) {
auto l1 = token_to_cppint(t1);
auto l2 = token_to_cppint(t2);
if (l1 == l2) {
return 0;
} else {
return l1 < l2 ? -1 : 1;
}
}
token random_partitioner::get_random_token() {
boost::multiprecision::uint128_t i = dht::get_random_number<uint64_t>();
i = (i << 64) + dht::get_random_number<uint64_t>();
if (i > cppint127_max) {
i = ~i + 1;
}
return cppint_to_token(i);
}
std::map<token, float> random_partitioner::describe_ownership(const std::vector<token>& sorted_tokens) {
std::map<token, float> ownerships;
auto i = sorted_tokens.begin();
// 0-case
if (i == sorted_tokens.end()) {
throw runtime_exception("No nodes present in the cluster. Has this node finished starting up?");
}
// 1-case
if (sorted_tokens.size() == 1) {
ownerships[sorted_tokens[0]] = 1.0;
// n-case
} else {
const token& start = sorted_tokens[0];
auto ti = token_to_cppint(start); // The first token and its value
auto cppint_start = ti;
auto tim1 = ti; // The last token and its value (after loop)
for (i++; i != sorted_tokens.end(); i++) {
ti = token_to_cppint(*i); // The next token and its value
ownerships[*i]= ratio_helper(ti, tim1); // save (T(i) -> %age)
tim1 = ti;
}
// The start token's range extends backward to the last token, which is why both were saved above.
ownerships[start] = ratio_helper(cppint_start, ti);
}
return ownerships;
}
token random_partitioner::midpoint(const token& t1, const token& t2) const {
unsigned sigbytes = std::max(t1._data.size(), t2._data.size());
if (sigbytes == 0) {
// The midpoint of two minimum token is minimum token
return minimum_token();
}
static boost::multiprecision::uint128_t max = cppint_one << 127;
auto l1 = token_to_cppint(t1);
auto l2 = token_to_cppint(t2);
auto sum = l1 + l2;
boost::multiprecision::uint128_t mid;
// t1 <= t2 is the same as l1 <= l2
if (l1 <= l2) {
mid = sum / 2;
} else {
mid = (sum / 2 + max / 2) % max;
}
return cppint_to_token(mid);
}
sstring random_partitioner::to_sstring(const dht::token& t) const {
if (t._kind == dht::token::kind::before_all_keys) {
return sstring();
} else {
return token_to_cppint(t).str();
}
}
dht::token random_partitioner::from_sstring(const sstring& t) const {
if (t.empty()) {
return minimum_token();
} else {
boost::multiprecision::uint128_t x(t.c_str());
return cppint_to_token(x);
}
}
unsigned random_partitioner::shard_of(const token& t) const {
switch (t._kind) {
case token::kind::before_all_keys:
return 0;
case token::kind::after_all_keys:
return smp::count - 1;
case token::kind::key:
auto i = (boost::multiprecision::uint256_t(token_to_cppint(t)) * smp::count) >> 127;
// token can be [0, 2^127], make sure smp be [0, smp::count)
auto smp = i.convert_to<unsigned>();
if (smp >= smp::count) {
return smp::count - 1;
}
return smp;
}
assert(0);
}
bytes random_partitioner::token_to_bytes(const token& t) const {
static const bytes zero_byte(1, int8_t(0x00));
if (t.is_minimum() || t._data.empty()) {
return zero_byte;
}
auto data = bytes(t._data.begin(), t._data.end());
if (t._data[0] & 0x80) {
// Prepend 0x00 to the byte array to mimic BigInteger.toByteArray's
// byte array representation which has a sign bit.
return zero_byte + data;
}
return data;
}
using registry = class_registrator<i_partitioner, random_partitioner>;
static registry registrator("org.apache.cassandra.dht.RandomPartitioner");
static registry registrator_short_name("RandomPartitioner");
}


@@ -1,50 +0,0 @@
/*
* Copyright (C) 2016 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "i_partitioner.hh"
#include "bytes.hh"
#include "sstables/key.hh"
namespace dht {
class random_partitioner final : public i_partitioner {
public:
virtual const sstring name() { return "org.apache.cassandra.dht.RandomPartitioner"; }
virtual token get_token(const schema& s, partition_key_view key) override;
virtual token get_token(const sstables::key_view& key) override;
virtual token get_random_token() override;
virtual bool preserves_order() override { return false; }
virtual std::map<token, float> describe_ownership(const std::vector<token>& sorted_tokens) override;
virtual data_type get_token_validator() override { return varint_type; }
virtual bytes token_to_bytes(const token& t) const override;
virtual int tri_compare(const token& t1, const token& t2) override;
virtual token midpoint(const token& t1, const token& t2) const;
virtual sstring to_sstring(const dht::token& t) const override;
virtual dht::token from_sstring(const sstring& t) const override;
virtual unsigned shard_of(const token& t) const override;
private:
token get_token(bytes data);
};
}


@@ -33,7 +33,7 @@ done
. /etc/os-release
case "$ID" in
"centos")
AMI=ami-4e1d5b59
AMI=ami-f3102499
REGION=us-east-1
SSH_USERNAME=centos
;;


@@ -1 +0,0 @@
/usr/lib/scylla/node_exporter_install


@@ -1,56 +0,0 @@
#!/bin/sh
#
# Copyright 2016 ScyllaDB
#
# This file is part of Scylla.
#
# Scylla is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Scylla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
if [ "`id -u`" -ne 0 ]; then
echo "Requires root permission."
exit 1
fi
if [ -f /usr/bin/node_exporter ]; then
echo "node_exporter already installed"
exit 1
fi
version=0.12.0
dir=/usr/lib/scylla/Prometheus/node_exporter
mkdir -p $dir
cd $dir
wget https://github.com/prometheus/node_exporter/releases/download/$version/node_exporter-$version.linux-amd64.tar.gz -O $dir/node_exporter-$version.linux-amd64.tar.gz
tar -xvzf $dir/node_exporter-$version.linux-amd64.tar.gz
rm $dir/node_exporter-$version.linux-amd64.tar.gz
ln -s $dir/node_exporter-$version.linux-amd64/node_exporter /usr/bin
. /etc/os-release
if [ "$(cat /proc/1/comm)" = "systemd" ]; then
systemctl enable node-exporter
systemctl start node-exporter
else
cat <<EOT >> /etc/init/node_exporter.conf
# Run node_exporter
start on startup
script
/usr/bin/node_exporter
end script
EOT
service node_exporter start
fi
printf "node_exporter successfully installed\n"


@@ -22,11 +22,7 @@ while [ $# -gt 0 ]; do
done
. /etc/os-release
if [ "$ID" = "ubuntu" ]; then
. /etc/default/scylla-server
else
. /etc/sysconfig/scylla-server
fi
if [ ! -f /etc/default/grub ]; then
echo "Unsupported bootloader"
exit 1
@@ -44,7 +40,7 @@ else
fi
mv /tmp/grub /etc/default/grub
if [ "$ID" = "ubuntu" ] || [ "$ID" = "debian" ]; then
update-grub
grub-mkconfig -o /boot/grub/grub.cfg
else
grub2-mkconfig -o /boot/grub2/grub.cfg
fi


@@ -44,8 +44,8 @@ output_to_user()
}
if [ `is_developer_mode` -eq 0 ]; then
SMP=`echo $CPUSET|grep smp|sed -e "s/^.*smp\(\s\+\|=\)\([^ ]*\).*$/\2/"`
CPUSET=`echo $CPUSET|grep cpuset|sed -e "s/^.*\(--cpuset\(\s\+\|=\)[^ ]*\).*$/\1/"`
SMP=`echo $CPUSET|grep smp|sed -e "s/^.*smp\(\s\+\|=\)\([0-9]*\).*$/\2/"`
CPUSET=`echo $CPUSET|grep cpuset|sed -e "s/^.*\(--cpuset\(\s\+\|=\)[0-9\-]*\).*$/\1/"`
if [ $AMI_OPT -eq 1 ]; then
NR_CPU=`cat /proc/cpuinfo |grep processor|wc -l`
NR_DISKS=`lsblk --list --nodeps --noheadings | grep -v xvda | grep xvd | wc -l`


@@ -30,6 +30,6 @@ else
else
echo "Please upgrade to a newer kernel version."
fi
echo " see http://www.scylladb.com/kb/kb-fs-not-qualified-aio/ for details"
echo " see http://docs.scylladb.com/kb/kb-fs-not-qualified-aio/ for details"
fi
exit $RET


@@ -29,7 +29,6 @@ print_usage() {
echo " --no-sysconfig-setup skip sysconfig setup"
echo " --no-io-setup skip IO configuration setup"
echo " --no-version-check skip daily version check"
echo " --no-node-exporter do not install the node exporter"
exit 1
}
@@ -76,21 +75,6 @@ verify_package() {
fi
}
get_unused_disks() {
blkid -c /dev/null|cut -f 1 -d ' '|sed s/://g|grep -v loop|while read dev
do
count_raw=$(grep $dev /proc/mounts|wc -l)
count_pvs=0
if [ -f /usr/sbin/pvs ]; then
count_pvs=$(pvs|grep $dev|wc -l)
fi
count_swap=$(swapon -s |grep `realpath $dev`|wc -l)
if [ $count_raw -eq 0 -a $count_pvs -eq 0 -a $count_swap -eq 0 ]; then
echo -n "$dev "
fi
done
}
AMI=0
SET_NIC=0
DEV_MODE=0
@@ -105,7 +89,6 @@ RAID_SETUP=1
COREDUMP_SETUP=1
SYSCONFIG_SETUP=1
IO_SETUP=1
NODE_EXPORTER=1
if [ $# -ne 0 ]; then
INTERACTIVE=0
@@ -183,10 +166,6 @@ while [ $# -gt 0 ]; do
IO_SETUP=0
shift 1
;;
"--no-node-exporter")
NODE_EXPORTER=0
shift 1
;;
"-h" | "--help")
print_usage
shift 1
@@ -226,32 +205,31 @@ fi
if [ $INTERACTIVE -eq 1 ]; then
interactive_ask_service "Do you want to enable ScyllaDB services?" "Answer yes to automatically start Scylla when the node boots; answer no to skip this step." "yes" &&:
ENABLE_SERVICE=$?
if [ $ENABLE_SERVICE -eq 1 ] && [ ! -f /etc/scylla.d/housekeeping.cfg ]; then
interactive_ask_service "Do you want to enable ScyllaDB version check?" "Answer yes to automatically start scylla-housekeeping, which checks for newer versions, when the node boots; answer no to skip this step." "yes" &&:
ENABLE_CHECK_VERSION=$?
fi
fi
if [ $ENABLE_SERVICE -eq 1 ]; then
if [ "$ID" = "fedora" ] || [ "$ID" = "centos" ] || [ "$ID" = "ubuntu" -a "$VERSION_ID" != "14.04" ]; then
systemctl enable scylla-server.service
systemctl enable collectd.service
fi
if [ $INTERACTIVE -eq 1 ] && [ ! -f /etc/scylla.d/housekeeping.cfg ]; then
interactive_ask_service "Do you want to enable ScyllaDB version check?" "Answer yes to automatically start scylla-housekeeping, which checks for newer versions, when the node boots; answer no to skip this step." "yes" &&:
ENABLE_CHECK_VERSION=$?
if [ $ENABLE_CHECK_VERSION -eq 1 ]; then
systemctl unmask scylla-housekeeping.timer
else
systemctl mask scylla-housekeeping.timer
systemctl stop scylla-housekeeping.timer || true
fi
fi
if [ $ENABLE_CHECK_VERSION -eq 1 ]; then
if [ ! -f /etc/scylla.d/housekeeping.cfg ]; then
printf "[housekeeping]\ncheck-version: True\n" > /etc/scylla.d/housekeeping.cfg
fi
if [ "$ID" = "fedora" ] || [ "$ID" = "centos" ] || [ "$ID" = "ubuntu" -a "$VERSION_ID" != "14.04" ]; then
systemctl unmask scylla-housekeeping.timer
fi
else
if [ ! -f /etc/scylla.d/housekeeping.cfg ]; then
printf "[housekeeping]\ncheck-version: False\n" > /etc/scylla.d/housekeeping.cfg
fi
if [ "$ID" = "fedora" ] || [ "$ID" = "centos" ] || [ "$ID" = "ubuntu" -a "$VERSION_ID" != "14.04" ]; then
systemctl mask scylla-housekeeping.timer
systemctl stop scylla-housekeeping.timer || true
fi
fi
fi
@@ -287,15 +265,21 @@ if [ $INTERACTIVE -eq 1 ]; then
interactive_ask_service "Do you want to setup RAID and XFS?" "It is recommended to use RAID0 and XFS for Scylla data. If you select yes, you will be prompt to choose which disks to use for Scylla data. Selected disks will be formatted in the process." "yes" &&:
RAID_SETUP=$?
if [ $RAID_SETUP -eq 1 ]; then
DEVS=`get_unused_disks`
if [ "$DEVS" = "" ]; then
echo "No free disks detected, abort RAID/XFS setup."
echo
RAID_SETUP=0
else
echo "Please select disks from the following list: $DEVS"
fi
while [ "$DEVS" != "" ]; do
echo "Please select disks from following list: "
while true; do
blkid -c /dev/null|cut -f 1 -d ' '|sed s/://g|grep -v loop|while read dev
do
count_raw=$(grep $dev /proc/mounts|wc -l)
count_pvs=0
if [ -f /usr/sbin/pvs ]; then
count_pvs=$(pvs|grep $dev|wc -l)
fi
count_swap=$(swapon --show |grep `realpath $dev`|wc -l)
if [ $count_raw -eq 0 -a $count_pvs -eq 0 -a $count_swap -eq 0 ]; then
echo -n "$dev "
fi
done
echo "type 'done' to finish selection. selected: $DISKS"
echo -n "> "
read dsk
@@ -374,20 +358,10 @@ if [ $INTERACTIVE -eq 1 ]; then
interactive_ask_service "Do you want to setup IO configuration?" "Answer yes to let iotune study what are your disks IO profile and adapt Scylla to it. Answer no to skip this action." "yes" &&:
IO_SETUP=$?
fi
if [ $IO_SETUP -eq 1 ]; then
/usr/lib/scylla/scylla_io_setup
fi
if [ $INTERACTIVE -eq 1 ]; then
interactive_ask_service "Do you want to install node exporter, that exports prometheus data from the node?" "Answer yes to install it; answer no to skip this installation." "yes" &&:
NODE_EXPORTER=$?
fi
if [ $NODE_EXPORTER -eq 1 ]; then
/usr/lib/scylla/node_exporter_install
fi
if [ $DEV_MODE -eq 1 ]; then
/usr/lib/scylla/scylla_dev_mode_setup --developer-mode 1
fi


@@ -1,9 +0,0 @@
[Unit]
Description=Node Exporter
[Service]
Type=simple
ExecStart=/usr/bin/node_exporter
[Install]
WantedBy=multi-user.target


@@ -4,8 +4,7 @@ After=scylla-server.service
BindsTo=scylla-server.service
[Timer]
# set OnActiveSec to 3 to safely avoid issues/1846
OnActiveSec=3
OnBootSec=0
OnUnitActiveSec=1d
[Install]


@@ -20,7 +20,6 @@ TimeoutStartSec=900
KillMode=process
Restart=on-abnormal
User=scylla
OOMScoreAdjust=-950
[Install]
WantedBy=multi-user.target


@@ -7,7 +7,7 @@ ENV container docker
VOLUME [ "/sys/fs/cgroup" ]
#install scylla
RUN curl http://downloads.scylladb.com/rpm/centos/scylla-1.4.repo -o /etc/yum.repos.d/scylla.repo
RUN curl http://downloads.scylladb.com/rpm/centos/scylla-1.3.repo -o /etc/yum.repos.d/scylla.repo
RUN yum -y install epel-release
RUN yum -y clean expire-cache
RUN yum -y update


@@ -9,7 +9,6 @@ def parse():
parser.add_argument('--smp', default=None, help="e.g --smp 2 to use two CPUs")
parser.add_argument('--memory', default=None, help="e.g. --memory 1G to use 1 GB of RAM")
parser.add_argument('--overprovisioned', default='0', choices=['0', '1'], help="run in overprovisioned environment")
parser.add_argument('--listen-address', default=None, dest='listenAddress')
parser.add_argument('--broadcast-address', default=None, dest='broadcastAddress')
parser.add_argument('--broadcast-rpc-address', default=None, dest='broadcastRpcAddress')
return parser.parse_args()


@@ -8,7 +8,6 @@ class ScyllaSetup:
self._developerMode = arguments.developerMode
self._seeds = arguments.seeds
self._cpuset = arguments.cpuset
self._listenAddress = arguments.listenAddress
self._broadcastAddress = arguments.broadcastAddress
self._broadcastRpcAddress = arguments.broadcastRpcAddress
self._smp = arguments.smp
@@ -32,15 +31,14 @@ class ScyllaSetup:
def scyllaYAML(self):
configuration = yaml.load(open('/etc/scylla/scylla.yaml'))
if self._listenAddress is None:
self._listenAddress = subprocess.check_output(['hostname', '-i']).decode('ascii').strip()
configuration['listen_address'] = self._listenAddress
configuration['rpc_address'] = self._listenAddress
IP = subprocess.check_output(['hostname', '-i']).decode('ascii').strip()
configuration['listen_address'] = IP
configuration['rpc_address'] = IP
if self._seeds is None:
if self._broadcastAddress is not None:
self._seeds = self._broadcastAddress
else:
self._seeds = self._listenAddress
self._seeds = IP
configuration['seed_provider'] = [
{'class_name': 'org.apache.cassandra.locator.SimpleSeedProvider',
'parameters': [{'seeds': self._seeds}]}


@@ -50,9 +50,6 @@ fi
if [ ! -f /usr/bin/git ]; then
sudo yum -y install git
fi
if [ ! -f /usr/bin/rpmbuild ]; then
sudo yum -y install rpm-build
fi
mkdir -p $RPMBUILD/{BUILD,BUILDROOT,RPMS,SOURCES,SPECS,SRPMS}
if [ "$ID" = "centos" ]; then
sudo yum install -y epel-release


@@ -27,10 +27,10 @@ Group: Applications/Databases
Summary: The Scylla database server
License: AGPLv3
URL: http://www.scylladb.com/
BuildRequires: libaio-devel libstdc++-devel cryptopp-devel hwloc-devel numactl-devel libpciaccess-devel libxml2-devel zlib-devel thrift-devel yaml-cpp-devel lz4-devel snappy-devel jsoncpp-devel systemd-devel xz-devel openssl-devel libcap-devel libselinux-devel libgcrypt-devel libgpg-error-devel elfutils-devel krb5-devel libcom_err-devel libattr-devel pcre-devel elfutils-libelf-devel bzip2-devel keyutils-libs-devel xfsprogs-devel make gnutls-devel systemd-devel lksctp-tools-devel protobuf-devel protobuf-compiler libunwind-devel
BuildRequires: libaio-devel libstdc++-devel cryptopp-devel hwloc-devel numactl-devel libpciaccess-devel libxml2-devel zlib-devel thrift-devel yaml-cpp-devel lz4-devel snappy-devel jsoncpp-devel systemd-devel xz-devel openssl-devel libcap-devel libselinux-devel libgcrypt-devel libgpg-error-devel elfutils-devel krb5-devel libcom_err-devel libattr-devel pcre-devel elfutils-libelf-devel bzip2-devel keyutils-libs-devel xfsprogs-devel make gnutls-devel systemd-devel lksctp-tools-devel
%{?fedora:BuildRequires: boost-devel ninja-build ragel antlr3-tool antlr3-C++-devel python3 gcc-c++ libasan libubsan python3-pyparsing dnf-yum}
%{?rhel:BuildRequires: scylla-libstdc++-static scylla-boost-devel scylla-ninja-build scylla-ragel scylla-antlr3-tool scylla-antlr3-C++-devel python34 scylla-gcc-c++ >= 5.1.1, python34-pyparsing}
Requires: scylla-conf systemd-libs hwloc collectd PyYAML python-urwid pciutils pyparsing python-requests curl bc util-linux
Requires: scylla-conf systemd-libs hwloc collectd PyYAML python-urwid pyparsing python-requests curl bc util-linux
Conflicts: abrt
%description server
@@ -108,8 +108,8 @@ cp -r scylla-housekeeping $RPM_BUILD_ROOT%{_prefix}/lib/scylla/scylla-housekeepi
cp -P dist/common/sbin/* $RPM_BUILD_ROOT%{_sbindir}/
%pre server
getent group scylla || /usr/sbin/groupadd scylla 2> /dev/null || :
getent passwd scylla || /usr/sbin/useradd -g scylla -s /sbin/nologin -r -d %{_sharedstatedir}/scylla scylla 2> /dev/null || :
/usr/sbin/groupadd scylla 2> /dev/null || :
/usr/sbin/useradd -g scylla -s /sbin/nologin -r -d %{_sharedstatedir}/scylla scylla 2> /dev/null || :
%post server
# Upgrade coredump settings
@@ -151,7 +151,6 @@ rm -rf $RPM_BUILD_ROOT
%{_unitdir}/scylla-server.service
%{_unitdir}/scylla-housekeeping.service
%{_unitdir}/scylla-housekeeping.timer
%{_unitdir}/node-exporter.service
%{_bindir}/scylla
%{_bindir}/iotune
%{_bindir}/scyllatop
@@ -160,8 +159,6 @@ rm -rf $RPM_BUILD_ROOT
%{_prefix}/lib/scylla/scylla_prepare
%{_prefix}/lib/scylla/scylla_stop
%{_prefix}/lib/scylla/scylla_setup
%{_prefix}/lib/scylla/node_exporter_install
%{_sbindir}/node_exporter_install
%{_prefix}/lib/scylla/scylla_coredump_setup
%{_prefix}/lib/scylla/scylla_raid_setup
%{_prefix}/lib/scylla/scylla_sysconfig_setup


@@ -78,13 +78,13 @@ cp dist/ubuntu/scylla-server.install.in debian/scylla-server.install
if [ "$RELEASE" = "14.04" ]; then
sed -i -e "s/@@DH_INSTALLINIT@@/--upstart-only/g" debian/rules
sed -i -e "s/@@COMPILER@@/g++-5/g" debian/rules
sed -i -e "s/@@BUILD_DEPENDS@@/g++-5, libunwind8-dev/g" debian/control
sed -i -e "s/@@BUILD_DEPENDS@@/g++-5/g" debian/control
sed -i -e "s#@@INSTALL@@#dist/ubuntu/sudoers.d/scylla etc/sudoers.d#g" debian/scylla-server.install
sed -i -e "s#@@HKDOTTIMER@@##g" debian/scylla-server.install
else
sed -i -e "s/@@DH_INSTALLINIT@@//g" debian/rules
sed -i -e "s/@@COMPILER@@/g++/g" debian/rules
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, g++, libunwind-dev/g" debian/control
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, g++/g" debian/control
sed -i -e "s#@@INSTALL@@##g" debian/scylla-server.install
sed -i -e "s#@@HKDOTTIMER@@#dist/common/systemd/scylla-housekeeping.timer /lib/systemd/system#g" debian/scylla-server.install
fi
@@ -102,7 +102,6 @@ fi
cp dist/common/systemd/scylla-server.service.in debian/scylla-server.service
sed -i -e "s#@@SYSCONFDIR@@#/etc/default#g" debian/scylla-server.service
cp dist/common/systemd/scylla-housekeeping.service debian/scylla-server.scylla-housekeeping.service
cp dist/common/systemd/node-exporter.service debian/scylla-server.node-exporter.service
if [ "$RELEASE" = "14.04" ] && [ $REBUILD -eq 0 ]; then
if [ ! -f /etc/apt/sources.list.d/scylla-3rdparty-trusty.list ]; then


@@ -4,7 +4,7 @@ Homepage: http://scylladb.com
Section: database
Priority: optional
Standards-Version: 3.9.5
Build-Depends: debhelper (>= 9), libyaml-cpp-dev, liblz4-dev, libsnappy-dev, libcrypto++-dev, libjsoncpp-dev, libaio-dev, libthrift-dev, thrift-compiler, antlr3, antlr3-c++-dev, ragel, ninja-build, git, libboost-program-options1.55-dev | libboost-program-options-dev, libboost-filesystem1.55-dev | libboost-filesystem-dev, libboost-system1.55-dev | libboost-system-dev, libboost-thread1.55-dev | libboost-thread-dev, libboost-test1.55-dev | libboost-test-dev, libgnutls28-dev, libhwloc-dev, libnuma-dev, libpciaccess-dev, xfslibs-dev, python3-pyparsing, libxml2-dev, libsctp-dev, python-urwid, pciutils, libprotobuf-dev, protobuf-compiler, @@BUILD_DEPENDS@@
Build-Depends: debhelper (>= 9), libyaml-cpp-dev, liblz4-dev, libsnappy-dev, libcrypto++-dev, libjsoncpp-dev, libaio-dev, libthrift-dev, thrift-compiler, antlr3, antlr3-c++-dev, ragel, ninja-build, git, libboost-program-options1.55-dev | libboost-program-options-dev, libboost-filesystem1.55-dev | libboost-filesystem-dev, libboost-system1.55-dev | libboost-system-dev, libboost-thread1.55-dev | libboost-thread-dev, libboost-test1.55-dev | libboost-test-dev, libgnutls28-dev, libhwloc-dev, libnuma-dev, libpciaccess-dev, xfslibs-dev, python3-pyparsing, libxml2-dev, libsctp-dev, python-urwid, @@BUILD_DEPENDS@@
Package: scylla-conf
Architecture: any
@@ -16,7 +16,7 @@ Conflicts: scylla-server (<< 1.1)
Package: scylla-server
Architecture: amd64
Depends: ${shlibs:Depends}, ${misc:Depends}, adduser, hwloc-nox, collectd, scylla-conf, python-yaml, python-urwid, python-requests, curl, bc, util-linux, realpath, @@DEPENDS@@
Depends: ${shlibs:Depends}, ${misc:Depends}, adduser, hwloc-nox, collectd, scylla-conf, python-yaml, python-urwid, python-requests, curl, bc, util-linux, @@DEPENDS@@
Description: Scylla database server binaries
Scylla is a highly scalable, eventually consistent, distributed,
partitioned row DB.


@@ -12,7 +12,6 @@ override_dh_auto_clean:
override_dh_installinit:
dh_installinit --no-start @@DH_INSTALLINIT@@
dh_installinit --no-start --name scylla-housekeeping @@DH_INSTALLINIT@@
dh_installinit --no-start --name node-exporter @@DH_INSTALLINIT@@
override_dh_strip:
dh_strip --dbg-package=scylla-server-dbg


@@ -97,18 +97,6 @@ For example, to configure Scylla to run with two seed nodes `192.168.0.100` and
$ docker run --name some-scylla -d scylladb/scylla --seeds 192.168.0.100,192.168.0.200
```
### `--listen-address ADDR`
The `--listen-address` command line option configures the IP address the Scylla instance listens for client connections.
For example, to configure Scylla to use listen address `10.0.0.5`:
```console
$ docker run --name some-scylla -d scylladb/scylla --listen-address 10.0.0.5
```
**Since: 1.4**
### `--broadcast-address ADDR`
The `--broadcast-address` command line option configures the IP address the Scylla instance tells other Scylla nodes in the cluster to connect to.


@@ -46,19 +46,18 @@
using namespace db;
ser::mutation_view frozen_mutation::mutation_view() const {
auto in = ser::as_input_stream(_bytes);
return ser::deserialize(in, boost::type<ser::mutation_view>());
}
utils::UUID
frozen_mutation::column_family_id() const {
return mutation_view().table_id();
auto in = ser::as_input_stream(_bytes);
auto mv = ser::deserialize(in, boost::type<ser::mutation_view>());
return mv.table_id();
}
utils::UUID
frozen_mutation::schema_version() const {
return mutation_view().schema_version();
auto in = ser::as_input_stream(_bytes);
auto mv = ser::deserialize(in, boost::type<ser::mutation_view>());
return mv.schema_version();
}
partition_key_view
@@ -72,36 +71,37 @@ frozen_mutation::decorated_key(const schema& s) const {
}
partition_key frozen_mutation::deserialize_key() const {
return mutation_view().key();
auto in = ser::as_input_stream(_bytes);
auto mv = ser::deserialize(in, boost::type<ser::mutation_view>());
return mv.key();
}
frozen_mutation::frozen_mutation(bytes_ostream&& b)
frozen_mutation::frozen_mutation(bytes&& b)
: _bytes(std::move(b))
, _pk(deserialize_key())
{
_bytes.reduce_chunk_count();
}
{ }
frozen_mutation::frozen_mutation(bytes_ostream&& b, partition_key pk)
: _bytes(std::move(b))
frozen_mutation::frozen_mutation(bytes_view bv, partition_key pk)
: _bytes(bytes(bv.begin(), bv.end()))
, _pk(std::move(pk))
{
_bytes.reduce_chunk_count();
}
{ }
frozen_mutation::frozen_mutation(const mutation& m)
: _pk(m.key())
{
mutation_partition_serializer part_ser(*m.schema(), m.partition());
ser::writer_of_mutation wom(_bytes);
bytes_ostream out;
ser::writer_of_mutation wom(out);
std::move(wom).write_table_id(m.schema()->id())
.write_schema_version(m.schema()->version())
.write_key(m.key())
.partition([&] (auto wr) {
part_ser.write(std::move(wr));
}).end_mutation();
_bytes.reduce_chunk_count();
auto bv = out.linearize();
_bytes = bytes(bv.begin(), bv.end()); // FIXME: avoid copy
}
mutation
@@ -117,7 +117,9 @@ frozen_mutation freeze(const mutation& m) {
}
mutation_partition_view frozen_mutation::partition() const {
return mutation_partition_view::from_view(mutation_view().partition());
auto in = ser::as_input_stream(_bytes);
auto mv = ser::deserialize(in, boost::type<ser::mutation_view>());
return mutation_partition_view::from_view(mv.partition());
}
std::ostream& operator<<(std::ostream& out, const frozen_mutation::printer& pr) {
@@ -166,7 +168,7 @@ frozen_mutation streamed_mutation_freezer::consume_end_of_stream() {
std::move(_sr), std::move(_rts),
std::move(_crs), std::move(wr));
}).end_mutation();
return frozen_mutation(std::move(out), std::move(_key));
return frozen_mutation(out.linearize(), std::move(_key));
}
future<frozen_mutation> freeze(streamed_mutation sm) {
@@ -206,7 +208,7 @@ private:
_rts.clear();
_crs.clear();
_dirty_size = 0;
return _consumer(frozen_mutation(std::move(out), _key), _fragmented);
return _consumer(frozen_mutation(out.linearize(), _key), _fragmented);
}
future<stop_iteration> maybe_flush() {


@@ -29,10 +29,6 @@
class mutation;
class streamed_mutation;
namespace ser {
class mutation_view;
}
// Immutable, compact form of mutation.
//
// This form is primarily destined to be sent over the network channel.
@@ -45,20 +41,20 @@ class mutation_view;
//
class frozen_mutation final {
private:
bytes_ostream _bytes;
bytes _bytes;
partition_key _pk;
private:
partition_key deserialize_key() const;
ser::mutation_view mutation_view() const;
public:
frozen_mutation(const mutation& m);
explicit frozen_mutation(bytes_ostream&& b);
frozen_mutation(bytes_ostream&& b, partition_key key);
explicit frozen_mutation(bytes&& b);
frozen_mutation(bytes_view bv, partition_key key);
frozen_mutation(frozen_mutation&& m) = default;
frozen_mutation(const frozen_mutation& m) = default;
frozen_mutation& operator=(frozen_mutation&&) = default;
frozen_mutation& operator=(const frozen_mutation&) = default;
const bytes_ostream& representation() const { return _bytes; }
bytes_view representation() const { return _bytes; }
utils::UUID column_family_id() const;
utils::UUID schema_version() const; // FIXME: Should replace column_family_id()
partition_key_view key(const schema& s) const;


@@ -192,12 +192,10 @@ future<> gossiper::handle_syn_msg(msg_addr from, gossip_digest_syn syn_msg) {
/* If the message is from a different cluster throw it away. */
if (syn_msg.cluster_id() != get_cluster_name()) {
logger.warn("ClusterName mismatch from {} {}!={}", from.addr, syn_msg.cluster_id(), get_cluster_name());
return make_ready_future<>();
}
if (syn_msg.partioner() != "" && syn_msg.partioner() != get_partitioner_name()) {
logger.warn("Partitioner mismatch from {} {}!={}", from.addr, syn_msg.partioner(), get_partitioner_name());
return make_ready_future<>();
}
@@ -747,10 +745,10 @@ void gossiper::convict(inet_address endpoint, double phi) {
return;
}
auto& state = it->second;
logger.debug("Convicting {} with status {} - alive {}", endpoint, get_gossip_status(state), state.is_alive());
if (!state.is_alive()) {
return;
}
logger.debug("Convicting {} with status {} - alive {}", endpoint, get_gossip_status(state), state.is_alive());
logger.trace("convict ep={}, phi={}, is_alive={}, is_dead_state={}", endpoint, phi, state.is_alive(), is_dead_state(state));
if (is_shutdown(endpoint)) {
@@ -979,10 +977,7 @@ future<> gossiper::do_gossip_to_unreachable_member(gossip_digest_syn message) {
if (rand_dbl < prob) {
std::set<inet_address> addrs;
for (auto&& x : _unreachable_endpoints) {
// Ignore the node which is decommissioned
if (get_gossip_status(x.first) != sstring(versioned_value::STATUS_LEFT)) {
addrs.insert(x.first);
}
addrs.insert(x.first);
}
logger.trace("do_gossip_to_unreachable_member: live_endpoint nr={} unreachable_endpoints nr={}",
live_endpoint_count, unreachable_endpoint_count);
@@ -1164,7 +1159,7 @@ void gossiper::real_mark_alive(inet_address addr, endpoint_state& local_state) {
_expire_time_endpoint_map.erase(addr);
logger.debug("removing expire time for endpoint : {}", addr);
if (!_in_shadow_round) {
logger.info("InetAddress {} is now UP, status = {}", addr, get_gossip_status(local_state));
logger.info("InetAddress {} is now UP", addr);
}
_subscribers.for_each([addr, local_state] (auto& subscriber) {
@@ -1180,7 +1175,7 @@ void gossiper::mark_dead(inet_address addr, endpoint_state& local_state) {
_live_endpoints.erase(addr);
_live_endpoints_just_added.remove(addr);
_unreachable_endpoints[addr] = now();
logger.info("InetAddress {} is now DOWN, status = {}", addr, get_gossip_status(local_state));
logger.info("InetAddress {} is now DOWN", addr);
_subscribers.for_each([addr, local_state] (auto& subscriber) {
subscriber->on_dead(addr, local_state);
logger.trace("Notified {}", subscriber.get());
@@ -1195,9 +1190,9 @@ void gossiper::handle_major_state_change(inet_address ep, const endpoint_state&
}
if (!is_dead_state(eps) && !_in_shadow_round) {
if (endpoint_state_map.count(ep)) {
logger.debug("Node {} has restarted, now UP, status = {}", ep, get_gossip_status(eps));
logger.info("Node {} has restarted, now UP", ep);
} else {
logger.debug("Node {} is now part of the cluster, status = {}", ep, get_gossip_status(eps));
logger.info("Node {} is now part of the cluster", ep);
}
}
logger.trace("Adding endpoint state for {}, status = {}", ep, get_gossip_status(eps));
@@ -1590,14 +1585,7 @@ bool gossiper::is_in_shadow_round() {
}
void gossiper::add_expire_time_for_endpoint(inet_address endpoint, clk::time_point expire_time) {
char expire_time_buf[100];
auto expire_time_tm = std::chrono::system_clock::to_time_t(expire_time);
auto now_ = now();
strftime(expire_time_buf, sizeof(expire_time_buf), "%Y-%m-%d %T", std::localtime(&expire_time_tm));
auto diff = std::chrono::duration_cast<std::chrono::seconds>(expire_time - now_).count();
logger.info("Node {} will be removed from gossip at [{}]: (expire = {}, now = {}, diff = {} seconds)",
endpoint, expire_time_buf, expire_time.time_since_epoch().count(),
now_.time_since_epoch().count(), diff);
logger.debug("adding expire time for endpoint : {} ({})", endpoint, expire_time.time_since_epoch().count());
_expire_time_endpoint_map[endpoint] = expire_time;
}
@@ -1701,7 +1689,7 @@ future<> gossiper::wait_for_gossip_to_settle() {
total_polls++;
// Make sure 5 gossip rounds are completed successfully
if (_nr_run > 5) {
logger.debug("Gossip looks settled. gossip round completed: {}", _nr_run);
logger.debug("Gossip looks settled. {} gossip rounds completed", _nr_run);
num_okay++;
} else {
logger.info("Gossip not settled after {} polls.", total_polls);


@@ -102,7 +102,7 @@ private:
return msg_addr{to, _default_cpuid};
}
void do_sort(std::vector<gossip_digest>& g_digest_list);
timer<lowres_clock> _scheduled_gossip_task;
timer<std::chrono::steady_clock> _scheduled_gossip_task;
bool _enabled = false;
std::set<inet_address> _seeds_from_config;
sstring _cluster_name;


@@ -184,9 +184,6 @@ struct serializer<$name> {
template <typename Input>
static $name read(Input& buf);
template <typename Input>
static void skip(Input& buf);
};
""").substitute({'name' : name, 'sizetype' : SIZETYPE, 'tmp_param' : template_param }))
if config.ns != '':
@@ -732,19 +729,16 @@ def add_variant_read_size(hout, typ):
add_variant_read_size(hout, p)
read_sizes.add(t)
fprintln(hout, Template("""
template<typename Input>
inline void skip(Input& v, boost::type<${type}>) {
return seastar::with_serialized_stream(v, [] (auto& v) {
template<>
inline void skip(seastar::simple_input_stream& v, boost::type<${type}>) {
size_type ln = deserialize(v, boost::type<size_type>());
v.skip(ln - sizeof(size_type));
});
}""").substitute ({'type' : t}))
fprintln(hout, Template("""
template<typename Input>
$type deserialize(Input& v, boost::type<${type}>) {
return seastar::with_serialized_stream(v, [] (auto& v) {
auto in = v;
deserialize(in, boost::type<size_type>());
size_type o = deserialize(in, boost::type<size_type>());
@@ -755,7 +749,7 @@ $type deserialize(Input& v, boost::type<${type}>) {
v.skip(sizeof(size_type)*2);
return $full_type(deserialize(v, boost::type<$type>()));
}""").substitute({'ind' : index, 'type' : param_view_type(param), 'full_type' : t}))
fprintln(hout, ' return ' + t + '(deserialize(v, boost::type<unknown_variant_type>()));\n });\n}')
fprintln(hout, ' return ' + t + '(deserialize(v, boost::type<unknown_variant_type>()));\n}')
def add_view(hout, info):
[cls, namespaces, parent_template_param] = info
@@ -764,7 +758,7 @@ def add_view(hout, info):
add_variant_read_size(hout, m["type"])
fprintln(hout, Template("""struct ${name}_view {
utils::input_stream v;
seastar::simple_input_stream v;
""").substitute({'name' : cls["name"]}))
if not is_stub(cls["name"]) and is_local_type(cls["name"]):
@@ -775,47 +769,42 @@ def add_view(hout, info):
}
""")).substitute({'type' : cls["name"]}))
skip = "" if is_final(cls) else "ser::skip(in, boost::type<size_type>());"
skip = "" if is_final(cls) else "skip(in, boost::type<size_type>());"
for m in members:
full_type = param_view_type(m["type"])
fprintln(hout, Template(reindent(4, """
auto $name() const {
return seastar::with_serialized_stream(v, [] (auto& v) {
$type $name() const {
auto in = v;
$skip
return deserialize(in, boost::type<$type>());
});
}
""")).substitute({'name' : m["name"], 'type' : full_type, 'skip' : skip}))
skip = skip + Template("\n ser::skip(in, boost::type<${type}>());").substitute({'type': full_type})
skip = skip + Template("\n skip(in, boost::type<${type}>());").substitute({'type': full_type})
fprintln(hout, "};")
skip_impl = "auto& in = v;\n " + skip if is_final(cls) else "v.skip(read_frame_size(v));"
skip_impl = "seastar::simple_input_stream& in = v;\n " + skip if is_final(cls) else "v.skip(read_frame_size(v));"
if skip == "":
skip_impl = ""
fprintln(hout, Template("""
template<>
inline void skip(seastar::simple_input_stream& v, boost::type<${type}_view>) {
$skip_impl
}
template<>
struct serializer<${type}_view> {
template<typename Input>
static ${type}_view read(Input& v) {
return seastar::with_serialized_stream(v, [] (auto& v) {
auto v_start = v;
auto start_size = v.size();
skip(v);
skip(v, boost::type<${type}_view>());
return ${type}_view{v_start.read_substream(start_size - v.size())};
});
}
template<typename Output>
static void write(Output& out, ${type}_view v) {
v.v.copy_to(out);
}
template<typename Input>
static void skip(Input& v) {
return seastar::with_serialized_stream(v, [] (auto& v) {
$skip_impl
});
out.write(v.v.begin(), v.v.size());
}
};
""").substitute({'type' : param_type(cls["name"]), 'skip' : skip, 'skip_impl' : skip_impl}))
@@ -876,13 +865,12 @@ void serializer<$name>::write(Output& buf, const $name& obj) {""").substitute({'
fprintln(cout, Template("""
$template
template <typename Input>
$name$temp_param serializer<$name$temp_param>::read(Input& buf) {
return seastar::with_serialized_stream(buf, [] (auto& buf) {""").substitute({'func' : DESERIALIZER, 'name' : name, 'template': template, 'temp_param' : template_class_param}))
$name$temp_param serializer<$name$temp_param>::read(Input& buf) {""").substitute({'func' : DESERIALIZER, 'name' : name, 'template': template, 'temp_param' : template_class_param}))
if not is_final:
fprintln(cout, Template(""" $size_type size = $func(buf, boost::type<$size_type>());
auto in = buf.read_substream(size - sizeof($size_type));""").substitute({'func' : DESERIALIZER, 'size_type' : SIZETYPE}))
Input in = buf.read_substream(size - sizeof($size_type));""").substitute({'func' : DESERIALIZER, 'size_type' : SIZETYPE}))
else:
fprintln(cout, """ auto& in = buf;""")
fprintln(cout, """ Input& in = buf;""")
params = []
local_names = {}
for index, param in enumerate(cls["members"]):
@@ -894,31 +882,16 @@ $name$temp_param serializer<$name$temp_param>::read(Input& buf) {
deflt = param["default"][0] if "default" in param else param_type(param["type"]) + "()"
if deflt in local_names:
deflt = local_names[deflt]
fprintln(cout, Template(""" auto $local = (in.size()>0) ?
fprintln(cout, Template(""" $typ $local = (in.size()>0) ?
$func(in, boost::type<$typ>()) : $default;""").substitute({'func' : DESERIALIZER, 'typ': param_type(param["type"]), 'local' : local_param, 'default': deflt}))
else:
fprintln(cout, Template(""" auto $local = $func(in, boost::type<$typ>());""").substitute({'func' : DESERIALIZER, 'typ': param_type(param["type"]), 'local' : local_param}))
fprintln(cout, Template(""" $typ $local = $func(in, boost::type<$typ>());""").substitute({'func' : DESERIALIZER, 'typ': param_type(param["type"]), 'local' : local_param}))
params.append("std::move(" + local_param + ")")
fprintln(cout, Template("""
$name$temp_param res {$params};
return res;
});
}""").substitute({'name' : name, 'params': ", ".join(params), 'temp_param' : template_class_param}))
fprintln(cout, Template("""
$template
template <typename Input>
void serializer<$name$temp_param>::skip(Input& buf) {
seastar::with_serialized_stream(buf, [] (auto& buf) {""").substitute({'func' : DESERIALIZER, 'name' : name, 'template': template, 'temp_param' : template_class_param}))
if not is_final:
fprintln(cout, Template(""" $size_type size = $func(buf, boost::type<$size_type>());
buf.skip(size - sizeof($size_type));""").substitute({'func' : DESERIALIZER, 'size_type' : SIZETYPE}))
else:
for m in get_members(cls):
full_type = param_view_type(m["type"])
fprintln(cout, " ser::skip(buf, boost::type<%s>());" % full_type)
fprintln(cout, """ });\n}""")
def handle_objects(tree, hout, cout, namespaces=[]):
for obj in tree:


@@ -70,11 +70,3 @@ struct compound_with_optional {
std::experimental::optional<simple_compound> first;
simple_compound second;
};
class non_final_composite_test_object {
simple_compound x();
};
class final_composite_test_object final {
simple_compound x();
};


@@ -31,10 +31,3 @@ class range {
std::experimental::optional<range_bound<T>> end();
bool is_singular();
};
template<typename T>
class nonwrapping_range {
std::experimental::optional<range_bound<T>> start();
std::experimental::optional<range_bound<T>> end();
bool is_singular();
};


@@ -27,11 +27,11 @@ namespace query {
class specific_ranges {
partition_key pk();
std::vector<nonwrapping_range<clustering_key_prefix>> ranges();
std::vector<range<clustering_key_prefix>> ranges();
};
class partition_slice {
std::vector<nonwrapping_range<clustering_key_prefix>> default_row_ranges();
std::vector<range<clustering_key_prefix>> default_row_ranges();
std::vector<uint32_t> static_columns;
std::vector<uint32_t> regular_columns;
query::partition_slice::option_set options;


@@ -26,7 +26,7 @@ class result_digest final {
};
class result {
bytes buf();
bytes_ostream buf();
std::experimental::optional<query::result_digest> digest();
api::timestamp_type last_modified() [ [version 1.2] ] = api::missing_timestamp;
};


@@ -30,9 +30,6 @@ class trace_info {
utils::UUID session_id;
tracing::trace_type type;
bool write_on_close;
tracing::trace_state_props_set state_props [[version 1.4]];
uint32_t slow_query_threshold_us [[version 1.4]];
uint32_t slow_query_ttl_sec [[version 1.4]];
};
}


@@ -48,8 +48,7 @@ void init_ms_fd_gossiper(sstring listen_address
, sstring ms_compress
, db::seed_provider_type seed_provider
, sstring cluster_name
, double phi
, bool sltba)
, double phi)
{
const gms::inet_address listen(listen_address);
@@ -91,7 +90,7 @@ void init_ms_fd_gossiper(sstring listen_address
// Init messaging_service
// Delay listening messaging_service until gossip message handlers are registered
bool listen_now = false;
net::get_messaging_service().start(listen, storage_port, ew, cw, ssl_storage_port, creds, sltba, listen_now).get();
net::get_messaging_service().start(listen, storage_port, ew, cw, ssl_storage_port, creds, listen_now).get();
// #293 - do not stop anything
//engine().at_exit([] { return net::get_messaging_service().stop(); });


@@ -37,5 +37,4 @@ void init_ms_fd_gossiper(sstring listen_address
, sstring ms_compress
, db::seed_provider_type seed_provider
, sstring cluster_name = "Test Cluster"
, double phi = 8
, bool sltba = false);
, double phi = 8);

keys.hh

@@ -168,7 +168,7 @@ public:
template<typename RangeOfSerializedComponents>
static TopLevel from_exploded(RangeOfSerializedComponents&& v) {
return TopLevel::from_range(std::forward<RangeOfSerializedComponents>(v));
return TopLevel(std::forward<RangeOfSerializedComponents>(v));
}
static TopLevel from_exploded(const schema& s, const std::vector<bytes>& v) {
@@ -615,12 +615,8 @@ public:
using c_type = compound_type<allow_prefixes::no>;
template<typename RangeOfSerializedComponents>
static partition_key from_range(RangeOfSerializedComponents&& v) {
return partition_key(managed_bytes(c_type::serialize_value(std::forward<RangeOfSerializedComponents>(v))));
}
partition_key(std::vector<bytes> v)
: compound_wrapper(managed_bytes(c_type::serialize_value(std::move(v))))
partition_key(RangeOfSerializedComponents&& v)
: compound_wrapper(managed_bytes(c_type::serialize_value(std::forward<RangeOfSerializedComponents>(v))))
{ }
partition_key(partition_key&& v) = default;
@@ -709,12 +705,8 @@ class clustering_key_prefix : public prefix_compound_wrapper<clustering_key_pref
{ }
public:
template<typename RangeOfSerializedComponents>
static clustering_key_prefix from_range(RangeOfSerializedComponents&& v) {
return clustering_key_prefix(compound::element_type::serialize_value(std::forward<RangeOfSerializedComponents>(v)));
}
clustering_key_prefix(std::vector<bytes> v)
: prefix_compound_wrapper(compound::element_type::serialize_value(std::move(v)))
clustering_key_prefix(RangeOfSerializedComponents&& v)
: prefix_compound_wrapper(compound::element_type::serialize_value(std::forward<RangeOfSerializedComponents>(v)))
{ }
clustering_key_prefix(clustering_key_prefix&& v) = default;


@@ -54,17 +54,14 @@ static void unwrap_first_range(std::vector<range<token>>& ret) {
std::unique_ptr<abstract_replication_strategy> abstract_replication_strategy::create_replication_strategy(const sstring& ks_name, const sstring& strategy_name, token_metadata& tk_metadata, const std::map<sstring, sstring>& config_options) {
assert(locator::i_endpoint_snitch::get_local_snitch_ptr());
try {
return create_object<abstract_replication_strategy,
const sstring&,
token_metadata&,
snitch_ptr&,
const std::map<sstring, sstring>&>
(strategy_name, ks_name, tk_metadata,
locator::i_endpoint_snitch::get_local_snitch_ptr(), config_options);
} catch (const no_such_class& e) {
throw exceptions::configuration_exception(e.what());
}
return create_object<abstract_replication_strategy,
const sstring&,
token_metadata&,
snitch_ptr&,
const std::map<sstring, sstring>&>
(strategy_name, ks_name, tk_metadata,
locator::i_endpoint_snitch::get_local_snitch_ptr(), config_options);
}
void abstract_replication_strategy::validate_replication_strategy(const sstring& ks_name,

main.cc

@@ -51,7 +51,6 @@
#include "disk-error-handler.hh"
#include "tracing/tracing.hh"
#include "db/size_estimates_recorder.hh"
#include "core/prometheus.hh"
#ifdef HAVE_LIBSYSTEMD
#include <systemd/sd-daemon.h>
@@ -306,8 +305,6 @@ int main(int ac, char** av) {
auto& proxy = service::get_storage_proxy();
auto& mm = service::get_migration_manager();
api::http_context ctx(db, proxy);
httpd::http_server_control prometheus_server;
prometheus::config pctx;
directories dirs;
return app.run_deprecated(ac, av, [&] {
@@ -338,7 +335,7 @@ int main(int ac, char** av) {
tcp_syncookies_sanity();
return seastar::async([cfg, &db, &qp, &proxy, &mm, &ctx, &opts, &dirs, &pctx, &prometheus_server] {
return seastar::async([cfg, &db, &qp, &proxy, &mm, &ctx, &opts, &dirs] {
read_config(opts, *cfg).get();
apply_logger_settings(cfg->default_log_level(), cfg->logger_log_level(),
cfg->log_to_stdout(), cfg->log_to_syslog());
@@ -419,7 +416,7 @@ int main(int ac, char** av) {
ctx.http_server.start().get();
api::set_server_init(ctx).get();
ctx.http_server.listen(ipv4_addr{ip, api_port}).get();
startlog.info("Scylla API server listening on {}:{} ...", api_address, api_port);
print("Scylla API server listening on %s:%s ...\n", api_address, api_port);
supervisor_notify("initializing storage service");
init_storage_service(db);
supervisor_notify("starting per-shard database core");
@@ -496,8 +493,7 @@ int main(int ac, char** av) {
, cfg->internode_compression()
, seed_provider
, cluster_name
, phi
, cfg->listen_on_broadcast_address());
, phi);
supervisor_notify("starting messaging service");
supervisor_notify("starting storage proxy");
proxy.start(std::ref(db)).get();
@@ -630,21 +626,7 @@ int main(int ac, char** av) {
smp::invoke_on_all([&cfg] () {
return logalloc::shard_tracker().set_reclamation_step(cfg->lsa_reclamation_step());
}).get();
if (cfg->abort_on_lsa_bad_alloc()) {
smp::invoke_on_all([&cfg]() {
return logalloc::shard_tracker().enable_abort_on_bad_alloc();
}).get();
}
api::set_server_done(ctx).get();
dns::hostent prom_addr = dns::gethostbyname(cfg->prometheus_address()).get0();
supervisor_notify("starting prometheus API server");
uint16_t pport = cfg->prometheus_port();
if (pport) {
pctx.metric_help = "Scylla server statistics";
prometheus_server.start().get();
prometheus::start(prometheus_server, pctx);
prometheus_server.listen(ipv4_addr{prom_addr.addresses[0].in.s_addr, pport}).get();
}
supervisor_notify("serving");
// Register at_exit last, so that storage_service::drain_on_shutdown will be called first
engine().at_exit([] {


@@ -115,7 +115,7 @@ class scanning_reader final : public mutation_reader::impl {
stdx::optional<query::partition_range> _delegate_range;
mutation_reader _delegate;
const io_priority_class& _pc;
const query::partition_slice& _slice;
query::clustering_key_filtering_context _ck_filtering;
private:
memtable::partitions_type::iterator lookup_end() {
auto cmp = memtable_entry::compare(_memtable->_schema);
@@ -152,13 +152,13 @@ public:
scanning_reader(schema_ptr s,
lw_shared_ptr<memtable> m,
const query::partition_range& range,
const query::partition_slice& slice,
const query::clustering_key_filtering_context& ck_filtering,
const io_priority_class& pc)
: _memtable(std::move(m))
, _schema(std::move(s))
, _range(range)
, _pc(pc)
, _slice(slice)
, _ck_filtering(ck_filtering)
{ }
virtual future<streamed_mutation_opt> operator()() override {
@@ -171,7 +171,7 @@ public:
// FIXME: Use cache. See column_family::make_reader().
_delegate_range = _last ? _range.split_after(*_last, dht::ring_position_comparator(*_memtable->_schema)) : _range;
_delegate = make_mutation_reader<sstable_range_wrapping_reader>(
_memtable->_sstable, _schema, *_delegate_range, _slice, _pc);
_memtable->_sstable, _schema, *_delegate_range, _ck_filtering, _pc);
_memtable = {};
_last = {};
return _delegate();
@@ -187,14 +187,14 @@ public:
++_i;
_last = e.key();
_memtable->upgrade_entry(e);
return make_ready_future<streamed_mutation_opt>(e.read(_memtable, _schema, _slice));
return make_ready_future<streamed_mutation_opt>(e.read(_memtable, _schema, _ck_filtering));
}
};
mutation_reader
memtable::make_reader(schema_ptr s,
const query::partition_range& range,
const query::partition_slice& slice,
const query::clustering_key_filtering_context& ck_filtering,
const io_priority_class& pc) {
if (query::is_wrap_around(range, *s)) {
fail(unimplemented::cause::WRAP_AROUND);
@@ -207,13 +207,13 @@ memtable::make_reader(schema_ptr s,
auto i = partitions.find(pos, memtable_entry::compare(_schema));
if (i != partitions.end()) {
upgrade_entry(*i);
return make_reader_returning(i->read(shared_from_this(), s, slice));
return make_reader_returning(i->read(shared_from_this(), s, ck_filtering));
} else {
return make_empty_reader();
}
});
} else {
return make_mutation_reader<scanning_reader>(std::move(s), shared_from_this(), range, slice, pc);
return make_mutation_reader<scanning_reader>(std::move(s), shared_from_this(), range, ck_filtering, pc);
}
}
@@ -300,15 +300,15 @@ bool memtable::is_flushed() const {
}
streamed_mutation
memtable_entry::read(lw_shared_ptr<memtable> mtbl, const schema_ptr& target_schema, const query::partition_slice& slice) {
auto cr = query::clustering_key_filter_ranges::get_ranges(*_schema, slice, _key.key());
memtable_entry::read(lw_shared_ptr<memtable> mtbl, const schema_ptr& target_schema, const query::clustering_key_filtering_context& ck_filtering) {
if (_schema->version() != target_schema->version()) {
auto mp = mutation_partition(_pe.squashed(_schema, target_schema), *target_schema, std::move(cr));
auto mp = mutation_partition(_pe.squashed(_schema, target_schema), *target_schema, ck_filtering.get_ranges(_key.key()));
mutation m = mutation(target_schema, _key, std::move(mp));
return streamed_mutation_from_mutation(std::move(m));
}
auto& cr = ck_filtering.get_ranges(_key.key());
auto snp = _pe.read(_schema);
return make_partition_snapshot_reader(_schema, _key, std::move(cr), snp, *mtbl, mtbl->_read_section, mtbl);
return make_partition_snapshot_reader(_schema, _key, ck_filtering, cr, snp, *mtbl, mtbl->_read_section, mtbl);
}
void memtable::upgrade_entry(memtable_entry& e) {


@@ -59,7 +59,7 @@ public:
partition_entry& partition() { return _pe; }
const schema_ptr& schema() const { return _schema; }
schema_ptr& schema() { return _schema; }
streamed_mutation read(lw_shared_ptr<memtable> mtbl, const schema_ptr&, const query::partition_slice&);
streamed_mutation read(lw_shared_ptr<memtable> mtbl, const schema_ptr&, const query::clustering_key_filtering_context&);
struct compare {
dht::decorated_key::less_comparator _c;
@@ -144,7 +144,7 @@ public:
// Mutations returned by the reader will all have given schema.
mutation_reader make_reader(schema_ptr,
const query::partition_range& range = query::full_partition_range,
const query::partition_slice& slice = query::full_slice,
const query::clustering_key_filtering_context& ck_filtering = query::no_clustering_key_filtering,
const io_priority_class& pc = default_priority_class());
mutation_source as_data_source();


@@ -181,12 +181,10 @@ void messaging_service::foreach_client(std::function<void(const msg_addr& id, co
}
void messaging_service::foreach_server_connection_stats(std::function<void(const rpc::client_info&, const rpc::stats&)>&& f) const {
for (auto&& s : _server) {
if (s) {
s->foreach_connection([f](const rpc_protocol::server::connection& c) {
f(c.info(), c.get_stats());
});
}
if (_server) {
_server->foreach_connection([f](const rpc_protocol::server::connection& c) {
f(c.info(), c.get_stats());
});
}
}
@@ -219,7 +217,7 @@ void register_handler(messaging_service* ms, messaging_verb verb, Func&& func) {
}
messaging_service::messaging_service(gms::inet_address ip, uint16_t port, bool listen_now)
: messaging_service(std::move(ip), port, encrypt_what::none, compress_what::none, 0, nullptr, false, listen_now)
: messaging_service(std::move(ip), port, encrypt_what::none, compress_what::none, 0, nullptr, listen_now)
{}
static
@@ -233,42 +231,28 @@ rpc_resource_limits() {
}
void messaging_service::start_listen() {
bool listen_to_bc = _should_listen_to_broadcast_address && _listen_address != utils::fb_utilities::get_broadcast_address();
rpc::server_options so;
if (_compress_what != compress_what::none) {
so.compressor_factory = &compressor_factory;
}
if (!_server[0]) {
auto listen = [&] (const gms::inet_address& a) {
auto addr = ipv4_addr{a.raw_addr(), _port};
return std::unique_ptr<rpc_protocol_server_wrapper>(new rpc_protocol_server_wrapper(*_rpc,
so, addr, rpc_resource_limits()));
};
_server[0] = listen(_listen_address);
if (listen_to_bc) {
_server[1] = listen(utils::fb_utilities::get_broadcast_address());
}
if (!_server) {
auto addr = ipv4_addr{_listen_address.raw_addr(), _port};
_server = std::unique_ptr<rpc_protocol_server_wrapper>(new rpc_protocol_server_wrapper(*_rpc,
so, addr, rpc_resource_limits()));
}
if (!_server_tls[0]) {
auto listen = [&] (const gms::inet_address& a) {
return std::unique_ptr<rpc_protocol_server_wrapper>(
[this, &so, &a] () -> std::unique_ptr<rpc_protocol_server_wrapper>{
if (!_server_tls) {
_server_tls = std::unique_ptr<rpc_protocol_server_wrapper>(
[this, &so] () -> std::unique_ptr<rpc_protocol_server_wrapper>{
if (_encrypt_what == encrypt_what::none) {
return nullptr;
}
listen_options lo;
lo.reuse_address = true;
auto addr = make_ipv4_address(ipv4_addr{a.raw_addr(), _ssl_port});
auto addr = make_ipv4_address(ipv4_addr{_listen_address.raw_addr(), _ssl_port});
return std::make_unique<rpc_protocol_server_wrapper>(*_rpc,
so, seastar::tls::listen(_credentials, addr, lo));
}());
};
_server_tls[0] = listen(_listen_address);
if (listen_to_bc) {
_server_tls[1] = listen(utils::fb_utilities::get_broadcast_address());
}
}());
}
}
@@ -278,14 +262,12 @@ messaging_service::messaging_service(gms::inet_address ip
, compress_what cw
, uint16_t ssl_port
, std::shared_ptr<seastar::tls::credentials_builder> credentials
, bool sltba
, bool listen_now)
: _listen_address(ip)
, _port(port)
, _ssl_port(ssl_port)
, _encrypt_what(ew)
, _compress_what(cw)
, _should_listen_to_broadcast_address(sltba)
, _rpc(new rpc_protocol_wrapper(serializer { }))
, _credentials(credentials ? credentials->build_server_credentials() : nullptr)
{
@@ -304,7 +286,7 @@ messaging_service::messaging_service(gms::inet_address ip
// Do this on just cpu 0, to avoid duplicate logs.
if (engine().cpu_id() == 0) {
if (_server_tls[0]) {
if (_server_tls) {
logger.info("Starting Encrypted Messaging Service on SSL port {}", _ssl_port);
}
logger.info("Starting Messaging Service on port {}", _port);
@@ -329,19 +311,15 @@ gms::inet_address messaging_service::listen_address() {
}
future<> messaging_service::stop_tls_server() {
for (auto&& s : _server_tls) {
if (s) {
return s->stop();
}
if (_server_tls) {
return _server_tls->stop();
}
return make_ready_future<>();
}
future<> messaging_service::stop_nontls_server() {
for (auto&& s : _server) {
if (s) {
return s->stop();
}
if (_server) {
return _server->stop();
}
return make_ready_future<>();
}
@@ -796,7 +774,7 @@ future<std::vector<frozen_mutation>> messaging_service::send_migration_request(m
return send_message<std::vector<frozen_mutation>>(this, messaging_verb::MIGRATION_REQUEST, std::move(id));
}
void messaging_service::register_mutation(std::function<future<rpc::no_wait_type> (const rpc::client_info&, rpc::opt_time_point, frozen_mutation fm, std::vector<inet_address> forward,
void messaging_service::register_mutation(std::function<future<rpc::no_wait_type> (const rpc::client_info&, frozen_mutation fm, std::vector<inet_address> forward,
inet_address reply_to, unsigned shard, response_id_type response_id, rpc::optional<std::experimental::optional<tracing::trace_info>> trace_info)>&& func) {
register_handler(this, net::messaging_verb::MUTATION, std::move(func));
}


@@ -181,13 +181,12 @@ private:
uint16_t _ssl_port;
encrypt_what _encrypt_what;
compress_what _compress_what;
bool _should_listen_to_broadcast_address;
// map: Node broadcast address -> Node internal IP for communication within the same data center
std::unordered_map<gms::inet_address, gms::inet_address> _preferred_ip_cache;
std::unique_ptr<rpc_protocol_wrapper> _rpc;
std::array<std::unique_ptr<rpc_protocol_server_wrapper>, 2> _server;
std::unique_ptr<rpc_protocol_server_wrapper> _server;
::shared_ptr<seastar::tls::server_credentials> _credentials;
std::array<std::unique_ptr<rpc_protocol_server_wrapper>, 2> _server_tls;
std::unique_ptr<rpc_protocol_server_wrapper> _server_tls;
std::array<clients_map, 3> _clients;
uint64_t _dropped_messages[static_cast<int32_t>(messaging_verb::LAST)] = {};
bool _stopping = false;
@@ -198,7 +197,7 @@ public:
uint16_t port = 7000, bool listen_now = true);
messaging_service(gms::inet_address ip, uint16_t port, encrypt_what, compress_what,
uint16_t ssl_port, std::shared_ptr<seastar::tls::credentials_builder>,
bool sltba = false, bool listen_now = true);
bool listen_now = true);
~messaging_service();
public:
void start_listen();
@@ -278,7 +277,7 @@ public:
// FIXME: response_id_type is an alias in service::storage_proxy::response_id_type
using response_id_type = uint64_t;
// Wrapper for MUTATION
void register_mutation(std::function<future<rpc::no_wait_type> (const rpc::client_info&, rpc::opt_time_point, frozen_mutation fm, std::vector<inet_address> forward,
void register_mutation(std::function<future<rpc::no_wait_type> (const rpc::client_info&, frozen_mutation fm, std::vector<inet_address> forward,
inet_address reply_to, unsigned shard, response_id_type response_id, rpc::optional<std::experimental::optional<tracing::trace_info>> trace_info)>&& func);
void unregister_mutation();
future<> send_mutation(msg_addr id, clock_type::time_point timeout, const frozen_mutation& fm, std::vector<inet_address> forward,


@@ -248,7 +248,7 @@ mutation_partition::mutation_partition(const mutation_partition& x)
}
mutation_partition::mutation_partition(const mutation_partition& x, const schema& schema,
query::clustering_key_filter_ranges ck_ranges)
const query::clustering_row_ranges& ck_ranges)
: _tombstone(x._tombstone)
, _static_row(x._static_row)
, _rows(x._rows.value_comp())
@@ -268,7 +268,7 @@ mutation_partition::mutation_partition(const mutation_partition& x, const schema
}
mutation_partition::mutation_partition(mutation_partition&& x, const schema& schema,
query::clustering_key_filter_ranges ck_ranges)
const query::clustering_row_ranges& ck_ranges)
: _tombstone(x._tombstone)
, _static_row(std::move(x._static_row))
, _rows(std::move(x._rows))
@@ -1830,7 +1830,7 @@ future<data_query_result> data_query(schema_ptr s, const mutation_source& source
auto cfq = make_stable_flattened_mutations_consumer<compact_for_query<emit_only_live_rows::yes, query_result_builder>>(
*s, query_time, slice, row_limit, partition_limit, std::move(qrb));
auto reader = source(s, range, slice, service::get_local_sstable_query_read_priority());
auto reader = source(s, range, query::clustering_key_filtering_context::create(s, slice), service::get_local_sstable_query_read_priority());
return consume_flattened(std::move(reader), std::move(cfq), is_reversed);
}
@@ -1904,6 +1904,6 @@ mutation_query(schema_ptr s,
auto cfq = make_stable_flattened_mutations_consumer<compact_for_query<emit_only_live_rows::no, reconcilable_result_builder>>(
*s, query_time, slice, row_limit, partition_limit, std::move(rrb));
auto reader = source(s, range, slice, service::get_local_sstable_query_read_priority());
auto reader = source(s, range, query::clustering_key_filtering_context::create(s, slice), service::get_local_sstable_query_read_priority());
return consume_flattened(std::move(reader), std::move(cfq), is_reversed);
}


@@ -40,7 +40,6 @@
#include "utils/managed_vector.hh"
#include "hashing_partition_visitor.hh"
#include "range_tombstone_list.hh"
#include "clustering_key_filter.hh"
//
// Container for cells of a row. Cells are identified by column_id.
@@ -564,8 +563,8 @@ public:
{ }
mutation_partition(mutation_partition&&) = default;
mutation_partition(const mutation_partition&);
mutation_partition(const mutation_partition&, const schema&, query::clustering_key_filter_ranges);
mutation_partition(mutation_partition&&, const schema&, query::clustering_key_filter_ranges);
mutation_partition(const mutation_partition&, const schema&, const query::clustering_row_ranges&);
mutation_partition(mutation_partition&&, const schema&, const query::clustering_row_ranges&);
~mutation_partition();
mutation_partition& operator=(const mutation_partition& x);
mutation_partition& operator=(mutation_partition&& x) noexcept;


@@ -23,7 +23,8 @@
#include "database_fwd.hh"
#include "mutation_partition_visitor.hh"
#include "utils/input_stream.hh"
#include <seastar/core/simple-stream.hh>
namespace ser {
class mutation_partition_view;
@@ -31,13 +32,13 @@ class mutation_partition_view;
// View on serialized mutation partition. See mutation_partition_serializer.
class mutation_partition_view {
utils::input_stream _in;
seastar::simple_input_stream _in;
private:
mutation_partition_view(utils::input_stream v)
mutation_partition_view(seastar::simple_input_stream v)
: _in(v)
{ }
public:
static mutation_partition_view from_stream(utils::input_stream v) {
static mutation_partition_view from_stream(seastar::simple_input_stream v) {
return { v };
}
static mutation_partition_view from_view(ser::mutation_partition_view v);


@@ -163,12 +163,12 @@ public:
}
};
mutation_reader make_reader_returning_many(std::vector<mutation> mutations, const query::partition_slice& slice) {
mutation_reader make_reader_returning_many(std::vector<mutation> mutations, query::clustering_key_filtering_context ck_filtering) {
std::vector<streamed_mutation> streamed_mutations;
streamed_mutations.reserve(mutations.size());
for (auto& m : mutations) {
auto ck_ranges = query::clustering_key_filter_ranges::get_ranges(*m.schema(), slice, m.key());
auto mp = mutation_partition(std::move(m.partition()), *m.schema(), std::move(ck_ranges));
const query::clustering_row_ranges& ck_ranges = ck_filtering.get_ranges(m.key());
auto mp = mutation_partition(std::move(m.partition()), *m.schema(), ck_ranges);
auto sm = streamed_mutation_from_mutation(mutation(m.schema(), m.decorated_key(), std::move(mp)));
streamed_mutations.emplace_back(std::move(sm));
}


@@ -24,11 +24,9 @@
#include <vector>
#include "mutation.hh"
#include "clustering_key_filter.hh"
#include "core/future.hh"
#include "core/future-util.hh"
#include "core/do_with.hh"
#include "tracing/trace_state.hh"
// A mutation_reader is an object which allows iterating on mutations: invoke
// the function to get a future for the next mutation, with an unset optional
@@ -89,7 +87,7 @@ mutation_reader make_combined_reader(mutation_reader&& a, mutation_reader&& b);
mutation_reader make_reader_returning(mutation);
mutation_reader make_reader_returning(streamed_mutation);
mutation_reader make_reader_returning_many(std::vector<mutation>,
const query::partition_slice& slice = query::full_slice);
query::clustering_key_filtering_context filter = query::no_clustering_key_filtering);
mutation_reader make_reader_returning_many(std::vector<streamed_mutation>);
mutation_reader make_empty_reader();
@@ -189,35 +187,29 @@ future<> consume(mutation_reader& reader, Consumer consumer) {
// when invoking the source.
class mutation_source {
using partition_range = const query::partition_range&;
using clustering_filter = query::clustering_key_filtering_context;
using io_priority = const io_priority_class&;
std::function<mutation_reader(schema_ptr, partition_range, const query::partition_slice&, io_priority, tracing::trace_state_ptr)> _fn;
std::function<mutation_reader(schema_ptr, partition_range, clustering_filter, io_priority)> _fn;
public:
mutation_source(std::function<mutation_reader(schema_ptr, partition_range, const query::partition_slice&, io_priority, tracing::trace_state_ptr)> fn)
: _fn(std::move(fn)) {}
mutation_source(std::function<mutation_reader(schema_ptr, partition_range, const query::partition_slice&, io_priority)> fn)
: _fn([fn = std::move(fn)] (schema_ptr s, partition_range range, const query::partition_slice& slice, io_priority pc, tracing::trace_state_ptr) {
return fn(s, range, slice, pc);
}) {}
mutation_source(std::function<mutation_reader(schema_ptr, partition_range, const query::partition_slice&)> fn)
: _fn([fn = std::move(fn)] (schema_ptr s, partition_range range, const query::partition_slice& slice, io_priority, tracing::trace_state_ptr) {
return fn(s, range, slice);
mutation_source(std::function<mutation_reader(schema_ptr, partition_range, clustering_filter, io_priority)> fn)
: _fn(std::move(fn)) {}
mutation_source(std::function<mutation_reader(schema_ptr, partition_range, clustering_filter)> fn)
: _fn([fn = std::move(fn)] (schema_ptr s, partition_range range, clustering_filter ck_filtering, io_priority) {
return fn(s, range, ck_filtering);
}) {}
mutation_source(std::function<mutation_reader(schema_ptr, partition_range range)> fn)
: _fn([fn = std::move(fn)] (schema_ptr s, partition_range range, const query::partition_slice&, io_priority, tracing::trace_state_ptr) {
: _fn([fn = std::move(fn)] (schema_ptr s, partition_range range, clustering_filter, io_priority) {
return fn(s, range);
}) {}
mutation_reader operator()(schema_ptr s, partition_range range, const query::partition_slice& slice, io_priority pc, tracing::trace_state_ptr trace_state) const {
return _fn(std::move(s), range, slice, pc, std::move(trace_state));
mutation_reader operator()(schema_ptr s, partition_range range, clustering_filter ck_filtering, io_priority pc) const {
return _fn(std::move(s), range, ck_filtering, pc);
}
mutation_reader operator()(schema_ptr s, partition_range range, const query::partition_slice& slice, io_priority pc) const {
return _fn(std::move(s), range, slice, pc, nullptr);
}
mutation_reader operator()(schema_ptr s, partition_range range, const query::partition_slice& slice) const {
return _fn(std::move(s), range, slice, default_priority_class(), nullptr);
mutation_reader operator()(schema_ptr s, partition_range range, clustering_filter ck_filtering) const {
return _fn(std::move(s), range, ck_filtering, default_priority_class());
}
mutation_reader operator()(schema_ptr s, partition_range range) const {
return _fn(std::move(s), range, query::full_slice, default_priority_class(), nullptr);
return _fn(std::move(s), range, query::no_clustering_key_filtering, default_priority_class());
}
};


@@ -294,14 +294,14 @@ lw_shared_ptr<partition_snapshot> partition_entry::read(schema_ptr entry_schema)
}
partition_snapshot_reader::partition_snapshot_reader(schema_ptr s, dht::decorated_key dk,
lw_shared_ptr<partition_snapshot> snp,
query::clustering_key_filter_ranges crr, logalloc::region& region,
lw_shared_ptr<partition_snapshot> snp, query::clustering_key_filtering_context fc,
const query::clustering_row_ranges& crr, logalloc::region& region,
logalloc::allocating_section& read_section, boost::any pointer_to_container)
: streamed_mutation::impl(s, std::move(dk), tomb(*snp))
, _container_guard(std::move(pointer_to_container))
, _ck_ranges(std::move(crr))
, _current_ck_range(_ck_ranges.begin())
, _ck_range_end(_ck_ranges.end())
, _filtering_context(fc)
, _current_ck_range(crr.begin())
, _ck_range_end(crr.end())
, _cmp(*s)
, _eq(*s)
, _snapshot(snp)
@@ -463,10 +463,10 @@ future<> partition_snapshot_reader::fill_buffer()
}
streamed_mutation make_partition_snapshot_reader(schema_ptr s, dht::decorated_key dk,
query::clustering_key_filter_ranges crr,
query::clustering_key_filtering_context fc, const query::clustering_row_ranges& crr,
lw_shared_ptr<partition_snapshot> snp, logalloc::region& region,
logalloc::allocating_section& read_section, boost::any pointer_to_container)
{
return make_streamed_mutation<partition_snapshot_reader>(s, std::move(dk),
snp, std::move(crr), region, read_section, std::move(pointer_to_container));
snp, fc, crr, region, read_section, std::move(pointer_to_container));
}


@@ -290,7 +290,8 @@ private:
// that its lifetime is appropriately extended.
boost::any _container_guard;
query::clustering_key_filter_ranges _ck_ranges;
// _filtering_context keeps alive the range of clustering rows
query::clustering_key_filtering_context _filtering_context;
query::clustering_row_ranges::const_iterator _current_ck_range;
query::clustering_row_ranges::const_iterator _ck_range_end;
bool _in_ck_range = false;
@@ -320,7 +321,7 @@ private:
static tombstone tomb(partition_snapshot& snp);
public:
partition_snapshot_reader(schema_ptr s, dht::decorated_key dk, lw_shared_ptr<partition_snapshot> snp,
query::clustering_key_filter_ranges crr,
query::clustering_key_filtering_context fc, const query::clustering_row_ranges& crr,
logalloc::region& region, logalloc::allocating_section& read_section,
boost::any pointer_to_container);
~partition_snapshot_reader();
@@ -328,6 +329,6 @@ public:
};
streamed_mutation make_partition_snapshot_reader(schema_ptr s, dht::decorated_key dk,
query::clustering_key_filter_ranges crr,
query::clustering_key_filtering_context fc, const query::clustering_row_ranges& crr,
lw_shared_ptr<partition_snapshot> snp, logalloc::region& region,
logalloc::allocating_section& read_section, boost::any pointer_to_container);

Some files were not shown because too many files have changed in this diff.