Compare commits

50 Commits

Author SHA1 Message Date
Nadav Har'El
3d2e6b9d0d sstable: fix use-after-free of temporary ioclass copy
Commit 6a3872b355 fixed some use-after-free
bugs but introduced a new one because of a typo:

Instead of capturing a reference to the long-living io-class object, as
all the code does, one place in the code accidentally captured a *copy*
of this object. This copy had a very temporary life, and when a reference
to that *copy* was passed to sstable reading code that assumed it would live
at least as long as the read call, a use-after-free resulted.

Fixes #1072

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <1458595629-9314-1-git-send-email-nyh@scylladb.com>
(cherry picked from commit 2eb0627665)
2016-03-22 08:09:25 +02:00
Pekka Enberg
827b87b7e2 main: Defer API server hooks until commitlog replay
Defer registering services to the API server until commitlog has been
replayed to ensure that nobody is able to trigger sstable operations via
'nodetool' before we are ready for them.
Message-Id: <1458116227-4671-1-git-send-email-penberg@scylladb.com>

(cherry picked from commit 972fc6e014)
2016-03-18 09:20:40 +02:00
Paweł Dziepak
c4b24e4a0b lsa: update _closed_occupancy after freeing all segments
_closed_occupancy will be used when a region is removed from its region
group, make sure that it is accurate.

Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
(cherry picked from commit 338fd34770)
2016-03-18 08:11:19 +02:00
Glauber Costa
2f91375d36 sstables: do not assume mutation_reader will be kept alive
Our sstables::mutation_reader has a specialization in which start and end
ranges are passed as futures. That is needed because we may have to read the
index file for those.

This works well under the assumption that every time a mutation_reader will be
created it will be used, since whoever is using it will surely keep the state
of the reader alive.

However, that assumption has not been true for a while. We use a reader
interface for reading everything from mutations and sstables to cache entries,
and when we create an sstable mutation_reader, that does not mean we'll use it.
In fact we won't, if the read can be serviced first by a higher-level entity.

If that happens to be the case, the reader will be destructed. However, since
it may take more time than that for the start and end futures to resolve, by
the time they are resolved the state of the mutation reader will no longer be
valid.

The proposed fix for that is to only resolve the future inside
mutation_reader's read() function. If that function is called, we can have a
reasonable expectation that the caller object is being kept alive.

A second way to fix this would be to force the mutation reader to be kept alive
by transforming it into a shared pointer and acquiring a reference to itself.
However, because the reader may turn out not to be used, the delayed read
actually has the advantage of not even reading anything from the disk if there
is no need for it.

Also, because sstables can be compacted, we can't guarantee that the sst object
itself, used in the resolution of start and end, will stay alive, which poses
the same problem. Delaying those calls solves it in a similar way. We assume
here that the outer reader is keeping the SSTable object alive.

I must note that I have not reproduced this problem. What is written above is
the result of the analysis we made in #1036. That being the case, a thorough
review is appreciated.

Fixes #1036

Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <a7e4e722f76774d0b1f263d86c973061fb7fe2f2.1458135770.git.glauber@scylladb.com>
(cherry picked from commit 6a3872b355)
2016-03-18 07:56:28 +02:00
Asias He
38470ca6e8 main: Defer initialization of streaming
Streaming is used by bootstrap and repair. Streaming uses storage_proxy
class to apply the frozen_mutation and db/column_family class to
invalidate row cache. Defer the initialization until just before the repair
and bootstrap init.
Message-Id: <8e99cf443239dd8e17e6b6284dab171f7a12365c.1458034320.git.asias@scylladb.com>

(cherry picked from commit d79dbfd4e8)
2016-03-15 11:59:16 +02:00
Pekka Enberg
5bb25954b4 main: Defer REPAIR_CHECKSUM_RANGE RPC verb registration after commitlog replay
Register the REPAIR_CHECKSUM_RANGE messaging service verb handler after
we have replayed the commitlog to avoid responding with bogus checksums.
Message-Id: <1458027934-8546-1-git-send-email-penberg@scylladb.com>

(cherry picked from commit eb13f65949)
2016-03-15 11:59:10 +02:00
Gleb Natapov
1d9ca3ef1f main: Defer storage proxy RPC verb registration after commitlog replay
Message-Id: <20160315071229.GM6117@scylladb.com>
(cherry picked from commit 5076f4878b)
2016-03-15 09:41:21 +02:00
Gleb Natapov
cb97e5dfe8 messaging: enable keepalive tcp option for inter-node communication
Some network equipment that does TCP session tracking tends to drop TCP
sessions after a period of inactivity. Use the keepalive mechanism to
prevent this from happening to our inter-node communication.

Message-Id: <20160314173344.GI31837@scylladb.com>
(cherry picked from commit e228ef1bd9)
2016-03-14 20:33:12 +02:00
Pekka Enberg
831b5af999 Merge scylla-seastar branch-0.18
* seastar 60643a0...e039c46 (2):
  > rpc: allow configuring keepalive for rpc client
  > net: add keepalive configuration to socket interface
2016-03-14 20:32:52 +02:00
Pekka Enberg
7f1048efb4 main: Defer migration manager RPC verb registration after commitlog replay
Defer registering migration manager RPC verbs until after the commitlog has
been replayed so that our own schema is fully loaded before other
nodes start querying it or sending schema updates.
Message-Id: <1457971028-7325-1-git-send-email-penberg@scylladb.com>

(cherry picked from commit 1429213b4c)
2016-03-14 20:11:04 +02:00
Glauber Costa
510b1a3afc main: when scanning SSTables, run shard 0 first
Deletion of previous stale, temporary SSTables is done by Shard0. Therefore,
let's run Shard0 first. Technically, we could have all shards agree on the
deletion and delete it later, but that is prone to races.

Those races are not supposed to happen during normal operation, but if we have
bugs, they can. Scylla's Github Issue #1014 is an example of a situation where
that can happen, making existing problems worse. So running a single shard
first and making sure that all temporary tables are deleted provides
extra protection against such situations.

Signed-off-by: Glauber Costa <glauber@scylladb.com>
(cherry picked from commit 6c4e31bbdb)
2016-03-14 20:10:56 +02:00
Gleb Natapov
dd831f6463 make initialization run in a thread
While looking at the initialization code I felt like my head was going to
explode. Moving initialization into a thread makes things a little bit
better. Only lightly tested.

Message-Id: <20160310163142.GE28529@scylladb.com>
(cherry picked from commit 16135c2084)
2016-03-14 20:10:48 +02:00
Gleb Natapov
8bf59afb42 fix developer-mode parameter application on SMP
I am almost sure we want to apply it once on each shard, and not multiple
times on a single shard.

Message-Id: <20160310155804.GB28529@scylladb.com>
(cherry picked from commit 176aa25d35)
2016-03-14 20:10:37 +02:00
Avi Kivity
f29bc8918b main: sanity check cpu support
We require SSE 4.2 (for commitlog CRC32), verify it exists early and bail
out if it does not.

We need to check early, because the compiler may use newer instructions
in the generated code; the earlier we check, the lower the probability
we hit an undefined opcode exception.

Message-Id: <1456665401-18252-1-git-send-email-avi@scylladb.com>
(cherry picked from commit a1ff21f6ea)
2016-03-14 20:10:29 +02:00
Takuya ASADA
4c6d655e99 main: notify service start completion earlier, to reduce systemd unit startup time
Fixes #910

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1455830245-11782-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit 0f87922aa6)
2016-03-14 20:10:19 +02:00
Nadav Har'El
fafe166d2c repair: stop ongoing repairs during shutdown
When shutting down a node gracefully, this patch asks all ongoing repairs
started on this node to stop as soon as possible (without completing
their work), and then waits for these repairs to finish (with failure,
usually, because they didn't complete).

We need to do this because, if the repair loop continues to run while we
start destructing the various services it relies on, it can crash (as
reported in #699, although the specific crash reported there no longer
occurs after some changes in the streaming code). Additionally, it is
important to stop the ongoing repair rather than wait for it to complete
its normal operation, because that can take a very long time, and shutdown
is not supposed to take more than a few seconds.

Fixes #699.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <1455218873-6201-1-git-send-email-nyh@scylladb.com>
(cherry picked from commit 7dc843fc1c)
2016-03-14 20:10:13 +02:00
Avi Kivity
3380340750 Merge scylla-seastar branch-0.18
* seastar 353b1a1...60643a0 (2):
  > rpc: make client connection error more clear
  > reactor: fix work item leak in syscall work queue
2016-03-14 20:04:03 +02:00
Avi Kivity
4d3dac7f98 gitmodules: point seastar submodule at scylla-seastar repository
Prepare for branch-0.18 specific seastar commits.
2016-03-14 20:02:46 +02:00
Pekka Enberg
7f6891341e release: prepare for 0.18.2 2016-03-14 16:02:25 +02:00
Glauber Costa
ece77cce90 database: turn sstable generation number into an optional
This patch makes sure that, every time we need to create a new generation
number (the very first step in the creation of a new SSTable), the respective CF
is already initialized and populated. Failure to do so can lead to data being
overwritten. Extensive details about why this is important can be found
in Scylla's Github Issue #1014

Nothing should be writing to SSTables before we have had the chance to populate
the existing SSTables and calculate what the next generation number should be.

However, if that happens, we want to protect against it in a way that does not
involve overwriting existing tables. This is one of the ways to do it: every
column family starts in an unwriteable state, and when it can finally be written
to, we mark it as writeable.

Note that this *cannot* be a part of add_column_family. That adds a column family
to a db in memory only, and if anybody is about to write to a CF, that was most
likely already called. We need to call this explicitly when we are sure we're ready
to issue disk operations safely.

Signed-off-by: Glauber Costa <glauber@scylladb.com>
(cherry picked from commit a339296385)
2016-03-14 15:52:52 +02:00
Glauber Costa
d4a10a0a3c database: remove unused parameter
We are no longer using the in_flight_seals gate, but forgot to remove it.
To guarantee that all seal operations will have finished when we're done,
we are using the memtable_flush_queue, which also guarantees order. But
that gate was never removed.

The FIXME code should also be removed, since such an interface does exist now.

Signed-off-by: Glauber Costa <glauber@scylladb.com>
(cherry picked from commit 8eb4e69053)
2016-03-14 15:51:14 +02:00
Glauber Costa
e885eacbe4 column_family: do not open code generation calculation
We already have a function that wraps this, re-use it.  This FIXME is still
relevant, so just move it there. Let's not lose it.

Signed-off-by: Glauber Costa <glauber@scylladb.com>
(cherry picked from commit 94e90d4a17)
2016-03-14 15:51:06 +02:00
Glauber Costa
3f67277804 column_family: remove mutation_count
We use memory usage as a threshold these days, and nowhere is _mutation_count
checked. Get rid of it.

Signed-off-by: Glauber Costa <glauber@scylladb.com>
(cherry picked from commit 46fdeec60a)
2016-03-14 15:50:57 +02:00
Asias He
05aea2b65a storage_service: Fix pending_range_calculator_service
Since calculate_pending_ranges will modify token_metadata, we need to
replicate it to the other shards. With this patch, when we call
calculate_pending_ranges, token_metadata will be replicated to the other,
non-zero shards.

In addition, it is not useful as a standalone class. We can merge it
into the storage_service. Kill one singleton class.

Fixes #1033
Refs #962
Message-Id: <fb5b26311cafa4d315eb9e72d823c5ade2ab4bda.1457943074.git.asias@scylladb.com>

(cherry picked from commit 9f64c36a08)
2016-03-14 14:39:39 +02:00
Vlad Zolotarov
a2751a9592 sstables: properly account removal requests
The same shard may create an sstables::sstable object for the same SStable
that doesn't belong to it more than once and mark it
for deletion (e.g. in a 'nodetool refresh' flow).

In that case the destructor of sstables::sstable accounted for deletion
requests from the same shard more than once, since it was a simple counter
incremented each time there was a deletion request, whereas requests from
the same shard should count as a single request. This matters because the
removal logic waited for all shards to agree on the removal of a specific
SStable by comparing the counter mentioned above to the total number of
shards; once they were equal, the SStable files were actually removed.

This patch fixes this by replacing the counter with a std::unordered_set<unsigned>
that stores the shard ids of the shards requesting the deletion
of the sstable object, and compares the size() of this set
to smp::count in order to decide whether to actually delete the corresponding
SStable files.

Fixes #1004

Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Message-Id: <1457886812-32345-1-git-send-email-vladz@cloudius-systems.com>
(cherry picked from commit ce47fcb1ba)
2016-03-14 14:38:17 +02:00
Raphael S. Carvalho
eda8732b8e sstables: make write_simple() safer by using exclusive flag
We should guarantee that write_simple() will not try to overwrite
an existing file.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <194bd055f1f2dc1bb9766a67225ec38c88e7b005.1457818073.git.raphaelsc@scylladb.com>
(cherry picked from commit 1ff7d32272)
2016-03-14 14:38:07 +02:00
Raphael S. Carvalho
b24f5ece1f sstables: fix race condition when writing to the same sstable in parallel
When we are about to write a new sstable, we check if the sstable exists
by checking if the respective TOC exists. That check was added to handle a
possible attempt to write a new sstable with a generation already in use.
Gleb was worried that a TOC could appear after the check, and that's indeed
possible if there is an ongoing sstable write that uses the same generation
(running in parallel).
If a TOC appears after the check, we would again clobber an existing sstable
with a temporary one, and the user wouldn't be able to boot scylla anymore
without manual intervention.

Then Nadav proposed the following solution:
"We could do this by the following variant of Raphael's idea:

   1. create .txt.tmp unconditionally, as before the commit 031bf57c1
(if we can't create it, fail).
   2. Now confirm that .txt does not exist. If it does, delete the .txt.tmp
we just created and fail.
   3. continue as usual
   4. and at the end, as before, rename .txt.tmp to .txt.

The key to solving the race is step 1: Since we created .txt.tmp in step 1
and know this creation succeeded, we know that we cannot be running in
parallel with another writer - because such a writer too would have tried to
create the same file, and kept it existing until the very last step of its
work (step 4)."

This patch implements the solution described above.
Let me also say that the race is theoretical; scylla hasn't been affected by
it so far.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <ef630f5ac1bd0d11632c343d9f77a5f6810d18c1.1457818331.git.raphaelsc@scylladb.com>
(cherry picked from commit 0af786f3ea)
2016-03-14 14:37:58 +02:00
Raphael S. Carvalho
1322ec6d6b sstables: bail out if toc exists for generation used by write_components
Currently, if sstable::write_components() is called to write a new sstable
using the same generation as an existing sstable, a temporary TOC will
be unconditionally created. Afterwards, the same sstable::write_components()
will fail when it reaches sstable::create_data(), for the obvious reason
that a data component already exists for that generation (in this scenario).
After that, the user will not be able to boot scylla anymore because there is
a generation with both a TOC and a temporary TOC. We cannot simply remove a
generation with both a TOC and a temporary TOC, because user data would be lost
(again, in this scenario). After all, the temporary TOC was only created because
sstable::write_components() was wrongly called with the generation of an
sstable that exists.

Solution proposed by this patch is to trigger exception if a TOC file
exists for the generation used.

Some SSTable unit tests were also changed to guarantee that we don't try
to overwrite components of an existing sstable.

Refs #1014.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <caffc4e19cdcf25e4c6b9dd277d115422f8246c4.1457643565.git.raphaelsc@scylladb.com>
(cherry picked from commit 031bf57c19)
2016-03-14 14:37:50 +02:00
Glauber Costa
efbf51c00b sstables: improve error messages
The standard C++ exception messages thrown when anything goes wrong writing
a file are suboptimal: they barely tell us the name of the failing file.

Use a specialized create function so that we can capture that better.

Signed-off-by: Glauber Costa <glauber@scylladb.com>
(cherry picked from commit f2a8bcabc2)
2016-03-14 14:37:41 +02:00
Pekka Enberg
5d901b19c4 main: Initialize system keyspace earlier
We start services like the gossiper before the system keyspace is initialized,
which means we can start writing too early. Shuffle the code so that the
system keyspace is initialized earlier.

Refs #1014
Message-Id: <1457593758-9444-1-git-send-email-penberg@scylladb.com>

(cherry picked from commit 5dd1fda6cf)
2016-03-14 13:47:18 +02:00
Tomasz Grabiec
7085fc95d1 log: Fix operator<<(std::ostream&, const std::exception_ptr&)
Attempting to print std::nested_exception currently results in an exception
leaking outside the printer. Fix by catching all exceptions in the
final catch block.

For a nested exception, the logger will now print just
"std::nested_exception". For nested exceptions specifically we should
log more, but that is a separate problem to solve.
Message-Id: <1457532215-7498-1-git-send-email-tgrabiec@scylladb.com>

(cherry picked from commit 838a038cbd)
2016-03-09 16:11:14 +02:00
Pekka Enberg
776908fbf6 types: Implement to_string for timestamps and dates
The to_string() function is used for logging purposes, so use Boost's
to_iso_extended_string() to format both timestamps and dates.

Fixes #968 (showstopper)
Message-Id: <1457528755-6164-1-git-send-email-penberg@scylladb.com>

(cherry picked from commit ab502bcfa8)
2016-03-09 16:10:02 +02:00
Gleb Natapov
5f7f276ef6 fix EACH_QUORUM handling during bootstrapping
Currently, write acknowledgement handling does not take the bootstrapping
node into account for CL=EACH_QUORUM. The patch fixes it.

Fixes #994

Message-Id: <20160307121620.GR2253@scylladb.com>
(cherry picked from commit 626c9d046b)
2016-03-08 13:35:10 +02:00
Paweł Dziepak
5a38f3cbfd lsa: set _active to nullptr in region destructor
In the region destructor, after the active segment is freed, the pointer to
it is left unchanged. This confuses the remaining parts of the destructor
logic (namely, removal from the region group), which may rely on the
information in region_impl::_active.

In this particular case the problem was that the code removing the region
from the region group called region_impl::occupancy(), which dereferences
_active if it is not null.

Fixes #993.

Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
Message-Id: <1457341670-18266-1-git-send-email-pdziepak@scylladb.com>
(cherry picked from commit 99b61d3944)
2016-03-08 13:32:30 +02:00
Tomasz Grabiec
2d4309a926 validation: Fix validation of empty partition key
The validation wrongly assumed that an empty thrift key, which
the original C* code guards against, can only correspond to an empty
representation of our partition_key. This no longer holds after:

   commit 095efd01d6
   "keys: Make from_exploded() and components() work without schema"

This was responsible for dtest failure:
cql_additional_tests.TestCQL:column_name_validation_test

(cherry picked from commit 100b540a53)
2016-03-08 11:42:14 +02:00
Tomasz Grabiec
988d6cd153 cql3: Fix handling of lists with static columns
List operations and prefetching were not handling static columns
correctly. One issue was that prefetching was attaching static column
data to row data using ids which might overlap with clustered columns.

Another problem was that list operations were always constructing
clustering key even if they worked on a static column. For static
columns the key would be always empty and lookup would fail.

The effect was that list operations which depend on the current state had
no effect. A similar problem could be observed on C* 2.1.9, but not on 2.2.3.

Fixes #903.

(cherry picked from commit 383296c05b)
2016-03-06 11:06:03 +02:00
Pekka Enberg
bf71575fd7 release: prepare for 0.18.1 2016-03-05 08:53:07 +02:00
Gleb Natapov
cd75075214 storage_proxy: fix race between read cl completion and timeout in digest resolver
If the timeout happens after the cl promise is fulfilled, but before the
continuation runs, it removes all the data that the cl continuation needs
to calculate the result. Fix this by calculating the result immediately and
returning it in the cl promise instead of delaying this work until the
continuation runs. This has a nice side effect of simplifying digest
mismatch handling and making it exception free.

Fixes #977.

Message-Id: <1457015870-2106-3-git-send-email-gleb@scylladb.com>
(cherry picked from commit b89b6f442b)
2016-03-03 17:10:38 +02:00
Gleb Natapov
e85f11566b storage_proxy: store only one data reply in digest resolver.
The read executor may ask for more than one data reply during the digest
resolving stage, but only one result is actually needed to satisfy
a query, so there is no need to store all of them.

Message-Id: <1457015870-2106-2-git-send-email-gleb@scylladb.com>
(cherry picked from commit e4ac5157bc)
2016-03-03 17:10:32 +02:00
Gleb Natapov
8f682f018e storage_proxy: fix cl achieved condition in digest resolver timeout handler
In the digest resolver, for cl to be achieved it is not enough to get the
correct number of replies; a data reply must also be among them. The
condition in the digest timeout does not check that; fortunately we have a
variable that we set to true when cl is achieved, so use it instead.

Message-Id: <1457015870-2106-1-git-send-email-gleb@scylladb.com>
(cherry picked from commit 69b61b81ce)
2016-03-03 17:10:26 +02:00
Tomasz Grabiec
dba2b617e7 db: Fix error handling in populate_keyspace()
When find_uuid() fails Scylla would terminate with:

  Exiting on unhandled exception of type 'std::out_of_range': _Map_base::at

But we are supposed to ignore directories for unknown column
families. The try {} catch block is doing just that when
no_such_column_family is thrown from the find_column_family() call
which follows find_uuid(). Fix by converting std::out_of_range to
no_such_column_family.

Message-Id: <1456056280-3933-1-git-send-email-tgrabiec@scylladb.com>
2016-03-03 11:37:26 +02:00
Paweł Dziepak
f4e11007cf Revert "do not use boost::multiprecision::msb()"
This reverts commit dadd097f9c.

That commit caused the serialized forms of varint and decimal to have some
excess leading zeros. They didn't affect deserialization in any way, but
caused computed tokens to differ from the Cassandra ones.

Fixes #898.

Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
Message-Id: <1455537278-20106-1-git-send-email-pdziepak@scylladb.com>
2016-03-03 10:54:19 +02:00
Asias He
fdfa1df395 locator: Fix get token from a range<token>
With a range{t1, t2}, if t2 == {}, then range.end() will contain no
value. Fix getting t2 in this case.

Fixes #911.
Message-Id: <4462e499d706d275c03b116c4645e8aaee7821e1.1456128310.git.asias@scylladb.com>
2016-03-03 10:53:21 +02:00
Tomasz Grabiec
116055cc6f bytes_ostream: Avoid recursion when freeing chunks
When there are a lot of chunks we may get a stack overflow.

This seems to fix issue #906, a memory corruption during schema
merge. I suspect that what causes the corruption there is overflow of
the stack allocated for the seastar thread. Those stacks don't have
red zones which would catch the overflow.

Message-Id: <1456056288-3983-1-git-send-email-tgrabiec@scylladb.com>
2016-03-03 10:53:01 +02:00
Calle Wilund
04c19344de database: Fix use and assumptions about pending compactions
Fixes #934 - faulty assert in discard_sstables

run_with_compaction_disabled clears out a CF from compaction
mananger queue. discard_sstables wants to assert on this, but looks
at the wrong counters.

pending_compactions is an indicator of how much interested parties
want a CF compacted (again and again). It should not be considered
an indicator of compactions actually being done.

This modifies the usage slightly so that:
1.) The counter is always incremented, even if compaction is disallowed.
    The counter's value at the end of run_with_compaction_disabled is then
    instead used as an indicator of whether a compaction should be
    re-triggered. (If compactions finished, it will be zero.)
2.) Document the use and purpose of the pending counter, and add a
    method to re-add the CF to compaction for r_w_c_d above.
3.) discard_sstables now asserts on the right things.

Message-Id: <1456332824-23349-1-git-send-email-calle@scylladb.com>
2016-03-03 10:51:27 +02:00
Raphael S. Carvalho
df19e546f9 tests: sstable_test: submit compaction request through column family
That's needed for the reverted commit 9586793c to work. It's also the
correct thing to do, i.e. the column family submits itself to the manager.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <2a1d141ad929c1957933f57412083dd52af0390b.1456415398.git.raphaelsc@scylladb.com>
2016-03-03 10:51:23 +02:00
Takuya ASADA
b532919c55 dist: add posix_net_conf.sh to Ubuntu package
Fixes #881

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1455522990-32044-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit fb3f4cc148)
2016-02-15 17:03:10 +02:00
Takuya ASADA
6ae6dcc2fc dist: switch AMI base image to 'CentOS7-Base2', uses CentOS official kernel
The previous CentOS base image accidentally used a non-standard kernel from
elrepo. This replaces the base image with a new one that contains the CentOS
default kernel.

Fixes #890

Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Message-Id: <1455398903-2865-1-git-send-email-syuu@scylladb.com>
(cherry picked from commit 3697cee76d)
2016-02-15 15:59:04 +02:00
Tomasz Grabiec
5716140a14 abstract_replication_strategy: Fix generation of token ranges
We can't move-from in the loop because the subject will be empty in
all but the first iteration.

Fixes a crash during node startup:

  "Exiting on unhandled exception of type 'runtime_exception': runtime error: Invalid token. Should have size 8, has size 0"

Fixes update_cluster_layout_tests.py:TestUpdateClusterLayout.simple_add_node_1_test (and probably others)

Signed-off-by: Tomasz Grabiec <tgrabiec@scylladb.com>
(cherry picked from commit efdbc3d6d7)
2016-02-14 14:39:31 +02:00
Avi Kivity
91cb9bae2e release: prepare for 0.18 2016-02-11 17:55:20 +02:00
228 changed files with 3108 additions and 6020 deletions

1
.gitignore vendored

@@ -8,4 +8,3 @@ cscope.*
dist/ami/files/*.rpm
dist/ami/variables.json
dist/ami/scylla_deploy.sh
*.pyc

2
.gitmodules vendored

@@ -1,6 +1,6 @@
[submodule "seastar"]
path = seastar
-url = ../seastar
+url = ../scylla-seastar
ignore = dirty
[submodule "swagger-ui"]
path = swagger-ui

View File

@@ -1,6 +1,6 @@
#!/bin/sh
-VERSION=0.19
+VERSION=0.18.2
if test -f version
then

View File

@@ -836,22 +836,6 @@
"type":"string",
"paramType":"query"
},
{
"name":"startToken",
"description":"Token on which to begin repair",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"endToken",
"description":"Token on which to end repair",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"columnFamilies",
"description":"Which column families to repair in the given keyspace. Multiple columns families can be named separated by commas. If this option is missing, all column families in the keyspace are repaired.",

View File

@@ -214,16 +214,16 @@ void set_storage_proxy(http_context& ctx, routes& r) {
});
sp::get_schema_versions.set(r, [](std::unique_ptr<request> req) {
return service::get_local_storage_service().describe_schema_versions().then([] (auto result) {
std::vector<sp::mapper_list> res;
for (auto e : result) {
sp::mapper_list entry;
entry.key = std::move(e.first);
entry.value = std::move(e.second);
res.emplace_back(std::move(entry));
}
return make_ready_future<json::json_return_type>(std::move(res));
});
//TBD
// FIXME
// describe_schema_versions is not implemented yet
// this is a work around
std::vector<sp::mapper_list> res;
sp::mapper_list entry;
entry.key = boost::lexical_cast<std::string>(utils::fb_utilities::get_broadcast_address());
entry.value.push(service::get_local_storage_service().get_schema_version());
res.push_back(entry);
return make_ready_future<json::json_return_type>(res);
});
sp::get_cas_read_timeouts.set(r, [](std::unique_ptr<request> req) {

View File

@@ -280,12 +280,10 @@ void set_storage_service(http_context& ctx, routes& r) {
return ctx.db.invoke_on_all([keyspace, column_families] (database& db) {
std::vector<column_family*> column_families_vec;
auto& cm = db.get_compaction_manager();
for (auto cf : column_families) {
column_families_vec.push_back(&db.find_column_family(keyspace, cf));
for (auto entry : column_families) {
column_family* cf = &db.find_column_family(keyspace, entry);
cm.submit_cleanup_job(cf);
}
return parallel_for_each(column_families_vec, [&cm] (column_family* cf) {
return cm.perform_cleanup(cf);
});
}).then([]{
return make_ready_future<json::json_return_type>(0);
});
@@ -328,8 +326,7 @@ void set_storage_service(http_context& ctx, routes& r) {
ss::repair_async.set(r, [&ctx](std::unique_ptr<request> req) {
static std::vector<sstring> options = {"primaryRange", "parallelism", "incremental",
"jobThreads", "ranges", "columnFamilies", "dataCenters", "hosts", "trace",
"startToken", "endToken" };
"jobThreads", "ranges", "columnFamilies", "dataCenters", "hosts", "trace"};
std::unordered_map<sstring, sstring> options_map;
for (auto o : options) {
auto s = req->get_query_param(o);

View File

@@ -32,16 +32,11 @@ namespace hs = httpd::stream_manager_json;
static void set_summaries(const std::vector<streaming::stream_summary>& from,
json::json_list<hs::stream_summary>& to) {
if (!from.empty()) {
for (auto sum : from) {
hs::stream_summary res;
res.cf_id = boost::lexical_cast<std::string>(from.front().cf_id);
// For each stream_session, we pretend we are sending/receiving one
// file, to make it compatible with nodetool.
res.files = 1;
// We can not estimate total number of bytes the stream_session will
// send or recvieve since we don't know the size of the frozen_mutation
// until we read it.
res.total_size = 0;
res.cf_id = boost::lexical_cast<std::string>(sum.cf_id);
res.files = sum.files;
res.total_size = sum.total_size;
to.push(res);
}
}
@@ -90,22 +85,18 @@ static hs::stream_state get_state(
void set_stream_manager(http_context& ctx, routes& r) {
hs::get_current_streams.set(r,
[] (std::unique_ptr<request> req) {
return streaming::get_stream_manager().invoke_on_all([] (auto& sm) {
return sm.update_all_progress_info();
}).then([] {
return streaming::get_stream_manager().map_reduce0([](streaming::stream_manager& stream) {
std::vector<hs::stream_state> res;
for (auto i : stream.get_initiated_streams()) {
res.push_back(get_state(*i.second.get()));
}
for (auto i : stream.get_receiving_streams()) {
res.push_back(get_state(*i.second.get()));
}
return res;
}, std::vector<hs::stream_state>(),concat<hs::stream_state>).
then([](const std::vector<hs::stream_state>& res) {
return make_ready_future<json::json_return_type>(res);
});
return streaming::get_stream_manager().map_reduce0([](streaming::stream_manager& stream) {
std::vector<hs::stream_state> res;
for (auto i : stream.get_initiated_streams()) {
res.push_back(get_state(*i.second.get()));
}
for (auto i : stream.get_receiving_streams()) {
res.push_back(get_state(*i.second.get()));
}
return res;
}, std::vector<hs::stream_state>(),concat<hs::stream_state>).
then([](const std::vector<hs::stream_state>& res) {
return make_ready_future<json::json_return_type>(res);
});
});
@@ -120,9 +111,17 @@ void set_stream_manager(http_context& ctx, routes& r) {
hs::get_total_incoming_bytes.set(r, [](std::unique_ptr<request> req) {
gms::inet_address peer(req->param["peer"]);
return streaming::get_stream_manager().map_reduce0([peer](streaming::stream_manager& sm) {
return sm.get_progress_on_all_shards(peer).then([] (auto sbytes) {
return sbytes.bytes_received;
});
int64_t res = 0;
for (auto sr : sm.get_all_streams()) {
if (sr) {
for (auto session : sr->get_coordinator()->get_all_stream_sessions()) {
if (session->peer == peer) {
res += session->get_bytes_received();
}
}
}
}
return res;
}, 0, std::plus<int64_t>()).then([](int64_t res) {
return make_ready_future<json::json_return_type>(res);
});
@@ -130,9 +129,15 @@ void set_stream_manager(http_context& ctx, routes& r) {
hs::get_all_total_incoming_bytes.set(r, [](std::unique_ptr<request> req) {
return streaming::get_stream_manager().map_reduce0([](streaming::stream_manager& sm) {
return sm.get_progress_on_all_shards().then([] (auto sbytes) {
return sbytes.bytes_received;
});
int64_t res = 0;
for (auto sr : sm.get_all_streams()) {
if (sr) {
for (auto session : sr->get_coordinator()->get_all_stream_sessions()) {
res += session->get_bytes_received();
}
}
}
return res;
}, 0, std::plus<int64_t>()).then([](int64_t res) {
return make_ready_future<json::json_return_type>(res);
});
@@ -140,10 +145,18 @@ void set_stream_manager(http_context& ctx, routes& r) {
hs::get_total_outgoing_bytes.set(r, [](std::unique_ptr<request> req) {
gms::inet_address peer(req->param["peer"]);
return streaming::get_stream_manager().map_reduce0([peer] (streaming::stream_manager& sm) {
return sm.get_progress_on_all_shards(peer).then([] (auto sbytes) {
return sbytes.bytes_sent;
});
return streaming::get_stream_manager().map_reduce0([peer](streaming::stream_manager& sm) {
int64_t res = 0;
for (auto sr : sm.get_all_streams()) {
if (sr) {
for (auto session : sr->get_coordinator()->get_all_stream_sessions()) {
if (session->peer == peer) {
res += session->get_bytes_sent();
}
}
}
}
return res;
}, 0, std::plus<int64_t>()).then([](int64_t res) {
return make_ready_future<json::json_return_type>(res);
});
@@ -151,9 +164,15 @@ void set_stream_manager(http_context& ctx, routes& r) {
hs::get_all_total_outgoing_bytes.set(r, [](std::unique_ptr<request> req) {
return streaming::get_stream_manager().map_reduce0([](streaming::stream_manager& sm) {
return sm.get_progress_on_all_shards().then([] (auto sbytes) {
return sbytes.bytes_sent;
});
int64_t res = 0;
for (auto sr : sm.get_all_streams()) {
if (sr) {
for (auto session : sr->get_coordinator()->get_all_stream_sessions()) {
res += session->get_bytes_sent();
}
}
}
return res;
}, 0, std::plus<int64_t>()).then([](int64_t res) {
return make_ready_future<json::json_return_type>(res);
});
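The hunks above replace per-shard future chains with a `map_reduce0` that computes a partial sum on each shard and folds the partials with `std::plus<int64_t>`. A minimal sketch of that pattern, with plain structs standing in for Seastar shards and stream sessions (all names here are illustrative, not the real Scylla API):

```cpp
#include <cassert>
#include <cstdint>
#include <functional>
#include <vector>

// Hypothetical stand-ins: each "shard" owns some stream sessions and
// reports bytes received for a given peer.
struct session { int peer; int64_t bytes_received; };
struct shard { std::vector<session> sessions; };

// map_reduce0-style aggregation: map each shard to a partial sum,
// then fold the partials with std::plus, starting from 0.
int64_t total_incoming_bytes(const std::vector<shard>& shards, int peer) {
    int64_t total = 0;
    for (const auto& s : shards) {
        int64_t partial = 0;                       // the "map" step, per shard
        for (const auto& sess : s.sessions) {
            if (sess.peer == peer) {
                partial += sess.bytes_received;
            }
        }
        total = std::plus<int64_t>{}(total, partial);  // the "reduce" step
    }
    return total;
}
```

In the real code the map step runs concurrently on every shard and the reduce happens as each shard's future resolves; the sequential loop above only shows the data flow.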


@@ -29,6 +29,9 @@
// Which type is stored is determined by the schema.
class atomic_cell_or_collection final {
managed_bytes _data;
template<typename T>
friend class db::serializer;
private:
atomic_cell_or_collection(managed_bytes&& data) : _data(std::move(data)) {}
public:
@@ -60,5 +63,11 @@ public:
::feed_hash(as_collection_mutation(), h, def.type);
}
}
void linearize() {
_data.linearize();
}
void unlinearize() {
_data.scatter();
}
friend std::ostream& operator<<(std::ostream&, const atomic_cell_or_collection&);
};


@@ -103,41 +103,35 @@ static auth_migration_listener auth_migration;
* Should be abstracted to some sort of global server function
* probably.
*/
struct waiter {
promise<> done;
timer<> tmr;
waiter() : tmr([this] {done.set_value();})
{
tmr.arm(auth::auth::SUPERUSER_SETUP_DELAY);
}
~waiter() {
if (tmr.armed()) {
tmr.cancel();
done.set_exception(std::runtime_error("shutting down"));
}
logger.trace("Deleting scheduled task");
}
void kill() {
}
};
typedef std::unique_ptr<waiter> waiter_ptr;
static std::vector<waiter_ptr> & thread_waiters() {
static thread_local std::vector<waiter_ptr> the_waiters;
return the_waiters;
}
void auth::auth::schedule_when_up(scheduled_func f) {
logger.trace("Adding scheduled task");
struct waiter {
promise<> done;
timer<> tmr;
waiter() : tmr([this] {done.set_value();})
{
tmr.arm(SUPERUSER_SETUP_DELAY);
}
~waiter() {
if (tmr.armed()) {
tmr.cancel();
done.set_exception(std::runtime_error("shutting down"));
}
logger.trace("Deleting scheduled task");
}
void kill() {
}
};
auto & waiters = thread_waiters();
typedef std::unique_ptr<waiter> waiter_ptr;
static thread_local std::vector<waiter_ptr> waiters;
logger.trace("Adding scheduled task");
waiters.emplace_back(std::make_unique<waiter>());
auto* w = waiters.back().get();
w->done.get_future().finally([w] {
auto & waiters = thread_waiters();
auto i = std::find_if(waiters.begin(), waiters.end(), [w](const waiter_ptr& p) {
return p.get() == w;
});
@@ -152,6 +146,7 @@ void auth::auth::schedule_when_up(scheduled_func f) {
});
}
bool auth::auth::is_class_type(const sstring& type, const sstring& classname) {
if (type == classname) {
return true;
@@ -210,15 +205,6 @@ future<> auth::auth::setup() {
});
}
future<> auth::auth::shutdown() {
// just make sure we don't have pending tasks.
// this is mostly relevant for test cases where
// db-env-shutdown != process shutdown
return smp::invoke_on_all([] {
thread_waiters().clear();
});
}
static db::consistency_level consistency_for_user(const sstring& username) {
if (username == auth::auth::DEFAULT_SUPERUSER_NAME) {
return db::consistency_level::QUORUM;
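The patch above hoists the `waiter` type out of `schedule_when_up()` and replaces the function-local `static thread_local` vector with a `thread_waiters()` accessor, so the new `shutdown()` can clear pending tasks and fire their cancellation path. A sketch of that ownership scheme, with a plain callback standing in for the armed timer (names here are illustrative):

```cpp
#include <cassert>
#include <functional>
#include <memory>
#include <vector>

// Hypothetical waiter: if destroyed before firing, it runs its cancel
// path -- mimicking the "timer still armed" branch in the destructor above.
struct waiter {
    bool fired = false;
    std::function<void()> on_cancel;
    ~waiter() { if (!fired && on_cancel) on_cancel(); }
};

// Function-local static registry, reachable from both schedule and shutdown.
static std::vector<std::unique_ptr<waiter>>& waiters() {
    static std::vector<std::unique_ptr<waiter>> the_waiters;
    return the_waiters;
}

void schedule(std::function<void()> on_cancel) {
    auto w = std::make_unique<waiter>();
    w->on_cancel = std::move(on_cancel);
    waiters().push_back(std::move(w));
}

// shutdown(): dropping the registry destroys every pending waiter,
// which runs each cancel path exactly once.
void shutdown() { waiters().clear(); }
```

The point of the refactoring is precisely that a vector hidden inside `schedule_when_up()` was unreachable from `shutdown()`; moving it behind an accessor makes the cleanup possible.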


@@ -102,7 +102,6 @@ public:
* Sets up Authenticator and Authorizer.
*/
static future<> setup();
static future<> shutdown();
/**
* Set up table from given CREATE TABLE statement under system_auth keyspace, if not already done so.


@@ -21,12 +21,11 @@
#pragma once
#include <boost/range/iterator_range.hpp>
#include "bytes.hh"
#include "types.hh"
#include "net/byteorder.hh"
#include "core/unaligned.hh"
#include "hashing.hh"
#include "seastar/core/simple-stream.hh"
/**
* Utility for writing data into a buffer when its final size is not known up front.
*
@@ -172,12 +171,16 @@ public:
template <typename T>
struct place_holder {
value_type* ptr;
// makes the place_holder look like a stream
seastar::simple_output_stream get_stream() {
return seastar::simple_output_stream{reinterpret_cast<char*>(ptr)};
}
};
// Writes given values in big-endian format
template <typename T>
inline
std::enable_if_t<std::is_fundamental<T>::value, void>
write(T val) {
*reinterpret_cast<unaligned<T>*>(alloc(sizeof(T))) = net::hton(val);
}
// Returns a place holder for a value to be written later.
template <typename T>
inline
@@ -215,6 +218,19 @@ public:
write(bytes_view(reinterpret_cast<const signed char*>(ptr), size));
}
// Writes given sequence of bytes with a preceding length component encoded in big-endian format
inline void write_blob(bytes_view v) {
assert((size_type)v.size() == v.size());
write<size_type>(v.size());
write(v);
}
// Writes given value into the place holder in big-endian format
template <typename T>
inline void set(place_holder<T> ph, T val) {
*reinterpret_cast<unaligned<T>*>(ph.ptr) = net::hton(val);
}
bool is_linearized() const {
return !_begin || !_begin->next;
}
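The buffer above combines two ideas: fundamental values are written big-endian via `net::hton`, and a `place_holder` reserves space whose value is patched in after later data has been appended (e.g. a length known only at the end). A sketch of both, using a plain `std::vector` instead of `bytes_ostream` (helper names are illustrative):

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

using buf = std::vector<unsigned char>;

// write<T> with net::hton, specialized to uint16_t for brevity.
void write_be16(buf& b, uint16_t v) {
    b.push_back(static_cast<unsigned char>(v >> 8));
    b.push_back(static_cast<unsigned char>(v & 0xff));
}

// Reserve two bytes now; return their offset so they can be set later.
size_t write_place_holder16(buf& b) {
    size_t off = b.size();
    b.push_back(0);
    b.push_back(0);
    return off;
}

// Patch the reserved slot, big-endian, like set(place_holder, val).
void set16(buf& b, size_t off, uint16_t v) {
    b[off] = static_cast<unsigned char>(v >> 8);
    b[off + 1] = static_cast<unsigned char>(v & 0xff);
}

// write_blob: a length prefix followed by the raw bytes.
void write_blob(buf& b, const std::string& s) {
    write_be16(b, static_cast<uint16_t>(s.size()));
    b.insert(b.end(), s.begin(), s.end());
}
```

The real `bytes_ostream` additionally chains fixed-size chunks (hence `is_linearized()`), which the flat vector here glosses over.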


@@ -24,66 +24,80 @@
#include "mutation_partition_serializer.hh"
#include "converting_mutation_partition_applier.hh"
#include "hashing_partition_visitor.hh"
#include "utils/UUID.hh"
#include "serializer.hh"
#include "idl/uuid.dist.hh"
#include "idl/keys.dist.hh"
#include "idl/mutation.dist.hh"
#include "serializer_impl.hh"
#include "serialization_visitors.hh"
#include "idl/uuid.dist.impl.hh"
#include "idl/keys.dist.impl.hh"
#include "idl/mutation.dist.impl.hh"
template class db::serializer<canonical_mutation>;
//
// Representation layout:
//
// <canonical_mutation> ::= <column_family_id> <table_schema_version> <partition_key> <column-mapping> <partition>
//
// For <partition> see mutation_partition_serializer.cc
// For <column-mapping> see db::serializer<column_mapping>
//
canonical_mutation::canonical_mutation(bytes data)
: _data(std::move(data))
{ }
canonical_mutation::canonical_mutation(const mutation& m)
{
mutation_partition_serializer part_ser(*m.schema(), m.partition());
bytes_ostream out;
ser::writer_of_canonical_mutation wr(out);
std::move(wr).write_table_id(m.schema()->id())
.write_schema_version(m.schema()->version())
.write_key(m.key())
.write_mapping(m.schema()->get_column_mapping())
.partition([&] (auto wr) {
part_ser.write(std::move(wr));
}).end_canonical_mutation();
_data = to_bytes(out.linearize());
}
: _data([&m] {
bytes_ostream out;
db::serializer<utils::UUID>(m.column_family_id()).write(out);
db::serializer<table_schema_version>(m.schema()->version()).write(out);
db::serializer<partition_key_view>(m.key()).write(out);
db::serializer<column_mapping>(m.schema()->get_column_mapping()).write(out);
mutation_partition_serializer ser(*m.schema(), m.partition());
ser.write(out);
return to_bytes(out.linearize());
}())
{ }
utils::UUID canonical_mutation::column_family_id() const {
auto in = ser::as_input_stream(_data);
auto mv = ser::deserialize(in, boost::type<ser::canonical_mutation_view>());
return mv.table_id();
data_input in(_data);
return db::serializer<utils::UUID>::read(in);
}
mutation canonical_mutation::to_mutation(schema_ptr s) const {
auto in = ser::as_input_stream(_data);
auto mv = ser::deserialize(in, boost::type<ser::canonical_mutation_view>());
data_input in(_data);
auto cf_id = mv.table_id();
auto cf_id = db::serializer<utils::UUID>::read(in);
if (s->id() != cf_id) {
throw std::runtime_error(sprint("Attempted to deserialize canonical_mutation of table %s with schema of table %s (%s.%s)",
cf_id, s->id(), s->ks_name(), s->cf_name()));
}
auto version = mv.schema_version();
auto pk = mv.key();
auto version = db::serializer<table_schema_version>::read(in);
auto pk = partition_key(db::serializer<partition_key_view>::read(in));
mutation m(std::move(pk), std::move(s));
if (version == m.schema()->version()) {
auto partition_view = mutation_partition_view::from_view(mv.partition());
db::serializer<column_mapping>::skip(in);
auto partition_view = mutation_partition_serializer::read_as_view(in);
m.partition().apply(*m.schema(), partition_view, *m.schema());
} else {
column_mapping cm = mv.mapping();
column_mapping cm = db::serializer<column_mapping>::read(in);
converting_mutation_partition_applier v(cm, *m.schema(), m.partition());
auto partition_view = mutation_partition_view::from_view(mv.partition());
auto partition_view = mutation_partition_serializer::read_as_view(in);
partition_view.accept(cm, v);
}
return m;
}
template<>
db::serializer<canonical_mutation>::serializer(const canonical_mutation& v)
: _item(v)
, _size(db::serializer<bytes>(v._data).size())
{ }
template<>
void
db::serializer<canonical_mutation>::write(output& out, const canonical_mutation& v) {
db::serializer<bytes>(v._data).write(out);
}
template<>
canonical_mutation db::serializer<canonical_mutation>::read(input& in) {
return canonical_mutation(db::serializer<bytes>::read(in));
}


@@ -24,6 +24,7 @@
#include "bytes.hh"
#include "schema.hh"
#include "database_fwd.hh"
#include "db/serializer.hh"
#include "mutation_partition_visitor.hh"
#include "mutation_partition_serializer.hh"
@@ -32,8 +33,8 @@
// Safe to pass serialized across nodes.
class canonical_mutation {
bytes _data;
canonical_mutation(bytes);
public:
explicit canonical_mutation(bytes);
explicit canonical_mutation(const mutation&);
canonical_mutation(canonical_mutation&&) = default;
@@ -50,6 +51,15 @@ public:
utils::UUID column_family_id() const;
const bytes& representation() const { return _data; }
friend class db::serializer<canonical_mutation>;
};
namespace db {
template<> serializer<canonical_mutation>::serializer(const canonical_mutation&);
template<> void serializer<canonical_mutation>::write(output&, const canonical_mutation&);
template<> canonical_mutation serializer<canonical_mutation>::read(input&);
extern template class serializer<canonical_mutation>;
}


@@ -43,7 +43,6 @@ public:
static constexpr bool is_prefixable = AllowPrefixes == allow_prefixes::yes;
using prefix_type = compound_type<allow_prefixes::yes>;
using value_type = std::vector<bytes>;
using size_type = uint16_t;
compound_type(std::vector<data_type> types)
: _types(std::move(types))
@@ -67,7 +66,7 @@ public:
prefix_type as_prefix() {
return prefix_type(_types);
}
private:
/*
* Format:
* <len(value1)><value1><len(value2)><value2>...<len(value_n)><value_n>
@@ -76,8 +75,8 @@ private:
template<typename RangeOfSerializedComponents>
static void serialize_value(RangeOfSerializedComponents&& values, bytes::iterator& out) {
for (auto&& val : values) {
assert(val.size() <= std::numeric_limits<size_type>::max());
write<size_type>(out, size_type(val.size()));
assert(val.size() <= std::numeric_limits<uint16_t>::max());
write<uint16_t>(out, uint16_t(val.size()));
out = std::copy(val.begin(), val.end(), out);
}
}
@@ -85,21 +84,17 @@ private:
static size_t serialized_size(RangeOfSerializedComponents&& values) {
size_t len = 0;
for (auto&& val : values) {
len += sizeof(size_type) + val.size();
assert(val.size() <= std::numeric_limits<uint16_t>::max());
len += sizeof(uint16_t) + val.size();
}
return len;
}
public:
bytes serialize_single(bytes&& v) {
return serialize_value({std::move(v)});
}
template<typename RangeOfSerializedComponents>
static bytes serialize_value(RangeOfSerializedComponents&& values) {
auto size = serialized_size(values);
if (size > std::numeric_limits<size_type>::max()) {
throw std::runtime_error(sprint("Key size too large: %d > %d", size, std::numeric_limits<size_type>::max()));
}
bytes b(bytes::initialized_later(), size);
bytes b(bytes::initialized_later(), serialized_size(values));
auto i = b.begin();
serialize_value(values, i);
return b;
@@ -136,13 +131,13 @@ public:
value_type _current;
private:
void read_current() {
size_type len;
uint16_t len;
{
if (_v.empty()) {
_v = bytes_view(nullptr, 0);
return;
}
len = read_simple<size_type>(_v);
len = read_simple<uint16_t>(_v);
if (_v.size() < len) {
throw marshal_exception();
}
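The compound format above is `<len(value1)><value1><len(value2)><value2>...`, with each length a big-endian `uint16_t`, and `read_current()` throwing `marshal_exception` when the buffer is shorter than a declared length. A round-trip sketch under those rules, using `std::string` for component bytes (helper names are illustrative):

```cpp
#include <cassert>
#include <cstdint>
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>

// Serialize components as <u16 big-endian length><bytes>...
std::vector<unsigned char> serialize(const std::vector<std::string>& parts) {
    std::vector<unsigned char> out;
    for (const auto& p : parts) {
        if (p.size() > std::numeric_limits<uint16_t>::max()) {
            throw std::runtime_error("component too large");  // the assert above
        }
        out.push_back(static_cast<unsigned char>(p.size() >> 8));
        out.push_back(static_cast<unsigned char>(p.size() & 0xff));
        out.insert(out.end(), p.begin(), p.end());
    }
    return out;
}

// Read components back, rejecting truncated input like read_current() does.
std::vector<std::string> deserialize(const std::vector<unsigned char>& in) {
    std::vector<std::string> parts;
    size_t i = 0;
    while (i + 2 <= in.size()) {
        uint16_t len = static_cast<uint16_t>((in[i] << 8) | in[i + 1]);
        i += 2;
        if (in.size() - i < len) {
            throw std::runtime_error("truncated");  // marshal_exception
        }
        parts.emplace_back(in.begin() + i, in.begin() + i + len);
        i += len;
    }
    return parts;
}
```

Note the asymmetry the patch touches: each component length must fit in `uint16_t`, so the serializer asserts per component rather than capping the total key size.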


@@ -192,6 +192,7 @@ scylla_tests = [
'tests/commitlog_test',
'tests/cartesian_product_test',
'tests/hash_test',
'tests/serializer_test',
'tests/map_difference_test',
'tests/message',
'tests/gossip',
@@ -214,7 +215,6 @@ scylla_tests = [
'tests/flush_queue_test',
'tests/dynamic_bitset_test',
'tests/auth_test',
'tests/idl_test',
]
apps = [
@@ -223,11 +223,7 @@ apps = [
tests = scylla_tests
other = [
'iotune',
]
all_artifacts = apps + tests + other
all_artifacts = apps + tests
arg_parser = argparse.ArgumentParser('Configure scylla')
arg_parser.add_argument('--static', dest = 'static', action = 'store_const', default = '',
@@ -318,7 +314,6 @@ scylla_core = (['database.cc',
'cql3/statements/cf_statement.cc',
'cql3/statements/create_keyspace_statement.cc',
'cql3/statements/create_table_statement.cc',
'cql3/statements/create_type_statement.cc',
'cql3/statements/drop_keyspace_statement.cc',
'cql3/statements/drop_table_statement.cc',
'cql3/statements/schema_altering_statement.cc',
@@ -373,7 +368,7 @@ scylla_core = (['database.cc',
'db/schema_tables.cc',
'db/commitlog/commitlog.cc',
'db/commitlog/commitlog_replayer.cc',
'db/commitlog/commitlog_entry.cc',
'db/serializer.cc',
'db/config.cc',
'db/index/secondary_index.cc',
'db/marshal/type_parser.cc',
@@ -387,7 +382,6 @@ scylla_core = (['database.cc',
'utils/rate_limiter.cc',
'utils/file_lock.cc',
'utils/dynamic_bitset.cc',
'utils/managed_bytes.cc',
'gms/version_generator.cc',
'gms/versioned_value.cc',
'gms/gossiper.cc',
@@ -409,7 +403,6 @@ scylla_core = (['database.cc',
'locator/simple_strategy.cc',
'locator/local_strategy.cc',
'locator/network_topology_strategy.cc',
'locator/everywhere_replication_strategy.cc',
'locator/token_metadata.cc',
'locator/locator.cc',
'locator/snitch_base.cc',
@@ -504,12 +497,6 @@ idls = ['idl/gossip_digest.idl.hh',
'idl/paging_state.idl.hh',
'idl/frozen_schema.idl.hh',
'idl/partition_checksum.idl.hh',
'idl/replay_position.idl.hh',
'idl/truncation_record.idl.hh',
'idl/mutation.idl.hh',
'idl/query.idl.hh',
'idl/idl_test.idl.hh',
'idl/commitlog.idl.hh',
]
scylla_tests_dependencies = scylla_core + api + idls + [
@@ -553,7 +540,6 @@ tests_not_using_seastar_test_framework = set([
'tests/perf/perf_sstable',
'tests/managed_vector_test',
'tests/dynamic_bitset_test',
'tests/idl_test',
])
for t in tests_not_using_seastar_test_framework:
@@ -663,8 +649,6 @@ if args.dpdk:
seastar_flags += ['--enable-dpdk']
elif args.dpdk_target:
seastar_flags += ['--dpdk-target', args.dpdk_target]
if args.staticcxx:
seastar_flags += ['--static-stdc++']
seastar_cflags = args.user_cflags + " -march=nehalem"
seastar_flags += ['--compiler', args.cxx, '--cflags=%s' % (seastar_cflags)]
@@ -734,15 +718,12 @@ with open(buildfile, 'w') as f:
command = seastar/json/json2code.py -f $in -o $out
description = SWAGGER $out
rule serializer
command = {python} ./idl-compiler.py --ns ser -f $in -o $out
command = ./idl-compiler.py --ns ser -f $in -o $out
description = IDL compiler $out
rule ninja
command = {ninja} -C $subdir $target
restat = 1
description = NINJA $out
rule copy
command = cp $in $out
description = COPY $out
''').format(**globals()))
for mode in build_modes:
modeval = modes[mode]
@@ -779,8 +760,6 @@ with open(buildfile, 'w') as f:
thrifts = set()
antlr3_grammars = set()
for binary in build_artifacts:
if binary in other:
continue
srcs = deps[binary]
objs = ['$builddir/' + mode + '/' + src.replace('.cc', '.o')
for src in srcs
@@ -877,14 +856,10 @@ with open(buildfile, 'w') as f:
grammar.source.rsplit('.', 1)[0]))
for cc in grammar.sources('$builddir/{}/gen'.format(mode)):
obj = cc.replace('.cpp', '.o')
f.write('build {}: cxx.{} {} || {}\n'.format(obj, mode, cc, ' '.join(serializers)))
f.write('build seastar/build/{mode}/libseastar.a seastar/build/{mode}/apps/iotune/iotune: ninja {seastar_deps}\n'
.format(**locals()))
f.write('build {}: cxx.{} {}\n'.format(obj, mode, cc))
f.write('build seastar/build/{}/libseastar.a: ninja {}\n'.format(mode, seastar_deps))
f.write(' subdir = seastar\n')
f.write(' target = build/{mode}/libseastar.a build/{mode}/apps/iotune/iotune\n'.format(**locals()))
f.write(textwrap.dedent('''\
build build/{mode}/iotune: copy seastar/build/{mode}/apps/iotune/iotune
''').format(**locals()))
f.write(' target = build/{}/libseastar.a\n'.format(mode))
f.write('build {}: phony\n'.format(seastar_deps))
f.write(textwrap.dedent('''\
rule configure


@@ -75,7 +75,7 @@ public:
}
virtual void accept_static_cell(column_id id, atomic_cell_view cell) override {
const column_mapping_entry& col = _visited_column_mapping.static_column_at(id);
const column_mapping::column& col = _visited_column_mapping.static_column_at(id);
const column_definition* def = _p_schema.get_column_definition(col.name());
if (def) {
accept_cell(_p._static_row, column_kind::static_column, *def, col.type(), cell);
@@ -83,7 +83,7 @@ public:
}
virtual void accept_static_cell(column_id id, collection_mutation_view collection) override {
const column_mapping_entry& col = _visited_column_mapping.static_column_at(id);
const column_mapping::column& col = _visited_column_mapping.static_column_at(id);
const column_definition* def = _p_schema.get_column_definition(col.name());
if (def) {
accept_cell(_p._static_row, column_kind::static_column, *def, col.type(), collection);
@@ -102,7 +102,7 @@ public:
}
virtual void accept_row_cell(column_id id, atomic_cell_view cell) override {
const column_mapping_entry& col = _visited_column_mapping.regular_column_at(id);
const column_mapping::column& col = _visited_column_mapping.regular_column_at(id);
const column_definition* def = _p_schema.get_column_definition(col.name());
if (def) {
accept_cell(_current_row->cells(), column_kind::regular_column, *def, col.type(), cell);
@@ -110,7 +110,7 @@ public:
}
virtual void accept_row_cell(column_id id, collection_mutation_view collection) override {
const column_mapping_entry& col = _visited_column_mapping.regular_column_at(id);
const column_mapping::column& col = _visited_column_mapping.regular_column_at(id);
const column_definition* def = _p_schema.get_column_definition(col.name());
if (def) {
accept_cell(_current_row->cells(), column_kind::regular_column, *def, col.type(), collection);


@@ -36,7 +36,6 @@ options {
#include "cql3/statements/drop_keyspace_statement.hh"
#include "cql3/statements/create_index_statement.hh"
#include "cql3/statements/create_table_statement.hh"
#include "cql3/statements/create_type_statement.hh"
#include "cql3/statements/property_definitions.hh"
#include "cql3/statements/drop_table_statement.hh"
#include "cql3/statements/truncate_statement.hh"
@@ -284,9 +283,7 @@ cqlStatement returns [shared_ptr<parsed_statement> stmt]
| st22=listUsersStatement { $stmt = st22; }
| st23=createTriggerStatement { $stmt = st23; }
| st24=dropTriggerStatement { $stmt = st24; }
#endif
| st25=createTypeStatement { $stmt = st25; }
#if 0
| st26=alterTypeStatement { $stmt = st26; }
| st27=dropTypeStatement { $stmt = st27; }
| st28=createFunctionStatement { $stmt = st28; }
@@ -698,6 +695,7 @@ cfamOrdering[shared_ptr<cql3::statements::create_table_statement::raw_statement>
;
#if 0
/**
* CREATE TYPE foo (
* <name1> <type1>,
@@ -705,16 +703,17 @@ cfamOrdering[shared_ptr<cql3::statements::create_table_statement::raw_statement>
* ....
* )
*/
createTypeStatement returns [::shared_ptr<create_type_statement> expr]
@init { bool if_not_exists = false; }
: K_CREATE K_TYPE (K_IF K_NOT K_EXISTS { if_not_exists = true; } )?
tn=userTypeName { $expr = ::make_shared<create_type_statement>(tn, if_not_exists); }
createTypeStatement returns [CreateTypeStatement expr]
@init { boolean ifNotExists = false; }
: K_CREATE K_TYPE (K_IF K_NOT K_EXISTS { ifNotExists = true; } )?
tn=userTypeName { $expr = new CreateTypeStatement(tn, ifNotExists); }
'(' typeColumns[expr] ( ',' typeColumns[expr]? )* ')'
;
typeColumns[::shared_ptr<create_type_statement> expr]
: k=ident v=comparatorType { $expr->add_definition(k, v); }
typeColumns[CreateTypeStatement expr]
: k=ident v=comparatorType { $expr.addDefinition(k, v); }
;
#endif
/**


@@ -737,7 +737,7 @@ public:
/** A condition on a collection element. For example: "IF col['key'] = 'foo'" */
static ::shared_ptr<raw> collection_condition(::shared_ptr<term::raw> value, ::shared_ptr<term::raw> collection_element,
const operator_type& op) {
return ::make_shared<raw>(std::move(value), std::vector<::shared_ptr<term::raw>>{}, ::shared_ptr<abstract_marker::in_raw>{}, std::move(collection_element), op);
return ::make_shared<raw>(std::move(value), std::vector<::shared_ptr<term::raw>>{}, ::shared_ptr<abstract_marker::in_raw>{}, std::move(collection_element), operator_type::IN);
}
/** An IN condition on a collection element. For example: "IF col['key'] IN ('foo', 'bar', ...)" */


@@ -121,7 +121,3 @@ column_identifier::new_selector_factory(database& db, schema_ptr schema, std::ve
}
}
bool cql3::column_identifier::text_comparator::operator()(const cql3::column_identifier& c1, const cql3::column_identifier& c2) const {
return c1.text() < c2.text();
}


@@ -61,11 +61,6 @@ public:
private:
sstring _text;
public:
// less comparator sorting by text
struct text_comparator {
bool operator()(const column_identifier& c1, const column_identifier& c2) const;
};
column_identifier(sstring raw_text, bool keep_case);
column_identifier(bytes bytes_, data_type type);


@@ -58,10 +58,10 @@ public:
virtual void reset() override {
_count = 0;
}
virtual opt_bytes compute(cql_serialization_format sf) override {
virtual opt_bytes compute(serialization_format sf) override {
return long_type->decompose(_count);
}
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
virtual void add_input(serialization_format sf, const std::vector<opt_bytes>& values) override {
++_count;
}
};
@@ -83,10 +83,10 @@ public:
virtual void reset() override {
_sum = {};
}
virtual opt_bytes compute(cql_serialization_format sf) override {
virtual opt_bytes compute(serialization_format sf) override {
return data_type_for<Type>()->decompose(_sum);
}
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
virtual void add_input(serialization_format sf, const std::vector<opt_bytes>& values) override {
if (!values[0]) {
return;
}
@@ -120,14 +120,14 @@ public:
_sum = {};
_count = 0;
}
virtual opt_bytes compute(cql_serialization_format sf) override {
virtual opt_bytes compute(serialization_format sf) override {
Type ret = 0;
if (_count) {
ret = _sum / _count;
}
return data_type_for<Type>()->decompose(ret);
}
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
virtual void add_input(serialization_format sf, const std::vector<opt_bytes>& values) override {
if (!values[0]) {
return;
}
@@ -159,13 +159,13 @@ public:
virtual void reset() override {
_max = {};
}
virtual opt_bytes compute(cql_serialization_format sf) override {
virtual opt_bytes compute(serialization_format sf) override {
if (!_max) {
return {};
}
return data_type_for<Type>()->decompose(*_max);
}
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
virtual void add_input(serialization_format sf, const std::vector<opt_bytes>& values) override {
if (!values[0]) {
return;
}
@@ -206,13 +206,13 @@ public:
virtual void reset() override {
_min = {};
}
virtual opt_bytes compute(cql_serialization_format sf) override {
virtual opt_bytes compute(serialization_format sf) override {
if (!_min) {
return {};
}
return data_type_for<Type>()->decompose(*_min);
}
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
virtual void add_input(serialization_format sf, const std::vector<opt_bytes>& values) override {
if (!values[0]) {
return;
}
@@ -255,10 +255,10 @@ public:
virtual void reset() override {
_count = 0;
}
virtual opt_bytes compute(cql_serialization_format sf) override {
virtual opt_bytes compute(serialization_format sf) override {
return long_type->decompose(_count);
}
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
virtual void add_input(serialization_format sf, const std::vector<opt_bytes>& values) override {
if (!values[0]) {
return;
}
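Every aggregate above follows the same three-method cycle: `reset()` clears state, `add_input()` consumes one row (skipping null first values), and `compute()` emits the current result. A sketch of the `avg` case, with `std::optional` standing in for `opt_bytes` and the serialization format omitted (names are illustrative):

```cpp
#include <cassert>
#include <optional>
#include <vector>

// avg over long values: nulls do not contribute to sum or count,
// and an empty aggregate computes to 0, as in the patched code.
struct avg_aggregate {
    long sum = 0;
    long count = 0;

    void reset() { sum = 0; count = 0; }

    void add_input(const std::vector<std::optional<long>>& values) {
        if (!values[0]) {
            return;  // skip nulls, matching the "if (!values[0]) return;" guard
        }
        sum += *values[0];
        ++count;
    }

    long compute() const { return count ? sum / count : 0; }
};
```

The diff itself only renames the `serialization_format` parameter type; the aggregate logic shown here is unchanged by the patch.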


@@ -77,7 +77,7 @@ public:
* @param protocol_version native protocol version
* @param values the values to add to the aggregate.
*/
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) = 0;
virtual void add_input(serialization_format sf, const std::vector<opt_bytes>& values) = 0;
/**
* Computes and returns the aggregate current value.
@@ -85,7 +85,7 @@ public:
* @param protocol_version native protocol version
* @return the aggregate current value.
*/
virtual opt_bytes compute(cql_serialization_format sf) = 0;
virtual opt_bytes compute(serialization_format sf) = 0;
/**
* Reset this aggregate.


@@ -58,7 +58,7 @@ shared_ptr<function>
make_to_blob_function(data_type from_type) {
auto name = from_type->as_cql3_type()->to_string() + "asblob";
return make_native_scalar_function<true>(name, bytes_type, { from_type },
[] (cql_serialization_format sf, const std::vector<bytes_opt>& parameters) {
[] (serialization_format sf, const std::vector<bytes_opt>& parameters) {
return parameters[0];
});
}
@@ -68,7 +68,7 @@ shared_ptr<function>
make_from_blob_function(data_type to_type) {
sstring name = sstring("blobas") + to_type->as_cql3_type()->to_string();
return make_native_scalar_function<true>(name, to_type, { bytes_type },
[name, to_type] (cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
[name, to_type] (serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
auto&& val = parameters[0];
if (!val) {
return val;
@@ -89,7 +89,7 @@ inline
shared_ptr<function>
make_varchar_as_blob_fct() {
return make_native_scalar_function<true>("varcharasblob", bytes_type, { utf8_type },
[] (cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
[] (serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
return parameters[0];
});
}
@@ -98,7 +98,7 @@ inline
shared_ptr<function>
make_blob_as_varchar_fct() {
return make_native_scalar_function<true>("blobasvarchar", utf8_type, { bytes_type },
[] (cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
[] (serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
return parameters[0];
});
}


@@ -61,11 +61,11 @@ public:
virtual shared_ptr<terminal> bind(const query_options& options) override;
virtual bytes_view_opt bind_and_get(const query_options& options) override;
private:
static bytes_opt execute_internal(cql_serialization_format sf, scalar_function& fun, std::vector<bytes_opt> params);
static bytes_opt execute_internal(serialization_format sf, scalar_function& fun, std::vector<bytes_opt> params);
public:
virtual bool contains_bind_marker() const override;
private:
static shared_ptr<terminal> make_terminal(shared_ptr<function> fun, bytes_opt result, cql_serialization_format sf);
static shared_ptr<terminal> make_terminal(shared_ptr<function> fun, bytes_opt result, serialization_format sf);
public:
class raw : public term::raw {
function_name _name;


@@ -299,7 +299,7 @@ function_call::collect_marker_specification(shared_ptr<variable_specifications>
shared_ptr<terminal>
function_call::bind(const query_options& options) {
return make_terminal(_fun, to_bytes_opt(bind_and_get(options)), options.get_cql_serialization_format());
return make_terminal(_fun, to_bytes_opt(bind_and_get(options)), options.get_serialization_format());
}
bytes_view_opt
@@ -315,12 +315,12 @@ function_call::bind_and_get(const query_options& options) {
}
buffers.push_back(std::move(to_bytes_opt(val)));
}
auto result = execute_internal(options.get_cql_serialization_format(), *_fun, std::move(buffers));
auto result = execute_internal(options.get_serialization_format(), *_fun, std::move(buffers));
return options.make_temporary(result);
}
bytes_opt
function_call::execute_internal(cql_serialization_format sf, scalar_function& fun, std::vector<bytes_opt> params) {
function_call::execute_internal(serialization_format sf, scalar_function& fun, std::vector<bytes_opt> params) {
bytes_opt result = fun.execute(sf, params);
try {
// Check the method didn't lie about its declared return type
@@ -347,7 +347,7 @@ function_call::contains_bind_marker() const {
}
shared_ptr<terminal>
function_call::make_terminal(shared_ptr<function> fun, bytes_opt result, cql_serialization_format sf) {
function_call::make_terminal(shared_ptr<function> fun, bytes_opt result, serialization_format sf) {
if (!dynamic_pointer_cast<const collection_type_impl>(fun->return_type())) {
return ::make_shared<constants::value>(std::move(result));
}
@@ -413,7 +413,7 @@ function_call::raw::prepare(database& db, const sstring& keyspace, ::shared_ptr<
// If all parameters are terminal and the function is pure, we can
// evaluate it now, otherwise we'd have to wait execution time
if (all_terminal && scalar_fun->is_pure()) {
return make_terminal(scalar_fun, execute(*scalar_fun, parameters), query_options::DEFAULT.get_cql_serialization_format());
return make_terminal(scalar_fun, execute(*scalar_fun, parameters), query_options::DEFAULT.get_serialization_format());
} else {
return ::make_shared<function_call>(scalar_fun, parameters);
}
@@ -429,7 +429,7 @@ function_call::raw::execute(scalar_function& fun, std::vector<shared_ptr<term>>
buffers.push_back(std::move(param));
}
return execute_internal(cql_serialization_format::internal(), fun, buffers);
return execute_internal(serialization_format::internal(), fun, buffers);
}
assignment_testable::test_result


@@ -74,10 +74,7 @@ public:
: native_scalar_function(std::move(name), std::move(return_type), std::move(arg_types))
, _func(std::forward<Func>(func)) {
}
virtual bool is_pure() override {
return Pure;
}
virtual bytes_opt execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) override {
virtual bytes_opt execute(serialization_format sf, const std::vector<bytes_opt>& parameters) override {
return _func(sf, parameters);
}
};


@@ -58,7 +58,7 @@ public:
* @return the result of applying this function to the parameter
* @throws InvalidRequestException if this function cannot be applied to the parameter
*/
virtual bytes_opt execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) = 0;
virtual bytes_opt execute(serialization_format sf, const std::vector<bytes_opt>& parameters) = 0;
};


@@ -56,7 +56,7 @@ inline
shared_ptr<function>
make_now_fct() {
return make_native_scalar_function<false>("now", timeuuid_type, {},
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
[] (serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
return {to_bytes(utils::UUID_gen::get_time_UUID())};
});
}
@@ -65,7 +65,7 @@ inline
shared_ptr<function>
make_min_timeuuid_fct() {
return make_native_scalar_function<true>("mintimeuuid", timeuuid_type, { timestamp_type },
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
[] (serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
auto& bb = values[0];
if (!bb) {
return {};
@@ -84,7 +84,7 @@ inline
shared_ptr<function>
make_max_timeuuid_fct() {
return make_native_scalar_function<true>("maxtimeuuid", timeuuid_type, { timestamp_type },
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
[] (serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
// FIXME: should values be a vector<optional<bytes>>?
auto& bb = values[0];
if (!bb) {
@@ -104,7 +104,7 @@ inline
shared_ptr<function>
make_date_of_fct() {
return make_native_scalar_function<true>("dateof", timestamp_type, { timeuuid_type },
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
[] (serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
using namespace utils;
auto& bb = values[0];
if (!bb) {
@@ -119,7 +119,7 @@ inline
shared_ptr<function>
make_unix_timestamp_of_fcf() {
return make_native_scalar_function<true>("unixtimestampof", long_type, { timeuuid_type },
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
[] (serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
using namespace utils;
auto& bb = values[0];
if (!bb) {


@@ -61,9 +61,10 @@ public:
, _schema(s) {
}
bytes_opt execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) override {
auto key = partition_key::from_optional_exploded(*_schema, parameters);
auto tok = dht::global_partitioner().get_token(*_schema, key);
bytes_opt execute(serialization_format sf, const std::vector<bytes_opt>& parameters) override {
auto buf = _schema->partition_key_type()->serialize_optionals(parameters);
auto view = partition_key_view::from_bytes(std::move(buf));
auto tok = dht::global_partitioner().get_token(*_schema, view);
warn(unimplemented::cause::VALIDATION);
return dht::global_partitioner().token_to_bytes(tok);
}


@@ -53,7 +53,7 @@ inline
shared_ptr<function>
make_uuid_fct() {
return make_native_scalar_function<false>("uuid", uuid_type, {},
[] (cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
[] (serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
return {uuid_type->decompose(utils::make_random_uuid())};
});
}


@@ -108,7 +108,7 @@ lists::literal::to_string() const {
}
lists::value
lists::value::from_serialized(bytes_view v, list_type type, cql_serialization_format sf) {
lists::value::from_serialized(bytes_view v, list_type type, serialization_format sf) {
try {
// Collections have this small hack that validate cannot be called on a serialized object,
// but compose does the validation (so we're fine).
@@ -128,11 +128,11 @@ lists::value::from_serialized(bytes_view v, list_type type, cql_serialization_fo
bytes_opt
lists::value::get(const query_options& options) {
return get_with_protocol_version(options.get_cql_serialization_format());
return get_with_protocol_version(options.get_serialization_format());
}
bytes
lists::value::get_with_protocol_version(cql_serialization_format sf) {
lists::value::get_with_protocol_version(serialization_format sf) {
// Can't use boost::indirect_iterator, because optional is not an iterator
auto deref = [] (bytes_opt& x) { return *x; };
return collection_type_impl::pack(
@@ -212,7 +212,7 @@ lists::marker::bind(const query_options& options) {
if (!value) {
return nullptr;
} else {
return make_shared(value::from_serialized(*value, std::move(ltype), options.get_cql_serialization_format()));
return make_shared(value::from_serialized(*value, std::move(ltype), options.get_serialization_format()));
}
}
@@ -276,26 +276,27 @@ lists::setter_by_index::execute(mutation& m, const exploded_clustering_prefix& p
if (!existing_list_opt) {
throw exceptions::invalid_request_exception("Attempted to set an element on a list which is null");
}
collection_mutation_view existing_list_ser = *existing_list_opt;
auto ltype = dynamic_pointer_cast<const list_type_impl>(column.type);
auto&& existing_list = *existing_list_opt;
collection_type_impl::mutation_view existing_list = ltype->deserialize_mutation_form(existing_list_ser);
// we verified that index is an int32_type
if (idx < 0 || size_t(idx) >= existing_list.size()) {
if (idx < 0 || size_t(idx) >= existing_list.cells.size()) {
throw exceptions::invalid_request_exception(sprint("List index %d out of bound, list has size %d",
idx, existing_list.size()));
idx, existing_list.cells.size()));
}
const bytes& eidx = existing_list[idx].key;
bytes_view eidx = existing_list.cells[idx].first;
list_type_impl::mutation mut;
mut.cells.reserve(1);
if (!value) {
mut.cells.emplace_back(eidx, params.make_dead_cell());
mut.cells.emplace_back(to_bytes(eidx), params.make_dead_cell());
} else {
if (value->size() > std::numeric_limits<uint16_t>::max()) {
throw exceptions::invalid_request_exception(
sprint("List value is too long. List values are limited to %d bytes but %d bytes value provided",
std::numeric_limits<uint16_t>::max(), value->size()));
}
mut.cells.emplace_back(eidx, params.make_cell(*value));
mut.cells.emplace_back(to_bytes(eidx), params.make_cell(*value));
}
auto smut = ltype->serialize_mutation_form(mut);
m.set_cell(prefix, column, atomic_cell_or_collection::from_collection_mutation(std::move(smut)));
@@ -338,8 +339,13 @@ lists::do_append(shared_ptr<term> t,
if (!value) {
m.set_cell(prefix, column, params.make_dead_cell());
} else {
auto newv = list_value->get_with_protocol_version(cql_serialization_format::internal());
m.set_cell(prefix, column, params.make_cell(std::move(newv)));
auto&& to_add = list_value->_elements;
auto deref = [] (const bytes_opt& v) { return *v; };
auto&& newv = collection_mutation{list_type_impl::pack(
boost::make_transform_iterator(to_add.begin(), deref),
boost::make_transform_iterator(to_add.end(), deref),
to_add.size(), serialization_format::internal())};
m.set_cell(prefix, column, atomic_cell_or_collection::from_collection_mutation(std::move(newv)));
}
}
}
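The do_append change above stops round-tripping through get_with_protocol_version and instead packs the list elements directly into the internal 32-bit collection format. As a self-contained sketch of that packed layout (the names and helper below are illustrative, not Scylla's actual API): a 32-bit element count, then each element as a 32-bit length prefix followed by its bytes.

```cpp
#include <cstdint>
#include <string>
#include <vector>

// Illustrative helper: append a 32-bit integer in big-endian order,
// as the CQL native protocol encodes sizes and lengths.
static void put_int32(std::string& out, uint32_t v) {
    out.push_back(char((v >> 24) & 0xff));
    out.push_back(char((v >> 16) & 0xff));
    out.push_back(char((v >> 8) & 0xff));
    out.push_back(char(v & 0xff));
}

// Pack already-serialized elements into the internal 32-bit collection
// format: element count, then [length, bytes] per element.
std::string pack(const std::vector<std::string>& elements) {
    std::string out;
    put_int32(out, uint32_t(elements.size()));
    for (const auto& e : elements) {
        put_int32(out, uint32_t(e.size()));
        out += e;
    }
    return out;
}
```

In the diff, the same shape is produced by `collection_type_impl::pack` over `boost::make_transform_iterator`, which dereferences each `bytes_opt` on the fly instead of materializing a second vector.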
@@ -395,9 +401,9 @@ lists::discarder::execute(mutation& m, const exploded_clustering_prefix& prefix,
return;
}
auto&& elist = *existing_list;
auto&& elist = ltype->deserialize_mutation_form(*existing_list);
if (elist.empty()) {
if (elist.cells.empty()) {
return;
}
@@ -414,14 +420,14 @@ lists::discarder::execute(mutation& m, const exploded_clustering_prefix& prefix,
// toDiscard will be small and keeping a list will be more efficient.
auto&& to_discard = lvalue->_elements;
collection_type_impl::mutation mnew;
for (auto&& cell : elist) {
for (auto&& cell : elist.cells) {
auto have_value = [&] (bytes_view value) {
return std::find_if(to_discard.begin(), to_discard.end(),
[ltype, value] (auto&& v) { return ltype->get_elements_type()->equal(*v, value); })
!= to_discard.end();
};
if (have_value(cell.value)) {
mnew.cells.emplace_back(cell.key, params.make_dead_cell());
if (cell.second.is_live() && have_value(cell.second.value())) {
mnew.cells.emplace_back(bytes(cell.first.begin(), cell.first.end()), params.make_dead_cell());
}
}
auto mnew_ser = ltype->serialize_mutation_form(mnew);
@@ -449,17 +455,17 @@ lists::discarder_by_index::execute(mutation& m, const exploded_clustering_prefix
if (!column.is_static()) {
row_key = clustering_key::from_clustering_prefix(*params._schema, prefix);
}
auto&& existing_list_opt = params.get_prefetched_list(m.key(), std::move(row_key), column);
auto&& existing_list = params.get_prefetched_list(m.key(), std::move(row_key), column);
int32_t idx = read_simple_exactly<int32_t>(*cvalue->_bytes);
if (!existing_list_opt) {
if (!existing_list) {
throw exceptions::invalid_request_exception("Attempted to delete an element from a list which is null");
}
auto&& existing_list = *existing_list_opt;
if (idx < 0 || size_t(idx) >= existing_list.size()) {
throw exceptions::invalid_request_exception(sprint("List index %d out of bound, list has size %d", idx, existing_list.size()));
auto&& deserialized = ltype->deserialize_mutation_form(*existing_list);
if (idx < 0 || size_t(idx) >= deserialized.cells.size()) {
throw exceptions::invalid_request_exception(sprint("List index %d out of bound, list has size %d", idx, deserialized.cells.size()));
}
collection_type_impl::mutation mut;
mut.cells.emplace_back(existing_list[idx].key, params.make_dead_cell());
mut.cells.emplace_back(to_bytes(deserialized.cells[idx].first), params.make_dead_cell());
m.set_cell(prefix, column, ltype->serialize_mutation_form(mut));
}


@@ -78,9 +78,9 @@ public:
explicit value(std::vector<bytes_opt> elements)
: _elements(std::move(elements)) {
}
static value from_serialized(bytes_view v, list_type type, cql_serialization_format sf);
static value from_serialized(bytes_view v, list_type type, serialization_format sf);
virtual bytes_opt get(const query_options& options) override;
virtual bytes get_with_protocol_version(cql_serialization_format sf) override;
virtual bytes get_with_protocol_version(serialization_format sf) override;
bool equals(shared_ptr<list_type_impl> lt, const value& v);
virtual std::vector<bytes_opt> get_elements() override;
virtual sstring to_string() const;


@@ -152,7 +152,7 @@ maps::literal::to_string() const {
}
maps::value
maps::value::from_serialized(bytes_view value, map_type type, cql_serialization_format sf) {
maps::value::from_serialized(bytes_view value, map_type type, serialization_format sf) {
try {
// Collections have this small hack that validate cannot be called on a serialized object,
// but compose does the validation (so we're fine).
@@ -171,11 +171,11 @@ maps::value::from_serialized(bytes_view value, map_type type, cql_serialization_
bytes_opt
maps::value::get(const query_options& options) {
return get_with_protocol_version(options.get_cql_serialization_format());
return get_with_protocol_version(options.get_serialization_format());
}
bytes
maps::value::get_with_protocol_version(cql_serialization_format sf) {
maps::value::get_with_protocol_version(serialization_format sf) {
//FIXME: share code with serialize_partially_deserialized_form
size_t len = collection_value_len(sf) * map.size() * 2 + collection_size_len(sf);
for (auto&& e : map) {
@@ -257,7 +257,7 @@ maps::marker::bind(const query_options& options) {
maps::value::from_serialized(*val,
static_pointer_cast<const map_type_impl>(
_receiver->type),
options.get_cql_serialization_format())) :
options.get_serialization_format())) :
nullptr;
}
@@ -333,7 +333,7 @@ maps::do_put(mutation& m, const exploded_clustering_prefix& prefix, const update
m.set_cell(prefix, column, params.make_dead_cell());
} else {
auto v = map_type_impl::serialize_partially_deserialized_form({map_value->map.begin(), map_value->map.end()},
cql_serialization_format::internal());
serialization_format::internal());
m.set_cell(prefix, column, params.make_cell(std::move(v)));
}
}


@@ -81,9 +81,9 @@ public:
value(std::map<bytes, bytes, serialized_compare> map)
: map(std::move(map)) {
}
static value from_serialized(bytes_view value, map_type type, cql_serialization_format sf);
static value from_serialized(bytes_view value, map_type type, serialization_format sf);
virtual bytes_opt get(const query_options& options) override;
virtual bytes get_with_protocol_version(cql_serialization_format sf);
virtual bytes get_with_protocol_version(serialization_format sf);
bool equals(map_type mt, const value& v);
virtual sstring to_string() const;
};


@@ -47,7 +47,7 @@ namespace cql3 {
thread_local const query_options::specific_options query_options::specific_options::DEFAULT{-1, {}, {}, api::missing_timestamp};
thread_local query_options query_options::DEFAULT{db::consistency_level::ONE, std::experimental::nullopt,
{}, false, query_options::specific_options::DEFAULT, cql_serialization_format::latest()};
{}, false, query_options::specific_options::DEFAULT, version::native_protocol(), serialization_format::use_32_bit()};
query_options::query_options(db::consistency_level consistency,
std::experimental::optional<std::vector<sstring_view>> names,
@@ -55,14 +55,16 @@ query_options::query_options(db::consistency_level consistency,
std::vector<bytes_view_opt> value_views,
bool skip_metadata,
specific_options options,
cql_serialization_format sf)
int32_t protocol_version,
serialization_format sf)
: _consistency(consistency)
, _names(std::move(names))
, _values(std::move(values))
, _value_views(std::move(value_views))
, _skip_metadata(skip_metadata)
, _options(std::move(options))
, _cql_serialization_format(sf)
, _protocol_version(protocol_version)
, _serialization_format(sf)
{
}
@@ -71,7 +73,8 @@ query_options::query_options(db::consistency_level consistency,
std::vector<bytes_view_opt> value_views,
bool skip_metadata,
specific_options options,
cql_serialization_format sf)
int32_t protocol_version,
serialization_format sf)
: query_options(
consistency,
std::move(names),
@@ -79,6 +82,7 @@ query_options::query_options(db::consistency_level consistency,
std::move(value_views),
skip_metadata,
std::move(options),
protocol_version,
sf
)
{
@@ -90,7 +94,7 @@ query_options::query_options(query_options&& o, std::vector<std::vector<bytes_vi
std::vector<query_options> tmp;
tmp.reserve(value_views.size());
std::transform(value_views.begin(), value_views.end(), std::back_inserter(tmp), [this](auto& vals) {
return query_options(_consistency, {}, vals, _skip_metadata, _options, _cql_serialization_format);
return query_options(_consistency, {}, vals, _skip_metadata, _options, _protocol_version, _serialization_format);
});
_batch_options = std::move(tmp);
}
@@ -103,7 +107,8 @@ query_options::query_options(db::consistency_level cl, std::vector<bytes_opt> va
{},
false,
query_options::specific_options::DEFAULT,
cql_serialization_format::latest()
version::native_protocol(),
serialization_format::use_32_bit()
)
{
for (auto&& value : _values) {
@@ -173,12 +178,12 @@ api::timestamp_type query_options::get_timestamp(service::query_state& state) co
int query_options::get_protocol_version() const
{
return _cql_serialization_format.protocol_version();
return _protocol_version;
}
cql_serialization_format query_options::get_cql_serialization_format() const
serialization_format query_options::get_serialization_format() const
{
return _cql_serialization_format;
return _serialization_format;
}
const query_options::specific_options& query_options::get_specific_options() const


@@ -48,7 +48,7 @@
#include "service/pager/paging_state.hh"
#include "cql3/column_specification.hh"
#include "cql3/column_identifier.hh"
#include "cql_serialization_format.hh"
#include "serialization_format.hh"
namespace cql3 {
@@ -74,7 +74,8 @@ private:
mutable std::vector<std::vector<int8_t>> _temporaries;
const bool _skip_metadata;
const specific_options _options;
cql_serialization_format _cql_serialization_format;
const int32_t _protocol_version; // transient
serialization_format _serialization_format;
std::experimental::optional<std::vector<query_options>> _batch_options;
public:
query_options(query_options&&) = default;
@@ -86,19 +87,22 @@ public:
std::vector<bytes_view_opt> value_views,
bool skip_metadata,
specific_options options,
cql_serialization_format sf);
int32_t protocol_version,
serialization_format sf);
explicit query_options(db::consistency_level consistency,
std::experimental::optional<std::vector<sstring_view>> names,
std::vector<bytes_view_opt> value_views,
bool skip_metadata,
specific_options options,
cql_serialization_format sf);
int32_t protocol_version,
serialization_format sf);
explicit query_options(db::consistency_level consistency,
std::vector<std::vector<bytes_view_opt>> value_views,
bool skip_metadata,
specific_options options,
cql_serialization_format sf);
int32_t protocol_version,
serialization_format sf);
// Batch query_options constructor
explicit query_options(query_options&&, std::vector<std::vector<bytes_view_opt>> value_views);
@@ -127,7 +131,7 @@ public:
* a native protocol request (i.e. it's been allocated locally or by CQL-over-thrift).
*/
int get_protocol_version() const;
cql_serialization_format get_cql_serialization_format() const;
serialization_format get_serialization_format() const;
// Mainly for the sake of BatchQueryOptions
const specific_options& get_specific_options() const;
const query_options& for_statement(size_t i) const;
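The query_options changes above pull the protocol version out of the serialization format: the format now only governs how collection values are encoded, while the protocol version travels separately as a transient field. A minimal standalone sketch of that separation (types simplified for illustration, not Scylla's actual declarations):

```cpp
#include <cstdint>

// Illustrative sketch: the serialization format decides only the width of
// collection size/length fields (32-bit from protocol v3 on, 16-bit before),
// independent of which wire protocol version the client spoke.
class serialization_format {
    bool _use_32_bit;
    explicit serialization_format(bool use_32_bit) : _use_32_bit(use_32_bit) {}
public:
    static serialization_format use_32_bit() { return serialization_format(true); }
    static serialization_format use_16_bit() { return serialization_format(false); }
    // bytes used for a collection's size and element-length fields
    int collection_size_len() const { return _use_32_bit ? 4 : 2; }
};

struct query_options {
    int32_t protocol_version;     // transient, for wire-level decisions only
    serialization_format format;  // for (de)serializing collection values
};
```

Keeping the two apart lets internally-generated options pick the 32-bit format without claiming any particular client protocol version.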


@@ -287,13 +287,6 @@ public:
};
inline ::shared_ptr<cql3::metadata> make_empty_metadata()
{
auto result = ::make_shared<cql3::metadata>(std::vector<::shared_ptr<cql3::column_specification>>{});
result->set_skip_metadata();
return result;
}
class result_set {
#if 0
private static final ColumnIdentifier COUNT_COLUMN = new ColumnIdentifier("count", false);


@@ -53,7 +53,7 @@ public:
return true;
}
virtual void add_input(cql_serialization_format sf, result_set_builder& rs) override {
virtual void add_input(serialization_format sf, result_set_builder& rs) override {
// Aggregation of aggregation is not supported
size_t m = _arg_selectors.size();
for (size_t i = 0; i < m; ++i) {
@@ -65,7 +65,7 @@ public:
_aggregate->add_input(sf, _args);
}
virtual bytes_opt get_output(cql_serialization_format sf) override {
virtual bytes_opt get_output(serialization_format sf) override {
return _aggregate->compute(sf);
}


@@ -87,11 +87,11 @@ public:
return false;
}
virtual void add_input(cql_serialization_format sf, result_set_builder& rs) override {
virtual void add_input(serialization_format sf, result_set_builder& rs) override {
_selected->add_input(sf, rs);
}
virtual bytes_opt get_output(cql_serialization_format sf) override {
virtual bytes_opt get_output(serialization_format sf) override {
auto&& value = _selected->get_output(sf);
if (!value) {
return std::experimental::nullopt;


@@ -57,7 +57,7 @@ public:
return _arg_selectors[0]->is_aggregate();
}
virtual void add_input(cql_serialization_format sf, result_set_builder& rs) override {
virtual void add_input(serialization_format sf, result_set_builder& rs) override {
size_t m = _arg_selectors.size();
for (size_t i = 0; i < m; ++i) {
auto&& s = _arg_selectors[i];
@@ -68,7 +68,7 @@ public:
virtual void reset() override {
}
virtual bytes_opt get_output(cql_serialization_format sf) override {
virtual bytes_opt get_output(serialization_format sf) override {
size_t m = _arg_selectors.size();
for (size_t i = 0; i < m; ++i) {
auto&& s = _arg_selectors[i];


@@ -63,8 +63,7 @@ selection::selection(schema_ptr schema,
query::partition_slice::option_set selection::get_query_options() {
query::partition_slice::option_set opts;
opts.set_if<query::partition_slice::option::send_timestamp>(_collect_timestamps);
opts.set_if<query::partition_slice::option::send_expiry>(_collect_TTLs);
opts.set_if<query::partition_slice::option::send_timestamp_and_expiry>(_collect_timestamps || _collect_TTLs);
opts.set_if<query::partition_slice::option::send_partition_key>(
std::any_of(_columns.begin(), _columns.end(),
@@ -113,11 +112,11 @@ protected:
_current.clear();
}
virtual std::vector<bytes_opt> get_output_row(cql_serialization_format sf) override {
virtual std::vector<bytes_opt> get_output_row(serialization_format sf) override {
return std::move(_current);
}
virtual void add_input_row(cql_serialization_format sf, result_set_builder& rs) override {
virtual void add_input_row(serialization_format sf, result_set_builder& rs) override {
_current = std::move(*rs.current);
}
@@ -181,7 +180,7 @@ protected:
return _factories->contains_only_aggregate_functions();
}
virtual std::vector<bytes_opt> get_output_row(cql_serialization_format sf) override {
virtual std::vector<bytes_opt> get_output_row(serialization_format sf) override {
std::vector<bytes_opt> output_row;
output_row.reserve(_selectors.size());
for (auto&& s : _selectors) {
@@ -190,7 +189,7 @@ protected:
return output_row;
}
virtual void add_input_row(cql_serialization_format sf, result_set_builder& rs) {
virtual void add_input_row(serialization_format sf, result_set_builder& rs) {
for (auto&& s : _selectors) {
s->add_input(sf, rs);
}
@@ -253,11 +252,11 @@ selection::collect_metadata(schema_ptr schema, const std::vector<::shared_ptr<ra
return r;
}
result_set_builder::result_set_builder(const selection& s, db_clock::time_point now, cql_serialization_format sf)
result_set_builder::result_set_builder(const selection& s, db_clock::time_point now, serialization_format sf)
: _result_set(std::make_unique<result_set>(::make_shared<metadata>(*(s.get_result_metadata()))))
, _selectors(s.new_selectors())
, _now(now)
, _cql_serialization_format(sf)
, _serialization_format(sf)
{
if (s._collect_timestamps) {
_timestamps.resize(s._columns.size(), 0);
@@ -296,16 +295,17 @@ void result_set_builder::add(const column_definition& def, const query::result_a
}
}
void result_set_builder::add_collection(const column_definition& def, bytes_view c) {
current->emplace_back(to_bytes(c));
void result_set_builder::add(const column_definition& def, collection_mutation_view c) {
auto&& ctype = static_cast<const collection_type_impl*>(def.type.get());
current->emplace_back(ctype->to_value(c, _serialization_format));
// timestamps, ttls meaningless for collections
}
void result_set_builder::new_row() {
if (current) {
_selectors->add_input_row(_cql_serialization_format, *this);
_selectors->add_input_row(_serialization_format, *this);
if (!_selectors->is_aggregate()) {
_result_set->add_row(_selectors->get_output_row(_cql_serialization_format));
_result_set->add_row(_selectors->get_output_row(_serialization_format));
_selectors->reset();
}
current->clear();
@@ -319,13 +319,13 @@ void result_set_builder::new_row() {
std::unique_ptr<result_set> result_set_builder::build() {
if (current) {
_selectors->add_input_row(_cql_serialization_format, *this);
_result_set->add_row(_selectors->get_output_row(_cql_serialization_format));
_selectors->add_input_row(_serialization_format, *this);
_result_set->add_row(_selectors->get_output_row(_serialization_format));
_selectors->reset();
current = std::experimental::nullopt;
}
if (_result_set->empty() && _selectors->is_aggregate()) {
_result_set->add_row(_selectors->get_output_row(_cql_serialization_format));
_result_set->add_row(_selectors->get_output_row(_serialization_format));
}
return std::move(_result_set);
}
@@ -344,7 +344,7 @@ void result_set_builder::visitor::add_value(const column_definition& def,
_builder.add_empty();
return;
}
_builder.add_collection(def, *cell);
_builder.add(def, *cell);
} else {
auto cell = i.next_atomic_cell();
if (!cell) {


@@ -69,9 +69,9 @@ public:
* @param rs the <code>ResultSetBuilder</code>
* @throws InvalidRequestException
*/
virtual void add_input_row(cql_serialization_format sf, result_set_builder& rs) = 0;
virtual void add_input_row(serialization_format sf, result_set_builder& rs) = 0;
virtual std::vector<bytes_opt> get_output_row(cql_serialization_format sf) = 0;
virtual std::vector<bytes_opt> get_output_row(serialization_format sf) = 0;
virtual void reset() = 0;
};
@@ -236,13 +236,13 @@ private:
std::vector<api::timestamp_type> _timestamps;
std::vector<int32_t> _ttls;
const db_clock::time_point _now;
cql_serialization_format _cql_serialization_format;
serialization_format _serialization_format;
public:
result_set_builder(const selection& s, db_clock::time_point now, cql_serialization_format sf);
result_set_builder(const selection& s, db_clock::time_point now, serialization_format sf);
void add_empty();
void add(bytes_opt value);
void add(const column_definition& def, const query::result_atomic_cell_view& c);
void add_collection(const column_definition& def, bytes_view c);
void add(const column_definition& def, collection_mutation_view c);
void new_row();
std::unique_ptr<result_set> build();
api::timestamp_type timestamp_of(size_t idx);


@@ -71,7 +71,7 @@ public:
* @param rs the <code>result_set_builder</code>
* @throws InvalidRequestException if a problem occurs while adding the input value
*/
virtual void add_input(cql_serialization_format sf, result_set_builder& rs) = 0;
virtual void add_input(serialization_format sf, result_set_builder& rs) = 0;
/**
* Returns the selector output.
@@ -80,7 +80,7 @@ public:
* @return the selector output
* @throws InvalidRequestException if a problem occurs while computing the output value
*/
virtual bytes_opt get_output(cql_serialization_format sf) = 0;
virtual bytes_opt get_output(serialization_format sf) = 0;
/**
* Returns the <code>selector</code> output type.


@@ -88,12 +88,12 @@ public:
, _type(type)
{ }
virtual void add_input(cql_serialization_format sf, result_set_builder& rs) override {
virtual void add_input(serialization_format sf, result_set_builder& rs) override {
// TODO: can we steal it?
_current = (*rs.current)[_idx];
}
virtual bytes_opt get_output(cql_serialization_format sf) override {
virtual bytes_opt get_output(serialization_format sf) override {
return std::move(_current);
}


@@ -86,7 +86,7 @@ public:
return make_shared<wtots_factory>(std::move(column_name), idx, is_writetime);
}
virtual void add_input(cql_serialization_format sf, result_set_builder& rs) override {
virtual void add_input(serialization_format sf, result_set_builder& rs) override {
if (_is_writetime) {
int64_t ts = rs.timestamp_of(_idx);
if (ts != api::missing_timestamp) {
@@ -108,7 +108,7 @@ public:
}
}
virtual bytes_opt get_output(cql_serialization_format sf) override {
virtual bytes_opt get_output(serialization_format sf) override {
return _current;
}


@@ -120,7 +120,7 @@ sets::literal::to_string() const {
}
sets::value
sets::value::from_serialized(bytes_view v, set_type type, cql_serialization_format sf) {
sets::value::from_serialized(bytes_view v, set_type type, serialization_format sf) {
try {
// Collections have this small hack that validate cannot be called on a serialized object,
// but compose does the validation (so we're fine).
@@ -138,11 +138,11 @@ sets::value::from_serialized(bytes_view v, set_type type, cql_serialization_form
bytes_opt
sets::value::get(const query_options& options) {
return get_with_protocol_version(options.get_cql_serialization_format());
return get_with_protocol_version(options.get_serialization_format());
}
bytes
sets::value::get_with_protocol_version(cql_serialization_format sf) {
sets::value::get_with_protocol_version(serialization_format sf) {
return collection_type_impl::pack(_elements.begin(), _elements.end(),
_elements.size(), sf);
}
@@ -215,7 +215,7 @@ sets::marker::bind(const query_options& options) {
return nullptr;
} else {
auto as_set_type = static_pointer_cast<const set_type_impl>(_receiver->type);
return make_shared(value::from_serialized(*value, as_set_type, options.get_cql_serialization_format()));
return make_shared(value::from_serialized(*value, as_set_type, options.get_serialization_format()));
}
}
@@ -262,7 +262,7 @@ sets::adder::do_add(mutation& m, const exploded_clustering_prefix& row_key, cons
// for frozen sets, we're overwriting the whole cell
auto v = set_type->serialize_partially_deserialized_form(
{set_value->_elements.begin(), set_value->_elements.end()},
cql_serialization_format::internal());
serialization_format::internal());
m.set_cell(row_key, column, params.make_cell(std::move(v)));
} else {
m.set_cell(row_key, column, params.make_dead_cell());


@@ -78,9 +78,9 @@ public:
value(std::set<bytes, serialized_compare> elements)
: _elements(std::move(elements)) {
}
static value from_serialized(bytes_view v, set_type type, cql_serialization_format sf);
static value from_serialized(bytes_view v, set_type type, serialization_format sf);
virtual bytes_opt get(const query_options& options) override;
virtual bytes get_with_protocol_version(cql_serialization_format sf) override;
virtual bytes get_with_protocol_version(serialization_format sf) override;
bool equals(set_type st, const value& v);
virtual sstring to_string() const override;
};


@@ -44,7 +44,6 @@
#include <regex>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/algorithm/adjacent_find.hpp>
#include "cql3/statements/create_table_statement.hh"
@@ -174,12 +173,13 @@ create_table_statement::raw_statement::raw_statement(::shared_ptr<cf_name> name,
throw exceptions::invalid_request_exception(sprint("Table names shouldn't be more than %d characters long (got \"%s\")", schema::NAME_LENGTH, cf_name.c_str()));
}
// Check for duplicate column names
auto i = boost::range::adjacent_find(_defined_names, [] (auto&& e1, auto&& e2) {
return e1->text() == e2->text();
});
if (i != _defined_names.end()) {
throw exceptions::invalid_request_exception(sprint("Multiple definition of identifier %s", (*i)->text()));
for (auto&& entry : _defined_names) {
auto c = std::count_if(_defined_names.begin(), _defined_names.end(), [&entry] (auto e) {
return entry->text() == e->text();
});
if (c > 1) {
throw exceptions::invalid_request_exception(sprint("Multiple definition of identifier %s", entry->text().c_str()));
}
}
properties->validate();
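The duplicate-name check above moves from adjacent_find, which only works because the multiset is ordered by identifier text, to a count_if scan that is correct regardless of container ordering, at O(n²) cost that is negligible for a column list. A standalone sketch of the same pattern (names illustrative):

```cpp
#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>

// Illustrative sketch: without relying on the container being sorted by
// name, count each name's occurrences and reject any that appears more
// than once. O(n^2), but n is a table's column count.
void check_no_duplicates(const std::vector<std::string>& names) {
    for (const auto& name : names) {
        auto c = std::count(names.begin(), names.end(), name);
        if (c > 1) {
            throw std::invalid_argument("Multiple definition of identifier " + name);
        }
    }
}
```

This is why the companion header change can drop the indirect_less comparator from the _defined_names multiset: nothing depends on its ordering anymore.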


@@ -51,7 +51,6 @@
#include "core/shared_ptr.hh"
#include <seastar/util/indirect.hh>
#include <unordered_map>
#include <utility>
#include <vector>
@@ -140,8 +139,7 @@ private:
create_table_statement::column_set_type _static_columns;
bool _use_compact_storage = false;
std::multiset<::shared_ptr<column_identifier>,
indirect_less<::shared_ptr<column_identifier>, column_identifier::text_comparator>> _defined_names;
std::multiset<::shared_ptr<column_identifier>> _defined_names;
bool _if_not_exists;
public:
raw_statement(::shared_ptr<cf_name> name, bool if_not_exists);


@@ -1,156 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Copyright 2016 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include "cql3/statements/create_type_statement.hh"
namespace cql3 {
namespace statements {
create_type_statement::create_type_statement(const ut_name& name, bool if_not_exists)
: _name{name}
, _if_not_exists{if_not_exists}
{
}
void create_type_statement::prepare_keyspace(const service::client_state& state)
{
if (!_name.has_keyspace()) {
_name.set_keyspace(state.get_keyspace());
}
}
void create_type_statement::add_definition(::shared_ptr<column_identifier> name, ::shared_ptr<cql3_type::raw> type)
{
_column_names.emplace_back(name);
_column_types.emplace_back(type);
}
void create_type_statement::check_access(const service::client_state& state)
{
warn(unimplemented::cause::PERMISSIONS);
#if 0
state.hasKeyspaceAccess(keyspace(), Permission.CREATE);
#endif
}
void create_type_statement::validate(distributed<service::storage_proxy>&, const service::client_state& state)
{
#if 0
KSMetaData ksm = Schema.instance.getKSMetaData(name.getKeyspace());
if (ksm == null)
throw new InvalidRequestException(String.format("Cannot add type in unknown keyspace %s", name.getKeyspace()));
if (ksm.userTypes.getType(name.getUserTypeName()) != null && !ifNotExists)
throw new InvalidRequestException(String.format("A user type of name %s already exists", name));
for (CQL3Type.Raw type : columnTypes)
if (type.isCounter())
throw new InvalidRequestException("A user type cannot contain counters");
#endif
}
#if 0
public static void checkForDuplicateNames(UserType type) throws InvalidRequestException
{
for (int i = 0; i < type.size() - 1; i++)
{
ByteBuffer fieldName = type.fieldName(i);
for (int j = i+1; j < type.size(); j++)
{
if (fieldName.equals(type.fieldName(j)))
throw new InvalidRequestException(String.format("Duplicate field name %s in type %s",
UTF8Type.instance.getString(fieldName),
UTF8Type.instance.getString(type.name)));
}
}
}
#endif
shared_ptr<transport::event::schema_change> create_type_statement::change_event()
{
using namespace transport;
return make_shared<transport::event::schema_change>(event::schema_change::change_type::CREATED,
event::schema_change::target_type::TYPE,
keyspace(),
_name.get_string_type_name());
}
const sstring& create_type_statement::keyspace() const
{
return _name.get_keyspace();
}
#if 0
private UserType createType() throws InvalidRequestException
{
List<ByteBuffer> names = new ArrayList<>(columnNames.size());
for (ColumnIdentifier name : columnNames)
names.add(name.bytes);
List<AbstractType<?>> types = new ArrayList<>(columnTypes.size());
for (CQL3Type.Raw type : columnTypes)
types.add(type.prepare(keyspace()).getType());
return new UserType(name.getKeyspace(), name.getUserTypeName(), names, types);
}
#endif
future<bool> create_type_statement::announce_migration(distributed<service::storage_proxy>& proxy, bool is_local_only)
{
throw std::runtime_error("User-defined types are not supported yet");
#if 0
KSMetaData ksm = Schema.instance.getKSMetaData(name.getKeyspace());
assert ksm != null; // should have validated otherwise
// Can happen with ifNotExists
if (ksm.userTypes.getType(name.getUserTypeName()) != null)
return false;
UserType type = createType();
checkForDuplicateNames(type);
MigrationManager.announceNewType(type, isLocalOnly);
return true;
#endif
}
}
}

View File

@@ -1,75 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Copyright 2016 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "cql3/statements/schema_altering_statement.hh"
#include "cql3/cql3_type.hh"
#include "cql3/ut_name.hh"
namespace cql3 {
namespace statements {
class create_type_statement : public schema_altering_statement {
ut_name _name;
std::vector<::shared_ptr<column_identifier>> _column_names;
std::vector<::shared_ptr<cql3_type::raw>> _column_types;
bool _if_not_exists;
public:
create_type_statement(const ut_name& name, bool if_not_exists);
virtual void prepare_keyspace(const service::client_state& state) override;
void add_definition(::shared_ptr<column_identifier> name, ::shared_ptr<cql3_type::raw> type);
virtual void check_access(const service::client_state& state) override;
virtual void validate(distributed<service::storage_proxy>&, const service::client_state& state) override;
virtual shared_ptr<transport::event::schema_change> change_event() override;
virtual const sstring& keyspace() const override;
virtual future<bool> announce_migration(distributed<service::storage_proxy>& proxy, bool is_local_only) override;
};
}
}

View File

@@ -189,20 +189,13 @@ class prefetch_data_builder {
schema_ptr _schema;
std::experimental::optional<partition_key> _pkey;
private:
void add_cell(update_parameters::prefetch_data::row& cells, const column_definition& def, const std::experimental::optional<bytes_view>& cell) {
void add_cell(update_parameters::prefetch_data::row& cells, const column_definition& def, const std::experimental::optional<collection_mutation_view>& cell) {
if (cell) {
auto ctype = static_pointer_cast<const collection_type_impl>(def.type);
if (!ctype->is_multi_cell()) {
throw std::logic_error(sprint("cannot prefetch frozen collection: %s", def.name_as_text()));
}
auto map_type = map_type_impl::get_instance(ctype->name_comparator(), ctype->value_comparator(), true);
update_parameters::prefetch_data::cell_list list;
// FIXME: Iterate over a range instead of fully exploded collection
auto dv = map_type->deserialize(*cell);
for (auto&& el : value_cast<map_type_impl::native_type>(dv)) {
list.emplace_back(update_parameters::prefetch_data::cell{el.first.serialize(), el.second.serialize()});
}
cells.emplace(def.id, std::move(list));
cells.emplace(def.id, collection_mutation{*cell});
}
};
public:
@@ -282,8 +275,7 @@ modification_statement::read_required_rows(
std::move(regular_cols),
query::partition_slice::option_set::of<
query::partition_slice::option::send_partition_key,
query::partition_slice::option::send_clustering_key,
query::partition_slice::option::collections_as_maps>());
query::partition_slice::option::send_clustering_key>());
std::vector<query::partition_range> pr;
for (auto&& pk : *keys) {
pr.emplace_back(dht::global_partitioner().decorate_key(*s, pk));

View File

@@ -117,11 +117,6 @@ select_statement::for_selection(schema_ptr schema, ::shared_ptr<selection::selec
::shared_ptr<term>{});
}
::shared_ptr<cql3::metadata> select_statement::get_result_metadata() const {
// FIXME: COUNT needs special result metadata handling.
return _selection->get_result_metadata();
}
uint32_t select_statement::get_bound_terms() {
return _bound_terms;
}
@@ -175,7 +170,7 @@ select_statement::make_partition_slice(const query_options& options) {
if (_parameters->is_distinct()) {
_opts.set(query::partition_slice::option::distinct);
return query::partition_slice({ query::clustering_range::make_open_ended_both_sides() },
std::move(static_columns), {}, _opts, nullptr, options.get_cql_serialization_format());
std::move(static_columns), {}, _opts);
}
auto bounds = _restrictions->get_clustering_bounds(options);
@@ -184,7 +179,7 @@ select_statement::make_partition_slice(const query_options& options) {
std::reverse(bounds.begin(), bounds.end());
}
return query::partition_slice(std::move(bounds),
std::move(static_columns), std::move(regular_columns), _opts, nullptr, options.get_cql_serialization_format());
std::move(static_columns), std::move(regular_columns), _opts);
}
int32_t select_statement::get_limit(const query_options& options) const {
@@ -251,7 +246,7 @@ select_statement::execute(distributed<service::storage_proxy>& proxy, service::q
if (aggregate) {
return do_with(
cql3::selection::result_set_builder(*_selection, now,
options.get_cql_serialization_format()),
options.get_serialization_format()),
[p, page_size, now](auto& builder) {
return do_until([p] {return p->is_exhausted();},
[p, &builder, page_size, now] {
@@ -343,8 +338,8 @@ shared_ptr<transport::messages::result_message> select_statement::process_result
db_clock::time_point now) {
cql3::selection::result_set_builder builder(*_selection, now,
options.get_cql_serialization_format());
query::result_view::consume(*results, cmd->slice,
options.get_serialization_format());
query::result_view::consume(results->buf(), cmd->slice,
cql3::selection::result_set_builder::visitor(builder, *_schema,
*_selection));
auto rs = builder.build();

View File

@@ -121,7 +121,6 @@ public:
static ::shared_ptr<select_statement> for_selection(
schema_ptr schema, ::shared_ptr<selection::selection> selection);
::shared_ptr<cql3::metadata> get_result_metadata() const;
virtual uint32_t get_bound_terms() override;
virtual void check_access(const service::client_state& state) override;
virtual void validate(distributed<service::storage_proxy>&, const service::client_state& state) override;

View File

@@ -78,7 +78,7 @@ void update_statement::add_update_for_key(mutation& m, const exploded_clustering
// If there are static columns, there also must be clustering columns, in which
// case empty prefix can only refer to the static row.
bool is_static_prefix = s->has_static_columns() && !prefix;
if (type == statement_type::INSERT && !is_static_prefix && s->is_cql3_table()) {
if (type == statement_type::INSERT && !is_static_prefix) {
auto& row = m.partition().clustered_row(clustering_key::from_clustering_prefix(*s, prefix));
row.apply(row_marker(params.timestamp(), params.ttl(), params.expiry()));
}
@@ -137,17 +137,19 @@ update_statement::parsed_insert::prepare_internal(database& db, schema_ptr schem
throw exceptions::invalid_request_exception("No columns provided to INSERT");
}
std::unordered_set<bytes> column_ids;
for (size_t i = 0; i < _column_names.size(); i++) {
auto id = _column_names[i]->prepare_column_identifier(schema);
auto def = get_column_definition(schema, *id);
if (!def) {
throw exceptions::invalid_request_exception(sprint("Unknown identifier %s", *id));
}
if (column_ids.count(id->name())) {
throw exceptions::invalid_request_exception(sprint("Multiple definitions found for column %s", *id));
for (size_t j = 0; j < i; j++) {
auto other_id = _column_names[j]->prepare_column_identifier(schema);
if (*id == *other_id) {
throw exceptions::invalid_request_exception(sprint("Multiple definitions found for column %s", *id));
}
}
column_ids.emplace(id->name());
auto&& value = _column_values[i];

View File

@@ -205,7 +205,7 @@ class collection_terminal {
public:
virtual ~collection_terminal() {}
/** Gets the value of the collection when serialized with the given protocol version format */
virtual bytes get_with_protocol_version(cql_serialization_format sf) = 0;
virtual bytes get_with_protocol_version(serialization_format sf) = 0;
};
/**

View File

@@ -202,12 +202,12 @@ public:
buffers[i] = to_bytes_opt(_elements[i]->bind_and_get(options));
// Inside tuples, we must force the serialization of collections to v3, whatever protocol
// version is in use, since we're going to store that serialized value directly.
if (options.get_cql_serialization_format() != cql_serialization_format::internal()
if (options.get_serialization_format() != serialization_format::internal()
&& _type->type(i)->is_collection()) {
if (buffers[i]) {
buffers[i] = static_pointer_cast<const collection_type_impl>(_type->type(i))->reserialize(
options.get_cql_serialization_format(),
cql_serialization_format::internal(),
options.get_serialization_format(),
serialization_format::internal(),
bytes_view(*buffers[i]));
}
}
@@ -251,7 +251,7 @@ public:
try {
// Collections have this small hack that validate cannot be called on a serialized object,
// but the deserialization does the validation (so we're fine).
auto l = value_cast<list_type_impl::native_type>(type->deserialize(value, options.get_cql_serialization_format()));
auto l = value_cast<list_type_impl::native_type>(type->deserialize(value, options.get_serialization_format()));
auto ttype = dynamic_pointer_cast<const tuple_type_impl>(type->get_elements_type());
assert(ttype);

View File

@@ -43,7 +43,7 @@
namespace cql3 {
const update_parameters::prefetch_data::cell_list*
std::experimental::optional<collection_mutation_view>
update_parameters::get_prefetched_list(
partition_key pkey,
std::experimental::optional<clustering_key> ckey,
@@ -63,7 +63,7 @@ update_parameters::get_prefetched_list(
if (j == row.end()) {
return {};
}
return &j->second;
return {j->second};
}
update_parameters::prefetch_data::prefetch_data(schema_ptr schema)

View File
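`get_prefetched_list` above trades a raw pointer return (with `nullptr` meaning "not found") for a `std::experimental::optional<collection_mutation_view>`. The general shape of that change, on simplified stand-in types rather than Scylla's:

```cpp
#include <cassert>
#include <optional>
#include <string>
#include <unordered_map>

// Pointer-style lookup: nullptr means "not found".
const std::string* find_ptr(const std::unordered_map<int, std::string>& m, int k) {
    auto i = m.find(k);
    return i == m.end() ? nullptr : &i->second;
}

// Optional-style lookup, like the patched get_prefetched_list: the caller
// checks engagement instead of risking a null-pointer dereference.
std::optional<std::string> find_opt(const std::unordered_map<int, std::string>& m, int k) {
    auto i = m.find(k);
    if (i == m.end()) {
        return {};
    }
    return i->second;
}
```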

@@ -88,12 +88,7 @@ public:
&& bool(k1.second) == bool(k2.second) && (!k1.second || ck_eq(*k1.second, *k2.second));
}
};
struct cell {
bytes key;
bytes value;
};
using cell_list = std::vector<cell>;
using row = std::unordered_map<column_id, cell_list>;
using row = std::unordered_map<column_id, collection_mutation>;
public:
std::unordered_map<key, row, key_hashing, key_equality> rows;
schema_ptr schema;
@@ -190,7 +185,7 @@ public:
return _timestamp;
}
const prefetch_data::cell_list*
std::experimental::optional<collection_mutation_view>
get_prefetched_list(
partition_key pkey,
std::experimental::optional<clustering_key> ckey,

View File

@@ -161,15 +161,15 @@ void user_types::delayed_value::collect_marker_specification(shared_ptr<variable
}
std::vector<bytes_opt> user_types::delayed_value::bind_internal(const query_options& options) {
auto sf = options.get_cql_serialization_format();
auto sf = options.get_serialization_format();
std::vector<bytes_opt> buffers;
for (size_t i = 0; i < _type->size(); ++i) {
buffers.push_back(to_bytes_opt(_values[i]->bind_and_get(options)));
// Inside UDT values, we must force the serialization of collections to v3, whatever protocol
// version is in use, since we're going to store that serialized value directly.
if (!sf.collection_format_unchanged() && _type->field_type(i)->is_collection() && buffers.back()) {
if (sf != serialization_format::use_32_bit() && _type->field_type(i)->is_collection() && buffers.back()) {
auto&& ctype = static_pointer_cast<const collection_type_impl>(_type->field_type(i));
buffers.back() = ctype->reserialize(sf, cql_serialization_format::latest(), bytes_view(*buffers.back()));
buffers.back() = ctype->reserialize(sf, serialization_format::use_32_bit(), bytes_view(*buffers.back()));
}
}
return buffers;

View File

@@ -56,7 +56,7 @@ void ut_name::set_keyspace(sstring keyspace) {
_ks_name = std::experimental::optional<sstring>{keyspace};
}
const sstring& ut_name::get_keyspace() const {
sstring ut_name::get_keyspace() const {
return _ks_name.value();
}

View File

@@ -58,7 +58,7 @@ public:
void set_keyspace(sstring keyspace);
const sstring& get_keyspace() const;
sstring get_keyspace() const;
bytes get_user_type_name() const;

View File

@@ -1,52 +0,0 @@
/*
* Copyright (C) 2015 Cloudius Systems, Ltd.
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <iostream>
using cql_protocol_version_type = uint8_t;
// Abstraction of transport protocol-dependent serialization format
// Protocols v1, v2 used 16 bits for collection sizes, while v3 and
// above use 32 bits. But letting every bit of the code know what
// transport protocol we're using (and in some cases, we aren't using
// any transport -- it's for internal storage) is bad, so abstract it
// away here.
class cql_serialization_format {
cql_protocol_version_type _version;
public:
static constexpr cql_protocol_version_type latest_version = 3;
explicit cql_serialization_format(cql_protocol_version_type version) : _version(version) {}
static cql_serialization_format latest() { return cql_serialization_format{latest_version}; }
static cql_serialization_format internal() { return latest(); }
bool using_32_bits_for_collections() const { return _version >= 3; }
bool operator==(cql_serialization_format x) const { return _version == x._version; }
bool operator!=(cql_serialization_format x) const { return !operator==(x); }
cql_protocol_version_type protocol_version() const { return _version; }
friend std::ostream& operator<<(std::ostream& out, const cql_serialization_format& sf) {
return out << static_cast<int>(sf._version);
}
bool collection_format_unchanged(cql_serialization_format other = cql_serialization_format::latest()) const {
return using_32_bits_for_collections() == other.using_32_bits_for_collections();
}
};

View File
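The class above exists so that collection sizes can be written as 16-bit values for protocol v1/v2 and 32-bit values for v3 and later, without every call site knowing which transport version is in play. A rough sketch of the kind of branching it centralizes (the encoding details here are illustrative, not the exact CQL wire format):

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative encoder: 16-bit big-endian length for protocol < 3,
// 32-bit for protocol >= 3.
std::vector<uint8_t> encode_collection_size(uint32_t n, uint8_t protocol_version) {
    if (protocol_version >= 3) {
        return { uint8_t(n >> 24), uint8_t(n >> 16), uint8_t(n >> 8), uint8_t(n) };
    }
    return { uint8_t(n >> 8), uint8_t(n) };
}
```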

@@ -26,6 +26,7 @@
#include "db/commitlog/commitlog_entry.hh"
#include "db/system_keyspace.hh"
#include "db/consistency_level.hh"
#include "db/serializer.hh"
#include "db/commitlog/commitlog.hh"
#include "db/config.hh"
#include "to_string.hh"
@@ -116,7 +117,7 @@ column_family::column_family(schema_ptr schema, config config, no_commitlog cl,
partition_presence_checker
column_family::make_partition_presence_checker(lw_shared_ptr<sstable_list> old_sstables) {
return [this, old_sstables = std::move(old_sstables)] (partition_key_view key) {
return [this, old_sstables = std::move(old_sstables)] (const partition_key& key) {
for (auto&& s : *old_sstables) {
if (s.second->filter_has_key(*_schema, key)) {
return partition_presence_checker_result::maybe_exists;
@@ -157,17 +158,17 @@ class range_sstable_reader final : public mutation_reader::impl {
mutation_reader _reader;
// Use a pointer instead of copying, so we don't need to regenerate the reader if
// the priority changes.
const io_priority_class& _pc;
const io_priority_class* _pc;
public:
range_sstable_reader(schema_ptr s, lw_shared_ptr<sstable_list> sstables, const query::partition_range& pr, const io_priority_class& pc)
: _pr(pr)
, _sstables(std::move(sstables))
, _pc(pc)
, _pc(&pc)
{
std::vector<mutation_reader> readers;
for (const lw_shared_ptr<sstables::sstable>& sst : *_sstables | boost::adaptors::map_values) {
// FIXME: make sstable::read_range_rows() return ::mutation_reader so that we can drop this wrapper.
mutation_reader reader = make_mutation_reader<sstable_range_wrapping_reader>(sst, s, pr, pc);
mutation_reader reader = make_mutation_reader<sstable_range_wrapping_reader>(sst, s, pr);
if (sst->is_shared()) {
reader = make_filtering_reader(std::move(reader), belongs_to_current_shard);
}
@@ -191,13 +192,13 @@ class single_key_sstable_reader final : public mutation_reader::impl {
lw_shared_ptr<sstable_list> _sstables;
// Use a pointer instead of copying, so we don't need to regenerate the reader if
// the priority changes.
const io_priority_class& _pc;
const io_priority_class* _pc;
public:
single_key_sstable_reader(schema_ptr schema, lw_shared_ptr<sstable_list> sstables, const partition_key& key, const io_priority_class& pc)
: _schema(std::move(schema))
, _key(sstables::key::from_partition_key(*_schema, key))
, _sstables(std::move(sstables))
, _pc(pc)
, _pc(&pc)
{ }
virtual future<mutation_opt> operator()() override {
@@ -205,7 +206,7 @@ public:
return make_ready_future<mutation_opt>();
}
return parallel_for_each(*_sstables | boost::adaptors::map_values, [this](const lw_shared_ptr<sstables::sstable>& sstable) {
return sstable->read_row(_schema, _key, _pc).then([this](mutation_opt mo) {
return sstable->read_row(_schema, _key).then([this](mutation_opt mo) {
apply(_m, std::move(mo));
});
}).then([this] {
@@ -314,7 +315,7 @@ column_family::make_reader(schema_ptr s, const query::partition_range& range, co
// https://github.com/scylladb/scylla/issues/185
for (auto&& mt : *_memtables) {
readers.emplace_back(mt->make_reader(s, range, pc));
readers.emplace_back(mt->make_reader(s, range));
}
if (_config.enable_cache) {
@@ -515,9 +516,9 @@ future<sstables::entry_descriptor> column_family::probe_file(sstring sstdir, sst
});
}
void column_family::update_stats_for_new_sstable(uint64_t disk_space_used_by_sstable) {
_stats.live_disk_space_used += disk_space_used_by_sstable;
_stats.total_disk_space_used += disk_space_used_by_sstable;
void column_family::update_stats_for_new_sstable(uint64_t new_sstable_data_size) {
_stats.live_disk_space_used += new_sstable_data_size;
_stats.total_disk_space_used += new_sstable_data_size;
_stats.live_sstable_count++;
}
@@ -529,7 +530,7 @@ void column_family::add_sstable(lw_shared_ptr<sstables::sstable> sstable) {
auto generation = sstable->generation();
// allow in-progress reads to continue using old list
_sstables = make_lw_shared<sstable_list>(*_sstables);
update_stats_for_new_sstable(sstable->bytes_on_disk());
update_stats_for_new_sstable(sstable->data_size());
_sstables->emplace(generation, std::move(sstable));
}
@@ -728,7 +729,7 @@ column_family::rebuild_sstable_list(const std::vector<sstables::shared_sstable>&
// We create a new list rather than modifying it in-place, so that
// on-going reads can continue to use the old list.
auto current_sstables = _sstables;
auto new_sstable_list = make_lw_shared<sstable_list>();
_sstables = make_lw_shared<sstable_list>();
// zeroing live_disk_space_used and live_sstable_count because the
// sstable list is re-created below.
@@ -742,7 +743,7 @@ column_family::rebuild_sstable_list(const std::vector<sstables::shared_sstable>&
// Checks if oldtab is a sstable not being compacted.
if (!s.count(oldtab.second)) {
update_stats_for_new_sstable(oldtab.second->data_size());
new_sstable_list->emplace(oldtab.first, oldtab.second);
_sstables->emplace(oldtab.first, oldtab.second);
}
}
@@ -750,14 +751,12 @@ column_family::rebuild_sstable_list(const std::vector<sstables::shared_sstable>&
// FIXME: rename the new sstable(s). Verify a rename doesn't cause
// problems for the sstable object.
update_stats_for_new_sstable(newtab->data_size());
new_sstable_list->emplace(newtab->generation(), newtab);
_sstables->emplace(newtab->generation(), newtab);
}
for (const auto& oldtab : sstables_to_remove) {
oldtab->mark_for_deletion();
}
_sstables = std::move(new_sstable_list);
}
future<>
@@ -1297,14 +1296,14 @@ void database::add_column_family(schema_ptr schema, column_family::config cfg) {
_ks_cf_to_uuid.emplace(std::move(kscf), uuid);
}
future<> database::drop_column_family(const sstring& ks_name, const sstring& cf_name, timestamp_func tsf) {
future<> database::drop_column_family(db_clock::time_point dropped_at, const sstring& ks_name, const sstring& cf_name) {
auto uuid = find_uuid(ks_name, cf_name);
auto& ks = find_keyspace(ks_name);
auto cf = _column_families.at(uuid);
_column_families.erase(uuid);
ks.metadata()->remove_column_family(cf->schema());
_ks_cf_to_uuid.erase(std::make_pair(ks_name, cf_name));
return truncate(ks, *cf, std::move(tsf)).then([this, cf] {
return truncate(dropped_at, ks, *cf).then([this, cf] {
return cf->stop();
}).then([this, cf] {
return make_ready_future<>();
@@ -1587,9 +1586,7 @@ future<lw_shared_ptr<query::result>>
column_family::query(schema_ptr s, const query::read_command& cmd, const std::vector<query::partition_range>& partition_ranges) {
utils::latency_counter lc;
_stats.reads.set_latency(lc);
auto qs_ptr = std::make_unique<query_state>(std::move(s), cmd, partition_ranges);
auto& qs = *qs_ptr;
{
return do_with(query_state(std::move(s), cmd, partition_ranges), [this] (query_state& qs) {
return do_until(std::bind(&query_state::done, &qs), [this, &qs] {
auto&& range = *qs.current_partition_range++;
qs.reader = make_reader(qs.schema, range, service::get_local_sstable_query_read_priority());
@@ -1600,23 +1597,23 @@ column_family::query(schema_ptr s, const query::read_command& cmd, const std::ve
auto p_builder = qs.builder.add_partition(*mo->schema(), mo->key());
auto is_distinct = qs.cmd.slice.options.contains(query::partition_slice::option::distinct);
auto limit = !is_distinct ? qs.limit : 1;
auto rows_added = mo->partition().query(p_builder, *qs.schema, qs.cmd.timestamp, limit);
qs.limit -= rows_added;
mo->partition().query(p_builder, *qs.schema, qs.cmd.timestamp, limit);
qs.limit -= p_builder.row_count();
} else {
qs.range_empty = true;
}
});
});
}).then([qs_ptr = std::move(qs_ptr), &qs] {
}).then([&qs] {
return make_ready_future<lw_shared_ptr<query::result>>(
make_lw_shared<query::result>(qs.builder.build()));
}).finally([lc, this]() mutable {
});
}).finally([lc, this]() mutable {
_stats.reads.mark(lc);
if (lc.is_start()) {
_stats.estimated_read.add(lc.latency(), _stats.reads.count);
}
});
}
});
}
mutation_source
@@ -1913,13 +1910,13 @@ future<> database::flush_all_memtables() {
});
}
future<> database::truncate(sstring ksname, sstring cfname, timestamp_func tsf) {
future<> database::truncate(db_clock::time_point truncated_at, sstring ksname, sstring cfname) {
auto& ks = find_keyspace(ksname);
auto& cf = find_column_family(ksname, cfname);
return truncate(ks, cf, std::move(tsf));
return truncate(truncated_at, ks, cf);
}
future<> database::truncate(const keyspace& ks, column_family& cf, timestamp_func tsf)
future<> database::truncate(db_clock::time_point truncated_at, const keyspace& ks, column_family& cf)
{
const auto durable = ks.metadata()->durable_writes();
const auto auto_snapshot = get_config().auto_snapshot();
@@ -1934,22 +1931,20 @@ future<> database::truncate(const keyspace& ks, column_family& cf, timestamp_fun
cf.clear();
}
return cf.run_with_compaction_disabled([f = std::move(f), &cf, auto_snapshot, tsf = std::move(tsf)]() mutable {
return f.then([&cf, auto_snapshot, tsf = std::move(tsf)] {
return cf.run_with_compaction_disabled([truncated_at, f = std::move(f), &cf, auto_snapshot, cfname = cf.schema()->cf_name()]() mutable {
return f.then([truncated_at, &cf, auto_snapshot, cfname = std::move(cfname)] {
dblog.debug("Discarding sstable data for truncated CF + indexes");
// TODO: notify truncation
return tsf().then([&cf, auto_snapshot](db_clock::time_point truncated_at) {
future<> f = make_ready_future<>();
if (auto_snapshot) {
auto name = sprint("%d-%s", truncated_at.time_since_epoch().count(), cf.schema()->cf_name());
f = cf.snapshot(name);
}
return f.then([&cf, truncated_at] {
return cf.discard_sstables(truncated_at).then([&cf, truncated_at](db::replay_position rp) {
// TODO: indexes.
return db::system_keyspace::save_truncation_record(cf, truncated_at, rp);
});
future<> f = make_ready_future<>();
if (auto_snapshot) {
auto name = sprint("%d-%s", truncated_at.time_since_epoch().count(), cfname);
f = cf.snapshot(name);
}
return f.then([&cf, truncated_at] {
return cf.discard_sstables(truncated_at).then([&cf, truncated_at](db::replay_position rp) {
// TODO: indexes.
return db::system_keyspace::save_truncation_record(cf, truncated_at, rp);
});
});
});

View File
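Several hunks in this file trade a `const io_priority_class&` member for a `const io_priority_class*`. Beyond letting the reader observe later priority changes without being regenerated, a pointer member keeps the class assignable and, as the use-after-free fix at the top of this commit range illustrates, avoids the temptation to bind a long-lived reference to a short-lived copy. A minimal sketch with an illustrative stand-in type:

```cpp
#include <cassert>

struct priority_class { int shares; };

// Pointer member, like the patched readers: it observes the current state
// of the long-lived priority class, and leaves the reader copy-assignable
// (a reference member would implicitly delete the assignment operator).
class reader {
    const priority_class* _pc;
public:
    explicit reader(const priority_class& pc) : _pc(&pc) {}
    int shares() const { return _pc->shares; }
};
```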

@@ -171,8 +171,11 @@ private:
int _compaction_disabled = 0;
class memtable_flush_queue;
std::unique_ptr<memtable_flush_queue> _flush_queue;
// Store generations of sstables being compacted at the moment. That's needed to prevent an
// sstable from being compacted twice.
std::unordered_set<unsigned long> _compacting_generations;
private:
void update_stats_for_new_sstable(uint64_t disk_space_used_by_sstable);
void update_stats_for_new_sstable(uint64_t new_sstable_data_size);
void add_sstable(sstables::sstable&& sstable);
void add_sstable(lw_shared_ptr<sstables::sstable> sstable);
void add_memtable();
@@ -395,6 +398,10 @@ public:
}
});
}
std::unordered_set<unsigned long>& compacting_generations() {
return _compacting_generations;
}
private:
// One does not need to wait on this future if all we are interested in, is
// initiating the write. The writes initiated here will eventually
@@ -637,6 +644,8 @@ public:
void add_column_family(schema_ptr schema, column_family::config cfg);
future<> drop_column_family(db_clock::time_point changed_at, const sstring& ks_name, const sstring& cf_name);
/* throws std::out_of_range if missing */
const utils::UUID& find_uuid(const sstring& ks, const sstring& cf) const throw (std::out_of_range);
const utils::UUID& find_uuid(const schema_ptr&) const throw (std::out_of_range);
@@ -703,16 +712,9 @@ public:
}
future<> flush_all_memtables();
// See #937. Truncation now requires a callback to get a time stamp
// that must be guaranteed to be the same for all shards.
typedef std::function<future<db_clock::time_point>()> timestamp_func;
/** Truncates the given column family */
future<> truncate(sstring ksname, sstring cfname, timestamp_func);
future<> truncate(const keyspace& ks, column_family& cf, timestamp_func);
future<> drop_column_family(const sstring& ks_name, const sstring& cf_name, timestamp_func);
future<> truncate(db_clock::time_point truncated_at, sstring ksname, sstring cfname);
future<> truncate(db_clock::time_point truncated_at, const keyspace& ks, column_family& cf);
const logalloc::region_group& dirty_memory_region_group() const {
return _dirty_memory_region_group;

View File
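The `timestamp_func` callback in this header exists because, per the comment referencing #937, truncation must use a timestamp guaranteed to be the same on every shard. The essential trick is to sample the clock once and hand every shard the same captured value; a simplified sketch using a plain `std::function` rather than Scylla's future-returning callback:

```cpp
#include <cassert>
#include <chrono>
#include <functional>

using timestamp_func = std::function<std::chrono::system_clock::time_point()>;

// Capture one timestamp up front so every shard's truncate() sees the same
// value, instead of each shard reading the clock independently.
timestamp_func make_fixed_timestamp() {
    auto ts = std::chrono::system_clock::now();
    return [ts] { return ts; };
}
```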

@@ -59,12 +59,6 @@
#include "gms/failure_detector.hh"
#include "service/storage_service.hh"
#include "schema_registry.hh"
#include "idl/uuid.dist.hh"
#include "idl/frozen_schema.dist.hh"
#include "serializer_impl.hh"
#include "serialization_visitors.hh"
#include "idl/uuid.dist.impl.hh"
#include "idl/frozen_schema.dist.impl.hh"
static logging::logger logger("batchlog_manager");
@@ -125,11 +119,15 @@ mutation db::batchlog_manager::get_batch_log_mutation_for(const std::vector<muta
auto timestamp = api::new_timestamp();
auto data = [this, &mutations] {
std::vector<canonical_mutation> fm(mutations.begin(), mutations.end());
bytes_ostream out;
const auto size = std::accumulate(fm.begin(), fm.end(), size_t(0), [](size_t s, auto& m) {
return s + serializer<canonical_mutation>{m}.size();
});
bytes buf(bytes::initialized_later(), size);
data_output out(buf);
for (auto& m : fm) {
ser::serialize(out, m);
serializer<canonical_mutation>{m}(out);
}
return to_bytes(out.linearize());
return buf;
}();
mutation m(key, schema);
@@ -180,9 +178,9 @@ future<> db::batchlog_manager::replay_all_failed_batches() {
logger.debug("Replaying batch {}", id);
auto fms = make_lw_shared<std::deque<canonical_mutation>>();
auto in = ser::as_input_stream(data);
while (in.size()) {
fms->emplace_back(ser::deserialize(in, boost::type<canonical_mutation>()));
data_input in(data);
while (in.has_next()) {
fms->emplace_back(serializer<canonical_mutation>::read(in));
}
auto size = data.size();
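The batchlog hunk above uses a two-pass scheme: first sum the exact serialized size of every mutation, then write them all into one pre-sized buffer. A minimal standalone sketch of that size-then-write pattern (toy `blob_serializer` standing in for `serializer<canonical_mutation>`, not the real Scylla types):

```cpp
#include <cassert>
#include <cstdint>
#include <numeric>
#include <string>
#include <vector>

struct blob_serializer {                 // hypothetical stand-in for serializer<canonical_mutation>
    const std::string& item;
    // length prefix + payload
    size_t size() const { return sizeof(uint32_t) + item.size(); }
    void write(std::string& out) const {
        uint32_t len = item.size();
        out.append(reinterpret_cast<const char*>(&len), sizeof(len));
        out.append(item);
    }
};

std::string serialize_all(const std::vector<std::string>& items) {
    // pass 1: compute the total serialized size up front
    size_t total = std::accumulate(items.begin(), items.end(), size_t(0),
        [](size_t s, const std::string& m) { return s + blob_serializer{m}.size(); });
    std::string buf;
    buf.reserve(total);                  // single allocation, no mid-write growth
    // pass 2: write every item into the pre-sized buffer
    for (auto& m : items) {
        blob_serializer{m}.write(buf);
    }
    assert(buf.size() == total);         // the two passes must agree exactly
    return buf;
}
```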


@@ -67,9 +67,6 @@
#include "commitlog_entry.hh"
#include "service/priority_manager.hh"
#include <boost/range/numeric.hpp>
#include <boost/range/adaptor/transformed.hpp>
static logging::logger logger("commitlog");
class crc32_nbo {
@@ -148,7 +145,7 @@ const std::string db::commitlog::descriptor::FILENAME_PREFIX(
"CommitLog" + SEPARATOR);
const std::string db::commitlog::descriptor::FILENAME_EXTENSION(".log");
class db::commitlog::segment_manager : public ::enable_shared_from_this<segment_manager> {
class db::commitlog::segment_manager {
public:
config cfg;
const uint64_t max_size;
@@ -278,8 +275,6 @@ public:
scollectd::registrations create_counters();
void orphan_all();
void discard_unused_segments();
void discard_completed_segments(const cf_id_type& id,
const replay_position& pos);
@@ -377,7 +372,7 @@ private:
*/
class db::commitlog::segment: public enable_lw_shared_from_this<segment> {
::shared_ptr<segment_manager> _segment_manager;
segment_manager* _segment_manager;
descriptor _desc;
file _file;
@@ -409,7 +404,7 @@ class db::commitlog::segment: public enable_lw_shared_from_this<segment> {
// This is maintaining the semantics of only using the write-lock
// as a gate for flushing, i.e. once we've begun a flush for position X
// we are ok with writes to positions > X
return _segment_manager->begin_flush().then(std::bind(&rwlock::write_lock, &_dwrite)).finally([this] {
return _dwrite.write_lock().then(std::bind(&segment_manager::begin_flush, _segment_manager)).finally([this] {
_dwrite.write_unlock();
});
}
@@ -422,12 +417,12 @@ class db::commitlog::segment: public enable_lw_shared_from_this<segment> {
// This is maintaining the semantics of only using the write-lock
// as a gate for flushing, i.e. once we've begun a flush for position X
// we are ok with writes to positions > X
return _segment_manager->begin_write().then(std::bind(&rwlock::read_lock, &_dwrite));
return _dwrite.read_lock().then(std::bind(&segment_manager::begin_write, _segment_manager));
}
void end_write() {
_dwrite.read_unlock();
_segment_manager->end_write();
_dwrite.read_unlock();
}
public:
@@ -449,8 +444,8 @@ public:
// TODO : tune initial / default size
static constexpr size_t default_size = align_up<size_t>(128 * 1024, alignment);
segment(::shared_ptr<segment_manager> m, const descriptor& d, file && f, bool active)
: _segment_manager(std::move(m)), _desc(std::move(d)), _file(std::move(f)), _sync_time(
segment(segment_manager* m, const descriptor& d, file && f, bool active)
: _segment_manager(m), _desc(std::move(d)), _file(std::move(f)), _sync_time(
clock_type::now()), _queue(0)
{
++_segment_manager->totals.segments_created;
@@ -558,7 +553,7 @@ public:
throw;
}
});
}).finally([this, me] {
}).finally([this] {
end_flush();
});
}
@@ -647,7 +642,7 @@ public:
forget_schema_versions();
// acquire read lock
return begin_write().then([this, size, off, buf = std::move(buf)]() mutable {
return begin_write().then([this, size, off, buf = std::move(buf), me]() mutable {
auto written = make_lw_shared<size_t>(0);
auto p = buf.get();
return repeat([this, size, off, written, p]() mutable {
@@ -1046,7 +1041,7 @@ future<db::commitlog::segment_manager::sseg_ptr> db::commitlog::segment_manager:
return open_file_dma(cfg.commit_log_location + "/" + d.filename(), open_flags::wo | open_flags::create).then([this, d, active](file f) {
// xfs doesn't like files extended beyond eof, so enlarge the file
return f.truncate(max_size).then([this, d, active, f] () mutable {
auto s = make_lw_shared<segment>(this->shared_from_this(), d, std::move(f), active);
auto s = make_lw_shared<segment>(this, d, std::move(f), active);
return make_ready_future<sseg_ptr>(s);
});
});
@@ -1160,10 +1155,6 @@ future<> db::commitlog::segment_manager::shutdown() {
return make_ready_future<>();
}
void db::commitlog::segment_manager::orphan_all() {
_segments.clear();
_reserve_segments.clear();
}
/*
* Sync all segments, then clear them out, to ensure all ops are done.
@@ -1177,7 +1168,7 @@ future<> db::commitlog::segment_manager::clear() {
for (auto& s : _segments) {
s->mark_clean();
}
orphan_all();
_segments.clear();
});
}
/**
@@ -1211,15 +1202,7 @@ void db::commitlog::segment_manager::on_timer() {
// take outstanding allocations into account. This is paranoid,
// but if for some reason the file::open takes longer than timer period,
// we could flood the reserve list with new segments
//
// #482 - _reserve_allocating is decremented in the finally clause below.
// This is needed because if either allocate_segment _or_ emplacing into
// _reserve_segments should throw, we still need the counter reset
// However, because of this, it might be that emplace was done, but not decrement,
// when we get here again. So occasionally we might get a sum of the two that is
// not consistent. It should however always just potentially be _too much_, i.e.
// just an indicator that we don't need to do anything. So let's do that.
auto n = std::min(_reserve_segments.size() + _reserve_allocating, _num_reserve_segments);
auto n = _reserve_segments.size() + _reserve_allocating;
return parallel_for_each(boost::irange(n, _num_reserve_segments), [this, n](auto i) {
++_reserve_allocating;
return this->allocate_segment(false).then([this](sseg_ptr s) {
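The hunk above clamps the reserve count with `std::min` for exactly the reason the #482 comment gives: segments already reserved plus those still being allocated can transiently sum past the target, and subtracting an unclamped count from the target would underflow. A small sketch of that guard (toy function, assumed names, not the Scylla code):

```cpp
#include <algorithm>
#include <cstddef>

// How many more reserve segments to start allocating. "reserved + allocating"
// may transiently over-count (see the #482 comment above), so clamp it to the
// target before subtracting; otherwise target - n would wrap around on size_t.
size_t segments_to_allocate(size_t reserved, size_t allocating, size_t target) {
    size_t n = std::min(reserved + allocating, target);
    return target - n;   // safe: n <= target, so no unsigned underflow
}
```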
@@ -1300,9 +1283,8 @@ void db::commitlog::segment_manager::release_buffer(buffer_type&& b) {
logger.trace("Deleting {} buffers", _temp_buffers.size() - max_temp_buffers);
_temp_buffers.erase(_temp_buffers.begin() + max_temp_buffers, _temp_buffers.end());
}
totals.buffer_list_bytes = boost::accumulate(
_temp_buffers | boost::adaptors::transformed(std::mem_fn(&buffer_type::size)),
size_t(0), std::plus<size_t>());
totals.buffer_list_bytes = std::accumulate(_temp_buffers.begin(),
_temp_buffers.end(), size_t(0), std::plus<size_t>());
}
/**
@@ -1352,7 +1334,7 @@ future<db::replay_position> db::commitlog::add_entry(const cf_id_type& id, const
}
db::commitlog::commitlog(config cfg)
: _segment_manager(::make_shared<segment_manager>(std::move(cfg))) {
: _segment_manager(new segment_manager(std::move(cfg))) {
}
db::commitlog::commitlog(commitlog&& v) noexcept
@@ -1360,9 +1342,6 @@ db::commitlog::commitlog(commitlog&& v) noexcept
}
db::commitlog::~commitlog() {
if (_segment_manager != nullptr) {
_segment_manager->orphan_all();
}
}
future<db::commitlog> db::commitlog::create_commitlog(config cfg) {


@@ -98,7 +98,7 @@ public:
class segment;
private:
::shared_ptr<segment_manager> _segment_manager;
std::unique_ptr<segment_manager> _segment_manager;
public:
enum class sync_mode {
PERIODIC, BATCH


@@ -1,86 +0,0 @@
/*
* Copyright 2016 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include "commitlog_entry.hh"
#include "idl/uuid.dist.hh"
#include "idl/keys.dist.hh"
#include "idl/frozen_mutation.dist.hh"
#include "idl/mutation.dist.hh"
#include "idl/commitlog.dist.hh"
#include "serializer_impl.hh"
#include "serialization_visitors.hh"
#include "idl/uuid.dist.impl.hh"
#include "idl/keys.dist.impl.hh"
#include "idl/frozen_mutation.dist.impl.hh"
#include "idl/mutation.dist.impl.hh"
#include "idl/commitlog.dist.impl.hh"
commitlog_entry::commitlog_entry(stdx::optional<column_mapping> mapping, frozen_mutation&& mutation)
: _mapping(std::move(mapping))
, _mutation_storage(std::move(mutation))
, _mutation(*_mutation_storage)
{ }
commitlog_entry::commitlog_entry(stdx::optional<column_mapping> mapping, const frozen_mutation& mutation)
: _mapping(std::move(mapping))
, _mutation(mutation)
{ }
commitlog_entry::commitlog_entry(commitlog_entry&& ce)
: _mapping(std::move(ce._mapping))
, _mutation_storage(std::move(ce._mutation_storage))
, _mutation(_mutation_storage ? *_mutation_storage : ce._mutation)
{
}
commitlog_entry& commitlog_entry::operator=(commitlog_entry&& ce)
{
if (this != &ce) {
this->~commitlog_entry();
new (this) commitlog_entry(std::move(ce));
}
return *this;
}
commitlog_entry commitlog_entry_writer::get_entry() const {
if (_with_schema) {
return commitlog_entry(_schema->get_column_mapping(), _mutation);
} else {
return commitlog_entry({}, _mutation);
}
}
void commitlog_entry_writer::compute_size() {
_size = ser::get_sizeof(get_entry());
}
void commitlog_entry_writer::write(data_output& out) const {
seastar::simple_output_stream str(out.reserve(size()));
ser::serialize(str, get_entry());
}
commitlog_entry_reader::commitlog_entry_reader(const temporary_buffer<char>& buffer)
: _ce([&] {
seastar::simple_input_stream in(buffer.get(), buffer.size());
return ser::deserialize(in, boost::type<commitlog_entry>());
}())
{
}


@@ -25,43 +25,21 @@
#include "frozen_mutation.hh"
#include "schema.hh"
#include "utils/data_output.hh"
namespace stdx = std::experimental;
class commitlog_entry {
stdx::optional<column_mapping> _mapping;
stdx::optional<frozen_mutation> _mutation_storage;
const frozen_mutation& _mutation;
public:
commitlog_entry(stdx::optional<column_mapping> mapping, frozen_mutation&& mutation);
commitlog_entry(stdx::optional<column_mapping> mapping, const frozen_mutation& mutation);
commitlog_entry(commitlog_entry&&);
commitlog_entry(const commitlog_entry&) = delete;
commitlog_entry& operator=(commitlog_entry&&);
commitlog_entry& operator=(const commitlog_entry&) = delete;
const stdx::optional<column_mapping>& mapping() const { return _mapping; }
const frozen_mutation& mutation() const { return _mutation; }
};
class commitlog_entry_writer {
schema_ptr _schema;
db::serializer<column_mapping> _column_mapping_serializer;
const frozen_mutation& _mutation;
bool _with_schema = true;
size_t _size;
private:
void compute_size();
commitlog_entry get_entry() const;
public:
commitlog_entry_writer(schema_ptr s, const frozen_mutation& fm)
: _schema(std::move(s)), _mutation(fm)
{
compute_size();
}
: _schema(std::move(s)), _column_mapping_serializer(_schema->get_column_mapping()), _mutation(fm)
{ }
void set_with_schema(bool value) {
_with_schema = value;
compute_size();
}
bool with_schema() {
return _with_schema;
@@ -71,17 +49,40 @@ public:
}
size_t size() const {
return _size;
size_t size = data_output::serialized_size<bool>();
if (_with_schema) {
size += _column_mapping_serializer.size();
}
size += _mutation.representation().size();
return size;
}
void write(data_output& out) const;
void write(data_output& out) const {
out.write(_with_schema);
if (_with_schema) {
_column_mapping_serializer.write(out);
}
auto bv = _mutation.representation();
out.write(bv.begin(), bv.end());
}
};
class commitlog_entry_reader {
commitlog_entry _ce;
frozen_mutation _mutation;
stdx::optional<column_mapping> _column_mapping;
public:
commitlog_entry_reader(const temporary_buffer<char>& buffer);
commitlog_entry_reader(const temporary_buffer<char>& buffer)
: _mutation(bytes())
{
data_input in(buffer);
bool has_column_mapping = in.read<bool>();
if (has_column_mapping) {
_column_mapping = db::serializer<::column_mapping>::read(in);
}
auto bv = in.read_view(in.avail());
_mutation = frozen_mutation(bytes(bv.begin(), bv.end()));
}
const stdx::optional<column_mapping>& get_column_mapping() const { return _ce.mapping(); }
const frozen_mutation& mutation() const { return _ce.mutation(); }
const stdx::optional<column_mapping>& get_column_mapping() const { return _column_mapping; }
const frozen_mutation& mutation() const { return _mutation; }
};
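The writer/reader pair above frames a commitlog entry as a `bool` "has schema" flag, optionally followed by the serialized column mapping, with the raw frozen-mutation bytes filling the rest of the record. A self-contained sketch of that framing under assumed details (strings standing in for the real serialized types, and an explicit length prefix on the mapping, which the real `db::serializer` handles internally):

```cpp
#include <cstdint>
#include <cstring>
#include <optional>
#include <string>

std::string frame_entry(const std::optional<std::string>& mapping,
                        const std::string& mutation) {
    std::string out;
    out.push_back(mapping ? 1 : 0);           // the _with_schema flag
    if (mapping) {
        uint32_t len = mapping->size();       // length-prefix the mapping blob
        out.append(reinterpret_cast<const char*>(&len), sizeof(len));
        out += *mapping;
    }
    out += mutation;   // everything after the (optional) mapping is the mutation
    return out;
}

std::string read_mutation(const std::string& buf) {
    size_t pos = 1;                            // skip the flag byte
    if (buf[0]) {
        uint32_t len;
        std::memcpy(&len, buf.data() + pos, sizeof(len));
        pos += sizeof(len) + len;              // skip the column mapping
    }
    return buf.substr(pos);                    // remaining bytes are the mutation
}
```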


@@ -53,6 +53,7 @@
#include "database.hh"
#include "sstables/sstables.hh"
#include "db/system_keyspace.hh"
#include "db/serializer.hh"
#include "cql3/query_processor.hh"
#include "log.hh"
#include "converting_mutation_partition_applier.hh"


@@ -65,7 +65,6 @@
#include <boost/range/adaptor/map.hpp>
#include "compaction_strategy.hh"
#include "utils/joinpoint.hh"
using namespace db::system_keyspace;
@@ -607,10 +606,10 @@ future<> do_merge_schema(distributed<service::storage_proxy>& proxy, std::vector
#endif
proxy.local().get_db().invoke_on_all([keyspaces_to_drop = std::move(keyspaces_to_drop)] (database& db) {
// it is safe to drop a keyspace only when all nested ColumnFamilies were deleted
return do_for_each(keyspaces_to_drop, [&db] (auto keyspace_to_drop) {
for (auto&& keyspace_to_drop : keyspaces_to_drop) {
db.drop_keyspace(keyspace_to_drop);
return service::get_local_migration_manager().notify_drop_keyspace(keyspace_to_drop);
});
service::get_local_migration_manager().notify_drop_keyspace(keyspace_to_drop);
}
}).get0();
});
}
@@ -650,7 +649,7 @@ future<std::set<sstring>> merge_keyspaces(distributed<service::storage_proxy>& p
return do_for_each(created, [&db](auto&& val) {
auto ksm = create_keyspace_from_schema_partition(val);
return db.create_keyspace(ksm).then([ksm] {
return service::get_local_migration_manager().notify_create_keyspace(ksm);
service::get_local_migration_manager().notify_create_keyspace(ksm);
});
}).then([&altered, &db] () mutable {
for (auto&& name : altered) {
@@ -680,6 +679,7 @@ static void merge_tables(distributed<service::storage_proxy>& proxy,
std::map<qualified_name, schema_mutations>&& before,
std::map<qualified_name, schema_mutations>&& after)
{
auto changed_at = db_clock::now();
std::vector<global_schema_ptr> created;
std::vector<global_schema_ptr> altered;
std::vector<global_schema_ptr> dropped;
@@ -687,44 +687,36 @@ static void merge_tables(distributed<service::storage_proxy>& proxy,
auto diff = difference(before, after);
for (auto&& key : diff.entries_only_on_left) {
auto&& s = proxy.local().get_db().local().find_schema(key.keyspace_name, key.table_name);
logger.info("Dropping {}.{} id={} version={}", s->ks_name(), s->cf_name(), s->id(), s->version());
dropped.emplace_back(s);
}
for (auto&& key : diff.entries_only_on_right) {
auto s = create_table_from_mutations(after.at(key));
logger.info("Creating {}.{} id={} version={}", s->ks_name(), s->cf_name(), s->id(), s->version());
created.emplace_back(s);
created.emplace_back(create_table_from_mutations(after.at(key)));
}
for (auto&& key : diff.entries_differing) {
auto s = create_table_from_mutations(after.at(key));
logger.info("Altering {}.{} id={} version={}", s->ks_name(), s->cf_name(), s->id(), s->version());
altered.emplace_back(s);
altered.emplace_back(create_table_from_mutations(after.at(key)));
}
do_with(utils::make_joinpoint([] { return db_clock::now();})
, [&created, &dropped, &altered, &proxy](auto& tsf) {
return proxy.local().get_db().invoke_on_all([&created, &dropped, &altered, &tsf] (database& db) {
return seastar::async([&] {
for (auto&& gs : created) {
schema_ptr s = gs.get();
auto& ks = db.find_keyspace(s->ks_name());
auto cfg = ks.make_column_family_config(*s);
db.add_column_family(s, cfg);
auto& cf = db.find_column_family(s);
cf.mark_ready_for_writes();
ks.make_directory_for_column_family(s->cf_name(), s->id()).get();
service::get_local_migration_manager().notify_create_column_family(s);
}
for (auto&& gs : altered) {
update_column_family(db, gs.get());
}
parallel_for_each(dropped.begin(), dropped.end(), [&db, &tsf](auto&& gs) {
schema_ptr s = gs.get();
return db.drop_column_family(s->ks_name(), s->cf_name(), [&tsf] { return tsf.value(); }).then([s] {
service::get_local_migration_manager().notify_drop_column_family(s);
});
}).get();
});
proxy.local().get_db().invoke_on_all([&created, &dropped, &altered, changed_at] (database& db) {
return seastar::async([&] {
for (auto&& gs : created) {
schema_ptr s = gs.get();
auto& ks = db.find_keyspace(s->ks_name());
auto cfg = ks.make_column_family_config(*s);
db.add_column_family(s, cfg);
auto& cf = db.find_column_family(s);
cf.mark_ready_for_writes();
ks.make_directory_for_column_family(s->cf_name(), s->id()).get();
service::get_local_migration_manager().notify_create_column_family(s);
}
for (auto&& gs : altered) {
update_column_family(db, gs.get());
}
parallel_for_each(dropped.begin(), dropped.end(), [changed_at, &db](auto&& gs) {
schema_ptr s = gs.get();
return db.drop_column_family(changed_at, s->ks_name(), s->cf_name()).then([s] {
service::get_local_migration_manager().notify_drop_column_family(s);
});
}).get();
});
}).get();
}

db/serializer.cc (new file, 194 lines)

@@ -0,0 +1,194 @@
/*
* Copyright 2015 Cloudius Systems
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include "serializer.hh"
#include "database.hh"
#include "types.hh"
#include "utils/serialization.hh"
typedef uint32_t count_type; // Methinks 32 bits are enough for "normal" count purposes.
template<>
db::serializer<utils::UUID>::serializer(const utils::UUID& uuid)
: _item(uuid), _size(2 * sizeof(uint64_t)) {
}
template<>
void db::serializer<utils::UUID>::write(output& out,
const type& t) {
out.write(t.get_most_significant_bits());
out.write(t.get_least_significant_bits());
}
template<>
void db::serializer<utils::UUID>::read(utils::UUID& uuid, input& in) {
uuid = read(in);
}
template<>
void db::serializer<utils::UUID>::skip(input& in) {
in.skip(2 * sizeof(uint64_t));
}
template<> utils::UUID db::serializer<utils::UUID>::read(input& in) {
auto msb = in.read<uint64_t>();
auto lsb = in.read<uint64_t>();
return utils::UUID(msb, lsb);
}
template<>
db::serializer<bytes>::serializer(const bytes& b)
: _item(b), _size(output::serialized_size(b)) {
}
template<>
void db::serializer<bytes>::write(output& out, const type& t) {
out.write(t);
}
template<>
void db::serializer<bytes>::read(bytes& b, input& in) {
b = in.read<bytes>();
}
template<>
void db::serializer<bytes>::skip(input& in) {
in.read<bytes>(); // FIXME: Avoid reading
}
template<>
db::serializer<bytes_view>::serializer(const bytes_view& v)
: _item(v), _size(output::serialized_size(v)) {
}
template<>
void db::serializer<bytes_view>::write(output& out, const type& t) {
out.write(t);
}
template<>
void db::serializer<bytes_view>::read(bytes_view& v, input& in) {
v = in.read<bytes_view>();
}
template<>
bytes_view db::serializer<bytes_view>::read(input& in) {
return in.read<bytes_view>();
}
template<>
db::serializer<sstring>::serializer(const sstring& s)
: _item(s), _size(output::serialized_size(s)) {
}
template<>
void db::serializer<sstring>::write(output& out, const type& t) {
out.write(t);
}
template<>
void db::serializer<sstring>::read(sstring& s, input& in) {
s = in.read<sstring>();
}
template<>
void db::serializer<sstring>::skip(input& in) {
in.read<sstring>(); // FIXME: avoid reading
}
template<>
db::serializer<tombstone>::serializer(const tombstone& t)
: _item(t), _size(sizeof(t.timestamp) + sizeof(decltype(t.deletion_time.time_since_epoch().count()))) {
}
template<>
void db::serializer<tombstone>::write(output& out, const type& t) {
out.write(t.timestamp);
out.write(t.deletion_time.time_since_epoch().count());
}
template<>
void db::serializer<tombstone>::read(tombstone& t, input& in) {
t.timestamp = in.read<decltype(t.timestamp)>();
auto deletion_time = in.read<decltype(t.deletion_time.time_since_epoch().count())>();
t.deletion_time = gc_clock::time_point(gc_clock::duration(deletion_time));
}
template<>
db::serializer<atomic_cell_view>::serializer(const atomic_cell_view& c)
: _item(c), _size(bytes_view_serializer(c.serialize()).size()) {
}
template<>
void db::serializer<atomic_cell_view>::write(output& out, const atomic_cell_view& t) {
bytes_view_serializer::write(out, t.serialize());
}
template<>
void db::serializer<atomic_cell_view>::read(atomic_cell_view& c, input& in) {
c = atomic_cell_view::from_bytes(bytes_view_serializer::read(in));
}
template<>
atomic_cell_view db::serializer<atomic_cell_view>::read(input& in) {
return atomic_cell_view::from_bytes(bytes_view_serializer::read(in));
}
template<>
db::serializer<collection_mutation_view>::serializer(const collection_mutation_view& c)
: _item(c), _size(bytes_view_serializer(c.serialize()).size()) {
}
template<>
void db::serializer<collection_mutation_view>::write(output& out, const collection_mutation_view& t) {
bytes_view_serializer::write(out, t.serialize());
}
template<>
void db::serializer<collection_mutation_view>::read(collection_mutation_view& c, input& in) {
c = collection_mutation_view::from_bytes(bytes_view_serializer::read(in));
}
template<>
db::serializer<db::replay_position>::serializer(const db::replay_position& rp)
: _item(rp), _size(sizeof(uint64_t) * 2) {
}
template<>
void db::serializer<db::replay_position>::write(output& out, const db::replay_position& rp) {
out.write<uint64_t>(rp.id);
out.write<uint64_t>(rp.pos);
}
template<>
void db::serializer<db::replay_position>::read(db::replay_position& rp, input& in) {
rp.id = in.read<uint64_t>();
rp.pos = in.read<uint64_t>();
}
template class db::serializer<tombstone> ;
template class db::serializer<bytes> ;
template class db::serializer<bytes_view> ;
template class db::serializer<sstring> ;
template class db::serializer<atomic_cell_view> ;
template class db::serializer<collection_mutation_view> ;
template class db::serializer<utils::UUID> ;
template class db::serializer<db::replay_position> ;

db/serializer.hh (new file, 235 lines)

@@ -0,0 +1,235 @@
/*
* Copyright 2015 Cloudius Systems
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef DB_SERIALIZER_HH_
#define DB_SERIALIZER_HH_
#include <experimental/optional>
#include "utils/data_input.hh"
#include "utils/data_output.hh"
#include "bytes_ostream.hh"
#include "bytes.hh"
#include "database_fwd.hh"
#include "db/commitlog/replay_position.hh"
namespace db {
/**
* Serialization objects for various types, using an "internal" format (not CQL, Origin, or whatnot).
* The design rationale is that a "serializer" can be instantiated for an object, and will contain
* the obj + size, and is usable as a functor.
*
* Serialization can also be done "explicitly" through the static method "write"
* (Not using "serialize", because writing "serializer<apa>::serialize" all the time is tiring and redundant)
* though care should be taken that data will fit, of course.
*/
template<typename T>
class serializer {
public:
typedef T type;
typedef data_output output;
typedef data_input input;
typedef serializer<T> _MyType;
serializer(const type&);
// apply to memory, must be at least size() large.
const _MyType& operator()(output& out) const {
write(out, _item);
return *this;
}
static void write(output&, const type&);
static void read(type&, input&);
static type read(input&);
static void skip(input& in);
size_t size() const {
return _size;
}
void write(bytes_ostream& out) const {
auto buf = out.write_place_holder(_size);
data_output data_out((char*)buf, _size);
write(data_out, _item);
}
void write(data_output& out) const {
write(out, _item);
}
bytes to_bytes() const {
bytes b(bytes::initialized_later(), _size);
data_output out(b);
write(out);
return b;
}
static type from_bytes(bytes_view v) {
data_input in(v);
return read(in);
}
private:
const type& _item;
size_t _size;
};
template<typename T>
class serializer<std::experimental::optional<T>> {
public:
typedef std::experimental::optional<T> type;
typedef data_output output;
typedef data_input input;
typedef serializer<T> _MyType;
serializer(const type& t)
: _item(t)
, _size(output::serialized_size<bool>() + (t ? serializer<T>(*t).size() : 0))
{}
// apply to memory, must be at least size() large.
const _MyType& operator()(output& out) const {
write(out, _item);
return *this;
}
static void write(output& out, const type& v) {
bool en = v;
out.write<bool>(en);
if (en) {
serializer<T>::write(out, *v);
}
}
static void read(type& dst, input& in) {
auto en = in.read<bool>();
if (en) {
dst = serializer<T>::read(in);
} else {
dst = {};
}
}
static type read(input& in) {
type t;
read(t, in);
return t;
}
static void skip(input& in) {
auto en = in.read<bool>();
if (en) {
serializer<T>::skip(in);
}
}
size_t size() const {
return _size;
}
void write(bytes_ostream& out) const {
auto buf = out.write_place_holder(_size);
data_output data_out((char*)buf, _size);
write(data_out, _item);
}
void write(data_output& out) const {
write(out, _item);
}
bytes to_bytes() const {
bytes b(bytes::initialized_later(), _size);
data_output out(b);
write(out);
return b;
}
static type from_bytes(bytes_view v) {
data_input in(v);
return read(in);
}
private:
const std::experimental::optional<T> _item;
size_t _size;
};
template<> serializer<utils::UUID>::serializer(const utils::UUID &);
template<> void serializer<utils::UUID>::write(output&, const type&);
template<> void serializer<utils::UUID>::read(utils::UUID&, input&);
template<> void serializer<utils::UUID>::skip(input&);
template<> utils::UUID serializer<utils::UUID>::read(input&);
template<> serializer<bytes>::serializer(const bytes &);
template<> void serializer<bytes>::write(output&, const type&);
template<> void serializer<bytes>::read(bytes&, input&);
template<> void serializer<bytes>::skip(input&);
template<> serializer<bytes_view>::serializer(const bytes_view&);
template<> void serializer<bytes_view>::write(output&, const type&);
template<> void serializer<bytes_view>::read(bytes_view&, input&);
template<> bytes_view serializer<bytes_view>::read(input&);
template<> serializer<sstring>::serializer(const sstring&);
template<> void serializer<sstring>::write(output&, const type&);
template<> void serializer<sstring>::read(sstring&, input&);
template<> void serializer<sstring>::skip(input&);
template<> serializer<tombstone>::serializer(const tombstone &);
template<> void serializer<tombstone>::write(output&, const type&);
template<> void serializer<tombstone>::read(tombstone&, input&);
template<> serializer<atomic_cell_view>::serializer(const atomic_cell_view &);
template<> void serializer<atomic_cell_view>::write(output&, const type&);
template<> void serializer<atomic_cell_view>::read(atomic_cell_view&, input&);
template<> atomic_cell_view serializer<atomic_cell_view>::read(input&);
template<> serializer<collection_mutation_view>::serializer(const collection_mutation_view &);
template<> void serializer<collection_mutation_view>::write(output&, const type&);
template<> void serializer<collection_mutation_view>::read(collection_mutation_view&, input&);
template<> serializer<db::replay_position>::serializer(const db::replay_position&);
template<> void serializer<db::replay_position>::write(output&, const db::replay_position&);
template<> void serializer<db::replay_position>::read(db::replay_position&, input&);
template<typename T>
T serializer<T>::read(input& in) {
type t;
read(t, in);
return t;
}
extern template class serializer<tombstone>;
extern template class serializer<bytes>;
extern template class serializer<bytes_view>;
extern template class serializer<sstring>;
extern template class serializer<utils::UUID>;
extern template class serializer<db::replay_position>;
typedef serializer<tombstone> tombstone_serializer;
typedef serializer<bytes> bytes_serializer; // Compatible with bytes_view_serializer
typedef serializer<bytes_view> bytes_view_serializer; // Compatible with bytes_serializer
typedef serializer<sstring> sstring_serializer;
typedef serializer<atomic_cell_view> atomic_cell_view_serializer;
typedef serializer<collection_mutation_view> collection_mutation_view_serializer;
typedef serializer<utils::UUID> uuid_serializer;
typedef serializer<db::replay_position> replay_position_serializer;
}
#endif /* DB_SERIALIZER_HH_ */
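As the header's docblock says, constructing a `db::serializer` captures the object plus its exact serialized size, and the instance is then usable as a functor. A toy usage sketch of that same pattern (assumed `toy_serializer`, restricted to trivially copyable types; not the real class, which specializes per type):

```cpp
#include <cstdint>
#include <cstring>
#include <string>

template<typename T>
struct toy_serializer {
    const T& item;     // serializer holds obj + size, like db::serializer
    size_t _size;
    explicit toy_serializer(const T& t) : item(t), _size(sizeof(T)) {}
    size_t size() const { return _size; }
    // "usable as a functor": apply to an output, must have size() room
    void operator()(std::string& out) const {
        out.append(reinterpret_cast<const char*>(&item), _size);
    }
    static T from_bytes(const std::string& b) {
        T t;
        std::memcpy(&t, b.data(), sizeof(T));
        return t;
    }
};
```

A caller can query `size()` to pre-allocate a buffer, then invoke the serializer into it, which is exactly how the truncation-record and commitlog code above sizes its blobs before writing.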


@@ -58,16 +58,14 @@
#include "thrift/server.hh"
#include "exceptions/exceptions.hh"
#include "cql3/query_processor.hh"
#include "db/serializer.hh"
#include "query_context.hh"
#include "partition_slice_builder.hh"
#include "db/config.hh"
#include "schema_builder.hh"
#include "md5_hasher.hh"
#include "release.hh"
#include "log.hh"
#include "serializer.hh"
#include <core/enum.hh>
#include "service/storage_proxy.hh"
using days = std::chrono::duration<int, std::ratio<24 * 3600>>;
@@ -77,7 +75,6 @@ std::unique_ptr<query_context> qctx = {};
namespace system_keyspace {
static logging::logger logger("system_keyspace");
static const api::timestamp_type creation_timestamp = api::new_timestamp();
api::timestamp_type schema_creation_timestamp() {
@@ -441,7 +438,7 @@ static future<> setup_version() {
version::release(),
cql3::query_processor::CQL_VERSION,
org::apache::cassandra::thrift_version,
to_sstring(cql_serialization_format::latest_version),
to_sstring(version::native_protocol()),
snitch->get_datacenter(utils::fb_utilities::get_broadcast_address()),
snitch->get_rack(utils::fb_utilities::get_broadcast_address()),
sstring(dht::global_partitioner().name()),
@@ -549,44 +546,31 @@ future<> setup(distributed<database>& db, distributed<cql3::query_processor>& qp
});
}
struct truncation_record {
static constexpr uint32_t current_magic = 0x53435452; // 'S' 'C' 'T' 'R'
uint32_t magic;
std::vector<db::replay_position> positions;
db_clock::time_point time_stamp;
};
}
}
#include "idl/replay_position.dist.hh"
#include "idl/truncation_record.dist.hh"
#include "serializer_impl.hh"
#include "idl/replay_position.dist.impl.hh"
#include "idl/truncation_record.dist.impl.hh"
namespace db {
namespace system_keyspace {
typedef std::pair<replay_positions, db_clock::time_point> truncation_entry;
typedef utils::UUID truncation_key;
typedef std::unordered_map<truncation_key, truncation_record> truncation_map;
typedef std::unordered_map<truncation_key, truncation_entry> truncation_map;
static constexpr uint8_t current_version = 1;
static thread_local std::experimental::optional<truncation_map> truncation_records;
future<> save_truncation_records(const column_family& cf, db_clock::time_point truncated_at, replay_positions positions) {
truncation_record r;
auto size =
sizeof(db_clock::rep)
+ positions.size()
* db::serializer<replay_position>(
db::replay_position()).size();
bytes buf(bytes::initialized_later(), size);
data_output out(buf);
r.magic = truncation_record::current_magic;
r.time_stamp = truncated_at;
r.positions = std::move(positions);
auto buf = ser::serialize_to_buffer<bytes>(r, sizeof(current_version));
buf[0] = current_version;
static_assert(sizeof(current_version) == 1, "using this as mark");
assert(buf.size() & 1); // verify we've created an odd-numbered buffer
// Old version would write a single RP. We write N. Resulting blob size
// will determine how many.
// An external entity reading this blob would get a "correct" RP
// and a garbled time stamp. But an external entity has no business
// reading this data anyway, since it is meaningless outside this
// machine instance.
for (auto& rp : positions) {
db::serializer<replay_position>::write(out, rp);
}
out.write<db_clock::rep>(truncated_at.time_since_epoch().count());
map_type_impl::native_type tmp;
tmp.emplace_back(cf.schema()->id(), data_value(buf));
@@ -610,7 +594,7 @@ future<> remove_truncation_record(utils::UUID id) {
});
}
static future<truncation_record> get_truncation_record(utils::UUID cf_id) {
static future<truncation_entry> get_truncation_record(utils::UUID cf_id) {
if (!truncation_records) {
sstring req = sprint("SELECT truncated_at FROM system.%s WHERE key = '%s'", LOCAL, LOCAL);
return qctx->qp().execute_internal(req).then([cf_id](::shared_ptr<cql3::untyped_result_set> rs) {
@@ -621,56 +605,22 @@ static future<truncation_record> get_truncation_record(utils::UUID cf_id) {
auto uuid = p.first;
auto buf = p.second;
try {
truncation_record e;
truncation_entry e;
if (buf.size() & 1) {
// new record.
if (buf[0] != current_version) {
logger.warn("Found truncation record of unknown version {}. Ignoring.", int(buf[0]));
continue;
}
e = ser::deserialize_from_buffer(buf, boost::type<truncation_record>(), 1);
if (e.magic == truncation_record::current_magic) {
tmp[uuid] = e;
continue;
}
} else {
// old scylla records. (We hope)
// Read 64+64 bit RP:s, even though the
// struct (and official serial size) is 64+32.
data_input in(buf);
data_input in(buf);
logger.debug("Reading old type record");
while (in.avail() > sizeof(db_clock::rep)) {
auto id = in.read<uint64_t>();
auto pos = in.read<uint64_t>();
e.positions.emplace_back(id, position_type(pos));
}
if (in.avail() == sizeof(db_clock::rep)) {
e.time_stamp = db_clock::time_point(db_clock::duration(in.read<db_clock::rep>()));
tmp[uuid] = e;
continue;
}
}
} catch (std::out_of_range &) {
while (in.avail() > sizeof(db_clock::rep)) {
e.first.emplace_back(db::serializer<replay_position>::read(in));
}
// Trying to load an origin table.
// This is useless to us, because the only usage for this
// data is commit log and batch replay, and we cannot replay
// either from origin anyway.
logger.warn("Error reading truncation record for {}. "
"Most likely this is data from a cassandra instance."
"Make sure you have cleared commit and batch logs before upgrading.",
uuid
);
e.second = db_clock::time_point(db_clock::duration(in.read<db_clock::rep>()));
tmp[uuid] = e;
}
}
truncation_records = std::move(tmp);
return get_truncation_record(cf_id);
});
}
return make_ready_future<truncation_record>((*truncation_records)[cf_id]);
return make_ready_future<truncation_entry>((*truncation_records)[cf_id]);
}
future<> save_truncation_record(const column_family& cf, db_clock::time_point truncated_at, db::replay_position rp) {
@@ -678,16 +628,16 @@ future<> save_truncation_record(const column_family& cf, db_clock::time_point tr
// once, for each core (calling us). But right now, redesigning so that calling here (or, rather,
// save_truncation_records), is done from "somewhere higher, once per machine, not shard" is tricky.
// Mainly because drop_tables also uses truncate. And is run per-core as well. Gah.
return get_truncation_record(cf.schema()->id()).then([&cf, truncated_at, rp](truncation_record e) {
auto i = std::find_if(e.positions.begin(), e.positions.end(), [rp](replay_position& p) {
return get_truncated_position(cf.schema()->id()).then([&cf, truncated_at, rp](replay_positions positions) {
auto i = std::find_if(positions.begin(), positions.end(), [rp](auto& p) {
return p.shard_id() == rp.shard_id();
});
if (i == e.positions.end()) {
e.positions.emplace_back(rp);
if (i == positions.end()) {
positions.emplace_back(rp);
} else {
*i = rp;
}
return save_truncation_records(cf, std::max(truncated_at, e.time_stamp), e.positions);
return save_truncation_records(cf, truncated_at, positions);
});
}
@@ -703,14 +653,14 @@ future<db::replay_position> get_truncated_position(utils::UUID cf_id, uint32_t s
}
future<replay_positions> get_truncated_position(utils::UUID cf_id) {
return get_truncation_record(cf_id).then([](truncation_record e) {
return make_ready_future<replay_positions>(e.positions);
return get_truncation_record(cf_id).then([](truncation_entry e) {
return make_ready_future<replay_positions>(e.first);
});
}
future<db_clock::time_point> get_truncated_at(utils::UUID cf_id) {
return get_truncation_record(cf_id).then([](truncation_record e) {
return make_ready_future<db_clock::time_point>(e.time_stamp);
return get_truncation_record(cf_id).then([](truncation_entry e) {
return make_ready_future<db_clock::time_point>(e.second);
});
}
@@ -1146,36 +1096,5 @@ future<std::vector<compaction_history_entry>> get_compaction_history()
});
}
future<int> increment_and_get_generation() {
auto req = sprint("SELECT gossip_generation FROM system.%s WHERE key='%s'", LOCAL, LOCAL);
return qctx->qp().execute_internal(req).then([] (auto rs) {
int generation;
if (rs->empty() || !rs->one().has("gossip_generation")) {
// seconds-since-epoch isn't a foolproof new generation
// (where foolproof is "guaranteed to be larger than the last one seen at this ip address"),
// but it's as close as sanely possible
generation = service::get_generation_number();
} else {
// Other nodes will ignore gossip messages about a node that have a lower generation than previously seen.
int stored_generation = rs->one().template get_as<int>("gossip_generation") + 1;
int now = service::get_generation_number();
if (stored_generation >= now) {
logger.warn("Using stored Gossip Generation {} as it is greater than current system time {}."
"See CASSANDRA-3654 if you experience problems", stored_generation, now);
generation = stored_generation;
} else {
generation = now;
}
}
auto req = sprint("INSERT INTO system.%s (key, gossip_generation) VALUES ('%s', ?)", LOCAL, LOCAL);
return qctx->qp().execute_internal(req, {generation}).then([generation] (auto rs) {
return force_blocking_flush(LOCAL);
}).then([generation] {
return make_ready_future<int>(generation);
});
});
}
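The generation rule above (which the C++ port and the original Java both implement) can be sketched as a small shell function. `next_generation` is an illustrative name, not part of the tree:

```shell
#!/bin/sh
# Sketch (with a made-up helper name) of the gossip-generation rule above:
# prefer seconds-since-epoch, unless a stored generation + 1 is already
# greater or equal -- reuse that, so other nodes don't ignore our gossip.
next_generation() {
    STORED="$1"   # previously stored generation, empty if none
    NOW="$2"      # current seconds-since-epoch
    if [ -z "$STORED" ]; then
        echo "$NOW"
    elif [ $((STORED + 1)) -ge "$NOW" ]; then
        echo $((STORED + 1))
    else
        echo "$NOW"
    fi
}
```

So a node restarted within the same second as its stored generation still moves forward, matching the CASSANDRA-3654 behavior the warning refers to.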
} // namespace system_keyspace
} // namespace db


@@ -401,9 +401,127 @@ enum class bootstrap_state {
*/
future<std::unordered_map<gms::inet_address, utils::UUID>> load_host_ids();
future<std::unordered_set<dht::token>> get_saved_tokens();
#if 0
/**
* Get preferred IP for given endpoint if it is known. Otherwise this returns given endpoint itself.
*
* @param ep endpoint address to check
* @return Preferred IP for given endpoint if present, otherwise returns given ep
*/
public static InetAddress getPreferredIP(InetAddress ep)
{
String req = "SELECT preferred_ip FROM system.%s WHERE peer=?";
UntypedResultSet result = executeInternal(String.format(req, PEERS), ep);
if (!result.isEmpty() && result.one().has("preferred_ip"))
return result.one().getInetAddress("preferred_ip");
return ep;
}
/**
* Return a map of IP addresses containing a map of dc and rack info
*/
public static Map<InetAddress, Map<String,String>> loadDcRackInfo()
{
Map<InetAddress, Map<String, String>> result = new HashMap<>();
for (UntypedResultSet.Row row : executeInternal("SELECT peer, data_center, rack from system." + PEERS))
{
InetAddress peer = row.getInetAddress("peer");
if (row.has("data_center") && row.has("rack"))
{
Map<String, String> dcRack = new HashMap<>();
dcRack.put("data_center", row.getString("data_center"));
dcRack.put("rack", row.getString("rack"));
result.put(peer, dcRack);
}
}
return result;
}
/**
* One of three things will happen if you try to read the system keyspace:
* 1. files are present and you can read them: great
* 2. no files are there: great (new node is assumed)
* 3. files are present but you can't read them: bad
* @throws ConfigurationException
*/
public static void checkHealth() throws ConfigurationException
{
Keyspace keyspace;
try
{
keyspace = Keyspace.open(NAME);
}
catch (AssertionError err)
{
// this happens when a user switches from OPP to RP.
ConfigurationException ex = new ConfigurationException("Could not read system keyspace!");
ex.initCause(err);
throw ex;
}
ColumnFamilyStore cfs = keyspace.getColumnFamilyStore(LOCAL);
String req = "SELECT cluster_name FROM system.%s WHERE key='%s'";
UntypedResultSet result = executeInternal(String.format(req, LOCAL, LOCAL));
if (result.isEmpty() || !result.one().has("cluster_name"))
{
// this is a brand new node
if (!cfs.getSSTables().isEmpty())
throw new ConfigurationException("Found system keyspace files, but they couldn't be loaded!");
// no system files. this is a new node.
req = "INSERT INTO system.%s (key, cluster_name) VALUES ('%s', ?)";
executeInternal(String.format(req, LOCAL, LOCAL), DatabaseDescriptor.getClusterName());
return;
}
String savedClusterName = result.one().getString("cluster_name");
if (!DatabaseDescriptor.getClusterName().equals(savedClusterName))
throw new ConfigurationException("Saved cluster name " + savedClusterName + " != configured name " + DatabaseDescriptor.getClusterName());
}
#endif
future<std::unordered_set<dht::token>> get_saved_tokens();
#if 0
public static int incrementAndGetGeneration()
{
String req = "SELECT gossip_generation FROM system.%s WHERE key='%s'";
UntypedResultSet result = executeInternal(String.format(req, LOCAL, LOCAL));
int generation;
if (result.isEmpty() || !result.one().has("gossip_generation"))
{
// seconds-since-epoch isn't a foolproof new generation
// (where foolproof is "guaranteed to be larger than the last one seen at this ip address"),
// but it's as close as sanely possible
generation = (int) (System.currentTimeMillis() / 1000);
}
else
{
// Other nodes will ignore gossip messages about a node that have a lower generation than previously seen.
final int storedGeneration = result.one().getInt("gossip_generation") + 1;
final int now = (int) (System.currentTimeMillis() / 1000);
if (storedGeneration >= now)
{
logger.warn("Using stored Gossip Generation {} as it is greater than current system time {}. See CASSANDRA-3654 if you experience problems",
storedGeneration, now);
generation = storedGeneration;
}
else
{
generation = now;
}
}
req = "INSERT INTO system.%s (key, gossip_generation) VALUES ('%s', ?)";
executeInternal(String.format(req, LOCAL, LOCAL), generation);
forceBlockingFlush(LOCAL);
return generation;
}
#endif
future<int> increment_and_get_generation();
bool bootstrap_complete();
bool bootstrap_in_progress();
bootstrap_state get_bootstrap_state();

dist/ami/build_ami.sh vendored

@@ -6,54 +6,22 @@ if [ ! -e dist/ami/build_ami.sh ]; then
fi
print_usage() {
echo "build_ami.sh --localrpm --unstable"
echo " --localrpm deploy locally built rpms"
echo " --unstable use unstable branch"
echo "build_ami.sh -l"
echo " -l deploy locally built rpms"
exit 1
}
LOCALRPM=0
while [ $# -gt 0 ]; do
case "$1" in
"--localrpm")
while getopts lh OPT; do
case "$OPT" in
"l")
LOCALRPM=1
INSTALL_ARGS="$INSTALL_ARGS --localrpm"
shift 1
;;
"--unstable")
INSTALL_ARGS="$INSTALL_ARGS --unstable"
shift 1
;;
*)
"h")
print_usage
;;
esac
done
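The long-option parsers were converted to `getopts` throughout these scripts. The recurring pattern, as a minimal self-contained sketch (the function name is illustrative):

```shell
#!/bin/sh
# Minimal getopts sketch of the flag parsing above: -l sets LOCALRPM,
# -h prints usage. OPTIND is reset so the function can be called repeatedly.
parse_build_flags() {
    OPTIND=1
    LOCALRPM=0
    while getopts lh OPT; do
        case "$OPT" in
            "l") LOCALRPM=1 ;;
            "h") echo "usage: build_ami.sh -l"; return 1 ;;
        esac
    done
    echo "$LOCALRPM"
}
```

Unlike the hand-rolled `while [ $# -gt 0 ]` loop, `getopts` handles clustered flags (`-lh`) and option arguments (`d:` in later scripts, read via `$OPTARG`) for free.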
if [ $LOCALRPM -eq 1 ]; then
rm -rf build/*
sudo yum -y install git
if [ ! -f dist/ami/files/scylla-server.x86_64.rpm ]; then
dist/redhat/build_rpm.sh
cp build/rpmbuild/RPMS/x86_64/scylla-server-`cat build/SCYLLA-VERSION-FILE`-`cat build/SCYLLA-RELEASE-FILE`.*.x86_64.rpm dist/ami/files/scylla-server.x86_64.rpm
fi
if [ ! -f dist/ami/files/scylla-jmx.noarch.rpm ]; then
cd build
git clone --depth 1 https://github.com/scylladb/scylla-jmx.git
cd scylla-jmx
sh -x -e dist/redhat/build_rpm.sh $*
cd ../..
cp build/scylla-jmx/build/rpmbuild/RPMS/noarch/scylla-jmx-`cat build/scylla-jmx/build/SCYLLA-VERSION-FILE`-`cat build/scylla-jmx/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-jmx.noarch.rpm
fi
if [ ! -f dist/ami/files/scylla-tools.noarch.rpm ]; then
cd build
git clone --depth 1 https://github.com/scylladb/scylla-tools-java.git
cd scylla-tools-java
sh -x -e dist/redhat/build_rpm.sh
cd ../..
cp build/scylla-tools-java/build/rpmbuild/RPMS/noarch/scylla-tools-`cat build/scylla-tools-java/build/SCYLLA-VERSION-FILE`-`cat build/scylla-tools-java/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-tools.noarch.rpm
fi
fi
cd dist/ami
if [ ! -f variables.json ]; then
@@ -69,4 +37,13 @@ if [ ! -d packer ]; then
cd -
fi
packer/packer build -var-file=variables.json -var install_args="$INSTALL_ARGS" scylla.json
echo "sudo yum remove -y abrt" > scylla_deploy.sh
if [ $LOCALRPM = 0 ]; then
echo "sudo sh -x -e /home/centos/scylla_install_pkg" >> scylla_deploy.sh
else
echo "sudo sh -x -e /home/centos/scylla_install_pkg -l /home/centos" >> scylla_deploy.sh
fi
echo "sudo sh -x -e /usr/lib/scylla/scylla_setup -a" >> scylla_deploy.sh
chmod a+rx scylla_deploy.sh
packer/packer build -var-file=variables.json scylla.json

dist/ami/build_ami_local.sh vendored Executable file

@@ -0,0 +1,31 @@
#!/bin/sh -e
if [ ! -e dist/ami/build_ami_local.sh ]; then
echo "run build_ami_local.sh in top of scylla dir"
exit 1
fi
rm -rf build/*
sudo yum -y install git
if [ ! -f dist/ami/files/scylla-server.x86_64.rpm ]; then
dist/redhat/build_rpm.sh
cp build/rpmbuild/RPMS/x86_64/scylla-server-`cat build/SCYLLA-VERSION-FILE`-`cat build/SCYLLA-RELEASE-FILE`.*.x86_64.rpm dist/ami/files/scylla-server.x86_64.rpm
fi
if [ ! -f dist/ami/files/scylla-jmx.noarch.rpm ]; then
cd build
git clone --depth 1 https://github.com/scylladb/scylla-jmx.git
cd scylla-jmx
sh -x -e dist/redhat/build_rpm.sh $*
cd ../..
cp build/scylla-jmx/build/rpmbuild/RPMS/noarch/scylla-jmx-`cat build/scylla-jmx/build/SCYLLA-VERSION-FILE`-`cat build/scylla-jmx/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-jmx.noarch.rpm
fi
if [ ! -f dist/ami/files/scylla-tools.noarch.rpm ]; then
cd build
git clone --depth 1 https://github.com/scylladb/scylla-tools-java.git
cd scylla-tools-java
sh -x -e dist/redhat/build_rpm.sh
cd ../..
cp build/scylla-tools-java/build/rpmbuild/RPMS/noarch/scylla-tools-`cat build/scylla-tools-java/build/SCYLLA-VERSION-FILE`-`cat build/scylla-tools-java/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-tools.noarch.rpm
fi
exec dist/ami/build_ami.sh -l

dist/ami/scylla.json vendored

@@ -13,7 +13,7 @@
"instance_type": "{{user `instance_type`}}",
"ssh_username": "centos",
"ssh_timeout": "5m",
"ami_name": "{{user `ami_prefix`}}scylla_{{isotime | clean_ami_name}}",
"ami_name": "scylla_{{isotime | clean_ami_name}}",
"enhanced_networking": true,
"launch_block_device_mappings": [
{
@@ -71,9 +71,7 @@
},
{
"type": "shell",
"inline": [
"sudo /home/centos/scylla-ami/scylla_install_ami {{ user `install_args` }}"
]
"script": "scylla_deploy.sh"
}
],
"variables": {
@@ -83,8 +81,6 @@
"security_group_id": "",
"region": "",
"associate_public_ip_address": "",
"instance_type": "",
"install_args": "",
"ami_prefix": ""
"instance_type": ""
}
}


@@ -1,5 +0,0 @@
#!/bin/sh -e
#
# Copyright (C) 2016 ScyllaDB
exec python /usr/lib/scylla/scyllatop/scyllatop.py $@


@@ -1,9 +0,0 @@
LoadPlugin network
LoadPlugin unixsock
<Plugin network>
Listen "127.0.0.1" "25826"
</Plugin>
<Plugin unixsock>
SocketFile "/var/run/collectd-unixsock"
SocketPerms "0666"
</Plugin>


@@ -3,19 +3,18 @@
# Copyright (C) 2015 ScyllaDB
print_usage() {
echo "scylla_coredump_setup --dump-to-raiddir"
echo " --dump-to-raiddir store coredump to /var/lib/scylla"
echo "scylla_coredump_setup -s"
echo " -s store coredump to /var/lib/scylla"
exit 1
}
SYMLINK=0
while [ $# -gt 0 ]; do
case "$1" in
"--dump-to-raiddir")
while getopts sh OPT; do
case "$OPT" in
"s")
SYMLINK=1
shift 1
;;
*)
"h")
print_usage
;;
esac


@@ -1,62 +0,0 @@
#!/bin/sh
is_ami() {
if [ "`dmidecode --string system-version | grep \.amazon`" != "" ] && \
[ "`curl http://169.254.169.254/latest/meta-data/ami-id | grep ami-`" != "" ]; then
echo 1
else
echo 0
fi
}
is_supported_instance_type() {
TYPE=`curl http://169.254.169.254/latest/meta-data/instance-type|cut -d . -f 1`
case $TYPE in
"m3"|"c3"|"i2") echo 1;;
*) echo 0;;
esac
}
is_developer_mode() {
echo $SCYLLA_ARGS|egrep -c "\-\-developer-mode(\s+|=)1"
}
if [ ! -f /etc/scylla/io_configured ] && [ `is_developer_mode` -eq 0 ]; then
if [ `is_ami` -eq 1 ]; then
SMP=`echo $SCYLLA_ARGS|sed -e "s/^.*smp\(\s\+\|=\)\([0-9]*\).*$/\2/"`
CPUSET=`echo $SCYLLA_ARGS|sed -e "s/^.*\(--cpuset\(\s\+\|=\)[0-9\-]*\).*$/\1/"`
fi
if [ `is_ami` -eq 1 ] && [ `is_supported_instance_type` -eq 1 ]; then
NR_CPU=`cat /proc/cpuinfo |grep processor|wc -l`
NR_DISKS=`curl http://169.254.169.254/latest/meta-data/block-device-mapping/|grep ephemeral|wc -l`
if [ "$SMP" != "" ]; then
NR_CPU=$SMP
fi
NR_SHARDS=$NR_CPU
if [ $NR_CPU -ge 8 ] && [ "$SET_NIC" = "no" ]; then
NR_SHARDS=$((NR_CPU - 1))
fi
if [ $NR_DISKS -lt 2 ]; then NR_DISKS=2; fi
NR_REQS=$((32 * $NR_DISKS / 2))
NR_IO_QUEUES=$NR_SHARDS
if [ $(($NR_REQS/$NR_IO_QUEUES)) -lt 4 ]; then
NR_IO_QUEUES=$(($NR_REQS / 4))
fi
NR_REQS=$(($(($NR_REQS / $NR_IO_QUEUES)) * $NR_IO_QUEUES))
echo "SCYLLA_IO=\"--num-io-queues $NR_IO_QUEUES --max-io-requests $NR_REQS\"" > /etc/scylla.d/io.conf
else
iotune --evaluation-directory /var/lib/scylla --format envfile --options-file /etc/scylla.d/io.conf $CPUSET
if [ $? -ne 0 ]; then
logger -p user.err "/var/lib/scylla did not pass validation tests, it may not be on XFS and/or has limited disk space."
logger -p user.err "This is a non-supported setup, and performance is expected to be very bad."
logger -p user.err "For better performance, placing your data on XFS-formatted directories is required."
logger -p user.err " To override this error, see the developer_mode configuration option."
fi
fi
touch /etc/scylla/io_configured
fi
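The request/queue arithmetic above can be isolated into one function (hypothetical name) to make the sizing rule visible: requests scale with disk count, queues start at one per shard, and queues are cut down until each one keeps at least 4 requests:

```shell
#!/bin/sh
# Sketch of the AMI I/O sizing above (helper name is made up):
# 16 requests per disk (min 2 disks), one queue per shard, queues reduced
# so every queue keeps >= 4 requests; requests rounded to a queue multiple.
compute_io_conf() {
    NR_SHARDS="$1"
    NR_DISKS="$2"
    if [ "$NR_DISKS" -lt 2 ]; then NR_DISKS=2; fi
    NR_REQS=$((32 * NR_DISKS / 2))
    NR_IO_QUEUES=$NR_SHARDS
    if [ $((NR_REQS / NR_IO_QUEUES)) -lt 4 ]; then
        NR_IO_QUEUES=$((NR_REQS / 4))
    fi
    NR_REQS=$(( (NR_REQS / NR_IO_QUEUES) * NR_IO_QUEUES ))
    echo "--num-io-queues $NR_IO_QUEUES --max-io-requests $NR_REQS"
}
```

For example, 16 shards on 2 ephemeral disks gives 32 requests, which forces the queue count down from 16 to 8 so each queue still gets 4 requests.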


@@ -3,18 +3,18 @@
# Copyright (C) 2015 ScyllaDB
print_usage() {
echo "scylla_ntp_setup --subdomain centos"
echo " --subdomain specify subdomain of pool.ntp.org (ex: centos, fedora or amazon)"
echo "scylla_ntp_setup -a"
echo " -a AMI instance mode"
exit 1
}
while [ $# -gt 0 ]; do
case "$1" in
"--subdomain")
DOMAIN="$2"
shift 2
AMI=0
while getopts a OPT; do
case "$OPT" in
"a")
AMI=1
;;
*)
"h")
print_usage
;;
esac
@@ -28,8 +28,8 @@ if [ "$NAME" = "Ubuntu" ]; then
service ntp start
else
yum install -y ntp ntpdate || true
if [ "$DOMAIN" != "" ]; then
sed -e "s#\..*\.pool\.ntp\.org#.$DOMAIN.pool.ntp.org#" /etc/ntp.conf > /tmp/ntp.conf
if [ $AMI -eq 1 ]; then
sed -e s#centos.pool.ntp.org#amazon.pool.ntp.org# /etc/ntp.conf > /tmp/ntp.conf
mv /tmp/ntp.conf /etc/ntp.conf
fi
if [ "`systemctl is-active ntpd`" = "active" ]; then


@@ -20,7 +20,7 @@ elif [ "$NETWORK_MODE" = "dpdk" ]; then
done
else # NETWORK_MODE = posix
if [ "$SET_NIC" = "yes" ]; then
/usr/lib/scylla/posix_net_conf.sh $IFNAME
sudo sh /usr/lib/scylla/posix_net_conf.sh $IFNAME >/dev/null 2>&1 || true
fi
fi
. /etc/os-release


@@ -3,31 +3,28 @@
# Copyright (C) 2015 ScyllaDB
print_usage() {
echo "scylla-raid-setup --disks /dev/hda,/dev/hdb... --raiddev /dev/md0 --update-fstab"
echo " --disks specify disks for RAID"
echo " --raiddev MD device name for RAID"
echo " --update-fstab update /etc/fstab for RAID"
echo "scylla-raid-setup -d /dev/hda,/dev/hdb... -r /dev/md0 -u"
echo " -d specify disks for RAID"
echo " -r MD device name for RAID"
echo " -u update /etc/fstab for RAID"
exit 1
}
RAID=/dev/md0
FSTAB=0
while [ $# -gt 0 ]; do
case "$1" in
"--disks")
DISKS=`echo "$2"|tr -s ',' ' '`
NR_DISK=$((`echo "$2"|grep , -o|wc -w` + 1))
shift 2
while getopts d:r:uh OPT; do
case "$OPT" in
"d")
DISKS=`echo $OPTARG|tr -s ',' ' '`
NR_DISK=$((`echo $OPTARG|grep , -o|wc -w` + 1))
;;
"--raiddev")
RAID="$2"
shift 2
"r")
RAID=$OPTARG
;;
"--update-fstab")
"u")
FSTAB=1
shift 1
;;
*)
"h")
print_usage
;;
esac


@@ -1,13 +0,0 @@
#!/bin/sh -e
#
# Copyright (C) 2015 ScyllaDB
. /etc/os-release
if [ "$ID" != "ubuntu" ]; then
if [ "`sestatus | awk '{print $3}'`" != "disabled" ]; then
setenforce 0
sed -e "s/enforcing/disabled/" /etc/sysconfig/selinux > /tmp/selinux
mv /tmp/selinux /etc/sysconfig/
fi
fi


@@ -8,210 +8,65 @@ if [ "`id -u`" -ne 0 ]; then
fi
print_usage() {
echo "scylla_setup --disks /dev/hda,/dev/hdb... --nic eth0 --ntp-domain centos --ami --no-enable-service --no-selinux-setup --no-bootparam-setup --no-ntp-setup --no-raid-setup --no-coredump-setup --no-sysconfig-setup"
echo " --disks specify disks for RAID"
echo " --nic specify NIC"
echo " --ntp-domain specify NTP domain"
echo " --ami setup AMI instance"
echo " --no-enable-service skip enabling service"
echo " --no-selinux-setup skip selinux setup"
echo " --no-bootparam-setup skip bootparam setup"
echo " --no-ntp-setup skip ntp setup"
echo " --no-raid-setup skip raid setup"
echo " --no-coredump-setup skip coredump setup"
echo " --no-sysconfig-setup skip sysconfig setup"
echo "scylla_setup -d /dev/hda,/dev/hdb... -n eth0 -a"
echo " -d specify disks for RAID"
echo " -n specify NIC"
echo " -a setup AMI instance"
exit 1
}
interactive_ask_service() {
echo $1
while true; do
echo -n "yes/no: "
read ans
case $ans in
"y" | "yes")
return 1
;;
"n" | "no")
return 0
;;
esac
done
}
NIC=eth0
RAID=/dev/md0
AMI=0
ENABLE_SERVICE=1
SELINUX_SETUP=1
BOOTPARAM_SETUP=1
NTP_SETUP=1
RAID_SETUP=1
COREDUMP_SETUP=1
SYSCONFIG_SETUP=1
if [ $# -ne 0 ]; then
INTERACTIVE=0
else
INTERACTIVE=1
fi
while [ $# -gt 0 ]; do
case "$1" in
"--disks")
DISKS="$2"
shift 2
while getopts d:n:al:h OPT; do
case "$OPT" in
"n")
NIC=$OPTARG
;;
"--nic")
NIC="$2"
shift 2
"d")
DISKS=$OPTARG
;;
"--ntp-domain")
NTP_DOMAIN="$2"
shift 2
;;
"--ami")
"a")
AMI=1
shift 1
;;
"--no-enable-service")
ENABLE_SERVICE=0
shift 1
;;
"--no-selinux-setup")
SELINUX_SETUP=0
shift 1
;;
"--no-bootparam-setup")
BOOTPARAM_SETUP=0
shift 1
;;
"--no-ntp-setup")
NTP_SETUP=0
shift 1
;;
"--no-raid-setup")
RAID_SETUP=0
shift 1
;;
"--no-coredump-setup")
COREDUMP_SETUP=0
shift 1
;;
"--no-sysconfig-setup")
SYSCONFIG_SETUP=0
shift 1
;;
"-h" | "--help")
"h")
print_usage
shift 1
;;
esac
done
if [ $INTERACTIVE -eq 0 ] && [ $RAID_SETUP -eq 1 ] && [ "$DISKS" = "" ]; then
print_usage
fi
if [ $INTERACTIVE -eq 0 ] && [ $SYSCONFIG_SETUP -eq 1 ] && [ "$NIC" = "" ]; then
print_usage
fi
. /etc/os-release
if [ $INTERACTIVE -eq 1 ]; then
interactive_ask_service "Do you want to enable ScyllaDB services?" &&:
ENABLE_SERVICE=$?
fi
if [ $ENABLE_SERVICE -eq 1 ]; then
if [ "$ID" = "fedora" ] || [ "$ID" = "centos" ]; then
systemctl enable scylla-io-setup.service
systemctl enable scylla-server.service
systemctl enable scylla-jmx.service
if [ "$ID" != "ubuntu" ]; then
if [ "`sestatus | awk '{print $3}'`" != "disabled" ]; then
setenforce 0
sed -e "s/enforcing/disabled/" /etc/sysconfig/selinux > /tmp/selinux
mv /tmp/selinux /etc/sysconfig/
fi
if [ $AMI -eq 1 ]; then
grep -v ' - mounts' /etc/cloud/cloud.cfg > /tmp/cloud.cfg
mv /tmp/cloud.cfg /etc/cloud/cloud.cfg
mv /home/centos/scylla-ami/scylla-ami-setup.service /usr/lib/systemd/system/
mv /home/centos/scylla-ami /usr/lib/scylla/scylla-ami
chmod a+rx /usr/lib/scylla/scylla-ami/ds2_configure.py
systemctl daemon-reload
systemctl enable scylla-ami-setup.service
fi
systemctl enable scylla-server.service
systemctl enable scylla-jmx.service
fi
if [ $INTERACTIVE -eq 1 ]; then
interactive_ask_service "Do you want to disable SELinux?" &&:
SELINUX_SETUP=$?
fi
if [ $SELINUX_SETUP -eq 1 ]; then
/usr/lib/scylla/scylla_selinux_setup
fi
if [ $INTERACTIVE -eq 1 ]; then
interactive_ask_service "Do you want to setup bootloader options?" &&:
BOOTPARAM_SETUP=$?
fi
if [ $BOOTPARAM_SETUP -eq 1 ]; then
if [ $AMI -eq 0 ]; then
/usr/lib/scylla/scylla_ntp_setup
/usr/lib/scylla/scylla_bootparam_setup
fi
if [ $INTERACTIVE -eq 1 ]; then
interactive_ask_service "Do you want to setup NTP?" &&:
NTP_SETUP=$?
fi
if [ $NTP_SETUP -eq 1 ]; then
if [ "$NTP_DOMAIN" != "" ]; then
/usr/lib/scylla/scylla_ntp_setup --subdomain $NTP_DOMAIN
else
/usr/lib/scylla/scylla_ntp_setup
fi
fi
if [ $INTERACTIVE -eq 1 ]; then
interactive_ask_service "Do you want to setup RAID?" &&:
RAID_SETUP=$?
if [ $RAID_SETUP -eq 1 ]; then
echo "Please select disks from following list: "
while true; do
lsblk -d -i -n -p -r|awk '{print $1}'|sed -e ':loop;N;$!b loop;s/\n/ /g'
echo "type 'done' to finish selection. selected: $DISKS"
echo -n "> "
read dsk
if [ "$dsk" = "done" ]; then
break
fi
if [ -e $dsk ]; then
if [ "$DISKS" = "" ]; then
DISKS=$dsk
else
DISKS="$DISKS,$dsk"
fi
else
echo "$dsk not found"
fi
done
fi
fi
if [ $RAID_SETUP -eq 1 ]; then
/usr/lib/scylla/scylla_raid_setup --disks $DISKS --update-fstab
fi
if [ $INTERACTIVE -eq 1 ]; then
interactive_ask_service "Do you want to setup coredump?" &&:
COREDUMP_SETUP=$?
fi
if [ $COREDUMP_SETUP -eq 1 ]; then
if [ "$DISKS" != "" ]; then
/usr/lib/scylla/scylla_coredump_setup --dump-to-raiddir
if [ $DISKS != "" ]; then
/usr/lib/scylla/scylla_raid_setup -d $DISKS -u
/usr/lib/scylla/scylla_coredump_setup -s
else
/usr/lib/scylla/scylla_coredump_setup
fi
fi
if [ $INTERACTIVE -eq 1 ]; then
interactive_ask_service "Do you want to setup sysconfig?" &&:
SYSCONFIG_SETUP=$?
if [ $SYSCONFIG_SETUP -eq 1 ]; then
echo "Please select NIC from following list: "
while true; do
ls /sys/class/net
echo -n "> "
read NIC
if [ -e /sys/class/net/$NIC ]; then
break
fi
done
fi
fi
if [ $SYSCONFIG_SETUP -eq 1 ]; then
/usr/lib/scylla/scylla_sysconfig_setup --nic $NIC
else
/usr/lib/scylla/scylla_coredump_setup -s
/usr/lib/scylla/scylla_ntp_setup -a
/usr/lib/scylla/scylla_bootparam_setup -a
/usr/lib/scylla/scylla_sysconfig_setup -n $NIC
fi


@@ -3,16 +3,17 @@
# Copyright (C) 2015 ScyllaDB
print_usage() {
echo "scylla-sysconfig-setup --nic eth0 --mode posix --nr-hugepages 64 --user scylla --group scylla --homedir /var/lib/scylla --confdir /etc/scylla --setup-nic"
echo " --nic specify NIC"
echo " --mode network mode (posix, dpdk)"
echo " --nr-hugepages number of hugepages"
echo " --user user (dpdk requires root)"
echo " --group group (dpdk requires root)"
echo " --homedir scylla home directory"
echo " --confdir scylla config directory"
echo " --setup-nic setup NIC's interrupts, RPS, XPS"
echo " --ami AMI instance mode"
echo "scylla-sysconfig-setup -n eth0 -m posix -p 64 -u scylla -g scylla -r /var/lib/scylla -c /etc/scylla -N -a -k"
echo " -n specify NIC"
echo " -m network mode (posix, dpdk)"
echo " -p number of hugepages"
echo " -u user (dpdk requires root)"
echo " -g group (dpdk requires root)"
echo " -r scylla home directory"
echo " -c scylla config directory"
echo " -N setup NIC's interrupts, RPS, XPS"
echo " -a AMI instance mode"
echo " -d disk count"
exit 1
}
@@ -24,48 +25,40 @@ else
fi
. $SYSCONFIG/scylla-server
if [ $# -eq 0 ]; then
print_usage
fi
while [ $# -gt 0 ]; do
case "$1" in
"--nic")
NIC="$2"
shift 2
DISK_COUNT=0
while getopts n:m:p:u:g:d:c:Nakh OPT; do
case "$OPT" in
"n")
NIC=$OPTARG
;;
"--mode")
NETWORK_MODE="$2"
shift 2
"m")
NETWORK_MODE=$OPTARG
;;
"--nr-hugepages")
NR_HUGEPAGES="$2"
shift 2
"p")
NR_HUGEPAGES=$OPTARG
;;
"--user")
USER="$2"
shift 2
"u")
USER=$OPTARG
;;
"--group")
GROUP="$2"
shift 2
"g")
GROUP=$OPTARG
;;
"--homedir")
SCYLLA_HOME="$2"
shift 2
"r")
SCYLLA_HOME=$OPTARG
;;
"--confdir")
SCYLLA_CONF="$2"
shift 2
"c")
SCYLLA_CONF=$OPTARG
;;
"--setup-nic")
"N")
SETUP_NIC=1
shift 1
;;
"--ami")
"a")
AMI=yes
shift 1
;;
*)
"d")
DISK_COUNT=$OPTARG
;;
"h")
print_usage
;;
esac
@@ -76,11 +69,29 @@ echo Setting parameters on $SYSCONFIG/scylla-server
ETHDRV=`/usr/lib/scylla/dpdk_nic_bind.py --status | grep if=$NIC | sed -e "s/^.*drv=//" -e "s/ .*$//"`
ETHPCIID=`/usr/lib/scylla/dpdk_nic_bind.py --status | grep if=$NIC | awk '{print $1}'`
NR_CPU=`cat /proc/cpuinfo |grep processor|wc -l`
if [ "$AMI" = "yes" ] && [ $NR_CPU -ge 8 ] && [ "$SET_NIC" = "no" ]; then
NR=$((NR_CPU - 1))
NR_SHARDS=$NR_CPU
if [ $NR_CPU -ge 8 ] && [ "$SET_NIC" = "no" ]; then
NR_SHARDS=$((NR_CPU - 1))
SET_NIC="yes"
SCYLLA_ARGS="$SCYLLA_ARGS --cpuset 1-$NR --smp $NR"
SCYLLA_ARGS="$SCYLLA_ARGS --cpuset 1-$NR_SHARDS --smp $NR_SHARDS"
fi
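The shard/cpuset choice above amounts to: with 8 or more CPUs and a NIC still to be configured, leave CPU 0 free for network interrupts and give scylla the rest. A sketch with a hypothetical function name:

```shell
#!/bin/sh
# Sketch of the shard sizing above: reserve CPU 0 for NIC interrupts
# when there are >= 8 CPUs and SET_NIC is "no"; otherwise use all CPUs.
shard_args() {
    NR_CPU="$1"
    SET_NIC="$2"
    NR_SHARDS=$NR_CPU
    if [ "$NR_CPU" -ge 8 ] && [ "$SET_NIC" = "no" ]; then
        NR_SHARDS=$((NR_CPU - 1))
        echo "--cpuset 1-$NR_SHARDS --smp $NR_SHARDS"
    else
        echo "--smp $NR_SHARDS"
    fi
}
```

On smaller instances the reservation isn't worth a whole core, so all CPUs become shards.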
if [ "$AMI" = "yes" ] && [ $DISK_COUNT -gt 0 ]; then
NR_DISKS=$DISK_COUNT
if [ $NR_DISKS -lt 2 ]; then NR_DISKS=2; fi
NR_REQS=$((32 * $NR_DISKS / 2))
NR_IO_QUEUES=$NR_SHARDS
if [ $(($NR_REQS/$NR_IO_QUEUES)) -lt 4 ]; then
NR_IO_QUEUES=$(($NR_REQS / 4))
fi
NR_REQS=$(($(($NR_REQS / $NR_IO_QUEUES)) * $NR_IO_QUEUES))
SCYLLA_IO="$SCYLLA_IO --num-io-queues $NR_IO_QUEUES --max-io-requests $NR_REQS"
fi
sed -e s#^NETWORK_MODE=.*#NETWORK_MODE=$NETWORK_MODE# \
-e s#^ETHDRV=.*#ETHDRV=$ETHDRV# \
-e s#^ETHPCIID=.*#ETHPCIID=$ETHPCIID# \
@@ -91,6 +102,7 @@ sed -e s#^NETWORK_MODE=.*#NETWORK_MODE=$NETWORK_MODE# \
-e s#^SCYLLA_CONF=.*#SCYLLA_CONF=$SCYLLA_CONF# \
-e s#^SET_NIC=.*#SET_NIC=$SET_NIC# \
-e "s#^SCYLLA_ARGS=.*#SCYLLA_ARGS=\"$SCYLLA_ARGS\"#" \
-e "s#^SCYLLA_IO=.*#SCYLLA_IO=\"$SCYLLA_IO\"#" \
-e s#^AMI=.*#AMI=$AMI# \
$SYSCONFIG/scylla-server > /tmp/scylla-server
mv /tmp/scylla-server $SYSCONFIG/scylla-server


@@ -1,4 +0,0 @@
# DO NOT EDIT
# This file should be automatically configured by scylla-io-setup.service
#
# SEASTAR_IO="--max-io-requests=1 --num-io-queues=1"


@@ -1 +1 @@
scylla ALL=(ALL) NOPASSWD:SETENV: /usr/lib/scylla/scylla_prepare,/usr/lib/scylla/scylla_stop,/usr/lib/scylla/scylla_io_setup
scylla ALL=(ALL) NOPASSWD:SETENV: /usr/lib/scylla/scylla_prepare,/usr/lib/scylla/scylla_stop


@@ -40,5 +40,8 @@ SCYLLA_ARGS="--log-to-syslog 1 --log-to-stdout 0 --default-log-level info --coll
## scylla arguments (for dpdk mode)
#SCYLLA_ARGS="--log-to-syslog 1 --log-to-stdout 0 --default-log-level info --collectd-address=127.0.0.1:25826 --collectd=1 --collectd-poll-period 3000 --network-stack native --dpdk-pmd"
# scylla io
SCYLLA_IO=
# setup as AMI instance
AMI=no


@@ -1,19 +1,17 @@
#!/bin/sh -e
. /etc/os-release
print_usage() {
echo "build_rpm.sh --rebuild-dep"
echo " --rebuild-dep rebuild dependency packages (CentOS)"
echo "build_rpm.sh -R"
echo " -R rebuild dependency packages (CentOS)"
exit 1
}
REBUILD=0
for OPT in "$@"; do
while getopts Rh OPT; do
case "$OPT" in
"--rebuild-dep")
"R")
REBUILD=1
shift 1
;;
*)
"h")
print_usage
;;
esac
@@ -26,6 +24,7 @@ if [ ! -e dist/redhat/build_rpm.sh ]; then
exit 1
fi
. /etc/os-release
if [ "$ID" != "fedora" ] && [ "$ID" != "centos" ]; then
echo "Unsupported distribution"
exit 1


@@ -11,7 +11,7 @@ Source0: %{name}-@@VERSION@@-@@RELEASE@@.tar
BuildRequires: libaio-devel libstdc++-devel cryptopp-devel hwloc-devel numactl-devel libpciaccess-devel libxml2-devel zlib-devel thrift-devel yaml-cpp-devel lz4-devel snappy-devel jsoncpp-devel systemd-devel xz-devel openssl-devel libcap-devel libselinux-devel libgcrypt-devel libgpg-error-devel elfutils-devel krb5-devel libcom_err-devel libattr-devel pcre-devel elfutils-libelf-devel bzip2-devel keyutils-libs-devel xfsprogs-devel make gnutls-devel systemd-devel
%{?fedora:BuildRequires: boost-devel ninja-build ragel antlr3-tool antlr3-C++-devel python3 gcc-c++ libasan libubsan python3-pyparsing}
%{?rhel:BuildRequires: scylla-libstdc++-static scylla-boost-devel scylla-ninja-build scylla-ragel scylla-antlr3-tool scylla-antlr3-C++-devel python34 scylla-gcc-c++ >= 5.1.1, python34-pyparsing}
Requires: systemd-libs hwloc collectd
Requires: systemd-libs hwloc
Conflicts: abrt
%description
@@ -26,12 +26,12 @@ Conflicts: abrt
%build
%if 0%{?fedora}
./configure.py --disable-xen --enable-dpdk --mode=release
./configure.py --with scylla --disable-xen --enable-dpdk --mode=release
%endif
%if 0%{?rhel}
python3.4 ./configure.py --disable-xen --enable-dpdk --mode=release --static-stdc++ --compiler=/opt/scylladb/bin/g++ --python python3.4
python3.4 ./configure.py --with scylla --disable-xen --enable-dpdk --mode=release --static-stdc++ --compiler=/opt/scylladb/bin/g++ --python python3.4
%endif
ninja-build -j2 build/release/scylla build/release/iotune
ninja-build -j2
%install
rm -rf $RPM_BUILD_ROOT
@@ -39,9 +39,7 @@ mkdir -p $RPM_BUILD_ROOT%{_bindir}
 mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig/
 mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/security/limits.d/
 mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/sudoers.d/
-mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/collectd.d/
 mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/scylla/
 mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/scylla.d/
-mkdir -p $RPM_BUILD_ROOT%{_docdir}/scylla/
 mkdir -p $RPM_BUILD_ROOT%{_unitdir}
 mkdir -p $RPM_BUILD_ROOT%{_prefix}/lib/scylla/
@@ -49,18 +47,14 @@ mkdir -p $RPM_BUILD_ROOT%{_prefix}/lib/scylla/
 install -m644 dist/common/sysconfig/scylla-server $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig/
 install -m644 dist/common/limits.d/scylla.conf $RPM_BUILD_ROOT%{_sysconfdir}/security/limits.d/
 install -m644 dist/common/sudoers.d/scylla $RPM_BUILD_ROOT%{_sysconfdir}/sudoers.d/
-install -m644 dist/common/collectd.d/scylla.conf $RPM_BUILD_ROOT%{_sysconfdir}/collectd.d/
 install -m644 dist/common/scylla.d/*.conf $RPM_BUILD_ROOT%{_sysconfdir}/scylla.d/
 install -d -m755 $RPM_BUILD_ROOT%{_sysconfdir}/scylla
 install -m644 conf/scylla.yaml $RPM_BUILD_ROOT%{_sysconfdir}/scylla/
 install -m644 conf/cassandra-rackdc.properties $RPM_BUILD_ROOT%{_sysconfdir}/scylla/
-install -m644 dist/redhat/systemd/*.service $RPM_BUILD_ROOT%{_unitdir}/
+install -m644 dist/redhat/systemd/scylla-server.service $RPM_BUILD_ROOT%{_unitdir}/
 install -m755 dist/common/scripts/* $RPM_BUILD_ROOT%{_prefix}/lib/scylla/
 install -m755 seastar/scripts/posix_net_conf.sh $RPM_BUILD_ROOT%{_prefix}/lib/scylla/
 install -m755 seastar/dpdk/tools/dpdk_nic_bind.py $RPM_BUILD_ROOT%{_prefix}/lib/scylla/
 install -m755 build/release/scylla $RPM_BUILD_ROOT%{_bindir}
 install -m755 build/release/iotune $RPM_BUILD_ROOT%{_bindir}
 install -m755 dist/common/bin/scyllatop $RPM_BUILD_ROOT%{_bindir}
 install -d -m755 $RPM_BUILD_ROOT%{_docdir}/scylla
 install -m644 README.md $RPM_BUILD_ROOT%{_docdir}/scylla/
 install -m644 README-DPDK.md $RPM_BUILD_ROOT%{_docdir}/scylla/
@@ -76,7 +70,6 @@ install -d -m755 $RPM_BUILD_ROOT%{_prefix}/lib/scylla/swagger-ui
cp -r swagger-ui/dist $RPM_BUILD_ROOT%{_prefix}/lib/scylla/swagger-ui
install -d -m755 $RPM_BUILD_ROOT%{_prefix}/lib/scylla/api
cp -r api/api-doc $RPM_BUILD_ROOT%{_prefix}/lib/scylla/api
cp -r tools/scyllatop $RPM_BUILD_ROOT%{_prefix}/lib/scylla/scyllatop
%pre
/usr/sbin/groupadd scylla 2> /dev/null || :
@@ -113,11 +106,9 @@ if [ -f /etc/systemd/coredump.conf ];then
 /usr/lib/scylla/scylla_coredump_setup
 fi
 %systemd_post scylla-server.service
-%systemd_post scylla-io-setup.service
 %preun
 %systemd_preun scylla-server.service
-%systemd_preun scylla-io-setup.service
 %postun
 %systemd_postun
@@ -128,7 +119,6 @@ if [ -d /tmp/%{name}-%{version}-%{release} ]; then
 rm -rf /tmp/%{name}-%{version}-%{release}/
 fi
 ln -sfT /etc/scylla /var/lib/scylla/conf
-systemctl restart collectd
 %clean
 rm -rf $RPM_BUILD_ROOT
@@ -139,22 +129,16 @@ rm -rf $RPM_BUILD_ROOT
 %config(noreplace) %{_sysconfdir}/sysconfig/scylla-server
 %{_sysconfdir}/security/limits.d/scylla.conf
 %{_sysconfdir}/sudoers.d/scylla
-%config(noreplace) %{_sysconfdir}/collectd.d/scylla.conf
 %attr(0755,root,root) %dir %{_sysconfdir}/scylla
 %config(noreplace) %{_sysconfdir}/scylla/scylla.yaml
 %config(noreplace) %{_sysconfdir}/scylla/cassandra-rackdc.properties
 %attr(0755,root,root) %dir %{_sysconfdir}/scylla.d
 %config(noreplace) %{_sysconfdir}/scylla.d/*.conf
 %{_docdir}/scylla/README.md
 %{_docdir}/scylla/README-DPDK.md
 %{_docdir}/scylla/NOTICE.txt
 %{_docdir}/scylla/ORIGIN
 %{_docdir}/scylla/licenses/
 %{_unitdir}/scylla-server.service
-%{_unitdir}/scylla-io-setup.service
 %{_bindir}/scylla
 %{_bindir}/iotune
 %{_bindir}/scyllatop
 %{_prefix}/lib/scylla/scylla_prepare
 %{_prefix}/lib/scylla/scylla_stop
 %{_prefix}/lib/scylla/scylla_setup
@@ -163,15 +147,12 @@ rm -rf $RPM_BUILD_ROOT
%{_prefix}/lib/scylla/scylla_sysconfig_setup
%{_prefix}/lib/scylla/scylla_bootparam_setup
%{_prefix}/lib/scylla/scylla_ntp_setup
%{_prefix}/lib/scylla/scylla_selinux_setup
%{_prefix}/lib/scylla/scylla_io_setup
%{_prefix}/lib/scylla/posix_net_conf.sh
%{_prefix}/lib/scylla/dpdk_nic_bind.py
%{_prefix}/lib/scylla/dpdk_nic_bind.pyc
%{_prefix}/lib/scylla/dpdk_nic_bind.pyo
%{_prefix}/lib/scylla/swagger-ui/dist/*
%{_prefix}/lib/scylla/api/api-doc/*
%{_prefix}/lib/scylla/scyllatop/*
%attr(0755,scylla,scylla) %dir %{_sharedstatedir}/scylla/
%attr(0755,scylla,scylla) %dir %{_sharedstatedir}/scylla/data
%attr(0755,scylla,scylla) %dir %{_sharedstatedir}/scylla/commitlog

@@ -1,10 +0,0 @@
-[Unit]
-Description=Scylla IO Setup
-After=network.target
-[Service]
-Type=oneshot
-EnvironmentFile=/etc/sysconfig/scylla-server
-ExecStart=/usr/lib/scylla/scylla_io_setup
-RemainAfterExit=yes
-TimeoutStartSec=1800

@@ -1,7 +1,6 @@
 [Unit]
 Description=Scylla Server
-After=scylla-io-setup.service
-Requires=scylla-io-setup.service
 After=network.target
 [Service]
 Type=notify
@@ -12,13 +11,12 @@ LimitNPROC=8096
 WorkingDirectory=/var/lib/scylla
 Environment="HOME=/var/lib/scylla"
 EnvironmentFile=/etc/sysconfig/scylla-server
 EnvironmentFile=/etc/scylla.d/*.conf
 ExecStartPre=/usr/bin/sudo -E /usr/lib/scylla/scylla_prepare
 ExecStart=/usr/bin/scylla $SCYLLA_ARGS $SCYLLA_IO
 ExecStopPost=/usr/bin/sudo -E /usr/lib/scylla/scylla_stop
 TimeoutStartSec=900
 KillMode=process
-Restart=on-abnormal
+Restart=no
 User=scylla
 [Install]

@@ -49,8 +49,8 @@ if [ "$RELEASE" = "15.10" ]; then
 sed -i -e "s/@@COMPILER@@/g++/g" debian/rules
 sed -i -e "s/@@COMPILER@@/g++/g" debian/control
 else
-sed -i -e "s/@@COMPILER@@/g++-5/g" debian/rules
-sed -i -e "s/@@COMPILER@@/g++-5/g" debian/control
+sed -i -e "s/@@COMPILER@@/g++-4.9/g" debian/rules
+sed -i -e "s/@@COMPILER@@/g++-4.9/g" debian/control
 fi
@@ -59,8 +59,8 @@ fi
 if [ "$RELEASE" != "15.10" ]; then
 sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
 sudo apt-get -y update
+sudo apt-get -y install g++-4.9
 fi
-sudo apt-get -y install g++-5
 echo Y | sudo mk-build-deps -i -r
 debuild -r fakeroot -us -uc
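The `sed` calls in this script fill a `@@COMPILER@@` placeholder in the packaging templates. The substitution can be sketched in isolation (`rules.sample` is a hypothetical stand-in for `debian/rules`):

```shell
# Create a sample template with the placeholder, substitute it in place,
# and read the result back.
printf 'export CXX := @@COMPILER@@\n' > rules.sample
sed -i -e "s/@@COMPILER@@/g++-4.9/g" rules.sample
RESULT=$(cat rules.sample)
echo "$RESULT"
rm -f rules.sample
```

Because the replacement is done with `g` (global), every occurrence of the placeholder in the file is rewritten, not just the first on each line.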

@@ -8,7 +8,7 @@ Build-Depends: debhelper (>= 9), libyaml-cpp-dev, liblz4-dev, libsnappy-dev, lib
 Package: scylla-server
 Architecture: amd64
-Depends: ${shlibs:Depends}, ${misc:Depends}, hugepages, adduser, hwloc-nox, collectd
+Depends: ${shlibs:Depends}, ${misc:Depends}, hugepages, adduser, hwloc-nox
 Description: Scylla database server binaries
  Scylla is a highly scalable, eventually consistent, distributed,
  partitioned row DB.

@@ -29,8 +29,8 @@ SCRIPTNAME=/etc/init.d/$NAME
[ -x "$DAEMON" ] || exit 0
# Read configuration variable file if it is present
eval "`grep -v -e "^\s*#" -e "^$" /etc/default/scylla-server|sed -e 's/^/export /'`"
eval "`grep -v -e "^\s*#" -e "^$" /etc/scylla.d/*.conf|sed -e 's/^/export /'`"
[ -r /etc/default/$NAME ] && . /etc/default/$NAME
export NETWORK_MODE TAP BRIDGE ETHDRV ETHPCIID NR_HUGEPAGES USER GROUP SCYLLA_HOME SCYLLA_CONF SCYLLA_ARGS
# Define LSB log_* functions.
. /lib/lsb/init-functions
@@ -38,7 +38,6 @@ eval "`grep -v -e "^\s*#" -e "^$" /etc/scylla.d/*.conf|sed -e 's/^/export /'`"
 do_start()
 {
 /usr/lib/scylla/scylla_prepare
-/usr/lib/scylla/scylla_io_setup
 # Return
 # 0 if daemon has been started
 # 1 if daemon was already running
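The `eval`/`grep`/`sed` construct this init script uses to load its configuration strips comments and blank lines from a sysconfig-style file, prefixes each remaining assignment with `export`, and evaluates the result, so the variables reach child processes. A minimal sketch with illustrative file content (the values shown are examples, not the shipped defaults):

```shell
# Build a sample config file, then export its assignments the same way
# the init script does for /etc/default/scylla-server.
CONF=$(mktemp)
cat > "$CONF" <<'EOF'
# comments and blank lines are filtered out by grep

SCYLLA_ARGS="--log-to-syslog 1"
NR_HUGEPAGES=64
EOF
# grep -v drops comment ("^\s*#") and empty ("^$") lines; sed prepends "export".
eval "`grep -v -e "^\s*#" -e "^$" "$CONF" | sed -e 's/^/export /'`"
rm -f "$CONF"
echo "SCYLLA_ARGS=$SCYLLA_ARGS NR_HUGEPAGES=$NR_HUGEPAGES"
```

Plain `. file` sourcing would set the variables only in the current shell; the `export` prefix is what makes them visible in the daemon's environment.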

Some files were not shown because too many files have changed in this diff.