* seastar d40453b...8a76d06 (3):
> memory: be less strict about NUMA bindings
> reactor: let the resource code specify the default memory reserve
> resource: reserve even more memory when hwloc is compiled in
Commit 56df32ba56 (gossip: Mark node as dead even if already left)
missed a node liveness check. Fix it up.
Before: (mark a node down multiple times)
[Tue Dec 8 12:16:33 2015] INFO [shard 0] gossip - InetAddress 127.0.0.3 is now DOWN
[Tue Dec 8 12:16:33 2015] DEBUG [shard 0] storage_service - endpoint=127.0.0.3 on_dead
[Tue Dec 8 12:16:34 2015] INFO [shard 0] gossip - InetAddress 127.0.0.3 is now DOWN
[Tue Dec 8 12:16:34 2015] DEBUG [shard 0] storage_service - endpoint=127.0.0.3 on_dead
[Tue Dec 8 12:16:35 2015] INFO [shard 0] gossip - InetAddress 127.0.0.3 is now DOWN
[Tue Dec 8 12:16:35 2015] DEBUG [shard 0] storage_service - endpoint=127.0.0.3 on_dead
[Tue Dec 8 12:16:36 2015] INFO [shard 0] gossip - InetAddress 127.0.0.3 is now DOWN
[Tue Dec 8 12:16:36 2015] DEBUG [shard 0] storage_service - endpoint=127.0.0.3 on_dead
After: (mark a node down only one time)
[Tue Dec 8 12:28:36 2015] INFO [shard 0] gossip - InetAddress 127.0.0.3 is now DOWN
[Tue Dec 8 12:28:36 2015] DEBUG [shard 0] storage_service - endpoint=127.0.0.3 on_dead
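The missed guard can be sketched as follows (a simplified model; `gossiper`, `mark_dead`, `endpoint_state`, and the log vector are illustrative names, not Scylla's exact API):

```cpp
#include <map>
#include <string>
#include <vector>

// Hypothetical, reduced model of the gossiper's dead-marking path.
struct endpoint_state {
    bool alive = true;
};

struct gossiper {
    std::map<std::string, endpoint_state> endpoints;
    std::vector<std::string> down_log;  // stands in for the "is now DOWN" log line

    void mark_dead(const std::string& addr) {
        auto& st = endpoints[addr];
        // The fix: skip endpoints that are already marked dead, so the
        // DOWN notification (and on_dead callbacks) fire only once.
        if (!st.alive) {
            return;
        }
        st.alive = false;
        down_log.push_back(addr + " is now DOWN");
    }
};
```

With the guard in place, repeated mark_dead calls for the same endpoint log DOWN only once, matching the "After" trace above.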
(cherry picked from commit 5a65d8bcdd)
If there is no snapshot directory for the specific column family,
get_snapshot_details should return an empty map.
This patch checks that the directory exists before trying to iterate over
it.
Fixes #619
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
Scylla changes:
sstable.cc: Remove file_exists() function which conflicts with seastar's
Amnon Heiman (2):
reactor: Add file_exists method
Add a wrapper for file_exists
Avi Kivity (2):
Merge "Introduce shared_future" from Tomasz
> Merge "scripts: a few fixes in posix_net_conf.sh" from Vlad
Gleb Natapov (3):
rpc: not stop client in error state
> avoid allocation in parallel_for_each if there is nothing to do
memory: fix size_to_idx calculation
Nadav Har'El (1):
test: fix use-after-free in timertest
Paweł Dziepak (1):
memory: use size instead of old_size to shrink memory block
Tomasz Grabiec (7):
file: Mark move constructor as noexcept
core: future: Add static asserts about type's noexcept guarantees
core: future: Drop now redundant move_noexcept flag
core: future_state: Make state getters non-destructive for non-rvalue-refs
core: future: Make get_available_state() noexcept
core: Introduce shared_future
Make json_return_type movable
Vlad Zolotarov (8):
scripts: posix_net_conf.sh: ban NIC IRQs from being moved by irqbalance
scripts: posix_net_conf.sh: exclude CPU0 siblings from RPS
scripts: posix_net_conf.sh: Configure XPS
scripts: posix_net_conf.sh: Add a new mode for MQ NICs
scripts: posix_net_conf.sh: increase some backlog sizes
core: to_sstring(): cleanup
core: to_sstring_strintf(): always use %g(or %lg) format for floating point values
core: prevent explicit calls for to_sstring_sprintf()
In a recent discussion with the XFS developers, Dave Chinner recommended
us *not* to use discard, but rather issue fstrims explicitly. In machines
like Amazon's c3-class, the situation is made worse by the fact that discard
is not supported by the disk. Contrary to my intuition, adding the discard
mount option in such situation is *not* a nop and will just create load
for no reason.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
Objects extending json_base are not movable, so we won't be able to
pass them via future<>, which asserts that types are nothrow move
constructible.
This problem only affects httpd::utils_json::histogram, which is used
in map-reduce. This patch changes the aggregation to work on the domain
value (utils::ihistogram) instead of json objects.
The config file expresses this number in MB, while total_memory() gives us
a quantity in bytes. This causes the commitlog not to flush until we reach
really sky-high numbers.
While we need this fix for the short term before we cook another release,
I will note that for the mid/long term, it would be really helpful to stop
representing memory amounts as integers, and use an explicit C++ type for
those. That would have prevented this bug.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
Print a map in the form of [(]{ key0 : value0 }[, { keyN : valueN }]*[)]
The map is printed inside () brackets if it's frozen.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
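The described format can be sketched with a standalone helper (hypothetical; the real patch implements printing for Scylla's own map type):

```cpp
#include <map>
#include <sstream>
#include <string>

// Prints a map as { key0 : value0 }, { keyN : valueN }, wrapping the
// whole thing in () brackets when the map is frozen.
inline std::string print_map(const std::map<std::string, std::string>& m,
                             bool frozen) {
    std::ostringstream os;
    if (frozen) os << "(";
    bool first = true;
    for (auto& [k, v] : m) {
        if (!first) os << ", ";
        os << "{ " << k << " : " << v << " }";
        first = false;
    }
    if (frozen) os << ")";
    return os.str();
}
```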
In origin, there are two APIs to get the information about the current
running compactions. Both APIs do the string formatting.
This patch changes the API to have a single API, get_compaction, that
returns a list of summary objects.
The jmx would do the string formatting for the two APIs.
This change gives a better API experience, as it's better documented and
will make it easier to support future format changes in origin.
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
That's what we're trying to standardize on.
This patch also fixes an issue with the current query::result::serialize()
not being const-qualified, because it modifies the
buffer. messaging_service did a const cast to work around this, which
is not safe.
This patch adds the implementation of get_version.
After this patch the following url will be available:
messaging_service/version?addr=127.0.0.1
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
"This series allows the compaction manager to be used by the nodetool as a stub implementation.
It has two changes:
* Add to the compaction manager API a method that returns a compaction info
object
* Stub all the compaction methods so that they create an unimplemented
warning but do not fail; the API implementation will be reverted when the
work on compaction is completed."
This patch fixes the following cql_query_test failure.
cql_query_test: scylla/seastar/core/sharded.hh:439:
Service& seastar::sharded<Service>::local() [with Service =
gms::gossiper]: Assertion `local_is_initialized()' failed.
The problem is that in gossiper::stop() we call gossip::add_local_application_state(),
which will in turn call gms::get_local_gossiper(). In seastar::sharded::stop:
_instances[engine().cpu_id()].service = nullptr;
return inst->stop().then([this, inst] {
return _instances[engine().cpu_id()].freed.get_future();
});
We set the _instances to nullptr before we call the stop method, so
local_is_initialized asserts when we try to access get_local_gossiper
again.
To fix, we make the stopping of gossiper explicit. In the shutdown
procedure, we call stop_gossiping() explicitly.
This has two more advantages:
1) The api to stop gossip now calls stop_gossiping() instead of
sharing the seastar::sharded stop method.
2) We can now get rid of the _handler seastar::sharded helper.
The add interface of the estimated histogram is confusing, as it is not
clear what units are used.
This patch removes the general add method and replaces it with an add_nano
method that adds nanoseconds, and an add method that takes a duration.
To be compatible with origin, nanosecond values are translated to
microseconds.
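The clarified interface can be sketched like this (a reduced model with illustrative storage; the real estimated_histogram buckets its samples):

```cpp
#include <chrono>
#include <vector>

// Sketch: add() takes a typed duration, add_nano() takes a raw
// nanosecond count; both store microseconds for origin compatibility.
struct estimated_histogram {
    std::vector<long long> samples_us;  // stand-in for the real buckets

    void add_nano(long long nanos) {
        samples_us.push_back(nanos / 1000);  // nanoseconds -> microseconds
    }

    template <typename Rep, typename Period>
    void add(std::chrono::duration<Rep, Period> d) {
        add_nano(std::chrono::duration_cast<std::chrono::nanoseconds>(d).count());
    }
};
```

Callers that pass a std::chrono duration can no longer get the units wrong; only add_nano accepts a bare integer.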
This patch adds a started counter that is used to count the number of
operations that were started.
This counter serves two purposes: it is a better indication of when to
sample the data, and it indicates how many operations are pending.
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
This patch adds the column family API that returns the snapshot size.
The changes in the swagger definition file follow origin so the same API will be used for the metric and the
column_family.
The implementation is based on the get_snapshot_details in the
column_family.
Fixes #425
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
Backport: CASSANDRA-10330
ae4cd69 Print versions for gossip states in gossipinfo
For instance, the version for each state, which can be useful for
diagnosing the reason for any missing states. Also instead of just
omitting the TOKENS state, let's indicate whether the state was actually
present or not.
With
Node 1 (Seed node, Port 7000 is opened, 10.184.9.144)
Node 2 (Port 7000 is opened, 10.184.9.145)
Node 3 (Port 7000 is blocked by firewall)
On Node 3, we saw the following error, which was very confusing: Node 3
saw Node 1 and Node 2 but complained it cannot contact any seeds.
The message "Node 10.184.9.144 is now part of the cluster" and friends
are actually messages printed during the gossip shadow round where Node
3 connects to Node 1's port 7000 and Node 1 returns all info it knows to
Node 3, so that Node 3 knows Node 1 and Node 2 and we see the "Node
10.184.9.144/145 is now part of the cluster" message.
However, during the normal gossip round, Node 3 will not mark Node 1 and
Node 2 UP until the Seed node initiates a gossip round to Node 3, (note
port 7000 on node 3 is blocked in this case). So Node 3 will not mark
Node 1 and Node 2 UP and we see the "Unable to contact any seeds" error.
[shard 0] storage_service - Loading persisted ring state
[shard 0] gossip - Node 10.184.9.144 is now part of the cluster
[shard 0] gossip - inet_address 10.184.9.144 is now UP
[shard 0] gossip - Node 10.184.9.145 is now part of the cluster
[shard 0] gossip - inet_address 10.184.9.145 is now UP
[shard 0] storage_service - Starting up server gossip
scylla_run[12479]: Start gossiper service ...
[shard 0] storage_service - JOINING: waiting for ring information
[shard 0] storage_service - JOINING: schema complete, ready to bootstrap
[shard 0] storage_service - JOINING: waiting for pending range calculation
[shard 0] storage_service - JOINING: calculation complete, ready to bootstrap
[shard 0] storage_service - JOINING: getting bootstrap token
[shard 0] storage_service - JOINING: sleeping 5000 ms for pending range setup
scylla_run[12479]: Exiting on unhandled exception of type 'std::runtime_error': Unable to contact any seeds!
Backported: CASSANDRA-8336 and CASSANDRA-9871
84b2846 remove redundant state
b2c62bb Add shutdown gossip state to prevent timeouts during rolling restarts
8f9ca07 Cannot replace token does not exist - DN node removed as Fat Client
Fixes:
When X is shut down, X sends a SHUTDOWN message to both Y and Z, but for
some reason only Y receives the message and Z does not. If Z has a
higher gossip version for X than Y has for
X, Z will initiate a gossip with Y and Y will mark X alive again.
X ------> Y
\ /
\ /
Z
Fixes: #593
"Changes the parser/replayer to treat data corruption as non-fatal,
skipping as little as possible to get the most data out of a segment,
but keeping track of, and reporting, the amount corrupted.
Replayer handles this and reports any non-fatal errors on replay finish.
Also added tests for corruption cases.
This patch series contains a cleanup-patch for commitlog_tests that was
previously submitted, but got lost."
If something bad happens between write request handler creation and
request execution, the request handler has to be destroyed. Currently the
code tries to do that explicitly in all places where a request may be
abandoned, but it misses some (at least one). This patch replaces this
by introducing a unique_response_handler object that removes the handler
automatically if the request is not executed for some reason.
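The RAII idea can be sketched like this (a simplified model; the real unique_response_handler works against storage_proxy's handler registry):

```cpp
#include <functional>
#include <map>

// Simplified stand-in for the handler registry kept by the write path.
using handler_id = unsigned;
using handler_map = std::map<handler_id, std::function<void()>>;

// Removes the handler automatically on destruction unless the request
// was actually executed, in which case release() disarms the cleanup.
class unique_response_handler {
    handler_map& _handlers;
    handler_id _id;
    bool _armed = true;
public:
    unique_response_handler(handler_map& m, handler_id id)
        : _handlers(m), _id(id) {}
    unique_response_handler(const unique_response_handler&) = delete;
    handler_id release() {            // request was sent: keep the handler
        _armed = false;
        return _id;
    }
    ~unique_response_handler() {
        if (_armed) {
            _handlers.erase(_id);     // request abandoned: clean up
        }
    }
};
```

Every abandonment path (exceptions included) now cleans up through the destructor instead of relying on explicit erase calls.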
Rename antlr3-tool to antlr3 (same as distribution package), and use distribution version if it's available
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
"Before this change, populations could race with an update from a flushed
memtable, which might result in the cache being populated with older
data. Populations started before the flush do not consider the
memtable nor its sstable.
The fix employed here is to make update wait for populations which
were started before the flushed memtable's sstable was added to the
underlying data source. All populations started after that are
guaranteed to see the new data. The update() call will wait only for
current populating reads to complete; it will not wait for readers to
get advanced by the consumer, for instance."
To avoid a race where natural endpoint was updated to contain node A,
but A was not yet removed from pending endpoints.
This fixes the root cause of commit d9d8f87c1 (storage_proxy: filter out
natural endpoints from pending endpoints). This patch alone fixes #539,
but we still want commit d9d8f87c1 to be safe.
When other bootstrapping/leaving/moving nodes are found during
bootstrap, instead of throwing immediately, sleep and try again for one
minute, hoping other nodes will finish the operation soon.
Since we are retrying the shadow gossip round more than once, we need
to put the gossip state back to shadow round after each shadow round, to
make the shadow round work correctly.
This is useful when starting an empty cluster for testing. E.g,
$ scylla --listen-address 127.0.0.1
$ sleep 3
$ scylla --listen-address 127.0.0.2
$ sleep 3
$ scylla --listen-address 127.0.0.3
Without this patch, node 3 will hit the check.
TIME STATUS
-----------------------
Node 1:
32:00 Starts
32:00 In NORMAL status
Node 2:
32:03 Starts
32:04 In BOOT status
32:10 In NORMAL status
Node 3:
32:06 Starts
32:06 Found node 2 in BOOT status, hit the check, sleep and try again
32:11 Found node 2 in NORMAL status, can keep going now
32:12 In BOOT status
32:18 In NORMAL status
When other bootstrapping/leaving/moving nodes are found during
bootstrap, instead of throwing immediately, sleep and try again for one
minute, hoping other nodes will finish the operation soon.
This is useful when starting an empty cluster for testing. E.g,
$ scylla --listen-address 127.0.0.1
$ scylla --listen-address 127.0.0.2
$ scylla --listen-address 127.0.0.3
Without this patch, node 3 will hit the check.
TIME STATUS
-----------------------
Node 1:
25:19 Starts
25:20 In NORMAL status
Node 2:
25:19 Starts
25:23 In BOOT status
25:28 In NORMAL status
Node 3:
25:19 Starts
25:24 Found node 2 in BOOT status, hit the check, sleep and try again
25:29 Found node 2 in NORMAL status, can keep going now
25:29 In BOOT status
25:34 In NORMAL status
Before this change, populations could race with an update from a flushed
memtable, which might result in the cache being populated with older
data. Populations started before the flush do not consider the
memtable nor its sstable.
The fix employed here is to make update wait for populations which
were started before the flushed memtable's sstable was added to the
underlying data source. All populations started after that are
guaranteed to see the new data.
The text data type is no longer present in CQL binary protocol v3 and
later. We don't need it for encoding earlier versions either because
it's an alias for varchar which is present in all CQL binary protocol
versions.
Fixes #526.
Signed-off-by: Pekka Enberg <penberg@scylladb.com>
This patch plus pekka's previous commit 3c72ea9f96
"gms: Fix gossiper::handle_major_state_change() restart logic"
fix CASSANDRA-7816.
Backported from:
def4835 Add missing follow on fix for 7816 only applied to
cassandra-2.1 branch in 763130bdbde2f4cec2e8973bcd5203caf51cc89f
763130b Followup commit for 7816
2199a87 Fix duplicate up/down messages sent to native clients
Tested by:
pushed_notifications_test.py:TestPushedNotifications.restart_node_test
CQL 3.2.1 introduces a "TRUNCATE TABLE X" alias for "TRUNCATE X":
4e3555c1d9
Fix our CQL grammar to also support that.
Please note that we don't bump up advertised CQL version yet because our
cqlsh clients won't be able to connect by default until we upgrade them
to C* 2.1.10 or later.
Fixes #576
Signed-off-by: Pekka Enberg <penberg@scylladb.com>
The FIXMEs are no longer valid; we load the schema on bootstrap and don't
support hot-plugging of column families via the file system (nor does
Cassandra).
Handling of missing tables matches Cassandra 2.1: log it
and continue, while queries propagate the error.
If a request comes after the natural endpoints were updated to contain node A,
but A was not yet removed from the pending endpoints, it will be in both, and
the write request logic cannot handle this properly. Filter nodes which are
already in the natural endpoints from the pending endpoints to fix this.
Fixes #539.
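A minimal sketch of the filtering (container and address types are illustrative stand-ins for Scylla's):

```cpp
#include <algorithm>
#include <string>
#include <vector>

using inet_address = std::string;  // stand-in for gms::inet_address

// Drop from 'pending' every endpoint that already appears in 'natural',
// so a node is never counted twice by the write handler.
std::vector<inet_address>
filter_pending(const std::vector<inet_address>& natural,
               std::vector<inet_address> pending) {
    pending.erase(
        std::remove_if(pending.begin(), pending.end(),
            [&](const inet_address& ep) {
                return std::find(natural.begin(), natural.end(), ep)
                       != natural.end();
            }),
        pending.end());
    return pending;
}
```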
boost::heap::binomial_heap allocates a helper object in push() and,
therefore, may throw an exception. This shouldn't happen during
compaction.
The solution is to reserve space for this helper object in the
segment_descriptor and use a custom allocator with
boost::heap::binomial_heap.
Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
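The reserved-space idea can be sketched like this (a simplified model; the real patch embeds the storage in segment_descriptor and plugs the allocator into boost::heap::binomial_heap):

```cpp
#include <cassert>
#include <cstddef>

// Each descriptor reserves storage for the one helper node that
// push() would otherwise heap-allocate (and could throw on).
struct preallocated_storage {
    alignas(std::max_align_t) unsigned char buf[64];
    bool in_use = false;
};

// Allocator that only ever hands out the reserved slot, so allocation
// can never throw during the push.
template <typename T>
struct reserved_allocator {
    using value_type = T;
    preallocated_storage* storage;

    explicit reserved_allocator(preallocated_storage* s) : storage(s) {}
    template <typename U>
    reserved_allocator(const reserved_allocator<U>& o) : storage(o.storage) {}

    T* allocate(std::size_t n) {
        assert(n == 1 && sizeof(T) <= sizeof(storage->buf) && !storage->in_use);
        storage->in_use = true;
        return reinterpret_cast<T*>(storage->buf);
    }
    void deallocate(T*, std::size_t) { storage->in_use = false; }
};
```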
The LSA memory reclaimer logic assumes that the amount of memory used by LSA
equals segments_in_use * segment_size. However, LSA is also responsible
for eviction of large objects, which do not affect the used segment count;
e.g. a region with no used segments may still use a lot of memory for
large objects. The solution is to switch from measuring memory in used
segments to a used bytes count that also includes large objects.
Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
Since this won't check disk types, it may re-initialize the RAID on EBS when the first block was lost.
But in such a condition, re-initializing the RAID is probably the only choice we can take, so this is fine.
Fixes #364.
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
With this patch, start two nodes
node 1:
scylla --rpc-address 127.0.0.1 --broadcast-rpc-address 127.0.0.11
node 2:
scylla --rpc-address 127.0.0.2 --broadcast-rpc-address 127.0.0.12
On node 1:
cqlsh> SELECT rpc_address from system.peers;
rpc_address
-------------
127.0.0.12
which means clients should use this address to connect to node 2 for the cql and
thrift protocols.
It is the same as
-Dcassandra.consistent.rangemovement
in cassandra.
Use it as:
$ scylla --consistent-rangemovement 0
or
$ scylla --consistent-rangemovement 1
Messaging service closes the connection in the rpc call continuation on
closed_error, but the code runs for each outstanding rpc call on the
connection, so the first continuation may destroy a genuinely closed
connection, then the connection is reopened, and the next continuation that
handles the previous error kills a now perfectly healthy connection. Fix
this by closing the connection only in the error state.
From Avi:
Origin supports a notion of empty values for non-container types; these
are serialized as zero-length blobs. They are mostly useless and only
retained for compatibility.
The implementation here introduces a wrapper maybe_empty<T>, similar to
optional<T> but oriented towards usually-nonempty usage with implicit
conversion.
There is more work needed for full empty support: fixing up deserializers to
create empty values instead of nulls, and splitting up data_value into
data_value and a data_value_nonnull for the cases that require it.
(I chose maybe_empty<> rather than using optional<data_value> for nullable
data_value both because it requires fewer changes, and because
optional<data_value> introduces a lot of control flow when moving or copying,
which would be mostly useless in most cases).
This cleanup patch got lost in git-space some time ago. It is however sorely
needed...
* Use cleaner wrapper for creating temp dir + commit log, avoiding
having to clear and clean in every test, etc.
* Remove assertions based on file system checks, since these are not
valid due to both the async nature of the CL, and more to the point,
because of pre-allocation of files and file blocks. Use CL
counters/methods instead
* Fix some race conditions to ensure tests are safe(r)
* Speed up some tests
Discern fatal and non-fatal exceptions, and handle data corruption
by adding it to stats and reporting it, but continuing processing.
Note that "invalid_argument", i.e. attempting to replay origin/old
segments, is still considered fatal, as it is probably better to
signal this strongly to the user/admin.
The parser object now attempts to skip past/terminate parsing on corrupted
entries/chunks (as detected by invalid sizes/crcs). The amount of data
skipped is kept track of (as well as we can estimate - pre-allocation
makes it tricky), and at the end of parsing/reporting, iff errors
occurred, an exception detailing the failures is thrown (since
subscription has little mechanism to deal with this otherwise).
Thus a caller can decide how to deal with data corruption, but will be
given as many entries as possible.
An empty serialized representation means an empty value, not NULL.
Fix up the confusion by converting incorrect make_null() calls to a new
make_empty(), and removing make_null() in empty-capable types like
bytes_type.
Collections don't support empty serialized representations, so remove
the call there.
Parameter evaluation order is unspecified, so it's possible that the
move of 'schema' into the lambda captures would happen before the
construction of the mutation.
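The hazard can be reduced to a small example (names are made up); the fix is to force the ordering with a separate statement:

```cpp
#include <memory>
#include <string>
#include <utility>

struct schema { std::string name; };

// Buggy shape: both arguments read 's'; the compiler is free to evaluate
// the lambda capture (moving 's') before the first argument:
//
//   apply(make_mutation(*s), [s = std::move(s)] { /* ... */ });
//
// Safe shape: evaluate the dependent expression in its own statement,
// which is sequenced before the move.
std::pair<std::string, std::shared_ptr<schema>>
safe_order(std::shared_ptr<schema> s) {
    auto name = s->name;                     // guaranteed to run first
    return {std::move(name), std::move(s)};  // now the move is safe
}
```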
"To speed up boot, parallelism was introduced to our code that loads
sstables from a column family, a function was implemented to read
the minimum from a sstable to determine whether it belongs to the
current shard, and buffer size in read simple is dynamically chosen
based on the size of the file and dma alignment.
The latter is important because filter file can be considerably
large when the respective sstable (data file) is very large.
Before this patchset, scylla took about 5 minutes to boot with a
data directory of 660GB. After this patchset, scylla took about 20
seconds to boot with the same data directory."
Avi says:
"A small buffer size will hurt if we read a large file, but
a large buffer size won't hurt if we read a small file, since
we close it immediately."
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
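The buffer-size heuristic can be sketched as follows (constants and names are illustrative, not the patchset's actual values):

```cpp
#include <algorithm>
#include <cstdint>

// Round x up to a multiple of 'alignment' (a power of two).
inline uint64_t align_up(uint64_t x, uint64_t alignment) {
    return (x + alignment - 1) & ~(alignment - 1);
}

// Read small files (e.g. filters of small sstables) in a single aligned
// I/O, but cap the buffer for large files, per Avi's observation above.
inline uint64_t choose_buffer_size(uint64_t file_size,
                                   uint64_t dma_alignment = 4096,
                                   uint64_t max_buffer = 128 * 1024) {
    return std::min(align_up(file_size, dma_alignment), max_buffer);
}
```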
Currently, we only determine if an sstable belongs to the current shard
after loading some of its components into memory. For example, the
filter may be considerably big, and its content is irrelevant for
deciding if an sstable should be included in a given shard.
Start using the functions previously introduced to optimize the
sstable loading process. add_sstable no longer checks if a sstable
is relevant to the current shard.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Boot may be slow because the function that loads sstables does so
serially instead of in parallel. In the callback supplied to
lister::scan_dir, let's push the future returned by probe_file
(the function that loads an sstable) into a vector of futures and wait
for all of them at the end.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
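In plain C++ terms the change looks like this (the real code uses seastar futures and probe_file; std::async stands in here and the doubling lambda is a placeholder for the loader):

```cpp
#include <future>
#include <vector>

// Before: each probe_file() was awaited serially. After: push all the
// futures into a vector and wait for them at the end, so probing
// proceeds in parallel.
int load_all(const std::vector<int>& entries) {
    std::vector<std::future<int>> futures;
    for (int e : entries) {
        futures.push_back(std::async(std::launch::async,
                                     [e] { return e * 2; }));  // probe_file(e)
    }
    int loaded = 0;
    for (auto& f : futures) {
        loaded += f.get();  // "wait for all of them at the end"
    }
    return loaded;
}
```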
We cannot share some dependency package names between 14.04 and 15.10, so we need to add ifdefs.
Not tested on other versions of Ubuntu.
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Origin supports (https://issues.apache.org/jira/browse/CASSANDRA-5648) "empty"
values even for non-container types such as int. Use maybe_empty<> to
encapsulate abstract_type::native_type, adding an empty flag if needed.
Similar to optional<>, with the following differences:
- decays back to the encapsulated type, with an emptiness check;
this reflects the expectation that the value will rarely be empty
- avoids conditionals during copy/move (and requires a default constructor),
again with the same expectation.
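A minimal sketch of the idea (an assumption-laden model, not Scylla's actual maybe_empty implementation):

```cpp
#include <utility>

// Like optional<T>, but biased toward the usually-nonempty case:
// decays back to T, with an explicit emptiness check available.
template <typename T>
class maybe_empty {
    T _value{};    // default-constructed when empty (hence the
    bool _empty;   // default-constructor requirement, unlike optional<>)
public:
    maybe_empty() : _empty(true) {}
    maybe_empty(T v) : _value(std::move(v)), _empty(false) {}  // implicit in
    bool empty() const { return _empty; }
    operator const T&() const {  // implicit out: decays to the wrapped type
        // a real implementation would check emptiness here
        return _value;
    }
};
```

Because copy/move just copy the flag and the (always-constructed) value, there are no conditionals on those paths, matching the second bullet above.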
When we start sending mutations for cf_id to a remote node, the remote node
might not have the cf_id anymore, due to dropping of the cf, for
instance.
We should not fail the streaming if this happens; since the cf does not
exist anymore, there is no point streaming it.
Fixes #566
When a new node joins a cluster, it starts a gossip round with a seed
node. However, within this round, the seed node will not tell the new
node anything it knows about other nodes in the cluster, because the
digest in the gossip SYN message contains only the new node itself and
no other nodes. The seed node picks randomly from the live nodes,
including the newly added node, in do_gossip_to_live_member to start a
gossip round. If the new node is "lucky", the seed node will talk to it very
soon and tell it all the information it knows about the cluster; thus the
new node will mark the seed node alive and think it has seen the seed
node. If there is a considerably large number of live nodes, it might take a
long time before the seed node picks the new node and talks to it.
In the bootstrap code, storage_service::bootstrap checks if we see any nodes
after a sleep of RING_DELAY milliseconds and throws "Unable to contact any
seeds!" if not, so the node will fail to bootstrap.
To help the seed node talk to the new node faster, we favor the new node in
do_gossip_to_live_member.
In origin, get_all_endpoint_states performs all the information
formatting and returns a string.
This is not a good API approach; this patch replaces the implementation
so the API returns an array of values and the JMX does the
formatting.
This is a better API and will make it simpler in the future to stay in
sync with origin output.
This patch is part of #508
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
Fixes #551.
Change the mountpoint to /var/lib/scylla and copy conf/ onto it.
Note: we need to replace conf/ with a symlink to /etc/scylla when the new rpm is uploaded to the yum repository.
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Signed-off-by: Pekka Enberg <penberg@iki.fi>
If we get a partition with no row data, but statics, we should treat this as
a row (include it in the count), but also make sure we skip to the next partition
if our page ends here.
The "end partition" with zero rows but static data can also happen if we
happen to resume paging by giving a column range excluding all data. In this
case we should _not_ include it, since we have already provided the
data in question in the previous page.
Fixes #556
1) Should not reset to the input query state if run repeatedly
2) And if run repeatedly without input state, likewise keep
the internal one active
Fixes #560
"To keep compatibility with scylla-tools-java, it links /etc/scylla to /var/lib/scylla/conf.
The problem with this patchset is that I added SCYLLA_HOME and SCYLLA_CONF to /etc/sysconfig/scylla-server.
However, since the file is marked as a config file, it won't be automatically upgraded.
If the user doesn't upgrade the file manually, scylla-server is still able to run with /var/lib/scylla/conf because we have the symlink, but it never switches to /etc/scylla."
While objects above max_manage_object_size aren't stored in the
LSA segments, they are still considered to belong to the LSA
region and are evictable using that region's evictor.
Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
"This series adds the natural_endpoints API. It adds the implementation to the storage_service and to the storage_service API.
After this series the nodetool command getendpoints should work.
example:
$ bin/nodetool getendpoints keyspace1 standard1 0x5032394c323239385030
127.0.0.2"
This patch adds the API for timeout messages and dropped messages.
For dropped messages, origin has two APIs: one for messages and one for
commands.
Dropped messages return the number of messages per verb, so our API was
renamed to reflect that.
For dropped messages (command) we currently do not have this logic of
throwing messages away before sending, so the API will always return 0.
The total timeout API was removed and will be done on the jmx proxy
level.
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
If listen_address is different from broadcast_address, we should use
broadcast_address for the seeds list. Check and ask the user to fix the
configuration, e.g.,
$ scylla --rpc-address 127.0.0.1 --listen-address 127.0.0.1 --broadcast-address 192.168.1.100 --seed-provider-parameters seeds=127.0.0.1
Use broadcast_address instead of listen_address for seeds list: seeds={127.0.0.1}, listen_address=127.0.0.1, broadcast_address=192.168.1.100
Exiting on unhandled exception of type 'std::runtime_error': Use broadcast_address for seeds list
The write handler keeps track of all endpoints that have not yet acked the
mutation verb. It uses the broadcast address as an endpoint id, but if the
local address is different from the broadcast address, acknowledgements from
local endpoints will come from a different address, so the socket address
cannot be used as an acknowledgement source. Origin solves this by sending
"from" in each message, which looks like overhead; solve this by providing
the endpoint's broadcast address in rpc client_info and using that instead.
The restart logic is wrong because C* had a bug in
bf599fb5b062cbcc652da78b7d699e7a01b949ad, which they fixed later, and we
translated the broken version. We must check if there is an existing
endpoint state and call the on_restart() hooks on that, not the newly
available endpoint state.
Spotted while inspecting the code.
Acked-by: Asias He <asias@scylladb.com>
From Avi:
Memtables do not use an allocating_section to guard against allocation
failure, and hence can fail an allocation. Reproducible by changing
perf_mutation to use an allocating type (bytes_type with a nontrivial
size) and making the loop longer.
Fix by using an allocating_section.
Recently, I have introduced cf_stats into the database, propagating all the way
back to the column family. The problem, however, is that some tests create a
column family config themselves instead of going through make_column_family.
That is ultimately ok if those tests are not expected to flush memtables. But
if they are, the cf_stats pointer will be null and we will crash. Although
there are many solutions to this, the one that is in tune with our current
practices is to have the test that requires it provide an empty cf_stats storage
area that can be written to. That's already how we handle the disk directory and
other things like compaction properties.
With this patch, test.py passes again.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
This patch substitutes int64_t for uint32_t as the type for
commitlog_total_space_in_mb. Moving to 64 bits is not strictly needed, since even a
signed 32-bit type would allow us to easily handle 2TB. But since we store that
in the commitlog as a 64-bit value, let's match it.
Moving from unsigned to signed, however, allows us to represent negative
numbers. With that in place, we can change the semantics of the value
slightly, so as to allow a negative number to mean "all memory".
The reason behind this is that the default value, 8GB, is an artifact of the
JVM. We don't need that, and in a many-shards configuration, each shard flushes
the commitlog way too often, since 8GB / many_shards = small_number.
8GB also happens to be a popular heap size for C* in the JVM. For us, we would
like to equate that (at least) with the amount of memory. The problem is how to
do that without introducing new options or changing the semantics of existing
options too radically.
The proposed solution will allow us to still parse C* yaml files, since those
will always have positive numbers, while introducing our own defaults.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
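The resulting semantics can be sketched as follows (a hypothetical helper, not the patch's actual code):

```cpp
#include <cstdint>

// The yaml value is in MB and signed; a negative value means
// "use all of this shard's memory" (our own default), while C* yaml
// files always carry positive numbers and keep their old meaning.
inline int64_t commitlog_total_space_bytes(int64_t config_mb,
                                           int64_t shard_memory_bytes) {
    if (config_mb < 0) {
        return shard_memory_bytes;
    }
    return config_mb * 1024 * 1024;  // MB -> bytes
}
```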
The Debian package system has two types of packages, 'native' and 'non-native'.
A 'native' package is just for Debian; it contains the debian/ directory in the source tar.gz and doesn't have a debian.tar.gz.
A 'non-native' package has an orig.tar.gz, which is the upstream source tarball, plus a debian.tar.gz which contains the debian/ directory.
Scylla is 'native' now but should be 'non-native' since it is not just for Debian, so move debian/ to dist/ubuntu/, make the orig.tar.gz using git-archive-all, copy dist/ubuntu/debian/ to debian/, then generate the debian.tar.gz.
atomic_cell will soon become type-aware, so add helpers to class operation
that can supply the type, as it is available in operation::column.type.
(the type will be used in following patches)
schema_tables manages some boolean columns stored in system tables; it
dynamically creates them from C++ values. But as we lacked a bool->data_value
conversion, the C++ value was converted to an int32_type. Somehow this didn't
cause any problems, but with some pending patches I have, it does.
Add a bool->data_value converting constructor to fix this.
Since bytes is a very generic value that is returned from many calls,
it is easy to pass it by mistake to a function expecting a data_value,
and to get a wrong result. It is impossible for the data_value constructor
to know if the argument is a genuine bytes variable, a data_value of another
type, but serialized, or some other serialized data type.
To prevent misuse, make the data_value(bytes) constructor
(and the complementary data_value(optional<bytes>)) explicit.
When do_stop_native_transport exits, cserver is destroyed which can
happen before cserver->stop(). Fix by capturing cserver in
cserver->stop()'s continuation to extend its lifetime. The same for
thrift server.
scylla: scylla/seastar/core/sharded.hh:327: seastar::sharded<Service>::~sharded()
[with Service = transport::cql_server]: Assertion `_instances.empty()' failed.
When analyzing a recent performance issue, I found it helpful to keep track of
the number of memtables that are currently in flight, as well as how much memory
they are consuming in the system.
Although those are memtable statistics, I am grouping them under the "cf_stats"
structure: the column family being a central piece of the puzzle, it is reasonable
to assume that many metrics about it would be potentially welcome in the future.
Note that we don't want to reuse the "stats" structure in the column family: for one,
the fields don't always map precisely (pending flushes, for instance, only tracks explicit
flushes), and the stats structure is also a lot more complex than we need.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
* seastar 5c10d3e...20bf03b (5):
> do not re-throw exception to get to an exception pointer
> Adding timeout counter to the rpc
> configure.py: support for pkg-config before release 0.28
> future: don't forget to warn about ignored exception
> tutorial: continue network API section
Found by debug build
==10190==ERROR: AddressSanitizer: new-delete-type-mismatch on 0x602000084430 in thread T0:
object passed to delete has wrong type:
size of the allocated type: 16 bytes;
size of the deallocated type: 8 bytes.
#0 0x7fe244add512 in operator delete(void*, unsigned long) (/lib64/libasan.so.2+0x9a512)
#1 0x3c674fe in std::default_delete<dht::range_streamer::i_source_filter>::operator()(dht::range_streamer::i_source_filter*)
const /usr/include/c++/5.1.1/bits/unique_ptr.h:76
#2 0x3c60584 in std::unique_ptr<dht::range_streamer::i_source_filter, std::default_delete<dht::range_streamer::i_source_filter> >::~unique_ptr()
/usr/include/c++/5.1.1/bits/unique_ptr.h:236
#3 0x3c7ac22 in void __gnu_cxx::new_allocator<std::unique_ptr<dht::range_streamer::i_source_filter,
std::default_delete<dht::range_streamer::i_source_filter> > >::destroy<std::unique_ptr<dht::range_streamer::i_source_filter,
std::default_delete<dht::range_streamer::i_source_filter> > >(std::unique_ptr<dht::range_streamer::i_source_filter,
std::default_delete<dht::range_streamer::i_source_filter> >*) /usr/include/c++/5.1.1/ext/new_allocator.h:124
...
Fixes#549.
Being clinically absent-minded, I left aggregate query support (i.e. count(...))
out of the "paging" change set.
This adds repeated paged querying to do aggregate queries (similar to
origin). Uses "batched" paging.
Until the compaction manager API is ready, its failing command
causes problems with nodetool-related tests.
This patch stubs the compaction manager logic so it will not fail.
It will be replaced by an actual implementation when the equivalent
code in compaction is ready.
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
This patch adds a compaction info object and an API that returns it.
It will be mapped to the JMX getCompactions that returns a map.
The use of an object is more RESTful and will be better documented in
the swagger definition file.
For compatibility reasons, compaction_strategy should accept both class
name strategy and the full class name that includes the package name.
In origin the resulting name depends on the configuration; we cannot mimic
that as we are using an enum for the type.
So currently the returned class name remains the class itself; we can
consider changing it in the future.
If the name is org.apache.cassandra.db.compaction.Name then it will be
compared as Name.
The error message was modified to report the name it was given.
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
Fixes#545
"Slight file format change for commitlog segments, now including
a scylla "marker". Allows for fast-fail if trying to load an
Origin segment.
WARNING: This changes the file format, and there is no good way for me to
check if a CL is "old" scylla, or Origin (since "version" is the same). So
either "old" scylla files also fail, or we never fail (until later, and
worse). Thus, if upgrading from older to this patch ensure to
have cleaned out all commit logs first."
Fixes#355
"Implements query paging similar to origin. If driver sets a "page size" in
a query, and we cannot know that we will not exceed this limit in a single
query, the query is performed using a "pager" object, which, using modified
partition ranges and query limits, keeps track of returned rows to "page"
through the results.
Implementation structure sort of mimics the origin design, even though it
is maybe a little bit overkill for us (currently). On the other hand, it
does not really hurt.
This implementation is tested using the "paging_test" subset in dtest.
It passes all tests except:
* test_paging_using_secondary_indexes
* test_paging_using_secondary_indexes_with_static_cols
* test_failure_threshold_deletions
The first two because we don't have secondary indexes yet, the latter
because the test depends on "tombstone_failure_threshold" in origin.
Potential todo: Currently the pager object does not shortcut result
building fully when page limit is exceeded. Could save a little work
here, but probably not very significant."
Allows us to fail fast if someone tries to replay an Origin commit log.
WARNING: This changes the file format, and there is no good way for me to
check if a CL is "old" scylla, or Origin (since "version" is the same). So
either "old" scylla files also fail, or we never fail (until later, and
worse). Thus, if upgrading from older to this patch, likewise, ensure to
have cleaned out all commit logs first.
* Static query method to determine if paging might be required
(very conservative - almost all queries will be paged, methinks).
* Static factory method for pager
* Actual pager implementation
Pager object uses three variables to keep track of paging state:
1.) Last partition key - partition key of last partition processed
-> next partition to start process
2.) Last clustering key, i.e. row offset within last key partition,
i.e. how far we got last time
3.) Max remaining - max rows to process further, i.e. initial limit -
processed so far
Partition ranges are modified/removed so that we begin with "Last key",
if present. (Or end with, in the case of reversed processing)
A counting visitor then keeps count of rows to include in processing.
Basic interface for paging control objects.
We probably do not need virtual behaviour for paging, but on the other
hand it does not really cost much, and it keeps a nice symmetry with
origin.
Allows for having more than one clustering row range set, depending on
PK queried (although right now limited to one - which happens to be exactly
the number multiplexing paging needs... What a coincidence...)
Encapsulates the row_ranges member in a query function, and if needed holds
ranges outside the default one in an extra object.
Query result::builder::add_partition now fetches the correct row range for
the partition, and this is the range used in subsequent iteration.
Note: serial format blob is different compared to origin, due to scylla's
different internal architecture. I.e. we query actual rows.
But drivers etc ignore the content of the blob, it is opaque.
Currently, there are multiple places we can close a session, this makes
the close code path hard to follow. Remove the call to maybe_completed
in follower_start_sent to simplify closing a bit.
- stream_session::follower_start_sent -> maybe_completed()
- stream_session::receive_task_completed -> maybe_completed()
- stream_session::transfer_task_completed -> maybe_completed()
- on receive of the COMPLETE_MESSAGE -> complete()
After running nodetool decommission for node 127.0.0.2, on node 127.0.0.1 I saw:
DEBUG [shard 0] gossip - failure_detector: Forcing conviction of 127.0.0.1
TRACE [shard 0] gossip - convict ep=127.0.0.1, phi=8, is_alive=1, is_dead_state=0
TRACE [shard 0] gossip - marking as down 127.0.0.1
INFO [shard 0] gossip - inet_address 127.0.0.1 is now DOWN
DEBUG [shard 0] storage_service - on_dead endpoint=127.0.0.1
This is wrong since the argument for send_gossip_shutdown should be the
node being shutdown instead of the live node.
Since the introduction of sets::element_discarder sets::discarder is
always given a set, never a single value.
Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
Currently sets::discarder is used by both the set-difference and the
single-element-removal operations. To distinguish between them the discarder
checks whether the provided value is a set or something else; this won't
work, however, if a set of frozen sets is created.
Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
Error handling in column_family::try_flush_memtable_to_sstable() is
misplaced. It happens after update_cache(), so writing sstable may
have succeeded, but moving memtable into the cache may have failed.
update_cache() destroys memtable even if it fails, but error handler
is not aware of it (it does not even distinguish whether error happened
during sstable creation or moving into cache) and when it tells caller
to retry it retries with already destroyed memtable. Fix it by ignoring
moving to cache errors.
This reverts commit fff37d15cd.
Says Tomek (and the comment in the code):
"update_cache() must be called before unlinking the memtable because cache + memtable at any time is supposed to be authoritative source of data for contained partitions. If there is a cache hit in cache, sstables won't be checked. If we unlink the memtable before cache is updated, it's possible that a query will miss data which was in that unlinked memtable, if it hits in the cache (with an old value)."
Error handling in column_family::try_flush_memtable_to_sstable() is
misplaced. It happens after update_cache(), so writing sstable may
have succeeded, but moving memtable into the cache may have failed.
update_cache() destroys memtable even if it fails, but error handler
is not aware of it (it does not even distinguish whether error happened
during sstable creation or moving into cache) and when it tells caller
to retry it retries with already destroyed memtable. Fix it by ignoring
moving to cache errors.
nodetool decommission hangs forever due to a recursive lock.
decommission()
with api lock
shutdown_client_servers()
with api lock
stop_rpc_server()
with api lock
stop_native_transport()
Fix it by calling helpers for stop_rpc_server and stop_native_transport
without the lock.
std::set_difference requires the container to be sorted which is not
true here, use remove_if.
Do not use assert, use throw instead so that we can recover from this
error.
Currently the error code is attached to a future returned by when_all(), which
is never an exceptional one, but it may hold an exceptional future as its
first element. Move the error handling close to where the error it tries to
catch is generated instead.
Let's move the code that prints that a compaction succeeded only
after the code that catches exception on either read or write
fibers. Let's also get rid of done and use repeat instead in
the read fiber.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
If a write timeout and the last acknowledgement needed for CL happen simultaneously,
_ready can be set to be exceptional by the timeout handler, but since
removal of the response handler happens in a continuation it may be
reordered with last-ack processing, and there _ready will be set again,
which will trigger an assert. Fix it by removing the handler immediately;
no need to wait for a continuation. It makes the code simpler too.
get_cm_stats gets a pointer to a field in the stats object. It
should capture it by value, or a segmentation fault may occur when the
caller goes out of scope.
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
Currently, we don't let the user know even what is the filename that failed.
That information should be included in the message.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
This assert (in the write fiber) would fail if the read fiber failed,
because the variable done will not be set to true.
The use of assert is very bad, because it prevents scylla
from proceeding, which would otherwise be possible.
To solve it, let's trigger an exception if done is not true.
We do have code that will wait for both read and write fibers,
and catch exceptions, if any.
Closes#523.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Since 4641dfff24, query_state keeps a
copy of client_state, not a reference. Therefore _cl is no longer
updated by queries using _qp. Fix by using the client_state from _qp.
Fixes#525.
All responses sent from the server have protocol version set to
connection::_version which is set to the version used by the client
in its first message. However, if the protocol version used by the
client is unsupported or invalid, the server should use the latest
version it recognizes.
This solves problem with version negotiation with Java driver. The
driver first sends a request in the latest version it recognizes, if
that fails it retries with the version that server has used in the error
message. If that fails as well it gives up. However, since Scylla always
responds with the same version that the client has used the negotiation
always fails if the client supports more protocol versions than the
server.
Refs #317.
Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
Get initial tokens specified by the initial_token in scylla.conf.
E.g.,
--initial-token "-1112521204969569328,1117992399013959838"
--initial-token "1117992399013959838"
It can be multiple tokens split by comma.
"This series adds the missing functionality that the nodetool describering would work.
It imports the missing functionality from origin.
After this patch the API:
GET /storage_service/describe_ring/{keyspace}
will be available"
This patch changes the API to support describe ring instead of describe
ring jmx, which will be implemented in the jmx server.
The API will return a list of objects instead of a string.
An additional API was added as the equivalent of the jmx call with an
empty param.
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
This patch adds the following methods implementation:
getRpcaddress
getRangeToAddressMap
getRangeToAddressMapInLocalDC
describeRing
getAllRanges
Those methods are used as part of the describe_ring method
implementation.
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
The storage server uses the token_range in origin to return information
about the ring.
This imports the structures. The functionality in origin is redundant in
this case and was not imported.
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
Use all the disks except the one for rootfs for RAID0 which stores
scylla data. If only one disk is available warn the user since currently
our AMI's rootfs is not XFS.
[fedora@ip-172-31-39-189 ~]$ cat WARN.TXT
WARN: Scylla is not using XFS to store data. Performance will suffer.
Tested on AWS with 1 disk, 2 disks, 7 disk case.
(cherry picked from commit 49d6cba471)
Mistakenly not included in the yum repository for the AMI patchset, but it's needed.
Signed-off-by: Takuya ASADA <syuu@cloudius-systems.com>
(cherry picked from commit 8587c4d6b3)
The nodetool cleanup command is used in many of the tests; because the
API call is not implemented, it causes the tests to fail.
As a workaround until cleanup is implemented, the method
returns successfully.
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
Normally an API call that is not implemented should fail; there are
cases where, as a workaround, an API call is stubbed. In those cases a warning
is added to indicate that the API is not implemented.
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
This patch does the following:
It adds a getter for the completed response messages (i.e. the total
messages that were sent by the server).
It replaces the returned mapping for the statistics to use the key/value
notation that is used on the jmx side.
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
This adds the read repair statistics to the storage_proxy stats and
implements incrementing the counter values.
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
The API needs to get the stats from the rpc server, that is hidden from the
messaging service API.
This patch adds a foreach function that goes over all the server stats
without exposing the server implementation.
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
"The main objective of the series is to introduce statistics about ongoing
read/writes and especially those that are done in the background (acknowledged,
but uncompleted), but it contains some cleanups as well."
Add statistics for ongoing reads and ongoing background reads. A read is
a background one if it was acknowledged but there is still work to do to
complete it.
"Commit 4cd9c4c0c5441cf55e280c6f2f2e5529426b9c98 introduced a minor
issue: a wrong snitch instance may be used when updating a Gossiper state
(if I/O CPU is different from CPU0).
In order to fix this issue a local snitch instance on CPU0 should be used,
just like a Gossiper local instance.
We have to move some interfaces to i_endpoint_snitch
from being private in a gossiping_property_file_snitch in order to be
able to access it using snitch_ptr handle."
Don't ignore yet another returned future in reload_configuration().
Since commit 5e8037b50a
storage_service::gossip_snitch_info() returns a future.
This patch takes this into account.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
When we access a gossiper instance we use the _gossip_started
state of a snitch, which is set in the gossiper_starting() method.
The gossiper_starting() method, however, is invoked by the gossiper on CPU0
only; therefore the _gossip_started snitch state will be set only for the
instance on CPU0.
Therefore instead of synchronizing the _gossip_started state between
all shards we just have to make sure we check it on the right CPU,
which is CPU0.
This patch fixes this issue.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Align the handling of the prefer_local parameter read
from a snitch property file with the rest of the similar parameters (e.g. dc and rack):
they are read and their values are distributed (copied) across all shards'
instances.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Make reload_gossiper_state() be a virtual method
of a base class in order to allow calling it using a snitch_ptr
handle.
The base class already has a ton of virtual methods, so no harm is
done performance-wise. Using virtual methods instead of doing a
dynamic_cast results in much cleaner code, however.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Move the member and add an access method.
This is needed in order to be able to access this state using
snitch_ptr handle.
This also allows getting rid of the ec2_multi_region_snitch::_helper_added
member, since it duplicates _gossip_started semantics.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
* seastar 9ae6407...258daf9 (6):
> rpc server: Add pending and sent messages to server
> scripts: posix_net_conf.sh: Use a generic logic for RPS configuring
> scripts: posix_net_conf.sh: allow passing a NIC name as a parameter
> doc: link to the tutorial
> tutorial: begin documenting the network API
> slab: remove bogus uintptr_t definition
"In 5e8037b50a (gossip: Futurize
add_local_application_state()) , we futurized add_local_application_state.
However, not all of the callers are futurized. Fix it up."
"- Fix snitch names from EC2XXX to Ec2XXX to align with configuration.
- Copy cassandra-rackdc.properties file to /var/lib/scylla/conf
- Set SCYLLA_HOME before booting process"
During testing build, the debugging statement at the end
of the function body (after return statements) causes compilation to
fail due to the flag -Werror=return-type:
service/storage_service.cc: In member function ‘future<> service::storage_service::clear_snapshot(sstring, std::vector<basic_sstring<char, unsigned int, 15u> >)’:
service/storage_service.cc:1358:1: error: control reaches end of non-void function [-Werror=return-type]
Which traces back to 21f84d77. Let's attach a then_wrapped()
clause to parallel_for_each() adding the debug message as
suggested by Avi.
CC: Glauber Costa <glommer@scylladb.com>
Signed-off-by: Lucas Meneghel Rodrigues <lmr@scylladb.com>
We are ignoring the future returned by seastar::async. Futurize it so the
caller can wait for the application state to be actually applied.
In addition, dropping the unused add_local_application_states function.
We use boost::any to convert between database values (stored in
serialized form) and native C++ values. boost::any captures information
about the data type (how to copy/move/delete etc.) and stores it inside
the boost::any instance. We later retrieve the real value using
boost::any_cast.
However, data_value (which has a boost::any member) already has type
information as a data_type instance. By teaching data_type instances about
the corresponding native type, we can eliminate the use of boost::any.
While boost::any is evil and eliminating it improves efficiency somewhat,
the real goal is growing native type support in data_type. We will use that
later to store native types in the cache, enabling O(log n) access to
collections, O(1) access to tuples, and more efficient large blob support.
"gossiping_property_file_snitch checks its property
file (cassandra-rackdc.properties) for changes every minute and
if there were changes it re-registers the helper and initiates
re-read of the new DC and Rack values in the corresponding places.
Therefore we need the ability to unregister/register the corresponding subscriber
at the same time when a subscriber list is possibly iterated by
some other asynchronous context on the current CPU.
The current gossiper implementation assumes that subscribers list may not be
changed from the context different from the one that iterates on their list.
So, this had to be fixed.
The locator::topology class was also missing an update_endpoint(ep)
interface, along with the corresponding token_metadata::update_topology(ep) wrapper.
Also there were some bugs in the gossiping_property_file::reload_configuration()
method."
In hindsight, it doesn't make much sense to print an
empty string, so let's only print stdout if it's
non-None and non-empty.
Signed-off-by: Lucas Meneghel Rodrigues <lmr@scylladb.com>
These functions were empty and now they have the intended code:
- Register the reconnectable_snitch_helper if "prefer_local"
parameter was given the TRUE value.
- Set the application INTERNAL_IP state to listen_address().
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
- Invoke reload_gossiper_state() and gossip_snitch_info() on CPU0 since
gossiper is effectively running on CPU0 therefore all methods
modifying its state should be invoked on CPU0 as well.
- Don't invoke any method on external "distributed" objects unless their
corresponding per-shard service objects have already been initialized.
- Update a local Node info in a storage_service::token_metadata::topology
when reloading snitch configuration when DC and/or Rack info has changed.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Introduce a subscribers_list class that exposes 3 methods:
- push_back(s) - adds a new element s to the back of the list
- remove(s) - removes an element s from the list
- for_each(f) - invoke f on each element of the list
- make a subscriber_list store shared_ptr to a subscriber
to allow removing (currently it stores a naked pointer to the object).
subscribers_list allows push_back() and remove() to be called while
another thread (e.g. seastar::async()) is in the middle of for_each().
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
New in v2:
- Simplify subscribers_list::remove() method.
- load_broadcaster: inherit from enable_shared_from_this instead
of async_sharded_service.
* seastar 9d8913a...9ae6407 (2):
> core/memory.cc: Declare member min_free_pages from cpu_pages struct
> http: All http replies should have version set
It may happen that the user will migrate a table to Scylla whose
compaction strategy isn't supported yet, such as date-tiered.
Let's handle that by falling back to size-tiered compaction
strategy and printing a warning message.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Do not hold the api lock while streaming the data, since it might take a
long time and we need to allow other operations to proceed while we are in the
middle of rebuild.
remove() is the function used to remove every reference to a cf from
the compaction manager. This function works by removing cf from the
queue, and waiting for possible ongoing compaction on cf.
However, a cf may be re-queued by compaction manager task if there
is pending compaction by the end of compaction.
If cf is still referenced by the time remove() returns, we could end
up with a use-after-free. To fix that, a task shouldn't re-queue a
cf if it was asked to stop. The stat pending_tasks was also not
being updated when a cf was removed from the task queue.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
The only place local_dc is checked during mutation sending is in
send_to_live_endpoints(), but the current code passes it there through several
function-call layers. Simplify the code by getting local_dc directly where it
is used.
Since 4641dfff24 "service: Copy client
state to query state" after executing a query client state needs to be
merged back. If that's not done client_state::_last_timestamp_micros
won't be advanced properly and mutations originating from the same
source may have exactly the same timestamp.
Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
Fix some PEP8 problems found in the tester code:
* Wrong spacing around operators
* Lines between class and function definitions
* Fixed some of the larger than 80 column statements
* Removed an unused import
Signed-off-by: Lucas Meneghel Rodrigues <lmr@scylladb.com>
_unreachable_endpoints is replicated to all cores. No need to query on
core 0.
This also fixes a bug in storage_proxy::truncate_blocking
which might access _unreachable_endpoints on non-zero cores.
The current code will try to print the output of a
subprocess.Popen().communicate() call even if that
call raised an exception and that output is None.
Let's fix this problem by only printing the output
if it's not None.
Signed-off-by: Lucas Meneghel Rodrigues <lmr@scylladb.com>
Since commit 5613979a85
broadcast address has to be set before it's used for the first
time.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
* seastar 501e4cb...9d8913a (3):
> Add mutable to with_lock and do_with
> app-template: disable collectd by default
> reactor: use fdatasync() instead of fsync()
"Currently, CQL requests are processed on the same CPU core where the
connection lives in. This series adds infrastructure for migrating CQL
processing to other cores and implements a round-robin load balancing
algorithm that can be enabled with the "--load-balance=round-robin"
command line option. Load balancing is not enabled by default because we
need to first run performance tests to determine if the simple
round-robin algorithm is sufficient, or whether we need to implement more
sophisticated dynamic load balancing."
In preparation for processing queries on shards other than the one the
connection lives on, merge client state changes in process_request().
Signed-off-by: Pekka Enberg <penberg@scylladb.com>
In preparation for processing CQL requests on a different core than the one
the connection lives on, copy client state to query state for
processing and merge back the results after we're done.
Signed-off-by: Pekka Enberg <penberg@scylladb.com>
In preparation for spreading request processing to multiple cores, make
sure CQL response is written out on the connection shard.
Signed-off-by: Pekka Enberg <penberg@cloudius-systems.com>
cql_query_test didn't configure the broadcast address before
it was used for the first time.
Broadcast address is an essential Node's configuration.
There is an assert in utils::fb_utils::get_broadcast_address()
that ensures that broadcast address has been properly configured
before it's used for the first time and it is triggered without
this patch.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
"Fixes for commitlog (debug) test failures related to shutdowns.
Note that most of the fixes here are only really related to the tests
failing, not really real scylla runs. However, at some point we'll
have real shutdown in scylla as well (not just hard exit), at which
point this becomes more relevant there as well.
Main issue was post-flush continuation chains for stats update
remaining unexecuted, due to task reordering, once the commitlog
object itself had been destroyed. This could have been handled by just
making the stats object a shared pointer, but in general it seems more
prudent to enforce having all tasks completed after shutdown.
* Change commitlog shutdown to use gate+wait for all outstanding ops
(flush, write, timer). Thus we can ensure everything is finished
when returning from "shutdown".
* Fix bug with "commitlog::clear" (test method) not doing the intended deed
* Most importantly, fix the tests themselves, cleaning up old crud, and
fixing invalid assumptions (CL behaviour changed quite a bit since tests
were created), and remove races.
Disclaimer: I've _never_ managed to reproduce the debug tests failing
like in jenkins locally (though I managed to provoke other failures),
but at least jenkins runs with this series have been clean. Knock knock."
Now that #475 is solved and read_indexes() guarantees to return disjoint
sets of keys, the sstable key reader can be simplified; namely, only two key
lookups are needed (the first and the last one) and there is no need for
range splitting.
Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
"This series add the mighty EC2MultiRegionSnitch and some missing
multi-DC related functionality:
- Use the proper Broadcast Address: either the one from the
.yaml configuration (if present) or the one configured by some
scylla component (e.g. snitch).
- Introduce the ability to switch to internal IPs when connecting
to Nodes in the same data center.
- Store the known internal IPs in the system.peers table and
load them immediately during boot.
This series also contains some related fixes done on the way."
* Do close + fsync on all segments
* Make sure all pending cycle/sync ops are guarded with a gate, and
explicitly wait for this gate on shutdown to make sure we don't
leave hanging flushes in the task queue.
* Fix bug where "commitlog::clear" did not in fact shut down the CL,
due to "_shutdown" being already set.
Note: This is (at least currently) not an issue for anything else than tests,
since we don't shutdown the normal server "properly", i.e. the CL itself
will not go away, and hanging tasks are ok, as long as the sync-all is done
(which it was previously). But, to make tests predictable, and future-proof
the CL, this is better.
sstable level is set to zero by default, but it may be set to
a different value if a new sstable is the result of leveled
compaction. This is done outside write_components.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
We were incorrectly setting s.header.min_index_interval to
BASE_SAMPLING_LEVEL, which luckily is the default value for
min index interval. BASE_SAMPLING_LEVEL was also used as
the min index interval when checking if the estimated
number of summary entries is greater than the limit.
To fix problems, get min index interval from schema and
use this value to check the limit.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
In addition to what EC2Snitch does, this snitch registers
a reconnectable_snitch_helper that makes messaging_service
connect to internal IPs when it connects to nodes in the same
data center as the current node.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
New in v4:
- Added dual license in newly added files.
New in v3:
- Returned the Apache license.
New in v2:
- Update the license to the latest version. ;)
Add utils::fb_utilities::set_broadcast_address().
Set it to either the broadcast_address or listen_address configuration value,
whichever is set. If neither of the two values is set, abort the
application.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
New in v2:
- Simplify the utils::fb_utilities::get_broadcast() logic.
reconnectable_snitch_helper implements i_endpoint_state_change_subscriber
and triggers a reconnect using the internal IP to nodes in the
same data center when one of the following events happens:
- on_join()
- on_change() - when INTERNAL_IP state is changed
- on_alive()
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
New in v4:
- Added dual license for newly added files.
New in v3:
- Fix reconnect() logic.
- Returned the Apache license.
- Check if the new local address is not already stored in the cache.
- Get rid of get_ep_addr().
New in v2:
- Update the license to the latest version. ;)
Added load_config() function that reads the AWS info and the property file
and distributes the read values to all shards.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
This map will contain the (internal) IPs corresponding to specific nodes.
The mapping is also stored in the system.peers table.
So, instead of always connecting to the external IP, messaging_service::get_rpc_client()
will query _preferred_ip_cache, and only if there is no entry for a given
node will it connect to the external IP.
We call init_local_preferred_ip_cache() at the end of the system table init.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
New in v2:
- Improved the _preferred_ip_cache description.
- Code styling issues.
New in v3:
- Make get_internal_ip() public.
- get_rpc_client(): restore a get_preferred_ip() usage dropped
in v2 by mistake during rebase.
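The lookup described above can be sketched as follows (a minimal model, not Scylla's actual messaging_service; the member and method names mirror the text but the details are assumptions):

```cpp
#include <cassert>
#include <string>
#include <unordered_map>

// The cache maps a node's external IP to its preferred (internal) IP.
// When no entry exists, the external IP itself is used for the connection.
struct preferred_ip_cache {
    std::unordered_map<std::string, std::string> _preferred_ip_cache;

    // Returns the address get_rpc_client() should connect to.
    std::string get_preferred_ip(const std::string& external_ip) const {
        auto it = _preferred_ip_cache.find(external_ip);
        return it != _preferred_ip_cache.end() ? it->second : external_ip;
    }
};
```

The fallback to the external IP keeps the behavior unchanged for nodes in other data centers, which never get an internal-IP entry.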
This function erases shard_info objects from all _clients maps.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
New in v2:
- Use remove_rpc_client_one() instead of direct map::erase().
- Ensure messaging_service::stop() blocks until all rpc_protocol::client::stop()
are over.
- Remove the async code from rpc_protocol_client_wrapper destructor - call
for stop() everywhere it's needed instead. Ensure that
rpc_protocol_client_wrapper is always "stopped" when its destructor is called.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
New in v3:
- Code style fixes.
- Killed rpc_protocol_client_wrapper::_stopped.
- Killed rpc_protocol_client_wrapper::~rpc_protocol_client_wrapper().
- Use std::move() for saving the shared pointer before erasing the
entry from _clients in remove_rpc_client_one(), in order to avoid
extra ref count bumping. This makes the code cleaner. It would also
require fewer changes if we decide to increase the _clients size in
the future.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
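The std::move() pattern described above can be sketched like this (the names are hypothetical stand-ins, with a shared_ptr<int> playing the role of the rpc client wrapper):

```cpp
#include <cassert>
#include <memory>
#include <unordered_map>

using client_ptr = std::shared_ptr<int>;  // stand-in for the rpc client wrapper

// Moving the shared pointer out of the map before erasing the entry keeps
// the client alive for a subsequent stop() without bumping the ref count.
client_ptr remove_rpc_client_one(std::unordered_map<int, client_ptr>& clients, int id) {
    auto it = clients.find(id);
    if (it == clients.end()) {
        return nullptr;
    }
    auto c = std::move(it->second);  // no ref-count increment here
    clients.erase(it);
    return c;  // the caller can now call stop() on the last owner
}
```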
get_preferred_ips() returns all preferred_ip's stored in system.peers
table.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
New in v2:
- Get rid of extra std::move().
Scylla is not "daemon" (witch forks twice), but it can be "fork" (forks once) when we don't use "exec" to call startup scripts.
Fixes#495
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
"This series adds two types of functionality to the storage_proxy, it adds the
API that returns the timeout constants from the config and it aligned the
metrics of the read, write and range to origin StorageProxy metrics."
read_indexes() will not work for a column family whose minimum
index interval differs from the sampling level, or whose sampling
level is lower than BASE_SAMPLING_LEVEL.
That's because the function was using the sampling level to determine
the interval between indexes that are stored by the index summary.
Instead, a method from downsampling will be used to calculate the
effective interval based on both the minimum_index_interval and
sampling_level parameters.
Fixes issue #474.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
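The effective-interval calculation described above can be sketched as below. The formula follows Cassandra's downsampling scheme and is an assumption here, not a quote of Scylla's code:

```cpp
#include <cassert>

// As the sampling level drops below BASE_SAMPLING_LEVEL, fewer summary
// entries are kept, so the effective spacing between stored indexes grows.
constexpr int BASE_SAMPLING_LEVEL = 128;

int effective_index_interval(int sampling_level, int min_index_interval) {
    return (BASE_SAMPLING_LEVEL * min_index_interval) / sampling_level;
}
```

At full sampling (sampling_level == BASE_SAMPLING_LEVEL) the effective interval equals the configured min index interval, which is why the two were easy to confuse.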
This patch adds the implementation of the read, write and range
estimated histograms and total latency.
After this patch the following url will be available:
/storage_proxy/metrics/read/estimated_histogram/
/storage_proxy/metrics/read
/storage_proxy/metrics/write/estimated_histogram/
/storage_proxy/metrics/write
/storage_proxy/metrics/range/estimated_histogram/
/storage_proxy/metrics/range
Signed-off-by: Amnon Heiman <amnon@cloudius-systems.com>
This patch closes the gap between the storage_proxy read, write and range
metrics and the API.
Each of the metrics will have a histogram, an estimated histogram
and a total.
The patch contains the definitions for the following:
get_read_estimated_histogram
get_read_latency
get_write_estimated_histogram
get_write_latency
get_range_estimated_histogram
get_range_latency
Signed-off-by: Amnon Heiman <amnon@cloudius-systems.com>
Needs merge with storage_proxy.
This patch exposes the configuration timeout values of the timers.
The timers return their values in seconds; the swagger definition
file was modified to reflect the change.
Signed-off-by: Amnon Heiman <amnon@cloudius-systems.com>
We are not removing the range. The current and new nodes responsible for
the range are calculated, and we only need to stream data to
nodes = new nodes - current nodes. E.g.,
Assume we have node 1 and node 2 in the cluster, RF=2. If we remove node2:
Range (3c 25 fa 7e d2 2a 26 b4 , 81 2a a7 32 29 e5 3a 7c ],
current_replica_endpoints={127.0.0.1, 127.0.0.2} new_replica_endpoints={127.0.0.1}
Range (3c 25 fa 7e d2 2a 26 b4 , 81 2a a7 32 29 e5 3a 7c ] already in all replicas
no data will be streamed to node 1 since it already has it.
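The set difference described above can be sketched as follows (IP strings stand in for the real endpoint type):

```cpp
#include <cassert>
#include <set>
#include <string>

// Endpoints to stream to = new replicas minus current replicas.
// A node that is already a replica needs no streaming.
std::set<std::string> endpoints_to_stream(const std::set<std::string>& current,
                                          const std::set<std::string>& new_replicas) {
    std::set<std::string> result;
    for (const auto& ep : new_replicas) {
        if (!current.count(ep)) {
            result.insert(ep);
        }
    }
    return result;
}
```

With the example in the text, 127.0.0.1 is in both sets, so the difference is empty and nothing is streamed.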
This patch adds a definition and a stub for the compaction history. The
implementation should read from the compaction history table and return
an array of results.
Signed-off-by: Amnon Heiman <amnon@cloudius-systems.com>
"This series adds an API to get and set the log level.
After this series it will be possible to use the following url:
GET/POST:
/system/logger
/system/logger/{name}"
"This series adds the functionality that is required for nodetool
describecluster
It uses the gossiper for get cluster name and get partitioner. The
describe_schema_versions functionality is missing and a workaround is used so
the command would work.
After this series an example for nodetool describecluster:
./bin/nodetool describecluster
Cluster Information:
Name: Test Cluster
Snitch: org.apache.cassandra.locator.SimpleSnitch
Partitioner: org.apache.cassandra.dht.Murmur3Partitioner
Schema versions:
127.0.0.1: [48c4e6c8-5d6a-3800-9a3a-517d3f7b2f26]"
"This series add code for computing mutation_partition difference.
For mutations A and B:
diffA = A.difference(B);
diffB = B.difference(A);
AB = A.apply(B);
diffA is the minimal mutation that when applied to B makes it equal
to AB and diffB is the minimal mutation that applied to A results in AB.
Fixes #430."
"The snapshots API need to expose GET methods so people can
query information on them. Now that taking snapshots is supported,
this relatively simple series implement get_snapshot_details, a
column family method, and wire that up through the storage_service."
Fix for (mainly) test failures (use-after-free).
I.e. the test case test_commitlog_delete_when_over_disk_limit causes a
use-after-free because the test shuts down before a pending flush is done,
and the segment manager is already gone -> crash writing stats.
Now, we could make the stats a shared pointer, but we should never
allow an operation to outlive the segment_manager.
In normal op, we _almost_ guarantee this with the shutdown() call,
but technically, we could have a flush continuation trailing somewhere.
* Make sure we never delete segments from segment_manager until they are
fully flushed
* Make test disposal method "clear" be more defensive in flushing and
clearing out segments
"Tested with:
- start node 1
- insert value
- start node 2
- insert value
- decommission node2
I can see from the log that the data range belonging to node2 is streamed to
node1, cqlsh queries on node1 return all the data, and node2 is not in the
live node list from node1's view."
"This patchset implements load_new_sstables, allowing one to move tables inside the
data directory of a CF, and then call "nodetool refresh" to start using them.
Keep in mind that for Cassandra, this is deemed an unsafe operation:
https://issues.apache.org/jira/browse/CASSANDRA-6245
It is still something we should not recommend - unless the CF is totally
empty and not yet used - but we can do a much better job on the safety front.
To guarantee that, the process works in four steps:
1) All writes to this specific column family are disabled. This is a horrible thing to
do, because dirty memory can grow much more than desired during this. Throughout
this implementation, we try to keep the time during which writes are disabled
to a bare minimum.
While disabling the writes, each shard will tell us about the highest generation number
it has seen.
2) We will scan all tables that we haven't seen before. Those are any tables found in the
CF datadir, that are higher than the highest generation number seen so far. We will link
them to new generation numbers that are sequential to the ones we have so far, and end up
with a new generation number that is returned to the next step
3) The generation number computed in the previous step is now propagated to all CFs, which
guarantees that all further writes will pick generation numbers that won't conflict with
the existing tables. Right after doing that, the writes are resumed.
4) The tables we found in step 2 are passed on to each of the CFs. They can now load those
tables while operations to the CF proceed normally."
This series adds histograms to the column family for live cells scanned and
tombstones scanned.
It exposes those histograms via the API instead of the stub implementation
that currently exists.
The update of the histogram values will be added in a different
series.
Signed-off-by: Amnon Heiman <amnon@cloudius-systems.com>
This is the storage_service implementation of load_new_sstables, and this is
where most of the complication lives.
Keep in mind that for Cassandra, this is deemed an unsafe operation:
https://issues.apache.org/jira/browse/CASSANDRA-6245
It is still something we should not recommend - unless the CF is
totally empty and not yet used - but we can do a much better job on the safety front.
To guarantee that, the process works in four steps:
1) All writes to this specific column family are disabled. This is a horrible thing to
do, because dirty memory can grow much more than desired during this. Throughout
this implementation, we try to keep the time during which writes are disabled
to a bare minimum.
While disabling the writes, each shard will tell us about the highest generation number
it has seen.
2) We will scan all tables that we haven't seen before. Those are any tables found in the
CF datadir, that are higher than the highest generation number seen so far. We will link
them to new generation numbers that are sequential to the ones we have so far, and end up
with a new generation number that is returned to the next step
3) The generation number computed in the previous step is now propagated to all CFs, which
guarantees that all further writes will pick generation numbers that won't conflict with
the existing tables. Right after doing that, the writes are resumed.
4) The tables we found in step 2 are passed on to each of the CFs. They can now load those
tables while operations to the CF proceed normally.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
CF-level code to load new SSTables. There isn't really a lot of complication
here. We don't even need to repopulate the entire SSTable directory: by
requiring that the external service who is coordinating this tell us explicitly
about the new SSTables found in the scan process, we can just load them
specifically and add them to the SSTable map.
All new tables will start their lives as shared tables, and will be unshared
when possible: this all happens inside add_sstable and there isn't
really anything special on this front.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
The current code assumes a particular dir/generation pair. We
will use it for a more generic case. This code could really use some
cleanup, by the way; we should do it later.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
Before loading new SSTables into the node, we need to make sure that their
generation numbers are sequential (at least if we want to follow Cassandra's
footsteps here).
Note that this is unsafe by design. More information can be found at:
https://issues.apache.org/jira/browse/CASSANDRA-6245
However, we can already do slightly better in two ways:
Unlike Cassandra, this method takes as a parameter a generation number. We
will not touch tables that are before that number at all. That number must be
calculated from all shards as the highest generation number they have seen themselves.
Calling load_new_sstables in the absence of new tables will therefore do nothing,
and will be completely safe.
It will also return the highest generation number found after the reshuffling
process. New writers should start writing after that, so new tables
that are created will have a generation number higher than any of these,
and will therefore be safe.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
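The renumbering rule described above can be sketched as follows (a minimal model under assumed names; the real code links files on disk rather than rewriting integers):

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Tables at or below the highest generation already seen are left alone;
// newer ones are relinked under sequential generations after it. Returns
// the highest generation assigned, after which new writers may safely start.
int64_t reshuffle(std::vector<int64_t>& found_generations, int64_t highest_seen) {
    std::sort(found_generations.begin(), found_generations.end());
    int64_t next = highest_seen;
    for (auto& gen : found_generations) {
        if (gen > highest_seen) {
            gen = ++next;  // stand-in for linking the table to the new generation
        }
    }
    return next;
}
```

Note that with no new tables the loop does nothing and the function returns highest_seen unchanged, matching the "completely safe" claim in the text.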
During certain operations we need to stop writing SSTables. This is needed when
we want to load new SSTables into the system. They will have to be scanned by all
shards, agreed upon, and in most cases even renamed. Letting SSTables be written
at that point makes it inherently racy - especially with the rename.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
This will be used, for instance, when importing an SSTable.
We would like to force all new SSTables to sit at level 0 for
compaction purposes.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
In some situations (restoring a backup from load_new_sstables), we want to
change the SSTable generation number. This patch provides a procedure to
achieve that.
It does so by linking the old files to new ones, and then removing the old
ones.
The reason we link instead of removing, is that we want to make sure that in
case there is a crash in the middle, the old data is still accessible.
If the crash happens after the link is done but before we start removing the
old files, that is fine: we will end up with duplicated data that will
disappear after the next compaction.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
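The link-then-remove ordering described above can be sketched with std::filesystem (the filenames are illustrative; the real code operates on sstable component files):

```cpp
#include <filesystem>
#include <fstream>
#include <utility>
#include <vector>

namespace fs = std::filesystem;

// Hard-link every old file to its new name first, and only then remove the
// old names. A crash between the two phases leaves duplicated, but never
// lost, data.
void relink_then_remove(const std::vector<std::pair<fs::path, fs::path>>& files) {
    for (const auto& f : files) {
        fs::create_hard_link(f.first, f.second);  // phase 1: link everything
    }
    for (const auto& f : files) {
        fs::remove(f.first);                      // phase 2: drop old names
    }
}
```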
That is the way to generate groups of files for the SSTables, so we must do it.
Because the links were mostly used by processes like snapshots and backups,
where an external tool would (hopefully) verify the results, it was not that
serious.
But we now plan to use links to bring things into the main directory. It must
absolutely be done right.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
During some situations (restoring a snapshot for instance) we may want a file
to get a different generation. This patch changes the code in create_links
slightly, so that it is able to link not only to a different location, but to
files with a different name, possibly in the same location - that is equivalent
to a generation change.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
This is done on behalf of load_new_sstables: we would like to know which
components are present in the file, but without triggering the read for the
rest of the metadata.
As noted by Avi, using this directly can leave the SSTable in an inconsistent
state. We will have to fix it later, since this is not the first offender.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
There is no reason, aside from testing, for a table to just change its
generation number.
There will be, however, when we support loading new sstables. The method
needs to be completely rewritten anyway, so let's make sure the tests are not
using it.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
Avoid using long for it; let's use a fixed size instead. Let's use signed
rather than unsigned to avoid upsetting any code that we may have converted.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
The change to use consistency_level::ONE in send_batchlog_mutation
sort of fixes #478, but is not 100% correct.
When doing async_remove_from_batchlog, the CL is actually supposed to
be ANY.
Also, we should _not_ remove the batch log mutation from any nodes
if the mutate fails, since having it there in case of failure is sort of
the whole point of it. I.e. async_remove_from_batchlog should not be
called from a "finally", but from a "then".
Refs #478
From Pawel:
This series enables row cache to serve range queries. In order to achieve
that row cache needs to know whether there are some other partitions in
the specified range that are not cached and need to be read from the sstables.
That information is provided by key_readers, which work very similarly to
mutation_readers, but return only the decorated keys of the partitions in
the range. In the case of sstables, key_reader is implemented on top of the
partition index.
An approach like this has the disadvantage of needing to access the disk
even if all partitions in the range are cached. There are (at least) two
ways of dealing with that problem:
- cache the partition index - that will also help in all other places where
it is needed
- add a flag to cache_entry which, when set, indicates that the immediate
successor of the partition is also in the cache. Such flag would be set
by mutation reader and cleared during eviction. It will also allow
newly created mutations from memtable to be moved to cache provided that
both their successors and predecessors are already there.
The key_reader part of this patchset adds a lot of new code that probably
won't be used anywhere else, but the alternative would be to always
interleave reads from cache with reads from sstables, and that would be
heavier on the partition index, which isn't cached.
Fixes #185.
For CFStats, one of the things needed is the size used by the snapshots. Since
the bulk of the work is map-reducing the details and adding them together, we
will just call get_snapshot_details for the column family and selectively add
just what we need. No need for a separate method here.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
For each column family, the column family object can provide us with a map
between each snapshot it knows about and two sizes: the total size, and the
"real" (or live) size, which is how much extra space the snapshot is costing us.
This patch map-reduces all CFs to accumulate that system-wide, and then formats
it into a map of "snapshot_details". That is a more convenient format to be
consumed by our json generator.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
For each of the snapshots available, the api may query for some information:
the total size on disk, and the "real" size. As far as I could understand, the
real size is the size that is used by the SSTables themselves, while the total
size includes also the metadata about the snapshot - like the manifest.json
file.
Details follow:
In the original Cassandra code, total size is:
long sizeOnDisk = FileUtils.folderSize(snapshot);
folderSize recurses on directories, and adds file.length() for files. Again, my
understanding is that file_size() gives us the same as Java's length()
method.
The other value, real (or true) size is:
long trueSize = getTrueAllocatedSizeIn(snapshot);
getTrueAllocatedSizeIn seems to be a tree walker, whose visitor is an instance
of TrueFilesSizeVisitor. What that visitor does is add up the sizes of the
files within the tree that are "acceptable".
An acceptable file is a file which:
starts with the same prefix as we want (IOW, belongs to the same SSTable, we
will just test that directly), and is not "alive". The alive list is just the
list of all SSTables in the system that are used by the CFs.
What this tries to do, is to make sure that the trueSnapshotSize is just the
extra space on disk used by the snapshot. Since the snapshots are links, then
if a table goes away, it adds to this size. If it would be there anyway, it does
not.
We can do that in a lot simpler fashion: for each file, we will just look at
the original CF directory, and see if we can find the file there. If we can't,
then it counts towards the trueSize. Even for files that are deleted after
compaction, that "eventually" works, and it simplifies the code tremendously,
given that we have to neither list all files in the system - as Cassandra
does - nor check other shards for liveness information - as we would
otherwise have to do.
The scheme I am proposing may need some tweaks when we support multiple data
directories, as the SSTables may not be directly below the snapshot level.
Still, it would be trivial to inform the CF about their possible locations.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
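The simplified check described above can be sketched like this, with maps and sets standing in for directory listings (names and signature are illustrative):

```cpp
#include <cassert>
#include <cstdint>
#include <map>
#include <set>
#include <string>

// A snapshot file counts toward the "true" size only if it can no longer be
// found in the CF's main directory - i.e. the snapshot's hard link is the
// only remaining copy of the data.
uint64_t true_snapshot_size(const std::map<std::string, uint64_t>& snapshot_files,
                            const std::set<std::string>& cf_dir_files) {
    uint64_t size = 0;
    for (const auto& f : snapshot_files) {
        if (!cf_dir_files.count(f.first)) {
            size += f.second;
        }
    }
    return size;
}
```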
The migrator tells lsa how to move an object when it is compacted.
Currently it is a function pointer, which means we must know how to move
the object at compile time. Making it an object allows us to build the
migration function at runtime, making it suitable for runtime-defined types
(such as tuples and user-defined types).
In the future, we may also store the size there for fixed-size types,
reducing lsa overhead.
C++ variable templates would have made this patch smaller, but unfortunately
they are only supported on gcc 5+.
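The function-pointer-to-object change described above can be sketched as follows (a minimal shape under assumed names, not lsa's actual interface):

```cpp
#include <cassert>
#include <cstddef>
#include <cstring>

// Instead of a compile-time function pointer, the migrator is an object with
// a virtual call, so the migration routine can be chosen at runtime
// (e.g. for tuples and user-defined types).
struct migrate_fn_type {
    virtual ~migrate_fn_type() = default;
    virtual void migrate(void* src, void* dst, size_t size) const = 0;
};

// A trivially-copyable type can be moved with a plain memcpy.
struct trivial_migrator final : migrate_fn_type {
    void migrate(void* src, void* dst, size_t size) const override {
        std::memcpy(dst, src, size);
    }
};
```

Storing a per-type size in such an object, as the text suggests for the future, would be a matter of adding another virtual accessor or a data member.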
This reader enables range queries on the row cache. An underlying key_reader
is used to obtain information about the partitions that belong to the
specified range, and if any of them isn't in the cache, an underlying
mutation reader is used to read the missing data.
Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
This mutation reader returns mutations from cache that are in a given
range. There may be other mutations in the system (e.g. in sstables)
that won't be returned, so this reader on its own cannot really satisfy
any query.
Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
Combined key reader, just like its mutation equivalents, combines
output from multiple key_readers and provides a single sorted stream
of decorated keys.
Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
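The combining step can be sketched with sorted vectors standing in for the underlying key_readers (the real reader is asynchronous and would also deduplicate equal keys across streams):

```cpp
#include <cassert>
#include <functional>
#include <queue>
#include <string>
#include <utility>
#include <vector>

// A min-heap repeatedly yields the smallest head among all input streams,
// producing one sorted stream of keys.
std::vector<std::string> combine_sorted(const std::vector<std::vector<std::string>>& readers) {
    using entry = std::pair<std::string, size_t>;  // (key, reader index)
    std::priority_queue<entry, std::vector<entry>, std::greater<entry>> heap;
    std::vector<size_t> pos(readers.size(), 0);
    for (size_t i = 0; i < readers.size(); ++i) {
        if (!readers[i].empty()) {
            heap.push({readers[i][0], i});
        }
    }
    std::vector<std::string> out;
    while (!heap.empty()) {
        auto e = heap.top();
        heap.pop();
        out.push_back(e.first);
        size_t i = e.second;
        if (++pos[i] < readers[i].size()) {
            heap.push({readers[i][pos[i]], i});
        }
    }
    return out;
}
```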
key_readers provide an interface analogous to mutation_readers, but the
only data they return are decorated keys.
Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
Since mutation stores all its data externally and the object itself is
basically just a std::unique_ptr<>, there is no need for stdx::optional.
A smart pointer set to nullptr represents a disengaged mutation_opt.
Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
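The point can be illustrated with a toy type (this is not Scylla's mutation; the names are stand-ins): when the wrapped object is already just a unique_ptr, nullptr can encode "disengaged" with no extra flag.

```cpp
#include <cassert>
#include <memory>
#include <utility>

struct mutation_like {
    std::unique_ptr<int> _data;  // stand-in for the externally stored state
};

class mutation_opt {
    mutation_like _m;
public:
    mutation_opt() = default;                        // disengaged: nullptr
    explicit mutation_opt(int v) : _m{std::make_unique<int>(v)} {}
    explicit operator bool() const { return bool(_m._data); }
    mutation_like& operator*() { return _m; }
};
```

Unlike an optional<mutation_like>, this adds no discriminant on top of the pointer, so the wrapper is the same size as the pointer itself.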
"Fixes: #469
We occasionally generate memtables that are not empty, yet have no
high replay_position set. (Typical case is CL replay, but apparently
there are others).
Moreover, we can do this repeatedly, and thus get caught in the flush
queue ordering restrictions.
Solve this by treating a flush without replay_position as a flush at the
highest running position, i.e. "last" in queue. Note that this will not
affect the actual flush operation, nor CL callbacks, only anyone waiting
for the operation(s) to complete.
To do this, the flush_queue had its restrictions eased, and some introspection
methods added."
This patch provides a storage service API to delete a snapshot. Because all
keyspaces and CFs are visible in all shards, we can fetch the
list of keyspaces in the present shard and issue the filesystem operations in
that same shard.
That simplifies the code tremendously, and because there aren't any operations
we need to do prior to the fs ones (unlike the case of create snapshot), we
need no synchronization. Even easier.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
We go to the filesystem to check if the snapshot exists. This should make us
robust against deletions of existing snapshots from the filesystem.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
This allows for us to delete an existing snapshot. It works at the column
family level, and removing it from the list of keyspace snapshots needs to
happen only when all CFs are processed. Therefore, that is provided as a
separate operation.
The filesystem code is a bit ugly: it could be made better by making our file
lister more generic. The first step would be to call it a walker, not a lister...
For now, we'll use the fact that there are mostly two levels in the snapshot
hierarchy to our advantage, and avoid a full recursion - using the same lambda
for all calls would require us to provide a separate class to handle the state,
and that's part of making this generic.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
There are situations in which we would like to match more than one directory
type. One example of that, would be a recursive delete operation: we need to
delete the files inside directories and the directories themselves, but we
still don't want a "delete all" since finding anything other than a directory
or a file is an error, and we should treat it as such.
Since there aren't that many types, it should be OK performance-wise to just
use a list. I am using an unordered_set here just because it is easy enough,
but we could relax that later if needed. In any case, users of the
interface should not worry about that; the decision is abstracted away
into lister::dir_entry_types.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
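The interface decision above can be sketched as follows (the enum and the filter function are simplified stand-ins for the real lister):

```cpp
#include <cassert>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

enum class directory_entry_type { regular, directory };
// The caller passes the set of entry types it wants to match.
using dir_entry_types = std::unordered_set<directory_entry_type>;

using dir_entry = std::pair<std::string, directory_entry_type>;

// Keep only the entries whose type is in the wanted set, e.g. both files
// and directories for a recursive delete.
std::vector<dir_entry> filter_entries(const std::vector<dir_entry>& entries,
                                      const dir_entry_types& wanted) {
    std::vector<dir_entry> out;
    for (const auto& e : entries) {
        if (wanted.count(e.second)) {
            out.push_back(e);
        }
    }
    return out;
}
```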
This is certainly the right thing to do and seems to fix #403. However,
I didn't manage to convince myself why this would cause problems for
binomial_heap, given that binomial_heap::erase() calls siftup()
anyway:
void erase(handle_type handle)
{
    node_pointer n = handle.node_;
    siftup(n, force_inf());
    top_element = n;
    pop();
}

void increase(handle_type handle)
{
    node_pointer n = handle.node_;
    siftup(n, *this);
    update_top_element();
    sanity_check();
}
There was confusion between the snapshot key and the keyspace in the
snapshot details; this fixes it.
Signed-off-by: Amnon Heiman <amnon@cloudius-systems.com>
"Those are fixes needed for the snapshotting process itself. I have bundled this
in the create_snapshot series before to avoid a rebase, but since I will have to
rewrite that to get rid of the snapshot manager (and go to the filesystem),
I am sending those out on their own."
This adds a workaround for getting the schema_version: it will return only
the schema version of the local node. This is a temporary workaround until
describe_schema_versions is implemented.
Signed-off-by: Amnon Heiman <amnon@cloudius-systems.com>
This adds the implementation for the get cluster name and get
partitioner name to the storage_service API.
Signed-off-by: Amnon Heiman <amnon@cloudius-systems.com>
The API can ask any of the gossiper shards for the cluster name, so
the initialization needs to set it on all of them.
Signed-off-by: Amnon Heiman <amnon@cloudius-systems.com>
We occasionally generate memtables that are not empty, yet have no
high replay_position set. (Typical case is CL replay, but apparently
there are others).
Moreover, we can do this repeatedly, and thus get caught in the flush
queue ordering restrictions.
Solve this by treating a flush without replay_position as a flush at the
highest running position, i.e. "last" in queue. Note that this will not
affect the actual flush operation, nor CL callbacks, only anyone waiting
for the operation(s) to complete.
As long as we guarantee that the execution order for the post ops are
upheld, we can allow insertion of multiple ops on the same key.
Implemented by adding a ref count to each position.
The restriction then becomes that an added key must either be larger
than any already existing key, _OR_ already exist. In the latter case,
we still know that we have not finished this position and signaled
"upwards".
test_setup::do_with_test_directory is missing. For some reason,
the test wasn't failing without it until now. Adding it is the
correct thing to do anyway.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
"This patchset introduces leveled compaction to Scylla.
We don't handle all corner cases yet, but we already have the strategy
and compaction working as expected. Test cases were written and I also
tested the stability with a load of cassandra-stress.
Leveled compaction may output more than one sstable because there is
a limit on the size of sstables: 160M by default.
Handling of partial compaction is still something to be
worked on.
Anyway, it will not be a big problem. Why? Suppose that a leveled
compaction will generate 2 sstables, and scylla is interrupted after
the first sstable is completely written but before the second one is
completely written. The next boot will delete the second sstable,
because it was partially written, but will not do anything with the
first one as it was completely written.
As a result, we will have two sstables with redundant data."
This patch adds the ability to set one or all log levels, get a log level,
and get all logger names.
After this patch the following url will be available:
GET/POST
/system/logger
/system/logger/{name}
Signed-off-by: Amnon Heiman <amnon@cloudius-systems.com>
The system API will include system-related commands; currently it holds
the logger-related API. It holds definitions for the following commands:
get_all_logger_names
set_all_logger_level
get_logger_level
set_logger_level
Signed-off-by: Amnon Heiman <amnon@cloudius-systems.com>
This is a helper function that returns a log level name. It will be used
by the API to report the log levels.
Signed-off-by: Amnon Heiman <amnon@cloudius-systems.com>
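A level-to-name helper like the one described can be sketched as follows (the enum values here are assumptions; Scylla's logger defines its own level set):

```cpp
#include <cassert>
#include <string>

enum class log_level { error, warn, info, debug, trace };

// Map a log level to the name the API reports.
std::string level_name(log_level l) {
    switch (l) {
    case log_level::error: return "error";
    case log_level::warn:  return "warn";
    case log_level::info:  return "info";
    case log_level::debug: return "debug";
    case log_level::trace: return "trace";
    }
    return "unknown";
}
```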
With the distribute-and-sync method we are using, if an exception happens in
the snapshot creation for any reason (think file permissions, etc.), it will
just hang the server, since our shard won't do the necessary work to
synchronize and note that we did our part (or tried to) in snapshot creation.
Make the then clause a finally, so that the sync part is always executed.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
create_links will fail on one of the shards if one of the SSTables happens to
be shared. It should be fine if the link already exists, so let's just ignore
that case.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
Explicitly use up all the memory in the system as best as we can instead.
Still not super reliable, but it should have fewer side effects and work
better with pre-allocated segment files.
Make production_snitch_base constructor signature consistent with
the rest of production snitches.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
A non-empty default value of the configuration file name was preventing
db::config::get_conf_dir() from kicking in when the default snitch constructor
was used (which is the way it's always used from scylla).
Fixes issue #459
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
When building the in-memory schema for a column family, we were
ignoring compaction strategy class because of a bug in the
existing code. Example: suppose that you create a column family
with leveled compaction strategy. This option would be ignored
and the default strategy (size-tiered) would be used instead.
Found this problem while working on leveled compaction.
Signed-off-by: Raphael S. Carvalho <raphaelsc@cloudius-systems.com>
"This series cleans up a few places in the snitches
code that has been noticed during the work on issues #464
and #459.
The last patch actually fixes the issue #464"
After the previous two patches, the CF directory where the SSTable will live is
guaranteed to always exist: system CFs' directories are touched at boot, while
newly created tables' directories are touched when the creation mutations are announced.
With that in place, there is no more need for the recursive touch in the
SSTables path.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
In Cassandra, when you create a new column family, a directory for it
immediately appears under the KS directory.
In the past, we have made a decision to delay that creation until the first
SSTable is created, which works well in general.
There is a problem, however, for backup restoration: the standard procedure to
call loadNewSSTables is to do that in an empty directory. But the directory
simply won't be there until we create the first SSTable: bummer!
In the current incarnation of the code in schema_tables.cc, there is already
some code that runs on CPU0 only. That is a perfect place for the directory
creation. So let's do it.
After this patch, a directory for the CF appears right after the CF creation.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
Current code calls make_directory, which will fail if the directory already exists.
We didn't use this code path much before, but once we start creating CF directories
on CF creation - and not on SSTable creation, that will become our default method.
Use touch_directory instead
Signed-off-by: Glauber Costa <glommer@scylladb.com>
Adapt our compaction code to start writing a new sstable if the
one being written reached its maximum size. Leveled strategy works
with that concept. If a strategy other than leveled is being used,
everything will work as before.
Signed-off-by: Raphael S. Carvalho <raphaelsc@cloudius-systems.com>
Useful for leveled strategy which looks for overlapping sstables
by checking if token range overlaps.
Signed-off-by: Raphael S. Carvalho <raphaelsc@cloudius-systems.com>
* seastar a2523ae...8207f2c (3):
> rwlock: provide lock / unlock semantics
> with_lock: run a function under a lock
> rwlock: add documentation to the rwlock module
Fixes spurious failures in test_commitlog_discard_completed_segments
* Do an explicit sync on all segments to prevent async flushes from keeping
segments alive.
* Use a counter instead of actual file counting to avoid racing with
pre-allocation of segments.
This adds an implementation for the stream manager metrics.
The following URLs will be available:
/stream_manager/metrics/outbound
/stream_manager/metrics/incoming/{peer}
/stream_manager/metrics/incoming
/stream_manager/metrics/outgoing/{peer}
/stream_manager/metrics/outgoing
Signed-off-by: Amnon Heiman <amnon@cloudius-systems.com>
This adds the swagger definition file for the stream manager. The API is
based on the StreamManagerMBean and the StreamMetrics.
The following commands were added:
get_current_streams
get_current_streams_state
get_all_active_streams_outbound
get_total_incoming_bytes
get_all_total_incoming_bytes
get_total_outgoing_bytes
get_all_total_outgoing_bytes
The Fedora base image has changed, so we need to add the "hostname" utility,
which is used by the Docker-specific launch script, to our image.
Fixes Scylla startup.
Signed-off-by: Pekka Enberg <penberg@scylladb.com>
"Fixes the crashes in debug mode with the flush queue test, and
simplifies and cleans up the queue itself.
Aforementioned crashes happened due to reordering with the signalling
loop in the previous version. A completing task could race with a reordered
loop continuation over who would get to signal and remove an item.
Rewritten to use much simpler promise chaining instead (which also allows
the return value to propagate from the pre-op to the post-op), ensuring only
one actor modifies the queue entry."
The previous version looped on post execution and signalling of waiters.
This could "race" with an op just finishing if task reordering happened.
This version simplifies the code significantly (and raises the question of why
it was not written like this in the first place... shame on me) by simply
building a promise-dependency chain between the _previous_ queue item and the
next instead.
Also, the code now handles propagation of the return value from the "Func"
pre-op to the "Post" op, with exceptions automatically handled.
xfs doesn't like writes beyond eof (exactly at eof is fine), and due
to continuation reordering, we sometimes do that.
Fix by pre-truncating the segment to its maximum size.
Re-check file size overflow after each cycle() call (new buffer),
otherwise we could write more than allowed when storing a mutation
larger than the current buffer size (current pos + sizeof(mut) < max_size, but
after the cycle required by sizeof(mut) > buf_remain, the former might not be
true anymore).
"Adds a small utility queue and through this enforces memtable flush ordering
such that a flush may _run_ unchecked, however the "post" operation may
execute once all "lower numbered" (i.e. lower replay position) post-ops
have finished.
This means that:
a.) Callbacks to commitlog are now guaranteed to fulfill ordering criteria
b.) Calling column_family::flush() and waiting for the result will also
wait for any previously initiated flushes to finish. But not those
initiated _after_."
Small utility to order operation->post operation
so that the "post" step is guaranteed to only be run
when all "post"-ops for lower valued keys (T) have been completed
This is a generalized utility mainly to be testable.
Before:
$ nodetool info
ID : a5adfbbf-cfd8-4c88-ab6b-6a34ccc2857c
Gossip active : false
After:
$ nodetool info
ID : a5adfbbf-cfd8-4c88-ab6b-6a34ccc2857c
Gossip active : true
Fixes #354.
* seastar 78e3924...a2523ae (7):
> core: fix pipe unread
> Merge 'xfs-extents'
> Merge "separate-dma-alignment"
> output_stream: wait for stream to be taken out of poller in case final flush returns exception.
> reactor: Use more widely compatible xfs include
> readme: Add xfslibs-dev to Ubuntu deps
> pipe: add unread() operation
The first problem is the while loop around the code that processes prestate.
That's wrong because there may be a need to read more data before continuing
to process a prestate.
The second problem is that the code assumed a prestate would be processed
at once, and then unconditionally processed the current state.
Both problems are likely to happen when reading a large buffer because more
than one read may be required.
Signed-off-by: Raphael S. Carvalho <raphaelsc@cloudius-systems.com>
I was mildly annoyed by seeing two warnings about the same directory not
being XFS, when the sstable directory and the commitlog directory are the
same one (I don't know if this is typical, but this is what I do in all
my tests...). So I wrote this trivial patch to make sure not to test the
same directory twice.
Signed-off-by: Nadav Har'El <nyh@cloudius-systems.com>
"With this, a new node can stream data from existing nodes when it joins the cluster.
I tested with the following:
1) start node 1
2) insert data into node 1
3) start node 2
I can see from the logger that data is streamed correctly from node 1
to node 2."
Add code to actually stream data from other nodes during bootstrap.
One version returns only the ranges
std::vector<range<token>>
Another version returns a map
std::unordered_map<range<token>, std::unordered_set<inet_address>>
which is converted from
std::unordered_multimap<range<token>, inet_address>
They are needed by token_metadata::pending_endpoints_for,
storage_service::get_all_ranges_with_strict_sources_for and
storage_service::decommission.
Given the current token_metadata and the new token which will be
inserted into the ring after bootstrap, calculate the ranges this new
node will be responsible for.
This is needed by boot_strapper::bootstrap().
"This series adds EC2Snitch.
Since both GossipingPropertyFileSnitch and the EC2SnitchXXX snitch family
use the same property file, it was logical to share the corresponding
code. Most of this series does just that... "
While trying to debug an unrelated bug, I was annoyed by the fact that parsing
caching options keeps throwing exceptions all the time. Those exceptions have no
reason to happen: we try to convert the value to a number, and if we fail we
fall back to one of the two blessed strings.
We could just as easily test for those strings beforehand and avoid all of
that.
While we're at it, the exception message should show the value of "r", not "k".
Signed-off-by: Glauber Costa <glommer@scylladb.com>
Currently, we are calculating truncated_at during truncate() independently for
each shard. It will work if we're lucky, but it is fairly easy to trigger cases
in which each shard will end up with a slightly different time.
The main problem here, is that this time is used as the snapshot name when auto
snapshots are enabled. Previous to my last fixes, this would just generate two
separate directories in this case, which is wrong but not severe.
But after the fix, this means that both shards will wait for one another to
synchronize and this will hang the database.
Fix this by making sure that the truncation time is calculated before
invoke_on_all in all needed places.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
Checks the following:
- That EC2Snitch is able to receive the availability zone from EC2.
- That the resulting DC and RACK values are distributed among all
shards.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
This snitch will read the EC2 availability zone and set the DC
and RACK as follows:
If availability zone is "us-east-1d", then
DC="us-east" and RACK="1d".
If cassandra-rackdc.properties contains "dc_suffix" field then
DC will be appended with its value.
For instance if dc_suffix=_1_cassandra, then in the example above
DC=us-east_1_cassandra
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
This is a configuration file used by GossipingPropertyFileSnitch and
EC2SnitchXXX snitches family.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
- Move the property file parsing code into the production_snitch_base class.
- Make the parsing code more general:
  - Save the parsed keys in a hash table.
  - Check for only two types of errors:
    - Repeated keys.
    - Unsupported keys: keep a set of all supported keys and check each
      parsed key against it.
- Added the production_snitch_base.cc file.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
This function returns the directory containing the configuration
files. It takes the environment variables into account as follows:
- If SCYLLA_CONF is defined, that is the directory.
- Else, if SCYLLA_HOME is defined, then $SCYLLA_HOME/conf is the directory.
- Otherwise "conf" is the directory, i.e. the configuration files are
looked up at ./conf.
Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
New in v2:
- Updated get_conf_dir() description.
We are generating a general object ({}), whereas Cassandra 2.1.x generates an
array ([]). Let's do that as well to avoid surprising parsers.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
We still need to write a manifest when there are no files in the snapshot.
But because we never reach the touch_directory part of the sstables loop in
that case, nobody would have created jsondir.
Since now all the file handling is done in the seal_snapshot phase, we should
just make sure the directory exists before initiating any other disk activity.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
We currently have one optimization that returns early when there are no tables
to be snapshotted.
However, because of the way we are writing the manifest now, this will cause
the shard that happens to have tables to be waiting forever. So we should get
rid of it. All shards need to pass through the synchronization point.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
If we are hashing more than one CF, the snapshots themselves will all have the
same name. This will cause the files from one of them to spill into the other
when writing the manifest.
The proper hash key is the jsondir: that one is unique per manifest file.
Signed-off-by: Glauber Costa <glommer@scylladb.com>
This patch fixes an issue with the read latency estimated histogram
implementation and adds a call for the estimated number of sstables
histogram.
The latter is not yet implemented on the database side.
Signed-off-by: Amnon Heiman <amnon@cloudius-systems.com>
This patch adds the read and write latency estimated histogram support,
and adds an estimated histogram of the number of sstables that were used
in a read.
Signed-off-by: Amnon Heiman <amnon@cloudius-systems.com>
Taking a time measurement of an operation can cause performance degradation,
so this patch adds sampling support to the estimated histogram.
It allows adding a sample together with a counter that holds the actual
total so far, so the sample is counted as multiple entries in the
estimated histogram.
The total count of the entries in the histogram will equal the _count
parameter.
Signed-off-by: Amnon Heiman <amnon@cloudius-systems.com>
throw exceptions::invalid_request_exception("system keyspace is not user-modifiable");
}
// keyspace name
std::regex name_regex("\\w+");
if (!std::regex_match(name, name_regex)) {
    throw exceptions::invalid_request_exception(sprint("\"%s\" is not a valid keyspace name", _name.c_str()));
}
if (name.length() > schema::NAME_LENGTH) {
    throw exceptions::invalid_request_exception(sprint("Keyspace names shouldn't be more than %d characters long (got \"%s\")", schema::NAME_LENGTH, _name.c_str()));
"Total space used for commitlogs. If the used space goes above this value, Cassandra rounds up to the next nearest segment multiple and flushes memtables to disk for the oldest commitlog segments, removing those log segments. This reduces the amount of data to replay on startup, and prevents infrequently-updated tables from indefinitely keeping commitlog segments. A small total commitlog space tends to cause more flush activity on less-active tables.\n" \
"Log WARN on any batch size exceeding this value in kilobytes. Caution should be taken on increasing the size of this threshold as it can lead to node instability." \
"The IP address a node tells other nodes in the cluster to contact it by. It allows the public and private addresses to be different. For example, use the broadcast_address parameter in topologies where not all nodes have access to other nodes by their private IP addresses.\n" \
"If your Cassandra cluster is deployed across multiple Amazon EC2 regions and you use the EC2MultiRegionSnitch, set the broadcast_address to the public IP address of the node and the listen_address to the private IP." \
) \
val(initial_token,sstring,/* N/A */,Unused, \
val(initial_token,sstring,/* N/A */,Used, \
"Used in the single-node-per-token architecture, where a node owns exactly one contiguous range in the ring space. Setting this property overrides num_tokens.\n" \
"If you are not using vnodes, or have num_tokens set to 1 or unspecified (#num_tokens), you should always specify this parameter when setting up a production cluster for the first time and when adding capacity. For more information, see this parameter in the Cassandra 1.1 Node and Cluster Configuration documentation.\n" \
"This parameter can be used with num_tokens (vnodes ) in special cases such as Restoring from a snapshot." \
"RPC address to broadcast to drivers and other Cassandra nodes. This cannot be set to 0.0.0.0. If blank, it is set to the value of the rpc_address or rpc_interface. If rpc_address or rpc_interface is set to 0.0.0.0, this property must be set.\n" \
) \
val(rpc_port,uint16_t,9160,Used, \
val(api_address,sstring,"",Used,"Http Rest API address") \
val(api_ui_dir,sstring,"swagger-ui/dist/",Used,"The directory location of the API GUI") \
val(api_doc_dir,sstring,"api/api-doc/",Used,"The API definition file directory") \
val(load_balance,sstring,"none",Used,"CQL request load balancing: 'none' or 'round-robin'") \
val(consistent_rangemovement,bool,true,Used,"When set to true, range movements will be consistent. It means: 1) it will refuse to bootstrap a new node if other bootstrapping/leaving/moving nodes are detected. 2) data will be streamed to a new node only from the node which is no longer responsible for the token range. Same as -Dcassandra.consistent.rangemovement in cassandra") \
val(join_ring,bool,true,Used,"When set to true, a node will join the token ring. When set to false, a node will not join the token ring. User can use nodetool join to initiate ring joining later. Same as -Dcassandra.join_ring in cassandra.") \
val(load_ring_state,bool,true,Used,"When set to true, load tokens and host_ids previously saved. Same as -Dcassandra.load_ring_state in cassandra.") \
val(replace_node,sstring,"",Used,"The UUID of the node to replace. Same as -Dcassandra.replace_node in cassandra.") \
val(replace_token,sstring,"",Used,"The tokens of the node to replace. Same as -Dcassandra.replace_token in cassandra.") \
val(replace_address,sstring,"",Used,"The listen_address or broadcast_address of the dead node to replace. Same as -Dcassandra.replace_address.") \
val(replace_address_first_boot,sstring,"",Used,"Like the replace_address option, but if the node has been bootstrapped successfully it will be ignored. Same as -Dcassandra.replace_address_first_boot.") \
throw std::runtime_error("num_tokens must be >= 1");
}
// if (numTokens == 1)
//     logger.warn("Picking random token for a single vnode. You should probably add more vnodes; failing that, you should probably specify the token manually");
if (num_tokens == 1) {
    logger.warn("Picking random token for a single vnode. You should probably add more vnodes; failing that, you should probably specify the token manually");
throw std::runtime_error(sprint("A node required to move the data consistently is down (%s). If you wish to move the data from a potentially inconsistent replica, restart the node with -Dcassandra.consistent.rangemovement=false", source_ip));