From 0776ca1c5240c5f918b775fb48693dec8afdb49b Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@scylladb.com>
Date: Thu, 8 Oct 2015 16:39:17 +0200
Subject: [PATCH 1/4] snapshots: don't hash pending snapshots by snapshot name

If we are hashing more than one CF, the snapshot themselves will all have the same name.
This will cause the files from one of them to spill towards the other when writing the manifest.

The proper hash is the jsondir: that one is unique per manifest file.

Signed-off-by: Glauber Costa <glommer@scylladb.com>
---
 database.cc | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)
diff --git a/database.cc b/database.cc
index dc956d6be7..0ae0d5358b 100644
--- a/database.cc
+++ b/database.cc
@@ -1658,11 +1658,11 @@ struct snapshot_manager {
 static thread_local std::unordered_map<sstring, lw_shared_ptr<snapshot_manager>> pending_snapshots;
 
 static future<>
-seal_snapshot(sstring jsondir, std::unordered_set<sstring> tables) {
+seal_snapshot(sstring jsondir) {
     std::ostringstream ss;
     int n = 0;
     ss << "{" << std::endl << "\t\"files\" : { ";
-    for (auto&& rf: tables) {
+    for (auto&& rf: pending_snapshots.at(jsondir)->files) {
         if (n++ > 0) {
             ss << ", ";
         }
@@ -1683,8 +1683,11 @@ seal_snapshot(sstring jsondir, std::unordered_set<sstring> tables) {
                return out.close();
             });
         });
-    }).then([jsondir = std::move(jsondir)] {
+    }).then([jsondir] {
         return sync_directory(std::move(jsondir));
+    }).finally([jsondir] {
+        pending_snapshots.erase(jsondir);
+        return make_ready_future<>();
     });
 }
 
@@ -1704,20 +1707,21 @@ future<> column_family::snapshot(sstring name) {
                 });
             }).then([jsondir] {
                 return sync_directory(std::move(jsondir));
-            }).then([this, &tables, name, jsondir] {
-                auto shard = std::hash<sstring>()(name) % smp::count;
+            }).then([this, &tables, jsondir] {
+                auto shard = std::hash<sstring>()(jsondir) % smp::count;
                 std::unordered_set<sstring> table_names;
                 for (auto& sst : tables) {
                     auto f = sst->get_filename();
                     auto rf = f.substr(sst->get_dir().size() + 1);
                     table_names.insert(std::move(rf));
                 }
-                return smp::submit_to(shard, [requester = engine().cpu_id(), name = std::move(name),
+                return smp::submit_to(shard, [requester = engine().cpu_id(), jsondir = std::move(jsondir),
                                               tables = std::move(table_names), datadir = _config.datadir] {
-                    if (pending_snapshots.count(name) == 0) {
-                        pending_snapshots.emplace(name, make_lw_shared<snapshot_manager>());
+
+                    if (pending_snapshots.count(jsondir) == 0) {
+                        pending_snapshots.emplace(jsondir, make_lw_shared<snapshot_manager>());
                     }
-                    auto snapshot = pending_snapshots.at(name);
+                    auto snapshot = pending_snapshots.at(jsondir);
                     for (auto&& sst: tables) {
                         snapshot->files.insert(std::move(sst));
                     }
@@ -1725,13 +1729,10 @@ future<> column_family::snapshot(sstring name) {
                     snapshot->requests.signal(1);
                     auto my_work = make_ready_future<>();
                     if (requester == engine().cpu_id()) {
-                        auto jsondir = datadir + "/snapshots/" + name;
                         my_work = snapshot->requests.wait(smp::count).then([jsondir = std::move(jsondir),
-                                                                         snapshot,
-                                                                         name = std::move(name)] () mutable {
-                            return seal_snapshot(jsondir, std::move(snapshot->files)).then([name = std::move(name), snapshot] {
+                                                                            snapshot] () mutable {
+                            return seal_snapshot(jsondir).then([snapshot] {
                                 snapshot->manifest_write.signal(smp::count);
-                                pending_snapshots.erase(name);
                                 return make_ready_future<>();
                             });
                         });

From efdfc78c0c40aa9589732df53331560a8bb732cb Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@scylladb.com>
Date: Thu, 8 Oct 2015 16:43:05 +0200
Subject: [PATCH 2/4] snapshots: get rid of empty tables optimization

We currently have one optimization that returns early when there are no tables
to be snapshotted.

However, because of the way we are writing the manifest now, this will cause
the shard that happens to have tables to be waiting forever. So we should get
rid of it. All shards need to pass through the synchronization point.

Signed-off-by: Glauber Costa <glommer@scylladb.com>
---
 database.cc | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/database.cc b/database.cc
index 0ae0d5358b..c468effc41 100644
--- a/database.cc
+++ b/database.cc
@@ -1694,9 +1694,6 @@ seal_snapshot(sstring jsondir) {
 future<> column_family::snapshot(sstring name) {
     return flush().then([this, name = std::move(name)]() {
         auto tables = boost::copy_range<std::vector<sstables::shared_sstable>>(*_sstables | boost::adaptors::map_values);
-        if (tables.size() == 0) {
-            return make_ready_future<>();
-        }
         return do_with(std::move(tables), [this, name](std::vector<sstables::shared_sstable> & tables) {
             auto jsondir = _config.datadir + "/snapshots/" + name;
 
@@ -1705,8 +1702,14 @@ future<> column_family::snapshot(sstring name) {
                 return recursive_touch_directory(dir).then([sstable, dir] {
                     return sstable->create_links(dir);
                 });
-            }).then([jsondir] {
-                return sync_directory(std::move(jsondir));
+            }).then([jsondir, &tables] {
+                // This is not just an optimization. If we have no files, jsondir may not have been created,
+                // and sync_directory would throw.
+                if (tables.size()) {
+                    return sync_directory(std::move(jsondir));
+                } else {
+                    return make_ready_future<>();
+                }
             }).then([this, &tables, jsondir] {
                 auto shard = std::hash<sstring>()(jsondir) % smp::count;
                 std::unordered_set<sstring> table_names;

From cc343eb928aeb9d336c1745f7bc9831d2b355f42 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@scylladb.com>
Date: Thu, 8 Oct 2015 16:45:33 +0200
Subject: [PATCH 3/4] snapshots: handle jsondir creation for empty files case

We still need to write a manifest when there are no files in the snapshot.
But because we have never reached the touch_directory part in the sstables
loop for that case, nobody would have created jsondir in that case.

Since now all the file handling is done in the seal_snapshot phase, we should
just make sure the directory exists before initiating any other disk activity.

Signed-off-by: Glauber Costa <glommer@scylladb.com>
---
 database.cc | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/database.cc b/database.cc
index c468effc41..6520da8f64 100644
--- a/database.cc
+++ b/database.cc
@@ -1675,12 +1675,14 @@ seal_snapshot(sstring jsondir) {
 
     dblog.debug("Storing manifest {}", jsonfile);
 
-    return engine().open_file_dma(jsonfile, open_flags::wo | open_flags::create | open_flags::truncate).then([json](file f) {
-        return do_with(make_file_output_stream(std::move(f)), [json] (output_stream<char>& out) {
-            return out.write(json.c_str(), json.size()).then([&out] {
-               return out.flush();
-            }).then([&out] {
-               return out.close();
+    return recursive_touch_directory(jsondir).then([jsonfile, json = std::move(json)] {
+        return engine().open_file_dma(jsonfile, open_flags::wo | open_flags::create | open_flags::truncate).then([json](file f) {
+            return do_with(make_file_output_stream(std::move(f)), [json] (output_stream<char>& out) {
+                return out.write(json.c_str(), json.size()).then([&out] {
+                   return out.flush();
+                }).then([&out] {
+                   return out.close();
+                });
             });
         });
     }).then([jsondir] {

From 1549a438234d95bd154ecdae92e45b74cf3f777a Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@scylladb.com>
Date: Thu, 8 Oct 2015 16:50:42 +0200
Subject: [PATCH 4/4] snapshots: fix json type

We are generating a general object ({}), whereas Cassandra 2.1.x generates an
array ([]). Let's do that as well to avoid surprising parsers.

Signed-off-by: Glauber Costa <glommer@scylladb.com>
---
 database.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/database.cc b/database.cc
index 6520da8f64..8ad0c2f917 100644
--- a/database.cc
+++ b/database.cc
@@ -1661,14 +1661,14 @@ static future<>
 seal_snapshot(sstring jsondir) {
     std::ostringstream ss;
     int n = 0;
-    ss << "{" << std::endl << "\t\"files\" : { ";
+    ss << "{" << std::endl << "\t\"files\" : [ ";
     for (auto&& rf: pending_snapshots.at(jsondir)->files) {
         if (n++ > 0) {
             ss << ", ";
         }
         ss << "\"" << rf << "\"";
     }
-    ss << " }" << std::endl << "}" << std::endl;
+    ss << " ]" << std::endl << "}" << std::endl;
 
     auto json = ss.str();
     auto jsonfile = jsondir + "/manifest.json";