From 9083a0e5a7bc15528e483e32e984897dd0357381 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Sat, 17 Oct 2015 13:27:13 +0200 Subject: [PATCH 1/2] snapshots: fix generation of snapshots with shared sstables create_links will fail in one of the shards if one of the SSTables happens to be shared. It should be fine if the link already exists, so let's just ignore that case. Signed-off-by: Glauber Costa --- database.cc | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/database.cc b/database.cc index b0e8e73db0..9daad806a1 100644 --- a/database.cc +++ b/database.cc @@ -1688,7 +1688,18 @@ future<> column_family::snapshot(sstring name) { return parallel_for_each(tables, [name](sstables::shared_sstable sstable) { auto dir = sstable->get_dir() + "/snapshots/" + name; return recursive_touch_directory(dir).then([sstable, dir] { - return sstable->create_links(dir); + return sstable->create_links(dir).then_wrapped([] (future<> f) { + // If the SSTables are shared, one of the CPUs will fail here. + // That is completely fine, though. We only need one link. + try { + f.get(); + } catch (std::system_error& e) { + if (e.code() != std::error_code(EEXIST, std::system_category())) { + throw; + } + } + return make_ready_future<>(); + }); }); }).then([jsondir, &tables] { // This is not just an optimization. If we have no files, jsondir may not have been created, From 218fdebbeb26eeae0bbd91c71dd2113519729510 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Sat, 17 Oct 2015 13:34:25 +0200 Subject: [PATCH 2/2] snapshot: do not allow exceptions in snapshot creation to hang us With the distribute-and-sync method we are using, if an exception happens in the snapshot creation for any reason (think file permissions, etc.), that will just hang the server since our shard won't do the necessary work to synchronize and note that we have done our part (or tried to) in snapshot creation. Make the then clause a finally, so that the sync part is always executed. 
Signed-off-by: Glauber Costa --- database.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/database.cc b/database.cc index 9daad806a1..e98765141d 100644 --- a/database.cc +++ b/database.cc @@ -1709,7 +1709,7 @@ future<> column_family::snapshot(sstring name) { } else { return make_ready_future<>(); } - }).then([this, &tables, jsondir] { + }).finally([this, &tables, jsondir] { auto shard = std::hash()(jsondir) % smp::count; std::unordered_set table_names; for (auto& sst : tables) {