From 0776ca1c5240c5f918b775fb48693dec8afdb49b Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Thu, 8 Oct 2015 16:39:17 +0200 Subject: [PATCH 1/4] snapshots: don't hash pending snapshots by snapshot name If we are hashing more than one CF, the snapshot themselves will all have the same name. This will cause the files from one of them to spill towards the other when writing the manifest. The proper hash is the jsondir: that one is unique per manifest file. Signed-off-by: Glauber Costa --- database.cc | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/database.cc b/database.cc index dc956d6be7..0ae0d5358b 100644 --- a/database.cc +++ b/database.cc @@ -1658,11 +1658,11 @@ struct snapshot_manager { static thread_local std::unordered_map> pending_snapshots; static future<> -seal_snapshot(sstring jsondir, std::unordered_set tables) { +seal_snapshot(sstring jsondir) { std::ostringstream ss; int n = 0; ss << "{" << std::endl << "\t\"files\" : { "; - for (auto&& rf: tables) { + for (auto&& rf: pending_snapshots.at(jsondir)->files) { if (n++ > 0) { ss << ", "; } @@ -1683,8 +1683,11 @@ seal_snapshot(sstring jsondir, std::unordered_set tables) { return out.close(); }); }); - }).then([jsondir = std::move(jsondir)] { + }).then([jsondir] { return sync_directory(std::move(jsondir)); + }).finally([jsondir] { + pending_snapshots.erase(jsondir); + return make_ready_future<>(); }); } @@ -1704,20 +1707,21 @@ future<> column_family::snapshot(sstring name) { }); }).then([jsondir] { return sync_directory(std::move(jsondir)); - }).then([this, &tables, name, jsondir] { - auto shard = std::hash()(name) % smp::count; + }).then([this, &tables, jsondir] { + auto shard = std::hash()(jsondir) % smp::count; std::unordered_set table_names; for (auto& sst : tables) { auto f = sst->get_filename(); auto rf = f.substr(sst->get_dir().size() + 1); table_names.insert(std::move(rf)); } - return smp::submit_to(shard, [requester = engine().cpu_id(), name = std::move(name), + return smp::submit_to(shard, [requester = engine().cpu_id(), jsondir = std::move(jsondir), tables = std::move(table_names), datadir = _config.datadir] { - if (pending_snapshots.count(name) == 0) { - pending_snapshots.emplace(name, make_lw_shared()); + + if (pending_snapshots.count(jsondir) == 0) { + pending_snapshots.emplace(jsondir, make_lw_shared()); } - auto snapshot = pending_snapshots.at(name); + auto snapshot = pending_snapshots.at(jsondir); for (auto&& sst: tables) { snapshot->files.insert(std::move(sst)); } @@ -1725,13 +1729,10 @@ future<> column_family::snapshot(sstring name) { snapshot->requests.signal(1); auto my_work = make_ready_future<>(); if (requester == engine().cpu_id()) { - auto jsondir = datadir + "/snapshots/" + name; my_work = snapshot->requests.wait(smp::count).then([jsondir = std::move(jsondir), - snapshot, - name = std::move(name)] () mutable { - return seal_snapshot(jsondir, std::move(snapshot->files)).then([name = std::move(name), snapshot] { + snapshot] () mutable { + return seal_snapshot(jsondir).then([snapshot] { snapshot->manifest_write.signal(smp::count); - pending_snapshots.erase(name); return make_ready_future<>(); }); }); From efdfc78c0c40aa9589732df53331560a8bb732cb Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Thu, 8 Oct 2015 16:43:05 +0200 Subject: [PATCH 2/4] snapshots: get rid of empty tables optimization We currently have one optimization that returns early when there are no tables to be snapshotted. However, because of the way we are writing the manifest now, this will cause the shard that happens to have tables to be waiting forever. So we should get rid of it. All shards need to pass through the synchronization point. Signed-off-by: Glauber Costa --- database.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/database.cc b/database.cc index 0ae0d5358b..c468effc41 100644 --- a/database.cc +++ b/database.cc @@ -1694,9 +1694,6 @@ seal_snapshot(sstring jsondir) { future<> column_family::snapshot(sstring name) { return flush().then([this, name = std::move(name)]() { auto tables = boost::copy_range>(*_sstables | boost::adaptors::map_values); - if (tables.size() == 0) { - return make_ready_future<>(); - } return do_with(std::move(tables), [this, name](std::vector & tables) { auto jsondir = _config.datadir + "/snapshots/" + name; @@ -1705,8 +1702,14 @@ future<> column_family::snapshot(sstring name) { return recursive_touch_directory(dir).then([sstable, dir] { return sstable->create_links(dir); }); - }).then([jsondir] { - return sync_directory(std::move(jsondir)); + }).then([jsondir, &tables] { + // This is not just an optimization. If we have no files, jsondir may not have been created, + // and sync_directory would throw. + if (tables.size()) { + return sync_directory(std::move(jsondir)); + } else { + return make_ready_future<>(); + } }).then([this, &tables, jsondir] { auto shard = std::hash()(jsondir) % smp::count; std::unordered_set table_names; From cc343eb928aeb9d336c1745f7bc9831d2b355f42 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Thu, 8 Oct 2015 16:45:33 +0200 Subject: [PATCH 3/4] snapshots: handle jsondir creation for empty files case We still need to write a manifest when there are no files in the snapshot. But because we have never reached the touch_directory part in the sstables loop for that case, nobody would have created jsondir in that case. Since now all the file handling is done in the seal_snapshot phase, we should just make sure the directory exists before initiating any other disk activity. Signed-off-by: Glauber Costa --- database.cc | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/database.cc b/database.cc index c468effc41..6520da8f64 100644 --- a/database.cc +++ b/database.cc @@ -1675,12 +1675,14 @@ seal_snapshot(sstring jsondir) { dblog.debug("Storing manifest {}", jsonfile); - return engine().open_file_dma(jsonfile, open_flags::wo | open_flags::create | open_flags::truncate).then([json](file f) { - return do_with(make_file_output_stream(std::move(f)), [json] (output_stream& out) { - return out.write(json.c_str(), json.size()).then([&out] { - return out.flush(); - }).then([&out] { - return out.close(); + return recursive_touch_directory(jsondir).then([jsonfile, json = std::move(json)] { + return engine().open_file_dma(jsonfile, open_flags::wo | open_flags::create | open_flags::truncate).then([json](file f) { + return do_with(make_file_output_stream(std::move(f)), [json] (output_stream& out) { + return out.write(json.c_str(), json.size()).then([&out] { + return out.flush(); + }).then([&out] { + return out.close(); + }); }); }); }).then([jsondir] { From 1549a438234d95bd154ecdae92e45b74cf3f777a Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Thu, 8 Oct 2015 16:50:42 +0200 Subject: [PATCH 4/4] snapshots: fix json type We are generating a general object ({}), whereas Cassandra 2.1.x generates an array ([]). Let's do that as well to avoid surprising parsers. Signed-off-by: Glauber Costa --- database.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/database.cc b/database.cc index 6520da8f64..8ad0c2f917 100644 --- a/database.cc +++ b/database.cc @@ -1661,14 +1661,14 @@ static future<> seal_snapshot(sstring jsondir) { std::ostringstream ss; int n = 0; - ss << "{" << std::endl << "\t\"files\" : { "; + ss << "{" << std::endl << "\t\"files\" : [ "; for (auto&& rf: pending_snapshots.at(jsondir)->files) { if (n++ > 0) { ss << ", "; } ss << "\"" << rf << "\""; } - ss << " }" << std::endl << "}" << std::endl; + ss << " ]" << std::endl << "}" << std::endl; auto json = ss.str(); auto jsonfile = jsondir + "/manifest.json";