table: no longer accept online loading of SSTable files in the main directory
Loading SSTables from the main directory is possible, to be compatible with Cassandra, but extremely dangerous and not recommended. From the beginning, we recommend using an separate, upload/ directory. In all this time, perhaps due to how the feature's usefulness is reduced in Cassandra due to the possible races, I have never seen anyone coming from Cassandra doing procedures involving refresh at all. Loading SSTables from the main directory forces us to disable writes to the table temporarily until the SSTables are sorted out. If we get rid of this, we can get rid of the disabling of the writes as well. We can't do it now because if we want to be nice to the odd user that may be using refresh through the main directory without our knowledge we should at least error out. This patch, then, does that: it errors out if SSTables are found in the main directory. It will not proceed with the refresh, and direct the user to the upload directory. The main loop in reshuffle_sstables is left in place structurally for now, but most of it is gone. The test for is is deleted. After a period of deprecation we can start ignoring these SSTables and get rid of the lock. Signed-off-by: Glauber Costa <glauber@scylladb.com> Message-Id: <20200429144511.13681-1-glauber@scylladb.com>
This commit is contained in:
committed by
Avi Kivity
parent
e44b2826ab
commit
70e5252a5d
38
table.cc
38
table.cc
@@ -1037,14 +1037,9 @@ table::stop() {
|
||||
future<std::vector<sstables::entry_descriptor>>
|
||||
table::reshuffle_sstables(std::set<int64_t> all_generations, int64_t start) {
|
||||
struct work {
|
||||
int64_t current_gen;
|
||||
std::set<int64_t> all_generations; // Stores generation of all live sstables in the system.
|
||||
std::map<int64_t, sstables::shared_sstable> sstables;
|
||||
std::unordered_map<int64_t, sstables::entry_descriptor> descriptors;
|
||||
std::vector<sstables::entry_descriptor> reshuffled;
|
||||
work(int64_t start, std::set<int64_t> gens)
|
||||
: current_gen(start ? start : 1)
|
||||
, all_generations(gens) {}
|
||||
: all_generations(gens) {}
|
||||
};
|
||||
|
||||
return do_with(work(start, std::move(all_generations)), [this] (work& work) {
|
||||
@@ -1058,35 +1053,10 @@ table::reshuffle_sstables(std::set<int64_t> all_generations, int64_t start) {
|
||||
if (work.all_generations.count(comps.generation) != 0) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
auto sst = make_sstable(_config.datadir, comps.generation, comps.version, comps.format);
|
||||
work.sstables.emplace(comps.generation, std::move(sst));
|
||||
work.descriptors.emplace(comps.generation, std::move(comps));
|
||||
// FIXME: This is the only place in which we actually issue disk activity aside from
|
||||
// directory metadata operations.
|
||||
//
|
||||
// But without the TOC information, we don't know which files we should link.
|
||||
// The alternative to that would be to change create link to try creating a
|
||||
// link for all possible files and handling the failures gracefuly, but that's not
|
||||
// exactly fast either.
|
||||
//
|
||||
// Those SSTables are not known by anyone in the system. So we don't have any kind of
|
||||
// object describing them. There isn't too much of a choice.
|
||||
return work.sstables[comps.generation]->read_toc();
|
||||
return make_exception_future<>(std::runtime_error("Loading SSTables from the main SSTable directory is unsafe and no longer supported."
|
||||
" You will find a directory called upload/ inside the table directory that can be used to load new SSTables into the system"));
|
||||
}, &manifest_json_filter).then([&work] {
|
||||
// Note: cannot be parallel because we will be shuffling things around at this stage. Can't race.
|
||||
return do_for_each(work.sstables, [&work] (auto& pair) {
|
||||
auto&& comps = std::move(work.descriptors.at(pair.first));
|
||||
comps.generation = work.current_gen;
|
||||
work.reshuffled.push_back(std::move(comps));
|
||||
|
||||
if (pair.first == work.current_gen) {
|
||||
++work.current_gen;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return pair.second->set_generation(work.current_gen++);
|
||||
});
|
||||
}).then([&work] {
|
||||
return make_ready_future<std::vector<sstables::entry_descriptor>>(std::move(work.reshuffled));
|
||||
return make_ready_future<std::vector<sstables::entry_descriptor>>();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -881,49 +881,6 @@ SEASTAR_TEST_CASE(set_generation) {
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(reshuffle) {
|
||||
return test_setup::do_with_cloned_tmp_directory(uncompressed_dir(), [] (test_env& env, sstring uncompressed_dir, sstring generation_dir) {
|
||||
return env.reusable_sst(uncompressed_schema(), uncompressed_dir, 1).then([generation_dir] (auto sstp) {
|
||||
return sstp->create_links(generation_dir, 1).then([sstp, generation_dir] {
|
||||
return sstp->create_links(generation_dir, 5).then([sstp, generation_dir] {
|
||||
return sstp->create_links(generation_dir, 10);
|
||||
});
|
||||
}).then([sstp] {});
|
||||
}).then([generation_dir] {
|
||||
auto cm = make_lw_shared<compaction_manager>();
|
||||
cm->start();
|
||||
|
||||
auto tracker = make_lw_shared<cache_tracker>();
|
||||
column_family::config cfg = column_family_test_config();
|
||||
cfg.datadir = generation_dir;
|
||||
cfg.enable_commitlog = false;
|
||||
cfg.enable_incremental_backups = false;
|
||||
auto cl_stats = make_lw_shared<cell_locker_stats>();
|
||||
auto cf = make_lw_shared<column_family>(uncompressed_schema(), cfg, column_family::no_commitlog(), *cm, *cl_stats, *tracker);
|
||||
cf->start();
|
||||
cf->mark_ready_for_writes();
|
||||
std::set<int64_t> existing_sstables = { 1, 5 };
|
||||
return cf->reshuffle_sstables(existing_sstables, 6).then([cm, cf, generation_dir] (std::vector<sstables::entry_descriptor> reshuffled) {
|
||||
BOOST_REQUIRE(reshuffled.size() == 1);
|
||||
BOOST_REQUIRE(reshuffled[0].generation == 6);
|
||||
return when_all(
|
||||
test_sstable_exists(generation_dir, 1, true),
|
||||
test_sstable_exists(generation_dir, 2, false),
|
||||
test_sstable_exists(generation_dir, 3, false),
|
||||
test_sstable_exists(generation_dir, 4, false),
|
||||
test_sstable_exists(generation_dir, 5, true),
|
||||
test_sstable_exists(generation_dir, 6, true),
|
||||
test_sstable_exists(generation_dir, 10, false)
|
||||
).discard_result().then([cm] {
|
||||
return cm->stop();
|
||||
});
|
||||
}).then([cm, cf, cl_stats, tracker] () mutable {
|
||||
cf = { };
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(statistics_rewrite) {
|
||||
return test_setup::do_with_cloned_tmp_directory(uncompressed_dir(), [] (test_env& env, sstring uncompressed_dir, sstring generation_dir) {
|
||||
return env.reusable_sst(uncompressed_schema(), uncompressed_dir, 1).then([generation_dir] (auto sstp) {
|
||||
|
||||
Reference in New Issue
Block a user