table: no longer accept online loading of SSTable files in the main directory

Loading SSTables from the main directory is possible, to be compatible with
Cassandra, but extremely dangerous and not recommended.

From the beginning, we recommend using an separate, upload/ directory.
In all this time, perhaps due to how the feature's usefulness is reduced
in Cassandra due to the possible races, I have never seen anyone coming
from Cassandra doing procedures involving refresh at all.

Loading SSTables from the main directory forces us to disable writes to
the table temporarily until the SSTables are sorted out. If we get rid of
this, we can get rid of the disabling of the writes as well.

We can't do it now because if we want to be nice to the odd user that may
be using refresh through the main directory without our knowledge we should
at least error out.

This patch, then, does that: it errors out if SSTables are found in the main
directory. It will not proceed with the refresh, and direct the user to the
upload directory.

The main loop in reshuffle_sstables is left in place structurally for now, but
most of it is gone. The test for is is deleted.

After a period of deprecation we can start ignoring these SSTables and get rid
of the lock.

Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20200429144511.13681-1-glauber@scylladb.com>
This commit is contained in:
Glauber Costa
2020-04-29 10:45:11 -04:00
committed by Avi Kivity
parent e44b2826ab
commit 70e5252a5d
2 changed files with 4 additions and 77 deletions

View File

@@ -1037,14 +1037,9 @@ table::stop() {
future<std::vector<sstables::entry_descriptor>>
table::reshuffle_sstables(std::set<int64_t> all_generations, int64_t start) {
struct work {
int64_t current_gen;
std::set<int64_t> all_generations; // Stores generation of all live sstables in the system.
std::map<int64_t, sstables::shared_sstable> sstables;
std::unordered_map<int64_t, sstables::entry_descriptor> descriptors;
std::vector<sstables::entry_descriptor> reshuffled;
work(int64_t start, std::set<int64_t> gens)
: current_gen(start ? start : 1)
, all_generations(gens) {}
: all_generations(gens) {}
};
return do_with(work(start, std::move(all_generations)), [this] (work& work) {
@@ -1058,35 +1053,10 @@ table::reshuffle_sstables(std::set<int64_t> all_generations, int64_t start) {
if (work.all_generations.count(comps.generation) != 0) {
return make_ready_future<>();
}
auto sst = make_sstable(_config.datadir, comps.generation, comps.version, comps.format);
work.sstables.emplace(comps.generation, std::move(sst));
work.descriptors.emplace(comps.generation, std::move(comps));
// FIXME: This is the only place in which we actually issue disk activity aside from
// directory metadata operations.
//
// But without the TOC information, we don't know which files we should link.
// The alternative to that would be to change create link to try creating a
// link for all possible files and handling the failures gracefuly, but that's not
// exactly fast either.
//
// Those SSTables are not known by anyone in the system. So we don't have any kind of
// object describing them. There isn't too much of a choice.
return work.sstables[comps.generation]->read_toc();
return make_exception_future<>(std::runtime_error("Loading SSTables from the main SSTable directory is unsafe and no longer supported."
" You will find a directory called upload/ inside the table directory that can be used to load new SSTables into the system"));
}, &manifest_json_filter).then([&work] {
// Note: cannot be parallel because we will be shuffling things around at this stage. Can't race.
return do_for_each(work.sstables, [&work] (auto& pair) {
auto&& comps = std::move(work.descriptors.at(pair.first));
comps.generation = work.current_gen;
work.reshuffled.push_back(std::move(comps));
if (pair.first == work.current_gen) {
++work.current_gen;
return make_ready_future<>();
}
return pair.second->set_generation(work.current_gen++);
});
}).then([&work] {
return make_ready_future<std::vector<sstables::entry_descriptor>>(std::move(work.reshuffled));
return make_ready_future<std::vector<sstables::entry_descriptor>>();
});
});
}

View File

@@ -881,49 +881,6 @@ SEASTAR_TEST_CASE(set_generation) {
});
}
SEASTAR_TEST_CASE(reshuffle) {
return test_setup::do_with_cloned_tmp_directory(uncompressed_dir(), [] (test_env& env, sstring uncompressed_dir, sstring generation_dir) {
return env.reusable_sst(uncompressed_schema(), uncompressed_dir, 1).then([generation_dir] (auto sstp) {
return sstp->create_links(generation_dir, 1).then([sstp, generation_dir] {
return sstp->create_links(generation_dir, 5).then([sstp, generation_dir] {
return sstp->create_links(generation_dir, 10);
});
}).then([sstp] {});
}).then([generation_dir] {
auto cm = make_lw_shared<compaction_manager>();
cm->start();
auto tracker = make_lw_shared<cache_tracker>();
column_family::config cfg = column_family_test_config();
cfg.datadir = generation_dir;
cfg.enable_commitlog = false;
cfg.enable_incremental_backups = false;
auto cl_stats = make_lw_shared<cell_locker_stats>();
auto cf = make_lw_shared<column_family>(uncompressed_schema(), cfg, column_family::no_commitlog(), *cm, *cl_stats, *tracker);
cf->start();
cf->mark_ready_for_writes();
std::set<int64_t> existing_sstables = { 1, 5 };
return cf->reshuffle_sstables(existing_sstables, 6).then([cm, cf, generation_dir] (std::vector<sstables::entry_descriptor> reshuffled) {
BOOST_REQUIRE(reshuffled.size() == 1);
BOOST_REQUIRE(reshuffled[0].generation == 6);
return when_all(
test_sstable_exists(generation_dir, 1, true),
test_sstable_exists(generation_dir, 2, false),
test_sstable_exists(generation_dir, 3, false),
test_sstable_exists(generation_dir, 4, false),
test_sstable_exists(generation_dir, 5, true),
test_sstable_exists(generation_dir, 6, true),
test_sstable_exists(generation_dir, 10, false)
).discard_result().then([cm] {
return cm->stop();
});
}).then([cm, cf, cl_stats, tracker] () mutable {
cf = { };
});
});
});
}
SEASTAR_TEST_CASE(statistics_rewrite) {
return test_setup::do_with_cloned_tmp_directory(uncompressed_dir(), [] (test_env& env, sstring uncompressed_dir, sstring generation_dir) {
return env.reusable_sst(uncompressed_schema(), uncompressed_dir, 1).then([generation_dir] (auto sstp) {