Files
scylladb/distributed_loader.hh
Glauber Costa 8021d12371 load_new_sstables: reshard before scanning the upload directory
In a later patch we will be able to move files directly from upload
into the main directory. However, for now, for the benefit of doing
this incrementally, we will first reshard in place with our new
reshard infrastructure.

load_new_sstables can then move the SSTables directly, without having
to worry about resharding. This has the immediate benefit that the
resharding happens:

- in the streaming group, without affecting compaction work
- without waiting for the current locks (which are held by compactions)
  in load_new_sstables to release.

We could, at this point, just move the SSTables to the main directory
right away.

I am not doing this in this patch, and am opting to keep the rest of the
upload process unchanged. This will be fixed later when we enable offstrategy
compactions: we'll then compact the SSTables generated into the main
directory.

Signed-off-by: Glauber Costa <glauber@scylladb.com>
2020-06-09 09:02:35 -04:00

94 lines
3.9 KiB
C++

/*
* Copyright (C) 2018 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <seastar/core/future.hh>
#include <seastar/core/distributed.hh>
#include <seastar/core/sstring.hh>
#include <seastar/core/file.hh>
#include <filesystem>
#include <functional>
#include <vector>
#include "seastarx.hh"
#include "sstables/compaction_descriptor.hh"
// Forward declarations: the interface below only names these types in
// references, template arguments and function signatures, so their full
// definitions are not pulled into this header.
class database;
class table;

// Alias so that signatures may spell the table type as column_family.
using column_family = table;

namespace db {
    class system_distributed_keyspace;
}

namespace db::view {
    class view_update_generator;
}

namespace sstables {
    class entry_descriptor;
    class foreign_sstable_open_info;
    class sstable_directory;
}

namespace service {
    class storage_proxy;
    class migration_manager;
}
/**
 * Static-only collection of SSTable loading entry points.
 *
 * The class declares no data members; every member is a static function,
 * so it is used purely as a named scope. Most operations return a
 * future<>. The only synchronous members are the void reshard overload
 * and mark_keyspace_as_load_prio. The behaviour of each routine is
 * defined in the implementation file; comments here describe only what
 * the signatures show.
 */
class distributed_loader {
public:
    // Operations driven by a sharded sstable_directory.
    // NOTE(review): reshard is overloaded. This variant takes a directory
    // plus an SSTable creator callback and returns a future<>; the variant
    // further down takes only the database and names and returns void.
    static future<> reshard(sharded<sstables::sstable_directory>& dir,
                            sharded<database>& db,
                            sstring ks_name,
                            sstring table_name,
                            sstables::compaction_sstable_creator_fn creator);

    static future<> process_sstable_dir(sharded<sstables::sstable_directory>& dir);

    static future<> lock_table(sharded<sstables::sstable_directory>& dir,
                               sharded<database>& db,
                               sstring ks_name,
                               sstring cf_name);

    // Synchronous reshard overload identified by keyspace / column family name.
    static void reshard(distributed<database>& db, sstring ks_name, sstring cf_name);

    // Invokes-or-forwards `func` with a column_family reference and a
    // foreign_sstable_open_info (exact call semantics live in the
    // implementation). `pc` defaults to default_priority_class().
    static future<> open_sstable(distributed<database>& db,
                                 sstables::entry_descriptor comps,
                                 std::function<future<> (column_family&, sstables::foreign_sstable_open_info)> func,
                                 const io_priority_class& pc = default_priority_class());

    static future<> verify_owner_and_mode(std::filesystem::path path);

    // Upload-directory handling.
    // presumably `new_tables` are the descriptors produced by
    // flush_upload_dir -- verify against the callers.
    static future<> load_new_sstables(distributed<database>& db,
                                      distributed<db::view::view_update_generator>& view_update_generator,
                                      sstring ks,
                                      sstring cf,
                                      std::vector<sstables::entry_descriptor> new_tables);

    static future<std::vector<sstables::entry_descriptor>> flush_upload_dir(
            distributed<database>& db,
            distributed<db::system_distributed_keyspace>& sys_dist_ks,
            sstring ks_name,
            sstring cf_name);

    static future<> process_upload_dir(distributed<database>& db, sstring ks_name, sstring cf_name);

    // Resolves to an entry_descriptor for file `fname` under `sstdir`.
    static future<sstables::entry_descriptor> probe_file(distributed<database>& db,
                                                         sstring sstdir,
                                                         sstring fname);

    // Population and initialisation entry points.
    static future<> populate_column_family(distributed<database>& db,
                                           sstring sstdir,
                                           sstring ks,
                                           sstring cf);

    static future<> populate_keyspace(distributed<database>& db, sstring datadir, sstring ks_name);

    static future<> init_system_keyspace(distributed<database>& db);

    static future<> ensure_system_table_directories(distributed<database>& db);

    static future<> init_non_system_keyspaces(distributed<database>& db,
                                              distributed<service::storage_proxy>& proxy,
                                              distributed<service::migration_manager>& mm);

    /**
     * Flags the keyspace with the given name as "prioritized" for bootstrap,
     * which in effect lets it bypass concurrency control.
     *
     * Its only real purpose is to sidestep certain chicken-and-egg issues.
     *
     * Must only be called before bootstrap, and only on the main shard.
     */
    static void mark_keyspace_as_load_prio(const sstring&);

private:
    // Internal helpers; not callable from outside the class.
    static future<> cleanup_column_family_temp_sst_dirs(sstring sstdir);

    static future<> handle_sstables_pending_delete(sstring pending_deletes_dir);

    // Same parameter list as the public populate_column_family.
    static future<> do_populate_column_family(distributed<database>& db,
                                              sstring sstdir,
                                              sstring ks,
                                              sstring cf);
};