Files
scylladb/sstables/storage.hh
Taras Veretilnyk 51c345aaf6 sstables: add new rewrite component mechanism for safe sstable component rewriting
Previously, rewriting an sstable component (e.g., via rewrite_statistics) created a temporary file that was renamed
to the final name after sealing. This allows crash recovery by simply removing the temporary file on startup.

However, this approach won't work once component digests are stored in scylla_metadata,
as replacing a component like Statistics will require atomically updating both the component
and scylla_metadata with the new digest—impossible with POSIX rename.

The new mechanism creates a clone sstable with a fresh generation:
- Hard-links all components from the source except the component being rewritten and, if update_sstable_id is true, scylla metadata
- Copies original sstable components pointer and recognized components from the source
- Invokes a modifier callback to adjust the new sstable before rewriting
- Writes the modified component. If update_sstable_id is true, reads scylla metadata, generates a new sstable_id and rewrites the scylla metadata.
- Seals the new sstable with a temporary TOC
- Replaces the old sstable atomically, the same way as it is done in compaction

This is built on the rewrite_sstables compaction framework to support batch operations (e.g., following incremental repair).
In case of any failure during the whole process, the new sstable will be automatically deleted on node startup because its
temporary TOC is still present.

This prepares the infrastructure for component digests. Once digests are introduced in scylla_metadata
this mechanism will be extended to also rewrite scylla metadata with the updated digest alongside the modified component, ensuring atomic updates of both.
2026-02-26 22:38:55 +01:00

137 lines
5.6 KiB
C++

/*
* Copyright (C) 2015-present ScyllaDB
*
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#pragma once
#include "utils/assert.hh"
#include <filesystem>
#include <seastar/core/file.hh>
#include <seastar/core/fstream.hh>
#include <seastar/core/future.hh>
#include <seastar/core/reactor.hh>
#include "data_dictionary/storage_options.hh"
#include "seastarx.hh"
#include "sstables/shared_sstable.hh"
#include "sstables/component_type.hh"
#include "sstables/generation_type.hh"
#include "utils/disk-error-handler.hh"
// Forward declarations of types defined in other subsystems.
// `schema` is only taken by const reference in this header.
// `data_dictionary::storage_options` is also fully defined by the
// "data_dictionary/storage_options.hh" include above.
// NOTE(review): db::config does not appear to be referenced in this header — check whether this declaration is still needed.
class schema;
namespace data_dictionary {
class storage_options;
}
namespace db { class config; }
namespace sstables {
// Forward declarations of sstables types. In this header they appear only as
// pointer/reference parameters or in function declarations without a body,
// so their full definitions are not required here.
enum class sstable_state;
class delayed_commit_changes;
class sstable;
class sstables_manager;
class entry_descriptor;
// Value produced by storage::atomic_delete_prepare() and later consumed by
// storage::atomic_delete_complete().
struct atomic_delete_context {
// NOTE(review): presumably identifies the pending-delete log created by the prepare step — confirm against the implementation.
sstring pending_delete_log;
// NOTE(review): presumably the storage prefixes of the sstables being deleted — confirm against the implementation.
std::unordered_set<sstring> prefixes;
};
// A directory path paired with a lazily-opened handle to that directory.
//
// The handle is not opened at construction; the first sync() opens it and
// later sync() calls reuse it. The type is move-only. The destructor is
// defaulted, so close() is the only place in this class that explicitly
// closes the handle.
class opened_directory final {
    std::filesystem::path _pathname;
    file _file;

public:
    explicit opened_directory(std::filesystem::path pathname)
        : _pathname(std::move(pathname)) {
    }
    explicit opened_directory(const sstring& dir)
        : _pathname(std::string_view(dir)) {
    }

    // Movable, not copyable.
    opened_directory(opened_directory&&) = default;
    opened_directory& operator=(opened_directory&&) = default;
    opened_directory(const opened_directory&) = delete;
    opened_directory& operator=(const opened_directory&) = delete;
    ~opened_directory() = default;

    // The directory path in the platform's native string form.
    const std::filesystem::path::string_type& native() const noexcept {
        return _pathname.native();
    }

    // The directory path as given at construction.
    const std::filesystem::path& path() const noexcept {
        return _pathname;
    }

    // Flushes the directory, opening it first if no handle is held yet.
    // Both the open and the flush go through do_io_check() with the
    // supplied error_handler.
    future<> sync(io_error_handler error_handler) {
        if (!_file) {
            _file = co_await do_io_check(error_handler, open_directory, native());
        }
        co_await do_io_check(error_handler, std::mem_fn(&file::flush), _file);
    }

    // Closes the directory handle if one is held; otherwise resolves immediately.
    future<> close() {
        if (!_file) {
            return make_ready_future<>();
        }
        return _file.close();
    }
};
// Abstract interface between an sstable and the storage that backs it.
// Instances are created through make_storage().
class storage {
    friend class test;

    // Optional operations. The base-class versions fail a SCYLLA_ASSERT, so
    // they must only be invoked on backends that override them.
    // Internal, but can also be used by tests.

    virtual future<> change_dir_for_test(sstring nd) {
        SCYLLA_ASSERT(false && "Changing directory not implemented");
    }

    virtual future<> create_links(const sstable& sst, const std::filesystem::path& dir) const {
        SCYLLA_ASSERT(false && "Direct links creation not implemented");
    }

    virtual future<> move(const sstable& sst, sstring new_dir, generation_type generation, delayed_commit_changes* delay) {
        SCYLLA_ASSERT(false && "Direct move not implemented");
    }

public:
    // Clones `sst` into generation `new_gen`, hard-linking every component
    // that is not listed in `excluded_components`.
    // The clone is created with a TemporaryTOC, so unless it gets sealed it
    // will be removed on restart.
    // The base-class version fails a SCYLLA_ASSERT (not implemented).
    virtual future<> link_with_excluded_components(
            const sstable& sst,
            generation_type new_gen,
            const std::unordered_set<component_type>& excluded_components) const {
        SCYLLA_ASSERT(false && "link_with_excluded_components not implemented");
    }

    virtual ~storage() = default;

    // Flag type passed to wipe(); meaningful only to filesystem storage.
    using sync_dir = bool_class<struct sync_dir_tag>;

    // --- sstable lifecycle -------------------------------------------------

    virtual future<> seal(const sstable& sst) = 0;
    virtual future<> snapshot(const sstable& sst, sstring name) const = 0;
    virtual future<> clone(const sstable& sst, generation_type gen, bool leave_unsealed) const = 0;
    virtual future<> change_state(const sstable& sst, sstable_state to, generation_type generation, delayed_commit_changes* delay) = 0;

    // Must be called from an async context.
    virtual void open(sstable& sst) = 0;

    // Declared noexcept.
    virtual future<> wipe(const sstable& sst, sync_dir) noexcept = 0;

    // --- component I/O -----------------------------------------------------

    virtual future<file> open_component(const sstable& sst, component_type type, open_flags flags, file_open_options options, bool check_integrity) = 0;
    virtual future<data_sink> make_data_or_index_sink(sstable& sst, component_type type) = 0;
    virtual future<data_source> make_data_or_index_source(sstable& sst, component_type type, file f, uint64_t offset, uint64_t len, file_input_stream_options opt) const = 0;
    virtual future<data_source> make_source(sstable& sst, component_type type, file f, uint64_t offset, uint64_t len, file_input_stream_options opt) const = 0;
    virtual future<data_sink> make_component_sink(sstable& sst, component_type type, open_flags oflags, file_output_stream_options options) = 0;

    // --- removal -----------------------------------------------------------

    virtual future<> destroy(const sstable& sst) = 0;

    // Two-step deletion of a set of sstables: the context returned by the
    // prepare step is handed to the complete step.
    virtual future<atomic_delete_context> atomic_delete_prepare(const std::vector<shared_sstable>&) const = 0;
    virtual future<> atomic_delete_complete(atomic_delete_context ctx) const = 0;

    virtual future<> remove_by_registry_entry(entry_descriptor desc) = 0;

    // Amount of free space available in the underlying storage.
    virtual future<uint64_t> free_space() const = 0;

    // Declared noexcept.
    virtual future<> unlink_component(const sstable& sst, component_type) noexcept = 0;

    virtual sstring prefix() const = 0;
};
// Creates a storage object from the given storage options and sstable state.
// NOTE(review): which concrete backend is chosen for which options is decided in the implementation, not visible here.
std::unique_ptr<sstables::storage> make_storage(sstables_manager& manager, const data_dictionary::storage_options& s_opts, sstable_state state);
// Per-table storage initialisation; resolves to a shared pointer to storage options.
// NOTE(review): presumably the options the table should use afterwards — confirm against the implementation.
future<lw_shared_ptr<const data_dictionary::storage_options>> init_table_storage(const sstables_manager&, const schema&, const data_dictionary::storage_options& so);
// NOTE(review): by name, the counterpart of init_table_storage() — confirm what exactly is torn down.
future<> destroy_table_storage(const data_dictionary::storage_options& so);
// Per-keyspace storage initialisation for the keyspace named `ks_name`.
future<> init_keyspace_storage(const sstables_manager&, const data_dictionary::storage_options& so, sstring ks_name);
// Returns a list of filesystem paths computed from the configured data file directories and the local storage options.
// NOTE(review): how the two inputs are combined is not visible here — see the implementation.
std::vector<std::filesystem::path> get_local_directories(const std::vector<sstring>& data_file_directories, const data_dictionary::storage_options::local& so);
} // namespace sstables