This pull request adds support for calculating and storing CRC32 digests for all SSTable components. This change replaces the plain file_writer with crc32_digest_file_writer for all SSTable components that should be checksummed. The resulting component digests are stored in the sstable structure and later persisted to disk as part of the Scylla metadata component during writer::consume_end_of_stream. Several test cases were introduced to verify the expected behaviour. Additionally, this PR adds a new rewrite component mechanism for safe sstable component rewriting. Previously, rewriting an sstable component (e.g., via rewrite_statistics) created a temporary file that was renamed to the final name after sealing. This allowed crash recovery by simply removing the temporary file on startup. However, with component digests stored in scylla_metadata (#20100), replacing a component like Statistics requires atomically updating both the component and scylla_metadata with the new digest, which is impossible with POSIX rename. The new mechanism creates a clone sstable with a fresh generation: - Hard-links all components from the source except the component being rewritten and scylla_metadata - Copies the original sstable components pointer and recognized components from the source - Invokes a modifier callback to adjust the new sstable before rewriting - Writes the modified component along with updated scylla_metadata containing the new digest - Seals the new sstable with a temporary TOC - Replaces the old sstable atomically, the same way as it is done in compaction. This is built on the rewrite_sstables compaction framework to support batch operations (e.g., following incremental repair). In case of any failure during the whole process, the sstable will be automatically deleted on node startup because its temporary TOC is still present.
Backport is not required; it is a new feature. Fixes https://github.com/scylladb/scylladb/issues/20100, https://github.com/scylladb/scylladb/issues/27453 Closes scylladb/scylladb#28338 * github.com:scylladb/scylladb: docs: document components_digests subcomponent and trailing digest in Scylla.db sstable_compaction_test: Add tests for perform_component_rewrite sstable_test: add verification testcases of SSTable components digests persistance sstables: store digest of all sstable components in scylla metadata sstables: replace rewrite_statistics with new rewrite component mechanism sstables: add new rewrite component mechanism for safe sstable component rewriting compaction: add compaction_group_view method to specify sstable version sstables: add null_data_sink and serialized_checksum for checksum-only calculation sstables: extract default write open flags into a constant sstables: Add write_simple_with_digest for component checksumming sstables: Extract file writer closing logic into separate methods sstables: Implement CRC32 digest-only writer
80 lines
3.5 KiB
C++
80 lines
3.5 KiB
C++
/*
|
|
* Copyright (C) 2021-present ScyllaDB
|
|
*
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <seastar/core/condition-variable.hh>
|
|
|
|
#include "schema/schema_fwd.hh"
|
|
#include "sstables/open_info.hh"
|
|
#include "compaction_descriptor.hh"
|
|
|
|
class reader_permit;
|
|
|
|
namespace sstables {
|
|
class sstable_set;
|
|
class sstables_manager;
|
|
struct sstable_writer_config;
|
|
}
|
|
|
|
namespace compaction {
|
|
class compaction_strategy;
|
|
class compaction_strategy_state;
|
|
class compaction_backlog_tracker;
|
|
|
|
class compaction_group_view {
|
|
public:
|
|
virtual ~compaction_group_view() {}
|
|
virtual dht::token_range token_range() const noexcept = 0;
|
|
virtual const schema_ptr& schema() const noexcept = 0;
|
|
// min threshold as defined by table.
|
|
virtual unsigned min_compaction_threshold() const noexcept = 0;
|
|
virtual bool compaction_enforce_min_threshold() const noexcept = 0;
|
|
virtual future<lw_shared_ptr<const sstables::sstable_set>> main_sstable_set() const = 0;
|
|
virtual future<lw_shared_ptr<const sstables::sstable_set>> maintenance_sstable_set() const = 0;
|
|
virtual lw_shared_ptr<const sstables::sstable_set> sstable_set_for_tombstone_gc() const = 0;
|
|
virtual std::unordered_set<sstables::shared_sstable> fully_expired_sstables(const std::vector<sstables::shared_sstable>& sstables, gc_clock::time_point compaction_time) const = 0;
|
|
virtual const std::vector<sstables::shared_sstable>& compacted_undeleted_sstables() const noexcept = 0;
|
|
virtual compaction_strategy& get_compaction_strategy() const noexcept = 0;
|
|
virtual compaction_strategy_state& get_compaction_strategy_state() noexcept = 0;
|
|
virtual reader_permit make_compaction_reader_permit() const = 0;
|
|
virtual sstables::sstables_manager& get_sstables_manager() noexcept = 0;
|
|
virtual sstables::shared_sstable make_sstable(sstables::sstable_state) const = 0;
|
|
virtual sstables::shared_sstable make_sstable(sstables::sstable_state, sstables::sstable_version_types) const = 0;
|
|
virtual sstables::sstable_writer_config configure_writer(sstring origin) const = 0;
|
|
virtual api::timestamp_type min_memtable_timestamp() const = 0;
|
|
virtual api::timestamp_type min_memtable_live_timestamp() const = 0;
|
|
virtual api::timestamp_type min_memtable_live_row_marker_timestamp() const = 0;
|
|
virtual bool memtable_has_key(const dht::decorated_key& key) const = 0;
|
|
virtual future<> on_compaction_completion(compaction_completion_desc desc, sstables::offstrategy offstrategy) = 0;
|
|
virtual bool is_auto_compaction_disabled_by_user() const noexcept = 0;
|
|
virtual bool tombstone_gc_enabled() const noexcept = 0;
|
|
virtual tombstone_gc_state get_tombstone_gc_state() const noexcept = 0;
|
|
virtual compaction_backlog_tracker& get_backlog_tracker() = 0;
|
|
virtual const std::string get_group_id() const noexcept = 0;
|
|
virtual seastar::condition_variable& get_staging_done_condition() noexcept = 0;
|
|
virtual dht::token_range get_token_range_after_split(const dht::token& t) const noexcept = 0;
|
|
virtual int64_t get_sstables_repaired_at() const noexcept = 0;
|
|
};
|
|
|
|
} // namespace compaction
|
|
|
|
namespace fmt {
|
|
|
|
template <>
|
|
struct formatter<compaction::compaction_group_view> : formatter<string_view> {
|
|
template <typename FormatContext>
|
|
auto format(const compaction::compaction_group_view& t, FormatContext& ctx) const {
|
|
auto s = t.schema();
|
|
return fmt::format_to(ctx.out(), "{}.{} compaction_group={}", s->ks_name(), s->cf_name(), t.get_group_id());
|
|
}
|
|
};
|
|
|
|
} // namespace fmt
|