mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-24 02:20:37 +00:00
For tombstone expiration to proceed correctly without the risk of resurrecting data, the sstable set must be present. Regular compaction and its derivatives provide the sstable set, so they're able to expire tombstones with no resurrection risk. Resharding, on the other hand, can run on any shard, not necessarily on the same shard that one of the input sstables belongs to, so it currently cannot provide an sstable set for tombstone expiration to proceed safely. That being said, let's only do expiration based on the presence of the set. This makes room for the sstable set to be fed to compaction via the descriptor, allowing even resharding to do expiration. Currently, compaction thinks that the sstable set can only come from the table, and that also needs to be changed for further flexibility. It's theoretically possible that a given resharding job will resurrect data if a fully expired SSTable is resharded at a shard which it doesn't belong to. Resharding will have no way to tell that expiring all that data will lead to resurrection because the relevant SSTables are at different shards. This is fixed by checking for fully expired sstables only when the sstable set is present. Fixes #6600. Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com> Message-Id: <20200605200954.24696-1-raphaelsc@scylladb.com>
252 lines
9.3 KiB
C++
252 lines
9.3 KiB
C++
/*
|
|
* Copyright (C) 2015 ScyllaDB
|
|
*
|
|
*/
|
|
|
|
/*
|
|
* This file is part of Scylla.
|
|
*
|
|
* Scylla is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Affero General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* Scylla is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "database_fwd.hh"
|
|
#include "shared_sstable.hh"
|
|
#include "sstable_set.hh"
|
|
#include "gc_clock.hh"
|
|
#include "compaction_weight_registration.hh"
|
|
#include "utils/UUID.hh"
|
|
#include "dht/i_partitioner.hh"
|
|
#include "service/priority_manager.hh"
|
|
#include <seastar/core/thread.hh>
|
|
#include <functional>
|
|
|
|
class flat_mutation_reader;
|
|
|
|
namespace sstables {
|
|
|
|
enum class compaction_type;
|
|
|
|
class compaction_options {
|
|
public:
|
|
struct regular {
|
|
};
|
|
struct cleanup {
|
|
};
|
|
struct upgrade {
|
|
};
|
|
struct scrub {
|
|
bool skip_corrupted;
|
|
};
|
|
struct reshard {
|
|
};
|
|
|
|
private:
|
|
using options_variant = std::variant<regular, cleanup, upgrade, scrub, reshard>;
|
|
|
|
private:
|
|
options_variant _options;
|
|
|
|
private:
|
|
explicit compaction_options(options_variant options) : _options(std::move(options)) {
|
|
}
|
|
|
|
public:
|
|
static compaction_options make_reshard() {
|
|
return compaction_options(reshard{});
|
|
}
|
|
|
|
static compaction_options make_regular() {
|
|
return compaction_options(regular{});
|
|
}
|
|
|
|
static compaction_options make_cleanup() {
|
|
return compaction_options(cleanup{});
|
|
}
|
|
|
|
static compaction_options make_upgrade() {
|
|
return compaction_options(upgrade{});
|
|
}
|
|
|
|
static compaction_options make_scrub(bool skip_corrupted) {
|
|
return compaction_options(scrub{skip_corrupted});
|
|
}
|
|
|
|
template <typename... Visitor>
|
|
auto visit(Visitor&&... visitor) const {
|
|
return std::visit(std::forward<Visitor>(visitor)..., _options);
|
|
}
|
|
|
|
compaction_type type() const;
|
|
};
|
|
|
|
struct compaction_completion_desc {
|
|
// Old, existing SSTables that should be deleted and removed from the SSTable set.
|
|
std::vector<shared_sstable> old_sstables;
|
|
// New, fresh SSTables that should be added to SSTable set, replacing the old ones.
|
|
std::vector<shared_sstable> new_sstables;
|
|
// Set of compacted partition ranges that should be invalidated in the cache.
|
|
dht::partition_range_vector ranges_for_cache_invalidation;
|
|
};
|
|
|
|
// creates a new SSTable for a given shard
|
|
using creator_fn = std::function<shared_sstable(shard_id shard)>;
|
|
// Replaces old sstable(s) by new one(s) which contain all non-expired data.
|
|
using replacer_fn = std::function<void(compaction_completion_desc)>;
|
|
|
|
struct compaction_descriptor {
|
|
// List of sstables to be compacted.
|
|
std::vector<sstables::shared_sstable> sstables;
|
|
// This is a snapshot of the table's sstable set, used only for the purpose of expiring tombstones.
|
|
// If this sstable set cannot be provided, expiration will be disabled to prevent data from being resurrected.
|
|
std::optional<sstables::sstable_set> all_sstables_snapshot;
|
|
// Level of sstable(s) created by compaction procedure.
|
|
int level;
|
|
// Threshold size for sstable(s) to be created.
|
|
uint64_t max_sstable_bytes;
|
|
// Run identifier of output sstables.
|
|
utils::UUID run_identifier;
|
|
// Holds ownership of a weight assigned to this compaction iff it's a regular one.
|
|
std::optional<compaction_weight_registration> weight_registration;
|
|
// Calls compaction manager's task for this compaction to release reference to exhausted sstables.
|
|
std::function<void(const std::vector<shared_sstable>& exhausted_sstables)> release_exhausted;
|
|
// The options passed down to the compaction code.
|
|
// This also selects the kind of compaction to do.
|
|
compaction_options options = compaction_options::make_regular();
|
|
|
|
creator_fn creator;
|
|
replacer_fn replacer;
|
|
|
|
::io_priority_class io_priority = default_priority_class();
|
|
compaction_descriptor() = default;
|
|
|
|
static constexpr int default_level = 0;
|
|
static constexpr uint64_t default_max_sstable_bytes = std::numeric_limits<uint64_t>::max();
|
|
|
|
explicit compaction_descriptor(std::vector<sstables::shared_sstable> sstables,
|
|
std::optional<sstables::sstable_set> all_sstables_snapshot,
|
|
::io_priority_class io_priority,
|
|
int level = default_level,
|
|
uint64_t max_sstable_bytes = default_max_sstable_bytes,
|
|
utils::UUID run_identifier = utils::make_random_uuid(),
|
|
compaction_options options = compaction_options::make_regular())
|
|
: sstables(std::move(sstables))
|
|
, all_sstables_snapshot(std::move(all_sstables_snapshot))
|
|
, level(level)
|
|
, max_sstable_bytes(max_sstable_bytes)
|
|
, run_identifier(run_identifier)
|
|
, options(options)
|
|
, io_priority(io_priority)
|
|
{}
|
|
};
|
|
|
|
struct resharding_descriptor {
|
|
std::vector<sstables::shared_sstable> sstables;
|
|
uint64_t max_sstable_bytes;
|
|
shard_id reshard_at;
|
|
uint32_t level;
|
|
};
|
|
|
|
// Kinds of compaction-like operations. The numeric values are spelled out
// explicitly so that they stay fixed if enumerators are ever reordered.
enum class compaction_type {
    Compaction  = 0,
    Cleanup     = 1,
    Validation  = 2,
    Scrub       = 3,
    Index_build = 4,
    Reshard     = 5,
    Upgrade     = 6,
};
|
|
|
|
// Returns the upper-case display name of the given compaction type.
// Throws std::runtime_error if `type` matches none of the enumerators.
static inline sstring compaction_name(compaction_type type) {
    switch (type) {
    case compaction_type::Compaction:  return "COMPACTION";
    case compaction_type::Cleanup:     return "CLEANUP";
    case compaction_type::Validation:  return "VALIDATION";
    case compaction_type::Scrub:       return "SCRUB";
    case compaction_type::Index_build: return "INDEX_BUILD";
    case compaction_type::Reshard:     return "RESHARD";
    case compaction_type::Upgrade:     return "UPGRADE";
    }
    // Reached only when `type` holds a value outside the enumerators above.
    throw std::runtime_error("Invalid Compaction Type");
}
|
|
|
|
struct compaction_info {
|
|
compaction_type type = compaction_type::Compaction;
|
|
table* cf = nullptr;
|
|
sstring ks_name;
|
|
sstring cf_name;
|
|
size_t sstables = 0;
|
|
uint64_t start_size = 0;
|
|
uint64_t end_size = 0;
|
|
uint64_t total_partitions = 0;
|
|
uint64_t total_keys_written = 0;
|
|
int64_t ended_at;
|
|
std::vector<shared_sstable> new_sstables;
|
|
sstring stop_requested;
|
|
bool tracking = true;
|
|
utils::UUID run_identifier;
|
|
struct replacement {
|
|
const std::vector<shared_sstable> removed;
|
|
const std::vector<shared_sstable> added;
|
|
};
|
|
std::vector<replacement> pending_replacements;
|
|
|
|
bool is_stop_requested() const {
|
|
return stop_requested.size() > 0;
|
|
}
|
|
|
|
void stop(sstring reason) {
|
|
stop_requested = std::move(reason);
|
|
}
|
|
|
|
void stop_tracking() {
|
|
tracking = false;
|
|
}
|
|
};
|
|
|
|
// Compact a list of N sstables into M sstables.
|
|
// Returns info about the finished compaction, which includes vector to new sstables.
|
|
//
|
|
// creator is used to get a sstable object for a new sstable that will be written.
|
|
// replacer will replace old sstables by new ones in the column family.
|
|
// max_sstable_size is a relaxed limit size for a sstable to be generated.
|
|
// Example: It's okay for the size of a new sstable to go beyond max_sstable_size
|
|
// when writing its last partition.
|
|
// sstable_level will be level of the sstable(s) to be created by this function.
|
|
// If descriptor.cleanup is true, mutation that doesn't belong to current node will be
|
|
// cleaned up, log messages will inform the user that compact_sstables runs for
|
|
// cleaning operation, and compaction history will not be updated.
|
|
future<compaction_info> compact_sstables(sstables::compaction_descriptor descriptor, column_family& cf);
|
|
|
|
// Return list of expired sstables for column family cf.
|
|
// A sstable is fully expired *iff* its max_local_deletion_time precedes gc_before and its
|
|
// max timestamp is lower than any other relevant sstable.
|
|
// In simpler words, a sstable is fully expired if all of its live cells with TTL is expired
|
|
// and possibly doesn't contain any tombstone that covers cells in other sstables.
|
|
std::unordered_set<sstables::shared_sstable>
|
|
get_fully_expired_sstables(column_family& cf, const std::vector<sstables::shared_sstable>& compacting, gc_clock::time_point gc_before);
|
|
|
|
// For tests, can drop after we virtualize sstables.
|
|
flat_mutation_reader make_scrubbing_reader(flat_mutation_reader rd, bool skip_corrupted);
|
|
}
|