Files
scylladb/db/batchlog_manager.hh
Botond Dénes dd50bd9bd4 db/batchlog_manager: re-add v1 support
system.batchlog will still have to be used while the cluster is
upgrading from an older version, which doesn't know v2 yet.
Re-add support for replaying v1 batchlogs. The switch to v2 will happen
after the BATCHLOG_V2 cluster feature is enabled.

The only external user -- storage_proxy -- only needs a minor
adjustment: switch between the table names. The rest is handled
transparently by the db/batchlog.hh interface and the batchlog_manager.
2026-02-20 07:03:46 +02:00

125 lines
3.4 KiB
C++

/*
* Copyright (C) 2015-present ScyllaDB
*
* Modified by ScyllaDB
*/
/*
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
*/
#pragma once
#include <seastar/core/future.hh>
#include <seastar/core/sharded.hh>
#include <seastar/core/gate.hh>
#include <seastar/core/metrics_registration.hh>
#include <seastar/core/abort_source.hh>
#include "db_clock.hh"
#include <chrono>
#include <limits>
namespace cql3 {
class query_processor;
} // namespace cql3
namespace gms {
class feature_service;
} // namespace gms
namespace db {
class system_keyspace;
using all_batches_replayed = bool_class<struct all_batches_replayed_tag>;
struct batchlog_manager_config {
db_clock::duration replay_timeout;
uint64_t replay_rate = std::numeric_limits<uint64_t>::max();
std::chrono::milliseconds delay = std::chrono::milliseconds(0);
unsigned replay_cleanup_after_replays;
};
enum class batchlog_stage : int8_t {
initial,
failed_replay
};
class batchlog_manager : public peering_sharded_service<batchlog_manager> {
public:
using post_replay_cleanup = bool_class<class post_replay_cleanup_tag>;
struct stats {
uint64_t write_attempts = 0;
};
private:
static constexpr std::chrono::seconds replay_interval = std::chrono::seconds(60);
static constexpr uint32_t page_size = 128; // same as HHOM, for now, w/out using any heuristics. TODO: set based on avg batch size.
static constexpr std::chrono::seconds write_timeout = std::chrono::seconds(300);
using clock_type = lowres_clock;
stats _stats;
seastar::metrics::metric_groups _metrics;
cql3::query_processor& _qp;
db::system_keyspace& _sys_ks;
gms::feature_service& _fs;
db_clock::duration _replay_timeout;
uint64_t _replay_rate;
std::chrono::milliseconds _delay;
unsigned _replay_cleanup_after_replays = 100;
semaphore _sem{1};
seastar::named_gate _gate;
unsigned _cpu = 0;
seastar::abort_source _stop;
future<> _loop_done;
gc_clock::time_point _last_replay;
// Was the v1 -> v2 migration already done since last restart?
// The migration is attempted once after each restart. This is redundant but
// keeps thing simple. Once no upgrade path exists from a ScyllaDB version
// which can still produce v1 entries, this migration code can be removed.
bool _migration_done = false;
future<> maybe_migrate_v1_to_v2();
future<all_batches_replayed> replay_all_failed_batches_v1(post_replay_cleanup cleanup);
future<all_batches_replayed> replay_all_failed_batches_v2(post_replay_cleanup cleanup);
future<all_batches_replayed> replay_all_failed_batches(post_replay_cleanup cleanup);
public:
// Takes a QP, not a distributes. Because this object is supposed
// to be per shard and does no dispatching beyond delegating the the
// shard qp (which is what you feed here).
batchlog_manager(cql3::query_processor&, db::system_keyspace& sys_ks, gms::feature_service& fs, batchlog_manager_config config);
// abort the replay loop and return its future.
future<> drain();
future<> stop();
future<all_batches_replayed> do_batch_log_replay(post_replay_cleanup cleanup);
future<size_t> count_all_batches() const;
gc_clock::time_point get_last_replay() const {
return _last_replay;
}
const stats& get_stats() const {
return _stats;
}
private:
future<> batchlog_replay_loop();
};
}