Files
scylladb/raft/server.hh
Konstantin Osipov 51c968bcb4 raft: rename log::non_snapshoted_length() to log::in_memory_size()
The old name was incorrect, in case apply_snapshot() was called with
non-zero trailing entries, the total log length is greater than the
length of the part that is not stored in a snapshot.

Fix spelling in related comments.

Rename fsm::wait() to fsm::wait_max_log_size(), it's a more
specific name. Rename max_log_length to max_log_size to use
'size' rather than 'length' consistently for log size.
2021-02-18 16:04:44 +03:00

135 lines
5.3 KiB
C++

/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "raft.hh"
namespace raft {
// Specifies when the future returned by server::add_entry()
// is resolved (see add_entry() below for details).
enum class wait_type {
// Resolve as soon as the entry is committed to the replicated log.
committed,
// Resolve once the entry is applied to the state machine;
// this happens after it is committed.
applied
};
// A single uniquely identified participant of a Raft group.
class server {
public:
// Tunable parameters of a server instance, fixed at creation time
// (passed to create_server()).
struct configuration {
// automatically snapshot state machine after applying
// this number of entries
size_t snapshot_threshold = 1024;
// how many entries to leave in the log after taking a snapshot
size_t snapshot_trailing = 200;
// max size of appended entries in bytes
size_t append_request_threshold = 100000;
// Max number of entries of in-memory part of the log after
// which new requests are no longer admitted until the log
// is shrunk back by a snapshot. Should be greater than
// whatever the default number of trailing log entries
// is configured by the snapshot, otherwise the state
// machine will deadlock on attempt to submit a new entry.
size_t max_log_size = 5000;
};
virtual ~server() {}
// Add command to replicated log.
// The returned future is resolved depending on the wait_type parameter:
// 'committed' - when the entry is committed
// 'applied' - when the entry is applied (happens after it is committed)
// The function has to be called on a leader, throws not_a_leader exception otherwise.
// May fail because of an internal error or because the leader changed and the entry
// was replaced by another leader. In the latter case a dropped_entry exception
// will be returned.
virtual future<> add_entry(command command, wait_type type) = 0;
// Set a new cluster configuration. If the configuration is
// identical to the previous one does nothing.
// Provided node_info is passed to rpc::add_server() for each
// new server and rpc::remove_server() is called for each
// departing server.
// struct node_info is expected to contain connection
// information/credentials which is then used by RPC.
// Can be called on a leader only, otherwise throws not_a_leader.
// Cannot be called until a previous set_configuration() completes,
// otherwise throws conf_change_in_progress exception.
virtual future<> set_configuration(server_address_set c_new) = 0;
// Load persisted state and start background work that needs
// to run for this Raft server to function; The object cannot
// be used until the returned future is resolved.
virtual future<> start() = 0;
// Stop this Raft server: all submitted but not completed
// operations will get an error and callers will not be able
// to know if they succeeded or not. If this server was
// a leader it will relinquish its leadership and cease
// replication.
virtual future<> abort() = 0;
// Return the current term of the Raft protocol.
virtual term_t get_current_term() const = 0;
// May be called before attempting a read from the local state
// machine. The read should proceed only after the returned
// future has resolved successfully.
// If called not on a leader throws not_a_leader error.
// After calling this function and resolving the returned
// future:
//
// 1) The result of all completed
// add_entry(wait_type::applied) calls can be observed by
// direct access to the local state machine.
// 2) A subsequent add_entry() is likely to find this
// server still in the leader role.
// 3) If the caller ensures that writes to the state machine
// are linearised and the current term didn't change
// between read_barrier() and add_entry(), (@sa
// get_current_term()), a pair of read from the state
// machine and add_entry() will be linearised as well.
//
// To sum up, @read_barrier() can be used as a poor man's
// distributed Compare-And-Swap:
//
// lock()
// term_t term = get_current_term()
// co_await read_barrier()
// ... Read previous value from the state machine ...
// ... Create a new value ...
// if (term == get_current_term()) {
// co_await add_entry();
// }
// unlock()
virtual future<> read_barrier() = 0;
// Ad hoc functions for testing.
// Campaign in an election until this server becomes the leader.
virtual future<> elect_me_leader() = 0;
// Wait until the log reaches the given index.
virtual future<> wait_log_idx(index_t) = 0;
// Return the index of the last entry in the log.
virtual index_t log_last_idx() = 0;
// Force the election timeout to expire.
virtual void elapse_election() = 0;
// True iff this server currently believes it is the leader.
virtual bool is_leader() = 0;
// Advance the logical clock by one tick.
virtual void tick() = 0;
};
// Factory function: create a Raft server instance identified by
// @uuid. Takes ownership of the rpc, state machine and persistence
// modules (passed by unique_ptr) and shares ownership of the
// failure detector. Behaviour is tuned via server::configuration.
// The instance must be start()-ed before use.
std::unique_ptr<server> create_server(server_id uuid, std::unique_ptr<rpc> rpc,
std::unique_ptr<state_machine> state_machine, std::unique_ptr<persistence> persistence,
seastar::shared_ptr<failure_detector> failure_detector, server::configuration config);
} // namespace raft