/* * Copyright (C) 2020 ScyllaDB */ /* * This file is part of Scylla. * * Scylla is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Scylla is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Scylla. If not, see . */ #pragma once #include "raft.hh" namespace raft { enum class wait_type { committed, applied }; // A single uniquely identified participant of a Raft group. class server { public: struct configuration { // automatically snapshot state machine after applying // this number of entries size_t snapshot_threshold = 1024; // how many entries to leave in the log after tacking a snapshot size_t snapshot_trailing = 200; // max size of appended entries in bytes size_t append_request_threshold = 100000; // Max number of entries of in-memory part of the log after // which requests are stopped to be admitted until the log // is shrunk back by a snapshot. Should be greater than // whatever the default number of trailing log entries // is configured by the snapshot, otherwise the state // machine will deadlock on attempt to submit a new entry. size_t max_log_size = 5000; // If set to true will enable prevoting stage during election bool enable_prevoting = true; }; virtual ~server() {} // Add command to replicated log // Returned future is resolved depending on wait_type parameter: // 'committed' - when the entry is committed // 'applied' - when the entry is applied (happens after it is committed) // The function has to be called on a leader, throws not_a_leader exception otherwise. // May fail because of internal error or because leader changed and an entry was replaced // by another leader. In the later case dropped_entry exception will be returned. virtual future<> add_entry(command command, wait_type type) = 0; // Set a new cluster configuration. If the configuration is // identical to the previous one does nothing. // Provided node_info is passed to rpc::add_server() for each // new server and rpc::remove_server() is called for each // departing server. // struct node_info is expected to contain connection // information/credentials which is then used by RPC. // Can be called on a leader only, otherwise throws not_a_leader. // Cannot be called until previous set_configuration() completes // otherwise throws conf_change_in_progress exception. virtual future<> set_configuration(server_address_set c_new) = 0; // Load persisted state and start background work that needs // to run for this Raft server to function; The object cannot // be used until the returned future is resolved. virtual future<> start() = 0; // Stop this Raft server, all submitted but not completed // operations will get an error and callers will not be able // to know if they succeeded or not. If this server was // a leader it will relinquish its leadership and cease // replication. virtual future<> abort() = 0; // Return Raft protocol current term. virtual term_t get_current_term() const = 0; // May be called before attempting a read from the local state // machine. The read should proceed only after the returned // future has resolved successfully. // If called not on a leader throws not_a_leader error. // After calling this function and resolving the returned // future: // // 1) The result of all completed // add_entries(wait_type::applied) can be observed by // direct access to the local state machine. // 2) A subsequent add_entry() is likely to find this // server still in the leader role. // 3) If the caller ensures that writes to the state machine // are linearised and the current term didn't change // between read_barrier() and add_entry(), (@sa // get_current_term()), a pair of read from the state // machine and add_entry() will be linearised as well. // // To sum up, @read_barrier() can be used as a poor man // distributed Compare-And-Swap: // // lock() // term_t term = get_current_term() // co_await read_barrier() // ... Read previous value from the state machine ... // ... Create a new value ... // if (term == get_current_term())) { // co_await add_entry(); // } // unlock() virtual future<> read_barrier() = 0; // Ad hoc functions for testing virtual future<> elect_me_leader() = 0; virtual future<> wait_log_idx(index_t) = 0; virtual index_t log_last_idx() = 0; virtual void elapse_election() = 0; virtual bool is_leader() = 0; virtual void tick() = 0; }; std::unique_ptr create_server(server_id uuid, std::unique_ptr rpc, std::unique_ptr state_machine, std::unique_ptr persistence, seastar::shared_ptr failure_detector, server::configuration config); } // namespace raft