Files
scylladb/service/storage_service.cc
Glauber Costa 12dc7ebd26 system.local: convert update tokens
At this point, users of the interface are futurized already, so we
just need to make sure they call the right function.

Signed-off-by: Glauber Costa <glommer@cloudius-systems.com>
2015-07-07 20:24:43 -04:00

961 lines
42 KiB
C++

/*
* Copyright (C) 2015 Cloudius Systems, Ltd.
*/
#include "storage_service.hh"
#include "core/distributed.hh"
#include "locator/snitch_base.hh"
#include "db/system_keyspace.hh"
namespace service {
// Ring delay, initialized once from get_ring_delay(). bootstrap() converts it
// with std::chrono::milliseconds(RING_DELAY), i.e. the value is in milliseconds.
int storage_service::RING_DELAY = storage_service::get_ring_delay();
// distributed<> container of storage_service instances. replicate_to_all_cores()
// calls invoke_on_all() on it to copy _token_metadata to the other cores.
distributed<storage_service> _the_storage_service;
// Tells whether this node has to bootstrap before joining the ring.
//
// FIXME: Currently, we do bootstrap if we are not a seed node. The original
// condition was:
// return DatabaseDescriptor.isAutoBootstrap() && !SystemKeyspace.bootstrapComplete() && !DatabaseDescriptor.getSeeds().contains(FBUtilities.getBroadcastAddress());
bool storage_service::should_bootstrap() {
    auto seed_nodes = gms::get_local_gossiper().get_seeds();
    const bool is_seed = seed_nodes.count(get_broadcast_address()) != 0;
    return !is_seed;
}
// Starts the gossiper and publishes this node's initial application state.
//
// Does nothing (returns a ready future) when _joined is already set. Otherwise:
//  1. reads the local host id and records it in _token_metadata,
//  2. builds the initial application states (NET_VERSION, HOST_ID, RPC_ADDRESS,
//     RELEASE_VERSION), registers this object with the gossiper and starts it,
//  3. gossips the snitch's DC/rack via gossip_snitch_info().
//
// The returned future resolves once all of the above has completed.
future<> storage_service::prepare_to_join() {
    if (_joined) {
        return make_ready_future<>();
    }
#if 0
if (DatabaseDescriptor.isReplacing() && !(Boolean.parseBoolean(System.getProperty("cassandra.join_ring", "true"))))
throw new ConfigurationException("Cannot set both join_ring=false and attempt to replace a node");
if (DatabaseDescriptor.getReplaceTokens().size() > 0 || DatabaseDescriptor.getReplaceNode() != null)
throw new RuntimeException("Replace method removed; use cassandra.replace_address instead");
if (DatabaseDescriptor.isReplacing())
{
if (SystemKeyspace.bootstrapComplete())
throw new RuntimeException("Cannot replace address with a node that is already bootstrapped");
if (!DatabaseDescriptor.isAutoBootstrap())
throw new RuntimeException("Trying to replace_address with auto_bootstrap disabled will not work, check your configuration");
_bootstrap_tokens = prepareReplacementInfo();
appStates.put(ApplicationState.TOKENS, valueFactory.tokens(_bootstrap_tokens));
appStates.put(ApplicationState.STATUS, valueFactory.hibernate(true));
}
else if (should_bootstrap())
{
checkForEndpointCollision();
}
#endif
    // have to start the gossip service before we can see any info on other nodes. this is necessary
    // for bootstrap to get the load info it needs.
    // (we won't be part of the storage ring though until we add a counterId to our state, below.)
    // Seed the host ID-to-endpoint map with our own ID.
    return db::system_keyspace::get_local_host_id().then([this] (auto local_host_id) {
        std::map<gms::application_state, gms::versioned_value> app_states;
        _token_metadata.update_host_id(local_host_id, this->get_broadcast_address());
        // FIXME: DatabaseDescriptor.getBroadcastRpcAddress()
        gms::inet_address broadcast_rpc_address;
        app_states.emplace(gms::application_state::NET_VERSION, value_factory.network_version());
        app_states.emplace(gms::application_state::HOST_ID, value_factory.host_id(local_host_id));
        app_states.emplace(gms::application_state::RPC_ADDRESS, value_factory.rpcaddress(broadcast_rpc_address));
        app_states.emplace(gms::application_state::RELEASE_VERSION, value_factory.release_version());
        //logger.info("Starting up server gossip");
        auto& gossiper = gms::get_local_gossiper();
        gossiper.register_(this);
        using namespace std::chrono;
        // The generation number is derived from the wall clock. Use system_clock:
        // high_resolution_clock has an unspecified epoch and may be an alias of
        // steady_clock, which would make the value meaningless across restarts.
        auto now = system_clock::now().time_since_epoch();
        int generation_number = duration_cast<seconds>(now).count();
        // FIXME: SystemKeyspace.incrementAndGetGeneration()
        return gossiper.start(generation_number, app_states).then([this] {
            print("Start gossiper service ...\n");
#if SS_DEBUG
            gms::get_local_gossiper().debug_show();
            _token_metadata.debug_show();
#endif
        });
    }).then([this] {
        // gossip snitch infos (local DC and rack)
        gossip_snitch_info();
#if 0
// gossip Schema.emptyVersion forcing immediate check for schema updates (see MigrationManager#maybeScheduleSchemaPull)
Schema.instance.updateVersionAndAnnounce(); // Ensure we know our own actual Schema UUID in preparation for updates
if (!MessagingService.instance().isListening())
MessagingService.instance().listen(FBUtilities.getLocalAddress());
LoadBroadcaster.instance.startBroadcasting();
HintedHandOffManager.instance.start();
BatchlogManager.instance.start();
#endif
    });
}
// Makes this node a member of the token ring.
//
// @param delay  how long (in milliseconds, counted in 1000 ms steps) to wait
//               before bootstrapping when should_bootstrap() is true.
// @return a future that resolves after set_tokens() has completed. When the
//         node bootstraps, it also covers the wait and bootstrap(), so a
//         bootstrap failure is propagated to the caller.
//
// Non-bootstrapping nodes pick random tokens straight away. Sets _joined.
future<> storage_service::join_token_ring(int delay) {
    auto f = make_ready_future<>();
    _joined = true;
#if 0
// We bootstrap if we haven't successfully bootstrapped before, as long as we are not a seed.
// If we are a seed, or if the user manually sets auto_bootstrap to false,
// we'll skip streaming data from other nodes and jump directly into the ring.
//
// The seed check allows us to skip the RING_DELAY sleep for the single-node cluster case,
// which is useful for both new users and testing.
//
// We attempted to replace this with a schema-presence check, but you need a meaningful sleep
// to get schema info from gossip which defeats the purpose. See CASSANDRA-4427 for the gory details.
Set<InetAddress> current = new HashSet<>();
logger.debug("Bootstrap variables: {} {} {} {}",
DatabaseDescriptor.isAutoBootstrap(),
SystemKeyspace.bootstrapInProgress(),
SystemKeyspace.bootstrapComplete(),
DatabaseDescriptor.getSeeds().contains(FBUtilities.getBroadcastAddress()));
if (DatabaseDescriptor.isAutoBootstrap() && !SystemKeyspace.bootstrapComplete() && DatabaseDescriptor.getSeeds().contains(FBUtilities.getBroadcastAddress()))
logger.info("This node will not auto bootstrap because it is configured to be a seed node.");
#endif
    if (should_bootstrap()) {
        // Milliseconds slept so far; shared between the stop condition and the loop body.
        auto elapsed = make_shared<int>(0);
        auto stop_cond = [elapsed, delay] {
            // FIXME
            // if we see schema, we can proceed to the next check directly
            // if (!Schema.instance.getVersion().equals(Schema.emptyVersion)) {
            // return true;
            // }
            return *elapsed >= delay;
        };
        f = do_until(stop_cond, [elapsed] {
            auto t = 1000;
            return sleep(std::chrono::milliseconds(t)).then([elapsed, t] {
                *elapsed += t;
            });
        }).then([this] {
            _bootstrap_tokens = boot_strapper::get_bootstrap_tokens(_token_metadata);
            // Return the future so that set_tokens() below runs only after
            // bootstrap has finished and so that its failure is not dropped.
            return bootstrap(_bootstrap_tokens);
        });
#if 0
if (SystemKeyspace.bootstrapInProgress())
logger.warn("Detected previous bootstrap failure; retrying");
else
SystemKeyspace.setBootstrapState(SystemKeyspace.BootstrapState.IN_PROGRESS);
setMode(Mode.JOINING, "waiting for ring information", true);
// first sleep the delay to make sure we see all our peers
for (int i = 0; i < delay; i += 1000)
{
// if we see schema, we can proceed to the next check directly
if (!Schema.instance.getVersion().equals(Schema.emptyVersion))
{
logger.debug("got schema: {}", Schema.instance.getVersion());
break;
}
Uninterruptibles.sleepUninterruptibly(1, TimeUnit.SECONDS);
}
// if our schema hasn't matched yet, keep sleeping until it does
// (post CASSANDRA-1391 we don't expect this to be necessary very often, but it doesn't hurt to be careful)
while (!MigrationManager.isReadyForBootstrap())
{
setMode(Mode.JOINING, "waiting for schema information to complete", true);
Uninterruptibles.sleepUninterruptibly(1, TimeUnit.SECONDS);
}
setMode(Mode.JOINING, "schema complete, ready to bootstrap", true);
setMode(Mode.JOINING, "waiting for pending range calculation", true);
PendingRangeCalculatorService.instance.blockUntilFinished();
setMode(Mode.JOINING, "calculation complete, ready to bootstrap", true);
if (logger.isDebugEnabled())
logger.debug("... got ring + schema info");
if (Boolean.parseBoolean(System.getProperty("cassandra.consistent.rangemovement", "true")) &&
(
_token_metadata.getBootstrapTokens().valueSet().size() > 0 ||
_token_metadata.getLeavingEndpoints().size() > 0 ||
_token_metadata.getMovingEndpoints().size() > 0
))
throw new UnsupportedOperationException("Other bootstrapping/leaving/moving nodes detected, cannot bootstrap while cassandra.consistent.rangemovement is true");
if (!DatabaseDescriptor.isReplacing())
{
if (_token_metadata.isMember(FBUtilities.getBroadcastAddress()))
{
String s = "This node is already a member of the token ring; bootstrap aborted. (If replacing a dead node, remove the old one from the ring first.)";
throw new UnsupportedOperationException(s);
}
setMode(Mode.JOINING, "getting bootstrap token", true);
_bootstrap_tokens = BootStrapper.getBootstrapTokens(_token_metadata);
}
else
{
if (!DatabaseDescriptor.getReplaceAddress().equals(FBUtilities.getBroadcastAddress()))
{
try
{
// Sleep additionally to make sure that the server actually is not alive
// and giving it more time to gossip if alive.
Thread.sleep(LoadBroadcaster.BROADCAST_INTERVAL);
}
catch (InterruptedException e)
{
throw new AssertionError(e);
}
// check for operator errors...
for (Token token : _bootstrap_tokens)
{
InetAddress existing = _token_metadata.getEndpoint(token);
if (existing != null)
{
long nanoDelay = delay * 1000000L;
if (Gossiper.instance.getEndpointStateForEndpoint(existing).getUpdateTimestamp() > (System.nanoTime() - nanoDelay))
throw new UnsupportedOperationException("Cannot replace a live node... ");
current.add(existing);
}
else
{
throw new UnsupportedOperationException("Cannot replace token " + token + " which does not exist!");
}
}
}
else
{
try
{
Thread.sleep(RING_DELAY);
}
catch (InterruptedException e)
{
throw new AssertionError(e);
}
}
setMode(Mode.JOINING, "Replacing a node with token(s): " + _bootstrap_tokens, true);
}
bootstrap(_bootstrap_tokens);
assert !_is_bootstrap_mode; // bootstrap will block until finished
#endif
    } else {
        // FIXME: DatabaseDescriptor.getNumTokens()
        size_t num_tokens = 3;
        _bootstrap_tokens = boot_strapper::get_random_tokens(_token_metadata, num_tokens);
#if 0
_bootstrap_tokens = SystemKeyspace.getSavedTokens();
if (_bootstrap_tokens.isEmpty())
{
Collection<String> initialTokens = DatabaseDescriptor.getInitialTokens();
if (initialTokens.size() < 1)
{
_bootstrap_tokens = BootStrapper.getRandomTokens(_token_metadata, DatabaseDescriptor.getNumTokens());
if (DatabaseDescriptor.getNumTokens() == 1)
logger.warn("Generated random token {}. Random tokens will result in an unbalanced ring; see http://wiki.apache.org/cassandra/Operations", _bootstrap_tokens);
else
logger.info("Generated random tokens. tokens are {}", _bootstrap_tokens);
}
else
{
_bootstrap_tokens = new ArrayList<Token>(initialTokens.size());
for (String token : initialTokens)
_bootstrap_tokens.add(getPartitioner().getTokenFactory().fromString(token));
logger.info("Saved tokens not found. Using configuration value: {}", _bootstrap_tokens);
}
}
else
{
if (_bootstrap_tokens.size() != DatabaseDescriptor.getNumTokens())
throw new ConfigurationException("Cannot change the number of tokens from " + _bootstrap_tokens.size() + " to " + DatabaseDescriptor.getNumTokens());
else
logger.info("Using saved tokens {}", _bootstrap_tokens);
}
#endif
    }
    return f.then([this] {
        return set_tokens(_bootstrap_tokens);
#if 0
// if we don't have system_traces keyspace at this point, then create it manually
if (Schema.instance.getKSMetaData(TraceKeyspace.NAME) == null)
MigrationManager.announceNewKeyspace(TraceKeyspace.definition(), 0, false);
if (!_is_survey_mode)
{
// start participating in the ring.
SystemKeyspace.setBootstrapState(SystemKeyspace.BootstrapState.COMPLETED);
set_tokens(_bootstrap_tokens);
// remove the existing info about the replaced node.
if (!current.isEmpty())
for (InetAddress existing : current)
Gossiper.instance.replacedEndpoint(existing);
assert _token_metadata.sortedTokens().size() > 0;
Auth.setup();
}
else
{
logger.info("Startup complete, but write survey mode is active, not becoming an active ring member. Use JMX (StorageService->joinRing()) to finalize ring joining.");
}
#endif
    });
}
// Currently a no-op: the whole body is the original Java implementation,
// compiled out with "#if 0" and kept only as a porting reference.
void storage_service::join_ring() {
#if 0
if (!joined) {
logger.info("Joining ring by operator request");
try
{
joinTokenRing(0);
}
catch (ConfigurationException e)
{
throw new IOException(e.getMessage());
}
} else if (_is_survey_mode) {
set_tokens(SystemKeyspace.getSavedTokens());
SystemKeyspace.setBootstrapState(SystemKeyspace.BootstrapState.COMPLETED);
_is_survey_mode = false;
logger.info("Leaving write survey mode and joining ring at operator request");
assert _token_metadata.sortedTokens().size() > 0;
Auth.setup();
}
#endif
}
// Announces the given tokens as "bootstrapping" and waits for the ring to settle.
//
// Sets _is_bootstrap_mode, stores the tokens through
// db::system_keyspace::update_tokens(), publishes TOKENS and STATUS
// (bootstrapping) through the gossiper, sleeps for RING_DELAY milliseconds and
// finally checks that at least one seed has been seen.
//
// The returned future fails with std::runtime_error when no seed was seen.
future<> storage_service::bootstrap(std::unordered_set<token> tokens) {
    _is_bootstrap_mode = true;
    // DON'T use set_token, that makes us part of the ring locally which is incorrect until we are done bootstrapping
    // update_tokens() is called in its own statement so that it reads `tokens`
    // before the capture below moves from it.
    auto tokens_saved = db::system_keyspace::update_tokens(tokens);
    return tokens_saved.then([this, tokens = std::move(tokens)] {
        // FIXME: DatabaseDescriptor.isReplacing()
        const bool replacing = false;
        std::chrono::milliseconds pause(1);
        if (replacing) {
            // Dont set any state for the node which is bootstrapping the existing token...
            for (auto t : tokens) {
                _token_metadata.update_normal_token(t, get_broadcast_address());
            }
            // SystemKeyspace.removeEndpoint(DatabaseDescriptor.getReplaceAddress());
        } else {
            // if not an existing token then bootstrap
            auto& local_gossiper = gms::get_local_gossiper();
            local_gossiper.add_local_application_state(gms::application_state::TOKENS, value_factory.tokens(tokens));
            local_gossiper.add_local_application_state(gms::application_state::STATUS, value_factory.bootstrapping(tokens));
            pause = std::chrono::milliseconds(RING_DELAY);
            // setMode(Mode.JOINING, "sleeping " + RING_DELAY + " ms for pending range setup", true);
        }
        return sleep(pause).then([] {
            if (!gms::get_local_gossiper().seen_any_seed()) {
                throw std::runtime_error("Unable to contact any seeds!");
            }
            // setMode(Mode.JOINING, "Starting to bootstrap...", true);
            // new BootStrapper(FBUtilities.getBroadcastAddress(), tokens, _token_metadata).bootstrap(); // handles token update
            // logger.info("Bootstrap completed! for the tokens {}", tokens);
            return make_ready_future<>();
        });
    });
}
// Handles a STATUS change to "bootstrapping" for `endpoint`: records the
// endpoint's gossiped tokens as bootstrap tokens and, when the gossiper reports
// a host id for it, refreshes the host-id mapping in _token_metadata.
void storage_service::handle_state_bootstrap(inet_address endpoint) {
    ss_debug("SS::handle_state_bootstrap endpoint=%s\n", endpoint);
    // explicitly check for TOKENS, because a bootstrapping node might be bootstrapping in legacy mode; that is, not using vnodes and no token specified
    auto bootstrap_tokens = get_tokens_for(endpoint);
    // if (logger.isDebugEnabled())
    // logger.debug("Node {} state bootstrapping, token {}", endpoint, tokens);

    // if this node is present in token metadata, either we have missed intermediate states
    // or the node had crashed. Print warning if needed, clear obsolete stuff and
    // continue.
    const bool already_member = _token_metadata.is_member(endpoint);
    if (already_member) {
        // If isLeaving is false, we have missed both LEAVING and LEFT. However, if
        // isLeaving is true, we have only missed LEFT. Waiting time between completing
        // leave operation and rebootstrapping is relatively short, so the latter is quite
        // common (not enough time for gossip to spread). Therefore we report only the
        // former in the log.
        const bool leaving = _token_metadata.is_leaving(endpoint);
        if (!leaving) {
            // logger.info("Node {} state jump to bootstrap", endpoint);
        }
        // _token_metadata.removeEndpoint(endpoint);
    }

    _token_metadata.add_bootstrap_tokens(bootstrap_tokens, endpoint);
    // FIXME
    // PendingRangeCalculatorService.instance.update();

    auto& local_gossiper = gms::get_local_gossiper();
    if (!local_gossiper.uses_host_id(endpoint)) {
        return;
    }
    _token_metadata.update_host_id(local_gossiper.get_host_id(endpoint), endpoint);
}
// Handles a STATUS change to "normal" for `endpoint`.
//
// Refreshes the host-id mapping (when the gossiper reports one), then decides
// for each token gossiped by the endpoint whether the endpoint becomes its
// owner in _token_metadata:
//  - the token has no owner yet,
//  - the endpoint already owns it, or
//  - gossiper.compare_endpoint_startup(endpoint, current owner) > 0.
// Tokens selected this way are applied with update_normal_tokens().
//
// Persisting to the system keyspace, removal of displaced endpoints and
// lifecycle notifications are not ported yet (see the disabled code).
void storage_service::handle_state_normal(inet_address endpoint) {
    ss_debug("SS::handle_state_normal endpoint=%s\n", endpoint);
    auto tokens = get_tokens_for(endpoint);
    auto& gossiper = gms::get_local_gossiper();
    std::unordered_set<token> tokensToUpdateInMetadata;
    std::unordered_set<token> tokensToUpdateInSystemKeyspace;
    std::unordered_set<token> localTokensToRemove;
    std::unordered_set<inet_address> endpointsToRemove;
    // if (logger.isDebugEnabled())
    // logger.debug("Node {} state normal, token {}", endpoint, tokens);
    if (_token_metadata.is_member(endpoint)) {
        // logger.info("Node {} state jump to normal", endpoint);
    }
    update_peer_info(endpoint);
#if 1
    // Order Matters, TM.updateHostID() should be called before TM.updateNormalToken(), (see CASSANDRA-4300).
    if (gossiper.uses_host_id(endpoint)) {
        auto host_id = gossiper.get_host_id(endpoint);
        //inet_address existing = _token_metadata.get_endpoint_for_host_id(host_id);
        // if (DatabaseDescriptor.isReplacing() &&
        // Gossiper.instance.getEndpointStateForEndpoint(DatabaseDescriptor.getReplaceAddress()) != null &&
        // (hostId.equals(Gossiper.instance.getHostId(DatabaseDescriptor.getReplaceAddress())))) {
        if (false) {
            // logger.warn("Not updating token metadata for {} because I am replacing it", endpoint);
        } else {
            if (false /*existing != null && !existing.equals(endpoint)*/) {
#if 0
if (existing == get_broadcast_address()) {
logger.warn("Not updating host ID {} for {} because it's mine", hostId, endpoint);
_token_metadata.removeEndpoint(endpoint);
endpointsToRemove.add(endpoint);
} else if (gossiper.compare_endpoint_startup(endpoint, existing) > 0) {
logger.warn("Host ID collision for {} between {} and {}; {} is the new owner", hostId, existing, endpoint, endpoint);
_token_metadata.removeEndpoint(existing);
endpointsToRemove.add(existing);
_token_metadata.update_host_id(hostId, endpoint);
} else {
logger.warn("Host ID collision for {} between {} and {}; ignored {}", hostId, existing, endpoint, endpoint);
_token_metadata.removeEndpoint(endpoint);
endpointsToRemove.add(endpoint);
}
#endif
            } else {
                _token_metadata.update_host_id(host_id, endpoint);
            }
        }
    }
#endif
    for (auto t : tokens) {
        // we don't want to update if this node is responsible for the token and it has a later startup time than endpoint.
        auto current_owner = _token_metadata.get_endpoint(t);
        if (!current_owner) {
            // logger.debug("New node {} at token {}", endpoint, t);
            tokensToUpdateInMetadata.insert(t);
            tokensToUpdateInSystemKeyspace.insert(t);
        } else if (endpoint == *current_owner) {
            // set state back to normal, since the node may have tried to leave, but failed and is now back up
            tokensToUpdateInMetadata.insert(t);
            tokensToUpdateInSystemKeyspace.insert(t);
        } else if (gossiper.compare_endpoint_startup(endpoint, *current_owner) > 0) {
            tokensToUpdateInMetadata.insert(t);
            tokensToUpdateInSystemKeyspace.insert(t);
#if 0
// currentOwner is no longer current, endpoint is. Keep track of these moves, because when
// a host no longer has any tokens, we'll want to remove it.
Multimap<InetAddress, Token> epToTokenCopy = getTokenMetadata().getEndpointToTokenMapForReading();
epToTokenCopy.get(currentOwner).remove(token);
if (epToTokenCopy.get(currentOwner).size() < 1)
endpointsToRemove.add(currentOwner);
logger.info(String.format("Nodes %s and %s have the same token %s. %s is the new owner",
endpoint,
currentOwner,
token,
endpoint));
#endif
        } else {
#if 0
logger.info(String.format("Nodes %s and %s have the same token %s. Ignoring %s",
endpoint,
currentOwner,
token,
endpoint));
#endif
        }
    }
    bool is_moving = _token_metadata.is_moving(endpoint); // capture because updateNormalTokens clears moving status
    _token_metadata.update_normal_tokens(tokensToUpdateInMetadata, endpoint);
    // for (auto ep : endpointsToRemove) {
    // removeEndpoint(ep);
    // if (DatabaseDescriptor.isReplacing() && DatabaseDescriptor.getReplaceAddress().equals(ep))
    // Gossiper.instance.replacementQuarantine(ep); // quarantine locally longer than normally; see CASSANDRA-8260
    // }
    if (!tokensToUpdateInSystemKeyspace.empty()) {
        // SystemKeyspace.updateTokens(endpoint, tokensToUpdateInSystemKeyspace);
    }
    if (!localTokensToRemove.empty()) {
        // SystemKeyspace.updateLocalTokens(Collections.<Token>emptyList(), localTokensToRemove);
    }
    if (is_moving) {
        // _token_metadata.remove_from_moving(endpoint);
        // for (IEndpointLifecycleSubscriber subscriber : lifecycleSubscribers)
        // subscriber.onMove(endpoint);
    } else {
        // for (IEndpointLifecycleSubscriber subscriber : lifecycleSubscribers)
        // subscriber.onJoinCluster(endpoint);
    }
    // PendingRangeCalculatorService.instance.update();
}
// Called by on_change() when an endpoint's STATUS is STATUS_LEAVING.
// Not ported yet: the body is the original Java code, compiled out with
// "#if 0", so this function currently does nothing.
void storage_service::handle_state_leaving(inet_address endpoint) {
#if 0
Collection<Token> tokens;
tokens = get_tokens_for(endpoint);
if (logger.isDebugEnabled())
logger.debug("Node {} state leaving, tokens {}", endpoint, tokens);
// If the node is previously unknown or tokens do not match, update tokenmetadata to
// have this node as 'normal' (it must have been using this token before the
// leave). This way we'll get pending ranges right.
if (!_token_metadata.isMember(endpoint))
{
logger.info("Node {} state jump to leaving", endpoint);
_token_metadata.updateNormalTokens(tokens, endpoint);
}
else if (!_token_metadata.getTokens(endpoint).containsAll(tokens))
{
logger.warn("Node {} 'leaving' token mismatch. Long network partition?", endpoint);
_token_metadata.updateNormalTokens(tokens, endpoint);
}
// at this point the endpoint is certainly a member with this token, so let's proceed
// normally
_token_metadata.addLeavingEndpoint(endpoint);
PendingRangeCalculatorService.instance.update();
#endif
}
// Called by on_change() when an endpoint's STATUS is STATUS_LEFT; `pieces` is
// the STATUS value split on the versioned_value delimiter.
// Not ported yet: the body is compiled out, so this is currently a no-op.
void storage_service::handle_state_left(inet_address endpoint, std::vector<sstring> pieces) {
#if 0
assert pieces.length >= 2;
Collection<Token> tokens;
tokens = get_tokens_for(endpoint);
if (logger.isDebugEnabled())
logger.debug("Node {} state left, tokens {}", endpoint, tokens);
excise(tokens, endpoint, extractExpireTime(pieces));
#endif
}
// Called by on_change() when an endpoint's STATUS is STATUS_MOVING; `pieces`
// is the STATUS value split on the versioned_value delimiter.
// Not ported yet: the body is compiled out, so this is currently a no-op.
void storage_service::handle_state_moving(inet_address endpoint, std::vector<sstring> pieces) {
#if 0
assert pieces.length >= 2;
Token token = getPartitioner().getTokenFactory().fromString(pieces[1]);
if (logger.isDebugEnabled())
logger.debug("Node {} state moving, new token {}", endpoint, token);
_token_metadata.addMovingEndpoint(token, endpoint);
PendingRangeCalculatorService.instance.update();
#endif
}
// Called by on_change() when an endpoint's STATUS is REMOVING_TOKEN or
// REMOVED_TOKEN; `pieces` is the STATUS value split on the versioned_value
// delimiter.
// Not ported yet: the body is compiled out, so this is currently a no-op.
void storage_service::handle_state_removing(inet_address endpoint, std::vector<sstring> pieces) {
#if 0
assert (pieces.length > 0);
if (endpoint.equals(FBUtilities.getBroadcastAddress()))
{
logger.info("Received removenode gossip about myself. Is this node rejoining after an explicit removenode?");
try
{
drain();
}
catch (Exception e)
{
throw new RuntimeException(e);
}
return;
}
if (_token_metadata.isMember(endpoint))
{
String state = pieces[0];
Collection<Token> removeTokens = _token_metadata.getTokens(endpoint);
if (VersionedValue.REMOVED_TOKEN.equals(state))
{
excise(removeTokens, endpoint, extractExpireTime(pieces));
}
else if (VersionedValue.REMOVING_TOKEN.equals(state))
{
if (logger.isDebugEnabled())
logger.debug("Tokens {} removed manually (endpoint was {})", removeTokens, endpoint);
// Note that the endpoint is being removed
_token_metadata.addLeavingEndpoint(endpoint);
PendingRangeCalculatorService.instance.update();
// find the endpoint coordinating this removal that we need to notify when we're done
String[] coordinator = Gossiper.instance.getEndpointStateForEndpoint(endpoint).getApplicationState(ApplicationState.REMOVAL_COORDINATOR).value.split(VersionedValue.DELIMITER_STR, -1);
UUID hostId = UUID.fromString(coordinator[1]);
// grab any data we are now responsible for and notify responsible node
restoreReplicaCount(endpoint, _token_metadata.getEndpointForHostId(hostId));
}
}
else // now that the gossiper has told us about this nonexistent member, notify the gossiper to remove it
{
if (VersionedValue.REMOVED_TOKEN.equals(pieces[0]))
addExpireTimeIfFound(endpoint, extractExpireTime(pieces));
removeEndpoint(endpoint);
}
#endif
}
// Traces the endpoint's gossiped tokens, then replays every entry of the
// endpoint's application-state map through on_change().
void storage_service::on_join(gms::inet_address endpoint, gms::endpoint_state ep_state) {
    ss_debug("SS::on_join endpoint=%s\n", endpoint);
    for (const auto& tok : get_tokens_for(endpoint)) {
        ss_debug("t=%s\n", tok);
    }
    for (const auto& state_entry : ep_state.get_application_state_map()) {
        on_change(endpoint, state_entry.first, state_entry.second);
    }
    // MigrationManager.instance.scheduleSchemaPull(endpoint, epState);
}
// Only emits a debug trace for now; the schema-pull, hint-delivery and
// subscriber notifications of the original Java code are compiled out.
// NOTE(review): presumably invoked by the gossiper this object registers with
// in prepare_to_join() — confirm against the gossiper's subscriber interface.
void storage_service::on_alive(gms::inet_address endpoint, gms::endpoint_state state) {
ss_debug("SS::on_alive endpoint=%s\n", endpoint);
#if 0
MigrationManager.instance.scheduleSchemaPull(endpoint, state);
if (_token_metadata.isMember(endpoint))
{
HintedHandOffManager.instance.scheduleHintDelivery(endpoint, true);
for (IEndpointLifecycleSubscriber subscriber : lifecycleSubscribers)
subscriber.onUp(endpoint);
}
#endif
}
// Intentionally empty: storage_service takes no action before a state change.
void storage_service::before_change(gms::inet_address endpoint, gms::endpoint_state current_state, gms::application_state new_state_key, gms::versioned_value new_value) {
// no-op
}
// Reacts to a changed application state of `endpoint`.
//
// STATUS changes are split on versioned_value::DELIMITER_STR and dispatched on
// the first piece to the matching handle_state_*() function. Any other state
// is ignored when the endpoint is unknown to the gossiper or in a dead state;
// otherwise it would be persisted as peer info (not ported yet).
//
// Unless the dead/unknown check returned early, the function ends by calling
// replicate_to_all_cores().
void storage_service::on_change(inet_address endpoint, application_state state, versioned_value value) {
    ss_debug("SS::on_change endpoint=%s\n", endpoint);
    if (state != application_state::STATUS) {
        auto& local_gossiper = gms::get_local_gossiper();
        auto known_state = local_gossiper.get_endpoint_state_for_endpoint(endpoint);
        if (!known_state || local_gossiper.is_dead_state(*known_state)) {
            // logger.debug("Ignoring state change for dead or unknown endpoint: {}", endpoint);
            return;
        }
        if (state == application_state::RELEASE_VERSION) {
            // SystemKeyspace.updatePeerInfo(endpoint, "release_version", value.value);
        } else if (state == application_state::DC) {
            // SystemKeyspace.updatePeerInfo(endpoint, "data_center", value.value);
        } else if (state == application_state::RACK) {
            // SystemKeyspace.updatePeerInfo(endpoint, "rack", value.value);
        } else if (state == application_state::RPC_ADDRESS) {
            // try {
            // SystemKeyspace.updatePeerInfo(endpoint, "rpc_address", InetAddress.getByName(value.value));
            // } catch (UnknownHostException e) {
            // throw new RuntimeException(e);
            // }
        } else if (state == application_state::SCHEMA) {
            // SystemKeyspace.updatePeerInfo(endpoint, "schema_version", UUID.fromString(value.value));
            // MigrationManager.instance.scheduleSchemaPull(endpoint, epState);
        } else if (state == application_state::HOST_ID) {
            // SystemKeyspace.updatePeerInfo(endpoint, "host_id", UUID.fromString(value.value));
        }
    } else {
        std::vector<sstring> pieces;
        boost::split(pieces, value.value, boost::is_any_of(sstring(versioned_value::DELIMITER_STR)));
        assert(pieces.size() > 0);
        // The first piece names the status; the rest are its arguments.
        const sstring status_name = pieces[0];
        if (status_name == sstring(versioned_value::STATUS_BOOTSTRAPPING)) {
            handle_state_bootstrap(endpoint);
        } else if (status_name == sstring(versioned_value::STATUS_NORMAL)) {
            handle_state_normal(endpoint);
        } else if (status_name == sstring(versioned_value::REMOVING_TOKEN) ||
                   status_name == sstring(versioned_value::REMOVED_TOKEN)) {
            handle_state_removing(endpoint, pieces);
        } else if (status_name == sstring(versioned_value::STATUS_LEAVING)) {
            handle_state_leaving(endpoint);
        } else if (status_name == sstring(versioned_value::STATUS_LEFT)) {
            handle_state_left(endpoint, pieces);
        } else if (status_name == sstring(versioned_value::STATUS_MOVING)) {
            handle_state_moving(endpoint, pieces);
        }
    }
    replicate_to_all_cores();
}
// Not ported yet: the body is compiled out, so this is currently a no-op.
void storage_service::on_remove(gms::inet_address endpoint) {
#if 0
_token_metadata.removeEndpoint(endpoint);
PendingRangeCalculatorService.instance.update();
#endif
}
// Not ported yet: the body is compiled out, so this is currently a no-op.
void storage_service::on_dead(gms::inet_address endpoint, gms::endpoint_state state) {
#if 0
MessagingService.instance().convict(endpoint);
for (IEndpointLifecycleSubscriber subscriber : lifecycleSubscribers)
subscriber.onDown(endpoint);
#endif
}
// Not ported yet: the body is compiled out, so this is currently a no-op.
void storage_service::on_restart(gms::inet_address endpoint, gms::endpoint_state state) {
#if 0
// If we have restarted before the node was even marked down, we need to reset the connection pool
if (state.isAlive())
onDead(endpoint, state);
#endif
}
// Walks the application states the gossiper holds for `endpoint`. Each
// recognised state would be written as peer info to the system keyspace; those
// writes are not ported yet, so the function currently has no visible effect.
// Returns immediately when the gossiper has no state for the endpoint.
void storage_service::update_peer_info(gms::inet_address endpoint) {
    using namespace gms;
    auto& local_gossiper = gms::get_local_gossiper();
    auto known_state = local_gossiper.get_endpoint_state_for_endpoint(endpoint);
    if (!known_state) {
        return;
    }
    for (auto& state_entry : known_state->get_application_state_map()) {
        auto& key = state_entry.first;
        //auto& value = entry.second.value
        if (key == application_state::RELEASE_VERSION) {
            // SystemKeyspace.updatePeerInfo(endpoint, "release_version", value);
        } else if (key == application_state::DC) {
            // SystemKeyspace.updatePeerInfo(endpoint, "data_center", value);
        } else if (key == application_state::RACK) {
            // SystemKeyspace.updatePeerInfo(endpoint, "rack", value);
        } else if (key == application_state::RPC_ADDRESS) {
            // SystemKeyspace.updatePeerInfo(endpoint, "rpc_address", InetAddress.getByName(value));
        } else if (key == application_state::SCHEMA) {
            // SystemKeyspace.updatePeerInfo(endpoint, "schema_version", UUID.fromString(value));
        } else if (key == application_state::HOST_ID) {
            // SystemKeyspace.updatePeerInfo(endpoint, "host_id", UUID.fromString(value));
        }
    }
}
// Returns the gossiped value of `appstate` for `endpoint`, or an empty string
// when the gossiper has no state for the endpoint or the endpoint has no such
// application state.
sstring storage_service::get_application_state_value(inet_address endpoint, application_state appstate) {
    auto& local_gossiper = gms::get_local_gossiper();
    auto known_state = local_gossiper.get_endpoint_state_for_endpoint(endpoint);
    if (known_state) {
        auto versioned = known_state->get_application_state(appstate);
        if (versioned) {
            return versioned->value;
        }
    }
    return {};
}
// Parses the TOKENS application state gossiped by `endpoint`.
//
// The state is a ';'-separated list of hex-encoded tokens. Each piece is
// decoded with from_hex() and turned into a token of kind `key`.
//
// @return the decoded tokens; an empty set when the endpoint has no TOKENS
//         value (get_application_state_value() returned an empty string).
std::unordered_set<locator::token> storage_service::get_tokens_for(inet_address endpoint) {
    auto tokens_string = get_application_state_value(endpoint, application_state::TOKENS);
    ss_debug("endpoint=%s, tokens_string=%s\n", endpoint, tokens_string);
    std::unordered_set<token> ret;
    if (tokens_string.size() == 0) {
        // boost::split() yields a single empty element for empty input, which
        // would otherwise be decoded into a bogus token with no data.
        return ret;
    }
    std::vector<sstring> tokens;
    boost::split(tokens, tokens_string, boost::is_any_of(";"));
    for (const auto& str : tokens) {
        ss_debug("token=%s\n", str);
        sstring_view sv(str);
        bytes b = from_hex(sv);
        ret.emplace(token::kind::key, b);
    }
    return ret;
}
// Makes `tokens` this node's normal tokens.
//
// Stores them through db::system_keyspace::update_tokens(), then assigns each
// token to get_broadcast_address() in _token_metadata, gossips TOKENS and
// STATUS (normal) for the same set, and calls replicate_to_all_cores().
//
// @param tokens  the tokens this node owns from now on.
// @return a future that resolves once the steps above have run.
future<> storage_service::set_tokens(std::unordered_set<token> tokens) {
    // if (logger.isDebugEnabled())
    // logger.debug("Setting tokens to {}", tokens);
    // update_tokens() is called in its own statement so that it reads `tokens`
    // before the capture below moves from it.
    auto f = db::system_keyspace::update_tokens(tokens);
    return f.then([this, tokens = std::move(tokens)] {
        for (auto t : tokens) {
            _token_metadata.update_normal_token(t, get_broadcast_address());
        }
        // Collection<Token> localTokens = getLocalTokens();
        // Gossip the tokens that were just stored and installed, not the
        // _bootstrap_tokens member, which only matches for one particular caller.
        auto& gossiper = gms::get_local_gossiper();
        gossiper.add_local_application_state(gms::application_state::TOKENS, value_factory.tokens(tokens));
        gossiper.add_local_application_state(gms::application_state::STATUS, value_factory.normal(tokens));
        //setMode(Mode.NORMAL, false);
        replicate_to_all_cores();
    });
}
// Server start-up entry point: marks the service as initialized, then runs
// prepare_to_join() followed by join_token_ring(delay).
//
// `delay` is forwarded unchanged to join_token_ring(). The returned future
// resolves when join_token_ring() has completed.
//
// All "#if 0" sections are the original Java implementation kept as a porting
// reference (version logging, loading persisted ring state, the shutdown
// hook, and the join_ring=false path); none of it is compiled.
future<> storage_service::init_server(int delay) {
#if 0
logger.info("Cassandra version: {}", FBUtilities.getReleaseVersionString());
logger.info("Thrift API version: {}", cassandraConstants.VERSION);
logger.info("CQL supported versions: {} (default: {})", StringUtils.join(ClientState.getCQLSupportedVersion(), ","), ClientState.DEFAULT_CQL_VERSION);
#endif
_initialized = true;
#if 0
try
{
// Ensure StorageProxy is initialized on start-up; see CASSANDRA-3797.
Class.forName("org.apache.cassandra.service.StorageProxy");
// also IndexSummaryManager, which is otherwise unreferenced
Class.forName("org.apache.cassandra.io.sstable.IndexSummaryManager");
}
catch (ClassNotFoundException e)
{
throw new AssertionError(e);
}
if (Boolean.parseBoolean(System.getProperty("cassandra.load_ring_state", "true")))
{
logger.info("Loading persisted ring state");
Multimap<InetAddress, Token> loadedTokens = SystemKeyspace.loadTokens();
Map<InetAddress, UUID> loadedHostIds = SystemKeyspace.loadHostIds();
for (InetAddress ep : loadedTokens.keySet())
{
if (ep.equals(FBUtilities.getBroadcastAddress()))
{
// entry has been mistakenly added, delete it
SystemKeyspace.removeEndpoint(ep);
}
else
{
_token_metadata.updateNormalTokens(loadedTokens.get(ep), ep);
if (loadedHostIds.containsKey(ep))
_token_metadata.update_host_id(loadedHostIds.get(ep), ep);
Gossiper.instance.addSavedEndpoint(ep);
}
}
}
// daemon threads, like our executors', continue to run while shutdown hooks are invoked
drainOnShutdown = new Thread(new WrappedRunnable()
{
@Override
public void runMayThrow() throws InterruptedException
{
ExecutorService counterMutationStage = StageManager.getStage(Stage.COUNTER_MUTATION);
ExecutorService mutationStage = StageManager.getStage(Stage.MUTATION);
if (mutationStage.isShutdown() && counterMutationStage.isShutdown())
return; // drained already
if (daemon != null)
shutdownClientServers();
ScheduledExecutors.optionalTasks.shutdown();
Gossiper.instance.stop();
// In-progress writes originating here could generate hints to be written, so shut down MessagingService
// before mutation stage, so we can get all the hints saved before shutting down
MessagingService.instance().shutdown();
counterMutationStage.shutdown();
mutationStage.shutdown();
counterMutationStage.awaitTermination(3600, TimeUnit.SECONDS);
mutationStage.awaitTermination(3600, TimeUnit.SECONDS);
StorageProxy.instance.verifyNoHintsInProgress();
List<Future<?>> flushes = new ArrayList<>();
for (Keyspace keyspace : Keyspace.all())
{
KSMetaData ksm = Schema.instance.getKSMetaData(keyspace.getName());
if (!ksm.durableWrites)
{
for (ColumnFamilyStore cfs : keyspace.getColumnFamilyStores())
flushes.add(cfs.forceFlush());
}
}
try
{
FBUtilities.waitOnFutures(flushes);
}
catch (Throwable t)
{
JVMStabilityInspector.inspectThrowable(t);
// don't let this stop us from shutting down the commitlog and other thread pools
logger.warn("Caught exception while waiting for memtable flushes during shutdown hook", t);
}
CommitLog.instance.shutdownBlocking();
// wait for miscellaneous tasks like sstable and commitlog segment deletion
ScheduledExecutors.nonPeriodicTasks.shutdown();
if (!ScheduledExecutors.nonPeriodicTasks.awaitTermination(1, TimeUnit.MINUTES))
logger.warn("Miscellaneous task executor still busy after one minute; proceeding with shutdown");
}
}, "StorageServiceShutdownHook");
Runtime.getRuntime().addShutdownHook(drainOnShutdown);
#endif
// Join unconditionally; the cassandra.join_ring switch of the Java code
// below is not ported.
return prepare_to_join().then([this, delay] {
return join_token_ring(delay);
});
#if 0
// Has to be called after the host id has potentially changed in prepareToJoin().
for (ColumnFamilyStore cfs : ColumnFamilyStore.all())
if (cfs.metadata.isCounter())
cfs.initCounterCache();
if (Boolean.parseBoolean(System.getProperty("cassandra.join_ring", "true")))
{
joinTokenRing(delay);
}
else
{
Collection<Token> tokens = SystemKeyspace.getSavedTokens();
if (!tokens.isEmpty())
{
_token_metadata.updateNormalTokens(tokens, FBUtilities.getBroadcastAddress());
// order is important here, the gossiper can fire in between adding these two states. It's ok to send TOKENS without STATUS, but *not* vice versa.
List<Pair<ApplicationState, VersionedValue>> states = new ArrayList<Pair<ApplicationState, VersionedValue>>();
states.add(Pair.create(ApplicationState.TOKENS, valueFactory.tokens(tokens)));
states.add(Pair.create(ApplicationState.STATUS, valueFactory.hibernate(true)));
Gossiper.instance.addLocalApplicationStates(states);
}
logger.info("Not joining ring as requested. Use JMX (StorageService->joinRing()) to initiate ring joining");
}
#endif
}
void storage_service::replicate_to_all_cores() {
assert(engine().cpu_id() == 0);
// FIXME: There is no back pressure. If the remote cores are slow, and
// replication is called often, it will queue tasks to the semaphore
// without end.
_replicate_task.wait().then([this] {
return _the_storage_service.invoke_on_all([tm = _token_metadata] (storage_service& local_ss) {
if (engine().cpu_id() != 0) {
local_ss._token_metadata = tm;
}
});
}).then_wrapped([this] (auto&& f) {
try {
_replicate_task.signal();
f.get();
} catch (...) {
print("storage_service: Fail to replicate _token_metadata\n");
}
});
}
// Publishes this node's datacenter and rack, as reported by the local snitch
// for our broadcast address, as the DC and RACK application states.
void storage_service::gossip_snitch_info() {
    auto& local_snitch = locator::i_endpoint_snitch::get_local_snitch_ptr();
    auto self = get_broadcast_address();
    auto datacenter_name = local_snitch->get_datacenter(self);
    auto rack_name = local_snitch->get_rack(self);

    auto& local_gossiper = gms::get_local_gossiper();
    local_gossiper.add_local_application_state(gms::application_state::DC, value_factory.datacenter(datacenter_name));
    local_gossiper.add_local_application_state(gms::application_state::RACK, value_factory.rack(rack_name));
}
// Nothing to tear down yet: returns an already-resolved future.
future<> storage_service::stop() {
return make_ready_future<>();
}
} // namespace service