/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Copyright (C) 2015 ScyllaDB
 *
 * Modified by ScyllaDB
 */

/*
 * This file is part of Scylla.
 *
 * Scylla is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Scylla is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
*/ #pragma once #include "locator/network_topology_strategy.hh" #include "db/consistency_level_type.hh" #include "db/read_repair_decision.hh" #include "exceptions/exceptions.hh" #include "utils/fb_utilities.hh" #include "gms/inet_address.hh" #include "database.hh" #include #include namespace db { extern logging::logger cl_logger; size_t quorum_for(keyspace& ks); size_t local_quorum_for(keyspace& ks, const sstring& dc); size_t block_for_local_serial(keyspace& ks); size_t block_for_each_quorum(keyspace& ks); size_t block_for(keyspace& ks, consistency_level cl); bool is_datacenter_local(consistency_level l); bool is_local(gms::inet_address endpoint); template inline size_t count_local_endpoints(Range& live_endpoints) { return std::count_if(live_endpoints.begin(), live_endpoints.end(), is_local); } std::vector filter_for_query_dc_local(consistency_level cl, keyspace& ks, const std::vector& live_endpoints); std::vector filter_for_query(consistency_level cl, keyspace& ks, std::vector live_endpoints, read_repair_decision read_repair); std::vector filter_for_query(consistency_level cl, keyspace& ks, std::vector& live_endpoints); struct dc_node_count { size_t live = 0; size_t pending = 0; }; template > inline std::unordered_map count_per_dc_endpoints( keyspace& ks, Range& live_endpoints, const PendingRange& pending_endpoints = std::array()) { using namespace locator; auto& rs = ks.get_replication_strategy(); auto& snitch_ptr = i_endpoint_snitch::get_local_snitch_ptr(); network_topology_strategy* nrs = static_cast(&rs); std::unordered_map dc_endpoints; for (auto& dc : nrs->get_datacenters()) { dc_endpoints.emplace(dc, dc_node_count()); } // // Since live_endpoints are a subset of a get_natural_endpoints() output we // will never get any endpoints outside the dataceters from // nrs->get_datacenters(). 
// for (auto& endpoint : live_endpoints) { ++(dc_endpoints[snitch_ptr->get_datacenter(endpoint)].live); } for (auto& endpoint : pending_endpoints) { ++(dc_endpoints[snitch_ptr->get_datacenter(endpoint)].pending); } return dc_endpoints; } bool is_sufficient_live_nodes(consistency_level cl, keyspace& ks, const std::vector& live_endpoints); template inline bool assure_sufficient_live_nodes_each_quorum( consistency_level cl, keyspace& ks, Range& live_endpoints, const PendingRange& pending_endpoints) { using namespace locator; auto& rs = ks.get_replication_strategy(); if (rs.get_type() == replication_strategy_type::network_topology) { for (auto& entry : count_per_dc_endpoints(ks, live_endpoints, pending_endpoints)) { auto dc_block_for = local_quorum_for(ks, entry.first); auto dc_live = entry.second.live; auto dc_pending = entry.second.pending; if (dc_live < dc_block_for + dc_pending) { throw exceptions::unavailable_exception(cl, dc_block_for, dc_live); } } return true; } return false; } template> inline void assure_sufficient_live_nodes( consistency_level cl, keyspace& ks, Range& live_endpoints, const PendingRange& pending_endpoints = std::array()) { size_t need = block_for(ks, cl); switch (cl) { case consistency_level::ANY: // local hint is acceptable, and local node is always live break; case consistency_level::LOCAL_ONE: if (count_local_endpoints(live_endpoints) < count_local_endpoints(pending_endpoints) + 1) { throw exceptions::unavailable_exception(cl, 1, 0); } break; case consistency_level::LOCAL_QUORUM: { size_t local_live = count_local_endpoints(live_endpoints); size_t pending = count_local_endpoints(pending_endpoints); if (local_live < need + pending) { cl_logger.debug("Local replicas {} are insufficient to satisfy LOCAL_QUORUM requirement of needed {} and pending {}", live_endpoints, local_live, pending); throw exceptions::unavailable_exception(cl, need, local_live); } break; } case consistency_level::EACH_QUORUM: if 
(assure_sufficient_live_nodes_each_quorum(cl, ks, live_endpoints, pending_endpoints)) { break; } // Fallthough on purpose for SimpleStrategy default: size_t live = live_endpoints.size(); size_t pending = pending_endpoints.size(); if (live < need + pending) { cl_logger.debug("Live nodes {} do not satisfy ConsistencyLevel ({} required, {} pending)", live, need, pending); throw exceptions::unavailable_exception(cl, need, live); } break; } } void validate_for_read(const sstring& keyspace_name, consistency_level cl); void validate_for_write(const sstring& keyspace_name, consistency_level cl); bool is_serial_consistency(consistency_level cl); void validate_counter_for_write(schema_ptr s, consistency_level cl); }