/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * Copyright 2015 Cloudius Systems * * Modified by Cloudius Systems */ #pragma once #include "database.hh" #include "query-request.hh" #include "query-result.hh" #include "query-result-set.hh" #include "core/distributed.hh" #include "db/consistency_level.hh" #include "db/write_type.hh" #include "utils/histogram.hh" namespace service { class abstract_write_response_handler; class abstract_read_executor; class storage_proxy /*implements StorageProxyMBean*/ { struct rh_entry { std::unique_ptr handler; timer<> expire_timer; rh_entry(std::unique_ptr&& h, std::function&& cb); }; public: struct stats { uint64_t read_timeouts; uint64_t read_unavailables; uint64_t range_slice_timeouts; uint64_t range_slice_unavailables; uint64_t write_timeouts; uint64_t write_unavailables; utils::ihistogram read; utils::ihistogram write; utils::ihistogram range; }; using response_id_type = uint64_t; private: distributed& _db; response_id_type _next_response_id = 0; std::unordered_map _response_handlers; constexpr static size_t _max_hints_in_progress = 128; // origin multiplies by FBUtilities.getAvailableProcessors() but we already sharded size_t _total_hints_in_progress = 0; std::unordered_map _hints_in_progress; stats _stats; static constexpr float CONCURRENT_SUBREQUESTS_MARGIN = 0.10; // for read repair chance calculation std::default_random_engine _urandom; std::uniform_real_distribution<> _read_repair_chance = std::uniform_real_distribution<>(0,1); private: void init_messaging_service(); future>> query_singular(lw_shared_ptr cmd, std::vector&& partition_ranges, db::consistency_level cl); response_id_type register_response_handler(std::unique_ptr&& h); void remove_response_handler(response_id_type id); void got_response(response_id_type id, gms::inet_address from); future<> response_wait(response_id_type id); abstract_write_response_handler& get_write_response_handler(storage_proxy::response_id_type id); response_id_type create_write_response_handler(keyspace& ks, db::consistency_level cl, db::write_type type, frozen_mutation&& mutation, std::unordered_set targets, std::vector& pending_endpoints, std::vector); response_id_type create_write_response_handler(const mutation&, db::consistency_level cl, db::write_type type); future<> send_to_live_endpoints(response_id_type response_id, sstring local_data_center); template size_t hint_to_dead_endpoints(lw_shared_ptr m, const Range& targets); void hint_to_dead_endpoints(response_id_type, db::consistency_level); bool cannot_hint(gms::inet_address target); size_t get_hints_in_progress_for(gms::inet_address target); bool should_hint(gms::inet_address ep); bool submit_hint(lw_shared_ptr m, gms::inet_address target); std::vector get_live_sorted_endpoints(keyspace& ks, const dht::token& token); db::read_repair_decision new_read_repair_decision(const schema& s); ::shared_ptr get_read_executor(lw_shared_ptr cmd, query::partition_range pr, db::consistency_level cl); future>> query_singular_local(lw_shared_ptr cmd, const query::partition_range& pr); future query_singular_local_digest(lw_shared_ptr cmd, const query::partition_range& pr); future>> query_partition_key_range(lw_shared_ptr cmd, query::partition_range&& range, db::consistency_level cl); std::vector get_restricted_ranges(keyspace& ks, const schema& s, query::partition_range range); float estimate_result_rows_per_range(lw_shared_ptr cmd, keyspace& ks); static std::vector intersection(const std::vector& l1, const std::vector& l2); future>>> query_partition_key_range_concurrent(std::chrono::high_resolution_clock::time_point timeout, std::vector>>&& results, lw_shared_ptr cmd, db::consistency_level cl, std::vector::iterator&& i, std::vector&& ranges, int concurrency_factor); future>> do_query(schema_ptr, lw_shared_ptr cmd, std::vector&& partition_ranges, db::consistency_level cl); public: storage_proxy(distributed& db); ~storage_proxy(); distributed& get_db() { return _db; } future<> mutate_locally(const mutation& m); future<> mutate_locally(const frozen_mutation& m); future<> mutate_locally(std::vector mutations); /** * Use this method to have these Mutations applied * across all replicas. This method will take care * of the possibility of a replica being down and hint * the data across to some other replica. * * @param mutations the mutations to be applied across the replicas * @param consistency_level the consistency level for the operation */ future<> mutate(std::vector mutations, db::consistency_level cl); future<> mutate_with_triggers(std::vector mutations, db::consistency_level cl, bool should_mutate_atomically); /** * See mutate. Adds additional steps before and after writing a batch. * Before writing the batch (but after doing availability check against the FD for the row replicas): * write the entire batch to a batchlog elsewhere in the cluster. * After: remove the batchlog entry (after writing hints for the batch rows, if necessary). * * @param mutations the Mutations to be applied across the replicas * @param consistency_level the consistency level for the operation */ future<> mutate_atomically(std::vector mutations, db::consistency_level cl); /* * Executes data query on the whole cluster. * * Partitions for each range will be ordered according to decorated_key ordering. Results for * each range from "partition_ranges" may appear in any order. */ future>> query(schema_ptr, lw_shared_ptr cmd, std::vector&& partition_ranges, db::consistency_level cl); future>> query_local(lw_shared_ptr cmd, std::vector&& partition_ranges); future>> query_mutations_locally( lw_shared_ptr cmd, const query::partition_range&); /* * Returns mutation_reader for given column family * which combines data from all shards. */ mutation_reader make_local_reader(utils::UUID cf_id, const query::partition_range&); future<> stop(); friend class abstract_read_executor; const stats& get_stats() const { return _stats; } }; extern distributed _the_storage_proxy; inline distributed& get_storage_proxy() { return _the_storage_proxy; } inline storage_proxy& get_local_storage_proxy() { return _the_storage_proxy.local(); } }