Distribute cache temperature over gossiper.

When a node starts it does not have any information about the cache temperature of other nodes in the cluster, and it is hard (if not impossible) to make the right guess. During cluster startup all nodes have cold caches, so there is no point in redirecting reads to other nodes even though the local cache is cold; but if only that node restarted, then the other nodes have populated caches and reads should be redirected. The node will get up-to-date information about other nodes' caches, but only after receiving the first reply; until then it does not have the information to make the right decisions, which may cause unwanted spikes immediately after restart. Having cache temperature in the gossiper helps to solve the problem.
2026-05-12 19:02:12 +00:00 · 2017-05-23 17:19:18 +03:00
parent 991ec4a16c
commit 8ca1432b04
5 changed files with 18 additions and 1 deletions
--- a/gms/application_state.cc
+++ b/gms/application_state.cc
@@ -61,6 +61,7 @@ static const std::map<application_state, sstring> application_state_names = {
    {application_state::HOST_ID,                "HOST_ID"},
    {application_state::TOKENS,                 "TOKENS"},
    {application_state::SUPPORTED_FEATURES,     "SUPPORTED_FEATURES"},
+    {application_state::CACHE_HITRATES,         "CACHE_HITRATES"},
 };

 std::ostream& operator<<(std::ostream& os, const application_state& m) {
--- a/gms/application_state.hh
+++ b/gms/application_state.hh
@@ -58,8 +58,8 @@ enum class application_state {
    HOST_ID,
    TOKENS,
    SUPPORTED_FEATURES,
+    CACHE_HITRATES,
    // pad to allow adding new states to existing cluster
-    X2,
    X3,
    X4,
    X5,
--- a/gms/versioned_value.hh
+++ b/gms/versioned_value.hh
@@ -242,6 +242,11 @@ public:
        versioned_value supported_features(const sstring& features) {
            return versioned_value(features);
        }
+
+        versioned_value cache_hitrates(const sstring& hitrates) {
+            return versioned_value(hitrates);
+        }
+
    };
 }; // class versioned_value

--- a/service/misc_services.cc
+++ b/service/misc_services.cc
@@ -41,6 +41,8 @@
 #include "load_broadcaster.hh"
 #include "cache_hitrate_calculator.hh"
 #include "db/system_keyspace.hh"
+#include "gms/application_state.hh"
+#include "service/storage_service.hh"

 namespace service {

@@ -140,6 +142,7 @@ future<lowres_clock::duration> cache_hitrate_calculator::recalculate_hitrates()
        _diff = 0;
        // set calculated rates on all shards
        return _db.invoke_on_all([this, rates = std::move(rates), cpuid = engine().cpu_id()] (database& db) {
+            sstring gstate;
            for (auto& cf : db.get_column_families() | boost::adaptors::filtered(non_system_filter)) {
                stat s = rates.at(cf.first);
                float rate = 0;
@@ -149,9 +152,16 @@ future<lowres_clock::duration> cache_hitrate_calculator::recalculate_hitrates()
                if (engine().cpu_id() == cpuid) {
                    // calculate max difference between old rate and new one for all cfs
                    _diff = std::max(_diff, std::abs(float(cf.second->get_global_cache_hit_rate()) - rate));
+                    gstate += sprint("%s.%s:%f;", cf.second->schema()->ks_name(), cf.second->schema()->cf_name(), rate);
                }
                cf.second->set_global_cache_hit_rate(cache_temperature(rate));
            }
+            if (gstate.size()) {
+                auto& g = gms::get_local_gossiper();
+                auto& ss = get_local_storage_service();
+                return g.add_local_application_state(gms::application_state::CACHE_HITRATES, ss.value_factory.cache_hitrates(std::move(gstate)));
+            }
+            return make_ready_future<>();
        });
    }).then([this] {
        // if max difference during this round is big schedule next recalculate earlier
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -300,6 +300,7 @@ void storage_service::prepare_to_join(std::vector<inet_address> loaded_endpoints
    app_states.emplace(gms::application_state::RPC_ADDRESS, value_factory.rpcaddress(broadcast_rpc_address));
    app_states.emplace(gms::application_state::RELEASE_VERSION, value_factory.release_version());
    app_states.emplace(gms::application_state::SUPPORTED_FEATURES, value_factory.supported_features(features));
+    app_states.emplace(gms::application_state::CACHE_HITRATES, value_factory.cache_hitrates(""));
    slogger.info("Starting up server gossip");

    auto& gossiper = gms::get_local_gossiper();