From 39db15d2cba81870bcc8269ff8afebb2fe2d2038 Mon Sep 17 00:00:00 2001 From: Asias He Date: Wed, 20 Jul 2022 08:52:18 +0800 Subject: [PATCH] misc_services: Fix cache hitrate update This patch avoids unnecessary CACHE_HITRATES updates through gossip. After this patch: Publish CACHE_HITRATES in case: - We haven't published it at all - The diff is bigger than 1% and we haven't published in the last 5 seconds - The diff is really big (more than 10%) Note: A peer node can know the cache hitrate through read_data, read_mutation_data and read_digest RPC verbs which have cache_temperature in the response. So there is no need to update CACHE_HITRATES through gossip in high frequency. We do the recalculation faster if the diff is bigger than 0.01. It is useful to do the calculation even if we do not publish the CACHE_HITRATES through gossip, since the recalculation will call the table->set_global_cache_hit_rate to set the hitrate. Fixes #5971 Closes #11079 --- service/cache_hitrate_calculator.hh | 2 ++ service/misc_services.cc | 40 +++++++++++++++++++++++------ 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/service/cache_hitrate_calculator.hh b/service/cache_hitrate_calculator.hh index 59b983b0e2..419dae0957 100644 --- a/service/cache_hitrate_calculator.hh +++ b/service/cache_hitrate_calculator.hh @@ -38,6 +38,8 @@ class cache_hitrate_calculator : public seastar::async_sharded_service _rates; size_t _slen = 0; std::string _gstate; + uint64_t _published_nr = 0; + lowres_clock::time_point _published_time; future<> _done = make_ready_future(); future recalculate_hitrates(); diff --git a/service/misc_services.cc b/service/misc_services.cc index 047b5f99ac..65662b8ad8 100644 --- a/service/misc_services.cc +++ b/service/misc_services.cc @@ -184,14 +184,38 @@ future cache_hitrate_calculator::recalculate_hitrates() }).then([this] { _slen = _gstate.size(); using namespace std::chrono_literals; - return _gossiper.add_local_application_state(gms::application_state::CACHE_HITRATES, 
gms::versioned_value::cache_hitrates(_gstate)).then([this] { - // if max difference during this round is big schedule next recalculate earlier - if (_diff < 0.01) { - return lowres_clock::duration(2000ms); - } else { - return lowres_clock::duration(500ms); - } - }); + auto now = lowres_clock::now(); + // Publish CACHE_HITRATES in case: + // + // - We haven't published it at all + // - The diff is bigger than 1% and we haven't published in the last 5 seconds + // - The diff is really big 10% + // + // Note: A peer node can know the cache hitrate through read_data + // read_mutation_data and read_digest RPC verbs which have + // cache_temperature in the response. So there is no need to update + // CACHE_HITRATES through gossip in high frequency. + bool do_publish = (_published_nr == 0) || + (_diff > 0.1) || + ( _diff > 0.01 && (now - _published_time) > 5000ms); + + // We do the recalculation faster if the diff is bigger than 0.01. It + // is useful to do the calculation even if we do not publish the + // CACHE_HITRATES though gossip, since the recalculation will call the + // table->set_global_cache_hit_rate to set the hitrate. + auto recalculate_duration = _diff > 0.01 ? lowres_clock::duration(500ms) : lowres_clock::duration(2000ms); + if (do_publish) { + llogger.debug("Send CACHE_HITRATES update max_diff={}, published_nr={}", _diff, _published_nr); + ++_published_nr; + _published_time = now; + return _gossiper.add_local_application_state(gms::application_state::CACHE_HITRATES, + gms::versioned_value::cache_hitrates(_gstate)).then([this, recalculate_duration] { + return recalculate_duration; + }); + } else { + llogger.debug("Skip CACHE_HITRATES update max_diff={}, published_nr={}", _diff, _published_nr); + return make_ready_future(recalculate_duration); + } }).finally([this] { _gstate = std::string(); // free memory, do not trust clear() to do that for string _rates.clear();