/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * Copyright (C) 2015 ScyllaDB * * Modified by ScyllaDB */ /* * This file is part of Scylla. * * Scylla is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Scylla is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Scylla. If not, see . */ #pragma once #include #include #include #include #include #include #include "seastarx.hh" #include #include #include namespace utils { /** * This is a pseudo floating point implementation of an estimated histogram. * When entering a value: * All values lower than the MIN will be included in the first bucket. * All values higher than MAX will be included the last bucket that serves as the * infinity bucket. * * buckets are distributed as pseudo floating point: * The range [MIN, MAX) is split into log2 ranges. * ranges = log2(max/min) * Each of that ranges is split according to the number of buckets: * resolution = (NUM_BUCKETS - 1)/ranges * * For example, if the MIN value is 128, the MAX is 1024 and the number of buckets is 13: * * Anything below 128 will be in the bucket 0, anything above 1024 will be in bucket 13. * * the range [128, 1024) will be split into log2(1024/128) = 3: * 128, 256, 512, 1024 * * Each range is split into 12/3 = 4. * 128 | 256 | 512 | 1024 * 128 160 192 224| 256 320 384 448| 512 640 768 896| * * * Calculating the bucket limit of bucket i: * The range: 2^(i/4)* min * The sub range: i%4 * range/4 * * How to find a bucket index for a value. * The bucket index consist of two part: * higher bits are based on log2(value/min) * * lower bits are based on the high 2 MSB (ignoring the leading 1). * for example: 330 (101001010) * higher bits: log2(330/128) = 1 * low bit MSB: 330 = 01 (the lower two bits out of the upper 3) * So the index: 101 = 5 * * * About the min/max and number of buckets. * ======================================== * * For MIN and MAX choose numbers that are a power of 2. * The number of buckets will determine the resolution that should also be a power of 2 * So the total number of bucket should be log2(MAX/MIN) * Resolution + 1 * * Limitation: You must set the MIN value to be higher then the resolution. * For example, for a 2 bits resolution MIN should be 2^2 = 4 or higher. * */ template requires (Min > 0 && Min < Max) class approx_exponential_histogram { std::array _buckets; public: static constexpr unsigned RANGES = log2floor(Max/Min); static constexpr unsigned RESOLUTION = (NumBuckets - 1)/RANGES; static constexpr unsigned RESOLUTION_BITS = log2floor(RESOLUTION); static constexpr unsigned BASESHIFT = (Min == 0) ? 0 : log2floor(Min); static constexpr uint64_t LOWER_BITS_MASK = (1 << RESOLUTION_BITS) - 1; static_assert(BASESHIFT >= RESOLUTION); approx_exponential_histogram() { clear(); } /*! * \brief Returns the bucket lower limit given the bucket id. * The first and last bucket will always return the MIN and MAX respectively. * */ uint64_t get_bucket_lower_limit(uint16_t bucket_id) const { if (bucket_id == 0) { return Min; } if (bucket_id == NumBuckets - 1) { return Max; } int16_t range_id = (bucket_id >> RESOLUTION_BITS); return (1 << (range_id + BASESHIFT)) + ((bucket_id & LOWER_BITS_MASK) << (range_id + BASESHIFT - RESOLUTION_BITS)); } /*! * \brief Returns the bucket upper limit given the bucket id. * The last bucket will return MAX. * */ uint64_t get_bucket_upper_limit(uint16_t bucket_id) const { if (bucket_id == NumBuckets - 1) { return Max; } return get_bucket_lower_limit(bucket_id + 1); } /*! * \brief Find the bucket index for a given value * The position of a value that is lower or equal to Min will always be 0. * The position of a value is that is higher or equal to MAX will always be NUM_BUCKETS - 1. */ uint16_t find_bucket_pos(uint64_t val) const { if (val >= Max) { return NumBuckets - 1; } if (val <= Min) { return 0; } uint16_t range = log2floor(val); val >>= range - RESOLUTION_BITS; // leave the top most N+1 bits where N is the resolution. return ((range - BASESHIFT) << RESOLUTION_BITS) + (val & LOWER_BITS_MASK); } /*! * \brief returns a cumulative histogram. * * The metrics cumulative histogram uses upper bounds. * The histogram.count serves as an infinite upper bound bucket */ /*! * \brief clear the current values. */ void clear() { std::fill(_buckets.begin(), _buckets.end(), 0); } /*! * \brief Add an item to the histogram * Increments the count of the bucket closest to n, rounding DOWN. */ void add(uint64_t n) { _buckets.at(find_bucket_pos(n))++; } /*! * \brief returns the smallest value that could have been added to this histogram */ uint64_t min() const { for (size_t i = 0; i < NumBuckets; i ++) { if (_buckets[i] > 0) { return get_bucket_lower_limit(i); } } return 0; } /*! * \brief returns the largest value that could have been added to this histogram. If the histogram * overflowed, returns UINT64_MAX. */ uint64_t max() const { if (_buckets[NumBuckets - 1] > 0) { return UINT64_MAX; } for (int i = NumBuckets - 1; i >= 0; i--) { if (_buckets[i] > 0) { return get_bucket_upper_limit(i); } } return 0; } /*! * \brief merge a histogram to the current one. */ approx_exponential_histogram& merge(const approx_exponential_histogram& b) { for (size_t i = 0; i < NumBuckets; i++) { _buckets[i] += b.get(i); } return *this; } template friend approx_exponential_histogram merge(approx_exponential_histogram a, const approx_exponential_histogram& b); /* * \brief returns the count in the given bucket */ uint64_t get(size_t bucket) const { return _buckets[bucket]; } /*! * \brief get a histogram quantile * * returns the estimated value at given quantile */ uint64_t quantile(double quantile) const { if (quantile < 0 || quantile > 1.0) { throw std::runtime_error("Invalid quantile value " + std::to_string(quantile) + ". Value should be between 0 and 1"); } auto c = count(); if (!c) { return 0; // no data } auto pcount = uint64_t(std::floor(c * quantile)); uint64_t elements = 0; for (size_t i = 0; i < NumBuckets - 2; i++) { if (_buckets[i]) { elements += _buckets[i]; if (elements >= pcount) { return get_bucket_lower_limit(i); } } } return Max; // overflowed value is in the requested quantile } /*! * \brief returns the mean histogram value (average of bucket offsets, weighted by count) */ uint64_t mean() const { double elements = 0; uint64_t sum = 0; for (size_t i = 0; i < NumBuckets - 1; i++) { elements += _buckets[i]; sum += _buckets[i] * get_bucket_lower_limit(i); } return (sum + elements - 1) / elements; } /*! * \brief returns the number of buckets; */ size_t size() const { return NumBuckets; } /*! * \brief returns the total number of values inserted */ uint64_t count() const { uint64_t sum = 0L; for (size_t i = 0; i < NumBuckets; i++) { sum += _buckets[i]; } return sum; } /*! * \brief multiple all the buckets content in the histogram by a constant */ approx_exponential_histogram& operator*=(double v) { for (size_t i = 0; i < NumBuckets; i++) { _buckets[i] *= v; } return *this; } }; template inline approx_exponential_histogram base_estimated_histogram_merge(approx_exponential_histogram a, const approx_exponential_histogram& b) { return a.merge(b); } /*! * \brief estimated histogram for duration values * time_estimated_histogram is used for common task timing. * It covers the range of 0.5ms to 33s with a 2 bits granularity. * * 512us, 640us, 768us, 896us, 1024us, 1280us, 1536us, 1792us, 2048us, 2560us... */ class time_estimated_histogram : public approx_exponential_histogram<512, 33554432, 65> { public: using clock = std::chrono::steady_clock; using duration = clock::duration; using approx_exponential_histogram<512, 33554432, 65>::add; time_estimated_histogram& merge(const time_estimated_histogram& b) { approx_exponential_histogram<512, 33554432, 65>::merge(b); return *this; } void add_micro(uint64_t n) { add(n); } void add(const duration& latency) { add_micro(std::chrono::duration_cast(latency).count()); } }; inline time_estimated_histogram time_estimated_histogram_merge(time_estimated_histogram a, const time_estimated_histogram& b) { return a.merge(b); } struct estimated_histogram { using clock = std::chrono::steady_clock; using duration = clock::duration; /** * The series of values to which the counts in `buckets` correspond: * 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 17, 20, etc. * Thus, a `buckets` of [0, 0, 1, 10] would mean we had seen one value of 3 and 10 values of 4. * * The series starts at 1 and grows by 1.2 each time (rounding and removing duplicates). It goes from 1 * to around 36M by default (creating 90+1 buckets), which will give us timing resolution from microseconds to * 36 seconds, with less precision as the numbers get larger. * * When using the histogram for latency, the values are in microseconds * * Each bucket represents values from (previous bucket offset, current offset]. */ std::vector bucket_offsets; // buckets is one element longer than bucketOffsets -- the last element is values greater than the last offset std::vector buckets; int64_t _count = 0; int64_t _sample_sum = 0; estimated_histogram(int bucket_count = 90) { new_offsets(bucket_count); buckets.resize(bucket_offsets.size() + 1, 0); } seastar::metrics::histogram get_histogram(size_t lower_bucket = 1, size_t max_buckets = 16) const { seastar::metrics::histogram res; res.buckets.resize(max_buckets); int64_t last_bound = lower_bucket; uint64_t cummulative_count = 0; size_t pos = 0; res.sample_count = _count; res.sample_sum = _sample_sum; for (size_t i = 0; i < res.buckets.size(); i++) { auto& v = res.buckets[i]; v.upper_bound = last_bound; while (bucket_offsets[pos] <= last_bound) { cummulative_count += buckets[pos]; pos++; } v.count = cummulative_count; last_bound <<= 1; } return res; } seastar::metrics::histogram get_histogram(duration minmal_latency, size_t max_buckets = 16) const { return get_histogram(std::chrono::duration_cast(minmal_latency).count(), max_buckets); } private: void new_offsets(int size) { bucket_offsets.resize(size); if (size == 0) { return; } int64_t last = 1; bucket_offsets[0] = last; for (int i = 1; i < size; i++) { int64_t next = round(last * 1.2); if (next == last) { next++; } bucket_offsets[i] = next; last = next; } } public: /** * @return the histogram values corresponding to each bucket index */ const std::vector& get_bucket_offsets() const { return bucket_offsets; } /** * @return the histogram buckets */ const std::vector& get_buckets() const { return buckets; } void clear() { std::fill(buckets.begin(), buckets.end(), 0); _count = 0; _sample_sum = 0; } /** * Increments the count of the bucket closest to n, rounding UP. * @param n */ void add(int64_t n) { auto pos = bucket_offsets.size(); auto low = std::lower_bound(bucket_offsets.begin(), bucket_offsets.end(), n); if (low != bucket_offsets.end()) { pos = std::distance(bucket_offsets.begin(), low); } buckets.at(pos)++; _count++; _sample_sum += n; } /** * Increments the count of the bucket closest to n, rounding UP. * when using sampling, the number of items in the bucket will * be increase so that the overall number of items will be equal * to the new count * @param n */ void add_nano(int64_t n, int64_t new_count) { n /= 1000; if (new_count <= _count) { return; } auto pos = bucket_offsets.size(); auto low = std::lower_bound(bucket_offsets.begin(), bucket_offsets.end(), n); if (low != bucket_offsets.end()) { pos = std::distance(bucket_offsets.begin(), low); } buckets.at(pos)+= new_count - _count; _sample_sum += n * (new_count - _count); _count = new_count; } void add(duration latency, int64_t new_count) { add_nano(std::chrono::duration_cast(latency).count(), new_count); } /** * @return the smallest value that could have been added to this histogram */ int64_t min() const { size_t i = 0; for (auto b : buckets) { if (b > 0) { return i == 0 ? 0 : 1 + bucket_offsets[i - 1]; } i++; } return 0; } /** * @return the largest value that could have been added to this histogram. If the histogram * overflowed, returns INT64_MAX. */ int64_t max() const { int lastBucket = buckets.size() - 1; if (buckets[lastBucket] > 0) { return INT64_MAX; } for (int i = lastBucket - 1; i >= 0; i--) { if (buckets[i] > 0) { return bucket_offsets[i]; } } return 0; } /** * merge a histogram to the current one. */ estimated_histogram& merge(const estimated_histogram& b) { if (bucket_offsets.size() < b.bucket_offsets.size()) { new_offsets(b.bucket_offsets.size()); buckets.resize(b.bucket_offsets.size() + 1, 0); } size_t i = 0; for (auto p: b.buckets) { buckets[i++] += p; } _count += b._count; _sample_sum += b._sample_sum; return *this; } friend estimated_histogram merge(estimated_histogram a, const estimated_histogram& b); /** * @return the count in the given bucket */ int64_t get(int bucket) { return buckets[bucket]; } /** * @param percentile * @return estimated value at given percentile */ int64_t percentile(double perc) const { assert(perc >= 0 && perc <= 1.0); auto last_bucket = buckets.size() - 1; auto c = count(); if (!c) { return 0; // no data } auto pcount = int64_t(std::floor(c * perc)); int64_t elements = 0; for (size_t i = 0; i < last_bucket; i++) { if (buckets[i]) { elements += buckets[i]; if (elements >= pcount) { return bucket_offsets[i]; } } } return round(bucket_offsets.back() * 1.2); // overflowed value is in the requested percentile } /** * @return the mean histogram value (average of bucket offsets, weighted by count) */ int64_t mean() const { auto lastBucket = buckets.size() - 1; int64_t elements = 0; int64_t sum = 0; for (size_t i = 0; i < lastBucket; i++) { long bCount = buckets[i]; elements += bCount; sum += bCount * bucket_offsets[i]; } return ((double) (sum + elements -1)/ elements); } /** * @return the total number of non-zero values */ int64_t count() const { int64_t sum = 0L; for (size_t i = 0; i < buckets.size(); i++) { sum += buckets[i]; } return sum; } estimated_histogram& operator*=(double v) { for (size_t i = 0; i < buckets.size(); i++) { buckets[i] *= v; } return *this; } friend std::ostream& operator<<(std::ostream& out, const estimated_histogram& h) { // only print overflow if there is any size_t name_count; if (h.buckets[h.buckets.size() - 1] == 0) { name_count = h.buckets.size() - 1; } else { name_count = h.buckets.size(); } std::vector names; names.reserve(name_count); size_t max_name_len = 0; for (size_t i = 0; i < name_count; i++) { names.push_back(h.name_of_range(i)); max_name_len = std::max(max_name_len, names.back().size()); } sstring formatstr = format("{{:{:d}s}}: {{:d}}\n", max_name_len); for (size_t i = 0; i < name_count; i++) { int64_t count = h.buckets[i]; // sort-of-hack to not print empty ranges at the start that are only used to demarcate the // first populated range. for code clarity we don't omit this record from the maxNameLength // calculation, and accept the unnecessary whitespace prefixes that will occasionally occur if (i == 0 && count == 0) { continue; } out << format(formatstr.c_str(), names[i], count); } return out; } sstring name_of_range(size_t index) const { sstring s; s += "["; if (index == 0) { if (bucket_offsets[0] > 0) { // by original definition, this histogram is for values greater than zero only; // if values of 0 or less are required, an entry of lb-1 must be inserted at the start s += "1"; } else { s += "-Inf"; } } else { s += format("{:d}", bucket_offsets[index - 1] + 1); } s += ".."; if (index == bucket_offsets.size()) { s += "Inf"; } else { s += format("{:d}", bucket_offsets[index]); } s += "]"; return s; } }; inline estimated_histogram estimated_histogram_merge(estimated_histogram a, const estimated_histogram& b) { return a.merge(b); } }