/* * Copyright (C) 2015 ScyllaDB */ /* * This file is part of Scylla. * * Scylla is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Scylla is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Scylla. If not, see . */ #include #include #include #include #include #include #include "utils/managed_bytes.hh" #include "utils/extremum_tracking.hh" #include "utils/logalloc.hh" #include "row_cache.hh" #include "log.hh" #include "schema_builder.hh" #include "memtable.hh" #include "tests/perf/perf.hh" static const int update_iterations = 16; static const int cell_size = 128; static bool cancelled = false; template auto duration_in_seconds(Func&& f) { using clk = std::chrono::steady_clock; auto start = clk::now(); f(); auto end = clk::now(); return std::chrono::duration_cast>(end - start); } class scheduling_latency_measurer : public weakly_referencable { using clk = std::chrono::steady_clock; clk::time_point _last = clk::now(); utils::estimated_histogram _hist{300}; min_max_tracker _minmax; bool _stop = false; private: void schedule_tick(); void tick() { auto old = _last; _last = clk::now(); auto latency = _last - old; _minmax.update(latency); _hist.add(latency.count()); if (!_stop) { schedule_tick(); } } public: void start() { schedule_tick(); } void stop() { _stop = true; later().get(); // so that the last scheduled tick is counted } const utils::estimated_histogram& histogram() const { return _hist; } clk::duration min() const { return _minmax.min(); } clk::duration max() const { return _minmax.max(); } }; void scheduling_latency_measurer::schedule_tick() { seastar::schedule(make_task(default_scheduling_group(), [self = weak_from_this()] () mutable { if (self) { self->tick(); } })); } std::ostream& operator<<(std::ostream& out, const scheduling_latency_measurer& slm) { auto to_ms = [] (int64_t nanos) { return float(nanos) / 1e6; }; return out << sprint("{count: %d, " //"min: %.6f [ms], " //"50%%: %.6f [ms], " //"90%%: %.6f [ms], " "99%%: %.6f [ms], " "max: %.6f [ms]}", slm.histogram().count(), //to_ms(slm.min().count()), //to_ms(slm.histogram().percentile(0.5)), //to_ms(slm.histogram().percentile(0.9)), to_ms(slm.histogram().percentile(0.99)), to_ms(slm.max().count())); } template void run_test(const sstring& name, schema_ptr s, MutationGenerator&& gen) { cache_tracker tracker; row_cache cache(s, make_empty_snapshot_source(), tracker, is_continuous::yes); size_t memtable_size = seastar::memory::stats().total_memory() / 4; std::cout << name << ":\n"; for (int i = 0; i < update_iterations; ++i) { auto MB = 1024 * 1024; auto prefill_compacted = logalloc::memory_compacted(); auto prefill_allocated = logalloc::memory_allocated(); auto mt = make_lw_shared(s); while (mt->occupancy().total_space() < memtable_size) { auto pk = dht::global_partitioner().decorate_key(*s, partition_key::from_single_value(*s, data_value(utils::UUID_gen::get_time_UUID()).serialize())); mutation m = gen(); mt->apply(m); if (cancelled) { return; } } auto prev_compacted = logalloc::memory_compacted(); auto prev_allocated = logalloc::memory_allocated(); auto prev_rows_processed_from_memtable = tracker.get_stats().rows_processed_from_memtable; auto prev_rows_merged_from_memtable = tracker.get_stats().rows_merged_from_memtable; auto prev_rows_dropped_from_memtable = tracker.get_stats().rows_dropped_from_memtable; std::cout << sprint("cache: %d/%d [MB], memtable: %d/%d [MB], alloc/comp: %d/%d [MB] (amp: %.3f)\n", tracker.region().occupancy().used_space() / MB, tracker.region().occupancy().total_space() / MB, mt->occupancy().used_space() / MB, mt->occupancy().total_space() / MB, (prev_allocated - prefill_allocated) / MB, (prev_compacted - prefill_compacted) / MB, float((prev_compacted - prefill_compacted)) / (prev_allocated - prefill_allocated) ); scheduling_latency_measurer slm; slm.start(); auto d = duration_in_seconds([&] { cache.update([] {}, *mt).get(); }); slm.stop(); auto compacted = logalloc::memory_compacted() - prev_compacted; auto allocated = logalloc::memory_allocated() - prev_allocated; std::cout << sprint("update: %.6f [ms], stall: %s, cache: %d/%d [MB], alloc/comp: %d/%d [MB] (amp: %.3f), pr/me/dr %d/%d/%d\n", d.count() * 1000, slm, tracker.region().occupancy().used_space() / MB, tracker.region().occupancy().total_space() / MB, allocated / MB, compacted / MB, float(compacted)/allocated, tracker.get_stats().rows_processed_from_memtable - prev_rows_processed_from_memtable, tracker.get_stats().rows_merged_from_memtable - prev_rows_merged_from_memtable, tracker.get_stats().rows_dropped_from_memtable - prev_rows_dropped_from_memtable); } auto d = duration_in_seconds([&] { cache.invalidate([] {}).get(); }); std::cout << sprint("invalidation: %.6f [ms]", d.count() * 1000) << "\n"; } void test_small_partitions() { auto s = schema_builder("ks", "cf") .with_column("pk", uuid_type, column_kind::partition_key) .with_column("v1", bytes_type, column_kind::regular_column) .with_column("v2", bytes_type, column_kind::regular_column) .with_column("v3", bytes_type, column_kind::regular_column) .build(); run_test("Small partitions, no overwrites", s, [&] { auto pk = dht::global_partitioner().decorate_key(*s, partition_key::from_single_value(*s, data_value(utils::UUID_gen::get_time_UUID()).serialize())); mutation m(s, pk); auto val = data_value(bytes(bytes::initialized_later(), cell_size)); m.set_clustered_cell(clustering_key::make_empty(), "v1", val, api::new_timestamp()); m.set_clustered_cell(clustering_key::make_empty(), "v2", val, api::new_timestamp()); m.set_clustered_cell(clustering_key::make_empty(), "v3", val, api::new_timestamp()); return m; }); } void test_partition_with_lots_of_small_rows() { auto s = schema_builder("ks", "cf") .with_column("pk", uuid_type, column_kind::partition_key) .with_column("ck", reversed_type_impl::get_instance(int32_type), column_kind::clustering_key) .with_column("v1", bytes_type, column_kind::regular_column) .with_column("v2", bytes_type, column_kind::regular_column) .with_column("v3", bytes_type, column_kind::regular_column) .build(); auto pk = dht::global_partitioner().decorate_key(*s, partition_key::from_single_value(*s, data_value(utils::UUID_gen::get_time_UUID()).serialize())); int ck_idx = 0; run_test("Large partition, lots of small rows", s, [&] { mutation m(s, pk); auto val = data_value(bytes(bytes::initialized_later(), cell_size)); auto ck = clustering_key::from_single_value(*s, data_value(ck_idx++).serialize()); m.set_clustered_cell(ck, "v1", val, api::new_timestamp()); m.set_clustered_cell(ck, "v2", val, api::new_timestamp()); m.set_clustered_cell(ck, "v3", val, api::new_timestamp()); return m; }); } void test_partition_with_few_small_rows() { auto s = schema_builder("ks", "cf") .with_column("pk", uuid_type, column_kind::partition_key) .with_column("ck", reversed_type_impl::get_instance(int32_type), column_kind::clustering_key) .with_column("v1", bytes_type, column_kind::regular_column) .with_column("v2", bytes_type, column_kind::regular_column) .with_column("v3", bytes_type, column_kind::regular_column) .build(); run_test("Small partition with a few rows", s, [&] { auto pk = dht::global_partitioner().decorate_key(*s, partition_key::from_single_value(*s, data_value(utils::UUID_gen::get_time_UUID()).serialize())); mutation m(s, pk); auto val = data_value(bytes(bytes::initialized_later(), cell_size)); for (int i = 0; i < 3; ++i) { auto ck = clustering_key::from_single_value(*s, data_value(i).serialize()); m.set_clustered_cell(ck, "v1", val, api::new_timestamp()); m.set_clustered_cell(ck, "v2", val, api::new_timestamp()); m.set_clustered_cell(ck, "v3", val, api::new_timestamp()); } return m; }); } void test_partition_with_lots_of_range_tombstones() { auto s = schema_builder("ks", "cf") .with_column("pk", uuid_type, column_kind::partition_key) .with_column("ck", reversed_type_impl::get_instance(int32_type), column_kind::clustering_key) .with_column("v1", bytes_type, column_kind::regular_column) .with_column("v2", bytes_type, column_kind::regular_column) .with_column("v3", bytes_type, column_kind::regular_column) .build(); auto pk = dht::global_partitioner().decorate_key(*s, partition_key::from_single_value(*s, data_value(utils::UUID_gen::get_time_UUID()).serialize())); int ck_idx = 0; run_test("Large partition, lots of range tombstones", s, [&] { mutation m(s, pk); auto val = data_value(bytes(bytes::initialized_later(), cell_size)); auto ck = clustering_key::from_single_value(*s, data_value(ck_idx++).serialize()); auto r = query::clustering_range::make({ck}, {ck}); tombstone tomb(api::new_timestamp(), gc_clock::now()); m.partition().apply_row_tombstone(*s, range_tombstone(bound_view::from_range_start(r), bound_view::from_range_end(r), tomb)); return m; }); } int main(int argc, char** argv) { app_template app; return app.run(argc, argv, [&app] { return seastar::async([&] { engine().at_exit([] { cancelled = true; return make_ready_future(); }); logalloc::prime_segment_pool(memory::stats().total_memory(), memory::min_free_memory()).get(); test_small_partitions(); test_partition_with_few_small_rows(); test_partition_with_lots_of_small_rows(); test_partition_with_lots_of_range_tombstones(); }); }); }