Files
scylladb/tests/memory_footprint.cc
Avi Kivity 9b21a9bfb6 Merge "Implement partial cache" from Tomasz and Piotr
"This series enables cache to keep partial partitions.
Reads no longer have to read whole partition from sstables
in order to cache the result.

The 10MB threshold for partition size in cache is lifted.

Known issues:

 - There is no partial eviction yet, whole partitions are still evicted,
   and partition snapshots held by active reads are not evictable at all
 - Information about range continuity is not recorded if that
   would require inserting a dummy entry, or if previous entry
   doesn't belong to the latest snapshot
 - Cache update after memtable flush happening concurrently with reads
   may inhibit those reads' ability to populate cache (new issue)
 - Cache update from flushed memtables has partition granularity,
   so may cause latency problems with large partitions
 - Schema is still tracked per-partition, so after schema changes
   reads may induce high latency due to whole partition needing
   to be converted atomically
 - Range tombstones are repeated in the stream for every range between
   cache entries they cover (new issue)
 - Populating scans for both small and large partitions (perf_fast_forward)
   experienced a 40% reduction of throughput, CPU bound

How was this tested:

 - test.py --mode release
 - row_cache_stress_test -c1 -m1G
 - perf_fast_forward, passes except for the test case checking range continuity population
   which would require inserting a dummy entry (mentioned above)
 - perf_simple_query (-c1 -m1G --duration 32):
     before: 90k [ops/s] stdev: 4k [ops/s]
     after:  94k [ops/s] stdev: 2k [ops/s]"

* tag 'tgrabiec/introduce-partial-cache-v8' of github.com:cloudius-systems/seastar-dev: (130 commits)
  tests: row_cache: Add test_tombstone_merging_in_partial_partition test case
  tests: Introduce row_cache_stress_test
  utils: Add helpers for dealing with nonwrapping_range<int>
  tests: simple_schema: Allow passing the tombstone to make_range_tombstone()
  tests: simple_schema: Accept value by reference
  tests: simple_schema: Make add_row() accept optional timestamp
  tests: simple_schema: Make new_timestamp() public
  tests: simple_schema: Introduce make_ckeys()
  tests: simple_schema: Introduce get_value(const clustered_row&) helper
  tests: simple_schema: Fix comment
  tests: simple_schema: Add missing include
  row_cache: Introduce evict()
  tests: Add cache_streamed_mutation_test
  tests: mutation_assertions: Allow expecting fragments
  mutation_fragment: Implement equality check
  tests: row_cache: Add test for population of random partitions
  tests: row_cache: Add test for partition tombstone population
  tests: row_cache: Test reading randomly populated partition
  tests: row_cache: Add test_single_partition_update()
  tests: row_cache: Add test_scan_with_partial_partitions
  ...
2017-06-26 14:54:37 +03:00

245 lines
8.9 KiB
C++

/*
* Copyright (C) 2015 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include <boost/range/irange.hpp>
#include <seastar/util/defer.hh>
#include <seastar/core/app-template.hh>
#include <seastar/core/thread.hh>
#include "partition_slice_builder.hh"
#include "schema_builder.hh"
#include "memtable.hh"
#include "row_cache.hh"
#include "frozen_mutation.hh"
#include "tmpdir.hh"
#include "sstables/sstables.hh"
#include "canonical_mutation.hh"
#include "memtable-sstable.hh"
#include "disk-error-handler.hh"
#include "cql_test_env.hh"
// Per-thread disk error signal objects defined by this test binary.
// NOTE(review): presumably these satisfy extern declarations from
// "disk-error-handler.hh" that linked code expects — confirm against that header.
thread_local disk_error_signal_type commit_error;
thread_local disk_error_signal_type general_disk_error;
// Prints the sizeof() of the cache entry type, the mutation partition type and
// the row-level types to std::cout. Members of a structure are printed one
// indentation level deeper than the structure itself.
class size_calculator {
    // Indentation scope: every live instance raises the per-thread nesting
    // level by one, and lowers it again when destroyed.
    class nest {
    public:
        static thread_local int level;
        nest() { ++level; }
        ~nest() { --level; }
    };

    // Builds the line prefix for the current nesting level: a single space
    // followed by one "-- " marker per level.
    static std::string prefix() {
        std::string indent(" ");
        int remaining = nest::level;
        while (remaining-- > 0) {
            indent += "-- ";
        }
        return indent;
    }

    // Writes one "<prefix><label><bytes>" line to std::cout.
    static void report(const char* label, size_t bytes) {
        std::cout << prefix() << label << bytes << "\n";
    }
public:
    // Prints the size of cache_entry and of its parts (indented), then, after
    // a blank line, the sizes of the row-level types.
    static void print_cache_entry_size() {
        report("sizeof(cache_entry) = ", sizeof(cache_entry));
        {
            nest members;
            report("sizeof(decorated_key) = ", sizeof(dht::decorated_key));
            report("sizeof(lru_link_type) = ", sizeof(cache_entry::lru_link_type));
            report("sizeof(cache_link_type) = ", sizeof(cache_entry::cache_link_type));
            print_mutation_partition_size();
        }

        std::cout << "\n";

        report("sizeof(rows_entry) = ", sizeof(rows_entry));
        report("sizeof(deletable_row) = ", sizeof(deletable_row));
        report("sizeof(row) = ", sizeof(row));
        report("sizeof(atomic_cell_or_collection) = ", sizeof(atomic_cell_or_collection));
    }

    // Prints the size of mutation_partition followed by the sizes of three of
    // its data members, one level deeper.
    static void print_mutation_partition_size() {
        report("sizeof(mutation_partition) = ", sizeof(mutation_partition));
        {
            nest members;
            report("sizeof(_static_row) = ", sizeof(mutation_partition::_static_row));
            report("sizeof(_rows) = ", sizeof(mutation_partition::_rows));
            report("sizeof(_row_tombstones) = ", sizeof(mutation_partition::_row_tombstones));
        }
    }
};

// Nesting starts at zero: top-level lines get only the single-space prefix.
thread_local int size_calculator::nest::level = 0;
// Builds the schema for table "ks"."cf": a bytes partition key column "KEY"
// and five regular bytes columns "C0" through "C4".
static schema_ptr cassandra_stress_schema() {
    static const char* const regular_column_names[] = {"C0", "C1", "C2", "C3", "C4"};

    schema_builder builder("ks", "cf");
    builder.with_column("KEY", bytes_type, column_kind::partition_key);
    for (const char* column_name : regular_column_names) {
        builder.with_column(column_name, bytes_type);
    }
    return builder.build();
}
// Builds a single-row mutation against cassandra_stress_schema(): a fixed
// partition key, an empty clustering key, and the same fixed value written
// with timestamp 1 into every regular column.
// Marked unused because nothing in this file calls it.
[[gnu::unused]]
static mutation make_cs_mutation() {
    auto stress_schema = cassandra_stress_schema();
    mutation result(
        partition_key::from_single_value(*stress_schema, bytes_type->from_string("4b343050393536353531")),
        stress_schema);

    // The row key and the cell payload are identical for every column, so
    // they are prepared once up front.
    auto row_key = clustering_key::make_empty();
    const auto cell_value = bytes_type->from_string(
        "8f75da6b3dcec90c8a404fb9a5f6b0621e62d39c69ba5758e5f41b78311fbb26cc7a");

    for (auto&& column : stress_schema->regular_columns()) {
        result.set_clustered_cell(row_key, column, atomic_cell::make_live(1, cell_value));
    }
    return result;
}
// Returns a buffer of `size` bytes, each drawn from std::rand().
//
// The modulus is 256 (max uint8_t + 1) so that every byte value 0x00..0xFF
// can occur; reducing modulo 255 would never produce 0xFF.
bytes random_bytes(size_t size) {
    // Number of distinct values a byte can hold.
    constexpr int byte_values = std::numeric_limits<uint8_t>::max() + 1;

    bytes result(bytes::initialized_later(), size);
    for (size_t i = 0; i < size; ++i) {
        // Values above 127 are narrowed into the buffer's element type on
        // assignment, as in the original code.
        result[i] = std::rand() % byte_values;
    }
    return result;
}
// Returns a string of `size` characters, each picked with std::rand() from
// the 62-character alphabet of digits, upper-case and lower-case letters.
sstring random_string(size_t size) {
    static const char chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    // sizeof(chars) counts the terminating NUL of the literal; exclude it so
    // that '\0' is never selected as a character of the result.
    constexpr size_t alphabet_size = sizeof(chars) - 1;

    sstring result(sstring::initialized_later(), size);
    for (size_t i = 0; i < size; ++i) {
        result[i] = chars[std::rand() % alphabet_size];
    }
    return result;
}
// Parameters describing the mutation generated by make_mutation().
// Every field defaults to zero so that a default-constructed instance never
// holds indeterminate values.
struct mutation_settings {
    size_t column_count = 0;        // number of regular columns added to the schema
    size_t column_name_size = 0;    // length of each randomly generated column name
    size_t row_count = 0;           // number of clustering rows written
    size_t partition_key_size = 0;  // length of the random partition key value
    size_t clustering_key_size = 0; // length of each random clustering key value
    size_t data_size = 0;           // length of the random value written to each cell
};
// Generates a random mutation shaped by `settings`.
//
// The schema is table "ks"."cf" with a bytes partition key "pk", a bytes
// clustering key "ck" and settings.column_count regular bytes columns with
// random names. The mutation holds one partition with settings.row_count
// rows; every regular column of every row receives a live cell with
// timestamp 1 and settings.data_size random bytes.
static mutation make_mutation(mutation_settings settings) {
    // Serialized bytes value carrying `length` random bytes.
    const auto random_value = [] (size_t length) {
        return bytes_type->decompose(data_value(random_bytes(length)));
    };

    auto schema_spec = schema_builder("ks", "cf")
        .with_column("pk", bytes_type, column_kind::partition_key)
        .with_column("ck", bytes_type, column_kind::clustering_key);
    for (auto pending = settings.column_count; pending > 0; --pending) {
        schema_spec.with_column(to_bytes(random_string(settings.column_name_size)), bytes_type);
    }
    auto table_schema = schema_spec.build();

    mutation result(
        partition_key::from_single_value(*table_schema, random_value(settings.partition_key_size)),
        table_schema);

    for (auto pending = settings.row_count; pending > 0; --pending) {
        auto row_key = clustering_key::from_single_value(*table_schema, random_value(settings.clustering_key_size));
        for (auto&& column : table_schema->regular_columns()) {
            result.set_clustered_cell(row_key, column,
                atomic_cell::make_live(1, random_value(settings.data_size)));
        }
    }
    return result;
}
// Footprint of one mutation in each representation measured by
// calculate_sizes(). Every field defaults to zero so that a field left
// unassigned is never read as an indeterminate value.
struct sizes {
    size_t memtable = 0;     // memtable occupancy after applying the mutation
    size_t cache = 0;        // growth of the cache tracker region after populating the cache
    size_t sstable = 0;      // data size reported by the written sstable
    size_t frozen = 0;       // size of the frozen mutation representation
    size_t canonical = 0;    // size of the canonical mutation representation
    size_t query_result = 0; // size of the buffer of a query result over the mutation
};
// Measures the footprint of mutation `m` in a memtable, in the row cache, as
// a frozen mutation, as a canonical mutation, as a query result and as an
// sstable written to a temporary directory.
//
// The sstable steps wait on futures with .get(); main() calls this function
// from inside seastar::async.
static sizes calculate_sizes(const mutation& m) {
    sizes measured;
    auto table_schema = m.schema();

    auto memtbl = make_lw_shared<memtable>(table_schema);
    cache_tracker tracker;
    row_cache cache(table_schema, make_empty_snapshot_source(), tracker);

    // Space currently used in the cache tracker's region.
    const auto cache_used_space = [&tracker] {
        return tracker.region().occupancy().used_space();
    };

    // Baseline taken before populating, so only the growth caused by `m` is
    // attributed to the cache.
    auto cache_baseline = cache_used_space();
    assert(memtbl->occupancy().used_space() == 0);

    memtbl->apply(m);
    cache.populate(m);

    measured.memtable = memtbl->occupancy().used_space();
    measured.cache = cache_used_space() - cache_baseline;
    measured.frozen = freeze(m).representation().size();
    measured.canonical = canonical_mutation(m).representation().size();
    measured.query_result = m.query(partition_slice_builder(*table_schema).build(), query::result_request::only_result).buf().size();

    // Write the memtable out as an sstable and load it back to read its data size.
    tmpdir sstable_dir;
    auto table = make_lw_shared<sstables::sstable>(table_schema,
        sstable_dir.path,
        1 /* generation */,
        sstables::sstable::version_types::la,
        sstables::sstable::format_types::big);
    write_memtable_to_sstable(*memtbl, table).get();
    table->load().get();
    measured.sstable = table->data_size();

    return measured;
}
int main(int argc, char** argv) {
namespace bpo = boost::program_options;
app_template app;
app.add_options()
("column-count", bpo::value<size_t>()->default_value(5), "column count")
("column-name-size", bpo::value<size_t>()->default_value(2), "column name size")
("row-count", bpo::value<size_t>()->default_value(1), "row count")
("partition-key-size", bpo::value<size_t>()->default_value(10), "partition key size")
("clustering-key-size", bpo::value<size_t>()->default_value(10), "clustering key size")
("data-size", bpo::value<size_t>()->default_value(32), "cell data size");
return app.run(argc, argv, [&] {
return do_with_cql_env([&] (auto&& env) {
return seastar::async([&] {
mutation_settings settings;
settings.column_count = app.configuration()["column-count"].as<size_t>();
settings.column_name_size = app.configuration()["column-name-size"].as<size_t>();
settings.row_count = app.configuration()["row-count"].as<size_t>();
settings.partition_key_size = app.configuration()["partition-key-size"].as<size_t>();
settings.clustering_key_size = app.configuration()["clustering-key-size"].as<size_t>();
settings.data_size = app.configuration()["data-size"].as<size_t>();
auto m = make_mutation(settings);
auto sizes = calculate_sizes(m);
std::cout << "mutation footprint:" << "\n";
std::cout << " - in cache: " << sizes.cache << "\n";
std::cout << " - in memtable: " << sizes.memtable << "\n";
std::cout << " - in sstable: " << sizes.sstable << "\n";
std::cout << " - frozen: " << sizes.frozen << "\n";
std::cout << " - canonical: " << sizes.canonical << "\n";
std::cout << " - query result: " << sizes.query_result << "\n";
std::cout << "\n";
size_calculator::print_cache_entry_size();
});
});
});
}