/*
 * Copyright (C) 2015 ScyllaDB
 */

/*
 * This file is part of Scylla.
 *
 * Scylla is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Scylla is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "schema_builder.hh"
#include "mutation_reader_assertions.hh"
#include "mutation_source_test.hh"

// NOTE(review): several templates in this file (std::vector, std::function,
// std::experimental::optional, std::uniform_int_distribution, static_cast,
// std::make_unique, query::range, ...) appear without their template argument
// lists. They look to have been lost when this text was extracted; restore them
// from the upstream file before compiling.

// Fails the current test (BOOST_FAIL) if two consecutive partitions carry the
// same token. Only neighbours are compared, hence the ordering requirement:
// partitions must be sorted by decorated key
static void require_no_token_duplicates(const std::vector& partitions) {
    std::experimental::optional last_token;
    for (auto&& p : partitions) {
        const dht::decorated_key& key = p.decorated_key();
        if (last_token && key.token() == *last_token) {
            BOOST_FAIL("token duplicate detected");
        }
        last_token = key.token();
    }
}

// Checks that the mutation source returned by `populate` yields, for a variety
// of partition ranges, exactly the partitions selected by slice(partitions, r)
// followed by end of stream.
//
// 300 single-cell partitions ("key_0" .. "key_299") are created and sorted by
// decorated key. The first and the last of them are then removed from the
// population and kept only as keys, so that ranges can be expressed relative to
// keys that sort before and after everything the source actually holds.
static void test_range_queries(populate_fn populate) {
    BOOST_TEST_MESSAGE("Testing range queries");

    auto s = schema_builder("ks", "cf")
        .with_column("key", bytes_type, column_kind::partition_key)
        .with_column("v", bytes_type)
        .build();

    // Builds a mutation for partition `key` with one cell "v" = "v1" at timestamp 1.
    auto make_partition_mutation = [s] (bytes key) -> mutation {
        mutation m(partition_key::from_single_value(*s, key), s);
        m.set_clustered_cell(clustering_key::make_empty(), "v", data_value(bytes("v1")), 1);
        return m;
    };

    int partition_count = 300;

    std::vector partitions;
    for (int i = 0; i < partition_count; ++i) {
        partitions.emplace_back(
            make_partition_mutation(to_bytes(sprint("key_%d", i))));
    }

    std::sort(partitions.begin(), partitions.end(), mutation_decorated_key_less_comparator());
    // The token-based ranges below index partitions by token, so tokens must be unique.
    require_no_token_duplicates(partitions);

    // Take the smallest and largest keys out of the population; they are used
    // only as range bounds outside of the populated data.
    dht::decorated_key key_before_all = partitions.front().decorated_key();
    partitions.erase(partitions.begin());

    dht::decorated_key key_after_all = partitions.back().decorated_key();
    partitions.pop_back();

    auto ds = populate(s, partitions);

    // Reads range `r` from the source and compares against the expected slice.
    auto test_slice = [&] (query::range r) {
        BOOST_TEST_MESSAGE(sprint("Testing range %s", r));
        assert_that(ds(s, r))
            .produces(slice(partitions, r))
            .produces_end_of_stream();
    };

    // Range from the start of partitions[start]'s token to the end of
    // partitions[end]'s token (indices refer to the sorted, trimmed vector).
    auto inclusive_token_range = [&] (size_t start, size_t end) {
        return query::partition_range::make(
            {dht::ring_position::starting_at(partitions[start].token())},
            {dht::ring_position::ending_at(partitions[end].token())});
    };

    // Ranges between key_before_all and the first populated partition,
    // with each combination of key-based inclusiveness ...
    test_slice(query::partition_range::make(
        {key_before_all, true}, {partitions.front().decorated_key(), true}));

    test_slice(query::partition_range::make(
        {key_before_all, false}, {partitions.front().decorated_key(), true}));

    test_slice(query::partition_range::make(
        {key_before_all, false}, {partitions.front().decorated_key(), false}));

    // ... and the same span expressed with token-based ring positions.
    test_slice(query::partition_range::make(
        {dht::ring_position::starting_at(key_before_all.token())},
        {dht::ring_position::ending_at(partitions.front().token())}));

    test_slice(query::partition_range::make(
        {dht::ring_position::ending_at(key_before_all.token())},
        {dht::ring_position::ending_at(partitions.front().token())}));

    test_slice(query::partition_range::make(
        {dht::ring_position::ending_at(key_before_all.token())},
        {dht::ring_position::starting_at(partitions.front().token())}));

    // Ranges between the last populated partition and key_after_all.
    test_slice(query::partition_range::make(
        {partitions.back().decorated_key(), true}, {key_after_all, true}));

    test_slice(query::partition_range::make(
        {partitions.back().decorated_key(), true}, {key_after_all, false}));

    test_slice(query::partition_range::make(
        {partitions.back().decorated_key(), false}, {key_after_all, false}));

    test_slice(query::partition_range::make(
        {dht::ring_position::starting_at(partitions.back().token())},
        {dht::ring_position::ending_at(key_after_all.token())}));

    test_slice(query::partition_range::make(
        {dht::ring_position::starting_at(partitions.back().token())},
        {dht::ring_position::starting_at(key_after_all.token())}));

    test_slice(query::partition_range::make(
        {dht::ring_position::ending_at(partitions.back().token())},
        {dht::ring_position::starting_at(key_after_all.token())}));

    // Key-bounded ranges inside the populated data.
    test_slice(query::partition_range::make(
        {partitions[0].decorated_key(), false},
        {partitions[1].decorated_key(), true}));

    test_slice(query::partition_range::make(
        {partitions[0].decorated_key(), true},
        {partitions[1].decorated_key(), false}));

    test_slice(query::partition_range::make(
        {partitions[1].decorated_key(), true},
        {partitions[3].decorated_key(), false}));

    test_slice(query::partition_range::make(
        {partitions[1].decorated_key(), false},
        {partitions[3].decorated_key(), true}));

    // Ranges with only one bound given.
    test_slice(query::partition_range::make_ending_with(
        {partitions[3].decorated_key(), true}));

    test_slice(query::partition_range::make_starting_with(
        {partitions[partitions.size() - 4].decorated_key(), true}));

    // Token-bounded ranges at the front, around indices 127..129, and at the back.
    test_slice(inclusive_token_range(0, 0));
    test_slice(inclusive_token_range(1, 1));
    test_slice(inclusive_token_range(2, 4));
    test_slice(inclusive_token_range(127, 128));
    test_slice(inclusive_token_range(128, 128));
    test_slice(inclusive_token_range(128, 129));
    test_slice(inclusive_token_range(127, 129));
    test_slice(inclusive_token_range(partitions.size() - 1, partitions.size() - 1));

    test_slice(inclusive_token_range(0, partitions.size() - 1));
    test_slice(inclusive_token_range(0, partitions.size() - 2));
    test_slice(inclusive_token_range(0, partitions.size() - 3));
    test_slice(inclusive_token_range(0, partitions.size() - 128));

    test_slice(inclusive_token_range(1, partitions.size() - 1));
    test_slice(inclusive_token_range(2, partitions.size() - 1));
    test_slice(inclusive_token_range(3, partitions.size() - 1));
    test_slice(inclusive_token_range(128, partitions.size() - 1));
}

// Entry point of the mutation source test suite; currently runs only the
// range query test against the source created by `populate`.
void run_mutation_source_tests(populate_fn populate) {
    test_range_queries(populate);
}

// Groups of mutations used as comparison fixtures. Within a group in `equal`
// the mutations are expected to compare equal to the first one; within a group
// in `unequal` they are expected to differ from the first one (see
// for_each_mutation_pair()).
struct mutation_sets {
    std::vector> equal;
    std::vector> unequal;
    mutation_sets(){}
};

// Returns a tombstone stamped with a fresh timestamp and the current gc_clock time.
static tombstone new_tombstone() {
    return { api::new_timestamp(), gc_clock::now() };
}

// Builds the fixture groups returned by get_mutation_sets().
//
// The first part works on two mutations, m1 (schema s1) and m2 (schema s2),
// for the same partition key. Each step applies a change to m1 only and records
// {m1, m2} as unequal, then applies the same change to m2 and records {m1, m2}
// as equal. Copies are stored, so every recorded group is a snapshot.
//
// The second part adds pairs produced by random_mutation_generator.
static mutation_sets generate_mutation_sets() {
    using mutations = std::vector;
    mutation_sets result;

    {
        // NOTE(review): common_schema is the result of the with_column() chain,
        // not of build(). If with_column() modifies the builder in place, the
        // column added for s1 below is also present in s2 - confirm whether
        // that is intended.
        auto common_schema = schema_builder("ks", "test")
                .with_column("pk_col", bytes_type, column_kind::partition_key)
                .with_column("ck_col_1", bytes_type, column_kind::clustering_key)
                .with_column("ck_col_2", bytes_type, column_kind::clustering_key)
                .with_column("regular_col_1", bytes_type)
                .with_column("regular_col_2", bytes_type)
                .with_column("static_col_1", bytes_type, column_kind::static_column)
                .with_column("static_col_2", bytes_type, column_kind::static_column);

        auto s1 = common_schema
                .with_column("regular_col_1_s1", bytes_type) // will have id in between common columns
                .build();

        auto s2 = common_schema
                .with_column("regular_col_1_s2", bytes_type) // will have id in between common columns
                .build();

        // Differing keys
        result.unequal.emplace_back(mutations{
            mutation(partition_key::from_single_value(*s1, to_bytes("key1")), s1),
            mutation(partition_key::from_single_value(*s2, to_bytes("key2")), s2)
        });

        // Same key, no content yet.
        auto m1 = mutation(partition_key::from_single_value(*s1, to_bytes("key1")), s1);
        auto m2 = mutation(partition_key::from_single_value(*s2, to_bytes("key1")), s2);

        result.equal.emplace_back(mutations{m1, m2});

        clustering_key ck1 = clustering_key::from_deeply_exploded(*s1, {data_value(bytes("ck1_0")), data_value(bytes("ck1_1"))});
        clustering_key ck2 = clustering_key::from_deeply_exploded(*s1, {data_value(bytes("ck2_0")), data_value(bytes("ck2_1"))});
        auto ttl = gc_clock::duration(1);

        // Partition tombstone.
        {
            auto tomb = new_tombstone();
            m1.partition().apply(tomb);
            result.unequal.emplace_back(mutations{m1, m2});
            m2.partition().apply(tomb);
            result.equal.emplace_back(mutations{m1, m2});
        }

        // Deletion of the row at ck2.
        {
            auto tomb = new_tombstone();
            m1.partition().apply_delete(*s1, ck2, tomb);
            result.unequal.emplace_back(mutations{m1, m2});
            m2.partition().apply_delete(*s1, ck2, tomb);
            result.equal.emplace_back(mutations{m1, m2});
        }

        // Row tombstone for the one-component prefix "ck2_0".
        {
            auto tomb = new_tombstone();
            auto key = clustering_key_prefix::from_deeply_exploded(*s1, {data_value(bytes("ck2_0"))});
            m1.partition().apply_row_tombstone(*s1, key, tomb);
            result.unequal.emplace_back(mutations{m1, m2});
            m2.partition().apply_row_tombstone(*s1, key, tomb);
            result.equal.emplace_back(mutations{m1, m2});
        }

        // Cell with a TTL in regular_col_1 of row ck1.
        {
            auto ts = api::new_timestamp();
            m1.set_clustered_cell(ck1, "regular_col_1", data_value(bytes("regular_col_value")), ts, ttl);
            result.unequal.emplace_back(mutations{m1, m2});
            m2.set_clustered_cell(ck1, "regular_col_1", data_value(bytes("regular_col_value")), ts, ttl);
            result.equal.emplace_back(mutations{m1, m2});
        }

        // Cell with a TTL in regular_col_2 of row ck1.
        {
            auto ts = api::new_timestamp();
            m1.set_clustered_cell(ck1, "regular_col_2", data_value(bytes("regular_col_value")), ts, ttl);
            result.unequal.emplace_back(mutations{m1, m2});
            m2.set_clustered_cell(ck1, "regular_col_2", data_value(bytes("regular_col_value")), ts, ttl);
            result.equal.emplace_back(mutations{m1, m2});
        }

        // Insert applied to row ck2.
        {
            auto ts = api::new_timestamp();
            m1.partition().apply_insert(*s1, ck2, ts);
            result.unequal.emplace_back(mutations{m1, m2});
            m2.partition().apply_insert(*s1, ck2, ts);
            result.equal.emplace_back(mutations{m1, m2});
        }

        // Cell without a TTL in regular_col_1 of row ck2.
        {
            auto ts = api::new_timestamp();
            m1.set_clustered_cell(ck2, "regular_col_1", data_value(bytes("ck2_regular_col_1_value")), ts);
            result.unequal.emplace_back(mutations{m1, m2});
            m2.set_clustered_cell(ck2, "regular_col_1", data_value(bytes("ck2_regular_col_1_value")), ts);
            result.equal.emplace_back(mutations{m1, m2});
        }

        // Static cell with a TTL.
        {
            auto ts = api::new_timestamp();
            m1.set_static_cell("static_col_1", data_value(bytes("static_col_value")), ts, ttl);
            result.unequal.emplace_back(mutations{m1, m2});
            m2.set_static_cell("static_col_1", data_value(bytes("static_col_value")), ts, ttl);
            result.equal.emplace_back(mutations{m1, m2});
        }

        // Static cell without a TTL.
        {
            auto ts = api::new_timestamp();
            m1.set_static_cell("static_col_2", data_value(bytes("static_col_value")), ts);
            result.unequal.emplace_back(mutations{m1, m2});
            m2.set_static_cell("static_col_2", data_value(bytes("static_col_value")), ts);
            result.equal.emplace_back(mutations{m1, m2});
        }

        // Unlike the steps above, m1 and m2 get cells in *different* columns
        // here, so the pair is recorded as unequal both times.
        {
            auto ts = api::new_timestamp();
            m1.set_clustered_cell(ck2, "regular_col_1_s1", data_value(bytes("x")), ts);
            result.unequal.emplace_back(mutations{m1, m2});
            m2.set_clustered_cell(ck2, "regular_col_1_s2", data_value(bytes("x")), ts);
            result.unequal.emplace_back(mutations{m1, m2});
        }
    }

    // Ten random mutations: each is paired with another random one (unequal)
    // and with a copy of itself (equal).
    {
        random_mutation_generator gen;
        for (int i = 0; i < 10; ++i) {
            auto m = gen(false);
            result.unequal.emplace_back(mutations{m, gen()}); // collision unlikely
            result.equal.emplace_back(mutations{m, m});
        }
    }

    return result;
}

// Returns the fixture groups, generated on first use and then cached in a
// thread_local static (one instance per thread).
static const mutation_sets& get_mutation_sets() {
    static thread_local const auto ms = generate_mutation_sets();
    return ms;
}

// Invokes `callback(first, other, flag)` for the first mutation of every
// fixture group paired with each later mutation of the same group. `flag` is
// are_equal::yes for groups from the equal set and are_equal::no for groups
// from the unequal set. Every group must be non-empty (asserted).
void for_each_mutation_pair(std::function callback) {
    auto&& ms = get_mutation_sets();
    for (auto&& mutations : ms.equal) {
        auto i = mutations.begin();
        assert(i != mutations.end());
        const mutation& first = *i++;
        while (i != mutations.end()) {
            callback(first, *i, are_equal::yes);
            ++i;
        }
    }
    for (auto&& mutations : ms.unequal) {
        auto i = mutations.begin();
        assert(i != mutations.end());
        const mutation& first = *i++;
        while (i != mutations.end()) {
            callback(first, *i, are_equal::no);
            ++i;
        }
    }
}

// Invokes `callback` once for every mutation in every fixture group, equal
// groups first, then unequal groups.
void for_each_mutation(std::function callback) {
    auto&& ms = get_mutation_sets();
    for (auto&& mutations : ms.equal) {
        for (auto&& m : mutations) {
            callback(m);
        }
    }
    for (auto&& mutations : ms.unequal) {
        for (auto&& m : mutations) {
            callback(m);
        }
    }
}

// Returns a `bytes` value of length `blob_size` whose every element is assigned
// from a thread-local random engine.
// NOTE(review): bytes::initialized_later() presumably leaves the buffer
// uninitialized until the loop below fills it - confirm.
bytes make_blob(size_t blob_size) {
    static thread_local std::independent_bits_engine random_bytes;
    bytes big_blob(bytes::initialized_later(), blob_size);
    for (auto&& b : big_blob) {
        b = random_bytes();
    }
    return big_blob;
};

// Implementation class behind random_mutation_generator. Holds the schema, a
// pool of random blobs used as keys and cell values, and the random engine.
class random_mutation_generator::impl {
    friend class random_mutation_generator;
    const size_t _external_blob_size = 128; // Should be enough to force use of external bytes storage
    // Number of regular columns and, separately, of static columns in the schema.
    const column_id column_count = row::max_vector_size * 2;
    std::mt19937 _gen;
    schema_ptr _schema;
    std::vector _blobs;

    // Returns a time point 0, 1 or 2 seconds after the default-constructed
    // gc_clock::time_point.
    static gc_clock::time_point expiry_dist(auto& gen) {
        static thread_local std::uniform_int_distribution dist(0, 2);
        return gc_clock::time_point() + std::chrono::seconds(dist(gen));
    }
public:
    // Builds schema "ks"."cf" with partition key "pk", clustering key
    // ("ck1", "ck2") and column_count regular columns "v<i>" plus column_count
    // static columns "s<i>", all of bytes_type.
    schema_ptr make_schema() {
        auto builder = schema_builder("ks", "cf")
                .with_column("pk", bytes_type, column_kind::partition_key)
                .with_column("ck1", bytes_type, column_kind::clustering_key)
                .with_column("ck2", bytes_type, column_kind::clustering_key);

        // Create enough columns so that row can overflow its vector storage
        for (column_id i = 0; i < column_count; ++i) {
            {
                auto column_name = sprint("v%d", i);
                builder.with_column(to_bytes(column_name), bytes_type, column_kind::regular_column);
            }
            {
                auto column_name = sprint("s%d", i);
                builder.with_column(to_bytes(column_name), bytes_type, column_kind::static_column);
            }
        }

        return builder.build();
    }

    // Creates the schema, fills the pool with 1024 blobs of _external_blob_size
    // bytes each, and seeds the engine from std::random_device. The seed is
    // written to the test log so that a run can be reproduced.
    impl() {
        _schema = make_schema();

        for (int i = 0; i < 1024; ++i) {
            _blobs.emplace_back(make_blob(_external_blob_size));
        }

        std::random_device rd;
        // In case of errors, replace the seed with a fixed value to get a deterministic run.
        auto seed = rd();
        BOOST_TEST_MESSAGE(sprint("Random seed: %s", seed));
        _gen = std::mt19937(seed);
    }

    // Generates one random mutation.
    //
    // The partition key value is always _blobs[0]. The mutation gets, in this
    // order: an optional partition tombstone, random static cells, a random
    // number of clustered rows (cells plus row marker), and a random number of
    // row tombstones. When `range_deletes` is true, each of those tombstones is
    // applied as a range_tombstone between two random one-component bounds;
    // otherwise it is applied to a single one-component prefix.
    //
    // Timestamps, expiry times and cell values are drawn from very small
    // domains (3 values each).
    mutation operator()(bool range_deletes) {
        std::uniform_int_distribution column_count_dist(1, column_count);
        std::uniform_int_distribution column_id_dist(0, column_count - 1);
        // Live cell values come from the first three blobs only.
        std::uniform_int_distribution value_blob_index_dist(0, 2);
        // Clustering key components are picked around the middle of the blob pool.
        std::normal_distribution<> ck_index_dist(_blobs.size() / 2, 1.5);
        std::uniform_int_distribution bool_dist(0, 1);

        std::uniform_int_distribution timestamp_dist(api::min_timestamp, api::min_timestamp + 2); // 3 values

        auto pkey = partition_key::from_single_value(*_schema, _blobs[0]);
        mutation m(pkey, _schema);

        // Applies between 1 and column_count cells to `r`. Each cell is live or
        // dead with equal probability and targets a randomly chosen column of
        // the given kind, so the same column may be hit more than once.
        auto set_random_cells = [&] (row& r, column_kind kind) {
            auto columns_to_set = column_count_dist(_gen);
            for (column_id i = 0; i < columns_to_set; ++i) {
                // FIXME: generate expiring cells
                auto cell = bool_dist(_gen)
                    ? atomic_cell::make_live(timestamp_dist(_gen), _blobs[value_blob_index_dist(_gen)])
                    : atomic_cell::make_dead(timestamp_dist(_gen), expiry_dist(_gen));
                r.apply(_schema->column_at(kind, column_id_dist(_gen)), std::move(cell));
            }
        };

        auto random_tombstone = [&] {
            return tombstone(timestamp_dist(_gen), expiry_dist(_gen));
        };

        // Picks one of four row_marker forms: default-constructed, built from a
        // tombstone, built from a timestamp, or built from a timestamp with a
        // one-second duration and an expiry time.
        auto random_row_marker = [&] {
            static thread_local std::uniform_int_distribution dist(0, 3);
            switch (dist(_gen)) {
                case 0: return row_marker();
                case 1: return row_marker(random_tombstone());
                case 2: return row_marker(timestamp_dist(_gen));
                case 3: return row_marker(timestamp_dist(_gen), std::chrono::seconds(1), expiry_dist(_gen));
                // dist yields only 0..3, so this should be unreachable.
                // NOTE(review): with assertions compiled out, this path would
                // leave the lambda without returning a value.
                default: assert(0);
            }
        };

        if (bool_dist(_gen)) {
            m.partition().apply(random_tombstone());
        }

        set_random_cells(m.partition().static_row(), column_kind::static_column);

        // Picks a blob at a normally distributed index, clamped to the valid
        // index range of _blobs.
        auto random_blob = [&] {
            return _blobs[std::min(_blobs.size() - 1, static_cast(std::max(0.0, ck_index_dist(_gen))))];
        };

        // Draws a count from a normal distribution (mean 32, stddev 1.5),
        // clamped to [0, 100].
        auto row_count_dist = [&] (auto& gen) {
            static thread_local std::normal_distribution<> dist(32, 1.5);
            return static_cast(std::min(100.0, std::max(0.0, dist(gen))));
        };

        size_t row_count = row_count_dist(_gen);
        for (size_t i = 0; i < row_count; ++i) {
            auto ckey = clustering_key::from_exploded(*_schema, {random_blob(), random_blob()});
            deletable_row& row = m.partition().clustered_row(ckey);
            set_random_cells(row.cells(), column_kind::regular_column);
            row.marker() = random_row_marker();
        }

        size_t range_tombstone_count = row_count_dist(_gen);
        for (size_t i = 0; i < range_tombstone_count; ++i) {
            auto&& start = clustering_key::from_exploded(*_schema, {random_blob()});
            if (range_deletes) {
                clustering_key_prefix::less_compare less(*_schema);
                auto end = clustering_key::from_exploded(*_schema, {random_blob()});
                // Order the bounds so that start does not sort after end.
                if (less(end, start)) {
                    std::swap(start, end);
                }
                m.partition().apply_row_tombstone(*_schema,
                    range_tombstone(std::move(start), std::move(end), random_tombstone()));
            } else {
                m.partition().apply_row_tombstone(*_schema, start, random_tombstone());
            }
        }
        return m;
    }
};

// Defined in this file, after the definition of impl above.
random_mutation_generator::~random_mutation_generator() {}

// Creates the generator together with its implementation object.
random_mutation_generator::random_mutation_generator()
    : _impl(std::make_unique())
{ }

// Generates one random mutation; forwards to impl::operator().
mutation random_mutation_generator::operator()(bool range_deletes) {
    return (*_impl)(range_deletes);
}

// Returns the schema used by all mutations this generator produces.
schema_ptr random_mutation_generator::schema() const {
    return _impl->_schema;
}