/* * Copyright (C) 2019-present ScyllaDB */ /* * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 */ #include #include #include #include "cql3/cql3_type.hh" #include "cql3/description.hh" #include "mutation/mutation.hh" #include "schema/schema_builder.hh" #include "test/lib/cql_test_env.hh" #include "test/lib/eventually.hh" #include "test/lib/random_schema.hh" #include "test/lib/random_utils.hh" #include "types/list.hh" #include "types/map.hh" #include "types/set.hh" #include "types/tuple.hh" #include "types/user.hh" #include "utils/assert.hh" #include "utils/big_decimal.hh" #include "utils/UUID_gen.hh" #include "replica/schema_describe_helper.hh" namespace tests { type_generator::type_generator(random_schema_specification& spec) : _spec(spec) { struct simple_type_generator { data_type type; data_type operator()(std::mt19937&, is_multi_cell) { return type; } }; _generators = { simple_type_generator{byte_type}, simple_type_generator{short_type}, simple_type_generator{int32_type}, simple_type_generator{long_type}, simple_type_generator{ascii_type}, simple_type_generator{bytes_type}, simple_type_generator{utf8_type}, simple_type_generator{boolean_type}, simple_type_generator{date_type}, simple_type_generator{timeuuid_type}, simple_type_generator{timestamp_type}, simple_type_generator{simple_date_type}, simple_type_generator{time_type}, simple_type_generator{uuid_type}, simple_type_generator{inet_addr_type}, simple_type_generator{float_type}, simple_type_generator{double_type}, simple_type_generator{varint_type}, simple_type_generator{decimal_type}, simple_type_generator{duration_type}}; // tuple _generators.emplace_back( [this] (std::mt19937& engine, is_multi_cell) { std::uniform_int_distribution count_dist{2, 4}; const auto count = count_dist(engine); std::vector data_types; for (size_t i = 0; i < count; ++i) { data_types.emplace_back((*this)(engine, type_generator::is_multi_cell::no)); } return tuple_type_impl::get_instance(std::move(data_types)); 
}); // user _generators.emplace_back( [this] (std::mt19937& engine, is_multi_cell multi_cell) mutable { std::uniform_int_distribution count_dist{2, 4}; const auto count = count_dist(engine); std::vector field_names; std::vector field_types; for (size_t i = 0; i < count; ++i) { field_names.emplace_back(to_bytes(format("f{}", i))); field_types.emplace_back((*this)(engine, type_generator::is_multi_cell::no)); } return user_type_impl::get_instance(_spec.keyspace_name(), to_bytes(_spec.udt_name(engine)), std::move(field_names), std::move(field_types), bool(multi_cell)); }); // list _generators.emplace_back( [this] (std::mt19937& engine, is_multi_cell multi_cell) { auto element_type = (*this)(engine, type_generator::is_multi_cell::no); return list_type_impl::get_instance(std::move(element_type), bool(multi_cell)); }); // set _generators.emplace_back( [this] (std::mt19937& engine, is_multi_cell multi_cell) { auto element_type = (*this)(engine, type_generator::is_multi_cell::no); return set_type_impl::get_instance(std::move(element_type), bool(multi_cell)); }); // map _generators.emplace_back( [this] (std::mt19937& engine, is_multi_cell multi_cell) { auto key_type = (*this)(engine, type_generator::is_multi_cell::no); auto value_type = (*this)(engine, type_generator::is_multi_cell::no); return map_type_impl::get_instance(std::move(key_type), std::move(value_type), bool(multi_cell)); }); } data_type type_generator::operator()(std::mt19937& engine, is_multi_cell multi_cell) { auto dist = std::uniform_int_distribution(0, _generators.size() - 1); auto type = _generators.at(dist(engine))(engine, multi_cell); // duration type is not allowed in: // * primary key components // * as member types of collections // // To cover all this, we simply disallow it altogether when multi_cell is // no, which will be the case in all the above cases. // // We also disallow boolean type in keys, due to the poor value distribution // it provides. 
Generating keys which have a boolean in it, are prone to // collision and will result in poor cardinality. while (!multi_cell && (type == duration_type || type == boolean_type)) { type = (*this)(engine, multi_cell); } return type; } namespace { class default_random_schema_specification : public random_schema_specification { std::unordered_set _used_table_ids; std::unordered_set _used_udt_ids; std::uniform_int_distribution _partition_column_count_dist; std::uniform_int_distribution _clustering_column_count_dist; std::uniform_int_distribution _regular_column_count_dist; std::uniform_int_distribution _static_column_count_dist; type_generator _type_generator; compress_sstable _compress; private: static unsigned generate_unique_id(std::mt19937& engine, std::unordered_set& used_ids) { std::uniform_int_distribution id_dist(0, 1024); unsigned id; do { id = id_dist(engine); } while (used_ids.contains(id)); used_ids.insert(id); return id; } std::vector generate_types(std::mt19937& engine, std::uniform_int_distribution& count_dist, type_generator::is_multi_cell multi_cell, bool allow_reversed = false) { std::uniform_int_distribution reversed_dist{0, uint8_t(allow_reversed)}; std::uniform_int_distribution multi_cell_dist{0, uint8_t(bool(multi_cell))}; std::vector types; const auto count = count_dist(engine); for (size_t c = 0; c < count; ++c) { auto type = _type_generator(engine, type_generator::is_multi_cell(bool(multi_cell_dist(engine)))); if (reversed_dist(engine)) { types.emplace_back(make_shared(std::move(type))); } else { types.emplace_back(std::move(type)); } } return types; } public: default_random_schema_specification( sstring keyspace_name, std::uniform_int_distribution partition_column_count_dist, std::uniform_int_distribution clustering_column_count_dist, std::uniform_int_distribution regular_column_count_dist, std::uniform_int_distribution static_column_count_dist, compress_sstable compress) : random_schema_specification(std::move(keyspace_name)) , 
_partition_column_count_dist(partition_column_count_dist) , _clustering_column_count_dist(clustering_column_count_dist) , _regular_column_count_dist(regular_column_count_dist) , _static_column_count_dist(static_column_count_dist) , _type_generator(*this) , _compress(compress) { SCYLLA_ASSERT(_partition_column_count_dist.a() > 0); } virtual sstring table_name(std::mt19937& engine) override { return format("table{}", generate_unique_id(engine, _used_table_ids)); } virtual sstring udt_name(std::mt19937& engine) override { return format("udt{}", generate_unique_id(engine, _used_udt_ids)); } virtual std::vector partition_key_columns(std::mt19937& engine) override { return generate_types(engine, _partition_column_count_dist, type_generator::is_multi_cell::no, false); } virtual std::vector clustering_key_columns(std::mt19937& engine) override { return generate_types(engine, _clustering_column_count_dist, type_generator::is_multi_cell::no, true); } virtual std::vector regular_columns(std::mt19937& engine) override { return generate_types(engine, _regular_column_count_dist, type_generator::is_multi_cell::yes, false); } virtual std::vector static_columns(std::mt19937& engine) override { return generate_types(engine, _static_column_count_dist, type_generator::is_multi_cell::yes, false); } virtual compress_sstable& compress() override { return _compress; } }; } // anonymous namespace std::unique_ptr make_random_schema_specification( sstring keyspace_name, std::uniform_int_distribution partition_column_count_dist, std::uniform_int_distribution clustering_column_count_dist, std::uniform_int_distribution regular_column_count_dist, std::uniform_int_distribution static_column_count_dist, random_schema_specification::compress_sstable compress) { return std::make_unique(std::move(keyspace_name), partition_column_count_dist, clustering_column_count_dist, regular_column_count_dist, static_column_count_dist, compress); } namespace { utils::multiprecision_int 
generate_multiprecision_integer_value(std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) { using utils::multiprecision_int; const auto max_bytes = std::min(size_t(16), std::max(size_t(2), max_size_in_bytes) - 1); const auto generate_int = [] (std::mt19937& engine, size_t max_bytes) { if (max_bytes == 8) { return multiprecision_int(random::get_int(engine)); } else { // max_bytes < 8 return multiprecision_int(random::get_int(0, (uint64_t(1) << (max_bytes * 8)) - uint64_t(1), engine)); } }; if (max_bytes <= 8) { return generate_int(engine, max_bytes); } else { // max_bytes > 8 auto ls = multiprecision_int(generate_int(engine, 8)); auto ms = multiprecision_int(generate_int(engine, max_bytes - 8)); return multiprecision_int(ls) + (multiprecision_int(ms) << 64); } } template String generate_string_value(std::mt19937& engine, typename String::value_type min, typename String::value_type max, size_t min_size_in_bytes, size_t max_size_in_bytes) { auto size_dist = random::stepped_int_distribution{{ {95.0, { 0, 31}}, { 4.5, { 32, 99}}, { 0.4, { 100, 999}}, { 0.1, {1000, 9999}}}}; auto char_dist = std::uniform_int_distribution(min, max); const auto size = std::clamp( size_dist(engine), min_size_in_bytes / sizeof(typename String::value_type), max_size_in_bytes / sizeof(typename String::value_type)); String str(size, '\0'); for (size_t i = 0; i < size; ++i) { str[i] = char_dist(engine); } return str; } std::vector generate_frozen_tuple_values(std::mt19937& engine, value_generator& val_gen, const std::vector& member_types, size_t min_size_in_bytes, size_t max_size_in_bytes) { std::vector values; values.reserve(member_types.size()); const auto member_min_size_in_bytes = min_size_in_bytes / member_types.size(); const auto member_max_size_in_bytes = max_size_in_bytes / member_types.size(); for (auto member_type : member_types) { values.push_back(val_gen.generate_atomic_value(engine, *member_type, member_min_size_in_bytes, member_max_size_in_bytes)); } return 
values; } data_model::mutation_description::collection generate_user_value(std::mt19937& engine, const user_type_impl& type, value_generator& val_gen) { using md = data_model::mutation_description; // Non-null fields. auto fields_num = std::uniform_int_distribution(1, type.size())(engine); auto field_idxs = random::random_subset(type.size(), fields_num, engine); std::sort(field_idxs.begin(), field_idxs.end()); md::collection collection; for (auto i: field_idxs) { collection.elements.push_back({serialize_field_index(i), val_gen.generate_atomic_value(engine, *type.type(i), value_generator::no_size_in_bytes_limit).serialize_nonnull()}); } return collection; } data_model::mutation_description::collection generate_collection(std::mt19937& engine, const abstract_type& key_type, const abstract_type& value_type, value_generator& val_gen) { using md = data_model::mutation_description; auto key_generator = val_gen.get_atomic_value_generator(key_type); auto value_generator = val_gen.get_atomic_value_generator(value_type); auto size_dist = std::uniform_int_distribution(0, 16); const auto size = size_dist(engine); std::map collection{key_type.as_less_comparator()}; for (size_t i = 0; i < size; ++i) { collection.emplace(key_generator(engine, 0, value_generator::no_size_in_bytes_limit).serialize_nonnull(), value_generator(engine, 0, value_generator::no_size_in_bytes_limit).serialize().value_or("")); } md::collection flat_collection; flat_collection.elements.reserve(collection.size()); for (auto&& [key, value] : collection) { flat_collection.elements.emplace_back(md::collection_element{key, value}); } return flat_collection; } std::vector generate_frozen_list(std::mt19937& engine, const abstract_type& value_type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) { auto value_generator = val_gen.get_atomic_value_generator(value_type); auto size_dist = std::uniform_int_distribution(0, 4); const auto size = std::min(size_dist(engine), max_size_in_bytes / 
std::max(val_gen.min_size(value_type), size_t(1))); std::vector collection; if (!size) { return collection; } const auto value_min_size_in_bytes = min_size_in_bytes / size; const auto value_max_size_in_bytes = max_size_in_bytes / size; for (size_t i = 0; i < size; ++i) { collection.emplace_back(value_generator(engine, value_min_size_in_bytes, value_max_size_in_bytes)); } return collection; } std::vector generate_frozen_set(std::mt19937& engine, const abstract_type& key_type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) { auto key_generator = val_gen.get_atomic_value_generator(key_type); auto size_dist = std::uniform_int_distribution(0, 4); const auto size = std::min(size_dist(engine), max_size_in_bytes / std::max(val_gen.min_size(key_type), size_t(1))); std::map collection{key_type.as_less_comparator()}; std::vector flat_collection; if (!size) { return flat_collection; } const auto value_max_size_in_bytes = max_size_in_bytes / size; const auto value_min_size_in_bytes = min_size_in_bytes / size; for (size_t i = 0; i < size; ++i) { auto val = key_generator(engine, value_min_size_in_bytes, value_max_size_in_bytes); auto serialized_key = val.serialize_nonnull(); collection.emplace(std::move(serialized_key), std::move(val)); } flat_collection.reserve(collection.size()); for (auto&& element : collection) { flat_collection.emplace_back(std::move(element.second)); } return flat_collection; } std::vector> generate_frozen_map(std::mt19937& engine, const abstract_type& key_type, const abstract_type& value_type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) { auto key_generator = val_gen.get_atomic_value_generator(key_type); auto value_generator = val_gen.get_atomic_value_generator(value_type); auto size_dist = std::uniform_int_distribution(0, 4); const auto min_item_size_in_bytes = val_gen.min_size(key_type) + val_gen.min_size(value_type); const auto size = std::min(size_dist(engine), max_size_in_bytes / 
std::max(min_item_size_in_bytes, size_t(1))); std::map, serialized_compare> collection(key_type.as_less_comparator()); std::vector> flat_collection; if (!size) { return flat_collection; } const auto item_max_size_in_bytes = max_size_in_bytes / size; const auto key_max_size_in_bytes = item_max_size_in_bytes / 2; const auto value_max_size_in_bytes = item_max_size_in_bytes / 2; const auto item_min_size_in_bytes = min_size_in_bytes / size; const auto key_min_size_in_bytes = item_min_size_in_bytes / 2; const auto value_min_size_in_bytes = item_min_size_in_bytes / 2; for (size_t i = 0; i < size; ++i) { auto key = key_generator(engine, key_min_size_in_bytes, key_max_size_in_bytes); auto serialized_key = key.serialize_nonnull(); auto value = value_generator(engine, value_min_size_in_bytes, value_max_size_in_bytes); collection.emplace(std::move(serialized_key), std::pair(std::move(key), std::move(value))); } flat_collection.reserve(collection.size()); for (auto&& element : collection) { flat_collection.emplace_back(std::move(element.second)); } return flat_collection; } data_value generate_empty_value(std::mt19937&, size_t, size_t) { return data_value::make_null(empty_type); } data_value generate_byte_value(std::mt19937& engine, size_t, size_t) { return data_value(random::get_int(engine)); } data_value generate_short_value(std::mt19937& engine, size_t, size_t) { return data_value(random::get_int(engine)); } data_value generate_int32_value(std::mt19937& engine, size_t, size_t) { return data_value(random::get_int(engine)); } data_value generate_long_value(std::mt19937& engine, size_t, size_t) { return data_value(random::get_int(engine)); } data_value generate_ascii_value(std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) { return data_value(ascii_native_type{generate_string_value(engine, 0, 127, min_size_in_bytes, max_size_in_bytes)}); } data_value generate_bytes_value(std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) { return 
data_value(generate_string_value(engine, std::numeric_limits::min(), std::numeric_limits::max(), min_size_in_bytes, max_size_in_bytes)); } data_value generate_utf8_value(std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) { auto wstr = generate_string_value(engine, 0, 0x0FFF, min_size_in_bytes, max_size_in_bytes); std::locale locale("en_US.utf8"); using codec = std::codecvt; auto& f = std::use_facet(locale); sstring utf8_str(wstr.size() * f.max_length(), '\0'); const wchar_t* from_next; char* to_next; std::mbstate_t mb{}; auto res = f.out(mb, &wstr[0], &wstr[wstr.size()], from_next, &utf8_str[0], &utf8_str[utf8_str.size()], to_next); SCYLLA_ASSERT(res == codec::ok); utf8_str.resize(to_next - &utf8_str[0]); return data_value(std::move(utf8_str)); } data_value generate_boolean_value(std::mt19937& engine, size_t, size_t) { auto dist = std::uniform_int_distribution(0, 1); return data_value(bool(dist(engine))); } data_value generate_date_value(std::mt19937& engine, size_t, size_t) { using pt = db_clock::time_point; // Python driver can't tolerate dates above year 9999. constexpr auto max_day = std::chrono::sys_days(std::chrono::year{10000}/1/1); constexpr auto max = std::chrono::sys_time(max_day).time_since_epoch().count() - 1; auto x = random::get_int>(0, max, engine); return data_value(date_type_native_type{pt(pt::duration(x))}); } data_value generate_timeuuid_value(std::mt19937&, size_t, size_t) { // FIXME: respect the passed engine. auto b = tests::random::get_bytes(16); b[6] = (b[6] & 0x0F) | 0x10; // version 1 return timeuuid_type->deserialize(b); } data_value generate_timestamp_value(std::mt19937& engine, size_t, size_t) { using pt = db_clock::time_point; // Python driver can't tolerate dates above year 9999 or below year 1. 
constexpr auto min_day = std::chrono::sys_days(std::chrono::year{1}/1/1); constexpr auto max_day = std::chrono::sys_days(std::chrono::year{10000}/1/1); constexpr auto min = std::chrono::sys_time(min_day).time_since_epoch().count(); constexpr auto max = std::chrono::sys_time(max_day).time_since_epoch().count() - 1; auto x = random::get_int(min, max, engine); return data_value(pt(pt::duration(x))); } data_value generate_simple_date_value(std::mt19937& engine, size_t, size_t) { return data_value(simple_date_native_type{random::get_int(engine)}); } data_value generate_time_value(std::mt19937& engine, size_t, size_t) { // A legal `time` is smaller than the number of nanoseconds in a day. auto max = std::chrono::nanoseconds(std::chrono::days(1)).count() - 1; return data_value(time_native_type{random::get_int(0, max, engine)}); } data_value generate_uuid_value(std::mt19937& engine, size_t, size_t) { auto b = tests::random::get_bytes(16, engine); b[6] = (b[6] & 0x0F) | 0x40; // version 4 return data_value(uuid_type->deserialize(b)); } data_value generate_inet_addr_value(std::mt19937& engine, size_t, size_t) { return data_value(net::ipv4_address(random::get_int(engine))); } data_value generate_float_value(std::mt19937& engine, size_t, size_t) { return data_value(random::get_real(engine)); } data_value generate_double_value(std::mt19937& engine, size_t, size_t) { return data_value(random::get_real(engine)); } data_value generate_varint_value(std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) { return data_value(generate_multiprecision_integer_value(engine, min_size_in_bytes, max_size_in_bytes)); } data_value generate_decimal_value(std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) { auto scale_dist = std::uniform_int_distribution(-8, 8); return data_value(big_decimal(scale_dist(engine), generate_multiprecision_integer_value(engine, min_size_in_bytes - sizeof(int32_t), max_size_in_bytes - sizeof(int32_t)))); } data_value 
generate_duration_value(std::mt19937& engine, size_t, size_t) { auto months = months_counter(random::get_int(engine)); auto days = days_counter(random::get_int(0, 31, engine)); auto nanoseconds = nanoseconds_counter(random::get_int(86400000000000, engine)); return data_value(cql_duration{months, days, nanoseconds}); } data_value generate_frozen_tuple_value(std::mt19937& engine, const tuple_type_impl& type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) { SCYLLA_ASSERT(!type.is_multi_cell()); return make_tuple_value(type.shared_from_this(), generate_frozen_tuple_values(engine, val_gen, type.all_types(), min_size_in_bytes, max_size_in_bytes)); } data_value generate_frozen_user_value(std::mt19937& engine, const user_type_impl& type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) { SCYLLA_ASSERT(!type.is_multi_cell()); return make_user_value(type.shared_from_this(), generate_frozen_tuple_values(engine, val_gen, type.all_types(), min_size_in_bytes, max_size_in_bytes)); } data_model::mutation_description::collection generate_list_value(std::mt19937& engine, const list_type_impl& type, value_generator& val_gen) { SCYLLA_ASSERT(type.is_multi_cell()); return generate_collection(engine, *type.name_comparator(), *type.value_comparator(), val_gen); } data_value generate_frozen_list_value(std::mt19937& engine, const list_type_impl& type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) { SCYLLA_ASSERT(!type.is_multi_cell()); return make_list_value(type.shared_from_this(), generate_frozen_list(engine, *type.get_elements_type(), val_gen, min_size_in_bytes, max_size_in_bytes)); } data_model::mutation_description::collection generate_set_value(std::mt19937& engine, const set_type_impl& type, value_generator& val_gen) { SCYLLA_ASSERT(type.is_multi_cell()); return generate_collection(engine, *type.name_comparator(), *type.value_comparator(), val_gen); } data_value 
generate_frozen_set_value(std::mt19937& engine, const set_type_impl& type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) { SCYLLA_ASSERT(!type.is_multi_cell()); return make_set_value(type.shared_from_this(), generate_frozen_set(engine, *type.get_elements_type(), val_gen, min_size_in_bytes, max_size_in_bytes)); } data_model::mutation_description::collection generate_map_value(std::mt19937& engine, const map_type_impl& type, value_generator& val_gen) { SCYLLA_ASSERT(type.is_multi_cell()); return generate_collection(engine, *type.name_comparator(), *type.value_comparator(), val_gen); } data_value generate_frozen_map_value(std::mt19937& engine, const map_type_impl& type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) { SCYLLA_ASSERT(!type.is_multi_cell()); return make_map_value(type.shared_from_this(), generate_frozen_map(engine, *type.get_keys_type(), *type.get_values_type(), val_gen, min_size_in_bytes, max_size_in_bytes)); } } // anonymous namespace data_value value_generator::generate_atomic_value(std::mt19937& engine, const abstract_type& type, size_t max_size_in_bytes) { return generate_atomic_value(engine, type, 0, max_size_in_bytes); } data_value value_generator::generate_atomic_value(std::mt19937& engine, const abstract_type& type, size_t min_size_in_bytes, size_t max_size_in_bytes) { SCYLLA_ASSERT(!type.is_multi_cell()); return get_atomic_value_generator(type)(engine, min_size_in_bytes, max_size_in_bytes); } value_generator::value_generator() : _regular_value_generators{ {empty_type.get(), &generate_empty_value}, {byte_type.get(), &generate_byte_value}, {short_type.get(), &generate_short_value}, {int32_type.get(), &generate_int32_value}, {long_type.get(), &generate_long_value}, {ascii_type.get(), &generate_ascii_value}, {bytes_type.get(), &generate_bytes_value}, {utf8_type.get(), &generate_utf8_value}, {boolean_type.get(), &generate_boolean_value}, {date_type.get(), &generate_date_value}, 
{timeuuid_type.get(), &generate_timeuuid_value}, {timestamp_type.get(), &generate_timestamp_value}, {simple_date_type.get(), &generate_simple_date_value}, {time_type.get(), &generate_time_value}, {uuid_type.get(), &generate_uuid_value}, {inet_addr_type.get(), &generate_inet_addr_value}, {float_type.get(), &generate_float_value}, {double_type.get(), &generate_double_value}, {varint_type.get(), &generate_varint_value}, {decimal_type.get(), &generate_decimal_value}, {duration_type.get(), &generate_duration_value}} { std::mt19937 engine; for (const auto& [regular_type, regular_value_gen] : _regular_value_generators) { _regular_value_min_sizes.emplace(regular_type, regular_value_gen(engine, size_t{}, size_t{}).serialized_size()); } } size_t value_generator::min_size(const abstract_type& type) { SCYLLA_ASSERT(!type.is_multi_cell()); auto it = _regular_value_min_sizes.find(&type); if (it != _regular_value_min_sizes.end()) { return it->second; } std::mt19937 engine; if (auto maybe_user_type = dynamic_cast(&type)) { return generate_frozen_user_value(engine, *maybe_user_type, *this, size_t{}, size_t{}).serialized_size(); } if (auto maybe_tuple_type = dynamic_cast(&type)) { return generate_frozen_tuple_value(engine, *maybe_tuple_type, *this, size_t{}, size_t{}).serialized_size(); } if (auto maybe_list_type = dynamic_cast(&type)) { return generate_frozen_list_value(engine, *maybe_list_type, *this, size_t{}, size_t{}).serialized_size(); } if (auto maybe_set_type = dynamic_cast(&type)) { return generate_frozen_set_value(engine, *maybe_set_type, *this, size_t{}, size_t{}).serialized_size(); } if (auto maybe_map_type = dynamic_cast(&type)) { return generate_frozen_map_value(engine, *maybe_map_type, *this, size_t{}, size_t{}).serialized_size(); } if (auto maybe_reversed_type = dynamic_cast(&type)) { return min_size(*maybe_reversed_type->underlying_type()); } throw std::runtime_error(fmt::format("Don't know how to calculate min size for unknown type {}", type.name())); } 
// Returns a size-bounded value generator for the given non-multi-cell type.
// Native types use the generators registered at construction; composite types
// get a closure around the matching generate_frozen_*_value() helper; reversed
// types delegate to their underlying type.
// The returned closures capture `this` and a raw pointer to the type, so they
// must not outlive either of them.
// Throws std::runtime_error for types it does not know how to handle.
value_generator::atomic_value_generator value_generator::get_atomic_value_generator(const abstract_type& type) {
    SCYLLA_ASSERT(!type.is_multi_cell());

    auto it = _regular_value_generators.find(&type);
    if (it != _regular_value_generators.end()) {
        return it->second;
    }

    if (auto maybe_user_type = dynamic_cast(&type)) {
        return [this, maybe_user_type] (std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
            return generate_frozen_user_value(engine, *maybe_user_type, *this, min_size_in_bytes, max_size_in_bytes);
        };
    }

    if (auto maybe_tuple_type = dynamic_cast(&type)) {
        return [this, maybe_tuple_type] (std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
            return generate_frozen_tuple_value(engine, *maybe_tuple_type, *this, min_size_in_bytes, max_size_in_bytes);
        };
    }

    if (auto maybe_list_type = dynamic_cast(&type)) {
        return [this, maybe_list_type] (std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
            return generate_frozen_list_value(engine, *maybe_list_type, *this, min_size_in_bytes, max_size_in_bytes);
        };
    }

    if (auto maybe_set_type = dynamic_cast(&type)) {
        return [this, maybe_set_type] (std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
            return generate_frozen_set_value(engine, *maybe_set_type, *this, min_size_in_bytes, max_size_in_bytes);
        };
    }

    if (auto maybe_map_type = dynamic_cast(&type)) {
        return [this, maybe_map_type] (std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
            return generate_frozen_map_value(engine, *maybe_map_type, *this, min_size_in_bytes, max_size_in_bytes);
        };
    }

    if (auto maybe_reversed_type = dynamic_cast(&type)) {
        return get_atomic_value_generator(*maybe_reversed_type->underlying_type());
    }

    throw std::runtime_error(fmt::format("Don't know how to generate value for unknown type {}", type.name()));
}

// Returns a generator producing mutation-description values for the given
// type, with no size limit applied.
// * Native, tuple and frozen (non-multi-cell) types yield the serialized form
//   of a single generated value.
// * Multi-cell user/list/set/map types yield a collection description.
// * Reversed types delegate to their underlying type.
// The returned closures capture `this` and a raw pointer to the type, so they
// must not outlive either of them.
// Throws std::runtime_error for types it does not know how to handle.
value_generator::generator value_generator::get_generator(const abstract_type& type) {
    auto it = _regular_value_generators.find(&type);
    if (it != _regular_value_generators.end()) {
        return [gen = it->second] (std::mt19937& engine) -> data_model::mutation_description::value {
            return gen(engine, 0, no_size_in_bytes_limit).serialize_nonnull();
        };
    }

    if (auto maybe_user_type = dynamic_cast(&type)) {
        if (maybe_user_type->is_multi_cell()) {
            return [this, maybe_user_type] (std::mt19937& engine) -> data_model::mutation_description::value {
                return generate_user_value(engine, *maybe_user_type, *this);
            };
        } else {
            return [this, maybe_user_type] (std::mt19937& engine) -> data_model::mutation_description::value {
                return generate_frozen_user_value(engine, *maybe_user_type, *this, 0, no_size_in_bytes_limit).serialize_nonnull();
            };
        }
    }

    // Tuples have no multi-cell variant here: they are always generated frozen.
    if (auto maybe_tuple_type = dynamic_cast(&type)) {
        return [this, maybe_tuple_type] (std::mt19937& engine) -> data_model::mutation_description::value {
            return generate_frozen_tuple_value(engine, *maybe_tuple_type, *this, 0, no_size_in_bytes_limit).serialize_nonnull();
        };
    }

    if (auto maybe_list_type = dynamic_cast(&type)) {
        if (maybe_list_type->is_multi_cell()) {
            return [this, maybe_list_type] (std::mt19937& engine) -> data_model::mutation_description::value {
                return generate_list_value(engine, *maybe_list_type, *this);
            };
        } else {
            return [this, maybe_list_type] (std::mt19937& engine) -> data_model::mutation_description::value {
                return generate_frozen_list_value(engine, *maybe_list_type, *this, 0, no_size_in_bytes_limit).serialize_nonnull();
            };
        }
    }

    if (auto maybe_set_type = dynamic_cast(&type)) {
        if (maybe_set_type->is_multi_cell()) {
            return [this, maybe_set_type] (std::mt19937& engine) -> data_model::mutation_description::value {
                return generate_set_value(engine, *maybe_set_type, *this);
            };
        } else {
            return [this, maybe_set_type] (std::mt19937& engine) -> data_model::mutation_description::value {
                return generate_frozen_set_value(engine, *maybe_set_type, *this, 0, no_size_in_bytes_limit).serialize_nonnull();
            };
        }
    }

    if (auto maybe_map_type = dynamic_cast(&type)) {
        if (maybe_map_type->is_multi_cell()) {
            return [this, maybe_map_type] (std::mt19937& engine) -> data_model::mutation_description::value {
                return generate_map_value(engine, *maybe_map_type, *this);
            };
        } else {
            return [this, maybe_map_type] (std::mt19937& engine) -> data_model::mutation_description::value {
                return generate_frozen_map_value(engine, *maybe_map_type, *this, 0, no_size_in_bytes_limit).serialize_nonnull();
            };
        }
    }

    if (auto maybe_reversed_type = dynamic_cast(&type)) {
        return get_generator(*maybe_reversed_type->underlying_type());
    }

    throw std::runtime_error(fmt::format("Don't know how to generate value for unknown type {}", type.name()));
}

// One-shot convenience: looks up the generator for `type` and invokes it once.
data_model::mutation_description::value value_generator::generate_value(std::mt19937& engine, const abstract_type& type) {
    return get_generator(type)(engine);
}

// Returns a timestamp generator drawing uniformly from
// [min_timestamp, api::max_timestamp], regardless of the destination.
timestamp_generator default_timestamp_generator() {
    return [] (std::mt19937& engine, timestamp_destination, api::timestamp_type min_timestamp) {
        auto ts_dist = std::uniform_int_distribution(min_timestamp, api::max_timestamp);
        return ts_dist(engine);
    };
}

// Returns a timestamp generator that gives every destination kind its own,
// non-overlapping timestamp range. The ranges are ordered by rank (see `rank`
// below): partition tombstones get the lowest timestamps, live data (row
// markers and cells) the highest.
// The range boundaries are derived from `seed` once, when this function is
// called; the returned generator ignores the minimum timestamp passed to it.
timestamp_generator uncompactible_timestamp_generator(uint32_t seed, api::timestamp_type min_timestamp) {
    auto engine = std::mt19937(seed);

    // Maps a destination to the index of its timestamp range.
    // NOTE(review): there is no return after the switch, so this relies on
    // every enumerator being handled above — confirm when adding enumerators.
    const auto rank = [] (timestamp_destination dest) -> api::timestamp_type {
        switch (dest) {
            case timestamp_destination::partition_tombstone:
                return 0;
            case timestamp_destination::range_tombstone:
                return 1;
            case timestamp_destination::row_tombstone:
                return 2;
            case timestamp_destination::collection_tombstone:
                return 3;
            case timestamp_destination::row_marker:
            case timestamp_destination::cell_timestamp:
            case timestamp_destination::collection_cell_timestamp:
                return 4;
        }
    };
    const auto max_rank = rank(timestamp_destination::collection_cell_timestamp);

    // Minimum distance kept between two consecutive range boundaries.
    const auto margin = 1000;

    // Boundaries of the ranges: points[r] is the inclusive start of rank r's
    // range and points[r + 1] its exclusive end. Each random boundary leaves
    // room for the boundaries of the ranks that are still to be placed.
    std::vector points;
    points.push_back(min_timestamp);
    for (api::timestamp_type i = 0; i < max_rank; ++i) {
        const auto remaining_ranks = max_rank - i;
        const auto point = std::uniform_int_distribution(points.back() + margin,
                api::max_timestamp - (remaining_ranks * margin))(engine);
        points.push_back(point);
    }
    points.push_back(api::max_timestamp);

    return [rank, points] (std::mt19937& engine, timestamp_destination destination, api::timestamp_type curr_min_ts) {
        const auto r = rank(destination);
        auto ts_dist = std::uniform_int_distribution(points.at(r), points.at(r + 1) - 1);
        return ts_dist(engine);
    };
}

// Returns an expiry generator that never produces an expiry.
expiry_generator no_expiry_expiry_generator() {
    return [] (std::mt19937& engine, timestamp_destination destination) -> std::optional {
        return std::nullopt;
    };
}

namespace {

// Builds a schema from the specification, using an engine seeded with `seed`.
// Columns are named pk{N}, ck{N}, s{N} and v{N} for partition key, clustering
// key, static and regular columns respectively.
schema_ptr build_random_schema(uint32_t seed, random_schema_specification& spec) {
    auto engine = std::mt19937{seed};
    auto builder = schema_builder(spec.keyspace_name(), spec.table_name(engine));

    auto pk_columns = spec.partition_key_columns(engine);
    SCYLLA_ASSERT(!pk_columns.empty()); // Let's not pull in boost::test here
    for (size_t pk = 0; pk < pk_columns.size(); ++pk) {
        builder.with_column(to_bytes(format("pk{}", pk)), std::move(pk_columns[pk]), column_kind::partition_key);
    }

    auto ck_columns = spec.clustering_key_columns(engine);
    for (size_t ck = 0; ck < ck_columns.size(); ++ck) {
        builder.with_column(to_bytes(format("ck{}", ck)), std::move(ck_columns[ck]), column_kind::clustering_key);
    }

    // Static columns are only generated for schemas that have clustering columns.
    if (!ck_columns.empty()) {
        auto static_columns = spec.static_columns(engine);
        for (size_t s = 0; s < static_columns.size(); ++s) {
            builder.with_column(to_bytes(format("s{}", s)), std::move(static_columns[s]), column_kind::static_column);
        }
    }

    auto regular_columns = spec.regular_columns(engine);
    for (size_t r = 0; r < regular_columns.size(); ++r) {
        builder.with_column(to_bytes(format("v{}", r)), std::move(regular_columns[r]), column_kind::regular_column);
    }

    if (spec.compress() == random_schema_specification::compress_sstable::no) {
        builder.set_compressor_params(compression_parameters::no_compression());
    }

    builder.with_tombstone_gc_options(tombstone_gc_options(tombstone_gc_mode::timeout));

    return builder.build();
}

// Returns the CQL create statement describing the given UDT.
sstring udt_to_str(const user_type_impl& udt) {
    auto udt_desc = udt.describe(cql3::with_create_statement::yes);
    return udt_desc.create_statement.value().linearize();
}

// Insertion-ordered list of UDTs without duplicates. Entries are compared by
// pointer, i.e. by identity of the type object.
struct udt_list {
    std::vector vector;

    // Appends `udt` unless it is already present.
    void insert(const user_type_impl* udt) {
        auto it = std::find(vector.begin(), vector.end(), udt);
        if (it == vector.end()) {
            vector.push_back(udt);
        }
    }

    // Appends the entries of `other` that are not yet present, keeping their order.
    void merge(udt_list other) {
        for (auto& udt : other.vector) {
            insert(udt);
        }
    }
};

// Collects all UDTs reachable from the given types, recursing into UDT fields,
// tuple members, collection element/key/value types and reversed types.
// The UDTs used by a UDT's fields are listed before the UDT itself.
udt_list dump_udts(const std::vector& types) {
    udt_list udts;
    for (const auto& dt : types) {
        const auto* const type = dt.get();
        if (auto maybe_user_type = dynamic_cast(type)) {
            udts.merge(dump_udts(maybe_user_type->field_types()));
            udts.insert(maybe_user_type);
        } else if (auto maybe_tuple_type = dynamic_cast(type)) {
            udts.merge(dump_udts(maybe_tuple_type->all_types()));
        } else if (auto maybe_list_type = dynamic_cast(type)) {
            udts.merge(dump_udts({maybe_list_type->get_elements_type()}));
        } else if (auto maybe_set_type = dynamic_cast(type)) {
            udts.merge(dump_udts({maybe_set_type->get_elements_type()}));
        } else if (auto maybe_map_type = dynamic_cast(type)) {
            udts.merge(dump_udts({maybe_map_type->get_keys_type(), maybe_map_type->get_values_type()}));
        } else if (auto maybe_reversed_type = dynamic_cast(type)) {
            udts.merge(dump_udts({maybe_reversed_type->underlying_type()}));
        }
    }
    return udts;
}

// Collects all UDTs used by the schema's columns, visiting partition key,
// clustering key, regular and static columns, in that order.
std::vector dump_udts(const schema& schema) {
    udt_list udts;

    // Extracts the types of a range of column definitions.
    const auto cdefs_to_types = [] (const schema::const_iterator_range_type& cdefs) -> std::vector {
        return cdefs | std::views::transform([] (const column_definition& cdef) { return cdef.type; }) | std::ranges::to();
    };

    udts.merge(dump_udts(cdefs_to_types(schema.partition_key_columns())));
    udts.merge(dump_udts(cdefs_to_types(schema.clustering_key_columns())));
    udts.merge(dump_udts(cdefs_to_types(schema.regular_columns())));
    udts.merge(dump_udts(cdefs_to_types(schema.static_columns())));

    return udts.vector;
}

// Returns one "<name> <cql type>" string per column of the given kind, in
// column order; static columns get a trailing " static".
std::vector columns_specs(schema_ptr schema, column_kind kind) {
    const auto count = schema->columns_count(kind);
    if (!count) {
        return {};
    }

    std::vector col_specs;
    for (column_count_type c = 0; c < count; ++c) {
        const auto& cdef = schema->column_at(kind, c);
        col_specs.emplace_back(format("{} {}{}", cdef.name_as_cql_string(), cdef.type->as_cql3_type().to_string(),
                kind == column_kind::static_column ? " static" : ""));
    }
    return col_specs;
}

// Returns the CQL names of the columns of the given kind, in column order.
std::vector column_names(schema_ptr schema, column_kind kind) {
    const auto count = schema->columns_count(kind);
    if (!count) {
        return {};
    }

    std::vector col_names;
    for (column_count_type c = 0; c < count; ++c) {
        const auto& cdef = schema->column_at(kind, c);
        col_names.emplace_back(cdef.name_as_cql_string());
    }
    return col_names;
}

void decorate_with_timestamps(const schema& schema, std::mt19937& engine, timestamp_generator& ts_gen, expiry_generator exp_gen,
        data_model::mutation_description::value& value) {
    std::visit(
            make_visitor(
                    [&] (data_model::mutation_description::atomic_value& v) {
                        v.timestamp = ts_gen(engine, timestamp_destination::cell_timestamp, api::min_timestamp);
                        if (auto expiry_opt = exp_gen(engine, timestamp_destination::cell_timestamp)) {
                            v.expiring = data_model::mutation_description::expiry_info{expiry_opt->ttl, expiry_opt->expiry_point};
                        }
                    },
                    [&] (data_model::mutation_description::collection& c) {
                        if (auto ts = ts_gen(engine, timestamp_destination::collection_tombstone, api::min_timestamp);
                                ts != api::missing_timestamp) {
                            if (ts == api::max_timestamp) {
                                // Caveat: leave some headroom for the cells
                                // having a timestamp larger than the
                                // tombstone's.
                                ts--;
                            }
                            auto expiry_opt = exp_gen(engine, timestamp_destination::collection_tombstone);
                            const auto deletion_time = expiry_opt ?
expiry_opt->expiry_point : gc_clock::now(); c.tomb = tombstone(ts, deletion_time); } for (auto& [ key, value ] : c.elements) { value.timestamp = ts_gen(engine, timestamp_destination::collection_cell_timestamp, c.tomb.timestamp); SCYLLA_ASSERT(!c.tomb || value.timestamp > c.tomb.timestamp); if (auto expiry_opt = exp_gen(engine, timestamp_destination::collection_cell_timestamp)) { value.expiring = data_model::mutation_description::expiry_info{expiry_opt->ttl, expiry_opt->expiry_point}; } } }), value); } } // anonymous namespace data_model::mutation_description::key random_schema::make_key(uint32_t n, value_generator& gen, schema::const_iterator_range_type columns, size_t max_size_in_bytes) { std::mt19937 engine(n); const size_t max_component_size = max_size_in_bytes / std::distance(columns.begin(), columns.end()); std::vector key; for (const auto& cdef : columns) { key.emplace_back(gen.generate_atomic_value(engine, *cdef.type, max_component_size).serialize_nonnull()); } return key; } data_model::mutation_description::key random_schema::make_partition_key(uint32_t n, value_generator& gen) const { return make_key(n, gen, _schema->partition_key_columns(), std::numeric_limits::max()); } data_model::mutation_description::key random_schema::make_clustering_key(uint32_t n, value_generator& gen) const { SCYLLA_ASSERT(_schema->clustering_key_size() > 0); return make_key(n, gen, _schema->clustering_key_columns(), std::numeric_limits::max()); } random_schema::random_schema(uint32_t seed, random_schema_specification& spec) : _schema(build_random_schema(seed, spec)) { } sstring random_schema::cql() const { auto udts = dump_udts(*_schema); sstring udts_str; if (!udts.empty()) { udts_str = seastar::format("{}", fmt::join(udts | std::views::transform([] (const user_type_impl* const udt) { return udt_to_str(*udt); }), "\n")); } std::vector col_specs; for (auto kind : {column_kind::partition_key, column_kind::clustering_key, column_kind::regular_column, column_kind::static_column}) { 
auto cols = columns_specs(_schema, kind); std::move(cols.begin(), cols.end(), std::back_inserter(col_specs)); } std::string primary_key; auto partition_column_names = column_names(_schema, column_kind::partition_key); auto clustering_key_names = column_names(_schema, column_kind::clustering_key); if (!clustering_key_names.empty()) { primary_key = fmt::format("({}), {}", fmt::join(partition_column_names, ", "), fmt::join(clustering_key_names, ", ")); } else { primary_key = fmt::format("{}", fmt::join(partition_column_names, ", ")); } // FIXME include the clustering column orderings return seastar::format( "{}\nCREATE TABLE {}.{} (\n\t{}\n\tPRIMARY KEY ({}))", udts_str, _schema->ks_name(), _schema->cf_name(), fmt::join(col_specs, ",\n\t"), primary_key); } data_model::mutation_description::key random_schema::make_pkey(uint32_t n) { value_generator g; return make_partition_key(n, g); } std::vector random_schema::make_pkeys(size_t n) { std::set keys{dht::ring_position_less_comparator{*_schema}}; value_generator val_gen; uint32_t i{0}; while (keys.size() < n) { keys.emplace(dht::decorate_key(*_schema, partition_key::from_exploded(make_partition_key(i, val_gen)))); ++i; } return keys | std::views::transform([] (const dht::decorated_key& dkey) { return dkey.key().explode(); }) | std::ranges::to>(); } data_model::mutation_description::key random_schema::make_ckey(uint32_t n) { value_generator g; return make_clustering_key(n, g); } std::vector random_schema::make_ckeys(size_t n) { std::set keys{clustering_key::less_compare{*_schema}}; value_generator val_gen; for (uint32_t i = 0; i < n; i++) { keys.emplace(clustering_key::from_exploded(make_clustering_key(i, val_gen))); } return keys | std::views::transform([] (const clustering_key& ckey) { return ckey.explode(); }) | std::ranges::to>(); } data_model::mutation_description random_schema::new_mutation(data_model::mutation_description::key pkey) { return data_model::mutation_description(std::move(pkey)); } 
data_model::mutation_description random_schema::new_mutation(uint32_t n) { return new_mutation(make_pkey(n)); } void random_schema::set_partition_tombstone(std::mt19937& engine, data_model::mutation_description& md, timestamp_generator ts_gen, expiry_generator exp_gen) { if (const auto ts = ts_gen(engine, timestamp_destination::partition_tombstone, api::min_timestamp); ts != api::missing_timestamp) { auto expiry_opt = exp_gen(engine, timestamp_destination::partition_tombstone); const auto deletion_time = expiry_opt ? expiry_opt->expiry_point : gc_clock::now(); md.set_partition_tombstone(tombstone(ts, deletion_time)); } } void random_schema::add_row(std::mt19937& engine, data_model::mutation_description& md, data_model::mutation_description::key ckey, timestamp_generator ts_gen, expiry_generator exp_gen) { value_generator gen; for (const auto& cdef : _schema->regular_columns()) { auto value = gen.generate_value(engine, *cdef.type); decorate_with_timestamps(*_schema, engine, ts_gen, exp_gen, value); md.add_clustered_cell(ckey, cdef.name_as_text(), std::move(value)); } if (auto ts = ts_gen(engine, timestamp_destination::row_marker, api::min_timestamp); ts != api::missing_timestamp) { if (auto expiry_opt = exp_gen(engine, timestamp_destination::row_marker)) { md.add_clustered_row_marker(ckey, tests::data_model::mutation_description::row_marker(ts, expiry_opt->ttl, expiry_opt->expiry_point)); } else { md.add_clustered_row_marker(ckey, ts); } } if (auto ts = ts_gen(engine, timestamp_destination::row_tombstone, api::min_timestamp); ts != api::missing_timestamp) { auto expiry_opt = exp_gen(engine, timestamp_destination::row_tombstone); const auto deletion_time = expiry_opt ? 
expiry_opt->expiry_point : gc_clock::now(); md.add_clustered_row_tombstone(ckey, row_tombstone{tombstone{ts, deletion_time}}); } } void random_schema::add_row(std::mt19937& engine, data_model::mutation_description& md, uint32_t n, timestamp_generator ts_gen, expiry_generator exp_gen) { add_row(engine, md, make_ckey(n), std::move(ts_gen), std::move(exp_gen)); } void random_schema::add_static_row(std::mt19937& engine, data_model::mutation_description& md, timestamp_generator ts_gen, expiry_generator exp_gen) { value_generator gen; for (const auto& cdef : _schema->static_columns()) { auto value = gen.generate_value(engine, *cdef.type); decorate_with_timestamps(*_schema, engine, ts_gen, exp_gen, value); md.add_static_cell(cdef.name_as_text(), std::move(value)); } } void random_schema::delete_range( std::mt19937& engine, data_model::mutation_description& md, interval range, timestamp_generator ts_gen, expiry_generator exp_gen) { auto expiry_opt = exp_gen(engine, timestamp_destination::range_tombstone); const auto deletion_time = expiry_opt ? 
expiry_opt->expiry_point : gc_clock::now(); md.add_range_tombstone(std::move(range), tombstone{ts_gen(engine, timestamp_destination::range_tombstone, api::min_timestamp), deletion_time}); } future<> random_schema::create_with_cql(cql_test_env& env) { return async([this, &env] { const auto ks_name = _schema->ks_name(); const auto tbl_name = _schema->cf_name(); for (const auto& udt : dump_udts(*_schema)) { env.execute_cql(udt_to_str(*udt)).get(); eventually_true([&] () mutable { return env.db().map_reduce0([&] (replica::database& db) { return db.user_types().get(ks_name).has_type(udt->get_name()); }, true, std::logical_and{}).get(); }); } auto& db = env.local_db(); auto schema_desc = _schema->describe( replica::make_schema_describe_helper(_schema, db.as_data_dictionary()), cql3::describe_option::STMTS); sstring create_statement = schema_desc.create_statement.value().linearize(); env.execute_cql(create_statement).get(); auto& tbl = db.find_column_family(ks_name, tbl_name); _schema = tbl.schema(); }); } future> generate_random_mutations( uint32_t seed, tests::random_schema& random_schema, timestamp_generator ts_gen, expiry_generator exp_gen, std::uniform_int_distribution partition_count_dist, std::uniform_int_distribution clustering_row_count_dist, std::uniform_int_distribution range_tombstone_count_dist) { auto engine = std::mt19937(seed); const auto schema_has_clustering_columns = random_schema.schema()->clustering_key_size() > 0; const auto partition_count = partition_count_dist(engine); utils::chunked_vector muts; muts.reserve(partition_count); for (size_t pk = 0; pk != partition_count; ++pk) { auto mut = random_schema.new_mutation(pk); random_schema.set_partition_tombstone(engine, mut, ts_gen, exp_gen); random_schema.add_static_row(engine, mut, ts_gen, exp_gen); if (!schema_has_clustering_columns) { muts.emplace_back(mut.build(random_schema.schema())); continue; } const auto clustering_row_count = clustering_row_count_dist(engine); const auto range_tombstone_count 
= range_tombstone_count_dist(engine); auto ckeys = random_schema.make_ckeys(std::max(clustering_row_count, range_tombstone_count)); for (uint32_t ck = 0; ck < ckeys.size(); ++ck) { random_schema.add_row(engine, mut, ckeys[ck], ts_gen, exp_gen); co_await coroutine::maybe_yield(); } for (size_t i = 0; i < range_tombstone_count; ++i) { const auto a = tests::random::get_int(0, ckeys.size() - 1, engine); const auto b = tests::random::get_int(0, ckeys.size() - 1, engine); random_schema.delete_range( engine, mut, interval::make(ckeys.at(std::min(a, b)), ckeys.at(std::max(a, b))), ts_gen, exp_gen); co_await coroutine::maybe_yield(); } muts.emplace_back(mut.build(random_schema.schema())); } std::ranges::sort(muts, [s = random_schema.schema()] (const mutation& a, const mutation& b) { return a.decorated_key().less_compare(*s, b.decorated_key()); }); auto range = boost::unique(muts, [s = random_schema.schema()] (const mutation& a, const mutation& b) { return a.decorated_key().equal(*s, b.decorated_key()); }); while (range.end() != muts.end()) { muts.pop_back(); } co_return std::move(muts); } future> generate_random_mutations( tests::random_schema& random_schema, timestamp_generator ts_gen, expiry_generator exp_gen, std::uniform_int_distribution partition_count_dist, std::uniform_int_distribution clustering_row_count_dist, std::uniform_int_distribution range_tombstone_count_dist) { return generate_random_mutations(tests::random::get_int(), random_schema, std::move(ts_gen), std::move(exp_gen), partition_count_dist, clustering_row_count_dist, range_tombstone_count_dist); } future> generate_random_mutations(tests::random_schema& random_schema, size_t partition_count) { return generate_random_mutations( random_schema, default_timestamp_generator(), no_expiry_expiry_generator(), std::uniform_int_distribution(partition_count, partition_count)); } } // namespace tests