Files
scylladb/test/lib/random_schema.cc
Botond Dénes a785c0cf41 test/lib/random_schema: use timeout-mode tombstone_gc
This is the current de-facto default for all tests using random schema
and some are apparently relying on this. Make this explicit to avoid
upsetting tests, by the impending change of this default to repair.
2026-03-03 14:09:28 +02:00

1265 lines
56 KiB
C++

/*
* Copyright (C) 2019-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#include <algorithm>
#include <boost/range/algorithm/unique.hpp>
#include <seastar/coroutine/maybe_yield.hh>
#include "cql3/cql3_type.hh"
#include "cql3/description.hh"
#include "mutation/mutation.hh"
#include "schema/schema_builder.hh"
#include "test/lib/cql_test_env.hh"
#include "test/lib/eventually.hh"
#include "test/lib/random_schema.hh"
#include "test/lib/random_utils.hh"
#include "types/list.hh"
#include "types/map.hh"
#include "types/set.hh"
#include "types/tuple.hh"
#include "types/user.hh"
#include "utils/assert.hh"
#include "utils/big_decimal.hh"
#include "utils/UUID_gen.hh"
#include "replica/schema_describe_helper.hh"
namespace tests {
/// Populates the generator table consulted by operator().
///
/// The table starts with one generator per native type (each simply returns
/// that type) and is followed by generators for tuple, user-defined, list, set
/// and map types. The composite generators obtain their member types by
/// calling back into operator() with is_multi_cell::no.
type_generator::type_generator(random_schema_specification& spec) : _spec(spec) {
    // Yields a fixed type; ignores both the engine and the multi-cell flag.
    struct fixed_type_generator {
        data_type type;
        data_type operator()(std::mt19937&, is_multi_cell) { return type; }
    };
    // The position of each entry matters: operator() picks generators by index.
    _generators = {
        fixed_type_generator{byte_type},
        fixed_type_generator{short_type},
        fixed_type_generator{int32_type},
        fixed_type_generator{long_type},
        fixed_type_generator{ascii_type},
        fixed_type_generator{bytes_type},
        fixed_type_generator{utf8_type},
        fixed_type_generator{boolean_type},
        fixed_type_generator{date_type},
        fixed_type_generator{timeuuid_type},
        fixed_type_generator{timestamp_type},
        fixed_type_generator{simple_date_type},
        fixed_type_generator{time_type},
        fixed_type_generator{uuid_type},
        fixed_type_generator{inet_addr_type},
        fixed_type_generator{float_type},
        fixed_type_generator{double_type},
        fixed_type_generator{varint_type},
        fixed_type_generator{decimal_type},
        fixed_type_generator{duration_type}};

    // tuple: 2 to 4 members; the multi-cell flag is not consulted.
    _generators.emplace_back(
            [this] (std::mt19937& engine, is_multi_cell) {
                const auto member_count = std::uniform_int_distribution<size_t>{2, 4}(engine);
                std::vector<data_type> members;
                for (size_t idx = 0; idx != member_count; ++idx) {
                    members.emplace_back((*this)(engine, is_multi_cell::no));
                }
                return tuple_type_impl::get_instance(std::move(members));
            });

    // user: 2 to 4 fields named f0, f1, ...; the type name comes from the spec.
    _generators.emplace_back(
            [this] (std::mt19937& engine, is_multi_cell multi_cell) {
                const auto field_count = std::uniform_int_distribution<size_t>{2, 4}(engine);
                std::vector<bytes> names;
                std::vector<data_type> types;
                for (size_t idx = 0; idx != field_count; ++idx) {
                    names.emplace_back(to_bytes(format("f{}", idx)));
                    types.emplace_back((*this)(engine, is_multi_cell::no));
                }
                // The name is drawn only after all the field types were generated.
                auto type_name = to_bytes(_spec.udt_name(engine));
                return user_type_impl::get_instance(_spec.keyspace_name(), std::move(type_name), std::move(names),
                        std::move(types), bool(multi_cell));
            });

    // list
    _generators.emplace_back(
            [this] (std::mt19937& engine, is_multi_cell multi_cell) {
                auto elements = (*this)(engine, is_multi_cell::no);
                return list_type_impl::get_instance(std::move(elements), bool(multi_cell));
            });

    // set
    _generators.emplace_back(
            [this] (std::mt19937& engine, is_multi_cell multi_cell) {
                auto elements = (*this)(engine, is_multi_cell::no);
                return set_type_impl::get_instance(std::move(elements), bool(multi_cell));
            });

    // map: the key type is drawn before the value type.
    _generators.emplace_back(
            [this] (std::mt19937& engine, is_multi_cell multi_cell) {
                auto keys = (*this)(engine, is_multi_cell::no);
                auto values = (*this)(engine, is_multi_cell::no);
                return map_type_impl::get_instance(std::move(keys), std::move(values), bool(multi_cell));
            });
}
/// Picks one of the registered generators uniformly at random and returns the
/// type it produces.
///
/// When `multi_cell` is no, duration and boolean results are rejected and a
/// new type is drawn until an acceptable one comes up.
data_type type_generator::operator()(std::mt19937& engine, is_multi_cell multi_cell) {
    const auto index = std::uniform_int_distribution<size_t>(0, _generators.size() - 1)(engine);
    auto candidate = _generators.at(index)(engine, multi_cell);
    if (bool(multi_cell)) {
        return candidate;
    }
    // duration type is not allowed in:
    // * primary key components
    // * as member types of collections
    //
    // Both cases request types with multi_cell == no, so duration is simply
    // rejected whenever multi_cell is no.
    //
    // boolean is rejected as well: keys containing a boolean have a poor value
    // distribution, are prone to collisions and yield poor cardinality.
    while (candidate == duration_type || candidate == boolean_type) {
        candidate = (*this)(engine, multi_cell);
    }
    return candidate;
}
namespace {
/// Default random_schema_specification.
///
/// Table and UDT names are made unique through randomly drawn numeric ids, and
/// the number of columns of each kind is drawn from the distribution passed to
/// the constructor for that kind.
class default_random_schema_specification : public random_schema_specification {
    // Ids already handed out by table_name() / udt_name().
    std::unordered_set<unsigned> _used_table_ids;
    std::unordered_set<unsigned> _used_udt_ids;
    std::uniform_int_distribution<size_t> _partition_column_count_dist;
    std::uniform_int_distribution<size_t> _clustering_column_count_dist;
    std::uniform_int_distribution<size_t> _regular_column_count_dist;
    std::uniform_int_distribution<size_t> _static_column_count_dist;
    type_generator _type_generator;
    compress_sstable _compress;

private:
    /// Draws ids with bounds 0 and 1024 until one is found which is not yet in
    /// `used_ids`, records it there and returns it.
    static unsigned generate_unique_id(std::mt19937& engine, std::unordered_set<unsigned>& used_ids) {
        std::uniform_int_distribution<unsigned> id_dist(0, 1024);
        for (;;) {
            const auto candidate = id_dist(engine);
            // insert() reports through `.second` whether the id was new.
            if (used_ids.insert(candidate).second) {
                return candidate;
            }
        }
    }

    /// Generates `count_dist(engine)` column types.
    ///
    /// \param multi_cell when yes, each column is randomly either multi-cell or
    ///     not; when no, no column is multi-cell.
    /// \param allow_reversed when true, each type is randomly either left as-is
    ///     or wrapped into a reversed_type_impl.
    std::vector<data_type> generate_types(std::mt19937& engine, std::uniform_int_distribution<size_t>& count_dist,
            type_generator::is_multi_cell multi_cell, bool allow_reversed = false) {
        std::uniform_int_distribution<uint8_t> reversed_dist{0, uint8_t(allow_reversed)};
        std::uniform_int_distribution<uint8_t> multi_cell_dist{0, uint8_t(bool(multi_cell))};

        std::vector<data_type> result;
        const auto column_count = count_dist(engine);
        result.reserve(column_count);
        for (size_t idx = 0; idx != column_count; ++idx) {
            // Per column: the multi-cell flag is drawn first, then the type
            // and finally the reversed flag.
            const auto column_multi_cell = type_generator::is_multi_cell(bool(multi_cell_dist(engine)));
            auto column_type = _type_generator(engine, column_multi_cell);
            if (!reversed_dist(engine)) {
                result.emplace_back(std::move(column_type));
            } else {
                result.emplace_back(make_shared<reversed_type_impl>(std::move(column_type)));
            }
        }
        return result;
    }

public:
    /// The lower bound of `partition_column_count_dist` must be greater than 0.
    default_random_schema_specification(
            sstring keyspace_name,
            std::uniform_int_distribution<size_t> partition_column_count_dist,
            std::uniform_int_distribution<size_t> clustering_column_count_dist,
            std::uniform_int_distribution<size_t> regular_column_count_dist,
            std::uniform_int_distribution<size_t> static_column_count_dist,
            compress_sstable compress)
        : random_schema_specification(std::move(keyspace_name))
        , _partition_column_count_dist(partition_column_count_dist)
        , _clustering_column_count_dist(clustering_column_count_dist)
        , _regular_column_count_dist(regular_column_count_dist)
        , _static_column_count_dist(static_column_count_dist)
        , _type_generator(*this)
        , _compress(compress) {
        SCYLLA_ASSERT(_partition_column_count_dist.a() > 0);
    }

    /// Returns "table<N>", N being an id not used for a table before.
    virtual sstring table_name(std::mt19937& engine) override {
        const auto id = generate_unique_id(engine, _used_table_ids);
        return format("table{}", id);
    }

    /// Returns "udt<N>", N being an id not used for a UDT before.
    virtual sstring udt_name(std::mt19937& engine) override {
        const auto id = generate_unique_id(engine, _used_udt_ids);
        return format("udt{}", id);
    }

    /// Partition key columns: never multi-cell, never reversed.
    virtual std::vector<data_type> partition_key_columns(std::mt19937& engine) override {
        constexpr bool allow_reversed = false;
        return generate_types(engine, _partition_column_count_dist, type_generator::is_multi_cell::no, allow_reversed);
    }

    /// Clustering key columns: never multi-cell, possibly reversed.
    virtual std::vector<data_type> clustering_key_columns(std::mt19937& engine) override {
        constexpr bool allow_reversed = true;
        return generate_types(engine, _clustering_column_count_dist, type_generator::is_multi_cell::no, allow_reversed);
    }

    /// Regular columns: possibly multi-cell, never reversed.
    virtual std::vector<data_type> regular_columns(std::mt19937& engine) override {
        constexpr bool allow_reversed = false;
        return generate_types(engine, _regular_column_count_dist, type_generator::is_multi_cell::yes, allow_reversed);
    }

    /// Static columns: possibly multi-cell, never reversed.
    virtual std::vector<data_type> static_columns(std::mt19937& engine) override {
        constexpr bool allow_reversed = false;
        return generate_types(engine, _static_column_count_dist, type_generator::is_multi_cell::yes, allow_reversed);
    }

    /// Gives access to the compression setting passed to the constructor.
    virtual compress_sstable& compress() override {
        return _compress;
    }
};
} // anonymous namespace
/// Creates the default random schema specification for `keyspace_name`.
///
/// Each `*_column_count_dist` is the distribution from which the number of
/// columns of the respective kind is drawn; `compress` is handed over to the
/// specification unchanged.
std::unique_ptr<random_schema_specification> make_random_schema_specification(
        sstring keyspace_name,
        std::uniform_int_distribution<size_t> partition_column_count_dist,
        std::uniform_int_distribution<size_t> clustering_column_count_dist,
        std::uniform_int_distribution<size_t> regular_column_count_dist,
        std::uniform_int_distribution<size_t> static_column_count_dist,
        random_schema_specification::compress_sstable compress) {
    auto spec = std::make_unique<default_random_schema_specification>(
            std::move(keyspace_name),
            partition_column_count_dist,
            clustering_column_count_dist,
            regular_column_count_dist,
            static_column_count_dist,
            compress);
    return spec;
}
namespace {
/// Generates a random multiprecision integer from unsigned random words.
///
/// The number of random bytes used is `max_size_in_bytes - 1`, forced into the
/// range [1, 16]. `min_size_in_bytes` is currently not used.
utils::multiprecision_int generate_multiprecision_integer_value(std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
    using utils::multiprecision_int;
    constexpr size_t word_bytes = 8;

    // Raising the limit to at least 2 keeps the `- 1` below from wrapping around.
    const auto raised_limit = std::max(size_t(2), max_size_in_bytes);
    const auto byte_budget = std::min(size_t(16), raised_limit - 1);

    // Draws a random unsigned value occupying at most `bytes` bytes (1 <= bytes <= 8).
    const auto draw_word = [] (std::mt19937& engine, size_t bytes) {
        if (bytes != word_bytes) { // bytes < 8
            const auto upper_bound = (uint64_t(1) << (bytes * 8)) - uint64_t(1);
            return multiprecision_int(random::get_int<uint64_t>(0, upper_bound, engine));
        }
        return multiprecision_int(random::get_int<uint64_t>(engine));
    };

    if (byte_budget > word_bytes) {
        // More than one word: the low 64 bits are drawn first, then the rest.
        auto low_word = multiprecision_int(draw_word(engine, word_bytes));
        auto high_word = multiprecision_int(draw_word(engine, byte_budget - word_bytes));
        return multiprecision_int(low_word) + (multiprecision_int(high_word) << 64);
    }
    return draw_word(engine, byte_budget);
}
/// Generates a random string of type `String` whose characters are drawn
/// uniformly from [min, max].
///
/// The length is drawn from a stepped distribution which strongly favours
/// short strings and is then clamped to the limits, which are converted from
/// bytes to characters.
template <typename String>
String generate_string_value(std::mt19937& engine, typename String::value_type min, typename String::value_type max,
        size_t min_size_in_bytes, size_t max_size_in_bytes) {
    using char_type = typename String::value_type;

    auto length_dist = random::stepped_int_distribution<size_t>{{
        {95.0, {   0,   31}},
        { 4.5, {  32,   99}},
        { 0.4, { 100,  999}},
        { 0.1, {1000, 9999}}}};
    auto char_dist = std::uniform_int_distribution<char_type>(min, max);

    // The length is drawn before any of the characters.
    const auto drawn_length = length_dist(engine);
    const auto min_length = min_size_in_bytes / sizeof(char_type);
    const auto max_length = max_size_in_bytes / sizeof(char_type);
    const auto length = std::clamp(drawn_length, min_length, max_length);

    String result(length, '\0');
    std::generate(result.begin(), result.end(), [&] { return char_dist(engine); });
    return result;
}
std::vector<data_value> generate_frozen_tuple_values(std::mt19937& engine, value_generator& val_gen, const std::vector<data_type>& member_types,
size_t min_size_in_bytes, size_t max_size_in_bytes) {
std::vector<data_value> values;
values.reserve(member_types.size());
const auto member_min_size_in_bytes = min_size_in_bytes / member_types.size();
const auto member_max_size_in_bytes = max_size_in_bytes / member_types.size();
for (auto member_type : member_types) {
values.push_back(val_gen.generate_atomic_value(engine, *member_type, member_min_size_in_bytes, member_max_size_in_bytes));
}
return values;
}
/// Generates a value for a multi-cell user-defined type.
///
/// A random, non-empty subset of the fields is given a value; the remaining
/// fields have no element in the returned collection. Elements are emitted in
/// ascending field index order.
data_model::mutation_description::collection generate_user_value(std::mt19937& engine, const user_type_impl& type,
        value_generator& val_gen) {
    using md = data_model::mutation_description;

    // Pick which fields are going to be non-null.
    const auto field_count = type.size();
    auto non_null_count = std::uniform_int_distribution<size_t>(1, field_count)(engine);
    auto chosen_fields = random::random_subset<unsigned>(field_count, non_null_count, engine);
    std::sort(chosen_fields.begin(), chosen_fields.end());

    md::collection result;
    for (auto field_idx : chosen_fields) {
        auto key = serialize_field_index(field_idx);
        auto value = val_gen.generate_atomic_value(engine, *type.type(field_idx), value_generator::no_size_in_bytes_limit).serialize_nonnull();
        result.elements.push_back({std::move(key), std::move(value)});
    }
    return result;
}
/// Generates the content of a multi-cell collection with `key_type` keys and
/// `value_type` values.
///
/// The number of generated (key, value) pairs is drawn with bounds 0 and 16;
/// pairs whose key compares equal to an already generated one are dropped. The
/// returned elements are sorted according to `key_type`'s comparator.
data_model::mutation_description::collection generate_collection(std::mt19937& engine, const abstract_type& key_type,
        const abstract_type& value_type, value_generator& val_gen) {
    using md = data_model::mutation_description;
    auto key_generator = val_gen.get_atomic_value_generator(key_type);
    // Not named `value_generator`, so the class of the same name is not shadowed.
    auto element_value_generator = val_gen.get_atomic_value_generator(value_type);

    auto size_dist = std::uniform_int_distribution<size_t>(0, 16);
    const auto size = size_dist(engine);

    std::map<bytes, md::atomic_value, serialized_compare> collection{key_type.as_less_comparator()};

    for (size_t i = 0; i < size; ++i) {
        // Both generators draw from `engine`. Generating the key and the value
        // in separate statements fixes the order of the draws; as arguments of
        // a single call their evaluation order would be unspecified, making the
        // generated data compiler-dependent.
        auto key = key_generator(engine, 0, value_generator::no_size_in_bytes_limit).serialize_nonnull();
        auto value = element_value_generator(engine, 0, value_generator::no_size_in_bytes_limit).serialize().value_or("");
        collection.emplace(std::move(key), std::move(value));
    }

    md::collection flat_collection;
    flat_collection.elements.reserve(collection.size());
    for (auto&& [key, value] : collection) {
        flat_collection.elements.emplace_back(md::collection_element{key, value});
    }

    return flat_collection;
}
std::vector<data_value> generate_frozen_list(std::mt19937& engine, const abstract_type& value_type, value_generator& val_gen,
size_t min_size_in_bytes, size_t max_size_in_bytes) {
auto value_generator = val_gen.get_atomic_value_generator(value_type);
auto size_dist = std::uniform_int_distribution<size_t>(0, 4);
const auto size = std::min(size_dist(engine), max_size_in_bytes / std::max(val_gen.min_size(value_type), size_t(1)));
std::vector<data_value> collection;
if (!size) {
return collection;
}
const auto value_min_size_in_bytes = min_size_in_bytes / size;
const auto value_max_size_in_bytes = max_size_in_bytes / size;
for (size_t i = 0; i < size; ++i) {
collection.emplace_back(value_generator(engine, value_min_size_in_bytes, value_max_size_in_bytes));
}
return collection;
}
std::vector<data_value> generate_frozen_set(std::mt19937& engine, const abstract_type& key_type, value_generator& val_gen,
size_t min_size_in_bytes, size_t max_size_in_bytes) {
auto key_generator = val_gen.get_atomic_value_generator(key_type);
auto size_dist = std::uniform_int_distribution<size_t>(0, 4);
const auto size = std::min(size_dist(engine), max_size_in_bytes / std::max(val_gen.min_size(key_type), size_t(1)));
std::map<bytes, data_value, serialized_compare> collection{key_type.as_less_comparator()};
std::vector<data_value> flat_collection;
if (!size) {
return flat_collection;
}
const auto value_max_size_in_bytes = max_size_in_bytes / size;
const auto value_min_size_in_bytes = min_size_in_bytes / size;
for (size_t i = 0; i < size; ++i) {
auto val = key_generator(engine, value_min_size_in_bytes, value_max_size_in_bytes);
auto serialized_key = val.serialize_nonnull();
collection.emplace(std::move(serialized_key), std::move(val));
}
flat_collection.reserve(collection.size());
for (auto&& element : collection) {
flat_collection.emplace_back(std::move(element.second));
}
return flat_collection;
}
std::vector<std::pair<data_value, data_value>> generate_frozen_map(std::mt19937& engine, const abstract_type& key_type,
const abstract_type& value_type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) {
auto key_generator = val_gen.get_atomic_value_generator(key_type);
auto value_generator = val_gen.get_atomic_value_generator(value_type);
auto size_dist = std::uniform_int_distribution<size_t>(0, 4);
const auto min_item_size_in_bytes = val_gen.min_size(key_type) + val_gen.min_size(value_type);
const auto size = std::min(size_dist(engine), max_size_in_bytes / std::max(min_item_size_in_bytes, size_t(1)));
std::map<bytes, std::pair<data_value, data_value>, serialized_compare> collection(key_type.as_less_comparator());
std::vector<std::pair<data_value, data_value>> flat_collection;
if (!size) {
return flat_collection;
}
const auto item_max_size_in_bytes = max_size_in_bytes / size;
const auto key_max_size_in_bytes = item_max_size_in_bytes / 2;
const auto value_max_size_in_bytes = item_max_size_in_bytes / 2;
const auto item_min_size_in_bytes = min_size_in_bytes / size;
const auto key_min_size_in_bytes = item_min_size_in_bytes / 2;
const auto value_min_size_in_bytes = item_min_size_in_bytes / 2;
for (size_t i = 0; i < size; ++i) {
auto key = key_generator(engine, key_min_size_in_bytes, key_max_size_in_bytes);
auto serialized_key = key.serialize_nonnull();
auto value = value_generator(engine, value_min_size_in_bytes, value_max_size_in_bytes);
collection.emplace(std::move(serialized_key), std::pair(std::move(key), std::move(value)));
}
flat_collection.reserve(collection.size());
for (auto&& element : collection) {
flat_collection.emplace_back(std::move(element.second));
}
return flat_collection;
}
/// Generator for the empty type: always a null value; the engine and the size
/// limits are not used.
data_value generate_empty_value(std::mt19937&, size_t, size_t) {
    auto null_value = data_value::make_null(empty_type);
    return null_value;
}
/// Generates a random int8_t value; the size limits are not used.
data_value generate_byte_value(std::mt19937& engine, size_t, size_t) {
    const auto raw = random::get_int<int8_t>(engine);
    return data_value(raw);
}
/// Generates a random int16_t value; the size limits are not used.
data_value generate_short_value(std::mt19937& engine, size_t, size_t) {
    const auto raw = random::get_int<int16_t>(engine);
    return data_value(raw);
}
/// Generates a random int32_t value; the size limits are not used.
data_value generate_int32_value(std::mt19937& engine, size_t, size_t) {
    const auto raw = random::get_int<int32_t>(engine);
    return data_value(raw);
}
/// Generates a random int64_t value; the size limits are not used.
data_value generate_long_value(std::mt19937& engine, size_t, size_t) {
    const auto raw = random::get_int<int64_t>(engine);
    return data_value(raw);
}
/// Generates a random ascii string with characters in [0, 127], honouring the
/// size limits as implemented by generate_string_value().
data_value generate_ascii_value(std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
    auto text = generate_string_value<sstring>(engine, 0, 127, min_size_in_bytes, max_size_in_bytes);
    return data_value(ascii_native_type{std::move(text)});
}
/// Generates a random blob whose bytes cover the full range of
/// bytes::value_type, honouring the size limits as implemented by
/// generate_string_value().
data_value generate_bytes_value(std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
    using byte_limits = std::numeric_limits<bytes::value_type>;
    auto blob = generate_string_value<bytes>(engine, byte_limits::min(), byte_limits::max(), min_size_in_bytes, max_size_in_bytes);
    return data_value(blob);
}
/// Generates a random UTF-8 string.
///
/// A wide string with code points in [0, 0x0FFF] is generated first and then
/// converted with the codecvt facet of the "en_US.utf8" locale. The conversion
/// is asserted to succeed.
data_value generate_utf8_value(std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
    using codec = std::codecvt<wchar_t, char, std::mbstate_t>;

    const auto wide = generate_string_value<std::wstring>(engine, 0, 0x0FFF, min_size_in_bytes, max_size_in_bytes);

    std::locale utf8_locale("en_US.utf8");
    auto& converter = std::use_facet<codec>(utf8_locale);

    // Reserve the worst case: every wide character taking max_length() bytes.
    sstring encoded(wide.size() * converter.max_length(), '\0');

    std::mbstate_t state{};
    const wchar_t* wide_end = nullptr;
    char* encoded_end = nullptr;
    const auto status = converter.out(state,
            wide.data(), wide.data() + wide.size(), wide_end,
            &encoded[0], &encoded[encoded.size()], encoded_end);
    SCYLLA_ASSERT(status == codec::ok);

    // Cut off the part of the buffer the conversion did not write to.
    encoded.resize(encoded_end - &encoded[0]);
    return data_value(std::move(encoded));
}
/// Generates a random boolean by drawing 0 or 1; the size limits are not used.
data_value generate_boolean_value(std::mt19937& engine, size_t, size_t) {
    const auto bit = std::uniform_int_distribution<int8_t>(0, 1)(engine);
    return data_value(bool(bit));
}
/// Generates a random value for the date type, as milliseconds since the epoch
/// between 0 and the last millisecond of year 9999; the size limits are not
/// used.
data_value generate_date_value(std::mt19937& engine, size_t, size_t) {
    using time_point = db_clock::time_point;
    using unsigned_rep = std::make_unsigned_t<db_clock::rep>;
    // Python driver can't tolerate dates above year 9999.
    constexpr auto first_unsupported_day = std::chrono::sys_days(std::chrono::year{10000}/1/1);
    constexpr auto last_millis = std::chrono::sys_time<std::chrono::milliseconds>(first_unsupported_day).time_since_epoch().count() - 1;
    const auto millis = random::get_int<unsigned_rep>(0, last_millis, engine);
    const auto when = time_point(time_point::duration(millis));
    return data_value(date_type_native_type{when});
}
/// Generates a random timeuuid: 16 random bytes with the version field forced
/// to 1; the size limits are not used.
///
/// The bytes are drawn from `engine`, as generate_uuid_value() does, so the
/// result is reproducible from the engine's seed.
data_value generate_timeuuid_value(std::mt19937& engine, size_t, size_t) {
    auto b = tests::random::get_bytes(16, engine);
    b[6] = (b[6] & 0x0F) | 0x10; // version 1
    return timeuuid_type->deserialize(b);
}
/// Generates a random timestamp, as milliseconds since the epoch between the
/// start of year 1 and the last millisecond of year 9999; the size limits are
/// not used.
data_value generate_timestamp_value(std::mt19937& engine, size_t, size_t) {
    using time_point = db_clock::time_point;
    using millis_time = std::chrono::sys_time<std::chrono::milliseconds>;
    // Python driver can't tolerate dates above year 9999 or below year 1.
    constexpr auto first_day = std::chrono::sys_days(std::chrono::year{1}/1/1);
    constexpr auto first_unsupported_day = std::chrono::sys_days(std::chrono::year{10000}/1/1);
    constexpr auto first_millis = millis_time(first_day).time_since_epoch().count();
    constexpr auto last_millis = millis_time(first_unsupported_day).time_since_epoch().count() - 1;
    const auto millis = random::get_int<time_point::rep>(first_millis, last_millis, engine);
    return data_value(time_point(time_point::duration(millis)));
}
/// Generates a random simple date from a random value of its primary type; the
/// size limits are not used.
data_value generate_simple_date_value(std::mt19937& engine, size_t, size_t) {
    using primary = simple_date_native_type::primary_type;
    const auto raw = random::get_int<primary>(engine);
    return data_value(simple_date_native_type{raw});
}
/// Generates a random time of day in nanoseconds; the size limits are not used.
data_value generate_time_value(std::mt19937& engine, size_t, size_t) {
    using primary = time_native_type::primary_type;
    // A legal `time` is smaller than the number of nanoseconds in a day.
    const auto nanos_per_day = std::chrono::nanoseconds(std::chrono::days(1)).count();
    auto last_legal = nanos_per_day - 1;
    return data_value(time_native_type{random::get_int<primary>(0, last_legal, engine)});
}
/// Generates a random uuid: 16 random bytes drawn from `engine`, with the
/// version field forced to 4; the size limits are not used.
data_value generate_uuid_value(std::mt19937& engine, size_t, size_t) {
    auto raw = tests::random::get_bytes(16, engine);
    // The high nibble of byte 6 carries the version.
    raw[6] = (raw[6] & 0x0F) | 0x40; // version 4
    auto value = uuid_type->deserialize(raw);
    return data_value(value);
}
/// Generates a random IPv4 address from a random 32-bit integer; the size
/// limits are not used.
data_value generate_inet_addr_value(std::mt19937& engine, size_t, size_t) {
    const auto raw = random::get_int<int32_t>(engine);
    return data_value(net::ipv4_address(raw));
}
/// Generates a random float; the size limits are not used.
data_value generate_float_value(std::mt19937& engine, size_t, size_t) {
    const auto raw = random::get_real<float>(engine);
    return data_value(raw);
}
/// Generates a random double; the size limits are not used.
data_value generate_double_value(std::mt19937& engine, size_t, size_t) {
    const auto raw = random::get_real<double>(engine);
    return data_value(raw);
}
/// Generates a random varint; the size limits are forwarded unchanged to
/// generate_multiprecision_integer_value().
data_value generate_varint_value(std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
    auto magnitude = generate_multiprecision_integer_value(engine, min_size_in_bytes, max_size_in_bytes);
    return data_value(magnitude);
}
/// Generates a random decimal with a scale drawn from [-8, 8] and a random
/// unscaled value.
///
/// sizeof(int32_t) bytes of each size limit are set aside for the scale; the
/// rest is what the unscaled value may use.
data_value generate_decimal_value(std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
    // Saturating subtraction. The limits are unsigned, so a plain subtraction
    // would wrap around for limits below sizeof(int32_t) and turn a tiny limit
    // into a huge one.
    const auto without_scale = [] (size_t size_in_bytes) -> size_t {
        return size_in_bytes > sizeof(int32_t) ? size_in_bytes - sizeof(int32_t) : size_t(0);
    };
    auto scale_dist = std::uniform_int_distribution<int32_t>(-8, 8);
    // The scale is drawn before the unscaled value.
    const auto scale = scale_dist(engine);
    return data_value(big_decimal(scale, generate_multiprecision_integer_value(engine,
            without_scale(min_size_in_bytes), without_scale(max_size_in_bytes))));
}
/// Generates a random duration; the size limits are not used.
///
/// The components are drawn in the order months, days, nanoseconds: months
/// without explicit bounds, days with bounds 0 and 31 and nanoseconds with an
/// upper bound of 86400000000000.
data_value generate_duration_value(std::mt19937& engine, size_t, size_t) {
    const auto raw_months = random::get_int<months_counter::value_type>(engine);
    const auto raw_days = random::get_int<days_counter::value_type>(0, 31, engine);
    const auto raw_nanoseconds = random::get_int<nanoseconds_counter::value_type>(86400000000000, engine);
    return data_value(cql_duration{
            months_counter(raw_months),
            days_counter(raw_days),
            nanoseconds_counter(raw_nanoseconds)});
}
/// Generates a value for the tuple `type`, which must not be multi-cell. The
/// size limits apply to the tuple as a whole.
data_value generate_frozen_tuple_value(std::mt19937& engine, const tuple_type_impl& type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) {
    SCYLLA_ASSERT(!type.is_multi_cell());
    auto members = generate_frozen_tuple_values(engine, val_gen, type.all_types(), min_size_in_bytes, max_size_in_bytes);
    return make_tuple_value(type.shared_from_this(), std::move(members));
}
/// Generates a value for the user-defined `type`, which must not be
/// multi-cell. The size limits apply to the value as a whole.
data_value generate_frozen_user_value(std::mt19937& engine, const user_type_impl& type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) {
    SCYLLA_ASSERT(!type.is_multi_cell());
    auto fields = generate_frozen_tuple_values(engine, val_gen, type.all_types(), min_size_in_bytes, max_size_in_bytes);
    return make_user_value(type.shared_from_this(), std::move(fields));
}
/// Generates the content of the multi-cell list `type`, using the type's name
/// and value comparators as the key and value types of the collection.
data_model::mutation_description::collection generate_list_value(std::mt19937& engine, const list_type_impl& type, value_generator& val_gen) {
    SCYLLA_ASSERT(type.is_multi_cell());
    const abstract_type& keys = *type.name_comparator();
    const abstract_type& values = *type.value_comparator();
    return generate_collection(engine, keys, values, val_gen);
}
/// Generates a value for the list `type`, which must not be multi-cell. The
/// size limits apply to the list as a whole.
data_value generate_frozen_list_value(std::mt19937& engine, const list_type_impl& type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) {
    SCYLLA_ASSERT(!type.is_multi_cell());
    auto elements = generate_frozen_list(engine, *type.get_elements_type(), val_gen, min_size_in_bytes, max_size_in_bytes);
    return make_list_value(type.shared_from_this(), std::move(elements));
}
/// Generates the content of the multi-cell set `type`, using the type's name
/// and value comparators as the key and value types of the collection.
data_model::mutation_description::collection generate_set_value(std::mt19937& engine, const set_type_impl& type, value_generator& val_gen) {
    SCYLLA_ASSERT(type.is_multi_cell());
    const abstract_type& keys = *type.name_comparator();
    const abstract_type& values = *type.value_comparator();
    return generate_collection(engine, keys, values, val_gen);
}
/// Generates a value for the set `type`, which must not be multi-cell. The
/// size limits apply to the set as a whole.
data_value generate_frozen_set_value(std::mt19937& engine, const set_type_impl& type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) {
    SCYLLA_ASSERT(!type.is_multi_cell());
    auto elements = generate_frozen_set(engine, *type.get_elements_type(), val_gen, min_size_in_bytes, max_size_in_bytes);
    return make_set_value(type.shared_from_this(), std::move(elements));
}
/// Generates the content of the multi-cell map `type`, using the type's name
/// and value comparators as the key and value types of the collection.
data_model::mutation_description::collection generate_map_value(std::mt19937& engine, const map_type_impl& type, value_generator& val_gen) {
    SCYLLA_ASSERT(type.is_multi_cell());
    const abstract_type& keys = *type.name_comparator();
    const abstract_type& values = *type.value_comparator();
    return generate_collection(engine, keys, values, val_gen);
}
/// Generates a value for the map `type`, which must not be multi-cell. The
/// size limits apply to the map as a whole.
data_value generate_frozen_map_value(std::mt19937& engine, const map_type_impl& type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) {
    SCYLLA_ASSERT(!type.is_multi_cell());
    auto entries = generate_frozen_map(engine, *type.get_keys_type(), *type.get_values_type(), val_gen, min_size_in_bytes, max_size_in_bytes);
    return make_map_value(type.shared_from_this(), std::move(entries));
}
} // anonymous namespace
/// Convenience overload: generates an atomic value with a minimum size of 0.
data_value value_generator::generate_atomic_value(std::mt19937& engine, const abstract_type& type, size_t max_size_in_bytes) {
    constexpr size_t no_min_size = 0;
    return generate_atomic_value(engine, type, no_min_size, max_size_in_bytes);
}
/// Generates a value of `type`, which must not be multi-cell, by looking up
/// the matching atomic value generator and invoking it with the size limits.
data_value value_generator::generate_atomic_value(std::mt19937& engine, const abstract_type& type, size_t min_size_in_bytes, size_t max_size_in_bytes) {
    SCYLLA_ASSERT(!type.is_multi_cell());
    auto generate = get_atomic_value_generator(type);
    return generate(engine, min_size_in_bytes, max_size_in_bytes);
}
/// Registers the generator of every native type and records, per native type,
/// the serialized size of a value generated with both size limits set to 0.
/// min_size() later serves these recorded sizes.
value_generator::value_generator()
    : _regular_value_generators{
            {empty_type.get(), &generate_empty_value},
            {byte_type.get(), &generate_byte_value},
            {short_type.get(), &generate_short_value},
            {int32_type.get(), &generate_int32_value},
            {long_type.get(), &generate_long_value},
            {ascii_type.get(), &generate_ascii_value},
            {bytes_type.get(), &generate_bytes_value},
            {utf8_type.get(), &generate_utf8_value},
            {boolean_type.get(), &generate_boolean_value},
            {date_type.get(), &generate_date_value},
            {timeuuid_type.get(), &generate_timeuuid_value},
            {timestamp_type.get(), &generate_timestamp_value},
            {simple_date_type.get(), &generate_simple_date_value},
            {time_type.get(), &generate_time_value},
            {uuid_type.get(), &generate_uuid_value},
            {inet_addr_type.get(), &generate_inet_addr_value},
            {float_type.get(), &generate_float_value},
            {double_type.get(), &generate_double_value},
            {varint_type.get(), &generate_varint_value},
            {decimal_type.get(), &generate_decimal_value},
            {duration_type.get(), &generate_duration_value}} {
    // A default-seeded engine, shared by all the probe values generated below.
    std::mt19937 probe_engine;
    for (const auto& [native_type, generate] : _regular_value_generators) {
        const auto probe = generate(probe_engine, size_t{}, size_t{});
        _regular_value_min_sizes.emplace(native_type, probe.serialized_size());
    }
}
size_t value_generator::min_size(const abstract_type& type) {
SCYLLA_ASSERT(!type.is_multi_cell());
auto it = _regular_value_min_sizes.find(&type);
if (it != _regular_value_min_sizes.end()) {
return it->second;
}
std::mt19937 engine;
if (auto maybe_user_type = dynamic_cast<const user_type_impl*>(&type)) {
return generate_frozen_user_value(engine, *maybe_user_type, *this, size_t{}, size_t{}).serialized_size();
}
if (auto maybe_tuple_type = dynamic_cast<const tuple_type_impl*>(&type)) {
return generate_frozen_tuple_value(engine, *maybe_tuple_type, *this, size_t{}, size_t{}).serialized_size();
}
if (auto maybe_list_type = dynamic_cast<const list_type_impl*>(&type)) {
return generate_frozen_list_value(engine, *maybe_list_type, *this, size_t{}, size_t{}).serialized_size();
}
if (auto maybe_set_type = dynamic_cast<const set_type_impl*>(&type)) {
return generate_frozen_set_value(engine, *maybe_set_type, *this, size_t{}, size_t{}).serialized_size();
}
if (auto maybe_map_type = dynamic_cast<const map_type_impl*>(&type)) {
return generate_frozen_map_value(engine, *maybe_map_type, *this, size_t{}, size_t{}).serialized_size();
}
if (auto maybe_reversed_type = dynamic_cast<const reversed_type_impl*>(&type)) {
return min_size(*maybe_reversed_type->underlying_type());
}
throw std::runtime_error(fmt::format("Don't know how to calculate min size for unknown type {}", type.name()));
}
/// Returns a generator producing values of `type`, which must not be
/// multi-cell.
///
/// Native types are served by the registered generators. For tuple,
/// user-defined and collection types the returned generator keeps a pointer to
/// `type` and to this value_generator. Reversed types use the generator of
/// their underlying type.
///
/// \throws std::runtime_error if `type` is of none of the above kinds.
value_generator::atomic_value_generator value_generator::get_atomic_value_generator(const abstract_type& type) {
    SCYLLA_ASSERT(!type.is_multi_cell());

    if (const auto registered = _regular_value_generators.find(&type); registered != _regular_value_generators.end()) {
        return registered->second;
    }

    // The user type is checked ahead of the tuple type, preserving the order
    // in which the kinds are tested.
    if (const auto* user_type = dynamic_cast<const user_type_impl*>(&type)) {
        return [this, user_type] (std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
            return generate_frozen_user_value(engine, *user_type, *this, min_size_in_bytes, max_size_in_bytes);
        };
    }
    if (const auto* tuple_type = dynamic_cast<const tuple_type_impl*>(&type)) {
        return [this, tuple_type] (std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
            return generate_frozen_tuple_value(engine, *tuple_type, *this, min_size_in_bytes, max_size_in_bytes);
        };
    }
    if (const auto* list_type = dynamic_cast<const list_type_impl*>(&type)) {
        return [this, list_type] (std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
            return generate_frozen_list_value(engine, *list_type, *this, min_size_in_bytes, max_size_in_bytes);
        };
    }
    if (const auto* set_type = dynamic_cast<const set_type_impl*>(&type)) {
        return [this, set_type] (std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
            return generate_frozen_set_value(engine, *set_type, *this, min_size_in_bytes, max_size_in_bytes);
        };
    }
    if (const auto* map_type = dynamic_cast<const map_type_impl*>(&type)) {
        return [this, map_type] (std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
            return generate_frozen_map_value(engine, *map_type, *this, min_size_in_bytes, max_size_in_bytes);
        };
    }
    if (const auto* reversed_type = dynamic_cast<const reversed_type_impl*>(&type)) {
        return get_atomic_value_generator(*reversed_type->underlying_type());
    }

    throw std::runtime_error(fmt::format("Don't know how to generate value for unknown type {}", type.name()));
}
/// Returns a generator producing mutation-description values of `type`.
///
/// Multi-cell user-defined, list, set and map types get a generator producing
/// a collection description. All other supported types get a generator
/// producing the serialized form of a value generated without a size limit.
/// Reversed types use the generator of their underlying type.
///
/// The returned generators for non-native types keep a pointer to `type` and
/// to this value_generator.
///
/// \throws std::runtime_error if `type` is of none of the supported kinds.
value_generator::generator value_generator::get_generator(const abstract_type& type) {
    using value = data_model::mutation_description::value;

    if (const auto registered = _regular_value_generators.find(&type); registered != _regular_value_generators.end()) {
        return [gen = registered->second] (std::mt19937& engine) -> value {
            return gen(engine, 0, no_size_in_bytes_limit).serialize_nonnull();
        };
    }

    // The user type is checked ahead of the tuple type, preserving the order
    // in which the kinds are tested.
    if (const auto* user_type = dynamic_cast<const user_type_impl*>(&type)) {
        if (!user_type->is_multi_cell()) {
            return [this, user_type] (std::mt19937& engine) -> value {
                return generate_frozen_user_value(engine, *user_type, *this, 0, no_size_in_bytes_limit).serialize_nonnull();
            };
        }
        return [this, user_type] (std::mt19937& engine) -> value {
            return generate_user_value(engine, *user_type, *this);
        };
    }

    if (const auto* tuple_type = dynamic_cast<const tuple_type_impl*>(&type)) {
        return [this, tuple_type] (std::mt19937& engine) -> value {
            return generate_frozen_tuple_value(engine, *tuple_type, *this, 0, no_size_in_bytes_limit).serialize_nonnull();
        };
    }

    if (const auto* list_type = dynamic_cast<const list_type_impl*>(&type)) {
        if (!list_type->is_multi_cell()) {
            return [this, list_type] (std::mt19937& engine) -> value {
                return generate_frozen_list_value(engine, *list_type, *this, 0, no_size_in_bytes_limit).serialize_nonnull();
            };
        }
        return [this, list_type] (std::mt19937& engine) -> value {
            return generate_list_value(engine, *list_type, *this);
        };
    }

    if (const auto* set_type = dynamic_cast<const set_type_impl*>(&type)) {
        if (!set_type->is_multi_cell()) {
            return [this, set_type] (std::mt19937& engine) -> value {
                return generate_frozen_set_value(engine, *set_type, *this, 0, no_size_in_bytes_limit).serialize_nonnull();
            };
        }
        return [this, set_type] (std::mt19937& engine) -> value {
            return generate_set_value(engine, *set_type, *this);
        };
    }

    if (const auto* map_type = dynamic_cast<const map_type_impl*>(&type)) {
        if (!map_type->is_multi_cell()) {
            return [this, map_type] (std::mt19937& engine) -> value {
                return generate_frozen_map_value(engine, *map_type, *this, 0, no_size_in_bytes_limit).serialize_nonnull();
            };
        }
        return [this, map_type] (std::mt19937& engine) -> value {
            return generate_map_value(engine, *map_type, *this);
        };
    }

    if (const auto* reversed_type = dynamic_cast<const reversed_type_impl*>(&type)) {
        return get_generator(*reversed_type->underlying_type());
    }

    throw std::runtime_error(fmt::format("Don't know how to generate value for unknown type {}", type.name()));
}
/// Generates a single mutation-description value of `type` by obtaining the
/// matching generator from get_generator() and invoking it once.
data_model::mutation_description::value value_generator::generate_value(std::mt19937& engine, const abstract_type& type) {
    auto generate = get_generator(type);
    return generate(engine);
}
/// Returns a timestamp generator which draws uniformly from
/// [min_timestamp, api::max_timestamp], regardless of the destination.
timestamp_generator default_timestamp_generator() {
    return [] (std::mt19937& engine, timestamp_destination, api::timestamp_type min_timestamp) {
        return std::uniform_int_distribution<api::timestamp_type>(min_timestamp, api::max_timestamp)(engine);
    };
}
// Builds a timestamp generator whose output ranges are disjoint per destination kind.
//
// The range starting at `min_timestamp` and ending just below
// api::max_timestamp is cut into consecutive sub-ranges, one per rank, at
// boundaries drawn from an engine seeded with `seed`. Partition tombstones get
// the lowest sub-range, followed by range tombstones, row tombstones and
// collection tombstones; row markers and all cell timestamps share the highest
// one. Neighbouring boundaries are at least `margin` apart.
//
// The `curr_min_ts` argument of the returned generator is ignored.
timestamp_generator uncompactible_timestamp_generator(uint32_t seed, api::timestamp_type min_timestamp) {
    auto boundary_engine = std::mt19937(seed);

    // Maps a destination to the index of the sub-range it draws from.
    const auto rank_of = [] (timestamp_destination dest) -> api::timestamp_type {
        switch (dest) {
            case timestamp_destination::partition_tombstone:
                return 0;
            case timestamp_destination::range_tombstone:
                return 1;
            case timestamp_destination::row_tombstone:
                return 2;
            case timestamp_destination::collection_tombstone:
                return 3;
            case timestamp_destination::row_marker:
            case timestamp_destination::cell_timestamp:
            case timestamp_destination::collection_cell_timestamp:
                return 4;
        }
    };

    const auto highest_rank = rank_of(timestamp_destination::collection_cell_timestamp);
    const auto margin = 1000;

    // boundaries[r] is the inclusive start of rank r, boundaries[r + 1] its exclusive end.
    std::vector<api::timestamp_type> boundaries;
    boundaries.push_back(min_timestamp);
    for (auto ranks_left = highest_rank; ranks_left > 0; --ranks_left) {
        // Leave room above for the boundaries that are still to be picked.
        const auto lowest = boundaries.back() + margin;
        const auto highest = api::max_timestamp - (ranks_left * margin);
        boundaries.push_back(std::uniform_int_distribution<api::timestamp_type>(lowest, highest)(boundary_engine));
    }
    boundaries.push_back(api::max_timestamp);

    return [rank_of, boundaries] (std::mt19937& engine, timestamp_destination destination, api::timestamp_type curr_min_ts) {
        const auto slot = rank_of(destination);
        return std::uniform_int_distribution<api::timestamp_type>(boundaries.at(slot), boundaries.at(slot + 1) - 1)(engine);
    };
}
// Returns an expiry generator that never produces expiry information,
// whatever the destination.
expiry_generator no_expiry_expiry_generator() {
    return [] (std::mt19937&, timestamp_destination) -> std::optional<expiry_info> {
        return {};
    };
}
namespace {
// Assembles a schema from the column types chosen by `spec`.
//
// All choices are made with an engine seeded from `seed`. Columns are named
// pk<N>, ck<N>, s<N> and v<N>. Static columns are only requested from the spec
// when there is at least one clustering column. The tombstone GC mode is
// explicitly set to timeout.
schema_ptr build_random_schema(uint32_t seed, random_schema_specification& spec) {
    auto engine = std::mt19937{seed};
    auto builder = schema_builder(spec.keyspace_name(), spec.table_name(engine));

    auto partition_types = spec.partition_key_columns(engine);
    SCYLLA_ASSERT(!partition_types.empty()); // Let's not pull in boost::test here
    size_t pk_idx = 0;
    for (auto& type : partition_types) {
        builder.with_column(to_bytes(format("pk{}", pk_idx)), std::move(type), column_kind::partition_key);
        ++pk_idx;
    }

    auto clustering_types = spec.clustering_key_columns(engine);
    const bool has_clustering_columns = !clustering_types.empty();
    size_t ck_idx = 0;
    for (auto& type : clustering_types) {
        builder.with_column(to_bytes(format("ck{}", ck_idx)), std::move(type), column_kind::clustering_key);
        ++ck_idx;
    }

    if (has_clustering_columns) {
        auto static_types = spec.static_columns(engine);
        size_t s_idx = 0;
        for (auto& type : static_types) {
            builder.with_column(to_bytes(format("s{}", s_idx)), std::move(type), column_kind::static_column);
            ++s_idx;
        }
    }

    auto regular_types = spec.regular_columns(engine);
    size_t v_idx = 0;
    for (auto& type : regular_types) {
        builder.with_column(to_bytes(format("v{}", v_idx)), std::move(type), column_kind::regular_column);
        ++v_idx;
    }

    const bool uncompressed = spec.compress() == random_schema_specification::compress_sstable::no;
    if (uncompressed) {
        builder.set_compressor_params(compression_parameters::no_compression());
    }

    builder.with_tombstone_gc_options(tombstone_gc_options(tombstone_gc_mode::timeout));
    return builder.build();
}
// Renders the create statement of the given user-defined type as a single string.
// The description is requested with its create statement, so the optional is expected to be set.
sstring udt_to_str(const user_type_impl& udt) {
    auto description = udt.describe(cql3::with_create_statement::yes);
    auto& statement = description.create_statement.value();
    return statement.linearize();
}
struct udt_list {
std::vector<const user_type_impl*> vector;
void insert(const user_type_impl* udt) {
auto it = std::find(vector.begin(), vector.end(), udt);
if (it == vector.end()) {
vector.push_back(udt);
}
}
void merge(udt_list other) {
for (auto& udt : other.vector) {
insert(udt);
}
}
};
// Collects all user-defined types reachable from `types`, looking through
// tuples, lists, sets, maps and reversed types.
//
// The types a UDT's fields refer to are listed before the UDT itself.
udt_list dump_udts(const std::vector<data_type>& types) {
    udt_list found;
    for (const auto& dt : types) {
        const auto* const raw = dt.get();

        // The user type check stays ahead of the tuple check, as in the original ordering.
        if (const auto* as_udt = dynamic_cast<const user_type_impl*>(raw)) {
            found.merge(dump_udts(as_udt->field_types()));
            found.insert(as_udt);
            continue;
        }
        if (const auto* as_tuple = dynamic_cast<const tuple_type_impl*>(raw)) {
            found.merge(dump_udts(as_tuple->all_types()));
            continue;
        }
        if (const auto* as_list = dynamic_cast<const list_type_impl*>(raw)) {
            found.merge(dump_udts({as_list->get_elements_type()}));
            continue;
        }
        if (const auto* as_set = dynamic_cast<const set_type_impl*>(raw)) {
            found.merge(dump_udts({as_set->get_elements_type()}));
            continue;
        }
        if (const auto* as_map = dynamic_cast<const map_type_impl*>(raw)) {
            found.merge(dump_udts({as_map->get_keys_type(), as_map->get_values_type()}));
            continue;
        }
        if (const auto* as_reversed = dynamic_cast<const reversed_type_impl*>(raw)) {
            found.merge(dump_udts({as_reversed->underlying_type()}));
        }
    }
    return found;
}
std::vector<const user_type_impl*> dump_udts(const schema& schema) {
udt_list udts;
const auto cdefs_to_types = [] (const schema::const_iterator_range_type& cdefs) -> std::vector<data_type> {
return cdefs
| std::views::transform([] (const column_definition& cdef) { return cdef.type; })
| std::ranges::to<std::vector>();
};
udts.merge(dump_udts(cdefs_to_types(schema.partition_key_columns())));
udts.merge(dump_udts(cdefs_to_types(schema.clustering_key_columns())));
udts.merge(dump_udts(cdefs_to_types(schema.regular_columns())));
udts.merge(dump_udts(cdefs_to_types(schema.static_columns())));
return udts.vector;
}
// Returns one "<name> <cql type>" entry per column of the given kind, in
// column order. Static columns get a trailing " static".
std::vector<sstring> columns_specs(schema_ptr schema, column_kind kind) {
    std::vector<sstring> specs;
    const char* const static_suffix = kind == column_kind::static_column ? " static" : "";
    const auto total = schema->columns_count(kind);
    for (column_count_type idx = 0; idx < total; ++idx) {
        const auto& column = schema->column_at(kind, idx);
        auto name = column.name_as_cql_string();
        auto type_name = column.type->as_cql3_type().to_string();
        specs.emplace_back(format("{} {}{}", name, type_name, static_suffix));
    }
    return specs;
}
// Returns the CQL-formatted names of all columns of the given kind, in column order.
std::vector<sstring> column_names(schema_ptr schema, column_kind kind) {
    std::vector<sstring> names;
    const auto total = schema->columns_count(kind);
    for (column_count_type idx = 0; idx < total; ++idx) {
        names.emplace_back(schema->column_at(kind, idx).name_as_cql_string());
    }
    return names;
}
// Assigns timestamps, and expiry where the expiry generator provides one, to a
// generated value in place.
//
// Atomic values get a single cell timestamp. Collections first get an optional
// collection tombstone (skipped when the generator returns
// api::missing_timestamp), then every element gets its own timestamp, which is
// asserted to be strictly greater than the tombstone's.
//
// `schema` is not used by the current implementation.
void decorate_with_timestamps(const schema& schema, std::mt19937& engine, timestamp_generator& ts_gen, expiry_generator exp_gen,
        data_model::mutation_description::value& value) {
    std::visit(
            make_visitor(
                    [&] (data_model::mutation_description::atomic_value& v) {
                        v.timestamp = ts_gen(engine, timestamp_destination::cell_timestamp, api::min_timestamp);
                        if (auto expiry_opt = exp_gen(engine, timestamp_destination::cell_timestamp)) {
                            v.expiring = data_model::mutation_description::expiry_info{expiry_opt->ttl, expiry_opt->expiry_point};
                        }
                    },
                    [&] (data_model::mutation_description::collection& c) {
                        if (auto ts = ts_gen(engine, timestamp_destination::collection_tombstone, api::min_timestamp);
                                ts != api::missing_timestamp) {
                            if (ts == api::max_timestamp) {
                                // Caveat: leave some headroom for the cells
                                // having a timestamp larger than the
                                // tombstone's.
                                ts--;
                            }
                            auto expiry_opt = exp_gen(engine, timestamp_destination::collection_tombstone);
                            const auto deletion_time = expiry_opt ? expiry_opt->expiry_point : gc_clock::now();
                            c.tomb = tombstone(ts, deletion_time);
                        }
                        // A generator may return exactly the lower bound it is given
                        // (the default one draws from a closed interval), so with a
                        // tombstone present the bound has to be one past the
                        // tombstone's timestamp for the assertion below to hold.
                        // The decrement above guarantees the increment cannot overflow.
                        const auto min_cell_timestamp = c.tomb ? c.tomb.timestamp + 1 : c.tomb.timestamp;
                        for (auto& [ key, value ] : c.elements) {
                            value.timestamp = ts_gen(engine, timestamp_destination::collection_cell_timestamp, min_cell_timestamp);
                            SCYLLA_ASSERT(!c.tomb || value.timestamp > c.tomb.timestamp);
                            if (auto expiry_opt = exp_gen(engine, timestamp_destination::collection_cell_timestamp)) {
                                value.expiring = data_model::mutation_description::expiry_info{expiry_opt->ttl, expiry_opt->expiry_point};
                            }
                        }
                    }),
            value);
}
} // anonymous namespace
// Generates one key component per column in `columns`, using an engine seeded
// with `n` so that the same `n` always yields the same key.
//
// `max_size_in_bytes` is split evenly between the components. `columns` must
// not be empty, as its size is used as a divisor.
data_model::mutation_description::key random_schema::make_key(uint32_t n, value_generator& gen, schema::const_iterator_range_type columns,
        size_t max_size_in_bytes) {
    std::mt19937 engine(n);
    const auto column_count = std::distance(columns.begin(), columns.end());
    const size_t max_component_size = max_size_in_bytes / column_count;

    std::vector<bytes> components;
    for (const auto& column : columns) {
        auto component = gen.generate_atomic_value(engine, *column.type, max_component_size);
        components.emplace_back(component.serialize_nonnull());
    }
    return components;
}
// Generates the partition key identified by seed `n`, limited to the largest
// size representable by the partition key's size type.
data_model::mutation_description::key random_schema::make_partition_key(uint32_t n, value_generator& gen) const {
    constexpr auto max_key_size = std::numeric_limits<partition_key::compound::element_type::size_type>::max();
    return make_key(n, gen, _schema->partition_key_columns(), max_key_size);
}
// Generates the clustering key identified by seed `n`, limited to the largest
// size representable by the clustering key's size type.
// The schema must have at least one clustering column.
data_model::mutation_description::key random_schema::make_clustering_key(uint32_t n, value_generator& gen) const {
    SCYLLA_ASSERT(_schema->clustering_key_size() > 0);
    constexpr auto max_key_size = std::numeric_limits<clustering_key::compound::element_type::size_type>::max();
    return make_key(n, gen, _schema->clustering_key_columns(), max_key_size);
}
random_schema::random_schema(uint32_t seed, random_schema_specification& spec)
: _schema(build_random_schema(seed, spec)) {
}
// Renders the schema as CQL: the create statements of all user-defined types
// it uses (one per line), followed by a CREATE TABLE statement.
//
// The partition key columns are always wrapped in their own parentheses.
// Without them, a multi-column partition key on a table with no clustering
// columns would read as "first column is the partition key, the rest are
// clustering columns".
sstring random_schema::cql() const {
    auto udts = dump_udts(*_schema);
    sstring udts_str;
    if (!udts.empty()) {
        udts_str = seastar::format("{}", fmt::join(udts |
                std::views::transform([] (const user_type_impl* const udt) { return udt_to_str(*udt); }), "\n"));
    }

    std::vector<sstring> col_specs;
    for (auto kind : {column_kind::partition_key, column_kind::clustering_key, column_kind::regular_column, column_kind::static_column}) {
        auto cols = columns_specs(_schema, kind);
        std::move(cols.begin(), cols.end(), std::back_inserter(col_specs));
    }

    auto partition_column_names = column_names(_schema, column_kind::partition_key);
    auto clustering_key_names = column_names(_schema, column_kind::clustering_key);

    std::string primary_key = fmt::format("({})", fmt::join(partition_column_names, ", "));
    if (!clustering_key_names.empty()) {
        primary_key += fmt::format(", {}", fmt::join(clustering_key_names, ", "));
    }

    // FIXME include the clustering column orderings
    // Note the comma after the column list: PRIMARY KEY is one more element of
    // the comma-separated table definition.
    return seastar::format(
            "{}\nCREATE TABLE {}.{} (\n\t{},\n\tPRIMARY KEY ({}))",
            udts_str,
            _schema->ks_name(),
            _schema->cf_name(),
            fmt::join(col_specs, ",\n\t"),
            primary_key);
}
// Generates the partition key identified by seed `n` with a fresh value generator.
data_model::mutation_description::key random_schema::make_pkey(uint32_t n) {
    value_generator key_gen;
    return make_partition_key(n, key_gen);
}
std::vector<data_model::mutation_description::key> random_schema::make_pkeys(size_t n) {
std::set<dht::decorated_key, dht::ring_position_less_comparator> keys{dht::ring_position_less_comparator{*_schema}};
value_generator val_gen;
uint32_t i{0};
while (keys.size() < n) {
keys.emplace(dht::decorate_key(*_schema, partition_key::from_exploded(make_partition_key(i, val_gen))));
++i;
}
return keys
| std::views::transform([] (const dht::decorated_key& dkey) { return dkey.key().explode(); })
| std::ranges::to<std::vector<data_model::mutation_description::key>>();
}
// Generates the clustering key identified by seed `n` with a fresh value generator.
data_model::mutation_description::key random_schema::make_ckey(uint32_t n) {
    value_generator key_gen;
    return make_clustering_key(n, key_gen);
}
std::vector<data_model::mutation_description::key> random_schema::make_ckeys(size_t n) {
std::set<clustering_key, clustering_key::less_compare> keys{clustering_key::less_compare{*_schema}};
value_generator val_gen;
for (uint32_t i = 0; i < n; i++) {
keys.emplace(clustering_key::from_exploded(make_clustering_key(i, val_gen)));
}
return keys
| std::views::transform([] (const clustering_key& ckey) { return ckey.explode(); })
| std::ranges::to<std::vector<data_model::mutation_description::key>>();
}
// Starts an empty mutation description for the given partition key.
data_model::mutation_description random_schema::new_mutation(data_model::mutation_description::key pkey) {
    data_model::mutation_description description(std::move(pkey));
    return description;
}
// Starts an empty mutation description for the partition key identified by seed `n`.
data_model::mutation_description random_schema::new_mutation(uint32_t n) {
    auto pkey = make_pkey(n);
    return new_mutation(std::move(pkey));
}
// Sets a partition tombstone on `md`, unless the timestamp generator returns
// api::missing_timestamp. The deletion time is the generated expiry point if
// there is one, otherwise the current gc_clock time.
void random_schema::set_partition_tombstone(std::mt19937& engine, data_model::mutation_description& md, timestamp_generator ts_gen,
        expiry_generator exp_gen) {
    const auto ts = ts_gen(engine, timestamp_destination::partition_tombstone, api::min_timestamp);
    if (ts == api::missing_timestamp) {
        return;
    }
    auto expiry = exp_gen(engine, timestamp_destination::partition_tombstone);
    const auto deletion_time = expiry ? expiry->expiry_point : gc_clock::now();
    md.set_partition_tombstone(tombstone(ts, deletion_time));
}
void random_schema::add_row(std::mt19937& engine, data_model::mutation_description& md, data_model::mutation_description::key ckey,
timestamp_generator ts_gen, expiry_generator exp_gen) {
value_generator gen;
for (const auto& cdef : _schema->regular_columns()) {
auto value = gen.generate_value(engine, *cdef.type);
decorate_with_timestamps(*_schema, engine, ts_gen, exp_gen, value);
md.add_clustered_cell(ckey, cdef.name_as_text(), std::move(value));
}
if (auto ts = ts_gen(engine, timestamp_destination::row_marker, api::min_timestamp); ts != api::missing_timestamp) {
if (auto expiry_opt = exp_gen(engine, timestamp_destination::row_marker)) {
md.add_clustered_row_marker(ckey, tests::data_model::mutation_description::row_marker(ts, expiry_opt->ttl, expiry_opt->expiry_point));
} else {
md.add_clustered_row_marker(ckey, ts);
}
}
if (auto ts = ts_gen(engine, timestamp_destination::row_tombstone, api::min_timestamp); ts != api::missing_timestamp) {
auto expiry_opt = exp_gen(engine, timestamp_destination::row_tombstone);
const auto deletion_time = expiry_opt ? expiry_opt->expiry_point : gc_clock::now();
md.add_clustered_row_tombstone(ckey, row_tombstone{tombstone{ts, deletion_time}});
}
}
// Adds a clustering row whose key is the clustering key identified by seed `n`.
void random_schema::add_row(std::mt19937& engine, data_model::mutation_description& md, uint32_t n, timestamp_generator ts_gen,
        expiry_generator exp_gen) {
    auto ckey = make_ckey(n);
    add_row(engine, md, std::move(ckey), std::move(ts_gen), std::move(exp_gen));
}
void random_schema::add_static_row(std::mt19937& engine, data_model::mutation_description& md, timestamp_generator ts_gen, expiry_generator exp_gen) {
value_generator gen;
for (const auto& cdef : _schema->static_columns()) {
auto value = gen.generate_value(engine, *cdef.type);
decorate_with_timestamps(*_schema, engine, ts_gen, exp_gen, value);
md.add_static_cell(cdef.name_as_text(), std::move(value));
}
}
void random_schema::delete_range(
std::mt19937& engine,
data_model::mutation_description& md,
interval<data_model::mutation_description::key> range,
timestamp_generator ts_gen,
expiry_generator exp_gen) {
auto expiry_opt = exp_gen(engine, timestamp_destination::range_tombstone);
const auto deletion_time = expiry_opt ? expiry_opt->expiry_point : gc_clock::now();
md.add_range_tombstone(std::move(range), tombstone{ts_gen(engine, timestamp_destination::range_tombstone, api::min_timestamp), deletion_time});
}
// Creates the schema's user-defined types and the table itself in `env` by
// executing CQL statements, then replaces `_schema` with the schema object the
// database holds for the created table.
//
// The work runs inside async() and blocks with .get() on each step.
future<> random_schema::create_with_cql(cql_test_env& env) {
return async([this, &env] {
const auto ks_name = _schema->ks_name();
const auto tbl_name = _schema->cf_name();
// Create the UDTs first, in the order dump_udts() returns them.
for (const auto& udt : dump_udts(*_schema)) {
env.execute_cql(udt_to_str(*udt)).get();
// Retry until every database instance visited by map_reduce0 reports the
// new type (results are combined with logical AND).
// NOTE(review): presumably this waits for the type to reach all shards — confirm.
eventually_true([&] () mutable {
return env.db().map_reduce0([&] (replica::database& db) {
return db.user_types().get(ks_name).has_type(udt->get_name());
}, true, std::logical_and<bool>{}).get();
});
}
auto& db = env.local_db();
// Let the schema describe itself and execute the resulting create statement.
auto schema_desc = _schema->describe(
replica::make_schema_describe_helper(_schema, db.as_data_dictionary()),
cql3::describe_option::STMTS);
sstring create_statement = schema_desc.create_statement.value().linearize();
env.execute_cql(create_statement).get();
// From here on use the schema instance registered in the database.
auto& tbl = db.find_column_family(ks_name, tbl_name);
_schema = tbl.schema();
});
}
// Generates mutations for `random_schema`, sorted by decorated key and with
// at most one mutation per key.
//
// All random choices are driven by an engine seeded with `seed`. The number of
// partitions and, for schemas with clustering columns, the number of
// clustering keys and range tombstones per partition are drawn from the given
// distributions. Timestamps and expiry come from `ts_gen` and `exp_gen`.
future<utils::chunked_vector<mutation>> generate_random_mutations(
uint32_t seed,
tests::random_schema& random_schema,
timestamp_generator ts_gen,
expiry_generator exp_gen,
std::uniform_int_distribution<size_t> partition_count_dist,
std::uniform_int_distribution<size_t> clustering_row_count_dist,
std::uniform_int_distribution<size_t> range_tombstone_count_dist) {
auto engine = std::mt19937(seed);
const auto schema_has_clustering_columns = random_schema.schema()->clustering_key_size() > 0;
const auto partition_count = partition_count_dist(engine);
utils::chunked_vector<mutation> muts;
muts.reserve(partition_count);
for (size_t pk = 0; pk != partition_count; ++pk) {
// The loop index doubles as the seed of the partition key.
auto mut = random_schema.new_mutation(pk);
random_schema.set_partition_tombstone(engine, mut, ts_gen, exp_gen);
random_schema.add_static_row(engine, mut, ts_gen, exp_gen);
// Without clustering columns there are no rows or range tombstones to add.
if (!schema_has_clustering_columns) {
muts.emplace_back(mut.build(random_schema.schema()));
continue;
}
const auto clustering_row_count = clustering_row_count_dist(engine);
const auto range_tombstone_count = range_tombstone_count_dist(engine);
// Request enough keys for whichever count is larger; a row is added for
// every key returned, not just for clustering_row_count of them.
auto ckeys = random_schema.make_ckeys(std::max(clustering_row_count, range_tombstone_count));
for (uint32_t ck = 0; ck < ckeys.size(); ++ck) {
random_schema.add_row(engine, mut, ckeys[ck], ts_gen, exp_gen);
co_await coroutine::maybe_yield();
}
for (size_t i = 0; i < range_tombstone_count; ++i) {
// Pick two key indexes and order them to form the tombstone's bounds.
const auto a = tests::random::get_int<size_t>(0, ckeys.size() - 1, engine);
const auto b = tests::random::get_int<size_t>(0, ckeys.size() - 1, engine);
random_schema.delete_range(
engine,
mut,
interval<tests::data_model::mutation_description::key>::make(ckeys.at(std::min(a, b)), ckeys.at(std::max(a, b))),
ts_gen,
exp_gen);
co_await coroutine::maybe_yield();
}
muts.emplace_back(mut.build(random_schema.schema()));
}
std::ranges::sort(muts, [s = random_schema.schema()] (const mutation& a, const mutation& b) {
return a.decorated_key().less_compare(*s, b.decorated_key());
});
// Keep only the first mutation of each run of equal keys.
// NOTE(review): presumably different `pk` seeds can yield the same key — confirm.
auto range = boost::unique(muts, [s = random_schema.schema()] (const mutation& a, const mutation& b) {
return a.decorated_key().equal(*s, b.decorated_key());
});
// Drop the leftover elements past the end of the de-duplicated range.
while (range.end() != muts.end()) {
muts.pop_back();
}
co_return std::move(muts);
}
// Same as the seeded overload, with the seed obtained from tests::random.
future<utils::chunked_vector<mutation>> generate_random_mutations(
        tests::random_schema& random_schema,
        timestamp_generator ts_gen,
        expiry_generator exp_gen,
        std::uniform_int_distribution<size_t> partition_count_dist,
        std::uniform_int_distribution<size_t> clustering_row_count_dist,
        std::uniform_int_distribution<size_t> range_tombstone_count_dist) {
    const auto seed = tests::random::get_int<uint32_t>();
    return generate_random_mutations(
            seed,
            random_schema,
            std::move(ts_gen),
            std::move(exp_gen),
            partition_count_dist,
            clustering_row_count_dist,
            range_tombstone_count_dist);
}
// Generates mutations for `partition_count` partition key seeds, using the
// default timestamp generator and no expiry. The remaining distributions keep
// their declared defaults.
future<utils::chunked_vector<mutation>> generate_random_mutations(tests::random_schema& random_schema, size_t partition_count) {
    // A distribution with equal bounds always yields exactly partition_count.
    const std::uniform_int_distribution<size_t> fixed_partition_count(partition_count, partition_count);
    return generate_random_mutations(random_schema, default_timestamp_generator(), no_expiry_expiry_generator(), fixed_partition_count);
}
} // namespace tests