Files
scylladb/test/lib/random_schema.cc
Michał Chojnowski 77dcb2bcda test/lib/random_schema: add some constraints for generated uuid and time/date values
I want to write a test which generates a random table (random schema,
random data) and uses the Python driver to query it.
But it turns out that some values generated by test/lib/random_schema
can't be deserialized by the Python driver.
For example, it doesn't unknown uuid versions, dates before year 1
of after year 9999, or `time` values greater or equal to the number
of nanoseconds in a day.

AFAIK those "driver-illegal" values aren't particularly interesting
for tests which use `random_schema`, so we can just not generate
them.
2025-09-07 00:32:02 +02:00

1262 lines
56 KiB
C++

/*
* Copyright (C) 2019-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#include <algorithm>
#include <boost/range/algorithm/unique.hpp>
#include <seastar/coroutine/maybe_yield.hh>
#include "cql3/cql3_type.hh"
#include "cql3/description.hh"
#include "mutation/mutation.hh"
#include "schema/schema_builder.hh"
#include "test/lib/cql_test_env.hh"
#include "test/lib/eventually.hh"
#include "test/lib/random_schema.hh"
#include "test/lib/random_utils.hh"
#include "types/list.hh"
#include "types/map.hh"
#include "types/set.hh"
#include "types/tuple.hh"
#include "types/user.hh"
#include "utils/assert.hh"
#include "utils/big_decimal.hh"
#include "utils/UUID_gen.hh"
#include "replica/schema_describe_helper.hh"
namespace tests {
type_generator::type_generator(random_schema_specification& spec) : _spec(spec) {
struct simple_type_generator {
data_type type;
data_type operator()(std::mt19937&, is_multi_cell) { return type; }
};
_generators = {
simple_type_generator{byte_type},
simple_type_generator{short_type},
simple_type_generator{int32_type},
simple_type_generator{long_type},
simple_type_generator{ascii_type},
simple_type_generator{bytes_type},
simple_type_generator{utf8_type},
simple_type_generator{boolean_type},
simple_type_generator{date_type},
simple_type_generator{timeuuid_type},
simple_type_generator{timestamp_type},
simple_type_generator{simple_date_type},
simple_type_generator{time_type},
simple_type_generator{uuid_type},
simple_type_generator{inet_addr_type},
simple_type_generator{float_type},
simple_type_generator{double_type},
simple_type_generator{varint_type},
simple_type_generator{decimal_type},
simple_type_generator{duration_type}};
// tuple
_generators.emplace_back(
[this] (std::mt19937& engine, is_multi_cell) {
std::uniform_int_distribution<size_t> count_dist{2, 4};
const auto count = count_dist(engine);
std::vector<data_type> data_types;
for (size_t i = 0; i < count; ++i) {
data_types.emplace_back((*this)(engine, type_generator::is_multi_cell::no));
}
return tuple_type_impl::get_instance(std::move(data_types));
});
// user
_generators.emplace_back(
[this] (std::mt19937& engine, is_multi_cell multi_cell) mutable {
std::uniform_int_distribution<size_t> count_dist{2, 4};
const auto count = count_dist(engine);
std::vector<bytes> field_names;
std::vector<data_type> field_types;
for (size_t i = 0; i < count; ++i) {
field_names.emplace_back(to_bytes(format("f{}", i)));
field_types.emplace_back((*this)(engine, type_generator::is_multi_cell::no));
}
return user_type_impl::get_instance(_spec.keyspace_name(), to_bytes(_spec.udt_name(engine)), std::move(field_names),
std::move(field_types), bool(multi_cell));
});
// list
_generators.emplace_back(
[this] (std::mt19937& engine, is_multi_cell multi_cell) {
auto element_type = (*this)(engine, type_generator::is_multi_cell::no);
return list_type_impl::get_instance(std::move(element_type), bool(multi_cell));
});
// set
_generators.emplace_back(
[this] (std::mt19937& engine, is_multi_cell multi_cell) {
auto element_type = (*this)(engine, type_generator::is_multi_cell::no);
return set_type_impl::get_instance(std::move(element_type), bool(multi_cell));
});
// map
_generators.emplace_back(
[this] (std::mt19937& engine, is_multi_cell multi_cell) {
auto key_type = (*this)(engine, type_generator::is_multi_cell::no);
auto value_type = (*this)(engine, type_generator::is_multi_cell::no);
return map_type_impl::get_instance(std::move(key_type), std::move(value_type), bool(multi_cell));
});
}
data_type type_generator::operator()(std::mt19937& engine, is_multi_cell multi_cell) {
auto dist = std::uniform_int_distribution<size_t>(0, _generators.size() - 1);
auto type = _generators.at(dist(engine))(engine, multi_cell);
// duration type is not allowed in:
// * primary key components
// * as member types of collections
//
// To cover all this, we simply disallow it altogether when multi_cell is
// no, which will be the case in all the above cases.
//
// We also disallow boolean type in keys, due to the poor value distribution
// it provides. Generating keys which have a boolean in it, are prone to
// collision and will result in poor cardinality.
while (!multi_cell && (type == duration_type || type == boolean_type)) {
type = (*this)(engine, multi_cell);
}
return type;
}
namespace {
class default_random_schema_specification : public random_schema_specification {
std::unordered_set<unsigned> _used_table_ids;
std::unordered_set<unsigned> _used_udt_ids;
std::uniform_int_distribution<size_t> _partition_column_count_dist;
std::uniform_int_distribution<size_t> _clustering_column_count_dist;
std::uniform_int_distribution<size_t> _regular_column_count_dist;
std::uniform_int_distribution<size_t> _static_column_count_dist;
type_generator _type_generator;
compress_sstable _compress;
private:
static unsigned generate_unique_id(std::mt19937& engine, std::unordered_set<unsigned>& used_ids) {
std::uniform_int_distribution<unsigned> id_dist(0, 1024);
unsigned id;
do {
id = id_dist(engine);
} while (used_ids.contains(id));
used_ids.insert(id);
return id;
}
std::vector<data_type> generate_types(std::mt19937& engine, std::uniform_int_distribution<size_t>& count_dist,
type_generator::is_multi_cell multi_cell, bool allow_reversed = false) {
std::uniform_int_distribution<uint8_t> reversed_dist{0, uint8_t(allow_reversed)};
std::uniform_int_distribution<uint8_t> multi_cell_dist{0, uint8_t(bool(multi_cell))};
std::vector<data_type> types;
const auto count = count_dist(engine);
for (size_t c = 0; c < count; ++c) {
auto type = _type_generator(engine, type_generator::is_multi_cell(bool(multi_cell_dist(engine))));
if (reversed_dist(engine)) {
types.emplace_back(make_shared<reversed_type_impl>(std::move(type)));
} else {
types.emplace_back(std::move(type));
}
}
return types;
}
public:
default_random_schema_specification(
sstring keyspace_name,
std::uniform_int_distribution<size_t> partition_column_count_dist,
std::uniform_int_distribution<size_t> clustering_column_count_dist,
std::uniform_int_distribution<size_t> regular_column_count_dist,
std::uniform_int_distribution<size_t> static_column_count_dist,
compress_sstable compress)
: random_schema_specification(std::move(keyspace_name))
, _partition_column_count_dist(partition_column_count_dist)
, _clustering_column_count_dist(clustering_column_count_dist)
, _regular_column_count_dist(regular_column_count_dist)
, _static_column_count_dist(static_column_count_dist)
, _type_generator(*this)
, _compress(compress) {
SCYLLA_ASSERT(_partition_column_count_dist.a() > 0);
}
virtual sstring table_name(std::mt19937& engine) override {
return format("table{}", generate_unique_id(engine, _used_table_ids));
}
virtual sstring udt_name(std::mt19937& engine) override {
return format("udt{}", generate_unique_id(engine, _used_udt_ids));
}
virtual std::vector<data_type> partition_key_columns(std::mt19937& engine) override {
return generate_types(engine, _partition_column_count_dist, type_generator::is_multi_cell::no, false);
}
virtual std::vector<data_type> clustering_key_columns(std::mt19937& engine) override {
return generate_types(engine, _clustering_column_count_dist, type_generator::is_multi_cell::no, true);
}
virtual std::vector<data_type> regular_columns(std::mt19937& engine) override {
return generate_types(engine, _regular_column_count_dist, type_generator::is_multi_cell::yes, false);
}
virtual std::vector<data_type> static_columns(std::mt19937& engine) override {
return generate_types(engine, _static_column_count_dist, type_generator::is_multi_cell::yes, false);
}
virtual compress_sstable& compress() override {
return _compress;
}
};
} // anonymous namespace
std::unique_ptr<random_schema_specification> make_random_schema_specification(
sstring keyspace_name,
std::uniform_int_distribution<size_t> partition_column_count_dist,
std::uniform_int_distribution<size_t> clustering_column_count_dist,
std::uniform_int_distribution<size_t> regular_column_count_dist,
std::uniform_int_distribution<size_t> static_column_count_dist,
random_schema_specification::compress_sstable compress) {
return std::make_unique<default_random_schema_specification>(std::move(keyspace_name), partition_column_count_dist, clustering_column_count_dist,
regular_column_count_dist, static_column_count_dist, compress);
}
namespace {
utils::multiprecision_int generate_multiprecision_integer_value(std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
using utils::multiprecision_int;
const auto max_bytes = std::min(size_t(16), std::max(size_t(2), max_size_in_bytes) - 1);
const auto generate_int = [] (std::mt19937& engine, size_t max_bytes) {
if (max_bytes == 8) {
return multiprecision_int(random::get_int<uint64_t>(engine));
} else { // max_bytes < 8
return multiprecision_int(random::get_int<uint64_t>(0, (uint64_t(1) << (max_bytes * 8)) - uint64_t(1), engine));
}
};
if (max_bytes <= 8) {
return generate_int(engine, max_bytes);
} else { // max_bytes > 8
auto ls = multiprecision_int(generate_int(engine, 8));
auto ms = multiprecision_int(generate_int(engine, max_bytes - 8));
return multiprecision_int(ls) + (multiprecision_int(ms) << 64);
}
}
template <typename String>
String generate_string_value(std::mt19937& engine, typename String::value_type min, typename String::value_type max,
size_t min_size_in_bytes, size_t max_size_in_bytes) {
auto size_dist = random::stepped_int_distribution<size_t>{{
{95.0, { 0, 31}},
{ 4.5, { 32, 99}},
{ 0.4, { 100, 999}},
{ 0.1, {1000, 9999}}}};
auto char_dist = std::uniform_int_distribution<typename String::value_type>(min, max);
const auto size = std::clamp(
size_dist(engine),
min_size_in_bytes / sizeof(typename String::value_type),
max_size_in_bytes / sizeof(typename String::value_type));
String str(size, '\0');
for (size_t i = 0; i < size; ++i) {
str[i] = char_dist(engine);
}
return str;
}
std::vector<data_value> generate_frozen_tuple_values(std::mt19937& engine, value_generator& val_gen, const std::vector<data_type>& member_types,
size_t min_size_in_bytes, size_t max_size_in_bytes) {
std::vector<data_value> values;
values.reserve(member_types.size());
const auto member_min_size_in_bytes = min_size_in_bytes / member_types.size();
const auto member_max_size_in_bytes = max_size_in_bytes / member_types.size();
for (auto member_type : member_types) {
values.push_back(val_gen.generate_atomic_value(engine, *member_type, member_min_size_in_bytes, member_max_size_in_bytes));
}
return values;
}
data_model::mutation_description::collection generate_user_value(std::mt19937& engine, const user_type_impl& type,
value_generator& val_gen) {
using md = data_model::mutation_description;
// Non-null fields.
auto fields_num = std::uniform_int_distribution<size_t>(1, type.size())(engine);
auto field_idxs = random::random_subset<unsigned>(type.size(), fields_num, engine);
std::sort(field_idxs.begin(), field_idxs.end());
md::collection collection;
for (auto i: field_idxs) {
collection.elements.push_back({serialize_field_index(i),
val_gen.generate_atomic_value(engine, *type.type(i), value_generator::no_size_in_bytes_limit).serialize_nonnull()});
}
return collection;
}
data_model::mutation_description::collection generate_collection(std::mt19937& engine, const abstract_type& key_type,
const abstract_type& value_type, value_generator& val_gen) {
using md = data_model::mutation_description;
auto key_generator = val_gen.get_atomic_value_generator(key_type);
auto value_generator = val_gen.get_atomic_value_generator(value_type);
auto size_dist = std::uniform_int_distribution<size_t>(0, 16);
const auto size = size_dist(engine);
std::map<bytes, md::atomic_value, serialized_compare> collection{key_type.as_less_comparator()};
for (size_t i = 0; i < size; ++i) {
collection.emplace(key_generator(engine, 0, value_generator::no_size_in_bytes_limit).serialize_nonnull(),
value_generator(engine, 0, value_generator::no_size_in_bytes_limit).serialize().value_or(""));
}
md::collection flat_collection;
flat_collection.elements.reserve(collection.size());
for (auto&& [key, value] : collection) {
flat_collection.elements.emplace_back(md::collection_element{key, value});
}
return flat_collection;
}
std::vector<data_value> generate_frozen_list(std::mt19937& engine, const abstract_type& value_type, value_generator& val_gen,
size_t min_size_in_bytes, size_t max_size_in_bytes) {
auto value_generator = val_gen.get_atomic_value_generator(value_type);
auto size_dist = std::uniform_int_distribution<size_t>(0, 4);
const auto size = std::min(size_dist(engine), max_size_in_bytes / std::max(val_gen.min_size(value_type), size_t(1)));
std::vector<data_value> collection;
if (!size) {
return collection;
}
const auto value_min_size_in_bytes = min_size_in_bytes / size;
const auto value_max_size_in_bytes = max_size_in_bytes / size;
for (size_t i = 0; i < size; ++i) {
collection.emplace_back(value_generator(engine, value_min_size_in_bytes, value_max_size_in_bytes));
}
return collection;
}
std::vector<data_value> generate_frozen_set(std::mt19937& engine, const abstract_type& key_type, value_generator& val_gen,
size_t min_size_in_bytes, size_t max_size_in_bytes) {
auto key_generator = val_gen.get_atomic_value_generator(key_type);
auto size_dist = std::uniform_int_distribution<size_t>(0, 4);
const auto size = std::min(size_dist(engine), max_size_in_bytes / std::max(val_gen.min_size(key_type), size_t(1)));
std::map<bytes, data_value, serialized_compare> collection{key_type.as_less_comparator()};
std::vector<data_value> flat_collection;
if (!size) {
return flat_collection;
}
const auto value_max_size_in_bytes = max_size_in_bytes / size;
const auto value_min_size_in_bytes = min_size_in_bytes / size;
for (size_t i = 0; i < size; ++i) {
auto val = key_generator(engine, value_min_size_in_bytes, value_max_size_in_bytes);
auto serialized_key = val.serialize_nonnull();
collection.emplace(std::move(serialized_key), std::move(val));
}
flat_collection.reserve(collection.size());
for (auto&& element : collection) {
flat_collection.emplace_back(std::move(element.second));
}
return flat_collection;
}
std::vector<std::pair<data_value, data_value>> generate_frozen_map(std::mt19937& engine, const abstract_type& key_type,
const abstract_type& value_type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) {
auto key_generator = val_gen.get_atomic_value_generator(key_type);
auto value_generator = val_gen.get_atomic_value_generator(value_type);
auto size_dist = std::uniform_int_distribution<size_t>(0, 4);
const auto min_item_size_in_bytes = val_gen.min_size(key_type) + val_gen.min_size(value_type);
const auto size = std::min(size_dist(engine), max_size_in_bytes / std::max(min_item_size_in_bytes, size_t(1)));
std::map<bytes, std::pair<data_value, data_value>, serialized_compare> collection(key_type.as_less_comparator());
std::vector<std::pair<data_value, data_value>> flat_collection;
if (!size) {
return flat_collection;
}
const auto item_max_size_in_bytes = max_size_in_bytes / size;
const auto key_max_size_in_bytes = item_max_size_in_bytes / 2;
const auto value_max_size_in_bytes = item_max_size_in_bytes / 2;
const auto item_min_size_in_bytes = min_size_in_bytes / size;
const auto key_min_size_in_bytes = item_min_size_in_bytes / 2;
const auto value_min_size_in_bytes = item_min_size_in_bytes / 2;
for (size_t i = 0; i < size; ++i) {
auto key = key_generator(engine, key_min_size_in_bytes, key_max_size_in_bytes);
auto serialized_key = key.serialize_nonnull();
auto value = value_generator(engine, value_min_size_in_bytes, value_max_size_in_bytes);
collection.emplace(std::move(serialized_key), std::pair(std::move(key), std::move(value)));
}
flat_collection.reserve(collection.size());
for (auto&& element : collection) {
flat_collection.emplace_back(std::move(element.second));
}
return flat_collection;
}
data_value generate_empty_value(std::mt19937&, size_t, size_t) {
return data_value::make_null(empty_type);
}
data_value generate_byte_value(std::mt19937& engine, size_t, size_t) {
return data_value(random::get_int<int8_t>(engine));
}
data_value generate_short_value(std::mt19937& engine, size_t, size_t) {
return data_value(random::get_int<int16_t>(engine));
}
data_value generate_int32_value(std::mt19937& engine, size_t, size_t) {
return data_value(random::get_int<int32_t>(engine));
}
data_value generate_long_value(std::mt19937& engine, size_t, size_t) {
return data_value(random::get_int<int64_t>(engine));
}
data_value generate_ascii_value(std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
return data_value(ascii_native_type{generate_string_value<sstring>(engine, 0, 127, min_size_in_bytes, max_size_in_bytes)});
}
data_value generate_bytes_value(std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
return data_value(generate_string_value<bytes>(engine, std::numeric_limits<bytes::value_type>::min(),
std::numeric_limits<bytes::value_type>::max(), min_size_in_bytes, max_size_in_bytes));
}
data_value generate_utf8_value(std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
auto wstr = generate_string_value<std::wstring>(engine, 0, 0x0FFF, min_size_in_bytes, max_size_in_bytes);
std::locale locale("en_US.utf8");
using codec = std::codecvt<wchar_t, char, std::mbstate_t>;
auto& f = std::use_facet<codec>(locale);
sstring utf8_str(wstr.size() * f.max_length(), '\0');
const wchar_t* from_next;
char* to_next;
std::mbstate_t mb{};
auto res = f.out(mb, &wstr[0], &wstr[wstr.size()], from_next, &utf8_str[0], &utf8_str[utf8_str.size()], to_next);
SCYLLA_ASSERT(res == codec::ok);
utf8_str.resize(to_next - &utf8_str[0]);
return data_value(std::move(utf8_str));
}
data_value generate_boolean_value(std::mt19937& engine, size_t, size_t) {
auto dist = std::uniform_int_distribution<int8_t>(0, 1);
return data_value(bool(dist(engine)));
}
data_value generate_date_value(std::mt19937& engine, size_t, size_t) {
using pt = db_clock::time_point;
// Python driver can't tolerate dates above year 9999.
constexpr auto max_day = std::chrono::sys_days(std::chrono::year{10000}/1/1);
constexpr auto max = std::chrono::sys_time<std::chrono::milliseconds>(max_day).time_since_epoch().count() - 1;
auto x = random::get_int<std::make_unsigned_t<db_clock::rep>>(0, max, engine);
return data_value(date_type_native_type{pt(pt::duration(x))});
}
data_value generate_timeuuid_value(std::mt19937&, size_t, size_t) {
// FIXME: respect the passed engine.
auto b = tests::random::get_bytes(16);
b[6] = (b[6] & 0x0F) | 0x10; // version 1
return timeuuid_type->deserialize(b);
}
data_value generate_timestamp_value(std::mt19937& engine, size_t, size_t) {
using pt = db_clock::time_point;
// Python driver can't tolerate dates above year 9999 or below year 1.
constexpr auto min_day = std::chrono::sys_days(std::chrono::year{1}/1/1);
constexpr auto max_day = std::chrono::sys_days(std::chrono::year{10000}/1/1);
constexpr auto min = std::chrono::sys_time<std::chrono::milliseconds>(min_day).time_since_epoch().count();
constexpr auto max = std::chrono::sys_time<std::chrono::milliseconds>(max_day).time_since_epoch().count() - 1;
auto x = random::get_int<pt::rep>(min, max, engine);
return data_value(pt(pt::duration(x)));
}
data_value generate_simple_date_value(std::mt19937& engine, size_t, size_t) {
return data_value(simple_date_native_type{random::get_int<simple_date_native_type::primary_type>(engine)});
}
data_value generate_time_value(std::mt19937& engine, size_t, size_t) {
// A legal `time` is smaller than the number of nanoseconds in a day.
auto max = std::chrono::nanoseconds(std::chrono::days(1)).count() - 1;
return data_value(time_native_type{random::get_int<time_native_type::primary_type>(0, max, engine)});
}
data_value generate_uuid_value(std::mt19937& engine, size_t, size_t) {
auto b = tests::random::get_bytes(16, engine);
b[6] = (b[6] & 0x0F) | 0x40; // version 4
return data_value(uuid_type->deserialize(b));
}
data_value generate_inet_addr_value(std::mt19937& engine, size_t, size_t) {
return data_value(net::ipv4_address(random::get_int<int32_t>(engine)));
}
data_value generate_float_value(std::mt19937& engine, size_t, size_t) {
return data_value(random::get_real<float>(engine));
}
data_value generate_double_value(std::mt19937& engine, size_t, size_t) {
return data_value(random::get_real<double>(engine));
}
data_value generate_varint_value(std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
return data_value(generate_multiprecision_integer_value(engine, min_size_in_bytes, max_size_in_bytes));
}
data_value generate_decimal_value(std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
auto scale_dist = std::uniform_int_distribution<int32_t>(-8, 8);
return data_value(big_decimal(scale_dist(engine), generate_multiprecision_integer_value(engine,
min_size_in_bytes - sizeof(int32_t), max_size_in_bytes - sizeof(int32_t))));
}
data_value generate_duration_value(std::mt19937& engine, size_t, size_t) {
auto months = months_counter(random::get_int<months_counter::value_type>(engine));
auto days = days_counter(random::get_int<days_counter::value_type>(0, 31, engine));
auto nanoseconds = nanoseconds_counter(random::get_int<nanoseconds_counter::value_type>(86400000000000, engine));
return data_value(cql_duration{months, days, nanoseconds});
}
data_value generate_frozen_tuple_value(std::mt19937& engine, const tuple_type_impl& type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) {
SCYLLA_ASSERT(!type.is_multi_cell());
return make_tuple_value(type.shared_from_this(), generate_frozen_tuple_values(engine, val_gen, type.all_types(), min_size_in_bytes, max_size_in_bytes));
}
data_value generate_frozen_user_value(std::mt19937& engine, const user_type_impl& type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) {
SCYLLA_ASSERT(!type.is_multi_cell());
return make_user_value(type.shared_from_this(), generate_frozen_tuple_values(engine, val_gen, type.all_types(), min_size_in_bytes, max_size_in_bytes));
}
data_model::mutation_description::collection generate_list_value(std::mt19937& engine, const list_type_impl& type, value_generator& val_gen) {
SCYLLA_ASSERT(type.is_multi_cell());
return generate_collection(engine, *type.name_comparator(), *type.value_comparator(), val_gen);
}
data_value generate_frozen_list_value(std::mt19937& engine, const list_type_impl& type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) {
SCYLLA_ASSERT(!type.is_multi_cell());
return make_list_value(type.shared_from_this(),
generate_frozen_list(engine, *type.get_elements_type(), val_gen, min_size_in_bytes, max_size_in_bytes));
}
data_model::mutation_description::collection generate_set_value(std::mt19937& engine, const set_type_impl& type, value_generator& val_gen) {
SCYLLA_ASSERT(type.is_multi_cell());
return generate_collection(engine, *type.name_comparator(), *type.value_comparator(), val_gen);
}
data_value generate_frozen_set_value(std::mt19937& engine, const set_type_impl& type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) {
SCYLLA_ASSERT(!type.is_multi_cell());
return make_set_value(type.shared_from_this(),
generate_frozen_set(engine, *type.get_elements_type(), val_gen, min_size_in_bytes, max_size_in_bytes));
}
data_model::mutation_description::collection generate_map_value(std::mt19937& engine, const map_type_impl& type, value_generator& val_gen) {
SCYLLA_ASSERT(type.is_multi_cell());
return generate_collection(engine, *type.name_comparator(), *type.value_comparator(), val_gen);
}
data_value generate_frozen_map_value(std::mt19937& engine, const map_type_impl& type, value_generator& val_gen, size_t min_size_in_bytes, size_t max_size_in_bytes) {
SCYLLA_ASSERT(!type.is_multi_cell());
return make_map_value(type.shared_from_this(),
generate_frozen_map(engine, *type.get_keys_type(), *type.get_values_type(), val_gen, min_size_in_bytes, max_size_in_bytes));
}
} // anonymous namespace
data_value value_generator::generate_atomic_value(std::mt19937& engine, const abstract_type& type, size_t max_size_in_bytes) {
return generate_atomic_value(engine, type, 0, max_size_in_bytes);
}
data_value value_generator::generate_atomic_value(std::mt19937& engine, const abstract_type& type, size_t min_size_in_bytes, size_t max_size_in_bytes) {
SCYLLA_ASSERT(!type.is_multi_cell());
return get_atomic_value_generator(type)(engine, min_size_in_bytes, max_size_in_bytes);
}
value_generator::value_generator()
: _regular_value_generators{
{empty_type.get(), &generate_empty_value},
{byte_type.get(), &generate_byte_value},
{short_type.get(), &generate_short_value},
{int32_type.get(), &generate_int32_value},
{long_type.get(), &generate_long_value},
{ascii_type.get(), &generate_ascii_value},
{bytes_type.get(), &generate_bytes_value},
{utf8_type.get(), &generate_utf8_value},
{boolean_type.get(), &generate_boolean_value},
{date_type.get(), &generate_date_value},
{timeuuid_type.get(), &generate_timeuuid_value},
{timestamp_type.get(), &generate_timestamp_value},
{simple_date_type.get(), &generate_simple_date_value},
{time_type.get(), &generate_time_value},
{uuid_type.get(), &generate_uuid_value},
{inet_addr_type.get(), &generate_inet_addr_value},
{float_type.get(), &generate_float_value},
{double_type.get(), &generate_double_value},
{varint_type.get(), &generate_varint_value},
{decimal_type.get(), &generate_decimal_value},
{duration_type.get(), &generate_duration_value}} {
std::mt19937 engine;
for (const auto& [regular_type, regular_value_gen] : _regular_value_generators) {
_regular_value_min_sizes.emplace(regular_type, regular_value_gen(engine, size_t{}, size_t{}).serialized_size());
}
}
size_t value_generator::min_size(const abstract_type& type) {
SCYLLA_ASSERT(!type.is_multi_cell());
auto it = _regular_value_min_sizes.find(&type);
if (it != _regular_value_min_sizes.end()) {
return it->second;
}
std::mt19937 engine;
if (auto maybe_user_type = dynamic_cast<const user_type_impl*>(&type)) {
return generate_frozen_user_value(engine, *maybe_user_type, *this, size_t{}, size_t{}).serialized_size();
}
if (auto maybe_tuple_type = dynamic_cast<const tuple_type_impl*>(&type)) {
return generate_frozen_tuple_value(engine, *maybe_tuple_type, *this, size_t{}, size_t{}).serialized_size();
}
if (auto maybe_list_type = dynamic_cast<const list_type_impl*>(&type)) {
return generate_frozen_list_value(engine, *maybe_list_type, *this, size_t{}, size_t{}).serialized_size();
}
if (auto maybe_set_type = dynamic_cast<const set_type_impl*>(&type)) {
return generate_frozen_set_value(engine, *maybe_set_type, *this, size_t{}, size_t{}).serialized_size();
}
if (auto maybe_map_type = dynamic_cast<const map_type_impl*>(&type)) {
return generate_frozen_map_value(engine, *maybe_map_type, *this, size_t{}, size_t{}).serialized_size();
}
if (auto maybe_reversed_type = dynamic_cast<const reversed_type_impl*>(&type)) {
return min_size(*maybe_reversed_type->underlying_type());
}
throw std::runtime_error(fmt::format("Don't know how to calculate min size for unknown type {}", type.name()));
}
value_generator::atomic_value_generator value_generator::get_atomic_value_generator(const abstract_type& type) {
SCYLLA_ASSERT(!type.is_multi_cell());
auto it = _regular_value_generators.find(&type);
if (it != _regular_value_generators.end()) {
return it->second;
}
if (auto maybe_user_type = dynamic_cast<const user_type_impl*>(&type)) {
return [this, maybe_user_type] (std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
return generate_frozen_user_value(engine, *maybe_user_type, *this, min_size_in_bytes, max_size_in_bytes);
};
}
if (auto maybe_tuple_type = dynamic_cast<const tuple_type_impl*>(&type)) {
return [this, maybe_tuple_type] (std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
return generate_frozen_tuple_value(engine, *maybe_tuple_type, *this, min_size_in_bytes, max_size_in_bytes);
};
}
if (auto maybe_list_type = dynamic_cast<const list_type_impl*>(&type)) {
return [this, maybe_list_type] (std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
return generate_frozen_list_value(engine, *maybe_list_type, *this, min_size_in_bytes, max_size_in_bytes);
};
}
if (auto maybe_set_type = dynamic_cast<const set_type_impl*>(&type)) {
return [this, maybe_set_type] (std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
return generate_frozen_set_value(engine, *maybe_set_type, *this, min_size_in_bytes, max_size_in_bytes);
};
}
if (auto maybe_map_type = dynamic_cast<const map_type_impl*>(&type)) {
return [this, maybe_map_type] (std::mt19937& engine, size_t min_size_in_bytes, size_t max_size_in_bytes) {
return generate_frozen_map_value(engine, *maybe_map_type, *this, min_size_in_bytes, max_size_in_bytes);
};
}
if (auto maybe_reversed_type = dynamic_cast<const reversed_type_impl*>(&type)) {
return get_atomic_value_generator(*maybe_reversed_type->underlying_type());
}
throw std::runtime_error(fmt::format("Don't know how to generate value for unknown type {}", type.name()));
}
value_generator::generator value_generator::get_generator(const abstract_type& type) {
auto it = _regular_value_generators.find(&type);
if (it != _regular_value_generators.end()) {
return [gen = it->second] (std::mt19937& engine) -> data_model::mutation_description::value {
return gen(engine, 0, no_size_in_bytes_limit).serialize_nonnull();
};
}
if (auto maybe_user_type = dynamic_cast<const user_type_impl*>(&type)) {
if (maybe_user_type->is_multi_cell()) {
return [this, maybe_user_type] (std::mt19937& engine) -> data_model::mutation_description::value {
return generate_user_value(engine, *maybe_user_type, *this);
};
} else {
return [this, maybe_user_type] (std::mt19937& engine) -> data_model::mutation_description::value {
return generate_frozen_user_value(engine, *maybe_user_type, *this, 0, no_size_in_bytes_limit).serialize_nonnull();
};
}
}
if (auto maybe_tuple_type = dynamic_cast<const tuple_type_impl*>(&type)) {
return [this, maybe_tuple_type] (std::mt19937& engine) -> data_model::mutation_description::value {
return generate_frozen_tuple_value(engine, *maybe_tuple_type, *this, 0, no_size_in_bytes_limit).serialize_nonnull();
};
}
if (auto maybe_list_type = dynamic_cast<const list_type_impl*>(&type)) {
if (maybe_list_type->is_multi_cell()) {
return [this, maybe_list_type] (std::mt19937& engine) -> data_model::mutation_description::value {
return generate_list_value(engine, *maybe_list_type, *this);
};
} else {
return [this, maybe_list_type] (std::mt19937& engine) -> data_model::mutation_description::value {
return generate_frozen_list_value(engine, *maybe_list_type, *this, 0, no_size_in_bytes_limit).serialize_nonnull();
};
}
}
if (auto maybe_set_type = dynamic_cast<const set_type_impl*>(&type)) {
if (maybe_set_type->is_multi_cell()) {
return [this, maybe_set_type] (std::mt19937& engine) -> data_model::mutation_description::value {
return generate_set_value(engine, *maybe_set_type, *this);
};
} else {
return [this, maybe_set_type] (std::mt19937& engine) -> data_model::mutation_description::value {
return generate_frozen_set_value(engine, *maybe_set_type, *this, 0, no_size_in_bytes_limit).serialize_nonnull();
};
}
}
if (auto maybe_map_type = dynamic_cast<const map_type_impl*>(&type)) {
if (maybe_map_type->is_multi_cell()) {
return [this, maybe_map_type] (std::mt19937& engine) -> data_model::mutation_description::value {
return generate_map_value(engine, *maybe_map_type, *this);
};
} else {
return [this, maybe_map_type] (std::mt19937& engine) -> data_model::mutation_description::value {
return generate_frozen_map_value(engine, *maybe_map_type, *this, 0, no_size_in_bytes_limit).serialize_nonnull();
};
}
}
if (auto maybe_reversed_type = dynamic_cast<const reversed_type_impl*>(&type)) {
return get_generator(*maybe_reversed_type->underlying_type());
}
throw std::runtime_error(fmt::format("Don't know how to generate value for unknown type {}", type.name()));
}
data_model::mutation_description::value value_generator::generate_value(std::mt19937& engine, const abstract_type& type) {
return get_generator(type)(engine);
}
timestamp_generator default_timestamp_generator() {
return [] (std::mt19937& engine, timestamp_destination, api::timestamp_type min_timestamp) {
auto ts_dist = std::uniform_int_distribution<api::timestamp_type>(min_timestamp, api::max_timestamp);
return ts_dist(engine);
};
}
timestamp_generator uncompactible_timestamp_generator(uint32_t seed, api::timestamp_type min_timestamp) {
auto engine = std::mt19937(seed);
const auto rank = [] (timestamp_destination dest) -> api::timestamp_type {
switch (dest) {
case timestamp_destination::partition_tombstone:
return 0;
case timestamp_destination::range_tombstone:
return 1;
case timestamp_destination::row_tombstone:
return 2;
case timestamp_destination::collection_tombstone:
return 3;
case timestamp_destination::row_marker:
case timestamp_destination::cell_timestamp:
case timestamp_destination::collection_cell_timestamp:
return 4;
}
};
const auto max_rank = rank(timestamp_destination::collection_cell_timestamp);
const auto margin = 1000;
std::vector<api::timestamp_type> points;
points.push_back(min_timestamp);
for (api::timestamp_type i = 0; i < max_rank; ++i) {
const auto remaining_ranks = max_rank - i;
const auto point = std::uniform_int_distribution<api::timestamp_type>(points.back() + margin, api::max_timestamp - (remaining_ranks * margin))(engine);
points.push_back(point);
}
points.push_back(api::max_timestamp);
return [rank, points] (std::mt19937& engine, timestamp_destination destination, api::timestamp_type curr_min_ts) {
const auto r = rank(destination);
auto ts_dist = std::uniform_int_distribution<api::timestamp_type>(points.at(r), points.at(r + 1) - 1);
return ts_dist(engine);
};
}
expiry_generator no_expiry_expiry_generator() {
return [] (std::mt19937& engine, timestamp_destination destination) -> std::optional<expiry_info> {
return std::nullopt;
};
}
namespace {
schema_ptr build_random_schema(uint32_t seed, random_schema_specification& spec) {
auto engine = std::mt19937{seed};
auto builder = schema_builder(spec.keyspace_name(), spec.table_name(engine));
auto pk_columns = spec.partition_key_columns(engine);
SCYLLA_ASSERT(!pk_columns.empty()); // Let's not pull in boost::test here
for (size_t pk = 0; pk < pk_columns.size(); ++pk) {
builder.with_column(to_bytes(format("pk{}", pk)), std::move(pk_columns[pk]), column_kind::partition_key);
}
auto ck_columns = spec.clustering_key_columns(engine);
for (size_t ck = 0; ck < ck_columns.size(); ++ck) {
builder.with_column(to_bytes(format("ck{}", ck)), std::move(ck_columns[ck]), column_kind::clustering_key);
}
if (!ck_columns.empty()) {
auto static_columns = spec.static_columns(engine);
for (size_t s = 0; s < static_columns.size(); ++s) {
builder.with_column(to_bytes(format("s{}", s)), std::move(static_columns[s]), column_kind::static_column);
}
}
auto regular_columns = spec.regular_columns(engine);
for (size_t r = 0; r < regular_columns.size(); ++r) {
builder.with_column(to_bytes(format("v{}", r)), std::move(regular_columns[r]), column_kind::regular_column);
}
if (spec.compress() == random_schema_specification::compress_sstable::no) {
builder.set_compressor_params(compression_parameters::no_compression());
}
return builder.build();
}
sstring udt_to_str(const user_type_impl& udt) {
auto udt_desc = udt.describe(cql3::with_create_statement::yes);
return udt_desc.create_statement.value().linearize();
}
struct udt_list {
std::vector<const user_type_impl*> vector;
void insert(const user_type_impl* udt) {
auto it = std::find(vector.begin(), vector.end(), udt);
if (it == vector.end()) {
vector.push_back(udt);
}
}
void merge(udt_list other) {
for (auto& udt : other.vector) {
insert(udt);
}
}
};
udt_list dump_udts(const std::vector<data_type>& types) {
udt_list udts;
for (const auto& dt : types) {
const auto* const type = dt.get();
if (auto maybe_user_type = dynamic_cast<const user_type_impl*>(type)) {
udts.merge(dump_udts(maybe_user_type->field_types()));
udts.insert(maybe_user_type);
} else if (auto maybe_tuple_type = dynamic_cast<const tuple_type_impl*>(type)) {
udts.merge(dump_udts(maybe_tuple_type->all_types()));
} else if (auto maybe_list_type = dynamic_cast<const list_type_impl*>(type)) {
udts.merge(dump_udts({maybe_list_type->get_elements_type()}));
} else if (auto maybe_set_type = dynamic_cast<const set_type_impl*>(type)) {
udts.merge(dump_udts({maybe_set_type->get_elements_type()}));
} else if (auto maybe_map_type = dynamic_cast<const map_type_impl*>(type)) {
udts.merge(dump_udts({maybe_map_type->get_keys_type(), maybe_map_type->get_values_type()}));
} else if (auto maybe_reversed_type = dynamic_cast<const reversed_type_impl*>(type)) {
udts.merge(dump_udts({maybe_reversed_type->underlying_type()}));
}
}
return udts;
}
std::vector<const user_type_impl*> dump_udts(const schema& schema) {
udt_list udts;
const auto cdefs_to_types = [] (const schema::const_iterator_range_type& cdefs) -> std::vector<data_type> {
return cdefs
| std::views::transform([] (const column_definition& cdef) { return cdef.type; })
| std::ranges::to<std::vector>();
};
udts.merge(dump_udts(cdefs_to_types(schema.partition_key_columns())));
udts.merge(dump_udts(cdefs_to_types(schema.clustering_key_columns())));
udts.merge(dump_udts(cdefs_to_types(schema.regular_columns())));
udts.merge(dump_udts(cdefs_to_types(schema.static_columns())));
return udts.vector;
}
std::vector<sstring> columns_specs(schema_ptr schema, column_kind kind) {
const auto count = schema->columns_count(kind);
if (!count) {
return {};
}
std::vector<sstring> col_specs;
for (column_count_type c = 0; c < count; ++c) {
const auto& cdef = schema->column_at(kind, c);
col_specs.emplace_back(format("{} {}{}", cdef.name_as_cql_string(), cdef.type->as_cql3_type().to_string(),
kind == column_kind::static_column ? " static" : ""));
}
return col_specs;
}
std::vector<sstring> column_names(schema_ptr schema, column_kind kind) {
const auto count = schema->columns_count(kind);
if (!count) {
return {};
}
std::vector<sstring> col_names;
for (column_count_type c = 0; c < count; ++c) {
const auto& cdef = schema->column_at(kind, c);
col_names.emplace_back(cdef.name_as_cql_string());
}
return col_names;
}
void decorate_with_timestamps(const schema& schema, std::mt19937& engine, timestamp_generator& ts_gen, expiry_generator exp_gen,
data_model::mutation_description::value& value) {
std::visit(
make_visitor(
[&] (data_model::mutation_description::atomic_value& v) {
v.timestamp = ts_gen(engine, timestamp_destination::cell_timestamp, api::min_timestamp);
if (auto expiry_opt = exp_gen(engine, timestamp_destination::cell_timestamp)) {
v.expiring = data_model::mutation_description::expiry_info{expiry_opt->ttl, expiry_opt->expiry_point};
}
},
[&] (data_model::mutation_description::collection& c) {
if (auto ts = ts_gen(engine, timestamp_destination::collection_tombstone, api::min_timestamp);
ts != api::missing_timestamp) {
if (ts == api::max_timestamp) {
// Caveat: leave some headroom for the cells
// having a timestamp larger than the
// tombstone's.
ts--;
}
auto expiry_opt = exp_gen(engine, timestamp_destination::collection_tombstone);
const auto deletion_time = expiry_opt ? expiry_opt->expiry_point : gc_clock::now();
c.tomb = tombstone(ts, deletion_time);
}
for (auto& [ key, value ] : c.elements) {
value.timestamp = ts_gen(engine, timestamp_destination::collection_cell_timestamp, c.tomb.timestamp);
SCYLLA_ASSERT(!c.tomb || value.timestamp > c.tomb.timestamp);
if (auto expiry_opt = exp_gen(engine, timestamp_destination::collection_cell_timestamp)) {
value.expiring = data_model::mutation_description::expiry_info{expiry_opt->ttl, expiry_opt->expiry_point};
}
}
}),
value);
}
} // anonymous namespace
data_model::mutation_description::key random_schema::make_key(uint32_t n, value_generator& gen, schema::const_iterator_range_type columns,
size_t max_size_in_bytes) {
std::mt19937 engine(n);
const size_t max_component_size = max_size_in_bytes / std::distance(columns.begin(), columns.end());
std::vector<bytes> key;
for (const auto& cdef : columns) {
key.emplace_back(gen.generate_atomic_value(engine, *cdef.type, max_component_size).serialize_nonnull());
}
return key;
}
data_model::mutation_description::key random_schema::make_partition_key(uint32_t n, value_generator& gen) const {
return make_key(n, gen, _schema->partition_key_columns(), std::numeric_limits<partition_key::compound::element_type::size_type>::max());
}
data_model::mutation_description::key random_schema::make_clustering_key(uint32_t n, value_generator& gen) const {
SCYLLA_ASSERT(_schema->clustering_key_size() > 0);
return make_key(n, gen, _schema->clustering_key_columns(), std::numeric_limits<clustering_key::compound::element_type::size_type>::max());
}
random_schema::random_schema(uint32_t seed, random_schema_specification& spec)
: _schema(build_random_schema(seed, spec)) {
}
sstring random_schema::cql() const {
auto udts = dump_udts(*_schema);
sstring udts_str;
if (!udts.empty()) {
udts_str = seastar::format("{}", fmt::join(udts |
std::views::transform([] (const user_type_impl* const udt) { return udt_to_str(*udt); }), "\n"));
}
std::vector<sstring> col_specs;
for (auto kind : {column_kind::partition_key, column_kind::clustering_key, column_kind::regular_column, column_kind::static_column}) {
auto cols = columns_specs(_schema, kind);
std::move(cols.begin(), cols.end(), std::back_inserter(col_specs));
}
std::string primary_key;
auto partition_column_names = column_names(_schema, column_kind::partition_key);
auto clustering_key_names = column_names(_schema, column_kind::clustering_key);
if (!clustering_key_names.empty()) {
primary_key = fmt::format("({}), {}", fmt::join(partition_column_names, ", "), fmt::join(clustering_key_names, ", "));
} else {
primary_key = fmt::format("{}", fmt::join(partition_column_names, ", "));
}
// FIXME include the clustering column orderings
return seastar::format(
"{}\nCREATE TABLE {}.{} (\n\t{}\n\tPRIMARY KEY ({}))",
udts_str,
_schema->ks_name(),
_schema->cf_name(),
fmt::join(col_specs, ",\n\t"),
primary_key);
}
data_model::mutation_description::key random_schema::make_pkey(uint32_t n) {
value_generator g;
return make_partition_key(n, g);
}
std::vector<data_model::mutation_description::key> random_schema::make_pkeys(size_t n) {
std::set<dht::decorated_key, dht::ring_position_less_comparator> keys{dht::ring_position_less_comparator{*_schema}};
value_generator val_gen;
uint32_t i{0};
while (keys.size() < n) {
keys.emplace(dht::decorate_key(*_schema, partition_key::from_exploded(make_partition_key(i, val_gen))));
++i;
}
return keys
| std::views::transform([] (const dht::decorated_key& dkey) { return dkey.key().explode(); })
| std::ranges::to<std::vector<data_model::mutation_description::key>>();
}
data_model::mutation_description::key random_schema::make_ckey(uint32_t n) {
value_generator g;
return make_clustering_key(n, g);
}
std::vector<data_model::mutation_description::key> random_schema::make_ckeys(size_t n) {
std::set<clustering_key, clustering_key::less_compare> keys{clustering_key::less_compare{*_schema}};
value_generator val_gen;
for (uint32_t i = 0; i < n; i++) {
keys.emplace(clustering_key::from_exploded(make_clustering_key(i, val_gen)));
}
return keys
| std::views::transform([] (const clustering_key& ckey) { return ckey.explode(); })
| std::ranges::to<std::vector<data_model::mutation_description::key>>();
}
data_model::mutation_description random_schema::new_mutation(data_model::mutation_description::key pkey) {
return data_model::mutation_description(std::move(pkey));
}
data_model::mutation_description random_schema::new_mutation(uint32_t n) {
return new_mutation(make_pkey(n));
}
void random_schema::set_partition_tombstone(std::mt19937& engine, data_model::mutation_description& md, timestamp_generator ts_gen,
expiry_generator exp_gen) {
if (const auto ts = ts_gen(engine, timestamp_destination::partition_tombstone, api::min_timestamp); ts != api::missing_timestamp) {
auto expiry_opt = exp_gen(engine, timestamp_destination::partition_tombstone);
const auto deletion_time = expiry_opt ? expiry_opt->expiry_point : gc_clock::now();
md.set_partition_tombstone(tombstone(ts, deletion_time));
}
}
void random_schema::add_row(std::mt19937& engine, data_model::mutation_description& md, data_model::mutation_description::key ckey,
timestamp_generator ts_gen, expiry_generator exp_gen) {
value_generator gen;
for (const auto& cdef : _schema->regular_columns()) {
auto value = gen.generate_value(engine, *cdef.type);
decorate_with_timestamps(*_schema, engine, ts_gen, exp_gen, value);
md.add_clustered_cell(ckey, cdef.name_as_text(), std::move(value));
}
if (auto ts = ts_gen(engine, timestamp_destination::row_marker, api::min_timestamp); ts != api::missing_timestamp) {
if (auto expiry_opt = exp_gen(engine, timestamp_destination::row_marker)) {
md.add_clustered_row_marker(ckey, tests::data_model::mutation_description::row_marker(ts, expiry_opt->ttl, expiry_opt->expiry_point));
} else {
md.add_clustered_row_marker(ckey, ts);
}
}
if (auto ts = ts_gen(engine, timestamp_destination::row_tombstone, api::min_timestamp); ts != api::missing_timestamp) {
auto expiry_opt = exp_gen(engine, timestamp_destination::row_tombstone);
const auto deletion_time = expiry_opt ? expiry_opt->expiry_point : gc_clock::now();
md.add_clustered_row_tombstone(ckey, row_tombstone{tombstone{ts, deletion_time}});
}
}
void random_schema::add_row(std::mt19937& engine, data_model::mutation_description& md, uint32_t n, timestamp_generator ts_gen,
expiry_generator exp_gen) {
add_row(engine, md, make_ckey(n), std::move(ts_gen), std::move(exp_gen));
}
void random_schema::add_static_row(std::mt19937& engine, data_model::mutation_description& md, timestamp_generator ts_gen, expiry_generator exp_gen) {
value_generator gen;
for (const auto& cdef : _schema->static_columns()) {
auto value = gen.generate_value(engine, *cdef.type);
decorate_with_timestamps(*_schema, engine, ts_gen, exp_gen, value);
md.add_static_cell(cdef.name_as_text(), std::move(value));
}
}
void random_schema::delete_range(
std::mt19937& engine,
data_model::mutation_description& md,
interval<data_model::mutation_description::key> range,
timestamp_generator ts_gen,
expiry_generator exp_gen) {
auto expiry_opt = exp_gen(engine, timestamp_destination::range_tombstone);
const auto deletion_time = expiry_opt ? expiry_opt->expiry_point : gc_clock::now();
md.add_range_tombstone(std::move(range), tombstone{ts_gen(engine, timestamp_destination::range_tombstone, api::min_timestamp), deletion_time});
}
future<> random_schema::create_with_cql(cql_test_env& env) {
return async([this, &env] {
const auto ks_name = _schema->ks_name();
const auto tbl_name = _schema->cf_name();
for (const auto& udt : dump_udts(*_schema)) {
env.execute_cql(udt_to_str(*udt)).get();
eventually_true([&] () mutable {
return env.db().map_reduce0([&] (replica::database& db) {
return db.user_types().get(ks_name).has_type(udt->get_name());
}, true, std::logical_and<bool>{}).get();
});
}
auto& db = env.local_db();
auto schema_desc = _schema->describe(
replica::make_schema_describe_helper(_schema, db.as_data_dictionary()),
cql3::describe_option::STMTS);
sstring create_statement = schema_desc.create_statement.value().linearize();
env.execute_cql(create_statement).get();
auto& tbl = db.find_column_family(ks_name, tbl_name);
_schema = tbl.schema();
});
}
future<utils::chunked_vector<mutation>> generate_random_mutations(
uint32_t seed,
tests::random_schema& random_schema,
timestamp_generator ts_gen,
expiry_generator exp_gen,
std::uniform_int_distribution<size_t> partition_count_dist,
std::uniform_int_distribution<size_t> clustering_row_count_dist,
std::uniform_int_distribution<size_t> range_tombstone_count_dist) {
auto engine = std::mt19937(seed);
const auto schema_has_clustering_columns = random_schema.schema()->clustering_key_size() > 0;
const auto partition_count = partition_count_dist(engine);
utils::chunked_vector<mutation> muts;
muts.reserve(partition_count);
for (size_t pk = 0; pk != partition_count; ++pk) {
auto mut = random_schema.new_mutation(pk);
random_schema.set_partition_tombstone(engine, mut, ts_gen, exp_gen);
random_schema.add_static_row(engine, mut, ts_gen, exp_gen);
if (!schema_has_clustering_columns) {
muts.emplace_back(mut.build(random_schema.schema()));
continue;
}
const auto clustering_row_count = clustering_row_count_dist(engine);
const auto range_tombstone_count = range_tombstone_count_dist(engine);
auto ckeys = random_schema.make_ckeys(std::max(clustering_row_count, range_tombstone_count));
for (uint32_t ck = 0; ck < ckeys.size(); ++ck) {
random_schema.add_row(engine, mut, ckeys[ck], ts_gen, exp_gen);
co_await coroutine::maybe_yield();
}
for (size_t i = 0; i < range_tombstone_count; ++i) {
const auto a = tests::random::get_int<size_t>(0, ckeys.size() - 1, engine);
const auto b = tests::random::get_int<size_t>(0, ckeys.size() - 1, engine);
random_schema.delete_range(
engine,
mut,
interval<tests::data_model::mutation_description::key>::make(ckeys.at(std::min(a, b)), ckeys.at(std::max(a, b))),
ts_gen,
exp_gen);
co_await coroutine::maybe_yield();
}
muts.emplace_back(mut.build(random_schema.schema()));
}
std::ranges::sort(muts, [s = random_schema.schema()] (const mutation& a, const mutation& b) {
return a.decorated_key().less_compare(*s, b.decorated_key());
});
auto range = boost::unique(muts, [s = random_schema.schema()] (const mutation& a, const mutation& b) {
return a.decorated_key().equal(*s, b.decorated_key());
});
while (range.end() != muts.end()) {
muts.pop_back();
}
co_return std::move(muts);
}
future<utils::chunked_vector<mutation>> generate_random_mutations(
tests::random_schema& random_schema,
timestamp_generator ts_gen,
expiry_generator exp_gen,
std::uniform_int_distribution<size_t> partition_count_dist,
std::uniform_int_distribution<size_t> clustering_row_count_dist,
std::uniform_int_distribution<size_t> range_tombstone_count_dist) {
return generate_random_mutations(tests::random::get_int<uint32_t>(), random_schema, std::move(ts_gen), std::move(exp_gen), partition_count_dist,
clustering_row_count_dist, range_tombstone_count_dist);
}
future<utils::chunked_vector<mutation>> generate_random_mutations(tests::random_schema& random_schema, size_t partition_count) {
return generate_random_mutations(
random_schema,
default_timestamp_generator(),
no_expiry_expiry_generator(),
std::uniform_int_distribution<size_t>(partition_count, partition_count));
}
} // namespace tests