Files
scylladb/test/lib/random_schema.hh
Avi Kivity 605bf6e221 range.hh: retire
range.hh was deprecated in bd794629f9 (2020) since its names
conflict with the C++ library concept of an iterator range. The name
::range also mapped to the dangerous wrapping_interval rather than
nonwrapping_interval.

Complete the deprecation by removing range.hh and replacing all the
aliases by the names they point to from the interval library. Note
this now exposes uses of wrapping intervals as they are now explicit.

The unit tests are renamed and range.hh is deleted.

Closes scylladb/scylladb#17428
2024-02-21 00:24:25 +02:00

270 lines
12 KiB
C++

/*
* Copyright (C) 2019-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#pragma once
#include "schema/schema.hh"
#include "test/lib/data_model.hh"
///
/// Random schema and random data generation related utilities.
///
class cql_test_env;
namespace tests {
class random_schema_specification {
sstring _keyspace_name;
public:
explicit random_schema_specification(sstring keyspace_name) : _keyspace_name(std::move(keyspace_name)) { }
virtual ~random_schema_specification() = default;
// Should be the same for all invocations
const sstring& keyspace_name() const { return _keyspace_name; }
// Should be unique on the instance level.
virtual sstring table_name(std::mt19937& engine) = 0;
// Should be unique on the instance level.
virtual sstring udt_name(std::mt19937& engine) = 0;
virtual std::vector<data_type> partition_key_columns(std::mt19937& engine) = 0;
virtual std::vector<data_type> clustering_key_columns(std::mt19937& engine) = 0;
virtual std::vector<data_type> regular_columns(std::mt19937& engine) = 0;
virtual std::vector<data_type> static_columns(std::mt19937& engine) = 0;
};
/// Helper class that can generate a subset of all valid combination of types.
///
/// Can be used to implement concrete random schema specifications.
/// TODO: counters
class type_generator {
public:
using is_multi_cell = bool_class<class is_multi_cell_tag>;
private:
using generator = std::function<data_type(std::mt19937&, is_multi_cell)>;
private:
random_schema_specification& _spec;
std::vector<generator> _generators;
public:
explicit type_generator(random_schema_specification& spec);
// This is captured.
type_generator(type_generator&&) = delete;
data_type operator()(std::mt19937& engine, is_multi_cell multi_cell);
};
/// The default random schema specification.
///
/// Warning: reusing the same keyspace_name across specs can lead to user
/// defined type clashes.
std::unique_ptr<random_schema_specification> make_random_schema_specification(
sstring keyspace_name,
std::uniform_int_distribution<size_t> partition_column_count_dist = std::uniform_int_distribution<size_t>(1, 4),
std::uniform_int_distribution<size_t> clustering_column_count_dist = std::uniform_int_distribution<size_t>(0, 4),
std::uniform_int_distribution<size_t> regular_column_count_dist = std::uniform_int_distribution<size_t>(1, 4),
std::uniform_int_distribution<size_t> static_column_count_dist = std::uniform_int_distribution<size_t>(0, 4));
/// Generate values for any type.
///
/// Values sizes:
/// * string types (ascii, utf8, bytes):
/// - 95.0% [ 0, 32) characters.
/// - 4.5% [ 32, 100) characters.
/// - 0.4% [ 100, 1000) characters.
/// - 0.1% [1000, 10000) characters.
/// * collections: max 16 elements.
/// * frozen collections: max 4 elements.
/// For native types, the intent is to cover the entire value range.
/// TODO: counters
class value_generator {
public:
using atomic_value_generator = std::function<data_value(std::mt19937&, size_t, size_t)>;
using generator = std::function<data_model::mutation_description::value(std::mt19937&)>;
static const size_t no_size_in_bytes_limit{std::numeric_limits<size_t>::max()};
private:
std::unordered_map<const abstract_type*, atomic_value_generator> _regular_value_generators;
std::unordered_map<const abstract_type*, size_t> _regular_value_min_sizes;
public:
value_generator();
value_generator(value_generator&&) = delete;
/// Only for atomic types.
size_t min_size(const abstract_type& type);
atomic_value_generator get_atomic_value_generator(const abstract_type& type);
// Generate a value for the given type, according to the provided size constraints.
// Controlling the size of values only really works with string-like types and collections of these.
data_value generate_atomic_value(std::mt19937& engine, const abstract_type& type, size_t max_size_in_bytes = no_size_in_bytes_limit);
data_value generate_atomic_value(std::mt19937& engine, const abstract_type& type, size_t min_size_in_bytes, size_t max_size_in_bytes);
generator get_generator(const abstract_type& type);
data_model::mutation_description::value generate_value(std::mt19937& engine, const abstract_type& type);
};
enum class timestamp_destination {
partition_tombstone,
row_marker,
cell_timestamp,
collection_cell_timestamp,
row_tombstone,
collection_tombstone,
range_tombstone,
};
/// Functor that generates timestamps for various destinations.
using timestamp_generator = std::function<api::timestamp_type(std::mt19937& engine, timestamp_destination destination,
api::timestamp_type min_timestamp)>;
/// The default timestamp generator.
///
/// Generates fully random timestamps in the range:
/// [api::min_timestamp, api::max_timestamp]
/// Ignores timestamp destination.
timestamp_generator default_timestamp_generator();
struct expiry_info {
gc_clock::duration ttl;
gc_clock::time_point expiry_point;
};
/// Functor that generates expiry for various destinations.
/// A disengaged optional means the cell doesn't expire. When the destination is
/// a tombstone, gc_clock::now() + schema::gc_grace_seconds() will be used as
/// the expiry instead.
/// The `expiry_info::ttl` is always ignored for tombstone destinations (because
/// they have a fixed ttl as determined by `schema::gc_grace_seconds()`.
using expiry_generator = std::function<std::optional<expiry_info>(std::mt19937& engine, timestamp_destination destination)>;
/// Always returns disengaged optionals.
expiry_generator no_expiry_expiry_generator();
/// Utility class wrapping a randomly generated schema.
///
/// The schema is generated when the class is constructed.
/// The generation is deterministic, the same seed will generate the same schema.
class random_schema {
schema_ptr _schema;
private:
static data_model::mutation_description::key make_key(uint32_t n, value_generator& gen, schema::const_iterator_range_type columns,
size_t max_size_in_bytes);
data_model::mutation_description::key make_partition_key(uint32_t n, value_generator& gen) const;
data_model::mutation_description::key make_clustering_key(uint32_t n, value_generator& gen) const;
public:
/// Create a random schema.
///
/// Passing the same seed and spec will yield the same schema. Part of this
/// guarantee rests on the spec, which, if a custom one is used, should
/// make sure to honor this guarantee.
random_schema(uint32_t seed, random_schema_specification& spec);
schema_ptr schema() const {
return _schema;
}
sstring cql() const;
/// Create the generated schema as a table via CQL.
///
/// Along with all its dependencies, like UDTs.
/// The underlying schema_ptr instance is replaced with the one from the
/// local table instance.
future<> create_with_cql(cql_test_env& env);
/// Make a partition key which is n-th in some arbitrary sequence of keys.
///
/// There is no particular order for the keys, they're not in ring order.
/// This method is deterministic, the pair of the seed used to generate the
/// schema and `n` will map to the same generated value.
data_model::mutation_description::key make_pkey(uint32_t n);
/// Make n partition keys.
///
/// Keys are in ring order.
/// This method is deterministic, the pair of the seed used to generate the
/// schema and `n` will map to the same generated values.
std::vector<data_model::mutation_description::key> make_pkeys(size_t n);
/// Make a clustering key which is n-th in some arbitrary sequence of keys.
///
/// There is no particular order for the keys, they're not in clustering order.
/// This method is deterministic, the pair of the seed used to generate the
/// schema and `n` will map to the same generated value.
data_model::mutation_description::key make_ckey(uint32_t n);
/// Make up to n clustering keys.
///
/// Key are in clustering order.
/// This method is deterministic, the pair of the seed used to generate the
/// schema and `n` will map to the same generated values.
/// Fewer than n keys may be returned if the schema limits the clustering keys space.
std::vector<data_model::mutation_description::key> make_ckeys(size_t n);
data_model::mutation_description new_mutation(data_model::mutation_description::key pkey);
/// Make a new mutation with a key produced via `make_pkey(n)`.
data_model::mutation_description new_mutation(uint32_t n);
/// Set the partition tombstone
void set_partition_tombstone(std::mt19937& engine, data_model::mutation_description& md,
timestamp_generator ts_gen = default_timestamp_generator(),
expiry_generator exp_gen = no_expiry_expiry_generator());
void add_row(std::mt19937& engine, data_model::mutation_description& md, data_model::mutation_description::key ckey,
timestamp_generator ts_gen = default_timestamp_generator(),
expiry_generator exp_gen = no_expiry_expiry_generator());
/// Add a new row with a key produced via `make_ckey(n)`.
void add_row(std::mt19937& engine, data_model::mutation_description& md, uint32_t n, timestamp_generator ts_gen = default_timestamp_generator(),
expiry_generator exp_gen = no_expiry_expiry_generator());
void add_static_row(std::mt19937& engine, data_model::mutation_description& md, timestamp_generator ts_gen = default_timestamp_generator(),
expiry_generator exp_gen = no_expiry_expiry_generator());
void delete_range(
std::mt19937& engine,
data_model::mutation_description& md,
nonwrapping_interval<data_model::mutation_description::key> range,
timestamp_generator ts_gen = default_timestamp_generator(),
expiry_generator exp_gen = no_expiry_expiry_generator());
};
/// Generate random mutations using the random schema.
///
/// `clustering_row_count_dist` and `range_tombstone_count_dist` will be used to
/// generate the respective counts for *each* partition. These params are
/// ignored if the schema has no clustering columns.
/// Mutations are returned in ring order. Does not contain duplicate partitions.
/// Futurized to avoid stalls.
future<std::vector<mutation>> generate_random_mutations(
uint32_t seed,
tests::random_schema& random_schema,
timestamp_generator ts_gen = default_timestamp_generator(),
expiry_generator exp_gen = no_expiry_expiry_generator(),
std::uniform_int_distribution<size_t> partition_count_dist = std::uniform_int_distribution<size_t>(8, 16),
std::uniform_int_distribution<size_t> clustering_row_count_dist = std::uniform_int_distribution<size_t>(16, 128),
std::uniform_int_distribution<size_t> range_tombstone_count_dist = std::uniform_int_distribution<size_t>(4, 16));
future<std::vector<mutation>> generate_random_mutations(
tests::random_schema& random_schema,
timestamp_generator ts_gen = default_timestamp_generator(),
expiry_generator exp_gen = no_expiry_expiry_generator(),
std::uniform_int_distribution<size_t> partition_count_dist = std::uniform_int_distribution<size_t>(8, 16),
std::uniform_int_distribution<size_t> clustering_row_count_dist = std::uniform_int_distribution<size_t>(16, 128),
std::uniform_int_distribution<size_t> range_tombstone_count_dist = std::uniform_int_distribution<size_t>(4, 16));
/// Generate exactly partition_count partitions. See the more general overload above.
future<std::vector<mutation>> generate_random_mutations(tests::random_schema& random_schema, size_t partition_count);
} // namespace tests