tests/random_schema: generate_random_mutations(): allow customizing generated data

Allow callers to specify the number of partitions generated, as well as the number of clustering rows and range tombstones generated per partition.
2026-06-02 13:06:57 +00:00 · 2019-07-05 10:17:24 +03:00
parent d2930ffa53
commit cf135c6257
3 changed files with 44 additions and 20 deletions
--- a/tests/mutation_writer_test.cc
+++ b/tests/mutation_writer_test.cc
@@ -241,7 +241,20 @@ SEASTAR_THREAD_TEST_CASE(test_timestamp_based_splitting_mutation_writer) {

    tlog.info("Random schema:\n{}", random_schema.cql());

-    auto muts = tests::generate_random_mutations(random_schema);
+    auto ts_gen = [&, underlying = tests::default_timestamp_generator()] (std::mt19937& engine,
+            tests::timestamp_destination ts_dest, api::timestamp_type min_timestamp) -> api::timestamp_type {
+        if (ts_dest == tests::timestamp_destination::partition_tombstone ||
+                ts_dest == tests::timestamp_destination::row_marker ||
+                ts_dest == tests::timestamp_destination::row_tombstone ||
+                ts_dest == tests::timestamp_destination::collection_tombstone) {
+            if (tests::random::get_int<int>(0, 10, engine)) {
+                return api::missing_timestamp;
+            }
+        }
+        return underlying(engine, ts_dest, min_timestamp);
+    };
+
+    auto muts = tests::generate_random_mutations(random_schema, ts_gen);

    auto classify_fn = [] (api::timestamp_type ts) {
        return int64_t(ts % 2);
--- a/tests/random_schema.cc
+++ b/tests/random_schema.cc
@@ -1007,29 +1007,30 @@ void random_schema::delete_range(
    md.add_range_tombstone(std::move(range), tombstone{ts_gen(engine, timestamp_destination::range_tombstone, api::min_timestamp), {}});
 }

-std::vector<mutation> generate_random_mutations(tests::random_schema& random_schema) {
-    auto ts_gen = [&, underlying = tests::default_timestamp_generator()] (std::mt19937& engine,
-            tests::timestamp_destination ts_dest, api::timestamp_type min_timestamp) -> api::timestamp_type {
-        if (ts_dest == tests::timestamp_destination::partition_tombstone ||
-                ts_dest == tests::timestamp_destination::row_marker ||
-                ts_dest == tests::timestamp_destination::row_tombstone ||
-                ts_dest == tests::timestamp_destination::collection_tombstone) {
-            if (tests::random::get_int<int>(0, 10, engine)) {
-                return api::missing_timestamp;
-            }
-        }
-        return underlying(engine, ts_dest, min_timestamp);
-    };
-
+std::vector<mutation> generate_random_mutations(
+        tests::random_schema& random_schema,
+        timestamp_generator ts_gen,
+        std::uniform_int_distribution<size_t> partition_count_dist,
+        std::uniform_int_distribution<size_t> clustering_row_count_dist,
+        std::uniform_int_distribution<size_t> range_tombstone_count_dist) {
    auto engine = std::mt19937(tests::random::get_int<uint32_t>());
+    const auto schema_has_clustering_columns = random_schema.schema()->clustering_key_size() > 0;
+    const auto partition_count = partition_count_dist(engine);
    auto muts = std::vector<mutation>{};
-    auto ckeys = random_schema.make_ckeys(100);
-    for (uint32_t pk = 0; pk < 10; ++pk) {
+    for (uint32_t pk = 0; pk < partition_count; ++pk) {
        auto mut = random_schema.new_mutation(pk);
-        for (uint32_t ck = 0; ck < 100; ++ck) {
+        random_schema.add_static_row(engine, mut, ts_gen);
+
+        if (!schema_has_clustering_columns) {
+            muts.emplace_back(mut.build(random_schema.schema()));
+            continue;
+        }
+
+        const auto clustering_row_count = clustering_row_count_dist(engine);
+        auto ckeys = random_schema.make_ckeys(clustering_row_count);
+        for (uint32_t ck = 0; ck < clustering_row_count; ++ck) {
            random_schema.add_row(engine, mut, ckeys[ck], ts_gen);
        }
-        random_schema.add_static_row(engine, mut, ts_gen);

        for (size_t i = 0; i < 4; ++i) {
            const auto a = tests::random::get_int<size_t>(0, ckeys.size() - 1, engine);
--- a/tests/random_schema.hh
+++ b/tests/random_schema.hh
@@ -220,6 +220,16 @@ public:
            timestamp_generator ts_gen = default_timestamp_generator());
 };

-std::vector<mutation> generate_random_mutations(tests::random_schema& random_schema);
+/// Generate random mutations using the random schema.
+///
+/// `partition_count_dist` yields the partition count; `clustering_row_count_dist` yields the
+/// row count of *each* partition and is ignored if the schema has no clustering columns.
+/// NOTE(review): `range_tombstone_count_dist` looks unused by this change's .cc hunk - confirm.
+std::vector<mutation> generate_random_mutations(
+        tests::random_schema& random_schema,
+        timestamp_generator ts_gen = default_timestamp_generator(),
+        std::uniform_int_distribution<size_t> partition_count_dist = std::uniform_int_distribution<size_t>(8, 16),
+        std::uniform_int_distribution<size_t> clustering_row_count_dist = std::uniform_int_distribution<size_t>(16, 128),
+        std::uniform_int_distribution<size_t> range_tombstone_count_dist = std::uniform_int_distribution<size_t>(4, 16));

 } // namespace tests