/* * Copyright (C) 2024-present ScyllaDB */ /* * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 */ #include "test/lib/scylla_test_case.hh" #include #include #include #include "bytes_ostream.hh" #include "test/lib/log.hh" #include "test/lib/random_utils.hh" #include "test/lib/sstable_test_env.hh" #include "types/types.hh" #include "types/comparable_bytes.hh" #include "utils/big_decimal.hh" #include "utils/multiprecision_int.hh" #include "utils/UUID.hh" #include "utils/UUID_gen.hh" BOOST_AUTO_TEST_CASE(test_comparable_bytes_opt) { BOOST_REQUIRE(comparable_bytes::from_data_value(data_value::make_null(int32_type)) == comparable_bytes_opt()); BOOST_REQUIRE(comparable_bytes::from_serialized_bytes(*int32_type, managed_bytes_opt()) == comparable_bytes_opt()); } BOOST_AUTO_TEST_CASE(test_bool) { auto test_bool_value = [] (comparable_bytes_opt& comparable_bytes, bool value) { BOOST_REQUIRE_EQUAL(comparable_bytes->size(), 1); BOOST_REQUIRE_MESSAGE(comparable_bytes->as_managed_bytes_view().front() == uint8_t(value), fmt::format("comparable bytes encode failed for bool value : {}", value)); BOOST_REQUIRE_MESSAGE(value == comparable_bytes->to_data_value(boolean_type), fmt::format("comparable bytes decode failed for bool value : {}", value)); }; auto cb_false = comparable_bytes::from_data_value(false); test_bool_value(cb_false, false); auto cb_true = comparable_bytes::from_data_value(true); test_bool_value(cb_true, true); // Verify order BOOST_REQUIRE(cb_false < cb_true); } void byte_comparable_test(std::vector&& test_data) { struct test_item { managed_bytes serialized_bytes; comparable_bytes comparable_bytes; }; std::vector test_items; // test encode/decode const auto test_data_type = test_data.at(0).type(); testlog.info("testing type '{}' with {} items...", test_data_type.get()->cql3_type_name(), test_data.size()); testlog.trace("test data : {}", test_data); for (const data_value& value : test_data) { // verify comparable bytes encode/decode auto original_serialized_bytes = managed_bytes(value.serialize_nonnull()); comparable_bytes comparable_bytes(*test_data_type, original_serialized_bytes); auto decoded_serialized_bytes = comparable_bytes.to_serialized_bytes(*test_data_type).value(); if (test_data_type == decimal_type) { // The `decimal_type` requires special handling because its comparable byte representation // normalizes the scale and unscaled value. This means the serialized bytes after // decoding from comparable bytes might not be identical to the original serialized bytes, // despite them representing the same decimal value. // For instance, 2e-1 (scale=1, unscaled_value=2) and 20e-2 (scale=2, unscaled_value=20) // are equivalent decimals but have different serialized forms. Comparable byte encoding // will normalize them. So, instead of directly comparing serialized bytes, compare the // deserialized decoded value against the original decimal value. auto decoded_value = decimal_type->deserialize_value(managed_bytes_view(decoded_serialized_bytes)); BOOST_REQUIRE_MESSAGE(value == decoded_value, seastar::value_of([&] () { return fmt::format("comparable bytes encode/decode failed for value : {}", value); })); } else { // Compare the serialized bytes directly BOOST_REQUIRE_MESSAGE(original_serialized_bytes == decoded_serialized_bytes, seastar::value_of([&] () { return fmt::format("comparable bytes encode/decode failed for value : {}", value); })); } // collect the data in a vector to verify ordering later test_items.emplace_back(original_serialized_bytes, comparable_bytes); }; // Verify that decoding succeeds even when the comparable bytes contain // extra data appended after the value to be converted. // This required for decode to work on composite types. bytes_ostream bos; // Select an item from the middle to test this case as front and back items // are often edge cases (e.g. min/max values). const auto item_id = test_items.size() / 2; auto test_value = test_items.at(item_id); auto cb_view = test_value.comparable_bytes.as_managed_bytes_view(); bos.write(cb_view); bos.write(bytes("this-still-should-work")); auto cb = comparable_bytes(std::move(bos).to_managed_bytes()); auto decoded_value = cb.to_data_value(test_data_type); BOOST_REQUIRE_MESSAGE(test_data.at(item_id) == decoded_value, seastar::value_of([&] () { return fmt::format("comparable bytes decode failed with appended bytes; expected : {}; actual : {}", test_data.at(0), decoded_value); })); // Sort the items based on comparable bytes std::ranges::sort(test_items, [] (const test_item& a, const test_item& b) { return a.comparable_bytes < b.comparable_bytes; }); // Verify that ordering them based on comparable bytes, sorts the values as expected BOOST_REQUIRE_MESSAGE(std::ranges::is_sorted(test_items, [&test_data_type] (const test_item& a, const test_item& b) { return test_data_type->compare(a.serialized_bytes, b.serialized_bytes) == std::strong_ordering::less; }), "sorting items based on comparable bytes failed"); } template static std::vector generate_integer_test_data( // Function to create a data_value from the underlying integer type. std::function create_data_value_func = {}, // Function to filter out values that should not be included in the test data. std::function filter_func = {}) { if (!create_data_value_func) { if constexpr (std::is_signed_v) { // If a custom create_data_value_fn is not provided, create data_value // directly from the underlying integer type. create_data_value_func = [](int_type num) { return data_value(num); }; } else { // For unsigned integer types, the caller must provide a custom create_data_value_fn, // as the data_value class doesn't have an unambiguous constructor for unsigned values. SCYLLA_ASSERT(false); } } std::vector test_data; auto push_to_test_data = [&] (int_type num) { for (int_type n : std::initializer_list{num, ~num}) { if (!filter_func || filter_func(n)) { test_data.push_back(create_data_value_func(n)); } } }; // Generates test values by shifting bit(1) through all possible positions and then deriving // multiple test cases from each value. This helps test edge cases and boundary conditions // by covering values with different bit patterns across the entire range of the type. auto num = int_type(1); auto num_bits = sizeof(int_type) * 8; test_data.reserve(num_bits * 4); while (num_bits-- > 0) { // for every num, we push [num, ~num, num - 1, ~(num - 1)] to the test data. push_to_test_data(num); if (num != std::numeric_limits::min()) { push_to_test_data(num - 1); } num <<= 1; } return test_data; } BOOST_AUTO_TEST_CASE(test_tinyint) { byte_comparable_test(generate_integer_test_data()); } BOOST_AUTO_TEST_CASE(test_smallint) { byte_comparable_test(generate_integer_test_data()); } BOOST_AUTO_TEST_CASE(test_int) { byte_comparable_test(generate_integer_test_data()); } BOOST_AUTO_TEST_CASE(test_bigint) { byte_comparable_test(generate_integer_test_data()); } BOOST_AUTO_TEST_CASE(test_date) { byte_comparable_test(generate_integer_test_data([] (uint32_t days) { return data_value(simple_date_native_type{days}); })); } BOOST_AUTO_TEST_CASE(test_time) { constexpr int64_t max_ns_in_a_day = 24L * 60 * 60 * 1000 * 1000 * 1000; byte_comparable_test(generate_integer_test_data([] (int64_t nanoseconds) { return data_value(time_native_type{nanoseconds}); }, [] (int64_t ns_candidate) { // allow only valid nanosecond values return ns_candidate >= 0 && ns_candidate <= max_ns_in_a_day; })); } BOOST_AUTO_TEST_CASE(test_timestamp) { byte_comparable_test(generate_integer_test_data([] (db_clock::rep milliseconds) { return data_value(db_clock::time_point(db_clock::duration(milliseconds))); })); } template static std::vector generate_floating_point_test_data() { std::vector test_data; for (fp_type n : {-1e30f, -1e3f, -1.0f, -0.001f, -1e-30f, -0.0f, 0.0f, 1e-30f, 0.001f, 1.0f, 1e3f, 1e30f, -std::numeric_limits::min(), std::numeric_limits::min(), -std::numeric_limits::max(), std::numeric_limits::max(), -std::numeric_limits::infinity(), std::numeric_limits::infinity(), std::numeric_limits::quiet_NaN()}) { test_data.emplace_back(n); } // double has a few more test items int random_exponent_min = -30, random_exponent_max = 30; if constexpr (std::is_same_v) { for (fp_type n : std::vector{-1e200, -1e100, 1e100, 1e200, -std::numeric_limits::min(), std::numeric_limits::min(), -std::numeric_limits::max(), std::numeric_limits::max()}) { test_data.emplace_back(n); } random_exponent_min = -300; random_exponent_max = 300; } // generate some random test data for (int i = 0; i < 100; i++) { const auto significand = tests::random::get_int(std::numeric_limits::min(), std::numeric_limits::max()); const auto scale = std::pow(10, tests::random::get_int(random_exponent_min, random_exponent_max)); test_data.push_back(fp_type(significand * scale)); } return test_data; } BOOST_AUTO_TEST_CASE(test_float) { byte_comparable_test(generate_floating_point_test_data()); } BOOST_AUTO_TEST_CASE(test_double) { byte_comparable_test(generate_floating_point_test_data()); } void encode_varint_length(uint64_t length, int64_t sign_mask, bytes_ostream& out); uint64_t decode_varint_length(managed_bytes_view& src, int64_t sign_only_byte); BOOST_AUTO_TEST_CASE(test_varint_length_encoding) { for (int shift = 0; shift < 64; shift++) { uint64_t length = (uint64_t(1) << shift) - 1; for (int64_t sign_mask : {0, -1}) { bytes_ostream out; encode_varint_length(length, sign_mask, out); auto mb = std::move(out).to_managed_bytes(); auto mbv = managed_bytes_view(mb); BOOST_REQUIRE_EQUAL(length, decode_varint_length(mbv, sign_mask)); } } } BOOST_AUTO_TEST_CASE(test_varint) { // Generate small integers std::vector test_data = generate_integer_test_data([] (int64_t n) { return data_value(utils::multiprecision_int(n)); }); // Generate more large numbers test_data.reserve(test_data.size() + (20 * 4 * 4)); auto multiprecision_one = utils::multiprecision_int(1); for (int shift = 1; shift <= 20; shift++) { for (auto shift_multiplier : {64, 100, 256, 512}) { auto large_number = multiprecision_one << shift * shift_multiplier; for (auto number : std::initializer_list{large_number, large_number - 1, -large_number, -(large_number - 1)}) { test_data.emplace_back(number); } } } byte_comparable_test(std::move(test_data)); } static int64_t msb_with_version(int64_t msb, int version) { // Set the version bits in the msb of the UUID return (msb & ~(0xF << 12)) | (version << 12); } static void test_uuid_and_flipped_uuid(utils::UUID&& uuid, std::vector& test_data, std::function& create_data_value) { auto uuid_dv = create_data_value(std::move(uuid)); // negate the uuid to create a flipped version auto flipped_uuid = utils::UUID_gen::negate(uuid); auto flipped_uuid_dv = create_data_value(std::move(flipped_uuid)); // verify that the original and flipped uuids compare correctly in byte-comparable format BOOST_REQUIRE(uuid <=> flipped_uuid == comparable_bytes::from_data_value(uuid_dv) <=> comparable_bytes::from_data_value(flipped_uuid_dv)); // add both original and flipped uuids to the test data test_data.push_back(std::move(uuid_dv)); test_data.push_back(std::move(flipped_uuid_dv)); } static std::vector generate_timeuuid_test_data(bool create_timeuuid_native_type) { std::function create_data_value; if (create_timeuuid_native_type) { // create data_value for timeuuid data type create_data_value = [] (utils::UUID&& time_uuid) { return data_value(timeuuid_native_type(std::move(time_uuid))); }; } else { // create data_value for uuid data type create_data_value = [] (utils::UUID&& time_uuid) { return data_value(std::move(time_uuid)); }; } std::vector test_data; for (auto [msb, lsb] : std::initializer_list>{ {0, 0}, {std::numeric_limits::min(), std::numeric_limits::min()}, {std::numeric_limits::max(), std::numeric_limits::max()}, }) { test_uuid_and_flipped_uuid(utils::UUID(msb_with_version(msb, 1), lsb), test_data, create_data_value); } for (int i = 0; i < 500; i++) { // Generate a random msb with version set to 1 (time-based UUID) test_uuid_and_flipped_uuid( utils::UUID(msb_with_version(tests::random::get_int(std::numeric_limits::min(), std::numeric_limits::max()), 1), tests::random::get_int(std::numeric_limits::min(), std::numeric_limits::max())), test_data, create_data_value); } return test_data; } BOOST_AUTO_TEST_CASE(test_timeuuid) { byte_comparable_test(generate_timeuuid_test_data(true)); } BOOST_AUTO_TEST_CASE(test_uuid) { // generate time uuids auto test_data = generate_timeuuid_test_data(false); // test few edge cases test_data.emplace_back(utils::null_uuid()); test_data.emplace_back(utils::UUID(std::numeric_limits::max(), std::numeric_limits::max())); test_data.emplace_back(utils::UUID(std::numeric_limits::min(), std::numeric_limits::min())); test_data.emplace_back(utils::UUID("ffffffff-ffff-ffff-ffff-ffffffffffff")); // test name based, type 3 uuids test_data.emplace_back(utils::UUID_gen::get_name_UUID("scylladb")); test_data.emplace_back(utils::UUID_gen::get_name_UUID("lakshminarayanansreethar")); // generate few random uuids std::function create_data_value = [] (utils::UUID&& time_uuid) { return data_value(std::move(time_uuid)); }; for (auto i = 0; i < 500; i++) { // Generate a random msb with version set to 4 test_uuid_and_flipped_uuid( utils::UUID(msb_with_version(tests::random::get_int(std::numeric_limits::min(), std::numeric_limits::max()), 4), tests::random::get_int(std::numeric_limits::min(), std::numeric_limits::max())), test_data, create_data_value); } byte_comparable_test(std::move(test_data)); } extern std::size_t count_digits(const boost::multiprecision::cpp_int& value); BOOST_AUTO_TEST_CASE(test_count_digits) { auto test_precision = [] (boost::multiprecision::cpp_int&& num) { const auto expected_length = num.str().length(); BOOST_REQUIRE_EQUAL(count_digits(num), expected_length); BOOST_REQUIRE_EQUAL(count_digits(-num), expected_length); }; test_precision(boost::multiprecision::cpp_int("0")); test_precision(boost::multiprecision::cpp_int("123")); test_precision(boost::multiprecision::cpp_int("123456")); test_precision(boost::multiprecision::cpp_int("12345600")); test_precision(boost::multiprecision::cpp_int("9999999")); test_precision(boost::multiprecision::cpp_int( "123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890")); } BOOST_AUTO_TEST_CASE(test_decimal) { // generate few multiprecision ints to be used as unscaled_values in the big_decimal std::vector unscaled_values; auto multiprecision_one = utils::multiprecision_int(1); for (int shift = 1; shift <= 10; shift++) { for (auto shift_prod : {1, 2, 4, 8, 10, 32, 64, 100, 256}) { auto mp_num = multiprecision_one << shift * shift_prod; for (auto n : std::initializer_list{mp_num, mp_num - 1, -mp_num, -(mp_num - 1)}) { unscaled_values.push_back(std::move(n)); } } } // scales to generate the big_decimal std::vector scales{1, 2, 4, 5, 10, 100, 1000}; std::vector _test_data; _test_data.reserve(unscaled_values.size() * scales.size() * 5); for (const auto& unscaled_value : unscaled_values) { _test_data.emplace_back(big_decimal(0, unscaled_value)); _test_data.emplace_back(big_decimal(std::numeric_limits::min(), unscaled_value)); _test_data.emplace_back(big_decimal(std::numeric_limits::max(), unscaled_value)); for (const auto& scale : scales) { _test_data.emplace_back(big_decimal(scale, unscaled_value)); _test_data.emplace_back(big_decimal(-scale, unscaled_value)); } } byte_comparable_test(std::move(_test_data)); } BOOST_AUTO_TEST_CASE(test_blob) { auto random_bytes = [] (size_t length) { std::vector data(length); for (auto& byte : data) { byte = tests::random::get_int(); } return bytes(reinterpret_cast(data.data()), length); }; std::vector test_data; test_data.reserve(500); for (int i = 0; i < 100; i++) { for (int length : {1, 10, 100, 1000}) { test_data.emplace_back(random_bytes(length)); } } // test a few cases that are stored across multiple fragments for (int i = 0; i < 10; i++) { for (int frag_count = 1; frag_count <= 10; frag_count++) { const size_t length = 128 * 1024 * frag_count; test_data.emplace_back(random_bytes(length)); } } byte_comparable_test(std::move(test_data)); } static std::vector generate_string_test_data( std::function create_data_value_func) { const std::string charset = "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"; auto random_text = [&charset] (size_t length) { std::string generated_text; generated_text.reserve(length); for (size_t i = 0; i < length; ++i) { generated_text += charset[tests::random::get_int(0, charset.size() - 1)]; } return generated_text; }; std::vector test_data; test_data.reserve(500); for (int i = 0; i < 100; i++) { for (int length : {1, 10, 100, 1000}) { test_data.push_back(create_data_value_func(random_text(length))); } } // test a few cases that are stored across multiple fragments for (int i = 0; i < 10; i++) { for (int frag_count = 1; frag_count <= 10; frag_count++) { const size_t length = 128 * 1024 * frag_count; test_data.push_back(create_data_value_func(random_text(length))); } } return test_data; } BOOST_AUTO_TEST_CASE(test_ascii) { byte_comparable_test(generate_string_test_data([] (std::string&& str) { return data_value(ascii_native_type(str)); })); } BOOST_AUTO_TEST_CASE(test_text) { byte_comparable_test(generate_string_test_data([] (std::string&& str) { return data_value(str); })); } BOOST_AUTO_TEST_CASE(test_duration) { constexpr int64_t max_ns_in_a_day = 24L * 60 * 60 * 1000 * 1000 * 1000; std::vector test_data; test_data.reserve(1000); for (int i = 0; i < 1000; i++) { const auto months = months_counter{tests::random::get_int(0, 12)}; const auto days = days_counter{tests::random::get_int(0, 28)}; const auto ns = nanoseconds_counter{tests::random::get_int(0, max_ns_in_a_day)}; test_data.emplace_back(cql_duration(months, days, ns)); } byte_comparable_test(std::move(test_data)); } BOOST_AUTO_TEST_CASE(test_inet) { auto test_data = generate_integer_test_data([](uint32_t value) { return data_value(seastar::net::ipv4_address(value)); }); // Include few more addresses for (const std::string& addr : { // IPv4 "127.0.0.1", "10.0.0.1", "172.16.1.1", "192.168.2.2", "224.3.3.3", // IPv6 "0000:0000:0000:0000:0000:0000:0000:0000", "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", "fe80:1:23:456:7890:1:23:456", }) { test_data.emplace_back(seastar::net::inet_address(addr)); } byte_comparable_test(std::move(test_data)); } // Test Scylla's byte-comparable encoding compatibility with Cassandra's implementation by // verifying that serialized values produce the same comparable bytes as those generated by Cassandra. // The test data was generated using the cassandra unit test pushed to the following branch: // https://github.com/scylladb/scylla-dev/blob/byte-comparable-compatibility-generator SEASTAR_TEST_CASE(test_compatibility) { return sstables::test_env::do_with_async([] (sstables::test_env&) { auto file = open_file_dma("test/resource/byte_comparable_compatibility_data.csv", open_flags::ro).get(); auto fs = make_file_input_stream(file); temporary_buffer buf = fs.read().get(); // Read file contents in a loop and handle them line by line. data_type type; std::string input_buffer; while (!buf.empty()) { input_buffer.append(buf.get(), buf.size()); size_t pos = 0; while (pos != input_buffer.size()) { // Extract the CSV entry from the next line size_t end = input_buffer.find('\n', pos); if (end == std::string::npos) { // no \n in the input, need to read more data from the file break; } std::string curr_line = input_buffer.substr(pos, end - pos); pos = end + 1; // Extract test data from curr_line std::vector csv_values; std::istringstream line_stream(curr_line); std::string current_value; while (std::getline(line_stream, current_value, ',')) { csv_values.push_back(std::move(current_value)); } // Test data has `type` followed by the test data in subsequent lines. if (csv_values.size() == 1) { // This is the type line, parse it and continue to the next line. type = abstract_type::parse_type(csv_values[0]); testlog.info("testing compatibility of type: {}", type->cql3_type_name()); continue; } // Test data has two columns: actual value and expected comparable bytes. const auto serialized_bytes = type->from_string(csv_values[0]); const auto expected_comparable_bytes = bytes_type->from_string(csv_values[1]); comparable_bytes comparable_bytes(*type, managed_bytes_view(serialized_bytes)); BOOST_REQUIRE_MESSAGE(comparable_bytes.as_managed_bytes_view() == managed_bytes_view(expected_comparable_bytes), seastar::value_of([&] () { return fmt::format("failed for type : {} and value : {}", type->cql3_type_name(), csv_values[0]); })); } // Remove the lines that were processed from the input buffer. input_buffer.erase(0, pos); buf = fs.read().get(); } file.close().get(); }); }