From 46631692cd00e20ccaf22aaac3b78f03fe4e4703 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20D=C3=A9nes?= Date: Wed, 27 May 2026 17:55:30 +0300 Subject: [PATCH] mutation_fragment_stream_validator: use legacy byte order for same-token partition key comparison MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When two partition keys share the same token, their relative order is determined by their raw serialized bytes (legacy_tri_compare), which matches the physical on-disk order in SSTables. The validator was using partition_key::tri_compare instead — a type-aware comparator that can disagree with byte order for types like timeuuid. The result was a false-positive "out-of-order partition key" error for any two same-token partitions whose timeuuid (or other type-aware) order is the reverse of their byte order. In scrub mode this caused the second partition to be silently dropped. Fixes: SCYLLADB-2304 Closes scylladb/scylladb#30120 --- .../mutation_fragment_stream_validator.cc | 3 +- test/boost/mutation_fragment_test.cc | 83 +++++++++++++++++++ 2 files changed, 84 insertions(+), 2 deletions(-) diff --git a/mutation/mutation_fragment_stream_validator.cc b/mutation/mutation_fragment_stream_validator.cc index e193453185..20d4568773 100644 --- a/mutation/mutation_fragment_stream_validator.cc +++ b/mutation/mutation_fragment_stream_validator.cc @@ -60,8 +60,7 @@ mutation_fragment_stream_validator::validate(dht::token t, const partition_key* if (_prev_partition_key.token() > t) { return ooo_key_result(_schema, t, pkey, _prev_partition_key); } - partition_key::tri_compare cmp(_schema); - if (_prev_partition_key.token() == t && pkey && cmp(_prev_partition_key.key(), *pkey) >= 0) { + if (_prev_partition_key.token() == t && pkey && _prev_partition_key.key().legacy_tri_compare(_schema, *pkey) >= 0) { return ooo_key_result(_schema, t, pkey, _prev_partition_key); } _prev_partition_key._token = t; diff --git a/test/boost/mutation_fragment_test.cc b/test/boost/mutation_fragment_test.cc index 05f0853a62..eea393da3e 100644 --- a/test/boost/mutation_fragment_test.cc +++ b/test/boost/mutation_fragment_test.cc @@ -29,6 +29,9 @@ #include "test/lib/simple_schema.hh" #include "test/lib/fragment_scatterer.hh" #include "test/lib/test_utils.hh" +#include "test/lib/random_schema.hh" +#include "test/lib/random_utils.hh" +#include "dht/i_partitioner.hh" #include "readers/from_mutations.hh" @@ -704,3 +707,83 @@ SEASTAR_THREAD_TEST_CASE(test_mutation_fragment_stream_validator_validation_leve BOOST_REQUIRE(validation_level < vl::token || !validator(dk0)); } } + +// Verify that mutation_fragment_stream_validator uses ring (legacy raw-byte) order +// for same-token partition keys, not the type-aware partition_key::tri_compare order. +// +// Two keys that share a token must be ordered by their raw serialized bytes +// (legacy_tri_compare), which is what SSTables use on disk. Using the +// type-aware comparator instead would yield false-positive "out-of-order" +// errors for key types (e.g. timeuuid) whose semantic order differs from the +// raw-byte order. +// +// We use the "token trick": we fabricate decorated_key objects that share one +// fixed token but carry distinct partition keys. The validator does not +// verify that the token is the true murmur3 hash of the key, so this lets us +// test the same-token ordering path with arbitrary schemas and key values. +SEASTAR_THREAD_TEST_CASE(test_mutation_fragment_stream_validator_ring_order_same_token) { + testing::scoped_no_abort_on_internal_error _; + + // A fixed token shared by every test key. Any kind::key value works; we + // just need all decorated_keys to have the same token so the validator + // exercises the same-token ordering branch (line that calls + // legacy_tri_compare in mutation_fragment_stream_validator.cc). + const dht::token shared_token{int64_t{42}}; + + const size_t num_keys = 32; + + auto spec = tests::make_random_schema_specification( + get_name(), + std::uniform_int_distribution(1, 3), // 1–3 pk columns + std::uniform_int_distribution(0, 0), // no ck columns needed + std::uniform_int_distribution(1, 1), // 1 regular column + std::uniform_int_distribution(0, 0)); // no static columns + + tests::random_schema rschema(tests::random::get_int(), *spec); + const schema& s = *rschema.schema(); + + // Generate random partition keys and wrap them as decorated_keys that + // all share shared_token. + std::vector dkeys; + dkeys.reserve(num_keys); + for (size_t i = 0; i < num_keys; ++i) { + auto raw = rschema.make_pkey(static_cast(i)); + dkeys.push_back(dht::decorated_key{shared_token, + partition_key::from_exploded(s, raw)}); + } + + // Sort by legacy_tri_compare — the ring order within a token. + std::sort(dkeys.begin(), dkeys.end(), + [&s](const dht::decorated_key& a, const dht::decorated_key& b) { + return a.key().legacy_tri_compare(s, b.key()) < 0; + }); + + // Remove byte-identical keys (can happen with some type+value combinations). + dkeys.erase(std::unique(dkeys.begin(), dkeys.end(), + [&s](const dht::decorated_key& a, const dht::decorated_key& b) { + return a.key().legacy_tri_compare(s, b.key()) == 0; + }), dkeys.end()); + + BOOST_REQUIRE_GT(dkeys.size(), 2); + + // 1. The validator must accept all keys fed in correct ring order. + { + mutation_fragment_stream_validator validator(s); + for (const auto& dk : dkeys) { + auto res = validator(dk); + BOOST_REQUIRE_MESSAGE(res, fmt::format("validator wrongly rejected key {} in correct ring order", dk.key().with_schema(s))); + } + } + + // 2. For every adjacent pair (dkeys[i] < dkeys[i+1] in ring order), + // feeding dkeys[i+1] first and then dkeys[i] must be rejected. + for (size_t i = 0; i + 1 < dkeys.size(); ++i) { + mutation_fragment_stream_validator validator(s); + // Feed the later key first (valid so far). + BOOST_REQUIRE(validator(dkeys[i + 1])); + // Now feed the earlier key — out of ring order, must be rejected. + auto res = validator(dkeys[i]); + BOOST_REQUIRE_MESSAGE(!res, + fmt::format("validator accepted out-of-ring-order key {} after {}", dkeys[i].key().with_schema(s), dkeys[i + 1].key().with_schema(s))); + } +}