Alternator::streams: Make SequenceNumber monotonically growing

Fixes #7424. The AWS SDK (Kinesis) assumes SequenceNumbers are monotonically growing bigints. Since we sort on and use timeuuids for these, a "raw" bit representation of them will _not_ fulfill the requirement. However, we can "unwrap" the timestamp from the uuid msb and give the value as timestamp<<64|lsb, which will ensure sort order == bigint order.
2026-05-12 19:02:12 +00:00 · 2020-10-13 13:10:40 +00:00
parent 3f800d68c6
commit 83339f4bac
3 changed files with 26 additions and 2 deletions
--- a/alternator/streams.cc
+++ b/alternator/streams.cc
@@ -270,7 +270,17 @@ struct sequence_number {

        using namespace boost::multiprecision;

-        uint128_t hi = uint64_t(num.uuid.get_most_significant_bits());
+        /**
+         * #7424 - aws sdk assumes sequence numbers are
+         * monotonically growing bigints. 
+         *
+         * Timeuuids viewed as msb<<64|lsb are _not_,
+         * but they are still sorted as
+         *  timestamp() << 64|lsb
+         * so we can simply unpack the mangled msb
+         * and use as hi 64 in our "bignum".
+         */
+        uint128_t hi = uint64_t(num.uuid.timestamp());
        uint128_t lo = uint64_t(num.uuid.get_least_significant_bits());

        return os << std::dec << ((hi << 64) | lo);
@@ -281,7 +291,8 @@ sequence_number::sequence_number(std::string_view v)
    : uuid([&] {
        using namespace boost::multiprecision;
        uint128_t tmp{v};
-        return utils::UUID(uint64_t(tmp >> 64), uint64_t(tmp & std::numeric_limits<uint64_t>::max()));
+        // see above
+        return utils::UUID_gen::get_time_UUID_raw(uint64_t(tmp >> 64), uint64_t(tmp & std::numeric_limits<uint64_t>::max()));
    }())
 {}

--- a/test/alternator/test_streams.py
+++ b/test/alternator/test_streams.py
@@ -1028,6 +1028,11 @@ def test_streams_after_sequence_number(test_table_ss_keys_only, dynamodbstreams)
                assert response['Records'][1]['dynamodb']['Keys'] == {'p': {'S': p}, 'c': {'S': c}}
                sequence_number_1 = response['Records'][0]['dynamodb']['SequenceNumber']
                sequence_number_2 = response['Records'][1]['dynamodb']['SequenceNumber']
+
+                # #7424 - AWS sdk assumes sequence numbers can be compared
+                # as bigints, and are monotonically growing.
+                assert int(sequence_number_1) < int(sequence_number_2)
+
                # If we use the SequenceNumber of the first event to create an
                # AFTER_SEQUENCE_NUMBER iterator, we can read the second event
                # (only) again. We don't need a loop and a timeout, because this
--- a/utils/UUID_gen.hh
+++ b/utils/UUID_gen.hh
@@ -132,6 +132,14 @@ public:
        assert(uuid.is_timestamp());
        return uuid;
    }
+
+    static UUID get_time_UUID_raw(int64_t nanos, int64_t clock_seq_and_node)
+    {
+        auto uuid = UUID(create_time(nanos), clock_seq_and_node);
+        assert(uuid.is_timestamp());
+        return uuid;
+    }
+
    /**
     * Similar to get_time_UUID, but randomize the clock and sequence.
     * If you can guarantee that the when_in_micros() argument is unique for