Alternator::streams: Make SequenceNumber monotonically growing

Fixes #7424

AWS sdk (kinesis) assumes SequenceNumbers are monotonically
growing bigints. Since we sort on and use timeuuids for these,
a "raw" bit representation of them will _not_ fulfill the
requirement. However, we can "unwrap" the timestamp from the uuid
msb and give the value as timestamp<<64|lsb, which will
ensure sort order == bigint order.
This commit is contained in:
Calle Wilund
2020-10-13 13:10:40 +00:00
committed by Pekka Enberg
parent 3f800d68c6
commit 83339f4bac
3 changed files with 26 additions and 2 deletions

View File

@@ -270,7 +270,17 @@ struct sequence_number {
using namespace boost::multiprecision;
uint128_t hi = uint64_t(num.uuid.get_most_significant_bits());
/**
* #7424 - aws sdk assumes sequence numbers are
* monotonically growing bigints.
*
* Timeuuids viewed as msb<<64|lsb are _not_,
* but they are still sorted as
* timestamp() << 64|lsb
* so we can simply unpack the mangled msb
* and use as hi 64 in our "bignum".
*/
uint128_t hi = uint64_t(num.uuid.timestamp());
uint128_t lo = uint64_t(num.uuid.get_least_significant_bits());
return os << std::dec << ((hi << 64) | lo);
@@ -281,7 +291,8 @@ sequence_number::sequence_number(std::string_view v)
: uuid([&] {
using namespace boost::multiprecision;
uint128_t tmp{v};
return utils::UUID(uint64_t(tmp >> 64), uint64_t(tmp & std::numeric_limits<uint64_t>::max()));
// see above
return utils::UUID_gen::get_time_UUID_raw(uint64_t(tmp >> 64), uint64_t(tmp & std::numeric_limits<uint64_t>::max()));
}())
{}

View File

@@ -1028,6 +1028,11 @@ def test_streams_after_sequence_number(test_table_ss_keys_only, dynamodbstreams)
assert response['Records'][1]['dynamodb']['Keys'] == {'p': {'S': p}, 'c': {'S': c}}
sequence_number_1 = response['Records'][0]['dynamodb']['SequenceNumber']
sequence_number_2 = response['Records'][1]['dynamodb']['SequenceNumber']
# #7424 - AWS sdk assumes sequence numbers can be compared
# as bigints, and are monotonically growing.
assert int(sequence_number_1) < int(sequence_number_2)
# If we use the SequenceNumber of the first event to create an
# AFTER_SEQUENCE_NUMBER iterator, we can read the second event
# (only) again. We don't need a loop and a timeout, because this

View File

@@ -132,6 +132,14 @@ public:
assert(uuid.is_timestamp());
return uuid;
}
/**
 * Builds a time UUID directly from a raw timestamp value and a raw
 * second half.
 *
 * `nanos` is run through create_time() to produce the first UUID
 * constructor argument; `clock_seq_and_node` is passed unchanged as
 * the second.
 * NOTE(review): despite its name, `nanos` is presumably the value as
 * returned by UUID::timestamp() rather than nanoseconds -- confirm
 * against create_time().
 *
 * Asserts that the resulting UUID reports itself as a timestamp UUID.
 */
static UUID get_time_UUID_raw(int64_t nanos, int64_t clock_seq_and_node)
{
    UUID result(create_time(nanos), clock_seq_and_node);
    assert(result.is_timestamp());
    return result;
}
/**
* Similar to get_time_UUID, but randomize the clock and sequence.
* If you can guarantee that the when_in_micros() argument is unique for