#pragma once /* * * Modified by ScyllaDB * Copyright (C) 2015-present ScyllaDB */ /* * SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0) */ #include "utils/assert.hh" #include #include #include #include #include #include "UUID.hh" #include "on_internal_error.hh" #include "db_clock.hh" namespace utils { // Scylla uses specialized timeuuids for list keys. They use // limited space of timeuuid clockseq component to store // sub-microsecond time. This exception is thrown when an attempt // is made to construct such a UUID with a sub-microsecond argument // which is outside the available bit range. struct timeuuid_submicro_out_of_range: public std::out_of_range { using out_of_range::out_of_range; }; /** * The goods are here: www.ietf.org/rfc/rfc4122.txt. */ class UUID_gen { public: // UUID timestamp time component is represented in intervals // of 1/10 of a microsecond since the beginning of GMT epoch. using decimicroseconds = std::chrono::duration>; using microseconds = std::chrono::microseconds; using milliseconds = std::chrono::milliseconds; private: // A grand day! millis at 00:00:00.000 15 Oct 1582. static constexpr decimicroseconds START_EPOCH = decimicroseconds{-122192928000000000L}; // UUID time must fit in 60 bits static constexpr milliseconds UUID_UNIXTIME_MAX = duration_cast( decimicroseconds{0x0fffffffffffffffL} + START_EPOCH); static constexpr milliseconds UUID_UNIXTIME_MIN = duration_cast( -decimicroseconds{0x0fffffffffffffffL} + START_EPOCH); // A random mac address for use in timeuuids // where we can not use clockseq to randomize the physical // node, and prefer using a random address to a physical one // to avoid duplicate timeuuids when system time goes back // while scylla is restarting. Using a spoof node also helps // avoid timeuuid duplicates when multiple nodes run on the // same host and share the physical MAC address. static thread_local const int64_t spoof_node; static thread_local const int64_t clock_seq_and_node; /* * The min and max possible lsb for a UUID. * Note that his is not 0 and all 1's because Cassandra TimeUUIDType * compares the lsb parts as a signed byte array comparison. So the min * value is 8 times -128 and the max is 8 times +127. * * Note that we ignore the uuid variant (namely, MIN_CLOCK_SEQ_AND_NODE * have variant 2 as it should, but MAX_CLOCK_SEQ_AND_NODE have variant 0). * I don't think that has any practical consequence and is more robust in * case someone provides a UUID with a broken variant. */ static constexpr int64_t MIN_CLOCK_SEQ_AND_NODE = 0x8080808080808080L; static constexpr int64_t MAX_CLOCK_SEQ_AND_NODE = 0x7f7f7f7f7f7f7f7fL; // An instance of UUID_gen uses clock_seq_and_node so should // be constructed after it. static thread_local UUID_gen _instance; decimicroseconds _last_used_time = decimicroseconds{0}; UUID_gen() { // make sure someone didn't whack the clockSeqAndNode by changing the order of instantiation. SCYLLA_ASSERT(clock_seq_and_node != 0); } // Return decimicrosecond time based on the system time, // in milliseconds. If the current millisecond hasn't change // from the previous call, increment the previously used // value by one decimicrosecond. // NOTE: In the original Java code this function was // "synchronized". This isn't needed since in Scylla we do not // need monotonicity between time UUIDs created at different // shards and UUID code uses thread local state on each shard. int64_t create_time_safe() { auto millis = duration_cast(db_clock::now().time_since_epoch()); decimicroseconds when = from_unix_timestamp(millis); if (when > _last_used_time) { _last_used_time = when; } else { when = ++_last_used_time; } return create_time(when); } public: // We have only 17 timeuuid bits available to store this // value. static constexpr int SUBMICRO_LIMIT = (1<<17); /** * Creates a type 1 UUID (time-based UUID). * * @return a UUID instance */ static UUID get_time_UUID() { auto uuid = UUID(_instance.create_time_safe(), clock_seq_and_node); SCYLLA_ASSERT(uuid.is_timestamp()); return uuid; } /** * Creates a type 1 UUID (time-based UUID) with the wall clock time point @param tp. * * @return a UUID instance */ static UUID get_time_UUID(std::chrono::system_clock::time_point tp) { auto uuid = UUID(create_time(from_unix_timestamp(tp.time_since_epoch())), clock_seq_and_node); SCYLLA_ASSERT(uuid.is_timestamp()); return uuid; } /** * Creates a type 1 UUID (time-based UUID) with the timestamp of @param when, in milliseconds. * * @return a UUID instance */ static UUID get_time_UUID(milliseconds when, int64_t clock_seq_and_node = UUID_gen::clock_seq_and_node) { auto uuid = UUID(create_time(from_unix_timestamp(when)), clock_seq_and_node); SCYLLA_ASSERT(uuid.is_timestamp()); return uuid; } static UUID get_time_UUID_raw(decimicroseconds when, int64_t clock_seq_and_node) { auto uuid = UUID(create_time(when), clock_seq_and_node); SCYLLA_ASSERT(uuid.is_timestamp()); return uuid; } /** * Similar to get_time_UUID, but randomize the clock and sequence. * If you can guarantee that the when_in_micros() argument is unique for * every call, then you should prefer get_time_UUID_from_micros() which is faster. If you can't * guarantee this however, this method will ensure the returned UUID are still unique (across calls) * through randomization. * * @param when_in_micros a unix time in microseconds. * @return a new UUID 'id' such that micros_timestamp(id) == when_in_micros. The UUID returned * by different calls will be unique even if when_in_micros is not. */ static UUID get_random_time_UUID_from_micros(std::chrono::microseconds when_in_micros) { static thread_local std::mt19937_64 rand_gen(std::random_device().operator()()); static thread_local std::uniform_int_distribution rand_dist(std::numeric_limits::min()); auto lsb = rand_dist(rand_gen); lsb &= ~0xC000000000000000L; // clear variant bits lsb |= 0x8000000000000000L; // set variant to IETF variant auto uuid = UUID(create_time(from_unix_timestamp(when_in_micros)), lsb); SCYLLA_ASSERT(uuid.is_timestamp()); return uuid; } // Generate a time-based (Version 1) UUID using // a microsecond-precision Unix time and a unique number in // range [0, 131072). // Used to generate many unique, monotonic UUIDs // sharing the same microsecond part. In lightweight // transactions we must ensure monotonicity between all UUIDs // which belong to one lightweight transaction and UUIDs of // another transaction, but still need multiple distinct and // monotonic UUIDs within the same transaction. // \throws timeuuid_submicro_out_of_range // static std::array get_time_UUID_bytes_from_micros_and_submicros(std::chrono::microseconds when_in_micros, int submicros) { std::array uuid_bytes; if (submicros < 0 || submicros >= SUBMICRO_LIMIT) { throw timeuuid_submicro_out_of_range("timeuuid submicro component does not fit into available bits"); } auto dmc = from_unix_timestamp(when_in_micros); // We have roughly 3 extra bits we will use to increase // sub-microsecond component range from clockseq's 2^14 to 2^17. int64_t msb = create_time(dmc + decimicroseconds((submicros >> 14) & 0b111)); // See RFC 4122 for details. msb = net::hton(msb); std::copy_n(reinterpret_cast(&msb), sizeof(msb), uuid_bytes.data()); // Use 14-bit clockseq to store the rest of sub-microsecond component. int64_t clockseq = submicros & 0b11'1111'1111'1111; // Scylla, like Cassandra, uses signed int8 compare to // compare lower bits of timeuuid. It means 0xA0 > 0xFF. // Bit-xor the sign bit to "fix" the order. See also // https://issues.apache.org/jira/browse/CASSANDRA-8730 // and Cassandra commit 6d266253a5bdaf3a25eef14e54deb56aba9b2944 // // Turn 0 into -127, 1 into -126, ... and 128 into 0, ... clockseq ^= 0b0000'0000'1000'0000; // Least significant bits: UUID variant (1), clockseq and node. // To protect against the system clock back-adjustment, // use a random (spoof) node identifier. Normally this // protection is provided by clockseq component, but we've // just stored sub-microsecond time in it. int64_t lsb = ((clockseq | 0b1000'0000'0000'0000) << 48) | UUID_gen::spoof_node; lsb = net::hton(lsb); std::copy_n(reinterpret_cast(&lsb), sizeof(lsb), uuid_bytes.data() + sizeof(msb)); return uuid_bytes; } /** validates uuid from raw bytes. */ static bool is_valid_UUID(bytes raw) { return raw.size() == 16; } /** creates uuid from raw bytes. */ static UUID get_UUID(bytes raw) { SCYLLA_ASSERT(raw.size() == 16); return get_UUID(raw.begin()); } /** creates uuid from raw bytes. src must point to a region of 16 bytes*/ static UUID get_UUID(const int8_t* src) { struct tmp { uint64_t msb, lsb; } t; std::copy(src, src + 16, reinterpret_cast(&t)); return UUID(net::ntoh(t.msb), net::ntoh(t.lsb)); } /** * Creates a type 3 (name based) UUID based on the specified byte array. */ static UUID get_name_UUID(bytes_view b); static UUID get_name_UUID(std::string_view str); static UUID get_name_UUID(const unsigned char* s, size_t len); /** decomposes a uuid into raw bytes. */ static std::array decompose(const UUID& uuid) { uint64_t most = uuid.get_most_significant_bits(); uint64_t least = uuid.get_least_significant_bits(); std::array b; for (int i = 0; i < 8; i++) { b[i] = (char)(most >> ((7-i) * 8)); b[8+i] = (char)(least >> ((7-i) * 8)); } return b; } /** * Returns a 16 byte representation of a type 1 UUID (a time-based UUID), * based on the current system time. * * @return a type 1 UUID represented as a byte[] */ static std::array get_time_UUID_bytes() { uint64_t msb = _instance.create_time_safe(); uint64_t lsb = clock_seq_and_node; std::array uuid_bytes; for (int i = 0; i < 8; i++) { uuid_bytes[i] = (int8_t) (msb >> 8 * (7 - i)); } for (int i = 8; i < 16; i++) { uuid_bytes[i] = (int8_t) (lsb >> 8 * (7 - (i - 8))); } return uuid_bytes; } /** * Returns the smaller possible type 1 UUID having the provided timestamp. * * Warning: this method should only be used for querying as this * doesn't at all guarantee the uniqueness of the resulting UUID. */ static UUID min_time_UUID(decimicroseconds timestamp = decimicroseconds{0}) { auto uuid = UUID(create_time(from_unix_timestamp(timestamp)), MIN_CLOCK_SEQ_AND_NODE); SCYLLA_ASSERT(uuid.is_timestamp()); return uuid; } /** * Returns the biggest possible type 1 UUID having the provided timestamp. * * Warning: this method should only be used for querying as this * doesn't at all guarantee the uniqueness of the resulting UUID. */ static UUID max_time_UUID(milliseconds timestamp) { // unix timestamp are milliseconds precision, uuid timestamp are 100's // nanoseconds precision. If we ask for the biggest uuid have unix // timestamp 1ms, then we should not extend 100's nanoseconds // precision by taking 10000, but rather 19999. decimicroseconds uuid_tstamp = from_unix_timestamp(timestamp + milliseconds(1)) - decimicroseconds(1); auto uuid = UUID(create_time(uuid_tstamp), MAX_CLOCK_SEQ_AND_NODE); SCYLLA_ASSERT(uuid.is_timestamp()); return uuid; } /** * @param uuid * @return decimicroseconds since Unix epoch */ static decimicroseconds unix_timestamp_decimicros(UUID uuid) { return decimicroseconds(uuid.timestamp()) + START_EPOCH; } /** * @param uuid * @return microseconds since Unix epoch */ static microseconds unix_timestamp_micros(UUID uuid) { return duration_cast(unix_timestamp_decimicros(uuid)); } /** * @param uuid * @return milliseconds since Unix epoch */ static milliseconds unix_timestamp(UUID uuid) { return duration_cast(unix_timestamp_decimicros(uuid)); } /** * @param uuid * @return seconds since Unix epoch */ static std::chrono::seconds unix_timestamp_in_sec(UUID uuid) { using namespace std::chrono; return duration_cast(static_cast(unix_timestamp(uuid))); } /** * @param uuid * @return microseconds since Unix epoch */ static int64_t micros_timestamp(UUID uuid) { return (uuid.timestamp() + START_EPOCH.count())/10; } template static bool is_valid_unix_timestamp(std::chrono::duration> d) { milliseconds dms = duration_cast(d); return dms > UUID_UNIXTIME_MIN && dms < UUID_UNIXTIME_MAX; } template static decimicroseconds from_unix_timestamp(std::chrono::duration> d) { // Avoid 64-bit representation overflow when adding // timeuuid epoch to nanosecond resolution time. auto dmc = duration_cast(d); return dmc - START_EPOCH; } // std::chrono typeaware wrapper around create_time(). // Creates a timeuuid compatible time (decimicroseconds since // the start of GMT epoch). template static int64_t create_time(std::chrono::duration> d) { auto dmc = duration_cast(d); uint64_t msb = dmc.count(); // timeuuid time must fit in 60 bits if ((0xf000000000000000UL & msb)) { // We hope callers would try to avoid this case, but they don't // always do, so SCYLLA_ASSERT() would be bad here - and caused #17035. utils::on_internal_error("timeuuid time must fit in 60 bits"); } return ((0x00000000ffffffffL & msb) << 32 | (0x0000ffff00000000UL & msb) >> 16 | (0x0fff000000000000UL & msb) >> 48 | 0x0000000000001000L); // sets the version to 1. } // Produce an UUID which is derived from this UUID in a reversible manner // // Such that: // // auto original_uuid = UUID_gen::get_time_UUID(); // auto negated_uuid = UUID_gen::negate(original_uuid); // SCYLLA_ASSERT(original_uuid != negated_uuid); // SCYLLA_ASSERT(original_uuid == UUID_gen::negate(negated_uuid)); static UUID negate(UUID); }; // for the curious, here is how I generated START_EPOCH // Calendar c = Calendar.getInstance(TimeZone.getTimeZone("GMT-0")); // c.set(Calendar.YEAR, 1582); // c.set(Calendar.MONTH, Calendar.OCTOBER); // c.set(Calendar.DAY_OF_MONTH, 15); // c.set(Calendar.HOUR_OF_DAY, 0); // c.set(Calendar.MINUTE, 0); // c.set(Calendar.SECOND, 0); // c.set(Calendar.MILLISECOND, 0); // long START_EPOCH = c.getTimeInMillis(); } // namespace utils