scylladb/vint-serialization.cc

/*
 * Copyright 2017-present ScyllaDB
 *
 * Modified by ScyllaDB
 */

/*
 * SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
 */

#include "vint-serialization.hh"

#include <seastar/core/bitops.hh>

#include <algorithm>
#include <array>
#include <limits>

// Compile-time guard: `-1 == ~0` is true only when signed integers use a
// two's-complement representation, so the build fails on any other target.
static_assert(-1 == ~0, "Not a twos-complement architecture");

// Zigzag-encodes a signed value so that numbers of small magnitude, whether
// positive or negative, map to small unsigned numbers:
// 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
static constexpr uint64_t encode_zigzag(int64_t n) noexcept {
    // The value doubled, with the shift done on the unsigned representation.
    const auto doubled = static_cast<uint64_t>(n) << 1;
    // All ones for a negative input and all zeros otherwise. This relies on the
    // right shift of a signed value being arithmetic rather than logical.
    const auto sign_fill = static_cast<uint64_t>(n >> 63);
    return doubled ^ sign_fill;
}

// Reverses zigzag encoding: the lowest bit carries the sign and the remaining
// bits carry the magnitude (0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...).
static constexpr int64_t decode_zigzag(uint64_t n) noexcept {
    const uint64_t magnitude = n >> 1;
    // All ones when the sign bit is set, all zeros otherwise (unsigned negation).
    const uint64_t sign_fill = uint64_t(0) - (n & uint64_t(1));
    return static_cast<int64_t>(magnitude ^ sign_fill);
}

// Returns the mask that selects, within the first byte, the bits that are not
// part of the leading run of ones announcing the number of extra bytes.
static uint64_t first_byte_value_mask(vint_size_type extra_bytes_size) {
    constexpr uint64_t all_byte_bits = 0xff;
    // Only the leading ones are dropped, so the zero bit that terminates the
    // length prefix stays inside the mask.
    return all_byte_bits >> extra_bytes_size;
}

// Writes `value` at `out` as a signed vint: the value is zigzag-encoded and the
// result is written with the unsigned encoder. Returns what
// `unsigned_vint::serialize()` returns for the zigzag-encoded value.
vint_size_type signed_vint::serialize(int64_t value, bytes::iterator out) {
    const uint64_t zigzagged = encode_zigzag(value);
    return unsigned_vint::serialize(zigzagged, out);
}

// Returns the size that `unsigned_vint::serialized_size()` reports for the
// zigzag encoding of `value`.
vint_size_type signed_vint::serialized_size(int64_t value) noexcept {
    const uint64_t zigzagged = encode_zigzag(value);
    return unsigned_vint::serialized_size(zigzagged);
}

// Reads a signed vint from `v`: the bytes are decoded as an unsigned vint and
// the zigzag transformation is then undone.
int64_t signed_vint::deserialize(bytes_view v) {
    return decode_zigzag(unsigned_vint::deserialize(v));
}

// The number of additional bytes that follow the first byte. This equals the
// number of leading one bits in `first_byte`.
static vint_size_type count_extra_bytes(int8_t first_byte) {
    // Flipping the bits turns the leading ones into leading zeros, which can
    // then be counted directly on the 8-bit value.
    const auto inverted = static_cast<uint8_t>(~first_byte);
    return std::countl_zero(inverted);
}

// Writes `value` at `out` as a `size`-byte vint: the low `size` bytes of
// `value` in most-significant-first order, with the length prefix merged into
// the first byte.
//
// `size` is always in the range [1, 9].
static void encode(uint64_t value, vint_size_type size, bytes::iterator out) {
    std::array<int8_t, 9> scratch{};

    const auto extra_bytes_size = size - 1;

    // Fill the scratch buffer back to front, so that the least significant
    // byte of `value` ends up in the last position that will be emitted.
    for (auto pos = size; pos > 0; --pos) {
        scratch[pos - 1] = static_cast<int8_t>(value & 0xff);
        value >>= 8;
    }

    // Set every first-byte bit that lies outside the value mask.
    scratch.front() |= ~first_byte_value_mask(extra_bytes_size);
    std::copy_n(scratch.cbegin(), size, out);
}

// Writes `value` at `out` as an unsigned vint and returns the number of bytes
// written, as computed by `serialized_size()`.
vint_size_type unsigned_vint::serialize(uint64_t value, bytes::iterator out) {
    const auto size = serialized_size(value);

    if (size != 1) {
        // Multi-byte form: payload bytes plus the length prefix in the first byte.
        encode(value, size, out);
        return size;
    }

    // Single-byte form: the low byte of the value is stored as is.
    *out = static_cast<int8_t>(value & 0xff);
    return 1;
}

// Returns the number of bytes, between 1 and 9, used to encode `value`.
vint_size_type unsigned_vint::serialized_size(uint64_t value) noexcept {
    // OR-ing in the lowest bit guarantees a non-zero argument without a
    // separate zero check; it does not change the resulting size.
    //
    // Kept signed so that a count of zero yields (0 - 1) / 7 == 0 and therefore
    // a size of 9, instead of wrapping around.
    const auto leading_zeros = static_cast<int64_t>(count_leading_zeros(value | uint64_t(1)));
    const auto bytes_saved = (leading_zeros - 1) / 7;

    return vint_size_type(9) - vint_size_type(bytes_saved);
}

// Decodes an unsigned vint located at the start of `v` and returns its value.
//
// The first byte is examined as a signed quantity. When it is non-negative (top
// bit clear) it is the whole value. Otherwise `count_extra_bytes()` gives the
// number of bytes that follow; the masked bits of the first byte form the most
// significant part of the result and the following bytes supply the rest, most
// significant byte first.
//
// NOTE(review): nothing here checks that `v` is non-empty or that it holds
// `extra_bytes_size + 1` bytes -- presumably callers guarantee this; confirm.
uint64_t unsigned_vint::deserialize(bytes_view v) {
    auto src = v.data();
    auto len = v.size();
    const int8_t first_byte = *src;

    // No additional bytes, since the most significant bit is not set.
    if (first_byte >= 0) {
        return uint64_t(first_byte);
    }

    const auto extra_bytes_size = count_extra_bytes(first_byte);

    // Extract the bits not used for counting bytes.
    // The conversion to uint64_t sign-extends the negative byte; the mask (at most
    // 0x7f on this path) discards the extended bits together with the length prefix.
    auto result = uint64_t(first_byte) & first_byte_value_mask(extra_bytes_size);

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    uint64_t value;
    // If we can overread do that. It is cheaper to have a single 64-bit read and
    // then mask out the unneeded part than to do 8x 1 byte reads.
    if (len >= sizeof(uint64_t) + 1) [[likely]] {
        // The view holds at least 9 bytes, so reading 8 bytes after the first stays inside it.
        std::copy_n(src + 1, sizeof(uint64_t), reinterpret_cast<int8_t*>(&value));
    } else {
        // Short view: copy only the bytes that belong to this vint and leave the rest zero.
        value = 0;
        std::copy_n(src + 1, extra_bytes_size, reinterpret_cast<int8_t*>(&value));
    }
    // The left shift keeps only the `extra_bytes_size` bytes that were read first and
    // drops any over-read ones. `be_to_cpu` is not defined in this file -- presumably it
    // converts from big-endian to host byte order, putting those bytes in numeric order.
    value = be_to_cpu(value << (64 - (extra_bytes_size * 8)));
    // `% 64` keeps the shift amount below 64 when there are 8 extra bytes; `result` is
    // zero in that case because the first-byte mask is zero.
    result <<= (extra_bytes_size * 8) % 64;
    result |= value;
#else
    // Byte-at-a-time path: append each following byte below the bits gathered so far.
    for (vint_size_type index = 0; index < extra_bytes_size; ++index) {
        result <<= 8;
        result |= (uint64_t(v[index + 1]) & uint64_t(0xff));
    }
#endif
    return result;
}