/* * Copyright (C) 2015-present ScyllaDB */ /* * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 * */ #pragma once #include #include #include #if defined(__x86_64__) || defined(__i386__) #include #elif defined(__aarch64__) #include /* Implement x86-64 intrinsics with according aarch64 ones */ inline uint32_t _mm_crc32_u8(uint32_t crc, uint8_t in) { return __crc32cb(crc, in); } inline uint32_t _mm_crc32_u16(uint32_t crc, uint16_t in) { return __crc32ch(crc, in); } inline uint32_t _mm_crc32_u32(uint32_t crc, uint32_t in) { return __crc32cw(crc, in); } inline uint32_t _mm_crc32_u64(uint32_t crc, uint64_t in) { return __crc32cd(crc, in); } #endif #include "utils/clmul.hh" #include "utils/fragment_range.hh" namespace utils { class crc32 { uint32_t _r = 0; public: // All process() functions assume input is in // host byte order (i.e. equivalent to storing // the value in a buffer and crcing the buffer). // On x86 use the crc32 instruction added in SSE 4.2. #if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) void process_le(int8_t in) { _r = _mm_crc32_u8(_r, in); } void process_le(uint8_t in) { _r = _mm_crc32_u8(_r, in); } void process_le(int16_t in) { _r = _mm_crc32_u16(_r, in); } void process_le(uint16_t in) { _r = _mm_crc32_u16(_r, in); } void process_le(int32_t in) { _r = _mm_crc32_u32(_r, in); } void process_le(uint32_t in) { _r = _mm_crc32_u32(_r, in); } void process_le(int64_t in) { _r = _mm_crc32_u64(_r, in); } void process_le(uint64_t in) { _r = _mm_crc32_u64(_r, in); } template void process_be(T in) { in = seastar::net::hton(in); process_le(in); } #endif void process(const uint8_t* in, size_t size) { if ((reinterpret_cast(in) & 1) && size >= 1) { process_le(*in); ++in; --size; } if ((reinterpret_cast(in) & 3) && size >= 2) { process_le(seastar::read_le(reinterpret_cast(in))); in += 2; size -= 2; } if ((reinterpret_cast(in) & 7) && size >= 4) { process_le(seastar::read_le(reinterpret_cast(in))); in += 4; size -= 4; } // do in three parallel loops while (size >= 1024) { uint32_t crc0 = _r, crc1 = 0, crc2 = 0; // calculate three blocks in parallel // - crc0: in64[ 0, 1, ..., 41] // - crc1: in64[42, 43, ..., 83] // - crc2: in64[84, 85, ..., 125] for (int i = 0; i < 42; ++i, in += 8) { crc0 = _mm_crc32_u64(crc0, seastar::read_le((const char*)in)); crc1 = _mm_crc32_u64(crc1, seastar::read_le((const char*)in + 42*8)); crc2 = _mm_crc32_u64(crc2, seastar::read_le((const char*)in + 42*2*8)); } in += 42*2*8; // combine three blocks' crc and last two u64 // - CRC32(crc0 * CRC32(x^(42*64*2))) crc0 = _mm_crc32_u64(0, clmul_u32(crc0, 0xe417f38a)); // - CRC32(crc1 * CRC32(x^(42*64))) crc1 = _mm_crc32_u64(0, clmul_u32(crc1, 0x8f158014)); // - CRC32(crc2 * x^32 + u64[-2]) crc2 = _mm_crc32_u64(crc2, seastar::read_le((const char*)in)); in += 8; // - Last u64 _r = _mm_crc32_u64(crc0^crc1^crc2, seastar::read_le((const char*)in)); in += 8; size -= 1024; } while (size >= 8) { process_le(seastar::read_le(reinterpret_cast(in))); in += 8; size -= 8; } if (size >= 4) { process_le(seastar::read_le(reinterpret_cast(in))); in += 4; size -= 4; } if (size >= 2) { process_le(seastar::read_le(reinterpret_cast(in))); in += 2; size -= 2; } if (size >= 1) { process_le(*in); } } template requires FragmentRange void process_fragmented(const FragmentedBuffer& buffer) { for (bytes_view bv : buffer) { process(reinterpret_cast(bv.data()), bv.size()); } } uint32_t get() const { return _r; } }; }