Files
scylladb/utils/serialization.hh
Avi Kivity f3eade2f62 treewide: relicense to ScyllaDB-Source-Available-1.0
Drop the AGPL license in favor of a source-available license.
See the blog post [1] for details.

[1] https://www.scylladb.com/2024/12/18/why-were-moving-to-a-source-available-license/
2024-12-18 17:45:13 +02:00

146 lines
4.9 KiB
C++

#pragma once
/*
* Copyright (C) 2014-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
// The following is a redesigned subset of Java's DataOutput,
// DataOutputStream, DataInput, DataInputStream, etc. It allows serializing
// several primitive types (e.g., integer, string, etc.) to an object which
// is only capable of write()ing a single byte (write(char)) or an array of
// bytes (write(char *, int)), and deserializing the same data from an object
// with a char read() interface.
//
// The format of this serialization is identical to the format used by
// Java's DataOutputStream class. This is important to allow us communicate
// with nodes running Java version of the code.
//
// We only support the subset actually used in Cassandra, and the subset
// that is reversible, i.e., can be read back by data_input. For example,
// we only support DataOutput.writeUTF(string) and not
// DataOutput.writeChars(string) - because the latter does not include
// the length, which is necessary for reading the string back.
#include <stdint.h>
#include <seastar/core/sstring.hh>
#include <seastar/net/byteorder.hh>
#include "seastarx.hh"
class UTFDataFormatException { };
class EOFException { };
static constexpr size_t serialize_int8_size = 1;
static constexpr size_t serialize_bool_size = 1;
static constexpr size_t serialize_int16_size = 2;
static constexpr size_t serialize_int32_size = 4;
static constexpr size_t serialize_int64_size = 8;
namespace internal_impl {
template <typename ExplicitIntegerType, typename CharOutputIterator, typename IntegerType>
requires std::is_integral<ExplicitIntegerType>::value && std::is_integral<IntegerType>::value && requires (CharOutputIterator it) {
*it++ = 'a';
}
inline
void serialize_int(CharOutputIterator& out, IntegerType val) {
ExplicitIntegerType nval = net::hton(ExplicitIntegerType(val));
out = std::copy_n(reinterpret_cast<const char*>(&nval), sizeof(nval), out);
}
}
template <typename CharOutputIterator>
inline
void serialize_int8(CharOutputIterator& out, uint8_t val) {
internal_impl::serialize_int<uint8_t>(out, val);
}
template <typename CharOutputIterator>
inline
void serialize_int16(CharOutputIterator& out, uint16_t val) {
internal_impl::serialize_int<uint16_t>(out, val);
}
template <typename CharOutputIterator>
inline
void serialize_int32(CharOutputIterator& out, uint32_t val) {
internal_impl::serialize_int<uint32_t>(out, val);
}
template <typename CharOutputIterator>
inline
void serialize_int64(CharOutputIterator& out, uint64_t val) {
internal_impl::serialize_int<uint64_t>(out, val);
}
template <typename CharOutputIterator>
inline
void serialize_bool(CharOutputIterator& out, bool val) {
serialize_int8(out, val ? 1 : 0);
}
// The following serializer is compatible with Java's writeUTF().
// In our C++ implementation, we assume the string is already UTF-8
// encoded. Unfortunately, Java's implementation is a bit different from
// UTF-8 for encoding characters above 16 bits in unicode (see
// http://docs.oracle.com/javase/7/docs/api/java/io/DataInput.html#modified-utf-8)
// For now we'll just assume those aren't in the string...
// TODO: fix the compatibility with Java even in this case.
template <typename CharOutputIterator>
requires requires (CharOutputIterator it) {
*it++ = 'a';
}
inline
void serialize_string(CharOutputIterator& out, const sstring& s) {
// Java specifies that nulls in the string need to be replaced by the
// two bytes 0xC0, 0x80. Let's not bother with such transformation
// now, but just verify wasn't needed.
for (char c : s) {
if (c == '\0') {
throw UTFDataFormatException();
}
}
if (s.size() > std::numeric_limits<uint16_t>::max()) {
// Java specifies the string length is written as uint16_t, so we
// can't serialize longer strings.
throw UTFDataFormatException();
}
serialize_int16(out, s.size());
out = std::copy(s.begin(), s.end(), out);
}
template <typename CharOutputIterator>
requires requires (CharOutputIterator it) {
*it++ = 'a';
}
inline
void serialize_string(CharOutputIterator& out, const char* s) {
// TODO: like above, need to change UTF-8 when above 16-bit.
auto len = strlen(s);
if (len > std::numeric_limits<uint16_t>::max()) {
// Java specifies the string length is written as uint16_t, so we
// can't serialize longer strings.
throw UTFDataFormatException();
}
serialize_int16(out, len);
out = std::copy_n(s, len, out);
}
inline
size_t serialize_string_size(const sstring& s) {;
// As above, this code is missing the case of modified utf-8
return serialize_int16_size + s.size();
}
template<typename T, typename CharOutputIterator>
inline
void write(CharOutputIterator& out, const T& val) {
auto v = net::ntoh(val);
out = std::copy_n(reinterpret_cast<char*>(&v), sizeof(v), out);
}