This commit is only cosmetics, changes calls to GetStringLength into rjson::to_string_view with the same underlying implementation.
465 lines
19 KiB
C++
465 lines
19 KiB
C++
/*
|
|
* Copyright 2019-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
/*
|
|
* rjson is a wrapper over rapidjson library, providing fast JSON parsing and generation.
|
|
*
|
|
* rapidjson has strict copy elision policies, which, among other things, involves
|
|
* using provided char arrays without copying them and allows copying objects only explicitly.
|
|
* As such, one should be careful when passing strings with limited liveness
|
|
* (e.g. data underneath local std::strings) to rjson functions, because created JSON objects
|
|
* may end up relying on dangling char pointers. All rjson functions that create JSONs from strings
|
|
* by rjson have both APIs for string_ref_type (more optimal, used when the string is known to live
|
|
* at least as long as the object, e.g. a static char array) and for std::strings. The more optimal
|
|
* variants should be used *only* if the liveness of the string is guaranteed, otherwise it will
|
|
* result in undefined behaviour.
|
|
* Also, bear in mind that methods exposed by rjson::value are generic, but some of them
|
|
* work fine only for specific types. In case the type does not match, an rjson::error will be thrown.
|
|
* Examples of such mismatched usages is calling MemberCount() on a JSON value not of object type
|
|
* or calling Size() on a non-array value.
|
|
*/
|
|
|
|
#include <iostream>
|
|
#include <map>
|
|
#include <string>
|
|
#include <string_view>
|
|
#include <type_traits>
|
|
#include "utils/base64.hh"
|
|
#include "utils/chunked_vector.hh"
|
|
|
|
#include <seastar/core/future.hh>
|
|
|
|
namespace seastar {
|
|
template<typename> class output_stream;
|
|
}
|
|
|
|
namespace rjson {
|
|
class error : public std::exception {
|
|
std::string _msg;
|
|
public:
|
|
error() = default;
|
|
error(const std::string& msg) : _msg(msg) {}
|
|
|
|
virtual const char* what() const noexcept override { return _msg.c_str(); }
|
|
};
|
|
}
|
|
|
|
// rapidjson configuration macros
|
|
#define RAPIDJSON_HAS_STDSTRING 1
|
|
// Default rjson policy is to use assert() - which is dangerous for two reasons:
|
|
// 1. assert() can be turned off with -DNDEBUG
|
|
// 2. assert() crashes a program
|
|
// Fortunately, the default policy can be overridden, and so rapidjson errors will
|
|
// throw an rjson::error exception instead.
|
|
#define RAPIDJSON_ASSERT(x) do { if (!(x)) throw rjson::error(fmt::format("JSON assert failed on condition '{}', at: {}", #x, current_backtrace_tasklocal())); } while (0)
|
|
// This macro is used for functions which are called for every json char making it
|
|
// quite costly if not inlined, by default rapidjson only enables it if NDEBUG
|
|
// is defined which isn't the case for us.
|
|
#define RAPIDJSON_FORCEINLINE __attribute__((always_inline))
|
|
|
|
#include <rapidjson/document.h>
|
|
#include <rapidjson/writer.h>
|
|
#include <rapidjson/stringbuffer.h>
|
|
#include <rapidjson/allocators.h>
|
|
#include <rapidjson/ostreamwrapper.h>
|
|
#include <seastar/core/sstring.hh>
|
|
#include "seastarx.hh"
|
|
|
|
namespace rjson {
|
|
|
|
// The internal namespace is a workaround for the fact that fmt::format
|
|
// also has a to_string_view function and erroneously looks up our rjson::to_string_view
|
|
// if this allocator is in the rjson namespace.
|
|
namespace internal {
|
|
// Implements an interface conforming to the one in rapidjson/allocators.h,
|
|
// but throws rjson::error on allocation failures
|
|
class throwing_allocator : public rapidjson::CrtAllocator {
|
|
using base = rapidjson::CrtAllocator;
|
|
public:
|
|
static const bool kNeedFree = base::kNeedFree;
|
|
void* Malloc(size_t size);
|
|
void* Realloc(void* orig_ptr, size_t orig_size, size_t new_size);
|
|
static void Free(void* ptr);
|
|
};
|
|
}
|
|
|
|
using allocator = internal::throwing_allocator;
|
|
using encoding = rapidjson::UTF8<>;
|
|
using document = rapidjson::GenericDocument<encoding, allocator, allocator>;
|
|
using value = rapidjson::GenericValue<encoding, allocator>;
|
|
using string_ref_type = value::StringRefType;
|
|
using string_buffer = rapidjson::GenericStringBuffer<encoding, allocator>;
|
|
using writer = rapidjson::Writer<string_buffer, encoding, encoding, allocator>;
|
|
using type = rapidjson::Type;
|
|
|
|
// The default value is derived from the days when rjson resided in alternator:
|
|
// - the original DynamoDB nested level limit is 32
|
|
// - it's raised by 7 for alternator to make it safer and more cool
|
|
// - the value is doubled because the nesting level is bumped twice
|
|
// for every alternator object - because each alternator object
|
|
// consists of a 2-level JSON object.
|
|
inline constexpr size_t default_max_nested_level = 78;
|
|
|
|
/**
|
|
* exception specializations.
|
|
*/
|
|
class malformed_value : public error {
|
|
public:
|
|
malformed_value(std::string_view name, const rjson::value& value);
|
|
malformed_value(std::string_view name, std::string_view value);
|
|
};
|
|
|
|
class missing_value : public error {
|
|
public:
|
|
missing_value(std::string_view name);
|
|
};
|
|
|
|
|
|
// Returns an object representing JSON's null
|
|
inline rjson::value null_value() {
|
|
return rjson::value(rapidjson::kNullType);
|
|
}
|
|
|
|
// Returns an empty JSON object - {}
|
|
inline rjson::value empty_object() {
|
|
return rjson::value(rapidjson::kObjectType);
|
|
}
|
|
|
|
// Returns an empty JSON array - []
|
|
inline rjson::value empty_array() {
|
|
return rjson::value(rapidjson::kArrayType);
|
|
}
|
|
|
|
// Returns an empty JSON string - ""
|
|
inline rjson::value empty_string() {
|
|
return rjson::value(rapidjson::kStringType);
|
|
}
|
|
|
|
// Convert the JSON value to a string with JSON syntax, the opposite of parse().
|
|
// The representation is dense - without any redundant indentation.
|
|
std::string print(const rjson::value& value, size_t max_nested_level = default_max_nested_level);
|
|
|
|
// Writes the JSON value to the output_stream, similar to print() -> string, but
|
|
// directly. Note this has potentially more data copy overhead and should be
|
|
// reserved for larger values where not creating huge linear strings is useful.
|
|
// (Because data will first be written to N buffers before actually committed to stream,
|
|
// where N = <n bytes text>/k (k dependent on buffer/write block behaviour of stream),
|
|
// and also copy at least 2n bytes (assuming underlying stream is buffered).
|
|
// This may or may not be more data copying/overhead than creating a string directly
|
|
// and printing it, but again, it will guarantee fragmented buffer behaviour).
|
|
// Note: input value must remain valid for the call, but is _not_ required to be valid
|
|
// until future resolves - i.e. the full json -> text conversion is done _before_
|
|
// pushing fully to stream. I.e. it is valid to do `return print(rjson::value("... something..."), os);`
|
|
seastar::future<> print(const rjson::value& value, seastar::output_stream<char>&, size_t max_nested_level = default_max_nested_level);
|
|
|
|
// A variant of print() which prints a JSON object (it assumes the given
|
|
// rjson::value is an object) but adds to it at the end an additional array.
|
|
// That extra array is given a separate chunked_vector, to avoid putting it
|
|
// inside the rjson::value because RapidJSON does contiguous allocations
|
|
// for arrays, which we want to avoid for potentially long arrays (#23535).
|
|
// We only have a streaming version of this function, as it is only used for
|
|
// large arrays, and we want to yield while printing the JSON.
|
|
// If we can ever fix RapidJSON to avoid contiguous allocations for arrays,
|
|
// we can remove this function.
|
|
future<> print_with_extra_array(const rjson::value& value,
|
|
std::string_view array_name,
|
|
const utils::chunked_vector<rjson::value>& array,
|
|
seastar::output_stream<char>& os,
|
|
size_t max_nested_level = default_max_nested_level);
|
|
|
|
// Copies given JSON value - involves allocation
|
|
rjson::value copy(const rjson::value& value);
|
|
|
|
// copyable_value can be used when seamless copying of value is needed
|
|
class copyable_value : public value {
|
|
public:
|
|
copyable_value() noexcept : value() {
|
|
}
|
|
copyable_value(const copyable_value& v) : value(copy(v)) {
|
|
}
|
|
copyable_value(copyable_value&& v) noexcept : value(std::move(v)) {
|
|
}
|
|
copyable_value(value&& v) noexcept : value(std::move(v)) {
|
|
}
|
|
copyable_value& operator=(const copyable_value& v) {
|
|
if (this != &v) {
|
|
*this = copy(v);
|
|
}
|
|
return *this;
|
|
}
|
|
copyable_value& operator=(copyable_value&& v) noexcept {
|
|
value::operator=(value(std::move(v)));
|
|
return *this;
|
|
}
|
|
};
|
|
|
|
// Parses a JSON value from given string or raw character array.
|
|
// The string/char array liveness does not need to be persisted,
|
|
// as parse() will allocate member names and values.
|
|
// Throws rjson::error if parsing failed.
|
|
rjson::value parse(std::string_view str, size_t max_nested_level = default_max_nested_level);
|
|
// Parses a JSON value returns a disengaged optional on failure.
|
|
// NOTICE: any error context will be lost, so this function should
|
|
// be used only if one does not care why parsing failed.
|
|
std::optional<rjson::value> try_parse(std::string_view str, size_t max_nested_level = default_max_nested_level);
|
|
// Needs to be run in thread context
|
|
rjson::value parse_yieldable(std::string_view str, size_t max_nested_level = default_max_nested_level);
|
|
|
|
// chunked_content holds a non-contiguous buffer of bytes - such as bytes
|
|
// read by util::read_entire_stream(). We assume that chunked_content does
|
|
// not contain any empty buffers (the vector can be empty, meaning empty
|
|
// content - but individual buffers cannot).
|
|
using chunked_content = std::vector<temporary_buffer<char>>;
|
|
|
|
// Additional variants of parse() and parse_yieldable() that work on non-
|
|
// contiguous chunked_content. The chunked_content is moved into the parsing
|
|
// function so that we can start freeing chunks as soon as we parse them.
|
|
rjson::value parse(chunked_content&&, size_t max_nested_level = default_max_nested_level);
|
|
rjson::value parse_yieldable(chunked_content&&, size_t max_nested_level = default_max_nested_level);
|
|
|
|
// Creates a JSON value (of JSON string type) out of internal string representations.
|
|
// The string value is copied, so str's liveness does not need to be persisted.
|
|
rjson::value from_string(const char* str, size_t size);
|
|
rjson::value from_string(std::string_view view);
|
|
|
|
// Returns a string_view to the string held in a JSON value (which is
|
|
// assumed to hold a string, i.e., v.IsString() == true). This is a view
|
|
// to the existing data - no copying is done.
|
|
inline std::string_view to_string_view(const rjson::value& v) {
|
|
return std::string_view(v.GetString(), v.GetStringLength());
|
|
}
|
|
|
|
// Those functions must be called on json string object.
|
|
// They make a copy of underlying data so it's safe to destroy
|
|
// rjson::value afterwards.
|
|
//
|
|
// Rapidjson's GetString method alone is not good enough
|
|
// for string conversion because it needs to scan the string
|
|
// unnecessarily and GetStringLength could be used to avoid that.
|
|
inline sstring to_sstring(const rjson::value& str) {
|
|
return sstring(str.GetString(), str.GetStringLength());
|
|
}
|
|
inline std::string to_string(const rjson::value& str) {
|
|
return std::string(str.GetString(), str.GetStringLength());
|
|
}
|
|
|
|
// Returns a pointer to JSON member if it exists, nullptr otherwise
|
|
rjson::value* find(rjson::value& value, std::string_view name);
|
|
const rjson::value* find(const rjson::value& value, std::string_view name);
|
|
|
|
// Returns a reference to JSON member if it exists, throws otherwise
|
|
rjson::value& get(rjson::value& value, std::string_view name);
|
|
const rjson::value& get(const rjson::value& value, std::string_view name);
|
|
|
|
/**
|
|
* Type conversion getter.
|
|
* Will typically require an existing rapidjson::internal::TypeHelper<...>
|
|
* to exist for the type queried.
|
|
*/
|
|
template<typename T>
|
|
T get(const rjson::value& value, std::string_view name) {
|
|
auto& v = get(value, name);
|
|
try {
|
|
return v.Get<T>();
|
|
} catch (...) {
|
|
std::throw_with_nested(malformed_value(name, v));
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Type conversion opt getter.
|
|
* Will typically require an existing rapidjson::internal::TypeHelper<...>
|
|
* to exist for the type queried.
|
|
*
|
|
* Return std::nullopt if value does not exist.
|
|
*/
|
|
template<typename T>
|
|
std::optional<T> get_opt(const rjson::value& value, std::string_view name) {
|
|
auto* v = find(value, name);
|
|
try {
|
|
return v ? std::optional<T>(v->Get<T>()) : std::nullopt;
|
|
} catch (...) {
|
|
std::throw_with_nested(malformed_value(name, *v));
|
|
}
|
|
}
|
|
|
|
// The various add*() functions below *add* a new member to a JSON object.
|
|
// They all assume that a member with the same key (name) doesn't already
|
|
// exist in that object, so they are meant to be used just to build a new
|
|
// object from scratch. If a member with the same name *may* exist, and
|
|
// might need to be replaced, use the replace*() functions instead.
|
|
// The benefit of the add*() functions is that they are faster (O(1),
|
|
// compared to O(n) for the replace* function that need to inspect the
|
|
// existing members).
|
|
|
|
// Adds a member to a given JSON object by moving the member - allocates the name.
|
|
// Throws if base is not a JSON object.
|
|
// Assumes a member with the same name does not yet exist in base.
|
|
void add_with_string_name(rjson::value& base, std::string_view name, rjson::value&& member);
|
|
|
|
// Adds a string member to a given JSON object by assigning its reference - allocates the name.
|
|
// NOTICE: member string liveness must be ensured to be at least as long as base's.
|
|
// Throws if base is not a JSON object.
|
|
// Assumes a member with the same name does not yet exist in base.
|
|
void add_with_string_name(rjson::value& base, std::string_view name, rjson::string_ref_type member);
|
|
|
|
// Adds a member to a given JSON object by moving the member.
|
|
// NOTICE: name liveness must be ensured to be at least as long as base's.
|
|
// Throws if base is not a JSON object.
|
|
// Assumes a member with the same name does not yet exist in base.
|
|
void add(rjson::value& base, rjson::string_ref_type name, rjson::value&& member);
|
|
|
|
// Adds a string member to a given JSON object by assigning its reference.
|
|
// NOTICE: name liveness must be ensured to be at least as long as base's.
|
|
// NOTICE: member liveness must be ensured to be at least as long as base's.
|
|
// Throws if base is not a JSON object.
|
|
// Assumes a member with the same name does not yet exist in base.
|
|
void add(rjson::value& base, rjson::string_ref_type name, rjson::string_ref_type member);
|
|
|
|
/**
|
|
* Type conversion setter.
|
|
* Will typically require an existing rapidjson::internal::TypeHelper<...>
|
|
* to exist for the type written.
|
|
*
|
|
* (Note: order is important. Need to be after set(..., rjson::value)).
|
|
* Assumes a member with the same name does not yet exist in base.
|
|
*/
|
|
template<typename T>
|
|
requires (!std::is_constructible_v<string_ref_type, T>)
|
|
void add(rjson::value& base, rjson::string_ref_type name, T&& member) {
|
|
extern allocator the_allocator;
|
|
|
|
rjson::value v;
|
|
v.Set(std::forward<T>(member), the_allocator);
|
|
add(base, std::move(name), std::move(v));
|
|
}
|
|
|
|
// Set a member in a given JSON object by moving the member - allocates the name.
|
|
// If a member with the same name already exist in base, it is replaced.
|
|
// Throws if base is not a JSON object.
|
|
void replace_with_string_name(rjson::value& base, std::string_view name, rjson::value&& member);
|
|
|
|
|
|
// Adds a value to a JSON list by moving the item to its end.
|
|
// Throws if base_array is not a JSON array.
|
|
void push_back(rjson::value& base_array, rjson::value&& item);
|
|
|
|
// Remove a member from a JSON object. Throws if value isn't an object.
|
|
bool remove_member(rjson::value& value, std::string_view name);
|
|
|
|
struct single_value_comp {
|
|
bool operator()(const rjson::value& r1, const rjson::value& r2) const;
|
|
};
|
|
|
|
// Helper function for parsing a JSON straight into a map
|
|
// of strings representing their values - useful for various
|
|
// database helper functions.
|
|
// This function exists for historical reasons - existing infrastructure
|
|
// relies on being able to transform a JSON string into a map of sstrings.
|
|
template<typename Map>
|
|
requires (std::is_same_v<Map, std::map<sstring, sstring>> || std::is_same_v<Map, std::unordered_map<sstring, sstring>>)
|
|
Map parse_to_map(std::string_view raw) {
|
|
Map map;
|
|
rjson::value root = rjson::parse(raw);
|
|
if (root.IsNull()) {
|
|
return map;
|
|
}
|
|
if (!root.IsObject()) {
|
|
throw rjson::error("Only json objects can be transformed to maps. Encountered: " + std::string(raw));
|
|
}
|
|
for (auto it = root.MemberBegin(); it != root.MemberEnd(); ++it) {
|
|
if (it->value.IsString()) {
|
|
map.emplace(sstring(rjson::to_string_view(it->name)), sstring(rjson::to_string_view(it->value)));
|
|
} else {
|
|
map.emplace(sstring(rjson::to_string_view(it->name)), sstring(rjson::print(it->value)));
|
|
}
|
|
}
|
|
return map;
|
|
}
|
|
|
|
// This function exists for historical reasons as well.
|
|
rjson::value from_string_map(const std::map<sstring, sstring>& map);
|
|
|
|
// The function operates on sstrings for historical reasons.
|
|
sstring quote_json_string(const sstring& value);
|
|
|
|
inline bytes base64_decode(const value& v) {
|
|
return ::base64_decode(to_string_view(v));
|
|
}
|
|
|
|
// A writer which allows writing json into an std::ostream in a streaming manner.
|
|
//
|
|
// It is a wrapper around rapidjson::Writer, with a more convenient API.
|
|
class streaming_writer {
|
|
using stream = rapidjson::BasicOStreamWrapper<std::ostream>;
|
|
using writer = rapidjson::Writer<stream, rjson::encoding, rjson::encoding, rjson::allocator>;
|
|
|
|
stream _stream;
|
|
writer _writer;
|
|
|
|
public:
|
|
streaming_writer(std::ostream& os = std::cout) : _stream(os), _writer(_stream)
|
|
{ }
|
|
|
|
writer& rjson_writer() { return _writer; }
|
|
|
|
// following the rapidjson method names here
|
|
bool Null() { return _writer.Null(); }
|
|
bool Bool(bool b) { return _writer.Bool(b); }
|
|
bool Int(int i) { return _writer.Int(i); }
|
|
bool Uint(unsigned i) { return _writer.Uint(i); }
|
|
bool Int64(int64_t i) { return _writer.Int64(i); }
|
|
bool Uint64(uint64_t i) { return _writer.Uint64(i); }
|
|
bool Double(double d) { return _writer.Double(d); }
|
|
bool RawNumber(std::string_view str) { return _writer.RawNumber(str.data(), str.size(), false); }
|
|
bool RawValue(std::string_view json, type root_type) { return _writer.RawValue(json.data(), json.size(), root_type); }
|
|
bool String(std::string_view str) { return _writer.String(str.data(), str.size(), false); }
|
|
bool StartObject() { return _writer.StartObject(); }
|
|
bool Key(std::string_view str) { return _writer.Key(str.data(), str.size(), false); }
|
|
bool EndObject(rapidjson::SizeType memberCount = 0) { return _writer.EndObject(memberCount); }
|
|
bool StartArray() { return _writer.StartArray(); }
|
|
bool EndArray(rapidjson::SizeType elementCount = 0) { return _writer.EndArray(elementCount); }
|
|
|
|
template<typename U>
|
|
bool Write(U v) {
|
|
using T = std::remove_cvref_t<U>;
|
|
if constexpr (std::same_as<T, bool>) {
|
|
return Bool(v);
|
|
} else if constexpr (std::same_as<T, int>) {
|
|
return Int(v);
|
|
} else if constexpr (std::same_as<T, unsigned>) {
|
|
return Uint(v);
|
|
} else if constexpr (std::same_as<T, int64_t>) {
|
|
return Int64(v);
|
|
} else if constexpr (std::same_as<T, uint64_t>) {
|
|
return Uint64(v);
|
|
} else if constexpr (std::same_as<T, double>) {
|
|
return Double(v);
|
|
} else if constexpr (std::convertible_to<T, std::string_view>) {
|
|
return String(v);
|
|
}
|
|
}
|
|
};
|
|
|
|
inline bool is_leaf(const rjson::value& value) {
|
|
return !value.IsObject() && !value.IsArray();
|
|
}
|
|
|
|
future<> destroy_gently(rjson::value&& value);
|
|
|
|
} // end namespace rjson
|
|
|
|
template <> struct fmt::formatter<rjson::value> {
|
|
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
|
|
auto format(const rjson::value& v, fmt::format_context& ctx) const {
|
|
return fmt::format_to(ctx.out(), "{}", rjson::print(v));
|
|
}
|
|
};
|