alternator: migrate to rapidjson library

Profiling alternator implied that JSON parsing takes up a fair amount
of CPU, and as such should be optimized. libjsoncpp is a standard
library for handling JSON objects, but it also proves slower than
rapidjson, which is hereby used instead.
The results indicated that libjsoncpp used roughly 30% of CPU
for a single-shard alternator instance under stress, while rapidjson
dropped that usage to 18% without optimizations.
Future optimizations should include eliding object copying, string copying
and perhaps experimenting with different JSON allocators.
This commit is contained in:
Piotr Sarna
2019-08-05 09:06:07 +02:00
committed by Avi Kivity
parent 0fd1354ef9
commit cb29d6485e
5 changed files with 487 additions and 389 deletions

View File

@@ -11,16 +11,16 @@
#include <list>
#include <map>
#include "alternator/conditions.hh"
#include "alternator/serialization.hh"
#include "alternator/error.hh"
#include "cql3/constants.hh"
#include <unordered_map>
#include "rjson.hh"
namespace alternator {
static logging::logger clogger("alternator-conditions");
comparison_operator_type get_comparison_operator(const Json::Value& comparison_operator) {
comparison_operator_type get_comparison_operator(const rjson::value& comparison_operator) {
static std::unordered_map<std::string, comparison_operator_type> ops = {
{"EQ", comparison_operator_type::EQ},
{"LE", comparison_operator_type::LE},
@@ -30,10 +30,10 @@ comparison_operator_type get_comparison_operator(const Json::Value& comparison_o
{"BETWEEN", comparison_operator_type::BETWEEN},
{"BEGINS_WITH", comparison_operator_type::BEGINS_WITH},
}; //TODO(sarna): NE, IN, CONTAINS, NULL, NOT_NULL
if (!comparison_operator.isString()) {
throw api_error("ValidationException", format("Invalid comparison operator definition {}", comparison_operator.toStyledString()));
if (!comparison_operator.IsString()) {
throw api_error("ValidationException", format("Invalid comparison operator definition {}", rjson::print(comparison_operator)));
}
std::string op = comparison_operator.asString();
std::string op = comparison_operator.GetString();
auto it = ops.find(op);
if (it == ops.end()) {
throw api_error("ValidationException", format("Unsupported comparison operator {}", op));
@@ -41,7 +41,7 @@ comparison_operator_type get_comparison_operator(const Json::Value& comparison_o
return it->second;
}
::shared_ptr<cql3::restrictions::single_column_restriction::contains> make_map_element_restriction(const column_definition& cdef, const std::string& key, const Json::Value& value) {
::shared_ptr<cql3::restrictions::single_column_restriction::contains> make_map_element_restriction(const column_definition& cdef, const std::string& key, const rjson::value& value) {
bytes raw_key = utf8_type->from_string(sstring(key));
auto key_value = ::make_shared<cql3::constants::value>(cql3::raw_value::make_value(std::move(raw_key)));
bytes raw_value = serialize_item(value);
@@ -49,15 +49,15 @@ comparison_operator_type get_comparison_operator(const Json::Value& comparison_o
return make_shared<cql3::restrictions::single_column_restriction::contains>(cdef, std::move(key_value), std::move(entry_value));
}
::shared_ptr<cql3::restrictions::statement_restrictions> get_filtering_restrictions(schema_ptr schema, const column_definition& attrs_col, const Json::Value& query_filter) {
clogger.trace("Getting filtering restrictions for: {}", query_filter.toStyledString());
::shared_ptr<cql3::restrictions::statement_restrictions> get_filtering_restrictions(schema_ptr schema, const column_definition& attrs_col, const rjson::value& query_filter) {
clogger.trace("Getting filtering restrictions for: {}", rjson::print(query_filter));
auto filtering_restrictions = ::make_shared<cql3::restrictions::statement_restrictions>(schema, true);
for (auto it = query_filter.begin(); it != query_filter.end(); ++it) {
std::string column_name = it.key().asString();
const Json::Value& condition = *it;
for (auto it = query_filter.MemberBegin(); it != query_filter.MemberEnd(); ++it) {
std::string column_name = it->name.GetString();
const rjson::value& condition = it->value;
Json::Value comp_definition = condition.get("ComparisonOperator", Json::Value());
Json::Value attr_list = condition.get("AttributeValueList", Json::Value(Json::arrayValue));
const rjson::value& comp_definition = rjson::get(condition, "ComparisonOperator");
const rjson::value& attr_list = rjson::get(condition, "AttributeValueList");
comparison_operator_type op = get_comparison_operator(comp_definition);
if (schema->get_column_definition(to_bytes(column_name))) {
@@ -67,8 +67,8 @@ comparison_operator_type get_comparison_operator(const Json::Value& comparison_o
if (op != comparison_operator_type::EQ) {
throw api_error("ValidationException", "Filtering is currently implemented for EQ operator only");
}
if (attr_list.size() != 1) {
throw api_error("ValidationException", format("EQ restriction needs exactly 1 attribute value: {}", attr_list.toStyledString()));
if (attr_list.Size() != 1) {
throw api_error("ValidationException", format("EQ restriction needs exactly 1 attribute value: {}", rjson::print(attr_list)));
}
filtering_restrictions->add_restriction(make_map_element_restriction(attrs_col, column_name, attr_list[0]), false, true);

View File

@@ -21,6 +21,7 @@
#pragma once
#include "cql3/restrictions/statement_restrictions.hh"
#include "serialization.hh"
namespace alternator {
@@ -28,9 +29,9 @@ enum class comparison_operator_type {
EQ, NE, LE, LT, GE, GT, IN, BETWEEN, CONTAINS, IS_NULL, NOT_NULL, BEGINS_WITH
};
comparison_operator_type get_comparison_operator(const Json::Value& comparison_operator);
comparison_operator_type get_comparison_operator(const rjson::value& comparison_operator);
::shared_ptr<cql3::restrictions::single_column_restriction::contains> make_map_element_restriction(const column_definition& cdef, const std::string& key, const Json::Value& value);
::shared_ptr<cql3::restrictions::statement_restrictions> get_filtering_restrictions(schema_ptr schema, const column_definition& attrs_col, const Json::Value& query_filter);
::shared_ptr<cql3::restrictions::single_column_restriction::contains> make_map_element_restriction(const column_definition& cdef, const std::string& key, const rjson::value& value);
::shared_ptr<cql3::restrictions::statement_restrictions> get_filtering_restrictions(schema_ptr schema, const column_definition& attrs_col, const rjson::value& query_filter);
}

File diff suppressed because it is too large Load Diff

View File

@@ -12,6 +12,7 @@
#include "log.hh"
#include "serialization.hh"
#include "error.hh"
#include "rapidjson/writer.h"
static logging::logger slogger("alternator-serialization");
@@ -45,26 +46,32 @@ type_representation represent_type(alternator_type atype) {
return it->second;
}
bytes serialize_item(const Json::Value& item) {
if (item.size() != 1) {
throw api_error("ValidationException", format("An item can contain only one attribute definition: {}", item.toStyledString()));
bytes serialize_item(const rjson::value& item) {
if (item.IsNull() || item.MemberCount() != 1) {
throw api_error("ValidationException", format("An item can contain only one attribute definition: {}", item));
}
auto it = item.begin();
type_info type_info = type_info_from_string(it.key().asString()); // JSON keys are guaranteed to be strings
auto it = item.MemberBegin();
type_info type_info = type_info_from_string(it->name.GetString()); // JSON keys are guaranteed to be strings
if (type_info.atype == alternator_type::NOT_SUPPORTED_YET) {
slogger.trace("Non-optimal serialization of type {}", it.key());
return bytes{int8_t(type_info.atype)} + to_bytes(item.toStyledString());
slogger.trace("Non-optimal serialization of type {}", it->name.GetString());
return bytes{int8_t(type_info.atype)} + to_bytes(rjson::print(item));
}
bytes serialized;
// Alternator bytes representation does not start with "0x" followed by hex digits as Scylla-JSON does,
// but instead uses base64.
if (type_info.dtype == bytes_type) {
std::string raw_value = it->asString();
std::string raw_value = it->value.GetString();
serialized = base64_decode(std::string_view(raw_value));
} else if (type_info.dtype == decimal_type) {
serialized = type_info.dtype->from_string(it->value.GetString());
} else if (type_info.dtype == boolean_type) {
serialized = type_info.dtype->from_json_object(Json::Value(it->value.GetBool()), cql_serialization_format::internal());
} else {
serialized = type_info.dtype->from_json_object(*it, cql_serialization_format::internal());
//FIXME(sarna): Once we have type visitors, this double conversion hack should be replaced with parsing straight from rapidjson
serialized = type_info.dtype->from_json_object(Json::Value(rjson::print(it->value)), cql_serialization_format::internal());
}
//NOTICE: redundant copy here, from_json_object should accept bytes' output iterator too.
@@ -72,8 +79,8 @@ bytes serialize_item(const Json::Value& item) {
return bytes{int8_t(type_info.atype)} + std::move(serialized);
}
Json::Value deserialize_item(bytes_view bv) {
Json::Value deserialized;
rjson::value deserialize_item(bytes_view bv) {
rjson::value deserialized(rapidjson::kObjectType);
if (bv.empty()) {
throw api_error("ValidationException", "Serialized value empty");
}
@@ -83,17 +90,18 @@ Json::Value deserialize_item(bytes_view bv) {
if (atype == alternator_type::NOT_SUPPORTED_YET) {
slogger.trace("Non-optimal deserialization of alternator type {}", int8_t(atype));
return json::to_json_value(sstring(reinterpret_cast<const char *>(bv.data()), bv.size()));
return rjson::parse_raw(reinterpret_cast<const char *>(bv.data()), bv.size());
}
type_representation type_representation = represent_type(atype);
if (type_representation.dtype == bytes_type) {
deserialized[type_representation.ident] = base64_encode(bv);
std::string b64 = base64_encode(bv);
rjson::set_with_string_name(deserialized, type_representation.ident, rjson::from_string(b64));
} else if (type_representation.dtype == decimal_type) {
auto s = decimal_type->to_json_string(bytes(bv)); //FIXME(sarna): unnecessary copy
deserialized[type_representation.ident] = Json::Value(reinterpret_cast<const char*>(s.data()), reinterpret_cast<const char*>(s.data()) + s.size());
rjson::set_with_string_name(deserialized, type_representation.ident, rjson::from_string(s));
} else {
deserialized[type_representation.ident] = json::to_json_value(type_representation.dtype->to_json_string(bytes(bv))); //FIXME(sarna): unnecessary copy
rjson::set_with_string_name(deserialized, type_representation.ident, rjson::parse(type_representation.dtype->to_string(bytes(bv))));
}
return deserialized;
@@ -113,43 +121,41 @@ std::string type_to_string(data_type type) {
return it->second;
}
bytes get_key_column_value(const Json::Value& item, const column_definition& column) {
bytes get_key_column_value(const rjson::value& item, const column_definition& column) {
std::string column_name = column.name_as_text();
std::string expected_type = type_to_string(column.type);
Json::Value key_typed_value = item.get(column_name, Json::nullValue);
if (!key_typed_value.isObject() || key_typed_value.size() != 1) {
const rjson::value& key_typed_value = rjson::get(item, rjson::value::StringRefType(column_name.c_str()));
if (!key_typed_value.IsObject() || key_typed_value.MemberCount() != 1) {
throw api_error("ValidationException",
format("Missing or invalid value object for key column {}: {}",
column_name, item.toStyledString()));
format("Missing or invalid value object for key column {}: {}", column_name, item));
}
auto it = key_typed_value.begin();
if (it.key().asString() != expected_type) {
auto it = key_typed_value.MemberBegin();
if (it->name.GetString() != expected_type) {
throw api_error("ValidationException",
format("Expected type {} for key column {}, got type {}",
expected_type, column_name, it.key().asString()));
expected_type, column_name, it->name.GetString()));
}
if (column.type == bytes_type) {
return base64_decode(it->asString());
return base64_decode(it->value.GetString());
} else {
return column.type->from_string(it->asString());
return column.type->from_string(it->value.GetString());
}
}
Json::Value json_key_column_value(bytes_view cell, const column_definition& column) {
rjson::value json_key_column_value(bytes_view cell, const column_definition& column) {
if (column.type == bytes_type) {
return base64_encode(cell);
std::string b64 = base64_encode(cell);
return rjson::from_string(b64);
} if (column.type == utf8_type) {
return Json::Value(reinterpret_cast<const char*>(cell.data()),
reinterpret_cast<const char*>(cell.data()) + cell.size());
return rjson::from_string(std::string(reinterpret_cast<const char*>(cell.data()), cell.size()));
} else if (column.type == decimal_type) {
// FIXME: use specialized Alternator number type, not the more
// general "decimal_type". A dedicated type can be more efficient
// in storage space and in parsing speed.
auto s = decimal_type->to_json_string(bytes(cell));
return Json::Value(reinterpret_cast<const char*>(s.data()),
reinterpret_cast<const char*>(s.data()) + s.size());
return rjson::from_string(s);
} else {
// We shouldn't get here, we shouldn't see such key columns.
throw std::runtime_error(format("Unexpected key type: {}", column.type->name()));
@@ -157,7 +163,7 @@ Json::Value json_key_column_value(bytes_view cell, const column_definition& colu
}
partition_key pk_from_json(const Json::Value& item, schema_ptr schema) {
partition_key pk_from_json(const rjson::value& item, schema_ptr schema) {
std::vector<bytes> raw_pk;
// FIXME: this is a loop, but we really allow only one partition key column.
for (const column_definition& cdef : schema->partition_key_columns()) {
@@ -167,7 +173,7 @@ partition_key pk_from_json(const Json::Value& item, schema_ptr schema) {
return partition_key::from_exploded(raw_pk);
}
clustering_key ck_from_json(const Json::Value& item, schema_ptr schema) {
clustering_key ck_from_json(const rjson::value& item, schema_ptr schema) {
if (schema->clustering_key_size() == 0) {
return clustering_key::make_empty();
}

View File

@@ -14,7 +14,7 @@
#include "types.hh"
#include "schema.hh"
#include "keys.hh"
#include "json.hh"
#include "rjson.hh"
namespace alternator {
@@ -35,15 +35,15 @@ struct type_representation {
type_info type_info_from_string(std::string type);
type_representation represent_type(alternator_type atype);
bytes serialize_item(const Json::Value& item);
Json::Value deserialize_item(bytes_view bv);
bytes serialize_item(const rjson::value& item);
rjson::value deserialize_item(bytes_view bv);
std::string type_to_string(data_type type);
bytes get_key_column_value(const Json::Value& item, const column_definition& column);
Json::Value json_key_column_value(bytes_view cell, const column_definition& column);
bytes get_key_column_value(const rjson::value& item, const column_definition& column);
rjson::value json_key_column_value(bytes_view cell, const column_definition& column);
partition_key pk_from_json(const Json::Value& item, schema_ptr schema);
clustering_key ck_from_json(const Json::Value& item, schema_ptr schema);
partition_key pk_from_json(const rjson::value& item, schema_ptr schema);
clustering_key ck_from_json(const rjson::value& item, schema_ptr schema);
}