alternator: migrate to rapidjson library
Profiling alternator showed that JSON parsing takes up a fair amount of CPU and is therefore worth optimizing. libjsoncpp is a standard library for handling JSON objects, but it proves slower than rapidjson, which is used instead from this commit on. Measurements indicated that libjsoncpp used roughly 30% of CPU for a single-shard alternator instance under stress, while rapidjson reduced that usage to 18% even without any further optimizations. Future optimizations should include eliding object copies and string copies, and perhaps experimenting with different JSON allocators.
This commit is contained in:
@@ -11,16 +11,16 @@
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include "alternator/conditions.hh"
|
||||
#include "alternator/serialization.hh"
|
||||
#include "alternator/error.hh"
|
||||
#include "cql3/constants.hh"
|
||||
#include <unordered_map>
|
||||
#include "rjson.hh"
|
||||
|
||||
namespace alternator {
|
||||
|
||||
static logging::logger clogger("alternator-conditions");
|
||||
|
||||
comparison_operator_type get_comparison_operator(const Json::Value& comparison_operator) {
|
||||
comparison_operator_type get_comparison_operator(const rjson::value& comparison_operator) {
|
||||
static std::unordered_map<std::string, comparison_operator_type> ops = {
|
||||
{"EQ", comparison_operator_type::EQ},
|
||||
{"LE", comparison_operator_type::LE},
|
||||
@@ -30,10 +30,10 @@ comparison_operator_type get_comparison_operator(const Json::Value& comparison_o
|
||||
{"BETWEEN", comparison_operator_type::BETWEEN},
|
||||
{"BEGINS_WITH", comparison_operator_type::BEGINS_WITH},
|
||||
}; //TODO(sarna): NE, IN, CONTAINS, NULL, NOT_NULL
|
||||
if (!comparison_operator.isString()) {
|
||||
throw api_error("ValidationException", format("Invalid comparison operator definition {}", comparison_operator.toStyledString()));
|
||||
if (!comparison_operator.IsString()) {
|
||||
throw api_error("ValidationException", format("Invalid comparison operator definition {}", rjson::print(comparison_operator)));
|
||||
}
|
||||
std::string op = comparison_operator.asString();
|
||||
std::string op = comparison_operator.GetString();
|
||||
auto it = ops.find(op);
|
||||
if (it == ops.end()) {
|
||||
throw api_error("ValidationException", format("Unsupported comparison operator {}", op));
|
||||
@@ -41,7 +41,7 @@ comparison_operator_type get_comparison_operator(const Json::Value& comparison_o
|
||||
return it->second;
|
||||
}
|
||||
|
||||
::shared_ptr<cql3::restrictions::single_column_restriction::contains> make_map_element_restriction(const column_definition& cdef, const std::string& key, const Json::Value& value) {
|
||||
::shared_ptr<cql3::restrictions::single_column_restriction::contains> make_map_element_restriction(const column_definition& cdef, const std::string& key, const rjson::value& value) {
|
||||
bytes raw_key = utf8_type->from_string(sstring(key));
|
||||
auto key_value = ::make_shared<cql3::constants::value>(cql3::raw_value::make_value(std::move(raw_key)));
|
||||
bytes raw_value = serialize_item(value);
|
||||
@@ -49,15 +49,15 @@ comparison_operator_type get_comparison_operator(const Json::Value& comparison_o
|
||||
return make_shared<cql3::restrictions::single_column_restriction::contains>(cdef, std::move(key_value), std::move(entry_value));
|
||||
}
|
||||
|
||||
::shared_ptr<cql3::restrictions::statement_restrictions> get_filtering_restrictions(schema_ptr schema, const column_definition& attrs_col, const Json::Value& query_filter) {
|
||||
clogger.trace("Getting filtering restrictions for: {}", query_filter.toStyledString());
|
||||
::shared_ptr<cql3::restrictions::statement_restrictions> get_filtering_restrictions(schema_ptr schema, const column_definition& attrs_col, const rjson::value& query_filter) {
|
||||
clogger.trace("Getting filtering restrictions for: {}", rjson::print(query_filter));
|
||||
auto filtering_restrictions = ::make_shared<cql3::restrictions::statement_restrictions>(schema, true);
|
||||
for (auto it = query_filter.begin(); it != query_filter.end(); ++it) {
|
||||
std::string column_name = it.key().asString();
|
||||
const Json::Value& condition = *it;
|
||||
for (auto it = query_filter.MemberBegin(); it != query_filter.MemberEnd(); ++it) {
|
||||
std::string column_name = it->name.GetString();
|
||||
const rjson::value& condition = it->value;
|
||||
|
||||
Json::Value comp_definition = condition.get("ComparisonOperator", Json::Value());
|
||||
Json::Value attr_list = condition.get("AttributeValueList", Json::Value(Json::arrayValue));
|
||||
const rjson::value& comp_definition = rjson::get(condition, "ComparisonOperator");
|
||||
const rjson::value& attr_list = rjson::get(condition, "AttributeValueList");
|
||||
comparison_operator_type op = get_comparison_operator(comp_definition);
|
||||
|
||||
if (schema->get_column_definition(to_bytes(column_name))) {
|
||||
@@ -67,8 +67,8 @@ comparison_operator_type get_comparison_operator(const Json::Value& comparison_o
|
||||
if (op != comparison_operator_type::EQ) {
|
||||
throw api_error("ValidationException", "Filtering is currently implemented for EQ operator only");
|
||||
}
|
||||
if (attr_list.size() != 1) {
|
||||
throw api_error("ValidationException", format("EQ restriction needs exactly 1 attribute value: {}", attr_list.toStyledString()));
|
||||
if (attr_list.Size() != 1) {
|
||||
throw api_error("ValidationException", format("EQ restriction needs exactly 1 attribute value: {}", rjson::print(attr_list)));
|
||||
}
|
||||
|
||||
filtering_restrictions->add_restriction(make_map_element_restriction(attrs_col, column_name, attr_list[0]), false, true);
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "cql3/restrictions/statement_restrictions.hh"
|
||||
#include "serialization.hh"
|
||||
|
||||
namespace alternator {
|
||||
|
||||
@@ -28,9 +29,9 @@ enum class comparison_operator_type {
|
||||
EQ, NE, LE, LT, GE, GT, IN, BETWEEN, CONTAINS, IS_NULL, NOT_NULL, BEGINS_WITH
|
||||
};
|
||||
|
||||
comparison_operator_type get_comparison_operator(const Json::Value& comparison_operator);
|
||||
comparison_operator_type get_comparison_operator(const rjson::value& comparison_operator);
|
||||
|
||||
::shared_ptr<cql3::restrictions::single_column_restriction::contains> make_map_element_restriction(const column_definition& cdef, const std::string& key, const Json::Value& value);
|
||||
::shared_ptr<cql3::restrictions::statement_restrictions> get_filtering_restrictions(schema_ptr schema, const column_definition& attrs_col, const Json::Value& query_filter);
|
||||
::shared_ptr<cql3::restrictions::single_column_restriction::contains> make_map_element_restriction(const column_definition& cdef, const std::string& key, const rjson::value& value);
|
||||
::shared_ptr<cql3::restrictions::statement_restrictions> get_filtering_restrictions(schema_ptr schema, const column_definition& attrs_col, const rjson::value& query_filter);
|
||||
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -12,6 +12,7 @@
|
||||
#include "log.hh"
|
||||
#include "serialization.hh"
|
||||
#include "error.hh"
|
||||
#include "rapidjson/writer.h"
|
||||
|
||||
static logging::logger slogger("alternator-serialization");
|
||||
|
||||
@@ -45,26 +46,32 @@ type_representation represent_type(alternator_type atype) {
|
||||
return it->second;
|
||||
}
|
||||
|
||||
bytes serialize_item(const Json::Value& item) {
|
||||
if (item.size() != 1) {
|
||||
throw api_error("ValidationException", format("An item can contain only one attribute definition: {}", item.toStyledString()));
|
||||
bytes serialize_item(const rjson::value& item) {
|
||||
if (item.IsNull() || item.MemberCount() != 1) {
|
||||
throw api_error("ValidationException", format("An item can contain only one attribute definition: {}", item));
|
||||
}
|
||||
auto it = item.begin();
|
||||
type_info type_info = type_info_from_string(it.key().asString()); // JSON keys are guaranteed to be strings
|
||||
auto it = item.MemberBegin();
|
||||
type_info type_info = type_info_from_string(it->name.GetString()); // JSON keys are guaranteed to be strings
|
||||
|
||||
if (type_info.atype == alternator_type::NOT_SUPPORTED_YET) {
|
||||
slogger.trace("Non-optimal serialization of type {}", it.key());
|
||||
return bytes{int8_t(type_info.atype)} + to_bytes(item.toStyledString());
|
||||
slogger.trace("Non-optimal serialization of type {}", it->name.GetString());
|
||||
return bytes{int8_t(type_info.atype)} + to_bytes(rjson::print(item));
|
||||
}
|
||||
|
||||
bytes serialized;
|
||||
// Alternator bytes representation does not start with "0x" followed by hex digits as Scylla-JSON does,
|
||||
// but instead uses base64.
|
||||
|
||||
if (type_info.dtype == bytes_type) {
|
||||
std::string raw_value = it->asString();
|
||||
std::string raw_value = it->value.GetString();
|
||||
serialized = base64_decode(std::string_view(raw_value));
|
||||
} else if (type_info.dtype == decimal_type) {
|
||||
serialized = type_info.dtype->from_string(it->value.GetString());
|
||||
} else if (type_info.dtype == boolean_type) {
|
||||
serialized = type_info.dtype->from_json_object(Json::Value(it->value.GetBool()), cql_serialization_format::internal());
|
||||
} else {
|
||||
serialized = type_info.dtype->from_json_object(*it, cql_serialization_format::internal());
|
||||
//FIXME(sarna): Once we have type visitors, this double conversion hack should be replaced with parsing straight from rapidjson
|
||||
serialized = type_info.dtype->from_json_object(Json::Value(rjson::print(it->value)), cql_serialization_format::internal());
|
||||
}
|
||||
|
||||
//NOTICE: redundant copy here, from_json_object should accept bytes' output iterator too.
|
||||
@@ -72,8 +79,8 @@ bytes serialize_item(const Json::Value& item) {
|
||||
return bytes{int8_t(type_info.atype)} + std::move(serialized);
|
||||
}
|
||||
|
||||
Json::Value deserialize_item(bytes_view bv) {
|
||||
Json::Value deserialized;
|
||||
rjson::value deserialize_item(bytes_view bv) {
|
||||
rjson::value deserialized(rapidjson::kObjectType);
|
||||
if (bv.empty()) {
|
||||
throw api_error("ValidationException", "Serialized value empty");
|
||||
}
|
||||
@@ -83,17 +90,18 @@ Json::Value deserialize_item(bytes_view bv) {
|
||||
|
||||
if (atype == alternator_type::NOT_SUPPORTED_YET) {
|
||||
slogger.trace("Non-optimal deserialization of alternator type {}", int8_t(atype));
|
||||
return json::to_json_value(sstring(reinterpret_cast<const char *>(bv.data()), bv.size()));
|
||||
return rjson::parse_raw(reinterpret_cast<const char *>(bv.data()), bv.size());
|
||||
}
|
||||
|
||||
type_representation type_representation = represent_type(atype);
|
||||
if (type_representation.dtype == bytes_type) {
|
||||
deserialized[type_representation.ident] = base64_encode(bv);
|
||||
std::string b64 = base64_encode(bv);
|
||||
rjson::set_with_string_name(deserialized, type_representation.ident, rjson::from_string(b64));
|
||||
} else if (type_representation.dtype == decimal_type) {
|
||||
auto s = decimal_type->to_json_string(bytes(bv)); //FIXME(sarna): unnecessary copy
|
||||
deserialized[type_representation.ident] = Json::Value(reinterpret_cast<const char*>(s.data()), reinterpret_cast<const char*>(s.data()) + s.size());
|
||||
rjson::set_with_string_name(deserialized, type_representation.ident, rjson::from_string(s));
|
||||
} else {
|
||||
deserialized[type_representation.ident] = json::to_json_value(type_representation.dtype->to_json_string(bytes(bv))); //FIXME(sarna): unnecessary copy
|
||||
rjson::set_with_string_name(deserialized, type_representation.ident, rjson::parse(type_representation.dtype->to_string(bytes(bv))));
|
||||
}
|
||||
|
||||
return deserialized;
|
||||
@@ -113,43 +121,41 @@ std::string type_to_string(data_type type) {
|
||||
return it->second;
|
||||
}
|
||||
|
||||
bytes get_key_column_value(const Json::Value& item, const column_definition& column) {
|
||||
bytes get_key_column_value(const rjson::value& item, const column_definition& column) {
|
||||
std::string column_name = column.name_as_text();
|
||||
std::string expected_type = type_to_string(column.type);
|
||||
|
||||
Json::Value key_typed_value = item.get(column_name, Json::nullValue);
|
||||
if (!key_typed_value.isObject() || key_typed_value.size() != 1) {
|
||||
const rjson::value& key_typed_value = rjson::get(item, rjson::value::StringRefType(column_name.c_str()));
|
||||
if (!key_typed_value.IsObject() || key_typed_value.MemberCount() != 1) {
|
||||
throw api_error("ValidationException",
|
||||
format("Missing or invalid value object for key column {}: {}",
|
||||
column_name, item.toStyledString()));
|
||||
format("Missing or invalid value object for key column {}: {}", column_name, item));
|
||||
}
|
||||
auto it = key_typed_value.begin();
|
||||
if (it.key().asString() != expected_type) {
|
||||
auto it = key_typed_value.MemberBegin();
|
||||
if (it->name.GetString() != expected_type) {
|
||||
throw api_error("ValidationException",
|
||||
format("Expected type {} for key column {}, got type {}",
|
||||
expected_type, column_name, it.key().asString()));
|
||||
expected_type, column_name, it->name.GetString()));
|
||||
}
|
||||
if (column.type == bytes_type) {
|
||||
return base64_decode(it->asString());
|
||||
return base64_decode(it->value.GetString());
|
||||
} else {
|
||||
return column.type->from_string(it->asString());
|
||||
return column.type->from_string(it->value.GetString());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Json::Value json_key_column_value(bytes_view cell, const column_definition& column) {
|
||||
rjson::value json_key_column_value(bytes_view cell, const column_definition& column) {
|
||||
if (column.type == bytes_type) {
|
||||
return base64_encode(cell);
|
||||
std::string b64 = base64_encode(cell);
|
||||
return rjson::from_string(b64);
|
||||
} if (column.type == utf8_type) {
|
||||
return Json::Value(reinterpret_cast<const char*>(cell.data()),
|
||||
reinterpret_cast<const char*>(cell.data()) + cell.size());
|
||||
return rjson::from_string(std::string(reinterpret_cast<const char*>(cell.data()), cell.size()));
|
||||
} else if (column.type == decimal_type) {
|
||||
// FIXME: use specialized Alternator number type, not the more
|
||||
// general "decimal_type". A dedicated type can be more efficient
|
||||
// in storage space and in parsing speed.
|
||||
auto s = decimal_type->to_json_string(bytes(cell));
|
||||
return Json::Value(reinterpret_cast<const char*>(s.data()),
|
||||
reinterpret_cast<const char*>(s.data()) + s.size());
|
||||
return rjson::from_string(s);
|
||||
} else {
|
||||
// We shouldn't get here, we shouldn't see such key columns.
|
||||
throw std::runtime_error(format("Unexpected key type: {}", column.type->name()));
|
||||
@@ -157,7 +163,7 @@ Json::Value json_key_column_value(bytes_view cell, const column_definition& colu
|
||||
}
|
||||
|
||||
|
||||
partition_key pk_from_json(const Json::Value& item, schema_ptr schema) {
|
||||
partition_key pk_from_json(const rjson::value& item, schema_ptr schema) {
|
||||
std::vector<bytes> raw_pk;
|
||||
// FIXME: this is a loop, but we really allow only one partition key column.
|
||||
for (const column_definition& cdef : schema->partition_key_columns()) {
|
||||
@@ -167,7 +173,7 @@ partition_key pk_from_json(const Json::Value& item, schema_ptr schema) {
|
||||
return partition_key::from_exploded(raw_pk);
|
||||
}
|
||||
|
||||
clustering_key ck_from_json(const Json::Value& item, schema_ptr schema) {
|
||||
clustering_key ck_from_json(const rjson::value& item, schema_ptr schema) {
|
||||
if (schema->clustering_key_size() == 0) {
|
||||
return clustering_key::make_empty();
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
#include "types.hh"
|
||||
#include "schema.hh"
|
||||
#include "keys.hh"
|
||||
#include "json.hh"
|
||||
#include "rjson.hh"
|
||||
|
||||
namespace alternator {
|
||||
|
||||
@@ -35,15 +35,15 @@ struct type_representation {
|
||||
type_info type_info_from_string(std::string type);
|
||||
type_representation represent_type(alternator_type atype);
|
||||
|
||||
bytes serialize_item(const Json::Value& item);
|
||||
Json::Value deserialize_item(bytes_view bv);
|
||||
bytes serialize_item(const rjson::value& item);
|
||||
rjson::value deserialize_item(bytes_view bv);
|
||||
|
||||
std::string type_to_string(data_type type);
|
||||
|
||||
bytes get_key_column_value(const Json::Value& item, const column_definition& column);
|
||||
Json::Value json_key_column_value(bytes_view cell, const column_definition& column);
|
||||
bytes get_key_column_value(const rjson::value& item, const column_definition& column);
|
||||
rjson::value json_key_column_value(bytes_view cell, const column_definition& column);
|
||||
|
||||
partition_key pk_from_json(const Json::Value& item, schema_ptr schema);
|
||||
clustering_key ck_from_json(const Json::Value& item, schema_ptr schema);
|
||||
partition_key pk_from_json(const rjson::value& item, schema_ptr schema);
|
||||
clustering_key ck_from_json(const rjson::value& item, schema_ptr schema);
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user