alternator: migrate to rapidjson library

Profiling alternator showed that JSON parsing takes up a fair amount of CPU and should therefore be optimized. libjsoncpp is a standard library for handling JSON objects, but it proved slower than rapidjson, which is used instead from now on. Measurements showed that libjsoncpp used roughly 30% of CPU for a single-shard alternator instance under stress, while rapidjson dropped that usage to 18% without optimizations. Future optimizations should include eliding object copies and string copies, and perhaps experimenting with different JSON allocators.
2026-05-29 19:21:01 +00:00 · 2019-08-05 09:06:07 +02:00
parent 0fd1354ef9
commit cb29d6485e
5 changed files with 487 additions and 389 deletions
--- a/alternator/conditions.cc
+++ b/alternator/conditions.cc
@@ -11,16 +11,16 @@
 #include <list>
 #include <map>
 #include "alternator/conditions.hh"
-#include "alternator/serialization.hh"
 #include "alternator/error.hh"
 #include "cql3/constants.hh"
 #include <unordered_map>
+#include "rjson.hh"

 namespace alternator {

 static logging::logger clogger("alternator-conditions");

-comparison_operator_type get_comparison_operator(const Json::Value& comparison_operator) {
+comparison_operator_type get_comparison_operator(const rjson::value& comparison_operator) {
    static std::unordered_map<std::string, comparison_operator_type> ops = {
            {"EQ", comparison_operator_type::EQ},
            {"LE", comparison_operator_type::LE},
@@ -30,10 +30,10 @@ comparison_operator_type get_comparison_operator(const Json::Value& comparison_o
            {"BETWEEN", comparison_operator_type::BETWEEN},
            {"BEGINS_WITH", comparison_operator_type::BEGINS_WITH},
    }; //TODO(sarna): NE, IN, CONTAINS, NULL, NOT_NULL
-    if (!comparison_operator.isString()) {
-        throw api_error("ValidationException", format("Invalid comparison operator definition {}", comparison_operator.toStyledString()));
+    if (!comparison_operator.IsString()) {
+        throw api_error("ValidationException", format("Invalid comparison operator definition {}", rjson::print(comparison_operator)));
    }
-    std::string op = comparison_operator.asString();
+    std::string op = comparison_operator.GetString();
    auto it = ops.find(op);
    if (it == ops.end()) {
        throw api_error("ValidationException", format("Unsupported comparison operator {}", op));
@@ -41,7 +41,7 @@ comparison_operator_type get_comparison_operator(const Json::Value& comparison_o
    return it->second;
 }

-::shared_ptr<cql3::restrictions::single_column_restriction::contains> make_map_element_restriction(const column_definition& cdef, const std::string& key, const Json::Value& value) {
+::shared_ptr<cql3::restrictions::single_column_restriction::contains> make_map_element_restriction(const column_definition& cdef, const std::string& key, const rjson::value& value) {
    bytes raw_key = utf8_type->from_string(sstring(key));
    auto key_value = ::make_shared<cql3::constants::value>(cql3::raw_value::make_value(std::move(raw_key)));
    bytes raw_value = serialize_item(value);
@@ -49,15 +49,15 @@ comparison_operator_type get_comparison_operator(const Json::Value& comparison_o
    return make_shared<cql3::restrictions::single_column_restriction::contains>(cdef, std::move(key_value), std::move(entry_value));
 }

-::shared_ptr<cql3::restrictions::statement_restrictions> get_filtering_restrictions(schema_ptr schema, const column_definition& attrs_col, const Json::Value& query_filter) {
-    clogger.trace("Getting filtering restrictions for: {}", query_filter.toStyledString());
+::shared_ptr<cql3::restrictions::statement_restrictions> get_filtering_restrictions(schema_ptr schema, const column_definition& attrs_col, const rjson::value& query_filter) {
+    clogger.trace("Getting filtering restrictions for: {}", rjson::print(query_filter));
    auto filtering_restrictions = ::make_shared<cql3::restrictions::statement_restrictions>(schema, true);
-    for (auto it = query_filter.begin(); it != query_filter.end(); ++it) {
-        std::string column_name = it.key().asString();
-        const Json::Value& condition = *it;
+    for (auto it = query_filter.MemberBegin(); it != query_filter.MemberEnd(); ++it) {
+        std::string column_name = it->name.GetString();
+        const rjson::value& condition = it->value;

-        Json::Value comp_definition = condition.get("ComparisonOperator", Json::Value());
-        Json::Value attr_list = condition.get("AttributeValueList", Json::Value(Json::arrayValue));
+        const rjson::value& comp_definition = rjson::get(condition, "ComparisonOperator");
+        const rjson::value& attr_list = rjson::get(condition, "AttributeValueList");
        comparison_operator_type op = get_comparison_operator(comp_definition);

        if (schema->get_column_definition(to_bytes(column_name))) {
@@ -67,8 +67,8 @@ comparison_operator_type get_comparison_operator(const Json::Value& comparison_o
        if (op != comparison_operator_type::EQ) {
            throw api_error("ValidationException", "Filtering is currently implemented for EQ operator only");
        }
-        if (attr_list.size() != 1) {
-            throw api_error("ValidationException", format("EQ restriction needs exactly 1 attribute value: {}", attr_list.toStyledString()));
+        if (attr_list.Size() != 1) {
+            throw api_error("ValidationException", format("EQ restriction needs exactly 1 attribute value: {}", rjson::print(attr_list)));
        }

        filtering_restrictions->add_restriction(make_map_element_restriction(attrs_col, column_name, attr_list[0]), false, true);
--- a/alternator/conditions.hh
+++ b/alternator/conditions.hh
@@ -21,6 +21,7 @@
 #pragma once

 #include "cql3/restrictions/statement_restrictions.hh"
+#include "serialization.hh"

 namespace alternator {

@@ -28,9 +29,9 @@ enum class comparison_operator_type {
    EQ, NE, LE, LT, GE, GT, IN, BETWEEN, CONTAINS, IS_NULL, NOT_NULL, BEGINS_WITH
 };

-comparison_operator_type get_comparison_operator(const Json::Value& comparison_operator);
+comparison_operator_type get_comparison_operator(const rjson::value& comparison_operator);

-::shared_ptr<cql3::restrictions::single_column_restriction::contains> make_map_element_restriction(const column_definition& cdef, const std::string& key, const Json::Value& value);
-::shared_ptr<cql3::restrictions::statement_restrictions> get_filtering_restrictions(schema_ptr schema, const column_definition& attrs_col, const Json::Value& query_filter);
+::shared_ptr<cql3::restrictions::single_column_restriction::contains> make_map_element_restriction(const column_definition& cdef, const std::string& key, const rjson::value& value);
+::shared_ptr<cql3::restrictions::statement_restrictions> get_filtering_restrictions(schema_ptr schema, const column_definition& attrs_col, const rjson::value& query_filter);

 }
--- a/alternator/executor.cc
+++ b/alternator/executor.cc
--- a/alternator/serialization.cc
+++ b/alternator/serialization.cc
@@ -12,6 +12,7 @@
 #include "log.hh"
 #include "serialization.hh"
 #include "error.hh"
+#include "rapidjson/writer.h"

 static logging::logger slogger("alternator-serialization");

@@ -45,26 +46,32 @@ type_representation represent_type(alternator_type atype) {
    return it->second;
 }

-bytes serialize_item(const Json::Value& item) {
-    if (item.size() != 1) {
-        throw api_error("ValidationException", format("An item can contain only one attribute definition: {}", item.toStyledString()));
+bytes serialize_item(const rjson::value& item) {
+    if (item.IsNull() || item.MemberCount() != 1) {
+        throw api_error("ValidationException", format("An item can contain only one attribute definition: {}", item));
    }
-    auto it = item.begin();
-    type_info type_info = type_info_from_string(it.key().asString()); // JSON keys are guaranteed to be strings
+    auto it = item.MemberBegin();
+    type_info type_info = type_info_from_string(it->name.GetString()); // JSON keys are guaranteed to be strings

    if (type_info.atype == alternator_type::NOT_SUPPORTED_YET) {
-        slogger.trace("Non-optimal serialization of type {}", it.key());
-        return bytes{int8_t(type_info.atype)} + to_bytes(item.toStyledString());
+        slogger.trace("Non-optimal serialization of type {}", it->name.GetString());
+        return bytes{int8_t(type_info.atype)} + to_bytes(rjson::print(item));
    }

    bytes serialized;
    // Alternator bytes representation does not start with "0x" followed by hex digits as Scylla-JSON does,
    // but instead uses base64.
+
    if (type_info.dtype == bytes_type) {
-        std::string raw_value = it->asString();
+        std::string raw_value = it->value.GetString();
        serialized = base64_decode(std::string_view(raw_value));
+    } else if (type_info.dtype == decimal_type) {
+        serialized = type_info.dtype->from_string(it->value.GetString());
+    } else if (type_info.dtype == boolean_type) {
+        serialized = type_info.dtype->from_json_object(Json::Value(it->value.GetBool()), cql_serialization_format::internal());
    } else {
-        serialized = type_info.dtype->from_json_object(*it, cql_serialization_format::internal());
+    	 //FIXME(sarna): Once we have type visitors, this double conversion hack should be replaced with parsing straight from rapidjson
+        serialized = type_info.dtype->from_json_object(Json::Value(rjson::print(it->value)), cql_serialization_format::internal());
    }

    //NOTICE: redundant copy here, from_json_object should accept bytes' output iterator too.
@@ -72,8 +79,8 @@ bytes serialize_item(const Json::Value& item) {
    return bytes{int8_t(type_info.atype)} + std::move(serialized);
 }

-Json::Value deserialize_item(bytes_view bv) {
-    Json::Value deserialized;
+rjson::value deserialize_item(bytes_view bv) {
+    rjson::value deserialized(rapidjson::kObjectType);
    if (bv.empty()) {
        throw api_error("ValidationException", "Serialized value empty");
    }
@@ -83,17 +90,18 @@ Json::Value deserialize_item(bytes_view bv) {

    if (atype == alternator_type::NOT_SUPPORTED_YET) {
        slogger.trace("Non-optimal deserialization of alternator type {}", int8_t(atype));
-        return json::to_json_value(sstring(reinterpret_cast<const char *>(bv.data()), bv.size()));
+        return rjson::parse_raw(reinterpret_cast<const char *>(bv.data()), bv.size());
    }

    type_representation type_representation = represent_type(atype);
    if (type_representation.dtype == bytes_type) {
-        deserialized[type_representation.ident] = base64_encode(bv);
+        std::string b64 = base64_encode(bv);
+        rjson::set_with_string_name(deserialized, type_representation.ident, rjson::from_string(b64));
    } else if (type_representation.dtype == decimal_type) {
        auto s = decimal_type->to_json_string(bytes(bv)); //FIXME(sarna): unnecessary copy
-        deserialized[type_representation.ident] = Json::Value(reinterpret_cast<const char*>(s.data()), reinterpret_cast<const char*>(s.data()) + s.size());
+        rjson::set_with_string_name(deserialized, type_representation.ident, rjson::from_string(s));
    } else {
-        deserialized[type_representation.ident] = json::to_json_value(type_representation.dtype->to_json_string(bytes(bv))); //FIXME(sarna): unnecessary copy
+        rjson::set_with_string_name(deserialized, type_representation.ident, rjson::parse(type_representation.dtype->to_string(bytes(bv))));
    }

    return deserialized;
@@ -113,43 +121,41 @@ std::string type_to_string(data_type type) {
    return it->second;
 }

-bytes get_key_column_value(const Json::Value& item, const column_definition& column) {
+bytes get_key_column_value(const rjson::value& item, const column_definition& column) {
    std::string column_name = column.name_as_text();
    std::string expected_type = type_to_string(column.type);

-    Json::Value key_typed_value = item.get(column_name, Json::nullValue);
-    if (!key_typed_value.isObject() || key_typed_value.size() != 1) {
+    const rjson::value& key_typed_value = rjson::get(item, rjson::value::StringRefType(column_name.c_str()));
+    if (!key_typed_value.IsObject() || key_typed_value.MemberCount() != 1) {
        throw api_error("ValidationException",
-                format("Missing or invalid value object for key column {}: {}",
-                        column_name, item.toStyledString()));
+                format("Missing or invalid value object for key column {}: {}", column_name, item));
    }
-    auto it = key_typed_value.begin();
-    if (it.key().asString() != expected_type) {
+    auto it = key_typed_value.MemberBegin();
+    if (it->name.GetString() != expected_type) {
        throw api_error("ValidationException",
                format("Expected type {} for key column {}, got type {}",
-                        expected_type, column_name, it.key().asString()));
+                        expected_type, column_name, it->name.GetString()));
    }
    if (column.type == bytes_type) {
-        return base64_decode(it->asString());
+        return base64_decode(it->value.GetString());
    } else {
-        return column.type->from_string(it->asString());
+        return column.type->from_string(it->value.GetString());
    }

 }

-Json::Value json_key_column_value(bytes_view cell, const column_definition& column) {
+rjson::value json_key_column_value(bytes_view cell, const column_definition& column) {
    if (column.type == bytes_type) {
-        return base64_encode(cell);
+        std::string b64 = base64_encode(cell);
+        return rjson::from_string(b64);
    } if (column.type == utf8_type) {
-        return Json::Value(reinterpret_cast<const char*>(cell.data()),
-                reinterpret_cast<const char*>(cell.data()) + cell.size());
+        return rjson::from_string(std::string(reinterpret_cast<const char*>(cell.data()), cell.size()));
    } else if (column.type == decimal_type) {
        // FIXME: use specialized Alternator number type, not the more
        // general "decimal_type". A dedicated type can be more efficient
        // in storage space and in parsing speed.
        auto s = decimal_type->to_json_string(bytes(cell));
-        return Json::Value(reinterpret_cast<const char*>(s.data()),
-                reinterpret_cast<const char*>(s.data()) + s.size());
+        return rjson::from_string(s);
    } else {
        // We shouldn't get here, we shouldn't see such key columns.
        throw std::runtime_error(format("Unexpected key type: {}", column.type->name()));
@@ -157,7 +163,7 @@ Json::Value json_key_column_value(bytes_view cell, const column_definition& colu
 }


-partition_key pk_from_json(const Json::Value& item, schema_ptr schema) {
+partition_key pk_from_json(const rjson::value& item, schema_ptr schema) {
    std::vector<bytes> raw_pk;
    // FIXME: this is a loop, but we really allow only one partition key column.
    for (const column_definition& cdef : schema->partition_key_columns()) {
@@ -167,7 +173,7 @@ partition_key pk_from_json(const Json::Value& item, schema_ptr schema) {
   return partition_key::from_exploded(raw_pk);
 }

-clustering_key ck_from_json(const Json::Value& item, schema_ptr schema) {
+clustering_key ck_from_json(const rjson::value& item, schema_ptr schema) {
    if (schema->clustering_key_size() == 0) {
        return clustering_key::make_empty();
    }
--- a/alternator/serialization.hh
+++ b/alternator/serialization.hh
@@ -14,7 +14,7 @@
 #include "types.hh"
 #include "schema.hh"
 #include "keys.hh"
-#include "json.hh"
+#include "rjson.hh"

 namespace alternator {

@@ -35,15 +35,15 @@ struct type_representation {
 type_info type_info_from_string(std::string type);
 type_representation represent_type(alternator_type atype);

-bytes serialize_item(const Json::Value& item);
-Json::Value deserialize_item(bytes_view bv);
+bytes serialize_item(const rjson::value& item);
+rjson::value deserialize_item(bytes_view bv);

 std::string type_to_string(data_type type);

-bytes get_key_column_value(const Json::Value& item, const column_definition& column);
-Json::Value json_key_column_value(bytes_view cell, const column_definition& column);
+bytes get_key_column_value(const rjson::value& item, const column_definition& column);
+rjson::value json_key_column_value(bytes_view cell, const column_definition& column);

-partition_key pk_from_json(const Json::Value& item, schema_ptr schema);
-clustering_key ck_from_json(const Json::Value& item, schema_ptr schema);
+partition_key pk_from_json(const rjson::value& item, schema_ptr schema);
+clustering_key ck_from_json(const rjson::value& item, schema_ptr schema);

 }