/*
 * Copyright 2019 ScyllaDB
 */

/*
 * This file is part of Scylla.
 *
 * See the LICENSE.PROPRIETARY file in the top-level directory for licensing information.
 */

#include "base64.hh"
#include "log.hh"
#include "serialization.hh"
#include "error.hh"
#include "rapidjson/writer.h"

static logging::logger slogger("alternator-serialization");

namespace alternator {

// Maps an attribute type tag as it appears in a request ("S", "B", "BOOL",
// "N") to the Alternator type and the underlying data type used to store it.
// Any other tag yields {NOT_SUPPORTED_YET, utf8_type}.
type_info type_info_from_string(std::string type) {
    static thread_local const std::unordered_map<std::string, type_info> type_infos = {
        {"S", {alternator_type::S, utf8_type}},
        {"B", {alternator_type::B, bytes_type}},
        {"BOOL", {alternator_type::BOOL, boolean_type}},
        {"N", {alternator_type::N, decimal_type}}, //FIXME: Replace with custom Alternator type when implemented
    };
    auto it = type_infos.find(type);
    if (it == type_infos.end()) {
        return {alternator_type::NOT_SUPPORTED_YET, utf8_type};
    }
    return it->second;
}

// Inverse of type_info_from_string(): returns the type tag and underlying
// data type for an Alternator type.
// Throws std::runtime_error for a type which has no entry in the table.
type_representation represent_type(alternator_type atype) {
    static thread_local const std::unordered_map<alternator_type, type_representation> type_representations = {
        {alternator_type::S, {"S", utf8_type}},
        {alternator_type::B, {"B", bytes_type}},
        {alternator_type::BOOL, {"BOOL", boolean_type}},
        {alternator_type::N, {"N", decimal_type}}, //FIXME: Replace with custom Alternator type when implemented
    };
    auto it = type_representations.find(atype);
    if (it == type_representations.end()) {
        throw std::runtime_error(format("Unknown alternator type {}", int8_t(atype)));
    }
    return it->second;
}

// Serializes a single typed attribute value, i.e. a JSON object with exactly
// one member of the form {"<type tag>": <value>}.
// The result is one byte holding the alternator_type, followed by the
// payload. For type tags without a dedicated representation the payload is
// the printed JSON of the whole item.
// Throws api_error (ValidationException) if the item is not a one-member
// object, or if the value's JSON type does not fit the type tag.
bytes serialize_item(const rjson::value& item) {
    // IsObject() must be checked before MemberCount(): the latter is only
    // meaningful for objects, and the item comes from an untrusted request.
    if (!item.IsObject() || item.MemberCount() != 1) {
        throw api_error("ValidationException", format("An item can contain only one attribute definition: {}", item));
    }
    auto it = item.MemberBegin();
    const type_info info = type_info_from_string(it->name.GetString()); // JSON keys are guaranteed to be strings

    if (info.atype == alternator_type::NOT_SUPPORTED_YET) {
        slogger.trace("Non-optimal serialization of type {}", it->name.GetString());
        return bytes{int8_t(info.atype)} + to_bytes(rjson::print(item));
    }

    bytes serialized;
    if (info.dtype == bytes_type || info.dtype == decimal_type) {
        // Both B and N carry their value as a JSON string.
        if (!it->value.IsString()) {
            throw api_error("ValidationException",
                    format("Expected a string value for attribute type {}: {}", it->name.GetString(), item));
        }
        if (info.dtype == bytes_type) {
            // Alternator bytes representation does not start with "0x" followed by hex digits as Scylla-JSON does,
            // but instead uses base64.
            serialized = base64_decode(std::string_view(it->value.GetString(), it->value.GetStringLength()));
        } else {
            serialized = info.dtype->from_string(it->value.GetString());
        }
    } else if (info.dtype == boolean_type) {
        if (!it->value.IsBool()) {
            throw api_error("ValidationException",
                    format("Expected a boolean value for attribute type {}: {}", it->name.GetString(), item));
        }
        serialized = info.dtype->from_json_object(Json::Value(it->value.GetBool()), cql_serialization_format::internal());
    } else {
        //FIXME(sarna): Once we have type visitors, this double conversion hack should be replaced with parsing straight from rapidjson
        serialized = info.dtype->from_json_object(Json::Value(rjson::print(it->value)), cql_serialization_format::internal());
    }

    //NOTICE: redundant copy here, from_json_object should accept bytes' output iterator too.
    // Or, we could append type info to the end, but that's unorthodox.
    return bytes{int8_t(info.atype)} + std::move(serialized);
}

// Reverses serialize_item(): the first byte of the view selects the
// alternator_type and the remaining bytes are the payload.
// Returns the JSON object {"<type tag>": <value>}; for NOT_SUPPORTED_YET the
// payload is parsed back as the JSON that was stored.
// Throws api_error (ValidationException) on an empty view, and whatever
// represent_type() throws for an unknown type byte.
rjson::value deserialize_item(bytes_view bv) {
    if (bv.empty()) {
        throw api_error("ValidationException", "Serialized value empty");
    }

    const alternator_type atype = alternator_type(bv[0]);
    bv.remove_prefix(1);

    if (atype == alternator_type::NOT_SUPPORTED_YET) {
        slogger.trace("Non-optimal deserialization of alternator type {}", int8_t(atype));
        return rjson::parse_raw(reinterpret_cast<const char*>(bv.data()), bv.size());
    }

    const type_representation repr = represent_type(atype);
    rjson::value deserialized(rapidjson::kObjectType);
    if (repr.dtype == bytes_type) {
        std::string b64 = base64_encode(bv);
        rjson::set_with_string_name(deserialized, repr.ident, rjson::from_string(b64));
    } else if (repr.dtype == decimal_type) {
        auto s = decimal_type->to_json_string(bytes(bv)); //FIXME(sarna): unnecessary copy
        rjson::set_with_string_name(deserialized, repr.ident, rjson::from_string(s));
    } else {
        rjson::set_with_string_name(deserialized, repr.ident, rjson::parse(repr.dtype->to_string(bytes(bv))));
    }
    return deserialized;
}

// Returns the type tag ("S", "B", "BOOL" or "N") for a data type.
// Throws std::runtime_error for any other type.
std::string type_to_string(data_type type) {
    static thread_local const std::unordered_map<data_type, std::string> types = {
        {utf8_type, "S"},
        {bytes_type, "B"},
        {boolean_type, "BOOL"},
        {decimal_type, "N"}, // FIXME: use a specialized Alternator number type instead of the general decimal_type
    };
    auto it = types.find(type);
    if (it == types.end()) {
        throw std::runtime_error(format("Unknown type {}", type->name()));
    }
    return it->second;
}

// Extracts the serialized value of the given key column from a JSON item.
// The item's member named after the column must be a one-member object
// {"<type tag>": "<value>"} whose tag matches the column's type.
// Throws api_error (ValidationException) if the value object is malformed,
// has the wrong type tag, or its value is not a JSON string.
bytes get_key_column_value(const rjson::value& item, const column_definition& column) {
    std::string column_name = column.name_as_text();
    std::string expected_type = type_to_string(column.type);

    const rjson::value& key_typed_value = rjson::get(item, rjson::value::StringRefType(column_name.c_str()));
    if (!key_typed_value.IsObject() || key_typed_value.MemberCount() != 1) {
        throw api_error("ValidationException",
                format("Missing or invalid value object for key column {}: {}", column_name, item));
    }
    auto it = key_typed_value.MemberBegin();
    if (it->name.GetString() != expected_type) {
        throw api_error("ValidationException",
                format("Expected type {} for key column {}, got type {}",
                        expected_type, column_name, it->name.GetString()));
    }
    // The value is read with GetString() below, so reject anything else
    // coming from the request before touching it.
    if (!it->value.IsString()) {
        throw api_error("ValidationException",
                format("Expected a string value for key column {}: {}", column_name, item));
    }
    if (column.type == bytes_type) {
        return base64_decode(it->value.GetString());
    } else {
        return column.type->from_string(it->value.GetString());
    }
}

// Converts the stored cell of a key column into the JSON value returned to
// the user: base64 text for bytes, the raw text for utf8, and the JSON
// string form for decimal.
// Throws std::runtime_error for any other column type.
rjson::value json_key_column_value(bytes_view cell, const column_definition& column) {
    if (column.type == bytes_type) {
        std::string b64 = base64_encode(cell);
        return rjson::from_string(b64);
    } else if (column.type == utf8_type) {
        return rjson::from_string(std::string(reinterpret_cast<const char*>(cell.data()), cell.size()));
    } else if (column.type == decimal_type) {
        // FIXME: use specialized Alternator number type, not the more
        // general "decimal_type". A dedicated type can be more efficient
        // in storage space and in parsing speed.
        auto s = decimal_type->to_json_string(bytes(cell));
        return rjson::from_string(s);
    } else {
        // We shouldn't get here, we shouldn't see such key columns.
        throw std::runtime_error(format("Unexpected key type: {}", column.type->name()));
    }
}

// Builds the partition key from the values the item holds for the schema's
// partition key columns. Propagates errors from get_key_column_value().
partition_key pk_from_json(const rjson::value& item, schema_ptr schema) {
    std::vector<bytes> raw_pk;
    // FIXME: this is a loop, but we really allow only one partition key column.
    for (const column_definition& cdef : schema->partition_key_columns()) {
        raw_pk.push_back(get_key_column_value(item, cdef));
    }
    return partition_key::from_exploded(raw_pk);
}

// Builds the clustering key from the values the item holds for the schema's
// clustering key columns; returns an empty key when the schema has none.
// Propagates errors from get_key_column_value().
clustering_key ck_from_json(const rjson::value& item, schema_ptr schema) {
    if (schema->clustering_key_size() == 0) {
        return clustering_key::make_empty();
    }
    std::vector<bytes> raw_ck;
    // FIXME: this is a loop, but we really allow only one clustering key column.
    for (const column_definition& cdef : schema->clustering_key_columns()) {
        raw_ck.push_back(get_key_column_value(item, cdef));
    }
    return clustering_key::from_exploded(raw_ck);
}

}