From 4bfd5d7ed1990cdb7f1135cd7f6067a4f2a8982b Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Thu, 23 May 2019 14:51:30 +0300 Subject: [PATCH] alternator: add support for bytes as key columns Until now we only supported the string type for key columns (hash or sort key). This patch adds support for the bytes type (a.k.a. binary or blob) as well. After this patch, the only key type still unsupported is the number type. Note that in JSON, bytes values are represented with base64 encoding, so we need to decode them before storing them, and re-encode them when the user retrieves the value. The decoding is important not just for saving storage space (the encoded form is 4/3 the size of the decoded one) but also for correct *sorting* of the binary keys. Signed-off-by: Nadav Har'El --- alternator/executor.cc | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/alternator/executor.cc b/alternator/executor.cc index c037ac8ebf..1f94faa4ad 100644 --- a/alternator/executor.cc +++ b/alternator/executor.cc @@ -10,6 +10,8 @@ #include +#include "base64.hh" + #include "alternator/executor.hh" #include "log.hh" #include "json.hh" @@ -315,11 +317,26 @@ static bytes get_key_column_value(const Json::Value& item, const column_definiti format("Expected type {} for key column {}, got type {}", expected_type, column_name, it.key().asString())); } - // FIXME: if expected_type is B, we need to do base64 decoding! - return column.type->from_string(it->asString()); + if (column.type == bytes_type) { + return base64_decode(it->asString()); + } else { + return column.type->from_string(it->asString()); + } } +static Json::Value json_key_column_value(bytes_view cell, const column_definition& column) { + if (column.type == bytes_type) { + return base64_encode(cell); + } if (column.type == utf8_type) { + return Json::Value(reinterpret_cast(cell.data()), + reinterpret_cast(cell.data()) + cell.size()); + } else { + // We shouldn't get here, we shouldn't see such key columns. 
+ throw std::runtime_error(format("Unexpected key type: {}", column.type->name())); + } +} + static partition_key pk_from_json(const Json::Value& item, schema_ptr schema) { std::vector raw_pk; // FIXME: this is a loop, but we really allow only one partition key column. @@ -512,10 +529,10 @@ static Json::Value describe_item(schema_ptr schema, const query::partition_slice auto column_it = columns.begin(); for (const bytes_opt& cell : result_row) { std::string column_name = (*column_it)->name_as_text(); - if (column_name != executor::ATTRS_COLUMN_NAME) { + if (cell && column_name != executor::ATTRS_COLUMN_NAME) { if (attrs_to_get.empty() || attrs_to_get.count(column_name) > 0) { Json::Value& field = item[column_name.c_str()]; - field[type_to_string((*column_it)->type)] = json::to_json_value((*column_it)->type->to_json_string(cell)); + field[type_to_string((*column_it)->type)] = json_key_column_value(*cell, **column_it); } } else if (cell) { auto deserialized = attrs_type()->deserialize(*cell, cql_serialization_format::latest()); @@ -623,7 +640,7 @@ public: if (column_name != executor::ATTRS_COLUMN_NAME) { if (_attrs_to_get.empty() || _attrs_to_get.count(column_name) > 0) { Json::Value& field = _item[column_name.c_str()]; - field[type_to_string((*_column_it)->type)] = json::to_json_value((*_column_it)->type->to_json_string(bytes(bv))); + field[type_to_string((*_column_it)->type)] = json_key_column_value(bv, **_column_it); } } else { auto deserialized = attrs_type()->deserialize(bv, cql_serialization_format::latest());