alternator: add support for bytes as key columns

Until now we only supported the string type for key columns (hash or sort key). This patch adds support for the bytes type (a.k.a. binary or blob) as well. The last remaining type to be supported in keys is the number type. Note that in JSON, bytes values are represented with base64 encoding, so we need to decode them before storing them, and re-encode them when the user retrieves the value. The decoding is important not just for saving storage space (the encoding is 4/3 the size of the decoded value) but also for correct *sorting* of the binary keys. Signed-off-by: Nadav Har'El <nyh@scylladb.com>
2026-06-04 05:53:13 +00:00 · 2019-05-23 14:51:30 +03:00
parent 57b46a92d7
commit 4bfd5d7ed1
1 changed files with 22 additions and 5 deletions
--- a/alternator/executor.cc
+++ b/alternator/executor.cc
@@ -10,6 +10,8 @@

 #include <regex>

+#include "base64.hh"
+
 #include "alternator/executor.hh"
 #include "log.hh"
 #include "json.hh"
@@ -315,11 +317,26 @@ static bytes get_key_column_value(const Json::Value& item, const column_definiti
                format("Expected type {} for key column {}, got type {}",
                        expected_type, column_name, it.key().asString()));
    }
-    // FIXME: if expected_type is B, we need to do base64 decoding!
-    return column.type->from_string(it->asString());
+    if (column.type == bytes_type) {
+        return base64_decode(it->asString());
+    } else {
+        return column.type->from_string(it->asString());
+    }

 }

+// Render a key cell as JSON: base64 text for bytes keys, the raw characters for string keys.
+static Json::Value json_key_column_value(bytes_view cell, const column_definition& column) {
+    if (column.type == bytes_type) {
+        return base64_encode(cell);
+    } else if (column.type == utf8_type) {
+        const char* text = reinterpret_cast<const char*>(cell.data());
+        return Json::Value(text, text + cell.size());
+    }
+    // No other type is expected for a key column, so reaching this point is an error.
+    throw std::runtime_error(format("Unexpected key type: {}", column.type->name()));
+}
+
 static partition_key pk_from_json(const Json::Value& item, schema_ptr schema) {
    std::vector<bytes> raw_pk;
    // FIXME: this is a loop, but we really allow only one partition key column.
@@ -512,10 +529,10 @@ static Json::Value describe_item(schema_ptr schema, const query::partition_slice
        auto column_it = columns.begin();
        for (const bytes_opt& cell : result_row) {
            std::string column_name = (*column_it)->name_as_text();
-            if (column_name != executor::ATTRS_COLUMN_NAME) {
+            if (cell && column_name != executor::ATTRS_COLUMN_NAME) {
                if (attrs_to_get.empty() || attrs_to_get.count(column_name) > 0) {
                    Json::Value& field = item[column_name.c_str()];
-                    field[type_to_string((*column_it)->type)] = json::to_json_value((*column_it)->type->to_json_string(cell));
+                    field[type_to_string((*column_it)->type)] = json_key_column_value(*cell, **column_it);
                }
            } else if (cell) {
                auto deserialized = attrs_type()->deserialize(*cell, cql_serialization_format::latest());
@@ -623,7 +640,7 @@ public:
            if (column_name != executor::ATTRS_COLUMN_NAME) {
                if (_attrs_to_get.empty() || _attrs_to_get.count(column_name) > 0) {
                    Json::Value& field = _item[column_name.c_str()];
-                    field[type_to_string((*_column_it)->type)] = json::to_json_value((*_column_it)->type->to_json_string(bytes(bv)));
+                    field[type_to_string((*_column_it)->type)] = json_key_column_value(bv, **_column_it);
                }
            } else {
                auto deserialized = attrs_type()->deserialize(bv, cql_serialization_format::latest());