From 4af3359744b3942e7797f7f1869a86901d80f90f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20D=C3=A9nes?= Date: Thu, 12 Feb 2026 09:21:29 +0200 Subject: [PATCH] cql3/expr: use utils::chunked_string for untyped_constant::raw_text This value can be a string or bytes literal, which can get very large in rare cases. Use chunked storage to avoid large allocations. --- cql3/Cql.g | 2 +- cql3/expr/expression.cc | 23 ++++++++++++++--------- cql3/expr/expression.hh | 3 ++- cql3/expr/prepare_expr.cc | 14 +++++++------- 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/cql3/Cql.g b/cql3/Cql.g index 3526fab316..0a3de52b23 100644 --- a/cql3/Cql.g +++ b/cql3/Cql.g @@ -1840,7 +1840,7 @@ property[cql3::statements::property_definitions& props] ; propertyValue returns [sstring str] - : c=constant { $str = c.raw_text; } + : c=constant { $str = c.raw_text.linearize(); } // FIXME: unreserved keywords below are indistinguishable from their string representation, // which might be problematic in the future. A possible solution is to use a more complicated // type for storing property values instead of just plain strings. For the specific case diff --git a/cql3/expr/expression.cc b/cql3/expr/expression.cc index ee1651b6b3..afa0bdcf1c 100644 --- a/cql3/expr/expression.cc +++ b/cql3/expr/expression.cc @@ -784,9 +784,9 @@ auto fmt::formatter::format(const cql3::expr::e }, [&] (const untyped_constant& uc) { if (uc.partial_type == untyped_constant::type_class::string) { - out = fmt::format_to(out, "'{}'", uc.raw_text); + out = fmt::format_to(out, "'{}'", uc.raw_text.linearize()); } else { - out = fmt::format_to(out, "{}", uc.raw_text); + out = fmt::format_to(out, "{}", uc.raw_text.linearize()); } }, [&] (const tuple_constructor& tc) { @@ -2668,8 +2668,10 @@ std::map convert_property_map(const collection_constructor& ma add_recognition_error(msg); break; } - if (!res.emplace(left->raw_text, right->raw_text).second) { - sstring msg = fmt::format("Multiple definition for property {}", left->raw_text); + const auto left_text = left->raw_text.linearize(); + const auto right_text = right->raw_text.linearize(); + if (!res.emplace(left_text, right_text).second) { + sstring msg = fmt::format("Multiple definition for property {}", left_text); add_recognition_error(msg); break; } @@ -2702,8 +2704,10 @@ convert_extended_property_map(const collection_constructor& map, error_sink_fn a } auto right_str = expr::as_if(&entry_tuple->elements[1]); if (right_str) { - if (!res.emplace(left->raw_text, right_str->raw_text).second) { - sstring msg = fmt::format("Multiple definition for property {}", left->raw_text); + const auto left_text = left->raw_text.linearize(); + const auto right_text = right_str->raw_text.linearize(); + if (!res.emplace(left_text, right_text).second) { + sstring msg = fmt::format("Multiple definition for property {}", left_text); add_recognition_error(msg); break; } @@ -2727,10 +2731,11 @@ convert_extended_property_map(const collection_constructor& map, error_sink_fn a add_recognition_error(msg); return ""; } - return elem->raw_text; + return elem->raw_text.linearize(); }) | std::ranges::to>(); - if (!res.emplace(left->raw_text, std::move(values)).second) { - sstring msg = fmt::format("Multiple definition for property {}", left->raw_text); + const auto left_text = left->raw_text.linearize(); + if (!res.emplace(left_text, std::move(values)).second) { + sstring msg = fmt::format("Multiple definition for property {}", left_text); add_recognition_error(msg); break; } diff --git a/cql3/expr/expression.hh b/cql3/expr/expression.hh index 776bc0088c..8c913c758a 100644 --- a/cql3/expr/expression.hh +++ b/cql3/expr/expression.hh @@ -19,6 +19,7 @@ #include "cql3/functions/function_name.hh" #include "seastarx.hh" #include "cql3/values.hh" +#include "utils/chunked_string.hh" class row; @@ -385,7 +386,7 @@ struct bind_variable { struct untyped_constant { enum type_class { integer, floating_point, string, boolean, duration, uuid, hex, null }; type_class partial_type; - sstring raw_text; + utils::chunked_string raw_text; friend bool operator==(const untyped_constant&, const untyped_constant&) = default; }; diff --git a/cql3/expr/prepare_expr.cc b/cql3/expr/prepare_expr.cc index 501e2f2c13..1f28d64d96 100644 --- a/cql3/expr/prepare_expr.cc +++ b/cql3/expr/prepare_expr.cc @@ -702,19 +702,19 @@ template <> struct fmt::formatter : fm namespace cql3::expr { static -bytes -untyped_constant_parsed_value(const untyped_constant uc, data_type validator) +managed_bytes +untyped_constant_parsed_value(const untyped_constant& uc, data_type validator) { try { if (uc.partial_type == untyped_constant::type_class::hex && validator == bytes_type) { - auto v = static_cast(uc.raw_text); + auto v = utils::chunked_string_view(uc.raw_text); v.remove_prefix(2); - return to_bytes(validator->from_string(v)); + return validator->from_string(v); } if (validator->is_counter()) { - return to_bytes(long_type->from_string(uc.raw_text)); + return long_type->from_string(uc.raw_text); } - return to_bytes(validator->from_string(uc.raw_text)); + return validator->from_string(uc.raw_text); } catch (const marshal_exception& e) { throw exceptions::invalid_request_exception(e.what()); } @@ -810,7 +810,7 @@ untyped_constant_prepare_expression(const untyped_constant& uc, data_dictionary: if (!is_assignable(untyped_constant_test_assignment(uc, db, keyspace, *receiver))) { if (uc.partial_type != untyped_constant::type_class::null) { throw exceptions::invalid_request_exception(format("Invalid {} constant ({}) for \"{}\" of type {}", - uc.partial_type, uc.raw_text, *receiver->name, receiver->type->as_cql3_type().to_string())); + uc.partial_type, uc.raw_text.linearize(), *receiver->name, receiver->type->as_cql3_type().to_string())); } else { throw exceptions::invalid_request_exception("Invalid null value for counter increment/decrement"); }