From 38b675737da1cbf87333d5d9ab3f02ef14b0ac02 Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Sun, 12 Apr 2026 11:05:06 +0300 Subject: [PATCH 1/8] cql3: extend WRITETIME/TTL grammar to accept collection and UDT elements Previously, WRITETIME() and TTL() only accepted a simple column name (cident), so WRITETIME(m['key']) or WRITETIME(x.a) was a syntax error. This patch begins to implements support for applying WRITETIME() and TTL() to individual elements of a non-frozen map, set or UDT, as requested in issue #15427. On its own this commit only changes the parser (Cql.g). The prepare step still rejects subscript and field-selection nodes with an invalid_request_exception, so there is no user-visible behavior change yet - just that a syntax error is replaced by a different error. Upcoming patches add the extended wire format for per-element timestamps (commit 2), the selection layer that consumes it (commit 3), and the prepare/evaluate logic that ties everything together (commit 4), after which WRITETIME() and TTL(col[key]) for collection or UDT elements will finally be fully functional. The parser change in this patch expands the subscriptExpr rule to support the col.field syntax, not only col[key]. This change also allows the UDT field syntax to be used in LWT conditions, which is another long-standing missing feature (#13624); But to correctly support this feature we'll need an additional patch to fix a couple of remaining bugs - this will be the eighth commit in this series. --- cql3/Cql.g | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/cql3/Cql.g b/cql3/Cql.g index d891204764..3526fab316 100644 --- a/cql3/Cql.g +++ b/cql3/Cql.g @@ -429,10 +429,10 @@ unaliasedSelector returns [uexpression tmp] : ( c=cident { tmp = unresolved_identifier{std::move(c)}; } | v=value { tmp = std::move(v); } | K_COUNT '(' countArgument ')' { tmp = make_count_rows_function_expression(); } - | K_WRITETIME '(' c=cident ')' { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::writetime, - unresolved_identifier{std::move(c)}}; } - | K_TTL '(' c=cident ')' { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::ttl, - unresolved_identifier{std::move(c)}}; } + | K_WRITETIME '(' a=subscriptExpr ')' { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::writetime, + std::move(a)}; } + | K_TTL '(' a=subscriptExpr ')' { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::ttl, + std::move(a)}; } | f=functionName args=selectionFunctionArgs { tmp = function_call{std::move(f), std::move(args)}; } | K_CAST '(' arg=unaliasedSelector K_AS t=native_type ')' { tmp = cast{.style = cast::cast_style::sql, .arg = std::move(arg), .type = std::move(t)}; } ) @@ -1794,7 +1794,9 @@ columnRefExpr returns [uexpression e] subscriptExpr returns [uexpression e] : col=columnRefExpr { e = std::move(col); } - ( '[' sub=term ']' { e = subscript{std::move(e), std::move(sub)}; } )? + ( '[' sub=term ']' { e = subscript{std::move(e), std::move(sub)}; } + | '.' fi=cident { e = field_selection{std::move(e), std::move(fi)}; } + )? ; singleColumnInValuesOrMarkerExpr returns [uexpression e] From bb63db34e518d3b592f8d8c2d04a114f72726cae Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Sun, 12 Apr 2026 11:05:44 +0300 Subject: [PATCH 2/8] cql3: add extended wire format for per-element timestamps and TTLs Introduce the infrastructure needed to transport per-element timestamps and TTL expiry times from replicas to coordinators, required for WRITETIME(col[key]) / TTL(col[key]) and WRITETIME(col.field) / TTL(col.field). * Add a 'writetime_ttl_individual_element' cluster feature flag that guards usage of the new wire format during rolling upgrades: the extended format is only emitted and consumed when every node in the cluster supports it. * Implement serialize_for_cql_with_timestamps() (types/types.cc), a variant of serialize_for_cql() that appends a per-element section to the regular CQL bytes, listing each live element's serialized key, timestamp, and expiry. The format is: [uint32 cql_len][cql bytes] [int32 entry_count] [per entry: (int32 key_len)(key bytes)(int64 timestamp)(int64 expiry)] expiry is -1 when the element has no TTL. * Add partition_slice::option::send_collection_timestamps and modify write_cell() (mutation_partition.cc) to use the new function serialize_for_cql_with_timestamps() when this option is available. This commit stands alone with no user-visible effect: nothing yet sets the new partition-slice option. The next patch adds the selection-layer code that sets the option and parses the extended response. --- gms/feature_service.hh | 1 + mutation/collection_mutation.hh | 7 ++++++ mutation/mutation_partition.cc | 6 ++++- query/query-request.hh | 8 ++++++- types/types.cc | 40 +++++++++++++++++++++++++++++++++ 5 files changed, 60 insertions(+), 2 deletions(-) diff --git a/gms/feature_service.hh b/gms/feature_service.hh index 530dec6f4a..d7223258a0 100644 --- a/gms/feature_service.hh +++ b/gms/feature_service.hh @@ -179,6 +179,7 @@ public: gms::feature tablets_intermediate_fallback_cleanup { *this, "TABLETS_INTERMEDIATE_FALLBACK_CLEANUP"sv }; gms::feature batchlog_v2 { *this, "BATCHLOG_V2"sv }; gms::feature vnodes_to_tablets_migrations { *this, "VNODES_TO_TABLETS_MIGRATIONS"sv }; + gms::feature writetime_ttl_individual_element { *this, "WRITETIME_TTL_INDIVIDUAL_ELEMENT"sv }; public: const std::unordered_map>& registered_features() const; diff --git a/mutation/collection_mutation.hh b/mutation/collection_mutation.hh index 914a7d93e4..6c510e0728 100644 --- a/mutation/collection_mutation.hh +++ b/mutation/collection_mutation.hh @@ -133,6 +133,13 @@ collection_mutation difference(const abstract_type&, collection_mutation_view, c // Serializes the given collection of cells to a sequence of bytes ready to be sent over the CQL protocol. bytes_ostream serialize_for_cql(const abstract_type&, collection_mutation_view); +// Like serialize_for_cql, but uses an extended format that embeds per-element +// timestamps and expiries, for use with WRITETIME(col[key]) / TTL(col[key]) +// and WRITETIME(col.field) / TTL(col.field) selectors. +// The format is: [cql-bytes-length as uint32][regular CQL bytes][count as int32] +// [per-element: (key-len as int32)(key bytes)(timestamp as int64)(expiry as int64 in gc_clock ticks, -1 if none)] +bytes_ostream serialize_for_cql_with_timestamps(const abstract_type&, collection_mutation_view); + template <> struct fmt::formatter : fmt::formatter { auto format(const collection_mutation_view::printer&, fmt::format_context& ctx) const diff --git a/mutation/mutation_partition.cc b/mutation/mutation_partition.cc index 3abcbe0a90..f2cb88a754 100644 --- a/mutation/mutation_partition.cc +++ b/mutation/mutation_partition.cc @@ -692,9 +692,13 @@ void write_cell(RowWriter& w, const query::partition_slice& slice, data_type typ type = map_type_impl::get_instance(ctype.name_comparator(), ctype.value_comparator(), true); } + bytes_ostream serialized = ((type->is_map() || type->is_set() || type->is_user_type()) && type->is_multi_cell() + && slice.options.contains()) + ? serialize_for_cql_with_timestamps(*type, std::move(v)) + : serialize_for_cql(*type, std::move(v)); w.add().write().skip_timestamp() .skip_expiry() - .write_fragmented_value(serialize_for_cql(*type, std::move(v))) + .write_fragmented_value(std::move(serialized)) .skip_ttl() .end_qr_cell(); } diff --git a/query/query-request.hh b/query/query-request.hh index 13a659dd3e..d4ff05c75a 100644 --- a/query/query-request.hh +++ b/query/query-request.hh @@ -204,6 +204,11 @@ public: // is a lot of dead rows. This flag is needed during rolling upgrades to support // old coordinators which do not tolerate pages with no live rows. allow_mutation_read_page_without_live_row, + // When set, multi-cell collection cells and non-frozen UDT cells in + // the query result use an extended format that embeds per-element + // timestamps and expiries to support WRITETIME(col[key])/TTL(col[key]) + // and WRITETIME(col.field)/TTL(col.field) selectors. + send_collection_timestamps, }; using option_set = enum_set>; + option::allow_mutation_read_page_without_live_row, + option::send_collection_timestamps>>; clustering_row_ranges _row_ranges; public: column_id_vector static_columns; // TODO: consider using bitmap diff --git a/types/types.cc b/types/types.cc index 9e49b19711..0b4039cb72 100644 --- a/types/types.cc +++ b/types/types.cc @@ -3709,6 +3709,46 @@ bytes_ostream serialize_for_cql(const abstract_type& type, collection_mutation_v }); } +bytes_ostream serialize_for_cql_with_timestamps(const abstract_type& type, collection_mutation_view v) { + throwing_assert(type.is_multi_cell()); + return v.with_deserialized(type, [&] (collection_mutation_view_description mv) -> bytes_ostream { + // Step 1: produce regular CQL bytes (copy of mv is made inside serialize_for_cql_aux, mv is still valid after) + bytes_ostream cql = visit(type, make_visitor( + [&] (const map_type_impl& ctype) { return serialize_for_cql_aux(ctype, mv); }, + [&] (const set_type_impl& ctype) { return serialize_for_cql_aux(ctype, mv); }, + [&] (const user_type_impl& utype) { return serialize_for_cql_aux(utype, mv); }, + [&] (const abstract_type& o) -> bytes_ostream { + throw std::runtime_error(format("serialize_for_cql_with_timestamps: unsupported type {}", o.name())); + } + )); + + // Step 2: build extended format: + // [uint32: cql byte length][cql bytes] + // [int32: entry count][count entries: (int32 keylen)(key bytes)(int64 timestamp)(int64 expiry, -1 if no TTL)] + bytes_ostream out; + write_simple(out, uint32_t(cql.size())); + out.append(cql); + auto count_slot = out.write_place_holder(collection_size_len()); + int elements = 0; + for (auto&& e : mv.cells) { + if (e.second.is_live(mv.tomb, false)) { + bytes_view key = e.first; + write_simple(out, int32_t(key.size())); + out.write(key); + write_simple(out, e.second.timestamp()); + int64_t expiry_raw = -1; + if (e.second.is_live_and_has_ttl()) { + expiry_raw = e.second.expiry().time_since_epoch().count(); + } + write_simple(out, expiry_raw); + ++elements; + } + } + write_collection_size(count_slot, elements); + return out; + }); +} + bytes serialize_field_index(size_t idx) { if (idx >= size_t(std::numeric_limits::max())) { // should've been rejected earlier, but just to be sure... From 4ac63de06309ca9e3caaa67930904b417b1a8a30 Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Sun, 12 Apr 2026 11:06:30 +0300 Subject: [PATCH 3/8] cql3: parse per-element timestamps/TTLs in the selection layer Wire up the selection and result-set infrastructure to consume the extended collection wire format introduced in the previous patch and expose per-element timestamps and TTLs to the expression evaluator. * Add collection_cell_metadata: maps from raw element-key bytes to timestamp and remaining TTL, one entry per collection or UDT cell. Add a corresponding collection_element_metadata span to evaluation_inputs so that evaluators can access it. * Add a flag _collect_collection_timestamps to selection (selection.hh/cc). When any selected expression contains a WRITETIME(col[key])/TTL(col[key]) or WRITETIME(col.field)/TTL(col.field) attribute, the flag is set and the send_collection_timestamps partition-slice option is enabled, causing storage nodes to use the extended wire format from the previous patch. * Implement result_set_builder::add_collection() (selection.cc): when _collect_collection_timestamps is set, parse the extended format, decode per-element timestamps and remaining TTLs (computed from the stored expiry time and the query time), and store them in _collection_element_metadata indexed by column position. When the flag is not set, the existing plain-bytes path is unchanged. After this patch, the new selection feature is still not available to the end-user because the prepare step still forbids it. The next patch will finally complete the expression preparation and evaluation. It will read the new collection_element_metadata and return the correct timestamp or TTL value. --- cql3/expr/collection_cell_metadata.hh | 21 +++++++++ cql3/expr/evaluate.hh | 2 + cql3/selection/selection.cc | 62 +++++++++++++++++++++++++-- cql3/selection/selection.hh | 7 ++- 4 files changed, 88 insertions(+), 4 deletions(-) create mode 100644 cql3/expr/collection_cell_metadata.hh diff --git a/cql3/expr/collection_cell_metadata.hh b/cql3/expr/collection_cell_metadata.hh new file mode 100644 index 0000000000..e551068bcc --- /dev/null +++ b/cql3/expr/collection_cell_metadata.hh @@ -0,0 +1,21 @@ +// Copyright (C) 2026-present ScyllaDB +// SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + +#pragma once + +#include + +#include "bytes.hh" +#include "mutation/timestamp.hh" + +namespace cql3::expr { + +// Per-element timestamps and TTLs for a cell of a map, set or UDT (populated +// when a WRITETIME() or TTL() of col[key] or col.field are in the query. +// Keys are the raw serialized keys or serialized field index. +struct collection_cell_metadata { + std::map timestamps; + std::map ttls; // remaining TTL in seconds (-1 if no TTL) +}; + +} // namespace cql3::expr diff --git a/cql3/expr/evaluate.hh b/cql3/expr/evaluate.hh index f39b42d7af..166bd6d045 100644 --- a/cql3/expr/evaluate.hh +++ b/cql3/expr/evaluate.hh @@ -3,6 +3,7 @@ #pragma once +#include "collection_cell_metadata.hh" #include "expression.hh" #include "bytes.hh" @@ -27,6 +28,7 @@ struct evaluation_inputs { std::span static_and_regular_timestamps; // indexes match `selection` member std::span static_and_regular_ttls; // indexes match `selection` member std::span temporaries; // indexes match temporary::index + std::span collection_element_metadata; // indexes match `selection` member }; // Takes a prepared expression and calculates its value. diff --git a/cql3/selection/selection.cc b/cql3/selection/selection.cc index 133539ce80..c35979f24a 100644 --- a/cql3/selection/selection.cc +++ b/cql3/selection/selection.cc @@ -17,6 +17,7 @@ #include "cql3/expr/expr-utils.hh" #include "cql3/functions/first_function.hh" #include "cql3/functions/aggregate_fcts.hh" +#include "types/types.hh" #include @@ -31,12 +32,14 @@ selection::selection(schema_ptr schema, std::vector> metadata_, bool collect_timestamps, bool collect_TTLs, + bool collect_collection_timestamps, trivial is_trivial) : _schema(std::move(schema)) , _columns(std::move(columns)) , _metadata(::make_shared(std::move(metadata_))) , _collect_timestamps(collect_timestamps) , _collect_TTLs(collect_TTLs) + , _collect_collection_timestamps(collect_collection_timestamps) , _contains_static_columns(std::any_of(_columns.begin(), _columns.end(), std::mem_fn(&column_definition::is_static))) , _is_trivial(is_trivial) { } @@ -46,6 +49,7 @@ query::partition_slice::option_set selection::get_query_options() { opts.set_if(_collect_timestamps); opts.set_if(_collect_TTLs); + opts.set_if(_collect_collection_timestamps); opts.set_if( std::any_of(_columns.begin(), _columns.end(), @@ -114,7 +118,7 @@ public: */ simple_selection(schema_ptr schema, std::vector columns, std::vector> metadata, bool is_wildcard) - : selection(schema, std::move(columns), std::move(metadata), false, false, trivial::yes) + : selection(schema, std::move(columns), std::move(metadata), false, false, false, trivial::yes) , _is_wildcard(is_wildcard) { } @@ -178,6 +182,12 @@ contains_column_mutation_attribute(expr::column_mutation_attribute::attribute_ki }); } +static bool contains_collection_mutation_attribute(const expr::expression& e) { + return expr::find_in_expression(e, [](const expr::column_mutation_attribute& cma) { + return expr::is(cma.column) || expr::is(cma.column); + }); +} + static bool contains_writetime(const expr::expression& e) { @@ -202,7 +212,8 @@ public: std::vector selectors) : selection(schema, std::move(columns), std::move(metadata), contains_writetime(expr::tuple_constructor{selectors}), - contains_ttl(expr::tuple_constructor{selectors})) + contains_ttl(expr::tuple_constructor{selectors}), + contains_collection_mutation_attribute(expr::tuple_constructor{selectors})) , _selectors(std::move(selectors)) { auto agg_split = expr::split_aggregation(_selectors); @@ -391,6 +402,7 @@ protected: .static_and_regular_timestamps = rs._timestamps, .static_and_regular_ttls = rs._ttls, .temporaries = {}, + .collection_element_metadata = rs._collection_element_metadata, }; for (auto&& e : _sel._selectors) { auto out = expr::evaluate(e, inputs); @@ -429,6 +441,7 @@ protected: .static_and_regular_timestamps = rs._timestamps, .static_and_regular_ttls = rs._ttls, .temporaries = _temporaries, + .collection_element_metadata = rs._collection_element_metadata, }; for (size_t i = 0; i != _sel._inner_loop.size(); ++i) { _temporaries[i] = expr::evaluate(_sel._inner_loop[i], inputs); @@ -553,6 +566,9 @@ result_set_builder::result_set_builder(const selection& s, gc_clock::time_point if (s._collect_TTLs) { _ttls.resize(s._columns.size(), 0); } + if (s._collect_collection_timestamps) { + _collection_element_metadata.resize(s._columns.size()); + } } void result_set_builder::add_empty() { @@ -563,6 +579,9 @@ void result_set_builder::add_empty() { if (!_ttls.empty()) { _ttls[current.size() - 1] = -1; } + if (!_collection_element_metadata.empty()) { + _collection_element_metadata[current.size() - 1] = {}; + } } void result_set_builder::add(bytes_opt value) { @@ -585,8 +604,45 @@ void result_set_builder::add(const column_definition& def, const query::result_a } void result_set_builder::add_collection(const column_definition& def, bytes_view c) { + size_t col_idx = current.size(); + if (!_collection_element_metadata.empty()) { + // Extended format produced by serialize_for_cql_with_timestamps() + // [uint32 cql_len][cql bytes][int32 entry_count] + // followed by entry_count entries, each: + // [int32 key_len][key bytes][int64 timestamp][int64 expiry_raw] + // where expiry_raw is -1 if the element does not expire, otherwise + // it is the serialized gc_clock time used to derive the remaining + // TTL. The flag _collect_collection_timestamps = true determines + // whether this extended format is used (instead of a plain CQL + // collection blob), and it is only enabled when a feature flag + // guarantees both reader and writer support it. + uint32_t cql_len = read_simple(c); + bytes_view cql_bytes = read_simple_bytes(c, cql_len); + current.emplace_back(to_bytes(cql_bytes)); + + auto& meta = _collection_element_metadata[col_idx]; + meta = {}; // clear stale data from previous row + int32_t entry_count = read_simple(c); + for (int32_t i = 0; i < entry_count; ++i) { + int32_t key_len = read_simple(c); + bytes key = to_bytes(read_simple_bytes(c, key_len)); + int64_t ts = read_simple(c); + int64_t expiry_raw = read_simple(c); + meta.timestamps[key] = ts; + if (expiry_raw != -1) { + auto expiry = gc_clock::time_point(gc_clock::duration(expiry_raw)); + auto ttl_left = expiry - _now; + int32_t ttl = int32_t(ttl_left.count()); + if (ttl > 0) { + meta.ttls[key] = ttl; + } + // otherwise, expired or no TTL; We can omit this key from + // map - missing key is treated as null by the evaluator. + } + } + return; + } current.emplace_back(to_bytes(c)); - // timestamps, ttls meaningless for collections } void result_set_builder::update_last_group() { diff --git a/cql3/selection/selection.hh b/cql3/selection/selection.hh index 2d68e44c27..28980be6a6 100644 --- a/cql3/selection/selection.hh +++ b/cql3/selection/selection.hh @@ -12,6 +12,7 @@ #include "utils/assert.hh" #include "bytes.hh" +#include "cql3/expr/collection_cell_metadata.hh" #include "schema/schema_fwd.hh" #include "query/query-result-reader.hh" #include "selector.hh" @@ -69,6 +70,7 @@ private: ::shared_ptr _metadata; const bool _collect_timestamps; const bool _collect_TTLs; + const bool _collect_collection_timestamps; const bool _contains_static_columns; bool _is_trivial; protected: @@ -78,7 +80,9 @@ protected: std::vector columns, std::vector> metadata_, bool collect_timestamps, - bool collect_TTLs, trivial is_trivial = trivial::no); + bool collect_TTLs, + bool collect_collection_timestamps, + trivial is_trivial = trivial::no); virtual ~selection() {} public: @@ -197,6 +201,7 @@ public: std::vector current_clustering_key; std::vector _timestamps; std::vector _ttls; + std::vector _collection_element_metadata; const query_options* _options; private: const gc_clock::time_point _now; From 35e807a36ceff76c529626058383a3fe71f7e3fd Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Sun, 12 Apr 2026 11:06:59 +0300 Subject: [PATCH 4/8] cql3: prepare and evaluate WRITETIME/TTL on collection elements and UDT fields Complete the implementation of SELECT WRITETIME(col[key])/TTL(col[key]) and WRITETIME(col.field)/TTL(col.field), building on the grammar (commit 1), wire format (commit 2), and selection-layer (commit 3) changes in the preceding patches. * prepare_column_mutation_attribute() (prepare_expr.cc) now handles the subscript and field_selection nodes that the grammar produces: - For subscripts, it validates that the inner column is a non-frozen map or set and checks the 'writetime_ttl_individual_element' feature flag so the feature is rejected during rolling upgrades. - For field selections, it validates that the inner column is a non-frozen UDT, with the same feature-flag check. * do_evaluate(column_mutation_attribute) (expression.cc) handles the same two cases. For a field selection it serializes the field index as a key and looks it up in collection_element_metadata; for a subscript it evaluates the subscript key and looks it up in the same map. A missing key (element not found or expired) returns NULL, matching Cassandra behavior. Together with the preceding three patches, this finally fixes #15427. The next three patches will add tests and documentation for the new feature, and the final eighth patch will fix the implementation of UDT fields in LWT expressions - which the first patch made the grammar allow but is still not implemented correctly. --- cql3/expr/expression.cc | 52 +++++++++++++++++++++++++++++++++++++++ cql3/expr/prepare_expr.cc | 34 +++++++++++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/cql3/expr/expression.cc b/cql3/expr/expression.cc index fe3e1105c9..7c7d5260d1 100644 --- a/cql3/expr/expression.cc +++ b/cql3/expr/expression.cc @@ -1206,6 +1206,58 @@ cql3::raw_value do_evaluate(const field_selection& field_select, const evaluatio static cql3::raw_value do_evaluate(const column_mutation_attribute& cma, const evaluation_inputs& inputs) { + // Helper for WRITETIME/TTL on a collection element or UDT field: given the + // inner column and the serialized element key, validate the index and look + // up the per-element timestamp or TTL in collection_element_metadata. + auto lookup_element_attribute = [&](const column_value* inner_col, std::string_view context, bytes key) -> cql3::raw_value { + int32_t index = inputs.selection->index_of(*inner_col->col); + if (inputs.collection_element_metadata.empty() || index < 0 || size_t(index) >= inputs.collection_element_metadata.size()) { + on_internal_error(expr_logger, fmt::format("evaluating column_mutation_attribute {}: column {} is not in selection", + context, inner_col->col->name_as_text())); + } + const auto& meta = inputs.collection_element_metadata[index]; + switch (cma.kind) { + case column_mutation_attribute::attribute_kind::writetime: { + const auto it = meta.timestamps.find(key); + if (it == meta.timestamps.end()) { + return cql3::raw_value::make_null(); + } + return raw_value::make_value(data_value(it->second).serialize()); + } + case column_mutation_attribute::attribute_kind::ttl: { + const auto it = meta.ttls.find(key); + // The test it->second <= 0 (rather than < 0) matches the + // single-TTL check ttl_v <= 0 below. + if (it == meta.ttls.end() || it->second <= 0) { + return cql3::raw_value::make_null(); + } + return raw_value::make_value(data_value(it->second).serialize()); + } + } + on_internal_error(expr_logger, fmt::format("evaluating column_mutation_attribute {} with unexpected kind", context)); + }; + // Handle WRITETIME(x.field) / TTL(x.field) on a UDT field + if (auto fs = expr::as_if(&cma.column)) { + auto inner_col = expr::as_if(&fs->structure); + if (!inner_col) { + on_internal_error(expr_logger, fmt::format("evaluating column_mutation_attribute field_selection: inner expression is not a column: {}", fs->structure)); + } + return lookup_element_attribute(inner_col, "field_selection", serialize_field_index(fs->field_idx)); + } + // Handle WRITETIME(m[key]) / TTL(m[key]) on a map element + if (auto sub = expr::as_if(&cma.column)) { + auto inner_col = expr::as_if(&sub->val); + if (!inner_col) { + on_internal_error(expr_logger, fmt::format("evaluating column_mutation_attribute subscript: inner expression is not a column: {}", sub->val)); + } + auto evaluated_key = evaluate(sub->sub, inputs); + if (evaluated_key.is_null()) { + return cql3::raw_value::make_null(); + } + return evaluated_key.view().with_linearized([&] (bytes_view key_bv) { + return lookup_element_attribute(inner_col, "subscript", bytes(key_bv)); + }); + } auto col = expr::as_if(&cma.column); if (!col) { on_internal_error(expr_logger, fmt::format("evaluating column_mutation_attribute of non-column {}", cma.column)); diff --git a/cql3/expr/prepare_expr.cc b/cql3/expr/prepare_expr.cc index c743ea3172..8b057bef01 100644 --- a/cql3/expr/prepare_expr.cc +++ b/cql3/expr/prepare_expr.cc @@ -1259,6 +1259,40 @@ prepare_column_mutation_attribute( receiver->type->name(), receiver->name->text())); } auto column = prepare_expression(cma.column, db, keyspace, schema_opt, nullptr); + // Helper for the subscript and field-selection cases below: validates that + // inner_expr is a column, not a primary key column, that its type satisfies + // type_allowed, and that the cluster feature flag is on. + auto validate_and_return = + [&](const expression& inner_expr, std::string_view context, + auto type_allowed, std::string_view type_allowed_str) -> std::optional { + auto inner_cval = expr::as_if(&inner_expr); + if (!inner_cval) { + throw exceptions::invalid_request_exception(fmt::format("{} on a {} expects a column, got {}", cma.kind, context, inner_expr)); + } + if (inner_cval->col->is_primary_key()) { + throw exceptions::invalid_request_exception(fmt::format("{} is not legal on primary key component {}", cma.kind, inner_cval->col->name_as_text())); + } + if (!type_allowed(inner_cval->col->type)) { + throw exceptions::invalid_request_exception(fmt::format("{} on a {} is only valid for {}", cma.kind, context, type_allowed_str)); + } + if (!db.features().writetime_ttl_individual_element) { + throw exceptions::invalid_request_exception(fmt::format( + "{} on a {} is not supported until all nodes in the cluster are upgraded", cma.kind, context)); + } + return column_mutation_attribute{.kind = cma.kind, .column = std::move(column)}; + }; + // Handle WRITETIME(m[key]) / TTL(m[key]) - a subscript into a non-frozen map or set column + if (auto sub = expr::as_if(&column)) { + return validate_and_return(sub->val, "subscript", + [](const data_type& t) { return (t->is_map() || t->is_set()) && t->is_multi_cell(); }, + "non-frozen map or set columns"); + } + // Handle WRITETIME(x.field) / TTL(x.field) - a field selection into a non-frozen UDT column + if (auto fs = expr::as_if(&column)) { + return validate_and_return(fs->structure, "field selection", + [](const data_type& t) { return t->is_user_type() && t->is_multi_cell(); }, + "non-frozen UDT columns"); + } auto cval = expr::as_if(&column); if (!cval) { throw exceptions::invalid_request_exception(fmt::format("{} expects a column, but {} is a general expression", cma.kind, column)); From ccb94618ccbe4696eadd46bc1673d0c20b37d638 Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Thu, 19 Mar 2026 19:49:33 +0200 Subject: [PATCH 5/8] test/cqlpy: test WRITETIME() and TTL() on element of map, set or UDT This patch adds many tests verifying the behavior of WRITETIME() and TTL() on individual elements of maps, sets and UDTs, serving as a regression test for issue #15427. We also add tests verifying our understanding of related issues like WRITETIME() and TTL() of entire collections and of individual elements of *frozen* collections. All new tests pass on Cassandra 5.0, helping to verify that our implementation is compatible with Cassandra. They also pass on ScyllaDB after the previous patch (most didn't before that patch). Signed-off-by: Nadav Har'El --- test/cqlpy/test_select_collection_element.py | 271 ++++++++++++++++++- test/cqlpy/test_type_tuple.py | 60 ++++ test/cqlpy/test_udt.py | 150 ++++++++++ 3 files changed, 472 insertions(+), 9 deletions(-) create mode 100644 test/cqlpy/test_type_tuple.py create mode 100644 test/cqlpy/test_udt.py diff --git a/test/cqlpy/test_select_collection_element.py b/test/cqlpy/test_select_collection_element.py index ef56660bc3..9cb5f573c1 100644 --- a/test/cqlpy/test_select_collection_element.py +++ b/test/cqlpy/test_select_collection_element.py @@ -6,6 +6,7 @@ # Tests for SELECT of a specific key in a collection column ############################################################################# +import time import pytest from cassandra.protocol import InvalidRequest from .util import unique_name, unique_key_int, new_test_table, new_type, new_function @@ -18,6 +19,13 @@ def table1(cql, test_keyspace): yield table cql.execute("DROP TABLE " + table) +@pytest.fixture(scope="module") +def table2(cql, test_keyspace): + table = test_keyspace + "." + unique_name() + cql.execute(f"CREATE TABLE {table} (p int PRIMARY KEY, s set)") + yield table + cql.execute("DROP TABLE " + table) + def test_basic_int_key_selection(cql, table1): p = unique_key_int() cql.execute(f"INSERT INTO {table1}(p,m) VALUES ({p}, " + "{1:10,2:20})") @@ -136,15 +144,13 @@ def test_list_subscript(scylla_only, cql, test_keyspace): assert list(cql.execute(f"SELECT l[2] FROM {table} WHERE p={p}")) == [(None,)] assert list(cql.execute(f"SELECT l[10] FROM {table} WHERE p={p}")) == [(None,)] -def test_set_subscript(cql, test_keyspace): - schema = 'p int PRIMARY KEY, s set' - with new_test_table(cql, test_keyspace, schema) as table: - p = unique_key_int() - cql.execute(f"INSERT INTO {table}(p,s) VALUES ({p}, " + "{10,20})") - assert list(cql.execute(f"SELECT s[0] FROM {table} WHERE p={p}")) == [(None,)] - assert list(cql.execute(f"SELECT s[10] FROM {table} WHERE p={p}")) == [(10,)] - assert list(cql.execute(f"SELECT s[11] FROM {table} WHERE p={p}")) == [(None,)] - assert list(cql.execute(f"SELECT s[20] FROM {table} WHERE p={p}")) == [(20,)] +def test_set_subscript(cql, table2): + p = unique_key_int() + cql.execute(f"INSERT INTO {table2}(p,s) VALUES ({p}, " + "{10,20})") + assert list(cql.execute(f"SELECT s[0] FROM {table2} WHERE p={p}")) == [(None,)] + assert list(cql.execute(f"SELECT s[10] FROM {table2} WHERE p={p}")) == [(10,)] + assert list(cql.execute(f"SELECT s[11] FROM {table2} WHERE p={p}")) == [(None,)] + assert list(cql.execute(f"SELECT s[20] FROM {table2} WHERE p={p}")) == [(20,)] # scylla only because cassandra doesn't support lua language @pytest.mark.xfail(reason="#22075") @@ -154,3 +160,250 @@ def test_subscript_function_arg(scylla_only, cql, test_keyspace, table1): p = unique_key_int() cql.execute(f"INSERT INTO {table1}(p,m) VALUES ({p}, " + "{1:10,2:20})") assert list(cql.execute(f"SELECT add_one(m[1]) FROM {table1} WHERE p={p}")) == [(11,)] + +# Test selecting the WRITETIME() of a specific key in a map column. +# Cassandra added support for this in Cassandra 5.0 (CASSANDRA-8877). +# Reproduces issue #15427. +def test_writetime_map_element(cql, table1): + p = unique_key_int() + # Generate two reasonable but different timestamps + timestamp = int(time.time() * 1000000) + timestamp1 = timestamp - 1234 + timestamp2 = timestamp - 1217 # newer than timestamp1 + # Insert two elements with INSERT and check their timestamps + cql.execute(f'INSERT INTO {table1}(p,m) VALUES ({p}, {{1:10,2:20}}) USING TIMESTAMP {timestamp1}') + assert list(cql.execute(f"SELECT WRITETIME(m[1]) FROM {table1} WHERE p={p}")) == [(timestamp1,)] + assert list(cql.execute(f"SELECT WRITETIME(m[2]) FROM {table1} WHERE p={p}")) == [(timestamp1,)] + # Replace one element and add another with UPDATE, using a newer timestamp + # and check the WRITETIME. + cql.execute(f'UPDATE {table1} USING TIMESTAMP {timestamp2} SET m = m + {{2:30}} WHERE p={p}') + cql.execute(f'UPDATE {table1} USING TIMESTAMP {timestamp2} SET m = m + {{3:30}} WHERE p={p}') + assert list(cql.execute(f"SELECT WRITETIME(m[1]) FROM {table1} WHERE p={p}")) == [(timestamp1,)] + assert list(cql.execute(f"SELECT WRITETIME(m[2]) FROM {table1} WHERE p={p}")) == [(timestamp2,)] + assert list(cql.execute(f"SELECT WRITETIME(m[3]) FROM {table1} WHERE p={p}")) == [(timestamp2,)] + +# Additional tests for WRITETIME() of a specific key in a map, with missing +# items, missing maps, and missing keys, to check that it returns null in +# those cases instead of erroring. +# Reproduces issue #15427. +def test_writetime_map_element_nulls(cql, table1): + # Missing item (row doesn't exist): WRITETIME(m[key]) returns no row. + p_missing = unique_key_int() + assert list(cql.execute(f"SELECT WRITETIME(m[1]) FROM {table1} WHERE p={p_missing}")) == [] + # Existing row but map column is null (never written): returns null + # timestamp (looks like "None" in Python). + p_null_map = unique_key_int() + cql.execute(f"INSERT INTO {table1}(p) VALUES ({p_null_map})") + assert list(cql.execute(f"SELECT WRITETIME(m[1]) FROM {table1} WHERE p={p_null_map}")) == [(None,)] + # Existing row with a map, but the requested key is absent: returns null + # timestamp. + p = unique_key_int() + timestamp = int(time.time() * 1000000) - 1234 + cql.execute(f'INSERT INTO {table1}(p,m) VALUES ({p}, {{1:10}}) USING TIMESTAMP {timestamp}') + assert list(cql.execute(f"SELECT WRITETIME(m[1]) FROM {table1} WHERE p={p}")) == [(timestamp,)] + assert list(cql.execute(f"SELECT WRITETIME(m[99]) FROM {table1} WHERE p={p}")) == [(None,)] + +# Also, null as a subscript is allowed and returns null timestamp, instead of +# erroring. This test is marked cassandra_bug because Cassandra returns an +# ugly "NoHostAvailable ... java.lang.NullPointerException" error in this case +# (see CASSANDRA-21248). +def test_writetime_map_element_null_subscript(cql, table1, cassandra_bug): + p = unique_key_int() + cql.execute(f'INSERT INTO {table1}(p,m) VALUES ({p}, {{1:10}})') + assert list(cql.execute(f"SELECT WRITETIME(m[null]) FROM {table1} WHERE p={p}")) == [(None,)] + +# Test selecting the TTL() of a specific key in a map column. +# Cassandra added support for this in Cassandra 5.0 (CASSANDRA-8877). +# Reproduces issue #15427. +def test_ttl_map_element(cql, table1): + p = unique_key_int() + ttl1 = 3600 # 1 hour + ttl2 = 7200 # 2 hours, different from ttl1 + # Insert two elements with INSERT and check their TTLs + cql.execute(f'INSERT INTO {table1}(p,m) VALUES ({p}, {{1:10,2:20}}) USING TTL {ttl1}') + # Because of the time taken to execute the statements and reading the + # TTL() reads the remaining TTL, we can't check for an exact TTL value, + # but we can check that it's less than or equal to the TTL we set and + # greater than TTL minus some small amount of time. + [(t,)] = cql.execute(f"SELECT TTL(m[1]) FROM {table1} WHERE p={p}") + assert t is not None and t <= ttl1 and t > ttl1 - 60 + [(t,)] = cql.execute(f"SELECT TTL(m[2]) FROM {table1} WHERE p={p}") + assert t is not None and t <= ttl1 and t > ttl1 - 60 + # Replace one element and add another with UPDATE, using a different TTL + # and check the TTLs. + cql.execute(f'UPDATE {table1} USING TTL {ttl2} SET m = m + {{2:30}} WHERE p={p}') + cql.execute(f'UPDATE {table1} USING TTL {ttl2} SET m = m + {{3:30}} WHERE p={p}') + [(t,)] = cql.execute(f"SELECT TTL(m[1]) FROM {table1} WHERE p={p}") + assert t is not None and t <= ttl1 and t > ttl1 - 60 + [(t,)] = cql.execute(f"SELECT TTL(m[2]) FROM {table1} WHERE p={p}") + assert t is not None and t <= ttl2 and t > ttl2 - 60 + [(t,)] = cql.execute(f"SELECT TTL(m[3]) FROM {table1} WHERE p={p}") + assert t is not None and t <= ttl2 and t > ttl2 - 60 + +# Additional tests for TTL() of a specific key in a map. Just like in the +# similar test for WRITETIME() above, also for TTL we can have missing +# items, missing maps, and missing keys. For for TTL() there is also the +# case where the TTL itself is missing for some cells but not others. +def test_ttl_map_element_nulls(cql, table1): + # Missing item (row doesn't exist): TTL(m[key]) returns no row. + p_missing = unique_key_int() + assert list(cql.execute(f"SELECT TTL(m[1]) FROM {table1} WHERE p={p_missing}")) == [] + # Existing row but map column is null (never written): returns null + # TTL (looks like "None" in Python). + p = unique_key_int() + cql.execute(f"INSERT INTO {table1}(p) VALUES ({p})") + assert list(cql.execute(f"SELECT TTL(m[1]) FROM {table1} WHERE p={p}")) == [(None,)] + # Existing row with a map, but the requested key is absent: returns null + # TTL. + p = unique_key_int() + ttl1 = 1000 + cql.execute(f'INSERT INTO {table1}(p,m) VALUES ({p}, {{1:10}}) USING TTL {ttl1}') + assert list(cql.execute(f"SELECT TTL(m[2]) FROM {table1} WHERE p={p}")) == [(None,)] + # Row with some cells having TTL, others not. + cql.execute(f'UPDATE {table1} SET m = m + {{2:20}} WHERE p={p}') + [(t,)] = cql.execute(f'SELECT TTL(m[1]) FROM {table1} WHERE p={p}') + assert t is not None and t <= ttl1 and t > ttl1 - 60 + [(t,)] = cql.execute(f'SELECT TTL(m[2]) FROM {table1} WHERE p={p}') + assert t is None + +# Also, null as a subscript is allowed and returns null ttl, instead of +# erroring. This test is marked cassandra_bug because Cassandra returns an +# ugly "NoHostAvailable ... java.lang.NullPointerException" error in this case +# (see CASSANDRA-21248). +def test_ttl_map_element_null_subscript(cql, table1, cassandra_bug): + p = unique_key_int() + cql.execute(f'INSERT INTO {table1}(p,m) VALUES ({p}, {{1:10}})') + assert list(cql.execute(f"SELECT TTL(m[null]) FROM {table1} WHERE p={p}")) == [(None,)] + +# Test that WRITETIME() and TTL() cannot be used on an entire unfrozen +# collection column. Because an unfrozen collection is stored as multiple +# cells that may each have different timestamps and TTLs, there is no single +# WRITETIME or TTL to return for the whole collection. +# It appears that Cassandra has a bug here: It allows selecting WRITETIME() +# of an entire unfrozen map, but returns an array of timestamps, where you +# can't even tell which timestamp belongs to which element. So we'll mark +# this test cassandra_bug - see CASSANDRA-21240. +def test_writetime_ttl_whole_collection_forbidden(cql, table1, cassandra_bug): + p = unique_key_int() + timestamp = int(time.time() * 1000000) - 1234 # a reasonable timestamp + cql.execute(f'INSERT INTO {table1}(p,m) VALUES ({p}, {{1:10,2:20}}) USING TIMESTAMP {timestamp}') + with pytest.raises(InvalidRequest): + cql.execute(f"SELECT WRITETIME(m) FROM {table1} WHERE p={p}") + with pytest.raises(InvalidRequest): + cql.execute(f"SELECT TTL(m) FROM {table1} WHERE p={p}") + +# Test selecting the WRITETIME() of a specific element in a set column. +# Like non-frozen maps, non-frozen sets store each element as a separate cell +# with its own write timestamp, so WRITETIME(s[key]) should return the +# timestamp of the last write to that element. +# The syntax writetime(s[key]) derives from the syntax s[key] for checking +# the presence of a key in a set - tested in test_set_subscript() above. +def test_writetime_set_element(cql, table2): + p = unique_key_int() + timestamp = int(time.time() * 1000000) + timestamp1 = timestamp - 1234 + timestamp2 = timestamp - 1217 # newer than timestamp1 + # Insert two elements with INSERT and check their timestamps. + cql.execute(f'INSERT INTO {table2}(p,s) VALUES ({p}, {{10,20}}) USING TIMESTAMP {timestamp1}') + assert list(cql.execute(f"SELECT WRITETIME(s[10]) FROM {table2} WHERE p={p}")) == [(timestamp1,)] + assert list(cql.execute(f"SELECT WRITETIME(s[20]) FROM {table2} WHERE p={p}")) == [(timestamp1,)] + # An element not present in the set returns null. + assert list(cql.execute(f"SELECT WRITETIME(s[99]) FROM {table2} WHERE p={p}")) == [(None,)] + # Add a new element with UPDATE using a newer timestamp and check that + # the existing elements keep their old timestamp while the new one has + # the newer timestamp. + cql.execute(f'UPDATE {table2} USING TIMESTAMP {timestamp2} SET s = s + {{30}} WHERE p={p}') + assert list(cql.execute(f"SELECT WRITETIME(s[10]) FROM {table2} WHERE p={p}")) == [(timestamp1,)] + assert list(cql.execute(f"SELECT WRITETIME(s[20]) FROM {table2} WHERE p={p}")) == [(timestamp1,)] + assert list(cql.execute(f"SELECT WRITETIME(s[30]) FROM {table2} WHERE p={p}")) == [(timestamp2,)] + +# Test that WRITETIME(col[key]) fails with a clear error when col is not a +# map/set. +def test_writetime_subscript_on_wrong_type(cql, test_keyspace): + schema = 'p int PRIMARY KEY, v int' + with new_test_table(cql, test_keyspace, schema) as table: + # Subscript on a non-map/set column should be rejected. + # Cassandra says "Invalid element selection: v is of type int is not a + # collection". Scylla says "Column v is not a map/set/list, cannot be + # subscripted" + with pytest.raises(InvalidRequest, match=' v '): + cql.execute(f"SELECT WRITETIME(v[3]) FROM {table}") + +# Test that TTL(col[key]) fails with a clear error when col is not a +# map/set. +def test_ttl_subscript_on_wrong_type(cql, test_keyspace): + schema = 'p int PRIMARY KEY, v int' + with new_test_table(cql, test_keyspace, schema) as table: + # Subscript on a non-map/set column should be rejected. + # Cassandra says "Invalid element selection: v is of type int is not a + # collection". Scylla says "Column v is not a map/set/list, cannot be + # subscripted" + with pytest.raises(InvalidRequest, match=' v '): + cql.execute(f"SELECT TTL(v[3]) FROM {table}") + +# Test that WRITETIME(col[key]) and TTL(col[key]) are not allowed for list +# elements. In theory, this could be allowed - lists really do have individual +# timestamps and ttls for each elements, just like maps and sets. But Cassandra +# doesn't allow it, so we don't allow it either. Note that list[index] is an +# inefficient way to access a list element, so it shouldn't be encouraged. +def test_writetime_ttl_list_element_forbidden(cql, test_keyspace): + schema = 'p int PRIMARY KEY, l list' + with new_test_table(cql, test_keyspace, schema) as table: + # Scylla an Cassandra have different error messages: Cassandra says + # "Element selection is only allowed on sets and maps, but l is a list" + # Scylla says "WRITETIME on a subscript is only valid for non-frozen + # map or set columns" + with pytest.raises(InvalidRequest, match='Element|subscript'): + cql.execute(f"SELECT WRITETIME(l[0]) FROM {table}") + with pytest.raises(InvalidRequest, match='Element|subscript'): + cql.execute(f"SELECT TTL(l[0]) FROM {table}") + +# In a *frozen* map, the entire map is stored as a single atomic cell and has +# a single timestamp and TTL, so WRITETIME(col[key]) and TTL(col[key]) are +# not useful. Moreover we noted in test_writetime_field_on_frozen_udt, +# that Cassandra does *not* allow them for fields frozen tuples, so it +# is arguably wrong to allow them for fields of frozen UDTs - and also in +# members of frozen maps. +# Nevertheless, Cassandra does allow WRITETIME() and TTL() on individual +# members of frozen maps, and returns the timestamp and TTL of the whole map. +# Because of this difference from Cassandra, we mark this test as xfail. +# In the future we can consider if Cassandra's support for member timestamps +# in frozen maps is a mistake, and replace the xfail by cassandra_bug. +@pytest.mark.xfail(reason="Cassandra allows WRITETIME on members of frozen maps, but Scylla does not") +def test_writetime_frozen_map_elements(cql, test_keyspace): + schema = f'p int PRIMARY KEY, x frozen>' + with new_test_table(cql, test_keyspace, schema) as table: + p = unique_key_int() + timestamp = int(time.time() * 1000000) - 1234 + cql.execute(f'INSERT INTO {table}(p, x) VALUES ({p}, {{1: 10}}) USING TIMESTAMP {timestamp}') + # Scylla's error message: "WRITETIME on a subscript is only valid for + # non-frozen map or set columns". Cassandra allows this. + assert list(cql.execute(f'SELECT WRITETIME(x[1]) FROM {table} WHERE p={p}')) == [(timestamp,)] + +# This is a variant of the previous test (test_writetime_frozen_map_elements) +# with the frozen map also being a primary key column (which is allowed for +# a *frozen* map). Primary key columns don't have a timestamp or TLL at +# all, so this case should be rejected even if we generally allow WRITETIME +# or TTL on frozen map members. +def test_writetime_frozen_map_elements_pk(cql, test_keyspace): + schema = 'pk frozen> PRIMARY KEY' + with new_test_table(cql, test_keyspace, schema) as table: + # Scylla's error message is "WRITETIME is not legal on primary key + # component pk". Cassandra's is "Cannot use selection function + # writetime on PRIMARY KEY part pk". + with pytest.raises(InvalidRequest, match='primary key|PRIMARY KEY'): + cql.execute(f"SELECT WRITETIME(pk[1]) FROM {table}") + with pytest.raises(InvalidRequest, match='primary key|PRIMARY KEY'): + cql.execute(f"SELECT TTL(pk[1]) FROM {table}") + +# Test that we can select m, m[key] and writetime(m[key]) in the same query. +# This requires the implementation to be able to handle a mix of an original +# copy of m and also the writetime information. +def test_writetime_mixed_map_and_map_element(cql, table1): + p = unique_key_int() + timestamp = int(time.time() * 1000000) + timestamp1 = timestamp - 1234 + timestamp2 = timestamp - 1217 + cql.execute(f'UPDATE {table1} USING TIMESTAMP {timestamp1} SET m = m + {{1:10}} WHERE p={p}') + cql.execute(f'UPDATE {table1} USING TIMESTAMP {timestamp2} SET m = m + {{2:20}} WHERE p={p}') + assert list(cql.execute(f"SELECT m, m[1], m[2], WRITETIME(m[2]), WRITETIME(m[1]) FROM {table1} WHERE p={p}")) == [({1: 10, 2: 20}, 10, 20, timestamp2, timestamp1)] diff --git a/test/cqlpy/test_type_tuple.py b/test/cqlpy/test_type_tuple.py new file mode 100644 index 0000000000..1f4b3a2c36 --- /dev/null +++ b/test/cqlpy/test_type_tuple.py @@ -0,0 +1,60 @@ +# Copyright 2026-present ScyllaDB +# +# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + +############################################################################# +# Tests involving the "tuple" column type. +# +# There are additional tests involving tuples in the context of other features +# (describe, aggregates, filtering, json, etc.) in other files. We also have +# many tests for tuples ported from Cassandra in cassandra_tests/. Here we only +# have a few tests that didn't fit elsewhere. +############################################################################# + +import time +import pytest +from cassandra.protocol import InvalidRequest +from .util import new_test_table, unique_key_int + +@pytest.fixture(scope="module") +def table1(cql, test_keyspace): + with new_test_table(cql, test_keyspace, 'p int PRIMARY KEY, t tuple') as table: + yield table + +# Unlike UDTs, tuples are always frozen, meaning that individual fields cannot +# be updated separately. So there is no point in allowing writetime() or ttl() +# on individual fields of a tuple - so we do reject them, and we'll test this +# here. Additionally, we check that writetime() and ttl() on the entire tuple +# column is allowed. +def test_writetime_on_tuple(cql, table1): + # Subscript on a tuple is not valid for WRITETIME, since tuple is not + # a map or set. + with pytest.raises(InvalidRequest, match=' t '): + cql.execute(f"SELECT WRITETIME(t[0]) FROM {table1}") + # Field selection on a tuple is also not valid: tuples are not user + # types, so the field_selection preparation itself rejects it. + with pytest.raises(InvalidRequest, match='user type'): + cql.execute(f"SELECT WRITETIME(t.a) FROM {table1}") + # But WRITETIME() on the entire tuple column is allowed: it returns + # the single timestamp of the whole (frozen) tuple cell. + p = unique_key_int() + timestamp = int(time.time() * 1000000) - 1234 + cql.execute(f"INSERT INTO {table1}(p, t) VALUES ({p}, (1, 2)) USING TIMESTAMP {timestamp}") + assert list(cql.execute(f"SELECT WRITETIME(t) FROM {table1} WHERE p={p}")) == [(timestamp,)] + +def test_ttl_on_tuple(cql, table1): + # Subscript on a tuple is not valid for TTL, since tuple is not + # a map or set. + with pytest.raises(InvalidRequest, match=' t '): + cql.execute(f"SELECT TTL(t[0]) FROM {table1}") + # Field selection on a tuple is also not valid: tuples are not user + # types, so the field_selection preparation itself rejects it. + with pytest.raises(InvalidRequest, match='user type'): + cql.execute(f"SELECT TTL(t.a) FROM {table1}") + # But TTL() on the entire tuple column is allowed: it returns + # the single timestamp of the whole (frozen) tuple cell. + p = unique_key_int() + cql.execute(f"INSERT INTO {table1}(p, t) VALUES ({p}, (1, 2)) USING TTL 1000") + ret = list(cql.execute(f"SELECT TTL(t) FROM {table1} WHERE p={p}")) + # TTL() returns the remaining TTL, which may be slightly less than 1000 by the time we read it, so we check that it's between 900 and 1000. + assert len(ret) == 1 and len(ret[0]) == 1 and 900 <= ret[0][0] <= 1000 diff --git a/test/cqlpy/test_udt.py b/test/cqlpy/test_udt.py new file mode 100644 index 0000000000..1255a4d1a0 --- /dev/null +++ b/test/cqlpy/test_udt.py @@ -0,0 +1,150 @@ +# Copyright 2026-present ScyllaDB +# +# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + +############################################################################# +# Various tests for User-Defined Type (UDT) support in Scylla. +# +# There are additional tests involving UDTs in the context of other features +# (describe, aggregates, filtering, json, etc.) in other files. We also have +# many tests for UDTs ported from Cassandra in cassandra_tests/. Here we only +# have a few tests that didn't fit elsewhere. +############################################################################# + +import time +import pytest +from cassandra.protocol import InvalidRequest +from .util import new_test_table, unique_key_int, new_type + +# In test_select_collection_element.py we test that we can select +# WRITETIME(col[key]) and TTL(col[key]) for an element in a map and a set, +# reproducing issue #15427. +# Here we check that we can also select WRITETIME(a.field) for a field of +# an UDT: +def test_writetime_udt_field(cql, test_keyspace): + with new_type(cql, test_keyspace, '(a int, b int)') as typ: + schema = f"p int PRIMARY KEY, x {typ}" + with new_test_table(cql, test_keyspace, schema) as table: + p = unique_key_int() + timestamp = int(time.time() * 1000000) + timestamp1 = timestamp - 1234 + timestamp2 = timestamp - 1217 + cql.execute(f"UPDATE {table} USING TIMESTAMP {timestamp1} SET x.a = 42 WHERE p={p}") + cql.execute(f"UPDATE {table} USING TIMESTAMP {timestamp2} SET x.b = 17 WHERE p={p}") + assert list(cql.execute(f"SELECT WRITETIME(x.a) FROM {table} WHERE p={p}")) == [(timestamp1,)] + assert list(cql.execute(f"SELECT WRITETIME(x.b) FROM {table} WHERE p={p}")) == [(timestamp2,)] + +# And TTL(a.field) for a field of an UDT: +def test_ttl_udt_field(cql, test_keyspace): + with new_type(cql, test_keyspace, '(a int, b int)') as typ: + schema = f"p int PRIMARY KEY, x {typ}" + with new_test_table(cql, test_keyspace, schema) as table: + p = unique_key_int() + ttl1 = 3600 + ttl2 = 7200 + cql.execute(f"UPDATE {table} USING TTL {ttl1} SET x.a = 42 WHERE p={p}") + cql.execute(f"UPDATE {table} USING TTL {ttl2} SET x.b = 17 WHERE p={p}") + # TTL() on a field returns the remaining TTL for that field, which may be slightly less than the original TTL by the time we read it, so we check that it's between ttl and ttl-100. + [(t,)] = cql.execute(f"SELECT TTL(x.a) FROM {table} WHERE p={p}") + assert t is not None and t <= ttl1 and t > ttl1 - 100 + [(t,)] = cql.execute(f"SELECT TTL(x.b) FROM {table} WHERE p={p}") + assert t is not None and t <= ttl2 and t > ttl2 - 100 + # A field written without a TTL returns null TTL. + cql.execute(f"UPDATE {table} SET x.a = 99 WHERE p={p}") + [(t,)] = cql.execute(f"SELECT TTL(x.a) FROM {table} WHERE p={p}") + assert t is None + +# Test that WRITETIME(col.field) and TTL(col.field) fail with a clear error +# when col is not a UDT, e.g., an int. +def test_writetime_field_on_wrong_type(cql, test_keyspace): + schema = f'p int PRIMARY KEY, v int' + with new_test_table(cql, test_keyspace, schema) as table: + # Subscript on a non-map/set column should be rejected. + # Cassandra says "Invalid element selection: v is of type int is not a + # collection". Scylla says "Column v is not a map/set/list, cannot be + # subscripted" + with pytest.raises(InvalidRequest, match=' v '): + cql.execute(f"SELECT WRITETIME(v[3]) FROM {table}") + with pytest.raises(InvalidRequest, match=' v '): + cql.execute(f"SELECT TTL(v[3]) FROM {table}") + # Field selection on a non-UDT column should be rejected. + # Cassandra and Scylla both say "Invalid field selection: + # v of type int is not a user type". + with pytest.raises(InvalidRequest, match='user type'): + cql.execute(f"SELECT WRITETIME(v.a) FROM {table}") + with pytest.raises(InvalidRequest, match='user type'): + cql.execute(f"SELECT TTL(v.a) FROM {table}") + +# If a WRITETIME() of individual fields of a tuple is not supported (we have +# test for this - test_type_tuple.py::test_writetime_on_tuple), then it's +# reasonable that it is also not supported on a *frozen* UDT, since just like +# in a tuple, a frozen UDT does not have timestamps for individual fields. +# But, surprisingly, Cassandra does allow WRITETIME() on individual fields of +# a frozen UDT, and returns the timestamp of the whole UDT. Because of this +# difference from Cassandra, we mark this test as xfail. In the future we +# can consider if Cassandra's support for field timestamps in frozen UDTs +# but not tuples is a mistake, and replace the xfail by cassandra_bug. +@pytest.mark.xfail(reason="Cassandra allows WRITETIME on fields of frozen UDTs, but Scylla does not") +def test_writetime_field_on_frozen_udt(cql, test_keyspace): + with new_type(cql, test_keyspace, '(a int, b int)') as typ: + schema = f'p int PRIMARY KEY, x frozen<{typ}>' + with new_test_table(cql, test_keyspace, schema) as table: + p = unique_key_int() + timestamp = int(time.time() * 1000000) - 1234 + cql.execute(f'INSERT INTO {table}(p, x) VALUES ({p}, {{a: 1, b: 2}}) USING TIMESTAMP {timestamp}') + # Scylla's error message: "WRITETIME on a field selection is + # only valid for non-frozen UDT columns". Cassandra allows this. + assert list(cql.execute(f'SELECT WRITETIME(x.a) FROM {table} WHERE p={p}')) == [(timestamp,)] + +@pytest.mark.xfail(reason="Cassandra allows TTL on fields of frozen UDTs, but Scylla does not") +def test_ttl_field_on_frozen_udt(cql, test_keyspace): + with new_type(cql, test_keyspace, '(a int, b int)') as typ: + schema = f'p int PRIMARY KEY, x frozen<{typ}>' + with new_test_table(cql, test_keyspace, schema) as table: + p = unique_key_int() + ttl = 3600 + cql.execute(f'INSERT INTO {table}(p, x) VALUES ({p}, {{a: 1, b: 2}}) USING TTL {ttl}') + # Scylla's error message: "TTL on a field selection is + # only valid for non-frozen UDT columns". Cassandra allows this. + [(t,)] = cql.execute(f'SELECT TTL(x.a) FROM {table} WHERE p={p}') + assert t is not None and t <= ttl and t > ttl - 100 + +# This is a variant of the previous test (test_writetime_field_on_frozen_udt) +# with the frozen udt also being a primary key column (which is allowed for +# a *frozen* udt). Primary key columns don't have a timestamp or TTL at +# all, so this case should be rejected even if we generally allow WRITETIME +# or TTL on frozen udt members. +def test_writetime_ttl_frozen_udt_elements_pk(cql, test_keyspace): + with new_type(cql, test_keyspace, '(a int, b int)') as typ: + schema = f'pk frozen<{typ}> PRIMARY KEY' + with new_test_table(cql, test_keyspace, schema) as table: + # Scylla's error message is "WRITETIME is not legal on primary key + # component pk". Cassandra's is "Cannot use selection function + # writetime on PRIMARY KEY part pk". + with pytest.raises(InvalidRequest, match='primary key|PRIMARY KEY'): + cql.execute(f"SELECT WRITETIME(pk.a) FROM {table}") + with pytest.raises(InvalidRequest, match='primary key|PRIMARY KEY'): + cql.execute(f"SELECT TTL(pk.a) FROM {table}") + +# Test WRITETIME() on an entire frozen UDT column. A frozen UDT is stored as +# a single atomic cell, so it has a single timestamp like any atomic column. +def test_writetime_frozen_udt(cql, test_keyspace): + with new_type(cql, test_keyspace, '(a int, b int)') as typ: + schema = f"p int PRIMARY KEY, x frozen<{typ}>" + with new_test_table(cql, test_keyspace, schema) as table: + p = unique_key_int() + timestamp = int(time.time() * 1000000) - 1234 + cql.execute(f"INSERT INTO {table}(p, x) VALUES ({p}, {{a: 1, b: 2}}) USING TIMESTAMP {timestamp}") + assert list(cql.execute(f"SELECT WRITETIME(x) FROM {table} WHERE p={p}")) == [(timestamp,)] + +# Test TTL() on an entire frozen UDT column. A frozen UDT is stored as +# a single atomic cell, so it has a single TTL like any atomic column. +def test_ttl_frozen_udt(cql, test_keyspace): + with new_type(cql, test_keyspace, '(a int, b int)') as typ: + schema = f"p int PRIMARY KEY, x frozen<{typ}>" + with new_test_table(cql, test_keyspace, schema) as table: + p = unique_key_int() + ttl = 3600 + cql.execute(f"INSERT INTO {table}(p, x) VALUES ({p}, {{a: 1, b: 2}}) USING TTL {ttl}") + [(t,)] = cql.execute(f"SELECT TTL(x) FROM {table} WHERE p={p}") + assert t is not None and t <= ttl and t > ttl - 100 \ No newline at end of file From a544dae04707621c6cafa98a1b37c296df592cd0 Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Sun, 22 Mar 2026 23:03:07 +0200 Subject: [PATCH 6/8] test/boost: test WRITETIME() and TTL() on map collection elements Add tests in test/boost/expr_test.cc for the low-level implementation of writetime() and ttl() on a map element. Signed-off-by: Nadav Har'El --- test/boost/expr_test.cc | 114 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/test/boost/expr_test.cc b/test/boost/expr_test.cc index ce1d26cc6b..4378b7f2ce 100644 --- a/test/boost/expr_test.cc +++ b/test/boost/expr_test.cc @@ -3521,6 +3521,120 @@ BOOST_AUTO_TEST_CASE(evaluate_column_mutation_attribute) { } +// Build a schema with a non-frozen map column, suitable for testing WRITETIME(m[k]) and TTL(m[k]). +static schema_ptr make_map_test_schema() { + return schema_builder("test_ks", "test_cf") + .with_column("pk", int32_type, column_kind::partition_key) + .with_column("ck", int32_type, column_kind::clustering_key) + .with_column("m", map_type_impl::get_instance(int32_type, int32_type, /*is_multi_cell=*/true), column_kind::regular_column) + .build(); +} + +// Test that WRITETIME(m[key]) returns the per-element timestamp from collection_cell_metadata. +BOOST_AUTO_TEST_CASE(evaluate_writetime_map_element) { + auto s = make_map_test_schema(); + const column_definition& map_col = s->regular_column_at(0); + + // Subscript expression: m[1] and m[2] + auto sub1 = subscript{ + .val = column_value(&map_col), + .sub = make_int_const(1), + }; + auto sub2 = subscript{ + .val = column_value(&map_col), + .sub = make_int_const(2), + }; + + auto writetime_m1 = column_mutation_attribute{ + .kind = column_mutation_attribute::attribute_kind::writetime, + .column = sub1, + }; + auto writetime_m2 = column_mutation_attribute{ + .kind = column_mutation_attribute::attribute_kind::writetime, + .column = sub2, + }; + + // Build collection_cell_metadata: key1 -> ts1, key2 -> ts2 + bytes key1 = int32_type->decompose(int32_t(1)); + bytes key2 = int32_type->decompose(int32_t(2)); + api::timestamp_type ts1 = 1000000; + api::timestamp_type ts2 = 2000000; + + auto [inputs, inputs_data] = make_evaluation_inputs(s, { + {"pk", make_int_raw(0)}, + {"ck", make_int_raw(0)}, + {"m", mutation_column_value{make_int_int_map_raw({{1, 10}, {2, 20}}), ts1, -1}}, + }); + + // Construct and attach collection_element_metadata for column index of "m" + int32_t m_index = inputs_data->selection->index_of(map_col); + std::vector meta(inputs_data->timestamps.size()); + meta[m_index].timestamps = {{key1, ts1}, {key2, ts2}}; + + inputs.collection_element_metadata = meta; + + BOOST_REQUIRE_EQUAL(evaluate(writetime_m1, inputs), make_bigint_raw(ts1)); + BOOST_REQUIRE_EQUAL(evaluate(writetime_m2, inputs), make_bigint_raw(ts2)); +} + +// Test that WRITETIME(m[key]) returns null for a key absent from the metadata. +BOOST_AUTO_TEST_CASE(evaluate_writetime_map_element_missing_key) { + auto s = make_map_test_schema(); + const column_definition& map_col = s->regular_column_at(0); + + auto writetime_m99 = column_mutation_attribute{ + .kind = column_mutation_attribute::attribute_kind::writetime, + .column = subscript{.val = column_value(&map_col), .sub = make_int_const(99)}, + }; + + bytes key1 = int32_type->decompose(int32_t(1)); + auto [inputs, inputs_data] = make_evaluation_inputs(s, { + {"pk", make_int_raw(0)}, + {"ck", make_int_raw(0)}, + {"m", mutation_column_value{make_int_int_map_raw({{1, 10}}), 1000000, -1}}, + }); + int32_t m_index = inputs_data->selection->index_of(map_col); + std::vector meta(inputs_data->timestamps.size()); + meta[m_index].timestamps = {{key1, api::timestamp_type(1000000)}}; + inputs.collection_element_metadata = meta; + + BOOST_REQUIRE_EQUAL(evaluate(writetime_m99, inputs), cql3::raw_value::make_null()); +} + +// Test that TTL(m[key]) returns the per-element TTL from collection_cell_metadata, +// and returns null when the TTL is -1 (no expiry). +BOOST_AUTO_TEST_CASE(evaluate_ttl_map_element) { + auto s = make_map_test_schema(); + const column_definition& map_col = s->regular_column_at(0); + + auto ttl_m1 = column_mutation_attribute{ + .kind = column_mutation_attribute::attribute_kind::ttl, + .column = subscript{.val = column_value(&map_col), .sub = make_int_const(1)}, + }; + auto ttl_m2 = column_mutation_attribute{ + .kind = column_mutation_attribute::attribute_kind::ttl, + .column = subscript{.val = column_value(&map_col), .sub = make_int_const(2)}, + }; + + bytes key1 = int32_type->decompose(int32_t(1)); + bytes key2 = int32_type->decompose(int32_t(2)); + + auto [inputs, inputs_data] = make_evaluation_inputs(s, { + {"pk", make_int_raw(0)}, + {"ck", make_int_raw(0)}, + {"m", mutation_column_value{make_int_int_map_raw({{1, 10}, {2, 20}}), 1000000, -1}}, + }); + int32_t m_index = inputs_data->selection->index_of(map_col); + std::vector meta(inputs_data->timestamps.size()); + // key1 has TTL 3600, key2 has no TTL (-1) + meta[m_index].ttls = {{key1, int32_t(3600)}, {key2, int32_t(-1)}}; + inputs.collection_element_metadata = meta; + + BOOST_REQUIRE_EQUAL(evaluate(ttl_m1, inputs), make_int_raw(3600)); + // TTL -1 means no expiry -> should return null + BOOST_REQUIRE_EQUAL(evaluate(ttl_m2, inputs), cql3::raw_value::make_null()); +} + // It should be possible to prepare an empty conjunction BOOST_AUTO_TEST_CASE(prepare_conjunction_empty) { schema_ptr table_schema = make_simple_test_schema(); From bb2fb810bbd79ea77cf2741b04b391d7716b737d Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Sun, 22 Mar 2026 18:34:24 +0200 Subject: [PATCH 7/8] cql3: document WRITETIME() and TTL() for elements of map, set or UDT Add to the SELECT documentation (docs/cql/dml/select.rst) documentation of the new ability to select WRITETIME() and TTL() of a single element of map, set or UDT. Also in the TTL documentation (docs/cql/time-to-live.rst), which already had a section on "TTL for a collection", add a mention of the ability to read a single element's TTL(), and an example. Signed-off-by: Nadav Har'El --- docs/cql/dml/select.rst | 61 +++++++++++++++++++++++++++++++++++++-- docs/cql/time-to-live.rst | 28 +++++++++++++++++- 2 files changed, 86 insertions(+), 3 deletions(-) diff --git a/docs/cql/dml/select.rst b/docs/cql/dml/select.rst index 5e05111eb8..0146d6e75b 100644 --- a/docs/cql/dml/select.rst +++ b/docs/cql/dml/select.rst @@ -117,11 +117,15 @@ set will be that of the alias. For instance:: clause, not in the ``ORDER BY`` clause, ...). You must use the original column name instead. +.. _select-writetime-ttl: + ``WRITETIME`` and ``TTL`` function ``````````````````````````````````` Selection supports two special functions (which aren't allowed anywhere else): ``WRITETIME`` and ``TTL``. Both functions -take only one argument, and that argument *must* be a column name (so, for instance, ``TTL(3)`` is invalid). +take only one argument, which must be either a column name, a subscript expression of the form ``collection_column[element]`` +for an individual element of a non-frozen map or set collection, or a field selection of the form ``udt_column.field`` +for an individual field of a non-frozen user-defined type column (so, for instance, ``TTL(3)`` is invalid). Those functions let you retrieve meta-information that is stored internally for each column, namely: @@ -131,7 +135,60 @@ You can read more about the ``TIMESTAMP`` retrieved by ``WRITETIME`` in the :ref - ``TTL`` retrieves the remaining time to live (in *seconds*) for the value of the column, if it set to expire, or ``null`` otherwise. -You can read more about TTL in the :doc:`documentation ` and also in `this ScyllaDB University lesson `_. +You can read more about TTL in the :doc:`documentation ` and also in `this ScyllaDB University lesson `_. + +**Using** ``WRITETIME`` **and** ``TTL`` **on map and set collection elements** + +For a non-frozen map or set column, each element is stored as an independent cell and may have its own write timestamp and TTL. +You can retrieve the per-element timestamp or TTL by subscripting the column with the element:: + + -- Create a table with a map column and a set column + CREATE TABLE t (pk int PRIMARY KEY, m map, s set); + + -- Insert elements with an explicit timestamp + INSERT INTO t (pk, m, s) VALUES (1, {1: 10, 2: 20}, {1, 2}) USING TIMESTAMP 1000000; + + -- Update one element with a newer timestamp + UPDATE t USING TIMESTAMP 2000000 SET m = m + {2: 30}, s = s + {3} WHERE pk = 1; + + -- WRITETIME(m[key]) returns the write timestamp of that specific map element + SELECT WRITETIME(m[1]), WRITETIME(m[2]) FROM t WHERE pk = 1; + -- Returns: 1000000 | 2000000 + + -- WRITETIME(s[element]) returns the write timestamp of that specific set element + SELECT WRITETIME(s[1]), WRITETIME(s[3]) FROM t WHERE pk = 1; + -- Returns: 1000000 | 2000000 + + -- TTL(m[key]) returns the remaining TTL of that specific map element + INSERT INTO t (pk, m) VALUES (1, {1: 10}) USING TTL 3600; + SELECT TTL(m[1]) FROM t WHERE pk = 1; + -- Returns the remaining TTL in seconds for element m[1] + +Note that ``WRITETIME(m)`` and ``TTL(m)`` on an entire non-frozen map or set column are **not** supported — +since each element may have a different timestamp or TTL, there is no single meaningful value to return. +Only ``WRITETIME(m[key])`` and ``TTL(m[key])`` (for a specific map key) or ``WRITETIME(s[element])`` +and ``TTL(s[element])`` (for a specific set element) are allowed. + +**Using** ``WRITETIME`` **and** ``TTL`` **on user-defined type fields** + +For a non-frozen user-defined type (UDT) column, each field is stored as an independent cell and may have its own +write timestamp and TTL. You can retrieve the per-field timestamp or TTL using dot notation:: + + CREATE TYPE address (street text, city text); + CREATE TABLE person (pk int PRIMARY KEY, addr address); + + -- Write two fields at different timestamps + UPDATE person USING TIMESTAMP 1000000 SET addr.street = '123 Main St' WHERE pk = 1; + UPDATE person USING TIMESTAMP 2000000 SET addr.city = 'Springfield' WHERE pk = 1; + + -- WRITETIME(udt_column.field) returns the write timestamp of that specific UDT field + SELECT WRITETIME(addr.street), WRITETIME(addr.city) FROM person WHERE pk = 1; + -- Returns: 1000000 | 2000000 + + -- TTL(udt_column.field) returns the remaining TTL of that specific UDT field + UPDATE person USING TTL 3600 SET addr.street = 'Oak Ave' WHERE pk = 1; + SELECT TTL(addr.street) FROM person WHERE pk = 1; + -- Returns the remaining TTL in seconds for addr.street .. _where-clause: diff --git a/docs/cql/time-to-live.rst b/docs/cql/time-to-live.rst index f170723aba..02e230ff8c 100644 --- a/docs/cql/time-to-live.rst +++ b/docs/cql/time-to-live.rst @@ -97,7 +97,33 @@ The ``gc_grace_seconds`` parameter is defined :ref:`here ` CQL Reference. +You can set the TTL on a per element basis for collections. + +See for example the :ref:`Maps ` CQL Reference for map collections. +For a non-frozen map or set column, each element is stored independently and can have its own TTL. You can query the +remaining TTL for a specific map element using ``TTL(map_column[key])`` or for a specific set element using +``TTL(set_column[element])``: + +.. code-block:: cql + + CREATE TABLE t (pk int PRIMARY KEY, m map, s set); + INSERT INTO t (pk, m, s) VALUES (1, {1: 10}, {100}) USING TTL 3600; + UPDATE t USING TTL 7200 SET m = m + {2: 20}, s = s + {200} WHERE pk = 1; + + -- Returns the remaining TTL for each map element independently + SELECT TTL(m[1]), TTL(m[2]) FROM t WHERE pk = 1; + + -- Returns the remaining TTL for each set element independently + SELECT TTL(s[100]), TTL(s[200]) FROM t WHERE pk = 1; + +Similarly, you can retrieve the write timestamp of a specific map element using ``WRITETIME(map_column[key])`` +or of a specific set element using ``WRITETIME(set_column[element])``. + +For a non-frozen user-defined type (UDT) column, each field is also stored independently and can have its own TTL. +You can query the remaining TTL or write timestamp of a specific field using dot notation: +``TTL(udt_column.field)`` and ``WRITETIME(udt_column.field)``. + +See the :ref:`WRITETIME and TTL function ` section for details. Notes ^^^^^ From 33dbb63aefd1414cacc2041cf243892d92a6dc12 Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Thu, 26 Mar 2026 11:26:32 +0200 Subject: [PATCH 8/8] cql3: support UDT fields in LWT expressions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In an earlier patch, we used the CQL grammar's "subscriptExpr" in the rule for WRITETIME() and TTL(). But since we also wanted these to support UDT fields (x.a), not just collection subscripts (x[3]), we expanded subscriptExpr to also support the field syntax. But LWT expressions already used this subscriptExpr, which meant that LWT expressions unintentionally gained support for UDT fields. Missing support for UDT fields in LWT is a long-standing known Cassandra-compatibility bug (#13624), and now our grammar finally supports the missing syntax. But supporting the syntax is not enough for correct implementation of this feature - we also need to fix the expression handling: Two bugs prevented expressions like `v.a = 0` from working in LWT IF clauses, where `v` is a column of user-defined type. The first bug was in get_lhs_receiver() in prepare_expr.cc: it lacked a handler for field_selection nodes, causing an "unexpected expression" internal error when preparing a condition like `IF v.a = 0`. The fix adds a handler that returns a column_specification whose type is taken from the prepared field_selection's type field. The second bug was in search_and_replace() in expression.cc: when recursing into a field_selection node it reconstructed it with only `structure` and `field`, silently dropping the `field_idx` and `type` fields that are set during preparation. As a result, any transformation that uses search_and_replace() on a prepared expression containing a field_selection — such as adjust_for_collection_as_maps() called from column_condition_prepare() — would zero out those fields. At evaluation time, type_of() on the field_selection returned a null data_type pointer, causing a segmentation fault when the comparison operator tried to call ->equal() through it. The fix preserves field_idx and type when reconstructing the node. Fixes #13624. --- cql3/expr/expression.cc | 2 +- cql3/expr/prepare_expr.cc | 6 ++++++ .../insert_update_if_condition_collections_test.py | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/cql3/expr/expression.cc b/cql3/expr/expression.cc index 7c7d5260d1..ab5e1bd1c4 100644 --- a/cql3/expr/expression.cc +++ b/cql3/expr/expression.cc @@ -1031,7 +1031,7 @@ expression search_and_replace(const expression& e, return cast{c.style, recurse(c.arg), c.type}; }, [&] (const field_selection& fs) -> expression { - return field_selection{recurse(fs.structure), fs.field}; + return field_selection{recurse(fs.structure), fs.field, fs.field_idx, fs.type}; }, [&] (const subscript& s) -> expression { return subscript { diff --git a/cql3/expr/prepare_expr.cc b/cql3/expr/prepare_expr.cc index 8b057bef01..b34f6f6f4b 100644 --- a/cql3/expr/prepare_expr.cc +++ b/cql3/expr/prepare_expr.cc @@ -1688,6 +1688,12 @@ static lw_shared_ptr get_lhs_receiver(const expression& pr return list_value_spec_of(*sub_col.col->column_specification); } }, + [&](const field_selection& fs) -> lw_shared_ptr { + return make_lw_shared( + schema.ks_name(), schema.cf_name(), + ::make_shared(fs.field->text(), true), + fs.type); + }, [&](const tuple_constructor& tup) -> lw_shared_ptr { std::ostringstream tuple_name; tuple_name << "("; diff --git a/test/cqlpy/cassandra_tests/validation/operations/insert_update_if_condition_collections_test.py b/test/cqlpy/cassandra_tests/validation/operations/insert_update_if_condition_collections_test.py index af3aa8baa4..f6ed5dc3fa 100644 --- a/test/cqlpy/cassandra_tests/validation/operations/insert_update_if_condition_collections_test.py +++ b/test/cqlpy/cassandra_tests/validation/operations/insert_update_if_condition_collections_test.py @@ -266,7 +266,7 @@ def checkInvalidUDT(cql, table, condition, value, expected): assertInvalidThrow(cql, table, expected, "DELETE FROM %s WHERE k = 0 IF " + condition) assertRows(execute(cql, table, "SELECT * FROM %s"), row(0, value)) -@pytest.mark.xfail(reason="Issue #13624") +# Reproduces #13624 def testUDTField(cql, test_keyspace): with create_type(cql, test_keyspace, "(a int, b text)") as typename: for frozen in [False, True]: