diff --git a/column_computation.hh b/column_computation.hh index 9198d1192a..befa102ba5 100644 --- a/column_computation.hh +++ b/column_computation.hh @@ -54,6 +54,36 @@ public: virtual bytes_opt compute_value(const schema& schema, const partition_key& key, const clustering_row& row) const = 0; }; +/* + * Computes token value of partition key and returns it as bytes. + * + * Should NOT be used (use token_column_computation), because ordering + * of bytes is different than ordering of tokens (signed vs unsigned comparison). + * + * The type name stored for computations of this class is "token" - this was + * the original implementation. (now depracated for new tables) + */ +class legacy_token_column_computation : public column_computation { +public: + virtual column_computation_ptr clone() const override { + return std::make_unique(*this); + } + virtual bytes serialize() const override; + virtual bytes_opt compute_value(const schema& schema, const partition_key& key, const clustering_row& row) const override; +}; + + +/* + * Computes token value of partition key and returns it as long_type. + * The return type means that it can be trivially sorted (for example + * if computed column using this computation is a clustering key), + * preserving the correct order of tokens (using signed comparisons). + * + * Please use this class instead of legacy_token_column_computation. + * + * The type name stored for computations of this class is "token_v2". + * (the name "token" refers to the depracated legacy_token_column_computation) + */ class token_column_computation : public column_computation { public: virtual column_computation_ptr clone() const override { diff --git a/cql3/statements/select_statement.cc b/cql3/statements/select_statement.cc index 5a15f8f6fd..447819cc72 100644 --- a/cql3/statements/select_statement.cc +++ b/cql3/statements/select_statement.cc @@ -891,6 +891,23 @@ static void append_base_key_to_index_ck(std::vector& exploded_index_ std::move(begin, key_view.end(), std::back_inserter(exploded_index_ck)); } +bytes indexed_table_select_statement::compute_idx_token(const partition_key& key) const { + const column_definition& cdef = *_view_schema->clustering_key_columns().begin(); + clustering_row empty_row(clustering_key_prefix::make_empty()); + bytes_opt computed_value; + if (!cdef.is_computed()) { + // FIXME(pgrabowski): this legacy code is here for backward compatibility and should be removed + // once "computed_columns feature" is supported by every node + computed_value = legacy_token_column_computation().compute_value(*_schema, key, empty_row); + } else { + computed_value = cdef.get_computation().compute_value(*_schema, key, empty_row); + } + if (!computed_value) { + throw std::logic_error(format("No value computed for idx_token column {}", cdef.name())); + } + return *computed_value; +} + lw_shared_ptr indexed_table_select_statement::generate_view_paging_state_from_base_query_results(lw_shared_ptr paging_state, const foreign_ptr>& results, service::storage_proxy& proxy, service::query_state& state, const query_options& options) const { const column_definition* cdef = _schema->get_column_definition(to_bytes(_index.target_column())); @@ -924,7 +941,7 @@ lw_shared_ptr indexed_table_select_statement if (_index.metadata().local()) { exploded_index_ck.push_back(bytes_view(*indexed_column_value)); } else { - token_bytes = dht::get_token(*_schema, last_base_pk).data(); + token_bytes = compute_idx_token(last_base_pk); exploded_index_ck.push_back(bytes_view(token_bytes)); append_base_key_to_index_ck(exploded_index_ck, last_base_pk, *cdef); } @@ -1108,7 +1125,7 @@ query::partition_slice indexed_table_select_statement::get_partition_slice_for_g // Computed token column needs to be added to index view restrictions const column_definition& token_cdef = *_view_schema->clustering_key_columns().begin(); auto base_pk = partition_key::from_optional_exploded(*_schema, single_pk_restrictions->values(options)); - bytes token_value = dht::get_token(*_schema, base_pk).data(); + bytes token_value = compute_idx_token(base_pk); auto token_restriction = ::make_shared(token_cdef); token_restriction->expression = expr::binary_operator{ &token_cdef, expr::oper_t::EQ, diff --git a/cql3/statements/select_statement.hh b/cql3/statements/select_statement.hh index 788e275c65..d382b8d7f3 100644 --- a/cql3/statements/select_statement.hh +++ b/cql3/statements/select_statement.hh @@ -300,6 +300,8 @@ private: query::partition_slice get_partition_slice_for_local_index_posting_list(const query_options& options) const; query::partition_slice get_partition_slice_for_global_index_posting_list(const query_options& options) const; + + bytes compute_idx_token(const partition_key& key) const; }; } diff --git a/db/schema_tables.cc b/db/schema_tables.cc index 64a649e26f..fc84b0e62f 100644 --- a/db/schema_tables.cc +++ b/db/schema_tables.cc @@ -3050,7 +3050,7 @@ future<> maybe_update_legacy_secondary_index_mv_schema(service::migration_manage // and as such it must be recreated properly. if (!base_schema->columns_by_name().contains(first_view_ck.name())) { schema_builder builder{schema_ptr(v)}; - builder.mark_column_computed(first_view_ck.name(), std::make_unique()); + builder.mark_column_computed(first_view_ck.name(), std::make_unique()); return mm.announce_view_update(view_ptr(builder.build()), true); } return make_ready_future<>(); diff --git a/db/view/view.cc b/db/view/view.cc index 9cf6396631..d44a8b9cd3 100644 --- a/db/view/view.cc +++ b/db/view/view.cc @@ -416,7 +416,7 @@ deletable_row& view_updates::get_view_row(const partition_key& base_key, const c if (!service::get_local_storage_service().db().local().find_column_family(_base->id()).get_index_manager().is_index(*_view)) { throw std::logic_error(format("Column {} doesn't exist in base and this view is not backing a secondary index", cdef.name_as_text())); } - computed_value = token_column_computation().compute_value(*_base, base_key, update); + computed_value = legacy_token_column_computation().compute_value(*_base, base_key, update); } else { computed_value = cdef.get_computation().compute_value(*_base, base_key, update); } diff --git a/gms/feature.hh b/gms/feature.hh index 0c8c379ccf..c742b3521b 100644 --- a/gms/feature.hh +++ b/gms/feature.hh @@ -143,6 +143,7 @@ extern const std::string_view LWT; extern const std::string_view PER_TABLE_PARTITIONERS; extern const std::string_view PER_TABLE_CACHING; extern const std::string_view DIGEST_FOR_NULL_VALUES; +extern const std::string_view CORRECT_IDX_TOKEN_IN_SECONDARY_INDEX; } diff --git a/gms/feature_service.cc b/gms/feature_service.cc index 771d16df90..101862471f 100644 --- a/gms/feature_service.cc +++ b/gms/feature_service.cc @@ -62,6 +62,7 @@ constexpr std::string_view features::LWT = "LWT"; constexpr std::string_view features::PER_TABLE_PARTITIONERS = "PER_TABLE_PARTITIONERS"; constexpr std::string_view features::PER_TABLE_CACHING = "PER_TABLE_CACHING"; constexpr std::string_view features::DIGEST_FOR_NULL_VALUES = "DIGEST_FOR_NULL_VALUES"; +constexpr std::string_view features::CORRECT_IDX_TOKEN_IN_SECONDARY_INDEX = "CORRECT_IDX_TOKEN_IN_SECONDARY_INDEX"; static logging::logger logger("features"); @@ -86,6 +87,7 @@ feature_service::feature_service(feature_config cfg) : _config(cfg) , _per_table_partitioners_feature(*this, features::PER_TABLE_PARTITIONERS) , _per_table_caching_feature(*this, features::PER_TABLE_CACHING) , _digest_for_null_values_feature(*this, features::DIGEST_FOR_NULL_VALUES) + , _correct_idx_token_in_secondary_index_feature(*this, features::CORRECT_IDX_TOKEN_IN_SECONDARY_INDEX) {} feature_config feature_config_from_db_config(db::config& cfg, std::set disabled) { @@ -187,6 +189,7 @@ std::set feature_service::known_feature_set() { gms::features::UDF, gms::features::CDC, gms::features::DIGEST_FOR_NULL_VALUES, + gms::features::CORRECT_IDX_TOKEN_IN_SECONDARY_INDEX }; for (const sstring& s : _config._disabled_features) { @@ -266,6 +269,7 @@ void feature_service::enable(const std::set& list) { std::ref(_per_table_partitioners_feature), std::ref(_per_table_caching_feature), std::ref(_digest_for_null_values_feature), + std::ref(_correct_idx_token_in_secondary_index_feature) }) { if (list.contains(f.name())) { diff --git a/gms/feature_service.hh b/gms/feature_service.hh index ca0e6b6b68..047af0e3b0 100644 --- a/gms/feature_service.hh +++ b/gms/feature_service.hh @@ -92,6 +92,7 @@ private: gms::feature _per_table_partitioners_feature; gms::feature _per_table_caching_feature; gms::feature _digest_for_null_values_feature; + gms::feature _correct_idx_token_in_secondary_index_feature; public: bool cluster_supports_user_defined_functions() const { @@ -160,6 +161,10 @@ public: bool cluster_supports_lwt() const { return bool(_lwt_feature); } + + bool cluster_supports_correct_idx_token_in_secondary_index() const { + return bool(_correct_idx_token_in_secondary_index_feature); + } }; } // namespace gms diff --git a/index/secondary_index_manager.cc b/index/secondary_index_manager.cc index 9e4c157dbe..781c0e2e39 100644 --- a/index/secondary_index_manager.cc +++ b/index/secondary_index_manager.cc @@ -48,6 +48,7 @@ #include "schema_builder.hh" #include "database.hh" #include "db/view/view.hh" +#include "service/storage_service.hh" #include #include @@ -144,7 +145,13 @@ view_ptr secondary_index_manager::create_view_for_index(const index_metadata& im builder.with_column(index_target->name(), index_target->type, column_kind::partition_key); // Additional token column is added to ensure token order on secondary index queries bytes token_column_name = get_available_token_column_name(*schema); - builder.with_computed_column(token_column_name, bytes_type, column_kind::clustering_key, std::make_unique()); + if (service::get_local_storage_service().db().local().features().cluster_supports_correct_idx_token_in_secondary_index()) { + builder.with_computed_column(token_column_name, long_type, column_kind::clustering_key, std::make_unique()); + } else { + // FIXME(pgrabowski): this legacy code is here for backward compatibility and should be removed + // once "supports_correct_idx_token_in_secondary_index" is supported by every node + builder.with_computed_column(token_column_name, bytes_type, column_kind::clustering_key, std::make_unique()); + } for (auto& col : schema->partition_key_columns()) { if (col == *index_target) { continue; diff --git a/schema.cc b/schema.cc index 87d40c8bc4..e396dc4299 100644 --- a/schema.cc +++ b/schema.cc @@ -1602,21 +1602,35 @@ column_computation_ptr column_computation::deserialize(const rjson::value& parse throw std::runtime_error(format("Type {} is not convertible to string", *type_json)); } if (rjson::to_string_view(*type_json) == "token") { + return std::make_unique(); + } + if (rjson::to_string_view(*type_json) == "token_v2") { return std::make_unique(); } throw std::runtime_error(format("Incorrect column computation type {} found when parsing {}", *type_json, parsed)); } -bytes token_column_computation::serialize() const { +bytes legacy_token_column_computation::serialize() const { rjson::value serialized = rjson::empty_object(); rjson::set(serialized, "type", rjson::from_string("token")); return to_bytes(rjson::print(serialized)); } -bytes_opt token_column_computation::compute_value(const schema& schema, const partition_key& key, const clustering_row& row) const { +bytes_opt legacy_token_column_computation::compute_value(const schema& schema, const partition_key& key, const clustering_row& row) const { return dht::get_token(schema, key).data(); } +bytes token_column_computation::serialize() const { + rjson::value serialized = rjson::empty_object(); + rjson::set(serialized, "type", rjson::from_string("token_v2")); + return to_bytes(rjson::print(serialized)); +} + +bytes_opt token_column_computation::compute_value(const schema& schema, const partition_key& key, const clustering_row& row) const { + auto long_value = dht::token::to_int64(dht::get_token(schema, key)); + return long_type->decompose(long_value); +} + bool operator==(const raw_view_info& x, const raw_view_info& y) { return x._base_id == y._base_id && x._base_name == y._base_name diff --git a/test/boost/secondary_index_test.cc b/test/boost/secondary_index_test.cc index 7c7460bb5e..ba0c580116 100644 --- a/test/boost/secondary_index_test.cc +++ b/test/boost/secondary_index_test.cc @@ -280,9 +280,9 @@ SEASTAR_TEST_CASE(test_many_columns) { auto res = e.execute_cql("SELECT * from tab WHERE b = 2").get0(); assert_that(res).is_rows().with_size(3) .with_rows({ + {{int32_type->decompose(0)}, {int32_type->decompose(2)}, {int32_type->decompose(0)}, {int32_type->decompose(0)}, {int32_type->decompose(0)}, {int32_type->decompose(0)}}, {{int32_type->decompose(1)}, {int32_type->decompose(2)}, {int32_type->decompose(3)}, {int32_type->decompose(4)}, {int32_type->decompose(5)}, {int32_type->decompose(6)}}, {{int32_type->decompose(1)}, {int32_type->decompose(2)}, {int32_type->decompose(3)}, {int32_type->decompose(7)}, {int32_type->decompose(5)}, {int32_type->decompose(0)}}, - {{int32_type->decompose(0)}, {int32_type->decompose(2)}, {int32_type->decompose(0)}, {int32_type->decompose(0)}, {int32_type->decompose(0)}, {int32_type->decompose(0)}}, }); }); BOOST_TEST_PASSPOINT(); @@ -290,9 +290,9 @@ SEASTAR_TEST_CASE(test_many_columns) { auto res = e.execute_cql("SELECT * from tab WHERE c = 3").get0(); assert_that(res).is_rows().with_size(3) .with_rows({ + {{int32_type->decompose(0)}, {int32_type->decompose(0)}, {int32_type->decompose(3)}, {int32_type->decompose(0)}, {int32_type->decompose(0)}, {int32_type->decompose(0)}}, {{int32_type->decompose(1)}, {int32_type->decompose(2)}, {int32_type->decompose(3)}, {int32_type->decompose(4)}, {int32_type->decompose(5)}, {int32_type->decompose(6)}}, {{int32_type->decompose(1)}, {int32_type->decompose(2)}, {int32_type->decompose(3)}, {int32_type->decompose(7)}, {int32_type->decompose(5)}, {int32_type->decompose(0)}}, - {{int32_type->decompose(0)}, {int32_type->decompose(0)}, {int32_type->decompose(3)}, {int32_type->decompose(0)}, {int32_type->decompose(0)}, {int32_type->decompose(0)}}, }); }); BOOST_TEST_PASSPOINT(); @@ -300,8 +300,8 @@ SEASTAR_TEST_CASE(test_many_columns) { auto res = e.execute_cql("SELECT * from tab WHERE d = 4").get0(); assert_that(res).is_rows().with_size(2) .with_rows({ - {{int32_type->decompose(1)}, {int32_type->decompose(2)}, {int32_type->decompose(3)}, {int32_type->decompose(4)}, {int32_type->decompose(5)}, {int32_type->decompose(6)}}, {{int32_type->decompose(0)}, {int32_type->decompose(0)}, {int32_type->decompose(0)}, {int32_type->decompose(4)}, {int32_type->decompose(0)}, {int32_type->decompose(0)}}, + {{int32_type->decompose(1)}, {int32_type->decompose(2)}, {int32_type->decompose(3)}, {int32_type->decompose(4)}, {int32_type->decompose(5)}, {int32_type->decompose(6)}}, }); }); BOOST_TEST_PASSPOINT(); @@ -309,9 +309,9 @@ SEASTAR_TEST_CASE(test_many_columns) { auto res = e.execute_cql("SELECT * from tab WHERE e = 5").get0(); assert_that(res).is_rows().with_size(3) .with_rows({ + {{int32_type->decompose(0)}, {int32_type->decompose(0)}, {int32_type->decompose(0)}, {int32_type->decompose(0)}, {int32_type->decompose(5)}, {int32_type->decompose(0)}}, {{int32_type->decompose(1)}, {int32_type->decompose(2)}, {int32_type->decompose(3)}, {int32_type->decompose(4)}, {int32_type->decompose(5)}, {int32_type->decompose(6)}}, {{int32_type->decompose(1)}, {int32_type->decompose(2)}, {int32_type->decompose(3)}, {int32_type->decompose(7)}, {int32_type->decompose(5)}, {int32_type->decompose(0)}}, - {{int32_type->decompose(0)}, {int32_type->decompose(0)}, {int32_type->decompose(0)}, {int32_type->decompose(0)}, {int32_type->decompose(5)}, {int32_type->decompose(0)}}, }); }); BOOST_TEST_PASSPOINT(); @@ -319,8 +319,8 @@ SEASTAR_TEST_CASE(test_many_columns) { auto res = e.execute_cql("SELECT * from tab WHERE f = 6").get0(); assert_that(res).is_rows().with_size(2) .with_rows({ - {{int32_type->decompose(1)}, {int32_type->decompose(2)}, {int32_type->decompose(3)}, {int32_type->decompose(4)}, {int32_type->decompose(5)}, {int32_type->decompose(6)}}, {{int32_type->decompose(0)}, {int32_type->decompose(0)}, {int32_type->decompose(0)}, {int32_type->decompose(7)}, {int32_type->decompose(0)}, {int32_type->decompose(6)}}, + {{int32_type->decompose(1)}, {int32_type->decompose(2)}, {int32_type->decompose(3)}, {int32_type->decompose(4)}, {int32_type->decompose(5)}, {int32_type->decompose(6)}}, }); }); }); @@ -477,7 +477,7 @@ SEASTAR_TEST_CASE(test_simple_index_paging) { expect_more_pages(res, true); assert_that(res).is_rows().with_rows({{ - {int32_type->decompose(3)}, {int32_type->decompose(2)}, {int32_type->decompose(1)}, + {int32_type->decompose(1)}, {int32_type->decompose(1)}, {int32_type->decompose(1)}, }}); qo = std::make_unique(db::consistency_level::LOCAL_ONE, infinite_timeout_config, std::vector{}, @@ -487,7 +487,7 @@ SEASTAR_TEST_CASE(test_simple_index_paging) { paging_state = extract_paging_state(res); assert_that(res).is_rows().with_rows({{ - {int32_type->decompose(1)}, {int32_type->decompose(1)}, {int32_type->decompose(1)}, + {int32_type->decompose(1)}, {int32_type->decompose(2)}, {int32_type->decompose(1)}, }}); qo = std::make_unique(db::consistency_level::LOCAL_ONE, infinite_timeout_config, std::vector{}, @@ -496,7 +496,7 @@ SEASTAR_TEST_CASE(test_simple_index_paging) { paging_state = extract_paging_state(res); assert_that(res).is_rows().with_rows({{ - {int32_type->decompose(1)}, {int32_type->decompose(2)}, {int32_type->decompose(1)}, + {int32_type->decompose(3)}, {int32_type->decompose(2)}, {int32_type->decompose(1)}, }}); // Due to implementation differences from origin (Scylla is allowed to return empty pages with has_more_pages == true @@ -521,7 +521,7 @@ SEASTAR_TEST_CASE(test_simple_index_paging) { auto paging_state = extract_paging_state(res); assert_that(res).is_rows().with_rows({{ - {int32_type->decompose(3)}, {int32_type->decompose(2)}, {int32_type->decompose(1)}, + {int32_type->decompose(1)}, {int32_type->decompose(2)}, {int32_type->decompose(1)}, }}); qo = std::make_unique(db::consistency_level::LOCAL_ONE, infinite_timeout_config, std::vector{}, @@ -529,7 +529,7 @@ SEASTAR_TEST_CASE(test_simple_index_paging) { res = e.execute_cql("SELECT * FROM tab WHERE c = 2", std::move(qo)).get0(); assert_that(res).is_rows().with_rows({{ - {int32_type->decompose(1)}, {int32_type->decompose(2)}, {int32_type->decompose(1)}, + {int32_type->decompose(3)}, {int32_type->decompose(2)}, {int32_type->decompose(1)}, }}); }); @@ -540,7 +540,7 @@ SEASTAR_TEST_CASE(test_simple_index_paging) { auto paging_state = extract_paging_state(res); assert_that(res).is_rows().with_rows({{ - {int32_type->decompose(3)}, {int32_type->decompose(2)}, {int32_type->decompose(1)}, + {int32_type->decompose(1)}, {int32_type->decompose(2)}, {int32_type->decompose(1)}, }}); // Override the actual paging state with one with empty keys, @@ -1518,6 +1518,37 @@ SEASTAR_TEST_CASE(test_computed_columns) { }); } +// Ref: #3423 - rows should be returned in token order, +// using signed comparison (ref: #7443) +SEASTAR_TEST_CASE(test_token_order) { + return do_with_cql_env_thread([] (auto& e) { + cquery_nofail(e, "CREATE TABLE t (pk int, v int, PRIMARY KEY(pk))"); + cquery_nofail(e, "CREATE INDEX ON t(v)"); + + for (int i = 0; i < 7; i++) { + cquery_nofail(e, format("INSERT INTO t (pk, v) VALUES ({}, 1)", i).c_str()); + } + + std::vector> expected_rows = { + { int32_type->decompose(5) }, // token(pk) = -7509452495886106294 + { int32_type->decompose(1) }, // token(pk) = -4069959284402364209 + { int32_type->decompose(0) }, // token(pk) = -3485513579396041028 + { int32_type->decompose(2) }, // token(pk) = -3248873570005575792 + { int32_type->decompose(4) }, // token(pk) = -2729420104000364805 + { int32_type->decompose(6) }, // token(pk) = +2705480034054113608 + { int32_type->decompose(3) }, // token(pk) = +9010454139840013625 + }; + + eventually([&] { + auto nonindex_order = cquery_nofail(e, "SELECT pk FROM t"); + auto index_order = cquery_nofail(e, "SELECT pk FROM t WHERE v = 1"); + + assert_that(nonindex_order).is_rows().with_rows(expected_rows); + assert_that(index_order).is_rows().with_rows(expected_rows); + }); + }); +} + // Ref: #5708 - filtering should be applied on an indexed column // if the restriction is not eligible for indexing (it's not EQ) SEASTAR_TEST_CASE(test_filtering_indexed_column) { diff --git a/test/cql/mv_si_local_updates_test.result b/test/cql/mv_si_local_updates_test.result index 6b54b5552e..7156759523 100644 --- a/test/cql/mv_si_local_updates_test.result +++ b/test/cql/mv_si_local_updates_test.result @@ -53,17 +53,17 @@ SELECT * FROM t WHERE v = 6; { "rows" : [ - { - "c" : "6", - "p1" : "4", - "p2" : "5", - "v" : "6" - }, { "c" : "5", "p1" : "3", "p2" : "4", "v" : "6" + }, + { + "c" : "6", + "p1" : "4", + "p2" : "5", + "v" : "6" } ] }