/* * Copyright (C) 2015-present ScyllaDB * * Modified by ScyllaDB */ /* * SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0) */ #pragma once #include "cql3/statements/raw/select_statement.hh" #include "cql3/expr/unset.hh" #include "cql3/cql_statement.hh" #include "cql3/stats.hh" #include #include #include "transport/messages/result_message.hh" #include "index/secondary_index_manager.hh" #include "exceptions/coordinator_result.hh" #include "locator/host_id.hh" #include "service/cas_shard.hh" #include "vector_search/vector_store_client.hh" namespace service { class client_state; class storage_proxy; class storage_proxy_coordinator_query_options; class storage_proxy_coordinator_query_result; } // namespace service namespace cql3 { class query_processor; namespace selection { class selection; } // namespace selection namespace restrictions { class restrictions; class statement_restrictions; } // namespace restrictions namespace statements { /// Encapsulates a partition key and clustering key prefix as a primary key. struct primary_key { dht::decorated_key partition; clustering_key_prefix clustering; }; /** * Encapsulates a completely parsed SELECT query, including the target * column family, expression, result count, and ordering clause. * */ class select_statement : public cql_statement { public: template using coordinator_result = exceptions::coordinator_result; using parameters = raw::select_statement::parameters; using ordering_comparator_type = raw::select_statement::ordering_comparator_type; using prepared_ann_ordering_type = raw::select_statement::prepared_ann_ordering_type; bool _may_use_token_aware_routing; protected: static thread_local const lw_shared_ptr _default_parameters; schema_ptr _schema; schema_ptr _query_schema; uint32_t _bound_terms; lw_shared_ptr _parameters; ::shared_ptr _selection; const ::shared_ptr _restrictions; const bool _restrictions_need_filtering; ::shared_ptr> _group_by_cell_indices; ///< Indices in result row of cells holding GROUP BY values. bool _is_reversed; expr::unset_bind_variable_guard _limit_unset_guard; std::optional _limit; expr::unset_bind_variable_guard _per_partition_limit_unset_guard; std::optional _per_partition_limit; template using compare_fn = raw::select_statement::compare_fn; using result_row_type = raw::select_statement::result_row_type; /** * The comparator used to orders results when multiple keys are selected (using IN). */ ordering_comparator_type _ordering_comparator; query::partition_slice::option_set _opts; cql_stats& _stats; const ks_selector _ks_sel; bool _range_scan = false; bool _range_scan_no_bypass_cache = false; std::unique_ptr _attrs; private: future> process_results_complex(foreign_ptr> results, lw_shared_ptr cmd, const query_options& options, gc_clock::time_point now) const; protected : virtual future<::shared_ptr> do_execute(query_processor& qp, service::query_state& state, const query_options& options) const; friend class select_statement_executor; public: select_statement(schema_ptr schema, uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator, std::optional limit, std::optional per_partition_limit, cql_stats& stats, std::unique_ptr attrs); virtual ::shared_ptr get_result_metadata() const override; virtual uint32_t get_bound_terms() const override; virtual future<> check_access(query_processor& qp, const service::client_state& state) const override; virtual bool depends_on(std::string_view ks_name, std::optional cf_name) const override; virtual future<::shared_ptr> execute(query_processor& qp, service::query_state& state, const query_options& options, std::optional guard) const override; virtual future<::shared_ptr> execute_without_checking_exception_message(query_processor& qp, service::query_state& qs, const query_options& options, std::optional guard) const override; future<::shared_ptr> execute_non_aggregate_unpaged(query_processor& qp, lw_shared_ptr cmd, dht::partition_range_vector&& partition_ranges, service::query_state& state, const query_options& options, gc_clock::time_point now) const; future<::shared_ptr> execute_without_checking_exception_message_non_aggregate_unpaged(query_processor& qp, lw_shared_ptr cmd, dht::partition_range_vector&& partition_ranges, service::query_state& state, const query_options& options, gc_clock::time_point now, std::optional cas_shard) const; future<::shared_ptr> execute_without_checking_exception_message_aggregate_or_paged(query_processor& qp, lw_shared_ptr cmd, dht::partition_range_vector&& partition_ranges, service::query_state& state, const query_options& options, gc_clock::time_point now, int32_t page_size, bool aggregate, bool nonpaged_filtering, uint64_t limit, std::optional cas_shard) const; future> process_results(foreign_ptr> results, lw_shared_ptr cmd, const query_options& options, gc_clock::time_point now) const; const sstring& keyspace() const; const sstring& column_family() const; query::partition_slice make_partition_slice(const query_options& options) const; const ::shared_ptr get_restrictions() const; bool has_group_by() const { return _group_by_cell_indices && !_group_by_cell_indices->empty(); } db::timeout_clock::duration get_timeout(const service::client_state& state, const query_options& options) const; protected: uint64_t get_limit(const query_options& options, const std::optional& limit, bool is_per_partition_limit = false) const; static uint64_t get_inner_loop_limit(uint64_t limit, bool is_aggregate); bool needs_post_query_ordering() const; virtual void update_stats_rows_read(int64_t rows_read) const { _stats.rows_read += rows_read; } }; class primary_key_select_statement : public select_statement { public: primary_key_select_statement(schema_ptr schema, uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator, std::optional limit, std::optional per_partition_limit, cql_stats &stats, std::unique_ptr attrs); }; class view_indexed_table_select_statement : public select_statement { secondary_index::index _index; expr::expression _used_index_restrictions; schema_ptr _view_schema; noncopyable_function _get_partition_ranges_for_posting_list; noncopyable_function _get_partition_slice_for_posting_list; public: static constexpr size_t max_base_table_query_concurrency = 4096; static ::shared_ptr prepare(data_dictionary::database db, schema_ptr schema, uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator, std::optional limit, std::optional per_partition_limit, cql_stats &stats, std::unique_ptr attrs); view_indexed_table_select_statement(schema_ptr schema, uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator, std::optional limit, std::optional per_partition_limit, cql_stats &stats, const secondary_index::index& index, expr::expression used_index_restrictions, schema_ptr view_schema, std::unique_ptr attrs); private: virtual future<::shared_ptr> do_execute(query_processor& qp, service::query_state& state, const query_options& options) const override; future<::shared_ptr> actually_do_execute(query_processor& qp, service::query_state& state, const query_options& options) const; lw_shared_ptr generate_view_paging_state_from_base_query_results(lw_shared_ptr paging_state, const foreign_ptr>& results, service::query_state& state, const query_options& options, uint32_t internal_page_size) const; future>>> find_index_partition_ranges(query_processor& qp, service::query_state& state, const query_options& options) const; future, lw_shared_ptr>>> find_index_clustering_rows(query_processor& qp, service::query_state& state, const query_options& options) const; future> process_base_query_results( foreign_ptr> results, lw_shared_ptr cmd, service::query_state& state, const query_options& options, gc_clock::time_point now, lw_shared_ptr paging_state, uint32_t internal_page_size) const; lw_shared_ptr prepare_command_for_base_query(query_processor& qp, const query_options& options, service::query_state& state, gc_clock::time_point now, bool use_paging) const; future>, lw_shared_ptr>>> do_execute_base_query( query_processor& qp, dht::partition_range_vector&& partition_ranges, service::query_state& state, const query_options& options, gc_clock::time_point now, lw_shared_ptr paging_state) const; future> execute_base_query( query_processor& qp, dht::partition_range_vector&& partition_ranges, service::query_state& state, const query_options& options, gc_clock::time_point now, lw_shared_ptr paging_state) const; // Function for fetching the selected columns from a list of clustering rows. // It is currently used only in our Secondary Index implementation - ordinary // CQL SELECT statements do not have the syntax to request a list of rows. // FIXME: The current implementation is very inefficient - it requests each // row separately (and, incrementally, in parallel). Even multiple rows from a single // partition are requested separately. This last case can be easily improved, // but to implement the general case (multiple rows from multiple partitions) // efficiently, we will need more support from other layers. // Keys are ordered in token order (see #3423) future>, lw_shared_ptr>>> do_execute_base_query( query_processor& qp, std::vector&& primary_keys, service::query_state& state, const query_options& options, gc_clock::time_point now, lw_shared_ptr paging_state) const; future> execute_base_query( query_processor& qp, std::vector&& primary_keys, service::query_state& state, const query_options& options, gc_clock::time_point now, lw_shared_ptr paging_state) const; virtual void update_stats_rows_read(int64_t rows_read) const override { _stats.rows_read += rows_read; _stats.secondary_index_rows_read += rows_read; } future>> read_posting_list( query_processor& qp, const query_options& options, uint64_t limit, service::query_state& state, gc_clock::time_point now, db::timeout_clock::time_point timeout, bool include_base_clustering_key) const; dht::partition_range_vector get_partition_ranges_for_local_index_posting_list(const query_options& options) const; dht::partition_range_vector get_partition_ranges_for_global_index_posting_list(const query_options& options) const; query::partition_slice get_partition_slice_for_local_index_posting_list(const query_options& options) const; query::partition_slice get_partition_slice_for_global_index_posting_list(const query_options& options) const; bytes compute_idx_token(const partition_key& key) const; }; class mutation_fragments_select_statement : public select_statement { schema_ptr _underlying_schema; public: mutation_fragments_select_statement( schema_ptr output_schema, schema_ptr underlying_schema, uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator, std::optional limit, std::optional per_partition_limit, cql_stats &stats, std::unique_ptr attrs); // This statement has a schema that is different from that of the underlying table. static schema_ptr generate_output_schema(schema_ptr underlying_schema); private: future> do_query( locator::effective_replication_map_ptr erm_keepalive, locator::host_id this_node, service::storage_proxy& sp, schema_ptr schema, lw_shared_ptr cmd, dht::partition_range_vector partition_ranges, db::consistency_level cl, service::storage_proxy_coordinator_query_options optional_params) const; virtual future<::shared_ptr> do_execute(query_processor& qp, service::query_state& state, const query_options& options) const override; }; class vector_indexed_table_select_statement : public select_statement { secondary_index::index _index; prepared_ann_ordering_type _prepared_ann_ordering; mutable gc_clock::time_point _query_start_time_point; public: static constexpr size_t max_ann_query_limit = 1000; static ::shared_ptr prepare(data_dictionary::database db, schema_ptr schema, uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator, prepared_ann_ordering_type prepared_ann_ordering, std::optional limit, std::optional per_partition_limit, cql_stats& stats, std::unique_ptr attrs); vector_indexed_table_select_statement(schema_ptr schema, uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator, prepared_ann_ordering_type prepared_ann_ordering, std::optional limit, std::optional per_partition_limit, cql_stats& stats, const secondary_index::index& index, std::unique_ptr attrs); private: future<::shared_ptr> do_execute( query_processor& qp, service::query_state& state, const query_options& options) const override; void update_stats() const; lw_shared_ptr prepare_command_for_base_query(query_processor& qp, service::query_state& state, const query_options& options) const; std::vector get_ann_ordering_vector(const query_options& options) const; future<::shared_ptr> query_base_table(query_processor& qp, service::query_state& state, const query_options& options, const std::vector& pkeys, lowres_clock::time_point timeout) const; future<::shared_ptr> query_base_table(query_processor& qp, service::query_state& state, const query_options& options, lw_shared_ptr command, lowres_clock::time_point timeout, const std::vector& pkeys) const; future<::shared_ptr> query_base_table(query_processor& qp, service::query_state& state, const query_options& options, lw_shared_ptr command, lowres_clock::time_point timeout, std::vector partition_ranges) const; }; } }