Files
scylladb/cql3/statements/select_statement.hh
Szymon Malewski 29d090845a vector_index: rescoring: Fetch oversampled rows
So far with oversampling the extended set of keys was returned from VS,
but query to the base table was still limited by the query `limit`.
Now for rescoring we want to fetch rows for all the keys returned from VS.
However later we need to restore the command limit, to trim result_set accordingly.
For non-rescoring scenarios we trim directly keys set returned from VS if it happens to exceed query limit.

With this change rescoring validation tests (except `no_nulls_in_rescored_results`) pass fully.

Fixes https://scylladb.atlassian.net/browse/SCYLLADB-83
2026-01-22 15:38:44 +01:00

408 lines
21 KiB
C++

/*
* Copyright (C) 2015-present ScyllaDB
*
* Modified by ScyllaDB
*/
/*
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
*/
#pragma once
#include "cql3/statements/raw/select_statement.hh"
#include "cql3/expr/unset.hh"
#include "cql3/cql_statement.hh"
#include "cql3/stats.hh"
#include <seastar/core/shared_ptr.hh>
#include <string_view>
#include "transport/messages/result_message.hh"
#include "index/secondary_index_manager.hh"
#include "exceptions/coordinator_result.hh"
#include "locator/host_id.hh"
#include "service/cas_shard.hh"
#include "vector_search/vector_store_client.hh"
#include "vector_search/filter.hh"
namespace service {
class client_state;
class storage_proxy;
class storage_proxy_coordinator_query_options;
class storage_proxy_coordinator_query_result;
} // namespace service
namespace cql3 {
class query_processor;
namespace selection {
class selection;
} // namespace selection
namespace restrictions {
class restrictions;
class statement_restrictions;
} // namespace restrictions
namespace statements {
/// Encapsulates a partition key and clustering key prefix as a primary key.
struct primary_key {
dht::decorated_key partition;
clustering_key_prefix clustering;
};
/**
* Encapsulates a completely parsed SELECT query, including the target
* column family, expression, result count, and ordering clause.
*
*/
class select_statement : public cql_statement {
public:
template<typename T>
using coordinator_result = exceptions::coordinator_result<T>;
using parameters = raw::select_statement::parameters;
using ordering_comparator_type = raw::select_statement::ordering_comparator_type;
using prepared_ann_ordering_type = raw::select_statement::prepared_ann_ordering_type;
bool _may_use_token_aware_routing;
protected:
static thread_local const lw_shared_ptr<const parameters> _default_parameters;
schema_ptr _schema;
schema_ptr _query_schema;
uint32_t _bound_terms;
lw_shared_ptr<const parameters> _parameters;
::shared_ptr<selection::selection> _selection;
const ::shared_ptr<const restrictions::statement_restrictions> _restrictions;
const bool _restrictions_need_filtering;
::shared_ptr<std::vector<size_t>> _group_by_cell_indices; ///< Indices in result row of cells holding GROUP BY values.
bool _is_reversed;
expr::unset_bind_variable_guard _limit_unset_guard;
std::optional<expr::expression> _limit;
expr::unset_bind_variable_guard _per_partition_limit_unset_guard;
std::optional<expr::expression> _per_partition_limit;
template<typename T>
using compare_fn = raw::select_statement::compare_fn<T>;
using result_row_type = raw::select_statement::result_row_type;
/**
* The comparator used to orders results when multiple keys are selected (using IN).
*/
ordering_comparator_type _ordering_comparator;
query::partition_slice::option_set _opts;
cql_stats& _stats;
const ks_selector _ks_sel;
bool _range_scan = false;
bool _range_scan_no_bypass_cache = false;
std::unique_ptr<cql3::attributes> _attrs;
private:
future<shared_ptr<cql_transport::messages::result_message>> process_results_complex(foreign_ptr<lw_shared_ptr<query::result>> results,
lw_shared_ptr<query::read_command> cmd, const query_options& options, gc_clock::time_point now) const;
protected :
virtual future<::shared_ptr<cql_transport::messages::result_message>> do_execute(query_processor& qp,
service::query_state& state, const query_options& options) const;
friend class select_statement_executor;
public:
select_statement(schema_ptr schema,
uint32_t bound_terms,
lw_shared_ptr<const parameters> parameters,
::shared_ptr<selection::selection> selection,
::shared_ptr<const restrictions::statement_restrictions> restrictions,
::shared_ptr<std::vector<size_t>> group_by_cell_indices,
bool is_reversed,
ordering_comparator_type ordering_comparator,
std::optional<expr::expression> limit,
std::optional<expr::expression> per_partition_limit,
cql_stats& stats,
std::unique_ptr<cql3::attributes> attrs);
virtual ::shared_ptr<const cql3::metadata> get_result_metadata() const override;
virtual uint32_t get_bound_terms() const override;
virtual future<> check_access(query_processor& qp, const service::client_state& state) const override;
virtual bool depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const override;
virtual future<::shared_ptr<cql_transport::messages::result_message>> execute(query_processor& qp,
service::query_state& state, const query_options& options, std::optional<service::group0_guard> guard) const override;
virtual future<::shared_ptr<cql_transport::messages::result_message>>
execute_without_checking_exception_message(query_processor& qp, service::query_state& qs, const query_options& options, std::optional<service::group0_guard> guard) const override;
future<::shared_ptr<cql_transport::messages::result_message>> execute_non_aggregate_unpaged(query_processor& qp,
lw_shared_ptr<query::read_command> cmd, dht::partition_range_vector&& partition_ranges, service::query_state& state,
const query_options& options, gc_clock::time_point now) const;
future<::shared_ptr<cql_transport::messages::result_message>> execute_without_checking_exception_message_non_aggregate_unpaged(query_processor& qp,
lw_shared_ptr<query::read_command> cmd, dht::partition_range_vector&& partition_ranges, service::query_state& state,
const query_options& options, gc_clock::time_point now,
std::optional<service::cas_shard> cas_shard) const;
future<::shared_ptr<cql_transport::messages::result_message>> execute_without_checking_exception_message_aggregate_or_paged(query_processor& qp,
lw_shared_ptr<query::read_command> cmd, dht::partition_range_vector&& partition_ranges, service::query_state& state,
const query_options& options, gc_clock::time_point now, int32_t page_size, bool aggregate, bool nonpaged_filtering, uint64_t limit,
std::optional<service::cas_shard> cas_shard) const;
future<shared_ptr<cql_transport::messages::result_message>> process_results(foreign_ptr<lw_shared_ptr<query::result>> results,
lw_shared_ptr<query::read_command> cmd, const query_options& options, gc_clock::time_point now) const;
const sstring& keyspace() const;
const sstring& column_family() const;
query::partition_slice make_partition_slice(const query_options& options) const;
const ::shared_ptr<const restrictions::statement_restrictions> get_restrictions() const;
bool has_group_by() const { return _group_by_cell_indices && !_group_by_cell_indices->empty(); }
db::timeout_clock::duration get_timeout(const service::client_state& state, const query_options& options) const;
protected:
uint64_t get_limit(const query_options& options, const std::optional<expr::expression>& limit, bool is_per_partition_limit = false) const;
static uint64_t get_inner_loop_limit(uint64_t limit, bool is_aggregate);
bool needs_post_query_ordering() const;
virtual void update_stats_rows_read(int64_t rows_read) const {
_stats.rows_read += rows_read;
}
};
class primary_key_select_statement : public select_statement {
public:
primary_key_select_statement(schema_ptr schema,
uint32_t bound_terms,
lw_shared_ptr<const parameters> parameters,
::shared_ptr<selection::selection> selection,
::shared_ptr<const restrictions::statement_restrictions> restrictions,
::shared_ptr<std::vector<size_t>> group_by_cell_indices,
bool is_reversed,
ordering_comparator_type ordering_comparator,
std::optional<expr::expression> limit,
std::optional<expr::expression> per_partition_limit,
cql_stats &stats,
std::unique_ptr<cql3::attributes> attrs);
};
class view_indexed_table_select_statement : public select_statement {
secondary_index::index _index;
expr::expression _used_index_restrictions;
schema_ptr _view_schema;
noncopyable_function<dht::partition_range_vector(const query_options&)> _get_partition_ranges_for_posting_list;
noncopyable_function<query::partition_slice(const query_options&)> _get_partition_slice_for_posting_list;
public:
static constexpr size_t max_base_table_query_concurrency = 4096;
static ::shared_ptr<cql3::statements::select_statement> prepare(data_dictionary::database db,
schema_ptr schema,
uint32_t bound_terms,
lw_shared_ptr<const parameters> parameters,
::shared_ptr<selection::selection> selection,
::shared_ptr<restrictions::statement_restrictions> restrictions,
::shared_ptr<std::vector<size_t>> group_by_cell_indices,
bool is_reversed,
ordering_comparator_type ordering_comparator,
std::optional<expr::expression> limit,
std::optional<expr::expression> per_partition_limit,
cql_stats &stats,
std::unique_ptr<cql3::attributes> attrs);
view_indexed_table_select_statement(schema_ptr schema,
uint32_t bound_terms,
lw_shared_ptr<const parameters> parameters,
::shared_ptr<selection::selection> selection,
::shared_ptr<const restrictions::statement_restrictions> restrictions,
::shared_ptr<std::vector<size_t>> group_by_cell_indices,
bool is_reversed,
ordering_comparator_type ordering_comparator,
std::optional<expr::expression> limit,
std::optional<expr::expression> per_partition_limit,
cql_stats &stats,
const secondary_index::index& index,
expr::expression used_index_restrictions,
schema_ptr view_schema,
std::unique_ptr<cql3::attributes> attrs);
private:
virtual future<::shared_ptr<cql_transport::messages::result_message>> do_execute(query_processor& qp,
service::query_state& state, const query_options& options) const override;
future<::shared_ptr<cql_transport::messages::result_message>> actually_do_execute(query_processor& qp,
service::query_state& state, const query_options& options) const;
lw_shared_ptr<const service::pager::paging_state> generate_view_paging_state_from_base_query_results(lw_shared_ptr<const service::pager::paging_state> paging_state,
const foreign_ptr<lw_shared_ptr<query::result>>& results, service::query_state& state, const query_options& options, uint32_t internal_page_size) const;
future<coordinator_result<std::tuple<dht::partition_range_vector, lw_shared_ptr<const service::pager::paging_state>>>> find_index_partition_ranges(query_processor& qp,
service::query_state& state,
const query_options& options) const;
future<coordinator_result<std::tuple<std::vector<primary_key>, lw_shared_ptr<const service::pager::paging_state>>>> find_index_clustering_rows(query_processor& qp,
service::query_state& state,
const query_options& options) const;
future<shared_ptr<cql_transport::messages::result_message>>
process_base_query_results(
foreign_ptr<lw_shared_ptr<query::result>> results,
lw_shared_ptr<query::read_command> cmd,
service::query_state& state,
const query_options& options,
gc_clock::time_point now,
lw_shared_ptr<const service::pager::paging_state> paging_state,
uint32_t internal_page_size) const;
lw_shared_ptr<query::read_command>
prepare_command_for_base_query(query_processor& qp, const query_options& options, service::query_state& state, gc_clock::time_point now,
bool use_paging) const;
future<coordinator_result<std::tuple<foreign_ptr<lw_shared_ptr<query::result>>, lw_shared_ptr<query::read_command>>>>
do_execute_base_query(
query_processor& qp,
dht::partition_range_vector&& partition_ranges,
service::query_state& state,
const query_options& options,
gc_clock::time_point now,
lw_shared_ptr<const service::pager::paging_state> paging_state) const;
future<shared_ptr<cql_transport::messages::result_message>>
execute_base_query(
query_processor& qp,
dht::partition_range_vector&& partition_ranges,
service::query_state& state,
const query_options& options,
gc_clock::time_point now,
lw_shared_ptr<const service::pager::paging_state> paging_state) const;
// Function for fetching the selected columns from a list of clustering rows.
// It is currently used only in our Secondary Index implementation - ordinary
// CQL SELECT statements do not have the syntax to request a list of rows.
// FIXME: The current implementation is very inefficient - it requests each
// row separately (and, incrementally, in parallel). Even multiple rows from a single
// partition are requested separately. This last case can be easily improved,
// but to implement the general case (multiple rows from multiple partitions)
// efficiently, we will need more support from other layers.
// Keys are ordered in token order (see #3423)
future<coordinator_result<std::tuple<foreign_ptr<lw_shared_ptr<query::result>>, lw_shared_ptr<query::read_command>>>>
do_execute_base_query(
query_processor& qp,
std::vector<primary_key>&& primary_keys,
service::query_state& state,
const query_options& options,
gc_clock::time_point now,
lw_shared_ptr<const service::pager::paging_state> paging_state) const;
future<shared_ptr<cql_transport::messages::result_message>>
execute_base_query(
query_processor& qp,
std::vector<primary_key>&& primary_keys,
service::query_state& state,
const query_options& options,
gc_clock::time_point now,
lw_shared_ptr<const service::pager::paging_state> paging_state) const;
virtual void update_stats_rows_read(int64_t rows_read) const override {
_stats.rows_read += rows_read;
_stats.secondary_index_rows_read += rows_read;
}
future<coordinator_result<::shared_ptr<cql_transport::messages::result_message::rows>>> read_posting_list(
query_processor& qp,
const query_options& options,
uint64_t limit,
service::query_state& state,
gc_clock::time_point now,
db::timeout_clock::time_point timeout,
bool include_base_clustering_key) const;
dht::partition_range_vector get_partition_ranges_for_local_index_posting_list(const query_options& options) const;
dht::partition_range_vector get_partition_ranges_for_global_index_posting_list(const query_options& options) const;
query::partition_slice get_partition_slice_for_local_index_posting_list(const query_options& options) const;
query::partition_slice get_partition_slice_for_global_index_posting_list(const query_options& options) const;
bytes compute_idx_token(const partition_key& key) const;
};
class mutation_fragments_select_statement : public select_statement {
schema_ptr _underlying_schema;
public:
mutation_fragments_select_statement(
schema_ptr output_schema,
schema_ptr underlying_schema,
uint32_t bound_terms,
lw_shared_ptr<const parameters> parameters,
::shared_ptr<selection::selection> selection,
::shared_ptr<const restrictions::statement_restrictions> restrictions,
::shared_ptr<std::vector<size_t>> group_by_cell_indices,
bool is_reversed,
ordering_comparator_type ordering_comparator,
std::optional<expr::expression> limit,
std::optional<expr::expression> per_partition_limit,
cql_stats &stats,
std::unique_ptr<cql3::attributes> attrs);
// This statement has a schema that is different from that of the underlying table.
static schema_ptr generate_output_schema(schema_ptr underlying_schema);
private:
future<exceptions::coordinator_result<service::storage_proxy_coordinator_query_result>>
do_query(
locator::effective_replication_map_ptr erm_keepalive,
locator::host_id this_node,
service::storage_proxy& sp,
schema_ptr schema,
lw_shared_ptr<query::read_command> cmd,
dht::partition_range_vector partition_ranges,
db::consistency_level cl,
service::storage_proxy_coordinator_query_options optional_params) const;
virtual future<::shared_ptr<cql_transport::messages::result_message>> do_execute(query_processor& qp,
service::query_state& state, const query_options& options) const override;
};
class vector_indexed_table_select_statement : public select_statement {
secondary_index::index _index;
prepared_ann_ordering_type _prepared_ann_ordering;
vector_search::prepared_filter _prepared_filter;
mutable gc_clock::time_point _query_start_time_point;
public:
static constexpr size_t max_ann_query_limit = 1000;
static ::shared_ptr<cql3::statements::select_statement> prepare(data_dictionary::database db, schema_ptr schema, uint32_t bound_terms,
lw_shared_ptr<const parameters> parameters, ::shared_ptr<selection::selection> selection,
::shared_ptr<restrictions::statement_restrictions> restrictions, ::shared_ptr<std::vector<size_t>> group_by_cell_indices, bool is_reversed,
ordering_comparator_type ordering_comparator, prepared_ann_ordering_type prepared_ann_ordering, std::optional<expr::expression> limit,
std::optional<expr::expression> per_partition_limit, cql_stats& stats, const secondary_index::index& index, std::unique_ptr<cql3::attributes> attrs);
vector_indexed_table_select_statement(schema_ptr schema, uint32_t bound_terms, lw_shared_ptr<const parameters> parameters,
::shared_ptr<selection::selection> selection, ::shared_ptr<const restrictions::statement_restrictions> restrictions,
::shared_ptr<std::vector<size_t>> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator,
prepared_ann_ordering_type prepared_ann_ordering, std::optional<expr::expression> limit, std::optional<expr::expression> per_partition_limit,
cql_stats& stats, const secondary_index::index& index, vector_search::prepared_filter prepared_filter, std::unique_ptr<cql3::attributes> attrs);
private:
future<::shared_ptr<cql_transport::messages::result_message>> do_execute(
query_processor& qp, service::query_state& state, const query_options& options) const override;
void update_stats() const;
lw_shared_ptr<query::read_command> prepare_command_for_base_query(query_processor& qp, service::query_state& state, const query_options& options, uint64_t fetch_limit) const;
std::vector<float> get_ann_ordering_vector(const query_options& options) const;
future<::shared_ptr<cql_transport::messages::result_message>> query_base_table(query_processor& qp, service::query_state& state,
const query_options& options, const std::vector<vector_search::primary_key>& pkeys, lowres_clock::time_point timeout) const;
future<::shared_ptr<cql_transport::messages::result_message>> query_base_table(query_processor& qp, service::query_state& state,
const query_options& options, lw_shared_ptr<query::read_command> command, lowres_clock::time_point timeout,
const std::vector<vector_search::primary_key>& pkeys) const;
future<::shared_ptr<cql_transport::messages::result_message>> query_base_table(query_processor& qp, service::query_state& state,
const query_options& options, lw_shared_ptr<query::read_command> command, lowres_clock::time_point timeout,
std::vector<dht::partition_range> partition_ranges) const;
};
}
}