The execution of SELECT statements with ANN ordering (vector search) was previously implemented within `indexed_table_select_statement`. This was not ideal, as vector search logic is independent of secondary index selects. This resulted in unnecessary complexity because vector search queries don't use features like aggregates or paging. More importantly, `indexed_table_select_statement` assumed a non-null `view_schema` pointer, which doesn't hold for vector indexes (where `view_ptr` is null). This caused null pointer dereferences during ANN ordered selects, leading to crashes (VECTOR-179). Other parts of the class still dereference `view_schema` without null checks. Moving the vector search select logic out of `indexed_table_select_statement` simplifies the code and prevents these null pointer dereferences.
186 lines
7.4 KiB
C++
186 lines
7.4 KiB
C++
/*
 * Copyright (C) 2015-present ScyllaDB
 *
 * Modified by ScyllaDB
 */

/*
 * SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
 */
|
|
|
|
#pragma once
|
|
|
|
#include "cql3/statements/raw/cf_statement.hh"
|
|
#include "cql3/statements/prepared_statement.hh"
|
|
#include "cql3/restrictions/statement_restrictions.hh"
|
|
#include "cql3/attributes.hh"
|
|
#include "db/tri_mode_restriction.hh"
|
|
#include <seastar/core/shared_ptr.hh>
|
|
|
|
namespace cql3 {
|
|
|
|
// Forward declarations of the selection types named by the declarations in
// this header; the full definitions live elsewhere.
namespace selection {
    class selection;
    class raw_selector;
    class prepared_selector;
} // namespace selection
|
|
|
|
namespace statements {
|
|
|
|
namespace raw {
|
|
|
|
/**
|
|
* Encapsulates a completely parsed SELECT query, including the target
|
|
* column family, expression, result count, and ordering clause.
|
|
*
|
|
*/
|
|
class select_statement : public cf_statement
|
|
{
|
|
public:
|
|
// Ordering of selected values as defined by the basic comparison order.
|
|
// Even for a column that by default has ordering 4, 3, 2, 1 ordering it in ascending order will result in 1, 2, 3, 4.
|
|
enum class ordering {
|
|
ascending,
|
|
descending
|
|
};
|
|
// Vector of floats with dimension the same as the vector indexed column.
|
|
// This vector is the target for the nearest neighbors in ANN queries.
|
|
using ann_vector = expr::expression;
|
|
using ordering_type = std::variant<ordering, ann_vector>;
|
|
class parameters final {
|
|
public:
|
|
using orderings_type = std::vector<std::pair<shared_ptr<column_identifier::raw>, ordering_type>>;
|
|
enum class statement_subtype { REGULAR, JSON, PRUNE_MATERIALIZED_VIEW, MUTATION_FRAGMENTS };
|
|
private:
|
|
const orderings_type _orderings;
|
|
const bool _is_distinct;
|
|
const bool _allow_filtering;
|
|
const statement_subtype _statement_subtype;
|
|
bool _bypass_cache = false;
|
|
public:
|
|
parameters();
|
|
parameters(orderings_type orderings,
|
|
bool is_distinct,
|
|
bool allow_filtering);
|
|
parameters(orderings_type orderings,
|
|
bool is_distinct,
|
|
bool allow_filtering,
|
|
statement_subtype statement_subtype,
|
|
bool bypass_cache);
|
|
bool is_distinct() const;
|
|
bool allow_filtering() const;
|
|
bool is_json() const;
|
|
bool is_mutation_fragments() const;
|
|
bool bypass_cache() const;
|
|
bool is_prune_materialized_view() const;
|
|
orderings_type const& orderings() const;
|
|
};
|
|
template<typename T>
|
|
using compare_fn = std::function<bool(const T&, const T&)>;
|
|
|
|
using result_row_type = std::vector<managed_bytes_opt>;
|
|
using prepared_ann_ordering_type = std::pair<const column_definition*, expr::expression>;
|
|
using ordering_comparator_type = compare_fn<result_row_type>;
|
|
protected:
|
|
virtual audit::statement_category category() const override;
|
|
private:
|
|
using prepared_orderings_type = std::vector<std::pair<const column_definition*, ordering_type>>;
|
|
private:
|
|
lw_shared_ptr<const parameters> _parameters;
|
|
std::vector<::shared_ptr<selection::raw_selector>> _select_clause;
|
|
expr::expression _where_clause;
|
|
std::optional<expr::expression> _limit;
|
|
std::optional<expr::expression> _per_partition_limit;
|
|
std::vector<::shared_ptr<cql3::column_identifier::raw>> _group_by_columns;
|
|
std::unique_ptr<cql3::attributes::raw> _attrs;
|
|
public:
|
|
select_statement(cf_name cf_name,
|
|
lw_shared_ptr<const parameters> parameters,
|
|
std::vector<::shared_ptr<selection::raw_selector>> select_clause,
|
|
expr::expression where_clause,
|
|
std::optional<expr::expression> limit,
|
|
std::optional<expr::expression> per_partition_limit,
|
|
std::vector<::shared_ptr<cql3::column_identifier::raw>> group_by_columns,
|
|
std::unique_ptr<cql3::attributes::raw> attrs);
|
|
|
|
virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override {
|
|
return prepare(db, stats, false);
|
|
}
|
|
std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats, bool for_view);
|
|
private:
|
|
std::vector<selection::prepared_selector> maybe_jsonize_select_clause(std::vector<selection::prepared_selector> select, data_dictionary::database db, schema_ptr schema);
|
|
::shared_ptr<restrictions::statement_restrictions> prepare_restrictions(
|
|
data_dictionary::database db,
|
|
schema_ptr schema,
|
|
prepare_context& ctx,
|
|
::shared_ptr<selection::selection> selection,
|
|
bool for_view = false,
|
|
bool allow_filtering = false,
|
|
restrictions::check_indexes do_check_indexes = restrictions::check_indexes::yes);
|
|
|
|
/** Returns an expression for the limit or nullopt if no limit is set */
|
|
std::optional<expr::expression> prepare_limit(data_dictionary::database db, prepare_context& ctx, const std::optional<expr::expression>& limit);
|
|
|
|
// Checks whether it is legal to have ORDER BY in this statement
|
|
static void verify_ordering_is_allowed(const parameters& params, const restrictions::statement_restrictions& restrictions);
|
|
|
|
void handle_unrecognized_ordering_column(const column_identifier& column) const;
|
|
|
|
// Processes ORDER BY column orderings, converts column_identifiers to column_defintions
|
|
prepared_orderings_type prepare_orderings(const schema& schema) const;
|
|
|
|
void verify_ordering_is_valid(const prepared_orderings_type&, const schema&, const restrictions::statement_restrictions& restrictions) const;
|
|
|
|
prepared_ann_ordering_type prepare_ann_ordering(const schema& schema, prepare_context& ctx, data_dictionary::database db) const;
|
|
|
|
// Checks whether this ordering reverses all results.
|
|
// We only allow leaving select results unchanged or reversing them.
|
|
bool is_ordering_reversed(const prepared_orderings_type&) const;
|
|
|
|
select_statement::ordering_comparator_type get_ordering_comparator(
|
|
const prepared_orderings_type&,
|
|
selection::selection& selection,
|
|
const restrictions::statement_restrictions& restrictions);
|
|
|
|
void validate_distinct_selection(const schema& schema,
|
|
const selection::selection& selection,
|
|
const restrictions::statement_restrictions& restrictions) const;
|
|
|
|
/** If ALLOW FILTERING was not specified, this verifies that it is not needed */
|
|
void check_needs_filtering(
|
|
const restrictions::statement_restrictions& restrictions,
|
|
db::tri_mode_restriction_t::mode strict_allow_filtering,
|
|
std::vector<sstring>& warnings);
|
|
|
|
void ensure_filtering_columns_retrieval(data_dictionary::database db,
|
|
selection::selection& selection,
|
|
const restrictions::statement_restrictions& restrictions);
|
|
|
|
/// Returns indices of GROUP BY cells in fetched rows.
|
|
std::vector<size_t> prepare_group_by(const schema& schema, selection::selection& selection) const;
|
|
|
|
bool contains_alias(const column_identifier& name) const;
|
|
|
|
lw_shared_ptr<column_specification> limit_receiver(bool per_partition = false);
|
|
|
|
#if 0
|
|
public:
|
|
virtual sstring to_string() override {
|
|
return sstring("raw_statement(")
|
|
+ "name=" + cf_name->to_string()
|
|
+ ", selectClause=" + to_string(_select_clause)
|
|
+ ", whereClause=" + to_string(_where_clause)
|
|
+ ", isDistinct=" + to_string(_parameters->is_distinct())
|
|
+ ", isJson=" + to_string(_parameters->is_json())
|
|
+ ")";
|
|
}
|
|
};
|
|
#endif
|
|
};
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|