Files
scylladb/index/fulltext_index.cc
Dawid Pawlik 9e02e11ea8 fulltext_index: enforce CDC requirements for fulltext indexes
Fulltext indexes rely on CDC to track changes for asynchronous index
building. Enforce the following CDC constraints during CREATE INDEX:
- CDC TTL must be at least 86400 seconds (24 hours)
- CDC delta mode must be 'full' or postimage must be enabled

Add `has_fulltext_index()` and `check_cdc_options()` so that other
modules can detect fulltext indexes and validate CDC settings:
- include fulltext indexes in `cdc_enabled()` so the CDC log
  is auto-created, and validate CDC options in
  `on_before_update_column_family()`
- block `ALTER TABLE ... WITH cdc = {'enabled': false}`
  when a fulltext index exists on the table
2026-05-19 08:52:47 +02:00

90 lines
3.9 KiB
C++

/*
* Copyright 2026-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
*/
#include "cql3/statements/index_target.hh"
#include "cql3/util.hh"
#include "exceptions/exceptions.hh"
#include "schema/schema.hh"
#include "index/fulltext_index.hh"
#include "index/index_option_utils.hh"
#include "index/secondary_index_manager.hh"
#include <seastar/core/sstring.hh>
#include <boost/algorithm/string.hpp>
namespace secondary_index {
// Names accepted by the 'analyzer' option of a fulltext index.
// These are the text analyzers the backend search engine (Tantivy)
// is expected to provide.
static const std::vector<sstring> analyzer_values = {
    "standard",
    "english",
    "german",
    "french",
    "spanish",
    "italian",
    "portuguese",
    "russian",
    "simple",
    "whitespace",
};
// Options recognised by a fulltext index, keyed by option name.
// Each validator is invoked with (index type name, option name, option value).
static const std::unordered_map<sstring, std::function<void(std::string_view, const sstring&, const sstring&)>> fulltext_index_options = {
    // 'analyzer': which built-in text analyzer tokenizes the indexed text.
    {
        "analyzer",
        std::bind_front(util::validate_enumerated_option, analyzer_values),
    },
    // 'positions': whether token positions are stored in the index.
    // Phrase queries require them; setting this to false saves space.
    {
        "positions",
        std::bind_front(util::validate_enumerated_option, util::boolean_values),
    },
};
std::optional<cql3::description> fulltext_index::describe(const index_metadata& im, const schema& base_schema) const {
auto target = im.options().at(cql3::statements::index_target::target_option_name);
auto target_column = cql3::statements::index_target::column_name_from_target_string(target);
return describe_with_target(im, base_schema, cql3::util::maybe_quote(target_column));
}
// Verifies that the index targets are acceptable for a fulltext index.
//
// Requirements checked, in order:
//   1. exactly one target is given;
//   2. that target is a single column;
//   3. the column exists in `schema`;
//   4. the column's type kind is utf8 or ascii.
//
// Throws exceptions::invalid_request_exception on the first violated requirement.
void fulltext_index::check_target(const schema& schema, const std::vector<::shared_ptr<cql3::statements::index_target>>& targets) const {
    using cql3::statements::index_target;

    if (targets.size() != 1) {
        throw exceptions::invalid_request_exception("Fulltext index must have exactly one target column");
    }

    auto& sole_target = targets.front();
    auto* single = std::get_if<index_target::single_column>(&sole_target->value);
    if (single == nullptr) {
        throw exceptions::invalid_request_exception("Fulltext index target must be a single column");
    }

    auto& column_id = *single;
    // Human-readable column name, used only in error messages below.
    auto column_text = column_id->to_string();

    auto const* definition = schema.get_column_definition(column_id->name());
    if (!definition) {
        throw exceptions::invalid_request_exception(format("Column {} not found in schema", column_text));
    }

    const auto type_kind = definition->type->get_kind();
    const bool is_textual = type_kind == abstract_type::kind::utf8 || type_kind == abstract_type::kind::ascii;
    if (!is_textual) {
        throw exceptions::invalid_request_exception(
                format("Fulltext index is only supported on text, varchar, or ascii columns, but column {} has an incompatible type", column_text));
    }
}
// Validates the index-specific options supplied for a fulltext index.
//
// Every raw option must be a key of `fulltext_index_options`; its value is
// then passed to the validator registered for that key, together with the
// index type name and the option name.
//
// Throws exceptions::invalid_request_exception for an unknown option name.
// Anything thrown by an individual validator propagates unchanged.
void fulltext_index::check_index_options(const cql3::statements::index_specific_prop_defs& properties) const {
    // Bind each entry by const reference: iterating by value would copy the
    // key and value strings on every iteration for no benefit. The range
    // expression's result stays alive for the whole loop, so the references
    // remain valid even if get_raw_options() returns a temporary.
    for (const auto& [name, value] : properties.get_raw_options()) {
        const auto validator = fulltext_index_options.find(name);
        if (validator == fulltext_index_options.end()) {
            throw exceptions::invalid_request_exception(format("Unsupported option {} for fulltext index", name));
        }
        validator->second(index_type_name(), name, value);
    }
}
// Validates a fulltext index definition by running four checks in sequence:
// tablets usage, index target, CDC options, and index-specific options.
//
// The order of the calls determines which problem is reported first when
// several are present. check_target() and check_index_options() signal
// failure by throwing exceptions::invalid_request_exception; the other two
// checks are defined elsewhere and presumably report failure the same way
// (NOTE(review): confirm against their definitions).
//
// The gms::feature_service argument is unnamed and not used here.
void fulltext_index::validate(const schema& schema, const cql3::statements::index_specific_prop_defs& properties,
const std::vector<::shared_ptr<cql3::statements::index_target>>& targets, const gms::feature_service&, const data_dictionary::database& db) const {
check_uses_tablets(schema, db);
check_target(schema, targets);
check_cdc_options(schema);
check_index_options(properties);
}
// Factory for fulltext indexes: constructs a new fulltext_index and returns
// it to the caller as an owning pointer to secondary_index::custom_index.
std::unique_ptr<secondary_index::custom_index> fulltext_index_factory() {
return std::make_unique<fulltext_index>();
}
} // namespace secondary_index