Compare commits
152 Commits
debug_form...scylla-202
| SHA1 |
|---|
| 1a896169dc |
| 9ccad33e59 |
| a17dd4d4c9 |
| a2a4c6e4b2 |
| 64182d9df6 |
| 8e85ef90d2 |
| b5b2ffa5df |
| b7f067ce33 |
| 307f00a398 |
| c56e47f72f |
| 51ee15f02d |
| 36bb089663 |
| 1661b35050 |
| 8c93a331f7 |
| 85c161b9f1 |
| d088cc8a2d |
| 39c20144e5 |
| f1e7cee7a5 |
| 023b27312d |
| 2ffbc81e19 |
| 88e548ed72 |
| 975882a489 |
| 3653662099 |
| 7336bb38fa |
| cff90755d8 |
| 3be469da29 |
| 1895724465 |
| 9dca28d2b8 |
| bc98301783 |
| 220bbcf329 |
| ecdefe801c |
| af2215c2d2 |
| 864528eb9b |
| 5153b91514 |
| 0a0347cb4e |
| da64c02b92 |
| 39aabe5191 |
| 9eeff8573b |
| 4115f6f367 |
| fb2c46dfbe |
| b4e37600d6 |
| 6bbf20a440 |
| b8797551eb |
| b1cf1890a9 |
| 2f0ebe9f49 |
| 3633fb9ff8 |
| 33b5f27057 |
| 11ecc886c3 |
| eb147ec564 |
| 637e5fc9b5 |
| 0d77754c63 |
| 5481c9aedd |
| 59db708cba |
| 28690f8203 |
| 235c859b98 |
| 5453e85f39 |
| 7a6bcb3a3f |
| 8b2a382eb6 |
| cdd51d8b7a |
| 88a8d140b3 |
| 1957dac2b4 |
| 1091ef89e1 |
| b081e07ffa |
| 1f102ca2f7 |
| 8a98f0d5b6 |
| cdae92065b |
| 4813c48d64 |
| b623b108c3 |
| 7fdc7bdc4b |
| c2e926850d |
| 6d5b029812 |
| ffeb55cf77 |
| 37aa7c216c |
| 0b0e9f0c32 |
| 871fabd60a |
| 67b7ea12a2 |
| d74bb95f54 |
| 98977e9465 |
| e84376c9dc |
| 79556be7a7 |
| fe0740ff56 |
| aa5cb15166 |
| 13d79ba990 |
| 35b410326b |
| 12a3fcceae |
| 040c59674a |
| b50a6657e8 |
| 93479ffcf9 |
| 38bd74b2d4 |
| 6ee1779578 |
| 9fe2301647 |
| 6b27459de3 |
| 48130ca2e9 |
| 5054087f0b |
| 889fb9c18b |
| c627aff5f7 |
| ffca4a9f85 |
| 2c0ffdce31 |
| ff7e93ddd5 |
| 1998733228 |
| e79ee2ddb0 |
| 4c39943b3f |
| 17c86f8b57 |
| d05b3897a2 |
| 9116fc635e |
| 5f74b5fdff |
| a746fd2bb8 |
| 89a5889bed |
| 8d1f6df818 |
| 75320c9a13 |
| 359af0ae9c |
| 7f350558c2 |
| fa9b1800b6 |
| c25d447b9c |
| cf147d8f85 |
| 319626e941 |
| cca2d974b6 |
| 43f2e5f86b |
| ad81d49923 |
| 138c68d80e |
| e0fb727f18 |
| 440833ae59 |
| 246635c426 |
| 58eda6670f |
| 28b8896680 |
| e9cae4be17 |
| daf1c96ad3 |
| 1a1893078a |
| 8cc5566a3c |
| 1f52ced2ff |
| d7e3ab2226 |
| cf589222a0 |
| 156800a3dd |
| d1e8b02260 |
| a51888694e |
| 68f134ee23 |
| b623c237bc |
| 8379d545c5 |
| 58d13d0daf |
| 4def507b1b |
| 69ad9350cc |
| 29e5f5f54d |
| 379b3fa46c |
| fe869fd902 |
| dc55a566fa |
| 3bb8039359 |
| 9f3838e614 |
| 366212f997 |
| c0637aff81 |
| dcf436eb84 |
| 8e754e9d41 |
| f407799f25 |
.github/workflows/make-pr-ready-for-review.yaml: 20 lines (vendored, Normal file)

@@ -0,0 +1,20 @@
name: Mark PR as Ready When Conflicts Label is Removed

on:
  pull_request_target:
    types:
      - unlabeled

env:
  DEFAULT_BRANCH: 'master'

jobs:
  mark-ready:
    if: github.event.label.name == 'conflicts'
    runs-on: ubuntu-latest

    steps:
      - name: Mark pull request as ready for review
        run: gh pr ready "${{ github.event.pull_request.number }}"
        env:
          GITHUB_TOKEN: ${{ secrets.AUTO_BACKPORT_TOKEN }}
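The workflow above fires only when the `conflicts` label is removed from a pull request (the `unlabeled` event of `pull_request_target`). A minimal sketch of how an external tool could remove that label through the GitHub REST API and so trigger the workflow; the repository name, PR number and token are placeholders, not values taken from this diff:

```python
import os
import requests

# Hypothetical repository and PR number, for illustration only.
REPO = "scylladb/scylladb"
PR_NUMBER = 12345

# Removing the "conflicts" label produces the "unlabeled" event that the
# workflow above listens for.
resp = requests.delete(
    f"https://api.github.com/repos/{REPO}/issues/{PR_NUMBER}/labels/conflicts",
    headers={
        "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
        "Accept": "application/vnd.github+json",
    },
)
resp.raise_for_status()
```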
.gitmodules: 2 lines (vendored)

@@ -1,6 +1,6 @@
[submodule "seastar"]
    path = seastar
    url = ../seastar
    url = ../scylla-seastar
    ignore = dirty
[submodule "swagger-ui"]
    path = swagger-ui
@@ -78,7 +78,7 @@ fi

# Default scylla product/version tags
PRODUCT=scylla
VERSION=2025.1.0-dev
VERSION=2025.1.1

if test -f version
then
@@ -88,6 +88,9 @@ public:
    static api_error table_not_found(std::string msg) {
        return api_error("TableNotFoundException", std::move(msg));
    }
    static api_error limit_exceeded(std::string msg) {
        return api_error("LimitExceededException", std::move(msg));
    }
    static api_error internal(std::string msg) {
        return api_error("InternalServerError", std::move(msg), http::reply::status_type::internal_server_error);
    }
@@ -7,6 +7,7 @@
 */

#include <fmt/ranges.h>
#include <seastar/core/on_internal_error.hh>
#include "alternator/executor.hh"
#include "alternator/consumed_capacity.hh"
#include "auth/permission.hh"

@@ -55,6 +56,9 @@
#include "utils/error_injection.hh"
#include "db/schema_tables.hh"
#include "utils/rjson.hh"
#include "alternator/extract_from_attrs.hh"
#include "types/types.hh"
#include "db/system_keyspace.hh"

using namespace std::chrono_literals;
@@ -215,7 +219,7 @@ static void validate_table_name(const std::string& name) {
// instead of each component individually as DynamoDB does.
// The view_name() function assumes the table_name has already been validated
// but validates the legality of index_name and the combination of both.
static std::string view_name(const std::string& table_name, std::string_view index_name, const std::string& delim = ":") {
static std::string view_name(std::string_view table_name, std::string_view index_name, const std::string& delim = ":") {
    if (index_name.length() < 3) {
        throw api_error::validation("IndexName must be at least 3 characters long");
    }

@@ -223,7 +227,7 @@ static std::string view_name(const std::string& table_name, std::string_view ind
        throw api_error::validation(
            fmt::format("IndexName '{}' must satisfy regular expression pattern: [a-zA-Z0-9_.-]+", index_name));
    }
    std::string ret = table_name + delim + std::string(index_name);
    std::string ret = std::string(table_name) + delim + std::string(index_name);
    if (ret.length() > max_table_name_length) {
        throw api_error::validation(
            fmt::format("The total length of TableName ('{}') and IndexName ('{}') cannot exceed {} characters",

@@ -232,7 +236,7 @@ static std::string view_name(const std::string& table_name, std::string_view ind
    return ret;
}

static std::string lsi_name(const std::string& table_name, std::string_view index_name) {
static std::string lsi_name(std::string_view table_name, std::string_view index_name) {
    return view_name(table_name, index_name, "!:");
}
@@ -469,7 +473,90 @@ static rjson::value generate_arn_for_index(const schema& schema, std::string_vie
            schema.ks_name(), schema.cf_name(), index_name));
}

static rjson::value fill_table_description(schema_ptr schema, table_status tbl_status, service::storage_proxy const& proxy)
// The following function checks if a given view has finished building.
// We need this for describe_table() to know if a view is still backfilling,
// or active.
//
// Currently we don't have in view_ptr the knowledge whether a view finished
// building long ago - so checking this involves a somewhat inefficient, but
// still node-local, process:
// We need a table that can accurately tell that all nodes have finished
// building this view. system.built_views is not good enough because it only
// knows the view building status in the current node. In recent versions,
// after PR #19745, we have a local table system.view_build_status_v2 with
// global information, replacing the old system_distributed.view_build_status.
// In theory, there can be a period during upgrading an old cluster when this
// table is not yet available. However, since the IndexStatus is a new feature
// too, it is acceptable that it doesn't yet work in the middle of the update.
static future<bool> is_view_built(
        view_ptr view,
        service::storage_proxy& proxy,
        service::client_state& client_state,
        tracing::trace_state_ptr trace_state,
        service_permit permit) {
    auto schema = proxy.data_dictionary().find_table(
        "system", db::system_keyspace::VIEW_BUILD_STATUS_V2).schema();
    // The table system.view_build_status_v2 has "keyspace_name" and
    // "view_name" as the partition key, and each clustering row has
    // "host_id" as clustering key and a string "status". We need to
    // read a single partition:
    partition_key pk = partition_key::from_exploded(*schema,
        {utf8_type->decompose(view->ks_name()),
         utf8_type->decompose(view->cf_name())});
    dht::partition_range_vector partition_ranges{
        dht::partition_range(dht::decorate_key(*schema, pk))};
    auto selection = cql3::selection::selection::wildcard(schema); // only for get_query_options()!
    auto partition_slice = query::partition_slice(
        {query::clustering_range::make_open_ended_both_sides()},
        {}, // static columns
        {schema->get_column_definition("status")->id}, // regular columns
        selection->get_query_options());
    auto command = ::make_lw_shared<query::read_command>(
        schema->id(), schema->version(), partition_slice,
        proxy.get_max_result_size(partition_slice),
        query::tombstone_limit(proxy.get_tombstone_limit()));
    service::storage_proxy::coordinator_query_result qr =
        co_await proxy.query(
            schema, std::move(command), std::move(partition_ranges),
            db::consistency_level::LOCAL_ONE,
            service::storage_proxy::coordinator_query_options(
                executor::default_timeout(), std::move(permit), client_state, trace_state));
    query::result_set rs = query::result_set::from_raw_result(
        schema, partition_slice, *qr.query_result);
    std::unordered_map<locator::host_id, sstring> statuses;
    for (auto&& r : rs.rows()) {
        auto host_id = r.get<utils::UUID>("host_id");
        auto status = r.get<sstring>("status");
        if (host_id && status) {
            statuses.emplace(locator::host_id(*host_id), *status);
        }
    }
    // A view is considered "built" if all nodes reported SUCCESS in having
    // built this view. Note that we need this "SUCCESS" for all nodes in the
    // cluster - even those that are temporarily down (their success is known
    // by this node, even if they are down). Conversely, we don't care what is
    // the recorded status for any node which is no longer in the cluster - it
    // is possible we forgot to erase the status of nodes that left the
    // cluster, but here we just ignore them and look at the nodes actually
    // in the topology.
    bool all_built = true;
    auto token_metadata = proxy.get_token_metadata_ptr();
    token_metadata->get_topology().for_each_node(
        [&] (const locator::node& node) {
            // Note: we could skip nodes in DCs which have no replication of
            // this view. However, in practice even those nodes would run
            // the view building (and just see empty content) so we don't
            // need to bother with this skipping.
            auto it = statuses.find(node.host_id());
            if (it == statuses.end() || it->second != "SUCCESS") {
                all_built = false;
            }
        });
    co_return all_built;

}

static future<rjson::value> fill_table_description(schema_ptr schema, table_status tbl_status, service::storage_proxy& proxy, service::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit)
{
    rjson::value table_description = rjson::empty_object();
    auto tags_ptr = db::get_tags_of_table(schema);
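The read that is_view_built() issues is essentially a single-partition query of system.view_build_status_v2, followed by a check that every node currently in the topology reported SUCCESS. A rough equivalent from a CQL client, shown only as an illustration; the keyspace and view names are made up, and this sketch skips the topology-membership check the C++ code performs:

```python
from cassandra.cluster import Cluster  # pip install cassandra-driver

session = Cluster(["127.0.0.1"]).connect()

# Hypothetical Alternator keyspace/view names, for illustration only.
rows = session.execute(
    "SELECT host_id, status FROM system.view_build_status_v2 "
    "WHERE keyspace_name = %s AND view_name = %s",
    ("alternator_mytable", "mytable:myindex"))

statuses = {row.host_id: row.status for row in rows}
# The real implementation additionally requires every node in the current
# topology to appear here with status SUCCESS.
all_built = bool(statuses) and all(s == "SUCCESS" for s in statuses.values())
print("view built:", all_built)
```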
@@ -548,7 +635,22 @@ static rjson::value fill_table_description(schema_ptr schema, table_status tbl_s
        // FIXME: we have to get ProjectionType from the schema when it is added
        rjson::add(view_entry, "Projection", std::move(projection));
        // Local secondary indexes are marked by an extra '!' sign occurring before the ':' delimiter
        rjson::value& index_array = (delim_it > 1 && cf_name[delim_it-1] == '!') ? lsi_array : gsi_array;
        bool is_lsi = (delim_it > 1 && cf_name[delim_it-1] == '!');
        // Add IndexStatus and Backfilling flags, but only for GSIs -
        // LSIs can only be created with the table itself and do not
        // have a status. Alternator schema operations are synchronous
        // so only two combinations of these flags are possible: ACTIVE
        // (for a built view) or CREATING+Backfilling (if view building
        // is in progress).
        if (!is_lsi) {
            if (co_await is_view_built(vptr, proxy, client_state, trace_state, permit)) {
                rjson::add(view_entry, "IndexStatus", "ACTIVE");
            } else {
                rjson::add(view_entry, "IndexStatus", "CREATING");
                rjson::add(view_entry, "Backfilling", rjson::value(true));
            }
        }
        rjson::value& index_array = is_lsi ? lsi_array : gsi_array;
        rjson::push_back(index_array, std::move(view_entry));
    }
    if (!lsi_array.Empty()) {
@@ -572,7 +674,7 @@ static rjson::value fill_table_description(schema_ptr schema, table_status tbl_s
    executor::supplement_table_stream_info(table_description, *schema, proxy);

    // FIXME: still missing some response fields (issue #5026)
    return table_description;
    co_return table_description;
}

bool is_alternator_keyspace(const sstring& ks_name) {

@@ -591,11 +693,11 @@ future<executor::request_return_type> executor::describe_table(client_state& cli

    tracing::add_table_name(trace_state, schema->ks_name(), schema->cf_name());

    rjson::value table_description = fill_table_description(schema, table_status::active, _proxy);
    rjson::value table_description = co_await fill_table_description(schema, table_status::active, _proxy, client_state, trace_state, permit);
    rjson::value response = rjson::empty_object();
    rjson::add(response, "Table", std::move(table_description));
    elogger.trace("returning {}", response);
    return make_ready_future<executor::request_return_type>(make_jsonable(std::move(response)));
    co_return make_jsonable(std::move(response));
}

// Check CQL's Role-Based Access Control (RBAC) permission_to_check (MODIFY,
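With these changes, DescribeTable starts reporting an IndexStatus for each GSI, plus a Backfilling flag while the backing view is still being built. A minimal sketch of how a DynamoDB client would observe this against an Alternator endpoint, assuming the usual DescribeTable response shape; the endpoint URL, credentials and table name are placeholders:

```python
import boto3

# Hypothetical local Alternator endpoint and table name.
dynamodb = boto3.client("dynamodb", endpoint_url="http://localhost:8000",
                        region_name="us-east-1",
                        aws_access_key_id="alternator",
                        aws_secret_access_key="secret_pass")

desc = dynamodb.describe_table(TableName="mytable")["Table"]
for gsi in desc.get("GlobalSecondaryIndexes", []):
    # Expected values per the code above: "ACTIVE" for a built view, or
    # "CREATING" together with Backfilling=True while it is being built.
    print(gsi["IndexName"], gsi.get("IndexStatus"), gsi.get("Backfilling"))
```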
@@ -656,7 +758,7 @@ future<executor::request_return_type> executor::delete_table(client_state& clien
    auto& p = _proxy.container();

    schema_ptr schema = get_table(_proxy, request);
    rjson::value table_description = fill_table_description(schema, table_status::deleting, _proxy);
    rjson::value table_description = co_await fill_table_description(schema, table_status::deleting, _proxy, client_state, trace_state, permit);
    co_await verify_permission(_enforce_authorization, client_state, schema, auth::permission::DROP);
    co_await _mm.container().invoke_on(0, [&, cs = client_state.move_to_other_shard()] (service::migration_manager& mm) -> future<> {
        // FIXME: the following needs to be in a loop. If mm.announce() below

@@ -704,7 +806,7 @@ future<executor::request_return_type> executor::delete_table(client_state& clien
    co_return make_jsonable(std::move(response));
}

static data_type parse_key_type(const std::string& type) {
static data_type parse_key_type(std::string_view type) {
    // Note that keys are only allowed to be string, blob or number (S/B/N).
    // The other types: boolean and various lists or sets - are not allowed.
    if (type.length() == 1) {

@@ -719,7 +821,7 @@ static data_type parse_key_type(const std::string& type) {
}


static void add_column(schema_builder& builder, const std::string& name, const rjson::value& attribute_definitions, column_kind kind) {
static void add_column(schema_builder& builder, const std::string& name, const rjson::value& attribute_definitions, column_kind kind, bool computed_column=false) {
    // FIXME: Currently, the column name ATTRS_COLUMN_NAME is not allowed
    // because we use it for our untyped attribute map, and we can't have a
    // second column with the same name. We should fix this, by renaming
@@ -731,7 +833,16 @@ static void add_column(schema_builder& builder, const std::string& name, const r
        const rjson::value& attribute_info = *it;
        if (attribute_info["AttributeName"].GetString() == name) {
            auto type = attribute_info["AttributeType"].GetString();
            builder.with_column(to_bytes(name), parse_key_type(type), kind);
            data_type dt = parse_key_type(type);
            if (computed_column) {
                // Computed column for GSI (doesn't choose a real column as-is
                // but rather extracts a single value from the ":attrs" map)
                alternator_type at = type_info_from_string(type).atype;
                builder.with_computed_column(to_bytes(name), dt, kind,
                    std::make_unique<extract_from_attrs_column_computation>(to_bytes(name), at));
            } else {
                builder.with_column(to_bytes(name), dt, kind);
            }
            return;
        }
    }
@@ -1072,6 +1183,87 @@ static std::unordered_set<std::string> validate_attribute_definitions(const rjso
    return seen_attribute_names;
}

// The following "extract_from_attrs_column_computation" implementation is
// what allows Alternator GSIs to use in a materialized view's key a member
// from the ":attrs" map instead of a real column in the schema:

const bytes extract_from_attrs_column_computation::MAP_NAME = executor::ATTRS_COLUMN_NAME;

column_computation_ptr extract_from_attrs_column_computation::clone() const {
    return std::make_unique<extract_from_attrs_column_computation>(*this);
}

// Serialize the *definition* of this column computation into a JSON
// string with a unique "type" string - TYPE_NAME - which then causes
// column_computation::deserialize() to create an object from this class.
bytes extract_from_attrs_column_computation::serialize() const {
    rjson::value ret = rjson::empty_object();
    rjson::add(ret, "type", TYPE_NAME);
    rjson::add(ret, "attr_name", rjson::from_string(to_string_view(_attr_name)));
    rjson::add(ret, "desired_type", represent_type(_desired_type).ident);
    return to_bytes(rjson::print(ret));
}

// Construct an extract_from_attrs_column_computation object based on the
// saved output of serialize(). Calls on_internal_error() if the string
// doesn't match the expected output format of serialize(). "type" is not
// checked - we assume the caller (column_computation::deserialize()) won't
// call this constructor if "type" doesn't match.
extract_from_attrs_column_computation::extract_from_attrs_column_computation(const rjson::value &v) {
    const rjson::value* attr_name = rjson::find(v, "attr_name");
    if (attr_name->IsString()) {
        _attr_name = bytes(to_bytes_view(rjson::to_string_view(*attr_name)));
        const rjson::value* desired_type = rjson::find(v, "desired_type");
        if (desired_type->IsString()) {
            _desired_type = type_info_from_string(rjson::to_string_view(*desired_type)).atype;
            switch (_desired_type) {
            case alternator_type::S:
            case alternator_type::B:
            case alternator_type::N:
                // We're done
                return;
            default:
                // Fall through to on_internal_error below.
                break;
            }
        }
    }
    on_internal_error(elogger, format("Improperly formatted alternator::extract_from_attrs_column_computation computed column definition: {}", v));
}

regular_column_transformation::result extract_from_attrs_column_computation::compute_value(
        const schema& schema,
        const partition_key& key,
        const db::view::clustering_or_static_row& row) const
{
    const column_definition* attrs_col = schema.get_column_definition(MAP_NAME);
    if (!attrs_col || !attrs_col->is_regular() || !attrs_col->is_multi_cell()) {
        on_internal_error(elogger, "extract_from_attrs_column_computation::compute_value() on a table without an attrs map");
    }
    // Look for the desired attribute _attr_name in the attrs_col map in row:
    const atomic_cell_or_collection* attrs = row.cells().find_cell(attrs_col->id);
    if (!attrs) {
        return regular_column_transformation::result();
    }
    collection_mutation_view cmv = attrs->as_collection_mutation();
    return cmv.with_deserialized(*attrs_col->type, [this] (const collection_mutation_view_description& cmvd) {
        for (auto&& [key, cell] : cmvd.cells) {
            if (key == _attr_name) {
                return regular_column_transformation::result(cell,
                    std::bind(serialized_value_if_type, std::placeholders::_1, _desired_type));
            }
        }
        return regular_column_transformation::result();
    });
}

// extract_from_attrs_column_computation needs the whole row to compute its
// value; it can't use just the partition key.
bytes extract_from_attrs_column_computation::compute_value(const schema&, const partition_key&) const {
    on_internal_error(elogger, "extract_from_attrs_column_computation::compute_value called without row");
}


static future<executor::request_return_type> create_table_on_shard0(service::client_state&& client_state, tracing::trace_state_ptr trace_state, rjson::value request, service::storage_proxy& sp, service::migration_manager& mm, gms::gossiper& gossiper, bool enforce_authorization) {
    SCYLLA_ASSERT(this_shard_id() == 0);
@@ -1110,67 +1302,15 @@ static future<executor::request_return_type> create_table_on_shard0(service::cli

    schema_ptr partial_schema = builder.build();

    // Parse GlobalSecondaryIndexes parameters before creating the base
    // table, so if we have a parse error we can fail without creating
    // Parse Local/GlobalSecondaryIndexes parameters before creating the
    // base table, so if we have a parse error we can fail without creating
    // any table.
    const rjson::value* gsi = rjson::find(request, "GlobalSecondaryIndexes");
    std::vector<schema_builder> view_builders;
    std::unordered_set<std::string> index_names;
    if (gsi) {
        if (!gsi->IsArray()) {
            co_return api_error::validation("GlobalSecondaryIndexes must be an array.");
        }
        for (const rjson::value& g : gsi->GetArray()) {
            const rjson::value* index_name_v = rjson::find(g, "IndexName");
            if (!index_name_v || !index_name_v->IsString()) {
                co_return api_error::validation("GlobalSecondaryIndexes IndexName must be a string.");
            }
            std::string_view index_name = rjson::to_string_view(*index_name_v);
            auto [it, added] = index_names.emplace(index_name);
            if (!added) {
                co_return api_error::validation(fmt::format("Duplicate IndexName '{}', ", index_name));
            }
            std::string vname(view_name(table_name, index_name));
            elogger.trace("Adding GSI {}", index_name);
            // FIXME: read and handle "Projection" parameter. This will
            // require the MV code to copy just parts of the attrs map.
            schema_builder view_builder(keyspace_name, vname);
            auto [view_hash_key, view_range_key] = parse_key_schema(g);
            if (partial_schema->get_column_definition(to_bytes(view_hash_key)) == nullptr) {
                // A column that exists in a global secondary index is upgraded from being a map entry
                // to having a regular column definition in the base schema
                add_column(builder, view_hash_key, attribute_definitions, column_kind::regular_column);
            }
            add_column(view_builder, view_hash_key, attribute_definitions, column_kind::partition_key);
            unused_attribute_definitions.erase(view_hash_key);
            if (!view_range_key.empty()) {
                if (partial_schema->get_column_definition(to_bytes(view_range_key)) == nullptr) {
                    // A column that exists in a global secondary index is upgraded from being a map entry
                    // to having a regular column definition in the base schema
                    if (partial_schema->get_column_definition(to_bytes(view_hash_key)) == nullptr) {
                        // FIXME: this is alternator limitation only, because Scylla's materialized views
                        // we use underneath do not allow more than 1 base regular column to be part of the MV key
                        elogger.warn("Only 1 regular column from the base table should be used in the GSI key in order to ensure correct liveness management without assumptions");
                    }
                    add_column(builder, view_range_key, attribute_definitions, column_kind::regular_column);
                }
                add_column(view_builder, view_range_key, attribute_definitions, column_kind::clustering_key);
                unused_attribute_definitions.erase(view_range_key);
            }
            // Base key columns which aren't part of the index's key need to
            // be added to the view nonetheless, as (additional) clustering
            // key(s).
            if (hash_key != view_hash_key && hash_key != view_range_key) {
                add_column(view_builder, hash_key, attribute_definitions, column_kind::clustering_key);
            }
            if (!range_key.empty() && range_key != view_hash_key && range_key != view_range_key) {
                add_column(view_builder, range_key, attribute_definitions, column_kind::clustering_key);
            }
            // GSIs have no tags:
            view_builder.add_extension(db::tags_extension::NAME, ::make_shared<db::tags_extension>());
            view_builders.emplace_back(std::move(view_builder));
        }
    }
    // Remember the attributes used for LSI keys. Since LSI must be created
    // with the table, we make these attributes real schema columns, and need
    // to remember this below if the same attributes are used as GSI keys.
    std::unordered_set<std::string> lsi_range_keys;

    const rjson::value* lsi = rjson::find(request, "LocalSecondaryIndexes");
    if (lsi) {

@@ -1228,9 +1368,68 @@ static future<executor::request_return_type> create_table_on_shard0(service::cli
            std::map<sstring, sstring> tags_map = {{db::SYNCHRONOUS_VIEW_UPDATES_TAG_KEY, "true"}};
            view_builder.add_extension(db::tags_extension::NAME, ::make_shared<db::tags_extension>(tags_map));
            view_builders.emplace_back(std::move(view_builder));
            lsi_range_keys.emplace(view_range_key);
        }
    }

    const rjson::value* gsi = rjson::find(request, "GlobalSecondaryIndexes");
    if (gsi) {
        if (!gsi->IsArray()) {
            co_return api_error::validation("GlobalSecondaryIndexes must be an array.");
        }
        for (const rjson::value& g : gsi->GetArray()) {
            const rjson::value* index_name_v = rjson::find(g, "IndexName");
            if (!index_name_v || !index_name_v->IsString()) {
                co_return api_error::validation("GlobalSecondaryIndexes IndexName must be a string.");
            }
            std::string_view index_name = rjson::to_string_view(*index_name_v);
            auto [it, added] = index_names.emplace(index_name);
            if (!added) {
                co_return api_error::validation(fmt::format("Duplicate IndexName '{}', ", index_name));
            }
            std::string vname(view_name(table_name, index_name));
            elogger.trace("Adding GSI {}", index_name);
            // FIXME: read and handle "Projection" parameter. This will
            // require the MV code to copy just parts of the attrs map.
            schema_builder view_builder(keyspace_name, vname);
            auto [view_hash_key, view_range_key] = parse_key_schema(g);

            // If an attribute is already a real column in the base table
            // (i.e., a key attribute) or we already made it a real column
            // as an LSI key above, we can use it directly as a view key.
            // Otherwise, we need to add it as a "computed column", which
            // extracts and deserializes the attribute from the ":attrs" map.
            bool view_hash_key_real_column =
                partial_schema->get_column_definition(to_bytes(view_hash_key)) ||
                lsi_range_keys.contains(view_hash_key);
            add_column(view_builder, view_hash_key, attribute_definitions, column_kind::partition_key, !view_hash_key_real_column);
            unused_attribute_definitions.erase(view_hash_key);
            if (!view_range_key.empty()) {
                bool view_range_key_real_column =
                    partial_schema->get_column_definition(to_bytes(view_range_key)) ||
                    lsi_range_keys.contains(view_range_key);
                add_column(view_builder, view_range_key, attribute_definitions, column_kind::clustering_key, !view_range_key_real_column);
                if (!partial_schema->get_column_definition(to_bytes(view_range_key)) &&
                    !partial_schema->get_column_definition(to_bytes(view_hash_key))) {
                    // FIXME: This warning should go away. See issue #6714
                    elogger.warn("Only 1 regular column from the base table should be used in the GSI key in order to ensure correct liveness management without assumptions");
                }
                unused_attribute_definitions.erase(view_range_key);
            }
            // Base key columns which aren't part of the index's key need to
            // be added to the view nonetheless, as (additional) clustering
            // key(s).
            if (hash_key != view_hash_key && hash_key != view_range_key) {
                add_column(view_builder, hash_key, attribute_definitions, column_kind::clustering_key);
            }
            if (!range_key.empty() && range_key != view_hash_key && range_key != view_range_key) {
                add_column(view_builder, range_key, attribute_definitions, column_kind::clustering_key);
            }
            // GSIs have no tags:
            view_builder.add_extension(db::tags_extension::NAME, ::make_shared<db::tags_extension>());
            view_builders.emplace_back(std::move(view_builder));
        }
    }
    if (!unused_attribute_definitions.empty()) {
        co_return api_error::validation(fmt::format(
            "AttributeDefinitions defines spurious attributes not used by any KeySchema: {}",
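The net effect of the rewritten CreateTable path is that a GSI key no longer has to be a real column of the base schema; when it isn't, it becomes a computed view column extracted from the ":attrs" map. A minimal sketch of a request that exercises this, against a hypothetical Alternator endpoint; the table, index and attribute names are placeholders:

```python
import boto3

dynamodb = boto3.client("dynamodb", endpoint_url="http://localhost:8000",
                        region_name="us-east-1",
                        aws_access_key_id="alternator",
                        aws_secret_access_key="secret_pass")

# "category" is not part of the base table's key, so under this change it is
# backed by a computed view column rather than a real base-table column.
dynamodb.create_table(
    TableName="mytable",
    AttributeDefinitions=[
        {"AttributeName": "p", "AttributeType": "S"},
        {"AttributeName": "category", "AttributeType": "S"},
    ],
    KeySchema=[{"AttributeName": "p", "KeyType": "HASH"}],
    GlobalSecondaryIndexes=[{
        "IndexName": "by_category",
        "KeySchema": [{"AttributeName": "category", "KeyType": "HASH"}],
        "Projection": {"ProjectionType": "ALL"},
    }],
    BillingMode="PAY_PER_REQUEST",
)
```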
@@ -1371,12 +1570,37 @@ future<executor::request_return_type> executor::create_table(client_state& clien
    });
}

// When UpdateTable adds a GSI, the type of its key columns must be specified
// in AttributeDefinitions. If one of these key columns is *already* a key
// column of the base table or any of its prior GSIs or LSIs, the type
// given in AttributeDefinitions must match the type of the existing key -
// otherwise Alternator will not know which type to enforce in new writes.
// This function checks for such conflicts. It assumes that the structure of
// the given attribute_definitions was already validated (with
// validate_attribute_definitions()).
// This function should be called multiple times - once for the base schema
// and once for each of its views (existing GSIs and LSIs on this table).
static void check_attribute_definitions_conflicts(const rjson::value& attribute_definitions, const schema& schema) {
    for (auto& def : schema.primary_key_columns()) {
        std::string def_type = type_to_string(def.type);
        for (auto it = attribute_definitions.Begin(); it != attribute_definitions.End(); ++it) {
            const rjson::value& attribute_info = *it;
            if (attribute_info["AttributeName"].GetString() == def.name_as_text()) {
                auto type = attribute_info["AttributeType"].GetString();
                if (type != def_type) {
                    throw api_error::validation(fmt::format("AttributeDefinitions redefined {} to {} already a key attribute of type {} in this table", def.name_as_text(), type, def_type));
                }
                break;
            }
        }
    }
}

future<executor::request_return_type> executor::update_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request) {
    _stats.api_operations.update_table++;
    elogger.trace("Updating table {}", request);

    static const std::vector<sstring> unsupported = {
        "GlobalSecondaryIndexUpdates",
        "ProvisionedThroughput",
        "ReplicaUpdates",
        "SSESpecification",

@@ -1388,11 +1612,14 @@ future<executor::request_return_type> executor::update_table(client_state& clien
        }
    }

    bool empty_request = true;

    if (rjson::find(request, "BillingMode")) {
        empty_request = false;
        verify_billing_mode(request);
    }

    co_return co_await _mm.container().invoke_on(0, [&p = _proxy.container(), request = std::move(request), gt = tracing::global_trace_state_ptr(std::move(trace_state)), enforce_authorization = bool(_enforce_authorization), client_state_other_shard = client_state.move_to_other_shard()]
    co_return co_await _mm.container().invoke_on(0, [&p = _proxy.container(), request = std::move(request), gt = tracing::global_trace_state_ptr(std::move(trace_state)), enforce_authorization = bool(_enforce_authorization), client_state_other_shard = client_state.move_to_other_shard(), empty_request]
            (service::migration_manager& mm) mutable -> future<executor::request_return_type> {
        // FIXME: the following needs to be in a loop. If mm.announce() below
        // fails, we need to retry the whole thing.
@@ -1412,6 +1639,7 @@ future<executor::request_return_type> executor::update_table(client_state& clien

        rjson::value* stream_specification = rjson::find(request, "StreamSpecification");
        if (stream_specification && stream_specification->IsObject()) {
            empty_request = false;
            add_stream_options(*stream_specification, builder, p.local());
            // Alternator Streams doesn't yet work when the table uses tablets (#16317)
            auto stream_enabled = rjson::find(*stream_specification, "StreamEnabled");
@@ -1423,8 +1651,162 @@ future<executor::request_return_type> executor::update_table(client_state& clien
        }

        auto schema = builder.build();
        std::vector<view_ptr> new_views;
        std::vector<std::string> dropped_views;

        rjson::value* gsi_updates = rjson::find(request, "GlobalSecondaryIndexUpdates");
        if (gsi_updates) {
            if (!gsi_updates->IsArray()) {
                co_return api_error::validation("GlobalSecondaryIndexUpdates must be an array");
            }
            if (gsi_updates->Size() > 1) {
                // Although UpdateTable takes an array of operations and could
                // support multiple Create and/or Delete operations in one
                // command, DynamoDB doesn't actually allow this, and throws
                // a LimitExceededException if this is attempted.
                co_return api_error::limit_exceeded("GlobalSecondaryIndexUpdates only allows one index creation or deletion");
            }
            if (gsi_updates->Size() == 1) {
                empty_request = false;
                if (!(*gsi_updates)[0].IsObject() || (*gsi_updates)[0].MemberCount() != 1) {
                    co_return api_error::validation("GlobalSecondaryIndexUpdates array must contain one object with a Create, Delete or Update operation");
                }
                auto it = (*gsi_updates)[0].MemberBegin();
                const std::string_view op = rjson::to_string_view(it->name);
                if (!it->value.IsObject()) {
                    co_return api_error::validation("GlobalSecondaryIndexUpdates entries must be objects");
                }
                const rjson::value* index_name_v = rjson::find(it->value, "IndexName");
                if (!index_name_v || !index_name_v->IsString()) {
                    co_return api_error::validation("GlobalSecondaryIndexUpdates operation must have IndexName");
                }
                std::string_view index_name = rjson::to_string_view(*index_name_v);
                std::string_view table_name = schema->cf_name();
                std::string_view keyspace_name = schema->ks_name();
                std::string vname(view_name(table_name, index_name));
                if (op == "Create") {
                    const rjson::value* attribute_definitions = rjson::find(request, "AttributeDefinitions");
                    if (!attribute_definitions) {
                        co_return api_error::validation("GlobalSecondaryIndexUpdates Create needs AttributeDefinitions");
                    }
                    std::unordered_set<std::string> unused_attribute_definitions =
                        validate_attribute_definitions(*attribute_definitions);
                    check_attribute_definitions_conflicts(*attribute_definitions, *schema);
                    for (auto& view : p.local().data_dictionary().find_column_family(tab).views()) {
                        check_attribute_definitions_conflicts(*attribute_definitions, *view);
                    }

                    if (p.local().data_dictionary().has_schema(keyspace_name, vname)) {
                        // Surprisingly, DynamoDB uses validation error here, not resource_in_use
                        co_return api_error::validation(fmt::format(
                            "GSI {} already exists in table {}", index_name, table_name));
                    }
                    if (p.local().data_dictionary().has_schema(keyspace_name, lsi_name(table_name, index_name))) {
                        co_return api_error::validation(fmt::format(
                            "LSI {} already exists in table {}, can't use same name for GSI", index_name, table_name));
                    }

                    elogger.trace("Adding GSI {}", index_name);
                    // FIXME: read and handle "Projection" parameter. This will
                    // require the MV code to copy just parts of the attrs map.
                    schema_builder view_builder(keyspace_name, vname);
                    auto [view_hash_key, view_range_key] = parse_key_schema(it->value);
                    // If an attribute is already a real column in the base
                    // table (i.e., a key attribute in the base table or LSI),
                    // we can use it directly as a view key. Otherwise, we
                    // need to add it as a "computed column", which extracts
                    // and deserializes the attribute from the ":attrs" map.
                    bool view_hash_key_real_column =
                        schema->get_column_definition(to_bytes(view_hash_key));
                    add_column(view_builder, view_hash_key, *attribute_definitions, column_kind::partition_key, !view_hash_key_real_column);
                    unused_attribute_definitions.erase(view_hash_key);
                    if (!view_range_key.empty()) {
                        bool view_range_key_real_column =
                            schema->get_column_definition(to_bytes(view_range_key));
                        add_column(view_builder, view_range_key, *attribute_definitions, column_kind::clustering_key, !view_range_key_real_column);
                        if (!schema->get_column_definition(to_bytes(view_range_key)) &&
                            !schema->get_column_definition(to_bytes(view_hash_key))) {
                            // FIXME: This warning should go away. See issue #6714
                            elogger.warn("Only 1 regular column from the base table should be used in the GSI key in order to ensure correct liveness management without assumptions");
                        }
                        unused_attribute_definitions.erase(view_range_key);
                    }
                    // Surprisingly, although DynamoDB checks for unused
                    // AttributeDefinitions in CreateTable, it does not
                    // check it in UpdateTable. We decided to check anyway.
                    if (!unused_attribute_definitions.empty()) {
                        co_return api_error::validation(fmt::format(
                            "AttributeDefinitions defines spurious attributes not used by any KeySchema: {}",
                            unused_attribute_definitions));
                    }
                    // Base key columns which aren't part of the index's key need to
                    // be added to the view nonetheless, as (additional) clustering
                    // key(s).
                    for (auto& def : schema->primary_key_columns()) {
                        if (def.name_as_text() != view_hash_key && def.name_as_text() != view_range_key) {
                            view_builder.with_column(def.name(), def.type, column_kind::clustering_key);
                        }
                    }
                    // GSIs have no tags:
                    view_builder.add_extension(db::tags_extension::NAME, ::make_shared<db::tags_extension>());
                    // Note below we don't need to add virtual columns, as all
                    // base columns were copied to view. TODO: reconsider the need
                    // for virtual columns when we support Projection.
                    for (const column_definition& regular_cdef : schema->regular_columns()) {
                        if (!view_builder.has_column(*cql3::to_identifier(regular_cdef))) {
                            view_builder.with_column(regular_cdef.name(), regular_cdef.type, column_kind::regular_column);
                        }
                    }
                    const bool include_all_columns = true;
                    view_builder.with_view_info(*schema, include_all_columns, ""/*where clause*/);
                    new_views.emplace_back(view_builder.build());
                } else if (op == "Delete") {
                    elogger.trace("Deleting GSI {}", index_name);
                    if (!p.local().data_dictionary().has_schema(keyspace_name, vname)) {
                        co_return api_error::resource_not_found(fmt::format("No GSI {} in table {}", index_name, table_name));
                    }
                    dropped_views.emplace_back(vname);
                } else if (op == "Update") {
                    co_return api_error::validation("GlobalSecondaryIndexUpdates Update not yet supported");
                } else {
                    co_return api_error::validation(fmt::format("GlobalSecondaryIndexUpdates supports a Create, Delete or Update operation, saw '{}'", op));
                }
            }
        }

        if (empty_request) {
            co_return api_error::validation("UpdateTable requires one of GlobalSecondaryIndexUpdates, StreamSpecification or BillingMode to be specified");
        }

        co_await verify_permission(enforce_authorization, client_state_other_shard.get(), schema, auth::permission::ALTER);
        auto m = co_await service::prepare_column_family_update_announcement(p.local(), schema, std::vector<view_ptr>(), group0_guard.write_timestamp());
        auto m = co_await service::prepare_column_family_update_announcement(p.local(), schema, std::vector<view_ptr>(), group0_guard.write_timestamp());
        for (view_ptr view : new_views) {
            auto m2 = co_await service::prepare_new_view_announcement(p.local(), view, group0_guard.write_timestamp());
            std::move(m2.begin(), m2.end(), std::back_inserter(m));
        }
        for (const std::string& view_name : dropped_views) {
            auto m2 = co_await service::prepare_view_drop_announcement(p.local(), schema->ks_name(), view_name, group0_guard.write_timestamp());
            std::move(m2.begin(), m2.end(), std::back_inserter(m));
        }
        // If a role is allowed to create a GSI, we should give it permissions
        // to read the GSI it just created. This is known as "auto-grant".
        // Also, when we delete a GSI we should revoke any permissions set on
        // it - so if it's ever created again the old permissions wouldn't be
        // remembered for the new GSI. This is known as "auto-revoke"
        if (client_state_other_shard.get().user() && (!new_views.empty() || !dropped_views.empty())) {
            service::group0_batch mc(std::move(group0_guard));
            mc.add_mutations(std::move(m));
            for (view_ptr view : new_views) {
                auto resource = auth::make_data_resource(view->ks_name(), view->cf_name());
                co_await auth::grant_applicable_permissions(
                    *client_state_other_shard.get().get_auth_service(), *client_state_other_shard.get().user(), resource, mc);
            }
            for (const auto& view_name : dropped_views) {
                auto resource = auth::make_data_resource(schema->ks_name(), view_name);
                co_await auth::revoke_all(*client_state_other_shard.get().get_auth_service(), resource, mc);
            }
            std::tie(m, group0_guard) = co_await std::move(mc).extract();
        }

        co_await mm.announce(std::move(m), std::move(group0_guard), format("alternator-executor: update {} table", tab->cf_name()));
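With this change, UpdateTable can add or drop a single GSI per call; more than one entry in GlobalSecondaryIndexUpdates is rejected with LimitExceededException, mirroring DynamoDB, and "Update" is not yet supported. A rough sketch of the two supported operations against a hypothetical Alternator endpoint; table, index and attribute names are placeholders:

```python
import boto3

dynamodb = boto3.client("dynamodb", endpoint_url="http://localhost:8000",
                        region_name="us-east-1",
                        aws_access_key_id="alternator",
                        aws_secret_access_key="secret_pass")

# Create one new GSI on an existing table.
dynamodb.update_table(
    TableName="mytable",
    AttributeDefinitions=[{"AttributeName": "category", "AttributeType": "S"}],
    GlobalSecondaryIndexUpdates=[{
        "Create": {
            "IndexName": "by_category",
            "KeySchema": [{"AttributeName": "category", "KeyType": "HASH"}],
            "Projection": {"ProjectionType": "ALL"},
        }
    }],
)

# Later, delete it again. Only one Create or Delete is allowed per call.
dynamodb.update_table(
    TableName="mytable",
    GlobalSecondaryIndexUpdates=[{"Delete": {"IndexName": "by_category"}}],
)
```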
@@ -1546,7 +1928,7 @@ public:
    struct delete_item {};
    struct put_item {};
    put_or_delete_item(const rjson::value& key, schema_ptr schema, delete_item);
    put_or_delete_item(const rjson::value& item, schema_ptr schema, put_item);
    put_or_delete_item(const rjson::value& item, schema_ptr schema, put_item, std::unordered_map<bytes, std::string> key_attributes);
    // put_or_delete_item doesn't keep a reference to schema (so it can be
    // moved between shards for LWT) so it needs to be given again to build():
    mutation build(schema_ptr schema, api::timestamp_type ts) const;
@@ -1578,7 +1960,75 @@ static inline const column_definition* find_attribute(const schema& schema, cons
    return cdef;
}

put_or_delete_item::put_or_delete_item(const rjson::value& item, schema_ptr schema, put_item)

// Get a list of all attributes that serve as key attributes for any of the
// GSIs or LSIs of this table, and the declared type for each (can be only
// "S", "B", or "N"). The implementation below will also list the base table's
// key columns (they are the views' clustering keys).
std::unordered_map<bytes, std::string> si_key_attributes(data_dictionary::table t) {
    std::unordered_map<bytes, std::string> ret;
    for (const view_ptr& v : t.views()) {
        for (const column_definition& cdef : v->partition_key_columns()) {
            ret[cdef.name()] = type_to_string(cdef.type);
        }
        for (const column_definition& cdef : v->clustering_key_columns()) {
            ret[cdef.name()] = type_to_string(cdef.type);
        }
    }
    return ret;
}

// When an attribute is a key (hash or sort) of one of the GSIs on a table,
// DynamoDB refuses an update to that attribute with an unsuitable value.
// Unsuitable values are:
// 1. An empty string (those are normally allowed as values, but not allowed
//    as keys, including GSI keys).
// 2. A value with a type different than that declared for the GSI key.
//    Normally non-key attributes can take values of any type (DynamoDB is
//    schema-less), but as soon as an attribute is used as a GSI key, it
//    must be set only to the specific type declared for that key.
// (Note that a missing value for a GSI key attribute is fine - the update
// will happen on the base table, but won't reach the view table. In this
// case, this function simply won't be called for this attribute.)
//
// This function checks if the given attribute update is an update to some
// GSI's key, and if the value is unsuitable, an api_error::validation is
// thrown. The checking here is similar to the checking done in
// get_key_from_typed_value() for the base table's key columns.
//
// validate_value_if_gsi_key() should only be called after validate_value()
// already validated that the value itself has a valid form.
static inline void validate_value_if_gsi_key(
        std::unordered_map<bytes, std::string> key_attributes,
        const bytes& attribute,
        const rjson::value& value) {
    if (key_attributes.empty()) {
        return;
    }
    auto it = key_attributes.find(attribute);
    if (it == key_attributes.end()) {
        // Given attribute is not a key column with a fixed type, so no
        // more validation to do.
        return;
    }
    const std::string& expected_type = it->second;
    // We assume that validate_value() was previously called on this value,
    // so value is known to be of the proper format (an object with one
    // member, whose key and value are strings)
    std::string_view value_type = rjson::to_string_view(value.MemberBegin()->name);
    if (expected_type != value_type) {
        throw api_error::validation(fmt::format(
            "Type mismatch: expected type {} for GSI key attribute {}, got type {}",
            expected_type, to_string_view(attribute), value_type));
    }
    std::string_view value_content = rjson::to_string_view(value.MemberBegin()->value);
    if (value_content.empty()) {
        throw api_error::validation(fmt::format(
            "GSI key attribute {} cannot be set to an empty string", to_string_view(attribute)));
    }
}

put_or_delete_item::put_or_delete_item(const rjson::value& item, schema_ptr schema, put_item, std::unordered_map<bytes, std::string> key_attributes)
        : _pk(pk_from_json(item, schema)), _ck(ck_from_json(item, schema)) {
    _cells = std::vector<cell>();
    _cells->reserve(item.MemberCount());
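validate_value_if_gsi_key() is what turns a bad GSI-key value in a write into a DynamoDB-style ValidationException. A minimal sketch of two PutItem calls that this check would reject, assuming a table whose GSI key attribute "category" was declared as type S; the endpoint, credentials and names are placeholders:

```python
import boto3
from botocore.exceptions import ClientError

dynamodb = boto3.client("dynamodb", endpoint_url="http://localhost:8000",
                        region_name="us-east-1",
                        aws_access_key_id="alternator",
                        aws_secret_access_key="secret_pass")

bad_items = [
    {"p": {"S": "row1"}, "category": {"N": "7"}},  # wrong type for a GSI key declared as S
    {"p": {"S": "row2"}, "category": {"S": ""}},   # empty string is not allowed as a GSI key
]
for item in bad_items:
    try:
        dynamodb.put_item(TableName="mytable", Item=item)
    except ClientError as e:
        print(e.response["Error"]["Code"])  # expected: ValidationException
```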
@@ -1588,6 +2038,9 @@ put_or_delete_item::put_or_delete_item(const rjson::value& item, schema_ptr sche
        const column_definition* cdef = find_attribute(*schema, column_name);
        _length_in_bytes += column_name.size();
        if (!cdef) {
            // This attribute may be a key column of one of the GSIs, in which
            // case there are some limitations on the value
            validate_value_if_gsi_key(key_attributes, column_name, it->value);
            bytes value = serialize_item(it->value);
            if (value.size()) {
                // ScyllaDB uses one extra byte compared to DynamoDB for the bytes length

@@ -1595,7 +2048,7 @@ put_or_delete_item::put_or_delete_item(const rjson::value& item, schema_ptr sche
            }
            _cells->push_back({std::move(column_name), serialize_item(it->value)});
        } else if (!cdef->is_primary_key()) {
            // Fixed-type regular column can be used for GSI key
            // Fixed-type regular column can be used for LSI key
            bytes value = get_key_from_typed_value(it->value, *cdef);
            _cells->push_back({std::move(column_name),
                value});
@@ -1954,7 +2407,8 @@ public:
    parsed::condition_expression _condition_expression;
    put_item_operation(service::storage_proxy& proxy, rjson::value&& request)
        : rmw_operation(proxy, std::move(request))
        , _mutation_builder(rjson::get(_request, "Item"), schema(), put_or_delete_item::put_item{}) {
        , _mutation_builder(rjson::get(_request, "Item"), schema(), put_or_delete_item::put_item{},
            si_key_attributes(proxy.data_dictionary().find_table(schema()->ks_name(), schema()->cf_name()))) {
        _pk = _mutation_builder.pk();
        _ck = _mutation_builder.ck();
        if (_returnvalues != returnvalues::NONE && _returnvalues != returnvalues::ALL_OLD) {

@@ -2315,7 +2769,8 @@ future<executor::request_return_type> executor::batch_write_item(client_state& c
            const rjson::value& put_request = r->value;
            const rjson::value& item = put_request["Item"];
            mutation_builders.emplace_back(schema, put_or_delete_item(
                item, schema, put_or_delete_item::put_item{}));
                item, schema, put_or_delete_item::put_item{},
                si_key_attributes(_proxy.data_dictionary().find_table(schema->ks_name(), schema->cf_name()))));
            auto mut_key = std::make_pair(mutation_builders.back().second.pk(), mutation_builders.back().second.ck());
            if (used_keys.contains(mut_key)) {
                co_return api_error::validation("Provided list of item keys contains duplicates");
@@ -2859,6 +3314,10 @@ public:
    // them by top-level attribute, and detects forbidden overlaps/conflicts.
    attribute_path_map<parsed::update_expression::action> _update_expression;

    // Saved list of GSI keys in the table being updated, used for
    // validate_value_if_gsi_key()
    std::unordered_map<bytes, std::string> _key_attributes;

    parsed::condition_expression _condition_expression;

    update_item_operation(service::storage_proxy& proxy, rjson::value&& request);

@@ -2950,6 +3409,9 @@ update_item_operation::update_item_operation(service::storage_proxy& proxy, rjso
    if (expression_attribute_values) {
        _consumed_capacity._total_bytes += estimate_value_size(*expression_attribute_values);
    }

    _key_attributes = si_key_attributes(proxy.data_dictionary().find_table(
        _schema->ks_name(), _schema->cf_name()));
}

// These are the cases where update_item_operation::apply() needs to use
@@ -3247,6 +3709,9 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
            bytes column_value = get_key_from_typed_value(json_value, *cdef);
            row.cells().apply(*cdef, atomic_cell::make_live(*cdef->type, ts, column_value));
        } else {
            // This attribute may be a key column of one of the GSIs, in which
            // case there are some limitations on the value.
            validate_value_if_gsi_key(_key_attributes, column_name, json_value);
            attrs_collector.put(std::move(column_name), serialize_item(json_value), ts);
        }
    };
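The same GSI-key check is applied on the UpdateItem path through the saved _key_attributes map. A short sketch of an update that would now fail validation, under the same assumed table with a string-typed GSI key attribute "category":

```python
import boto3
from botocore.exceptions import ClientError

dynamodb = boto3.client("dynamodb", endpoint_url="http://localhost:8000",
                        region_name="us-east-1",
                        aws_access_key_id="alternator",
                        aws_secret_access_key="secret_pass")

try:
    # Setting a GSI key attribute to the empty string (or to a non-S type)
    # is rejected, as DynamoDB does.
    dynamodb.update_item(
        TableName="mytable",
        Key={"p": {"S": "row1"}},
        UpdateExpression="SET category = :v",
        ExpressionAttributeValues={":v": {"S": ""}},
    )
except ClientError as e:
    print(e.response["Error"]["Code"])  # expected: ValidationException
```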
73
alternator/extract_from_attrs.hh
Normal file
73
alternator/extract_from_attrs.hh
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* Copyright 2024-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
 * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
 */

#pragma once

#include <string>
#include <string_view>

#include "utils/rjson.hh"
#include "serialization.hh"
#include "column_computation.hh"
#include "db/view/regular_column_transformation.hh"

namespace alternator {

// An implementation of a "column_computation" which extracts a specific
// non-key attribute from the big map (":attrs") of all non-key attributes,
// and deserializes it if it has the desired type. GSI will use this computed
// column as a materialized-view key when the view key attribute isn't a
// full-fledged CQL column but rather stored in ":attrs".
class extract_from_attrs_column_computation : public regular_column_transformation {
    // The name of the CQL column holding the attribute map. It is a
    // constant defined in executor.cc (as ":attrs"), so it doesn't need
    // to be specified when constructing the column computation.
    static const bytes MAP_NAME;
    // The top-level attribute name to extract from the ":attrs" map.
    bytes _attr_name;
    // The type we expect for the value stored in the attribute. If the type
    // matches the expected type, it is decoded from the serialized format
    // (which we store in the map's values) into the raw CQL type value that
    // we use for keys, and returned by compute_value(). Only the types "S"
    // (string), "B" (bytes) and "N" (number) are allowed as keys in DynamoDB,
    // and therefore in desired_type.
    alternator_type _desired_type;
public:
    virtual column_computation_ptr clone() const override;
    // TYPE_NAME is a unique string that distinguishes this class from other
    // column_computation subclasses. column_computation::deserialize() will
    // construct an object of this subclass if it sees a "type" of TYPE_NAME.
    static inline const std::string TYPE_NAME = "alternator_extract_from_attrs";
    // Serialize the *definition* of this column computation into a JSON
    // string with a unique "type" string - TYPE_NAME - which then causes
    // column_computation::deserialize() to create an object of this class.
    virtual bytes serialize() const override;
    // Construct this object based on the previous output of serialize().
    // Calls on_internal_error() if the string doesn't match the output format
    // of serialize(). "type" is not checked because column_computation::deserialize()
    // won't call this constructor if "type" doesn't match.
    extract_from_attrs_column_computation(const rjson::value &v);
    extract_from_attrs_column_computation(bytes_view attr_name, alternator_type desired_type)
        : _attr_name(attr_name), _desired_type(desired_type)
    {}
    // Implement regular_column_transformation's compute_value() that
    // accepts the full row:
    result compute_value(const schema& schema, const partition_key& key,
        const db::view::clustering_or_static_row& row) const override;
    // But do not implement column_computation's compute_value() that
    // accepts only a partition key - that's not enough, so our implementation
    // of this function calls on_internal_error().
    bytes compute_value(const schema& schema, const partition_key& key) const override;
    // This computed column does depend on a non-primary-key column, so
    // its result may change in the update and we need to compute it
    // before and after the update.
    virtual bool depends_on_non_primary_key_column() const override {
        return true;
    }
};

} // namespace alternator
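To make the role of this class concrete, here is a minimal, hypothetical sketch of how a GSI key attribute stored in ":attrs" might be wired to this computation when the index is created. The attribute name, the to_bytes() helper and the alternator_type::S enumerator are illustrative assumptions, not code from this change:

// Hypothetical illustration only - not part of this commit series.
// A GSI whose partition-key attribute "user_id" (DynamoDB type S) lives inside
// the ":attrs" map would get a computed view key column defined roughly as:
auto computation = std::make_unique<alternator::extract_from_attrs_column_computation>(
        to_bytes("user_id"), alternator::alternator_type::S);
bytes definition = computation->serialize();      // stored with the view's schema
// Later, column_computation::deserialize(definition) re-creates the object,
// because the serialized JSON carries TYPE_NAME in its "type" field.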
@@ -245,6 +245,27 @@ rjson::value deserialize_item(bytes_view bv) {
    return deserialized;
}

// This function takes a bytes_view created earlier by serialize_item(), and
// if it has the type "expected_type", the function returns the value as a
// raw Scylla type. If the type doesn't match, it returns an unset optional.
// This function only supports the key types S (string), B (bytes) and N
// (number) - serialize_item() serializes those types as a single-byte type
// followed by the serialized raw Scylla type, so all this function needs to
// do is to remove the first byte. This makes this function much more
// efficient than deserialize_item() above because it avoids transformation
// to/from JSON.
std::optional<bytes> serialized_value_if_type(bytes_view bv, alternator_type expected_type) {
    if (bv.empty() || alternator_type(bv[0]) != expected_type) {
        return std::nullopt;
    }
    // Currently, serialize_item() for types in alternator_type (notably S, B
    // and N) produces nothing more than Scylla's raw format for these types
    // preceded by a type byte. So we just need to skip that byte and we are
    // left with exactly what we need to return.
    bv.remove_prefix(1);
    return bytes(bv);
}

std::string type_to_string(data_type type) {
    static thread_local std::unordered_map<data_type, std::string> types = {
        {utf8_type, "S"},
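A short usage sketch of the byte layout that serialized_value_if_type() relies on; string_item and the alternator_type enumerators are illustrative assumptions:

// Illustration only: serialize_item() for a key type is assumed to emit one
// type byte followed by the raw CQL serialization of the value.
bytes stored = serialize_item(string_item);                       // e.g. 'S' followed by utf8_type bytes
std::optional<bytes> raw = serialized_value_if_type(stored, alternator_type::S);
if (raw) {
    // *raw is the raw utf8_type serialization, usable directly as a view key component.
}
std::optional<bytes> none = serialized_value_if_type(stored, alternator_type::N);  // type mismatch -> nullopt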
@@ -43,6 +43,7 @@ type_representation represent_type(alternator_type atype);

bytes serialize_item(const rjson::value& item);
rjson::value deserialize_item(bytes_view bv);
std::optional<bytes> serialized_value_if_type(bytes_view bv, alternator_type expected_type);

std::string type_to_string(data_type type);
@@ -2864,6 +2864,30 @@
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"hosts_filter",
|
||||
"description":"Repair replicas listed in the comma-separated host_id list.",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"dcs_filter",
|
||||
"description":"Repair replicas listed in the comma-separated DC list",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"await_completion",
|
||||
"description":"Set true to wait for the repair to complete. Set false to skip waiting for the repair to complete. When the option is not provided, it defaults to false.",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -253,6 +253,30 @@
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/task_manager/drain/{module}",
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Drain finished local tasks",
|
||||
"type":"void",
|
||||
"nickname":"drain_tasks",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"module",
|
||||
"description":"The module to drain",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"models":{
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#include "build_mode.hh"
|
||||
|
||||
#ifndef SCYLLA_BUILD_MODE_RELEASE
|
||||
|
||||
#include <seastar/core/coroutine.hh>
|
||||
|
||||
@@ -1543,6 +1543,11 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
}
|
||||
auto ks = req->get_query_param("ks");
|
||||
auto table = req->get_query_param("table");
|
||||
bool await_completion = false;
|
||||
auto await = req->get_query_param("await_completion");
|
||||
if (!await.empty()) {
|
||||
await_completion = validate_bool(await);
|
||||
}
|
||||
validate_table(ctx, ks, table);
|
||||
auto table_id = ctx.db.local().find_column_family(ks, table).schema()->id();
|
||||
std::variant<utils::chunked_vector<dht::token>, service::storage_service::all_tokens_tag> tokens_variant;
|
||||
@@ -1551,8 +1556,22 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
} else {
|
||||
tokens_variant = tokens;
|
||||
}
|
||||
auto hosts = req->get_query_param("hosts_filter");
|
||||
auto dcs = req->get_query_param("dcs_filter");
|
||||
|
||||
auto res = co_await ss.local().add_repair_tablet_request(table_id, tokens_variant);
|
||||
std::unordered_set<locator::host_id> hosts_filter;
|
||||
if (!hosts.empty()) {
|
||||
std::string delim = ",";
|
||||
hosts_filter = std::ranges::views::split(hosts, delim) | std::views::transform([](auto&& h) {
|
||||
try {
|
||||
return locator::host_id(utils::UUID(std::string_view{h}));
|
||||
} catch (...) {
|
||||
throw httpd::bad_param_exception(fmt::format("Wrong host_id format {}", h));
|
||||
}
|
||||
}) | std::ranges::to<std::unordered_set>();
|
||||
}
|
||||
auto dcs_filter = locator::tablet_task_info::deserialize_repair_dcs_filter(dcs);
|
||||
auto res = co_await ss.local().add_repair_tablet_request(table_id, tokens_variant, hosts_filter, dcs_filter, await_completion);
|
||||
co_return json::json_return_type(res);
|
||||
});
|
||||
|
||||
|
||||
@@ -232,6 +232,32 @@ void set_task_manager(http_context& ctx, routes& r, sharded<tasks::task_manager>
|
||||
uint32_t user_ttl = cfg.user_task_ttl_seconds();
|
||||
co_return json::json_return_type(user_ttl);
|
||||
});
|
||||
|
||||
tm::drain_tasks.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
co_await tm.invoke_on_all([&req] (tasks::task_manager& tm) -> future<> {
|
||||
tasks::task_manager::module_ptr module;
|
||||
try {
|
||||
module = tm.find_module(req->get_path_param("module"));
|
||||
} catch (...) {
|
||||
throw bad_param_exception(fmt::format("{}", std::current_exception()));
|
||||
}
|
||||
|
||||
const auto& local_tasks = module->get_local_tasks();
|
||||
std::vector<tasks::task_id> ids;
|
||||
ids.reserve(local_tasks.size());
|
||||
std::transform(begin(local_tasks), end(local_tasks), std::back_inserter(ids), [] (const auto& task) {
|
||||
return task.second->is_complete() ? task.first : tasks::task_id::create_null_id();
|
||||
});
|
||||
|
||||
for (auto&& id : ids) {
|
||||
if (id) {
|
||||
module->unregister_task(id);
|
||||
}
|
||||
co_await maybe_yield();
|
||||
}
|
||||
});
|
||||
co_return json_void();
|
||||
});
|
||||
}
|
||||
|
||||
void unset_task_manager(http_context& ctx, routes& r) {
|
||||
@@ -243,6 +269,7 @@ void unset_task_manager(http_context& ctx, routes& r) {
|
||||
tm::get_task_status_recursively.unset(r);
|
||||
tm::get_and_update_ttl.unset(r);
|
||||
tm::get_ttl.unset(r);
|
||||
tm::drain_tasks.unset(r);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -6,6 +6,9 @@
 * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
 */

#include "build_mode.hh"

#ifndef SCYLLA_BUILD_MODE_RELEASE

#include <seastar/core/coroutine.hh>

@@ -1112,7 +1112,9 @@ future<bool> generation_service::legacy_do_handle_cdc_generation(cdc::generation
    auto sys_dist_ks = get_sys_dist_ks();
    auto gen = co_await retrieve_generation_data(gen_id, _sys_ks.local(), *sys_dist_ks, { _token_metadata.get()->count_normal_token_owners() });
    if (!gen) {
        throw std::runtime_error(fmt::format(
        // This may happen during raft upgrade when a node gossips about a generation that
        // was propagated through raft and we didn't apply it yet.
        throw generation_handling_nonfatal_exception(fmt::format(
            "Could not find CDC generation {} in distributed system tables (current time: {}),"
            " even though some node gossiped about it.",
            gen_id, db_clock::now()));
@@ -186,7 +186,7 @@ bool cdc::metadata::prepare(db_clock::time_point tp) {
    }

    auto ts = to_ts(tp);
    auto emplaced = _gens.emplace(to_ts(tp), std::nullopt).second;
    auto [it, emplaced] = _gens.emplace(to_ts(tp), std::nullopt);

    if (_last_stream_timestamp != api::missing_timestamp) {
        auto last_correct_gen = gen_used_at(_last_stream_timestamp);
@@ -201,5 +201,5 @@ bool cdc::metadata::prepare(db_clock::time_point tp) {
        }
    }

    return emplaced;
    return !it->second;
}
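For context on the change above: std::map::emplace() never overwrites an existing entry, so the boolean it returns only reports whether an insertion happened, not whether the stored optional is still empty; returning !it->second keeps prepare() answering the latter question even when the timestamp was already emplaced by an earlier call. A self-contained illustration of that distinction (plain C++, not ScyllaDB code):

#include <cassert>
#include <map>
#include <optional>

int main() {
    std::map<long, std::optional<int>> gens;
    gens.emplace(42, std::nullopt);              // first prepare(): inserted, generation not fetched yet
    auto [it, emplaced] = gens.emplace(42, 7);   // second prepare(): key exists, nothing is overwritten
    assert(!emplaced);                           // the old return value would now be false...
    assert(!it->second);                         // ...but the entry is still pending, which is what matters
}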
@@ -15,6 +15,7 @@
|
||||
#include "sstables/sstables_manager.hh"
|
||||
#include <memory>
|
||||
#include <fmt/ranges.h>
|
||||
#include <seastar/core/future.hh>
|
||||
#include <seastar/core/metrics.hh>
|
||||
#include <seastar/core/coroutine.hh>
|
||||
#include <seastar/coroutine/switch_to.hh>
|
||||
@@ -503,7 +504,7 @@ public:
|
||||
|
||||
virtual ~sstables_task_executor() = default;
|
||||
|
||||
virtual void release_resources() noexcept override;
|
||||
virtual future<> release_resources() noexcept override;
|
||||
|
||||
virtual future<tasks::task_manager::task::progress> get_progress() const override {
|
||||
return compaction_task_impl::get_progress(_compaction_data, _progress_monitor);
|
||||
@@ -788,9 +789,10 @@ compaction::compaction_state::~compaction_state() {
|
||||
compaction_done.broken();
|
||||
}
|
||||
|
||||
void sstables_task_executor::release_resources() noexcept {
|
||||
future<> sstables_task_executor::release_resources() noexcept {
|
||||
_cm._stats.pending_tasks -= _sstables.size() - (_state == state::pending);
|
||||
_sstables = {};
|
||||
return make_ready_future();
|
||||
}
|
||||
|
||||
future<compaction_manager::compaction_stats_opt> compaction_task_executor::run_compaction() noexcept {
|
||||
@@ -1565,10 +1567,10 @@ public:
|
||||
, _can_purge(can_purge)
|
||||
{}
|
||||
|
||||
virtual void release_resources() noexcept override {
|
||||
virtual future<> release_resources() noexcept override {
|
||||
_compacting.release_all();
|
||||
_owned_ranges_ptr = nullptr;
|
||||
sstables_task_executor::release_resources();
|
||||
co_await sstables_task_executor::release_resources();
|
||||
}
|
||||
|
||||
protected:
|
||||
@@ -1846,11 +1848,12 @@ public:
|
||||
|
||||
virtual ~cleanup_sstables_compaction_task_executor() = default;
|
||||
|
||||
virtual void release_resources() noexcept override {
|
||||
virtual future<> release_resources() noexcept override {
|
||||
_cm._stats.pending_tasks -= _pending_cleanup_jobs.size();
|
||||
_pending_cleanup_jobs = {};
|
||||
_compacting.release_all();
|
||||
_owned_ranges_ptr = nullptr;
|
||||
return make_ready_future();
|
||||
}
|
||||
|
||||
virtual future<tasks::task_manager::task::progress> get_progress() const override {
|
||||
|
||||
@@ -689,3 +689,6 @@ maintenance_socket: ignore
# Note that creating keyspaces with tablets enabled or disabled is irreversible.
# The `tablets` option cannot be changed using `ALTER KEYSPACE`.
enable_tablets: true

# Enforce RF-rack-valid keyspaces.
rf_rack_valid_keyspaces: false
@@ -1564,7 +1564,7 @@ deps['test/boost/linearizing_input_stream_test'] = [
    "test/boost/linearizing_input_stream_test.cc",
    "test/lib/log.cc",
]
deps['test/boost/expr_test'] = ['test/boost/expr_test.cc', 'test/lib/expr_test_utils.cc'] + scylla_core
deps['test/boost/expr_test'] = ['test/boost/expr_test.cc', 'test/lib/expr_test_utils.cc'] + scylla_core + alternator
deps['test/boost/rate_limiter_test'] = ['test/boost/rate_limiter_test.cc', 'db/rate_limiter.cc']
deps['test/boost/exceptions_optimized_test'] = ['test/boost/exceptions_optimized_test.cc', 'utils/exceptions.cc']
deps['test/boost/exceptions_fallback_test'] = ['test/boost/exceptions_fallback_test.cc', 'utils/exceptions.cc']
@@ -1581,8 +1581,8 @@ deps['test/raft/many_test'] = ['test/raft/many_test.cc', 'test/raft/replication.
deps['test/raft/fsm_test'] = ['test/raft/fsm_test.cc', 'test/raft/helpers.cc', 'test/lib/log.cc'] + scylla_raft_dependencies
deps['test/raft/etcd_test'] = ['test/raft/etcd_test.cc', 'test/raft/helpers.cc', 'test/lib/log.cc'] + scylla_raft_dependencies
deps['test/raft/raft_sys_table_storage_test'] = ['test/raft/raft_sys_table_storage_test.cc'] + \
    scylla_core + scylla_tests_generic_dependencies
deps['test/boost/address_map_test'] = ['test/boost/address_map_test.cc'] + scylla_core
    scylla_core + alternator + scylla_tests_generic_dependencies
deps['test/boost/address_map_test'] = ['test/boost/address_map_test.cc'] + scylla_core + alternator
deps['test/raft/discovery_test'] = ['test/raft/discovery_test.cc',
    'test/raft/helpers.cc',
    'test/lib/log.cc',
16
cql3/Cql.g
@@ -709,17 +709,23 @@ batchStatement returns [std::unique_ptr<cql3::statements::raw::batch_statement>
    : K_BEGIN
      ( K_UNLOGGED { type = btype::UNLOGGED; } | K_COUNTER { type = btype::COUNTER; } )?
      K_BATCH ( usingClause[attrs] )?
      ( s=batchStatementObjective ';'? { statements.push_back(std::move(s)); } )*
      ( s=batchStatementObjective ';'?
        {
            auto&& stmt = *$s.statement;
            stmt->add_raw(sstring{$s.text});
            statements.push_back(std::move(stmt));
        } )*
      K_APPLY K_BATCH
      {
          $expr = std::make_unique<cql3::statements::raw::batch_statement>(type, std::move(attrs), std::move(statements));
      }
    ;

batchStatementObjective returns [std::unique_ptr<cql3::statements::raw::modification_statement> statement]
    : i=insertStatement { $statement = std::move(i); }
    | u=updateStatement { $statement = std::move(u); }
    | d=deleteStatement { $statement = std::move(d); }
batchStatementObjective returns [::lw_shared_ptr<std::unique_ptr<cql3::statements::raw::modification_statement>> statement]
    @init { using original_ret_type = std::unique_ptr<cql3::statements::raw::modification_statement>; }
    : i=insertStatement { $statement = make_lw_shared<original_ret_type>(std::move(i)); }
    | u=updateStatement { $statement = make_lw_shared<original_ret_type>(std::move(u)); }
    | d=deleteStatement { $statement = make_lw_shared<original_ret_type>(std::move(d)); }
    ;

dropAggregateStatement returns [std::unique_ptr<cql3::statements::drop_aggregate_statement> expr]
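The new batchStatementObjective return type deserves a note: the rule now hands the move-only modification statement back behind a copyable ::lw_shared_ptr, presumably because the generated parser code copies rule return values, which std::unique_ptr forbids. A rough, hypothetical sketch of the same pattern outside the grammar (parsed_modification and raw_statement_text are illustrative names):

// Hold a move-only pointer behind a copyable handle, then take ownership back exactly once.
using original_ret_type = std::unique_ptr<cql3::statements::raw::modification_statement>;
auto holder = make_lw_shared<original_ret_type>(std::move(parsed_modification));
original_ret_type stmt = std::move(*holder);   // extract the unique_ptr from the shared holder
stmt->add_raw(sstring{raw_statement_text});    // remember the statement's original CQL text
statements.push_back(std::move(stmt));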
@@ -13,6 +13,7 @@
#include <seastar/core/on_internal_error.hh>
#include <stdexcept>
#include "alter_keyspace_statement.hh"
#include "locator/tablets.hh"
#include "prepared_statement.hh"
#include "service/migration_manager.hh"
#include "service/storage_proxy.hh"
@@ -25,6 +26,7 @@
#include "create_keyspace_statement.hh"
#include "gms/feature_service.hh"
#include "replica/database.hh"
#include "db/config.hh"

static logging::logger mylogger("alter_keyspace");

@@ -193,9 +195,9 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
    event::schema_change::target_type target_type = event::schema_change::target_type::KEYSPACE;
    auto ks = qp.db().find_keyspace(_name);
    auto ks_md = ks.metadata();
    const auto& tm = *qp.proxy().get_token_metadata_ptr();
    const auto tmptr = qp.proxy().get_token_metadata_ptr();
    const auto& feat = qp.proxy().features();
    auto ks_md_update = _attrs->as_ks_metadata_update(ks_md, tm, feat);
    auto ks_md_update = _attrs->as_ks_metadata_update(ks_md, *tmptr, feat);
    std::vector<mutation> muts;
    std::vector<sstring> warnings;
    bool include_tablet_options = _attrs->get_map(_attrs->KW_TABLETS).has_value();
@@ -246,6 +248,36 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
        muts.insert(muts.begin(), schema_mutations.begin(), schema_mutations.end());
    }

    // If `rf_rack_valid_keyspaces` is enabled, it's forbidden to perform a schema change that
    // would lead to an RF-rack-invalid keyspace. Verify that this change does not.
    // For more context, see: scylladb/scylladb#23071.
    if (qp.db().get_config().rf_rack_valid_keyspaces()) {
        auto rs = locator::abstract_replication_strategy::create_replication_strategy(
                ks_md_update->strategy_name(),
                locator::replication_strategy_params(ks_md_update->strategy_options(), ks_md_update->initial_tablets()));

        try {
            // There are two things to note here:
            // 1. We hold a group0_guard, so it's correct to check this here.
            //    The topology or schema cannot change while we're performing this query.
            // 2. The replication strategy we use here does NOT represent the actual state
            //    we will arrive at after applying the schema change. For instance, if the user
            //    did not specify the RF for some of the DCs, it's equal to 0 in the replication
            //    strategy we pass to this function, while in reality that means that the RF
            //    will NOT change. That is not a problem:
            //    - RF=0 is valid for all DCs, so it won't trigger an exception on its own,
            //    - the keyspace must've been RF-rack-valid before this change. We check that
            //      condition for all keyspaces at startup.
            //    The second point is not really true because currently topological changes can
            //    disturb it (see scylladb/scylladb#23345), but we ignore that.
            locator::assert_rf_rack_valid_keyspace(_name, tmptr, *rs);
        } catch (const std::exception& e) {
            // There's no guarantee what the type of the exception will be, so we need to
            // wrap it manually here in a type that can be passed to the user.
            throw exceptions::invalid_request_exception(e.what());
        }
    }

    auto ret = ::make_shared<event::schema_change>(
        event::schema_change::change_type::UPDATED,
        target_type,
@@ -87,6 +87,9 @@ std::vector<::shared_ptr<index_target>> create_index_statement::validate_while_e
            "Secondary indexes are not supported on COMPACT STORAGE tables that have clustering columns");
    }

    if (!db.features().views_with_tablets && db.find_keyspace(keyspace()).get_replication_strategy().uses_tablets()) {
        throw exceptions::invalid_request_exception(format("Secondary indexes are not supported on base tables with tablets (keyspace '{}')", keyspace()));
    }
    validate_for_local_index(*schema);

    std::vector<::shared_ptr<index_target>> targets;
@@ -11,6 +11,8 @@
|
||||
#include <seastar/core/coroutine.hh>
|
||||
#include "cql3/statements/create_keyspace_statement.hh"
|
||||
#include "cql3/statements/ks_prop_defs.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include "locator/tablets.hh"
|
||||
#include "prepared_statement.hh"
|
||||
#include "data_dictionary/data_dictionary.hh"
|
||||
#include "data_dictionary/keyspace_metadata.hh"
|
||||
@@ -90,14 +92,14 @@ void create_keyspace_statement::validate(query_processor& qp, const service::cli
|
||||
|
||||
future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, std::vector<mutation>, cql3::cql_warnings_vec>> create_keyspace_statement::prepare_schema_mutations(query_processor& qp, const query_options&, api::timestamp_type ts) const {
|
||||
using namespace cql_transport;
|
||||
const auto& tm = *qp.proxy().get_token_metadata_ptr();
|
||||
const auto tmptr = qp.proxy().get_token_metadata_ptr();
|
||||
const auto& feat = qp.proxy().features();
|
||||
const auto& cfg = qp.db().get_config();
|
||||
std::vector<mutation> m;
|
||||
std::vector<sstring> warnings;
|
||||
|
||||
try {
|
||||
auto ksm = _attrs->as_ks_metadata(_name, tm, feat, cfg);
|
||||
auto ksm = _attrs->as_ks_metadata(_name, *tmptr, feat, cfg);
|
||||
m = service::prepare_new_keyspace_announcement(qp.db().real_database(), ksm, ts);
|
||||
// If the new keyspace uses tablets, as long as there are features
|
||||
// which aren't supported by tablets we want to warn the user that
|
||||
@@ -116,6 +118,21 @@ future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, std::vector
|
||||
"without tablets by adding AND TABLETS = {'enabled': false} "
|
||||
"to the CREATE KEYSPACE statement.");
|
||||
}
|
||||
|
||||
// If `rf_rack_valid_keyspaces` is enabled, it's forbidden to create an RF-rack-invalid keyspace.
|
||||
// Verify that it's RF-rack-valid.
|
||||
// For more context, see: scylladb/scylladb#23071.
|
||||
if (cfg.rf_rack_valid_keyspaces()) {
|
||||
try {
|
||||
// We hold a group0_guard, so it's correct to check this here.
|
||||
// The topology or schema cannot change while we're performing this query.
|
||||
locator::assert_rf_rack_valid_keyspace(_name, tmptr, *rs);
|
||||
} catch (const std::exception& e) {
|
||||
// There's no guarantee what the type of the exception will be, so we need to
|
||||
// wrap it manually here in a type that can be passed to the user.
|
||||
throw exceptions::invalid_request_exception(e.what());
|
||||
}
|
||||
}
|
||||
} catch (const exceptions::already_exists_exception& e) {
|
||||
if (!_if_not_exists) {
|
||||
co_return coroutine::exception(std::current_exception());
|
||||
|
||||
@@ -140,6 +140,9 @@ std::pair<view_ptr, cql3::cql_warnings_vec> create_view_statement::prepare_view(

    schema_ptr schema = validation::validate_column_family(db, _base_name.get_keyspace(), _base_name.get_column_family());

    if (!db.features().views_with_tablets && db.find_keyspace(keyspace()).get_replication_strategy().uses_tablets()) {
        throw exceptions::invalid_request_exception(format("Materialized views are not supported on base tables with tablets"));
    }
    if (schema->is_counter()) {
        throw exceptions::invalid_request_exception(format("Materialized views are not supported on counter tables"));
    }
@@ -536,6 +536,9 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
"The directory where the schema commit log is stored. This is a special commitlog instance used for schema and system tables. For optimal write performance, it is recommended the commit log be on a separate disk partition (ideally, a separate physical device) from the data file directories.")
|
||||
, data_file_directories(this, "data_file_directories", "datadir", value_status::Used, { },
|
||||
"The directory location where table data (SSTables) is stored.")
|
||||
, data_file_capacity(this, "data_file_capacity", liveness::LiveUpdate, value_status::Used, 0,
|
||||
"Total capacity in bytes for storing data files. Used by tablet load balancer to compute storage utilization."
|
||||
" If not set, will use file system's capacity.")
|
||||
, hints_directory(this, "hints_directory", value_status::Used, "",
|
||||
"The directory where hints files are stored if hinted handoff is enabled.")
|
||||
, view_hints_directory(this, "view_hints_directory", value_status::Used, "",
|
||||
@@ -1201,7 +1204,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
"Start serializing reads after their collective memory consumption goes above $normal_limit * $multiplier.")
|
||||
, reader_concurrency_semaphore_kill_limit_multiplier(this, "reader_concurrency_semaphore_kill_limit_multiplier", liveness::LiveUpdate, value_status::Used, 4,
|
||||
"Start killing reads after their collective memory consumption goes above $normal_limit * $multiplier.")
|
||||
, reader_concurrency_semaphore_cpu_concurrency(this, "reader_concurrency_semaphore_cpu_concurrency", liveness::LiveUpdate, value_status::Used, 1,
|
||||
, reader_concurrency_semaphore_cpu_concurrency(this, "reader_concurrency_semaphore_cpu_concurrency", liveness::LiveUpdate, value_status::Used, 2,
|
||||
"Admit new reads while there are less than this number of requests that need CPU.")
|
||||
, view_update_reader_concurrency_semaphore_serialize_limit_multiplier(this, "view_update_reader_concurrency_semaphore_serialize_limit_multiplier", liveness::LiveUpdate, value_status::Used, 2,
|
||||
"Start serializing view update reads after their collective memory consumption goes above $normal_limit * $multiplier.")
|
||||
@@ -1364,6 +1367,9 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
, disk_space_monitor_high_polling_interval_in_seconds(this, "disk_space_monitor_high_polling_interval_in_seconds", value_status::Used, 1, "Disk-space polling interval at or above polling threshold")
|
||||
, disk_space_monitor_polling_interval_threshold(this, "disk_space_monitor_polling_interval_threshold", value_status::Used, 0.9, "Disk-space polling threshold. Polling interval is increased when disk utilization is greater than or equal to this threshold")
|
||||
, enable_create_table_with_compact_storage(this, "enable_create_table_with_compact_storage", liveness::LiveUpdate, value_status::Used, false, "Enable the deprecated feature of CREATE TABLE WITH COMPACT STORAGE. This feature will eventually be removed in a future version.")
|
||||
, rf_rack_valid_keyspaces(this, "rf_rack_valid_keyspaces", liveness::MustRestart, value_status::Used, false,
|
||||
"Enforce RF-rack-valid keyspaces. Additionally, if there are existing RF-rack-invalid "
|
||||
"keyspaces, attempting to start a node with this option ON will fail.")
|
||||
, default_log_level(this, "default_log_level", value_status::Used, seastar::log_level::info, "Default log level for log messages")
|
||||
, logger_log_level(this, "logger_log_level", value_status::Used, {}, "Map of logger name to log level. Valid log levels are 'error', 'warn', 'info', 'debug' and 'trace'")
|
||||
, log_to_stdout(this, "log_to_stdout", value_status::Used, true, "Send log output to stdout")
|
||||
|
||||
@@ -183,6 +183,7 @@ public:
|
||||
named_value<sstring> commitlog_directory;
|
||||
named_value<sstring> schema_commitlog_directory;
|
||||
named_value<string_list> data_file_directories;
|
||||
named_value<uint64_t> data_file_capacity;
|
||||
named_value<sstring> hints_directory;
|
||||
named_value<sstring> view_hints_directory;
|
||||
named_value<sstring> saved_caches_directory;
|
||||
@@ -535,6 +536,8 @@ public:
|
||||
|
||||
named_value<bool> enable_create_table_with_compact_storage;
|
||||
|
||||
named_value<bool> rf_rack_valid_keyspaces;
|
||||
|
||||
static const sstring default_tls_priority;
|
||||
private:
|
||||
template<typename T>
|
||||
|
||||
@@ -146,6 +146,10 @@ future<> hint_endpoint_manager::stop(drain should_drain) noexcept {
|
||||
});
|
||||
}
|
||||
|
||||
void hint_endpoint_manager::cancel_draining() noexcept {
|
||||
_sender.cancel_draining();
|
||||
}
|
||||
|
||||
hint_endpoint_manager::hint_endpoint_manager(const endpoint_id& key, fs::path hint_directory, manager& shard_manager)
|
||||
: _key(key)
|
||||
, _shard_manager(shard_manager)
|
||||
|
||||
@@ -102,6 +102,8 @@ public:
|
||||
/// \return Ready future when all operations are complete
|
||||
future<> stop(drain should_drain = drain::no) noexcept;
|
||||
|
||||
void cancel_draining() noexcept;
|
||||
|
||||
/// \brief Start the timer.
|
||||
void start();
|
||||
|
||||
@@ -144,6 +146,10 @@ public:
|
||||
return _state.contains(state::stopped);
|
||||
}
|
||||
|
||||
bool canceled_draining() const noexcept {
|
||||
return _sender.canceled_draining();
|
||||
}
|
||||
|
||||
/// \brief Returns replay position of the most recently written hint.
|
||||
///
|
||||
/// If there weren't any hints written during this endpoint manager's lifetime, a zero replay_position is returned.
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "db/hints/internal/hint_sender.hh"
|
||||
|
||||
// Seastar features.
|
||||
#include <chrono>
|
||||
#include <exception>
|
||||
#include <seastar/core/abort_source.hh>
|
||||
#include <seastar/core/coroutine.hh>
|
||||
@@ -192,6 +193,14 @@ future<> hint_sender::stop(drain should_drain) noexcept {
|
||||
});
|
||||
}
|
||||
|
||||
void hint_sender::cancel_draining() {
|
||||
manager_logger.info("Draining of {} has been marked as canceled", _ep_key);
|
||||
if (_state.contains(state::draining)) {
|
||||
_state.remove(state::draining);
|
||||
}
|
||||
_state.set(state::canceled_draining);
|
||||
}
|
||||
|
||||
void hint_sender::add_segment(sstring seg_name) {
|
||||
_segments_to_replay.emplace_back(std::move(seg_name));
|
||||
}
|
||||
@@ -449,6 +458,8 @@ bool hint_sender::send_one_file(const sstring& fname) {
|
||||
gc_clock::duration secs_since_file_mod = std::chrono::seconds(last_mod.tv_sec);
|
||||
lw_shared_ptr<send_one_file_ctx> ctx_ptr = make_lw_shared<send_one_file_ctx>(_last_schema_ver_to_column_mapping);
|
||||
|
||||
struct canceled_draining_exception {};
|
||||
|
||||
try {
|
||||
commitlog::read_log_file(fname, manager::FILENAME_PREFIX, [this, secs_since_file_mod, &fname, ctx_ptr] (commitlog::buffer_and_replay_position buf_rp) -> future<> {
|
||||
auto& buf = buf_rp.buffer;
|
||||
@@ -461,6 +472,12 @@ bool hint_sender::send_one_file(const sstring& fname) {
|
||||
co_return;
|
||||
}
|
||||
|
||||
if (canceled_draining()) {
|
||||
manager_logger.debug("[{}] Exiting reading from commitlog because of canceled draining", _ep_key);
|
||||
// We need to throw an exception here to cancel reading the segment.
|
||||
throw canceled_draining_exception{};
|
||||
}
|
||||
|
||||
// Break early if stop() was called or the destination node went down.
|
||||
if (!can_send()) {
|
||||
ctx_ptr->segment_replay_failed = true;
|
||||
@@ -491,6 +508,8 @@ bool hint_sender::send_one_file(const sstring& fname) {
|
||||
manager_logger.error("{}: {}. Dropping...", fname, ex.what());
|
||||
ctx_ptr->segment_replay_failed = false;
|
||||
++this->shard_stats().corrupted_files;
|
||||
} catch (const canceled_draining_exception&) {
|
||||
manager_logger.debug("[{}] Loop in send_one_file finishes due to canceled draining", _ep_key);
|
||||
} catch (...) {
|
||||
manager_logger.trace("sending of {} failed: {}", fname, std::current_exception());
|
||||
ctx_ptr->segment_replay_failed = true;
|
||||
@@ -499,6 +518,12 @@ bool hint_sender::send_one_file(const sstring& fname) {
|
||||
// wait till all background hints sending is complete
|
||||
ctx_ptr->file_send_gate.close().get();
|
||||
|
||||
// If draining was canceled, we can't say anything about the segment's state,
// so return immediately; for the same reason we also return false here.
|
||||
if (canceled_draining()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// If we are draining ignore failures and drop the segment even if we failed to send it.
|
||||
if (draining() && ctx_ptr->segment_replay_failed) {
|
||||
manager_logger.trace("send_one_file(): we are draining so we are going to delete the segment anyway");
|
||||
@@ -556,6 +581,10 @@ void hint_sender::send_hints_maybe() noexcept {
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
if (canceled_draining()) {
|
||||
manager_logger.debug("[{}] Exiting loop in send_hints_maybe because of canceled draining", _ep_key);
|
||||
break;
|
||||
}
|
||||
const sstring* seg_name = name_of_current_segment();
|
||||
if (!seg_name || !replay_allowed() || !can_send()) {
|
||||
break;
|
||||
|
||||
@@ -66,12 +66,14 @@ class hint_sender {
|
||||
stopping, // stop() was called
|
||||
ep_state_left_the_ring, // destination Node is not a part of the ring anymore - usually means that it has been decommissioned
|
||||
draining, // try to send everything out and ignore errors
|
||||
canceled_draining, // draining was started, but it got canceled
|
||||
};
|
||||
|
||||
using state_set = enum_set<super_enum<state,
|
||||
state::stopping,
|
||||
state::ep_state_left_the_ring,
|
||||
state::draining>>;
|
||||
state::draining,
|
||||
state::canceled_draining>>;
|
||||
|
||||
struct send_one_file_ctx {
|
||||
send_one_file_ctx(std::unordered_map<table_schema_version, column_mapping>& last_schema_ver_to_column_mapping)
|
||||
@@ -140,6 +142,12 @@ public:
|
||||
/// \param should_drain if is drain::yes - drain all pending hints
|
||||
future<> stop(drain should_drain) noexcept;
|
||||
|
||||
void cancel_draining();
|
||||
|
||||
bool canceled_draining() const noexcept {
|
||||
return _state.contains(state::canceled_draining);
|
||||
}
|
||||
|
||||
/// \brief Add a new segment ready for sending.
|
||||
void add_segment(sstring seg_name);
|
||||
|
||||
|
||||
@@ -220,11 +220,24 @@ future<> manager::stop() {
|
||||
|
||||
set_stopping();
|
||||
|
||||
return _migrating_done.finally([this] {
|
||||
const auto& node = *_proxy.get_token_metadata_ptr()->get_topology().this_node();
|
||||
const bool leaving = node.is_leaving() || node.left();
|
||||
|
||||
return _migrating_done.finally([this, leaving] {
|
||||
// We want to stop the manager as soon as possible if it's not leaving the cluster.
|
||||
// Because of that, we need to cancel all ongoing drains (since that can take quite a bit of time),
|
||||
// but we also need to ensure that no new drains will be started in the meantime.
|
||||
if (!leaving) {
|
||||
for (auto& [_, ep_man] : _ep_managers) {
|
||||
ep_man.cancel_draining();
|
||||
}
|
||||
}
|
||||
return _draining_eps_gate.close();
|
||||
// At this point, all endpoint managers that were being previously drained have been deleted from the map.
|
||||
// In other words, the next lambda is safe to run, i.e. we won't call `hint_endpoint_manager::stop()` twice.
|
||||
}).finally([this] {
|
||||
return parallel_for_each(_ep_managers | std::views::values, [] (hint_endpoint_manager& ep_man) {
|
||||
return ep_man.stop();
|
||||
return ep_man.stop(drain::no);
|
||||
}).finally([this] {
|
||||
_ep_managers.clear();
|
||||
_hint_directory_manager.clear();
|
||||
@@ -667,7 +680,7 @@ future<> manager::drain_for(endpoint_id host_id, gms::inet_address ip) noexcept
|
||||
co_return;
|
||||
}
|
||||
|
||||
manager_logger.trace("on_leave_cluster: {} is removed/decommissioned", host_id);
|
||||
manager_logger.trace("Draining starts for {}", host_id);
|
||||
|
||||
const auto holder = seastar::gate::holder{_draining_eps_gate};
|
||||
// As long as we hold on to this lock, no migration of hinted handoff to host IDs
|
||||
@@ -677,9 +690,24 @@ future<> manager::drain_for(endpoint_id host_id, gms::inet_address ip) noexcept
|
||||
|
||||
// After an endpoint has been drained, we remove its directory with all of its contents.
|
||||
auto drain_ep_manager = [] (hint_endpoint_manager& ep_man) -> future<> {
|
||||
return ep_man.stop(drain::yes).finally([&] {
|
||||
return ep_man.with_file_update_mutex([&ep_man] {
|
||||
return remove_file(ep_man.hints_dir().native());
|
||||
// Prevent a drain if the endpoint manager was marked to cancel it.
|
||||
if (ep_man.canceled_draining()) {
|
||||
return make_ready_future();
|
||||
}
|
||||
return ep_man.stop(drain::yes).finally([&ep_man] {
|
||||
// If draining was canceled, we can't remove the hint directory yet
|
||||
// because there might still be some hints that we should send.
|
||||
// We'll do that when the node starts again.
|
||||
// Note that canceling draining can ONLY occur when the node is simply stopping.
|
||||
// That cannot happen when decommissioning the node.
|
||||
if (ep_man.canceled_draining()) {
|
||||
return make_ready_future();
|
||||
}
|
||||
|
||||
return ep_man.with_file_update_mutex([&ep_man] -> future<> {
|
||||
return remove_file(ep_man.hints_dir().native()).then([&ep_man] {
|
||||
manager_logger.debug("Removed hint directory for {}", ep_man.end_point_key());
|
||||
});
|
||||
});
|
||||
});
|
||||
};
|
||||
@@ -986,4 +1014,18 @@ future<> manager::perform_migration() {
|
||||
manager_logger.info("Migration of hinted handoff to host ID has finished successfully");
|
||||
}
|
||||
|
||||
// Technical note: This function obviously doesn't need to be a coroutine. However, it's better to impose
|
||||
// this constraint early on with possible future refactors in mind. It should be easier
|
||||
// to modify the function this way.
|
||||
future<> manager::drain_left_nodes() {
|
||||
for (const auto& [host_id, ep_man] : _ep_managers) {
|
||||
if (!_proxy.get_token_metadata_ptr()->is_normal_token_owner(host_id)) {
|
||||
// It's safe to discard this future. It's awaited in `manager::stop()`.
|
||||
(void) drain_for(host_id, {});
|
||||
}
|
||||
}
|
||||
|
||||
co_return;
|
||||
}
|
||||
|
||||
} // namespace db::hints
|
||||
|
||||
@@ -382,6 +382,12 @@ private:
|
||||
/// ALL requested sync points will be canceled, i.e. an exception will be issued
|
||||
/// in the corresponding futures.
|
||||
future<> perform_migration();
|
||||
|
||||
public:
|
||||
/// Performs draining for all nodes that have already left the cluster.
|
||||
/// This should only be called when the hint endpoint managers have been initialized
|
||||
/// and the hint manager has started.
|
||||
future<> drain_left_nodes();
|
||||
};
|
||||
|
||||
} // namespace db::hints
|
||||
|
||||
@@ -239,6 +239,15 @@ future<> resource_manager::stop() noexcept {
|
||||
});
|
||||
}
|
||||
|
||||
future<> resource_manager::drain_hints_for_left_nodes() {
|
||||
for (manager& m : _shard_managers) {
|
||||
// It's safe to discard the future here. It's awaited in `manager::stop()`.
|
||||
(void) m.drain_left_nodes();
|
||||
}
|
||||
|
||||
co_return;
|
||||
}
|
||||
|
||||
future<> resource_manager::register_manager(manager& m) {
|
||||
return with_semaphore(_operation_lock, 1, [this, &m] () {
|
||||
return with_semaphore(_space_watchdog.update_lock(), 1, [this, &m] {
|
||||
|
||||
@@ -188,6 +188,8 @@ public:
|
||||
/// \brief Allows replaying hints for managers which are registered now or will be in the future.
|
||||
void allow_replaying() noexcept;
|
||||
|
||||
future<> drain_hints_for_left_nodes();
|
||||
|
||||
/// \brief Registers the hints::manager in resource_manager, and starts it, if resource_manager is already running.
|
||||
///
|
||||
/// The hints::managers can be added either before or after resource_manager starts.
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
#include "gms/feature_service.hh"
|
||||
#include "system_keyspace_view_types.hh"
|
||||
#include "schema/schema_builder.hh"
|
||||
#include "timestamp.hh"
|
||||
#include "utils/assert.hh"
|
||||
#include "utils/hashers.hh"
|
||||
#include "utils/log.hh"
|
||||
@@ -2931,9 +2932,8 @@ future<std::optional<mutation>> system_keyspace::get_service_levels_version_muta
|
||||
return get_scylla_local_mutation(_db, SERVICE_LEVELS_VERSION_KEY);
|
||||
}
|
||||
|
||||
future<mutation> system_keyspace::make_service_levels_version_mutation(int8_t version, const service::group0_guard& guard) {
|
||||
future<mutation> system_keyspace::make_service_levels_version_mutation(int8_t version, api::timestamp_type timestamp) {
|
||||
static sstring query = format("INSERT INTO {}.{} (key, value) VALUES (?, ?);", db::system_keyspace::NAME, db::system_keyspace::SCYLLA_LOCAL);
|
||||
auto timestamp = guard.write_timestamp();
|
||||
auto muts = co_await _qp.get_mutations_internal(query, internal_system_query_state(), timestamp, {SERVICE_LEVELS_VERSION_KEY, format("{}", version)});
|
||||
|
||||
if (muts.size() != 1) {
|
||||
|
||||
@@ -654,7 +654,7 @@ public:
|
||||
public:
|
||||
future<std::optional<int8_t>> get_service_levels_version();
|
||||
|
||||
future<mutation> make_service_levels_version_mutation(int8_t version, const service::group0_guard& guard);
|
||||
future<mutation> make_service_levels_version_mutation(int8_t version, api::timestamp_type timestamp);
|
||||
future<std::optional<mutation>> get_service_levels_version_mutation();
|
||||
|
||||
// Publishes a new compression dictionary to `dicts`,
|
||||
|
||||
127
db/view/regular_column_transformation.hh
Normal file
@@ -0,0 +1,127 @@
/*
 * Copyright (C) 2024-present ScyllaDB
 */

/*
 * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
 */

#pragma once

#include "column_computation.hh"
#include "mutation/atomic_cell.hh"
#include "timestamp.hh"
#include <type_traits>

class row_marker;

// In a basic column_computation defined in column_computation.hh, the
// compute_value() method is only based on the partition key, and it must
// return a value. That API has very limited applications - basically the
// only thing we can implement with it is token_column_computation, which
// we used to create the token column in secondary indexes.
// The regular_column_transformation base class here is more powerful, but
// still is not a completely general computation: its compute_value() virtual
// method can transform the value read from a single cell of a regular column
// into a new cell stored in a structure regular_column_transformation::result.
//
// In more detail, the assumptions of regular_column_transformation are:
// 1. compute_value() computes the value based on a *single* column in a
//    row passed to compute_value().
//    This assumption means that the value or deletion of the value always
//    has a single known timestamp (and the value can't be half-missing)
//    and single TTL information. That would not have been possible if we
//    allowed the computation to depend on multiple columns.
// 2. compute_value() computes the value based on a *regular* column in the
//    base table. This means that an update can modify this value (unlike a
//    base-table key column that can't change in an update), so the view
//    update code needs to compute the value before and after the update,
//    and potentially delete and create view rows.
// 3. compute_value() returns a regular_column_transformation::result which
//    includes a value and its liveness information (timestamp and ttl/expiry)
//    or is missing a value.

class regular_column_transformation : public column_computation {
public:
    struct result {
        // We can use "bytes" instead of "managed_bytes" here because we know
        // that a column_computation is only used for generating a key value,
        // and that is limited to 64K. This limitation is enforced below -
        // we never linearize a cell's value if its size is more than 64K.
        std::optional<bytes> _value;

        // _ttl and _expiry are only defined if _value is set.
        // The default values below are used when the source cell does not
        // expire, and are the same values that row_marker uses for a non-
        // expiring marker. This is useful when creating a row_marker from
        // get_ttl() and get_expiry().
        gc_clock::duration _ttl { 0 };
        gc_clock::time_point _expiry { gc_clock::duration(0) };

        // _ts may be set even if _value is missing, in which case it remembers
        // the timestamp of a tombstone. Note that the current view-update code
        // that uses this class doesn't use _ts when _value is missing.
        api::timestamp_type _ts = api::missing_timestamp;

        api::timestamp_type get_ts() const {
            return _ts;
        }

        bool has_value() const {
            return _value.has_value();
        }

        // Should only be called if has_value() is true:
        const bytes& get_value() const {
            return *_value;
        }
        gc_clock::duration get_ttl() const {
            return _ttl;
        }
        gc_clock::time_point get_expiry() const {
            return _expiry;
        }

        // A missing computation result
        result() { }

        // Construct a computation result by copying a given atomic_cell -
        // including its value, timestamp, and ttl - or deletion timestamp.
        // The second parameter is an optional transformation function f -
        // taking a bytes and returning an optional<bytes> - that transforms
        // the value of the cell but keeps its other liveness information.
        // If f returns a nullopt, it causes the view row to be deleted.
        template<typename Func=std::identity>
        requires std::invocable<Func, bytes> && std::convertible_to<std::invoke_result_t<Func, bytes>, std::optional<bytes>>
        result(atomic_cell_view cell, Func f = {}) {
            _ts = cell.timestamp();
            if (cell.is_live()) {
                // If the cell is larger than what a key can hold (64KB),
                // return a missing value. This lets us skip this item during
                // view building and avoid hanging the view build as described
                // in #8627. But it doesn't prevent later inserting such an item
                // into the base table, nor does it implement front-end specific
                // limits (such as Alternator's 1K or 2K limits - see #10347).
                // Those stricter limits should be validated in the base-table
                // write code, not here - deep inside the view update code.
                // Note also we assume that f() doesn't grow the value further.
                if (cell.value().size() >= 65536) {
                    return;
                }
                _value = f(to_bytes(cell.value()));
                if (_value) {
                    if (cell.is_live_and_has_ttl()) {
                        _ttl = cell.ttl();
                        _expiry = cell.expiry();
                    }
                }
            }
        }
    };

    virtual ~regular_column_transformation() = default;
    virtual result compute_value(
        const schema& schema,
        const partition_key& key,
        const db::view::clustering_or_static_row& row) const = 0;
};
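To illustrate how the result(cell, f) constructor above is meant to be used, here is a minimal sketch of a hypothetical subclass; the column name, the get_column_definition()/find_cell() lookups and the transformation are illustrative assumptions rather than code from this series:

// Hypothetical sketch only - assumes the usual schema/row lookup helpers.
class uppercase_transformation : public regular_column_transformation {
    bytes _col_name;  // name of the regular base column this computation reads
public:
    virtual result compute_value(const schema& s, const partition_key&,
            const db::view::clustering_or_static_row& row) const override {
        const column_definition* def = s.get_column_definition(_col_name);
        if (!def) {
            return result();  // column not in the schema -> missing result
        }
        const auto* cell = row.cells().find_cell(def->id);
        if (!cell) {
            return result();  // column not present in this row -> missing result
        }
        // Keep the cell's timestamp and TTL but transform its value; returning
        // nullopt from the lambda would instead make the computed value missing.
        return result(cell->as_atomic_cell(*def), [](bytes b) -> std::optional<bytes> {
            for (auto& ch : b) {
                ch = std::toupper(static_cast<unsigned char>(ch));
            }
            return b;
        });
    }
    // clone(), serialize() and the partition-key-only compute_value() are omitted in this sketch.
};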
425
db/view/view.cc
425
db/view/view.cc
@@ -36,6 +36,7 @@
|
||||
#include "db/view/view_builder.hh"
|
||||
#include "db/view/view_updating_consumer.hh"
|
||||
#include "db/view/view_update_generator.hh"
|
||||
#include "db/view/regular_column_transformation.hh"
|
||||
#include "db/system_keyspace_view_types.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "db/system_distributed_keyspace.hh"
|
||||
@@ -506,79 +507,6 @@ size_t view_updates::op_count() const {
|
||||
return _op_count;
|
||||
}
|
||||
|
||||
row_marker view_updates::compute_row_marker(const clustering_or_static_row& base_row) const {
|
||||
/*
|
||||
* We need to compute both the timestamp and expiration for view rows.
|
||||
*
|
||||
* Below there are several distinct cases depending on how many new key
|
||||
* columns the view has - i.e., how many of the view's key columns were
|
||||
* regular columns in the base. base_regular_columns_in_view_pk.size():
|
||||
*
|
||||
* Zero new key columns:
|
||||
* The view row's key is composed only of base key columns, and those
|
||||
* cannot be changed in an update, so the view row remains alive as
|
||||
* long as the base row is alive. We need to return the same row
|
||||
* marker as the base for the view - to keep an empty view row alive
|
||||
* for as long as an empty base row exists.
|
||||
* Note that in this case, if there are *unselected* base columns, we
|
||||
* may need to keep an empty view row alive even without a row marker
|
||||
* because the base row (which has additional columns) is still alive.
|
||||
* For that we have the "virtual columns" feature: In the zero new
|
||||
* key columns case, we put unselected columns in the view as empty
|
||||
* columns, to keep the view row alive.
|
||||
*
|
||||
* One new key column:
|
||||
* In this case, there is a regular base column that is part of the
|
||||
* view key. This regular column can be added or deleted in an update,
|
||||
* or its expiration be set, and those can cause the view row -
|
||||
* including its row marker - to need to appear or disappear as well.
|
||||
* So the liveness of cell of this one column determines the liveness
|
||||
* of the view row and the row marker that we return.
|
||||
*
|
||||
* Two or more new key columns:
|
||||
* This case is explicitly NOT supported in CQL - one cannot create a
|
||||
* view with more than one base-regular columns in its key. In general
|
||||
* picking one liveness (timestamp and expiration) is not possible
|
||||
* if there are multiple regular base columns in the view key, as
|
||||
* those can have different liveness.
|
||||
* However, we do allow this case for Alternator - we need to allow
|
||||
* the case of two (but not more) because the DynamoDB API allows
|
||||
* creating a GSI whose two key columns (hash and range key) were
|
||||
* regular columns.
|
||||
* We can support this case in Alternator because it doesn't use
|
||||
* expiration (the "TTL" it does support is different), and doesn't
|
||||
* support user-defined timestamps. But, the two columns can still
|
||||
* have different timestamps - this happens if an update modifies
|
||||
* just one of them. In this case the timestamp of the view update
|
||||
* (and that of the row marker we return) is the later of these two
|
||||
* updated columns.
|
||||
*/
|
||||
const auto& col_ids = base_row.is_clustering_row()
|
||||
? _base_info->base_regular_columns_in_view_pk()
|
||||
: _base_info->base_static_columns_in_view_pk();
|
||||
if (!col_ids.empty()) {
|
||||
auto& def = _base->column_at(base_row.column_kind(), col_ids[0]);
|
||||
// Note: multi-cell columns can't be part of the primary key.
|
||||
auto cell = base_row.cells().cell_at(col_ids[0]).as_atomic_cell(def);
|
||||
auto ts = cell.timestamp();
|
||||
if (col_ids.size() > 1){
|
||||
// As explained above, this case only happens in Alternator,
|
||||
// and we may need to pick a higher ts:
|
||||
auto& second_def = _base->column_at(base_row.column_kind(), col_ids[1]);
|
||||
auto second_cell = base_row.cells().cell_at(col_ids[1]).as_atomic_cell(second_def);
|
||||
auto second_ts = second_cell.timestamp();
|
||||
ts = std::max(ts, second_ts);
|
||||
// Alternator isn't supposed to have TTL or more than two col_ids!
|
||||
if (col_ids.size() != 2 || cell.is_live_and_has_ttl() || second_cell.is_live_and_has_ttl()) [[unlikely]] {
|
||||
utils::on_internal_error(format("Unexpected col_ids length {} or has TTL", col_ids.size()));
|
||||
}
|
||||
}
|
||||
return cell.is_live_and_has_ttl() ? row_marker(ts, cell.ttl(), cell.expiry()) : row_marker(ts);
|
||||
}
|
||||
|
||||
return base_row.marker();
|
||||
}
|
||||
|
||||
namespace {
|
||||
// The following struct is identical to view_key_with_action, except the key
|
||||
// is stored as a managed_bytes_view instead of bytes.
|
||||
@@ -654,8 +582,8 @@ public:
|
||||
return {_update.key()->get_component(_base, base_col->position())};
|
||||
default:
|
||||
if (base_col->kind != _update.column_kind()) {
|
||||
on_internal_error(vlogger, format("Tried to get a {} column from a {} row update, which is impossible",
|
||||
to_sstring(base_col->kind), _update.is_clustering_row() ? "clustering" : "static"));
|
||||
on_internal_error(vlogger, format("Tried to get a {} column {} from a {} row update, which is impossible",
|
||||
to_sstring(base_col->kind), base_col->name_as_text(), _update.is_clustering_row() ? "clustering" : "static"));
|
||||
}
|
||||
auto& c = _update.cells().cell_at(base_col->id);
|
||||
auto value_view = base_col->is_atomic() ? c.as_atomic_cell(cdef).value() : c.as_collection_mutation().data;
|
||||
@@ -676,6 +604,22 @@ private:
|
||||
return handle_collection_column_computation(collection_computation);
|
||||
}
|
||||
|
||||
// TODO: we already calculated this computation in updatable_view_key_cols,
|
||||
// so perhaps we should pass it here and not re-compute it. But this will
|
||||
// mean computed columns will only work for view key columns (currently
|
||||
// we assume that anyway)
|
||||
if (auto* c = dynamic_cast<const regular_column_transformation*>(&computation)) {
|
||||
regular_column_transformation::result after =
|
||||
c->compute_value(_base, _base_key, _update);
|
||||
if (after.has_value()) {
|
||||
return {managed_bytes_view(linearized_values.emplace_back(after.get_value()))};
|
||||
}
|
||||
// We only get to this function when we know the _update row
|
||||
// exists and call it to read its key columns, so we don't expect
|
||||
// to see a missing value for any of those columns
|
||||
on_internal_error(vlogger, fmt::format("unexpected call to handle_computed_column {} missing in update", cdef.name_as_text()));
|
||||
}
|
||||
|
||||
auto computed_value = computation.compute_value(_base, _base_key);
|
||||
return {managed_bytes_view(linearized_values.emplace_back(std::move(computed_value)))};
|
||||
}
|
||||
@@ -727,7 +671,6 @@ view_updates::get_view_rows(const partition_key& base_key, const clustering_or_s
|
||||
if (partition.partition_tombstone() && partition.partition_tombstone() == row_delete_tomb.tomb()) {
|
||||
return;
|
||||
}
|
||||
|
||||
ret.push_back({&partition.clustered_row(*_view, std::move(ckey)), action});
|
||||
};
|
||||
|
||||
@@ -934,13 +877,12 @@ static void add_cells_to_view(const schema& base, const schema& view, column_kin
|
||||
* Creates a view entry corresponding to the provided base row.
|
||||
* This method checks that the base row does match the view filter before applying anything.
|
||||
*/
|
||||
void view_updates::create_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& update, gc_clock::time_point now) {
|
||||
void view_updates::create_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& update, gc_clock::time_point now, row_marker update_marker) {
|
||||
if (!matches_view_filter(db, *_base, _view_info, base_key, update, now)) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto view_rows = get_view_rows(base_key, update, std::nullopt, {});
|
||||
auto update_marker = compute_row_marker(update);
|
||||
const auto kind = update.column_kind();
|
||||
for (const auto& [r, action]: view_rows) {
|
||||
if (auto rm = std::get_if<row_marker>(&action)) {
|
||||
@@ -958,48 +900,28 @@ void view_updates::create_entry(data_dictionary::database db, const partition_ke
|
||||
* Deletes the view entry corresponding to the provided base row.
|
||||
* This method checks that the base row does match the view filter before bothering.
|
||||
*/
|
||||
void view_updates::delete_old_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& existing, const clustering_or_static_row& update, gc_clock::time_point now) {
|
||||
void view_updates::delete_old_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& existing, const clustering_or_static_row& update, gc_clock::time_point now, api::timestamp_type deletion_ts) {
|
||||
// Before deleting an old entry, make sure it was matching the view filter
|
||||
// (otherwise there is nothing to delete)
|
||||
if (matches_view_filter(db, *_base, _view_info, base_key, existing, now)) {
|
||||
do_delete_old_entry(base_key, existing, update, now);
|
||||
do_delete_old_entry(base_key, existing, update, now, deletion_ts);
|
||||
}
|
||||
}
|
||||
|
||||
void view_updates::do_delete_old_entry(const partition_key& base_key, const clustering_or_static_row& existing, const clustering_or_static_row& update, gc_clock::time_point now) {
|
||||
void view_updates::do_delete_old_entry(const partition_key& base_key, const clustering_or_static_row& existing, const clustering_or_static_row& update, gc_clock::time_point now, api::timestamp_type deletion_ts) {
|
||||
auto view_rows = get_view_rows(base_key, existing, std::nullopt, update.tomb());
|
||||
const auto kind = existing.column_kind();
|
||||
for (const auto& [r, action] : view_rows) {
|
||||
const auto& col_ids = existing.is_clustering_row()
|
||||
? _base_info->base_regular_columns_in_view_pk()
|
||||
: _base_info->base_static_columns_in_view_pk();
|
||||
if (_view_info.has_computed_column_depending_on_base_non_primary_key()) {
|
||||
if (auto ts_tag = std::get_if<view_key_and_action::shadowable_tombstone_tag>(&action)) {
|
||||
r->apply(ts_tag->into_shadowable_tombstone(now));
|
||||
}
|
||||
} else if (!col_ids.empty()) {
|
||||
// We delete the old row using a shadowable row tombstone, making sure that
|
||||
// the tombstone deletes everything in the row (or it might still show up).
|
||||
// Note: multi-cell columns can't be part of the primary key.
|
||||
auto& def = _base->column_at(kind, col_ids[0]);
|
||||
auto cell = existing.cells().cell_at(col_ids[0]).as_atomic_cell(def);
|
||||
auto ts = cell.timestamp();
|
||||
if (col_ids.size() > 1) {
|
||||
// This is the Alternator-only support for two regular base
|
||||
// columns that become view key columns. See explanation in
|
||||
// view_updates::compute_row_marker().
|
||||
auto& second_def = _base->column_at(kind, col_ids[1]);
|
||||
auto second_cell = existing.cells().cell_at(col_ids[1]).as_atomic_cell(second_def);
|
||||
auto second_ts = second_cell.timestamp();
|
||||
ts = std::max(ts, second_ts);
|
||||
// Alternator isn't supposed to have more than two col_ids!
|
||||
if (col_ids.size() != 2) [[unlikely]] {
|
||||
utils::on_internal_error(format("Unexpected col_ids length {}", col_ids.size()));
|
||||
}
|
||||
}
|
||||
if (cell.is_live()) {
|
||||
r->apply(shadowable_tombstone(ts, now));
|
||||
}
|
||||
if (!col_ids.empty() || _view_info.has_computed_column_depending_on_base_non_primary_key()) {
|
||||
// The view key could have been modified because it contains or
|
||||
// depends on a non-primary-key column. The fact that this function was
|
||||
// called instead of update_entry() means the caller knows it
|
||||
// wants to delete the old row (with the given deletion_ts) and
|
||||
// will create a different one. So let's honor this.
|
||||
r->apply(shadowable_tombstone(deletion_ts, now));
|
||||
} else {
|
||||
// "update" caused the base row to have been deleted, and !col_id
|
||||
// means view row is the same - so it needs to be deleted as well
|
||||
@@ -1100,15 +1022,15 @@ bool view_updates::can_skip_view_updates(const clustering_or_static_row& update,
|
||||
* This method checks that the base row (before and after) matches the view filter before
|
||||
* applying anything.
|
||||
*/
|
||||
void view_updates::update_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& update, const clustering_or_static_row& existing, gc_clock::time_point now) {
|
||||
void view_updates::update_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& update, const clustering_or_static_row& existing, gc_clock::time_point now, row_marker update_marker) {
|
||||
// While we know update and existing correspond to the same view entry,
|
||||
// they may not match the view filter.
|
||||
if (!matches_view_filter(db, *_base, _view_info, base_key, existing, now)) {
|
||||
create_entry(db, base_key, update, now);
|
||||
create_entry(db, base_key, update, now, update_marker);
|
||||
return;
|
||||
}
|
||||
if (!matches_view_filter(db, *_base, _view_info, base_key, update, now)) {
|
||||
do_delete_old_entry(base_key, existing, update, now);
|
||||
do_delete_old_entry(base_key, existing, update, now, update_marker.timestamp());
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1117,7 +1039,7 @@ void view_updates::update_entry(data_dictionary::database db, const partition_ke
|
||||
}
|
||||
|
||||
auto view_rows = get_view_rows(base_key, update, std::nullopt, {});
|
||||
auto update_marker = compute_row_marker(update);
|
||||
|
||||
const auto kind = update.column_kind();
|
||||
for (const auto& [r, action] : view_rows) {
|
||||
if (auto rm = std::get_if<row_marker>(&action)) {
|
||||
@@ -1133,6 +1055,8 @@ void view_updates::update_entry(data_dictionary::database db, const partition_ke
|
||||
_op_count += view_rows.size();
|
||||
}
|
||||
|
||||
// Note: despite the general-sounding name of this function, it is used
|
||||
// just for the case of collection indexing.
|
||||
void view_updates::update_entry_for_computed_column(
|
||||
const partition_key& base_key,
|
||||
const clustering_or_static_row& update,
|
||||
@@ -1155,30 +1079,72 @@ void view_updates::update_entry_for_computed_column(
|
||||
}
|
||||
}
|
||||
|
||||
// view_updates::generate_update() is the main function for taking an update
|
||||
// to a base table row - consisting of existing and updated versions of row -
|
||||
// and creating from it zero or more updates to a given materialized view.
|
||||
// These view updates may consist of updating an existing view row, deleting
|
||||
// an old view row, and/or creating a new view row.
|
||||
// There are several distinct cases depending on how many of the view's key
|
||||
// columns are "new key columns", i.e., were regular key columns in the base
|
||||
// or are a computed column based on a regular column (these computed columns
|
||||
// are used by, for example, Alternator's GSI):
|
||||
//
|
||||
// Zero new key columns:
|
||||
// The view row's key is composed only of base key columns, and those can't
// be changed in an update, so the view row remains alive as long as the
// base row is alive. The row marker for the view needs to be set to the
// same row marker as in the base - to keep an empty view row alive for as long
// as an empty base row exists.
|
||||
// Note that in this case, if there are *unselected* base columns, we may
|
||||
// need to keep an empty view row alive even without a row marker because
|
||||
// the base row (which has additional columns) is still alive. For that we
|
||||
// have the "virtual columns" feature: In the zero new key columns case, we
|
||||
// put unselected columns in the view as empty columns, to keep the view
|
||||
// row alive.
|
||||
//
|
||||
// One new key column:
|
||||
// In this case, there is a regular base column that is part of the view
|
||||
// key. This regular column can be added or deleted in an update, or its
// expiration can be set, and those changes can cause the view row - including
// its row marker - to appear or disappear as well. So the liveness of the cell
// of this one column determines the liveness of the view row and of the row
// marker that we set for it.
|
||||
//
|
||||
// Two or more new key columns:
|
||||
// This case is explicitly NOT supported in CQL - one cannot create a view
|
||||
// with more than one base-regular column in its key. In general, picking
// one liveness (timestamp and expiration) is not possible if there are
// multiple regular base columns in the view key, as those can have different
// liveness.
|
||||
// However, we do allow this case for Alternator - we need to allow the case
|
||||
// of two (but not more) because the DynamoDB API allows creating a GSI
|
||||
// whose two key columns (hash and range key) were regular columns. We can
|
||||
// support this case in Alternator because it doesn't use expiration (the
|
||||
// "TTL" it does support is different), and doesn't support user-defined
|
||||
// timestamps. But, the two columns can still have different timestamps -
|
||||
// this happens if an update modifies just one of them. In this case the
|
||||
// timestamp of the view update (and that of the row marker) is the later
|
||||
// of these two updated columns.
|
||||
void view_updates::generate_update(
|
||||
data_dictionary::database db,
|
||||
const partition_key& base_key,
|
||||
const clustering_or_static_row& update,
|
||||
const std::optional<clustering_or_static_row>& existing,
|
||||
gc_clock::time_point now) {
|
||||
|
||||
// Note that the base PK columns in update and existing are the same, since we're intrinsically dealing
|
||||
// with the same base row. So we have to check 3 things:
|
||||
// 1) that the clustering key doesn't have a null, which can happen for compact tables. If that's the case,
//    there are no corresponding entries.
|
||||
// 2) if there is a column not part of the base PK in the view PK, whether it is changed by the update.
|
||||
// 3) whether the update actually matches the view SELECT filter
|
||||
|
||||
// FIXME: The following if() is old code which may be related to COMPACT
|
||||
// STORAGE. If this is a real case, refer to a test that demonstrates it.
|
||||
// If it's not a real case, remove this if().
|
||||
if (update.is_clustering_row()) {
|
||||
if (!update.key()->is_full(*_base)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (_view_info.has_computed_column_depending_on_base_non_primary_key()) {
|
||||
return update_entry_for_computed_column(base_key, update, existing, now);
|
||||
}
|
||||
if (!_base_info->has_base_non_pk_columns_in_view_pk) {
|
||||
// If the view key depends on any regular column in the base, the update
|
||||
// may change the view key and may require deleting an old view row and
|
||||
// inserting a new row. The other case, which we'll handle here first,
// is easier and requires just modifying one view row.
|
||||
if (!_base_info->has_base_non_pk_columns_in_view_pk &&
|
||||
!_view_info.has_computed_column_depending_on_base_non_primary_key()) {
|
||||
if (update.is_static_row()) {
|
||||
// TODO: support static rows in views with pk only including columns from base pk
|
||||
return;
|
||||
@@ -1186,85 +1152,186 @@ void view_updates::generate_update(
|
||||
// The view key is necessarily the same pre and post update.
|
||||
if (existing && existing->is_live(*_base)) {
|
||||
if (update.is_live(*_base)) {
|
||||
update_entry(db, base_key, update, *existing, now);
|
||||
update_entry(db, base_key, update, *existing, now, update.marker());
|
||||
} else {
|
||||
delete_old_entry(db, base_key, *existing, update, now);
|
||||
delete_old_entry(db, base_key, *existing, update, now, api::missing_timestamp);
|
||||
}
|
||||
} else if (update.is_live(*_base)) {
|
||||
create_entry(db, base_key, update, now);
|
||||
create_entry(db, base_key, update, now, update.marker());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const auto& col_ids = update.is_clustering_row()
|
||||
? _base_info->base_regular_columns_in_view_pk()
|
||||
: _base_info->base_static_columns_in_view_pk();
|
||||
|
||||
// The view has a non-primary-key column from the base table as its primary key.
|
||||
// That means it's either a regular or static column. If we are currently
|
||||
// processing an update which does not correspond to the column's kind,
|
||||
// just stop here.
|
||||
if (col_ids.empty()) {
|
||||
// Find the view key columns that may be changed by an update.
|
||||
// This case is interesting because a change to the view key means that
|
||||
// we may need to delete an old view row and/or create a new view row.
|
||||
// The columns we look for are view key columns that are neither base key
|
||||
// columns nor computed columns based just on key columns. In other words,
|
||||
// we look here for columns which were regular columns or static columns
|
||||
// in the base table, or computed columns based on regular columns.
|
||||
struct updatable_view_key_col {
|
||||
column_id view_col_id;
|
||||
regular_column_transformation::result before;
|
||||
regular_column_transformation::result after;
|
||||
};
|
||||
std::vector<updatable_view_key_col> updatable_view_key_cols;
|
||||
for (const column_definition& view_col : _view->primary_key_columns()) {
|
||||
if (view_col.is_computed()) {
|
||||
const column_computation& computation = view_col.get_computation();
|
||||
if (computation.depends_on_non_primary_key_column()) {
|
||||
// Column is a computed column that does not depend just on
|
||||
// the base key, so it may change in the update.
|
||||
if (auto* c = dynamic_cast<const regular_column_transformation*>(&computation)) {
|
||||
updatable_view_key_cols.emplace_back(view_col.id,
|
||||
existing ? c->compute_value(*_base, base_key, *existing) : regular_column_transformation::result(),
|
||||
c->compute_value(*_base, base_key, update));
|
||||
} else {
|
||||
// The only other column_computation we have which has
|
||||
// depends_on_non_primary_key_column is
|
||||
// collection_column_computation, and we have a special
|
||||
// function to handle that case:
|
||||
return update_entry_for_computed_column(base_key, update, existing, now);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const column_definition* base_col = _base->get_column_definition(view_col.name());
|
||||
if (!base_col) {
|
||||
on_internal_error(vlogger, fmt::format("Column {} in view {}.{} was not found in the base table {}.{}",
|
||||
view_col.name(), _view->ks_name(), _view->cf_name(), _base->ks_name(), _base->cf_name()));
|
||||
}
|
||||
// If the view key column was also a base primary key column, then
|
||||
// it can't possibly change in this update. But if the column was
// not a primary key column - i.e., it is a regular column or static
// column - the update might have changed it and we need to list it
// in updatable_view_key_cols.
|
||||
// We check base_col->kind == update.column_kind() instead of just
|
||||
// !base_col->is_primary_key() because when update is a static row
|
||||
// we know it can't possibly update a regular column (and vice
|
||||
// versa).
|
||||
if (base_col->kind == update.column_kind()) {
|
||||
// This is view key, so we know it is atomic
|
||||
std::optional<atomic_cell_view> after;
|
||||
auto afterp = update.cells().find_cell(base_col->id);
|
||||
if (afterp) {
|
||||
after = afterp->as_atomic_cell(*base_col);
|
||||
}
|
||||
std::optional<atomic_cell_view> before;
|
||||
if (existing) {
|
||||
auto beforep = existing->cells().find_cell(base_col->id);
|
||||
if (beforep) {
|
||||
before = beforep->as_atomic_cell(*base_col);
|
||||
}
|
||||
}
|
||||
updatable_view_key_cols.emplace_back(view_col.id,
|
||||
before ? regular_column_transformation::result(*before) : regular_column_transformation::result(),
|
||||
after ? regular_column_transformation::result(*after) : regular_column_transformation::result());
|
||||
}
|
||||
}
|
||||
}
|
||||
// If we reached here, the view has a non-primary-key column from the base
|
||||
// table as its primary key. That means it's either a regular or static
|
||||
// column. If we are currently processing an update which does not
|
||||
// correspond to the column's kind, updatable_view_key_cols will be empty
|
||||
// and we can just stop here.
|
||||
if (updatable_view_key_cols.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto kind = update.column_kind();
|
||||
|
||||
// If one of the key columns is missing, set has_new_row = false
|
||||
// meaning that after the update there will be no view row.
|
||||
// If one of the key columns is missing in the existing value,
|
||||
// set has_old_row = false meaning we don't have an old row to
|
||||
// delete.
|
||||
// Use updatable_view_key_cols - the before and after values of the
|
||||
// view key columns that may have changed - to determine if the update
|
||||
// changes an existing view row, deletes an old row or creates a new row.
|
||||
bool has_old_row = true;
|
||||
bool has_new_row = true;
|
||||
bool same_row = true;
|
||||
for (auto col_id : col_ids) {
|
||||
auto* after = update.cells().find_cell(col_id);
|
||||
auto& cdef = _base->column_at(kind, col_id);
|
||||
if (existing) {
|
||||
auto* before = existing->cells().find_cell(col_id);
|
||||
// Note that this cell is necessarily atomic, because col_ids are
|
||||
// view key columns, and keys must be atomic.
|
||||
if (before && before->as_atomic_cell(cdef).is_live()) {
|
||||
if (after && after->as_atomic_cell(cdef).is_live()) {
|
||||
// We need to compare just the values of the keys, not
|
||||
// metadata like the timestamp. This is because below,
|
||||
// if the old and new view row have the same key, we need
|
||||
// to be sure to reach the update_entry() case.
|
||||
auto cmp = compare_unsigned(before->as_atomic_cell(cdef).value(), after->as_atomic_cell(cdef).value());
|
||||
if (cmp != 0) {
|
||||
same_row = false;
|
||||
}
|
||||
bool same_row = true; // undefined if either has_old_row or has_new_row is false
|
||||
for (const auto& u : updatable_view_key_cols) {
|
||||
if (u.before.has_value()) {
|
||||
if (u.after.has_value()) {
|
||||
if (compare_unsigned(u.before.get_value(), u.after.get_value()) != 0) {
|
||||
same_row = false;
|
||||
}
|
||||
} else {
|
||||
has_old_row = false;
|
||||
has_new_row = false;
|
||||
}
|
||||
} else {
|
||||
has_old_row = false;
|
||||
}
|
||||
if (!after || !after->as_atomic_cell(cdef).is_live()) {
|
||||
has_new_row = false;
|
||||
if (!u.after.has_value()) {
|
||||
has_new_row = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If has_new_row, calculate a row marker for this view row - i.e., a
|
||||
// timestamp and ttl - based on those of the updatable view key column
|
||||
// (or, in an Alternator-only extension, more than one).
|
||||
row_marker new_row_rm; // only set if has_new_row
|
||||
if (has_new_row) {
|
||||
// Note:
|
||||
// 1. By reaching here we know that updatable_view_key_cols has at
|
||||
// least one member (in CQL, it's always one, in Alternator it
|
||||
// may be two).
|
||||
// 2. Because has_new_row, we know all elements in that array have
|
||||
// after.has_value() true, so we can use after.get_ts() et al.
|
||||
api::timestamp_type new_row_ts = updatable_view_key_cols[0].after.get_ts();
|
||||
// This is the Alternator-only support for *two* regular base columns
|
||||
// that become view key columns. The timestamp we use is the *maximum*
|
||||
// of the two key columns, as explained in pull-request #17172.
|
||||
if (updatable_view_key_cols.size() > 1) {
|
||||
auto second_ts = updatable_view_key_cols[1].after.get_ts();
|
||||
new_row_ts = std::max(new_row_ts, second_ts);
|
||||
// Alternator isn't supposed to have more than two updatable view key columns!
|
||||
if (updatable_view_key_cols.size() != 2) [[unlikely]] {
|
||||
utils::on_internal_error(format("Unexpected updatable_view_key_col length {}", updatable_view_key_cols.size()));
|
||||
}
|
||||
}
|
||||
// We assume that either updatable_view_key_cols has just one column
|
||||
// (the only situation allowed in CQL) or, if there is more than one,
|
||||
// they have the same expiry information (in Alternator, there is
|
||||
// never a CQL TTL set).
|
||||
new_row_rm = row_marker(new_row_ts, updatable_view_key_cols[0].after.get_ttl(), updatable_view_key_cols[0].after.get_expiry());
|
||||
}
|
||||
|
||||
if (has_old_row) {
|
||||
// As explained in #19977, when there is one updatable view key column
// (the only case allowed in CQL) the deletion timestamp is the "before"
|
||||
// timestamp. As explained in #17119, if there are two of them (only
|
||||
// possible in Alternator), we take the maximum.
|
||||
// Note:
|
||||
// 1. By reaching here we know that updatable_view_key_cols has at
|
||||
// least one member (in CQL, it's always one, in Alternator it
|
||||
// may be two).
|
||||
// 2. Because has_old_row, we know all elements in that array have
|
||||
// before.has_value() true, so we can use before.get_ts().
|
||||
auto old_row_ts = updatable_view_key_cols[0].before.get_ts();
|
||||
if (updatable_view_key_cols.size() > 1) {
|
||||
// This is the Alternator-only support for two regular base
|
||||
// columns that become view key columns. See explanation in
|
||||
// view_updates::compute_row_marker().
|
||||
auto second_ts = updatable_view_key_cols[1].before.get_ts();
|
||||
old_row_ts = std::max(old_row_ts, second_ts);
|
||||
// Alternator isn't supposed to have more than two updatable view key columns!
|
||||
if (updatable_view_key_cols.size() != 2) [[unlikely]] {
|
||||
utils::on_internal_error(format("Unexpected updatable_view_key_col length {}", updatable_view_key_cols.size()));
|
||||
}
|
||||
}
|
||||
if (has_new_row) {
|
||||
if (same_row) {
|
||||
update_entry(db, base_key, update, *existing, now);
|
||||
update_entry(db, base_key, update, *existing, now, new_row_rm);
|
||||
} else {
|
||||
// This code doesn't work if the old and new view row have the
|
||||
// same key, because if they do we get both data and tombstone
|
||||
// for the same timestamp (now) and the tombstone wins. This
|
||||
// is why we need the "same_row" case above - it's not just a
|
||||
// performance optimization.
|
||||
delete_old_entry(db, base_key, *existing, update, now);
|
||||
create_entry(db, base_key, update, now);
|
||||
// The following code doesn't work if the old and new view row
|
||||
// have the same key, because if they do we can get both data
|
||||
// and tombstone for the same timestamp and the tombstone
|
||||
// wins. This is why we need the "same_row" case above - it's
|
||||
// not just a performance optimization.
|
||||
delete_old_entry(db, base_key, *existing, update, now, old_row_ts);
|
||||
create_entry(db, base_key, update, now, new_row_rm);
|
||||
}
|
||||
} else {
|
||||
delete_old_entry(db, base_key, *existing, update, now);
|
||||
delete_old_entry(db, base_key, *existing, update, now, old_row_ts);
|
||||
}
|
||||
} else if (has_new_row) {
|
||||
create_entry(db, base_key, update, now);
|
||||
create_entry(db, base_key, update, now, new_row_rm);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
bool view_updates::is_partition_key_permutation_of_base_partition_key() const {
|
||||
@@ -2995,6 +3062,12 @@ public:
|
||||
_step.build_status.pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
// before going back to the minimum token, advance current_key to the end
|
||||
// and check for built views in that range.
|
||||
_step.current_key = {_step.prange.end().value_or(dht::ring_position::max()).value().token(), partition_key::make_empty()};
|
||||
check_for_built_views();
|
||||
|
||||
_step.current_key = {dht::minimum_token(), partition_key::make_empty()};
|
||||
for (auto&& vs : _step.build_status) {
|
||||
vs.next_token = dht::minimum_token();
|
||||
|
||||
@@ -240,10 +240,10 @@ private:
|
||||
};
|
||||
std::vector<view_row_entry> get_view_rows(const partition_key& base_key, const clustering_or_static_row& update, const std::optional<clustering_or_static_row>& existing, row_tombstone update_tomb);
|
||||
bool can_skip_view_updates(const clustering_or_static_row& update, const clustering_or_static_row& existing) const;
|
||||
void create_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& update, gc_clock::time_point now);
|
||||
void delete_old_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& existing, const clustering_or_static_row& update, gc_clock::time_point now);
|
||||
void do_delete_old_entry(const partition_key& base_key, const clustering_or_static_row& existing, const clustering_or_static_row& update, gc_clock::time_point now);
|
||||
void update_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& update, const clustering_or_static_row& existing, gc_clock::time_point now);
|
||||
void create_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& update, gc_clock::time_point now, row_marker update_marker);
|
||||
void delete_old_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& existing, const clustering_or_static_row& update, gc_clock::time_point now, api::timestamp_type deletion_ts);
|
||||
void do_delete_old_entry(const partition_key& base_key, const clustering_or_static_row& existing, const clustering_or_static_row& update, gc_clock::time_point now, api::timestamp_type deletion_ts);
|
||||
void update_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& update, const clustering_or_static_row& existing, gc_clock::time_point now, row_marker update_marker);
|
||||
void update_entry_for_computed_column(const partition_key& base_key, const clustering_or_static_row& update, const std::optional<clustering_or_static_row>& existing, gc_clock::time_point now);
|
||||
};
|
||||
|
||||
|
||||
58
dist/debian/control.template
vendored
@@ -12,15 +12,16 @@ Architecture: any
|
||||
Description: Scylla database main configuration file
|
||||
Scylla is a highly scalable, eventually consistent, distributed,
|
||||
partitioned row DB.
|
||||
Replaces: %{product}-server (<< 1.1)
|
||||
Replaces: %{product}-server (<< 1.1), scylla-enterprise-conf (<< 2025.1.0~)
|
||||
Conflicts: %{product}-server (<< 1.1)
|
||||
Breaks: scylla-enterprise-conf (<< 2025.1.0~)
|
||||
|
||||
Package: %{product}-server
|
||||
Architecture: any
|
||||
Depends: ${misc:Depends}, %{product}-conf (= ${binary:Version}), %{product}-python3 (= ${binary:Version})
|
||||
Replaces: %{product}-tools (<<5.5)
|
||||
Breaks: %{product}-tools (<<5.5)
|
||||
Description: Scylla database server binaries
|
||||
Replaces: %{product}-tools (<<5.5), scylla-enterprise-tools (<< 2024.2.0~), scylla-enterprise-server (<< 2025.1.0~)
|
||||
Breaks: %{product}-tools (<<5.5), scylla-enterprise-tools (<< 2024.2.0~), scylla-enterprise-server (<< 2025.1.0~)
|
||||
Description: Scylla database server binaries
|
||||
Scylla is a highly scalable, eventually consistent, distributed,
|
||||
partitioned row DB.
|
||||
|
||||
@@ -29,6 +30,8 @@ Section: debug
|
||||
Priority: extra
|
||||
Architecture: any
|
||||
Depends: %{product}-server (= ${binary:Version}), ${misc:Depends}
|
||||
Replaces: scylla-enterprise-server-dbg (<< 2025.1.0~)
|
||||
Breaks: scylla-enterprise-server-dbg (<< 2025.1.0~)
|
||||
Description: debugging symbols for %{product}-server
|
||||
Scylla is a highly scalable, eventually consistent, distributed,
|
||||
partitioned row DB.
|
||||
@@ -37,13 +40,17 @@ Description: debugging symbols for %{product}-server
|
||||
Package: %{product}-kernel-conf
|
||||
Architecture: any
|
||||
Depends: procps
|
||||
Replaces: scylla-enterprise-kernel-conf (<< 2025.1.0~)
|
||||
Breaks: scylla-enterprise-kernel-conf (<< 2025.1.0~)
|
||||
Description: Scylla kernel tuning configuration
|
||||
Scylla is a highly scalable, eventually consistent, distributed,
|
||||
partitioned row DB.
|
||||
|
||||
Package: %{product}-node-exporter
|
||||
Architecture: any
|
||||
Replaces: scylla-enterprise-node-exporter (<< 2025.1.0~)
|
||||
Conflicts: prometheus-node-exporter
|
||||
Breaks: scylla-enterprise-node-exporter (<< 2025.1.0~)
|
||||
Description: Prometheus exporter for machine metrics
|
||||
Prometheus exporter for machine metrics, written in Go with pluggable metric collectors.
|
||||
|
||||
@@ -54,6 +61,49 @@ Depends: %{product}-server (= ${binary:Version})
|
||||
, %{product}-kernel-conf (= ${binary:Version})
|
||||
, %{product}-node-exporter (= ${binary:Version})
|
||||
, %{product}-cqlsh (= ${binary:Version})
|
||||
Replaces: scylla-enterprise (<< 2025.1.0~)
|
||||
Breaks: scylla-enterprise (<< 2025.1.0~)
|
||||
Description: Scylla database metapackage
|
||||
Scylla is a highly scalable, eventually consistent, distributed,
|
||||
partitioned row DB.
|
||||
|
||||
Package: scylla-enterprise-conf
|
||||
Depends: %{product}-conf (= ${binary:Version})
|
||||
Architecture: all
|
||||
Priority: optional
|
||||
Section: oldlibs
|
||||
Description: transitional package
|
||||
This is a transitional package. It can safely be removed.
|
||||
|
||||
Package: scylla-enterprise-server
|
||||
Depends: %{product}-server (= ${binary:Version})
|
||||
Architecture: all
|
||||
Priority: optional
|
||||
Section: oldlibs
|
||||
Description: transitional package
|
||||
This is a transitional package. It can safely be removed.
|
||||
|
||||
Package: scylla-enterprise
|
||||
Depends: %{product} (= ${binary:Version})
|
||||
Architecture: all
|
||||
Priority: optional
|
||||
Section: oldlibs
|
||||
Description: transitional package
|
||||
This is a transitional package. It can safely be removed.
|
||||
|
||||
Package: scylla-enterprise-kernel-conf
|
||||
Depends: %{product}-kernel-conf (= ${binary:Version})
|
||||
Architecture: all
|
||||
Priority: optional
|
||||
Section: oldlibs
|
||||
Description: transitional package
|
||||
This is a transitional package. It can safely be removed.
|
||||
|
||||
Package: scylla-enterprise-node-exporter
|
||||
Depends: %{product}-node-exporter (= ${binary:Version})
|
||||
Architecture: all
|
||||
Priority: optional
|
||||
Section: oldlibs
|
||||
Description: transitional package
|
||||
This is a transitional package. It can safely be removed.
|
||||
|
||||
4
dist/debian/debian/rules
vendored
@@ -11,6 +11,8 @@ endif
|
||||
|
||||
product := $(subst -server,,$(DEB_SOURCE))
|
||||
|
||||
libreloc_list := $(shell find scylla/libreloc/ -maxdepth 1 -type f -not -name .*.hmac -and -not -name gnutls.config -printf '-X%f ')
|
||||
libexec_list := $(shell find scylla/libexec/ -maxdepth 1 -type f -not -name scylla -and -not -name iotune -printf '-X%f ')
|
||||
override_dh_auto_configure:
|
||||
|
||||
override_dh_auto_build:
|
||||
@@ -38,7 +40,7 @@ endif
|
||||
override_dh_strip:
|
||||
# The binaries (ethtool...patchelf) don't pass dh_strip after going through patchelf. Since they are
|
||||
# already stripped, nothing is lost if we exclude them, so that's what we do.
|
||||
dh_strip -Xlibprotobuf.so.15 -Xld.so -Xethtool -Xgawk -Xgzip -Xhwloc-calc -Xhwloc-distrib -Xifconfig -Xlscpu -Xnetstat -Xpatchelf --dbg-package=$(product)-server-dbg
|
||||
dh_strip $(libreloc_list) $(libexec_list) --dbg-package=$(product)-server-dbg
|
||||
find $(CURDIR)/debian/$(product)-server-dbg/usr/lib/debug/.build-id/ -name "*.debug" -exec objcopy --decompress-debug-sections {} \;
|
||||
|
||||
override_dh_makeshlibs:
|
||||
|
||||
1
dist/debian/debian/scylla-server.install
vendored
@@ -21,6 +21,7 @@ opt/scylladb/scyllatop/*
|
||||
opt/scylladb/scripts/libexec/*
|
||||
opt/scylladb/bin/*
|
||||
opt/scylladb/libreloc/*
|
||||
opt/scylladb/libreloc/.*.hmac
|
||||
opt/scylladb/libexec/*
|
||||
usr/lib/scylla/*
|
||||
var/lib/scylla/data
|
||||
|
||||
14
dist/redhat/scylla.spec
vendored
@@ -13,7 +13,8 @@ Requires: %{product}-python3 = %{version}-%{release}
|
||||
Requires: %{product}-kernel-conf = %{version}-%{release}
|
||||
Requires: %{product}-node-exporter = %{version}-%{release}
|
||||
Requires: %{product}-cqlsh = %{version}-%{release}
|
||||
Obsoletes: scylla-server < 1.1
|
||||
Provides: scylla-enterprise = %{version}-%{release}
|
||||
Obsoletes: scylla-enterprise < 2025.1.0
|
||||
|
||||
%global _debugsource_template %{nil}
|
||||
%global _debuginfo_subpackages %{nil}
|
||||
@@ -73,6 +74,10 @@ Requires: %{product}-python3 = %{version}-%{release}
|
||||
AutoReqProv: no
|
||||
Provides: %{product}-tools:%{_bindir}/nodetool
|
||||
Provides: %{product}-tools:%{_sysconfigdir}/bash_completion.d/nodetool-completion
|
||||
Provides: scylla-enterprise-tools:%{_bindir}/nodetool
|
||||
Provides: scylla-enterprise-tools:%{_sysconfigdir}/bash_completion.d/nodetool-completion
|
||||
Provides: scylla-enterprise-server = %{version}-%{release}
|
||||
Obsoletes: scylla-enterprise-server < 2025.1.0
|
||||
|
||||
%description server
|
||||
This package contains ScyllaDB server.
|
||||
@@ -132,6 +137,7 @@ ln -sfT /etc/scylla /var/lib/scylla/conf
|
||||
/opt/scylladb/scyllatop/*
|
||||
/opt/scylladb/bin/*
|
||||
/opt/scylladb/libreloc/*
|
||||
/opt/scylladb/libreloc/.*.hmac
|
||||
/opt/scylladb/libexec/*
|
||||
%{_prefix}/lib/scylla/*
|
||||
%attr(0755,scylla,scylla) %dir %{_sharedstatedir}/scylla/
|
||||
@@ -156,6 +162,8 @@ ln -sfT /etc/scylla /var/lib/scylla/conf
|
||||
Group: Applications/Databases
|
||||
Summary: Scylla configuration package
|
||||
Obsoletes: scylla-server < 1.1
|
||||
Provides: scylla-enterprise-conf = %{version}-%{release}
|
||||
Obsoletes: scylla-enterprise-conf < 2025.1.0
|
||||
|
||||
%description conf
|
||||
This package contains the main scylla configuration file.
|
||||
@@ -176,6 +184,8 @@ Summary: Scylla configuration package for the Linux kernel
|
||||
Requires: kmod
|
||||
# tuned overwrites our sysctl settings
|
||||
Obsoletes: tuned >= 2.11.0
|
||||
Provides: scylla-enterprise-kernel-conf = %{version}-%{release}
|
||||
Obsoletes: scylla-enterprise-kernel-conf < 2025.1.0
|
||||
|
||||
%description kernel-conf
|
||||
This package contains Linux kernel configuration changes for the Scylla database. Install this package
|
||||
@@ -212,6 +222,8 @@ Group: Applications/Databases
|
||||
Summary: Prometheus exporter for machine metrics
|
||||
License: ASL 2.0
|
||||
URL: https://github.com/prometheus/node_exporter
|
||||
Provides: scylla-enterprise-node-exporter = %{version}-%{release}
|
||||
Obsoletes: scylla-enterprise-node-exporter < 2025.1.0
|
||||
|
||||
%description node-exporter
|
||||
Prometheus exporter for machine metrics, written in Go with pluggable metric collectors.
|
||||
|
||||
@@ -187,8 +187,8 @@ ATTACH SERVICE_LEVEL oltp TO bob;
|
||||
Note that `alternator_enforce_authorization` has to be enabled in Scylla configuration.
|
||||
|
||||
See [Authorization](##Authorization) section to learn more about roles and authorization.
|
||||
See <https://enterprise.docs.scylladb.com/stable/using-scylla/workload-prioritization.html>
|
||||
to read about **Workload Prioritization** in detail.
|
||||
See [Workload Prioritization](../features/workload-prioritization)
|
||||
to read about Workload Prioritization in detail.
|
||||
|
||||
## Metrics
|
||||
|
||||
@@ -272,12 +272,6 @@ behave the same in Alternator. However, there are a few features which we have
|
||||
not implemented yet. Unimplemented features return an error when used, so
|
||||
they should be easy to detect. Here is a list of these unimplemented features:
|
||||
|
||||
* Currently in Alternator, a GSI (Global Secondary Index) can only be added
|
||||
to a table at table creation time. DynamoDB allows adding a GSI (but not an
|
||||
LSI) to an existing table using an UpdateTable operation, and similarly it
|
||||
allows removing a GSI from a table.
|
||||
<https://github.com/scylladb/scylla/issues/11567>
|
||||
|
||||
* GSI (Global Secondary Index) and LSI (Local Secondary Index) may be
|
||||
configured to project only a subset of the base-table attributes to the
|
||||
index. This option is not yet respected by Alternator - all attributes
|
||||
@@ -319,7 +313,7 @@ they should be easy to detect. Here is a list of these unimplemented features:
|
||||
RestoreTableToPointInTime
|
||||
|
||||
* DynamoDB's encryption-at-rest settings are not supported. The Encryption-
|
||||
at-rest feature is available in Scylla Enterprise, but needs to be
|
||||
at-rest feature is available in ScyllaDB, but needs to be
|
||||
enabled and configured separately, not through the DynamoDB API.
|
||||
|
||||
* No support for throughput accounting or capping. As mentioned above, the
|
||||
@@ -378,3 +372,14 @@ they should be easy to detect. Here is a list of these unimplemented features:
|
||||
that can be used to forbid table deletion. This table option was added to
|
||||
DynamoDB in March 2023.
|
||||
<https://github.com/scylladb/scylla/issues/14482>
|
||||
|
||||
* Alternator does not support the table option WarmThroughput that can be
|
||||
used to check or guarantee that the database has "warmed" to handle a
|
||||
particular throughput. This table option was added to DynamoDB in
|
||||
November 2024.
|
||||
<https://github.com/scylladb/scylladb/issues/21853>
|
||||
|
||||
* Alternator does not support the table option MultiRegionConsistency
|
||||
that can be used to achieve consistent reads on global (multi-region) tables.
|
||||
This table option was added as a preview to DynamoDB in December 2024.
|
||||
<https://github.com/scylladb/scylladb/issues/21852>
|
||||
|
||||
@@ -144,3 +144,46 @@ If a certain data center or rack has no functional nodes, or doesn't even
|
||||
exist, an empty list (`[]`) is returned by the `/localnodes` request.
|
||||
A client should be prepared to consider expanding the node search to an
|
||||
entire data center, or other data centers, in that case.
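As a rough illustration of such a fallback (a sketch only - it assumes the Alternator HTTP endpoint listens on port 8000 and that `/localnodes` accepts optional `dc` and `rack` query parameters; verify both against your deployment):

```python
import requests

def discover_nodes(contact_point, dc=None, rack=None):
    # Query /localnodes, optionally restricted to a data center and rack.
    params = {}
    if dc:
        params['dc'] = dc
    if rack:
        params['rack'] = rack
    nodes = requests.get(f'http://{contact_point}:8000/localnodes', params=params).json()
    if not nodes and rack:
        # The rack has no functional nodes - widen the search to the whole DC.
        return discover_nodes(contact_point, dc=dc)
    if not nodes and dc:
        # The DC has no functional nodes - widen the search to the whole cluster.
        return discover_nodes(contact_point)
    return nodes
```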
|
||||
|
||||
## Tablets
|
||||
"Tablets" are ScyllaDB's new approach to replicating data across a cluster.
|
||||
They replace the older approach, which was named "vnodes". Compared to vnodes,
|
||||
tablets are smaller pieces of tables that are easier to move between nodes,
|
||||
and allow for faster growing or shrinking of the cluster when needed.
|
||||
|
||||
In this version, tablet support is incomplete and not all of the features
|
||||
which Alternator needs are supported with tablets. So currently, new
|
||||
Alternator tables default to using vnodes - not tablets.
|
||||
|
||||
However, if you do want to create an Alternator table which uses tablets,
|
||||
you can do this by specifying the `experimental:initial_tablets` tag in
|
||||
the CreateTable operation. The value of this tag can be:
|
||||
|
||||
* Any valid integer as the value of this tag enables tablets.
|
||||
Typically the number "0" is used - which tells ScyllaDB to pick a reasonable
|
||||
number of initial tablets. But any other number can be used, and this
|
||||
number overrides the default choice of initial number of tablets.
|
||||
|
||||
* Any non-integer value - e.g., the string "none" - creates the table
|
||||
without tablets - i.e., using vnodes.
|
||||
|
||||
The `experimental:initial_tablets` tag only has an effect when creating
a new table with CreateTable - changing it later has no effect.
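As a sketch of how the tag can be passed (using boto3 against a hypothetical local Alternator endpoint; the table name, credentials and region are placeholders):

```python
import boto3

# Hypothetical local Alternator endpoint; adjust URL, region and credentials
# for your cluster.
dynamodb = boto3.resource('dynamodb', endpoint_url='http://localhost:8000',
                          region_name='us-east-1',
                          aws_access_key_id='none', aws_secret_access_key='none')

table = dynamodb.create_table(
    TableName='tablets_table',
    KeySchema=[{'AttributeName': 'p', 'KeyType': 'HASH'}],
    AttributeDefinitions=[{'AttributeName': 'p', 'AttributeType': 'S'}],
    BillingMode='PAY_PER_REQUEST',
    # '0' asks ScyllaDB to pick a reasonable number of initial tablets;
    # any non-integer value (e.g. 'none') would create the table with vnodes.
    Tags=[{'Key': 'experimental:initial_tablets', 'Value': '0'}],
)
```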
|
||||
|
||||
Because the tablets support is incomplete, when tablets are enabled for an
|
||||
Alternator table, the following features will not work for this table:
|
||||
|
||||
* The table must have one of the write isolation modes which does not
  use LWT, because LWT is not supported with tablets. The allowed write
  isolation modes are `forbid_rmw` or `unsafe_rmw`.
  Setting the isolation mode to `always_use_lwt` will succeed, but the writes
  themselves will fail with an InternalServerError. At that point you can
  still change the write isolation mode of the table to a supported mode,
  as shown in the sketch after this list.
  See <https://github.com/scylladb/scylladb/issues/18068>.
|
||||
|
||||
* Enabling TTL with UpdateTimeToLive doesn't work (results in an error).
|
||||
See <https://github.com/scylladb/scylla/issues/16567>.
|
||||
|
||||
* Enabling Streams with CreateTable or UpdateTable doesn't work
|
||||
(results in an error).
|
||||
See <https://github.com/scylladb/scylla/issues/16317>.
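The following sketch shows how a table created with `always_use_lwt` could be switched to a supported mode afterwards. It assumes boto3 and the `system:write_isolation` tag; verify the tag name against the write isolation documentation for your version:

```python
import boto3

# Hypothetical endpoint and placeholder credentials; adjust for your cluster.
alternator = boto3.client('dynamodb', endpoint_url='http://localhost:8000',
                          region_name='us-east-1',
                          aws_access_key_id='none', aws_secret_access_key='none')

arn = alternator.describe_table(TableName='tablets_table')['Table']['TableArn']
# Switch the table to a write isolation mode that works with tablets.
alternator.tag_resource(ResourceArn=arn,
                        Tags=[{'Key': 'system:write_isolation', 'Value': 'forbid_rmw'}])
```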
|
||||
|
||||
@@ -70,8 +70,6 @@ Set the parameters for :ref:`Leveled Compaction <leveled-compaction-strategy-lcs
|
||||
Incremental Compaction Strategy (ICS)
|
||||
=====================================
|
||||
|
||||
.. versionadded:: 2019.1.4 Scylla Enterprise
|
||||
|
||||
ICS principles of operation are similar to those of STCS, merely replacing the increasingly larger SSTables in each tier, by increasingly longer SSTable runs, modeled after LCS runs, but using larger fragment size of 1 GB, by default.
|
||||
|
||||
Compaction is triggered when there are two or more runs of roughly the same size. These runs are incrementally compacted with each other, producing a new SSTable run, while incrementally releasing space as soon as each SSTable in the input run is processed and compacted. This method eliminates the high temporary space amplification problem of STCS by limiting the overhead to twice the (constant) fragment size, per shard.
|
||||
|
||||
@@ -12,6 +12,7 @@ ScyllaDB Architecture
|
||||
SSTable <sstable/index/>
|
||||
Compaction Strategies <compaction/compaction-strategies>
|
||||
Raft Consensus Algorithm in ScyllaDB </architecture/raft>
|
||||
Zero-token Nodes </architecture/zero-token-nodes>
|
||||
|
||||
|
||||
* :doc:`Data Distribution with Tablets </architecture/tablets/>` - Tablets in ScyllaDB
|
||||
@@ -22,5 +23,6 @@ ScyllaDB Architecture
|
||||
* :doc:`SSTable </architecture/sstable/index/>` - ScyllaDB SSTable 2.0 and 3.0 Format Information
|
||||
* :doc:`Compaction Strategies </architecture/compaction/compaction-strategies>` - High-level analysis of different compaction strategies
|
||||
* :doc:`Raft Consensus Algorithm in ScyllaDB </architecture/raft>` - Overview of how Raft is implemented in ScyllaDB.
|
||||
* :doc:`Zero-token Nodes </architecture/zero-token-nodes>` - Nodes that do not replicate any data.
|
||||
|
||||
Learn more about these topics in the `ScyllaDB University: Architecture lesson <https://university.scylladb.com/courses/scylla-essentials-overview/lessons/architecture/>`_.
|
||||
|
||||
@@ -15,7 +15,7 @@ SSTable Version Support
|
||||
- ScyllaDB Enterprise Version
|
||||
- ScyllaDB Open Source Version
|
||||
* - 3.x ('me')
|
||||
- 2022.2
|
||||
- 2022.2 and above
|
||||
- 5.1 and above
|
||||
* - 3.x ('md')
|
||||
- 2021.1
|
||||
|
||||
@@ -9,11 +9,7 @@ ScyllaDB SSTable Format
|
||||
|
||||
.. include:: _common/sstable_what_is.rst
|
||||
|
||||
* In ScyllaDB 6.0 and above, *me* format is enabled by default.
|
||||
|
||||
* In ScyllaDB Enterprise 2021.1, ScyllaDB 4.3 and above, *md* format is enabled by default.
|
||||
|
||||
* In ScyllaDB 3.1 and above, *mc* format is enabled by default.
|
||||
In ScyllaDB 6.0 and above, *me* format is enabled by default.
|
||||
|
||||
For more information on each of the SSTable formats, see below:
|
||||
|
||||
|
||||
@@ -12,17 +12,7 @@ ScyllaDB SSTable - 3.x
|
||||
|
||||
.. include:: ../_common/sstable_what_is.rst
|
||||
|
||||
* In ScyllaDB 6.0 and above, the ``me`` format is mandatory, and ``md`` format is used only when upgrading from an existing cluster using ``md``. The ``sstable_format`` parameter is ignored if it is set to ``md``.
|
||||
* In ScyllaDB 5.1 and above, the ``me`` format is enabled by default.
|
||||
* In ScyllaDB 4.3 to 5.0, the ``md`` format is enabled by default.
|
||||
* In ScyllaDB 3.1 to 4.2, the ``mc`` format is enabled by default.
|
||||
* In ScyllaDB 3.0, the ``mc`` format is disabled by default. You can enable it by adding the ``enable_sstables_mc_format`` parameter set to ``true`` in the ``scylla.yaml`` file. For example:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
enable_sstables_mc_format: true
|
||||
|
||||
.. REMOVE IN FUTURE VERSIONS - Remove the note above in version 5.2.
|
||||
In ScyllaDB 6.0 and above, the ``me`` format is mandatory, and ``md`` format is used only when upgrading from an existing cluster using ``md``. The ``sstable_format`` parameter is ignored if it is set to ``md``.
|
||||
|
||||
Additional Information
|
||||
-------------------------
|
||||
|
||||
@@ -75,15 +75,7 @@ to a new node.
|
||||
File-based Streaming
|
||||
========================
|
||||
|
||||
:label-tip:`ScyllaDB Enterprise`
|
||||
|
||||
File-based streaming is a ScyllaDB Enterprise-only feature that optimizes
|
||||
tablet migration.
|
||||
|
||||
In ScyllaDB Open Source, migrating tablets is performed by streaming mutation
|
||||
fragments, which involves deserializing SSTable files into mutation fragments
|
||||
and re-serializing them back into SSTables on the other node.
|
||||
In ScyllaDB Enterprise, migrating tablets is performed by streaming entire
|
||||
Migrating tablets is performed by streaming entire
|
||||
SSTables, which does not require (de)serializing or processing mutation fragments.
|
||||
As a result, less data is streamed over the network, and less CPU is consumed,
|
||||
especially for data models that contain small cells.
|
||||
@@ -143,9 +135,17 @@ You can create a keyspace with tablets enabled with the ``tablets = {'enabled':
|
||||
the keyspace schema with ``tablets = { 'enabled': false }`` or
|
||||
``tablets = { 'enabled': true }``.
|
||||
|
||||
.. _tablets-limitations:
|
||||
|
||||
Limitations and Unsupported Features
|
||||
--------------------------------------
|
||||
|
||||
.. warning::
|
||||
|
||||
If a keyspace has tablets enabled, it must remain :term:`RF-rack-valid <RF-rack-valid keyspace>`
|
||||
throughout its lifetime. Failing to keep that invariant satisfied may result in data inconsistencies,
|
||||
performance problems, or other issues.
|
||||
|
||||
The following ScyllaDB features are not supported if a keyspace has tablets
|
||||
enabled:
|
||||
|
||||
@@ -157,6 +157,15 @@ enabled:
|
||||
If you plan to use any of the above features, CREATE your keyspace
|
||||
:ref:`with tablets disabled <tablets-enable-tablets>`.
|
||||
|
||||
The following ScyllaDB features are disabled by default when used with a keyspace
|
||||
that has tablets enabled:
|
||||
|
||||
* Materialized Views (MV)
|
||||
* Secondary indexes (SI, as it depends on MV)
|
||||
|
||||
To enable MV and SI for tablet keyspaces, use the `--experimental-features=views-with-tablets`
|
||||
configuration option. See :ref:`Views with tablets <admin-views-with-tablets>` for details.
|
||||
|
||||
Resharding in keyspaces with tablets enabled has the following limitations:
|
||||
|
||||
* ScyllaDB does not support reducing the number of shards after node restart.
|
||||
|
||||
28
docs/architecture/zero-token-nodes.rst
Normal file
@@ -0,0 +1,28 @@
|
||||
=========================
|
||||
Zero-token Nodes
|
||||
=========================
|
||||
|
||||
By default, all nodes in a cluster own a set of token ranges and are used to
|
||||
replicate data. In certain circumstances, you may choose to add a node that
|
||||
doesn't own any token. Such nodes are referred to as zero-token nodes. They
|
||||
do not have a copy of the data but only participate in Raft quorum voting.
|
||||
|
||||
To configure a zero-token node, set the ``join_ring`` parameter to ``false``.
|
||||
|
||||
You can use zero-token nodes in multi-DC deployments to reduce the risk of
|
||||
losing a quorum of nodes.
|
||||
See :doc:`Preventing Quorum Loss in Symmetrical Multi-DC Clusters </operating-scylla/procedures/cluster-management/arbiter-dc>` for details.
|
||||
|
||||
Note that:
|
||||
|
||||
* Zero-token nodes are ignored by drivers, so there is no need to change
|
||||
the load balancing policy on the clients after adding zero-token nodes
|
||||
to the cluster.
|
||||
* Zero-token nodes never store replicated data, so running ``nodetool rebuild``,
|
||||
  ``nodetool repair``, and ``nodetool cleanup`` can be skipped, as these commands do not
  affect zero-token nodes.
|
||||
* Racks consisting solely of zero-token nodes are not taken into consideration
|
||||
when deciding whether a keyspace is :term:`RF-rack-valid <RF-rack-valid keyspace>`.
|
||||
However, an RF-rack-valid keyspace must have the replication factor equal to 0
|
||||
in an :doc:`arbiter DC </operating-scylla/procedures/cluster-management/arbiter-dc>`.
|
||||
Otherwise, it is RF-rack-invalid.
|
||||
@@ -1,3 +0,0 @@
|
||||
By default, a keyspace is created with tablets enabled. The ``tablets`` option
|
||||
is used to opt out a keyspace from tablets-based distribution; see :ref:`Enabling Tablets <tablets-enable-tablets>`
|
||||
for details.
|
||||
@@ -170,8 +170,6 @@ LCS options
|
||||
Incremental Compaction Strategy (ICS)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. versionadded:: 2019.1.4 Scylla Enterprise
|
||||
|
||||
When using ICS, SSTable runs are put in different buckets depending on their size.
|
||||
When an SSTable run is bucketed, the average size of the runs in the bucket is compared to the new run, as well as the ``bucket_high`` and ``bucket_low`` levels.
|
||||
|
||||
|
||||
@@ -203,18 +203,6 @@ An example that excludes a datacenter while using ``replication_factor``::
|
||||
DESCRIBE KEYSPACE excalibur
|
||||
CREATE KEYSPACE excalibur WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1': '3'} AND durable_writes = true;
|
||||
|
||||
|
||||
|
||||
.. only:: opensource
|
||||
|
||||
Keyspace storage options :label-caution:`Experimental`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
By default, SSTables of a keyspace are stored locally.
|
||||
As an alternative, you can configure your keyspace to be stored
|
||||
on Amazon S3 or another S3-compatible object store.
|
||||
See :ref:`Keyspace storage options <admin-keyspace-storage-options>` for details.
|
||||
|
||||
.. _tablets:
|
||||
|
||||
The ``tablets`` property
|
||||
@@ -232,7 +220,15 @@ sub-option type description
|
||||
``'initial'`` int The number of tablets to start with
|
||||
===================================== ====== =============================================
|
||||
|
||||
.. scylladb_include_flag:: tablets-default.rst
|
||||
By default, a keyspace is created with tablets enabled. You can use the ``tablets`` option
|
||||
to opt out a keyspace from tablets-based distribution.
|
||||
|
||||
You may want to opt out if you plan to use features that are not supported for keyspaces
|
||||
with tablets enabled. Keyspaces using tablets must also remain :term:`RF-rack-valid <RF-rack-valid keyspace>`
|
||||
throughout their lifetime. See :ref:`Limitations and Unsupported Features <tablets-limitations>`
|
||||
for details.
|
||||
|
||||
**The ``initial`` sub-option (deprecated)**
|
||||
|
||||
A good rule of thumb to calculate initial tablets is to divide the expected total storage used
|
||||
by tables in this keyspace by (``replication_factor`` * 5GB). For example, if you expect a 30TB
|
||||
@@ -253,6 +249,14 @@ An example that creates a keyspace with 2048 tablets per table::
|
||||
|
||||
See :doc:`Data Distribution with Tablets </architecture/tablets>` for more information about tablets.
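As a rough worked example of the rule of thumb above (a sketch only - adapt the numbers to your own capacity planning): 30 TB of expected data with a replication factor of 3 gives roughly 30,000 GB / 15 GB = 2,000 tablets, which, rounded up to a power of two, matches the 2048 used in the example.

.. code-block:: python

   # Back-of-the-envelope sizing for the 'initial' sub-option.
   expected_storage_gb = 30 * 1000        # 30 TB of expected data
   replication_factor = 3
   target_tablet_size_gb = 5
   raw = expected_storage_gb / (replication_factor * target_tablet_size_gb)  # ~2000
   initial_tablets = 1 << (int(raw) - 1).bit_length()  # round up to a power of two -> 2048
   print(initial_tablets)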
|
||||
|
||||
Keyspace storage options :label-caution:`Experimental`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
By default, SSTables of a keyspace are stored locally.
|
||||
As an alternative, you can configure your keyspace to be stored
|
||||
on Amazon S3 or another S3-compatible object store.
|
||||
See :ref:`Keyspace storage options <admin-keyspace-storage-options>` for details.
|
||||
|
||||
.. _use-statement:
|
||||
|
||||
USE
|
||||
@@ -285,8 +289,8 @@ For instance::
|
||||
|
||||
The supported options are the same as :ref:`creating a keyspace <create-keyspace-statement>`.
|
||||
|
||||
ALTER KEYSPACE with Tablets :label-caution:`Experimental`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
ALTER KEYSPACE with Tablets
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Modifying a keyspace with tablets enabled is possible and doesn't require any special CQL syntax. However, there are some limitations:
|
||||
|
||||
@@ -295,6 +299,7 @@ Modifying a keyspace with tablets enabled is possible and doesn't require any sp
|
||||
- If there's any other ongoing global topology operation, executing the ``ALTER`` statement will fail (with an explicit and specific error) and needs to be repeated.
|
||||
- The ``ALTER`` statement may take longer than the regular query timeout, and even if it times out, it will continue to execute in the background.
|
||||
- The replication strategy cannot be modified, as keyspaces with tablets only support ``NetworkTopologyStrategy``.
|
||||
- The ``ALTER`` statement will fail if it would make the keyspace :term:`RF-rack-invalid <RF-rack-valid keyspace>`.
|
||||
|
||||
.. _drop-keyspace-statement:
|
||||
|
||||
|
||||
@@ -225,7 +225,9 @@ CREATE TYPE system.tablet_task_info (
|
||||
tablet_task_id uuid,
|
||||
request_time timestamp,
|
||||
sched_nr bigint,
|
||||
sched_time timestamp
|
||||
sched_time timestamp,
|
||||
repair_hosts_filter text,
|
||||
repair_dcs_filter text,
|
||||
)
|
||||
~~~
|
||||
|
||||
@@ -255,6 +257,8 @@ Only tables which use tablet-based replication strategy have an entry here.
|
||||
* `request_time` - The time the request is created.
|
||||
* `sched_nr` - Number of times the request has been scheduled by the repair scheduler.
|
||||
* `sched_time` - The time the request has been scheduled by the repair scheduler.
|
||||
* `repair_hosts_filter` - Repair replicas listed in the comma-separated host_id list.
|
||||
* `repair_dcs_filter` - Repair replicas listed in the comma-separated DC list.
|
||||
|
||||
`repair_scheduler_config` contains configuration for the repair scheduler. It contains the following values:
|
||||
* `auto_repair_enabled` - When set to true, auto repair is enabled. Disabled by default.
|
||||
|
||||
@@ -64,18 +64,20 @@ Briefly:
|
||||
- `/task_manager/list_module_tasks/{module}` -
|
||||
lists (by default non-internal) tasks in the module;
|
||||
- `/task_manager/task_status/{task_id}` -
|
||||
gets the task's status, unregisters the task if it's finished;
|
||||
gets the task's status;
|
||||
- `/task_manager/abort_task/{task_id}` -
|
||||
aborts the task if it's abortable;
|
||||
- `/task_manager/wait_task/{task_id}` -
|
||||
waits for the task and gets its status;
|
||||
- `/task_manager/task_status_recursive/{task_id}` -
|
||||
gets statuses of the task and all its descendants in BFS
|
||||
order, unregisters the task;
|
||||
order;
|
||||
- `/task_manager/ttl` -
|
||||
gets or sets new ttl.
|
||||
- `/task_manager/user_ttl` -
|
||||
gets or sets new user ttl.
|
||||
- `/task_manager/drain/{module}` -
|
||||
unregisters all finished local tasks in the module.
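As a rough sketch of how a client might drive these endpoints (it assumes the REST API listens on the default port 10000; the module name, task id and response field names below are illustrative):

```python
import requests

API = "http://127.0.0.1:10000"  # assumed default Scylla REST API address

# List (non-internal) tasks of one module and wait for the first one.
tasks = requests.get(f"{API}/task_manager/list_module_tasks/compaction").json()
if tasks:
    task_id = tasks[0]["task_id"]          # field name is illustrative
    status = requests.get(f"{API}/task_manager/wait_task/{task_id}").json()
    print(status)
```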
|
||||
|
||||
# Virtual tasks
|
||||
|
||||
|
||||
@@ -124,6 +124,9 @@ Additionally to specific node states, there entire topology can also be in a tra
|
||||
it from group 0. We also use this state to rollback a failed bootstrap or decommission.
|
||||
- `rollback_to_normal` - the decommission or removenode operation failed. Rollback the operation by
|
||||
moving the node we tried to decommission/remove back to the normal state.
|
||||
- `lock` - the topology stays in this state until externally changed (to null state), preventing topology
|
||||
requests from starting. Intended to be used in tests which want to prevent internally-triggered topology
|
||||
operations during the test.
|
||||
|
||||
When a node bootstraps, we create new tokens for it and a new CDC generation
|
||||
and enter the `commit_cdc_generation` state. Once the generation is committed,
|
||||
|
||||
@@ -193,6 +193,8 @@ ScyllaDB comes with its own version of the Apache Cassandra client tools, in the
|
||||
|
||||
We recommend uninstalling Apache Cassandra before installing :code:`scylla-tools`.
|
||||
|
||||
.. TODO Update the example below when a patch release for 2025.1 is available
|
||||
|
||||
.. _faq-pinning:
|
||||
|
||||
Can I install or upgrade to a patch release other than latest on Debian or Ubuntu?
|
||||
|
||||
@@ -18,7 +18,7 @@ For example, consider the following two workloads:
|
||||
- Slow queries
|
||||
- In essence - Latency agnostic
|
||||
|
||||
Using Service Level CQL commands, database administrators (working on Scylla Enterprise) can set different workload prioritization levels (levels of service) for each workload without sacrificing latency or throughput.
|
||||
Using Service Level CQL commands, database administrators (working on ScyllaDB) can set different workload prioritization levels (levels of service) for each workload without sacrificing latency or throughput.
|
||||
By assigning each service level to the different roles within your organization, DBAs ensure that each role_ receives the level of service the role requires.
|
||||
|
||||
.. _`role` : /operating-scylla/security/rbac_usecase/
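For instance, a minimal sketch of creating two service levels and attaching them to roles through the Python driver (the ``shares`` values, role names, and credentials are illustrative; adjust them to your cluster and authentication setup):

.. code-block:: python

   from cassandra.cluster import Cluster
   from cassandra.auth import PlainTextAuthProvider

   # Placeholder credentials - service level statements require an
   # authenticated session with the appropriate permissions.
   auth = PlainTextAuthProvider(username='scylla_admin', password='password')
   session = Cluster(['127.0.0.1'], auth_provider=auth).connect()

   session.execute("CREATE SERVICE_LEVEL IF NOT EXISTS interactive WITH shares = 1000")
   session.execute("CREATE SERVICE_LEVEL IF NOT EXISTS batch WITH shares = 200")
   session.execute("ATTACH SERVICE_LEVEL interactive TO web_user")
   session.execute("ATTACH SERVICE_LEVEL batch TO analytics_user")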
|
||||
@@ -425,7 +425,7 @@ In order for workload prioritization to take effect, application users need to b
|
||||
|
||||
Limits
|
||||
======
|
||||
Scylla Enterprise is limited to 8 service levels, including the default one; this means you can create up to 7 service levels.
|
||||
ScyllaDB is limited to 8 service levels, including the default one; this means you can create up to 7 service levels.
|
||||
|
||||
|
||||
Additional References
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
You can `build ScyllaDB from source <https://github.com/scylladb/scylladb#build-prerequisites>`_ on other x86_64 or aarch64 platforms, without any guarantees.
|
||||
|
||||
+----------------------------+--------------------+-------+---------------+
|
||||
| Linux Distributions |Ubuntu | Debian|Rocky / CentOS |
|
||||
| | | |/ RHEL |
|
||||
+----------------------------+------+------+------+-------+-------+-------+
|
||||
| ScyllaDB Version / Version |20.04 |22.04 |24.04 | 11 | 8 | 9 |
|
||||
+============================+======+======+======+=======+=======+=======+
|
||||
| 6.2 | |v| | |v| | |v| | |v| | |v| | |v| |
|
||||
+----------------------------+------+------+------+-------+-------+-------+
|
||||
| 6.1 | |v| | |v| | |v| | |v| | |v| | |v| |
|
||||
+----------------------------+------+------+------+-------+-------+-------+
|
||||
|
||||
* The recommended OS for ScyllaDB Open Source is Ubuntu 22.04.
|
||||
* All releases are available as a Docker container and EC2 AMI, GCP, and Azure images.
|
||||
|
||||
Supported Architecture
|
||||
-----------------------------
|
||||
|
||||
ScyllaDB Open Source supports x86_64 for all versions and AArch64 starting from ScyllaDB 4.6 and nightly build.
|
||||
In particular, aarch64 support includes AWS EC2 Graviton.
|
||||
@@ -110,7 +110,7 @@ Google Compute Engine (GCE)
|
||||
-----------------------------------
|
||||
|
||||
Pick a zone where Haswell CPUs are found. Local SSD performance offers, according to Google, less than 1 ms of latency and up to 680,000 read IOPS and 360,000 write IOPS.
|
||||
Image with NVMe disk interface is recommended, CentOS 7 for ScyllaDB Enterprise 2020.1 and older, and Ubuntu 20 for 2021.1 and later.
|
||||
Image with NVMe disk interface is recommended.
|
||||
(`More info <https://cloud.google.com/compute/docs/disks/local-ssd>`_)
|
||||
|
||||
Recommended instances types are `n1-highmem <https://cloud.google.com/compute/docs/general-purpose-machines#n1_machines>`_ and `n2-highmem <https://cloud.google.com/compute/docs/general-purpose-machines#n2_machines>`_
|
||||
|
||||
@@ -4,7 +4,7 @@ ScyllaDB Web Installer for Linux
|
||||
|
||||
ScyllaDB Web Installer is a platform-agnostic installation script you can run with ``curl`` to install ScyllaDB on Linux.
|
||||
|
||||
See `ScyllaDB Download Center <https://www.scylladb.com/download/#core>`_ for information on manually installing ScyllaDB with platform-specific installation packages.
|
||||
See :doc:`Install ScyllaDB Linux Packages </getting-started/install-scylla/install-on-linux/>` for information on manually installing ScyllaDB with platform-specific installation packages.
|
||||
|
||||
Prerequisites
|
||||
--------------
|
||||
@@ -20,44 +20,50 @@ To install ScyllaDB with Web Installer, run:
|
||||
|
||||
curl -sSf get.scylladb.com/server | sudo bash
|
||||
|
||||
By default, running the script installs the latest official version of ScyllaDB Open Source. You can use the following
|
||||
options to install a different version or ScyllaDB Enterprise:
|
||||
|
||||
.. list-table::
|
||||
:widths: 20 25 55
|
||||
:header-rows: 1
|
||||
|
||||
* - Option
|
||||
- Acceptable values
|
||||
- Description
|
||||
* - ``--scylla-product``
|
||||
- ``scylla`` | ``scylla-enterprise``
|
||||
- Specifies the ScyllaDB product to install: Open Source (``scylla``) or Enterprise (``scylla-enterprise``). The default is ``scylla``.
|
||||
* - ``--scylla-version``
|
||||
- ``<version number>``
|
||||
- Specifies the ScyllaDB version to install. You can specify the major release (``x.y``) to install the latest patch for that version or a specific patch release (``x.y.z``). The default is the latest official version.
|
||||
By default, running the script installs the latest official version of ScyllaDB.
|
||||
|
||||
You can run the command with the ``-h`` or ``--help`` flag to print information about the script.
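For example, to print the script's help without installing anything:

.. code-block:: console

   curl -sSf get.scylladb.com/server | sudo bash -s -- --help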
|
||||
|
||||
Examples
|
||||
===========
|
||||
Installing a Non-default Version
|
||||
---------------------------------------
|
||||
|
||||
Installing ScyllaDB Open Source 6.0.1:
|
||||
You can install a version other than the default.
|
||||
|
||||
Versions 2025.1 and Later
|
||||
==============================
|
||||
|
||||
Run the command with the ``--scylla-version`` option to specify the version
|
||||
you want to install.
|
||||
|
||||
**Example**
|
||||
|
||||
.. code:: console
|
||||
|
||||
curl -sSf get.scylladb.com/server | sudo bash -s -- --scylla-version 2025.1.1
|
||||
|
||||
curl -sSf get.scylladb.com/server | sudo bash -s -- --scylla-version 6.0.1
|
||||
|
||||
Installing the latest patch release for ScyllaDB Open Source 6.0:
|
||||
Versions Earlier than 2025.1
|
||||
================================
|
||||
|
||||
To install a supported version of *ScyllaDB Enterprise*, run the command with:
|
||||
|
||||
* ``--scylla-product scylla-enterprise`` to specify that you want to install
|
||||
ScyllaDB Enterprise.
|
||||
* ``--scylla-version`` to specify the version you want to install.
|
||||
|
||||
For example:
|
||||
|
||||
.. code:: console
|
||||
|
||||
curl -sSf get.scylladb.com/server | sudo bash -s -- --scylla-product scylla-enterprise --scylla-version 2024.1
|
||||
|
||||
curl -sSf get.scylladb.com/server | sudo bash -s -- --scylla-version 6.0
|
||||
To install a supported version of *ScyllaDB Open Source*, run the command with
|
||||
the ``--scylla-version`` option to specify the version you want to install.
|
||||
|
||||
Installing ScyllaDB Enterprise 2024.1:
|
||||
For example:
|
||||
|
||||
.. code:: console
|
||||
|
||||
curl -sSf get.scylladb.com/server | sudo bash -s -- --scylla-product scylla-enterprise --scylla-version 2024.1
|
||||
|
||||
curl -sSf get.scylladb.com/server | sudo bash -s -- --scylla-version 6.2.1
|
||||
|
||||
.. include:: /getting-started/_common/setup-after-install.rst
|
||||
@@ -1,13 +1,38 @@
|
||||
OS Support by Linux Distributions and Version
|
||||
==============================================
|
||||
|
||||
The following matrix shows which Linux distributions, containers, and images are supported with which versions of ScyllaDB.
|
||||
The following matrix shows which Linux distributions, containers, and images
|
||||
are :ref:`supported <os-support-definition>` with which versions of ScyllaDB.
|
||||
|
||||
Where *supported* in this scope means:
|
||||
+-------------------------------+--------------------------+-------+------------------+---------------+
|
||||
| Linux Distributions |Ubuntu | Debian| Rocky / CentOS / | Amazon Linux |
|
||||
| | | | RHEL | |
|
||||
+-------------------------------+------+------+------------+-------+-------+----------+---------------+
|
||||
| ScyllaDB Version / OS Version |20.04 |22.04 |24.04 | 11 | 8 | 9 | 2023 |
|
||||
+===============================+======+======+============+=======+=======+==========+===============+
|
||||
| Enterprise 2025.1 | |v| | |v| | |v| | |v| | |v| | |v| | |v| |
|
||||
+-------------------------------+------+------+------------+-------+-------+----------+---------------+
|
||||
| Enterprise 2024.2 | |v| | |v| | |v| | |v| | |v| | |v| | |v| |
|
||||
+-------------------------------+------+------+------------+-------+-------+----------+---------------+
|
||||
| Enterprise 2024.1 | |v| | |v| | |v| ``*`` | |v| | |v| | |v| | |x| |
|
||||
+-------------------------------+------+------+------------+-------+-------+----------+---------------+
|
||||
| Open Source 6.2 | |v| | |v| | |v| | |v| | |v| | |v| | |v| |
|
||||
+-------------------------------+------+------+------------+-------+-------+----------+---------------+
|
||||
|
||||
``*`` 2024.1.9 and later
|
||||
|
||||
All releases are available as a Docker container, EC2 AMI, GCP, and Azure images.
|
||||
|
||||
.. _os-support-definition:
|
||||
|
||||
In this context, *supported* means that:
|
||||
|
||||
- A binary installation package is available to `download <https://www.scylladb.com/download/>`_.
|
||||
- The download and install procedures are tested as part of ScyllaDB release process for each version.
|
||||
- An automated install is included from :doc:`ScyllaDB Web Installer for Linux tool </getting-started/installation-common/scylla-web-installer>` (for latest versions)
|
||||
- The download and install procedures are tested as part of the ScyllaDB release process for each version.
|
||||
- An automated install is included from :doc:`ScyllaDB Web Installer for Linux tool </getting-started/installation-common/scylla-web-installer>` (for the latest versions).
|
||||
|
||||
You can `build ScyllaDB from source <https://github.com/scylladb/scylladb#build-prerequisites>`_
|
||||
on other x86_64 or aarch64 platforms, without any guarantees.
|
||||
|
||||
|
||||
.. scylladb_include_flag:: os-support-info.rst
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ ScyllaDB Requirements
|
||||
:hidden:
|
||||
|
||||
system-requirements
|
||||
os-support
|
||||
OS Support <os-support>
|
||||
Cloud Instance Recommendations <cloud-instance-recommendations>
|
||||
scylla-in-a-shared-environment
|
||||
|
||||
|
||||
@@ -2,19 +2,6 @@
|
||||
ScyllaDB Seed Nodes
|
||||
===================
|
||||
|
||||
**Topic: ScyllaDB Seed Nodes Overview**
|
||||
|
||||
**Learn: What a seed node is, and how they should be used in a ScyllaDB Cluster**
|
||||
|
||||
**Audience: ScyllaDB Administrators**
|
||||
|
||||
|
||||
What is the Function of a Seed Node in ScyllaDB?
|
||||
------------------------------------------------
|
||||
|
||||
.. note::
|
||||
Seed nodes function was changed in ScyllaDB Open Source 4.3 and ScyllaDB Enterprise 2021.1; if you are running an older version, see :ref:`Older Version Of ScyllaDB <seeds-older-versions>`.
|
||||
|
||||
A ScyllaDB seed node is a node specified with the ``seeds`` configuration parameter in ``scylla.yaml``. It is used by a new node joining the cluster as its first contact point.
|
||||
It allows nodes to discover the cluster ring topology on startup (when joining the cluster). In other words, whenever a node joins the cluster, it needs to learn:
|
||||
|
||||
@@ -22,27 +9,8 @@ It allows nodes to discover the cluster ring topology on startup (when joining t
|
||||
- Which token ranges are available
|
||||
- Which nodes will own which tokens when a new node joins the cluster
|
||||
|
||||
**Once the nodes have joined the cluster, seed node has no function.**
|
||||
**Once the nodes have joined the cluster, the seed node has no function.**
|
||||
|
||||
The first node in a new cluster needs to be a seed node.
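For reference, a minimal sketch of how the ``seeds`` parameter typically appears in ``scylla.yaml`` (the class name follows the default seed provider, and the IP address is an example):

.. code-block:: yaml

   seed_provider:
     - class_name: org.apache.cassandra.locator.SimpleSeedProvider
       parameters:
         - seeds: "192.168.1.101"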
|
||||
|
||||
.. _seeds-older-versions:
|
||||
|
||||
Older Version Of ScyllaDB
|
||||
-------------------------
|
||||
|
||||
In ScyllaDB releases older than ScyllaDB Open Source 4.3 and ScyllaDB Enterprise 2021.1, the seed node had one more function: it assisted with :doc:`gossip </kb/gossip>` convergence.
|
||||
Gossiping with other nodes ensures that any update to the cluster is propagated across the cluster. This includes detecting and alerting whenever a node goes down, comes back, or is removed from the cluster.
|
||||
|
||||
This function was removed, as described in `Seedless NoSQL: Getting Rid of Seed Nodes in ScyllaDB <https://www.scylladb.com/2020/09/22/seedless-nosql-getting-rid-of-seed-nodes-in-scylla/>`_.
|
||||
|
||||
If you run an older ScyllaDB release, we recommend upgrading to version 4.3 (ScyllaDB Open Source) or 2021.1 (ScyllaDB Enterprise) or later. If you choose to run an older version, it is good practice to follow these guidelines:
|
||||
|
||||
* The first node in a new cluster needs to be a seed node.
|
||||
* Ensure that all nodes in the cluster have the same seed nodes listed in each node's scylla.yaml.
|
||||
* To maintain resiliency of the cluster, it is recommended to have more than one seed node in the cluster.
|
||||
* If you have more than one seed in a DC with multiple racks (or availability zones), make sure to put your seeds in different racks.
|
||||
* You must have at least one node that is not a seed node. You cannot create a cluster where all nodes are seed nodes.
|
||||
* You should have more than one seed node.
|
||||
|
||||
The first node in a new cluster must be a seed node. In typical scenarios,
|
||||
there's no need to configure more than one seed node.
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
* :doc:`cassandra-stress </operating-scylla/admin-tools/cassandra-stress/>` - A tool for benchmarking and load testing ScyllaDB and Cassandra clusters.
|
||||
* :doc:`SSTabledump </operating-scylla/admin-tools/sstabledump>`
|
||||
* :doc:`SSTableMetadata </operating-scylla/admin-tools/sstablemetadata>`
|
||||
* configuration_encryptor - :doc:`encrypt at rest </operating-scylla/security/encryption-at-rest>` sensitive ScyllaDB configuration entries using a system key.
|
||||
* scylla local-file-key-generator - Generate a local file (system) key for :doc:`encryption at rest </operating-scylla/security/encryption-at-rest>`, with the provided length, key algorithm, algorithm block mode, and algorithm padding method.
|
||||
* `scyllatop <https://www.scylladb.com/2016/03/22/scyllatop/>`_ - A terminal-based top-like tool for ScyllaDB collectd/Prometheus metrics.
|
||||
* :doc:`scylla_dev_mode_setup</getting-started/installation-common/dev-mod>` - run ScyllaDB in Developer Mode.
|
||||
|
||||
@@ -3,275 +3,6 @@ Cassandra Stress
|
||||
|
||||
The cassandra-stress tool is used for benchmarking and load testing both ScyllaDB and Cassandra clusters. The cassandra-stress tool also supports testing arbitrary CQL tables and queries to allow users to benchmark their data model.
|
||||
|
||||
This documentation focuses on user mode as this allows the testing of your actual schema.
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
There are several operation types:
|
||||
|
||||
* write-only, read-only, and mixed workloads of standard data
|
||||
* write-only and read-only workloads for counter columns
|
||||
* user configured workloads, running custom queries on custom schemas
|
||||
* The syntax is ``cassandra-stress <command> [options]``. For more information on a given command or option, run ``cassandra-stress help``.
|
||||
|
||||
Commands:
|
||||
|
||||
read: Multiple concurrent reads - the cluster must first be populated by a write test.
|
||||
|
||||
write: Multiple concurrent writes against the cluster.
|
||||
|
||||
mixed: Interleaving of any basic commands, with configurable ratio and distribution - the cluster must first be populated by a write test.
|
||||
|
||||
counter_write: Multiple concurrent updates of counters.
|
||||
|
||||
counter_read: Multiple concurrent reads of counters. The cluster must first be populated by a counter_write test.
|
||||
|
||||
user: Interleaving of user provided queries, with configurable ratio and distribution.
|
||||
|
||||
help: Print help for a command or option.
|
||||
|
||||
print: Inspect the output of a distribution definition.
|
||||
|
||||
legacy: Legacy support mode.
|
||||
|
||||
Primary Options:
|
||||
|
||||
-pop: Population distribution and intra-partition visit order.
|
||||
|
||||
-insert: Insert specific options relating to various methods for batching and splitting partition updates.
|
||||
|
||||
-col: Column details such as size and count distribution, data generator, names, comparator and if super columns should be used.
|
||||
|
||||
-rate: Thread count, rate limit or automatic mode (default is auto).
|
||||
|
||||
-mode: CQL with options.
|
||||
|
||||
-errors: How to handle errors when encountered during stress.
|
||||
|
||||
-sample: Specify the number of samples to collect for measuring latency.
|
||||
|
||||
-schema: Replication settings, compression, compaction, etc.
|
||||
|
||||
-node: Nodes to connect to.
|
||||
|
||||
-log: Where to log progress to, and the interval at which to do it.
|
||||
|
||||
-transport: Custom transport factories.
|
||||
|
||||
-port: The port to connect to cassandra nodes on.
|
||||
|
||||
-sendto: Specify a stress server to send this command to.
|
||||
|
||||
-graph: Graph recorded metrics.
|
||||
|
||||
-tokenrange: Token range settings.
|
||||
|
||||
User mode
|
||||
---------
|
||||
|
||||
User mode allows you to stress test your own schemas. This can save time in the long run, rather than building an application and then realising your schema doesn’t scale.
|
||||
|
||||
Profile
|
||||
.......
|
||||
|
||||
User mode requires a profile defined in YAML. Multiple YAML files may be specified in which case operations in the ops argument are referenced as specname.opname.
|
||||
|
||||
An identifier for the profile:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
specname: staff_activities
|
||||
|
||||
The keyspace for the test:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
keyspace: staff
|
||||
|
||||
CQL for the keyspace. Optional if the keyspace already exists:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
keyspace_definition: |
|
||||
CREATE KEYSPACE stresscql WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3};
|
||||
|
||||
The table to be stressed:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
table: staff_activities
|
||||
|
||||
CQL for the table. Optional if the table already exists:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
table_definition: |
|
||||
CREATE TABLE staff_activities (
|
||||
name text,
|
||||
when timeuuid,
|
||||
what text,
|
||||
PRIMARY KEY(name, when, what)
|
||||
)
|
||||
|
||||
Optional meta information on the generated columns in the above table. The min and max only apply to text and blob types. The distribution field represents the total unique population distribution of that column across rows:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
columnspec:
|
||||
- name: name
|
||||
size: uniform(5..10) # The names of the staff members are between 5-10 characters
|
||||
population: uniform(1..10) # 10 possible staff members to pick from
|
||||
- name: when
|
||||
cluster: uniform(20..500) # Staff members do between 20 and 500 events
|
||||
- name: what
|
||||
size: normal(10..100,50)
|
||||
|
||||
Supported types are:
|
||||
|
||||
An exponential distribution over the range [min..max]:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
EXP(min..max)
|
||||
|
||||
An extreme value (Weibull) distribution over the range [min..max]:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
EXTREME(min..max,shape)
|
||||
|
||||
A gaussian/normal distribution, where mean=(min+max)/2, and stdev is (mean-min)/stdvrng:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
GAUSSIAN(min..max,stdvrng)
|
||||
|
||||
A gaussian/normal distribution, with explicitly defined mean and stdev:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
GAUSSIAN(min..max,mean,stdev)
|
||||
|
||||
A uniform distribution over the range [min, max]:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
UNIFORM(min..max)
|
||||
|
||||
A fixed distribution, always returning the same value:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
FIXED(val)
|
||||
|
||||
If preceded by ``~``, the distribution is inverted.
|
||||
|
||||
Defaults for all columns are size: uniform(4..8), population: uniform(1..100B), cluster: fixed(1)
|
||||
|
||||
Insert distributions:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
insert:
|
||||
# How many partitions to insert per batch
|
||||
partitions: fixed(1)
|
||||
# How many rows to update per partition
|
||||
select: fixed(1)/500
|
||||
# UNLOGGED or LOGGED batch for insert
|
||||
batchtype: UNLOGGED
|
||||
|
||||
Currently all inserts are done inside batches.
|
||||
|
||||
Read statements to use during the test:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
queries:
|
||||
events:
|
||||
cql: select * from staff_activities where name = ?
|
||||
fields: samerow
|
||||
latest_event:
|
||||
cql: select * from staff_activities where name = ? LIMIT 1
|
||||
fields: samerow
|
||||
|
||||
Running a user mode test:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
cassandra-stress user profile=./example.yaml duration=1m "ops(insert=1,latest_event=1,events=1)" truncate=once
|
||||
|
||||
This will create the schema, then run tests for 1 minute with an equal number of inserts, latest_event queries, and events queries. Additionally, the table will be truncated once before the test.
|
||||
|
||||
The full example can be found in the example YAML file.
|
||||
|
||||
Running a user mode test with multiple yaml files:
|
||||
|
||||
.. code-block::
|
||||
|
||||
cassandra-stress user profile=./example.yaml,./example2.yaml duration=1m "ops(ex1.insert=1,ex1.latest_event=1,ex2.insert=2)" truncate=once
|
||||
|
||||
This will run operations as specified in both the example.yaml and example2.yaml files. example.yaml and example2.yaml can reference the same table
|
||||
although care must be taken that the table definition is identical (data generation specs can be different).
|
||||
|
||||
.. Lightweight transaction support
|
||||
.. ...............................
|
||||
|
||||
.. cassandra-stress supports lightweight transactions. In this it will first read current data from Cassandra and then uses read value(s) to fulfill lightweight transaction condition(s).
|
||||
|
||||
.. Lightweight transaction update query:
|
||||
|
||||
.. .. code-block:: cql
|
||||
|
||||
.. queries:
|
||||
.. regularupdate:
|
||||
.. cql: update blogposts set author = ? where domain = ? and published_date = ?
|
||||
.. fields: samerow
|
||||
.. updatewithlwt:
|
||||
.. cql: update blogposts set author = ? where domain = ? and published_date = ? IF body = ? AND url = ?
|
||||
.. fields: samerow
|
||||
|
||||
Graphing
|
||||
........
|
||||
|
||||
Graphs can be generated for each run of stress.
|
||||
|
||||
.. image:: example-stress-graph.png
|
||||
|
||||
To create a new graph:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
cassandra-stress user profile=./stress-example.yaml "ops(insert=1,latest_event=1,events=1)" -graph file=graph.html title="Awesome graph"
|
||||
|
||||
To add a new run to an existing graph point to an existing file and add a revision name:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
cassandra-stress user profile=./stress-example.yaml duration=1m "ops(insert=1,latest_event=1,events=1)" -graph file=graph.html title="Awesome graph" revision="Second run"
|
||||
|
||||
FAQ
|
||||
...
|
||||
|
||||
How do you use NetworkTopologyStrategy for the keyspace?
|
||||
|
||||
Use the schema option, making sure to either escape the parentheses or enclose the argument in quotes:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
cassandra-stress write -schema "replication(strategy=NetworkTopologyStrategy,datacenter1=3)"
|
||||
|
||||
How do you use SSL?
|
||||
|
||||
Use the transport option:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
cassandra-stress "write n=100k cl=ONE no-warmup" -transport "truststore=$HOME/jks/truststore.jks truststore-password=cassandra"
|
||||
|
||||
|
||||
Cassandra Stress is not part of ScyllaDB and is not distributed alongside it anymore. It has its own separate repository and release cycle. More information about it can be found on `GitHub <https://github.com/scylladb/cassandra-stress>`_ or on `DockerHub <https://hub.docker.com/r/scylladb/cassandra-stress>`_.
|
||||
|
||||
.. include:: /rst_include/apache-copyrights.rst
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ Bulk loads SSTables from a directory to a ScyllaDB cluster via the **CQL API**.
|
||||
|
||||
.. warning::
|
||||
|
||||
SSTableLoader is deprecated since ScyllaDB 6.2 and will be removed in the next release.
|
||||
SSTableLoader is deprecated and will be removed in a future release.
|
||||
Please consider switching to :doc:`nodetool refresh --load-and-stream </operating-scylla/nodetool-commands/refresh>`.
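A hedged sketch of the suggested alternative (the keyspace and table names are examples; the SSTables are first placed in the table's ``upload`` directory on the node):

.. code-block:: console

   # after copying the SSTables into the table's upload directory:
   nodetool refresh --load-and-stream my_keyspace my_table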
|
||||
|
||||
.. note::
|
||||
|
||||
@@ -74,13 +74,13 @@ API calls
|
||||
- *keyspace* - if set, tasks are filtered to contain only the ones working on this keyspace;
|
||||
- *table* - if set, tasks are filtered to contain only the ones working on this table;
|
||||
|
||||
* ``/task_manager/task_status/{task_id}`` - gets the task's status, unregisters the task if it's finished;
|
||||
* ``/task_manager/task_status/{task_id}`` - gets the task's status;
|
||||
* ``/task_manager/abort_task/{task_id}`` - aborts the task if it's abortable, otherwise 403 status code is returned;
|
||||
* ``/task_manager/wait_task/{task_id}`` - waits for the task and gets its status (does not unregister the tasks); query params:
|
||||
* ``/task_manager/wait_task/{task_id}`` - waits for the task and gets its status; query params:
|
||||
|
||||
- *timeout* - timeout in seconds; if set - 408 status code is returned if waiting times out;
|
||||
|
||||
* ``/task_manager/task_status_recursive/{task_id}`` - gets statuses of the task and all its descendants in BFS order, unregisters the root task;
|
||||
* ``/task_manager/task_status_recursive/{task_id}`` - gets statuses of the task and all its descendants in BFS order;
|
||||
* ``/task_manager/ttl`` - gets or sets new ttl; query params (if setting):
|
||||
|
||||
- *ttl* - new ttl value.
|
||||
@@ -89,6 +89,8 @@ API calls
|
||||
|
||||
- *user_ttl* - new user ttl value.
|
||||
|
||||
* ``/task_manager/drain/{module}`` - unregisters all finished local tasks in the module.
|
||||
|
||||
Cluster tasks are not unregistered from task manager with API calls.
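As an illustration, the endpoints above can be called over ScyllaDB's REST API with ``curl``; this sketch assumes the default REST API address and port (``127.0.0.1:10000``), and the task ID is hypothetical:

.. code-block:: console

   # get the status of a single task
   curl -s "http://127.0.0.1:10000/task_manager/task_status/675ed9f4-1a2b-4c3d-8e9f-0a1b2c3d4e5f"

   # wait up to 60 seconds for the task to finish and get its status
   curl -s "http://127.0.0.1:10000/task_manager/wait_task/675ed9f4-1a2b-4c3d-8e9f-0a1b2c3d4e5f?timeout=60"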
|
||||
|
||||
Tasks API
|
||||
|
||||
@@ -257,8 +257,6 @@ ScyllaDB uses experimental flags to expose non-production-ready features safely.
|
||||
In recent ScyllaDB versions, these features are controlled by the ``experimental_features`` list in scylla.yaml, allowing you to choose which experimental features to enable.
|
||||
Use ``scylla --help`` to get the list of experimental features.
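For example, to quickly locate that part of the help output (the ``grep`` filter is just a convenience; adjust the number of context lines as needed):

.. code-block:: console

   scylla --help | grep -A 3 experimental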
|
||||
|
||||
ScyllaDB Enterprise and ScyllaDB Cloud do not officially support experimental features.
|
||||
|
||||
.. _admin-keyspace-storage-options:
|
||||
|
||||
Keyspace storage options
|
||||
@@ -286,6 +284,24 @@ Before creating keyspaces with object storage, you also need to
|
||||
:ref:`configure <object-storage-configuration>` the object storage
|
||||
credentials and endpoint.
|
||||
|
||||
.. _admin-views-with-tablets:
|
||||
|
||||
Views with tablets
|
||||
------------------
|
||||
|
||||
By default, Materialized Views (MV) and Secondary Indexes (SI)
|
||||
are disabled in keyspaces that use tablets.
|
||||
|
||||
Support for MV and SI with tablets is experimental and must be explicitly
|
||||
enabled in the ``scylla.yaml`` configuration file by specifying
|
||||
the ``views-with-tablets`` option:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
experimental_features:
|
||||
- views-with-tablets
|
||||
|
||||
|
||||
Monitoring
|
||||
==========
|
||||
ScyllaDB exposes interfaces for online monitoring, as described below.
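For example, each node exposes Prometheus-format metrics over HTTP; this sketch assumes the default metrics port (9180):

.. code-block:: console

   curl -s http://127.0.0.1:9180/metrics | head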
|
||||
|
||||
21
docs/operating-scylla/nodetool-commands/tasks/drain.rst
Normal file
@@ -0,0 +1,21 @@
|
||||
Nodetool tasks drain
|
||||
====================
|
||||
**tasks drain** - Unregisters all finished local tasks from the module.
|
||||
If a module is not specified, finished tasks in all modules are unregistered.
|
||||
|
||||
Syntax
|
||||
-------
|
||||
.. code-block:: console
|
||||
|
||||
nodetool tasks drain [--module <module>]
|
||||
|
||||
Options
|
||||
-------
|
||||
|
||||
* ``--module`` - if set, only the specified module is drained.
|
||||
|
||||
For example:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
> nodetool tasks drain --module repair
|
||||
@@ -5,6 +5,7 @@ Nodetool tasks
|
||||
:hidden:
|
||||
|
||||
abort <abort>
|
||||
drain <drain>
|
||||
user-ttl <user-ttl>
|
||||
list <list>
|
||||
modules <modules>
|
||||
@@ -23,15 +24,12 @@ Task Status Retention
|
||||
|
||||
* When a task completes, its status is temporarily stored on the executing node
|
||||
* Status information is retained for up to :confval:`task_ttl_in_seconds` seconds
|
||||
* The status information of a completed task is automatically removed after being queried with ``tasks status`` or ``tasks tree``
|
||||
* ``tasks wait`` returns the status, but it does not remove the task information of the queried task
|
||||
|
||||
.. note:: Multiple status queries using ``tasks status`` and ``tasks tree`` for the same completed task will only receive a response for the first query, since the status is removed after being retrieved.
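A hedged sketch of this behavior (the task ID is hypothetical):

.. code-block:: console

   # first query: returns the final status and removes it from the node
   nodetool tasks status 675ed9f4-1a2b-4c3d-8e9f-0a1b2c3d4e5f

   # a second query for the same completed task no longer finds the status
   nodetool tasks status 675ed9f4-1a2b-4c3d-8e9f-0a1b2c3d4e5f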
|
||||
|
||||
Supported tasks suboperations
|
||||
-----------------------------
|
||||
|
||||
* :doc:`abort </operating-scylla/nodetool-commands/tasks/abort>` - Aborts the task.
|
||||
* :doc:`drain </operating-scylla/nodetool-commands/tasks/drain>` - Unregisters all finished local tasks.
|
||||
* :doc:`user-ttl </operating-scylla/nodetool-commands/tasks/user-ttl>` - Gets or sets user_task_ttl value.
|
||||
* :doc:`list </operating-scylla/nodetool-commands/tasks/list>` - Lists tasks in the module.
|
||||
* :doc:`modules </operating-scylla/nodetool-commands/tasks/modules>` - Lists supported modules.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
Nodetool tasks status
|
||||
=========================
|
||||
**tasks status** - Gets the status of a task manager task. If the task was finished it is unregistered.
|
||||
**tasks status** - Gets the status of a task manager task.
|
||||
|
||||
Syntax
|
||||
-------
|
||||
@@ -23,10 +23,10 @@ Example output
|
||||
type: repair
|
||||
kind: node
|
||||
scope: keyspace
|
||||
state: done
|
||||
state: running
|
||||
is_abortable: true
|
||||
start_time: 2024-07-29T15:48:55Z
|
||||
end_time: 2024-07-29T15:48:55Z
|
||||
end_time:
|
||||
error:
|
||||
parent_id: none
|
||||
sequence_number: 5
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
Nodetool tasks tree
|
||||
=======================
|
||||
**tasks tree** - Gets the statuses of a task manager task and all its descendants.
|
||||
The statuses are listed in BFS order. If the task was finished it is unregistered.
|
||||
The statuses are listed in BFS order.
|
||||
|
||||
If task_id isn't specified, trees of all non-internal tasks are printed
|
||||
(internal tasks are the ones that have a parent or cover an operation that
|
||||
|
||||
@@ -7,8 +7,8 @@ Even though ScyllaDB is a fault-tolerant system, it is recommended to regularly
|
||||
* Backup is a per-node procedure. Make sure to back up each node in your
|
||||
cluster. For cluster-wide backup and restore, see `ScyllaDB Manager <https://manager.docs.scylladb.com/stable/restore/>`_.
|
||||
* Backup works the same for non-encrypted and encrypted SStables. You can use
|
||||
`Encryption at Rest <https://enterprise.docs.scylladb.com/stable/operating-scylla/security/encryption-at-rest.html>`_
|
||||
available in ScyllaDB Enterprise without affecting the backup procedure.
|
||||
:doc:`Encryption at Rest </operating-scylla/security/encryption-at-rest>`
|
||||
without affecting the backup procedure.
|
||||
|
||||
You can choose one of the following:
|
||||
|
||||
|
||||
@@ -77,7 +77,7 @@ Procedure
|
||||
|
||||
.. note::
|
||||
|
||||
ScyllaDB Open Source 3.0 and later and ScyllaDB Enterprise 2019.1 and later support :doc:`Materialized View(MV) </features/materialized-views>` and :doc:`Secondary Index(SI) </features/secondary-indexes>`.
|
||||
ScyllaDB supports :doc:`Materialized View(MV) </features/materialized-views>` and :doc:`Secondary Index(SI) </features/secondary-indexes>`.
|
||||
|
||||
When migrating data from Apache Cassandra with MV or SI, you can either:
|
||||
|
||||
|
||||
@@ -1,10 +1,13 @@
|
||||
.. Note::
|
||||
|
||||
Make sure to use the same ScyllaDB **patch release** on the new/replaced node, to match the rest of the cluster. It is not recommended to add a new node with a different release to the cluster.
|
||||
For example, use the following for installing ScyllaDB patch release (use your deployed version)
|
||||
For example, use the following for installing ScyllaDB patch release (use your deployed version):
|
||||
|
||||
.. code::
|
||||
|
||||
sudo yum install scylla-2025.1.0
|
||||
|
||||
* ScyllaDB Enterprise - ``sudo yum install scylla-enterprise-2018.1.9``
|
||||
|
||||
* ScyllaDB open source - ``sudo yum install scylla-3.0.3``
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -202,6 +202,7 @@ Add New DC
|
||||
|
||||
#. If you are using ScyllaDB Monitoring, update the `monitoring stack <https://monitoring.docs.scylladb.com/stable/install/monitoring_stack.html#configure-scylla-nodes-from-files>`_ to monitor it. If you are using ScyllaDB Manager, make sure you install the `Manager Agent <https://manager.docs.scylladb.com/stable/install-scylla-manager-agent.html>`_ and Manager can access the new DC.
|
||||
|
||||
.. _add-dc-to-existing-dc-not-connect-clients:
|
||||
|
||||
Configure the Client not to Connect to the New DC
|
||||
-------------------------------------------------
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
=========================================================
|
||||
Preventing Quorum Loss in Symmetrical Multi-DC Clusters
|
||||
=========================================================
|
||||
|
||||
ScyllaDB requires at least a quorum (majority) of nodes in a cluster to be up
|
||||
and communicate with each other. A cluster that loses a quorum can handle reads
|
||||
and writes of user data, but cluster management operations, such as schema and
|
||||
topology updates, are impossible.
|
||||
|
||||
In clusters that are symmetrical, i.e., have two datacenters (DCs) with the same number of
|
||||
nodes, losing a quorum may occur if one of the DCs becomes unavailable.
|
||||
For example, if one DC fails in a 2-DC cluster where each DC has three nodes,
|
||||
only three out of six nodes are available, and the quorum is lost.
|
||||
|
||||
Adding another DC would mitigate the risk of losing a quorum, but it comes
|
||||
with network and storage costs. To prevent the quorum loss with minimum costs,
|
||||
you can configure an arbiter (tie-breaker) DC.
|
||||
|
||||
An arbiter DC is a datacenter with a :doc:`zero-token node </architecture/zero-token-nodes>`
|
||||
-- a node that doesn't replicate any data but is only used for Raft quorum
|
||||
voting. An arbiter DC maintains the cluster quorum if one of the other DCs
|
||||
fails, while it doesn't incur extra network and storage costs as it has no
|
||||
user data.
|
||||
|
||||
Adding an Arbiter DC
|
||||
-----------------------
|
||||
|
||||
To set up an arbiter DC, follow the procedure to
|
||||
:doc:`add a new datacenter to an existing cluster </operating-scylla/procedures/cluster-management/add-dc-to-existing-dc/>`.
|
||||
When editing the *scylla.yaml* file, set the ``join_ring`` parameter to
|
||||
``false`` following these guidelines:
|
||||
|
||||
* Set ``join_ring=false`` before you start the node(s). If you set that
|
||||
parameter on a node that has already been bootstrapped and owns a token
|
||||
range, the node startup will fail. In such a case, you'll need to
|
||||
:doc:`decommission </operating-scylla/procedures/cluster-management/decommissioning-data-center>`
|
||||
the node, :doc:`wipe it clean </operating-scylla/procedures/cluster-management/clear-data>`,
|
||||
and add it back to the arbiter DC properly following
|
||||
the :doc:`procedure </operating-scylla/procedures/cluster-management/add-dc-to-existing-dc/>`.
|
||||
* As a rule, one node is sufficient for an arbiter to serve as a tie-breaker.
|
||||
In case you add more than one node to the arbiter DC, ensure that you set
|
||||
``join_ring=false`` on all the nodes in that DC.
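A minimal sketch of the corresponding ``scylla.yaml`` setting on the arbiter node (everything else in the file stays as configured by the add-DC procedure):

.. code-block:: yaml

   # scylla.yaml on the arbiter (zero-token) node
   join_ring: false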
|
||||
|
||||
Follow-up steps:
|
||||
^^^^^^^^^^^^^^^^^^^
|
||||
* An arbiter DC has a replication factor of 0 (RF=0) for all keyspaces. You
|
||||
need to ``ALTER`` the keyspaces to update their RF.
|
||||
* Since zero-token nodes are ignored by drivers, you can skip
|
||||
:ref:`configuring the client not to connect to the new DC <add-dc-to-existing-dc-not-connect-clients>`.
|
||||
|
||||
References
|
||||
----------------
|
||||
|
||||
* :doc:`Zero-token Nodes </architecture/zero-token-nodes>`
|
||||
* :doc:`Raft Consensus Algorithm in ScyllaDB </architecture/raft>`
|
||||
* :doc:`Handling Node Failures </troubleshooting/handling-node-failures>`
|
||||
* :doc:`Adding a New Data Center Into an Existing ScyllaDB Cluster </operating-scylla/procedures/cluster-management/add-dc-to-existing-dc/>`
|
||||
@@ -209,6 +209,17 @@ In this example, we will show how to install a nine nodes cluster.
|
||||
UN 54.187.142.201 109.54 KB 256 ? d99967d6-987c-4a54-829d-86d1b921470f RACK1
|
||||
UN 54.187.168.20 109.54 KB 256 ? 2329c2e0-64e1-41dc-8202-74403a40f851 RACK1
|
||||
|
||||
See also:
|
||||
--------------------------
|
||||
Preventing Quorum Loss
|
||||
--------------------------
|
||||
|
||||
If your cluster is symmetrical, i.e., it has an even number of datacenters
|
||||
with the same number of nodes, consider adding an arbiter DC to mitigate
|
||||
the risk of losing a quorum at a minimum cost.
|
||||
See :doc:`Preventing Quorum Loss in Symmetrical Multi-DC Clusters </operating-scylla/procedures/cluster-management/arbiter-dc>`
|
||||
for details.
|
||||
|
||||
------------
|
||||
See also:
|
||||
------------
|
||||
:doc:`Create a ScyllaDB Cluster - Single Data Center (DC) </operating-scylla/procedures/cluster-management/create-cluster>`
|
||||
|
||||
@@ -26,6 +26,8 @@ Cluster Management Procedures
|
||||
Safely Restart Your Cluster <safe-start>
|
||||
Handling Membership Change Failures <handling-membership-change-failures>
|
||||
repair-based-node-operation
|
||||
Prevent Quorum Loss in Symmetrical Multi-DC Clusters <arbiter-dc>
|
||||
|
||||
|
||||
.. panel-box::
|
||||
:title: Cluster and DC Creation
|
||||
@@ -84,6 +86,8 @@ Cluster Management Procedures
|
||||
|
||||
* :doc:`Repair Based Node Operations (RBNO) </operating-scylla/procedures/cluster-management/repair-based-node-operation>`
|
||||
|
||||
* :doc:`Preventing Quorum Loss in Symmetrical Multi-DC Clusters <arbiter-dc>`
|
||||
|
||||
.. panel-box::
|
||||
:title: Topology Changes
|
||||
:id: "getting-started"
|
||||
|
||||
@@ -5,9 +5,10 @@ Remove a Seed Node from Seed List
|
||||
This procedure describes how to remove a seed node from the seed list.
|
||||
|
||||
.. note::
|
||||
The seed concept in gossip has been removed. A seed node
|
||||
is only used by a new node during startup to learn about the cluster topology. As a result, you only need to configure one
|
||||
seed node in a node's ``scylla.yaml`` file.
|
||||
A seed node is only used by a new node during startup to learn about the cluster topology.
|
||||
This means it is sufficient to configure one seed node in a node's ``scylla.yaml`` file.
|
||||
|
||||
The first node in a new cluster must be a seed node.
|
||||
|
||||
|
||||
Prerequisites
|
||||
|
||||
@@ -3,13 +3,28 @@
|
||||
Replacing a Dead Seed Node
|
||||
===========================
|
||||
|
||||
.. note::
|
||||
The seed concept in gossip has been removed.
|
||||
A seed node is only used by a new node during startup to learn about the cluster topology. As a result, there's no need
|
||||
to replace the node configured with the ``seeds`` parameter in the ``scylla.yaml`` file.
|
||||
|
||||
In ScyllaDB, it is not possible to bootstrap a seed node. The following steps describe how to replace a dead seed node.
|
||||
|
||||
.. note::
|
||||
A seed node is only used by a new node during startup to learn about
|
||||
the cluster topology.
|
||||
Once the nodes have joined the cluster, the seed node has no function.
|
||||
In typical scenarios, there's no need to replace the node
|
||||
configured with the ``seeds`` parameter in the ``scylla.yaml`` file.
|
||||
|
||||
* The first node in a new cluster must be a seed node.
|
||||
* It is sufficient to configure one seed node in a node's ``scylla.yaml`` file.
|
||||
You may choose to configure two or three seed nodes if your cluster is large.
|
||||
* It’s not recommended that all the nodes in the cluster be defined as seed nodes.
|
||||
* If you update the IP address of a seed node or remove it from the cluster,
|
||||
you should update configuration files on all the remaining nodes to keep the
|
||||
configuration consistent.
|
||||
Once a node has joined the cluster and has all the peer information saved
|
||||
locally in the ``system.peers`` system table, seed nodes are no longer used,
|
||||
but they are still contacted on each restart. To avoid configuration errors
|
||||
and to be able to reach out to the cluster if the seed IP address changes,
|
||||
the seed configuration should be valid.
|
||||
|
||||
Prerequisites
|
||||
-------------
|
||||
|
||||
@@ -35,4 +50,4 @@ Procedure
|
||||
Use ``nodetool status`` to verify that restarted nodes are online before restarting more nodes. If too many nodes are offline, the cluster may suffer temporary service degradation or outage.
|
||||
#. Replace the dead node using the :doc:`dead node replacement procedure </operating-scylla/procedures/cluster-management/replace-dead-node/>`.
|
||||
|
||||
Your cluster should have more than one seed node, but you cannot define all the nodes in the cluster as seed nodes.
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ Advanced Internode (RPC) Compression
|
||||
Internode (RPC) compression controls whether traffic between nodes is
|
||||
compressed. If enabled, it reduces network bandwidth usage.
|
||||
|
||||
To further reduce network traffic, you can configure ScyllaDB Enterprise to use
|
||||
To further reduce network traffic, you can configure ScyllaDB to use
|
||||
ZSTD-based compression and shared dictionary compression. You can enable one or
|
||||
both of these features to limit network throughput and reduce network transfer costs.
|
||||
|
||||
|
||||
@@ -181,7 +181,7 @@ Use Workload Prioritization
|
||||
In a typical application there are operational workloads that require low latency.
|
||||
Sometimes these run in parallel with analytic workloads that process high volumes of data and do not require low latency.
|
||||
With workload prioritization, you can prevent the analytic workloads from causing unwanted high latency for the operational workloads.
|
||||
`Workload prioritization <https://enterprise.docs.scylladb.com/stable/using-scylla/workload-prioritization.html>`_ is only available with `ScyllaDB Enterprise <https://enterprise.docs.scylladb.com/>`_.
|
||||
See :doc:`Workload Prioritization </features/workload-prioritization/>`.
|
||||
|
||||
Bypass Cache
|
||||
============
|
||||
@@ -330,7 +330,7 @@ When records get updated or deleted, the old data eventually needs to be deleted
|
||||
The compaction settings can make a huge difference.
|
||||
|
||||
* Use the following :ref:`Compaction Strategy Matrix <CSM1>` to use the correct compaction strategy for your workload.
|
||||
* ICS is an incremental compaction strategy that combines the low space amplification of LCS with the low write amplification of STCS. It is **only** available with ScyllaDB Enterprise.
|
||||
* ICS is an incremental compaction strategy that combines the low space amplification of LCS with the low write amplification of STCS.
|
||||
* If you have time series data, the TWCS should be used.
|
||||
|
||||
Read more about :doc:`Compaction Strategies </architecture/compaction/compaction-strategies>`
|
||||
|
||||
@@ -199,11 +199,13 @@ The ScyllaDB image supports many command line options that are passed to the Doc
|
||||
-------------
|
||||
The ``--seeds`` command line option configures ScyllaDB's seed nodes. If no ``--seeds`` option is specified, ScyllaDB uses its own IP address as the seed.
|
||||
|
||||
For example, to configure ScyllaDB to run with two seed nodes ``192.168.0.100`` and ``192.168.0.200``.
|
||||
For example, to configure ScyllaDB to run with the seed node ``192.168.0.100``, run:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
docker run --name some-scylla -d scylladb/scylla --seeds 192.168.0.100,192.168.0.200
|
||||
docker run --name some-scylla -d scylladb/scylla --seeds 192.168.0.100
|
||||
|
||||
See :doc:`ScyllaDB Seed Nodes </kb/seed-nodes>` for details.
|
||||
|
||||
--listen-address ADDR
|
||||
---------------------
|
||||
|
||||
@@ -28,7 +28,7 @@ Incremental Compaction Strategy (ICS)
|
||||
We highly recommend using ICS (the default setting) for any table that you have.
|
||||
You will have much less Space Amplification with ICS as it only requires 25% additional storage, as opposed to STCS which requires 50% more.
|
||||
|
||||
.. note:: ICS is the default compaction strategy setting for Scylla Enterprise versions 2020.1 and higher.
|
||||
.. note:: ICS is the default compaction strategy.
|
||||
|
||||
* Refer to :ref:`Incremental Compaction Strategy <ICS1>` for an overview of the benefits.
|
||||
* Refer to :ref:`Incremental Compaction Strategy Overview <incremental-compaction-strategy-ics>` for a description of how it works.
|
||||
|
||||
@@ -2,9 +2,6 @@
|
||||
ScyllaDB Auditing Guide
|
||||
========================
|
||||
|
||||
:label-tip:`ScyllaDB Enterprise`
|
||||
|
||||
|
||||
Auditing allows the administrator to monitor activities on a Scylla cluster, including queries and data changes.
|
||||
The information is stored in a Syslog or a Scylla table.
|
||||
|
||||
@@ -64,7 +61,7 @@ QUERY Logs all queries
|
||||
--------- -----------------------------------------------------------------------------------------
|
||||
ADMIN Logs service level operations: create, alter, drop, attach, detach, list.
|
||||
For :ref:`service level <workload-priorization-service-level-management>`
|
||||
auditing, this parameter is available in Scylla Enterprise 2019.1 and later.
|
||||
auditing.
|
||||
========= =========================================================================================
|
||||
|
||||
Note that auditing every DML or QUERY event might impact performance and consume a lot of storage.
|
||||
|
||||
@@ -5,11 +5,11 @@ Encryption at Rest
|
||||
Introduction
|
||||
----------------------
|
||||
|
||||
ScyllaDB Enterprise protects your sensitive data with data-at-rest encryption.
|
||||
ScyllaDB protects your sensitive data with data-at-rest encryption.
|
||||
It protects the privacy of your user's data, reduces the risk of data breaches, and helps meet regulatory requirements.
|
||||
In particular, it provides an additional level of protection for your data persisted in storage or its backups.
|
||||
|
||||
When ScyllaDB Enterprise Encryption at Rest is used together with Encryption in Transit (:doc:`Node to Node </operating-scylla/security/node-node-encryption>` and :doc:`Client to Node </operating-scylla/security/client-node-encryption>`), you benefit from end to end data encryption.
|
||||
When ScyllaDB's Encryption at Rest is used together with Encryption in Transit (:doc:`Node to Node </operating-scylla/security/node-node-encryption>` and :doc:`Client to Node </operating-scylla/security/client-node-encryption>`), you benefit from end to end data encryption.
|
||||
|
||||
About Encryption at Rest
|
||||
-----------------------------
|
||||
@@ -143,8 +143,6 @@ Depending on your key provider, you will either have the option of allowing Scyl
|
||||
* Replicated Key Provider - you must generate a system key yourself
|
||||
* Local Key Provider - If you do not generate your own secret key, ScyllaDB will create one for you
|
||||
|
||||
When encrypting ScyllaDB config by ``configuration_encryptor``, you also need to generate a secret key and upload the key to all nodes.
|
||||
|
||||
|
||||
Use the key generator script
|
||||
================================
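A hedged sketch of invoking the generator (the output path is an example, and the option names are assumptions inferred from the parameters described above; check the tool's help for the exact syntax):

.. code-block:: console

   # print the supported options and exact syntax
   sudo -u scylla scylla local-file-key-generator --help

   # hypothetical invocation: generate a 128-bit AES/CBC key with PKCS5 padding
   sudo -u scylla scylla local-file-key-generator \
       -a AES -b CBC -p PKCS5 -l 128 /etc/scylla/encryption_keys/system_key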
|
||||
@@ -282,8 +280,6 @@ If you are using :term:`KMIP <Key Management Interoperability Protocol (KMIP)>`
|
||||
Set the KMS Host
|
||||
----------------------
|
||||
|
||||
.. note:: KMS support is available since ScyllaDB Enterprise **2023.1.1**.
|
||||
|
||||
If you are using AWS KMS to encrypt tables or system information, add the KMS information to the ``scylla.yaml`` configuration file.
|
||||
|
||||
#. Edit the ``scylla.yaml`` file located in ``/etc/scylla/`` to add the following in KMS host(s) section:
|
||||
@@ -408,10 +404,6 @@ If you are using Google GCP KMS to encrypt tables or system information, add the
|
||||
Encrypt Tables
|
||||
-----------------------------
|
||||
|
||||
.. note::
|
||||
|
||||
This feature is available since ScyllaDB Enterprise 2023.1.2.
|
||||
|
||||
ScyllaDB allows you to enable or disable default encryption of tables.
|
||||
When enabled, tables will be encrypted by default using the configuration
|
||||
provided for the ``user_info_encryption`` option in the ``scylla.yaml`` file.
|
||||
@@ -820,32 +812,6 @@ Once this encryption is enabled, it is used for all system data.
|
||||
|
||||
.. wasn't able to test this successfully
|
||||
|
||||
.. Encrypt and Decrypt Configuration Files
|
||||
.. =======================================
|
||||
|
||||
.. Using the Configuration Encryption tool, you can encrypt parts of the scylla.yaml file which contain encryption configuration settings.
|
||||
|
||||
.. **Procedure**
|
||||
|
||||
.. 1. Run the Configuration Encryption script:
|
||||
|
||||
.. test code-block: none
|
||||
|
||||
.. /bin/configuration_encryptor [options] [key-path]
|
||||
|
||||
.. Where:
|
||||
|
||||
.. * ``-c, --config`` - the path to the configuration file (/etc/scylla/scylla.yaml, for example)
|
||||
.. * ``-d, --decrypt`` - decrypts the configuration file at the specified path
|
||||
.. * ``-o, --output`` - (optional) writes the configuration file to a specified target. This can be the same location as the source file.
|
||||
.. * ``-h. --help`` - help for this command
|
||||
|
||||
.. For example:
|
||||
|
||||
.. test code-block: none
|
||||
|
||||
.. sudo -u scylla /bin/configuration_encryptor -c /etc/scylla/scylla.yaml /etc/scylla/encryption_keys/secret_key
|
||||
.. end of test
|
||||
|
||||
When a Key is Lost
|
||||
----------------------
|
||||
|
||||
@@ -7,10 +7,6 @@ LDAP Authentication
|
||||
|
||||
saslauthd
|
||||
|
||||
:label-tip:`ScyllaDB Enterprise`
|
||||
|
||||
.. versionadded:: 2021.1.2
|
||||
|
||||
Scylla supports user authentication via an LDAP server by leveraging the SaslauthdAuthenticator.
|
||||
By configuring saslauthd correctly against your LDAP server, you enable Scylla to check the user’s credentials through it.
|
||||
|
||||
|
||||
@@ -2,11 +2,7 @@
|
||||
LDAP Authorization (Role Management)
|
||||
=====================================
|
||||
|
||||
:label-tip:`ScyllaDB Enterprise`
|
||||
|
||||
.. versionadded:: 2021.1.2
|
||||
|
||||
Scylla Enterprise customers can manage and authorize users’ privileges via an :abbr:`LDAP (Lightweight Directory Access Protocol)` server.
|
||||
Scylla customers can manage and authorize users’ privileges via an :abbr:`LDAP (Lightweight Directory Access Protocol)` server.
|
||||
LDAP is an open, vendor-neutral, industry-standard protocol for accessing and maintaining distributed user access control over a standard IP network.
|
||||
If your users are already stored in an LDAP directory, you can now use the same LDAP server to regulate their roles in Scylla.
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ In the same manner, should someone leave the organization, all you would have to
|
||||
Should someone change positions at the company, just assign the new employee to the new role and revoke roles no longer required for the new position.
|
||||
|
||||
To build an RBAC environment, you need to create the roles and their associated permissions and then assign or grant the roles to the individual users. Roles inherit the permissions of any other roles that they are granted. The hierarchy of roles can be either simple or extremely complex. This gives great flexibility to database administrators, where they can create specific permission conditions without incurring a huge administrative burden.
|
||||
In addition to standard roles, ScyllaDB Enterprise users can implement :doc:`Workload Prioritization </features/workload-prioritization/>`, which allows you to attach roles to Service Levels, thus granting resources to roles as the role demands.
|
||||
In addition to standard roles, ScyllaDB users can implement :doc:`Workload Prioritization </features/workload-prioritization/>`, which allows you to attach roles to Service Levels, thus granting resources to roles as the role demands.
|
||||
|
||||
.. _rbac-usecase-grant-roles-and-permissions:
|
||||
|
||||
|
||||
@@ -31,11 +31,9 @@ Encryption on Transit, Client to Node and Node to Node
|
||||
Encryption on Transit protects your communication against third-party interception on the network connection.
|
||||
Configure ScyllaDB to use TLS/SSL for all the connections. Use TLS/SSL to encrypt communication between ScyllaDB nodes and client applications.
|
||||
|
||||
.. only:: enterprise
|
||||
|
||||
Starting with version 2023.1.1, you can run ScyllaDB Enterprise on FIPS-enabled Ubuntu,
|
||||
which uses FIPS 140-2 certified libraries (such as OpenSSL, GnuTLS, and more) and Linux
|
||||
kernel in FIPS mode.
|
||||
You can run ScyllaDB on FIPS-enabled Ubuntu,
|
||||
which uses FIPS 140-2 certified libraries (such as OpenSSL, GnuTLS, and more) and Linux
|
||||
kernel in FIPS mode.
|
||||
|
||||
* :doc:`Encryption Data in Transit Client to Node </operating-scylla/security/client-node-encryption>`
|
||||
|
||||
@@ -43,7 +41,6 @@ Configure ScyllaDB to use TLS/SSL for all the connections. Use TLS/SSL to encryp
|
||||
|
||||
Encryption at Rest
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
Encryption at Rest is available in a Scylla Enterprise 2019.1.1.
|
||||
|
||||
Encryption at Rest protects the privacy of your user's data, reduces the risk of data breaches, and helps meet regulatory requirements.
|
||||
In particular, it provides an additional level of protection for your data persisted in storage or backup.
|
||||
|
||||
@@ -127,6 +127,12 @@ Glossary
|
||||
RBNO is enabled by default for a subset node operations.
|
||||
See :doc:`Repair Based Node Operations </operating-scylla/procedures/cluster-management/repair-based-node-operation>` for details.
|
||||
|
||||
RF-rack-valid keyspace
|
||||
A keyspace with :doc:`tablets </architecture/tablets>` enabled is RF-rack-valid if all of its data centers
|
||||
have a :term:`Replication Factor (RF) <Replication Factor (RF)>` of 0, 1, or the number of racks in that data center.
|
||||
|
||||
Keyspaces with tablets disabled are always deemed RF-rack-valid, even if they do not satisfy the aforementioned condition.
|
||||
|
||||
Shard
|
||||
Each ScyllaDB node is internally split into *shards*, an independent thread bound to a dedicated core.
|
||||
Each shard of data is allotted CPU, RAM, persistent storage, and networking resources which it uses as efficiently as possible.
|
||||
@@ -187,8 +193,8 @@ Glossary
|
||||
Cache dummy rows are entries in the row set, which have a clustering position, although they do not represent CQL rows written by users. ScyllaDB cache uses them to mark boundaries of population ranges, to represent the information that the whole range is complete, and there is no need to go to sstables to read the gaps between existing row entries when scanning.
|
||||
|
||||
Workload
|
||||
A database category that allows you to manage different sources of database activities, such as requests or administrative activities. By defining workloads, you can specify how ScyllaDB will process those activities. For example, `ScyllaDB Enterprise <https://enterprise.docs.scylladb.com/>`_
|
||||
ships with a feature that allows you to prioritize one workload over another (e.g., user requests over administrative activities). See `Workload Prioritization <https://enterprise.docs.scylladb.com/stable/using-scylla/workload-prioritization.html>`_.
|
||||
A database category that allows you to manage different sources of database activities, such as requests or administrative activities. By defining workloads, you can specify how ScyllaDB will process those activities. For example, ScyllaDB
|
||||
ships with a feature that allows you to prioritize one workload over another (e.g., user requests over administrative activities). See :doc:`Workload Prioritization </features/workload-prioritization/>`.
|
||||
|
||||
MurmurHash3
|
||||
A hash function `created by Austin Appleby <https://en.wikipedia.org/wiki/MurmurHash>`_, and used by the :term:`Partitioner` to distribute the partitions between nodes.
|
||||
|
||||
@@ -15,4 +15,3 @@ Reference
|
||||
* :doc:`Limits </reference/limits>`
|
||||
* :doc:`API Reference </reference/api-reference>`
|
||||
* :doc:`Metrics </reference/metrics>`
|
||||
* .. scylladb_include_flag:: enterprise-vs-oss-matrix-link.rst
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
There are two alternative upgrade procedures:
|
||||
|
||||
* :ref:`Upgrading ScyllaDB and simultaneously updating 3rd party and OS packages <upgrade-image-recommended-procedure>`. It is recommended if you are running a ScyllaDB official image (EC2 AMI, GCP, and Azure images), which is based on Ubuntu 20.04.
|
||||
|
||||
* :ref:`Upgrading ScyllaDB without updating any external packages <upgrade-image-enterprise-upgrade-guide-regular-procedure>`.
|
||||
|
||||
.. _upgrade-image-recommended-procedure:
|
||||
|
||||
**To upgrade ScyllaDB and update 3rd party and OS packages (RECOMMENDED):**
|
||||
|
||||
Choosing this upgrade procedure allows you to upgrade your ScyllaDB version and update the 3rd party and OS packages using one command.
|
||||
|
||||
#. Update the |SCYLLA_REPO|_ to |NEW_VERSION|.
|
||||
|
||||
#. Load the new repo:
|
||||
|
||||
.. code:: sh
|
||||
|
||||
sudo apt-get update
|
||||
|
||||
#. Run the following command to update the manifest file:
|
||||
|
||||
.. code:: sh
|
||||
|
||||
cat scylla-enterprise-packages-<version>-<arch>.txt | sudo xargs -n1 apt-get install -y
|
||||
|
||||
Where:
|
||||
|
||||
* ``<version>`` - The ScyllaDB version to which you are upgrading ( |NEW_VERSION| ).
|
||||
* ``<arch>`` - Architecture type: ``x86_64`` or ``aarch64``.
|
||||
|
||||
The file is included in the ScyllaDB packages downloaded in the previous step. The file location is ``http://downloads.scylladb.com/downloads/scylla-enterprise/aws/manifest/scylla-enterprise-packages-<version>-<arch>.txt``.
|
||||
|
||||
Example:
|
||||
|
||||
.. code:: console
|
||||
|
||||
cat scylla-enterprise-packages-2022.1.10-x86_64.txt | sudo xargs -n1 apt-get install -y
|
||||
|
||||
|
||||
.. note::
|
||||
|
||||
Alternatively, you can update the manifest file with the following command:
|
||||
|
||||
``sudo apt-get install $(awk '{print $1'} scylla-enterprise-packages-<version>-<arch>.txt) -y``
|
||||
|
||||
|
||||
|
||||
|
||||
.. _upgrade-image-enterprise-upgrade-guide-regular-procedure:
|
||||
27
docs/upgrade/about-upgrade.rst
Normal file
@@ -0,0 +1,27 @@
|
||||
================
About Upgrade
================

ScyllaDB upgrade is a rolling procedure - it does not require a full cluster
shutdown and is performed without any downtime or disruption of service.

To ensure a successful upgrade, follow
the :doc:`documented upgrade procedures <upgrade-guides/index>` tested by
ScyllaDB. This means that:

* You should perform the upgrades consecutively - to each successive X.Y
  version, **without skipping any major or minor version**, unless there is
  a documented upgrade procedure to bypass a version.
* Before you upgrade to the next version, the whole cluster (each node) must
  be upgraded to the previous version.
* You cannot perform an upgrade by replacing the nodes in the cluster with new
  nodes with a different ScyllaDB version. You should never add a new node with
  a different version to a cluster - if you
  :doc:`add a node </operating-scylla/procedures/cluster-management/add-node-to-cluster>`,
  it must have the same X.Y.Z (major.minor.patch) version as the other nodes in
  the cluster.

Upgrading to each patch version by following the Maintenance Release Upgrade
Guide is optional. However, we recommend upgrading to the latest patch release
for your version before upgrading to a new version.
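For example, before upgrading to the next version you can confirm that every node already runs the same X.Y.Z version. A minimal sketch, assuming SSH access and that ``node1``-``node3`` are placeholder host names:

.. code:: sh

   # Print the installed ScyllaDB version on every node before moving on.
   for host in node1 node2 node3; do
       echo -n "$host: "
       ssh "$host" scylla --version
   done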
@@ -4,52 +4,13 @@ Upgrade ScyllaDB

.. toctree::
   :titlesonly:
   :hidden:

   ScyllaDB Versioning <scylladb-versioning>
   ScyllaDB Open Source Upgrade <upgrade-opensource/index>
   ScyllaDB Open Source to ScyllaDB Enterprise Upgrade <upgrade-to-enterprise/index>
   ScyllaDB Image <ami-upgrade>
   ScyllaDB Enterprise <https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/index.html>

Overview
---------

ScyllaDB upgrade is a rolling procedure - it does not require a full cluster shutdown and is performed without any
downtime or disruption of service.

To ensure a successful upgrade, follow the :ref:`documented upgrade procedures <upgrade-upgrade-procedures>` tested by ScyllaDB. This means that:

* You should perform the upgrades consecutively - to each successive X.Y version, **without skipping any major or minor version**.
* Before you upgrade to the next version, the whole cluster (each node) must be upgraded to the previous version.
* You cannot perform an upgrade by replacing the nodes in the cluster with new nodes with a different ScyllaDB version. You should never add a new node with a different version to a cluster - if you :doc:`add a node </operating-scylla/procedures/cluster-management/add-node-to-cluster>`, it must have the same X.Y.Z (major.minor.patch) version as the other nodes in the cluster.

Example
========

The following example shows the upgrade path for a 3-node cluster from version 4.3 to version 4.6:

#. Upgrade all three nodes to version 4.4.
#. Upgrade all three nodes to version 4.5.
#. Upgrade all three nodes to version 4.6.

Upgrading to each patch version by following the Maintenance Release Upgrade Guide
is optional. However, we recommend upgrading to the latest patch release for your version before upgrading to a new version.
For example, upgrade to patch 4.4.8 before upgrading to version 4.5.
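One quick way to see which version and patch level a node is currently running, before planning the next upgrade step, is to inspect the installed packages. This sketch assumes a Debian or Ubuntu based system, such as the ScyllaDB official image:

.. code:: sh

   # List installed ScyllaDB packages and their versions (Debian/Ubuntu).
   dpkg -l | grep -i scylla

   # Or ask the server binary directly.
   scylla --version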
.. _upgrade-upgrade-procedures:

Procedures for Upgrading ScyllaDB
-----------------------------------

* :doc:`Upgrade ScyllaDB Open Source <upgrade-opensource/index>`

* :doc:`Upgrade from ScyllaDB Open Source to ScyllaDB Enterprise <upgrade-to-enterprise/index>`

* :doc:`Upgrade ScyllaDB Image <ami-upgrade>`

* `Upgrade ScyllaDB Enterprise <https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/index.html>`_

   About Upgrade <about-upgrade>
   Upgrade Guides <upgrade-guides/index>
@@ -1,61 +0,0 @@

============================
ScyllaDB Versioning
============================

ScyllaDB follows the ``MAJOR.MINOR.PATCH`` `semantic versioning <https://semver.org/>`_:

* ``MAJOR`` versions contain significant changes in the product and may introduce incompatible API changes.
* ``MINOR`` versions introduce new features and improvements in a backward-compatible manner.
* ``PATCH`` versions have backward-compatible bug fixes.

**Examples**

ScyllaDB Open Source:

* ``MAJOR`` versions: 4.y, 5.y
* ``MINOR`` versions: 5.2.z, 5.4.z
* ``PATCH`` versions: 5.2.1, 5.2.2

ScyllaDB Enterprise:

* ``MAJOR`` versions: 2021.y.z, 2022.y.z
* ``MINOR`` versions: 2022.1.z, 2022.2.z
* ``PATCH`` versions: 2022.1.1, 2022.1.2
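To make the scheme concrete, the following shell sketch (not part of the original page) splits the output of ``scylla --version`` into its ``MAJOR``, ``MINOR``, and ``PATCH`` components; it assumes the version string begins with ``X.Y.Z`` and ignores any trailing build suffix.

.. code:: sh

   # Split the installed ScyllaDB version into MAJOR.MINOR.PATCH components.
   full=$(scylla --version)      # e.g. "5.2.1" or "2022.1.3-0.20230331..."
   base=${full%%-*}              # drop any "-<build>" suffix
   major=$(echo "$base" | cut -d. -f1)
   minor=$(echo "$base" | cut -d. -f2)
   patch=$(echo "$base" | cut -d. -f3)
   echo "MAJOR=$major MINOR=$minor PATCH=$patch"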
.. only:: enterprise

  ScyllaDB Enterprise Version Support Policy
  ----------------------------------------------------

  ScyllaDB Enterprise supports the two latest ``MAJOR`` versions and the two latest ``MINOR`` versions. They are referred to as LTS (long-term support) and feature releases, respectively.

  **Example**

  Let's assume that the following versions are available as of today:
  2021.1, 2022.1, 2022.2, 2022.3, 2022.4

  The following versions would be supported:

  * 2021.1 and 2022.1 - the two latest ``MAJOR`` versions (LTS)
  * 2022.3 and 2022.4 - the two latest ``MINOR`` versions (feature releases)

  Version 2022.2 would not be supported.

  LTS vs. Feature Releases
  -----------------------------

  Long-Term Support (LTS) - Major Versions:

  * Released approximately once a year.

  Feature Releases - Minor Versions:

  * 3-4 releases per year
  * Closely follow ScyllaDB Open Source releases (see `ScyllaDB Enterprise vs. Open Source Matrix <https://enterprise.docs.scylladb.com/stable/reference/versions-matrix-enterprise-oss.html>`_)
  * Introduce features added in ScyllaDB Open Source, as well as Enterprise-only premium features