Compare commits
51 Commits
debug_form
...
scylla-5.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
40a1905a2d | ||
|
|
4e2c436222 | ||
|
|
68be369f93 | ||
|
|
0f7adb5f47 | ||
|
|
82dc8357ef | ||
|
|
12a58957e2 | ||
|
|
3423ad6e38 | ||
|
|
64001719fa | ||
|
|
cc3d368bc8 | ||
|
|
d957b0044b | ||
|
|
d4ed67bd47 | ||
|
|
0cd6341cae | ||
|
|
23d8852a82 | ||
|
|
88016de43e | ||
|
|
bdecf4318a | ||
|
|
72bf244ad1 | ||
|
|
ee82323599 | ||
|
|
2f78df92ab | ||
|
|
e2809674d2 | ||
|
|
0295d0c5c8 | ||
|
|
fa94222662 | ||
|
|
dff7f3c5ba | ||
|
|
3723713130 | ||
|
|
03f8411e38 | ||
|
|
0e391d67d1 | ||
|
|
f76989285e | ||
|
|
9deeeb4db1 | ||
|
|
1f3196735f | ||
|
|
abb6817261 | ||
|
|
d3fd090429 | ||
|
|
3e7c57d162 | ||
|
|
f878a34da3 | ||
|
|
eaded57b2e | ||
|
|
25d2da08d1 | ||
|
|
9b1a570f6f | ||
|
|
426d045249 | ||
|
|
86dbbf12cc | ||
|
|
b05903eddd | ||
|
|
26ead53304 | ||
|
|
f60bab9471 | ||
|
|
66f34245fc | ||
|
|
4047528bd9 | ||
|
|
1a82c61452 | ||
|
|
3d9800eb1c | ||
|
|
c48e9b47dd | ||
|
|
2eadaad9f7 | ||
|
|
d10aee15e7 | ||
|
|
9e017cb1e6 | ||
|
|
b8504cc9b2 | ||
|
|
856703a85e | ||
|
|
86a6c1fb2b |
2
.gitmodules
vendored
2
.gitmodules
vendored
@@ -1,6 +1,6 @@
|
||||
[submodule "seastar"]
|
||||
path = seastar
|
||||
url = ../seastar
|
||||
url = ../scylla-seastar
|
||||
ignore = dirty
|
||||
[submodule "swagger-ui"]
|
||||
path = swagger-ui
|
||||
|
||||
@@ -60,7 +60,7 @@ fi
|
||||
|
||||
# Default scylla product/version tags
|
||||
PRODUCT=scylla
|
||||
VERSION=5.1.0-dev
|
||||
VERSION=5.1.0-rc5
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
@@ -438,6 +438,11 @@ future<executor::request_return_type> executor::describe_table(client_state& cli
|
||||
rjson::add(table_description, "BillingModeSummary", rjson::empty_object());
|
||||
rjson::add(table_description["BillingModeSummary"], "BillingMode", "PAY_PER_REQUEST");
|
||||
rjson::add(table_description["BillingModeSummary"], "LastUpdateToPayPerRequestDateTime", rjson::value(creation_date_seconds));
|
||||
// In PAY_PER_REQUEST billing mode, provisioned capacity should return 0
|
||||
rjson::add(table_description, "ProvisionedThroughput", rjson::empty_object());
|
||||
rjson::add(table_description["ProvisionedThroughput"], "ReadCapacityUnits", 0);
|
||||
rjson::add(table_description["ProvisionedThroughput"], "WriteCapacityUnits", 0);
|
||||
rjson::add(table_description["ProvisionedThroughput"], "NumberOfDecreasesToday", 0);
|
||||
|
||||
std::unordered_map<std::string,std::string> key_attribute_types;
|
||||
// Add base table's KeySchema and collect types for AttributeDefinitions:
|
||||
@@ -460,6 +465,11 @@ future<executor::request_return_type> executor::describe_table(client_state& cli
|
||||
rjson::add(view_entry, "IndexArn", generate_arn_for_index(*schema, index_name));
|
||||
// Add indexes's KeySchema and collect types for AttributeDefinitions:
|
||||
describe_key_schema(view_entry, *vptr, key_attribute_types);
|
||||
// Add projection type
|
||||
rjson::value projection = rjson::empty_object();
|
||||
rjson::add(projection, "ProjectionType", "ALL");
|
||||
// FIXME: we have to get ProjectionType from the schema when it is added
|
||||
rjson::add(view_entry, "Projection", std::move(projection));
|
||||
// Local secondary indexes are marked by an extra '!' sign occurring before the ':' delimiter
|
||||
rjson::value& index_array = (delim_it > 1 && cf_name[delim_it-1] == '!') ? lsi_array : gsi_array;
|
||||
rjson::push_back(index_array, std::move(view_entry));
|
||||
|
||||
@@ -80,8 +80,10 @@ struct compaction_data {
|
||||
}
|
||||
|
||||
void stop(sstring reason) {
|
||||
stop_requested = std::move(reason);
|
||||
abort.request_abort();
|
||||
if (!abort.abort_requested()) {
|
||||
stop_requested = std::move(reason);
|
||||
abort.request_abort();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -842,6 +842,20 @@ future<> compaction_manager::really_do_stop() {
|
||||
cmlog.info("Stopped");
|
||||
}
|
||||
|
||||
template <typename Ex>
|
||||
requires std::is_base_of_v<std::exception, Ex> &&
|
||||
requires (const Ex& ex) {
|
||||
{ ex.code() } noexcept -> std::same_as<const std::error_code&>;
|
||||
}
|
||||
auto swallow_enospc(const Ex& ex) noexcept {
|
||||
if (ex.code().value() != ENOSPC) {
|
||||
return make_exception_future<>(std::make_exception_ptr(ex));
|
||||
}
|
||||
|
||||
cmlog.warn("Got ENOSPC on stop, ignoring...");
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
void compaction_manager::do_stop() noexcept {
|
||||
if (_state == state::none || _state == state::stopped) {
|
||||
return;
|
||||
@@ -849,7 +863,10 @@ void compaction_manager::do_stop() noexcept {
|
||||
|
||||
try {
|
||||
_state = state::stopped;
|
||||
_stop_future = really_do_stop();
|
||||
_stop_future = really_do_stop()
|
||||
.handle_exception_type([] (const std::system_error& ex) { return swallow_enospc(ex); })
|
||||
.handle_exception_type([] (const storage_io_error& ex) { return swallow_enospc(ex); })
|
||||
;
|
||||
} catch (...) {
|
||||
cmlog.error("Failed to stop the manager: {}", std::current_exception());
|
||||
}
|
||||
@@ -1050,7 +1067,7 @@ public:
|
||||
bool performed() const noexcept {
|
||||
return _performed;
|
||||
}
|
||||
|
||||
private:
|
||||
future<> run_offstrategy_compaction(sstables::compaction_data& cdata) {
|
||||
// This procedure will reshape sstables in maintenance set until it's ready for
|
||||
// integration into main set.
|
||||
@@ -1083,6 +1100,7 @@ public:
|
||||
return desc.sstables.size() ? std::make_optional(std::move(desc)) : std::nullopt;
|
||||
};
|
||||
|
||||
std::exception_ptr err;
|
||||
while (auto desc = get_next_job()) {
|
||||
desc->creator = [this, &new_unused_sstables, &t] (shard_id dummy) {
|
||||
auto sst = t.make_sstable();
|
||||
@@ -1091,7 +1109,16 @@ public:
|
||||
};
|
||||
auto input = boost::copy_range<std::unordered_set<sstables::shared_sstable>>(desc->sstables);
|
||||
|
||||
auto ret = co_await sstables::compact_sstables(std::move(*desc), cdata, t);
|
||||
sstables::compaction_result ret;
|
||||
try {
|
||||
ret = co_await sstables::compact_sstables(std::move(*desc), cdata, t);
|
||||
} catch (sstables::compaction_stopped_exception&) {
|
||||
// If off-strategy compaction stopped on user request, let's not discard the partial work.
|
||||
// Therefore, both un-reshaped and reshaped data will be integrated into main set, allowing
|
||||
// regular compaction to continue from where off-strategy left off.
|
||||
err = std::current_exception();
|
||||
break;
|
||||
}
|
||||
_performed = true;
|
||||
|
||||
// update list of reshape candidates without input but with output added to it
|
||||
@@ -1128,6 +1155,9 @@ public:
|
||||
for (auto& sst : sstables_to_remove) {
|
||||
sst->mark_for_deletion();
|
||||
}
|
||||
if (err) {
|
||||
co_await coroutine::return_exception_ptr(std::move(err));
|
||||
}
|
||||
}
|
||||
protected:
|
||||
virtual future<compaction_stats_opt> do_run() override {
|
||||
|
||||
@@ -1396,7 +1396,7 @@ serviceLevelOrRoleName returns [sstring name]
|
||||
std::transform($name.begin(), $name.end(), $name.begin(), ::tolower); }
|
||||
| t=STRING_LITERAL { $name = sstring($t.text); }
|
||||
| t=QUOTED_NAME { $name = sstring($t.text); }
|
||||
| k=unreserved_keyword { $name = sstring($t.text);
|
||||
| k=unreserved_keyword { $name = k;
|
||||
std::transform($name.begin(), $name.end(), $name.begin(), ::tolower);}
|
||||
| QMARK {add_recognition_error("Bind variables cannot be used for service levels or role names");}
|
||||
;
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include "tombstone_gc.hh"
|
||||
#include "db/per_partition_rate_limit_extension.hh"
|
||||
#include "db/per_partition_rate_limit_options.hh"
|
||||
#include "utils/bloom_calculations.hh"
|
||||
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
|
||||
@@ -152,6 +153,16 @@ void cf_prop_defs::validate(const data_dictionary::database db, sstring ks_name,
|
||||
throw exceptions::configuration_exception(KW_MAX_INDEX_INTERVAL + " must be greater than " + KW_MIN_INDEX_INTERVAL);
|
||||
}
|
||||
|
||||
if (get_simple(KW_BF_FP_CHANCE)) {
|
||||
double bloom_filter_fp_chance = get_double(KW_BF_FP_CHANCE, 0/*not used*/);
|
||||
double min_bloom_filter_fp_chance = utils::bloom_calculations::min_supported_bloom_filter_fp_chance();
|
||||
if (bloom_filter_fp_chance <= min_bloom_filter_fp_chance || bloom_filter_fp_chance > 1.0) {
|
||||
throw exceptions::configuration_exception(format(
|
||||
"{} must be larger than {} and less than or equal to 1.0 (got {})",
|
||||
KW_BF_FP_CHANCE, min_bloom_filter_fp_chance, bloom_filter_fp_chance));
|
||||
}
|
||||
}
|
||||
|
||||
speculative_retry::from_sstring(get_string(KW_SPECULATIVE_RETRY, speculative_retry(speculative_retry::type::NONE, 0).to_sstring()));
|
||||
}
|
||||
|
||||
|
||||
@@ -2031,7 +2031,7 @@ future<> db::commitlog::segment_manager::shutdown() {
|
||||
}
|
||||
}
|
||||
co_await _shutdown_promise->get_shared_future();
|
||||
clogger.info("Commitlog shutdown complete");
|
||||
clogger.debug("Commitlog shutdown complete");
|
||||
}
|
||||
|
||||
void db::commitlog::segment_manager::add_file_to_dispose(named_file f, dispose_mode mode) {
|
||||
|
||||
@@ -899,6 +899,8 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
"Ignore truncation record stored in system tables as if tables were never truncated.")
|
||||
, force_schema_commit_log(this, "force_schema_commit_log", value_status::Used, false,
|
||||
"Use separate schema commit log unconditionally rater than after restart following discovery of cluster-wide support for it.")
|
||||
, cache_index_pages(this, "cache_index_pages", liveness::LiveUpdate, value_status::Used, true,
|
||||
"Keep SSTable index pages in the global cache after a SSTable read. Expected to improve performance for workloads with big partitions, but may degrade performance for workloads with small partitions.")
|
||||
, default_log_level(this, "default_log_level", value_status::Used)
|
||||
, logger_log_level(this, "logger_log_level", value_status::Used)
|
||||
, log_to_stdout(this, "log_to_stdout", value_status::Used)
|
||||
|
||||
@@ -379,6 +379,8 @@ public:
|
||||
named_value<bool> ignore_truncation_record;
|
||||
named_value<bool> force_schema_commit_log;
|
||||
|
||||
named_value<bool> cache_index_pages;
|
||||
|
||||
seastar::logging_settings logging_settings(const log_cli::options&) const;
|
||||
|
||||
const db::extensions& extensions() const;
|
||||
|
||||
@@ -868,13 +868,18 @@ void view_updates::generate_update(
|
||||
bool same_row = true;
|
||||
for (auto col_id : col_ids) {
|
||||
auto* after = update.cells().find_cell(col_id);
|
||||
// Note: multi-cell columns can't be part of the primary key.
|
||||
auto& cdef = _base->regular_column_at(col_id);
|
||||
if (existing) {
|
||||
auto* before = existing->cells().find_cell(col_id);
|
||||
// Note that this cell is necessarily atomic, because col_ids are
|
||||
// view key columns, and keys must be atomic.
|
||||
if (before && before->as_atomic_cell(cdef).is_live()) {
|
||||
if (after && after->as_atomic_cell(cdef).is_live()) {
|
||||
auto cmp = compare_atomic_cell_for_merge(before->as_atomic_cell(cdef), after->as_atomic_cell(cdef));
|
||||
// We need to compare just the values of the keys, not
|
||||
// metadata like the timestamp. This is because below,
|
||||
// if the old and new view row have the same key, we need
|
||||
// to be sure to reach the update_entry() case.
|
||||
auto cmp = compare_unsigned(before->as_atomic_cell(cdef).value(), after->as_atomic_cell(cdef).value());
|
||||
if (cmp != 0) {
|
||||
same_row = false;
|
||||
}
|
||||
@@ -894,7 +899,13 @@ void view_updates::generate_update(
|
||||
if (same_row) {
|
||||
update_entry(base_key, update, *existing, now);
|
||||
} else {
|
||||
replace_entry(base_key, update, *existing, now);
|
||||
// This code doesn't work if the old and new view row have the
|
||||
// same key, because if they do we get both data and tombstone
|
||||
// for the same timestamp (now) and the tombstone wins. This
|
||||
// is why we need the "same_row" case above - it's not just a
|
||||
// performance optimization.
|
||||
delete_old_entry(base_key, *existing, update, now);
|
||||
create_entry(base_key, update, now);
|
||||
}
|
||||
} else {
|
||||
delete_old_entry(base_key, *existing, update, now);
|
||||
@@ -2056,15 +2067,20 @@ public:
|
||||
// Called in the context of a seastar::thread.
|
||||
void view_builder::execute(build_step& step, exponential_backoff_retry r) {
|
||||
gc_clock::time_point now = gc_clock::now();
|
||||
auto consumer = compact_for_query_v2<view_builder::consumer>(
|
||||
auto compaction_state = make_lw_shared<compact_for_query_state_v2>(
|
||||
*step.reader.schema(),
|
||||
now,
|
||||
step.pslice,
|
||||
batch_size,
|
||||
query::max_partitions,
|
||||
view_builder::consumer{*this, step, now});
|
||||
consumer.consume_new_partition(step.current_key); // Initialize the state in case we're resuming a partition
|
||||
query::max_partitions);
|
||||
auto consumer = compact_for_query_v2<view_builder::consumer>(compaction_state, view_builder::consumer{*this, step, now});
|
||||
auto built = step.reader.consume_in_thread(std::move(consumer));
|
||||
if (auto ds = std::move(*compaction_state).detach_state()) {
|
||||
if (ds->current_tombstone) {
|
||||
step.reader.unpop_mutation_fragment(mutation_fragment_v2(*step.reader.schema(), step.reader.permit(), std::move(*ds->current_tombstone)));
|
||||
}
|
||||
step.reader.unpop_mutation_fragment(mutation_fragment_v2(*step.reader.schema(), step.reader.permit(), std::move(ds->partition_start)));
|
||||
}
|
||||
|
||||
_as.check();
|
||||
|
||||
|
||||
@@ -154,10 +154,7 @@ private:
|
||||
void delete_old_entry(const partition_key& base_key, const clustering_row& existing, const clustering_row& update, gc_clock::time_point now);
|
||||
void do_delete_old_entry(const partition_key& base_key, const clustering_row& existing, const clustering_row& update, gc_clock::time_point now);
|
||||
void update_entry(const partition_key& base_key, const clustering_row& update, const clustering_row& existing, gc_clock::time_point now);
|
||||
void replace_entry(const partition_key& base_key, const clustering_row& update, const clustering_row& existing, gc_clock::time_point now) {
|
||||
create_entry(base_key, update, now);
|
||||
delete_old_entry(base_key, existing, update, now);
|
||||
}
|
||||
void update_entry_for_computed_column(const partition_key& base_key, const clustering_row& update, const std::optional<clustering_row>& existing, gc_clock::time_point now);
|
||||
};
|
||||
|
||||
class view_update_builder {
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
### a dictionary of redirections
|
||||
#old path: new path
|
||||
|
||||
# unifying the Ubunut upgrade guide for different Ubuntu versions: from 5.0 to 2022.1
|
||||
|
||||
/stable/upgrade/upgrade-to-enterprise/upgrade-guide-from-5.0-to-2022.1/upgrade-guide-from-5.0-to-2022.1-ubuntu-18-04.html: /stable/upgrade/upgrade-to-enterprise/upgrade-guide-from-5.0-to-2022.1/upgrade-guide-from-5.0-to-2022.1-ubuntu.html
|
||||
/stable/upgrade/upgrade-to-enterprise/upgrade-guide-from-5.0-to-2022.1/upgrade-guide-from-5.0-to-2022.1-ubuntu-20-04.html: /stable/upgrade/upgrade-to-enterprise/upgrade-guide-from-5.0-to-2022.1/upgrade-guide-from-5.0-to-2022.1-ubuntu.html
|
||||
|
||||
# removing the old Monitoring Stack documentation from the ScyllaDB docs
|
||||
|
||||
/stable/operating-scylla/monitoring/index.html: https://monitoring.docs.scylladb.com/stable/
|
||||
|
||||
@@ -1,20 +1,25 @@
|
||||
:term:`Sorted Strings Table (SSTable)<SSTable>` is the persistent file format used by Scylla and Apache Cassandra. SSTable is saved as a persistent, ordered, immutable set of files on disk.
|
||||
:term:`Sorted Strings Table (SSTable)<SSTable>` is the persistent file format used by ScyllaDB and Apache Cassandra. SSTable is saved as a persistent, ordered, immutable set of files on disk.
|
||||
Immutable means SSTables are never modified; they are created by a MemTable flush and are deleted by a compaction.
|
||||
The location of Scylla SSTables is specified in scylla.yaml ``data_file_directories`` parameter (default location: ``/var/lib/scylla/data``).
|
||||
The location of ScyllaDB SSTables is specified in scylla.yaml ``data_file_directories`` parameter (default location: ``/var/lib/scylla/data``).
|
||||
|
||||
SSTable 3.0 (mc format) is more efficient and requires less disk space than the SSTable 2.x. SSTable version support is as follows:
|
||||
SSTable 3.x is more efficient and requires less disk space than the SSTable 2.x.
|
||||
|
||||
SSTable Version Support
|
||||
------------------------
|
||||
|
||||
.. list-table::
|
||||
:widths: 33 33 33
|
||||
:header-rows: 1
|
||||
|
||||
* - SSTable Version
|
||||
- Scylla Enterprise Version
|
||||
- Scylla Open Source Version
|
||||
- ScyllaDB Enterprise Version
|
||||
- ScyllaDB Open Source Version
|
||||
* - 3.x ('me')
|
||||
- 2022.2
|
||||
- 5.1 and above
|
||||
* - 3.x ('md')
|
||||
- 2021.1
|
||||
- 4.3 and above
|
||||
- 4.3, 4.4, 4.5, 4.6, 5.0
|
||||
* - 3.0 ('mc')
|
||||
- 2019.1, 2020.1
|
||||
- 3.x, 4.1, 4.2
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
Scylla SSTable - 3.x
|
||||
====================
|
||||
ScyllaDB SSTable - 3.x
|
||||
=======================
|
||||
|
||||
.. toctree::
|
||||
:hidden:
|
||||
@@ -12,21 +12,24 @@ Scylla SSTable - 3.x
|
||||
|
||||
.. include:: ../_common/sstable_what_is.rst
|
||||
|
||||
* In Scylla 3.1 and above, mc format is enabled by default.
|
||||
* In ScyllaDB 5.1 and above, the ``me`` format is enabled by default.
|
||||
* In ScyllaDB 4.3 to 5.0, the ``md`` format is enabled by default.
|
||||
* In ScyllaDB 3.1 to 4.2, the ``mc`` format is enabled by default.
|
||||
* In ScyllaDB 3.0, the ``mc`` format is disabled by default. You can enable it by adding the ``enable_sstables_mc_format`` parameter set to ``true`` in the ``scylla.yaml`` file. For example:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
enable_sstables_mc_format: true
|
||||
|
||||
* In Scylla 3.0, mc format is disabled by default and can be enabled by adding the ``enable_sstables_mc_format`` parameter as 'true' in ``scylla.yaml`` file.
|
||||
.. REMOVE IN FUTURE VERSIONS - Remove the note above in version 5.2.
|
||||
|
||||
For example:
|
||||
Additional Information
|
||||
-------------------------
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
enable_sstables_mc_format: true
|
||||
|
||||
|
||||
For more information on Scylla 3.x SSTable formats, see below:
|
||||
For more information on ScyllaDB 3.x SSTable formats, see below:
|
||||
|
||||
* :doc:`SSTable 3.0 Data File Format <sstables-3-data-file-format>`
|
||||
* :doc:`SSTable 3.0 Statistics <sstables-3-statistics>`
|
||||
* :doc:`SSTable 3.0 Summary <sstables-3-summary>`
|
||||
* :doc:`SSTable 3.0 Index <sstables-3-index>`
|
||||
* :doc:`SSTable 3.0 Format in Scylla <sstable-format>`
|
||||
* :doc:`SSTable 3.0 Format in ScyllaDB <sstable-format>`
|
||||
|
||||
@@ -28,8 +28,13 @@ Table of contents mc-1-big-TOC.txt
|
||||
|
||||
This document focuses on the data file format but also refers to other components in parts where information stored in them affects the way we read/write the data file.
|
||||
|
||||
Note that the file on-disk format applies both to the "mc" and "md" SSTable format versions.
|
||||
The "md" format only fixed the semantics of the (min|max)_clustering_key fields in the SSTable Statistics file, which are now valid for describing the accurate range of clustering prefixes present in the SSTable.
|
||||
Note that the file on-disk format applies to all "m*" SSTable format versions ("mc", "md", and "me").
|
||||
|
||||
* The "md" format only fixed the semantics of the ``(min|max)_clustering_key`` fields in the SSTable Statistics file,
|
||||
which are now valid for describing the accurate range of clustering prefixes present in the SSTable.
|
||||
* The "me" format added the ``host_id`` of the host writing the SStable to the SSTable Statistics file.
|
||||
It is used to qualify the commit log replay position that is also stored in the SSTable Statistics file.
|
||||
|
||||
See :doc:`SSTables 3.0 Statistics File Format </architecture/sstable/sstable3/sstables-3-statistics>` for more details.
|
||||
|
||||
Overview
|
||||
|
||||
@@ -175,6 +175,13 @@ Whole entry
|
||||
// It contains only one commit log position interval - [lower bound of commit log, upper bound of commit log].
|
||||
|
||||
array<be32<int32_t>, commit_log_interval> commit_log_intervals;
|
||||
|
||||
// Versions MC and MD of SSTable 3.x format end here.
|
||||
|
||||
// UUID of the host that wrote the SSTable.
|
||||
// Qualifies all commitlog positions in the SSTable Statistics file.
|
||||
|
||||
UUID host_id;
|
||||
}
|
||||
|
||||
using clustering_bound = array<be32<int32_t>, clustering_column>;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# ScyllaDB CQL Extensions
|
||||
|
||||
Scylla extends the CQL language to provide a few extra features. This document
|
||||
ScyllaDB extends the CQL language to provide a few extra features. This document
|
||||
lists those extensions.
|
||||
|
||||
## BYPASS CACHE clause
|
||||
@@ -109,7 +109,7 @@ Storage options can be inspected by checking the new system schema table: `syste
|
||||
A special statement is dedicated for pruning ghost rows from materialized views.
|
||||
Ghost row is an inconsistency issue which manifests itself by having rows
|
||||
in a materialized view which do not correspond to any base table rows.
|
||||
Such inconsistencies should be prevented altogether and Scylla is striving to avoid
|
||||
Such inconsistencies should be prevented altogether and ScyllaDB is striving to avoid
|
||||
them, but *if* they happen, this statement can be used to restore a materialized view
|
||||
to a fully consistent state without rebuilding it from scratch.
|
||||
|
||||
@@ -133,21 +133,35 @@ token ranges.
|
||||
|
||||
## Synchronous materialized views
|
||||
|
||||
Materialized view updates can be applied synchronously (with errors propagated
|
||||
back to the user) or asynchronously, in the background. Historically, in order
|
||||
to use synchronous updates, the materialized view had to be local,
|
||||
which could be achieved e.g. by using the same partition key definition
|
||||
as the one present in the base table.
|
||||
Scylla also allows explicitly marking the view as synchronous, which forces
|
||||
all its view updates to be updated synchronously. Such views tend to reduce
|
||||
observed availability of the base table, because a base table write would only
|
||||
succeed if all synchronous view updates also succeed. On the other hand,
|
||||
failed view updates would be detected immediately, and appropriate action
|
||||
can be taken (e.g. pruning the materialized view, as mentioned in the paragraph
|
||||
above).
|
||||
Usually, when a table with materialized views is updated, the update to the
|
||||
views happens _asynchronously_, i.e., in the background. This means that
|
||||
the user cannot know when the view updates have all finished - or even be
|
||||
sure that they succeeded.
|
||||
|
||||
In order to mark a materialized view as synchronous, one can use the following
|
||||
syntax:
|
||||
However, there are circumstances where ScyllaDB does view updates
|
||||
_synchronously_ - i.e., the user's write returns only after the views
|
||||
were updated. This happens when the materialized-view replica is on the
|
||||
same node as the base-table replica. For example, if the base table and
|
||||
the view have the same partition key. Note that only ScyllaDB guarantees
|
||||
synchronous view updates in this case - they are asynchronous in Cassandra.
|
||||
|
||||
ScyllaDB also allows explicitly marking a view as synchronous. When a view
|
||||
is marked synchronous, base-table updates will wait for that view to be
|
||||
updated before returning. A base table may have multiple views marked
|
||||
synchronous, and will wait for all of them. The consistency level of a
|
||||
write applies to synchronous views as well as to the base table: For
|
||||
example, writing with QUORUM consistency level returns only after a
|
||||
quorum of the base-table replicas were updated *and* also a quorum of
|
||||
each synchronous view table was also updated.
|
||||
|
||||
Synchronous views tend to reduce the observed availability of the base table,
|
||||
because a base-table write would only succeed if enough synchronous view
|
||||
updates also succeed. On the other hand, failed view updates would be
|
||||
detected immediately, and appropriate action can be taken, such as retrying
|
||||
the write or pruning the materialized view (as mentioned in the previous
|
||||
section). This can improve the consistency of the base table with its views.
|
||||
|
||||
To create a new materialized view with synchronous updates, use:
|
||||
|
||||
```cql
|
||||
CREATE MATERIALIZED VIEW main.mv
|
||||
@@ -157,12 +171,18 @@ CREATE MATERIALIZED VIEW main.mv
|
||||
WITH synchronous_updates = true;
|
||||
```
|
||||
|
||||
To make an existing materialized view synchronous, use:
|
||||
|
||||
```cql
|
||||
ALTER MATERIALIZED VIEW main.mv WITH synchronous_updates = true;
|
||||
```
|
||||
|
||||
Synchronous updates can also be dynamically turned off by setting
|
||||
the value of `synchronous_updates` to `false`.
|
||||
To return a materialized view to the default behavior (which, as explained
|
||||
above, _usually_ means asynchronous updates), use:
|
||||
|
||||
```cql
|
||||
ALTER MATERIALIZED VIEW main.mv WITH synchronous_updates = false;
|
||||
```
|
||||
|
||||
### Synchronous global secondary indexes
|
||||
|
||||
@@ -261,7 +281,7 @@ that the rate of requests exceeds configured limit, the cluster will start
|
||||
rejecting some of them in order to bring the throughput back to the configured
|
||||
limit. Rejected requests are less costly which can help reduce overload.
|
||||
|
||||
_NOTE_: Due to Scylla's distributed nature, tracking per-partition request rates
|
||||
_NOTE_: Due to ScyllaDB's distributed nature, tracking per-partition request rates
|
||||
is not perfect and the actual rate of accepted requests may be higher up to
|
||||
a factor of keyspace's `RF`. This feature should not be used to enforce precise
|
||||
limits but rather serve as an overload protection feature.
|
||||
|
||||
@@ -99,11 +99,12 @@ alternatively, of the wildcard character (``*``) to select all the columns defin
|
||||
Selectors
|
||||
`````````
|
||||
|
||||
A :token:`selector` can be one of:
|
||||
A :token:`selector` can be one of the following:
|
||||
|
||||
- A column name of the table selected to retrieve the values for that column.
|
||||
- A casting, which allows you to convert a nested selector to a (compatible) type.
|
||||
- A function call, where the arguments are selector themselves.
|
||||
- A call to the :ref:`COUNT function <count-function>`, which counts all non-null results.
|
||||
|
||||
Aliases
|
||||
```````
|
||||
|
||||
@@ -21,7 +21,6 @@
|
||||
.. _cql-functions:
|
||||
|
||||
.. Need some intro for UDF and native functions in general and point those to it.
|
||||
.. _udfs:
|
||||
.. _native-functions:
|
||||
|
||||
Functions
|
||||
@@ -33,13 +32,15 @@ CQL supports two main categories of functions:
|
||||
- The :ref:`aggregate functions <aggregate-functions>`, which are used to aggregate multiple rows of results from a
|
||||
``SELECT`` statement.
|
||||
|
||||
.. In both cases, CQL provides a number of native "hard-coded" functions as well as the ability to create new user-defined
|
||||
.. functions.
|
||||
In both cases, CQL provides a number of native "hard-coded" functions as well as the ability to create new user-defined
|
||||
functions.
|
||||
|
||||
.. .. note:: By default, the use of user-defined functions is disabled by default for security concerns (even when
|
||||
.. enabled, the execution of user-defined functions is sandboxed and a "rogue" function should not be allowed to do
|
||||
.. evil, but no sandbox is perfect so using user-defined functions is opt-in). See the ``enable_user_defined_functions``
|
||||
.. in ``scylla.yaml`` to enable them.
|
||||
.. note:: Although user-defined functions are sandboxed, protecting the system from a "rogue" function, user-defined functions are disabled by default for extra security.
|
||||
See the ``enable_user_defined_functions`` in ``scylla.yaml`` to enable them.
|
||||
|
||||
Additionally, user-defined functions are still experimental and need to be explicitly enabled by adding ``udf`` to the list of
|
||||
``experimental_features`` configuration options in ``scylla.yaml``, or turning on the ``experimental`` flag.
|
||||
See :ref:`Enabling Experimental Features <yaml_enabling_experimental_features>` for details.
|
||||
|
||||
.. A function is identifier by its name:
|
||||
|
||||
@@ -60,11 +61,11 @@ Native functions
|
||||
Cast
|
||||
````
|
||||
|
||||
Supported starting from Scylla version 2.1
|
||||
Supported starting from ScyllaDB version 2.1
|
||||
|
||||
The ``cast`` function can be used to convert one native datatype to another.
|
||||
|
||||
The following table describes the conversions supported by the ``cast`` function. Scylla will silently ignore any cast converting a cast datatype into its own datatype.
|
||||
The following table describes the conversions supported by the ``cast`` function. ScyllaDB will silently ignore any cast converting a cast datatype into its own datatype.
|
||||
|
||||
=============== =======================================================================================================
|
||||
From To
|
||||
@@ -228,6 +229,65 @@ A number of functions are provided to “convert” the native types into binary
|
||||
takes a 64-bit ``blob`` argument and converts it to a ``bigint`` value. For example, ``bigintAsBlob(3)`` is
|
||||
``0x0000000000000003`` and ``blobAsBigint(0x0000000000000003)`` is ``3``.
|
||||
|
||||
.. _udfs:
|
||||
|
||||
User-defined functions :label-caution:`Experimental`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
User-defined functions (UDFs) execute user-provided code in ScyllaDB. Supported languages are currently Lua and WebAssembly.
|
||||
|
||||
UDFs are part of the ScyllaDB schema and are automatically propagated to all nodes in the cluster.
|
||||
UDFs can be overloaded, so that multiple UDFs with different argument types can have the same function name, for example::
|
||||
|
||||
CREATE FUNCTION sample ( arg int ) ...;
|
||||
CREATE FUNCTION sample ( arg text ) ...;
|
||||
|
||||
When calling a user-defined function, arguments can be literals or terms. Prepared statement placeholders can be used, too.
|
||||
|
||||
CREATE FUNCTION statement
|
||||
`````````````````````````
|
||||
|
||||
Creating a new user-defined function uses the ``CREATE FUNCTION`` statement. For example::
|
||||
|
||||
CREATE OR REPLACE FUNCTION div(dividend double, divisor double)
|
||||
RETURNS NULL ON NULL INPUT
|
||||
RETURNS double
|
||||
LANGUAGE LUA
|
||||
AS 'return dividend/divisor;';
|
||||
|
||||
``CREATE FUNCTION`` with the optional ``OR REPLACE`` keywords creates either a function
|
||||
or replaces an existing one with the same signature. A ``CREATE FUNCTION`` without ``OR REPLACE``
|
||||
fails if a function with the same signature already exists. If the optional ``IF NOT EXISTS``
|
||||
keywords are used, the function will only be created only if another function with the same
|
||||
signature does not exist. ``OR REPLACE`` and ``IF NOT EXISTS`` cannot be used together.
|
||||
|
||||
Behavior for null input values must be defined for each function:
|
||||
|
||||
* ``RETURNS NULL ON NULL INPUT`` declares that the function will always return null (without being executed) if any of the input arguments is null.
|
||||
* ``CALLED ON NULL INPUT`` declares that the function will always be executed.
|
||||
|
||||
Function Signature
|
||||
``````````````````
|
||||
|
||||
Signatures are used to distinguish individual functions. The signature consists of a fully-qualified function name of the <keyspace>.<function_name> and a concatenated list of all the argument types.
|
||||
|
||||
Note that keyspace names, function names and argument types are subject to the default naming conventions and case-sensitivity rules.
|
||||
|
||||
Functions belong to a keyspace; if no keyspace is specified, the current keyspace is used. User-defined functions are not allowed in the system keyspaces.
|
||||
|
||||
DROP FUNCTION statement
|
||||
```````````````````````
|
||||
|
||||
Dropping a function uses the ``DROP FUNCTION`` statement. For example::
|
||||
|
||||
DROP FUNCTION myfunction;
|
||||
DROP FUNCTION mykeyspace.afunction;
|
||||
DROP FUNCTION afunction ( int );
|
||||
DROP FUNCTION afunction ( text );
|
||||
|
||||
You must specify the argument types of the function, the arguments_signature, in the drop command if there are multiple overloaded functions with the same name but different signatures.
|
||||
``DROP FUNCTION`` with the optional ``IF EXISTS`` keywords drops a function if it exists, but does not throw an error if it doesn’t.
|
||||
|
||||
.. _aggregate-functions:
|
||||
|
||||
Aggregate functions
|
||||
@@ -261,6 +321,10 @@ It also can be used to count the non-null value of a given column::
|
||||
|
||||
SELECT COUNT (scores) FROM plays;
|
||||
|
||||
.. note::
|
||||
Counting all rows in a table may be time-consuming and exceed the default timeout. In such a case,
|
||||
see :doc:`Counting all rows in a table is slow </kb/count-all-rows>` for instructions.
|
||||
|
||||
Max and Min
|
||||
```````````
|
||||
|
||||
@@ -286,6 +350,59 @@ instance::
|
||||
|
||||
.. _user-defined-aggregates-functions:
|
||||
|
||||
User-defined aggregates (UDAs) :label-caution:`Experimental`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
User-defined aggregates allow the creation of custom aggregate functions. User-defined aggregates can be used in SELECT statement.
|
||||
|
||||
Each aggregate requires an initial state of type ``STYPE`` defined with the ``INITCOND`` value (default value: ``null``). The first argument of the state function must have type STYPE. The remaining arguments of the state function must match the types of the user-defined aggregate arguments. The state function is called once for each row, and the value returned by the state function becomes the new state. After all rows are processed, the optional FINALFUNC is executed with the last state value as its argument.
|
||||
|
||||
The ``STYPE`` value is mandatory in order to distinguish possibly overloaded versions of the state and/or final function, since the overload can appear after creation of the aggregate.
|
||||
|
||||
A complete working example for user-defined aggregates (assuming that a keyspace has been selected using the ``USE`` statement)::
|
||||
|
||||
CREATE FUNCTION accumulate_len(acc tuple<bigint,bigint>, a text)
|
||||
RETURNS NULL ON NULL INPUT
|
||||
RETURNS tuple<bigint,bigint>
|
||||
LANGUAGE lua as 'return {acc[1] + 1, acc[2] + #a}';
|
||||
|
||||
CREATE OR REPLACE FUNCTION present(res tuple<bigint,bigint>)
|
||||
RETURNS NULL ON NULL INPUT
|
||||
RETURNS text
|
||||
LANGUAGE lua as
|
||||
'return "The average string length is " .. res[2]/res[1] .. "!"';
|
||||
|
||||
CREATE OR REPLACE AGGREGATE avg_length(text)
|
||||
SFUNC accumulate_len
|
||||
STYPE tuple<bigint,bigint>
|
||||
FINALFUNC present
|
||||
INITCOND (0,0);
|
||||
|
||||
CREATE AGGREGATE statement
|
||||
``````````````````````````
|
||||
|
||||
The ``CREATE AGGREGATE`` command with the optional ``OR REPLACE`` keywords creates either an aggregate or replaces an existing one with the same signature. A ``CREATE AGGREGATE`` without ``OR REPLACE`` fails if an aggregate with the same signature already exists. The ``CREATE AGGREGATE`` command with the optional ``IF NOT EXISTS`` keywords creates an aggregate if it does not already exist. The ``OR REPLACE`` and ``IF NOT EXISTS`` phrases cannot be used together.
|
||||
|
||||
The ``STYPE`` value defines the type of the state value and must be specified. The optional ``INITCOND`` defines the initial state value for the aggregate; the default value is null. A non-null ``INITCOND`` must be specified for state functions that are declared with ``RETURNS NULL ON NULL INPUT``.
|
||||
|
||||
The ``SFUNC`` value references an existing function to use as the state-modifying function. The first argument of the state function must have type ``STYPE``. The remaining arguments of the state function must match the types of the user-defined aggregate arguments. The state function is called once for each row, and the value returned by the state function becomes the new state. State is not updated for state functions declared with ``RETURNS NULL ON NULL INPUT`` and called with null. After all rows are processed, the optional ``FINALFUNC`` is executed with last state value as its argument. It must take only one argument with type ``STYPE``, but the return type of the ``FINALFUNC`` may be a different type. A final function declared with ``RETURNS NULL ON NULL INPUT`` means that the aggregate’s return value will be null, if the last state is null.
|
||||
|
||||
If no ``FINALFUNC`` is defined, the overall return type of the aggregate function is ``STYPE``. If a ``FINALFUNC`` is defined, it is the return type of that function.
|
||||
|
||||
DROP AGGREGATE statement
|
||||
````````````````````````
|
||||
|
||||
Dropping a user-defined aggregate function uses the DROP AGGREGATE statement. For example::
|
||||
|
||||
DROP AGGREGATE myAggregate;
|
||||
DROP AGGREGATE myKeyspace.anAggregate;
|
||||
DROP AGGREGATE someAggregate ( int );
|
||||
DROP AGGREGATE someAggregate ( text );
|
||||
|
||||
The ``DROP AGGREGATE`` statement removes an aggregate created using ``CREATE AGGREGATE``. You must specify the argument types of the aggregate to drop if there are multiple overloaded aggregates with the same name but a different signature.
|
||||
|
||||
The ``DROP AGGREGATE`` command with the optional ``IF EXISTS`` keywords drops an aggregate if it exists, and does nothing if a function with the signature does not exist.
|
||||
|
||||
.. include:: /rst_include/apache-cql-return-index.rst
|
||||
|
||||
.. include:: /rst_include/apache-copyrights.rst
|
||||
.. include:: /rst_include/apache-copyrights.rst
|
||||
@@ -25,29 +25,31 @@
|
||||
|
||||
<div class="grid-x grid-margin-x hs">
|
||||
|
||||
.. topic-box::
|
||||
:title: New to ScyllaDB? Start here!
|
||||
:link: https://cloud.docs.scylladb.com/stable/scylladb-basics/
|
||||
:class: large-4
|
||||
:anchor: ScyllaDB Basics
|
||||
|
||||
Learn the essentials of ScyllaDB.
|
||||
|
||||
|
||||
.. topic-box::
|
||||
:title: Let us manage your DB
|
||||
:link: https://cloud.docs.scylladb.com
|
||||
:class: large-4
|
||||
:anchor: Get Started with Scylla Cloud
|
||||
:anchor: ScyllaDB Cloud Documentation
|
||||
|
||||
Take advantage of Scylla Cloud, a fully-managed database-as-a-service.
|
||||
Simplify application development with ScyllaDB Cloud - a fully managed database-as-a-service.
|
||||
|
||||
.. topic-box::
|
||||
:title: Manage your own DB
|
||||
:link: getting-started
|
||||
:class: large-4
|
||||
:anchor: Get Started with Scylla
|
||||
:anchor: ScyllaDB Open Source and Enterprise Documentation
|
||||
|
||||
Provision and manage a Scylla cluster in your environment.
|
||||
Deploy and manage your database in your own environment.
|
||||
|
||||
.. topic-box::
|
||||
:title: Connect your application to Scylla
|
||||
:link: using-scylla/drivers
|
||||
:class: large-4
|
||||
:anchor: Choose a Driver
|
||||
|
||||
Use high performance Scylla drivers to connect your application to a Scylla cluster.
|
||||
|
||||
.. raw:: html
|
||||
|
||||
@@ -57,14 +59,13 @@
|
||||
|
||||
<div class="topics-grid topics-grid--products">
|
||||
|
||||
<h2 class="topics-grid__title">Our Product List</h2>
|
||||
<p class="topics-grid__text">To begin choose a product from the list below</p>
|
||||
<h2 class="topics-grid__title">Our Products</h2>
|
||||
|
||||
<div class="grid-container full">
|
||||
<div class="grid-x grid-margin-x">
|
||||
|
||||
.. topic-box::
|
||||
:title: Scylla Enterprise
|
||||
:title: ScyllaDB Enterprise
|
||||
:link: getting-started
|
||||
:image: /_static/img/mascots/scylla-enterprise.svg
|
||||
:class: topic-box--product,large-3,small-6
|
||||
@@ -72,7 +73,7 @@
|
||||
ScyllaDB’s most stable high-performance enterprise-grade NoSQL database.
|
||||
|
||||
.. topic-box::
|
||||
:title: Scylla Open Source
|
||||
:title: ScyllaDB Open Source
|
||||
:link: getting-started
|
||||
:image: /_static/img/mascots/scylla-opensource.svg
|
||||
:class: topic-box--product,large-3,small-6
|
||||
@@ -80,15 +81,15 @@
|
||||
A high-performance NoSQL database with a close-to-the-hardware, shared-nothing approach.
|
||||
|
||||
.. topic-box::
|
||||
:title: Scylla Cloud
|
||||
:title: ScyllaDB Cloud
|
||||
:link: https://cloud.docs.scylladb.com
|
||||
:image: /_static/img/mascots/scylla-cloud.svg
|
||||
:class: topic-box--product,large-3,small-6
|
||||
|
||||
A fully managed NoSQL database as a service powered by Scylla Enterprise.
|
||||
A fully managed NoSQL database as a service powered by ScyllaDB Enterprise.
|
||||
|
||||
.. topic-box::
|
||||
:title: Scylla Alternator
|
||||
:title: ScyllaDB Alternator
|
||||
:link: https://docs.scylladb.com/stable/alternator/alternator.html
|
||||
:image: /_static/img/mascots/scylla-alternator.svg
|
||||
:class: topic-box--product,large-3,small-6
|
||||
@@ -96,23 +97,23 @@
|
||||
Open source Amazon DynamoDB-compatible API.
|
||||
|
||||
.. topic-box::
|
||||
:title: Scylla Monitoring Stack
|
||||
:title: ScyllaDB Monitoring Stack
|
||||
:link: https://monitoring.docs.scylladb.com
|
||||
:image: /_static/img/mascots/scylla-monitor.svg
|
||||
:class: topic-box--product,large-3,small-6
|
||||
|
||||
Complete open source monitoring solution for your Scylla clusters.
|
||||
Complete open source monitoring solution for your ScyllaDB clusters.
|
||||
|
||||
.. topic-box::
|
||||
:title: Scylla Manager
|
||||
:title: ScyllaDB Manager
|
||||
:link: https://manager.docs.scylladb.com
|
||||
:image: /_static/img/mascots/scylla-manager.svg
|
||||
:class: topic-box--product,large-3,small-6
|
||||
|
||||
Hassle-free Scylla NoSQL database management for scale-out clusters.
|
||||
Hassle-free ScyllaDB NoSQL database management for scale-out clusters.
|
||||
|
||||
.. topic-box::
|
||||
:title: Scylla Drivers
|
||||
:title: ScyllaDB Drivers
|
||||
:link: https://docs.scylladb.com/stable/using-scylla/drivers/
|
||||
:image: /_static/img/mascots/scylla-drivers.svg
|
||||
:class: topic-box--product,large-3,small-6
|
||||
@@ -120,12 +121,12 @@
|
||||
Shard-aware drivers for superior performance.
|
||||
|
||||
.. topic-box::
|
||||
:title: Scylla Operator
|
||||
:title: ScyllaDB Operator
|
||||
:link: https://operator.docs.scylladb.com
|
||||
:image: /_static/img/mascots/scylla-enterprise.svg
|
||||
:class: topic-box--product,large-3,small-6
|
||||
|
||||
Easily run and manage your Scylla Cluster on Kubernetes.
|
||||
Easily run and manage your ScyllaDB cluster on Kubernetes.
|
||||
|
||||
.. raw:: html
|
||||
|
||||
@@ -135,19 +136,19 @@
|
||||
|
||||
<div class="topics-grid">
|
||||
|
||||
<h2 class="topics-grid__title">Learn More About Scylla</h2>
|
||||
<h2 class="topics-grid__title">Learn More About ScyllaDB</h2>
|
||||
<p class="topics-grid__text"></p>
|
||||
<div class="grid-container full">
|
||||
<div class="grid-x grid-margin-x">
|
||||
|
||||
.. topic-box::
|
||||
:title: Attend Scylla University
|
||||
:title: Attend ScyllaDB University
|
||||
:link: https://university.scylladb.com/
|
||||
:image: /_static/img/mascots/scylla-university.png
|
||||
:class: large-6,small-12
|
||||
:anchor: Find a Class
|
||||
|
||||
| Register to take a *free* class at Scylla University.
|
||||
| Register to take a *free* class at ScyllaDB University.
|
||||
| There are several learning paths to choose from.
|
||||
|
||||
.. topic-box::
|
||||
@@ -178,9 +179,9 @@
|
||||
architecture/index
|
||||
troubleshooting/index
|
||||
kb/index
|
||||
Scylla University <https://university.scylladb.com/>
|
||||
ScyllaDB University <https://university.scylladb.com/>
|
||||
faq
|
||||
Contribute to Scylla <contribute>
|
||||
Contribute to ScyllaDB <contribute>
|
||||
glossary
|
||||
alternator/alternator
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
Counting all rows in a table is slow
|
||||
====================================
|
||||
|
||||
**Audience: Scylla users**
|
||||
**Audience: ScyllaDB users**
|
||||
|
||||
Trying to count all rows in a table using
|
||||
|
||||
@@ -10,14 +10,21 @@ Trying to count all rows in a table using
|
||||
|
||||
SELECT COUNT(1) FROM ks.table;
|
||||
|
||||
often fails with **ReadTimeout** error.
|
||||
may fail with the **ReadTimeout** error.
|
||||
|
||||
COUNT() is running a full-scan query on all nodes, which might take a long time to finish. Often the time is greater than Scylla query timeout.
|
||||
One way to bypass this in Scylla 4.4 or later is increasing the timeout for this query using the :ref:`USING TIMEOUT <using-timeout>` directive, for example:
|
||||
COUNT() runs a full-scan query on all nodes, which might take a long time to finish. As a result, the count time may be greater than the ScyllaDB query timeout.
|
||||
One way to prevent that issue in Scylla 4.4 or later is to increase the timeout for the query using the :ref:`USING TIMEOUT <using-timeout>` directive, for example:
|
||||
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
SELECT COUNT(1) FROM ks.table USING TIMEOUT 120s;
|
||||
|
||||
You can also get an *estimation* of the number **of partitions** (not rows) with :doc:`nodetool tablestats </operating-scylla/nodetool-commands/tablestats>`
|
||||
You can also get an *estimation* of the number **of partitions** (not rows) with :doc:`nodetool tablestats </operating-scylla/nodetool-commands/tablestats>`.
|
||||
|
||||
.. note::
|
||||
ScyllaDB 5.1 includes improvements to speed up the execution of SELECT COUNT(*) queries.
|
||||
To increase the count speed, we recommend upgrading to ScyllaDB 5.1 or later.
|
||||
|
||||
|
||||
.. REMOVE IN FUTURE VERSIONS - Remove the note above in version 5.1.
|
||||
|
||||
@@ -42,7 +42,7 @@ Steps:
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
nodetool compact <keyspace>.<mytable>;
|
||||
nodetool compact <keyspace> <mytable>;
|
||||
|
||||
5. Alter the table and change the grace period back to the original ``gc_grace_seconds`` value.
|
||||
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
* :doc:`REST - Scylla REST/HTTP Admin API</operating-scylla/rest>`.
|
||||
* :doc:`Tracing </using-scylla/tracing>` - a ScyllaDB tool for debugging and analyzing internal flows in the server.
|
||||
* :doc:`SSTableloader </operating-scylla/admin-tools/sstableloader>` - Bulk load the sstables found in the directory to a Scylla cluster
|
||||
* :doc:`scylla-sstable </operating-scylla/admin-tools/scylla-sstable>` - Validates and dumps the content of SStables, generates a histogram, dumps the content of the SStable index.
|
||||
* :doc:`scylla-types </operating-scylla/admin-tools/scylla-types/>` - Examines raw values obtained from SStables, logs, coredumps, etc.
|
||||
* :doc:`Scylla SStable </operating-scylla/admin-tools/scylla-sstable>` - Validates and dumps the content of SStables, generates a histogram, dumps the content of the SStable index.
|
||||
* :doc:`Scylla Types </operating-scylla/admin-tools/scylla-types/>` - Examines raw values obtained from SStables, logs, coredumps, etc.
|
||||
* :doc:`cassandra-stress </operating-scylla/admin-tools/cassandra-stress/>` - A tool for benchmarking and load testing Scylla and Cassandra clusters.
|
||||
* :doc:`SSTabledump - Scylla 3.0, Scylla Enterprise 2019.1 and newer versions </operating-scylla/admin-tools/sstabledump>`
|
||||
* :doc:`SSTable2JSON - Scylla 2.3 and older </operating-scylla/admin-tools/sstable2json>`
|
||||
|
||||
@@ -9,8 +9,8 @@ Admin Tools
|
||||
CQLSh </cql/cqlsh>
|
||||
REST </operating-scylla/rest>
|
||||
Tracing </using-scylla/tracing>
|
||||
scylla-sstable
|
||||
scylla-types </operating-scylla/admin-tools/scylla-types/>
|
||||
Scylla SStable </operating-scylla/admin-tools/scylla-sstable/>
|
||||
Scylla Types </operating-scylla/admin-tools/scylla-types/>
|
||||
sstableloader
|
||||
cassandra-stress </operating-scylla/admin-tools/cassandra-stress/>
|
||||
sstabledump
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
scylla-sstable
|
||||
Scylla SStable
|
||||
==============
|
||||
|
||||
.. versionadded:: 5.0
|
||||
@@ -9,7 +9,17 @@ Introduction
|
||||
This tool allows you to examine the content of SStables by performing operations such as dumping the content of SStables,
|
||||
generating a histogram, validating the content of SStables, and more. See `Supported Operations`_ for the list of available operations.
|
||||
|
||||
Run ``scylla-sstable --help`` for additional information about the tool and the operations.
|
||||
Run ``scylla sstable --help`` for additional information about the tool and the operations.
|
||||
|
||||
This tool is similar to SStableDump_, with notable differences:
|
||||
|
||||
* Built on the ScyllaDB C++ codebase, it supports all SStable formats and components that ScyllaDB supports.
|
||||
* Expanded scope: this tool supports much more than dumping SStable data components (see `Supported Operations`_).
|
||||
* More flexible on how schema is obtained and where SStables are located: SStableDump_ only supports dumping SStables located in their native data directory. To dump an SStable, one has to clone the entire ScyllaDB data directory tree, including system table directories and even config files. ``scylla sstable`` can dump sstables from any path with multiple choices on how to obtain the schema, see Schema_.
|
||||
|
||||
Currently, SStableDump_ works better on production systems as it automatically loads the schema from the system tables, unlike ``scylla sstable``, which has to be provided with the schema explicitly. On the other hand ``scylla sstable`` works better for off-line investigations, as it can be used with as little as just a schema definition file and a single sstable. In the future we plan on closing this gap -- adding support for automatic schema-loading for ``scylla sstable`` too -- and completely supplant SStableDump_ with ``scylla sstable``.
|
||||
|
||||
.. _SStableDump: /operating-scylla/admin-tools/sstabledump
|
||||
|
||||
Usage
|
||||
------
|
||||
@@ -21,11 +31,82 @@ The command syntax is as follows:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
scylla-sstable <operation> <path to SStable>
|
||||
scylla sstable <operation> <path to SStable>
|
||||
|
||||
|
||||
You can specify more than one SStable.
|
||||
|
||||
Schema
|
||||
^^^^^^
|
||||
All operations need a schema to interpret the SStables with.
|
||||
Currently, there are two ways to obtain the schema:
|
||||
|
||||
* ``--schema-file FILENAME`` - Read the schema definition from a file.
|
||||
* ``--system-schema KEYSPACE.TABLE`` - Use the known definition of built-in tables (only works for system tables).
|
||||
|
||||
By default, the tool uses the first method: ``--schema-file schema.cql``; i.e. it assumes there is a schema file named ``schema.cql`` in the working directory.
|
||||
If this fails, it will exit with an error.
|
||||
|
||||
The schema file should contain all definitions needed to interpret data belonging to the table.
|
||||
|
||||
Example ``schema.cql``:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
CREATE KEYSPACE ks WITH replication = {'class': 'NetworkTopologyStrategy', 'mydc1': 1, 'mydc2': 4};
|
||||
|
||||
CREATE TYPE ks.mytype (
|
||||
f1 int,
|
||||
f2 text
|
||||
);
|
||||
|
||||
CREATE TABLE ks.cf (
|
||||
pk int,
|
||||
ck text,
|
||||
v1 int,
|
||||
v2 mytype,
|
||||
PRIMARY KEY (pk, ck)
|
||||
);
|
||||
|
||||
Note:
|
||||
|
||||
* In addition to the table itself, the definition also has to include any user-defined types the table uses.
|
||||
* The keyspace definition is optional; if it is missing, one will be auto-generated.
|
||||
* The schema file doesn't have to be called ``schema.cql``; this is just the default name. Any file name is supported (with any extension).
|
||||
|
||||
Dropped columns
|
||||
***************
|
||||
|
||||
The examined sstable might have columns which were dropped from the schema definition. In this case, providing the up-to-date schema will not be enough; the tool will fail when attempting to process a cell for the dropped column.
|
||||
Dropped columns can be provided to the tool in the form of insert statements into the ``system_schema.dropped_columns`` system table, in the schema definition file. Example:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
INSERT INTO system_schema.dropped_columns (
|
||||
keyspace_name,
|
||||
table_name,
|
||||
column_name,
|
||||
dropped_time,
|
||||
type
|
||||
) VALUES (
|
||||
'ks',
|
||||
'cf',
|
||||
'v1',
|
||||
1631011979170675,
|
||||
'int'
|
||||
);
|
||||
|
||||
CREATE TABLE ks.cf (pk int PRIMARY KEY, v2 int);
|
||||
|
||||
System tables
|
||||
*************
|
||||
|
||||
If the examined table is a system table -- it belongs to one of the system keyspaces (``system``, ``system_schema``, ``system_distributed`` or ``system_distributed_everywhere``) -- you can just tell the tool to use the known built-in definition of said table. This is possible with the ``--system-schema`` flag. Example:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
scylla sstable dump-data --system-schema system.local ./path/to/md-123456-big-Data.db
|
||||
|
||||
Supported Operations
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
The ``dump-*`` operations output JSON. For ``dump-data``, you can specify another output format.
|
||||
@@ -56,17 +137,17 @@ Dumping the content of the SStable:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
scylla-sstable dump-data /path/to/md-123456-big-Data.db
|
||||
scylla sstable dump-data /path/to/md-123456-big-Data.db
|
||||
|
||||
Dumping the content of two SStables as a unified stream:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
scylla-sstable dump-data --merge /path/to/md-123456-big-Data.db /path/to/md-123457-big-Data.db
|
||||
scylla sstable dump-data --merge /path/to/md-123456-big-Data.db /path/to/md-123457-big-Data.db
|
||||
|
||||
|
||||
Validating the specified SStables:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
scylla-sstable validate /path/to/md-123456-big-Data.db /path/to/md-123457-big-Data.db
|
||||
scylla sstable validate /path/to/md-123456-big-Data.db /path/to/md-123457-big-Data.db
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
scylla-types
|
||||
Scylla Types
|
||||
==============
|
||||
|
||||
.. versionadded:: 5.0
|
||||
@@ -26,7 +26,7 @@ The command syntax is as follows:
|
||||
* Provide the values in the hex form without a leading 0x prefix.
|
||||
* You must specify the type of the provided values. See :ref:`Specifying the Value Type <scylla-types-type>`.
|
||||
* The number of provided values depends on the operation. See :ref:`Supported Operations <scylla-types-operations>` for details.
|
||||
* The scylla-types operations come with additional options. See :ref:`Additional Options <scylla-types-options>` for the list of options.
|
||||
* The ``scylla types`` operations come with additional options. See :ref:`Additional Options <scylla-types-options>` for the list of options.
|
||||
|
||||
.. _scylla-types-type:
|
||||
|
||||
|
||||
@@ -4,8 +4,10 @@ SSTabledump
|
||||
This tool allows you to convert an SSTable into a JSON format file.
|
||||
SSTabledump is supported when using Scylla 3.0, Scylla Enterprise 2019.1, and newer versions.
|
||||
In older versions, the tool is named SSTable2json_.
|
||||
If you need more flexibility or want to dump more than just the data-component, see scylla-sstable_.
|
||||
|
||||
.. _SSTable2json: /operating-scylla/admin-tools/sstable2json
|
||||
.. _scylla-sstable: /operating-scylla/admin-tools/scylla-sstable
|
||||
|
||||
Use the full path to the data file when executing the command.
|
||||
|
||||
|
||||
@@ -7,9 +7,7 @@ This document is a step-by-step procedure for upgrading from ScyllaDB Open Sourc
|
||||
|
||||
Applicable Versions
|
||||
===================
|
||||
This guide covers upgrading ScyllaDB from version 5.0.x to ScyllaDB Enterprise version 2022.1.y on the following platform:
|
||||
|
||||
* |OS|
|
||||
This guide covers upgrading ScyllaDB from version 5.0.x to ScyllaDB Enterprise version 2022.1.y on |OS|. See :doc:`OS Support by Platform and Version </getting-started/os-support>` for information about supported |OS| versions.
|
||||
|
||||
Upgrade Procedure
|
||||
=================
|
||||
|
||||
@@ -0,0 +1,79 @@
|
||||
=============================================================================
|
||||
Upgrade Guide - |SCYLLA_NAME| |SRC_VERSION| to |NEW_VERSION| for |OS|
|
||||
=============================================================================
|
||||
|
||||
This document is a step-by-step procedure for upgrading from ScyllaDB Enterprise 2021.1 to ScyllaDB Enterprise 2022.1, and rollback to 2021.1 if required.
|
||||
|
||||
|
||||
Applicable Versions
|
||||
===================
|
||||
This guide covers upgrading ScyllaDB Enterprise from version 2021.1.x to ScyllaDB Enterprise version 2022.1.y on |OS|. See :doc:`OS Support by Platform and Version </getting-started/os-support>` for information about supported versions.
|
||||
|
||||
Upgrade Procedure
|
||||
=================
|
||||
.. include:: /upgrade/upgrade-enterprise/_common/enterprise_2022.1_warnings.rst
|
||||
|
||||
A ScyllaDB upgrade is a rolling procedure that does **not** require a full cluster shutdown.
|
||||
For each of the nodes in the cluster, you will:
|
||||
|
||||
* Check the cluster schema
|
||||
* Drain the node and backup the data
|
||||
* Backup the configuration file
|
||||
* Stop ScyllaDB
|
||||
* Download and install the new ScyllaDB packages
|
||||
* Start ScyllaDB
|
||||
* Validate that the upgrade was successful
|
||||
|
||||
Apply the following procedure **serially** on each node. Do not move to the next node before validating the node that you upgraded is up and running the new version.
|
||||
|
||||
**During** the rolling upgrade, it is highly recommended:
|
||||
|
||||
* Not to use new 2022.1 features.
|
||||
* Not to run administration functions, like repairs, refresh, rebuild, or add or remove nodes. See `sctool <https://manager.docs.scylladb.com/stable/sctool/index.html>`_ for suspending ScyllaDB Manager's scheduled or running repairs.
|
||||
* Not to apply schema changes.
|
||||
|
||||
.. include:: /upgrade/_common/upgrade_to_2022_warning.rst
|
||||
|
||||
Upgrade Steps
|
||||
=============
|
||||
Check the cluster schema
|
||||
-------------------------
|
||||
Make sure that all nodes have the schema synched before the upgrade. The upgrade will fail if there is a schema disagreement between nodes.
|
||||
|
||||
.. code:: sh
|
||||
|
||||
nodetool describecluster
|
||||
|
||||
Drain the nodes and backup the data
|
||||
-------------------------------------
|
||||
Before any major procedure, like an upgrade, it is recommended to backup all the data to an external device. In ScyllaDB, backup is done using the ``nodetool snapshot`` command. For **each** node in the cluster, run the following command:
|
||||
|
||||
.. code:: sh
|
||||
|
||||
nodetool drain
|
||||
nodetool snapshot
|
||||
|
||||
Take note of the directory name that nodetool gives you, and copy all the directories having this name under ``/var/lib/scylla`` to a backup device.
|
||||
|
||||
When the upgrade is completed on all nodes, the snapshot should be removed with the ``nodetool clearsnapshot -t <snapshot>`` command to prevent running out of space.
|
||||
|
||||
Backup the configuration file
|
||||
------------------------------
|
||||
|
||||
.. code:: sh
|
||||
|
||||
sudo cp -a /etc/scylla/scylla.yaml /etc/scylla/scylla.yaml.backup-2021.1
|
||||
|
||||
Gracefully stop the node
|
||||
------------------------
|
||||
|
||||
.. code:: sh
|
||||
|
||||
sudo service scylla-enterprise-server stop
|
||||
|
||||
Download and install the new release
|
||||
------------------------------------
|
||||
Before upgrading, check what version you are running now using ``dpkg -l scylla\*server``. You should use the same version in case you want to |ROLLBACK|_ the upgrade. If you are not running a 2021.1.x version, stop right here! This guide only covers 2021.1.x to 2022.1.y upgrades.
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,127 @@
|
||||
**To upgrade ScyllaDB:**
|
||||
|
||||
#. Update the |APT|_ to **2022.1** and enable scylla/ppa repo.
|
||||
|
||||
.. code:: sh
|
||||
|
||||
Ubuntu 16:
|
||||
sudo add-apt-repository -y ppa:scylladb/ppa
|
||||
|
||||
#. Configure Java 1.8, which is requested by ScyllaDB Enterprise 2022.1.
|
||||
|
||||
.. code:: sh
|
||||
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y openjdk-8-jre-headless
|
||||
sudo update-java-alternatives -s java-1.8.0-openjdk-amd64
|
||||
|
||||
#. Install:
|
||||
|
||||
.. code:: sh
|
||||
|
||||
sudo apt-get clean all
|
||||
sudo apt-get update
|
||||
sudo apt-get dist-upgrade scylla-enterprise
|
||||
|
||||
Answer ‘y’ to the first two questions.
|
||||
|
||||
Start the node
|
||||
--------------
|
||||
|
||||
A new io.conf format was introduced in Scylla 2.3 and 2019.1. If your io.conf doesn't contain `--io-properties-file` option, then it's still the old format. You need to re-run the io setup to generate new io.conf.
|
||||
|
||||
.. code:: sh
|
||||
|
||||
sudo scylla_io_setup
|
||||
|
||||
.. code:: sh
|
||||
|
||||
sudo service scylla-enterprise-server start
|
||||
|
||||
Validate
|
||||
--------
|
||||
#. Check cluster status with ``nodetool status`` and make sure **all** nodes, including the one you just upgraded, are in UN status.
|
||||
#. Use ``curl -X GET "http://localhost:10000/storage_service/scylla_release_version"`` to check the ScyllaDB version.
|
||||
#. Check scylla-enterprise-server log (by ``journalctl _COMM=scylla``) and ``/var/log/syslog`` to validate there are no errors.
|
||||
#. Check again after two minutes to validate no new issues are introduced.
|
||||
|
||||
Once you are sure the node upgrade is successful, move to the next node in the cluster.
|
||||
|
||||
See :doc:`Scylla Metrics Update - Scylla Enterprise 2021.1 to 2022.1<metric-update-2021.1-to-2022.1>` for more information.
|
||||
|
||||
Rollback Procedure
|
||||
==================
|
||||
|
||||
.. include:: /upgrade/_common/warning_rollback.rst
|
||||
|
||||
The following procedure describes a rollback from ScyllaDB Enterprise release 2022.1.x to 2021.1.y. Apply this procedure if an upgrade from 2021.1 to 2022.1 failed before completing on all nodes. Use this procedure only for nodes you upgraded to 2022.1.
|
||||
|
||||
ScyllaDB rollback is a rolling procedure that does **not** require a full cluster shutdown.
|
||||
For each of the nodes you roll back to 2021.1, you will:
|
||||
|
||||
* Drain the node and stop ScyllaDB
|
||||
* Retrieve the old Scylla packages
|
||||
* Restore the configuration file
|
||||
* Restart ScyllaDB
|
||||
* Validate the rollback success
|
||||
|
||||
Apply the following procedure **serially** on each node. Do not move to the next node before validating the node is up and running with the new version.
|
||||
|
||||
Rollback Steps
|
||||
==============
|
||||
Gracefully shutdown ScyllaDB
|
||||
----------------------------
|
||||
|
||||
.. code:: sh
|
||||
|
||||
nodetool drain
|
||||
sudo service scylla-enterprise-server stop
|
||||
|
||||
Download and install the old release
|
||||
------------------------------------
|
||||
#. Remove the old repo file.
|
||||
|
||||
.. code:: sh
|
||||
|
||||
sudo rm -rf /etc/apt/sources.list.d/scylla.list
|
||||
|
||||
#. Update the |APT|_ to **2021.1**.
|
||||
#. Install:
|
||||
|
||||
.. code:: sh
|
||||
|
||||
sudo apt-get clean all
|
||||
sudo apt-get update
|
||||
sudo apt-get remove scylla\* -y
|
||||
sudo apt-get install scylla-enterprise
|
||||
|
||||
Answer ‘y’ to the first two questions.
|
||||
|
||||
Restore the configuration file
|
||||
------------------------------
|
||||
.. code:: sh
|
||||
|
||||
sudo rm -rf /etc/scylla/scylla.yaml
|
||||
sudo cp -a /etc/scylla/scylla.yaml.backup-2021.1 /etc/scylla/scylla.yaml
|
||||
|
||||
Restore system tables
|
||||
---------------------
|
||||
|
||||
Restore all tables of **system** and **system_schema** from the previous snapshot - 2022.1 uses a different set of system tables. Refer to :doc:`Restore from a Backup and Incremental Backup </operating-scylla/procedures/backup-restore/restore/>`.
|
||||
|
||||
.. code:: sh
|
||||
|
||||
cd /var/lib/scylla/data/keyspace_name/table_name-UUID/snapshots/<snapshot_name>/
|
||||
sudo cp -r * /var/lib/scylla/data/keyspace_name/table_name-UUID/
|
||||
sudo chown -R scylla:scylla /var/lib/scylla/data/keyspace_name/table_name-UUID/
|
||||
|
||||
Start the node
|
||||
--------------
|
||||
|
||||
.. code:: sh
|
||||
|
||||
sudo service scylla-enterprise-server start
|
||||
|
||||
Validate
|
||||
--------
|
||||
Check the upgrade instructions above for validation. Once you are sure the node rollback is successful, move to the next node in the cluster.
|
||||
@@ -0,0 +1,2 @@
|
||||
.. include:: /upgrade/_common/upgrade-guide-v2022-ubuntu-and-debian-p1.rst
|
||||
.. include:: /upgrade/_common/upgrade-guide-v2022-ubuntu-and-debian-p2.rst
|
||||
@@ -96,30 +96,6 @@ Answer ‘y’ to the first two questions.
|
||||
|
||||
Alternator users upgrading from Scylla 4.0 to 4.1, need to set :doc:`default isolation level </upgrade/upgrade-opensource/upgrade-guide-from-4.0-to-4.1/alternator>`
|
||||
|
||||
Update 3rd party and OS packages
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. versionadded:: Scylla 5.0
|
||||
.. versionadded:: Scylla Enterprise 2021.1.10
|
||||
|
||||
This step is optional. It is recommended if you run a Scylla official image (EC2 AMI, GCP, and Azure images) based on Ubuntu 20.04.
|
||||
|
||||
Run the following command:
|
||||
|
||||
.. code:: sh
|
||||
|
||||
cat scylla-packages-xxx-x86_64.txt | sudo xargs -n1 apt-get -y
|
||||
|
||||
|
||||
Where xxx is the relevant Scylla version ( |NEW_VERSION| ). The file is included in the Scylla packages downloaded in the previous step.
|
||||
|
||||
For example
|
||||
|
||||
.. code:: sh
|
||||
|
||||
cat scylla-packages-5.1.2-x86_64.txt | sudo xargs -n1 apt-get -y
|
||||
|
||||
|
||||
Start the node
|
||||
--------------
|
||||
|
||||
|
||||
@@ -81,27 +81,4 @@ Download and install the new release
|
||||
------------------------------------
|
||||
Before upgrading, check what version you are running now using ``dpkg -s scylla-server``. You should use the same version in case you want to |ROLLBACK|_ the upgrade. If you are not running a |SRC_VERSION|.x version, stop right here! This guide only covers |SRC_VERSION|.x to |NEW_VERSION|.y upgrades.
|
||||
|
||||
**To upgrade ScyllaDB:**
|
||||
|
||||
1. Update the |SCYLLA_REPO|_ to |NEW_VERSION|
|
||||
|
||||
2. Install
|
||||
|
||||
.. code-block::
|
||||
|
||||
sudo apt-get clean all
|
||||
sudo apt-get update
|
||||
sudo apt-get dist-upgrade |PKG_NAME|
|
||||
|
||||
|
||||
Answer ‘y’ to the first two questions.
|
||||
|
||||
**To upgrade ScyllaDB and update 3rd party and OS packages:**
|
||||
|
||||
.. include:: /upgrade/_common/upgrade-image.rst
|
||||
|
||||
|
||||
.. note::
|
||||
|
||||
Alternator users upgrading from Scylla 4.0 to 4.1 need to set :doc:`default isolation level </upgrade/upgrade-opensource/upgrade-guide-from-4.0-to-4.1/alternator>`.
|
||||
|
||||
|
||||
@@ -1,3 +1,23 @@
|
||||
**To upgrade ScyllaDB:**
|
||||
|
||||
1. Update the |SCYLLA_REPO|_ to |NEW_VERSION|
|
||||
|
||||
2. Install
|
||||
|
||||
.. code-block::
|
||||
|
||||
sudo apt-get clean all
|
||||
sudo apt-get update
|
||||
sudo apt-get dist-upgrade |PKG_NAME|
|
||||
|
||||
|
||||
Answer ‘y’ to the first two questions.
|
||||
|
||||
.. note::
|
||||
|
||||
Alternator users upgrading from Scylla 4.0 to 4.1 need to set :doc:`default isolation level </upgrade/upgrade-opensource/upgrade-guide-from-4.0-to-4.1/alternator>`.
|
||||
|
||||
|
||||
Start the node
|
||||
--------------
|
||||
|
||||
|
||||
@@ -1,20 +1,29 @@
|
||||
**To upgrade ScyllaDB and update 3rd party and OS packages (RECOMMENDED):**
|
||||
|
||||
.. versionadded:: Scylla 5.0
|
||||
.. versionadded:: Scylla Enterprise 2021.1.10
|
||||
|
||||
This alternative installation upgrade method allows you to upgrade your ScyllaDB version and update the 3rd party and OS packages using one command. This method is recommended if you run a ScyllaDB official image (EC2 AMI, GCP, and Azure images) based on Ubuntu 20.04.
|
||||
This installation upgrade method allows you to upgrade your ScyllaDB version and update the 3rd party and OS packages using one command. This method is recommended if you run a ScyllaDB official image (EC2 AMI, GCP, and Azure images), which is based on Ubuntu 20.04.
|
||||
|
||||
#. Update the |SCYLLA_REPO|_ to |NEW_VERSION|.
|
||||
|
||||
#. Run the following command:
|
||||
#. Run the following command to update the manifest file:
|
||||
|
||||
.. code:: sh
|
||||
|
||||
cat scylla-packages-xxx-x86_64.txt | sudo xargs -n1 apt-get -y
|
||||
cat scylla-packages-<version>-<arch>.txt | sudo xargs -n1 apt-get -y
|
||||
|
||||
Where:
|
||||
|
||||
* ``<version>`` - The Scylla version to which you are upgrading ( |NEW_VERSION| ).
|
||||
* ``<arch>`` - Architecture type: ``x86`` or ``aarch644``.
|
||||
|
||||
Where xxx is the relevant Scylla version ( |NEW_VERSION| ). The file is included in the Scylla packages downloaded in the previous step.
|
||||
|
||||
For example
|
||||
The file is included in the ScyllaDB packages downloaded in the previous step. The file location is:
|
||||
|
||||
* ScyllaDB Enterprise: ``http://downloads.scylladb.com/downloads/scylla-enterprise/aws/manifest/scylla-packages-<version>-<arch>.txt``
|
||||
* ScyllaDB Open Source: ``http://downloads.scylladb.com/downloads/scylla/aws/manifest/scylla-packages-<version>-<arch>.txt``
|
||||
|
||||
Example:
|
||||
|
||||
.. code:: sh
|
||||
|
||||
|
||||
@@ -7,9 +7,9 @@ Upgrade from ScyllaDB Enterprise 2021.1 to 2022.1
|
||||
:titlesonly:
|
||||
|
||||
Red Hat Enterprise Linux and CentOS <upgrade-guide-from-2021.1-to-2022.1-rpm>
|
||||
Ubuntu 18.04 <upgrade-guide-from-2021.1-to-2022.1-ubuntu-18-04>
|
||||
Ubuntu 20.04 <upgrade-guide-from-2021.1-to-2022.1-ubuntu-20-04>
|
||||
Ubuntu <upgrade-guide-from-2021.1-to-2022.1-ubuntu>
|
||||
Debian <upgrade-guide-from-2021.1-to-2022.1-debian>
|
||||
ScyllaDB Image <upgrade-guide-from-2022.1-to-2022.1-image>
|
||||
Metrics <metric-update-2021.1-to-2022.1>
|
||||
|
||||
.. raw:: html
|
||||
@@ -25,9 +25,9 @@ Upgrade from ScyllaDB Enterprise 2021.1 to 2022.1
|
||||
Upgrade guides are available for:
|
||||
|
||||
* :doc:`Upgrade ScyllaDB Enterprise from 2021.1.x to 2022.1.y on Red Hat Enterprise Linux and CentOS <upgrade-guide-from-2021.1-to-2022.1-rpm>`
|
||||
* :doc:`Upgrade ScyllaDB Enterprise from 2021.1.x to 2022.1.y on Ubuntu 18.04 <upgrade-guide-from-2021.1-to-2022.1-ubuntu-18-04>`
|
||||
* :doc:`Upgrade ScyllaDB Enterprise from 2021.1.x to 2022.1.y on Ubuntu 20.04 <upgrade-guide-from-2021.1-to-2022.1-ubuntu-20-04>`
|
||||
* :doc:`Upgrade ScyllaDB Enterprise from 2021.1.x to 2022.1.y on Ubuntu <upgrade-guide-from-2021.1-to-2022.1-ubuntu>`
|
||||
* :doc:`Upgrade ScyllaDB Enterprise from 2021.1.x to 2022.1.y on Debian <upgrade-guide-from-2021.1-to-2022.1-debian>`
|
||||
* :doc:`Upgrade ScyllaDB Enterprise Image (EC2, GCP, and Azure) from 2021.1.x to 2022.1.y <upgrade-guide-from-2022.1-to-2022.1-image>`
|
||||
* :doc:`ScyllaDB Enterprise Metrics Update - Scylla 2021.1 to 2022.1<metric-update-2021.1-to-2022.1>`
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,13 @@
|
||||
.. |OS| replace:: Debian 9
|
||||
.. |OS| replace:: Debian
|
||||
.. |ROLLBACK| replace:: rollback
|
||||
.. _ROLLBACK: /upgrade/upgrade-enterprise/upgrade-guide-from-2021.1-to-2022.1/upgrade-guide-from-2021.1-to-2022.1-debian/#rollback-procedure
|
||||
.. _ROLLBACK: /upgrade/upgrade-enterprise/upgrade-guide-from-2021.1-to-2022.1/upgrade-guide-from-2021.1-to-2022.1/#rollback-procedure
|
||||
.. |SRC_VERSION| replace:: 2021.1
|
||||
.. |NEW_VERSION| replace:: 2022.1
|
||||
.. |SCYLLA_NAME| replace:: ScyllaDB Enterprise
|
||||
.. |PKG_NAME| replace:: scylla
|
||||
.. |APT| replace:: ScyllaDB Enterprise Deb repo
|
||||
.. _APT: https://www.scylladb.com/customer-portal/?product=ent&platform=debian-9&version=stable-release-2022.1
|
||||
.. |SCYLLA_REPO| replace:: ScyllaDB Enterprise Deb repo
|
||||
.. _SCYLLA_REPO: https://www.scylladb.com/customer-portal/?product=ent&platform=debian-9&version=stable-release-2022.1
|
||||
.. |OPENJDK| replace:: openjdk-8-jre-headless
|
||||
.. include:: /upgrade/_common/upgrade-guide-from-2021.1-to-2022.1-ubuntu-and-debian.rst
|
||||
.. include:: /upgrade/_common/upgrade-guide-v2022-ubuntu-and-debian.rst
|
||||
|
||||
@@ -7,7 +7,7 @@ This document is a step-by-step procedure for upgrading from ScyllaDB Enterprise
|
||||
|
||||
Applicable Versions
|
||||
===================
|
||||
This guide covers upgrading ScyllaDB from version **2021.1.8** or later to ScyllaDB Enterprise version 2021.1.y, on the following platforms:
|
||||
This guide covers upgrading ScyllaDB from version **2021.1.8** or later to ScyllaDB Enterprise version 2022.1.y, on the following platforms:
|
||||
|
||||
* Red Hat Enterprise Linux, version 7 and later
|
||||
* CentOS, version 7 and later
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
.. |OS| replace:: 18.04
|
||||
.. |ROLLBACK| replace:: rollback
|
||||
.. _ROLLBACK: /upgrade/upgrade-enterprise/upgrade-guide-from-2021.1-to-2022.1/upgrade-guide-from-2021.1-to-2022.1-ubuntu-18-04/#rollback-procedure
|
||||
.. |APT| replace:: ScyllaDB Enterprise Deb repo
|
||||
.. _APT: https://www.scylladb.com/customer-portal/?product=ent&platform=ubuntu-18.04&version=stable-release-2022.1
|
||||
.. |OPENJDK| replace:: openjdk-8-jre-headless
|
||||
.. include:: /upgrade/_common/upgrade-guide-from-2021.1-to-2022.1-ubuntu-and-debian.rst
|
||||
@@ -1,7 +0,0 @@
|
||||
.. |OS| replace:: 20.04
|
||||
.. |ROLLBACK| replace:: rollback
|
||||
.. _ROLLBACK: /upgrade/upgrade-enterprise/upgrade-guide-from-2021.1-to-2022.1/upgrade-guide-from-2021.1-to-2022.1-ubuntu-18-04/#rollback-procedure
|
||||
.. |APT| replace:: ScyllaDB Enterprise Deb repo
|
||||
.. _APT: https://www.scylladb.com/customer-portal/?product=ent&platform=ubuntu-20.04&version=stable-release-2022.1
|
||||
.. |OPENJDK| replace:: openjdk-8-jre-headless
|
||||
.. include:: /upgrade/_common/upgrade-guide-from-2021.1-to-2022.1-ubuntu-and-debian.rst
|
||||
@@ -0,0 +1,13 @@
|
||||
.. |OS| replace:: Ubuntu
|
||||
.. |ROLLBACK| replace:: rollback
|
||||
.. _ROLLBACK: /upgrade/upgrade-enterprise/upgrade-guide-from-2021.1-to-2022.1/upgrade-guide-from-2021.1-to-2022.1-ubuntu/#rollback-procedure
|
||||
.. |SRC_VERSION| replace:: 2021.1
|
||||
.. |NEW_VERSION| replace:: 2022.1
|
||||
.. |SCYLLA_NAME| replace:: ScyllaDB Enterprise
|
||||
.. |PKG_NAME| replace:: scylla
|
||||
.. |APT| replace:: ScyllaDB Enterprise Deb repo
|
||||
.. _APT: https://www.scylladb.com/customer-portal/?product=ent&platform=ubuntu-20.04&version=stable-release-2022.1
|
||||
.. |SCYLLA_REPO| replace:: ScyllaDB Enterprise Deb repo
|
||||
.. _SCYLLA_REPO: https://www.scylladb.com/customer-portal/?product=ent&platform=ubuntu-20.04&version=stable-release-2022.1
|
||||
.. |OPENJDK| replace:: openjdk-8-jre-headless
|
||||
.. include:: /upgrade/_common/upgrade-guide-v2022-ubuntu-and-debian.rst
|
||||
@@ -0,0 +1,18 @@
|
||||
.. |OS| replace:: EC2, GCP, and Azure
|
||||
.. |ROLLBACK| replace:: rollback
|
||||
.. _ROLLBACK: /upgrade/upgrade-enterprise/upgrade-guide-from-2021.1-to-2022.1/upgrade-guide-from-2021.1-to-2022.1-image/#rollback-procedure
|
||||
.. |SRC_VERSION| replace:: 2021.1
|
||||
.. |NEW_VERSION| replace:: 2022.1
|
||||
.. |SCYLLA_NAME| replace:: ScyllaDB Image
|
||||
.. |PKG_NAME| replace:: scylla
|
||||
.. |APT| replace:: ScyllaDB Enterprise Deb repo
|
||||
.. _APT: https://www.scylladb.com/customer-portal/?product=ent&platform=ubuntu-20.04&version=stable-release-2022.1
|
||||
.. |SCYLLA_REPO| replace:: ScyllaDB Enterprise Deb repo
|
||||
.. _SCYLLA_REPO: https://www.scylladb.com/customer-portal/?product=ent&platform=ubuntu-20.04&version=stable-release-2022.1
|
||||
.. |SCYLLA_METRICS| replace:: Scylla Metrics Update - Scylla 2021.1 to 2022.1
|
||||
.. _SCYLLA_METRICS: ../metric-update-2021.1-to-2022.1
|
||||
.. |OPENJDK| replace:: openjdk-8-jre-headless
|
||||
.. include:: /upgrade/_common/upgrade-guide-v2022-ubuntu-and-debian-p1.rst
|
||||
.. include:: /upgrade/_common/upgrade-image.rst
|
||||
.. include:: /upgrade/_common/upgrade-guide-v2022-ubuntu-and-debian-p2.rst
|
||||
|
||||
@@ -7,8 +7,7 @@ Upgrade - ScyllaDB 5.0 to ScyllaDB Enterprise 2022.1
|
||||
:hidden:
|
||||
|
||||
Red Hat Enterprise Linux and CentOS <upgrade-guide-from-5.0-to-2022.1-rpm>
|
||||
Ubuntu 18.04 <upgrade-guide-from-5.0-to-2022.1-ubuntu-18-04>
|
||||
Ubuntu 20.04 <upgrade-guide-from-5.0-to-2022.1-ubuntu-20-04>
|
||||
Ubuntu <upgrade-guide-from-5.0-to-2022.1-ubuntu>
|
||||
Debian 10 <upgrade-guide-from-5.0-to-2022.1-debian-10>
|
||||
Metrics <metric-update-5.0-to-2022.1>
|
||||
|
||||
@@ -26,8 +25,7 @@ Upgrade - ScyllaDB 5.0 to ScyllaDB Enterprise 2022.1
|
||||
Upgrade guides are available for:
|
||||
|
||||
* :doc:`Upgrade ScyllaDB Enterprise from 5.0.x to 2022.1.y on Red Hat Enterprise Linux and CentOS <upgrade-guide-from-5.0-to-2022.1-rpm>`
|
||||
* :doc:`Upgrade ScyllaDB Enterprise from 5.0.x to 2022.1.y on Ubuntu 18.04 <upgrade-guide-from-5.0-to-2022.1-ubuntu-18-04>`
|
||||
* :doc:`Upgrade ScyllaDB Enterprise from 5.0.x to 2022.1.y on Ubuntu 20.04 <upgrade-guide-from-5.0-to-2022.1-ubuntu-20-04>`
|
||||
* :doc:`Upgrade ScyllaDB Enterprise from 5.0.x to 2022.1.y on Ubuntu <upgrade-guide-from-5.0-to-2022.1-ubuntu>`
|
||||
* :doc:`Upgrade ScyllaDB Enterprise from 5.0.x to 2022.1.y on Debian 10 <upgrade-guide-from-5.0-to-2022.1-debian-10>`
|
||||
* :doc:`ScyllaDB Metrics Update - ScyllaDB 5.0 to 2022.1 <metric-update-5.0-to-2022.1>`
|
||||
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
.. |OS| replace:: Ubuntu 18.04
|
||||
.. |ROLLBACK| replace:: rollback
|
||||
.. _ROLLBACK: /upgrade/upgrade-to-enterprise/upgrade-guide-from-5.0-to-2022.1/upgrade-guide-from-5.0-to-2022.1-ubuntu-18-04/#id4
|
||||
.. |APT| replace:: ScyllaDB deb repo
|
||||
.. _APT: https://www.scylladb.com/download/?platform=ubuntu-16.04&version=scylla-5.0
|
||||
.. |APT_ENTERPRISE| replace:: ScyllaDB Enterprise Deb repo
|
||||
.. _APT_ENTERPRISE: https://www.scylladb.com/customer-portal/?product=ent&platform=ubuntu-18.04&version=stable-release-2022.1
|
||||
.. include:: /upgrade/_common/upgrade-guide-from-5.0-to-2022.1-ubuntu-and-debian.rst
|
||||
@@ -1,6 +1,6 @@
|
||||
.. |OS| replace:: Ubuntu 20.04
|
||||
.. |OS| replace:: Ubuntu
|
||||
.. |ROLLBACK| replace:: rollback
|
||||
.. _ROLLBACK: /upgrade/upgrade-to-enterprise/upgrade-guide-from-5.0-to-2022.1/upgrade-guide-from-5.0-to-2022.1-ubuntu-20-04/#id4
|
||||
.. _ROLLBACK: /upgrade/upgrade-to-enterprise/upgrade-guide-from-5.0-to-2022.1/upgrade-guide-from-5.0-to-2022.1-ubuntu/#rollback-procedure
|
||||
.. |APT| replace:: ScyllaDB deb repo
|
||||
.. _APT: http://www.scylladb.com/download/
|
||||
.. |APT_ENTERPRISE| replace:: Scylla Enterprise Deb repo
|
||||
@@ -25,8 +25,8 @@ Scylla Open Source Features
|
||||
the implementation of Raft, schema changes in ScyllaDB are safe, including concurrent schema updates.
|
||||
This feature is experimental in version 5.0 and needs to be explicitly enabled.
|
||||
|
||||
* :doc:`scylla-sstable tool </operating-scylla/admin-tools/scylla-sstable/>` - An admin tool that allows you to examine the content of SStables by performing operations such as dumping the content of SStables, generating a histogram, validating the content of SStables, and more.
|
||||
* :doc:`scylla-types tool </operating-scylla/admin-tools/scylla-types/>` - An admin tool that allows you to examine raw values obtained from SStables, logs, coredumps, etc., by printing, validating or comparing the values.
|
||||
* :doc:`Scylla SStable tool </operating-scylla/admin-tools/scylla-sstable/>` - An admin tool that allows you to examine the content of SStables by performing operations such as dumping the content of SStables, generating a histogram, validating the content of SStables, and more.
|
||||
* :doc:`Scylla Types tool </operating-scylla/admin-tools/scylla-types/>` - An admin tool that allows you to examine raw values obtained from SStables, logs, coredumps, etc., by printing, validating or comparing the values.
|
||||
* :doc:`Virtual Tables </operating-scylla/admin-tools/virtual-tables/>` - Tables that retrieve system-level information by generating their contents on-the-fly when queried.
|
||||
|
||||
* Virtual table for configuration - ``system.config``, allows you to query and update configuration over CQL.
|
||||
|
||||
@@ -83,7 +83,7 @@ overloaded_exception::overloaded_exception(size_t c) noexcept
|
||||
{}
|
||||
|
||||
rate_limit_exception::rate_limit_exception(const sstring& ks, const sstring& cf, db::operation_type op_type_, bool rejected_by_coordinator_) noexcept
|
||||
: cassandra_exception(exception_code::CONFIG_ERROR, prepare_message("Per-partition rate limit reached for {} in table {}.{}, rejected by {}", op_type_, ks, cf, rejected_by_coordinator_ ? "coordinator" : "replicas"))
|
||||
: cassandra_exception(exception_code::RATE_LIMIT_ERROR, prepare_message("Per-partition rate limit reached for {} in table {}.{}, rejected by {}", op_type_, ks, cf, rejected_by_coordinator_ ? "coordinator" : "replicas"))
|
||||
, op_type(op_type_)
|
||||
, rejected_by_coordinator(rejected_by_coordinator_)
|
||||
{ }
|
||||
|
||||
@@ -1037,10 +1037,10 @@ std::set<inet_address> gossiper::get_live_members() {
|
||||
|
||||
std::set<inet_address> gossiper::get_live_token_owners() {
|
||||
std::set<inet_address> token_owners;
|
||||
for (auto& member : get_live_members()) {
|
||||
auto es = get_endpoint_state_for_endpoint_ptr(member);
|
||||
if (es && !is_dead_state(*es) && get_token_metadata_ptr()->is_member(member)) {
|
||||
token_owners.insert(member);
|
||||
auto normal_token_owners = get_token_metadata_ptr()->get_all_endpoints();
|
||||
for (auto& node: normal_token_owners) {
|
||||
if (is_alive(node)) {
|
||||
token_owners.insert(node);
|
||||
}
|
||||
}
|
||||
return token_owners;
|
||||
@@ -1048,10 +1048,10 @@ std::set<inet_address> gossiper::get_live_token_owners() {
|
||||
|
||||
std::set<inet_address> gossiper::get_unreachable_token_owners() {
|
||||
std::set<inet_address> token_owners;
|
||||
for (auto&& x : _unreachable_endpoints) {
|
||||
auto& endpoint = x.first;
|
||||
if (get_token_metadata_ptr()->is_member(endpoint)) {
|
||||
token_owners.insert(endpoint);
|
||||
auto normal_token_owners = get_token_metadata_ptr()->get_all_endpoints();
|
||||
for (auto& node: normal_token_owners) {
|
||||
if (!is_alive(node)) {
|
||||
token_owners.insert(node);
|
||||
}
|
||||
}
|
||||
return token_owners;
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
#include "locator/ec2_snitch.hh"
|
||||
#include <seastar/core/seastar.hh>
|
||||
#include <seastar/core/sleep.hh>
|
||||
#include <seastar/core/do_with.hh>
|
||||
|
||||
#include <boost/algorithm/string/classification.hpp>
|
||||
#include <boost/algorithm/string/split.hpp>
|
||||
@@ -61,6 +63,30 @@ future<> ec2_snitch::start() {
|
||||
}
|
||||
|
||||
future<sstring> ec2_snitch::aws_api_call(sstring addr, uint16_t port, sstring cmd) {
|
||||
return do_with(int(0), [this, addr, port, cmd] (int& i) {
|
||||
return repeat_until_value([this, addr, port, cmd, &i]() -> future<std::optional<sstring>> {
|
||||
++i;
|
||||
return aws_api_call_once(addr, port, cmd).then([] (auto res) {
|
||||
return make_ready_future<std::optional<sstring>>(std::move(res));
|
||||
}).handle_exception([&i] (auto ep) {
|
||||
try {
|
||||
std::rethrow_exception(ep);
|
||||
} catch (const std::system_error &e) {
|
||||
logger().error(e.what());
|
||||
if (i >= AWS_API_CALL_RETRIES - 1) {
|
||||
logger().error("Maximum number of retries exceeded");
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
return sleep(AWS_API_CALL_RETRY_INTERVAL).then([] {
|
||||
return make_ready_future<std::optional<sstring>>(std::nullopt);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
future<sstring> ec2_snitch::aws_api_call_once(sstring addr, uint16_t port, sstring cmd) {
|
||||
return connect(socket_address(inet_address{addr}, port))
|
||||
.then([this, addr, cmd] (connected_socket fd) {
|
||||
_sd = std::move(fd);
|
||||
|
||||
@@ -16,6 +16,8 @@ public:
|
||||
static constexpr const char* ZONE_NAME_QUERY_REQ = "/latest/meta-data/placement/availability-zone";
|
||||
static constexpr const char* AWS_QUERY_SERVER_ADDR = "169.254.169.254";
|
||||
static constexpr uint16_t AWS_QUERY_SERVER_PORT = 80;
|
||||
static constexpr int AWS_API_CALL_RETRIES = 5;
|
||||
static constexpr auto AWS_API_CALL_RETRY_INTERVAL = std::chrono::seconds{5};
|
||||
|
||||
ec2_snitch(const snitch_config&);
|
||||
virtual future<> start() override;
|
||||
@@ -32,5 +34,6 @@ private:
|
||||
output_stream<char> _out;
|
||||
http_response_parser _parser;
|
||||
sstring _zone_req;
|
||||
future<sstring> aws_api_call_once(sstring addr, uint16_t port, const sstring cmd);
|
||||
};
|
||||
} // namespace locator
|
||||
|
||||
6
main.cc
6
main.cc
@@ -600,6 +600,12 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
||||
|
||||
cfg->broadcast_to_all_shards().get();
|
||||
|
||||
// We pass this piece of config through a global as a temporary hack.
|
||||
// See the comment at the definition of sstables::global_cache_index_pages.
|
||||
smp::invoke_on_all([&cfg] {
|
||||
sstables::global_cache_index_pages = cfg->cache_index_pages.operator utils::updateable_value<bool>();
|
||||
}).get();
|
||||
|
||||
::sighup_handler sighup_handler(opts, *cfg);
|
||||
auto stop_sighup_handler = defer_verbose_shutdown("sighup", [&] {
|
||||
sighup_handler.stop().get();
|
||||
|
||||
@@ -467,6 +467,8 @@ static constexpr unsigned do_get_rpc_client_idx(messaging_verb verb) {
|
||||
// should not be blocked by any data requests.
|
||||
case messaging_verb::GROUP0_PEER_EXCHANGE:
|
||||
case messaging_verb::GROUP0_MODIFY_CONFIG:
|
||||
// ATTN -- if moving GOSSIP_ verbs elsewhere, mind updating the tcp_nodelay
|
||||
// setting in get_rpc_client(), which assumes gossiper verbs live in idx 0
|
||||
return 0;
|
||||
case messaging_verb::PREPARE_MESSAGE:
|
||||
case messaging_verb::PREPARE_DONE_MESSAGE:
|
||||
@@ -737,7 +739,7 @@ shared_ptr<messaging_service::rpc_protocol_client_wrapper> messaging_service::ge
|
||||
}();
|
||||
|
||||
auto must_tcp_nodelay = [&] {
|
||||
if (idx == 1) {
|
||||
if (idx == 0) {
|
||||
return true; // gossip
|
||||
}
|
||||
if (_cfg.tcp_nodelay == tcp_nodelay_what::local) {
|
||||
|
||||
@@ -826,6 +826,7 @@ public:
|
||||
|
||||
void apply(tombstone deleted_at) {
|
||||
_deleted_at.apply(deleted_at);
|
||||
maybe_shadow();
|
||||
}
|
||||
|
||||
void apply(shadowable_tombstone deleted_at) {
|
||||
|
||||
@@ -444,7 +444,7 @@ public:
|
||||
// When throws, the cursor is invalidated and its position is not changed.
|
||||
bool advance_to(position_in_partition_view lower_bound) {
|
||||
maybe_advance_to(lower_bound);
|
||||
return no_clustering_row_between(_schema, lower_bound, position());
|
||||
return no_clustering_row_between_weak(_schema, lower_bound, position());
|
||||
}
|
||||
|
||||
// Call only when valid.
|
||||
|
||||
@@ -571,6 +571,20 @@ bool no_clustering_row_between(const schema& s, position_in_partition_view a, po
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if and only if there can't be any clustering_row with position >= a and < b.
|
||||
// It is assumed that a <= b.
|
||||
inline
|
||||
bool no_clustering_row_between_weak(const schema& s, position_in_partition_view a, position_in_partition_view b) {
|
||||
clustering_key_prefix::equality eq(s);
|
||||
if (a.has_key() && b.has_key()) {
|
||||
return eq(a.key(), b.key())
|
||||
&& (a.get_bound_weight() == bound_weight::after_all_prefixed
|
||||
|| b.get_bound_weight() != bound_weight::after_all_prefixed);
|
||||
} else {
|
||||
return !a.has_key() && !b.has_key();
|
||||
}
|
||||
}
|
||||
|
||||
// Includes all position_in_partition objects "p" for which: start <= p < end
|
||||
// And only those.
|
||||
class position_range {
|
||||
|
||||
19
querier.cc
19
querier.cc
@@ -413,25 +413,6 @@ future<bool> querier_cache::evict_one() noexcept {
|
||||
co_return false;
|
||||
}
|
||||
|
||||
future<> querier_cache::evict_all_for_table(const utils::UUID& schema_id) noexcept {
|
||||
for (auto ip : {&_data_querier_index, &_mutation_querier_index, &_shard_mutation_querier_index}) {
|
||||
auto& idx = *ip;
|
||||
for (auto it = idx.begin(); it != idx.end();) {
|
||||
if (it->second->schema().id() == schema_id) {
|
||||
auto reader_opt = it->second->permit().semaphore().unregister_inactive_read(querier_utils::get_inactive_read_handle(*it->second));
|
||||
it = idx.erase(it);
|
||||
--_stats.population;
|
||||
if (reader_opt) {
|
||||
co_await reader_opt->close();
|
||||
}
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
}
|
||||
co_return;
|
||||
}
|
||||
|
||||
future<> querier_cache::stop() noexcept {
|
||||
co_await _closing_gate.close();
|
||||
|
||||
|
||||
@@ -383,11 +383,6 @@ public:
|
||||
/// is empty).
|
||||
future<bool> evict_one() noexcept;
|
||||
|
||||
/// Evict all queriers that belong to a table.
|
||||
///
|
||||
/// Should be used when dropping a table.
|
||||
future<> evict_all_for_table(const utils::UUID& schema_id) noexcept;
|
||||
|
||||
/// Close all queriers and wait on background work.
|
||||
///
|
||||
/// Should be used before destroying the querier_cache.
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include <boost/range/adaptor/reversed.hpp>
|
||||
#include "range_tombstone_list.hh"
|
||||
#include "utils/allocation_strategy.hh"
|
||||
#include "utils/amortized_reserve.hh"
|
||||
#include <seastar/util/variant_utils.hh>
|
||||
|
||||
range_tombstone_list::range_tombstone_list(const range_tombstone_list& x)
|
||||
@@ -375,13 +376,13 @@ range_tombstone_list::reverter::insert(range_tombstones_type::iterator it, range
|
||||
|
||||
range_tombstone_list::range_tombstones_type::iterator
|
||||
range_tombstone_list::reverter::erase(range_tombstones_type::iterator it) {
|
||||
_ops.reserve(_ops.size() + 1);
|
||||
amortized_reserve(_ops, _ops.size() + 1);
|
||||
_ops.emplace_back(erase_undo_op(*it));
|
||||
return _dst._tombstones.erase(it);
|
||||
}
|
||||
|
||||
void range_tombstone_list::reverter::update(range_tombstones_type::iterator it, range_tombstone&& new_rt) {
|
||||
_ops.reserve(_ops.size() + 1);
|
||||
amortized_reserve(_ops, _ops.size() + 1);
|
||||
swap(it->tombstone(), new_rt);
|
||||
_ops.emplace_back(update_undo_op(std::move(new_rt), *it));
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "range_tombstone.hh"
|
||||
#include "query-request.hh"
|
||||
#include "utils/preempt.hh"
|
||||
#include "utils/chunked_vector.hh"
|
||||
#include <iosfwd>
|
||||
#include <variant>
|
||||
|
||||
@@ -106,7 +107,7 @@ class range_tombstone_list final {
|
||||
class reverter {
|
||||
private:
|
||||
using op = std::variant<erase_undo_op, insert_undo_op, update_undo_op>;
|
||||
std::vector<op> _ops;
|
||||
utils::chunked_vector<op> _ops;
|
||||
const schema& _s;
|
||||
protected:
|
||||
range_tombstone_list& _dst;
|
||||
|
||||
@@ -749,6 +749,25 @@ void reader_concurrency_semaphore::clear_inactive_reads() {
|
||||
}
|
||||
}
|
||||
|
||||
future<> reader_concurrency_semaphore::evict_inactive_reads_for_table(utils::UUID id) noexcept {
|
||||
inactive_reads_type evicted_readers;
|
||||
auto it = _inactive_reads.begin();
|
||||
while (it != _inactive_reads.end()) {
|
||||
auto& ir = *it;
|
||||
++it;
|
||||
if (ir.reader.schema()->id() == id) {
|
||||
do_detach_inactive_reader(ir, evict_reason::manual);
|
||||
ir.ttl_timer.cancel();
|
||||
ir.unlink();
|
||||
evicted_readers.push_back(ir);
|
||||
}
|
||||
}
|
||||
while (!evicted_readers.empty()) {
|
||||
std::unique_ptr<inactive_read> irp(&evicted_readers.front());
|
||||
co_await irp->reader.close();
|
||||
}
|
||||
}
|
||||
|
||||
std::runtime_error reader_concurrency_semaphore::stopped_exception() {
|
||||
return std::runtime_error(format("{} was stopped", _name));
|
||||
}
|
||||
@@ -771,11 +790,9 @@ future<> reader_concurrency_semaphore::stop() noexcept {
|
||||
co_return;
|
||||
}
|
||||
|
||||
flat_mutation_reader_v2 reader_concurrency_semaphore::detach_inactive_reader(inactive_read& ir, evict_reason reason) noexcept {
|
||||
auto reader = std::move(ir.reader);
|
||||
void reader_concurrency_semaphore::do_detach_inactive_reader(inactive_read& ir, evict_reason reason) noexcept {
|
||||
ir.detach();
|
||||
reader.permit()._impl->on_evicted();
|
||||
std::unique_ptr<inactive_read> irp(&ir);
|
||||
ir.reader.permit()._impl->on_evicted();
|
||||
try {
|
||||
if (ir.notify_handler) {
|
||||
ir.notify_handler(reason);
|
||||
@@ -794,7 +811,12 @@ flat_mutation_reader_v2 reader_concurrency_semaphore::detach_inactive_reader(ina
|
||||
break;
|
||||
}
|
||||
--_stats.inactive_reads;
|
||||
return reader;
|
||||
}
|
||||
|
||||
flat_mutation_reader_v2 reader_concurrency_semaphore::detach_inactive_reader(inactive_read& ir, evict_reason reason) noexcept {
|
||||
std::unique_ptr<inactive_read> irp(&ir);
|
||||
do_detach_inactive_reader(ir, reason);
|
||||
return std::move(irp->reader);
|
||||
}
|
||||
|
||||
void reader_concurrency_semaphore::evict(inactive_read& ir, evict_reason reason) noexcept {
|
||||
|
||||
@@ -187,6 +187,7 @@ private:
|
||||
std::optional<future<>> _execution_loop_future;
|
||||
|
||||
private:
|
||||
void do_detach_inactive_reader(inactive_read&, evict_reason reason) noexcept;
|
||||
[[nodiscard]] flat_mutation_reader_v2 detach_inactive_reader(inactive_read&, evict_reason reason) noexcept;
|
||||
void evict(inactive_read&, evict_reason reason) noexcept;
|
||||
|
||||
@@ -302,6 +303,9 @@ public:
|
||||
|
||||
/// Clear all inactive reads.
|
||||
void clear_inactive_reads();
|
||||
|
||||
/// Evict all inactive reads the belong to the table designated by the id.
|
||||
future<> evict_inactive_reads_for_table(utils::UUID id) noexcept;
|
||||
private:
|
||||
// The following two functions are extension points for
|
||||
// future inheriting classes that needs to run some stop
|
||||
|
||||
@@ -847,12 +847,12 @@ future<> shard_reader_v2::do_fill_buffer() {
|
||||
}
|
||||
|
||||
auto res = co_await(std::move(fill_buf_fut));
|
||||
_end_of_stream = res.end_of_stream;
|
||||
reserve_additional(res.buffer->size());
|
||||
for (const auto& mf : *res.buffer) {
|
||||
push_mutation_fragment(mutation_fragment_v2(*_schema, _permit, mf));
|
||||
co_await coroutine::maybe_yield();
|
||||
}
|
||||
_end_of_stream = res.end_of_stream;
|
||||
}
|
||||
|
||||
future<> shard_reader_v2::fill_buffer() {
|
||||
|
||||
@@ -1017,7 +1017,9 @@ future<> database::drop_column_family(const sstring& ks_name, const sstring& cf_
|
||||
remove(*cf);
|
||||
cf->clear_views();
|
||||
co_await cf->await_pending_ops();
|
||||
co_await _querier_cache.evict_all_for_table(cf->schema()->id());
|
||||
for (auto* sem : {&_read_concurrency_sem, &_streaming_concurrency_sem, &_compaction_concurrency_sem, &_system_read_concurrency_sem}) {
|
||||
co_await sem->evict_inactive_reads_for_table(uuid);
|
||||
}
|
||||
auto f = co_await coroutine::as_future(truncate(ks, *cf, std::move(tsf), snapshot));
|
||||
co_await cf->stop();
|
||||
f.get(); // re-throw exception from truncate() if any
|
||||
@@ -2238,10 +2240,14 @@ future<> database::stop() {
|
||||
|
||||
// try to ensure that CL has done disk flushing
|
||||
if (_commitlog) {
|
||||
dblog.info("Shutting down commitlog");
|
||||
co_await _commitlog->shutdown();
|
||||
dblog.info("Shutting down commitlog complete");
|
||||
}
|
||||
if (_schema_commitlog) {
|
||||
dblog.info("Shutting down schema commitlog");
|
||||
co_await _schema_commitlog->shutdown();
|
||||
dblog.info("Shutting down schema commitlog complete");
|
||||
}
|
||||
co_await _view_update_concurrency_sem.wait(max_memory_pending_view_updates());
|
||||
if (_commitlog) {
|
||||
|
||||
@@ -586,7 +586,8 @@ table::seal_active_memtable(flush_permit&& flush_permit) noexcept {
|
||||
auto permit = std::move(flush_permit);
|
||||
auto r = exponential_backoff_retry(100ms, 10s);
|
||||
// Try flushing for around half an hour (30 minutes every 10 seconds)
|
||||
int allowed_retries = 30 * 60 / 10;
|
||||
int default_retries = 30 * 60 / 10;
|
||||
int allowed_retries = default_retries;
|
||||
std::optional<utils::phased_barrier::operation> op;
|
||||
size_t memtable_size;
|
||||
future<> previous_flush = make_ready_future<>();
|
||||
@@ -599,7 +600,19 @@ table::seal_active_memtable(flush_permit&& flush_permit) noexcept {
|
||||
} catch (...) {
|
||||
ex = std::current_exception();
|
||||
_config.cf_stats->failed_memtables_flushes_count++;
|
||||
auto abort_on_error = [ex] () {
|
||||
|
||||
if (try_catch<std::bad_alloc>(ex)) {
|
||||
// There is a chance something else will free the memory, so we can try again
|
||||
allowed_retries--;
|
||||
} else if (auto ep = try_catch<std::system_error>(ex)) {
|
||||
allowed_retries = ep->code().value() == ENOSPC ? default_retries : 0;
|
||||
} else if (auto ep = try_catch<storage_io_error>(ex)) {
|
||||
allowed_retries = ep->code().value() == ENOSPC ? default_retries : 0;
|
||||
} else {
|
||||
allowed_retries = 0;
|
||||
}
|
||||
|
||||
if (allowed_retries <= 0) {
|
||||
// At this point we don't know what has happened and it's better to potentially
|
||||
// take the node down and rely on commitlog to replay.
|
||||
//
|
||||
@@ -608,14 +621,6 @@ table::seal_active_memtable(flush_permit&& flush_permit) noexcept {
|
||||
// may end up in an infinite crash loop.
|
||||
tlogger.error("Memtable flush failed due to: {}. Aborting, at {}", ex, current_backtrace());
|
||||
std::abort();
|
||||
};
|
||||
if (try_catch<std::bad_alloc>(ex)) {
|
||||
// There is a chance something else will free the memory, so we can try again
|
||||
if (allowed_retries-- <= 0) {
|
||||
abort_on_error();
|
||||
}
|
||||
} else {
|
||||
abort_on_error();
|
||||
}
|
||||
}
|
||||
if (_async_gate.is_closed()) {
|
||||
@@ -681,7 +686,7 @@ table::seal_active_memtable(flush_permit&& flush_permit) noexcept {
|
||||
auto write_permit = permit.release_sstable_write_permit();
|
||||
|
||||
utils::get_local_injector().inject("table_seal_active_memtable_try_flush", []() {
|
||||
throw std::bad_alloc();
|
||||
throw std::system_error(ENOSPC, std::system_category(), "Injected error");
|
||||
});
|
||||
co_return co_await this->try_flush_memtable_to_sstable(old, std::move(write_permit));
|
||||
});
|
||||
|
||||
2
seastar
2
seastar
Submodule seastar updated: f9f5228b74...3aa91b4d2d
@@ -1189,7 +1189,7 @@ private:
|
||||
}
|
||||
index_reader& get_index_reader() {
|
||||
if (!_index_reader) {
|
||||
auto caching = use_caching(!_slice.options.contains(query::partition_slice::option::bypass_cache));
|
||||
auto caching = use_caching(global_cache_index_pages && !_slice.options.contains(query::partition_slice::option::bypass_cache));
|
||||
_index_reader = std::make_unique<index_reader>(_sst, _consumer.permit(), _consumer.io_priority(),
|
||||
_consumer.trace_state(), caching, _single_partition_read);
|
||||
}
|
||||
|
||||
@@ -1319,7 +1319,7 @@ private:
|
||||
}
|
||||
index_reader& get_index_reader() {
|
||||
if (!_index_reader) {
|
||||
auto caching = use_caching(!_slice.options.contains(query::partition_slice::option::bypass_cache));
|
||||
auto caching = use_caching(global_cache_index_pages && !_slice.options.contains(query::partition_slice::option::bypass_cache));
|
||||
_index_reader = std::make_unique<index_reader>(_sst, _consumer.permit(), _consumer.io_priority(),
|
||||
_consumer.trace_state(), caching, _single_partition_read);
|
||||
}
|
||||
@@ -1754,9 +1754,7 @@ public:
|
||||
_monitor.on_read_started(_context->reader_position());
|
||||
}
|
||||
public:
|
||||
void on_out_of_clustering_range() override {
|
||||
push_mutation_fragment(mutation_fragment_v2(*_schema, _permit, partition_end()));
|
||||
}
|
||||
void on_out_of_clustering_range() override { }
|
||||
virtual future<> fast_forward_to(const dht::partition_range& pr) override {
|
||||
on_internal_error(sstlog, "mx_crawling_sstable_mutation_reader: doesn't support fast_forward_to(const dht::partition_range&)");
|
||||
}
|
||||
|
||||
@@ -87,6 +87,18 @@ thread_local disk_error_signal_type sstable_write_error;
|
||||
|
||||
namespace sstables {
|
||||
|
||||
// The below flag governs the mode of index file page caching used by the index
|
||||
// reader.
|
||||
//
|
||||
// If set to true, the reader will read and/or populate a common global cache,
|
||||
// which shares its capacity with the row cache. If false, the reader will use
|
||||
// BYPASS CACHE semantics for index caching.
|
||||
//
|
||||
// This flag is intended to be a temporary hack. The goal is to eventually
|
||||
// solve index caching problems via a smart cache replacement policy.
|
||||
//
|
||||
thread_local utils::updateable_value<bool> global_cache_index_pages(false);
|
||||
|
||||
logging::logger sstlog("sstable");
|
||||
|
||||
// Because this is a noop and won't hold any state, it is better to use a global than a
|
||||
|
||||
@@ -50,6 +50,7 @@
|
||||
#include "mutation_fragment_stream_validator.hh"
|
||||
#include "readers/flat_mutation_reader_fwd.hh"
|
||||
#include "tracing/trace_state.hh"
|
||||
#include "utils/updateable_value.hh"
|
||||
|
||||
#include <seastar/util/optimized_optional.hh>
|
||||
|
||||
@@ -58,6 +59,8 @@ class cached_file;
|
||||
|
||||
namespace sstables {
|
||||
|
||||
extern thread_local utils::updateable_value<bool> global_cache_index_pages;
|
||||
|
||||
namespace mc {
|
||||
class writer;
|
||||
}
|
||||
|
||||
@@ -94,9 +94,9 @@ def test_describe_table_size(test_table):
|
||||
# Test the ProvisionedThroughput attribute returned by DescribeTable.
|
||||
# This is a very partial test: Our test table is configured without
|
||||
# provisioned throughput, so obviously it will not have interesting settings
|
||||
# for it. DynamoDB returns zeros for some of the attributes, even though
|
||||
# the documentation suggests missing values should have been fine too.
|
||||
@pytest.mark.xfail(reason="DescribeTable does not return provisioned throughput")
|
||||
# for it. But DynamoDB documents that zeros be returned for WriteCapacityUnits
|
||||
# and ReadCapacityUnits, and does this in practice as well - and some
|
||||
# applications assume these numbers are always there (even if 0).
|
||||
def test_describe_table_provisioned_throughput(test_table):
|
||||
got = test_table.meta.client.describe_table(TableName=test_table.name)['Table']
|
||||
assert got['ProvisionedThroughput']['NumberOfDecreasesToday'] == 0
|
||||
|
||||
@@ -168,7 +168,6 @@ def test_gsi_empty_value(test_table_gsi_2):
|
||||
test_table_gsi_2.put_item(Item={'p': random_string(), 'x': ''})
|
||||
|
||||
# Verify that a GSI is correctly listed in describe_table
|
||||
@pytest.mark.xfail(reason="DescribeTable for GSI misses IndexSizeBytes, ItemCount, Projection, IndexStatus")
|
||||
def test_gsi_describe(test_table_gsi_1):
|
||||
desc = test_table_gsi_1.meta.client.describe_table(TableName=test_table_gsi_1.name)
|
||||
assert 'Table' in desc
|
||||
@@ -177,16 +176,26 @@ def test_gsi_describe(test_table_gsi_1):
|
||||
assert len(gsis) == 1
|
||||
gsi = gsis[0]
|
||||
assert gsi['IndexName'] == 'hello'
|
||||
assert 'IndexSizeBytes' in gsi # actual size depends on content
|
||||
assert 'ItemCount' in gsi
|
||||
assert gsi['Projection'] == {'ProjectionType': 'ALL'}
|
||||
assert gsi['IndexStatus'] == 'ACTIVE'
|
||||
assert gsi['KeySchema'] == [{'KeyType': 'HASH', 'AttributeName': 'c'},
|
||||
{'KeyType': 'RANGE', 'AttributeName': 'p'}]
|
||||
# The index's ARN should look like the table's ARN followed by /index/<indexname>.
|
||||
assert gsi['IndexArn'] == desc['Table']['TableArn'] + '/index/hello'
|
||||
# TODO: check also ProvisionedThroughput
|
||||
|
||||
# In addition to the basic listing of an GSI in DescribeTable tested above,
|
||||
# in this test we check additional fields that should appear in each GSI's
|
||||
# description.
|
||||
@pytest.mark.xfail(reason="issues #7550, #11466, #11471")
|
||||
def test_gsi_describe_fields(test_table_gsi_1):
|
||||
desc = test_table_gsi_1.meta.client.describe_table(TableName=test_table_gsi_1.name)
|
||||
gsis = desc['Table']['GlobalSecondaryIndexes']
|
||||
assert len(gsis) == 1
|
||||
gsi = gsis[0]
|
||||
assert 'IndexSizeBytes' in gsi # actual size depends on content
|
||||
assert 'ItemCount' in gsi
|
||||
assert gsi['IndexStatus'] == 'ACTIVE'
|
||||
|
||||
# When a GSI's key includes an attribute not in the base table's key, we
|
||||
# need to remember to add its type to AttributeDefinitions.
|
||||
def test_gsi_missing_attribute_definition(dynamodb):
|
||||
@@ -427,6 +436,126 @@ def test_gsi_update_second_regular_base_column(test_table_gsi_3):
|
||||
KeyConditions={'a': {'AttributeValueList': [items[3]['a']], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [items[3]['b']], 'ComparisonOperator': 'EQ'}})
|
||||
|
||||
# Test reproducing issue #11801: In issue #5006 we noticed that in the special
|
||||
# case of a GSI with with two non-key attributes as keys (test_table_gsi_3),
|
||||
# an update of the second attribute forgot to delete the old row. We fixed
|
||||
# that bug, but a bug remained for updates which update the value to the *same*
|
||||
# value - in that case the old row shouldn't be deleted, but we did - as
|
||||
# noticed in issue #11801.
|
||||
def test_11801(test_table_gsi_3):
|
||||
p = random_string()
|
||||
a = random_string()
|
||||
b = random_string()
|
||||
item = {'p': p, 'a': a, 'b': b, 'd': random_string()}
|
||||
test_table_gsi_3.put_item(Item=item)
|
||||
assert_index_query(test_table_gsi_3, 'hello', [item],
|
||||
KeyConditions={'a': {'AttributeValueList': [a], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [b], 'ComparisonOperator': 'EQ'}})
|
||||
# Update the attribute 'b' to the same value b that it already had.
|
||||
# This shouldn't change anything in the base table or in the GSI
|
||||
test_table_gsi_3.update_item(Key={'p': p}, AttributeUpdates={'b': {'Value': b, 'Action': 'PUT'}})
|
||||
assert item == test_table_gsi_3.get_item(Key={'p': p}, ConsistentRead=True)['Item']
|
||||
# In issue #11801, the following assertion failed (the view row was
|
||||
# deleted and nothing matched the query).
|
||||
assert_index_query(test_table_gsi_3, 'hello', [item],
|
||||
KeyConditions={'a': {'AttributeValueList': [a], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [b], 'ComparisonOperator': 'EQ'}})
|
||||
# Above we checked that setting 'b' to the same value didn't remove
|
||||
# the old GSI row. But the same update may actually modify the GSI row
|
||||
# (e.g., an unrelated attribute d) - check this modification took place:
|
||||
item['d'] = random_string()
|
||||
test_table_gsi_3.update_item(Key={'p': p},
|
||||
AttributeUpdates={'b': {'Value': b, 'Action': 'PUT'},
|
||||
'd': {'Value': item['d'], 'Action': 'PUT'}})
|
||||
assert item == test_table_gsi_3.get_item(Key={'p': p}, ConsistentRead=True)['Item']
|
||||
assert_index_query(test_table_gsi_3, 'hello', [item],
|
||||
KeyConditions={'a': {'AttributeValueList': [a], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [b], 'ComparisonOperator': 'EQ'}})
|
||||
|
||||
# This test is the same as test_11801, but updating the first attribute (a)
|
||||
# instead of the second (b). This test didn't fail, showing that issue #11801
|
||||
# is - like #5006 - specific to the case of updating the second attribute.
|
||||
def test_11801_variant1(test_table_gsi_3):
|
||||
p = random_string()
|
||||
a = random_string()
|
||||
b = random_string()
|
||||
d = random_string()
|
||||
item = {'p': p, 'a': a, 'b': b, 'd': d}
|
||||
test_table_gsi_3.put_item(Item=item)
|
||||
assert_index_query(test_table_gsi_3, 'hello', [item],
|
||||
KeyConditions={'a': {'AttributeValueList': [a], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [b], 'ComparisonOperator': 'EQ'}})
|
||||
test_table_gsi_3.update_item(Key={'p': p}, AttributeUpdates={'a': {'Value': a, 'Action': 'PUT'}})
|
||||
assert_index_query(test_table_gsi_3, 'hello', [item],
|
||||
KeyConditions={'a': {'AttributeValueList': [a], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [b], 'ComparisonOperator': 'EQ'}})
|
||||
|
||||
# This test is the same as test_11801, but updates b to a different value
|
||||
# (newb) instead of to the same one. This test didn't fail, showing that
|
||||
# issue #11801 is specific to updates to the same value. This test basically
|
||||
# reproduces the already-fixed #5006 (we also have another test above which
|
||||
# reproduces that issue - test_gsi_update_second_regular_base_column())
|
||||
def test_11801_variant2(test_table_gsi_3):
|
||||
p = random_string()
|
||||
a = random_string()
|
||||
b = random_string()
|
||||
item = {'p': p, 'a': a, 'b': b, 'd': random_string()}
|
||||
test_table_gsi_3.put_item(Item=item)
|
||||
assert_index_query(test_table_gsi_3, 'hello', [item],
|
||||
KeyConditions={'a': {'AttributeValueList': [a], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [b], 'ComparisonOperator': 'EQ'}})
|
||||
newb = random_string()
|
||||
item['b'] = newb
|
||||
test_table_gsi_3.update_item(Key={'p': p}, AttributeUpdates={'b': {'Value': newb, 'Action': 'PUT'}})
|
||||
assert_index_query(test_table_gsi_3, 'hello', [],
|
||||
KeyConditions={'a': {'AttributeValueList': [a], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [b], 'ComparisonOperator': 'EQ'}})
|
||||
assert_index_query(test_table_gsi_3, 'hello', [item],
|
||||
KeyConditions={'a': {'AttributeValueList': [a], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [newb], 'ComparisonOperator': 'EQ'}})
|
||||
|
||||
# This test is the same as test_11801, but uses a different table schema
|
||||
# (test_table_gsi_5) where there is only one new key column in the view (x).
|
||||
# This test passed, showing that issue #11801 was specific to the special
|
||||
# case of a view with two new key columns (test_table_gsi_3).
|
||||
def test_11801_variant3(test_table_gsi_5):
|
||||
p = random_string()
|
||||
c = random_string()
|
||||
x = random_string()
|
||||
item = {'p': p, 'c': c, 'x': x, 'd': random_string()}
|
||||
test_table_gsi_5.put_item(Item=item)
|
||||
assert_index_query(test_table_gsi_5, 'hello', [item],
|
||||
KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'},
|
||||
'x': {'AttributeValueList': [x], 'ComparisonOperator': 'EQ'}})
|
||||
test_table_gsi_5.update_item(Key={'p': p, 'c': c}, AttributeUpdates={'x': {'Value': x, 'Action': 'PUT'}})
|
||||
assert item == test_table_gsi_5.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item']
|
||||
assert_index_query(test_table_gsi_5, 'hello', [item],
|
||||
KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'},
|
||||
'x': {'AttributeValueList': [x], 'ComparisonOperator': 'EQ'}})
|
||||
|
||||
# Another test similar to test_11801, but instead of updating a view key
|
||||
# column to the same value it already has, simply don't update it at all
|
||||
# (and just modify some other regular column). This test passed, showing
|
||||
# that issue #11801 is specific to the case of updating a view key column
|
||||
# to the same value it already had.
|
||||
def test_11801_variant4(test_table_gsi_3):
|
||||
p = random_string()
|
||||
a = random_string()
|
||||
b = random_string()
|
||||
item = {'p': p, 'a': a, 'b': b, 'd': random_string()}
|
||||
test_table_gsi_3.put_item(Item=item)
|
||||
assert_index_query(test_table_gsi_3, 'hello', [item],
|
||||
KeyConditions={'a': {'AttributeValueList': [a], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [b], 'ComparisonOperator': 'EQ'}})
|
||||
# An update that doesn't change the GSI keys (a or b), just a regular
|
||||
# column d.
|
||||
item['d'] = random_string()
|
||||
test_table_gsi_3.update_item(Key={'p': p}, AttributeUpdates={'d': {'Value': item['d'], 'Action': 'PUT'}})
|
||||
assert item == test_table_gsi_3.get_item(Key={'p': p}, ConsistentRead=True)['Item']
|
||||
assert_index_query(test_table_gsi_3, 'hello', [item],
|
||||
KeyConditions={'a': {'AttributeValueList': [a], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [b], 'ComparisonOperator': 'EQ'}})
|
||||
|
||||
# Test that when a table has a GSI, if the indexed attribute is missing, the
|
||||
# item is added to the base table but not the index.
|
||||
# This is the same feature we already tested in test_gsi_missing_attribute()
|
||||
|
||||
@@ -242,8 +242,32 @@ def test_lsi_describe(test_table_lsi_4):
|
||||
assert(sorted([lsi['IndexName'] for lsi in lsis]) == ['hello_x1', 'hello_x2', 'hello_x3', 'hello_x4'])
|
||||
for lsi in lsis:
|
||||
assert lsi['IndexArn'] == desc['Table']['TableArn'] + '/index/' + lsi['IndexName']
|
||||
# TODO: check projection and key params
|
||||
# TODO: check also ProvisionedThroughput
|
||||
assert lsi['Projection'] == {'ProjectionType': 'ALL'}
|
||||
|
||||
# In addition to the basic listing of an LSI in DescribeTable tested above,
|
||||
# in this test we check additional fields that should appear in each LSI's
|
||||
# description.
|
||||
# Note that whereas GSIs also have IndexStatus and ProvisionedThroughput
|
||||
# fields, LSIs do not. IndexStatus is not needed because LSIs cannot be
|
||||
# added after the base table is created, and ProvisionedThroughput isn't
|
||||
# needed because an LSI shares its provisioning with the base table.
|
||||
@pytest.mark.xfail(reason="issues #7550, #11466")
|
||||
def test_lsi_describe_fields(test_table_lsi_1):
|
||||
desc = test_table_lsi_1.meta.client.describe_table(TableName=test_table_lsi_1.name)
|
||||
assert 'Table' in desc
|
||||
assert 'LocalSecondaryIndexes' in desc['Table']
|
||||
lsis = desc['Table']['LocalSecondaryIndexes']
|
||||
assert len(lsis) == 1
|
||||
lsi = lsis[0]
|
||||
assert lsi['IndexName'] == 'hello'
|
||||
assert 'IndexSizeBytes' in lsi # actual size depends on content
|
||||
assert 'ItemCount' in lsi
|
||||
assert not 'IndexStatus' in lsi
|
||||
assert not 'ProvisionedThroughput' in lsi
|
||||
assert lsi['KeySchema'] == [{'KeyType': 'HASH', 'AttributeName': 'p'},
|
||||
{'KeyType': 'RANGE', 'AttributeName': 'b'}]
|
||||
# The index's ARN should look like the table's ARN followed by /index/<indexname>.
|
||||
assert lsi['IndexArn'] == desc['Table']['TableArn'] + '/index/hello'
|
||||
|
||||
# A table with selective projection - only keys are projected into the index
|
||||
@pytest.fixture(scope="module")
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include <deque>
|
||||
#include <random>
|
||||
#include "utils/chunked_vector.hh"
|
||||
#include "utils/amortized_reserve.hh"
|
||||
|
||||
#include <boost/range/algorithm/sort.hpp>
|
||||
#include <boost/range/algorithm/equal.hpp>
|
||||
@@ -207,3 +208,37 @@ BOOST_AUTO_TEST_CASE(test_shrinking_and_expansion_involving_chunk_boundary) {
|
||||
v.emplace_back(std::make_unique<uint64_t>(i));
|
||||
}
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_amoritzed_reserve) {
|
||||
utils::chunked_vector<int> v;
|
||||
|
||||
v.reserve(10);
|
||||
amortized_reserve(v, 1);
|
||||
BOOST_REQUIRE_EQUAL(v.capacity(), 10);
|
||||
BOOST_REQUIRE_EQUAL(v.size(), 0);
|
||||
|
||||
v = {};
|
||||
amortized_reserve(v, 1);
|
||||
BOOST_REQUIRE_EQUAL(v.capacity(), 1);
|
||||
BOOST_REQUIRE_EQUAL(v.size(), 0);
|
||||
|
||||
v = {};
|
||||
amortized_reserve(v, 1);
|
||||
BOOST_REQUIRE_EQUAL(v.capacity(), 1);
|
||||
amortized_reserve(v, 2);
|
||||
BOOST_REQUIRE_EQUAL(v.capacity(), 2);
|
||||
amortized_reserve(v, 3);
|
||||
BOOST_REQUIRE_EQUAL(v.capacity(), 4);
|
||||
amortized_reserve(v, 4);
|
||||
BOOST_REQUIRE_EQUAL(v.capacity(), 4);
|
||||
amortized_reserve(v, 5);
|
||||
BOOST_REQUIRE_EQUAL(v.capacity(), 8);
|
||||
amortized_reserve(v, 6);
|
||||
BOOST_REQUIRE_EQUAL(v.capacity(), 8);
|
||||
amortized_reserve(v, 7);
|
||||
BOOST_REQUIRE_EQUAL(v.capacity(), 8);
|
||||
amortized_reserve(v, 7);
|
||||
BOOST_REQUIRE_EQUAL(v.capacity(), 8);
|
||||
amortized_reserve(v, 1);
|
||||
BOOST_REQUIRE_EQUAL(v.capacity(), 8);
|
||||
}
|
||||
|
||||
@@ -1852,6 +1852,29 @@ SEASTAR_TEST_CASE(test_continuity_merging_of_complete_mutations) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_commutativity_and_associativity) {
|
||||
random_mutation_generator gen(random_mutation_generator::generate_counters::no);
|
||||
gen.set_key_cardinality(7);
|
||||
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
mutation m1 = gen();
|
||||
m1.partition().make_fully_continuous();
|
||||
mutation m2 = gen();
|
||||
m2.partition().make_fully_continuous();
|
||||
mutation m3 = gen();
|
||||
m3.partition().make_fully_continuous();
|
||||
|
||||
assert_that(m1 + m2 + m3)
|
||||
.is_equal_to(m1 + m3 + m2)
|
||||
.is_equal_to(m2 + m1 + m3)
|
||||
.is_equal_to(m2 + m3 + m1)
|
||||
.is_equal_to(m3 + m1 + m2)
|
||||
.is_equal_to(m3 + m2 + m1);
|
||||
}
|
||||
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_continuity_merging) {
|
||||
return seastar::async([] {
|
||||
simple_schema table;
|
||||
|
||||
@@ -641,7 +641,7 @@ SEASTAR_TEST_CASE(test_apply_to_incomplete_respects_continuity) {
|
||||
static mutation_partition read_using_cursor(partition_snapshot& snap) {
|
||||
tests::reader_concurrency_semaphore_wrapper semaphore;
|
||||
partition_snapshot_row_cursor cur(*snap.schema(), snap);
|
||||
cur.maybe_refresh();
|
||||
cur.advance_to(position_in_partition::before_all_clustered_rows());
|
||||
auto mp = read_partition_from(*snap.schema(), cur);
|
||||
for (auto&& rt : snap.range_tombstones()) {
|
||||
mp.apply_delete(*snap.schema(), rt);
|
||||
|
||||
@@ -327,11 +327,6 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
test_querier_cache& evict_all_for_table() {
|
||||
_cache.evict_all_for_table(get_schema()->id()).get();
|
||||
return *this;
|
||||
}
|
||||
|
||||
test_querier_cache& no_misses() {
|
||||
BOOST_REQUIRE_EQUAL(_cache.get_stats().misses, _expected_stats.misses);
|
||||
return *this;
|
||||
@@ -727,21 +722,6 @@ SEASTAR_THREAD_TEST_CASE(test_resources_based_cache_eviction) {
|
||||
}, std::move(db_cfg_ptr)).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_evict_all_for_table) {
|
||||
test_querier_cache t;
|
||||
|
||||
const auto entry = t.produce_first_page_and_save_mutation_querier();
|
||||
|
||||
t.evict_all_for_table();
|
||||
t.assert_cache_lookup_mutation_querier(entry.key, *t.get_schema(), entry.expected_range, entry.expected_slice)
|
||||
.misses()
|
||||
.no_drops()
|
||||
.no_evictions();
|
||||
|
||||
// Check that the querier was removed from the semaphore too.
|
||||
BOOST_CHECK(!t.get_semaphore().try_evict_one_inactive_read());
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_immediate_evict_on_insert) {
|
||||
test_querier_cache t;
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "test/lib/simple_schema.hh"
|
||||
#include "test/lib/eventually.hh"
|
||||
#include "test/lib/random_utils.hh"
|
||||
#include "test/lib/random_schema.hh"
|
||||
|
||||
#include <seastar/core/coroutine.hh>
|
||||
#include <seastar/testing/test_case.hh>
|
||||
@@ -915,3 +916,44 @@ SEASTAR_THREAD_TEST_CASE(test_reader_concurrency_semaphore_used_blocked) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_reader_concurrency_semaphore_evict_inactive_reads_for_table) {
|
||||
auto spec = tests::make_random_schema_specification(get_name());
|
||||
|
||||
std::list<tests::random_schema> schemas;
|
||||
std::unordered_map<tests::random_schema*, std::vector<reader_concurrency_semaphore::inactive_read_handle>> schema_handles;
|
||||
for (unsigned i = 0; i < 4; ++i) {
|
||||
auto& s = schemas.emplace_back(tests::random_schema(i, *spec));
|
||||
schema_handles.emplace(&s, std::vector<reader_concurrency_semaphore::inactive_read_handle>{});
|
||||
}
|
||||
|
||||
reader_concurrency_semaphore semaphore(reader_concurrency_semaphore::no_limits{}, get_name());
|
||||
auto stop_sem = deferred_stop(semaphore);
|
||||
|
||||
for (auto& s : schemas) {
|
||||
auto& handles = schema_handles[&s];
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
handles.emplace_back(semaphore.register_inactive_read(make_empty_flat_reader_v2(s.schema(), semaphore.make_tracking_only_permit(s.schema().get(), get_name(), db::no_timeout))));
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& s : schemas) {
|
||||
auto& handles = schema_handles[&s];
|
||||
BOOST_REQUIRE(std::all_of(handles.begin(), handles.end(), [] (const reader_concurrency_semaphore::inactive_read_handle& handle) { return bool(handle); }));
|
||||
}
|
||||
|
||||
for (auto& s : schemas) {
|
||||
auto& handles = schema_handles[&s];
|
||||
BOOST_REQUIRE(std::all_of(handles.begin(), handles.end(), [] (const reader_concurrency_semaphore::inactive_read_handle& handle) { return bool(handle); }));
|
||||
semaphore.evict_inactive_reads_for_table(s.schema()->id()).get();
|
||||
for (const auto& [k, v] : schema_handles) {
|
||||
if (k == &s) {
|
||||
BOOST_REQUIRE(std::all_of(v.begin(), v.end(), [] (const reader_concurrency_semaphore::inactive_read_handle& handle) { return !bool(handle); }));
|
||||
} else if (!v.empty()) {
|
||||
BOOST_REQUIRE(std::all_of(v.begin(), v.end(), [] (const reader_concurrency_semaphore::inactive_read_handle& handle) { return bool(handle); }));
|
||||
}
|
||||
}
|
||||
handles.clear();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1235,9 +1235,13 @@ SEASTAR_TEST_CASE(test_update_failure) {
|
||||
class throttle {
|
||||
unsigned _block_counter = 0;
|
||||
promise<> _p; // valid when _block_counter != 0, resolves when goes down to 0
|
||||
std::optional<promise<>> _entered;
|
||||
bool _one_shot;
|
||||
public:
|
||||
// one_shot means whether only the first enter() after block() will block.
|
||||
throttle(bool one_shot = false) : _one_shot(one_shot) {}
|
||||
future<> enter() {
|
||||
if (_block_counter) {
|
||||
if (_block_counter && (!_one_shot || _entered)) {
|
||||
promise<> p1;
|
||||
promise<> p2;
|
||||
|
||||
@@ -1249,16 +1253,21 @@ public:
|
||||
p3.set_value();
|
||||
});
|
||||
_p = std::move(p2);
|
||||
|
||||
if (_entered) {
|
||||
_entered->set_value();
|
||||
_entered.reset();
|
||||
}
|
||||
return f1;
|
||||
} else {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
}
|
||||
|
||||
void block() {
|
||||
future<> block() {
|
||||
++_block_counter;
|
||||
_p = promise<>();
|
||||
_entered = promise<>();
|
||||
return _entered->get_future();
|
||||
}
|
||||
|
||||
void unblock() {
|
||||
@@ -1402,7 +1411,7 @@ SEASTAR_TEST_CASE(test_cache_population_and_update_race) {
|
||||
mt2->apply(m);
|
||||
}
|
||||
|
||||
thr.block();
|
||||
auto f = thr.block();
|
||||
|
||||
auto m0_range = dht::partition_range::make_singular(ring[0].ring_position());
|
||||
auto rd1 = cache.make_reader(s, semaphore.make_permit(), m0_range);
|
||||
@@ -1413,6 +1422,7 @@ SEASTAR_TEST_CASE(test_cache_population_and_update_race) {
|
||||
rd2.set_max_buffer_size(1);
|
||||
auto rd2_fill_buffer = rd2.fill_buffer();
|
||||
|
||||
f.get();
|
||||
sleep(10ms).get();
|
||||
|
||||
// This update should miss on all partitions
|
||||
@@ -1540,12 +1550,13 @@ SEASTAR_TEST_CASE(test_cache_population_and_clear_race) {
|
||||
mt2->apply(m);
|
||||
}
|
||||
|
||||
thr.block();
|
||||
auto f = thr.block();
|
||||
|
||||
auto rd1 = cache.make_reader(s, semaphore.make_permit());
|
||||
rd1.set_max_buffer_size(1);
|
||||
auto rd1_fill_buffer = rd1.fill_buffer();
|
||||
|
||||
f.get();
|
||||
sleep(10ms).get();
|
||||
|
||||
// This update should miss on all partitions
|
||||
@@ -3341,6 +3352,7 @@ SEASTAR_TEST_CASE(test_tombstone_merging_of_overlapping_tombstones_in_many_versi
|
||||
SEASTAR_TEST_CASE(test_concurrent_reads_and_eviction) {
|
||||
return seastar::async([] {
|
||||
random_mutation_generator gen(random_mutation_generator::generate_counters::no);
|
||||
gen.set_key_cardinality(16);
|
||||
memtable_snapshot_source underlying(gen.schema());
|
||||
schema_ptr s = gen.schema();
|
||||
schema_ptr rev_s = s->make_reversed();
|
||||
@@ -3994,3 +4006,81 @@ SEASTAR_TEST_CASE(row_cache_is_populated_using_compacting_sstable_reader) {
|
||||
BOOST_ASSERT(rt.calculate_size() == 1);
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_eviction_of_upper_bound_of_population_range) {
|
||||
return seastar::async([] {
|
||||
simple_schema s;
|
||||
tests::reader_concurrency_semaphore_wrapper semaphore;
|
||||
auto cache_mt = make_lw_shared<replica::memtable>(s.schema());
|
||||
|
||||
auto pkey = s.make_pkey("pk");
|
||||
|
||||
mutation m1(s.schema(), pkey);
|
||||
s.add_row(m1, s.make_ckey(1), "v1");
|
||||
s.add_row(m1, s.make_ckey(2), "v2");
|
||||
cache_mt->apply(m1);
|
||||
|
||||
cache_tracker tracker;
|
||||
throttle thr(true);
|
||||
auto cache_source = make_decorated_snapshot_source(snapshot_source([&] { return cache_mt->as_data_source(); }),
|
||||
[&] (mutation_source src) {
|
||||
return throttled_mutation_source(thr, std::move(src));
|
||||
});
|
||||
row_cache cache(s.schema(), cache_source, tracker);
|
||||
|
||||
auto pr = dht::partition_range::make_singular(pkey);
|
||||
|
||||
auto read = [&] (int start, int end) {
|
||||
auto slice = partition_slice_builder(*s.schema())
|
||||
.with_range(query::clustering_range::make(s.make_ckey(start), s.make_ckey(end)))
|
||||
.build();
|
||||
auto rd = cache.make_reader(s.schema(), semaphore.make_permit(), pr, slice);
|
||||
auto close_rd = deferred_close(rd);
|
||||
auto m_cache = read_mutation_from_flat_mutation_reader(rd).get0();
|
||||
close_rd.close_now();
|
||||
rd = cache_mt->make_flat_reader(s.schema(), semaphore.make_permit(), pr, slice);
|
||||
auto close_rd2 = deferred_close(rd);
|
||||
auto m_mt = read_mutation_from_flat_mutation_reader(rd).get0();
|
||||
BOOST_REQUIRE(m_mt);
|
||||
assert_that(m_cache).has_mutation().is_equal_to(*m_mt);
|
||||
};
|
||||
|
||||
// populate [2]
|
||||
{
|
||||
auto slice = partition_slice_builder(*s.schema())
|
||||
.with_range(query::clustering_range::make_singular(s.make_ckey(2)))
|
||||
.build();
|
||||
assert_that(cache.make_reader(s.schema(), semaphore.make_permit(), pr, slice))
|
||||
.has_monotonic_positions();
|
||||
}
|
||||
|
||||
auto arrived = thr.block();
|
||||
|
||||
// Read [0, 2]
|
||||
auto f = seastar::async([&] {
|
||||
read(0, 2);
|
||||
});
|
||||
|
||||
arrived.get();
|
||||
|
||||
// populate (2, 3]
|
||||
{
|
||||
auto slice = partition_slice_builder(*s.schema())
|
||||
.with_range(query::clustering_range::make(query::clustering_range::bound(s.make_ckey(2), false),
|
||||
query::clustering_range::bound(s.make_ckey(3), true)))
|
||||
.build();
|
||||
assert_that(cache.make_reader(s.schema(), semaphore.make_permit(), pr, slice))
|
||||
.has_monotonic_positions();
|
||||
}
|
||||
|
||||
testlog.trace("Evicting");
|
||||
evict_one_row(tracker); // Evicts before(0)
|
||||
evict_one_row(tracker); // Evicts ck(2)
|
||||
testlog.trace("Unblocking");
|
||||
|
||||
thr.unblock();
|
||||
f.get();
|
||||
|
||||
read(0, 3);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -3149,3 +3149,58 @@ SEASTAR_TEST_CASE(test_index_fast_forwarding_after_eof) {
|
||||
return make_ready_future<>();
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_crawling_reader_out_of_range_last_range_tombstone_change) {
|
||||
return test_env::do_with_async([] (test_env& env) {
|
||||
simple_schema table;
|
||||
|
||||
auto mut = table.new_mutation("pk0");
|
||||
auto ckeys = table.make_ckeys(4);
|
||||
table.add_row(mut, ckeys[0], "v0");
|
||||
table.add_row(mut, ckeys[1], "v1");
|
||||
table.add_row(mut, ckeys[2], "v2");
|
||||
using bound = query::clustering_range::bound;
|
||||
table.delete_range(mut, query::clustering_range::make(bound{ckeys[3], true}, bound{clustering_key::make_empty(), true}), tombstone(1, gc_clock::now()));
|
||||
|
||||
auto tmp = tmpdir();
|
||||
auto sst_gen = [&env, &table, &tmp] () {
|
||||
return env.make_sstable(table.schema(), tmp.path().string(), 1, sstables::get_highest_sstable_version(), big);
|
||||
};
|
||||
auto sst = make_sstable_containing(sst_gen, {mut});
|
||||
|
||||
assert_that(sst->make_crawling_reader(table.schema(), env.make_reader_permit())).has_monotonic_positions();
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_crawling_reader_random_schema_random_mutations) {
|
||||
return test_env::do_with_async([this] (test_env& env) {
|
||||
auto random_spec = tests::make_random_schema_specification(
|
||||
get_name(),
|
||||
std::uniform_int_distribution<size_t>(1, 4),
|
||||
std::uniform_int_distribution<size_t>(2, 4),
|
||||
std::uniform_int_distribution<size_t>(2, 8),
|
||||
std::uniform_int_distribution<size_t>(2, 8));
|
||||
auto random_schema = tests::random_schema{tests::random::get_int<uint32_t>(), *random_spec};
|
||||
auto schema = random_schema.schema();
|
||||
|
||||
testlog.info("Random schema:\n{}", random_schema.cql());
|
||||
|
||||
const auto muts = tests::generate_random_mutations(random_schema, 20).get();
|
||||
|
||||
auto tmp = tmpdir();
|
||||
auto sst_gen = [&env, schema, &tmp] () {
|
||||
return env.make_sstable(schema, tmp.path().string(), 1, sstables::get_highest_sstable_version(), big);
|
||||
};
|
||||
auto sst = make_sstable_containing(sst_gen, muts);
|
||||
|
||||
{
|
||||
auto rd = assert_that(sst->make_crawling_reader(schema, env.make_reader_permit()));
|
||||
|
||||
for (const auto& mut : muts) {
|
||||
rd.produces(mut);
|
||||
}
|
||||
}
|
||||
|
||||
assert_that(sst->make_crawling_reader(schema, env.make_reader_permit())).has_monotonic_positions();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import pytest
|
||||
import rest_api
|
||||
import nodetool
|
||||
from util import new_test_table
|
||||
from cassandra.protocol import ConfigurationException
|
||||
|
||||
# Test inserts `N` rows into table, flushes it
|
||||
# and tries to read `M` non-existing keys.
|
||||
@@ -29,3 +30,27 @@ def test_bloom_filter(scylla_only, cql, test_keyspace, N, M, fp_chance):
|
||||
ratio = fp / M
|
||||
assert ratio >= fp_chance * 0.7 and ratio <= fp_chance * 1.15
|
||||
|
||||
# Test very small bloom_filter_fp_chance settings.
|
||||
# The Cassandra documentation suggests that bloom_filter_fp_chance can be set
|
||||
# to anything between 0 and 1, and the Datastax documentation even goes further
|
||||
# and explains that 0 means "the largest possible Bloom filter".
|
||||
# But in practice, there is a minimal false-positive chance that the Bloom
|
||||
# filter can possibly achieve and Cassandra refuses lower settings (see
|
||||
# CASSANDRA-11920) and Scylla should do the same instead of crashing much
|
||||
# later during a memtable flush as it did in issue #11524.
|
||||
@pytest.mark.parametrize("fp_chance", [1e-5, 0])
|
||||
def test_small_bloom_filter_fp_chance(cql, test_keyspace, fp_chance):
|
||||
with pytest.raises(ConfigurationException):
|
||||
with new_test_table(cql, test_keyspace, 'a int PRIMARY KEY', f'WITH bloom_filter_fp_chance = {fp_chance}') as table:
|
||||
cql.execute(f'INSERT INTO {table} (a) VALUES (1)')
|
||||
# In issue #11524, Scylla used to crash during this flush after the
|
||||
# table creation succeeded above.
|
||||
nodetool.flush(cql, table)
|
||||
|
||||
# Check that bloom_filter_fp_chance outside [0, 1] (i.e., > 1 or < 0)
|
||||
# is, unsurprisingly, forbidden.
|
||||
@pytest.mark.parametrize("fp_chance", [-0.1, 1.1])
|
||||
def test_invalid_bloom_filter_fp_chance(cql, test_keyspace, fp_chance):
|
||||
with pytest.raises(ConfigurationException):
|
||||
with new_test_table(cql, test_keyspace, 'a int PRIMARY KEY', f'WITH bloom_filter_fp_chance = {fp_chance}') as table:
|
||||
pass
|
||||
|
||||
@@ -259,3 +259,146 @@ def test_mv_synchronous_updates(cql, test_keyspace):
|
||||
if wanted_trace2 in event.description:
|
||||
wanted_traces_were_found[1] = True
|
||||
assert all(wanted_traces_were_found)
|
||||
|
||||
# Reproduces #8627:
|
||||
# Whereas regular columns values are limited in size to 2GB, key columns are
|
||||
# limited to 64KB. This means that if a certain column is regular in the base
|
||||
# table but a key in one of its views, we cannot write to this regular column
|
||||
# an over-64KB value. Ideally, such a write should fail cleanly with an
|
||||
# InvalidQuery.
|
||||
# But today, neither Cassandra nor Scylla does this correctly. Both do not
|
||||
# detect the problem at the coordinator level, and both send the writes to the
|
||||
# replicas and fail the view update in each replica. The user's write may or
|
||||
# may not fail depending on whether the view update is done synchronously
|
||||
# (Scylla, sometimes) or asynchrhonously (Casandra). But even in the failure
|
||||
# case the failure does not explain why the replica writes failed - the only
|
||||
# message about a key being too long appears in the log.
|
||||
# Note that the same issue also applies to secondary indexes, and this is
|
||||
# tested in test_secondary_index.py.
|
||||
@pytest.mark.xfail(reason="issue #8627")
|
||||
def test_oversized_base_regular_view_key(cql, test_keyspace, cassandra_bug):
|
||||
with new_test_table(cql, test_keyspace, 'p int primary key, v text') as table:
|
||||
with new_materialized_view(cql, table, select='*', pk='v,p', where='v is not null and p is not null') as mv:
|
||||
big = 'x'*66536
|
||||
with pytest.raises(InvalidRequest, match='size'):
|
||||
cql.execute(f"INSERT INTO {table}(p,v) VALUES (1,'{big}')")
|
||||
# Ideally, the entire write operation should be considered
|
||||
# invalid, and no part of it will be done. In particular, the
|
||||
# base write will also not happen.
|
||||
assert [] == list(cql.execute(f"SELECT * FROM {table} WHERE p=1"))
|
||||
|
||||
# Reproduces #8627:
|
||||
# Same as test_oversized_base_regular_view_key above, just check *view
|
||||
# building*- i.e., pre-existing data in the base table that needs to be
|
||||
# copied to the view. The view building cannot return an error to the user,
|
||||
# but we do expect it to skip the problematic row and continue to complete
|
||||
# the rest of the vew build.
|
||||
@pytest.mark.xfail(reason="issue #8627")
|
||||
# This test currently breaks the build (it repeats a failing build step,
|
||||
# and never complete) and we cannot quickly recognize this failure, so
|
||||
# to avoid a very slow failure, we currently "skip" this test.
|
||||
@pytest.mark.skip(reason="issue #8627, fails very slow")
|
||||
def test_oversized_base_regular_view_key_build(cql, test_keyspace, cassandra_bug):
|
||||
with new_test_table(cql, test_keyspace, 'p int primary key, v text') as table:
|
||||
# No materialized view yet - a "big" value in v is perfectly fine:
|
||||
stmt = cql.prepare(f'INSERT INTO {table} (p,v) VALUES (?, ?)')
|
||||
for i in range(30):
|
||||
cql.execute(stmt, [i, str(i)])
|
||||
big = 'x'*66536
|
||||
cql.execute(stmt, [30, big])
|
||||
assert [(30,big)] == list(cql.execute(f'SELECT * FROM {table} WHERE p=30'))
|
||||
# Add a materialized view with v as the new key. The view build,
|
||||
# copying data from the base table to the view, should start promptly.
|
||||
with new_materialized_view(cql, table, select='*', pk='v,p', where='v is not null and p is not null') as mv:
|
||||
# If Scylla's view builder hangs or stops, there is no way to
|
||||
# tell this state apart from a view build that simply hasn't
|
||||
# completed yet (besides looking at the logs, which we don't).
|
||||
# This means, unfortunately, that a failure of this test is slow -
|
||||
# it needs to wait for a timeout.
|
||||
start_time = time.time()
|
||||
while time.time() < start_time + 30:
|
||||
results = set(list(cql.execute(f'SELECT * from {mv}')))
|
||||
# The oversized "big" cannot be a key in the view, so
|
||||
# shouldn't be in results:
|
||||
assert not (big, 30) in results
|
||||
print(results)
|
||||
# The rest of the items in the base table should be in
|
||||
# the view:
|
||||
if results == {(str(i), i) for i in range(30)}:
|
||||
break
|
||||
time.sleep(0.1)
|
||||
assert results == {(str(i), i) for i in range(30)}
|
||||
|
||||
# Reproduces #11668
|
||||
# When the view builder resumes building a partition, it reuses the reader
|
||||
# used from the previous step but re-creates the compactor. This means that any
|
||||
# range tombstone changes active at the time of suspending the step, have to be
|
||||
# explicitly re-opened on when resuming. Without that, already deleted base rows
|
||||
# can be resurrected as demonstrated by this test.
|
||||
# The view-builder suspends processing a base-table after
|
||||
# `view_builder::batch_size` (that is 128) rows. So in this test we create a
|
||||
# table which has at least 2X that many rows and add a range tombstone so that
|
||||
# it covers half of the rows (even rows are covered why odd rows aren't).
|
||||
def test_view_builder_suspend_with_active_range_tombstone(cql, test_keyspace, scylla_only):
|
||||
with new_test_table(cql, test_keyspace, "pk int, ck int, v int, PRIMARY KEY(pk, ck)", "WITH compaction = {'class': 'NullCompactionStrategy'}") as table:
|
||||
stmt = cql.prepare(f'INSERT INTO {table} (pk, ck, v) VALUES (?, ?, ?)')
|
||||
|
||||
# sstable 1 - even rows
|
||||
for ck in range(0, 512, 2):
|
||||
cql.execute(stmt, (0, ck, ck))
|
||||
nodetool.flush(cql, table)
|
||||
|
||||
# sstable 2 - odd rows and a range tombstone covering even rows
|
||||
# we need two sstables so memtable doesn't compact away the shadowed rows
|
||||
cql.execute(f"DELETE FROM {table} WHERE pk = 0 AND ck >= 0 AND ck < 512")
|
||||
for ck in range(1, 512, 2):
|
||||
cql.execute(stmt, (0, ck, ck))
|
||||
nodetool.flush(cql, table)
|
||||
|
||||
# we should not see any even rows here - they are covered by the range tombstone
|
||||
res = [r.ck for r in cql.execute(f"SELECT ck FROM {table} WHERE pk = 0")]
|
||||
assert res == list(range(1, 512, 2))
|
||||
|
||||
with new_materialized_view(cql, table, select='*', pk='v,pk,ck', where='v is not null and pk is not null and ck is not null') as mv:
|
||||
start_time = time.time()
|
||||
while time.time() < start_time + 30:
|
||||
res = sorted([r.v for r in cql.execute(f"SELECT * FROM {mv}")])
|
||||
if len(res) >= 512/2:
|
||||
break
|
||||
time.sleep(0.1)
|
||||
# again, we should not see any even rows in the materialized-view,
|
||||
# they are covered with a range tombstone in the base-table
|
||||
assert res == list(range(1, 512, 2))
|
||||
|
||||
# A variant of the above using a partition-tombstone, which is also lost similar
|
||||
# to range tombstones.
|
||||
def test_view_builder_suspend_with_partition_tombstone(cql, test_keyspace, scylla_only):
|
||||
with new_test_table(cql, test_keyspace, "pk int, ck int, v int, PRIMARY KEY(pk, ck)", "WITH compaction = {'class': 'NullCompactionStrategy'}") as table:
|
||||
stmt = cql.prepare(f'INSERT INTO {table} (pk, ck, v) VALUES (?, ?, ?)')
|
||||
|
||||
# sstable 1 - even rows
|
||||
for ck in range(0, 512, 2):
|
||||
cql.execute(stmt, (0, ck, ck))
|
||||
nodetool.flush(cql, table)
|
||||
|
||||
# sstable 2 - odd rows and a partition covering even rows
|
||||
# we need two sstables so memtable doesn't compact away the shadowed rows
|
||||
cql.execute(f"DELETE FROM {table} WHERE pk = 0")
|
||||
for ck in range(1, 512, 2):
|
||||
cql.execute(stmt, (0, ck, ck))
|
||||
nodetool.flush(cql, table)
|
||||
|
||||
# we should not see any even rows here - they are covered by the partition tombstone
|
||||
res = [r.ck for r in cql.execute(f"SELECT ck FROM {table} WHERE pk = 0")]
|
||||
assert res == list(range(1, 512, 2))
|
||||
|
||||
with new_materialized_view(cql, table, select='*', pk='v,pk,ck', where='v is not null and pk is not null and ck is not null') as mv:
|
||||
start_time = time.time()
|
||||
while time.time() < start_time + 30:
|
||||
res = sorted([r.v for r in cql.execute(f"SELECT * FROM {mv}")])
|
||||
if len(res) >= 512/2:
|
||||
break
|
||||
time.sleep(0.1)
|
||||
# again, we should not see any even rows in the materialized-view,
|
||||
# they are covered with a partition tombstone in the base-table
|
||||
assert res == list(range(1, 512, 2))
|
||||
|
||||
@@ -2013,6 +2013,11 @@ public:
|
||||
_blobs = boost::copy_range<std::vector<bytes>>(keys | boost::adaptors::transformed([this] (sstring& k) { return to_bytes(k); }));
|
||||
}
|
||||
|
||||
void set_key_cardinality(size_t n_keys) {
|
||||
assert(n_keys <= n_blobs);
|
||||
_ck_index_dist = std::uniform_int_distribution<size_t>{0, n_keys - 1};
|
||||
}
|
||||
|
||||
bytes random_blob() {
|
||||
return _blobs[std::min(_blobs.size() - 1, std::max<size_t>(0, _ck_index_dist(_gen)))];
|
||||
}
|
||||
@@ -2236,12 +2241,23 @@ public:
|
||||
};
|
||||
|
||||
size_t row_count = row_count_dist(_gen);
|
||||
for (size_t i = 0; i < row_count; ++i) {
|
||||
auto ckey = make_random_key();
|
||||
|
||||
std::unordered_set<clustering_key, clustering_key::hashing, clustering_key::equality> keys(
|
||||
0, clustering_key::hashing(*_schema), clustering_key::equality(*_schema));
|
||||
while (keys.size() < row_count) {
|
||||
keys.emplace(make_random_key());
|
||||
}
|
||||
|
||||
for (auto&& ckey : keys) {
|
||||
is_continuous continuous = is_continuous(_bool_dist(_gen));
|
||||
if (_not_dummy_dist(_gen)) {
|
||||
deletable_row& row = m.partition().clustered_row(*_schema, ckey, is_dummy::no, continuous);
|
||||
row.apply(random_row_marker());
|
||||
if (!row.marker().is_missing() && !row.marker().is_live()) {
|
||||
// Mutations are not associative if dead marker is not matched with a dead row
|
||||
// due to shadowable tombstone merging rules. See #11307.
|
||||
row.apply(tombstone(row.marker().timestamp(), row.marker().deletion_time()));
|
||||
}
|
||||
if (_bool_dist(_gen)) {
|
||||
set_random_cells(row.cells(), column_kind::regular_column);
|
||||
} else {
|
||||
@@ -2332,6 +2348,10 @@ std::vector<query::clustering_range> random_mutation_generator::make_random_rang
|
||||
return _impl->make_random_ranges(n_ranges);
|
||||
}
|
||||
|
||||
void random_mutation_generator::set_key_cardinality(size_t n_keys) {
|
||||
_impl->set_key_cardinality(n_keys);
|
||||
}
|
||||
|
||||
void for_each_schema_change(std::function<void(schema_ptr, const std::vector<mutation>&,
|
||||
schema_ptr, const std::vector<mutation>&)> fn) {
|
||||
auto map_of_int_to_int = map_type_impl::get_instance(int32_type, int32_type, true);
|
||||
|
||||
@@ -64,6 +64,8 @@ public:
|
||||
range_tombstone make_random_range_tombstone();
|
||||
std::vector<dht::decorated_key> make_partition_keys(size_t n);
|
||||
std::vector<query::clustering_range> make_random_ranges(unsigned n_ranges);
|
||||
// Sets the number of distinct clustering keys which will be used in generated mutations.
|
||||
void set_key_cardinality(size_t);
|
||||
};
|
||||
|
||||
bytes make_blob(size_t blob_size);
|
||||
|
||||
@@ -43,6 +43,9 @@ void run_test(const sstring& name, schema_ptr s, MutationGenerator&& gen) {
|
||||
auto prefill_compacted = logalloc::memory_compacted();
|
||||
auto prefill_allocated = logalloc::memory_allocated();
|
||||
|
||||
scheduling_latency_measurer memtable_slm;
|
||||
memtable_slm.start();
|
||||
|
||||
auto mt = make_lw_shared<replica::memtable>(s);
|
||||
auto fill_d = duration_in_seconds([&] {
|
||||
while (mt->occupancy().total_space() < memtable_size) {
|
||||
@@ -54,7 +57,8 @@ void run_test(const sstring& name, schema_ptr s, MutationGenerator&& gen) {
|
||||
}
|
||||
}
|
||||
});
|
||||
std::cout << format("Memtable fill took {:.6f} [ms]", fill_d.count() * 1000) << std::endl;
|
||||
memtable_slm.stop();
|
||||
std::cout << format("Memtable fill took {:.6f} [ms], {}", fill_d.count() * 1000, memtable_slm) << std::endl;
|
||||
|
||||
std::cout << "Draining..." << std::endl;
|
||||
auto drain_d = duration_in_seconds([&] {
|
||||
@@ -223,6 +227,40 @@ void test_partition_with_lots_of_range_tombstones() {
|
||||
});
|
||||
}
|
||||
|
||||
// This test case stresses handling of overlapping range tombstones
|
||||
void test_partition_with_lots_of_range_tombstones_with_residuals() {
|
||||
auto s = schema_builder("ks", "cf")
|
||||
.with_column("pk", uuid_type, column_kind::partition_key)
|
||||
.with_column("ck", int32_type, column_kind::clustering_key)
|
||||
.with_column("v1", bytes_type, column_kind::regular_column)
|
||||
.with_column("v2", bytes_type, column_kind::regular_column)
|
||||
.with_column("v3", bytes_type, column_kind::regular_column)
|
||||
.build();
|
||||
|
||||
auto pk = dht::decorate_key(*s, partition_key::from_single_value(*s,
|
||||
serialized(utils::UUID_gen::get_time_UUID())));
|
||||
int ck_idx = 0;
|
||||
|
||||
run_test("Large partition, lots of range tombstones with residuals", s, [&] {
|
||||
mutation m(s, pk);
|
||||
auto val = data_value(bytes(bytes::initialized_later(), cell_size));
|
||||
auto ck = clustering_key::from_single_value(*s, serialized(ck_idx++));
|
||||
auto r = query::clustering_range::make({ck}, {ck});
|
||||
tombstone tomb(api::new_timestamp(), gc_clock::now());
|
||||
m.partition().apply_row_tombstone(*s, range_tombstone(bound_view::from_range_start(r), bound_view::top(), tomb));
|
||||
|
||||
// Stress range tombstone overlapping with lots of range tombstones
|
||||
auto stride = 1'000'000;
|
||||
if (ck_idx == stride) {
|
||||
ck = clustering_key::from_single_value(*s, serialized(ck_idx - stride));
|
||||
r = query::clustering_range::make({ck}, {ck});
|
||||
m.partition().apply_row_tombstone(*s, range_tombstone(bound_view::from_range_start(r), bound_view::top(), tomb));
|
||||
}
|
||||
|
||||
return m;
|
||||
});
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
app_template app;
|
||||
return app.run(argc, argv, [&app] {
|
||||
@@ -236,6 +274,7 @@ int main(int argc, char** argv) {
|
||||
test_partition_with_few_small_rows();
|
||||
test_partition_with_lots_of_small_rows();
|
||||
test_partition_with_lots_of_range_tombstones();
|
||||
test_partition_with_lots_of_range_tombstones_with_residuals();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -81,6 +81,11 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
future<> error() {
|
||||
_barrier.abort();
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
unsigned get_phase() const noexcept { return _phase.load(); }
|
||||
};
|
||||
|
||||
@@ -115,6 +120,16 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
w.stop().get();
|
||||
}
|
||||
|
||||
std::vector<int> count(64);
|
||||
parallel_for_each(count, [] (auto& cnt) -> future<> {
|
||||
std::vector<sharded<worker>> w(32);
|
||||
co_await parallel_for_each(w, [] (auto &sw) -> future<> {
|
||||
co_await sw.start(utils::cross_shard_barrier());
|
||||
co_await sw.invoke_on_all(&worker::error);
|
||||
co_await sw.stop();
|
||||
});
|
||||
}).get();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
Submodule tools/java updated: ad6764b506...b3959948dd
@@ -727,7 +727,10 @@ future<fragmented_temporary_buffer> cql_server::connection::read_and_decompress_
|
||||
if (ret < 0) {
|
||||
throw std::runtime_error("CQL frame LZ4 uncompression failure");
|
||||
}
|
||||
return out.size();
|
||||
if (ret != out.size()) {
|
||||
throw std::runtime_error("Malformed CQL frame - provided uncompressed size different than real uncompressed size");
|
||||
}
|
||||
return static_cast<size_t>(ret);
|
||||
});
|
||||
on_compression_buffer_use();
|
||||
return uncomp;
|
||||
|
||||
54
utils/amortized_reserve.hh
Normal file
54
utils/amortized_reserve.hh
Normal file
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (C) 2022-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <concepts>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
/// Represents a container which can preallocate space for future insertions
|
||||
/// which can be used to reduce the number of overall memory re-allocation and item movement.
|
||||
///
|
||||
/// The number of items for which space is currently reserved is returned by capacity().
|
||||
/// This includes items currently present in the container.
|
||||
///
|
||||
/// The number of items currently present is returned by size().
|
||||
///
|
||||
/// Invariant:
|
||||
///
|
||||
/// size() <= capacity()
|
||||
///
|
||||
/// Space is reserved by calling reserve(desired_capacity).
|
||||
/// The post-condition of calling reserve() is:
|
||||
///
|
||||
/// capacity() >= desired_capacity
|
||||
///
|
||||
/// It is guaranteed insertion of (capacity() - size()) items does not
|
||||
/// throw if T::value_type constructor and move constructor do not throw.
|
||||
template <typename T>
|
||||
concept ContainerWithCapacity = requires (T x, size_t desired_capacity, typename T::value_type e) {
|
||||
{ x.reserve(desired_capacity) } -> std::same_as<void>;
|
||||
{ x.capacity() } -> std::same_as<size_t>;
|
||||
{ x.size() } -> std::same_as<size_t>;
|
||||
};
|
||||
|
||||
static_assert(ContainerWithCapacity<std::vector<int>>);
|
||||
|
||||
/// Reserves space for at least desired_capacity - v.size() elements.
|
||||
///
|
||||
/// Amortizes space expansion so that a series of N calls to amortized_reserve(v, v.size() + 1)
|
||||
/// starting from an empty container takes O(N) time overall.
|
||||
///
|
||||
/// Post-condition: v.capacity() >= desired_capacity
|
||||
template <ContainerWithCapacity T>
|
||||
void amortized_reserve(T& v, size_t desired_capacity) {
|
||||
if (desired_capacity > v.capacity()) {
|
||||
v.reserve(std::max(desired_capacity, v.capacity() * 2));
|
||||
}
|
||||
}
|
||||
@@ -123,6 +123,18 @@ namespace bloom_calculations {
|
||||
}
|
||||
return std::min(probs.size() - 1, size_t(v));
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the minimum supported bloom_filter_fp_chance value
|
||||
* if compute_bloom_spec() above is attempted with bloom_filter_fp_chance
|
||||
* lower than this, it will throw an unsupported_operation_exception.
|
||||
*/
|
||||
inline double min_supported_bloom_filter_fp_chance() {
|
||||
int max_buckets = probs.size() - 1;
|
||||
int max_K = probs[max_buckets].size() - 1;
|
||||
return probs[max_buckets][max_K];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -126,9 +126,9 @@ private:
|
||||
future<> complete() {
|
||||
_b->counter.fetch_add(smp::count);
|
||||
bool alive = _b->alive.load(std::memory_order_relaxed);
|
||||
return smp::invoke_on_all([this, sid = this_shard_id(), alive] {
|
||||
return smp::invoke_on_all([b = _b, sid = this_shard_id(), alive] {
|
||||
if (this_shard_id() != sid) {
|
||||
std::optional<promise<>>& w = _b->wakeup[this_shard_id()];
|
||||
std::optional<promise<>>& w = b->wakeup[this_shard_id()];
|
||||
if (alive) {
|
||||
assert(w.has_value());
|
||||
w->set_value();
|
||||
|
||||
@@ -52,7 +52,7 @@ public:
|
||||
return _what.c_str();
|
||||
}
|
||||
|
||||
const std::error_code& code() const { return _code; }
|
||||
const std::error_code& code() const noexcept { return _code; }
|
||||
};
|
||||
|
||||
// Rethrow exception if not null
|
||||
|
||||
@@ -1329,6 +1329,12 @@ void reclaim_timer::sample_stats(stats& data) {
|
||||
}
|
||||
|
||||
void reclaim_timer::report() const noexcept {
|
||||
// The logger can allocate (and will recover from allocation failure), and
|
||||
// we're in a memory-sensitive situation here and allocation can easily fail.
|
||||
// Prevent --abort-on-seastar-bad-alloc from crashing us in a situation that
|
||||
// we're likely to recover from, by reclaiming more.
|
||||
auto guard = memory::disable_abort_on_alloc_failure_temporarily();
|
||||
|
||||
auto time_level = _stall_detected ? log_level::warn : log_level::debug;
|
||||
auto info_level = _stall_detected ? log_level::info : log_level::debug;
|
||||
auto MiB = 1024*1024;
|
||||
|
||||
Reference in New Issue
Block a user