Compare commits

...

8 Commits

Author SHA1 Message Date
Yaron Kaikov
a8e372bf94 release: prepare for 4.3.rc1 2020-11-09 23:08:50 +02:00
Amnon Heiman
17e5ac9ab1 scyllatop/livedata.py: Safe iteration over metrics
This patch change the code that iterates over the metrics to use a copy
of the metrics names to make it safe to remove the metrics from the
metrics object.

Fixes #7488

Signed-off-by: Amnon Heiman <amnon@scylladb.com>
(cherry picked from commit 52db99f25f)
2020-11-08 19:16:03 +02:00
Calle Wilund
d1d968c6e9 partition_version: Change range_tombstones() to return chunked_vector
Refs #7364

The number of tombstones can be large. As a stopgap measure to
just returning a source range (with keepalive), we can at least
alleviate the problem by using a chunked vector.

Closes #7433

(cherry picked from commit 4b65d67a1a)
2020-11-08 14:38:18 +02:00
Tomasz Grabiec
e186f66bfe sstables: ka/la: Fix abort when next_partition() is called with certain reader state
Cleanup compaction is using consume_pausable_in_thread() to skip over
disowned partitions, which uses flat_mutation_reader::next_partition().

The implementation of next_partition() for the sstable reader has a
bug which may cause the following assertion failure:

  scylla: sstables/mp_row_consumer.hh:422: row_consumer::proceed sstables::mp_row_consumer_k_l::flush(): Assertion `!_ready' failed.

This happens when the sstable reader's buffer gets full when we reach
the partition end. The last fragment of the partition won't be pushed
into the buffer but will stay in the _ready variable. When
next_partition() is called in this state, _ready will not be cleared
and the fragment will be carried over to the next partition. This will
cause assertion failure when the reader attempts to emit the first
fragment of the next partition.

The fix is to clear _ready when entering a partition, just like we
clear _range_tombstones there.

Fixes #7553.
Message-Id: <1604534702-12777-1-git-send-email-tgrabiec@scylladb.com>

(cherry picked from commit fb9b5cae05)
2020-11-08 14:25:27 +02:00
Piotr Sarna
78a39e8364 schema_tables: fix fixing old secondary index schemas
Old secondary index schemas did not have their idx_token column
marked as computed, and there already exists code which updates
them. Unfortunately, the fix itself contains an error and doesn't
fire if computed columns are not yet supported by the whole cluster,
which is a very common situation during upgrades.

Fixes #7515

Closes #7516

(cherry picked from commit b66c285f94)
2020-11-05 17:52:44 +02:00
Calle Wilund
bbef05ae3c cdc: Add an "end-of-record" column to
Fixes #7435

Adds an "eor" (end-of-record) column to cdc log. This is non-null only on
last-in-timestamp group rows, i.e. end of a singular source "event".

A client can use this as a shortcut to knowing whether or not he has a
full cdc "record" for a given source mutation (single row change).

Closes #7436

(cherry picked from commit 46ea8c9b8b)
2020-10-28 12:47:17 +02:00
Yaron Kaikov
6f324cb732 release: prepare for 4.3.rc0
Closes #7484
2020-10-26 14:43:13 +02:00
Avi Kivity
239499a35a Revert "Revert "config: Do not enable repair based node operations by default""
This reverts commit 71d0d58f8c. We still
do not have confirmation that it works reliably.
2020-10-26 14:21:26 +02:00
13 changed files with 53 additions and 17 deletions

View File

@@ -1,7 +1,7 @@
#!/bin/sh
PRODUCT=scylla
VERSION=666.development
VERSION=4.3.rc1
if test -f version
then

View File

@@ -519,6 +519,7 @@ static schema_ptr create_log_schema(const schema& s, std::optional<utils::UUID>
b.with_column(log_meta_column_name_bytes("batch_seq_no"), int32_type, column_kind::clustering_key);
b.with_column(log_meta_column_name_bytes("operation"), data_type_for<operation_native_type>());
b.with_column(log_meta_column_name_bytes("ttl"), long_type);
b.with_column(log_meta_column_name_bytes("end_of_batch"), boolean_type);
b.set_caching_options(caching_options::get_disabled_caching_options());
auto add_columns = [&] (const schema::const_iterator_range_type& columns, bool is_data_col = false) {
for (const auto& column : columns) {
@@ -880,14 +881,26 @@ public:
return _base_schema;
}
clustering_key create_ck(int batch) const {
return clustering_key::from_exploded(_log_schema, { _tuuid, int32_type->decompose(batch) });
}
// Creates a new clustering row in the mutation, assigning it the next `cdc$batch_seq_no`.
// The numbering of batch sequence numbers starts from 0.
clustering_key allocate_new_log_row() {
auto log_ck = clustering_key::from_exploded(_log_schema, { _tuuid, int32_type->decompose(_batch_no++) });
auto log_ck = create_ck(_batch_no++);
set_key_columns(log_ck, _base_schema.partition_key_columns(), _base_pk);
return log_ck;
}
bool has_rows() const {
return _batch_no != 0;
}
clustering_key last_row_key() const {
return create_ck(_batch_no - 1);
}
// A common pattern is to allocate a row and then immediately set its `cdc$operation` column.
clustering_key allocate_new_log_row(operation op) {
auto log_ck = allocate_new_log_row();
@@ -944,6 +957,11 @@ public:
_log_mut.set_cell(log_ck, log_cdef, atomic_cell::make_live(*log_cdef.type, _ts, deleted_elements, _ttl));
}
void end_record() {
if (has_rows()) {
_log_mut.set_cell(last_row_key(), log_meta_column_name_bytes("end_of_batch"), data_value(true), _ts, _ttl);
}
}
private:
void set_key_columns(const clustering_key& log_ck, schema::const_iterator_range_type columns, const std::vector<bytes>& key) {
size_t pos = 0;
@@ -1519,6 +1537,11 @@ public:
cdc::inspect_mutation(m, v);
}
void end_record() override {
assert(_builder);
_builder->end_record();
}
// Takes and returns generated cdc log mutations and associated statistics about parts touched during transformer's lifetime.
// The `transformer` object on which this method was called on should not be used anymore.
std::tuple<std::vector<mutation>, stats::part_type_set> finish() && {

View File

@@ -684,6 +684,8 @@ void process_changes_with_splitting(const mutation& base_mutation, change_proces
processor.produce_postimage(&ck);
}
}
processor.end_record();
}
}
@@ -731,6 +733,8 @@ void process_changes_without_splitting(const mutation& base_mutation, change_pro
processor.produce_postimage(&cr.key());
}
}
processor.end_record();
}
} // namespace cdc

View File

@@ -77,6 +77,10 @@ public:
// both columns have different timestamp or TTL set.
// m - the small mutation to be converted into CDC log rows.
virtual void process_change(const mutation& m) = 0;
// Tells processor we have reached end of record - last part
// of a given timestamp batch
virtual void end_record() = 0;
};
bool should_split(const mutation& base_mutation);

View File

@@ -694,7 +694,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
, replace_address(this, "replace_address", value_status::Used, "", "The listen_address or broadcast_address of the dead node to replace. Same as -Dcassandra.replace_address.")
, replace_address_first_boot(this, "replace_address_first_boot", value_status::Used, "", "Like replace_address option, but if the node has been bootstrapped successfully it will be ignored. Same as -Dcassandra.replace_address_first_boot.")
, override_decommission(this, "override_decommission", value_status::Used, false, "Set true to force a decommissioned node to join the cluster")
, enable_repair_based_node_ops(this, "enable_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, true, "Set true to use enable repair based node operations instead of streaming based")
, enable_repair_based_node_ops(this, "enable_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, false, "Set true to use enable repair based node operations instead of streaming based")
, ring_delay_ms(this, "ring_delay_ms", value_status::Used, 30 * 1000, "Time a node waits to hear from other nodes before joining the ring in milliseconds. Same as -Dcassandra.ring_delay_ms in cassandra.")
, shadow_round_ms(this, "shadow_round_ms", value_status::Used, 300 * 1000, "The maximum gossip shadow round time. Can be used to reduce the gossip feature check time during node boot up.")
, fd_max_interval_ms(this, "fd_max_interval_ms", value_status::Used, 2 * 1000, "The maximum failure_detector interval time in milliseconds. Interval larger than the maximum will be ignored. Larger cluster may need to increase the default.")

View File

@@ -3035,10 +3035,6 @@ future<> maybe_update_legacy_secondary_index_mv_schema(service::migration_manage
// format, where "token" is not marked as computed. Once we're sure that all indexes have their
// columns marked as computed (because they were either created on a node that supports computed
// columns or were fixed by this utility function), it's safe to remove this function altogether.
if (!db.features().cluster_supports_computed_columns()) {
return make_ready_future<>();
}
if (v->clustering_key_size() == 0) {
return make_ready_future<>();
}

View File

@@ -5,8 +5,8 @@ MAINTAINER Avi Kivity <avi@cloudius-systems.com>
ENV container docker
# The SCYLLA_REPO_URL argument specifies the URL to the RPM repository this Docker image uses to install Scylla. The default value is the Scylla's unstable RPM repository, which contains the daily build.
ARG SCYLLA_REPO_URL=http://downloads.scylladb.com/rpm/unstable/centos/master/latest/scylla.repo
ARG VERSION=666.development
ARG SCYLLA_REPO_URL=http://downloads.scylladb.com/rpm/unstable/centos/scylla-4.3/latest/scylla.repo
ARG VERSION=4.3.rc0
ADD scylla_bashrc /scylla_bashrc

View File

@@ -542,12 +542,12 @@ partition_snapshot_ptr partition_entry::read(logalloc::region& r,
return partition_snapshot_ptr(std::move(snp));
}
std::vector<range_tombstone>
partition_snapshot::range_tombstone_result
partition_snapshot::range_tombstones(position_in_partition_view start, position_in_partition_view end)
{
partition_version* v = &*version();
if (!v->next()) {
return boost::copy_range<std::vector<range_tombstone>>(
return boost::copy_range<range_tombstone_result>(
v->partition().row_tombstones().slice(*_schema, start, end));
}
range_tombstone_list list(*_schema);
@@ -557,10 +557,10 @@ partition_snapshot::range_tombstones(position_in_partition_view start, position_
}
v = v->next();
}
return boost::copy_range<std::vector<range_tombstone>>(list.slice(*_schema, start, end));
return boost::copy_range<range_tombstone_result>(list.slice(*_schema, start, end));
}
std::vector<range_tombstone>
partition_snapshot::range_tombstone_result
partition_snapshot::range_tombstones()
{
return range_tombstones(

View File

@@ -26,6 +26,7 @@
#include "utils/anchorless_list.hh"
#include "utils/logalloc.hh"
#include "utils/coroutine.hh"
#include "utils/chunked_vector.hh"
#include <boost/intrusive/parent_from_member.hpp>
#include <boost/intrusive/slist.hpp>
@@ -400,10 +401,13 @@ public:
::static_row static_row(bool digest_requested) const;
bool static_row_continuous() const;
mutation_partition squashed() const;
using range_tombstone_result = utils::chunked_vector<range_tombstone>;
// Returns range tombstones overlapping with [start, end)
std::vector<range_tombstone> range_tombstones(position_in_partition_view start, position_in_partition_view end);
range_tombstone_result range_tombstones(position_in_partition_view start, position_in_partition_view end);
// Returns all range tombstones
std::vector<range_tombstone> range_tombstones();
range_tombstone_result range_tombstones();
};
class partition_snapshot_ptr {

View File

@@ -378,6 +378,7 @@ private:
_fwd_end = _fwd ? position_in_partition::before_all_clustered_rows() : position_in_partition::after_all_clustered_rows();
_out_of_range = false;
_range_tombstones.reset();
_ready = {};
_first_row_encountered = false;
}
public:

View File

@@ -326,6 +326,7 @@ SEASTAR_THREAD_TEST_CASE(test_cdc_log_schema) {
// cdc log clustering key
assert_has_column(cdc::log_meta_column_name("operation"), byte_type);
assert_has_column(cdc::log_meta_column_name("ttl"), long_type);
assert_has_column(cdc::log_meta_column_name("end_of_batch"), boolean_type);
// pk
assert_has_column(cdc::log_data_column_name("pk"), int32_type);
@@ -534,6 +535,7 @@ SEASTAR_THREAD_TEST_CASE(test_pre_post_image_logging) {
auto val_index = column_index(*rows, cdc::log_data_column_name("val"));
auto val2_index = column_index(*rows, cdc::log_data_column_name("val2"));
auto ttl_index = column_index(*rows, cdc::log_meta_column_name("ttl"));
auto eor_index = column_index(*rows, cdc::log_meta_column_name("end_of_batch"));
auto val_type = int32_type;
auto val = *first[0][val_index];
@@ -583,10 +585,12 @@ SEASTAR_THREAD_TEST_CASE(test_pre_post_image_logging) {
if (post_enabled) {
val = *post_image.back()[val_index];
val2 = *post_image.back()[val2_index];
auto eor = *post_image.back()[eor_index];
BOOST_REQUIRE_EQUAL(int32_type->decompose(1111), *post_image.back()[ck2_index]);
BOOST_REQUIRE_EQUAL(data_value(nv), val_type->deserialize(bytes_view(val)));
BOOST_REQUIRE_EQUAL(data_value(22222), val_type->deserialize(bytes_view(val2)));
BOOST_REQUIRE_EQUAL(data_value(true), boolean_type->deserialize(bytes_view(eor)));
}
const auto& ttl_cell = second[second.size() - 2][ttl_index];

View File

@@ -49,7 +49,7 @@ static thread_local mutation_application_stats app_stats_for_tests;
// Verifies that tombstones in "list" are monotonic, overlap with the requested range,
// and have information equivalent with "expected" in that range.
static
void check_tombstone_slice(const schema& s, std::vector<range_tombstone> list,
void check_tombstone_slice(const schema& s, const utils::chunked_vector<range_tombstone>& list,
const query::clustering_range& range,
std::initializer_list<range_tombstone> expected)
{

View File

@@ -39,7 +39,7 @@ class LiveData(object):
def _discoverMetrics(self):
results = metric.Metric.discover(self._metric_source)
logging.debug('_discoverMetrics: {} results discovered'.format(len(results)))
for symbol in results:
for symbol in list(results):
if not self._matches(symbol, self._metricPatterns):
results.pop(symbol)
logging.debug('_initializeMetrics: {} results matched'.format(len(results)))