From d8e6d1ed98ae605c0d96a549e5ecd8bcbf34a25a Mon Sep 17 00:00:00 2001 From: Piotr Jastrzebski Date: Mon, 10 Sep 2018 16:16:00 +0200 Subject: [PATCH 01/14] SST3: Make sure we call consume_partition_end even when we slice and fast forward to. Signed-off-by: Piotr Jastrzebski --- sstables/row.hh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sstables/row.hh b/sstables/row.hh index 4aea0bb876..674450df5b 100644 --- a/sstables/row.hh +++ b/sstables/row.hh @@ -1282,6 +1282,13 @@ public: { } void verify_end_state() { + // If reading a partial row (i.e., when we have a clustering row + // filter and using a promoted index), we may be in FLAGS or FLAGS_2 + // state instead of PARTITION_START. + if (_state == state::FLAGS || _state == state::FLAGS_2) { + _consumer.consume_partition_end(); + return; + } if (_state != state::PARTITION_START || _prestate != prestate::NONE) { throw malformed_sstable_exception("end of input, but not end of partition"); } From 057c26f894fbfb0130b8df8f1464c17f50de0bfe Mon Sep 17 00:00:00 2001 From: Vladimir Krivopalov Date: Wed, 5 Sep 2018 14:02:12 -0700 Subject: [PATCH 02/14] sstables: Support 'mc' version parsing from filename. Signed-off-by: Vladimir Krivopalov --- sstables/sstables.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sstables/sstables.cc b/sstables/sstables.cc index cd5c4bb451..5f6709b2f2 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -3759,7 +3759,7 @@ future<> sstable::set_generation(int64_t new_generation) { } entry_descriptor entry_descriptor::make_descriptor(sstring sstdir, sstring fname) { - static std::regex la("la-(\\d+)-(\\w+)-(.*)"); + static std::regex la_mc("(la|mc)-(\\d+)-(\\w+)-(.*)"); static std::regex ka("(\\w+)-(\\w+)-ka-(\\d+)-(.*)"); static std::regex dir(".*/([^/]*)/(\\w+)-[\\da-fA-F]+(?:/upload|/snapshots/[^/]+)?/?"); @@ -3776,7 +3776,7 @@ entry_descriptor entry_descriptor::make_descriptor(sstring sstdir, sstring fname sstlog.debug("Make descriptor sstdir: {}; fname: {}", sstdir, fname); std::string s(fname); - if (std::regex_match(s, match, la)) { + if (std::regex_match(s, match, la_mc)) { std::string sdir(sstdir); std::smatch dirmatch; if (std::regex_match(sdir, dirmatch, dir)) { @@ -3785,10 +3785,10 @@ entry_descriptor entry_descriptor::make_descriptor(sstring sstdir, sstring fname } else { throw malformed_sstable_exception(seastar::sprint("invalid version for file %s with path %s. Path doesn't match known pattern.", fname, sstdir)); } - version = sstable::version_types::la; - generation = match[1].str(); - format = sstring(match[2].str()); - component = sstring(match[3].str()); + version = (match[1].str() == "la") ? sstable::version_types::la : sstable::version_types::mc; + generation = match[2].str(); + format = sstring(match[3].str()); + component = sstring(match[4].str()); } else if (std::regex_match(s, match, ka)) { ks = match[1].str(); cf = match[2].str(); From bdca27ae412a0d89f55b41510cb03bbcc2eaac72 Mon Sep 17 00:00:00 2001 From: Vladimir Krivopalov Date: Thu, 6 Sep 2018 15:32:29 -0700 Subject: [PATCH 03/14] sstables: Always store only min bases in serialization_header. There previously was an inconsistency in treating min values stored in a serialization_header. They are written to or read from a Statistics.db as deltas against fixed bases, but when we parse timeouts from the data file, we need the full bases, not just deltas. This inconsistency causes wrong timestamp values if we write an sstable and then read from it using one and the same sstable object because we turn min values into bases on write and then don't adjust them back because we already have them in memory. Signed-off-by: Vladimir Krivopalov --- sstables/m_format_read_helpers.hh | 6 +++--- sstables/sstables.cc | 12 ++++-------- sstables/sstables.hh | 3 --- sstables/types.hh | 27 +++++++++++++++++---------- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/sstables/m_format_read_helpers.hh b/sstables/m_format_read_helpers.hh index 0ecc4ad6fe..70c0c4a4ab 100644 --- a/sstables/m_format_read_helpers.hh +++ b/sstables/m_format_read_helpers.hh @@ -134,7 +134,7 @@ inline api::timestamp_type parse_timestamp(uint64_t value) { inline api::timestamp_type parse_timestamp(const serialization_header& header, uint64_t delta) { - return parse_timestamp(header.min_timestamp.value + delta); + return parse_timestamp(header.get_min_timestamp() + delta); } inline gc_clock::duration parse_ttl(uint32_t value) { @@ -146,7 +146,7 @@ inline gc_clock::duration parse_ttl(uint32_t value) { inline gc_clock::duration parse_ttl(const serialization_header& header, uint32_t delta) { - return parse_ttl(header.min_ttl.value + delta); + return parse_ttl(header.get_min_ttl() + delta); } inline gc_clock::time_point parse_expiry(uint32_t value) { @@ -158,7 +158,7 @@ inline gc_clock::time_point parse_expiry(uint32_t value) { inline gc_clock::time_point parse_expiry(const serialization_header& header, uint32_t delta) { - return parse_expiry(header.min_local_deletion_time.value + delta); + return parse_expiry(header.get_min_local_deletion_time() + delta); } }; // namespace sstables diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 5f6709b2f2..00c256c511 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -1299,11 +1299,7 @@ double sstable::estimate_droppable_tombstone_ratio(gc_clock::time_point gc_befor } future<> sstable::read_statistics(const io_priority_class& pc) { - return read_simple(_components->statistics, pc).then([this] { - if (_version == version_types::mc) { - adjust_serialization_header(); - } - }); + return read_simple(_components->statistics, pc); } void sstable::write_statistics(const io_priority_class& pc) { @@ -2118,9 +2114,9 @@ static sstring pk_type_to_string(const schema& s) { static serialization_header make_serialization_header(const schema& s, const encoding_stats& enc_stats) { serialization_header header; - header.min_timestamp.value = enc_stats.min_timestamp - encoding_stats::timestamp_epoch; - header.min_local_deletion_time.value = enc_stats.min_local_deletion_time - encoding_stats::deletion_time_epoch; - header.min_ttl.value = enc_stats.min_ttl - encoding_stats::ttl_epoch; + header.min_timestamp_base.value = enc_stats.min_timestamp - encoding_stats::timestamp_epoch; + header.min_local_deletion_time_base.value = enc_stats.min_local_deletion_time - encoding_stats::deletion_time_epoch; + header.min_ttl_base.value = enc_stats.min_ttl - encoding_stats::ttl_epoch; header.pk_type_name = to_bytes_array_vint_size(pk_type_to_string(s)); diff --git a/sstables/sstables.hh b/sstables/sstables.hh index 06051d416b..79395f91e9 100644 --- a/sstables/sstables.hh +++ b/sstables/sstables.hh @@ -592,9 +592,6 @@ private: serialization_header& s = *static_cast(p.get()); return s; } - void adjust_serialization_header() { - get_mutable_serialization_header(*_components).adjust(); - } public: future<> read_toc(); diff --git a/sstables/types.hh b/sstables/types.hh index 730ad084cf..65bc36ea22 100644 --- a/sstables/types.hh +++ b/sstables/types.hh @@ -358,9 +358,9 @@ struct stats_metadata : public metadata_base { using bytes_array_vint_size = disk_string_vint_size; struct serialization_header : public metadata_base { - vint min_timestamp; - vint min_local_deletion_time; - vint min_ttl; + vint min_timestamp_base; + vint min_local_deletion_time_base; + vint min_ttl_base; bytes_array_vint_size pk_type_name; disk_array_vint_size clustering_key_types_names; struct column_desc { @@ -381,9 +381,9 @@ struct serialization_header : public metadata_base { switch (v) { case sstable_version_types::mc: return f( - min_timestamp, - min_local_deletion_time, - min_ttl, + min_timestamp_base, + min_local_deletion_time_base, + min_ttl_base, pk_type_name, clustering_key_types_names, static_columns, @@ -397,10 +397,17 @@ struct serialization_header : public metadata_base { // Should never reach here - compiler will complain if switch above does not cover all sstable versions abort(); } - void adjust() { - min_timestamp.value += encoding_stats::timestamp_epoch; - min_local_deletion_time.value += encoding_stats::deletion_time_epoch; - min_ttl.value += encoding_stats::ttl_epoch; + + uint64_t get_min_timestamp() const { + return min_timestamp_base.value + encoding_stats::timestamp_epoch; + } + + uint32_t get_min_ttl() const { + return min_ttl_base.value + encoding_stats::ttl_epoch; + } + + uint32_t get_min_local_deletion_time() const { + return min_local_deletion_time_base.value + encoding_stats::deletion_time_epoch; } }; From 84341821b1df5ccbcfd110ea5e4a46ce72f51132 Mon Sep 17 00:00:00 2001 From: Vladimir Krivopalov Date: Thu, 6 Sep 2018 15:52:11 -0700 Subject: [PATCH 04/14] sstables: Don't validate timestamps against the max value on parsing. Internally, timestamps are represented as signed integers (int64_t) but stored as unsigned ones. So it is quite possible to store data with timestamp that is represented as a number larger than the max value of int64_t type. One such example is api::min_timestamp() that is used when generating system schema tables ("keyspaces"). When cast to uint64_t, it turns into a large value. Signed-off-by: Vladimir Krivopalov --- sstables/m_format_read_helpers.hh | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/sstables/m_format_read_helpers.hh b/sstables/m_format_read_helpers.hh index 70c0c4a4ab..1b51ee9ec5 100644 --- a/sstables/m_format_read_helpers.hh +++ b/sstables/m_format_read_helpers.hh @@ -125,16 +125,9 @@ inline future<> read_vint(random_access_reader& in, T& value) { } } -inline api::timestamp_type parse_timestamp(uint64_t value) { - if (value > api::max_timestamp) { - throw malformed_sstable_exception("Too big timestamp: " + value); - } - return static_cast(value); -} - inline api::timestamp_type parse_timestamp(const serialization_header& header, uint64_t delta) { - return parse_timestamp(header.get_min_timestamp() + delta); + return static_cast(header.get_min_timestamp() + delta); } inline gc_clock::duration parse_ttl(uint32_t value) { From fc901eb700eca49e4eb092303d54e8e3b7908511 Mon Sep 17 00:00:00 2001 From: Vladimir Krivopalov Date: Thu, 6 Sep 2018 16:15:13 -0700 Subject: [PATCH 05/14] sstables: Fix string formatting for exception messages in m_format_read_helpers. Before this fix, the code was a potential undefined behaviour and crash because it would add a large value to a const char* and try to create a std::string out of it. Signed-off-by: Vladimir Krivopalov --- sstables/m_format_read_helpers.hh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sstables/m_format_read_helpers.hh b/sstables/m_format_read_helpers.hh index 1b51ee9ec5..b9d9588a6d 100644 --- a/sstables/m_format_read_helpers.hh +++ b/sstables/m_format_read_helpers.hh @@ -132,7 +132,7 @@ inline api::timestamp_type parse_timestamp(const serialization_header& header, inline gc_clock::duration parse_ttl(uint32_t value) { if (value > std::numeric_limits::max()) { - throw malformed_sstable_exception("Too big ttl: " + value); + throw malformed_sstable_exception(format("Too big ttl: {}", value)); } return gc_clock::duration(value); } @@ -144,7 +144,7 @@ inline gc_clock::duration parse_ttl(const serialization_header& header, inline gc_clock::time_point parse_expiry(uint32_t value) { if (value > std::numeric_limits::max()) { - throw malformed_sstable_exception("Too big expiry: " + value); + throw malformed_sstable_exception(format("Too big expiry: {}", value)); } return gc_clock::time_point(gc_clock::duration(value)); } From 9166badebe3a36463e45ac773ec8d82d3f291cd2 Mon Sep 17 00:00:00 2001 From: Vladimir Krivopalov Date: Fri, 7 Sep 2018 16:09:35 -0700 Subject: [PATCH 06/14] sstables: Only forward the call to fast_forwarding_to in mp_row_consumer_m if filter exists. It may happen that we hit the end of partition and then get fast_forward_to() called in which case we attempt to call it from an already destroyed object. We need to check the _mf_filter value before doing so. Signed-off-by: Vladimir Krivopalov --- sstables/mp_row_consumer.hh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sstables/mp_row_consumer.hh b/sstables/mp_row_consumer.hh index 5415541d6d..eea012c5d0 100644 --- a/sstables/mp_row_consumer.hh +++ b/sstables/mp_row_consumer.hh @@ -1014,6 +1014,10 @@ public: } std::optional fast_forward_to(position_range r, db::timeout_clock::time_point) { + if (!_mf_filter) { + return {}; + } + return _mf_filter->fast_forward_to(std::move(r)); } From 0f298113c7a178e77946dec3925cb97b4f7e815e Mon Sep 17 00:00:00 2001 From: Vladimir Krivopalov Date: Tue, 11 Sep 2018 10:17:37 -0700 Subject: [PATCH 07/14] bytes: Add helper for turning bytes_view into sstring_view. Signed-off-by: Vladimir Krivopalov --- bytes.hh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bytes.hh b/bytes.hh index 78552b32c7..d1c05a3b6a 100644 --- a/bytes.hh +++ b/bytes.hh @@ -35,6 +35,10 @@ using bytes_mutable_view = basic_mutable_view; using bytes_opt = std::experimental::optional; using sstring_view = std::experimental::string_view; +inline sstring_view to_sstring_view(bytes_view view) { + return {reinterpret_cast(view.data()), view.size()}; +} + namespace std { template <> From 0048f4814e05a4e884ba54c0a08bfd16a80c5878 Mon Sep 17 00:00:00 2001 From: Vladimir Krivopalov Date: Fri, 7 Sep 2018 16:11:34 -0700 Subject: [PATCH 08/14] sstables: Use parser_type instead of abstract_type::parse_type in column_translation. abstract_type::parse_type() only deals with simple types and fails to parse wrapped types such as org.apache.cassandra.db.marshal.FrozenType(org.apache.cassandra.db.marshal.ListType(org.apache.cassandra.db.marshal.UTF8Type)) Signed-off-by: Vladimir Krivopalov --- sstables/column_translation.hh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sstables/column_translation.hh b/sstables/column_translation.hh index e19ac72520..e0945d3a66 100644 --- a/sstables/column_translation.hh +++ b/sstables/column_translation.hh @@ -28,6 +28,7 @@ #include "schema.hh" #include "sstables/types.hh" #include "utils/UUID.hh" +#include "db/marshal/type_parser.hh" namespace sstables { @@ -40,7 +41,7 @@ inline column_values_fixed_lengths get_clustering_values_fixed_lengths(const ser column_values_fixed_lengths lengths; lengths.reserve(header.clustering_key_types_names.elements.size()); for (auto&& t : header.clustering_key_types_names.elements) { - auto type = abstract_type::parse_type(sstring(std::cbegin(t.value), std::cend(t.value))); + auto type = db::marshal::type_parser::parse(to_sstring_view(t.value)); lengths.push_back(type->value_length_if_fixed()); } From be3613bdb64d7ca95a201370db57817e6511f320 Mon Sep 17 00:00:00 2001 From: Vladimir Krivopalov Date: Fri, 7 Sep 2018 21:06:03 -0700 Subject: [PATCH 09/14] sstables: Explicitly cast api::timestamp_type to uint64_t when delta-encoding. This avoids noisy warnings like "signed value overflow" when ASAN is turned on. Signed-off-by: Vladimir Krivopalov --- sstables/m_format_write_helpers.cc | 2 +- sstables/sstables.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sstables/m_format_write_helpers.cc b/sstables/m_format_write_helpers.cc index 4be0dd9b6b..ea4b2d3de7 100644 --- a/sstables/m_format_write_helpers.cc +++ b/sstables/m_format_write_helpers.cc @@ -298,7 +298,7 @@ void write_missing_columns(file_writer& out, const schema& s, const row& row) { template void write_unsigned_delta_vint(file_writer& out, T value, T base) { using unsigned_type = std::make_unsigned_t; - unsigned_type delta = value - base; + unsigned_type delta = static_cast(value) - base; write_vint(out, delta); } diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 00c256c511..28e8d89f69 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -2114,7 +2114,7 @@ static sstring pk_type_to_string(const schema& s) { static serialization_header make_serialization_header(const schema& s, const encoding_stats& enc_stats) { serialization_header header; - header.min_timestamp_base.value = enc_stats.min_timestamp - encoding_stats::timestamp_epoch; + header.min_timestamp_base.value = static_cast(enc_stats.min_timestamp) - encoding_stats::timestamp_epoch; header.min_local_deletion_time_base.value = enc_stats.min_local_deletion_time - encoding_stats::deletion_time_epoch; header.min_ttl_base.value = enc_stats.min_ttl - encoding_stats::ttl_epoch; From 24590fe88cb28f1f77a56bbfe380d5c9d217d2ae Mon Sep 17 00:00:00 2001 From: Vladimir Krivopalov Date: Mon, 10 Sep 2018 10:40:41 -0700 Subject: [PATCH 10/14] sstables: Fix parsing of complex columns with zero subcolumns. Before this fix, a complex column with zero subcolumns would be incorrecty parsed as it would re-read the deletion time twice. Now, this case is handled properly. Signed-off-by: Vladimir Krivopalov --- sstables/row.hh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sstables/row.hh b/sstables/row.hh index 674450df5b..5fa3268176 100644 --- a/sstables/row.hh +++ b/sstables/row.hh @@ -1173,6 +1173,14 @@ public: } case state::COMPLEX_COLUMN_SIZE_2: _subcolumns_to_read = _u64; + if (_subcolumns_to_read == 0) { + auto id = get_column_id(); + move_to_next_column(); + if (_consumer.consume_complex_column_end(std::move(id)) != consumer_m::proceed::yes) { + _state = state::COLUMN; + return consumer_m::proceed::no; + } + } goto column_label; case state::RANGE_TOMBSTONE_MARKER: range_tombstone_marker_label: From 0edd3c57a99b30d7274de65c941854580728c335 Mon Sep 17 00:00:00 2001 From: Vladimir Krivopalov Date: Mon, 10 Sep 2018 12:26:23 -0700 Subject: [PATCH 11/14] tests: Add test for reading a complex column with zero subcolumns (SST3). The files are generated by Scylla as a compaction_history table. Signed-off-by: Vladimir Krivopalov --- tests/sstable_3_x_test.cc | 50 ++++++++++++++++++ .../mc-1-big-Data.db | Bin 0 -> 610 bytes .../mc-1-big-Index.db | Bin 0 -> 145 bytes .../mc-1-big-Statistics.db | Bin 0 -> 4982 bytes .../mc-1-big-Summary.db | Bin 0 -> 92 bytes .../mc-1-big-TOC.txt | 5 ++ 6 files changed, 55 insertions(+) create mode 100644 tests/sstables/3.x/uncompressed/complex_column_zero_subcolumns/mc-1-big-Data.db create mode 100644 tests/sstables/3.x/uncompressed/complex_column_zero_subcolumns/mc-1-big-Index.db create mode 100644 tests/sstables/3.x/uncompressed/complex_column_zero_subcolumns/mc-1-big-Statistics.db create mode 100644 tests/sstables/3.x/uncompressed/complex_column_zero_subcolumns/mc-1-big-Summary.db create mode 100644 tests/sstables/3.x/uncompressed/complex_column_zero_subcolumns/mc-1-big-TOC.txt diff --git a/tests/sstable_3_x_test.cc b/tests/sstable_3_x_test.cc index e4032c5571..59fc952bf2 100644 --- a/tests/sstable_3_x_test.cc +++ b/tests/sstable_3_x_test.cc @@ -46,6 +46,7 @@ #include "types.hh" #include "partition_slice_builder.hh" #include "schema.hh" +#include "utils/UUID_gen.hh" using namespace sstables; @@ -4408,3 +4409,52 @@ SEASTAR_THREAD_TEST_CASE(test_read_table_empty_clustering_key) { assert_that(sst.read_rows_flat()).produces(keys); } +/* + * Test for a bug discovered with Scylla's compaction_history tables + * containing complex columns with zero subcolumns. + */ +SEASTAR_THREAD_TEST_CASE(test_complex_column_zero_subcolumns_read) { + using utils::UUID; + const sstring path = + "tests/sstables/3.x/uncompressed/complex_column_zero_subcolumns"; + + schema_ptr s = schema_builder("test_ks", "test_table") + .with_column("id", uuid_type, column_kind::partition_key) + .with_column("bytes_in", long_type) + .with_column("bytes_out", long_type) + .with_column("columnfamily_name", utf8_type) + .with_column("compacted_at", timestamp_type) + .with_column("keyspace_name", utf8_type) + .with_column("rows_merged", map_type_impl::get_instance(int32_type, long_type, true)) + .build(); + + sstable_assertions sst(s, path); + sst.load(); + + auto to_pkey = [&s] (const UUID& key) { + auto bytes = uuid_type->decompose(key); + auto pk = partition_key::from_single_value(*s, bytes); + return dht::global_partitioner().decorate_key(*s, pk); + }; + + std::vector keys { + UUID{"09fea990-b320-11e8-83a7-000000000000"}, + UUID{"0a310430-b320-11e8-83a7-000000000000"}, + UUID{"0a214cc0-b320-11e8-83a7-000000000000"}, + UUID{"0a00a560-b320-11e8-83a7-000000000000"}, + UUID{"0a0a6960-b320-11e8-83a7-000000000000"}, + UUID{"0a147b80-b320-11e8-83a7-000000000000"}, + UUID{"0a187320-b320-11e8-83a7-000000000000"}, + }; + + auto rd = sst.read_rows_flat(); + rd.set_max_buffer_size(1); + auto r = assert_that(std::move(rd)); + for (const auto& key : keys) { + r.produces_partition_start(to_pkey(key)) + .produces_row_with_key(clustering_key::make_empty()) + .produces_partition_end(); + } + r.produces_end_of_stream(); +} + diff --git a/tests/sstables/3.x/uncompressed/complex_column_zero_subcolumns/mc-1-big-Data.db b/tests/sstables/3.x/uncompressed/complex_column_zero_subcolumns/mc-1-big-Data.db new file mode 100644 index 0000000000000000000000000000000000000000..f5d2ff7fd1cd46a8a87a66adac2677e77e441333 GIT binary patch literal 610 zcmZP&;QY68!e#}*7tPBVz@YyB|NjjTCPR*~93#U?s|G0uU-J``A$&xNwIDUMs2Ip+ zOx<2|Ync>Vab7Y8Dlo2m^p*K!w{xm_@G4;KeO%w<@bfYV%4xwXnj#>Rbv8ta*i zYV57eFk{y$K}_Y!#BHj%+(FG|Bx5yop~gy|Kr{9$%-DmPO%QWMsvB^+)?Dt8raqFn xu2Z4r>b*rZ_qHj_+(Vjr5OXDp74VtcpM+$t#$KqoViu@r;dUV0-2OzMxd5lRol*b* literal 0 HcmV?d00001 diff --git a/tests/sstables/3.x/uncompressed/complex_column_zero_subcolumns/mc-1-big-Index.db b/tests/sstables/3.x/uncompressed/complex_column_zero_subcolumns/mc-1-big-Index.db new file mode 100644 index 0000000000000000000000000000000000000000..f19b85715ec25b195d7e7b3dc225b32fa59fe426 GIT binary patch literal 145 zcmZP&;QY68!e#}*7tPBVU_gM&ki`I5EQEnUfJ@Qm0J2!a8jv8v(gb9|Mi!7DS0<`p X6i85{x&cMyCXk>+u>!JS6Bh#j%61`Y literal 0 HcmV?d00001 diff --git a/tests/sstables/3.x/uncompressed/complex_column_zero_subcolumns/mc-1-big-Statistics.db b/tests/sstables/3.x/uncompressed/complex_column_zero_subcolumns/mc-1-big-Statistics.db new file mode 100644 index 0000000000000000000000000000000000000000..4389e4871af415a3dde8f8633683efdd508f24f6 GIT binary patch literal 4982 zcmeI$e{2&~902gUYrD1GR@ZL8sA18u2@EWRItB!ogTht=VQwu9rzyAg+=^?vT(7c@ z85f-YOfx5ngJpnXvXG6iC<@zL;19GDL4n0EH9}@m2AR>K5gbBR@1QN;?~|nZhY5f5 zCAWK@e&4(IP-Yq9_?fX%C8LMm+rDA*EdLpv7YgHQ&S67}$E&UdtKmtk=uB z9XxAr)Cz_TKHlZyjb$t^I0dK2&GAJojX#wf$=o4bK}=cUaCk~=A)S#f6=_P$N$L;} zDM@*lNK2i9N$f02(p6znK#2Jy!pVe{gw=#o32O*z31snz6WGiQ!aK2L#PwxeI4-whD~#AlE3uP_ z@hWfQxOx~nHH7`h>)5*Yv8O+WtslXDY$5g&2e1v5*oFJBR}W#AO~c+g6WjYNb}NhB zO|0Kx6OMQl&K74jRUVot?3$yXAVq9&bq8eo;6yIJo_BJ-}#gHejD3QqVg-pt{^{C zjQh7mA4KtOzvz&gH})a#y|om%|8qP)m-^9s((3dZusmH<@+R`DIfszXoLU6U%sGy} zUzztb-@>>|-+nSOii>0 z=U58CIhOpe?75`ANNLl(Q|xbI&lQ^rDU;Z9q3NI!doDB|i9J_h&jl95!?x!#4qbCl z%#r}5@LBeB&u_YW?lp8)y;Ig%)%Av+QY~+mj+578gK&sBy7=PNoIrEzvvslD12WNI z^o1BJa>uHgIMds7W!JIJ)61LZKKQ4YYJ-dAy|rwe!D=;|EdF}&f1V^|wO`=8Hm5so zrS%?njZ{f>uZG7b{Oc-OyQj|Qa=*yBoOOPin{{z Date: Mon, 10 Sep 2018 15:11:32 -0700 Subject: [PATCH 12/14] db: Add configuration option for enabling SSTables 'mc' format. This flag will only be used for testing purposes until Scylla 3.o release and will be removed once SSTables 'mc' testing is completed. Signed-off-by: Vladimir Krivopalov --- db/config.hh | 1 + 1 file changed, 1 insertion(+) diff --git a/db/config.hh b/db/config.hh index 698ddd88b1..197a519c25 100644 --- a/db/config.hh +++ b/db/config.hh @@ -739,6 +739,7 @@ public: " Performance is affected to some extent as a result. Useful to help debugging problems that may arise at another layers.") \ val(cpu_scheduler, bool, true, Used, "Enable cpu scheduling") \ val(view_building, bool, true, Used, "Enable view building; should only be set to false when the node is experience issues due to view building") \ + val(enable_sstables_mc_format, bool, false, Used, "Enable SSTables 'mc' format to be used as the default file format; FOR TESTING PURPOSES ONLY - TO BE REMOVED BEFORE RELEASE") \ /* done! */ #define _make_value_member(name, type, deflt, status, desc, ...) \ From c98937e04c2fa4349d88af16e5719c9a16741431 Mon Sep 17 00:00:00 2001 From: Vladimir Krivopalov Date: Wed, 5 Sep 2018 11:03:34 -0700 Subject: [PATCH 13/14] sstables: Support SSTables 'mc' format as a feature. Signed-off-by: Vladimir Krivopalov --- service/storage_service.cc | 8 +++++++- service/storage_service.hh | 6 ++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 74e52d1e43..b4576169f0 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -100,6 +100,7 @@ static const sstring XXHASH_FEATURE = "XXHASH"; static const sstring ROLES_FEATURE = "ROLES"; static const sstring LA_SSTABLE_FEATURE = "LA_SSTABLE_FORMAT"; static const sstring STREAM_WITH_RPC_STREAM = "STREAM_WITH_RPC_STREAM"; +static const sstring MC_SSTABLE_FEATURE = "MC_SSTABLE_FORMAT"; distributed _the_storage_service; @@ -209,7 +210,11 @@ sstring storage_service::get_config_supported_features() { LA_SSTABLE_FEATURE, STREAM_WITH_RPC_STREAM, }; - if (service::get_local_storage_service()._db.local().get_config().experimental()) { + auto& config = service::get_local_storage_service()._db.local().get_config(); + if (config.enable_sstables_mc_format()) { + features.push_back(MC_SSTABLE_FEATURE); + } + if (config.experimental()) { features.push_back(MATERIALIZED_VIEWS_FEATURE); features.push_back(INDEXES_FEATURE); } @@ -426,6 +431,7 @@ void storage_service::register_features() { _roles_feature = gms::feature(ROLES_FEATURE); _la_sstable_feature = gms::feature(LA_SSTABLE_FEATURE); _stream_with_rpc_stream_feature = gms::feature(STREAM_WITH_RPC_STREAM); + _mc_sstable_feature = gms::feature(MC_SSTABLE_FEATURE); if (_db.local().get_config().experimental()) { _materialized_views_feature = gms::feature(MATERIALIZED_VIEWS_FEATURE); diff --git a/service/storage_service.hh b/service/storage_service.hh index e5c23b0507..278095245e 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -289,6 +289,7 @@ private: gms::feature _roles_feature; gms::feature _la_sstable_feature; gms::feature _stream_with_rpc_stream_feature; + gms::feature _mc_sstable_feature; public: void enable_all_features() { _range_tombstones_feature.enable(); @@ -305,6 +306,7 @@ public: _roles_feature.enable(); _la_sstable_feature.enable(); _stream_with_rpc_stream_feature.enable(); + _mc_sstable_feature.enable(); } void finish_bootstrapping() { @@ -2287,6 +2289,10 @@ public: bool cluster_supports_stream_with_rpc_stream() const { return bool(_stream_with_rpc_stream_feature); } + + bool cluster_supports_mc_sstable() const { + return bool(_mc_sstable_feature); + } }; inline future<> init_storage_service(distributed& db, sharded& auth_service, sharded& sys_dist_ks) { From cd80d6ff656d51f563cb6524386a65ff67b36619 Mon Sep 17 00:00:00 2001 From: Vladimir Krivopalov Date: Mon, 10 Sep 2018 15:38:38 -0700 Subject: [PATCH 14/14] database: Honour enable_sstables_mc_format configuration option. Only enable SSTables 'mc' format if the entire cluster supports it and it is enabled in the configuration file. Signed-off-by: Vladimir Krivopalov --- database.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/database.cc b/database.cc index dca99ebfbf..d2566e60ff 100644 --- a/database.cc +++ b/database.cc @@ -88,7 +88,9 @@ logging::logger dblog("database"); namespace { sstables::sstable::version_types get_highest_supported_format() { - if (service::get_local_storage_service().cluster_supports_la_sstable()) { + if (service::get_local_storage_service().cluster_supports_mc_sstable()) { + return sstables::sstable::version_types::mc; + } else if (service::get_local_storage_service().cluster_supports_la_sstable()) { return sstables::sstable::version_types::la; } else { return sstables::sstable::version_types::ka;