From d6a003dc59225a0c3bc47ec6d94124e2aa13f010 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Wed, 6 May 2015 17:21:25 +0200 Subject: [PATCH 1/5] gc_clock: Store max_ttl as duration rather than time_point Extra wrapping in time_point has no point. --- cql3/attributes.hh | 4 ++-- gc_clock.hh | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/cql3/attributes.hh b/cql3/attributes.hh index fbcb3211ce..634dbfdde0 100644 --- a/cql3/attributes.hh +++ b/cql3/attributes.hh @@ -104,9 +104,9 @@ public: throw exceptions::invalid_request_exception("A TTL must be greater or equal to 0"); } - if (ttl > max_ttl.time_since_epoch().count()) { + if (ttl > max_ttl.count()) { throw exceptions::invalid_request_exception("ttl is too large. requested (" + std::to_string(ttl) + - ") maximum (" + std::to_string(max_ttl.time_since_epoch().count()) + ")"); + ") maximum (" + std::to_string(max_ttl.count()) + ")"); } return ttl; diff --git a/gc_clock.hh b/gc_clock.hh index 2817216977..7c1882763f 100644 --- a/gc_clock.hh +++ b/gc_clock.hh @@ -25,5 +25,4 @@ public: using ttl_opt = std::experimental::optional; // 20 years in seconds -static constexpr gc_clock::time_point max_ttl = gc_clock::time_point{ - gc_clock::duration{20 * 365 * 24 * 60 * 60}}; +static constexpr gc_clock::duration max_ttl = gc_clock::duration{20 * 365 * 24 * 60 * 60}; From 5ba1486ae75b1cc6cf691fa9e3c5e7276ef7adde Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Wed, 6 May 2015 17:25:31 +0200 Subject: [PATCH 2/5] db: Rename "ttl" to "expiry" when it's used as time point To avoid confusion with "ttl" the duration. --- atomic_cell.hh | 56 ++++++++++++++++----------------- cql3/selection/selection.cc | 12 +++---- cql3/update_parameters.hh | 2 +- database.cc | 12 +++---- gc_clock.hh | 2 +- mutation.cc | 8 ++--- mutation.hh | 4 +-- query-request.hh | 4 +-- query-result-reader.hh | 22 ++++++------- query-result-writer.hh | 6 ++-- query-result.hh | 6 ++-- sstables/partition.cc | 4 +-- tests/perf/perf_mutation.cc | 2 +- tests/urchin/mutation_test.cc | 2 +- tests/urchin/serializer_test.cc | 2 +- tests/urchin/sstable_test.cc | 2 +- thrift/handler.cc | 4 +-- 17 files changed, 75 insertions(+), 75 deletions(-) diff --git a/atomic_cell.hh b/atomic_cell.hh index 99a082ea93..27ce65ca4e 100644 --- a/atomic_cell.hh +++ b/atomic_cell.hh @@ -31,25 +31,25 @@ class atomic_cell_or_collection; * * Layout: * - * := ? - * := + * := ? + * := */ class atomic_cell_type final { private: static constexpr int8_t DEAD_FLAGS = 0; static constexpr int8_t LIVE_FLAG = 0x01; - static constexpr int8_t TTL_FLAG = 0x02; // When present, TTL field is present. Set only for live cells + static constexpr int8_t EXPIRY_FLAG = 0x02; // When present, expiry field is present. Set only for live cells static constexpr unsigned flags_size = 1; static constexpr unsigned timestamp_offset = flags_size; static constexpr unsigned timestamp_size = 8; - static constexpr unsigned ttl_offset = timestamp_offset + timestamp_size; - static constexpr unsigned ttl_size = 4; + static constexpr unsigned expiry_offset = timestamp_offset + timestamp_size; + static constexpr unsigned expiry_size = 4; private: static bool is_live(const bytes_view& cell) { return cell[0] != DEAD_FLAGS; } static bool is_live_and_has_ttl(const bytes_view& cell) { - return cell[0] & TTL_FLAG; + return cell[0] & EXPIRY_FLAG; } static bool is_dead(const bytes_view& cell) { return cell[0] == DEAD_FLAGS; @@ -60,34 +60,34 @@ private: } // Can be called on live cells only static bytes_view value(bytes_view cell) { - auto ttl_field_size = bool(cell[0] & TTL_FLAG) * ttl_size; - auto value_offset = flags_size + timestamp_size + ttl_field_size; + auto expiry_field_size = bool(cell[0] & EXPIRY_FLAG) * expiry_size; + auto value_offset = flags_size + timestamp_size + expiry_field_size; cell.remove_prefix(value_offset); return cell; } // Can be called on live and dead cells. For dead cells, the result is never empty. - static ttl_opt ttl(const bytes_view& cell) { + static expiry_opt expiry(const bytes_view& cell) { auto flags = cell[0]; - if (flags == DEAD_FLAGS || (flags & TTL_FLAG)) { - auto ttl = get_field(cell, ttl_offset); - return {gc_clock::time_point(gc_clock::duration(ttl))}; + if (flags == DEAD_FLAGS || (flags & EXPIRY_FLAG)) { + auto expiry = get_field(cell, expiry_offset); + return {gc_clock::time_point(gc_clock::duration(expiry))}; } return {}; } - static bytes make_dead(api::timestamp_type timestamp, gc_clock::time_point ttl) { - bytes b(bytes::initialized_later(), flags_size + timestamp_size + ttl_size); + static bytes make_dead(api::timestamp_type timestamp, gc_clock::time_point expiry) { + bytes b(bytes::initialized_later(), flags_size + timestamp_size + expiry_size); b[0] = DEAD_FLAGS; set_field(b, timestamp_offset, timestamp); - set_field(b, ttl_offset, ttl.time_since_epoch().count()); + set_field(b, expiry_offset, expiry.time_since_epoch().count()); return b; } - static bytes make_live(api::timestamp_type timestamp, ttl_opt ttl, bytes_view value) { - auto value_offset = flags_size + timestamp_size + bool(ttl) * ttl_size; + static bytes make_live(api::timestamp_type timestamp, expiry_opt expiry, bytes_view value) { + auto value_offset = flags_size + timestamp_size + bool(expiry) * expiry_size; bytes b(bytes::initialized_later(), value_offset + value.size()); - b[0] = (ttl ? TTL_FLAG : 0) | LIVE_FLAG; + b[0] = (expiry ? EXPIRY_FLAG : 0) | LIVE_FLAG; set_field(b, timestamp_offset, timestamp); - if (ttl) { - set_field(b, ttl_offset, ttl->time_since_epoch().count()); + if (expiry) { + set_field(b, expiry_offset, expiry->time_since_epoch().count()); } std::copy_n(value.begin(), value.size(), b.begin() + value_offset); return b; @@ -123,8 +123,8 @@ public: return atomic_cell_type::value(_data); } // Can be called on live and dead cells. For dead cells, the result is never empty. - ttl_opt ttl() const { - return atomic_cell_type::ttl(_data); + expiry_opt expiry() const { + return atomic_cell_type::expiry(_data); } bytes_view serialize() const { return _data; @@ -164,8 +164,8 @@ public: return atomic_cell_type::value(_data); } // Can be called on live and dead cells. For dead cells, the result is never empty. - ttl_opt ttl() const { - return atomic_cell_type::ttl(_data); + expiry_opt expiry() const { + return atomic_cell_type::expiry(_data); } bytes_view serialize() const { return _data; @@ -173,11 +173,11 @@ public: operator atomic_cell_view() const { return atomic_cell_view(_data); } - static atomic_cell make_dead(api::timestamp_type timestamp, gc_clock::time_point ttl) { - return atomic_cell_type::make_dead(timestamp, ttl); + static atomic_cell make_dead(api::timestamp_type timestamp, gc_clock::time_point expiry) { + return atomic_cell_type::make_dead(timestamp, expiry); } - static atomic_cell make_live(api::timestamp_type timestamp, ttl_opt ttl, bytes_view value) { - return atomic_cell_type::make_live(timestamp, ttl, value); + static atomic_cell make_live(api::timestamp_type timestamp, expiry_opt expiry, bytes_view value) { + return atomic_cell_type::make_live(timestamp, expiry, value); } friend class atomic_cell_or_collection; friend std::ostream& operator<<(std::ostream& os, const atomic_cell& ac); diff --git a/cql3/selection/selection.cc b/cql3/selection/selection.cc index d9a816688b..2799942a89 100644 --- a/cql3/selection/selection.cc +++ b/cql3/selection/selection.cc @@ -46,7 +46,7 @@ selection::selection(schema_ptr schema, query::partition_slice::option_set selection::get_query_options() { query::partition_slice::option_set opts; - opts.set_if(_collect_timestamps || _collect_TTLs); + opts.set_if(_collect_timestamps || _collect_TTLs); opts.set_if( std::any_of(_columns.begin(), _columns.end(), @@ -261,12 +261,12 @@ void result_set_builder::add(const column_definition& def, const query::result_a _timestamps[current->size() - 1] = c.timestamp(); } if (!_ttls.empty()) { - gc_clock::duration ttl(-1); - auto maybe_ttl = c.ttl(); - if (maybe_ttl) { - ttl = *maybe_ttl - to_gc_clock(_now); + gc_clock::duration ttl_left(-1); + expiry_opt e = c.expiry(); + if (e) { + ttl_left = *e - to_gc_clock(_now); } - _ttls[current->size() - 1] = ttl.count(); + _ttls[current->size() - 1] = ttl_left.count(); } } diff --git a/cql3/update_parameters.hh b/cql3/update_parameters.hh index daf6cf2531..7ed0f81840 100644 --- a/cql3/update_parameters.hh +++ b/cql3/update_parameters.hh @@ -117,7 +117,7 @@ public: } return atomic_cell::make_live(_timestamp, - ttl.count() > 0 ? ttl_opt{_local_deletion_time + ttl} : ttl_opt{}, value); + ttl.count() > 0 ? expiry_opt{_local_deletion_time + ttl} : expiry_opt{}, value); }; #if 0 diff --git a/database.cc b/database.cc index 248708b6d8..40f49fba1e 100644 --- a/database.cc +++ b/database.cc @@ -549,10 +549,10 @@ compare_atomic_cell_for_merge(atomic_cell_view left, atomic_cell_view right) { if (left.is_live()) { return compare_unsigned(left.value(), right.value()); } else { - if (*left.ttl() != *right.ttl()) { - // Origin compares big-endian serialized TTL - return (uint32_t)left.ttl()->time_since_epoch().count() - < (uint32_t)right.ttl()->time_since_epoch().count() ? -1 : 1; + if (*left.expiry() != *right.expiry()) { + // Origin compares big-endian serialized expiry time + return (uint32_t)left.expiry()->time_since_epoch().count() + < (uint32_t)right.expiry()->time_since_epoch().count() ? -1 : 1; } return 0; } @@ -727,10 +727,10 @@ operator<<(std::ostream& os, const exploded_clustering_prefix& ecp) { std::ostream& operator<<(std::ostream& os, const atomic_cell_view& acv) { - return fprint(os, "atomic_cell{%s;ts=%d;ttl=%d}", + return fprint(os, "atomic_cell{%s;ts=%d;expiry=%d}", (acv.is_live() ? to_hex(acv.value()) : sstring("DEAD")), acv.timestamp(), - acv.is_live_and_has_ttl() ? acv.ttl()->time_since_epoch().count() : -1); + acv.is_live_and_has_ttl() ? acv.expiry()->time_since_epoch().count() : -1); } std::ostream& diff --git a/gc_clock.hh b/gc_clock.hh index 7c1882763f..c493930d9e 100644 --- a/gc_clock.hh +++ b/gc_clock.hh @@ -22,7 +22,7 @@ public: }; -using ttl_opt = std::experimental::optional; +using expiry_opt = std::experimental::optional; // 20 years in seconds static constexpr gc_clock::duration max_ttl = gc_clock::duration{20 * 365 * 24 * 60 * 60}; diff --git a/mutation.cc b/mutation.cc index d7032de8c6..6b80f13f82 100644 --- a/mutation.cc +++ b/mutation.cc @@ -26,12 +26,12 @@ void mutation::set_clustered_cell(const exploded_clustering_prefix& prefix, cons } void mutation::set_clustered_cell(const clustering_key& key, const bytes& name, const boost::any& value, - api::timestamp_type timestamp, ttl_opt ttl) { + api::timestamp_type timestamp, expiry_opt expiry) { auto column_def = _schema->get_column_definition(name); if (!column_def) { throw std::runtime_error(sprint("no column definition found for '%s'", name)); } - return set_clustered_cell(key, *column_def, atomic_cell::make_live(timestamp, ttl, column_def->type->decompose(value))); + return set_clustered_cell(key, *column_def, atomic_cell::make_live(timestamp, expiry, column_def->type->decompose(value))); } void mutation::set_clustered_cell(const clustering_key& key, const column_definition& def, atomic_cell_or_collection value) { @@ -40,12 +40,12 @@ void mutation::set_clustered_cell(const clustering_key& key, const column_defini } void mutation::set_cell(const exploded_clustering_prefix& prefix, const bytes& name, const boost::any& value, - api::timestamp_type timestamp, ttl_opt ttl) { + api::timestamp_type timestamp, expiry_opt expiry) { auto column_def = _schema->get_column_definition(name); if (!column_def) { throw std::runtime_error(sprint("no column definition found for '%s'", name)); } - return set_cell(prefix, *column_def, atomic_cell::make_live(timestamp, ttl, column_def->type->decompose(value))); + return set_cell(prefix, *column_def, atomic_cell::make_live(timestamp, expiry, column_def->type->decompose(value))); } void mutation::set_cell(const exploded_clustering_prefix& prefix, const column_definition& def, atomic_cell_or_collection value) { diff --git a/mutation.hh b/mutation.hh index 5ca985151e..73f7164b20 100644 --- a/mutation.hh +++ b/mutation.hh @@ -23,9 +23,9 @@ public: mutation(const mutation&) = default; void set_static_cell(const column_definition& def, atomic_cell_or_collection value); void set_clustered_cell(const exploded_clustering_prefix& prefix, const column_definition& def, atomic_cell_or_collection value); - void set_clustered_cell(const clustering_key& key, const bytes& name, const boost::any& value, api::timestamp_type timestamp, ttl_opt ttl = {}); + void set_clustered_cell(const clustering_key& key, const bytes& name, const boost::any& value, api::timestamp_type timestamp, expiry_opt expiry = {}); void set_clustered_cell(const clustering_key& key, const column_definition& def, atomic_cell_or_collection value); - void set_cell(const exploded_clustering_prefix& prefix, const bytes& name, const boost::any& value, api::timestamp_type timestamp, ttl_opt ttl = {}); + void set_cell(const exploded_clustering_prefix& prefix, const bytes& name, const boost::any& value, api::timestamp_type timestamp, expiry_opt expiry = {}); void set_cell(const exploded_clustering_prefix& prefix, const column_definition& def, atomic_cell_or_collection value); std::experimental::optional get_cell(const clustering_key& rkey, const column_definition& def) const; const partition_key& key() const { return _dk._key; }; diff --git a/query-request.hh b/query-request.hh index d3067c8b25..54151f7cc5 100644 --- a/query-request.hh +++ b/query-request.hh @@ -121,11 +121,11 @@ using clustering_range = range; class partition_slice { public: - enum class option { send_clustering_key, send_partition_key, send_timestamp_and_ttl }; + enum class option { send_clustering_key, send_partition_key, send_timestamp_and_expiry }; using option_set = enum_set>; + option::send_timestamp_and_expiry>>; public: std::vector row_ranges; std::vector static_columns; // TODO: consider using bitmap diff --git a/query-result-reader.hh b/query-result-reader.hh index 3a6917f55b..797b7d282c 100644 --- a/query-result-reader.hh +++ b/query-result-reader.hh @@ -14,18 +14,18 @@ namespace query { class result_atomic_cell_view { api::timestamp_type _timestamp; - ttl_opt _ttl; + expiry_opt _expiry; bytes_view _value; public: - result_atomic_cell_view(api::timestamp_type timestamp, ttl_opt ttl, bytes_view value) - : _timestamp(timestamp), _ttl(ttl), _value(value) { } + result_atomic_cell_view(api::timestamp_type timestamp, expiry_opt expiry, bytes_view value) + : _timestamp(timestamp), _expiry(expiry), _value(value) { } api::timestamp_type timestamp() const { return _timestamp; } - ttl_opt ttl() const { - return _ttl; + expiry_opt expiry() const { + return _expiry; } bytes_view value() const { @@ -55,16 +55,16 @@ public: return {}; } api::timestamp_type timestamp = api::missing_timestamp; - ttl_opt ttl_; - if (_slice.options.contains()) { + expiry_opt expiry_; + if (_slice.options.contains()) { timestamp = _in.read (); - auto ttl_rep = _in.read(); - if (ttl_rep != std::numeric_limits::max()) { - ttl_ = gc_clock::time_point(gc_clock::duration(ttl_rep)); + auto expiry_rep = _in.read(); + if (expiry_rep != std::numeric_limits::max()) { + expiry_ = gc_clock::time_point(gc_clock::duration(expiry_rep)); } } auto value = _in.read_view_to_blob(); - return {result_atomic_cell_view(timestamp, ttl_, value)}; + return {result_atomic_cell_view(timestamp, expiry_, value)}; } std::experimental::optional next_collection_cell() { auto present = _in.read(); diff --git a/query-result-writer.hh b/query-result-writer.hh index 54a3de4341..b9b5c32340 100644 --- a/query-result-writer.hh +++ b/query-result-writer.hh @@ -43,10 +43,10 @@ public: // FIXME: store this in a bitmap _w.write(true); assert(c.is_live()); - if (_slice.options.contains()) { + if (_slice.options.contains()) { _w.write(c.timestamp()); - if (c.ttl()) { - _w.write(c.ttl()->time_since_epoch().count()); + if (c.expiry()) { + _w.write(c.expiry()->time_since_epoch().count()); } else { _w.write(std::numeric_limits::max()); } diff --git a/query-result.hh b/query-result.hh index 9f8d46ffc8..eb4e4a5a17 100644 --- a/query-result.hh +++ b/query-result.hh @@ -46,7 +46,7 @@ namespace query { // client, because they are already specified in the query request, and not // queried for. The query results hold keys optionally. // -// Also, meta-data like cell timestamp and ttl is optional. It is only needed +// Also, meta-data like cell timestamp and expiry is optional. It is only needed // if the query has writetime() or ttl() functions in it, which it typically // won't have. // @@ -62,13 +62,13 @@ namespace query { // ::= // ::= + // ::= | -// ::= [ ] +// ::= [ ] // ::= // // ::= // ::= * // ::= -// ::= +// ::= // ::= // ::= // ::= diff --git a/sstables/partition.cc b/sstables/partition.cc index 041940920f..75d9cace93 100644 --- a/sstables/partition.cc +++ b/sstables/partition.cc @@ -163,7 +163,7 @@ public: throw malformed_sstable_exception("wrong number of clustering columns"); } - ttl_opt opt; + expiry_opt opt; if (ttl) { gc_clock::duration secs(expiration); auto tp = gc_clock::time_point(secs); @@ -172,7 +172,7 @@ public: return; } - opt = ttl_opt(tp); + opt = expiry_opt(tp); } else { opt = {}; } diff --git a/tests/perf/perf_mutation.cc b/tests/perf/perf_mutation.cc index 3afe596bcf..993ade9e2a 100644 --- a/tests/perf/perf_mutation.cc +++ b/tests/perf/perf_mutation.cc @@ -2,7 +2,7 @@ #include "perf.hh" static atomic_cell make_atomic_cell(bytes value) { - return atomic_cell::make_live(0, ttl_opt{}, value); + return atomic_cell::make_live(0, expiry_opt{}, value); }; int main(int argc, char* argv[]) { diff --git a/tests/urchin/mutation_test.cc b/tests/urchin/mutation_test.cc index bdd31dff9c..302d61ad93 100644 --- a/tests/urchin/mutation_test.cc +++ b/tests/urchin/mutation_test.cc @@ -15,7 +15,7 @@ static sstring some_keyspace("ks"); static sstring some_column_family("cf"); static atomic_cell make_atomic_cell(bytes value) { - return atomic_cell::make_live(0, ttl_opt{}, std::move(value)); + return atomic_cell::make_live(0, expiry_opt{}, std::move(value)); }; BOOST_AUTO_TEST_CASE(test_mutation_is_applied) { diff --git a/tests/urchin/serializer_test.cc b/tests/urchin/serializer_test.cc index b5a1e8bf36..6e50980169 100644 --- a/tests/urchin/serializer_test.cc +++ b/tests/urchin/serializer_test.cc @@ -140,7 +140,7 @@ static sstring some_keyspace("ks"); static sstring some_column_family("cf"); static atomic_cell make_atomic_cell(bytes value) { - return atomic_cell::make_live(0, ttl_opt{}, std::move(value)); + return atomic_cell::make_live(0, expiry_opt{}, std::move(value)); } SEASTAR_TEST_CASE(test_mutation){ diff --git a/tests/urchin/sstable_test.cc b/tests/urchin/sstable_test.cc index a73813d24c..aac970ee35 100644 --- a/tests/urchin/sstable_test.cc +++ b/tests/urchin/sstable_test.cc @@ -296,7 +296,7 @@ static sstring some_keyspace("ks"); static sstring some_column_family("cf"); static atomic_cell make_atomic_cell(bytes value) { - return atomic_cell::make_live(0, ttl_opt{}, std::move(value)); + return atomic_cell::make_live(0, expiry_opt{}, std::move(value)); }; SEASTAR_TEST_CASE(datafile_generation_01) { diff --git a/thrift/handler.cc b/thrift/handler.cc index fc3c798e00..05ba53f370 100644 --- a/thrift/handler.cc +++ b/thrift/handler.cc @@ -275,9 +275,9 @@ public: if (ttl.count() <= 0) { ttl = cf.schema()->default_time_to_live(); } - auto ttl_option = ttl.count() > 0 ? ttl_opt(gc_clock::now() + ttl) : ttl_opt(); + auto expiry = ttl.count() > 0 ? expiry_opt(gc_clock::now() + ttl) : expiry_opt(); m_to_apply.set_clustered_cell(empty_clustering_key, *def, - atomic_cell::make_live(col.timestamp, ttl_option, to_bytes(col.value))); + atomic_cell::make_live(col.timestamp, expiry, to_bytes(col.value))); } else if (cosc.__isset.super_column) { // FIXME: implement } else if (cosc.__isset.counter_column) { From f43836eb687542cbdf9d24ac162a80dd1f53a335 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Wed, 6 May 2015 18:31:21 +0200 Subject: [PATCH 3/5] db: Handle expired cells in compare_atomic_cell_for_merge() While at it, clarify some comments. --- database.cc | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/database.cc b/database.cc index 40f49fba1e..acbf4a4f16 100644 --- a/database.cc +++ b/database.cc @@ -537,7 +537,10 @@ memtable::apply(const mutation& m) { p.apply(_schema, m.partition()); } -// Based on org.apache.cassandra.db.AbstractCell#reconcile() +// Based on: +// - org.apache.cassandra.db.AbstractCell#reconcile() +// - org.apache.cassandra.db.BufferExpiringCell#reconcile() +// - org.apache.cassandra.db.BufferDeletedCell#reconcile() int compare_atomic_cell_for_merge(atomic_cell_view left, atomic_cell_view right) { if (left.timestamp() != right.timestamp()) { @@ -547,15 +550,28 @@ compare_atomic_cell_for_merge(atomic_cell_view left, atomic_cell_view right) { return left.is_live() ? -1 : 1; } if (left.is_live()) { - return compare_unsigned(left.value(), right.value()); - } else { - if (*left.expiry() != *right.expiry()) { - // Origin compares big-endian serialized expiry time - return (uint32_t)left.expiry()->time_since_epoch().count() - < (uint32_t)right.expiry()->time_since_epoch().count() ? -1 : 1; + auto c = compare_unsigned(left.value(), right.value()); + if (c != 0) { + return c; + } + if (left.is_live_and_has_ttl() + && right.is_live_and_has_ttl() + && *left.expiry() != *right.expiry()) + { + return left.expiry() < right.expiry() ? -1 : 1; + } + } else { + // Both are deleted + if (*left.expiry() != *right.expiry()) { + // Origin compares big-endian serialized expiry time. That's because it + // delegates to AbstractCell.reconcile() which compares values after + // comparing timestamps, which in case of deleted cells will hold + // serialized expiry. + return (uint32_t) left.expiry()->time_since_epoch().count() + < (uint32_t) right.expiry()->time_since_epoch().count() ? -1 : 1; } - return 0; } + return 0; } void From 784f841b8bce63b728ea8ab809e22eb045a2baf7 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Wed, 6 May 2015 18:32:17 +0200 Subject: [PATCH 4/5] tests: Add test for cell ordering --- tests/urchin/mutation_test.cc | 77 +++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/tests/urchin/mutation_test.cc b/tests/urchin/mutation_test.cc index 302d61ad93..7c08f62e7c 100644 --- a/tests/urchin/mutation_test.cc +++ b/tests/urchin/mutation_test.cc @@ -277,3 +277,80 @@ BOOST_AUTO_TEST_CASE(test_multiple_memtables_multiple_partitions) { BOOST_REQUIRE(shadow == result); } +BOOST_AUTO_TEST_CASE(test_cell_ordering) { + auto expiry_1 = gc_clock::now(); + auto expiry_2 = expiry_1 + gc_clock::duration(1); + + auto assert_order = [] (atomic_cell_view first, atomic_cell_view second) { + if (compare_atomic_cell_for_merge(first, second) >= 0) { + BOOST_FAIL(sprint("Expected %s < %s", first, second)); + } + if (compare_atomic_cell_for_merge(second, first) <= 0) { + BOOST_FAIL(sprint("Expected %s < %s", second, first)); + } + }; + + auto assert_equal = [] (atomic_cell_view c1, atomic_cell_view c2) { + BOOST_REQUIRE(compare_atomic_cell_for_merge(c1, c2) == 0); + }; + + assert_equal( + atomic_cell::make_live(0, expiry_opt{}, bytes("value")), + atomic_cell::make_live(0, expiry_opt{}, bytes("value"))); + + assert_equal( + atomic_cell::make_live(1, expiry_1, bytes("value")), + atomic_cell::make_live(1, expiry_1, bytes("value"))); + + assert_equal( + atomic_cell::make_dead(1, expiry_1), + atomic_cell::make_dead(1, expiry_1)); + + // If one cell doesn't have an expiry, Origin considers them equal. + assert_equal( + atomic_cell::make_live(1, expiry_2, bytes()), + atomic_cell::make_live(1, expiry_opt{}, bytes())); + + assert_order( + atomic_cell::make_live(0, expiry_opt{}, bytes("value1")), + atomic_cell::make_live(0, expiry_opt{}, bytes("value2"))); + + assert_order( + atomic_cell::make_live(0, expiry_opt{}, bytes("value12")), + atomic_cell::make_live(0, expiry_opt{}, bytes("value2"))); + + // Live cells are ordered first by timestamp... + assert_order( + atomic_cell::make_live(0, expiry_opt{}, bytes("value2")), + atomic_cell::make_live(1, expiry_opt{}, bytes("value1"))); + + // ..then by value + assert_order( + atomic_cell::make_live(1, expiry_2, bytes("value1")), + atomic_cell::make_live(1, expiry_1, bytes("value2"))); + + // ..then by expiry + assert_order( + atomic_cell::make_live(1, expiry_1, bytes()), + atomic_cell::make_live(1, expiry_2, bytes())); + + // Dead wins + assert_order( + atomic_cell::make_live(1, expiry_opt{}, bytes("value")), + atomic_cell::make_dead(1, expiry_1)); + + // Dead wins with expiring cell + assert_order( + atomic_cell::make_live(1, expiry_2, bytes("value")), + atomic_cell::make_dead(1, expiry_1)); + + // Deleted cells are ordered first by timestamp + assert_order( + atomic_cell::make_dead(1, expiry_2), + atomic_cell::make_dead(2, expiry_1)); + + // ...then by expiry + assert_order( + atomic_cell::make_dead(1, expiry_1), + atomic_cell::make_dead(1, expiry_2)); +} From b1e45e440140215576ed58d0e0ef28e2a510e09f Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Wed, 6 May 2015 19:12:14 +0200 Subject: [PATCH 5/5] db: Store ttl in atomic_cell Origin does that, so should we. Both ttl and expiry time are stored in sstables. The value of ttl seems to be used to calculate the read digest (expiry is not used for that). The API for creating atomic_cells changed a bit. To create a non-expiring cell: atomic_cell::make_live(timestamp, value); To create an expiring cell: atomic_cell::make_live(timestamp, value, expiry, ttl); or: // Expiry is calculated based on current clock reading atomic_cell::make_live(timestamp, value, ttl_optional); --- atomic_cell.hh | 54 +++++++++++++++++++++++++++------ cql3/update_parameters.hh | 7 +++-- gc_clock.hh | 1 + mutation.cc | 8 ++--- mutation.hh | 4 +-- sstables/partition.cc | 25 ++++++--------- tests/perf/perf_mutation.cc | 2 +- tests/urchin/mutation_test.cc | 53 ++++++++++++++++++-------------- tests/urchin/serializer_test.cc | 2 +- tests/urchin/sstable_test.cc | 2 +- thrift/handler.cc | 7 +++-- 11 files changed, 105 insertions(+), 60 deletions(-) diff --git a/atomic_cell.hh b/atomic_cell.hh index 27ce65ca4e..97148a6e86 100644 --- a/atomic_cell.hh +++ b/atomic_cell.hh @@ -31,7 +31,7 @@ class atomic_cell_or_collection; * * Layout: * - * := ? + * := ()? * := */ class atomic_cell_type final { @@ -44,6 +44,8 @@ private: static constexpr unsigned timestamp_size = 8; static constexpr unsigned expiry_offset = timestamp_offset + timestamp_size; static constexpr unsigned expiry_size = 4; + static constexpr unsigned ttl_offset = expiry_offset + expiry_size; + static constexpr unsigned ttl_size = 4; private: static bool is_live(const bytes_view& cell) { return cell[0] != DEAD_FLAGS; @@ -60,7 +62,7 @@ private: } // Can be called on live cells only static bytes_view value(bytes_view cell) { - auto expiry_field_size = bool(cell[0] & EXPIRY_FLAG) * expiry_size; + auto expiry_field_size = bool(cell[0] & EXPIRY_FLAG) * (expiry_size + ttl_size); auto value_offset = flags_size + timestamp_size + expiry_field_size; cell.remove_prefix(value_offset); return cell; @@ -74,6 +76,11 @@ private: } return {}; } + // Can be called only when is_live_and_has_ttl() is true. + static gc_clock::duration ttl(const bytes_view& cell) { + assert(is_live_and_has_ttl(cell)); + return gc_clock::duration(get_field(cell, ttl_offset)); + } static bytes make_dead(api::timestamp_type timestamp, gc_clock::time_point expiry) { bytes b(bytes::initialized_later(), flags_size + timestamp_size + expiry_size); b[0] = DEAD_FLAGS; @@ -81,14 +88,21 @@ private: set_field(b, expiry_offset, expiry.time_since_epoch().count()); return b; } - static bytes make_live(api::timestamp_type timestamp, expiry_opt expiry, bytes_view value) { - auto value_offset = flags_size + timestamp_size + bool(expiry) * expiry_size; + static bytes make_live(api::timestamp_type timestamp, bytes_view value) { + auto value_offset = flags_size + timestamp_size; bytes b(bytes::initialized_later(), value_offset + value.size()); - b[0] = (expiry ? EXPIRY_FLAG : 0) | LIVE_FLAG; + b[0] = LIVE_FLAG; set_field(b, timestamp_offset, timestamp); - if (expiry) { - set_field(b, expiry_offset, expiry->time_since_epoch().count()); - } + std::copy_n(value.begin(), value.size(), b.begin() + value_offset); + return b; + } + static bytes make_live(api::timestamp_type timestamp, bytes_view value, gc_clock::time_point expiry, gc_clock::duration ttl) { + auto value_offset = flags_size + timestamp_size + expiry_size + ttl_size; + bytes b(bytes::initialized_later(), value_offset + value.size()); + b[0] = EXPIRY_FLAG | LIVE_FLAG; + set_field(b, timestamp_offset, timestamp); + set_field(b, expiry_offset, expiry.time_since_epoch().count()); + set_field(b, ttl_offset, ttl.count()); std::copy_n(value.begin(), value.size(), b.begin() + value_offset); return b; } @@ -126,6 +140,10 @@ public: expiry_opt expiry() const { return atomic_cell_type::expiry(_data); } + // Can be called only when is_live_and_has_ttl() + gc_clock::duration ttl() const { + return atomic_cell_type::ttl(_data); + } bytes_view serialize() const { return _data; } @@ -167,6 +185,10 @@ public: expiry_opt expiry() const { return atomic_cell_type::expiry(_data); } + // Can be called only when is_live_and_has_ttl() + gc_clock::duration ttl() const { + return atomic_cell_type::ttl(_data); + } bytes_view serialize() const { return _data; } @@ -176,8 +198,20 @@ public: static atomic_cell make_dead(api::timestamp_type timestamp, gc_clock::time_point expiry) { return atomic_cell_type::make_dead(timestamp, expiry); } - static atomic_cell make_live(api::timestamp_type timestamp, expiry_opt expiry, bytes_view value) { - return atomic_cell_type::make_live(timestamp, expiry, value); + static atomic_cell make_live(api::timestamp_type timestamp, bytes_view value) { + return atomic_cell_type::make_live(timestamp, value); + } + static atomic_cell make_live(api::timestamp_type timestamp, bytes_view value, + gc_clock::time_point expiry, gc_clock::duration ttl) + { + return atomic_cell_type::make_live(timestamp, value, expiry, ttl); + } + static atomic_cell make_live(api::timestamp_type timestamp, bytes_view value, ttl_opt ttl) { + if (!ttl) { + return atomic_cell_type::make_live(timestamp, value); + } else { + return atomic_cell_type::make_live(timestamp, value, gc_clock::now() + *ttl, *ttl); + } } friend class atomic_cell_or_collection; friend std::ostream& operator<<(std::ostream& os, const atomic_cell& ac); diff --git a/cql3/update_parameters.hh b/cql3/update_parameters.hh index 7ed0f81840..ea57e587a0 100644 --- a/cql3/update_parameters.hh +++ b/cql3/update_parameters.hh @@ -116,8 +116,11 @@ public: ttl = _schema->default_time_to_live(); } - return atomic_cell::make_live(_timestamp, - ttl.count() > 0 ? expiry_opt{_local_deletion_time + ttl} : expiry_opt{}, value); + if (ttl.count() > 0) { + return atomic_cell::make_live(_timestamp, value, _local_deletion_time + ttl, ttl); + } else { + return atomic_cell::make_live(_timestamp, value); + } }; #if 0 diff --git a/gc_clock.hh b/gc_clock.hh index c493930d9e..8d66d7b5fb 100644 --- a/gc_clock.hh +++ b/gc_clock.hh @@ -23,6 +23,7 @@ public: using expiry_opt = std::experimental::optional; +using ttl_opt = std::experimental::optional; // 20 years in seconds static constexpr gc_clock::duration max_ttl = gc_clock::duration{20 * 365 * 24 * 60 * 60}; diff --git a/mutation.cc b/mutation.cc index 6b80f13f82..0b9e1530a5 100644 --- a/mutation.cc +++ b/mutation.cc @@ -26,12 +26,12 @@ void mutation::set_clustered_cell(const exploded_clustering_prefix& prefix, cons } void mutation::set_clustered_cell(const clustering_key& key, const bytes& name, const boost::any& value, - api::timestamp_type timestamp, expiry_opt expiry) { + api::timestamp_type timestamp, ttl_opt ttl) { auto column_def = _schema->get_column_definition(name); if (!column_def) { throw std::runtime_error(sprint("no column definition found for '%s'", name)); } - return set_clustered_cell(key, *column_def, atomic_cell::make_live(timestamp, expiry, column_def->type->decompose(value))); + return set_clustered_cell(key, *column_def, atomic_cell::make_live(timestamp, column_def->type->decompose(value), ttl)); } void mutation::set_clustered_cell(const clustering_key& key, const column_definition& def, atomic_cell_or_collection value) { @@ -40,12 +40,12 @@ void mutation::set_clustered_cell(const clustering_key& key, const column_defini } void mutation::set_cell(const exploded_clustering_prefix& prefix, const bytes& name, const boost::any& value, - api::timestamp_type timestamp, expiry_opt expiry) { + api::timestamp_type timestamp, ttl_opt ttl) { auto column_def = _schema->get_column_definition(name); if (!column_def) { throw std::runtime_error(sprint("no column definition found for '%s'", name)); } - return set_cell(prefix, *column_def, atomic_cell::make_live(timestamp, expiry, column_def->type->decompose(value))); + return set_cell(prefix, *column_def, atomic_cell::make_live(timestamp, column_def->type->decompose(value), ttl)); } void mutation::set_cell(const exploded_clustering_prefix& prefix, const column_definition& def, atomic_cell_or_collection value) { diff --git a/mutation.hh b/mutation.hh index 73f7164b20..5ca985151e 100644 --- a/mutation.hh +++ b/mutation.hh @@ -23,9 +23,9 @@ public: mutation(const mutation&) = default; void set_static_cell(const column_definition& def, atomic_cell_or_collection value); void set_clustered_cell(const exploded_clustering_prefix& prefix, const column_definition& def, atomic_cell_or_collection value); - void set_clustered_cell(const clustering_key& key, const bytes& name, const boost::any& value, api::timestamp_type timestamp, expiry_opt expiry = {}); + void set_clustered_cell(const clustering_key& key, const bytes& name, const boost::any& value, api::timestamp_type timestamp, ttl_opt ttl = {}); void set_clustered_cell(const clustering_key& key, const column_definition& def, atomic_cell_or_collection value); - void set_cell(const exploded_clustering_prefix& prefix, const bytes& name, const boost::any& value, api::timestamp_type timestamp, expiry_opt expiry = {}); + void set_cell(const exploded_clustering_prefix& prefix, const bytes& name, const boost::any& value, api::timestamp_type timestamp, ttl_opt ttl = {}); void set_cell(const exploded_clustering_prefix& prefix, const column_definition& def, atomic_cell_or_collection value); std::experimental::optional get_cell(const clustering_key& rkey, const column_definition& def) const; const partition_key& key() const { return _dk._key; }; diff --git a/sstables/partition.cc b/sstables/partition.cc index 75d9cace93..345c3b8056 100644 --- a/sstables/partition.cc +++ b/sstables/partition.cc @@ -145,6 +145,15 @@ public: } } + atomic_cell make_atomic_cell(uint64_t timestamp, bytes_view value, uint32_t ttl, uint32_t expiration) { + if (ttl) { + return atomic_cell::make_live(timestamp, value, + gc_clock::time_point(gc_clock::duration(expiration)), gc_clock::duration(ttl)); + } else { + return atomic_cell::make_live(timestamp, value); + } + } + virtual void consume_cell(bytes_view col_name, bytes_view value, uint64_t timestamp, uint32_t ttl, uint32_t expiration) override { static bytes cql_row_marker(3, bytes::value_type(0x0)); @@ -163,21 +172,7 @@ public: throw malformed_sstable_exception("wrong number of clustering columns"); } - expiry_opt opt; - if (ttl) { - gc_clock::duration secs(expiration); - auto tp = gc_clock::time_point(secs); - if (tp < gc_clock::now()) { - consume_deleted_cell(col, timestamp, tp); - return; - } - - opt = expiry_opt(tp); - } else { - opt = {}; - } - - auto ac = atomic_cell::make_live(timestamp, opt, value); + auto ac = make_atomic_cell(timestamp, value, ttl, expiration); if (col.is_static) { mut->set_static_cell(*(col.cdef), ac); diff --git a/tests/perf/perf_mutation.cc b/tests/perf/perf_mutation.cc index 993ade9e2a..7e49a0033d 100644 --- a/tests/perf/perf_mutation.cc +++ b/tests/perf/perf_mutation.cc @@ -2,7 +2,7 @@ #include "perf.hh" static atomic_cell make_atomic_cell(bytes value) { - return atomic_cell::make_live(0, expiry_opt{}, value); + return atomic_cell::make_live(0, value); }; int main(int argc, char* argv[]) { diff --git a/tests/urchin/mutation_test.cc b/tests/urchin/mutation_test.cc index 7c08f62e7c..bab9128100 100644 --- a/tests/urchin/mutation_test.cc +++ b/tests/urchin/mutation_test.cc @@ -15,7 +15,7 @@ static sstring some_keyspace("ks"); static sstring some_column_family("cf"); static atomic_cell make_atomic_cell(bytes value) { - return atomic_cell::make_live(0, expiry_opt{}, std::move(value)); + return atomic_cell::make_live(0, std::move(value)); }; BOOST_AUTO_TEST_CASE(test_mutation_is_applied) { @@ -248,7 +248,7 @@ BOOST_AUTO_TEST_CASE(test_multiple_memtables_multiple_partitions) { auto key = partition_key::from_exploded(*s, {int32_type->decompose(p1)}); auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(c1)}); mutation m(key, s); - m.set_clustered_cell(c_key, r1_col, atomic_cell::make_live(ts++, std::experimental::nullopt, int32_type->decompose(r1))); + m.set_clustered_cell(c_key, r1_col, atomic_cell::make_live(ts++, int32_type->decompose(r1))); cf.apply(std::move(m)); shadow[p1][c1] = r1; }; @@ -278,8 +278,11 @@ BOOST_AUTO_TEST_CASE(test_multiple_memtables_multiple_partitions) { } BOOST_AUTO_TEST_CASE(test_cell_ordering) { - auto expiry_1 = gc_clock::now(); - auto expiry_2 = expiry_1 + gc_clock::duration(1); + auto now = gc_clock::now(); + auto ttl_1 = gc_clock::duration(1); + auto ttl_2 = gc_clock::duration(2); + auto expiry_1 = now + ttl_1; + auto expiry_2 = now + ttl_2; auto assert_order = [] (atomic_cell_view first, atomic_cell_view second) { if (compare_atomic_cell_for_merge(first, second) >= 0) { @@ -292,15 +295,16 @@ BOOST_AUTO_TEST_CASE(test_cell_ordering) { auto assert_equal = [] (atomic_cell_view c1, atomic_cell_view c2) { BOOST_REQUIRE(compare_atomic_cell_for_merge(c1, c2) == 0); + BOOST_REQUIRE(compare_atomic_cell_for_merge(c2, c1) == 0); }; assert_equal( - atomic_cell::make_live(0, expiry_opt{}, bytes("value")), - atomic_cell::make_live(0, expiry_opt{}, bytes("value"))); + atomic_cell::make_live(0, bytes("value")), + atomic_cell::make_live(0, bytes("value"))); assert_equal( - atomic_cell::make_live(1, expiry_1, bytes("value")), - atomic_cell::make_live(1, expiry_1, bytes("value"))); + atomic_cell::make_live(1, bytes("value"), expiry_1, ttl_1), + atomic_cell::make_live(1, bytes("value"))); assert_equal( atomic_cell::make_dead(1, expiry_1), @@ -308,40 +312,45 @@ BOOST_AUTO_TEST_CASE(test_cell_ordering) { // If one cell doesn't have an expiry, Origin considers them equal. assert_equal( - atomic_cell::make_live(1, expiry_2, bytes()), - atomic_cell::make_live(1, expiry_opt{}, bytes())); + atomic_cell::make_live(1, bytes(), expiry_2, ttl_2), + atomic_cell::make_live(1, bytes())); + + // Origin doesn't compare ttl (is it wise?) + assert_equal( + atomic_cell::make_live(1, bytes("value"), expiry_1, ttl_1), + atomic_cell::make_live(1, bytes("value"), expiry_1, ttl_2)); assert_order( - atomic_cell::make_live(0, expiry_opt{}, bytes("value1")), - atomic_cell::make_live(0, expiry_opt{}, bytes("value2"))); + atomic_cell::make_live(0, bytes("value1")), + atomic_cell::make_live(0, bytes("value2"))); assert_order( - atomic_cell::make_live(0, expiry_opt{}, bytes("value12")), - atomic_cell::make_live(0, expiry_opt{}, bytes("value2"))); + atomic_cell::make_live(0, bytes("value12")), + atomic_cell::make_live(0, bytes("value2"))); // Live cells are ordered first by timestamp... assert_order( - atomic_cell::make_live(0, expiry_opt{}, bytes("value2")), - atomic_cell::make_live(1, expiry_opt{}, bytes("value1"))); + atomic_cell::make_live(0, bytes("value2")), + atomic_cell::make_live(1, bytes("value1"))); // ..then by value assert_order( - atomic_cell::make_live(1, expiry_2, bytes("value1")), - atomic_cell::make_live(1, expiry_1, bytes("value2"))); + atomic_cell::make_live(1, bytes("value1"), expiry_2, ttl_2), + atomic_cell::make_live(1, bytes("value2"), expiry_1, ttl_1)); // ..then by expiry assert_order( - atomic_cell::make_live(1, expiry_1, bytes()), - atomic_cell::make_live(1, expiry_2, bytes())); + atomic_cell::make_live(1, bytes(), expiry_1, ttl_1), + atomic_cell::make_live(1, bytes(), expiry_2, ttl_1)); // Dead wins assert_order( - atomic_cell::make_live(1, expiry_opt{}, bytes("value")), + atomic_cell::make_live(1, bytes("value")), atomic_cell::make_dead(1, expiry_1)); // Dead wins with expiring cell assert_order( - atomic_cell::make_live(1, expiry_2, bytes("value")), + atomic_cell::make_live(1, bytes("value"), expiry_2, ttl_2), atomic_cell::make_dead(1, expiry_1)); // Deleted cells are ordered first by timestamp diff --git a/tests/urchin/serializer_test.cc b/tests/urchin/serializer_test.cc index 6e50980169..96311a0722 100644 --- a/tests/urchin/serializer_test.cc +++ b/tests/urchin/serializer_test.cc @@ -140,7 +140,7 @@ static sstring some_keyspace("ks"); static sstring some_column_family("cf"); static atomic_cell make_atomic_cell(bytes value) { - return atomic_cell::make_live(0, expiry_opt{}, std::move(value)); + return atomic_cell::make_live(0, std::move(value)); } SEASTAR_TEST_CASE(test_mutation){ diff --git a/tests/urchin/sstable_test.cc b/tests/urchin/sstable_test.cc index aac970ee35..237fa171b3 100644 --- a/tests/urchin/sstable_test.cc +++ b/tests/urchin/sstable_test.cc @@ -296,7 +296,7 @@ static sstring some_keyspace("ks"); static sstring some_column_family("cf"); static atomic_cell make_atomic_cell(bytes value) { - return atomic_cell::make_live(0, expiry_opt{}, std::move(value)); + return atomic_cell::make_live(0, std::move(value)); }; SEASTAR_TEST_CASE(datafile_generation_01) { diff --git a/thrift/handler.cc b/thrift/handler.cc index 05ba53f370..7f9166c8aa 100644 --- a/thrift/handler.cc +++ b/thrift/handler.cc @@ -275,9 +275,12 @@ public: if (ttl.count() <= 0) { ttl = cf.schema()->default_time_to_live(); } - auto expiry = ttl.count() > 0 ? expiry_opt(gc_clock::now() + ttl) : expiry_opt(); + ttl_opt maybe_ttl; + if (ttl.count() > 0) { + maybe_ttl = ttl; + } m_to_apply.set_clustered_cell(empty_clustering_key, *def, - atomic_cell::make_live(col.timestamp, expiry, to_bytes(col.value))); + atomic_cell::make_live(col.timestamp, to_bytes(col.value), maybe_ttl)); } else if (cosc.__isset.super_column) { // FIXME: implement } else if (cosc.__isset.counter_column) {