From 05590628937bebe203e35e83e97b4731e4511f40 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Tue, 28 Apr 2015 10:58:23 +0200 Subject: [PATCH 01/20] types: Introduce lexicographical_tri_compare() --- types.hh | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/types.hh b/types.hh index ae33e41061..03f6f7d2d3 100644 --- a/types.hh +++ b/types.hh @@ -84,6 +84,30 @@ int lexicographical_tri_compare(TypesIterator types_first, TypesIterator types_l return 0; } +// Trichotomic version of std::lexicographical_compare() +// +// Returns an integer which is less, equal or greater than zero when the first value +// is respectively smaller, equal or greater than the second value. +template +int lexicographical_tri_compare(InputIt1 first1, InputIt1 last1, + InputIt2 first2, InputIt2 last2, + Compare comp) { + while (first1 != last1 && first2 != last2) { + auto c = comp(*first1, *first2); + if (c) { + return c; + } + ++first1; + ++first2; + } + bool e1 = first1 == last1; + bool e2 = first2 == last2; + if (e1 != e2) { + return e2 ? 1 : -1; + } + return 0; +} + // A trichotomic comparator for prefix equality total ordering. // In this ordering, two sequences are equal iff any of them is a prefix // of the another. Otherwise, lexicographical ordering determines the order. From 08a17496a35a8dab57568cd4f13632706b153161 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Tue, 28 Apr 2015 18:56:50 +0200 Subject: [PATCH 02/20] keys: Move get_component() to compound_wrapper It's an operation which is valid for any compound, not only partition_key. --- keys.hh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/keys.hh b/keys.hh index d874e4aadb..568e151390 100644 --- a/keys.hh +++ b/keys.hh @@ -119,6 +119,12 @@ public: auto end(const schema& s) const { return get_compound_type(s)->end(_bytes); } + + bytes_view get_component(const schema& s, size_t idx) const { + auto it = begin(s); + std::advance(it, idx); + return *it; + } }; template @@ -274,12 +280,6 @@ public: public: using compound = lw_shared_ptr>; - bytes_view get_component(const schema& s, size_t idx) const { - auto it = begin(s); - std::advance(it, idx); - return *it; - } - static partition_key from_bytes(bytes b) { return partition_key(std::move(b)); } From 8d2233fb33a09fbc225fbb64cc4d1b266d67cd65 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Tue, 28 Apr 2015 10:01:27 +0200 Subject: [PATCH 03/20] compound: Add method for checking if type is compound or not Will be used by legacy comparator. --- compound.hh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/compound.hh b/compound.hh index 2f7c9b11e8..4d90146fec 100644 --- a/compound.hh +++ b/compound.hh @@ -58,6 +58,10 @@ public: return _types; } + bool is_singular() const { + return _types.size() == 1; + } + prefix_type as_prefix() { return prefix_type(_types); } From 6a9c49ee4790caa0db3551227fb78dcf7b7316b4 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Tue, 28 Apr 2015 16:49:56 +0200 Subject: [PATCH 04/20] compound: Implement postfix incrementation in the component iterator --- compound.hh | 5 +++++ tests/urchin/compound_test.cc | 12 ++++++++++++ 2 files changed, 17 insertions(+) diff --git a/compound.hh b/compound.hh index 4d90146fec..ec2cb0ba32 100644 --- a/compound.hh +++ b/compound.hh @@ -198,6 +198,11 @@ public: read_current(); return *this; } + iterator operator++(int) { + iterator i(*this); + ++(*this); + return i; + } const value_type& operator*() const { return _current; } bool operator!=(const iterator& i) const { return _v.begin() != i._v.begin(); } bool operator==(const iterator& i) const { return _v.begin() == i._v.begin(); } diff --git a/tests/urchin/compound_test.cc b/tests/urchin/compound_test.cc index 1f06cdea89..0c0487690f 100644 --- a/tests/urchin/compound_test.cc +++ b/tests/urchin/compound_test.cc @@ -146,3 +146,15 @@ BOOST_AUTO_TEST_CASE(test_conversion_methods_for_non_singular_compound) { do_test_conversion_methods_for_non_singular_compound(); do_test_conversion_methods_for_non_singular_compound(); } + +BOOST_AUTO_TEST_CASE(test_component_iterator_post_incrementation) { + compound_type t({bytes_type, bytes_type, bytes_type}); + + auto packed = t.serialize_value(to_bytes_vec({"el1", "el2", "el3"})); + auto i = t.begin(packed); + auto end = t.end(packed); + BOOST_REQUIRE_EQUAL(to_bytes("el1"), *i++); + BOOST_REQUIRE_EQUAL(to_bytes("el2"), *i++); + BOOST_REQUIRE_EQUAL(to_bytes("el3"), *i++); + BOOST_REQUIRE(i == end); +} From 6f536382b80b394faa619b673065d0d31e4b7011 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Tue, 28 Apr 2015 13:16:03 +0200 Subject: [PATCH 05/20] compound: Mark and document component iterator as InputIterator --- compound.hh | 2 +- keys.hh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/compound.hh b/compound.hh index ec2cb0ba32..60ac33546e 100644 --- a/compound.hh +++ b/compound.hh @@ -153,7 +153,7 @@ public: bytes decompose_value(const value_type& values) { return ::serialize_value(*this, values); } - class iterator : public std::iterator { + class iterator : public std::iterator { private: ssize_t _types_left; bytes_view _v; diff --git a/keys.hh b/keys.hh index 568e151390..c59ec55814 100644 --- a/keys.hh +++ b/keys.hh @@ -111,6 +111,7 @@ public: } // begin() and end() return iterators over components of this compound. The iterator yields a bytes_view to the component. + // The iterators satisfy InputIterator concept. auto begin(const schema& s) const { return get_compound_type(s)->begin(_bytes); } From 7d45a472aa62498cfe760ebb6ea69b3133862ee7 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Wed, 29 Apr 2015 10:33:34 +0200 Subject: [PATCH 06/20] compound: Implement iterator::operator->() --- compound.hh | 1 + 1 file changed, 1 insertion(+) diff --git a/compound.hh b/compound.hh index 60ac33546e..71b01764e3 100644 --- a/compound.hh +++ b/compound.hh @@ -204,6 +204,7 @@ public: return i; } const value_type& operator*() const { return _current; } + const value_type* operator->() const { return &_current; } bool operator!=(const iterator& i) const { return _v.begin() != i._v.begin(); } bool operator==(const iterator& i) const { return _v.begin() == i._v.begin(); } }; From c78b2b1e0de06a4b3ede44d5be84724b6e83b950 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Wed, 29 Apr 2015 10:33:38 +0200 Subject: [PATCH 07/20] compound: Introduce components() method --- compound.hh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compound.hh b/compound.hh index 71b01764e3..0833dbe530 100644 --- a/compound.hh +++ b/compound.hh @@ -214,6 +214,9 @@ public: iterator end(const bytes_view& v) const { return iterator(typename iterator::end_iterator_tag(), v); } + boost::iterator_range components(const bytes_view& v) const { + return { begin(v), end(v) }; + } auto iter_items(const bytes_view& v) { return boost::iterator_range(begin(v), end(v)); } From 89dca2eee45471430437d21daaa4e5c58e5ad6da Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Wed, 29 Apr 2015 13:55:01 +0200 Subject: [PATCH 08/20] compound: Introduce legacy format adaptors --- compound_compat.hh | 175 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 compound_compat.hh diff --git a/compound_compat.hh b/compound_compat.hh new file mode 100644 index 0000000000..94d187b9c7 --- /dev/null +++ b/compound_compat.hh @@ -0,0 +1,175 @@ +/* + * Copyright 2015 Cloudius Systems + */ + +#pragma once + +#include "compound.hh" + +// +// This header provides adaptors between the representation used by our compound_type<> +// and representation used by Origin. +// +// For single-component keys the legacy representation is equivalent +// to the only component's serialized form. For composite keys it the following +// (See org.apache.cassandra.db.marshal.CompositeType): +// +// ::= ( )+ +// ::= +// ::= +// ::= +// +// is component's value in serialized form. is always 0 for partition key. +// + +// Given a representation serialized using @CompoundType, provides a view on the +// representation of the same components as they would be serialized by Origin. +// +// The view is exposed in a form of a byte range. For example of use see to_legacy() function. +template +class legacy_compound_view { + static_assert(!CompoundType::is_prefixable, "Legacy view not defined for prefixes"); + CompoundType& _type; + bytes_view _packed; +public: + legacy_compound_view(CompoundType& c, bytes_view packed) + : _type(c) + , _packed(packed) + { } + + class iterator : public std::iterator { + bool _singular; + // Offset within virtual output space of a component. + // + // Offset: -2 -1 0 ... LEN-1 LEN + // Field: [ length MSB ] [ length LSB ] [ VALUE ] [ EOC ] + // + int32_t _offset; + typename CompoundType::iterator _i; + public: + struct end_tag {}; + + iterator(const legacy_compound_view& v) + : _singular(v._type.is_singular()) + , _offset(_singular ? 0 : -2) + , _i(v._type.begin(v._packed)) + { } + + iterator(const legacy_compound_view& v, end_tag) + : _offset(-2) + , _i(v._type.end(v._packed)) + { } + + value_type operator*() const { + int32_t component_size = _i->size(); + if (_offset == -2) { + return (component_size >> 8) & 0xff; + } else if (_offset == -1) { + return component_size & 0xff; + } else if (_offset < component_size) { + return (*_i)[_offset]; + } else { // _offset == component_size + return 0; // EOC field + } + } + + iterator& operator++() { + auto component_size = (int32_t) _i->size(); + if (_offset < component_size + // When _singular, we skip the EOC byte. + && (!_singular || _offset != (component_size - 1))) + { + ++_offset; + } else { + ++_i; + _offset = -2; + } + return *this; + } + + bool operator==(const iterator& other) const { + return _offset == other._offset && other._i == _i; + } + + bool operator!=(const iterator& other) const { + return !(*this == other); + } + }; + + // A trichotomic comparator defined on @CompoundType representations which + // orders them according to lexicographical ordering of their corresponding + // legacy representations. + // + // tri_comparator(t)(k1, k2) + // + // ...is equivalent to: + // + // compare_unsigned(to_legacy(t, k1), to_legacy(t, k2)) + // + // ...but more efficient. + // + struct tri_comparator { + const CompoundType& _type; + + tri_comparator(const CompoundType& type) + : _type(type) + { } + + tri_comparator(tri_comparator&& other) + : _type(other._type) + { } + + tri_comparator& operator=(tri_comparator&& other) { + this->~tri_comparator(); + new (this) tri_comparator(std::move(other)); + return *this; + } + + // @k1 and @k2 must be serialized using @type, which was passed to the constructor. + int operator()(bytes_view k1, bytes_view k2) const { + if (_type.is_singular()) { + return compare_unsigned(*_type.begin(k1), *_type.begin(k2)); + } + return lexicographical_tri_compare( + _type.begin(k1), _type.end(k1), + _type.begin(k2), _type.end(k2), + [] (const bytes_view& c1, const bytes_view& c2) -> int { + if (c1.size() != c2.size()) { + return c1.size() < c2.size() ? -1 : 1; + } + return memcmp(c1.begin(), c2.begin(), c1.size()); + }); + } + }; + + // Equivalent to std::distance(begin(), end()), but computes faster + size_t size() const { + if (_type.is_singular()) { + return _type.begin(_packed)->size(); + } + size_t s = 0; + for (auto&& component : _type.components(_packed)) { + s += 2 /* length field */ + component.size() + 1 /* EOC */; + } + return s; + } + + iterator begin() const { + return iterator(*this); + } + + iterator end() const { + return iterator(*this, typename iterator::end_tag()); + } +}; + +// Converts compound_type<> representation to legacy representation +// @packed is assumed to be serialized using supplied @type. +template +static inline +bytes to_legacy(CompoundType& type, bytes_view packed) { + legacy_compound_view lv(type, packed); + bytes legacy_form(bytes::initialized_later(), lv.size()); + std::copy(lv.begin(), lv.end(), legacy_form.begin()); + return legacy_form; +} From e7c282fdf21b194cc9192cff246a0933a6c5bfb4 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Wed, 29 Apr 2015 13:56:23 +0200 Subject: [PATCH 09/20] tests: Add tests for compound adaptors --- tests/urchin/compound_test.cc | 54 +++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tests/urchin/compound_test.cc b/tests/urchin/compound_test.cc index 0c0487690f..49c1353d09 100644 --- a/tests/urchin/compound_test.cc +++ b/tests/urchin/compound_test.cc @@ -7,6 +7,7 @@ #include #include "compound.hh" +#include "compound_compat.hh" #include "tests/urchin/range_assert.hh" static std::vector to_bytes_vec(std::vector values) { @@ -158,3 +159,56 @@ BOOST_AUTO_TEST_CASE(test_component_iterator_post_incrementation) { BOOST_REQUIRE_EQUAL(to_bytes("el3"), *i++); BOOST_REQUIRE(i == end); } + +BOOST_AUTO_TEST_CASE(test_conversion_to_legacy_form) { + compound_type singular({bytes_type}); + + BOOST_REQUIRE_EQUAL(to_legacy(singular, singular.serialize_single(to_bytes("asd"))), bytes("asd")); + BOOST_REQUIRE_EQUAL(to_legacy(singular, singular.serialize_single(to_bytes(""))), bytes("")); + + compound_type two_components({bytes_type, bytes_type}); + + BOOST_REQUIRE_EQUAL(to_legacy(two_components, two_components.serialize_value(to_bytes_vec({"el1", "elem2"}))), + bytes({'\x00', '\x03', 'e', 'l', '1', '\x00', '\x00', '\x05', 'e', 'l', 'e', 'm', '2', '\x00'})); + + BOOST_REQUIRE_EQUAL(to_legacy(two_components, two_components.serialize_value(to_bytes_vec({"el1", ""}))), + bytes({'\x00', '\x03', 'e', 'l', '1', '\x00', '\x00', '\x00', '\x00'})); +} + +BOOST_AUTO_TEST_CASE(test_legacy_ordering_of_singular) { + compound_type t({bytes_type}); + + auto make = [&t] (sstring value) -> bytes { + return t.serialize_single(to_bytes(value)); + }; + + legacy_compound_view::tri_comparator cmp(t); + + BOOST_REQUIRE(cmp(make("A"), make("B")) < 0); + BOOST_REQUIRE(cmp(make("AA"), make("B")) < 0); + BOOST_REQUIRE(cmp(make("B"), make("AB")) > 0); + BOOST_REQUIRE(cmp(make("B"), make("A")) > 0); + BOOST_REQUIRE(cmp(make("A"), make("A")) == 0); +} + +BOOST_AUTO_TEST_CASE(test_legacy_ordering_of_composites) { + compound_type t({bytes_type, bytes_type}); + + auto make = [&t] (sstring v1, sstring v2) -> bytes { + return t.serialize_value(std::vector{to_bytes(v1), to_bytes(v2)}); + }; + + legacy_compound_view::tri_comparator cmp(t); + + BOOST_REQUIRE(cmp(make("A", "B"), make("A", "B")) == 0); + BOOST_REQUIRE(cmp(make("A", "B"), make("A", "C")) < 0); + BOOST_REQUIRE(cmp(make("A", "B"), make("B", "B")) < 0); + BOOST_REQUIRE(cmp(make("A", "C"), make("B", "B")) < 0); + BOOST_REQUIRE(cmp(make("B", "A"), make("A", "A")) > 0); + + BOOST_REQUIRE(cmp(make("AA", "B"), make("B", "B")) > 0); + BOOST_REQUIRE(cmp(make("A", "AA"), make("A", "A")) > 0); + + BOOST_REQUIRE(cmp(make("", "A"), make("A", "A")) < 0); + BOOST_REQUIRE(cmp(make("A", ""), make("A", "A")) < 0); +} From 82882779b6dabfc448c8edde4a7ddfa4915fbedb Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Thu, 30 Apr 2015 10:50:15 +0200 Subject: [PATCH 10/20] keys: Expose compatibility layer with Origin in partition_key --- keys.hh | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/keys.hh b/keys.hh index c59ec55814..904d13afed 100644 --- a/keys.hh +++ b/keys.hh @@ -7,6 +7,7 @@ #include "schema.hh" #include "bytes.hh" #include "types.hh" +#include "compound_compat.hh" // // This header defines type system for primary key holders. @@ -276,10 +277,11 @@ public: }; class partition_key : public compound_wrapper { + using c_type = compound_type; public: partition_key(bytes&& b) : compound_wrapper(std::move(b)) {} public: - using compound = lw_shared_ptr>; + using compound = lw_shared_ptr; static partition_key from_bytes(bytes b) { return partition_key(std::move(b)); @@ -289,6 +291,23 @@ public: return s.partition_key_type(); } + // Returns key's representation which is compatible with Origin. + // The result is valid as long as the schema is live. + const legacy_compound_view legacy_form(const schema& s) const { + return { *get_compound_type(s), _bytes }; + } + + // A trichotomic comparator for ordering compatible with Origin. + int legacy_tri_compare(const schema& s, const partition_key& o) const { + auto cmp = legacy_compound_view::tri_comparator(*get_compound_type(s)); + return cmp(*this, o); + } + + // Checks if keys are equal in a way which is compatible with Origin. + bool legacy_equal(const schema& s, const partition_key& o) const { + return legacy_tri_compare(s, o) == 0; + } + friend std::ostream& operator<<(std::ostream& out, const partition_key& pk); }; From 71e580c9c640be3466fa2b4e63cbd361fcbabf07 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Thu, 30 Apr 2015 10:51:19 +0200 Subject: [PATCH 11/20] tests: keys: Add compatibility layer tests --- tests/urchin/keys_test.cc | 42 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tests/urchin/keys_test.cc b/tests/urchin/keys_test.cc index 2dc706efbc..0282e42242 100644 --- a/tests/urchin/keys_test.cc +++ b/tests/urchin/keys_test.cc @@ -54,3 +54,45 @@ BOOST_AUTO_TEST_CASE(test_key_component_iterator) { BOOST_REQUIRE(i == end); } + +BOOST_AUTO_TEST_CASE(test_legacy_ordering_for_non_composite_key) { + schema s({}, "", "", {{"c1", bytes_type}}, {}, {}, {}, utf8_type); + + auto to_key = [&s] (sstring value) { + return partition_key::from_single_value(s, to_bytes(value)); + }; + + auto cmp = [&s] (const partition_key& k1, const partition_key& k2) { + return k1.legacy_tri_compare(s, k2); + }; + + BOOST_REQUIRE(cmp(to_key("A"), to_key("B")) < 0); + BOOST_REQUIRE(cmp(to_key("AA"), to_key("B")) < 0); + BOOST_REQUIRE(cmp(to_key("B"), to_key("AB")) > 0); + BOOST_REQUIRE(cmp(to_key("B"), to_key("A")) > 0); + BOOST_REQUIRE(cmp(to_key("A"), to_key("A")) == 0); +} + +BOOST_AUTO_TEST_CASE(test_legacy_ordering_for_composite_keys) { + schema s({}, "", "", {{"c1", bytes_type}, {"c2", bytes_type}}, {}, {}, {}, utf8_type); + + auto to_key = [&s] (sstring v1, sstring v2) { + return partition_key::from_exploded(s, std::vector{to_bytes(v1), to_bytes(v2)}); + }; + + auto cmp = [&s] (const partition_key& k1, const partition_key& k2) { + return k1.legacy_tri_compare(s, k2); + }; + + BOOST_REQUIRE(cmp(to_key("A", "B"), to_key("A", "B")) == 0); + BOOST_REQUIRE(cmp(to_key("A", "B"), to_key("A", "C")) < 0); + BOOST_REQUIRE(cmp(to_key("A", "B"), to_key("B", "B")) < 0); + BOOST_REQUIRE(cmp(to_key("A", "C"), to_key("B", "B")) < 0); + BOOST_REQUIRE(cmp(to_key("B", "A"), to_key("A", "A")) > 0); + + BOOST_REQUIRE(cmp(to_key("AA", "B"), to_key("B", "B")) > 0); + BOOST_REQUIRE(cmp(to_key("A", "AA"), to_key("A", "A")) > 0); + + BOOST_REQUIRE(cmp(to_key("", "A"), to_key("A", "A")) < 0); + BOOST_REQUIRE(cmp(to_key("A", ""), to_key("A", "A")) < 0); +} From ad05bb92d18811caa7f4ae635da7fbc3ff9c615c Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Thu, 30 Apr 2015 10:31:52 +0200 Subject: [PATCH 12/20] compound: Add compound::is_prefixable --- compound.hh | 1 + 1 file changed, 1 insertion(+) diff --git a/compound.hh b/compound.hh index 0833dbe530..e7304d68d7 100644 --- a/compound.hh +++ b/compound.hh @@ -41,6 +41,7 @@ private: const bool _byte_order_equal; const bool _byte_order_comparable; public: + static constexpr bool is_prefixable = AllowPrefixes == allow_prefixes::yes; using prefix_type = compound_type; using value_type = std::vector; From 9197914697fcaecabfad9438d4c60e4382c645ec Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Tue, 28 Apr 2015 19:09:45 +0200 Subject: [PATCH 13/20] utils: Fix indentation in murmur_hash.hh We don't indent namespaces. --- utils/murmur_hash.hh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/utils/murmur_hash.hh b/utils/murmur_hash.hh index ecdc38283c..68b46d8baf 100644 --- a/utils/murmur_hash.hh +++ b/utils/murmur_hash.hh @@ -37,11 +37,12 @@ namespace utils { -namespace murmur_hash -{ - uint32_t hash32(bytes_view data, int32_t seed); - uint64_t hash2_64(bytes_view key, uint64_t seed); - void hash3_x64_128(bytes_view key, uint64_t seed, std::array &result); -}; +namespace murmur_hash { + +uint32_t hash32(bytes_view data, int32_t seed); +uint64_t hash2_64(bytes_view key, uint64_t seed); +void hash3_x64_128(bytes_view key, uint64_t seed, std::array& result); + +} // namespace murmur_hash } // namespace utils From 5fc149d454d32f3b8ff96c1398af06826343b31a Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Wed, 29 Apr 2015 10:37:12 +0200 Subject: [PATCH 14/20] utils: murmur_hash: Add input iterator based hash3_x64_128() version --- utils/murmur_hash.cc | 16 ------- utils/murmur_hash.hh | 107 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 16 deletions(-) diff --git a/utils/murmur_hash.cc b/utils/murmur_hash.cc index ec013aa2b5..aa6348c4f2 100644 --- a/utils/murmur_hash.cc +++ b/utils/murmur_hash.cc @@ -148,22 +148,6 @@ static uint64_t getblock(bytes_view key, uint32_t index) (((uint64_t) key[i_8 + 6] & 0xff) << 48) + (((uint64_t) key[i_8 + 7] & 0xff) << 56); } -static uint64_t rotl64(uint64_t v, uint32_t n) -{ - return ((v << n) | ((uint64_t)v >> (64 - n))); -} - -static uint64_t fmix(uint64_t k) -{ - k ^= (uint64_t)k >> 33; - k *= 0xff51afd7ed558ccdL; - k ^= (uint64_t)k >> 33; - k *= 0xc4ceb9fe1a85ec53L; - k ^= (uint64_t)k >> 33; - - return k; -} - void hash3_x64_128(bytes_view key, uint64_t seed, std::array &result) { uint32_t length = key.size(); diff --git a/utils/murmur_hash.hh b/utils/murmur_hash.hh index 68b46d8baf..01b78f9c18 100644 --- a/utils/murmur_hash.hh +++ b/utils/murmur_hash.hh @@ -41,6 +41,113 @@ namespace murmur_hash { uint32_t hash32(bytes_view data, int32_t seed); uint64_t hash2_64(bytes_view key, uint64_t seed); + +template +static inline +uint64_t read_block(InputIterator& in) { + typename std::iterator_traits::value_type tmp[8]; + for (int i = 0; i < 8; ++i) { + tmp[i] = *in; + ++in; + } + return ((uint64_t) tmp[0] & 0xff) + (((uint64_t) tmp[1] & 0xff) << 8) + + (((uint64_t) tmp[2] & 0xff) << 16) + (((uint64_t) tmp[3] & 0xff) << 24) + + (((uint64_t) tmp[4] & 0xff) << 32) + (((uint64_t) tmp[5] & 0xff) << 40) + + (((uint64_t) tmp[6] & 0xff) << 48) + (((uint64_t) tmp[7] & 0xff) << 56); +} + +static inline +uint64_t rotl64(uint64_t v, uint32_t n) { + return ((v << n) | ((uint64_t)v >> (64 - n))); +} + +static inline +uint64_t fmix(uint64_t k) { + k ^= (uint64_t)k >> 33; + k *= 0xff51afd7ed558ccdL; + k ^= (uint64_t)k >> 33; + k *= 0xc4ceb9fe1a85ec53L; + k ^= (uint64_t)k >> 33; + + return k; +} + +template +void hash3_x64_128(InputIterator in, uint32_t length, uint64_t seed, std::array& result) { + const uint32_t nblocks = length >> 4; // Process as 128-bit blocks. + + uint64_t h1 = seed; + uint64_t h2 = seed; + + uint64_t c1 = 0x87c37b91114253d5L; + uint64_t c2 = 0x4cf5ad432745937fL; + + //---------- + // body + + for(uint32_t i = 0; i < nblocks; i++) + { + uint64_t k1 = read_block(in); + uint64_t k2 = read_block(in); + + k1 *= c1; k1 = rotl64(k1,31); k1 *= c2; h1 ^= k1; + + h1 = rotl64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; + + k2 *= c2; k2 = rotl64(k2,33); k2 *= c1; h2 ^= k2; + + h2 = rotl64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; + } + + //---------- + // tail + + uint64_t k1 = 0; + uint64_t k2 = 0; + + typename std::iterator_traits::value_type tmp[15]; + std::copy_n(in, length & 15, tmp); + + switch(length & 15) + { + case 15: k2 ^= ((uint64_t) tmp[14]) << 48; + case 14: k2 ^= ((uint64_t) tmp[13]) << 40; + case 13: k2 ^= ((uint64_t) tmp[12]) << 32; + case 12: k2 ^= ((uint64_t) tmp[11]) << 24; + case 11: k2 ^= ((uint64_t) tmp[10]) << 16; + case 10: k2 ^= ((uint64_t) tmp[9]) << 8; + case 9: k2 ^= ((uint64_t) tmp[8]) << 0; + k2 *= c2; k2 = rotl64(k2,33); k2 *= c1; h2 ^= k2; + case 8: k1 ^= ((uint64_t) tmp[7]) << 56; + case 7: k1 ^= ((uint64_t) tmp[6]) << 48; + case 6: k1 ^= ((uint64_t) tmp[5]) << 40; + case 5: k1 ^= ((uint64_t) tmp[4]) << 32; + case 4: k1 ^= ((uint64_t) tmp[3]) << 24; + case 3: k1 ^= ((uint64_t) tmp[2]) << 16; + case 2: k1 ^= ((uint64_t) tmp[1]) << 8; + case 1: k1 ^= ((uint64_t) tmp[0]); + k1 *= c1; k1 = rotl64(k1,31); k1 *= c2; h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= length; + h2 ^= length; + + h1 += h2; + h2 += h1; + + h1 = fmix(h1); + h2 = fmix(h2); + + h1 += h2; + h2 += h1; + + result[0] = h1; + result[1] = h2; +} + void hash3_x64_128(bytes_view key, uint64_t seed, std::array& result); } // namespace murmur_hash From ce78aef19a376278ca876db0539bb2ee18070976 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Wed, 29 Apr 2015 14:48:57 +0200 Subject: [PATCH 15/20] tests: Introduce murmur_hash_test.cc --- configure.py | 2 + test.py | 1 + tests/urchin/murmur_hash_test.cc | 106 +++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+) create mode 100644 tests/urchin/murmur_hash_test.cc diff --git a/configure.py b/configure.py index ebd84d7bb9..36f05efb2c 100755 --- a/configure.py +++ b/configure.py @@ -208,6 +208,7 @@ tests = [ tests += [ 'tests/urchin/bytes_ostream_test', 'tests/urchin/UUID_test', + 'tests/urchin/murmur_hash_test', ] apps = [ @@ -479,6 +480,7 @@ deps['tests/urchin/serializer_test'] += boost_test_lib deps['tests/urchin/bytes_ostream_test'] = ['tests/urchin/bytes_ostream_test.cc'] deps['tests/urchin/UUID_test'] = ['utils/UUID_gen.cc', 'tests/urchin/UUID_test.cc'] +deps['tests/urchin/murmur_hash_test'] = ['bytes.cc', 'utils/murmur_hash.cc', 'tests/urchin/murmur_hash_test.cc'] warnings = [ '-Wno-mismatched-tags', # clang-only diff --git a/test.py b/test.py index 2498ae9725..b5175388f1 100755 --- a/test.py +++ b/test.py @@ -42,6 +42,7 @@ boost_tests = [ 'cartesian_product_test', 'urchin/UUID_test', 'urchin/compound_test', + 'urchin/murmur_hash_test', ] other_tests = [ diff --git a/tests/urchin/murmur_hash_test.cc b/tests/urchin/murmur_hash_test.cc new file mode 100644 index 0000000000..8ce3f447fe --- /dev/null +++ b/tests/urchin/murmur_hash_test.cc @@ -0,0 +1,106 @@ +/* + * Copyright 2015 Cloudius Systems + */ + +#define BOOST_TEST_DYN_LINK +#define BOOST_TEST_MODULE core + +#include + +#include "utils/murmur_hash.hh" +#include "bytes.hh" +#include "core/print.hh" + +static const bytes full_sequence("012345678901234567890123456789012345678901234567890123456789"); + +static const uint64_t seed = 0xcafebabe; + +// Below are pre-calculated results of hashing consecutive prefixes of full_sequence using hash3_x64_128(), +// staring from an empty prefix. +std::array prefix_hashes[] = { + {13907055927958326333ULL, 10701141902926764871ULL}, + {16872847325129109440ULL, 5125572542408278394ULL}, + {11916219991241122015ULL, 747256650753853469ULL}, + {1492790099208671403ULL, 16635411534431524239ULL}, + {16764172998150925140ULL, 7440789969466348974ULL}, + {6846275695158209935ULL, 11251493995290334439ULL}, + {1075204625779168927ULL, 3453614304122336174ULL}, + {1404180555660983881ULL, 13684781009779545989ULL}, + {10185829608361057848ULL, 1102754042417891721ULL}, + {12850382803381855486ULL, 7404649381971707328ULL}, + {972515366528881960ULL, 4507841639019527002ULL}, + {9279316204399455969ULL, 9712180353841837616ULL}, + {16558181491899334208ULL, 17507114537353308311ULL}, + {12977947643557220239ULL, 8334539845739718010ULL}, + {3743840537387886281ULL, 15297576726012815871ULL}, + {10675210326497176757ULL, 11200838847539594424ULL}, + {16363715880225337291ULL, 2866762944263215884ULL}, + {1272769995400892137ULL, 1744366104172354624ULL}, + {17426490373034063702ULL, 12666853004117709655ULL}, + {10757142341798556363ULL, 3984810732374497004ULL}, + {4593020710048021108ULL, 14359610319437287264ULL}, + {18212086870806388719ULL, 7490375939640747191ULL}, + {11209001888824275013ULL, 6491913312740217486ULL}, + {17601044365330203914ULL, 1779402119744049378ULL}, + {3916812090790925532ULL, 17533572508631620015ULL}, + {10113761195332211536ULL, 4163484992388084181ULL}, + {4353425943622404193ULL, 1830165015196477722ULL}, + {3904126367597302219ULL, 7917741892387588561ULL}, + {7077450301176172141ULL, 8070185570157969067ULL}, + {6331768922468785771ULL, 9311778359071820659ULL}, + {7715740891587706229ULL, 16510772505395753023ULL}, + {4510384582422222090ULL, 9352450339278885986ULL}, + {6746132289648898302ULL, 15402380546251654069ULL}, + {1315904697672087497ULL, 2686857386486814319ULL}, + {16122226135709041149ULL, 1278536837434550412ULL}, + {6449104926034509627ULL, 8809488279970194649ULL}, + {9047965986959166273ULL, 14963749820458851455ULL}, + {18095596803119563681ULL, 2806499127062067052ULL}, + {545238237267145238ULL, 4583663570136224396ULL}, + {12335897404061220746ULL, 8643308333771385742ULL}, + {15016951849151361171ULL, 13012972687708005422ULL}, + {12896848725136832414ULL, 9881710852371170521ULL}, + {17900663530283054991ULL, 9606960248070178723ULL}, + {4513619521783122834ULL, 4823611535250518791ULL}, + {15572858348470724038ULL, 4882998878774456634ULL}, + {3464540909110937960ULL, 14591983318346304410ULL}, + {2951301498066556278ULL, 3029976006973164807ULL}, + {7848995488883197496ULL, 10621954303326018594ULL}, + {5702723040652442467ULL, 11325339470689059424ULL}, + {870698890980252409ULL, 8294946103885186165ULL}, + {423348447487367835ULL, 4067674294039261619ULL}, + {397951862030142664ULL, 17073640849499096681ULL}, + {9374556141781683538ULL, 10333311062251856416ULL}, + {1097707041202763764ULL, 2870200096551238743ULL}, + {11493051326088411054ULL, 12348796263566330575ULL}, + {15865059192259516415ULL, 4808544582161036476ULL}, + {2717981543414886593ULL, 5944564527643476706ULL}, + {887521262173735642ULL, 3558550013200985442ULL}, + {9496424291456600748ULL, 9845949835154361896ULL}, + {1589012859535948937ULL, 7402826160257180747ULL} +}; + +BOOST_AUTO_TEST_CASE(test_hash_output) { + auto assert_hashes_equal = [] (bytes_view data, std::array lhs, std::array rhs) { + if (lhs != rhs) { + BOOST_FAIL(sprint("Hashes differ for %s (got {0x%x, 0x%x} and {0x%x, 0x%x})", data, + lhs[0], lhs[1], rhs[0], rhs[1])); + } + }; + + for (int i = 0; i < full_sequence.size(); ++i) { + auto prefix = bytes_view(full_sequence.begin(), i); + auto&& expected = prefix_hashes[i]; + + { + std::array dst; + utils::murmur_hash::hash3_x64_128(prefix, seed, dst); + } + + // Test the iterator version + { + std::array dst; + utils::murmur_hash::hash3_x64_128(prefix.begin(), prefix.size(), seed, dst); + } + } +} From 46e72cbc64aaddff560f6ce36d37882ea299e642 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Wed, 29 Apr 2015 15:24:31 +0200 Subject: [PATCH 16/20] tests: Introduce perf_hash.cc, hashing benchmark Output on my laptop: $ build/release/tests/perf/perf_hash Timing fixed hash... 28671657.15 tps 28720930.45 tps 28622017.20 tps 28677088.01 tps 29223543.70 tps Timing iterator hash... 22023042.57 tps 21953352.04 tps 21393787.05 tps 21613837.10 tps 21563284.57 tps --- configure.py | 1 + tests/perf/perf_hash.cc | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 tests/perf/perf_hash.cc diff --git a/configure.py b/configure.py index 36f05efb2c..36d9a2ebdb 100755 --- a/configure.py +++ b/configure.py @@ -158,6 +158,7 @@ urchin_tests = [ 'tests/urchin/types_test', 'tests/urchin/keys_test', 'tests/perf/perf_mutation', + 'tests/perf/perf_hash', 'tests/perf/perf_cql_parser', 'tests/perf/perf_simple_query', 'tests/urchin/cql_query_test', diff --git a/tests/perf/perf_hash.cc b/tests/perf/perf_hash.cc new file mode 100644 index 0000000000..44faeed440 --- /dev/null +++ b/tests/perf/perf_hash.cc @@ -0,0 +1,35 @@ +/* + * Copyright 2015 Cloudius Systems + */ + +#include "utils/murmur_hash.hh" +#include "tests/perf/perf.hh" + +volatile uint64_t black_hole; + +int main(int argc, char* argv[]) { + const uint64_t seed = 0; + auto src = bytes("0123412308129301923019283056789012345"); + + uint64_t sink = 0; + + std::cout << "Timing fixed hash...\n"; + + time_it([&] { + std::array dst; + utils::murmur_hash::hash3_x64_128(src, seed, dst); + sink += dst[0]; + sink += dst[1]; + }); + + std::cout << "Timing iterator hash...\n"; + + time_it([&] { + std::array dst; + utils::murmur_hash::hash3_x64_128(src.begin(), src.size(), seed, dst); + sink += dst[0]; + sink += dst[1]; + }); + + black_hole = sink; +} From 51d26620caad343c154971d85237d94e7ce05796 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Wed, 29 Apr 2015 16:05:32 +0200 Subject: [PATCH 17/20] db: Remove comment above partitions map I think the types are explicit enough now. --- database.hh | 1 - 1 file changed, 1 deletion(-) diff --git a/database.hh b/database.hh index b2996afe46..ad237df9fe 100644 --- a/database.hh +++ b/database.hh @@ -64,7 +64,6 @@ struct column_family { row& find_or_create_row_slow(const partition_key& partition_key, const clustering_key& clustering_key); row* find_row(const dht::decorated_key& partition_key, const clustering_key& clustering_key); schema_ptr _schema; - // partition key -> partition std::map partitions; void apply(const mutation& m); // Returns at most "cmd.limit" rows From 359af7745cd5c96e22eb2be7619c85722796d2ef Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Wed, 29 Apr 2015 20:30:24 +0200 Subject: [PATCH 18/20] dht: murmur3_partitioner: move get_token() implementation to .cc --- dht/murmur3_partitioner.cc | 10 ++++++++++ dht/murmur3_partitioner.hh | 8 ++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/dht/murmur3_partitioner.cc b/dht/murmur3_partitioner.cc index e77c9b9b86..4ec86cbc34 100644 --- a/dht/murmur3_partitioner.cc +++ b/dht/murmur3_partitioner.cc @@ -31,6 +31,16 @@ murmur3_partitioner::get_token(bytes_view key) { return token{token::kind::key, std::move(b)}; } +token +murmur3_partitioner::get_token(const sstables::key_view& key) { + return get_token(bytes_view(key)); +} + +token +murmur3_partitioner::get_token(const partition_key& key) { + return get_token(bytes_view(key)); +} + inline long long_token(const token& t) { if (t._data.size() != sizeof(long)) { diff --git a/dht/murmur3_partitioner.hh b/dht/murmur3_partitioner.hh index e9fbab23d6..ab4d5da840 100644 --- a/dht/murmur3_partitioner.hh +++ b/dht/murmur3_partitioner.hh @@ -12,12 +12,8 @@ namespace dht { class murmur3_partitioner final : public i_partitioner { public: - virtual token get_token(const partition_key& key) override { - return get_token(bytes_view(key)); - } - virtual token get_token(const sstables::key_view& key) override { - return get_token(bytes_view(key)); - } + virtual token get_token(const partition_key& key); + virtual token get_token(const sstables::key_view& key); virtual bool preserves_order() override { return false; } virtual std::map describe_ownership(const std::vector& sorted_tokens); virtual data_type get_token_validator(); From aec740f8958cadb9c4f173cb227194ebc0834532 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Thu, 30 Apr 2015 10:53:30 +0200 Subject: [PATCH 19/20] db: Make decorated_key have ordering compatible with Origin --- database.cc | 7 ++--- database.hh | 2 +- db/legacy_schema_tables.cc | 13 ++++++---- db/legacy_schema_tables.hh | 3 ++- dht/i_partitioner.cc | 50 ++++++++++++++++++++++-------------- dht/i_partitioner.hh | 32 ++++++++++++++--------- dht/murmur3_partitioner.cc | 14 +++++++--- dht/murmur3_partitioner.hh | 3 ++- mutation.cc | 6 +++-- service/storage_proxy.cc | 20 ++++++++++----- tests/urchin/cql_test_env.cc | 2 +- thrift/handler.cc | 2 +- 12 files changed, 98 insertions(+), 56 deletions(-) diff --git a/database.cc b/database.cc index 0149bd82d1..fda46884de 100644 --- a/database.cc +++ b/database.cc @@ -25,6 +25,7 @@ thread_local logging::logger dblog("database"); column_family::column_family(schema_ptr schema) : _schema(std::move(schema)) + , partitions(dht::decorated_key::less_comparator(_schema)) { } // define in .cc, since sstable is forward-declared in .hh @@ -39,7 +40,7 @@ column_family::find_partition(const dht::decorated_key& key) { mutation_partition* column_family::find_partition_slow(const partition_key& key) { - return find_partition(dht::global_partitioner().decorate_key(key)); + return find_partition(dht::global_partitioner().decorate_key(*_schema, key)); } row* @@ -53,14 +54,14 @@ column_family::find_row(const dht::decorated_key& partition_key, const clusterin mutation_partition& column_family::find_or_create_partition_slow(const partition_key& key) { - return find_or_create_partition(dht::global_partitioner().decorate_key(key)); + return find_or_create_partition(dht::global_partitioner().decorate_key(*_schema, key)); } mutation_partition& column_family::find_or_create_partition(const dht::decorated_key& key) { // call lower_bound so we have a hint for the insert, just in case. auto i = partitions.lower_bound(key); - if (i == partitions.end() || key != i->first) { + if (i == partitions.end() || !key.equal(*_schema, i->first)) { i = partitions.emplace_hint(i, std::make_pair(std::move(key), mutation_partition(_schema))); } return i->second; diff --git a/database.hh b/database.hh index ad237df9fe..c5ac0acc1e 100644 --- a/database.hh +++ b/database.hh @@ -64,7 +64,7 @@ struct column_family { row& find_or_create_row_slow(const partition_key& partition_key, const clustering_key& clustering_key); row* find_row(const dht::decorated_key& partition_key, const clustering_key& clustering_key); schema_ptr _schema; - std::map partitions; + std::map partitions; void apply(const mutation& m); // Returns at most "cmd.limit" rows future> query(const query::read_command& cmd); diff --git a/db/legacy_schema_tables.cc b/db/legacy_schema_tables.cc index b552d2101f..3f2679b395 100644 --- a/db/legacy_schema_tables.cc +++ b/db/legacy_schema_tables.cc @@ -399,14 +399,16 @@ std::vector ALL { KEYSPACES, COLUMNFAMILIES, COLUMNS, TRIGGERS, USE future read_schema_for_keyspaces(service::storage_proxy& proxy, const sstring& schema_table_name, const std::set& keyspace_names) { + auto schema = proxy.get_db().local().find_schema(system_keyspace::NAME, schema_table_name); auto map = [&proxy, schema_table_name] (sstring keyspace_name) { return read_schema_partition_for_keyspace(proxy, schema_table_name, keyspace_name); }; - auto insert = [] (schema_result&& schema, auto&& schema_entity) { + auto insert = [] (schema_result&& result, auto&& schema_entity) { if (schema_entity.second) { - schema.insert(std::move(schema_entity)); + result.insert(std::move(schema_entity)); } - return std::move(schema); + return std::move(result); }; - return map_reduce(keyspace_names.begin(), keyspace_names.end(), map, schema_result(), insert); + return map_reduce(keyspace_names.begin(), keyspace_names.end(), map, + schema_result(dht::decorated_key::less_comparator(schema)), insert); } #if 0 @@ -420,7 +422,8 @@ std::vector ALL { KEYSPACES, COLUMNFAMILIES, COLUMNS, TRIGGERS, USE read_schema_partition_for_keyspace(service::storage_proxy& proxy, const sstring& schema_table_name, const sstring& keyspace_name) { auto schema = proxy.get_db().local().find_schema(system_keyspace::NAME, schema_table_name); - auto keyspace_key = dht::global_partitioner().decorate_key(partition_key::from_single_value(*schema, to_bytes(keyspace_name))); + auto keyspace_key = dht::global_partitioner().decorate_key(*schema, + partition_key::from_single_value(*schema, to_bytes(keyspace_name))); return read_schema_partition_for_keyspace(proxy, schema_table_name, keyspace_key); } diff --git a/db/legacy_schema_tables.hh b/db/legacy_schema_tables.hh index 91dec955f5..7962bdc3d5 100644 --- a/db/legacy_schema_tables.hh +++ b/db/legacy_schema_tables.hh @@ -39,7 +39,8 @@ class result_set; namespace db { namespace legacy_schema_tables { -using schema_result = std::map>>; +using schema_result = std::map>, + dht::decorated_key::less_comparator>; static constexpr auto KEYSPACES = "schema_keyspaces"; static constexpr auto COLUMNFAMILIES = "schema_columnfamilies"; diff --git a/dht/i_partitioner.cc b/dht/i_partitioner.cc index 6a313ab145..db79b89c15 100644 --- a/dht/i_partitioner.cc +++ b/dht/i_partitioner.cc @@ -129,25 +129,6 @@ bool operator<(const token& t1, const token& t2) return false; } -bool operator<(const decorated_key& lht, const decorated_key& rht) { - if (lht._token == rht._token) { - return static_cast(lht._key) < rht._key; - } else { - return lht._token < rht._token; - } -} - -bool operator==(const decorated_key& lht, const decorated_key& rht) { - if (lht._token == rht._token) { - return static_cast(lht._key) == rht._key; - } - return false; -} - -bool operator!=(const decorated_key& lht, const decorated_key& rht) { - return !(lht == rht); -} - std::ostream& operator<<(std::ostream& out, const token& t) { auto flags = out.flags(); for (auto c : t._data) { @@ -171,4 +152,35 @@ global_partitioner() { return default_partitioner; } +bool +decorated_key::equal(const schema& s, const decorated_key& other) const { + if (_token == other._token) { + return _key.legacy_equal(s, other._key); + } + return false; +} + +int +decorated_key::tri_compare(const schema& s, const decorated_key& other) const { + if (_token == other._token) { + return _key.legacy_tri_compare(s, other._key); + } else { + return _token < other._token ? -1 : 1; + } +} + +bool +decorated_key::less_compare(const schema& s, const decorated_key& other) const { + return tri_compare(s, other) < 0; +} + +decorated_key::less_comparator::less_comparator(schema_ptr s) + : s(std::move(s)) +{ } + +bool +decorated_key::less_comparator::operator()(const decorated_key& lhs, const decorated_key& rhs) const { + return lhs.less_compare(*s, rhs); +} + } diff --git a/dht/i_partitioner.hh b/dht/i_partitioner.hh index a4c9dfba26..470bc01b38 100644 --- a/dht/i_partitioner.hh +++ b/dht/i_partitioner.hh @@ -68,11 +68,25 @@ bool operator==(const token& t1, const token& t2); bool operator<(const token& t1, const token& t2); std::ostream& operator<<(std::ostream& out, const token& t); - +// Wraps partition_key with its corresponding token. +// +// Total ordering defined by comparators is compatible with Origin's ordering. class decorated_key { public: token _token; partition_key _key; + + struct less_comparator { + schema_ptr s; + less_comparator(schema_ptr s); + bool operator()(const decorated_key& k1, const decorated_key& k2) const; + }; + + bool equal(const schema& s, const decorated_key& other) const; + + bool less_compare(const schema& s, const decorated_key& other) const; + + int tri_compare(const schema& s, const decorated_key& other) const; }; class i_partitioner { @@ -84,8 +98,8 @@ public: * @param key the raw, client-facing key * @return decorated version of key */ - decorated_key decorate_key(const partition_key& key) { - return { get_token(key), key }; + decorated_key decorate_key(const schema& s, const partition_key& key) { + return { get_token(s, key), key }; } /** @@ -94,8 +108,8 @@ public: * @param key the raw, client-facing key * @return decorated version of key */ - decorated_key decorate_key(partition_key&& key) { - auto token = get_token(key); + decorated_key decorate_key(const schema& s, partition_key&& key) { + auto token = get_token(s, key); return { std::move(token), std::move(key) }; } @@ -122,7 +136,7 @@ public: * (This is NOT a method to create a token from its string representation; * for that, use tokenFactory.fromString.) */ - virtual token get_token(const partition_key& key) = 0; + virtual token get_token(const schema& s, const partition_key& key) = 0; virtual token get_token(const sstables::key_view& key) = 0; /** @@ -164,12 +178,6 @@ protected: friend bool operator<(const token& t1, const token& t2); }; -bool operator<(const decorated_key& lht, const decorated_key& rht); - -bool operator==(const decorated_key& lht, const decorated_key& rht); - -bool operator!=(const decorated_key& lht, const decorated_key& rht); - std::ostream& operator<<(std::ostream& out, const token& t); std::ostream& operator<<(std::ostream& out, const decorated_key& t); diff --git a/dht/murmur3_partitioner.cc b/dht/murmur3_partitioner.cc index 4ec86cbc34..21e857c482 100644 --- a/dht/murmur3_partitioner.cc +++ b/dht/murmur3_partitioner.cc @@ -22,10 +22,15 @@ murmur3_partitioner::get_token(bytes_view key) { } std::array hash; utils::murmur_hash::hash3_x64_128(key, 0, hash); + return get_token(hash[0]); +} + +token +murmur3_partitioner::get_token(uint64_t value) const { // We don't normalize() the value, since token includes an is-before-everything // indicator. // FIXME: will this require a repair when importing a database? - auto t = net::hton(normalize(hash[0])); + auto t = net::hton(normalize(value)); bytes b(bytes::initialized_later(), 8); std::copy_n(reinterpret_cast(&t), 8, b.begin()); return token{token::kind::key, std::move(b)}; @@ -37,8 +42,11 @@ murmur3_partitioner::get_token(const sstables::key_view& key) { } token -murmur3_partitioner::get_token(const partition_key& key) { - return get_token(bytes_view(key)); +murmur3_partitioner::get_token(const schema& s, const partition_key& key) { + std::array hash; + auto&& legacy = key.legacy_form(s); + utils::murmur_hash::hash3_x64_128(legacy.begin(), legacy.size(), 0, hash); + return get_token(hash[0]); } inline long long_token(const token& t) { diff --git a/dht/murmur3_partitioner.hh b/dht/murmur3_partitioner.hh index ab4d5da840..96d4b1104f 100644 --- a/dht/murmur3_partitioner.hh +++ b/dht/murmur3_partitioner.hh @@ -12,7 +12,7 @@ namespace dht { class murmur3_partitioner final : public i_partitioner { public: - virtual token get_token(const partition_key& key); + virtual token get_token(const schema& s, const partition_key& key); virtual token get_token(const sstables::key_view& key); virtual bool preserves_order() override { return false; } virtual std::map describe_ownership(const std::vector& sorted_tokens); @@ -22,6 +22,7 @@ public: private: static int64_t normalize(int64_t in); token get_token(bytes_view key); + token get_token(uint64_t value) const; }; diff --git a/mutation.cc b/mutation.cc index 1624704038..04b64a2915 100644 --- a/mutation.cc +++ b/mutation.cc @@ -10,8 +10,10 @@ mutation::mutation(dht::decorated_key key, schema_ptr schema) , _p(_schema) { } -mutation::mutation(partition_key key_, schema_ptr schema_) - : mutation(dht::global_partitioner().decorate_key(std::move(key_)), std::move(schema_)) +mutation::mutation(partition_key key_, schema_ptr schema) + : _schema(std::move(schema)) + , _dk(dht::global_partitioner().decorate_key(*_schema, std::move(key_))) + , _p(_schema) { } void mutation::set_static_cell(const column_definition& def, atomic_cell_or_collection value) { diff --git a/service/storage_proxy.cc b/service/storage_proxy.cc index 9088ef6c84..d64315eea7 100644 --- a/service/storage_proxy.cc +++ b/service/storage_proxy.cc @@ -1188,13 +1188,19 @@ storage_proxy::query(lw_shared_ptr cmd, db::consistency_lev if (range.is_singular()) { auto& key = range.start_value(); - auto dk = dht::global_partitioner().decorate_key(key); - auto shard = _db.local().shard_of(dk._token); - return _db.invoke_on(shard, [cmd] (database& db) { - return db.query(*cmd).then([] (auto&& f) { - return make_foreign(std::move(f)); - }); - }).finally([cmd] {}); + // TODO: consider storing decorated key in the request + try { + auto schema = _db.local().find_schema(cmd->cf_id); + auto token = dht::global_partitioner().get_token(*schema, key); + auto shard = _db.local().shard_of(token); + return _db.invoke_on(shard, [cmd](database& db) { + return db.query(*cmd).then([](auto&& f) { + return make_foreign(std::move(f)); + }); + }).finally([cmd] { }); + } catch (const no_such_column_family&) { + return make_empty(); + } } // FIXME: Respect cmd->row_limit to avoid unnecessary transfer diff --git a/tests/urchin/cql_test_env.cc b/tests/urchin/cql_test_env.cc index 9af5116776..19639706f7 100644 --- a/tests/urchin/cql_test_env.cc +++ b/tests/urchin/cql_test_env.cc @@ -120,7 +120,7 @@ public: auto& cf = db.find_column_family(ks_name, table_name); auto schema = cf._schema; auto pkey = partition_key::from_deeply_exploded(*schema, pk); - auto dk = dht::global_partitioner().decorate_key(pkey); + auto dk = dht::global_partitioner().decorate_key(*schema, pkey); auto shard = db.shard_of(dk._token); return _db->invoke_on(shard, [pkey = std::move(pkey), ck = std::move(ck), diff --git a/thrift/handler.cc b/thrift/handler.cc index 0be540778f..a278113c6e 100644 --- a/thrift/handler.cc +++ b/thrift/handler.cc @@ -117,7 +117,7 @@ public: return complete_with_exception(std::move(exn_cob), "column family %s not found", column_parent.column_family); } auto pk = key_from_thrift(schema, to_bytes(key)); - auto dk = dht::global_partitioner().decorate_key(pk); + auto dk = dht::global_partitioner().decorate_key(*schema, pk); auto shard = _db.local().shard_of(dk._token); auto do_get = [this, From 72c327b02f7fa0d3d0c86175ffa0f89154479a7f Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Thu, 30 Apr 2015 12:01:14 +0200 Subject: [PATCH 20/20] tests: Add partitioner test --- configure.py | 1 + tests/urchin/partitioner_test.cc | 45 ++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 tests/urchin/partitioner_test.cc diff --git a/configure.py b/configure.py index 36d9a2ebdb..5beb0c4d70 100755 --- a/configure.py +++ b/configure.py @@ -157,6 +157,7 @@ urchin_tests = [ 'tests/urchin/mutation_test', 'tests/urchin/types_test', 'tests/urchin/keys_test', + 'tests/urchin/partitioner_test', 'tests/perf/perf_mutation', 'tests/perf/perf_hash', 'tests/perf/perf_cql_parser', diff --git a/tests/urchin/partitioner_test.cc b/tests/urchin/partitioner_test.cc new file mode 100644 index 0000000000..d8a67ea975 --- /dev/null +++ b/tests/urchin/partitioner_test.cc @@ -0,0 +1,45 @@ +/* + * Copyright 2015 Cloudius Systems + */ + +#define BOOST_TEST_DYN_LINK +#define BOOST_TEST_MODULE core + +#include + +#include "dht/i_partitioner.hh" +#include "dht/murmur3_partitioner.hh" +#include "schema.hh" +#include "types.hh" + +static dht::token token_from_long(uint64_t value) { + auto t = net::hton(value); + bytes b(bytes::initialized_later(), 8); + std::copy_n(reinterpret_cast(&t), 8, b.begin()); + return { dht::token::kind::key, std::move(b) }; +} + +BOOST_AUTO_TEST_CASE(test_decorated_key_is_compatible_with_origin) { + schema s({}, "", "", + // partition key + {{"c1", int32_type}, {"c2", int32_type}}, + // clustering key + {}, + // regular columns + { + {"v", int32_type}, + }, + // static columns + {}, + // regular column name type + utf8_type + ); + + dht::murmur3_partitioner partitioner; + auto key = partition_key::from_deeply_exploded(s, {143, 234}); + auto dk = partitioner.decorate_key(s, key); + + // Expected value was taken from Origin + BOOST_REQUIRE_EQUAL(dk._token, token_from_long(4958784316840156970)); + BOOST_REQUIRE(dk._key.equal(s, key)); +}