From e1968c389eefe61dd734f094adca15be2bfa6dc7 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 11 Aug 2015 23:34:52 -0500 Subject: [PATCH] dht: use tri_compare for token comparisons Loading data from memory tends to be the most expensive part of the comparison operations. Because we don't have a tri_compare function for tokens, we end up having to do an equality test, which will load the token's data in memory, and then, because all we know is that they are not equal, we need to do another one. Having two dereferences is harmful, and shows up in my simple benchmark. This is because before writing to sstables, we must order the keys in decorated key order, which is heavy on the comparisons. The proposed change speeds up index write benchmark by 8.6%: Before: 41458.14 +- 1.49 partitions / sec (30 runs) After: 45020.81 +- 3.60 partitions / sec (30 runs) Parameters: --smp 6 --partitions 500000 Signed-off-by: Glauber Costa --- dht/byte_ordered_partitioner.hh | 7 ++--- dht/i_partitioner.cc | 48 +++++++++++++++------------------ dht/i_partitioner.hh | 14 ++++++++-- dht/murmur3_partitioner.cc | 21 ++++++--------- dht/murmur3_partitioner.hh | 3 +-- 5 files changed, 44 insertions(+), 49 deletions(-) diff --git a/dht/byte_ordered_partitioner.hh b/dht/byte_ordered_partitioner.hh index 06d223e87e..62cd10c1fa 100644 --- a/dht/byte_ordered_partitioner.hh +++ b/dht/byte_ordered_partitioner.hh @@ -29,11 +29,8 @@ public: virtual bool preserves_order() override { return true; } virtual std::map describe_ownership(const std::vector& sorted_tokens) override; virtual data_type get_token_validator() override { return bytes_type; } - virtual bool is_equal(const token& t1, const token& t2) override { - return compare_unsigned(t1._data, t2._data) == 0; - } - virtual bool is_less(const token& t1, const token& t2) override { - return compare_unsigned(t1._data, t2._data) < 0; + virtual int tri_compare(const token& t1, const token& t2) override { + return compare_unsigned(t1._data, t2._data); } virtual token midpoint(const token& t1, const token& t2) const; virtual sstring to_sstring(const dht::token& t) const override { diff --git a/dht/i_partitioner.cc b/dht/i_partitioner.cc index b35f2ba611..d20b265695 100644 --- a/dht/i_partitioner.cc +++ b/dht/i_partitioner.cc @@ -94,35 +94,28 @@ static inline unsigned char get_byte(bytes_view b, size_t off) { } } -bool i_partitioner::is_equal(const token& t1, const token& t2) { - - size_t sz = std::max(t1._data.size(), t2._data.size()); - - for (size_t i = 0; i < sz; i++) { - auto b1 = get_byte(t1._data, i); - auto b2 = get_byte(t2._data, i); - if (b1 != b2) { - return false; - } - } - return true; - -} - -bool i_partitioner::is_less(const token& t1, const token& t2) { - +int i_partitioner::tri_compare(const token& t1, const token& t2) { size_t sz = std::max(t1._data.size(), t2._data.size()); for (size_t i = 0; i < sz; i++) { auto b1 = get_byte(t1._data, i); auto b2 = get_byte(t2._data, i); if (b1 < b2) { - return true; + return -1; } else if (b1 > b2) { - return false; + return 1; } } - return false; + return 0; +} + +int tri_compare(const token& t1, const token& t2) { + if (t1._kind == t2._kind) { + return global_partitioner().tri_compare(t1, t2); + } else if (t1._kind < t2._kind) { + return -1; + } + return 1; } bool operator==(const token& t1, const token& t2) @@ -188,19 +181,20 @@ decorated_key::equal(const schema& s, const decorated_key& other) const { int decorated_key::tri_compare(const schema& s, const decorated_key& other) const { - if (_token == other._token) { - return _key.legacy_tri_compare(s, other._key); + auto r = dht::tri_compare(_token, other._token); + if (r != 0) { + return r; } else { - return _token < other._token ? -1 : 1; + return _key.legacy_tri_compare(s, other._key); } } int decorated_key::tri_compare(const schema& s, const ring_position& other) const { - if (_token != other.token()) { - return _token < other.token() ? -1 : 1; - } - if (other.has_key()) { + auto r = dht::tri_compare(_token, other.token()); + if (r != 0) { + return r; + } else if (other.has_key()) { return _key.legacy_tri_compare(s, *other.key()); } return -other.relation_to_keys(); diff --git a/dht/i_partitioner.hh b/dht/i_partitioner.hh index a5256fa8e7..fa8fc1edd6 100644 --- a/dht/i_partitioner.hh +++ b/dht/i_partitioner.hh @@ -92,6 +92,7 @@ token minimum_token(); token maximum_token(); bool operator==(const token& t1, const token& t2); bool operator<(const token& t1, const token& t2); +int tri_compare(const token& t1, const token& t2); inline bool operator!=(const token& t1, const token& t2) { return std::rel_ops::operator!=(t1, t2); } inline bool operator>(const token& t1, const token& t2) { return std::rel_ops::operator>(t1, t2); } inline bool operator<=(const token& t1, const token& t2) { return std::rel_ops::operator<=(t1, t2); } @@ -231,17 +232,26 @@ public: */ virtual unsigned shard_of(const token& t) const = 0; protected: + /** + * @return < 0 if if t1's _data array is less, t2's. 0 if they are equal, and > 0 otherwise. _kind comparison should be done separately. + */ + virtual int tri_compare(const token& t1, const token& t2); /** * @return true if t1's _data array is equal t2's. _kind comparison should be done separately. */ - virtual bool is_equal(const token& t1, const token& t2); + bool is_equal(const token& t1, const token& t2) { + return tri_compare(t1, t2) == 0; + } /** * @return true if t1's _data array is less then t2's. _kind comparison should be done separately. */ - virtual bool is_less(const token& t1, const token& t2); + bool is_less(const token& t1, const token& t2) { + return tri_compare(t1, t2) < 0; + } friend bool operator==(const token& t1, const token& t2); friend bool operator<(const token& t1, const token& t2); + friend int tri_compare(const token& t1, const token& t2); }; // diff --git a/dht/murmur3_partitioner.cc b/dht/murmur3_partitioner.cc index 8d21ee4ec3..49bbb0a337 100644 --- a/dht/murmur3_partitioner.cc +++ b/dht/murmur3_partitioner.cc @@ -71,20 +71,15 @@ sstring murmur3_partitioner::to_sstring(const token& t) const { return ::to_sstring(long_token(t)); } -bool murmur3_partitioner::is_equal(const token& t1, const token& t2) { +int murmur3_partitioner::tri_compare(const token& t1, const token& t2) { + long l1 = long_token(t1); + long l2 = long_token(t2); - auto l1 = long_token(t1); - auto l2 = long_token(t2); - - return l1 == l2; -} - -bool murmur3_partitioner::is_less(const token& t1, const token& t2) { - - auto l1 = long_token(t1); - auto l2 = long_token(t2); - - return l1 < l2; + if (l1 == l2) { + return 0; + } else { + return l1 < l2 ? -1 : 1; + } } token murmur3_partitioner::midpoint(const token& t1, const token& t2) const { diff --git a/dht/murmur3_partitioner.hh b/dht/murmur3_partitioner.hh index db640a8349..371bf5f911 100644 --- a/dht/murmur3_partitioner.hh +++ b/dht/murmur3_partitioner.hh @@ -18,8 +18,7 @@ public: virtual bool preserves_order() override { return false; } virtual std::map describe_ownership(const std::vector& sorted_tokens) override; virtual data_type get_token_validator() override; - virtual bool is_equal(const token& t1, const token& t2) override; - virtual bool is_less(const token& t1, const token& t2) override; + virtual int tri_compare(const token& t1, const token& t2) override; virtual token midpoint(const token& t1, const token& t2) const override; virtual sstring to_sstring(const dht::token& t) const override; virtual unsigned shard_of(const token& t) const override;