mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-23 18:10:39 +00:00
Merge branch 'tgrabiec/fix-decorated-key-order' of github.com:cloudius-systems/seastar-dev into db
Fix decorated_key ordering to match Origin's, from Tomasz.
This commit is contained in:
16
compound.hh
16
compound.hh
@@ -41,6 +41,7 @@ private:
|
||||
const bool _byte_order_equal;
|
||||
const bool _byte_order_comparable;
|
||||
public:
|
||||
static constexpr bool is_prefixable = AllowPrefixes == allow_prefixes::yes;
|
||||
using prefix_type = compound_type<allow_prefixes::yes>;
|
||||
using value_type = std::vector<bytes>;
|
||||
|
||||
@@ -58,6 +59,10 @@ public:
|
||||
return _types;
|
||||
}
|
||||
|
||||
// Returns true when this compound type consists of exactly one component type.
bool is_singular() const {
|
||||
return _types.size() == 1;
|
||||
}
|
||||
|
||||
prefix_type as_prefix() {
|
||||
return prefix_type(_types);
|
||||
}
|
||||
@@ -149,7 +154,7 @@ public:
|
||||
bytes decompose_value(const value_type& values) {
|
||||
return ::serialize_value(*this, values);
|
||||
}
|
||||
class iterator : public std::iterator<std::forward_iterator_tag, bytes_view> {
|
||||
class iterator : public std::iterator<std::input_iterator_tag, bytes_view> {
|
||||
private:
|
||||
ssize_t _types_left;
|
||||
bytes_view _v;
|
||||
@@ -194,7 +199,13 @@ public:
|
||||
read_current();
|
||||
return *this;
|
||||
}
|
||||
// Post-increment: steps to the next component and hands back a copy of the
// iterator as it was before the step.
iterator operator++(int) {
    iterator previous = *this;
    operator++();
    return previous;
}
|
||||
const value_type& operator*() const { return _current; }
|
||||
const value_type* operator->() const { return &_current; }
|
||||
bool operator!=(const iterator& i) const { return _v.begin() != i._v.begin(); }
|
||||
bool operator==(const iterator& i) const { return _v.begin() == i._v.begin(); }
|
||||
};
|
||||
@@ -204,6 +215,9 @@ public:
|
||||
iterator end(const bytes_view& v) const {
|
||||
return iterator(typename iterator::end_iterator_tag(), v);
|
||||
}
|
||||
boost::iterator_range<iterator> components(const bytes_view& v) const {
|
||||
return { begin(v), end(v) };
|
||||
}
|
||||
auto iter_items(const bytes_view& v) {
|
||||
return boost::iterator_range<iterator>(begin(v), end(v));
|
||||
}
|
||||
|
||||
175
compound_compat.hh
Normal file
175
compound_compat.hh
Normal file
@@ -0,0 +1,175 @@
|
||||
/*
|
||||
* Copyright 2015 Cloudius Systems
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "compound.hh"
|
||||
|
||||
//
|
||||
// This header provides adaptors between the representation used by our compound_type<>
|
||||
// and representation used by Origin.
|
||||
//
|
||||
// For single-component keys the legacy representation is equivalent
|
||||
// to the only component's serialized form. For composite keys it is the following
|
||||
// (See org.apache.cassandra.db.marshal.CompositeType):
|
||||
//
|
||||
// <representation> ::= ( <component> )+
|
||||
// <component> ::= <length> <value> <EOC>
|
||||
// <length> ::= <uint16_t>
|
||||
// <EOC> ::= <uint8_t>
|
||||
//
|
||||
// <value> is component's value in serialized form. <EOC> is always 0 for partition key.
|
||||
//
|
||||
|
||||
// Given a representation serialized using @CompoundType, provides a view on the
|
||||
// representation of the same components as they would be serialized by Origin.
|
||||
//
|
||||
// The view is exposed in the form of a byte range. For an example of use, see the to_legacy() function.
|
||||
template <typename CompoundType>
|
||||
class legacy_compound_view {
|
||||
static_assert(!CompoundType::is_prefixable, "Legacy view not defined for prefixes");
|
||||
CompoundType& _type;
|
||||
bytes_view _packed;
|
||||
public:
|
||||
legacy_compound_view(CompoundType& c, bytes_view packed)
|
||||
: _type(c)
|
||||
, _packed(packed)
|
||||
{ }
|
||||
|
||||
class iterator : public std::iterator<std::input_iterator_tag, bytes::value_type> {
|
||||
bool _singular;
|
||||
// Offset within virtual output space of a component.
|
||||
//
|
||||
// Offset: -2 -1 0 ... LEN-1 LEN
|
||||
// Field: [ length MSB ] [ length LSB ] [ VALUE ] [ EOC ]
|
||||
//
|
||||
int32_t _offset;
|
||||
typename CompoundType::iterator _i;
|
||||
public:
|
||||
struct end_tag {};
|
||||
|
||||
iterator(const legacy_compound_view& v)
|
||||
: _singular(v._type.is_singular())
|
||||
, _offset(_singular ? 0 : -2)
|
||||
, _i(v._type.begin(v._packed))
|
||||
{ }
|
||||
|
||||
iterator(const legacy_compound_view& v, end_tag)
|
||||
: _offset(-2)
|
||||
, _i(v._type.end(v._packed))
|
||||
{ }
|
||||
|
||||
value_type operator*() const {
|
||||
int32_t component_size = _i->size();
|
||||
if (_offset == -2) {
|
||||
return (component_size >> 8) & 0xff;
|
||||
} else if (_offset == -1) {
|
||||
return component_size & 0xff;
|
||||
} else if (_offset < component_size) {
|
||||
return (*_i)[_offset];
|
||||
} else { // _offset == component_size
|
||||
return 0; // EOC field
|
||||
}
|
||||
}
|
||||
|
||||
iterator& operator++() {
|
||||
auto component_size = (int32_t) _i->size();
|
||||
if (_offset < component_size
|
||||
// When _singular, we skip the EOC byte.
|
||||
&& (!_singular || _offset != (component_size - 1)))
|
||||
{
|
||||
++_offset;
|
||||
} else {
|
||||
++_i;
|
||||
_offset = -2;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool operator==(const iterator& other) const {
|
||||
return _offset == other._offset && other._i == _i;
|
||||
}
|
||||
|
||||
bool operator!=(const iterator& other) const {
|
||||
return !(*this == other);
|
||||
}
|
||||
};
|
||||
|
||||
// A trichotomic comparator defined on @CompoundType representations which
|
||||
// orders them according to lexicographical ordering of their corresponding
|
||||
// legacy representations.
|
||||
//
|
||||
// tri_comparator(t)(k1, k2)
|
||||
//
|
||||
// ...is equivalent to:
|
||||
//
|
||||
// compare_unsigned(to_legacy(t, k1), to_legacy(t, k2))
|
||||
//
|
||||
// ...but more efficient.
|
||||
//
|
||||
struct tri_comparator {
|
||||
const CompoundType& _type;
|
||||
|
||||
tri_comparator(const CompoundType& type)
|
||||
: _type(type)
|
||||
{ }
|
||||
|
||||
tri_comparator(tri_comparator&& other)
|
||||
: _type(other._type)
|
||||
{ }
|
||||
|
||||
tri_comparator& operator=(tri_comparator&& other) {
|
||||
this->~tri_comparator();
|
||||
new (this) tri_comparator(std::move(other));
|
||||
return *this;
|
||||
}
|
||||
|
||||
// @k1 and @k2 must be serialized using @type, which was passed to the constructor.
|
||||
int operator()(bytes_view k1, bytes_view k2) const {
|
||||
if (_type.is_singular()) {
|
||||
return compare_unsigned(*_type.begin(k1), *_type.begin(k2));
|
||||
}
|
||||
return lexicographical_tri_compare(
|
||||
_type.begin(k1), _type.end(k1),
|
||||
_type.begin(k2), _type.end(k2),
|
||||
[] (const bytes_view& c1, const bytes_view& c2) -> int {
|
||||
if (c1.size() != c2.size()) {
|
||||
return c1.size() < c2.size() ? -1 : 1;
|
||||
}
|
||||
return memcmp(c1.begin(), c2.begin(), c1.size());
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
// Equivalent to std::distance(begin(), end()), but computes faster
|
||||
size_t size() const {
|
||||
if (_type.is_singular()) {
|
||||
return _type.begin(_packed)->size();
|
||||
}
|
||||
size_t s = 0;
|
||||
for (auto&& component : _type.components(_packed)) {
|
||||
s += 2 /* length field */ + component.size() + 1 /* EOC */;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
iterator begin() const {
|
||||
return iterator(*this);
|
||||
}
|
||||
|
||||
iterator end() const {
|
||||
return iterator(*this, typename iterator::end_tag());
|
||||
}
|
||||
};
|
||||
|
||||
// Converts compound_type<> representation to legacy representation
|
||||
// @packed is assumed to be serialized using supplied @type.
|
||||
template <typename CompoundType>
|
||||
static inline
|
||||
bytes to_legacy(CompoundType& type, bytes_view packed) {
|
||||
legacy_compound_view<CompoundType> lv(type, packed);
|
||||
bytes legacy_form(bytes::initialized_later(), lv.size());
|
||||
std::copy(lv.begin(), lv.end(), legacy_form.begin());
|
||||
return legacy_form;
|
||||
}
|
||||
@@ -157,7 +157,9 @@ urchin_tests = [
|
||||
'tests/urchin/mutation_test',
|
||||
'tests/urchin/types_test',
|
||||
'tests/urchin/keys_test',
|
||||
'tests/urchin/partitioner_test',
|
||||
'tests/perf/perf_mutation',
|
||||
'tests/perf/perf_hash',
|
||||
'tests/perf/perf_cql_parser',
|
||||
'tests/perf/perf_simple_query',
|
||||
'tests/urchin/cql_query_test',
|
||||
@@ -208,6 +210,7 @@ tests = [
|
||||
tests += [
|
||||
'tests/urchin/bytes_ostream_test',
|
||||
'tests/urchin/UUID_test',
|
||||
'tests/urchin/murmur_hash_test',
|
||||
]
|
||||
|
||||
apps = [
|
||||
@@ -479,6 +482,7 @@ deps['tests/urchin/serializer_test'] += boost_test_lib
|
||||
|
||||
deps['tests/urchin/bytes_ostream_test'] = ['tests/urchin/bytes_ostream_test.cc']
|
||||
deps['tests/urchin/UUID_test'] = ['utils/UUID_gen.cc', 'tests/urchin/UUID_test.cc']
|
||||
deps['tests/urchin/murmur_hash_test'] = ['bytes.cc', 'utils/murmur_hash.cc', 'tests/urchin/murmur_hash_test.cc']
|
||||
|
||||
warnings = [
|
||||
'-Wno-mismatched-tags', # clang-only
|
||||
|
||||
@@ -25,6 +25,7 @@ thread_local logging::logger dblog("database");
|
||||
|
||||
column_family::column_family(schema_ptr schema)
|
||||
: _schema(std::move(schema))
|
||||
, partitions(dht::decorated_key::less_comparator(_schema))
|
||||
{ }
|
||||
|
||||
// define in .cc, since sstable is forward-declared in .hh
|
||||
@@ -39,7 +40,7 @@ column_family::find_partition(const dht::decorated_key& key) {
|
||||
|
||||
mutation_partition*
|
||||
column_family::find_partition_slow(const partition_key& key) {
|
||||
return find_partition(dht::global_partitioner().decorate_key(key));
|
||||
return find_partition(dht::global_partitioner().decorate_key(*_schema, key));
|
||||
}
|
||||
|
||||
row*
|
||||
@@ -53,14 +54,14 @@ column_family::find_row(const dht::decorated_key& partition_key, const clusterin
|
||||
|
||||
mutation_partition&
|
||||
column_family::find_or_create_partition_slow(const partition_key& key) {
|
||||
return find_or_create_partition(dht::global_partitioner().decorate_key(key));
|
||||
return find_or_create_partition(dht::global_partitioner().decorate_key(*_schema, key));
|
||||
}
|
||||
|
||||
mutation_partition&
|
||||
column_family::find_or_create_partition(const dht::decorated_key& key) {
|
||||
// call lower_bound so we have a hint for the insert, just in case.
|
||||
auto i = partitions.lower_bound(key);
|
||||
if (i == partitions.end() || key != i->first) {
|
||||
if (i == partitions.end() || !key.equal(*_schema, i->first)) {
|
||||
i = partitions.emplace_hint(i, std::make_pair(std::move(key), mutation_partition(_schema)));
|
||||
}
|
||||
return i->second;
|
||||
|
||||
@@ -64,8 +64,7 @@ struct column_family {
|
||||
row& find_or_create_row_slow(const partition_key& partition_key, const clustering_key& clustering_key);
|
||||
row* find_row(const dht::decorated_key& partition_key, const clustering_key& clustering_key);
|
||||
schema_ptr _schema;
|
||||
// partition key -> partition
|
||||
std::map<dht::decorated_key, mutation_partition> partitions;
|
||||
std::map<dht::decorated_key, mutation_partition, dht::decorated_key::less_comparator> partitions;
|
||||
void apply(const mutation& m);
|
||||
// Returns at most "cmd.limit" rows
|
||||
future<lw_shared_ptr<query::result>> query(const query::read_command& cmd);
|
||||
|
||||
@@ -399,14 +399,16 @@ std::vector<const char*> ALL { KEYSPACES, COLUMNFAMILIES, COLUMNS, TRIGGERS, USE
|
||||
future<schema_result>
|
||||
read_schema_for_keyspaces(service::storage_proxy& proxy, const sstring& schema_table_name, const std::set<sstring>& keyspace_names)
|
||||
{
|
||||
auto schema = proxy.get_db().local().find_schema(system_keyspace::NAME, schema_table_name);
|
||||
auto map = [&proxy, schema_table_name] (sstring keyspace_name) { return read_schema_partition_for_keyspace(proxy, schema_table_name, keyspace_name); };
|
||||
auto insert = [] (schema_result&& schema, auto&& schema_entity) {
|
||||
auto insert = [] (schema_result&& result, auto&& schema_entity) {
|
||||
if (schema_entity.second) {
|
||||
schema.insert(std::move(schema_entity));
|
||||
result.insert(std::move(schema_entity));
|
||||
}
|
||||
return std::move(schema);
|
||||
return std::move(result);
|
||||
};
|
||||
return map_reduce(keyspace_names.begin(), keyspace_names.end(), map, schema_result(), insert);
|
||||
return map_reduce(keyspace_names.begin(), keyspace_names.end(), map,
|
||||
schema_result(dht::decorated_key::less_comparator(schema)), insert);
|
||||
}
|
||||
|
||||
#if 0
|
||||
@@ -420,7 +422,8 @@ std::vector<const char*> ALL { KEYSPACES, COLUMNFAMILIES, COLUMNS, TRIGGERS, USE
|
||||
read_schema_partition_for_keyspace(service::storage_proxy& proxy, const sstring& schema_table_name, const sstring& keyspace_name)
|
||||
{
|
||||
auto schema = proxy.get_db().local().find_schema(system_keyspace::NAME, schema_table_name);
|
||||
auto keyspace_key = dht::global_partitioner().decorate_key(partition_key::from_single_value(*schema, to_bytes(keyspace_name)));
|
||||
auto keyspace_key = dht::global_partitioner().decorate_key(*schema,
|
||||
partition_key::from_single_value(*schema, to_bytes(keyspace_name)));
|
||||
return read_schema_partition_for_keyspace(proxy, schema_table_name, keyspace_key);
|
||||
}
|
||||
|
||||
|
||||
@@ -39,7 +39,8 @@ class result_set;
|
||||
namespace db {
|
||||
namespace legacy_schema_tables {
|
||||
|
||||
using schema_result = std::map<dht::decorated_key, foreign_ptr<lw_shared_ptr<query::result_set>>>;
|
||||
using schema_result = std::map<dht::decorated_key, foreign_ptr<lw_shared_ptr<query::result_set>>,
|
||||
dht::decorated_key::less_comparator>;
|
||||
|
||||
static constexpr auto KEYSPACES = "schema_keyspaces";
|
||||
static constexpr auto COLUMNFAMILIES = "schema_columnfamilies";
|
||||
|
||||
@@ -129,25 +129,6 @@ bool operator<(const token& t1, const token& t2)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool operator<(const decorated_key& lht, const decorated_key& rht) {
|
||||
if (lht._token == rht._token) {
|
||||
return static_cast<bytes_view>(lht._key) < rht._key;
|
||||
} else {
|
||||
return lht._token < rht._token;
|
||||
}
|
||||
}
|
||||
|
||||
bool operator==(const decorated_key& lht, const decorated_key& rht) {
|
||||
if (lht._token == rht._token) {
|
||||
return static_cast<bytes_view>(lht._key) == rht._key;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool operator!=(const decorated_key& lht, const decorated_key& rht) {
|
||||
return !(lht == rht);
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const token& t) {
|
||||
auto flags = out.flags();
|
||||
for (auto c : t._data) {
|
||||
@@ -171,4 +152,35 @@ global_partitioner() {
|
||||
return default_partitioner;
|
||||
}
|
||||
|
||||
bool
|
||||
decorated_key::equal(const schema& s, const decorated_key& other) const {
|
||||
if (_token == other._token) {
|
||||
return _key.legacy_equal(s, other._key);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
int
|
||||
decorated_key::tri_compare(const schema& s, const decorated_key& other) const {
|
||||
if (_token == other._token) {
|
||||
return _key.legacy_tri_compare(s, other._key);
|
||||
} else {
|
||||
return _token < other._token ? -1 : 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true when tri_compare() orders this key before @other.
bool
|
||||
decorated_key::less_compare(const schema& s, const decorated_key& other) const {
|
||||
return tri_compare(s, other) < 0;
|
||||
}
|
||||
|
||||
// Stores the schema pointer for later use by operator(); the argument is taken
// by value and moved into the member of the same name.
decorated_key::less_comparator::less_comparator(schema_ptr s)
|
||||
: s(std::move(s))
|
||||
{ }
|
||||
|
||||
// Returns true when @lhs orders before @rhs according to
// decorated_key::less_compare(), evaluated against the stored schema.
bool
|
||||
decorated_key::less_comparator::operator()(const decorated_key& lhs, const decorated_key& rhs) const {
|
||||
return lhs.less_compare(*s, rhs);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -68,11 +68,25 @@ bool operator==(const token& t1, const token& t2);
|
||||
bool operator<(const token& t1, const token& t2);
|
||||
std::ostream& operator<<(std::ostream& out, const token& t);
|
||||
|
||||
|
||||
// Wraps partition_key with its corresponding token.
|
||||
//
|
||||
// Total ordering defined by comparators is compatible with Origin's ordering.
|
||||
class decorated_key {
|
||||
public:
|
||||
// Token paired with _key.
token _token;
|
||||
// The wrapped partition key.
partition_key _key;
|
||||
|
||||
// "Less than" functor over decorated keys; it carries the schema that the
// comparison is evaluated against.
struct less_comparator {
|
||||
schema_ptr s;
|
||||
less_comparator(schema_ptr s);
|
||||
bool operator()(const decorated_key& k1, const decorated_key& k2) const;
|
||||
};
|
||||
|
||||
// Returns true when this key and @other are equal under schema @s.
bool equal(const schema& s, const decorated_key& other) const;
|
||||
|
||||
// Returns true when this key orders before @other under schema @s.
bool less_compare(const schema& s, const decorated_key& other) const;
|
||||
|
||||
// Trichotomic comparison under schema @s: negative when this key orders
// before @other, positive when it orders after, zero when they are equal.
int tri_compare(const schema& s, const decorated_key& other) const;
|
||||
};
|
||||
|
||||
class i_partitioner {
|
||||
@@ -84,8 +98,8 @@ public:
|
||||
* @param key the raw, client-facing key
|
||||
* @return decorated version of key
|
||||
*/
|
||||
decorated_key decorate_key(const partition_key& key) {
|
||||
return { get_token(key), key };
|
||||
decorated_key decorate_key(const schema& s, const partition_key& key) {
|
||||
return { get_token(s, key), key };
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -94,8 +108,8 @@ public:
|
||||
* @param key the raw, client-facing key
|
||||
* @return decorated version of key
|
||||
*/
|
||||
decorated_key decorate_key(partition_key&& key) {
|
||||
auto token = get_token(key);
|
||||
decorated_key decorate_key(const schema& s, partition_key&& key) {
|
||||
auto token = get_token(s, key);
|
||||
return { std::move(token), std::move(key) };
|
||||
}
|
||||
|
||||
@@ -122,7 +136,7 @@ public:
|
||||
* (This is NOT a method to create a token from its string representation;
|
||||
* for that, use tokenFactory.fromString.)
|
||||
*/
|
||||
virtual token get_token(const partition_key& key) = 0;
|
||||
virtual token get_token(const schema& s, const partition_key& key) = 0;
|
||||
virtual token get_token(const sstables::key_view& key) = 0;
|
||||
|
||||
/**
|
||||
@@ -164,12 +178,6 @@ protected:
|
||||
friend bool operator<(const token& t1, const token& t2);
|
||||
};
|
||||
|
||||
bool operator<(const decorated_key& lht, const decorated_key& rht);
|
||||
|
||||
bool operator==(const decorated_key& lht, const decorated_key& rht);
|
||||
|
||||
bool operator!=(const decorated_key& lht, const decorated_key& rht);
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const token& t);
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const decorated_key& t);
|
||||
|
||||
@@ -22,15 +22,33 @@ murmur3_partitioner::get_token(bytes_view key) {
|
||||
}
|
||||
std::array<uint64_t, 2> hash;
|
||||
utils::murmur_hash::hash3_x64_128(key, 0, hash);
|
||||
return get_token(hash[0]);
|
||||
}
|
||||
|
||||
token
|
||||
murmur3_partitioner::get_token(uint64_t value) const {
|
||||
// We don't normalize() the value, since token includes an is-before-everything
|
||||
// indicator.
|
||||
// FIXME: will this require a repair when importing a database?
|
||||
auto t = net::hton(normalize(hash[0]));
|
||||
auto t = net::hton(normalize(value));
|
||||
bytes b(bytes::initialized_later(), 8);
|
||||
std::copy_n(reinterpret_cast<int8_t*>(&t), 8, b.begin());
|
||||
return token{token::kind::key, std::move(b)};
|
||||
}
|
||||
|
||||
token
|
||||
murmur3_partitioner::get_token(const sstables::key_view& key) {
|
||||
return get_token(bytes_view(key));
|
||||
}
|
||||
|
||||
token
murmur3_partitioner::get_token(const schema& s, const partition_key& key) {
    // The hash is computed over the key's legacy (Origin-compatible) form,
    // which is consumed through its iterator without being materialized.
    const auto legacy_view = key.legacy_form(s);
    std::array<uint64_t, 2> digest;
    utils::murmur_hash::hash3_x64_128(legacy_view.begin(), legacy_view.size(), 0, digest);
    return get_token(digest[0]);
}
|
||||
|
||||
inline long long_token(const token& t) {
|
||||
|
||||
if (t._data.size() != sizeof(long)) {
|
||||
|
||||
@@ -12,12 +12,8 @@ namespace dht {
|
||||
|
||||
class murmur3_partitioner final : public i_partitioner {
|
||||
public:
|
||||
virtual token get_token(const partition_key& key) override {
|
||||
return get_token(bytes_view(key));
|
||||
}
|
||||
virtual token get_token(const sstables::key_view& key) override {
|
||||
return get_token(bytes_view(key));
|
||||
}
|
||||
virtual token get_token(const schema& s, const partition_key& key);
|
||||
virtual token get_token(const sstables::key_view& key);
|
||||
virtual bool preserves_order() override { return false; }
|
||||
virtual std::map<token, float> describe_ownership(const std::vector<token>& sorted_tokens);
|
||||
virtual data_type get_token_validator();
|
||||
@@ -26,6 +22,7 @@ public:
|
||||
private:
|
||||
static int64_t normalize(int64_t in);
|
||||
token get_token(bytes_view key);
|
||||
token get_token(uint64_t value) const;
|
||||
};
|
||||
|
||||
|
||||
|
||||
34
keys.hh
34
keys.hh
@@ -7,6 +7,7 @@
|
||||
#include "schema.hh"
|
||||
#include "bytes.hh"
|
||||
#include "types.hh"
|
||||
#include "compound_compat.hh"
|
||||
|
||||
//
|
||||
// This header defines type system for primary key holders.
|
||||
@@ -111,6 +112,7 @@ public:
|
||||
}
|
||||
|
||||
// begin() and end() return iterators over components of this compound. The iterator yields a bytes_view to the component.
|
||||
// The iterators satisfy InputIterator concept.
|
||||
auto begin(const schema& s) const {
|
||||
return get_compound_type(s)->begin(_bytes);
|
||||
}
|
||||
@@ -119,6 +121,12 @@ public:
|
||||
auto end(const schema& s) const {
|
||||
return get_compound_type(s)->end(_bytes);
|
||||
}
|
||||
|
||||
// Returns a view of the component at position @idx, reached by stepping the
// component iterator forward from the first component.
bytes_view get_component(const schema& s, size_t idx) const {
    auto pos = begin(s);
    for (size_t skipped = 0; skipped < idx; ++skipped) {
        ++pos;
    }
    return *pos;
}
|
||||
};
|
||||
|
||||
template <typename TopLevel, typename PrefixTopLevel>
|
||||
@@ -269,16 +277,11 @@ public:
|
||||
};
|
||||
|
||||
class partition_key : public compound_wrapper<partition_key> {
|
||||
using c_type = compound_type<allow_prefixes::no>;
|
||||
public:
|
||||
partition_key(bytes&& b) : compound_wrapper<partition_key>(std::move(b)) {}
|
||||
public:
|
||||
using compound = lw_shared_ptr<compound_type<allow_prefixes::no>>;
|
||||
|
||||
bytes_view get_component(const schema& s, size_t idx) const {
|
||||
auto it = begin(s);
|
||||
std::advance(it, idx);
|
||||
return *it;
|
||||
}
|
||||
using compound = lw_shared_ptr<c_type>;
|
||||
|
||||
static partition_key from_bytes(bytes b) {
|
||||
return partition_key(std::move(b));
|
||||
@@ -288,6 +291,23 @@ public:
|
||||
return s.partition_key_type();
|
||||
}
|
||||
|
||||
// Returns key's representation which is compatible with Origin.
|
||||
// The result is a non-owning view: it is valid only as long as both the schema and this key are live.
|
||||
const legacy_compound_view<c_type> legacy_form(const schema& s) const {
|
||||
return { *get_compound_type(s), _bytes };
|
||||
}
|
||||
|
||||
// A trichotomic comparator for ordering compatible with Origin.
|
||||
int legacy_tri_compare(const schema& s, const partition_key& o) const {
|
||||
auto cmp = legacy_compound_view<c_type>::tri_comparator(*get_compound_type(s));
|
||||
return cmp(*this, o);
|
||||
}
|
||||
|
||||
// Checks if keys are equal in a way which is compatible with Origin.
|
||||
bool legacy_equal(const schema& s, const partition_key& o) const {
|
||||
return legacy_tri_compare(s, o) == 0;
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& out, const partition_key& pk);
|
||||
};
|
||||
|
||||
|
||||
@@ -10,8 +10,10 @@ mutation::mutation(dht::decorated_key key, schema_ptr schema)
|
||||
, _p(_schema)
|
||||
{ }
|
||||
|
||||
mutation::mutation(partition_key key_, schema_ptr schema_)
|
||||
: mutation(dht::global_partitioner().decorate_key(std::move(key_)), std::move(schema_))
|
||||
mutation::mutation(partition_key key_, schema_ptr schema)
|
||||
: _schema(std::move(schema))
|
||||
, _dk(dht::global_partitioner().decorate_key(*_schema, std::move(key_)))
|
||||
, _p(_schema)
|
||||
{ }
|
||||
|
||||
void mutation::set_static_cell(const column_definition& def, atomic_cell_or_collection value) {
|
||||
|
||||
@@ -1188,13 +1188,19 @@ storage_proxy::query(lw_shared_ptr<query::read_command> cmd, db::consistency_lev
|
||||
|
||||
if (range.is_singular()) {
|
||||
auto& key = range.start_value();
|
||||
auto dk = dht::global_partitioner().decorate_key(key);
|
||||
auto shard = _db.local().shard_of(dk._token);
|
||||
return _db.invoke_on(shard, [cmd] (database& db) {
|
||||
return db.query(*cmd).then([] (auto&& f) {
|
||||
return make_foreign(std::move(f));
|
||||
});
|
||||
}).finally([cmd] {});
|
||||
// TODO: consider storing decorated key in the request
|
||||
try {
|
||||
auto schema = _db.local().find_schema(cmd->cf_id);
|
||||
auto token = dht::global_partitioner().get_token(*schema, key);
|
||||
auto shard = _db.local().shard_of(token);
|
||||
return _db.invoke_on(shard, [cmd](database& db) {
|
||||
return db.query(*cmd).then([](auto&& f) {
|
||||
return make_foreign(std::move(f));
|
||||
});
|
||||
}).finally([cmd] { });
|
||||
} catch (const no_such_column_family&) {
|
||||
return make_empty();
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: Respect cmd->row_limit to avoid unnecessary transfer
|
||||
|
||||
1
test.py
1
test.py
@@ -42,6 +42,7 @@ boost_tests = [
|
||||
'cartesian_product_test',
|
||||
'urchin/UUID_test',
|
||||
'urchin/compound_test',
|
||||
'urchin/murmur_hash_test',
|
||||
]
|
||||
|
||||
other_tests = [
|
||||
|
||||
35
tests/perf/perf_hash.cc
Normal file
35
tests/perf/perf_hash.cc
Normal file
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright 2015 Cloudius Systems
|
||||
*/
|
||||
|
||||
#include "utils/murmur_hash.hh"
|
||||
#include "tests/perf/perf.hh"
|
||||
|
||||
volatile uint64_t black_hole;
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
const uint64_t seed = 0;
|
||||
auto src = bytes("0123412308129301923019283056789012345");
|
||||
|
||||
uint64_t sink = 0;
|
||||
|
||||
std::cout << "Timing fixed hash...\n";
|
||||
|
||||
time_it([&] {
|
||||
std::array<uint64_t,2> dst;
|
||||
utils::murmur_hash::hash3_x64_128(src, seed, dst);
|
||||
sink += dst[0];
|
||||
sink += dst[1];
|
||||
});
|
||||
|
||||
std::cout << "Timing iterator hash...\n";
|
||||
|
||||
time_it([&] {
|
||||
std::array<uint64_t,2> dst;
|
||||
utils::murmur_hash::hash3_x64_128(src.begin(), src.size(), seed, dst);
|
||||
sink += dst[0];
|
||||
sink += dst[1];
|
||||
});
|
||||
|
||||
black_hole = sink;
|
||||
}
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include "compound.hh"
|
||||
#include "compound_compat.hh"
|
||||
#include "tests/urchin/range_assert.hh"
|
||||
|
||||
static std::vector<bytes> to_bytes_vec(std::vector<sstring> values) {
|
||||
@@ -146,3 +147,68 @@ BOOST_AUTO_TEST_CASE(test_conversion_methods_for_non_singular_compound) {
|
||||
do_test_conversion_methods_for_non_singular_compound<allow_prefixes::yes>();
|
||||
do_test_conversion_methods_for_non_singular_compound<allow_prefixes::no>();
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_component_iterator_post_incrementation) {
|
||||
compound_type<allow_prefixes::no> t({bytes_type, bytes_type, bytes_type});
|
||||
|
||||
auto packed = t.serialize_value(to_bytes_vec({"el1", "el2", "el3"}));
|
||||
auto i = t.begin(packed);
|
||||
auto end = t.end(packed);
|
||||
BOOST_REQUIRE_EQUAL(to_bytes("el1"), *i++);
|
||||
BOOST_REQUIRE_EQUAL(to_bytes("el2"), *i++);
|
||||
BOOST_REQUIRE_EQUAL(to_bytes("el3"), *i++);
|
||||
BOOST_REQUIRE(i == end);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_conversion_to_legacy_form) {
|
||||
compound_type<allow_prefixes::no> singular({bytes_type});
|
||||
|
||||
BOOST_REQUIRE_EQUAL(to_legacy(singular, singular.serialize_single(to_bytes("asd"))), bytes("asd"));
|
||||
BOOST_REQUIRE_EQUAL(to_legacy(singular, singular.serialize_single(to_bytes(""))), bytes(""));
|
||||
|
||||
compound_type<allow_prefixes::no> two_components({bytes_type, bytes_type});
|
||||
|
||||
BOOST_REQUIRE_EQUAL(to_legacy(two_components, two_components.serialize_value(to_bytes_vec({"el1", "elem2"}))),
|
||||
bytes({'\x00', '\x03', 'e', 'l', '1', '\x00', '\x00', '\x05', 'e', 'l', 'e', 'm', '2', '\x00'}));
|
||||
|
||||
BOOST_REQUIRE_EQUAL(to_legacy(two_components, two_components.serialize_value(to_bytes_vec({"el1", ""}))),
|
||||
bytes({'\x00', '\x03', 'e', 'l', '1', '\x00', '\x00', '\x00', '\x00'}));
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_legacy_ordering_of_singular) {
|
||||
compound_type<allow_prefixes::no> t({bytes_type});
|
||||
|
||||
auto make = [&t] (sstring value) -> bytes {
|
||||
return t.serialize_single(to_bytes(value));
|
||||
};
|
||||
|
||||
legacy_compound_view<decltype(t)>::tri_comparator cmp(t);
|
||||
|
||||
BOOST_REQUIRE(cmp(make("A"), make("B")) < 0);
|
||||
BOOST_REQUIRE(cmp(make("AA"), make("B")) < 0);
|
||||
BOOST_REQUIRE(cmp(make("B"), make("AB")) > 0);
|
||||
BOOST_REQUIRE(cmp(make("B"), make("A")) > 0);
|
||||
BOOST_REQUIRE(cmp(make("A"), make("A")) == 0);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_legacy_ordering_of_composites) {
|
||||
compound_type<allow_prefixes::no> t({bytes_type, bytes_type});
|
||||
|
||||
auto make = [&t] (sstring v1, sstring v2) -> bytes {
|
||||
return t.serialize_value(std::vector<bytes>{to_bytes(v1), to_bytes(v2)});
|
||||
};
|
||||
|
||||
legacy_compound_view<decltype(t)>::tri_comparator cmp(t);
|
||||
|
||||
BOOST_REQUIRE(cmp(make("A", "B"), make("A", "B")) == 0);
|
||||
BOOST_REQUIRE(cmp(make("A", "B"), make("A", "C")) < 0);
|
||||
BOOST_REQUIRE(cmp(make("A", "B"), make("B", "B")) < 0);
|
||||
BOOST_REQUIRE(cmp(make("A", "C"), make("B", "B")) < 0);
|
||||
BOOST_REQUIRE(cmp(make("B", "A"), make("A", "A")) > 0);
|
||||
|
||||
BOOST_REQUIRE(cmp(make("AA", "B"), make("B", "B")) > 0);
|
||||
BOOST_REQUIRE(cmp(make("A", "AA"), make("A", "A")) > 0);
|
||||
|
||||
BOOST_REQUIRE(cmp(make("", "A"), make("A", "A")) < 0);
|
||||
BOOST_REQUIRE(cmp(make("A", ""), make("A", "A")) < 0);
|
||||
}
|
||||
|
||||
@@ -120,7 +120,7 @@ public:
|
||||
auto& cf = db.find_column_family(ks_name, table_name);
|
||||
auto schema = cf._schema;
|
||||
auto pkey = partition_key::from_deeply_exploded(*schema, pk);
|
||||
auto dk = dht::global_partitioner().decorate_key(pkey);
|
||||
auto dk = dht::global_partitioner().decorate_key(*schema, pkey);
|
||||
auto shard = db.shard_of(dk._token);
|
||||
return _db->invoke_on(shard, [pkey = std::move(pkey),
|
||||
ck = std::move(ck),
|
||||
|
||||
@@ -54,3 +54,45 @@ BOOST_AUTO_TEST_CASE(test_key_component_iterator) {
|
||||
|
||||
BOOST_REQUIRE(i == end);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_legacy_ordering_for_non_composite_key) {
|
||||
schema s({}, "", "", {{"c1", bytes_type}}, {}, {}, {}, utf8_type);
|
||||
|
||||
auto to_key = [&s] (sstring value) {
|
||||
return partition_key::from_single_value(s, to_bytes(value));
|
||||
};
|
||||
|
||||
auto cmp = [&s] (const partition_key& k1, const partition_key& k2) {
|
||||
return k1.legacy_tri_compare(s, k2);
|
||||
};
|
||||
|
||||
BOOST_REQUIRE(cmp(to_key("A"), to_key("B")) < 0);
|
||||
BOOST_REQUIRE(cmp(to_key("AA"), to_key("B")) < 0);
|
||||
BOOST_REQUIRE(cmp(to_key("B"), to_key("AB")) > 0);
|
||||
BOOST_REQUIRE(cmp(to_key("B"), to_key("A")) > 0);
|
||||
BOOST_REQUIRE(cmp(to_key("A"), to_key("A")) == 0);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_legacy_ordering_for_composite_keys) {
|
||||
schema s({}, "", "", {{"c1", bytes_type}, {"c2", bytes_type}}, {}, {}, {}, utf8_type);
|
||||
|
||||
auto to_key = [&s] (sstring v1, sstring v2) {
|
||||
return partition_key::from_exploded(s, std::vector<bytes>{to_bytes(v1), to_bytes(v2)});
|
||||
};
|
||||
|
||||
auto cmp = [&s] (const partition_key& k1, const partition_key& k2) {
|
||||
return k1.legacy_tri_compare(s, k2);
|
||||
};
|
||||
|
||||
BOOST_REQUIRE(cmp(to_key("A", "B"), to_key("A", "B")) == 0);
|
||||
BOOST_REQUIRE(cmp(to_key("A", "B"), to_key("A", "C")) < 0);
|
||||
BOOST_REQUIRE(cmp(to_key("A", "B"), to_key("B", "B")) < 0);
|
||||
BOOST_REQUIRE(cmp(to_key("A", "C"), to_key("B", "B")) < 0);
|
||||
BOOST_REQUIRE(cmp(to_key("B", "A"), to_key("A", "A")) > 0);
|
||||
|
||||
BOOST_REQUIRE(cmp(to_key("AA", "B"), to_key("B", "B")) > 0);
|
||||
BOOST_REQUIRE(cmp(to_key("A", "AA"), to_key("A", "A")) > 0);
|
||||
|
||||
BOOST_REQUIRE(cmp(to_key("", "A"), to_key("A", "A")) < 0);
|
||||
BOOST_REQUIRE(cmp(to_key("A", ""), to_key("A", "A")) < 0);
|
||||
}
|
||||
|
||||
106
tests/urchin/murmur_hash_test.cc
Normal file
106
tests/urchin/murmur_hash_test.cc
Normal file
@@ -0,0 +1,106 @@
|
||||
/*
|
||||
* Copyright 2015 Cloudius Systems
|
||||
*/
|
||||
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#define BOOST_TEST_MODULE core
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
|
||||
#include "utils/murmur_hash.hh"
|
||||
#include "bytes.hh"
|
||||
#include "core/print.hh"
|
||||
|
||||
static const bytes full_sequence("012345678901234567890123456789012345678901234567890123456789");
|
||||
|
||||
static const uint64_t seed = 0xcafebabe;
|
||||
|
||||
// Below are pre-calculated results of hashing consecutive prefixes of full_sequence using hash3_x64_128(),
|
||||
// starting from an empty prefix.
|
||||
std::array<uint64_t,2> prefix_hashes[] = {
|
||||
{13907055927958326333ULL, 10701141902926764871ULL},
|
||||
{16872847325129109440ULL, 5125572542408278394ULL},
|
||||
{11916219991241122015ULL, 747256650753853469ULL},
|
||||
{1492790099208671403ULL, 16635411534431524239ULL},
|
||||
{16764172998150925140ULL, 7440789969466348974ULL},
|
||||
{6846275695158209935ULL, 11251493995290334439ULL},
|
||||
{1075204625779168927ULL, 3453614304122336174ULL},
|
||||
{1404180555660983881ULL, 13684781009779545989ULL},
|
||||
{10185829608361057848ULL, 1102754042417891721ULL},
|
||||
{12850382803381855486ULL, 7404649381971707328ULL},
|
||||
{972515366528881960ULL, 4507841639019527002ULL},
|
||||
{9279316204399455969ULL, 9712180353841837616ULL},
|
||||
{16558181491899334208ULL, 17507114537353308311ULL},
|
||||
{12977947643557220239ULL, 8334539845739718010ULL},
|
||||
{3743840537387886281ULL, 15297576726012815871ULL},
|
||||
{10675210326497176757ULL, 11200838847539594424ULL},
|
||||
{16363715880225337291ULL, 2866762944263215884ULL},
|
||||
{1272769995400892137ULL, 1744366104172354624ULL},
|
||||
{17426490373034063702ULL, 12666853004117709655ULL},
|
||||
{10757142341798556363ULL, 3984810732374497004ULL},
|
||||
{4593020710048021108ULL, 14359610319437287264ULL},
|
||||
{18212086870806388719ULL, 7490375939640747191ULL},
|
||||
{11209001888824275013ULL, 6491913312740217486ULL},
|
||||
{17601044365330203914ULL, 1779402119744049378ULL},
|
||||
{3916812090790925532ULL, 17533572508631620015ULL},
|
||||
{10113761195332211536ULL, 4163484992388084181ULL},
|
||||
{4353425943622404193ULL, 1830165015196477722ULL},
|
||||
{3904126367597302219ULL, 7917741892387588561ULL},
|
||||
{7077450301176172141ULL, 8070185570157969067ULL},
|
||||
{6331768922468785771ULL, 9311778359071820659ULL},
|
||||
{7715740891587706229ULL, 16510772505395753023ULL},
|
||||
{4510384582422222090ULL, 9352450339278885986ULL},
|
||||
{6746132289648898302ULL, 15402380546251654069ULL},
|
||||
{1315904697672087497ULL, 2686857386486814319ULL},
|
||||
{16122226135709041149ULL, 1278536837434550412ULL},
|
||||
{6449104926034509627ULL, 8809488279970194649ULL},
|
||||
{9047965986959166273ULL, 14963749820458851455ULL},
|
||||
{18095596803119563681ULL, 2806499127062067052ULL},
|
||||
{545238237267145238ULL, 4583663570136224396ULL},
|
||||
{12335897404061220746ULL, 8643308333771385742ULL},
|
||||
{15016951849151361171ULL, 13012972687708005422ULL},
|
||||
{12896848725136832414ULL, 9881710852371170521ULL},
|
||||
{17900663530283054991ULL, 9606960248070178723ULL},
|
||||
{4513619521783122834ULL, 4823611535250518791ULL},
|
||||
{15572858348470724038ULL, 4882998878774456634ULL},
|
||||
{3464540909110937960ULL, 14591983318346304410ULL},
|
||||
{2951301498066556278ULL, 3029976006973164807ULL},
|
||||
{7848995488883197496ULL, 10621954303326018594ULL},
|
||||
{5702723040652442467ULL, 11325339470689059424ULL},
|
||||
{870698890980252409ULL, 8294946103885186165ULL},
|
||||
{423348447487367835ULL, 4067674294039261619ULL},
|
||||
{397951862030142664ULL, 17073640849499096681ULL},
|
||||
{9374556141781683538ULL, 10333311062251856416ULL},
|
||||
{1097707041202763764ULL, 2870200096551238743ULL},
|
||||
{11493051326088411054ULL, 12348796263566330575ULL},
|
||||
{15865059192259516415ULL, 4808544582161036476ULL},
|
||||
{2717981543414886593ULL, 5944564527643476706ULL},
|
||||
{887521262173735642ULL, 3558550013200985442ULL},
|
||||
{9496424291456600748ULL, 9845949835154361896ULL},
|
||||
{1589012859535948937ULL, 7402826160257180747ULL}
|
||||
};
|
||||
|
||||
// Checks both hash3_x64_128() overloads (the bytes_view one and the
// iterator one) against the pre-calculated prefix_hashes table: entry i is
// the expected hash of the first i bytes of full_sequence, hashed with `seed`.
BOOST_AUTO_TEST_CASE(test_hash_output) {
    // Fails the test with a readable message when two hash values differ.
    auto assert_hashes_equal = [] (bytes_view data, std::array<uint64_t,2> lhs, std::array<uint64_t,2> rhs) {
        if (lhs != rhs) {
            BOOST_FAIL(sprint("Hashes differ for %s (got {0x%x, 0x%x} and {0x%x, 0x%x})", data,
                lhs[0], lhs[1], rhs[0], rhs[1]));
        }
    };

    // size_t index: avoids comparing a signed int against size().
    for (size_t i = 0; i < full_sequence.size(); ++i) {
        auto prefix = bytes_view(full_sequence.begin(), i);
        const auto& expected = prefix_hashes[i];

        // Test the bytes_view version
        {
            std::array<uint64_t, 2> dst;
            utils::murmur_hash::hash3_x64_128(prefix, seed, dst);
            // The computed hash must actually be compared with the table;
            // without this the test could never fail.
            assert_hashes_equal(prefix, dst, expected);
        }

        // Test the iterator version
        {
            std::array<uint64_t,2> dst;
            utils::murmur_hash::hash3_x64_128(prefix.begin(), prefix.size(), seed, dst);
            assert_hashes_equal(prefix, dst, expected);
        }
    }
}
|
||||
45
tests/urchin/partitioner_test.cc
Normal file
45
tests/urchin/partitioner_test.cc
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright 2015 Cloudius Systems
|
||||
*/
|
||||
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#define BOOST_TEST_MODULE core
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
|
||||
#include "dht/i_partitioner.hh"
|
||||
#include "dht/murmur3_partitioner.hh"
|
||||
#include "schema.hh"
|
||||
#include "types.hh"
|
||||
|
||||
// Builds a dht::token of kind `key` whose 8-byte payload is the in-memory
// representation of net::hton(value).
// NOTE(review): net::hton presumably yields network (big-endian) byte order - confirm.
static dht::token token_from_long(uint64_t value) {
    auto converted = net::hton(value);
    auto* raw = reinterpret_cast<int8_t*>(&converted);

    bytes payload(bytes::initialized_later(), 8);
    std::copy_n(raw, 8, payload.begin());

    return { dht::token::kind::key, std::move(payload) };
}
|
||||
|
||||
// Decorates a two-column int32 partition key with murmur3_partitioner and
// checks that the resulting token equals a value recorded from Origin and
// that the decorated key still carries the original partition key.
BOOST_AUTO_TEST_CASE(test_decorated_key_is_compatible_with_origin) {
    schema s({}, "", "",
        {{"c1", int32_type}, {"c2", int32_type}},   // partition key
        {},                                         // clustering key
        {{"v", int32_type}},                        // regular columns
        {},                                         // static columns
        utf8_type                                   // regular column name type
    );

    auto key = partition_key::from_deeply_exploded(s, {143, 234});

    dht::murmur3_partitioner murmur3;
    auto dk = murmur3.decorate_key(s, key);

    // Expected value was taken from Origin
    BOOST_REQUIRE_EQUAL(dk._token, token_from_long(4958784316840156970));
    BOOST_REQUIRE(dk._key.equal(s, key));
}
|
||||
@@ -117,7 +117,7 @@ public:
|
||||
return complete_with_exception<InvalidRequestException>(std::move(exn_cob), "column family %s not found", column_parent.column_family);
|
||||
}
|
||||
auto pk = key_from_thrift(schema, to_bytes(key));
|
||||
auto dk = dht::global_partitioner().decorate_key(pk);
|
||||
auto dk = dht::global_partitioner().decorate_key(*schema, pk);
|
||||
auto shard = _db.local().shard_of(dk._token);
|
||||
|
||||
auto do_get = [this,
|
||||
|
||||
24
types.hh
24
types.hh
@@ -84,6 +84,30 @@ int lexicographical_tri_compare(TypesIterator types_first, TypesIterator types_l
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Three-way counterpart of std::lexicographical_compare().
//
// Walks both ranges in lock-step and returns the first non-zero result of
// comp(*first1, *first2). If one range runs out first, the shorter range
// orders before the longer one (-1 when the first range is shorter, 1 when
// the second is). Returns 0 when both ranges end together without any
// differing element.
template <typename InputIt1, typename InputIt2, typename Compare>
int lexicographical_tri_compare(InputIt1 first1, InputIt1 last1,
        InputIt2 first2, InputIt2 last2,
        Compare comp) {
    for (; first1 != last1 && first2 != last2; ++first1, (void) ++first2) {
        auto order = comp(*first1, *first2);
        if (order) {
            return order;
        }
    }

    const bool lhs_exhausted = first1 == last1;
    const bool rhs_exhausted = first2 == last2;
    if (lhs_exhausted == rhs_exhausted) {
        return 0;
    }
    // Exactly one range still has elements; that one is the greater.
    return rhs_exhausted ? 1 : -1;
}
|
||||
|
||||
// A trichotomic comparator for prefix equality total ordering.
|
||||
// In this ordering, two sequences are equal iff any of them is a prefix
|
||||
// of the other. Otherwise, lexicographical ordering determines the order.
|
||||
|
||||
@@ -148,22 +148,6 @@ static uint64_t getblock(bytes_view key, uint32_t index)
|
||||
(((uint64_t) key[i_8 + 6] & 0xff) << 48) + (((uint64_t) key[i_8 + 7] & 0xff) << 56);
|
||||
}
|
||||
|
||||
// Rotates the 64-bit value v left by n bit positions.
//
// Both shift counts are reduced modulo 64, so n == 0 (or any multiple of 64)
// returns v unchanged. Without the reduction, n == 0 would shift right by the
// full width of the type, which is undefined behaviour.
static uint64_t rotl64(uint64_t v, uint32_t n)
{
    n &= 63;
    return (v << n) | (v >> ((64 - n) & 63));
}
|
||||
|
||||
// Scrambles a 64-bit value: two rounds of "xor with itself shifted right by
// 33, then multiply by a fixed constant", followed by one last xor-shift.
static uint64_t fmix(uint64_t k)
{
    const uint64_t multipliers[] = { 0xff51afd7ed558ccdL, 0xc4ceb9fe1a85ec53L };

    for (uint64_t multiplier : multipliers) {
        k ^= k >> 33;
        k *= multiplier;
    }

    return k ^ (k >> 33);
}
|
||||
|
||||
void hash3_x64_128(bytes_view key, uint64_t seed, std::array<uint64_t,2> &result)
|
||||
{
|
||||
uint32_t length = key.size();
|
||||
|
||||
@@ -37,11 +37,119 @@
|
||||
|
||||
namespace utils {
|
||||
|
||||
namespace murmur_hash
|
||||
{
|
||||
uint32_t hash32(bytes_view data, int32_t seed);
|
||||
uint64_t hash2_64(bytes_view key, uint64_t seed);
|
||||
void hash3_x64_128(bytes_view key, uint64_t seed, std::array<uint64_t,2> &result);
|
||||
};
|
||||
namespace murmur_hash {
|
||||
|
||||
uint32_t hash32(bytes_view data, int32_t seed);
|
||||
uint64_t hash2_64(bytes_view key, uint64_t seed);
|
||||
|
||||
// Reads eight consecutive elements from `in` and packs their low 8 bits into
// a 64-bit word, first element in the least significant byte.
// `in` is taken by reference and is left advanced by eight positions.
template<typename InputIterator>
static inline
uint64_t read_block(InputIterator& in) {
    uint64_t block = 0;
    for (unsigned shift = 0; shift < 64; shift += 8) {
        typename std::iterator_traits<InputIterator>::value_type octet = *in;
        ++in;
        // Mask after widening so a signed element contributes only one byte.
        block |= ((uint64_t) octet & 0xff) << shift;
    }
    return block;
}
|
||||
|
||||
// Rotates the 64-bit value v left by n bit positions.
//
// Both shift counts are reduced modulo 64, so n == 0 (or any multiple of 64)
// returns v unchanged. Without the reduction, n == 0 would shift right by the
// full width of the type, which is undefined behaviour.
static inline
uint64_t rotl64(uint64_t v, uint32_t n) {
    n &= 63;
    return (v << n) | (v >> ((64 - n) & 63));
}
|
||||
|
||||
// Final mixing step applied to each half of the hash state: two rounds of
// "xor with itself shifted right by 33, then multiply by a fixed constant",
// followed by one last xor-shift.
static inline
uint64_t fmix(uint64_t k) {
    const uint64_t multipliers[] = { 0xff51afd7ed558ccdL, 0xc4ceb9fe1a85ec53L };

    for (uint64_t multiplier : multipliers) {
        k ^= k >> 33;
        k *= multiplier;
    }
    k ^= k >> 33;

    return k;
}
|
||||
|
||||
template <typename InputIterator>
|
||||
void hash3_x64_128(InputIterator in, uint32_t length, uint64_t seed, std::array<uint64_t, 2>& result) {
|
||||
const uint32_t nblocks = length >> 4; // Process as 128-bit blocks.
|
||||
|
||||
uint64_t h1 = seed;
|
||||
uint64_t h2 = seed;
|
||||
|
||||
uint64_t c1 = 0x87c37b91114253d5L;
|
||||
uint64_t c2 = 0x4cf5ad432745937fL;
|
||||
|
||||
//----------
|
||||
// body
|
||||
|
||||
for(uint32_t i = 0; i < nblocks; i++)
|
||||
{
|
||||
uint64_t k1 = read_block(in);
|
||||
uint64_t k2 = read_block(in);
|
||||
|
||||
k1 *= c1; k1 = rotl64(k1,31); k1 *= c2; h1 ^= k1;
|
||||
|
||||
h1 = rotl64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
|
||||
|
||||
k2 *= c2; k2 = rotl64(k2,33); k2 *= c1; h2 ^= k2;
|
||||
|
||||
h2 = rotl64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
|
||||
}
|
||||
|
||||
//----------
|
||||
// tail
|
||||
|
||||
uint64_t k1 = 0;
|
||||
uint64_t k2 = 0;
|
||||
|
||||
typename std::iterator_traits<InputIterator>::value_type tmp[15];
|
||||
std::copy_n(in, length & 15, tmp);
|
||||
|
||||
switch(length & 15)
|
||||
{
|
||||
case 15: k2 ^= ((uint64_t) tmp[14]) << 48;
|
||||
case 14: k2 ^= ((uint64_t) tmp[13]) << 40;
|
||||
case 13: k2 ^= ((uint64_t) tmp[12]) << 32;
|
||||
case 12: k2 ^= ((uint64_t) tmp[11]) << 24;
|
||||
case 11: k2 ^= ((uint64_t) tmp[10]) << 16;
|
||||
case 10: k2 ^= ((uint64_t) tmp[9]) << 8;
|
||||
case 9: k2 ^= ((uint64_t) tmp[8]) << 0;
|
||||
k2 *= c2; k2 = rotl64(k2,33); k2 *= c1; h2 ^= k2;
|
||||
case 8: k1 ^= ((uint64_t) tmp[7]) << 56;
|
||||
case 7: k1 ^= ((uint64_t) tmp[6]) << 48;
|
||||
case 6: k1 ^= ((uint64_t) tmp[5]) << 40;
|
||||
case 5: k1 ^= ((uint64_t) tmp[4]) << 32;
|
||||
case 4: k1 ^= ((uint64_t) tmp[3]) << 24;
|
||||
case 3: k1 ^= ((uint64_t) tmp[2]) << 16;
|
||||
case 2: k1 ^= ((uint64_t) tmp[1]) << 8;
|
||||
case 1: k1 ^= ((uint64_t) tmp[0]);
|
||||
k1 *= c1; k1 = rotl64(k1,31); k1 *= c2; h1 ^= k1;
|
||||
};
|
||||
|
||||
//----------
|
||||
// finalization
|
||||
|
||||
h1 ^= length;
|
||||
h2 ^= length;
|
||||
|
||||
h1 += h2;
|
||||
h2 += h1;
|
||||
|
||||
h1 = fmix(h1);
|
||||
h2 = fmix(h2);
|
||||
|
||||
h1 += h2;
|
||||
h2 += h1;
|
||||
|
||||
result[0] = h1;
|
||||
result[1] = h2;
|
||||
}
|
||||
|
||||
void hash3_x64_128(bytes_view key, uint64_t seed, std::array<uint64_t, 2>& result);
|
||||
|
||||
} // namespace murmur_hash
|
||||
|
||||
} // namespace utils
|
||||
|
||||
Reference in New Issue
Block a user