repair: convert hashing to streamed_mutations

This patch makes hashing for repair calculate checksums in a way that
doesn't require rebuilding whole mutation.
Unfortunately, such checksums are incompatible with the old ones so the
old way for computing checksums is preserved for compatibility reasons.

Signed-off-by: Paweł Dziepak <pdziepak@scylladb.com>
This commit is contained in:
Paweł Dziepak
2016-05-31 13:31:46 +01:00
parent e779e2f0c9
commit 7e06499458
6 changed files with 75 additions and 31 deletions

View File

@@ -19,6 +19,11 @@
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
enum class repair_checksum : uint8_t {
legacy = 0,
streamed = 1,
};
class partition_checksum {
std::array<uint8_t, 32> digest();
};

View File

@@ -578,10 +578,11 @@ int main(int ac, char** av) {
api::set_server_stream_manager(ctx).get();
// Start handling REPAIR_CHECKSUM_RANGE messages
net::get_messaging_service().invoke_on_all([&db] (auto& ms) {
ms.register_repair_checksum_range([&db] (sstring keyspace, sstring cf, query::range<dht::token> range) {
ms.register_repair_checksum_range([&db] (sstring keyspace, sstring cf, query::range<dht::token> range, rpc::optional<repair_checksum> hash_version) {
auto hv = hash_version ? *hash_version : repair_checksum::legacy;
return do_with(std::move(keyspace), std::move(cf), std::move(range),
[&db] (auto& keyspace, auto& cf, auto& range) {
return checksum_range(db, keyspace, cf, range);
[&db, hv] (auto& keyspace, auto& cf, auto& range) {
return checksum_range(db, keyspace, cf, range, hv);
});
});
}).get();

View File

@@ -875,18 +875,18 @@ future<> messaging_service::send_replication_finished(msg_addr id, inet_address
// Wrapper for REPAIR_CHECKSUM_RANGE
void messaging_service::register_repair_checksum_range(
std::function<future<partition_checksum> (sstring keyspace,
sstring cf, query::range<dht::token> range)>&& f) {
sstring cf, query::range<dht::token> range, rpc::optional<repair_checksum> hash_version)>&& f) {
register_handler(this, messaging_verb::REPAIR_CHECKSUM_RANGE, std::move(f));
}
void messaging_service::unregister_repair_checksum_range() {
_rpc->unregister_handler(messaging_verb::REPAIR_CHECKSUM_RANGE);
}
future<partition_checksum> messaging_service::send_repair_checksum_range(
msg_addr id, sstring keyspace, sstring cf, ::range<dht::token> range)
msg_addr id, sstring keyspace, sstring cf, ::range<dht::token> range, repair_checksum hash_version)
{
return send_message<partition_checksum>(this,
messaging_verb::REPAIR_CHECKSUM_RANGE, std::move(id),
std::move(keyspace), std::move(cf), std::move(range));
std::move(keyspace), std::move(cf), std::move(range), hash_version);
}
} // namespace net

View File

@@ -30,6 +30,7 @@
#include "query-request.hh"
#include "mutation_query.hh"
#include "range.hh"
#include "repair/repair.hh"
#include <seastar/net/tls.hh>
@@ -233,9 +234,9 @@ public:
future<> send_complete_message(msg_addr id, UUID plan_id, unsigned dst_cpu_id);
// Wrapper for REPAIR_CHECKSUM_RANGE verb
void register_repair_checksum_range(std::function<future<partition_checksum> (sstring keyspace, sstring cf, range<dht::token> range)>&& func);
void register_repair_checksum_range(std::function<future<partition_checksum> (sstring keyspace, sstring cf, range<dht::token> range, rpc::optional<repair_checksum> hash_version)>&& func);
void unregister_repair_checksum_range();
future<partition_checksum> send_repair_checksum_range(msg_addr id, sstring keyspace, sstring cf, range<dht::token> range);
future<partition_checksum> send_repair_checksum_range(msg_addr id, sstring keyspace, sstring cf, range<dht::token> range, repair_checksum hash_version);
// Wrapper for GOSSIP_ECHO verb
void register_gossip_echo(std::function<future<> ()>&& func);

View File

@@ -263,11 +263,40 @@ public:
}
};
future<partition_checksum> partition_checksum::compute_legacy(streamed_mutation m)
{
return mutation_from_streamed_mutation(std::move(m)).then([] (auto mopt) {
assert(mopt);
std::array<uint8_t, 32> digest;
sha256_hasher h;
feed_hash(h, *mopt);
h.finalize(digest);
return partition_checksum(digest);
});
}
partition_checksum::partition_checksum(const mutation& m) {
sha256_hasher h;
feed_hash(h, m);
h.finalize(_digest);
future<partition_checksum> partition_checksum::compute_streamed(streamed_mutation m)
{
auto& s = *m.schema();
auto h = make_lw_shared<sha256_hasher>();
m.key().feed_hash(*h, s);
return do_with(std::move(m), [&s, h] (auto& sm) mutable {
mutation_hasher<sha256_hasher> mh(s, *h);
return consume(sm, std::move(mh)).then([ h ] {
std::array<uint8_t, 32> digest;
h->finalize(digest);
return partition_checksum(digest);
});
});
}
future<partition_checksum> partition_checksum::compute(streamed_mutation m, repair_checksum hash_version)
{
switch (hash_version) {
case repair_checksum::legacy: return compute_legacy(std::move(m));
case repair_checksum::streamed: return compute_streamed(std::move(m));
default: throw std::runtime_error(sprint("Unknown hash version: %d", static_cast<int>(hash_version)));
}
}
static inline unaligned<uint64_t>& qword(std::array<uint8_t, 32>& b, int n) {
@@ -324,24 +353,24 @@ std::ostream& operator<<(std::ostream& out, const partition_checksum& c) {
// data is coming in).
static future<partition_checksum> checksum_range_shard(database &db,
const sstring& keyspace_name, const sstring& cf_name,
const ::range<dht::token>& range) {
const ::range<dht::token>& range, repair_checksum hash_version) {
auto& cf = db.find_column_family(keyspace_name, cf_name);
return do_with(query::to_partition_range(range), [&cf] (const auto& partition_range) {
return do_with(query::to_partition_range(range), [&cf, hash_version] (const auto& partition_range) {
auto reader = cf.make_reader(cf.schema(),
partition_range,
query::no_clustering_key_filtering,
service::get_local_streaming_read_priority());
return do_with(std::move(reader), partition_checksum(),
[] (auto& reader, auto& checksum) {
return repeat([&reader, &checksum] () {
return reader().then([] (auto sm) {
return mutation_from_streamed_mutation(std::move(sm));
}).then([&checksum] (auto mopt) {
[hash_version] (auto& reader, auto& checksum) {
return repeat([&reader, &checksum, hash_version] () {
return reader().then([&checksum, hash_version] (auto mopt) {
if (mopt) {
checksum.add(partition_checksum(*mopt));
return stop_iteration::no;
return partition_checksum::compute(std::move(*mopt), hash_version).then([&checksum] (auto pc) {
checksum.add(pc);
return stop_iteration::no;
});
} else {
return stop_iteration::yes;
return make_ready_future<stop_iteration>(stop_iteration::yes);
}
});
}).then([&checksum] {
@@ -364,17 +393,17 @@ static future<partition_checksum> checksum_range_shard(database &db,
// function is not resolved.
future<partition_checksum> checksum_range(seastar::sharded<database> &db,
const sstring& keyspace, const sstring& cf,
const ::range<dht::token>& range) {
const ::range<dht::token>& range, repair_checksum hash_version) {
unsigned shard_begin = range.start() ?
dht::shard_of(range.start()->value()) : 0;
unsigned shard_end = range.end() ?
dht::shard_of(range.end()->value())+1 : smp::count;
return do_with(partition_checksum(), [shard_begin, shard_end, &db, &keyspace, &cf, &range] (auto& result) {
return do_with(partition_checksum(), [shard_begin, shard_end, &db, &keyspace, &cf, &range, hash_version] (auto& result) {
return parallel_for_each(boost::counting_iterator<int>(shard_begin),
boost::counting_iterator<int>(shard_end),
[&db, &keyspace, &cf, &range, &result] (unsigned shard) {
return db.invoke_on(shard, [&keyspace, &cf, &range] (database& db) {
return checksum_range_shard(db, keyspace, cf, range);
[&db, &keyspace, &cf, &range, &result, hash_version] (unsigned shard) {
return db.invoke_on(shard, [&keyspace, &cf, &range, hash_version] (database& db) {
return checksum_range_shard(db, keyspace, cf, range, hash_version);
}).then([&result] (partition_checksum sum) {
result.add(sum);
});
@@ -496,11 +525,11 @@ static future<> repair_cf_range(seastar::sharded<database>& db,
// there are any differences, sync the content of this range.
std::vector<future<partition_checksum>> checksums;
checksums.reserve(1 + neighbors.size());
checksums.push_back(checksum_range(db, keyspace, cf, range));
checksums.push_back(checksum_range(db, keyspace, cf, range, repair_checksum::legacy));
for (auto&& neighbor : neighbors) {
checksums.push_back(
net::get_local_messaging_service().send_repair_checksum_range(
net::msg_addr{neighbor},keyspace, cf, range));
net::msg_addr{neighbor},keyspace, cf, range, repair_checksum::legacy));
}
completion.enter();

View File

@@ -70,6 +70,11 @@ future<repair_status> repair_get_status(seastar::sharded<database>& db, int id);
// stop them abruptly).
future<> repair_shutdown(seastar::sharded<database>& db);
enum class repair_checksum {
legacy = 0,
streamed = 1,
};
// The class partition_checksum calculates a 256-bit cryptographically-secure
// checksum of a set of partitions fed to it. The checksum of a partition set
// is calculated by calculating a strong hash function (SHA-256) of each
@@ -81,10 +86,13 @@ future<> repair_shutdown(seastar::sharded<database>& db);
class partition_checksum {
private:
std::array<uint8_t, 32> _digest; // 256 bits
private:
static future<partition_checksum> compute_legacy(streamed_mutation m);
static future<partition_checksum> compute_streamed(streamed_mutation m);
public:
constexpr partition_checksum() : _digest{} { }
explicit partition_checksum(std::array<uint8_t, 32> digest) : _digest(std::move(digest)) { }
partition_checksum(const mutation& m);
static future<partition_checksum> compute(streamed_mutation m, repair_checksum rt);
void add(const partition_checksum& other);
bool operator==(const partition_checksum& other) const;
bool operator!=(const partition_checksum& other) const { return !operator==(other); }
@@ -99,4 +107,4 @@ public:
// not resolved.
future<partition_checksum> checksum_range(seastar::sharded<database> &db,
const sstring& keyspace, const sstring& cf,
const ::range<dht::token>& range);
const ::range<dht::token>& range, repair_checksum rt);