Files
scylladb/streaming/stream_transfer_task.cc
Asias He ba54654af3 streaming: Use interval_set to sort and merge ranges
So that the ranges are sorted and have no overlaps. We can have less
ranges to deal with and it can help the mutation readers to optimize.

Here is an exmaple of ranges generated by repair:

Before:

    INFO  2016-12-07 17:44:21,185 [shard 0] stream_session - cf_id =
    dec9fa90-bc3b-11e6-af78-000000000001,
    before ranges = {(-3383928698815274642, -3376937163195039606],
    (-7260764223708720005, -7251657821052234309], (-4767213984179237293,
    -4747032371925842389], (-7645879646119667643, -7589962743703481776],
    (-2340199306656526861, -2320523117224780931], (-576028861239229331,
    -560973674020019962], (-4070378863644120252, -3987599893827407860],
    (-2551584407739673151, -2498779102482524711], (-5416061903556353312,
    -5354212455975869358], (37594980457713898, 67885601051654285],
    (3083778975065200884, 3091232478835418439], (3131345970514528877,
    3187922544267434961], (5765437476661317163, 5778671293583720541],
    (5960610072466058818, 5972289771228014343], (7749618183851698485,
    7758080813117351135], (-3987599893827407860, -3899198931034439776],
    (-7251657821052234309, -7131649010279865221], (-3576581915808403133,
    -3383928698815274642], (-417850207760366422, -327959672080599465],
    (-2671876682129336880, -2551584407739673151], (-1305178847032904465,
    -1137497074548854552], (8540448858050275827, 8610171849752115483],
    (-560973674020019962, -417850207760366422], (-2498779102482524711,
    -2340199306656526861], (2394447940525988167, 2523396860109747637],
    (-6703329224557608009, -6517757811218772762], (-3675103288021821677,
    -3576581915808403133], (-5622185785296846551, -5416061903556353312],
    (8610171849752115483, 8742605005068551458], (8068079250973315241,
    8185655671734937642], (560264964510741191, 790641981923757238],
    (5581202487214475094, 5765437476661317163], (8742605005068551458,
    8923908282731801645], (-6038176423022601107, -5622185785296846551],
    (5778671293583720541, 5960610072466058818], (-3899198931034439776,
    -3675103288021821677], (8356739976149429222, 8540448858050275827],
    (-6517757811218772762, -6038176423022601107], (-8052600134279395253,
    -7645879646119667643], (-327959672080599465, 37594980457713898],
    (7758080813117351135, 8019254284118543066], (4781565016737645510,
    5067070718000527886], (2523396860109747637, 3083778975065200884],
    (-5354212455975869358, -4767213984179237293], (6784138025918878582,
    7190719703944308372], (67885601051654285, 447405341661896387],
    (-2190610927722759275, -1305178847032904465], (-4747032371925842389,
    -4070378863644120252]}, size=48

After:

    INFO  2016-12-07 17:44:21,185 [shard 0] stream_session - cf_id =
    dec9fa90-bc3b-11e6-af78-000000000001,
    after  ranges = {(-8052600134279395253, -7589962743703481776],
    (-7260764223708720005, -7131649010279865221], (-6703329224557608009,
    -3376937163195039606], (-2671876682129336880, -2320523117224780931],
    (-2190610927722759275, -1137497074548854552], (-576028861239229331,
    447405341661896387], (560264964510741191, 790641981923757238],
    (2394447940525988167, 3091232478835418439], (3131345970514528877,
    3187922544267434961], (4781565016737645510, 5067070718000527886],
    (5581202487214475094, 5972289771228014343], (6784138025918878582,
    7190719703944308372], (7749618183851698485, 8019254284118543066],
    (8068079250973315241, 8185655671734937642], (8356739976149429222,
    8923908282731801645]}, size=15
2016-12-12 11:09:26 +08:00

194 lines
8.3 KiB
C++

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Modified by ScyllaDB
* Copyright (C) 2015 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include "log.hh"
#include "streaming/stream_detail.hh"
#include "streaming/stream_transfer_task.hh"
#include "streaming/stream_session.hh"
#include "streaming/stream_manager.hh"
#include "mutation_reader.hh"
#include "frozen_mutation.hh"
#include "mutation.hh"
#include "message/messaging_service.hh"
#include "range.hh"
#include "dht/i_partitioner.hh"
#include "service/priority_manager.hh"
#include <boost/range/irange.hpp>
#include "service/storage_service.hh"
#include <boost/icl/interval.hpp>
#include <boost/icl/interval_set.hpp>
namespace streaming {
extern logging::logger sslog;
stream_transfer_task::stream_transfer_task(shared_ptr<stream_session> session, UUID cf_id, std::vector<nonwrapping_range<dht::token>> ranges, long total_size)
: stream_task(session, cf_id)
, _ranges(std::move(ranges))
, _total_size(total_size) {
}
stream_transfer_task::~stream_transfer_task() = default;
struct send_info {
database& db;
utils::UUID plan_id;
utils::UUID cf_id;
std::vector<query::partition_range> prs;
net::messaging_service::msg_addr id;
uint32_t dst_cpu_id;
size_t mutations_nr{0};
semaphore mutations_done{0};
bool error_logged = false;
mutation_reader reader;
send_info(database& db_, utils::UUID plan_id_, utils::UUID cf_id_,
std::vector<query::partition_range> prs_, net::messaging_service::msg_addr id_,
uint32_t dst_cpu_id_)
: db(db_)
, plan_id(plan_id_)
, cf_id(cf_id_)
, prs(std::move(prs_))
, id(id_)
, dst_cpu_id(dst_cpu_id_) {
auto& cf = db.find_column_family(this->cf_id);
reader = cf.make_streaming_reader(cf.schema(), this->prs);
}
};
future<> do_send_mutations(auto si, auto fm, bool fragmented) {
return get_local_stream_manager().mutation_send_limiter().wait().then([si, fragmented, fm = std::move(fm)] () mutable {
sslog.debug("[Stream #{}] SEND STREAM_MUTATION to {}, cf_id={}", si->plan_id, si->id, si->cf_id);
auto fm_size = fm.representation().size();
net::get_local_messaging_service().send_stream_mutation(si->id, si->plan_id, std::move(fm), si->dst_cpu_id, fragmented).then([si, fm_size] {
sslog.debug("[Stream #{}] GOT STREAM_MUTATION Reply from {}", si->plan_id, si->id.addr);
get_local_stream_manager().update_progress(si->plan_id, si->id.addr, progress_info::direction::OUT, fm_size);
si->mutations_done.signal();
}).handle_exception([si] (auto ep) {
// There might be larger number of STREAM_MUTATION inflight.
// Log one error per column_family per range
if (!si->error_logged) {
si->error_logged = true;
sslog.warn("[Stream #{}] stream_transfer_task: Fail to send STREAM_MUTATION to {}: {}", si->plan_id, si->id, ep);
}
si->mutations_done.broken();
}).finally([] {
get_local_stream_manager().mutation_send_limiter().signal();
});
});
}
future<> send_mutations(auto si) {
return repeat([si] () {
return si->reader().then([si] (auto smopt) {
if (smopt && si->db.column_family_exists(si->cf_id)) {
size_t fragment_size = default_frozen_fragment_size;
// Mutations cannot be sent fragmented if the receiving side doesn't support that.
if (!service::get_local_storage_service().cluster_supports_large_partitions()) {
fragment_size = std::numeric_limits<size_t>::max();
}
return fragment_and_freeze(std::move(*smopt), [si] (auto fm, bool fragmented) {
si->mutations_nr++;
return do_send_mutations(si, std::move(fm), fragmented);
}, fragment_size).then([] { return stop_iteration::no; });
} else {
return make_ready_future<stop_iteration>(stop_iteration::yes);
}
});
}).then([si] {
return si->mutations_done.wait(si->mutations_nr);
});
}
void stream_transfer_task::start() {
auto plan_id = session->plan_id();
auto cf_id = this->cf_id;
auto dst_cpu_id = session->dst_cpu_id;
auto& schema = session->get_local_db().find_column_family(cf_id).schema();
auto id = net::messaging_service::msg_addr{session->peer, session->dst_cpu_id};
sslog.debug("[Stream #{}] stream_transfer_task: cf_id={}", plan_id, cf_id);
sort_and_merge_ranges();
_shard_ranges = dht::split_ranges_to_shards(_ranges, *schema);
parallel_for_each(_shard_ranges, [this, dst_cpu_id, plan_id, cf_id, id] (auto& item) {
auto& shard = item.first;
auto& prs = item.second;
return session->get_db().invoke_on(shard, [plan_id, cf_id, id, dst_cpu_id, prs = std::move(prs)] (database& db) mutable {
auto si = make_lw_shared<send_info>(db, plan_id, cf_id, prs, id, dst_cpu_id);
return send_mutations(std::move(si));
});
}).then([this, plan_id, cf_id, id] {
sslog.debug("[Stream #{}] SEND STREAM_MUTATION_DONE to {}, cf_id={}", plan_id, id, cf_id);
return session->ms().send_stream_mutation_done(id, plan_id, _ranges,
cf_id, session->dst_cpu_id).handle_exception([plan_id, id, cf_id] (auto ep) {
sslog.warn("[Stream #{}] stream_transfer_task: Fail to send STREAM_MUTATION_DONE to {}: {}", plan_id, id, ep);
std::rethrow_exception(ep);
});
}).then([this, id, plan_id, cf_id] {
sslog.debug("[Stream #{}] GOT STREAM_MUTATION_DONE Reply from {}", plan_id, id.addr);
session->start_keep_alive_timer();
session->transfer_task_completed(cf_id);
}).handle_exception([this, plan_id, id] (auto ep){
sslog.warn("[Stream #{}] stream_transfer_task: Fail to send to {}: {}", plan_id, id, ep);
this->session->on_error();
});
}
void stream_transfer_task::append_ranges(const std::vector<nonwrapping_range<dht::token>>& ranges) {
_ranges.insert(_ranges.end(), ranges.begin(), ranges.end());
}
void stream_transfer_task::sort_and_merge_ranges() {
boost::icl::interval_set<dht::token> myset;
std::vector<nonwrapping_range<dht::token>> ranges;
sslog.debug("cf_id = {}, before ranges = {}, size={}", cf_id, _ranges, _ranges.size());
_ranges.swap(ranges);
for (auto& range : ranges) {
// TODO: We should convert range_to_interval and interval_to_range to
// take nonwrapping_range ranges.
myset += locator::token_metadata::range_to_interval(range);
}
ranges.clear();
ranges.shrink_to_fit();
for (auto& i : myset) {
auto r = locator::token_metadata::interval_to_range(i);
_ranges.push_back(nonwrapping_range<dht::token>(r));
}
sslog.debug("cf_id = {}, after ranges = {}, size={}", cf_id, _ranges, _ranges.size());
}
} // namespace streaming