Compare commits
108 Commits
next
...
scylla-3.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9c3cdded9e | ||
|
|
05272c53ed | ||
|
|
393b2abdc9 | ||
|
|
d9dc8f92cc | ||
|
|
c009f7b182 | ||
|
|
303a56f2bd | ||
|
|
57512d3df9 | ||
|
|
a894868298 | ||
|
|
a5d385d702 | ||
|
|
6413063b1b | ||
|
|
0d31c6da62 | ||
|
|
b62bb036ed | ||
|
|
bdabd2e7a4 | ||
|
|
d7fc7bcf9f | ||
|
|
21aec9c7ef | ||
|
|
02ce19e851 | ||
|
|
37c4be5e74 | ||
|
|
d81ac93728 | ||
|
|
024d1563ad | ||
|
|
4a1a281e84 | ||
|
|
d61dd1a933 | ||
|
|
447c1e3bcc | ||
|
|
834b92b3d7 | ||
|
|
2ec036f50c | ||
|
|
958fe2024f | ||
|
|
cd998b949a | ||
|
|
2e1e1392ea | ||
|
|
623ea5e3d9 | ||
|
|
f92a7ca2bf | ||
|
|
d70c2db09c | ||
|
|
e4a39ed319 | ||
|
|
bb70b9ed56 | ||
|
|
e06e795031 | ||
|
|
7d56e8e5bb | ||
|
|
417250607b | ||
|
|
d06bcef3b7 | ||
|
|
50c5cb6861 | ||
|
|
70f5154109 | ||
|
|
329c419c30 | ||
|
|
062d43c76e | ||
|
|
cf4c238b28 | ||
|
|
20090c1992 | ||
|
|
8ffb567474 | ||
|
|
710ec83d12 | ||
|
|
8d7c489436 | ||
|
|
6ec558e3a0 | ||
|
|
b1e2842c8c | ||
|
|
5a273737e3 | ||
|
|
b0d2312623 | ||
|
|
2f007d8e6b | ||
|
|
bebfd7b26c | ||
|
|
03b48b2caf | ||
|
|
95362624bc | ||
|
|
7865c314a5 | ||
|
|
0e6b62244c | ||
|
|
9d722a56b3 | ||
|
|
7009d5fb23 | ||
|
|
eb49fae020 | ||
|
|
92bf928170 | ||
|
|
deac0b0e94 | ||
|
|
c294000113 | ||
|
|
18bb2045aa | ||
|
|
5e3276d08f | ||
|
|
acff367ea8 | ||
|
|
e39724a343 | ||
|
|
31c4db83d8 | ||
|
|
433cb93f7a | ||
|
|
f553819919 | ||
|
|
48c34e7635 | ||
|
|
7f85b30941 | ||
|
|
7d14514b8a | ||
|
|
35f906f06f | ||
|
|
2c50a484f5 | ||
|
|
24ddb46707 | ||
|
|
f2fc3f32af | ||
|
|
c9f488ddc2 | ||
|
|
46498e77b8 | ||
|
|
440f33709e | ||
|
|
34696e1582 | ||
|
|
43bb290705 | ||
|
|
53980816de | ||
|
|
c1f4617530 | ||
|
|
efde9416ed | ||
|
|
224f9cee7e | ||
|
|
cd1d13f805 | ||
|
|
899291bc9b | ||
|
|
4130973f51 | ||
|
|
24e2c72888 | ||
|
|
69cc7d89c8 | ||
|
|
5f6c5d566a | ||
|
|
f32aea3834 | ||
|
|
933260cb53 | ||
|
|
f8ff0e1993 | ||
|
|
1fbab82553 | ||
|
|
c664615960 | ||
|
|
6a682dc5a2 | ||
|
|
c1271d08d3 | ||
|
|
0d5c2501b3 | ||
|
|
0dd84898ee | ||
|
|
d568270d7f | ||
|
|
78c57f18c4 | ||
|
|
ce27949797 | ||
|
|
6b47e23d29 | ||
|
|
1cb6cc0ac4 | ||
|
|
67435eff15 | ||
|
|
086ce13fb9 | ||
|
|
eb9a8f4442 | ||
|
|
178fb5fe5f |
2
.gitmodules
vendored
2
.gitmodules
vendored
@@ -1,6 +1,6 @@
|
||||
[submodule "seastar"]
|
||||
path = seastar
|
||||
url = ../seastar
|
||||
url = ../scylla-seastar
|
||||
ignore = dirty
|
||||
[submodule "swagger-ui"]
|
||||
path = swagger-ui
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
|
||||
PRODUCT=scylla
|
||||
VERSION=666.development
|
||||
VERSION=3.1.0
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
18
api/api.hh
18
api/api.hh
@@ -22,6 +22,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <seastar/json/json_elements.hh>
|
||||
#include <type_traits>
|
||||
#include <boost/lexical_cast.hpp>
|
||||
#include <boost/algorithm/string/split.hpp>
|
||||
#include <boost/algorithm/string/classification.hpp>
|
||||
@@ -231,7 +232,22 @@ public:
|
||||
return;
|
||||
}
|
||||
try {
|
||||
value = T{boost::lexical_cast<Base>(param)};
|
||||
// boost::lexical_cast does not use boolalpha. Converting a
|
||||
// true/false throws exceptions. We don't want that.
|
||||
if constexpr (std::is_same_v<Base, bool>) {
|
||||
// Cannot use boolalpha because we (probably) want to
|
||||
// accept 1 and 0 as well as true and false. And True. And fAlse.
|
||||
std::transform(param.begin(), param.end(), param.begin(), ::tolower);
|
||||
if (param == "true" || param == "1") {
|
||||
value = T(true);
|
||||
} else if (param == "false" || param == "0") {
|
||||
value = T(false);
|
||||
} else {
|
||||
throw boost::bad_lexical_cast{};
|
||||
}
|
||||
} else {
|
||||
value = T{boost::lexical_cast<Base>(param)};
|
||||
}
|
||||
} catch (boost::bad_lexical_cast&) {
|
||||
throw bad_param_exception(format("{} ({}): type error - should be {}", name, param, boost::units::detail::demangle(typeid(Base).name())));
|
||||
}
|
||||
|
||||
@@ -170,7 +170,9 @@ future<> service::start() {
|
||||
return once_among_shards([this] {
|
||||
return create_keyspace_if_missing();
|
||||
}).then([this] {
|
||||
return when_all_succeed(_role_manager->start(), _authorizer->start(), _authenticator->start());
|
||||
return _role_manager->start().then([this] {
|
||||
return when_all_succeed(_authorizer->start(), _authenticator->start());
|
||||
});
|
||||
}).then([this] {
|
||||
_permissions_cache = std::make_unique<permissions_cache>(_permissions_cache_config, *this, log);
|
||||
}).then([this] {
|
||||
|
||||
@@ -596,6 +596,7 @@ scylla_core = (['database.cc',
|
||||
'db/consistency_level.cc',
|
||||
'db/system_keyspace.cc',
|
||||
'db/system_distributed_keyspace.cc',
|
||||
'db/size_estimates_virtual_reader.cc',
|
||||
'db/schema_tables.cc',
|
||||
'db/cql_type_parser.cc',
|
||||
'db/legacy_schema_migrator.cc',
|
||||
|
||||
@@ -222,11 +222,9 @@ statement_restrictions::statement_restrictions(database& db,
|
||||
auto& cf = db.find_column_family(schema);
|
||||
auto& sim = cf.get_index_manager();
|
||||
const allow_local_index allow_local(!_partition_key_restrictions->has_unrestricted_components(*_schema) && _partition_key_restrictions->is_all_eq());
|
||||
bool has_queriable_clustering_column_index = _clustering_columns_restrictions->has_supporting_index(sim, allow_local);
|
||||
bool has_queriable_pk_index = _partition_key_restrictions->has_supporting_index(sim, allow_local);
|
||||
bool has_queriable_index = has_queriable_clustering_column_index
|
||||
|| has_queriable_pk_index
|
||||
|| _nonprimary_key_restrictions->has_supporting_index(sim, allow_local);
|
||||
const bool has_queriable_clustering_column_index = _clustering_columns_restrictions->has_supporting_index(sim, allow_local);
|
||||
const bool has_queriable_pk_index = _partition_key_restrictions->has_supporting_index(sim, allow_local);
|
||||
const bool has_queriable_regular_index = _nonprimary_key_restrictions->has_supporting_index(sim, allow_local);
|
||||
|
||||
// At this point, the select statement if fully constructed, but we still have a few things to validate
|
||||
process_partition_key_restrictions(has_queriable_pk_index, for_view, allow_filtering);
|
||||
@@ -286,7 +284,7 @@ statement_restrictions::statement_restrictions(database& db,
|
||||
}
|
||||
|
||||
if (!_nonprimary_key_restrictions->empty()) {
|
||||
if (has_queriable_index) {
|
||||
if (has_queriable_regular_index) {
|
||||
_uses_secondary_indexing = true;
|
||||
} else if (!allow_filtering) {
|
||||
throw exceptions::invalid_request_exception("Cannot execute this query as it might involve data filtering and "
|
||||
@@ -392,8 +390,9 @@ std::vector<const column_definition*> statement_restrictions::get_column_defs_fo
|
||||
}
|
||||
}
|
||||
}
|
||||
if (_clustering_columns_restrictions->needs_filtering(*_schema)) {
|
||||
column_id first_filtering_id = _schema->clustering_key_columns().begin()->id +
|
||||
const bool pk_has_unrestricted_components = _partition_key_restrictions->has_unrestricted_components(*_schema);
|
||||
if (pk_has_unrestricted_components || _clustering_columns_restrictions->needs_filtering(*_schema)) {
|
||||
column_id first_filtering_id = pk_has_unrestricted_components ? 0 : _schema->clustering_key_columns().begin()->id +
|
||||
_clustering_columns_restrictions->num_prefix_columns_that_need_not_be_filtered();
|
||||
for (auto&& cdef : _clustering_columns_restrictions->get_column_defs()) {
|
||||
if (cdef->id >= first_filtering_id && !column_uses_indexing(cdef)) {
|
||||
@@ -507,10 +506,9 @@ bool statement_restrictions::need_filtering() const {
|
||||
int number_of_filtering_restrictions = _nonprimary_key_restrictions->size();
|
||||
// If the whole partition key is restricted, it does not imply filtering
|
||||
if (_partition_key_restrictions->has_unrestricted_components(*_schema) || !_partition_key_restrictions->is_all_eq()) {
|
||||
number_of_filtering_restrictions += _partition_key_restrictions->size();
|
||||
if (_clustering_columns_restrictions->has_unrestricted_components(*_schema)) {
|
||||
number_of_filtering_restrictions += _clustering_columns_restrictions->size() - _clustering_columns_restrictions->prefix_size();
|
||||
}
|
||||
number_of_filtering_restrictions += _partition_key_restrictions->size() + _clustering_columns_restrictions->size();
|
||||
} else if (_clustering_columns_restrictions->has_unrestricted_components(*_schema)) {
|
||||
number_of_filtering_restrictions += _clustering_columns_restrictions->size() - _clustering_columns_restrictions->prefix_size();
|
||||
}
|
||||
return number_of_restricted_columns_for_indexing > 1
|
||||
|| (number_of_restricted_columns_for_indexing == 0 && _partition_key_restrictions->empty() && !_clustering_columns_restrictions->empty())
|
||||
|
||||
@@ -407,7 +407,7 @@ public:
|
||||
}
|
||||
|
||||
bool ck_restrictions_need_filtering() const {
|
||||
return _clustering_columns_restrictions->needs_filtering(*_schema);
|
||||
return _partition_key_restrictions->has_unrestricted_components(*_schema) || _clustering_columns_restrictions->needs_filtering(*_schema);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -83,6 +83,9 @@ void metadata::maybe_set_paging_state(::shared_ptr<const service::pager::paging_
|
||||
assert(paging_state);
|
||||
if (paging_state->get_remaining() > 0) {
|
||||
set_paging_state(std::move(paging_state));
|
||||
} else {
|
||||
_flags.remove<flag::HAS_MORE_PAGES>();
|
||||
_paging_state = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -76,7 +76,7 @@ public:
|
||||
const bool _is_distinct;
|
||||
const bool _allow_filtering;
|
||||
const bool _is_json;
|
||||
bool _bypass_cache;
|
||||
bool _bypass_cache = false;
|
||||
public:
|
||||
parameters();
|
||||
parameters(orderings_type orderings,
|
||||
|
||||
@@ -1929,7 +1929,7 @@ flat_mutation_reader make_multishard_streaming_reader(distributed<database>& db,
|
||||
virtual flat_mutation_reader create_reader(
|
||||
schema_ptr schema,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice&,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
tracing::trace_state_ptr,
|
||||
mutation_reader::forwarding fwd_mr) override {
|
||||
@@ -1940,7 +1940,7 @@ flat_mutation_reader make_multishard_streaming_reader(distributed<database>& db,
|
||||
_contexts[shard].read_operation = make_foreign(std::make_unique<utils::phased_barrier::operation>(cf.read_in_progress()));
|
||||
_contexts[shard].semaphore = &cf.streaming_read_concurrency_semaphore();
|
||||
|
||||
return cf.make_streaming_reader(std::move(schema), *_contexts[shard].range, fwd_mr);
|
||||
return cf.make_streaming_reader(std::move(schema), *_contexts[shard].range, slice, fwd_mr);
|
||||
}
|
||||
virtual void destroy_reader(shard_id shard, future<stopped_reader> reader_fut) noexcept override {
|
||||
reader_fut.then([this, zis = shared_from_this(), shard] (stopped_reader&& reader) mutable {
|
||||
|
||||
16
database.hh
16
database.hh
@@ -458,6 +458,7 @@ private:
|
||||
// This semaphore ensures that an operation like snapshot won't have its selected
|
||||
// sstables deleted by compaction in parallel, a race condition which could
|
||||
// easily result in failure.
|
||||
// Locking order: must be acquired either independently or after _sstables_lock
|
||||
seastar::semaphore _sstable_deletion_sem = {1};
|
||||
// There are situations in which we need to stop writing sstables. Flushers will take
|
||||
// the read lock, and the ones that wish to stop that process will take the write lock.
|
||||
@@ -679,8 +680,13 @@ public:
|
||||
|
||||
// Single range overload.
|
||||
flat_mutation_reader make_streaming_reader(schema_ptr schema, const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::no) const;
|
||||
|
||||
flat_mutation_reader make_streaming_reader(schema_ptr schema, const dht::partition_range& range) {
|
||||
return make_streaming_reader(schema, range, schema->full_slice());
|
||||
}
|
||||
|
||||
sstables::shared_sstable make_streaming_sstable_for_write(std::optional<sstring> subdir = {});
|
||||
sstables::shared_sstable make_streaming_staging_sstable() {
|
||||
return make_streaming_sstable_for_write("staging");
|
||||
@@ -759,13 +765,7 @@ public:
|
||||
|
||||
// SSTable writes are now allowed again, and generation is updated to new_generation if != -1
|
||||
// returns the amount of microseconds elapsed since we disabled writes.
|
||||
std::chrono::steady_clock::duration enable_sstable_write(int64_t new_generation) {
|
||||
if (new_generation != -1) {
|
||||
update_sstables_known_generation(new_generation);
|
||||
}
|
||||
_sstables_lock.write_unlock();
|
||||
return std::chrono::steady_clock::now() - _sstable_writes_disabled_at;
|
||||
}
|
||||
std::chrono::steady_clock::duration enable_sstable_write(int64_t new_generation);
|
||||
|
||||
// Make sure the generation numbers are sequential, starting from "start".
|
||||
// Generations before "start" are left untouched.
|
||||
@@ -935,7 +935,7 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
future<row_locker::lock_holder> do_push_view_replica_updates(const schema_ptr& s, mutation&& m, db::timeout_clock::time_point timeout, mutation_source&& source) const;
|
||||
future<row_locker::lock_holder> do_push_view_replica_updates(const schema_ptr& s, mutation&& m, db::timeout_clock::time_point timeout, mutation_source&& source, const io_priority_class& io_priority) const;
|
||||
std::vector<view_ptr> affected_views(const schema_ptr& base, const mutation& update) const;
|
||||
future<> generate_and_propagate_view_updates(const schema_ptr& base,
|
||||
std::vector<view_ptr>&& views,
|
||||
|
||||
@@ -396,10 +396,8 @@ std::unordered_set<gms::inet_address> db::batchlog_manager::endpoint_filter(cons
|
||||
|
||||
// grab a random member of up to two racks
|
||||
for (auto& rack : racks) {
|
||||
auto rack_members = validated.bucket(rack);
|
||||
auto n = validated.bucket_size(rack_members);
|
||||
auto cpy = boost::copy_range<std::vector<gms::inet_address>>(validated.equal_range(rack) | boost::adaptors::map_values);
|
||||
std::uniform_int_distribution<size_t> rdist(0, n - 1);
|
||||
std::uniform_int_distribution<size_t> rdist(0, cpy.size() - 1);
|
||||
result.emplace(cpy[rdist(_e1)]);
|
||||
}
|
||||
|
||||
|
||||
@@ -148,9 +148,18 @@ db::commitlog::descriptor::descriptor(const sstring& filename, const std::string
|
||||
: descriptor([&filename, &fname_prefix]() {
|
||||
std::smatch m;
|
||||
// match both legacy and new version of commitlogs Ex: CommitLog-12345.log and CommitLog-4-12345.log.
|
||||
std::regex rx("(?:.*/)?(?:Recycled-)?" + fname_prefix + "((\\d+)(" + SEPARATOR + "\\d+)?)" + FILENAME_EXTENSION);
|
||||
std::regex rx("(?:Recycled-)?" + fname_prefix + "((\\d+)(" + SEPARATOR + "\\d+)?)" + FILENAME_EXTENSION);
|
||||
std::string sfilename = filename;
|
||||
if (!std::regex_match(sfilename, m, rx)) {
|
||||
auto cbegin = sfilename.cbegin();
|
||||
// skip the leading path
|
||||
// Note: we're using rfind rather than the regex above
|
||||
// since it may run out of stack in debug builds.
|
||||
// See https://github.com/scylladb/scylla/issues/4464
|
||||
auto pos = std::string(filename).rfind('/');
|
||||
if (pos != std::string::npos) {
|
||||
cbegin += pos + 1;
|
||||
}
|
||||
if (!std::regex_match(cbegin, sfilename.cend(), m, rx)) {
|
||||
throw std::domain_error("Cannot parse the version of the file: " + filename);
|
||||
}
|
||||
if (m[3].length() == 0) {
|
||||
@@ -420,7 +429,11 @@ class db::commitlog::segment : public enable_shared_from_this<segment>, public c
|
||||
|
||||
uint64_t _file_pos = 0;
|
||||
uint64_t _flush_pos = 0;
|
||||
|
||||
bool _closed = false;
|
||||
// Not the same as _closed since files can be reused
|
||||
bool _closed_file = false;
|
||||
|
||||
bool _terminated = false;
|
||||
|
||||
using buffer_type = segment_manager::buffer_type;
|
||||
@@ -486,7 +499,7 @@ public:
|
||||
clogger.debug("Created new {} segment {}", active ? "active" : "reserve", *this);
|
||||
}
|
||||
~segment() {
|
||||
if (!_closed) {
|
||||
if (!_closed_file) {
|
||||
_segment_manager->add_file_to_close(std::move(_file));
|
||||
}
|
||||
if (is_clean()) {
|
||||
@@ -560,7 +573,7 @@ public:
|
||||
// and we should have waited out all pending.
|
||||
return me->_pending_ops.close().finally([me] {
|
||||
return me->_file.truncate(me->_flush_pos).then([me] {
|
||||
return me->_file.close();
|
||||
return me->_file.close().finally([me] { me->_closed_file = true; });
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -756,6 +756,7 @@ public:
|
||||
val(enable_dangerous_direct_import_of_cassandra_counters, bool, false, Used, "Only turn this option on if you want to import tables from Cassandra containing counters, and you are SURE that no counters in that table were created in a version earlier than Cassandra 2.1." \
|
||||
" It is not enough to have ever since upgraded to newer versions of Cassandra. If you EVER used a version earlier than 2.1 in the cluster where these SSTables come from, DO NOT TURN ON THIS OPTION! You will corrupt your data. You have been warned.") \
|
||||
val(enable_shard_aware_drivers, bool, true, Used, "Enable native transport drivers to use connection-per-shard for better performance") \
|
||||
val(abort_on_internal_error, bool, false, Used, "Abort the server instead of throwing exception when internal invariants are violated.") \
|
||||
/* done! */
|
||||
|
||||
#define _make_value_member(name, type, deflt, status, desc, ...) \
|
||||
|
||||
@@ -57,9 +57,30 @@ static ::shared_ptr<cql3::cql3_type::raw> parse_raw(const sstring& str) {
|
||||
}
|
||||
|
||||
data_type db::cql_type_parser::parse(const sstring& keyspace, const sstring& str, lw_shared_ptr<user_types_metadata> user_types) {
|
||||
static const thread_local std::unordered_map<sstring, cql3::cql3_type> native_types = []{
|
||||
std::unordered_map<sstring, cql3::cql3_type> res;
|
||||
for (auto& nt : cql3::cql3_type::values()) {
|
||||
res.emplace(nt.to_string(), nt);
|
||||
}
|
||||
return res;
|
||||
}();
|
||||
|
||||
auto i = native_types.find(str);
|
||||
if (i != native_types.end()) {
|
||||
return i->second.get_type();
|
||||
}
|
||||
|
||||
if (!user_types && service::get_storage_proxy().local_is_initialized()) {
|
||||
user_types = service::get_storage_proxy().local().get_db().local().find_keyspace(keyspace).metadata()->user_types();
|
||||
}
|
||||
// special-case top-level UDTs
|
||||
if (user_types) {
|
||||
auto& map = user_types->get_all_types();
|
||||
auto i = map.find(utf8_type->decompose(str));
|
||||
if (i != map.end()) {
|
||||
return i->second;
|
||||
}
|
||||
}
|
||||
|
||||
auto raw = parse_raw(str);
|
||||
auto cql = raw->prepare_internal(keyspace, user_types);
|
||||
|
||||
@@ -57,7 +57,7 @@ void data_listeners::on_write(const schema_ptr& s, const frozen_mutation& m) {
|
||||
}
|
||||
}
|
||||
|
||||
toppartitons_item_key::operator sstring() const {
|
||||
toppartitions_item_key::operator sstring() const {
|
||||
std::ostringstream oss;
|
||||
oss << key.key().with_schema(*schema);
|
||||
return oss.str();
|
||||
@@ -84,8 +84,11 @@ flat_mutation_reader toppartitions_data_listener::on_read(const schema_ptr& s, c
|
||||
return std::move(rd);
|
||||
}
|
||||
dblog.trace("toppartitions_data_listener::on_read: {}.{}", s->ks_name(), s->cf_name());
|
||||
return make_filtering_reader(std::move(rd), [this, &range, &slice, s = std::move(s)] (const dht::decorated_key& dk) {
|
||||
_top_k_read.append(toppartitons_item_key{s, dk});
|
||||
return make_filtering_reader(std::move(rd), [zis = this->weak_from_this(), &range, &slice, s = std::move(s)] (const dht::decorated_key& dk) {
|
||||
// The data query may be executing after the toppartitions_data_listener object has been removed, so check
|
||||
if (zis) {
|
||||
zis->_top_k_read.append(toppartitions_item_key{s, dk});
|
||||
}
|
||||
return true;
|
||||
});
|
||||
}
|
||||
@@ -95,7 +98,27 @@ void toppartitions_data_listener::on_write(const schema_ptr& s, const frozen_mut
|
||||
return;
|
||||
}
|
||||
dblog.trace("toppartitions_data_listener::on_write: {}.{}", _ks, _cf);
|
||||
_top_k_write.append(toppartitons_item_key{s, m.decorated_key(*s)});
|
||||
_top_k_write.append(toppartitions_item_key{s, m.decorated_key(*s)});
|
||||
}
|
||||
|
||||
toppartitions_data_listener::global_top_k::results
|
||||
toppartitions_data_listener::globalize(top_k::results&& r) {
|
||||
toppartitions_data_listener::global_top_k::results n;
|
||||
n.reserve(r.size());
|
||||
for (auto&& e : r) {
|
||||
n.emplace_back(global_top_k::results::value_type{toppartitions_global_item_key(std::move(e.item)), e.count, e.error});
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
toppartitions_data_listener::top_k::results
|
||||
toppartitions_data_listener::localize(const global_top_k::results& r) {
|
||||
toppartitions_data_listener::top_k::results n;
|
||||
n.reserve(r.size());
|
||||
for (auto&& e : r) {
|
||||
n.emplace_back(top_k::results::value_type{toppartitions_item_key(e.item), e.count, e.error});
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
toppartitions_query::toppartitions_query(distributed<database>& xdb, sstring ks, sstring cf,
|
||||
@@ -108,20 +131,20 @@ future<> toppartitions_query::scatter() {
|
||||
return _query.start(std::ref(_xdb), _ks, _cf);
|
||||
}
|
||||
|
||||
using top_t = toppartitions_data_listener::top_k::results;
|
||||
using top_t = toppartitions_data_listener::global_top_k::results;
|
||||
|
||||
future<toppartitions_query::results> toppartitions_query::gather(unsigned res_size) {
|
||||
dblog.debug("toppartitions_query::gather");
|
||||
|
||||
auto map = [res_size, this] (toppartitions_data_listener& listener) {
|
||||
dblog.trace("toppartitions_query::map_reduce with listener {}", &listener);
|
||||
top_t rd = listener._top_k_read.top(res_size);
|
||||
top_t wr = listener._top_k_write.top(res_size);
|
||||
return std::tuple<top_t, top_t>{std::move(rd), std::move(wr)};
|
||||
top_t rd = toppartitions_data_listener::globalize(listener._top_k_read.top(res_size));
|
||||
top_t wr = toppartitions_data_listener::globalize(listener._top_k_write.top(res_size));
|
||||
return make_foreign(std::make_unique<std::tuple<top_t, top_t>>(std::move(rd), std::move(wr)));
|
||||
};
|
||||
auto reduce = [this] (results res, std::tuple<top_t, top_t> rd_wr) {
|
||||
res.read.append(std::get<0>(rd_wr));
|
||||
res.write.append(std::get<1>(rd_wr));
|
||||
auto reduce = [this] (results res, foreign_ptr<std::unique_ptr<std::tuple<top_t, top_t>>> rd_wr) {
|
||||
res.read.append(toppartitions_data_listener::localize(std::get<0>(*rd_wr)));
|
||||
res.write.append(toppartitions_data_listener::localize(std::get<1>(*rd_wr)));
|
||||
return std::move(res);
|
||||
};
|
||||
return _query.map_reduce0(map, results{res_size}, reduce)
|
||||
|
||||
@@ -24,12 +24,14 @@
|
||||
#include <seastar/core/distributed.hh>
|
||||
#include <seastar/core/future.hh>
|
||||
#include <seastar/core/distributed.hh>
|
||||
#include <seastar/core/weak_ptr.hh>
|
||||
|
||||
#include "schema.hh"
|
||||
#include "flat_mutation_reader.hh"
|
||||
#include "mutation_reader.hh"
|
||||
#include "frozen_mutation.hh"
|
||||
#include "utils/top_k.hh"
|
||||
#include "schema_registry.hh"
|
||||
|
||||
#include <vector>
|
||||
#include <set>
|
||||
@@ -75,29 +77,54 @@ public:
|
||||
};
|
||||
|
||||
|
||||
struct toppartitons_item_key {
|
||||
struct toppartitions_item_key {
|
||||
schema_ptr schema;
|
||||
dht::decorated_key key;
|
||||
|
||||
toppartitons_item_key(const schema_ptr& schema, const dht::decorated_key& key) : schema(schema), key(key) {}
|
||||
toppartitons_item_key(const toppartitons_item_key& key) noexcept : schema(key.schema), key(key.key) {}
|
||||
toppartitions_item_key(const schema_ptr& schema, const dht::decorated_key& key) : schema(schema), key(key) {}
|
||||
toppartitions_item_key(const toppartitions_item_key& key) noexcept : schema(key.schema), key(key.key) {}
|
||||
|
||||
struct hash {
|
||||
size_t operator()(const toppartitons_item_key& k) const {
|
||||
size_t operator()(const toppartitions_item_key& k) const {
|
||||
return std::hash<dht::token>()(k.key.token());
|
||||
}
|
||||
};
|
||||
|
||||
struct comp {
|
||||
bool operator()(const toppartitons_item_key& k1, const toppartitons_item_key& k2) const {
|
||||
return k1.schema == k2.schema && k1.key.equal(*k2.schema, k2.key);
|
||||
bool operator()(const toppartitions_item_key& k1, const toppartitions_item_key& k2) const {
|
||||
return k1.schema->id() == k2.schema->id() && k1.key.equal(*k2.schema, k2.key);
|
||||
}
|
||||
};
|
||||
|
||||
explicit operator sstring() const;
|
||||
};
|
||||
|
||||
class toppartitions_data_listener : public data_listener {
|
||||
// Like toppartitions_item_key, but uses global_schema_ptr, so can be safely transported across shards
|
||||
struct toppartitions_global_item_key {
|
||||
global_schema_ptr schema;
|
||||
dht::decorated_key key;
|
||||
|
||||
toppartitions_global_item_key(toppartitions_item_key&& tik) : schema(std::move(tik.schema)), key(std::move(tik.key)) {}
|
||||
operator toppartitions_item_key() const {
|
||||
return toppartitions_item_key(schema, key);
|
||||
}
|
||||
|
||||
struct hash {
|
||||
size_t operator()(const toppartitions_global_item_key& k) const {
|
||||
return std::hash<dht::token>()(k.key.token());
|
||||
}
|
||||
};
|
||||
|
||||
struct comp {
|
||||
bool operator()(const toppartitions_global_item_key& k1, const toppartitions_global_item_key& k2) const {
|
||||
return k1.schema.get()->id() == k2.schema.get()->id() && k1.key.equal(*k2.schema.get(), k2.key);
|
||||
}
|
||||
};
|
||||
|
||||
explicit operator sstring() const;
|
||||
};
|
||||
|
||||
class toppartitions_data_listener : public data_listener, public weakly_referencable<toppartitions_data_listener> {
|
||||
friend class toppartitions_query;
|
||||
|
||||
database& _db;
|
||||
@@ -105,7 +132,11 @@ class toppartitions_data_listener : public data_listener {
|
||||
sstring _cf;
|
||||
|
||||
public:
|
||||
using top_k = utils::space_saving_top_k<toppartitons_item_key, toppartitons_item_key::hash, toppartitons_item_key::comp>;
|
||||
using top_k = utils::space_saving_top_k<toppartitions_item_key, toppartitions_item_key::hash, toppartitions_item_key::comp>;
|
||||
using global_top_k = utils::space_saving_top_k<toppartitions_global_item_key, toppartitions_global_item_key::hash, toppartitions_global_item_key::comp>;
|
||||
public:
|
||||
static global_top_k::results globalize(top_k::results&& r);
|
||||
static top_k::results localize(const global_top_k::results& r);
|
||||
private:
|
||||
top_k _top_k_read;
|
||||
top_k _top_k_write;
|
||||
|
||||
@@ -118,8 +118,8 @@ future<> manager::stop() {
|
||||
|
||||
return _draining_eps_gate.close().finally([this] {
|
||||
return parallel_for_each(_ep_managers, [] (auto& pair) {
|
||||
return pair.second.stop();
|
||||
}).finally([this] {
|
||||
return pair.second.stop();
|
||||
}).finally([this] {
|
||||
_ep_managers.clear();
|
||||
manager_logger.info("Stopped");
|
||||
}).discard_result();
|
||||
@@ -240,6 +240,8 @@ future<> manager::end_point_hints_manager::stop(drain should_drain) noexcept {
|
||||
manager::end_point_hints_manager::end_point_hints_manager(const key_type& key, manager& shard_manager)
|
||||
: _key(key)
|
||||
, _shard_manager(shard_manager)
|
||||
, _file_update_mutex_ptr(make_lw_shared<seastar::shared_mutex>())
|
||||
, _file_update_mutex(*_file_update_mutex_ptr)
|
||||
, _state(state_set::of<state::stopped>())
|
||||
, _hints_dir(_shard_manager.hints_dir() / format("{}", _key).c_str())
|
||||
, _sender(*this, _shard_manager.local_storage_proxy(), _shard_manager.local_db(), _shard_manager.local_gossiper())
|
||||
@@ -248,6 +250,8 @@ manager::end_point_hints_manager::end_point_hints_manager(const key_type& key, m
|
||||
manager::end_point_hints_manager::end_point_hints_manager(end_point_hints_manager&& other)
|
||||
: _key(other._key)
|
||||
, _shard_manager(other._shard_manager)
|
||||
, _file_update_mutex_ptr(std::move(other._file_update_mutex_ptr))
|
||||
, _file_update_mutex(*_file_update_mutex_ptr)
|
||||
, _state(other._state)
|
||||
, _hints_dir(std::move(other._hints_dir))
|
||||
, _sender(other._sender, *this)
|
||||
@@ -520,28 +524,35 @@ void manager::drain_for(gms::inet_address endpoint) {
|
||||
manager_logger.trace("on_leave_cluster: {} is removed/decommissioned", endpoint);
|
||||
|
||||
with_gate(_draining_eps_gate, [this, endpoint] {
|
||||
return futurize_apply([this, endpoint] () {
|
||||
if (utils::fb_utilities::is_me(endpoint)) {
|
||||
return parallel_for_each(_ep_managers, [] (auto& pair) {
|
||||
return pair.second.stop(drain::yes).finally([&pair] {
|
||||
return remove_file(pair.second.hints_dir().c_str());
|
||||
return with_semaphore(drain_lock(), 1, [this, endpoint] {
|
||||
return futurize_apply([this, endpoint] () {
|
||||
if (utils::fb_utilities::is_me(endpoint)) {
|
||||
return parallel_for_each(_ep_managers, [] (auto& pair) {
|
||||
return pair.second.stop(drain::yes).finally([&pair] {
|
||||
return with_file_update_mutex(pair.second, [&pair] {
|
||||
return remove_file(pair.second.hints_dir().c_str());
|
||||
});
|
||||
});
|
||||
}).finally([this] {
|
||||
_ep_managers.clear();
|
||||
});
|
||||
}).finally([this] {
|
||||
_ep_managers.clear();
|
||||
});
|
||||
} else {
|
||||
ep_managers_map_type::iterator ep_manager_it = find_ep_manager(endpoint);
|
||||
if (ep_manager_it != ep_managers_end()) {
|
||||
return ep_manager_it->second.stop(drain::yes).finally([this, endpoint, hints_dir = ep_manager_it->second.hints_dir()] {
|
||||
_ep_managers.erase(endpoint);
|
||||
return remove_file(hints_dir.c_str());
|
||||
});
|
||||
}
|
||||
} else {
|
||||
ep_managers_map_type::iterator ep_manager_it = find_ep_manager(endpoint);
|
||||
if (ep_manager_it != ep_managers_end()) {
|
||||
return ep_manager_it->second.stop(drain::yes).finally([this, endpoint, &ep_man = ep_manager_it->second] {
|
||||
return with_file_update_mutex(ep_man, [&ep_man] {
|
||||
return remove_file(ep_man.hints_dir().c_str());
|
||||
}).finally([this, endpoint] {
|
||||
_ep_managers.erase(endpoint);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
return make_ready_future<>();
|
||||
}
|
||||
}).handle_exception([endpoint] (auto eptr) {
|
||||
manager_logger.error("Exception when draining {}: {}", endpoint, eptr);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
}).handle_exception([endpoint] (auto eptr) {
|
||||
manager_logger.error("Exception when draining {}: {}", endpoint, eptr);
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -276,7 +276,8 @@ public:
|
||||
manager& _shard_manager;
|
||||
hints_store_ptr _hints_store_anchor;
|
||||
seastar::gate _store_gate;
|
||||
seastar::shared_mutex _file_update_mutex;
|
||||
lw_shared_ptr<seastar::shared_mutex> _file_update_mutex_ptr;
|
||||
seastar::shared_mutex& _file_update_mutex;
|
||||
|
||||
enum class state {
|
||||
can_hint, // hinting is currently allowed (used by the space_watchdog)
|
||||
@@ -378,8 +379,20 @@ public:
|
||||
return _state.contains(state::stopped);
|
||||
}
|
||||
|
||||
seastar::shared_mutex& file_update_mutex() {
|
||||
return _file_update_mutex;
|
||||
/// \brief Safely runs a given functor under the file_update_mutex of \ref ep_man
|
||||
///
|
||||
/// Runs a given functor under the file_update_mutex of the given end_point_hints_manager instance.
|
||||
/// This function is safe even if \ref ep_man gets destroyed before the future this function returns resolves
|
||||
/// (as long as the \ref func call itself is safe).
|
||||
///
|
||||
/// \tparam Func Functor type.
|
||||
/// \param ep_man end_point_hints_manager instance which file_update_mutex we want to lock.
|
||||
/// \param func Functor to run under the lock.
|
||||
/// \return Whatever \ref func returns.
|
||||
template <typename Func>
|
||||
friend inline auto with_file_update_mutex(end_point_hints_manager& ep_man, Func&& func) {
|
||||
lw_shared_ptr<seastar::shared_mutex> lock_ptr = ep_man._file_update_mutex_ptr;
|
||||
return with_lock(*lock_ptr, std::forward<Func>(func)).finally([lock_ptr] {});
|
||||
}
|
||||
|
||||
const fs::path& hints_dir() const noexcept {
|
||||
@@ -387,6 +400,10 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
seastar::shared_mutex& file_update_mutex() noexcept {
|
||||
return _file_update_mutex;
|
||||
}
|
||||
|
||||
/// \brief Creates a new hints store object.
|
||||
///
|
||||
/// - Creates a hints store directory if doesn't exist: <shard_hints_dir>/<ep_key>
|
||||
@@ -453,6 +470,7 @@ private:
|
||||
stats _stats;
|
||||
seastar::metrics::metric_groups _metrics;
|
||||
std::unordered_set<ep_key_type> _eps_with_pending_hints;
|
||||
seastar::semaphore _drain_lock = {1};
|
||||
|
||||
public:
|
||||
manager(sstring hints_directory, std::vector<sstring> hinted_dcs, int64_t max_hint_window_ms, resource_manager&res_manager, distributed<database>& db);
|
||||
@@ -531,6 +549,10 @@ public:
|
||||
return _hints_dir_device_id;
|
||||
}
|
||||
|
||||
seastar::semaphore& drain_lock() noexcept {
|
||||
return _drain_lock;
|
||||
}
|
||||
|
||||
void allow_hints();
|
||||
void forbid_hints();
|
||||
void forbid_hints_for_eps_with_pending_hints();
|
||||
|
||||
@@ -89,16 +89,27 @@ future<> space_watchdog::stop() noexcept {
|
||||
return std::move(_started);
|
||||
}
|
||||
|
||||
// Called under the end_point_hints_manager::file_update_mutex() of the corresponding end_point_hints_manager instance.
|
||||
future<> space_watchdog::scan_one_ep_dir(fs::path path, manager& shard_manager, ep_key_type ep_key) {
|
||||
return lister::scan_dir(path, { directory_entry_type::regular }, [this, ep_key, &shard_manager] (fs::path dir, directory_entry de) {
|
||||
// Put the current end point ID to state.eps_with_pending_hints when we see the second hints file in its directory
|
||||
if (_files_count == 1) {
|
||||
shard_manager.add_ep_with_pending_hints(ep_key);
|
||||
}
|
||||
++_files_count;
|
||||
return do_with(std::move(path), [this, ep_key, &shard_manager] (fs::path& path) {
|
||||
// It may happen that we get here and the directory has already been deleted in the context of manager::drain_for().
|
||||
// In this case simply bail out.
|
||||
return engine().file_exists(path.native()).then([this, ep_key, &shard_manager, &path] (bool exists) {
|
||||
if (!exists) {
|
||||
return make_ready_future<>();
|
||||
} else {
|
||||
return lister::scan_dir(path, { directory_entry_type::regular }, [this, ep_key, &shard_manager] (fs::path dir, directory_entry de) {
|
||||
// Put the current end point ID to state.eps_with_pending_hints when we see the second hints file in its directory
|
||||
if (_files_count == 1) {
|
||||
shard_manager.add_ep_with_pending_hints(ep_key);
|
||||
}
|
||||
++_files_count;
|
||||
|
||||
return io_check(file_size, (dir / de.name.c_str()).c_str()).then([this] (uint64_t fsize) {
|
||||
_total_size += fsize;
|
||||
return io_check(file_size, (dir / de.name.c_str()).c_str()).then([this] (uint64_t fsize) {
|
||||
_total_size += fsize;
|
||||
});
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -136,7 +147,7 @@ void space_watchdog::on_timer() {
|
||||
// continue to enumeration - there is no one to change them.
|
||||
auto it = shard_manager.find_ep_manager(de.name);
|
||||
if (it != shard_manager.ep_managers_end()) {
|
||||
return with_lock(it->second.file_update_mutex(), [this, &shard_manager, dir = std::move(dir), ep_name = std::move(de.name)]() mutable {
|
||||
return with_file_update_mutex(it->second, [this, &shard_manager, dir = std::move(dir), ep_name = std::move(de.name)] () mutable {
|
||||
return scan_one_ep_dir(dir / ep_name, shard_manager, ep_key_type(ep_name));
|
||||
});
|
||||
} else {
|
||||
|
||||
@@ -26,11 +26,17 @@
|
||||
namespace db {
|
||||
|
||||
enum class schema_feature {
|
||||
VIEW_VIRTUAL_COLUMNS
|
||||
VIEW_VIRTUAL_COLUMNS,
|
||||
|
||||
// When set, the schema digest is calcualted in a way such that it doesn't change after all
|
||||
// tombstones in an empty partition expire.
|
||||
// See https://github.com/scylladb/scylla/issues/4485
|
||||
DIGEST_INSENSITIVE_TO_EXPIRY,
|
||||
};
|
||||
|
||||
using schema_features = enum_set<super_enum<schema_feature,
|
||||
schema_feature::VIEW_VIRTUAL_COLUMNS
|
||||
schema_feature::VIEW_VIRTUAL_COLUMNS,
|
||||
schema_feature::DIGEST_INSENSITIVE_TO_EXPIRY
|
||||
>>;
|
||||
|
||||
}
|
||||
|
||||
@@ -587,9 +587,9 @@ future<utils::UUID> calculate_schema_digest(distributed<service::storage_proxy>&
|
||||
return mutations;
|
||||
});
|
||||
};
|
||||
auto reduce = [] (auto& hash, auto&& mutations) {
|
||||
auto reduce = [features] (auto& hash, auto&& mutations) {
|
||||
for (const mutation& m : mutations) {
|
||||
feed_hash_for_schema_digest(hash, m);
|
||||
feed_hash_for_schema_digest(hash, m, features);
|
||||
}
|
||||
};
|
||||
return do_with(md5_hasher(), all_table_names(features), [features, map, reduce] (auto& hash, auto& tables) {
|
||||
@@ -778,6 +778,13 @@ mutation compact_for_schema_digest(const mutation& m) {
|
||||
return m_compacted;
|
||||
}
|
||||
|
||||
void feed_hash_for_schema_digest(hasher& h, const mutation& m, schema_features features) {
|
||||
auto compacted = compact_for_schema_digest(m);
|
||||
if (!features.contains<schema_feature::DIGEST_INSENSITIVE_TO_EXPIRY>() || !compacted.partition().empty()) {
|
||||
feed_hash(h, compact_for_schema_digest(m));
|
||||
}
|
||||
}
|
||||
|
||||
// Applies deletion of the "version" column to a system_schema.scylla_tables mutation.
|
||||
static void delete_schema_version(mutation& m) {
|
||||
if (m.column_family_id() != scylla_tables()->id()) {
|
||||
@@ -2727,8 +2734,9 @@ namespace legacy {
|
||||
|
||||
table_schema_version schema_mutations::digest() const {
|
||||
md5_hasher h;
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, _columnfamilies);
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, _columns);
|
||||
const db::schema_features no_features;
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, _columnfamilies, no_features);
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, _columns, no_features);
|
||||
return utils::UUID_gen::get_name_UUID(h.finalize());
|
||||
}
|
||||
|
||||
|
||||
@@ -215,10 +215,7 @@ index_metadata_kind deserialize_index_kind(sstring kind);
|
||||
|
||||
mutation compact_for_schema_digest(const mutation& m);
|
||||
|
||||
template<typename Hasher>
|
||||
void feed_hash_for_schema_digest(Hasher& h, const mutation& m) {
|
||||
feed_hash(h, compact_for_schema_digest(m));
|
||||
}
|
||||
void feed_hash_for_schema_digest(hasher&, const mutation&, schema_features);
|
||||
|
||||
} // namespace schema_tables
|
||||
} // namespace db
|
||||
|
||||
328
db/size_estimates_virtual_reader.cc
Normal file
328
db/size_estimates_virtual_reader.cc
Normal file
@@ -0,0 +1,328 @@
|
||||
/*
|
||||
* Copyright (C) 2019 ScyllaDB
|
||||
*
|
||||
* Modified by ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <boost/range/adaptor/indirected.hpp>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
#include <boost/range/adaptor/transformed.hpp>
|
||||
#include <boost/range/algorithm/find_if.hpp>
|
||||
|
||||
#include "clustering_bounds_comparator.hh"
|
||||
#include "database_fwd.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "dht/i_partitioner.hh"
|
||||
#include "partition_range_compat.hh"
|
||||
#include "range.hh"
|
||||
#include "service/storage_service.hh"
|
||||
#include "mutation_fragment.hh"
|
||||
#include "sstables/sstables.hh"
|
||||
#include "db/timeout_clock.hh"
|
||||
#include "database.hh"
|
||||
|
||||
#include "db/size_estimates_virtual_reader.hh"
|
||||
|
||||
namespace db {
|
||||
|
||||
namespace size_estimates {
|
||||
|
||||
struct virtual_row {
|
||||
const bytes& cf_name;
|
||||
const token_range& tokens;
|
||||
clustering_key_prefix as_key() const {
|
||||
return clustering_key_prefix::from_exploded(std::vector<bytes_view>{cf_name, tokens.start, tokens.end});
|
||||
}
|
||||
};
|
||||
|
||||
struct virtual_row_comparator {
|
||||
schema_ptr _schema;
|
||||
virtual_row_comparator(schema_ptr schema) : _schema(schema) { }
|
||||
bool operator()(const clustering_key_prefix& key1, const clustering_key_prefix& key2) {
|
||||
return clustering_key_prefix::prefix_equality_less_compare(*_schema)(key1, key2);
|
||||
}
|
||||
bool operator()(const virtual_row& row, const clustering_key_prefix& key) {
|
||||
return operator()(row.as_key(), key);
|
||||
}
|
||||
bool operator()(const clustering_key_prefix& key, const virtual_row& row) {
|
||||
return operator()(key, row.as_key());
|
||||
}
|
||||
};
|
||||
|
||||
// Iterating over the cartesian product of cf_names and token_ranges.
|
||||
class virtual_row_iterator : public std::iterator<std::input_iterator_tag, const virtual_row> {
|
||||
std::reference_wrapper<const std::vector<bytes>> _cf_names;
|
||||
std::reference_wrapper<const std::vector<token_range>> _ranges;
|
||||
size_t _cf_names_idx = 0;
|
||||
size_t _ranges_idx = 0;
|
||||
public:
|
||||
struct end_iterator_tag {};
|
||||
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges)
|
||||
: _cf_names(std::ref(cf_names))
|
||||
, _ranges(std::ref(ranges))
|
||||
{ }
|
||||
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges, end_iterator_tag)
|
||||
: _cf_names(std::ref(cf_names))
|
||||
, _ranges(std::ref(ranges))
|
||||
, _cf_names_idx(cf_names.size())
|
||||
, _ranges_idx(ranges.size())
|
||||
{
|
||||
if (cf_names.empty() || ranges.empty()) {
|
||||
// The product of an empty range with any range is an empty range.
|
||||
// In this case we want the end iterator to be equal to the begin iterator,
|
||||
// which has_ranges_idx = _cf_names_idx = 0.
|
||||
_ranges_idx = _cf_names_idx = 0;
|
||||
}
|
||||
}
|
||||
virtual_row_iterator& operator++() {
|
||||
if (++_ranges_idx == _ranges.get().size() && ++_cf_names_idx < _cf_names.get().size()) {
|
||||
_ranges_idx = 0;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
virtual_row_iterator operator++(int) {
|
||||
virtual_row_iterator i(*this);
|
||||
++(*this);
|
||||
return i;
|
||||
}
|
||||
const value_type operator*() const {
|
||||
return { _cf_names.get()[_cf_names_idx], _ranges.get()[_ranges_idx] };
|
||||
}
|
||||
bool operator==(const virtual_row_iterator& i) const {
|
||||
return _cf_names_idx == i._cf_names_idx
|
||||
&& _ranges_idx == i._ranges_idx;
|
||||
}
|
||||
bool operator!=(const virtual_row_iterator& i) const {
|
||||
return !(*this == i);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns the keyspaces, ordered by name, as selected by the partition_range.
|
||||
*/
|
||||
static std::vector<sstring> get_keyspaces(const schema& s, const database& db, dht::partition_range range) {
|
||||
struct keyspace_less_comparator {
|
||||
const schema& _s;
|
||||
keyspace_less_comparator(const schema& s) : _s(s) { }
|
||||
dht::ring_position as_ring_position(const sstring& ks) {
|
||||
auto pkey = partition_key::from_single_value(_s, utf8_type->decompose(ks));
|
||||
return dht::global_partitioner().decorate_key(_s, std::move(pkey));
|
||||
}
|
||||
bool operator()(const sstring& ks1, const sstring& ks2) {
|
||||
return as_ring_position(ks1).less_compare(_s, as_ring_position(ks2));
|
||||
}
|
||||
bool operator()(const sstring& ks, const dht::ring_position& rp) {
|
||||
return as_ring_position(ks).less_compare(_s, rp);
|
||||
}
|
||||
bool operator()(const dht::ring_position& rp, const sstring& ks) {
|
||||
return rp.less_compare(_s, as_ring_position(ks));
|
||||
}
|
||||
};
|
||||
auto keyspaces = db.get_non_system_keyspaces();
|
||||
auto cmp = keyspace_less_comparator(s);
|
||||
boost::sort(keyspaces, cmp);
|
||||
return boost::copy_range<std::vector<sstring>>(
|
||||
range.slice(keyspaces, std::move(cmp)) | boost::adaptors::filtered([&s] (const auto& ks) {
|
||||
// If this is a range query, results are divided between shards by the partition key (keyspace_name).
|
||||
return shard_of(dht::global_partitioner().get_token(s,
|
||||
partition_key::from_single_value(s, utf8_type->decompose(ks))))
|
||||
== engine().cpu_id();
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Makes a wrapping range of ring_position from a nonwrapping range of token, used to select sstables.
|
||||
*/
|
||||
static dht::partition_range as_ring_position_range(dht::token_range& r) {
|
||||
std::optional<range<dht::ring_position>::bound> start_bound, end_bound;
|
||||
if (r.start()) {
|
||||
start_bound = {{ dht::ring_position(r.start()->value(), dht::ring_position::token_bound::start), r.start()->is_inclusive() }};
|
||||
}
|
||||
if (r.end()) {
|
||||
end_bound = {{ dht::ring_position(r.end()->value(), dht::ring_position::token_bound::end), r.end()->is_inclusive() }};
|
||||
}
|
||||
return dht::partition_range(std::move(start_bound), std::move(end_bound), r.is_singular());
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new range_estimates for the specified range, considering the sstables associated with `cf`.
|
||||
*/
|
||||
static system_keyspace::range_estimates estimate(const column_family& cf, const token_range& r) {
|
||||
int64_t count{0};
|
||||
utils::estimated_histogram hist{0};
|
||||
auto from_bytes = [] (auto& b) {
|
||||
return dht::global_partitioner().from_sstring(utf8_type->to_string(b));
|
||||
};
|
||||
dht::token_range_vector ranges;
|
||||
::compat::unwrap_into(
|
||||
wrapping_range<dht::token>({{ from_bytes(r.start), false }}, {{ from_bytes(r.end) }}),
|
||||
dht::token_comparator(),
|
||||
[&] (auto&& rng) { ranges.push_back(std::move(rng)); });
|
||||
for (auto&& r : ranges) {
|
||||
auto rp_range = as_ring_position_range(r);
|
||||
for (auto&& sstable : cf.select_sstables(rp_range)) {
|
||||
count += sstable->estimated_keys_for_range(r);
|
||||
hist.merge(sstable->get_stats_metadata().estimated_partition_size);
|
||||
}
|
||||
}
|
||||
return {cf.schema(), r.start, r.end, count, count > 0 ? hist.mean() : 0};
|
||||
}
|
||||
|
||||
future<std::vector<token_range>> get_local_ranges() {
|
||||
auto& ss = service::get_local_storage_service();
|
||||
return ss.get_local_tokens().then([&ss] (auto&& tokens) {
|
||||
auto ranges = ss.get_token_metadata().get_primary_ranges_for(std::move(tokens));
|
||||
std::vector<token_range> local_ranges;
|
||||
auto to_bytes = [](const std::optional<dht::token_range::bound>& b) {
|
||||
assert(b);
|
||||
return utf8_type->decompose(dht::global_partitioner().to_sstring(b->value()));
|
||||
};
|
||||
// We merge the ranges to be compatible with how Cassandra shows it's size estimates table.
|
||||
// All queries will be on that table, where all entries are text and there's no notion of
|
||||
// token ranges form the CQL point of view.
|
||||
auto left_inf = boost::find_if(ranges, [] (auto&& r) {
|
||||
return !r.start() || r.start()->value() == dht::minimum_token();
|
||||
});
|
||||
auto right_inf = boost::find_if(ranges, [] (auto&& r) {
|
||||
return !r.end() || r.start()->value() == dht::maximum_token();
|
||||
});
|
||||
if (left_inf != right_inf && left_inf != ranges.end() && right_inf != ranges.end()) {
|
||||
local_ranges.push_back(token_range{to_bytes(right_inf->start()), to_bytes(left_inf->end())});
|
||||
ranges.erase(left_inf);
|
||||
ranges.erase(right_inf);
|
||||
}
|
||||
for (auto&& r : ranges) {
|
||||
local_ranges.push_back(token_range{to_bytes(r.start()), to_bytes(r.end())});
|
||||
}
|
||||
boost::sort(local_ranges, [] (auto&& tr1, auto&& tr2) {
|
||||
return utf8_type->less(tr1.start, tr2.start);
|
||||
});
|
||||
return local_ranges;
|
||||
});
|
||||
}
|
||||
|
||||
size_estimates_mutation_reader::size_estimates_mutation_reader(schema_ptr schema, const dht::partition_range& prange, const query::partition_slice& slice, streamed_mutation::forwarding fwd)
|
||||
: impl(schema)
|
||||
, _schema(std::move(schema))
|
||||
, _prange(&prange)
|
||||
, _slice(slice)
|
||||
, _fwd(fwd)
|
||||
{ }
|
||||
|
||||
future<> size_estimates_mutation_reader::get_next_partition() {
|
||||
auto& db = service::get_local_storage_proxy().get_db().local();
|
||||
if (!_keyspaces) {
|
||||
_keyspaces = get_keyspaces(*_schema, db, *_prange);
|
||||
_current_partition = _keyspaces->begin();
|
||||
}
|
||||
if (_current_partition == _keyspaces->end()) {
|
||||
_end_of_stream = true;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return get_local_ranges().then([&db, this] (auto&& ranges) {
|
||||
auto estimates = this->estimates_for_current_keyspace(db, std::move(ranges));
|
||||
auto mutations = db::system_keyspace::make_size_estimates_mutation(*_current_partition, std::move(estimates));
|
||||
++_current_partition;
|
||||
std::vector<mutation> ms;
|
||||
ms.emplace_back(std::move(mutations));
|
||||
_partition_reader = flat_mutation_reader_from_mutations(std::move(ms), _fwd);
|
||||
});
|
||||
}
|
||||
|
||||
future<> size_estimates_mutation_reader::fill_buffer(db::timeout_clock::time_point timeout) {
|
||||
return do_until([this, timeout] { return is_end_of_stream() || is_buffer_full(); }, [this, timeout] {
|
||||
if (!_partition_reader) {
|
||||
return get_next_partition();
|
||||
}
|
||||
return _partition_reader->consume_pausable([this] (mutation_fragment mf) {
|
||||
push_mutation_fragment(std::move(mf));
|
||||
return stop_iteration(is_buffer_full());
|
||||
}, timeout).then([this] {
|
||||
if (_partition_reader->is_end_of_stream() && _partition_reader->is_buffer_empty()) {
|
||||
_partition_reader = std::nullopt;
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
void size_estimates_mutation_reader::next_partition() {
|
||||
clear_buffer_to_next_partition();
|
||||
if (is_buffer_empty()) {
|
||||
_partition_reader = std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
future<> size_estimates_mutation_reader::fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) {
|
||||
clear_buffer();
|
||||
_prange = ≺
|
||||
_keyspaces = std::nullopt;
|
||||
_partition_reader = std::nullopt;
|
||||
_end_of_stream = false;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
future<> size_estimates_mutation_reader::fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) {
|
||||
forward_buffer_to(pr.start());
|
||||
_end_of_stream = false;
|
||||
if (_partition_reader) {
|
||||
return _partition_reader->fast_forward_to(std::move(pr), timeout);
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
size_t size_estimates_mutation_reader::buffer_size() const {
|
||||
if (_partition_reader) {
|
||||
return flat_mutation_reader::impl::buffer_size() + _partition_reader->buffer_size();
|
||||
}
|
||||
return flat_mutation_reader::impl::buffer_size();
|
||||
}
|
||||
|
||||
std::vector<db::system_keyspace::range_estimates>
|
||||
size_estimates_mutation_reader::estimates_for_current_keyspace(const database& db, std::vector<token_range> local_ranges) const {
|
||||
// For each specified range, estimate (crudely) mean partition size and partitions count.
|
||||
auto pkey = partition_key::from_single_value(*_schema, utf8_type->decompose(*_current_partition));
|
||||
auto cfs = db.find_keyspace(*_current_partition).metadata()->cf_meta_data();
|
||||
auto cf_names = boost::copy_range<std::vector<bytes>>(cfs | boost::adaptors::transformed([] (auto&& cf) {
|
||||
return utf8_type->decompose(cf.first);
|
||||
}));
|
||||
boost::sort(cf_names, [] (auto&& n1, auto&& n2) {
|
||||
return utf8_type->less(n1, n2);
|
||||
});
|
||||
std::vector<db::system_keyspace::range_estimates> estimates;
|
||||
for (auto& range : _slice.row_ranges(*_schema, pkey)) {
|
||||
auto rows = boost::make_iterator_range(
|
||||
virtual_row_iterator(cf_names, local_ranges),
|
||||
virtual_row_iterator(cf_names, local_ranges, virtual_row_iterator::end_iterator_tag()));
|
||||
auto rows_to_estimate = range.slice(rows, virtual_row_comparator(_schema));
|
||||
for (auto&& r : rows_to_estimate) {
|
||||
auto& cf = db.find_column_family(*_current_partition, utf8_type->to_string(r.cf_name));
|
||||
estimates.push_back(estimate(cf, r.tokens));
|
||||
if (estimates.size() >= _slice.partition_row_limit()) {
|
||||
return estimates;
|
||||
}
|
||||
}
|
||||
}
|
||||
return estimates;
|
||||
}
|
||||
|
||||
} // namespace size_estimates
|
||||
|
||||
} // namespace db
|
||||
@@ -21,33 +21,18 @@
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <boost/range/adaptor/indirected.hpp>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
#include <boost/range/adaptor/transformed.hpp>
|
||||
#include <boost/range/algorithm/find_if.hpp>
|
||||
|
||||
#include "clustering_bounds_comparator.hh"
|
||||
#include "database_fwd.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "dht/i_partitioner.hh"
|
||||
#include "mutation_reader.hh"
|
||||
#include "partition_range_compat.hh"
|
||||
#include "range.hh"
|
||||
#include "service/storage_service.hh"
|
||||
#include "mutation_fragment.hh"
|
||||
#include "sstables/sstables.hh"
|
||||
#include "db/timeout_clock.hh"
|
||||
#include "database.hh"
|
||||
|
||||
namespace db {
|
||||
|
||||
namespace size_estimates {
|
||||
|
||||
struct token_range {
|
||||
bytes start;
|
||||
bytes end;
|
||||
};
|
||||
|
||||
class size_estimates_mutation_reader final : public flat_mutation_reader::impl {
|
||||
struct token_range {
|
||||
bytes start;
|
||||
bytes end;
|
||||
};
|
||||
schema_ptr _schema;
|
||||
const dht::partition_range* _prange;
|
||||
const query::partition_slice& _slice;
|
||||
@@ -57,267 +42,18 @@ class size_estimates_mutation_reader final : public flat_mutation_reader::impl {
|
||||
streamed_mutation::forwarding _fwd;
|
||||
flat_mutation_reader_opt _partition_reader;
|
||||
public:
|
||||
size_estimates_mutation_reader(schema_ptr schema, const dht::partition_range& prange, const query::partition_slice& slice, streamed_mutation::forwarding fwd)
|
||||
: impl(schema)
|
||||
, _schema(std::move(schema))
|
||||
, _prange(&prange)
|
||||
, _slice(slice)
|
||||
, _fwd(fwd)
|
||||
{ }
|
||||
size_estimates_mutation_reader(schema_ptr, const dht::partition_range&, const query::partition_slice&, streamed_mutation::forwarding);
|
||||
|
||||
virtual future<> fill_buffer(db::timeout_clock::time_point) override;
|
||||
virtual void next_partition() override;
|
||||
virtual future<> fast_forward_to(const dht::partition_range&, db::timeout_clock::time_point) override;
|
||||
virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point) override;
|
||||
virtual size_t buffer_size() const override;
|
||||
private:
|
||||
future<> get_next_partition() {
|
||||
// For each specified range, estimate (crudely) mean partition size and partitions count.
|
||||
auto& db = service::get_local_storage_proxy().get_db().local();
|
||||
if (!_keyspaces) {
|
||||
_keyspaces = get_keyspaces(*_schema, db, *_prange);
|
||||
_current_partition = _keyspaces->begin();
|
||||
}
|
||||
if (_current_partition == _keyspaces->end()) {
|
||||
_end_of_stream = true;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return get_local_ranges().then([&db, this] (auto&& ranges) {
|
||||
auto estimates = this->estimates_for_current_keyspace(db, std::move(ranges));
|
||||
auto mutations = db::system_keyspace::make_size_estimates_mutation(*_current_partition, std::move(estimates));
|
||||
++_current_partition;
|
||||
std::vector<mutation> ms;
|
||||
ms.emplace_back(std::move(mutations));
|
||||
_partition_reader = flat_mutation_reader_from_mutations(std::move(ms), _fwd);
|
||||
});
|
||||
}
|
||||
public:
|
||||
virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override {
|
||||
return do_until([this, timeout] { return is_end_of_stream() || is_buffer_full(); }, [this, timeout] {
|
||||
if (!_partition_reader) {
|
||||
return get_next_partition();
|
||||
}
|
||||
return _partition_reader->consume_pausable([this] (mutation_fragment mf) {
|
||||
push_mutation_fragment(std::move(mf));
|
||||
return stop_iteration(is_buffer_full());
|
||||
}, timeout).then([this] {
|
||||
if (_partition_reader->is_end_of_stream() && _partition_reader->is_buffer_empty()) {
|
||||
_partition_reader = std::nullopt;
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
virtual void next_partition() override {
|
||||
clear_buffer_to_next_partition();
|
||||
if (is_buffer_empty()) {
|
||||
_partition_reader = std::nullopt;
|
||||
}
|
||||
}
|
||||
virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override {
|
||||
clear_buffer();
|
||||
_prange = ≺
|
||||
_keyspaces = std::nullopt;
|
||||
_partition_reader = std::nullopt;
|
||||
_end_of_stream = false;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override {
|
||||
forward_buffer_to(pr.start());
|
||||
_end_of_stream = false;
|
||||
if (_partition_reader) {
|
||||
return _partition_reader->fast_forward_to(std::move(pr), timeout);
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
virtual size_t buffer_size() const override {
|
||||
if (_partition_reader) {
|
||||
return flat_mutation_reader::impl::buffer_size() + _partition_reader->buffer_size();
|
||||
}
|
||||
return flat_mutation_reader::impl::buffer_size();
|
||||
}
|
||||
/**
|
||||
* Returns the primary ranges for the local node.
|
||||
* Used for testing as well.
|
||||
*/
|
||||
static future<std::vector<token_range>> get_local_ranges() {
|
||||
auto& ss = service::get_local_storage_service();
|
||||
return ss.get_local_tokens().then([&ss] (auto&& tokens) {
|
||||
auto ranges = ss.get_token_metadata().get_primary_ranges_for(std::move(tokens));
|
||||
std::vector<token_range> local_ranges;
|
||||
auto to_bytes = [](const std::optional<dht::token_range::bound>& b) {
|
||||
assert(b);
|
||||
return utf8_type->decompose(dht::global_partitioner().to_sstring(b->value()));
|
||||
};
|
||||
// We merge the ranges to be compatible with how Cassandra shows it's size estimates table.
|
||||
// All queries will be on that table, where all entries are text and there's no notion of
|
||||
// token ranges form the CQL point of view.
|
||||
auto left_inf = boost::find_if(ranges, [] (auto&& r) {
|
||||
return !r.start() || r.start()->value() == dht::minimum_token();
|
||||
});
|
||||
auto right_inf = boost::find_if(ranges, [] (auto&& r) {
|
||||
return !r.end() || r.start()->value() == dht::maximum_token();
|
||||
});
|
||||
if (left_inf != right_inf && left_inf != ranges.end() && right_inf != ranges.end()) {
|
||||
local_ranges.push_back(token_range{to_bytes(right_inf->start()), to_bytes(left_inf->end())});
|
||||
ranges.erase(left_inf);
|
||||
ranges.erase(right_inf);
|
||||
}
|
||||
for (auto&& r : ranges) {
|
||||
local_ranges.push_back(token_range{to_bytes(r.start()), to_bytes(r.end())});
|
||||
}
|
||||
boost::sort(local_ranges, [] (auto&& tr1, auto&& tr2) {
|
||||
return utf8_type->less(tr1.start, tr2.start);
|
||||
});
|
||||
return local_ranges;
|
||||
});
|
||||
}
|
||||
private:
|
||||
struct virtual_row {
|
||||
const bytes& cf_name;
|
||||
const token_range& tokens;
|
||||
clustering_key_prefix as_key() const {
|
||||
return clustering_key_prefix::from_exploded(std::vector<bytes_view>{cf_name, tokens.start, tokens.end});
|
||||
}
|
||||
};
|
||||
struct virtual_row_comparator {
|
||||
schema_ptr _schema;
|
||||
virtual_row_comparator(schema_ptr schema) : _schema(schema) { }
|
||||
bool operator()(const clustering_key_prefix& key1, const clustering_key_prefix& key2) {
|
||||
return clustering_key_prefix::prefix_equality_less_compare(*_schema)(key1, key2);
|
||||
}
|
||||
bool operator()(const virtual_row& row, const clustering_key_prefix& key) {
|
||||
return operator()(row.as_key(), key);
|
||||
}
|
||||
bool operator()(const clustering_key_prefix& key, const virtual_row& row) {
|
||||
return operator()(key, row.as_key());
|
||||
}
|
||||
};
|
||||
class virtual_row_iterator : public std::iterator<std::input_iterator_tag, const virtual_row> {
|
||||
std::reference_wrapper<const std::vector<bytes>> _cf_names;
|
||||
std::reference_wrapper<const std::vector<token_range>> _ranges;
|
||||
size_t _cf_names_idx = 0;
|
||||
size_t _ranges_idx = 0;
|
||||
public:
|
||||
struct end_iterator_tag {};
|
||||
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges)
|
||||
: _cf_names(std::ref(cf_names))
|
||||
, _ranges(std::ref(ranges))
|
||||
{ }
|
||||
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges, end_iterator_tag)
|
||||
: _cf_names(std::ref(cf_names))
|
||||
, _ranges(std::ref(ranges))
|
||||
, _cf_names_idx(cf_names.size())
|
||||
, _ranges_idx(ranges.size())
|
||||
{ }
|
||||
virtual_row_iterator& operator++() {
|
||||
if (++_ranges_idx == _ranges.get().size() && ++_cf_names_idx < _cf_names.get().size()) {
|
||||
_ranges_idx = 0;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
virtual_row_iterator operator++(int) {
|
||||
virtual_row_iterator i(*this);
|
||||
++(*this);
|
||||
return i;
|
||||
}
|
||||
const value_type operator*() const {
|
||||
return { _cf_names.get()[_cf_names_idx], _ranges.get()[_ranges_idx] };
|
||||
}
|
||||
bool operator==(const virtual_row_iterator& i) const {
|
||||
return _cf_names_idx == i._cf_names_idx
|
||||
&& _ranges_idx == i._ranges_idx;
|
||||
}
|
||||
bool operator!=(const virtual_row_iterator& i) const {
|
||||
return !(*this == i);
|
||||
}
|
||||
};
|
||||
future<> get_next_partition();
|
||||
|
||||
std::vector<db::system_keyspace::range_estimates>
|
||||
estimates_for_current_keyspace(const database& db, std::vector<token_range> local_ranges) const {
|
||||
auto pkey = partition_key::from_single_value(*_schema, utf8_type->decompose(*_current_partition));
|
||||
auto cfs = db.find_keyspace(*_current_partition).metadata()->cf_meta_data();
|
||||
auto cf_names = boost::copy_range<std::vector<bytes>>(cfs | boost::adaptors::transformed([] (auto&& cf) {
|
||||
return utf8_type->decompose(cf.first);
|
||||
}));
|
||||
boost::sort(cf_names, [] (auto&& n1, auto&& n2) {
|
||||
return utf8_type->less(n1, n2);
|
||||
});
|
||||
std::vector<db::system_keyspace::range_estimates> estimates;
|
||||
for (auto& range : _slice.row_ranges(*_schema, pkey)) {
|
||||
auto rows = boost::make_iterator_range(
|
||||
virtual_row_iterator(cf_names, local_ranges),
|
||||
virtual_row_iterator(cf_names, local_ranges, virtual_row_iterator::end_iterator_tag()));
|
||||
auto rows_to_estimate = range.slice(rows, virtual_row_comparator(_schema));
|
||||
for (auto&& r : rows_to_estimate) {
|
||||
auto& cf = db.find_column_family(*_current_partition, utf8_type->to_string(r.cf_name));
|
||||
estimates.push_back(estimate(cf, r.tokens));
|
||||
if (estimates.size() >= _slice.partition_row_limit()) {
|
||||
return estimates;
|
||||
}
|
||||
}
|
||||
}
|
||||
return estimates;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the keyspaces, ordered by name, as selected by the partition_range.
|
||||
*/
|
||||
static ks_range get_keyspaces(const schema& s, const database& db, dht::partition_range range) {
|
||||
struct keyspace_less_comparator {
|
||||
const schema& _s;
|
||||
keyspace_less_comparator(const schema& s) : _s(s) { }
|
||||
dht::ring_position as_ring_position(const sstring& ks) {
|
||||
auto pkey = partition_key::from_single_value(_s, utf8_type->decompose(ks));
|
||||
return dht::global_partitioner().decorate_key(_s, std::move(pkey));
|
||||
}
|
||||
bool operator()(const sstring& ks1, const sstring& ks2) {
|
||||
return as_ring_position(ks1).less_compare(_s, as_ring_position(ks2));
|
||||
}
|
||||
bool operator()(const sstring& ks, const dht::ring_position& rp) {
|
||||
return as_ring_position(ks).less_compare(_s, rp);
|
||||
}
|
||||
bool operator()(const dht::ring_position& rp, const sstring& ks) {
|
||||
return rp.less_compare(_s, as_ring_position(ks));
|
||||
}
|
||||
};
|
||||
auto keyspaces = db.get_non_system_keyspaces();
|
||||
auto cmp = keyspace_less_comparator(s);
|
||||
boost::sort(keyspaces, cmp);
|
||||
return boost::copy_range<ks_range>(range.slice(keyspaces, std::move(cmp)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Makes a wrapping range of ring_position from a nonwrapping range of token, used to select sstables.
|
||||
*/
|
||||
static dht::partition_range as_ring_position_range(dht::token_range& r) {
|
||||
std::optional<range<dht::ring_position>::bound> start_bound, end_bound;
|
||||
if (r.start()) {
|
||||
start_bound = {{ dht::ring_position(r.start()->value(), dht::ring_position::token_bound::start), r.start()->is_inclusive() }};
|
||||
}
|
||||
if (r.end()) {
|
||||
end_bound = {{ dht::ring_position(r.end()->value(), dht::ring_position::token_bound::end), r.end()->is_inclusive() }};
|
||||
}
|
||||
return dht::partition_range(std::move(start_bound), std::move(end_bound), r.is_singular());
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new range_estimates for the specified range, considering the sstables associated with `cf`.
|
||||
*/
|
||||
static system_keyspace::range_estimates estimate(const column_family& cf, const token_range& r) {
|
||||
int64_t count{0};
|
||||
utils::estimated_histogram hist{0};
|
||||
auto from_bytes = [] (auto& b) {
|
||||
return dht::global_partitioner().from_sstring(utf8_type->to_string(b));
|
||||
};
|
||||
dht::token_range_vector ranges;
|
||||
::compat::unwrap_into(
|
||||
wrapping_range<dht::token>({{ from_bytes(r.start), false }}, {{ from_bytes(r.end) }}),
|
||||
dht::token_comparator(),
|
||||
[&] (auto&& rng) { ranges.push_back(std::move(rng)); });
|
||||
for (auto&& r : ranges) {
|
||||
auto rp_range = as_ring_position_range(r);
|
||||
for (auto&& sstable : cf.select_sstables(rp_range)) {
|
||||
count += sstable->estimated_keys_for_range(r);
|
||||
hist.merge(sstable->get_stats_metadata().estimated_partition_size);
|
||||
}
|
||||
}
|
||||
return {cf.schema(), r.start, r.end, count, count > 0 ? hist.mean() : 0};
|
||||
}
|
||||
estimates_for_current_keyspace(const database&, std::vector<token_range> local_ranges) const;
|
||||
};
|
||||
|
||||
struct virtual_reader {
|
||||
@@ -332,6 +68,12 @@ struct virtual_reader {
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns the primary ranges for the local node.
|
||||
* Used for testing as well.
|
||||
*/
|
||||
future<std::vector<token_range>> get_local_ranges();
|
||||
|
||||
} // namespace size_estimates
|
||||
|
||||
} // namespace db
|
||||
|
||||
@@ -143,10 +143,9 @@ void view_info::initialize_base_dependent_fields(const schema& base) {
|
||||
}
|
||||
|
||||
bool view_info::is_index() const {
|
||||
if (!_is_index) {
|
||||
_is_index = service::get_local_storage_service().db().local().find_column_family(base_id()).get_index_manager().is_index(_schema);
|
||||
}
|
||||
return *_is_index;
|
||||
//TODO(sarna): result of this call can be cached instead of calling index_manager::is_index every time
|
||||
column_family& base_cf = service::get_local_storage_service().db().local().find_column_family(base_id());
|
||||
return base_cf.get_index_manager().is_index(view_ptr(_schema.shared_from_this()));
|
||||
}
|
||||
|
||||
namespace db {
|
||||
@@ -1158,6 +1157,10 @@ future<> view_builder::stop() {
|
||||
return _sem.wait().then([this] {
|
||||
_sem.broken();
|
||||
return _build_step.join();
|
||||
}).handle_exception_type([] (const broken_semaphore&) {
|
||||
// ignored
|
||||
}).handle_exception_type([] (const semaphore_timed_out&) {
|
||||
// ignored
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -24,7 +24,9 @@
|
||||
namespace db::view {
|
||||
|
||||
future<> view_update_generator::start() {
|
||||
_started = seastar::async([this]() mutable {
|
||||
thread_attributes attr;
|
||||
attr.sched_group = _db.get_streaming_scheduling_group();
|
||||
_started = seastar::async(std::move(attr), [this]() mutable {
|
||||
while (!_as.abort_requested()) {
|
||||
if (_sstables_with_tables.empty()) {
|
||||
_pending_sstables.wait().get();
|
||||
|
||||
45
dist/ami/build_ami.sh
vendored
45
dist/ami/build_ami.sh
vendored
@@ -1,6 +1,7 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
PRODUCT=$(cat SCYLLA-PRODUCT-FILE)
|
||||
./SCYLLA-VERSION-GEN
|
||||
PRODUCT=$(cat build/SCYLLA-PRODUCT-FILE)
|
||||
|
||||
if [ ! -e dist/ami/build_ami.sh ]; then
|
||||
echo "run build_ami.sh in top of scylla dir"
|
||||
@@ -16,6 +17,7 @@ print_usage() {
|
||||
exit 1
|
||||
}
|
||||
LOCALRPM=0
|
||||
REPO_FOR_INSTALL=
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
"--localrpm")
|
||||
@@ -23,10 +25,12 @@ while [ $# -gt 0 ]; do
|
||||
shift 1
|
||||
;;
|
||||
"--repo")
|
||||
REPO_FOR_INSTALL=$2
|
||||
INSTALL_ARGS="$INSTALL_ARGS --repo $2"
|
||||
shift 2
|
||||
;;
|
||||
"--repo-for-install")
|
||||
REPO_FOR_INSTALL=$2
|
||||
INSTALL_ARGS="$INSTALL_ARGS --repo-for-install $2"
|
||||
shift 2
|
||||
;;
|
||||
@@ -123,6 +127,43 @@ if [ $LOCALRPM -eq 1 ]; then
|
||||
cd ../..
|
||||
cp build/$PRODUCT-ami/build/RPMS/noarch/$PRODUCT-ami-`cat build/$PRODUCT-ami/build/SCYLLA-VERSION-FILE`-`cat build/$PRODUCT-ami/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/$PRODUCT-ami.noarch.rpm
|
||||
fi
|
||||
if [ ! -f dist/ami/files/$PRODUCT-python3.x86_64.rpm ]; then
|
||||
reloc/python3/build_reloc.sh
|
||||
reloc/python3/build_rpm.sh
|
||||
cp build/redhat/RPMS/x86_64/$PRODUCT-python3*.x86_64.rpm dist/ami/files/$PRODUCT-python3.x86_64.rpm
|
||||
fi
|
||||
|
||||
SCYLLA_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} dist/ami/files/$PRODUCT.x86_64.rpm || true)
|
||||
SCYLLA_AMI_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} dist/ami/files/$PRODUCT-ami.noarch.rpm || true)
|
||||
SCYLLA_JMX_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} dist/ami/files/$PRODUCT-jmx.noarch.rpm || true)
|
||||
SCYLLA_TOOLS_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} dist/ami/files/$PRODUCT-tools.noarch.rpm || true)
|
||||
SCYLLA_PYTHON3_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} dist/ami/files/$PRODUCT-python3.x86_64.rpm || true)
|
||||
else
|
||||
if [ -z "$REPO_FOR_INSTALL" ]; then
|
||||
print_usage
|
||||
exit 1
|
||||
fi
|
||||
if [ ! -f /usr/bin/yumdownloader ]; then
|
||||
if is_redhat_variant; then
|
||||
sudo yum install /usr/bin/yumdownloader
|
||||
else
|
||||
sudo apt-get install yum-utils
|
||||
fi
|
||||
fi
|
||||
if [ ! -f /usr/bin/curl ]; then
|
||||
pkg_install curl
|
||||
fi
|
||||
TMPREPO=$(mktemp -u -p /etc/yum.repos.d/ --suffix .repo)
|
||||
sudo curl -o $TMPREPO $REPO_FOR_INSTALL
|
||||
rm -rf build/ami_packages
|
||||
mkdir -p build/ami_packages
|
||||
yumdownloader --downloaddir build/ami_packages/ $PRODUCT $PRODUCT-kernel-conf $PRODUCT-conf $PRODUCT-server $PRODUCT-debuginfo $PRODUCT-ami $PRODUCT-jmx $PRODUCT-tools-core $PRODUCT-tools $PRODUCT-python3
|
||||
sudo rm -f $TMPREPO
|
||||
SCYLLA_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} build/ami_packages/$PRODUCT-[0-9]*.rpm || true)
|
||||
SCYLLA_AMI_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} build/ami_packages/$PRODUCT-ami-*.rpm || true)
|
||||
SCYLLA_JMX_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} build/ami_packages/$PRODUCT-jmx-*.rpm || true)
|
||||
SCYLLA_TOOLS_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} build/ami_packages/$PRODUCT-tools-[0-9]*.rpm || true)
|
||||
SCYLLA_PYTHON3_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} build/ami_packages/$PRODUCT-python3-*.rpm || true)
|
||||
fi
|
||||
|
||||
cd dist/ami
|
||||
@@ -147,4 +188,4 @@ if [ ! -d packer ]; then
|
||||
cd -
|
||||
fi
|
||||
|
||||
env PACKER_LOG=1 PACKER_LOG_PATH=../../build/ami.log packer/packer build -var-file=variables.json -var install_args="$INSTALL_ARGS" -var region="$REGION" -var source_ami="$AMI" -var ssh_username="$SSH_USERNAME" scylla.json
|
||||
env PACKER_LOG=1 PACKER_LOG_PATH=../../build/ami.log packer/packer build -var-file=variables.json -var install_args="$INSTALL_ARGS" -var region="$REGION" -var source_ami="$AMI" -var ssh_username="$SSH_USERNAME" -var scylla_version="$SCYLLA_VERSION" -var scylla_ami_version="$SCYLLA_AMI_VERSION" -var scylla_jmx_version="$SCYLLA_JMX_VERSION" -var scylla_tools_version="$SCYLLA_TOOLS_VERSION" -var scylla_python3_version="$SCYLLA_PYTHON3_VERSION" scylla.json
|
||||
|
||||
10
dist/ami/scylla.json
vendored
10
dist/ami/scylla.json
vendored
@@ -56,7 +56,15 @@
|
||||
"ssh_username": "{{user `ssh_username`}}",
|
||||
"subnet_id": "{{user `subnet_id`}}",
|
||||
"type": "amazon-ebs",
|
||||
"user_data_file": "user_data.txt"
|
||||
"user_data_file": "user_data.txt",
|
||||
"ami_description": "scylla-{{user `scylla_version`}} scylla-ami-{{user `scylla_ami_version`}} scylla-jmx-{{user `scylla_jmx_version`}} scylla-tools-{{user `scylla_tools_version`}} scylla-python3-{{user `scylla_python3_version`}}",
|
||||
"tags": {
|
||||
"ScyllaVersion": "{{user `scylla_version`}}",
|
||||
"ScyllaAMIVersion": "{{user `scylla_ami_version`}}",
|
||||
"ScyllaJMXVersion": "{{user `scylla_jmx_version`}}",
|
||||
"ScyllaToolsVersion": "{{user `scylla_tools_version`}}",
|
||||
"ScyllaPython3Version": "{{user `scylla_python3_version`}}"
|
||||
}
|
||||
}
|
||||
],
|
||||
"provisioners": [
|
||||
|
||||
11
dist/common/scripts/scylla_io_setup
vendored
11
dist/common/scripts/scylla_io_setup
vendored
@@ -60,6 +60,17 @@ if __name__ == "__main__":
|
||||
disk_properties["read_bandwidth"] = 2015342735 * nr_disks
|
||||
disk_properties["write_iops"] = 181500 * nr_disks
|
||||
disk_properties["write_bandwidth"] = 808775652 * nr_disks
|
||||
elif idata.instance_class() == "i3en":
|
||||
if idata.instance() in ("i3en.large", "i3.xlarge", "i3en.2xlarge"):
|
||||
disk_properties["read_iops"] = 46489
|
||||
disk_properties["read_bandwidth"] = 353437280
|
||||
disk_properties["write_iops"] = 36680
|
||||
disk_properties["write_bandwidth"] = 164766656
|
||||
else:
|
||||
disk_properties["read_iops"] = 278478 * nr_disks
|
||||
disk_properties["read_bandwidth"] = 3029172992 * nr_disks
|
||||
disk_properties["write_iops"] = 221909 * nr_disks
|
||||
disk_properties["write_bandwidth"] = 1020482432 * nr_disks
|
||||
elif idata.instance_class() == "i2":
|
||||
disk_properties["read_iops"] = 64000 * nr_disks
|
||||
disk_properties["read_bandwidth"] = 507338935 * nr_disks
|
||||
|
||||
35
dist/common/scripts/scylla_setup
vendored
35
dist/common/scripts/scylla_setup
vendored
@@ -95,6 +95,9 @@ def do_verify_package(pkg):
|
||||
res = run('rpm -q {}'.format(pkg), silent=True, exception=False)
|
||||
elif is_gentoo_variant():
|
||||
res = 0 if len(glob.glob('/var/db/pkg/*/{}-*'.format(pkg))) else 1
|
||||
else:
|
||||
print("OS variant not recognized")
|
||||
res = 1
|
||||
if res != 0:
|
||||
print('{} package is not installed.'.format(pkg))
|
||||
sys.exit(1)
|
||||
@@ -252,22 +255,22 @@ if __name__ == '__main__':
|
||||
if not os.path.exists('/etc/scylla.d/housekeeping.cfg'):
|
||||
version_check = interactive_ask_service('Do you want to enable Scylla to check if there is a newer version of Scylla available?', 'Yes - start the Scylla-housekeeping service to check for a newer version. This check runs periodically. No - skips this step.', version_check)
|
||||
args.no_version_check = not version_check
|
||||
if version_check:
|
||||
with open('/etc/scylla.d/housekeeping.cfg', 'w') as f:
|
||||
f.write('[housekeeping]\ncheck-version: True\n')
|
||||
if is_systemd():
|
||||
systemd_unit('scylla-housekeeping-daily.timer').unmask()
|
||||
systemd_unit('scylla-housekeeping-restart.timer').unmask()
|
||||
else:
|
||||
with open('/etc/scylla.d/housekeeping.cfg', 'w') as f:
|
||||
f.write('[housekeeping]\ncheck-version: False\n')
|
||||
if is_systemd():
|
||||
hk_daily = systemd_unit('scylla-housekeeping-daily.timer')
|
||||
hk_daily.mask()
|
||||
hk_daily.stop()
|
||||
hk_restart = systemd_unit('scylla-housekeeping-restart.timer')
|
||||
hk_restart.mask()
|
||||
hk_restart.stop()
|
||||
if version_check:
|
||||
with open('/etc/scylla.d/housekeeping.cfg', 'w') as f:
|
||||
f.write('[housekeeping]\ncheck-version: True\n')
|
||||
if is_systemd():
|
||||
systemd_unit('scylla-housekeeping-daily.timer').unmask()
|
||||
systemd_unit('scylla-housekeeping-restart.timer').unmask()
|
||||
else:
|
||||
with open('/etc/scylla.d/housekeeping.cfg', 'w') as f:
|
||||
f.write('[housekeeping]\ncheck-version: False\n')
|
||||
if is_systemd():
|
||||
hk_daily = systemd_unit('scylla-housekeeping-daily.timer')
|
||||
hk_daily.mask()
|
||||
hk_daily.stop()
|
||||
hk_restart = systemd_unit('scylla-housekeeping-restart.timer')
|
||||
hk_restart.mask()
|
||||
hk_restart.stop()
|
||||
|
||||
cur_version=out('scylla --version', exception=False)
|
||||
if len(cur_version) > 0:
|
||||
|
||||
8
dist/common/scripts/scylla_util.py
vendored
8
dist/common/scripts/scylla_util.py
vendored
@@ -119,7 +119,7 @@ class aws_instance:
|
||||
return self._type.split(".")[0]
|
||||
|
||||
def is_supported_instance_class(self):
|
||||
if self.instance_class() in ['i2', 'i3']:
|
||||
if self.instance_class() in ['i2', 'i3', 'i3en']:
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -128,7 +128,7 @@ class aws_instance:
|
||||
instance_size = self.instance_size()
|
||||
if instance_class in ['c3', 'c4', 'd2', 'i2', 'r3']:
|
||||
return 'ixgbevf'
|
||||
if instance_class in ['c5', 'c5d', 'f1', 'g3', 'h1', 'i3', 'm5', 'm5d', 'p2', 'p3', 'r4', 'x1']:
|
||||
if instance_class in ['c5', 'c5d', 'f1', 'g3', 'h1', 'i3', 'i3en', 'm5', 'm5d', 'p2', 'p3', 'r4', 'x1']:
|
||||
return 'ena'
|
||||
if instance_class == 'm4':
|
||||
if instance_size == '16xlarge':
|
||||
@@ -304,7 +304,7 @@ def parse_os_release_line(line):
|
||||
val = shlex.split(data)[0]
|
||||
return (id, val.split(' ') if id == 'ID' or id == 'ID_LIKE' else val)
|
||||
|
||||
os_release = dict([parse_os_release_line(x) for x in open('/etc/os-release').read().splitlines()])
|
||||
os_release = dict([parse_os_release_line(x) for x in open('/etc/os-release').read().splitlines() if re.match(r'\w+=', x) ])
|
||||
|
||||
def is_debian_variant():
|
||||
d = os_release['ID_LIKE'] if 'ID_LIKE' in os_release else os_release['ID']
|
||||
@@ -313,7 +313,7 @@ def is_debian_variant():
|
||||
|
||||
def is_redhat_variant():
|
||||
d = os_release['ID_LIKE'] if 'ID_LIKE' in os_release else os_release['ID']
|
||||
return ('rhel' in d) or ('fedora' in d)
|
||||
return ('rhel' in d) or ('fedora' in d) or ('ol') in d
|
||||
|
||||
def is_gentoo_variant():
|
||||
return ('gentoo' in os_release['ID'])
|
||||
|
||||
2
dist/debian/control.mustache
vendored
2
dist/debian/control.mustache
vendored
@@ -16,7 +16,7 @@ Conflicts: {{product}}-server (<< 1.1)
|
||||
|
||||
Package: {{product}}-server
|
||||
Architecture: amd64
|
||||
Depends: ${shlibs:Depends}, ${misc:Depends}, adduser, hwloc-nox, {{product}}-conf, python-yaml, python-urwid, python-requests, curl, util-linux, python3-yaml, python3, uuid-runtime, pciutils, python3-pyudev, gzip, realpath | coreutils, num-utils, file
|
||||
Depends: ${shlibs:Depends}, ${misc:Depends}, adduser, hwloc-nox, {{product}}-conf, {{product}}-python3, curl, util-linux, uuid-runtime, pciutils, gzip, realpath | coreutils, num-utils, file
|
||||
Description: Scylla database server binaries
|
||||
Scylla is a highly scalable, eventually consistent, distributed,
|
||||
partitioned row DB.
|
||||
|
||||
30
dist/debian/debian/adjust_bin
vendored
Executable file
30
dist/debian/debian/adjust_bin
vendored
Executable file
@@ -0,0 +1,30 @@
|
||||
#!/bin/bash -ex
|
||||
|
||||
root="$1"
|
||||
bin="$2"
|
||||
prefix="/opt/scylladb"
|
||||
|
||||
[ "$bin" = patchelf ] && exit 0
|
||||
|
||||
patchelf() {
|
||||
# patchelf comes from the build system, so it needs the build system's ld.so and
|
||||
# shared libraries. We can't use patchelf on patchelf itself, so invoke it via
|
||||
# ld.so.
|
||||
LD_LIBRARY_PATH="$root/$prefix/bin/libreloc" "$root/$prefix"/libreloc/ld.so "$root/$prefix"/libexec/patchelf "$@"
|
||||
}
|
||||
|
||||
# We could add --set-rpath too, but then debugedit (called by rpmbuild) barfs
|
||||
# on the result. So use LD_LIBRARY_PATH in the thunk, below.
|
||||
patchelf \
|
||||
--set-interpreter "$prefix/libreloc/ld.so" \
|
||||
"$root/$prefix/libexec/$bin"
|
||||
mkdir -p "$root/$prefix/bin"
|
||||
cat > "$root/$prefix/bin/$bin" <<EOF
|
||||
#!/bin/bash -e
|
||||
export GNUTLS_SYSTEM_PRIORITY_FILE="\${GNUTLS_SYSTEM_PRIORITY_FILE-$prefix/libreloc/gnutls.config}"
|
||||
export LD_LIBRARY_PATH="$prefix/libreloc"
|
||||
exec -a "\$0" "$prefix/libexec/$bin" "\$@"
|
||||
EOF
|
||||
|
||||
chmod +x "$root/$prefix/bin/$bin"
|
||||
|
||||
140
dist/debian/python3/build_deb.sh
vendored
Executable file
140
dist/debian/python3/build_deb.sh
vendored
Executable file
@@ -0,0 +1,140 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
PRODUCT=$(cat SCYLLA-PRODUCT-FILE)
|
||||
|
||||
. /etc/os-release
|
||||
print_usage() {
|
||||
echo "build_deb.sh --reloc-pkg build/release/scylla-python3-package.tar.gz"
|
||||
echo " --reloc-pkg specify relocatable package path"
|
||||
exit 1
|
||||
}
|
||||
|
||||
TARGET=stable
|
||||
RELOC_PKG=
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
"--reloc-pkg")
|
||||
RELOC_PKG=$2
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
print_usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
is_redhat_variant() {
|
||||
[ -f /etc/redhat-release ]
|
||||
}
|
||||
is_debian_variant() {
|
||||
[ -f /etc/debian_version ]
|
||||
}
|
||||
pkg_install() {
|
||||
if is_redhat_variant; then
|
||||
sudo yum install -y $1
|
||||
elif is_debian_variant; then
|
||||
sudo apt-get install -y $1
|
||||
else
|
||||
echo "Requires to install following command: $1"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
if [ ! -e SCYLLA-RELOCATABLE-FILE ]; then
|
||||
echo "do not directly execute build_deb.sh, use reloc/build_deb.sh instead."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "$(arch)" != "x86_64" ]; then
|
||||
echo "Unsupported architecture: $(arch)"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "$RELOC_PKG" ]; then
|
||||
print_usage
|
||||
exit 1
|
||||
fi
|
||||
if [ ! -f "$RELOC_PKG" ]; then
|
||||
echo "$RELOC_PKG is not found."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -e debian ]; then
|
||||
rm -rf debian
|
||||
fi
|
||||
if is_debian_variant; then
|
||||
sudo apt-get -y update
|
||||
fi
|
||||
# this hack is needed since some environment installs 'git-core' package, it's
|
||||
# subset of the git command and doesn't works for our git-archive-all script.
|
||||
if is_redhat_variant && [ ! -f /usr/libexec/git-core/git-submodule ]; then
|
||||
sudo yum install -y git
|
||||
fi
|
||||
if [ ! -f /usr/bin/git ]; then
|
||||
pkg_install git
|
||||
fi
|
||||
if [ ! -f /usr/bin/python ]; then
|
||||
pkg_install python
|
||||
fi
|
||||
if [ ! -f /usr/bin/debuild ]; then
|
||||
pkg_install devscripts
|
||||
fi
|
||||
if [ ! -f /usr/bin/dh_testdir ]; then
|
||||
pkg_install debhelper
|
||||
fi
|
||||
if [ ! -f /usr/bin/fakeroot ]; then
|
||||
pkg_install fakeroot
|
||||
fi
|
||||
if [ ! -f /usr/bin/pystache ]; then
|
||||
if is_redhat_variant; then
|
||||
sudo yum install -y /usr/bin/pystache
|
||||
elif is_debian_variant; then
|
||||
sudo apt-get install -y python-pystache
|
||||
fi
|
||||
fi
|
||||
if [ ! -f /usr/bin/file ]; then
|
||||
pkg_install file
|
||||
fi
|
||||
if is_debian_variant && [ ! -f /usr/share/doc/python-pkg-resources/copyright ]; then
|
||||
sudo apt-get install -y python-pkg-resources
|
||||
fi
|
||||
|
||||
if [ "$ID" = "ubuntu" ] && [ ! -f /usr/share/keyrings/debian-archive-keyring.gpg ]; then
|
||||
sudo apt-get install -y debian-archive-keyring
|
||||
fi
|
||||
if [ "$ID" = "debian" ] && [ ! -f /usr/share/keyrings/ubuntu-archive-keyring.gpg ]; then
|
||||
sudo apt-get install -y ubuntu-archive-keyring
|
||||
fi
|
||||
|
||||
if [ -z "$TARGET" ]; then
|
||||
if is_debian_variant; then
|
||||
if [ ! -f /usr/bin/lsb_release ]; then
|
||||
pkg_install lsb-release
|
||||
fi
|
||||
TARGET=`lsb_release -c|awk '{print $2}'`
|
||||
else
|
||||
echo "Please specify target"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
RELOC_PKG_FULLPATH=$(readlink -f $RELOC_PKG)
|
||||
RELOC_PKG_BASENAME=$(basename $RELOC_PKG)
|
||||
SCYLLA_VERSION=$(cat SCYLLA-VERSION-FILE)
|
||||
SCYLLA_RELEASE=$(cat SCYLLA-RELEASE-FILE)
|
||||
|
||||
ln -fv $RELOC_PKG_FULLPATH ../$PRODUCT-python3_$SCYLLA_VERSION-$SCYLLA_RELEASE.orig.tar.gz
|
||||
|
||||
cp -al dist/debian/python3/debian debian
|
||||
if [ "$PRODUCT" != "scylla" ]; then
|
||||
for i in debian/scylla-*;do
|
||||
mv $i ${i/scylla-/$PRODUCT-}
|
||||
done
|
||||
fi
|
||||
REVISION="1"
|
||||
MUSTACHE_DIST="\"debian\": true, \"product\": \"$PRODUCT\", \"$PRODUCT\": true"
|
||||
pystache dist/debian/python3/changelog.mustache "{ $MUSTACHE_DIST, \"version\": \"$SCYLLA_VERSION\", \"release\": \"$SCYLLA_RELEASE\", \"revision\": \"$REVISION\", \"codename\": \"$TARGET\" }" > debian/changelog
|
||||
pystache dist/debian/python3/rules.mustache "{ $MUSTACHE_DIST }" > debian/rules
|
||||
pystache dist/debian/python3/control.mustache "{ $MUSTACHE_DIST }" > debian/control
|
||||
chmod a+rx debian/rules
|
||||
|
||||
debuild -rfakeroot -us -uc
|
||||
5
dist/debian/python3/changelog.mustache
vendored
Normal file
5
dist/debian/python3/changelog.mustache
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
{{product}}-python3 ({{version}}-{{release}}-{{revision}}) {{codename}}; urgency=medium
|
||||
|
||||
* Initial release.
|
||||
|
||||
-- Takuya ASADA <syuu@scylladb.com> Mon, 24 Aug 2015 09:22:55 +0000
|
||||
16
dist/debian/python3/control.mustache
vendored
Normal file
16
dist/debian/python3/control.mustache
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
Source: {{product}}-python3
|
||||
Maintainer: Takuya ASADA <syuu@scylladb.com>
|
||||
Homepage: http://scylladb.com
|
||||
Section: python
|
||||
Priority: optional
|
||||
X-Python3-Version: >= 3.4
|
||||
Standards-Version: 3.9.5
|
||||
|
||||
Package: {{product}}-python3
|
||||
Architecture: amd64
|
||||
Description: A standalone python3 interpreter that can be moved around different Linux machines
|
||||
This is a self-contained python interpreter that can be moved around
|
||||
different Linux machines as long as they run a new enough kernel (where
|
||||
new enough is defined by whichever Python module uses any kernel
|
||||
functionality). All shared libraries needed for the interpreter to
|
||||
operate are shipped with it.
|
||||
1
dist/debian/python3/debian/compat
vendored
Normal file
1
dist/debian/python3/debian/compat
vendored
Normal file
@@ -0,0 +1 @@
|
||||
9
|
||||
995
dist/debian/python3/debian/copyright
vendored
Normal file
995
dist/debian/python3/debian/copyright
vendored
Normal file
@@ -0,0 +1,995 @@
|
||||
This package was put together by Klee Dienes <klee@debian.org> from
|
||||
sources from ftp.python.org:/pub/python, based on the Debianization by
|
||||
the previous maintainers Bernd S. Brentrup <bsb@uni-muenster.de> and
|
||||
Bruce Perens. Current maintainer is Matthias Klose <doko@debian.org>.
|
||||
|
||||
It was downloaded from http://python.org/
|
||||
|
||||
Copyright:
|
||||
|
||||
Upstream Author: Guido van Rossum <guido@cwi.nl> and others.
|
||||
|
||||
License:
|
||||
|
||||
The following text includes the Python license and licenses and
|
||||
acknowledgements for incorporated software. The licenses can be read
|
||||
in the HTML and texinfo versions of the documentation as well, after
|
||||
installing the pythonx.y-doc package. Licenses for files not licensed
|
||||
under the Python Licenses are found at the end of this file.
|
||||
|
||||
|
||||
Python License
|
||||
==============
|
||||
|
||||
A. HISTORY OF THE SOFTWARE
|
||||
==========================
|
||||
|
||||
Python was created in the early 1990s by Guido van Rossum at Stichting
|
||||
Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands
|
||||
as a successor of a language called ABC. Guido remains Python's
|
||||
principal author, although it includes many contributions from others.
|
||||
|
||||
In 1995, Guido continued his work on Python at the Corporation for
|
||||
National Research Initiatives (CNRI, see http://www.cnri.reston.va.us)
|
||||
in Reston, Virginia where he released several versions of the
|
||||
software.
|
||||
|
||||
In May 2000, Guido and the Python core development team moved to
|
||||
BeOpen.com to form the BeOpen PythonLabs team. In October of the same
|
||||
year, the PythonLabs team moved to Digital Creations (now Zope
|
||||
Corporation, see http://www.zope.com). In 2001, the Python Software
|
||||
Foundation (PSF, see http://www.python.org/psf/) was formed, a
|
||||
non-profit organization created specifically to own Python-related
|
||||
Intellectual Property. Zope Corporation is a sponsoring member of
|
||||
the PSF.
|
||||
|
||||
All Python releases are Open Source (see http://www.opensource.org for
|
||||
the Open Source Definition). Historically, most, but not all, Python
|
||||
releases have also been GPL-compatible; the table below summarizes
|
||||
the various releases.
|
||||
|
||||
Release Derived Year Owner GPL-
|
||||
from compatible? (1)
|
||||
|
||||
0.9.0 thru 1.2 1991-1995 CWI yes
|
||||
1.3 thru 1.5.2 1.2 1995-1999 CNRI yes
|
||||
1.6 1.5.2 2000 CNRI no
|
||||
2.0 1.6 2000 BeOpen.com no
|
||||
1.6.1 1.6 2001 CNRI yes (2)
|
||||
2.1 2.0+1.6.1 2001 PSF no
|
||||
2.0.1 2.0+1.6.1 2001 PSF yes
|
||||
2.1.1 2.1+2.0.1 2001 PSF yes
|
||||
2.2 2.1.1 2001 PSF yes
|
||||
2.1.2 2.1.1 2002 PSF yes
|
||||
2.1.3 2.1.2 2002 PSF yes
|
||||
2.2 and above 2.1.1 2001-now PSF yes
|
||||
|
||||
Footnotes:
|
||||
|
||||
(1) GPL-compatible doesn't mean that we're distributing Python under
|
||||
the GPL. All Python licenses, unlike the GPL, let you distribute
|
||||
a modified version without making your changes open source. The
|
||||
GPL-compatible licenses make it possible to combine Python with
|
||||
other software that is released under the GPL; the others don't.
|
||||
|
||||
(2) According to Richard Stallman, 1.6.1 is not GPL-compatible,
|
||||
because its license has a choice of law clause. According to
|
||||
CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1
|
||||
is "not incompatible" with the GPL.
|
||||
|
||||
Thanks to the many outside volunteers who have worked under Guido's
|
||||
direction to make these releases possible.
|
||||
|
||||
|
||||
B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON
|
||||
===============================================================
|
||||
|
||||
PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
|
||||
--------------------------------------------
|
||||
|
||||
1. This LICENSE AGREEMENT is between the Python Software Foundation
|
||||
("PSF"), and the Individual or Organization ("Licensee") accessing and
|
||||
otherwise using this software ("Python") in source or binary form and
|
||||
its associated documentation.
|
||||
|
||||
2. Subject to the terms and conditions of this License Agreement, PSF
|
||||
hereby grants Licensee a nonexclusive, royalty-free, world-wide
|
||||
license to reproduce, analyze, test, perform and/or display publicly,
|
||||
prepare derivative works, distribute, and otherwise use Python alone
|
||||
or in any derivative version, provided, however, that PSF's License
|
||||
Agreement and PSF's notice of copyright, i.e., "Copyright (c) 2001,
|
||||
2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
|
||||
2013, 2014 Python Software Foundation; All Rights Reserved" are
|
||||
retained in Python alone or in any derivative version prepared by
|
||||
Licensee.
|
||||
|
||||
3. In the event Licensee prepares a derivative work that is based on
|
||||
or incorporates Python or any part thereof, and wants to make
|
||||
the derivative work available to others as provided herein, then
|
||||
Licensee hereby agrees to include in any such work a brief summary of
|
||||
the changes made to Python.
|
||||
|
||||
4. PSF is making Python available to Licensee on an "AS IS"
|
||||
basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
|
||||
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
|
||||
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
|
||||
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
|
||||
INFRINGE ANY THIRD PARTY RIGHTS.
|
||||
|
||||
5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
|
||||
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
|
||||
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
|
||||
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
|
||||
|
||||
6. This License Agreement will automatically terminate upon a material
|
||||
breach of its terms and conditions.
|
||||
|
||||
7. Nothing in this License Agreement shall be deemed to create any
|
||||
relationship of agency, partnership, or joint venture between PSF and
|
||||
Licensee. This License Agreement does not grant permission to use PSF
|
||||
trademarks or trade name in a trademark sense to endorse or promote
|
||||
products or services of Licensee, or any third party.
|
||||
|
||||
8. By copying, installing or otherwise using Python, Licensee
|
||||
agrees to be bound by the terms and conditions of this License
|
||||
Agreement.
|
||||
|
||||
|
||||
BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0
|
||||
-------------------------------------------
|
||||
|
||||
BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1
|
||||
|
||||
1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an
|
||||
office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the
|
||||
Individual or Organization ("Licensee") accessing and otherwise using
|
||||
this software in source or binary form and its associated
|
||||
documentation ("the Software").
|
||||
|
||||
2. Subject to the terms and conditions of this BeOpen Python License
|
||||
Agreement, BeOpen hereby grants Licensee a non-exclusive,
|
||||
royalty-free, world-wide license to reproduce, analyze, test, perform
|
||||
and/or display publicly, prepare derivative works, distribute, and
|
||||
otherwise use the Software alone or in any derivative version,
|
||||
provided, however, that the BeOpen Python License is retained in the
|
||||
Software, alone or in any derivative version prepared by Licensee.
|
||||
|
||||
3. BeOpen is making the Software available to Licensee on an "AS IS"
|
||||
basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
|
||||
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND
|
||||
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
|
||||
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT
|
||||
INFRINGE ANY THIRD PARTY RIGHTS.
|
||||
|
||||
4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE
|
||||
SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS
|
||||
AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY
|
||||
DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
|
||||
|
||||
5. This License Agreement will automatically terminate upon a material
|
||||
breach of its terms and conditions.
|
||||
|
||||
6. This License Agreement shall be governed by and interpreted in all
|
||||
respects by the law of the State of California, excluding conflict of
|
||||
law provisions. Nothing in this License Agreement shall be deemed to
|
||||
create any relationship of agency, partnership, or joint venture
|
||||
between BeOpen and Licensee. This License Agreement does not grant
|
||||
permission to use BeOpen trademarks or trade names in a trademark
|
||||
sense to endorse or promote products or services of Licensee, or any
third party. As an exception, the "BeOpen Python" logos available at
http://www.pythonlabs.com/logos.html may be used according to the
permissions granted on that web page.

7. By copying, installing or otherwise using the software, Licensee
agrees to be bound by the terms and conditions of this License
Agreement.


CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1
---------------------------------------

1. This LICENSE AGREEMENT is between the Corporation for National
Research Initiatives, having an office at 1895 Preston White Drive,
Reston, VA 20191 ("CNRI"), and the Individual or Organization
("Licensee") accessing and otherwise using Python 1.6.1 software in
source or binary form and its associated documentation.

2. Subject to the terms and conditions of this License Agreement, CNRI
hereby grants Licensee a nonexclusive, royalty-free, world-wide
license to reproduce, analyze, test, perform and/or display publicly,
prepare derivative works, distribute, and otherwise use Python 1.6.1
alone or in any derivative version, provided, however, that CNRI's
License Agreement and CNRI's notice of copyright, i.e., "Copyright (c)
1995-2001 Corporation for National Research Initiatives; All Rights
Reserved" are retained in Python 1.6.1 alone or in any derivative
version prepared by Licensee. Alternately, in lieu of CNRI's License
Agreement, Licensee may substitute the following text (omitting the
quotes): "Python 1.6.1 is made available subject to the terms and
conditions in CNRI's License Agreement. This Agreement together with
Python 1.6.1 may be located on the Internet using the following
unique, persistent identifier (known as a handle): 1895.22/1013. This
Agreement may also be obtained from a proxy server on the Internet
using the following URL: http://hdl.handle.net/1895.22/1013".

3. In the event Licensee prepares a derivative work that is based on
or incorporates Python 1.6.1 or any part thereof, and wants to make
the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary of
the changes made to Python 1.6.1.

4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS"
basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.

5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1,
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.

6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.

7. This License Agreement shall be governed by the federal
intellectual property law of the United States, including without
limitation the federal copyright law, and, to the extent such
U.S. federal law does not apply, by the law of the Commonwealth of
Virginia, excluding Virginia's conflict of law provisions.
Notwithstanding the foregoing, with regard to derivative works based
on Python 1.6.1 that incorporate non-separable material that was
previously distributed under the GNU General Public License (GPL), the
law of the Commonwealth of Virginia shall govern this License
Agreement only as to issues arising under or with respect to
Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this
License Agreement shall be deemed to create any relationship of
agency, partnership, or joint venture between CNRI and Licensee. This
License Agreement does not grant permission to use CNRI trademarks or
trade name in a trademark sense to endorse or promote products or
services of Licensee, or any third party.

8. By clicking on the "ACCEPT" button where indicated, or by copying,
installing or otherwise using Python 1.6.1, Licensee agrees to be
bound by the terms and conditions of this License Agreement.

ACCEPT


CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2
--------------------------------------------------

Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam,
The Netherlands. All rights reserved.

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the name of Stichting Mathematisch
Centrum or CWI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.


Licenses and Acknowledgements for Incorporated Software
=======================================================

Mersenne Twister
----------------

The `_random' module includes code based on a download from
`http://www.math.keio.ac.jp/~matumoto/MT2002/emt19937ar.html'. The
following are the verbatim comments from the original code:

A C-program for MT19937, with initialization improved 2002/1/26.
Coded by Takuji Nishimura and Makoto Matsumoto.

Before using, initialize the state by using init_genrand(seed)
or init_by_array(init_key, key_length).

Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

3. The names of its contributors may not be used to endorse or promote
products derived from this software without specific prior written
permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Any feedback is very welcome.
http://www.math.keio.ac.jp/matumoto/emt.html
email: matumoto@math.keio.ac.jp


Sockets
-------

The `socket' module uses the functions, `getaddrinfo', and
`getnameinfo', which are coded in separate source files from the WIDE
Project, `http://www.wide.ad.jp/about/index.html'.

Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the project nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
GAI_ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
FOR GAI_ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON GAI_ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN GAI_ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.


Floating point exception control
--------------------------------

The source for the `fpectl' module includes the following notice:

---------------------------------------------------------------------
/ Copyright (c) 1996. \
| The Regents of the University of California. |
| All rights reserved. |
| |
| Permission to use, copy, modify, and distribute this software for |
| any purpose without fee is hereby granted, provided that this en- |
| tire notice is included in all copies of any software which is or |
| includes a copy or modification of this software and in all |
| copies of the supporting documentation for such software. |
| |
| This work was produced at the University of California, Lawrence |
| Livermore National Laboratory under contract no. W-7405-ENG-48 |
| between the U.S. Department of Energy and The Regents of the |
| University of California for the operation of UC LLNL. |
| |
| DISCLAIMER |
| |
| This software was prepared as an account of work sponsored by an |
| agency of the United States Government. Neither the United States |
| Government nor the University of California nor any of their em- |
| ployees, makes any warranty, express or implied, or assumes any |
| liability or responsibility for the accuracy, completeness, or |
| usefulness of any information, apparatus, product, or process |
| disclosed, or represents that its use would not infringe |
| privately-owned rights. Reference herein to any specific commer- |
| cial products, process, or service by trade name, trademark, |
| manufacturer, or otherwise, does not necessarily constitute or |
| imply its endorsement, recommendation, or favoring by the United |
| States Government or the University of California. The views and |
| opinions of authors expressed herein do not necessarily state or |
| reflect those of the United States Government or the University |
| of California, and shall not be used for advertising or product |
\ endorsement purposes. /
---------------------------------------------------------------------


Cookie management
-----------------

The `Cookie' module contains the following notice:

Copyright 2000 by Timothy O'Malley <timo@alum.mit.edu>

All Rights Reserved

Permission to use, copy, modify, and distribute this software
and its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that copyright notice and this permission
notice appear in supporting documentation, and that the name of
Timothy O'Malley not be used in advertising or publicity
pertaining to distribution of the software without specific, written
prior permission.

Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR
ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.


Execution tracing
-----------------

The `trace' module contains the following notice:

portions copyright 2001, Autonomous Zones Industries, Inc., all rights...
err... reserved and offered to the public under the terms of the
Python 2.2 license.
Author: Zooko O'Whielacronx
http://zooko.com/
mailto:zooko@zooko.com

Copyright 2000, Mojam Media, Inc., all rights reserved.
Author: Skip Montanaro

Copyright 1999, Bioreason, Inc., all rights reserved.
Author: Andrew Dalke

Copyright 1995-1997, Automatrix, Inc., all rights reserved.
Author: Skip Montanaro

Copyright 1991-1995, Stichting Mathematisch Centrum, all rights reserved.

Permission to use, copy, modify, and distribute this Python software and
its associated documentation for any purpose without fee is hereby
granted, provided that the above copyright notice appears in all copies,
and that both that copyright notice and this permission notice appear in
supporting documentation, and that the name of neither Automatrix,
Bioreason or Mojam Media be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.


UUencode and UUdecode functions
-------------------------------

The `uu' module contains the following notice:

Copyright 1994 by Lance Ellinghouse
Cathedral City, California Republic, United States of America.
All Rights Reserved
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the name of Lance Ellinghouse
not be used in advertising or publicity pertaining to distribution
of the software without specific, written prior permission.
LANCE ELLINGHOUSE DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS, IN NO EVENT SHALL LANCE ELLINGHOUSE CENTRUM BE LIABLE
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

Modified by Jack Jansen, CWI, July 1995:
- Use binascii module to do the actual line-by-line conversion
between ascii and binary. This results in a 1000-fold speedup. The C
version is still 5 times faster, though.
- Arguments more compliant with python standard


XML Remote Procedure Calls
--------------------------

The `xmlrpclib' module contains the following notice:

The XML-RPC client interface is

Copyright (c) 1999-2002 by Secret Labs AB
Copyright (c) 1999-2002 by Fredrik Lundh

By obtaining, using, and/or copying this software and/or its
associated documentation, you agree that you have read, understood,
and will comply with the following terms and conditions:

Permission to use, copy, modify, and distribute this software and
its associated documentation for any purpose and without fee is
hereby granted, provided that the above copyright notice appears in
all copies, and that both that copyright notice and this permission
notice appear in supporting documentation, and that the name of
Secret Labs AB or the author not be used in advertising or publicity
pertaining to distribution of the software without specific, written
prior permission.

SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
OF THIS SOFTWARE.

Licenses for Software linked to
===============================

Note that the choice of GPL compatibility outlined above doesn't extend
to modules linked to particular libraries, since they change the
effective License of the module binary.


GNU Readline
------------

The 'readline' module makes use of GNU Readline.

The GNU Readline Library is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2, or (at
your option) any later version.

On Debian systems, you can find the complete statement in
/usr/share/doc/readline-common/copyright'. A copy of the GNU General
Public License is available in /usr/share/common-licenses/GPL-2'.


OpenSSL
-------

The '_ssl' module makes use of OpenSSL.

The OpenSSL toolkit stays under a dual license, i.e. both the
conditions of the OpenSSL License and the original SSLeay license
apply to the toolkit. Actually both licenses are BSD-style Open
Source licenses. Note that both licenses are incompatible with
the GPL.

On Debian systems, you can find the complete license text in
/usr/share/doc/openssl/copyright'.


Files with other licenses than the Python License
-------------------------------------------------

Files: Include/dynamic_annotations.h
Files: Python/dynamic_annotations.c
Copyright: (c) 2008-2009, Google Inc.
License: Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Files: Include/unicodeobject.h
Copyright: (c) Corporation for National Research Initiatives.
Copyright: (c) 1999 by Secret Labs AB.
Copyright: (c) 1999 by Fredrik Lundh.
License: By obtaining, using, and/or copying this software and/or its
associated documentation, you agree that you have read, understood,
and will comply with the following terms and conditions:

Permission to use, copy, modify, and distribute this software and its
associated documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appears in all
copies, and that both that copyright notice and this permission notice
appear in supporting documentation, and that the name of Secret Labs
AB or the author not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

Files: Lib/logging/*
Copyright: 2001-2010 by Vinay Sajip. All Rights Reserved.
License: Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the name of Vinay Sajip
not be used in advertising or publicity pertaining to distribution
of the software without specific, written prior permission.
VINAY SAJIP DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
VINAY SAJIP BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

Files: Lib/multiprocessing/*
Files: Modules/_multiprocessing/*
Copyright: (c) 2006-2008, R Oudkerk. All rights reserved.
License: Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of author nor the names of any contributors may be
used to endorse or promote products derived from this software
without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.

Files: Lib/sqlite3/*
Files: Modules/_sqlite/*
Copyright: (C) 2004-2005 Gerhard Häring <gh@ghaering.de>
License: This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.

Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.

Files: Lib/async*
Copyright: Copyright 1996 by Sam Rushing
License: Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that copyright notice and this permission
notice appear in supporting documentation, and that the name of Sam
Rushing not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

SAM RUSHING DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
NO EVENT SHALL SAM RUSHING BE LIABLE FOR ANY SPECIAL, INDIRECT OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

Files: Lib/tarfile.py
Copyright: (C) 2002 Lars Gustaebel <lars@gustaebel.de>
License: Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

Files: Lib/turtle.py
Copyright: (C) 2006 - 2010 Gregor Lingl
License: This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.

Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.

is copyright Gregor Lingl and licensed under a BSD-like license

Files: Modules/_ctypes/libffi/*
Copyright: Copyright (C) 1996-2011 Red Hat, Inc and others.
Copyright (C) 1996-2011 Anthony Green
Copyright (C) 1996-2010 Free Software Foundation, Inc
Copyright (c) 2003, 2004, 2006, 2007, 2008 Kaz Kojima
Copyright (c) 2010, 2011, Plausible Labs Cooperative , Inc.
Copyright (c) 2010 CodeSourcery
Copyright (c) 1998 Andreas Schwab
Copyright (c) 2000 Hewlett Packard Company
Copyright (c) 2009 Bradley Smith
Copyright (c) 2008 David Daney
Copyright (c) 2004 Simon Posnjak
Copyright (c) 2005 Axis Communications AB
Copyright (c) 1998 Cygnus Solutions
Copyright (c) 2004 Renesas Technology
Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
Copyright (c) 2002 Ranjit Mathew
Copyright (c) 2002 Roger Sayle
Copyright (c) 2000, 2007 Software AG
Copyright (c) 2003 Jakub Jelinek
Copyright (c) 2000, 2001 John Hornkvist
Copyright (c) 1998 Geoffrey Keating
Copyright (c) 2008 Björn König

License: Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
``Software''), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

Documentation:
Permission is granted to copy, distribute and/or modify this document
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version. A copy of the license is included in the
section entitled ``GNU General Public License''.

Files: Modules/_gestalt.c
Copyright: 1991-1997 by Stichting Mathematisch Centrum, Amsterdam.
License: Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior permission.

STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

Files: Modules/syslogmodule.c
Copyright: 1994 by Lance Ellinghouse
License: Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the name of Lance Ellinghouse
not be used in advertising or publicity pertaining to distribution
of the software without specific, written prior permission.

LANCE ELLINGHOUSE DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS, IN NO EVENT SHALL LANCE ELLINGHOUSE BE LIABLE FOR ANY SPECIAL,
INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

Files: Modules/zlib/*
Copyright: (C) 1995-2010 Jean-loup Gailly and Mark Adler
License: This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.

Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
Jean-loup Gailly Mark Adler
|
||||
jloup@gzip.org madler@alumni.caltech.edu
|
||||
|
||||
If you use the zlib library in a product, we would appreciate *not* receiving
|
||||
lengthy legal documents to sign. The sources are provided for free but without
|
||||
warranty of any kind. The library has been entirely written by Jean-loup
|
||||
Gailly and Mark Adler; it does not include third-party code.
|
||||
|
||||
Files: Modules/expat/*
|
||||
Copyright: Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
|
||||
and Clark Cooper
|
||||
Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Expat maintainers
|
||||
License: Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included
|
||||
in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
Files: Modules/_decimal/libmpdec/*
|
||||
Copyright: Copyright (c) 2008-2012 Stefan Krah. All rights reserved.
|
||||
License: Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
.
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
,
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGE.
|
||||
|
||||
Files: Misc/python-mode.el
|
||||
Copyright: Copyright (C) 1992,1993,1994 Tim Peters
|
||||
License: This software is provided as-is, without express or implied
|
||||
warranty. Permission to use, copy, modify, distribute or sell this
|
||||
software, without fee, for any purpose and by any individual or
|
||||
organization, is hereby granted, provided that the above copyright
|
||||
notice and this paragraph appear in all copies.
|
||||
|
||||
Files: Python/dtoa.c
|
||||
Copyright: (c) 1991, 2000, 2001 by Lucent Technologies.
|
||||
License: Permission to use, copy, modify, and distribute this software for any
|
||||
purpose without fee is hereby granted, provided that this entire notice
|
||||
is included in all copies of any software which is or includes a copy
|
||||
or modification of this software and in all copies of the supporting
|
||||
documentation for such software.
|
||||
|
||||
THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||
WARRANTY. IN PARTICULAR, NEITHER THE AUTHOR NOR LUCENT MAKES ANY
|
||||
REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||
OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||
|
||||
Files: Python/getopt.c
|
||||
Copyright: 1992-1994, David Gottner
|
||||
License: Permission to use, copy, modify, and distribute this software and its
|
||||
documentation for any purpose and without fee is hereby granted,
|
||||
provided that the above copyright notice, this permission notice and
|
||||
the following disclaimer notice appear unmodified in all copies.
|
||||
|
||||
I DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL I
|
||||
BE LIABLE FOR ANY SPECIAL, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY
|
||||
DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA, OR PROFITS, WHETHER
|
||||
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
Files: PC/_subprocess.c
|
||||
Copyright: Copyright (c) 2004 by Fredrik Lundh <fredrik@pythonware.com>
|
||||
Copyright (c) 2004 by Secret Labs AB, http://www.pythonware.com
|
||||
Copyright (c) 2004 by Peter Astrand <astrand@lysator.liu.se>
|
||||
License:
|
||||
* Permission to use, copy, modify, and distribute this software and
|
||||
* its associated documentation for any purpose and without fee is
|
||||
* hereby granted, provided that the above copyright notice appears in
|
||||
* all copies, and that both that copyright notice and this permission
|
||||
* notice appear in supporting documentation, and that the name of the
|
||||
* authors not be used in advertising or publicity pertaining to
|
||||
* distribution of the software without specific, written prior
|
||||
* permission.
|
||||
*
|
||||
* THE AUTHORS DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
|
||||
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
|
||||
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
||||
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
Files: PC/winsound.c
|
||||
Copyright: Copyright (c) 1999 Toby Dickenson
|
||||
License: * Permission to use this software in any way is granted without
|
||||
* fee, provided that the copyright notice above appears in all
|
||||
* copies. This software is provided "as is" without any warranty.
|
||||
*/
|
||||
|
||||
/* Modified by Guido van Rossum */
|
||||
/* Beep added by Mark Hammond */
|
||||
/* Win9X Beep and platform identification added by Uncle Timmy */
|
||||
|
||||
Files: Tools/pybench/*
|
||||
Copyright: (c), 1997-2006, Marc-Andre Lemburg (mal@lemburg.com)
|
||||
(c), 2000-2006, eGenix.com Software GmbH (info@egenix.com)
|
||||
License: Permission to use, copy, modify, and distribute this software and its
|
||||
documentation for any purpose and without fee or royalty is hereby
|
||||
granted, provided that the above copyright notice appear in all copies
|
||||
and that both that copyright notice and this permission notice appear
|
||||
in supporting documentation or portions thereof, including
|
||||
modifications, that you make.
|
||||
|
||||
THE AUTHOR MARC-ANDRE LEMBURG DISCLAIMS ALL WARRANTIES WITH REGARD TO
|
||||
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL,
|
||||
INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
|
||||
FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
||||
WITH THE USE OR PERFORMANCE OF THIS SOFTWARE !
|
||||
3
dist/debian/python3/debian/scylla-python3.dirs
vendored
Normal file
3
dist/debian/python3/debian/scylla-python3.dirs
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
opt/scylladb/python3/bin
|
||||
opt/scylladb/python3/lib64
|
||||
opt/scylladb/python3/libexec
|
||||
3
dist/debian/python3/debian/scylla-python3.install
vendored
Normal file
3
dist/debian/python3/debian/scylla-python3.install
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
bin/* opt/scylladb/python3/bin
|
||||
lib64/* opt/scylladb/python3/lib64
|
||||
libexec/* opt/scylladb/python3/libexec
|
||||
22
dist/debian/python3/rules.mustache
vendored
Executable file
22
dist/debian/python3/rules.mustache
vendored
Executable file
@@ -0,0 +1,22 @@
|
||||
#!/usr/bin/make -f
|
||||
|
||||
export PYBUILD_DISABLE=1
|
||||
|
||||
override_dh_auto_configure:
|
||||
|
||||
override_dh_auto_build:
|
||||
|
||||
override_dh_strip:
|
||||
|
||||
override_dh_makeshlibs:
|
||||
|
||||
override_dh_shlibdeps:
|
||||
|
||||
override_dh_fixperms:
|
||||
dh_fixperms
|
||||
chmod 755 $(CURDIR)/debian/{{product}}-python3/opt/scylladb/python3/libexec/ld.so
|
||||
|
||||
override_dh_strip_nondeterminism:
|
||||
|
||||
%:
|
||||
dh $@
|
||||
17
dist/debian/rules.mustache
vendored
17
dist/debian/rules.mustache
vendored
@@ -9,12 +9,21 @@ override_dh_auto_build:
|
||||
|
||||
override_dh_auto_clean:
|
||||
|
||||
override_dh_auto_install:
|
||||
dh_auto_install
|
||||
override_dh_install:
|
||||
dh_install
|
||||
install -d $(CURDIR)/debian/scylla-server/usr/bin
|
||||
for bin in debian/scylla-server/opt/scylladb/libexec/*; do debian/adjust_bin $(CURDIR)/debian/scylla-server "$${bin#*libexec/}"; done
|
||||
ln -sf /opt/scylladb/bin/scylla $(CURDIR)/debian/scylla-server/usr/bin/scylla
|
||||
ln -sf /opt/scylladb/bin/iotune $(CURDIR)/debian/scylla-server/usr/bin/iotune
|
||||
ln -sf /usr/lib/scylla/scyllatop/scyllatop.py $(CURDIR)/debian/scylla-server/usr/bin/scyllatop
|
||||
find ./dist/common/scripts -type f -exec ./relocate_python_scripts.py \
|
||||
--installroot $(CURDIR)/debian/scylla-server/usr/lib/scylla/ --with-python3 "$(CURDIR)/debian/scylla-server/opt/scylladb/python3/bin/python3" {} +
|
||||
./relocate_python_scripts.py \
|
||||
--installroot $(CURDIR)/debian/scylla-server/usr/lib/scylla/ --with-python3 "$(CURDIR)/debian/scylla-server/opt/scylladb/python3/bin/python3" \
|
||||
seastar/scripts/perftune.py seastar/scripts/seastar-addr2line seastar/scripts/perftune.py
|
||||
./relocate_python_scripts.py \
|
||||
--installroot $(CURDIR)/debian/scylla-server/usr/lib/scylla/scyllatop/ --with-python3 "$(CURDIR)/debian/scylla-server/opt/scylladb/python3/bin/python3" \
|
||||
tools/scyllatop/scyllatop.py
|
||||
|
||||
override_dh_installinit:
|
||||
{{#scylla}}
|
||||
@@ -29,7 +38,9 @@ override_dh_installinit:
|
||||
dh_installinit --no-start --name node-exporter
|
||||
|
||||
override_dh_strip:
|
||||
dh_strip -Xlibprotobuf.so.15 -Xld.so --dbg-package={{product}}-server-dbg
|
||||
# The binaries (ethtool...patchelf) don't pass dh_strip after going through patchelf. Since they are
|
||||
# already stripped, nothing is lost if we exclude them, so that's what we do.
|
||||
dh_strip -Xlibprotobuf.so.15 -Xld.so -Xethtool -Xgawk -Xgzip -Xhwloc-calc -Xhwloc-distrib -Xifconfig -Xlscpu -Xnetstat -Xpatchelf --dbg-package={{product}}-server-dbg
|
||||
|
||||
override_dh_makeshlibs:
|
||||
|
||||
|
||||
5
dist/debian/scylla-server.install.mustache
vendored
5
dist/debian/scylla-server.install.mustache
vendored
@@ -1,14 +1,9 @@
|
||||
dist/common/limits.d/scylla.conf etc/security/limits.d
|
||||
dist/common/scylla.d/*.conf etc/scylla.d
|
||||
seastar/dpdk/usertools/dpdk-devbind.py usr/lib/scylla
|
||||
seastar/scripts/perftune.py usr/lib/scylla
|
||||
seastar/scripts/seastar-addr2line usr/lib/scylla
|
||||
seastar/scripts/seastar-cpu-map.sh usr/lib/scylla
|
||||
dist/common/scripts/* usr/lib/scylla
|
||||
tools/scyllatop usr/lib/scylla
|
||||
swagger-ui/dist usr/lib/scylla/swagger-ui
|
||||
api/api-doc usr/lib/scylla/api
|
||||
bin/* opt/scylladb/bin
|
||||
libreloc/* opt/scylladb/libreloc
|
||||
libexec/* opt/scylladb/libexec
|
||||
dist/common/sbin/* usr/sbin
|
||||
|
||||
2
dist/docker/redhat/Dockerfile
vendored
2
dist/docker/redhat/Dockerfile
vendored
@@ -28,7 +28,7 @@ ADD commandlineparser.py /commandlineparser.py
|
||||
ADD docker-entrypoint.py /docker-entrypoint.py
|
||||
ADD node_exporter_install /node_exporter_install
|
||||
# Install Scylla:
|
||||
RUN curl http://downloads.scylladb.com/rpm/unstable/centos/master/latest/scylla.repo -o /etc/yum.repos.d/scylla.repo && \
|
||||
RUN curl http://downloads.scylladb.com/rpm/centos/scylla-3.1.repo -o /etc/yum.repos.d/scylla.repo && \
|
||||
yum -y install epel-release && \
|
||||
yum -y clean expire-cache && \
|
||||
yum -y update && \
|
||||
|
||||
@@ -192,7 +192,11 @@ future<> verification_error(fs::path path, const char* fstr, Args&&... args) {
|
||||
// No other file types may exist.
|
||||
future<> distributed_loader::verify_owner_and_mode(fs::path path) {
|
||||
return file_stat(path.string(), follow_symlink::no).then([path = std::move(path)] (stat_data sd) {
|
||||
if (sd.uid != geteuid()) {
|
||||
// Under docker, we run with euid 0 and there is no reasonable way to enforce that the
|
||||
// in-container uid will have the same uid as files mounted from outside the container. So
|
||||
// just allow euid 0 as a special case. It should survive the file_accessible() checks below.
|
||||
// See #4823.
|
||||
if (geteuid() != 0 && sd.uid != geteuid()) {
|
||||
return verification_error(std::move(path), "File not owned by current euid: {}. Owner is: {}", geteuid(), sd.uid);
|
||||
}
|
||||
switch (sd.type) {
|
||||
|
||||
@@ -151,7 +151,7 @@ if __name__ == '__main__':
|
||||
argp.add_argument('--user', '-u')
|
||||
argp.add_argument('--password', '-p', default='none')
|
||||
argp.add_argument('--node', default='127.0.0.1', help='Node to connect to.')
|
||||
argp.add_argument('--port', default='9042', help='Port to connect to.')
|
||||
argp.add_argument('--port', default=9042, help='Port to connect to.', type=int)
|
||||
|
||||
args = argp.parse_args()
|
||||
res = validate_and_fix(args)
|
||||
|
||||
15
gc_clock.hh
15
gc_clock.hh
@@ -22,6 +22,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "clocks-impl.hh"
|
||||
#include "hashing.hh"
|
||||
|
||||
#include <seastar/core/lowres_clock.hh>
|
||||
|
||||
@@ -71,3 +72,17 @@ using ttl_opt = std::optional<gc_clock::duration>;
|
||||
static constexpr gc_clock::duration max_ttl = gc_clock::duration{20 * 365 * 24 * 60 * 60};
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, gc_clock::time_point tp);
|
||||
|
||||
template<>
|
||||
struct appending_hash<gc_clock::time_point> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, gc_clock::time_point t) const {
|
||||
// Remain backwards-compatible with the 32-bit duration::rep (refs #4460).
|
||||
uint64_t d64 = t.time_since_epoch().count();
|
||||
feed_hash(h, uint32_t(d64 & 0xffff'ffff));
|
||||
uint32_t msb = d64 >> 32;
|
||||
if (msb) {
|
||||
feed_hash(h, msb);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
30
hashers.cc
30
hashers.cc
@@ -29,7 +29,7 @@ template <typename T> struct hasher_traits;
|
||||
template <> struct hasher_traits<md5_hasher> { using impl_type = CryptoPP::Weak::MD5; };
|
||||
template <> struct hasher_traits<sha256_hasher> { using impl_type = CryptoPP::SHA256; };
|
||||
|
||||
template <typename T, size_t size> struct hasher<T, size>::impl {
|
||||
template <typename T, size_t size> struct cryptopp_hasher<T, size>::impl {
|
||||
using impl_type = typename hasher_traits<T>::impl_type;
|
||||
|
||||
impl_type hash{};
|
||||
@@ -53,35 +53,35 @@ template <typename T, size_t size> struct hasher<T, size>::impl {
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, size_t size> hasher<T, size>::hasher() : _impl(std::make_unique<impl>()) {}
|
||||
template <typename T, size_t size> cryptopp_hasher<T, size>::cryptopp_hasher() : _impl(std::make_unique<impl>()) {}
|
||||
|
||||
template <typename T, size_t size> hasher<T, size>::~hasher() = default;
|
||||
template <typename T, size_t size> cryptopp_hasher<T, size>::~cryptopp_hasher() = default;
|
||||
|
||||
template <typename T, size_t size> hasher<T, size>::hasher(hasher&& o) noexcept = default;
|
||||
template <typename T, size_t size> cryptopp_hasher<T, size>::cryptopp_hasher(cryptopp_hasher&& o) noexcept = default;
|
||||
|
||||
template <typename T, size_t size> hasher<T, size>::hasher(const hasher& o) : _impl(std::make_unique<hasher<T, size>::impl>(*o._impl)) {}
|
||||
template <typename T, size_t size> cryptopp_hasher<T, size>::cryptopp_hasher(const cryptopp_hasher& o) : _impl(std::make_unique<cryptopp_hasher<T, size>::impl>(*o._impl)) {}
|
||||
|
||||
template <typename T, size_t size> hasher<T, size>& hasher<T, size>::operator=(hasher&& o) noexcept = default;
|
||||
template <typename T, size_t size> cryptopp_hasher<T, size>& cryptopp_hasher<T, size>::operator=(cryptopp_hasher&& o) noexcept = default;
|
||||
|
||||
template <typename T, size_t size> hasher<T, size>& hasher<T, size>::operator=(const hasher& o) {
|
||||
_impl = std::make_unique<hasher<T, size>::impl>(*o._impl);
|
||||
template <typename T, size_t size> cryptopp_hasher<T, size>& cryptopp_hasher<T, size>::operator=(const cryptopp_hasher& o) {
|
||||
_impl = std::make_unique<cryptopp_hasher<T, size>::impl>(*o._impl);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename T, size_t size> bytes hasher<T, size>::finalize() { return _impl->finalize(); }
|
||||
template <typename T, size_t size> bytes cryptopp_hasher<T, size>::finalize() { return _impl->finalize(); }
|
||||
|
||||
template <typename T, size_t size> std::array<uint8_t, size> hasher<T, size>::finalize_array() {
|
||||
template <typename T, size_t size> std::array<uint8_t, size> cryptopp_hasher<T, size>::finalize_array() {
|
||||
return _impl->finalize_array();
|
||||
}
|
||||
|
||||
template <typename T, size_t size> void hasher<T, size>::update(const char* ptr, size_t length) { _impl->update(ptr, length); }
|
||||
template <typename T, size_t size> void cryptopp_hasher<T, size>::update(const char* ptr, size_t length) { _impl->update(ptr, length); }
|
||||
|
||||
template <typename T, size_t size> bytes hasher<T, size>::calculate(const std::string_view& s) {
|
||||
typename hasher<T, size>::impl::impl_type hash;
|
||||
template <typename T, size_t size> bytes cryptopp_hasher<T, size>::calculate(const std::string_view& s) {
|
||||
typename cryptopp_hasher<T, size>::impl::impl_type hash;
|
||||
unsigned char digest[size];
|
||||
hash.CalculateDigest(digest, reinterpret_cast<const unsigned char*>(s.data()), s.size());
|
||||
return std::move(bytes{reinterpret_cast<const int8_t*>(digest), size});
|
||||
}
|
||||
|
||||
template class hasher<md5_hasher, 16>;
|
||||
template class hasher<sha256_hasher, 32>;
|
||||
template class cryptopp_hasher<md5_hasher, 16>;
|
||||
template class cryptopp_hasher<sha256_hasher, 32>;
|
||||
|
||||
21
hashers.hh
21
hashers.hh
@@ -22,29 +22,30 @@
|
||||
#pragma once
|
||||
|
||||
#include "bytes.hh"
|
||||
#include "hashing.hh"
|
||||
|
||||
class md5_hasher;
|
||||
|
||||
template <typename T, size_t size> class hasher {
|
||||
template <typename T, size_t size> class cryptopp_hasher : public hasher {
|
||||
struct impl;
|
||||
std::unique_ptr<impl> _impl;
|
||||
|
||||
public:
|
||||
hasher();
|
||||
~hasher();
|
||||
hasher(hasher&&) noexcept;
|
||||
hasher(const hasher&);
|
||||
hasher& operator=(hasher&&) noexcept;
|
||||
hasher& operator=(const hasher&);
|
||||
cryptopp_hasher();
|
||||
~cryptopp_hasher();
|
||||
cryptopp_hasher(cryptopp_hasher&&) noexcept;
|
||||
cryptopp_hasher(const cryptopp_hasher&);
|
||||
cryptopp_hasher& operator=(cryptopp_hasher&&) noexcept;
|
||||
cryptopp_hasher& operator=(const cryptopp_hasher&);
|
||||
|
||||
bytes finalize();
|
||||
std::array<uint8_t, size> finalize_array();
|
||||
void update(const char* ptr, size_t length);
|
||||
void update(const char* ptr, size_t length) override;
|
||||
|
||||
// Use update and finalize to compute the hash over the full view.
|
||||
static bytes calculate(const std::string_view& s);
|
||||
};
|
||||
|
||||
class md5_hasher : public hasher<md5_hasher, 16> {};
|
||||
class md5_hasher final : public cryptopp_hasher<md5_hasher, 16> {};
|
||||
|
||||
class sha256_hasher : public hasher<sha256_hasher, 32> {};
|
||||
class sha256_hasher final : public cryptopp_hasher<sha256_hasher, 32> {};
|
||||
|
||||
79
hashing.hh
79
hashing.hh
@@ -27,6 +27,7 @@
|
||||
#include <seastar/core/byteorder.hh>
|
||||
#include <seastar/core/sstring.hh>
|
||||
#include "seastarx.hh"
|
||||
#include <seastar/util/gcc6-concepts.hh>
|
||||
|
||||
//
|
||||
// This hashing differs from std::hash<> in that it decouples knowledge about
|
||||
@@ -41,24 +42,38 @@
|
||||
// appending_hash<T> is machine-independent.
|
||||
//
|
||||
|
||||
// The Hasher concept
|
||||
struct Hasher {
|
||||
void update(const char* ptr, size_t size);
|
||||
GCC6_CONCEPT(
|
||||
template<typename H>
|
||||
concept bool Hasher() {
|
||||
return requires(H& h, const char* ptr, size_t size) {
|
||||
{ h.update(ptr, size) } -> void
|
||||
};
|
||||
}
|
||||
)
|
||||
|
||||
class hasher {
|
||||
public:
|
||||
virtual ~hasher() = default;
|
||||
virtual void update(const char* ptr, size_t size) = 0;
|
||||
};
|
||||
|
||||
GCC6_CONCEPT(static_assert(Hasher<hasher>());)
|
||||
|
||||
template<typename T, typename Enable = void>
|
||||
struct appending_hash;
|
||||
|
||||
template<typename Hasher, typename T, typename... Args>
|
||||
template<typename H, typename T, typename... Args>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
inline
|
||||
void feed_hash(Hasher& h, const T& value, Args&&... args) {
|
||||
void feed_hash(H& h, const T& value, Args&&... args) {
|
||||
appending_hash<T>()(h, value, std::forward<Args>(args)...);
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct appending_hash<T, std::enable_if_t<std::is_arithmetic<T>::value>> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, T value) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, T value) const {
|
||||
auto value_le = cpu_to_le(value);
|
||||
h.update(reinterpret_cast<const char*>(&value_le), sizeof(T));
|
||||
}
|
||||
@@ -66,24 +81,27 @@ struct appending_hash<T, std::enable_if_t<std::is_arithmetic<T>::value>> {
|
||||
|
||||
template<>
|
||||
struct appending_hash<bool> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, bool value) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, bool value) const {
|
||||
feed_hash(h, static_cast<uint8_t>(value));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct appending_hash<T, std::enable_if_t<std::is_enum<T>::value>> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, const T& value) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, const T& value) const {
|
||||
feed_hash(h, static_cast<std::underlying_type_t<T>>(value));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct appending_hash<std::optional<T>> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, const std::optional<T>& value) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, const std::optional<T>& value) const {
|
||||
if (value) {
|
||||
feed_hash(h, true);
|
||||
feed_hash(h, *value);
|
||||
@@ -95,8 +113,9 @@ struct appending_hash<std::optional<T>> {
|
||||
|
||||
template<size_t N>
|
||||
struct appending_hash<char[N]> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, const char (&value) [N]) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, const char (&value) [N]) const {
|
||||
feed_hash(h, N);
|
||||
h.update(value, N);
|
||||
}
|
||||
@@ -104,8 +123,9 @@ struct appending_hash<char[N]> {
|
||||
|
||||
template<typename T>
|
||||
struct appending_hash<std::vector<T>> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, const std::vector<T>& value) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, const std::vector<T>& value) const {
|
||||
feed_hash(h, value.size());
|
||||
for (auto&& v : value) {
|
||||
appending_hash<T>()(h, v);
|
||||
@@ -115,8 +135,9 @@ struct appending_hash<std::vector<T>> {
|
||||
|
||||
template<typename K, typename V>
|
||||
struct appending_hash<std::map<K, V>> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, const std::map<K, V>& value) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, const std::map<K, V>& value) const {
|
||||
feed_hash(h, value.size());
|
||||
for (auto&& e : value) {
|
||||
appending_hash<K>()(h, e.first);
|
||||
@@ -127,8 +148,9 @@ struct appending_hash<std::map<K, V>> {
|
||||
|
||||
template<>
|
||||
struct appending_hash<sstring> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, const sstring& v) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, const sstring& v) const {
|
||||
feed_hash(h, v.size());
|
||||
h.update(reinterpret_cast<const char*>(v.cbegin()), v.size() * sizeof(sstring::value_type));
|
||||
}
|
||||
@@ -136,8 +158,9 @@ struct appending_hash<sstring> {
|
||||
|
||||
template<>
|
||||
struct appending_hash<std::string> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, const std::string& v) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, const std::string& v) const {
|
||||
feed_hash(h, v.size());
|
||||
h.update(reinterpret_cast<const char*>(v.data()), v.size() * sizeof(std::string::value_type));
|
||||
}
|
||||
@@ -145,16 +168,18 @@ struct appending_hash<std::string> {
|
||||
|
||||
template<typename T, typename R>
|
||||
struct appending_hash<std::chrono::duration<T, R>> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, std::chrono::duration<T, R> v) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, std::chrono::duration<T, R> v) const {
|
||||
feed_hash(h, v.count());
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Clock, typename Duration>
|
||||
struct appending_hash<std::chrono::time_point<Clock, Duration>> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, std::chrono::time_point<Clock, Duration> v) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, std::chrono::time_point<Clock, Duration> v) const {
|
||||
feed_hash(h, v.time_since_epoch().count());
|
||||
}
|
||||
};
|
||||
|
||||
@@ -51,4 +51,10 @@ enum class stream_reason : uint8_t {
|
||||
repair,
|
||||
};
|
||||
|
||||
enum class stream_mutation_fragments_cmd : uint8_t {
|
||||
error,
|
||||
mutation_fragment_data,
|
||||
end_of_stream,
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
4
init.cc
4
init.cc
@@ -155,6 +155,10 @@ void init_ms_fd_gossiper(sharded<gms::gossiper>& gossiper
|
||||
to_string(seeds), listen_address_in, broadcast_address);
|
||||
throw std::runtime_error("Use broadcast_address for seeds list");
|
||||
}
|
||||
if ((!cfg.replace_address_first_boot().empty() || !cfg.replace_address().empty()) && seeds.count(broadcast_address)) {
|
||||
startlog.error("Bad configuration: replace-address and replace-address-first-boot are not allowed for seed nodes");
|
||||
throw bad_configuration_error();
|
||||
}
|
||||
gossiper.local().set_seeds(seeds);
|
||||
gossiper.invoke_on_all([cluster_name](gms::gossiper& g) {
|
||||
g.set_cluster_name(cluster_name);
|
||||
|
||||
34
install.sh
34
install.sh
@@ -75,6 +75,29 @@ while [ $# -gt 0 ]; do
|
||||
esac
|
||||
done
|
||||
|
||||
patchelf() {
|
||||
# patchelf comes from the build system, so it needs the build system's ld.so and
|
||||
# shared libraries. We can't use patchelf on patchelf itself, so invoke it via
|
||||
# ld.so.
|
||||
LD_LIBRARY_PATH="$PWD/libreloc" libreloc/ld.so libexec/patchelf "$@"
|
||||
}
|
||||
|
||||
adjust_bin() {
|
||||
local bin="$1"
|
||||
# We could add --set-rpath too, but then debugedit (called by rpmbuild) barfs
|
||||
# on the result. So use LD_LIBRARY_PATH in the thunk, below.
|
||||
patchelf \
|
||||
--set-interpreter "/opt/scylladb/libreloc/ld.so" \
|
||||
"$root/opt/scylladb/libexec/$bin"
|
||||
cat > "$root/opt/scylladb/bin/$bin" <<EOF
|
||||
#!/bin/bash -e
|
||||
export GNUTLS_SYSTEM_PRIORITY_FILE="\${GNUTLS_SYSTEM_PRIORITY_FILE-/opt/scylladb/libreloc/gnutls.config}"
|
||||
export LD_LIBRARY_PATH="/opt/scylladb/libreloc"
|
||||
exec -a "\$0" "/opt/scylladb/libexec/$bin" "\$@"
|
||||
EOF
|
||||
chmod +x "$root/opt/scylladb/bin/$bin"
|
||||
}
|
||||
|
||||
rprefix="$root/$prefix"
|
||||
retc="$root/etc"
|
||||
rdoc="$rprefix/share/doc"
|
||||
@@ -105,16 +128,13 @@ install -m644 dist/common/systemd/*.service -Dt "$rprefix"/lib/systemd/system
|
||||
install -m644 dist/common/systemd/*.timer -Dt "$rprefix"/lib/systemd/system
|
||||
install -m755 seastar/scripts/seastar-cpu-map.sh -Dt "$rprefix"/lib/scylla/
|
||||
install -m755 seastar/dpdk/usertools/dpdk-devbind.py -Dt "$rprefix"/lib/scylla/
|
||||
install -m755 bin/* -Dt "$root/opt/scylladb/bin"
|
||||
install -m755 libreloc/* -Dt "$root/opt/scylladb/libreloc"
|
||||
# some files in libexec are symlinks, which "install" dereferences
|
||||
# use cp -P for the symlinks instead.
|
||||
install -m755 libexec/*.bin -Dt "$root/opt/scylladb/libexec"
|
||||
for f in libexec/*; do
|
||||
if [[ "$f" != *.bin ]]; then
|
||||
cp -P "$f" "$root/opt/scylladb/libexec"
|
||||
fi
|
||||
install -m755 libexec/* -Dt "$root/opt/scylladb/libexec"
|
||||
for bin in libexec/*; do
|
||||
adjust_bin "${bin#libexec/}"
|
||||
done
|
||||
install -m755 libreloc/* -Dt "$root/opt/scylladb/libreloc"
|
||||
ln -srf "$root/opt/scylladb/bin/scylla" "$rprefix/bin/scylla"
|
||||
ln -srf "$root/opt/scylladb/bin/iotune" "$rprefix/bin/iotune"
|
||||
ln -srf "$rprefix/lib/scylla/scyllatop/scyllatop.py" "$rprefix/bin/scyllatop"
|
||||
|
||||
26
main.cc
26
main.cc
@@ -340,15 +340,7 @@ int main(int ac, char** av) {
|
||||
auto cfg = make_lw_shared<db::config>(ext);
|
||||
auto init = app.get_options_description().add_options();
|
||||
|
||||
// If --version is requested, print it out and exit immediately to avoid
|
||||
// Seastar-specific warnings that may occur when running the app
|
||||
init("version", bpo::bool_switch(), "print version number and exit");
|
||||
bpo::variables_map vm;
|
||||
bpo::store(bpo::command_line_parser(ac, av).options(app.get_options_description()).allow_unregistered().run(), vm);
|
||||
if (vm["version"].as<bool>()) {
|
||||
fmt::print("{}\n", scylla_version());
|
||||
return 0;
|
||||
}
|
||||
|
||||
bpo::options_description deprecated("Deprecated options - ignored");
|
||||
deprecated.add_options()
|
||||
@@ -362,6 +354,15 @@ int main(int ac, char** av) {
|
||||
configurable::append_all(*cfg, init);
|
||||
cfg->add_options(init);
|
||||
|
||||
// If --version is requested, print it out and exit immediately to avoid
|
||||
// Seastar-specific warnings that may occur when running the app
|
||||
bpo::variables_map vm;
|
||||
bpo::store(bpo::command_line_parser(ac, av).options(app.get_options_description()).allow_unregistered().run(), vm);
|
||||
if (vm["version"].as<bool>()) {
|
||||
fmt::print("{}\n", scylla_version());
|
||||
return 0;
|
||||
}
|
||||
|
||||
distributed<database> db;
|
||||
seastar::sharded<service::cache_hitrate_calculator> cf_cache_hitrate_calculator;
|
||||
debug::db = &db;
|
||||
@@ -526,6 +527,9 @@ int main(int ac, char** av) {
|
||||
if (opts.count("developer-mode")) {
|
||||
smp::invoke_on_all([] { engine().set_strict_dma(false); }).get();
|
||||
}
|
||||
|
||||
set_abort_on_internal_error(cfg->abort_on_internal_error());
|
||||
|
||||
supervisor::notify("creating tracing");
|
||||
tracing::backend_registry tracing_backend_registry;
|
||||
tracing::register_tracing_keyspace_backend(tracing_backend_registry);
|
||||
@@ -916,8 +920,10 @@ int main(int ac, char** av) {
|
||||
service::get_local_storage_service().drain_on_shutdown().get();
|
||||
});
|
||||
|
||||
auto stop_view_builder = defer([] {
|
||||
view_builder.stop().get();
|
||||
auto stop_view_builder = defer([cfg] {
|
||||
if (cfg->view_building()) {
|
||||
view_builder.stop().get();
|
||||
}
|
||||
});
|
||||
|
||||
auto stop_compaction_manager = defer([&db] {
|
||||
|
||||
@@ -89,6 +89,7 @@
|
||||
#include "frozen_mutation.hh"
|
||||
#include "flat_mutation_reader.hh"
|
||||
#include "streaming/stream_manager.hh"
|
||||
#include "streaming/stream_mutation_fragments_cmd.hh"
|
||||
|
||||
namespace netw {
|
||||
|
||||
@@ -287,7 +288,6 @@ void messaging_service::start_listen() {
|
||||
if (_compress_what != compress_what::none) {
|
||||
so.compressor_factory = &compressor_factory;
|
||||
}
|
||||
so.streaming_domain = rpc::streaming_domain_type(0x55AA);
|
||||
so.load_balancing_algorithm = server_socket::load_balancing_algorithm::port;
|
||||
|
||||
// FIXME: we don't set so.tcp_nodelay, because we can't tell at this point whether the connection will come from a
|
||||
@@ -295,19 +295,21 @@ void messaging_service::start_listen() {
|
||||
// the first by wrapping its server_socket, but not the second.
|
||||
auto limits = rpc_resource_limits(_mcfg.rpc_memory_limit);
|
||||
if (!_server[0]) {
|
||||
auto listen = [&] (const gms::inet_address& a) {
|
||||
auto listen = [&] (const gms::inet_address& a, rpc::streaming_domain_type sdomain) {
|
||||
so.streaming_domain = sdomain;
|
||||
auto addr = ipv4_addr{a.raw_addr(), _port};
|
||||
return std::unique_ptr<rpc_protocol_server_wrapper>(new rpc_protocol_server_wrapper(*_rpc,
|
||||
so, addr, limits));
|
||||
};
|
||||
_server[0] = listen(_listen_address);
|
||||
_server[0] = listen(_listen_address, rpc::streaming_domain_type(0x55AA));
|
||||
if (listen_to_bc) {
|
||||
_server[1] = listen(utils::fb_utilities::get_broadcast_address());
|
||||
_server[1] = listen(utils::fb_utilities::get_broadcast_address(), rpc::streaming_domain_type(0x66BB));
|
||||
}
|
||||
}
|
||||
|
||||
if (!_server_tls[0]) {
|
||||
auto listen = [&] (const gms::inet_address& a) {
|
||||
auto listen = [&] (const gms::inet_address& a, rpc::streaming_domain_type sdomain) {
|
||||
so.streaming_domain = sdomain;
|
||||
return std::unique_ptr<rpc_protocol_server_wrapper>(
|
||||
[this, &so, &a, limits] () -> std::unique_ptr<rpc_protocol_server_wrapper>{
|
||||
if (_encrypt_what == encrypt_what::none) {
|
||||
@@ -321,9 +323,9 @@ void messaging_service::start_listen() {
|
||||
so, seastar::tls::listen(_credentials, addr, lo), limits);
|
||||
}());
|
||||
};
|
||||
_server_tls[0] = listen(_listen_address);
|
||||
_server_tls[0] = listen(_listen_address, rpc::streaming_domain_type(0x77CC));
|
||||
if (listen_to_bc) {
|
||||
_server_tls[1] = listen(utils::fb_utilities::get_broadcast_address());
|
||||
_server_tls[1] = listen(utils::fb_utilities::get_broadcast_address(), rpc::streaming_domain_type(0x88DD));
|
||||
}
|
||||
}
|
||||
// Do this on just cpu 0, to avoid duplicate logs.
|
||||
@@ -607,6 +609,7 @@ shared_ptr<messaging_service::rpc_protocol_client_wrapper> messaging_service::ge
|
||||
opts.compressor_factory = &compressor_factory;
|
||||
}
|
||||
opts.tcp_nodelay = must_tcp_nodelay;
|
||||
opts.reuseaddr = true;
|
||||
|
||||
auto client = must_encrypt ?
|
||||
::make_shared<rpc_protocol_client_wrapper>(*_rpc, std::move(opts),
|
||||
@@ -668,24 +671,24 @@ std::unique_ptr<messaging_service::rpc_protocol_wrapper>& messaging_service::rpc
|
||||
return _rpc;
|
||||
}
|
||||
|
||||
rpc::sink<int32_t> messaging_service::make_sink_for_stream_mutation_fragments(rpc::source<frozen_mutation_fragment>& source) {
|
||||
rpc::sink<int32_t> messaging_service::make_sink_for_stream_mutation_fragments(rpc::source<frozen_mutation_fragment, rpc::optional<streaming::stream_mutation_fragments_cmd>>& source) {
|
||||
return source.make_sink<netw::serializer, int32_t>();
|
||||
}
|
||||
|
||||
future<rpc::sink<frozen_mutation_fragment>, rpc::source<int32_t>>
|
||||
future<rpc::sink<frozen_mutation_fragment, streaming::stream_mutation_fragments_cmd>, rpc::source<int32_t>>
|
||||
messaging_service::make_sink_and_source_for_stream_mutation_fragments(utils::UUID schema_id, utils::UUID plan_id, utils::UUID cf_id, uint64_t estimated_partitions, streaming::stream_reason reason, msg_addr id) {
|
||||
auto rpc_client = get_rpc_client(messaging_verb::STREAM_MUTATION_FRAGMENTS, id);
|
||||
return rpc_client->make_stream_sink<netw::serializer, frozen_mutation_fragment>().then([this, plan_id, schema_id, cf_id, estimated_partitions, reason, rpc_client] (rpc::sink<frozen_mutation_fragment> sink) mutable {
|
||||
auto rpc_handler = rpc()->make_client<rpc::source<int32_t> (utils::UUID, utils::UUID, utils::UUID, uint64_t, streaming::stream_reason, rpc::sink<frozen_mutation_fragment>)>(messaging_verb::STREAM_MUTATION_FRAGMENTS);
|
||||
return rpc_client->make_stream_sink<netw::serializer, frozen_mutation_fragment, streaming::stream_mutation_fragments_cmd>().then([this, plan_id, schema_id, cf_id, estimated_partitions, reason, rpc_client] (rpc::sink<frozen_mutation_fragment, streaming::stream_mutation_fragments_cmd> sink) mutable {
|
||||
auto rpc_handler = rpc()->make_client<rpc::source<int32_t> (utils::UUID, utils::UUID, utils::UUID, uint64_t, streaming::stream_reason, rpc::sink<frozen_mutation_fragment, streaming::stream_mutation_fragments_cmd>)>(messaging_verb::STREAM_MUTATION_FRAGMENTS);
|
||||
return rpc_handler(*rpc_client , plan_id, schema_id, cf_id, estimated_partitions, reason, sink).then_wrapped([sink, rpc_client] (future<rpc::source<int32_t>> source) mutable {
|
||||
return (source.failed() ? sink.close() : make_ready_future<>()).then([sink = std::move(sink), source = std::move(source)] () mutable {
|
||||
return make_ready_future<rpc::sink<frozen_mutation_fragment>, rpc::source<int32_t>>(std::move(sink), std::move(source.get0()));
|
||||
return make_ready_future<rpc::sink<frozen_mutation_fragment, streaming::stream_mutation_fragments_cmd>, rpc::source<int32_t>>(std::move(sink), std::move(source.get0()));
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
void messaging_service::register_stream_mutation_fragments(std::function<future<rpc::sink<int32_t>> (const rpc::client_info& cinfo, UUID plan_id, UUID schema_id, UUID cf_id, uint64_t estimated_partitions, rpc::optional<streaming::stream_reason>, rpc::source<frozen_mutation_fragment> source)>&& func) {
|
||||
void messaging_service::register_stream_mutation_fragments(std::function<future<rpc::sink<int32_t>> (const rpc::client_info& cinfo, UUID plan_id, UUID schema_id, UUID cf_id, uint64_t estimated_partitions, rpc::optional<streaming::stream_reason>, rpc::source<frozen_mutation_fragment, rpc::optional<streaming::stream_mutation_fragments_cmd>> source)>&& func) {
|
||||
register_handler(this, messaging_verb::STREAM_MUTATION_FRAGMENTS, std::move(func));
|
||||
}
|
||||
|
||||
@@ -1077,14 +1080,14 @@ future<> messaging_service::send_repair_put_row_diff(msg_addr id, uint32_t repai
|
||||
}
|
||||
|
||||
// Wrapper for REPAIR_ROW_LEVEL_START
|
||||
void messaging_service::register_repair_row_level_start(std::function<future<> (const rpc::client_info& cinfo, uint32_t repair_meta_id, sstring keyspace_name, sstring cf_name, dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size, uint64_t seed, unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name)>&& func) {
|
||||
void messaging_service::register_repair_row_level_start(std::function<future<> (const rpc::client_info& cinfo, uint32_t repair_meta_id, sstring keyspace_name, sstring cf_name, dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size, uint64_t seed, unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name, table_schema_version schema_version)>&& func) {
|
||||
register_handler(this, messaging_verb::REPAIR_ROW_LEVEL_START, std::move(func));
|
||||
}
|
||||
void messaging_service::unregister_repair_row_level_start() {
|
||||
_rpc->unregister_handler(messaging_verb::REPAIR_ROW_LEVEL_START);
|
||||
}
|
||||
future<> messaging_service::send_repair_row_level_start(msg_addr id, uint32_t repair_meta_id, sstring keyspace_name, sstring cf_name, dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size, uint64_t seed, unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name) {
|
||||
return send_message<void>(this, messaging_verb::REPAIR_ROW_LEVEL_START, std::move(id), repair_meta_id, std::move(keyspace_name), std::move(cf_name), std::move(range), algo, max_row_buf_size, seed, remote_shard, remote_shard_count, remote_ignore_msb, std::move(remote_partitioner_name));
|
||||
future<> messaging_service::send_repair_row_level_start(msg_addr id, uint32_t repair_meta_id, sstring keyspace_name, sstring cf_name, dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size, uint64_t seed, unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name, table_schema_version schema_version) {
|
||||
return send_message<void>(this, messaging_verb::REPAIR_ROW_LEVEL_START, std::move(id), repair_meta_id, std::move(keyspace_name), std::move(cf_name), std::move(range), algo, max_row_buf_size, seed, remote_shard, remote_shard_count, remote_ignore_msb, std::move(remote_partitioner_name), std::move(schema_version));
|
||||
}
|
||||
|
||||
// Wrapper for REPAIR_ROW_LEVEL_STOP
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
#include "tracing/tracing.hh"
|
||||
#include "digest_algorithm.hh"
|
||||
#include "streaming/stream_reason.hh"
|
||||
#include "streaming/stream_mutation_fragments_cmd.hh"
|
||||
#include "cache_temperature.hh"
|
||||
|
||||
#include <list>
|
||||
@@ -270,9 +271,9 @@ public:
|
||||
|
||||
// Wrapper for STREAM_MUTATION_FRAGMENTS
|
||||
// The receiver of STREAM_MUTATION_FRAGMENTS sends status code to the sender to notify any error on the receiver side. The status code is of type int32_t. 0 means successful, -1 means error, other status code value are reserved for future use.
|
||||
void register_stream_mutation_fragments(std::function<future<rpc::sink<int32_t>> (const rpc::client_info& cinfo, UUID plan_id, UUID schema_id, UUID cf_id, uint64_t estimated_partitions, rpc::optional<streaming::stream_reason> reason_opt, rpc::source<frozen_mutation_fragment> source)>&& func);
|
||||
rpc::sink<int32_t> make_sink_for_stream_mutation_fragments(rpc::source<frozen_mutation_fragment>& source);
|
||||
future<rpc::sink<frozen_mutation_fragment>, rpc::source<int32_t>> make_sink_and_source_for_stream_mutation_fragments(utils::UUID schema_id, utils::UUID plan_id, utils::UUID cf_id, uint64_t estimated_partitions, streaming::stream_reason reason, msg_addr id);
|
||||
void register_stream_mutation_fragments(std::function<future<rpc::sink<int32_t>> (const rpc::client_info& cinfo, UUID plan_id, UUID schema_id, UUID cf_id, uint64_t estimated_partitions, rpc::optional<streaming::stream_reason> reason_opt, rpc::source<frozen_mutation_fragment, rpc::optional<streaming::stream_mutation_fragments_cmd>> source)>&& func);
|
||||
rpc::sink<int32_t> make_sink_for_stream_mutation_fragments(rpc::source<frozen_mutation_fragment, rpc::optional<streaming::stream_mutation_fragments_cmd>>& source);
|
||||
future<rpc::sink<frozen_mutation_fragment, streaming::stream_mutation_fragments_cmd>, rpc::source<int32_t>> make_sink_and_source_for_stream_mutation_fragments(utils::UUID schema_id, utils::UUID plan_id, utils::UUID cf_id, uint64_t estimated_partitions, streaming::stream_reason reason, msg_addr id);
|
||||
|
||||
void register_stream_mutation_done(std::function<future<> (const rpc::client_info& cinfo, UUID plan_id, dht::token_range_vector ranges, UUID cf_id, unsigned dst_cpu_id)>&& func);
|
||||
future<> send_stream_mutation_done(msg_addr id, UUID plan_id, dht::token_range_vector ranges, UUID cf_id, unsigned dst_cpu_id);
|
||||
@@ -311,9 +312,9 @@ public:
|
||||
future<> send_repair_put_row_diff(msg_addr id, uint32_t repair_meta_id, repair_rows_on_wire row_diff);
|
||||
|
||||
// Wrapper for REPAIR_ROW_LEVEL_START
|
||||
void register_repair_row_level_start(std::function<future<> (const rpc::client_info& cinfo, uint32_t repair_meta_id, sstring keyspace_name, sstring cf_name, dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size, uint64_t seed, unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name)>&& func);
|
||||
void register_repair_row_level_start(std::function<future<> (const rpc::client_info& cinfo, uint32_t repair_meta_id, sstring keyspace_name, sstring cf_name, dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size, uint64_t seed, unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name, table_schema_version schema_version)>&& func);
|
||||
void unregister_repair_row_level_start();
|
||||
future<> send_repair_row_level_start(msg_addr id, uint32_t repair_meta_id, sstring keyspace_name, sstring cf_name, dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size, uint64_t seed, unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name);
|
||||
future<> send_repair_row_level_start(msg_addr id, uint32_t repair_meta_id, sstring keyspace_name, sstring cf_name, dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size, uint64_t seed, unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name, table_schema_version schema_version);
|
||||
|
||||
// Wrapper for REPAIR_ROW_LEVEL_STOP
|
||||
void register_repair_row_level_stop(std::function<future<> (const rpc::client_info& cinfo, uint32_t repair_meta_id, sstring keyspace_name, sstring cf_name, dht::token_range range)>&& func);
|
||||
|
||||
@@ -145,7 +145,14 @@ mutation_partition::mutation_partition(const schema& s, const mutation_partition
|
||||
, _static_row(s, column_kind::static_column, x._static_row)
|
||||
, _static_row_continuous(x._static_row_continuous)
|
||||
, _rows()
|
||||
, _row_tombstones(x._row_tombstones) {
|
||||
, _row_tombstones(x._row_tombstones)
|
||||
#ifdef SEASTAR_DEBUG
|
||||
, _schema_version(s.version())
|
||||
#endif
|
||||
{
|
||||
#ifdef SEASTAR_DEBUG
|
||||
assert(x._schema_version == _schema_version);
|
||||
#endif
|
||||
auto cloner = [&s] (const auto& x) {
|
||||
return current_allocator().construct<rows_entry>(s, x);
|
||||
};
|
||||
@@ -158,7 +165,14 @@ mutation_partition::mutation_partition(const mutation_partition& x, const schema
|
||||
, _static_row(schema, column_kind::static_column, x._static_row)
|
||||
, _static_row_continuous(x._static_row_continuous)
|
||||
, _rows()
|
||||
, _row_tombstones(x._row_tombstones, range_tombstone_list::copy_comparator_only()) {
|
||||
, _row_tombstones(x._row_tombstones, range_tombstone_list::copy_comparator_only())
|
||||
#ifdef SEASTAR_DEBUG
|
||||
, _schema_version(schema.version())
|
||||
#endif
|
||||
{
|
||||
#ifdef SEASTAR_DEBUG
|
||||
assert(x._schema_version == _schema_version);
|
||||
#endif
|
||||
try {
|
||||
for(auto&& r : ck_ranges) {
|
||||
for (const rows_entry& e : x.range(schema, r)) {
|
||||
@@ -181,7 +195,13 @@ mutation_partition::mutation_partition(mutation_partition&& x, const schema& sch
|
||||
, _static_row_continuous(x._static_row_continuous)
|
||||
, _rows(std::move(x._rows))
|
||||
, _row_tombstones(std::move(x._row_tombstones))
|
||||
#ifdef SEASTAR_DEBUG
|
||||
, _schema_version(schema.version())
|
||||
#endif
|
||||
{
|
||||
#ifdef SEASTAR_DEBUG
|
||||
assert(x._schema_version == _schema_version);
|
||||
#endif
|
||||
{
|
||||
auto deleter = current_deleter<rows_entry>();
|
||||
auto it = _rows.begin();
|
||||
@@ -221,6 +241,7 @@ mutation_partition::operator=(mutation_partition&& x) noexcept {
|
||||
}
|
||||
|
||||
void mutation_partition::ensure_last_dummy(const schema& s) {
|
||||
check_schema(s);
|
||||
if (_rows.empty() || !_rows.rbegin()->is_last_dummy()) {
|
||||
_rows.insert_before(_rows.end(),
|
||||
*current_allocator().construct<rows_entry>(s, rows_entry::last_dummy_tag(), is_continuous::yes));
|
||||
@@ -277,11 +298,16 @@ void deletable_row::apply(const schema& s, clustering_row cr) {
|
||||
|
||||
void
|
||||
mutation_partition::apply(const schema& s, const mutation_fragment& mf) {
|
||||
check_schema(s);
|
||||
mutation_fragment_applier applier{s, *this};
|
||||
mf.visit(applier);
|
||||
}
|
||||
|
||||
stop_iteration mutation_partition::apply_monotonically(const schema& s, mutation_partition&& p, cache_tracker* tracker, is_preemptible preemptible) {
|
||||
#ifdef SEASTAR_DEBUG
|
||||
assert(s.version() == _schema_version);
|
||||
assert(p._schema_version == _schema_version);
|
||||
#endif
|
||||
_tombstone.apply(p._tombstone);
|
||||
_static_row.apply_monotonically(s, column_kind::static_column, std::move(p._static_row));
|
||||
_static_row_continuous |= p._static_row_continuous;
|
||||
@@ -387,6 +413,7 @@ void mutation_partition::apply_weak(const schema& s, mutation_partition&& p) {
|
||||
|
||||
tombstone
|
||||
mutation_partition::range_tombstone_for_row(const schema& schema, const clustering_key& key) const {
|
||||
check_schema(schema);
|
||||
tombstone t = _tombstone;
|
||||
if (!_row_tombstones.empty()) {
|
||||
auto found = _row_tombstones.search_tombstone_covering(schema, key);
|
||||
@@ -397,6 +424,7 @@ mutation_partition::range_tombstone_for_row(const schema& schema, const clusteri
|
||||
|
||||
row_tombstone
|
||||
mutation_partition::tombstone_for_row(const schema& schema, const clustering_key& key) const {
|
||||
check_schema(schema);
|
||||
row_tombstone t = row_tombstone(range_tombstone_for_row(schema, key));
|
||||
|
||||
auto j = _rows.find(key, rows_entry::compare(schema));
|
||||
@@ -409,6 +437,7 @@ mutation_partition::tombstone_for_row(const schema& schema, const clustering_key
|
||||
|
||||
row_tombstone
|
||||
mutation_partition::tombstone_for_row(const schema& schema, const rows_entry& e) const {
|
||||
check_schema(schema);
|
||||
row_tombstone t = e.row().deleted_at();
|
||||
t.apply(range_tombstone_for_row(schema, e.key()));
|
||||
return t;
|
||||
@@ -416,6 +445,7 @@ mutation_partition::tombstone_for_row(const schema& schema, const rows_entry& e)
|
||||
|
||||
void
|
||||
mutation_partition::apply_row_tombstone(const schema& schema, clustering_key_prefix prefix, tombstone t) {
|
||||
check_schema(schema);
|
||||
assert(!prefix.is_full(schema));
|
||||
auto start = prefix;
|
||||
_row_tombstones.apply(schema, {std::move(start), std::move(prefix), std::move(t)});
|
||||
@@ -423,11 +453,13 @@ mutation_partition::apply_row_tombstone(const schema& schema, clustering_key_pre
|
||||
|
||||
void
|
||||
mutation_partition::apply_row_tombstone(const schema& schema, range_tombstone rt) {
|
||||
check_schema(schema);
|
||||
_row_tombstones.apply(schema, std::move(rt));
|
||||
}
|
||||
|
||||
void
|
||||
mutation_partition::apply_delete(const schema& schema, const clustering_key_prefix& prefix, tombstone t) {
|
||||
check_schema(schema);
|
||||
if (prefix.is_empty(schema)) {
|
||||
apply(t);
|
||||
} else if (prefix.is_full(schema)) {
|
||||
@@ -439,6 +471,7 @@ mutation_partition::apply_delete(const schema& schema, const clustering_key_pref
|
||||
|
||||
void
|
||||
mutation_partition::apply_delete(const schema& schema, range_tombstone rt) {
|
||||
check_schema(schema);
|
||||
if (range_tombstone::is_single_clustering_row_tombstone(schema, rt.start, rt.start_kind, rt.end, rt.end_kind)) {
|
||||
apply_delete(schema, std::move(rt.start), std::move(rt.tomb));
|
||||
return;
|
||||
@@ -448,6 +481,7 @@ mutation_partition::apply_delete(const schema& schema, range_tombstone rt) {
|
||||
|
||||
void
|
||||
mutation_partition::apply_delete(const schema& schema, clustering_key&& prefix, tombstone t) {
|
||||
check_schema(schema);
|
||||
if (prefix.is_empty(schema)) {
|
||||
apply(t);
|
||||
} else if (prefix.is_full(schema)) {
|
||||
@@ -459,6 +493,7 @@ mutation_partition::apply_delete(const schema& schema, clustering_key&& prefix,
|
||||
|
||||
void
|
||||
mutation_partition::apply_delete(const schema& schema, clustering_key_prefix_view prefix, tombstone t) {
|
||||
check_schema(schema);
|
||||
if (prefix.is_empty(schema)) {
|
||||
apply(t);
|
||||
} else if (prefix.is_full(schema)) {
|
||||
@@ -484,6 +519,7 @@ void mutation_partition::insert_row(const schema& s, const clustering_key& key,
|
||||
}
|
||||
|
||||
void mutation_partition::insert_row(const schema& s, const clustering_key& key, const deletable_row& row) {
|
||||
check_schema(s);
|
||||
auto e = alloc_strategy_unique_ptr<rows_entry>(
|
||||
current_allocator().construct<rows_entry>(s, key, row));
|
||||
_rows.insert(_rows.end(), *e, rows_entry::compare(s));
|
||||
@@ -492,6 +528,7 @@ void mutation_partition::insert_row(const schema& s, const clustering_key& key,
|
||||
|
||||
const row*
|
||||
mutation_partition::find_row(const schema& s, const clustering_key& key) const {
|
||||
check_schema(s);
|
||||
auto i = _rows.find(key, rows_entry::compare(s));
|
||||
if (i == _rows.end()) {
|
||||
return nullptr;
|
||||
@@ -501,6 +538,7 @@ mutation_partition::find_row(const schema& s, const clustering_key& key) const {
|
||||
|
||||
deletable_row&
|
||||
mutation_partition::clustered_row(const schema& s, clustering_key&& key) {
|
||||
check_schema(s);
|
||||
auto i = _rows.find(key, rows_entry::compare(s));
|
||||
if (i == _rows.end()) {
|
||||
auto e = alloc_strategy_unique_ptr<rows_entry>(
|
||||
@@ -513,6 +551,7 @@ mutation_partition::clustered_row(const schema& s, clustering_key&& key) {
|
||||
|
||||
deletable_row&
|
||||
mutation_partition::clustered_row(const schema& s, const clustering_key& key) {
|
||||
check_schema(s);
|
||||
auto i = _rows.find(key, rows_entry::compare(s));
|
||||
if (i == _rows.end()) {
|
||||
auto e = alloc_strategy_unique_ptr<rows_entry>(
|
||||
@@ -525,6 +564,7 @@ mutation_partition::clustered_row(const schema& s, const clustering_key& key) {
|
||||
|
||||
deletable_row&
|
||||
mutation_partition::clustered_row(const schema& s, clustering_key_view key) {
|
||||
check_schema(s);
|
||||
auto i = _rows.find(key, rows_entry::compare(s));
|
||||
if (i == _rows.end()) {
|
||||
auto e = alloc_strategy_unique_ptr<rows_entry>(
|
||||
@@ -537,6 +577,7 @@ mutation_partition::clustered_row(const schema& s, clustering_key_view key) {
|
||||
|
||||
deletable_row&
|
||||
mutation_partition::clustered_row(const schema& s, position_in_partition_view pos, is_dummy dummy, is_continuous continuous) {
|
||||
check_schema(s);
|
||||
auto i = _rows.find(pos, rows_entry::compare(s));
|
||||
if (i == _rows.end()) {
|
||||
auto e = alloc_strategy_unique_ptr<rows_entry>(
|
||||
@@ -549,6 +590,7 @@ mutation_partition::clustered_row(const schema& s, position_in_partition_view po
|
||||
|
||||
mutation_partition::rows_type::const_iterator
|
||||
mutation_partition::lower_bound(const schema& schema, const query::clustering_range& r) const {
|
||||
check_schema(schema);
|
||||
if (!r.start()) {
|
||||
return std::cbegin(_rows);
|
||||
}
|
||||
@@ -557,6 +599,7 @@ mutation_partition::lower_bound(const schema& schema, const query::clustering_ra
|
||||
|
||||
mutation_partition::rows_type::const_iterator
|
||||
mutation_partition::upper_bound(const schema& schema, const query::clustering_range& r) const {
|
||||
check_schema(schema);
|
||||
if (!r.end()) {
|
||||
return std::cend(_rows);
|
||||
}
|
||||
@@ -565,6 +608,7 @@ mutation_partition::upper_bound(const schema& schema, const query::clustering_ra
|
||||
|
||||
boost::iterator_range<mutation_partition::rows_type::const_iterator>
|
||||
mutation_partition::range(const schema& schema, const query::clustering_range& r) const {
|
||||
check_schema(schema);
|
||||
return boost::make_iterator_range(lower_bound(schema, r), upper_bound(schema, r));
|
||||
}
|
||||
|
||||
@@ -601,6 +645,7 @@ mutation_partition::upper_bound(const schema& schema, const query::clustering_ra
|
||||
template<typename Func>
|
||||
void mutation_partition::for_each_row(const schema& schema, const query::clustering_range& row_range, bool reversed, Func&& func) const
|
||||
{
|
||||
check_schema(schema);
|
||||
auto r = range(schema, row_range);
|
||||
if (!reversed) {
|
||||
for (const auto& e : r) {
|
||||
@@ -817,6 +862,7 @@ bool has_any_live_data(const schema& s, column_kind kind, const row& cells, tomb
|
||||
|
||||
void
|
||||
mutation_partition::query_compacted(query::result::partition_writer& pw, const schema& s, uint32_t limit) const {
|
||||
check_schema(s);
|
||||
const query::partition_slice& slice = pw.slice();
|
||||
max_timestamp max_ts{pw.last_modified()};
|
||||
|
||||
@@ -1049,6 +1095,10 @@ bool mutation_partition::equal(const schema& s, const mutation_partition& p) con
|
||||
}
|
||||
|
||||
bool mutation_partition::equal(const schema& this_schema, const mutation_partition& p, const schema& p_schema) const {
|
||||
#ifdef SEASTAR_DEBUG
|
||||
assert(_schema_version == this_schema.version());
|
||||
assert(p._schema_version == p_schema.version());
|
||||
#endif
|
||||
if (_tombstone != p._tombstone) {
|
||||
return false;
|
||||
}
|
||||
@@ -1177,6 +1227,7 @@ row::apply_monotonically(const column_definition& column, atomic_cell_or_collect
|
||||
void
|
||||
row::append_cell(column_id id, atomic_cell_or_collection value) {
|
||||
if (_type == storage_type::vector && id < max_vector_size) {
|
||||
assert(_storage.vector.v.size() <= id);
|
||||
_storage.vector.v.resize(id);
|
||||
_storage.vector.v.emplace_back(cell_and_hash{std::move(value), cell_hash_opt()});
|
||||
_storage.vector.present.set(id);
|
||||
@@ -1241,6 +1292,7 @@ size_t rows_entry::memory_usage(const schema& s) const {
|
||||
}
|
||||
|
||||
size_t mutation_partition::external_memory_usage(const schema& s) const {
|
||||
check_schema(s);
|
||||
size_t sum = 0;
|
||||
sum += static_row().external_memory_usage(s, column_kind::static_column);
|
||||
for (auto& clr : clustered_rows()) {
|
||||
@@ -1259,6 +1311,7 @@ void mutation_partition::trim_rows(const schema& s,
|
||||
const std::vector<query::clustering_range>& row_ranges,
|
||||
Func&& func)
|
||||
{
|
||||
check_schema(s);
|
||||
static_assert(std::is_same<stop_iteration, std::result_of_t<Func(rows_entry&)>>::value, "Bad func signature");
|
||||
|
||||
stop_iteration stop = stop_iteration::no;
|
||||
@@ -1303,6 +1356,7 @@ uint32_t mutation_partition::do_compact(const schema& s,
|
||||
uint32_t row_limit,
|
||||
can_gc_fn& can_gc)
|
||||
{
|
||||
check_schema(s);
|
||||
assert(row_limit > 0);
|
||||
|
||||
auto gc_before = saturating_subtract(query_time, s.gc_grace_seconds());
|
||||
@@ -1368,12 +1422,14 @@ mutation_partition::compact_for_query(
|
||||
bool reverse,
|
||||
uint32_t row_limit)
|
||||
{
|
||||
check_schema(s);
|
||||
return do_compact(s, query_time, row_ranges, reverse, row_limit, always_gc);
|
||||
}
|
||||
|
||||
void mutation_partition::compact_for_compaction(const schema& s,
|
||||
can_gc_fn& can_gc, gc_clock::time_point compaction_time)
|
||||
{
|
||||
check_schema(s);
|
||||
static const std::vector<query::clustering_range> all_rows = {
|
||||
query::clustering_range::make_open_ended_both_sides()
|
||||
};
|
||||
@@ -1407,11 +1463,13 @@ row::is_live(const schema& s, column_kind kind, tombstone base_tombstone, gc_clo
|
||||
|
||||
bool
|
||||
mutation_partition::is_static_row_live(const schema& s, gc_clock::time_point query_time) const {
|
||||
check_schema(s);
|
||||
return has_any_live_data(s, column_kind::static_column, static_row(), _tombstone, query_time);
|
||||
}
|
||||
|
||||
size_t
|
||||
mutation_partition::live_row_count(const schema& s, gc_clock::time_point query_time) const {
|
||||
check_schema(s);
|
||||
size_t count = 0;
|
||||
|
||||
for (const rows_entry& e : non_dummy_rows()) {
|
||||
@@ -1757,6 +1815,7 @@ row row::difference(const schema& s, column_kind kind, const row& other) const
|
||||
|
||||
mutation_partition mutation_partition::difference(schema_ptr s, const mutation_partition& other) const
|
||||
{
|
||||
check_schema(*s);
|
||||
mutation_partition mp(s);
|
||||
if (_tombstone > other._tombstone) {
|
||||
mp.apply(_tombstone);
|
||||
@@ -1787,6 +1846,7 @@ mutation_partition mutation_partition::difference(schema_ptr s, const mutation_p
|
||||
}
|
||||
|
||||
void mutation_partition::accept(const schema& s, mutation_partition_visitor& v) const {
|
||||
check_schema(s);
|
||||
v.accept_partition_tombstone(_tombstone);
|
||||
_static_row.for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
|
||||
const column_definition& def = s.static_column_at(id);
|
||||
@@ -2200,6 +2260,9 @@ mutation_partition::mutation_partition(mutation_partition::incomplete_tag, const
|
||||
, _static_row_continuous(!s.has_static_columns())
|
||||
, _rows()
|
||||
, _row_tombstones(s)
|
||||
#ifdef SEASTAR_DEBUG
|
||||
, _schema_version(s.version())
|
||||
#endif
|
||||
{
|
||||
_rows.insert_before(_rows.end(),
|
||||
*current_allocator().construct<rows_entry>(s, rows_entry::last_dummy_tag(), is_continuous::no));
|
||||
@@ -2265,6 +2328,7 @@ void mutation_partition::set_continuity(const schema& s, const position_range& p
|
||||
}
|
||||
|
||||
clustering_interval_set mutation_partition::get_continuity(const schema& s, is_continuous cont) const {
|
||||
check_schema(s);
|
||||
clustering_interval_set result;
|
||||
auto i = _rows.begin();
|
||||
auto prev_pos = position_in_partition::before_all_clustered_rows();
|
||||
@@ -2314,6 +2378,7 @@ stop_iteration mutation_partition::clear_gently(cache_tracker* tracker) noexcept
|
||||
|
||||
bool
|
||||
mutation_partition::check_continuity(const schema& s, const position_range& r, is_continuous cont) const {
|
||||
check_schema(s);
|
||||
auto less = rows_entry::compare(s);
|
||||
auto i = _rows.lower_bound(r.start(), less);
|
||||
auto end = _rows.lower_bound(r.end(), less);
|
||||
|
||||
@@ -940,6 +940,9 @@ private:
|
||||
// Contains only strict prefixes so that we don't have to lookup full keys
|
||||
// in both _row_tombstones and _rows.
|
||||
range_tombstone_list _row_tombstones;
|
||||
#ifdef SEASTAR_DEBUG
|
||||
table_schema_version _schema_version;
|
||||
#endif
|
||||
|
||||
friend class mutation_partition_applier;
|
||||
friend class converting_mutation_partition_applier;
|
||||
@@ -954,10 +957,16 @@ public:
|
||||
mutation_partition(schema_ptr s)
|
||||
: _rows()
|
||||
, _row_tombstones(*s)
|
||||
#ifdef SEASTAR_DEBUG
|
||||
, _schema_version(s->version())
|
||||
#endif
|
||||
{ }
|
||||
mutation_partition(mutation_partition& other, copy_comparators_only)
|
||||
: _rows()
|
||||
, _row_tombstones(other._row_tombstones, range_tombstone_list::copy_comparator_only())
|
||||
#ifdef SEASTAR_DEBUG
|
||||
, _schema_version(other._schema_version)
|
||||
#endif
|
||||
{ }
|
||||
mutation_partition(mutation_partition&&) = default;
|
||||
mutation_partition(const schema& s, const mutation_partition&);
|
||||
@@ -1181,6 +1190,12 @@ private:
|
||||
template<typename Func>
|
||||
void for_each_row(const schema& schema, const query::clustering_range& row_range, bool reversed, Func&& func) const;
|
||||
friend class counter_write_query_result_builder;
|
||||
|
||||
void check_schema(const schema& s) const {
|
||||
#ifdef SEASTAR_DEBUG
|
||||
assert(s.version() == _schema_version);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
inline
|
||||
|
||||
@@ -910,9 +910,10 @@ class shard_reader : public enable_lw_shared_from_this<shard_reader>, public fla
|
||||
bool _reader_created = false;
|
||||
bool _drop_partition_start = false;
|
||||
bool _drop_static_row = false;
|
||||
position_in_partition::tri_compare _tri_cmp;
|
||||
|
||||
std::optional<dht::decorated_key> _last_pkey;
|
||||
std::optional<position_in_partition> _last_position_in_partition;
|
||||
position_in_partition _next_position_in_partition = position_in_partition::for_partition_start();
|
||||
// These are used when the reader has to be recreated (after having been
|
||||
// evicted while paused) and the range and/or slice it is recreated with
|
||||
// differs from the original ones.
|
||||
@@ -920,13 +921,13 @@ class shard_reader : public enable_lw_shared_from_this<shard_reader>, public fla
|
||||
std::optional<query::partition_slice> _slice_override;
|
||||
|
||||
private:
|
||||
void update_last_position(const circular_buffer<mutation_fragment>& buffer);
|
||||
void update_next_position(flat_mutation_reader& reader, circular_buffer<mutation_fragment>& buffer);
|
||||
void adjust_partition_slice();
|
||||
flat_mutation_reader recreate_reader();
|
||||
flat_mutation_reader resume_or_create_reader();
|
||||
bool should_drop_fragment(const mutation_fragment& mf);
|
||||
future<> do_fill_buffer(flat_mutation_reader& reader, db::timeout_clock::time_point timeout);
|
||||
future<> ensure_buffer_contains_all_fragments_for_last_pos(flat_mutation_reader& reader, circular_buffer<mutation_fragment>& buffer,
|
||||
db::timeout_clock::time_point timeout);
|
||||
future<> fill_buffer(flat_mutation_reader& reader, circular_buffer<mutation_fragment>& buffer, db::timeout_clock::time_point timeout);
|
||||
|
||||
public:
|
||||
remote_reader(
|
||||
@@ -1024,7 +1025,7 @@ void shard_reader::stop() noexcept {
|
||||
}).finally([zis = shared_from_this()] {}));
|
||||
}
|
||||
|
||||
void shard_reader::remote_reader::update_last_position(const circular_buffer<mutation_fragment>& buffer) {
|
||||
void shard_reader::remote_reader::update_next_position(flat_mutation_reader& reader, circular_buffer<mutation_fragment>& buffer) {
|
||||
if (buffer.empty()) {
|
||||
return;
|
||||
}
|
||||
@@ -1035,7 +1036,31 @@ void shard_reader::remote_reader::update_last_position(const circular_buffer<mut
|
||||
_last_pkey = pk_it->as_partition_start().key();
|
||||
}
|
||||
|
||||
_last_position_in_partition.emplace(buffer.back().position());
|
||||
const auto last_pos = buffer.back().position();
|
||||
switch (last_pos.region()) {
|
||||
case partition_region::partition_start:
|
||||
_next_position_in_partition = position_in_partition::for_static_row();
|
||||
break;
|
||||
case partition_region::static_row:
|
||||
_next_position_in_partition = position_in_partition::before_all_clustered_rows();
|
||||
break;
|
||||
case partition_region::clustered:
|
||||
if (reader.is_buffer_empty()) {
|
||||
_next_position_in_partition = position_in_partition::after_key(last_pos);
|
||||
} else {
|
||||
const auto& next_frag = reader.peek_buffer();
|
||||
if (next_frag.is_end_of_partition()) {
|
||||
buffer.emplace_back(reader.pop_mutation_fragment());
|
||||
_next_position_in_partition = position_in_partition::for_partition_start();
|
||||
} else {
|
||||
_next_position_in_partition = position_in_partition(next_frag.position());
|
||||
}
|
||||
}
|
||||
break;
|
||||
case partition_region::partition_end:
|
||||
_next_position_in_partition = position_in_partition::for_partition_start();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void shard_reader::remote_reader::adjust_partition_slice() {
|
||||
@@ -1043,9 +1068,8 @@ void shard_reader::remote_reader::adjust_partition_slice() {
|
||||
_slice_override = _ps;
|
||||
}
|
||||
|
||||
auto& last_ckey = _last_position_in_partition->key();
|
||||
auto ranges = _slice_override->default_row_ranges();
|
||||
query::trim_clustering_row_ranges_to(*_schema, ranges, last_ckey);
|
||||
query::trim_clustering_row_ranges_to(*_schema, ranges, _next_position_in_partition);
|
||||
|
||||
_slice_override->clear_ranges();
|
||||
_slice_override->set_range(*_schema, _last_pkey->key(), std::move(ranges));
|
||||
@@ -1058,25 +1082,22 @@ flat_mutation_reader shard_reader::remote_reader::recreate_reader() {
|
||||
if (_last_pkey) {
|
||||
bool partition_range_is_inclusive = true;
|
||||
|
||||
if (_last_position_in_partition) {
|
||||
switch (_last_position_in_partition->region()) {
|
||||
case partition_region::partition_start:
|
||||
_drop_partition_start = true;
|
||||
break;
|
||||
case partition_region::static_row:
|
||||
_drop_partition_start = true;
|
||||
_drop_static_row = true;
|
||||
break;
|
||||
case partition_region::clustered:
|
||||
_drop_partition_start = true;
|
||||
_drop_static_row = true;
|
||||
adjust_partition_slice();
|
||||
slice = &*_slice_override;
|
||||
break;
|
||||
case partition_region::partition_end:
|
||||
partition_range_is_inclusive = false;
|
||||
break;
|
||||
}
|
||||
switch (_next_position_in_partition.region()) {
|
||||
case partition_region::partition_start:
|
||||
partition_range_is_inclusive = false;
|
||||
break;
|
||||
case partition_region::static_row:
|
||||
_drop_partition_start = true;
|
||||
break;
|
||||
case partition_region::clustered:
|
||||
_drop_partition_start = true;
|
||||
_drop_static_row = true;
|
||||
adjust_partition_slice();
|
||||
slice = &*_slice_override;
|
||||
break;
|
||||
case partition_region::partition_end:
|
||||
partition_range_is_inclusive = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// The original range contained a single partition and we've read it
|
||||
@@ -1115,62 +1136,83 @@ flat_mutation_reader shard_reader::remote_reader::resume_or_create_reader() {
|
||||
return recreate_reader();
|
||||
}
|
||||
|
||||
bool shard_reader::remote_reader::should_drop_fragment(const mutation_fragment& mf) {
|
||||
if (_drop_partition_start && mf.is_partition_start()) {
|
||||
_drop_partition_start = false;
|
||||
return true;
|
||||
}
|
||||
if (_drop_static_row && mf.is_static_row()) {
|
||||
_drop_static_row = false;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
future<> shard_reader::remote_reader::do_fill_buffer(flat_mutation_reader& reader, db::timeout_clock::time_point timeout) {
|
||||
if (!_drop_partition_start && !_drop_static_row) {
|
||||
return reader.fill_buffer(timeout);
|
||||
}
|
||||
return repeat([this, &reader, timeout] {
|
||||
return reader.fill_buffer(timeout).then([this, &reader] {
|
||||
const auto eos = reader.is_end_of_stream();
|
||||
|
||||
if (reader.is_buffer_empty()) {
|
||||
return stop_iteration(eos);
|
||||
while (!reader.is_buffer_empty() && should_drop_fragment(reader.peek_buffer())) {
|
||||
reader.pop_mutation_fragment();
|
||||
}
|
||||
if (_drop_partition_start) {
|
||||
_drop_partition_start = false;
|
||||
if (reader.peek_buffer().is_partition_start()) {
|
||||
reader.pop_mutation_fragment();
|
||||
}
|
||||
}
|
||||
|
||||
if (reader.is_buffer_empty()) {
|
||||
return stop_iteration(eos);
|
||||
}
|
||||
if (_drop_static_row) {
|
||||
_drop_static_row = false;
|
||||
if (reader.peek_buffer().is_static_row()) {
|
||||
reader.pop_mutation_fragment();
|
||||
}
|
||||
}
|
||||
|
||||
return stop_iteration(reader.is_buffer_full() || eos);
|
||||
return stop_iteration(reader.is_buffer_full() || reader.is_end_of_stream());
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
future<> shard_reader::remote_reader::ensure_buffer_contains_all_fragments_for_last_pos(flat_mutation_reader& reader,
|
||||
circular_buffer<mutation_fragment>& buffer, db::timeout_clock::time_point timeout) {
|
||||
if (buffer.empty() || !buffer.back().is_range_tombstone()) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
auto stop = [this, &reader, &buffer] {
|
||||
future<> shard_reader::remote_reader::fill_buffer(flat_mutation_reader& reader, circular_buffer<mutation_fragment>& buffer,
|
||||
db::timeout_clock::time_point timeout) {
|
||||
return do_fill_buffer(reader, timeout).then([this, &reader, &buffer, timeout] {
|
||||
if (reader.is_buffer_empty()) {
|
||||
return reader.is_end_of_stream();
|
||||
return make_ready_future<>();
|
||||
}
|
||||
const auto& next_pos = reader.peek_buffer().position();
|
||||
if (next_pos.region() != partition_region::clustered) {
|
||||
return true;
|
||||
}
|
||||
return !next_pos.key().equal(*_schema, buffer.back().position().key());
|
||||
};
|
||||
|
||||
return do_until(stop, [this, &reader, &buffer, timeout] {
|
||||
if (reader.is_buffer_empty()) {
|
||||
return do_fill_buffer(reader, timeout);
|
||||
}
|
||||
buffer.emplace_back(reader.pop_mutation_fragment());
|
||||
return make_ready_future<>();
|
||||
buffer = reader.detach_buffer();
|
||||
auto stop = [this, &reader, &buffer] {
|
||||
// The only problematic fragment kind is the range tombstone.
|
||||
// All other fragment kinds are safe to end the buffer on, and
|
||||
// are guaranteed to represent progress vs. the last buffer fill.
|
||||
if (!buffer.back().is_range_tombstone()) {
|
||||
return true;
|
||||
}
|
||||
if (reader.is_buffer_empty()) {
|
||||
return reader.is_end_of_stream();
|
||||
}
|
||||
const auto& next_pos = reader.peek_buffer().position();
|
||||
// To ensure safe progress we have to ensure the following:
|
||||
//
|
||||
// _next_position_in_partition < buffer.back().position() < next_pos
|
||||
//
|
||||
// * The first condition is to ensure we made progress since the
|
||||
// last buffer fill. Otherwise we might get into an endless loop if
|
||||
// the reader is recreated after each `fill_buffer()` call.
|
||||
// * The second condition is to ensure we have seen all fragments
|
||||
// with the same position. Otherwise we might jump over those
|
||||
// remaining fragments with the same position as the last
|
||||
// fragment's in the buffer when the reader is recreated.
|
||||
return _tri_cmp(_next_position_in_partition, buffer.back().position()) < 0 && _tri_cmp(buffer.back().position(), next_pos) < 0;
|
||||
};
|
||||
// Read additional fragments until it is safe to stop, if needed.
|
||||
// We have to ensure we stop at a fragment such that if the reader is
|
||||
// evicted and recreated later, we won't be skipping any fragments.
|
||||
// Practically, range tombstones are the only ones that are
|
||||
// problematic to end the buffer on. This is due to the fact range
|
||||
// tombstones can have the same position that multiple following range
|
||||
// tombstones, or a single following clustering row in the stream has.
|
||||
// When a range tombstone is the last in the buffer, we have to continue
|
||||
// to read until we are sure we've read all fragments sharing the same
|
||||
// position, so that we can safely continue reading from after said
|
||||
// position.
|
||||
return do_until(stop, [this, &reader, &buffer, timeout] {
|
||||
if (reader.is_buffer_empty()) {
|
||||
return do_fill_buffer(reader, timeout);
|
||||
}
|
||||
buffer.emplace_back(reader.pop_mutation_fragment());
|
||||
return make_ready_future<>();
|
||||
});
|
||||
}).then([this, &reader, &buffer] {
|
||||
update_next_position(reader, buffer);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1188,7 +1230,8 @@ shard_reader::remote_reader::remote_reader(
|
||||
, _ps(ps)
|
||||
, _pc(pc)
|
||||
, _trace_state(std::move(trace_state))
|
||||
, _fwd_mr(fwd_mr) {
|
||||
, _fwd_mr(fwd_mr)
|
||||
, _tri_cmp(*_schema) {
|
||||
}
|
||||
|
||||
future<shard_reader::fill_buffer_result> shard_reader::remote_reader::fill_buffer(const dht::partition_range& pr, bool pending_next_partition,
|
||||
@@ -1196,7 +1239,7 @@ future<shard_reader::fill_buffer_result> shard_reader::remote_reader::fill_buffe
|
||||
// We could have missed a `fast_forward_to()` if the reader wasn't created yet.
|
||||
_pr = ≺
|
||||
if (pending_next_partition) {
|
||||
_last_position_in_partition = position_in_partition(position_in_partition::end_of_partition_tag_t{});
|
||||
_next_position_in_partition = position_in_partition::for_partition_start();
|
||||
}
|
||||
return do_with(resume_or_create_reader(), circular_buffer<mutation_fragment>{},
|
||||
[this, pending_next_partition, timeout] (flat_mutation_reader& reader, circular_buffer<mutation_fragment>& buffer) mutable {
|
||||
@@ -1204,22 +1247,8 @@ future<shard_reader::fill_buffer_result> shard_reader::remote_reader::fill_buffe
|
||||
reader.next_partition();
|
||||
}
|
||||
|
||||
return do_fill_buffer(reader, timeout).then([this, &reader, &buffer, timeout] {
|
||||
buffer = reader.detach_buffer();
|
||||
// When the reader is recreated (after having been evicted) we
|
||||
// recreate it such that it starts reading from *after* the last
|
||||
// seen fragment's position. If the last seen fragment is a range
|
||||
// tombstone it is *not* guaranteed that the next fragments in the
|
||||
// data stream have positions strictly greater than the range
|
||||
// tombstone's. If the reader is evicted and has to be recreated,
|
||||
// these fragments would be then skipped as the read would continue
|
||||
// after their position.
|
||||
// To avoid this ensure that the buffer contains *all* fragments for
|
||||
// the last seen position.
|
||||
return ensure_buffer_contains_all_fragments_for_last_pos(reader, buffer, timeout);
|
||||
}).then([this, &reader, &buffer] {
|
||||
return fill_buffer(reader, buffer, timeout).then([this, &reader, &buffer] {
|
||||
const auto eos = reader.is_end_of_stream() && reader.is_buffer_empty();
|
||||
update_last_position(buffer);
|
||||
_irh = _lifecycle_policy.pause(std::move(reader));
|
||||
return fill_buffer_result(std::move(buffer), eos);
|
||||
});
|
||||
@@ -1229,7 +1258,7 @@ future<shard_reader::fill_buffer_result> shard_reader::remote_reader::fill_buffe
|
||||
future<> shard_reader::remote_reader::fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) {
|
||||
_pr = ≺
|
||||
_last_pkey.reset();
|
||||
_last_position_in_partition.reset();
|
||||
_next_position_in_partition = position_in_partition::for_partition_start();
|
||||
|
||||
if (!_reader_created || !_irh) {
|
||||
return make_ready_future<>();
|
||||
|
||||
@@ -338,7 +338,7 @@ partition_version& partition_entry::add_version(const schema& s, cache_tracker*
|
||||
|
||||
void partition_entry::apply(const schema& s, const mutation_partition& mp, const schema& mp_schema)
|
||||
{
|
||||
apply(s, mutation_partition(s, mp), mp_schema);
|
||||
apply(s, mutation_partition(mp_schema, mp), mp_schema);
|
||||
}
|
||||
|
||||
void partition_entry::apply(const schema& s, mutation_partition&& mp, const schema& mp_schema)
|
||||
|
||||
@@ -129,6 +129,8 @@ public:
|
||||
: _type(partition_region::clustered), _ck(&ck) { }
|
||||
position_in_partition_view(range_tag_t, bound_view bv)
|
||||
: _type(partition_region::clustered), _bound_weight(position_weight(bv.kind())), _ck(&bv.prefix()) { }
|
||||
position_in_partition_view(const clustering_key_prefix& ck, bound_weight w)
|
||||
: _type(partition_region::clustered), _bound_weight(w), _ck(&ck) { }
|
||||
|
||||
static position_in_partition_view for_range_start(const query::clustering_range& r) {
|
||||
return {position_in_partition_view::range_tag_t(), bound_view::from_range_start(r)};
|
||||
@@ -159,6 +161,7 @@ public:
|
||||
}
|
||||
|
||||
partition_region region() const { return _type; }
|
||||
bound_weight get_bound_weight() const { return _bound_weight; }
|
||||
bool is_partition_start() const { return _type == partition_region::partition_start; }
|
||||
bool is_partition_end() const { return _type == partition_region::partition_end; }
|
||||
bool is_static_row() const { return _type == partition_region::static_row; }
|
||||
@@ -271,6 +274,10 @@ public:
|
||||
return {clustering_row_tag_t(), std::move(ck)};
|
||||
}
|
||||
|
||||
static position_in_partition for_partition_start() {
|
||||
return position_in_partition{partition_start_tag_t()};
|
||||
}
|
||||
|
||||
static position_in_partition for_static_row() {
|
||||
return position_in_partition{static_row_tag_t()};
|
||||
}
|
||||
|
||||
@@ -286,11 +286,11 @@ static void insert_querier(
|
||||
|
||||
auto& e = entries.emplace_back(key, std::move(q), expires);
|
||||
e.set_pos(--entries.end());
|
||||
++stats.population;
|
||||
|
||||
if (auto irh = sem.register_inactive_read(std::make_unique<querier_inactive_read>(entries, e.pos(), stats))) {
|
||||
e.set_inactive_handle(std::move(irh));
|
||||
index.insert(e);
|
||||
++stats.population;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -31,6 +31,8 @@
|
||||
#include "tracing/tracing.hh"
|
||||
#include "utils/small_vector.hh"
|
||||
|
||||
class position_in_partition_view;
|
||||
|
||||
namespace query {
|
||||
|
||||
using column_id_vector = utils::small_vector<column_id, 8>;
|
||||
@@ -58,10 +60,20 @@ typedef std::vector<clustering_range> clustering_row_ranges;
|
||||
|
||||
/// Trim the clustering ranges.
|
||||
///
|
||||
/// Equivalent of intersecting each range with [key, +inf), or (-inf, key] if
|
||||
/// Equivalent of intersecting each clustering range with [pos, +inf) position
|
||||
/// in partition range, or (-inf, pos] position in partition range if
|
||||
/// reversed == true. Ranges that do not intersect are dropped. Ranges that
|
||||
/// partially overlap are trimmed.
|
||||
/// Result: each range will overlap fully with [key, +inf), or (-int, key] if
|
||||
/// Result: each range will overlap fully with [pos, +inf), or (-int, pos] if
|
||||
/// reversed is true.
|
||||
void trim_clustering_row_ranges_to(const schema& s, clustering_row_ranges& ranges, position_in_partition_view pos, bool reversed = false);
|
||||
|
||||
/// Trim the clustering ranges.
|
||||
///
|
||||
/// Equivalent of intersecting each clustering range with (key, +inf) clustering
|
||||
/// range, or (-inf, key) clustering range if reversed == true. Ranges that do
|
||||
/// not intersect are dropped. Ranges that partially overlap are trimmed.
|
||||
/// Result: each range will overlap fully with (key, +inf), or (-int, key) if
|
||||
/// reversed is true.
|
||||
void trim_clustering_row_ranges_to(const schema& s, clustering_row_ranges& ranges, const clustering_key& key, bool reversed = false);
|
||||
|
||||
|
||||
32
query.cc
32
query.cc
@@ -71,34 +71,38 @@ std::ostream& operator<<(std::ostream& out, const specific_ranges& s) {
|
||||
return out << "{" << s._pk << " : " << join(", ", s._ranges) << "}";
|
||||
}
|
||||
|
||||
void trim_clustering_row_ranges_to(const schema& s, clustering_row_ranges& ranges, const clustering_key& key, bool reversed) {
|
||||
auto cmp = [reversed, bv_cmp = bound_view::compare(s)] (const auto& a, const auto& b) {
|
||||
return reversed ? bv_cmp(b, a) : bv_cmp(a, b);
|
||||
void trim_clustering_row_ranges_to(const schema& s, clustering_row_ranges& ranges, position_in_partition_view pos, bool reversed) {
|
||||
auto cmp = [reversed, cmp = position_in_partition::composite_tri_compare(s)] (const auto& a, const auto& b) {
|
||||
return reversed ? cmp(b, a) : cmp(a, b);
|
||||
};
|
||||
auto start_bound = [reversed] (const auto& range) -> const bound_view& {
|
||||
return reversed ? range.second : range.first;
|
||||
auto start_bound = [reversed] (const auto& range) -> position_in_partition_view {
|
||||
return reversed ? position_in_partition_view::for_range_end(range) : position_in_partition_view::for_range_start(range);
|
||||
};
|
||||
auto end_bound = [reversed] (const auto& range) -> const bound_view& {
|
||||
return reversed ? range.first : range.second;
|
||||
auto end_bound = [reversed] (const auto& range) -> position_in_partition_view {
|
||||
return reversed ? position_in_partition_view::for_range_start(range) : position_in_partition_view::for_range_end(range);
|
||||
};
|
||||
clustering_key_prefix::equality eq(s);
|
||||
|
||||
auto it = ranges.begin();
|
||||
while (it != ranges.end()) {
|
||||
auto range = bound_view::from_range(*it);
|
||||
if (cmp(end_bound(range), key) || eq(end_bound(range).prefix(), key)) {
|
||||
if (cmp(end_bound(*it), pos) <= 0) {
|
||||
it = ranges.erase(it);
|
||||
continue;
|
||||
} else if (cmp(start_bound(range), key)) {
|
||||
assert(cmp(key, end_bound(range)));
|
||||
auto r = reversed ? clustering_range(it->start(), clustering_range::bound { key, false })
|
||||
: clustering_range(clustering_range::bound { key, false }, it->end());
|
||||
} else if (cmp(start_bound(*it), pos) <= 0) {
|
||||
assert(cmp(pos, end_bound(*it)) < 0);
|
||||
auto r = reversed ?
|
||||
clustering_range(it->start(), clustering_range::bound(pos.key(), pos.get_bound_weight() != bound_weight::before_all_prefixed)) :
|
||||
clustering_range(clustering_range::bound(pos.key(), pos.get_bound_weight() != bound_weight::after_all_prefixed), it->end());
|
||||
*it = std::move(r);
|
||||
}
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
void trim_clustering_row_ranges_to(const schema& s, clustering_row_ranges& ranges, const clustering_key& key, bool reversed) {
|
||||
return trim_clustering_row_ranges_to(s, ranges,
|
||||
position_in_partition_view(key, reversed ? bound_weight::before_all_prefixed : bound_weight::after_all_prefixed), reversed);
|
||||
}
|
||||
|
||||
partition_slice::partition_slice(clustering_row_ranges row_ranges,
|
||||
query::column_id_vector static_columns,
|
||||
query::column_id_vector regular_columns,
|
||||
|
||||
37
reloc/python3/build_deb.sh
Executable file
37
reloc/python3/build_deb.sh
Executable file
@@ -0,0 +1,37 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
. /etc/os-release
|
||||
print_usage() {
|
||||
echo "build_deb.sh --reloc-pkg build/release/scylla-python3-package.tar.gz"
|
||||
echo " --reloc-pkg specify relocatable package path"
|
||||
exit 1
|
||||
}
|
||||
|
||||
RELOC_PKG=build/release/scylla-python3-package.tar.gz
|
||||
OPTS=""
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
"--reloc-pkg")
|
||||
OPTS="$OPTS $1 $(readlink -f $2)"
|
||||
RELOC_PKG=$2
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
print_usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ ! -e $RELOC_PKG ]; then
|
||||
echo "$RELOC_PKG does not exist."
|
||||
echo "Run ./reloc/python3/build_reloc.sh first."
|
||||
exit 1
|
||||
fi
|
||||
RELOC_PKG=$(readlink -f $RELOC_PKG)
|
||||
if [[ ! $OPTS =~ --reloc-pkg ]]; then
|
||||
OPTS="$OPTS --reloc-pkg $RELOC_PKG"
|
||||
fi
|
||||
mkdir -p build/debian/scylla-python3-package
|
||||
tar -C build/debian/scylla-python3-package -xpf $RELOC_PKG
|
||||
cd build/debian/scylla-python3-package
|
||||
exec ./dist/debian/python3/build_deb.sh $OPTS
|
||||
@@ -940,8 +940,20 @@ static future<> repair_cf_range(repair_info& ri,
|
||||
// Comparable to RepairSession in Origin
|
||||
static future<> repair_range(repair_info& ri, const dht::token_range& range) {
|
||||
auto id = utils::UUID_gen::get_time_UUID();
|
||||
return do_with(get_neighbors(ri.db.local(), ri.keyspace, range, ri.data_centers, ri.hosts), [&ri, range, id] (const auto& neighbors) {
|
||||
rlogger.debug("[repair #{}] new session: will sync {} on range {} for {}.{}", id, neighbors, range, ri.keyspace, ri.cfs);
|
||||
return do_with(get_neighbors(ri.db.local(), ri.keyspace, range, ri.data_centers, ri.hosts), [&ri, range, id] (std::vector<gms::inet_address>& neighbors) {
|
||||
auto live_neighbors = boost::copy_range<std::vector<gms::inet_address>>(neighbors |
|
||||
boost::adaptors::filtered([] (const gms::inet_address& node) { return gms::get_local_gossiper().is_alive(node); }));
|
||||
if (live_neighbors.size() != neighbors.size()) {
|
||||
ri.nr_failed_ranges++;
|
||||
auto status = live_neighbors.empty() ? "skipped" : "partial";
|
||||
rlogger.warn("Repair {} out of {} ranges, id={}, shard={}, keyspace={}, table={}, range={}, peers={}, live_peers={}, status={}",
|
||||
ri.ranges_index, ri.ranges.size(), ri.id, ri.shard, ri.keyspace, ri.cfs, range, neighbors, live_neighbors, status);
|
||||
if (live_neighbors.empty()) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
neighbors.swap(live_neighbors);
|
||||
}
|
||||
return ::service::get_local_migration_manager().sync_schema(ri.db.local(), neighbors).then([&neighbors, &ri, range, id] {
|
||||
return do_for_each(ri.cfs.begin(), ri.cfs.end(), [&ri, &neighbors, range] (auto&& cf) {
|
||||
ri._sub_ranges_nr++;
|
||||
if (ri.row_level_repair()) {
|
||||
@@ -950,6 +962,7 @@ static future<> repair_range(repair_info& ri, const dht::token_range& range) {
|
||||
return repair_cf_range(ri, cf, range, neighbors);
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -295,6 +295,7 @@ public:
|
||||
void push_mutation_fragment(frozen_mutation_fragment mf) { _mfs.push_back(std::move(mf)); }
|
||||
};
|
||||
|
||||
using repair_row_on_wire = partition_key_and_mutation_fragments;
|
||||
using repair_rows_on_wire = std::list<partition_key_and_mutation_fragments>;
|
||||
|
||||
enum class row_level_diff_detect_algorithm : uint8_t {
|
||||
|
||||
@@ -152,8 +152,8 @@ class fragment_hasher {
|
||||
xx_hasher& _hasher;
|
||||
private:
|
||||
void consume_cell(const column_definition& col, const atomic_cell_or_collection& cell) {
|
||||
feed_hash(_hasher, col.name());
|
||||
feed_hash(_hasher, col.type->name());
|
||||
feed_hash(_hasher, col.kind);
|
||||
feed_hash(_hasher, col.id);
|
||||
feed_hash(_hasher, cell, col);
|
||||
}
|
||||
public:
|
||||
@@ -220,43 +220,62 @@ private:
|
||||
};
|
||||
|
||||
class repair_row {
|
||||
frozen_mutation_fragment _fm;
|
||||
std::optional<frozen_mutation_fragment> _fm;
|
||||
lw_shared_ptr<const decorated_key_with_hash> _dk_with_hash;
|
||||
repair_sync_boundary _boundary;
|
||||
repair_hash _hash;
|
||||
std::optional<repair_sync_boundary> _boundary;
|
||||
std::optional<repair_hash> _hash;
|
||||
lw_shared_ptr<mutation_fragment> _mf;
|
||||
public:
|
||||
repair_row() = delete;
|
||||
repair_row(frozen_mutation_fragment fm,
|
||||
position_in_partition pos,
|
||||
repair_row(std::optional<frozen_mutation_fragment> fm,
|
||||
std::optional<position_in_partition> pos,
|
||||
lw_shared_ptr<const decorated_key_with_hash> dk_with_hash,
|
||||
repair_hash hash,
|
||||
std::optional<repair_hash> hash,
|
||||
lw_shared_ptr<mutation_fragment> mf = {})
|
||||
: _fm(std::move(fm))
|
||||
, _dk_with_hash(std::move(dk_with_hash))
|
||||
, _boundary({_dk_with_hash->dk, std::move(pos)})
|
||||
, _boundary(pos ? std::optional<repair_sync_boundary>(repair_sync_boundary{_dk_with_hash->dk, std::move(*pos)}) : std::nullopt)
|
||||
, _hash(std::move(hash))
|
||||
, _mf(std::move(mf)) {
|
||||
}
|
||||
mutation_fragment& get_mutation_fragment() {
|
||||
if (!_mf) {
|
||||
throw std::runtime_error("get empty mutation_fragment");
|
||||
throw std::runtime_error("empty mutation_fragment");
|
||||
}
|
||||
return *_mf;
|
||||
}
|
||||
frozen_mutation_fragment& get_frozen_mutation() { return _fm; }
|
||||
const frozen_mutation_fragment& get_frozen_mutation() const { return _fm; }
|
||||
frozen_mutation_fragment& get_frozen_mutation() {
|
||||
if (!_fm) {
|
||||
throw std::runtime_error("empty frozen_mutation_fragment");
|
||||
}
|
||||
return *_fm;
|
||||
}
|
||||
const frozen_mutation_fragment& get_frozen_mutation() const {
|
||||
if (!_fm) {
|
||||
throw std::runtime_error("empty frozen_mutation_fragment");
|
||||
}
|
||||
return *_fm;
|
||||
}
|
||||
const lw_shared_ptr<const decorated_key_with_hash>& get_dk_with_hash() const {
|
||||
return _dk_with_hash;
|
||||
}
|
||||
size_t size() const {
|
||||
return _fm.representation().size();
|
||||
if (!_fm) {
|
||||
throw std::runtime_error("empty size due to empty frozen_mutation_fragment");
|
||||
}
|
||||
return _fm->representation().size();
|
||||
}
|
||||
const repair_sync_boundary& boundary() const {
|
||||
return _boundary;
|
||||
if (!_boundary) {
|
||||
throw std::runtime_error("empty repair_sync_boundary");
|
||||
}
|
||||
return *_boundary;
|
||||
}
|
||||
const repair_hash& hash() const {
|
||||
return _hash;
|
||||
if (!_hash) {
|
||||
throw std::runtime_error("empty hash");
|
||||
}
|
||||
return *_hash;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -284,13 +303,14 @@ public:
|
||||
repair_reader(
|
||||
seastar::sharded<database>& db,
|
||||
column_family& cf,
|
||||
schema_ptr s,
|
||||
dht::token_range range,
|
||||
dht::i_partitioner& local_partitioner,
|
||||
dht::i_partitioner& remote_partitioner,
|
||||
unsigned remote_shard,
|
||||
uint64_t seed,
|
||||
is_local_reader local_reader)
|
||||
: _schema(cf.schema())
|
||||
: _schema(s)
|
||||
, _range(dht::to_partition_range(range))
|
||||
, _sharder(remote_partitioner, range, remote_shard)
|
||||
, _seed(seed)
|
||||
@@ -458,8 +478,8 @@ public:
|
||||
private:
|
||||
seastar::sharded<database>& _db;
|
||||
column_family& _cf;
|
||||
dht::token_range _range;
|
||||
schema_ptr _schema;
|
||||
dht::token_range _range;
|
||||
repair_sync_boundary::tri_compare _cmp;
|
||||
// The algorithm used to find the row difference
|
||||
row_level_diff_detect_algorithm _algo;
|
||||
@@ -519,6 +539,7 @@ public:
|
||||
repair_meta(
|
||||
seastar::sharded<database>& db,
|
||||
column_family& cf,
|
||||
schema_ptr s,
|
||||
dht::token_range range,
|
||||
row_level_diff_detect_algorithm algo,
|
||||
size_t max_row_buf_size,
|
||||
@@ -529,8 +550,8 @@ public:
|
||||
size_t nr_peer_nodes = 1)
|
||||
: _db(db)
|
||||
, _cf(cf)
|
||||
, _schema(s)
|
||||
, _range(range)
|
||||
, _schema(cf.schema())
|
||||
, _cmp(repair_sync_boundary::tri_compare(*_schema))
|
||||
, _algo(algo)
|
||||
, _max_row_buf_size(max_row_buf_size)
|
||||
@@ -545,6 +566,7 @@ public:
|
||||
, _repair_reader(
|
||||
_db,
|
||||
_cf,
|
||||
_schema,
|
||||
_range,
|
||||
dht::global_partitioner(),
|
||||
*_remote_partitioner,
|
||||
@@ -577,35 +599,45 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
static future<>
|
||||
insert_repair_meta(const gms::inet_address& from,
|
||||
uint32_t src_cpu_id,
|
||||
uint32_t repair_meta_id,
|
||||
sstring ks_name,
|
||||
sstring cf_name,
|
||||
dht::token_range range,
|
||||
row_level_diff_detect_algorithm algo,
|
||||
uint64_t max_row_buf_size,
|
||||
uint64_t seed,
|
||||
shard_config master_node_shard_config) {
|
||||
node_repair_meta_id id{from, repair_meta_id};
|
||||
auto& db = service::get_local_storage_proxy().get_db();
|
||||
auto& cf = db.local().find_column_family(ks_name, cf_name);
|
||||
auto rm = make_lw_shared<repair_meta>(db,
|
||||
cf,
|
||||
shard_config master_node_shard_config,
|
||||
table_schema_version schema_version) {
|
||||
return service::get_schema_for_write(schema_version, {from, src_cpu_id}).then([from,
|
||||
repair_meta_id,
|
||||
range,
|
||||
algo,
|
||||
max_row_buf_size,
|
||||
seed,
|
||||
repair_meta::repair_master::no,
|
||||
repair_meta_id,
|
||||
std::move(master_node_shard_config));
|
||||
bool insertion = repair_meta_map().emplace(id, rm).second;
|
||||
if (!insertion) {
|
||||
rlogger.warn("insert_repair_meta: repair_meta_id {} for node {} already exists, replace existing one", id.repair_meta_id, id.ip);
|
||||
repair_meta_map()[id] = rm;
|
||||
} else {
|
||||
rlogger.debug("insert_repair_meta: Inserted repair_meta_id {} for node {}", id.repair_meta_id, id.ip);
|
||||
}
|
||||
master_node_shard_config,
|
||||
schema_version] (schema_ptr s) {
|
||||
auto& db = service::get_local_storage_proxy().get_db();
|
||||
auto& cf = db.local().find_column_family(s->id());
|
||||
node_repair_meta_id id{from, repair_meta_id};
|
||||
auto rm = make_lw_shared<repair_meta>(db,
|
||||
cf,
|
||||
s,
|
||||
range,
|
||||
algo,
|
||||
max_row_buf_size,
|
||||
seed,
|
||||
repair_meta::repair_master::no,
|
||||
repair_meta_id,
|
||||
std::move(master_node_shard_config));
|
||||
bool insertion = repair_meta_map().emplace(id, rm).second;
|
||||
if (!insertion) {
|
||||
rlogger.warn("insert_repair_meta: repair_meta_id {} for node {} already exists, replace existing one", id.repair_meta_id, id.ip);
|
||||
repair_meta_map()[id] = rm;
|
||||
} else {
|
||||
rlogger.debug("insert_repair_meta: Inserted repair_meta_id {} for node {}", id.repair_meta_id, id.ip);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
static future<>
|
||||
@@ -642,7 +674,11 @@ public:
|
||||
}
|
||||
}
|
||||
return parallel_for_each(*repair_metas, [repair_metas] (auto& rm) {
|
||||
return rm->stop();
|
||||
return rm->stop().then([&rm] {
|
||||
rm = {};
|
||||
});
|
||||
}).then([repair_metas, from] {
|
||||
rlogger.debug("Removed all repair_meta for single node {}", from);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -654,7 +690,11 @@ public:
|
||||
| boost::adaptors::map_values));
|
||||
repair_meta_map().clear();
|
||||
return parallel_for_each(*repair_metas, [repair_metas] (auto& rm) {
|
||||
return rm->stop();
|
||||
return rm->stop().then([&rm] {
|
||||
rm = {};
|
||||
});
|
||||
}).then([repair_metas] {
|
||||
rlogger.debug("Removed all repair_meta for all nodes");
|
||||
});
|
||||
}
|
||||
|
||||
@@ -952,12 +992,12 @@ private:
|
||||
}
|
||||
return to_repair_rows_list(rows).then([this, from, node_idx, update_buf, update_hash_set] (std::list<repair_row> row_diff) {
|
||||
return do_with(std::move(row_diff), [this, from, node_idx, update_buf, update_hash_set] (std::list<repair_row>& row_diff) {
|
||||
auto sz = get_repair_rows_size(row_diff);
|
||||
stats().rx_row_bytes += sz;
|
||||
stats().rx_row_nr += row_diff.size();
|
||||
stats().rx_row_nr_peer[from] += row_diff.size();
|
||||
_metrics.rx_row_nr += row_diff.size();
|
||||
_metrics.rx_row_bytes += sz;
|
||||
if (_repair_master) {
|
||||
auto sz = get_repair_rows_size(row_diff);
|
||||
stats().rx_row_bytes += sz;
|
||||
stats().rx_row_nr += row_diff.size();
|
||||
stats().rx_row_nr_peer[from] += row_diff.size();
|
||||
}
|
||||
if (update_buf) {
|
||||
std::list<repair_row> tmp;
|
||||
tmp.swap(_working_row_buf);
|
||||
@@ -993,11 +1033,16 @@ private:
|
||||
return do_with(repair_rows_on_wire(), std::move(row_list), [this] (repair_rows_on_wire& rows, std::list<repair_row>& row_list) {
|
||||
return do_for_each(row_list, [this, &rows] (repair_row& r) {
|
||||
auto pk = r.get_dk_with_hash()->dk.key();
|
||||
auto it = std::find_if(rows.begin(), rows.end(), [&pk, s=_schema] (partition_key_and_mutation_fragments& row) { return pk.legacy_equal(*s, row.get_key()); });
|
||||
if (it == rows.end()) {
|
||||
rows.push_back(partition_key_and_mutation_fragments(std::move(pk), {std::move(r.get_frozen_mutation())}));
|
||||
// No need to search from the beginning of the rows. Look at the end of repair_rows_on_wire is enough.
|
||||
if (rows.empty()) {
|
||||
rows.push_back(repair_row_on_wire(std::move(pk), {std::move(r.get_frozen_mutation())}));
|
||||
} else {
|
||||
it->push_mutation_fragment(std::move(r.get_frozen_mutation()));
|
||||
auto& row = rows.back();
|
||||
if (pk.legacy_equal(*_schema, row.get_key())) {
|
||||
row.push_mutation_fragment(std::move(r.get_frozen_mutation()));
|
||||
} else {
|
||||
rows.push_back(repair_row_on_wire(std::move(pk), {std::move(r.get_frozen_mutation())}));
|
||||
}
|
||||
}
|
||||
}).then([&rows] {
|
||||
return std::move(rows);
|
||||
@@ -1006,23 +1051,47 @@ private:
|
||||
};
|
||||
|
||||
future<std::list<repair_row>> to_repair_rows_list(repair_rows_on_wire rows) {
|
||||
return do_with(std::move(rows), std::list<repair_row>(), lw_shared_ptr<const decorated_key_with_hash>(),
|
||||
[this] (repair_rows_on_wire& rows, std::list<repair_row>& row_list, lw_shared_ptr<const decorated_key_with_hash>& dk_ptr) mutable {
|
||||
return do_for_each(rows, [this, &dk_ptr, &row_list] (partition_key_and_mutation_fragments& x) mutable {
|
||||
return do_with(std::move(rows), std::list<repair_row>(), lw_shared_ptr<const decorated_key_with_hash>(), lw_shared_ptr<mutation_fragment>(), position_in_partition::tri_compare(*_schema),
|
||||
[this] (repair_rows_on_wire& rows, std::list<repair_row>& row_list, lw_shared_ptr<const decorated_key_with_hash>& dk_ptr, lw_shared_ptr<mutation_fragment>& last_mf, position_in_partition::tri_compare& cmp) mutable {
|
||||
return do_for_each(rows, [this, &dk_ptr, &row_list, &last_mf, &cmp] (partition_key_and_mutation_fragments& x) mutable {
|
||||
dht::decorated_key dk = dht::global_partitioner().decorate_key(*_schema, x.get_key());
|
||||
if (!(dk_ptr && dk_ptr->dk.equal(*_schema, dk))) {
|
||||
dk_ptr = make_lw_shared<const decorated_key_with_hash>(*_schema, dk, _seed);
|
||||
}
|
||||
return do_for_each(x.get_mutation_fragments(), [this, &dk_ptr, &row_list] (frozen_mutation_fragment& fmf) mutable {
|
||||
// Keep the mutation_fragment in repair_row as an
|
||||
// optimization to avoid unfreeze again when
|
||||
// mutation_fragment is needed by _repair_writer.do_write()
|
||||
// to apply the repair_row to disk
|
||||
auto mf = make_lw_shared<mutation_fragment>(fmf.unfreeze(*_schema));
|
||||
auto hash = do_hash_for_mf(*dk_ptr, *mf);
|
||||
position_in_partition pos(mf->position());
|
||||
row_list.push_back(repair_row(std::move(fmf), std::move(pos), dk_ptr, std::move(hash), std::move(mf)));
|
||||
});
|
||||
if (_repair_master) {
|
||||
return do_for_each(x.get_mutation_fragments(), [this, &dk_ptr, &row_list] (frozen_mutation_fragment& fmf) mutable {
|
||||
_metrics.rx_row_nr += 1;
|
||||
_metrics.rx_row_bytes += fmf.representation().size();
|
||||
// Keep the mutation_fragment in repair_row as an
|
||||
// optimization to avoid unfreeze again when
|
||||
// mutation_fragment is needed by _repair_writer.do_write()
|
||||
// to apply the repair_row to disk
|
||||
auto mf = make_lw_shared<mutation_fragment>(fmf.unfreeze(*_schema));
|
||||
auto hash = do_hash_for_mf(*dk_ptr, *mf);
|
||||
position_in_partition pos(mf->position());
|
||||
row_list.push_back(repair_row(std::move(fmf), std::move(pos), dk_ptr, std::move(hash), std::move(mf)));
|
||||
});
|
||||
} else {
|
||||
last_mf = {};
|
||||
return do_for_each(x.get_mutation_fragments(), [this, &dk_ptr, &row_list, &last_mf, &cmp] (frozen_mutation_fragment& fmf) mutable {
|
||||
_metrics.rx_row_nr += 1;
|
||||
_metrics.rx_row_bytes += fmf.representation().size();
|
||||
auto mf = make_lw_shared<mutation_fragment>(fmf.unfreeze(*_schema));
|
||||
position_in_partition pos(mf->position());
|
||||
// If the mutation_fragment has the same position as
|
||||
// the last mutation_fragment, it means they are the
|
||||
// same row with different contents. We can not feed
|
||||
// such rows into the sstable writer. Instead we apply
|
||||
// the mutation_fragment into the previous one.
|
||||
if (last_mf && cmp(last_mf->position(), pos) == 0 && last_mf->mergeable_with(*mf)) {
|
||||
last_mf->apply(*_schema, std::move(*mf));
|
||||
} else {
|
||||
last_mf = mf;
|
||||
// On repair follower node, only decorated_key_with_hash and the mutation_fragment inside repair_row are used.
|
||||
row_list.push_back(repair_row({}, {}, dk_ptr, {}, std::move(mf)));
|
||||
}
|
||||
});
|
||||
}
|
||||
}).then([&row_list] {
|
||||
return std::move(row_list);
|
||||
});
|
||||
@@ -1084,29 +1153,28 @@ public:
|
||||
|
||||
// RPC API
|
||||
future<>
|
||||
repair_row_level_start(gms::inet_address remote_node, sstring ks_name, sstring cf_name, dht::token_range range) {
|
||||
repair_row_level_start(gms::inet_address remote_node, sstring ks_name, sstring cf_name, dht::token_range range, table_schema_version schema_version) {
|
||||
if (remote_node == _myip) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
stats().rpc_call_nr++;
|
||||
return netw::get_local_messaging_service().send_repair_row_level_start(msg_addr(remote_node),
|
||||
_repair_meta_id, std::move(ks_name), std::move(cf_name), std::move(range), _algo, _max_row_buf_size, _seed,
|
||||
_master_node_shard_config.shard, _master_node_shard_config.shard_count, _master_node_shard_config.ignore_msb, _master_node_shard_config.partitioner_name);
|
||||
_master_node_shard_config.shard, _master_node_shard_config.shard_count, _master_node_shard_config.ignore_msb, _master_node_shard_config.partitioner_name, std::move(schema_version));
|
||||
}
|
||||
|
||||
// RPC handler
|
||||
static future<>
|
||||
repair_row_level_start_handler(gms::inet_address from, uint32_t repair_meta_id, sstring ks_name, sstring cf_name,
|
||||
repair_row_level_start_handler(gms::inet_address from, uint32_t src_cpu_id, uint32_t repair_meta_id, sstring ks_name, sstring cf_name,
|
||||
dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size,
|
||||
uint64_t seed, shard_config master_node_shard_config) {
|
||||
uint64_t seed, shard_config master_node_shard_config, table_schema_version schema_version) {
|
||||
if (!_sys_dist_ks->local_is_initialized() || !_view_update_generator->local_is_initialized()) {
|
||||
return make_exception_future<>(std::runtime_error(format("Node {} is not fully initialized for repair, try again later",
|
||||
utils::fb_utilities::get_broadcast_address())));
|
||||
}
|
||||
rlogger.debug(">>> Started Row Level Repair (Follower): local={}, peers={}, repair_meta_id={}, keyspace={}, cf={}, range={}",
|
||||
utils::fb_utilities::get_broadcast_address(), from, repair_meta_id, ks_name, cf_name, range);
|
||||
insert_repair_meta(from, repair_meta_id, std::move(ks_name), std::move(cf_name), std::move(range), algo, max_row_buf_size, seed, std::move(master_node_shard_config));
|
||||
return make_ready_future<>();
|
||||
rlogger.debug(">>> Started Row Level Repair (Follower): local={}, peers={}, repair_meta_id={}, keyspace={}, cf={}, schema_version={}, range={}",
|
||||
utils::fb_utilities::get_broadcast_address(), from, repair_meta_id, ks_name, cf_name, schema_version, range);
|
||||
return insert_repair_meta(from, src_cpu_id, repair_meta_id, std::move(range), algo, max_row_buf_size, seed, std::move(master_node_shard_config), std::move(schema_version));
|
||||
}
|
||||
|
||||
// RPC API
|
||||
@@ -1313,14 +1381,15 @@ future<> repair_init_messaging_service_handler(repair_service& rs, distributed<d
|
||||
});
|
||||
ms.register_repair_row_level_start([] (const rpc::client_info& cinfo, uint32_t repair_meta_id, sstring ks_name,
|
||||
sstring cf_name, dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size, uint64_t seed,
|
||||
unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name) {
|
||||
unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name, table_schema_version schema_version) {
|
||||
auto src_cpu_id = cinfo.retrieve_auxiliary<uint32_t>("src_cpu_id");
|
||||
auto from = cinfo.retrieve_auxiliary<gms::inet_address>("baddr");
|
||||
return smp::submit_to(src_cpu_id % smp::count, [from, repair_meta_id, ks_name, cf_name,
|
||||
range, algo, max_row_buf_size, seed, remote_shard, remote_shard_count, remote_ignore_msb, remote_partitioner_name] () mutable {
|
||||
return repair_meta::repair_row_level_start_handler(from, repair_meta_id, std::move(ks_name),
|
||||
return smp::submit_to(src_cpu_id % smp::count, [from, src_cpu_id, repair_meta_id, ks_name, cf_name,
|
||||
range, algo, max_row_buf_size, seed, remote_shard, remote_shard_count, remote_ignore_msb, remote_partitioner_name, schema_version] () mutable {
|
||||
return repair_meta::repair_row_level_start_handler(from, src_cpu_id, repair_meta_id, std::move(ks_name),
|
||||
std::move(cf_name), std::move(range), algo, max_row_buf_size, seed,
|
||||
shard_config{remote_shard, remote_shard_count, remote_ignore_msb, std::move(remote_partitioner_name)});
|
||||
shard_config{remote_shard, remote_shard_count, remote_ignore_msb, std::move(remote_partitioner_name)},
|
||||
schema_version);
|
||||
});
|
||||
});
|
||||
ms.register_repair_row_level_stop([] (const rpc::client_info& cinfo, uint32_t repair_meta_id,
|
||||
@@ -1608,8 +1677,12 @@ public:
|
||||
dht::global_partitioner().sharding_ignore_msb(),
|
||||
dht::global_partitioner().name()
|
||||
};
|
||||
auto s = _cf.schema();
|
||||
auto schema_version = s->version();
|
||||
|
||||
repair_meta master(_ri.db,
|
||||
_cf,
|
||||
s,
|
||||
_range,
|
||||
algorithm,
|
||||
_max_row_buf_size,
|
||||
@@ -1622,12 +1695,13 @@ public:
|
||||
// All nodes including the node itself.
|
||||
_all_nodes.insert(_all_nodes.begin(), master.myip());
|
||||
|
||||
rlogger.debug(">>> Started Row Level Repair (Master): local={}, peers={}, repair_meta_id={}, keyspace={}, cf={}, range={}, seed={}",
|
||||
master.myip(), _all_live_peer_nodes, master.repair_meta_id(), _ri.keyspace, _cf_name, _range, _seed);
|
||||
rlogger.debug(">>> Started Row Level Repair (Master): local={}, peers={}, repair_meta_id={}, keyspace={}, cf={}, schema_version={}, range={}, seed={}",
|
||||
master.myip(), _all_live_peer_nodes, master.repair_meta_id(), _ri.keyspace, _cf_name, schema_version, _range, _seed);
|
||||
|
||||
|
||||
try {
|
||||
parallel_for_each(_all_nodes, [&, this] (const gms::inet_address& node) {
|
||||
return master.repair_row_level_start(node, _ri.keyspace, _cf_name, _range).then([&] () {
|
||||
return master.repair_row_level_start(node, _ri.keyspace, _cf_name, _range, schema_version).then([&] () {
|
||||
return master.repair_get_estimated_partitions(node).then([this, node] (uint64_t partitions) {
|
||||
rlogger.trace("Get repair_get_estimated_partitions for node={}, estimated_partitions={}", node, partitions);
|
||||
_estimated_partitions += partitions;
|
||||
@@ -1677,19 +1751,7 @@ public:
|
||||
future<> repair_cf_range_row_level(repair_info& ri,
|
||||
sstring cf_name, dht::token_range range,
|
||||
const std::vector<gms::inet_address>& all_peer_nodes) {
|
||||
auto all_live_peer_nodes = boost::copy_range<std::vector<gms::inet_address>>(all_peer_nodes |
|
||||
boost::adaptors::filtered([] (const gms::inet_address& node) { return gms::get_local_gossiper().is_alive(node); }));
|
||||
if (all_live_peer_nodes.size() != all_peer_nodes.size()) {
|
||||
rlogger.warn("Repair for range={} is partial, peer nodes={}, live peer nodes={}",
|
||||
range, all_peer_nodes, all_live_peer_nodes);
|
||||
ri.nr_failed_ranges++;
|
||||
}
|
||||
if (all_live_peer_nodes.empty()) {
|
||||
rlogger.info(">>> Skipped Row Level Repair (Master): local={}, peers={}, keyspace={}, cf={}, range={}",
|
||||
utils::fb_utilities::get_broadcast_address(), all_peer_nodes, ri.keyspace, cf_name, range);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return do_with(row_level_repair(ri, std::move(cf_name), std::move(range), std::move(all_live_peer_nodes)), [] (row_level_repair& repair) {
|
||||
return do_with(row_level_repair(ri, std::move(cf_name), std::move(range), all_peer_nodes), [] (row_level_repair& repair) {
|
||||
return repair.run();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -69,19 +69,30 @@ table_schema_version schema_mutations::digest() const {
|
||||
}
|
||||
|
||||
md5_hasher h;
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, _columnfamilies);
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, _columns);
|
||||
db::schema_features sf = db::schema_features::full();
|
||||
|
||||
// Disable this feature so that the digest remains compactible with Scylla
|
||||
// versions prior to this feature.
|
||||
// This digest affects the table schema version calculation and it's important
|
||||
// that all nodes arrive at the same table schema version to avoid needless schema version
|
||||
// pulls. Table schema versions are calculated on boot when we don't yet
|
||||
// know all the cluster features, so we could get different table versions after reboot
|
||||
// in an already upgraded cluster.
|
||||
sf.remove<db::schema_feature::DIGEST_INSENSITIVE_TO_EXPIRY>();
|
||||
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, _columnfamilies, sf);
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, _columns, sf);
|
||||
if (_view_virtual_columns && !_view_virtual_columns->partition().empty()) {
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, *_view_virtual_columns);
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, *_view_virtual_columns, sf);
|
||||
}
|
||||
if (_indices && !_indices->partition().empty()) {
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, *_indices);
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, *_indices, sf);
|
||||
}
|
||||
if (_dropped_columns && !_dropped_columns->partition().empty()) {
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, *_dropped_columns);
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, *_dropped_columns, sf);
|
||||
}
|
||||
if (_scylla_tables) {
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, *_scylla_tables);
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, *_scylla_tables, sf);
|
||||
}
|
||||
return utils::UUID_gen::get_name_UUID(h.finalize());
|
||||
}
|
||||
|
||||
@@ -263,11 +263,9 @@ global_schema_ptr::global_schema_ptr(const global_schema_ptr& o)
|
||||
: global_schema_ptr(o.get())
|
||||
{ }
|
||||
|
||||
global_schema_ptr::global_schema_ptr(global_schema_ptr&& o) {
|
||||
global_schema_ptr::global_schema_ptr(global_schema_ptr&& o) noexcept {
|
||||
auto current = engine().cpu_id();
|
||||
if (o._cpu_of_origin != current) {
|
||||
throw std::runtime_error("Attempted to move global_schema_ptr across shards");
|
||||
}
|
||||
assert(o._cpu_of_origin == current);
|
||||
_ptr = std::move(o._ptr);
|
||||
_cpu_of_origin = current;
|
||||
}
|
||||
|
||||
@@ -173,7 +173,7 @@ public:
|
||||
// The other may come from a different shard.
|
||||
global_schema_ptr(const global_schema_ptr& other);
|
||||
// The other must come from current shard.
|
||||
global_schema_ptr(global_schema_ptr&& other);
|
||||
global_schema_ptr(global_schema_ptr&& other) noexcept;
|
||||
// May be invoked across shards. Always returns an engaged pointer.
|
||||
schema_ptr get() const;
|
||||
operator schema_ptr() const { return get(); }
|
||||
|
||||
@@ -231,9 +231,15 @@ ar = tarfile.open(args.output, mode='w|gz')
|
||||
pathlib.Path('build/SCYLLA-RELOCATABLE-FILE').touch()
|
||||
ar.add('build/SCYLLA-RELOCATABLE-FILE', arcname='SCYLLA-RELOCATABLE-FILE')
|
||||
ar.add('dist/redhat/python3')
|
||||
ar.add('dist/debian/python3')
|
||||
ar.add('build/python3/SCYLLA-RELEASE-FILE', arcname='SCYLLA-RELEASE-FILE')
|
||||
ar.add('build/python3/SCYLLA-VERSION-FILE', arcname='SCYLLA-VERSION-FILE')
|
||||
ar.add('build/SCYLLA-PRODUCT-FILE', arcname='SCYLLA-PRODUCT-FILE')
|
||||
for p in ['pyhton3-libs'] + packages:
|
||||
pdir = pathlib.Path('/usr/share/licenses/{}/'.format(p))
|
||||
if pdir.exists():
|
||||
for f in pdir.glob('*'):
|
||||
ar.add(f, arcname='licenses/{}/{}'.format(p, f.name))
|
||||
|
||||
for f in file_list:
|
||||
copy_file_to_python_env(ar, f)
|
||||
|
||||
@@ -61,6 +61,7 @@ args = ap.parse_args()
|
||||
|
||||
executables = ['build/{}/scylla'.format(args.mode),
|
||||
'build/{}/iotune'.format(args.mode),
|
||||
'/usr/bin/patchelf',
|
||||
'/usr/bin/lscpu',
|
||||
'/usr/bin/gawk',
|
||||
'/usr/bin/gzip',
|
||||
@@ -76,6 +77,9 @@ libs = {}
|
||||
for exe in executables:
|
||||
libs.update(ldd(exe))
|
||||
|
||||
# manually add libthread_db for debugging thread
|
||||
libs.update({'libthread_db-1.0.so': '/lib64/libthread_db-1.0.so'})
|
||||
|
||||
ld_so = libs['ld.so']
|
||||
|
||||
have_gnutls = any([lib.startswith('libgnutls.so')
|
||||
@@ -93,56 +97,9 @@ ar = tarfile.open(fileobj=gzip_process.stdin, mode='w|')
|
||||
pathlib.Path('build/SCYLLA-RELOCATABLE-FILE').touch()
|
||||
ar.add('build/SCYLLA-RELOCATABLE-FILE', arcname='SCYLLA-RELOCATABLE-FILE')
|
||||
|
||||
# This thunk is a shell script that arranges for the executable to be invoked,
|
||||
# under the following conditions:
|
||||
#
|
||||
# - the same argument vector is passed to the executable, including argv[0]
|
||||
# - the executable name (/proc/pid/comm, shown in top(1)) is the same
|
||||
# - the dynamic linker is taken from this package rather than the executable's
|
||||
# default (which is hardcoded to point to /lib64/ld-linux-x86_64.so or similar)
|
||||
# - LD_LIBRARY_PATH points to the lib/ directory so shared library dependencies
|
||||
# are satisified from there rather than the system default (e.g. /lib64)
|
||||
|
||||
# To do that, the dynamic linker is invoked using a symbolic link named after the
|
||||
# executable, not its standard name. We use "bash -a" to set argv[0].
|
||||
|
||||
# The full tangled web looks like:
|
||||
#
|
||||
# foobar/bin/scylla a shell script invoking everything
|
||||
# foobar/libexec/scylla.bin the real binary
|
||||
# foobar/libexec/scylla a symlink to ../lib/ld.so
|
||||
# foobar/libreloc/ld.so the dynamic linker
|
||||
# foobar/libreloc/lib... all the other libraries
|
||||
|
||||
# the transformations (done by the thunk and symlinks) are:
|
||||
#
|
||||
# bin/scylla args -> libexec/scylla libexec/scylla.bin args -> lib/ld.so libexec/scylla.bin args
|
||||
|
||||
thunk = b'''\
|
||||
#!/bin/bash
|
||||
|
||||
x="$(readlink -f "$0")"
|
||||
b="$(basename "$x")"
|
||||
d="$(dirname "$x")/.."
|
||||
ldso="$d/libexec/$b"
|
||||
realexe="$d/libexec/$b.bin"
|
||||
export GNUTLS_SYSTEM_PRIORITY_FILE="${GNUTLS_SYSTEM_PRIORITY_FILE-$d/libreloc/gnutls.config}"
|
||||
LD_LIBRARY_PATH="$d/libreloc" exec -a "$0" "$ldso" "$realexe" "$@"
|
||||
'''
|
||||
|
||||
for exe in executables:
|
||||
basename = os.path.basename(exe)
|
||||
ar.add(exe, arcname='libexec/' + basename + '.bin')
|
||||
ti = tarfile.TarInfo(name='bin/' + basename)
|
||||
ti.size = len(thunk)
|
||||
ti.mode = 0o755
|
||||
ti.mtime = os.stat(exe).st_mtime
|
||||
ar.addfile(ti, fileobj=io.BytesIO(thunk))
|
||||
ti = tarfile.TarInfo(name='libexec/' + basename)
|
||||
ti.type = tarfile.SYMTYPE
|
||||
ti.linkname = '../libreloc/ld.so'
|
||||
ti.mtime = os.stat(exe).st_mtime
|
||||
ar.addfile(ti)
|
||||
ar.add(exe, arcname='libexec/' + basename)
|
||||
for lib, libfile in libs.items():
|
||||
ar.add(libfile, arcname='libreloc/' + lib)
|
||||
if have_gnutls:
|
||||
|
||||
@@ -34,7 +34,15 @@ class FilesystemFixup:
|
||||
x="$(readlink -f "$0")"
|
||||
b="$(basename "$x")"
|
||||
d="$(dirname "$x")"
|
||||
PYTHONPATH="${{d}}:${{d}}/libexec:$PYTHONPATH" PATH="${{d}}/{pythonpath}:${{PATH}}" exec -a "$0" "${{d}}/libexec/${{b}}" "$@"
|
||||
CENTOS_SSL_CERT_FILE="/etc/pki/tls/cert.pem"
|
||||
if [ -f "${{CENTOS_SSL_CERT_FILE}}" ]; then
|
||||
c=${{CENTOS_SSL_CERT_FILE}}
|
||||
fi
|
||||
DEBIAN_SSL_CERT_FILE="/etc/ssl/certs/ca-certificates.crt"
|
||||
if [ -f "${{DEBIAN_SSL_CERT_FILE}}" ]; then
|
||||
c=${{DEBIAN_SSL_CERT_FILE}}
|
||||
fi
|
||||
PYTHONPATH="${{d}}:${{d}}/libexec:$PYTHONPATH" PATH="${{d}}/{pythonpath}:${{PATH}}" SSL_CERT_FILE="${{c}}" exec -a "$0" "${{d}}/libexec/${{b}}" "$@"
|
||||
'''
|
||||
self.python_path = python_path
|
||||
self.installroot = installroot
|
||||
|
||||
2
seastar
2
seastar
Submodule seastar updated: 4cdccae53b...75488f6ef2
@@ -30,11 +30,24 @@ using namespace seastar;
|
||||
namespace service {
|
||||
|
||||
class cache_hitrate_calculator : public seastar::async_sharded_service<cache_hitrate_calculator> {
|
||||
struct stat {
|
||||
float h = 0;
|
||||
float m = 0;
|
||||
stat& operator+=(stat& o) {
|
||||
h += o.h;
|
||||
m += o.m;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
seastar::sharded<database>& _db;
|
||||
seastar::sharded<cache_hitrate_calculator>& _me;
|
||||
timer<lowres_clock> _timer;
|
||||
bool _stopped = false;
|
||||
float _diff = 0;
|
||||
std::unordered_map<utils::UUID, stat> _rates;
|
||||
size_t _slen = 0;
|
||||
std::string _gstate;
|
||||
future<> _done = make_ready_future();
|
||||
|
||||
future<lowres_clock::duration> recalculate_hitrates();
|
||||
|
||||
@@ -82,12 +82,16 @@ future<> migration_manager::stop()
|
||||
void migration_manager::init_messaging_service()
|
||||
{
|
||||
auto& ss = service::get_local_storage_service();
|
||||
_feature_listeners.push_back(ss.cluster_supports_view_virtual_columns().when_enabled([this, &ss] {
|
||||
|
||||
auto update_schema = [this, &ss] {
|
||||
with_gate(_background_tasks, [this, &ss] {
|
||||
mlogger.debug("view_virtual_columns feature enabled, recalculating schema version");
|
||||
return update_schema_version(get_storage_proxy(), ss.cluster_schema_features());
|
||||
mlogger.debug("features changed, recalculating schema version");
|
||||
return update_schema_version_and_announce(get_storage_proxy(), ss.cluster_schema_features());
|
||||
});
|
||||
}));
|
||||
};
|
||||
|
||||
_feature_listeners.push_back(ss.cluster_supports_view_virtual_columns().when_enabled(update_schema));
|
||||
_feature_listeners.push_back(ss.cluster_supports_digest_insensitive_to_expiry().when_enabled(update_schema));
|
||||
|
||||
auto& ms = netw::get_local_messaging_service();
|
||||
ms.register_definitions_update([this] (const rpc::client_info& cinfo, std::vector<frozen_mutation> m) {
|
||||
@@ -992,4 +996,22 @@ future<schema_ptr> get_schema_for_write(table_schema_version v, netw::messaging_
|
||||
});
|
||||
}
|
||||
|
||||
future<> migration_manager::sync_schema(const database& db, const std::vector<gms::inet_address>& nodes) {
|
||||
using schema_and_hosts = std::unordered_map<utils::UUID, std::vector<gms::inet_address>>;
|
||||
return do_with(schema_and_hosts(), db.get_version(), [this, &nodes] (schema_and_hosts& schema_map, utils::UUID& my_version) {
|
||||
return parallel_for_each(nodes, [this, &schema_map, &my_version] (const gms::inet_address& node) {
|
||||
return netw::get_messaging_service().local().send_schema_check(netw::msg_addr(node)).then([node, &schema_map, &my_version] (utils::UUID remote_version) {
|
||||
if (my_version != remote_version) {
|
||||
schema_map[remote_version].emplace_back(node);
|
||||
}
|
||||
});
|
||||
}).then([this, &schema_map] {
|
||||
return parallel_for_each(schema_map, [this] (auto& x) {
|
||||
mlogger.debug("Pulling schema {} from {}", x.first, x.second.front());
|
||||
return submit_migration_task(x.second.front());
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -75,6 +75,9 @@ public:
|
||||
|
||||
future<> submit_migration_task(const gms::inet_address& endpoint);
|
||||
|
||||
// Makes sure that this node knows about all schema changes known by "nodes" that were made prior to this call.
|
||||
future<> sync_schema(const database& db, const std::vector<gms::inet_address>& nodes);
|
||||
|
||||
// Fetches schema from remote node and applies it locally.
|
||||
// Differs from submit_migration_task() in that all errors are propagated.
|
||||
// Coalesces requests.
|
||||
|
||||
@@ -113,16 +113,6 @@ void cache_hitrate_calculator::run_on(size_t master, lowres_clock::duration d) {
|
||||
}
|
||||
|
||||
future<lowres_clock::duration> cache_hitrate_calculator::recalculate_hitrates() {
|
||||
struct stat {
|
||||
float h = 0;
|
||||
float m = 0;
|
||||
stat& operator+=(stat& o) {
|
||||
h += o.h;
|
||||
m += o.m;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
auto non_system_filter = [&] (const std::pair<utils::UUID, lw_shared_ptr<column_family>>& cf) {
|
||||
return _db.local().find_keyspace(cf.second->schema()->ks_name()).get_replication_strategy().get_type() != locator::replication_strategy_type::local;
|
||||
};
|
||||
@@ -144,15 +134,18 @@ future<lowres_clock::duration> cache_hitrate_calculator::recalculate_hitrates()
|
||||
|
||||
return _db.map_reduce0(cf_to_cache_hit_stats, std::unordered_map<utils::UUID, stat>(), sum_stats_per_cf).then([this, non_system_filter] (std::unordered_map<utils::UUID, stat> rates) mutable {
|
||||
_diff = 0;
|
||||
_gstate.reserve(_slen); // assume length did not change from previous iteration
|
||||
_slen = 0;
|
||||
_rates = std::move(rates);
|
||||
// set calculated rates on all shards
|
||||
return _db.invoke_on_all([this, rates = std::move(rates), cpuid = engine().cpu_id(), non_system_filter] (database& db) {
|
||||
sstring gstate;
|
||||
for (auto& cf : db.get_column_families() | boost::adaptors::filtered(non_system_filter)) {
|
||||
auto it = rates.find(cf.first);
|
||||
if (it == rates.end()) { // a table may be added before map/reduce compltes and this code runs
|
||||
continue;
|
||||
return _db.invoke_on_all([this, cpuid = engine().cpu_id(), non_system_filter] (database& db) {
|
||||
return do_for_each(_rates, [this, cpuid, &db] (auto&& r) mutable {
|
||||
auto it = db.get_column_families().find(r.first);
|
||||
if (it == db.get_column_families().end()) { // a table may be added before map/reduce completes and this code runs
|
||||
return;
|
||||
}
|
||||
stat s = it->second;
|
||||
auto& cf = *it;
|
||||
stat& s = r.second;
|
||||
float rate = 0;
|
||||
if (s.h) {
|
||||
rate = s.h / (s.h + s.m);
|
||||
@@ -160,24 +153,25 @@ future<lowres_clock::duration> cache_hitrate_calculator::recalculate_hitrates()
|
||||
if (engine().cpu_id() == cpuid) {
|
||||
// calculate max difference between old rate and new one for all cfs
|
||||
_diff = std::max(_diff, std::abs(float(cf.second->get_global_cache_hit_rate()) - rate));
|
||||
gstate += format("{}.{}:{:f};", cf.second->schema()->ks_name(), cf.second->schema()->cf_name(), rate);
|
||||
_gstate += format("{}.{}:{:0.6f};", cf.second->schema()->ks_name(), cf.second->schema()->cf_name(), rate);
|
||||
}
|
||||
cf.second->set_global_cache_hit_rate(cache_temperature(rate));
|
||||
}
|
||||
if (gstate.size()) {
|
||||
auto& g = gms::get_local_gossiper();
|
||||
auto& ss = get_local_storage_service();
|
||||
return g.add_local_application_state(gms::application_state::CACHE_HITRATES, ss.value_factory.cache_hitrates(std::move(gstate)));
|
||||
}
|
||||
return make_ready_future<>();
|
||||
});
|
||||
});
|
||||
}).then([this] {
|
||||
auto& g = gms::get_local_gossiper();
|
||||
auto& ss = get_local_storage_service();
|
||||
_slen = _gstate.size();
|
||||
g.add_local_application_state(gms::application_state::CACHE_HITRATES, ss.value_factory.cache_hitrates(_gstate));
|
||||
// if max difference during this round is big schedule next recalculate earlier
|
||||
if (_diff < 0.01) {
|
||||
return std::chrono::milliseconds(2000);
|
||||
} else {
|
||||
return std::chrono::milliseconds(500);
|
||||
}
|
||||
}).finally([this] {
|
||||
_gstate = std::string(); // free memory, do not trust clear() to do that for string
|
||||
_rates.clear();
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -481,17 +481,58 @@ public:
|
||||
std::move(targets), pending_endpoints, std::move(dead_endpoints), std::move(tr_state), stats) {
|
||||
register_in_intrusive_list(*p);
|
||||
}
|
||||
~view_update_write_response_handler();
|
||||
private:
|
||||
void register_in_intrusive_list(storage_proxy& p);
|
||||
};
|
||||
|
||||
class storage_proxy::view_update_handlers_list : public bi::list<view_update_write_response_handler, bi::base_hook<view_update_write_response_handler>, bi::constant_time_size<false>> {
|
||||
// _live_iterators holds all iterators that point into the bi:list in the base class of this object.
|
||||
// If we remove a view_update_write_response_handler from the list, and an iterator happens to point
|
||||
// into it, we advance the iterator so it doesn't point at a removed object. See #4912.
|
||||
std::vector<iterator*> _live_iterators;
|
||||
public:
|
||||
view_update_handlers_list() {
|
||||
_live_iterators.reserve(10); // We only expect 1.
|
||||
}
|
||||
void register_live_iterator(iterator* itp) noexcept { // We don't tolerate failure, so abort instead
|
||||
_live_iterators.push_back(itp);
|
||||
}
|
||||
void unregister_live_iterator(iterator* itp) {
|
||||
_live_iterators.erase(boost::remove(_live_iterators, itp), _live_iterators.end());
|
||||
}
|
||||
void update_live_iterators(view_update_write_response_handler* vuwrh) {
|
||||
// vuwrh is being removed from the b::list, so if any live iterator points at it,
|
||||
// move it to the next object (this requires that the list is traversed in the forward
|
||||
// direction).
|
||||
for (auto& itp : _live_iterators) {
|
||||
if (&**itp == vuwrh) {
|
||||
++*itp;
|
||||
}
|
||||
}
|
||||
}
|
||||
class iterator_guard {
|
||||
view_update_handlers_list& _vuhl;
|
||||
iterator* _itp;
|
||||
public:
|
||||
iterator_guard(view_update_handlers_list& vuhl, iterator& it) : _vuhl(vuhl), _itp(&it) {
|
||||
_vuhl.register_live_iterator(_itp);
|
||||
}
|
||||
~iterator_guard() {
|
||||
_vuhl.unregister_live_iterator(_itp);
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
void view_update_write_response_handler::register_in_intrusive_list(storage_proxy& p) {
|
||||
p.get_view_update_handlers_list().push_back(*this);
|
||||
}
|
||||
|
||||
|
||||
view_update_write_response_handler::~view_update_write_response_handler() {
|
||||
_proxy->_view_update_handlers_list->update_live_iterators(this);
|
||||
}
|
||||
|
||||
class datacenter_sync_write_response_handler : public abstract_write_response_handler {
|
||||
struct dc_info {
|
||||
size_t acks;
|
||||
@@ -604,17 +645,21 @@ storage_proxy::response_id_type storage_proxy::register_response_handler(shared_
|
||||
|
||||
void storage_proxy::remove_response_handler(storage_proxy::response_id_type id) {
|
||||
auto entry = _response_handlers.find(id);
|
||||
assert(entry != _response_handlers.end());
|
||||
remove_response_handler_entry(std::move(entry));
|
||||
}
|
||||
|
||||
void storage_proxy::remove_response_handler_entry(response_handlers_map::iterator entry) {
|
||||
entry->second->on_released();
|
||||
_response_handlers.erase(std::move(entry));
|
||||
}
|
||||
|
||||
|
||||
void storage_proxy::got_response(storage_proxy::response_id_type id, gms::inet_address from, std::optional<db::view::update_backlog> backlog) {
|
||||
auto it = _response_handlers.find(id);
|
||||
if (it != _response_handlers.end()) {
|
||||
tracing::trace(it->second->get_trace_state(), "Got a response from /{}", from);
|
||||
if (it->second->response(from)) {
|
||||
remove_response_handler(id); // last one, remove entry. Will cancel expiration timer too.
|
||||
remove_response_handler_entry(std::move(it)); // last one, remove entry. Will cancel expiration timer too.
|
||||
} else {
|
||||
it->second->check_for_early_completion();
|
||||
}
|
||||
@@ -627,7 +672,7 @@ void storage_proxy::got_failure_response(storage_proxy::response_id_type id, gms
|
||||
if (it != _response_handlers.end()) {
|
||||
tracing::trace(it->second->get_trace_state(), "Got {} failures from /{}", count, from);
|
||||
if (it->second->failure_response(from, count)) {
|
||||
remove_response_handler(id);
|
||||
remove_response_handler_entry(std::move(it));
|
||||
} else {
|
||||
it->second->check_for_early_completion();
|
||||
}
|
||||
@@ -1097,6 +1142,22 @@ future<> storage_proxy::mutate_begin(std::vector<unique_response_handler> ids, d
|
||||
std::optional<clock_type::time_point> timeout_opt) {
|
||||
return parallel_for_each(ids, [this, cl, timeout_opt] (unique_response_handler& protected_response) {
|
||||
auto response_id = protected_response.id;
|
||||
// This function, mutate_begin(), is called after a preemption point
|
||||
// so it's possible that other code besides our caller just ran. In
|
||||
// particular, Scylla may have noticed that a remote node went down,
|
||||
// called storage_proxy::on_down(), and removed some of the ongoing
|
||||
// handlers, including this id. If this happens, we need to ignore
|
||||
// this id - not try to look it up or start a send.
|
||||
if (_response_handlers.find(response_id) == _response_handlers.end()) {
|
||||
protected_response.release(); // Don't try to remove this id again
|
||||
// Requests that time-out normally below after response_wait()
|
||||
// result in an exception (see ~abstract_write_response_handler())
|
||||
// However, here we no longer have the handler or its information
|
||||
// to put in the exception. The exception is not needed for
|
||||
// correctness (e.g., hints are written by timeout_cb(), not
|
||||
// because of an exception here).
|
||||
return make_exception_future<>(std::runtime_error("unstarted write cancelled"));
|
||||
}
|
||||
// it is better to send first and hint afterwards to reduce latency
|
||||
// but request may complete before hint_to_dead_endpoints() is called and
|
||||
// response_id handler will be removed, so we will have to do hint with separate
|
||||
@@ -2873,6 +2934,12 @@ storage_proxy::query_partition_key_range_concurrent(storage_proxy::clock_type::t
|
||||
dht::partition_range_vector ranges = ranges_to_vnodes(concurrency_factor);
|
||||
dht::partition_range_vector::iterator i = ranges.begin();
|
||||
|
||||
// query_ranges_to_vnodes_generator can return less results than requested. If the number of results
|
||||
// is small enough or there are a lot of results - concurrentcy_factor which is increased by shifting left can
|
||||
// eventualy zero out resulting in an infinite recursion. This line makes sure that concurrency factor is never
|
||||
// get stuck on 0 and never increased too much if the number of results remains small.
|
||||
concurrency_factor = std::max(size_t(1), ranges.size());
|
||||
|
||||
while (i != ranges.end()) {
|
||||
dht::partition_range& range = *i;
|
||||
std::vector<gms::inet_address> live_endpoints = get_live_sorted_endpoints(ks, end_token(range));
|
||||
@@ -3614,20 +3681,27 @@ void storage_proxy::on_up(const gms::inet_address& endpoint) {};
|
||||
|
||||
void storage_proxy::on_down(const gms::inet_address& endpoint) {
|
||||
assert(thread::running_in_thread());
|
||||
for (auto it = _view_update_handlers_list->begin(); it != _view_update_handlers_list->end(); ++it) {
|
||||
auto it = _view_update_handlers_list->begin();
|
||||
while (it != _view_update_handlers_list->end()) {
|
||||
auto guard = it->shared_from_this();
|
||||
if (it->get_targets().count(endpoint) > 0) {
|
||||
if (it->get_targets().count(endpoint) > 0 && _response_handlers.find(it->id()) != _response_handlers.end()) {
|
||||
it->timeout_cb();
|
||||
}
|
||||
seastar::thread::yield();
|
||||
++it;
|
||||
if (seastar::thread::should_yield()) {
|
||||
view_update_handlers_list::iterator_guard ig{*_view_update_handlers_list, it};
|
||||
seastar::thread::yield();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
future<> storage_proxy::drain_on_shutdown() {
|
||||
return do_with(::shared_ptr<abstract_write_response_handler>(), [this] (::shared_ptr<abstract_write_response_handler>& intrusive_list_guard) {
|
||||
return do_for_each(*_view_update_handlers_list, [&intrusive_list_guard] (abstract_write_response_handler& handler) {
|
||||
intrusive_list_guard = handler.shared_from_this();
|
||||
handler.timeout_cb();
|
||||
return do_for_each(*_view_update_handlers_list, [this, &intrusive_list_guard] (abstract_write_response_handler& handler) {
|
||||
if (_response_handlers.find(handler.id()) != _response_handlers.end()) {
|
||||
intrusive_list_guard = handler.shared_from_this();
|
||||
handler.timeout_cb();
|
||||
}
|
||||
});
|
||||
}).then([this] {
|
||||
return _hints_resource_manager.stop();
|
||||
|
||||
@@ -79,6 +79,7 @@ namespace service {
|
||||
class abstract_write_response_handler;
|
||||
class abstract_read_executor;
|
||||
class mutation_holder;
|
||||
class view_update_write_response_handler;
|
||||
|
||||
using replicas_per_token_range = std::unordered_map<dht::token_range, std::vector<utils::UUID>>;
|
||||
|
||||
@@ -131,6 +132,7 @@ private:
|
||||
~unique_response_handler();
|
||||
response_id_type release();
|
||||
};
|
||||
using response_handlers_map = std::unordered_map<response_id_type, ::shared_ptr<abstract_write_response_handler>>;
|
||||
|
||||
public:
|
||||
static const sstring COORDINATOR_STATS_CATEGORY;
|
||||
@@ -178,7 +180,7 @@ public:
|
||||
private:
|
||||
distributed<database>& _db;
|
||||
response_id_type _next_response_id;
|
||||
std::unordered_map<response_id_type, ::shared_ptr<abstract_write_response_handler>> _response_handlers;
|
||||
response_handlers_map _response_handlers;
|
||||
// This buffer hold ids of throttled writes in case resource consumption goes
|
||||
// below the threshold and we want to unthrottle some of them. Without this throttled
|
||||
// request with dead or slow replica may wait for up to timeout ms before replying
|
||||
@@ -220,6 +222,7 @@ private:
|
||||
coordinator_query_options optional_params);
|
||||
response_id_type register_response_handler(shared_ptr<abstract_write_response_handler>&& h);
|
||||
void remove_response_handler(response_id_type id);
|
||||
void remove_response_handler_entry(response_handlers_map::iterator entry);
|
||||
void got_response(response_id_type id, gms::inet_address from, std::optional<db::view::update_backlog> backlog);
|
||||
void got_failure_response(response_id_type id, gms::inet_address from, size_t count, std::optional<db::view::update_backlog> backlog);
|
||||
future<> response_wait(response_id_type id, clock_type::time_point timeout);
|
||||
@@ -457,6 +460,7 @@ public:
|
||||
friend class abstract_write_response_handler;
|
||||
friend class speculating_read_executor;
|
||||
friend class view_update_backlog_broker;
|
||||
friend class view_update_write_response_handler;
|
||||
};
|
||||
|
||||
extern distributed<storage_proxy> _the_storage_proxy;
|
||||
|
||||
@@ -110,6 +110,7 @@ static const sstring TRUNCATION_TABLE = "TRUNCATION_TABLE";
|
||||
static const sstring CORRECT_STATIC_COMPACT_IN_MC = "CORRECT_STATIC_COMPACT_IN_MC";
|
||||
static const sstring UNBOUNDED_RANGE_TOMBSTONES_FEATURE = "UNBOUNDED_RANGE_TOMBSTONES";
|
||||
static const sstring VIEW_VIRTUAL_COLUMNS = "VIEW_VIRTUAL_COLUMNS";
|
||||
static const sstring DIGEST_INSENSITIVE_TO_EXPIRY = "DIGEST_INSENSITIVE_TO_EXPIRY";
|
||||
|
||||
static const sstring SSTABLE_FORMAT_PARAM_NAME = "sstable_format";
|
||||
|
||||
@@ -162,6 +163,7 @@ storage_service::storage_service(distributed<database>& db, gms::gossiper& gossi
|
||||
, _correct_static_compact_in_mc(_feature_service, CORRECT_STATIC_COMPACT_IN_MC)
|
||||
, _unbounded_range_tombstones_feature(_feature_service, UNBOUNDED_RANGE_TOMBSTONES_FEATURE)
|
||||
, _view_virtual_columns(_feature_service, VIEW_VIRTUAL_COLUMNS)
|
||||
, _digest_insensitive_to_expiry(_feature_service, DIGEST_INSENSITIVE_TO_EXPIRY)
|
||||
, _la_feature_listener(*this, _feature_listeners_sem, sstables::sstable_version_types::la)
|
||||
, _mc_feature_listener(*this, _feature_listeners_sem, sstables::sstable_version_types::mc)
|
||||
, _replicate_action([this] { return do_replicate_to_all_cores(); })
|
||||
@@ -208,6 +210,7 @@ void storage_service::enable_all_features() {
|
||||
std::ref(_correct_static_compact_in_mc),
|
||||
std::ref(_unbounded_range_tombstones_feature),
|
||||
std::ref(_view_virtual_columns),
|
||||
std::ref(_digest_insensitive_to_expiry),
|
||||
})
|
||||
{
|
||||
if (features.count(f.name())) {
|
||||
@@ -311,6 +314,7 @@ std::set<sstring> storage_service::get_config_supported_features_set() {
|
||||
TRUNCATION_TABLE,
|
||||
CORRECT_STATIC_COMPACT_IN_MC,
|
||||
VIEW_VIRTUAL_COLUMNS,
|
||||
DIGEST_INSENSITIVE_TO_EXPIRY,
|
||||
};
|
||||
|
||||
// Do not respect config in the case database is not started
|
||||
@@ -479,6 +483,14 @@ void storage_service::prepare_to_join(std::vector<inet_address> loaded_endpoints
|
||||
}
|
||||
}
|
||||
|
||||
// If this is a restarting node, we should update tokens before gossip starts
|
||||
auto my_tokens = db::system_keyspace::get_saved_tokens().get0();
|
||||
bool restarting_normal_node = db::system_keyspace::bootstrap_complete() && !db().local().is_replacing() && !my_tokens.empty();
|
||||
if (restarting_normal_node) {
|
||||
slogger.info("Restarting a node in NORMAL status");
|
||||
_token_metadata.update_normal_tokens(my_tokens, get_broadcast_address());
|
||||
}
|
||||
|
||||
// have to start the gossip service before we can see any info on other nodes. this is necessary
|
||||
// for bootstrap to get the load info it needs.
|
||||
// (we won't be part of the storage ring though until we add a counterId to our state, below.)
|
||||
@@ -489,6 +501,12 @@ void storage_service::prepare_to_join(std::vector<inet_address> loaded_endpoints
|
||||
}).get();
|
||||
auto features = get_config_supported_features();
|
||||
_token_metadata.update_host_id(local_host_id, get_broadcast_address());
|
||||
|
||||
// Replicate the tokens early because once gossip runs other nodes
|
||||
// might send reads/writes to this node. Replicate it early to make
|
||||
// sure the tokens are valid on all the shards.
|
||||
replicate_to_all_cores().get();
|
||||
|
||||
auto broadcast_rpc_address = utils::fb_utilities::get_broadcast_rpc_address();
|
||||
auto& proxy = service::get_storage_proxy();
|
||||
// Ensure we know our own actual Schema UUID in preparation for updates
|
||||
@@ -503,6 +521,10 @@ void storage_service::prepare_to_join(std::vector<inet_address> loaded_endpoints
|
||||
app_states.emplace(gms::application_state::RPC_READY, value_factory.cql_ready(false));
|
||||
app_states.emplace(gms::application_state::VIEW_BACKLOG, versioned_value(""));
|
||||
app_states.emplace(gms::application_state::SCHEMA, value_factory.schema(schema_version));
|
||||
if (restarting_normal_node) {
|
||||
app_states.emplace(gms::application_state::TOKENS, value_factory.tokens(my_tokens));
|
||||
app_states.emplace(gms::application_state::STATUS, value_factory.normal(my_tokens));
|
||||
}
|
||||
slogger.info("Starting up server gossip");
|
||||
|
||||
_gossiper.register_(this->shared_from_this());
|
||||
@@ -811,6 +833,7 @@ void storage_service::bootstrap(std::unordered_set<token> tokens) {
|
||||
} else {
|
||||
// Dont set any state for the node which is bootstrapping the existing token...
|
||||
_token_metadata.update_normal_tokens(tokens, get_broadcast_address());
|
||||
replicate_to_all_cores().get();
|
||||
auto replace_addr = db().local().get_replace_address();
|
||||
if (replace_addr) {
|
||||
slogger.debug("Removing replaced endpoint {} from system.peers", *replace_addr);
|
||||
@@ -1581,6 +1604,7 @@ future<> storage_service::init_server(int delay, bind_messaging_port do_bind) {
|
||||
auto tokens = db::system_keyspace::get_saved_tokens().get0();
|
||||
if (!tokens.empty()) {
|
||||
_token_metadata.update_normal_tokens(tokens, get_broadcast_address());
|
||||
replicate_to_all_cores().get();
|
||||
// order is important here, the gossiper can fire in between adding these two states. It's ok to send TOKENS without STATUS, but *not* vice versa.
|
||||
_gossiper.add_local_application_state({
|
||||
{ gms::application_state::TOKENS, value_factory.tokens(tokens) },
|
||||
@@ -3479,6 +3503,7 @@ void storage_service::notify_cql_change(inet_address endpoint, bool ready)
|
||||
db::schema_features storage_service::cluster_schema_features() const {
|
||||
db::schema_features f;
|
||||
f.set_if<db::schema_feature::VIEW_VIRTUAL_COLUMNS>(bool(_view_virtual_columns));
|
||||
f.set_if<db::schema_feature::DIGEST_INSENSITIVE_TO_EXPIRY>(bool(_digest_insensitive_to_expiry));
|
||||
return f;
|
||||
}
|
||||
|
||||
|
||||
@@ -323,6 +323,7 @@ private:
|
||||
gms::feature _correct_static_compact_in_mc;
|
||||
gms::feature _unbounded_range_tombstones_feature;
|
||||
gms::feature _view_virtual_columns;
|
||||
gms::feature _digest_insensitive_to_expiry;
|
||||
|
||||
sstables::sstable_version_types _sstables_format = sstables::sstable_version_types::ka;
|
||||
seastar::semaphore _feature_listeners_sem = {1};
|
||||
@@ -2338,6 +2339,9 @@ public:
|
||||
const gms::feature& cluster_supports_view_virtual_columns() const {
|
||||
return _view_virtual_columns;
|
||||
}
|
||||
const gms::feature& cluster_supports_digest_insensitive_to_expiry() const {
|
||||
return _digest_insensitive_to_expiry;
|
||||
}
|
||||
// Returns schema features which all nodes in the cluster advertise as supported.
|
||||
db::schema_features cluster_schema_features() const;
|
||||
private:
|
||||
|
||||
@@ -104,16 +104,6 @@ static bool belongs_to_current_node(const dht::token& t, const dht::token_range_
|
||||
return false;
|
||||
}
|
||||
|
||||
static void delete_sstables_for_interrupted_compaction(std::vector<shared_sstable>& new_sstables, sstring& ks, sstring& cf) {
|
||||
// Delete either partially or fully written sstables of a compaction that
|
||||
// was either stopped abruptly (e.g. out of disk space) or deliberately
|
||||
// (e.g. nodetool stop COMPACTION).
|
||||
for (auto& sst : new_sstables) {
|
||||
clogger.debug("Deleting sstable {} of interrupted compaction for {}.{}", sst->get_filename(), ks, cf);
|
||||
sst->mark_for_deletion();
|
||||
}
|
||||
}
|
||||
|
||||
static std::vector<shared_sstable> get_uncompacting_sstables(column_family& cf, std::vector<shared_sstable> sstables) {
|
||||
auto all_sstables = boost::copy_range<std::vector<shared_sstable>>(*cf.get_sstables_including_compacted_undeleted());
|
||||
boost::sort(all_sstables, [] (const shared_sstable& x, const shared_sstable& y) {
|
||||
@@ -317,6 +307,9 @@ protected:
|
||||
column_family& _cf;
|
||||
schema_ptr _schema;
|
||||
std::vector<shared_sstable> _sstables;
|
||||
// Unused sstables are tracked because if compaction is interrupted we can only delete them.
|
||||
// Deleting used sstables could potentially result in data loss.
|
||||
std::vector<shared_sstable> _new_unused_sstables;
|
||||
lw_shared_ptr<sstable_set> _compacting;
|
||||
uint64_t _max_sstable_size;
|
||||
uint32_t _sstable_level;
|
||||
@@ -347,6 +340,7 @@ protected:
|
||||
|
||||
void setup_new_sstable(shared_sstable& sst) {
|
||||
_info->new_sstables.push_back(sst);
|
||||
_new_unused_sstables.push_back(sst);
|
||||
sst->get_metadata_collector().set_replay_position(_rp);
|
||||
sst->get_metadata_collector().sstable_level(_sstable_level);
|
||||
for (auto ancestor : _ancestors) {
|
||||
@@ -488,6 +482,16 @@ private:
|
||||
const schema_ptr& schema() const {
|
||||
return _schema;
|
||||
}
|
||||
|
||||
void delete_sstables_for_interrupted_compaction() {
|
||||
// Delete either partially or fully written sstables of a compaction that
|
||||
// was either stopped abruptly (e.g. out of disk space) or deliberately
|
||||
// (e.g. nodetool stop COMPACTION).
|
||||
for (auto& sst : _new_unused_sstables) {
|
||||
clogger.debug("Deleting sstable {} of interrupted compaction for {}.{}", sst->get_filename(), _info->ks_name, _info->cf_name);
|
||||
sst->mark_for_deletion();
|
||||
}
|
||||
}
|
||||
public:
|
||||
static future<compaction_info> run(std::unique_ptr<compaction> c);
|
||||
|
||||
@@ -521,7 +525,6 @@ void compacting_sstable_writer::consume_end_of_stream() {
|
||||
class regular_compaction : public compaction {
|
||||
std::function<shared_sstable()> _creator;
|
||||
replacer_fn _replacer;
|
||||
std::vector<shared_sstable> _unreplaced_new_tables;
|
||||
std::unordered_set<shared_sstable> _compacting_for_max_purgeable_func;
|
||||
// store a clone of sstable set for column family, which needs to be alive for incremental selector.
|
||||
sstable_set _set;
|
||||
@@ -625,8 +628,6 @@ private:
|
||||
}
|
||||
|
||||
void maybe_replace_exhausted_sstables() {
|
||||
_unreplaced_new_tables.push_back(_sst);
|
||||
|
||||
// Replace exhausted sstable(s), if any, by new one(s) in the column family.
|
||||
auto not_exhausted = [s = _schema, &dk = _sst->get_last_decorated_key()] (shared_sstable& sst) {
|
||||
return sst->get_last_decorated_key().tri_compare(*s, dk) > 0;
|
||||
@@ -668,7 +669,7 @@ private:
|
||||
_compacting->erase(sst);
|
||||
_monitor_generator.remove_sstable(_info->tracking, sst);
|
||||
});
|
||||
_replacer(std::vector<shared_sstable>(exhausted, _sstables.end()), std::move(_unreplaced_new_tables));
|
||||
_replacer(std::vector<shared_sstable>(exhausted, _sstables.end()), std::move(_new_unused_sstables));
|
||||
_sstables.erase(exhausted, _sstables.end());
|
||||
}
|
||||
}
|
||||
@@ -677,7 +678,7 @@ private:
|
||||
if (!_sstables.empty()) {
|
||||
std::vector<shared_sstable> sstables_compacted;
|
||||
std::move(_sstables.begin(), _sstables.end(), std::back_inserter(sstables_compacted));
|
||||
_replacer(std::move(sstables_compacted), std::move(_unreplaced_new_tables));
|
||||
_replacer(std::move(sstables_compacted), std::move(_new_unused_sstables));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -877,7 +878,7 @@ future<compaction_info> compaction::run(std::unique_ptr<compaction> c) {
|
||||
auto r = std::move(reader);
|
||||
r.consume_in_thread(std::move(cfc), c->filter_func(), db::no_timeout);
|
||||
} catch (...) {
|
||||
delete_sstables_for_interrupted_compaction(c->_info->new_sstables, c->_info->ks_name, c->_info->cf_name);
|
||||
c->delete_sstables_for_interrupted_compaction();
|
||||
c = nullptr; // make sure writers are stopped while running in thread context
|
||||
throw;
|
||||
}
|
||||
|
||||
@@ -639,7 +639,7 @@ future<> compaction_manager::perform_sstable_upgrade(column_family* cf, bool exc
|
||||
return cf->run_with_compaction_disabled([this, cf, &tables, exclude_current_version] {
|
||||
auto last_version = get_highest_supported_format();
|
||||
|
||||
for (auto& sst : *(cf->get_sstables())) {
|
||||
for (auto& sst : cf->candidates_for_compaction()) {
|
||||
// if we are a "normal" upgrade, we only care about
|
||||
// tables with older versions, but potentially
|
||||
// we are to actually rewrite everything. (-a)
|
||||
|
||||
@@ -479,11 +479,6 @@ public:
|
||||
auto itw = writes_per_window.find(bound);
|
||||
if (itw != writes_per_window.end()) {
|
||||
ow_this_window = &itw->second;
|
||||
// We will erase here so we can keep track of which
|
||||
// writes belong to existing windows. Writes that don't belong to any window
|
||||
// are writes in progress to new windows and will be accounted in the final
|
||||
// loop before we return
|
||||
writes_per_window.erase(itw);
|
||||
}
|
||||
auto* oc_this_window = &no_oc;
|
||||
auto itc = compactions_per_window.find(bound);
|
||||
@@ -491,6 +486,13 @@ public:
|
||||
oc_this_window = &itc->second;
|
||||
}
|
||||
b += windows.second.backlog(*ow_this_window, *oc_this_window);
|
||||
if (itw != writes_per_window.end()) {
|
||||
// We will erase here so we can keep track of which
|
||||
// writes belong to existing windows. Writes that don't belong to any window
|
||||
// are writes in progress to new windows and will be accounted in the final
|
||||
// loop before we return
|
||||
writes_per_window.erase(itw);
|
||||
}
|
||||
}
|
||||
|
||||
// Partial writes that don't belong to any window are accounted here.
|
||||
|
||||
@@ -380,9 +380,17 @@ private:
|
||||
}
|
||||
|
||||
return do_with(std::make_unique<reader>(_sstable, _pc, position, end, quantity), [this, summary_idx] (auto& entries_reader) {
|
||||
return entries_reader->_context.consume_input().then([this, summary_idx, &entries_reader] {
|
||||
return entries_reader->_context.consume_input().then_wrapped([this, summary_idx, &entries_reader] (future<> f) {
|
||||
std::exception_ptr ex;
|
||||
if (f.failed()) {
|
||||
ex = f.get_exception();
|
||||
sstlog.error("failed reading index for {}: {}", _sstable->get_filename(), ex);
|
||||
}
|
||||
auto indexes = std::move(entries_reader->_consumer.indexes);
|
||||
return entries_reader->_context.close().then([indexes = std::move(indexes)] () mutable {
|
||||
return entries_reader->_context.close().then([indexes = std::move(indexes), ex = std::move(ex)] () mutable {
|
||||
if (ex) {
|
||||
std::rethrow_exception(std::move(ex));
|
||||
}
|
||||
return std::move(indexes);
|
||||
});
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
#include "sstables/mc/types.hh"
|
||||
#include "db/config.hh"
|
||||
#include "atomic_cell.hh"
|
||||
#include "utils/exceptions.hh"
|
||||
|
||||
#include <functional>
|
||||
#include <boost/iterator/iterator_facade.hpp>
|
||||
@@ -533,7 +534,7 @@ private:
|
||||
shard_id _shard; // Specifies which shard the new SStable will belong to.
|
||||
bool _compression_enabled = false;
|
||||
std::unique_ptr<file_writer> _data_writer;
|
||||
std::optional<file_writer> _index_writer;
|
||||
std::unique_ptr<file_writer> _index_writer;
|
||||
bool _tombstone_written = false;
|
||||
bool _static_row_written = false;
|
||||
// The length of partition header (partition key, partition deletion and static row, if present)
|
||||
@@ -592,6 +593,10 @@ private:
|
||||
bool _write_regular_as_static; // See #4139
|
||||
|
||||
void init_file_writers();
|
||||
|
||||
// Returns the closed writer
|
||||
std::unique_ptr<file_writer> close_writer(std::unique_ptr<file_writer>& w);
|
||||
|
||||
void close_data_writer();
|
||||
void ensure_tombstone_is_written() {
|
||||
if (!_tombstone_written) {
|
||||
@@ -676,7 +681,7 @@ private:
|
||||
|
||||
// Writes single atomic cell
|
||||
void write_cell(bytes_ostream& writer, const clustering_key_prefix* clustering_key, atomic_cell_view cell, const column_definition& cdef,
|
||||
const row_time_properties& properties, bytes_view cell_path = {});
|
||||
const row_time_properties& properties, std::optional<bytes_view> cell_path = {});
|
||||
|
||||
// Writes information about row liveness (formerly 'row marker')
|
||||
void write_liveness_info(bytes_ostream& writer, const row_marker& marker);
|
||||
@@ -836,13 +841,17 @@ void writer::init_file_writers() {
|
||||
&_sst._components->compression,
|
||||
_schema.get_compressor_params()));
|
||||
}
|
||||
_index_writer.emplace(std::move(_sst._index_file), options);
|
||||
_index_writer = std::make_unique<file_writer>(std::move(_sst._index_file), options);
|
||||
}
|
||||
|
||||
std::unique_ptr<file_writer> writer::close_writer(std::unique_ptr<file_writer>& w) {
|
||||
auto writer = std::move(w);
|
||||
writer->close();
|
||||
return writer;
|
||||
}
|
||||
|
||||
void writer::close_data_writer() {
|
||||
auto writer = std::move(_data_writer);
|
||||
writer->close();
|
||||
|
||||
auto writer = close_writer(_data_writer);
|
||||
if (!_compression_enabled) {
|
||||
auto chksum_wr = static_cast<crc32_checksummed_file_writer*>(writer.get());
|
||||
_sst.write_digest(chksum_wr->full_checksum());
|
||||
@@ -970,7 +979,7 @@ void writer::consume(tombstone t) {
|
||||
}
|
||||
|
||||
void writer::write_cell(bytes_ostream& writer, const clustering_key_prefix* clustering_key, atomic_cell_view cell,
|
||||
const column_definition& cdef, const row_time_properties& properties, bytes_view cell_path) {
|
||||
const column_definition& cdef, const row_time_properties& properties, std::optional<bytes_view> cell_path) {
|
||||
|
||||
uint64_t current_pos = writer.size();
|
||||
bool is_deleted = !cell.is_live();
|
||||
@@ -983,7 +992,7 @@ void writer::write_cell(bytes_ostream& writer, const clustering_key_prefix* clus
|
||||
properties.local_deletion_time == cell.deletion_time();
|
||||
|
||||
cell_flags flags = cell_flags::none;
|
||||
if (!has_value) {
|
||||
if ((!has_value && !cdef.is_counter()) || is_deleted) {
|
||||
flags |= cell_flags::has_empty_value_mask;
|
||||
}
|
||||
if (is_deleted) {
|
||||
@@ -1012,20 +1021,22 @@ void writer::write_cell(bytes_ostream& writer, const clustering_key_prefix* clus
|
||||
}
|
||||
}
|
||||
|
||||
if (!cell_path.empty()) {
|
||||
write_vint(writer, cell_path.size());
|
||||
write(_sst.get_version(), writer, cell_path);
|
||||
if (bool(cell_path)) {
|
||||
write_vint(writer, cell_path->size());
|
||||
write(_sst.get_version(), writer, *cell_path);
|
||||
}
|
||||
|
||||
if (has_value) {
|
||||
if (cdef.is_counter()) {
|
||||
if (cdef.is_counter()) {
|
||||
if (!is_deleted) {
|
||||
assert(!cell.is_counter_update());
|
||||
counter_cell_view::with_linearized(cell, [&] (counter_cell_view ccv) {
|
||||
write_counter_value(ccv, writer, sstable_version_types::mc, [] (bytes_ostream& out, uint32_t value) {
|
||||
return write_vint(out, value);
|
||||
});
|
||||
});
|
||||
} else {
|
||||
}
|
||||
} else {
|
||||
if (has_value) {
|
||||
write_cell_value(writer, *cdef.type, cell.value());
|
||||
}
|
||||
}
|
||||
@@ -1370,10 +1381,15 @@ stop_iteration writer::consume_end_of_partition() {
|
||||
_first_key = *_partition_key;
|
||||
}
|
||||
_last_key = std::move(*_partition_key);
|
||||
_partition_key = std::nullopt;
|
||||
return get_data_offset() < _cfg.max_sstable_size ? stop_iteration::no : stop_iteration::yes;
|
||||
}
|
||||
|
||||
void writer::consume_end_of_stream() {
|
||||
if (_partition_key) {
|
||||
on_internal_error(sstlog, "Mutation stream ends with unclosed partition during write");
|
||||
}
|
||||
|
||||
_cfg.monitor->on_data_write_completed();
|
||||
|
||||
seal_summary(_sst._components->summary, std::move(_first_key), std::move(_last_key), _index_sampling_state);
|
||||
@@ -1382,8 +1398,7 @@ void writer::consume_end_of_stream() {
|
||||
_sst.get_metadata_collector().add_compression_ratio(_sst._components->compression.compressed_file_length(), _sst._components->compression.uncompressed_file_length());
|
||||
}
|
||||
|
||||
_index_writer->close();
|
||||
_index_writer.reset();
|
||||
close_writer(_index_writer);
|
||||
_sst.set_first_and_last_keys();
|
||||
|
||||
_sst._components->statistics.contents[metadata_type::Serialization] = std::make_unique<serialization_header>(std::move(_sst_schema.header));
|
||||
|
||||
@@ -44,6 +44,14 @@ namespace sstables {
|
||||
atomic_cell make_counter_cell(api::timestamp_type timestamp, bytes_view value) {
|
||||
static constexpr size_t shard_size = 32;
|
||||
|
||||
if (value.empty()) {
|
||||
// This will never happen in a correct MC sstable but
|
||||
// we had a bug #4363 that caused empty counters
|
||||
// to be incorrectly stored inside sstables.
|
||||
counter_cell_builder ccb;
|
||||
return ccb.build(timestamp);
|
||||
}
|
||||
|
||||
data_input in(value);
|
||||
|
||||
auto header_size = in.read<int16_t>();
|
||||
@@ -59,8 +67,6 @@ atomic_cell make_counter_cell(api::timestamp_type timestamp, bytes_view value) {
|
||||
throw marshal_exception("encountered remote shards in a counter cell");
|
||||
}
|
||||
|
||||
std::vector<counter_shard> shards;
|
||||
shards.reserve(shard_count);
|
||||
counter_cell_builder ccb(shard_count);
|
||||
for (auto i = 0u; i < shard_count; i++) {
|
||||
auto id_hi = in.read<int64_t>();
|
||||
|
||||
@@ -703,9 +703,12 @@ public:
|
||||
// Sets streamed_mutation::_end_of_range when there are no more fragments for the query range.
|
||||
// Returns information whether the parser should continue to parse more
|
||||
// input and produce more fragments or we have collected enough and should yield.
|
||||
// Returns proceed:yes only when all pending fragments have been pushed.
|
||||
proceed push_ready_fragments() {
|
||||
if (_ready) {
|
||||
return push_ready_fragments_with_ready_set();
|
||||
if (push_ready_fragments_with_ready_set() == proceed::no) {
|
||||
return proceed::no;
|
||||
}
|
||||
}
|
||||
|
||||
if (_out_of_range) {
|
||||
|
||||
@@ -1060,9 +1060,26 @@ void sstable::write_simple(const T& component, const io_priority_class& pc) {
|
||||
options.buffer_size = sstable_buffer_size;
|
||||
options.io_priority_class = pc;
|
||||
auto w = file_writer(std::move(f), std::move(options));
|
||||
write(_version, w, component);
|
||||
w.flush();
|
||||
w.close();
|
||||
std::exception_ptr eptr;
|
||||
try {
|
||||
write(_version, w, component);
|
||||
w.flush();
|
||||
} catch (...) {
|
||||
eptr = std::current_exception();
|
||||
}
|
||||
try {
|
||||
w.close();
|
||||
} catch (...) {
|
||||
std::exception_ptr close_eptr = std::current_exception();
|
||||
sstlog.warn("failed to close file_writer: {}", close_eptr);
|
||||
// If write succeeded but close failed, we rethrow close's exception.
|
||||
if (!eptr) {
|
||||
eptr = close_eptr;
|
||||
}
|
||||
}
|
||||
if (eptr) {
|
||||
std::rethrow_exception(eptr);
|
||||
}
|
||||
}
|
||||
|
||||
template future<> sstable::read_simple<component_type::Filter>(sstables::filter& f, const io_priority_class& pc);
|
||||
@@ -2133,11 +2150,15 @@ stop_iteration components_writer::consume_end_of_partition() {
|
||||
_first_key = *_partition_key;
|
||||
}
|
||||
_last_key = std::move(*_partition_key);
|
||||
_partition_key = std::nullopt;
|
||||
|
||||
return get_offset() < _max_sstable_size ? stop_iteration::no : stop_iteration::yes;
|
||||
}
|
||||
|
||||
void components_writer::consume_end_of_stream() {
|
||||
if (_partition_key) {
|
||||
on_internal_error(sstlog, "Mutation stream ends with unclosed partition during write");
|
||||
}
|
||||
// what if there is only one partition? what if it is empty?
|
||||
seal_summary(_sst._components->summary, std::move(_first_key), std::move(_last_key), _index_sampling_state);
|
||||
|
||||
@@ -3053,6 +3074,56 @@ std::optional<std::pair<uint64_t, uint64_t>> sstable::get_sample_indexes_for_ran
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a pair of positions [p1, p2) in the summary file corresponding to
|
||||
* pages which may include keys covered by the specified range, or a disengaged
|
||||
* optional if the sstable does not include any keys from the range.
|
||||
*/
|
||||
std::optional<std::pair<uint64_t, uint64_t>> sstable::get_index_pages_for_range(const dht::token_range& range) {
|
||||
const auto& entries = _components->summary.entries;
|
||||
auto entries_size = entries.size();
|
||||
index_comparator cmp(*_schema);
|
||||
dht::ring_position_comparator rp_cmp(*_schema);
|
||||
uint64_t left = 0;
|
||||
if (range.start()) {
|
||||
dht::ring_position_view pos = range.start()->is_inclusive()
|
||||
? dht::ring_position_view::starting_at(range.start()->value())
|
||||
: dht::ring_position_view::ending_at(range.start()->value());
|
||||
|
||||
// There is no summary entry for the last key, so in order to determine
|
||||
// if pos overlaps with the sstable or not we have to compare with the
|
||||
// last key.
|
||||
if (rp_cmp(pos, get_last_decorated_key()) > 0) {
|
||||
// left is past the end of the sampling.
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
left = std::distance(std::begin(entries),
|
||||
std::lower_bound(entries.begin(), entries.end(), pos, cmp));
|
||||
|
||||
if (left) {
|
||||
--left;
|
||||
}
|
||||
}
|
||||
uint64_t right = entries_size;
|
||||
if (range.end()) {
|
||||
dht::ring_position_view pos = range.end()->is_inclusive()
|
||||
? dht::ring_position_view::ending_at(range.end()->value())
|
||||
: dht::ring_position_view::starting_at(range.end()->value());
|
||||
|
||||
right = std::distance(std::begin(entries),
|
||||
std::lower_bound(entries.begin(), entries.end(), pos, cmp));
|
||||
if (right == 0) {
|
||||
// The first key is strictly greater than right.
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
if (left < right) {
|
||||
return std::optional<std::pair<uint64_t, uint64_t>>(std::in_place_t(), left, right);
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::vector<dht::decorated_key> sstable::get_key_samples(const schema& s, const dht::token_range& range) {
|
||||
auto index_range = get_sample_indexes_for_range(range);
|
||||
std::vector<dht::decorated_key> res;
|
||||
@@ -3066,10 +3137,15 @@ std::vector<dht::decorated_key> sstable::get_key_samples(const schema& s, const
|
||||
}
|
||||
|
||||
uint64_t sstable::estimated_keys_for_range(const dht::token_range& range) {
|
||||
auto sample_index_range = get_sample_indexes_for_range(range);
|
||||
uint64_t sample_key_count = sample_index_range ? sample_index_range->second - sample_index_range->first : 0;
|
||||
// adjust for the current sampling level
|
||||
uint64_t estimated_keys = sample_key_count * ((downsampling::BASE_SAMPLING_LEVEL * _components->summary.header.min_index_interval) / _components->summary.header.sampling_level);
|
||||
auto page_range = get_index_pages_for_range(range);
|
||||
if (!page_range) {
|
||||
return 0;
|
||||
}
|
||||
using uint128_t = unsigned __int128;
|
||||
uint64_t range_pages = page_range->second - page_range->first;
|
||||
auto total_keys = get_estimated_key_count();
|
||||
auto total_pages = _components->summary.entries.size();
|
||||
uint64_t estimated_keys = (uint128_t)range_pages * total_keys / total_pages;
|
||||
return std::max(uint64_t(1), estimated_keys);
|
||||
}
|
||||
|
||||
@@ -3146,7 +3222,7 @@ sstable::unlink()
|
||||
});
|
||||
|
||||
name = get_filename();
|
||||
auto update_large_data_fut = get_large_data_handler().maybe_delete_large_data_entries(*get_schema(), std::move(name), data_size())
|
||||
auto update_large_data_fut = get_large_data_handler().maybe_delete_large_data_entries(*get_schema(), name, data_size())
|
||||
.then_wrapped([name = std::move(name)] (future<> f) {
|
||||
if (f.failed()) {
|
||||
// Just log and ignore failures to delete large data entries.
|
||||
|
||||
@@ -655,6 +655,7 @@ private:
|
||||
composite::eoc marker = composite::eoc::none);
|
||||
|
||||
std::optional<std::pair<uint64_t, uint64_t>> get_sample_indexes_for_range(const dht::token_range& range);
|
||||
std::optional<std::pair<uint64_t, uint64_t>> get_index_pages_for_range(const dht::token_range& range);
|
||||
|
||||
std::vector<unsigned> compute_shards_for_this_sstable() const;
|
||||
template <typename Components>
|
||||
|
||||
@@ -458,7 +458,8 @@ enum sstable_feature : uint8_t {
|
||||
NonCompoundRangeTombstones = 1, // See #2986
|
||||
ShadowableTombstones = 2, // See #3885
|
||||
CorrectStaticCompact = 3, // See #4139
|
||||
End = 4,
|
||||
CorrectEmptyCounters = 4, // See #4363
|
||||
End = 5,
|
||||
};
|
||||
|
||||
// Scylla-specific features enabled for a particular sstable.
|
||||
|
||||
33
streaming/stream_mutation_fragments_cmd.hh
Normal file
33
streaming/stream_mutation_fragments_cmd.hh
Normal file
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Copyright (C) 2019 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace streaming {
|
||||
|
||||
enum class stream_mutation_fragments_cmd : uint8_t {
|
||||
error,
|
||||
mutation_fragment_data,
|
||||
end_of_stream,
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
@@ -65,6 +65,7 @@
|
||||
#include <boost/algorithm/cxx11/any_of.hpp>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
#include "../db/view/view_update_generator.hh"
|
||||
#include "streaming/stream_mutation_fragments_cmd.hh"
|
||||
|
||||
namespace streaming {
|
||||
|
||||
@@ -160,7 +161,7 @@ void stream_session::init_messaging_service_handler() {
|
||||
});
|
||||
});
|
||||
});
|
||||
ms().register_stream_mutation_fragments([] (const rpc::client_info& cinfo, UUID plan_id, UUID schema_id, UUID cf_id, uint64_t estimated_partitions, rpc::optional<stream_reason> reason_opt, rpc::source<frozen_mutation_fragment> source) {
|
||||
ms().register_stream_mutation_fragments([] (const rpc::client_info& cinfo, UUID plan_id, UUID schema_id, UUID cf_id, uint64_t estimated_partitions, rpc::optional<stream_reason> reason_opt, rpc::source<frozen_mutation_fragment, rpc::optional<stream_mutation_fragments_cmd>> source) {
|
||||
auto from = netw::messaging_service::get_source(cinfo);
|
||||
auto reason = reason_opt ? *reason_opt: stream_reason::unspecified;
|
||||
sslog.trace("Got stream_mutation_fragments from {} reason {}", from, int(reason));
|
||||
@@ -172,15 +173,41 @@ void stream_session::init_messaging_service_handler() {
|
||||
return with_scheduling_group(service::get_local_storage_service().db().local().get_streaming_scheduling_group(), [from, estimated_partitions, plan_id, schema_id, &cf, source, reason] () mutable {
|
||||
return service::get_schema_for_write(schema_id, from).then([from, estimated_partitions, plan_id, schema_id, &cf, source, reason] (schema_ptr s) mutable {
|
||||
auto sink = ms().make_sink_for_stream_mutation_fragments(source);
|
||||
auto get_next_mutation_fragment = [source, plan_id, from, s] () mutable {
|
||||
return source().then([plan_id, from, s] (std::optional<std::tuple<frozen_mutation_fragment>> fmf_opt) mutable {
|
||||
if (fmf_opt) {
|
||||
frozen_mutation_fragment& fmf = std::get<0>(fmf_opt.value());
|
||||
struct stream_mutation_fragments_cmd_status {
|
||||
bool got_cmd = false;
|
||||
bool got_end_of_stream = false;
|
||||
};
|
||||
auto cmd_status = make_lw_shared<stream_mutation_fragments_cmd_status>();
|
||||
auto get_next_mutation_fragment = [source, plan_id, from, s, cmd_status] () mutable {
|
||||
return source().then([plan_id, from, s, cmd_status] (std::optional<std::tuple<frozen_mutation_fragment, rpc::optional<stream_mutation_fragments_cmd>>> opt) mutable {
|
||||
if (opt) {
|
||||
auto cmd = std::get<1>(*opt);
|
||||
if (cmd) {
|
||||
cmd_status->got_cmd = true;
|
||||
switch (*cmd) {
|
||||
case stream_mutation_fragments_cmd::mutation_fragment_data:
|
||||
break;
|
||||
case stream_mutation_fragments_cmd::error:
|
||||
return make_exception_future<mutation_fragment_opt>(std::runtime_error("Sender failed"));
|
||||
case stream_mutation_fragments_cmd::end_of_stream:
|
||||
cmd_status->got_end_of_stream = true;
|
||||
return make_ready_future<mutation_fragment_opt>();
|
||||
default:
|
||||
return make_exception_future<mutation_fragment_opt>(std::runtime_error("Sender sent wrong cmd"));
|
||||
}
|
||||
}
|
||||
frozen_mutation_fragment& fmf = std::get<0>(*opt);
|
||||
auto sz = fmf.representation().size();
|
||||
auto mf = fmf.unfreeze(*s);
|
||||
streaming::get_local_stream_manager().update_progress(plan_id, from.addr, progress_info::direction::IN, sz);
|
||||
return make_ready_future<mutation_fragment_opt>(std::move(mf));
|
||||
} else {
|
||||
// If the sender has sent stream_mutation_fragments_cmd it means it is
|
||||
// a node that understands the new protocol. It must send end_of_stream
|
||||
// before close the stream.
|
||||
if (cmd_status->got_cmd && !cmd_status->got_end_of_stream) {
|
||||
return make_exception_future<mutation_fragment_opt>(std::runtime_error("Sender did not sent end_of_stream"));
|
||||
}
|
||||
return make_ready_future<mutation_fragment_opt>();
|
||||
}
|
||||
});
|
||||
@@ -211,7 +238,8 @@ void stream_session::init_messaging_service_handler() {
|
||||
int32_t status = 0;
|
||||
uint64_t received_partitions = 0;
|
||||
if (f.failed()) {
|
||||
f.ignore_ready_future();
|
||||
sslog.error("[Stream #{}] Failed to handle STREAM_MUTATION_FRAGMENTS (receive and distribute phase) for ks={}, cf={}, peer={}: {}",
|
||||
plan_id, s->ks_name(), s->cf_name(), from.addr, f.get_exception());
|
||||
status = -1;
|
||||
} else {
|
||||
received_partitions = f.get0();
|
||||
@@ -224,7 +252,8 @@ void stream_session::init_messaging_service_handler() {
|
||||
return sink.close();
|
||||
});
|
||||
}).handle_exception([s, plan_id, from, sink] (std::exception_ptr ep) {
|
||||
sslog.error("[Stream #{}] Failed to handle STREAM_MUTATION_FRAGMENTS for ks={}, cf={}, peer={}: {}", plan_id, s->ks_name(), s->cf_name(), from.addr, ep);
|
||||
sslog.error("[Stream #{}] Failed to handle STREAM_MUTATION_FRAGMENTS (respond phase) for ks={}, cf={}, peer={}: {}",
|
||||
plan_id, s->ks_name(), s->cf_name(), from.addr, ep);
|
||||
});
|
||||
return make_ready_future<rpc::sink<int>>(sink);
|
||||
});
|
||||
|
||||
@@ -42,6 +42,7 @@
|
||||
#include "streaming/stream_session.hh"
|
||||
#include "streaming/stream_manager.hh"
|
||||
#include "streaming/stream_reason.hh"
|
||||
#include "streaming/stream_mutation_fragments_cmd.hh"
|
||||
#include "mutation_reader.hh"
|
||||
#include "frozen_mutation.hh"
|
||||
#include "mutation.hh"
|
||||
@@ -105,6 +106,21 @@ struct send_info {
|
||||
, prs(to_partition_ranges(ranges))
|
||||
, reader(cf.make_streaming_reader(cf.schema(), prs)) {
|
||||
}
|
||||
future<bool> has_relevant_range_on_this_shard() {
|
||||
return do_with(false, [this] (bool& found_relevant_range) {
|
||||
return do_for_each(ranges, [this, &found_relevant_range] (dht::token_range range) {
|
||||
if (!found_relevant_range) {
|
||||
auto sharder = dht::selective_token_range_sharder(range, engine().cpu_id());
|
||||
auto range_shard = sharder.next();
|
||||
if (range_shard) {
|
||||
found_relevant_range = true;
|
||||
}
|
||||
}
|
||||
}).then([&found_relevant_range] {
|
||||
return found_relevant_range;
|
||||
});
|
||||
});
|
||||
}
|
||||
future<size_t> estimate_partitions() {
|
||||
return do_with(cf.get_sstables(), size_t(0), [this] (auto& sstables, size_t& partition_count) {
|
||||
return do_for_each(*sstables, [this, &partition_count] (auto& sst) {
|
||||
@@ -161,7 +177,7 @@ future<> send_mutations(lw_shared_ptr<send_info> si) {
|
||||
future<> send_mutation_fragments(lw_shared_ptr<send_info> si) {
|
||||
return si->estimate_partitions().then([si] (size_t estimated_partitions) {
|
||||
sslog.info("[Stream #{}] Start sending ks={}, cf={}, estimated_partitions={}, with new rpc streaming", si->plan_id, si->cf.schema()->ks_name(), si->cf.schema()->cf_name(), estimated_partitions);
|
||||
return netw::get_local_messaging_service().make_sink_and_source_for_stream_mutation_fragments(si->reader.schema()->version(), si->plan_id, si->cf_id, estimated_partitions, si->reason, si->id).then([si] (rpc::sink<frozen_mutation_fragment> sink, rpc::source<int32_t> source) mutable {
|
||||
return netw::get_local_messaging_service().make_sink_and_source_for_stream_mutation_fragments(si->reader.schema()->version(), si->plan_id, si->cf_id, estimated_partitions, si->reason, si->id).then([si] (rpc::sink<frozen_mutation_fragment, stream_mutation_fragments_cmd> sink, rpc::source<int32_t> source) mutable {
|
||||
auto got_error_from_peer = make_lw_shared<bool>(false);
|
||||
|
||||
auto source_op = [source, got_error_from_peer, si] () mutable -> future<> {
|
||||
@@ -184,18 +200,25 @@ future<> send_mutation_fragments(lw_shared_ptr<send_info> si) {
|
||||
}();
|
||||
|
||||
auto sink_op = [sink, si, got_error_from_peer] () mutable -> future<> {
|
||||
return do_with(std::move(sink), [si, got_error_from_peer] (rpc::sink<frozen_mutation_fragment>& sink) {
|
||||
return do_with(std::move(sink), [si, got_error_from_peer] (rpc::sink<frozen_mutation_fragment, stream_mutation_fragments_cmd>& sink) {
|
||||
return repeat([&sink, si, got_error_from_peer] () mutable {
|
||||
return si->reader(db::no_timeout).then([&sink, si, s = si->reader.schema(), got_error_from_peer] (mutation_fragment_opt mf) mutable {
|
||||
if (mf && !(*got_error_from_peer)) {
|
||||
frozen_mutation_fragment fmf = freeze(*s, *mf);
|
||||
auto size = fmf.representation().size();
|
||||
streaming::get_local_stream_manager().update_progress(si->plan_id, si->id.addr, streaming::progress_info::direction::OUT, size);
|
||||
return sink(fmf).then([] { return stop_iteration::no; });
|
||||
return sink(fmf, stream_mutation_fragments_cmd::mutation_fragment_data).then([] { return stop_iteration::no; });
|
||||
} else {
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||
}
|
||||
});
|
||||
}).then([&sink] () mutable {
|
||||
return sink(frozen_mutation_fragment(bytes_ostream()), stream_mutation_fragments_cmd::end_of_stream);
|
||||
}).handle_exception([&sink] (std::exception_ptr ep) mutable {
|
||||
// Notify the receiver the sender has failed
|
||||
return sink(frozen_mutation_fragment(bytes_ostream()), stream_mutation_fragments_cmd::error).then([ep = std::move(ep)] () mutable {
|
||||
return make_exception_future<>(std::move(ep));
|
||||
});
|
||||
}).finally([&sink] () mutable {
|
||||
return sink.close();
|
||||
});
|
||||
@@ -222,11 +245,18 @@ future<> stream_transfer_task::execute() {
|
||||
auto reason = session->get_reason();
|
||||
return session->get_db().invoke_on_all([plan_id, cf_id, id, dst_cpu_id, ranges=this->_ranges, streaming_with_rpc_stream, reason] (database& db) {
|
||||
auto si = make_lw_shared<send_info>(db, plan_id, cf_id, std::move(ranges), id, dst_cpu_id, reason);
|
||||
if (streaming_with_rpc_stream) {
|
||||
return send_mutation_fragments(std::move(si));
|
||||
} else {
|
||||
return send_mutations(std::move(si));
|
||||
}
|
||||
return si->has_relevant_range_on_this_shard().then([si, plan_id, cf_id, streaming_with_rpc_stream] (bool has_relevant_range_on_this_shard) {
|
||||
if (!has_relevant_range_on_this_shard) {
|
||||
sslog.debug("[Stream #{}] stream_transfer_task: cf_id={}: ignore ranges on shard={}",
|
||||
plan_id, cf_id, engine().cpu_id());
|
||||
return make_ready_future<>();
|
||||
}
|
||||
if (streaming_with_rpc_stream) {
|
||||
return send_mutation_fragments(std::move(si));
|
||||
} else {
|
||||
return send_mutations(std::move(si));
|
||||
}
|
||||
});
|
||||
}).then([this, plan_id, cf_id, id, streaming_with_rpc_stream] {
|
||||
sslog.debug("[Stream #{}] SEND STREAM_MUTATION_DONE to {}, cf_id={}", plan_id, id, cf_id);
|
||||
return session->ms().send_stream_mutation_done(id, plan_id, _ranges,
|
||||
|
||||
42
table.cc
42
table.cc
@@ -136,9 +136,12 @@ contains_rows(const sstables::sstable& sst, const schema_ptr& schema, const ck_f
|
||||
// of a range for each clustering component.
|
||||
static std::vector<sstables::shared_sstable>
|
||||
filter_sstable_for_reader(std::vector<sstables::shared_sstable>&& sstables, column_family& cf, const schema_ptr& schema,
|
||||
const sstables::key& key, const query::partition_slice& slice) {
|
||||
auto sstable_has_not_key = [&] (const sstables::shared_sstable& sst) {
|
||||
return !sst->filter_has_key(key);
|
||||
const dht::partition_range& pr, const sstables::key& key, const query::partition_slice& slice) {
|
||||
const dht::ring_position& pr_key = pr.start()->value();
|
||||
auto sstable_has_not_key = [&, cmp = dht::ring_position_comparator(*schema)] (const sstables::shared_sstable& sst) {
|
||||
return cmp(pr_key, sst->get_first_decorated_key()) < 0 ||
|
||||
cmp(pr_key, sst->get_last_decorated_key()) > 0 ||
|
||||
!sst->filter_has_key(key);
|
||||
};
|
||||
sstables.erase(boost::remove_if(sstables, sstable_has_not_key), sstables.end());
|
||||
|
||||
@@ -286,7 +289,7 @@ create_single_key_sstable_reader(column_family* cf,
|
||||
{
|
||||
auto key = sstables::key::from_partition_key(*schema, *pr.start()->value().key());
|
||||
auto readers = boost::copy_range<std::vector<flat_mutation_reader>>(
|
||||
filter_sstable_for_reader(sstables->select(pr), *cf, schema, key, slice)
|
||||
filter_sstable_for_reader(sstables->select(pr), *cf, schema, pr, key, slice)
|
||||
| boost::adaptors::transformed([&] (const sstables::shared_sstable& sstable) {
|
||||
tracing::trace(trace_state, "Reading key {} from sstable {}", pr, seastar::value_of([&sstable] { return sstable->get_filename(); }));
|
||||
return sstable->read_row_flat(schema, pr.start()->value(), slice, pc, resource_tracker, fwd);
|
||||
@@ -510,8 +513,8 @@ table::make_streaming_reader(schema_ptr s,
|
||||
return make_flat_multi_range_reader(s, std::move(source), ranges, slice, pc, nullptr, mutation_reader::forwarding::no);
|
||||
}
|
||||
|
||||
flat_mutation_reader table::make_streaming_reader(schema_ptr schema, const dht::partition_range& range, mutation_reader::forwarding fwd_mr) const {
|
||||
const auto& slice = schema->full_slice();
|
||||
flat_mutation_reader table::make_streaming_reader(schema_ptr schema, const dht::partition_range& range,
|
||||
const query::partition_slice& slice, mutation_reader::forwarding fwd_mr) const {
|
||||
const auto& pc = service::get_local_streaming_read_priority();
|
||||
auto trace_state = tracing::trace_state_ptr();
|
||||
const auto fwd = streamed_mutation::forwarding::no;
|
||||
@@ -1029,6 +1032,7 @@ table::reshuffle_sstables(std::set<int64_t> all_generations, int64_t start) {
|
||||
};
|
||||
|
||||
return do_with(work(start, std::move(all_generations)), [this] (work& work) {
|
||||
tlogger.info("Reshuffling SSTables in {}...", _config.datadir);
|
||||
return lister::scan_dir(_config.datadir, { directory_entry_type::regular }, [this, &work] (fs::path parent_dir, directory_entry de) {
|
||||
auto comps = sstables::entry_descriptor::make_descriptor(parent_dir.native(), de.name);
|
||||
if (comps.component != component_type::TOC) {
|
||||
@@ -1345,7 +1349,8 @@ future<> table::cleanup_sstables(sstables::compaction_descriptor descriptor, boo
|
||||
return with_semaphore(sem, 1, [this, &sst, &release_fn, is_actual_cleanup] {
|
||||
// release reference to sstables cleaned up, otherwise space usage from their data and index
|
||||
// components cannot be reclaimed until all of them are cleaned.
|
||||
auto descriptor = sstables::compaction_descriptor({ std::move(sst) }, sst->get_sstable_level());
|
||||
auto sstable_level = sst->get_sstable_level();
|
||||
auto descriptor = sstables::compaction_descriptor({ std::move(sst) }, sstable_level);
|
||||
descriptor.release_exhausted = release_fn;
|
||||
return this->compact_sstables(std::move(descriptor), is_actual_cleanup);
|
||||
});
|
||||
@@ -1956,6 +1961,8 @@ future<int64_t>
|
||||
table::disable_sstable_write() {
|
||||
_sstable_writes_disabled_at = std::chrono::steady_clock::now();
|
||||
return _sstables_lock.write_lock().then([this] {
|
||||
// _sstable_deletion_sem must be acquired after _sstables_lock.write_lock
|
||||
return _sstable_deletion_sem.wait().then([this] {
|
||||
if (_sstables->all()->empty()) {
|
||||
return make_ready_future<int64_t>(0);
|
||||
}
|
||||
@@ -1964,9 +1971,18 @@ table::disable_sstable_write() {
|
||||
max = std::max(max, s->generation());
|
||||
}
|
||||
return make_ready_future<int64_t>(max);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
std::chrono::steady_clock::duration table::enable_sstable_write(int64_t new_generation) {
|
||||
if (new_generation != -1) {
|
||||
update_sstables_known_generation(new_generation);
|
||||
}
|
||||
_sstable_deletion_sem.signal();
|
||||
_sstables_lock.write_unlock();
|
||||
return std::chrono::steady_clock::now() - _sstable_writes_disabled_at;
|
||||
}
|
||||
|
||||
void table::set_schema(schema_ptr s) {
|
||||
tlogger.debug("Changing schema version of {}.{} ({}) from {} to {}",
|
||||
@@ -2456,7 +2472,7 @@ future<row_locker::lock_holder> table::push_view_replica_updates(const schema_pt
|
||||
return push_view_replica_updates(s, std::move(m), timeout);
|
||||
}
|
||||
|
||||
future<row_locker::lock_holder> table::do_push_view_replica_updates(const schema_ptr& s, mutation&& m, db::timeout_clock::time_point timeout, mutation_source&& source) const {
|
||||
future<row_locker::lock_holder> table::do_push_view_replica_updates(const schema_ptr& s, mutation&& m, db::timeout_clock::time_point timeout, mutation_source&& source, const io_priority_class& io_priority) const {
|
||||
if (!_config.view_update_concurrency_semaphore->current()) {
|
||||
// We don't have resources to generate view updates for this write. If we reached this point, we failed to
|
||||
// throttle the client. The memory queue is already full, waiting on the semaphore would cause this node to
|
||||
@@ -2496,13 +2512,13 @@ future<row_locker::lock_holder> table::do_push_view_replica_updates(const schema
|
||||
// We'll return this lock to the caller, which will release it after
|
||||
// writing the base-table update.
|
||||
future<row_locker::lock_holder> lockf = local_base_lock(base, m.decorated_key(), slice.default_row_ranges(), timeout);
|
||||
return lockf.then([m = std::move(m), slice = std::move(slice), views = std::move(views), base, this, timeout, source = std::move(source)] (row_locker::lock_holder lock) {
|
||||
return lockf.then([m = std::move(m), slice = std::move(slice), views = std::move(views), base, this, timeout, source = std::move(source), &io_priority] (row_locker::lock_holder lock) {
|
||||
return do_with(
|
||||
dht::partition_range::make_singular(m.decorated_key()),
|
||||
std::move(slice),
|
||||
std::move(m),
|
||||
[base, views = std::move(views), lock = std::move(lock), this, timeout, source = std::move(source)] (auto& pk, auto& slice, auto& m) mutable {
|
||||
auto reader = source.make_reader(base, pk, slice, service::get_local_sstable_query_read_priority());
|
||||
[base, views = std::move(views), lock = std::move(lock), this, timeout, source = std::move(source), &io_priority] (auto& pk, auto& slice, auto& m) mutable {
|
||||
auto reader = source.make_reader(base, pk, slice, io_priority);
|
||||
return this->generate_and_propagate_view_updates(base, std::move(views), std::move(m), std::move(reader)).then([lock = std::move(lock)] () mutable {
|
||||
// return the local partition/row lock we have taken so it
|
||||
// remains locked until the caller is done modifying this
|
||||
@@ -2514,11 +2530,11 @@ future<row_locker::lock_holder> table::do_push_view_replica_updates(const schema
|
||||
}
|
||||
|
||||
future<row_locker::lock_holder> table::push_view_replica_updates(const schema_ptr& s, mutation&& m, db::timeout_clock::time_point timeout) const {
|
||||
return do_push_view_replica_updates(s, std::move(m), timeout, as_mutation_source());
|
||||
return do_push_view_replica_updates(s, std::move(m), timeout, as_mutation_source(), service::get_local_sstable_query_read_priority());
|
||||
}
|
||||
|
||||
future<row_locker::lock_holder> table::stream_view_replica_updates(const schema_ptr& s, mutation&& m, db::timeout_clock::time_point timeout, sstables::shared_sstable excluded_sstable) const {
|
||||
return do_push_view_replica_updates(s, std::move(m), timeout, as_mutation_source_excluding(std::move(excluded_sstable)));
|
||||
return do_push_view_replica_updates(s, std::move(m), timeout, as_mutation_source_excluding(std::move(excluded_sstable)), service::get_local_streaming_write_priority());
|
||||
}
|
||||
|
||||
mutation_source
|
||||
|
||||
@@ -178,3 +178,14 @@ rows_assertions rows_assertions::with_serialized_columns_count(size_t columns_co
|
||||
}
|
||||
return {*this};
|
||||
}
|
||||
|
||||
shared_ptr<cql_transport::messages::result_message> cquery_nofail(
|
||||
cql_test_env& env, const char* query, const std::experimental::source_location& loc) {
|
||||
try {
|
||||
return env.execute_cql(query).get0();
|
||||
} catch (...) {
|
||||
BOOST_FAIL(format("query '{}' failed: {}\n{}:{}: originally from here",
|
||||
query, std::current_exception(), loc.file_name(), loc.line()));
|
||||
}
|
||||
return shared_ptr<cql_transport::messages::result_message>(nullptr);
|
||||
}
|
||||
|
||||
@@ -22,8 +22,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "tests/cql_test_env.hh"
|
||||
#include "transport/messages/result_message_base.hh"
|
||||
#include "bytes.hh"
|
||||
#include <experimental/source_location>
|
||||
#include <seastar/core/shared_ptr.hh>
|
||||
#include <seastar/core/future.hh>
|
||||
|
||||
@@ -76,3 +78,12 @@ void assert_that_failed(future<T...>&& f)
|
||||
catch (...) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Invokes env.execute_cql(query), awaits its result, and returns it. If an exception is thrown,
|
||||
/// invokes BOOST_FAIL with useful diagnostics.
|
||||
///
|
||||
/// \note Should be called from a seastar::thread context, as it awaits the CQL result.
|
||||
shared_ptr<cql_transport::messages::result_message> cquery_nofail(
|
||||
cql_test_env& env,
|
||||
const char* query,
|
||||
const std::experimental::source_location& loc = std::experimental::source_location::current());
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user