Compare commits
167 Commits
next
...
scylla-3.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c25f627a6e | ||
|
|
58b1bdc20c | ||
|
|
e1a7df174c | ||
|
|
6c39e17838 | ||
|
|
507d763f45 | ||
|
|
b042e27f0a | ||
|
|
375ce345a3 | ||
|
|
493b821dfa | ||
|
|
e1999c76b2 | ||
|
|
4791b726a0 | ||
|
|
d7354a5b8d | ||
|
|
6815b72b06 | ||
|
|
efc2df8ca3 | ||
|
|
dbe90f131f | ||
|
|
31d5d16c3d | ||
|
|
b0d122f9c5 | ||
|
|
9a10e4a245 | ||
|
|
871d1ebdd5 | ||
|
|
bff996959d | ||
|
|
1bdc83540b | ||
|
|
478c35e07a | ||
|
|
ba968ab9ec | ||
|
|
883b5e8395 | ||
|
|
b47033676a | ||
|
|
67e45b73f0 | ||
|
|
37eac75b6f | ||
|
|
e8431a3474 | ||
|
|
9d78d848e6 | ||
|
|
32aa6ddd7e | ||
|
|
74cc9477af | ||
|
|
95acf71680 | ||
|
|
921f8baf00 | ||
|
|
071d7d9210 | ||
|
|
769b9bbe59 | ||
|
|
d4e553c153 | ||
|
|
d983411488 | ||
|
|
27de1bb8e6 | ||
|
|
854f8ccb40 | ||
|
|
a68170c9a3 | ||
|
|
7e4bcf2c0f | ||
|
|
a74b3a182e | ||
|
|
e9bc579565 | ||
|
|
ad46bf06a7 | ||
|
|
1ff21a28b7 | ||
|
|
fb3dfaa736 | ||
|
|
5a02e6976f | ||
|
|
5202eea7a7 | ||
|
|
038733f1a5 | ||
|
|
0ed2e90925 | ||
|
|
9ee6d2bc15 | ||
|
|
23582a2ce9 | ||
|
|
5ddf0ec1df | ||
|
|
e6eb54af90 | ||
|
|
f5a869966a | ||
|
|
0c70cd626b | ||
|
|
0928aa4791 | ||
|
|
f32ec885c4 | ||
|
|
762eec2bc6 | ||
|
|
3f4d9f210f | ||
|
|
9c3cdded9e | ||
|
|
05272c53ed | ||
|
|
393b2abdc9 | ||
|
|
d9dc8f92cc | ||
|
|
c009f7b182 | ||
|
|
303a56f2bd | ||
|
|
57512d3df9 | ||
|
|
a894868298 | ||
|
|
a5d385d702 | ||
|
|
6413063b1b | ||
|
|
0d31c6da62 | ||
|
|
b62bb036ed | ||
|
|
bdabd2e7a4 | ||
|
|
d7fc7bcf9f | ||
|
|
21aec9c7ef | ||
|
|
02ce19e851 | ||
|
|
37c4be5e74 | ||
|
|
d81ac93728 | ||
|
|
024d1563ad | ||
|
|
4a1a281e84 | ||
|
|
d61dd1a933 | ||
|
|
447c1e3bcc | ||
|
|
834b92b3d7 | ||
|
|
2ec036f50c | ||
|
|
958fe2024f | ||
|
|
cd998b949a | ||
|
|
2e1e1392ea | ||
|
|
623ea5e3d9 | ||
|
|
f92a7ca2bf | ||
|
|
d70c2db09c | ||
|
|
e4a39ed319 | ||
|
|
bb70b9ed56 | ||
|
|
e06e795031 | ||
|
|
7d56e8e5bb | ||
|
|
417250607b | ||
|
|
d06bcef3b7 | ||
|
|
50c5cb6861 | ||
|
|
70f5154109 | ||
|
|
329c419c30 | ||
|
|
062d43c76e | ||
|
|
cf4c238b28 | ||
|
|
20090c1992 | ||
|
|
8ffb567474 | ||
|
|
710ec83d12 | ||
|
|
8d7c489436 | ||
|
|
6ec558e3a0 | ||
|
|
b1e2842c8c | ||
|
|
5a273737e3 | ||
|
|
b0d2312623 | ||
|
|
2f007d8e6b | ||
|
|
bebfd7b26c | ||
|
|
03b48b2caf | ||
|
|
95362624bc | ||
|
|
7865c314a5 | ||
|
|
0e6b62244c | ||
|
|
9d722a56b3 | ||
|
|
7009d5fb23 | ||
|
|
eb49fae020 | ||
|
|
92bf928170 | ||
|
|
deac0b0e94 | ||
|
|
c294000113 | ||
|
|
18bb2045aa | ||
|
|
5e3276d08f | ||
|
|
acff367ea8 | ||
|
|
e39724a343 | ||
|
|
31c4db83d8 | ||
|
|
433cb93f7a | ||
|
|
f553819919 | ||
|
|
48c34e7635 | ||
|
|
7f85b30941 | ||
|
|
7d14514b8a | ||
|
|
35f906f06f | ||
|
|
2c50a484f5 | ||
|
|
24ddb46707 | ||
|
|
f2fc3f32af | ||
|
|
c9f488ddc2 | ||
|
|
46498e77b8 | ||
|
|
440f33709e | ||
|
|
34696e1582 | ||
|
|
43bb290705 | ||
|
|
53980816de | ||
|
|
c1f4617530 | ||
|
|
efde9416ed | ||
|
|
224f9cee7e | ||
|
|
cd1d13f805 | ||
|
|
899291bc9b | ||
|
|
4130973f51 | ||
|
|
24e2c72888 | ||
|
|
69cc7d89c8 | ||
|
|
5f6c5d566a | ||
|
|
f32aea3834 | ||
|
|
933260cb53 | ||
|
|
f8ff0e1993 | ||
|
|
1fbab82553 | ||
|
|
c664615960 | ||
|
|
6a682dc5a2 | ||
|
|
c1271d08d3 | ||
|
|
0d5c2501b3 | ||
|
|
0dd84898ee | ||
|
|
d568270d7f | ||
|
|
78c57f18c4 | ||
|
|
ce27949797 | ||
|
|
6b47e23d29 | ||
|
|
1cb6cc0ac4 | ||
|
|
67435eff15 | ||
|
|
086ce13fb9 | ||
|
|
eb9a8f4442 | ||
|
|
178fb5fe5f |
2
.gitmodules
vendored
2
.gitmodules
vendored
@@ -1,6 +1,6 @@
|
||||
[submodule "seastar"]
|
||||
path = seastar
|
||||
url = ../seastar
|
||||
url = ../scylla-seastar
|
||||
ignore = dirty
|
||||
[submodule "swagger-ui"]
|
||||
path = swagger-ui
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
|
||||
PRODUCT=scylla
|
||||
VERSION=666.development
|
||||
VERSION=3.1.4
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
18
api/api.hh
18
api/api.hh
@@ -22,6 +22,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <seastar/json/json_elements.hh>
|
||||
#include <type_traits>
|
||||
#include <boost/lexical_cast.hpp>
|
||||
#include <boost/algorithm/string/split.hpp>
|
||||
#include <boost/algorithm/string/classification.hpp>
|
||||
@@ -231,7 +232,22 @@ public:
|
||||
return;
|
||||
}
|
||||
try {
|
||||
value = T{boost::lexical_cast<Base>(param)};
|
||||
// boost::lexical_cast does not use boolalpha. Converting a
|
||||
// true/false throws exceptions. We don't want that.
|
||||
if constexpr (std::is_same_v<Base, bool>) {
|
||||
// Cannot use boolalpha because we (probably) want to
|
||||
// accept 1 and 0 as well as true and false. And True. And fAlse.
|
||||
std::transform(param.begin(), param.end(), param.begin(), ::tolower);
|
||||
if (param == "true" || param == "1") {
|
||||
value = T(true);
|
||||
} else if (param == "false" || param == "0") {
|
||||
value = T(false);
|
||||
} else {
|
||||
throw boost::bad_lexical_cast{};
|
||||
}
|
||||
} else {
|
||||
value = T{boost::lexical_cast<Base>(param)};
|
||||
}
|
||||
} catch (boost::bad_lexical_cast&) {
|
||||
throw bad_param_exception(format("{} ({}): type error - should be {}", name, param, boost::units::detail::demangle(typeid(Base).name())));
|
||||
}
|
||||
|
||||
@@ -170,7 +170,9 @@ future<> service::start() {
|
||||
return once_among_shards([this] {
|
||||
return create_keyspace_if_missing();
|
||||
}).then([this] {
|
||||
return when_all_succeed(_role_manager->start(), _authorizer->start(), _authenticator->start());
|
||||
return _role_manager->start().then([this] {
|
||||
return when_all_succeed(_authorizer->start(), _authenticator->start());
|
||||
});
|
||||
}).then([this] {
|
||||
_permissions_cache = std::make_unique<permissions_cache>(_permissions_cache_config, *this, log);
|
||||
}).then([this] {
|
||||
|
||||
@@ -61,6 +61,7 @@ class cache_flat_mutation_reader final : public flat_mutation_reader::impl {
|
||||
// - _last_row points at a direct predecessor of the next row which is going to be read.
|
||||
// Used for populating continuity.
|
||||
// - _population_range_starts_before_all_rows is set accordingly
|
||||
// - _underlying is engaged and fast-forwarded
|
||||
reading_from_underlying,
|
||||
|
||||
end_of_stream
|
||||
@@ -99,7 +100,13 @@ class cache_flat_mutation_reader final : public flat_mutation_reader::impl {
|
||||
// forward progress is not guaranteed in case iterators are getting constantly invalidated.
|
||||
bool _lower_bound_changed = false;
|
||||
|
||||
// Points to the underlying reader conforming to _schema,
|
||||
// either to *_underlying_holder or _read_context->underlying().underlying().
|
||||
flat_mutation_reader* _underlying = nullptr;
|
||||
std::optional<flat_mutation_reader> _underlying_holder;
|
||||
|
||||
future<> do_fill_buffer(db::timeout_clock::time_point);
|
||||
future<> ensure_underlying(db::timeout_clock::time_point);
|
||||
void copy_from_cache_to_buffer();
|
||||
future<> process_static_row(db::timeout_clock::time_point);
|
||||
void move_to_end();
|
||||
@@ -186,23 +193,22 @@ future<> cache_flat_mutation_reader::process_static_row(db::timeout_clock::time_
|
||||
return make_ready_future<>();
|
||||
} else {
|
||||
_read_context->cache().on_row_miss();
|
||||
return _read_context->get_next_fragment(timeout).then([this] (mutation_fragment_opt&& sr) {
|
||||
if (sr) {
|
||||
assert(sr->is_static_row());
|
||||
maybe_add_to_cache(sr->as_static_row());
|
||||
push_mutation_fragment(std::move(*sr));
|
||||
}
|
||||
maybe_set_static_row_continuous();
|
||||
return ensure_underlying(timeout).then([this, timeout] {
|
||||
return (*_underlying)(timeout).then([this] (mutation_fragment_opt&& sr) {
|
||||
if (sr) {
|
||||
assert(sr->is_static_row());
|
||||
maybe_add_to_cache(sr->as_static_row());
|
||||
push_mutation_fragment(std::move(*sr));
|
||||
}
|
||||
maybe_set_static_row_continuous();
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
void cache_flat_mutation_reader::touch_partition() {
|
||||
if (_snp->at_latest_version()) {
|
||||
rows_entry& last_dummy = *_snp->version()->partition().clustered_rows().rbegin();
|
||||
_snp->tracker()->touch(last_dummy);
|
||||
}
|
||||
_snp->touch();
|
||||
}
|
||||
|
||||
inline
|
||||
@@ -232,14 +238,36 @@ future<> cache_flat_mutation_reader::fill_buffer(db::timeout_clock::time_point t
|
||||
});
|
||||
}
|
||||
|
||||
inline
|
||||
future<> cache_flat_mutation_reader::ensure_underlying(db::timeout_clock::time_point timeout) {
|
||||
if (_underlying) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return _read_context->ensure_underlying(timeout).then([this, timeout] {
|
||||
flat_mutation_reader& ctx_underlying = _read_context->underlying().underlying();
|
||||
if (ctx_underlying.schema() != _schema) {
|
||||
_underlying_holder = make_delegating_reader(ctx_underlying);
|
||||
_underlying_holder->upgrade_schema(_schema);
|
||||
_underlying = &*_underlying_holder;
|
||||
} else {
|
||||
_underlying = &ctx_underlying;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
inline
|
||||
future<> cache_flat_mutation_reader::do_fill_buffer(db::timeout_clock::time_point timeout) {
|
||||
if (_state == state::move_to_underlying) {
|
||||
if (!_underlying) {
|
||||
return ensure_underlying(timeout).then([this, timeout] {
|
||||
return do_fill_buffer(timeout);
|
||||
});
|
||||
}
|
||||
_state = state::reading_from_underlying;
|
||||
_population_range_starts_before_all_rows = _lower_bound.is_before_all_clustered_rows(*_schema);
|
||||
auto end = _next_row_in_range ? position_in_partition(_next_row.position())
|
||||
: position_in_partition(_upper_bound);
|
||||
return _read_context->fast_forward_to(position_range{_lower_bound, std::move(end)}, timeout).then([this, timeout] {
|
||||
return _underlying->fast_forward_to(position_range{_lower_bound, std::move(end)}, timeout).then([this, timeout] {
|
||||
return read_from_underlying(timeout);
|
||||
});
|
||||
}
|
||||
@@ -280,7 +308,7 @@ future<> cache_flat_mutation_reader::do_fill_buffer(db::timeout_clock::time_poin
|
||||
|
||||
inline
|
||||
future<> cache_flat_mutation_reader::read_from_underlying(db::timeout_clock::time_point timeout) {
|
||||
return consume_mutation_fragments_until(_read_context->underlying().underlying(),
|
||||
return consume_mutation_fragments_until(*_underlying,
|
||||
[this] { return _state != state::reading_from_underlying || is_buffer_full(); },
|
||||
[this] (mutation_fragment mf) {
|
||||
_read_context->cache().on_row_miss();
|
||||
|
||||
@@ -596,6 +596,7 @@ scylla_core = (['database.cc',
|
||||
'db/consistency_level.cc',
|
||||
'db/system_keyspace.cc',
|
||||
'db/system_distributed_keyspace.cc',
|
||||
'db/size_estimates_virtual_reader.cc',
|
||||
'db/schema_tables.cc',
|
||||
'db/cql_type_parser.cc',
|
||||
'db/legacy_schema_migrator.cc',
|
||||
|
||||
@@ -61,6 +61,16 @@ make_now_fct() {
|
||||
});
|
||||
}
|
||||
|
||||
static int64_t get_valid_timestamp(const data_value& ts_obj) {
|
||||
auto ts = value_cast<db_clock::time_point>(ts_obj);
|
||||
int64_t ms = ts.time_since_epoch().count();
|
||||
auto nanos_since = utils::UUID_gen::make_nanos_since(ms);
|
||||
if (!utils::UUID_gen::is_valid_nanos_since(nanos_since)) {
|
||||
throw exceptions::server_exception(format("{}: timestamp is out of range. Must be in milliseconds since epoch", ms));
|
||||
}
|
||||
return ms;
|
||||
}
|
||||
|
||||
inline
|
||||
shared_ptr<function>
|
||||
make_min_timeuuid_fct() {
|
||||
@@ -74,8 +84,7 @@ make_min_timeuuid_fct() {
|
||||
if (ts_obj.is_null()) {
|
||||
return {};
|
||||
}
|
||||
auto ts = value_cast<db_clock::time_point>(ts_obj);
|
||||
auto uuid = utils::UUID_gen::min_time_UUID(ts.time_since_epoch().count());
|
||||
auto uuid = utils::UUID_gen::min_time_UUID(get_valid_timestamp(ts_obj));
|
||||
return {timeuuid_type->decompose(uuid)};
|
||||
});
|
||||
}
|
||||
@@ -85,7 +94,6 @@ shared_ptr<function>
|
||||
make_max_timeuuid_fct() {
|
||||
return make_native_scalar_function<true>("maxtimeuuid", timeuuid_type, { timestamp_type },
|
||||
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
|
||||
// FIXME: should values be a vector<optional<bytes>>?
|
||||
auto& bb = values[0];
|
||||
if (!bb) {
|
||||
return {};
|
||||
@@ -94,12 +102,22 @@ make_max_timeuuid_fct() {
|
||||
if (ts_obj.is_null()) {
|
||||
return {};
|
||||
}
|
||||
auto ts = value_cast<db_clock::time_point>(ts_obj);
|
||||
auto uuid = utils::UUID_gen::max_time_UUID(ts.time_since_epoch().count());
|
||||
auto uuid = utils::UUID_gen::max_time_UUID(get_valid_timestamp(ts_obj));
|
||||
return {timeuuid_type->decompose(uuid)};
|
||||
});
|
||||
}
|
||||
|
||||
inline utils::UUID get_valid_timeuuid(bytes raw) {
|
||||
if (!utils::UUID_gen::is_valid_UUID(raw)) {
|
||||
throw exceptions::server_exception(format("invalid timeuuid: size={}", raw.size()));
|
||||
}
|
||||
auto uuid = utils::UUID_gen::get_UUID(raw);
|
||||
if (!uuid.is_timestamp()) {
|
||||
throw exceptions::server_exception(format("{}: Not a timeuuid: version={}", uuid, uuid.version()));
|
||||
}
|
||||
return uuid;
|
||||
}
|
||||
|
||||
inline
|
||||
shared_ptr<function>
|
||||
make_date_of_fct() {
|
||||
@@ -110,7 +128,7 @@ make_date_of_fct() {
|
||||
if (!bb) {
|
||||
return {};
|
||||
}
|
||||
auto ts = db_clock::time_point(db_clock::duration(UUID_gen::unix_timestamp(UUID_gen::get_UUID(*bb))));
|
||||
auto ts = db_clock::time_point(db_clock::duration(UUID_gen::unix_timestamp(get_valid_timeuuid(*bb))));
|
||||
return {timestamp_type->decompose(ts)};
|
||||
});
|
||||
}
|
||||
@@ -125,7 +143,7 @@ make_unix_timestamp_of_fct() {
|
||||
if (!bb) {
|
||||
return {};
|
||||
}
|
||||
return {long_type->decompose(UUID_gen::unix_timestamp(UUID_gen::get_UUID(*bb)))};
|
||||
return {long_type->decompose(UUID_gen::unix_timestamp(get_valid_timeuuid(*bb)))};
|
||||
});
|
||||
}
|
||||
|
||||
@@ -176,7 +194,7 @@ make_timeuuidtodate_fct() {
|
||||
if (!bb) {
|
||||
return {};
|
||||
}
|
||||
auto ts = db_clock::time_point(db_clock::duration(UUID_gen::unix_timestamp(UUID_gen::get_UUID(*bb))));
|
||||
auto ts = db_clock::time_point(db_clock::duration(UUID_gen::unix_timestamp(get_valid_timeuuid(*bb))));
|
||||
auto to_simple_date = get_castas_fctn(simple_date_type, timestamp_type);
|
||||
return {simple_date_type->decompose(to_simple_date(ts))};
|
||||
});
|
||||
@@ -211,7 +229,7 @@ make_timeuuidtotimestamp_fct() {
|
||||
if (!bb) {
|
||||
return {};
|
||||
}
|
||||
auto ts = db_clock::time_point(db_clock::duration(UUID_gen::unix_timestamp(UUID_gen::get_UUID(*bb))));
|
||||
auto ts = db_clock::time_point(db_clock::duration(UUID_gen::unix_timestamp(get_valid_timeuuid(*bb))));
|
||||
return {timestamp_type->decompose(ts)};
|
||||
});
|
||||
}
|
||||
@@ -245,10 +263,14 @@ make_timeuuidtounixtimestamp_fct() {
|
||||
if (!bb) {
|
||||
return {};
|
||||
}
|
||||
return {long_type->decompose(UUID_gen::unix_timestamp(UUID_gen::get_UUID(*bb)))};
|
||||
return {long_type->decompose(UUID_gen::unix_timestamp(get_valid_timeuuid(*bb)))};
|
||||
});
|
||||
}
|
||||
|
||||
inline bytes time_point_to_long(const data_value& v) {
|
||||
return data_value(get_valid_timestamp(v)).serialize();
|
||||
}
|
||||
|
||||
inline
|
||||
shared_ptr<function>
|
||||
make_timestamptounixtimestamp_fct() {
|
||||
@@ -263,7 +285,7 @@ make_timestamptounixtimestamp_fct() {
|
||||
if (ts_obj.is_null()) {
|
||||
return {};
|
||||
}
|
||||
return {long_type->decompose(ts_obj)};
|
||||
return time_point_to_long(ts_obj);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -282,7 +304,7 @@ make_datetounixtimestamp_fct() {
|
||||
return {};
|
||||
}
|
||||
auto from_simple_date = get_castas_fctn(timestamp_type, simple_date_type);
|
||||
return {long_type->decompose(from_simple_date(simple_date_obj))};
|
||||
return time_point_to_long(from_simple_date(simple_date_obj));
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -130,6 +130,18 @@ query_options::query_options(std::unique_ptr<query_options> qo, ::shared_ptr<ser
|
||||
|
||||
}
|
||||
|
||||
query_options::query_options(std::unique_ptr<query_options> qo, ::shared_ptr<service::pager::paging_state> paging_state, int32_t page_size)
|
||||
: query_options(qo->_consistency,
|
||||
qo->get_timeout_config(),
|
||||
std::move(qo->_names),
|
||||
std::move(qo->_values),
|
||||
std::move(qo->_value_views),
|
||||
qo->_skip_metadata,
|
||||
std::move(query_options::specific_options{page_size, paging_state, qo->_options.serial_consistency, qo->_options.timestamp}),
|
||||
qo->_cql_serialization_format) {
|
||||
|
||||
}
|
||||
|
||||
query_options::query_options(std::vector<cql3::raw_value> values)
|
||||
: query_options(
|
||||
db::consistency_level::ONE, infinite_timeout_config, std::move(values))
|
||||
|
||||
@@ -102,7 +102,7 @@ private:
|
||||
|
||||
public:
|
||||
query_options(query_options&&) = default;
|
||||
query_options(const query_options&) = delete;
|
||||
explicit query_options(const query_options&) = default;
|
||||
|
||||
explicit query_options(db::consistency_level consistency,
|
||||
const timeout_config& timeouts,
|
||||
@@ -155,6 +155,7 @@ public:
|
||||
explicit query_options(db::consistency_level, const timeout_config& timeouts,
|
||||
std::vector<cql3::raw_value> values, specific_options options = specific_options::DEFAULT);
|
||||
explicit query_options(std::unique_ptr<query_options>, ::shared_ptr<service::pager::paging_state> paging_state);
|
||||
explicit query_options(std::unique_ptr<query_options>, ::shared_ptr<service::pager::paging_state> paging_state, int32_t page_size);
|
||||
|
||||
const timeout_config& get_timeout_config() const { return _timeout_config; }
|
||||
|
||||
|
||||
@@ -222,11 +222,9 @@ statement_restrictions::statement_restrictions(database& db,
|
||||
auto& cf = db.find_column_family(schema);
|
||||
auto& sim = cf.get_index_manager();
|
||||
const allow_local_index allow_local(!_partition_key_restrictions->has_unrestricted_components(*_schema) && _partition_key_restrictions->is_all_eq());
|
||||
bool has_queriable_clustering_column_index = _clustering_columns_restrictions->has_supporting_index(sim, allow_local);
|
||||
bool has_queriable_pk_index = _partition_key_restrictions->has_supporting_index(sim, allow_local);
|
||||
bool has_queriable_index = has_queriable_clustering_column_index
|
||||
|| has_queriable_pk_index
|
||||
|| _nonprimary_key_restrictions->has_supporting_index(sim, allow_local);
|
||||
const bool has_queriable_clustering_column_index = _clustering_columns_restrictions->has_supporting_index(sim, allow_local);
|
||||
const bool has_queriable_pk_index = _partition_key_restrictions->has_supporting_index(sim, allow_local);
|
||||
const bool has_queriable_regular_index = _nonprimary_key_restrictions->has_supporting_index(sim, allow_local);
|
||||
|
||||
// At this point, the select statement if fully constructed, but we still have a few things to validate
|
||||
process_partition_key_restrictions(has_queriable_pk_index, for_view, allow_filtering);
|
||||
@@ -286,7 +284,7 @@ statement_restrictions::statement_restrictions(database& db,
|
||||
}
|
||||
|
||||
if (!_nonprimary_key_restrictions->empty()) {
|
||||
if (has_queriable_index) {
|
||||
if (has_queriable_regular_index) {
|
||||
_uses_secondary_indexing = true;
|
||||
} else if (!allow_filtering) {
|
||||
throw exceptions::invalid_request_exception("Cannot execute this query as it might involve data filtering and "
|
||||
@@ -392,8 +390,9 @@ std::vector<const column_definition*> statement_restrictions::get_column_defs_fo
|
||||
}
|
||||
}
|
||||
}
|
||||
if (_clustering_columns_restrictions->needs_filtering(*_schema)) {
|
||||
column_id first_filtering_id = _schema->clustering_key_columns().begin()->id +
|
||||
const bool pk_has_unrestricted_components = _partition_key_restrictions->has_unrestricted_components(*_schema);
|
||||
if (pk_has_unrestricted_components || _clustering_columns_restrictions->needs_filtering(*_schema)) {
|
||||
column_id first_filtering_id = pk_has_unrestricted_components ? 0 : _schema->clustering_key_columns().begin()->id +
|
||||
_clustering_columns_restrictions->num_prefix_columns_that_need_not_be_filtered();
|
||||
for (auto&& cdef : _clustering_columns_restrictions->get_column_defs()) {
|
||||
if (cdef->id >= first_filtering_id && !column_uses_indexing(cdef)) {
|
||||
@@ -507,10 +506,9 @@ bool statement_restrictions::need_filtering() const {
|
||||
int number_of_filtering_restrictions = _nonprimary_key_restrictions->size();
|
||||
// If the whole partition key is restricted, it does not imply filtering
|
||||
if (_partition_key_restrictions->has_unrestricted_components(*_schema) || !_partition_key_restrictions->is_all_eq()) {
|
||||
number_of_filtering_restrictions += _partition_key_restrictions->size();
|
||||
if (_clustering_columns_restrictions->has_unrestricted_components(*_schema)) {
|
||||
number_of_filtering_restrictions += _clustering_columns_restrictions->size() - _clustering_columns_restrictions->prefix_size();
|
||||
}
|
||||
number_of_filtering_restrictions += _partition_key_restrictions->size() + _clustering_columns_restrictions->size();
|
||||
} else if (_clustering_columns_restrictions->has_unrestricted_components(*_schema)) {
|
||||
number_of_filtering_restrictions += _clustering_columns_restrictions->size() - _clustering_columns_restrictions->prefix_size();
|
||||
}
|
||||
return number_of_restricted_columns_for_indexing > 1
|
||||
|| (number_of_restricted_columns_for_indexing == 0 && _partition_key_restrictions->empty() && !_clustering_columns_restrictions->empty())
|
||||
|
||||
@@ -407,7 +407,7 @@ public:
|
||||
}
|
||||
|
||||
bool ck_restrictions_need_filtering() const {
|
||||
return _clustering_columns_restrictions->needs_filtering(*_schema);
|
||||
return _partition_key_restrictions->has_unrestricted_components(*_schema) || _clustering_columns_restrictions->needs_filtering(*_schema);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -83,6 +83,9 @@ void metadata::maybe_set_paging_state(::shared_ptr<const service::pager::paging_
|
||||
assert(paging_state);
|
||||
if (paging_state->get_remaining() > 0) {
|
||||
set_paging_state(std::move(paging_state));
|
||||
} else {
|
||||
_flags.remove<flag::HAS_MORE_PAGES>();
|
||||
_paging_state = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -92,6 +92,14 @@ public:
|
||||
: abstract_function_selector(fun, std::move(arg_selectors))
|
||||
, _tfun(dynamic_pointer_cast<T>(fun)) {
|
||||
}
|
||||
|
||||
const functions::function_name& name() const {
|
||||
return _tfun->name();
|
||||
}
|
||||
|
||||
virtual sstring assignment_testable_source_context() const override {
|
||||
return format("{}", this->name());
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -79,11 +79,6 @@ public:
|
||||
dynamic_pointer_cast<functions::aggregate_function>(func), std::move(arg_selectors))
|
||||
, _aggregate(fun()->new_aggregate()) {
|
||||
}
|
||||
|
||||
virtual sstring assignment_testable_source_context() const override {
|
||||
// FIXME:
|
||||
return "FIXME";
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -82,12 +82,6 @@ public:
|
||||
: abstract_function_selector_for<functions::scalar_function>(
|
||||
dynamic_pointer_cast<functions::scalar_function>(std::move(fun)), std::move(arg_selectors)) {
|
||||
}
|
||||
|
||||
virtual sstring assignment_testable_source_context() const override {
|
||||
// FIXME:
|
||||
return "FIXME";
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -142,7 +142,7 @@ shared_ptr<selector::factory>
|
||||
selectable::with_field_selection::new_selector_factory(database& db, schema_ptr s, std::vector<const column_definition*>& defs) {
|
||||
auto&& factory = _selected->new_selector_factory(db, s, defs);
|
||||
auto&& type = factory->new_instance()->get_type();
|
||||
auto&& ut = dynamic_pointer_cast<const user_type_impl>(std::move(type));
|
||||
auto&& ut = dynamic_pointer_cast<const user_type_impl>(type->underlying_type());
|
||||
if (!ut) {
|
||||
throw exceptions::invalid_request_exception(
|
||||
format("Invalid field selection: {} of type {} is not a user type",
|
||||
|
||||
@@ -166,7 +166,8 @@ alter_type_statement::add_or_alter::add_or_alter(const ut_name& name, bool is_ad
|
||||
user_type alter_type_statement::add_or_alter::do_add(database& db, user_type to_update) const
|
||||
{
|
||||
if (get_idx_of_field(to_update, _field_name)) {
|
||||
throw exceptions::invalid_request_exception(format("Cannot add new field {} to type {}: a field of the same name already exists", _field_name->name(), _name.to_string()));
|
||||
throw exceptions::invalid_request_exception(format("Cannot add new field {} to type {}: a field of the same name already exists",
|
||||
_field_name->to_string(), _name.to_string()));
|
||||
}
|
||||
|
||||
std::vector<bytes> new_names(to_update->field_names());
|
||||
@@ -174,7 +175,7 @@ user_type alter_type_statement::add_or_alter::do_add(database& db, user_type to_
|
||||
std::vector<data_type> new_types(to_update->field_types());
|
||||
auto&& add_type = _field_type->prepare(db, keyspace()).get_type();
|
||||
if (add_type->references_user_type(to_update->_keyspace, to_update->_name)) {
|
||||
throw exceptions::invalid_request_exception(format("Cannot add new field {} of type {} to type {} as this would create a circular reference", _field_name->name(), _field_type->to_string(), _name.to_string()));
|
||||
throw exceptions::invalid_request_exception(format("Cannot add new field {} of type {} to type {} as this would create a circular reference", _field_name->to_string(), _field_type->to_string(), _name.to_string()));
|
||||
}
|
||||
new_types.push_back(std::move(add_type));
|
||||
return user_type_impl::get_instance(to_update->_keyspace, to_update->_name, std::move(new_names), std::move(new_types));
|
||||
@@ -184,13 +185,14 @@ user_type alter_type_statement::add_or_alter::do_alter(database& db, user_type t
|
||||
{
|
||||
std::optional<uint32_t> idx = get_idx_of_field(to_update, _field_name);
|
||||
if (!idx) {
|
||||
throw exceptions::invalid_request_exception(format("Unknown field {} in type {}", _field_name->name(), _name.to_string()));
|
||||
throw exceptions::invalid_request_exception(format("Unknown field {} in type {}", _field_name->to_string(), _name.to_string()));
|
||||
}
|
||||
|
||||
auto previous = to_update->field_types()[*idx];
|
||||
auto new_type = _field_type->prepare(db, keyspace()).get_type();
|
||||
if (!new_type->is_compatible_with(*previous)) {
|
||||
throw exceptions::invalid_request_exception(format("Type {} in incompatible with previous type {} of field {} in user type {}", _field_type->to_string(), previous->as_cql3_type().to_string(), _field_name->name(), _name.to_string()));
|
||||
throw exceptions::invalid_request_exception(format("Type {} in incompatible with previous type {} of field {} in user type {}",
|
||||
_field_type->to_string(), previous->as_cql3_type().to_string(), _field_name->to_string(), _name.to_string()));
|
||||
}
|
||||
|
||||
std::vector<data_type> new_types(to_update->field_types());
|
||||
|
||||
@@ -76,7 +76,7 @@ public:
|
||||
const bool _is_distinct;
|
||||
const bool _allow_filtering;
|
||||
const bool _is_json;
|
||||
bool _bypass_cache;
|
||||
bool _bypass_cache = false;
|
||||
public:
|
||||
parameters();
|
||||
parameters(orderings_type orderings,
|
||||
|
||||
@@ -440,8 +440,8 @@ indexed_table_select_statement::prepare_command_for_base_query(const query_optio
|
||||
return cmd;
|
||||
}
|
||||
|
||||
future<shared_ptr<cql_transport::messages::result_message>>
|
||||
indexed_table_select_statement::execute_base_query(
|
||||
future<foreign_ptr<lw_shared_ptr<query::result>>, lw_shared_ptr<query::read_command>>
|
||||
indexed_table_select_statement::do_execute_base_query(
|
||||
service::storage_proxy& proxy,
|
||||
dht::partition_range_vector&& partition_ranges,
|
||||
service::query_state& state,
|
||||
@@ -492,22 +492,27 @@ indexed_table_select_statement::execute_base_query(
|
||||
}).then([&merger]() {
|
||||
return merger.get();
|
||||
});
|
||||
}).then([this, &proxy, &state, &options, now, cmd, paging_state = std::move(paging_state)] (foreign_ptr<lw_shared_ptr<query::result>> result) mutable {
|
||||
return this->process_base_query_results(std::move(result), cmd, proxy, state, options, now, std::move(paging_state));
|
||||
}).then([cmd] (foreign_ptr<lw_shared_ptr<query::result>> result) mutable {
|
||||
return make_ready_future<foreign_ptr<lw_shared_ptr<query::result>>, lw_shared_ptr<query::read_command>>(std::move(result), std::move(cmd));
|
||||
});
|
||||
}
|
||||
|
||||
// Function for fetching the selected columns from a list of clustering rows.
|
||||
// It is currently used only in our Secondary Index implementation - ordinary
|
||||
// CQL SELECT statements do not have the syntax to request a list of rows.
|
||||
// FIXME: The current implementation is very inefficient - it requests each
|
||||
// row separately (and, incrementally, in parallel). Even multiple rows from a single
|
||||
// partition are requested separately. This last case can be easily improved,
|
||||
// but to implement the general case (multiple rows from multiple partitions)
|
||||
// efficiently, we will need more support from other layers.
|
||||
// Keys are ordered in token order (see #3423)
|
||||
future<shared_ptr<cql_transport::messages::result_message>>
|
||||
indexed_table_select_statement::execute_base_query(
|
||||
service::storage_proxy& proxy,
|
||||
dht::partition_range_vector&& partition_ranges,
|
||||
service::query_state& state,
|
||||
const query_options& options,
|
||||
gc_clock::time_point now,
|
||||
::shared_ptr<const service::pager::paging_state> paging_state) {
|
||||
return do_execute_base_query(proxy, std::move(partition_ranges), state, options, now, paging_state).then(
|
||||
[this, &proxy, &state, &options, now, paging_state = std::move(paging_state)] (foreign_ptr<lw_shared_ptr<query::result>> result, lw_shared_ptr<query::read_command> cmd) {
|
||||
return process_base_query_results(std::move(result), std::move(cmd), proxy, state, options, now, std::move(paging_state));
|
||||
});
|
||||
}
|
||||
|
||||
future<foreign_ptr<lw_shared_ptr<query::result>>, lw_shared_ptr<query::read_command>>
|
||||
indexed_table_select_statement::do_execute_base_query(
|
||||
service::storage_proxy& proxy,
|
||||
std::vector<primary_key>&& primary_keys,
|
||||
service::query_state& state,
|
||||
@@ -562,9 +567,23 @@ indexed_table_select_statement::execute_base_query(
|
||||
});
|
||||
}).then([&merger] () {
|
||||
return merger.get();
|
||||
}).then([cmd] (foreign_ptr<lw_shared_ptr<query::result>> result) mutable {
|
||||
return make_ready_future<foreign_ptr<lw_shared_ptr<query::result>>, lw_shared_ptr<query::read_command>>(std::move(result), std::move(cmd));
|
||||
});
|
||||
}).then([this, &proxy, &state, &options, now, cmd, paging_state = std::move(paging_state)] (foreign_ptr<lw_shared_ptr<query::result>> result) mutable {
|
||||
return this->process_base_query_results(std::move(result), cmd, proxy, state, options, now, std::move(paging_state));
|
||||
});
|
||||
}
|
||||
|
||||
future<shared_ptr<cql_transport::messages::result_message>>
|
||||
indexed_table_select_statement::execute_base_query(
|
||||
service::storage_proxy& proxy,
|
||||
std::vector<primary_key>&& primary_keys,
|
||||
service::query_state& state,
|
||||
const query_options& options,
|
||||
gc_clock::time_point now,
|
||||
::shared_ptr<const service::pager::paging_state> paging_state) {
|
||||
return do_execute_base_query(proxy, std::move(primary_keys), state, options, now, paging_state).then(
|
||||
[this, &proxy, &state, &options, now, paging_state = std::move(paging_state)] (foreign_ptr<lw_shared_ptr<query::result>> result, lw_shared_ptr<query::read_command> cmd) {
|
||||
return process_base_query_results(std::move(result), std::move(cmd), proxy, state, options, now, std::move(paging_state));
|
||||
});
|
||||
}
|
||||
|
||||
@@ -868,6 +887,60 @@ indexed_table_select_statement::do_execute(service::storage_proxy& proxy,
|
||||
}
|
||||
}
|
||||
|
||||
// Aggregated and paged filtering needs to aggregate the results from all pages
|
||||
// in order to avoid returning partial per-page results (issue #4540).
|
||||
// It's a little bit more complicated than regular aggregation, because each paging state
|
||||
// needs to be translated between the base table and the underlying view.
|
||||
// The routine below keeps fetching pages from the underlying view, which are then
|
||||
// used to fetch base rows, which go straight to the result set builder.
|
||||
// A local, internal copy of query_options is kept in order to keep updating
|
||||
// the paging state between requesting data from replicas.
|
||||
const bool aggregate = _selection->is_aggregate();
|
||||
if (aggregate) {
|
||||
const bool restrictions_need_filtering = _restrictions->need_filtering();
|
||||
return do_with(cql3::selection::result_set_builder(*_selection, now, options.get_cql_serialization_format()), std::make_unique<cql3::query_options>(cql3::query_options(options)),
|
||||
[this, &options, &proxy, &state, now, whole_partitions, partition_slices, restrictions_need_filtering] (cql3::selection::result_set_builder& builder, std::unique_ptr<cql3::query_options>& internal_options) {
|
||||
// page size is set to the internal count page size, regardless of the user-provided value
|
||||
internal_options.reset(new cql3::query_options(std::move(internal_options), options.get_paging_state(), DEFAULT_COUNT_PAGE_SIZE));
|
||||
return repeat([this, &builder, &options, &internal_options, &proxy, &state, now, whole_partitions, partition_slices, restrictions_need_filtering] () {
|
||||
auto consume_results = [this, &builder, &options, &internal_options, restrictions_need_filtering] (foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd) {
|
||||
if (restrictions_need_filtering) {
|
||||
query::result_view::consume(*results, cmd->slice, cql3::selection::result_set_builder::visitor(builder, *_schema, *_selection,
|
||||
cql3::selection::result_set_builder::restrictions_filter(_restrictions, options, cmd->row_limit, _schema, cmd->slice.partition_row_limit())));
|
||||
} else {
|
||||
query::result_view::consume(*results, cmd->slice, cql3::selection::result_set_builder::visitor(builder, *_schema, *_selection));
|
||||
}
|
||||
};
|
||||
|
||||
if (whole_partitions || partition_slices) {
|
||||
return find_index_partition_ranges(proxy, state, *internal_options).then(
|
||||
[this, now, &state, &internal_options, &proxy, consume_results = std::move(consume_results)] (dht::partition_range_vector partition_ranges, ::shared_ptr<const service::pager::paging_state> paging_state) {
|
||||
bool has_more_pages = paging_state && paging_state->get_remaining() > 0;
|
||||
internal_options.reset(new cql3::query_options(std::move(internal_options), paging_state ? ::make_shared<service::pager::paging_state>(*paging_state) : nullptr));
|
||||
return do_execute_base_query(proxy, std::move(partition_ranges), state, *internal_options, now, std::move(paging_state)).then(consume_results).then([has_more_pages] {
|
||||
return stop_iteration(!has_more_pages);
|
||||
});
|
||||
});
|
||||
} else {
|
||||
return find_index_clustering_rows(proxy, state, *internal_options).then(
|
||||
[this, now, &state, &internal_options, &proxy, consume_results = std::move(consume_results)] (std::vector<primary_key> primary_keys, ::shared_ptr<const service::pager::paging_state> paging_state) {
|
||||
bool has_more_pages = paging_state && paging_state->get_remaining() > 0;
|
||||
internal_options.reset(new cql3::query_options(std::move(internal_options), paging_state ? ::make_shared<service::pager::paging_state>(*paging_state) : nullptr));
|
||||
return this->do_execute_base_query(proxy, std::move(primary_keys), state, *internal_options, now, std::move(paging_state)).then(consume_results).then([has_more_pages] {
|
||||
return stop_iteration(!has_more_pages);
|
||||
});
|
||||
});
|
||||
}
|
||||
}).then([this, &builder, restrictions_need_filtering] () {
|
||||
auto rs = builder.build();
|
||||
update_stats_rows_read(rs->size());
|
||||
_stats.filtered_rows_matched_total += restrictions_need_filtering ? rs->size() : 0;
|
||||
auto msg = ::make_shared<cql_transport::messages::result_message::rows>(result(std::move(rs)));
|
||||
return make_ready_future<shared_ptr<cql_transport::messages::result_message>>(std::move(msg));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
if (whole_partitions || partition_slices) {
|
||||
// In this case, can use our normal query machinery, which retrieves
|
||||
// entire partitions or the same slice for many partitions.
|
||||
|
||||
@@ -68,8 +68,8 @@ class select_statement : public cql_statement {
|
||||
public:
|
||||
using parameters = raw::select_statement::parameters;
|
||||
using ordering_comparator_type = raw::select_statement::ordering_comparator_type;
|
||||
protected:
|
||||
static constexpr int DEFAULT_COUNT_PAGE_SIZE = 10000;
|
||||
protected:
|
||||
static thread_local const ::shared_ptr<parameters> _default_parameters;
|
||||
schema_ptr _schema;
|
||||
uint32_t _bound_terms;
|
||||
@@ -229,6 +229,14 @@ private:
|
||||
lw_shared_ptr<query::read_command>
|
||||
prepare_command_for_base_query(const query_options& options, service::query_state& state, gc_clock::time_point now, bool use_paging);
|
||||
|
||||
future<foreign_ptr<lw_shared_ptr<query::result>>, lw_shared_ptr<query::read_command>>
|
||||
do_execute_base_query(
|
||||
service::storage_proxy& proxy,
|
||||
dht::partition_range_vector&& partition_ranges,
|
||||
service::query_state& state,
|
||||
const query_options& options,
|
||||
gc_clock::time_point now,
|
||||
::shared_ptr<const service::pager::paging_state> paging_state);
|
||||
future<shared_ptr<cql_transport::messages::result_message>>
|
||||
execute_base_query(
|
||||
service::storage_proxy& proxy,
|
||||
@@ -238,6 +246,23 @@ private:
|
||||
gc_clock::time_point now,
|
||||
::shared_ptr<const service::pager::paging_state> paging_state);
|
||||
|
||||
// Function for fetching the selected columns from a list of clustering rows.
|
||||
// It is currently used only in our Secondary Index implementation - ordinary
|
||||
// CQL SELECT statements do not have the syntax to request a list of rows.
|
||||
// FIXME: The current implementation is very inefficient - it requests each
|
||||
// row separately (and, incrementally, in parallel). Even multiple rows from a single
|
||||
// partition are requested separately. This last case can be easily improved,
|
||||
// but to implement the general case (multiple rows from multiple partitions)
|
||||
// efficiently, we will need more support from other layers.
|
||||
// Keys are ordered in token order (see #3423)
|
||||
future<foreign_ptr<lw_shared_ptr<query::result>>, lw_shared_ptr<query::read_command>>
|
||||
do_execute_base_query(
|
||||
service::storage_proxy& proxy,
|
||||
std::vector<primary_key>&& primary_keys,
|
||||
service::query_state& state,
|
||||
const query_options& options,
|
||||
gc_clock::time_point now,
|
||||
::shared_ptr<const service::pager::paging_state> paging_state);
|
||||
future<shared_ptr<cql_transport::messages::result_message>>
|
||||
execute_base_query(
|
||||
service::storage_proxy& proxy,
|
||||
|
||||
@@ -32,7 +32,7 @@ tuples::component_spec_of(shared_ptr<column_specification> column, size_t compon
|
||||
column->ks_name,
|
||||
column->cf_name,
|
||||
::make_shared<column_identifier>(format("{}[{:d}]", column->name, component), true),
|
||||
static_pointer_cast<const tuple_type_impl>(column->type)->type(component));
|
||||
static_pointer_cast<const tuple_type_impl>(column->type->underlying_type())->type(component));
|
||||
}
|
||||
|
||||
shared_ptr<term>
|
||||
|
||||
@@ -70,7 +70,7 @@ public:
|
||||
|
||||
private:
|
||||
void validate_assignable_to(database& db, const sstring& keyspace, shared_ptr<column_specification> receiver) {
|
||||
auto tt = dynamic_pointer_cast<const tuple_type_impl>(receiver->type);
|
||||
auto tt = dynamic_pointer_cast<const tuple_type_impl>(receiver->type->underlying_type());
|
||||
if (!tt) {
|
||||
throw exceptions::invalid_request_exception(format("Invalid tuple type literal for {} of type {}", receiver->name, receiver->type->as_cql3_type()));
|
||||
}
|
||||
|
||||
11
database.cc
11
database.cc
@@ -260,6 +260,10 @@ void backlog_controller::adjust() {
|
||||
|
||||
float backlog_controller::backlog_of_shares(float shares) const {
|
||||
size_t idx = 1;
|
||||
// No control points means the controller is disabled.
|
||||
if (_control_points.size() == 0) {
|
||||
return 1.0f;
|
||||
}
|
||||
while ((idx < _control_points.size() - 1) && (_control_points[idx].output < shares)) {
|
||||
idx++;
|
||||
}
|
||||
@@ -1929,7 +1933,7 @@ flat_mutation_reader make_multishard_streaming_reader(distributed<database>& db,
|
||||
virtual flat_mutation_reader create_reader(
|
||||
schema_ptr schema,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice&,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
tracing::trace_state_ptr,
|
||||
mutation_reader::forwarding fwd_mr) override {
|
||||
@@ -1940,7 +1944,7 @@ flat_mutation_reader make_multishard_streaming_reader(distributed<database>& db,
|
||||
_contexts[shard].read_operation = make_foreign(std::make_unique<utils::phased_barrier::operation>(cf.read_in_progress()));
|
||||
_contexts[shard].semaphore = &cf.streaming_read_concurrency_semaphore();
|
||||
|
||||
return cf.make_streaming_reader(std::move(schema), *_contexts[shard].range, fwd_mr);
|
||||
return cf.make_streaming_reader(std::move(schema), *_contexts[shard].range, slice, fwd_mr);
|
||||
}
|
||||
virtual void destroy_reader(shard_id shard, future<stopped_reader> reader_fut) noexcept override {
|
||||
reader_fut.then([this, zis = shared_from_this(), shard] (stopped_reader&& reader) mutable {
|
||||
@@ -1963,7 +1967,8 @@ flat_mutation_reader make_multishard_streaming_reader(distributed<database>& db,
|
||||
return make_multishard_combining_reader(make_shared<streaming_reader_lifecycle_policy>(db), partitioner, std::move(s), pr, ps, pc,
|
||||
std::move(trace_state), fwd_mr);
|
||||
});
|
||||
return make_flat_multi_range_reader(std::move(schema), std::move(ms), std::move(range_generator), schema->full_slice(),
|
||||
auto&& full_slice = schema->full_slice();
|
||||
return make_flat_multi_range_reader(std::move(schema), std::move(ms), std::move(range_generator), std::move(full_slice),
|
||||
service::get_local_streaming_read_priority(), {}, mutation_reader::forwarding::no);
|
||||
}
|
||||
|
||||
|
||||
16
database.hh
16
database.hh
@@ -458,6 +458,7 @@ private:
|
||||
// This semaphore ensures that an operation like snapshot won't have its selected
|
||||
// sstables deleted by compaction in parallel, a race condition which could
|
||||
// easily result in failure.
|
||||
// Locking order: must be acquired either independently or after _sstables_lock
|
||||
seastar::semaphore _sstable_deletion_sem = {1};
|
||||
// There are situations in which we need to stop writing sstables. Flushers will take
|
||||
// the read lock, and the ones that wish to stop that process will take the write lock.
|
||||
@@ -679,8 +680,13 @@ public:
|
||||
|
||||
// Single range overload.
|
||||
flat_mutation_reader make_streaming_reader(schema_ptr schema, const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::no) const;
|
||||
|
||||
flat_mutation_reader make_streaming_reader(schema_ptr schema, const dht::partition_range& range) {
|
||||
return make_streaming_reader(schema, range, schema->full_slice());
|
||||
}
|
||||
|
||||
sstables::shared_sstable make_streaming_sstable_for_write(std::optional<sstring> subdir = {});
|
||||
sstables::shared_sstable make_streaming_staging_sstable() {
|
||||
return make_streaming_sstable_for_write("staging");
|
||||
@@ -759,13 +765,7 @@ public:
|
||||
|
||||
// SSTable writes are now allowed again, and generation is updated to new_generation if != -1
|
||||
// returns the amount of microseconds elapsed since we disabled writes.
|
||||
std::chrono::steady_clock::duration enable_sstable_write(int64_t new_generation) {
|
||||
if (new_generation != -1) {
|
||||
update_sstables_known_generation(new_generation);
|
||||
}
|
||||
_sstables_lock.write_unlock();
|
||||
return std::chrono::steady_clock::now() - _sstable_writes_disabled_at;
|
||||
}
|
||||
std::chrono::steady_clock::duration enable_sstable_write(int64_t new_generation);
|
||||
|
||||
// Make sure the generation numbers are sequential, starting from "start".
|
||||
// Generations before "start" are left untouched.
|
||||
@@ -935,7 +935,7 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
future<row_locker::lock_holder> do_push_view_replica_updates(const schema_ptr& s, mutation&& m, db::timeout_clock::time_point timeout, mutation_source&& source) const;
|
||||
future<row_locker::lock_holder> do_push_view_replica_updates(const schema_ptr& s, mutation&& m, db::timeout_clock::time_point timeout, mutation_source&& source, const io_priority_class& io_priority) const;
|
||||
std::vector<view_ptr> affected_views(const schema_ptr& base, const mutation& update) const;
|
||||
future<> generate_and_propagate_view_updates(const schema_ptr& base,
|
||||
std::vector<view_ptr>&& views,
|
||||
|
||||
@@ -396,10 +396,8 @@ std::unordered_set<gms::inet_address> db::batchlog_manager::endpoint_filter(cons
|
||||
|
||||
// grab a random member of up to two racks
|
||||
for (auto& rack : racks) {
|
||||
auto rack_members = validated.bucket(rack);
|
||||
auto n = validated.bucket_size(rack_members);
|
||||
auto cpy = boost::copy_range<std::vector<gms::inet_address>>(validated.equal_range(rack) | boost::adaptors::map_values);
|
||||
std::uniform_int_distribution<size_t> rdist(0, n - 1);
|
||||
std::uniform_int_distribution<size_t> rdist(0, cpy.size() - 1);
|
||||
result.emplace(cpy[rdist(_e1)]);
|
||||
}
|
||||
|
||||
|
||||
@@ -148,9 +148,18 @@ db::commitlog::descriptor::descriptor(const sstring& filename, const std::string
|
||||
: descriptor([&filename, &fname_prefix]() {
|
||||
std::smatch m;
|
||||
// match both legacy and new version of commitlogs Ex: CommitLog-12345.log and CommitLog-4-12345.log.
|
||||
std::regex rx("(?:.*/)?(?:Recycled-)?" + fname_prefix + "((\\d+)(" + SEPARATOR + "\\d+)?)" + FILENAME_EXTENSION);
|
||||
std::regex rx("(?:Recycled-)?" + fname_prefix + "((\\d+)(" + SEPARATOR + "\\d+)?)" + FILENAME_EXTENSION);
|
||||
std::string sfilename = filename;
|
||||
if (!std::regex_match(sfilename, m, rx)) {
|
||||
auto cbegin = sfilename.cbegin();
|
||||
// skip the leading path
|
||||
// Note: we're using rfind rather than the regex above
|
||||
// since it may run out of stack in debug builds.
|
||||
// See https://github.com/scylladb/scylla/issues/4464
|
||||
auto pos = std::string(filename).rfind('/');
|
||||
if (pos != std::string::npos) {
|
||||
cbegin += pos + 1;
|
||||
}
|
||||
if (!std::regex_match(cbegin, sfilename.cend(), m, rx)) {
|
||||
throw std::domain_error("Cannot parse the version of the file: " + filename);
|
||||
}
|
||||
if (m[3].length() == 0) {
|
||||
@@ -420,7 +429,11 @@ class db::commitlog::segment : public enable_shared_from_this<segment>, public c
|
||||
|
||||
uint64_t _file_pos = 0;
|
||||
uint64_t _flush_pos = 0;
|
||||
|
||||
bool _closed = false;
|
||||
// Not the same as _closed since files can be reused
|
||||
bool _closed_file = false;
|
||||
|
||||
bool _terminated = false;
|
||||
|
||||
using buffer_type = segment_manager::buffer_type;
|
||||
@@ -486,7 +499,7 @@ public:
|
||||
clogger.debug("Created new {} segment {}", active ? "active" : "reserve", *this);
|
||||
}
|
||||
~segment() {
|
||||
if (!_closed) {
|
||||
if (!_closed_file) {
|
||||
_segment_manager->add_file_to_close(std::move(_file));
|
||||
}
|
||||
if (is_clean()) {
|
||||
@@ -560,7 +573,7 @@ public:
|
||||
// and we should have waited out all pending.
|
||||
return me->_pending_ops.close().finally([me] {
|
||||
return me->_file.truncate(me->_flush_pos).then([me] {
|
||||
return me->_file.close();
|
||||
return me->_file.close().finally([me] { me->_closed_file = true; });
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -1223,6 +1236,34 @@ void db::commitlog::segment_manager::flush_segments(bool force) {
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Helper for ensuring a file is closed if an exception is thrown.
|
||||
///
|
||||
/// The file provided by the file_fut future is passed to func.
|
||||
/// * If func throws an exception E, the file is closed and we return
|
||||
/// a failed future with E.
|
||||
/// * If func returns a value V, the file is not closed and we return
|
||||
/// a future with V.
|
||||
/// Note that when an exception is not thrown, it is the
|
||||
/// responsibility of func to make sure the file will be closed. It
|
||||
/// can close the file itself, return it, or store it somewhere.
|
||||
///
|
||||
/// \tparam Func The type of function this wraps
|
||||
/// \param file_fut A future that produces a file
|
||||
/// \param func A function that uses a file
|
||||
/// \return A future that passes the file produced by file_fut to func
|
||||
/// and closes it if func fails
|
||||
template <typename Func>
|
||||
static auto close_on_failure(future<file> file_fut, Func func) {
|
||||
return file_fut.then([func = std::move(func)](file f) {
|
||||
return futurize_apply(func, f).handle_exception([f] (std::exception_ptr e) mutable {
|
||||
return f.close().then_wrapped([f, e = std::move(e)] (future<> x) {
|
||||
using futurator = futurize<std::result_of_t<Func(file)>>;
|
||||
return futurator::make_exception_future(e);
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
future<db::commitlog::segment_manager::sseg_ptr> db::commitlog::segment_manager::allocate_segment_ex(const descriptor& d, sstring filename, open_flags flags, bool active) {
|
||||
file_open_options opt;
|
||||
opt.extent_allocation_size_hint = max_size;
|
||||
@@ -1249,7 +1290,7 @@ future<db::commitlog::segment_manager::sseg_ptr> db::commitlog::segment_manager:
|
||||
return fut;
|
||||
});
|
||||
|
||||
return fut.then([this, d, active, filename](file f) {
|
||||
return close_on_failure(std::move(fut), [this, d, active, filename] (file f) {
|
||||
f = make_checked_file(commit_error_handler, f);
|
||||
// xfs doesn't like files extended betond eof, so enlarge the file
|
||||
return f.truncate(max_size).then([this, d, active, f, filename] () mutable {
|
||||
|
||||
@@ -756,6 +756,8 @@ public:
|
||||
val(enable_dangerous_direct_import_of_cassandra_counters, bool, false, Used, "Only turn this option on if you want to import tables from Cassandra containing counters, and you are SURE that no counters in that table were created in a version earlier than Cassandra 2.1." \
|
||||
" It is not enough to have ever since upgraded to newer versions of Cassandra. If you EVER used a version earlier than 2.1 in the cluster where these SSTables come from, DO NOT TURN ON THIS OPTION! You will corrupt your data. You have been warned.") \
|
||||
val(enable_shard_aware_drivers, bool, true, Used, "Enable native transport drivers to use connection-per-shard for better performance") \
|
||||
val(abort_on_internal_error, bool, false, Used, "Abort the server instead of throwing exception when internal invariants are violated.") \
|
||||
val(enable_3_1_0_compatibility_mode, bool, false, Used, "Set to true if the cluster was initially installed from 3.1.0. If it was upgraded from an earlier version, or installed from a later version, leave this set to false. This adjusts the communication protocol to work around a bug in Scylla 3.1.0") \
|
||||
/* done! */
|
||||
|
||||
#define _make_value_member(name, type, deflt, status, desc, ...) \
|
||||
|
||||
@@ -57,9 +57,30 @@ static ::shared_ptr<cql3::cql3_type::raw> parse_raw(const sstring& str) {
|
||||
}
|
||||
|
||||
data_type db::cql_type_parser::parse(const sstring& keyspace, const sstring& str, lw_shared_ptr<user_types_metadata> user_types) {
|
||||
static const thread_local std::unordered_map<sstring, cql3::cql3_type> native_types = []{
|
||||
std::unordered_map<sstring, cql3::cql3_type> res;
|
||||
for (auto& nt : cql3::cql3_type::values()) {
|
||||
res.emplace(nt.to_string(), nt);
|
||||
}
|
||||
return res;
|
||||
}();
|
||||
|
||||
auto i = native_types.find(str);
|
||||
if (i != native_types.end()) {
|
||||
return i->second.get_type();
|
||||
}
|
||||
|
||||
if (!user_types && service::get_storage_proxy().local_is_initialized()) {
|
||||
user_types = service::get_storage_proxy().local().get_db().local().find_keyspace(keyspace).metadata()->user_types();
|
||||
}
|
||||
// special-case top-level UDTs
|
||||
if (user_types) {
|
||||
auto& map = user_types->get_all_types();
|
||||
auto i = map.find(utf8_type->decompose(str));
|
||||
if (i != map.end()) {
|
||||
return i->second;
|
||||
}
|
||||
}
|
||||
|
||||
auto raw = parse_raw(str);
|
||||
auto cql = raw->prepare_internal(keyspace, user_types);
|
||||
|
||||
@@ -57,7 +57,7 @@ void data_listeners::on_write(const schema_ptr& s, const frozen_mutation& m) {
|
||||
}
|
||||
}
|
||||
|
||||
toppartitons_item_key::operator sstring() const {
|
||||
toppartitions_item_key::operator sstring() const {
|
||||
std::ostringstream oss;
|
||||
oss << key.key().with_schema(*schema);
|
||||
return oss.str();
|
||||
@@ -84,8 +84,11 @@ flat_mutation_reader toppartitions_data_listener::on_read(const schema_ptr& s, c
|
||||
return std::move(rd);
|
||||
}
|
||||
dblog.trace("toppartitions_data_listener::on_read: {}.{}", s->ks_name(), s->cf_name());
|
||||
return make_filtering_reader(std::move(rd), [this, &range, &slice, s = std::move(s)] (const dht::decorated_key& dk) {
|
||||
_top_k_read.append(toppartitons_item_key{s, dk});
|
||||
return make_filtering_reader(std::move(rd), [zis = this->weak_from_this(), &range, &slice, s = std::move(s)] (const dht::decorated_key& dk) {
|
||||
// The data query may be executing after the toppartitions_data_listener object has been removed, so check
|
||||
if (zis) {
|
||||
zis->_top_k_read.append(toppartitions_item_key{s, dk});
|
||||
}
|
||||
return true;
|
||||
});
|
||||
}
|
||||
@@ -95,7 +98,27 @@ void toppartitions_data_listener::on_write(const schema_ptr& s, const frozen_mut
|
||||
return;
|
||||
}
|
||||
dblog.trace("toppartitions_data_listener::on_write: {}.{}", _ks, _cf);
|
||||
_top_k_write.append(toppartitons_item_key{s, m.decorated_key(*s)});
|
||||
_top_k_write.append(toppartitions_item_key{s, m.decorated_key(*s)});
|
||||
}
|
||||
|
||||
toppartitions_data_listener::global_top_k::results
|
||||
toppartitions_data_listener::globalize(top_k::results&& r) {
|
||||
toppartitions_data_listener::global_top_k::results n;
|
||||
n.reserve(r.size());
|
||||
for (auto&& e : r) {
|
||||
n.emplace_back(global_top_k::results::value_type{toppartitions_global_item_key(std::move(e.item)), e.count, e.error});
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
toppartitions_data_listener::top_k::results
|
||||
toppartitions_data_listener::localize(const global_top_k::results& r) {
|
||||
toppartitions_data_listener::top_k::results n;
|
||||
n.reserve(r.size());
|
||||
for (auto&& e : r) {
|
||||
n.emplace_back(top_k::results::value_type{toppartitions_item_key(e.item), e.count, e.error});
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
toppartitions_query::toppartitions_query(distributed<database>& xdb, sstring ks, sstring cf,
|
||||
@@ -108,20 +131,20 @@ future<> toppartitions_query::scatter() {
|
||||
return _query.start(std::ref(_xdb), _ks, _cf);
|
||||
}
|
||||
|
||||
using top_t = toppartitions_data_listener::top_k::results;
|
||||
using top_t = toppartitions_data_listener::global_top_k::results;
|
||||
|
||||
future<toppartitions_query::results> toppartitions_query::gather(unsigned res_size) {
|
||||
dblog.debug("toppartitions_query::gather");
|
||||
|
||||
auto map = [res_size, this] (toppartitions_data_listener& listener) {
|
||||
dblog.trace("toppartitions_query::map_reduce with listener {}", &listener);
|
||||
top_t rd = listener._top_k_read.top(res_size);
|
||||
top_t wr = listener._top_k_write.top(res_size);
|
||||
return std::tuple<top_t, top_t>{std::move(rd), std::move(wr)};
|
||||
top_t rd = toppartitions_data_listener::globalize(listener._top_k_read.top(res_size));
|
||||
top_t wr = toppartitions_data_listener::globalize(listener._top_k_write.top(res_size));
|
||||
return make_foreign(std::make_unique<std::tuple<top_t, top_t>>(std::move(rd), std::move(wr)));
|
||||
};
|
||||
auto reduce = [this] (results res, std::tuple<top_t, top_t> rd_wr) {
|
||||
res.read.append(std::get<0>(rd_wr));
|
||||
res.write.append(std::get<1>(rd_wr));
|
||||
auto reduce = [this] (results res, foreign_ptr<std::unique_ptr<std::tuple<top_t, top_t>>> rd_wr) {
|
||||
res.read.append(toppartitions_data_listener::localize(std::get<0>(*rd_wr)));
|
||||
res.write.append(toppartitions_data_listener::localize(std::get<1>(*rd_wr)));
|
||||
return std::move(res);
|
||||
};
|
||||
return _query.map_reduce0(map, results{res_size}, reduce)
|
||||
|
||||
@@ -24,12 +24,14 @@
|
||||
#include <seastar/core/distributed.hh>
|
||||
#include <seastar/core/future.hh>
|
||||
#include <seastar/core/distributed.hh>
|
||||
#include <seastar/core/weak_ptr.hh>
|
||||
|
||||
#include "schema.hh"
|
||||
#include "flat_mutation_reader.hh"
|
||||
#include "mutation_reader.hh"
|
||||
#include "frozen_mutation.hh"
|
||||
#include "utils/top_k.hh"
|
||||
#include "schema_registry.hh"
|
||||
|
||||
#include <vector>
|
||||
#include <set>
|
||||
@@ -75,29 +77,54 @@ public:
|
||||
};
|
||||
|
||||
|
||||
struct toppartitons_item_key {
|
||||
struct toppartitions_item_key {
|
||||
schema_ptr schema;
|
||||
dht::decorated_key key;
|
||||
|
||||
toppartitons_item_key(const schema_ptr& schema, const dht::decorated_key& key) : schema(schema), key(key) {}
|
||||
toppartitons_item_key(const toppartitons_item_key& key) noexcept : schema(key.schema), key(key.key) {}
|
||||
toppartitions_item_key(const schema_ptr& schema, const dht::decorated_key& key) : schema(schema), key(key) {}
|
||||
toppartitions_item_key(const toppartitions_item_key& key) noexcept : schema(key.schema), key(key.key) {}
|
||||
|
||||
struct hash {
|
||||
size_t operator()(const toppartitons_item_key& k) const {
|
||||
size_t operator()(const toppartitions_item_key& k) const {
|
||||
return std::hash<dht::token>()(k.key.token());
|
||||
}
|
||||
};
|
||||
|
||||
struct comp {
|
||||
bool operator()(const toppartitons_item_key& k1, const toppartitons_item_key& k2) const {
|
||||
return k1.schema == k2.schema && k1.key.equal(*k2.schema, k2.key);
|
||||
bool operator()(const toppartitions_item_key& k1, const toppartitions_item_key& k2) const {
|
||||
return k1.schema->id() == k2.schema->id() && k1.key.equal(*k2.schema, k2.key);
|
||||
}
|
||||
};
|
||||
|
||||
explicit operator sstring() const;
|
||||
};
|
||||
|
||||
class toppartitions_data_listener : public data_listener {
|
||||
// Like toppartitions_item_key, but uses global_schema_ptr, so can be safely transported across shards
|
||||
struct toppartitions_global_item_key {
|
||||
global_schema_ptr schema;
|
||||
dht::decorated_key key;
|
||||
|
||||
toppartitions_global_item_key(toppartitions_item_key&& tik) : schema(std::move(tik.schema)), key(std::move(tik.key)) {}
|
||||
operator toppartitions_item_key() const {
|
||||
return toppartitions_item_key(schema, key);
|
||||
}
|
||||
|
||||
struct hash {
|
||||
size_t operator()(const toppartitions_global_item_key& k) const {
|
||||
return std::hash<dht::token>()(k.key.token());
|
||||
}
|
||||
};
|
||||
|
||||
struct comp {
|
||||
bool operator()(const toppartitions_global_item_key& k1, const toppartitions_global_item_key& k2) const {
|
||||
return k1.schema.get()->id() == k2.schema.get()->id() && k1.key.equal(*k2.schema.get(), k2.key);
|
||||
}
|
||||
};
|
||||
|
||||
explicit operator sstring() const;
|
||||
};
|
||||
|
||||
class toppartitions_data_listener : public data_listener, public weakly_referencable<toppartitions_data_listener> {
|
||||
friend class toppartitions_query;
|
||||
|
||||
database& _db;
|
||||
@@ -105,7 +132,11 @@ class toppartitions_data_listener : public data_listener {
|
||||
sstring _cf;
|
||||
|
||||
public:
|
||||
using top_k = utils::space_saving_top_k<toppartitons_item_key, toppartitons_item_key::hash, toppartitons_item_key::comp>;
|
||||
using top_k = utils::space_saving_top_k<toppartitions_item_key, toppartitions_item_key::hash, toppartitions_item_key::comp>;
|
||||
using global_top_k = utils::space_saving_top_k<toppartitions_global_item_key, toppartitions_global_item_key::hash, toppartitions_global_item_key::comp>;
|
||||
public:
|
||||
static global_top_k::results globalize(top_k::results&& r);
|
||||
static top_k::results localize(const global_top_k::results& r);
|
||||
private:
|
||||
top_k _top_k_read;
|
||||
top_k _top_k_write;
|
||||
|
||||
@@ -118,8 +118,8 @@ future<> manager::stop() {
|
||||
|
||||
return _draining_eps_gate.close().finally([this] {
|
||||
return parallel_for_each(_ep_managers, [] (auto& pair) {
|
||||
return pair.second.stop();
|
||||
}).finally([this] {
|
||||
return pair.second.stop();
|
||||
}).finally([this] {
|
||||
_ep_managers.clear();
|
||||
manager_logger.info("Stopped");
|
||||
}).discard_result();
|
||||
@@ -240,6 +240,8 @@ future<> manager::end_point_hints_manager::stop(drain should_drain) noexcept {
|
||||
manager::end_point_hints_manager::end_point_hints_manager(const key_type& key, manager& shard_manager)
|
||||
: _key(key)
|
||||
, _shard_manager(shard_manager)
|
||||
, _file_update_mutex_ptr(make_lw_shared<seastar::shared_mutex>())
|
||||
, _file_update_mutex(*_file_update_mutex_ptr)
|
||||
, _state(state_set::of<state::stopped>())
|
||||
, _hints_dir(_shard_manager.hints_dir() / format("{}", _key).c_str())
|
||||
, _sender(*this, _shard_manager.local_storage_proxy(), _shard_manager.local_db(), _shard_manager.local_gossiper())
|
||||
@@ -248,6 +250,8 @@ manager::end_point_hints_manager::end_point_hints_manager(const key_type& key, m
|
||||
manager::end_point_hints_manager::end_point_hints_manager(end_point_hints_manager&& other)
|
||||
: _key(other._key)
|
||||
, _shard_manager(other._shard_manager)
|
||||
, _file_update_mutex_ptr(std::move(other._file_update_mutex_ptr))
|
||||
, _file_update_mutex(*_file_update_mutex_ptr)
|
||||
, _state(other._state)
|
||||
, _hints_dir(std::move(other._hints_dir))
|
||||
, _sender(other._sender, *this)
|
||||
@@ -401,11 +405,8 @@ future<> manager::end_point_hints_manager::sender::do_send_one_mutation(frozen_m
|
||||
return _proxy.send_to_endpoint(std::move(m), end_point_key(), { }, write_type::SIMPLE, service::allow_hints::no);
|
||||
} else {
|
||||
manager_logger.trace("Endpoints set has changed and {} is no longer a replica. Mutating from scratch...", end_point_key());
|
||||
// FIXME: using 1h as infinite timeout. If a node is down, we should get an
|
||||
// unavailable exception.
|
||||
auto timeout = db::timeout_clock::now() + 1h;
|
||||
//FIXME: Add required frozen_mutation overloads
|
||||
return _proxy.mutate({m.fm.unfreeze(m.s)}, consistency_level::ALL, timeout, nullptr);
|
||||
return _proxy.mutate_hint_from_scratch(std::move(m));
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -520,28 +521,35 @@ void manager::drain_for(gms::inet_address endpoint) {
|
||||
manager_logger.trace("on_leave_cluster: {} is removed/decommissioned", endpoint);
|
||||
|
||||
with_gate(_draining_eps_gate, [this, endpoint] {
|
||||
return futurize_apply([this, endpoint] () {
|
||||
if (utils::fb_utilities::is_me(endpoint)) {
|
||||
return parallel_for_each(_ep_managers, [] (auto& pair) {
|
||||
return pair.second.stop(drain::yes).finally([&pair] {
|
||||
return remove_file(pair.second.hints_dir().c_str());
|
||||
return with_semaphore(drain_lock(), 1, [this, endpoint] {
|
||||
return futurize_apply([this, endpoint] () {
|
||||
if (utils::fb_utilities::is_me(endpoint)) {
|
||||
return parallel_for_each(_ep_managers, [] (auto& pair) {
|
||||
return pair.second.stop(drain::yes).finally([&pair] {
|
||||
return with_file_update_mutex(pair.second, [&pair] {
|
||||
return remove_file(pair.second.hints_dir().c_str());
|
||||
});
|
||||
});
|
||||
}).finally([this] {
|
||||
_ep_managers.clear();
|
||||
});
|
||||
}).finally([this] {
|
||||
_ep_managers.clear();
|
||||
});
|
||||
} else {
|
||||
ep_managers_map_type::iterator ep_manager_it = find_ep_manager(endpoint);
|
||||
if (ep_manager_it != ep_managers_end()) {
|
||||
return ep_manager_it->second.stop(drain::yes).finally([this, endpoint, hints_dir = ep_manager_it->second.hints_dir()] {
|
||||
_ep_managers.erase(endpoint);
|
||||
return remove_file(hints_dir.c_str());
|
||||
});
|
||||
}
|
||||
} else {
|
||||
ep_managers_map_type::iterator ep_manager_it = find_ep_manager(endpoint);
|
||||
if (ep_manager_it != ep_managers_end()) {
|
||||
return ep_manager_it->second.stop(drain::yes).finally([this, endpoint, &ep_man = ep_manager_it->second] {
|
||||
return with_file_update_mutex(ep_man, [&ep_man] {
|
||||
return remove_file(ep_man.hints_dir().c_str());
|
||||
}).finally([this, endpoint] {
|
||||
_ep_managers.erase(endpoint);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
return make_ready_future<>();
|
||||
}
|
||||
}).handle_exception([endpoint] (auto eptr) {
|
||||
manager_logger.error("Exception when draining {}: {}", endpoint, eptr);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
}).handle_exception([endpoint] (auto eptr) {
|
||||
manager_logger.error("Exception when draining {}: {}", endpoint, eptr);
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -276,7 +276,8 @@ public:
|
||||
manager& _shard_manager;
|
||||
hints_store_ptr _hints_store_anchor;
|
||||
seastar::gate _store_gate;
|
||||
seastar::shared_mutex _file_update_mutex;
|
||||
lw_shared_ptr<seastar::shared_mutex> _file_update_mutex_ptr;
|
||||
seastar::shared_mutex& _file_update_mutex;
|
||||
|
||||
enum class state {
|
||||
can_hint, // hinting is currently allowed (used by the space_watchdog)
|
||||
@@ -378,8 +379,20 @@ public:
|
||||
return _state.contains(state::stopped);
|
||||
}
|
||||
|
||||
seastar::shared_mutex& file_update_mutex() {
|
||||
return _file_update_mutex;
|
||||
/// \brief Safely runs a given functor under the file_update_mutex of \ref ep_man
|
||||
///
|
||||
/// Runs a given functor under the file_update_mutex of the given end_point_hints_manager instance.
|
||||
/// This function is safe even if \ref ep_man gets destroyed before the future this function returns resolves
|
||||
/// (as long as the \ref func call itself is safe).
|
||||
///
|
||||
/// \tparam Func Functor type.
|
||||
/// \param ep_man end_point_hints_manager instance which file_update_mutex we want to lock.
|
||||
/// \param func Functor to run under the lock.
|
||||
/// \return Whatever \ref func returns.
|
||||
template <typename Func>
|
||||
friend inline auto with_file_update_mutex(end_point_hints_manager& ep_man, Func&& func) {
|
||||
lw_shared_ptr<seastar::shared_mutex> lock_ptr = ep_man._file_update_mutex_ptr;
|
||||
return with_lock(*lock_ptr, std::forward<Func>(func)).finally([lock_ptr] {});
|
||||
}
|
||||
|
||||
const fs::path& hints_dir() const noexcept {
|
||||
@@ -387,6 +400,10 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
seastar::shared_mutex& file_update_mutex() noexcept {
|
||||
return _file_update_mutex;
|
||||
}
|
||||
|
||||
/// \brief Creates a new hints store object.
|
||||
///
|
||||
/// - Creates a hints store directory if doesn't exist: <shard_hints_dir>/<ep_key>
|
||||
@@ -453,6 +470,7 @@ private:
|
||||
stats _stats;
|
||||
seastar::metrics::metric_groups _metrics;
|
||||
std::unordered_set<ep_key_type> _eps_with_pending_hints;
|
||||
seastar::semaphore _drain_lock = {1};
|
||||
|
||||
public:
|
||||
manager(sstring hints_directory, std::vector<sstring> hinted_dcs, int64_t max_hint_window_ms, resource_manager&res_manager, distributed<database>& db);
|
||||
@@ -531,6 +549,10 @@ public:
|
||||
return _hints_dir_device_id;
|
||||
}
|
||||
|
||||
seastar::semaphore& drain_lock() noexcept {
|
||||
return _drain_lock;
|
||||
}
|
||||
|
||||
void allow_hints();
|
||||
void forbid_hints();
|
||||
void forbid_hints_for_eps_with_pending_hints();
|
||||
|
||||
@@ -89,16 +89,27 @@ future<> space_watchdog::stop() noexcept {
|
||||
return std::move(_started);
|
||||
}
|
||||
|
||||
// Called under the end_point_hints_manager::file_update_mutex() of the corresponding end_point_hints_manager instance.
|
||||
future<> space_watchdog::scan_one_ep_dir(fs::path path, manager& shard_manager, ep_key_type ep_key) {
|
||||
return lister::scan_dir(path, { directory_entry_type::regular }, [this, ep_key, &shard_manager] (fs::path dir, directory_entry de) {
|
||||
// Put the current end point ID to state.eps_with_pending_hints when we see the second hints file in its directory
|
||||
if (_files_count == 1) {
|
||||
shard_manager.add_ep_with_pending_hints(ep_key);
|
||||
}
|
||||
++_files_count;
|
||||
return do_with(std::move(path), [this, ep_key, &shard_manager] (fs::path& path) {
|
||||
// It may happen that we get here and the directory has already been deleted in the context of manager::drain_for().
|
||||
// In this case simply bail out.
|
||||
return engine().file_exists(path.native()).then([this, ep_key, &shard_manager, &path] (bool exists) {
|
||||
if (!exists) {
|
||||
return make_ready_future<>();
|
||||
} else {
|
||||
return lister::scan_dir(path, { directory_entry_type::regular }, [this, ep_key, &shard_manager] (fs::path dir, directory_entry de) {
|
||||
// Put the current end point ID to state.eps_with_pending_hints when we see the second hints file in its directory
|
||||
if (_files_count == 1) {
|
||||
shard_manager.add_ep_with_pending_hints(ep_key);
|
||||
}
|
||||
++_files_count;
|
||||
|
||||
return io_check(file_size, (dir / de.name.c_str()).c_str()).then([this] (uint64_t fsize) {
|
||||
_total_size += fsize;
|
||||
return io_check(file_size, (dir / de.name.c_str()).c_str()).then([this] (uint64_t fsize) {
|
||||
_total_size += fsize;
|
||||
});
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -136,7 +147,7 @@ void space_watchdog::on_timer() {
|
||||
// continue to enumeration - there is no one to change them.
|
||||
auto it = shard_manager.find_ep_manager(de.name);
|
||||
if (it != shard_manager.ep_managers_end()) {
|
||||
return with_lock(it->second.file_update_mutex(), [this, &shard_manager, dir = std::move(dir), ep_name = std::move(de.name)]() mutable {
|
||||
return with_file_update_mutex(it->second, [this, &shard_manager, dir = std::move(dir), ep_name = std::move(de.name)] () mutable {
|
||||
return scan_one_ep_dir(dir / ep_name, shard_manager, ep_key_type(ep_name));
|
||||
});
|
||||
} else {
|
||||
|
||||
@@ -26,11 +26,17 @@
|
||||
namespace db {
|
||||
|
||||
enum class schema_feature {
|
||||
VIEW_VIRTUAL_COLUMNS
|
||||
VIEW_VIRTUAL_COLUMNS,
|
||||
|
||||
// When set, the schema digest is calcualted in a way such that it doesn't change after all
|
||||
// tombstones in an empty partition expire.
|
||||
// See https://github.com/scylladb/scylla/issues/4485
|
||||
DIGEST_INSENSITIVE_TO_EXPIRY,
|
||||
};
|
||||
|
||||
using schema_features = enum_set<super_enum<schema_feature,
|
||||
schema_feature::VIEW_VIRTUAL_COLUMNS
|
||||
schema_feature::VIEW_VIRTUAL_COLUMNS,
|
||||
schema_feature::DIGEST_INSENSITIVE_TO_EXPIRY
|
||||
>>;
|
||||
|
||||
}
|
||||
|
||||
@@ -587,9 +587,9 @@ future<utils::UUID> calculate_schema_digest(distributed<service::storage_proxy>&
|
||||
return mutations;
|
||||
});
|
||||
};
|
||||
auto reduce = [] (auto& hash, auto&& mutations) {
|
||||
auto reduce = [features] (auto& hash, auto&& mutations) {
|
||||
for (const mutation& m : mutations) {
|
||||
feed_hash_for_schema_digest(hash, m);
|
||||
feed_hash_for_schema_digest(hash, m, features);
|
||||
}
|
||||
};
|
||||
return do_with(md5_hasher(), all_table_names(features), [features, map, reduce] (auto& hash, auto& tables) {
|
||||
@@ -778,6 +778,13 @@ mutation compact_for_schema_digest(const mutation& m) {
|
||||
return m_compacted;
|
||||
}
|
||||
|
||||
void feed_hash_for_schema_digest(hasher& h, const mutation& m, schema_features features) {
|
||||
auto compacted = compact_for_schema_digest(m);
|
||||
if (!features.contains<schema_feature::DIGEST_INSENSITIVE_TO_EXPIRY>() || !compacted.partition().empty()) {
|
||||
feed_hash(h, compact_for_schema_digest(m));
|
||||
}
|
||||
}
|
||||
|
||||
// Applies deletion of the "version" column to a system_schema.scylla_tables mutation.
|
||||
static void delete_schema_version(mutation& m) {
|
||||
if (m.column_family_id() != scylla_tables()->id()) {
|
||||
@@ -1085,10 +1092,31 @@ static std::vector<V> get_list(const query::result_set_row& row, const sstring&
|
||||
// Create types for a given keyspace. This takes care of topologically sorting user defined types.
|
||||
template <typename T> static std::vector<user_type> create_types(keyspace_metadata& ks, T&& range) {
|
||||
cql_type_parser::raw_builder builder(ks);
|
||||
std::unordered_set<bytes> names;
|
||||
for (const query::result_set_row& row : range) {
|
||||
builder.add(row.get_nonnull<sstring>("type_name"),
|
||||
get_list<sstring>(row, "field_names"),
|
||||
get_list<sstring>(row, "field_types"));
|
||||
auto name = row.get_nonnull<sstring>("type_name");
|
||||
names.insert(to_bytes(name));
|
||||
builder.add(std::move(name), get_list<sstring>(row, "field_names"), get_list<sstring>(row, "field_types"));
|
||||
}
|
||||
// Add user types that use any of the above types. From the
|
||||
// database point of view they haven't changed since the content
|
||||
// of system.types is the same for them. The runtime objects in
|
||||
// the other hand now point to out of date types, so we need to
|
||||
// recreate them.
|
||||
for (const auto& p : ks.user_types()->get_all_types()) {
|
||||
const user_type& t = p.second;
|
||||
if (names.count(t->_name) != 0) {
|
||||
continue;
|
||||
}
|
||||
for (const auto& name : names) {
|
||||
if (t->references_user_type(t->_keyspace, name)) {
|
||||
std::vector<sstring> field_types;
|
||||
for (const data_type& f : t->field_types()) {
|
||||
field_types.push_back(f->as_cql3_type().to_string());
|
||||
}
|
||||
builder.add(t->get_name_as_string(), t->string_field_names(), std::move(field_types));
|
||||
}
|
||||
}
|
||||
}
|
||||
return builder.build();
|
||||
}
|
||||
@@ -2727,8 +2755,9 @@ namespace legacy {
|
||||
|
||||
table_schema_version schema_mutations::digest() const {
|
||||
md5_hasher h;
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, _columnfamilies);
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, _columns);
|
||||
const db::schema_features no_features;
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, _columnfamilies, no_features);
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, _columns, no_features);
|
||||
return utils::UUID_gen::get_name_UUID(h.finalize());
|
||||
}
|
||||
|
||||
|
||||
@@ -215,10 +215,7 @@ index_metadata_kind deserialize_index_kind(sstring kind);
|
||||
|
||||
mutation compact_for_schema_digest(const mutation& m);
|
||||
|
||||
template<typename Hasher>
|
||||
void feed_hash_for_schema_digest(Hasher& h, const mutation& m) {
|
||||
feed_hash(h, compact_for_schema_digest(m));
|
||||
}
|
||||
void feed_hash_for_schema_digest(hasher&, const mutation&, schema_features);
|
||||
|
||||
} // namespace schema_tables
|
||||
} // namespace db
|
||||
|
||||
328
db/size_estimates_virtual_reader.cc
Normal file
328
db/size_estimates_virtual_reader.cc
Normal file
@@ -0,0 +1,328 @@
|
||||
/*
|
||||
* Copyright (C) 2019 ScyllaDB
|
||||
*
|
||||
* Modified by ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <boost/range/adaptor/indirected.hpp>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
#include <boost/range/adaptor/transformed.hpp>
|
||||
#include <boost/range/algorithm/find_if.hpp>
|
||||
|
||||
#include "clustering_bounds_comparator.hh"
|
||||
#include "database_fwd.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "dht/i_partitioner.hh"
|
||||
#include "partition_range_compat.hh"
|
||||
#include "range.hh"
|
||||
#include "service/storage_service.hh"
|
||||
#include "mutation_fragment.hh"
|
||||
#include "sstables/sstables.hh"
|
||||
#include "db/timeout_clock.hh"
|
||||
#include "database.hh"
|
||||
|
||||
#include "db/size_estimates_virtual_reader.hh"
|
||||
|
||||
namespace db {
|
||||
|
||||
namespace size_estimates {
|
||||
|
||||
struct virtual_row {
|
||||
const bytes& cf_name;
|
||||
const token_range& tokens;
|
||||
clustering_key_prefix as_key() const {
|
||||
return clustering_key_prefix::from_exploded(std::vector<bytes_view>{cf_name, tokens.start, tokens.end});
|
||||
}
|
||||
};
|
||||
|
||||
struct virtual_row_comparator {
|
||||
schema_ptr _schema;
|
||||
virtual_row_comparator(schema_ptr schema) : _schema(schema) { }
|
||||
bool operator()(const clustering_key_prefix& key1, const clustering_key_prefix& key2) {
|
||||
return clustering_key_prefix::prefix_equality_less_compare(*_schema)(key1, key2);
|
||||
}
|
||||
bool operator()(const virtual_row& row, const clustering_key_prefix& key) {
|
||||
return operator()(row.as_key(), key);
|
||||
}
|
||||
bool operator()(const clustering_key_prefix& key, const virtual_row& row) {
|
||||
return operator()(key, row.as_key());
|
||||
}
|
||||
};
|
||||
|
||||
// Iterating over the cartesian product of cf_names and token_ranges.
|
||||
class virtual_row_iterator : public std::iterator<std::input_iterator_tag, const virtual_row> {
|
||||
std::reference_wrapper<const std::vector<bytes>> _cf_names;
|
||||
std::reference_wrapper<const std::vector<token_range>> _ranges;
|
||||
size_t _cf_names_idx = 0;
|
||||
size_t _ranges_idx = 0;
|
||||
public:
|
||||
struct end_iterator_tag {};
|
||||
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges)
|
||||
: _cf_names(std::ref(cf_names))
|
||||
, _ranges(std::ref(ranges))
|
||||
{ }
|
||||
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges, end_iterator_tag)
|
||||
: _cf_names(std::ref(cf_names))
|
||||
, _ranges(std::ref(ranges))
|
||||
, _cf_names_idx(cf_names.size())
|
||||
, _ranges_idx(ranges.size())
|
||||
{
|
||||
if (cf_names.empty() || ranges.empty()) {
|
||||
// The product of an empty range with any range is an empty range.
|
||||
// In this case we want the end iterator to be equal to the begin iterator,
|
||||
// which has_ranges_idx = _cf_names_idx = 0.
|
||||
_ranges_idx = _cf_names_idx = 0;
|
||||
}
|
||||
}
|
||||
virtual_row_iterator& operator++() {
|
||||
if (++_ranges_idx == _ranges.get().size() && ++_cf_names_idx < _cf_names.get().size()) {
|
||||
_ranges_idx = 0;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
virtual_row_iterator operator++(int) {
|
||||
virtual_row_iterator i(*this);
|
||||
++(*this);
|
||||
return i;
|
||||
}
|
||||
const value_type operator*() const {
|
||||
return { _cf_names.get()[_cf_names_idx], _ranges.get()[_ranges_idx] };
|
||||
}
|
||||
bool operator==(const virtual_row_iterator& i) const {
|
||||
return _cf_names_idx == i._cf_names_idx
|
||||
&& _ranges_idx == i._ranges_idx;
|
||||
}
|
||||
bool operator!=(const virtual_row_iterator& i) const {
|
||||
return !(*this == i);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns the keyspaces, ordered by name, as selected by the partition_range.
|
||||
*/
|
||||
static std::vector<sstring> get_keyspaces(const schema& s, const database& db, dht::partition_range range) {
|
||||
struct keyspace_less_comparator {
|
||||
const schema& _s;
|
||||
keyspace_less_comparator(const schema& s) : _s(s) { }
|
||||
dht::ring_position as_ring_position(const sstring& ks) {
|
||||
auto pkey = partition_key::from_single_value(_s, utf8_type->decompose(ks));
|
||||
return dht::global_partitioner().decorate_key(_s, std::move(pkey));
|
||||
}
|
||||
bool operator()(const sstring& ks1, const sstring& ks2) {
|
||||
return as_ring_position(ks1).less_compare(_s, as_ring_position(ks2));
|
||||
}
|
||||
bool operator()(const sstring& ks, const dht::ring_position& rp) {
|
||||
return as_ring_position(ks).less_compare(_s, rp);
|
||||
}
|
||||
bool operator()(const dht::ring_position& rp, const sstring& ks) {
|
||||
return rp.less_compare(_s, as_ring_position(ks));
|
||||
}
|
||||
};
|
||||
auto keyspaces = db.get_non_system_keyspaces();
|
||||
auto cmp = keyspace_less_comparator(s);
|
||||
boost::sort(keyspaces, cmp);
|
||||
return boost::copy_range<std::vector<sstring>>(
|
||||
range.slice(keyspaces, std::move(cmp)) | boost::adaptors::filtered([&s] (const auto& ks) {
|
||||
// If this is a range query, results are divided between shards by the partition key (keyspace_name).
|
||||
return shard_of(dht::global_partitioner().get_token(s,
|
||||
partition_key::from_single_value(s, utf8_type->decompose(ks))))
|
||||
== engine().cpu_id();
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Makes a wrapping range of ring_position from a nonwrapping range of token, used to select sstables.
|
||||
*/
|
||||
static dht::partition_range as_ring_position_range(dht::token_range& r) {
|
||||
std::optional<range<dht::ring_position>::bound> start_bound, end_bound;
|
||||
if (r.start()) {
|
||||
start_bound = {{ dht::ring_position(r.start()->value(), dht::ring_position::token_bound::start), r.start()->is_inclusive() }};
|
||||
}
|
||||
if (r.end()) {
|
||||
end_bound = {{ dht::ring_position(r.end()->value(), dht::ring_position::token_bound::end), r.end()->is_inclusive() }};
|
||||
}
|
||||
return dht::partition_range(std::move(start_bound), std::move(end_bound), r.is_singular());
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new range_estimates for the specified range, considering the sstables associated with `cf`.
|
||||
*/
|
||||
static system_keyspace::range_estimates estimate(const column_family& cf, const token_range& r) {
|
||||
int64_t count{0};
|
||||
utils::estimated_histogram hist{0};
|
||||
auto from_bytes = [] (auto& b) {
|
||||
return dht::global_partitioner().from_sstring(utf8_type->to_string(b));
|
||||
};
|
||||
dht::token_range_vector ranges;
|
||||
::compat::unwrap_into(
|
||||
wrapping_range<dht::token>({{ from_bytes(r.start), false }}, {{ from_bytes(r.end) }}),
|
||||
dht::token_comparator(),
|
||||
[&] (auto&& rng) { ranges.push_back(std::move(rng)); });
|
||||
for (auto&& r : ranges) {
|
||||
auto rp_range = as_ring_position_range(r);
|
||||
for (auto&& sstable : cf.select_sstables(rp_range)) {
|
||||
count += sstable->estimated_keys_for_range(r);
|
||||
hist.merge(sstable->get_stats_metadata().estimated_partition_size);
|
||||
}
|
||||
}
|
||||
return {cf.schema(), r.start, r.end, count, count > 0 ? hist.mean() : 0};
|
||||
}
|
||||
|
||||
future<std::vector<token_range>> get_local_ranges() {
|
||||
auto& ss = service::get_local_storage_service();
|
||||
return ss.get_local_tokens().then([&ss] (auto&& tokens) {
|
||||
auto ranges = ss.get_token_metadata().get_primary_ranges_for(std::move(tokens));
|
||||
std::vector<token_range> local_ranges;
|
||||
auto to_bytes = [](const std::optional<dht::token_range::bound>& b) {
|
||||
assert(b);
|
||||
return utf8_type->decompose(dht::global_partitioner().to_sstring(b->value()));
|
||||
};
|
||||
// We merge the ranges to be compatible with how Cassandra shows it's size estimates table.
|
||||
// All queries will be on that table, where all entries are text and there's no notion of
|
||||
// token ranges form the CQL point of view.
|
||||
auto left_inf = boost::find_if(ranges, [] (auto&& r) {
|
||||
return !r.start() || r.start()->value() == dht::minimum_token();
|
||||
});
|
||||
auto right_inf = boost::find_if(ranges, [] (auto&& r) {
|
||||
return !r.end() || r.start()->value() == dht::maximum_token();
|
||||
});
|
||||
if (left_inf != right_inf && left_inf != ranges.end() && right_inf != ranges.end()) {
|
||||
local_ranges.push_back(token_range{to_bytes(right_inf->start()), to_bytes(left_inf->end())});
|
||||
ranges.erase(left_inf);
|
||||
ranges.erase(right_inf);
|
||||
}
|
||||
for (auto&& r : ranges) {
|
||||
local_ranges.push_back(token_range{to_bytes(r.start()), to_bytes(r.end())});
|
||||
}
|
||||
boost::sort(local_ranges, [] (auto&& tr1, auto&& tr2) {
|
||||
return utf8_type->less(tr1.start, tr2.start);
|
||||
});
|
||||
return local_ranges;
|
||||
});
|
||||
}
|
||||
|
||||
size_estimates_mutation_reader::size_estimates_mutation_reader(schema_ptr schema, const dht::partition_range& prange, const query::partition_slice& slice, streamed_mutation::forwarding fwd)
|
||||
: impl(schema)
|
||||
, _schema(std::move(schema))
|
||||
, _prange(&prange)
|
||||
, _slice(slice)
|
||||
, _fwd(fwd)
|
||||
{ }
|
||||
|
||||
future<> size_estimates_mutation_reader::get_next_partition() {
|
||||
auto& db = service::get_local_storage_proxy().get_db().local();
|
||||
if (!_keyspaces) {
|
||||
_keyspaces = get_keyspaces(*_schema, db, *_prange);
|
||||
_current_partition = _keyspaces->begin();
|
||||
}
|
||||
if (_current_partition == _keyspaces->end()) {
|
||||
_end_of_stream = true;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return get_local_ranges().then([&db, this] (auto&& ranges) {
|
||||
auto estimates = this->estimates_for_current_keyspace(db, std::move(ranges));
|
||||
auto mutations = db::system_keyspace::make_size_estimates_mutation(*_current_partition, std::move(estimates));
|
||||
++_current_partition;
|
||||
std::vector<mutation> ms;
|
||||
ms.emplace_back(std::move(mutations));
|
||||
_partition_reader = flat_mutation_reader_from_mutations(std::move(ms), _fwd);
|
||||
});
|
||||
}
|
||||
|
||||
future<> size_estimates_mutation_reader::fill_buffer(db::timeout_clock::time_point timeout) {
|
||||
return do_until([this, timeout] { return is_end_of_stream() || is_buffer_full(); }, [this, timeout] {
|
||||
if (!_partition_reader) {
|
||||
return get_next_partition();
|
||||
}
|
||||
return _partition_reader->consume_pausable([this] (mutation_fragment mf) {
|
||||
push_mutation_fragment(std::move(mf));
|
||||
return stop_iteration(is_buffer_full());
|
||||
}, timeout).then([this] {
|
||||
if (_partition_reader->is_end_of_stream() && _partition_reader->is_buffer_empty()) {
|
||||
_partition_reader = std::nullopt;
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
void size_estimates_mutation_reader::next_partition() {
|
||||
clear_buffer_to_next_partition();
|
||||
if (is_buffer_empty()) {
|
||||
_partition_reader = std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
future<> size_estimates_mutation_reader::fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) {
|
||||
clear_buffer();
|
||||
_prange = ≺
|
||||
_keyspaces = std::nullopt;
|
||||
_partition_reader = std::nullopt;
|
||||
_end_of_stream = false;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
future<> size_estimates_mutation_reader::fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) {
|
||||
forward_buffer_to(pr.start());
|
||||
_end_of_stream = false;
|
||||
if (_partition_reader) {
|
||||
return _partition_reader->fast_forward_to(std::move(pr), timeout);
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
size_t size_estimates_mutation_reader::buffer_size() const {
|
||||
if (_partition_reader) {
|
||||
return flat_mutation_reader::impl::buffer_size() + _partition_reader->buffer_size();
|
||||
}
|
||||
return flat_mutation_reader::impl::buffer_size();
|
||||
}
|
||||
|
||||
std::vector<db::system_keyspace::range_estimates>
|
||||
size_estimates_mutation_reader::estimates_for_current_keyspace(const database& db, std::vector<token_range> local_ranges) const {
|
||||
// For each specified range, estimate (crudely) mean partition size and partitions count.
|
||||
auto pkey = partition_key::from_single_value(*_schema, utf8_type->decompose(*_current_partition));
|
||||
auto cfs = db.find_keyspace(*_current_partition).metadata()->cf_meta_data();
|
||||
auto cf_names = boost::copy_range<std::vector<bytes>>(cfs | boost::adaptors::transformed([] (auto&& cf) {
|
||||
return utf8_type->decompose(cf.first);
|
||||
}));
|
||||
boost::sort(cf_names, [] (auto&& n1, auto&& n2) {
|
||||
return utf8_type->less(n1, n2);
|
||||
});
|
||||
std::vector<db::system_keyspace::range_estimates> estimates;
|
||||
for (auto& range : _slice.row_ranges(*_schema, pkey)) {
|
||||
auto rows = boost::make_iterator_range(
|
||||
virtual_row_iterator(cf_names, local_ranges),
|
||||
virtual_row_iterator(cf_names, local_ranges, virtual_row_iterator::end_iterator_tag()));
|
||||
auto rows_to_estimate = range.slice(rows, virtual_row_comparator(_schema));
|
||||
for (auto&& r : rows_to_estimate) {
|
||||
auto& cf = db.find_column_family(*_current_partition, utf8_type->to_string(r.cf_name));
|
||||
estimates.push_back(estimate(cf, r.tokens));
|
||||
if (estimates.size() >= _slice.partition_row_limit()) {
|
||||
return estimates;
|
||||
}
|
||||
}
|
||||
}
|
||||
return estimates;
|
||||
}
|
||||
|
||||
} // namespace size_estimates
|
||||
|
||||
} // namespace db
|
||||
@@ -21,33 +21,18 @@
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <boost/range/adaptor/indirected.hpp>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
#include <boost/range/adaptor/transformed.hpp>
|
||||
#include <boost/range/algorithm/find_if.hpp>
|
||||
|
||||
#include "clustering_bounds_comparator.hh"
|
||||
#include "database_fwd.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "dht/i_partitioner.hh"
|
||||
#include "mutation_reader.hh"
|
||||
#include "partition_range_compat.hh"
|
||||
#include "range.hh"
|
||||
#include "service/storage_service.hh"
|
||||
#include "mutation_fragment.hh"
|
||||
#include "sstables/sstables.hh"
|
||||
#include "db/timeout_clock.hh"
|
||||
#include "database.hh"
|
||||
|
||||
namespace db {
|
||||
|
||||
namespace size_estimates {
|
||||
|
||||
struct token_range {
|
||||
bytes start;
|
||||
bytes end;
|
||||
};
|
||||
|
||||
class size_estimates_mutation_reader final : public flat_mutation_reader::impl {
|
||||
struct token_range {
|
||||
bytes start;
|
||||
bytes end;
|
||||
};
|
||||
schema_ptr _schema;
|
||||
const dht::partition_range* _prange;
|
||||
const query::partition_slice& _slice;
|
||||
@@ -57,267 +42,18 @@ class size_estimates_mutation_reader final : public flat_mutation_reader::impl {
|
||||
streamed_mutation::forwarding _fwd;
|
||||
flat_mutation_reader_opt _partition_reader;
|
||||
public:
|
||||
size_estimates_mutation_reader(schema_ptr schema, const dht::partition_range& prange, const query::partition_slice& slice, streamed_mutation::forwarding fwd)
|
||||
: impl(schema)
|
||||
, _schema(std::move(schema))
|
||||
, _prange(&prange)
|
||||
, _slice(slice)
|
||||
, _fwd(fwd)
|
||||
{ }
|
||||
size_estimates_mutation_reader(schema_ptr, const dht::partition_range&, const query::partition_slice&, streamed_mutation::forwarding);
|
||||
|
||||
virtual future<> fill_buffer(db::timeout_clock::time_point) override;
|
||||
virtual void next_partition() override;
|
||||
virtual future<> fast_forward_to(const dht::partition_range&, db::timeout_clock::time_point) override;
|
||||
virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point) override;
|
||||
virtual size_t buffer_size() const override;
|
||||
private:
|
||||
future<> get_next_partition() {
|
||||
// For each specified range, estimate (crudely) mean partition size and partitions count.
|
||||
auto& db = service::get_local_storage_proxy().get_db().local();
|
||||
if (!_keyspaces) {
|
||||
_keyspaces = get_keyspaces(*_schema, db, *_prange);
|
||||
_current_partition = _keyspaces->begin();
|
||||
}
|
||||
if (_current_partition == _keyspaces->end()) {
|
||||
_end_of_stream = true;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return get_local_ranges().then([&db, this] (auto&& ranges) {
|
||||
auto estimates = this->estimates_for_current_keyspace(db, std::move(ranges));
|
||||
auto mutations = db::system_keyspace::make_size_estimates_mutation(*_current_partition, std::move(estimates));
|
||||
++_current_partition;
|
||||
std::vector<mutation> ms;
|
||||
ms.emplace_back(std::move(mutations));
|
||||
_partition_reader = flat_mutation_reader_from_mutations(std::move(ms), _fwd);
|
||||
});
|
||||
}
|
||||
public:
|
||||
virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override {
|
||||
return do_until([this, timeout] { return is_end_of_stream() || is_buffer_full(); }, [this, timeout] {
|
||||
if (!_partition_reader) {
|
||||
return get_next_partition();
|
||||
}
|
||||
return _partition_reader->consume_pausable([this] (mutation_fragment mf) {
|
||||
push_mutation_fragment(std::move(mf));
|
||||
return stop_iteration(is_buffer_full());
|
||||
}, timeout).then([this] {
|
||||
if (_partition_reader->is_end_of_stream() && _partition_reader->is_buffer_empty()) {
|
||||
_partition_reader = std::nullopt;
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
virtual void next_partition() override {
|
||||
clear_buffer_to_next_partition();
|
||||
if (is_buffer_empty()) {
|
||||
_partition_reader = std::nullopt;
|
||||
}
|
||||
}
|
||||
virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override {
|
||||
clear_buffer();
|
||||
_prange = ≺
|
||||
_keyspaces = std::nullopt;
|
||||
_partition_reader = std::nullopt;
|
||||
_end_of_stream = false;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override {
|
||||
forward_buffer_to(pr.start());
|
||||
_end_of_stream = false;
|
||||
if (_partition_reader) {
|
||||
return _partition_reader->fast_forward_to(std::move(pr), timeout);
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
virtual size_t buffer_size() const override {
|
||||
if (_partition_reader) {
|
||||
return flat_mutation_reader::impl::buffer_size() + _partition_reader->buffer_size();
|
||||
}
|
||||
return flat_mutation_reader::impl::buffer_size();
|
||||
}
|
||||
/**
|
||||
* Returns the primary ranges for the local node.
|
||||
* Used for testing as well.
|
||||
*/
|
||||
static future<std::vector<token_range>> get_local_ranges() {
|
||||
auto& ss = service::get_local_storage_service();
|
||||
return ss.get_local_tokens().then([&ss] (auto&& tokens) {
|
||||
auto ranges = ss.get_token_metadata().get_primary_ranges_for(std::move(tokens));
|
||||
std::vector<token_range> local_ranges;
|
||||
auto to_bytes = [](const std::optional<dht::token_range::bound>& b) {
|
||||
assert(b);
|
||||
return utf8_type->decompose(dht::global_partitioner().to_sstring(b->value()));
|
||||
};
|
||||
// We merge the ranges to be compatible with how Cassandra shows it's size estimates table.
|
||||
// All queries will be on that table, where all entries are text and there's no notion of
|
||||
// token ranges form the CQL point of view.
|
||||
auto left_inf = boost::find_if(ranges, [] (auto&& r) {
|
||||
return !r.start() || r.start()->value() == dht::minimum_token();
|
||||
});
|
||||
auto right_inf = boost::find_if(ranges, [] (auto&& r) {
|
||||
return !r.end() || r.start()->value() == dht::maximum_token();
|
||||
});
|
||||
if (left_inf != right_inf && left_inf != ranges.end() && right_inf != ranges.end()) {
|
||||
local_ranges.push_back(token_range{to_bytes(right_inf->start()), to_bytes(left_inf->end())});
|
||||
ranges.erase(left_inf);
|
||||
ranges.erase(right_inf);
|
||||
}
|
||||
for (auto&& r : ranges) {
|
||||
local_ranges.push_back(token_range{to_bytes(r.start()), to_bytes(r.end())});
|
||||
}
|
||||
boost::sort(local_ranges, [] (auto&& tr1, auto&& tr2) {
|
||||
return utf8_type->less(tr1.start, tr2.start);
|
||||
});
|
||||
return local_ranges;
|
||||
});
|
||||
}
|
||||
private:
|
||||
struct virtual_row {
|
||||
const bytes& cf_name;
|
||||
const token_range& tokens;
|
||||
clustering_key_prefix as_key() const {
|
||||
return clustering_key_prefix::from_exploded(std::vector<bytes_view>{cf_name, tokens.start, tokens.end});
|
||||
}
|
||||
};
|
||||
struct virtual_row_comparator {
|
||||
schema_ptr _schema;
|
||||
virtual_row_comparator(schema_ptr schema) : _schema(schema) { }
|
||||
bool operator()(const clustering_key_prefix& key1, const clustering_key_prefix& key2) {
|
||||
return clustering_key_prefix::prefix_equality_less_compare(*_schema)(key1, key2);
|
||||
}
|
||||
bool operator()(const virtual_row& row, const clustering_key_prefix& key) {
|
||||
return operator()(row.as_key(), key);
|
||||
}
|
||||
bool operator()(const clustering_key_prefix& key, const virtual_row& row) {
|
||||
return operator()(key, row.as_key());
|
||||
}
|
||||
};
|
||||
class virtual_row_iterator : public std::iterator<std::input_iterator_tag, const virtual_row> {
|
||||
std::reference_wrapper<const std::vector<bytes>> _cf_names;
|
||||
std::reference_wrapper<const std::vector<token_range>> _ranges;
|
||||
size_t _cf_names_idx = 0;
|
||||
size_t _ranges_idx = 0;
|
||||
public:
|
||||
struct end_iterator_tag {};
|
||||
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges)
|
||||
: _cf_names(std::ref(cf_names))
|
||||
, _ranges(std::ref(ranges))
|
||||
{ }
|
||||
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges, end_iterator_tag)
|
||||
: _cf_names(std::ref(cf_names))
|
||||
, _ranges(std::ref(ranges))
|
||||
, _cf_names_idx(cf_names.size())
|
||||
, _ranges_idx(ranges.size())
|
||||
{ }
|
||||
virtual_row_iterator& operator++() {
|
||||
if (++_ranges_idx == _ranges.get().size() && ++_cf_names_idx < _cf_names.get().size()) {
|
||||
_ranges_idx = 0;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
virtual_row_iterator operator++(int) {
|
||||
virtual_row_iterator i(*this);
|
||||
++(*this);
|
||||
return i;
|
||||
}
|
||||
const value_type operator*() const {
|
||||
return { _cf_names.get()[_cf_names_idx], _ranges.get()[_ranges_idx] };
|
||||
}
|
||||
bool operator==(const virtual_row_iterator& i) const {
|
||||
return _cf_names_idx == i._cf_names_idx
|
||||
&& _ranges_idx == i._ranges_idx;
|
||||
}
|
||||
bool operator!=(const virtual_row_iterator& i) const {
|
||||
return !(*this == i);
|
||||
}
|
||||
};
|
||||
future<> get_next_partition();
|
||||
|
||||
// Builds the range_estimates rows for the keyspace currently being read
// (*_current_partition), restricted by the query slice's clustering ranges
// and capped at the slice's per-partition row limit.
std::vector<db::system_keyspace::range_estimates>
estimates_for_current_keyspace(const database& db, std::vector<token_range> local_ranges) const {
    auto pkey = partition_key::from_single_value(*_schema, utf8_type->decompose(*_current_partition));
    auto cfs = db.find_keyspace(*_current_partition).metadata()->cf_meta_data();
    // Clustering keys start with the table name as text, so the names must
    // be sorted the same way the clustering comparator orders them.
    auto cf_names = boost::copy_range<std::vector<bytes>>(cfs | boost::adaptors::transformed([] (auto&& cf) {
        return utf8_type->decompose(cf.first);
    }));
    boost::sort(cf_names, [] (auto&& n1, auto&& n2) {
        return utf8_type->less(n1, n2);
    });
    std::vector<db::system_keyspace::range_estimates> estimates;
    for (auto& range : _slice.row_ranges(*_schema, pkey)) {
        // Lazily enumerate the (table x token range) cross product and let
        // the clustering range slice out the rows it selects.
        auto rows = boost::make_iterator_range(
                virtual_row_iterator(cf_names, local_ranges),
                virtual_row_iterator(cf_names, local_ranges, virtual_row_iterator::end_iterator_tag()));
        auto rows_to_estimate = range.slice(rows, virtual_row_comparator(_schema));
        for (auto&& r : rows_to_estimate) {
            auto& cf = db.find_column_family(*_current_partition, utf8_type->to_string(r.cf_name));
            estimates.push_back(estimate(cf, r.tokens));
            // Stop early once the slice's per-partition row limit is hit.
            if (estimates.size() >= _slice.partition_row_limit()) {
                return estimates;
            }
        }
    }
    return estimates;
}
|
||||
|
||||
/**
|
||||
* Returns the keyspaces, ordered by name, as selected by the partition_range.
|
||||
*/
|
||||
static ks_range get_keyspaces(const schema& s, const database& db, dht::partition_range range) {
|
||||
struct keyspace_less_comparator {
|
||||
const schema& _s;
|
||||
keyspace_less_comparator(const schema& s) : _s(s) { }
|
||||
dht::ring_position as_ring_position(const sstring& ks) {
|
||||
auto pkey = partition_key::from_single_value(_s, utf8_type->decompose(ks));
|
||||
return dht::global_partitioner().decorate_key(_s, std::move(pkey));
|
||||
}
|
||||
bool operator()(const sstring& ks1, const sstring& ks2) {
|
||||
return as_ring_position(ks1).less_compare(_s, as_ring_position(ks2));
|
||||
}
|
||||
bool operator()(const sstring& ks, const dht::ring_position& rp) {
|
||||
return as_ring_position(ks).less_compare(_s, rp);
|
||||
}
|
||||
bool operator()(const dht::ring_position& rp, const sstring& ks) {
|
||||
return rp.less_compare(_s, as_ring_position(ks));
|
||||
}
|
||||
};
|
||||
auto keyspaces = db.get_non_system_keyspaces();
|
||||
auto cmp = keyspace_less_comparator(s);
|
||||
boost::sort(keyspaces, cmp);
|
||||
return boost::copy_range<ks_range>(range.slice(keyspaces, std::move(cmp)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Makes a wrapping range of ring_position from a nonwrapping range of token, used to select sstables.
|
||||
*/
|
||||
static dht::partition_range as_ring_position_range(dht::token_range& r) {
|
||||
std::optional<range<dht::ring_position>::bound> start_bound, end_bound;
|
||||
if (r.start()) {
|
||||
start_bound = {{ dht::ring_position(r.start()->value(), dht::ring_position::token_bound::start), r.start()->is_inclusive() }};
|
||||
}
|
||||
if (r.end()) {
|
||||
end_bound = {{ dht::ring_position(r.end()->value(), dht::ring_position::token_bound::end), r.end()->is_inclusive() }};
|
||||
}
|
||||
return dht::partition_range(std::move(start_bound), std::move(end_bound), r.is_singular());
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new range_estimates for the specified range, considering the sstables associated with `cf`.
|
||||
*/
|
||||
static system_keyspace::range_estimates estimate(const column_family& cf, const token_range& r) {
|
||||
int64_t count{0};
|
||||
utils::estimated_histogram hist{0};
|
||||
auto from_bytes = [] (auto& b) {
|
||||
return dht::global_partitioner().from_sstring(utf8_type->to_string(b));
|
||||
};
|
||||
dht::token_range_vector ranges;
|
||||
::compat::unwrap_into(
|
||||
wrapping_range<dht::token>({{ from_bytes(r.start), false }}, {{ from_bytes(r.end) }}),
|
||||
dht::token_comparator(),
|
||||
[&] (auto&& rng) { ranges.push_back(std::move(rng)); });
|
||||
for (auto&& r : ranges) {
|
||||
auto rp_range = as_ring_position_range(r);
|
||||
for (auto&& sstable : cf.select_sstables(rp_range)) {
|
||||
count += sstable->estimated_keys_for_range(r);
|
||||
hist.merge(sstable->get_stats_metadata().estimated_partition_size);
|
||||
}
|
||||
}
|
||||
return {cf.schema(), r.start, r.end, count, count > 0 ? hist.mean() : 0};
|
||||
}
|
||||
estimates_for_current_keyspace(const database&, std::vector<token_range> local_ranges) const;
|
||||
};
|
||||
|
||||
struct virtual_reader {
|
||||
@@ -332,6 +68,12 @@ struct virtual_reader {
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns the primary ranges for the local node.
|
||||
* Used for testing as well.
|
||||
*/
|
||||
future<std::vector<token_range>> get_local_ranges();
|
||||
|
||||
} // namespace size_estimates
|
||||
|
||||
} // namespace db
|
||||
|
||||
@@ -44,6 +44,11 @@ namespace db::view {
|
||||
// columns. When reading the results from the scylla_views_builds_in_progress
|
||||
// table, we adjust the clustering key (we shed the cpu_id column) and map
|
||||
// back the regular columns.
|
||||
// Since mutation fragment consumers expect clustering_row fragments
|
||||
// not to be duplicated for given primary key, previous clustering key
|
||||
// is stored between mutation fragments. If the clustering key becomes
|
||||
// the same as the previous one (as a result of trimming cpu_id),
|
||||
// the duplicated fragment is ignored.
|
||||
class build_progress_virtual_reader {
|
||||
database& _db;
|
||||
|
||||
@@ -55,6 +60,7 @@ class build_progress_virtual_reader {
|
||||
const query::partition_slice& _legacy_slice;
|
||||
query::partition_slice _slice;
|
||||
flat_mutation_reader _underlying;
|
||||
std::optional<clustering_key> _previous_clustering_key;
|
||||
|
||||
build_progress_reader(
|
||||
schema_ptr legacy_schema,
|
||||
@@ -79,7 +85,8 @@ class build_progress_virtual_reader {
|
||||
pc,
|
||||
std::move(trace_state),
|
||||
fwd,
|
||||
fwd_mr)) {
|
||||
fwd_mr))
|
||||
, _previous_clustering_key() {
|
||||
}
|
||||
|
||||
const schema& underlying_schema() const {
|
||||
@@ -127,8 +134,13 @@ class build_progress_virtual_reader {
|
||||
legacy_in_progress_row.append_cell(_legacy_generation_number_col, std::move(c));
|
||||
}
|
||||
});
|
||||
auto ck = adjust_ckey(scylla_in_progress_row.key());
|
||||
if (_previous_clustering_key && ck.equal(*_schema, *_previous_clustering_key)) {
|
||||
continue;
|
||||
}
|
||||
_previous_clustering_key = ck;
|
||||
mf = clustering_row(
|
||||
adjust_ckey(scylla_in_progress_row.key()),
|
||||
std::move(ck),
|
||||
std::move(scylla_in_progress_row.tomb()),
|
||||
std::move(scylla_in_progress_row.marker()),
|
||||
std::move(legacy_in_progress_row));
|
||||
@@ -140,6 +152,8 @@ class build_progress_virtual_reader {
|
||||
adjust_ckey(scylla_in_progress_rt.end),
|
||||
scylla_in_progress_rt.end_kind,
|
||||
scylla_in_progress_rt.tomb);
|
||||
} else if (mf.is_end_of_partition()) {
|
||||
_previous_clustering_key.reset();
|
||||
}
|
||||
push_mutation_fragment(std::move(mf));
|
||||
}
|
||||
@@ -192,4 +206,4 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,7 +83,7 @@ view_info::view_info(const schema& schema, const raw_view_info& raw_view_info)
|
||||
cql3::statements::select_statement& view_info::select_statement() const {
|
||||
if (!_select_statement) {
|
||||
shared_ptr<cql3::statements::raw::select_statement> raw;
|
||||
if (is_index()) {
|
||||
if (service::get_local_storage_service().db().local().find_column_family(base_id()).get_index_manager().is_global_index(_schema)) {
|
||||
// Token column is the first clustering column
|
||||
auto token_column_it = boost::range::find_if(_schema.all_columns(), std::mem_fn(&column_definition::is_clustering_key));
|
||||
auto real_columns = _schema.all_columns() | boost::adaptors::filtered([this, token_column_it](const column_definition& cdef) {
|
||||
@@ -143,10 +143,9 @@ void view_info::initialize_base_dependent_fields(const schema& base) {
|
||||
}
|
||||
|
||||
bool view_info::is_index() const {
|
||||
if (!_is_index) {
|
||||
_is_index = service::get_local_storage_service().db().local().find_column_family(base_id()).get_index_manager().is_index(_schema);
|
||||
}
|
||||
return *_is_index;
|
||||
//TODO(sarna): result of this call can be cached instead of calling index_manager::is_index every time
|
||||
column_family& base_cf = service::get_local_storage_service().db().local().find_column_family(base_id());
|
||||
return base_cf.get_index_manager().is_index(view_ptr(_schema.shared_from_this()));
|
||||
}
|
||||
|
||||
namespace db {
|
||||
@@ -450,7 +449,7 @@ void create_virtual_column(schema_builder& builder, const bytes& name, const dat
|
||||
// A map has keys and values. We don't need these values,
|
||||
// and can use empty values instead.
|
||||
auto mtype = dynamic_pointer_cast<const map_type_impl>(type);
|
||||
builder.with_column(name, map_type_impl::get_instance(mtype->get_values_type(), empty_type, true), column_kind::regular_column, column_view_virtual::yes);
|
||||
builder.with_column(name, map_type_impl::get_instance(mtype->get_keys_type(), empty_type, true), column_kind::regular_column, column_view_virtual::yes);
|
||||
} else if (ctype->is_set()) {
|
||||
// A set's cell has nothing beyond the keys, so the
|
||||
// virtual version of a set is, unfortunately, a complete
|
||||
@@ -1158,6 +1157,10 @@ future<> view_builder::stop() {
|
||||
return _sem.wait().then([this] {
|
||||
_sem.broken();
|
||||
return _build_step.join();
|
||||
}).handle_exception_type([] (const broken_semaphore&) {
|
||||
// ignored
|
||||
}).handle_exception_type([] (const semaphore_timed_out&) {
|
||||
// ignored
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -24,7 +24,9 @@
|
||||
namespace db::view {
|
||||
|
||||
future<> view_update_generator::start() {
|
||||
_started = seastar::async([this]() mutable {
|
||||
thread_attributes attr;
|
||||
attr.sched_group = _db.get_streaming_scheduling_group();
|
||||
_started = seastar::async(std::move(attr), [this]() mutable {
|
||||
while (!_as.abort_requested()) {
|
||||
if (_sstables_with_tables.empty()) {
|
||||
_pending_sstables.wait().get();
|
||||
|
||||
45
dist/ami/build_ami.sh
vendored
45
dist/ami/build_ami.sh
vendored
@@ -1,6 +1,7 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
PRODUCT=$(cat SCYLLA-PRODUCT-FILE)
|
||||
./SCYLLA-VERSION-GEN
|
||||
PRODUCT=$(cat build/SCYLLA-PRODUCT-FILE)
|
||||
|
||||
if [ ! -e dist/ami/build_ami.sh ]; then
|
||||
echo "run build_ami.sh in top of scylla dir"
|
||||
@@ -16,6 +17,7 @@ print_usage() {
|
||||
exit 1
|
||||
}
|
||||
LOCALRPM=0
|
||||
REPO_FOR_INSTALL=
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
"--localrpm")
|
||||
@@ -23,10 +25,12 @@ while [ $# -gt 0 ]; do
|
||||
shift 1
|
||||
;;
|
||||
"--repo")
|
||||
REPO_FOR_INSTALL=$2
|
||||
INSTALL_ARGS="$INSTALL_ARGS --repo $2"
|
||||
shift 2
|
||||
;;
|
||||
"--repo-for-install")
|
||||
REPO_FOR_INSTALL=$2
|
||||
INSTALL_ARGS="$INSTALL_ARGS --repo-for-install $2"
|
||||
shift 2
|
||||
;;
|
||||
@@ -123,6 +127,43 @@ if [ $LOCALRPM -eq 1 ]; then
|
||||
cd ../..
|
||||
cp build/$PRODUCT-ami/build/RPMS/noarch/$PRODUCT-ami-`cat build/$PRODUCT-ami/build/SCYLLA-VERSION-FILE`-`cat build/$PRODUCT-ami/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/$PRODUCT-ami.noarch.rpm
|
||||
fi
|
||||
if [ ! -f dist/ami/files/$PRODUCT-python3.x86_64.rpm ]; then
|
||||
reloc/python3/build_reloc.sh
|
||||
reloc/python3/build_rpm.sh
|
||||
cp build/redhat/RPMS/x86_64/$PRODUCT-python3*.x86_64.rpm dist/ami/files/$PRODUCT-python3.x86_64.rpm
|
||||
fi
|
||||
|
||||
SCYLLA_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} dist/ami/files/$PRODUCT.x86_64.rpm || true)
|
||||
SCYLLA_AMI_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} dist/ami/files/$PRODUCT-ami.noarch.rpm || true)
|
||||
SCYLLA_JMX_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} dist/ami/files/$PRODUCT-jmx.noarch.rpm || true)
|
||||
SCYLLA_TOOLS_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} dist/ami/files/$PRODUCT-tools.noarch.rpm || true)
|
||||
SCYLLA_PYTHON3_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} dist/ami/files/$PRODUCT-python3.x86_64.rpm || true)
|
||||
else
|
||||
if [ -z "$REPO_FOR_INSTALL" ]; then
|
||||
print_usage
|
||||
exit 1
|
||||
fi
|
||||
if [ ! -f /usr/bin/yumdownloader ]; then
|
||||
if is_redhat_variant; then
|
||||
sudo yum install /usr/bin/yumdownloader
|
||||
else
|
||||
sudo apt-get install yum-utils
|
||||
fi
|
||||
fi
|
||||
if [ ! -f /usr/bin/curl ]; then
|
||||
pkg_install curl
|
||||
fi
|
||||
TMPREPO=$(mktemp -u -p /etc/yum.repos.d/ --suffix .repo)
|
||||
sudo curl -o $TMPREPO $REPO_FOR_INSTALL
|
||||
rm -rf build/ami_packages
|
||||
mkdir -p build/ami_packages
|
||||
yumdownloader --downloaddir build/ami_packages/ $PRODUCT $PRODUCT-kernel-conf $PRODUCT-conf $PRODUCT-server $PRODUCT-debuginfo $PRODUCT-ami $PRODUCT-jmx $PRODUCT-tools-core $PRODUCT-tools $PRODUCT-python3
|
||||
sudo rm -f $TMPREPO
|
||||
SCYLLA_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} build/ami_packages/$PRODUCT-[0-9]*.rpm || true)
|
||||
SCYLLA_AMI_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} build/ami_packages/$PRODUCT-ami-*.rpm || true)
|
||||
SCYLLA_JMX_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} build/ami_packages/$PRODUCT-jmx-*.rpm || true)
|
||||
SCYLLA_TOOLS_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} build/ami_packages/$PRODUCT-tools-[0-9]*.rpm || true)
|
||||
SCYLLA_PYTHON3_VERSION=$(rpm -q --qf %{VERSION}-%{RELEASE} build/ami_packages/$PRODUCT-python3-*.rpm || true)
|
||||
fi
|
||||
|
||||
cd dist/ami
|
||||
@@ -147,4 +188,4 @@ if [ ! -d packer ]; then
|
||||
cd -
|
||||
fi
|
||||
|
||||
env PACKER_LOG=1 PACKER_LOG_PATH=../../build/ami.log packer/packer build -var-file=variables.json -var install_args="$INSTALL_ARGS" -var region="$REGION" -var source_ami="$AMI" -var ssh_username="$SSH_USERNAME" scylla.json
|
||||
env PACKER_LOG=1 PACKER_LOG_PATH=../../build/ami.log packer/packer build -var-file=variables.json -var install_args="$INSTALL_ARGS" -var region="$REGION" -var source_ami="$AMI" -var ssh_username="$SSH_USERNAME" -var scylla_version="$SCYLLA_VERSION" -var scylla_ami_version="$SCYLLA_AMI_VERSION" -var scylla_jmx_version="$SCYLLA_JMX_VERSION" -var scylla_tools_version="$SCYLLA_TOOLS_VERSION" -var scylla_python3_version="$SCYLLA_PYTHON3_VERSION" scylla.json
|
||||
|
||||
10
dist/ami/scylla.json
vendored
10
dist/ami/scylla.json
vendored
@@ -56,7 +56,15 @@
|
||||
"ssh_username": "{{user `ssh_username`}}",
|
||||
"subnet_id": "{{user `subnet_id`}}",
|
||||
"type": "amazon-ebs",
|
||||
"user_data_file": "user_data.txt"
|
||||
"user_data_file": "user_data.txt",
|
||||
"ami_description": "scylla-{{user `scylla_version`}} scylla-ami-{{user `scylla_ami_version`}} scylla-jmx-{{user `scylla_jmx_version`}} scylla-tools-{{user `scylla_tools_version`}} scylla-python3-{{user `scylla_python3_version`}}",
|
||||
"tags": {
|
||||
"ScyllaVersion": "{{user `scylla_version`}}",
|
||||
"ScyllaAMIVersion": "{{user `scylla_ami_version`}}",
|
||||
"ScyllaJMXVersion": "{{user `scylla_jmx_version`}}",
|
||||
"ScyllaToolsVersion": "{{user `scylla_tools_version`}}",
|
||||
"ScyllaPython3Version": "{{user `scylla_python3_version`}}"
|
||||
}
|
||||
}
|
||||
],
|
||||
"provisioners": [
|
||||
|
||||
11
dist/common/scripts/scylla_io_setup
vendored
11
dist/common/scripts/scylla_io_setup
vendored
@@ -60,6 +60,17 @@ if __name__ == "__main__":
|
||||
disk_properties["read_bandwidth"] = 2015342735 * nr_disks
|
||||
disk_properties["write_iops"] = 181500 * nr_disks
|
||||
disk_properties["write_bandwidth"] = 808775652 * nr_disks
|
||||
elif idata.instance_class() == "i3en":
|
||||
if idata.instance() in ("i3en.large", "i3.xlarge", "i3en.2xlarge"):
|
||||
disk_properties["read_iops"] = 46489
|
||||
disk_properties["read_bandwidth"] = 353437280
|
||||
disk_properties["write_iops"] = 36680
|
||||
disk_properties["write_bandwidth"] = 164766656
|
||||
else:
|
||||
disk_properties["read_iops"] = 278478 * nr_disks
|
||||
disk_properties["read_bandwidth"] = 3029172992 * nr_disks
|
||||
disk_properties["write_iops"] = 221909 * nr_disks
|
||||
disk_properties["write_bandwidth"] = 1020482432 * nr_disks
|
||||
elif idata.instance_class() == "i2":
|
||||
disk_properties["read_iops"] = 64000 * nr_disks
|
||||
disk_properties["read_bandwidth"] = 507338935 * nr_disks
|
||||
|
||||
35
dist/common/scripts/scylla_setup
vendored
35
dist/common/scripts/scylla_setup
vendored
@@ -95,6 +95,9 @@ def do_verify_package(pkg):
|
||||
res = run('rpm -q {}'.format(pkg), silent=True, exception=False)
|
||||
elif is_gentoo_variant():
|
||||
res = 0 if len(glob.glob('/var/db/pkg/*/{}-*'.format(pkg))) else 1
|
||||
else:
|
||||
print("OS variant not recognized")
|
||||
res = 1
|
||||
if res != 0:
|
||||
print('{} package is not installed.'.format(pkg))
|
||||
sys.exit(1)
|
||||
@@ -252,22 +255,22 @@ if __name__ == '__main__':
|
||||
if not os.path.exists('/etc/scylla.d/housekeeping.cfg'):
|
||||
version_check = interactive_ask_service('Do you want to enable Scylla to check if there is a newer version of Scylla available?', 'Yes - start the Scylla-housekeeping service to check for a newer version. This check runs periodically. No - skips this step.', version_check)
|
||||
args.no_version_check = not version_check
|
||||
if version_check:
|
||||
with open('/etc/scylla.d/housekeeping.cfg', 'w') as f:
|
||||
f.write('[housekeeping]\ncheck-version: True\n')
|
||||
if is_systemd():
|
||||
systemd_unit('scylla-housekeeping-daily.timer').unmask()
|
||||
systemd_unit('scylla-housekeeping-restart.timer').unmask()
|
||||
else:
|
||||
with open('/etc/scylla.d/housekeeping.cfg', 'w') as f:
|
||||
f.write('[housekeeping]\ncheck-version: False\n')
|
||||
if is_systemd():
|
||||
hk_daily = systemd_unit('scylla-housekeeping-daily.timer')
|
||||
hk_daily.mask()
|
||||
hk_daily.stop()
|
||||
hk_restart = systemd_unit('scylla-housekeeping-restart.timer')
|
||||
hk_restart.mask()
|
||||
hk_restart.stop()
|
||||
if version_check:
|
||||
with open('/etc/scylla.d/housekeeping.cfg', 'w') as f:
|
||||
f.write('[housekeeping]\ncheck-version: True\n')
|
||||
if is_systemd():
|
||||
systemd_unit('scylla-housekeeping-daily.timer').unmask()
|
||||
systemd_unit('scylla-housekeeping-restart.timer').unmask()
|
||||
else:
|
||||
with open('/etc/scylla.d/housekeeping.cfg', 'w') as f:
|
||||
f.write('[housekeeping]\ncheck-version: False\n')
|
||||
if is_systemd():
|
||||
hk_daily = systemd_unit('scylla-housekeeping-daily.timer')
|
||||
hk_daily.mask()
|
||||
hk_daily.stop()
|
||||
hk_restart = systemd_unit('scylla-housekeeping-restart.timer')
|
||||
hk_restart.mask()
|
||||
hk_restart.stop()
|
||||
|
||||
cur_version=out('scylla --version', exception=False)
|
||||
if len(cur_version) > 0:
|
||||
|
||||
10
dist/common/scripts/scylla_util.py
vendored
10
dist/common/scripts/scylla_util.py
vendored
@@ -119,7 +119,7 @@ class aws_instance:
|
||||
return self._type.split(".")[0]
|
||||
|
||||
def is_supported_instance_class(self):
|
||||
if self.instance_class() in ['i2', 'i3']:
|
||||
if self.instance_class() in ['i2', 'i3', 'i3en']:
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -128,7 +128,7 @@ class aws_instance:
|
||||
instance_size = self.instance_size()
|
||||
if instance_class in ['c3', 'c4', 'd2', 'i2', 'r3']:
|
||||
return 'ixgbevf'
|
||||
if instance_class in ['c5', 'c5d', 'f1', 'g3', 'h1', 'i3', 'm5', 'm5d', 'p2', 'p3', 'r4', 'x1']:
|
||||
if instance_class in ['c5', 'c5d', 'f1', 'g3', 'h1', 'i3', 'i3en', 'm5', 'm5d', 'p2', 'p3', 'r4', 'x1']:
|
||||
return 'ena'
|
||||
if instance_class == 'm4':
|
||||
if instance_size == '16xlarge':
|
||||
@@ -304,7 +304,7 @@ def parse_os_release_line(line):
|
||||
val = shlex.split(data)[0]
|
||||
return (id, val.split(' ') if id == 'ID' or id == 'ID_LIKE' else val)
|
||||
|
||||
os_release = dict([parse_os_release_line(x) for x in open('/etc/os-release').read().splitlines()])
|
||||
os_release = dict([parse_os_release_line(x) for x in open('/etc/os-release').read().splitlines() if re.match(r'\w+=', x) ])
|
||||
|
||||
def is_debian_variant():
|
||||
d = os_release['ID_LIKE'] if 'ID_LIKE' in os_release else os_release['ID']
|
||||
@@ -313,7 +313,7 @@ def is_debian_variant():
|
||||
|
||||
def is_redhat_variant():
|
||||
d = os_release['ID_LIKE'] if 'ID_LIKE' in os_release else os_release['ID']
|
||||
return ('rhel' in d) or ('fedora' in d)
|
||||
return ('rhel' in d) or ('fedora' in d) or ('ol') in d
|
||||
|
||||
def is_gentoo_variant():
|
||||
return ('gentoo' in os_release['ID'])
|
||||
@@ -476,6 +476,8 @@ def create_perftune_conf(nic='eth0'):
|
||||
|
||||
|
||||
def is_valid_nic(nic):
|
||||
if len(nic) == 0:
|
||||
return False
|
||||
return os.path.exists('/sys/class/net/{}'.format(nic))
|
||||
|
||||
# Remove this when we do not support SET_NIC configuration value anymore
|
||||
|
||||
2
dist/debian/build_deb.sh
vendored
2
dist/debian/build_deb.sh
vendored
@@ -125,7 +125,7 @@ if [ -z "$TARGET" ]; then
|
||||
fi
|
||||
RELOC_PKG_FULLPATH=$(readlink -f $RELOC_PKG)
|
||||
RELOC_PKG_BASENAME=$(basename $RELOC_PKG)
|
||||
SCYLLA_VERSION=$(cat SCYLLA-VERSION-FILE)
|
||||
SCYLLA_VERSION=$(cat SCYLLA-VERSION-FILE | sed 's/\.rc/~rc/')
|
||||
SCYLLA_RELEASE=$(cat SCYLLA-RELEASE-FILE)
|
||||
|
||||
ln -fv $RELOC_PKG_FULLPATH ../$PRODUCT-server_$SCYLLA_VERSION-$SCYLLA_RELEASE.orig.tar.gz
|
||||
|
||||
2
dist/debian/control.mustache
vendored
2
dist/debian/control.mustache
vendored
@@ -16,7 +16,7 @@ Conflicts: {{product}}-server (<< 1.1)
|
||||
|
||||
Package: {{product}}-server
|
||||
Architecture: amd64
|
||||
Depends: ${shlibs:Depends}, ${misc:Depends}, adduser, hwloc-nox, {{product}}-conf, python-yaml, python-urwid, python-requests, curl, util-linux, python3-yaml, python3, uuid-runtime, pciutils, python3-pyudev, gzip, realpath | coreutils, num-utils, file
|
||||
Depends: ${shlibs:Depends}, ${misc:Depends}, adduser, hwloc-nox, {{product}}-conf, {{product}}-python3, curl, util-linux, uuid-runtime, pciutils, gzip, realpath | coreutils, num-utils, file
|
||||
Description: Scylla database server binaries
|
||||
Scylla is a highly scalable, eventually consistent, distributed,
|
||||
partitioned row DB.
|
||||
|
||||
30
dist/debian/debian/adjust_bin
vendored
Executable file
30
dist/debian/debian/adjust_bin
vendored
Executable file
@@ -0,0 +1,30 @@
|
||||
#!/bin/bash -ex
|
||||
|
||||
root="$1"
|
||||
bin="$2"
|
||||
prefix="/opt/scylladb"
|
||||
|
||||
[ "$bin" = patchelf ] && exit 0
|
||||
|
||||
patchelf() {
|
||||
# patchelf comes from the build system, so it needs the build system's ld.so and
|
||||
# shared libraries. We can't use patchelf on patchelf itself, so invoke it via
|
||||
# ld.so.
|
||||
LD_LIBRARY_PATH="$root/$prefix/bin/libreloc" "$root/$prefix"/libreloc/ld.so "$root/$prefix"/libexec/patchelf "$@"
|
||||
}
|
||||
|
||||
# We could add --set-rpath too, but then debugedit (called by rpmbuild) barfs
|
||||
# on the result. So use LD_LIBRARY_PATH in the thunk, below.
|
||||
patchelf \
|
||||
--set-interpreter "$prefix/libreloc/ld.so" \
|
||||
"$root/$prefix/libexec/$bin"
|
||||
mkdir -p "$root/$prefix/bin"
|
||||
cat > "$root/$prefix/bin/$bin" <<EOF
|
||||
#!/bin/bash -e
|
||||
export GNUTLS_SYSTEM_PRIORITY_FILE="\${GNUTLS_SYSTEM_PRIORITY_FILE-$prefix/libreloc/gnutls.config}"
|
||||
export LD_LIBRARY_PATH="$prefix/libreloc"
|
||||
exec -a "\$0" "$prefix/libexec/$bin" "\$@"
|
||||
EOF
|
||||
|
||||
chmod +x "$root/$prefix/bin/$bin"
|
||||
|
||||
6
dist/debian/debian/scylla-server.postrm
vendored
6
dist/debian/debian/scylla-server.postrm
vendored
@@ -4,7 +4,11 @@ set -e
|
||||
|
||||
case "$1" in
|
||||
purge|remove)
|
||||
rm -rf /etc/systemd/system/scylla-server.service.d/
|
||||
# We need to keep dependencies.conf and sysconfdir.conf on 'remove',
|
||||
# otherwise it will be missing after rollback.
|
||||
if [ "$1" = "purge" ]; then
|
||||
rm -rf /etc/systemd/system/scylla-server.service.d/
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
|
||||
140
dist/debian/python3/build_deb.sh
vendored
Executable file
140
dist/debian/python3/build_deb.sh
vendored
Executable file
@@ -0,0 +1,140 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
PRODUCT=$(cat SCYLLA-PRODUCT-FILE)
|
||||
|
||||
. /etc/os-release
|
||||
print_usage() {
|
||||
echo "build_deb.sh --reloc-pkg build/release/scylla-python3-package.tar.gz"
|
||||
echo " --reloc-pkg specify relocatable package path"
|
||||
exit 1
|
||||
}
|
||||
|
||||
TARGET=stable
|
||||
RELOC_PKG=
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
"--reloc-pkg")
|
||||
RELOC_PKG=$2
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
print_usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
is_redhat_variant() {
|
||||
[ -f /etc/redhat-release ]
|
||||
}
|
||||
is_debian_variant() {
|
||||
[ -f /etc/debian_version ]
|
||||
}
|
||||
pkg_install() {
|
||||
if is_redhat_variant; then
|
||||
sudo yum install -y $1
|
||||
elif is_debian_variant; then
|
||||
sudo apt-get install -y $1
|
||||
else
|
||||
echo "Requires to install following command: $1"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
if [ ! -e SCYLLA-RELOCATABLE-FILE ]; then
|
||||
echo "do not directly execute build_deb.sh, use reloc/build_deb.sh instead."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "$(arch)" != "x86_64" ]; then
|
||||
echo "Unsupported architecture: $(arch)"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "$RELOC_PKG" ]; then
|
||||
print_usage
|
||||
exit 1
|
||||
fi
|
||||
if [ ! -f "$RELOC_PKG" ]; then
|
||||
echo "$RELOC_PKG is not found."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -e debian ]; then
|
||||
rm -rf debian
|
||||
fi
|
||||
if is_debian_variant; then
|
||||
sudo apt-get -y update
|
||||
fi
|
||||
# this hack is needed since some environment installs 'git-core' package, it's
|
||||
# subset of the git command and doesn't works for our git-archive-all script.
|
||||
if is_redhat_variant && [ ! -f /usr/libexec/git-core/git-submodule ]; then
|
||||
sudo yum install -y git
|
||||
fi
|
||||
if [ ! -f /usr/bin/git ]; then
|
||||
pkg_install git
|
||||
fi
|
||||
if [ ! -f /usr/bin/python ]; then
|
||||
pkg_install python
|
||||
fi
|
||||
if [ ! -f /usr/bin/debuild ]; then
|
||||
pkg_install devscripts
|
||||
fi
|
||||
if [ ! -f /usr/bin/dh_testdir ]; then
|
||||
pkg_install debhelper
|
||||
fi
|
||||
if [ ! -f /usr/bin/fakeroot ]; then
|
||||
pkg_install fakeroot
|
||||
fi
|
||||
if [ ! -f /usr/bin/pystache ]; then
|
||||
if is_redhat_variant; then
|
||||
sudo yum install -y /usr/bin/pystache
|
||||
elif is_debian_variant; then
|
||||
sudo apt-get install -y python-pystache
|
||||
fi
|
||||
fi
|
||||
if [ ! -f /usr/bin/file ]; then
|
||||
pkg_install file
|
||||
fi
|
||||
if is_debian_variant && [ ! -f /usr/share/doc/python-pkg-resources/copyright ]; then
|
||||
sudo apt-get install -y python-pkg-resources
|
||||
fi
|
||||
|
||||
if [ "$ID" = "ubuntu" ] && [ ! -f /usr/share/keyrings/debian-archive-keyring.gpg ]; then
|
||||
sudo apt-get install -y debian-archive-keyring
|
||||
fi
|
||||
if [ "$ID" = "debian" ] && [ ! -f /usr/share/keyrings/ubuntu-archive-keyring.gpg ]; then
|
||||
sudo apt-get install -y ubuntu-archive-keyring
|
||||
fi
|
||||
|
||||
if [ -z "$TARGET" ]; then
|
||||
if is_debian_variant; then
|
||||
if [ ! -f /usr/bin/lsb_release ]; then
|
||||
pkg_install lsb-release
|
||||
fi
|
||||
TARGET=`lsb_release -c|awk '{print $2}'`
|
||||
else
|
||||
echo "Please specify target"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
RELOC_PKG_FULLPATH=$(readlink -f $RELOC_PKG)
|
||||
RELOC_PKG_BASENAME=$(basename $RELOC_PKG)
|
||||
SCYLLA_VERSION=$(cat SCYLLA-VERSION-FILE)
|
||||
SCYLLA_RELEASE=$(cat SCYLLA-RELEASE-FILE)
|
||||
|
||||
ln -fv $RELOC_PKG_FULLPATH ../$PRODUCT-python3_$SCYLLA_VERSION-$SCYLLA_RELEASE.orig.tar.gz
|
||||
|
||||
cp -al dist/debian/python3/debian debian
|
||||
if [ "$PRODUCT" != "scylla" ]; then
|
||||
for i in debian/scylla-*;do
|
||||
mv $i ${i/scylla-/$PRODUCT-}
|
||||
done
|
||||
fi
|
||||
REVISION="1"
|
||||
MUSTACHE_DIST="\"debian\": true, \"product\": \"$PRODUCT\", \"$PRODUCT\": true"
|
||||
pystache dist/debian/python3/changelog.mustache "{ $MUSTACHE_DIST, \"version\": \"$SCYLLA_VERSION\", \"release\": \"$SCYLLA_RELEASE\", \"revision\": \"$REVISION\", \"codename\": \"$TARGET\" }" > debian/changelog
|
||||
pystache dist/debian/python3/rules.mustache "{ $MUSTACHE_DIST }" > debian/rules
|
||||
pystache dist/debian/python3/control.mustache "{ $MUSTACHE_DIST }" > debian/control
|
||||
chmod a+rx debian/rules
|
||||
|
||||
debuild -rfakeroot -us -uc
|
||||
5
dist/debian/python3/changelog.mustache
vendored
Normal file
5
dist/debian/python3/changelog.mustache
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
{{product}}-python3 ({{version}}-{{release}}-{{revision}}) {{codename}}; urgency=medium
|
||||
|
||||
* Initial release.
|
||||
|
||||
-- Takuya ASADA <syuu@scylladb.com> Mon, 24 Aug 2015 09:22:55 +0000
|
||||
16
dist/debian/python3/control.mustache
vendored
Normal file
16
dist/debian/python3/control.mustache
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
Source: {{product}}-python3
|
||||
Maintainer: Takuya ASADA <syuu@scylladb.com>
|
||||
Homepage: http://scylladb.com
|
||||
Section: python
|
||||
Priority: optional
|
||||
X-Python3-Version: >= 3.4
|
||||
Standards-Version: 3.9.5
|
||||
|
||||
Package: {{product}}-python3
|
||||
Architecture: amd64
|
||||
Description: A standalone python3 interpreter that can be moved around different Linux machines
|
||||
This is a self-contained python interpreter that can be moved around
|
||||
different Linux machines as long as they run a new enough kernel (where
|
||||
new enough is defined by whichever Python module uses any kernel
|
||||
functionality). All shared libraries needed for the interpreter to
|
||||
operate are shipped with it.
|
||||
1
dist/debian/python3/debian/compat
vendored
Normal file
1
dist/debian/python3/debian/compat
vendored
Normal file
@@ -0,0 +1 @@
|
||||
9
|
||||
995
dist/debian/python3/debian/copyright
vendored
Normal file
995
dist/debian/python3/debian/copyright
vendored
Normal file
@@ -0,0 +1,995 @@
|
||||
This package was put together by Klee Dienes <klee@debian.org> from
|
||||
sources from ftp.python.org:/pub/python, based on the Debianization by
|
||||
the previous maintainers Bernd S. Brentrup <bsb@uni-muenster.de> and
|
||||
Bruce Perens. Current maintainer is Matthias Klose <doko@debian.org>.
|
||||
|
||||
It was downloaded from http://python.org/
|
||||
|
||||
Copyright:
|
||||
|
||||
Upstream Author: Guido van Rossum <guido@cwi.nl> and others.
|
||||
|
||||
License:
|
||||
|
||||
The following text includes the Python license and licenses and
|
||||
acknowledgements for incorporated software. The licenses can be read
|
||||
in the HTML and texinfo versions of the documentation as well, after
|
||||
installing the pythonx.y-doc package. Licenses for files not licensed
|
||||
under the Python Licenses are found at the end of this file.
|
||||
|
||||
|
||||
Python License
|
||||
==============
|
||||
|
||||
A. HISTORY OF THE SOFTWARE
|
||||
==========================
|
||||
|
||||
Python was created in the early 1990s by Guido van Rossum at Stichting
|
||||
Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands
|
||||
as a successor of a language called ABC. Guido remains Python's
|
||||
principal author, although it includes many contributions from others.
|
||||
|
||||
In 1995, Guido continued his work on Python at the Corporation for
|
||||
National Research Initiatives (CNRI, see http://www.cnri.reston.va.us)
|
||||
in Reston, Virginia where he released several versions of the
|
||||
software.
|
||||
|
||||
In May 2000, Guido and the Python core development team moved to
|
||||
BeOpen.com to form the BeOpen PythonLabs team. In October of the same
|
||||
year, the PythonLabs team moved to Digital Creations (now Zope
|
||||
Corporation, see http://www.zope.com). In 2001, the Python Software
|
||||
Foundation (PSF, see http://www.python.org/psf/) was formed, a
|
||||
non-profit organization created specifically to own Python-related
|
||||
Intellectual Property. Zope Corporation is a sponsoring member of
|
||||
the PSF.
|
||||
|
||||
All Python releases are Open Source (see http://www.opensource.org for
|
||||
the Open Source Definition). Historically, most, but not all, Python
|
||||
releases have also been GPL-compatible; the table below summarizes
|
||||
the various releases.
|
||||
|
||||
Release Derived Year Owner GPL-
|
||||
from compatible? (1)
|
||||
|
||||
0.9.0 thru 1.2 1991-1995 CWI yes
|
||||
1.3 thru 1.5.2 1.2 1995-1999 CNRI yes
|
||||
1.6 1.5.2 2000 CNRI no
|
||||
2.0 1.6 2000 BeOpen.com no
|
||||
1.6.1 1.6 2001 CNRI yes (2)
|
||||
2.1 2.0+1.6.1 2001 PSF no
|
||||
2.0.1 2.0+1.6.1 2001 PSF yes
|
||||
2.1.1 2.1+2.0.1 2001 PSF yes
|
||||
2.2 2.1.1 2001 PSF yes
|
||||
2.1.2 2.1.1 2002 PSF yes
|
||||
2.1.3 2.1.2 2002 PSF yes
|
||||
2.2 and above 2.1.1 2001-now PSF yes
|
||||
|
||||
Footnotes:
|
||||
|
||||
(1) GPL-compatible doesn't mean that we're distributing Python under
|
||||
the GPL. All Python licenses, unlike the GPL, let you distribute
|
||||
a modified version without making your changes open source. The
|
||||
GPL-compatible licenses make it possible to combine Python with
|
||||
other software that is released under the GPL; the others don't.
|
||||
|
||||
(2) According to Richard Stallman, 1.6.1 is not GPL-compatible,
|
||||
because its license has a choice of law clause. According to
|
||||
CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1
|
||||
is "not incompatible" with the GPL.
|
||||
|
||||
Thanks to the many outside volunteers who have worked under Guido's
|
||||
direction to make these releases possible.
|
||||
|
||||
|
||||
B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON
|
||||
===============================================================
|
||||
|
||||
PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
|
||||
--------------------------------------------
|
||||
|
||||
1. This LICENSE AGREEMENT is between the Python Software Foundation
|
||||
("PSF"), and the Individual or Organization ("Licensee") accessing and
|
||||
otherwise using this software ("Python") in source or binary form and
|
||||
its associated documentation.
|
||||
|
||||
2. Subject to the terms and conditions of this License Agreement, PSF
|
||||
hereby grants Licensee a nonexclusive, royalty-free, world-wide
|
||||
license to reproduce, analyze, test, perform and/or display publicly,
|
||||
prepare derivative works, distribute, and otherwise use Python alone
|
||||
or in any derivative version, provided, however, that PSF's License
|
||||
Agreement and PSF's notice of copyright, i.e., "Copyright (c) 2001,
|
||||
2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
|
||||
2013, 2014 Python Software Foundation; All Rights Reserved" are
|
||||
retained in Python alone or in any derivative version prepared by
|
||||
Licensee.
|
||||
|
||||
3. In the event Licensee prepares a derivative work that is based on
|
||||
or incorporates Python or any part thereof, and wants to make
|
||||
the derivative work available to others as provided herein, then
|
||||
Licensee hereby agrees to include in any such work a brief summary of
|
||||
the changes made to Python.
|
||||
|
||||
4. PSF is making Python available to Licensee on an "AS IS"
|
||||
basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
|
||||
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
|
||||
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
|
||||
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
|
||||
INFRINGE ANY THIRD PARTY RIGHTS.
|
||||
|
||||
5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
|
||||
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
|
||||
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
|
||||
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
|
||||
|
||||
6. This License Agreement will automatically terminate upon a material
|
||||
breach of its terms and conditions.
|
||||
|
||||
7. Nothing in this License Agreement shall be deemed to create any
|
||||
relationship of agency, partnership, or joint venture between PSF and
|
||||
Licensee. This License Agreement does not grant permission to use PSF
|
||||
trademarks or trade name in a trademark sense to endorse or promote
|
||||
products or services of Licensee, or any third party.
|
||||
|
||||
8. By copying, installing or otherwise using Python, Licensee
|
||||
agrees to be bound by the terms and conditions of this License
|
||||
Agreement.
|
||||
|
||||
|
||||
BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0
|
||||
-------------------------------------------
|
||||
|
||||
BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1
|
||||
|
||||
1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an
|
||||
office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the
|
||||
Individual or Organization ("Licensee") accessing and otherwise using
|
||||
this software in source or binary form and its associated
|
||||
documentation ("the Software").
|
||||
|
||||
2. Subject to the terms and conditions of this BeOpen Python License
|
||||
Agreement, BeOpen hereby grants Licensee a non-exclusive,
|
||||
royalty-free, world-wide license to reproduce, analyze, test, perform
|
||||
and/or display publicly, prepare derivative works, distribute, and
|
||||
otherwise use the Software alone or in any derivative version,
|
||||
provided, however, that the BeOpen Python License is retained in the
|
||||
Software, alone or in any derivative version prepared by Licensee.
|
||||
|
||||
3. BeOpen is making the Software available to Licensee on an "AS IS"
|
||||
basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
|
||||
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND
|
||||
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
|
||||
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT
|
||||
INFRINGE ANY THIRD PARTY RIGHTS.
|
||||
|
||||
4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE
|
||||
SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS
|
||||
AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY
|
||||
DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
|
||||
|
||||
5. This License Agreement will automatically terminate upon a material
|
||||
breach of its terms and conditions.
|
||||
|
||||
6. This License Agreement shall be governed by and interpreted in all
|
||||
respects by the law of the State of California, excluding conflict of
|
||||
law provisions. Nothing in this License Agreement shall be deemed to
|
||||
create any relationship of agency, partnership, or joint venture
|
||||
between BeOpen and Licensee. This License Agreement does not grant
|
||||
permission to use BeOpen trademarks or trade names in a trademark
|
||||
sense to endorse or promote products or services of Licensee, or any
|
||||
third party. As an exception, the "BeOpen Python" logos available at
|
||||
http://www.pythonlabs.com/logos.html may be used according to the
|
||||
permissions granted on that web page.
|
||||
|
||||
7. By copying, installing or otherwise using the software, Licensee
|
||||
agrees to be bound by the terms and conditions of this License
|
||||
Agreement.
|
||||
|
||||
|
||||
CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1
|
||||
---------------------------------------
|
||||
|
||||
1. This LICENSE AGREEMENT is between the Corporation for National
|
||||
Research Initiatives, having an office at 1895 Preston White Drive,
|
||||
Reston, VA 20191 ("CNRI"), and the Individual or Organization
|
||||
("Licensee") accessing and otherwise using Python 1.6.1 software in
|
||||
source or binary form and its associated documentation.
|
||||
|
||||
2. Subject to the terms and conditions of this License Agreement, CNRI
|
||||
hereby grants Licensee a nonexclusive, royalty-free, world-wide
|
||||
license to reproduce, analyze, test, perform and/or display publicly,
|
||||
prepare derivative works, distribute, and otherwise use Python 1.6.1
|
||||
alone or in any derivative version, provided, however, that CNRI's
|
||||
License Agreement and CNRI's notice of copyright, i.e., "Copyright (c)
|
||||
1995-2001 Corporation for National Research Initiatives; All Rights
|
||||
Reserved" are retained in Python 1.6.1 alone or in any derivative
|
||||
version prepared by Licensee. Alternately, in lieu of CNRI's License
|
||||
Agreement, Licensee may substitute the following text (omitting the
|
||||
quotes): "Python 1.6.1 is made available subject to the terms and
|
||||
conditions in CNRI's License Agreement. This Agreement together with
|
||||
Python 1.6.1 may be located on the Internet using the following
|
||||
unique, persistent identifier (known as a handle): 1895.22/1013. This
|
||||
Agreement may also be obtained from a proxy server on the Internet
|
||||
using the following URL: http://hdl.handle.net/1895.22/1013".
|
||||
|
||||
3. In the event Licensee prepares a derivative work that is based on
|
||||
or incorporates Python 1.6.1 or any part thereof, and wants to make
|
||||
the derivative work available to others as provided herein, then
|
||||
Licensee hereby agrees to include in any such work a brief summary of
|
||||
the changes made to Python 1.6.1.
|
||||
|
||||
4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS"
|
||||
basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
|
||||
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND
|
||||
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
|
||||
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT
|
||||
INFRINGE ANY THIRD PARTY RIGHTS.
|
||||
|
||||
5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
|
||||
1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
|
||||
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1,
|
||||
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
|
||||
|
||||
6. This License Agreement will automatically terminate upon a material
|
||||
breach of its terms and conditions.
|
||||
|
||||
7. This License Agreement shall be governed by the federal
|
||||
intellectual property law of the United States, including without
|
||||
limitation the federal copyright law, and, to the extent such
|
||||
U.S. federal law does not apply, by the law of the Commonwealth of
|
||||
Virginia, excluding Virginia's conflict of law provisions.
|
||||
Notwithstanding the foregoing, with regard to derivative works based
|
||||
on Python 1.6.1 that incorporate non-separable material that was
|
||||
previously distributed under the GNU General Public License (GPL), the
|
||||
law of the Commonwealth of Virginia shall govern this License
|
||||
Agreement only as to issues arising under or with respect to
|
||||
Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this
|
||||
License Agreement shall be deemed to create any relationship of
|
||||
agency, partnership, or joint venture between CNRI and Licensee. This
|
||||
License Agreement does not grant permission to use CNRI trademarks or
|
||||
trade name in a trademark sense to endorse or promote products or
|
||||
services of Licensee, or any third party.
|
||||
|
||||
8. By clicking on the "ACCEPT" button where indicated, or by copying,
|
||||
installing or otherwise using Python 1.6.1, Licensee agrees to be
|
||||
bound by the terms and conditions of this License Agreement.
|
||||
|
||||
ACCEPT
|
||||
|
||||
|
||||
CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2
|
||||
--------------------------------------------------
|
||||
|
||||
Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam,
|
||||
The Netherlands. All rights reserved.
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and its
|
||||
documentation for any purpose and without fee is hereby granted,
|
||||
provided that the above copyright notice appear in all copies and that
|
||||
both that copyright notice and this permission notice appear in
|
||||
supporting documentation, and that the name of Stichting Mathematisch
|
||||
Centrum or CWI not be used in advertising or publicity pertaining to
|
||||
distribution of the software without specific, written prior
|
||||
permission.
|
||||
|
||||
STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
|
||||
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
|
||||
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
|
||||
Licenses and Acknowledgements for Incorporated Software
|
||||
=======================================================
|
||||
|
||||
Mersenne Twister
|
||||
----------------
|
||||
|
||||
The `_random' module includes code based on a download from
|
||||
`http://www.math.keio.ac.jp/~matumoto/MT2002/emt19937ar.html'. The
|
||||
following are the verbatim comments from the original code:
|
||||
|
||||
A C-program for MT19937, with initialization improved 2002/1/26.
|
||||
Coded by Takuji Nishimura and Makoto Matsumoto.
|
||||
|
||||
Before using, initialize the state by using init_genrand(seed)
|
||||
or init_by_array(init_key, key_length).
|
||||
|
||||
Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
3. The names of its contributors may not be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Any feedback is very welcome.
|
||||
http://www.math.keio.ac.jp/matumoto/emt.html
|
||||
email: matumoto@math.keio.ac.jp
|
||||
|
||||
|
||||
Sockets
|
||||
-------
|
||||
|
||||
The `socket' module uses the functions, `getaddrinfo', and
|
||||
`getnameinfo', which are coded in separate source files from the WIDE
|
||||
Project, `http://www.wide.ad.jp/about/index.html'.
|
||||
|
||||
Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. Neither the name of the project nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
|
||||
GAI_ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
|
||||
FOR GAI_ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON GAI_ANY THEORY OF LIABILITY, WHETHER
|
||||
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN GAI_ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
Floating point exception control
|
||||
--------------------------------
|
||||
|
||||
The source for the `fpectl' module includes the following notice:
|
||||
|
||||
---------------------------------------------------------------------
|
||||
/ Copyright (c) 1996. \
|
||||
| The Regents of the University of California. |
|
||||
| All rights reserved. |
|
||||
| |
|
||||
| Permission to use, copy, modify, and distribute this software for |
|
||||
| any purpose without fee is hereby granted, provided that this en- |
|
||||
| tire notice is included in all copies of any software which is or |
|
||||
| includes a copy or modification of this software and in all |
|
||||
| copies of the supporting documentation for such software. |
|
||||
| |
|
||||
| This work was produced at the University of California, Lawrence |
|
||||
| Livermore National Laboratory under contract no. W-7405-ENG-48 |
|
||||
| between the U.S. Department of Energy and The Regents of the |
|
||||
| University of California for the operation of UC LLNL. |
|
||||
| |
|
||||
| DISCLAIMER |
|
||||
| |
|
||||
| This software was prepared as an account of work sponsored by an |
|
||||
| agency of the United States Government. Neither the United States |
|
||||
| Government nor the University of California nor any of their em- |
|
||||
| ployees, makes any warranty, express or implied, or assumes any |
|
||||
| liability or responsibility for the accuracy, completeness, or |
|
||||
| usefulness of any information, apparatus, product, or process |
|
||||
| disclosed, or represents that its use would not infringe |
|
||||
| privately-owned rights. Reference herein to any specific commer- |
|
||||
| cial products, process, or service by trade name, trademark, |
|
||||
| manufacturer, or otherwise, does not necessarily constitute or |
|
||||
| imply its endorsement, recommendation, or favoring by the United |
|
||||
| States Government or the University of California. The views and |
|
||||
| opinions of authors expressed herein do not necessarily state or |
|
||||
| reflect those of the United States Government or the University |
|
||||
| of California, and shall not be used for advertising or product |
|
||||
\ endorsement purposes. /
|
||||
---------------------------------------------------------------------
|
||||
|
||||
|
||||
Cookie management
|
||||
-----------------
|
||||
|
||||
The `Cookie' module contains the following notice:
|
||||
|
||||
Copyright 2000 by Timothy O'Malley <timo@alum.mit.edu>
|
||||
|
||||
All Rights Reserved
|
||||
|
||||
Permission to use, copy, modify, and distribute this software
|
||||
and its documentation for any purpose and without fee is hereby
|
||||
granted, provided that the above copyright notice appear in all
|
||||
copies and that both that copyright notice and this permission
|
||||
notice appear in supporting documentation, and that the name of
|
||||
Timothy O'Malley not be used in advertising or publicity
|
||||
pertaining to distribution of the software without specific, written
|
||||
prior permission.
|
||||
|
||||
Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
||||
SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR
|
||||
ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
|
||||
Execution tracing
|
||||
-----------------
|
||||
|
||||
The `trace' module contains the following notice:
|
||||
|
||||
portions copyright 2001, Autonomous Zones Industries, Inc., all rights...
|
||||
err... reserved and offered to the public under the terms of the
|
||||
Python 2.2 license.
|
||||
Author: Zooko O'Whielacronx
|
||||
http://zooko.com/
|
||||
mailto:zooko@zooko.com
|
||||
|
||||
Copyright 2000, Mojam Media, Inc., all rights reserved.
|
||||
Author: Skip Montanaro
|
||||
|
||||
Copyright 1999, Bioreason, Inc., all rights reserved.
|
||||
Author: Andrew Dalke
|
||||
|
||||
Copyright 1995-1997, Automatrix, Inc., all rights reserved.
|
||||
Author: Skip Montanaro
|
||||
|
||||
Copyright 1991-1995, Stichting Mathematisch Centrum, all rights reserved.
|
||||
|
||||
Permission to use, copy, modify, and distribute this Python software and
|
||||
its associated documentation for any purpose without fee is hereby
|
||||
granted, provided that the above copyright notice appears in all copies,
|
||||
and that both that copyright notice and this permission notice appear in
|
||||
supporting documentation, and that the name of neither Automatrix,
|
||||
Bioreason or Mojam Media be used in advertising or publicity pertaining
|
||||
to distribution of the software without specific, written prior
|
||||
permission.
|
||||
|
||||
|
||||
UUencode and UUdecode functions
|
||||
-------------------------------
|
||||
|
||||
The `uu' module contains the following notice:
|
||||
|
||||
Copyright 1994 by Lance Ellinghouse
|
||||
Cathedral City, California Republic, United States of America.
|
||||
All Rights Reserved
|
||||
Permission to use, copy, modify, and distribute this software and its
|
||||
documentation for any purpose and without fee is hereby granted,
|
||||
provided that the above copyright notice appear in all copies and that
|
||||
both that copyright notice and this permission notice appear in
|
||||
supporting documentation, and that the name of Lance Ellinghouse
|
||||
not be used in advertising or publicity pertaining to distribution
|
||||
of the software without specific, written prior permission.
|
||||
LANCE ELLINGHOUSE DISCLAIMS ALL WARRANTIES WITH REGARD TO
|
||||
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
FITNESS, IN NO EVENT SHALL LANCE ELLINGHOUSE CENTRUM BE LIABLE
|
||||
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
Modified by Jack Jansen, CWI, July 1995:
|
||||
- Use binascii module to do the actual line-by-line conversion
|
||||
between ascii and binary. This results in a 1000-fold speedup. The C
|
||||
version is still 5 times faster, though.
|
||||
- Arguments more compliant with python standard
|
||||
|
||||
|
||||
XML Remote Procedure Calls
|
||||
--------------------------
|
||||
|
||||
The `xmlrpclib' module contains the following notice:
|
||||
|
||||
The XML-RPC client interface is
|
||||
|
||||
Copyright (c) 1999-2002 by Secret Labs AB
|
||||
Copyright (c) 1999-2002 by Fredrik Lundh
|
||||
|
||||
By obtaining, using, and/or copying this software and/or its
|
||||
associated documentation, you agree that you have read, understood,
|
||||
and will comply with the following terms and conditions:
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and
|
||||
its associated documentation for any purpose and without fee is
|
||||
hereby granted, provided that the above copyright notice appears in
|
||||
all copies, and that both that copyright notice and this permission
|
||||
notice appear in supporting documentation, and that the name of
|
||||
Secret Labs AB or the author not be used in advertising or publicity
|
||||
pertaining to distribution of the software without specific, written
|
||||
prior permission.
|
||||
|
||||
SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
||||
TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
||||
ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
||||
BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
||||
DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
OF THIS SOFTWARE.
|
||||
|
||||
Licenses for Software linked to
|
||||
===============================
|
||||
|
||||
Note that the choice of GPL compatibility outlined above doesn't extend
|
||||
to modules linked to particular libraries, since they change the
|
||||
effective License of the module binary.
|
||||
|
||||
|
||||
GNU Readline
|
||||
------------
|
||||
|
||||
The 'readline' module makes use of GNU Readline.
|
||||
|
||||
The GNU Readline Library is free software; you can redistribute it
|
||||
and/or modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2, or (at
|
||||
your option) any later version.
|
||||
|
||||
On Debian systems, you can find the complete statement in
|
||||
/usr/share/doc/readline-common/copyright'. A copy of the GNU General
|
||||
Public License is available in /usr/share/common-licenses/GPL-2'.
|
||||
|
||||
|
||||
OpenSSL
|
||||
-------
|
||||
|
||||
The '_ssl' module makes use of OpenSSL.
|
||||
|
||||
The OpenSSL toolkit stays under a dual license, i.e. both the
|
||||
conditions of the OpenSSL License and the original SSLeay license
|
||||
apply to the toolkit. Actually both licenses are BSD-style Open
|
||||
Source licenses. Note that both licenses are incompatible with
|
||||
the GPL.
|
||||
|
||||
On Debian systems, you can find the complete license text in
|
||||
/usr/share/doc/openssl/copyright'.
|
||||
|
||||
|
||||
Files with other licenses than the Python License
|
||||
-------------------------------------------------
|
||||
|
||||
Files: Include/dynamic_annotations.h
|
||||
Files: Python/dynamic_annotations.c
|
||||
Copyright: (c) 2008-2009, Google Inc.
|
||||
License: Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Files: Include/unicodeobject.h
|
||||
Copyright: (c) Corporation for National Research Initiatives.
|
||||
Copyright: (c) 1999 by Secret Labs AB.
|
||||
Copyright: (c) 1999 by Fredrik Lundh.
|
||||
License: By obtaining, using, and/or copying this software and/or its
|
||||
associated documentation, you agree that you have read, understood,
|
||||
and will comply with the following terms and conditions:
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and its
|
||||
associated documentation for any purpose and without fee is hereby
|
||||
granted, provided that the above copyright notice appears in all
|
||||
copies, and that both that copyright notice and this permission notice
|
||||
appear in supporting documentation, and that the name of Secret Labs
|
||||
AB or the author not be used in advertising or publicity pertaining to
|
||||
distribution of the software without specific, written prior
|
||||
permission.
|
||||
|
||||
SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
|
||||
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
|
||||
ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
Files: Lib/logging/*
|
||||
Copyright: 2001-2010 by Vinay Sajip. All Rights Reserved.
|
||||
License: Permission to use, copy, modify, and distribute this software and
|
||||
its documentation for any purpose and without fee is hereby granted,
|
||||
provided that the above copyright notice appear in all copies and that
|
||||
both that copyright notice and this permission notice appear in
|
||||
supporting documentation, and that the name of Vinay Sajip
|
||||
not be used in advertising or publicity pertaining to distribution
|
||||
of the software without specific, written prior permission.
|
||||
VINAY SAJIP DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
|
||||
ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
|
||||
VINAY SAJIP BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
|
||||
ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
Files: Lib/multiprocessing/*
|
||||
Files: Modules/_multiprocessing/*
|
||||
Copyright: (c) 2006-2008, R Oudkerk. All rights reserved.
|
||||
License: Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. Neither the name of author nor the names of any contributors may be
|
||||
used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGE.
|
||||
|
||||
Files: Lib/sqlite3/*
|
||||
Files: Modules/_sqlite/*
|
||||
Copyright: (C) 2004-2005 Gerhard Häring <gh@ghaering.de>
|
||||
License: This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the authors be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
Files: Lib/async*
|
||||
Copyright: Copyright 1996 by Sam Rushing
|
||||
License: Permission to use, copy, modify, and distribute this software and
|
||||
its documentation for any purpose and without fee is hereby
|
||||
granted, provided that the above copyright notice appear in all
|
||||
copies and that both that copyright notice and this permission
|
||||
notice appear in supporting documentation, and that the name of Sam
|
||||
Rushing not be used in advertising or publicity pertaining to
|
||||
distribution of the software without specific, written prior
|
||||
permission.
|
||||
|
||||
SAM RUSHING DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
|
||||
NO EVENT SHALL SAM RUSHING BE LIABLE FOR ANY SPECIAL, INDIRECT OR
|
||||
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
Files: Lib/tarfile.py
|
||||
Copyright: (C) 2002 Lars Gustaebel <lars@gustaebel.de>
|
||||
License: Permission is hereby granted, free of charge, to any person
|
||||
obtaining a copy of this software and associated documentation
|
||||
files (the "Software"), to deal in the Software without
|
||||
restriction, including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
Files: Lib/turtle.py
|
||||
Copyright: (C) 2006 - 2010 Gregor Lingl
|
||||
License: This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the authors be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
is copyright Gregor Lingl and licensed under a BSD-like license
|
||||
|
||||
Files: Modules/_ctypes/libffi/*
|
||||
Copyright: Copyright (C) 1996-2011 Red Hat, Inc and others.
|
||||
Copyright (C) 1996-2011 Anthony Green
|
||||
Copyright (C) 1996-2010 Free Software Foundation, Inc
|
||||
Copyright (c) 2003, 2004, 2006, 2007, 2008 Kaz Kojima
|
||||
Copyright (c) 2010, 2011, Plausible Labs Cooperative , Inc.
|
||||
Copyright (c) 2010 CodeSourcery
|
||||
Copyright (c) 1998 Andreas Schwab
|
||||
Copyright (c) 2000 Hewlett Packard Company
|
||||
Copyright (c) 2009 Bradley Smith
|
||||
Copyright (c) 2008 David Daney
|
||||
Copyright (c) 2004 Simon Posnjak
|
||||
Copyright (c) 2005 Axis Communications AB
|
||||
Copyright (c) 1998 Cygnus Solutions
|
||||
Copyright (c) 2004 Renesas Technology
|
||||
Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
|
||||
Copyright (c) 2002 Ranjit Mathew
|
||||
Copyright (c) 2002 Roger Sayle
|
||||
Copyright (c) 2000, 2007 Software AG
|
||||
Copyright (c) 2003 Jakub Jelinek
|
||||
Copyright (c) 2000, 2001 John Hornkvist
|
||||
Copyright (c) 1998 Geoffrey Keating
|
||||
Copyright (c) 2008 Björn König
|
||||
|
||||
License: Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
``Software''), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included
|
||||
in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
Documentation:
|
||||
Permission is granted to copy, distribute and/or modify this document
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 2, or (at your option) any
|
||||
later version. A copy of the license is included in the
|
||||
section entitled ``GNU General Public License''.
|
||||
|
||||
Files: Modules/_gestalt.c
|
||||
Copyright: 1991-1997 by Stichting Mathematisch Centrum, Amsterdam.
|
||||
License: Permission to use, copy, modify, and distribute this software and its
|
||||
documentation for any purpose and without fee is hereby granted,
|
||||
provided that the above copyright notice appear in all copies and that
|
||||
both that copyright notice and this permission notice appear in
|
||||
supporting documentation, and that the names of Stichting Mathematisch
|
||||
Centrum or CWI not be used in advertising or publicity pertaining to
|
||||
distribution of the software without specific, written prior permission.
|
||||
|
||||
STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
|
||||
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
|
||||
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
Files: Modules/syslogmodule.c
|
||||
Copyright: 1994 by Lance Ellinghouse
|
||||
License: Permission to use, copy, modify, and distribute this software and its
|
||||
documentation for any purpose and without fee is hereby granted,
|
||||
provided that the above copyright notice appear in all copies and that
|
||||
both that copyright notice and this permission notice appear in
|
||||
supporting documentation, and that the name of Lance Ellinghouse
|
||||
not be used in advertising or publicity pertaining to distribution
|
||||
of the software without specific, written prior permission.
|
||||
|
||||
LANCE ELLINGHOUSE DISCLAIMS ALL WARRANTIES WITH REGARD TO
|
||||
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
FITNESS, IN NO EVENT SHALL LANCE ELLINGHOUSE BE LIABLE FOR ANY SPECIAL,
|
||||
INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
|
||||
FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
||||
WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
Files: Modules/zlib/*
|
||||
Copyright: (C) 1995-2010 Jean-loup Gailly and Mark Adler
|
||||
License: This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the authors be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
Jean-loup Gailly Mark Adler
|
||||
jloup@gzip.org madler@alumni.caltech.edu
|
||||
|
||||
If you use the zlib library in a product, we would appreciate *not* receiving
|
||||
lengthy legal documents to sign. The sources are provided for free but without
|
||||
warranty of any kind. The library has been entirely written by Jean-loup
|
||||
Gailly and Mark Adler; it does not include third-party code.
|
||||
|
||||
Files: Modules/expat/*
|
||||
Copyright: Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
|
||||
and Clark Cooper
|
||||
Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Expat maintainers
|
||||
License: Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included
|
||||
in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
Files: Modules/_decimal/libmpdec/*
|
||||
Copyright: Copyright (c) 2008-2012 Stefan Krah. All rights reserved.
|
||||
License: Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
.
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
,
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGE.
|
||||
|
||||
Files: Misc/python-mode.el
|
||||
Copyright: Copyright (C) 1992,1993,1994 Tim Peters
|
||||
License: This software is provided as-is, without express or implied
|
||||
warranty. Permission to use, copy, modify, distribute or sell this
|
||||
software, without fee, for any purpose and by any individual or
|
||||
organization, is hereby granted, provided that the above copyright
|
||||
notice and this paragraph appear in all copies.
|
||||
|
||||
Files: Python/dtoa.c
|
||||
Copyright: (c) 1991, 2000, 2001 by Lucent Technologies.
|
||||
License: Permission to use, copy, modify, and distribute this software for any
|
||||
purpose without fee is hereby granted, provided that this entire notice
|
||||
is included in all copies of any software which is or includes a copy
|
||||
or modification of this software and in all copies of the supporting
|
||||
documentation for such software.
|
||||
|
||||
THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||
WARRANTY. IN PARTICULAR, NEITHER THE AUTHOR NOR LUCENT MAKES ANY
|
||||
REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||
OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||
|
||||
Files: Python/getopt.c
|
||||
Copyright: 1992-1994, David Gottner
|
||||
License: Permission to use, copy, modify, and distribute this software and its
|
||||
documentation for any purpose and without fee is hereby granted,
|
||||
provided that the above copyright notice, this permission notice and
|
||||
the following disclaimer notice appear unmodified in all copies.
|
||||
|
||||
I DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL I
|
||||
BE LIABLE FOR ANY SPECIAL, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY
|
||||
DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA, OR PROFITS, WHETHER
|
||||
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
Files: PC/_subprocess.c
|
||||
Copyright: Copyright (c) 2004 by Fredrik Lundh <fredrik@pythonware.com>
|
||||
Copyright (c) 2004 by Secret Labs AB, http://www.pythonware.com
|
||||
Copyright (c) 2004 by Peter Astrand <astrand@lysator.liu.se>
|
||||
License:
|
||||
* Permission to use, copy, modify, and distribute this software and
|
||||
* its associated documentation for any purpose and without fee is
|
||||
* hereby granted, provided that the above copyright notice appears in
|
||||
* all copies, and that both that copyright notice and this permission
|
||||
* notice appear in supporting documentation, and that the name of the
|
||||
* authors not be used in advertising or publicity pertaining to
|
||||
* distribution of the software without specific, written prior
|
||||
* permission.
|
||||
*
|
||||
* THE AUTHORS DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
|
||||
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
|
||||
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
||||
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
Files: PC/winsound.c
|
||||
Copyright: Copyright (c) 1999 Toby Dickenson
|
||||
License: * Permission to use this software in any way is granted without
|
||||
* fee, provided that the copyright notice above appears in all
|
||||
* copies. This software is provided "as is" without any warranty.
|
||||
*/
|
||||
|
||||
/* Modified by Guido van Rossum */
|
||||
/* Beep added by Mark Hammond */
|
||||
/* Win9X Beep and platform identification added by Uncle Timmy */
|
||||
|
||||
Files: Tools/pybench/*
|
||||
Copyright: (c), 1997-2006, Marc-Andre Lemburg (mal@lemburg.com)
|
||||
(c), 2000-2006, eGenix.com Software GmbH (info@egenix.com)
|
||||
License: Permission to use, copy, modify, and distribute this software and its
|
||||
documentation for any purpose and without fee or royalty is hereby
|
||||
granted, provided that the above copyright notice appear in all copies
|
||||
and that both that copyright notice and this permission notice appear
|
||||
in supporting documentation or portions thereof, including
|
||||
modifications, that you make.
|
||||
|
||||
THE AUTHOR MARC-ANDRE LEMBURG DISCLAIMS ALL WARRANTIES WITH REGARD TO
|
||||
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL,
|
||||
INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
|
||||
FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
||||
WITH THE USE OR PERFORMANCE OF THIS SOFTWARE !
|
||||
3
dist/debian/python3/debian/scylla-python3.dirs
vendored
Normal file
3
dist/debian/python3/debian/scylla-python3.dirs
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
opt/scylladb/python3/bin
|
||||
opt/scylladb/python3/lib64
|
||||
opt/scylladb/python3/libexec
|
||||
3
dist/debian/python3/debian/scylla-python3.install
vendored
Normal file
3
dist/debian/python3/debian/scylla-python3.install
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
bin/* opt/scylladb/python3/bin
|
||||
lib64/* opt/scylladb/python3/lib64
|
||||
libexec/* opt/scylladb/python3/libexec
|
||||
22
dist/debian/python3/rules.mustache
vendored
Executable file
22
dist/debian/python3/rules.mustache
vendored
Executable file
@@ -0,0 +1,22 @@
|
||||
#!/usr/bin/make -f
|
||||
|
||||
export PYBUILD_DISABLE=1
|
||||
|
||||
override_dh_auto_configure:
|
||||
|
||||
override_dh_auto_build:
|
||||
|
||||
override_dh_strip:
|
||||
|
||||
override_dh_makeshlibs:
|
||||
|
||||
override_dh_shlibdeps:
|
||||
|
||||
override_dh_fixperms:
|
||||
dh_fixperms
|
||||
chmod 755 $(CURDIR)/debian/{{product}}-python3/opt/scylladb/python3/libexec/ld.so
|
||||
|
||||
override_dh_strip_nondeterminism:
|
||||
|
||||
%:
|
||||
dh $@
|
||||
17
dist/debian/rules.mustache
vendored
17
dist/debian/rules.mustache
vendored
@@ -9,12 +9,21 @@ override_dh_auto_build:
|
||||
|
||||
override_dh_auto_clean:
|
||||
|
||||
override_dh_auto_install:
|
||||
dh_auto_install
|
||||
override_dh_install:
|
||||
dh_install
|
||||
install -d $(CURDIR)/debian/scylla-server/usr/bin
|
||||
for bin in debian/scylla-server/opt/scylladb/libexec/*; do debian/adjust_bin $(CURDIR)/debian/scylla-server "$${bin#*libexec/}"; done
|
||||
ln -sf /opt/scylladb/bin/scylla $(CURDIR)/debian/scylla-server/usr/bin/scylla
|
||||
ln -sf /opt/scylladb/bin/iotune $(CURDIR)/debian/scylla-server/usr/bin/iotune
|
||||
ln -sf /usr/lib/scylla/scyllatop/scyllatop.py $(CURDIR)/debian/scylla-server/usr/bin/scyllatop
|
||||
find ./dist/common/scripts -type f -exec ./relocate_python_scripts.py \
|
||||
--installroot $(CURDIR)/debian/scylla-server/usr/lib/scylla/ --with-python3 "$(CURDIR)/debian/scylla-server/opt/scylladb/python3/bin/python3" {} +
|
||||
./relocate_python_scripts.py \
|
||||
--installroot $(CURDIR)/debian/scylla-server/usr/lib/scylla/ --with-python3 "$(CURDIR)/debian/scylla-server/opt/scylladb/python3/bin/python3" \
|
||||
seastar/scripts/perftune.py seastar/scripts/seastar-addr2line seastar/scripts/perftune.py
|
||||
./relocate_python_scripts.py \
|
||||
--installroot $(CURDIR)/debian/scylla-server/usr/lib/scylla/scyllatop/ --with-python3 "$(CURDIR)/debian/scylla-server/opt/scylladb/python3/bin/python3" \
|
||||
tools/scyllatop/scyllatop.py
|
||||
|
||||
override_dh_installinit:
|
||||
{{#scylla}}
|
||||
@@ -29,7 +38,9 @@ override_dh_installinit:
|
||||
dh_installinit --no-start --name node-exporter
|
||||
|
||||
override_dh_strip:
|
||||
dh_strip -Xlibprotobuf.so.15 -Xld.so --dbg-package={{product}}-server-dbg
|
||||
# The binaries (ethtool...patchelf) don't pass dh_strip after going through patchelf. Since they are
|
||||
# already stripped, nothing is lost if we exclude them, so that's what we do.
|
||||
dh_strip -Xlibprotobuf.so.15 -Xld.so -Xethtool -Xgawk -Xgzip -Xhwloc-calc -Xhwloc-distrib -Xifconfig -Xlscpu -Xnetstat -Xpatchelf --dbg-package={{product}}-server-dbg
|
||||
|
||||
override_dh_makeshlibs:
|
||||
|
||||
|
||||
6
dist/debian/scylla-server.install.mustache
vendored
6
dist/debian/scylla-server.install.mustache
vendored
@@ -1,14 +1,9 @@
|
||||
dist/common/limits.d/scylla.conf etc/security/limits.d
|
||||
dist/common/scylla.d/*.conf etc/scylla.d
|
||||
seastar/dpdk/usertools/dpdk-devbind.py usr/lib/scylla
|
||||
seastar/scripts/perftune.py usr/lib/scylla
|
||||
seastar/scripts/seastar-addr2line usr/lib/scylla
|
||||
seastar/scripts/seastar-cpu-map.sh usr/lib/scylla
|
||||
dist/common/scripts/* usr/lib/scylla
|
||||
tools/scyllatop usr/lib/scylla
|
||||
swagger-ui/dist usr/lib/scylla/swagger-ui
|
||||
api/api-doc usr/lib/scylla/api
|
||||
bin/* opt/scylladb/bin
|
||||
libreloc/* opt/scylladb/libreloc
|
||||
libexec/* opt/scylladb/libexec
|
||||
dist/common/sbin/* usr/sbin
|
||||
@@ -20,3 +15,4 @@ dist/common/systemd/scylla-housekeeping-restart.timer /lib/systemd/system
|
||||
dist/common/systemd/scylla-fstrim.timer /lib/systemd/system
|
||||
dist/debian/scripts/scylla_save_coredump usr/lib/scylla
|
||||
dist/debian/scripts/scylla_delay_fstrim usr/lib/scylla
|
||||
tools/scyllatop usr/lib/scylla
|
||||
|
||||
2
dist/docker/redhat/Dockerfile
vendored
2
dist/docker/redhat/Dockerfile
vendored
@@ -28,7 +28,7 @@ ADD commandlineparser.py /commandlineparser.py
|
||||
ADD docker-entrypoint.py /docker-entrypoint.py
|
||||
ADD node_exporter_install /node_exporter_install
|
||||
# Install Scylla:
|
||||
RUN curl http://downloads.scylladb.com/rpm/unstable/centos/master/latest/scylla.repo -o /etc/yum.repos.d/scylla.repo && \
|
||||
RUN curl http://downloads.scylladb.com/rpm/centos/scylla-3.1.repo -o /etc/yum.repos.d/scylla.repo && \
|
||||
yum -y install epel-release && \
|
||||
yum -y clean expire-cache && \
|
||||
yum -y update && \
|
||||
|
||||
@@ -192,7 +192,11 @@ future<> verification_error(fs::path path, const char* fstr, Args&&... args) {
|
||||
// No other file types may exist.
|
||||
future<> distributed_loader::verify_owner_and_mode(fs::path path) {
|
||||
return file_stat(path.string(), follow_symlink::no).then([path = std::move(path)] (stat_data sd) {
|
||||
if (sd.uid != geteuid()) {
|
||||
// Under docker, we run with euid 0 and there is no reasonable way to enforce that the
|
||||
// in-container uid will have the same uid as files mounted from outside the container. So
|
||||
// just allow euid 0 as a special case. It should survive the file_accessible() checks below.
|
||||
// See #4823.
|
||||
if (geteuid() != 0 && sd.uid != geteuid()) {
|
||||
return verification_error(std::move(path), "File not owned by current euid: {}. Owner is: {}", geteuid(), sd.uid);
|
||||
}
|
||||
switch (sd.type) {
|
||||
|
||||
@@ -98,6 +98,13 @@ public:
|
||||
sstring get_message() const { return what(); }
|
||||
};
|
||||
|
||||
class server_exception : public cassandra_exception {
|
||||
public:
|
||||
server_exception(sstring msg) noexcept
|
||||
: exceptions::cassandra_exception{exceptions::exception_code::SERVER_ERROR, std::move(msg)}
|
||||
{ }
|
||||
};
|
||||
|
||||
class protocol_exception : public cassandra_exception {
|
||||
public:
|
||||
protocol_exception(sstring msg) noexcept
|
||||
|
||||
@@ -151,7 +151,7 @@ if __name__ == '__main__':
|
||||
argp.add_argument('--user', '-u')
|
||||
argp.add_argument('--password', '-p', default='none')
|
||||
argp.add_argument('--node', default='127.0.0.1', help='Node to connect to.')
|
||||
argp.add_argument('--port', default='9042', help='Port to connect to.')
|
||||
argp.add_argument('--port', default=9042, help='Port to connect to.', type=int)
|
||||
|
||||
args = argp.parse_args()
|
||||
res = validate_and_fix(args)
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
#include "mutation_reader.hh"
|
||||
#include "seastar/util/reference_wrapper.hh"
|
||||
#include "clustering_ranges_walker.hh"
|
||||
#include "schema_upgrader.hh"
|
||||
#include <algorithm>
|
||||
|
||||
#include <boost/range/adaptor/transformed.hpp>
|
||||
@@ -908,3 +909,7 @@ public:
|
||||
flat_mutation_reader make_generating_reader(schema_ptr s, std::function<future<mutation_fragment_opt> ()> get_next_fragment) {
|
||||
return make_flat_mutation_reader<generating_reader>(std::move(s), std::move(get_next_fragment));
|
||||
}
|
||||
|
||||
void flat_mutation_reader::do_upgrade_schema(const schema_ptr& s) {
|
||||
*this = transform(std::move(*this), schema_upgrader(s));
|
||||
}
|
||||
|
||||
@@ -326,6 +326,7 @@ private:
|
||||
flat_mutation_reader() = default;
|
||||
explicit operator bool() const noexcept { return bool(_impl); }
|
||||
friend class optimized_optional<flat_mutation_reader>;
|
||||
void do_upgrade_schema(const schema_ptr&);
|
||||
public:
|
||||
// Documented in mutation_reader::forwarding in mutation_reader.hh.
|
||||
class partition_range_forwarding_tag;
|
||||
@@ -474,6 +475,14 @@ public:
|
||||
void move_buffer_content_to(impl& other) {
|
||||
_impl->move_buffer_content_to(other);
|
||||
}
|
||||
|
||||
// Causes this reader to conform to s.
|
||||
// Multiple calls of upgrade_schema() compose, effects of prior calls on the stream are preserved.
|
||||
void upgrade_schema(const schema_ptr& s) {
|
||||
if (__builtin_expect(s != schema(), false)) {
|
||||
do_upgrade_schema(s);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
using flat_mutation_reader_opt = optimized_optional<flat_mutation_reader>;
|
||||
@@ -576,8 +585,12 @@ class delegating_reader : public flat_mutation_reader::impl {
|
||||
public:
|
||||
delegating_reader(Underlying&& r) : impl(to_reference(r).schema()), _underlying(std::forward<Underlying>(r)) { }
|
||||
virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override {
|
||||
return fill_buffer_from(to_reference(_underlying), timeout).then([this] (bool underlying_finished) {
|
||||
_end_of_stream = underlying_finished;
|
||||
if (is_buffer_full()) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return to_reference(_underlying).fill_buffer(timeout).then([this] {
|
||||
_end_of_stream = to_reference(_underlying).is_end_of_stream();
|
||||
to_reference(_underlying).move_buffer_content_to(*this);
|
||||
});
|
||||
}
|
||||
virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override {
|
||||
|
||||
49
gc_clock.hh
49
gc_clock.hh
@@ -22,6 +22,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "clocks-impl.hh"
|
||||
#include "hashing.hh"
|
||||
|
||||
#include <seastar/core/lowres_clock.hh>
|
||||
|
||||
@@ -71,3 +72,51 @@ using ttl_opt = std::optional<gc_clock::duration>;
|
||||
static constexpr gc_clock::duration max_ttl = gc_clock::duration{20 * 365 * 24 * 60 * 60};
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, gc_clock::time_point tp);
|
||||
|
||||
template<>
|
||||
struct appending_hash<gc_clock::time_point> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, gc_clock::time_point t) const {
|
||||
// Remain backwards-compatible with the 32-bit duration::rep (refs #4460).
|
||||
uint64_t d64 = t.time_since_epoch().count();
|
||||
feed_hash(h, uint32_t(d64 & 0xffff'ffff));
|
||||
uint32_t msb = d64 >> 32;
|
||||
if (msb) {
|
||||
feed_hash(h, msb);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
namespace ser {
|
||||
|
||||
// Forward-declaration - defined in serializer.hh, to avoid including it here.
|
||||
|
||||
template <typename Output>
|
||||
void serialize_gc_clock_duration_value(Output& out, int64_t value);
|
||||
|
||||
template <typename Input>
|
||||
int64_t deserialize_gc_clock_duration_value(Input& in);
|
||||
|
||||
template <typename T>
|
||||
struct serializer;
|
||||
|
||||
template <>
|
||||
struct serializer<gc_clock::duration> {
|
||||
template <typename Input>
|
||||
static gc_clock::duration read(Input& in) {
|
||||
return gc_clock::duration(deserialize_gc_clock_duration_value(in));
|
||||
}
|
||||
|
||||
template <typename Output>
|
||||
static void write(Output& out, gc_clock::duration d) {
|
||||
serialize_gc_clock_duration_value(out, d.count());
|
||||
}
|
||||
|
||||
template <typename Input>
|
||||
static void skip(Input& in) {
|
||||
read(in);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -481,8 +481,7 @@ future<> gossiper::apply_state_locally(std::map<inet_address, endpoint_state> ma
|
||||
int local_generation = local_ep_state_ptr.get_heart_beat_state().get_generation();
|
||||
int remote_generation = remote_state.get_heart_beat_state().get_generation();
|
||||
logger.trace("{} local generation {}, remote generation {}", ep, local_generation, remote_generation);
|
||||
// A node was removed with nodetool removenode can have a generation of 2
|
||||
if (local_generation > 2 && remote_generation > local_generation + MAX_GENERATION_DIFFERENCE) {
|
||||
if (remote_generation > service::get_generation_number() + MAX_GENERATION_DIFFERENCE) {
|
||||
// assume some peer has corrupted memory and is broadcasting an unbelievable generation about another peer (or itself)
|
||||
logger.warn("received an invalid gossip generation for peer {}; local generation = {}, received generation = {}",
|
||||
ep, local_generation, remote_generation);
|
||||
|
||||
@@ -160,7 +160,9 @@ public:
|
||||
static constexpr std::chrono::milliseconds INTERVAL{1000};
|
||||
static constexpr std::chrono::hours A_VERY_LONG_TIME{24 * 3};
|
||||
|
||||
/** Maximimum difference in generation and version values we are willing to accept about a peer */
|
||||
// Maximimum difference between remote generation value and generation
|
||||
// value this node would get if this node were restarted that we are
|
||||
// willing to accept about a peer.
|
||||
static constexpr int64_t MAX_GENERATION_DIFFERENCE = 86400 * 365;
|
||||
std::chrono::milliseconds fat_client_timeout;
|
||||
|
||||
|
||||
30
hashers.cc
30
hashers.cc
@@ -29,7 +29,7 @@ template <typename T> struct hasher_traits;
|
||||
template <> struct hasher_traits<md5_hasher> { using impl_type = CryptoPP::Weak::MD5; };
|
||||
template <> struct hasher_traits<sha256_hasher> { using impl_type = CryptoPP::SHA256; };
|
||||
|
||||
template <typename T, size_t size> struct hasher<T, size>::impl {
|
||||
template <typename T, size_t size> struct cryptopp_hasher<T, size>::impl {
|
||||
using impl_type = typename hasher_traits<T>::impl_type;
|
||||
|
||||
impl_type hash{};
|
||||
@@ -53,35 +53,35 @@ template <typename T, size_t size> struct hasher<T, size>::impl {
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, size_t size> hasher<T, size>::hasher() : _impl(std::make_unique<impl>()) {}
|
||||
template <typename T, size_t size> cryptopp_hasher<T, size>::cryptopp_hasher() : _impl(std::make_unique<impl>()) {}
|
||||
|
||||
template <typename T, size_t size> hasher<T, size>::~hasher() = default;
|
||||
template <typename T, size_t size> cryptopp_hasher<T, size>::~cryptopp_hasher() = default;
|
||||
|
||||
template <typename T, size_t size> hasher<T, size>::hasher(hasher&& o) noexcept = default;
|
||||
template <typename T, size_t size> cryptopp_hasher<T, size>::cryptopp_hasher(cryptopp_hasher&& o) noexcept = default;
|
||||
|
||||
template <typename T, size_t size> hasher<T, size>::hasher(const hasher& o) : _impl(std::make_unique<hasher<T, size>::impl>(*o._impl)) {}
|
||||
template <typename T, size_t size> cryptopp_hasher<T, size>::cryptopp_hasher(const cryptopp_hasher& o) : _impl(std::make_unique<cryptopp_hasher<T, size>::impl>(*o._impl)) {}
|
||||
|
||||
template <typename T, size_t size> hasher<T, size>& hasher<T, size>::operator=(hasher&& o) noexcept = default;
|
||||
template <typename T, size_t size> cryptopp_hasher<T, size>& cryptopp_hasher<T, size>::operator=(cryptopp_hasher&& o) noexcept = default;
|
||||
|
||||
template <typename T, size_t size> hasher<T, size>& hasher<T, size>::operator=(const hasher& o) {
|
||||
_impl = std::make_unique<hasher<T, size>::impl>(*o._impl);
|
||||
template <typename T, size_t size> cryptopp_hasher<T, size>& cryptopp_hasher<T, size>::operator=(const cryptopp_hasher& o) {
|
||||
_impl = std::make_unique<cryptopp_hasher<T, size>::impl>(*o._impl);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename T, size_t size> bytes hasher<T, size>::finalize() { return _impl->finalize(); }
|
||||
template <typename T, size_t size> bytes cryptopp_hasher<T, size>::finalize() { return _impl->finalize(); }
|
||||
|
||||
template <typename T, size_t size> std::array<uint8_t, size> hasher<T, size>::finalize_array() {
|
||||
template <typename T, size_t size> std::array<uint8_t, size> cryptopp_hasher<T, size>::finalize_array() {
|
||||
return _impl->finalize_array();
|
||||
}
|
||||
|
||||
template <typename T, size_t size> void hasher<T, size>::update(const char* ptr, size_t length) { _impl->update(ptr, length); }
|
||||
template <typename T, size_t size> void cryptopp_hasher<T, size>::update(const char* ptr, size_t length) { _impl->update(ptr, length); }
|
||||
|
||||
template <typename T, size_t size> bytes hasher<T, size>::calculate(const std::string_view& s) {
|
||||
typename hasher<T, size>::impl::impl_type hash;
|
||||
template <typename T, size_t size> bytes cryptopp_hasher<T, size>::calculate(const std::string_view& s) {
|
||||
typename cryptopp_hasher<T, size>::impl::impl_type hash;
|
||||
unsigned char digest[size];
|
||||
hash.CalculateDigest(digest, reinterpret_cast<const unsigned char*>(s.data()), s.size());
|
||||
return std::move(bytes{reinterpret_cast<const int8_t*>(digest), size});
|
||||
}
|
||||
|
||||
template class hasher<md5_hasher, 16>;
|
||||
template class hasher<sha256_hasher, 32>;
|
||||
template class cryptopp_hasher<md5_hasher, 16>;
|
||||
template class cryptopp_hasher<sha256_hasher, 32>;
|
||||
|
||||
21
hashers.hh
21
hashers.hh
@@ -22,29 +22,30 @@
|
||||
#pragma once
|
||||
|
||||
#include "bytes.hh"
|
||||
#include "hashing.hh"
|
||||
|
||||
class md5_hasher;
|
||||
|
||||
template <typename T, size_t size> class hasher {
|
||||
template <typename T, size_t size> class cryptopp_hasher : public hasher {
|
||||
struct impl;
|
||||
std::unique_ptr<impl> _impl;
|
||||
|
||||
public:
|
||||
hasher();
|
||||
~hasher();
|
||||
hasher(hasher&&) noexcept;
|
||||
hasher(const hasher&);
|
||||
hasher& operator=(hasher&&) noexcept;
|
||||
hasher& operator=(const hasher&);
|
||||
cryptopp_hasher();
|
||||
~cryptopp_hasher();
|
||||
cryptopp_hasher(cryptopp_hasher&&) noexcept;
|
||||
cryptopp_hasher(const cryptopp_hasher&);
|
||||
cryptopp_hasher& operator=(cryptopp_hasher&&) noexcept;
|
||||
cryptopp_hasher& operator=(const cryptopp_hasher&);
|
||||
|
||||
bytes finalize();
|
||||
std::array<uint8_t, size> finalize_array();
|
||||
void update(const char* ptr, size_t length);
|
||||
void update(const char* ptr, size_t length) override;
|
||||
|
||||
// Use update and finalize to compute the hash over the full view.
|
||||
static bytes calculate(const std::string_view& s);
|
||||
};
|
||||
|
||||
class md5_hasher : public hasher<md5_hasher, 16> {};
|
||||
class md5_hasher final : public cryptopp_hasher<md5_hasher, 16> {};
|
||||
|
||||
class sha256_hasher : public hasher<sha256_hasher, 32> {};
|
||||
class sha256_hasher final : public cryptopp_hasher<sha256_hasher, 32> {};
|
||||
|
||||
79
hashing.hh
79
hashing.hh
@@ -27,6 +27,7 @@
|
||||
#include <seastar/core/byteorder.hh>
|
||||
#include <seastar/core/sstring.hh>
|
||||
#include "seastarx.hh"
|
||||
#include <seastar/util/gcc6-concepts.hh>
|
||||
|
||||
//
|
||||
// This hashing differs from std::hash<> in that it decouples knowledge about
|
||||
@@ -41,24 +42,38 @@
|
||||
// appending_hash<T> is machine-independent.
|
||||
//
|
||||
|
||||
// The Hasher concept
|
||||
struct Hasher {
|
||||
void update(const char* ptr, size_t size);
|
||||
GCC6_CONCEPT(
|
||||
template<typename H>
|
||||
concept bool Hasher() {
|
||||
return requires(H& h, const char* ptr, size_t size) {
|
||||
{ h.update(ptr, size) } -> void
|
||||
};
|
||||
}
|
||||
)
|
||||
|
||||
class hasher {
|
||||
public:
|
||||
virtual ~hasher() = default;
|
||||
virtual void update(const char* ptr, size_t size) = 0;
|
||||
};
|
||||
|
||||
GCC6_CONCEPT(static_assert(Hasher<hasher>());)
|
||||
|
||||
template<typename T, typename Enable = void>
|
||||
struct appending_hash;
|
||||
|
||||
template<typename Hasher, typename T, typename... Args>
|
||||
template<typename H, typename T, typename... Args>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
inline
|
||||
void feed_hash(Hasher& h, const T& value, Args&&... args) {
|
||||
void feed_hash(H& h, const T& value, Args&&... args) {
|
||||
appending_hash<T>()(h, value, std::forward<Args>(args)...);
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct appending_hash<T, std::enable_if_t<std::is_arithmetic<T>::value>> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, T value) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, T value) const {
|
||||
auto value_le = cpu_to_le(value);
|
||||
h.update(reinterpret_cast<const char*>(&value_le), sizeof(T));
|
||||
}
|
||||
@@ -66,24 +81,27 @@ struct appending_hash<T, std::enable_if_t<std::is_arithmetic<T>::value>> {
|
||||
|
||||
template<>
|
||||
struct appending_hash<bool> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, bool value) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, bool value) const {
|
||||
feed_hash(h, static_cast<uint8_t>(value));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct appending_hash<T, std::enable_if_t<std::is_enum<T>::value>> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, const T& value) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, const T& value) const {
|
||||
feed_hash(h, static_cast<std::underlying_type_t<T>>(value));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct appending_hash<std::optional<T>> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, const std::optional<T>& value) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, const std::optional<T>& value) const {
|
||||
if (value) {
|
||||
feed_hash(h, true);
|
||||
feed_hash(h, *value);
|
||||
@@ -95,8 +113,9 @@ struct appending_hash<std::optional<T>> {
|
||||
|
||||
template<size_t N>
|
||||
struct appending_hash<char[N]> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, const char (&value) [N]) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, const char (&value) [N]) const {
|
||||
feed_hash(h, N);
|
||||
h.update(value, N);
|
||||
}
|
||||
@@ -104,8 +123,9 @@ struct appending_hash<char[N]> {
|
||||
|
||||
template<typename T>
|
||||
struct appending_hash<std::vector<T>> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, const std::vector<T>& value) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, const std::vector<T>& value) const {
|
||||
feed_hash(h, value.size());
|
||||
for (auto&& v : value) {
|
||||
appending_hash<T>()(h, v);
|
||||
@@ -115,8 +135,9 @@ struct appending_hash<std::vector<T>> {
|
||||
|
||||
template<typename K, typename V>
|
||||
struct appending_hash<std::map<K, V>> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, const std::map<K, V>& value) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, const std::map<K, V>& value) const {
|
||||
feed_hash(h, value.size());
|
||||
for (auto&& e : value) {
|
||||
appending_hash<K>()(h, e.first);
|
||||
@@ -127,8 +148,9 @@ struct appending_hash<std::map<K, V>> {
|
||||
|
||||
template<>
|
||||
struct appending_hash<sstring> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, const sstring& v) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, const sstring& v) const {
|
||||
feed_hash(h, v.size());
|
||||
h.update(reinterpret_cast<const char*>(v.cbegin()), v.size() * sizeof(sstring::value_type));
|
||||
}
|
||||
@@ -136,8 +158,9 @@ struct appending_hash<sstring> {
|
||||
|
||||
template<>
|
||||
struct appending_hash<std::string> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, const std::string& v) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, const std::string& v) const {
|
||||
feed_hash(h, v.size());
|
||||
h.update(reinterpret_cast<const char*>(v.data()), v.size() * sizeof(std::string::value_type));
|
||||
}
|
||||
@@ -145,16 +168,18 @@ struct appending_hash<std::string> {
|
||||
|
||||
template<typename T, typename R>
|
||||
struct appending_hash<std::chrono::duration<T, R>> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, std::chrono::duration<T, R> v) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, std::chrono::duration<T, R> v) const {
|
||||
feed_hash(h, v.count());
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Clock, typename Duration>
|
||||
struct appending_hash<std::chrono::time_point<Clock, Duration>> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, std::chrono::time_point<Clock, Duration> v) const {
|
||||
template<typename H>
|
||||
GCC6_CONCEPT(requires Hasher<H>())
|
||||
void operator()(H& h, std::chrono::time_point<Clock, Duration> v) const {
|
||||
feed_hash(h, v.time_since_epoch().count());
|
||||
}
|
||||
};
|
||||
|
||||
@@ -26,6 +26,6 @@ class partition {
|
||||
|
||||
class reconcilable_result {
|
||||
uint32_t row_count();
|
||||
std::vector<partition> partitions();
|
||||
utils::chunked_vector<partition> partitions();
|
||||
query::short_read is_short_read() [[version 1.6]] = query::short_read::no;
|
||||
};
|
||||
|
||||
@@ -51,4 +51,10 @@ enum class stream_reason : uint8_t {
|
||||
repair,
|
||||
};
|
||||
|
||||
enum class stream_mutation_fragments_cmd : uint8_t {
|
||||
error,
|
||||
mutation_fragment_data,
|
||||
end_of_stream,
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -181,4 +181,10 @@ bool secondary_index_manager::is_index(const schema& s) const {
|
||||
});
|
||||
}
|
||||
|
||||
bool secondary_index_manager::is_global_index(const schema& s) const {
|
||||
return boost::algorithm::any_of(_indices | boost::adaptors::map_values, [&s] (const index& i) {
|
||||
return !i.metadata().local() && s.cf_name() == index_table_name(i.metadata().name());
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -77,6 +77,7 @@ public:
|
||||
std::vector<index> list_indexes() const;
|
||||
bool is_index(view_ptr) const;
|
||||
bool is_index(const schema& s) const;
|
||||
bool is_global_index(const schema& s) const;
|
||||
private:
|
||||
void add_index(const index_metadata& im);
|
||||
};
|
||||
|
||||
4
init.cc
4
init.cc
@@ -155,6 +155,10 @@ void init_ms_fd_gossiper(sharded<gms::gossiper>& gossiper
|
||||
to_string(seeds), listen_address_in, broadcast_address);
|
||||
throw std::runtime_error("Use broadcast_address for seeds list");
|
||||
}
|
||||
if ((!cfg.replace_address_first_boot().empty() || !cfg.replace_address().empty()) && seeds.count(broadcast_address)) {
|
||||
startlog.error("Bad configuration: replace-address and replace-address-first-boot are not allowed for seed nodes");
|
||||
throw bad_configuration_error();
|
||||
}
|
||||
gossiper.local().set_seeds(seeds);
|
||||
gossiper.invoke_on_all([cluster_name](gms::gossiper& g) {
|
||||
g.set_cluster_name(cluster_name);
|
||||
|
||||
34
install.sh
34
install.sh
@@ -75,6 +75,29 @@ while [ $# -gt 0 ]; do
|
||||
esac
|
||||
done
|
||||
|
||||
patchelf() {
|
||||
# patchelf comes from the build system, so it needs the build system's ld.so and
|
||||
# shared libraries. We can't use patchelf on patchelf itself, so invoke it via
|
||||
# ld.so.
|
||||
LD_LIBRARY_PATH="$PWD/libreloc" libreloc/ld.so libexec/patchelf "$@"
|
||||
}
|
||||
|
||||
adjust_bin() {
|
||||
local bin="$1"
|
||||
# We could add --set-rpath too, but then debugedit (called by rpmbuild) barfs
|
||||
# on the result. So use LD_LIBRARY_PATH in the thunk, below.
|
||||
patchelf \
|
||||
--set-interpreter "/opt/scylladb/libreloc/ld.so" \
|
||||
"$root/opt/scylladb/libexec/$bin"
|
||||
cat > "$root/opt/scylladb/bin/$bin" <<EOF
|
||||
#!/bin/bash -e
|
||||
export GNUTLS_SYSTEM_PRIORITY_FILE="\${GNUTLS_SYSTEM_PRIORITY_FILE-/opt/scylladb/libreloc/gnutls.config}"
|
||||
export LD_LIBRARY_PATH="/opt/scylladb/libreloc"
|
||||
exec -a "\$0" "/opt/scylladb/libexec/$bin" "\$@"
|
||||
EOF
|
||||
chmod +x "$root/opt/scylladb/bin/$bin"
|
||||
}
|
||||
|
||||
rprefix="$root/$prefix"
|
||||
retc="$root/etc"
|
||||
rdoc="$rprefix/share/doc"
|
||||
@@ -105,16 +128,13 @@ install -m644 dist/common/systemd/*.service -Dt "$rprefix"/lib/systemd/system
|
||||
install -m644 dist/common/systemd/*.timer -Dt "$rprefix"/lib/systemd/system
|
||||
install -m755 seastar/scripts/seastar-cpu-map.sh -Dt "$rprefix"/lib/scylla/
|
||||
install -m755 seastar/dpdk/usertools/dpdk-devbind.py -Dt "$rprefix"/lib/scylla/
|
||||
install -m755 bin/* -Dt "$root/opt/scylladb/bin"
|
||||
install -m755 libreloc/* -Dt "$root/opt/scylladb/libreloc"
|
||||
# some files in libexec are symlinks, which "install" dereferences
|
||||
# use cp -P for the symlinks instead.
|
||||
install -m755 libexec/*.bin -Dt "$root/opt/scylladb/libexec"
|
||||
for f in libexec/*; do
|
||||
if [[ "$f" != *.bin ]]; then
|
||||
cp -P "$f" "$root/opt/scylladb/libexec"
|
||||
fi
|
||||
install -m755 libexec/* -Dt "$root/opt/scylladb/libexec"
|
||||
for bin in libexec/*; do
|
||||
adjust_bin "${bin#libexec/}"
|
||||
done
|
||||
install -m755 libreloc/* -Dt "$root/opt/scylladb/libreloc"
|
||||
ln -srf "$root/opt/scylladb/bin/scylla" "$rprefix/bin/scylla"
|
||||
ln -srf "$root/opt/scylladb/bin/iotune" "$rprefix/bin/iotune"
|
||||
ln -srf "$rprefix/lib/scylla/scyllatop/scyllatop.py" "$rprefix/bin/scyllatop"
|
||||
|
||||
42
main.cc
42
main.cc
@@ -54,6 +54,7 @@
|
||||
#include <seastar/core/file.hh>
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/prctl.h>
|
||||
#include "disk-error-handler.hh"
|
||||
#include "tracing/tracing.hh"
|
||||
#include "tracing/tracing_backend_registry.hh"
|
||||
@@ -69,6 +70,7 @@
|
||||
#include "sstables/sstables.hh"
|
||||
#include "gms/feature_service.hh"
|
||||
#include "distributed_loader.hh"
|
||||
#include "serializer.hh"
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
@@ -322,6 +324,15 @@ static std::optional<std::vector<sstring>> parse_hinted_handoff_enabled(sstring
|
||||
}
|
||||
|
||||
int main(int ac, char** av) {
|
||||
// Allow core dumps. The would be disabled by default if
|
||||
// CAP_SYS_NICE was added to the binary, as is suggested by the
|
||||
// epoll backend.
|
||||
int r = prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
|
||||
if (r) {
|
||||
std::cerr << "Could not make scylla dumpable\n";
|
||||
exit(1);
|
||||
}
|
||||
|
||||
int return_value = 0;
|
||||
try {
|
||||
// early check to avoid triggering
|
||||
@@ -340,15 +351,7 @@ int main(int ac, char** av) {
|
||||
auto cfg = make_lw_shared<db::config>(ext);
|
||||
auto init = app.get_options_description().add_options();
|
||||
|
||||
// If --version is requested, print it out and exit immediately to avoid
|
||||
// Seastar-specific warnings that may occur when running the app
|
||||
init("version", bpo::bool_switch(), "print version number and exit");
|
||||
bpo::variables_map vm;
|
||||
bpo::store(bpo::command_line_parser(ac, av).options(app.get_options_description()).allow_unregistered().run(), vm);
|
||||
if (vm["version"].as<bool>()) {
|
||||
fmt::print("{}\n", scylla_version());
|
||||
return 0;
|
||||
}
|
||||
|
||||
bpo::options_description deprecated("Deprecated options - ignored");
|
||||
deprecated.add_options()
|
||||
@@ -362,6 +365,15 @@ int main(int ac, char** av) {
|
||||
configurable::append_all(*cfg, init);
|
||||
cfg->add_options(init);
|
||||
|
||||
// If --version is requested, print it out and exit immediately to avoid
|
||||
// Seastar-specific warnings that may occur when running the app
|
||||
bpo::variables_map vm;
|
||||
bpo::store(bpo::command_line_parser(ac, av).options(app.get_options_description()).allow_unregistered().run(), vm);
|
||||
if (vm["version"].as<bool>()) {
|
||||
fmt::print("{}\n", scylla_version());
|
||||
return 0;
|
||||
}
|
||||
|
||||
distributed<database> db;
|
||||
seastar::sharded<service::cache_hitrate_calculator> cf_cache_hitrate_calculator;
|
||||
debug::db = &db;
|
||||
@@ -407,6 +419,11 @@ int main(int ac, char** av) {
|
||||
read_config(opts, *cfg).get();
|
||||
configurable::init_all(opts, *cfg, *ext).get();
|
||||
|
||||
// We're writing to a non-atomic variable here. But bool writes are atomic
|
||||
// in all supported architectures, and some broadcast or other below
|
||||
// will apply the required memory barriers anyway.
|
||||
ser::gc_clock_using_3_1_0_serialization = cfg->enable_3_1_0_compatibility_mode();
|
||||
|
||||
logalloc::prime_segment_pool(memory::stats().total_memory(), memory::min_free_memory()).get();
|
||||
logging::apply_settings(cfg->logging_settings(opts));
|
||||
|
||||
@@ -526,6 +543,9 @@ int main(int ac, char** av) {
|
||||
if (opts.count("developer-mode")) {
|
||||
smp::invoke_on_all([] { engine().set_strict_dma(false); }).get();
|
||||
}
|
||||
|
||||
set_abort_on_internal_error(cfg->abort_on_internal_error());
|
||||
|
||||
supervisor::notify("creating tracing");
|
||||
tracing::backend_registry tracing_backend_registry;
|
||||
tracing::register_tracing_keyspace_backend(tracing_backend_registry);
|
||||
@@ -916,8 +936,10 @@ int main(int ac, char** av) {
|
||||
service::get_local_storage_service().drain_on_shutdown().get();
|
||||
});
|
||||
|
||||
auto stop_view_builder = defer([] {
|
||||
view_builder.stop().get();
|
||||
auto stop_view_builder = defer([cfg] {
|
||||
if (cfg->view_building()) {
|
||||
view_builder.stop().get();
|
||||
}
|
||||
});
|
||||
|
||||
auto stop_compaction_manager = defer([&db] {
|
||||
|
||||
32
memtable.cc
32
memtable.cc
@@ -23,7 +23,6 @@
|
||||
#include "database.hh"
|
||||
#include "frozen_mutation.hh"
|
||||
#include "partition_snapshot_reader.hh"
|
||||
#include "schema_upgrader.hh"
|
||||
#include "partition_builder.hh"
|
||||
|
||||
void memtable::memtable_encoding_stats_collector::update_timestamp(api::timestamp_type ts) {
|
||||
@@ -429,11 +428,8 @@ public:
|
||||
bool digest_requested = _slice.options.contains<query::partition_slice::option::with_digest>();
|
||||
auto mpsr = make_partition_snapshot_flat_reader(snp_schema, std::move(key_and_snp->first), std::move(cr),
|
||||
std::move(key_and_snp->second), digest_requested, region(), read_section(), mtbl(), streamed_mutation::forwarding::no);
|
||||
if (snp_schema->version() != schema()->version()) {
|
||||
_delegate = transform(std::move(mpsr), schema_upgrader(schema()));
|
||||
} else {
|
||||
_delegate = std::move(mpsr);
|
||||
}
|
||||
mpsr.upgrade_schema(schema());
|
||||
_delegate = std::move(mpsr);
|
||||
} else {
|
||||
_end_of_stream = true;
|
||||
}
|
||||
@@ -588,11 +584,8 @@ private:
|
||||
auto snp_schema = key_and_snp->second->schema();
|
||||
auto mpsr = make_partition_snapshot_flat_reader<partition_snapshot_accounter>(snp_schema, std::move(key_and_snp->first), std::move(cr),
|
||||
std::move(key_and_snp->second), false, region(), read_section(), mtbl(), streamed_mutation::forwarding::no, *snp_schema, _flushed_memory);
|
||||
if (snp_schema->version() != schema()->version()) {
|
||||
_partition_reader = transform(std::move(mpsr), schema_upgrader(schema()));
|
||||
} else {
|
||||
_partition_reader = std::move(mpsr);
|
||||
}
|
||||
mpsr.upgrade_schema(schema());
|
||||
_partition_reader = std::move(mpsr);
|
||||
}
|
||||
}
|
||||
public:
|
||||
@@ -668,11 +661,8 @@ memtable::make_flat_reader(schema_ptr s,
|
||||
bool digest_requested = slice.options.contains<query::partition_slice::option::with_digest>();
|
||||
auto rd = make_partition_snapshot_flat_reader(snp_schema, std::move(dk), std::move(cr), std::move(snp), digest_requested,
|
||||
*this, _read_section, shared_from_this(), fwd);
|
||||
if (snp_schema->version() != s->version()) {
|
||||
return transform(std::move(rd), schema_upgrader(s));
|
||||
} else {
|
||||
return rd;
|
||||
}
|
||||
rd.upgrade_schema(s);
|
||||
return rd;
|
||||
} else {
|
||||
auto res = make_flat_mutation_reader<scanning_reader>(std::move(s), shared_from_this(), range, slice, pc, fwd_mr);
|
||||
if (fwd == streamed_mutation::forwarding::yes) {
|
||||
@@ -787,13 +777,19 @@ bool memtable::is_flushed() const {
|
||||
return bool(_underlying);
|
||||
}
|
||||
|
||||
void memtable_entry::upgrade_schema(const schema_ptr& s, mutation_cleaner& cleaner) {
|
||||
if (_schema != s) {
|
||||
partition().upgrade(_schema, s, cleaner, no_cache_tracker);
|
||||
_schema = s;
|
||||
}
|
||||
}
|
||||
|
||||
void memtable::upgrade_entry(memtable_entry& e) {
|
||||
if (e._schema != _schema) {
|
||||
assert(!reclaiming_enabled());
|
||||
with_allocator(allocator(), [this, &e] {
|
||||
with_linearized_managed_bytes([&] {
|
||||
e.partition().upgrade(e._schema, _schema, cleaner(), no_cache_tracker);
|
||||
e._schema = _schema;
|
||||
e.upgrade_schema(_schema, cleaner());
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -69,6 +69,10 @@ public:
|
||||
schema_ptr& schema() { return _schema; }
|
||||
partition_snapshot_ptr snapshot(memtable& mtbl);
|
||||
|
||||
// Makes the entry conform to given schema.
|
||||
// Must be called under allocating section of the region which owns the entry.
|
||||
void upgrade_schema(const schema_ptr&, mutation_cleaner&);
|
||||
|
||||
size_t external_memory_usage_without_rows() const {
|
||||
return _key.key().external_memory_usage();
|
||||
}
|
||||
|
||||
@@ -89,6 +89,7 @@
|
||||
#include "frozen_mutation.hh"
|
||||
#include "flat_mutation_reader.hh"
|
||||
#include "streaming/stream_manager.hh"
|
||||
#include "streaming/stream_mutation_fragments_cmd.hh"
|
||||
|
||||
namespace netw {
|
||||
|
||||
@@ -287,7 +288,6 @@ void messaging_service::start_listen() {
|
||||
if (_compress_what != compress_what::none) {
|
||||
so.compressor_factory = &compressor_factory;
|
||||
}
|
||||
so.streaming_domain = rpc::streaming_domain_type(0x55AA);
|
||||
so.load_balancing_algorithm = server_socket::load_balancing_algorithm::port;
|
||||
|
||||
// FIXME: we don't set so.tcp_nodelay, because we can't tell at this point whether the connection will come from a
|
||||
@@ -295,19 +295,21 @@ void messaging_service::start_listen() {
|
||||
// the first by wrapping its server_socket, but not the second.
|
||||
auto limits = rpc_resource_limits(_mcfg.rpc_memory_limit);
|
||||
if (!_server[0]) {
|
||||
auto listen = [&] (const gms::inet_address& a) {
|
||||
auto listen = [&] (const gms::inet_address& a, rpc::streaming_domain_type sdomain) {
|
||||
so.streaming_domain = sdomain;
|
||||
auto addr = ipv4_addr{a.raw_addr(), _port};
|
||||
return std::unique_ptr<rpc_protocol_server_wrapper>(new rpc_protocol_server_wrapper(*_rpc,
|
||||
so, addr, limits));
|
||||
};
|
||||
_server[0] = listen(_listen_address);
|
||||
_server[0] = listen(_listen_address, rpc::streaming_domain_type(0x55AA));
|
||||
if (listen_to_bc) {
|
||||
_server[1] = listen(utils::fb_utilities::get_broadcast_address());
|
||||
_server[1] = listen(utils::fb_utilities::get_broadcast_address(), rpc::streaming_domain_type(0x66BB));
|
||||
}
|
||||
}
|
||||
|
||||
if (!_server_tls[0]) {
|
||||
auto listen = [&] (const gms::inet_address& a) {
|
||||
auto listen = [&] (const gms::inet_address& a, rpc::streaming_domain_type sdomain) {
|
||||
so.streaming_domain = sdomain;
|
||||
return std::unique_ptr<rpc_protocol_server_wrapper>(
|
||||
[this, &so, &a, limits] () -> std::unique_ptr<rpc_protocol_server_wrapper>{
|
||||
if (_encrypt_what == encrypt_what::none) {
|
||||
@@ -321,9 +323,9 @@ void messaging_service::start_listen() {
|
||||
so, seastar::tls::listen(_credentials, addr, lo), limits);
|
||||
}());
|
||||
};
|
||||
_server_tls[0] = listen(_listen_address);
|
||||
_server_tls[0] = listen(_listen_address, rpc::streaming_domain_type(0x77CC));
|
||||
if (listen_to_bc) {
|
||||
_server_tls[1] = listen(utils::fb_utilities::get_broadcast_address());
|
||||
_server_tls[1] = listen(utils::fb_utilities::get_broadcast_address(), rpc::streaming_domain_type(0x88DD));
|
||||
}
|
||||
}
|
||||
// Do this on just cpu 0, to avoid duplicate logs.
|
||||
@@ -607,6 +609,7 @@ shared_ptr<messaging_service::rpc_protocol_client_wrapper> messaging_service::ge
|
||||
opts.compressor_factory = &compressor_factory;
|
||||
}
|
||||
opts.tcp_nodelay = must_tcp_nodelay;
|
||||
opts.reuseaddr = true;
|
||||
|
||||
auto client = must_encrypt ?
|
||||
::make_shared<rpc_protocol_client_wrapper>(*_rpc, std::move(opts),
|
||||
@@ -668,24 +671,24 @@ std::unique_ptr<messaging_service::rpc_protocol_wrapper>& messaging_service::rpc
|
||||
return _rpc;
|
||||
}
|
||||
|
||||
rpc::sink<int32_t> messaging_service::make_sink_for_stream_mutation_fragments(rpc::source<frozen_mutation_fragment>& source) {
|
||||
rpc::sink<int32_t> messaging_service::make_sink_for_stream_mutation_fragments(rpc::source<frozen_mutation_fragment, rpc::optional<streaming::stream_mutation_fragments_cmd>>& source) {
|
||||
return source.make_sink<netw::serializer, int32_t>();
|
||||
}
|
||||
|
||||
future<rpc::sink<frozen_mutation_fragment>, rpc::source<int32_t>>
|
||||
future<rpc::sink<frozen_mutation_fragment, streaming::stream_mutation_fragments_cmd>, rpc::source<int32_t>>
|
||||
messaging_service::make_sink_and_source_for_stream_mutation_fragments(utils::UUID schema_id, utils::UUID plan_id, utils::UUID cf_id, uint64_t estimated_partitions, streaming::stream_reason reason, msg_addr id) {
|
||||
auto rpc_client = get_rpc_client(messaging_verb::STREAM_MUTATION_FRAGMENTS, id);
|
||||
return rpc_client->make_stream_sink<netw::serializer, frozen_mutation_fragment>().then([this, plan_id, schema_id, cf_id, estimated_partitions, reason, rpc_client] (rpc::sink<frozen_mutation_fragment> sink) mutable {
|
||||
auto rpc_handler = rpc()->make_client<rpc::source<int32_t> (utils::UUID, utils::UUID, utils::UUID, uint64_t, streaming::stream_reason, rpc::sink<frozen_mutation_fragment>)>(messaging_verb::STREAM_MUTATION_FRAGMENTS);
|
||||
return rpc_client->make_stream_sink<netw::serializer, frozen_mutation_fragment, streaming::stream_mutation_fragments_cmd>().then([this, plan_id, schema_id, cf_id, estimated_partitions, reason, rpc_client] (rpc::sink<frozen_mutation_fragment, streaming::stream_mutation_fragments_cmd> sink) mutable {
|
||||
auto rpc_handler = rpc()->make_client<rpc::source<int32_t> (utils::UUID, utils::UUID, utils::UUID, uint64_t, streaming::stream_reason, rpc::sink<frozen_mutation_fragment, streaming::stream_mutation_fragments_cmd>)>(messaging_verb::STREAM_MUTATION_FRAGMENTS);
|
||||
return rpc_handler(*rpc_client , plan_id, schema_id, cf_id, estimated_partitions, reason, sink).then_wrapped([sink, rpc_client] (future<rpc::source<int32_t>> source) mutable {
|
||||
return (source.failed() ? sink.close() : make_ready_future<>()).then([sink = std::move(sink), source = std::move(source)] () mutable {
|
||||
return make_ready_future<rpc::sink<frozen_mutation_fragment>, rpc::source<int32_t>>(std::move(sink), std::move(source.get0()));
|
||||
return make_ready_future<rpc::sink<frozen_mutation_fragment, streaming::stream_mutation_fragments_cmd>, rpc::source<int32_t>>(std::move(sink), std::move(source.get0()));
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
void messaging_service::register_stream_mutation_fragments(std::function<future<rpc::sink<int32_t>> (const rpc::client_info& cinfo, UUID plan_id, UUID schema_id, UUID cf_id, uint64_t estimated_partitions, rpc::optional<streaming::stream_reason>, rpc::source<frozen_mutation_fragment> source)>&& func) {
|
||||
void messaging_service::register_stream_mutation_fragments(std::function<future<rpc::sink<int32_t>> (const rpc::client_info& cinfo, UUID plan_id, UUID schema_id, UUID cf_id, uint64_t estimated_partitions, rpc::optional<streaming::stream_reason>, rpc::source<frozen_mutation_fragment, rpc::optional<streaming::stream_mutation_fragments_cmd>> source)>&& func) {
|
||||
register_handler(this, messaging_verb::STREAM_MUTATION_FRAGMENTS, std::move(func));
|
||||
}
|
||||
|
||||
@@ -1077,14 +1080,14 @@ future<> messaging_service::send_repair_put_row_diff(msg_addr id, uint32_t repai
|
||||
}
|
||||
|
||||
// Wrapper for REPAIR_ROW_LEVEL_START
|
||||
void messaging_service::register_repair_row_level_start(std::function<future<> (const rpc::client_info& cinfo, uint32_t repair_meta_id, sstring keyspace_name, sstring cf_name, dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size, uint64_t seed, unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name)>&& func) {
|
||||
void messaging_service::register_repair_row_level_start(std::function<future<> (const rpc::client_info& cinfo, uint32_t repair_meta_id, sstring keyspace_name, sstring cf_name, dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size, uint64_t seed, unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name, table_schema_version schema_version)>&& func) {
|
||||
register_handler(this, messaging_verb::REPAIR_ROW_LEVEL_START, std::move(func));
|
||||
}
|
||||
void messaging_service::unregister_repair_row_level_start() {
|
||||
_rpc->unregister_handler(messaging_verb::REPAIR_ROW_LEVEL_START);
|
||||
}
|
||||
future<> messaging_service::send_repair_row_level_start(msg_addr id, uint32_t repair_meta_id, sstring keyspace_name, sstring cf_name, dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size, uint64_t seed, unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name) {
|
||||
return send_message<void>(this, messaging_verb::REPAIR_ROW_LEVEL_START, std::move(id), repair_meta_id, std::move(keyspace_name), std::move(cf_name), std::move(range), algo, max_row_buf_size, seed, remote_shard, remote_shard_count, remote_ignore_msb, std::move(remote_partitioner_name));
|
||||
future<> messaging_service::send_repair_row_level_start(msg_addr id, uint32_t repair_meta_id, sstring keyspace_name, sstring cf_name, dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size, uint64_t seed, unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name, table_schema_version schema_version) {
|
||||
return send_message<void>(this, messaging_verb::REPAIR_ROW_LEVEL_START, std::move(id), repair_meta_id, std::move(keyspace_name), std::move(cf_name), std::move(range), algo, max_row_buf_size, seed, remote_shard, remote_shard_count, remote_ignore_msb, std::move(remote_partitioner_name), std::move(schema_version));
|
||||
}
|
||||
|
||||
// Wrapper for REPAIR_ROW_LEVEL_STOP
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
#include "tracing/tracing.hh"
|
||||
#include "digest_algorithm.hh"
|
||||
#include "streaming/stream_reason.hh"
|
||||
#include "streaming/stream_mutation_fragments_cmd.hh"
|
||||
#include "cache_temperature.hh"
|
||||
|
||||
#include <list>
|
||||
@@ -270,9 +271,9 @@ public:
|
||||
|
||||
// Wrapper for STREAM_MUTATION_FRAGMENTS
|
||||
// The receiver of STREAM_MUTATION_FRAGMENTS sends status code to the sender to notify any error on the receiver side. The status code is of type int32_t. 0 means successful, -1 means error, other status code value are reserved for future use.
|
||||
void register_stream_mutation_fragments(std::function<future<rpc::sink<int32_t>> (const rpc::client_info& cinfo, UUID plan_id, UUID schema_id, UUID cf_id, uint64_t estimated_partitions, rpc::optional<streaming::stream_reason> reason_opt, rpc::source<frozen_mutation_fragment> source)>&& func);
|
||||
rpc::sink<int32_t> make_sink_for_stream_mutation_fragments(rpc::source<frozen_mutation_fragment>& source);
|
||||
future<rpc::sink<frozen_mutation_fragment>, rpc::source<int32_t>> make_sink_and_source_for_stream_mutation_fragments(utils::UUID schema_id, utils::UUID plan_id, utils::UUID cf_id, uint64_t estimated_partitions, streaming::stream_reason reason, msg_addr id);
|
||||
void register_stream_mutation_fragments(std::function<future<rpc::sink<int32_t>> (const rpc::client_info& cinfo, UUID plan_id, UUID schema_id, UUID cf_id, uint64_t estimated_partitions, rpc::optional<streaming::stream_reason> reason_opt, rpc::source<frozen_mutation_fragment, rpc::optional<streaming::stream_mutation_fragments_cmd>> source)>&& func);
|
||||
rpc::sink<int32_t> make_sink_for_stream_mutation_fragments(rpc::source<frozen_mutation_fragment, rpc::optional<streaming::stream_mutation_fragments_cmd>>& source);
|
||||
future<rpc::sink<frozen_mutation_fragment, streaming::stream_mutation_fragments_cmd>, rpc::source<int32_t>> make_sink_and_source_for_stream_mutation_fragments(utils::UUID schema_id, utils::UUID plan_id, utils::UUID cf_id, uint64_t estimated_partitions, streaming::stream_reason reason, msg_addr id);
|
||||
|
||||
void register_stream_mutation_done(std::function<future<> (const rpc::client_info& cinfo, UUID plan_id, dht::token_range_vector ranges, UUID cf_id, unsigned dst_cpu_id)>&& func);
|
||||
future<> send_stream_mutation_done(msg_addr id, UUID plan_id, dht::token_range_vector ranges, UUID cf_id, unsigned dst_cpu_id);
|
||||
@@ -311,9 +312,9 @@ public:
|
||||
future<> send_repair_put_row_diff(msg_addr id, uint32_t repair_meta_id, repair_rows_on_wire row_diff);
|
||||
|
||||
// Wrapper for REPAIR_ROW_LEVEL_START
|
||||
void register_repair_row_level_start(std::function<future<> (const rpc::client_info& cinfo, uint32_t repair_meta_id, sstring keyspace_name, sstring cf_name, dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size, uint64_t seed, unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name)>&& func);
|
||||
void register_repair_row_level_start(std::function<future<> (const rpc::client_info& cinfo, uint32_t repair_meta_id, sstring keyspace_name, sstring cf_name, dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size, uint64_t seed, unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name, table_schema_version schema_version)>&& func);
|
||||
void unregister_repair_row_level_start();
|
||||
future<> send_repair_row_level_start(msg_addr id, uint32_t repair_meta_id, sstring keyspace_name, sstring cf_name, dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size, uint64_t seed, unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name);
|
||||
future<> send_repair_row_level_start(msg_addr id, uint32_t repair_meta_id, sstring keyspace_name, sstring cf_name, dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size, uint64_t seed, unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name, table_schema_version schema_version);
|
||||
|
||||
// Wrapper for REPAIR_ROW_LEVEL_STOP
|
||||
void register_repair_row_level_stop(std::function<future<> (const rpc::client_info& cinfo, uint32_t repair_meta_id, sstring keyspace_name, sstring cf_name, dht::token_range range)>&& func);
|
||||
|
||||
@@ -39,6 +39,9 @@
|
||||
#include "mutation_cleaner.hh"
|
||||
#include <seastar/core/execution_stage.hh>
|
||||
#include "types/map.hh"
|
||||
#include "utils/exceptions.hh"
|
||||
|
||||
logging::logger mplog("mutation_partition");
|
||||
|
||||
template<bool reversed>
|
||||
struct reversal_traits;
|
||||
@@ -145,7 +148,14 @@ mutation_partition::mutation_partition(const schema& s, const mutation_partition
|
||||
, _static_row(s, column_kind::static_column, x._static_row)
|
||||
, _static_row_continuous(x._static_row_continuous)
|
||||
, _rows()
|
||||
, _row_tombstones(x._row_tombstones) {
|
||||
, _row_tombstones(x._row_tombstones)
|
||||
#ifdef SEASTAR_DEBUG
|
||||
, _schema_version(s.version())
|
||||
#endif
|
||||
{
|
||||
#ifdef SEASTAR_DEBUG
|
||||
assert(x._schema_version == _schema_version);
|
||||
#endif
|
||||
auto cloner = [&s] (const auto& x) {
|
||||
return current_allocator().construct<rows_entry>(s, x);
|
||||
};
|
||||
@@ -158,7 +168,14 @@ mutation_partition::mutation_partition(const mutation_partition& x, const schema
|
||||
, _static_row(schema, column_kind::static_column, x._static_row)
|
||||
, _static_row_continuous(x._static_row_continuous)
|
||||
, _rows()
|
||||
, _row_tombstones(x._row_tombstones, range_tombstone_list::copy_comparator_only()) {
|
||||
, _row_tombstones(x._row_tombstones, range_tombstone_list::copy_comparator_only())
|
||||
#ifdef SEASTAR_DEBUG
|
||||
, _schema_version(schema.version())
|
||||
#endif
|
||||
{
|
||||
#ifdef SEASTAR_DEBUG
|
||||
assert(x._schema_version == _schema_version);
|
||||
#endif
|
||||
try {
|
||||
for(auto&& r : ck_ranges) {
|
||||
for (const rows_entry& e : x.range(schema, r)) {
|
||||
@@ -181,7 +198,13 @@ mutation_partition::mutation_partition(mutation_partition&& x, const schema& sch
|
||||
, _static_row_continuous(x._static_row_continuous)
|
||||
, _rows(std::move(x._rows))
|
||||
, _row_tombstones(std::move(x._row_tombstones))
|
||||
#ifdef SEASTAR_DEBUG
|
||||
, _schema_version(schema.version())
|
||||
#endif
|
||||
{
|
||||
#ifdef SEASTAR_DEBUG
|
||||
assert(x._schema_version == _schema_version);
|
||||
#endif
|
||||
{
|
||||
auto deleter = current_deleter<rows_entry>();
|
||||
auto it = _rows.begin();
|
||||
@@ -221,6 +244,7 @@ mutation_partition::operator=(mutation_partition&& x) noexcept {
|
||||
}
|
||||
|
||||
void mutation_partition::ensure_last_dummy(const schema& s) {
|
||||
check_schema(s);
|
||||
if (_rows.empty() || !_rows.rbegin()->is_last_dummy()) {
|
||||
_rows.insert_before(_rows.end(),
|
||||
*current_allocator().construct<rows_entry>(s, rows_entry::last_dummy_tag(), is_continuous::yes));
|
||||
@@ -277,11 +301,16 @@ void deletable_row::apply(const schema& s, clustering_row cr) {
|
||||
|
||||
void
|
||||
mutation_partition::apply(const schema& s, const mutation_fragment& mf) {
|
||||
check_schema(s);
|
||||
mutation_fragment_applier applier{s, *this};
|
||||
mf.visit(applier);
|
||||
}
|
||||
|
||||
stop_iteration mutation_partition::apply_monotonically(const schema& s, mutation_partition&& p, cache_tracker* tracker, is_preemptible preemptible) {
|
||||
#ifdef SEASTAR_DEBUG
|
||||
assert(s.version() == _schema_version);
|
||||
assert(p._schema_version == _schema_version);
|
||||
#endif
|
||||
_tombstone.apply(p._tombstone);
|
||||
_static_row.apply_monotonically(s, column_kind::static_column, std::move(p._static_row));
|
||||
_static_row_continuous |= p._static_row_continuous;
|
||||
@@ -387,6 +416,7 @@ void mutation_partition::apply_weak(const schema& s, mutation_partition&& p) {
|
||||
|
||||
tombstone
|
||||
mutation_partition::range_tombstone_for_row(const schema& schema, const clustering_key& key) const {
|
||||
check_schema(schema);
|
||||
tombstone t = _tombstone;
|
||||
if (!_row_tombstones.empty()) {
|
||||
auto found = _row_tombstones.search_tombstone_covering(schema, key);
|
||||
@@ -397,6 +427,7 @@ mutation_partition::range_tombstone_for_row(const schema& schema, const clusteri
|
||||
|
||||
row_tombstone
|
||||
mutation_partition::tombstone_for_row(const schema& schema, const clustering_key& key) const {
|
||||
check_schema(schema);
|
||||
row_tombstone t = row_tombstone(range_tombstone_for_row(schema, key));
|
||||
|
||||
auto j = _rows.find(key, rows_entry::compare(schema));
|
||||
@@ -409,6 +440,7 @@ mutation_partition::tombstone_for_row(const schema& schema, const clustering_key
|
||||
|
||||
row_tombstone
|
||||
mutation_partition::tombstone_for_row(const schema& schema, const rows_entry& e) const {
|
||||
check_schema(schema);
|
||||
row_tombstone t = e.row().deleted_at();
|
||||
t.apply(range_tombstone_for_row(schema, e.key()));
|
||||
return t;
|
||||
@@ -416,6 +448,7 @@ mutation_partition::tombstone_for_row(const schema& schema, const rows_entry& e)
|
||||
|
||||
void
|
||||
mutation_partition::apply_row_tombstone(const schema& schema, clustering_key_prefix prefix, tombstone t) {
|
||||
check_schema(schema);
|
||||
assert(!prefix.is_full(schema));
|
||||
auto start = prefix;
|
||||
_row_tombstones.apply(schema, {std::move(start), std::move(prefix), std::move(t)});
|
||||
@@ -423,11 +456,13 @@ mutation_partition::apply_row_tombstone(const schema& schema, clustering_key_pre
|
||||
|
||||
void
|
||||
mutation_partition::apply_row_tombstone(const schema& schema, range_tombstone rt) {
|
||||
check_schema(schema);
|
||||
_row_tombstones.apply(schema, std::move(rt));
|
||||
}
|
||||
|
||||
void
|
||||
mutation_partition::apply_delete(const schema& schema, const clustering_key_prefix& prefix, tombstone t) {
|
||||
check_schema(schema);
|
||||
if (prefix.is_empty(schema)) {
|
||||
apply(t);
|
||||
} else if (prefix.is_full(schema)) {
|
||||
@@ -439,6 +474,7 @@ mutation_partition::apply_delete(const schema& schema, const clustering_key_pref
|
||||
|
||||
void
|
||||
mutation_partition::apply_delete(const schema& schema, range_tombstone rt) {
|
||||
check_schema(schema);
|
||||
if (range_tombstone::is_single_clustering_row_tombstone(schema, rt.start, rt.start_kind, rt.end, rt.end_kind)) {
|
||||
apply_delete(schema, std::move(rt.start), std::move(rt.tomb));
|
||||
return;
|
||||
@@ -448,6 +484,7 @@ mutation_partition::apply_delete(const schema& schema, range_tombstone rt) {
|
||||
|
||||
void
|
||||
mutation_partition::apply_delete(const schema& schema, clustering_key&& prefix, tombstone t) {
|
||||
check_schema(schema);
|
||||
if (prefix.is_empty(schema)) {
|
||||
apply(t);
|
||||
} else if (prefix.is_full(schema)) {
|
||||
@@ -459,6 +496,7 @@ mutation_partition::apply_delete(const schema& schema, clustering_key&& prefix,
|
||||
|
||||
void
|
||||
mutation_partition::apply_delete(const schema& schema, clustering_key_prefix_view prefix, tombstone t) {
|
||||
check_schema(schema);
|
||||
if (prefix.is_empty(schema)) {
|
||||
apply(t);
|
||||
} else if (prefix.is_full(schema)) {
|
||||
@@ -484,6 +522,7 @@ void mutation_partition::insert_row(const schema& s, const clustering_key& key,
|
||||
}
|
||||
|
||||
void mutation_partition::insert_row(const schema& s, const clustering_key& key, const deletable_row& row) {
|
||||
check_schema(s);
|
||||
auto e = alloc_strategy_unique_ptr<rows_entry>(
|
||||
current_allocator().construct<rows_entry>(s, key, row));
|
||||
_rows.insert(_rows.end(), *e, rows_entry::compare(s));
|
||||
@@ -492,6 +531,7 @@ void mutation_partition::insert_row(const schema& s, const clustering_key& key,
|
||||
|
||||
const row*
|
||||
mutation_partition::find_row(const schema& s, const clustering_key& key) const {
|
||||
check_schema(s);
|
||||
auto i = _rows.find(key, rows_entry::compare(s));
|
||||
if (i == _rows.end()) {
|
||||
return nullptr;
|
||||
@@ -501,6 +541,7 @@ mutation_partition::find_row(const schema& s, const clustering_key& key) const {
|
||||
|
||||
deletable_row&
|
||||
mutation_partition::clustered_row(const schema& s, clustering_key&& key) {
|
||||
check_schema(s);
|
||||
auto i = _rows.find(key, rows_entry::compare(s));
|
||||
if (i == _rows.end()) {
|
||||
auto e = alloc_strategy_unique_ptr<rows_entry>(
|
||||
@@ -513,6 +554,7 @@ mutation_partition::clustered_row(const schema& s, clustering_key&& key) {
|
||||
|
||||
deletable_row&
|
||||
mutation_partition::clustered_row(const schema& s, const clustering_key& key) {
|
||||
check_schema(s);
|
||||
auto i = _rows.find(key, rows_entry::compare(s));
|
||||
if (i == _rows.end()) {
|
||||
auto e = alloc_strategy_unique_ptr<rows_entry>(
|
||||
@@ -525,6 +567,7 @@ mutation_partition::clustered_row(const schema& s, const clustering_key& key) {
|
||||
|
||||
deletable_row&
|
||||
mutation_partition::clustered_row(const schema& s, clustering_key_view key) {
|
||||
check_schema(s);
|
||||
auto i = _rows.find(key, rows_entry::compare(s));
|
||||
if (i == _rows.end()) {
|
||||
auto e = alloc_strategy_unique_ptr<rows_entry>(
|
||||
@@ -537,6 +580,7 @@ mutation_partition::clustered_row(const schema& s, clustering_key_view key) {
|
||||
|
||||
deletable_row&
|
||||
mutation_partition::clustered_row(const schema& s, position_in_partition_view pos, is_dummy dummy, is_continuous continuous) {
|
||||
check_schema(s);
|
||||
auto i = _rows.find(pos, rows_entry::compare(s));
|
||||
if (i == _rows.end()) {
|
||||
auto e = alloc_strategy_unique_ptr<rows_entry>(
|
||||
@@ -549,6 +593,7 @@ mutation_partition::clustered_row(const schema& s, position_in_partition_view po
|
||||
|
||||
mutation_partition::rows_type::const_iterator
|
||||
mutation_partition::lower_bound(const schema& schema, const query::clustering_range& r) const {
|
||||
check_schema(schema);
|
||||
if (!r.start()) {
|
||||
return std::cbegin(_rows);
|
||||
}
|
||||
@@ -557,6 +602,7 @@ mutation_partition::lower_bound(const schema& schema, const query::clustering_ra
|
||||
|
||||
mutation_partition::rows_type::const_iterator
|
||||
mutation_partition::upper_bound(const schema& schema, const query::clustering_range& r) const {
|
||||
check_schema(schema);
|
||||
if (!r.end()) {
|
||||
return std::cend(_rows);
|
||||
}
|
||||
@@ -565,6 +611,7 @@ mutation_partition::upper_bound(const schema& schema, const query::clustering_ra
|
||||
|
||||
boost::iterator_range<mutation_partition::rows_type::const_iterator>
|
||||
mutation_partition::range(const schema& schema, const query::clustering_range& r) const {
|
||||
check_schema(schema);
|
||||
return boost::make_iterator_range(lower_bound(schema, r), upper_bound(schema, r));
|
||||
}
|
||||
|
||||
@@ -601,6 +648,7 @@ mutation_partition::upper_bound(const schema& schema, const query::clustering_ra
|
||||
template<typename Func>
|
||||
void mutation_partition::for_each_row(const schema& schema, const query::clustering_range& row_range, bool reversed, Func&& func) const
|
||||
{
|
||||
check_schema(schema);
|
||||
auto r = range(schema, row_range);
|
||||
if (!reversed) {
|
||||
for (const auto& e : r) {
|
||||
@@ -817,6 +865,7 @@ bool has_any_live_data(const schema& s, column_kind kind, const row& cells, tomb
|
||||
|
||||
void
|
||||
mutation_partition::query_compacted(query::result::partition_writer& pw, const schema& s, uint32_t limit) const {
|
||||
check_schema(s);
|
||||
const query::partition_slice& slice = pw.slice();
|
||||
max_timestamp max_ts{pw.last_modified()};
|
||||
|
||||
@@ -1049,6 +1098,10 @@ bool mutation_partition::equal(const schema& s, const mutation_partition& p) con
|
||||
}
|
||||
|
||||
bool mutation_partition::equal(const schema& this_schema, const mutation_partition& p, const schema& p_schema) const {
|
||||
#ifdef SEASTAR_DEBUG
|
||||
assert(_schema_version == this_schema.version());
|
||||
assert(p._schema_version == p_schema.version());
|
||||
#endif
|
||||
if (_tombstone != p._tombstone) {
|
||||
return false;
|
||||
}
|
||||
@@ -1177,6 +1230,9 @@ row::apply_monotonically(const column_definition& column, atomic_cell_or_collect
|
||||
void
|
||||
row::append_cell(column_id id, atomic_cell_or_collection value) {
|
||||
if (_type == storage_type::vector && id < max_vector_size) {
|
||||
if (_storage.vector.v.size() > id) {
|
||||
on_internal_error(mplog, format("Attempted to append cell#{} to row already having {} cells", id, _storage.vector.v.size()));
|
||||
}
|
||||
_storage.vector.v.resize(id);
|
||||
_storage.vector.v.emplace_back(cell_and_hash{std::move(value), cell_hash_opt()});
|
||||
_storage.vector.present.set(id);
|
||||
@@ -1241,6 +1297,7 @@ size_t rows_entry::memory_usage(const schema& s) const {
|
||||
}
|
||||
|
||||
size_t mutation_partition::external_memory_usage(const schema& s) const {
|
||||
check_schema(s);
|
||||
size_t sum = 0;
|
||||
sum += static_row().external_memory_usage(s, column_kind::static_column);
|
||||
for (auto& clr : clustered_rows()) {
|
||||
@@ -1259,6 +1316,7 @@ void mutation_partition::trim_rows(const schema& s,
|
||||
const std::vector<query::clustering_range>& row_ranges,
|
||||
Func&& func)
|
||||
{
|
||||
check_schema(s);
|
||||
static_assert(std::is_same<stop_iteration, std::result_of_t<Func(rows_entry&)>>::value, "Bad func signature");
|
||||
|
||||
stop_iteration stop = stop_iteration::no;
|
||||
@@ -1303,6 +1361,7 @@ uint32_t mutation_partition::do_compact(const schema& s,
|
||||
uint32_t row_limit,
|
||||
can_gc_fn& can_gc)
|
||||
{
|
||||
check_schema(s);
|
||||
assert(row_limit > 0);
|
||||
|
||||
auto gc_before = saturating_subtract(query_time, s.gc_grace_seconds());
|
||||
@@ -1368,12 +1427,14 @@ mutation_partition::compact_for_query(
|
||||
bool reverse,
|
||||
uint32_t row_limit)
|
||||
{
|
||||
check_schema(s);
|
||||
return do_compact(s, query_time, row_ranges, reverse, row_limit, always_gc);
|
||||
}
|
||||
|
||||
void mutation_partition::compact_for_compaction(const schema& s,
|
||||
can_gc_fn& can_gc, gc_clock::time_point compaction_time)
|
||||
{
|
||||
check_schema(s);
|
||||
static const std::vector<query::clustering_range> all_rows = {
|
||||
query::clustering_range::make_open_ended_both_sides()
|
||||
};
|
||||
@@ -1407,11 +1468,13 @@ row::is_live(const schema& s, column_kind kind, tombstone base_tombstone, gc_clo
|
||||
|
||||
bool
|
||||
mutation_partition::is_static_row_live(const schema& s, gc_clock::time_point query_time) const {
|
||||
check_schema(s);
|
||||
return has_any_live_data(s, column_kind::static_column, static_row(), _tombstone, query_time);
|
||||
}
|
||||
|
||||
size_t
|
||||
mutation_partition::live_row_count(const schema& s, gc_clock::time_point query_time) const {
|
||||
check_schema(s);
|
||||
size_t count = 0;
|
||||
|
||||
for (const rows_entry& e : non_dummy_rows()) {
|
||||
@@ -1757,6 +1820,7 @@ row row::difference(const schema& s, column_kind kind, const row& other) const
|
||||
|
||||
mutation_partition mutation_partition::difference(schema_ptr s, const mutation_partition& other) const
|
||||
{
|
||||
check_schema(*s);
|
||||
mutation_partition mp(s);
|
||||
if (_tombstone > other._tombstone) {
|
||||
mp.apply(_tombstone);
|
||||
@@ -1787,6 +1851,7 @@ mutation_partition mutation_partition::difference(schema_ptr s, const mutation_p
|
||||
}
|
||||
|
||||
void mutation_partition::accept(const schema& s, mutation_partition_visitor& v) const {
|
||||
check_schema(s);
|
||||
v.accept_partition_tombstone(_tombstone);
|
||||
_static_row.for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
|
||||
const column_definition& def = s.static_column_at(id);
|
||||
@@ -2200,6 +2265,9 @@ mutation_partition::mutation_partition(mutation_partition::incomplete_tag, const
|
||||
, _static_row_continuous(!s.has_static_columns())
|
||||
, _rows()
|
||||
, _row_tombstones(s)
|
||||
#ifdef SEASTAR_DEBUG
|
||||
, _schema_version(s.version())
|
||||
#endif
|
||||
{
|
||||
_rows.insert_before(_rows.end(),
|
||||
*current_allocator().construct<rows_entry>(s, rows_entry::last_dummy_tag(), is_continuous::no));
|
||||
@@ -2265,6 +2333,7 @@ void mutation_partition::set_continuity(const schema& s, const position_range& p
|
||||
}
|
||||
|
||||
clustering_interval_set mutation_partition::get_continuity(const schema& s, is_continuous cont) const {
|
||||
check_schema(s);
|
||||
clustering_interval_set result;
|
||||
auto i = _rows.begin();
|
||||
auto prev_pos = position_in_partition::before_all_clustered_rows();
|
||||
@@ -2314,6 +2383,7 @@ stop_iteration mutation_partition::clear_gently(cache_tracker* tracker) noexcept
|
||||
|
||||
bool
|
||||
mutation_partition::check_continuity(const schema& s, const position_range& r, is_continuous cont) const {
|
||||
check_schema(s);
|
||||
auto less = rows_entry::compare(s);
|
||||
auto i = _rows.lower_bound(r.start(), less);
|
||||
auto end = _rows.lower_bound(r.end(), less);
|
||||
|
||||
@@ -397,7 +397,7 @@ public:
|
||||
if (is_missing() || _ttl == dead) {
|
||||
return false;
|
||||
}
|
||||
if (_ttl != no_ttl && _expiry < now) {
|
||||
if (_ttl != no_ttl && _expiry <= now) {
|
||||
return false;
|
||||
}
|
||||
return _timestamp > t.timestamp;
|
||||
@@ -407,7 +407,7 @@ public:
|
||||
if (_ttl == dead) {
|
||||
return true;
|
||||
}
|
||||
return _ttl != no_ttl && _expiry < now;
|
||||
return _ttl != no_ttl && _expiry <= now;
|
||||
}
|
||||
// Can be called only when is_live().
|
||||
bool is_expiring() const {
|
||||
@@ -447,7 +447,7 @@ public:
|
||||
_timestamp = api::missing_timestamp;
|
||||
return false;
|
||||
}
|
||||
if (_ttl > no_ttl && _expiry < now) {
|
||||
if (_ttl > no_ttl && _expiry <= now) {
|
||||
_expiry -= _ttl;
|
||||
_ttl = dead;
|
||||
}
|
||||
@@ -940,6 +940,9 @@ private:
|
||||
// Contains only strict prefixes so that we don't have to lookup full keys
|
||||
// in both _row_tombstones and _rows.
|
||||
range_tombstone_list _row_tombstones;
|
||||
#ifdef SEASTAR_DEBUG
|
||||
table_schema_version _schema_version;
|
||||
#endif
|
||||
|
||||
friend class mutation_partition_applier;
|
||||
friend class converting_mutation_partition_applier;
|
||||
@@ -954,10 +957,16 @@ public:
|
||||
mutation_partition(schema_ptr s)
|
||||
: _rows()
|
||||
, _row_tombstones(*s)
|
||||
#ifdef SEASTAR_DEBUG
|
||||
, _schema_version(s->version())
|
||||
#endif
|
||||
{ }
|
||||
mutation_partition(mutation_partition& other, copy_comparators_only)
|
||||
: _rows()
|
||||
, _row_tombstones(other._row_tombstones, range_tombstone_list::copy_comparator_only())
|
||||
#ifdef SEASTAR_DEBUG
|
||||
, _schema_version(other._schema_version)
|
||||
#endif
|
||||
{ }
|
||||
mutation_partition(mutation_partition&&) = default;
|
||||
mutation_partition(const schema& s, const mutation_partition&);
|
||||
@@ -1181,6 +1190,12 @@ private:
|
||||
template<typename Func>
|
||||
void for_each_row(const schema& schema, const query::clustering_range& row_range, bool reversed, Func&& func) const;
|
||||
friend class counter_write_query_result_builder;
|
||||
|
||||
void check_schema(const schema& s) const {
|
||||
#ifdef SEASTAR_DEBUG
|
||||
assert(s.version() == _schema_version);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
inline
|
||||
|
||||
@@ -31,7 +31,7 @@ reconcilable_result::reconcilable_result()
|
||||
: _row_count(0)
|
||||
{ }
|
||||
|
||||
reconcilable_result::reconcilable_result(uint32_t row_count, std::vector<partition> p, query::short_read short_read,
|
||||
reconcilable_result::reconcilable_result(uint32_t row_count, utils::chunked_vector<partition> p, query::short_read short_read,
|
||||
query::result_memory_tracker memory_tracker)
|
||||
: _row_count(row_count)
|
||||
, _short_read(short_read)
|
||||
@@ -39,11 +39,11 @@ reconcilable_result::reconcilable_result(uint32_t row_count, std::vector<partiti
|
||||
, _partitions(std::move(p))
|
||||
{ }
|
||||
|
||||
const std::vector<partition>& reconcilable_result::partitions() const {
|
||||
const utils::chunked_vector<partition>& reconcilable_result::partitions() const {
|
||||
return _partitions;
|
||||
}
|
||||
|
||||
std::vector<partition>& reconcilable_result::partitions() {
|
||||
utils::chunked_vector<partition>& reconcilable_result::partitions() {
|
||||
return _partitions;
|
||||
}
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#include "frozen_mutation.hh"
|
||||
#include "db/timeout_clock.hh"
|
||||
#include "querier.hh"
|
||||
#include "utils/chunked_vector.hh"
|
||||
#include <seastar/core/execution_stage.hh>
|
||||
|
||||
class reconcilable_result;
|
||||
@@ -72,17 +73,17 @@ class reconcilable_result {
|
||||
uint32_t _row_count;
|
||||
query::short_read _short_read;
|
||||
query::result_memory_tracker _memory_tracker;
|
||||
std::vector<partition> _partitions;
|
||||
utils::chunked_vector<partition> _partitions;
|
||||
public:
|
||||
~reconcilable_result();
|
||||
reconcilable_result();
|
||||
reconcilable_result(reconcilable_result&&) = default;
|
||||
reconcilable_result& operator=(reconcilable_result&&) = default;
|
||||
reconcilable_result(uint32_t row_count, std::vector<partition> partitions, query::short_read short_read,
|
||||
reconcilable_result(uint32_t row_count, utils::chunked_vector<partition> partitions, query::short_read short_read,
|
||||
query::result_memory_tracker memory_tracker = { });
|
||||
|
||||
const std::vector<partition>& partitions() const;
|
||||
std::vector<partition>& partitions();
|
||||
const utils::chunked_vector<partition>& partitions() const;
|
||||
utils::chunked_vector<partition>& partitions();
|
||||
|
||||
uint32_t row_count() const {
|
||||
return _row_count;
|
||||
@@ -112,7 +113,7 @@ class reconcilable_result_builder {
|
||||
const schema& _schema;
|
||||
const query::partition_slice& _slice;
|
||||
|
||||
std::vector<partition> _result;
|
||||
utils::chunked_vector<partition> _result;
|
||||
uint32_t _live_rows{};
|
||||
|
||||
bool _has_ck_selector{};
|
||||
|
||||
@@ -910,9 +910,10 @@ class shard_reader : public enable_lw_shared_from_this<shard_reader>, public fla
|
||||
bool _reader_created = false;
|
||||
bool _drop_partition_start = false;
|
||||
bool _drop_static_row = false;
|
||||
position_in_partition::tri_compare _tri_cmp;
|
||||
|
||||
std::optional<dht::decorated_key> _last_pkey;
|
||||
std::optional<position_in_partition> _last_position_in_partition;
|
||||
position_in_partition _next_position_in_partition = position_in_partition::for_partition_start();
|
||||
// These are used when the reader has to be recreated (after having been
|
||||
// evicted while paused) and the range and/or slice it is recreated with
|
||||
// differs from the original ones.
|
||||
@@ -920,13 +921,13 @@ class shard_reader : public enable_lw_shared_from_this<shard_reader>, public fla
|
||||
std::optional<query::partition_slice> _slice_override;
|
||||
|
||||
private:
|
||||
void update_last_position(const circular_buffer<mutation_fragment>& buffer);
|
||||
void update_next_position(flat_mutation_reader& reader, circular_buffer<mutation_fragment>& buffer);
|
||||
void adjust_partition_slice();
|
||||
flat_mutation_reader recreate_reader();
|
||||
flat_mutation_reader resume_or_create_reader();
|
||||
bool should_drop_fragment(const mutation_fragment& mf);
|
||||
future<> do_fill_buffer(flat_mutation_reader& reader, db::timeout_clock::time_point timeout);
|
||||
future<> ensure_buffer_contains_all_fragments_for_last_pos(flat_mutation_reader& reader, circular_buffer<mutation_fragment>& buffer,
|
||||
db::timeout_clock::time_point timeout);
|
||||
future<> fill_buffer(flat_mutation_reader& reader, circular_buffer<mutation_fragment>& buffer, db::timeout_clock::time_point timeout);
|
||||
|
||||
public:
|
||||
remote_reader(
|
||||
@@ -1024,7 +1025,7 @@ void shard_reader::stop() noexcept {
|
||||
}).finally([zis = shared_from_this()] {}));
|
||||
}
|
||||
|
||||
void shard_reader::remote_reader::update_last_position(const circular_buffer<mutation_fragment>& buffer) {
|
||||
void shard_reader::remote_reader::update_next_position(flat_mutation_reader& reader, circular_buffer<mutation_fragment>& buffer) {
|
||||
if (buffer.empty()) {
|
||||
return;
|
||||
}
|
||||
@@ -1035,7 +1036,31 @@ void shard_reader::remote_reader::update_last_position(const circular_buffer<mut
|
||||
_last_pkey = pk_it->as_partition_start().key();
|
||||
}
|
||||
|
||||
_last_position_in_partition.emplace(buffer.back().position());
|
||||
const auto last_pos = buffer.back().position();
|
||||
switch (last_pos.region()) {
|
||||
case partition_region::partition_start:
|
||||
_next_position_in_partition = position_in_partition::for_static_row();
|
||||
break;
|
||||
case partition_region::static_row:
|
||||
_next_position_in_partition = position_in_partition::before_all_clustered_rows();
|
||||
break;
|
||||
case partition_region::clustered:
|
||||
if (reader.is_buffer_empty()) {
|
||||
_next_position_in_partition = position_in_partition::after_key(last_pos);
|
||||
} else {
|
||||
const auto& next_frag = reader.peek_buffer();
|
||||
if (next_frag.is_end_of_partition()) {
|
||||
buffer.emplace_back(reader.pop_mutation_fragment());
|
||||
_next_position_in_partition = position_in_partition::for_partition_start();
|
||||
} else {
|
||||
_next_position_in_partition = position_in_partition(next_frag.position());
|
||||
}
|
||||
}
|
||||
break;
|
||||
case partition_region::partition_end:
|
||||
_next_position_in_partition = position_in_partition::for_partition_start();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void shard_reader::remote_reader::adjust_partition_slice() {
|
||||
@@ -1043,9 +1068,8 @@ void shard_reader::remote_reader::adjust_partition_slice() {
|
||||
_slice_override = _ps;
|
||||
}
|
||||
|
||||
auto& last_ckey = _last_position_in_partition->key();
|
||||
auto ranges = _slice_override->default_row_ranges();
|
||||
query::trim_clustering_row_ranges_to(*_schema, ranges, last_ckey);
|
||||
query::trim_clustering_row_ranges_to(*_schema, ranges, _next_position_in_partition);
|
||||
|
||||
_slice_override->clear_ranges();
|
||||
_slice_override->set_range(*_schema, _last_pkey->key(), std::move(ranges));
|
||||
@@ -1058,25 +1082,22 @@ flat_mutation_reader shard_reader::remote_reader::recreate_reader() {
|
||||
if (_last_pkey) {
|
||||
bool partition_range_is_inclusive = true;
|
||||
|
||||
if (_last_position_in_partition) {
|
||||
switch (_last_position_in_partition->region()) {
|
||||
case partition_region::partition_start:
|
||||
_drop_partition_start = true;
|
||||
break;
|
||||
case partition_region::static_row:
|
||||
_drop_partition_start = true;
|
||||
_drop_static_row = true;
|
||||
break;
|
||||
case partition_region::clustered:
|
||||
_drop_partition_start = true;
|
||||
_drop_static_row = true;
|
||||
adjust_partition_slice();
|
||||
slice = &*_slice_override;
|
||||
break;
|
||||
case partition_region::partition_end:
|
||||
partition_range_is_inclusive = false;
|
||||
break;
|
||||
}
|
||||
switch (_next_position_in_partition.region()) {
|
||||
case partition_region::partition_start:
|
||||
partition_range_is_inclusive = false;
|
||||
break;
|
||||
case partition_region::static_row:
|
||||
_drop_partition_start = true;
|
||||
break;
|
||||
case partition_region::clustered:
|
||||
_drop_partition_start = true;
|
||||
_drop_static_row = true;
|
||||
adjust_partition_slice();
|
||||
slice = &*_slice_override;
|
||||
break;
|
||||
case partition_region::partition_end:
|
||||
partition_range_is_inclusive = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// The original range contained a single partition and we've read it
|
||||
@@ -1115,62 +1136,83 @@ flat_mutation_reader shard_reader::remote_reader::resume_or_create_reader() {
|
||||
return recreate_reader();
|
||||
}
|
||||
|
||||
bool shard_reader::remote_reader::should_drop_fragment(const mutation_fragment& mf) {
|
||||
if (_drop_partition_start && mf.is_partition_start()) {
|
||||
_drop_partition_start = false;
|
||||
return true;
|
||||
}
|
||||
if (_drop_static_row && mf.is_static_row()) {
|
||||
_drop_static_row = false;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
future<> shard_reader::remote_reader::do_fill_buffer(flat_mutation_reader& reader, db::timeout_clock::time_point timeout) {
|
||||
if (!_drop_partition_start && !_drop_static_row) {
|
||||
return reader.fill_buffer(timeout);
|
||||
}
|
||||
return repeat([this, &reader, timeout] {
|
||||
return reader.fill_buffer(timeout).then([this, &reader] {
|
||||
const auto eos = reader.is_end_of_stream();
|
||||
|
||||
if (reader.is_buffer_empty()) {
|
||||
return stop_iteration(eos);
|
||||
while (!reader.is_buffer_empty() && should_drop_fragment(reader.peek_buffer())) {
|
||||
reader.pop_mutation_fragment();
|
||||
}
|
||||
if (_drop_partition_start) {
|
||||
_drop_partition_start = false;
|
||||
if (reader.peek_buffer().is_partition_start()) {
|
||||
reader.pop_mutation_fragment();
|
||||
}
|
||||
}
|
||||
|
||||
if (reader.is_buffer_empty()) {
|
||||
return stop_iteration(eos);
|
||||
}
|
||||
if (_drop_static_row) {
|
||||
_drop_static_row = false;
|
||||
if (reader.peek_buffer().is_static_row()) {
|
||||
reader.pop_mutation_fragment();
|
||||
}
|
||||
}
|
||||
|
||||
return stop_iteration(reader.is_buffer_full() || eos);
|
||||
return stop_iteration(reader.is_buffer_full() || reader.is_end_of_stream());
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
future<> shard_reader::remote_reader::ensure_buffer_contains_all_fragments_for_last_pos(flat_mutation_reader& reader,
|
||||
circular_buffer<mutation_fragment>& buffer, db::timeout_clock::time_point timeout) {
|
||||
if (buffer.empty() || !buffer.back().is_range_tombstone()) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
auto stop = [this, &reader, &buffer] {
|
||||
future<> shard_reader::remote_reader::fill_buffer(flat_mutation_reader& reader, circular_buffer<mutation_fragment>& buffer,
|
||||
db::timeout_clock::time_point timeout) {
|
||||
return do_fill_buffer(reader, timeout).then([this, &reader, &buffer, timeout] {
|
||||
if (reader.is_buffer_empty()) {
|
||||
return reader.is_end_of_stream();
|
||||
return make_ready_future<>();
|
||||
}
|
||||
const auto& next_pos = reader.peek_buffer().position();
|
||||
if (next_pos.region() != partition_region::clustered) {
|
||||
return true;
|
||||
}
|
||||
return !next_pos.key().equal(*_schema, buffer.back().position().key());
|
||||
};
|
||||
|
||||
return do_until(stop, [this, &reader, &buffer, timeout] {
|
||||
if (reader.is_buffer_empty()) {
|
||||
return do_fill_buffer(reader, timeout);
|
||||
}
|
||||
buffer.emplace_back(reader.pop_mutation_fragment());
|
||||
return make_ready_future<>();
|
||||
buffer = reader.detach_buffer();
|
||||
auto stop = [this, &reader, &buffer] {
|
||||
// The only problematic fragment kind is the range tombstone.
|
||||
// All other fragment kinds are safe to end the buffer on, and
|
||||
// are guaranteed to represent progress vs. the last buffer fill.
|
||||
if (!buffer.back().is_range_tombstone()) {
|
||||
return true;
|
||||
}
|
||||
if (reader.is_buffer_empty()) {
|
||||
return reader.is_end_of_stream();
|
||||
}
|
||||
const auto& next_pos = reader.peek_buffer().position();
|
||||
// To ensure safe progress we have to ensure the following:
|
||||
//
|
||||
// _next_position_in_partition < buffer.back().position() < next_pos
|
||||
//
|
||||
// * The first condition is to ensure we made progress since the
|
||||
// last buffer fill. Otherwise we might get into an endless loop if
|
||||
// the reader is recreated after each `fill_buffer()` call.
|
||||
// * The second condition is to ensure we have seen all fragments
|
||||
// with the same position. Otherwise we might jump over those
|
||||
// remaining fragments with the same position as the last
|
||||
// fragment's in the buffer when the reader is recreated.
|
||||
return _tri_cmp(_next_position_in_partition, buffer.back().position()) < 0 && _tri_cmp(buffer.back().position(), next_pos) < 0;
|
||||
};
|
||||
// Read additional fragments until it is safe to stop, if needed.
|
||||
// We have to ensure we stop at a fragment such that if the reader is
|
||||
// evicted and recreated later, we won't be skipping any fragments.
|
||||
// Practically, range tombstones are the only ones that are
|
||||
// problematic to end the buffer on. This is due to the fact range
|
||||
// tombstones can have the same position that multiple following range
|
||||
// tombstones, or a single following clustering row in the stream has.
|
||||
// When a range tombstone is the last in the buffer, we have to continue
|
||||
// to read until we are sure we've read all fragments sharing the same
|
||||
// position, so that we can safely continue reading from after said
|
||||
// position.
|
||||
return do_until(stop, [this, &reader, &buffer, timeout] {
|
||||
if (reader.is_buffer_empty()) {
|
||||
return do_fill_buffer(reader, timeout);
|
||||
}
|
||||
buffer.emplace_back(reader.pop_mutation_fragment());
|
||||
return make_ready_future<>();
|
||||
});
|
||||
}).then([this, &reader, &buffer] {
|
||||
update_next_position(reader, buffer);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1188,7 +1230,8 @@ shard_reader::remote_reader::remote_reader(
|
||||
, _ps(ps)
|
||||
, _pc(pc)
|
||||
, _trace_state(std::move(trace_state))
|
||||
, _fwd_mr(fwd_mr) {
|
||||
, _fwd_mr(fwd_mr)
|
||||
, _tri_cmp(*_schema) {
|
||||
}
|
||||
|
||||
future<shard_reader::fill_buffer_result> shard_reader::remote_reader::fill_buffer(const dht::partition_range& pr, bool pending_next_partition,
|
||||
@@ -1196,7 +1239,7 @@ future<shard_reader::fill_buffer_result> shard_reader::remote_reader::fill_buffe
|
||||
// We could have missed a `fast_forward_to()` if the reader wasn't created yet.
|
||||
_pr = ≺
|
||||
if (pending_next_partition) {
|
||||
_last_position_in_partition = position_in_partition(position_in_partition::end_of_partition_tag_t{});
|
||||
_next_position_in_partition = position_in_partition::for_partition_start();
|
||||
}
|
||||
return do_with(resume_or_create_reader(), circular_buffer<mutation_fragment>{},
|
||||
[this, pending_next_partition, timeout] (flat_mutation_reader& reader, circular_buffer<mutation_fragment>& buffer) mutable {
|
||||
@@ -1204,22 +1247,8 @@ future<shard_reader::fill_buffer_result> shard_reader::remote_reader::fill_buffe
|
||||
reader.next_partition();
|
||||
}
|
||||
|
||||
return do_fill_buffer(reader, timeout).then([this, &reader, &buffer, timeout] {
|
||||
buffer = reader.detach_buffer();
|
||||
// When the reader is recreated (after having been evicted) we
|
||||
// recreate it such that it starts reading from *after* the last
|
||||
// seen fragment's position. If the last seen fragment is a range
|
||||
// tombstone it is *not* guaranteed that the next fragments in the
|
||||
// data stream have positions strictly greater than the range
|
||||
// tombstone's. If the reader is evicted and has to be recreated,
|
||||
// these fragments would be then skipped as the read would continue
|
||||
// after their position.
|
||||
// To avoid this ensure that the buffer contains *all* fragments for
|
||||
// the last seen position.
|
||||
return ensure_buffer_contains_all_fragments_for_last_pos(reader, buffer, timeout);
|
||||
}).then([this, &reader, &buffer] {
|
||||
return fill_buffer(reader, buffer, timeout).then([this, &reader, &buffer] {
|
||||
const auto eos = reader.is_end_of_stream() && reader.is_buffer_empty();
|
||||
update_last_position(buffer);
|
||||
_irh = _lifecycle_policy.pause(std::move(reader));
|
||||
return fill_buffer_result(std::move(buffer), eos);
|
||||
});
|
||||
@@ -1229,7 +1258,7 @@ future<shard_reader::fill_buffer_result> shard_reader::remote_reader::fill_buffe
|
||||
future<> shard_reader::remote_reader::fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) {
|
||||
_pr = ≺
|
||||
_last_pkey.reset();
|
||||
_last_position_in_partition.reset();
|
||||
_next_position_in_partition = position_in_partition::for_partition_start();
|
||||
|
||||
if (!_reader_created || !_irh) {
|
||||
return make_ready_future<>();
|
||||
|
||||
@@ -172,6 +172,9 @@ tombstone partition_entry::partition_tombstone() const {
|
||||
|
||||
partition_snapshot::~partition_snapshot() {
|
||||
with_allocator(region().allocator(), [this] {
|
||||
if (_locked) {
|
||||
touch();
|
||||
}
|
||||
if (_version && _version.is_unique_owner()) {
|
||||
auto v = &*_version;
|
||||
_version = {};
|
||||
@@ -268,6 +271,7 @@ partition_entry::~partition_entry() {
|
||||
return;
|
||||
}
|
||||
if (_snapshot) {
|
||||
assert(!_snapshot->is_locked());
|
||||
_snapshot->_version = std::move(_version);
|
||||
_snapshot->_version.mark_as_unique_owner();
|
||||
_snapshot->_entry = nullptr;
|
||||
@@ -284,6 +288,7 @@ stop_iteration partition_entry::clear_gently(cache_tracker* tracker) noexcept {
|
||||
}
|
||||
|
||||
if (_snapshot) {
|
||||
assert(!_snapshot->is_locked());
|
||||
_snapshot->_version = std::move(_version);
|
||||
_snapshot->_version.mark_as_unique_owner();
|
||||
_snapshot->_entry = nullptr;
|
||||
@@ -311,6 +316,7 @@ stop_iteration partition_entry::clear_gently(cache_tracker* tracker) noexcept {
|
||||
void partition_entry::set_version(partition_version* new_version)
|
||||
{
|
||||
if (_snapshot) {
|
||||
assert(!_snapshot->is_locked());
|
||||
_snapshot->_version = std::move(_version);
|
||||
_snapshot->_entry = nullptr;
|
||||
}
|
||||
@@ -338,7 +344,7 @@ partition_version& partition_entry::add_version(const schema& s, cache_tracker*
|
||||
|
||||
void partition_entry::apply(const schema& s, const mutation_partition& mp, const schema& mp_schema)
|
||||
{
|
||||
apply(s, mutation_partition(s, mp), mp_schema);
|
||||
apply(s, mutation_partition(mp_schema, mp), mp_schema);
|
||||
}
|
||||
|
||||
void partition_entry::apply(const schema& s, mutation_partition&& mp, const schema& mp_schema)
|
||||
@@ -459,7 +465,6 @@ public:
|
||||
|
||||
coroutine partition_entry::apply_to_incomplete(const schema& s,
|
||||
partition_entry&& pe,
|
||||
const schema& pe_schema,
|
||||
mutation_cleaner& pe_cleaner,
|
||||
logalloc::allocating_section& alloc,
|
||||
logalloc::region& reg,
|
||||
@@ -479,10 +484,6 @@ coroutine partition_entry::apply_to_incomplete(const schema& s,
|
||||
// partitions where I saw 40% slow down.
|
||||
const bool preemptible = s.clustering_key_size() > 0;
|
||||
|
||||
if (s.version() != pe_schema.version()) {
|
||||
pe.upgrade(pe_schema.shared_from_this(), s.shared_from_this(), pe_cleaner, no_cache_tracker);
|
||||
}
|
||||
|
||||
// When preemptible, later memtable reads could start using the snapshot before
|
||||
// snapshot's writes are made visible in cache, which would cause them to miss those writes.
|
||||
// So we cannot allow erasing when preemptible.
|
||||
@@ -496,6 +497,7 @@ coroutine partition_entry::apply_to_incomplete(const schema& s,
|
||||
prev_snp = read(reg, tracker.cleaner(), s.shared_from_this(), &tracker, phase - 1);
|
||||
}
|
||||
auto dst_snp = read(reg, tracker.cleaner(), s.shared_from_this(), &tracker, phase);
|
||||
dst_snp->lock();
|
||||
|
||||
// Once we start updating the partition, we must keep all snapshots until the update completes,
|
||||
// otherwise partial writes would be published. So the scope of snapshots must enclose the scope
|
||||
@@ -570,6 +572,7 @@ coroutine partition_entry::apply_to_incomplete(const schema& s,
|
||||
auto has_next = src_cur.erase_and_advance();
|
||||
acc.unpin_memory(size);
|
||||
if (!has_next) {
|
||||
dst_snp->unlock();
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
} while (!preemptible || !need_preempt());
|
||||
@@ -661,6 +664,18 @@ partition_snapshot::range_tombstones()
|
||||
position_in_partition_view::after_all_clustered_rows());
|
||||
}
|
||||
|
||||
void partition_snapshot::touch() noexcept {
|
||||
// Eviction assumes that older versions are evicted before newer so only the latest snapshot
|
||||
// can be touched.
|
||||
if (_tracker && at_latest_version()) {
|
||||
auto&& rows = version()->partition().clustered_rows();
|
||||
assert(!rows.empty());
|
||||
rows_entry& last_dummy = *rows.rbegin();
|
||||
assert(last_dummy.is_last_dummy());
|
||||
_tracker->touch(last_dummy);
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const partition_entry::printer& p) {
|
||||
auto& e = p._partition_entry;
|
||||
out << "{";
|
||||
@@ -688,6 +703,7 @@ void partition_entry::evict(mutation_cleaner& cleaner) noexcept {
|
||||
return;
|
||||
}
|
||||
if (_snapshot) {
|
||||
assert(!_snapshot->is_locked());
|
||||
_snapshot->_version = std::move(_version);
|
||||
_snapshot->_version.mark_as_unique_owner();
|
||||
_snapshot->_entry = nullptr;
|
||||
@@ -707,3 +723,18 @@ partition_snapshot_ptr::~partition_snapshot_ptr() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void partition_snapshot::lock() noexcept {
|
||||
// partition_entry::is_locked() assumes that if there is a locked snapshot,
|
||||
// it can be found attached directly to it.
|
||||
assert(at_latest_version());
|
||||
_locked = true;
|
||||
}
|
||||
|
||||
void partition_snapshot::unlock() noexcept {
|
||||
// Locked snapshots must always be latest, is_locked() assumes that.
|
||||
// Also, touch() is only effective when this snapshot is latest.
|
||||
assert(at_latest_version());
|
||||
_locked = false;
|
||||
touch(); // Make the entry evictable again in case it was fully unlinked by eviction attempt.
|
||||
}
|
||||
|
||||
@@ -303,6 +303,7 @@ private:
|
||||
mutation_cleaner* _cleaner;
|
||||
cache_tracker* _tracker;
|
||||
boost::intrusive::slist_member_hook<> _cleaner_hook;
|
||||
bool _locked = false;
|
||||
friend class partition_entry;
|
||||
friend class mutation_cleaner_impl;
|
||||
public:
|
||||
@@ -318,6 +319,22 @@ public:
|
||||
partition_snapshot& operator=(const partition_snapshot&) = delete;
|
||||
partition_snapshot& operator=(partition_snapshot&&) = delete;
|
||||
|
||||
// Makes the snapshot locked.
|
||||
// See is_locked() for meaning.
|
||||
// Can be called only when at_lastest_version(). The snapshot must remain latest as long as it's locked.
|
||||
void lock() noexcept;
|
||||
|
||||
// Makes the snapshot no longer locked.
|
||||
// See is_locked() for meaning.
|
||||
void unlock() noexcept;
|
||||
|
||||
// Tells whether the snapshot is locked.
|
||||
// Locking the snapshot prevents it from getting detached from the partition entry.
|
||||
// It also prevents the partition entry from being evicted.
|
||||
bool is_locked() const {
|
||||
return _locked;
|
||||
}
|
||||
|
||||
static partition_snapshot& container_of(partition_version_ref* ref) {
|
||||
return *boost::intrusive::get_parent_from_member(ref, &partition_snapshot::_version);
|
||||
}
|
||||
@@ -344,6 +361,9 @@ public:
|
||||
// to the latest version.
|
||||
stop_iteration slide_to_oldest() noexcept;
|
||||
|
||||
// Brings the snapshot to the front of the LRU.
|
||||
void touch() noexcept;
|
||||
|
||||
// Must be called after snapshot's original region is merged into a different region
|
||||
// before the original region is destroyed, unless the snapshot is destroyed earlier.
|
||||
void migrate(logalloc::region* region, mutation_cleaner* cleaner) noexcept {
|
||||
@@ -503,9 +523,18 @@ public:
|
||||
return _version->all_elements_reversed();
|
||||
}
|
||||
|
||||
// Tells whether this entry is locked.
|
||||
// Locked entries are undergoing an update and should not have their snapshots
|
||||
// detached from the entry.
|
||||
// Certain methods can only be called when !is_locked().
|
||||
bool is_locked() const {
|
||||
return _snapshot && _snapshot->is_locked();
|
||||
}
|
||||
|
||||
// Strong exception guarantees.
|
||||
// Assumes this instance and mp are fully continuous.
|
||||
// Use only on non-evictable entries.
|
||||
// Must not be called when is_locked().
|
||||
void apply(const schema& s, const mutation_partition& mp, const schema& mp_schema);
|
||||
void apply(const schema& s, mutation_partition&& mp, const schema& mp_schema);
|
||||
|
||||
@@ -526,11 +555,14 @@ public:
|
||||
// such that if the operation is retried (possibly many times) and eventually
|
||||
// succeeds the result will be as if the first attempt didn't fail.
|
||||
//
|
||||
// The schema of pe must conform to s.
|
||||
//
|
||||
// Returns a coroutine object representing the operation.
|
||||
// The coroutine must be resumed with the region being unlocked.
|
||||
//
|
||||
// The coroutine cannot run concurrently with other apply() calls.
|
||||
coroutine apply_to_incomplete(const schema& s,
|
||||
partition_entry&& pe,
|
||||
const schema& pe_schema,
|
||||
mutation_cleaner& pe_cleaner,
|
||||
logalloc::allocating_section&,
|
||||
logalloc::region&,
|
||||
@@ -539,6 +571,7 @@ public:
|
||||
real_dirty_memory_accounter&);
|
||||
|
||||
// If this entry is evictable, cache_tracker must be provided.
|
||||
// Must not be called when is_locked().
|
||||
partition_version& add_version(const schema& s, cache_tracker*);
|
||||
|
||||
// Returns a reference to existing version with an active snapshot of given phase
|
||||
@@ -568,9 +601,11 @@ public:
|
||||
tombstone partition_tombstone() const;
|
||||
|
||||
// needs to be called with reclaiming disabled
|
||||
// Must not be called when is_locked().
|
||||
void upgrade(schema_ptr from, schema_ptr to, mutation_cleaner&, cache_tracker*);
|
||||
|
||||
// Snapshots with different values of phase will point to different partition_version objects.
|
||||
// When is_locked(), read() can only be called with a phase which is <= the phase of the current snapshot.
|
||||
partition_snapshot_ptr read(logalloc::region& region,
|
||||
mutation_cleaner&,
|
||||
schema_ptr entry_schema,
|
||||
|
||||
@@ -129,6 +129,8 @@ public:
|
||||
: _type(partition_region::clustered), _ck(&ck) { }
|
||||
position_in_partition_view(range_tag_t, bound_view bv)
|
||||
: _type(partition_region::clustered), _bound_weight(position_weight(bv.kind())), _ck(&bv.prefix()) { }
|
||||
position_in_partition_view(const clustering_key_prefix& ck, bound_weight w)
|
||||
: _type(partition_region::clustered), _bound_weight(w), _ck(&ck) { }
|
||||
|
||||
static position_in_partition_view for_range_start(const query::clustering_range& r) {
|
||||
return {position_in_partition_view::range_tag_t(), bound_view::from_range_start(r)};
|
||||
@@ -159,6 +161,7 @@ public:
|
||||
}
|
||||
|
||||
partition_region region() const { return _type; }
|
||||
bound_weight get_bound_weight() const { return _bound_weight; }
|
||||
bool is_partition_start() const { return _type == partition_region::partition_start; }
|
||||
bool is_partition_end() const { return _type == partition_region::partition_end; }
|
||||
bool is_static_row() const { return _type == partition_region::static_row; }
|
||||
@@ -271,6 +274,10 @@ public:
|
||||
return {clustering_row_tag_t(), std::move(ck)};
|
||||
}
|
||||
|
||||
static position_in_partition for_partition_start() {
|
||||
return position_in_partition{partition_start_tag_t()};
|
||||
}
|
||||
|
||||
static position_in_partition for_static_row() {
|
||||
return position_in_partition{static_row_tag_t()};
|
||||
}
|
||||
|
||||
@@ -286,11 +286,11 @@ static void insert_querier(
|
||||
|
||||
auto& e = entries.emplace_back(key, std::move(q), expires);
|
||||
e.set_pos(--entries.end());
|
||||
++stats.population;
|
||||
|
||||
if (auto irh = sem.register_inactive_read(std::make_unique<querier_inactive_read>(entries, e.pos(), stats))) {
|
||||
e.set_inactive_handle(std::move(irh));
|
||||
index.insert(e);
|
||||
++stats.population;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -31,6 +31,8 @@
|
||||
#include "tracing/tracing.hh"
|
||||
#include "utils/small_vector.hh"
|
||||
|
||||
class position_in_partition_view;
|
||||
|
||||
namespace query {
|
||||
|
||||
using column_id_vector = utils::small_vector<column_id, 8>;
|
||||
@@ -58,10 +60,20 @@ typedef std::vector<clustering_range> clustering_row_ranges;
|
||||
|
||||
/// Trim the clustering ranges.
|
||||
///
|
||||
/// Equivalent of intersecting each range with [key, +inf), or (-inf, key] if
|
||||
/// Equivalent of intersecting each clustering range with [pos, +inf) position
|
||||
/// in partition range, or (-inf, pos] position in partition range if
|
||||
/// reversed == true. Ranges that do not intersect are dropped. Ranges that
|
||||
/// partially overlap are trimmed.
|
||||
/// Result: each range will overlap fully with [key, +inf), or (-int, key] if
|
||||
/// Result: each range will overlap fully with [pos, +inf), or (-int, pos] if
|
||||
/// reversed is true.
|
||||
void trim_clustering_row_ranges_to(const schema& s, clustering_row_ranges& ranges, position_in_partition_view pos, bool reversed = false);
|
||||
|
||||
/// Trim the clustering ranges.
|
||||
///
|
||||
/// Equivalent of intersecting each clustering range with (key, +inf) clustering
|
||||
/// range, or (-inf, key) clustering range if reversed == true. Ranges that do
|
||||
/// not intersect are dropped. Ranges that partially overlap are trimmed.
|
||||
/// Result: each range will overlap fully with (key, +inf), or (-int, key) if
|
||||
/// reversed is true.
|
||||
void trim_clustering_row_ranges_to(const schema& s, clustering_row_ranges& ranges, const clustering_key& key, bool reversed = false);
|
||||
|
||||
|
||||
32
query.cc
32
query.cc
@@ -71,34 +71,38 @@ std::ostream& operator<<(std::ostream& out, const specific_ranges& s) {
|
||||
return out << "{" << s._pk << " : " << join(", ", s._ranges) << "}";
|
||||
}
|
||||
|
||||
void trim_clustering_row_ranges_to(const schema& s, clustering_row_ranges& ranges, const clustering_key& key, bool reversed) {
|
||||
auto cmp = [reversed, bv_cmp = bound_view::compare(s)] (const auto& a, const auto& b) {
|
||||
return reversed ? bv_cmp(b, a) : bv_cmp(a, b);
|
||||
void trim_clustering_row_ranges_to(const schema& s, clustering_row_ranges& ranges, position_in_partition_view pos, bool reversed) {
|
||||
auto cmp = [reversed, cmp = position_in_partition::composite_tri_compare(s)] (const auto& a, const auto& b) {
|
||||
return reversed ? cmp(b, a) : cmp(a, b);
|
||||
};
|
||||
auto start_bound = [reversed] (const auto& range) -> const bound_view& {
|
||||
return reversed ? range.second : range.first;
|
||||
auto start_bound = [reversed] (const auto& range) -> position_in_partition_view {
|
||||
return reversed ? position_in_partition_view::for_range_end(range) : position_in_partition_view::for_range_start(range);
|
||||
};
|
||||
auto end_bound = [reversed] (const auto& range) -> const bound_view& {
|
||||
return reversed ? range.first : range.second;
|
||||
auto end_bound = [reversed] (const auto& range) -> position_in_partition_view {
|
||||
return reversed ? position_in_partition_view::for_range_start(range) : position_in_partition_view::for_range_end(range);
|
||||
};
|
||||
clustering_key_prefix::equality eq(s);
|
||||
|
||||
auto it = ranges.begin();
|
||||
while (it != ranges.end()) {
|
||||
auto range = bound_view::from_range(*it);
|
||||
if (cmp(end_bound(range), key) || eq(end_bound(range).prefix(), key)) {
|
||||
if (cmp(end_bound(*it), pos) <= 0) {
|
||||
it = ranges.erase(it);
|
||||
continue;
|
||||
} else if (cmp(start_bound(range), key)) {
|
||||
assert(cmp(key, end_bound(range)));
|
||||
auto r = reversed ? clustering_range(it->start(), clustering_range::bound { key, false })
|
||||
: clustering_range(clustering_range::bound { key, false }, it->end());
|
||||
} else if (cmp(start_bound(*it), pos) <= 0) {
|
||||
assert(cmp(pos, end_bound(*it)) < 0);
|
||||
auto r = reversed ?
|
||||
clustering_range(it->start(), clustering_range::bound(pos.key(), pos.get_bound_weight() != bound_weight::before_all_prefixed)) :
|
||||
clustering_range(clustering_range::bound(pos.key(), pos.get_bound_weight() != bound_weight::after_all_prefixed), it->end());
|
||||
*it = std::move(r);
|
||||
}
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
void trim_clustering_row_ranges_to(const schema& s, clustering_row_ranges& ranges, const clustering_key& key, bool reversed) {
|
||||
return trim_clustering_row_ranges_to(s, ranges,
|
||||
position_in_partition_view(key, reversed ? bound_weight::before_all_prefixed : bound_weight::after_all_prefixed), reversed);
|
||||
}
|
||||
|
||||
partition_slice::partition_slice(clustering_row_ranges row_ranges,
|
||||
query::column_id_vector static_columns,
|
||||
query::column_id_vector regular_columns,
|
||||
|
||||
@@ -187,7 +187,7 @@ public:
|
||||
const dht::decorated_key& key() const { return *_key; }
|
||||
void on_underlying_created() { ++_underlying_created; }
|
||||
bool digest_requested() const { return _slice.options.contains<query::partition_slice::option::with_digest>(); }
|
||||
private:
|
||||
public:
|
||||
future<> ensure_underlying(db::timeout_clock::time_point timeout) {
|
||||
if (_underlying_snapshot) {
|
||||
return create_underlying(true, timeout);
|
||||
@@ -206,18 +206,6 @@ public:
|
||||
_underlying_snapshot = {};
|
||||
_key = dk;
|
||||
}
|
||||
// Fast forwards the underlying streamed_mutation to given range.
|
||||
future<> fast_forward_to(position_range range, db::timeout_clock::time_point timeout) {
|
||||
return ensure_underlying(timeout).then([this, range = std::move(range), timeout] {
|
||||
return _underlying.underlying().fast_forward_to(std::move(range), timeout);
|
||||
});
|
||||
}
|
||||
// Gets the next fragment from the underlying reader
|
||||
future<mutation_fragment_opt> get_next_fragment(db::timeout_clock::time_point timeout) {
|
||||
return ensure_underlying(timeout).then([this, timeout] {
|
||||
return _underlying.underlying()(timeout);
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -8,7 +8,6 @@ print_usage() {
|
||||
echo " --clean clean build directory"
|
||||
echo " --compiler C++ compiler path"
|
||||
echo " --c-compiler C compiler path"
|
||||
echo " --nodeps skip installing dependencies"
|
||||
exit 1
|
||||
}
|
||||
|
||||
@@ -16,7 +15,6 @@ JOBS=
|
||||
CLEAN=
|
||||
COMPILER=
|
||||
CCOMPILER=
|
||||
NODEPS=
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
"--jobs")
|
||||
@@ -36,7 +34,6 @@ while [ $# -gt 0 ]; do
|
||||
shift 2
|
||||
;;
|
||||
"--nodeps")
|
||||
NODEPS=yes
|
||||
shift 1
|
||||
;;
|
||||
*)
|
||||
@@ -66,10 +63,6 @@ if [ -f build/release/scylla-package.tar.gz ]; then
|
||||
rm build/release/scylla-package.tar.gz
|
||||
fi
|
||||
|
||||
if [ -z "$NODEPS" ]; then
|
||||
sudo ./install-dependencies.sh
|
||||
fi
|
||||
|
||||
NINJA=$(which ninja-build) &&:
|
||||
if [ -z "$NINJA" ]; then
|
||||
NINJA=$(which ninja) &&:
|
||||
|
||||
37
reloc/python3/build_deb.sh
Executable file
37
reloc/python3/build_deb.sh
Executable file
@@ -0,0 +1,37 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
. /etc/os-release
|
||||
print_usage() {
|
||||
echo "build_deb.sh --reloc-pkg build/release/scylla-python3-package.tar.gz"
|
||||
echo " --reloc-pkg specify relocatable package path"
|
||||
exit 1
|
||||
}
|
||||
|
||||
RELOC_PKG=build/release/scylla-python3-package.tar.gz
|
||||
OPTS=""
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
"--reloc-pkg")
|
||||
OPTS="$OPTS $1 $(readlink -f $2)"
|
||||
RELOC_PKG=$2
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
print_usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ ! -e $RELOC_PKG ]; then
|
||||
echo "$RELOC_PKG does not exist."
|
||||
echo "Run ./reloc/python3/build_reloc.sh first."
|
||||
exit 1
|
||||
fi
|
||||
RELOC_PKG=$(readlink -f $RELOC_PKG)
|
||||
if [[ ! $OPTS =~ --reloc-pkg ]]; then
|
||||
OPTS="$OPTS --reloc-pkg $RELOC_PKG"
|
||||
fi
|
||||
mkdir -p build/debian/scylla-python3-package
|
||||
tar -C build/debian/scylla-python3-package -xpf $RELOC_PKG
|
||||
cd build/debian/scylla-python3-package
|
||||
exec ./dist/debian/python3/build_deb.sh $OPTS
|
||||
@@ -780,8 +780,10 @@ static future<> repair_cf_range(repair_info& ri,
|
||||
// still do our best to repair available replicas.
|
||||
std::vector<gms::inet_address> live_neighbors;
|
||||
std::vector<partition_checksum> live_neighbors_checksum;
|
||||
bool local_checksum_failed = false;
|
||||
for (unsigned i = 0; i < checksums.size(); i++) {
|
||||
if (checksums[i].failed()) {
|
||||
local_checksum_failed |= (i == 0);
|
||||
rlogger.warn(
|
||||
"Checksum of ks={}, table={}, range={} on {} failed: {}",
|
||||
ri.keyspace, cf, range,
|
||||
@@ -797,7 +799,7 @@ static future<> repair_cf_range(repair_info& ri,
|
||||
live_neighbors_checksum.push_back(checksums[i].get0());
|
||||
}
|
||||
}
|
||||
if (checksums[0].failed() || live_neighbors.empty()) {
|
||||
if (local_checksum_failed || live_neighbors.empty()) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
// If one of the available checksums is different, repair
|
||||
@@ -940,8 +942,20 @@ static future<> repair_cf_range(repair_info& ri,
|
||||
// Comparable to RepairSession in Origin
|
||||
static future<> repair_range(repair_info& ri, const dht::token_range& range) {
|
||||
auto id = utils::UUID_gen::get_time_UUID();
|
||||
return do_with(get_neighbors(ri.db.local(), ri.keyspace, range, ri.data_centers, ri.hosts), [&ri, range, id] (const auto& neighbors) {
|
||||
rlogger.debug("[repair #{}] new session: will sync {} on range {} for {}.{}", id, neighbors, range, ri.keyspace, ri.cfs);
|
||||
return do_with(get_neighbors(ri.db.local(), ri.keyspace, range, ri.data_centers, ri.hosts), [&ri, range, id] (std::vector<gms::inet_address>& neighbors) {
|
||||
auto live_neighbors = boost::copy_range<std::vector<gms::inet_address>>(neighbors |
|
||||
boost::adaptors::filtered([] (const gms::inet_address& node) { return gms::get_local_gossiper().is_alive(node); }));
|
||||
if (live_neighbors.size() != neighbors.size()) {
|
||||
ri.nr_failed_ranges++;
|
||||
auto status = live_neighbors.empty() ? "skipped" : "partial";
|
||||
rlogger.warn("Repair {} out of {} ranges, id={}, shard={}, keyspace={}, table={}, range={}, peers={}, live_peers={}, status={}",
|
||||
ri.ranges_index, ri.ranges.size(), ri.id, ri.shard, ri.keyspace, ri.cfs, range, neighbors, live_neighbors, status);
|
||||
if (live_neighbors.empty()) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
neighbors.swap(live_neighbors);
|
||||
}
|
||||
return ::service::get_local_migration_manager().sync_schema(ri.db.local(), neighbors).then([&neighbors, &ri, range, id] {
|
||||
return do_for_each(ri.cfs.begin(), ri.cfs.end(), [&ri, &neighbors, range] (auto&& cf) {
|
||||
ri._sub_ranges_nr++;
|
||||
if (ri.row_level_repair()) {
|
||||
@@ -950,6 +964,7 @@ static future<> repair_range(repair_info& ri, const dht::token_range& range) {
|
||||
return repair_cf_range(ri, cf, range, neighbors);
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -295,6 +295,7 @@ public:
|
||||
void push_mutation_fragment(frozen_mutation_fragment mf) { _mfs.push_back(std::move(mf)); }
|
||||
};
|
||||
|
||||
using repair_row_on_wire = partition_key_and_mutation_fragments;
|
||||
using repair_rows_on_wire = std::list<partition_key_and_mutation_fragments>;
|
||||
|
||||
enum class row_level_diff_detect_algorithm : uint8_t {
|
||||
|
||||
@@ -152,8 +152,8 @@ class fragment_hasher {
|
||||
xx_hasher& _hasher;
|
||||
private:
|
||||
void consume_cell(const column_definition& col, const atomic_cell_or_collection& cell) {
|
||||
feed_hash(_hasher, col.name());
|
||||
feed_hash(_hasher, col.type->name());
|
||||
feed_hash(_hasher, col.kind);
|
||||
feed_hash(_hasher, col.id);
|
||||
feed_hash(_hasher, cell, col);
|
||||
}
|
||||
public:
|
||||
@@ -220,43 +220,62 @@ private:
|
||||
};
|
||||
|
||||
class repair_row {
|
||||
frozen_mutation_fragment _fm;
|
||||
std::optional<frozen_mutation_fragment> _fm;
|
||||
lw_shared_ptr<const decorated_key_with_hash> _dk_with_hash;
|
||||
repair_sync_boundary _boundary;
|
||||
repair_hash _hash;
|
||||
std::optional<repair_sync_boundary> _boundary;
|
||||
std::optional<repair_hash> _hash;
|
||||
lw_shared_ptr<mutation_fragment> _mf;
|
||||
public:
|
||||
repair_row() = delete;
|
||||
repair_row(frozen_mutation_fragment fm,
|
||||
position_in_partition pos,
|
||||
repair_row(std::optional<frozen_mutation_fragment> fm,
|
||||
std::optional<position_in_partition> pos,
|
||||
lw_shared_ptr<const decorated_key_with_hash> dk_with_hash,
|
||||
repair_hash hash,
|
||||
std::optional<repair_hash> hash,
|
||||
lw_shared_ptr<mutation_fragment> mf = {})
|
||||
: _fm(std::move(fm))
|
||||
, _dk_with_hash(std::move(dk_with_hash))
|
||||
, _boundary({_dk_with_hash->dk, std::move(pos)})
|
||||
, _boundary(pos ? std::optional<repair_sync_boundary>(repair_sync_boundary{_dk_with_hash->dk, std::move(*pos)}) : std::nullopt)
|
||||
, _hash(std::move(hash))
|
||||
, _mf(std::move(mf)) {
|
||||
}
|
||||
mutation_fragment& get_mutation_fragment() {
|
||||
if (!_mf) {
|
||||
throw std::runtime_error("get empty mutation_fragment");
|
||||
throw std::runtime_error("empty mutation_fragment");
|
||||
}
|
||||
return *_mf;
|
||||
}
|
||||
frozen_mutation_fragment& get_frozen_mutation() { return _fm; }
|
||||
const frozen_mutation_fragment& get_frozen_mutation() const { return _fm; }
|
||||
frozen_mutation_fragment& get_frozen_mutation() {
|
||||
if (!_fm) {
|
||||
throw std::runtime_error("empty frozen_mutation_fragment");
|
||||
}
|
||||
return *_fm;
|
||||
}
|
||||
const frozen_mutation_fragment& get_frozen_mutation() const {
|
||||
if (!_fm) {
|
||||
throw std::runtime_error("empty frozen_mutation_fragment");
|
||||
}
|
||||
return *_fm;
|
||||
}
|
||||
const lw_shared_ptr<const decorated_key_with_hash>& get_dk_with_hash() const {
|
||||
return _dk_with_hash;
|
||||
}
|
||||
size_t size() const {
|
||||
return _fm.representation().size();
|
||||
if (!_fm) {
|
||||
throw std::runtime_error("empty size due to empty frozen_mutation_fragment");
|
||||
}
|
||||
return _fm->representation().size();
|
||||
}
|
||||
const repair_sync_boundary& boundary() const {
|
||||
return _boundary;
|
||||
if (!_boundary) {
|
||||
throw std::runtime_error("empty repair_sync_boundary");
|
||||
}
|
||||
return *_boundary;
|
||||
}
|
||||
const repair_hash& hash() const {
|
||||
return _hash;
|
||||
if (!_hash) {
|
||||
throw std::runtime_error("empty hash");
|
||||
}
|
||||
return *_hash;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -284,13 +303,14 @@ public:
|
||||
repair_reader(
|
||||
seastar::sharded<database>& db,
|
||||
column_family& cf,
|
||||
schema_ptr s,
|
||||
dht::token_range range,
|
||||
dht::i_partitioner& local_partitioner,
|
||||
dht::i_partitioner& remote_partitioner,
|
||||
unsigned remote_shard,
|
||||
uint64_t seed,
|
||||
is_local_reader local_reader)
|
||||
: _schema(cf.schema())
|
||||
: _schema(s)
|
||||
, _range(dht::to_partition_range(range))
|
||||
, _sharder(remote_partitioner, range, remote_shard)
|
||||
, _seed(seed)
|
||||
@@ -351,6 +371,10 @@ class repair_writer {
|
||||
std::vector<std::optional<seastar::queue<mutation_fragment_opt>>> _mq;
|
||||
// Current partition written to disk
|
||||
std::vector<lw_shared_ptr<const decorated_key_with_hash>> _current_dk_written_to_sstable;
|
||||
// Is current partition still open. A partition is opened when a
|
||||
// partition_start is written and is closed when a partition_end is
|
||||
// written.
|
||||
std::vector<bool> _partition_opened;
|
||||
public:
|
||||
repair_writer(
|
||||
schema_ptr schema,
|
||||
@@ -365,10 +389,13 @@ public:
|
||||
future<> write_start_and_mf(lw_shared_ptr<const decorated_key_with_hash> dk, mutation_fragment mf, unsigned node_idx) {
|
||||
_current_dk_written_to_sstable[node_idx] = dk;
|
||||
if (mf.is_partition_start()) {
|
||||
return _mq[node_idx]->push_eventually(mutation_fragment_opt(std::move(mf)));
|
||||
return _mq[node_idx]->push_eventually(mutation_fragment_opt(std::move(mf))).then([this, node_idx] {
|
||||
_partition_opened[node_idx] = true;
|
||||
});
|
||||
} else {
|
||||
auto start = mutation_fragment(partition_start(dk->dk, tombstone()));
|
||||
return _mq[node_idx]->push_eventually(mutation_fragment_opt(std::move(start))).then([this, node_idx, mf = std::move(mf)] () mutable {
|
||||
_partition_opened[node_idx] = true;
|
||||
return _mq[node_idx]->push_eventually(mutation_fragment_opt(std::move(mf)));
|
||||
});
|
||||
}
|
||||
@@ -378,6 +405,7 @@ public:
|
||||
_writer_done.resize(_nr_peer_nodes);
|
||||
_mq.resize(_nr_peer_nodes);
|
||||
_current_dk_written_to_sstable.resize(_nr_peer_nodes);
|
||||
_partition_opened.resize(_nr_peer_nodes, false);
|
||||
}
|
||||
|
||||
void create_writer(unsigned node_idx) {
|
||||
@@ -414,12 +442,21 @@ public:
|
||||
t.stream_in_progress());
|
||||
}
|
||||
|
||||
future<> write_partition_end(unsigned node_idx) {
|
||||
if (_partition_opened[node_idx]) {
|
||||
return _mq[node_idx]->push_eventually(mutation_fragment(partition_end())).then([this, node_idx] {
|
||||
_partition_opened[node_idx] = false;
|
||||
});
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
future<> do_write(unsigned node_idx, lw_shared_ptr<const decorated_key_with_hash> dk, mutation_fragment mf) {
|
||||
if (_current_dk_written_to_sstable[node_idx]) {
|
||||
if (_current_dk_written_to_sstable[node_idx]->dk.equal(*_schema, dk->dk)) {
|
||||
return _mq[node_idx]->push_eventually(mutation_fragment_opt(std::move(mf)));
|
||||
} else {
|
||||
return _mq[node_idx]->push_eventually(mutation_fragment(partition_end())).then([this,
|
||||
return write_partition_end(node_idx).then([this,
|
||||
node_idx, dk = std::move(dk), mf = std::move(mf)] () mutable {
|
||||
return write_start_and_mf(std::move(dk), std::move(mf), node_idx);
|
||||
});
|
||||
@@ -433,7 +470,7 @@ public:
|
||||
return parallel_for_each(boost::irange(unsigned(0), unsigned(_nr_peer_nodes)), [this] (unsigned node_idx) {
|
||||
if (_writer_done[node_idx] && _mq[node_idx]) {
|
||||
// Partition_end is never sent on wire, so we have to write one ourselves.
|
||||
return _mq[node_idx]->push_eventually(mutation_fragment(partition_end())).then([this, node_idx] () mutable {
|
||||
return write_partition_end(node_idx).then([this, node_idx] () mutable {
|
||||
// Empty mutation_fragment_opt means no more data, so the writer can seal the sstables.
|
||||
return _mq[node_idx]->push_eventually(mutation_fragment_opt()).then([this, node_idx] () mutable {
|
||||
return (*_writer_done[node_idx]).then([] (uint64_t partitions) {
|
||||
@@ -458,8 +495,8 @@ public:
|
||||
private:
|
||||
seastar::sharded<database>& _db;
|
||||
column_family& _cf;
|
||||
dht::token_range _range;
|
||||
schema_ptr _schema;
|
||||
dht::token_range _range;
|
||||
repair_sync_boundary::tri_compare _cmp;
|
||||
// The algorithm used to find the row difference
|
||||
row_level_diff_detect_algorithm _algo;
|
||||
@@ -519,6 +556,7 @@ public:
|
||||
repair_meta(
|
||||
seastar::sharded<database>& db,
|
||||
column_family& cf,
|
||||
schema_ptr s,
|
||||
dht::token_range range,
|
||||
row_level_diff_detect_algorithm algo,
|
||||
size_t max_row_buf_size,
|
||||
@@ -529,8 +567,8 @@ public:
|
||||
size_t nr_peer_nodes = 1)
|
||||
: _db(db)
|
||||
, _cf(cf)
|
||||
, _schema(s)
|
||||
, _range(range)
|
||||
, _schema(cf.schema())
|
||||
, _cmp(repair_sync_boundary::tri_compare(*_schema))
|
||||
, _algo(algo)
|
||||
, _max_row_buf_size(max_row_buf_size)
|
||||
@@ -545,6 +583,7 @@ public:
|
||||
, _repair_reader(
|
||||
_db,
|
||||
_cf,
|
||||
_schema,
|
||||
_range,
|
||||
dht::global_partitioner(),
|
||||
*_remote_partitioner,
|
||||
@@ -577,35 +616,45 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
static future<>
|
||||
insert_repair_meta(const gms::inet_address& from,
|
||||
uint32_t src_cpu_id,
|
||||
uint32_t repair_meta_id,
|
||||
sstring ks_name,
|
||||
sstring cf_name,
|
||||
dht::token_range range,
|
||||
row_level_diff_detect_algorithm algo,
|
||||
uint64_t max_row_buf_size,
|
||||
uint64_t seed,
|
||||
shard_config master_node_shard_config) {
|
||||
node_repair_meta_id id{from, repair_meta_id};
|
||||
auto& db = service::get_local_storage_proxy().get_db();
|
||||
auto& cf = db.local().find_column_family(ks_name, cf_name);
|
||||
auto rm = make_lw_shared<repair_meta>(db,
|
||||
cf,
|
||||
shard_config master_node_shard_config,
|
||||
table_schema_version schema_version) {
|
||||
return service::get_schema_for_write(schema_version, {from, src_cpu_id}).then([from,
|
||||
repair_meta_id,
|
||||
range,
|
||||
algo,
|
||||
max_row_buf_size,
|
||||
seed,
|
||||
repair_meta::repair_master::no,
|
||||
repair_meta_id,
|
||||
std::move(master_node_shard_config));
|
||||
bool insertion = repair_meta_map().emplace(id, rm).second;
|
||||
if (!insertion) {
|
||||
rlogger.warn("insert_repair_meta: repair_meta_id {} for node {} already exists, replace existing one", id.repair_meta_id, id.ip);
|
||||
repair_meta_map()[id] = rm;
|
||||
} else {
|
||||
rlogger.debug("insert_repair_meta: Inserted repair_meta_id {} for node {}", id.repair_meta_id, id.ip);
|
||||
}
|
||||
master_node_shard_config,
|
||||
schema_version] (schema_ptr s) {
|
||||
auto& db = service::get_local_storage_proxy().get_db();
|
||||
auto& cf = db.local().find_column_family(s->id());
|
||||
node_repair_meta_id id{from, repair_meta_id};
|
||||
auto rm = make_lw_shared<repair_meta>(db,
|
||||
cf,
|
||||
s,
|
||||
range,
|
||||
algo,
|
||||
max_row_buf_size,
|
||||
seed,
|
||||
repair_meta::repair_master::no,
|
||||
repair_meta_id,
|
||||
std::move(master_node_shard_config));
|
||||
bool insertion = repair_meta_map().emplace(id, rm).second;
|
||||
if (!insertion) {
|
||||
rlogger.warn("insert_repair_meta: repair_meta_id {} for node {} already exists, replace existing one", id.repair_meta_id, id.ip);
|
||||
repair_meta_map()[id] = rm;
|
||||
} else {
|
||||
rlogger.debug("insert_repair_meta: Inserted repair_meta_id {} for node {}", id.repair_meta_id, id.ip);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
static future<>
|
||||
@@ -642,7 +691,11 @@ public:
|
||||
}
|
||||
}
|
||||
return parallel_for_each(*repair_metas, [repair_metas] (auto& rm) {
|
||||
return rm->stop();
|
||||
return rm->stop().then([&rm] {
|
||||
rm = {};
|
||||
});
|
||||
}).then([repair_metas, from] {
|
||||
rlogger.debug("Removed all repair_meta for single node {}", from);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -654,7 +707,11 @@ public:
|
||||
| boost::adaptors::map_values));
|
||||
repair_meta_map().clear();
|
||||
return parallel_for_each(*repair_metas, [repair_metas] (auto& rm) {
|
||||
return rm->stop();
|
||||
return rm->stop().then([&rm] {
|
||||
rm = {};
|
||||
});
|
||||
}).then([repair_metas] {
|
||||
rlogger.debug("Removed all repair_meta for all nodes");
|
||||
});
|
||||
}
|
||||
|
||||
@@ -952,12 +1009,12 @@ private:
|
||||
}
|
||||
return to_repair_rows_list(rows).then([this, from, node_idx, update_buf, update_hash_set] (std::list<repair_row> row_diff) {
|
||||
return do_with(std::move(row_diff), [this, from, node_idx, update_buf, update_hash_set] (std::list<repair_row>& row_diff) {
|
||||
auto sz = get_repair_rows_size(row_diff);
|
||||
stats().rx_row_bytes += sz;
|
||||
stats().rx_row_nr += row_diff.size();
|
||||
stats().rx_row_nr_peer[from] += row_diff.size();
|
||||
_metrics.rx_row_nr += row_diff.size();
|
||||
_metrics.rx_row_bytes += sz;
|
||||
if (_repair_master) {
|
||||
auto sz = get_repair_rows_size(row_diff);
|
||||
stats().rx_row_bytes += sz;
|
||||
stats().rx_row_nr += row_diff.size();
|
||||
stats().rx_row_nr_peer[from] += row_diff.size();
|
||||
}
|
||||
if (update_buf) {
|
||||
std::list<repair_row> tmp;
|
||||
tmp.swap(_working_row_buf);
|
||||
@@ -993,11 +1050,16 @@ private:
|
||||
return do_with(repair_rows_on_wire(), std::move(row_list), [this] (repair_rows_on_wire& rows, std::list<repair_row>& row_list) {
|
||||
return do_for_each(row_list, [this, &rows] (repair_row& r) {
|
||||
auto pk = r.get_dk_with_hash()->dk.key();
|
||||
auto it = std::find_if(rows.begin(), rows.end(), [&pk, s=_schema] (partition_key_and_mutation_fragments& row) { return pk.legacy_equal(*s, row.get_key()); });
|
||||
if (it == rows.end()) {
|
||||
rows.push_back(partition_key_and_mutation_fragments(std::move(pk), {std::move(r.get_frozen_mutation())}));
|
||||
// No need to search from the beginning of the rows. Look at the end of repair_rows_on_wire is enough.
|
||||
if (rows.empty()) {
|
||||
rows.push_back(repair_row_on_wire(std::move(pk), {std::move(r.get_frozen_mutation())}));
|
||||
} else {
|
||||
it->push_mutation_fragment(std::move(r.get_frozen_mutation()));
|
||||
auto& row = rows.back();
|
||||
if (pk.legacy_equal(*_schema, row.get_key())) {
|
||||
row.push_mutation_fragment(std::move(r.get_frozen_mutation()));
|
||||
} else {
|
||||
rows.push_back(repair_row_on_wire(std::move(pk), {std::move(r.get_frozen_mutation())}));
|
||||
}
|
||||
}
|
||||
}).then([&rows] {
|
||||
return std::move(rows);
|
||||
@@ -1006,23 +1068,47 @@ private:
|
||||
};
|
||||
|
||||
future<std::list<repair_row>> to_repair_rows_list(repair_rows_on_wire rows) {
|
||||
return do_with(std::move(rows), std::list<repair_row>(), lw_shared_ptr<const decorated_key_with_hash>(),
|
||||
[this] (repair_rows_on_wire& rows, std::list<repair_row>& row_list, lw_shared_ptr<const decorated_key_with_hash>& dk_ptr) mutable {
|
||||
return do_for_each(rows, [this, &dk_ptr, &row_list] (partition_key_and_mutation_fragments& x) mutable {
|
||||
return do_with(std::move(rows), std::list<repair_row>(), lw_shared_ptr<const decorated_key_with_hash>(), lw_shared_ptr<mutation_fragment>(), position_in_partition::tri_compare(*_schema),
|
||||
[this] (repair_rows_on_wire& rows, std::list<repair_row>& row_list, lw_shared_ptr<const decorated_key_with_hash>& dk_ptr, lw_shared_ptr<mutation_fragment>& last_mf, position_in_partition::tri_compare& cmp) mutable {
|
||||
return do_for_each(rows, [this, &dk_ptr, &row_list, &last_mf, &cmp] (partition_key_and_mutation_fragments& x) mutable {
|
||||
dht::decorated_key dk = dht::global_partitioner().decorate_key(*_schema, x.get_key());
|
||||
if (!(dk_ptr && dk_ptr->dk.equal(*_schema, dk))) {
|
||||
dk_ptr = make_lw_shared<const decorated_key_with_hash>(*_schema, dk, _seed);
|
||||
}
|
||||
return do_for_each(x.get_mutation_fragments(), [this, &dk_ptr, &row_list] (frozen_mutation_fragment& fmf) mutable {
|
||||
// Keep the mutation_fragment in repair_row as an
|
||||
// optimization to avoid unfreeze again when
|
||||
// mutation_fragment is needed by _repair_writer.do_write()
|
||||
// to apply the repair_row to disk
|
||||
auto mf = make_lw_shared<mutation_fragment>(fmf.unfreeze(*_schema));
|
||||
auto hash = do_hash_for_mf(*dk_ptr, *mf);
|
||||
position_in_partition pos(mf->position());
|
||||
row_list.push_back(repair_row(std::move(fmf), std::move(pos), dk_ptr, std::move(hash), std::move(mf)));
|
||||
});
|
||||
if (_repair_master) {
|
||||
return do_for_each(x.get_mutation_fragments(), [this, &dk_ptr, &row_list] (frozen_mutation_fragment& fmf) mutable {
|
||||
_metrics.rx_row_nr += 1;
|
||||
_metrics.rx_row_bytes += fmf.representation().size();
|
||||
// Keep the mutation_fragment in repair_row as an
|
||||
// optimization to avoid unfreeze again when
|
||||
// mutation_fragment is needed by _repair_writer.do_write()
|
||||
// to apply the repair_row to disk
|
||||
auto mf = make_lw_shared<mutation_fragment>(fmf.unfreeze(*_schema));
|
||||
auto hash = do_hash_for_mf(*dk_ptr, *mf);
|
||||
position_in_partition pos(mf->position());
|
||||
row_list.push_back(repair_row(std::move(fmf), std::move(pos), dk_ptr, std::move(hash), std::move(mf)));
|
||||
});
|
||||
} else {
|
||||
last_mf = {};
|
||||
return do_for_each(x.get_mutation_fragments(), [this, &dk_ptr, &row_list, &last_mf, &cmp] (frozen_mutation_fragment& fmf) mutable {
|
||||
_metrics.rx_row_nr += 1;
|
||||
_metrics.rx_row_bytes += fmf.representation().size();
|
||||
auto mf = make_lw_shared<mutation_fragment>(fmf.unfreeze(*_schema));
|
||||
position_in_partition pos(mf->position());
|
||||
// If the mutation_fragment has the same position as
|
||||
// the last mutation_fragment, it means they are the
|
||||
// same row with different contents. We can not feed
|
||||
// such rows into the sstable writer. Instead we apply
|
||||
// the mutation_fragment into the previous one.
|
||||
if (last_mf && cmp(last_mf->position(), pos) == 0 && last_mf->mergeable_with(*mf)) {
|
||||
last_mf->apply(*_schema, std::move(*mf));
|
||||
} else {
|
||||
last_mf = mf;
|
||||
// On repair follower node, only decorated_key_with_hash and the mutation_fragment inside repair_row are used.
|
||||
row_list.push_back(repair_row({}, {}, dk_ptr, {}, std::move(mf)));
|
||||
}
|
||||
});
|
||||
}
|
||||
}).then([&row_list] {
|
||||
return std::move(row_list);
|
||||
});
|
||||
@@ -1084,29 +1170,28 @@ public:
|
||||
|
||||
// RPC API
|
||||
future<>
|
||||
repair_row_level_start(gms::inet_address remote_node, sstring ks_name, sstring cf_name, dht::token_range range) {
|
||||
repair_row_level_start(gms::inet_address remote_node, sstring ks_name, sstring cf_name, dht::token_range range, table_schema_version schema_version) {
|
||||
if (remote_node == _myip) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
stats().rpc_call_nr++;
|
||||
return netw::get_local_messaging_service().send_repair_row_level_start(msg_addr(remote_node),
|
||||
_repair_meta_id, std::move(ks_name), std::move(cf_name), std::move(range), _algo, _max_row_buf_size, _seed,
|
||||
_master_node_shard_config.shard, _master_node_shard_config.shard_count, _master_node_shard_config.ignore_msb, _master_node_shard_config.partitioner_name);
|
||||
_master_node_shard_config.shard, _master_node_shard_config.shard_count, _master_node_shard_config.ignore_msb, _master_node_shard_config.partitioner_name, std::move(schema_version));
|
||||
}
|
||||
|
||||
// RPC handler
|
||||
static future<>
|
||||
repair_row_level_start_handler(gms::inet_address from, uint32_t repair_meta_id, sstring ks_name, sstring cf_name,
|
||||
repair_row_level_start_handler(gms::inet_address from, uint32_t src_cpu_id, uint32_t repair_meta_id, sstring ks_name, sstring cf_name,
|
||||
dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size,
|
||||
uint64_t seed, shard_config master_node_shard_config) {
|
||||
uint64_t seed, shard_config master_node_shard_config, table_schema_version schema_version) {
|
||||
if (!_sys_dist_ks->local_is_initialized() || !_view_update_generator->local_is_initialized()) {
|
||||
return make_exception_future<>(std::runtime_error(format("Node {} is not fully initialized for repair, try again later",
|
||||
utils::fb_utilities::get_broadcast_address())));
|
||||
}
|
||||
rlogger.debug(">>> Started Row Level Repair (Follower): local={}, peers={}, repair_meta_id={}, keyspace={}, cf={}, range={}",
|
||||
utils::fb_utilities::get_broadcast_address(), from, repair_meta_id, ks_name, cf_name, range);
|
||||
insert_repair_meta(from, repair_meta_id, std::move(ks_name), std::move(cf_name), std::move(range), algo, max_row_buf_size, seed, std::move(master_node_shard_config));
|
||||
return make_ready_future<>();
|
||||
rlogger.debug(">>> Started Row Level Repair (Follower): local={}, peers={}, repair_meta_id={}, keyspace={}, cf={}, schema_version={}, range={}",
|
||||
utils::fb_utilities::get_broadcast_address(), from, repair_meta_id, ks_name, cf_name, schema_version, range);
|
||||
return insert_repair_meta(from, src_cpu_id, repair_meta_id, std::move(range), algo, max_row_buf_size, seed, std::move(master_node_shard_config), std::move(schema_version));
|
||||
}
|
||||
|
||||
// RPC API
|
||||
@@ -1313,14 +1398,15 @@ future<> repair_init_messaging_service_handler(repair_service& rs, distributed<d
|
||||
});
|
||||
ms.register_repair_row_level_start([] (const rpc::client_info& cinfo, uint32_t repair_meta_id, sstring ks_name,
|
||||
sstring cf_name, dht::token_range range, row_level_diff_detect_algorithm algo, uint64_t max_row_buf_size, uint64_t seed,
|
||||
unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name) {
|
||||
unsigned remote_shard, unsigned remote_shard_count, unsigned remote_ignore_msb, sstring remote_partitioner_name, table_schema_version schema_version) {
|
||||
auto src_cpu_id = cinfo.retrieve_auxiliary<uint32_t>("src_cpu_id");
|
||||
auto from = cinfo.retrieve_auxiliary<gms::inet_address>("baddr");
|
||||
return smp::submit_to(src_cpu_id % smp::count, [from, repair_meta_id, ks_name, cf_name,
|
||||
range, algo, max_row_buf_size, seed, remote_shard, remote_shard_count, remote_ignore_msb, remote_partitioner_name] () mutable {
|
||||
return repair_meta::repair_row_level_start_handler(from, repair_meta_id, std::move(ks_name),
|
||||
return smp::submit_to(src_cpu_id % smp::count, [from, src_cpu_id, repair_meta_id, ks_name, cf_name,
|
||||
range, algo, max_row_buf_size, seed, remote_shard, remote_shard_count, remote_ignore_msb, remote_partitioner_name, schema_version] () mutable {
|
||||
return repair_meta::repair_row_level_start_handler(from, src_cpu_id, repair_meta_id, std::move(ks_name),
|
||||
std::move(cf_name), std::move(range), algo, max_row_buf_size, seed,
|
||||
shard_config{remote_shard, remote_shard_count, remote_ignore_msb, std::move(remote_partitioner_name)});
|
||||
shard_config{remote_shard, remote_shard_count, remote_ignore_msb, std::move(remote_partitioner_name)},
|
||||
schema_version);
|
||||
});
|
||||
});
|
||||
ms.register_repair_row_level_stop([] (const rpc::client_info& cinfo, uint32_t repair_meta_id,
|
||||
@@ -1389,7 +1475,7 @@ class row_level_repair {
|
||||
|
||||
// If the total size of the `_row_buf` on either of the nodes is zero,
|
||||
// we set this flag, which is an indication that rows are not synced.
|
||||
bool _zero_rows;
|
||||
bool _zero_rows = false;
|
||||
|
||||
// Sum of estimated_partitions on all peers
|
||||
uint64_t _estimated_partitions = 0;
|
||||
@@ -1608,8 +1694,12 @@ public:
|
||||
dht::global_partitioner().sharding_ignore_msb(),
|
||||
dht::global_partitioner().name()
|
||||
};
|
||||
auto s = _cf.schema();
|
||||
auto schema_version = s->version();
|
||||
|
||||
repair_meta master(_ri.db,
|
||||
_cf,
|
||||
s,
|
||||
_range,
|
||||
algorithm,
|
||||
_max_row_buf_size,
|
||||
@@ -1622,12 +1712,13 @@ public:
|
||||
// All nodes including the node itself.
|
||||
_all_nodes.insert(_all_nodes.begin(), master.myip());
|
||||
|
||||
rlogger.debug(">>> Started Row Level Repair (Master): local={}, peers={}, repair_meta_id={}, keyspace={}, cf={}, range={}, seed={}",
|
||||
master.myip(), _all_live_peer_nodes, master.repair_meta_id(), _ri.keyspace, _cf_name, _range, _seed);
|
||||
rlogger.debug(">>> Started Row Level Repair (Master): local={}, peers={}, repair_meta_id={}, keyspace={}, cf={}, schema_version={}, range={}, seed={}",
|
||||
master.myip(), _all_live_peer_nodes, master.repair_meta_id(), _ri.keyspace, _cf_name, schema_version, _range, _seed);
|
||||
|
||||
|
||||
try {
|
||||
parallel_for_each(_all_nodes, [&, this] (const gms::inet_address& node) {
|
||||
return master.repair_row_level_start(node, _ri.keyspace, _cf_name, _range).then([&] () {
|
||||
return master.repair_row_level_start(node, _ri.keyspace, _cf_name, _range, schema_version).then([&] () {
|
||||
return master.repair_get_estimated_partitions(node).then([this, node] (uint64_t partitions) {
|
||||
rlogger.trace("Get repair_get_estimated_partitions for node={}, estimated_partitions={}", node, partitions);
|
||||
_estimated_partitions += partitions;
|
||||
@@ -1677,19 +1768,7 @@ public:
|
||||
future<> repair_cf_range_row_level(repair_info& ri,
|
||||
sstring cf_name, dht::token_range range,
|
||||
const std::vector<gms::inet_address>& all_peer_nodes) {
|
||||
auto all_live_peer_nodes = boost::copy_range<std::vector<gms::inet_address>>(all_peer_nodes |
|
||||
boost::adaptors::filtered([] (const gms::inet_address& node) { return gms::get_local_gossiper().is_alive(node); }));
|
||||
if (all_live_peer_nodes.size() != all_peer_nodes.size()) {
|
||||
rlogger.warn("Repair for range={} is partial, peer nodes={}, live peer nodes={}",
|
||||
range, all_peer_nodes, all_live_peer_nodes);
|
||||
ri.nr_failed_ranges++;
|
||||
}
|
||||
if (all_live_peer_nodes.empty()) {
|
||||
rlogger.info(">>> Skipped Row Level Repair (Master): local={}, peers={}, keyspace={}, cf={}, range={}",
|
||||
utils::fb_utilities::get_broadcast_address(), all_peer_nodes, ri.keyspace, cf_name, range);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return do_with(row_level_repair(ri, std::move(cf_name), std::move(range), std::move(all_live_peer_nodes)), [] (row_level_repair& repair) {
|
||||
return do_with(row_level_repair(ri, std::move(cf_name), std::move(range), all_peer_nodes), [] (row_level_repair& repair) {
|
||||
return repair.run();
|
||||
});
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user