mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-21 17:10:35 +00:00
Compare commits
197 Commits
copilot/fi
...
branch-1.7
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
19907fad15 | ||
|
|
97f781c4d8 | ||
|
|
88e69701bd | ||
|
|
9007b38002 | ||
|
|
f2e0affcc5 | ||
|
|
6fce847000 | ||
|
|
f6f91a49cb | ||
|
|
266a45ad1e | ||
|
|
7d88026f22 | ||
|
|
760af5635d | ||
|
|
8c18bfa8d6 | ||
|
|
04e3785f77 | ||
|
|
e00e6ad1b6 | ||
|
|
5653ea9f8d | ||
|
|
4dbd1b77cd | ||
|
|
0e61212c20 | ||
|
|
6f4bc82b6e | ||
|
|
c1a30d3f60 | ||
|
|
cbad33033f | ||
|
|
1f31be9ba3 | ||
|
|
7e89dc3bbf | ||
|
|
2cdcaeba6e | ||
|
|
55cb0cafa8 | ||
|
|
660572e85c | ||
|
|
b86da0c479 | ||
|
|
b1b8599b1a | ||
|
|
89c037dfc8 | ||
|
|
25eec66935 | ||
|
|
b5787ca640 | ||
|
|
838dbd98ac | ||
|
|
022c2ff53a | ||
|
|
b7c27d73d8 | ||
|
|
bdc0ca7064 | ||
|
|
34260ce471 | ||
|
|
cffe57bcc7 | ||
|
|
adb9ce7f38 | ||
|
|
5f1fd7a0b1 | ||
|
|
d1f06633e0 | ||
|
|
b54ea3f6cf | ||
|
|
63fd65414a | ||
|
|
9790c2d229 | ||
|
|
7728a8dec5 | ||
|
|
1fd4a3ed34 | ||
|
|
0b48863a7e | ||
|
|
aec94b926c | ||
|
|
0ac2c388b6 | ||
|
|
09ac5b57aa | ||
|
|
ff643e3e40 | ||
|
|
a7b8d89de8 | ||
|
|
013fa3da14 | ||
|
|
259cfaf8f9 | ||
|
|
6501bf8e54 | ||
|
|
41b4055911 | ||
|
|
b594f21f91 | ||
|
|
bcd2e6249f | ||
|
|
4c79add7b0 | ||
|
|
00f6ccb75d | ||
|
|
77ac5a63db | ||
|
|
eb9de1a807 | ||
|
|
643a777067 | ||
|
|
6f91939650 | ||
|
|
15da71266d | ||
|
|
9cd36ade00 | ||
|
|
6f58a1372e | ||
|
|
0a9d26de4a | ||
|
|
35cd63e1f7 | ||
|
|
2ada799e07 | ||
|
|
b71037ac55 | ||
|
|
8639f32efd | ||
|
|
a0dce7c922 | ||
|
|
d39ff4f2ac | ||
|
|
7cbfe0711f | ||
|
|
139a2d14a1 | ||
|
|
6fff331698 | ||
|
|
43ae64cd47 | ||
|
|
f306b47a88 | ||
|
|
47b1e39410 | ||
|
|
0f4d5cde8e | ||
|
|
a24dcf1a19 | ||
|
|
611c25234e | ||
|
|
f64e3e24d4 | ||
|
|
f6034c717d | ||
|
|
b6f4df3cc8 | ||
|
|
af028360d7 | ||
|
|
60af7eab10 | ||
|
|
665d14584c | ||
|
|
bb56e7682c | ||
|
|
a4bd56ce40 | ||
|
|
6340fe61af | ||
|
|
f2317a6f3f | ||
|
|
7bb41b50f9 | ||
|
|
57d602fdd6 | ||
|
|
cd14b83192 | ||
|
|
a85b70d846 | ||
|
|
f44ea5335b | ||
|
|
a95c045b48 | ||
|
|
eb396d2795 | ||
|
|
dbbf99d7fa | ||
|
|
f7a143e7be | ||
|
|
562102cc76 | ||
|
|
d4b444418a | ||
|
|
befd4c9819 | ||
|
|
eb2fe0fbd3 | ||
|
|
eb6b0b1267 | ||
|
|
7836600ded | ||
|
|
230c33da49 | ||
|
|
17d8a0c727 | ||
|
|
064de6f8de | ||
|
|
df56c108b7 | ||
|
|
25607ab9df | ||
|
|
b26bd8bbeb | ||
|
|
1ca7f5458b | ||
|
|
50c8a08e91 | ||
|
|
9d1b9084ed | ||
|
|
e2c75d8532 | ||
|
|
59063f4891 | ||
|
|
de79792373 | ||
|
|
3557b449ac | ||
|
|
a8e89d624a | ||
|
|
31cd6914a8 | ||
|
|
a441f889c3 | ||
|
|
91b7cb8576 | ||
|
|
2b17c4aacf | ||
|
|
f61d9ac632 | ||
|
|
fc9db8bb03 | ||
|
|
bd67d23927 | ||
|
|
bdeeebbd74 | ||
|
|
a1cb29e7ec | ||
|
|
e8369644fd | ||
|
|
a36cabdb30 | ||
|
|
1d26fab73e | ||
|
|
5f0c635da7 | ||
|
|
82cc3d7aa5 | ||
|
|
98d782cfe1 | ||
|
|
ea0591ad3d | ||
|
|
7eedd743bf | ||
|
|
8a21961ec9 | ||
|
|
08698d9030 | ||
|
|
df5a291c63 | ||
|
|
1a77312aec | ||
|
|
ea684c9a3e | ||
|
|
2df7c80c66 | ||
|
|
193b5d1782 | ||
|
|
6609c9accb | ||
|
|
2f107d3f61 | ||
|
|
dd9afa4c93 | ||
|
|
4021e2befb | ||
|
|
9b26a57288 | ||
|
|
31b5ef13c2 | ||
|
|
4bbee01288 | ||
|
|
3cc03f88fd | ||
|
|
4179d8f7c4 | ||
|
|
c20ddaf5af | ||
|
|
29dd48621b | ||
|
|
87de77a5ea | ||
|
|
66c4dcba8e | ||
|
|
7cfdc08af9 | ||
|
|
fdbe5caf41 | ||
|
|
522e62089b | ||
|
|
699648d5a1 | ||
|
|
698a4e62d9 | ||
|
|
63bec22d28 | ||
|
|
3d14e6e802 | ||
|
|
ea4a2dad96 | ||
|
|
655e6197cb | ||
|
|
1a1370d33e | ||
|
|
7f17424a4e | ||
|
|
dd56f1bec7 | ||
|
|
5df61797d6 | ||
|
|
b6db9e3d51 | ||
|
|
f2595bea85 | ||
|
|
e930ef0ee0 | ||
|
|
4cf0f88724 | ||
|
|
372f07b06e | ||
|
|
0ccc6630a8 | ||
|
|
b95a2338be | ||
|
|
f2d0ac9994 | ||
|
|
56725de0db | ||
|
|
6f479c8999 | ||
|
|
8c0488bce9 | ||
|
|
68dd11e275 | ||
|
|
a64c53d05f | ||
|
|
42e7a59cca | ||
|
|
2cd019ee47 | ||
|
|
bc8b553bec | ||
|
|
0ba98be899 | ||
|
|
d6899134a7 | ||
|
|
5253031110 | ||
|
|
a203c87f0d | ||
|
|
37fc0e6840 | ||
|
|
0429e5d8ea | ||
|
|
3c147437ac | ||
|
|
e4b3f02286 | ||
|
|
5a8013e155 | ||
|
|
fdba5b8eac | ||
|
|
558a52802a | ||
|
|
4f416c7272 |
2
.gitmodules
vendored
2
.gitmodules
vendored
@@ -1,6 +1,6 @@
|
|||||||
[submodule "seastar"]
|
[submodule "seastar"]
|
||||||
path = seastar
|
path = seastar
|
||||||
url = ../seastar
|
url = ../scylla-seastar
|
||||||
ignore = dirty
|
ignore = dirty
|
||||||
[submodule "swagger-ui"]
|
[submodule "swagger-ui"]
|
||||||
path = swagger-ui
|
path = swagger-ui
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
VERSION=666.development
|
VERSION=1.7.5
|
||||||
|
|
||||||
if test -f version
|
if test -f version
|
||||||
then
|
then
|
||||||
|
|||||||
@@ -246,7 +246,8 @@ future<> auth::auth::setup() {
|
|||||||
std::map<sstring, sstring> opts;
|
std::map<sstring, sstring> opts;
|
||||||
opts["replication_factor"] = "1";
|
opts["replication_factor"] = "1";
|
||||||
auto ksm = keyspace_metadata::new_keyspace(AUTH_KS, "org.apache.cassandra.locator.SimpleStrategy", opts, true);
|
auto ksm = keyspace_metadata::new_keyspace(AUTH_KS, "org.apache.cassandra.locator.SimpleStrategy", opts, true);
|
||||||
f = service::get_local_migration_manager().announce_new_keyspace(ksm, false);
|
// We use min_timestamp so that default keyspace metadata will lose to any manual adjustments. See issue #2129.
|
||||||
|
f = service::get_local_migration_manager().announce_new_keyspace(ksm, api::min_timestamp, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
return f.then([] {
|
return f.then([] {
|
||||||
|
|||||||
@@ -22,13 +22,28 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <boost/intrusive/unordered_set.hpp>
|
#include <boost/intrusive/unordered_set.hpp>
|
||||||
|
|
||||||
|
#if __has_include(<boost/container/small_vector.hpp>)
|
||||||
|
|
||||||
#include <boost/container/small_vector.hpp>
|
#include <boost/container/small_vector.hpp>
|
||||||
|
|
||||||
|
template <typename T, size_t N>
|
||||||
|
using small_vector = boost::container::small_vector<T, N>;
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
template <typename T, size_t N>
|
||||||
|
using small_vector = std::vector<T>;
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "fnv1a_hasher.hh"
|
#include "fnv1a_hasher.hh"
|
||||||
|
#include "streamed_mutation.hh"
|
||||||
#include "mutation_partition.hh"
|
#include "mutation_partition.hh"
|
||||||
|
|
||||||
class cells_range {
|
class cells_range {
|
||||||
using ids_vector_type = boost::container::small_vector<column_id, 5>;
|
using ids_vector_type = small_vector<column_id, 5>;
|
||||||
|
|
||||||
position_in_partition_view _position;
|
position_in_partition_view _position;
|
||||||
ids_vector_type _ids;
|
ids_vector_type _ids;
|
||||||
@@ -147,7 +162,7 @@ class cell_locker {
|
|||||||
// temporarily removed from its parent partition_entry.
|
// temporarily removed from its parent partition_entry.
|
||||||
// Returns true if the cell_entry still exist in the new schema and
|
// Returns true if the cell_entry still exist in the new schema and
|
||||||
// should be reinserted.
|
// should be reinserted.
|
||||||
bool upgrade(const schema& from, const schema& to, column_kind kind) {
|
bool upgrade(const schema& from, const schema& to, column_kind kind) noexcept {
|
||||||
auto& old_column_mapping = from.get_column_mapping();
|
auto& old_column_mapping = from.get_column_mapping();
|
||||||
auto& column = old_column_mapping.column_at(kind, _address.id);
|
auto& column = old_column_mapping.column_at(kind, _address.id);
|
||||||
auto cdef = to.get_column_definition(column.name());
|
auto cdef = to.get_column_definition(column.name());
|
||||||
@@ -170,7 +185,9 @@ class cell_locker {
|
|||||||
}
|
}
|
||||||
|
|
||||||
~cell_entry() {
|
~cell_entry() {
|
||||||
assert(is_linked());
|
if (!is_linked()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
unlink();
|
unlink();
|
||||||
if (!--_parent._cell_count) {
|
if (!--_parent._cell_count) {
|
||||||
delete &_parent;
|
delete &_parent;
|
||||||
@@ -286,10 +303,9 @@ class cell_locker {
|
|||||||
};
|
};
|
||||||
|
|
||||||
class equal_compare {
|
class equal_compare {
|
||||||
schema_ptr _schema;
|
|
||||||
dht::decorated_key_equals_comparator _cmp;
|
dht::decorated_key_equals_comparator _cmp;
|
||||||
public:
|
public:
|
||||||
explicit equal_compare(const schema s) : _cmp(s) { }
|
explicit equal_compare(const schema& s) : _cmp(s) { }
|
||||||
bool operator()(const dht::decorated_key& dk, const partition_entry& pe) {
|
bool operator()(const dht::decorated_key& dk, const partition_entry& pe) {
|
||||||
return _cmp(dk, pe._key);
|
return _cmp(dk, pe._key);
|
||||||
}
|
}
|
||||||
@@ -386,22 +402,19 @@ struct cell_locker::locker {
|
|||||||
|
|
||||||
partition_cells_range _range;
|
partition_cells_range _range;
|
||||||
partition_cells_range::iterator _current_ck;
|
partition_cells_range::iterator _current_ck;
|
||||||
cells_range _cells_range;
|
|
||||||
cells_range::const_iterator _current_cell;
|
cells_range::const_iterator _current_cell;
|
||||||
|
|
||||||
std::vector<locked_cell> _locks;
|
std::vector<locked_cell> _locks;
|
||||||
private:
|
private:
|
||||||
void update_ck() {
|
void update_ck() {
|
||||||
if (!is_done()) {
|
if (!is_done()) {
|
||||||
_cells_range = *_current_ck;
|
_current_cell = _current_ck->begin();
|
||||||
_current_cell = _cells_range.begin();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> lock_next();
|
future<> lock_next();
|
||||||
|
|
||||||
bool is_done() const { return _current_ck == _range.end(); }
|
bool is_done() const { return _current_ck == _range.end(); }
|
||||||
std::vector<locked_cell> get() && { return std::move(_locks); }
|
|
||||||
public:
|
public:
|
||||||
explicit locker(const ::schema& s, partition_entry& pe, partition_cells_range&& range)
|
explicit locker(const ::schema& s, partition_entry& pe, partition_cells_range&& range)
|
||||||
: _hasher(s)
|
: _hasher(s)
|
||||||
@@ -413,18 +426,22 @@ public:
|
|||||||
update_ck();
|
update_ck();
|
||||||
}
|
}
|
||||||
|
|
||||||
future<std::vector<locked_cell>> lock_all() && {
|
locker(const locker&) = delete;
|
||||||
|
locker(locker&&) = delete;
|
||||||
|
|
||||||
|
future<> lock_all() {
|
||||||
// Cannot defer before first call to lock_next().
|
// Cannot defer before first call to lock_next().
|
||||||
return lock_next().then([this] {
|
return lock_next().then([this] {
|
||||||
return do_until([this] { return is_done(); }, [this] {
|
return do_until([this] { return is_done(); }, [this] {
|
||||||
return lock_next();
|
return lock_next();
|
||||||
}).then([&] {
|
|
||||||
return std::move(*this).get();
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<locked_cell> get() && { return std::move(_locks); }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
inline
|
||||||
future<std::vector<locked_cell>> cell_locker::lock_cells(const dht::decorated_key& dk, partition_cells_range&& range) {
|
future<std::vector<locked_cell>> cell_locker::lock_cells(const dht::decorated_key& dk, partition_cells_range&& range) {
|
||||||
partition_entry::hasher pe_hash;
|
partition_entry::hasher pe_hash;
|
||||||
partition_entry::equal_compare pe_eq(*_schema);
|
partition_entry::equal_compare pe_eq(*_schema);
|
||||||
@@ -460,14 +477,17 @@ future<std::vector<locked_cell>> cell_locker::lock_cells(const dht::decorated_ke
|
|||||||
return make_ready_future<std::vector<locked_cell>>(std::move(locks));
|
return make_ready_future<std::vector<locked_cell>>(std::move(locks));
|
||||||
}
|
}
|
||||||
|
|
||||||
return do_with(locker(*_schema, *it, std::move(range)), [] (auto& locker) mutable {
|
auto l = std::make_unique<locker>(*_schema, *it, std::move(range));
|
||||||
return std::move(locker).lock_all();
|
auto f = l->lock_all();
|
||||||
|
return f.then([l = std::move(l)] {
|
||||||
|
return std::move(*l).get();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
future<> cell_locker::locker::lock_next() {
|
future<> cell_locker::locker::lock_next() {
|
||||||
while (!is_done()) {
|
while (!is_done()) {
|
||||||
if (_current_cell == _cells_range.end() || _cells_range.empty()) {
|
if (_current_cell == _current_ck->end()) {
|
||||||
++_current_ck;
|
++_current_ck;
|
||||||
update_ck();
|
update_ck();
|
||||||
continue;
|
continue;
|
||||||
@@ -475,7 +495,7 @@ future<> cell_locker::locker::lock_next() {
|
|||||||
|
|
||||||
auto cid = *_current_cell++;
|
auto cid = *_current_cell++;
|
||||||
|
|
||||||
cell_address ca { position_in_partition(_cells_range.position()), cid };
|
cell_address ca { position_in_partition(_current_ck->position()), cid };
|
||||||
auto it = _partition_entry.cells().find(ca, _hasher, _eq_cmp);
|
auto it = _partition_entry.cells().find(ca, _hasher, _eq_cmp);
|
||||||
if (it != _partition_entry.cells().end()) {
|
if (it != _partition_entry.cells().end()) {
|
||||||
return it->lock().then([this, ce = it->shared_from_this()] () mutable {
|
return it->lock().then([this, ce = it->shared_from_this()] () mutable {
|
||||||
@@ -483,27 +503,25 @@ future<> cell_locker::locker::lock_next() {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
auto cell = make_lw_shared<cell_entry>(_partition_entry, position_in_partition(_cells_range.position()), cid);
|
auto cell = make_lw_shared<cell_entry>(_partition_entry, position_in_partition(_current_ck->position()), cid);
|
||||||
_partition_entry.insert(cell);
|
_partition_entry.insert(cell);
|
||||||
_locks.emplace_back(std::move(cell));
|
_locks.emplace_back(std::move(cell));
|
||||||
}
|
}
|
||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
bool cell_locker::partition_entry::upgrade(schema_ptr new_schema) {
|
bool cell_locker::partition_entry::upgrade(schema_ptr new_schema) {
|
||||||
if (_schema == new_schema) {
|
if (_schema == new_schema) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto buckets = std::make_unique<cells_type::bucket_type[]>(initial_bucket_count);
|
auto buckets = std::make_unique<cells_type::bucket_type[]>(_cells.bucket_count());
|
||||||
auto cells = cells_type(cells_type::bucket_traits(buckets.get(), _cells.bucket_count()),
|
auto cells = cells_type(cells_type::bucket_traits(buckets.get(), _cells.bucket_count()),
|
||||||
cell_entry::hasher(*new_schema), cell_entry::equal_compare(*new_schema));
|
cell_entry::hasher(*new_schema), cell_entry::equal_compare(*new_schema));
|
||||||
|
|
||||||
while (!_cells.empty()) {
|
_cells.clear_and_dispose([&] (cell_entry* cell_ptr) noexcept {
|
||||||
auto it = _cells.begin();
|
auto& cell = *cell_ptr;
|
||||||
auto& cell = *it;
|
|
||||||
_cells.erase(it);
|
|
||||||
|
|
||||||
auto kind = cell.position().is_static_row() ? column_kind::static_column
|
auto kind = cell.position().is_static_row() ? column_kind::static_column
|
||||||
: column_kind::regular_column;
|
: column_kind::regular_column;
|
||||||
auto reinsert = cell.upgrade(*_schema, *new_schema, kind);
|
auto reinsert = cell.upgrade(*_schema, *new_schema, kind);
|
||||||
@@ -512,9 +530,16 @@ bool cell_locker::partition_entry::upgrade(schema_ptr new_schema) {
|
|||||||
} else {
|
} else {
|
||||||
_cell_count--;
|
_cell_count--;
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
|
|
||||||
|
// bi::unordered_set move assignment is actually a swap.
|
||||||
|
// Original _buckets cannot be destroyed before the container using them is
|
||||||
|
// so we need to explicitly make sure that the original _cells is no more.
|
||||||
_cells = std::move(cells);
|
_cells = std::move(cells);
|
||||||
|
auto destroy = [] (auto) { };
|
||||||
|
destroy(std::move(cells));
|
||||||
|
|
||||||
_buckets = std::move(buckets);
|
_buckets = std::move(buckets);
|
||||||
|
_schema = new_schema;
|
||||||
return _cell_count;
|
return _cell_count;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -788,3 +788,23 @@ commitlog_total_space_in_mb: -1
|
|||||||
# By default, Scylla binds all interfaces to the prometheus API
|
# By default, Scylla binds all interfaces to the prometheus API
|
||||||
# It is possible to restrict the listening address to a specific one
|
# It is possible to restrict the listening address to a specific one
|
||||||
# prometheus_address: 0.0.0.0
|
# prometheus_address: 0.0.0.0
|
||||||
|
|
||||||
|
# Distribution of data among cores (shards) within a node
|
||||||
|
#
|
||||||
|
# Scylla distributes data within a node among shards, using a round-robin
|
||||||
|
# strategy:
|
||||||
|
# [shard0] [shard1] ... [shardN-1] [shard0] [shard1] ... [shardN-1] ...
|
||||||
|
#
|
||||||
|
# Scylla versions 1.6 and below used just one repetition of the pattern;
|
||||||
|
# this interfered with data placement among nodes (vnodes).
|
||||||
|
#
|
||||||
|
# Scylla versions 1.7 and above use 4096 repetitions of the pattern; this
|
||||||
|
# provides for better data distribution.
|
||||||
|
#
|
||||||
|
# The value below is the log (base 2) of the number of repetitions.
|
||||||
|
#
|
||||||
|
# Set to 0 to avoid rewriting all data when upgrading from Scylla 1.6 and
|
||||||
|
# below.
|
||||||
|
#
|
||||||
|
# Keep at 12 for new clusters.
|
||||||
|
murmur3_partitioner_ignore_msb_bits: 12
|
||||||
|
|||||||
@@ -230,6 +230,7 @@ scylla_tests = [
|
|||||||
'tests/virtual_reader_test',
|
'tests/virtual_reader_test',
|
||||||
'tests/view_schema_test',
|
'tests/view_schema_test',
|
||||||
'tests/counter_test',
|
'tests/counter_test',
|
||||||
|
'tests/cell_locker_test',
|
||||||
]
|
]
|
||||||
|
|
||||||
apps = [
|
apps = [
|
||||||
@@ -408,6 +409,7 @@ scylla_core = (['database.cc',
|
|||||||
'cql3/selection/selector.cc',
|
'cql3/selection/selector.cc',
|
||||||
'cql3/restrictions/statement_restrictions.cc',
|
'cql3/restrictions/statement_restrictions.cc',
|
||||||
'cql3/result_set.cc',
|
'cql3/result_set.cc',
|
||||||
|
'cql3/variable_specifications.cc',
|
||||||
'db/consistency_level.cc',
|
'db/consistency_level.cc',
|
||||||
'db/system_keyspace.cc',
|
'db/system_keyspace.cc',
|
||||||
'db/schema_tables.cc',
|
'db/schema_tables.cc',
|
||||||
@@ -628,7 +630,7 @@ deps['tests/sstable_test'] += ['tests/sstable_datafile_test.cc']
|
|||||||
|
|
||||||
deps['tests/bytes_ostream_test'] = ['tests/bytes_ostream_test.cc']
|
deps['tests/bytes_ostream_test'] = ['tests/bytes_ostream_test.cc']
|
||||||
deps['tests/input_stream_test'] = ['tests/input_stream_test.cc']
|
deps['tests/input_stream_test'] = ['tests/input_stream_test.cc']
|
||||||
deps['tests/UUID_test'] = ['utils/UUID_gen.cc', 'tests/UUID_test.cc']
|
deps['tests/UUID_test'] = ['utils/UUID_gen.cc', 'tests/UUID_test.cc', 'utils/uuid.cc']
|
||||||
deps['tests/murmur_hash_test'] = ['bytes.cc', 'utils/murmur_hash.cc', 'tests/murmur_hash_test.cc']
|
deps['tests/murmur_hash_test'] = ['bytes.cc', 'utils/murmur_hash.cc', 'tests/murmur_hash_test.cc']
|
||||||
deps['tests/allocation_strategy_test'] = ['tests/allocation_strategy_test.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
|
deps['tests/allocation_strategy_test'] = ['tests/allocation_strategy_test.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
|
||||||
deps['tests/anchorless_list_test'] = ['tests/anchorless_list_test.cc']
|
deps['tests/anchorless_list_test'] = ['tests/anchorless_list_test.cc']
|
||||||
|
|||||||
77
counters.cc
77
counters.cc
@@ -29,6 +29,15 @@ counter_id counter_id::local()
|
|||||||
return counter_id(service::get_local_storage_service().get_local_id());
|
return counter_id(service::get_local_storage_service().get_local_id());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool counter_id::less_compare_1_7_4::operator()(const counter_id& a, const counter_id& b) const
|
||||||
|
{
|
||||||
|
if (a._most_significant != b._most_significant) {
|
||||||
|
return a._most_significant < b._most_significant;
|
||||||
|
} else {
|
||||||
|
return a._least_significant < b._least_significant;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::ostream& operator<<(std::ostream& os, const counter_id& id) {
|
std::ostream& operator<<(std::ostream& os, const counter_id& id) {
|
||||||
return os << id.to_uuid();
|
return os << id.to_uuid();
|
||||||
}
|
}
|
||||||
@@ -42,6 +51,33 @@ std::ostream& operator<<(std::ostream& os, counter_cell_view ccv) {
|
|||||||
return os << "{counter_cell timestamp: " << ccv.timestamp() << " shards: {" << ::join(", ", ccv.shards()) << "}}";
|
return os << "{counter_cell timestamp: " << ccv.timestamp() << " shards: {" << ::join(", ", ccv.shards()) << "}}";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void counter_cell_builder::do_sort_and_remove_duplicates()
|
||||||
|
{
|
||||||
|
boost::range::sort(_shards, [] (auto& a, auto& b) { return a.id() < b.id(); });
|
||||||
|
|
||||||
|
std::vector<counter_shard> new_shards;
|
||||||
|
new_shards.reserve(_shards.size());
|
||||||
|
for (auto& cs : _shards) {
|
||||||
|
if (new_shards.empty() || new_shards.back().id() != cs.id()) {
|
||||||
|
new_shards.emplace_back(cs);
|
||||||
|
} else {
|
||||||
|
new_shards.back().apply(cs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_shards = std::move(new_shards);
|
||||||
|
_sorted = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<counter_shard> counter_cell_view::shards_compatible_with_1_7_4() const
|
||||||
|
{
|
||||||
|
auto sorted_shards = boost::copy_range<std::vector<counter_shard>>(shards());
|
||||||
|
counter_id::less_compare_1_7_4 cmp;
|
||||||
|
boost::range::sort(sorted_shards, [&] (auto& a, auto& b) {
|
||||||
|
return cmp(a.id(), b.id());
|
||||||
|
});
|
||||||
|
return sorted_shards;
|
||||||
|
}
|
||||||
|
|
||||||
bool counter_cell_view::apply_reversibly(atomic_cell_or_collection& dst, atomic_cell_or_collection& src)
|
bool counter_cell_view::apply_reversibly(atomic_cell_or_collection& dst, atomic_cell_or_collection& src)
|
||||||
{
|
{
|
||||||
// TODO: optimise for single shard existing in the other
|
// TODO: optimise for single shard existing in the other
|
||||||
@@ -139,8 +175,8 @@ stdx::optional<atomic_cell> counter_cell_view::difference(atomic_cell_view a, at
|
|||||||
void transform_counter_updates_to_shards(mutation& m, const mutation* current_state, uint64_t clock_offset) {
|
void transform_counter_updates_to_shards(mutation& m, const mutation* current_state, uint64_t clock_offset) {
|
||||||
// FIXME: allow current_state to be frozen_mutation
|
// FIXME: allow current_state to be frozen_mutation
|
||||||
|
|
||||||
auto transform_new_row_to_shards = [clock_offset] (auto& cr) {
|
auto transform_new_row_to_shards = [clock_offset] (auto& cells) {
|
||||||
cr.row().cells().for_each_cell([clock_offset] (auto, atomic_cell_or_collection& ac_o_c) {
|
cells.for_each_cell([clock_offset] (auto, atomic_cell_or_collection& ac_o_c) {
|
||||||
auto acv = ac_o_c.as_atomic_cell();
|
auto acv = ac_o_c.as_atomic_cell();
|
||||||
if (!acv.is_live()) {
|
if (!acv.is_live()) {
|
||||||
return; // continue -- we are in lambda
|
return; // continue -- we are in lambda
|
||||||
@@ -153,32 +189,22 @@ void transform_counter_updates_to_shards(mutation& m, const mutation* current_st
|
|||||||
};
|
};
|
||||||
|
|
||||||
if (!current_state) {
|
if (!current_state) {
|
||||||
|
transform_new_row_to_shards(m.partition().static_row());
|
||||||
for (auto& cr : m.partition().clustered_rows()) {
|
for (auto& cr : m.partition().clustered_rows()) {
|
||||||
transform_new_row_to_shards(cr);
|
transform_new_row_to_shards(cr.row().cells());
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
clustering_key::less_compare cmp(*m.schema());
|
clustering_key::less_compare cmp(*m.schema());
|
||||||
|
|
||||||
auto& cstate = current_state->partition();
|
auto transform_row_to_shards = [clock_offset] (auto& transformee, auto& state) {
|
||||||
auto it = cstate.clustered_rows().begin();
|
|
||||||
auto end = cstate.clustered_rows().end();
|
|
||||||
for (auto& cr : m.partition().clustered_rows()) {
|
|
||||||
while (it != end && cmp(it->key(), cr.key())) {
|
|
||||||
++it;
|
|
||||||
}
|
|
||||||
if (it == end || cmp(cr.key(), it->key())) {
|
|
||||||
transform_new_row_to_shards(cr);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct counter_shard_or_tombstone {
|
struct counter_shard_or_tombstone {
|
||||||
stdx::optional<counter_shard> shard;
|
stdx::optional<counter_shard> shard;
|
||||||
tombstone tomb;
|
tombstone tomb;
|
||||||
};
|
};
|
||||||
std::deque<std::pair<column_id, counter_shard_or_tombstone>> shards;
|
std::deque<std::pair<column_id, counter_shard_or_tombstone>> shards;
|
||||||
it->row().cells().for_each_cell([&] (column_id id, const atomic_cell_or_collection& ac_o_c) {
|
state.for_each_cell([&] (column_id id, const atomic_cell_or_collection& ac_o_c) {
|
||||||
auto acv = ac_o_c.as_atomic_cell();
|
auto acv = ac_o_c.as_atomic_cell();
|
||||||
if (!acv.is_live()) {
|
if (!acv.is_live()) {
|
||||||
counter_shard_or_tombstone cs_o_t { { },
|
counter_shard_or_tombstone cs_o_t { { },
|
||||||
@@ -194,7 +220,7 @@ void transform_counter_updates_to_shards(mutation& m, const mutation* current_st
|
|||||||
shards.emplace_back(std::make_pair(id, counter_shard_or_tombstone { counter_shard(*cs), tombstone() }));
|
shards.emplace_back(std::make_pair(id, counter_shard_or_tombstone { counter_shard(*cs), tombstone() }));
|
||||||
});
|
});
|
||||||
|
|
||||||
cr.row().cells().for_each_cell([&] (column_id id, atomic_cell_or_collection& ac_o_c) {
|
transformee.for_each_cell([&] (column_id id, atomic_cell_or_collection& ac_o_c) {
|
||||||
auto acv = ac_o_c.as_atomic_cell();
|
auto acv = ac_o_c.as_atomic_cell();
|
||||||
if (!acv.is_live()) {
|
if (!acv.is_live()) {
|
||||||
return; // continue -- we are in lambda
|
return; // continue -- we are in lambda
|
||||||
@@ -224,5 +250,22 @@ void transform_counter_updates_to_shards(mutation& m, const mutation* current_st
|
|||||||
}
|
}
|
||||||
ac_o_c = ccb.build(acv.timestamp());
|
ac_o_c = ccb.build(acv.timestamp());
|
||||||
});
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
transform_row_to_shards(m.partition().static_row(), current_state->partition().static_row());
|
||||||
|
|
||||||
|
auto& cstate = current_state->partition();
|
||||||
|
auto it = cstate.clustered_rows().begin();
|
||||||
|
auto end = cstate.clustered_rows().end();
|
||||||
|
for (auto& cr : m.partition().clustered_rows()) {
|
||||||
|
while (it != end && cmp(it->key(), cr.key())) {
|
||||||
|
++it;
|
||||||
|
}
|
||||||
|
if (it == end || cmp(cr.key(), it->key())) {
|
||||||
|
transform_new_row_to_shards(cr.row().cells());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
transform_row_to_shards(cr.row().cells(), it->row().cells());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
68
counters.hh
68
counters.hh
@@ -36,6 +36,10 @@ class counter_id {
|
|||||||
int64_t _least_significant;
|
int64_t _least_significant;
|
||||||
int64_t _most_significant;
|
int64_t _most_significant;
|
||||||
public:
|
public:
|
||||||
|
static_assert(std::is_same<decltype(std::declval<utils::UUID>().get_least_significant_bits()), int64_t>::value
|
||||||
|
&& std::is_same<decltype(std::declval<utils::UUID>().get_most_significant_bits()), int64_t>::value,
|
||||||
|
"utils::UUID is expected to work with two signed 64-bit integers");
|
||||||
|
|
||||||
counter_id() = default;
|
counter_id() = default;
|
||||||
explicit counter_id(utils::UUID uuid) noexcept
|
explicit counter_id(utils::UUID uuid) noexcept
|
||||||
: _least_significant(uuid.get_least_significant_bits())
|
: _least_significant(uuid.get_least_significant_bits())
|
||||||
@@ -49,12 +53,20 @@ public:
|
|||||||
bool operator<(const counter_id& other) const {
|
bool operator<(const counter_id& other) const {
|
||||||
return to_uuid() < other.to_uuid();
|
return to_uuid() < other.to_uuid();
|
||||||
}
|
}
|
||||||
|
bool operator>(const counter_id& other) const {
|
||||||
|
return other.to_uuid() < to_uuid();
|
||||||
|
}
|
||||||
bool operator==(const counter_id& other) const {
|
bool operator==(const counter_id& other) const {
|
||||||
return to_uuid() == other.to_uuid();
|
return to_uuid() == other.to_uuid();
|
||||||
}
|
}
|
||||||
bool operator!=(const counter_id& other) const {
|
bool operator!=(const counter_id& other) const {
|
||||||
return !(*this == other);
|
return !(*this == other);
|
||||||
}
|
}
|
||||||
|
public:
|
||||||
|
// (Wrong) Counter ID ordering used by Scylla 1.7.4 and earlier.
|
||||||
|
struct less_compare_1_7_4 {
|
||||||
|
bool operator()(const counter_id& a, const counter_id& b) const;
|
||||||
|
};
|
||||||
public:
|
public:
|
||||||
static counter_id local();
|
static counter_id local();
|
||||||
|
|
||||||
@@ -94,6 +106,14 @@ public:
|
|||||||
int64_t value() const { return read<int64_t>(offset::value); }
|
int64_t value() const { return read<int64_t>(offset::value); }
|
||||||
int64_t logical_clock() const { return read<int64_t>(offset::logical_clock); }
|
int64_t logical_clock() const { return read<int64_t>(offset::logical_clock); }
|
||||||
|
|
||||||
|
bool operator==(const counter_shard_view& other) const {
|
||||||
|
return id() == other.id() && value() == other.value()
|
||||||
|
&& logical_clock() == other.logical_clock();
|
||||||
|
}
|
||||||
|
bool operator!=(const counter_shard_view& other) const {
|
||||||
|
return !(*this == other);
|
||||||
|
}
|
||||||
|
|
||||||
struct less_compare_by_id {
|
struct less_compare_by_id {
|
||||||
bool operator()(const counter_shard_view& x, const counter_shard_view& y) const {
|
bool operator()(const counter_shard_view& x, const counter_shard_view& y) const {
|
||||||
return x.id() < y.id();
|
return x.id() < y.id();
|
||||||
@@ -112,6 +132,18 @@ private:
|
|||||||
static void write(const T& value, bytes::iterator& out) {
|
static void write(const T& value, bytes::iterator& out) {
|
||||||
out = std::copy_n(reinterpret_cast<const char*>(&value), sizeof(T), out);
|
out = std::copy_n(reinterpret_cast<const char*>(&value), sizeof(T), out);
|
||||||
}
|
}
|
||||||
|
private:
|
||||||
|
// Shared logic for applying counter_shards and counter_shard_views.
|
||||||
|
// T is either counter_shard or basic_counter_shard_view<U>.
|
||||||
|
template<typename T>
|
||||||
|
counter_shard& do_apply(T&& other) noexcept {
|
||||||
|
auto other_clock = other.logical_clock();
|
||||||
|
if (_logical_clock < other_clock) {
|
||||||
|
_logical_clock = other_clock;
|
||||||
|
_value = other.value();
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
public:
|
public:
|
||||||
counter_shard(counter_id id, int64_t value, int64_t logical_clock) noexcept
|
counter_shard(counter_id id, int64_t value, int64_t logical_clock) noexcept
|
||||||
: _id(id)
|
: _id(id)
|
||||||
@@ -136,12 +168,11 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
counter_shard& apply(counter_shard_view other) noexcept {
|
counter_shard& apply(counter_shard_view other) noexcept {
|
||||||
auto other_clock = other.logical_clock();
|
return do_apply(other);
|
||||||
if (_logical_clock < other_clock) {
|
}
|
||||||
_logical_clock = other_clock;
|
|
||||||
_value = other.value();
|
counter_shard& apply(const counter_shard& other) noexcept {
|
||||||
}
|
return do_apply(other);
|
||||||
return *this;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t serialized_size() {
|
static size_t serialized_size() {
|
||||||
@@ -156,6 +187,9 @@ public:
|
|||||||
|
|
||||||
class counter_cell_builder {
|
class counter_cell_builder {
|
||||||
std::vector<counter_shard> _shards;
|
std::vector<counter_shard> _shards;
|
||||||
|
bool _sorted = true;
|
||||||
|
private:
|
||||||
|
void do_sort_and_remove_duplicates();
|
||||||
public:
|
public:
|
||||||
counter_cell_builder() = default;
|
counter_cell_builder() = default;
|
||||||
counter_cell_builder(size_t shard_count) {
|
counter_cell_builder(size_t shard_count) {
|
||||||
@@ -166,6 +200,21 @@ public:
|
|||||||
_shards.emplace_back(cs);
|
_shards.emplace_back(cs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void add_maybe_unsorted_shard(const counter_shard& cs) {
|
||||||
|
add_shard(cs);
|
||||||
|
if (_sorted && _shards.size() > 1) {
|
||||||
|
auto current = _shards.rbegin();
|
||||||
|
auto previous = std::next(current);
|
||||||
|
_sorted = current->id() > previous->id();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void sort_and_remove_duplicates() {
|
||||||
|
if (!_sorted) {
|
||||||
|
do_sort_and_remove_duplicates();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
size_t serialized_size() const {
|
size_t serialized_size() const {
|
||||||
return _shards.size() * counter_shard::serialized_size();
|
return _shards.size() * counter_shard::serialized_size();
|
||||||
}
|
}
|
||||||
@@ -287,6 +336,13 @@ public:
|
|||||||
return get_shard(counter_id::local());
|
return get_shard(counter_id::local());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Two counter cell views are equal when their timestamps match and their
// shard ranges compare equal element by element.
bool operator==(const counter_cell_view& other) const {
    if (!(timestamp() == other.timestamp())) {
        return false;
    }
    return boost::equal(shards(), other.shards());
}
|
||||||
|
|
||||||
|
// Returns counter shards in an order that is compatible with Scylla 1.7.4.
|
||||||
|
std::vector<counter_shard> shards_compatible_with_1_7_4() const;
|
||||||
|
|
||||||
// Reversibly applies two counter cells, at least one of them must be live.
|
// Reversibly applies two counter cells, at least one of them must be live.
|
||||||
// Returns true iff dst was modified.
|
// Returns true iff dst was modified.
|
||||||
static bool apply_reversibly(atomic_cell_or_collection& dst, atomic_cell_or_collection& src);
|
static bool apply_reversibly(atomic_cell_or_collection& dst, atomic_cell_or_collection& src);
|
||||||
|
|||||||
@@ -1548,6 +1548,8 @@ basic_unreserved_keyword returns [sstring str]
|
|||||||
| K_DISTINCT
|
| K_DISTINCT
|
||||||
| K_CONTAINS
|
| K_CONTAINS
|
||||||
| K_STATIC
|
| K_STATIC
|
||||||
|
| K_FROZEN
|
||||||
|
| K_TUPLE
|
||||||
| K_FUNCTION
|
| K_FUNCTION
|
||||||
| K_AGGREGATE
|
| K_AGGREGATE
|
||||||
| K_SFUNC
|
| K_SFUNC
|
||||||
|
|||||||
@@ -67,6 +67,14 @@ functions::init() {
|
|||||||
declare(aggregate_fcts::make_max_function<int64_t>());
|
declare(aggregate_fcts::make_max_function<int64_t>());
|
||||||
declare(aggregate_fcts::make_min_function<int64_t>());
|
declare(aggregate_fcts::make_min_function<int64_t>());
|
||||||
|
|
||||||
|
declare(aggregate_fcts::make_count_function<float>());
|
||||||
|
declare(aggregate_fcts::make_max_function<float>());
|
||||||
|
declare(aggregate_fcts::make_min_function<float>());
|
||||||
|
|
||||||
|
declare(aggregate_fcts::make_count_function<double>());
|
||||||
|
declare(aggregate_fcts::make_max_function<double>());
|
||||||
|
declare(aggregate_fcts::make_min_function<double>());
|
||||||
|
|
||||||
//FIXME:
|
//FIXME:
|
||||||
//declare(aggregate_fcts::make_count_function<bytes>());
|
//declare(aggregate_fcts::make_count_function<bytes>());
|
||||||
//declare(aggregate_fcts::make_max_function<bytes>());
|
//declare(aggregate_fcts::make_max_function<bytes>());
|
||||||
@@ -78,15 +86,17 @@ functions::init() {
|
|||||||
declare(make_blob_as_varchar_fct());
|
declare(make_blob_as_varchar_fct());
|
||||||
declare(aggregate_fcts::make_sum_function<int32_t>());
|
declare(aggregate_fcts::make_sum_function<int32_t>());
|
||||||
declare(aggregate_fcts::make_sum_function<int64_t>());
|
declare(aggregate_fcts::make_sum_function<int64_t>());
|
||||||
declare(aggregate_fcts::make_avg_function<int32_t>());
|
declare(aggregate_fcts::make_sum_function<float>());
|
||||||
declare(aggregate_fcts::make_avg_function<int64_t>());
|
declare(aggregate_fcts::make_sum_function<double>());
|
||||||
#if 0
|
#if 0
|
||||||
declare(AggregateFcts.sumFunctionForFloat);
|
|
||||||
declare(AggregateFcts.sumFunctionForDouble);
|
|
||||||
declare(AggregateFcts.sumFunctionForDecimal);
|
declare(AggregateFcts.sumFunctionForDecimal);
|
||||||
declare(AggregateFcts.sumFunctionForVarint);
|
declare(AggregateFcts.sumFunctionForVarint);
|
||||||
declare(AggregateFcts.avgFunctionForFloat);
|
#endif
|
||||||
declare(AggregateFcts.avgFunctionForDouble);
|
declare(aggregate_fcts::make_avg_function<int32_t>());
|
||||||
|
declare(aggregate_fcts::make_avg_function<int64_t>());
|
||||||
|
declare(aggregate_fcts::make_avg_function<float>());
|
||||||
|
declare(aggregate_fcts::make_avg_function<double>());
|
||||||
|
#if 0
|
||||||
declare(AggregateFcts.avgFunctionForVarint);
|
declare(AggregateFcts.avgFunctionForVarint);
|
||||||
declare(AggregateFcts.avgFunctionForDecimal);
|
declare(AggregateFcts.avgFunctionForDecimal);
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -115,7 +115,7 @@ public:
|
|||||||
if (restriction->is_slice()) {
|
if (restriction->is_slice()) {
|
||||||
throw exceptions::invalid_request_exception(sprint(
|
throw exceptions::invalid_request_exception(sprint(
|
||||||
"PRIMARY KEY column \"%s\" cannot be restricted (preceding column \"%s\" is restricted by a non-EQ relation)",
|
"PRIMARY KEY column \"%s\" cannot be restricted (preceding column \"%s\" is restricted by a non-EQ relation)",
|
||||||
_restrictions->next_column(new_column)->name_as_text(), new_column.name_as_text()));
|
last_column.name_as_text(), new_column.name_as_text()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -43,6 +43,7 @@
|
|||||||
#include "schema_builder.hh"
|
#include "schema_builder.hh"
|
||||||
#include "service/migration_manager.hh"
|
#include "service/migration_manager.hh"
|
||||||
#include "boost/range/adaptor/map.hpp"
|
#include "boost/range/adaptor/map.hpp"
|
||||||
|
#include "stdx.hh"
|
||||||
|
|
||||||
namespace cql3 {
|
namespace cql3 {
|
||||||
|
|
||||||
@@ -86,14 +87,14 @@ const sstring& alter_type_statement::keyspace() const
|
|||||||
return _name.get_keyspace();
|
return _name.get_keyspace();
|
||||||
}
|
}
|
||||||
|
|
||||||
static int32_t get_idx_of_field(user_type type, shared_ptr<column_identifier> field)
|
static stdx::optional<uint32_t> get_idx_of_field(user_type type, shared_ptr<column_identifier> field)
|
||||||
{
|
{
|
||||||
for (uint32_t i = 0; i < type->field_names().size(); ++i) {
|
for (uint32_t i = 0; i < type->field_names().size(); ++i) {
|
||||||
if (field->name() == type->field_names()[i]) {
|
if (field->name() == type->field_names()[i]) {
|
||||||
return i;
|
return {i};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return -1;
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
void alter_type_statement::do_announce_migration(database& db, ::keyspace& ks, bool is_local_only)
|
void alter_type_statement::do_announce_migration(database& db, ::keyspace& ks, bool is_local_only)
|
||||||
@@ -168,7 +169,7 @@ alter_type_statement::add_or_alter::add_or_alter(const ut_name& name, bool is_ad
|
|||||||
|
|
||||||
user_type alter_type_statement::add_or_alter::do_add(database& db, user_type to_update) const
|
user_type alter_type_statement::add_or_alter::do_add(database& db, user_type to_update) const
|
||||||
{
|
{
|
||||||
if (get_idx_of_field(to_update, _field_name) >= 0) {
|
if (get_idx_of_field(to_update, _field_name)) {
|
||||||
throw exceptions::invalid_request_exception(sprint("Cannot add new field %s to type %s: a field of the same name already exists", _field_name->name(), _name.to_string()));
|
throw exceptions::invalid_request_exception(sprint("Cannot add new field %s to type %s: a field of the same name already exists", _field_name->name(), _name.to_string()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -185,19 +186,19 @@ user_type alter_type_statement::add_or_alter::do_add(database& db, user_type to_
|
|||||||
|
|
||||||
user_type alter_type_statement::add_or_alter::do_alter(database& db, user_type to_update) const
|
user_type alter_type_statement::add_or_alter::do_alter(database& db, user_type to_update) const
|
||||||
{
|
{
|
||||||
uint32_t idx = get_idx_of_field(to_update, _field_name);
|
stdx::optional<uint32_t> idx = get_idx_of_field(to_update, _field_name);
|
||||||
if (idx < 0) {
|
if (!idx) {
|
||||||
throw exceptions::invalid_request_exception(sprint("Unknown field %s in type %s", _field_name->name(), _name.to_string()));
|
throw exceptions::invalid_request_exception(sprint("Unknown field %s in type %s", _field_name->name(), _name.to_string()));
|
||||||
}
|
}
|
||||||
|
|
||||||
auto previous = to_update->field_types()[idx];
|
auto previous = to_update->field_types()[*idx];
|
||||||
auto new_type = _field_type->prepare(db, keyspace())->get_type();
|
auto new_type = _field_type->prepare(db, keyspace())->get_type();
|
||||||
if (!new_type->is_compatible_with(*previous)) {
|
if (!new_type->is_compatible_with(*previous)) {
|
||||||
throw exceptions::invalid_request_exception(sprint("Type %s in incompatible with previous type %s of field %s in user type %s", _field_type->to_string(), previous->as_cql3_type()->to_string(), _field_name->name(), _name.to_string()));
|
throw exceptions::invalid_request_exception(sprint("Type %s in incompatible with previous type %s of field %s in user type %s", _field_type->to_string(), previous->as_cql3_type()->to_string(), _field_name->name(), _name.to_string()));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<data_type> new_types(to_update->field_types());
|
std::vector<data_type> new_types(to_update->field_types());
|
||||||
new_types[idx] = new_type;
|
new_types[*idx] = new_type;
|
||||||
return user_type_impl::get_instance(to_update->_keyspace, to_update->_name, to_update->field_names(), std::move(new_types));
|
return user_type_impl::get_instance(to_update->_keyspace, to_update->_name, to_update->field_names(), std::move(new_types));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -221,11 +222,11 @@ user_type alter_type_statement::renames::make_updated_type(database& db, user_ty
|
|||||||
std::vector<bytes> new_names(to_update->field_names());
|
std::vector<bytes> new_names(to_update->field_names());
|
||||||
for (auto&& rename : _renames) {
|
for (auto&& rename : _renames) {
|
||||||
auto&& from = rename.first;
|
auto&& from = rename.first;
|
||||||
int32_t idx = get_idx_of_field(to_update, from);
|
stdx::optional<uint32_t> idx = get_idx_of_field(to_update, from);
|
||||||
if (idx < 0) {
|
if (!idx) {
|
||||||
throw exceptions::invalid_request_exception(sprint("Unknown field %s in type %s", from->to_string(), _name.to_string()));
|
throw exceptions::invalid_request_exception(sprint("Unknown field %s in type %s", from->to_string(), _name.to_string()));
|
||||||
}
|
}
|
||||||
new_names[idx] = rename.second->name();
|
new_names[*idx] = rename.second->name();
|
||||||
}
|
}
|
||||||
auto&& updated = user_type_impl::get_instance(to_update->_keyspace, to_update->_name, std::move(new_names), to_update->field_types());
|
auto&& updated = user_type_impl::get_instance(to_update->_keyspace, to_update->_name, std::move(new_names), to_update->field_types());
|
||||||
create_type_statement::check_for_duplicate_names(updated);
|
create_type_statement::check_for_duplicate_names(updated);
|
||||||
|
|||||||
@@ -381,8 +381,18 @@ shared_ptr<prepared_statement>
|
|||||||
batch_statement::prepare(database& db, cql_stats& stats) {
|
batch_statement::prepare(database& db, cql_stats& stats) {
|
||||||
auto&& bound_names = get_bound_variables();
|
auto&& bound_names = get_bound_variables();
|
||||||
|
|
||||||
|
stdx::optional<sstring> first_ks;
|
||||||
|
stdx::optional<sstring> first_cf;
|
||||||
|
bool have_multiple_cfs = false;
|
||||||
|
|
||||||
std::vector<shared_ptr<cql3::statements::modification_statement>> statements;
|
std::vector<shared_ptr<cql3::statements::modification_statement>> statements;
|
||||||
for (auto&& parsed : _parsed_statements) {
|
for (auto&& parsed : _parsed_statements) {
|
||||||
|
if (!first_ks) {
|
||||||
|
first_ks = parsed->keyspace();
|
||||||
|
first_cf = parsed->column_family();
|
||||||
|
} else {
|
||||||
|
have_multiple_cfs = first_ks.value() != parsed->keyspace() || first_cf.value() != parsed->column_family();
|
||||||
|
}
|
||||||
statements.push_back(parsed->prepare(db, bound_names, stats));
|
statements.push_back(parsed->prepare(db, bound_names, stats));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -392,8 +402,13 @@ batch_statement::prepare(database& db, cql_stats& stats) {
|
|||||||
cql3::statements::batch_statement batch_statement_(bound_names->size(), _type, std::move(statements), std::move(prep_attrs), stats);
|
cql3::statements::batch_statement batch_statement_(bound_names->size(), _type, std::move(statements), std::move(prep_attrs), stats);
|
||||||
batch_statement_.validate();
|
batch_statement_.validate();
|
||||||
|
|
||||||
|
std::vector<uint16_t> partition_key_bind_indices;
|
||||||
|
if (!have_multiple_cfs && batch_statement_.get_statements().size() > 0) {
|
||||||
|
partition_key_bind_indices = bound_names->get_partition_key_bind_indexes(batch_statement_.get_statements()[0]->s);
|
||||||
|
}
|
||||||
return ::make_shared<prepared>(make_shared(std::move(batch_statement_)),
|
return ::make_shared<prepared>(make_shared(std::move(batch_statement_)),
|
||||||
bound_names->get_specifications());
|
bound_names->get_specifications(),
|
||||||
|
std::move(partition_key_bind_indices));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -79,6 +79,57 @@ void drop_type_statement::validate(distributed<service::storage_proxy>& proxy, c
|
|||||||
throw exceptions::invalid_request_exception(sprint("No user type named %s exists.", _name.to_string()));
|
throw exceptions::invalid_request_exception(sprint("No user type named %s exists.", _name.to_string()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We don't want to drop a type unless it's not used anymore (mainly because
|
||||||
|
// if someone drops a type and recreates one with the same name but different
|
||||||
|
// definition with the previous name still in use, things can get messy).
|
||||||
|
// We have two places to check: 1) other user type that can nest the one
|
||||||
|
// we drop and 2) existing tables referencing the type (maybe in a nested
|
||||||
|
// way).
|
||||||
|
|
||||||
|
// This code is moved from schema_keyspace (akin to origin) because we cannot
|
||||||
|
// delay this check to until after we've applied the mutations. If a type or
|
||||||
|
// table references the type we're dropping, we will a.) get exceptions parsing
|
||||||
|
// (can be translated to invalid_request, but...) and more importantly b.)
|
||||||
|
// we will leave those types/tables in a broken state.
|
||||||
|
// We managed to get through this before because we neither enforced hard
|
||||||
|
// cross reference between types when loading them, nor did we in fact
|
||||||
|
// probably ever run the scenario of dropping a referenced type and then
|
||||||
|
// actually using the referee.
|
||||||
|
//
|
||||||
|
// Now, this has a giant flaw. We are susceptible to race conditions here,
|
||||||
|
// since we could have a drop at the same time as a create type that references
|
||||||
|
// the dropped one, but we complete the check before the create is done,
|
||||||
|
// yet apply the drop mutations after -> inconsistent data!
|
||||||
|
// This problem is the same in origin, and I see no good way around it
|
||||||
|
// as long as the atomicity of schema modifications is based on
|
||||||
|
// actual application of mutations, because unlike other drops, this one isn't
|
||||||
|
// benevolent.
|
||||||
|
// I guess this is one case where users need to beware, and not mess with types
|
||||||
|
// concurrently!
|
||||||
|
|
||||||
|
auto&& type = old->second;
|
||||||
|
auto&& keyspace = type->_keyspace;
|
||||||
|
auto&& name = type->_name;
|
||||||
|
|
||||||
|
for (auto&& ut : all_types | boost::adaptors::map_values) {
|
||||||
|
if (ut->_keyspace == keyspace && ut->_name == name) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ut->references_user_type(keyspace, name)) {
|
||||||
|
throw exceptions::invalid_request_exception(sprint("Cannot drop user type %s.%s as it is still used by user type %s", keyspace, type->get_name_as_string(), ut->get_name_as_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto&& cfm : ks.metadata()->cf_meta_data() | boost::adaptors::map_values) {
|
||||||
|
for (auto&& col : cfm->all_columns()) {
|
||||||
|
if (col.second->type->references_user_type(keyspace, name)) {
|
||||||
|
throw exceptions::invalid_request_exception(sprint("Cannot drop user type %s.%s as it is still used by table %s.%s", keyspace, type->get_name_as_string(), cfm->ks_name(), cfm->cf_name()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} catch (no_such_keyspace& e) {
|
} catch (no_such_keyspace& e) {
|
||||||
throw exceptions::invalid_request_exception(sprint("Cannot drop type in unknown keyspace %s", keyspace()));
|
throw exceptions::invalid_request_exception(sprint("Cannot drop type in unknown keyspace %s", keyspace()));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -597,9 +597,11 @@ namespace raw {
|
|||||||
|
|
||||||
::shared_ptr<prepared_statement>
|
::shared_ptr<prepared_statement>
|
||||||
modification_statement::modification_statement::prepare(database& db, cql_stats& stats) {
|
modification_statement::modification_statement::prepare(database& db, cql_stats& stats) {
|
||||||
|
schema_ptr schema = validation::validate_column_family(db, keyspace(), column_family());
|
||||||
auto bound_names = get_bound_variables();
|
auto bound_names = get_bound_variables();
|
||||||
auto statement = prepare(db, bound_names, stats);
|
auto statement = prepare(db, bound_names, stats);
|
||||||
return ::make_shared<prepared>(std::move(statement), *bound_names);
|
auto partition_key_bind_indices = bound_names->get_partition_key_bind_indexes(schema);
|
||||||
|
return ::make_shared<prepared>(std::move(statement), *bound_names, std::move(partition_key_bind_indices));
|
||||||
}
|
}
|
||||||
|
|
||||||
::shared_ptr<cql3::statements::modification_statement>
|
::shared_ptr<cql3::statements::modification_statement>
|
||||||
|
|||||||
@@ -67,21 +67,22 @@ bool parsed_statement::uses_function(const sstring& ks_name, const sstring& func
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
prepared_statement::prepared_statement(::shared_ptr<cql_statement> statement_, std::vector<::shared_ptr<column_specification>> bound_names_)
|
prepared_statement::prepared_statement(::shared_ptr<cql_statement> statement_, std::vector<::shared_ptr<column_specification>> bound_names_, std::vector<uint16_t> partition_key_bind_indices)
|
||||||
: statement(std::move(statement_))
|
: statement(std::move(statement_))
|
||||||
, bound_names(std::move(bound_names_))
|
, bound_names(std::move(bound_names_))
|
||||||
|
, partition_key_bind_indices(std::move(partition_key_bind_indices))
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
prepared_statement::prepared_statement(::shared_ptr<cql_statement> statement_, const variable_specifications& names)
|
prepared_statement::prepared_statement(::shared_ptr<cql_statement> statement_, const variable_specifications& names, const std::vector<uint16_t>& partition_key_bind_indices)
|
||||||
: prepared_statement(statement_, names.get_specifications())
|
: prepared_statement(statement_, names.get_specifications(), partition_key_bind_indices)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
prepared_statement::prepared_statement(::shared_ptr<cql_statement> statement_, variable_specifications&& names)
|
prepared_statement::prepared_statement(::shared_ptr<cql_statement> statement_, variable_specifications&& names, std::vector<uint16_t>&& partition_key_bind_indices)
|
||||||
: prepared_statement(statement_, std::move(names).get_specifications())
|
: prepared_statement(statement_, std::move(names).get_specifications(), std::move(partition_key_bind_indices))
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
prepared_statement::prepared_statement(::shared_ptr<cql_statement>&& statement_)
|
prepared_statement::prepared_statement(::shared_ptr<cql_statement>&& statement_)
|
||||||
: prepared_statement(statement_, std::vector<::shared_ptr<column_specification>>())
|
: prepared_statement(statement_, std::vector<::shared_ptr<column_specification>>(), std::vector<uint16_t>())
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -60,12 +60,13 @@ public:
|
|||||||
sstring raw_cql_statement;
|
sstring raw_cql_statement;
|
||||||
const ::shared_ptr<cql_statement> statement;
|
const ::shared_ptr<cql_statement> statement;
|
||||||
const std::vector<::shared_ptr<column_specification>> bound_names;
|
const std::vector<::shared_ptr<column_specification>> bound_names;
|
||||||
|
std::vector<uint16_t> partition_key_bind_indices;
|
||||||
|
|
||||||
prepared_statement(::shared_ptr<cql_statement> statement_, std::vector<::shared_ptr<column_specification>> bound_names_);
|
prepared_statement(::shared_ptr<cql_statement> statement_, std::vector<::shared_ptr<column_specification>> bound_names_, std::vector<uint16_t> partition_key_bind_indices);
|
||||||
|
|
||||||
prepared_statement(::shared_ptr<cql_statement> statement_, const variable_specifications& names);
|
prepared_statement(::shared_ptr<cql_statement> statement_, const variable_specifications& names, const std::vector<uint16_t>& partition_key_bind_indices);
|
||||||
|
|
||||||
prepared_statement(::shared_ptr<cql_statement> statement_, variable_specifications&& names);
|
prepared_statement(::shared_ptr<cql_statement> statement_, variable_specifications&& names, std::vector<uint16_t>&& partition_key_bind_indices);
|
||||||
|
|
||||||
prepared_statement(::shared_ptr<cql_statement>&& statement_);
|
prepared_statement(::shared_ptr<cql_statement>&& statement_);
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -445,7 +445,9 @@ select_statement::select_statement(::shared_ptr<cf_name> cf_name,
|
|||||||
prepare_limit(db, bound_names),
|
prepare_limit(db, bound_names),
|
||||||
stats);
|
stats);
|
||||||
|
|
||||||
return ::make_shared<prepared>(std::move(stmt), std::move(*bound_names));
|
auto partition_key_bind_indices = bound_names->get_partition_key_bind_indexes(schema);
|
||||||
|
|
||||||
|
return ::make_shared<prepared>(std::move(stmt), std::move(*bound_names), std::move(partition_key_bind_indices));
|
||||||
}
|
}
|
||||||
|
|
||||||
::shared_ptr<restrictions::statement_restrictions>
|
::shared_ptr<restrictions::statement_restrictions>
|
||||||
|
|||||||
98
cql3/variable_specifications.cc
Normal file
98
cql3/variable_specifications.cc
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2015 ScyllaDB
|
||||||
|
*
|
||||||
|
* Modified by ScyllaDB
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This file is part of Scylla.
|
||||||
|
*
|
||||||
|
* Scylla is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Scylla is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cql3/variable_specifications.hh"
|
||||||
|
|
||||||
|
namespace cql3 {
|
||||||
|
|
||||||
|
// Copies the bind variable names and sizes the per-variable tables to match:
// both `_specs` and `_target_columns` start with one null entry per variable.
variable_specifications::variable_specifications(const std::vector<::shared_ptr<column_identifier>>& variable_names)
    : _variable_names(variable_names)
    , _specs(variable_names.size())
    , _target_columns(variable_names.size())
{
}
|
||||||
|
|
||||||
|
// Returns a newly allocated instance constructed from an empty list of
// variable names.
::shared_ptr<variable_specifications> variable_specifications::empty() {
|
||||||
|
return ::make_shared<variable_specifications>(std::vector<::shared_ptr<column_identifier>>{});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the number of variable names this instance was constructed with.
size_t variable_specifications::size() const {
|
||||||
|
return _variable_names.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Lvalue overload: hands back a copy of the stored specifications, leaving
// this object unchanged.
std::vector<::shared_ptr<column_specification>> variable_specifications::get_specifications() const & {
    std::vector<::shared_ptr<column_specification>> copy;
    copy.reserve(_specs.size());
    for (const auto& spec : _specs) {
        copy.push_back(spec);
    }
    return copy;
}
|
||||||
|
|
||||||
|
// Rvalue overload: moves the stored specifications out instead of copying
// them, so `_specs` is left in a moved-from state afterwards.
std::vector<::shared_ptr<column_specification>> variable_specifications::get_specifications() && {
|
||||||
|
return std::move(_specs);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Maps every partition key column of `schema` to the index of a bind variable
// that targets it.
//
// The result is indexed by the partition key column's position; each element
// is the index of the bind variable whose target column is that key column
// (if several variables target the same column, the last one wins).
// Returns an empty vector when at least one partition key column is not
// targeted by any bind variable.
std::vector<uint16_t> variable_specifications::get_partition_key_bind_indexes(schema_ptr schema) const {
    auto count = schema->partition_key_columns().size();
    std::vector<uint16_t> partition_key_positions(count, uint16_t(0));
    std::vector<bool> set(count, false);
    for (size_t i = 0; i < _target_columns.size(); i++) {
        auto& target_column = _target_columns[i];
        // Entries start out null and are only filled in by add(); a variable
        // that was never registered cannot target a partition key column, and
        // must not be dereferenced.
        if (!target_column) {
            continue;
        }
        const auto* cdef = schema->get_column_definition(target_column->name->name());
        if (cdef && cdef->is_partition_key()) {
            partition_key_positions[cdef->position()] = i;
            set[cdef->position()] = true;
        }
    }
    // All-or-nothing: a partially covered partition key yields no indexes.
    for (bool b : set) {
        if (!b) {
            return {};
        }
    }
    return partition_key_positions;
}
|
||||||
|
|
||||||
|
void variable_specifications::add(int32_t bind_index, ::shared_ptr<column_specification> spec) {
|
||||||
|
_target_columns[bind_index] = spec;
|
||||||
|
auto name = _variable_names[bind_index];
|
||||||
|
// Use the user name, if there is one
|
||||||
|
if (name) {
|
||||||
|
spec = ::make_shared<column_specification>(spec->ks_name, spec->cf_name, name, spec->type);
|
||||||
|
}
|
||||||
|
_specs[bind_index] = spec;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -53,41 +53,26 @@ class variable_specifications final {
|
|||||||
private:
|
private:
|
||||||
std::vector<shared_ptr<column_identifier>> _variable_names;
|
std::vector<shared_ptr<column_identifier>> _variable_names;
|
||||||
std::vector<::shared_ptr<column_specification>> _specs;
|
std::vector<::shared_ptr<column_specification>> _specs;
|
||||||
|
std::vector<::shared_ptr<column_specification>> _target_columns;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
variable_specifications(const std::vector<::shared_ptr<column_identifier>>& variable_names)
|
variable_specifications(const std::vector<::shared_ptr<column_identifier>>& variable_names);
|
||||||
: _variable_names{variable_names}
|
|
||||||
, _specs{variable_names.size()}
|
|
||||||
{ }
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns an empty instance of <code>VariableSpecifications</code>.
|
* Returns an empty instance of <code>VariableSpecifications</code>.
|
||||||
* @return an empty instance of <code>VariableSpecifications</code>
|
* @return an empty instance of <code>VariableSpecifications</code>
|
||||||
*/
|
*/
|
||||||
static ::shared_ptr<variable_specifications> empty() {
|
static ::shared_ptr<variable_specifications> empty();
|
||||||
return ::make_shared<variable_specifications>(std::vector<::shared_ptr<column_identifier>>{});
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t size() const {
|
size_t size() const;
|
||||||
return _variable_names.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<::shared_ptr<column_specification>> get_specifications() const & {
|
std::vector<::shared_ptr<column_specification>> get_specifications() const &;
|
||||||
return std::vector<::shared_ptr<column_specification>>(_specs.begin(), _specs.end());
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<::shared_ptr<column_specification>> get_specifications() && {
|
std::vector<::shared_ptr<column_specification>> get_specifications() &&;
|
||||||
return std::move(_specs);
|
|
||||||
}
|
|
||||||
|
|
||||||
void add(int32_t bind_index, ::shared_ptr<column_specification> spec) {
|
std::vector<uint16_t> get_partition_key_bind_indexes(schema_ptr schema) const;
|
||||||
auto name = _variable_names[bind_index];
|
|
||||||
// Use the user name, if there is one
|
void add(int32_t bind_index, ::shared_ptr<column_specification> spec);
|
||||||
if (name) {
|
|
||||||
spec = ::make_shared<column_specification>(spec->ks_name, spec->cf_name, name, spec->type);
|
|
||||||
}
|
|
||||||
_specs[bind_index] = spec;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
130
database.cc
130
database.cc
@@ -165,8 +165,9 @@ column_family::sstables_as_mutation_source() {
|
|||||||
const dht::partition_range& r,
|
const dht::partition_range& r,
|
||||||
const query::partition_slice& slice,
|
const query::partition_slice& slice,
|
||||||
const io_priority_class& pc,
|
const io_priority_class& pc,
|
||||||
tracing::trace_state_ptr trace_state) {
|
tracing::trace_state_ptr trace_state,
|
||||||
return make_sstable_reader(std::move(s), r, slice, pc, std::move(trace_state));
|
mutation_reader::forwarding fwd_mr) {
|
||||||
|
return make_sstable_reader(std::move(s), r, slice, pc, std::move(trace_state), fwd_mr);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -364,12 +365,13 @@ class range_sstable_reader final : public combined_mutation_reader {
|
|||||||
const io_priority_class& _pc;
|
const io_priority_class& _pc;
|
||||||
tracing::trace_state_ptr _trace_state;
|
tracing::trace_state_ptr _trace_state;
|
||||||
const query::partition_slice& _slice;
|
const query::partition_slice& _slice;
|
||||||
|
mutation_reader::forwarding _fwd_mr;
|
||||||
private:
|
private:
|
||||||
std::unique_ptr<mutation_reader> create_reader(sstables::shared_sstable sst) {
|
std::unique_ptr<mutation_reader> create_reader(sstables::shared_sstable sst) {
|
||||||
tracing::trace(_trace_state, "Reading partition range {} from sstable {}", *_pr, seastar::value_of([&sst] { return sst->get_filename(); }));
|
tracing::trace(_trace_state, "Reading partition range {} from sstable {}", *_pr, seastar::value_of([&sst] { return sst->get_filename(); }));
|
||||||
// FIXME: make sstable::read_range_rows() return ::mutation_reader so that we can drop this wrapper.
|
// FIXME: make sstable::read_range_rows() return ::mutation_reader so that we can drop this wrapper.
|
||||||
mutation_reader reader =
|
mutation_reader reader =
|
||||||
make_mutation_reader<sstable_range_wrapping_reader>(sst, _s, *_pr, _slice, _pc);
|
make_mutation_reader<sstable_range_wrapping_reader>(sst, _s, *_pr, _slice, _pc, _fwd_mr);
|
||||||
if (sst->is_shared()) {
|
if (sst->is_shared()) {
|
||||||
reader = make_filtering_reader(std::move(reader), belongs_to_current_shard);
|
reader = make_filtering_reader(std::move(reader), belongs_to_current_shard);
|
||||||
}
|
}
|
||||||
@@ -381,13 +383,15 @@ public:
|
|||||||
const dht::partition_range& pr,
|
const dht::partition_range& pr,
|
||||||
const query::partition_slice& slice,
|
const query::partition_slice& slice,
|
||||||
const io_priority_class& pc,
|
const io_priority_class& pc,
|
||||||
tracing::trace_state_ptr trace_state)
|
tracing::trace_state_ptr trace_state,
|
||||||
|
mutation_reader::forwarding fwd_mr)
|
||||||
: _s(s)
|
: _s(s)
|
||||||
, _pr(&pr)
|
, _pr(&pr)
|
||||||
, _sstables(std::move(sstables))
|
, _sstables(std::move(sstables))
|
||||||
, _pc(pc)
|
, _pc(pc)
|
||||||
, _trace_state(std::move(trace_state))
|
, _trace_state(std::move(trace_state))
|
||||||
, _slice(slice)
|
, _slice(slice)
|
||||||
|
, _fwd_mr(fwd_mr)
|
||||||
{
|
{
|
||||||
auto ssts = _sstables->select(pr);
|
auto ssts = _sstables->select(pr);
|
||||||
std::vector<mutation_reader*> readers;
|
std::vector<mutation_reader*> readers;
|
||||||
@@ -506,7 +510,8 @@ column_family::make_sstable_reader(schema_ptr s,
|
|||||||
const dht::partition_range& pr,
|
const dht::partition_range& pr,
|
||||||
const query::partition_slice& slice,
|
const query::partition_slice& slice,
|
||||||
const io_priority_class& pc,
|
const io_priority_class& pc,
|
||||||
tracing::trace_state_ptr trace_state) const {
|
tracing::trace_state_ptr trace_state,
|
||||||
|
mutation_reader::forwarding fwd_mr) const {
|
||||||
// restricts a reader's concurrency if the configuration specifies it
|
// restricts a reader's concurrency if the configuration specifies it
|
||||||
auto restrict_reader = [&] (mutation_reader&& in) {
|
auto restrict_reader = [&] (mutation_reader&& in) {
|
||||||
auto&& config = [this, &pc] () -> const restricted_mutation_reader_config& {
|
auto&& config = [this, &pc] () -> const restricted_mutation_reader_config& {
|
||||||
@@ -522,6 +527,10 @@ column_family::make_sstable_reader(schema_ptr s,
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// CAVEAT: if make_sstable_reader() is called on a single partition
|
||||||
|
// we want to optimize and read exactly this partition. As a
|
||||||
|
// consequence, fast_forward_to() will *NOT* work on the result,
|
||||||
|
// regardless of what the fwd_mr parameter says.
|
||||||
if (pr.is_singular() && pr.start()->value().has_key()) {
|
if (pr.is_singular() && pr.start()->value().has_key()) {
|
||||||
const dht::ring_position& pos = pr.start()->value();
|
const dht::ring_position& pos = pr.start()->value();
|
||||||
if (dht::shard_of(pos.token()) != engine().cpu_id()) {
|
if (dht::shard_of(pos.token()) != engine().cpu_id()) {
|
||||||
@@ -531,7 +540,7 @@ column_family::make_sstable_reader(schema_ptr s,
|
|||||||
_stats.estimated_sstable_per_read, *pos.key(), slice, pc, std::move(trace_state)));
|
_stats.estimated_sstable_per_read, *pos.key(), slice, pc, std::move(trace_state)));
|
||||||
} else {
|
} else {
|
||||||
// range_sstable_reader is not movable so we need to wrap it
|
// range_sstable_reader is not movable so we need to wrap it
|
||||||
return restrict_reader(make_mutation_reader<range_sstable_reader>(std::move(s), _sstables, pr, slice, pc, std::move(trace_state)));
|
return restrict_reader(make_mutation_reader<range_sstable_reader>(std::move(s), _sstables, pr, slice, pc, std::move(trace_state), fwd_mr));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -578,7 +587,8 @@ column_family::make_reader(schema_ptr s,
|
|||||||
const dht::partition_range& range,
|
const dht::partition_range& range,
|
||||||
const query::partition_slice& slice,
|
const query::partition_slice& slice,
|
||||||
const io_priority_class& pc,
|
const io_priority_class& pc,
|
||||||
tracing::trace_state_ptr trace_state) const {
|
tracing::trace_state_ptr trace_state,
|
||||||
|
mutation_reader::forwarding fwd_mr) const {
|
||||||
if (_virtual_reader) {
|
if (_virtual_reader) {
|
||||||
return _virtual_reader(s, range, slice, pc, trace_state);
|
return _virtual_reader(s, range, slice, pc, trace_state);
|
||||||
}
|
}
|
||||||
@@ -607,13 +617,13 @@ column_family::make_reader(schema_ptr s,
|
|||||||
// https://github.com/scylladb/scylla/issues/185
|
// https://github.com/scylladb/scylla/issues/185
|
||||||
|
|
||||||
for (auto&& mt : *_memtables) {
|
for (auto&& mt : *_memtables) {
|
||||||
readers.emplace_back(mt->make_reader(s, range, slice, pc));
|
readers.emplace_back(mt->make_reader(s, range, slice, pc, trace_state, fwd_mr));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (_config.enable_cache) {
|
if (_config.enable_cache) {
|
||||||
readers.emplace_back(_cache.make_reader(s, range, slice, pc, std::move(trace_state)));
|
readers.emplace_back(_cache.make_reader(s, range, slice, pc, std::move(trace_state), fwd_mr));
|
||||||
} else {
|
} else {
|
||||||
readers.emplace_back(make_sstable_reader(s, range, slice, pc, std::move(trace_state)));
|
readers.emplace_back(make_sstable_reader(s, range, slice, pc, std::move(trace_state), fwd_mr));
|
||||||
}
|
}
|
||||||
|
|
||||||
return make_combined_reader(std::move(readers));
|
return make_combined_reader(std::move(readers));
|
||||||
@@ -629,10 +639,10 @@ column_family::make_streaming_reader(schema_ptr s,
|
|||||||
readers.reserve(_memtables->size() + 1);
|
readers.reserve(_memtables->size() + 1);
|
||||||
|
|
||||||
for (auto&& mt : *_memtables) {
|
for (auto&& mt : *_memtables) {
|
||||||
readers.emplace_back(mt->make_reader(s, range, slice, pc));
|
readers.emplace_back(mt->make_reader(s, range, slice, pc, nullptr, mutation_reader::forwarding::no));
|
||||||
}
|
}
|
||||||
|
|
||||||
readers.emplace_back(make_sstable_reader(s, range, slice, pc, nullptr));
|
readers.emplace_back(make_sstable_reader(s, range, slice, pc, nullptr, mutation_reader::forwarding::no));
|
||||||
|
|
||||||
return make_combined_reader(std::move(readers));
|
return make_combined_reader(std::move(readers));
|
||||||
}
|
}
|
||||||
@@ -644,17 +654,17 @@ column_family::make_streaming_reader(schema_ptr s,
|
|||||||
auto& pc = service::get_local_streaming_read_priority();
|
auto& pc = service::get_local_streaming_read_priority();
|
||||||
|
|
||||||
auto source = mutation_source([this] (schema_ptr s, const dht::partition_range& range, const query::partition_slice& slice,
|
auto source = mutation_source([this] (schema_ptr s, const dht::partition_range& range, const query::partition_slice& slice,
|
||||||
const io_priority_class& pc, tracing::trace_state_ptr trace_state) {
|
const io_priority_class& pc, tracing::trace_state_ptr trace_state, mutation_reader::forwarding fwd_mr) {
|
||||||
std::vector<mutation_reader> readers;
|
std::vector<mutation_reader> readers;
|
||||||
readers.reserve(_memtables->size() + 1);
|
readers.reserve(_memtables->size() + 1);
|
||||||
for (auto&& mt : *_memtables) {
|
for (auto&& mt : *_memtables) {
|
||||||
readers.emplace_back(mt->make_reader(s, range, slice, pc));
|
readers.emplace_back(mt->make_reader(s, range, slice, pc, trace_state, fwd_mr));
|
||||||
}
|
}
|
||||||
readers.emplace_back(make_sstable_reader(s, range, slice, pc, std::move(trace_state)));
|
readers.emplace_back(make_sstable_reader(s, range, slice, pc, std::move(trace_state), fwd_mr));
|
||||||
return make_combined_reader(std::move(readers));
|
return make_combined_reader(std::move(readers));
|
||||||
});
|
});
|
||||||
|
|
||||||
return make_multi_range_reader(s, std::move(source), ranges, slice, pc, nullptr);
|
return make_multi_range_reader(s, std::move(source), ranges, slice, pc, nullptr, mutation_reader::forwarding::no);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<std::vector<locked_cell>> column_family::lock_counter_cells(const mutation& m) {
|
future<std::vector<locked_cell>> column_family::lock_counter_cells(const mutation& m) {
|
||||||
@@ -939,7 +949,8 @@ column_family::seal_active_streaming_memtable_immediate() {
|
|||||||
}).then([this, old, newtab] () {
|
}).then([this, old, newtab] () {
|
||||||
add_sstable(newtab, {engine().cpu_id()});
|
add_sstable(newtab, {engine().cpu_id()});
|
||||||
trigger_compaction();
|
trigger_compaction();
|
||||||
}).handle_exception([] (auto ep) {
|
}).handle_exception([newtab] (auto ep) {
|
||||||
|
newtab->mark_for_deletion();
|
||||||
dblog.error("failed to write streamed sstable: {}", ep);
|
dblog.error("failed to write streamed sstable: {}", ep);
|
||||||
return make_exception_future<>(ep);
|
return make_exception_future<>(ep);
|
||||||
});
|
});
|
||||||
@@ -977,7 +988,8 @@ future<> column_family::seal_active_streaming_memtable_big(streaming_memtable_bi
|
|||||||
auto&& priority = service::get_local_streaming_write_priority();
|
auto&& priority = service::get_local_streaming_write_priority();
|
||||||
return newtab->write_components(*old, incremental_backups_enabled(), priority, true).then([this, newtab, old, &smb] {
|
return newtab->write_components(*old, incremental_backups_enabled(), priority, true).then([this, newtab, old, &smb] {
|
||||||
smb.sstables.emplace_back(newtab);
|
smb.sstables.emplace_back(newtab);
|
||||||
}).handle_exception([] (auto ep) {
|
}).handle_exception([newtab] (auto ep) {
|
||||||
|
newtab->mark_for_deletion();
|
||||||
dblog.error("failed to write streamed sstable: {}", ep);
|
dblog.error("failed to write streamed sstable: {}", ep);
|
||||||
return make_exception_future<>(ep);
|
return make_exception_future<>(ep);
|
||||||
});
|
});
|
||||||
@@ -1082,6 +1094,7 @@ column_family::try_flush_memtable_to_sstable(lw_shared_ptr<memtable> old) {
|
|||||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||||
});
|
});
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
|
newtab->mark_for_deletion();
|
||||||
dblog.error("failed to write sstable {}: {}", newtab->get_filename(), std::current_exception());
|
dblog.error("failed to write sstable {}: {}", newtab->get_filename(), std::current_exception());
|
||||||
// If we failed this write we will try the write again and that will create a new flush reader
|
// If we failed this write we will try the write again and that will create a new flush reader
|
||||||
// that will decrease dirty memory again. So we need to reset the accounting.
|
// that will decrease dirty memory again. So we need to reset the accounting.
|
||||||
@@ -1250,7 +1263,7 @@ void column_family::rebuild_statistics() {
|
|||||||
// making the two ranges compatible when compiling with boost 1.55.
|
// making the two ranges compatible when compiling with boost 1.55.
|
||||||
// Noone is actually moving anything...
|
// Noone is actually moving anything...
|
||||||
std::move(*_sstables->all()))) {
|
std::move(*_sstables->all()))) {
|
||||||
update_stats_for_new_sstable(tab->data_size(), tab->get_shards_for_this_sstable());
|
update_stats_for_new_sstable(tab->bytes_on_disk(), tab->get_shards_for_this_sstable());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1357,7 +1370,7 @@ column_family::compact_sstables(sstables::compaction_descriptor descriptor, bool
|
|||||||
}
|
}
|
||||||
|
|
||||||
static bool needs_cleanup(const lw_shared_ptr<sstables::sstable>& sst,
|
static bool needs_cleanup(const lw_shared_ptr<sstables::sstable>& sst,
|
||||||
const lw_shared_ptr<dht::token_range_vector>& owned_ranges,
|
const dht::token_range_vector& owned_ranges,
|
||||||
schema_ptr s) {
|
schema_ptr s) {
|
||||||
auto first = sst->get_first_partition_key();
|
auto first = sst->get_first_partition_key();
|
||||||
auto last = sst->get_last_partition_key();
|
auto last = sst->get_last_partition_key();
|
||||||
@@ -1366,7 +1379,7 @@ static bool needs_cleanup(const lw_shared_ptr<sstables::sstable>& sst,
|
|||||||
dht::token_range sst_token_range = dht::token_range::make(first_token, last_token);
|
dht::token_range sst_token_range = dht::token_range::make(first_token, last_token);
|
||||||
|
|
||||||
// return true iff sst partition range isn't fully contained in any of the owned ranges.
|
// return true iff sst partition range isn't fully contained in any of the owned ranges.
|
||||||
for (auto& r : *owned_ranges) {
|
for (auto& r : owned_ranges) {
|
||||||
if (r.contains(sst_token_range, dht::token_comparator())) {
|
if (r.contains(sst_token_range, dht::token_comparator())) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -1376,17 +1389,24 @@ static bool needs_cleanup(const lw_shared_ptr<sstables::sstable>& sst,
|
|||||||
|
|
||||||
future<> column_family::cleanup_sstables(sstables::compaction_descriptor descriptor) {
|
future<> column_family::cleanup_sstables(sstables::compaction_descriptor descriptor) {
|
||||||
dht::token_range_vector r = service::get_local_storage_service().get_local_ranges(_schema->ks_name());
|
dht::token_range_vector r = service::get_local_storage_service().get_local_ranges(_schema->ks_name());
|
||||||
auto owned_ranges = make_lw_shared<dht::token_range_vector>(std::move(r));
|
|
||||||
auto sstables_to_cleanup = make_lw_shared<std::vector<sstables::shared_sstable>>(std::move(descriptor.sstables));
|
|
||||||
|
|
||||||
return parallel_for_each(*sstables_to_cleanup, [this, owned_ranges = std::move(owned_ranges), sstables_to_cleanup] (auto& sst) {
|
return do_with(std::move(descriptor.sstables), std::move(r), [this] (auto& sstables, auto& owned_ranges) {
|
||||||
if (!owned_ranges->empty() && !needs_cleanup(sst, owned_ranges, _schema)) {
|
return do_for_each(sstables, [this, &owned_ranges] (auto& sst) {
|
||||||
|
if (!owned_ranges.empty() && !needs_cleanup(sst, owned_ranges, _schema)) {
|
||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<sstables::shared_sstable> sstable_to_compact({ sst });
|
// this semaphore ensures that only one cleanup will run per shard.
|
||||||
return this->compact_sstables(sstables::compaction_descriptor(std::move(sstable_to_compact), sst->get_sstable_level()), true);
|
// That's to prevent node from running out of space when almost all sstables
|
||||||
|
// need cleanup, so if sstables are cleaned in parallel, we may need almost
|
||||||
|
// twice the disk space used by those sstables.
|
||||||
|
static thread_local semaphore sem(1);
|
||||||
|
|
||||||
|
return with_semaphore(sem, 1, [this, &sst] {
|
||||||
|
return this->compact_sstables(sstables::compaction_descriptor({ sst }, sst->get_sstable_level()), true);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: this is just an example, should be changed to something more general
|
// FIXME: this is just an example, should be changed to something more general
|
||||||
@@ -1525,16 +1545,19 @@ future<> distributed_loader::open_sstable(distributed<database>& db, sstables::e
|
|||||||
|
|
||||||
return db.invoke_on(column_family::calculate_shard_from_sstable_generation(comps.generation),
|
return db.invoke_on(column_family::calculate_shard_from_sstable_generation(comps.generation),
|
||||||
[&db, comps = std::move(comps), func = std::move(func)] (database& local) {
|
[&db, comps = std::move(comps), func = std::move(func)] (database& local) {
|
||||||
auto& cf = local.find_column_family(comps.ks, comps.cf);
|
|
||||||
|
|
||||||
auto f = sstables::sstable::load_shared_components(cf.schema(), cf._config.datadir, comps.generation, comps.version, comps.format);
|
return with_semaphore(local.sstable_load_concurrency_sem(), 1, [&db, &local, comps = std::move(comps), func = std::move(func)] {
|
||||||
return f.then([&db, comps = std::move(comps), func = std::move(func)] (sstables::sstable_open_info info) {
|
auto& cf = local.find_column_family(comps.ks, comps.cf);
|
||||||
// shared components loaded, now opening sstable in all shards with shared components
|
|
||||||
return do_with(std::move(info), [&db, comps = std::move(comps), func = std::move(func)] (auto& info) {
|
auto f = sstables::sstable::load_shared_components(cf.schema(), cf._config.datadir, comps.generation, comps.version, comps.format);
|
||||||
return invoke_all_with_ptr(db, std::move(info.components),
|
return f.then([&db, comps = std::move(comps), func = std::move(func)] (sstables::sstable_open_info info) {
|
||||||
[owners = info.owners, data = info.data.dup(), index = info.index.dup(), comps, func] (database& db, auto components) {
|
// shared components loaded, now opening sstable in all shards with shared components
|
||||||
auto& cf = db.find_column_family(comps.ks, comps.cf);
|
return do_with(std::move(info), [&db, comps = std::move(comps), func = std::move(func)] (auto& info) {
|
||||||
return func(cf, sstables::foreign_sstable_open_info{std::move(components), owners, data, index});
|
return invoke_all_with_ptr(db, std::move(info.components),
|
||||||
|
[owners = info.owners, data = info.data.dup(), index = info.index.dup(), comps, func] (database& db, auto components) {
|
||||||
|
auto& cf = db.find_column_family(comps.ks, comps.cf);
|
||||||
|
return func(cf, sstables::foreign_sstable_open_info{std::move(components), owners, data, index});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@@ -1706,7 +1729,7 @@ future<> distributed_loader::populate_column_family(distributed<database>& db, s
|
|||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
});
|
});
|
||||||
}).then([verifier, sstdir, descriptor, ks = std::move(ks), cf = std::move(cf)] {
|
}).then([verifier, sstdir, descriptor, ks = std::move(ks), cf = std::move(cf)] {
|
||||||
return parallel_for_each(*verifier, [sstdir = std::move(sstdir), ks = std::move(ks), cf = std::move(cf), descriptor] (auto v) {
|
return do_for_each(*verifier, [sstdir = std::move(sstdir), ks = std::move(ks), cf = std::move(cf), descriptor, verifier] (auto v) {
|
||||||
if (v.second == status::has_temporary_toc_file) {
|
if (v.second == status::has_temporary_toc_file) {
|
||||||
unsigned long gen = v.first;
|
unsigned long gen = v.first;
|
||||||
assert(descriptor->version);
|
assert(descriptor->version);
|
||||||
@@ -1745,9 +1768,9 @@ database::database(const db::config& cfg)
|
|||||||
: _stats(make_lw_shared<db_stats>())
|
: _stats(make_lw_shared<db_stats>())
|
||||||
, _cfg(std::make_unique<db::config>(cfg))
|
, _cfg(std::make_unique<db::config>(cfg))
|
||||||
// Allow system tables a pool of 10 MB memory to write, but never block on other regions.
|
// Allow system tables a pool of 10 MB memory to write, but never block on other regions.
|
||||||
, _system_dirty_memory_manager(*this, 10 << 20)
|
, _system_dirty_memory_manager(*this, 10 << 20, cfg.virtual_dirty_soft_limit())
|
||||||
, _dirty_memory_manager(*this, memory::stats().total_memory() * 0.45)
|
, _dirty_memory_manager(*this, memory::stats().total_memory() * 0.45, cfg.virtual_dirty_soft_limit())
|
||||||
, _streaming_dirty_memory_manager(*this, memory::stats().total_memory() * 0.10)
|
, _streaming_dirty_memory_manager(*this, memory::stats().total_memory() * 0.10, cfg.virtual_dirty_soft_limit())
|
||||||
, _version(empty_version)
|
, _version(empty_version)
|
||||||
, _enable_incremental_backups(cfg.incremental_backups())
|
, _enable_incremental_backups(cfg.incremental_backups())
|
||||||
{
|
{
|
||||||
@@ -1802,7 +1825,7 @@ database::setup_metrics() {
|
|||||||
});
|
});
|
||||||
|
|
||||||
_metrics.add_group("database", {
|
_metrics.add_group("database", {
|
||||||
sm::make_gauge("requests_blocked_memory", [this] { return _dirty_memory_manager.region_group().blocked_requests(); },
|
sm::make_gauge("requests_blocked_memory_current", [this] { return _dirty_memory_manager.region_group().blocked_requests(); },
|
||||||
sm::description(
|
sm::description(
|
||||||
seastar::format("Holds the current number of requests blocked due to reaching the memory quota ({}B). "
|
seastar::format("Holds the current number of requests blocked due to reaching the memory quota ({}B). "
|
||||||
"Non-zero value indicates that our bottleneck is memory and more specifically - the memory quota allocated for the \"database\" component.", _dirty_memory_manager.throttle_threshold()))),
|
"Non-zero value indicates that our bottleneck is memory and more specifically - the memory quota allocated for the \"database\" component.", _dirty_memory_manager.throttle_threshold()))),
|
||||||
@@ -2535,8 +2558,10 @@ column_family::as_mutation_source(tracing::trace_state_ptr trace_state) const {
|
|||||||
return mutation_source([this, trace_state = std::move(trace_state)] (schema_ptr s,
|
return mutation_source([this, trace_state = std::move(trace_state)] (schema_ptr s,
|
||||||
const dht::partition_range& range,
|
const dht::partition_range& range,
|
||||||
const query::partition_slice& slice,
|
const query::partition_slice& slice,
|
||||||
const io_priority_class& pc) {
|
const io_priority_class& pc,
|
||||||
return this->make_reader(std::move(s), range, slice, pc, std::move(trace_state));
|
tracing::trace_state_ptr trace_state,
|
||||||
|
mutation_reader::forwarding fwd_mr) {
|
||||||
|
return this->make_reader(std::move(s), range, slice, pc, std::move(trace_state), fwd_mr);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2663,7 +2688,7 @@ column_family::apply(const frozen_mutation& m, const schema_ptr& m_schema, const
|
|||||||
do_apply(m, m_schema, rp);
|
do_apply(m, m_schema, rp);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<frozen_mutation> database::do_apply_counter_update(column_family& cf, const frozen_mutation& fm, schema_ptr m_schema) {
|
future<frozen_mutation> database::do_apply_counter_update(column_family& cf, const frozen_mutation& fm, schema_ptr m_schema, timeout_clock::time_point timeout) {
|
||||||
auto m = fm.unfreeze(m_schema);
|
auto m = fm.unfreeze(m_schema);
|
||||||
m.upgrade(cf.schema());
|
m.upgrade(cf.schema());
|
||||||
|
|
||||||
@@ -2689,9 +2714,9 @@ future<frozen_mutation> database::do_apply_counter_update(column_family& cf, con
|
|||||||
cql_serialization_format::internal(), query::max_rows);
|
cql_serialization_format::internal(), query::max_rows);
|
||||||
|
|
||||||
return do_with(std::move(slice), std::move(m), std::vector<locked_cell>(), stdx::optional<frozen_mutation>(),
|
return do_with(std::move(slice), std::move(m), std::vector<locked_cell>(), stdx::optional<frozen_mutation>(),
|
||||||
[this, &cf] (const query::partition_slice& slice, mutation& m, std::vector<locked_cell>& locks,
|
[this, &cf, timeout] (const query::partition_slice& slice, mutation& m, std::vector<locked_cell>& locks,
|
||||||
stdx::optional<frozen_mutation>& fm) mutable {
|
stdx::optional<frozen_mutation>& fm) mutable {
|
||||||
return cf.lock_counter_cells(m).then([&, m_schema = cf.schema(), this] (std::vector<locked_cell> lcs) {
|
return cf.lock_counter_cells(m).then([&, timeout, m_schema = cf.schema(), this] (std::vector<locked_cell> lcs) {
|
||||||
locks = std::move(lcs);
|
locks = std::move(lcs);
|
||||||
|
|
||||||
// Before counter update is applied it needs to be transformed from
|
// Before counter update is applied it needs to be transformed from
|
||||||
@@ -2702,7 +2727,7 @@ future<frozen_mutation> database::do_apply_counter_update(column_family& cf, con
|
|||||||
return mutation_query(m_schema, cf.as_mutation_source({}),
|
return mutation_query(m_schema, cf.as_mutation_source({}),
|
||||||
dht::partition_range::make_singular(m.decorated_key()),
|
dht::partition_range::make_singular(m.decorated_key()),
|
||||||
slice, query::max_rows, query::max_partitions,
|
slice, query::max_rows, query::max_partitions,
|
||||||
gc_clock::now(), { }).then([this, &cf, &m, &fm, m_schema] (auto result) {
|
gc_clock::now(), { }).then([this, timeout, &cf, &m, &fm, m_schema] (auto result) {
|
||||||
|
|
||||||
// ...now, that we got existing state of all affected counter
|
// ...now, that we got existing state of all affected counter
|
||||||
// cells we can look for our shard in each of them, increment
|
// cells we can look for our shard in each of them, increment
|
||||||
@@ -2714,9 +2739,8 @@ future<frozen_mutation> database::do_apply_counter_update(column_family& cf, con
|
|||||||
transform_counter_updates_to_shards(m, mopt ? &*mopt : nullptr, cf.failed_counter_applies_to_memtable());
|
transform_counter_updates_to_shards(m, mopt ? &*mopt : nullptr, cf.failed_counter_applies_to_memtable());
|
||||||
|
|
||||||
// FIXME: oh dear, another freeze
|
// FIXME: oh dear, another freeze
|
||||||
// FIXME: timeout
|
|
||||||
fm = freeze(m);
|
fm = freeze(m);
|
||||||
return this->do_apply(m_schema, *fm, { });
|
return this->do_apply(m_schema, *fm, timeout);
|
||||||
}).then([&fm] {
|
}).then([&fm] {
|
||||||
return std::move(*fm);
|
return std::move(*fm);
|
||||||
});
|
});
|
||||||
@@ -2854,7 +2878,7 @@ future<> dirty_memory_manager::flush_when_needed() {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void dirty_memory_manager::start_reclaiming() {
|
void dirty_memory_manager::start_reclaiming() noexcept {
|
||||||
_should_flush.signal();
|
_should_flush.signal();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2876,7 +2900,7 @@ future<frozen_mutation> database::apply_counter_update(schema_ptr s, const froze
|
|||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
auto& cf = find_column_family(m.column_family_id());
|
auto& cf = find_column_family(m.column_family_id());
|
||||||
return do_apply_counter_update(cf, m, s);
|
return do_apply_counter_update(cf, m, s, timeout);
|
||||||
} catch (no_such_column_family&) {
|
} catch (no_such_column_family&) {
|
||||||
dblog.error("Attempting to mutate non-existent table {}", m.column_family_id());
|
dblog.error("Attempting to mutate non-existent table {}", m.column_family_id());
|
||||||
throw;
|
throw;
|
||||||
@@ -3103,6 +3127,10 @@ future<> database::truncate(const keyspace& ks, column_family& cf, timestamp_fun
|
|||||||
}
|
}
|
||||||
return f.then([&cf, truncated_at] {
|
return f.then([&cf, truncated_at] {
|
||||||
return cf.discard_sstables(truncated_at).then([&cf, truncated_at](db::replay_position rp) {
|
return cf.discard_sstables(truncated_at).then([&cf, truncated_at](db::replay_position rp) {
|
||||||
|
// TODO: verify that rp == db::replay_position is because we have no sstables (and no data flushed)
|
||||||
|
if (rp == db::replay_position()) {
|
||||||
|
return make_ready_future();
|
||||||
|
}
|
||||||
// TODO: indexes.
|
// TODO: indexes.
|
||||||
return db::system_keyspace::save_truncation_record(cf, truncated_at, rp);
|
return db::system_keyspace::save_truncation_record(cf, truncated_at, rp);
|
||||||
});
|
});
|
||||||
|
|||||||
20
database.hh
20
database.hh
@@ -149,7 +149,7 @@ class dirty_memory_manager: public logalloc::region_group_reclaimer {
|
|||||||
std::unordered_map<const logalloc::region*, flush_permit> _flush_manager;
|
std::unordered_map<const logalloc::region*, flush_permit> _flush_manager;
|
||||||
|
|
||||||
future<> _waiting_flush;
|
future<> _waiting_flush;
|
||||||
virtual void start_reclaiming() override;
|
virtual void start_reclaiming() noexcept override;
|
||||||
|
|
||||||
bool has_pressure() const {
|
bool has_pressure() const {
|
||||||
return over_soft_limit();
|
return over_soft_limit();
|
||||||
@@ -193,8 +193,8 @@ public:
|
|||||||
//
|
//
|
||||||
// We then set the soft limit to 80 % of the virtual dirty hard limit, which is equal to 40 % of
|
// We then set the soft limit to 80 % of the virtual dirty hard limit, which is equal to 40 % of
|
||||||
// the user-supplied threshold.
|
// the user-supplied threshold.
|
||||||
dirty_memory_manager(database& db, size_t threshold)
|
dirty_memory_manager(database& db, size_t threshold, double soft_limit)
|
||||||
: logalloc::region_group_reclaimer(threshold / 2, threshold * 0.40)
|
: logalloc::region_group_reclaimer(threshold / 2, threshold * soft_limit / 2)
|
||||||
, _db(&db)
|
, _db(&db)
|
||||||
, _region_group(*this)
|
, _region_group(*this)
|
||||||
, _flush_serializer(1)
|
, _flush_serializer(1)
|
||||||
@@ -582,7 +582,8 @@ private:
|
|||||||
const dht::partition_range& range,
|
const dht::partition_range& range,
|
||||||
const query::partition_slice& slice,
|
const query::partition_slice& slice,
|
||||||
const io_priority_class& pc,
|
const io_priority_class& pc,
|
||||||
tracing::trace_state_ptr trace_state) const;
|
tracing::trace_state_ptr trace_state,
|
||||||
|
mutation_reader::forwarding fwd_mr) const;
|
||||||
|
|
||||||
mutation_source sstables_as_mutation_source();
|
mutation_source sstables_as_mutation_source();
|
||||||
partition_presence_checker make_partition_presence_checker(lw_shared_ptr<sstables::sstable_set>);
|
partition_presence_checker make_partition_presence_checker(lw_shared_ptr<sstables::sstable_set>);
|
||||||
@@ -624,7 +625,8 @@ public:
|
|||||||
const dht::partition_range& range = query::full_partition_range,
|
const dht::partition_range& range = query::full_partition_range,
|
||||||
const query::partition_slice& slice = query::full_slice,
|
const query::partition_slice& slice = query::full_slice,
|
||||||
const io_priority_class& pc = default_priority_class(),
|
const io_priority_class& pc = default_priority_class(),
|
||||||
tracing::trace_state_ptr trace_state = nullptr) const;
|
tracing::trace_state_ptr trace_state = nullptr,
|
||||||
|
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::yes) const;
|
||||||
|
|
||||||
// The streaming mutation reader differs from the regular mutation reader in that:
|
// The streaming mutation reader differs from the regular mutation reader in that:
|
||||||
// - Reflects all writes accepted by replica prior to creation of the
|
// - Reflects all writes accepted by replica prior to creation of the
|
||||||
@@ -1076,6 +1078,7 @@ private:
|
|||||||
::cf_stats _cf_stats;
|
::cf_stats _cf_stats;
|
||||||
static constexpr size_t max_concurrent_reads() { return 100; }
|
static constexpr size_t max_concurrent_reads() { return 100; }
|
||||||
static constexpr size_t max_system_concurrent_reads() { return 10; }
|
static constexpr size_t max_system_concurrent_reads() { return 10; }
|
||||||
|
static constexpr size_t max_concurrent_sstable_loads() { return 3; }
|
||||||
struct db_stats {
|
struct db_stats {
|
||||||
uint64_t total_writes = 0;
|
uint64_t total_writes = 0;
|
||||||
uint64_t total_writes_failed = 0;
|
uint64_t total_writes_failed = 0;
|
||||||
@@ -1101,6 +1104,8 @@ private:
|
|||||||
semaphore _system_read_concurrency_sem{max_system_concurrent_reads()};
|
semaphore _system_read_concurrency_sem{max_system_concurrent_reads()};
|
||||||
restricted_mutation_reader_config _system_read_concurrency_config;
|
restricted_mutation_reader_config _system_read_concurrency_config;
|
||||||
|
|
||||||
|
semaphore _sstable_load_concurrency_sem{max_concurrent_sstable_loads()};
|
||||||
|
|
||||||
std::unordered_map<sstring, keyspace> _keyspaces;
|
std::unordered_map<sstring, keyspace> _keyspaces;
|
||||||
std::unordered_map<utils::UUID, lw_shared_ptr<column_family>> _column_families;
|
std::unordered_map<utils::UUID, lw_shared_ptr<column_family>> _column_families;
|
||||||
std::unordered_map<std::pair<sstring, sstring>, utils::UUID, utils::tuple_hash> _ks_cf_to_uuid;
|
std::unordered_map<std::pair<sstring, sstring>, utils::UUID, utils::tuple_hash> _ks_cf_to_uuid;
|
||||||
@@ -1126,7 +1131,7 @@ private:
|
|||||||
|
|
||||||
query::result_memory_limiter _result_memory_limiter;
|
query::result_memory_limiter _result_memory_limiter;
|
||||||
|
|
||||||
future<frozen_mutation> do_apply_counter_update(column_family& cf, const frozen_mutation& fm, schema_ptr m_schema);
|
future<frozen_mutation> do_apply_counter_update(column_family& cf, const frozen_mutation& fm, schema_ptr m_schema, timeout_clock::time_point timeout);
|
||||||
public:
|
public:
|
||||||
static utils::UUID empty_version;
|
static utils::UUID empty_version;
|
||||||
|
|
||||||
@@ -1257,6 +1262,9 @@ public:
|
|||||||
semaphore& system_keyspace_read_concurrency_sem() {
|
semaphore& system_keyspace_read_concurrency_sem() {
|
||||||
return _system_read_concurrency_sem;
|
return _system_read_concurrency_sem;
|
||||||
}
|
}
|
||||||
|
semaphore& sstable_load_concurrency_sem() {
|
||||||
|
return _sstable_load_concurrency_sem;
|
||||||
|
}
|
||||||
|
|
||||||
friend class distributed_loader;
|
friend class distributed_loader;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -492,6 +492,7 @@ public:
|
|||||||
if (shutdown) {
|
if (shutdown) {
|
||||||
auto me = shared_from_this();
|
auto me = shared_from_this();
|
||||||
return _gate.close().then([me] {
|
return _gate.close().then([me] {
|
||||||
|
me->_closed = true;
|
||||||
return me->sync().finally([me] {
|
return me->sync().finally([me] {
|
||||||
// When we get here, nothing should add ops,
|
// When we get here, nothing should add ops,
|
||||||
// and we should have waited out all pending.
|
// and we should have waited out all pending.
|
||||||
@@ -1281,6 +1282,7 @@ future<> db::commitlog::segment_manager::shutdown() {
|
|||||||
return _gate.close().then(std::bind(&segment_manager::sync_all_segments, this, true));
|
return _gate.close().then(std::bind(&segment_manager::sync_all_segments, this, true));
|
||||||
});
|
});
|
||||||
}).finally([this] {
|
}).finally([this] {
|
||||||
|
discard_unused_segments();
|
||||||
// Now that the gate is closed and requests completed we are sure nobody else will pop()
|
// Now that the gate is closed and requests completed we are sure nobody else will pop()
|
||||||
return clear_reserve_segments().finally([this] {
|
return clear_reserve_segments().finally([this] {
|
||||||
return std::move(_reserve_replenisher).then_wrapped([this] (auto f) {
|
return std::move(_reserve_replenisher).then_wrapped([this] (auto f) {
|
||||||
@@ -1588,7 +1590,7 @@ db::commitlog::read_log_file(file f, commit_load_reader_func next, position_type
|
|||||||
bool failed = false;
|
bool failed = false;
|
||||||
|
|
||||||
work(file f, position_type o = 0)
|
work(file f, position_type o = 0)
|
||||||
: f(f), fin(make_file_input_stream(f, o, make_file_input_stream_options())), start_off(o) {
|
: f(f), fin(make_file_input_stream(f, 0, make_file_input_stream_options())), start_off(o) {
|
||||||
}
|
}
|
||||||
work(work&&) = default;
|
work(work&&) = default;
|
||||||
|
|
||||||
|
|||||||
@@ -34,48 +34,26 @@
|
|||||||
#include "idl/mutation.dist.impl.hh"
|
#include "idl/mutation.dist.impl.hh"
|
||||||
#include "idl/commitlog.dist.impl.hh"
|
#include "idl/commitlog.dist.impl.hh"
|
||||||
|
|
||||||
commitlog_entry::commitlog_entry(stdx::optional<column_mapping> mapping, frozen_mutation&& mutation)
|
template<typename Output>
|
||||||
: _mapping(std::move(mapping))
|
void commitlog_entry_writer::serialize(Output& out) const {
|
||||||
, _mutation_storage(std::move(mutation))
|
[this, wr = ser::writer_of_commitlog_entry<Output>(out)] () mutable {
|
||||||
, _mutation(*_mutation_storage)
|
if (_with_schema) {
|
||||||
{ }
|
return std::move(wr).write_mapping(_schema->get_column_mapping());
|
||||||
|
} else {
|
||||||
commitlog_entry::commitlog_entry(stdx::optional<column_mapping> mapping, const frozen_mutation& mutation)
|
return std::move(wr).skip_mapping();
|
||||||
: _mapping(std::move(mapping))
|
}
|
||||||
, _mutation(mutation)
|
}().write_mutation(_mutation).end_commitlog_entry();
|
||||||
{ }
|
|
||||||
|
|
||||||
commitlog_entry::commitlog_entry(commitlog_entry&& ce)
|
|
||||||
: _mapping(std::move(ce._mapping))
|
|
||||||
, _mutation_storage(std::move(ce._mutation_storage))
|
|
||||||
, _mutation(_mutation_storage ? *_mutation_storage : ce._mutation)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
commitlog_entry& commitlog_entry::operator=(commitlog_entry&& ce)
|
|
||||||
{
|
|
||||||
if (this != &ce) {
|
|
||||||
this->~commitlog_entry();
|
|
||||||
new (this) commitlog_entry(std::move(ce));
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
commitlog_entry commitlog_entry_writer::get_entry() const {
|
|
||||||
if (_with_schema) {
|
|
||||||
return commitlog_entry(_schema->get_column_mapping(), _mutation);
|
|
||||||
} else {
|
|
||||||
return commitlog_entry({}, _mutation);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void commitlog_entry_writer::compute_size() {
|
void commitlog_entry_writer::compute_size() {
|
||||||
_size = ser::get_sizeof(get_entry());
|
seastar::measuring_output_stream ms;
|
||||||
|
serialize(ms);
|
||||||
|
_size = ms.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
void commitlog_entry_writer::write(data_output& out) const {
|
void commitlog_entry_writer::write(data_output& out) const {
|
||||||
seastar::simple_output_stream str(out.reserve(size()), size());
|
seastar::simple_output_stream str(out.reserve(size()), size());
|
||||||
ser::serialize(str, get_entry());
|
serialize(str);
|
||||||
}
|
}
|
||||||
|
|
||||||
commitlog_entry_reader::commitlog_entry_reader(const temporary_buffer<char>& buffer)
|
commitlog_entry_reader::commitlog_entry_reader(const temporary_buffer<char>& buffer)
|
||||||
|
|||||||
@@ -31,15 +31,10 @@ namespace stdx = std::experimental;
|
|||||||
|
|
||||||
class commitlog_entry {
|
class commitlog_entry {
|
||||||
stdx::optional<column_mapping> _mapping;
|
stdx::optional<column_mapping> _mapping;
|
||||||
stdx::optional<frozen_mutation> _mutation_storage;
|
frozen_mutation _mutation;
|
||||||
const frozen_mutation& _mutation;
|
|
||||||
public:
|
public:
|
||||||
commitlog_entry(stdx::optional<column_mapping> mapping, frozen_mutation&& mutation);
|
commitlog_entry(stdx::optional<column_mapping> mapping, frozen_mutation&& mutation)
|
||||||
commitlog_entry(stdx::optional<column_mapping> mapping, const frozen_mutation& mutation);
|
: _mapping(std::move(mapping)), _mutation(std::move(mutation)) { }
|
||||||
commitlog_entry(commitlog_entry&&);
|
|
||||||
commitlog_entry(const commitlog_entry&) = delete;
|
|
||||||
commitlog_entry& operator=(commitlog_entry&&);
|
|
||||||
commitlog_entry& operator=(const commitlog_entry&) = delete;
|
|
||||||
const stdx::optional<column_mapping>& mapping() const { return _mapping; }
|
const stdx::optional<column_mapping>& mapping() const { return _mapping; }
|
||||||
const frozen_mutation& mutation() const { return _mutation; }
|
const frozen_mutation& mutation() const { return _mutation; }
|
||||||
};
|
};
|
||||||
@@ -50,8 +45,9 @@ class commitlog_entry_writer {
|
|||||||
bool _with_schema = true;
|
bool _with_schema = true;
|
||||||
size_t _size;
|
size_t _size;
|
||||||
private:
|
private:
|
||||||
|
template<typename Output>
|
||||||
|
void serialize(Output&) const;
|
||||||
void compute_size();
|
void compute_size();
|
||||||
commitlog_entry get_entry() const;
|
|
||||||
public:
|
public:
|
||||||
commitlog_entry_writer(schema_ptr s, const frozen_mutation& fm)
|
commitlog_entry_writer(schema_ptr s, const frozen_mutation& fm)
|
||||||
: _schema(std::move(s)), _mutation(fm)
|
: _schema(std::move(s)), _mutation(fm)
|
||||||
@@ -88,4 +84,4 @@ public:
|
|||||||
|
|
||||||
const stdx::optional<column_mapping>& get_column_mapping() const { return _ce.mapping(); }
|
const stdx::optional<column_mapping>& get_column_mapping() const { return _ce.mapping(); }
|
||||||
const frozen_mutation& mutation() const { return _ce.mutation(); }
|
const frozen_mutation& mutation() const { return _ce.mutation(); }
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -61,13 +61,19 @@
|
|||||||
|
|
||||||
static logging::logger logger("commitlog_replayer");
|
static logging::logger logger("commitlog_replayer");
|
||||||
|
|
||||||
struct column_mappings {
|
|
||||||
std::unordered_map<table_schema_version, column_mapping> map;
|
|
||||||
future<> stop() { return make_ready_future<>(); }
|
|
||||||
};
|
|
||||||
|
|
||||||
class db::commitlog_replayer::impl {
|
class db::commitlog_replayer::impl {
|
||||||
seastar::sharded<column_mappings> _column_mappings;
|
struct column_mappings {
|
||||||
|
std::unordered_map<table_schema_version, column_mapping> map;
|
||||||
|
future<> stop() { return make_ready_future<>(); }
|
||||||
|
};
|
||||||
|
|
||||||
|
// we want the processing methods to be const, since they use
|
||||||
|
// shard-sharing of data -> read only
|
||||||
|
// this one is special since it is thread local.
|
||||||
|
// Should actually make sharded::local a const function (it does
|
||||||
|
// not modify content), but...
|
||||||
|
mutable seastar::sharded<column_mappings> _column_mappings;
|
||||||
|
|
||||||
friend class db::commitlog_replayer;
|
friend class db::commitlog_replayer;
|
||||||
public:
|
public:
|
||||||
impl(seastar::sharded<cql3::query_processor>& db);
|
impl(seastar::sharded<cql3::query_processor>& db);
|
||||||
@@ -94,13 +100,35 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
future<> process(stats*, temporary_buffer<char> buf, replay_position rp);
|
// move start/stop of the thread local bookkeep to "top level"
|
||||||
future<stats> recover(sstring file);
|
// and also make sure to assert on it actually being started.
|
||||||
|
future<> start() {
|
||||||
|
return _column_mappings.start();
|
||||||
|
}
|
||||||
|
future<> stop() {
|
||||||
|
return _column_mappings.stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
future<> process(stats*, temporary_buffer<char> buf, replay_position rp) const;
|
||||||
|
future<stats> recover(sstring file) const;
|
||||||
|
|
||||||
typedef std::unordered_map<utils::UUID, replay_position> rp_map;
|
typedef std::unordered_map<utils::UUID, replay_position> rp_map;
|
||||||
typedef std::unordered_map<unsigned, rp_map> shard_rpm_map;
|
typedef std::unordered_map<unsigned, rp_map> shard_rpm_map;
|
||||||
typedef std::unordered_map<unsigned, replay_position> shard_rp_map;
|
typedef std::unordered_map<unsigned, replay_position> shard_rp_map;
|
||||||
|
|
||||||
|
replay_position min_pos(unsigned shard) const {
|
||||||
|
auto i = _min_pos.find(shard);
|
||||||
|
return i != _min_pos.end() ? i->second : replay_position();
|
||||||
|
}
|
||||||
|
replay_position cf_min_pos(const utils::UUID& uuid, unsigned shard) const {
|
||||||
|
auto i = _rpm.find(shard);
|
||||||
|
if (i == _rpm.end()) {
|
||||||
|
return replay_position();
|
||||||
|
}
|
||||||
|
auto j = i->second.find(uuid);
|
||||||
|
return j != i->second.end() ? j->second : replay_position();
|
||||||
|
}
|
||||||
|
|
||||||
seastar::sharded<cql3::query_processor>&
|
seastar::sharded<cql3::query_processor>&
|
||||||
_qp;
|
_qp;
|
||||||
shard_rpm_map
|
shard_rpm_map
|
||||||
@@ -175,7 +203,6 @@ future<> db::commitlog_replayer::impl::init() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto&p : _min_pos) {
|
for (auto&p : _min_pos) {
|
||||||
logger.debug("minimum position for shard {}: {}", p.first, p.second);
|
logger.debug("minimum position for shard {}: {}", p.first, p.second);
|
||||||
}
|
}
|
||||||
@@ -188,9 +215,11 @@ future<> db::commitlog_replayer::impl::init() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
future<db::commitlog_replayer::impl::stats>
|
future<db::commitlog_replayer::impl::stats>
|
||||||
db::commitlog_replayer::impl::recover(sstring file) {
|
db::commitlog_replayer::impl::recover(sstring file) const {
|
||||||
|
assert(_column_mappings.local_is_initialized());
|
||||||
|
|
||||||
replay_position rp{commitlog::descriptor(file)};
|
replay_position rp{commitlog::descriptor(file)};
|
||||||
auto gp = _min_pos[rp.shard_id()];
|
auto gp = min_pos(rp.shard_id());
|
||||||
|
|
||||||
if (rp.id < gp.id) {
|
if (rp.id < gp.id) {
|
||||||
logger.debug("skipping replay of fully-flushed {}", file);
|
logger.debug("skipping replay of fully-flushed {}", file);
|
||||||
@@ -220,7 +249,7 @@ db::commitlog_replayer::impl::recover(sstring file) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> db::commitlog_replayer::impl::process(stats* s, temporary_buffer<char> buf, replay_position rp) {
|
future<> db::commitlog_replayer::impl::process(stats* s, temporary_buffer<char> buf, replay_position rp) const {
|
||||||
try {
|
try {
|
||||||
|
|
||||||
commitlog_entry_reader cer(buf);
|
commitlog_entry_reader cer(buf);
|
||||||
@@ -238,17 +267,16 @@ future<> db::commitlog_replayer::impl::process(stats* s, temporary_buffer<char>
|
|||||||
const column_mapping& src_cm = cm_it->second;
|
const column_mapping& src_cm = cm_it->second;
|
||||||
|
|
||||||
auto shard_id = rp.shard_id();
|
auto shard_id = rp.shard_id();
|
||||||
if (rp < _min_pos[shard_id]) {
|
if (rp < min_pos(shard_id)) {
|
||||||
logger.trace("entry {} is less than global min position. skipping", rp);
|
logger.trace("entry {} is less than global min position. skipping", rp);
|
||||||
s->skipped_mutations++;
|
s->skipped_mutations++;
|
||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto uuid = fm.column_family_id();
|
auto uuid = fm.column_family_id();
|
||||||
auto& map = _rpm[shard_id];
|
auto cf_rp = cf_min_pos(uuid, shard_id);
|
||||||
auto i = map.find(uuid);
|
if (rp <= cf_rp) {
|
||||||
if (i != map.end() && rp <= i->second) {
|
logger.trace("entry {} at {} is younger than recorded replay position {}. skipping", fm.column_family_id(), rp, cf_rp);
|
||||||
logger.trace("entry {} at {} is younger than recorded replay position {}. skipping", fm.column_family_id(), rp, i->second);
|
|
||||||
s->skipped_mutations++;
|
s->skipped_mutations++;
|
||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
}
|
}
|
||||||
@@ -323,42 +351,55 @@ future<db::commitlog_replayer> db::commitlog_replayer::create_replayer(seastar::
|
|||||||
}
|
}
|
||||||
|
|
||||||
future<> db::commitlog_replayer::recover(std::vector<sstring> files) {
|
future<> db::commitlog_replayer::recover(std::vector<sstring> files) {
|
||||||
return _impl->_column_mappings.start().then([this, files = std::move(files)] {
|
typedef std::unordered_multimap<unsigned, sstring> shard_file_map;
|
||||||
|
|
||||||
logger.info("Replaying {}", join(", ", files));
|
logger.info("Replaying {}", join(", ", files));
|
||||||
return map_reduce(files, [this](auto f) {
|
|
||||||
logger.debug("Replaying {}", f);
|
// pre-compute work per shard already.
|
||||||
return _impl->recover(f).then([f](impl::stats stats) {
|
auto map = ::make_lw_shared<shard_file_map>();
|
||||||
if (stats.corrupt_bytes != 0) {
|
for (auto& f : files) {
|
||||||
logger.warn("Corrupted file: {}. {} bytes skipped.", f, stats.corrupt_bytes);
|
commitlog::descriptor d(f);
|
||||||
}
|
replay_position p = d;
|
||||||
logger.debug("Log replay of {} complete, {} replayed mutations ({} invalid, {} skipped)"
|
map->emplace(p.shard_id() % smp::count, std::move(f));
|
||||||
, f
|
}
|
||||||
, stats.applied_mutations
|
|
||||||
, stats.invalid_mutations
|
return _impl->start().then([this, map] {
|
||||||
, stats.skipped_mutations
|
return map_reduce(smp::all_cpus(), [this, map](unsigned id) {
|
||||||
|
return smp::submit_to(id, [this, id, map]() {
|
||||||
|
auto total = ::make_lw_shared<impl::stats>();
|
||||||
|
// TODO: or something. For now, we do this serialized per shard,
|
||||||
|
// to reduce mutation congestion. We could probably (says avi)
|
||||||
|
// do 2 segments in parallel or something, but lets use this first.
|
||||||
|
auto range = map->equal_range(id);
|
||||||
|
return do_for_each(range.first, range.second, [this, total](const std::pair<unsigned, sstring>& p) {
|
||||||
|
auto&f = p.second;
|
||||||
|
logger.debug("Replaying {}", f);
|
||||||
|
return _impl->recover(f).then([f, total](impl::stats stats) {
|
||||||
|
if (stats.corrupt_bytes != 0) {
|
||||||
|
logger.warn("Corrupted file: {}. {} bytes skipped.", f, stats.corrupt_bytes);
|
||||||
|
}
|
||||||
|
logger.debug("Log replay of {} complete, {} replayed mutations ({} invalid, {} skipped)"
|
||||||
|
, f
|
||||||
|
, stats.applied_mutations
|
||||||
|
, stats.invalid_mutations
|
||||||
|
, stats.skipped_mutations
|
||||||
|
);
|
||||||
|
*total += stats;
|
||||||
|
});
|
||||||
|
}).then([total] {
|
||||||
|
return make_ready_future<impl::stats>(*total);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}, impl::stats(), std::plus<impl::stats>()).then([](impl::stats totals) {
|
||||||
|
logger.info("Log replay complete, {} replayed mutations ({} invalid, {} skipped)"
|
||||||
|
, totals.applied_mutations
|
||||||
|
, totals.invalid_mutations
|
||||||
|
, totals.skipped_mutations
|
||||||
);
|
);
|
||||||
return make_ready_future<impl::stats>(stats);
|
|
||||||
}).handle_exception([f](auto ep) -> future<impl::stats> {
|
|
||||||
logger.error("Error recovering {}: {}", f, ep);
|
|
||||||
try {
|
|
||||||
std::rethrow_exception(ep);
|
|
||||||
} catch (std::invalid_argument&) {
|
|
||||||
logger.error("Scylla cannot process {}. Make sure to fully flush all Cassandra commit log files to sstable before migrating.", f);
|
|
||||||
throw;
|
|
||||||
} catch (...) {
|
|
||||||
throw;
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
}, impl::stats(), std::plus<impl::stats>()).then([](impl::stats totals) {
|
|
||||||
logger.info("Log replay complete, {} replayed mutations ({} invalid, {} skipped)"
|
|
||||||
, totals.applied_mutations
|
|
||||||
, totals.invalid_mutations
|
|
||||||
, totals.skipped_mutations
|
|
||||||
);
|
|
||||||
}).finally([this] {
|
}).finally([this] {
|
||||||
return _impl->_column_mappings.stop();
|
return _impl->stop();
|
||||||
});
|
});
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> db::commitlog_replayer::recover(sstring f) {
|
future<> db::commitlog_replayer::recover(sstring f) {
|
||||||
|
|||||||
@@ -326,7 +326,7 @@ public:
|
|||||||
val(sstable_preemptive_open_interval_in_mb, uint32_t, 50, Unused, \
|
val(sstable_preemptive_open_interval_in_mb, uint32_t, 50, Unused, \
|
||||||
"When compacting, the replacement opens SSTables before they are completely written and uses in place of the prior SSTables for any range previously written. This setting helps to smoothly transfer reads between the SSTables by reducing page cache churn and keeps hot rows hot." \
|
"When compacting, the replacement opens SSTables before they are completely written and uses in place of the prior SSTables for any range previously written. This setting helps to smoothly transfer reads between the SSTables by reducing page cache churn and keeps hot rows hot." \
|
||||||
) \
|
) \
|
||||||
val(defragment_memory_on_idle, bool, true, Used, "Set to true to defragment memory when the cpu is idle. This reduces the amount of work Scylla performs when processing client requests.") \
|
val(defragment_memory_on_idle, bool, false, Used, "When set to true, will defragment memory when the cpu is idle. This reduces the amount of work Scylla performs when processing client requests.") \
|
||||||
/* Memtable settings */ \
|
/* Memtable settings */ \
|
||||||
val(memtable_allocation_type, sstring, "heap_buffers", Invalid, \
|
val(memtable_allocation_type, sstring, "heap_buffers", Invalid, \
|
||||||
"Specify the way Cassandra allocates and manages memtable memory. See Off-heap memtables in Cassandra 2.1. Options are:\n" \
|
"Specify the way Cassandra allocates and manages memtable memory. See Off-heap memtables in Cassandra 2.1. Options are:\n" \
|
||||||
@@ -729,6 +729,8 @@ public:
|
|||||||
val(replace_address_first_boot, sstring, "", Used, "Like replace_address option, but if the node has been bootstrapped successfully it will be ignored. Same as -Dcassandra.replace_address_first_boot.") \
|
val(replace_address_first_boot, sstring, "", Used, "Like replace_address option, but if the node has been bootstrapped successfully it will be ignored. Same as -Dcassandra.replace_address_first_boot.") \
|
||||||
val(override_decommission, bool, false, Used, "Set true to force a decommissioned node to join the cluster") \
|
val(override_decommission, bool, false, Used, "Set true to force a decommissioned node to join the cluster") \
|
||||||
val(ring_delay_ms, uint32_t, 30 * 1000, Used, "Time a node waits to hear from other nodes before joining the ring in milliseconds. Same as -Dcassandra.ring_delay_ms in cassandra.") \
|
val(ring_delay_ms, uint32_t, 30 * 1000, Used, "Time a node waits to hear from other nodes before joining the ring in milliseconds. Same as -Dcassandra.ring_delay_ms in cassandra.") \
|
||||||
|
val(fd_max_interval_ms, uint32_t, 2 * 1000, Used, "The maximum failure_detector interval time in milliseconds. Interval larger than the maximum will be ignored. Larger cluster may need to increase the default.") \
|
||||||
|
val(fd_initial_value_ms, uint32_t, 2 * 1000, Used, "The initial failure_detector interval time in milliseconds.") \
|
||||||
val(shutdown_announce_in_ms, uint32_t, 2 * 1000, Used, "Time a node waits after sending gossip shutdown message in milliseconds. Same as -Dcassandra.shutdown_announce_in_ms in cassandra.") \
|
val(shutdown_announce_in_ms, uint32_t, 2 * 1000, Used, "Time a node waits after sending gossip shutdown message in milliseconds. Same as -Dcassandra.shutdown_announce_in_ms in cassandra.") \
|
||||||
val(developer_mode, bool, false, Used, "Relax environment checks. Setting to true can reduce performance and reliability significantly.") \
|
val(developer_mode, bool, false, Used, "Relax environment checks. Setting to true can reduce performance and reliability significantly.") \
|
||||||
val(skip_wait_for_gossip_to_settle, int32_t, -1, Used, "An integer to configure the wait for gossip to settle. -1: wait normally, 0: do not wait at all, n: wait for at most n polls. Same as -Dcassandra.skip_wait_for_gossip_to_settle in cassandra.") \
|
val(skip_wait_for_gossip_to_settle, int32_t, -1, Used, "An integer to configure the wait for gossip to settle. -1: wait normally, 0: do not wait at all, n: wait for at most n polls. Same as -Dcassandra.skip_wait_for_gossip_to_settle in cassandra.") \
|
||||||
@@ -739,6 +741,7 @@ public:
|
|||||||
val(prometheus_prefix, sstring, "scylla", Used, "Set the prefix of the exported Prometheus metrics. Changing this will break Scylla's dashboard compatibility, do not change unless you know what you are doing.") \
|
val(prometheus_prefix, sstring, "scylla", Used, "Set the prefix of the exported Prometheus metrics. Changing this will break Scylla's dashboard compatibility, do not change unless you know what you are doing.") \
|
||||||
val(abort_on_lsa_bad_alloc, bool, false, Used, "Abort when allocation in LSA region fails") \
|
val(abort_on_lsa_bad_alloc, bool, false, Used, "Abort when allocation in LSA region fails") \
|
||||||
val(murmur3_partitioner_ignore_msb_bits, unsigned, 0, Used, "Number of most siginificant token bits to ignore in murmur3 partitioner; increase for very large clusters") \
|
val(murmur3_partitioner_ignore_msb_bits, unsigned, 0, Used, "Number of most siginificant token bits to ignore in murmur3 partitioner; increase for very large clusters") \
|
||||||
|
val(virtual_dirty_soft_limit, double, 0.6, Used, "Soft limit of virtual dirty memory expressed as a portion of the hard limit") \
|
||||||
/* done! */
|
/* done! */
|
||||||
|
|
||||||
#define _make_value_member(name, type, deflt, status, desc, ...) \
|
#define _make_value_member(name, type, deflt, status, desc, ...) \
|
||||||
|
|||||||
@@ -167,6 +167,14 @@ inline void assure_sufficient_live_nodes(
|
|||||||
const PendingRange& pending_endpoints = std::array<gms::inet_address, 0>()) {
|
const PendingRange& pending_endpoints = std::array<gms::inet_address, 0>()) {
|
||||||
size_t need = block_for(ks, cl);
|
size_t need = block_for(ks, cl);
|
||||||
|
|
||||||
|
auto adjust_live_for_error = [] (size_t live, size_t pending) {
|
||||||
|
// DowngradingConsistencyRetryPolicy uses alive replicas count from Unavailable
|
||||||
|
// exception to adjust CL for retry. When pending node is present CL is increased
|
||||||
|
// by 1 internally, so reported number of live nodes has to be adjusted to take
|
||||||
|
// this into account
|
||||||
|
return pending <= live ? live - pending : 0;
|
||||||
|
};
|
||||||
|
|
||||||
switch (cl) {
|
switch (cl) {
|
||||||
case consistency_level::ANY:
|
case consistency_level::ANY:
|
||||||
// local hint is acceptable, and local node is always live
|
// local hint is acceptable, and local node is always live
|
||||||
@@ -181,7 +189,7 @@ inline void assure_sufficient_live_nodes(
|
|||||||
size_t pending = count_local_endpoints(pending_endpoints);
|
size_t pending = count_local_endpoints(pending_endpoints);
|
||||||
if (local_live < need + pending) {
|
if (local_live < need + pending) {
|
||||||
cl_logger.debug("Local replicas {} are insufficient to satisfy LOCAL_QUORUM requirement of needed {} and pending {}", live_endpoints, local_live, pending);
|
cl_logger.debug("Local replicas {} are insufficient to satisfy LOCAL_QUORUM requirement of needed {} and pending {}", live_endpoints, local_live, pending);
|
||||||
throw exceptions::unavailable_exception(cl, need, local_live);
|
throw exceptions::unavailable_exception(cl, need, adjust_live_for_error(local_live, pending));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -195,7 +203,7 @@ inline void assure_sufficient_live_nodes(
|
|||||||
size_t pending = pending_endpoints.size();
|
size_t pending = pending_endpoints.size();
|
||||||
if (live < need + pending) {
|
if (live < need + pending) {
|
||||||
cl_logger.debug("Live nodes {} do not satisfy ConsistencyLevel ({} required, {} pending)", live, need, pending);
|
cl_logger.debug("Live nodes {} do not satisfy ConsistencyLevel ({} required, {} pending)", live, need, pending);
|
||||||
throw exceptions::unavailable_exception(cl, need, live);
|
throw exceptions::unavailable_exception(cl, need, adjust_live_for_error(live, pending));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -77,6 +77,15 @@ namespace schema_tables {
|
|||||||
|
|
||||||
logging::logger logger("schema_tables");
|
logging::logger logger("schema_tables");
|
||||||
|
|
||||||
|
struct push_back_and_return {
|
||||||
|
std::vector<mutation> muts;
|
||||||
|
|
||||||
|
std::vector<mutation> operator()(mutation&& m) {
|
||||||
|
muts.emplace_back(std::move(m));
|
||||||
|
return std::move(muts);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
struct qualified_name {
|
struct qualified_name {
|
||||||
sstring keyspace_name;
|
sstring keyspace_name;
|
||||||
sstring table_name;
|
sstring table_name;
|
||||||
@@ -547,6 +556,14 @@ read_schema_partition_for_table(distributed<service::storage_proxy>& proxy, sche
|
|||||||
return query_partition_mutation(proxy.local(), std::move(schema), std::move(cmd), std::move(keyspace_key));
|
return query_partition_mutation(proxy.local(), std::move(schema), std::move(cmd), std::move(keyspace_key));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
future<mutation>
|
||||||
|
read_keyspace_mutation(distributed<service::storage_proxy>& proxy, const sstring& keyspace_name) {
|
||||||
|
schema_ptr s = keyspaces();
|
||||||
|
auto key = partition_key::from_singular(*s, keyspace_name);
|
||||||
|
auto cmd = make_lw_shared<query::read_command>(s->id(), s->version(), query::full_slice);
|
||||||
|
return query_partition_mutation(proxy.local(), std::move(s), std::move(cmd), std::move(key));
|
||||||
|
}
|
||||||
|
|
||||||
static semaphore the_merge_lock {1};
|
static semaphore the_merge_lock {1};
|
||||||
|
|
||||||
future<> merge_lock() {
|
future<> merge_lock() {
|
||||||
@@ -832,39 +849,6 @@ static inline void collect_types(std::set<sstring>& keys, schema_result& result,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void ensure_type_is_unused(distributed<service::storage_proxy>& proxy, user_type type)
|
|
||||||
{
|
|
||||||
// We don't want to drop a type unless it's not used anymore (mainly because
|
|
||||||
// if someone drops a type and recreates one with the same name but different
|
|
||||||
// definition with the previous name still in use, things can get messy).
|
|
||||||
// We have two places to check: 1) other user type that can nest the one
|
|
||||||
// we drop and 2) existing tables referencing the type (maybe in a nested
|
|
||||||
// way).
|
|
||||||
|
|
||||||
auto&& keyspace = type->_keyspace;
|
|
||||||
auto&& name = type->_name;
|
|
||||||
auto&& db = proxy.local().get_db().local();
|
|
||||||
auto&& ks = db.find_keyspace(type->_keyspace);
|
|
||||||
|
|
||||||
for (auto&& ut : ks.metadata()->user_types()->get_all_types() | boost::adaptors::map_values) {
|
|
||||||
if (ut->_keyspace == keyspace && ut->_name == name) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ut->references_user_type(keyspace, name)) {
|
|
||||||
throw exceptions::invalid_request_exception(sprint("Cannot drop user type %s.%s as it is still used by user type %s", keyspace, type->get_name_as_string(), ut->get_name_as_string()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (auto&& cfm : ks.metadata()->cf_meta_data() | boost::adaptors::map_values) {
|
|
||||||
for (auto&& col : cfm->all_columns() | boost::adaptors::map_values) {
|
|
||||||
if (col->type->references_user_type(keyspace, name)) {
|
|
||||||
throw exceptions::invalid_request_exception(sprint("Cannot drop user type %s.%s as it is still used by table %s.%s", keyspace, type->get_name_as_string(), cfm->ks_name(), cfm->cf_name()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// see the comments for merge_keyspaces()
|
// see the comments for merge_keyspaces()
|
||||||
static void merge_types(distributed<service::storage_proxy>& proxy, schema_result&& before, schema_result&& after)
|
static void merge_types(distributed<service::storage_proxy>& proxy, schema_result&& before, schema_result&& after)
|
||||||
{
|
{
|
||||||
@@ -898,10 +882,6 @@ static void merge_types(distributed<service::storage_proxy>& proxy, schema_resul
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto&& ut : dropped) {
|
|
||||||
ensure_type_is_unused(proxy, ut);
|
|
||||||
}
|
|
||||||
|
|
||||||
proxy.local().get_db().invoke_on_all([&created, &dropped, &altered] (database& db) {
|
proxy.local().get_db().invoke_on_all([&created, &dropped, &altered] (database& db) {
|
||||||
return seastar::async([&] {
|
return seastar::async([&] {
|
||||||
for (auto&& type : created) {
|
for (auto&& type : created) {
|
||||||
@@ -1182,19 +1162,18 @@ void add_type_to_schema_mutation(user_type type, api::timestamp_type timestamp,
|
|||||||
mutations.emplace_back(std::move(m));
|
mutations.emplace_back(std::move(m));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<mutation> make_create_type_mutations(lw_shared_ptr<keyspace_metadata> keyspace, user_type type, api::timestamp_type timestamp)
|
future<std::vector<mutation>> make_create_type_mutations(lw_shared_ptr<keyspace_metadata> keyspace, user_type type, api::timestamp_type timestamp)
|
||||||
{
|
{
|
||||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
std::vector<mutation> mutations;
|
||||||
auto mutations = make_create_keyspace_mutations(keyspace, timestamp, false);
|
|
||||||
add_type_to_schema_mutation(type, timestamp, mutations);
|
add_type_to_schema_mutation(type, timestamp, mutations);
|
||||||
return mutations;
|
|
||||||
|
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||||
|
return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<mutation> make_drop_type_mutations(lw_shared_ptr<keyspace_metadata> keyspace, user_type type, api::timestamp_type timestamp)
|
future<std::vector<mutation>> make_drop_type_mutations(lw_shared_ptr<keyspace_metadata> keyspace, user_type type, api::timestamp_type timestamp)
|
||||||
{
|
{
|
||||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
std::vector<mutation> mutations;
|
||||||
auto mutations = make_create_keyspace_mutations(keyspace, timestamp, false);
|
|
||||||
|
|
||||||
schema_ptr s = usertypes();
|
schema_ptr s = usertypes();
|
||||||
auto pkey = partition_key::from_singular(*s, type->_keyspace);
|
auto pkey = partition_key::from_singular(*s, type->_keyspace);
|
||||||
auto ckey = clustering_key::from_singular(*s, type->get_name_as_string());
|
auto ckey = clustering_key::from_singular(*s, type->get_name_as_string());
|
||||||
@@ -1202,19 +1181,21 @@ std::vector<mutation> make_drop_type_mutations(lw_shared_ptr<keyspace_metadata>
|
|||||||
m.partition().apply_delete(*s, ckey, tombstone(timestamp, gc_clock::now()));
|
m.partition().apply_delete(*s, ckey, tombstone(timestamp, gc_clock::now()));
|
||||||
mutations.emplace_back(std::move(m));
|
mutations.emplace_back(std::move(m));
|
||||||
|
|
||||||
return mutations;
|
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||||
|
return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Table metadata serialization/deserialization.
|
* Table metadata serialization/deserialization.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
std::vector<mutation> make_create_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp)
|
future<std::vector<mutation>> make_create_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp)
|
||||||
{
|
{
|
||||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
std::vector<mutation> mutations;
|
||||||
auto mutations = make_create_keyspace_mutations(keyspace, timestamp, false);
|
|
||||||
add_table_or_view_to_schema_mutation(table, timestamp, true, mutations);
|
add_table_or_view_to_schema_mutation(table, timestamp, true, mutations);
|
||||||
return mutations;
|
|
||||||
|
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||||
|
return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
|
||||||
}
|
}
|
||||||
|
|
||||||
static schema_mutations make_table_mutations(schema_ptr table, api::timestamp_type timestamp, bool with_columns_and_triggers)
|
static schema_mutations make_table_mutations(schema_ptr table, api::timestamp_type timestamp, bool with_columns_and_triggers)
|
||||||
@@ -1347,15 +1328,13 @@ static void make_update_columns_mutations(schema_ptr old_table,
|
|||||||
mutations.emplace_back(std::move(columns_mutation));
|
mutations.emplace_back(std::move(columns_mutation));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<mutation> make_update_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace,
|
future<std::vector<mutation>> make_update_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace,
|
||||||
schema_ptr old_table,
|
schema_ptr old_table,
|
||||||
schema_ptr new_table,
|
schema_ptr new_table,
|
||||||
api::timestamp_type timestamp,
|
api::timestamp_type timestamp,
|
||||||
bool from_thrift)
|
bool from_thrift)
|
||||||
{
|
{
|
||||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
std::vector<mutation> mutations;
|
||||||
auto mutations = make_create_keyspace_mutations(keyspace, timestamp, false);
|
|
||||||
|
|
||||||
add_table_or_view_to_schema_mutation(new_table, timestamp, false, mutations);
|
add_table_or_view_to_schema_mutation(new_table, timestamp, false, mutations);
|
||||||
|
|
||||||
make_update_columns_mutations(std::move(old_table), std::move(new_table), timestamp, from_thrift, mutations);
|
make_update_columns_mutations(std::move(old_table), std::move(new_table), timestamp, from_thrift, mutations);
|
||||||
@@ -1373,7 +1352,8 @@ std::vector<mutation> make_update_table_mutations(lw_shared_ptr<keyspace_metadat
|
|||||||
addTriggerToSchemaMutation(newTable, trigger, timestamp, mutation);
|
addTriggerToSchemaMutation(newTable, trigger, timestamp, mutation);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
return mutations;
|
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||||
|
return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
|
||||||
}
|
}
|
||||||
|
|
||||||
static void make_drop_table_or_view_mutations(schema_ptr schema_table,
|
static void make_drop_table_or_view_mutations(schema_ptr schema_table,
|
||||||
@@ -1390,10 +1370,9 @@ static void make_drop_table_or_view_mutations(schema_ptr schema_table,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<mutation> make_drop_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp)
|
future<std::vector<mutation>> make_drop_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp)
|
||||||
{
|
{
|
||||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
std::vector<mutation> mutations;
|
||||||
auto mutations = make_create_keyspace_mutations(keyspace, timestamp, false);
|
|
||||||
make_drop_table_or_view_mutations(columnfamilies(), std::move(table), timestamp, mutations);
|
make_drop_table_or_view_mutations(columnfamilies(), std::move(table), timestamp, mutations);
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
@@ -1405,7 +1384,8 @@ std::vector<mutation> make_drop_table_mutations(lw_shared_ptr<keyspace_metadata>
|
|||||||
for (String indexName : Keyspace.open(keyspace.name).getColumnFamilyStore(table.cfName).getBuiltIndexes())
|
for (String indexName : Keyspace.open(keyspace.name).getColumnFamilyStore(table.cfName).getBuiltIndexes())
|
||||||
indexCells.addTombstone(indexCells.getComparator().makeCellName(indexName), ldt, timestamp);
|
indexCells.addTombstone(indexCells.getComparator().makeCellName(indexName), ldt, timestamp);
|
||||||
#endif
|
#endif
|
||||||
return mutations;
|
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||||
|
return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
|
||||||
}
|
}
|
||||||
|
|
||||||
static future<schema_mutations> read_table_mutations(distributed<service::storage_proxy>& proxy, const qualified_name& table, schema_ptr s)
|
static future<schema_mutations> read_table_mutations(distributed<service::storage_proxy>& proxy, const qualified_name& table, schema_ptr s)
|
||||||
@@ -1481,12 +1461,16 @@ future<schema_ptr> create_table_from_table_row(distributed<service::storage_prox
|
|||||||
return create_table_from_name(proxy, ks_name, cf_name);
|
return create_table_from_name(proxy, ks_name, cf_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
void prepare_builder_from_table_row(schema_builder& builder, const query::result_set_row& table_row)
|
void prepare_builder_from_table_row(schema_builder& builder, const query::result_set_row& table_row, bool is_dense)
|
||||||
{
|
{
|
||||||
|
|
||||||
auto comparator = table_row.get_nonnull<sstring>("comparator");
|
auto comparator = table_row.get_nonnull<sstring>("comparator");
|
||||||
bool is_compound = cell_comparator::check_compound(comparator);
|
bool is_compound = cell_comparator::check_compound(comparator);
|
||||||
builder.set_is_compound(is_compound);
|
builder.set_is_compound(is_compound);
|
||||||
|
if (!is_compound && !is_dense) { // For thrift dynamic tables, the comparator type is encoded in the clustering keys
|
||||||
|
auto regular_column_name_type = db::marshal::type_parser::parse(comparator);
|
||||||
|
builder.set_regular_column_name_type(regular_column_name_type);
|
||||||
|
}
|
||||||
|
|
||||||
cell_comparator::read_collections(builder, comparator);
|
cell_comparator::read_collections(builder, comparator);
|
||||||
|
|
||||||
if (table_row.has("read_repair_chance")) {
|
if (table_row.has("read_repair_chance")) {
|
||||||
@@ -1602,13 +1586,6 @@ schema_ptr create_table_from_mutations(schema_mutations sm, std::experimental::o
|
|||||||
AbstractType<?> fullRawComparator = CFMetaData.makeRawAbstractType(rawComparator, subComparator);
|
AbstractType<?> fullRawComparator = CFMetaData.makeRawAbstractType(rawComparator, subComparator);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
std::vector<column_definition> column_defs = create_columns_from_column_rows(
|
|
||||||
query::result_set(sm.columns_mutation()),
|
|
||||||
ks_name,
|
|
||||||
cf_name,/*,
|
|
||||||
fullRawComparator, */
|
|
||||||
cf == cf_type::super);
|
|
||||||
|
|
||||||
bool is_dense;
|
bool is_dense;
|
||||||
if (table_row.has("is_dense")) {
|
if (table_row.has("is_dense")) {
|
||||||
is_dense = table_row.get_nonnull<bool>("is_dense");
|
is_dense = table_row.get_nonnull<bool>("is_dense");
|
||||||
@@ -1617,6 +1594,16 @@ schema_ptr create_table_from_mutations(schema_mutations sm, std::experimental::o
|
|||||||
// is_dense = CFMetaData.calculateIsDense(fullRawComparator, columnDefs);
|
// is_dense = CFMetaData.calculateIsDense(fullRawComparator, columnDefs);
|
||||||
throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
|
throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
|
||||||
}
|
}
|
||||||
|
builder.set_is_dense(is_dense);
|
||||||
|
|
||||||
|
prepare_builder_from_table_row(builder, table_row, is_dense);
|
||||||
|
|
||||||
|
std::vector<column_definition> column_defs = create_columns_from_column_rows(
|
||||||
|
query::result_set(sm.columns_mutation()),
|
||||||
|
ks_name,
|
||||||
|
cf_name,
|
||||||
|
builder.regular_column_name_type(),
|
||||||
|
cf == cf_type::super);
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
CellNameType comparator = CellNames.fromAbstractType(fullRawComparator, isDense);
|
CellNameType comparator = CellNames.fromAbstractType(fullRawComparator, isDense);
|
||||||
@@ -1628,9 +1615,6 @@ schema_ptr create_table_from_mutations(schema_mutations sm, std::experimental::o
|
|||||||
|
|
||||||
CFMetaData cfm = new CFMetaData(ksName, cfName, cfType, comparator, cfId);
|
CFMetaData cfm = new CFMetaData(ksName, cfName, cfType, comparator, cfId);
|
||||||
#endif
|
#endif
|
||||||
builder.set_is_dense(is_dense);
|
|
||||||
|
|
||||||
prepare_builder_from_table_row(builder, table_row);
|
|
||||||
|
|
||||||
for (auto&& cdef : column_defs) {
|
for (auto&& cdef : column_defs) {
|
||||||
builder.with_column(cdef);
|
builder.with_column(cdef);
|
||||||
@@ -1662,7 +1646,8 @@ void add_column_to_schema_mutation(schema_ptr table,
|
|||||||
api::timestamp_type timestamp,
|
api::timestamp_type timestamp,
|
||||||
mutation& m)
|
mutation& m)
|
||||||
{
|
{
|
||||||
auto ckey = clustering_key::from_exploded(*m.schema(), {utf8_type->decompose(table->cf_name()), column.name()});
|
auto ckey = clustering_key::from_exploded(*m.schema(), {utf8_type->decompose(table->cf_name()),
|
||||||
|
utf8_type->decompose(column.name_as_text())});
|
||||||
m.set_clustered_cell(ckey, "validator", column.type->name(), timestamp);
|
m.set_clustered_cell(ckey, "validator", column.type->name(), timestamp);
|
||||||
m.set_clustered_cell(ckey, "type", serialize_kind(column.kind), timestamp);
|
m.set_clustered_cell(ckey, "type", serialize_kind(column.kind), timestamp);
|
||||||
if (!column.is_on_all_components()) {
|
if (!column.is_on_all_components()) {
|
||||||
@@ -1714,21 +1699,21 @@ void drop_column_from_schema_mutation(schema_ptr table, const column_definition&
|
|||||||
|
|
||||||
std::vector<column_definition> create_columns_from_column_rows(const query::result_set& rows,
|
std::vector<column_definition> create_columns_from_column_rows(const query::result_set& rows,
|
||||||
const sstring& keyspace,
|
const sstring& keyspace,
|
||||||
const sstring& table, /*,
|
const sstring& table,
|
||||||
AbstractType<?> rawComparator, */
|
data_type regular_column_name_type,
|
||||||
bool is_super)
|
bool is_super)
|
||||||
{
|
{
|
||||||
std::vector<column_definition> columns;
|
std::vector<column_definition> columns;
|
||||||
for (auto&& row : rows.rows()) {
|
for (auto&& row : rows.rows()) {
|
||||||
columns.emplace_back(std::move(create_column_from_column_row(row, keyspace, table, /*, rawComparator, */ is_super)));
|
columns.emplace_back(std::move(create_column_from_column_row(row, keyspace, table, regular_column_name_type, is_super)));
|
||||||
}
|
}
|
||||||
return columns;
|
return columns;
|
||||||
}
|
}
|
||||||
|
|
||||||
column_definition create_column_from_column_row(const query::result_set_row& row,
|
column_definition create_column_from_column_row(const query::result_set_row& row,
|
||||||
sstring keyspace,
|
sstring keyspace,
|
||||||
sstring table, /*,
|
sstring table,
|
||||||
AbstractType<?> rawComparator, */
|
data_type regular_column_name_type,
|
||||||
bool is_super)
|
bool is_super)
|
||||||
{
|
{
|
||||||
auto kind = deserialize_kind(row.get_nonnull<sstring>("type"));
|
auto kind = deserialize_kind(row.get_nonnull<sstring>("type"));
|
||||||
@@ -1744,13 +1729,8 @@ column_definition create_column_from_column_row(const query::result_set_row& row
|
|||||||
componentIndex = 1; // A ColumnDefinition for super columns applies to the column component
|
componentIndex = 1; // A ColumnDefinition for super columns applies to the column component
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if 0
|
auto comparator = kind == column_kind::regular_column ? regular_column_name_type : utf8_type;
|
||||||
// Note: we save the column name as string, but we should not assume that it is an UTF8 name, we
|
|
||||||
// we need to use the comparator fromString method
|
|
||||||
AbstractType<?> comparator = kind == ColumnDefinition.Kind.REGULAR
|
|
||||||
? getComponentComparator(rawComparator, componentIndex)
|
|
||||||
: UTF8Type.instance;
|
|
||||||
#endif
|
|
||||||
auto name_opt = row.get<sstring>("column_name");
|
auto name_opt = row.get<sstring>("column_name");
|
||||||
sstring name = name_opt ? *name_opt : sstring();
|
sstring name = name_opt ? *name_opt : sstring();
|
||||||
|
|
||||||
@@ -1769,8 +1749,7 @@ column_definition create_column_from_column_row(const query::result_set_row& row
|
|||||||
if (row.has("index_name"))
|
if (row.has("index_name"))
|
||||||
indexName = row.getString("index_name");
|
indexName = row.getString("index_name");
|
||||||
#endif
|
#endif
|
||||||
auto c = column_definition{utf8_type->decompose(name), validator, kind, component_index};
|
return column_definition{comparator->from_string(name), validator, kind, component_index};
|
||||||
return c;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -1788,7 +1767,7 @@ view_ptr create_view_from_mutations(schema_mutations sm, std::experimental::opti
|
|||||||
schema_builder builder{ks_name, cf_name, id};
|
schema_builder builder{ks_name, cf_name, id};
|
||||||
prepare_builder_from_table_row(builder, row);
|
prepare_builder_from_table_row(builder, row);
|
||||||
|
|
||||||
auto column_defs = create_columns_from_column_rows(query::result_set(sm.columns_mutation()), ks_name, cf_name, false);
|
auto column_defs = create_columns_from_column_rows(query::result_set(sm.columns_mutation()), ks_name, cf_name, utf8_type, false);
|
||||||
for (auto&& cdef : column_defs) {
|
for (auto&& cdef : column_defs) {
|
||||||
builder.with_column(cdef);
|
builder.with_column(cdef);
|
||||||
}
|
}
|
||||||
@@ -1899,37 +1878,39 @@ schema_mutations make_schema_mutations(schema_ptr s, api::timestamp_type timesta
|
|||||||
return s->is_view() ? make_view_mutations(view_ptr(s), timestamp, with_columns) : make_table_mutations(s, timestamp, with_columns);
|
return s->is_view() ? make_view_mutations(view_ptr(s), timestamp, with_columns) : make_table_mutations(s, timestamp, with_columns);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<mutation> make_create_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr view, api::timestamp_type timestamp)
|
future<std::vector<mutation>> make_create_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr view, api::timestamp_type timestamp)
|
||||||
{
|
{
|
||||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
std::vector<mutation> mutations;
|
||||||
auto mutations = make_create_keyspace_mutations(keyspace, timestamp, false);
|
|
||||||
// And also the serialized base table.
|
// And also the serialized base table.
|
||||||
auto base = keyspace->cf_meta_data().at(view->view_info()->base_name());
|
auto base = keyspace->cf_meta_data().at(view->view_info()->base_name());
|
||||||
add_table_or_view_to_schema_mutation(base, timestamp, true, mutations);
|
add_table_or_view_to_schema_mutation(base, timestamp, true, mutations);
|
||||||
add_table_or_view_to_schema_mutation(view, timestamp, true, mutations);
|
add_table_or_view_to_schema_mutation(view, timestamp, true, mutations);
|
||||||
return mutations;
|
|
||||||
|
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||||
|
return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<mutation> make_update_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace,
|
future<std::vector<mutation>> make_update_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace,
|
||||||
view_ptr old_view,
|
view_ptr old_view,
|
||||||
view_ptr new_view,
|
view_ptr new_view,
|
||||||
api::timestamp_type timestamp)
|
api::timestamp_type timestamp)
|
||||||
{
|
{
|
||||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
std::vector<mutation> mutations;
|
||||||
auto mutations = make_create_keyspace_mutations(keyspace, timestamp, false);
|
|
||||||
// And also the serialized base table.
|
// And also the serialized base table.
|
||||||
auto base = keyspace->cf_meta_data().at(new_view->view_info()->base_name());
|
auto base = keyspace->cf_meta_data().at(new_view->view_info()->base_name());
|
||||||
add_table_or_view_to_schema_mutation(base, timestamp, true, mutations);
|
add_table_or_view_to_schema_mutation(base, timestamp, true, mutations);
|
||||||
add_table_or_view_to_schema_mutation(new_view, timestamp, false, mutations);
|
add_table_or_view_to_schema_mutation(new_view, timestamp, false, mutations);
|
||||||
make_update_columns_mutations(old_view, new_view, timestamp, false, mutations);
|
make_update_columns_mutations(old_view, new_view, timestamp, false, mutations);
|
||||||
return mutations;
|
|
||||||
|
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||||
|
return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<mutation> make_drop_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr view, api::timestamp_type timestamp) {
|
future<std::vector<mutation>> make_drop_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr view, api::timestamp_type timestamp) {
|
||||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
std::vector<mutation> mutations;
|
||||||
auto mutations = make_create_keyspace_mutations(keyspace, timestamp, false);
|
|
||||||
make_drop_table_or_view_mutations(views(), view, timestamp, mutations);
|
make_drop_table_or_view_mutations(views(), view, timestamp, mutations);
|
||||||
return mutations;
|
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||||
|
return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
|
|||||||
@@ -80,6 +80,7 @@ future<std::vector<frozen_mutation>> convert_schema_to_mutations(distributed<ser
|
|||||||
|
|
||||||
future<schema_result_value_type>
|
future<schema_result_value_type>
|
||||||
read_schema_partition_for_keyspace(distributed<service::storage_proxy>& proxy, const sstring& schema_table_name, const sstring& keyspace_name);
|
read_schema_partition_for_keyspace(distributed<service::storage_proxy>& proxy, const sstring& schema_table_name, const sstring& keyspace_name);
|
||||||
|
future<mutation> read_keyspace_mutation(distributed<service::storage_proxy>&, const sstring& keyspace_name);
|
||||||
|
|
||||||
future<> merge_schema(distributed<service::storage_proxy>& proxy, std::vector<mutation> mutations);
|
future<> merge_schema(distributed<service::storage_proxy>& proxy, std::vector<mutation> mutations);
|
||||||
|
|
||||||
@@ -95,17 +96,17 @@ std::vector<mutation> make_drop_keyspace_mutations(lw_shared_ptr<keyspace_metada
|
|||||||
|
|
||||||
lw_shared_ptr<keyspace_metadata> create_keyspace_from_schema_partition(const schema_result_value_type& partition);
|
lw_shared_ptr<keyspace_metadata> create_keyspace_from_schema_partition(const schema_result_value_type& partition);
|
||||||
|
|
||||||
std::vector<mutation> make_create_type_mutations(lw_shared_ptr<keyspace_metadata> keyspace, user_type type, api::timestamp_type timestamp);
|
future<std::vector<mutation>> make_create_type_mutations(lw_shared_ptr<keyspace_metadata> keyspace, user_type type, api::timestamp_type timestamp);
|
||||||
|
|
||||||
std::vector<user_type> create_types_from_schema_partition(const schema_result_value_type& result);
|
std::vector<user_type> create_types_from_schema_partition(const schema_result_value_type& result);
|
||||||
|
|
||||||
std::vector<mutation> make_drop_type_mutations(lw_shared_ptr<keyspace_metadata> keyspace, user_type type, api::timestamp_type timestamp);
|
future<std::vector<mutation>> make_drop_type_mutations(lw_shared_ptr<keyspace_metadata> keyspace, user_type type, api::timestamp_type timestamp);
|
||||||
|
|
||||||
void add_type_to_schema_mutation(user_type type, api::timestamp_type timestamp, std::vector<mutation>& mutations);
|
void add_type_to_schema_mutation(user_type type, api::timestamp_type timestamp, std::vector<mutation>& mutations);
|
||||||
|
|
||||||
std::vector<mutation> make_create_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp);
|
future<std::vector<mutation>> make_create_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp);
|
||||||
|
|
||||||
std::vector<mutation> make_update_table_mutations(
|
future<std::vector<mutation>> make_update_table_mutations(
|
||||||
lw_shared_ptr<keyspace_metadata> keyspace,
|
lw_shared_ptr<keyspace_metadata> keyspace,
|
||||||
schema_ptr old_table,
|
schema_ptr old_table,
|
||||||
schema_ptr new_table,
|
schema_ptr new_table,
|
||||||
@@ -114,13 +115,13 @@ std::vector<mutation> make_update_table_mutations(
|
|||||||
|
|
||||||
future<std::map<sstring, schema_ptr>> create_tables_from_tables_partition(distributed<service::storage_proxy>& proxy, const schema_result::mapped_type& result);
|
future<std::map<sstring, schema_ptr>> create_tables_from_tables_partition(distributed<service::storage_proxy>& proxy, const schema_result::mapped_type& result);
|
||||||
|
|
||||||
std::vector<mutation> make_drop_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp);
|
future<std::vector<mutation>> make_drop_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp);
|
||||||
|
|
||||||
future<schema_ptr> create_table_from_name(distributed<service::storage_proxy>& proxy, const sstring& keyspace, const sstring& table);
|
future<schema_ptr> create_table_from_name(distributed<service::storage_proxy>& proxy, const sstring& keyspace, const sstring& table);
|
||||||
|
|
||||||
future<schema_ptr> create_table_from_table_row(distributed<service::storage_proxy>& proxy, const query::result_set_row& row);
|
future<schema_ptr> create_table_from_table_row(distributed<service::storage_proxy>& proxy, const query::result_set_row& row);
|
||||||
|
|
||||||
void prepare_builder_from_table_row(schema_builder& builder, const query::result_set_row& table_row);
|
void prepare_builder_from_table_row(schema_builder& builder, const query::result_set_row& table_row, bool is_dense = false);
|
||||||
|
|
||||||
schema_ptr create_table_from_mutations(schema_mutations, std::experimental::optional<table_schema_version> version = {});
|
schema_ptr create_table_from_mutations(schema_mutations, std::experimental::optional<table_schema_version> version = {});
|
||||||
|
|
||||||
@@ -128,14 +129,14 @@ void drop_column_from_schema_mutation(schema_ptr table, const column_definition&
|
|||||||
|
|
||||||
std::vector<column_definition> create_columns_from_column_rows(const query::result_set& rows,
|
std::vector<column_definition> create_columns_from_column_rows(const query::result_set& rows,
|
||||||
const sstring& keyspace,
|
const sstring& keyspace,
|
||||||
const sstring& table,/*,
|
const sstring& table,
|
||||||
AbstractType<?> rawComparator, */
|
data_type regular_column_name_type,
|
||||||
bool is_super);
|
bool is_super);
|
||||||
|
|
||||||
column_definition create_column_from_column_row(const query::result_set_row& row,
|
column_definition create_column_from_column_row(const query::result_set_row& row,
|
||||||
sstring keyspace,
|
sstring keyspace,
|
||||||
sstring table, /*,
|
sstring table,
|
||||||
AbstractType<?> rawComparator, */
|
data_type regular_column_name_type,
|
||||||
bool is_super);
|
bool is_super);
|
||||||
|
|
||||||
|
|
||||||
@@ -149,11 +150,11 @@ schema_mutations make_schema_mutations(schema_ptr s, api::timestamp_type timesta
|
|||||||
|
|
||||||
void add_table_or_view_to_schema_mutation(schema_ptr view, api::timestamp_type timestamp, bool with_columns, std::vector<mutation>& mutations);
|
void add_table_or_view_to_schema_mutation(schema_ptr view, api::timestamp_type timestamp, bool with_columns, std::vector<mutation>& mutations);
|
||||||
|
|
||||||
std::vector<mutation> make_create_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr view, api::timestamp_type timestamp);
|
future<std::vector<mutation>> make_create_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr view, api::timestamp_type timestamp);
|
||||||
|
|
||||||
std::vector<mutation> make_update_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr old_view, view_ptr new_view, api::timestamp_type timestamp);
|
future<std::vector<mutation>> make_update_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr old_view, view_ptr new_view, api::timestamp_type timestamp);
|
||||||
|
|
||||||
std::vector<mutation> make_drop_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr view, api::timestamp_type timestamp);
|
future<std::vector<mutation>> make_drop_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr view, api::timestamp_type timestamp);
|
||||||
|
|
||||||
sstring serialize_kind(column_kind kind);
|
sstring serialize_kind(column_kind kind);
|
||||||
column_kind deserialize_kind(sstring kind);
|
column_kind deserialize_kind(sstring kind);
|
||||||
|
|||||||
@@ -21,6 +21,7 @@
|
|||||||
|
|
||||||
#include "byte_ordered_partitioner.hh"
|
#include "byte_ordered_partitioner.hh"
|
||||||
#include "utils/class_registrator.hh"
|
#include "utils/class_registrator.hh"
|
||||||
|
#include "utils/div_ceil.hh"
|
||||||
#include <boost/multiprecision/cpp_int.hpp>
|
#include <boost/multiprecision/cpp_int.hpp>
|
||||||
#include <boost/multiprecision/cpp_dec_float.hpp>
|
#include <boost/multiprecision/cpp_dec_float.hpp>
|
||||||
|
|
||||||
@@ -162,22 +163,17 @@ byte_ordered_partitioner::shard_of(const token& t) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
token
|
token
|
||||||
byte_ordered_partitioner::token_for_next_shard(const token& t) const {
|
byte_ordered_partitioner::token_for_next_shard(const token& t, shard_id shard, unsigned spans) const {
|
||||||
switch (t._kind) {
|
switch (t._kind) {
|
||||||
case token::kind::before_all_keys:
|
|
||||||
return token_for_next_shard(token(token::kind::key, managed_bytes{int8_t(0)}));
|
|
||||||
case token::kind::after_all_keys:
|
case token::kind::after_all_keys:
|
||||||
return maximum_token();
|
return maximum_token();
|
||||||
|
case token::kind::before_all_keys:
|
||||||
case token::kind::key:
|
case token::kind::key:
|
||||||
auto s = shard_of(t) + 1;
|
auto orig = shard_of(t);
|
||||||
if (s == _shard_count) {
|
if (shard <= orig || spans != 1) {
|
||||||
return maximum_token();
|
return maximum_token();
|
||||||
}
|
}
|
||||||
auto e = (s << 8) / _shard_count;
|
auto e = div_ceil(shard << 8, _shard_count);
|
||||||
// Division truncates; adjust
|
|
||||||
while (((e * _shard_count) >> 8) != s) {
|
|
||||||
++e;
|
|
||||||
}
|
|
||||||
return token(token::kind::key, managed_bytes({int8_t(e)}));
|
return token(token::kind::key, managed_bytes({int8_t(e)}));
|
||||||
}
|
}
|
||||||
assert(0);
|
assert(0);
|
||||||
|
|||||||
@@ -29,10 +29,9 @@
|
|||||||
namespace dht {
|
namespace dht {
|
||||||
|
|
||||||
class byte_ordered_partitioner final : public i_partitioner {
|
class byte_ordered_partitioner final : public i_partitioner {
|
||||||
unsigned _shard_count;
|
|
||||||
public:
|
public:
|
||||||
byte_ordered_partitioner(unsigned shard_count = smp::count, unsigned ignore_msb = 0) : _shard_count(shard_count) {}
|
byte_ordered_partitioner(unsigned shard_count = smp::count, unsigned ignore_msb = 0) : i_partitioner(shard_count) {}
|
||||||
virtual const sstring name() { return "org.apache.cassandra.dht.ByteOrderedPartitioner"; }
|
virtual const sstring name() const { return "org.apache.cassandra.dht.ByteOrderedPartitioner"; }
|
||||||
virtual token get_token(const schema& s, partition_key_view key) override {
|
virtual token get_token(const schema& s, partition_key_view key) override {
|
||||||
auto&& legacy = key.legacy_form(s);
|
auto&& legacy = key.legacy_form(s);
|
||||||
return token(token::kind::key, bytes(legacy.begin(), legacy.end()));
|
return token(token::kind::key, bytes(legacy.begin(), legacy.end()));
|
||||||
@@ -75,7 +74,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
virtual unsigned shard_of(const token& t) const override;
|
virtual unsigned shard_of(const token& t) const override;
|
||||||
virtual token token_for_next_shard(const token& t) const override;
|
virtual token token_for_next_shard(const token& t, shard_id shard, unsigned spans) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -25,6 +25,7 @@
|
|||||||
#include "utils/class_registrator.hh"
|
#include "utils/class_registrator.hh"
|
||||||
#include "types.hh"
|
#include "types.hh"
|
||||||
#include "utils/murmur_hash.hh"
|
#include "utils/murmur_hash.hh"
|
||||||
|
#include "utils/div_ceil.hh"
|
||||||
#include <boost/range/adaptor/map.hpp>
|
#include <boost/range/adaptor/map.hpp>
|
||||||
#include <boost/range/irange.hpp>
|
#include <boost/range/irange.hpp>
|
||||||
#include <boost/range/adaptor/transformed.hpp>
|
#include <boost/range/adaptor/transformed.hpp>
|
||||||
@@ -160,7 +161,7 @@ std::ostream& operator<<(std::ostream& out, const decorated_key& dk) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: make it per-keyspace
|
// FIXME: make it per-keyspace
|
||||||
std::unique_ptr<i_partitioner> default_partitioner { new murmur3_partitioner };
|
std::unique_ptr<i_partitioner> default_partitioner;
|
||||||
|
|
||||||
void set_global_partitioner(const sstring& class_name, unsigned ignore_msb)
|
void set_global_partitioner(const sstring& class_name, unsigned ignore_msb)
|
||||||
{
|
{
|
||||||
@@ -176,6 +177,9 @@ void set_global_partitioner(const sstring& class_name, unsigned ignore_msb)
|
|||||||
|
|
||||||
i_partitioner&
|
i_partitioner&
|
||||||
global_partitioner() {
|
global_partitioner() {
|
||||||
|
if (!default_partitioner) {
|
||||||
|
default_partitioner = std::make_unique<murmur3_partitioner>(smp::count, 12);
|
||||||
|
}
|
||||||
return *default_partitioner;
|
return *default_partitioner;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -251,13 +255,35 @@ unsigned shard_of(const token& t) {
|
|||||||
return global_partitioner().shard_of(t);
|
return global_partitioner().shard_of(t);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the next non-empty intersection between _range and a span that
// starts at _start_boundary and ends just before the first token of
// _next_shard. Returns an empty optional once the scan position no longer
// overlaps _range or has reached maximum_token(); after that every call
// returns an empty optional.
stdx::optional<dht::token_range>
selective_token_range_sharder::next() {
    if (_done) {
        return {};
    }
    auto cmp = dht::token_comparator();
    auto more_to_scan = [&] {
        if (!_range.overlaps(dht::token_range(_start_boundary, {}), cmp)) {
            return false;
        }
        return !_start_boundary || _start_boundary->value() != maximum_token();
    };
    while (more_to_scan()) {
        auto span_end = _partitioner.token_for_next_shard(_start_token, _next_shard);
        auto span = dht::token_range(std::move(_start_boundary), range_bound<dht::token>(span_end, false));
        auto overlap = _range.intersection(std::move(span), cmp);
        // Advance the cursor to the next span of _shard before (possibly) returning,
        // so the following call resumes from there.
        _start_token = _partitioner.token_for_next_shard(span_end, _shard);
        _start_boundary = range_bound<dht::token>(_start_token);
        if (overlap) {
            return *overlap;
        }
    }
    _done = true;
    return {};
}
|
||||||
|
|
||||||
stdx::optional<ring_position_range_and_shard>
|
stdx::optional<ring_position_range_and_shard>
|
||||||
ring_position_range_sharder::next(const schema& s) {
|
ring_position_range_sharder::next(const schema& s) {
|
||||||
if (_done) {
|
if (_done) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
auto shard = _range.start() ? shard_of(_range.start()->value().token()) : global_partitioner().shard_of_minimum_token();
|
auto shard = _range.start() ? _partitioner.shard_of(_range.start()->value().token()) : _partitioner.shard_of_minimum_token();
|
||||||
auto shard_boundary_token = _partitioner.token_for_next_shard(_range.start() ? _range.start()->value().token() : minimum_token());
|
auto next_shard = shard + 1 < _partitioner.shard_count() ? shard + 1 : 0;
|
||||||
|
auto shard_boundary_token = _partitioner.token_for_next_shard(_range.start() ? _range.start()->value().token() : minimum_token(), next_shard);
|
||||||
auto shard_boundary = ring_position::starting_at(shard_boundary_token);
|
auto shard_boundary = ring_position::starting_at(shard_boundary_token);
|
||||||
if ((!_range.end() || shard_boundary.less_compare(s, _range.end()->value()))
|
if ((!_range.end() || shard_boundary.less_compare(s, _range.end()->value()))
|
||||||
&& shard_boundary_token != maximum_token()) {
|
&& shard_boundary_token != maximum_token()) {
|
||||||
@@ -273,6 +299,96 @@ ring_position_range_sharder::next(const schema& s) {
|
|||||||
return ring_position_range_and_shard{std::move(_range), shard};
|
return ring_position_range_and_shard{std::move(_range), shard};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Builds a sharder for `pr` using `partitioner`. _last_ends gets one (initially
// empty) slot per shard. When the range has a start bound, iteration begins at
// the shard owning that bound's token; otherwise the in-class defaults apply.
ring_position_exponential_sharder::ring_position_exponential_sharder(const i_partitioner& partitioner, partition_range pr)
        : _partitioner(partitioner)
        , _range(std::move(pr))
        , _last_ends(_partitioner.shard_count()) {
    const auto& lower = _range.start();
    if (lower) {
        const auto origin = _partitioner.shard_of(lower->value().token());
        _first_shard = origin;
        _next_shard = origin;
    }
}

// Convenience overload: same as above, using the global partitioner.
ring_position_exponential_sharder::ring_position_exponential_sharder(partition_range pr)
        : ring_position_exponential_sharder(global_partitioner(), std::move(pr)) {
}
|
||||||
|
|
||||||
|
// Produces the next batch of per-shard sub-ranges of _range.
//
// Each call distributes _spans_per_iteration spans round-robin over the shards,
// starting at _next_shard, and intersects each shard's proposed range with
// _range. `inorder` in the result is set when the number of spans does not
// exceed the shard count. On success _spans_per_iteration is doubled for the
// following call. Returns nullopt when no shard produced a non-empty
// intersection.
//
// `s` is the schema used to compare ring positions.
stdx::optional<ring_position_exponential_sharder_result>
ring_position_exponential_sharder::next(const schema& s) {
    const auto shard_count = _partitioner.shard_count();
    const auto shards_this_round = std::min(shard_count, _spans_per_iteration);

    auto ret = ring_position_exponential_sharder_result{};
    ret.per_shard_ranges.reserve(shards_this_round);
    ret.inorder = _spans_per_iteration <= shard_count;

    auto cmp = ring_position_comparator(s);
    // Every shard gets spans_per_shard spans; the first shards_with_extra_span
    // shards visited get one more to account for the remainder.
    const auto spans_per_shard = _spans_per_iteration / shard_count;
    const auto shards_with_extra_span = _spans_per_iteration % shard_count;
    const auto first_shard = _next_shard;
    _next_shard = (_next_shard + _spans_per_iteration) % shard_count;

    for (unsigned i = 0; i < shards_this_round; ++i) {
        const auto shard = (first_shard + i) % shard_count;
        if (_last_ends[shard] && *_last_ends[shard] == maximum_token()) {
            // Nothing left to hand out for this shard.
            continue;
        }
        // Resume where this shard's previous range ended, or at the start of
        // _range (minimum token if unbounded) on its first visit.
        range_bound<ring_position> this_shard_start = [&] {
            if (_last_ends[shard]) {
                return range_bound<ring_position>(ring_position::starting_at(*_last_ends[shard]));
            } else {
                return _range.start().value_or(range_bound<ring_position>(ring_position::starting_at(minimum_token())));
            }
        }();
        // token_for_next_shard() may give us the wrong boundary on the first pass, so add an extra span:
        auto extra_span = !_last_ends[shard] && shard != _first_shard;
        auto spans = spans_per_shard + unsigned(i < shards_with_extra_span);
        auto boundary = _partitioner.token_for_next_shard(this_shard_start.value().token(), shard, spans + extra_span);
        auto proposed_range = partition_range(this_shard_start, range_bound<ring_position>(ring_position::starting_at(boundary), false));
        auto intersection = _range.intersection(proposed_range, cmp);
        if (!intersection) {
            continue;
        }
        _last_ends[shard] = boundary;
        ret.per_shard_ranges.push_back(ring_position_range_and_shard{std::move(*intersection), shard});
    }
    if (ret.per_shard_ranges.empty()) {
        return stdx::nullopt;
    }
    _spans_per_iteration *= 2;
    return stdx::make_optional(std::move(ret));
}
|
||||||
|
|
||||||
|
|
||||||
|
// Copies `ranges` into the pending queue and, if there is at least one range,
// immediately loads the first one into _current_sharder (bumping _element).
ring_position_exponential_vector_sharder::ring_position_exponential_vector_sharder(const std::vector<nonwrapping_range<ring_position>>& ranges)
        : _ranges(ranges.begin(), ranges.end()) {
    if (_ranges.empty()) {
        return;
    }
    _current_sharder.emplace(_ranges.front());
    _ranges.pop_front();
    ++_element;
}
|
||||||
|
|
||||||
|
// Returns the next result from the current range's sharder, tagged with the
// current value of _element. When the current sharder is exhausted, moves on
// to the next queued range; returns nullopt once all ranges are exhausted
// (and on every call thereafter).
stdx::optional<ring_position_exponential_vector_sharder_result>
ring_position_exponential_vector_sharder::next(const schema& s) {
    while (_current_sharder) {
        if (auto chunk = _current_sharder->next(s)) {
            return stdx::make_optional(ring_position_exponential_vector_sharder_result{std::move(*chunk), _element});
        }
        if (_ranges.empty()) {
            // Nothing left to shard; dropping the sharder ends the loop.
            _current_sharder = stdx::nullopt;
        } else {
            _current_sharder.emplace(_ranges.front());
            _ranges.pop_front();
            ++_element;
        }
    }
    return stdx::nullopt;
}
|
||||||
|
|
||||||
|
|
||||||
ring_position_range_vector_sharder::ring_position_range_vector_sharder(dht::partition_range_vector ranges)
|
ring_position_range_vector_sharder::ring_position_range_vector_sharder(dht::partition_range_vector ranges)
|
||||||
: _ranges(std::move(ranges))
|
: _ranges(std::move(ranges))
|
||||||
, _current_range(_ranges.begin()) {
|
, _current_range(_ranges.begin()) {
|
||||||
@@ -300,6 +416,33 @@ int ring_position_comparator::operator()(const ring_position& lh, const ring_pos
|
|||||||
return lh.tri_compare(s, rh);
|
return lh.tri_compare(s, rh);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<partition_range>
|
||||||
|
split_range_to_single_shard(const i_partitioner& partitioner, const schema& s, const partition_range& pr, shard_id shard) {
|
||||||
|
auto cmp = ring_position_comparator(s);
|
||||||
|
auto ret = std::vector<partition_range>();
|
||||||
|
auto next_shard = shard + 1 == partitioner.shard_count() ? 0 : shard + 1;
|
||||||
|
auto start_token = pr.start() ? pr.start()->value().token() : minimum_token();
|
||||||
|
auto start_shard = partitioner.shard_of(start_token);
|
||||||
|
auto start_boundary = start_shard == shard ? pr.start() : range_bound<ring_position>(ring_position::starting_at(partitioner.token_for_next_shard(start_token, shard)));
|
||||||
|
while (pr.overlaps(partition_range(start_boundary, {}), cmp)
|
||||||
|
&& !(start_boundary && start_boundary->value().token() == maximum_token())) {
|
||||||
|
auto end_token = partitioner.token_for_next_shard(start_token, next_shard);
|
||||||
|
auto candidate = partition_range(std::move(start_boundary), range_bound<ring_position>(ring_position::starting_at(end_token), false));
|
||||||
|
auto intersection = pr.intersection(std::move(candidate), cmp);
|
||||||
|
if (intersection) {
|
||||||
|
ret.push_back(std::move(*intersection));
|
||||||
|
}
|
||||||
|
start_token = partitioner.token_for_next_shard(end_token, shard);
|
||||||
|
start_boundary = range_bound<ring_position>(ring_position::starting_at(start_token));
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<partition_range>
|
||||||
|
split_range_to_single_shard(const schema& s, const partition_range& pr, shard_id shard) {
|
||||||
|
return split_range_to_single_shard(global_partitioner(), s, pr, shard);
|
||||||
|
}
|
||||||
|
|
||||||
int token_comparator::operator()(const token& t1, const token& t2) const {
|
int token_comparator::operator()(const token& t1, const token& t2) const {
|
||||||
return tri_compare(t1, t2);
|
return tri_compare(t1, t2);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -180,7 +180,10 @@ public:
|
|||||||
using decorated_key_opt = std::experimental::optional<decorated_key>;
|
using decorated_key_opt = std::experimental::optional<decorated_key>;
|
||||||
|
|
||||||
class i_partitioner {
|
class i_partitioner {
|
||||||
|
protected:
|
||||||
|
unsigned _shard_count;
|
||||||
public:
|
public:
|
||||||
|
explicit i_partitioner(unsigned shard_count) : _shard_count(shard_count) {}
|
||||||
virtual ~i_partitioner() {}
|
virtual ~i_partitioner() {}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -272,7 +275,7 @@ public:
|
|||||||
/**
|
/**
|
||||||
* @return name of partitioner.
|
* @return name of partitioner.
|
||||||
*/
|
*/
|
||||||
virtual const sstring name() = 0;
|
virtual const sstring name() const = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculates the shard that handles a particular token.
|
* Calculates the shard that handles a particular token.
|
||||||
@@ -280,9 +283,17 @@ public:
|
|||||||
virtual unsigned shard_of(const token& t) const = 0;
|
virtual unsigned shard_of(const token& t) const = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the first token greater than `t` that is not in the same shard as `t`.
|
* Gets the first token greater than `t` that is in shard `shard`, and is a shard boundary (its first token).
|
||||||
|
*
|
||||||
|
* If the `spans` parameter is greater than zero, the result is the same as if the function
|
||||||
|
* is called `spans` times, each time applied to its return value, but efficiently. This allows
|
||||||
|
* selecting ranges that include multiple round trips around the 0..smp::count-1 shard span:
|
||||||
|
*
|
||||||
|
* token_for_next_shard(t, shard, spans) == token_for_next_shard(token_for_next_shard(t, shard, 1), shard, spans - 1)
|
||||||
|
*
|
||||||
|
* On overflow, maximum_token() is returned.
|
||||||
*/
|
*/
|
||||||
virtual token token_for_next_shard(const token& t) const = 0;
|
virtual token token_for_next_shard(const token& t, shard_id shard, unsigned spans = 1) const = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the first shard of the minimum token.
|
* Gets the first shard of the minimum token.
|
||||||
@@ -315,6 +326,13 @@ public:
|
|||||||
return tri_compare(t1, t2) < 0;
|
return tri_compare(t1, t2) < 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return number of shards configured for this partitioner
|
||||||
|
*/
|
||||||
|
unsigned shard_count() const {
|
||||||
|
return _shard_count;
|
||||||
|
}
|
||||||
|
|
||||||
friend bool operator==(const token& t1, const token& t2);
|
friend bool operator==(const token& t1, const token& t2);
|
||||||
friend bool operator<(const token& t1, const token& t2);
|
friend bool operator<(const token& t1, const token& t2);
|
||||||
friend int tri_compare(const token& t1, const token& t2);
|
friend int tri_compare(const token& t1, const token& t2);
|
||||||
@@ -476,6 +494,44 @@ struct ring_position_range_and_shard_and_element : ring_position_range_and_shard
|
|||||||
unsigned element;
|
unsigned element;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ring_position_exponential_sharder_result {
|
||||||
|
std::vector<ring_position_range_and_shard> per_shard_ranges;
|
||||||
|
bool inorder = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
// given a ring_position range, generates exponentially increasing
|
||||||
|
// sets per-shard sub-ranges
|
||||||
|
class ring_position_exponential_sharder {
|
||||||
|
const i_partitioner& _partitioner;
|
||||||
|
partition_range _range;
|
||||||
|
unsigned _spans_per_iteration = 1;
|
||||||
|
unsigned _first_shard = 0;
|
||||||
|
unsigned _next_shard = 0;
|
||||||
|
std::vector<stdx::optional<token>> _last_ends; // index = shard
|
||||||
|
public:
|
||||||
|
explicit ring_position_exponential_sharder(partition_range pr);
|
||||||
|
explicit ring_position_exponential_sharder(const i_partitioner& partitioner, partition_range pr);
|
||||||
|
stdx::optional<ring_position_exponential_sharder_result> next(const schema& s);
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ring_position_exponential_vector_sharder_result : ring_position_exponential_sharder_result {
|
||||||
|
ring_position_exponential_vector_sharder_result(ring_position_exponential_sharder_result rpesr, unsigned element)
|
||||||
|
: ring_position_exponential_sharder_result(std::move(rpesr)), element(element) {}
|
||||||
|
unsigned element; // range within vector from which this result came
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// given a vector of sorted, disjoint ring_position ranges, generates exponentially increasing
|
||||||
|
// sets per-shard sub-ranges. May be non-exponential when moving from one ring position range to another.
|
||||||
|
class ring_position_exponential_vector_sharder {
|
||||||
|
std::deque<nonwrapping_range<ring_position>> _ranges;
|
||||||
|
stdx::optional<ring_position_exponential_sharder> _current_sharder;
|
||||||
|
unsigned _element = 0;
|
||||||
|
public:
|
||||||
|
explicit ring_position_exponential_vector_sharder(const std::vector<nonwrapping_range<ring_position>>& ranges);
|
||||||
|
stdx::optional<ring_position_exponential_vector_sharder_result> next(const schema& s);
|
||||||
|
};
|
||||||
|
|
||||||
class ring_position_range_vector_sharder {
|
class ring_position_range_vector_sharder {
|
||||||
using vec_type = dht::partition_range_vector;
|
using vec_type = dht::partition_range_vector;
|
||||||
vec_type _ranges;
|
vec_type _ranges;
|
||||||
@@ -504,6 +560,33 @@ split_range_to_shards(dht::partition_range pr, const schema& s);
|
|||||||
std::map<unsigned, dht::partition_range_vector>
|
std::map<unsigned, dht::partition_range_vector>
|
||||||
split_ranges_to_shards(const dht::token_range_vector& ranges, const schema& s);
|
split_ranges_to_shards(const dht::token_range_vector& ranges, const schema& s);
|
||||||
|
|
||||||
|
// Intersect a partition_range with a shard and return the resulting sub-ranges, in sorted order
|
||||||
|
std::vector<partition_range> split_range_to_single_shard(const schema& s, const dht::partition_range& pr, shard_id shard);
|
||||||
|
std::vector<partition_range> split_range_to_single_shard(const i_partitioner& partitioner, const schema& s, const dht::partition_range& pr, shard_id shard);
|
||||||
|
|
||||||
|
class selective_token_range_sharder {
|
||||||
|
const i_partitioner& _partitioner;
|
||||||
|
dht::token_range _range;
|
||||||
|
shard_id _shard;
|
||||||
|
bool _done = false;
|
||||||
|
shard_id _next_shard;
|
||||||
|
dht::token _start_token;
|
||||||
|
stdx::optional<range_bound<dht::token>> _start_boundary;
|
||||||
|
public:
|
||||||
|
explicit selective_token_range_sharder(dht::token_range range, shard_id shard)
|
||||||
|
: selective_token_range_sharder(global_partitioner(), std::move(range), shard) {}
|
||||||
|
selective_token_range_sharder(const i_partitioner& partitioner, dht::token_range range, shard_id shard)
|
||||||
|
: _partitioner(partitioner)
|
||||||
|
, _range(std::move(range))
|
||||||
|
, _shard(shard)
|
||||||
|
, _next_shard(_shard + 1 == _partitioner.shard_count() ? 0 : _shard + 1)
|
||||||
|
, _start_token(_range.start() ? _range.start()->value() : minimum_token())
|
||||||
|
, _start_boundary(_partitioner.shard_of(_start_token) == shard ?
|
||||||
|
_range.start() : range_bound<dht::token>(_partitioner.token_for_next_shard(_start_token, shard))) {
|
||||||
|
}
|
||||||
|
stdx::optional<dht::token_range> next();
|
||||||
|
};
|
||||||
|
|
||||||
} // dht
|
} // dht
|
||||||
|
|
||||||
namespace std {
|
namespace std {
|
||||||
|
|||||||
@@ -24,9 +24,40 @@
|
|||||||
#include "sstables/key.hh"
|
#include "sstables/key.hh"
|
||||||
#include "utils/class_registrator.hh"
|
#include "utils/class_registrator.hh"
|
||||||
#include <boost/lexical_cast.hpp>
|
#include <boost/lexical_cast.hpp>
|
||||||
|
#include <boost/range/irange.hpp>
|
||||||
|
|
||||||
namespace dht {
|
namespace dht {
|
||||||
|
|
||||||
|
inline
|
||||||
|
unsigned
|
||||||
|
murmur3_partitioner::zero_based_shard_of(uint64_t token, unsigned shards, unsigned sharding_ignore_msb_bits) {
|
||||||
|
// This is the master function, the inverses have to match it wrt. rounding errors.
|
||||||
|
token <<= sharding_ignore_msb_bits;
|
||||||
|
// Treat "token" as a fraction in the interval [0, 1); compute:
|
||||||
|
// shard = floor((0.token) * shards)
|
||||||
|
return (uint128_t(token) * shards) >> 64;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<uint64_t>
|
||||||
|
murmur3_partitioner::init_zero_based_shard_start(unsigned shards, unsigned sharding_ignore_msb_bits) {
|
||||||
|
// computes the inverse of zero_based_shard_of(). ret[s] will return the smallest token that belongs to s
|
||||||
|
if (shards == 1) {
|
||||||
|
// Avoid the while loops below getting confused finding the "edge" between two nonexistent shards
|
||||||
|
return std::vector<uint64_t>(1, uint64_t(0));
|
||||||
|
}
|
||||||
|
auto ret = std::vector<uint64_t>(shards);
|
||||||
|
for (auto s : boost::irange<unsigned>(0, shards)) {
|
||||||
|
uint64_t token = (uint128_t(s) << 64) / shards;
|
||||||
|
token >>= sharding_ignore_msb_bits; // leftmost bits are ignored by zero_based_shard_of
|
||||||
|
// token is the start of the next shard, and can be slightly before due to rounding errors; adjust
|
||||||
|
while (zero_based_shard_of(token, shards, sharding_ignore_msb_bits) != s) {
|
||||||
|
++token;
|
||||||
|
}
|
||||||
|
ret[s] = token;
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
inline
|
inline
|
||||||
int64_t
|
int64_t
|
||||||
murmur3_partitioner::normalize(int64_t in) {
|
murmur3_partitioner::normalize(int64_t in) {
|
||||||
@@ -88,6 +119,16 @@ inline int64_t long_token(const token& t) {
|
|||||||
return net::ntoh(*lp);
|
return net::ntoh(*lp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t
|
||||||
|
murmur3_partitioner::unbias(const token& t) const {
|
||||||
|
return uint64_t(long_token(t)) + uint64_t(std::numeric_limits<int64_t>::min());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Translates a zero-based unsigned value back into a token by subtracting the
// (unsigned-converted) minimum int64_t; the inverse of unbias().
token
murmur3_partitioner::bias(uint64_t n) const {
    const uint64_t offset = uint64_t(std::numeric_limits<int64_t>::min());
    return get_token(n - offset);
}
|
||||||
|
|
||||||
sstring murmur3_partitioner::to_sstring(const token& t) const {
|
sstring murmur3_partitioner::to_sstring(const token& t) const {
|
||||||
return ::to_sstring(long_token(t));
|
return ::to_sstring(long_token(t));
|
||||||
}
|
}
|
||||||
@@ -210,46 +251,43 @@ murmur3_partitioner::shard_of(const token& t) const {
|
|||||||
case token::kind::after_all_keys:
|
case token::kind::after_all_keys:
|
||||||
return _shard_count - 1;
|
return _shard_count - 1;
|
||||||
case token::kind::key:
|
case token::kind::key:
|
||||||
int64_t l = long_token(t);
|
uint64_t adjusted = unbias(t);
|
||||||
// treat l as a fraction between 0 and 1 and use 128-bit arithmetic to
|
return zero_based_shard_of(adjusted, _shard_count, _sharding_ignore_msb_bits);
|
||||||
// divide that range evenly among shards:
|
|
||||||
uint64_t adjusted = uint64_t(l) + uint64_t(std::numeric_limits<int64_t>::min());
|
|
||||||
adjusted <<= _sharding_ignore_msb_bits;
|
|
||||||
return (__int128(adjusted) * _shard_count) >> 64;
|
|
||||||
}
|
}
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the first token greater than `t` that starts a span of shard
// `shard`, advancing `spans` such boundaries. Returns maximum_token() when
// `t` is after all keys or when the requested boundary lies past the end of
// the token space.
//
// The zero-based token is viewed as two parts: the top
// _sharding_ignore_msb_bits bits select a span, and the remaining bits place
// the token within it; _shard_start[shard] supplies the in-span offset of the
// shard's first token.
token
murmur3_partitioner::token_for_next_shard(const token& t, shard_id shard, unsigned spans) const {
    uint64_t n = 0;
    switch (t._kind) {
        case token::kind::before_all_keys:
            // Treated as zero-based token 0.
            break;
        case token::kind::after_all_keys:
            return maximum_token();
        case token::kind::key:
            n = unbias(t);
            break;
    }
    auto s = zero_based_shard_of(n, _shard_count, _sharding_ignore_msb_bits);

    if (!_sharding_ignore_msb_bits) {
        // This ought to be the same as the else branch, but avoids shifts by 64
        n = _shard_start[shard];
        if (spans > 1 || shard <= s) {
            // There is only one span, so any wrap-around overflows.
            return maximum_token();
        }
    } else {
        auto left_part = n >> (64 - _sharding_ignore_msb_bits);
        // Staying within the current span is enough when `shard` is still ahead of t's shard.
        left_part += spans - unsigned(shard > s);
        // Use a 64-bit one: shifting a 32-bit `1u` by 32 or more bits is undefined.
        if (left_part >= (uint64_t(1) << _sharding_ignore_msb_bits)) {
            return maximum_token();
        }
        left_part <<= (64 - _sharding_ignore_msb_bits);
        auto right_part = _shard_start[shard];
        n = left_part | right_part;
    }
    return bias(n);
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -23,20 +23,21 @@
|
|||||||
|
|
||||||
#include "i_partitioner.hh"
|
#include "i_partitioner.hh"
|
||||||
#include "bytes.hh"
|
#include "bytes.hh"
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
namespace dht {
|
namespace dht {
|
||||||
|
|
||||||
class murmur3_partitioner final : public i_partitioner {
|
class murmur3_partitioner final : public i_partitioner {
|
||||||
unsigned _shard_count;
|
|
||||||
unsigned _sharding_ignore_msb_bits;
|
unsigned _sharding_ignore_msb_bits;
|
||||||
|
std::vector<uint64_t> _shard_start = init_zero_based_shard_start(_shard_count, _sharding_ignore_msb_bits);
|
||||||
public:
|
public:
|
||||||
murmur3_partitioner(unsigned shard_count = smp::count, unsigned sharding_ignore_msb_bits = 0)
|
murmur3_partitioner(unsigned shard_count = smp::count, unsigned sharding_ignore_msb_bits = 0)
|
||||||
: _shard_count(shard_count)
|
: i_partitioner(shard_count)
|
||||||
// if one shard, ignore sharding_ignore_msb_bits as they will just cause needless
|
// if one shard, ignore sharding_ignore_msb_bits as they will just cause needless
|
||||||
// range breaks
|
// range breaks
|
||||||
, _sharding_ignore_msb_bits(shard_count > 1 ? sharding_ignore_msb_bits : 0) {
|
, _sharding_ignore_msb_bits(shard_count > 1 ? sharding_ignore_msb_bits : 0) {
|
||||||
}
|
}
|
||||||
virtual const sstring name() { return "org.apache.cassandra.dht.Murmur3Partitioner"; }
|
virtual const sstring name() const { return "org.apache.cassandra.dht.Murmur3Partitioner"; }
|
||||||
virtual token get_token(const schema& s, partition_key_view key) override;
|
virtual token get_token(const schema& s, partition_key_view key) override;
|
||||||
virtual token get_token(const sstables::key_view& key) override;
|
virtual token get_token(const sstables::key_view& key) override;
|
||||||
virtual token get_random_token() override;
|
virtual token get_random_token() override;
|
||||||
@@ -50,11 +51,16 @@ public:
|
|||||||
virtual dht::token from_bytes(bytes_view bytes) const override;
|
virtual dht::token from_bytes(bytes_view bytes) const override;
|
||||||
|
|
||||||
virtual unsigned shard_of(const token& t) const override;
|
virtual unsigned shard_of(const token& t) const override;
|
||||||
virtual token token_for_next_shard(const token& t) const override;
|
virtual token token_for_next_shard(const token& t, shard_id shard, unsigned spans) const override;
|
||||||
private:
|
private:
|
||||||
|
using uint128_t = unsigned __int128;
|
||||||
static int64_t normalize(int64_t in);
|
static int64_t normalize(int64_t in);
|
||||||
token get_token(bytes_view key);
|
token get_token(bytes_view key);
|
||||||
token get_token(uint64_t value) const;
|
token get_token(uint64_t value) const;
|
||||||
|
token bias(uint64_t value) const; // translate from a zero-based range
|
||||||
|
uint64_t unbias(const token& t) const; // translate to a zero-based range
|
||||||
|
static unsigned zero_based_shard_of(uint64_t zero_based_token, unsigned shards, unsigned sharding_ignore_msb_bits);
|
||||||
|
static std::vector<uint64_t> init_zero_based_shard_start(unsigned shards, unsigned sharding_ignore_msb_bits);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -22,6 +22,7 @@
|
|||||||
#include "md5_hasher.hh"
|
#include "md5_hasher.hh"
|
||||||
#include "random_partitioner.hh"
|
#include "random_partitioner.hh"
|
||||||
#include "utils/class_registrator.hh"
|
#include "utils/class_registrator.hh"
|
||||||
|
#include "utils/div_ceil.hh"
|
||||||
#include <boost/multiprecision/cpp_int.hpp>
|
#include <boost/multiprecision/cpp_int.hpp>
|
||||||
|
|
||||||
namespace dht {
|
namespace dht {
|
||||||
@@ -222,21 +223,20 @@ unsigned random_partitioner::shard_of(const token& t) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
token
|
token
|
||||||
random_partitioner::token_for_next_shard(const token& t) const {
|
random_partitioner::token_for_next_shard(const token& t, shard_id shard, unsigned spans) const {
|
||||||
|
if (_shard_count == 1) {
|
||||||
|
return maximum_token();
|
||||||
|
}
|
||||||
switch (t._kind) {
|
switch (t._kind) {
|
||||||
case token::kind::after_all_keys:
|
case token::kind::after_all_keys:
|
||||||
return maximum_token();
|
return maximum_token();
|
||||||
case token::kind::before_all_keys:
|
case token::kind::before_all_keys:
|
||||||
case token::kind::key:
|
case token::kind::key:
|
||||||
auto s = shard_of(t) + 1;
|
auto orig = shard_of(t);
|
||||||
if (s == _shard_count) {
|
if (shard <= orig || spans != 1) {
|
||||||
return maximum_token();
|
return maximum_token();
|
||||||
}
|
}
|
||||||
auto t = (boost::multiprecision::uint256_t(s) << 127) / _shard_count;
|
auto t = div_ceil(boost::multiprecision::uint256_t(shard) << 127, _shard_count);
|
||||||
// division truncates, so adjust
|
|
||||||
while (((t * _shard_count) >> 127) != s) {
|
|
||||||
++t;
|
|
||||||
}
|
|
||||||
return cppint_to_token(t.convert_to<boost::multiprecision::uint128_t>());
|
return cppint_to_token(t.convert_to<boost::multiprecision::uint128_t>());
|
||||||
}
|
}
|
||||||
assert(0);
|
assert(0);
|
||||||
|
|||||||
@@ -29,10 +29,9 @@
|
|||||||
namespace dht {
|
namespace dht {
|
||||||
|
|
||||||
class random_partitioner final : public i_partitioner {
|
class random_partitioner final : public i_partitioner {
|
||||||
unsigned _shard_count;
|
|
||||||
public:
|
public:
|
||||||
random_partitioner(unsigned shard_count = smp::count, unsigned ignore_msb = 0) : _shard_count(shard_count) {}
|
random_partitioner(unsigned shard_count = smp::count, unsigned ignore_msb = 0) : i_partitioner(shard_count) {}
|
||||||
virtual const sstring name() { return "org.apache.cassandra.dht.RandomPartitioner"; }
|
virtual const sstring name() const { return "org.apache.cassandra.dht.RandomPartitioner"; }
|
||||||
virtual token get_token(const schema& s, partition_key_view key) override;
|
virtual token get_token(const schema& s, partition_key_view key) override;
|
||||||
virtual token get_token(const sstables::key_view& key) override;
|
virtual token get_token(const sstables::key_view& key) override;
|
||||||
virtual token get_random_token() override;
|
virtual token get_random_token() override;
|
||||||
@@ -46,7 +45,7 @@ public:
|
|||||||
virtual dht::token from_sstring(const sstring& t) const override;
|
virtual dht::token from_sstring(const sstring& t) const override;
|
||||||
virtual dht::token from_bytes(bytes_view bytes) const override;
|
virtual dht::token from_bytes(bytes_view bytes) const override;
|
||||||
virtual unsigned shard_of(const token& t) const override;
|
virtual unsigned shard_of(const token& t) const override;
|
||||||
virtual token token_for_next_shard(const token& t) const override;
|
virtual token token_for_next_shard(const token& t, shard_id shard, unsigned spans) const override;
|
||||||
private:
|
private:
|
||||||
token get_token(bytes data);
|
token get_token(bytes data);
|
||||||
};
|
};
|
||||||
|
|||||||
2
dist/ami/files/scylla-ami
vendored
2
dist/ami/files/scylla-ami
vendored
Submodule dist/ami/files/scylla-ami updated: d5a439759d...407e8f37ca
1
dist/common/modprobe.d/scylla-raid0.conf
vendored
Normal file
1
dist/common/modprobe.d/scylla-raid0.conf
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
options raid0 devices_discard_performance=Y
|
||||||
4
dist/common/scripts/node_exporter_install
vendored
4
dist/common/scripts/node_exporter_install
vendored
@@ -27,11 +27,11 @@ if [ -f /usr/bin/node_exporter ]; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
version=0.12.0
|
version=0.14.0
|
||||||
dir=/usr/lib/scylla/Prometheus/node_exporter
|
dir=/usr/lib/scylla/Prometheus/node_exporter
|
||||||
mkdir -p $dir
|
mkdir -p $dir
|
||||||
cd $dir
|
cd $dir
|
||||||
curl -L https://github.com/prometheus/node_exporter/releases/download/$version/node_exporter-$version.linux-amd64.tar.gz -o $dir/node_exporter-$version.linux-amd64.tar.gz
|
curl -L https://github.com/prometheus/node_exporter/releases/download/v$version/node_exporter-$version.linux-amd64.tar.gz -o $dir/node_exporter-$version.linux-amd64.tar.gz
|
||||||
tar -xvzf $dir/node_exporter-$version.linux-amd64.tar.gz
|
tar -xvzf $dir/node_exporter-$version.linux-amd64.tar.gz
|
||||||
rm $dir/node_exporter-$version.linux-amd64.tar.gz
|
rm $dir/node_exporter-$version.linux-amd64.tar.gz
|
||||||
ln -s $dir/node_exporter-$version.linux-amd64/node_exporter /usr/bin
|
ln -s $dir/node_exporter-$version.linux-amd64/node_exporter /usr/bin
|
||||||
|
|||||||
74
dist/common/scripts/scylla_raid_setup
vendored
74
dist/common/scripts/scylla_raid_setup
vendored
@@ -5,15 +5,20 @@
|
|||||||
. /usr/lib/scylla/scylla_lib.sh
|
. /usr/lib/scylla/scylla_lib.sh
|
||||||
|
|
||||||
print_usage() {
|
print_usage() {
|
||||||
echo "scylla-raid-setup --disks /dev/hda,/dev/hdb... --raiddev /dev/md0 --update-fstab"
|
echo "scylla-raid-setup --disks /dev/hda,/dev/hdb... --raiddev /dev/md0 --update-fstab --root /var/lib/scylla --volume-role [all|data|commitlog]"
|
||||||
echo " --disks specify disks for RAID"
|
echo " --disks specify disks for RAID"
|
||||||
echo " --raiddev MD device name for RAID"
|
echo " --raiddev MD device name for RAID"
|
||||||
echo " --update-fstab update /etc/fstab for RAID"
|
echo " --update-fstab update /etc/fstab for RAID"
|
||||||
|
echo " --root specify the root of the tree"
|
||||||
|
echo " --volume-role specify how will this device be used (data, commitlog, or all)"
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
RAID=/dev/md0
|
RAID=/dev/md0
|
||||||
FSTAB=0
|
FSTAB=0
|
||||||
|
ROOT=/var/lib/scylla
|
||||||
|
ROLE="all"
|
||||||
|
|
||||||
while [ $# -gt 0 ]; do
|
while [ $# -gt 0 ]; do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
"--disks")
|
"--disks")
|
||||||
@@ -29,12 +34,37 @@ while [ $# -gt 0 ]; do
|
|||||||
FSTAB=1
|
FSTAB=1
|
||||||
shift 1
|
shift 1
|
||||||
;;
|
;;
|
||||||
|
"--root")
|
||||||
|
ROOT="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
"--volume-role")
|
||||||
|
ROLE="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
*)
|
*)
|
||||||
print_usage
|
print_usage
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
|
ROOT=${ROOT%/}
|
||||||
|
case "$ROLE" in
|
||||||
|
"all")
|
||||||
|
MOUNT_AT=$ROOT
|
||||||
|
;;
|
||||||
|
"data")
|
||||||
|
MOUNT_AT="$ROOT/data"
|
||||||
|
;;
|
||||||
|
"commitlog")
|
||||||
|
MOUNT_AT="$ROOT/commitlog"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "Invalid role specified ($ROLE)"
|
||||||
|
print_usage
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
if [ "$DISKS" = "" ]; then
|
if [ "$DISKS" = "" ]; then
|
||||||
print_usage
|
print_usage
|
||||||
fi
|
fi
|
||||||
@@ -51,8 +81,8 @@ if [ -e $RAID ]; then
|
|||||||
echo "$RAID is already using"
|
echo "$RAID is already using"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
if [ "`mount|grep /var/lib/scylla`" != "" ]; then
|
if mountpoint -q $MOUNT_AT; then
|
||||||
echo "/var/lib/scylla is already mounted"
|
echo "$MOUNT_AT is already mounted"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -61,18 +91,32 @@ if is_debian_variant; then
|
|||||||
else
|
else
|
||||||
yum -y install mdadm xfsprogs
|
yum -y install mdadm xfsprogs
|
||||||
fi
|
fi
|
||||||
mdadm --create --verbose --force --run $RAID --level=0 -c1024 --raid-devices=$NR_DISK $DISKS
|
if [ "$ID" = "ubuntu" ] && [ "$VERSION_ID" = "14.04" ]; then
|
||||||
mkfs.xfs $RAID -f
|
mdadm --create --verbose --force --run $RAID --level=0 -c1024 --raid-devices=$NR_DISK $DISKS
|
||||||
echo "DEVICE $DISKS" > /etc/mdadm.conf
|
mkfs.xfs $RAID -f
|
||||||
mdadm --detail --scan >> /etc/mdadm.conf
|
else
|
||||||
|
for dsk in $DISKS; do
|
||||||
|
blkdiscard $dsk &
|
||||||
|
done
|
||||||
|
wait
|
||||||
|
mdadm --create --verbose --force --run $RAID --level=0 -c1024 --raid-devices=$NR_DISK $DISKS
|
||||||
|
mkfs.xfs $RAID -f -K
|
||||||
|
fi
|
||||||
|
mdadm --detail --scan > /etc/mdadm.conf
|
||||||
|
|
||||||
|
mkdir -p "$MOUNT_AT"
|
||||||
|
mount -t xfs -o noatime $RAID "$MOUNT_AT"
|
||||||
|
|
||||||
|
# create this unconditionally so we are more robust about ordering
|
||||||
|
# if the script is run multiple times. But must do after mount in case
|
||||||
|
# we are mounting the root
|
||||||
|
mkdir -p "$ROOT/data"
|
||||||
|
mkdir -p "$ROOT/commitlog"
|
||||||
|
mkdir -p "$ROOT/coredump"
|
||||||
|
chown scylla:scylla "$ROOT"
|
||||||
|
chown scylla:scylla "$ROOT"/*
|
||||||
|
|
||||||
if [ $FSTAB -ne 0 ]; then
|
if [ $FSTAB -ne 0 ]; then
|
||||||
UUID=`blkid $RAID | awk '{print $2}'`
|
UUID=`blkid $RAID | awk '{print $2}'`
|
||||||
echo "$UUID /var/lib/scylla xfs noatime 0 0" >> /etc/fstab
|
echo "$UUID $MOUNT_AT xfs noatime 0 0" >> /etc/fstab
|
||||||
fi
|
fi
|
||||||
mount -t xfs -o noatime $RAID /var/lib/scylla
|
|
||||||
|
|
||||||
mkdir -p /var/lib/scylla/data
|
|
||||||
mkdir -p /var/lib/scylla/commitlog
|
|
||||||
mkdir -p /var/lib/scylla/coredump
|
|
||||||
chown scylla:scylla /var/lib/scylla/*
|
|
||||||
chown scylla:scylla /var/lib/scylla/
|
|
||||||
|
|||||||
16
dist/common/scripts/scylla_setup
vendored
16
dist/common/scripts/scylla_setup
vendored
@@ -81,7 +81,7 @@ verify_package() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
list_block_devices() {
|
list_block_devices() {
|
||||||
if lsblk --help | grep -q -e -p; then
|
if lsblk --help | grep -q -e '^\s*-p'; then
|
||||||
lsblk -pnr | awk '{ print $1 }'
|
lsblk -pnr | awk '{ print $1 }'
|
||||||
else
|
else
|
||||||
ls -1 /dev/sd* /dev/hd* /dev/xvd* /dev/nvme* /dev/mapper/* 2>/dev/null|grep -v control
|
ls -1 /dev/sd* /dev/hd* /dev/xvd* /dev/nvme* /dev/mapper/* 2>/dev/null|grep -v control
|
||||||
@@ -218,6 +218,9 @@ while [ $# -gt 0 ]; do
|
|||||||
print_usage
|
print_usage
|
||||||
shift 1
|
shift 1
|
||||||
;;
|
;;
|
||||||
|
*)
|
||||||
|
echo "Invalid option: $@"
|
||||||
|
print_usage
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
@@ -267,21 +270,24 @@ if [ $ENABLE_SERVICE -eq 1 ]; then
|
|||||||
printf "[housekeeping]\ncheck-version: True\n" > /etc/scylla.d/housekeeping.cfg
|
printf "[housekeeping]\ncheck-version: True\n" > /etc/scylla.d/housekeeping.cfg
|
||||||
fi
|
fi
|
||||||
if is_systemd; then
|
if is_systemd; then
|
||||||
systemctl unmask scylla-housekeeping.timer
|
systemctl unmask scylla-housekeeping-daily.timer
|
||||||
|
systemctl unmask scylla-housekeeping-restart.timer
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
if [ ! -f /etc/scylla.d/housekeeping.cfg ]; then
|
if [ ! -f /etc/scylla.d/housekeeping.cfg ]; then
|
||||||
printf "[housekeeping]\ncheck-version: False\n" > /etc/scylla.d/housekeeping.cfg
|
printf "[housekeeping]\ncheck-version: False\n" > /etc/scylla.d/housekeeping.cfg
|
||||||
fi
|
fi
|
||||||
if is_systemd; then
|
if is_systemd; then
|
||||||
systemctl mask scylla-housekeeping.timer
|
systemctl mask scylla-housekeeping-daily.timer
|
||||||
systemctl stop scylla-housekeeping.timer || true
|
systemctl mask scylla-housekeeping-restart.timer
|
||||||
|
systemctl stop scylla-housekeeping-daily.timer || true
|
||||||
|
systemctl stop scylla-housekeeping-restart.timer || true
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
CUR_VERSION=`scylla --version` || true
|
CUR_VERSION=`scylla --version` || true
|
||||||
if [ "$CUR_VERSION" != "" ] && [ "$UUID" != "" ]; then
|
if [ "$CUR_VERSION" != "" ]; then
|
||||||
NEW_VERSION=`sudo -u scylla /usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid version --version $CUR_VERSION --mode i` || true
|
NEW_VERSION=`sudo -u scylla /usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid version --version $CUR_VERSION --mode i` || true
|
||||||
if [ "$NEW_VERSION" != "" ]; then
|
if [ "$NEW_VERSION" != "" ]; then
|
||||||
echo $NEW_VERSION
|
echo $NEW_VERSION
|
||||||
|
|||||||
2
dist/common/sysctl.d/99-scylla-sched.conf
vendored
2
dist/common/sysctl.d/99-scylla-sched.conf
vendored
@@ -5,7 +5,7 @@ kernel.sched_tunable_scaling = 0
|
|||||||
kernel.sched_min_granularity_ns = 500000
|
kernel.sched_min_granularity_ns = 500000
|
||||||
|
|
||||||
# Don't delay unrelated workloads
|
# Don't delay unrelated workloads
|
||||||
kernel.sched_wakeup_granularity_ns = 500000
|
kernel.sched_wakeup_granularity_ns = 450000
|
||||||
|
|
||||||
# Schedule all tasks in this period
|
# Schedule all tasks in this period
|
||||||
kernel.sched_latency_ns = 1000000
|
kernel.sched_latency_ns = 1000000
|
||||||
|
|||||||
@@ -1,12 +1,12 @@
|
|||||||
[Unit]
|
[Unit]
|
||||||
Description=Scylla Housekeeping
|
Description=Scylla Housekeeping daily mode
|
||||||
After=network.target
|
After=network.target
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=simple
|
Type=simple
|
||||||
User=scylla
|
User=scylla
|
||||||
Group=scylla
|
Group=scylla
|
||||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg version --mode d
|
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files @@REPOFILES@@ version --mode d
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
11
dist/common/systemd/scylla-housekeeping-daily.timer
vendored
Normal file
11
dist/common/systemd/scylla-housekeeping-daily.timer
vendored
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=Run Scylla Housekeeping daily mode
|
||||||
|
After=scylla-server.service
|
||||||
|
BindsTo=scylla-server.service
|
||||||
|
|
||||||
|
[Timer]
|
||||||
|
OnActiveSec=1d
|
||||||
|
OnUnitActiveSec=1d
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=timers.target
|
||||||
12
dist/common/systemd/scylla-housekeeping-restart.service.in
vendored
Normal file
12
dist/common/systemd/scylla-housekeeping-restart.service.in
vendored
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=Scylla Housekeeping restart mode
|
||||||
|
After=network.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
User=scylla
|
||||||
|
Group=scylla
|
||||||
|
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files @@REPOFILES@@ version --mode r
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
@@ -1,12 +1,11 @@
|
|||||||
[Unit]
|
[Unit]
|
||||||
Description=Run Scylla Housekeeping daily
|
Description=Run Scylla Housekeeping restart mode
|
||||||
After=scylla-server.service
|
After=scylla-server.service
|
||||||
BindsTo=scylla-server.service
|
BindsTo=scylla-server.service
|
||||||
|
|
||||||
[Timer]
|
[Timer]
|
||||||
# set OnActiveSec to 3 to safely avoid issues/1846
|
# set OnActiveSec to 3 to safely avoid issues/1846
|
||||||
OnActiveSec=3
|
OnActiveSec=3
|
||||||
OnUnitActiveSec=1d
|
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=timers.target
|
WantedBy=timers.target
|
||||||
6
dist/common/systemd/scylla-server.service.in
vendored
6
dist/common/systemd/scylla-server.service.in
vendored
@@ -2,7 +2,8 @@
|
|||||||
Description=Scylla Server
|
Description=Scylla Server
|
||||||
After=network.target
|
After=network.target
|
||||||
Wants=scylla-jmx.service
|
Wants=scylla-jmx.service
|
||||||
Wants=scylla-housekeeping.timer
|
Wants=scylla-housekeeping-restart.timer
|
||||||
|
Wants=scylla-housekeeping-daily.timer
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
PermissionsStartOnly=true
|
PermissionsStartOnly=true
|
||||||
@@ -21,6 +22,9 @@ KillMode=process
|
|||||||
Restart=on-abnormal
|
Restart=on-abnormal
|
||||||
User=scylla
|
User=scylla
|
||||||
OOMScoreAdjust=-950
|
OOMScoreAdjust=-950
|
||||||
|
StandardOutput=syslog
|
||||||
|
StandardError=syslog
|
||||||
|
SyslogLevelPrefix=false
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
|
|||||||
27
dist/debian/build_deb.sh
vendored
27
dist/debian/build_deb.sh
vendored
@@ -7,6 +7,14 @@ print_usage() {
|
|||||||
echo " --rebuild-dep rebuild dependency packages"
|
echo " --rebuild-dep rebuild dependency packages"
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
install_deps() {
|
||||||
|
echo Y | sudo mk-build-deps
|
||||||
|
DEB_FILE=`ls *-build-deps*.deb`
|
||||||
|
sudo gdebi -n $DEB_FILE
|
||||||
|
sudo rm -f $DEB_FILE
|
||||||
|
sudo dpkg -P ${DEB_FILE%%_*.deb}
|
||||||
|
}
|
||||||
|
|
||||||
REBUILD=0
|
REBUILD=0
|
||||||
DIST=0
|
DIST=0
|
||||||
while [ $# -gt 0 ]; do
|
while [ $# -gt 0 ]; do
|
||||||
@@ -54,6 +62,9 @@ fi
|
|||||||
if [ ! -f /usr/bin/lsb_release ]; then
|
if [ ! -f /usr/bin/lsb_release ]; then
|
||||||
sudo apt-get -y install lsb-release
|
sudo apt-get -y install lsb-release
|
||||||
fi
|
fi
|
||||||
|
if [ ! -f /usr/bin/gdebi ]; then
|
||||||
|
sudo apt-get -y install gdebi-core
|
||||||
|
fi
|
||||||
|
|
||||||
DISTRIBUTION=`lsb_release -i|awk '{print $3}'`
|
DISTRIBUTION=`lsb_release -i|awk '{print $3}'`
|
||||||
CODENAME=`lsb_release -c|awk '{print $2}'`
|
CODENAME=`lsb_release -c|awk '{print $2}'`
|
||||||
@@ -84,7 +95,8 @@ if [ "$DISTRIBUTION" = "Debian" ]; then
|
|||||||
sed -i -e "s/@@COMPILER@@/g++-5/g" debian/rules
|
sed -i -e "s/@@COMPILER@@/g++-5/g" debian/rules
|
||||||
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, g++-5, libunwind-dev/g" debian/control
|
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, g++-5, libunwind-dev/g" debian/control
|
||||||
sed -i -e "s#@@INSTALL@@##g" debian/scylla-server.install
|
sed -i -e "s#@@INSTALL@@##g" debian/scylla-server.install
|
||||||
sed -i -e "s#@@HKDOTTIMER@@#dist/common/systemd/scylla-housekeeping.timer /lib/systemd/system#g" debian/scylla-server.install
|
sed -i -e "s#@@HKDOTTIMER_D@@#dist/common/systemd/scylla-housekeeping-daily.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||||
|
sed -i -e "s#@@HKDOTTIMER_R@@#dist/common/systemd/scylla-housekeeping-restart.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||||
sed -i -e "s#@@SYSCTL@@#dist/debian/sysctl.d/99-scylla.conf etc/sysctl.d#g" debian/scylla-server.install
|
sed -i -e "s#@@SYSCTL@@#dist/debian/sysctl.d/99-scylla.conf etc/sysctl.d#g" debian/scylla-server.install
|
||||||
elif [ "$VERSION_ID" = "14.04" ]; then
|
elif [ "$VERSION_ID" = "14.04" ]; then
|
||||||
sed -i -e "s/@@REVISION@@/0ubuntu1/g" debian/changelog
|
sed -i -e "s/@@REVISION@@/0ubuntu1/g" debian/changelog
|
||||||
@@ -92,7 +104,8 @@ elif [ "$VERSION_ID" = "14.04" ]; then
|
|||||||
sed -i -e "s/@@COMPILER@@/g++-5/g" debian/rules
|
sed -i -e "s/@@COMPILER@@/g++-5/g" debian/rules
|
||||||
sed -i -e "s/@@BUILD_DEPENDS@@/g++-5, libunwind8-dev/g" debian/control
|
sed -i -e "s/@@BUILD_DEPENDS@@/g++-5, libunwind8-dev/g" debian/control
|
||||||
sed -i -e "s#@@INSTALL@@#dist/debian/sudoers.d/scylla etc/sudoers.d#g" debian/scylla-server.install
|
sed -i -e "s#@@INSTALL@@#dist/debian/sudoers.d/scylla etc/sudoers.d#g" debian/scylla-server.install
|
||||||
sed -i -e "s#@@HKDOTTIMER@@##g" debian/scylla-server.install
|
sed -i -e "s#@@HKDOTTIMER_D@@##g" debian/scylla-server.install
|
||||||
|
sed -i -e "s#@@HKDOTTIMER_R@@##g" debian/scylla-server.install
|
||||||
sed -i -e "s#@@SYSCTL@@#dist/debian/sysctl.d/99-scylla.conf etc/sysctl.d#g" debian/scylla-server.install
|
sed -i -e "s#@@SYSCTL@@#dist/debian/sysctl.d/99-scylla.conf etc/sysctl.d#g" debian/scylla-server.install
|
||||||
else
|
else
|
||||||
sed -i -e "s/@@REVISION@@/0ubuntu1/g" debian/changelog
|
sed -i -e "s/@@REVISION@@/0ubuntu1/g" debian/changelog
|
||||||
@@ -100,7 +113,8 @@ else
|
|||||||
sed -i -e "s/@@COMPILER@@/g++/g" debian/rules
|
sed -i -e "s/@@COMPILER@@/g++/g" debian/rules
|
||||||
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, g++, libunwind-dev/g" debian/control
|
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, g++, libunwind-dev/g" debian/control
|
||||||
sed -i -e "s#@@INSTALL@@##g" debian/scylla-server.install
|
sed -i -e "s#@@INSTALL@@##g" debian/scylla-server.install
|
||||||
sed -i -e "s#@@HKDOTTIMER@@#dist/common/systemd/scylla-housekeeping.timer /lib/systemd/system#g" debian/scylla-server.install
|
sed -i -e "s#@@HKDOTTIMER_D@@#dist/common/systemd/scylla-housekeeping-daily.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||||
|
sed -i -e "s#@@HKDOTTIMER_R@@#dist/common/systemd/scylla-housekeeping-restart.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||||
sed -i -e "s#@@SYSCTL@@##g" debian/scylla-server.install
|
sed -i -e "s#@@SYSCTL@@##g" debian/scylla-server.install
|
||||||
fi
|
fi
|
||||||
if [ $DIST -gt 0 ]; then
|
if [ $DIST -gt 0 ]; then
|
||||||
@@ -116,7 +130,10 @@ fi
|
|||||||
|
|
||||||
cp dist/common/systemd/scylla-server.service.in debian/scylla-server.service
|
cp dist/common/systemd/scylla-server.service.in debian/scylla-server.service
|
||||||
sed -i -e "s#@@SYSCONFDIR@@#/etc/default#g" debian/scylla-server.service
|
sed -i -e "s#@@SYSCONFDIR@@#/etc/default#g" debian/scylla-server.service
|
||||||
cp dist/common/systemd/scylla-housekeeping.service debian/scylla-server.scylla-housekeeping.service
|
cp dist/common/systemd/scylla-housekeeping-daily.service.in debian/scylla-server.scylla-housekeeping-daily.service
|
||||||
|
sed -i -e "s#@@REPOFILES@@#'/etc/apt/sources.list.d/scylla*.list'#g" debian/scylla-server.scylla-housekeeping-daily.service
|
||||||
|
cp dist/common/systemd/scylla-housekeeping-restart.service.in debian/scylla-server.scylla-housekeeping-restart.service
|
||||||
|
sed -i -e "s#@@REPOFILES@@#'/etc/apt/sources.list.d/scylla*.list'#g" debian/scylla-server.scylla-housekeeping-restart.service
|
||||||
cp dist/common/systemd/node-exporter.service debian/scylla-server.node-exporter.service
|
cp dist/common/systemd/node-exporter.service debian/scylla-server.node-exporter.service
|
||||||
|
|
||||||
if [ "$VERSION_ID" = "14.04" ] && [ $REBUILD -eq 0 ]; then
|
if [ "$VERSION_ID" = "14.04" ] && [ $REBUILD -eq 0 ]; then
|
||||||
@@ -140,5 +157,5 @@ else
|
|||||||
sudo apt-get install g++
|
sudo apt-get install g++
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo Y | sudo mk-build-deps -i -r
|
install_deps
|
||||||
debuild -r fakeroot -us -uc
|
debuild -r fakeroot -us -uc
|
||||||
|
|||||||
2
dist/debian/control.in
vendored
2
dist/debian/control.in
vendored
@@ -4,7 +4,7 @@ Homepage: http://scylladb.com
|
|||||||
Section: database
|
Section: database
|
||||||
Priority: optional
|
Priority: optional
|
||||||
Standards-Version: 3.9.5
|
Standards-Version: 3.9.5
|
||||||
Build-Depends: debhelper (>= 9), libyaml-cpp-dev, liblz4-dev, libsnappy-dev, libcrypto++-dev, libjsoncpp-dev, libaio-dev, libthrift-dev, thrift-compiler, antlr3, antlr3-c++-dev, ragel, ninja-build, git, libboost-program-options1.55-dev | libboost-program-options-dev, libboost-filesystem1.55-dev | libboost-filesystem-dev, libboost-system1.55-dev | libboost-system-dev, libboost-thread1.55-dev | libboost-thread-dev, libboost-test1.55-dev | libboost-test-dev, libgnutls28-dev, libhwloc-dev, libnuma-dev, libpciaccess-dev, xfslibs-dev, python3-pyparsing, libxml2-dev, libsctp-dev, python-urwid, pciutils, libprotobuf-dev, protobuf-compiler, systemtap-sdt-dev, @@BUILD_DEPENDS@@
|
Build-Depends: debhelper (>= 9), libyaml-cpp-dev, liblz4-dev, libsnappy-dev, libcrypto++-dev, libjsoncpp-dev, libaio-dev, libthrift-dev, thrift-compiler, antlr3, antlr3-c++-dev, ragel, ninja-build, git, libboost-program-options1.55-dev | libboost-program-options-dev, libboost-filesystem1.55-dev | libboost-filesystem-dev, libboost-system1.55-dev | libboost-system-dev, libboost-thread1.55-dev | libboost-thread-dev, libboost-test1.55-dev | libboost-test-dev, libgnutls28-dev, libhwloc-dev, libnuma-dev, libpciaccess-dev, xfslibs-dev, python3-pyparsing, libxml2-dev, libsctp-dev, python-urwid, pciutils, libprotobuf-dev, protobuf-compiler, systemtap-sdt-dev, libtool, automake, @@BUILD_DEPENDS@@
|
||||||
|
|
||||||
Package: scylla-conf
|
Package: scylla-conf
|
||||||
Architecture: any
|
Architecture: any
|
||||||
|
|||||||
11
dist/debian/debian/scylla-kernel-conf.postinst
vendored
11
dist/debian/debian/scylla-kernel-conf.postinst
vendored
@@ -1,7 +1,14 @@
|
|||||||
#!/bin/sh
|
#!/bin/bash
|
||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
sysctl -p/etc/sysctl.d/99-scylla-sched.conf
|
KVER=$(uname -r)
|
||||||
|
|
||||||
|
if [[ $KVER =~ 3\.13\.0\-([0-9]+)-generic ]]; then
|
||||||
|
echo "kernel $KVER detected, skip running sysctl..."
|
||||||
|
else
|
||||||
|
# expect failures in virtualized environments
|
||||||
|
sysctl -p/etc/sysctl.d/99-scylla-sched.conf || :
|
||||||
|
fi
|
||||||
|
|
||||||
#DEBHELPER#
|
#DEBHELPER#
|
||||||
|
|||||||
@@ -29,10 +29,10 @@ setgid scylla
|
|||||||
script
|
script
|
||||||
# make sure scylla is up before checking for the version
|
# make sure scylla is up before checking for the version
|
||||||
sleep 5
|
sleep 5
|
||||||
/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -c /etc/scylla.d/housekeeping.cfg -q version --mode r || true
|
/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid --repo-files '/etc/apt/sources.list.d/scylla*.list' -c /etc/scylla.d/housekeeping.cfg -q version --mode r || true
|
||||||
while [ 1 ]
|
while [ 1 ]
|
||||||
do
|
do
|
||||||
sleep 1d
|
sleep 1d
|
||||||
/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -c /etc/scylla.d/housekeeping.cfg -q version --mode d || true
|
/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid --repo-files '/etc/apt/sources.list.d/scylla*.list' -c /etc/scylla.d/housekeeping.cfg -q version --mode d || true
|
||||||
done
|
done
|
||||||
end script
|
end script
|
||||||
|
|||||||
1
dist/debian/debian/scylla-server.upstart
vendored
1
dist/debian/debian/scylla-server.upstart
vendored
@@ -41,6 +41,7 @@ script
|
|||||||
fi
|
fi
|
||||||
. "$i"
|
. "$i"
|
||||||
done
|
done
|
||||||
|
export SCYLLA_CONF SCYLLA_HOME
|
||||||
exec /usr/bin/scylla $SCYLLA_ARGS $SEASTAR_IO $DEV_MODE $CPUSET
|
exec /usr/bin/scylla $SCYLLA_ARGS $SEASTAR_IO $DEV_MODE $CPUSET
|
||||||
end script
|
end script
|
||||||
|
|
||||||
|
|||||||
33
dist/debian/dep/build_dependency.sh
vendored
33
dist/debian/dep/build_dependency.sh
vendored
@@ -1,7 +1,25 @@
|
|||||||
#!/bin/bash -e
|
#!/bin/bash -e
|
||||||
|
|
||||||
. /etc/os-release
|
. /etc/os-release
|
||||||
|
install_deps() {
|
||||||
|
echo Y | sudo mk-build-deps
|
||||||
|
DEB_FILE=`ls *-build-deps*.deb`
|
||||||
|
sudo gdebi -n $DEB_FILE
|
||||||
|
sudo rm -f $DEB_FILE
|
||||||
|
sudo dpkg -P ${DEB_FILE%%_*.deb}
|
||||||
|
}
|
||||||
|
|
||||||
DISTRIBUTION=`lsb_release -i|awk '{print $3}'`
|
DISTRIBUTION=`lsb_release -i|awk '{print $3}'`
|
||||||
|
CODENAME=`lsb_release -c|awk '{print $2}'`
|
||||||
|
|
||||||
|
# workaround fix for #2444
|
||||||
|
if [ "$CODENAME" = "jessie" ]; then
|
||||||
|
if [ ! -e /etc/apt/sources.list.d/jessie-backports.list ]; then
|
||||||
|
sudo sh -c 'echo deb "http://httpredir.debian.org/debian jessie-backports main" > /etc/apt/sources.list.d/jessie-backports.list'
|
||||||
|
fi
|
||||||
|
sudo apt-get -y update
|
||||||
|
sudo apt-get install -t jessie-backports -y texlive
|
||||||
|
fi
|
||||||
|
|
||||||
sudo apt-get install -y gdebi-core
|
sudo apt-get install -y gdebi-core
|
||||||
if [ "$VERSION_ID" = "14.04" ] || [ "$DISTRIBUTION" = "Debian" ]; then
|
if [ "$VERSION_ID" = "14.04" ] || [ "$DISTRIBUTION" = "Debian" ]; then
|
||||||
@@ -11,7 +29,7 @@ if [ "$VERSION_ID" = "14.04" ] || [ "$DISTRIBUTION" = "Debian" ]; then
|
|||||||
cp -a dist/debian/dep/antlr3-3.5.2/* build/antlr3-3.5.2
|
cp -a dist/debian/dep/antlr3-3.5.2/* build/antlr3-3.5.2
|
||||||
cd build/antlr3-3.5.2
|
cd build/antlr3-3.5.2
|
||||||
wget -nv http://www.antlr3.org/download/antlr-3.5.2-complete-no-st3.jar
|
wget -nv http://www.antlr3.org/download/antlr-3.5.2-complete-no-st3.jar
|
||||||
echo Y | sudo mk-build-deps -i -r
|
install_deps
|
||||||
debuild -r fakeroot --no-tgz-check -us -uc
|
debuild -r fakeroot --no-tgz-check -us -uc
|
||||||
cd -
|
cd -
|
||||||
fi
|
fi
|
||||||
@@ -39,7 +57,7 @@ if [ "$VERSION_ID" = "14.04" ] || [ "$DISTRIBUTION" = "Debian" ]; then
|
|||||||
cd -
|
cd -
|
||||||
cd build/gdb-7.11
|
cd build/gdb-7.11
|
||||||
patch -p0 < ../../dist/debian/dep/gdb.diff
|
patch -p0 < ../../dist/debian/dep/gdb.diff
|
||||||
echo Y | sudo mk-build-deps -i -r
|
install_deps
|
||||||
debuild -r fakeroot --no-tgz-check -us -uc
|
debuild -r fakeroot --no-tgz-check -us -uc
|
||||||
cd -
|
cd -
|
||||||
fi
|
fi
|
||||||
@@ -56,7 +74,7 @@ if [ ! -f build/antlr3-c++-dev_*.deb ]; then
|
|||||||
cd -
|
cd -
|
||||||
cp -a dist/debian/dep/antlr3-c++-dev-3.5.2/debian build/antlr3-c++-dev-3.5.2
|
cp -a dist/debian/dep/antlr3-c++-dev-3.5.2/debian build/antlr3-c++-dev-3.5.2
|
||||||
cd build/antlr3-c++-dev-3.5.2
|
cd build/antlr3-c++-dev-3.5.2
|
||||||
echo Y | sudo mk-build-deps -i -r
|
install_deps
|
||||||
debuild -r fakeroot --no-tgz-check -us -uc
|
debuild -r fakeroot --no-tgz-check -us -uc
|
||||||
cd -
|
cd -
|
||||||
fi
|
fi
|
||||||
@@ -70,17 +88,18 @@ if [ ! -f build/libthrift0_*.deb ]; then
|
|||||||
tar xpf thrift-0.9.3.tar.gz
|
tar xpf thrift-0.9.3.tar.gz
|
||||||
cd thrift-0.9.3
|
cd thrift-0.9.3
|
||||||
patch -p0 < ../../dist/debian/dep/thrift.diff
|
patch -p0 < ../../dist/debian/dep/thrift.diff
|
||||||
echo Y | sudo mk-build-deps -i -r
|
install_deps
|
||||||
debuild -r fakeroot --no-tgz-check -us -uc
|
debuild -r fakeroot --no-tgz-check -us -uc
|
||||||
cd ../..
|
cd ../..
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$DISTRIBUTION" = "Debian" ] && [ "$VERSION_ID" = "8" ]; then
|
if [ "$DISTRIBUTION" = "Debian" ] && [ "$VERSION_ID" = "8" ]; then
|
||||||
if [ ! -f build/gcc-5_*.deb ]; then
|
if [ ! -f build/gcc-5_*.deb ]; then
|
||||||
sudo cp dist/debian/dep/debian-stretch-source.list /etc/apt/sources.list.d/
|
|
||||||
sudo apt-get update
|
|
||||||
cd build
|
cd build
|
||||||
apt-get source gcc-5/stretch=5.4.1-2
|
wget https://launchpad.net/debian/+archive/primary/+files/gcc-5_5.4.1-5.dsc
|
||||||
|
wget https://launchpad.net/debian/+archive/primary/+files/gcc-5_5.4.1.orig.tar.gz
|
||||||
|
wget https://launchpad.net/debian/+archive/primary/+files/gcc-5_5.4.1-5.diff.gz
|
||||||
|
dpkg-source -x gcc-5_5.4.1-5.dsc
|
||||||
cd gcc-5-5.4.1
|
cd gcc-5-5.4.1
|
||||||
# resolve build time dependencies manually, since mk-build-deps doesn't works for gcc package
|
# resolve build time dependencies manually, since mk-build-deps doesn't works for gcc package
|
||||||
sudo apt-get install -y g++-multilib libc6-dev-i386 lib32gcc1 libc6-dev-x32 libx32gcc1 libc6-dbg m4 libtool autoconf2.64 autogen gawk zlib1g-dev systemtap-sdt-dev gperf bison flex gdb texinfo locales sharutils libantlr-java libffi-dev gnat-4.9 libisl-dev libmpc-dev libmpfr-dev libgmp-dev dejagnu realpath chrpath quilt doxygen graphviz ghostscript texlive-latex-base xsltproc libxml2-utils docbook-xsl-ns
|
sudo apt-get install -y g++-multilib libc6-dev-i386 lib32gcc1 libc6-dev-x32 libx32gcc1 libc6-dbg m4 libtool autoconf2.64 autogen gawk zlib1g-dev systemtap-sdt-dev gperf bison flex gdb texinfo locales sharutils libantlr-java libffi-dev gnat-4.9 libisl-dev libmpc-dev libmpfr-dev libgmp-dev dejagnu realpath chrpath quilt doxygen graphviz ghostscript texlive-latex-base xsltproc libxml2-utils docbook-xsl-ns
|
||||||
|
|||||||
20
dist/debian/dep/debian-gcc-5-jessie.diff
vendored
20
dist/debian/dep/debian-gcc-5-jessie.diff
vendored
@@ -1,6 +1,5 @@
|
|||||||
diff -Nur debian/rules.conf /home/syuu/gcc-5-5.4.1/debian/rules.conf
|
--- debian/rules.conf 2017-02-24 19:02:52.000000000 +0000
|
||||||
--- debian/rules.conf 2016-10-14 04:54:21.000000000 +0000
|
+++ /home/syuu/gcc-5.5/gcc-5-5.4.1/debian/rules.conf 2017-02-24 18:13:59.000000000 +0000
|
||||||
+++ /home/syuu/gcc-5-5.4.1/debian/rules.conf 2016-10-12 17:28:54.138711378 +0000
|
|
||||||
@@ -206,7 +206,7 @@
|
@@ -206,7 +206,7 @@
|
||||||
ifneq (,$(filter $(distrelease),vivid))
|
ifneq (,$(filter $(distrelease),vivid))
|
||||||
BINUTILSBDV = 2.25-3~
|
BINUTILSBDV = 2.25-3~
|
||||||
@@ -10,14 +9,16 @@ diff -Nur debian/rules.conf /home/syuu/gcc-5-5.4.1/debian/rules.conf
|
|||||||
else ifneq (,$(filter $(distrelease),sid stretch xenial))
|
else ifneq (,$(filter $(distrelease),sid stretch xenial))
|
||||||
BINUTILSBDV = 2.26.1
|
BINUTILSBDV = 2.26.1
|
||||||
endif
|
endif
|
||||||
@@ -387,9 +387,9 @@
|
@@ -386,10 +386,10 @@
|
||||||
|
MPFR_BUILD_DEP = libmpfr-dev (>= 3.0.0-9~),
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ISL_BUILD_DEP = libisl-dev,
|
-ISL_BUILD_DEP = libisl-dev,
|
||||||
-ifneq (,$(filter $(distrelease),jessie sid experimental))
|
-ifneq (,$(filter $(distrelease),jessie stretch sid experimental))
|
||||||
- ISL_BUILD_DEP = libisl-dev (>= 0.14),
|
- ISL_BUILD_DEP = libisl-dev (>= 0.14),
|
||||||
-endif
|
-endif
|
||||||
+#ifneq (,$(filter $(distrelease),jessie sid experimental))
|
+#ISL_BUILD_DEP = libisl-dev,
|
||||||
|
+#ifneq (,$(filter $(distrelease),jessie stretch sid experimental))
|
||||||
+# ISL_BUILD_DEP = libisl-dev (>= 0.14),
|
+# ISL_BUILD_DEP = libisl-dev (>= 0.14),
|
||||||
+#endif
|
+#endif
|
||||||
|
|
||||||
@@ -37,9 +38,8 @@ diff -Nur debian/rules.conf /home/syuu/gcc-5-5.4.1/debian/rules.conf
|
|||||||
ifneq ($(DEB_CROSS),yes)
|
ifneq ($(DEB_CROSS),yes)
|
||||||
# all archs for which to create b-d's
|
# all archs for which to create b-d's
|
||||||
any_archs = alpha amd64 armel armhf arm64 i386 mips mipsel mips64 mips64el powerpc ppc64 ppc64el m68k sh4 sparc64 s390x x32
|
any_archs = alpha amd64 armel armhf arm64 i386 mips mipsel mips64 mips64el powerpc ppc64 ppc64el m68k sh4 sparc64 s390x x32
|
||||||
diff -Nur debian/rules.defs /home/syuu/gcc-5-5.4.1/debian/rules.defs
|
--- debian/rules.defs 2017-02-24 19:02:52.000000000 +0000
|
||||||
--- debian/rules.defs 2016-10-14 04:54:21.000000000 +0000
|
+++ /home/syuu/gcc-5.5/gcc-5-5.4.1/debian/rules.defs 2017-02-24 18:13:59.000000000 +0000
|
||||||
+++ /home/syuu/gcc-5-5.4.1/debian/rules.defs 2016-10-13 10:18:51.647631508 +0000
|
|
||||||
@@ -412,7 +412,7 @@
|
@@ -412,7 +412,7 @@
|
||||||
# gcc versions (fixincludes, libgcj-common) ...
|
# gcc versions (fixincludes, libgcj-common) ...
|
||||||
#with_common_pkgs := yes
|
#with_common_pkgs := yes
|
||||||
|
|||||||
2
dist/debian/dep/debian-stretch-source.list
vendored
2
dist/debian/dep/debian-stretch-source.list
vendored
@@ -1,2 +0,0 @@
|
|||||||
deb-src http://httpredir.debian.org/debian stretch main
|
|
||||||
deb-src http://httpredir.debian.org/debian stretch-updates main
|
|
||||||
3
dist/debian/rules.in
vendored
3
dist/debian/rules.in
vendored
@@ -11,7 +11,8 @@ override_dh_auto_clean:
|
|||||||
|
|
||||||
override_dh_installinit:
|
override_dh_installinit:
|
||||||
dh_installinit --no-start @@DH_INSTALLINIT@@
|
dh_installinit --no-start @@DH_INSTALLINIT@@
|
||||||
dh_installinit --no-start --name scylla-housekeeping @@DH_INSTALLINIT@@
|
dh_installinit --no-start --name scylla-housekeeping-daily @@DH_INSTALLINIT@@
|
||||||
|
dh_installinit --no-start --name scylla-housekeeping-restart @@DH_INSTALLINIT@@
|
||||||
dh_installinit --no-start --name node-exporter @@DH_INSTALLINIT@@
|
dh_installinit --no-start --name node-exporter @@DH_INSTALLINIT@@
|
||||||
|
|
||||||
override_dh_strip:
|
override_dh_strip:
|
||||||
|
|||||||
3
dist/debian/scylla-server.install.in
vendored
3
dist/debian/scylla-server.install.in
vendored
@@ -15,6 +15,7 @@ build/release/iotune usr/bin
|
|||||||
dist/common/bin/scyllatop usr/bin
|
dist/common/bin/scyllatop usr/bin
|
||||||
dist/common/sbin/* usr/sbin
|
dist/common/sbin/* usr/sbin
|
||||||
@@ADDHKCFG@@
|
@@ADDHKCFG@@
|
||||||
@@HKDOTTIMER@@
|
@@HKDOTTIMER_D@@
|
||||||
|
@@HKDOTTIMER_R@@
|
||||||
@@INSTALL@@
|
@@INSTALL@@
|
||||||
@@SYSCTL@@
|
@@SYSCTL@@
|
||||||
|
|||||||
4
dist/docker/redhat/Dockerfile
vendored
4
dist/docker/redhat/Dockerfile
vendored
@@ -7,7 +7,7 @@ ENV container docker
|
|||||||
VOLUME [ "/sys/fs/cgroup" ]
|
VOLUME [ "/sys/fs/cgroup" ]
|
||||||
|
|
||||||
#install scylla
|
#install scylla
|
||||||
RUN curl http://downloads.scylladb.com/rpm/unstable/centos/master/latest/scylla.repo -o /etc/yum.repos.d/scylla.repo
|
RUN curl http://downloads.scylladb.com/rpm/centos/scylla-1.7.repo -o /etc/yum.repos.d/scylla.repo
|
||||||
RUN yum -y install epel-release
|
RUN yum -y install epel-release
|
||||||
RUN yum -y clean expire-cache
|
RUN yum -y clean expire-cache
|
||||||
RUN yum -y update
|
RUN yum -y update
|
||||||
@@ -38,6 +38,6 @@ ADD commandlineparser.py /commandlineparser.py
|
|||||||
ADD docker-entrypoint.py /docker-entrypoint.py
|
ADD docker-entrypoint.py /docker-entrypoint.py
|
||||||
ENTRYPOINT ["/docker-entrypoint.py"]
|
ENTRYPOINT ["/docker-entrypoint.py"]
|
||||||
|
|
||||||
EXPOSE 10000 9042 9160 7000 7001
|
EXPOSE 10000 9042 9160 9180 7000 7001
|
||||||
VOLUME [ "/var/lib/scylla" ]
|
VOLUME [ "/var/lib/scylla" ]
|
||||||
RUN chown -R scylla.scylla /var/lib/scylla
|
RUN chown -R scylla.scylla /var/lib/scylla
|
||||||
|
|||||||
2
dist/redhat/build_rpm.sh
vendored
2
dist/redhat/build_rpm.sh
vendored
@@ -74,7 +74,7 @@ if [ "$ID" = "centos" ] || [ "$ID" = "rhel" ]; then
|
|||||||
./dist/redhat/centos_dep/build_dependency.sh
|
./dist/redhat/centos_dep/build_dependency.sh
|
||||||
else
|
else
|
||||||
if [ "$ID" = "centos" ]; then
|
if [ "$ID" = "centos" ]; then
|
||||||
sudo curl https://s3.amazonaws.com/downloads.scylladb.com/rpm/unstable/centos/master/latest/scylla.repo -o /etc/yum.repos.d/scylla.repo
|
sudo curl http://downloads.scylladb.com.s3.amazonaws.com/rpm/centos/scylla-1.7.repo -o /etc/yum.repos.d/scylla.repo
|
||||||
else
|
else
|
||||||
echo "RHEL requires --rebuild-deps option."
|
echo "RHEL requires --rebuild-deps option."
|
||||||
exit 1
|
exit 1
|
||||||
|
|||||||
11
dist/redhat/centos_dep/build_dependency.sh
vendored
11
dist/redhat/centos_dep/build_dependency.sh
vendored
@@ -28,10 +28,6 @@ if [ ! -f boost-1.58.0-11.fc23.src.rpm ]; then
|
|||||||
wget -nv https://kojipkgs.fedoraproject.org//packages/boost/1.58.0/11.fc23/src/boost-1.58.0-11.fc23.src.rpm
|
wget -nv https://kojipkgs.fedoraproject.org//packages/boost/1.58.0/11.fc23/src/boost-1.58.0-11.fc23.src.rpm
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ! -f ninja-build-1.6.0-2.fc23.src.rpm ]; then
|
|
||||||
wget -nv https://kojipkgs.fedoraproject.org//packages/ninja-build/1.6.0/2.fc23/src/ninja-build-1.6.0-2.fc23.src.rpm
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ! -f ragel-6.8-5.fc23.src.rpm ]; then
|
if [ ! -f ragel-6.8-5.fc23.src.rpm ]; then
|
||||||
wget -nv https://kojipkgs.fedoraproject.org//packages/ragel/6.8/5.fc23/src/ragel-6.8-5.fc23.src.rpm
|
wget -nv https://kojipkgs.fedoraproject.org//packages/ragel/6.8/5.fc23/src/ragel-6.8-5.fc23.src.rpm
|
||||||
fi
|
fi
|
||||||
@@ -94,13 +90,6 @@ if [ ! -f $RPMBUILD/RPMS/x86_64/scylla-boost-1.58.0-11.el7*.x86_64.rpm ]; then
|
|||||||
fi
|
fi
|
||||||
do_install scylla-boost*
|
do_install scylla-boost*
|
||||||
|
|
||||||
if [ ! -f $RPMBUILD/RPMS/x86_64/scylla-ninja-build-1.6.0-2.el7*.x86_64.rpm ]; then
|
|
||||||
rpm --define "_topdir $RPMBUILD" -ivh build/srpms/ninja-build-1.6.0-2.fc23.src.rpm
|
|
||||||
patch $RPMBUILD/SPECS/ninja-build.spec < dist/redhat/centos_dep/ninja-build.diff
|
|
||||||
rpmbuild --define "_topdir $RPMBUILD" -ba $RPMBUILD/SPECS/ninja-build.spec
|
|
||||||
fi
|
|
||||||
do_install scylla-ninja-build-1.6.0-2.el7*.x86_64.rpm
|
|
||||||
|
|
||||||
if [ ! -f $RPMBUILD/RPMS/x86_64/scylla-ragel-6.8-5.el7*.x86_64.rpm ]; then
|
if [ ! -f $RPMBUILD/RPMS/x86_64/scylla-ragel-6.8-5.el7*.x86_64.rpm ]; then
|
||||||
rpm --define "_topdir $RPMBUILD" -ivh build/srpms/ragel-6.8-5.fc23.src.rpm
|
rpm --define "_topdir $RPMBUILD" -ivh build/srpms/ragel-6.8-5.fc23.src.rpm
|
||||||
patch $RPMBUILD/SPECS/ragel.spec < dist/redhat/centos_dep/ragel.diff
|
patch $RPMBUILD/SPECS/ragel.spec < dist/redhat/centos_dep/ragel.diff
|
||||||
|
|||||||
56
dist/redhat/centos_dep/ninja-build.diff
vendored
56
dist/redhat/centos_dep/ninja-build.diff
vendored
@@ -1,56 +0,0 @@
|
|||||||
--- ninja-build.spec.orig 2016-01-20 14:41:16.892802134 +0000
|
|
||||||
+++ ninja-build.spec 2016-01-20 14:44:42.453227192 +0000
|
|
||||||
@@ -1,19 +1,18 @@
|
|
||||||
-Name: ninja-build
|
|
||||||
+Name: scylla-ninja-build
|
|
||||||
Version: 1.6.0
|
|
||||||
Release: 2%{?dist}
|
|
||||||
Summary: A small build system with a focus on speed
|
|
||||||
License: ASL 2.0
|
|
||||||
URL: http://martine.github.com/ninja/
|
|
||||||
Source0: https://github.com/martine/ninja/archive/v%{version}.tar.gz#/ninja-%{version}.tar.gz
|
|
||||||
-Source1: ninja.vim
|
|
||||||
# Rename mentions of the executable name to be ninja-build.
|
|
||||||
Patch1000: ninja-1.6.0-binary-rename.patch
|
|
||||||
+Requires: scylla-env
|
|
||||||
BuildRequires: asciidoc
|
|
||||||
BuildRequires: gtest-devel
|
|
||||||
BuildRequires: python2-devel
|
|
||||||
-BuildRequires: re2c >= 0.11.3
|
|
||||||
-Requires: emacs-filesystem
|
|
||||||
-Requires: vim-filesystem
|
|
||||||
+#BuildRequires: scylla-re2c >= 0.11.3
|
|
||||||
+%define _prefix /opt/scylladb
|
|
||||||
|
|
||||||
%description
|
|
||||||
Ninja is a small build system with a focus on speed. It differs from other
|
|
||||||
@@ -32,15 +31,8 @@
|
|
||||||
./ninja -v ninja_test
|
|
||||||
|
|
||||||
%install
|
|
||||||
-# TODO: Install ninja_syntax.py?
|
|
||||||
-mkdir -p %{buildroot}/{%{_bindir},%{_datadir}/bash-completion/completions,%{_datadir}/emacs/site-lisp,%{_datadir}/vim/vimfiles/syntax,%{_datadir}/vim/vimfiles/ftdetect,%{_datadir}/zsh/site-functions}
|
|
||||||
-
|
|
||||||
+mkdir -p %{buildroot}/opt/scylladb/bin
|
|
||||||
install -pm755 ninja %{buildroot}%{_bindir}/ninja-build
|
|
||||||
-install -pm644 misc/bash-completion %{buildroot}%{_datadir}/bash-completion/completions/ninja-bash-completion
|
|
||||||
-install -pm644 misc/ninja-mode.el %{buildroot}%{_datadir}/emacs/site-lisp/ninja-mode.el
|
|
||||||
-install -pm644 misc/ninja.vim %{buildroot}%{_datadir}/vim/vimfiles/syntax/ninja.vim
|
|
||||||
-install -pm644 %{SOURCE1} %{buildroot}%{_datadir}/vim/vimfiles/ftdetect/ninja.vim
|
|
||||||
-install -pm644 misc/zsh-completion %{buildroot}%{_datadir}/zsh/site-functions/_ninja
|
|
||||||
|
|
||||||
%check
|
|
||||||
# workaround possible too low default limits
|
|
||||||
@@ -50,12 +42,6 @@
|
|
||||||
%files
|
|
||||||
%doc COPYING HACKING.md README doc/manual.html
|
|
||||||
%{_bindir}/ninja-build
|
|
||||||
-%{_datadir}/bash-completion/completions/ninja-bash-completion
|
|
||||||
-%{_datadir}/emacs/site-lisp/ninja-mode.el
|
|
||||||
-%{_datadir}/vim/vimfiles/syntax/ninja.vim
|
|
||||||
-%{_datadir}/vim/vimfiles/ftdetect/ninja.vim
|
|
||||||
-# zsh does not have a -filesystem package
|
|
||||||
-%{_datadir}/zsh/
|
|
||||||
|
|
||||||
%changelog
|
|
||||||
* Mon Nov 16 2015 Ben Boeckel <mathstuf@gmail.com> - 1.6.0-2
|
|
||||||
34
dist/redhat/scylla.spec.in
vendored
34
dist/redhat/scylla.spec.in
vendored
@@ -7,7 +7,7 @@ Group: Applications/Databases
|
|||||||
License: AGPLv3
|
License: AGPLv3
|
||||||
URL: http://www.scylladb.com/
|
URL: http://www.scylladb.com/
|
||||||
Source0: %{name}-@@VERSION@@-@@RELEASE@@.tar
|
Source0: %{name}-@@VERSION@@-@@RELEASE@@.tar
|
||||||
Requires: scylla-server scylla-jmx scylla-tools scylla-kernel-conf
|
Requires: scylla-server = @@VERSION@@ scylla-jmx = @@VERSION@@ scylla-tools = @@VERSION@@ scylla-kernel-conf = @@VERSION@@
|
||||||
Obsoletes: scylla-server < 1.1
|
Obsoletes: scylla-server < 1.1
|
||||||
|
|
||||||
%description
|
%description
|
||||||
@@ -27,9 +27,9 @@ Group: Applications/Databases
|
|||||||
Summary: The Scylla database server
|
Summary: The Scylla database server
|
||||||
License: AGPLv3
|
License: AGPLv3
|
||||||
URL: http://www.scylladb.com/
|
URL: http://www.scylladb.com/
|
||||||
BuildRequires: libaio-devel libstdc++-devel cryptopp-devel hwloc-devel numactl-devel libpciaccess-devel libxml2-devel zlib-devel thrift-devel yaml-cpp-devel lz4-devel snappy-devel jsoncpp-devel systemd-devel xz-devel pcre-devel elfutils-libelf-devel bzip2-devel keyutils-libs-devel xfsprogs-devel make gnutls-devel systemd-devel lksctp-tools-devel protobuf-devel protobuf-compiler libunwind-devel systemtap-sdt-devel
|
BuildRequires: libaio-devel libstdc++-devel cryptopp-devel hwloc-devel numactl-devel libpciaccess-devel libxml2-devel zlib-devel thrift-devel yaml-cpp-devel lz4-devel snappy-devel jsoncpp-devel systemd-devel xz-devel pcre-devel elfutils-libelf-devel bzip2-devel keyutils-libs-devel xfsprogs-devel make gnutls-devel systemd-devel lksctp-tools-devel protobuf-devel protobuf-compiler libunwind-devel systemtap-sdt-devel libtool automake ninja-build
|
||||||
%{?fedora:BuildRequires: boost-devel ninja-build ragel antlr3-tool antlr3-C++-devel python3 gcc-c++ libasan libubsan python3-pyparsing dnf-yum}
|
%{?fedora:BuildRequires: boost-devel ragel antlr3-tool antlr3-C++-devel python3 gcc-c++ libasan libubsan python3-pyparsing dnf-yum}
|
||||||
%{?rhel:BuildRequires: scylla-libstdc++-static scylla-boost-devel scylla-boost-static scylla-ninja-build scylla-ragel scylla-antlr3-tool scylla-antlr3-C++-devel python34 scylla-gcc-c++ >= 5.1.1, python34-pyparsing}
|
%{?rhel:BuildRequires: scylla-libstdc++-static scylla-boost-devel scylla-boost-static scylla-ragel scylla-antlr3-tool scylla-antlr3-C++-devel python34 scylla-gcc-c++ >= 5.1.1, python34-pyparsing}
|
||||||
Requires: scylla-conf systemd-libs hwloc collectd PyYAML python-urwid pciutils pyparsing python-requests curl util-linux python-setuptools pciutils
|
Requires: scylla-conf systemd-libs hwloc collectd PyYAML python-urwid pciutils pyparsing python-requests curl util-linux python-setuptools pciutils
|
||||||
%{?rhel:Requires: python34 python34-PyYAML}
|
%{?rhel:Requires: python34 python34-PyYAML}
|
||||||
Conflicts: abrt
|
Conflicts: abrt
|
||||||
@@ -53,6 +53,10 @@ python3.4 ./configure.py --disable-xen --enable-dpdk --mode=release --static-std
|
|||||||
ninja-build %{?_smp_mflags} build/release/scylla build/release/iotune
|
ninja-build %{?_smp_mflags} build/release/scylla build/release/iotune
|
||||||
cp dist/common/systemd/scylla-server.service.in build/scylla-server.service
|
cp dist/common/systemd/scylla-server.service.in build/scylla-server.service
|
||||||
sed -i -e "s#@@SYSCONFDIR@@#/etc/sysconfig#g" build/scylla-server.service
|
sed -i -e "s#@@SYSCONFDIR@@#/etc/sysconfig#g" build/scylla-server.service
|
||||||
|
cp dist/common/systemd/scylla-housekeeping-restart.service.in build/scylla-housekeeping-restart.service
|
||||||
|
sed -i -e "s#@@REPOFILES@@#'/etc/yum.repos.d/scylla*.repo'#g" build/scylla-housekeeping-restart.service
|
||||||
|
cp dist/common/systemd/scylla-housekeeping-daily.service.in build/scylla-housekeeping-daily.service
|
||||||
|
sed -i -e "s#@@REPOFILES@@#'/etc/yum.repos.d/scylla*.repo'#g" build/scylla-housekeeping-daily.service
|
||||||
|
|
||||||
%install
|
%install
|
||||||
rm -rf $RPM_BUILD_ROOT
|
rm -rf $RPM_BUILD_ROOT
|
||||||
@@ -63,6 +67,9 @@ mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/security/limits.d/
|
|||||||
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/collectd.d/
|
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/collectd.d/
|
||||||
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/scylla/
|
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/scylla/
|
||||||
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/scylla.d/
|
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/scylla.d/
|
||||||
|
%if 0%{?rhel}
|
||||||
|
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/
|
||||||
|
%endif
|
||||||
mkdir -p $RPM_BUILD_ROOT%{_sysctldir}/
|
mkdir -p $RPM_BUILD_ROOT%{_sysctldir}/
|
||||||
mkdir -p $RPM_BUILD_ROOT%{_docdir}/scylla/
|
mkdir -p $RPM_BUILD_ROOT%{_docdir}/scylla/
|
||||||
mkdir -p $RPM_BUILD_ROOT%{_unitdir}
|
mkdir -p $RPM_BUILD_ROOT%{_unitdir}
|
||||||
@@ -73,6 +80,9 @@ install -m644 dist/common/limits.d/scylla.conf $RPM_BUILD_ROOT%{_sysconfdir}/sec
|
|||||||
install -m644 dist/common/collectd.d/scylla.conf $RPM_BUILD_ROOT%{_sysconfdir}/collectd.d/
|
install -m644 dist/common/collectd.d/scylla.conf $RPM_BUILD_ROOT%{_sysconfdir}/collectd.d/
|
||||||
install -m644 dist/common/scylla.d/*.conf $RPM_BUILD_ROOT%{_sysconfdir}/scylla.d/
|
install -m644 dist/common/scylla.d/*.conf $RPM_BUILD_ROOT%{_sysconfdir}/scylla.d/
|
||||||
install -m644 dist/common/sysctl.d/*.conf $RPM_BUILD_ROOT%{_sysctldir}/
|
install -m644 dist/common/sysctl.d/*.conf $RPM_BUILD_ROOT%{_sysctldir}/
|
||||||
|
%if 0%{?rhel}
|
||||||
|
install -m644 dist/common/modprobe.d/*.conf $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/
|
||||||
|
%endif
|
||||||
install -d -m755 $RPM_BUILD_ROOT%{_sysconfdir}/scylla
|
install -d -m755 $RPM_BUILD_ROOT%{_sysconfdir}/scylla
|
||||||
install -m644 conf/scylla.yaml $RPM_BUILD_ROOT%{_sysconfdir}/scylla/
|
install -m644 conf/scylla.yaml $RPM_BUILD_ROOT%{_sysconfdir}/scylla/
|
||||||
install -m644 conf/cassandra-rackdc.properties $RPM_BUILD_ROOT%{_sysconfdir}/scylla/
|
install -m644 conf/cassandra-rackdc.properties $RPM_BUILD_ROOT%{_sysconfdir}/scylla/
|
||||||
@@ -151,10 +161,8 @@ rm -rf $RPM_BUILD_ROOT
|
|||||||
%{_docdir}/scylla/NOTICE.txt
|
%{_docdir}/scylla/NOTICE.txt
|
||||||
%{_docdir}/scylla/ORIGIN
|
%{_docdir}/scylla/ORIGIN
|
||||||
%{_docdir}/scylla/licenses/
|
%{_docdir}/scylla/licenses/
|
||||||
%{_unitdir}/scylla-server.service
|
%{_unitdir}/*.service
|
||||||
%{_unitdir}/scylla-housekeeping.service
|
%{_unitdir}/*.timer
|
||||||
%{_unitdir}/scylla-housekeeping.timer
|
|
||||||
%{_unitdir}/node-exporter.service
|
|
||||||
%{_bindir}/scylla
|
%{_bindir}/scylla
|
||||||
%{_bindir}/iotune
|
%{_bindir}/iotune
|
||||||
%{_bindir}/scyllatop
|
%{_bindir}/scyllatop
|
||||||
@@ -228,6 +236,7 @@ Group: Applications/Databases
|
|||||||
Summary: Scylla configuration package for the Linux kernel
|
Summary: Scylla configuration package for the Linux kernel
|
||||||
License: AGPLv3
|
License: AGPLv3
|
||||||
URL: http://www.scylladb.com/
|
URL: http://www.scylladb.com/
|
||||||
|
Requires: kmod
|
||||||
|
|
||||||
%description kernel-conf
|
%description kernel-conf
|
||||||
This package contains Linux kernel configuration changes for the Scylla database. Install this package
|
This package contains Linux kernel configuration changes for the Scylla database. Install this package
|
||||||
@@ -237,9 +246,18 @@ if Scylla is the main application on your server and you wish to optimize its la
|
|||||||
# We cannot use the sysctl_apply rpm macro because it is not present in 7.0
|
# We cannot use the sysctl_apply rpm macro because it is not present in 7.0
|
||||||
# following is a "manual" expansion
|
# following is a "manual" expansion
|
||||||
/usr/lib/systemd/systemd-sysctl 99-scylla-sched.conf >/dev/null 2>&1 || :
|
/usr/lib/systemd/systemd-sysctl 99-scylla-sched.conf >/dev/null 2>&1 || :
|
||||||
|
# Write modprobe.d params when module already loaded
|
||||||
|
%if 0%{?rhel}
|
||||||
|
if [ -e /sys/module/raid0/parameters/devices_discard_performance ]; then
|
||||||
|
echo Y > /sys/module/raid0/parameters/devices_discard_performance
|
||||||
|
fi
|
||||||
|
%endif
|
||||||
|
|
||||||
%files kernel-conf
|
%files kernel-conf
|
||||||
%defattr(-,root,root)
|
%defattr(-,root,root)
|
||||||
|
%if 0%{?rhel}
|
||||||
|
%config(noreplace) %{_sysconfdir}/modprobe.d/*.conf
|
||||||
|
%endif
|
||||||
%{_sysctldir}/*.conf
|
%{_sysctldir}/*.conf
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
|||||||
@@ -50,6 +50,12 @@ public:
|
|||||||
// for real time waits.
|
// for real time waits.
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Returns the time point that is earlier than t by d, or the minimum time point if the result cannot be represented.
|
||||||
|
template<typename Clock, typename Duration, typename Rep, typename Period>
|
||||||
|
inline
|
||||||
|
auto saturating_subtract(std::chrono::time_point<Clock, Duration> t, std::chrono::duration<Rep, Period> d) -> decltype(t) {
|
||||||
|
return std::max(t, decltype(t)::min() + d) - d;
|
||||||
|
}
|
||||||
|
|
||||||
using expiry_opt = std::experimental::optional<gc_clock::time_point>;
|
using expiry_opt = std::experimental::optional<gc_clock::time_point>;
|
||||||
using ttl_opt = std::experimental::optional<gc_clock::duration>;
|
using ttl_opt = std::experimental::optional<gc_clock::duration>;
|
||||||
|
|||||||
@@ -43,6 +43,7 @@
|
|||||||
#include "gms/endpoint_state.hh"
|
#include "gms/endpoint_state.hh"
|
||||||
#include "gms/application_state.hh"
|
#include "gms/application_state.hh"
|
||||||
#include "gms/inet_address.hh"
|
#include "gms/inet_address.hh"
|
||||||
|
#include "service/storage_service.hh"
|
||||||
#include "log.hh"
|
#include "log.hh"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
@@ -56,37 +57,13 @@ constexpr std::chrono::milliseconds failure_detector::DEFAULT_MAX_PAUSE;
|
|||||||
using clk = arrival_window::clk;
|
using clk = arrival_window::clk;
|
||||||
|
|
||||||
static clk::duration get_initial_value() {
|
static clk::duration get_initial_value() {
|
||||||
#if 0
|
auto& cfg = service::get_local_storage_service().db().local().get_config();
|
||||||
String newvalue = System.getProperty("cassandra.fd_initial_value_ms");
|
return std::chrono::milliseconds(cfg.fd_initial_value_ms());
|
||||||
if (newvalue == null)
|
|
||||||
{
|
|
||||||
return Gossiper.intervalInMillis * 2;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
logger.info("Overriding FD INITIAL_VALUE to {}ms", newvalue);
|
|
||||||
return Integer.parseInt(newvalue);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
warn(unimplemented::cause::GOSSIP);
|
|
||||||
return std::chrono::seconds(2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
clk::duration arrival_window::get_max_interval() {
|
clk::duration arrival_window::get_max_interval() {
|
||||||
#if 0
|
auto& cfg = service::get_local_storage_service().db().local().get_config();
|
||||||
sstring newvalue = System.getProperty("cassandra.fd_max_interval_ms");
|
return std::chrono::milliseconds(cfg.fd_max_interval_ms());
|
||||||
if (newvalue == null)
|
|
||||||
{
|
|
||||||
return failure_detector.INITIAL_VALUE_NANOS;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
logger.info("Overriding FD MAX_INTERVAL to {}ms", newvalue);
|
|
||||||
return TimeUnit.NANOSECONDS.convert(Integer.parseInt(newvalue), TimeUnit.MILLISECONDS);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
warn(unimplemented::cause::GOSSIP);
|
|
||||||
return get_initial_value();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void arrival_window::add(clk::time_point value, const gms::inet_address& ep) {
|
void arrival_window::add(clk::time_point value, const gms::inet_address& ep) {
|
||||||
@@ -95,7 +72,7 @@ void arrival_window::add(clk::time_point value, const gms::inet_address& ep) {
|
|||||||
if (inter_arrival_time <= get_max_interval()) {
|
if (inter_arrival_time <= get_max_interval()) {
|
||||||
_arrival_intervals.add(inter_arrival_time.count());
|
_arrival_intervals.add(inter_arrival_time.count());
|
||||||
} else {
|
} else {
|
||||||
logger.debug("failure_detector: Ignoring interval time of {} for {}", inter_arrival_time.count(), ep);
|
logger.debug("failure_detector: Ignoring interval time of {} for {}, mean={}, size={}", inter_arrival_time.count(), ep, mean(), size());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// We use a very large initial interval since the "right" average depends on the cluster size
|
// We use a very large initial interval since the "right" average depends on the cluster size
|
||||||
|
|||||||
@@ -87,6 +87,8 @@ public:
|
|||||||
// see CASSANDRA-2597 for an explanation of the math at work here.
|
// see CASSANDRA-2597 for an explanation of the math at work here.
|
||||||
double phi(clk::time_point tnow);
|
double phi(clk::time_point tnow);
|
||||||
|
|
||||||
|
size_t size() { return _arrival_intervals.size(); }
|
||||||
|
|
||||||
friend std::ostream& operator<<(std::ostream& os, const arrival_window& w);
|
friend std::ostream& operator<<(std::ostream& os, const arrival_window& w);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|||||||
136
gms/gossiper.cc
136
gms/gossiper.cc
@@ -480,7 +480,7 @@ void gossiper::remove_endpoint(inet_address endpoint) {
|
|||||||
logger.info("removed {} from _seeds, updated _seeds list = {}", endpoint, _seeds);
|
logger.info("removed {} from _seeds, updated _seeds list = {}", endpoint, _seeds);
|
||||||
}
|
}
|
||||||
|
|
||||||
_live_endpoints.erase(endpoint);
|
_live_endpoints.erase(std::remove(_live_endpoints.begin(), _live_endpoints.end(), endpoint), _live_endpoints.end());
|
||||||
_live_endpoints_just_added.remove(endpoint);
|
_live_endpoints_just_added.remove(endpoint);
|
||||||
_unreachable_endpoints.erase(endpoint);
|
_unreachable_endpoints.erase(endpoint);
|
||||||
quarantine_endpoint(endpoint);
|
quarantine_endpoint(endpoint);
|
||||||
@@ -567,10 +567,36 @@ void gossiper::run() {
|
|||||||
|
|
||||||
_gossiped_to_seed = false;
|
_gossiped_to_seed = false;
|
||||||
|
|
||||||
/* Gossip to some random live member */
|
auto get_random_node = [this] (const std::vector<inet_address>& nodes) {
|
||||||
do_gossip_to_live_member(message).handle_exception([] (auto ep) {
|
std::uniform_int_distribution<int> dist(0, nodes.size() - 1);
|
||||||
logger.trace("Faill to do_gossip_to_live_member: {}", ep);
|
int index = dist(this->_random);
|
||||||
});
|
return nodes[index];
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Gossip to some random live members */
|
||||||
|
// TODO: For now, we choose a tenth of all the nodes in the cluster.
|
||||||
|
auto nr_live_nodes = std::max(size_t(1), endpoint_state_map.size() / 10);
|
||||||
|
nr_live_nodes = std::min(nr_live_nodes, _live_endpoints.size());
|
||||||
|
std::unordered_set<gms::inet_address> live_nodes;
|
||||||
|
logger.debug("nr_live_nodes={}, endpoint_state_map.size()={}, live_endpoints.size={}",
|
||||||
|
nr_live_nodes, endpoint_state_map.size(), _live_endpoints.size());
|
||||||
|
while (live_nodes.size() < nr_live_nodes && nr_live_nodes <= _live_endpoints.size()) {
|
||||||
|
if (!_live_endpoints_just_added.empty()) {
|
||||||
|
auto ep = _live_endpoints_just_added.front();
|
||||||
|
_live_endpoints_just_added.pop_front();
|
||||||
|
logger.info("Favor newly added node {}", ep);
|
||||||
|
live_nodes.insert(ep);
|
||||||
|
} else {
|
||||||
|
// Get a random live node
|
||||||
|
live_nodes.insert(get_random_node(_live_endpoints));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
logger.debug("Talk to {} live nodes: {}", nr_live_nodes, live_nodes);
|
||||||
|
for (auto& ep: live_nodes) {
|
||||||
|
do_gossip_to_live_member(message, ep).handle_exception([] (auto ep) {
|
||||||
|
logger.trace("Failed to do_gossip_to_live_member: {}", ep);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
/* Gossip to some unreachable member with some probability to check if he is back up */
|
/* Gossip to some unreachable member with some probability to check if he is back up */
|
||||||
do_gossip_to_unreachable_member(message).handle_exception([] (auto ep) {
|
do_gossip_to_unreachable_member(message).handle_exception([] (auto ep) {
|
||||||
@@ -695,7 +721,7 @@ void gossiper::unregister_(shared_ptr<i_endpoint_state_change_subscriber> subscr
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::set<inet_address> gossiper::get_live_members() {
|
std::set<inet_address> gossiper::get_live_members() {
|
||||||
std::set<inet_address> live_members(_live_endpoints);
|
std::set<inet_address> live_members(_live_endpoints.begin(), _live_endpoints.end());
|
||||||
if (!live_members.count(get_broadcast_address())) {
|
if (!live_members.count(get_broadcast_address())) {
|
||||||
live_members.insert(get_broadcast_address());
|
live_members.insert(get_broadcast_address());
|
||||||
}
|
}
|
||||||
@@ -952,19 +978,8 @@ future<int> gossiper::get_current_heart_beat_version(inet_address endpoint) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> gossiper::do_gossip_to_live_member(gossip_digest_syn message) {
|
future<> gossiper::do_gossip_to_live_member(gossip_digest_syn message, gms::inet_address ep) {
|
||||||
size_t size = _live_endpoints.size();
|
return send_gossip(message, {ep});
|
||||||
if (size == 0) {
|
|
||||||
return make_ready_future<>();
|
|
||||||
}
|
|
||||||
logger.trace("do_gossip_to_live_member: live_endpoint nr={}", _live_endpoints.size());
|
|
||||||
if (!_live_endpoints_just_added.empty()) {
|
|
||||||
auto ep = _live_endpoints_just_added.front();
|
|
||||||
_live_endpoints_just_added.pop_front();
|
|
||||||
logger.info("do_gossip_to_live_member: Favor newly added node {}", ep);
|
|
||||||
return send_gossip(message, std::set<inet_address>{ep});
|
|
||||||
}
|
|
||||||
return send_gossip(message, _live_endpoints);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> gossiper::do_gossip_to_unreachable_member(gossip_digest_syn message) {
|
future<> gossiper::do_gossip_to_unreachable_member(gossip_digest_syn message) {
|
||||||
@@ -1135,6 +1150,15 @@ void gossiper::mark_alive(inet_address addr, endpoint_state& local_state) {
|
|||||||
// real_mark_alive(addr, local_state);
|
// real_mark_alive(addr, local_state);
|
||||||
// return;
|
// return;
|
||||||
// }
|
// }
|
||||||
|
auto inserted = _pending_mark_alive_endpoints.insert(addr).second;
|
||||||
|
if (inserted) {
|
||||||
|
// The node is not in the _pending_mark_alive_endpoints
|
||||||
|
logger.debug("Mark Node {} alive with EchoMessage", addr);
|
||||||
|
} else {
|
||||||
|
// We are in the process of marking this node alive
|
||||||
|
logger.debug("Node {} is being marked as up, ignoring duplicated mark alive operation", addr);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
local_state.mark_dead();
|
local_state.mark_dead();
|
||||||
msg_addr id = get_msg_addr(addr);
|
msg_addr id = get_msg_addr(addr);
|
||||||
@@ -1143,10 +1167,22 @@ void gossiper::mark_alive(inet_address addr, endpoint_state& local_state) {
|
|||||||
ms().send_gossip_echo(id).get();
|
ms().send_gossip_echo(id).get();
|
||||||
logger.trace("Got EchoMessage Reply");
|
logger.trace("Got EchoMessage Reply");
|
||||||
set_last_processed_message_at();
|
set_last_processed_message_at();
|
||||||
real_mark_alive(id.addr, local_state);
|
// After sending echo message, the Node might not be in the
|
||||||
|
// endpoint_state_map anymore; using the reference to local_state
|
||||||
|
// might cause use-after-free
|
||||||
|
auto it = endpoint_state_map.find(addr);
|
||||||
|
if (it == endpoint_state_map.end()) {
|
||||||
|
logger.info("Node {} is not in endpoint_state_map anymore", addr);
|
||||||
|
} else {
|
||||||
|
endpoint_state& state = it->second;
|
||||||
|
logger.debug("Mark Node {} alive after EchoMessage", addr);
|
||||||
|
real_mark_alive(addr, state);
|
||||||
|
}
|
||||||
} catch(...) {
|
} catch(...) {
|
||||||
logger.warn("Fail to send EchoMessage to {}: {}", id, std::current_exception());
|
logger.warn("Fail to send EchoMessage to {}: {}", id, std::current_exception());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_pending_mark_alive_endpoints.erase(addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Runs inside seastar::async context
|
// Runs inside seastar::async context
|
||||||
@@ -1154,7 +1190,10 @@ void gossiper::real_mark_alive(inet_address addr, endpoint_state& local_state) {
|
|||||||
logger.trace("marking as alive {}", addr);
|
logger.trace("marking as alive {}", addr);
|
||||||
local_state.mark_alive();
|
local_state.mark_alive();
|
||||||
local_state.update_timestamp(); // prevents do_status_check from racing us and evicting if it was down > A_VERY_LONG_TIME
|
local_state.update_timestamp(); // prevents do_status_check from racing us and evicting if it was down > A_VERY_LONG_TIME
|
||||||
_live_endpoints.insert(addr);
|
auto it_ = std::find(_live_endpoints.begin(), _live_endpoints.end(), addr);
|
||||||
|
if (it_ == _live_endpoints.end()) {
|
||||||
|
_live_endpoints.push_back(addr);
|
||||||
|
}
|
||||||
auto it = std::find(_live_endpoints_just_added.begin(), _live_endpoints_just_added.end(), addr);
|
auto it = std::find(_live_endpoints_just_added.begin(), _live_endpoints_just_added.end(), addr);
|
||||||
if (it == _live_endpoints_just_added.end()) {
|
if (it == _live_endpoints_just_added.end()) {
|
||||||
_live_endpoints_just_added.push_back(addr);
|
_live_endpoints_just_added.push_back(addr);
|
||||||
@@ -1176,7 +1215,7 @@ void gossiper::real_mark_alive(inet_address addr, endpoint_state& local_state) {
|
|||||||
void gossiper::mark_dead(inet_address addr, endpoint_state& local_state) {
|
void gossiper::mark_dead(inet_address addr, endpoint_state& local_state) {
|
||||||
logger.trace("marking as down {}", addr);
|
logger.trace("marking as down {}", addr);
|
||||||
local_state.mark_dead();
|
local_state.mark_dead();
|
||||||
_live_endpoints.erase(addr);
|
_live_endpoints.erase(std::remove(_live_endpoints.begin(), _live_endpoints.end(), addr), _live_endpoints.end());
|
||||||
_live_endpoints_just_added.remove(addr);
|
_live_endpoints_just_added.remove(addr);
|
||||||
_unreachable_endpoints[addr] = now();
|
_unreachable_endpoints[addr] = now();
|
||||||
logger.info("InetAddress {} is now DOWN, status = {}", addr, get_gossip_status(local_state));
|
logger.info("InetAddress {} is now DOWN, status = {}", addr, get_gossip_status(local_state));
|
||||||
@@ -1188,10 +1227,7 @@ void gossiper::mark_dead(inet_address addr, endpoint_state& local_state) {
|
|||||||
|
|
||||||
// Runs inside seastar::async context
|
// Runs inside seastar::async context
|
||||||
void gossiper::handle_major_state_change(inet_address ep, const endpoint_state& eps) {
|
void gossiper::handle_major_state_change(inet_address ep, const endpoint_state& eps) {
|
||||||
std::experimental::optional<endpoint_state> local_ep_state;
|
auto eps_old = get_endpoint_state_for_endpoint(ep);
|
||||||
if (endpoint_state_map.count(ep) > 0) {
|
|
||||||
local_ep_state = endpoint_state_map.at(ep);
|
|
||||||
}
|
|
||||||
if (!is_dead_state(eps) && !_in_shadow_round) {
|
if (!is_dead_state(eps) && !_in_shadow_round) {
|
||||||
if (endpoint_state_map.count(ep)) {
|
if (endpoint_state_map.count(ep)) {
|
||||||
logger.debug("Node {} has restarted, now UP, status = {}", ep, get_gossip_status(eps));
|
logger.debug("Node {} has restarted, now UP, status = {}", ep, get_gossip_status(eps));
|
||||||
@@ -1202,24 +1238,37 @@ void gossiper::handle_major_state_change(inet_address ep, const endpoint_state&
|
|||||||
logger.trace("Adding endpoint state for {}, status = {}", ep, get_gossip_status(eps));
|
logger.trace("Adding endpoint state for {}, status = {}", ep, get_gossip_status(eps));
|
||||||
endpoint_state_map[ep] = eps;
|
endpoint_state_map[ep] = eps;
|
||||||
|
|
||||||
auto& ep_state = endpoint_state_map.at(ep);
|
if (_in_shadow_round) {
|
||||||
|
// In shadow round, we are only interested in the peer's endpoint_state,
|
||||||
|
// e.g., gossip features, host_id, tokens. No need to call the
|
||||||
|
// on_restart or on_join callbacks or to go through the mark alive
|
||||||
|
// procedure with EchoMessage gossip message. We will do them during
|
||||||
|
// normal gossip runs anyway.
|
||||||
|
logger.debug("In shadow round addr={}, eps={}", ep, eps);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (local_ep_state) {
|
if (eps_old) {
|
||||||
// the node restarted: it is up to the subscriber to take whatever action is necessary
|
// the node restarted: it is up to the subscriber to take whatever action is necessary
|
||||||
_subscribers.for_each([ep, local_ep_state] (auto& subscriber) {
|
_subscribers.for_each([ep, eps_old] (auto& subscriber) {
|
||||||
subscriber->on_restart(ep, *local_ep_state);
|
subscriber->on_restart(ep, *eps_old);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto& ep_state = endpoint_state_map.at(ep);
|
||||||
if (!is_dead_state(ep_state)) {
|
if (!is_dead_state(ep_state)) {
|
||||||
mark_alive(ep, ep_state);
|
mark_alive(ep, ep_state);
|
||||||
} else {
|
} else {
|
||||||
logger.debug("Not marking {} alive due to dead state {}", ep, get_gossip_status(eps));
|
logger.debug("Not marking {} alive due to dead state {}", ep, get_gossip_status(eps));
|
||||||
mark_dead(ep, ep_state);
|
mark_dead(ep, ep_state);
|
||||||
}
|
}
|
||||||
_subscribers.for_each([ep, ep_state] (auto& subscriber) {
|
|
||||||
subscriber->on_join(ep, ep_state);
|
auto eps_new = get_endpoint_state_for_endpoint(ep);
|
||||||
});
|
if (eps_new) {
|
||||||
|
_subscribers.for_each([ep, eps_new] (auto& subscriber) {
|
||||||
|
subscriber->on_join(ep, *eps_new);
|
||||||
|
});
|
||||||
|
}
|
||||||
// check this at the end so nodes will learn about the endpoint
|
// check this at the end so nodes will learn about the endpoint
|
||||||
if (is_shutdown(ep)) {
|
if (is_shutdown(ep)) {
|
||||||
mark_as_shutdown(ep);
|
mark_as_shutdown(ep);
|
||||||
@@ -1240,6 +1289,10 @@ bool gossiper::is_shutdown(const inet_address& endpoint) const {
|
|||||||
return get_gossip_status(endpoint) == sstring(versioned_value::SHUTDOWN);
|
return get_gossip_status(endpoint) == sstring(versioned_value::SHUTDOWN);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool gossiper::is_normal(const inet_address& endpoint) const {
|
||||||
|
return get_gossip_status(endpoint) == sstring(versioned_value::STATUS_NORMAL);
|
||||||
|
}
|
||||||
|
|
||||||
bool gossiper::is_silent_shutdown_state(const endpoint_state& ep_state) const{
|
bool gossiper::is_silent_shutdown_state(const endpoint_state& ep_state) const{
|
||||||
sstring state = get_gossip_status(ep_state);
|
sstring state = get_gossip_status(ep_state);
|
||||||
for (auto& deadstate : SILENT_SHUTDOWN_STATES) {
|
for (auto& deadstate : SILENT_SHUTDOWN_STATES) {
|
||||||
@@ -1394,9 +1447,11 @@ future<> gossiper::start_gossiping(int generation_nbr, std::map<application_stat
|
|||||||
local_state.add_application_state(entry.first, entry.second);
|
local_state.add_application_state(entry.first, entry.second);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto generation = local_state.get_heart_beat_state().get_generation();
|
||||||
|
|
||||||
//notify snitches that Gossiper is about to start
|
//notify snitches that Gossiper is about to start
|
||||||
return locator::i_endpoint_snitch::get_local_snitch_ptr()->gossiper_starting().then([this, &local_state] {
|
return locator::i_endpoint_snitch::get_local_snitch_ptr()->gossiper_starting().then([this, generation] {
|
||||||
logger.trace("gossip started with generation {}", local_state.get_heart_beat_state().get_generation());
|
logger.trace("gossip started with generation {}", generation);
|
||||||
_enabled = true;
|
_enabled = true;
|
||||||
_nr_run = 0;
|
_nr_run = 0;
|
||||||
_scheduled_gossip_task.arm(INTERVAL);
|
_scheduled_gossip_task.arm(INTERVAL);
|
||||||
@@ -1493,16 +1548,19 @@ future<> gossiper::add_local_application_state(application_state state, versione
|
|||||||
logger.error(err.c_str());
|
logger.error(err.c_str());
|
||||||
throw std::runtime_error(err);
|
throw std::runtime_error(err);
|
||||||
}
|
}
|
||||||
endpoint_state& ep_state = gossiper.endpoint_state_map.at(ep_addr);
|
endpoint_state ep_state_before = gossiper.endpoint_state_map.at(ep_addr);
|
||||||
// Fire "before change" notifications:
|
// Fire "before change" notifications:
|
||||||
gossiper.do_before_change_notifications(ep_addr, ep_state, state, value);
|
gossiper.do_before_change_notifications(ep_addr, ep_state_before, state, value);
|
||||||
// Notifications may have taken some time, so preventively raise the version
|
// Notifications may have taken some time, so preventively raise the version
|
||||||
// of the new value, otherwise it could be ignored by the remote node
|
// of the new value, otherwise it could be ignored by the remote node
|
||||||
// if another value with a newer version was received in the meantime:
|
// if another value with a newer version was received in the meantime:
|
||||||
value = storage_service_value_factory().clone_with_higher_version(value);
|
value = storage_service_value_factory().clone_with_higher_version(value);
|
||||||
// Add to local application state and fire "on change" notifications:
|
// Add to local application state and fire "on change" notifications:
|
||||||
ep_state.add_application_state(state, value);
|
if (gossiper.endpoint_state_map.count(ep_addr)) {
|
||||||
gossiper.do_on_change_notifications(ep_addr, state, value);
|
auto& ep_state = gossiper.endpoint_state_map.at(ep_addr);
|
||||||
|
ep_state.add_application_state(state, value);
|
||||||
|
gossiper.do_on_change_notifications(ep_addr, state, value);
|
||||||
|
}
|
||||||
}).handle_exception([] (auto ep) {
|
}).handle_exception([] (auto ep) {
|
||||||
logger.warn("Fail to apply application_state: {}", ep);
|
logger.warn("Fail to apply application_state: {}", ep);
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -184,9 +184,12 @@ private:
|
|||||||
} _subscribers;
|
} _subscribers;
|
||||||
|
|
||||||
/* live member set */
|
/* live member set */
|
||||||
std::set<inet_address> _live_endpoints;
|
std::vector<inet_address> _live_endpoints;
|
||||||
std::list<inet_address> _live_endpoints_just_added;
|
std::list<inet_address> _live_endpoints_just_added;
|
||||||
|
|
||||||
|
/* nodes are being marked as alive */
|
||||||
|
std::unordered_set<inet_address> _pending_mark_alive_endpoints;
|
||||||
|
|
||||||
/* unreachable member set */
|
/* unreachable member set */
|
||||||
std::map<inet_address, clk::time_point> _unreachable_endpoints;
|
std::map<inet_address, clk::time_point> _unreachable_endpoints;
|
||||||
|
|
||||||
@@ -206,7 +209,7 @@ private:
|
|||||||
clk::time_point _last_processed_message_at = now();
|
clk::time_point _last_processed_message_at = now();
|
||||||
|
|
||||||
std::map<inet_address, clk::time_point> _shadow_unreachable_endpoints;
|
std::map<inet_address, clk::time_point> _shadow_unreachable_endpoints;
|
||||||
std::set<inet_address> _shadow_live_endpoints;
|
std::vector<inet_address> _shadow_live_endpoints;
|
||||||
|
|
||||||
void run();
|
void run();
|
||||||
public:
|
public:
|
||||||
@@ -366,8 +369,8 @@ private:
|
|||||||
*/
|
*/
|
||||||
future<> send_gossip(gossip_digest_syn message, std::set<inet_address> epset);
|
future<> send_gossip(gossip_digest_syn message, std::set<inet_address> epset);
|
||||||
|
|
||||||
/* Sends a Gossip message to a live member and returns true if the recipient was a seed */
|
/* Sends a Gossip message to a live member */
|
||||||
future<> do_gossip_to_live_member(gossip_digest_syn message);
|
future<> do_gossip_to_live_member(gossip_digest_syn message, inet_address ep);
|
||||||
|
|
||||||
/* Sends a Gossip message to an unreachable member */
|
/* Sends a Gossip message to an unreachable member */
|
||||||
future<> do_gossip_to_unreachable_member(gossip_digest_syn message);
|
future<> do_gossip_to_unreachable_member(gossip_digest_syn message);
|
||||||
@@ -501,6 +504,7 @@ public:
|
|||||||
void debug_show();
|
void debug_show();
|
||||||
public:
|
public:
|
||||||
bool is_shutdown(const inet_address& endpoint) const;
|
bool is_shutdown(const inet_address& endpoint) const;
|
||||||
|
bool is_normal(const inet_address& endpoint) const;
|
||||||
bool is_silent_shutdown_state(const endpoint_state& ep_state) const;
|
bool is_silent_shutdown_state(const endpoint_state& ep_state) const;
|
||||||
void mark_as_shutdown(const inet_address& endpoint);
|
void mark_as_shutdown(const inet_address& endpoint);
|
||||||
void force_newer_generation();
|
void force_newer_generation();
|
||||||
|
|||||||
@@ -277,6 +277,9 @@ def is_optional(lst):
|
|||||||
|
|
||||||
created_writers = set()
|
created_writers = set()
|
||||||
|
|
||||||
|
def get_member_name(name):
|
||||||
|
return name if not name.endswith('()') else name[:-2]
|
||||||
|
|
||||||
def get_members(cls):
|
def get_members(cls):
|
||||||
return [p for p in cls["members"] if not is_class(p) and not is_enum(p)]
|
return [p for p in cls["members"] if not is_class(p) and not is_enum(p)]
|
||||||
|
|
||||||
@@ -456,18 +459,19 @@ def add_param_writer_object(name, base_state, typ, var_type = "", var_index = No
|
|||||||
def add_param_write(current, base_state, vector = False, root_node = False):
|
def add_param_write(current, base_state, vector = False, root_node = False):
|
||||||
typ = current["type"]
|
typ = current["type"]
|
||||||
res = ""
|
res = ""
|
||||||
|
name = get_member_name(current["name"])
|
||||||
if is_basic_type(typ):
|
if is_basic_type(typ):
|
||||||
res = res + add_param_writer_basic_type(current["name"], base_state, typ)
|
res = res + add_param_writer_basic_type(name, base_state, typ)
|
||||||
elif is_optional(typ):
|
elif is_optional(typ):
|
||||||
res = res + Template(reindent(4, """
|
res = res + Template(reindent(4, """
|
||||||
after_${basestate}__$name<Output> skip_$name() && {
|
after_${basestate}__$name<Output> skip_$name() && {
|
||||||
serialize(_out, false);
|
serialize(_out, false);
|
||||||
return { _out, std::move(_state) };
|
return { _out, std::move(_state) };
|
||||||
}""")).substitute({'type': param_type(typ), 'name': current["name"], 'basestate' : base_state})
|
}""")).substitute({'type': param_type(typ), 'name': name, 'basestate' : base_state})
|
||||||
if is_basic_type(typ[1][0]):
|
if is_basic_type(typ[1][0]):
|
||||||
res = res + add_param_writer_basic_type(current["name"], base_state, typ[1][0], "", "true")
|
res = res + add_param_writer_basic_type(name, base_state, typ[1][0], "", "true")
|
||||||
elif is_local_type(typ[1][0]):
|
elif is_local_type(typ[1][0]):
|
||||||
res = res + add_param_writer_object(current["name"], base_state[0][1], typ, "", "true")
|
res = res + add_param_writer_object(name, base_state[0][1], typ, "", "true")
|
||||||
else:
|
else:
|
||||||
print("non supported optional type ", type[0][1])
|
print("non supported optional type ", type[0][1])
|
||||||
elif is_vector(typ):
|
elif is_vector(typ):
|
||||||
@@ -482,18 +486,18 @@ def add_param_write(current, base_state, vector = False, root_node = False):
|
|||||||
$set
|
$set
|
||||||
return { _out, std::move(_state) };
|
return { _out, std::move(_state) };
|
||||||
}
|
}
|
||||||
""").substitute({'type': param_type(typ), 'name': current["name"], 'basestate' : base_state, 'set' : set_size})
|
""").substitute({'type': param_type(typ), 'name': name, 'basestate' : base_state, 'set' : set_size})
|
||||||
elif is_local_type(typ):
|
elif is_local_type(typ):
|
||||||
res = res + add_param_writer_object(current["name"], base_state, typ)
|
res = res + add_param_writer_object(name, base_state, typ)
|
||||||
elif is_variant(typ):
|
elif is_variant(typ):
|
||||||
for idx, p in enumerate(typ[1]):
|
for idx, p in enumerate(typ[1]):
|
||||||
if is_basic_type(p):
|
if is_basic_type(p):
|
||||||
varient_type = param_type(p)
|
varient_type = param_type(p)
|
||||||
res = res + add_param_writer_basic_type(current["name"], base_state, varient_type,"_" + varient_type, idx, root_node)
|
res = res + add_param_writer_basic_type(name, base_state, varient_type,"_" + varient_type, idx, root_node)
|
||||||
elif is_variant(p):
|
elif is_variant(p):
|
||||||
res = res + add_param_writer_object(current["name"], base_state, p, '_' + "variant", idx, root_node)
|
res = res + add_param_writer_object(name, base_state, p, '_' + "variant", idx, root_node)
|
||||||
elif is_local_type(p):
|
elif is_local_type(p):
|
||||||
res = res + add_param_writer_object(current["name"], base_state, p, '_' + param_type(p), idx, root_node)
|
res = res + add_param_writer_object(name, base_state, p, '_' + param_type(p), idx, root_node)
|
||||||
else:
|
else:
|
||||||
print ("something is wrong with type", typ)
|
print ("something is wrong with type", typ)
|
||||||
return res;
|
return res;
|
||||||
@@ -658,7 +662,7 @@ def handle_visitors_nodes(info, hout, variant_node = False, clases = []):
|
|||||||
if not members:
|
if not members:
|
||||||
add_node(hout, base_state_name, None, base_state_name, prefix, parents, add_end_method(parents, current_name, variant_node, clases), False, is_final(cls))
|
add_node(hout, base_state_name, None, base_state_name, prefix, parents, add_end_method(parents, current_name, variant_node, clases), False, is_final(cls))
|
||||||
return
|
return
|
||||||
add_node(hout, base_state_name + "__" + members[-1]["name"], members[-1]["type"], base_state_name, "after_", base_state_name, add_end_method(parents, current_name, variant_node, clases))
|
add_node(hout, base_state_name + "__" + get_member_name(members[-1]["name"]), members[-1]["type"], base_state_name, "after_", base_state_name, add_end_method(parents, current_name, variant_node, clases))
|
||||||
# Create writer and reader for include class
|
# Create writer and reader for include class
|
||||||
if not variant_node:
|
if not variant_node:
|
||||||
for member in get_dependency(cls):
|
for member in get_dependency(cls):
|
||||||
@@ -666,9 +670,9 @@ def handle_visitors_nodes(info, hout, variant_node = False, clases = []):
|
|||||||
for ind in reversed(range(1, len(members))):
|
for ind in reversed(range(1, len(members))):
|
||||||
member = members[ind]
|
member = members[ind]
|
||||||
add_nodes_when_needed(hout, info, member, base_state_name, parents, member_classes)
|
add_nodes_when_needed(hout, info, member, base_state_name, parents, member_classes)
|
||||||
variant_state = base_state_name + "__" + member["name"] if is_variant(member["type"]) else base_state_name
|
variant_state = base_state_name + "__" + get_member_name(member["name"]) if is_variant(member["type"]) else base_state_name
|
||||||
is_param_vector = is_vector(member["type"]) and is_basic_type(member["type"][1][0])
|
is_param_vector = is_vector(member["type"]) and is_basic_type(member["type"][1][0])
|
||||||
add_node(hout, base_state_name + "__" + members[ind - 1]["name"], member["type"], variant_state, "after_", base_state_name, add_param_write(member, base_state_name), False)
|
add_node(hout, base_state_name + "__" + get_member_name(members[ind - 1]["name"]), member["type"], variant_state, "after_", base_state_name, add_param_write(member, base_state_name), False)
|
||||||
member = members[0]
|
member = members[0]
|
||||||
is_param_vector = is_vector(member["type"]) and is_basic_type(member["type"][1][0])
|
is_param_vector = is_vector(member["type"]) and is_basic_type(member["type"][1][0])
|
||||||
add_nodes_when_needed(hout, info, member, base_state_name, parents, member_classes)
|
add_nodes_when_needed(hout, info, member, base_state_name, parents, member_classes)
|
||||||
@@ -790,7 +794,7 @@ def add_view(hout, info):
|
|||||||
return deserialize(in, boost::type<$type>());
|
return deserialize(in, boost::type<$type>());
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
""")).substitute({'name' : m["name"], 'type' : full_type, 'skip' : skip}))
|
""")).substitute({'name' : get_member_name(m["name"]), 'type' : full_type, 'skip' : skip}))
|
||||||
|
|
||||||
skip = skip + Template("\n ser::skip(in, boost::type<${type}>());").substitute({'type': full_type})
|
skip = skip + Template("\n ser::skip(in, boost::type<${type}>());").substitute({'type': full_type})
|
||||||
|
|
||||||
|
|||||||
@@ -19,7 +19,7 @@
|
|||||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class commitlog_entry {
|
class commitlog_entry [[writable]] {
|
||||||
std::experimental::optional<column_mapping> mapping();
|
std::experimental::optional<column_mapping> mapping();
|
||||||
frozen_mutation mutation();
|
frozen_mutation mutation();
|
||||||
};
|
};
|
||||||
|
|||||||
98
memtable.cc
98
memtable.cc
@@ -65,17 +65,15 @@ future<> memtable::clear_gently() noexcept {
|
|||||||
auto t = std::make_unique<seastar::thread>(attr, [this] {
|
auto t = std::make_unique<seastar::thread>(attr, [this] {
|
||||||
auto& alloc = allocator();
|
auto& alloc = allocator();
|
||||||
|
|
||||||
// entries can no longer be moved after unlink_leftmost_without_rebalance()
|
|
||||||
// so need to disable compaction.
|
|
||||||
logalloc::reclaim_lock rl(*this);
|
|
||||||
|
|
||||||
auto p = std::move(partitions);
|
auto p = std::move(partitions);
|
||||||
while (!p.empty()) {
|
while (!p.empty()) {
|
||||||
auto batch_size = std::min<size_t>(p.size(), 32);
|
auto batch_size = std::min<size_t>(p.size(), 32);
|
||||||
auto dirty_before = dirty_size();
|
auto dirty_before = dirty_size();
|
||||||
with_allocator(alloc, [&] () noexcept {
|
with_allocator(alloc, [&] () noexcept {
|
||||||
while (batch_size--) {
|
while (batch_size--) {
|
||||||
alloc.destroy(p.unlink_leftmost_without_rebalance());
|
p.erase_and_dispose(p.begin(), [&] (auto e) {
|
||||||
|
alloc.destroy(e);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
remove_flushed_memory(dirty_before - dirty_size());
|
remove_flushed_memory(dirty_before - dirty_size());
|
||||||
@@ -205,19 +203,23 @@ protected:
|
|||||||
, _range(&range)
|
, _range(&range)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
memtable_entry* fetch_next_entry() {
|
memtable_entry* fetch_entry() {
|
||||||
update_iterators();
|
update_iterators();
|
||||||
if (_i == _end) {
|
if (_i == _end) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
} else {
|
} else {
|
||||||
memtable_entry& e = *_i;
|
memtable_entry& e = *_i;
|
||||||
++_i;
|
|
||||||
_last = e.key();
|
|
||||||
_memtable->upgrade_entry(e);
|
_memtable->upgrade_entry(e);
|
||||||
return &e;
|
return &e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void advance() {
|
||||||
|
memtable_entry& e = *_i;
|
||||||
|
_last = e.key();
|
||||||
|
++_i;
|
||||||
|
}
|
||||||
|
|
||||||
logalloc::allocating_section& read_section() {
|
logalloc::allocating_section& read_section() {
|
||||||
return _memtable->_read_section;
|
return _memtable->_read_section;
|
||||||
}
|
}
|
||||||
@@ -244,9 +246,10 @@ protected:
|
|||||||
|
|
||||||
mutation_reader delegate_reader(const dht::partition_range& delegate,
|
mutation_reader delegate_reader(const dht::partition_range& delegate,
|
||||||
const query::partition_slice& slice,
|
const query::partition_slice& slice,
|
||||||
const io_priority_class& pc) {
|
const io_priority_class& pc,
|
||||||
|
mutation_reader::forwarding fwd_mr) {
|
||||||
auto ret = make_mutation_reader<sstable_range_wrapping_reader>(
|
auto ret = make_mutation_reader<sstable_range_wrapping_reader>(
|
||||||
_memtable->_sstable, _schema, delegate, slice, pc);
|
_memtable->_sstable, _schema, delegate, slice, pc, fwd_mr);
|
||||||
_memtable = {};
|
_memtable = {};
|
||||||
_last = {};
|
_last = {};
|
||||||
return ret;
|
return ret;
|
||||||
@@ -264,15 +267,18 @@ class scanning_reader final: public iterator_reader {
|
|||||||
mutation_reader _delegate;
|
mutation_reader _delegate;
|
||||||
const io_priority_class& _pc;
|
const io_priority_class& _pc;
|
||||||
const query::partition_slice& _slice;
|
const query::partition_slice& _slice;
|
||||||
|
mutation_reader::forwarding _fwd_mr;
|
||||||
public:
|
public:
|
||||||
scanning_reader(schema_ptr s,
|
scanning_reader(schema_ptr s,
|
||||||
lw_shared_ptr<memtable> m,
|
lw_shared_ptr<memtable> m,
|
||||||
const dht::partition_range& range,
|
const dht::partition_range& range,
|
||||||
const query::partition_slice& slice,
|
const query::partition_slice& slice,
|
||||||
const io_priority_class& pc)
|
const io_priority_class& pc,
|
||||||
|
mutation_reader::forwarding fwd_mr)
|
||||||
: iterator_reader(std::move(s), std::move(m), range)
|
: iterator_reader(std::move(s), std::move(m), range)
|
||||||
, _pc(pc)
|
, _pc(pc)
|
||||||
, _slice(slice)
|
, _slice(slice)
|
||||||
|
, _fwd_mr(fwd_mr)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
virtual future<streamed_mutation_opt> operator()() override {
|
virtual future<streamed_mutation_opt> operator()() override {
|
||||||
@@ -283,18 +289,22 @@ public:
|
|||||||
// FIXME: Use cache. See column_family::make_reader().
|
// FIXME: Use cache. See column_family::make_reader().
|
||||||
_delegate_range = get_delegate_range();
|
_delegate_range = get_delegate_range();
|
||||||
if (_delegate_range) {
|
if (_delegate_range) {
|
||||||
_delegate = delegate_reader(*_delegate_range, _slice, _pc);
|
_delegate = delegate_reader(*_delegate_range, _slice, _pc, _fwd_mr);
|
||||||
return _delegate();
|
return _delegate();
|
||||||
}
|
}
|
||||||
|
|
||||||
logalloc::reclaim_lock _(region());
|
return read_section()(region(), [&] {
|
||||||
managed_bytes::linearization_context_guard lcg;
|
return with_linearized_managed_bytes([&] {
|
||||||
memtable_entry* e = fetch_next_entry();
|
memtable_entry* e = fetch_entry();
|
||||||
if (!e) {
|
if (!e) {
|
||||||
return make_ready_future<streamed_mutation_opt>(stdx::nullopt);
|
return make_ready_future<streamed_mutation_opt>(stdx::nullopt);
|
||||||
} else {
|
} else {
|
||||||
return make_ready_future<streamed_mutation_opt>(e->read(mtbl(), schema(), _slice));
|
auto ret = make_ready_future<streamed_mutation_opt>(e->read(mtbl(), schema(), _slice));
|
||||||
}
|
advance();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -391,19 +401,24 @@ public:
|
|||||||
flush_reader& operator=(const flush_reader&) = delete;
|
flush_reader& operator=(const flush_reader&) = delete;
|
||||||
|
|
||||||
virtual future<streamed_mutation_opt> operator()() override {
|
virtual future<streamed_mutation_opt> operator()() override {
|
||||||
logalloc::reclaim_lock _(region());
|
return read_section()(region(), [&] {
|
||||||
managed_bytes::linearization_context_guard lcg;
|
return with_linearized_managed_bytes([&] {
|
||||||
memtable_entry* e = fetch_next_entry();
|
memtable_entry* e = fetch_entry();
|
||||||
if (!e) {
|
if (!e) {
|
||||||
return make_ready_future<streamed_mutation_opt>(stdx::nullopt);
|
return make_ready_future<streamed_mutation_opt>(stdx::nullopt);
|
||||||
} else {
|
} else {
|
||||||
auto cr = query::clustering_key_filter_ranges::get_ranges(*schema(), query::full_slice, e->key().key());
|
auto cr = query::clustering_key_filter_ranges::get_ranges(*schema(), query::full_slice, e->key().key());
|
||||||
auto snp = e->partition().read(schema());
|
auto snp = e->partition().read(schema());
|
||||||
auto mpsr = make_partition_snapshot_reader<partition_snapshot_accounter>(schema(), e->key(), std::move(cr), snp, region(), read_section(), mtbl(), _flushed_memory);
|
auto mpsr = make_partition_snapshot_reader<partition_snapshot_accounter>(schema(), e->key(), std::move(cr),
|
||||||
_flushed_memory.account_component(*e);
|
snp, region(), read_section(), mtbl(), _flushed_memory);
|
||||||
_flushed_memory.account_component(*snp);
|
_flushed_memory.account_component(*e);
|
||||||
return make_ready_future<streamed_mutation_opt>(std::move(mpsr));
|
_flushed_memory.account_component(*snp);
|
||||||
}
|
auto ret = make_ready_future<streamed_mutation_opt>(std::move(mpsr));
|
||||||
|
advance();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -411,7 +426,9 @@ mutation_reader
|
|||||||
memtable::make_reader(schema_ptr s,
|
memtable::make_reader(schema_ptr s,
|
||||||
const dht::partition_range& range,
|
const dht::partition_range& range,
|
||||||
const query::partition_slice& slice,
|
const query::partition_slice& slice,
|
||||||
const io_priority_class& pc) {
|
const io_priority_class& pc,
|
||||||
|
tracing::trace_state_ptr trace_state_ptr,
|
||||||
|
mutation_reader::forwarding fwd_mr) {
|
||||||
if (query::is_single_partition(range)) {
|
if (query::is_single_partition(range)) {
|
||||||
const query::ring_position& pos = range.start()->value();
|
const query::ring_position& pos = range.start()->value();
|
||||||
return _read_section(*this, [&] {
|
return _read_section(*this, [&] {
|
||||||
@@ -425,7 +442,7 @@ memtable::make_reader(schema_ptr s,
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
return make_mutation_reader<scanning_reader>(std::move(s), shared_from_this(), range, slice, pc);
|
return make_mutation_reader<scanning_reader>(std::move(s), shared_from_this(), range, slice, pc, fwd_mr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -434,7 +451,7 @@ memtable::make_flush_reader(schema_ptr s, const io_priority_class& pc) {
|
|||||||
if (group()) {
|
if (group()) {
|
||||||
return make_mutation_reader<flush_reader>(std::move(s), shared_from_this());
|
return make_mutation_reader<flush_reader>(std::move(s), shared_from_this());
|
||||||
} else {
|
} else {
|
||||||
return make_mutation_reader<scanning_reader>(std::move(s), shared_from_this(), query::full_partition_range, query::full_slice, pc);
|
return make_mutation_reader<scanning_reader>(std::move(s), shared_from_this(), query::full_partition_range, query::full_slice, pc, mutation_reader::forwarding::no);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -486,8 +503,13 @@ logalloc::occupancy_stats memtable::occupancy() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
mutation_source memtable::as_data_source() {
|
mutation_source memtable::as_data_source() {
|
||||||
return mutation_source([mt = shared_from_this()] (schema_ptr s, const dht::partition_range& range) {
|
return mutation_source([mt = shared_from_this()] (schema_ptr s,
|
||||||
return mt->make_reader(std::move(s), range);
|
const dht::partition_range& range,
|
||||||
|
const query::partition_slice& slice,
|
||||||
|
const io_priority_class& pc,
|
||||||
|
tracing::trace_state_ptr trace_state,
|
||||||
|
mutation_reader::forwarding fwd_mr) {
|
||||||
|
return mt->make_reader(std::move(s), range, slice, pc, std::move(trace_state), fwd_mr);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -173,7 +173,9 @@ public:
|
|||||||
mutation_reader make_reader(schema_ptr,
|
mutation_reader make_reader(schema_ptr,
|
||||||
const dht::partition_range& range = query::full_partition_range,
|
const dht::partition_range& range = query::full_partition_range,
|
||||||
const query::partition_slice& slice = query::full_slice,
|
const query::partition_slice& slice = query::full_slice,
|
||||||
const io_priority_class& pc = default_priority_class());
|
const io_priority_class& pc = default_priority_class(),
|
||||||
|
tracing::trace_state_ptr trace_state_ptr = nullptr,
|
||||||
|
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::no);
|
||||||
|
|
||||||
|
|
||||||
mutation_reader make_flush_reader(schema_ptr, const io_priority_class& pc);
|
mutation_reader make_flush_reader(schema_ptr, const io_priority_class& pc);
|
||||||
|
|||||||
@@ -274,7 +274,13 @@ void messaging_service::start_listen() {
|
|||||||
if (listen_to_bc) {
|
if (listen_to_bc) {
|
||||||
_server_tls[1] = listen(utils::fb_utilities::get_broadcast_address());
|
_server_tls[1] = listen(utils::fb_utilities::get_broadcast_address());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
// Do this on just cpu 0, to avoid duplicate logs.
|
||||||
|
if (engine().cpu_id() == 0) {
|
||||||
|
if (_server_tls[0]) {
|
||||||
|
logger.info("Starting Encrypted Messaging Service on SSL port {}", _ssl_port);
|
||||||
|
}
|
||||||
|
logger.info("Starting Messaging Service on port {}", _port);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -308,14 +314,6 @@ messaging_service::messaging_service(gms::inet_address ip
|
|||||||
if (listen_now) {
|
if (listen_now) {
|
||||||
start_listen();
|
start_listen();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do this on just cpu 0, to avoid duplicate logs.
|
|
||||||
if (engine().cpu_id() == 0) {
|
|
||||||
if (_server_tls[0]) {
|
|
||||||
logger.info("Starting Encrypted Messaging Service on SSL port {}", _ssl_port);
|
|
||||||
}
|
|
||||||
logger.info("Starting Messaging Service on port {}", _port);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
msg_addr messaging_service::get_source(const rpc::client_info& cinfo) {
|
msg_addr messaging_service::get_source(const rpc::client_info& cinfo) {
|
||||||
|
|||||||
@@ -123,7 +123,7 @@ public:
|
|||||||
uint32_t partition_limit, CompactedMutationsConsumer consumer)
|
uint32_t partition_limit, CompactedMutationsConsumer consumer)
|
||||||
: _schema(s)
|
: _schema(s)
|
||||||
, _query_time(query_time)
|
, _query_time(query_time)
|
||||||
, _gc_before(query_time - s.gc_grace_seconds())
|
, _gc_before(saturating_subtract(query_time, s.gc_grace_seconds()))
|
||||||
, _can_gc(always_gc)
|
, _can_gc(always_gc)
|
||||||
, _slice(slice)
|
, _slice(slice)
|
||||||
, _row_limit(limit)
|
, _row_limit(limit)
|
||||||
@@ -139,7 +139,7 @@ public:
|
|||||||
std::function<api::timestamp_type(const dht::decorated_key&)> get_max_purgeable)
|
std::function<api::timestamp_type(const dht::decorated_key&)> get_max_purgeable)
|
||||||
: _schema(s)
|
: _schema(s)
|
||||||
, _query_time(compaction_time)
|
, _query_time(compaction_time)
|
||||||
, _gc_before(_query_time - s.gc_grace_seconds())
|
, _gc_before(saturating_subtract(_query_time, s.gc_grace_seconds()))
|
||||||
, _get_max_purgeable(std::move(get_max_purgeable))
|
, _get_max_purgeable(std::move(get_max_purgeable))
|
||||||
, _can_gc([this] (tombstone t) { return can_gc(t); })
|
, _can_gc([this] (tombstone t) { return can_gc(t); })
|
||||||
, _slice(query::full_slice)
|
, _slice(query::full_slice)
|
||||||
|
|||||||
@@ -1183,7 +1183,7 @@ uint32_t mutation_partition::do_compact(const schema& s,
|
|||||||
{
|
{
|
||||||
assert(row_limit > 0);
|
assert(row_limit > 0);
|
||||||
|
|
||||||
auto gc_before = query_time - s.gc_grace_seconds();
|
auto gc_before = saturating_subtract(query_time, s.gc_grace_seconds());
|
||||||
|
|
||||||
auto should_purge_tombstone = [&] (const tombstone& t) {
|
auto should_purge_tombstone = [&] (const tombstone& t) {
|
||||||
return t.deletion_time < gc_before && can_gc(t);
|
return t.deletion_time < gc_before && can_gc(t);
|
||||||
@@ -1526,12 +1526,19 @@ bool row::compact_and_expire(const schema& s, column_kind kind, tombstone tomb,
|
|||||||
const column_definition& def = s.column_at(kind, id);
|
const column_definition& def = s.column_at(kind, id);
|
||||||
if (def.is_atomic()) {
|
if (def.is_atomic()) {
|
||||||
atomic_cell_view cell = c.as_atomic_cell();
|
atomic_cell_view cell = c.as_atomic_cell();
|
||||||
|
auto can_erase_cell = [&] {
|
||||||
|
return cell.deletion_time() < gc_before && can_gc(tombstone(cell.timestamp(), cell.deletion_time()));
|
||||||
|
};
|
||||||
|
|
||||||
if (cell.is_covered_by(tomb, def.is_counter())) {
|
if (cell.is_covered_by(tomb, def.is_counter())) {
|
||||||
erase = true;
|
erase = true;
|
||||||
} else if (cell.has_expired(query_time)) {
|
} else if (cell.has_expired(query_time)) {
|
||||||
c = atomic_cell::make_dead(cell.timestamp(), cell.deletion_time());
|
erase = can_erase_cell();
|
||||||
|
if (!erase) {
|
||||||
|
c = atomic_cell::make_dead(cell.timestamp(), cell.deletion_time());
|
||||||
|
}
|
||||||
} else if (!cell.is_live()) {
|
} else if (!cell.is_live()) {
|
||||||
erase = cell.deletion_time() < gc_before && can_gc(tombstone(cell.timestamp(), cell.deletion_time()));
|
erase = can_erase_cell();
|
||||||
} else {
|
} else {
|
||||||
any_live |= true;
|
any_live |= true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -63,8 +63,14 @@ auto write_counter_cell(Writer&& writer, const atomic_cell& c)
|
|||||||
counter_cell_view ccv(c);
|
counter_cell_view ccv(c);
|
||||||
auto shards = std::move(value).start_value_counter_cell_full()
|
auto shards = std::move(value).start_value_counter_cell_full()
|
||||||
.start_shards();
|
.start_shards();
|
||||||
for (auto csv : ccv.shards()) {
|
if (service::get_local_storage_service().cluster_supports_correct_counter_order()) {
|
||||||
shards.add_shards(counter_shard(csv));
|
for (auto csv : ccv.shards()) {
|
||||||
|
shards.add_shards(counter_shard(csv));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (auto& cs : ccv.shards_compatible_with_1_7_4()) {
|
||||||
|
shards.add_shards(cs);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return std::move(shards).end_shards().end_counter_cell_full();
|
return std::move(shards).end_shards().end_counter_cell_full();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -73,8 +73,9 @@ atomic_cell read_atomic_cell(atomic_cell_variant cv)
|
|||||||
// TODO: a lot of copying for something called view
|
// TODO: a lot of copying for something called view
|
||||||
counter_cell_builder ccb; // we know the final number of shards
|
counter_cell_builder ccb; // we know the final number of shards
|
||||||
for (auto csv : ccv.shards()) {
|
for (auto csv : ccv.shards()) {
|
||||||
ccb.add_shard(counter_shard(csv));
|
ccb.add_maybe_unsorted_shard(counter_shard(csv));
|
||||||
}
|
}
|
||||||
|
ccb.sort_and_remove_duplicates();
|
||||||
return ccb.build(_created_at);
|
return ccb.build(_created_at);
|
||||||
}
|
}
|
||||||
atomic_cell operator()(ser::counter_cell_update_view& ccv) const {
|
atomic_cell operator()(ser::counter_cell_update_view& ccv) const {
|
||||||
|
|||||||
@@ -282,10 +282,12 @@ private:
|
|||||||
public:
|
public:
|
||||||
multi_range_mutation_reader(schema_ptr s, mutation_source source, const ranges_vector& ranges,
|
multi_range_mutation_reader(schema_ptr s, mutation_source source, const ranges_vector& ranges,
|
||||||
const query::partition_slice& slice, const io_priority_class& pc,
|
const query::partition_slice& slice, const io_priority_class& pc,
|
||||||
tracing::trace_state_ptr trace_state)
|
tracing::trace_state_ptr trace_state,
|
||||||
|
mutation_reader::forwarding fwd_mr)
|
||||||
: _ranges(ranges)
|
: _ranges(ranges)
|
||||||
, _current_range(_ranges.begin())
|
, _current_range(_ranges.begin())
|
||||||
, _reader(source(s, *_current_range, slice, pc, trace_state))
|
, _reader(source(s, *_current_range, slice, pc, trace_state,
|
||||||
|
_ranges.size() > 1 ? mutation_reader::forwarding::yes : fwd_mr))
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -317,8 +319,9 @@ public:
|
|||||||
mutation_reader
|
mutation_reader
|
||||||
make_multi_range_reader(schema_ptr s, mutation_source source, const dht::partition_range_vector& ranges,
|
make_multi_range_reader(schema_ptr s, mutation_source source, const dht::partition_range_vector& ranges,
|
||||||
const query::partition_slice& slice, const io_priority_class& pc,
|
const query::partition_slice& slice, const io_priority_class& pc,
|
||||||
tracing::trace_state_ptr trace_state)
|
tracing::trace_state_ptr trace_state,
|
||||||
|
mutation_reader::forwarding fwd_mr)
|
||||||
{
|
{
|
||||||
return make_mutation_reader<multi_range_mutation_reader>(std::move(s), std::move(source), ranges,
|
return make_mutation_reader<multi_range_mutation_reader>(std::move(s), std::move(source), ranges,
|
||||||
slice, pc, std::move(trace_state));
|
slice, pc, std::move(trace_state), fwd_mr);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -50,6 +50,20 @@
|
|||||||
// not be the optimal object to use here.
|
// not be the optimal object to use here.
|
||||||
class mutation_reader final {
|
class mutation_reader final {
|
||||||
public:
|
public:
|
||||||
|
// mutation_reader::forwarding determines whether fast_forward_to() may
|
||||||
|
// be used on the mutation reader to change the partition range being
|
||||||
|
// read. Enabling forwarding also changes read policy: forwarding::no
|
||||||
|
// means we will stop reading from disk at the end of the given range,
|
||||||
|
// but with forwarding::yes we may read ahead, anticipating the user to
|
||||||
|
// make a small skip with fast_forward_to() and continuing to read.
|
||||||
|
//
|
||||||
|
// Note that mutation_reader::forwarding is similarly named but different
|
||||||
|
// from streamed_mutation::forwarding - the former is about skipping to
|
||||||
|
// a different partition range, while the latter is about skipping
|
||||||
|
// inside a large partition.
|
||||||
|
class forwarding_tag;
|
||||||
|
using forwarding = bool_class<forwarding_tag>;
|
||||||
|
|
||||||
class impl {
|
class impl {
|
||||||
public:
|
public:
|
||||||
virtual ~impl() {}
|
virtual ~impl() {}
|
||||||
@@ -253,34 +267,45 @@ future<> consume(mutation_reader& reader, Consumer consumer) {
|
|||||||
class mutation_source {
|
class mutation_source {
|
||||||
using partition_range = const dht::partition_range&;
|
using partition_range = const dht::partition_range&;
|
||||||
using io_priority = const io_priority_class&;
|
using io_priority = const io_priority_class&;
|
||||||
std::function<mutation_reader(schema_ptr, partition_range, const query::partition_slice&, io_priority, tracing::trace_state_ptr)> _fn;
|
using func_type = std::function<mutation_reader(schema_ptr,
|
||||||
|
partition_range,
|
||||||
|
const query::partition_slice&,
|
||||||
|
io_priority,
|
||||||
|
tracing::trace_state_ptr,
|
||||||
|
mutation_reader::forwarding
|
||||||
|
)>;
|
||||||
|
// We could have our own version of std::function<> that is nothrow
|
||||||
|
// move constructible and save some indirection and allocation.
|
||||||
|
// Probably not worth the effort though.
|
||||||
|
std::unique_ptr<func_type> _fn;
|
||||||
|
private:
|
||||||
|
mutation_source() = default;
|
||||||
|
explicit operator bool() const { return bool(_fn); }
|
||||||
|
friend class optimized_optional<mutation_source>;
|
||||||
public:
|
public:
|
||||||
mutation_source(std::function<mutation_reader(schema_ptr, partition_range, const query::partition_slice&, io_priority, tracing::trace_state_ptr)> fn)
|
mutation_source(func_type fn) : _fn(std::make_unique<func_type>(std::move(fn))) {}
|
||||||
: _fn(std::move(fn)) {}
|
|
||||||
mutation_source(std::function<mutation_reader(schema_ptr, partition_range, const query::partition_slice&, io_priority)> fn)
|
|
||||||
: _fn([fn = std::move(fn)] (schema_ptr s, partition_range range, const query::partition_slice& slice, io_priority pc, tracing::trace_state_ptr) {
|
|
||||||
return fn(s, range, slice, pc);
|
|
||||||
}) {}
|
|
||||||
mutation_source(std::function<mutation_reader(schema_ptr, partition_range, const query::partition_slice&)> fn)
|
mutation_source(std::function<mutation_reader(schema_ptr, partition_range, const query::partition_slice&)> fn)
|
||||||
: _fn([fn = std::move(fn)] (schema_ptr s, partition_range range, const query::partition_slice& slice, io_priority, tracing::trace_state_ptr) {
|
: _fn(std::make_unique<func_type>([fn = std::move(fn)] (schema_ptr s, partition_range range, const query::partition_slice& slice, io_priority, tracing::trace_state_ptr, mutation_reader::forwarding) {
|
||||||
return fn(s, range, slice);
|
return fn(s, range, slice);
|
||||||
}) {}
|
})) {}
|
||||||
mutation_source(std::function<mutation_reader(schema_ptr, partition_range range)> fn)
|
mutation_source(std::function<mutation_reader(schema_ptr, partition_range range)> fn)
|
||||||
: _fn([fn = std::move(fn)] (schema_ptr s, partition_range range, const query::partition_slice&, io_priority, tracing::trace_state_ptr) {
|
: _fn(std::make_unique<func_type>([fn = std::move(fn)] (schema_ptr s, partition_range range, const query::partition_slice&, io_priority, tracing::trace_state_ptr, mutation_reader::forwarding) {
|
||||||
return fn(s, range);
|
return fn(s, range);
|
||||||
}) {}
|
})) {}
|
||||||
|
mutation_source(const mutation_source& other)
|
||||||
mutation_reader operator()(schema_ptr s, partition_range range, const query::partition_slice& slice, io_priority pc, tracing::trace_state_ptr trace_state) const {
|
: _fn(std::make_unique<func_type>(*other._fn)) { }
|
||||||
return _fn(std::move(s), range, slice, pc, std::move(trace_state));
|
mutation_source& operator=(const mutation_source& other) {
|
||||||
|
_fn = std::make_unique<func_type>(*other._fn);
|
||||||
|
return *this;
|
||||||
}
|
}
|
||||||
mutation_reader operator()(schema_ptr s, partition_range range, const query::partition_slice& slice, io_priority pc) const {
|
mutation_source(mutation_source&&) = default;
|
||||||
return _fn(std::move(s), range, slice, pc, nullptr);
|
mutation_source& operator=(mutation_source&&) = default;
|
||||||
}
|
mutation_reader operator()(schema_ptr s, partition_range range,
|
||||||
mutation_reader operator()(schema_ptr s, partition_range range, const query::partition_slice& slice) const {
|
const query::partition_slice& slice = query::full_slice,
|
||||||
return _fn(std::move(s), range, slice, default_priority_class(), nullptr);
|
io_priority pc = default_priority_class(),
|
||||||
}
|
tracing::trace_state_ptr trace_state = nullptr,
|
||||||
mutation_reader operator()(schema_ptr s, partition_range range) const {
|
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::yes) const {
|
||||||
return _fn(std::move(s), range, query::full_slice, default_priority_class(), nullptr);
|
return (*_fn)(std::move(s), range, slice, pc, trace_state, fwd_mr);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -430,4 +455,5 @@ stable_flattened_mutations_consumer<FlattenedConsumer> make_stable_flattened_mut
|
|||||||
mutation_reader
|
mutation_reader
|
||||||
make_multi_range_reader(schema_ptr s, mutation_source source, const dht::partition_range_vector& ranges,
|
make_multi_range_reader(schema_ptr s, mutation_source source, const dht::partition_range_vector& ranges,
|
||||||
const query::partition_slice& slice, const io_priority_class& pc = default_priority_class(),
|
const query::partition_slice& slice, const io_priority_class& pc = default_priority_class(),
|
||||||
tracing::trace_state_ptr trace_state = nullptr);
|
tracing::trace_state_ptr trace_state = nullptr,
|
||||||
|
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::yes);
|
||||||
|
|||||||
@@ -345,7 +345,7 @@ public:
|
|||||||
: _w(std::move(w))
|
: _w(std::move(w))
|
||||||
, _row_count(c)
|
, _row_count(c)
|
||||||
, _short_read(sr)
|
, _short_read(sr)
|
||||||
, _memory_tracker(std::move(_memory_tracker))
|
, _memory_tracker(std::move(memory_tracker))
|
||||||
, _partition_count(pc)
|
, _partition_count(pc)
|
||||||
{
|
{
|
||||||
w.reduce_chunk_count();
|
w.reduce_chunk_count();
|
||||||
|
|||||||
19
range.hh
19
range.hh
@@ -601,13 +601,13 @@ private:
|
|||||||
struct built_in_ : std_ {};
|
struct built_in_ : std_ {};
|
||||||
|
|
||||||
template<typename Range, typename LessComparator,
|
template<typename Range, typename LessComparator,
|
||||||
typename = decltype(&std::remove_reference<Range>::type::lower_bound)>
|
typename = decltype(std::declval<Range>().lower_bound(std::declval<T>(), std::declval<LessComparator>()))>
|
||||||
typename std::remove_reference<Range>::type::const_iterator do_lower_bound(const T& value, Range&& r, LessComparator&& cmp, built_in_) const {
|
typename std::remove_reference<Range>::type::const_iterator do_lower_bound(const T& value, Range&& r, LessComparator&& cmp, built_in_) const {
|
||||||
return r.lower_bound(value, std::forward<LessComparator>(cmp));
|
return r.lower_bound(value, std::forward<LessComparator>(cmp));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Range, typename LessComparator,
|
template<typename Range, typename LessComparator,
|
||||||
typename = decltype(&std::remove_reference<Range>::type::upper_bound)>
|
typename = decltype(std::declval<Range>().upper_bound(std::declval<T>(), std::declval<LessComparator>()))>
|
||||||
typename std::remove_reference<Range>::type::const_iterator do_upper_bound(const T& value, Range&& r, LessComparator&& cmp, built_in_) const {
|
typename std::remove_reference<Range>::type::const_iterator do_upper_bound(const T& value, Range&& r, LessComparator&& cmp, built_in_) const {
|
||||||
return r.upper_bound(value, std::forward<LessComparator>(cmp));
|
return r.upper_bound(value, std::forward<LessComparator>(cmp));
|
||||||
}
|
}
|
||||||
@@ -649,6 +649,21 @@ public:
|
|||||||
return boost::make_iterator_range(lower_bound(range, cmp), upper_bound(range, cmp));
|
return boost::make_iterator_range(lower_bound(range, cmp), upper_bound(range, cmp));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the intersection between this range and other.
|
||||||
|
template<typename Comparator>
|
||||||
|
stdx::optional<nonwrapping_range> intersection(const nonwrapping_range& other, Comparator&& cmp) const {
|
||||||
|
auto p = std::minmax(_range, other._range, [&cmp] (auto&& a, auto&& b) {
|
||||||
|
return wrapping_range<T>::less_than(a.start_bound(), b.start_bound(), cmp);
|
||||||
|
});
|
||||||
|
if (wrapping_range<T>::greater_than_or_equal(p.first.end_bound(), p.second.start_bound(), cmp)) {
|
||||||
|
auto end = std::min(p.first.end_bound(), p.second.end_bound(), [&cmp] (auto&& a, auto&& b) {
|
||||||
|
return !wrapping_range<T>::greater_than_or_equal(a, b, cmp);
|
||||||
|
});
|
||||||
|
return nonwrapping_range(p.second.start(), end.b);
|
||||||
|
}
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
template<typename U>
|
template<typename U>
|
||||||
friend std::ostream& operator<<(std::ostream& out, const nonwrapping_range<U>& r);
|
friend std::ostream& operator<<(std::ostream& out, const nonwrapping_range<U>& r);
|
||||||
};
|
};
|
||||||
|
|||||||
76
repair/range_split.hh
Normal file
76
repair/range_split.hh
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2017 ScyllaDB
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This file is part of Scylla.
|
||||||
|
*
|
||||||
|
* Scylla is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Scylla is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <stack>
|
||||||
|
|
||||||
|
#include "dht/i_partitioner.hh"
|
||||||
|
|
||||||
|
// range_splitter(r, N, K) is a helper for splitting a given token_range r of
|
||||||
|
// estimated size N into many small ranges of size K, and later iterating
|
||||||
|
// over those small ranges once with the has_next() and next() methods.
|
||||||
|
// This implementation assumes only the availability of a range::midpoint()
|
||||||
|
// operation, and as a result creates ranges with size between K/2 and K.
|
||||||
|
// Moreover, it has memory requirement log(N). With more general arithmetic
|
||||||
|
// support over tokens, we could get exactly K and O(1) memory.
|
||||||
|
class range_splitter {
|
||||||
|
std::stack<std::pair<::dht::token_range, float>> _stack;
|
||||||
|
uint64_t _desired;
|
||||||
|
public:
|
||||||
|
range_splitter(::dht::token_range r, uint64_t N, uint64_t K) {
|
||||||
|
_stack.push({r, N});
|
||||||
|
_desired = K;
|
||||||
|
}
|
||||||
|
bool has_next() const {
|
||||||
|
return !_stack.empty();
|
||||||
|
}
|
||||||
|
::dht::token_range next() {
|
||||||
|
// If the head range's estimated size is small enough, return it.
|
||||||
|
// Otherwise split it to two halves, push the second half on the
|
||||||
|
// stack, and repeat with the first half. May need to do this more
|
||||||
|
// than once (up to log(N/K) times) until we have one range small
|
||||||
|
// enough to return.
|
||||||
|
assert(!_stack.empty());
|
||||||
|
auto range = _stack.top().first;
|
||||||
|
auto size = _stack.top().second;
|
||||||
|
_stack.pop();
|
||||||
|
while (size > _desired) {
|
||||||
|
// The use of minimum_token() here twice is not a typo - because wrap-
|
||||||
|
// around token ranges are supported by midpoint(), the beyond-maximum
|
||||||
|
// token can also be represented by minimum_token().
|
||||||
|
auto midpoint = dht::global_partitioner().midpoint(
|
||||||
|
range.start() ? range.start()->value() : dht::minimum_token(),
|
||||||
|
range.end() ? range.end()->value() : dht::minimum_token());
|
||||||
|
// This shouldn't happen, but if the range included just one token, we
|
||||||
|
// can't split further (split() may actually fail with assertion failure)
|
||||||
|
if ((range.start() && midpoint == range.start()->value()) ||
|
||||||
|
(range.end() && midpoint == range.end()->value())) {
|
||||||
|
return range;
|
||||||
|
}
|
||||||
|
auto halves = range.split(midpoint, dht::token_comparator());
|
||||||
|
_stack.push({halves.second, size / 2.0});
|
||||||
|
range = halves.first;
|
||||||
|
size /= 2.0;
|
||||||
|
}
|
||||||
|
return range;
|
||||||
|
}
|
||||||
|
};
|
||||||
327
repair/repair.cc
327
repair/repair.cc
@@ -20,6 +20,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "repair.hh"
|
#include "repair.hh"
|
||||||
|
#include "range_split.hh"
|
||||||
|
|
||||||
#include "streaming/stream_plan.hh"
|
#include "streaming/stream_plan.hh"
|
||||||
#include "streaming/stream_state.hh"
|
#include "streaming/stream_state.hh"
|
||||||
@@ -40,11 +41,6 @@
|
|||||||
|
|
||||||
static logging::logger logger("repair");
|
static logging::logger logger("repair");
|
||||||
|
|
||||||
struct failed_range {
|
|
||||||
sstring cf;
|
|
||||||
::dht::token_range range;
|
|
||||||
};
|
|
||||||
|
|
||||||
class repair_info {
|
class repair_info {
|
||||||
public:
|
public:
|
||||||
seastar::sharded<database>& db;
|
seastar::sharded<database>& db;
|
||||||
@@ -52,15 +48,25 @@ public:
|
|||||||
dht::token_range_vector ranges;
|
dht::token_range_vector ranges;
|
||||||
std::vector<sstring> cfs;
|
std::vector<sstring> cfs;
|
||||||
int id;
|
int id;
|
||||||
|
shard_id shard;
|
||||||
std::vector<sstring> data_centers;
|
std::vector<sstring> data_centers;
|
||||||
std::vector<sstring> hosts;
|
std::vector<sstring> hosts;
|
||||||
std::vector<failed_range> failed_ranges;
|
size_t nr_failed_ranges = 0;
|
||||||
streaming::stream_plan sp_in;
|
// Map of peer -> <cf, ranges>
|
||||||
streaming::stream_plan sp_out;
|
std::unordered_map<gms::inet_address, std::unordered_map<sstring, dht::token_range_vector>> ranges_need_repair_in;
|
||||||
|
std::unordered_map<gms::inet_address, std::unordered_map<sstring, dht::token_range_vector>> ranges_need_repair_out;
|
||||||
// FIXME: this "100" needs to be a parameter.
|
// FIXME: this "100" needs to be a parameter.
|
||||||
uint64_t target_partitions = 100;
|
uint64_t target_partitions = 100;
|
||||||
// FIXME: this "10 * 1024 * 1024" needs to be a parameter.
|
// This affects how many ranges we put in a stream plan. The more ranges, the more
|
||||||
size_t sub_ranges_max = 10 * 1024 * 1024;
|
// memory we use to store the ranges in memory. However, it can reduce the
|
||||||
|
// total number of stream_plan we use for the repair.
|
||||||
|
size_t sub_ranges_to_stream = 10 * 1024;
|
||||||
|
size_t sp_index = 0;
|
||||||
|
size_t current_sub_ranges_nr_in = 0;
|
||||||
|
size_t current_sub_ranges_nr_out = 0;
|
||||||
|
int ranges_index = 0;
|
||||||
|
// Only allow one stream_plan in flight
|
||||||
|
semaphore sp_parallelism_semaphore{1};
|
||||||
public:
|
public:
|
||||||
repair_info(seastar::sharded<database>& db_,
|
repair_info(seastar::sharded<database>& db_,
|
||||||
const sstring& keyspace_,
|
const sstring& keyspace_,
|
||||||
@@ -74,42 +80,81 @@ public:
|
|||||||
, ranges(ranges_)
|
, ranges(ranges_)
|
||||||
, cfs(cfs_)
|
, cfs(cfs_)
|
||||||
, id(id_)
|
, id(id_)
|
||||||
|
, shard(engine().cpu_id())
|
||||||
, data_centers(data_centers_)
|
, data_centers(data_centers_)
|
||||||
, hosts(hosts_)
|
, hosts(hosts_) {
|
||||||
, sp_in(streaming::stream_plan(sprint("repair-in-%d", id)))
|
|
||||||
, sp_out(streaming::stream_plan(sprint("repair-out-%d", id))) {
|
|
||||||
|
|
||||||
}
|
}
|
||||||
future<> do_streaming() {
|
future<> do_streaming() {
|
||||||
return sp_in.execute().discard_result().then([this] {
|
size_t ranges_in = 0;
|
||||||
return sp_out.execute().discard_result();
|
size_t ranges_out = 0;
|
||||||
|
auto sp_in = make_lw_shared<streaming::stream_plan>(sprint("repair-in-id-%d-shard-%d-index-%d", id, shard, sp_index));
|
||||||
|
auto sp_out = make_lw_shared<streaming::stream_plan>(sprint("repair-out-id-%d-shard-%d-index-%d", id, shard, sp_index));
|
||||||
|
|
||||||
|
for (auto& x : ranges_need_repair_in) {
|
||||||
|
auto& peer = x.first;
|
||||||
|
for (auto& y : x.second) {
|
||||||
|
auto& cf = y.first;
|
||||||
|
auto& stream_ranges = y.second;
|
||||||
|
ranges_in += stream_ranges.size();
|
||||||
|
sp_in->request_ranges(peer, keyspace, std::move(stream_ranges), {cf});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ranges_need_repair_in.clear();
|
||||||
|
current_sub_ranges_nr_in = 0;
|
||||||
|
|
||||||
|
for (auto& x : ranges_need_repair_out) {
|
||||||
|
auto& peer = x.first;
|
||||||
|
for (auto& y : x.second) {
|
||||||
|
auto& cf = y.first;
|
||||||
|
auto& stream_ranges = y.second;
|
||||||
|
ranges_out += stream_ranges.size();
|
||||||
|
sp_out->transfer_ranges(peer, keyspace, std::move(stream_ranges), {cf});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ranges_need_repair_out.clear();
|
||||||
|
current_sub_ranges_nr_out = 0;
|
||||||
|
|
||||||
|
if (ranges_in || ranges_out) {
|
||||||
|
logger.info("Start streaming for repair id={}, shard={}, index={}, ranges_in={}, ranges_out={}", id, shard, sp_index, ranges_in, ranges_out);
|
||||||
|
}
|
||||||
|
sp_index++;
|
||||||
|
|
||||||
|
return sp_in->execute().discard_result().then([sp_in, sp_out] {
|
||||||
|
return sp_out->execute().discard_result();
|
||||||
}).handle_exception([] (auto ep) {
|
}).handle_exception([] (auto ep) {
|
||||||
logger.warn("repair's stream failed: {}", ep);
|
logger.warn("repair's stream failed: {}", ep);
|
||||||
return make_exception_future(ep);
|
return make_exception_future(ep);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
bool check_failed_ranges() {
|
void check_failed_ranges() {
|
||||||
if (failed_ranges.empty()) {
|
if (nr_failed_ranges) {
|
||||||
logger.info("repair {} completed successfully", id);
|
logger.info("repair {} on shard {} failed - {} ranges failed", id, shard, nr_failed_ranges);
|
||||||
return true;
|
throw std::runtime_error(sprint("repair %d on shard %d failed to do checksum for %d sub ranges", id, shard, nr_failed_ranges));
|
||||||
} else {
|
} else {
|
||||||
for (auto& frange: failed_ranges) {
|
logger.info("repair {} on shard {} completed successfully", id, shard);
|
||||||
logger.debug("repair cf {} range {} failed", frange.cf, frange.range);
|
|
||||||
}
|
|
||||||
logger.info("repair {} failed - {} ranges failed", id, failed_ranges.size());
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void request_transfer_ranges(const sstring& cf,
|
future<> request_transfer_ranges(const sstring& cf,
|
||||||
const ::dht::token_range& range,
|
const ::dht::token_range& range,
|
||||||
const std::vector<gms::inet_address>& neighbors_in,
|
const std::vector<gms::inet_address>& neighbors_in,
|
||||||
const std::vector<gms::inet_address>& neighbors_out) {
|
const std::vector<gms::inet_address>& neighbors_out) {
|
||||||
for (const auto& peer : neighbors_in) {
|
logger.debug("Add cf {}, range {}, current_sub_ranges_nr_in {}, current_sub_ranges_nr_out {}", cf, range, current_sub_ranges_nr_in, current_sub_ranges_nr_out);
|
||||||
sp_in.request_ranges(peer, keyspace, {range}, {cf});
|
return sp_parallelism_semaphore.wait(1).then([this, cf, range, neighbors_in, neighbors_out] {
|
||||||
}
|
for (const auto& peer : neighbors_in) {
|
||||||
for (const auto& peer : neighbors_out) {
|
ranges_need_repair_in[peer][cf].emplace_back(range);
|
||||||
sp_out.transfer_ranges(peer, keyspace, {range}, {cf});
|
current_sub_ranges_nr_in++;
|
||||||
}
|
}
|
||||||
|
for (const auto& peer : neighbors_out) {
|
||||||
|
ranges_need_repair_out[peer][cf].emplace_back(range);
|
||||||
|
current_sub_ranges_nr_out++;
|
||||||
|
}
|
||||||
|
if (current_sub_ranges_nr_in >= sub_ranges_to_stream || current_sub_ranges_nr_out >= sub_ranges_to_stream) {
|
||||||
|
return do_streaming();
|
||||||
|
}
|
||||||
|
return make_ready_future<>();
|
||||||
|
}).finally([this] {
|
||||||
|
sp_parallelism_semaphore.signal(1);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -268,7 +313,7 @@ static std::vector<gms::inet_address> get_neighbors(database& db,
|
|||||||
// be queried about more than once (FIXME: reconsider this. But note that
|
// be queried about more than once (FIXME: reconsider this. But note that
|
||||||
// failed repairs should be rare anyway).
|
// failed repairs should be rare anyway).
|
||||||
// This object is not thread safe, and must be used by only one cpu.
|
// This object is not thread safe, and must be used by only one cpu.
|
||||||
static class {
|
class tracker {
|
||||||
private:
|
private:
|
||||||
// Each repair_start() call returns a unique int which the user can later
|
// Each repair_start() call returns a unique int which the user can later
|
||||||
// use to follow the status of this repair with repair_status().
|
// use to follow the status of this repair with repair_status().
|
||||||
@@ -281,7 +326,11 @@ private:
|
|||||||
std::unordered_map<int, repair_status> _status;
|
std::unordered_map<int, repair_status> _status;
|
||||||
// Used to allow shutting down repairs in progress, and waiting for them.
|
// Used to allow shutting down repairs in progress, and waiting for them.
|
||||||
seastar::gate _gate;
|
seastar::gate _gate;
|
||||||
|
// Set when the repair service is being shutdown
|
||||||
|
std::atomic_bool _shutdown alignas(64);
|
||||||
public:
|
public:
|
||||||
|
tracker() : _shutdown(false) {
|
||||||
|
}
|
||||||
void start(int id) {
|
void start(int id) {
|
||||||
_gate.enter();
|
_gate.enter();
|
||||||
_status[id] = repair_status::RUNNING;
|
_status[id] = repair_status::RUNNING;
|
||||||
@@ -309,17 +358,19 @@ public:
|
|||||||
return _next_repair_command++;
|
return _next_repair_command++;
|
||||||
}
|
}
|
||||||
future<> shutdown() {
|
future<> shutdown() {
|
||||||
|
_shutdown.store(true, std::memory_order_relaxed);
|
||||||
return _gate.close();
|
return _gate.close();
|
||||||
}
|
}
|
||||||
void check_in_shutdown() {
|
void check_in_shutdown() {
|
||||||
_gate.check();
|
if (_shutdown.load(std::memory_order_relaxed)) {
|
||||||
|
throw std::runtime_error(sprint("Repair service is being shutdown"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} repair_tracker;
|
};
|
||||||
|
|
||||||
|
static tracker repair_tracker;
|
||||||
|
|
||||||
static void check_in_shutdown() {
|
static void check_in_shutdown() {
|
||||||
// Only call this from the single CPU managing the repair - the only CPU
|
|
||||||
// which is allowed to use repair_tracker.
|
|
||||||
assert(engine().cpu_id() == 0);
|
|
||||||
repair_tracker.check_in_shutdown();
|
repair_tracker.check_in_shutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -445,6 +496,19 @@ static future<partition_checksum> checksum_range_shard(database &db,
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// It is counter-productive to allow a large number of range checksum
|
||||||
|
// operations to proceed in parallel (on the same shard), because the read
|
||||||
|
// operation can already parallelize itself as much as needed, and doing
|
||||||
|
// multiple reads in parallel just adds a lot of memory overheads.
|
||||||
|
// So checksum_parallelism_semaphore is used to limit this parallelism,
|
||||||
|
// and should be set to 1, or another small number.
|
||||||
|
//
|
||||||
|
// Note that checksum_parallelism_semaphore applies not just in the
|
||||||
|
// repair master, but also in the slave: The repair slave may receive many
|
||||||
|
// checksum requests in parallel, but will only work on one or a few
|
||||||
|
// (checksum_parallelism_semaphore) at once.
|
||||||
|
static thread_local semaphore checksum_parallelism_semaphore(2);
|
||||||
|
|
||||||
// Calculate the checksum of the data held on all shards of a column family,
|
// Calculate the checksum of the data held on all shards of a column family,
|
||||||
// in the given token range.
|
// in the given token range.
|
||||||
// In practice, we only need to consider one or two shards which intersect the
|
// In practice, we only need to consider one or two shards which intersect the
|
||||||
@@ -467,7 +531,9 @@ future<partition_checksum> checksum_range(seastar::sharded<database> &db,
|
|||||||
auto& prs = shard_range.second;
|
auto& prs = shard_range.second;
|
||||||
return db.invoke_on(shard, [keyspace, cf, prs = std::move(prs), hash_version] (database& db) mutable {
|
return db.invoke_on(shard, [keyspace, cf, prs = std::move(prs), hash_version] (database& db) mutable {
|
||||||
return do_with(std::move(keyspace), std::move(cf), std::move(prs), [&db, hash_version] (auto& keyspace, auto& cf, auto& prs) {
|
return do_with(std::move(keyspace), std::move(cf), std::move(prs), [&db, hash_version] (auto& keyspace, auto& cf, auto& prs) {
|
||||||
return checksum_range_shard(db, keyspace, cf, prs, hash_version);
|
return with_semaphore(checksum_parallelism_semaphore, 1, [&db, hash_version, &keyspace, &cf, &prs] {
|
||||||
|
return checksum_range_shard(db, keyspace, cf, prs, hash_version);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
}).then([&result] (partition_checksum sum) {
|
}).then([&result] (partition_checksum sum) {
|
||||||
result.add(sum);
|
result.add(sum);
|
||||||
@@ -478,32 +544,15 @@ future<partition_checksum> checksum_range(seastar::sharded<database> &db,
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
static void split_and_add(std::vector<::dht::token_range>& ranges,
|
// parallelism_semaphore limits the number of parallel ongoing checksum
|
||||||
const dht::token_range& range,
|
// comparisons. This could mean, for example, that this number of checksum
|
||||||
uint64_t estimated_partitions, uint64_t target_partitions) {
|
// requests have been sent to other nodes and we are waiting for them to
|
||||||
if (estimated_partitions < target_partitions) {
|
// return so we can compare those to our own checksums. This limit can be
|
||||||
// We're done, the range is small enough to not be split further
|
// set fairly high because the outstanding comparisons take only few
|
||||||
ranges.push_back(range);
|
// resources. In particular, we do NOT do this number of file reads in
|
||||||
return;
|
// parallel because file reads have large memory overheads (read buffers,
|
||||||
}
|
// partitions, etc.) - the number of concurrent reads is further limited
|
||||||
// The use of minimum_token() here twice is not a typo - because wrap-
|
// by an additional semaphore checksum_parallelism_semaphore (see above).
|
||||||
// around token ranges are supported by midpoint(), the beyond-maximum
|
|
||||||
// token can also be represented by minimum_token().
|
|
||||||
auto midpoint = dht::global_partitioner().midpoint(
|
|
||||||
range.start() ? range.start()->value() : dht::minimum_token(),
|
|
||||||
range.end() ? range.end()->value() : dht::minimum_token());
|
|
||||||
auto halves = range.split(midpoint, dht::token_comparator());
|
|
||||||
ranges.push_back(halves.first);
|
|
||||||
ranges.push_back(halves.second);
|
|
||||||
}
|
|
||||||
// We don't need to wait for one checksum to finish before we start the
|
|
||||||
// next, but doing too many of these operations in parallel also doesn't
|
|
||||||
// make sense, so we limit the number of concurrent ongoing checksum
|
|
||||||
// requests with a semaphore.
|
|
||||||
//
|
|
||||||
// FIXME: We shouldn't use a magic number here, but rather bind it to
|
|
||||||
// some resource. Otherwise we'll be doing too little in some machines,
|
|
||||||
// and too much in others.
|
|
||||||
//
|
//
|
||||||
// FIXME: This would be better off in a repair service, or even a per-shard
|
// FIXME: This would be better off in a repair service, or even a per-shard
|
||||||
// repair instance holding all repair state. However, since we are anyway
|
// repair instance holding all repair state. However, since we are anyway
|
||||||
@@ -512,6 +561,24 @@ static void split_and_add(std::vector<::dht::token_range>& ranges,
|
|||||||
constexpr int parallelism = 100;
|
constexpr int parallelism = 100;
|
||||||
static thread_local semaphore parallelism_semaphore(parallelism);
|
static thread_local semaphore parallelism_semaphore(parallelism);
|
||||||
|
|
||||||
|
static future<uint64_t> estimate_partitions(seastar::sharded<database>& db, const sstring& keyspace,
|
||||||
|
const sstring& cf, const dht::token_range& range) {
|
||||||
|
return db.map_reduce0(
|
||||||
|
[keyspace, cf, range] (auto& db) {
|
||||||
|
// FIXME: column_family should have a method to estimate the number of
|
||||||
|
// partitions (and of course it should use cardinality estimation bitmaps,
|
||||||
|
// not trivial sum). We shouldn't have this ugly code here...
|
||||||
|
// FIXME: If sstables are shared, they will be accounted more than
|
||||||
|
// once. However, shared sstables should exist for a short-time only.
|
||||||
|
auto sstables = db.find_column_family(keyspace, cf).get_sstables();
|
||||||
|
return boost::accumulate(*sstables, uint64_t(0),
|
||||||
|
[&range] (uint64_t x, auto&& sst) { return x + sst->estimated_keys_for_range(range); });
|
||||||
|
},
|
||||||
|
uint64_t(0),
|
||||||
|
std::plus<uint64_t>()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// Repair a single cf in a single local range.
|
// Repair a single cf in a single local range.
|
||||||
// Comparable to RepairJob in Origin.
|
// Comparable to RepairJob in Origin.
|
||||||
static future<> repair_cf_range(repair_info& ri,
|
static future<> repair_cf_range(repair_info& ri,
|
||||||
@@ -522,42 +589,15 @@ static future<> repair_cf_range(repair_info& ri,
|
|||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<::dht::token_range> ranges;
|
return estimate_partitions(ri.db, ri.keyspace, cf, range).then([&ri, cf, range, &neighbors] (uint64_t estimated_partitions) {
|
||||||
ranges.push_back(range);
|
range_splitter ranges(range, estimated_partitions, ri.target_partitions);
|
||||||
|
|
||||||
// Additionally, we want to break up large ranges so they will have
|
|
||||||
// (approximately) a desired number of rows each.
|
|
||||||
// FIXME: column_family should have a method to estimate the number of
|
|
||||||
// partitions (and of course it should use cardinality estimation bitmaps,
|
|
||||||
// not trivial sum). We shouldn't have this ugly code here...
|
|
||||||
auto sstables = ri.db.local().find_column_family(ri.keyspace, cf).get_sstables();
|
|
||||||
uint64_t estimated_partitions = 0;
|
|
||||||
for (auto sst : *sstables) {
|
|
||||||
estimated_partitions += sst->estimated_keys_for_range(range);
|
|
||||||
}
|
|
||||||
|
|
||||||
// FIXME: we should have an on-the-fly iterator generator here, not
|
|
||||||
// fill a vector in advance.
|
|
||||||
std::vector<::dht::token_range> tosplit;
|
|
||||||
while (estimated_partitions > ri.target_partitions) {
|
|
||||||
tosplit.clear();
|
|
||||||
ranges.swap(tosplit);
|
|
||||||
for (const auto& range : tosplit) {
|
|
||||||
split_and_add(ranges, range, estimated_partitions, ri.target_partitions);
|
|
||||||
}
|
|
||||||
estimated_partitions /= 2;
|
|
||||||
if (ranges.size() >= ri.sub_ranges_max) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
logger.debug("target_partitions={}, estimated_partitions={}, ranges.size={}, range={} -> ranges={}",
|
|
||||||
ri.target_partitions, estimated_partitions, ranges.size(), range, ranges);
|
|
||||||
|
|
||||||
return do_with(seastar::gate(), true, std::move(cf), std::move(ranges),
|
return do_with(seastar::gate(), true, std::move(cf), std::move(ranges),
|
||||||
[&ri, &neighbors] (auto& completion, auto& success, const auto& cf, auto& ranges) {
|
[&ri, &neighbors] (auto& completion, auto& success, const auto& cf, auto& ranges) {
|
||||||
return do_for_each(ranges, [&ri, &completion, &success, &neighbors, &cf] (const auto& range) {
|
return do_until([&ranges] () { return !ranges.has_next(); },
|
||||||
|
[&ranges, &ri, &completion, &success, &neighbors, &cf] () {
|
||||||
|
auto range = ranges.next();
|
||||||
check_in_shutdown();
|
check_in_shutdown();
|
||||||
return parallelism_semaphore.wait(1).then([&ri, &completion, &success, &neighbors, &cf, &range] {
|
return parallelism_semaphore.wait(1).then([&ri, &completion, &success, &neighbors, &cf, range] {
|
||||||
auto checksum_type = service::get_local_storage_service().cluster_supports_large_partitions()
|
auto checksum_type = service::get_local_storage_service().cluster_supports_large_partitions()
|
||||||
? repair_checksum::streamed : repair_checksum::legacy;
|
? repair_checksum::streamed : repair_checksum::legacy;
|
||||||
|
|
||||||
@@ -575,7 +615,7 @@ static future<> repair_cf_range(repair_info& ri,
|
|||||||
|
|
||||||
completion.enter();
|
completion.enter();
|
||||||
when_all(checksums.begin(), checksums.end()).then(
|
when_all(checksums.begin(), checksums.end()).then(
|
||||||
[&ri, &cf, &range, &neighbors, &success]
|
[&ri, &cf, range, &neighbors, &success]
|
||||||
(std::vector<future<partition_checksum>> checksums) {
|
(std::vector<future<partition_checksum>> checksums) {
|
||||||
// If only some of the replicas of this range are alive,
|
// If only some of the replicas of this range are alive,
|
||||||
// we set success=false so repair will fail, but we can
|
// we set success=false so repair will fail, but we can
|
||||||
@@ -591,7 +631,7 @@ static future<> repair_cf_range(repair_info& ri,
|
|||||||
utils::fb_utilities::get_broadcast_address()),
|
utils::fb_utilities::get_broadcast_address()),
|
||||||
checksums[i].get_exception());
|
checksums[i].get_exception());
|
||||||
success = false;
|
success = false;
|
||||||
ri.failed_ranges.push_back(failed_range{cf, range});
|
ri.nr_failed_ranges++;
|
||||||
// Do not break out of the loop here, so we can log
|
// Do not break out of the loop here, so we can log
|
||||||
// (and discard) all the exceptions.
|
// (and discard) all the exceptions.
|
||||||
} else if (i > 0) {
|
} else if (i > 0) {
|
||||||
@@ -615,14 +655,24 @@ static future<> repair_cf_range(repair_info& ri,
|
|||||||
|
|
||||||
auto node_reducer = [] (std::vector<gms::inet_address>& live_neighbors_in_or_out,
|
auto node_reducer = [] (std::vector<gms::inet_address>& live_neighbors_in_or_out,
|
||||||
std::vector<gms::inet_address>& nodes_with_same_checksum, size_t nr_nodes_to_keep) {
|
std::vector<gms::inet_address>& nodes_with_same_checksum, size_t nr_nodes_to_keep) {
|
||||||
|
// nodes_with_same_checksum contains two types of nodes:
|
||||||
|
// 1) the nodes we want to remove from live_neighbors_in_or_out.
|
||||||
|
// 2) the nodes, nr_nodes_to_keep in number, not to remove from
|
||||||
|
// live_neighbors_in_or_out
|
||||||
auto nr_nodes = nodes_with_same_checksum.size();
|
auto nr_nodes = nodes_with_same_checksum.size();
|
||||||
if (nr_nodes <= nr_nodes_to_keep) {
|
if (nr_nodes <= nr_nodes_to_keep) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Remove the "far" nodes and keep the "near" nodes
|
if (nr_nodes_to_keep == 0) {
|
||||||
// to have better streaming performance
|
// All nodes in nodes_with_same_checksum will be removed from live_neighbors_in_or_out
|
||||||
nodes_with_same_checksum.resize(nr_nodes - nr_nodes_to_keep);
|
} else if (nr_nodes_to_keep == 1) {
|
||||||
|
auto node_is_remote = [] (gms::inet_address ip) { return !service::get_local_storage_service().is_local_dc(ip); };
|
||||||
|
boost::partition(nodes_with_same_checksum, node_is_remote);
|
||||||
|
nodes_with_same_checksum.resize(nr_nodes - nr_nodes_to_keep);
|
||||||
|
} else {
|
||||||
|
throw std::runtime_error(sprint("nr_nodes_to_keep = {}, but it can only be 1 or 0", nr_nodes_to_keep));
|
||||||
|
}
|
||||||
|
|
||||||
// Now, nodes_with_same_checksum contains nodes we want to remove, remove it from live_neighbors_in_or_out
|
// Now, nodes_with_same_checksum contains nodes we want to remove, remove it from live_neighbors_in_or_out
|
||||||
auto it = boost::range::remove_if(live_neighbors_in_or_out, [&nodes_with_same_checksum] (const auto& ip) {
|
auto it = boost::range::remove_if(live_neighbors_in_or_out, [&nodes_with_same_checksum] (const auto& ip) {
|
||||||
@@ -694,20 +744,19 @@ static future<> repair_cf_range(repair_info& ri,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!(live_neighbors_in.empty() && live_neighbors_out.empty())) {
|
if (!(live_neighbors_in.empty() && live_neighbors_out.empty())) {
|
||||||
logger.info("Found differing range {} on nodes {}, in = {}, out = {}", range,
|
logger.debug("Found differing range {} on nodes {}, in = {}, out = {}", range,
|
||||||
live_neighbors, live_neighbors_in, live_neighbors_out);
|
live_neighbors, live_neighbors_in, live_neighbors_out);
|
||||||
ri.request_transfer_ranges(cf, range, live_neighbors_in, live_neighbors_out);
|
return ri.request_transfer_ranges(cf, range, live_neighbors_in, live_neighbors_out);
|
||||||
return make_ready_future<>();
|
|
||||||
}
|
}
|
||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
}).handle_exception([&ri, &success, &cf, &range] (std::exception_ptr eptr) {
|
}).handle_exception([&ri, &success, &cf, range] (std::exception_ptr eptr) {
|
||||||
// Something above (e.g., request_transfer_ranges) failed. We could
|
// Something above (e.g., request_transfer_ranges) failed. We could
|
||||||
// stop the repair immediately, or let it continue with
|
// stop the repair immediately, or let it continue with
|
||||||
// other ranges (at the moment, we do the latter). But in
|
// other ranges (at the moment, we do the latter). But in
|
||||||
// any case, we need to remember that the repair failed to
|
// any case, we need to remember that the repair failed to
|
||||||
// tell the caller.
|
// tell the caller.
|
||||||
success = false;
|
success = false;
|
||||||
ri.failed_ranges.push_back(failed_range{cf, range});
|
ri.nr_failed_ranges++;
|
||||||
logger.warn("Failed sync of range {}: {}", range, eptr);
|
logger.warn("Failed sync of range {}: {}", range, eptr);
|
||||||
}).finally([&completion] {
|
}).finally([&completion] {
|
||||||
parallelism_semaphore.signal(1);
|
parallelism_semaphore.signal(1);
|
||||||
@@ -727,6 +776,7 @@ static future<> repair_cf_range(repair_info& ri,
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Repair a single local range, multiple column families.
|
// Repair a single local range, multiple column families.
|
||||||
@@ -945,22 +995,39 @@ private:
|
|||||||
// same nodes as replicas.
|
// same nodes as replicas.
|
||||||
static future<> repair_ranges(repair_info ri) {
|
static future<> repair_ranges(repair_info ri) {
|
||||||
return do_with(std::move(ri), [] (auto& ri) {
|
return do_with(std::move(ri), [] (auto& ri) {
|
||||||
#if 1
|
#if 0
|
||||||
// repair all the ranges in parallel
|
// repair all the ranges in parallel
|
||||||
return parallel_for_each(ri.ranges, [&ri] (auto&& range) {
|
return parallel_for_each(ri.ranges, [&ri] (auto&& range) {
|
||||||
#else
|
#else
|
||||||
// repair all the ranges in sequence
|
// repair all the ranges in sequence
|
||||||
return do_for_each(ri.ranges, [&ri] (auto&& range) {
|
return do_for_each(ri.ranges, [&ri] (auto&& range) {
|
||||||
#endif
|
#endif
|
||||||
check_in_shutdown();
|
ri.ranges_index++;
|
||||||
return repair_range(ri, range);
|
logger.info("Repair {} out of {} ranges, id={}, shard={}, keyspace={}, table={}, range={}",
|
||||||
|
ri.ranges_index, ri.ranges.size(), ri.id, ri.shard, ri.keyspace, ri.cfs, range);
|
||||||
|
return do_with(dht::selective_token_range_sharder(range, ri.shard), [&ri] (auto& sharder) {
|
||||||
|
return repeat([&ri, &sharder] () {
|
||||||
|
check_in_shutdown();
|
||||||
|
auto range_shard = sharder.next();
|
||||||
|
if (range_shard) {
|
||||||
|
return repair_range(ri, *range_shard).then([] {
|
||||||
|
return make_ready_future<stop_iteration>(stop_iteration::no);
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
}).then([&ri] {
|
}).then([&ri] {
|
||||||
|
// Do streaming for the remaining ranges we do not stream in
|
||||||
|
// repair_cf_range
|
||||||
return ri.do_streaming();
|
return ri.do_streaming();
|
||||||
}).then([&ri] {
|
}).then([&ri] {
|
||||||
repair_tracker.done(ri.id, ri.check_failed_ranges());
|
ri.check_failed_ranges();
|
||||||
|
return make_ready_future<>();
|
||||||
}).handle_exception([&ri] (std::exception_ptr eptr) {
|
}).handle_exception([&ri] (std::exception_ptr eptr) {
|
||||||
logger.info("repair {} failed - {}", ri.id, eptr);
|
logger.info("repair {} failed - {}", ri.id, eptr);
|
||||||
repair_tracker.done(ri.id, false);
|
return make_exception_future<>(std::move(eptr));
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -982,9 +1049,12 @@ static int do_repair_start(seastar::sharded<database>& db, sstring keyspace,
|
|||||||
// yet. Real ids returned by next_repair_command() will be >= 1.
|
// yet. Real ids returned by next_repair_command() will be >= 1.
|
||||||
int id = repair_tracker.next_repair_command();
|
int id = repair_tracker.next_repair_command();
|
||||||
logger.info("starting user-requested repair for keyspace {}, repair id {}, options {}", keyspace, id, options_map);
|
logger.info("starting user-requested repair for keyspace {}, repair id {}, options {}", keyspace, id, options_map);
|
||||||
|
|
||||||
repair_tracker.start(id);
|
repair_tracker.start(id);
|
||||||
|
|
||||||
|
if (!gms::get_local_gossiper().is_normal(utils::fb_utilities::get_broadcast_address())) {
|
||||||
|
throw std::runtime_error("Node is not in NORMAL status yet!");
|
||||||
|
}
|
||||||
|
|
||||||
// If the "ranges" option is not explicitly specified, we repair all the
|
// If the "ranges" option is not explicitly specified, we repair all the
|
||||||
// local ranges (the token ranges for which this node holds a replica of).
|
// local ranges (the token ranges for which this node holds a replica of).
|
||||||
// Each of these ranges may have a different set of replicas, so the
|
// Each of these ranges may have a different set of replicas, so the
|
||||||
@@ -1057,8 +1127,33 @@ static int do_repair_start(seastar::sharded<database>& db, sstring keyspace,
|
|||||||
cfs = list_column_families(db.local(), keyspace);
|
cfs = list_column_families(db.local(), keyspace);
|
||||||
}
|
}
|
||||||
|
|
||||||
repair_ranges(repair_info(db, std::move(keyspace), std::move(ranges),
|
|
||||||
std::move(cfs), id, options.data_centers, options.hosts));
|
std::vector<future<>> repair_results;
|
||||||
|
repair_results.reserve(smp::count);
|
||||||
|
|
||||||
|
for (auto shard : boost::irange(unsigned(0), smp::count)) {
|
||||||
|
auto f = db.invoke_on(shard, [keyspace, cfs, id, ranges,
|
||||||
|
data_centers = options.data_centers, hosts = options.hosts] (database& localdb) mutable {
|
||||||
|
return repair_ranges(repair_info(service::get_local_storage_service().db(),
|
||||||
|
std::move(keyspace), std::move(ranges), std::move(cfs),
|
||||||
|
id, std::move(data_centers), std::move(hosts)));
|
||||||
|
});
|
||||||
|
repair_results.push_back(std::move(f));
|
||||||
|
}
|
||||||
|
|
||||||
|
when_all(repair_results.begin(), repair_results.end()).then([id] (std::vector<future<>> results) {
|
||||||
|
if (std::any_of(results.begin(), results.end(), [] (auto&& f) { return f.failed(); })) {
|
||||||
|
repair_tracker.done(id, false);
|
||||||
|
logger.info("repair {} failed", id);
|
||||||
|
} else {
|
||||||
|
repair_tracker.done(id, true);
|
||||||
|
logger.info("repair {} completed successfully", id);
|
||||||
|
}
|
||||||
|
return make_ready_future<>();
|
||||||
|
}).handle_exception([id] (std::exception_ptr eptr) {
|
||||||
|
repair_tracker.done(id, false);
|
||||||
|
logger.info("repair {} failed: {}", id, eptr);
|
||||||
|
});
|
||||||
|
|
||||||
return id;
|
return id;
|
||||||
}
|
}
|
||||||
|
|||||||
30
row_cache.cc
30
row_cache.cc
@@ -315,6 +315,7 @@ public:
|
|||||||
}
|
}
|
||||||
return make_ready_future<streamed_mutation_opt>(streamed_mutation_opt());
|
return make_ready_future<streamed_mutation_opt>(streamed_mutation_opt());
|
||||||
} else {
|
} else {
|
||||||
|
_delegate = make_empty_reader(); // See issue #2623
|
||||||
_cache.on_uncached_wide_partition();
|
_cache.on_uncached_wide_partition();
|
||||||
_cache._tracker.on_wide_partition_mispopulation();
|
_cache._tracker.on_wide_partition_mispopulation();
|
||||||
_cache.mark_partition_as_wide(dk);
|
_cache.mark_partition_as_wide(dk);
|
||||||
@@ -460,6 +461,7 @@ private:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
_reader = {}; // See issue #2644
|
||||||
_reader = _cache._underlying(_cache._schema, _range, query::full_slice, _pc, _trace_state);
|
_reader = _cache._underlying(_cache._schema, _range, query::full_slice, _pc, _trace_state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -471,6 +473,7 @@ private:
|
|||||||
_last_key.reset(dk, _populate_phase);
|
_last_key.reset(dk, _populate_phase);
|
||||||
|
|
||||||
_large_partition_range = dht::partition_range::make_singular(dk);
|
_large_partition_range = dht::partition_range::make_singular(dk);
|
||||||
|
// FIXME: This may deadlock with _reader due to #2644. We can't reset _reader here, because it's still used after this.
|
||||||
_large_partition_reader = _cache._underlying(_schema, _large_partition_range, _slice, _pc, _trace_state);
|
_large_partition_reader = _cache._underlying(_schema, _large_partition_range, _slice, _pc, _trace_state);
|
||||||
return _large_partition_reader().then([this, dk = std::move(dk)] (auto smopt) mutable -> streamed_mutation_opt {
|
return _large_partition_reader().then([this, dk = std::move(dk)] (auto smopt) mutable -> streamed_mutation_opt {
|
||||||
_large_partition_reader = {};
|
_large_partition_reader = {};
|
||||||
@@ -564,6 +567,7 @@ public:
|
|||||||
|
|
||||||
if (!_reader_created || phase != _populate_phase) {
|
if (!_reader_created || phase != _populate_phase) {
|
||||||
_populate_phase = _cache._populate_phaser.phase();
|
_populate_phase = _cache._populate_phaser.phase();
|
||||||
|
_reader = {}; // See issue #2644
|
||||||
_reader = _cache._underlying(_cache._schema, _range, query::full_slice, _pc, _trace_state);
|
_reader = _cache._underlying(_cache._schema, _range, query::full_slice, _pc, _trace_state);
|
||||||
_reader_created = true;
|
_reader_created = true;
|
||||||
return make_ready_future();
|
return make_ready_future();
|
||||||
@@ -579,6 +583,7 @@ class scanning_and_populating_reader final : public mutation_reader::impl {
|
|||||||
|
|
||||||
just_cache_scanning_reader _primary_reader;
|
just_cache_scanning_reader _primary_reader;
|
||||||
range_populating_reader _secondary_reader;
|
range_populating_reader _secondary_reader;
|
||||||
|
mutation_reader::forwarding _fwd_mr;
|
||||||
streamed_mutation_opt _next_primary;
|
streamed_mutation_opt _next_primary;
|
||||||
bool _secondary_in_progress = false;
|
bool _secondary_in_progress = false;
|
||||||
bool _first_element = true;
|
bool _first_element = true;
|
||||||
@@ -655,11 +660,13 @@ public:
|
|||||||
const dht::partition_range& range,
|
const dht::partition_range& range,
|
||||||
const query::partition_slice& slice,
|
const query::partition_slice& slice,
|
||||||
const io_priority_class& pc,
|
const io_priority_class& pc,
|
||||||
tracing::trace_state_ptr trace_state)
|
tracing::trace_state_ptr trace_state,
|
||||||
|
mutation_reader::forwarding fwd_mr)
|
||||||
: _pr(range)
|
: _pr(range)
|
||||||
, _schema(s)
|
, _schema(s)
|
||||||
, _primary_reader(s, cache, range, slice, pc)
|
, _primary_reader(s, cache, range, slice, pc)
|
||||||
, _secondary_reader(cache, s, slice, pc, trace_state)
|
, _secondary_reader(cache, s, slice, pc, trace_state)
|
||||||
|
, _fwd_mr(fwd_mr)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
future<streamed_mutation_opt> operator()() {
|
future<streamed_mutation_opt> operator()() {
|
||||||
@@ -676,8 +683,9 @@ row_cache::make_scanning_reader(schema_ptr s,
|
|||||||
const dht::partition_range& range,
|
const dht::partition_range& range,
|
||||||
const io_priority_class& pc,
|
const io_priority_class& pc,
|
||||||
const query::partition_slice& slice,
|
const query::partition_slice& slice,
|
||||||
tracing::trace_state_ptr trace_state) {
|
tracing::trace_state_ptr trace_state,
|
||||||
return make_mutation_reader<scanning_and_populating_reader>(std::move(s), *this, range, slice, pc, std::move(trace_state));
|
mutation_reader::forwarding fwd_mr) {
|
||||||
|
return make_mutation_reader<scanning_and_populating_reader>(std::move(s), *this, range, slice, pc, std::move(trace_state), fwd_mr);
|
||||||
}
|
}
|
||||||
|
|
||||||
mutation_reader
|
mutation_reader
|
||||||
@@ -685,12 +693,13 @@ row_cache::make_reader(schema_ptr s,
|
|||||||
const dht::partition_range& range,
|
const dht::partition_range& range,
|
||||||
const query::partition_slice& slice,
|
const query::partition_slice& slice,
|
||||||
const io_priority_class& pc,
|
const io_priority_class& pc,
|
||||||
tracing::trace_state_ptr trace_state) {
|
tracing::trace_state_ptr trace_state,
|
||||||
|
mutation_reader::forwarding fwd_mr) {
|
||||||
if (range.is_singular()) {
|
if (range.is_singular()) {
|
||||||
const query::ring_position& pos = range.start()->value();
|
const query::ring_position& pos = range.start()->value();
|
||||||
|
|
||||||
if (!pos.has_key()) {
|
if (!pos.has_key()) {
|
||||||
return make_scanning_reader(std::move(s), range, pc, slice, std::move(trace_state));
|
return make_scanning_reader(std::move(s), range, pc, slice, std::move(trace_state), fwd_mr);
|
||||||
}
|
}
|
||||||
|
|
||||||
return _read_section(_tracker.region(), [&] {
|
return _read_section(_tracker.region(), [&] {
|
||||||
@@ -703,7 +712,7 @@ row_cache::make_reader(schema_ptr s,
|
|||||||
upgrade_entry(e);
|
upgrade_entry(e);
|
||||||
mutation_reader reader;
|
mutation_reader reader;
|
||||||
if (e.wide_partition()) {
|
if (e.wide_partition()) {
|
||||||
reader = _underlying(s, range, slice, pc, std::move(trace_state));
|
reader = _underlying(s, range, slice, pc, std::move(trace_state), fwd_mr);
|
||||||
_tracker.on_uncached_wide_partition();
|
_tracker.on_uncached_wide_partition();
|
||||||
on_miss();
|
on_miss();
|
||||||
} else {
|
} else {
|
||||||
@@ -721,7 +730,7 @@ row_cache::make_reader(schema_ptr s,
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
return make_scanning_reader(std::move(s), range, pc, slice, std::move(trace_state));
|
return make_scanning_reader(std::move(s), range, pc, slice, std::move(trace_state), fwd_mr);
|
||||||
}
|
}
|
||||||
|
|
||||||
row_cache::~row_cache() {
|
row_cache::~row_cache() {
|
||||||
@@ -1023,12 +1032,13 @@ future<streamed_mutation_opt> cache_entry::read_wide(row_cache& rc, schema_ptr s
|
|||||||
: _range(std::move(pr))
|
: _range(std::move(pr))
|
||||||
, _reader(rc._underlying(s, _range, slice, pc))
|
, _reader(rc._underlying(s, _range, slice, pc))
|
||||||
{ }
|
{ }
|
||||||
|
range_and_underlyig_reader(range_and_underlyig_reader&&) = delete;
|
||||||
};
|
};
|
||||||
rc._tracker.on_uncached_wide_partition();
|
rc._tracker.on_uncached_wide_partition();
|
||||||
auto pr = dht::partition_range::make_singular(_key);
|
auto pr = dht::partition_range::make_singular(_key);
|
||||||
return do_with(range_and_underlyig_reader(rc, s, std::move(pr), slice, pc), [] (auto& r_a_ur) {
|
auto rd_ptr = std::make_unique<range_and_underlyig_reader>(rc, s, std::move(pr), slice, pc);
|
||||||
return r_a_ur._reader();
|
auto& r_a_ur = *rd_ptr;
|
||||||
});
|
return r_a_ur._reader().finally([rd_ptr = std::move(rd_ptr)] {});
|
||||||
}
|
}
|
||||||
|
|
||||||
streamed_mutation cache_entry::read(row_cache& rc, const schema_ptr& s) {
|
streamed_mutation cache_entry::read(row_cache& rc, const schema_ptr& s) {
|
||||||
|
|||||||
@@ -284,7 +284,8 @@ private:
|
|||||||
const dht::partition_range&,
|
const dht::partition_range&,
|
||||||
const io_priority_class& pc,
|
const io_priority_class& pc,
|
||||||
const query::partition_slice& slice,
|
const query::partition_slice& slice,
|
||||||
tracing::trace_state_ptr trace_state);
|
tracing::trace_state_ptr trace_state,
|
||||||
|
mutation_reader::forwarding);
|
||||||
void on_hit();
|
void on_hit();
|
||||||
void on_miss();
|
void on_miss();
|
||||||
void on_uncached_wide_partition();
|
void on_uncached_wide_partition();
|
||||||
@@ -335,7 +336,8 @@ public:
|
|||||||
const dht::partition_range& = query::full_partition_range,
|
const dht::partition_range& = query::full_partition_range,
|
||||||
const query::partition_slice& slice = query::full_slice,
|
const query::partition_slice& slice = query::full_slice,
|
||||||
const io_priority_class& = default_priority_class(),
|
const io_priority_class& = default_priority_class(),
|
||||||
tracing::trace_state_ptr trace_state = nullptr);
|
tracing::trace_state_ptr trace_state = nullptr,
|
||||||
|
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::no);
|
||||||
|
|
||||||
const stats& stats() const { return _stats; }
|
const stats& stats() const { return _stats; }
|
||||||
public:
|
public:
|
||||||
|
|||||||
24
schema.cc
24
schema.cc
@@ -145,6 +145,20 @@ void schema::rebuild() {
|
|||||||
|
|
||||||
thrift()._compound = is_compound();
|
thrift()._compound = is_compound();
|
||||||
thrift()._is_dynamic = clustering_key_size() > 0;
|
thrift()._is_dynamic = clustering_key_size() > 0;
|
||||||
|
|
||||||
|
if (default_validator()->is_counter()) {
|
||||||
|
for (auto&& cdef : boost::range::join(static_columns(), regular_columns())) {
|
||||||
|
if (!cdef.type->is_counter()) {
|
||||||
|
throw exceptions::configuration_exception(sprint("Cannot add a non counter column (%s) in a counter column family", cdef.name_as_text()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (auto&& cdef : all_columns()) {
|
||||||
|
if (cdef.second->type->is_counter()) {
|
||||||
|
throw exceptions::configuration_exception(sprint("Cannot add a counter column (%s) in a non counter column family", cdef.second->name_as_text()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const column_mapping& schema::get_column_mapping() const {
|
const column_mapping& schema::get_column_mapping() const {
|
||||||
@@ -737,6 +751,16 @@ schema_ptr schema_builder::build() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
prepare_dense_schema(new_raw);
|
prepare_dense_schema(new_raw);
|
||||||
|
|
||||||
|
if (_default_validator) {
|
||||||
|
new_raw._default_validator = *_default_validator;
|
||||||
|
} else if (new_raw._is_dense || !new_raw._is_compound) {
|
||||||
|
auto regular_column = std::find_if(new_raw._columns.begin(), new_raw._columns.end(), [] (auto&& col) {
|
||||||
|
return col.kind == column_kind::regular_column;
|
||||||
|
});
|
||||||
|
new_raw._default_validator = regular_column->type;
|
||||||
|
}
|
||||||
|
|
||||||
return make_lw_shared<schema>(schema(new_raw));
|
return make_lw_shared<schema>(schema(new_raw));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ private:
|
|||||||
schema::raw_schema _raw;
|
schema::raw_schema _raw;
|
||||||
std::experimental::optional<compact_storage> _compact_storage;
|
std::experimental::optional<compact_storage> _compact_storage;
|
||||||
std::experimental::optional<table_schema_version> _version;
|
std::experimental::optional<table_schema_version> _version;
|
||||||
|
std::experimental::optional<data_type> _default_validator;
|
||||||
schema_builder(const schema::raw_schema&);
|
schema_builder(const schema::raw_schema&);
|
||||||
public:
|
public:
|
||||||
schema_builder(const sstring& ks_name, const sstring& cf_name,
|
schema_builder(const sstring& ks_name, const sstring& cf_name,
|
||||||
@@ -74,7 +75,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
schema_builder& set_default_validator(const data_type& validator) {
|
schema_builder& set_default_validator(const data_type& validator) {
|
||||||
_raw._default_validator = validator;
|
_default_validator = {validator};
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -273,9 +273,9 @@ schema_ptr global_schema_ptr::get() const {
|
|||||||
s = local_schema_registry().get_or_load(e.version(), [&e](table_schema_version) {
|
s = local_schema_registry().get_or_load(e.version(), [&e](table_schema_version) {
|
||||||
return e.frozen();
|
return e.frozen();
|
||||||
});
|
});
|
||||||
if (e.is_synced()) {
|
}
|
||||||
s->registry_entry()->mark_synced();
|
if (e.is_synced()) {
|
||||||
}
|
s->registry_entry()->mark_synced();
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -31,6 +31,8 @@ import os
|
|||||||
import sys
|
import sys
|
||||||
import subprocess
|
import subprocess
|
||||||
import uuid
|
import uuid
|
||||||
|
import re
|
||||||
|
import glob
|
||||||
from pkg_resources import parse_version
|
from pkg_resources import parse_version
|
||||||
|
|
||||||
VERSION = "1.0"
|
VERSION = "1.0"
|
||||||
@@ -69,6 +71,20 @@ def create_uuid_file(fl):
|
|||||||
with open(args.uuid_file, 'w') as myfile:
|
with open(args.uuid_file, 'w') as myfile:
|
||||||
myfile.write(str(uuid.uuid1()) + "\n")
|
myfile.write(str(uuid.uuid1()) + "\n")
|
||||||
|
|
||||||
|
def get_repo_file(dir):
|
||||||
|
files = glob.glob(dir)
|
||||||
|
files.sort(key=os.path.getmtime, reverse=True)
|
||||||
|
for name in files:
|
||||||
|
with open(name, 'r') as myfile:
|
||||||
|
for line in myfile:
|
||||||
|
match = re.search(".*http.?://.*/scylladb/([^/\s]+)/deb/([^/\s]+)\s.*", line)
|
||||||
|
if match:
|
||||||
|
return match.group(2), match.group(1)
|
||||||
|
match = re.search(".*http.?://.*/scylladb/([^/]+)/rpm/[^/]+/([^/\s]+)/.*", line)
|
||||||
|
if match:
|
||||||
|
return match.group(2), match.group(1)
|
||||||
|
return None, None
|
||||||
|
|
||||||
def check_version(ar):
|
def check_version(ar):
|
||||||
if config and (not config.has_option("housekeeping", "check-version") or not config.getboolean("housekeeping", "check-version")):
|
if config and (not config.has_option("housekeeping", "check-version") or not config.getboolean("housekeeping", "check-version")):
|
||||||
return
|
return
|
||||||
@@ -87,6 +103,10 @@ def check_version(ar):
|
|||||||
params = params + "&sts=" + ar.mode
|
params = params + "&sts=" + ar.mode
|
||||||
if uid:
|
if uid:
|
||||||
params = params + "&uu=" + uid
|
params = params + "&uu=" + uid
|
||||||
|
if repo_id:
|
||||||
|
params = params + "&rid=" + repo_id
|
||||||
|
if repo_type:
|
||||||
|
params = params + "&rtype=" + repo_type
|
||||||
latest_version = get_json_from_url(version_url + params)["version"]
|
latest_version = get_json_from_url(version_url + params)["version"]
|
||||||
except:
|
except:
|
||||||
traceln("Unable to retrieve version information")
|
traceln("Unable to retrieve version information")
|
||||||
@@ -99,6 +119,7 @@ parser.add_argument('-q', '--quiet', action='store_true', default=False, help='Q
|
|||||||
parser.add_argument('-c', '--config', default="", help='An optional config file. Specifying a missing file will terminate the script')
|
parser.add_argument('-c', '--config', default="", help='An optional config file. Specifying a missing file will terminate the script')
|
||||||
parser.add_argument('--uuid', default="", help='A uuid for the requests')
|
parser.add_argument('--uuid', default="", help='A uuid for the requests')
|
||||||
parser.add_argument('--uuid-file', default="", help='A uuid file for the requests')
|
parser.add_argument('--uuid-file', default="", help='A uuid file for the requests')
|
||||||
|
parser.add_argument('--repo-files', default="", help='The repository files that is been used for private repositories')
|
||||||
|
|
||||||
subparsers = parser.add_subparsers(help='Available commands')
|
subparsers = parser.add_subparsers(help='Available commands')
|
||||||
parser_help = subparsers.add_parser('help', help='Display help information')
|
parser_help = subparsers.add_parser('help', help='Display help information')
|
||||||
@@ -111,6 +132,9 @@ parser_system.set_defaults(func=check_version)
|
|||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
quiet = args.quiet
|
quiet = args.quiet
|
||||||
config = None
|
config = None
|
||||||
|
repo_id = None
|
||||||
|
repo_type = None
|
||||||
|
|
||||||
if args.config != "":
|
if args.config != "":
|
||||||
if not os.path.isfile(args.config):
|
if not os.path.isfile(args.config):
|
||||||
traceln("Config file ", args.config, " is missing, terminating")
|
traceln("Config file ", args.config, " is missing, terminating")
|
||||||
@@ -125,4 +149,6 @@ if args.uuid_file != "":
|
|||||||
create_uuid_file(args.uuid_file)
|
create_uuid_file(args.uuid_file)
|
||||||
with open(args.uuid_file, 'r') as myfile:
|
with open(args.uuid_file, 'r') as myfile:
|
||||||
uid = myfile.read().replace('\n', '')
|
uid = myfile.read().replace('\n', '')
|
||||||
|
if args.repo_files != "":
|
||||||
|
repo_type, repo_id = get_repo_file(args.repo_files)
|
||||||
args.func(args)
|
args.func(args)
|
||||||
|
|||||||
2
seastar
2
seastar
Submodule seastar updated: f07f8ed68d...a66e0c553d
@@ -115,4 +115,42 @@ inline frame<seastar::measuring_output_stream> start_frame(seastar::measuring_ou
|
|||||||
return { };
|
return { };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
class place_holder<seastar::simple_output_stream> {
|
||||||
|
seastar::simple_output_stream _substream;
|
||||||
|
public:
|
||||||
|
place_holder(seastar::simple_output_stream substream)
|
||||||
|
: _substream(substream) { }
|
||||||
|
|
||||||
|
void set(seastar::simple_output_stream& out, size_type v) {
|
||||||
|
serialize(_substream, v);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<>
|
||||||
|
class frame<seastar::simple_output_stream> : public place_holder<seastar::simple_output_stream> {
|
||||||
|
char* _start;
|
||||||
|
public:
|
||||||
|
frame(seastar::simple_output_stream ph, char* start)
|
||||||
|
: place_holder(ph), _start(start) { }
|
||||||
|
|
||||||
|
void end(seastar::simple_output_stream& out) {
|
||||||
|
set(out, out.begin() - _start);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
inline place_holder<seastar::simple_output_stream> start_place_holder(seastar::simple_output_stream& out) {
|
||||||
|
return { out.write_substream(sizeof(size_type)) };
|
||||||
|
}
|
||||||
|
|
||||||
|
inline frame<seastar::simple_output_stream> start_frame(seastar::simple_output_stream& out) {
|
||||||
|
auto start = out.begin();
|
||||||
|
auto substream = out.write_substream(sizeof(size_type));
|
||||||
|
{
|
||||||
|
auto sstr = substream;
|
||||||
|
serialize(sstr, size_type(0));
|
||||||
|
}
|
||||||
|
return frame<seastar::simple_output_stream>(substream, start);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -481,8 +481,10 @@ future<> migration_manager::announce_new_column_family(schema_ptr cfm, bool anno
|
|||||||
throw exceptions::already_exists_exception(cfm->ks_name(), cfm->cf_name());
|
throw exceptions::already_exists_exception(cfm->ks_name(), cfm->cf_name());
|
||||||
}
|
}
|
||||||
logger.info("Create new ColumnFamily: {}", cfm);
|
logger.info("Create new ColumnFamily: {}", cfm);
|
||||||
auto mutations = db::schema_tables::make_create_table_mutations(keyspace.metadata(), cfm, api::new_timestamp());
|
return db::schema_tables::make_create_table_mutations(keyspace.metadata(), cfm, api::new_timestamp())
|
||||||
return announce(std::move(mutations), announce_locally);
|
.then([announce_locally, this] (auto&& mutations) {
|
||||||
|
return announce(std::move(mutations), announce_locally);
|
||||||
|
});
|
||||||
} catch (const no_such_keyspace& e) {
|
} catch (const no_such_keyspace& e) {
|
||||||
throw exceptions::configuration_exception(sprint("Cannot add table '%s' to non existing keyspace '%s'.", cfm->cf_name(), cfm->ks_name()));
|
throw exceptions::configuration_exception(sprint("Cannot add table '%s' to non existing keyspace '%s'.", cfm->cf_name(), cfm->ks_name()));
|
||||||
}
|
}
|
||||||
@@ -501,8 +503,10 @@ future<> migration_manager::announce_column_family_update(schema_ptr cfm, bool f
|
|||||||
#endif
|
#endif
|
||||||
logger.info("Update table '{}.{}' From {} To {}", cfm->ks_name(), cfm->cf_name(), *old_schema, *cfm);
|
logger.info("Update table '{}.{}' From {} To {}", cfm->ks_name(), cfm->cf_name(), *old_schema, *cfm);
|
||||||
auto&& keyspace = db.find_keyspace(cfm->ks_name());
|
auto&& keyspace = db.find_keyspace(cfm->ks_name());
|
||||||
auto mutations = db::schema_tables::make_update_table_mutations(keyspace.metadata(), old_schema, cfm, api::new_timestamp(), from_thrift);
|
return db::schema_tables::make_update_table_mutations(keyspace.metadata(), old_schema, cfm, api::new_timestamp(), from_thrift)
|
||||||
return announce(std::move(mutations), announce_locally);
|
.then([announce_locally] (auto&& mutations) {
|
||||||
|
return announce(std::move(mutations), announce_locally);
|
||||||
|
});
|
||||||
} catch (const no_such_column_family& e) {
|
} catch (const no_such_column_family& e) {
|
||||||
throw exceptions::configuration_exception(sprint("Cannot update non existing table '%s' in keyspace '%s'.",
|
throw exceptions::configuration_exception(sprint("Cannot update non existing table '%s' in keyspace '%s'.",
|
||||||
cfm->cf_name(), cfm->ks_name()));
|
cfm->cf_name(), cfm->ks_name()));
|
||||||
@@ -512,8 +516,10 @@ future<> migration_manager::announce_column_family_update(schema_ptr cfm, bool f
|
|||||||
static future<> do_announce_new_type(user_type new_type, bool announce_locally) {
|
static future<> do_announce_new_type(user_type new_type, bool announce_locally) {
|
||||||
auto& db = get_local_storage_proxy().get_db().local();
|
auto& db = get_local_storage_proxy().get_db().local();
|
||||||
auto&& keyspace = db.find_keyspace(new_type->_keyspace);
|
auto&& keyspace = db.find_keyspace(new_type->_keyspace);
|
||||||
auto mutations = db::schema_tables::make_create_type_mutations(keyspace.metadata(), new_type, api::new_timestamp());
|
return db::schema_tables::make_create_type_mutations(keyspace.metadata(), new_type, api::new_timestamp())
|
||||||
return migration_manager::announce(std::move(mutations), announce_locally);
|
.then([announce_locally] (auto&& mutations) {
|
||||||
|
return migration_manager::announce(std::move(mutations), announce_locally);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> migration_manager::announce_new_type(user_type new_type, bool announce_locally) {
|
future<> migration_manager::announce_new_type(user_type new_type, bool announce_locally) {
|
||||||
@@ -609,8 +615,10 @@ future<> migration_manager::announce_column_family_drop(const sstring& ks_name,
|
|||||||
ks_name, ::join(", ", views | boost::adaptors::transformed([](auto&& v) { return v->cf_name(); }))));
|
ks_name, ::join(", ", views | boost::adaptors::transformed([](auto&& v) { return v->cf_name(); }))));
|
||||||
}
|
}
|
||||||
logger.info("Drop table '{}.{}'", schema->ks_name(), schema->cf_name());
|
logger.info("Drop table '{}.{}'", schema->ks_name(), schema->cf_name());
|
||||||
auto mutations = db::schema_tables::make_drop_table_mutations(db.find_keyspace(ks_name).metadata(), schema, api::new_timestamp());
|
return db::schema_tables::make_drop_table_mutations(db.find_keyspace(ks_name).metadata(), schema, api::new_timestamp())
|
||||||
return announce(std::move(mutations), announce_locally);
|
.then([announce_locally] (auto&& mutations) {
|
||||||
|
return announce(std::move(mutations), announce_locally);
|
||||||
|
});
|
||||||
} catch (const no_such_column_family& e) {
|
} catch (const no_such_column_family& e) {
|
||||||
throw exceptions::configuration_exception(sprint("Cannot drop non existing table '%s' in keyspace '%s'.", cf_name, ks_name));
|
throw exceptions::configuration_exception(sprint("Cannot drop non existing table '%s' in keyspace '%s'.", cf_name, ks_name));
|
||||||
}
|
}
|
||||||
@@ -621,8 +629,10 @@ future<> migration_manager::announce_type_drop(user_type dropped_type, bool anno
|
|||||||
auto& db = get_local_storage_proxy().get_db().local();
|
auto& db = get_local_storage_proxy().get_db().local();
|
||||||
auto&& keyspace = db.find_keyspace(dropped_type->_keyspace);
|
auto&& keyspace = db.find_keyspace(dropped_type->_keyspace);
|
||||||
logger.info("Drop User Type: {}", dropped_type->get_name_as_string());
|
logger.info("Drop User Type: {}", dropped_type->get_name_as_string());
|
||||||
auto mutations = db::schema_tables::make_drop_type_mutations(keyspace.metadata(), dropped_type, api::new_timestamp());
|
return db::schema_tables::make_drop_type_mutations(keyspace.metadata(), dropped_type, api::new_timestamp())
|
||||||
return announce(std::move(mutations), announce_locally);
|
.then([announce_locally] (auto&& mutations) {
|
||||||
|
return announce(std::move(mutations), announce_locally);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> migration_manager::announce_new_view(view_ptr view, bool announce_locally)
|
future<> migration_manager::announce_new_view(view_ptr view, bool announce_locally)
|
||||||
@@ -637,8 +647,10 @@ future<> migration_manager::announce_new_view(view_ptr view, bool announce_local
|
|||||||
throw exceptions::already_exists_exception(view->ks_name(), view->cf_name());
|
throw exceptions::already_exists_exception(view->ks_name(), view->cf_name());
|
||||||
}
|
}
|
||||||
logger.info("Create new view: {}", view);
|
logger.info("Create new view: {}", view);
|
||||||
auto mutations = db::schema_tables::make_create_view_mutations(keyspace, std::move(view), api::new_timestamp());
|
return db::schema_tables::make_create_view_mutations(keyspace, std::move(view), api::new_timestamp())
|
||||||
return announce(std::move(mutations), announce_locally);
|
.then([announce_locally] (auto&& mutations) {
|
||||||
|
return announce(std::move(mutations), announce_locally);
|
||||||
|
});
|
||||||
} catch (const no_such_keyspace& e) {
|
} catch (const no_such_keyspace& e) {
|
||||||
throw exceptions::configuration_exception(sprint("Cannot add view '%s' to non existing keyspace '%s'.", view->cf_name(), view->ks_name()));
|
throw exceptions::configuration_exception(sprint("Cannot add view '%s' to non existing keyspace '%s'.", view->cf_name(), view->ks_name()));
|
||||||
}
|
}
|
||||||
@@ -660,8 +672,10 @@ future<> migration_manager::announce_view_update(view_ptr view, bool announce_lo
|
|||||||
oldCfm.validateCompatility(cfm);
|
oldCfm.validateCompatility(cfm);
|
||||||
#endif
|
#endif
|
||||||
logger.info("Update view '{}.{}' From {} To {}", view->ks_name(), view->cf_name(), *old_view, *view);
|
logger.info("Update view '{}.{}' From {} To {}", view->ks_name(), view->cf_name(), *old_view, *view);
|
||||||
auto mutations = db::schema_tables::make_update_view_mutations(std::move(keyspace), view_ptr(old_view), std::move(view), api::new_timestamp());
|
return db::schema_tables::make_update_view_mutations(std::move(keyspace), view_ptr(old_view), std::move(view), api::new_timestamp())
|
||||||
return announce(std::move(mutations), announce_locally);
|
.then([announce_locally] (auto&& mutations) {
|
||||||
|
return announce(std::move(mutations), announce_locally);
|
||||||
|
});
|
||||||
} catch (const std::out_of_range& e) {
|
} catch (const std::out_of_range& e) {
|
||||||
throw exceptions::configuration_exception(sprint("Cannot update non existing materialized view '%s' in keyspace '%s'.",
|
throw exceptions::configuration_exception(sprint("Cannot update non existing materialized view '%s' in keyspace '%s'.",
|
||||||
view->cf_name(), view->ks_name()));
|
view->cf_name(), view->ks_name()));
|
||||||
@@ -680,8 +694,10 @@ future<> migration_manager::announce_view_drop(const sstring& ks_name,
|
|||||||
}
|
}
|
||||||
auto keyspace = db.find_keyspace(ks_name).metadata();
|
auto keyspace = db.find_keyspace(ks_name).metadata();
|
||||||
logger.info("Drop view '{}.{}'", view->ks_name(), view->cf_name());
|
logger.info("Drop view '{}.{}'", view->ks_name(), view->cf_name());
|
||||||
auto mutations = db::schema_tables::make_drop_view_mutations(std::move(keyspace), view_ptr(std::move(view)), api::new_timestamp());
|
return db::schema_tables::make_drop_view_mutations(std::move(keyspace), view_ptr(std::move(view)), api::new_timestamp())
|
||||||
return announce(std::move(mutations), announce_locally);
|
.then([announce_locally] (auto&& mutations) {
|
||||||
|
return announce(std::move(mutations), announce_locally);
|
||||||
|
});
|
||||||
} catch (const no_such_column_family& e) {
|
} catch (const no_such_column_family& e) {
|
||||||
throw exceptions::configuration_exception(sprint("Cannot drop non existing materialized view '%s' in keyspace '%s'.",
|
throw exceptions::configuration_exception(sprint("Cannot drop non existing materialized view '%s' in keyspace '%s'.",
|
||||||
cf_name, ks_name));
|
cf_name, ks_name));
|
||||||
|
|||||||
@@ -478,7 +478,6 @@ inline uint64_t& storage_proxy::split_stats::get_ep_stat(gms::inet_address ep) {
|
|||||||
storage_proxy::~storage_proxy() {}
|
storage_proxy::~storage_proxy() {}
|
||||||
storage_proxy::storage_proxy(distributed<database>& db) : _db(db) {
|
storage_proxy::storage_proxy(distributed<database>& db) : _db(db) {
|
||||||
namespace sm = seastar::metrics;
|
namespace sm = seastar::metrics;
|
||||||
|
|
||||||
_metrics.add_group(COORDINATOR_STATS_CATEGORY, {
|
_metrics.add_group(COORDINATOR_STATS_CATEGORY, {
|
||||||
sm::make_queue_length("foreground_writes", [this] { return _stats.writes - _stats.background_writes; },
|
sm::make_queue_length("foreground_writes", [this] { return _stats.writes - _stats.background_writes; },
|
||||||
sm::description("number of currently pending foreground write requests")),
|
sm::description("number of currently pending foreground write requests")),
|
||||||
@@ -486,7 +485,7 @@ storage_proxy::storage_proxy(distributed<database>& db) : _db(db) {
|
|||||||
sm::make_queue_length("background_writes", [this] { return _stats.background_writes; },
|
sm::make_queue_length("background_writes", [this] { return _stats.background_writes; },
|
||||||
sm::description("number of currently pending background write requests")),
|
sm::description("number of currently pending background write requests")),
|
||||||
|
|
||||||
sm::make_queue_length("throttled_writes", [this] { return _throttled_writes.size(); },
|
sm::make_queue_length("current_throttled_writes", [this] { return _throttled_writes.size(); },
|
||||||
sm::description("number of currently throttled write requests")),
|
sm::description("number of currently throttled write requests")),
|
||||||
|
|
||||||
sm::make_total_operations("throttled_writes", [this] { return _stats.throttled_writes; },
|
sm::make_total_operations("throttled_writes", [this] { return _stats.throttled_writes; },
|
||||||
@@ -1733,14 +1732,14 @@ protected:
|
|||||||
size_t _targets_count;
|
size_t _targets_count;
|
||||||
promise<> _done_promise; // all target responded
|
promise<> _done_promise; // all target responded
|
||||||
bool _timedout = false; // will be true if request timeouts
|
bool _timedout = false; // will be true if request timeouts
|
||||||
timer<lowres_clock> _timeout;
|
timer<storage_proxy::clock_type> _timeout;
|
||||||
size_t _responses = 0;
|
size_t _responses = 0;
|
||||||
schema_ptr _schema;
|
schema_ptr _schema;
|
||||||
|
|
||||||
virtual void on_timeout() {}
|
virtual void on_timeout() {}
|
||||||
virtual size_t response_count() const = 0;
|
virtual size_t response_count() const = 0;
|
||||||
public:
|
public:
|
||||||
abstract_read_resolver(schema_ptr schema, db::consistency_level cl, size_t target_count, lowres_clock::time_point timeout)
|
abstract_read_resolver(schema_ptr schema, db::consistency_level cl, size_t target_count, storage_proxy::clock_type::time_point timeout)
|
||||||
: _cl(cl)
|
: _cl(cl)
|
||||||
, _targets_count(target_count)
|
, _targets_count(target_count)
|
||||||
, _schema(std::move(schema))
|
, _schema(std::move(schema))
|
||||||
@@ -1796,7 +1795,7 @@ class digest_read_resolver : public abstract_read_resolver {
|
|||||||
return _digest_results.size();
|
return _digest_results.size();
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
digest_read_resolver(schema_ptr schema, db::consistency_level cl, size_t block_for, lowres_clock::time_point timeout) : abstract_read_resolver(std::move(schema), cl, 0, timeout), _block_for(block_for) {}
|
digest_read_resolver(schema_ptr schema, db::consistency_level cl, size_t block_for, storage_proxy::clock_type::time_point timeout) : abstract_read_resolver(std::move(schema), cl, 0, timeout), _block_for(block_for) {}
|
||||||
void add_data(gms::inet_address from, foreign_ptr<lw_shared_ptr<query::result>> result) {
|
void add_data(gms::inet_address from, foreign_ptr<lw_shared_ptr<query::result>> result) {
|
||||||
if (!_timedout) {
|
if (!_timedout) {
|
||||||
// if only one target was queried digest_check() will be skipped so we can also skip digest calculation
|
// if only one target was queried digest_check() will be skipped so we can also skip digest calculation
|
||||||
@@ -2014,6 +2013,7 @@ private:
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
assert(last_partition);
|
||||||
return get_last_row(s, *last_partition, is_reversed);
|
return get_last_row(s, *last_partition, is_reversed);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2143,7 +2143,7 @@ private:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
data_read_resolver(schema_ptr schema, db::consistency_level cl, size_t targets_count, lowres_clock::time_point timeout) : abstract_read_resolver(std::move(schema), cl, targets_count, timeout) {
|
data_read_resolver(schema_ptr schema, db::consistency_level cl, size_t targets_count, storage_proxy::clock_type::time_point timeout) : abstract_read_resolver(std::move(schema), cl, targets_count, timeout) {
|
||||||
_data_results.reserve(targets_count);
|
_data_results.reserve(targets_count);
|
||||||
}
|
}
|
||||||
void add_mutate_data(gms::inet_address from, foreign_ptr<lw_shared_ptr<reconcilable_result>> result) {
|
void add_mutate_data(gms::inet_address from, foreign_ptr<lw_shared_ptr<reconcilable_result>> result) {
|
||||||
@@ -2230,6 +2230,10 @@ public:
|
|||||||
v.emplace_back(r.from, stdx::optional<partition>(), r.reached_end, true);
|
v.emplace_back(r.from, stdx::optional<partition>(), r.reached_end, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
boost::sort(v, [] (const version& x, const version& y) {
|
||||||
|
return x.from < y.from;
|
||||||
|
});
|
||||||
} while(true);
|
} while(true);
|
||||||
|
|
||||||
std::vector<mutation_and_live_row_count> reconciled_partitions;
|
std::vector<mutation_and_live_row_count> reconciled_partitions;
|
||||||
@@ -2238,7 +2242,10 @@ public:
|
|||||||
// reconcile all versions
|
// reconcile all versions
|
||||||
boost::range::transform(boost::make_iterator_range(versions.begin(), versions.end()), std::back_inserter(reconciled_partitions),
|
boost::range::transform(boost::make_iterator_range(versions.begin(), versions.end()), std::back_inserter(reconciled_partitions),
|
||||||
[this, schema, original_per_partition_limit] (std::vector<version>& v) {
|
[this, schema, original_per_partition_limit] (std::vector<version>& v) {
|
||||||
auto m = boost::accumulate(v, mutation(v.front().par->mut().key(*schema), schema), [this, schema] (mutation& m, const version& ver) {
|
auto it = boost::range::find_if(v, [] (auto&& ver) {
|
||||||
|
return bool(ver.par);
|
||||||
|
});
|
||||||
|
auto m = boost::accumulate(v, mutation(it->par->mut().key(*schema), schema), [this, schema] (mutation& m, const version& ver) {
|
||||||
if (ver.par) {
|
if (ver.par) {
|
||||||
m.partition().apply(*schema, ver.par->mut().partition(), *schema);
|
m.partition().apply(*schema, ver.par->mut().partition(), *schema);
|
||||||
}
|
}
|
||||||
@@ -2330,7 +2337,7 @@ protected:
|
|||||||
using targets_iterator = std::vector<gms::inet_address>::iterator;
|
using targets_iterator = std::vector<gms::inet_address>::iterator;
|
||||||
using digest_resolver_ptr = ::shared_ptr<digest_read_resolver>;
|
using digest_resolver_ptr = ::shared_ptr<digest_read_resolver>;
|
||||||
using data_resolver_ptr = ::shared_ptr<data_read_resolver>;
|
using data_resolver_ptr = ::shared_ptr<data_read_resolver>;
|
||||||
using clock_type = lowres_clock;
|
using clock_type = storage_proxy::clock_type;
|
||||||
|
|
||||||
schema_ptr _schema;
|
schema_ptr _schema;
|
||||||
shared_ptr<storage_proxy> _proxy;
|
shared_ptr<storage_proxy> _proxy;
|
||||||
@@ -2454,7 +2461,7 @@ protected:
|
|||||||
uint32_t original_partition_limit() const {
|
uint32_t original_partition_limit() const {
|
||||||
return _cmd->partition_limit;
|
return _cmd->partition_limit;
|
||||||
}
|
}
|
||||||
void reconcile(db::consistency_level cl, lowres_clock::time_point timeout, lw_shared_ptr<query::read_command> cmd) {
|
void reconcile(db::consistency_level cl, storage_proxy::clock_type::time_point timeout, lw_shared_ptr<query::read_command> cmd) {
|
||||||
data_resolver_ptr data_resolver = ::make_shared<data_read_resolver>(_schema, cl, _targets.size(), timeout);
|
data_resolver_ptr data_resolver = ::make_shared<data_read_resolver>(_schema, cl, _targets.size(), timeout);
|
||||||
auto exec = shared_from_this();
|
auto exec = shared_from_this();
|
||||||
|
|
||||||
@@ -2529,12 +2536,12 @@ protected:
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
void reconcile(db::consistency_level cl, lowres_clock::time_point timeout) {
|
void reconcile(db::consistency_level cl, storage_proxy::clock_type::time_point timeout) {
|
||||||
reconcile(cl, timeout, _cmd);
|
reconcile(cl, timeout, _cmd);
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
virtual future<foreign_ptr<lw_shared_ptr<query::result>>> execute(lowres_clock::time_point timeout) {
|
virtual future<foreign_ptr<lw_shared_ptr<query::result>>> execute(storage_proxy::clock_type::time_point timeout) {
|
||||||
digest_resolver_ptr digest_resolver = ::make_shared<digest_read_resolver>(_schema, _cl, _block_for, timeout);
|
digest_resolver_ptr digest_resolver = ::make_shared<digest_read_resolver>(_schema, _cl, _block_for, timeout);
|
||||||
auto exec = shared_from_this();
|
auto exec = shared_from_this();
|
||||||
|
|
||||||
@@ -2604,7 +2611,7 @@ public:
|
|||||||
class always_speculating_read_executor : public abstract_read_executor {
|
class always_speculating_read_executor : public abstract_read_executor {
|
||||||
public:
|
public:
|
||||||
using abstract_read_executor::abstract_read_executor;
|
using abstract_read_executor::abstract_read_executor;
|
||||||
virtual future<> make_requests(digest_resolver_ptr resolver, lowres_clock::time_point timeout) {
|
virtual future<> make_requests(digest_resolver_ptr resolver, storage_proxy::clock_type::time_point timeout) {
|
||||||
resolver->add_wait_targets(_targets.size());
|
resolver->add_wait_targets(_targets.size());
|
||||||
// FIXME: consider disabling for CL=*ONE
|
// FIXME: consider disabling for CL=*ONE
|
||||||
bool want_digest = true;
|
bool want_digest = true;
|
||||||
@@ -2615,10 +2622,10 @@ public:
|
|||||||
|
|
||||||
// this executor sends request to an additional replica after some time below timeout
|
// this executor sends request to an additional replica after some time below timeout
|
||||||
class speculating_read_executor : public abstract_read_executor {
|
class speculating_read_executor : public abstract_read_executor {
|
||||||
timer<> _speculate_timer;
|
timer<storage_proxy::clock_type> _speculate_timer;
|
||||||
public:
|
public:
|
||||||
using abstract_read_executor::abstract_read_executor;
|
using abstract_read_executor::abstract_read_executor;
|
||||||
virtual future<> make_requests(digest_resolver_ptr resolver, lowres_clock::time_point timeout) {
|
virtual future<> make_requests(digest_resolver_ptr resolver, storage_proxy::clock_type::time_point timeout) {
|
||||||
_speculate_timer.set_callback([this, resolver, timeout] {
|
_speculate_timer.set_callback([this, resolver, timeout] {
|
||||||
if (!resolver->is_completed()) { // at the time the callback runs request may be completed already
|
if (!resolver->is_completed()) { // at the time the callback runs request may be completed already
|
||||||
resolver->add_wait_targets(1); // we send one more request so wait for it too
|
resolver->add_wait_targets(1); // we send one more request so wait for it too
|
||||||
@@ -2664,7 +2671,7 @@ class range_slice_read_executor : public abstract_read_executor {
|
|||||||
public:
|
public:
|
||||||
range_slice_read_executor(schema_ptr s, shared_ptr<storage_proxy> proxy, lw_shared_ptr<query::read_command> cmd, dht::partition_range pr, db::consistency_level cl, std::vector<gms::inet_address> targets, tracing::trace_state_ptr trace_state) :
|
range_slice_read_executor(schema_ptr s, shared_ptr<storage_proxy> proxy, lw_shared_ptr<query::read_command> cmd, dht::partition_range pr, db::consistency_level cl, std::vector<gms::inet_address> targets, tracing::trace_state_ptr trace_state) :
|
||||||
abstract_read_executor(std::move(s), std::move(proxy), std::move(cmd), std::move(pr), cl, targets.size(), std::move(targets), std::move(trace_state)) {}
|
abstract_read_executor(std::move(s), std::move(proxy), std::move(cmd), std::move(pr), cl, targets.size(), std::move(targets), std::move(trace_state)) {}
|
||||||
virtual future<foreign_ptr<lw_shared_ptr<query::result>>> execute(lowres_clock::time_point timeout) override {
|
virtual future<foreign_ptr<lw_shared_ptr<query::result>>> execute(storage_proxy::clock_type::time_point timeout) override {
|
||||||
reconcile(_cl, timeout);
|
reconcile(_cl, timeout);
|
||||||
return _result_promise.get_future();
|
return _result_promise.get_future();
|
||||||
}
|
}
|
||||||
@@ -2795,7 +2802,7 @@ future<foreign_ptr<lw_shared_ptr<query::result>>>
|
|||||||
storage_proxy::query_singular(lw_shared_ptr<query::read_command> cmd, dht::partition_range_vector&& partition_ranges, db::consistency_level cl, tracing::trace_state_ptr trace_state) {
|
storage_proxy::query_singular(lw_shared_ptr<query::read_command> cmd, dht::partition_range_vector&& partition_ranges, db::consistency_level cl, tracing::trace_state_ptr trace_state) {
|
||||||
std::vector<::shared_ptr<abstract_read_executor>> exec;
|
std::vector<::shared_ptr<abstract_read_executor>> exec;
|
||||||
exec.reserve(partition_ranges.size());
|
exec.reserve(partition_ranges.size());
|
||||||
auto timeout = lowres_clock::now() + std::chrono::milliseconds(_db.local().get_config().read_request_timeout_in_ms());
|
auto timeout = storage_proxy::clock_type::now() + std::chrono::milliseconds(_db.local().get_config().read_request_timeout_in_ms());
|
||||||
|
|
||||||
for (auto&& pr: partition_ranges) {
|
for (auto&& pr: partition_ranges) {
|
||||||
if (!pr.is_singular()) {
|
if (!pr.is_singular()) {
|
||||||
@@ -2819,7 +2826,7 @@ storage_proxy::query_singular(lw_shared_ptr<query::read_command> cmd, dht::parti
|
|||||||
}
|
}
|
||||||
|
|
||||||
future<std::vector<foreign_ptr<lw_shared_ptr<query::result>>>>
|
future<std::vector<foreign_ptr<lw_shared_ptr<query::result>>>>
|
||||||
storage_proxy::query_partition_key_range_concurrent(lowres_clock::time_point timeout, std::vector<foreign_ptr<lw_shared_ptr<query::result>>>&& results,
|
storage_proxy::query_partition_key_range_concurrent(storage_proxy::clock_type::time_point timeout, std::vector<foreign_ptr<lw_shared_ptr<query::result>>>&& results,
|
||||||
lw_shared_ptr<query::read_command> cmd, db::consistency_level cl, dht::partition_range_vector::iterator&& i,
|
lw_shared_ptr<query::read_command> cmd, db::consistency_level cl, dht::partition_range_vector::iterator&& i,
|
||||||
dht::partition_range_vector&& ranges, int concurrency_factor, tracing::trace_state_ptr trace_state,
|
dht::partition_range_vector&& ranges, int concurrency_factor, tracing::trace_state_ptr trace_state,
|
||||||
uint32_t remaining_row_count, uint32_t remaining_partition_count) {
|
uint32_t remaining_row_count, uint32_t remaining_partition_count) {
|
||||||
@@ -2923,7 +2930,7 @@ storage_proxy::query_partition_key_range(lw_shared_ptr<query::read_command> cmd,
|
|||||||
schema_ptr schema = local_schema_registry().get(cmd->schema_version);
|
schema_ptr schema = local_schema_registry().get(cmd->schema_version);
|
||||||
keyspace& ks = _db.local().find_keyspace(schema->ks_name());
|
keyspace& ks = _db.local().find_keyspace(schema->ks_name());
|
||||||
dht::partition_range_vector ranges;
|
dht::partition_range_vector ranges;
|
||||||
auto timeout = lowres_clock::now() + std::chrono::milliseconds(_db.local().get_config().read_request_timeout_in_ms());
|
auto timeout = storage_proxy::clock_type::now() + std::chrono::milliseconds(_db.local().get_config().read_request_timeout_in_ms());
|
||||||
|
|
||||||
// when dealing with LocalStrategy keyspaces, we can skip the range splitting and merging (which can be
|
// when dealing with LocalStrategy keyspaces, we can skip the range splitting and merging (which can be
|
||||||
// expensive in clusters with vnodes)
|
// expensive in clusters with vnodes)
|
||||||
@@ -3957,24 +3964,22 @@ storage_proxy::query_nonsingular_mutations_locally(schema_ptr s, lw_shared_ptr<q
|
|||||||
auto shard_cmd = make_lw_shared<query::read_command>(*cmd);
|
auto shard_cmd = make_lw_shared<query::read_command>(*cmd);
|
||||||
return do_with(cmd,
|
return do_with(cmd,
|
||||||
shard_cmd,
|
shard_cmd,
|
||||||
1u,
|
|
||||||
0u,
|
0u,
|
||||||
false,
|
false,
|
||||||
static_cast<unsigned>(prs.size()),
|
static_cast<unsigned>(prs.size()),
|
||||||
std::unordered_map<element_and_shard, partition_range_and_sort_key>{},
|
std::unordered_map<element_and_shard, partition_range_and_sort_key>{},
|
||||||
mutation_result_merger{s, cmd},
|
mutation_result_merger{s, cmd},
|
||||||
dht::ring_position_range_vector_sharder{prs},
|
dht::ring_position_exponential_vector_sharder{prs},
|
||||||
global_schema_ptr(s),
|
global_schema_ptr(s),
|
||||||
tracing::global_trace_state_ptr(std::move(trace_state)),
|
tracing::global_trace_state_ptr(std::move(trace_state)),
|
||||||
[this, s, max_size] (lw_shared_ptr<query::read_command>& cmd,
|
[this, s, max_size] (lw_shared_ptr<query::read_command>& cmd,
|
||||||
lw_shared_ptr<query::read_command>& shard_cmd,
|
lw_shared_ptr<query::read_command>& shard_cmd,
|
||||||
unsigned& shards_in_parallel,
|
|
||||||
unsigned& mutation_result_merger_key,
|
unsigned& mutation_result_merger_key,
|
||||||
bool& no_more_ranges,
|
bool& no_more_ranges,
|
||||||
unsigned& partition_range_count,
|
unsigned& partition_range_count,
|
||||||
std::unordered_map<element_and_shard, partition_range_and_sort_key>& shards_for_this_iteration,
|
std::unordered_map<element_and_shard, partition_range_and_sort_key>& shards_for_this_iteration,
|
||||||
mutation_result_merger& mrm,
|
mutation_result_merger& mrm,
|
||||||
dht::ring_position_range_vector_sharder& rprs,
|
dht::ring_position_exponential_vector_sharder& rpevs,
|
||||||
global_schema_ptr& gs,
|
global_schema_ptr& gs,
|
||||||
tracing::global_trace_state_ptr& gt) {
|
tracing::global_trace_state_ptr& gt) {
|
||||||
return _db.local().get_result_memory_limiter().new_mutation_read(max_size).then([&, s] (query::result_memory_accounter ma) {
|
return _db.local().get_result_memory_limiter().new_mutation_read(max_size).then([&, s] (query::result_memory_accounter ma) {
|
||||||
@@ -3985,36 +3990,32 @@ storage_proxy::query_nonsingular_mutations_locally(schema_ptr s, lw_shared_ptr<q
|
|||||||
// because we'll throw away most of the results. So we'll exponentially increase
|
// because we'll throw away most of the results. So we'll exponentially increase
|
||||||
// concurrency starting at 1, so we won't waste on dense tables and at most
|
// concurrency starting at 1, so we won't waste on dense tables and at most
|
||||||
// `log(nr_shards) + ignore_msb_bits` latency multiplier for near-empty tables.
|
// `log(nr_shards) + ignore_msb_bits` latency multiplier for near-empty tables.
|
||||||
|
//
|
||||||
|
// We use the ring_position_exponential_vector_sharder to give us subranges that follow
|
||||||
|
// this scheme.
|
||||||
shards_for_this_iteration.clear();
|
shards_for_this_iteration.clear();
|
||||||
// If we're reading from less than smp::count shards, then we can just append
|
// If we're reading from less than smp::count shards, then we can just append
|
||||||
// each shard in order without sorting. If we're reading from more, then
|
// each shard in order without sorting. If we're reading from more, then
|
||||||
// we'll read from some shards at least twice, so the partitions within will be
|
// we'll read from some shards at least twice, so the partitions within will be
|
||||||
// out-of-order wrt. other shards
|
// out-of-order wrt. other shards
|
||||||
|
auto this_iteration_subranges = rpevs.next(*s);
|
||||||
auto retain_shard_order = true;
|
auto retain_shard_order = true;
|
||||||
for (auto i = 0u; i < shards_in_parallel; ++i) {
|
no_more_ranges = true;
|
||||||
auto now = rprs.next(*s);
|
if (this_iteration_subranges) {
|
||||||
if (!now) {
|
no_more_ranges = false;
|
||||||
no_more_ranges = true;
|
retain_shard_order = this_iteration_subranges->inorder;
|
||||||
break;
|
auto sort_key = 0u;
|
||||||
}
|
for (auto&& now : this_iteration_subranges->per_shard_ranges) {
|
||||||
// Let's see if this is a new shard, or if we can expand an existing range
|
shards_for_this_iteration.emplace(element_and_shard{this_iteration_subranges->element, now.shard}, partition_range_and_sort_key{now.ring_range, sort_key++});
|
||||||
auto&& rng_ok = shards_for_this_iteration.emplace(element_and_shard{now->element, now->shard}, partition_range_and_sort_key{now->ring_range, i});
|
|
||||||
if (!rng_ok.second) {
|
|
||||||
// We saw this shard already, enlarge the range (we know now->ring_range came from the same partition range;
|
|
||||||
// otherwise it would have had a unique now->element).
|
|
||||||
auto& rng = rng_ok.first->second.pr;
|
|
||||||
rng = nonwrapping_range<dht::ring_position>(std::move(rng.start()), std::move(now->ring_range.end()));
|
|
||||||
// This range is no longer ordered with respect to the others, so:
|
|
||||||
retain_shard_order = false;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto key_base = mutation_result_merger_key;
|
auto key_base = mutation_result_merger_key;
|
||||||
|
|
||||||
// prepare for next iteration
|
// prepare for next iteration
|
||||||
// Each iteration uses a merger key that is either i in the loop above (so in the range [0, shards_in_parallel),
|
// Each iteration uses a merger key that is either i in the loop above (so in the range [0, shards_in_parallel),
|
||||||
// or, the element index in prs (so in the range [0, partition_range_count). Make room for sufficient keys.
|
// or, the element index in prs (so in the range [0, partition_range_count). Make room for sufficient keys.
|
||||||
mutation_result_merger_key += std::max(shards_in_parallel, partition_range_count);
|
mutation_result_merger_key += std::max(smp::count, partition_range_count);
|
||||||
shards_in_parallel *= 2;
|
|
||||||
|
|
||||||
shard_cmd->partition_limit = cmd->partition_limit - mrm.partition_count();
|
shard_cmd->partition_limit = cmd->partition_limit - mrm.partition_count();
|
||||||
shard_cmd->row_limit = cmd->row_limit - mrm.row_count();
|
shard_cmd->row_limit = cmd->row_limit - mrm.row_count();
|
||||||
|
|||||||
@@ -71,7 +71,7 @@ public:
|
|||||||
private:
|
private:
|
||||||
struct rh_entry {
|
struct rh_entry {
|
||||||
::shared_ptr<abstract_write_response_handler> handler;
|
::shared_ptr<abstract_write_response_handler> handler;
|
||||||
timer<lowres_clock> expire_timer;
|
timer<clock_type> expire_timer;
|
||||||
rh_entry(::shared_ptr<abstract_write_response_handler>&& h, std::function<void()>&& cb);
|
rh_entry(::shared_ptr<abstract_write_response_handler>&& h, std::function<void()>&& cb);
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -253,7 +253,7 @@ private:
|
|||||||
dht::partition_range_vector get_restricted_ranges(keyspace& ks, const schema& s, dht::partition_range range);
|
dht::partition_range_vector get_restricted_ranges(keyspace& ks, const schema& s, dht::partition_range range);
|
||||||
float estimate_result_rows_per_range(lw_shared_ptr<query::read_command> cmd, keyspace& ks);
|
float estimate_result_rows_per_range(lw_shared_ptr<query::read_command> cmd, keyspace& ks);
|
||||||
static std::vector<gms::inet_address> intersection(const std::vector<gms::inet_address>& l1, const std::vector<gms::inet_address>& l2);
|
static std::vector<gms::inet_address> intersection(const std::vector<gms::inet_address>& l1, const std::vector<gms::inet_address>& l2);
|
||||||
future<std::vector<foreign_ptr<lw_shared_ptr<query::result>>>> query_partition_key_range_concurrent(lowres_clock::time_point timeout,
|
future<std::vector<foreign_ptr<lw_shared_ptr<query::result>>>> query_partition_key_range_concurrent(clock_type::time_point timeout,
|
||||||
std::vector<foreign_ptr<lw_shared_ptr<query::result>>>&& results, lw_shared_ptr<query::read_command> cmd, db::consistency_level cl, dht::partition_range_vector::iterator&& i,
|
std::vector<foreign_ptr<lw_shared_ptr<query::result>>>&& results, lw_shared_ptr<query::read_command> cmd, db::consistency_level cl, dht::partition_range_vector::iterator&& i,
|
||||||
dht::partition_range_vector&& ranges, int concurrency_factor, tracing::trace_state_ptr trace_state,
|
dht::partition_range_vector&& ranges, int concurrency_factor, tracing::trace_state_ptr trace_state,
|
||||||
uint32_t remaining_row_count, uint32_t remaining_partition_count);
|
uint32_t remaining_row_count, uint32_t remaining_partition_count);
|
||||||
|
|||||||
@@ -84,6 +84,7 @@ static const sstring RANGE_TOMBSTONES_FEATURE = "RANGE_TOMBSTONES";
|
|||||||
static const sstring LARGE_PARTITIONS_FEATURE = "LARGE_PARTITIONS";
|
static const sstring LARGE_PARTITIONS_FEATURE = "LARGE_PARTITIONS";
|
||||||
static const sstring MATERIALIZED_VIEWS_FEATURE = "MATERIALIZED_VIEWS";
|
static const sstring MATERIALIZED_VIEWS_FEATURE = "MATERIALIZED_VIEWS";
|
||||||
static const sstring COUNTERS_FEATURE = "COUNTERS";
|
static const sstring COUNTERS_FEATURE = "COUNTERS";
|
||||||
|
static const sstring CORRECT_COUNTER_ORDER_FEATURE = "CORRECT_COUNTER_ORDER";
|
||||||
|
|
||||||
distributed<storage_service> _the_storage_service;
|
distributed<storage_service> _the_storage_service;
|
||||||
|
|
||||||
@@ -123,6 +124,7 @@ sstring storage_service::get_config_supported_features() {
|
|||||||
std::vector<sstring> features = {
|
std::vector<sstring> features = {
|
||||||
RANGE_TOMBSTONES_FEATURE,
|
RANGE_TOMBSTONES_FEATURE,
|
||||||
LARGE_PARTITIONS_FEATURE,
|
LARGE_PARTITIONS_FEATURE,
|
||||||
|
CORRECT_COUNTER_ORDER_FEATURE,
|
||||||
};
|
};
|
||||||
if (service::get_local_storage_service()._db.local().get_config().experimental()) {
|
if (service::get_local_storage_service()._db.local().get_config().experimental()) {
|
||||||
features.push_back(MATERIALIZED_VIEWS_FEATURE);
|
features.push_back(MATERIALIZED_VIEWS_FEATURE);
|
||||||
@@ -476,16 +478,6 @@ void storage_service::join_token_ring(int delay) {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (!_is_survey_mode) {
|
if (!_is_survey_mode) {
|
||||||
// We have to create the system_auth and system_traces keyspaces and
|
|
||||||
// their tables before Node moves to the NORMAL state so that other
|
|
||||||
// Nodes joining the newly created cluster and serializing on this event
|
|
||||||
// "see" these new objects and don't try to create them.
|
|
||||||
//
|
|
||||||
// Otherwise there is a high chance to hit the issue #420.
|
|
||||||
auth::auth::setup().get();
|
|
||||||
supervisor::notify("starting tracing");
|
|
||||||
tracing::tracing::start_tracing().get();
|
|
||||||
|
|
||||||
// start participating in the ring.
|
// start participating in the ring.
|
||||||
db::system_keyspace::set_bootstrap_state(db::system_keyspace::bootstrap_state::COMPLETED).get();
|
db::system_keyspace::set_bootstrap_state(db::system_keyspace::bootstrap_state::COMPLETED).get();
|
||||||
set_tokens(_bootstrap_tokens);
|
set_tokens(_bootstrap_tokens);
|
||||||
@@ -501,6 +493,9 @@ void storage_service::join_token_ring(int delay) {
|
|||||||
logger.error(err.c_str());
|
logger.error(err.c_str());
|
||||||
throw std::runtime_error(err);
|
throw std::runtime_error(err);
|
||||||
}
|
}
|
||||||
|
auth::auth::setup().get();
|
||||||
|
supervisor::notify("starting tracing");
|
||||||
|
tracing::tracing::start_tracing().get();
|
||||||
} else {
|
} else {
|
||||||
logger.info("Startup complete, but write survey mode is active, not becoming an active ring member. Use JMX (StorageService->joinRing()) to finalize ring joining.");
|
logger.info("Startup complete, but write survey mode is active, not becoming an active ring member. Use JMX (StorageService->joinRing()) to finalize ring joining.");
|
||||||
}
|
}
|
||||||
@@ -1348,6 +1343,7 @@ future<> storage_service::init_server(int delay) {
|
|||||||
get_storage_service().invoke_on_all([] (auto& ss) {
|
get_storage_service().invoke_on_all([] (auto& ss) {
|
||||||
ss._range_tombstones_feature = gms::feature(RANGE_TOMBSTONES_FEATURE);
|
ss._range_tombstones_feature = gms::feature(RANGE_TOMBSTONES_FEATURE);
|
||||||
ss._large_partitions_feature = gms::feature(LARGE_PARTITIONS_FEATURE);
|
ss._large_partitions_feature = gms::feature(LARGE_PARTITIONS_FEATURE);
|
||||||
|
ss._correct_counter_order_feature = gms::feature(CORRECT_COUNTER_ORDER_FEATURE);
|
||||||
|
|
||||||
if (ss._db.local().get_config().experimental()) {
|
if (ss._db.local().get_config().experimental()) {
|
||||||
ss._materialized_views_feature = gms::feature(MATERIALIZED_VIEWS_FEATURE);
|
ss._materialized_views_feature = gms::feature(MATERIALIZED_VIEWS_FEATURE);
|
||||||
|
|||||||
@@ -262,6 +262,7 @@ private:
|
|||||||
gms::feature _large_partitions_feature;
|
gms::feature _large_partitions_feature;
|
||||||
gms::feature _materialized_views_feature;
|
gms::feature _materialized_views_feature;
|
||||||
gms::feature _counters_feature;
|
gms::feature _counters_feature;
|
||||||
|
gms::feature _correct_counter_order_feature;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
void enable_all_features() {
|
void enable_all_features() {
|
||||||
@@ -269,6 +270,7 @@ public:
|
|||||||
_large_partitions_feature.enable();
|
_large_partitions_feature.enable();
|
||||||
_materialized_views_feature.enable();
|
_materialized_views_feature.enable();
|
||||||
_counters_feature.enable();
|
_counters_feature.enable();
|
||||||
|
_correct_counter_order_feature.enable();
|
||||||
}
|
}
|
||||||
|
|
||||||
void finish_bootstrapping() {
|
void finish_bootstrapping() {
|
||||||
@@ -2230,6 +2232,10 @@ public:
|
|||||||
bool cluster_supports_counters() const {
|
bool cluster_supports_counters() const {
|
||||||
return bool(_counters_feature);
|
return bool(_counters_feature);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool cluster_supports_correct_counter_order() const {
|
||||||
|
return bool(_correct_counter_order_feature);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
inline future<> init_storage_service(distributed<database>& db) {
|
inline future<> init_storage_service(distributed<database>& db) {
|
||||||
|
|||||||
@@ -31,9 +31,9 @@ class sstable_range_wrapping_reader final : public mutation_reader::impl {
|
|||||||
public:
|
public:
|
||||||
sstable_range_wrapping_reader(lw_shared_ptr<sstables::sstable> sst,
|
sstable_range_wrapping_reader(lw_shared_ptr<sstables::sstable> sst,
|
||||||
schema_ptr s, const dht::partition_range& pr, const query::partition_slice& slice,
|
schema_ptr s, const dht::partition_range& pr, const query::partition_slice& slice,
|
||||||
const io_priority_class& pc)
|
const io_priority_class& pc, mutation_reader::forwarding fwd_mr)
|
||||||
: _sst(sst)
|
: _sst(sst)
|
||||||
, _smr(sst->read_range_rows(std::move(s), pr, slice, pc)) {
|
, _smr(sst->read_range_rows(std::move(s), pr, slice, pc, fwd_mr)) {
|
||||||
}
|
}
|
||||||
virtual future<streamed_mutation_opt> operator()() override {
|
virtual future<streamed_mutation_opt> operator()() override {
|
||||||
return _smr.read();
|
return _smr.read();
|
||||||
|
|||||||
@@ -47,6 +47,7 @@
|
|||||||
#include <boost/range/algorithm.hpp>
|
#include <boost/range/algorithm.hpp>
|
||||||
#include <boost/range/adaptors.hpp>
|
#include <boost/range/adaptors.hpp>
|
||||||
#include <boost/range/join.hpp>
|
#include <boost/range/join.hpp>
|
||||||
|
#include <boost/algorithm/cxx11/any_of.hpp>
|
||||||
|
|
||||||
#include "core/future-util.hh"
|
#include "core/future-util.hh"
|
||||||
#include "core/pipe.hh"
|
#include "core/pipe.hh"
|
||||||
@@ -382,11 +383,22 @@ get_fully_expired_sstables(column_family& cf, std::vector<sstables::shared_sstab
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto compacted_undeleted_gens = boost::copy_range<std::unordered_set<int64_t>>(cf.compacted_undeleted_sstables()
|
||||||
|
| boost::adaptors::transformed(std::mem_fn(&sstables::sstable::generation)));
|
||||||
|
auto has_undeleted_ancestor = [&compacted_undeleted_gens] (auto& candidate) {
|
||||||
|
return boost::algorithm::any_of(candidate->ancestors(), [&compacted_undeleted_gens] (auto gen) {
|
||||||
|
return compacted_undeleted_gens.count(gen);
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
// SStables that do not contain live data is added to list of possibly expired sstables.
|
// SStables that do not contain live data is added to list of possibly expired sstables.
|
||||||
for (auto& candidate : compacting) {
|
for (auto& candidate : compacting) {
|
||||||
logger.debug("Checking if candidate of generation {} and max_deletion_time {} is expired, gc_before is {}",
|
logger.debug("Checking if candidate of generation {} and max_deletion_time {} is expired, gc_before is {}",
|
||||||
candidate->generation(), candidate->get_stats_metadata().max_local_deletion_time, gc_before);
|
candidate->generation(), candidate->get_stats_metadata().max_local_deletion_time, gc_before);
|
||||||
if (candidate->get_stats_metadata().max_local_deletion_time < gc_before) {
|
// A fully expired sstable which has an ancestor undeleted shouldn't be compacted because
|
||||||
|
// expired data won't be purged because undeleted sstables are taken into account when
|
||||||
|
// calculating max purgeable timestamp, and not doing it could lead to a compaction loop.
|
||||||
|
if (candidate->get_stats_metadata().max_local_deletion_time < gc_before && !has_undeleted_ancestor(candidate)) {
|
||||||
logger.debug("Adding candidate of generation {} to list of possibly expired sstables", candidate->generation());
|
logger.debug("Adding candidate of generation {} to list of possibly expired sstables", candidate->generation());
|
||||||
candidates.push_back(candidate);
|
candidates.push_back(candidate);
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -242,11 +242,12 @@ void compaction_manager::submit_sstable_rewrite(column_family* cf, sstables::sha
|
|||||||
// sstable we are planning to work on:
|
// sstable we are planning to work on:
|
||||||
_compacting_sstables.insert(sst);
|
_compacting_sstables.insert(sst);
|
||||||
auto task = make_lw_shared<compaction_manager::task>();
|
auto task = make_lw_shared<compaction_manager::task>();
|
||||||
|
task->compacting_cf = cf;
|
||||||
_tasks.push_back(task);
|
_tasks.push_back(task);
|
||||||
task->compaction_done = with_semaphore(sem, 1, [this, cf, sst] {
|
task->compaction_done = with_semaphore(sem, 1, [this, task, cf, sst] {
|
||||||
_stats.active_tasks++;
|
_stats.active_tasks++;
|
||||||
if (_stopped) {
|
if (!can_proceed(task)) {
|
||||||
return make_ready_future<>();;
|
return make_ready_future<>();
|
||||||
}
|
}
|
||||||
return cf->compact_sstables(sstables::compaction_descriptor(
|
return cf->compact_sstables(sstables::compaction_descriptor(
|
||||||
std::vector<sstables::shared_sstable>{sst},
|
std::vector<sstables::shared_sstable>{sst},
|
||||||
@@ -462,6 +463,14 @@ future<> compaction_manager::perform_cleanup(column_family* cf) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
future<> compaction_manager::remove(column_family* cf) {
|
future<> compaction_manager::remove(column_family* cf) {
|
||||||
|
// FIXME: better way to iterate through compaction info for a given column family,
|
||||||
|
// although this path isn't performance sensitive.
|
||||||
|
for (auto& info : _compactions) {
|
||||||
|
if (cf->schema()->ks_name() == info->ks && cf->schema()->cf_name() == info->cf) {
|
||||||
|
info->stop("column family removal");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// We need to guarantee that a task being stopped will not retry to compact
|
// We need to guarantee that a task being stopped will not retry to compact
|
||||||
// a column family being removed.
|
// a column family being removed.
|
||||||
auto tasks_to_stop = make_lw_shared<std::vector<lw_shared_ptr<task>>>();
|
auto tasks_to_stop = make_lw_shared<std::vector<lw_shared_ptr<task>>>();
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user