Compare commits
100 Commits
debug_form
...
scylla-1.7
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dbbf99d7fa | ||
|
|
f7a143e7be | ||
|
|
562102cc76 | ||
|
|
d4b444418a | ||
|
|
befd4c9819 | ||
|
|
eb2fe0fbd3 | ||
|
|
eb6b0b1267 | ||
|
|
7836600ded | ||
|
|
230c33da49 | ||
|
|
17d8a0c727 | ||
|
|
064de6f8de | ||
|
|
df56c108b7 | ||
|
|
25607ab9df | ||
|
|
b26bd8bbeb | ||
|
|
1ca7f5458b | ||
|
|
50c8a08e91 | ||
|
|
9d1b9084ed | ||
|
|
e2c75d8532 | ||
|
|
59063f4891 | ||
|
|
de79792373 | ||
|
|
3557b449ac | ||
|
|
a8e89d624a | ||
|
|
31cd6914a8 | ||
|
|
a441f889c3 | ||
|
|
91b7cb8576 | ||
|
|
2b17c4aacf | ||
|
|
f61d9ac632 | ||
|
|
fc9db8bb03 | ||
|
|
bd67d23927 | ||
|
|
bdeeebbd74 | ||
|
|
a1cb29e7ec | ||
|
|
e8369644fd | ||
|
|
a36cabdb30 | ||
|
|
1d26fab73e | ||
|
|
5f0c635da7 | ||
|
|
82cc3d7aa5 | ||
|
|
98d782cfe1 | ||
|
|
ea0591ad3d | ||
|
|
7eedd743bf | ||
|
|
8a21961ec9 | ||
|
|
08698d9030 | ||
|
|
df5a291c63 | ||
|
|
1a77312aec | ||
|
|
ea684c9a3e | ||
|
|
2df7c80c66 | ||
|
|
193b5d1782 | ||
|
|
6609c9accb | ||
|
|
2f107d3f61 | ||
|
|
dd9afa4c93 | ||
|
|
4021e2befb | ||
|
|
9b26a57288 | ||
|
|
31b5ef13c2 | ||
|
|
4bbee01288 | ||
|
|
3cc03f88fd | ||
|
|
4179d8f7c4 | ||
|
|
c20ddaf5af | ||
|
|
29dd48621b | ||
|
|
87de77a5ea | ||
|
|
66c4dcba8e | ||
|
|
7cfdc08af9 | ||
|
|
fdbe5caf41 | ||
|
|
522e62089b | ||
|
|
699648d5a1 | ||
|
|
698a4e62d9 | ||
|
|
63bec22d28 | ||
|
|
3d14e6e802 | ||
|
|
ea4a2dad96 | ||
|
|
655e6197cb | ||
|
|
1a1370d33e | ||
|
|
7f17424a4e | ||
|
|
dd56f1bec7 | ||
|
|
5df61797d6 | ||
|
|
b6db9e3d51 | ||
|
|
f2595bea85 | ||
|
|
e930ef0ee0 | ||
|
|
4cf0f88724 | ||
|
|
372f07b06e | ||
|
|
0ccc6630a8 | ||
|
|
b95a2338be | ||
|
|
f2d0ac9994 | ||
|
|
56725de0db | ||
|
|
6f479c8999 | ||
|
|
8c0488bce9 | ||
|
|
68dd11e275 | ||
|
|
a64c53d05f | ||
|
|
42e7a59cca | ||
|
|
2cd019ee47 | ||
|
|
bc8b553bec | ||
|
|
0ba98be899 | ||
|
|
d6899134a7 | ||
|
|
5253031110 | ||
|
|
a203c87f0d | ||
|
|
37fc0e6840 | ||
|
|
0429e5d8ea | ||
|
|
3c147437ac | ||
|
|
e4b3f02286 | ||
|
|
5a8013e155 | ||
|
|
fdba5b8eac | ||
|
|
558a52802a | ||
|
|
4f416c7272 |
2
.gitmodules
vendored
2
.gitmodules
vendored
@@ -1,6 +1,6 @@
|
||||
[submodule "seastar"]
|
||||
path = seastar
|
||||
url = ../seastar
|
||||
url = ../scylla-seastar
|
||||
ignore = dirty
|
||||
[submodule "swagger-ui"]
|
||||
path = swagger-ui
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/sh
|
||||
|
||||
VERSION=666.development
|
||||
VERSION=1.7.1
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
@@ -246,7 +246,8 @@ future<> auth::auth::setup() {
|
||||
std::map<sstring, sstring> opts;
|
||||
opts["replication_factor"] = "1";
|
||||
auto ksm = keyspace_metadata::new_keyspace(AUTH_KS, "org.apache.cassandra.locator.SimpleStrategy", opts, true);
|
||||
f = service::get_local_migration_manager().announce_new_keyspace(ksm, false);
|
||||
// We use min_timestamp so that default keyspace metadata will lose to any manual adjustments. See issue #2129.
|
||||
f = service::get_local_migration_manager().announce_new_keyspace(ksm, api::min_timestamp, false);
|
||||
}
|
||||
|
||||
return f.then([] {
|
||||
|
||||
@@ -22,13 +22,28 @@
|
||||
#pragma once
|
||||
|
||||
#include <boost/intrusive/unordered_set.hpp>
|
||||
|
||||
#if __has_include(<boost/container/small_vector.hpp>)
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
template <typename T, size_t N>
|
||||
using small_vector = boost::container::small_vector<T, N>;
|
||||
|
||||
#else
|
||||
|
||||
#include <vector>
|
||||
template <typename T, size_t N>
|
||||
using small_vector = std::vector<T>;
|
||||
|
||||
#endif
|
||||
|
||||
#include "fnv1a_hasher.hh"
|
||||
#include "streamed_mutation.hh"
|
||||
#include "mutation_partition.hh"
|
||||
|
||||
class cells_range {
|
||||
using ids_vector_type = boost::container::small_vector<column_id, 5>;
|
||||
using ids_vector_type = small_vector<column_id, 5>;
|
||||
|
||||
position_in_partition_view _position;
|
||||
ids_vector_type _ids;
|
||||
@@ -147,7 +162,7 @@ class cell_locker {
|
||||
// temporarily removed from its parent partition_entry.
|
||||
// Returns true if the cell_entry still exists in the new schema and
|
||||
// should be reinserted.
|
||||
bool upgrade(const schema& from, const schema& to, column_kind kind) {
|
||||
bool upgrade(const schema& from, const schema& to, column_kind kind) noexcept {
|
||||
auto& old_column_mapping = from.get_column_mapping();
|
||||
auto& column = old_column_mapping.column_at(kind, _address.id);
|
||||
auto cdef = to.get_column_definition(column.name());
|
||||
@@ -170,7 +185,9 @@ class cell_locker {
|
||||
}
|
||||
|
||||
~cell_entry() {
|
||||
assert(is_linked());
|
||||
if (!is_linked()) {
|
||||
return;
|
||||
}
|
||||
unlink();
|
||||
if (!--_parent._cell_count) {
|
||||
delete &_parent;
|
||||
@@ -286,10 +303,9 @@ class cell_locker {
|
||||
};
|
||||
|
||||
class equal_compare {
|
||||
schema_ptr _schema;
|
||||
dht::decorated_key_equals_comparator _cmp;
|
||||
public:
|
||||
explicit equal_compare(const schema s) : _cmp(s) { }
|
||||
explicit equal_compare(const schema& s) : _cmp(s) { }
|
||||
bool operator()(const dht::decorated_key& dk, const partition_entry& pe) {
|
||||
return _cmp(dk, pe._key);
|
||||
}
|
||||
@@ -386,22 +402,19 @@ struct cell_locker::locker {
|
||||
|
||||
partition_cells_range _range;
|
||||
partition_cells_range::iterator _current_ck;
|
||||
cells_range _cells_range;
|
||||
cells_range::const_iterator _current_cell;
|
||||
|
||||
std::vector<locked_cell> _locks;
|
||||
private:
|
||||
void update_ck() {
|
||||
if (!is_done()) {
|
||||
_cells_range = *_current_ck;
|
||||
_current_cell = _cells_range.begin();
|
||||
_current_cell = _current_ck->begin();
|
||||
}
|
||||
}
|
||||
|
||||
future<> lock_next();
|
||||
|
||||
bool is_done() const { return _current_ck == _range.end(); }
|
||||
std::vector<locked_cell> get() && { return std::move(_locks); }
|
||||
public:
|
||||
explicit locker(const ::schema& s, partition_entry& pe, partition_cells_range&& range)
|
||||
: _hasher(s)
|
||||
@@ -413,18 +426,22 @@ public:
|
||||
update_ck();
|
||||
}
|
||||
|
||||
future<std::vector<locked_cell>> lock_all() && {
|
||||
locker(const locker&) = delete;
|
||||
locker(locker&&) = delete;
|
||||
|
||||
future<> lock_all() {
|
||||
// Cannot defer before first call to lock_next().
|
||||
return lock_next().then([this] {
|
||||
return do_until([this] { return is_done(); }, [this] {
|
||||
return lock_next();
|
||||
}).then([&] {
|
||||
return std::move(*this).get();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
std::vector<locked_cell> get() && { return std::move(_locks); }
|
||||
};
|
||||
|
||||
inline
|
||||
future<std::vector<locked_cell>> cell_locker::lock_cells(const dht::decorated_key& dk, partition_cells_range&& range) {
|
||||
partition_entry::hasher pe_hash;
|
||||
partition_entry::equal_compare pe_eq(*_schema);
|
||||
@@ -460,14 +477,17 @@ future<std::vector<locked_cell>> cell_locker::lock_cells(const dht::decorated_ke
|
||||
return make_ready_future<std::vector<locked_cell>>(std::move(locks));
|
||||
}
|
||||
|
||||
return do_with(locker(*_schema, *it, std::move(range)), [] (auto& locker) mutable {
|
||||
return std::move(locker).lock_all();
|
||||
auto l = std::make_unique<locker>(*_schema, *it, std::move(range));
|
||||
auto f = l->lock_all();
|
||||
return f.then([l = std::move(l)] {
|
||||
return std::move(*l).get();
|
||||
});
|
||||
}
|
||||
|
||||
inline
|
||||
future<> cell_locker::locker::lock_next() {
|
||||
while (!is_done()) {
|
||||
if (_current_cell == _cells_range.end() || _cells_range.empty()) {
|
||||
if (_current_cell == _current_ck->end()) {
|
||||
++_current_ck;
|
||||
update_ck();
|
||||
continue;
|
||||
@@ -475,7 +495,7 @@ future<> cell_locker::locker::lock_next() {
|
||||
|
||||
auto cid = *_current_cell++;
|
||||
|
||||
cell_address ca { position_in_partition(_cells_range.position()), cid };
|
||||
cell_address ca { position_in_partition(_current_ck->position()), cid };
|
||||
auto it = _partition_entry.cells().find(ca, _hasher, _eq_cmp);
|
||||
if (it != _partition_entry.cells().end()) {
|
||||
return it->lock().then([this, ce = it->shared_from_this()] () mutable {
|
||||
@@ -483,27 +503,25 @@ future<> cell_locker::locker::lock_next() {
|
||||
});
|
||||
}
|
||||
|
||||
auto cell = make_lw_shared<cell_entry>(_partition_entry, position_in_partition(_cells_range.position()), cid);
|
||||
auto cell = make_lw_shared<cell_entry>(_partition_entry, position_in_partition(_current_ck->position()), cid);
|
||||
_partition_entry.insert(cell);
|
||||
_locks.emplace_back(std::move(cell));
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
inline
|
||||
bool cell_locker::partition_entry::upgrade(schema_ptr new_schema) {
|
||||
if (_schema == new_schema) {
|
||||
return true;
|
||||
}
|
||||
|
||||
auto buckets = std::make_unique<cells_type::bucket_type[]>(initial_bucket_count);
|
||||
auto buckets = std::make_unique<cells_type::bucket_type[]>(_cells.bucket_count());
|
||||
auto cells = cells_type(cells_type::bucket_traits(buckets.get(), _cells.bucket_count()),
|
||||
cell_entry::hasher(*new_schema), cell_entry::equal_compare(*new_schema));
|
||||
|
||||
while (!_cells.empty()) {
|
||||
auto it = _cells.begin();
|
||||
auto& cell = *it;
|
||||
_cells.erase(it);
|
||||
|
||||
_cells.clear_and_dispose([&] (cell_entry* cell_ptr) noexcept {
|
||||
auto& cell = *cell_ptr;
|
||||
auto kind = cell.position().is_static_row() ? column_kind::static_column
|
||||
: column_kind::regular_column;
|
||||
auto reinsert = cell.upgrade(*_schema, *new_schema, kind);
|
||||
@@ -512,9 +530,16 @@ bool cell_locker::partition_entry::upgrade(schema_ptr new_schema) {
|
||||
} else {
|
||||
_cell_count--;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// bi::unordered_set move assignment is actually a swap.
|
||||
// Original _buckets cannot be destroyed before the container using them is
|
||||
// so we need to explicitly make sure that the original _cells is no more.
|
||||
_cells = std::move(cells);
|
||||
auto destroy = [] (auto) { };
|
||||
destroy(std::move(cells));
|
||||
|
||||
_buckets = std::move(buckets);
|
||||
_schema = new_schema;
|
||||
return _cell_count;
|
||||
}
|
||||
|
||||
@@ -788,3 +788,23 @@ commitlog_total_space_in_mb: -1
|
||||
# By default, Scylla binds all interfaces to the prometheus API
|
||||
# It is possible to restrict the listening address to a specific one
|
||||
# prometheus_address: 0.0.0.0
|
||||
|
||||
# Distribution of data among cores (shards) within a node
|
||||
#
|
||||
# Scylla distributes data within a node among shards, using a round-robin
|
||||
# strategy:
|
||||
# [shard0] [shard1] ... [shardN-1] [shard0] [shard1] ... [shardN-1] ...
|
||||
#
|
||||
# Scylla versions 1.6 and below used just one repetition of the pattern;
|
||||
# this interfered with data placement among nodes (vnodes).
|
||||
#
|
||||
# Scylla versions 1.7 and above use 4096 repetitions of the pattern; this
|
||||
# provides for better data distribution.
|
||||
#
|
||||
# the value below is log (base 2) of the number of repetitions.
|
||||
#
|
||||
# Set to 0 to avoid rewriting all data when upgrading from Scylla 1.6 and
|
||||
# below.
|
||||
#
|
||||
# Keep at 12 for new clusters.
|
||||
murmur3_partitioner_ignore_msb_bits: 12
|
||||
|
||||
@@ -230,6 +230,7 @@ scylla_tests = [
|
||||
'tests/virtual_reader_test',
|
||||
'tests/view_schema_test',
|
||||
'tests/counter_test',
|
||||
'tests/cell_locker_test',
|
||||
]
|
||||
|
||||
apps = [
|
||||
@@ -408,6 +409,7 @@ scylla_core = (['database.cc',
|
||||
'cql3/selection/selector.cc',
|
||||
'cql3/restrictions/statement_restrictions.cc',
|
||||
'cql3/result_set.cc',
|
||||
'cql3/variable_specifications.cc',
|
||||
'db/consistency_level.cc',
|
||||
'db/system_keyspace.cc',
|
||||
'db/schema_tables.cc',
|
||||
|
||||
41
counters.cc
41
counters.cc
@@ -139,8 +139,8 @@ stdx::optional<atomic_cell> counter_cell_view::difference(atomic_cell_view a, at
|
||||
void transform_counter_updates_to_shards(mutation& m, const mutation* current_state, uint64_t clock_offset) {
|
||||
// FIXME: allow current_state to be frozen_mutation
|
||||
|
||||
auto transform_new_row_to_shards = [clock_offset] (auto& cr) {
|
||||
cr.row().cells().for_each_cell([clock_offset] (auto, atomic_cell_or_collection& ac_o_c) {
|
||||
auto transform_new_row_to_shards = [clock_offset] (auto& cells) {
|
||||
cells.for_each_cell([clock_offset] (auto, atomic_cell_or_collection& ac_o_c) {
|
||||
auto acv = ac_o_c.as_atomic_cell();
|
||||
if (!acv.is_live()) {
|
||||
return; // continue -- we are in lambda
|
||||
@@ -153,32 +153,22 @@ void transform_counter_updates_to_shards(mutation& m, const mutation* current_st
|
||||
};
|
||||
|
||||
if (!current_state) {
|
||||
transform_new_row_to_shards(m.partition().static_row());
|
||||
for (auto& cr : m.partition().clustered_rows()) {
|
||||
transform_new_row_to_shards(cr);
|
||||
transform_new_row_to_shards(cr.row().cells());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
clustering_key::less_compare cmp(*m.schema());
|
||||
|
||||
auto& cstate = current_state->partition();
|
||||
auto it = cstate.clustered_rows().begin();
|
||||
auto end = cstate.clustered_rows().end();
|
||||
for (auto& cr : m.partition().clustered_rows()) {
|
||||
while (it != end && cmp(it->key(), cr.key())) {
|
||||
++it;
|
||||
}
|
||||
if (it == end || cmp(cr.key(), it->key())) {
|
||||
transform_new_row_to_shards(cr);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto transform_row_to_shards = [clock_offset] (auto& transformee, auto& state) {
|
||||
struct counter_shard_or_tombstone {
|
||||
stdx::optional<counter_shard> shard;
|
||||
tombstone tomb;
|
||||
};
|
||||
std::deque<std::pair<column_id, counter_shard_or_tombstone>> shards;
|
||||
it->row().cells().for_each_cell([&] (column_id id, const atomic_cell_or_collection& ac_o_c) {
|
||||
state.for_each_cell([&] (column_id id, const atomic_cell_or_collection& ac_o_c) {
|
||||
auto acv = ac_o_c.as_atomic_cell();
|
||||
if (!acv.is_live()) {
|
||||
counter_shard_or_tombstone cs_o_t { { },
|
||||
@@ -194,7 +184,7 @@ void transform_counter_updates_to_shards(mutation& m, const mutation* current_st
|
||||
shards.emplace_back(std::make_pair(id, counter_shard_or_tombstone { counter_shard(*cs), tombstone() }));
|
||||
});
|
||||
|
||||
cr.row().cells().for_each_cell([&] (column_id id, atomic_cell_or_collection& ac_o_c) {
|
||||
transformee.for_each_cell([&] (column_id id, atomic_cell_or_collection& ac_o_c) {
|
||||
auto acv = ac_o_c.as_atomic_cell();
|
||||
if (!acv.is_live()) {
|
||||
return; // continue -- we are in lambda
|
||||
@@ -224,5 +214,22 @@ void transform_counter_updates_to_shards(mutation& m, const mutation* current_st
|
||||
}
|
||||
ac_o_c = ccb.build(acv.timestamp());
|
||||
});
|
||||
};
|
||||
|
||||
transform_row_to_shards(m.partition().static_row(), current_state->partition().static_row());
|
||||
|
||||
auto& cstate = current_state->partition();
|
||||
auto it = cstate.clustered_rows().begin();
|
||||
auto end = cstate.clustered_rows().end();
|
||||
for (auto& cr : m.partition().clustered_rows()) {
|
||||
while (it != end && cmp(it->key(), cr.key())) {
|
||||
++it;
|
||||
}
|
||||
if (it == end || cmp(cr.key(), it->key())) {
|
||||
transform_new_row_to_shards(cr.row().cells());
|
||||
continue;
|
||||
}
|
||||
|
||||
transform_row_to_shards(cr.row().cells(), it->row().cells());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,6 +67,14 @@ functions::init() {
|
||||
declare(aggregate_fcts::make_max_function<int64_t>());
|
||||
declare(aggregate_fcts::make_min_function<int64_t>());
|
||||
|
||||
declare(aggregate_fcts::make_count_function<float>());
|
||||
declare(aggregate_fcts::make_max_function<float>());
|
||||
declare(aggregate_fcts::make_min_function<float>());
|
||||
|
||||
declare(aggregate_fcts::make_count_function<double>());
|
||||
declare(aggregate_fcts::make_max_function<double>());
|
||||
declare(aggregate_fcts::make_min_function<double>());
|
||||
|
||||
//FIXME:
|
||||
//declare(aggregate_fcts::make_count_function<bytes>());
|
||||
//declare(aggregate_fcts::make_max_function<bytes>());
|
||||
@@ -78,15 +86,17 @@ functions::init() {
|
||||
declare(make_blob_as_varchar_fct());
|
||||
declare(aggregate_fcts::make_sum_function<int32_t>());
|
||||
declare(aggregate_fcts::make_sum_function<int64_t>());
|
||||
declare(aggregate_fcts::make_avg_function<int32_t>());
|
||||
declare(aggregate_fcts::make_avg_function<int64_t>());
|
||||
declare(aggregate_fcts::make_sum_function<float>());
|
||||
declare(aggregate_fcts::make_sum_function<double>());
|
||||
#if 0
|
||||
declare(AggregateFcts.sumFunctionForFloat);
|
||||
declare(AggregateFcts.sumFunctionForDouble);
|
||||
declare(AggregateFcts.sumFunctionForDecimal);
|
||||
declare(AggregateFcts.sumFunctionForVarint);
|
||||
declare(AggregateFcts.avgFunctionForFloat);
|
||||
declare(AggregateFcts.avgFunctionForDouble);
|
||||
#endif
|
||||
declare(aggregate_fcts::make_avg_function<int32_t>());
|
||||
declare(aggregate_fcts::make_avg_function<int64_t>());
|
||||
declare(aggregate_fcts::make_avg_function<float>());
|
||||
declare(aggregate_fcts::make_avg_function<double>());
|
||||
#if 0
|
||||
declare(AggregateFcts.avgFunctionForVarint);
|
||||
declare(AggregateFcts.avgFunctionForDecimal);
|
||||
#endif
|
||||
|
||||
@@ -43,6 +43,7 @@
|
||||
#include "schema_builder.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "boost/range/adaptor/map.hpp"
|
||||
#include "stdx.hh"
|
||||
|
||||
namespace cql3 {
|
||||
|
||||
@@ -86,14 +87,14 @@ const sstring& alter_type_statement::keyspace() const
|
||||
return _name.get_keyspace();
|
||||
}
|
||||
|
||||
static int32_t get_idx_of_field(user_type type, shared_ptr<column_identifier> field)
|
||||
static stdx::optional<uint32_t> get_idx_of_field(user_type type, shared_ptr<column_identifier> field)
|
||||
{
|
||||
for (uint32_t i = 0; i < type->field_names().size(); ++i) {
|
||||
if (field->name() == type->field_names()[i]) {
|
||||
return i;
|
||||
return {i};
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
return {};
|
||||
}
|
||||
|
||||
void alter_type_statement::do_announce_migration(database& db, ::keyspace& ks, bool is_local_only)
|
||||
@@ -168,7 +169,7 @@ alter_type_statement::add_or_alter::add_or_alter(const ut_name& name, bool is_ad
|
||||
|
||||
user_type alter_type_statement::add_or_alter::do_add(database& db, user_type to_update) const
|
||||
{
|
||||
if (get_idx_of_field(to_update, _field_name) >= 0) {
|
||||
if (get_idx_of_field(to_update, _field_name)) {
|
||||
throw exceptions::invalid_request_exception(sprint("Cannot add new field %s to type %s: a field of the same name already exists", _field_name->name(), _name.to_string()));
|
||||
}
|
||||
|
||||
@@ -185,19 +186,19 @@ user_type alter_type_statement::add_or_alter::do_add(database& db, user_type to_
|
||||
|
||||
user_type alter_type_statement::add_or_alter::do_alter(database& db, user_type to_update) const
|
||||
{
|
||||
uint32_t idx = get_idx_of_field(to_update, _field_name);
|
||||
if (idx < 0) {
|
||||
stdx::optional<uint32_t> idx = get_idx_of_field(to_update, _field_name);
|
||||
if (!idx) {
|
||||
throw exceptions::invalid_request_exception(sprint("Unknown field %s in type %s", _field_name->name(), _name.to_string()));
|
||||
}
|
||||
|
||||
auto previous = to_update->field_types()[idx];
|
||||
auto previous = to_update->field_types()[*idx];
|
||||
auto new_type = _field_type->prepare(db, keyspace())->get_type();
|
||||
if (!new_type->is_compatible_with(*previous)) {
|
||||
throw exceptions::invalid_request_exception(sprint("Type %s in incompatible with previous type %s of field %s in user type %s", _field_type->to_string(), previous->as_cql3_type()->to_string(), _field_name->name(), _name.to_string()));
|
||||
}
|
||||
|
||||
std::vector<data_type> new_types(to_update->field_types());
|
||||
new_types[idx] = new_type;
|
||||
new_types[*idx] = new_type;
|
||||
return user_type_impl::get_instance(to_update->_keyspace, to_update->_name, to_update->field_names(), std::move(new_types));
|
||||
}
|
||||
|
||||
@@ -221,11 +222,11 @@ user_type alter_type_statement::renames::make_updated_type(database& db, user_ty
|
||||
std::vector<bytes> new_names(to_update->field_names());
|
||||
for (auto&& rename : _renames) {
|
||||
auto&& from = rename.first;
|
||||
int32_t idx = get_idx_of_field(to_update, from);
|
||||
if (idx < 0) {
|
||||
stdx::optional<uint32_t> idx = get_idx_of_field(to_update, from);
|
||||
if (!idx) {
|
||||
throw exceptions::invalid_request_exception(sprint("Unknown field %s in type %s", from->to_string(), _name.to_string()));
|
||||
}
|
||||
new_names[idx] = rename.second->name();
|
||||
new_names[*idx] = rename.second->name();
|
||||
}
|
||||
auto&& updated = user_type_impl::get_instance(to_update->_keyspace, to_update->_name, std::move(new_names), to_update->field_types());
|
||||
create_type_statement::check_for_duplicate_names(updated);
|
||||
|
||||
@@ -381,8 +381,18 @@ shared_ptr<prepared_statement>
|
||||
batch_statement::prepare(database& db, cql_stats& stats) {
|
||||
auto&& bound_names = get_bound_variables();
|
||||
|
||||
stdx::optional<sstring> first_ks;
|
||||
stdx::optional<sstring> first_cf;
|
||||
bool have_multiple_cfs = false;
|
||||
|
||||
std::vector<shared_ptr<cql3::statements::modification_statement>> statements;
|
||||
for (auto&& parsed : _parsed_statements) {
|
||||
if (!first_ks) {
|
||||
first_ks = parsed->keyspace();
|
||||
first_cf = parsed->column_family();
|
||||
} else {
|
||||
have_multiple_cfs = first_ks.value() != parsed->keyspace() || first_cf.value() != parsed->column_family();
|
||||
}
|
||||
statements.push_back(parsed->prepare(db, bound_names, stats));
|
||||
}
|
||||
|
||||
@@ -392,8 +402,13 @@ batch_statement::prepare(database& db, cql_stats& stats) {
|
||||
cql3::statements::batch_statement batch_statement_(bound_names->size(), _type, std::move(statements), std::move(prep_attrs), stats);
|
||||
batch_statement_.validate();
|
||||
|
||||
std::vector<uint16_t> partition_key_bind_indices;
|
||||
if (!have_multiple_cfs && batch_statement_.get_statements().size() > 0) {
|
||||
partition_key_bind_indices = bound_names->get_partition_key_bind_indexes(batch_statement_.get_statements()[0]->s);
|
||||
}
|
||||
return ::make_shared<prepared>(make_shared(std::move(batch_statement_)),
|
||||
bound_names->get_specifications());
|
||||
bound_names->get_specifications(),
|
||||
std::move(partition_key_bind_indices));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -597,9 +597,11 @@ namespace raw {
|
||||
|
||||
::shared_ptr<prepared_statement>
|
||||
modification_statement::modification_statement::prepare(database& db, cql_stats& stats) {
|
||||
schema_ptr schema = validation::validate_column_family(db, keyspace(), column_family());
|
||||
auto bound_names = get_bound_variables();
|
||||
auto statement = prepare(db, bound_names, stats);
|
||||
return ::make_shared<prepared>(std::move(statement), *bound_names);
|
||||
auto partition_key_bind_indices = bound_names->get_partition_key_bind_indexes(schema);
|
||||
return ::make_shared<prepared>(std::move(statement), *bound_names, std::move(partition_key_bind_indices));
|
||||
}
|
||||
|
||||
::shared_ptr<cql3::statements::modification_statement>
|
||||
|
||||
@@ -67,21 +67,22 @@ bool parsed_statement::uses_function(const sstring& ks_name, const sstring& func
|
||||
|
||||
}
|
||||
|
||||
prepared_statement::prepared_statement(::shared_ptr<cql_statement> statement_, std::vector<::shared_ptr<column_specification>> bound_names_)
|
||||
prepared_statement::prepared_statement(::shared_ptr<cql_statement> statement_, std::vector<::shared_ptr<column_specification>> bound_names_, std::vector<uint16_t> partition_key_bind_indices)
|
||||
: statement(std::move(statement_))
|
||||
, bound_names(std::move(bound_names_))
|
||||
, partition_key_bind_indices(std::move(partition_key_bind_indices))
|
||||
{ }
|
||||
|
||||
prepared_statement::prepared_statement(::shared_ptr<cql_statement> statement_, const variable_specifications& names)
|
||||
: prepared_statement(statement_, names.get_specifications())
|
||||
prepared_statement::prepared_statement(::shared_ptr<cql_statement> statement_, const variable_specifications& names, const std::vector<uint16_t>& partition_key_bind_indices)
|
||||
: prepared_statement(statement_, names.get_specifications(), partition_key_bind_indices)
|
||||
{ }
|
||||
|
||||
prepared_statement::prepared_statement(::shared_ptr<cql_statement> statement_, variable_specifications&& names)
|
||||
: prepared_statement(statement_, std::move(names).get_specifications())
|
||||
prepared_statement::prepared_statement(::shared_ptr<cql_statement> statement_, variable_specifications&& names, std::vector<uint16_t>&& partition_key_bind_indices)
|
||||
: prepared_statement(statement_, std::move(names).get_specifications(), std::move(partition_key_bind_indices))
|
||||
{ }
|
||||
|
||||
prepared_statement::prepared_statement(::shared_ptr<cql_statement>&& statement_)
|
||||
: prepared_statement(statement_, std::vector<::shared_ptr<column_specification>>())
|
||||
: prepared_statement(statement_, std::vector<::shared_ptr<column_specification>>(), std::vector<uint16_t>())
|
||||
{ }
|
||||
|
||||
}
|
||||
|
||||
@@ -60,12 +60,13 @@ public:
|
||||
sstring raw_cql_statement;
|
||||
const ::shared_ptr<cql_statement> statement;
|
||||
const std::vector<::shared_ptr<column_specification>> bound_names;
|
||||
std::vector<uint16_t> partition_key_bind_indices;
|
||||
|
||||
prepared_statement(::shared_ptr<cql_statement> statement_, std::vector<::shared_ptr<column_specification>> bound_names_);
|
||||
prepared_statement(::shared_ptr<cql_statement> statement_, std::vector<::shared_ptr<column_specification>> bound_names_, std::vector<uint16_t> partition_key_bind_indices);
|
||||
|
||||
prepared_statement(::shared_ptr<cql_statement> statement_, const variable_specifications& names);
|
||||
prepared_statement(::shared_ptr<cql_statement> statement_, const variable_specifications& names, const std::vector<uint16_t>& partition_key_bind_indices);
|
||||
|
||||
prepared_statement(::shared_ptr<cql_statement> statement_, variable_specifications&& names);
|
||||
prepared_statement(::shared_ptr<cql_statement> statement_, variable_specifications&& names, std::vector<uint16_t>&& partition_key_bind_indices);
|
||||
|
||||
prepared_statement(::shared_ptr<cql_statement>&& statement_);
|
||||
};
|
||||
|
||||
@@ -445,7 +445,9 @@ select_statement::select_statement(::shared_ptr<cf_name> cf_name,
|
||||
prepare_limit(db, bound_names),
|
||||
stats);
|
||||
|
||||
return ::make_shared<prepared>(std::move(stmt), std::move(*bound_names));
|
||||
auto partition_key_bind_indices = bound_names->get_partition_key_bind_indexes(schema);
|
||||
|
||||
return ::make_shared<prepared>(std::move(stmt), std::move(*bound_names), std::move(partition_key_bind_indices));
|
||||
}
|
||||
|
||||
::shared_ptr<restrictions::statement_restrictions>
|
||||
|
||||
98
cql3/variable_specifications.cc
Normal file
98
cql3/variable_specifications.cc
Normal file
@@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 2015 ScyllaDB
|
||||
*
|
||||
* Modified by ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "cql3/variable_specifications.hh"
|
||||
|
||||
namespace cql3 {
|
||||
|
||||
variable_specifications::variable_specifications(const std::vector<::shared_ptr<column_identifier>>& variable_names)
|
||||
: _variable_names{variable_names}
|
||||
, _specs{variable_names.size()}
|
||||
, _target_columns{variable_names.size()}
|
||||
{ }
|
||||
|
||||
::shared_ptr<variable_specifications> variable_specifications::empty() {
|
||||
return ::make_shared<variable_specifications>(std::vector<::shared_ptr<column_identifier>>{});
|
||||
}
|
||||
|
||||
size_t variable_specifications::size() const {
|
||||
return _variable_names.size();
|
||||
}
|
||||
|
||||
std::vector<::shared_ptr<column_specification>> variable_specifications::get_specifications() const & {
|
||||
return std::vector<::shared_ptr<column_specification>>(_specs.begin(), _specs.end());
|
||||
}
|
||||
|
||||
std::vector<::shared_ptr<column_specification>> variable_specifications::get_specifications() && {
|
||||
return std::move(_specs);
|
||||
}
|
||||
|
||||
std::vector<uint16_t> variable_specifications::get_partition_key_bind_indexes(schema_ptr schema) const {
|
||||
auto count = schema->partition_key_columns().size();
|
||||
std::vector<uint16_t> partition_key_positions(count, uint16_t(0));
|
||||
std::vector<bool> set(count, false);
|
||||
for (size_t i = 0; i < _target_columns.size(); i++) {
|
||||
auto& target_column = _target_columns[i];
|
||||
const auto* cdef = schema->get_column_definition(target_column->name->name());
|
||||
if (cdef && cdef->is_partition_key()) {
|
||||
partition_key_positions[cdef->position()] = i;
|
||||
set[cdef->position()] = true;
|
||||
}
|
||||
}
|
||||
for (bool b : set) {
|
||||
if (!b) {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
return partition_key_positions;
|
||||
}
|
||||
|
||||
void variable_specifications::add(int32_t bind_index, ::shared_ptr<column_specification> spec) {
|
||||
_target_columns[bind_index] = spec;
|
||||
auto name = _variable_names[bind_index];
|
||||
// Use the user name, if there is one
|
||||
if (name) {
|
||||
spec = ::make_shared<column_specification>(spec->ks_name, spec->cf_name, name, spec->type);
|
||||
}
|
||||
_specs[bind_index] = spec;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -53,41 +53,26 @@ class variable_specifications final {
|
||||
private:
|
||||
std::vector<shared_ptr<column_identifier>> _variable_names;
|
||||
std::vector<::shared_ptr<column_specification>> _specs;
|
||||
std::vector<::shared_ptr<column_specification>> _target_columns;
|
||||
|
||||
public:
|
||||
variable_specifications(const std::vector<::shared_ptr<column_identifier>>& variable_names)
|
||||
: _variable_names{variable_names}
|
||||
, _specs{variable_names.size()}
|
||||
{ }
|
||||
variable_specifications(const std::vector<::shared_ptr<column_identifier>>& variable_names);
|
||||
|
||||
/**
|
||||
* Returns an empty instance of <code>VariableSpecifications</code>.
|
||||
* @return an empty instance of <code>VariableSpecifications</code>
|
||||
*/
|
||||
static ::shared_ptr<variable_specifications> empty() {
|
||||
return ::make_shared<variable_specifications>(std::vector<::shared_ptr<column_identifier>>{});
|
||||
}
|
||||
static ::shared_ptr<variable_specifications> empty();
|
||||
|
||||
size_t size() const {
|
||||
return _variable_names.size();
|
||||
}
|
||||
size_t size() const;
|
||||
|
||||
std::vector<::shared_ptr<column_specification>> get_specifications() const & {
|
||||
return std::vector<::shared_ptr<column_specification>>(_specs.begin(), _specs.end());
|
||||
}
|
||||
std::vector<::shared_ptr<column_specification>> get_specifications() const &;
|
||||
|
||||
std::vector<::shared_ptr<column_specification>> get_specifications() && {
|
||||
return std::move(_specs);
|
||||
}
|
||||
std::vector<::shared_ptr<column_specification>> get_specifications() &&;
|
||||
|
||||
void add(int32_t bind_index, ::shared_ptr<column_specification> spec) {
|
||||
auto name = _variable_names[bind_index];
|
||||
// Use the user name, if there is one
|
||||
if (name) {
|
||||
spec = ::make_shared<column_specification>(spec->ks_name, spec->cf_name, name, spec->type);
|
||||
}
|
||||
_specs[bind_index] = spec;
|
||||
}
|
||||
std::vector<uint16_t> get_partition_key_bind_indexes(schema_ptr schema) const;
|
||||
|
||||
void add(int32_t bind_index, ::shared_ptr<column_specification> spec);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
62
database.cc
62
database.cc
@@ -1082,6 +1082,7 @@ column_family::try_flush_memtable_to_sstable(lw_shared_ptr<memtable> old) {
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||
});
|
||||
} catch (...) {
|
||||
newtab->mark_for_deletion();
|
||||
dblog.error("failed to write sstable {}: {}", newtab->get_filename(), std::current_exception());
|
||||
// If we failed this write we will try the write again and that will create a new flush reader
|
||||
// that will decrease dirty memory again. So we need to reset the accounting.
|
||||
@@ -1250,7 +1251,7 @@ void column_family::rebuild_statistics() {
|
||||
// making the two ranges compatible when compiling with boost 1.55.
|
||||
// Noone is actually moving anything...
|
||||
std::move(*_sstables->all()))) {
|
||||
update_stats_for_new_sstable(tab->data_size(), tab->get_shards_for_this_sstable());
|
||||
update_stats_for_new_sstable(tab->bytes_on_disk(), tab->get_shards_for_this_sstable());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1379,13 +1380,20 @@ future<> column_family::cleanup_sstables(sstables::compaction_descriptor descrip
|
||||
auto owned_ranges = make_lw_shared<dht::token_range_vector>(std::move(r));
|
||||
auto sstables_to_cleanup = make_lw_shared<std::vector<sstables::shared_sstable>>(std::move(descriptor.sstables));
|
||||
|
||||
return parallel_for_each(*sstables_to_cleanup, [this, owned_ranges = std::move(owned_ranges), sstables_to_cleanup] (auto& sst) {
|
||||
return do_for_each(*sstables_to_cleanup, [this, owned_ranges = std::move(owned_ranges), sstables_to_cleanup] (auto& sst) {
|
||||
if (!owned_ranges->empty() && !needs_cleanup(sst, owned_ranges, _schema)) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
std::vector<sstables::shared_sstable> sstable_to_compact({ sst });
|
||||
return this->compact_sstables(sstables::compaction_descriptor(std::move(sstable_to_compact), sst->get_sstable_level()), true);
|
||||
// this semaphore ensures that only one cleanup will run per shard.
|
||||
// That's to prevent node from running out of space when almost all sstables
|
||||
// need cleanup, so if sstables are cleaned in parallel, we may need almost
|
||||
// twice the disk space used by those sstables.
|
||||
static thread_local semaphore sem(1);
|
||||
|
||||
return with_semaphore(sem, 1, [this, &sst] {
|
||||
return this->compact_sstables(sstables::compaction_descriptor({ sst }, sst->get_sstable_level()), true);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1525,16 +1533,19 @@ future<> distributed_loader::open_sstable(distributed<database>& db, sstables::e
|
||||
|
||||
return db.invoke_on(column_family::calculate_shard_from_sstable_generation(comps.generation),
|
||||
[&db, comps = std::move(comps), func = std::move(func)] (database& local) {
|
||||
auto& cf = local.find_column_family(comps.ks, comps.cf);
|
||||
|
||||
auto f = sstables::sstable::load_shared_components(cf.schema(), cf._config.datadir, comps.generation, comps.version, comps.format);
|
||||
return f.then([&db, comps = std::move(comps), func = std::move(func)] (sstables::sstable_open_info info) {
|
||||
// shared components loaded, now opening sstable in all shards with shared components
|
||||
return do_with(std::move(info), [&db, comps = std::move(comps), func = std::move(func)] (auto& info) {
|
||||
return invoke_all_with_ptr(db, std::move(info.components),
|
||||
[owners = info.owners, data = info.data.dup(), index = info.index.dup(), comps, func] (database& db, auto components) {
|
||||
auto& cf = db.find_column_family(comps.ks, comps.cf);
|
||||
return func(cf, sstables::foreign_sstable_open_info{std::move(components), owners, data, index});
|
||||
return with_semaphore(local.sstable_load_concurrency_sem(), 1, [&db, &local, comps = std::move(comps), func = std::move(func)] {
|
||||
auto& cf = local.find_column_family(comps.ks, comps.cf);
|
||||
|
||||
auto f = sstables::sstable::load_shared_components(cf.schema(), cf._config.datadir, comps.generation, comps.version, comps.format);
|
||||
return f.then([&db, comps = std::move(comps), func = std::move(func)] (sstables::sstable_open_info info) {
|
||||
// shared components loaded, now opening sstable in all shards with shared components
|
||||
return do_with(std::move(info), [&db, comps = std::move(comps), func = std::move(func)] (auto& info) {
|
||||
return invoke_all_with_ptr(db, std::move(info.components),
|
||||
[owners = info.owners, data = info.data.dup(), index = info.index.dup(), comps, func] (database& db, auto components) {
|
||||
auto& cf = db.find_column_family(comps.ks, comps.cf);
|
||||
return func(cf, sstables::foreign_sstable_open_info{std::move(components), owners, data, index});
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -1706,7 +1717,7 @@ future<> distributed_loader::populate_column_family(distributed<database>& db, s
|
||||
return make_ready_future<>();
|
||||
});
|
||||
}).then([verifier, sstdir, descriptor, ks = std::move(ks), cf = std::move(cf)] {
|
||||
return parallel_for_each(*verifier, [sstdir = std::move(sstdir), ks = std::move(ks), cf = std::move(cf), descriptor] (auto v) {
|
||||
return do_for_each(*verifier, [sstdir = std::move(sstdir), ks = std::move(ks), cf = std::move(cf), descriptor, verifier] (auto v) {
|
||||
if (v.second == status::has_temporary_toc_file) {
|
||||
unsigned long gen = v.first;
|
||||
assert(descriptor->version);
|
||||
@@ -1745,9 +1756,9 @@ database::database(const db::config& cfg)
|
||||
: _stats(make_lw_shared<db_stats>())
|
||||
, _cfg(std::make_unique<db::config>(cfg))
|
||||
// Allow system tables a pool of 10 MB memory to write, but never block on other regions.
|
||||
, _system_dirty_memory_manager(*this, 10 << 20)
|
||||
, _dirty_memory_manager(*this, memory::stats().total_memory() * 0.45)
|
||||
, _streaming_dirty_memory_manager(*this, memory::stats().total_memory() * 0.10)
|
||||
, _system_dirty_memory_manager(*this, 10 << 20, cfg.virtual_dirty_soft_limit())
|
||||
, _dirty_memory_manager(*this, memory::stats().total_memory() * 0.45, cfg.virtual_dirty_soft_limit())
|
||||
, _streaming_dirty_memory_manager(*this, memory::stats().total_memory() * 0.10, cfg.virtual_dirty_soft_limit())
|
||||
, _version(empty_version)
|
||||
, _enable_incremental_backups(cfg.incremental_backups())
|
||||
{
|
||||
@@ -1802,7 +1813,7 @@ database::setup_metrics() {
|
||||
});
|
||||
|
||||
_metrics.add_group("database", {
|
||||
sm::make_gauge("requests_blocked_memory", [this] { return _dirty_memory_manager.region_group().blocked_requests(); },
|
||||
sm::make_gauge("requests_blocked_memory_current", [this] { return _dirty_memory_manager.region_group().blocked_requests(); },
|
||||
sm::description(
|
||||
seastar::format("Holds the current number of requests blocked due to reaching the memory quota ({}B). "
|
||||
"Non-zero value indicates that our bottleneck is memory and more specifically - the memory quota allocated for the \"database\" component.", _dirty_memory_manager.throttle_threshold()))),
|
||||
@@ -2663,7 +2674,7 @@ column_family::apply(const frozen_mutation& m, const schema_ptr& m_schema, const
|
||||
do_apply(m, m_schema, rp);
|
||||
}
|
||||
|
||||
future<frozen_mutation> database::do_apply_counter_update(column_family& cf, const frozen_mutation& fm, schema_ptr m_schema) {
|
||||
future<frozen_mutation> database::do_apply_counter_update(column_family& cf, const frozen_mutation& fm, schema_ptr m_schema, timeout_clock::time_point timeout) {
|
||||
auto m = fm.unfreeze(m_schema);
|
||||
m.upgrade(cf.schema());
|
||||
|
||||
@@ -2689,9 +2700,9 @@ future<frozen_mutation> database::do_apply_counter_update(column_family& cf, con
|
||||
cql_serialization_format::internal(), query::max_rows);
|
||||
|
||||
return do_with(std::move(slice), std::move(m), std::vector<locked_cell>(), stdx::optional<frozen_mutation>(),
|
||||
[this, &cf] (const query::partition_slice& slice, mutation& m, std::vector<locked_cell>& locks,
|
||||
[this, &cf, timeout] (const query::partition_slice& slice, mutation& m, std::vector<locked_cell>& locks,
|
||||
stdx::optional<frozen_mutation>& fm) mutable {
|
||||
return cf.lock_counter_cells(m).then([&, m_schema = cf.schema(), this] (std::vector<locked_cell> lcs) {
|
||||
return cf.lock_counter_cells(m).then([&, timeout, m_schema = cf.schema(), this] (std::vector<locked_cell> lcs) {
|
||||
locks = std::move(lcs);
|
||||
|
||||
// Before counter update is applied it needs to be transformed from
|
||||
@@ -2702,7 +2713,7 @@ future<frozen_mutation> database::do_apply_counter_update(column_family& cf, con
|
||||
return mutation_query(m_schema, cf.as_mutation_source({}),
|
||||
dht::partition_range::make_singular(m.decorated_key()),
|
||||
slice, query::max_rows, query::max_partitions,
|
||||
gc_clock::now(), { }).then([this, &cf, &m, &fm, m_schema] (auto result) {
|
||||
gc_clock::now(), { }).then([this, timeout, &cf, &m, &fm, m_schema] (auto result) {
|
||||
|
||||
// ...now, that we got existing state of all affected counter
|
||||
// cells we can look for our shard in each of them, increment
|
||||
@@ -2714,9 +2725,8 @@ future<frozen_mutation> database::do_apply_counter_update(column_family& cf, con
|
||||
transform_counter_updates_to_shards(m, mopt ? &*mopt : nullptr, cf.failed_counter_applies_to_memtable());
|
||||
|
||||
// FIXME: oh dear, another freeze
|
||||
// FIXME: timeout
|
||||
fm = freeze(m);
|
||||
return this->do_apply(m_schema, *fm, { });
|
||||
return this->do_apply(m_schema, *fm, timeout);
|
||||
}).then([&fm] {
|
||||
return std::move(*fm);
|
||||
});
|
||||
@@ -2854,7 +2864,7 @@ future<> dirty_memory_manager::flush_when_needed() {
|
||||
});
|
||||
}
|
||||
|
||||
void dirty_memory_manager::start_reclaiming() {
|
||||
void dirty_memory_manager::start_reclaiming() noexcept {
|
||||
_should_flush.signal();
|
||||
}
|
||||
|
||||
@@ -2876,7 +2886,7 @@ future<frozen_mutation> database::apply_counter_update(schema_ptr s, const froze
|
||||
}
|
||||
try {
|
||||
auto& cf = find_column_family(m.column_family_id());
|
||||
return do_apply_counter_update(cf, m, s);
|
||||
return do_apply_counter_update(cf, m, s, timeout);
|
||||
} catch (no_such_column_family&) {
|
||||
dblog.error("Attempting to mutate non-existent table {}", m.column_family_id());
|
||||
throw;
|
||||
|
||||
14
database.hh
14
database.hh
@@ -149,7 +149,7 @@ class dirty_memory_manager: public logalloc::region_group_reclaimer {
|
||||
std::unordered_map<const logalloc::region*, flush_permit> _flush_manager;
|
||||
|
||||
future<> _waiting_flush;
|
||||
virtual void start_reclaiming() override;
|
||||
virtual void start_reclaiming() noexcept override;
|
||||
|
||||
bool has_pressure() const {
|
||||
return over_soft_limit();
|
||||
@@ -193,8 +193,8 @@ public:
|
||||
//
|
||||
// We then set the soft limit to 80 % of the virtual dirty hard limit, which is equal to 40 % of
|
||||
// the user-supplied threshold.
|
||||
dirty_memory_manager(database& db, size_t threshold)
|
||||
: logalloc::region_group_reclaimer(threshold / 2, threshold * 0.40)
|
||||
dirty_memory_manager(database& db, size_t threshold, double soft_limit)
|
||||
: logalloc::region_group_reclaimer(threshold / 2, threshold * soft_limit / 2)
|
||||
, _db(&db)
|
||||
, _region_group(*this)
|
||||
, _flush_serializer(1)
|
||||
@@ -1076,6 +1076,7 @@ private:
|
||||
::cf_stats _cf_stats;
|
||||
static constexpr size_t max_concurrent_reads() { return 100; }
|
||||
static constexpr size_t max_system_concurrent_reads() { return 10; }
|
||||
static constexpr size_t max_concurrent_sstable_loads() { return 3; }
|
||||
struct db_stats {
|
||||
uint64_t total_writes = 0;
|
||||
uint64_t total_writes_failed = 0;
|
||||
@@ -1101,6 +1102,8 @@ private:
|
||||
semaphore _system_read_concurrency_sem{max_system_concurrent_reads()};
|
||||
restricted_mutation_reader_config _system_read_concurrency_config;
|
||||
|
||||
semaphore _sstable_load_concurrency_sem{max_concurrent_sstable_loads()};
|
||||
|
||||
std::unordered_map<sstring, keyspace> _keyspaces;
|
||||
std::unordered_map<utils::UUID, lw_shared_ptr<column_family>> _column_families;
|
||||
std::unordered_map<std::pair<sstring, sstring>, utils::UUID, utils::tuple_hash> _ks_cf_to_uuid;
|
||||
@@ -1126,7 +1129,7 @@ private:
|
||||
|
||||
query::result_memory_limiter _result_memory_limiter;
|
||||
|
||||
future<frozen_mutation> do_apply_counter_update(column_family& cf, const frozen_mutation& fm, schema_ptr m_schema);
|
||||
future<frozen_mutation> do_apply_counter_update(column_family& cf, const frozen_mutation& fm, schema_ptr m_schema, timeout_clock::time_point timeout);
|
||||
public:
|
||||
static utils::UUID empty_version;
|
||||
|
||||
@@ -1257,6 +1260,9 @@ public:
|
||||
// Returns a mutable reference to the _system_read_concurrency_sem member.
semaphore& system_keyspace_read_concurrency_sem() {
|
||||
return _system_read_concurrency_sem;
|
||||
}
|
||||
// Returns a mutable reference to the _sstable_load_concurrency_sem member.
semaphore& sstable_load_concurrency_sem() {
|
||||
return _sstable_load_concurrency_sem;
|
||||
}
|
||||
|
||||
friend class distributed_loader;
|
||||
};
|
||||
|
||||
@@ -1588,7 +1588,7 @@ db::commitlog::read_log_file(file f, commit_load_reader_func next, position_type
|
||||
bool failed = false;
|
||||
|
||||
work(file f, position_type o = 0)
|
||||
: f(f), fin(make_file_input_stream(f, o, make_file_input_stream_options())), start_off(o) {
|
||||
: f(f), fin(make_file_input_stream(f, 0, make_file_input_stream_options())), start_off(o) {
|
||||
}
|
||||
work(work&&) = default;
|
||||
|
||||
|
||||
@@ -61,13 +61,19 @@
|
||||
|
||||
static logging::logger logger("commitlog_replayer");
|
||||
|
||||
struct column_mappings {
|
||||
std::unordered_map<table_schema_version, column_mapping> map;
|
||||
future<> stop() { return make_ready_future<>(); }
|
||||
};
|
||||
|
||||
class db::commitlog_replayer::impl {
|
||||
seastar::sharded<column_mappings> _column_mappings;
|
||||
struct column_mappings {
|
||||
std::unordered_map<table_schema_version, column_mapping> map;
|
||||
future<> stop() { return make_ready_future<>(); }
|
||||
};
|
||||
|
||||
// we want the processing methods to be const, since they use
|
||||
// shard-sharing of data -> read only
|
||||
// this one is special since it is thread local.
|
||||
// Should actually make sharded::local a const function (it does
|
||||
// not modify content), but...
|
||||
mutable seastar::sharded<column_mappings> _column_mappings;
|
||||
|
||||
friend class db::commitlog_replayer;
|
||||
public:
|
||||
impl(seastar::sharded<cql3::query_processor>& db);
|
||||
@@ -94,13 +100,35 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
future<> process(stats*, temporary_buffer<char> buf, replay_position rp);
|
||||
future<stats> recover(sstring file);
|
||||
// move start/stop of the thread local bookkeep to "top level"
|
||||
// and also make sure to assert on it actually being started.
|
||||
// Starts the sharded _column_mappings instance; the returned future is the
// one produced by its start() call.
future<> start() {
|
||||
return _column_mappings.start();
|
||||
}
|
||||
// Stops the sharded _column_mappings instance; the returned future is the
// one produced by its stop() call.
future<> stop() {
|
||||
return _column_mappings.stop();
|
||||
}
|
||||
|
||||
future<> process(stats*, temporary_buffer<char> buf, replay_position rp) const;
|
||||
future<stats> recover(sstring file) const;
|
||||
|
||||
typedef std::unordered_map<utils::UUID, replay_position> rp_map;
|
||||
typedef std::unordered_map<unsigned, rp_map> shard_rpm_map;
|
||||
typedef std::unordered_map<unsigned, replay_position> shard_rp_map;
|
||||
|
||||
// Looks up the replay position recorded in _min_pos for `shard`.
// Returns a default-constructed replay_position when the shard has no entry;
// unlike operator[], the lookup never inserts into the map.
replay_position min_pos(unsigned shard) const {
    const auto it = _min_pos.find(shard);
    if (it == _min_pos.end()) {
        return replay_position();
    }
    return it->second;
}
|
||||
// Looks up the replay position recorded in _rpm for the column family `uuid`
// on `shard`. Returns a default-constructed replay_position when either the
// shard or the column family has no entry; the lookup never inserts.
replay_position cf_min_pos(const utils::UUID& uuid, unsigned shard) const {
    const auto shard_it = _rpm.find(shard);
    if (shard_it != _rpm.end()) {
        const auto& per_cf = shard_it->second;
        const auto cf_it = per_cf.find(uuid);
        if (cf_it != per_cf.end()) {
            return cf_it->second;
        }
    }
    return replay_position();
}
|
||||
|
||||
seastar::sharded<cql3::query_processor>&
|
||||
_qp;
|
||||
shard_rpm_map
|
||||
@@ -175,7 +203,6 @@ future<> db::commitlog_replayer::impl::init() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto&p : _min_pos) {
|
||||
logger.debug("minimum position for shard {}: {}", p.first, p.second);
|
||||
}
|
||||
@@ -188,9 +215,11 @@ future<> db::commitlog_replayer::impl::init() {
|
||||
}
|
||||
|
||||
future<db::commitlog_replayer::impl::stats>
|
||||
db::commitlog_replayer::impl::recover(sstring file) {
|
||||
db::commitlog_replayer::impl::recover(sstring file) const {
|
||||
assert(_column_mappings.local_is_initialized());
|
||||
|
||||
replay_position rp{commitlog::descriptor(file)};
|
||||
auto gp = _min_pos[rp.shard_id()];
|
||||
auto gp = min_pos(rp.shard_id());
|
||||
|
||||
if (rp.id < gp.id) {
|
||||
logger.debug("skipping replay of fully-flushed {}", file);
|
||||
@@ -220,7 +249,7 @@ db::commitlog_replayer::impl::recover(sstring file) {
|
||||
});
|
||||
}
|
||||
|
||||
future<> db::commitlog_replayer::impl::process(stats* s, temporary_buffer<char> buf, replay_position rp) {
|
||||
future<> db::commitlog_replayer::impl::process(stats* s, temporary_buffer<char> buf, replay_position rp) const {
|
||||
try {
|
||||
|
||||
commitlog_entry_reader cer(buf);
|
||||
@@ -238,17 +267,16 @@ future<> db::commitlog_replayer::impl::process(stats* s, temporary_buffer<char>
|
||||
const column_mapping& src_cm = cm_it->second;
|
||||
|
||||
auto shard_id = rp.shard_id();
|
||||
if (rp < _min_pos[shard_id]) {
|
||||
if (rp < min_pos(shard_id)) {
|
||||
logger.trace("entry {} is less than global min position. skipping", rp);
|
||||
s->skipped_mutations++;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
auto uuid = fm.column_family_id();
|
||||
auto& map = _rpm[shard_id];
|
||||
auto i = map.find(uuid);
|
||||
if (i != map.end() && rp <= i->second) {
|
||||
logger.trace("entry {} at {} is younger than recorded replay position {}. skipping", fm.column_family_id(), rp, i->second);
|
||||
auto cf_rp = cf_min_pos(uuid, shard_id);
|
||||
if (rp <= cf_rp) {
|
||||
logger.trace("entry {} at {} is younger than recorded replay position {}. skipping", fm.column_family_id(), rp, cf_rp);
|
||||
s->skipped_mutations++;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
@@ -323,42 +351,55 @@ future<db::commitlog_replayer> db::commitlog_replayer::create_replayer(seastar::
|
||||
}
|
||||
|
||||
future<> db::commitlog_replayer::recover(std::vector<sstring> files) {
|
||||
return _impl->_column_mappings.start().then([this, files = std::move(files)] {
|
||||
typedef std::unordered_multimap<unsigned, sstring> shard_file_map;
|
||||
|
||||
logger.info("Replaying {}", join(", ", files));
|
||||
return map_reduce(files, [this](auto f) {
|
||||
logger.debug("Replaying {}", f);
|
||||
return _impl->recover(f).then([f](impl::stats stats) {
|
||||
if (stats.corrupt_bytes != 0) {
|
||||
logger.warn("Corrupted file: {}. {} bytes skipped.", f, stats.corrupt_bytes);
|
||||
}
|
||||
logger.debug("Log replay of {} complete, {} replayed mutations ({} invalid, {} skipped)"
|
||||
, f
|
||||
, stats.applied_mutations
|
||||
, stats.invalid_mutations
|
||||
, stats.skipped_mutations
|
||||
|
||||
// pre-compute work per shard already.
|
||||
auto map = ::make_lw_shared<shard_file_map>();
|
||||
for (auto& f : files) {
|
||||
commitlog::descriptor d(f);
|
||||
replay_position p = d;
|
||||
map->emplace(p.shard_id() % smp::count, std::move(f));
|
||||
}
|
||||
|
||||
return _impl->start().then([this, map] {
|
||||
return map_reduce(smp::all_cpus(), [this, map](unsigned id) {
|
||||
return smp::submit_to(id, [this, id, map]() {
|
||||
auto total = ::make_lw_shared<impl::stats>();
|
||||
// TODO: or something. For now, we do this serialized per shard,
|
||||
// to reduce mutation congestion. We could probably (says avi)
|
||||
// do 2 segments in parallel or something, but lets use this first.
|
||||
auto range = map->equal_range(id);
|
||||
return do_for_each(range.first, range.second, [this, total](const std::pair<unsigned, sstring>& p) {
|
||||
auto&f = p.second;
|
||||
logger.debug("Replaying {}", f);
|
||||
return _impl->recover(f).then([f, total](impl::stats stats) {
|
||||
if (stats.corrupt_bytes != 0) {
|
||||
logger.warn("Corrupted file: {}. {} bytes skipped.", f, stats.corrupt_bytes);
|
||||
}
|
||||
logger.debug("Log replay of {} complete, {} replayed mutations ({} invalid, {} skipped)"
|
||||
, f
|
||||
, stats.applied_mutations
|
||||
, stats.invalid_mutations
|
||||
, stats.skipped_mutations
|
||||
);
|
||||
*total += stats;
|
||||
});
|
||||
}).then([total] {
|
||||
return make_ready_future<impl::stats>(*total);
|
||||
});
|
||||
});
|
||||
}, impl::stats(), std::plus<impl::stats>()).then([](impl::stats totals) {
|
||||
logger.info("Log replay complete, {} replayed mutations ({} invalid, {} skipped)"
|
||||
, totals.applied_mutations
|
||||
, totals.invalid_mutations
|
||||
, totals.skipped_mutations
|
||||
);
|
||||
return make_ready_future<impl::stats>(stats);
|
||||
}).handle_exception([f](auto ep) -> future<impl::stats> {
|
||||
logger.error("Error recovering {}: {}", f, ep);
|
||||
try {
|
||||
std::rethrow_exception(ep);
|
||||
} catch (std::invalid_argument&) {
|
||||
logger.error("Scylla cannot process {}. Make sure to fully flush all Cassandra commit log files to sstable before migrating.", f);
|
||||
throw;
|
||||
} catch (...) {
|
||||
throw;
|
||||
}
|
||||
});
|
||||
}, impl::stats(), std::plus<impl::stats>()).then([](impl::stats totals) {
|
||||
logger.info("Log replay complete, {} replayed mutations ({} invalid, {} skipped)"
|
||||
, totals.applied_mutations
|
||||
, totals.invalid_mutations
|
||||
, totals.skipped_mutations
|
||||
);
|
||||
}).finally([this] {
|
||||
return _impl->_column_mappings.stop();
|
||||
return _impl->stop();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
future<> db::commitlog_replayer::recover(sstring f) {
|
||||
|
||||
@@ -739,6 +739,7 @@ public:
|
||||
val(prometheus_prefix, sstring, "scylla", Used, "Set the prefix of the exported Prometheus metrics. Changing this will break Scylla's dashboard compatibility, do not change unless you know what you are doing.") \
|
||||
val(abort_on_lsa_bad_alloc, bool, false, Used, "Abort when allocation in LSA region fails") \
|
||||
val(murmur3_partitioner_ignore_msb_bits, unsigned, 0, Used, "Number of most siginificant token bits to ignore in murmur3 partitioner; increase for very large clusters") \
|
||||
val(virtual_dirty_soft_limit, double, 0.6, Used, "Soft limit of virtual dirty memory expressed as a portion of the hard limit") \
|
||||
/* done! */
|
||||
|
||||
#define _make_value_member(name, type, deflt, status, desc, ...) \
|
||||
|
||||
@@ -77,6 +77,15 @@ namespace schema_tables {
|
||||
|
||||
logging::logger logger("schema_tables");
|
||||
|
||||
struct push_back_and_return {
|
||||
std::vector<mutation> muts;
|
||||
|
||||
std::vector<mutation> operator()(mutation&& m) {
|
||||
muts.emplace_back(std::move(m));
|
||||
return std::move(muts);
|
||||
}
|
||||
};
|
||||
|
||||
struct qualified_name {
|
||||
sstring keyspace_name;
|
||||
sstring table_name;
|
||||
@@ -547,6 +556,14 @@ read_schema_partition_for_table(distributed<service::storage_proxy>& proxy, sche
|
||||
return query_partition_mutation(proxy.local(), std::move(schema), std::move(cmd), std::move(keyspace_key));
|
||||
}
|
||||
|
||||
// Reads the partition keyed by `keyspace_name` from the table described by
// keyspaces(), using a full slice, and returns it as a mutation.
// The query is issued through the local instance of `proxy`.
future<mutation>
read_keyspace_mutation(distributed<service::storage_proxy>& proxy, const sstring& keyspace_name) {
    schema_ptr keyspaces_schema = keyspaces();
    auto pkey = partition_key::from_singular(*keyspaces_schema, keyspace_name);
    // The command is built before the schema pointer is moved into the call below.
    auto read_cmd = make_lw_shared<query::read_command>(keyspaces_schema->id(), keyspaces_schema->version(), query::full_slice);
    return query_partition_mutation(proxy.local(), std::move(keyspaces_schema), std::move(read_cmd), std::move(pkey));
}
|
||||
|
||||
static semaphore the_merge_lock {1};
|
||||
|
||||
future<> merge_lock() {
|
||||
@@ -1182,19 +1199,18 @@ void add_type_to_schema_mutation(user_type type, api::timestamp_type timestamp,
|
||||
mutations.emplace_back(std::move(m));
|
||||
}
|
||||
|
||||
std::vector<mutation> make_create_type_mutations(lw_shared_ptr<keyspace_metadata> keyspace, user_type type, api::timestamp_type timestamp)
|
||||
future<std::vector<mutation>> make_create_type_mutations(lw_shared_ptr<keyspace_metadata> keyspace, user_type type, api::timestamp_type timestamp)
|
||||
{
|
||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||
auto mutations = make_create_keyspace_mutations(keyspace, timestamp, false);
|
||||
std::vector<mutation> mutations;
|
||||
add_type_to_schema_mutation(type, timestamp, mutations);
|
||||
return mutations;
|
||||
|
||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||
return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
|
||||
}
|
||||
|
||||
std::vector<mutation> make_drop_type_mutations(lw_shared_ptr<keyspace_metadata> keyspace, user_type type, api::timestamp_type timestamp)
|
||||
future<std::vector<mutation>> make_drop_type_mutations(lw_shared_ptr<keyspace_metadata> keyspace, user_type type, api::timestamp_type timestamp)
|
||||
{
|
||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||
auto mutations = make_create_keyspace_mutations(keyspace, timestamp, false);
|
||||
|
||||
std::vector<mutation> mutations;
|
||||
schema_ptr s = usertypes();
|
||||
auto pkey = partition_key::from_singular(*s, type->_keyspace);
|
||||
auto ckey = clustering_key::from_singular(*s, type->get_name_as_string());
|
||||
@@ -1202,19 +1218,21 @@ std::vector<mutation> make_drop_type_mutations(lw_shared_ptr<keyspace_metadata>
|
||||
m.partition().apply_delete(*s, ckey, tombstone(timestamp, gc_clock::now()));
|
||||
mutations.emplace_back(std::move(m));
|
||||
|
||||
return mutations;
|
||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||
return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
|
||||
}
|
||||
|
||||
/*
|
||||
* Table metadata serialization/deserialization.
|
||||
*/
|
||||
|
||||
std::vector<mutation> make_create_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp)
|
||||
future<std::vector<mutation>> make_create_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp)
|
||||
{
|
||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||
auto mutations = make_create_keyspace_mutations(keyspace, timestamp, false);
|
||||
std::vector<mutation> mutations;
|
||||
add_table_or_view_to_schema_mutation(table, timestamp, true, mutations);
|
||||
return mutations;
|
||||
|
||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||
return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
|
||||
}
|
||||
|
||||
static schema_mutations make_table_mutations(schema_ptr table, api::timestamp_type timestamp, bool with_columns_and_triggers)
|
||||
@@ -1347,15 +1365,13 @@ static void make_update_columns_mutations(schema_ptr old_table,
|
||||
mutations.emplace_back(std::move(columns_mutation));
|
||||
}
|
||||
|
||||
std::vector<mutation> make_update_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace,
|
||||
future<std::vector<mutation>> make_update_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace,
|
||||
schema_ptr old_table,
|
||||
schema_ptr new_table,
|
||||
api::timestamp_type timestamp,
|
||||
bool from_thrift)
|
||||
{
|
||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||
auto mutations = make_create_keyspace_mutations(keyspace, timestamp, false);
|
||||
|
||||
std::vector<mutation> mutations;
|
||||
add_table_or_view_to_schema_mutation(new_table, timestamp, false, mutations);
|
||||
|
||||
make_update_columns_mutations(std::move(old_table), std::move(new_table), timestamp, from_thrift, mutations);
|
||||
@@ -1373,7 +1389,8 @@ std::vector<mutation> make_update_table_mutations(lw_shared_ptr<keyspace_metadat
|
||||
addTriggerToSchemaMutation(newTable, trigger, timestamp, mutation);
|
||||
|
||||
#endif
|
||||
return mutations;
|
||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||
return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
|
||||
}
|
||||
|
||||
static void make_drop_table_or_view_mutations(schema_ptr schema_table,
|
||||
@@ -1390,10 +1407,9 @@ static void make_drop_table_or_view_mutations(schema_ptr schema_table,
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<mutation> make_drop_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp)
|
||||
future<std::vector<mutation>> make_drop_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp)
|
||||
{
|
||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||
auto mutations = make_create_keyspace_mutations(keyspace, timestamp, false);
|
||||
std::vector<mutation> mutations;
|
||||
make_drop_table_or_view_mutations(columnfamilies(), std::move(table), timestamp, mutations);
|
||||
|
||||
#if 0
|
||||
@@ -1405,7 +1421,8 @@ std::vector<mutation> make_drop_table_mutations(lw_shared_ptr<keyspace_metadata>
|
||||
for (String indexName : Keyspace.open(keyspace.name).getColumnFamilyStore(table.cfName).getBuiltIndexes())
|
||||
indexCells.addTombstone(indexCells.getComparator().makeCellName(indexName), ldt, timestamp);
|
||||
#endif
|
||||
return mutations;
|
||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||
return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
|
||||
}
|
||||
|
||||
static future<schema_mutations> read_table_mutations(distributed<service::storage_proxy>& proxy, const qualified_name& table, schema_ptr s)
|
||||
@@ -1899,37 +1916,39 @@ schema_mutations make_schema_mutations(schema_ptr s, api::timestamp_type timesta
|
||||
return s->is_view() ? make_view_mutations(view_ptr(s), timestamp, with_columns) : make_table_mutations(s, timestamp, with_columns);
|
||||
}
|
||||
|
||||
std::vector<mutation> make_create_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr view, api::timestamp_type timestamp)
|
||||
future<std::vector<mutation>> make_create_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr view, api::timestamp_type timestamp)
|
||||
{
|
||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||
auto mutations = make_create_keyspace_mutations(keyspace, timestamp, false);
|
||||
std::vector<mutation> mutations;
|
||||
// And also the serialized base table.
|
||||
auto base = keyspace->cf_meta_data().at(view->view_info()->base_name());
|
||||
add_table_or_view_to_schema_mutation(base, timestamp, true, mutations);
|
||||
add_table_or_view_to_schema_mutation(view, timestamp, true, mutations);
|
||||
return mutations;
|
||||
|
||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||
return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
|
||||
}
|
||||
|
||||
std::vector<mutation> make_update_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace,
|
||||
future<std::vector<mutation>> make_update_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace,
|
||||
view_ptr old_view,
|
||||
view_ptr new_view,
|
||||
api::timestamp_type timestamp)
|
||||
{
|
||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||
auto mutations = make_create_keyspace_mutations(keyspace, timestamp, false);
|
||||
std::vector<mutation> mutations;
|
||||
// And also the serialized base table.
|
||||
auto base = keyspace->cf_meta_data().at(new_view->view_info()->base_name());
|
||||
add_table_or_view_to_schema_mutation(base, timestamp, true, mutations);
|
||||
add_table_or_view_to_schema_mutation(new_view, timestamp, false, mutations);
|
||||
make_update_columns_mutations(old_view, new_view, timestamp, false, mutations);
|
||||
return mutations;
|
||||
|
||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||
return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
|
||||
}
|
||||
|
||||
std::vector<mutation> make_drop_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr view, api::timestamp_type timestamp) {
|
||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||
auto mutations = make_create_keyspace_mutations(keyspace, timestamp, false);
|
||||
future<std::vector<mutation>> make_drop_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr view, api::timestamp_type timestamp) {
|
||||
std::vector<mutation> mutations;
|
||||
make_drop_table_or_view_mutations(views(), view, timestamp, mutations);
|
||||
return mutations;
|
||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||
return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
||||
@@ -80,6 +80,7 @@ future<std::vector<frozen_mutation>> convert_schema_to_mutations(distributed<ser
|
||||
|
||||
future<schema_result_value_type>
|
||||
read_schema_partition_for_keyspace(distributed<service::storage_proxy>& proxy, const sstring& schema_table_name, const sstring& keyspace_name);
|
||||
future<mutation> read_keyspace_mutation(distributed<service::storage_proxy>&, const sstring& keyspace_name);
|
||||
|
||||
future<> merge_schema(distributed<service::storage_proxy>& proxy, std::vector<mutation> mutations);
|
||||
|
||||
@@ -95,17 +96,17 @@ std::vector<mutation> make_drop_keyspace_mutations(lw_shared_ptr<keyspace_metada
|
||||
|
||||
lw_shared_ptr<keyspace_metadata> create_keyspace_from_schema_partition(const schema_result_value_type& partition);
|
||||
|
||||
std::vector<mutation> make_create_type_mutations(lw_shared_ptr<keyspace_metadata> keyspace, user_type type, api::timestamp_type timestamp);
|
||||
future<std::vector<mutation>> make_create_type_mutations(lw_shared_ptr<keyspace_metadata> keyspace, user_type type, api::timestamp_type timestamp);
|
||||
|
||||
std::vector<user_type> create_types_from_schema_partition(const schema_result_value_type& result);
|
||||
|
||||
std::vector<mutation> make_drop_type_mutations(lw_shared_ptr<keyspace_metadata> keyspace, user_type type, api::timestamp_type timestamp);
|
||||
future<std::vector<mutation>> make_drop_type_mutations(lw_shared_ptr<keyspace_metadata> keyspace, user_type type, api::timestamp_type timestamp);
|
||||
|
||||
void add_type_to_schema_mutation(user_type type, api::timestamp_type timestamp, std::vector<mutation>& mutations);
|
||||
|
||||
std::vector<mutation> make_create_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp);
|
||||
future<std::vector<mutation>> make_create_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp);
|
||||
|
||||
std::vector<mutation> make_update_table_mutations(
|
||||
future<std::vector<mutation>> make_update_table_mutations(
|
||||
lw_shared_ptr<keyspace_metadata> keyspace,
|
||||
schema_ptr old_table,
|
||||
schema_ptr new_table,
|
||||
@@ -114,7 +115,7 @@ std::vector<mutation> make_update_table_mutations(
|
||||
|
||||
future<std::map<sstring, schema_ptr>> create_tables_from_tables_partition(distributed<service::storage_proxy>& proxy, const schema_result::mapped_type& result);
|
||||
|
||||
std::vector<mutation> make_drop_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp);
|
||||
future<std::vector<mutation>> make_drop_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp);
|
||||
|
||||
future<schema_ptr> create_table_from_name(distributed<service::storage_proxy>& proxy, const sstring& keyspace, const sstring& table);
|
||||
|
||||
@@ -149,11 +150,11 @@ schema_mutations make_schema_mutations(schema_ptr s, api::timestamp_type timesta
|
||||
|
||||
void add_table_or_view_to_schema_mutation(schema_ptr view, api::timestamp_type timestamp, bool with_columns, std::vector<mutation>& mutations);
|
||||
|
||||
std::vector<mutation> make_create_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr view, api::timestamp_type timestamp);
|
||||
future<std::vector<mutation>> make_create_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr view, api::timestamp_type timestamp);
|
||||
|
||||
std::vector<mutation> make_update_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr old_view, view_ptr new_view, api::timestamp_type timestamp);
|
||||
future<std::vector<mutation>> make_update_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr old_view, view_ptr new_view, api::timestamp_type timestamp);
|
||||
|
||||
std::vector<mutation> make_drop_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr view, api::timestamp_type timestamp);
|
||||
future<std::vector<mutation>> make_drop_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr view, api::timestamp_type timestamp);
|
||||
|
||||
sstring serialize_kind(column_kind kind);
|
||||
column_kind deserialize_kind(sstring kind);
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
|
||||
#include "byte_ordered_partitioner.hh"
|
||||
#include "utils/class_registrator.hh"
|
||||
#include "utils/div_ceil.hh"
|
||||
#include <boost/multiprecision/cpp_int.hpp>
|
||||
#include <boost/multiprecision/cpp_dec_float.hpp>
|
||||
|
||||
@@ -162,22 +163,17 @@ byte_ordered_partitioner::shard_of(const token& t) const {
|
||||
}
|
||||
|
||||
token
|
||||
byte_ordered_partitioner::token_for_next_shard(const token& t) const {
|
||||
byte_ordered_partitioner::token_for_next_shard(const token& t, shard_id shard, unsigned spans) const {
|
||||
switch (t._kind) {
|
||||
case token::kind::before_all_keys:
|
||||
return token_for_next_shard(token(token::kind::key, managed_bytes{int8_t(0)}));
|
||||
case token::kind::after_all_keys:
|
||||
return maximum_token();
|
||||
case token::kind::before_all_keys:
|
||||
case token::kind::key:
|
||||
auto s = shard_of(t) + 1;
|
||||
if (s == _shard_count) {
|
||||
auto orig = shard_of(t);
|
||||
if (shard <= orig || spans != 1) {
|
||||
return maximum_token();
|
||||
}
|
||||
auto e = (s << 8) / _shard_count;
|
||||
// Division truncates; adjust
|
||||
while (((e * _shard_count) >> 8) != s) {
|
||||
++e;
|
||||
}
|
||||
auto e = div_ceil(shard << 8, _shard_count);
|
||||
return token(token::kind::key, managed_bytes({int8_t(e)}));
|
||||
}
|
||||
assert(0);
|
||||
|
||||
@@ -29,10 +29,9 @@
|
||||
namespace dht {
|
||||
|
||||
class byte_ordered_partitioner final : public i_partitioner {
|
||||
unsigned _shard_count;
|
||||
public:
|
||||
byte_ordered_partitioner(unsigned shard_count = smp::count, unsigned ignore_msb = 0) : _shard_count(shard_count) {}
|
||||
virtual const sstring name() { return "org.apache.cassandra.dht.ByteOrderedPartitioner"; }
|
||||
byte_ordered_partitioner(unsigned shard_count = smp::count, unsigned ignore_msb = 0) : i_partitioner(shard_count) {}
|
||||
virtual const sstring name() const { return "org.apache.cassandra.dht.ByteOrderedPartitioner"; }
|
||||
virtual token get_token(const schema& s, partition_key_view key) override {
|
||||
auto&& legacy = key.legacy_form(s);
|
||||
return token(token::kind::key, bytes(legacy.begin(), legacy.end()));
|
||||
@@ -75,7 +74,7 @@ public:
|
||||
}
|
||||
}
|
||||
virtual unsigned shard_of(const token& t) const override;
|
||||
virtual token token_for_next_shard(const token& t) const override;
|
||||
virtual token token_for_next_shard(const token& t, shard_id shard, unsigned spans) const override;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include "utils/class_registrator.hh"
|
||||
#include "types.hh"
|
||||
#include "utils/murmur_hash.hh"
|
||||
#include "utils/div_ceil.hh"
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
#include <boost/range/irange.hpp>
|
||||
#include <boost/range/adaptor/transformed.hpp>
|
||||
@@ -160,7 +161,7 @@ std::ostream& operator<<(std::ostream& out, const decorated_key& dk) {
|
||||
}
|
||||
|
||||
// FIXME: make it per-keyspace
|
||||
std::unique_ptr<i_partitioner> default_partitioner { new murmur3_partitioner };
|
||||
std::unique_ptr<i_partitioner> default_partitioner;
|
||||
|
||||
void set_global_partitioner(const sstring& class_name, unsigned ignore_msb)
|
||||
{
|
||||
@@ -176,6 +177,9 @@ void set_global_partitioner(const sstring& class_name, unsigned ignore_msb)
|
||||
|
||||
i_partitioner&
|
||||
global_partitioner() {
|
||||
if (!default_partitioner) {
|
||||
default_partitioner = std::make_unique<murmur3_partitioner>(smp::count, 12);
|
||||
}
|
||||
return *default_partitioner;
|
||||
}
|
||||
|
||||
@@ -256,8 +260,9 @@ ring_position_range_sharder::next(const schema& s) {
|
||||
if (_done) {
|
||||
return {};
|
||||
}
|
||||
auto shard = _range.start() ? shard_of(_range.start()->value().token()) : global_partitioner().shard_of_minimum_token();
|
||||
auto shard_boundary_token = _partitioner.token_for_next_shard(_range.start() ? _range.start()->value().token() : minimum_token());
|
||||
auto shard = _range.start() ? _partitioner.shard_of(_range.start()->value().token()) : _partitioner.shard_of_minimum_token();
|
||||
auto next_shard = shard + 1 < _partitioner.shard_count() ? shard + 1 : 0;
|
||||
auto shard_boundary_token = _partitioner.token_for_next_shard(_range.start() ? _range.start()->value().token() : minimum_token(), next_shard);
|
||||
auto shard_boundary = ring_position::starting_at(shard_boundary_token);
|
||||
if ((!_range.end() || shard_boundary.less_compare(s, _range.end()->value()))
|
||||
&& shard_boundary_token != maximum_token()) {
|
||||
@@ -273,6 +278,96 @@ ring_position_range_sharder::next(const schema& s) {
|
||||
return ring_position_range_and_shard{std::move(_range), shard};
|
||||
}
|
||||
|
||||
|
||||
ring_position_exponential_sharder::ring_position_exponential_sharder(const i_partitioner& partitioner, partition_range pr)
|
||||
: _partitioner(partitioner)
|
||||
, _range(std::move(pr))
|
||||
, _last_ends(_partitioner.shard_count()) {
|
||||
if (_range.start()) {
|
||||
_first_shard = _next_shard = _partitioner.shard_of(_range.start()->value().token());
|
||||
}
|
||||
}
|
||||
|
||||
ring_position_exponential_sharder::ring_position_exponential_sharder(partition_range pr)
|
||||
: ring_position_exponential_sharder(global_partitioner(), std::move(pr)) {
|
||||
}
|
||||
|
||||
stdx::optional<ring_position_exponential_sharder_result>
|
||||
ring_position_exponential_sharder::next(const schema& s) {
|
||||
auto ret = ring_position_exponential_sharder_result{};
|
||||
ret.per_shard_ranges.reserve(std::min(_spans_per_iteration, _partitioner.shard_count()));
|
||||
ret.inorder = _spans_per_iteration <= _partitioner.shard_count();
|
||||
unsigned spans_to_go = _spans_per_iteration;
|
||||
auto cmp = ring_position_comparator(s);
|
||||
auto spans_per_shard = _spans_per_iteration / _partitioner.shard_count();
|
||||
auto shards_with_extra_span = _spans_per_iteration % _partitioner.shard_count();
|
||||
auto first_shard = _next_shard;
|
||||
_next_shard = (_next_shard + _spans_per_iteration) % _partitioner.shard_count();
|
||||
for (auto i : boost::irange(0u, std::min(_partitioner.shard_count(), _spans_per_iteration))) {
|
||||
auto shard = (first_shard + i) % _partitioner.shard_count();
|
||||
if (_last_ends[shard] && *_last_ends[shard] == maximum_token()) {
|
||||
continue;
|
||||
}
|
||||
range_bound<ring_position> this_shard_start = [&] {
|
||||
if (_last_ends[shard]) {
|
||||
return range_bound<ring_position>(ring_position::starting_at(*_last_ends[shard]));
|
||||
} else {
|
||||
return _range.start().value_or(range_bound<ring_position>(ring_position::starting_at(minimum_token())));
|
||||
}
|
||||
}();
|
||||
// token_for_next_shard() may give us the wrong boundary on the first pass, so add an extra span:
|
||||
auto extra_span = !_last_ends[shard] && shard != _first_shard;
|
||||
auto spans = spans_per_shard + unsigned(i < shards_with_extra_span);
|
||||
auto boundary = _partitioner.token_for_next_shard(this_shard_start.value().token(), shard, spans + extra_span);
|
||||
auto proposed_range = partition_range(this_shard_start, range_bound<ring_position>(ring_position::starting_at(boundary), false));
|
||||
auto intersection = _range.intersection(proposed_range, cmp);
|
||||
if (!intersection) {
|
||||
continue;
|
||||
}
|
||||
spans_to_go -= spans;
|
||||
auto this_shard_result = ring_position_range_and_shard{std::move(*intersection), shard};
|
||||
_last_ends[shard] = boundary;
|
||||
ret.per_shard_ranges.push_back(std::move(this_shard_result));
|
||||
}
|
||||
if (ret.per_shard_ranges.empty()) {
|
||||
return stdx::nullopt;
|
||||
}
|
||||
_spans_per_iteration *= 2;
|
||||
return stdx::make_optional(std::move(ret));
|
||||
}
|
||||
|
||||
|
||||
ring_position_exponential_vector_sharder::ring_position_exponential_vector_sharder(const std::vector<nonwrapping_range<ring_position>>& ranges)
|
||||
: _ranges(std::begin(ranges), std::end(ranges)) {
|
||||
if (!_ranges.empty()) {
|
||||
_current_sharder.emplace(_ranges.front());
|
||||
_ranges.pop_front();
|
||||
++_element;
|
||||
}
|
||||
}
|
||||
|
||||
stdx::optional<ring_position_exponential_vector_sharder_result>
|
||||
ring_position_exponential_vector_sharder::next(const schema& s) {
|
||||
if (!_current_sharder) {
|
||||
return stdx::nullopt;
|
||||
}
|
||||
while (true) { // yuch
|
||||
auto ret = _current_sharder->next(s);
|
||||
if (ret) {
|
||||
auto augmented = ring_position_exponential_vector_sharder_result{std::move(*ret), _element};
|
||||
return stdx::make_optional(std::move(augmented));
|
||||
}
|
||||
if (_ranges.empty()) {
|
||||
_current_sharder = stdx::nullopt;
|
||||
return stdx::nullopt;
|
||||
}
|
||||
_current_sharder.emplace(_ranges.front());
|
||||
_ranges.pop_front();
|
||||
++_element;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
ring_position_range_vector_sharder::ring_position_range_vector_sharder(dht::partition_range_vector ranges)
|
||||
: _ranges(std::move(ranges))
|
||||
, _current_range(_ranges.begin()) {
|
||||
@@ -300,6 +395,33 @@ int ring_position_comparator::operator()(const ring_position& lh, const ring_pos
|
||||
return lh.tri_compare(s, rh);
|
||||
}
|
||||
|
||||
std::vector<partition_range>
|
||||
split_range_to_single_shard(const i_partitioner& partitioner, const schema& s, const partition_range& pr, shard_id shard) {
|
||||
auto cmp = ring_position_comparator(s);
|
||||
auto ret = std::vector<partition_range>();
|
||||
auto next_shard = shard + 1 == partitioner.shard_count() ? 0 : shard + 1;
|
||||
auto start_token = pr.start() ? pr.start()->value().token() : minimum_token();
|
||||
auto start_shard = partitioner.shard_of(start_token);
|
||||
auto start_boundary = start_shard == shard ? pr.start() : range_bound<ring_position>(ring_position::starting_at(partitioner.token_for_next_shard(start_token, shard)));
|
||||
while (pr.overlaps(partition_range(start_boundary, {}), cmp)
|
||||
&& !(start_boundary && start_boundary->value().token() == maximum_token())) {
|
||||
auto end_token = partitioner.token_for_next_shard(start_token, next_shard);
|
||||
auto candidate = partition_range(std::move(start_boundary), range_bound<ring_position>(ring_position::starting_at(end_token), false));
|
||||
auto intersection = pr.intersection(std::move(candidate), cmp);
|
||||
if (intersection) {
|
||||
ret.push_back(std::move(*intersection));
|
||||
}
|
||||
start_token = partitioner.token_for_next_shard(end_token, shard);
|
||||
start_boundary = range_bound<ring_position>(ring_position::starting_at(start_token));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::vector<partition_range>
|
||||
split_range_to_single_shard(const schema& s, const partition_range& pr, shard_id shard) {
|
||||
return split_range_to_single_shard(global_partitioner(), s, pr, shard);
|
||||
}
|
||||
|
||||
int token_comparator::operator()(const token& t1, const token& t2) const {
|
||||
return tri_compare(t1, t2);
|
||||
}
|
||||
|
||||
@@ -180,7 +180,10 @@ public:
|
||||
using decorated_key_opt = std::experimental::optional<decorated_key>;
|
||||
|
||||
class i_partitioner {
|
||||
protected:
|
||||
unsigned _shard_count;
|
||||
public:
|
||||
explicit i_partitioner(unsigned shard_count) : _shard_count(shard_count) {}
|
||||
virtual ~i_partitioner() {}
|
||||
|
||||
/**
|
||||
@@ -272,7 +275,7 @@ public:
|
||||
/**
|
||||
* @return name of partitioner.
|
||||
*/
|
||||
virtual const sstring name() = 0;
|
||||
virtual const sstring name() const = 0;
|
||||
|
||||
/**
|
||||
* Calculates the shard that handles a particular token.
|
||||
@@ -280,9 +283,17 @@ public:
|
||||
virtual unsigned shard_of(const token& t) const = 0;
|
||||
|
||||
/**
|
||||
* Gets the first token greater than `t` that is not in the same shard as `t`.
|
||||
* Gets the first token greater than `t` that is in shard `shard`, and is a shard boundary (its first token).
|
||||
*
|
||||
* If the `spans` parameter is greater than zero, the result is the same as if the function
|
||||
* is called `spans` times, each time applied to its return value, but efficiently. This allows
|
||||
* selecting ranges that include multiple round trips around the 0..smp::count-1 shard span:
|
||||
*
|
||||
* token_for_next_shard(t, shard, spans) == token_for_next_shard(token_for_next_shard(t, shard, 1), shard, spans - 1)
|
||||
*
|
||||
* On overflow, maximum_token() is returned.
|
||||
*/
|
||||
virtual token token_for_next_shard(const token& t) const = 0;
|
||||
virtual token token_for_next_shard(const token& t, shard_id shard, unsigned spans = 1) const = 0;
|
||||
|
||||
/**
|
||||
* Gets the first shard of the minimum token.
|
||||
@@ -315,6 +326,13 @@ public:
|
||||
return tri_compare(t1, t2) < 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return number of shards configured for this partitioner
|
||||
*/
|
||||
unsigned shard_count() const {
|
||||
return _shard_count;
|
||||
}
|
||||
|
||||
friend bool operator==(const token& t1, const token& t2);
|
||||
friend bool operator<(const token& t1, const token& t2);
|
||||
friend int tri_compare(const token& t1, const token& t2);
|
||||
@@ -476,6 +494,44 @@ struct ring_position_range_and_shard_and_element : ring_position_range_and_shard
|
||||
unsigned element;
|
||||
};
|
||||
|
||||
struct ring_position_exponential_sharder_result {
|
||||
std::vector<ring_position_range_and_shard> per_shard_ranges;
|
||||
bool inorder = true;
|
||||
};
|
||||
|
||||
// given a ring_position range, generates exponentially increasing
|
||||
// sets of per-shard sub-ranges
|
||||
class ring_position_exponential_sharder {
|
||||
const i_partitioner& _partitioner;
|
||||
partition_range _range;
|
||||
unsigned _spans_per_iteration = 1;
|
||||
unsigned _first_shard = 0;
|
||||
unsigned _next_shard = 0;
|
||||
std::vector<stdx::optional<token>> _last_ends; // index = shard
|
||||
public:
|
||||
explicit ring_position_exponential_sharder(partition_range pr);
|
||||
explicit ring_position_exponential_sharder(const i_partitioner& partitioner, partition_range pr);
|
||||
stdx::optional<ring_position_exponential_sharder_result> next(const schema& s);
|
||||
};
|
||||
|
||||
struct ring_position_exponential_vector_sharder_result : ring_position_exponential_sharder_result {
|
||||
ring_position_exponential_vector_sharder_result(ring_position_exponential_sharder_result rpesr, unsigned element)
|
||||
: ring_position_exponential_sharder_result(std::move(rpesr)), element(element) {}
|
||||
unsigned element; // range within vector from which this result came
|
||||
};
|
||||
|
||||
|
||||
// given a vector of sorted, disjoint ring_position ranges, generates exponentially increasing
|
||||
// sets of per-shard sub-ranges. May be non-exponential when moving from one ring position range to another.
|
||||
class ring_position_exponential_vector_sharder {
|
||||
std::deque<nonwrapping_range<ring_position>> _ranges;
|
||||
stdx::optional<ring_position_exponential_sharder> _current_sharder;
|
||||
unsigned _element = 0;
|
||||
public:
|
||||
explicit ring_position_exponential_vector_sharder(const std::vector<nonwrapping_range<ring_position>>& ranges);
|
||||
stdx::optional<ring_position_exponential_vector_sharder_result> next(const schema& s);
|
||||
};
|
||||
|
||||
class ring_position_range_vector_sharder {
|
||||
using vec_type = dht::partition_range_vector;
|
||||
vec_type _ranges;
|
||||
@@ -504,6 +560,10 @@ split_range_to_shards(dht::partition_range pr, const schema& s);
|
||||
std::map<unsigned, dht::partition_range_vector>
|
||||
split_ranges_to_shards(const dht::token_range_vector& ranges, const schema& s);
|
||||
|
||||
// Intersect a partition_range with a shard and return the resulting sub-ranges, in sorted order
|
||||
std::vector<partition_range> split_range_to_single_shard(const schema& s, const dht::partition_range& pr, shard_id shard);
|
||||
std::vector<partition_range> split_range_to_single_shard(const i_partitioner& partitioner, const schema& s, const dht::partition_range& pr, shard_id shard);
|
||||
|
||||
} // dht
|
||||
|
||||
namespace std {
|
||||
|
||||
@@ -24,9 +24,40 @@
|
||||
#include "sstables/key.hh"
|
||||
#include "utils/class_registrator.hh"
|
||||
#include <boost/lexical_cast.hpp>
|
||||
#include <boost/range/irange.hpp>
|
||||
|
||||
namespace dht {
|
||||
|
||||
inline
|
||||
unsigned
|
||||
murmur3_partitioner::zero_based_shard_of(uint64_t token, unsigned shards, unsigned sharding_ignore_msb_bits) {
|
||||
// This is the master function, the inverses have to match it wrt. rounding errors.
|
||||
token <<= sharding_ignore_msb_bits;
|
||||
// Treat "token" as a fraction in the interval [0, 1); compute:
|
||||
// shard = floor((0.token) * shards)
|
||||
return (uint128_t(token) * shards) >> 64;
|
||||
}
|
||||
|
||||
std::vector<uint64_t>
|
||||
murmur3_partitioner::init_zero_based_shard_start(unsigned shards, unsigned sharding_ignore_msb_bits) {
|
||||
// computes the inverse of zero_based_shard_of(): ret[s] is the smallest token that belongs to shard s
|
||||
if (shards == 1) {
|
||||
// Avoid the while loops below getting confused finding the "edge" between two nonexistent shards
|
||||
return std::vector<uint64_t>(1, uint64_t(0));
|
||||
}
|
||||
auto ret = std::vector<uint64_t>(shards);
|
||||
for (auto s : boost::irange<unsigned>(0, shards)) {
|
||||
uint64_t token = (uint128_t(s) << 64) / shards;
|
||||
token >>= sharding_ignore_msb_bits; // leftmost bits are ignored by zero_based_shard_of
|
||||
// token is the start of the next shard, and can be slightly before due to rounding errors; adjust
|
||||
while (zero_based_shard_of(token, shards, sharding_ignore_msb_bits) != s) {
|
||||
++token;
|
||||
}
|
||||
ret[s] = token;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline
|
||||
int64_t
|
||||
murmur3_partitioner::normalize(int64_t in) {
|
||||
@@ -88,6 +119,16 @@ inline int64_t long_token(const token& t) {
|
||||
return net::ntoh(*lp);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
murmur3_partitioner::unbias(const token& t) const {
|
||||
return uint64_t(long_token(t)) + uint64_t(std::numeric_limits<int64_t>::min());
|
||||
}
|
||||
|
||||
token
|
||||
murmur3_partitioner::bias(uint64_t n) const {
|
||||
return get_token(n - uint64_t(std::numeric_limits<int64_t>::min()));
|
||||
}
|
||||
|
||||
sstring murmur3_partitioner::to_sstring(const token& t) const {
|
||||
return ::to_sstring(long_token(t));
|
||||
}
|
||||
@@ -210,46 +251,43 @@ murmur3_partitioner::shard_of(const token& t) const {
|
||||
case token::kind::after_all_keys:
|
||||
return _shard_count - 1;
|
||||
case token::kind::key:
|
||||
int64_t l = long_token(t);
|
||||
// treat l as a fraction between 0 and 1 and use 128-bit arithmetic to
|
||||
// divide that range evenly among shards:
|
||||
uint64_t adjusted = uint64_t(l) + uint64_t(std::numeric_limits<int64_t>::min());
|
||||
adjusted <<= _sharding_ignore_msb_bits;
|
||||
return (__int128(adjusted) * _shard_count) >> 64;
|
||||
uint64_t adjusted = unbias(t);
|
||||
return zero_based_shard_of(adjusted, _shard_count, _sharding_ignore_msb_bits);
|
||||
}
|
||||
assert(0);
|
||||
}
|
||||
|
||||
token
|
||||
murmur3_partitioner::token_for_next_shard(const token& t) const {
|
||||
murmur3_partitioner::token_for_next_shard(const token& t, shard_id shard, unsigned spans) const {
|
||||
uint64_t n = 0;
|
||||
switch (t._kind) {
|
||||
case token::kind::before_all_keys:
|
||||
return token_for_next_shard(get_token(std::numeric_limits<int64_t>::min() + 1));
|
||||
break;
|
||||
case token::kind::after_all_keys:
|
||||
return maximum_token();
|
||||
case token::kind::key:
|
||||
if (long_token(t) == std::numeric_limits<int64_t>::min()) {
|
||||
return token_for_next_shard(get_token(std::numeric_limits<int64_t>::min() + 1));
|
||||
}
|
||||
using uint128 = unsigned __int128;
|
||||
auto s = shard_of(t) + 1;
|
||||
s = s < _shard_count ? s : 0;
|
||||
int64_t l = long_token(t);
|
||||
// treat l as a fraction between 0 and 1 and use 128-bit arithmetic to
|
||||
// divide that range evenly among shards:
|
||||
uint64_t adjusted = uint64_t(l) + uint64_t(std::numeric_limits<int64_t>::min());
|
||||
auto mul = align_up(uint128(adjusted) * _shard_count + 1, uint128(1) << (64 - _sharding_ignore_msb_bits));
|
||||
if (mul >> 64 == _shard_count) {
|
||||
return maximum_token();
|
||||
}
|
||||
uint64_t e = mul / _shard_count;
|
||||
while (((uint128(e << _sharding_ignore_msb_bits) * _shard_count) >> 64) != s) {
|
||||
// division will round down, so correct for it
|
||||
++e;
|
||||
}
|
||||
return get_token(e + uint64_t(std::numeric_limits<int64_t>::min()));
|
||||
n = unbias(t);
|
||||
break;
|
||||
}
|
||||
assert(0);
|
||||
auto s = zero_based_shard_of(n, _shard_count, _sharding_ignore_msb_bits);
|
||||
|
||||
if (!_sharding_ignore_msb_bits) {
|
||||
// This ought to be the same as the else branch, but avoids shifts by 64
|
||||
n = _shard_start[shard];
|
||||
if (spans > 1 || shard <= s) {
|
||||
return maximum_token();
|
||||
}
|
||||
} else {
|
||||
auto left_part = n >> (64 - _sharding_ignore_msb_bits);
|
||||
left_part += spans - unsigned(shard > s);
|
||||
if (left_part >= (1u << _sharding_ignore_msb_bits)) {
|
||||
return maximum_token();
|
||||
}
|
||||
left_part <<= (64 - _sharding_ignore_msb_bits);
|
||||
auto right_part = _shard_start[shard];
|
||||
n = left_part | right_part;
|
||||
}
|
||||
return bias(n);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -23,20 +23,21 @@
|
||||
|
||||
#include "i_partitioner.hh"
|
||||
#include "bytes.hh"
|
||||
#include <vector>
|
||||
|
||||
namespace dht {
|
||||
|
||||
class murmur3_partitioner final : public i_partitioner {
|
||||
unsigned _shard_count;
|
||||
unsigned _sharding_ignore_msb_bits;
|
||||
std::vector<uint64_t> _shard_start = init_zero_based_shard_start(_shard_count, _sharding_ignore_msb_bits);
|
||||
public:
|
||||
murmur3_partitioner(unsigned shard_count = smp::count, unsigned sharding_ignore_msb_bits = 0)
|
||||
: _shard_count(shard_count)
|
||||
: i_partitioner(shard_count)
|
||||
// if one shard, ignore sharding_ignore_msb_bits as they will just cause needless
|
||||
// range breaks
|
||||
, _sharding_ignore_msb_bits(shard_count > 1 ? sharding_ignore_msb_bits : 0) {
|
||||
}
|
||||
virtual const sstring name() { return "org.apache.cassandra.dht.Murmur3Partitioner"; }
|
||||
virtual const sstring name() const { return "org.apache.cassandra.dht.Murmur3Partitioner"; }
|
||||
virtual token get_token(const schema& s, partition_key_view key) override;
|
||||
virtual token get_token(const sstables::key_view& key) override;
|
||||
virtual token get_random_token() override;
|
||||
@@ -50,11 +51,16 @@ public:
|
||||
virtual dht::token from_bytes(bytes_view bytes) const override;
|
||||
|
||||
virtual unsigned shard_of(const token& t) const override;
|
||||
virtual token token_for_next_shard(const token& t) const override;
|
||||
virtual token token_for_next_shard(const token& t, shard_id shard, unsigned spans) const override;
|
||||
private:
|
||||
using uint128_t = unsigned __int128;
|
||||
static int64_t normalize(int64_t in);
|
||||
token get_token(bytes_view key);
|
||||
token get_token(uint64_t value) const;
|
||||
token bias(uint64_t value) const; // translate from a zero-based range
|
||||
uint64_t unbias(const token& t) const; // translate to a zero-based range
|
||||
static unsigned zero_based_shard_of(uint64_t zero_based_token, unsigned shards, unsigned sharding_ignore_msb_bits);
|
||||
static std::vector<uint64_t> init_zero_based_shard_start(unsigned shards, unsigned sharding_ignore_msb_bits);
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#include "md5_hasher.hh"
|
||||
#include "random_partitioner.hh"
|
||||
#include "utils/class_registrator.hh"
|
||||
#include "utils/div_ceil.hh"
|
||||
#include <boost/multiprecision/cpp_int.hpp>
|
||||
|
||||
namespace dht {
|
||||
@@ -222,21 +223,20 @@ unsigned random_partitioner::shard_of(const token& t) const {
|
||||
}
|
||||
|
||||
token
|
||||
random_partitioner::token_for_next_shard(const token& t) const {
|
||||
random_partitioner::token_for_next_shard(const token& t, shard_id shard, unsigned spans) const {
|
||||
if (_shard_count == 1) {
|
||||
return maximum_token();
|
||||
}
|
||||
switch (t._kind) {
|
||||
case token::kind::after_all_keys:
|
||||
return maximum_token();
|
||||
case token::kind::before_all_keys:
|
||||
case token::kind::key:
|
||||
auto s = shard_of(t) + 1;
|
||||
if (s == _shard_count) {
|
||||
auto orig = shard_of(t);
|
||||
if (shard <= orig || spans != 1) {
|
||||
return maximum_token();
|
||||
}
|
||||
auto t = (boost::multiprecision::uint256_t(s) << 127) / _shard_count;
|
||||
// division truncates, so adjust
|
||||
while (((t * _shard_count) >> 127) != s) {
|
||||
++t;
|
||||
}
|
||||
auto t = div_ceil(boost::multiprecision::uint256_t(shard) << 127, _shard_count);
|
||||
return cppint_to_token(t.convert_to<boost::multiprecision::uint128_t>());
|
||||
}
|
||||
assert(0);
|
||||
|
||||
@@ -29,10 +29,9 @@
|
||||
namespace dht {
|
||||
|
||||
class random_partitioner final : public i_partitioner {
|
||||
unsigned _shard_count;
|
||||
public:
|
||||
random_partitioner(unsigned shard_count = smp::count, unsigned ignore_msb = 0) : _shard_count(shard_count) {}
|
||||
virtual const sstring name() { return "org.apache.cassandra.dht.RandomPartitioner"; }
|
||||
random_partitioner(unsigned shard_count = smp::count, unsigned ignore_msb = 0) : i_partitioner(shard_count) {}
|
||||
virtual const sstring name() const { return "org.apache.cassandra.dht.RandomPartitioner"; }
|
||||
virtual token get_token(const schema& s, partition_key_view key) override;
|
||||
virtual token get_token(const sstables::key_view& key) override;
|
||||
virtual token get_random_token() override;
|
||||
@@ -46,7 +45,7 @@ public:
|
||||
virtual dht::token from_sstring(const sstring& t) const override;
|
||||
virtual dht::token from_bytes(bytes_view bytes) const override;
|
||||
virtual unsigned shard_of(const token& t) const override;
|
||||
virtual token token_for_next_shard(const token& t) const override;
|
||||
virtual token token_for_next_shard(const token& t, shard_id shard, unsigned spans) const override;
|
||||
private:
|
||||
token get_token(bytes data);
|
||||
};
|
||||
|
||||
2
dist/ami/files/scylla-ami
vendored
2
dist/ami/files/scylla-ami
vendored
Submodule dist/ami/files/scylla-ami updated: d5a439759d...407e8f37ca
1
dist/common/modprobe.d/scylla-raid0.conf
vendored
Normal file
1
dist/common/modprobe.d/scylla-raid0.conf
vendored
Normal file
@@ -0,0 +1 @@
|
||||
options raid0 devices_discard_performance=Y
|
||||
74
dist/common/scripts/scylla_raid_setup
vendored
74
dist/common/scripts/scylla_raid_setup
vendored
@@ -5,15 +5,20 @@
|
||||
. /usr/lib/scylla/scylla_lib.sh
|
||||
|
||||
print_usage() {
|
||||
echo "scylla-raid-setup --disks /dev/hda,/dev/hdb... --raiddev /dev/md0 --update-fstab"
|
||||
echo "scylla-raid-setup --disks /dev/hda,/dev/hdb... --raiddev /dev/md0 --update-fstab --root /var/lib/scylla --volume-role [all|data|commitlog]"
|
||||
echo " --disks specify disks for RAID"
|
||||
echo " --raiddev MD device name for RAID"
|
||||
echo " --update-fstab update /etc/fstab for RAID"
|
||||
echo " --root specify the root of the tree"
|
||||
echo " --volume-role specify how will this device be used (data, commitlog, or all)"
|
||||
exit 1
|
||||
}
|
||||
|
||||
RAID=/dev/md0
|
||||
FSTAB=0
|
||||
ROOT=/var/lib/scylla
|
||||
ROLE="all"
|
||||
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
"--disks")
|
||||
@@ -29,12 +34,37 @@ while [ $# -gt 0 ]; do
|
||||
FSTAB=1
|
||||
shift 1
|
||||
;;
|
||||
"--root")
|
||||
ROOT="$2"
|
||||
shift 2
|
||||
;;
|
||||
"--volume-role")
|
||||
ROLE="$2"
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
print_usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
ROOT=${ROOT%/}
|
||||
case "$ROLE" in
|
||||
"all")
|
||||
MOUNT_AT=$ROOT
|
||||
;;
|
||||
"data")
|
||||
MOUNT_AT="$ROOT/data"
|
||||
;;
|
||||
"commitlog")
|
||||
MOUNT_AT="$ROOT/commitlog"
|
||||
;;
|
||||
*)
|
||||
echo "Invalid role specified ($ROLE)"
|
||||
print_usage
|
||||
;;
|
||||
esac
|
||||
|
||||
if [ "$DISKS" = "" ]; then
|
||||
print_usage
|
||||
fi
|
||||
@@ -51,8 +81,8 @@ if [ -e $RAID ]; then
|
||||
echo "$RAID is already using"
|
||||
exit 1
|
||||
fi
|
||||
if [ "`mount|grep /var/lib/scylla`" != "" ]; then
|
||||
echo "/var/lib/scylla is already mounted"
|
||||
if mountpoint -q $MOUNT_AT; then
|
||||
echo "$MOUNT_AT is already mounted"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -61,18 +91,32 @@ if is_debian_variant; then
|
||||
else
|
||||
yum -y install mdadm xfsprogs
|
||||
fi
|
||||
mdadm --create --verbose --force --run $RAID --level=0 -c1024 --raid-devices=$NR_DISK $DISKS
|
||||
mkfs.xfs $RAID -f
|
||||
echo "DEVICE $DISKS" > /etc/mdadm.conf
|
||||
mdadm --detail --scan >> /etc/mdadm.conf
|
||||
if [ "$ID" = "ubuntu" ] && [ "$VERSION_ID" = "14.04" ]; then
|
||||
mdadm --create --verbose --force --run $RAID --level=0 -c1024 --raid-devices=$NR_DISK $DISKS
|
||||
mkfs.xfs $RAID -f
|
||||
else
|
||||
for dsk in $DISKS; do
|
||||
blkdiscard $dsk &
|
||||
done
|
||||
wait
|
||||
mdadm --create --verbose --force --run $RAID --level=0 -c1024 --raid-devices=$NR_DISK $DISKS
|
||||
mkfs.xfs $RAID -f -K
|
||||
fi
|
||||
mdadm --detail --scan > /etc/mdadm.conf
|
||||
|
||||
mkdir -p "$MOUNT_AT"
|
||||
mount -t xfs -o noatime $RAID "$MOUNT_AT"
|
||||
|
||||
# create this unconditionally so we are more robust about ordering
|
||||
# if the script is run multiple times. But must do after mount in case
|
||||
# we are mounting the root
|
||||
mkdir -p "$ROOT/data"
|
||||
mkdir -p "$ROOT/commitlog"
|
||||
mkdir -p "$ROOT/coredump"
|
||||
chown scylla:scylla "$ROOT"
|
||||
chown scylla:scylla "$ROOT"/*
|
||||
|
||||
if [ $FSTAB -ne 0 ]; then
|
||||
UUID=`blkid $RAID | awk '{print $2}'`
|
||||
echo "$UUID /var/lib/scylla xfs noatime 0 0" >> /etc/fstab
|
||||
echo "$UUID $MOUNT_AT xfs noatime 0 0" >> /etc/fstab
|
||||
fi
|
||||
mount -t xfs -o noatime $RAID /var/lib/scylla
|
||||
|
||||
mkdir -p /var/lib/scylla/data
|
||||
mkdir -p /var/lib/scylla/commitlog
|
||||
mkdir -p /var/lib/scylla/coredump
|
||||
chown scylla:scylla /var/lib/scylla/*
|
||||
chown scylla:scylla /var/lib/scylla/
|
||||
|
||||
13
dist/common/scripts/scylla_setup
vendored
13
dist/common/scripts/scylla_setup
vendored
@@ -81,7 +81,7 @@ verify_package() {
|
||||
}
|
||||
|
||||
list_block_devices() {
|
||||
if lsblk --help | grep -q -e -p; then
|
||||
if lsblk --help | grep -q -e '^\s*-p'; then
|
||||
lsblk -pnr | awk '{ print $1 }'
|
||||
else
|
||||
ls -1 /dev/sd* /dev/hd* /dev/xvd* /dev/nvme* /dev/mapper/* 2>/dev/null|grep -v control
|
||||
@@ -267,21 +267,24 @@ if [ $ENABLE_SERVICE -eq 1 ]; then
|
||||
printf "[housekeeping]\ncheck-version: True\n" > /etc/scylla.d/housekeeping.cfg
|
||||
fi
|
||||
if is_systemd; then
|
||||
systemctl unmask scylla-housekeeping.timer
|
||||
systemctl unmask scylla-housekeeping-daily.timer
|
||||
systemctl unmask scylla-housekeeping-restart.timer
|
||||
fi
|
||||
else
|
||||
if [ ! -f /etc/scylla.d/housekeeping.cfg ]; then
|
||||
printf "[housekeeping]\ncheck-version: False\n" > /etc/scylla.d/housekeeping.cfg
|
||||
fi
|
||||
if is_systemd; then
|
||||
systemctl mask scylla-housekeeping.timer
|
||||
systemctl stop scylla-housekeeping.timer || true
|
||||
systemctl mask scylla-housekeeping-daily.timer
|
||||
systemctl mask scylla-housekeeping-restart.timer
|
||||
systemctl stop scylla-housekeeping-daily.timer || true
|
||||
systemctl stop scylla-housekeeping-restart.timer || true
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
CUR_VERSION=`scylla --version` || true
|
||||
if [ "$CUR_VERSION" != "" ] && [ "$UUID" != "" ]; then
|
||||
if [ "$CUR_VERSION" != "" ]; then
|
||||
NEW_VERSION=`sudo -u scylla /usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid version --version $CUR_VERSION --mode i` || true
|
||||
if [ "$NEW_VERSION" != "" ]; then
|
||||
echo $NEW_VERSION
|
||||
|
||||
2
dist/common/sysctl.d/99-scylla-sched.conf
vendored
2
dist/common/sysctl.d/99-scylla-sched.conf
vendored
@@ -5,7 +5,7 @@ kernel.sched_tunable_scaling = 0
|
||||
kernel.sched_min_granularity_ns = 500000
|
||||
|
||||
# Don't delay unrelated workloads
|
||||
kernel.sched_wakeup_granularity_ns = 500000
|
||||
kernel.sched_wakeup_granularity_ns = 450000
|
||||
|
||||
# Schedule all tasks in this period
|
||||
kernel.sched_latency_ns = 1000000
|
||||
|
||||
12
dist/common/systemd/scylla-housekeeping-daily.service
vendored
Normal file
12
dist/common/systemd/scylla-housekeeping-daily.service
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
[Unit]
|
||||
Description=Scylla Housekeeping daily mode
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=scylla
|
||||
Group=scylla
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid --repo-files '/etc/yum.repos.d/scylla*.repo' -q -c /etc/scylla.d/housekeeping.cfg version --mode d
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
11
dist/common/systemd/scylla-housekeeping-daily.timer
vendored
Normal file
11
dist/common/systemd/scylla-housekeeping-daily.timer
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
[Unit]
|
||||
Description=Run Scylla Housekeeping daily mode
|
||||
After=scylla-server.service
|
||||
BindsTo=scylla-server.service
|
||||
|
||||
[Timer]
|
||||
OnActiveSec=1d
|
||||
OnUnitActiveSec=1d
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
12
dist/common/systemd/scylla-housekeeping-restart.service
vendored
Normal file
12
dist/common/systemd/scylla-housekeeping-restart.service
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
[Unit]
|
||||
Description=Scylla Housekeeping restart mode
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=scylla
|
||||
Group=scylla
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q --repo-files '/etc/yum.repos.d/scylla*.repo' -c /etc/scylla.d/housekeeping.cfg version --mode r
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -1,12 +1,11 @@
|
||||
[Unit]
|
||||
Description=Run Scylla Housekeeping daily
|
||||
Description=Run Scylla Housekeeping restart mode
|
||||
After=scylla-server.service
|
||||
BindsTo=scylla-server.service
|
||||
|
||||
[Timer]
|
||||
# set OnActiveSec to 3 to safely avoid issues/1846
|
||||
OnActiveSec=3
|
||||
OnUnitActiveSec=1d
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
12
dist/common/systemd/scylla-housekeeping.service
vendored
12
dist/common/systemd/scylla-housekeeping.service
vendored
@@ -1,12 +0,0 @@
|
||||
[Unit]
|
||||
Description=Scylla Housekeeping
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=scylla
|
||||
Group=scylla
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg version --mode d
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
3
dist/common/systemd/scylla-server.service.in
vendored
3
dist/common/systemd/scylla-server.service.in
vendored
@@ -2,7 +2,8 @@
|
||||
Description=Scylla Server
|
||||
After=network.target
|
||||
Wants=scylla-jmx.service
|
||||
Wants=scylla-housekeeping.timer
|
||||
Wants=scylla-housekeeping-restart.timer
|
||||
Wants=scylla-housekeeping-daily.timer
|
||||
|
||||
[Service]
|
||||
PermissionsStartOnly=true
|
||||
|
||||
25
dist/debian/build_deb.sh
vendored
25
dist/debian/build_deb.sh
vendored
@@ -7,6 +7,14 @@ print_usage() {
|
||||
echo " --rebuild-dep rebuild dependency packages"
|
||||
exit 1
|
||||
}
|
||||
install_deps() {
|
||||
echo Y | sudo mk-build-deps
|
||||
DEB_FILE=`ls *-build-deps*.deb`
|
||||
sudo gdebi -n $DEB_FILE
|
||||
sudo rm -f $DEB_FILE
|
||||
sudo dpkg -P ${DEB_FILE%%_*.deb}
|
||||
}
|
||||
|
||||
REBUILD=0
|
||||
DIST=0
|
||||
while [ $# -gt 0 ]; do
|
||||
@@ -54,6 +62,9 @@ fi
|
||||
if [ ! -f /usr/bin/lsb_release ]; then
|
||||
sudo apt-get -y install lsb-release
|
||||
fi
|
||||
if [ ! -f /usr/bin/gdebi ]; then
|
||||
sudo apt-get -y install gdebi-core
|
||||
fi
|
||||
|
||||
DISTRIBUTION=`lsb_release -i|awk '{print $3}'`
|
||||
CODENAME=`lsb_release -c|awk '{print $2}'`
|
||||
@@ -84,7 +95,8 @@ if [ "$DISTRIBUTION" = "Debian" ]; then
|
||||
sed -i -e "s/@@COMPILER@@/g++-5/g" debian/rules
|
||||
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, g++-5, libunwind-dev/g" debian/control
|
||||
sed -i -e "s#@@INSTALL@@##g" debian/scylla-server.install
|
||||
sed -i -e "s#@@HKDOTTIMER@@#dist/common/systemd/scylla-housekeeping.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@HKDOTTIMER_D@@#dist/common/systemd/scylla-housekeeping-daily.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@HKDOTTIMER_R@@#dist/common/systemd/scylla-housekeeping-restart.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@SYSCTL@@#dist/debian/sysctl.d/99-scylla.conf etc/sysctl.d#g" debian/scylla-server.install
|
||||
elif [ "$VERSION_ID" = "14.04" ]; then
|
||||
sed -i -e "s/@@REVISION@@/0ubuntu1/g" debian/changelog
|
||||
@@ -92,7 +104,8 @@ elif [ "$VERSION_ID" = "14.04" ]; then
|
||||
sed -i -e "s/@@COMPILER@@/g++-5/g" debian/rules
|
||||
sed -i -e "s/@@BUILD_DEPENDS@@/g++-5, libunwind8-dev/g" debian/control
|
||||
sed -i -e "s#@@INSTALL@@#dist/debian/sudoers.d/scylla etc/sudoers.d#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@HKDOTTIMER@@##g" debian/scylla-server.install
|
||||
sed -i -e "s#@@HKDOTTIMER_D@@##g" debian/scylla-server.install
|
||||
sed -i -e "s#@@HKDOTTIMER_R@@##g" debian/scylla-server.install
|
||||
sed -i -e "s#@@SYSCTL@@#dist/debian/sysctl.d/99-scylla.conf etc/sysctl.d#g" debian/scylla-server.install
|
||||
else
|
||||
sed -i -e "s/@@REVISION@@/0ubuntu1/g" debian/changelog
|
||||
@@ -100,7 +113,8 @@ else
|
||||
sed -i -e "s/@@COMPILER@@/g++/g" debian/rules
|
||||
sed -i -e "s/@@BUILD_DEPENDS@@/libsystemd-dev, g++, libunwind-dev/g" debian/control
|
||||
sed -i -e "s#@@INSTALL@@##g" debian/scylla-server.install
|
||||
sed -i -e "s#@@HKDOTTIMER@@#dist/common/systemd/scylla-housekeeping.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@HKDOTTIMER_D@@#dist/common/systemd/scylla-housekeeping-daily.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@HKDOTTIMER_R@@#dist/common/systemd/scylla-housekeeping-restart.timer /lib/systemd/system#g" debian/scylla-server.install
|
||||
sed -i -e "s#@@SYSCTL@@##g" debian/scylla-server.install
|
||||
fi
|
||||
if [ $DIST -gt 0 ]; then
|
||||
@@ -116,7 +130,8 @@ fi
|
||||
|
||||
cp dist/common/systemd/scylla-server.service.in debian/scylla-server.service
|
||||
sed -i -e "s#@@SYSCONFDIR@@#/etc/default#g" debian/scylla-server.service
|
||||
cp dist/common/systemd/scylla-housekeeping.service debian/scylla-server.scylla-housekeeping.service
|
||||
cp dist/common/systemd/scylla-housekeeping-daily.service debian/scylla-server.scylla-housekeeping-daily.service
|
||||
cp dist/common/systemd/scylla-housekeeping-restart.service debian/scylla-server.scylla-housekeeping-restart.service
|
||||
cp dist/common/systemd/node-exporter.service debian/scylla-server.node-exporter.service
|
||||
|
||||
if [ "$VERSION_ID" = "14.04" ] && [ $REBUILD -eq 0 ]; then
|
||||
@@ -140,5 +155,5 @@ else
|
||||
sudo apt-get install g++
|
||||
fi
|
||||
|
||||
echo Y | sudo mk-build-deps -i -r
|
||||
install_deps
|
||||
debuild -r fakeroot -us -uc
|
||||
|
||||
2
dist/debian/control.in
vendored
2
dist/debian/control.in
vendored
@@ -4,7 +4,7 @@ Homepage: http://scylladb.com
|
||||
Section: database
|
||||
Priority: optional
|
||||
Standards-Version: 3.9.5
|
||||
Build-Depends: debhelper (>= 9), libyaml-cpp-dev, liblz4-dev, libsnappy-dev, libcrypto++-dev, libjsoncpp-dev, libaio-dev, libthrift-dev, thrift-compiler, antlr3, antlr3-c++-dev, ragel, ninja-build, git, libboost-program-options1.55-dev | libboost-program-options-dev, libboost-filesystem1.55-dev | libboost-filesystem-dev, libboost-system1.55-dev | libboost-system-dev, libboost-thread1.55-dev | libboost-thread-dev, libboost-test1.55-dev | libboost-test-dev, libgnutls28-dev, libhwloc-dev, libnuma-dev, libpciaccess-dev, xfslibs-dev, python3-pyparsing, libxml2-dev, libsctp-dev, python-urwid, pciutils, libprotobuf-dev, protobuf-compiler, systemtap-sdt-dev, @@BUILD_DEPENDS@@
|
||||
Build-Depends: debhelper (>= 9), libyaml-cpp-dev, liblz4-dev, libsnappy-dev, libcrypto++-dev, libjsoncpp-dev, libaio-dev, libthrift-dev, thrift-compiler, antlr3, antlr3-c++-dev, ragel, ninja-build, git, libboost-program-options1.55-dev | libboost-program-options-dev, libboost-filesystem1.55-dev | libboost-filesystem-dev, libboost-system1.55-dev | libboost-system-dev, libboost-thread1.55-dev | libboost-thread-dev, libboost-test1.55-dev | libboost-test-dev, libgnutls28-dev, libhwloc-dev, libnuma-dev, libpciaccess-dev, xfslibs-dev, python3-pyparsing, libxml2-dev, libsctp-dev, python-urwid, pciutils, libprotobuf-dev, protobuf-compiler, systemtap-sdt-dev, libtool, automake, @@BUILD_DEPENDS@@
|
||||
|
||||
Package: scylla-conf
|
||||
Architecture: any
|
||||
|
||||
@@ -29,10 +29,10 @@ setgid scylla
|
||||
script
|
||||
# make sure scylla is up before checking for the version
|
||||
sleep 5
|
||||
/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -c /etc/scylla.d/housekeeping.cfg -q version --mode r || true
|
||||
/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid --repo-files '/etc/apt/sources.list.d/scylla*.list' -c /etc/scylla.d/housekeeping.cfg -q version --mode r || true
|
||||
while [ 1 ]
|
||||
do
|
||||
sleep 1d
|
||||
/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -c /etc/scylla.d/housekeeping.cfg -q version --mode d || true
|
||||
/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid --repo-files '/etc/apt/sources.list.d/scylla*.list' -c /etc/scylla.d/housekeeping.cfg -q version --mode d || true
|
||||
done
|
||||
end script
|
||||
|
||||
1
dist/debian/debian/scylla-server.upstart
vendored
1
dist/debian/debian/scylla-server.upstart
vendored
@@ -41,6 +41,7 @@ script
|
||||
fi
|
||||
. "$i"
|
||||
done
|
||||
export SCYLLA_CONF SCYLLA_HOME
|
||||
exec /usr/bin/scylla $SCYLLA_ARGS $SEASTAR_IO $DEV_MODE $CPUSET
|
||||
end script
|
||||
|
||||
|
||||
33
dist/debian/dep/build_dependency.sh
vendored
33
dist/debian/dep/build_dependency.sh
vendored
@@ -1,7 +1,25 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
. /etc/os-release
|
||||
install_deps() {
|
||||
echo Y | sudo mk-build-deps
|
||||
DEB_FILE=`ls *-build-deps*.deb`
|
||||
sudo gdebi -n $DEB_FILE
|
||||
sudo rm -f $DEB_FILE
|
||||
sudo dpkg -P ${DEB_FILE%%_*.deb}
|
||||
}
|
||||
|
||||
DISTRIBUTION=`lsb_release -i|awk '{print $3}'`
|
||||
CODENAME=`lsb_release -c|awk '{print $2}'`
|
||||
|
||||
# workaround fix for #2444
|
||||
if [ "$CODENAME" = "jessie" ]; then
|
||||
if [ ! -e /etc/apt/sources.list.d/jessie-backports.list ]; then
|
||||
sudo sh -c 'echo deb "http://httpredir.debian.org/debian jessie-backports main" > /etc/apt/sources.list.d/jessie-backports.list'
|
||||
fi
|
||||
sudo apt-get -y update
|
||||
sudo apt-get install -t jessie-backports -y texlive
|
||||
fi
|
||||
|
||||
sudo apt-get install -y gdebi-core
|
||||
if [ "$VERSION_ID" = "14.04" ] || [ "$DISTRIBUTION" = "Debian" ]; then
|
||||
@@ -11,7 +29,7 @@ if [ "$VERSION_ID" = "14.04" ] || [ "$DISTRIBUTION" = "Debian" ]; then
|
||||
cp -a dist/debian/dep/antlr3-3.5.2/* build/antlr3-3.5.2
|
||||
cd build/antlr3-3.5.2
|
||||
wget -nv http://www.antlr3.org/download/antlr-3.5.2-complete-no-st3.jar
|
||||
echo Y | sudo mk-build-deps -i -r
|
||||
install_deps
|
||||
debuild -r fakeroot --no-tgz-check -us -uc
|
||||
cd -
|
||||
fi
|
||||
@@ -39,7 +57,7 @@ if [ "$VERSION_ID" = "14.04" ] || [ "$DISTRIBUTION" = "Debian" ]; then
|
||||
cd -
|
||||
cd build/gdb-7.11
|
||||
patch -p0 < ../../dist/debian/dep/gdb.diff
|
||||
echo Y | sudo mk-build-deps -i -r
|
||||
install_deps
|
||||
debuild -r fakeroot --no-tgz-check -us -uc
|
||||
cd -
|
||||
fi
|
||||
@@ -56,7 +74,7 @@ if [ ! -f build/antlr3-c++-dev_*.deb ]; then
|
||||
cd -
|
||||
cp -a dist/debian/dep/antlr3-c++-dev-3.5.2/debian build/antlr3-c++-dev-3.5.2
|
||||
cd build/antlr3-c++-dev-3.5.2
|
||||
echo Y | sudo mk-build-deps -i -r
|
||||
install_deps
|
||||
debuild -r fakeroot --no-tgz-check -us -uc
|
||||
cd -
|
||||
fi
|
||||
@@ -70,17 +88,18 @@ if [ ! -f build/libthrift0_*.deb ]; then
|
||||
tar xpf thrift-0.9.3.tar.gz
|
||||
cd thrift-0.9.3
|
||||
patch -p0 < ../../dist/debian/dep/thrift.diff
|
||||
echo Y | sudo mk-build-deps -i -r
|
||||
install_deps
|
||||
debuild -r fakeroot --no-tgz-check -us -uc
|
||||
cd ../..
|
||||
fi
|
||||
|
||||
if [ "$DISTRIBUTION" = "Debian" ] && [ "$VERSION_ID" = "8" ]; then
|
||||
if [ ! -f build/gcc-5_*.deb ]; then
|
||||
sudo cp dist/debian/dep/debian-stretch-source.list /etc/apt/sources.list.d/
|
||||
sudo apt-get update
|
||||
cd build
|
||||
apt-get source gcc-5/stretch=5.4.1-2
|
||||
wget https://launchpad.net/debian/+archive/primary/+files/gcc-5_5.4.1-5.dsc
|
||||
wget https://launchpad.net/debian/+archive/primary/+files/gcc-5_5.4.1.orig.tar.gz
|
||||
wget https://launchpad.net/debian/+archive/primary/+files/gcc-5_5.4.1-5.diff.gz
|
||||
dpkg-source -x gcc-5_5.4.1-5.dsc
|
||||
cd gcc-5-5.4.1
|
||||
# resolve build-time dependencies manually, since mk-build-deps doesn't work for the gcc package
|
||||
sudo apt-get install -y g++-multilib libc6-dev-i386 lib32gcc1 libc6-dev-x32 libx32gcc1 libc6-dbg m4 libtool autoconf2.64 autogen gawk zlib1g-dev systemtap-sdt-dev gperf bison flex gdb texinfo locales sharutils libantlr-java libffi-dev gnat-4.9 libisl-dev libmpc-dev libmpfr-dev libgmp-dev dejagnu realpath chrpath quilt doxygen graphviz ghostscript texlive-latex-base xsltproc libxml2-utils docbook-xsl-ns
|
||||
|
||||
20
dist/debian/dep/debian-gcc-5-jessie.diff
vendored
20
dist/debian/dep/debian-gcc-5-jessie.diff
vendored
@@ -1,6 +1,5 @@
|
||||
diff -Nur debian/rules.conf /home/syuu/gcc-5-5.4.1/debian/rules.conf
|
||||
--- debian/rules.conf 2016-10-14 04:54:21.000000000 +0000
|
||||
+++ /home/syuu/gcc-5-5.4.1/debian/rules.conf 2016-10-12 17:28:54.138711378 +0000
|
||||
--- debian/rules.conf 2017-02-24 19:02:52.000000000 +0000
|
||||
+++ /home/syuu/gcc-5.5/gcc-5-5.4.1/debian/rules.conf 2017-02-24 18:13:59.000000000 +0000
|
||||
@@ -206,7 +206,7 @@
|
||||
ifneq (,$(filter $(distrelease),vivid))
|
||||
BINUTILSBDV = 2.25-3~
|
||||
@@ -10,14 +9,16 @@ diff -Nur debian/rules.conf /home/syuu/gcc-5-5.4.1/debian/rules.conf
|
||||
else ifneq (,$(filter $(distrelease),sid stretch xenial))
|
||||
BINUTILSBDV = 2.26.1
|
||||
endif
|
||||
@@ -387,9 +387,9 @@
|
||||
@@ -386,10 +386,10 @@
|
||||
MPFR_BUILD_DEP = libmpfr-dev (>= 3.0.0-9~),
|
||||
endif
|
||||
|
||||
ISL_BUILD_DEP = libisl-dev,
|
||||
-ifneq (,$(filter $(distrelease),jessie sid experimental))
|
||||
-ISL_BUILD_DEP = libisl-dev,
|
||||
-ifneq (,$(filter $(distrelease),jessie stretch sid experimental))
|
||||
- ISL_BUILD_DEP = libisl-dev (>= 0.14),
|
||||
-endif
|
||||
+#ifneq (,$(filter $(distrelease),jessie sid experimental))
|
||||
+#ISL_BUILD_DEP = libisl-dev,
|
||||
+#ifneq (,$(filter $(distrelease),jessie stretch sid experimental))
|
||||
+# ISL_BUILD_DEP = libisl-dev (>= 0.14),
|
||||
+#endif
|
||||
|
||||
@@ -37,9 +38,8 @@ diff -Nur debian/rules.conf /home/syuu/gcc-5-5.4.1/debian/rules.conf
|
||||
ifneq ($(DEB_CROSS),yes)
|
||||
# all archs for which to create b-d's
|
||||
any_archs = alpha amd64 armel armhf arm64 i386 mips mipsel mips64 mips64el powerpc ppc64 ppc64el m68k sh4 sparc64 s390x x32
|
||||
diff -Nur debian/rules.defs /home/syuu/gcc-5-5.4.1/debian/rules.defs
|
||||
--- debian/rules.defs 2016-10-14 04:54:21.000000000 +0000
|
||||
+++ /home/syuu/gcc-5-5.4.1/debian/rules.defs 2016-10-13 10:18:51.647631508 +0000
|
||||
--- debian/rules.defs 2017-02-24 19:02:52.000000000 +0000
|
||||
+++ /home/syuu/gcc-5.5/gcc-5-5.4.1/debian/rules.defs 2017-02-24 18:13:59.000000000 +0000
|
||||
@@ -412,7 +412,7 @@
|
||||
# gcc versions (fixincludes, libgcj-common) ...
|
||||
#with_common_pkgs := yes
|
||||
|
||||
2
dist/debian/dep/debian-stretch-source.list
vendored
2
dist/debian/dep/debian-stretch-source.list
vendored
@@ -1,2 +0,0 @@
|
||||
deb-src http://httpredir.debian.org/debian stretch main
|
||||
deb-src http://httpredir.debian.org/debian stretch-updates main
|
||||
3
dist/debian/rules.in
vendored
3
dist/debian/rules.in
vendored
@@ -11,7 +11,8 @@ override_dh_auto_clean:
|
||||
|
||||
override_dh_installinit:
|
||||
dh_installinit --no-start @@DH_INSTALLINIT@@
|
||||
dh_installinit --no-start --name scylla-housekeeping @@DH_INSTALLINIT@@
|
||||
dh_installinit --no-start --name scylla-housekeeping-daily @@DH_INSTALLINIT@@
|
||||
dh_installinit --no-start --name scylla-housekeeping-restart @@DH_INSTALLINIT@@
|
||||
dh_installinit --no-start --name node-exporter @@DH_INSTALLINIT@@
|
||||
|
||||
override_dh_strip:
|
||||
|
||||
3
dist/debian/scylla-server.install.in
vendored
3
dist/debian/scylla-server.install.in
vendored
@@ -15,6 +15,7 @@ build/release/iotune usr/bin
|
||||
dist/common/bin/scyllatop usr/bin
|
||||
dist/common/sbin/* usr/sbin
|
||||
@@ADDHKCFG@@
|
||||
@@HKDOTTIMER@@
|
||||
@@HKDOTTIMER_D@@
|
||||
@@HKDOTTIMER_R@@
|
||||
@@INSTALL@@
|
||||
@@SYSCTL@@
|
||||
|
||||
4
dist/docker/redhat/Dockerfile
vendored
4
dist/docker/redhat/Dockerfile
vendored
@@ -7,7 +7,7 @@ ENV container docker
|
||||
VOLUME [ "/sys/fs/cgroup" ]
|
||||
|
||||
#install scylla
|
||||
RUN curl http://downloads.scylladb.com/rpm/unstable/centos/master/latest/scylla.repo -o /etc/yum.repos.d/scylla.repo
|
||||
RUN curl http://downloads.scylladb.com/rpm/centos/scylla-1.7.repo -o /etc/yum.repos.d/scylla.repo
|
||||
RUN yum -y install epel-release
|
||||
RUN yum -y clean expire-cache
|
||||
RUN yum -y update
|
||||
@@ -38,6 +38,6 @@ ADD commandlineparser.py /commandlineparser.py
|
||||
ADD docker-entrypoint.py /docker-entrypoint.py
|
||||
ENTRYPOINT ["/docker-entrypoint.py"]
|
||||
|
||||
EXPOSE 10000 9042 9160 7000 7001
|
||||
EXPOSE 10000 9042 9160 9180 7000 7001
|
||||
VOLUME [ "/var/lib/scylla" ]
|
||||
RUN chown -R scylla.scylla /var/lib/scylla
|
||||
|
||||
11
dist/redhat/centos_dep/build_dependency.sh
vendored
11
dist/redhat/centos_dep/build_dependency.sh
vendored
@@ -28,10 +28,6 @@ if [ ! -f boost-1.58.0-11.fc23.src.rpm ]; then
|
||||
wget -nv https://kojipkgs.fedoraproject.org//packages/boost/1.58.0/11.fc23/src/boost-1.58.0-11.fc23.src.rpm
|
||||
fi
|
||||
|
||||
if [ ! -f ninja-build-1.6.0-2.fc23.src.rpm ]; then
|
||||
wget -nv https://kojipkgs.fedoraproject.org//packages/ninja-build/1.6.0/2.fc23/src/ninja-build-1.6.0-2.fc23.src.rpm
|
||||
fi
|
||||
|
||||
if [ ! -f ragel-6.8-5.fc23.src.rpm ]; then
|
||||
wget -nv https://kojipkgs.fedoraproject.org//packages/ragel/6.8/5.fc23/src/ragel-6.8-5.fc23.src.rpm
|
||||
fi
|
||||
@@ -94,13 +90,6 @@ if [ ! -f $RPMBUILD/RPMS/x86_64/scylla-boost-1.58.0-11.el7*.x86_64.rpm ]; then
|
||||
fi
|
||||
do_install scylla-boost*
|
||||
|
||||
if [ ! -f $RPMBUILD/RPMS/x86_64/scylla-ninja-build-1.6.0-2.el7*.x86_64.rpm ]; then
|
||||
rpm --define "_topdir $RPMBUILD" -ivh build/srpms/ninja-build-1.6.0-2.fc23.src.rpm
|
||||
patch $RPMBUILD/SPECS/ninja-build.spec < dist/redhat/centos_dep/ninja-build.diff
|
||||
rpmbuild --define "_topdir $RPMBUILD" -ba $RPMBUILD/SPECS/ninja-build.spec
|
||||
fi
|
||||
do_install scylla-ninja-build-1.6.0-2.el7*.x86_64.rpm
|
||||
|
||||
if [ ! -f $RPMBUILD/RPMS/x86_64/scylla-ragel-6.8-5.el7*.x86_64.rpm ]; then
|
||||
rpm --define "_topdir $RPMBUILD" -ivh build/srpms/ragel-6.8-5.fc23.src.rpm
|
||||
patch $RPMBUILD/SPECS/ragel.spec < dist/redhat/centos_dep/ragel.diff
|
||||
|
||||
56
dist/redhat/centos_dep/ninja-build.diff
vendored
56
dist/redhat/centos_dep/ninja-build.diff
vendored
@@ -1,56 +0,0 @@
|
||||
--- ninja-build.spec.orig 2016-01-20 14:41:16.892802134 +0000
|
||||
+++ ninja-build.spec 2016-01-20 14:44:42.453227192 +0000
|
||||
@@ -1,19 +1,18 @@
|
||||
-Name: ninja-build
|
||||
+Name: scylla-ninja-build
|
||||
Version: 1.6.0
|
||||
Release: 2%{?dist}
|
||||
Summary: A small build system with a focus on speed
|
||||
License: ASL 2.0
|
||||
URL: http://martine.github.com/ninja/
|
||||
Source0: https://github.com/martine/ninja/archive/v%{version}.tar.gz#/ninja-%{version}.tar.gz
|
||||
-Source1: ninja.vim
|
||||
# Rename mentions of the executable name to be ninja-build.
|
||||
Patch1000: ninja-1.6.0-binary-rename.patch
|
||||
+Requires: scylla-env
|
||||
BuildRequires: asciidoc
|
||||
BuildRequires: gtest-devel
|
||||
BuildRequires: python2-devel
|
||||
-BuildRequires: re2c >= 0.11.3
|
||||
-Requires: emacs-filesystem
|
||||
-Requires: vim-filesystem
|
||||
+#BuildRequires: scylla-re2c >= 0.11.3
|
||||
+%define _prefix /opt/scylladb
|
||||
|
||||
%description
|
||||
Ninja is a small build system with a focus on speed. It differs from other
|
||||
@@ -32,15 +31,8 @@
|
||||
./ninja -v ninja_test
|
||||
|
||||
%install
|
||||
-# TODO: Install ninja_syntax.py?
|
||||
-mkdir -p %{buildroot}/{%{_bindir},%{_datadir}/bash-completion/completions,%{_datadir}/emacs/site-lisp,%{_datadir}/vim/vimfiles/syntax,%{_datadir}/vim/vimfiles/ftdetect,%{_datadir}/zsh/site-functions}
|
||||
-
|
||||
+mkdir -p %{buildroot}/opt/scylladb/bin
|
||||
install -pm755 ninja %{buildroot}%{_bindir}/ninja-build
|
||||
-install -pm644 misc/bash-completion %{buildroot}%{_datadir}/bash-completion/completions/ninja-bash-completion
|
||||
-install -pm644 misc/ninja-mode.el %{buildroot}%{_datadir}/emacs/site-lisp/ninja-mode.el
|
||||
-install -pm644 misc/ninja.vim %{buildroot}%{_datadir}/vim/vimfiles/syntax/ninja.vim
|
||||
-install -pm644 %{SOURCE1} %{buildroot}%{_datadir}/vim/vimfiles/ftdetect/ninja.vim
|
||||
-install -pm644 misc/zsh-completion %{buildroot}%{_datadir}/zsh/site-functions/_ninja
|
||||
|
||||
%check
|
||||
# workaround possible too low default limits
|
||||
@@ -50,12 +42,6 @@
|
||||
%files
|
||||
%doc COPYING HACKING.md README doc/manual.html
|
||||
%{_bindir}/ninja-build
|
||||
-%{_datadir}/bash-completion/completions/ninja-bash-completion
|
||||
-%{_datadir}/emacs/site-lisp/ninja-mode.el
|
||||
-%{_datadir}/vim/vimfiles/syntax/ninja.vim
|
||||
-%{_datadir}/vim/vimfiles/ftdetect/ninja.vim
|
||||
-# zsh does not have a -filesystem package
|
||||
-%{_datadir}/zsh/
|
||||
|
||||
%changelog
|
||||
* Mon Nov 16 2015 Ben Boeckel <mathstuf@gmail.com> - 1.6.0-2
|
||||
28
dist/redhat/scylla.spec.in
vendored
28
dist/redhat/scylla.spec.in
vendored
@@ -27,9 +27,9 @@ Group: Applications/Databases
|
||||
Summary: The Scylla database server
|
||||
License: AGPLv3
|
||||
URL: http://www.scylladb.com/
|
||||
BuildRequires: libaio-devel libstdc++-devel cryptopp-devel hwloc-devel numactl-devel libpciaccess-devel libxml2-devel zlib-devel thrift-devel yaml-cpp-devel lz4-devel snappy-devel jsoncpp-devel systemd-devel xz-devel pcre-devel elfutils-libelf-devel bzip2-devel keyutils-libs-devel xfsprogs-devel make gnutls-devel systemd-devel lksctp-tools-devel protobuf-devel protobuf-compiler libunwind-devel systemtap-sdt-devel
|
||||
%{?fedora:BuildRequires: boost-devel ninja-build ragel antlr3-tool antlr3-C++-devel python3 gcc-c++ libasan libubsan python3-pyparsing dnf-yum}
|
||||
%{?rhel:BuildRequires: scylla-libstdc++-static scylla-boost-devel scylla-boost-static scylla-ninja-build scylla-ragel scylla-antlr3-tool scylla-antlr3-C++-devel python34 scylla-gcc-c++ >= 5.1.1, python34-pyparsing}
|
||||
BuildRequires: libaio-devel libstdc++-devel cryptopp-devel hwloc-devel numactl-devel libpciaccess-devel libxml2-devel zlib-devel thrift-devel yaml-cpp-devel lz4-devel snappy-devel jsoncpp-devel systemd-devel xz-devel pcre-devel elfutils-libelf-devel bzip2-devel keyutils-libs-devel xfsprogs-devel make gnutls-devel systemd-devel lksctp-tools-devel protobuf-devel protobuf-compiler libunwind-devel systemtap-sdt-devel libtool automake ninja-build
|
||||
%{?fedora:BuildRequires: boost-devel ragel antlr3-tool antlr3-C++-devel python3 gcc-c++ libasan libubsan python3-pyparsing dnf-yum}
|
||||
%{?rhel:BuildRequires: scylla-libstdc++-static scylla-boost-devel scylla-boost-static scylla-ragel scylla-antlr3-tool scylla-antlr3-C++-devel python34 scylla-gcc-c++ >= 5.1.1, python34-pyparsing}
|
||||
Requires: scylla-conf systemd-libs hwloc collectd PyYAML python-urwid pciutils pyparsing python-requests curl util-linux python-setuptools pciutils
|
||||
%{?rhel:Requires: python34 python34-PyYAML}
|
||||
Conflicts: abrt
|
||||
@@ -63,6 +63,9 @@ mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/security/limits.d/
|
||||
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/collectd.d/
|
||||
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/scylla/
|
||||
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/scylla.d/
|
||||
%if 0%{?rhel}
|
||||
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/
|
||||
%endif
|
||||
mkdir -p $RPM_BUILD_ROOT%{_sysctldir}/
|
||||
mkdir -p $RPM_BUILD_ROOT%{_docdir}/scylla/
|
||||
mkdir -p $RPM_BUILD_ROOT%{_unitdir}
|
||||
@@ -73,6 +76,9 @@ install -m644 dist/common/limits.d/scylla.conf $RPM_BUILD_ROOT%{_sysconfdir}/sec
|
||||
install -m644 dist/common/collectd.d/scylla.conf $RPM_BUILD_ROOT%{_sysconfdir}/collectd.d/
|
||||
install -m644 dist/common/scylla.d/*.conf $RPM_BUILD_ROOT%{_sysconfdir}/scylla.d/
|
||||
install -m644 dist/common/sysctl.d/*.conf $RPM_BUILD_ROOT%{_sysctldir}/
|
||||
%if 0%{?rhel}
|
||||
install -m644 dist/common/modprobe.d/*.conf $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/
|
||||
%endif
|
||||
install -d -m755 $RPM_BUILD_ROOT%{_sysconfdir}/scylla
|
||||
install -m644 conf/scylla.yaml $RPM_BUILD_ROOT%{_sysconfdir}/scylla/
|
||||
install -m644 conf/cassandra-rackdc.properties $RPM_BUILD_ROOT%{_sysconfdir}/scylla/
|
||||
@@ -151,10 +157,8 @@ rm -rf $RPM_BUILD_ROOT
|
||||
%{_docdir}/scylla/NOTICE.txt
|
||||
%{_docdir}/scylla/ORIGIN
|
||||
%{_docdir}/scylla/licenses/
|
||||
%{_unitdir}/scylla-server.service
|
||||
%{_unitdir}/scylla-housekeeping.service
|
||||
%{_unitdir}/scylla-housekeeping.timer
|
||||
%{_unitdir}/node-exporter.service
|
||||
%{_unitdir}/*.service
|
||||
%{_unitdir}/*.timer
|
||||
%{_bindir}/scylla
|
||||
%{_bindir}/iotune
|
||||
%{_bindir}/scyllatop
|
||||
@@ -228,6 +232,7 @@ Group: Applications/Databases
|
||||
Summary: Scylla configuration package for the Linux kernel
|
||||
License: AGPLv3
|
||||
URL: http://www.scylladb.com/
|
||||
Requires: kmod
|
||||
|
||||
%description kernel-conf
|
||||
This package contains Linux kernel configuration changes for the Scylla database. Install this package
|
||||
@@ -237,9 +242,18 @@ if Scylla is the main application on your server and you wish to optimize its la
|
||||
# We cannot use the sysctl_apply rpm macro because it is not present in 7.0
|
||||
# following is a "manual" expansion
|
||||
/usr/lib/systemd/systemd-sysctl 99-scylla-sched.conf >/dev/null 2>&1 || :
|
||||
# Write modprobe.d params when module already loaded
|
||||
%if 0%{?rhel}
|
||||
if [ -e /sys/module/raid0/parameters/devices_discard_performance ]; then
|
||||
echo Y > /sys/module/raid0/parameters/devices_discard_performance
|
||||
fi
|
||||
%endif
|
||||
|
||||
%files kernel-conf
|
||||
%defattr(-,root,root)
|
||||
%if 0%{?rhel}
|
||||
%config(noreplace) %{_sysconfdir}/modprobe.d/*.conf
|
||||
%endif
|
||||
%{_sysctldir}/*.conf
|
||||
|
||||
%changelog
|
||||
|
||||
@@ -50,6 +50,12 @@ public:
|
||||
// for real time waits.
|
||||
};
|
||||
|
||||
// Returns the time point that lies d before t, clamped to the representable
// range of the time point type instead of overflowing.
//
// For a non-negative d the result saturates at time_point::min(). For a
// negative d (which moves the result forward in time) it saturates at
// time_point::max(); without that branch min() + d would itself overflow.
template<typename Clock, typename Duration, typename Rep, typename Period>
inline
auto saturating_subtract(std::chrono::time_point<Clock, Duration> t, std::chrono::duration<Rep, Period> d) -> decltype(t) {
    using time_point = decltype(t);
    if (d < std::chrono::duration<Rep, Period>::zero()) {
        // Keep t at most |d| below max() so that t - d cannot exceed max().
        return std::min(t, time_point::max() + d) - d;
    }
    // Keep t at least d above min() so that t - d cannot fall below min().
    return std::max(t, time_point::min() + d) - d;
}
|
||||
|
||||
using expiry_opt = std::experimental::optional<gc_clock::time_point>;
|
||||
using ttl_opt = std::experimental::optional<gc_clock::duration>;
|
||||
|
||||
@@ -1135,6 +1135,15 @@ void gossiper::mark_alive(inet_address addr, endpoint_state& local_state) {
|
||||
// real_mark_alive(addr, local_state);
|
||||
// return;
|
||||
// }
|
||||
auto inserted = _pending_mark_alive_endpoints.insert(addr).second;
|
||||
if (inserted) {
|
||||
// The node is not in the _pending_mark_alive_endpoints
|
||||
logger.debug("Mark Node {} alive with EchoMessage", addr);
|
||||
} else {
|
||||
// We are in the process of marking this node alive
|
||||
logger.debug("Node {} is being marked as up, ignoring duplicated mark alive operation", addr);
|
||||
return;
|
||||
}
|
||||
|
||||
local_state.mark_dead();
|
||||
msg_addr id = get_msg_addr(addr);
|
||||
@@ -1143,10 +1152,22 @@ void gossiper::mark_alive(inet_address addr, endpoint_state& local_state) {
|
||||
ms().send_gossip_echo(id).get();
|
||||
logger.trace("Got EchoMessage Reply");
|
||||
set_last_processed_message_at();
|
||||
real_mark_alive(id.addr, local_state);
|
||||
// After sending the echo message, the node might no longer be in the
|
||||
// endpoint_state_map, so using the local_state reference
|
||||
// might cause a use-after-free
|
||||
auto it = endpoint_state_map.find(addr);
|
||||
if (it == endpoint_state_map.end()) {
|
||||
logger.info("Node {} is not in endpoint_state_map anymore", addr);
|
||||
} else {
|
||||
endpoint_state& state = it->second;
|
||||
logger.debug("Mark Node {} alive after EchoMessage", addr);
|
||||
real_mark_alive(addr, state);
|
||||
}
|
||||
} catch(...) {
|
||||
logger.warn("Fail to send EchoMessage to {}: {}", id, std::current_exception());
|
||||
}
|
||||
|
||||
_pending_mark_alive_endpoints.erase(addr);
|
||||
}
|
||||
|
||||
// Runs inside seastar::async context
|
||||
@@ -1188,10 +1209,7 @@ void gossiper::mark_dead(inet_address addr, endpoint_state& local_state) {
|
||||
|
||||
// Runs inside seastar::async context
|
||||
void gossiper::handle_major_state_change(inet_address ep, const endpoint_state& eps) {
|
||||
std::experimental::optional<endpoint_state> local_ep_state;
|
||||
if (endpoint_state_map.count(ep) > 0) {
|
||||
local_ep_state = endpoint_state_map.at(ep);
|
||||
}
|
||||
auto eps_old = get_endpoint_state_for_endpoint(ep);
|
||||
if (!is_dead_state(eps) && !_in_shadow_round) {
|
||||
if (endpoint_state_map.count(ep)) {
|
||||
logger.debug("Node {} has restarted, now UP, status = {}", ep, get_gossip_status(eps));
|
||||
@@ -1202,24 +1220,37 @@ void gossiper::handle_major_state_change(inet_address ep, const endpoint_state&
|
||||
logger.trace("Adding endpoint state for {}, status = {}", ep, get_gossip_status(eps));
|
||||
endpoint_state_map[ep] = eps;
|
||||
|
||||
auto& ep_state = endpoint_state_map.at(ep);
|
||||
if (_in_shadow_round) {
|
||||
// In shadow round, we are only interested in the peer's endpoint_state,
|
||||
// e.g., gossip features, host_id, tokens. No need to call the
|
||||
// on_restart or on_join callbacks or to go through the mark alive
|
||||
// procedure with EchoMessage gossip message. We will do them during
|
||||
// normal gossip runs anyway.
|
||||
logger.debug("In shadow round addr={}, eps={}", ep, eps);
|
||||
return;
|
||||
}
|
||||
|
||||
if (local_ep_state) {
|
||||
if (eps_old) {
|
||||
// the node restarted: it is up to the subscriber to take whatever action is necessary
|
||||
_subscribers.for_each([ep, local_ep_state] (auto& subscriber) {
|
||||
subscriber->on_restart(ep, *local_ep_state);
|
||||
_subscribers.for_each([ep, eps_old] (auto& subscriber) {
|
||||
subscriber->on_restart(ep, *eps_old);
|
||||
});
|
||||
}
|
||||
|
||||
auto& ep_state = endpoint_state_map.at(ep);
|
||||
if (!is_dead_state(ep_state)) {
|
||||
mark_alive(ep, ep_state);
|
||||
} else {
|
||||
logger.debug("Not marking {} alive due to dead state {}", ep, get_gossip_status(eps));
|
||||
mark_dead(ep, ep_state);
|
||||
}
|
||||
_subscribers.for_each([ep, ep_state] (auto& subscriber) {
|
||||
subscriber->on_join(ep, ep_state);
|
||||
});
|
||||
|
||||
auto eps_new = get_endpoint_state_for_endpoint(ep);
|
||||
if (eps_new) {
|
||||
_subscribers.for_each([ep, eps_new] (auto& subscriber) {
|
||||
subscriber->on_join(ep, *eps_new);
|
||||
});
|
||||
}
|
||||
// check this at the end so nodes will learn about the endpoint
|
||||
if (is_shutdown(ep)) {
|
||||
mark_as_shutdown(ep);
|
||||
@@ -1394,9 +1425,11 @@ future<> gossiper::start_gossiping(int generation_nbr, std::map<application_stat
|
||||
local_state.add_application_state(entry.first, entry.second);
|
||||
}
|
||||
|
||||
auto generation = local_state.get_heart_beat_state().get_generation();
|
||||
|
||||
//notify snitches that Gossiper is about to start
|
||||
return locator::i_endpoint_snitch::get_local_snitch_ptr()->gossiper_starting().then([this, &local_state] {
|
||||
logger.trace("gossip started with generation {}", local_state.get_heart_beat_state().get_generation());
|
||||
return locator::i_endpoint_snitch::get_local_snitch_ptr()->gossiper_starting().then([this, generation] {
|
||||
logger.trace("gossip started with generation {}", generation);
|
||||
_enabled = true;
|
||||
_nr_run = 0;
|
||||
_scheduled_gossip_task.arm(INTERVAL);
|
||||
@@ -1493,16 +1526,19 @@ future<> gossiper::add_local_application_state(application_state state, versione
|
||||
logger.error(err.c_str());
|
||||
throw std::runtime_error(err);
|
||||
}
|
||||
endpoint_state& ep_state = gossiper.endpoint_state_map.at(ep_addr);
|
||||
endpoint_state ep_state_before = gossiper.endpoint_state_map.at(ep_addr);
|
||||
// Fire "before change" notifications:
|
||||
gossiper.do_before_change_notifications(ep_addr, ep_state, state, value);
|
||||
gossiper.do_before_change_notifications(ep_addr, ep_state_before, state, value);
|
||||
// Notifications may have taken some time, so preventively raise the version
|
||||
// of the new value, otherwise it could be ignored by the remote node
|
||||
// if another value with a newer version was received in the meantime:
|
||||
value = storage_service_value_factory().clone_with_higher_version(value);
|
||||
// Add to local application state and fire "on change" notifications:
|
||||
ep_state.add_application_state(state, value);
|
||||
gossiper.do_on_change_notifications(ep_addr, state, value);
|
||||
if (gossiper.endpoint_state_map.count(ep_addr)) {
|
||||
auto& ep_state = gossiper.endpoint_state_map.at(ep_addr);
|
||||
ep_state.add_application_state(state, value);
|
||||
gossiper.do_on_change_notifications(ep_addr, state, value);
|
||||
}
|
||||
}).handle_exception([] (auto ep) {
|
||||
logger.warn("Fail to apply application_state: {}", ep);
|
||||
});
|
||||
|
||||
@@ -187,6 +187,9 @@ private:
|
||||
std::set<inet_address> _live_endpoints;
|
||||
std::list<inet_address> _live_endpoints_just_added;
|
||||
|
||||
/* nodes are being marked as alive */
|
||||
std::unordered_set<inet_address> _pending_mark_alive_endpoints;
|
||||
|
||||
/* unreachable member set */
|
||||
std::map<inet_address, clk::time_point> _unreachable_endpoints;
|
||||
|
||||
|
||||
69
memtable.cc
69
memtable.cc
@@ -65,17 +65,15 @@ future<> memtable::clear_gently() noexcept {
|
||||
auto t = std::make_unique<seastar::thread>(attr, [this] {
|
||||
auto& alloc = allocator();
|
||||
|
||||
// entries can no longer be moved after unlink_leftmost_without_rebalance()
|
||||
// so need to disable compaction.
|
||||
logalloc::reclaim_lock rl(*this);
|
||||
|
||||
auto p = std::move(partitions);
|
||||
while (!p.empty()) {
|
||||
auto batch_size = std::min<size_t>(p.size(), 32);
|
||||
auto dirty_before = dirty_size();
|
||||
with_allocator(alloc, [&] () noexcept {
|
||||
while (batch_size--) {
|
||||
alloc.destroy(p.unlink_leftmost_without_rebalance());
|
||||
p.erase_and_dispose(p.begin(), [&] (auto e) {
|
||||
alloc.destroy(e);
|
||||
});
|
||||
}
|
||||
});
|
||||
remove_flushed_memory(dirty_before - dirty_size());
|
||||
@@ -205,19 +203,23 @@ protected:
|
||||
, _range(&range)
|
||||
{ }
|
||||
|
||||
memtable_entry* fetch_next_entry() {
|
||||
memtable_entry* fetch_entry() {
|
||||
update_iterators();
|
||||
if (_i == _end) {
|
||||
return nullptr;
|
||||
} else {
|
||||
memtable_entry& e = *_i;
|
||||
++_i;
|
||||
_last = e.key();
|
||||
_memtable->upgrade_entry(e);
|
||||
return &e;
|
||||
}
|
||||
}
|
||||
|
||||
// Remembers the key of the entry _i currently points at in _last, then
// moves _i on to the following entry. Dereferences _i unconditionally, so
// it must not be called when _i == _end.
void advance() {
|
||||
memtable_entry& e = *_i;
|
||||
_last = e.key();
|
||||
++_i;
|
||||
}
|
||||
|
||||
logalloc::allocating_section& read_section() {
|
||||
return _memtable->_read_section;
|
||||
}
|
||||
@@ -287,14 +289,18 @@ public:
|
||||
return _delegate();
|
||||
}
|
||||
|
||||
logalloc::reclaim_lock _(region());
|
||||
managed_bytes::linearization_context_guard lcg;
|
||||
memtable_entry* e = fetch_next_entry();
|
||||
if (!e) {
|
||||
return make_ready_future<streamed_mutation_opt>(stdx::nullopt);
|
||||
} else {
|
||||
return make_ready_future<streamed_mutation_opt>(e->read(mtbl(), schema(), _slice));
|
||||
}
|
||||
return read_section()(region(), [&] {
|
||||
return with_linearized_managed_bytes([&] {
|
||||
memtable_entry* e = fetch_entry();
|
||||
if (!e) {
|
||||
return make_ready_future<streamed_mutation_opt>(stdx::nullopt);
|
||||
} else {
|
||||
auto ret = make_ready_future<streamed_mutation_opt>(e->read(mtbl(), schema(), _slice));
|
||||
advance();
|
||||
return ret;
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
@@ -391,19 +397,24 @@ public:
|
||||
flush_reader& operator=(const flush_reader&) = delete;
|
||||
|
||||
virtual future<streamed_mutation_opt> operator()() override {
|
||||
logalloc::reclaim_lock _(region());
|
||||
managed_bytes::linearization_context_guard lcg;
|
||||
memtable_entry* e = fetch_next_entry();
|
||||
if (!e) {
|
||||
return make_ready_future<streamed_mutation_opt>(stdx::nullopt);
|
||||
} else {
|
||||
auto cr = query::clustering_key_filter_ranges::get_ranges(*schema(), query::full_slice, e->key().key());
|
||||
auto snp = e->partition().read(schema());
|
||||
auto mpsr = make_partition_snapshot_reader<partition_snapshot_accounter>(schema(), e->key(), std::move(cr), snp, region(), read_section(), mtbl(), _flushed_memory);
|
||||
_flushed_memory.account_component(*e);
|
||||
_flushed_memory.account_component(*snp);
|
||||
return make_ready_future<streamed_mutation_opt>(std::move(mpsr));
|
||||
}
|
||||
return read_section()(region(), [&] {
|
||||
return with_linearized_managed_bytes([&] {
|
||||
memtable_entry* e = fetch_entry();
|
||||
if (!e) {
|
||||
return make_ready_future<streamed_mutation_opt>(stdx::nullopt);
|
||||
} else {
|
||||
auto cr = query::clustering_key_filter_ranges::get_ranges(*schema(), query::full_slice, e->key().key());
|
||||
auto snp = e->partition().read(schema());
|
||||
auto mpsr = make_partition_snapshot_reader<partition_snapshot_accounter>(schema(), e->key(), std::move(cr),
|
||||
snp, region(), read_section(), mtbl(), _flushed_memory);
|
||||
_flushed_memory.account_component(*e);
|
||||
_flushed_memory.account_component(*snp);
|
||||
auto ret = make_ready_future<streamed_mutation_opt>(std::move(mpsr));
|
||||
advance();
|
||||
return ret;
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -274,7 +274,13 @@ void messaging_service::start_listen() {
|
||||
if (listen_to_bc) {
|
||||
_server_tls[1] = listen(utils::fb_utilities::get_broadcast_address());
|
||||
}
|
||||
|
||||
}
|
||||
// Do this on just cpu 0, to avoid duplicate logs.
|
||||
if (engine().cpu_id() == 0) {
|
||||
if (_server_tls[0]) {
|
||||
logger.info("Starting Encrypted Messaging Service on SSL port {}", _ssl_port);
|
||||
}
|
||||
logger.info("Starting Messaging Service on port {}", _port);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -308,14 +314,6 @@ messaging_service::messaging_service(gms::inet_address ip
|
||||
if (listen_now) {
|
||||
start_listen();
|
||||
}
|
||||
|
||||
// Do this on just cpu 0, to avoid duplicate logs.
|
||||
if (engine().cpu_id() == 0) {
|
||||
if (_server_tls[0]) {
|
||||
logger.info("Starting Encrypted Messaging Service on SSL port {}", _ssl_port);
|
||||
}
|
||||
logger.info("Starting Messaging Service on port {}", _port);
|
||||
}
|
||||
}
|
||||
|
||||
msg_addr messaging_service::get_source(const rpc::client_info& cinfo) {
|
||||
|
||||
@@ -123,7 +123,7 @@ public:
|
||||
uint32_t partition_limit, CompactedMutationsConsumer consumer)
|
||||
: _schema(s)
|
||||
, _query_time(query_time)
|
||||
, _gc_before(query_time - s.gc_grace_seconds())
|
||||
, _gc_before(saturating_subtract(query_time, s.gc_grace_seconds()))
|
||||
, _can_gc(always_gc)
|
||||
, _slice(slice)
|
||||
, _row_limit(limit)
|
||||
@@ -139,7 +139,7 @@ public:
|
||||
std::function<api::timestamp_type(const dht::decorated_key&)> get_max_purgeable)
|
||||
: _schema(s)
|
||||
, _query_time(compaction_time)
|
||||
, _gc_before(_query_time - s.gc_grace_seconds())
|
||||
, _gc_before(saturating_subtract(_query_time, s.gc_grace_seconds()))
|
||||
, _get_max_purgeable(std::move(get_max_purgeable))
|
||||
, _can_gc([this] (tombstone t) { return can_gc(t); })
|
||||
, _slice(query::full_slice)
|
||||
|
||||
@@ -1183,7 +1183,7 @@ uint32_t mutation_partition::do_compact(const schema& s,
|
||||
{
|
||||
assert(row_limit > 0);
|
||||
|
||||
auto gc_before = query_time - s.gc_grace_seconds();
|
||||
auto gc_before = saturating_subtract(query_time, s.gc_grace_seconds());
|
||||
|
||||
auto should_purge_tombstone = [&] (const tombstone& t) {
|
||||
return t.deletion_time < gc_before && can_gc(t);
|
||||
@@ -1526,12 +1526,19 @@ bool row::compact_and_expire(const schema& s, column_kind kind, tombstone tomb,
|
||||
const column_definition& def = s.column_at(kind, id);
|
||||
if (def.is_atomic()) {
|
||||
atomic_cell_view cell = c.as_atomic_cell();
|
||||
auto can_erase_cell = [&] {
|
||||
return cell.deletion_time() < gc_before && can_gc(tombstone(cell.timestamp(), cell.deletion_time()));
|
||||
};
|
||||
|
||||
if (cell.is_covered_by(tomb, def.is_counter())) {
|
||||
erase = true;
|
||||
} else if (cell.has_expired(query_time)) {
|
||||
c = atomic_cell::make_dead(cell.timestamp(), cell.deletion_time());
|
||||
erase = can_erase_cell();
|
||||
if (!erase) {
|
||||
c = atomic_cell::make_dead(cell.timestamp(), cell.deletion_time());
|
||||
}
|
||||
} else if (!cell.is_live()) {
|
||||
erase = cell.deletion_time() < gc_before && can_gc(tombstone(cell.timestamp(), cell.deletion_time()));
|
||||
erase = can_erase_cell();
|
||||
} else {
|
||||
any_live |= true;
|
||||
}
|
||||
|
||||
@@ -345,7 +345,7 @@ public:
|
||||
: _w(std::move(w))
|
||||
, _row_count(c)
|
||||
, _short_read(sr)
|
||||
, _memory_tracker(std::move(_memory_tracker))
|
||||
, _memory_tracker(std::move(memory_tracker))
|
||||
, _partition_count(pc)
|
||||
{
|
||||
w.reduce_chunk_count();
|
||||
|
||||
19
range.hh
19
range.hh
@@ -601,13 +601,13 @@ private:
|
||||
struct built_in_ : std_ {};
|
||||
|
||||
template<typename Range, typename LessComparator,
|
||||
typename = decltype(&std::remove_reference<Range>::type::lower_bound)>
|
||||
typename = decltype(std::declval<Range>().lower_bound(std::declval<T>(), std::declval<LessComparator>()))>
|
||||
typename std::remove_reference<Range>::type::const_iterator do_lower_bound(const T& value, Range&& r, LessComparator&& cmp, built_in_) const {
|
||||
return r.lower_bound(value, std::forward<LessComparator>(cmp));
|
||||
}
|
||||
|
||||
template<typename Range, typename LessComparator,
|
||||
typename = decltype(&std::remove_reference<Range>::type::upper_bound)>
|
||||
typename = decltype(std::declval<Range>().upper_bound(std::declval<T>(), std::declval<LessComparator>()))>
|
||||
typename std::remove_reference<Range>::type::const_iterator do_upper_bound(const T& value, Range&& r, LessComparator&& cmp, built_in_) const {
|
||||
return r.upper_bound(value, std::forward<LessComparator>(cmp));
|
||||
}
|
||||
@@ -649,6 +649,21 @@ public:
|
||||
return boost::make_iterator_range(lower_bound(range, cmp), upper_bound(range, cmp));
|
||||
}
|
||||
|
||||
// Returns the intersection between this range and other.
|
||||
template<typename Comparator>
|
||||
stdx::optional<nonwrapping_range> intersection(const nonwrapping_range& other, Comparator&& cmp) const {
|
||||
auto p = std::minmax(_range, other._range, [&cmp] (auto&& a, auto&& b) {
|
||||
return wrapping_range<T>::less_than(a.start_bound(), b.start_bound(), cmp);
|
||||
});
|
||||
if (wrapping_range<T>::greater_than_or_equal(p.first.end_bound(), p.second.start_bound(), cmp)) {
|
||||
auto& end = std::min(p.first.end_bound(), p.second.end_bound(), [&cmp] (auto&& a, auto&& b) {
|
||||
return !wrapping_range<T>::greater_than_or_equal(a, b, cmp);
|
||||
});
|
||||
return nonwrapping_range(p.second.start(), end.b);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
template<typename U>
|
||||
friend std::ostream& operator<<(std::ostream& out, const nonwrapping_range<U>& r);
|
||||
};
|
||||
|
||||
@@ -492,6 +492,13 @@ static void split_and_add(std::vector<::dht::token_range>& ranges,
|
||||
auto midpoint = dht::global_partitioner().midpoint(
|
||||
range.start() ? range.start()->value() : dht::minimum_token(),
|
||||
range.end() ? range.end()->value() : dht::minimum_token());
|
||||
// This shouldn't happen, but if the range included just one token, we
|
||||
// can't split further (split() may actually fail with assertion failure)
|
||||
if ((range.start() && midpoint == range.start()->value()) ||
|
||||
(range.end() && midpoint == range.end()->value())) {
|
||||
ranges.push_back(range);
|
||||
return;
|
||||
}
|
||||
auto halves = range.split(midpoint, dht::token_comparator());
|
||||
ranges.push_back(halves.first);
|
||||
ranges.push_back(halves.second);
|
||||
@@ -512,6 +519,24 @@ static void split_and_add(std::vector<::dht::token_range>& ranges,
|
||||
constexpr int parallelism = 100;
|
||||
static thread_local semaphore parallelism_semaphore(parallelism);
|
||||
|
||||
// Estimates how many partitions of table `cf` in `keyspace` fall inside `range`.
// The lambda passed to map_reduce0() sums estimated_keys_for_range(range) over
// the table's sstables as seen by the database instance it is given; the
// partial sums are combined with std::plus starting from 0.
// NOTE(review): presumably the lambda runs once per shard of `db` - confirm.
static future<uint64_t> estimate_partitions(seastar::sharded<database>& db, const sstring& keyspace,
|
||||
const sstring& cf, const dht::token_range& range) {
|
||||
return db.map_reduce0(
|
||||
[keyspace, cf, range] (auto& db) {
|
||||
// FIXME: column_family should have a method to estimate the number of
|
||||
// partitions (and of course it should use cardinality estimation bitmaps,
|
||||
// not trivial sum). We shouldn't have this ugly code here...
|
||||
// FIXME: If sstables are shared, they will be accounted more than
|
||||
// once. However, shared sstables should exist for a short-time only.
|
||||
auto sstables = db.find_column_family(keyspace, cf).get_sstables();
|
||||
return boost::accumulate(*sstables, uint64_t(0),
|
||||
[&range] (uint64_t x, auto&& sst) { return x + sst->estimated_keys_for_range(range); });
|
||||
},
|
||||
uint64_t(0),
|
||||
std::plus<uint64_t>()
|
||||
);
|
||||
}
|
||||
|
||||
// Repair a single cf in a single local range.
|
||||
// Comparable to RepairJob in Origin.
|
||||
static future<> repair_cf_range(repair_info& ri,
|
||||
@@ -522,19 +547,11 @@ static future<> repair_cf_range(repair_info& ri,
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
std::vector<::dht::token_range> ranges;
|
||||
ranges.push_back(range);
|
||||
|
||||
return estimate_partitions(ri.db, ri.keyspace, cf, range).then([&ri, cf, range, &neighbors] (uint64_t estimated_partitions) {
|
||||
// Additionally, we want to break up large ranges so they will have
|
||||
// (approximately) a desired number of rows each.
|
||||
// FIXME: column_family should have a method to estimate the number of
|
||||
// partitions (and of course it should use cardinality estimation bitmaps,
|
||||
// not trivial sum). We shouldn't have this ugly code here...
|
||||
auto sstables = ri.db.local().find_column_family(ri.keyspace, cf).get_sstables();
|
||||
uint64_t estimated_partitions = 0;
|
||||
for (auto sst : *sstables) {
|
||||
estimated_partitions += sst->estimated_keys_for_range(range);
|
||||
}
|
||||
std::vector<::dht::token_range> ranges;
|
||||
ranges.push_back(range);
|
||||
|
||||
// FIXME: we should have an on-the-fly iterator generator here, not
|
||||
// fill a vector in advance.
|
||||
@@ -727,6 +744,7 @@ static future<> repair_cf_range(repair_info& ri,
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Repair a single local range, multiple column families.
|
||||
|
||||
@@ -1023,12 +1023,13 @@ future<streamed_mutation_opt> cache_entry::read_wide(row_cache& rc, schema_ptr s
|
||||
: _range(std::move(pr))
|
||||
, _reader(rc._underlying(s, _range, slice, pc))
|
||||
{ }
|
||||
range_and_underlyig_reader(range_and_underlyig_reader&&) = delete;
|
||||
};
|
||||
rc._tracker.on_uncached_wide_partition();
|
||||
auto pr = dht::partition_range::make_singular(_key);
|
||||
return do_with(range_and_underlyig_reader(rc, s, std::move(pr), slice, pc), [] (auto& r_a_ur) {
|
||||
return r_a_ur._reader();
|
||||
});
|
||||
auto rd_ptr = std::make_unique<range_and_underlyig_reader>(rc, s, std::move(pr), slice, pc);
|
||||
auto& r_a_ur = *rd_ptr;
|
||||
return r_a_ur._reader().finally([rd_ptr = std::move(rd_ptr)] {});
|
||||
}
|
||||
|
||||
streamed_mutation cache_entry::read(row_cache& rc, const schema_ptr& s) {
|
||||
|
||||
14
schema.cc
14
schema.cc
@@ -145,6 +145,20 @@ void schema::rebuild() {
|
||||
|
||||
thrift()._compound = is_compound();
|
||||
thrift()._is_dynamic = clustering_key_size() > 0;
|
||||
|
||||
if (default_validator()->is_counter()) {
|
||||
for (auto&& cdef : boost::range::join(static_columns(), regular_columns())) {
|
||||
if (!cdef.type->is_counter()) {
|
||||
throw exceptions::configuration_exception(sprint("Cannot add a non counter column (%s) in a counter column family", cdef.name_as_text()));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (auto&& cdef : all_columns()) {
|
||||
if (cdef.second->type->is_counter()) {
|
||||
throw exceptions::configuration_exception(sprint("Cannot add a counter column (%s) in a non counter column family", cdef.second->name_as_text()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const column_mapping& schema::get_column_mapping() const {
|
||||
|
||||
@@ -31,6 +31,8 @@ import os
|
||||
import sys
|
||||
import subprocess
|
||||
import uuid
|
||||
import re
|
||||
import glob
|
||||
from pkg_resources import parse_version
|
||||
|
||||
VERSION = "1.0"
|
||||
@@ -69,6 +71,20 @@ def create_uuid_file(fl):
|
||||
with open(args.uuid_file, 'w') as myfile:
|
||||
myfile.write(str(uuid.uuid1()) + "\n")
|
||||
|
||||
def get_repo_file(dir):
    """Find the Scylla repository that a package-manager repo file points at.

    ``dir`` is passed straight to ``glob.glob``, so it is a glob pattern
    selecting the candidate repo files. Matching files are scanned
    newest-first by modification time, and the first line containing a
    Scylla deb or rpm repository URL decides the result.

    Returns a ``(repo_type, repo_id)`` tuple extracted from that URL, or
    ``(None, None)`` when no scanned file contains such a URL.
    """
    # Raw strings keep "\s" a regex escape instead of an invalid string
    # escape; compiling once avoids re-parsing the patterns for every line.
    deb_url = re.compile(r".*http.?://.*/scylladb/([^/\s]+)/deb/([^/\s]+)\s.*")
    rpm_url = re.compile(r".*http.?://.*/scylladb/([^/]+)/rpm/[^/]+/([^/\s]+)/.*")

    files = glob.glob(dir)
    files.sort(key=os.path.getmtime, reverse=True)
    for name in files:
        with open(name, 'r') as myfile:
            for line in myfile:
                # The deb pattern is tried before the rpm one on each line.
                match = deb_url.search(line) or rpm_url.search(line)
                if match:
                    return match.group(2), match.group(1)
    return None, None
|
||||
|
||||
def check_version(ar):
|
||||
if config and (not config.has_option("housekeeping", "check-version") or not config.getboolean("housekeeping", "check-version")):
|
||||
return
|
||||
@@ -87,6 +103,10 @@ def check_version(ar):
|
||||
params = params + "&sts=" + ar.mode
|
||||
if uid:
|
||||
params = params + "&uu=" + uid
|
||||
if repo_id:
|
||||
params = params + "&rid=" + repo_id
|
||||
if repo_type:
|
||||
params = params + "&rtype=" + repo_type
|
||||
latest_version = get_json_from_url(version_url + params)["version"]
|
||||
except:
|
||||
traceln("Unable to retrieve version information")
|
||||
@@ -99,6 +119,7 @@ parser.add_argument('-q', '--quiet', action='store_true', default=False, help='Q
|
||||
parser.add_argument('-c', '--config', default="", help='An optional config file. Specifying a missing file will terminate the script')
|
||||
parser.add_argument('--uuid', default="", help='A uuid for the requests')
|
||||
parser.add_argument('--uuid-file', default="", help='A uuid file for the requests')
|
||||
parser.add_argument('--repo-files', default="", help='The repository files that is been used for private repositories')
|
||||
|
||||
subparsers = parser.add_subparsers(help='Available commands')
|
||||
parser_help = subparsers.add_parser('help', help='Display help information')
|
||||
@@ -111,6 +132,9 @@ parser_system.set_defaults(func=check_version)
|
||||
args = parser.parse_args()
|
||||
quiet = args.quiet
|
||||
config = None
|
||||
repo_id = None
|
||||
repo_type = None
|
||||
|
||||
if args.config != "":
|
||||
if not os.path.isfile(args.config):
|
||||
traceln("Config file ", args.config, " is missing, terminating")
|
||||
@@ -125,4 +149,6 @@ if args.uuid_file != "":
|
||||
create_uuid_file(args.uuid_file)
|
||||
with open(args.uuid_file, 'r') as myfile:
|
||||
uid = myfile.read().replace('\n', '')
|
||||
if args.repo_files != "":
|
||||
repo_type, repo_id = get_repo_file(args.repo_files)
|
||||
args.func(args)
|
||||
|
||||
2
seastar
2
seastar
Submodule seastar updated: f07f8ed68d...328fdbc5a0
@@ -481,8 +481,10 @@ future<> migration_manager::announce_new_column_family(schema_ptr cfm, bool anno
|
||||
throw exceptions::already_exists_exception(cfm->ks_name(), cfm->cf_name());
|
||||
}
|
||||
logger.info("Create new ColumnFamily: {}", cfm);
|
||||
auto mutations = db::schema_tables::make_create_table_mutations(keyspace.metadata(), cfm, api::new_timestamp());
|
||||
return announce(std::move(mutations), announce_locally);
|
||||
return db::schema_tables::make_create_table_mutations(keyspace.metadata(), cfm, api::new_timestamp())
|
||||
.then([announce_locally, this] (auto&& mutations) {
|
||||
return announce(std::move(mutations), announce_locally);
|
||||
});
|
||||
} catch (const no_such_keyspace& e) {
|
||||
throw exceptions::configuration_exception(sprint("Cannot add table '%s' to non existing keyspace '%s'.", cfm->cf_name(), cfm->ks_name()));
|
||||
}
|
||||
@@ -501,8 +503,10 @@ future<> migration_manager::announce_column_family_update(schema_ptr cfm, bool f
|
||||
#endif
|
||||
logger.info("Update table '{}.{}' From {} To {}", cfm->ks_name(), cfm->cf_name(), *old_schema, *cfm);
|
||||
auto&& keyspace = db.find_keyspace(cfm->ks_name());
|
||||
auto mutations = db::schema_tables::make_update_table_mutations(keyspace.metadata(), old_schema, cfm, api::new_timestamp(), from_thrift);
|
||||
return announce(std::move(mutations), announce_locally);
|
||||
return db::schema_tables::make_update_table_mutations(keyspace.metadata(), old_schema, cfm, api::new_timestamp(), from_thrift)
|
||||
.then([announce_locally] (auto&& mutations) {
|
||||
return announce(std::move(mutations), announce_locally);
|
||||
});
|
||||
} catch (const no_such_column_family& e) {
|
||||
throw exceptions::configuration_exception(sprint("Cannot update non existing table '%s' in keyspace '%s'.",
|
||||
cfm->cf_name(), cfm->ks_name()));
|
||||
@@ -512,8 +516,10 @@ future<> migration_manager::announce_column_family_update(schema_ptr cfm, bool f
|
||||
static future<> do_announce_new_type(user_type new_type, bool announce_locally) {
|
||||
auto& db = get_local_storage_proxy().get_db().local();
|
||||
auto&& keyspace = db.find_keyspace(new_type->_keyspace);
|
||||
auto mutations = db::schema_tables::make_create_type_mutations(keyspace.metadata(), new_type, api::new_timestamp());
|
||||
return migration_manager::announce(std::move(mutations), announce_locally);
|
||||
return db::schema_tables::make_create_type_mutations(keyspace.metadata(), new_type, api::new_timestamp())
|
||||
.then([announce_locally] (auto&& mutations) {
|
||||
return migration_manager::announce(std::move(mutations), announce_locally);
|
||||
});
|
||||
}
|
||||
|
||||
future<> migration_manager::announce_new_type(user_type new_type, bool announce_locally) {
|
||||
@@ -609,8 +615,10 @@ future<> migration_manager::announce_column_family_drop(const sstring& ks_name,
|
||||
ks_name, ::join(", ", views | boost::adaptors::transformed([](auto&& v) { return v->cf_name(); }))));
|
||||
}
|
||||
logger.info("Drop table '{}.{}'", schema->ks_name(), schema->cf_name());
|
||||
auto mutations = db::schema_tables::make_drop_table_mutations(db.find_keyspace(ks_name).metadata(), schema, api::new_timestamp());
|
||||
return announce(std::move(mutations), announce_locally);
|
||||
return db::schema_tables::make_drop_table_mutations(db.find_keyspace(ks_name).metadata(), schema, api::new_timestamp())
|
||||
.then([announce_locally] (auto&& mutations) {
|
||||
return announce(std::move(mutations), announce_locally);
|
||||
});
|
||||
} catch (const no_such_column_family& e) {
|
||||
throw exceptions::configuration_exception(sprint("Cannot drop non existing table '%s' in keyspace '%s'.", cf_name, ks_name));
|
||||
}
|
||||
@@ -621,8 +629,10 @@ future<> migration_manager::announce_type_drop(user_type dropped_type, bool anno
|
||||
auto& db = get_local_storage_proxy().get_db().local();
|
||||
auto&& keyspace = db.find_keyspace(dropped_type->_keyspace);
|
||||
logger.info("Drop User Type: {}", dropped_type->get_name_as_string());
|
||||
auto mutations = db::schema_tables::make_drop_type_mutations(keyspace.metadata(), dropped_type, api::new_timestamp());
|
||||
return announce(std::move(mutations), announce_locally);
|
||||
return db::schema_tables::make_drop_type_mutations(keyspace.metadata(), dropped_type, api::new_timestamp())
|
||||
.then([announce_locally] (auto&& mutations) {
|
||||
return announce(std::move(mutations), announce_locally);
|
||||
});
|
||||
}
|
||||
|
||||
future<> migration_manager::announce_new_view(view_ptr view, bool announce_locally)
|
||||
@@ -637,8 +647,10 @@ future<> migration_manager::announce_new_view(view_ptr view, bool announce_local
|
||||
throw exceptions::already_exists_exception(view->ks_name(), view->cf_name());
|
||||
}
|
||||
logger.info("Create new view: {}", view);
|
||||
auto mutations = db::schema_tables::make_create_view_mutations(keyspace, std::move(view), api::new_timestamp());
|
||||
return announce(std::move(mutations), announce_locally);
|
||||
return db::schema_tables::make_create_view_mutations(keyspace, std::move(view), api::new_timestamp())
|
||||
.then([announce_locally] (auto&& mutations) {
|
||||
return announce(std::move(mutations), announce_locally);
|
||||
});
|
||||
} catch (const no_such_keyspace& e) {
|
||||
throw exceptions::configuration_exception(sprint("Cannot add view '%s' to non existing keyspace '%s'.", view->cf_name(), view->ks_name()));
|
||||
}
|
||||
@@ -660,8 +672,10 @@ future<> migration_manager::announce_view_update(view_ptr view, bool announce_lo
|
||||
oldCfm.validateCompatility(cfm);
|
||||
#endif
|
||||
logger.info("Update view '{}.{}' From {} To {}", view->ks_name(), view->cf_name(), *old_view, *view);
|
||||
auto mutations = db::schema_tables::make_update_view_mutations(std::move(keyspace), view_ptr(old_view), std::move(view), api::new_timestamp());
|
||||
return announce(std::move(mutations), announce_locally);
|
||||
return db::schema_tables::make_update_view_mutations(std::move(keyspace), view_ptr(old_view), std::move(view), api::new_timestamp())
|
||||
.then([announce_locally] (auto&& mutations) {
|
||||
return announce(std::move(mutations), announce_locally);
|
||||
});
|
||||
} catch (const std::out_of_range& e) {
|
||||
throw exceptions::configuration_exception(sprint("Cannot update non existing materialized view '%s' in keyspace '%s'.",
|
||||
view->cf_name(), view->ks_name()));
|
||||
@@ -680,8 +694,10 @@ future<> migration_manager::announce_view_drop(const sstring& ks_name,
|
||||
}
|
||||
auto keyspace = db.find_keyspace(ks_name).metadata();
|
||||
logger.info("Drop view '{}.{}'", view->ks_name(), view->cf_name());
|
||||
auto mutations = db::schema_tables::make_drop_view_mutations(std::move(keyspace), view_ptr(std::move(view)), api::new_timestamp());
|
||||
return announce(std::move(mutations), announce_locally);
|
||||
return db::schema_tables::make_drop_view_mutations(std::move(keyspace), view_ptr(std::move(view)), api::new_timestamp())
|
||||
.then([announce_locally] (auto&& mutations) {
|
||||
return announce(std::move(mutations), announce_locally);
|
||||
});
|
||||
} catch (const no_such_column_family& e) {
|
||||
throw exceptions::configuration_exception(sprint("Cannot drop non existing materialized view '%s' in keyspace '%s'.",
|
||||
cf_name, ks_name));
|
||||
|
||||
@@ -478,7 +478,6 @@ inline uint64_t& storage_proxy::split_stats::get_ep_stat(gms::inet_address ep) {
|
||||
storage_proxy::~storage_proxy() {}
|
||||
storage_proxy::storage_proxy(distributed<database>& db) : _db(db) {
|
||||
namespace sm = seastar::metrics;
|
||||
|
||||
_metrics.add_group(COORDINATOR_STATS_CATEGORY, {
|
||||
sm::make_queue_length("foreground_writes", [this] { return _stats.writes - _stats.background_writes; },
|
||||
sm::description("number of currently pending foreground write requests")),
|
||||
@@ -486,7 +485,7 @@ storage_proxy::storage_proxy(distributed<database>& db) : _db(db) {
|
||||
sm::make_queue_length("background_writes", [this] { return _stats.background_writes; },
|
||||
sm::description("number of currently pending background write requests")),
|
||||
|
||||
sm::make_queue_length("throttled_writes", [this] { return _throttled_writes.size(); },
|
||||
sm::make_queue_length("current_throttled_writes", [this] { return _throttled_writes.size(); },
|
||||
sm::description("number of currently throttled write requests")),
|
||||
|
||||
sm::make_total_operations("throttled_writes", [this] { return _stats.throttled_writes; },
|
||||
@@ -1733,14 +1732,14 @@ protected:
|
||||
size_t _targets_count;
|
||||
promise<> _done_promise; // all target responded
|
||||
bool _timedout = false; // will be true if request timeouts
|
||||
timer<lowres_clock> _timeout;
|
||||
timer<storage_proxy::clock_type> _timeout;
|
||||
size_t _responses = 0;
|
||||
schema_ptr _schema;
|
||||
|
||||
virtual void on_timeout() {}
|
||||
virtual size_t response_count() const = 0;
|
||||
public:
|
||||
abstract_read_resolver(schema_ptr schema, db::consistency_level cl, size_t target_count, lowres_clock::time_point timeout)
|
||||
abstract_read_resolver(schema_ptr schema, db::consistency_level cl, size_t target_count, storage_proxy::clock_type::time_point timeout)
|
||||
: _cl(cl)
|
||||
, _targets_count(target_count)
|
||||
, _schema(std::move(schema))
|
||||
@@ -1796,7 +1795,7 @@ class digest_read_resolver : public abstract_read_resolver {
|
||||
return _digest_results.size();
|
||||
}
|
||||
public:
|
||||
digest_read_resolver(schema_ptr schema, db::consistency_level cl, size_t block_for, lowres_clock::time_point timeout) : abstract_read_resolver(std::move(schema), cl, 0, timeout), _block_for(block_for) {}
|
||||
digest_read_resolver(schema_ptr schema, db::consistency_level cl, size_t block_for, storage_proxy::clock_type::time_point timeout) : abstract_read_resolver(std::move(schema), cl, 0, timeout), _block_for(block_for) {}
|
||||
void add_data(gms::inet_address from, foreign_ptr<lw_shared_ptr<query::result>> result) {
|
||||
if (!_timedout) {
|
||||
// if only one target was queried digest_check() will be skipped so we can also skip digest calculation
|
||||
@@ -2143,7 +2142,7 @@ private:
|
||||
return false;
|
||||
}
|
||||
public:
|
||||
data_read_resolver(schema_ptr schema, db::consistency_level cl, size_t targets_count, lowres_clock::time_point timeout) : abstract_read_resolver(std::move(schema), cl, targets_count, timeout) {
|
||||
data_read_resolver(schema_ptr schema, db::consistency_level cl, size_t targets_count, storage_proxy::clock_type::time_point timeout) : abstract_read_resolver(std::move(schema), cl, targets_count, timeout) {
|
||||
_data_results.reserve(targets_count);
|
||||
}
|
||||
void add_mutate_data(gms::inet_address from, foreign_ptr<lw_shared_ptr<reconcilable_result>> result) {
|
||||
@@ -2330,7 +2329,7 @@ protected:
|
||||
using targets_iterator = std::vector<gms::inet_address>::iterator;
|
||||
using digest_resolver_ptr = ::shared_ptr<digest_read_resolver>;
|
||||
using data_resolver_ptr = ::shared_ptr<data_read_resolver>;
|
||||
using clock_type = lowres_clock;
|
||||
using clock_type = storage_proxy::clock_type;
|
||||
|
||||
schema_ptr _schema;
|
||||
shared_ptr<storage_proxy> _proxy;
|
||||
@@ -2454,7 +2453,7 @@ protected:
|
||||
uint32_t original_partition_limit() const {
|
||||
return _cmd->partition_limit;
|
||||
}
|
||||
void reconcile(db::consistency_level cl, lowres_clock::time_point timeout, lw_shared_ptr<query::read_command> cmd) {
|
||||
void reconcile(db::consistency_level cl, storage_proxy::clock_type::time_point timeout, lw_shared_ptr<query::read_command> cmd) {
|
||||
data_resolver_ptr data_resolver = ::make_shared<data_read_resolver>(_schema, cl, _targets.size(), timeout);
|
||||
auto exec = shared_from_this();
|
||||
|
||||
@@ -2529,12 +2528,12 @@ protected:
|
||||
}
|
||||
});
|
||||
}
|
||||
void reconcile(db::consistency_level cl, lowres_clock::time_point timeout) {
|
||||
void reconcile(db::consistency_level cl, storage_proxy::clock_type::time_point timeout) {
|
||||
reconcile(cl, timeout, _cmd);
|
||||
}
|
||||
|
||||
public:
|
||||
virtual future<foreign_ptr<lw_shared_ptr<query::result>>> execute(lowres_clock::time_point timeout) {
|
||||
virtual future<foreign_ptr<lw_shared_ptr<query::result>>> execute(storage_proxy::clock_type::time_point timeout) {
|
||||
digest_resolver_ptr digest_resolver = ::make_shared<digest_read_resolver>(_schema, _cl, _block_for, timeout);
|
||||
auto exec = shared_from_this();
|
||||
|
||||
@@ -2604,7 +2603,7 @@ public:
|
||||
class always_speculating_read_executor : public abstract_read_executor {
|
||||
public:
|
||||
using abstract_read_executor::abstract_read_executor;
|
||||
virtual future<> make_requests(digest_resolver_ptr resolver, lowres_clock::time_point timeout) {
|
||||
virtual future<> make_requests(digest_resolver_ptr resolver, storage_proxy::clock_type::time_point timeout) {
|
||||
resolver->add_wait_targets(_targets.size());
|
||||
// FIXME: consider disabling for CL=*ONE
|
||||
bool want_digest = true;
|
||||
@@ -2615,10 +2614,10 @@ public:
|
||||
|
||||
// this executor sends a request to an additional replica after a delay shorter than the timeout
|
||||
class speculating_read_executor : public abstract_read_executor {
|
||||
timer<> _speculate_timer;
|
||||
timer<storage_proxy::clock_type> _speculate_timer;
|
||||
public:
|
||||
using abstract_read_executor::abstract_read_executor;
|
||||
virtual future<> make_requests(digest_resolver_ptr resolver, lowres_clock::time_point timeout) {
|
||||
virtual future<> make_requests(digest_resolver_ptr resolver, storage_proxy::clock_type::time_point timeout) {
|
||||
_speculate_timer.set_callback([this, resolver, timeout] {
|
||||
if (!resolver->is_completed()) { // at the time the callback runs request may be completed already
|
||||
resolver->add_wait_targets(1); // we send one more request so wait for it too
|
||||
@@ -2664,7 +2663,7 @@ class range_slice_read_executor : public abstract_read_executor {
|
||||
public:
|
||||
range_slice_read_executor(schema_ptr s, shared_ptr<storage_proxy> proxy, lw_shared_ptr<query::read_command> cmd, dht::partition_range pr, db::consistency_level cl, std::vector<gms::inet_address> targets, tracing::trace_state_ptr trace_state) :
|
||||
abstract_read_executor(std::move(s), std::move(proxy), std::move(cmd), std::move(pr), cl, targets.size(), std::move(targets), std::move(trace_state)) {}
|
||||
virtual future<foreign_ptr<lw_shared_ptr<query::result>>> execute(lowres_clock::time_point timeout) override {
|
||||
virtual future<foreign_ptr<lw_shared_ptr<query::result>>> execute(storage_proxy::clock_type::time_point timeout) override {
|
||||
reconcile(_cl, timeout);
|
||||
return _result_promise.get_future();
|
||||
}
|
||||
@@ -2795,7 +2794,7 @@ future<foreign_ptr<lw_shared_ptr<query::result>>>
|
||||
storage_proxy::query_singular(lw_shared_ptr<query::read_command> cmd, dht::partition_range_vector&& partition_ranges, db::consistency_level cl, tracing::trace_state_ptr trace_state) {
|
||||
std::vector<::shared_ptr<abstract_read_executor>> exec;
|
||||
exec.reserve(partition_ranges.size());
|
||||
auto timeout = lowres_clock::now() + std::chrono::milliseconds(_db.local().get_config().read_request_timeout_in_ms());
|
||||
auto timeout = storage_proxy::clock_type::now() + std::chrono::milliseconds(_db.local().get_config().read_request_timeout_in_ms());
|
||||
|
||||
for (auto&& pr: partition_ranges) {
|
||||
if (!pr.is_singular()) {
|
||||
@@ -2819,7 +2818,7 @@ storage_proxy::query_singular(lw_shared_ptr<query::read_command> cmd, dht::parti
|
||||
}
|
||||
|
||||
future<std::vector<foreign_ptr<lw_shared_ptr<query::result>>>>
|
||||
storage_proxy::query_partition_key_range_concurrent(lowres_clock::time_point timeout, std::vector<foreign_ptr<lw_shared_ptr<query::result>>>&& results,
|
||||
storage_proxy::query_partition_key_range_concurrent(storage_proxy::clock_type::time_point timeout, std::vector<foreign_ptr<lw_shared_ptr<query::result>>>&& results,
|
||||
lw_shared_ptr<query::read_command> cmd, db::consistency_level cl, dht::partition_range_vector::iterator&& i,
|
||||
dht::partition_range_vector&& ranges, int concurrency_factor, tracing::trace_state_ptr trace_state,
|
||||
uint32_t remaining_row_count, uint32_t remaining_partition_count) {
|
||||
@@ -2923,7 +2922,7 @@ storage_proxy::query_partition_key_range(lw_shared_ptr<query::read_command> cmd,
|
||||
schema_ptr schema = local_schema_registry().get(cmd->schema_version);
|
||||
keyspace& ks = _db.local().find_keyspace(schema->ks_name());
|
||||
dht::partition_range_vector ranges;
|
||||
auto timeout = lowres_clock::now() + std::chrono::milliseconds(_db.local().get_config().read_request_timeout_in_ms());
|
||||
auto timeout = storage_proxy::clock_type::now() + std::chrono::milliseconds(_db.local().get_config().read_request_timeout_in_ms());
|
||||
|
||||
// when dealing with LocalStrategy keyspaces, we can skip the range splitting and merging (which can be
|
||||
// expensive in clusters with vnodes)
|
||||
@@ -3957,24 +3956,22 @@ storage_proxy::query_nonsingular_mutations_locally(schema_ptr s, lw_shared_ptr<q
|
||||
auto shard_cmd = make_lw_shared<query::read_command>(*cmd);
|
||||
return do_with(cmd,
|
||||
shard_cmd,
|
||||
1u,
|
||||
0u,
|
||||
false,
|
||||
static_cast<unsigned>(prs.size()),
|
||||
std::unordered_map<element_and_shard, partition_range_and_sort_key>{},
|
||||
mutation_result_merger{s, cmd},
|
||||
dht::ring_position_range_vector_sharder{prs},
|
||||
dht::ring_position_exponential_vector_sharder{prs},
|
||||
global_schema_ptr(s),
|
||||
tracing::global_trace_state_ptr(std::move(trace_state)),
|
||||
[this, s, max_size] (lw_shared_ptr<query::read_command>& cmd,
|
||||
lw_shared_ptr<query::read_command>& shard_cmd,
|
||||
unsigned& shards_in_parallel,
|
||||
unsigned& mutation_result_merger_key,
|
||||
bool& no_more_ranges,
|
||||
unsigned& partition_range_count,
|
||||
std::unordered_map<element_and_shard, partition_range_and_sort_key>& shards_for_this_iteration,
|
||||
mutation_result_merger& mrm,
|
||||
dht::ring_position_range_vector_sharder& rprs,
|
||||
dht::ring_position_exponential_vector_sharder& rpevs,
|
||||
global_schema_ptr& gs,
|
||||
tracing::global_trace_state_ptr& gt) {
|
||||
return _db.local().get_result_memory_limiter().new_mutation_read(max_size).then([&, s] (query::result_memory_accounter ma) {
|
||||
@@ -3985,36 +3982,32 @@ storage_proxy::query_nonsingular_mutations_locally(schema_ptr s, lw_shared_ptr<q
|
||||
// because we'll throw away most of the results. So we'll exponentially increase
|
||||
// concurrency starting at 1, so we won't waste on dense tables and at most
|
||||
// `log(nr_shards) + ignore_msb_bits` latency multiplier for near-empty tables.
|
||||
//
|
||||
// We use the ring_position_exponential_vector_sharder to give us subranges that follow
|
||||
// this scheme.
|
||||
shards_for_this_iteration.clear();
|
||||
// If we're reading from less than smp::count shards, then we can just append
|
||||
// each shard in order without sorting. If we're reading from more, then
|
||||
// we'll read from some shards at least twice, so the partitions within will be
|
||||
// out-of-order wrt. other shards
|
||||
auto this_iteration_subranges = rpevs.next(*s);
|
||||
auto retain_shard_order = true;
|
||||
for (auto i = 0u; i < shards_in_parallel; ++i) {
|
||||
auto now = rprs.next(*s);
|
||||
if (!now) {
|
||||
no_more_ranges = true;
|
||||
break;
|
||||
}
|
||||
// Let's see if this is a new shard, or if we can expand an existing range
|
||||
auto&& rng_ok = shards_for_this_iteration.emplace(element_and_shard{now->element, now->shard}, partition_range_and_sort_key{now->ring_range, i});
|
||||
if (!rng_ok.second) {
|
||||
// We saw this shard already, enlarge the range (we know now->ring_range came from the same partition range;
|
||||
// otherwise it would have had a unique now->element).
|
||||
auto& rng = rng_ok.first->second.pr;
|
||||
rng = nonwrapping_range<dht::ring_position>(std::move(rng.start()), std::move(now->ring_range.end()));
|
||||
// This range is no longer ordered with respect to the others, so:
|
||||
retain_shard_order = false;
|
||||
no_more_ranges = true;
|
||||
if (this_iteration_subranges) {
|
||||
no_more_ranges = false;
|
||||
retain_shard_order = this_iteration_subranges->inorder;
|
||||
auto sort_key = 0u;
|
||||
for (auto&& now : this_iteration_subranges->per_shard_ranges) {
|
||||
shards_for_this_iteration.emplace(element_and_shard{this_iteration_subranges->element, now.shard}, partition_range_and_sort_key{now.ring_range, sort_key++});
|
||||
}
|
||||
}
|
||||
|
||||
auto key_base = mutation_result_merger_key;
|
||||
|
||||
// prepare for next iteration
|
||||
// Each iteration uses a merger key that is either i in the loop above (so in the range [0, shards_in_parallel),
|
||||
// or, the element index in prs (so in the range [0, partition_range_count). Make room for sufficient keys.
|
||||
mutation_result_merger_key += std::max(shards_in_parallel, partition_range_count);
|
||||
shards_in_parallel *= 2;
|
||||
mutation_result_merger_key += std::max(smp::count, partition_range_count);
|
||||
|
||||
shard_cmd->partition_limit = cmd->partition_limit - mrm.partition_count();
|
||||
shard_cmd->row_limit = cmd->row_limit - mrm.row_count();
|
||||
|
||||
@@ -71,7 +71,7 @@ public:
|
||||
private:
|
||||
struct rh_entry {
|
||||
::shared_ptr<abstract_write_response_handler> handler;
|
||||
timer<lowres_clock> expire_timer;
|
||||
timer<clock_type> expire_timer;
|
||||
rh_entry(::shared_ptr<abstract_write_response_handler>&& h, std::function<void()>&& cb);
|
||||
};
|
||||
|
||||
@@ -253,7 +253,7 @@ private:
|
||||
dht::partition_range_vector get_restricted_ranges(keyspace& ks, const schema& s, dht::partition_range range);
|
||||
float estimate_result_rows_per_range(lw_shared_ptr<query::read_command> cmd, keyspace& ks);
|
||||
static std::vector<gms::inet_address> intersection(const std::vector<gms::inet_address>& l1, const std::vector<gms::inet_address>& l2);
|
||||
future<std::vector<foreign_ptr<lw_shared_ptr<query::result>>>> query_partition_key_range_concurrent(lowres_clock::time_point timeout,
|
||||
future<std::vector<foreign_ptr<lw_shared_ptr<query::result>>>> query_partition_key_range_concurrent(clock_type::time_point timeout,
|
||||
std::vector<foreign_ptr<lw_shared_ptr<query::result>>>&& results, lw_shared_ptr<query::read_command> cmd, db::consistency_level cl, dht::partition_range_vector::iterator&& i,
|
||||
dht::partition_range_vector&& ranges, int concurrency_factor, tracing::trace_state_ptr trace_state,
|
||||
uint32_t remaining_row_count, uint32_t remaining_partition_count);
|
||||
|
||||
@@ -47,6 +47,7 @@
|
||||
#include <boost/range/algorithm.hpp>
|
||||
#include <boost/range/adaptors.hpp>
|
||||
#include <boost/range/join.hpp>
|
||||
#include <boost/algorithm/cxx11/any_of.hpp>
|
||||
|
||||
#include "core/future-util.hh"
|
||||
#include "core/pipe.hh"
|
||||
@@ -382,11 +383,22 @@ get_fully_expired_sstables(column_family& cf, std::vector<sstables::shared_sstab
|
||||
}
|
||||
}
|
||||
|
||||
auto compacted_undeleted_gens = boost::copy_range<std::unordered_set<int64_t>>(cf.compacted_undeleted_sstables()
|
||||
| boost::adaptors::transformed(std::mem_fn(&sstables::sstable::generation)));
|
||||
auto has_undeleted_ancestor = [&compacted_undeleted_gens] (auto& candidate) {
|
||||
return boost::algorithm::any_of(candidate->ancestors(), [&compacted_undeleted_gens] (auto gen) {
|
||||
return compacted_undeleted_gens.count(gen);
|
||||
});
|
||||
};
|
||||
|
||||
// SSTables that do not contain live data are added to the list of possibly expired sstables.
|
||||
for (auto& candidate : compacting) {
|
||||
logger.debug("Checking if candidate of generation {} and max_deletion_time {} is expired, gc_before is {}",
|
||||
candidate->generation(), candidate->get_stats_metadata().max_local_deletion_time, gc_before);
|
||||
if (candidate->get_stats_metadata().max_local_deletion_time < gc_before) {
|
||||
// A fully expired sstable which has an ancestor undeleted shouldn't be compacted because
|
||||
// expired data won't be purged because undeleted sstables are taken into account when
|
||||
// calculating max purgeable timestamp, and not doing it could lead to a compaction loop.
|
||||
if (candidate->get_stats_metadata().max_local_deletion_time < gc_before && !has_undeleted_ancestor(candidate)) {
|
||||
logger.debug("Adding candidate of generation {} to list of possibly expired sstables", candidate->generation());
|
||||
candidates.push_back(candidate);
|
||||
} else {
|
||||
|
||||
@@ -242,11 +242,12 @@ void compaction_manager::submit_sstable_rewrite(column_family* cf, sstables::sha
|
||||
// sstable we are planning to work on:
|
||||
_compacting_sstables.insert(sst);
|
||||
auto task = make_lw_shared<compaction_manager::task>();
|
||||
task->compacting_cf = cf;
|
||||
_tasks.push_back(task);
|
||||
task->compaction_done = with_semaphore(sem, 1, [this, cf, sst] {
|
||||
task->compaction_done = with_semaphore(sem, 1, [this, task, cf, sst] {
|
||||
_stats.active_tasks++;
|
||||
if (_stopped) {
|
||||
return make_ready_future<>();;
|
||||
if (!can_proceed(task)) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return cf->compact_sstables(sstables::compaction_descriptor(
|
||||
std::vector<sstables::shared_sstable>{sst},
|
||||
@@ -462,6 +463,14 @@ future<> compaction_manager::perform_cleanup(column_family* cf) {
|
||||
}
|
||||
|
||||
future<> compaction_manager::remove(column_family* cf) {
|
||||
// FIXME: better way to iterate through compaction info for a given column family,
|
||||
// although this path isn't performance sensitive.
|
||||
for (auto& info : _compactions) {
|
||||
if (cf->schema()->ks_name() == info->ks && cf->schema()->cf_name() == info->cf) {
|
||||
info->stop("column family removal");
|
||||
}
|
||||
}
|
||||
|
||||
// We need to guarantee that a task being stopped will not retry to compact
|
||||
// a column family being removed.
|
||||
auto tasks_to_stop = make_lw_shared<std::vector<lw_shared_ptr<task>>>();
|
||||
|
||||
@@ -191,7 +191,8 @@ class partitioned_sstable_set : public sstable_set_impl {
|
||||
using map_iterator = interval_map_type::const_iterator;
|
||||
private:
|
||||
schema_ptr _schema;
|
||||
interval_map_type _sstables;
|
||||
std::vector<shared_sstable> _unleveled_sstables;
|
||||
interval_map_type _leveled_sstables;
|
||||
private:
|
||||
static interval_type make_interval(const schema& s, const dht::partition_range& range) {
|
||||
return interval_type::closed(
|
||||
@@ -207,16 +208,16 @@ private:
|
||||
}
|
||||
std::pair<map_iterator, map_iterator> query(const dht::partition_range& range) const {
|
||||
if (range.start() && range.end()) {
|
||||
return _sstables.equal_range(make_interval(range));
|
||||
return _leveled_sstables.equal_range(make_interval(range));
|
||||
}
|
||||
else if (range.start() && !range.end()) {
|
||||
auto start = singular(range.start()->value());
|
||||
return { _sstables.lower_bound(start), _sstables.end() };
|
||||
return { _leveled_sstables.lower_bound(start), _leveled_sstables.end() };
|
||||
} else if (!range.start() && range.end()) {
|
||||
auto end = singular(range.end()->value());
|
||||
return { _sstables.begin(), _sstables.upper_bound(end) };
|
||||
return { _leveled_sstables.begin(), _leveled_sstables.upper_bound(end) };
|
||||
} else {
|
||||
return { _sstables.begin(), _sstables.end() };
|
||||
return { _leveled_sstables.begin(), _leveled_sstables.end() };
|
||||
}
|
||||
}
|
||||
public:
|
||||
@@ -234,29 +235,39 @@ public:
|
||||
while (b != e) {
|
||||
boost::copy(b++->second, std::inserter(result, result.end()));
|
||||
}
|
||||
return std::vector<shared_sstable>(result.begin(), result.end());
|
||||
auto r = _unleveled_sstables;
|
||||
r.insert(r.end(), result.begin(), result.end());
|
||||
return r;
|
||||
}
|
||||
virtual void insert(shared_sstable sst) override {
|
||||
auto first = sst->get_first_decorated_key().token();
|
||||
auto last = sst->get_last_decorated_key().token();
|
||||
using bound = dht::partition_range::bound;
|
||||
_sstables.add({
|
||||
make_interval(
|
||||
dht::partition_range(
|
||||
bound(dht::ring_position::starting_at(first)),
|
||||
bound(dht::ring_position::ending_at(last)))),
|
||||
value_set({sst})});
|
||||
if (sst->get_sstable_level() == 0) {
|
||||
_unleveled_sstables.push_back(std::move(sst));
|
||||
} else {
|
||||
auto first = sst->get_first_decorated_key().token();
|
||||
auto last = sst->get_last_decorated_key().token();
|
||||
using bound = dht::partition_range::bound;
|
||||
_leveled_sstables.add({
|
||||
make_interval(
|
||||
dht::partition_range(
|
||||
bound(dht::ring_position::starting_at(first)),
|
||||
bound(dht::ring_position::ending_at(last)))),
|
||||
value_set({sst})});
|
||||
}
|
||||
}
|
||||
virtual void erase(shared_sstable sst) override {
|
||||
auto first = sst->get_first_decorated_key().token();
|
||||
auto last = sst->get_last_decorated_key().token();
|
||||
using bound = dht::partition_range::bound;
|
||||
_sstables.subtract({
|
||||
make_interval(
|
||||
dht::partition_range(
|
||||
bound(dht::ring_position::starting_at(first)),
|
||||
bound(dht::ring_position::ending_at(last)))),
|
||||
value_set({sst})});
|
||||
if (sst->get_sstable_level() == 0) {
|
||||
_unleveled_sstables.erase(std::remove(_unleveled_sstables.begin(), _unleveled_sstables.end(), sst), _unleveled_sstables.end());
|
||||
} else {
|
||||
auto first = sst->get_first_decorated_key().token();
|
||||
auto last = sst->get_last_decorated_key().token();
|
||||
using bound = dht::partition_range::bound;
|
||||
_leveled_sstables.subtract({
|
||||
make_interval(
|
||||
dht::partition_range(
|
||||
bound(dht::ring_position::starting_at(first)),
|
||||
bound(dht::ring_position::ending_at(last)))),
|
||||
value_set({sst})});
|
||||
}
|
||||
}
|
||||
virtual std::unique_ptr<incremental_selector_impl> make_incremental_selector() const override;
|
||||
class incremental_selector;
|
||||
@@ -264,6 +275,7 @@ public:
|
||||
|
||||
class partitioned_sstable_set::incremental_selector : public incremental_selector_impl {
|
||||
schema_ptr _schema;
|
||||
const std::vector<shared_sstable>& _unleveled_sstables;
|
||||
map_iterator _it;
|
||||
const map_iterator _end;
|
||||
private:
|
||||
@@ -272,32 +284,35 @@ private:
|
||||
{i.upper().token(), boost::icl::is_right_closed(i.bounds())});
|
||||
}
|
||||
public:
|
||||
incremental_selector(schema_ptr schema, const interval_map_type& sstables)
|
||||
incremental_selector(schema_ptr schema, const std::vector<shared_sstable>& unleveled_sstables, const interval_map_type& leveled_sstables)
|
||||
: _schema(std::move(schema))
|
||||
, _it(sstables.begin())
|
||||
, _end(sstables.end()) {
|
||||
, _unleveled_sstables(unleveled_sstables)
|
||||
, _it(leveled_sstables.begin())
|
||||
, _end(leveled_sstables.end()) {
|
||||
}
|
||||
virtual std::pair<dht::token_range, std::vector<shared_sstable>> select(const dht::token& token) override {
|
||||
auto pr = dht::partition_range::make(dht::ring_position::starting_at(token), dht::ring_position::ending_at(token));
|
||||
auto interval = make_interval(*_schema, std::move(pr));
|
||||
auto ssts = _unleveled_sstables;
|
||||
|
||||
while (_it != _end) {
|
||||
if (boost::icl::contains(_it->first, interval)) {
|
||||
return std::make_pair(to_token_range(_it->first), std::vector<shared_sstable>(_it->second.begin(), _it->second.end()));
|
||||
ssts.insert(ssts.end(), _it->second.begin(), _it->second.end());
|
||||
return std::make_pair(to_token_range(_it->first), std::move(ssts));
|
||||
}
|
||||
// we don't want to skip current interval if token lies before it.
|
||||
if (boost::icl::lower_less(interval, _it->first)) {
|
||||
return std::make_pair(dht::token_range::make({token, true}, {_it->first.lower().token(), false}),
|
||||
std::vector<shared_sstable>());
|
||||
std::move(ssts));
|
||||
}
|
||||
_it++;
|
||||
}
|
||||
return std::make_pair(dht::token_range::make_open_ended_both_sides(), std::vector<shared_sstable>());
|
||||
return std::make_pair(dht::token_range::make_open_ended_both_sides(), std::move(ssts));
|
||||
}
|
||||
};
|
||||
|
||||
std::unique_ptr<incremental_selector_impl> partitioned_sstable_set::make_incremental_selector() const {
|
||||
return std::make_unique<incremental_selector>(_schema, _sstables);
|
||||
return std::make_unique<incremental_selector>(_schema, _unleveled_sstables, _leveled_sstables);
|
||||
}
|
||||
|
||||
class compaction_strategy_impl {
|
||||
|
||||
@@ -71,6 +71,12 @@ void compression::set_compressor(compressor c) {
|
||||
}
|
||||
}
|
||||
|
||||
// locate() takes a byte position in the uncompressed stream, and finds the
|
||||
// location of the compressed chunk on disk which contains it, and the
|
||||
// offset in this chunk.
|
||||
// locate() may only be used for offsets of actual bytes, and in particular
|
||||
// the end-of-file position (one past the last byte) MUST not be used. If the
|
||||
// caller wants to read from the end of file, it should simply read nothing.
|
||||
compression::chunk_and_offset
|
||||
compression::locate(uint64_t position) const {
|
||||
auto ucl = uncompressed_chunk_length();
|
||||
@@ -310,6 +316,9 @@ public:
|
||||
virtual future<temporary_buffer<char>> skip(uint64_t n) override {
|
||||
_pos += n;
|
||||
assert(_pos <= _end_pos);
|
||||
if (_pos == _end_pos) {
|
||||
return make_ready_future<temporary_buffer<char>>();
|
||||
}
|
||||
auto addr = _compression_metadata->locate(_pos);
|
||||
auto underlying_n = addr.chunk_start - _underlying_pos;
|
||||
_underlying_pos = addr.chunk_start;
|
||||
|
||||
@@ -44,13 +44,7 @@ future<> sstable::read_filter(const io_priority_class& pc) {
|
||||
large_bitset bs(filter.buckets.elements.size() * 64);
|
||||
bs.load(filter.buckets.elements.begin(), filter.buckets.elements.end());
|
||||
_components->filter = utils::filter::create_filter(filter.hashes, std::move(bs));
|
||||
}).then([this] {
|
||||
return io_check([&] {
|
||||
return engine().file_size(this->filename(sstable::component_type::Filter));
|
||||
});
|
||||
});
|
||||
}).then([this] (auto size) {
|
||||
_filter_file_size = size;
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -880,10 +880,12 @@ static inline bytes_view consume_bytes(bytes_view& p, size_t len) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline clustering_key_prefix get_clustering_key(
|
||||
const schema& schema, bytes_view col_name) {
|
||||
mp_row_consumer::column col(schema, std::move(col_name), api::max_timestamp);
|
||||
return std::move(col.clustering);
|
||||
static inline clustering_key_prefix get_clustering_key(const schema& s, composite_view col_name) {
|
||||
auto components = col_name.explode();
|
||||
if (components.size() > s.clustering_key_size()) {
|
||||
components.resize(s.clustering_key_size());
|
||||
}
|
||||
return clustering_key_prefix(std::move(components));
|
||||
}
|
||||
|
||||
static bool has_static_columns(const schema& schema, index_entry &ie) {
|
||||
@@ -955,9 +957,10 @@ sstables::sstable::find_disk_ranges(
|
||||
auto& range_start = ck_ranges.begin()->start();
|
||||
bool found_range_start = false;
|
||||
uint64_t range_start_pos;
|
||||
uint64_t prev_pos = 0;
|
||||
auto& range_end = ck_ranges.begin()->end();
|
||||
|
||||
auto cmp = clustering_key_prefix::tri_compare(*schema);
|
||||
auto cmp = clustering_key_prefix::prefix_equal_tri_compare(*schema);
|
||||
while (num_blocks--) {
|
||||
if (data.size() < 2) {
|
||||
// When we break out of this loop, we give up on
|
||||
@@ -976,7 +979,7 @@ sstables::sstable::find_disk_ranges(
|
||||
// But we only need to match the clustering key, because
|
||||
// we got a clustering key range to search for.
|
||||
auto start_ck = get_clustering_key(*schema,
|
||||
consume_bytes(data, len));
|
||||
composite_view(consume_bytes(data, len), schema->is_compound()));
|
||||
if (data.size() < 2) {
|
||||
break;
|
||||
}
|
||||
@@ -985,49 +988,50 @@ sstables::sstable::find_disk_ranges(
|
||||
break;
|
||||
}
|
||||
auto end_ck = get_clustering_key(*schema,
|
||||
consume_bytes(data, len));
|
||||
composite_view(consume_bytes(data, len), schema->is_compound()));
|
||||
if (data.size() < 16) {
|
||||
break;
|
||||
}
|
||||
uint64_t offset = consume_be<uint64_t>(data);
|
||||
uint64_t width = consume_be<uint64_t>(data);
|
||||
if (!found_range_start) {
|
||||
if (!range_start || cmp(range_start->value(), end_ck) <= 0) {
|
||||
range_start_pos = ie.position() + offset;
|
||||
found_range_start = true;
|
||||
}
|
||||
}
|
||||
uint64_t cur_pos = ie.position() + offset;
|
||||
bool found_range_end = false;
|
||||
uint64_t range_end_pos;
|
||||
if (range_end) {
|
||||
if (cmp(range_end->value(), start_ck) < 0) {
|
||||
// this block is already past the range_end
|
||||
found_range_end = true;
|
||||
range_end_pos = ie.position() + offset;
|
||||
range_end_pos = cur_pos;
|
||||
} else if (cmp(range_end->value(), end_ck) < 0 || num_blocks == 0) {
|
||||
// range_end is in the middle of this block.
|
||||
// Note the strict inequality above is important:
|
||||
// if range_end==end_ck the next block may contain
|
||||
// still more items matching range_end.
|
||||
found_range_end = true;
|
||||
range_end_pos = ie.position() + offset + width;
|
||||
range_end_pos = cur_pos + width;
|
||||
}
|
||||
} else if (num_blocks == 0) {
|
||||
// When !range_end, read until the last block.
|
||||
// In this case we could have also found the end of
|
||||
// the partition using the index.
|
||||
found_range_end = true;
|
||||
range_end_pos = ie.position() + offset + width;
|
||||
range_end_pos = cur_pos + width;
|
||||
}
|
||||
if (found_range_end) {
|
||||
if (!found_range_start) {
|
||||
// return empty range
|
||||
range_start_pos = range_end_pos = 0;
|
||||
if (!found_range_start) {
|
||||
if (!range_start || cmp(range_start->value(), start_ck) <= 0) {
|
||||
range_start_pos = prev_pos ? prev_pos : cur_pos;
|
||||
found_range_start = true;
|
||||
} else if (found_range_end || num_blocks == 0) {
|
||||
range_start_pos = cur_pos;
|
||||
found_range_start = true;
|
||||
}
|
||||
}
|
||||
if (found_range_end) { // found_range_end implies found_range_start
|
||||
return make_ready_future<disk_read_range>(
|
||||
disk_read_range(range_start_pos, range_end_pos,
|
||||
key, deltime));
|
||||
}
|
||||
prev_pos = cur_pos;
|
||||
}
|
||||
}
|
||||
// Else, if more than one clustering-key range needs to be read,
|
||||
|
||||
@@ -100,35 +100,48 @@ future<> await_background_jobs_on_all_shards() {
|
||||
}
|
||||
|
||||
class random_access_reader {
|
||||
input_stream<char> _in;
|
||||
std::unique_ptr<input_stream<char>> _in;
|
||||
seastar::gate _close_gate;
|
||||
protected:
|
||||
virtual input_stream<char> open_at(uint64_t pos) = 0;
|
||||
public:
|
||||
future<temporary_buffer<char>> read_exactly(size_t n) {
|
||||
return _in.read_exactly(n);
|
||||
return _in->read_exactly(n);
|
||||
}
|
||||
void seek(uint64_t pos) {
|
||||
_in = open_at(pos);
|
||||
if (_in) {
|
||||
seastar::with_gate(_close_gate, [in = std::move(_in)] () mutable {
|
||||
auto fut = in->close();
|
||||
return fut.then([in = std::move(in)] {});
|
||||
});
|
||||
}
|
||||
_in = std::make_unique<input_stream<char>>(open_at(pos));
|
||||
}
|
||||
bool eof() { return _in.eof(); }
|
||||
bool eof() { return _in->eof(); }
|
||||
virtual future<> close() {
|
||||
return _in.close();
|
||||
return _close_gate.close().then([this] {
|
||||
return _in->close();
|
||||
});
|
||||
}
|
||||
virtual ~random_access_reader() { }
|
||||
};
|
||||
|
||||
class file_random_access_reader : public random_access_reader {
|
||||
file _file;
|
||||
uint64_t _file_size;
|
||||
size_t _buffer_size;
|
||||
unsigned _read_ahead;
|
||||
public:
|
||||
virtual input_stream<char> open_at(uint64_t pos) override {
|
||||
auto len = _file_size - pos;
|
||||
file_input_stream_options options;
|
||||
options.buffer_size = _buffer_size;
|
||||
options.read_ahead = _read_ahead;
|
||||
|
||||
return make_file_input_stream(_file, pos, std::move(options));
|
||||
return make_file_input_stream(_file, pos, len, std::move(options));
|
||||
}
|
||||
explicit file_random_access_reader(file f, size_t buffer_size = 8192)
|
||||
: _file(std::move(f)), _buffer_size(buffer_size)
|
||||
explicit file_random_access_reader(file f, uint64_t file_size, size_t buffer_size = 8192, unsigned read_ahead = 4)
|
||||
: _file(std::move(f)), _file_size(file_size), _buffer_size(buffer_size), _read_ahead(read_ahead)
|
||||
{
|
||||
seek(0);
|
||||
}
|
||||
@@ -968,12 +981,15 @@ future<> sstable::read_simple(T& component, const io_priority_class& pc) {
|
||||
auto file_path = filename(Type);
|
||||
sstlog.debug(("Reading " + _component_map[Type] + " file {} ").c_str(), file_path);
|
||||
return open_file_dma(file_path, open_flags::ro).then([this, &component] (file fi) {
|
||||
auto f = make_checked_file(_read_error_handler, fi);
|
||||
auto r = make_lw_shared<file_random_access_reader>(std::move(f), sstable_buffer_size);
|
||||
auto fut = parse(*r, component);
|
||||
return fut.finally([r = std::move(r)] {
|
||||
return r->close();
|
||||
}).then([r] {});
|
||||
auto fut = fi.size();
|
||||
return fut.then([this, &component, fi = std::move(fi)] (uint64_t size) {
|
||||
auto f = make_checked_file(_read_error_handler, fi);
|
||||
auto r = make_lw_shared<file_random_access_reader>(std::move(f), size, sstable_buffer_size);
|
||||
auto fut = parse(*r, component);
|
||||
return fut.finally([r = std::move(r)] {
|
||||
return r->close();
|
||||
}).then([r] {});
|
||||
});
|
||||
}).then_wrapped([this, file_path] (future<> f) {
|
||||
try {
|
||||
f.get();
|
||||
@@ -1163,6 +1179,15 @@ future<> sstable::update_info_for_opened_data() {
|
||||
return _index_file.size().then([this] (auto size) {
|
||||
_index_file_size = size;
|
||||
});
|
||||
}).then([this] {
|
||||
if (this->has_component(sstable::component_type::Filter)) {
|
||||
return io_check([&] {
|
||||
return engine().file_size(this->filename(sstable::component_type::Filter));
|
||||
}).then([this] (auto size) {
|
||||
_filter_file_size = size;
|
||||
});
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}).then([this] {
|
||||
this->set_clustering_components_ranges();
|
||||
this->set_first_and_last_keys();
|
||||
@@ -1199,19 +1224,16 @@ future<> sstable::create_data() {
|
||||
// No need to set tunable priorities for it.
|
||||
future<> sstable::load() {
|
||||
return read_toc().then([this] {
|
||||
return read_statistics(default_priority_class());
|
||||
}).then([this] {
|
||||
validate_min_max_metadata();
|
||||
set_clustering_components_ranges();
|
||||
return read_compression(default_priority_class());
|
||||
}).then([this] {
|
||||
return read_scylla_metadata(default_priority_class());
|
||||
}).then([this] {
|
||||
return read_filter(default_priority_class());
|
||||
}).then([this] {;
|
||||
return read_summary(default_priority_class());
|
||||
}).then([this] {
|
||||
return open_data();
|
||||
return seastar::when_all_succeed(
|
||||
read_statistics(default_priority_class()),
|
||||
read_compression(default_priority_class()),
|
||||
read_scylla_metadata(default_priority_class()),
|
||||
read_filter(default_priority_class()),
|
||||
read_summary(default_priority_class())).then([this] {
|
||||
validate_min_max_metadata();
|
||||
set_clustering_components_ranges();
|
||||
return open_data();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1690,24 +1712,21 @@ populate_statistics_offsets(statistics& s) {
|
||||
static
|
||||
sharding_metadata
|
||||
create_sharding_metadata(schema_ptr schema, const dht::decorated_key& first_key, const dht::decorated_key& last_key) {
|
||||
auto range = dht::partition_range::make(dht::ring_position(first_key), dht::ring_position(last_key));
|
||||
auto sharder = dht::ring_position_range_sharder(std::move(range));
|
||||
auto prange = dht::partition_range::make(dht::ring_position(first_key), dht::ring_position(last_key));
|
||||
auto sm = sharding_metadata();
|
||||
auto rpras = sharder.next(*schema);
|
||||
while (rpras) {
|
||||
if (rpras->shard == engine().cpu_id()) {
|
||||
for (auto&& range : dht::split_range_to_single_shard(*schema, prange, engine().cpu_id())) {
|
||||
if (true) { // keep indentation
|
||||
// we know left/right are not infinite
|
||||
auto&& left = rpras->ring_range.start()->value();
|
||||
auto&& right = rpras->ring_range.end()->value();
|
||||
auto&& left = range.start()->value();
|
||||
auto&& right = range.end()->value();
|
||||
auto&& left_token = left.token();
|
||||
auto left_exclusive = !left.has_key() && left.bound() == dht::ring_position::token_bound::end;
|
||||
auto&& right_token = right.token();
|
||||
auto right_exclusive = !right.has_key() && right.bound() == dht::ring_position::token_bound::start;
|
||||
sm.token_ranges.elements.push_back({
|
||||
sm.token_ranges.elements.push_back(disk_token_range{
|
||||
{left_exclusive, to_bytes(bytes_view(left_token._data))},
|
||||
{right_exclusive, to_bytes(bytes_view(right_token._data))}});
|
||||
}
|
||||
rpras = sharder.next(*schema);
|
||||
}
|
||||
return sm;
|
||||
}
|
||||
@@ -1951,19 +1970,20 @@ void sstable_writer::prepare_file_writer()
|
||||
options.write_behind = 10;
|
||||
|
||||
if (!_compression_enabled) {
|
||||
_writer = make_shared<checksummed_file_writer>(std::move(_sst._data_file), std::move(options), true);
|
||||
_writer = std::make_unique<checksummed_file_writer>(std::move(_sst._data_file), std::move(options), true);
|
||||
} else {
|
||||
prepare_compression(_sst._components->compression, _schema);
|
||||
_writer = make_shared<file_writer>(make_compressed_file_output_stream(std::move(_sst._data_file), std::move(options), &_sst._components->compression));
|
||||
_writer = std::make_unique<file_writer>(make_compressed_file_output_stream(std::move(_sst._data_file), std::move(options), &_sst._components->compression));
|
||||
}
|
||||
}
|
||||
|
||||
void sstable_writer::finish_file_writer()
|
||||
{
|
||||
_writer->close().get();
|
||||
auto writer = std::move(_writer);
|
||||
writer->close().get();
|
||||
|
||||
if (!_compression_enabled) {
|
||||
auto chksum_wr = static_pointer_cast<checksummed_file_writer>(_writer);
|
||||
auto chksum_wr = static_cast<checksummed_file_writer*>(writer.get());
|
||||
write_digest(_sst._write_error_handler, _sst.filename(sstable::component_type::Digest), chksum_wr->full_checksum());
|
||||
write_crc(_sst._write_error_handler, _sst.filename(sstable::component_type::CRC), chksum_wr->finalize_checksum());
|
||||
} else {
|
||||
@@ -1971,6 +1991,16 @@ void sstable_writer::finish_file_writer()
|
||||
}
|
||||
}
|
||||
|
||||
// Destructor. If a file writer is still owned (_writer is non-null), close it
// and wait for the close to finish. A failure is logged instead of being
// allowed to escape from the destructor.
sstable_writer::~sstable_writer() {
|
||||
if (_writer) {
|
||||
try {
|
||||
// NOTE(review): .get() on the close future presumably needs a seastar
// thread context - confirm that every destruction site runs in one.
_writer->close().get();
|
||||
} catch (...) {
|
||||
// Swallow after logging: nothing may be thrown from here.
sstlog.error("sstable_writer failed to close file: {}", std::current_exception());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sstable_writer::sstable_writer(sstable& sst, const schema& s, uint64_t estimated_partitions,
|
||||
uint64_t max_sstable_size, bool backup, bool leave_unsealed, const io_priority_class& pc)
|
||||
: _sst(sst)
|
||||
@@ -2324,6 +2354,11 @@ double sstable::get_compression_ratio() const {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a copy of the ancestor entries stored in this sstable's compaction
// metadata, collected into an unordered_set (so repeated values collapse).
// NOTE(review): the values are presumably sstable generation numbers, as
// recorded through add_ancestor(generation) - confirm.
std::unordered_set<uint64_t> sstable::ancestors() const {
|
||||
const compaction_metadata& cm = get_compaction_metadata();
|
||||
return boost::copy_range<std::unordered_set<uint64_t>>(cm.ancestors.elements);
|
||||
}
|
||||
|
||||
void sstable::set_sstable_level(uint32_t new_level) {
|
||||
auto entry = _components->statistics.contents.find(metadata_type::Stats);
|
||||
if (entry == _components->statistics.contents.end()) {
|
||||
|
||||
@@ -325,6 +325,8 @@ public:
|
||||
_collector.add_ancestor(generation);
|
||||
}
|
||||
|
||||
std::unordered_set<uint64_t> ancestors() const;
|
||||
|
||||
// Returns true iff this sstable contains data which belongs to many shards.
|
||||
bool is_shared() const {
|
||||
return _shared;
|
||||
@@ -803,7 +805,7 @@ class sstable_writer {
|
||||
bool _backup;
|
||||
bool _leave_unsealed;
|
||||
bool _compression_enabled;
|
||||
shared_ptr<file_writer> _writer;
|
||||
std::unique_ptr<file_writer> _writer;
|
||||
stdx::optional<components_writer> _components_writer;
|
||||
private:
|
||||
void prepare_file_writer();
|
||||
@@ -811,6 +813,10 @@ private:
|
||||
public:
|
||||
sstable_writer(sstable& sst, const schema& s, uint64_t estimated_partitions,
|
||||
uint64_t max_sstable_size, bool backup, bool leave_unsealed, const io_priority_class& pc);
|
||||
~sstable_writer();
|
||||
sstable_writer(sstable_writer&& o) : _sst(o._sst), _schema(o._schema), _pc(o._pc), _backup(o._backup),
|
||||
_leave_unsealed(o._leave_unsealed), _compression_enabled(o._compression_enabled), _writer(std::move(o._writer)),
|
||||
_components_writer(std::move(o._components_writer)) {}
|
||||
void consume_new_partition(const dht::decorated_key& dk) { return _components_writer->consume_new_partition(dk); }
|
||||
void consume(tombstone t) { _components_writer->consume(t); }
|
||||
stop_iteration consume(static_row&& sr) { return _components_writer->consume(std::move(sr)); }
|
||||
|
||||
@@ -175,10 +175,10 @@ private:
|
||||
stream_session_state _state = stream_session_state::INITIALIZED;
|
||||
bool _complete_sent = false;
|
||||
|
||||
// If the session is idle for 10 minutes, close the session
|
||||
std::chrono::seconds _keep_alive_timeout{60 * 10};
|
||||
// Check every 1 minutes
|
||||
std::chrono::seconds _keep_alive_interval{60};
|
||||
// If the session is idle for 300 minutes, close the session
|
||||
std::chrono::seconds _keep_alive_timeout{60 * 300};
|
||||
// Check every 10 minutes
|
||||
std::chrono::seconds _keep_alive_interval{60 * 10};
|
||||
timer<lowres_clock> _keep_alive;
|
||||
stream_bytes _last_stream_bytes;
|
||||
lowres_clock::time_point _last_stream_progress;
|
||||
|
||||
1
test.py
1
test.py
@@ -78,6 +78,7 @@ boost_tests = [
|
||||
'virtual_reader_test',
|
||||
'view_schema_test',
|
||||
'counter_test',
|
||||
'cell_locker_test',
|
||||
]
|
||||
|
||||
other_tests = [
|
||||
|
||||
@@ -55,13 +55,13 @@ SEASTAR_TEST_CASE(test_reading_with_different_schemas) {
|
||||
canonical_mutation cm1(m1);
|
||||
canonical_mutation cm2(m2);
|
||||
|
||||
{
|
||||
if (can_upgrade_schema(m1.schema(), m2.schema())) {
|
||||
auto m = cm1.to_mutation(m1.schema());
|
||||
m.upgrade(m2.schema());
|
||||
assert_that(cm1.to_mutation(m2.schema())).is_equal_to(m);
|
||||
}
|
||||
|
||||
{
|
||||
if (can_upgrade_schema(m2.schema(), m1.schema())) {
|
||||
auto m = cm2.to_mutation(m2.schema());
|
||||
m.upgrade(m1.schema());
|
||||
assert_that(cm2.to_mutation(m1.schema())).is_equal_to(m);
|
||||
|
||||
218
tests/cell_locker_test.cc
Normal file
218
tests/cell_locker_test.cc
Normal file
@@ -0,0 +1,218 @@
|
||||
/*
|
||||
* Copyright (C) 2017 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "tests/test-utils.hh"
|
||||
#include "disk-error-handler.hh"
|
||||
|
||||
#include <seastar/core/thread.hh>
|
||||
|
||||
#include "cell_locking.hh"
|
||||
#include "mutation.hh"
|
||||
#include "schema_builder.hh"
|
||||
|
||||
thread_local disk_error_signal_type commit_error;
|
||||
thread_local disk_error_signal_type general_disk_error;
|
||||
|
||||
// Builds the base test schema "ks"."cf": partition key "pk", clustering key
// "ck", static columns s1..s3 and regular columns r1..r3, all of bytes_type.
static schema_ptr make_schema()
|
||||
{
|
||||
return schema_builder("ks", "cf")
|
||||
.with_column("pk", bytes_type, column_kind::partition_key)
|
||||
.with_column("ck", bytes_type, column_kind::clustering_key)
|
||||
.with_column("s1", bytes_type, column_kind::static_column)
|
||||
.with_column("s2", bytes_type, column_kind::static_column)
|
||||
.with_column("s3", bytes_type, column_kind::static_column)
|
||||
.with_column("r1", bytes_type)
|
||||
.with_column("r2", bytes_type)
|
||||
.with_column("r3", bytes_type)
|
||||
.build();
|
||||
}
|
||||
|
||||
// Builds a second schema for the same "ks"."cf" table with a partly different
// column set: it has s0, s1, s2.5, s3 as static columns and r0, r1, r2.5, r3
// as regular columns (i.e. no s2 / r2). Keys "pk" and "ck" are unchanged.
static schema_ptr make_alternative_schema()
|
||||
{
|
||||
return schema_builder("ks", "cf")
|
||||
.with_column("pk", bytes_type, column_kind::partition_key)
|
||||
.with_column("ck", bytes_type, column_kind::clustering_key)
|
||||
.with_column("s0", bytes_type, column_kind::static_column)
|
||||
.with_column("s1", bytes_type, column_kind::static_column)
|
||||
.with_column("s2.5", bytes_type, column_kind::static_column)
|
||||
.with_column("s3", bytes_type, column_kind::static_column)
|
||||
.with_column("r0", bytes_type)
|
||||
.with_column("r1", bytes_type)
|
||||
.with_column("r2.5", bytes_type)
|
||||
.with_column("r3", bytes_type)
|
||||
.build();
|
||||
}
|
||||
|
||||
// Builds a schema for "ks"."cf" whose non-key columns (s8, s9, r8, r9) do not
// appear in either of the two schemas above; only "pk" and "ck" are shared.
static schema_ptr make_schema_disjoint_with_others()
|
||||
{
|
||||
return schema_builder("ks", "cf")
|
||||
.with_column("pk", bytes_type, column_kind::partition_key)
|
||||
.with_column("ck", bytes_type, column_kind::clustering_key)
|
||||
.with_column("s8", bytes_type, column_kind::static_column)
|
||||
.with_column("s9", bytes_type, column_kind::static_column)
|
||||
.with_column("r8", bytes_type)
|
||||
.with_column("r9", bytes_type)
|
||||
.build();
|
||||
}
|
||||
|
||||
static data_value empty_value = data_value(to_bytes(""));
|
||||
|
||||
// Pairs a clustering-key string with the list of column names to set in that
// row; the result is the element type make_mutation() takes for its rows.
// NOTE(review): the pair stores a std::initializer_list, which does not own
// its elements - the result should only be used within the full-expression
// that supplied `cells` (as the tests in this file do).
static auto make_row(const sstring& key, std::initializer_list<sstring> cells) {
|
||||
return std::pair<sstring, std::initializer_list<sstring>>(key, cells);
|
||||
}
|
||||
|
||||
// Builds a mutation for partition key `pk` under schema `s`.
//   static_cells     - names of static columns to set
//   clustering_cells - (clustering key, column names) pairs, see make_row()
// Every listed cell is written with empty_value and a fresh timestamp, so only
// the set of touched cells is meaningful to the tests.
static mutation make_mutation(schema_ptr s, const sstring& pk, std::initializer_list<sstring> static_cells,
|
||||
std::initializer_list<std::pair<sstring, std::initializer_list<sstring>>> clustering_cells)
|
||||
{
|
||||
auto m = mutation(partition_key::from_single_value(*s, to_bytes(pk)), s);
|
||||
for (auto&& c : static_cells) {
|
||||
m.set_static_cell(to_bytes(c), empty_value, api::new_timestamp());
|
||||
}
|
||||
for (auto&& r : clustering_cells) {
|
||||
// r.first is the clustering key, r.second the column names for that row.
auto ck = clustering_key::from_single_value(*s, to_bytes(r.first));
|
||||
for (auto&& c : r.second) {
|
||||
m.set_clustered_cell(ck, to_bytes(c), empty_value, api::new_timestamp());
|
||||
}
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
// Locks every cell of one mutation twice: the second request must not be
// immediately available while the first lock object exists, and is obtained
// with get0() once the first one has been destroyed.
SEASTAR_TEST_CASE(test_simple_locking_cells) {
|
||||
return seastar::async([&] {
|
||||
// Takes its argument by value, so the object is destroyed when the call returns.
auto destroy = [] (auto) { };
|
||||
|
||||
auto s = make_schema();
|
||||
cell_locker cl(s);
|
||||
|
||||
auto m = make_mutation(s, "0", { "s1", "s3" }, {
|
||||
make_row("one", { "r1", "r2" }),
|
||||
make_row("two", { "r2", "r3" }),
|
||||
});
|
||||
|
||||
auto l1 = cl.lock_cells(m.decorated_key(), partition_cells_range(m.partition())).get0();
|
||||
auto f2 = cl.lock_cells(m.decorated_key(), partition_cells_range(m.partition()));
|
||||
// Same cells as l1, so the request has to wait.
BOOST_REQUIRE(!f2.available());
|
||||
|
||||
destroy(std::move(l1));
|
||||
destroy(f2.get0());
|
||||
});
|
||||
}
|
||||
|
||||
// Takes three locks that touch no common cell and keeps all of them alive:
// m1 and m2 are in the same partition but name different cells, and m3 has
// m1's contents under a different partition key. Each get0() is expected to
// complete while the earlier locks are still held.
SEASTAR_TEST_CASE(test_disjoint_mutations) {
|
||||
return seastar::async([&] {
|
||||
auto s = make_schema();
|
||||
cell_locker cl(s);
|
||||
|
||||
auto m1 = make_mutation(s, "0", { "s1" }, {
|
||||
make_row("one", { "r1", "r2" }),
|
||||
make_row("two", { "r3" }),
|
||||
});
|
||||
// Same rows as m1, but the column sets are swapped between them.
auto m2 = make_mutation(s, "0", { "s2" }, {
|
||||
make_row("two", { "r1", "r2" }),
|
||||
make_row("one", { "r3" }),
|
||||
});
|
||||
|
||||
// Same cells as m1, but in partition "1" instead of "0".
auto m3 = mutation(partition_key::from_single_value(*s, to_bytes("1")), s);
|
||||
m3.partition() = m1.partition();
|
||||
|
||||
auto l1 = cl.lock_cells(m1.decorated_key(), partition_cells_range(m1.partition())).get0();
|
||||
auto l2 = cl.lock_cells(m2.decorated_key(), partition_cells_range(m2.partition())).get0();
|
||||
auto l3 = cl.lock_cells(m3.decorated_key(), partition_cells_range(m3.partition())).get0();
|
||||
});
|
||||
}
|
||||
|
||||
// Checks that a partial overlap is enough to make a lock request wait.
// m2 shares only the static cell s1 with m1; m3 shares cells with m2 (for
// example row "two" / r1). Each later request must be unavailable until the
// lock it conflicts with has been destroyed.
SEASTAR_TEST_CASE(test_single_cell_overlap) {
|
||||
return seastar::async([&] {
|
||||
// Takes its argument by value, so the object is destroyed when the call returns.
auto destroy = [] (auto) { };
|
||||
|
||||
auto s = make_schema();
|
||||
cell_locker cl(s);
|
||||
|
||||
auto m1 = make_mutation(s, "0", { "s1" }, {
|
||||
make_row("one", { "r1", "r2" }),
|
||||
make_row("two", { "r3" }),
|
||||
});
|
||||
auto m2 = make_mutation(s, "0", { "s1" }, {
|
||||
make_row("two", { "r1", "r2" }),
|
||||
make_row("one", { "r3" }),
|
||||
});
|
||||
auto m3 = make_mutation(s, "0", { "s2" }, {
|
||||
make_row("two", { "r1" }),
|
||||
make_row("one", { "r2", "r3" }),
|
||||
});
|
||||
|
||||
auto l1 = cl.lock_cells(m1.decorated_key(), partition_cells_range(m1.partition())).get0();
|
||||
auto f2 = cl.lock_cells(m2.decorated_key(), partition_cells_range(m2.partition()));
|
||||
// Blocked by l1 through the shared static cell s1.
BOOST_REQUIRE(!f2.available());
|
||||
destroy(std::move(l1));
|
||||
auto l2 = f2.get0();
|
||||
auto f3 = cl.lock_cells(m3.decorated_key(), partition_cells_range(m3.partition()));
|
||||
// l1 is gone by now, so this wait is caused by l2.
BOOST_REQUIRE(!f3.available());
|
||||
destroy(std::move(l2));
|
||||
auto l3 = f3.get0();
|
||||
});
|
||||
}
|
||||
|
||||
// Exercises cell_locker::set_schema() while locks are held. A lock taken under
// the first schema must still conflict with an overlapping request made after
// switching to the alternative schema, while a disjoint request must succeed.
// Finally the locker is switched to a schema sharing no non-key columns and a
// further lock is taken while l2 and l3 are still in scope.
SEASTAR_TEST_CASE(test_schema_change) {
|
||||
return seastar::async([&] {
|
||||
// Takes its argument by value, so the object is destroyed when the call returns.
auto destroy = [] (auto) { };
|
||||
|
||||
auto s1 = make_schema();
|
||||
auto s2 = make_alternative_schema();
|
||||
cell_locker cl(s1);
|
||||
|
||||
auto m1 = make_mutation(s1, "0", { "s1", "s2", "s3"}, {
|
||||
make_row("one", { "r1", "r2", "r3" }),
|
||||
});
|
||||
|
||||
// disjoint with m1
|
||||
auto m2 = make_mutation(s2, "0", { "s0", "s2.5"}, {
|
||||
make_row("one", { "r0", "r2.5" }),
|
||||
make_row("two", { "r1", "r3" }),
|
||||
});
|
||||
|
||||
// overlaps with m1
|
||||
auto m3 = make_mutation(s2, "0", { "s1" }, {
|
||||
make_row("one", { "r1", "r3" }),
|
||||
});
|
||||
|
||||
auto l1 = cl.lock_cells(m1.decorated_key(), partition_cells_range(m1.partition())).get0();
|
||||
|
||||
// Drop the mutation and the test's handle on the first schema while l1 is
// still held, then switch the locker to the alternative schema.
destroy(std::move(m1));
|
||||
destroy(std::move(s1));
|
||||
cl.set_schema(s2);
|
||||
|
||||
auto l2 = cl.lock_cells(m2.decorated_key(), partition_cells_range(m2.partition())).get0();
|
||||
auto f3 = cl.lock_cells(m3.decorated_key(), partition_cells_range(m3.partition()));
|
||||
// m3 names cells that l1 (taken under the old schema) still holds.
BOOST_REQUIRE(!f3.available());
|
||||
destroy(std::move(l1));
|
||||
auto l3 = f3.get0();
|
||||
|
||||
auto s3 = make_schema_disjoint_with_others();
|
||||
cl.set_schema(s3);
|
||||
|
||||
auto m4 = make_mutation(s3, "0", { "s8", "s9"}, {
|
||||
make_row("one", { "r8", "r9" }),
|
||||
make_row("two", { "r8", "r9" }),
|
||||
});
|
||||
auto l4 = cl.lock_cells(m4.decorated_key(), partition_cells_range(m4.partition())).get0();
|
||||
});
|
||||
}
|
||||
@@ -73,6 +73,7 @@ schema_ptr get_schema() {
|
||||
return schema_builder("ks", "cf")
|
||||
.with_column("pk", int32_type, column_kind::partition_key)
|
||||
.with_column("ck", int32_type, column_kind::clustering_key)
|
||||
.with_column("s1", counter_type, column_kind::static_column)
|
||||
.with_column("c1", counter_type)
|
||||
.build();
|
||||
}
|
||||
@@ -90,6 +91,18 @@ atomic_cell_view get_counter_cell(mutation& m) {
|
||||
return *acv;
|
||||
};
|
||||
|
||||
// Test helper: returns a view of the single cell in the mutation's static row.
// Fails the test (BOOST_REQUIRE*) if the static row does not hold exactly one
// cell or if no cell was visited.
atomic_cell_view get_static_counter_cell(mutation& m) {
|
||||
auto& mp = m.partition();
|
||||
const auto& cells = mp.static_row();
|
||||
BOOST_REQUIRE_EQUAL(cells.size(), 1);
|
||||
stdx::optional<atomic_cell_view> acv;
|
||||
// With exactly one cell present, this records that cell's atomic view.
cells.for_each_cell([&] (column_id, const atomic_cell_or_collection& ac_o_c) {
|
||||
acv = ac_o_c.as_atomic_cell();
|
||||
});
|
||||
BOOST_REQUIRE(bool(acv));
|
||||
return *acv;
|
||||
};
|
||||
|
||||
SEASTAR_TEST_CASE(test_counter_mutations) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
@@ -101,6 +114,7 @@ SEASTAR_TEST_CASE(test_counter_mutations) {
|
||||
auto pk = partition_key::from_single_value(*s, int32_type->decompose(0));
|
||||
auto ck = clustering_key::from_single_value(*s, int32_type->decompose(0));
|
||||
auto& col = *s->get_column_definition(utf8_type->decompose(sstring("c1")));
|
||||
auto& scol = *s->get_column_definition(utf8_type->decompose(sstring("s1")));
|
||||
|
||||
mutation m1(pk, s);
|
||||
counter_cell_builder b1;
|
||||
@@ -109,15 +123,28 @@ SEASTAR_TEST_CASE(test_counter_mutations) {
|
||||
b1.add_shard(counter_shard(id[2], 3, 1));
|
||||
m1.set_clustered_cell(ck, col, b1.build(api::new_timestamp()));
|
||||
|
||||
counter_cell_builder b1s;
|
||||
b1s.add_shard(counter_shard(id[1], 4, 3));
|
||||
b1s.add_shard(counter_shard(id[2], 5, 1));
|
||||
b1s.add_shard(counter_shard(id[3], 6, 2));
|
||||
m1.set_static_cell(scol, b1s.build(api::new_timestamp()));
|
||||
|
||||
mutation m2(pk, s);
|
||||
counter_cell_builder b2;
|
||||
b1.add_shard(counter_shard(id[0], 1, 1));
|
||||
b2.add_shard(counter_shard(id[0], 1, 1));
|
||||
b2.add_shard(counter_shard(id[2], -5, 4));
|
||||
b2.add_shard(counter_shard(id[3], -100, 1));
|
||||
m2.set_clustered_cell(ck, col, b2.build(api::new_timestamp()));
|
||||
|
||||
counter_cell_builder b2s;
|
||||
b2s.add_shard(counter_shard(id[0], 8, 8));
|
||||
b2s.add_shard(counter_shard(id[1], 1, 4));
|
||||
b2s.add_shard(counter_shard(id[3], 9, 1));
|
||||
m2.set_static_cell(scol, b2s.build(api::new_timestamp()));
|
||||
|
||||
mutation m3(pk, s);
|
||||
m3.set_clustered_cell(ck, col, atomic_cell::make_dead(1, gc_clock::now()));
|
||||
m3.set_static_cell(scol, atomic_cell::make_dead(1, gc_clock::now()));
|
||||
|
||||
mutation m4(pk, s);
|
||||
m4.partition().apply(tombstone(0, gc_clock::now()));
|
||||
@@ -131,15 +158,23 @@ SEASTAR_TEST_CASE(test_counter_mutations) {
|
||||
counter_cell_view ccv { ac };
|
||||
BOOST_REQUIRE_EQUAL(ccv.total_value(), -102);
|
||||
|
||||
ac = get_static_counter_cell(m);
|
||||
BOOST_REQUIRE(ac.is_live());
|
||||
ccv = counter_cell_view(ac);
|
||||
BOOST_REQUIRE_EQUAL(ccv.total_value(), 20);
|
||||
|
||||
m.apply(m3);
|
||||
ac = get_counter_cell(m);
|
||||
BOOST_REQUIRE(!ac.is_live());
|
||||
ac = get_static_counter_cell(m);
|
||||
BOOST_REQUIRE(!ac.is_live());
|
||||
|
||||
m = m1;
|
||||
m.apply(m4);
|
||||
m.partition().compact_for_query(*s, gc_clock::now(), { query::clustering_range::make_singular(ck) },
|
||||
false, query::max_rows);
|
||||
BOOST_REQUIRE_EQUAL(m.partition().clustered_rows().calculate_size(), 0);
|
||||
BOOST_REQUIRE(m.partition().static_row().empty());
|
||||
|
||||
// Difference
|
||||
|
||||
@@ -147,7 +182,12 @@ SEASTAR_TEST_CASE(test_counter_mutations) {
|
||||
ac = get_counter_cell(m);
|
||||
BOOST_REQUIRE(ac.is_live());
|
||||
ccv = counter_cell_view(ac);
|
||||
BOOST_REQUIRE_EQUAL(ccv.total_value(), 3);
|
||||
BOOST_REQUIRE_EQUAL(ccv.total_value(), 2);
|
||||
|
||||
ac = get_static_counter_cell(m);
|
||||
BOOST_REQUIRE(ac.is_live());
|
||||
ccv = counter_cell_view(ac);
|
||||
BOOST_REQUIRE_EQUAL(ccv.total_value(), 11);
|
||||
|
||||
m = mutation(s, m1.decorated_key(), m2.partition().difference(s, m1.partition()));
|
||||
ac = get_counter_cell(m);
|
||||
@@ -155,13 +195,22 @@ SEASTAR_TEST_CASE(test_counter_mutations) {
|
||||
ccv = counter_cell_view(ac);
|
||||
BOOST_REQUIRE_EQUAL(ccv.total_value(), -105);
|
||||
|
||||
ac = get_static_counter_cell(m);
|
||||
BOOST_REQUIRE(ac.is_live());
|
||||
ccv = counter_cell_view(ac);
|
||||
BOOST_REQUIRE_EQUAL(ccv.total_value(), 9);
|
||||
|
||||
m = mutation(s, m1.decorated_key(), m1.partition().difference(s, m3.partition()));
|
||||
BOOST_REQUIRE_EQUAL(m.partition().clustered_rows().calculate_size(), 0);
|
||||
BOOST_REQUIRE(m.partition().static_row().empty());
|
||||
|
||||
m = mutation(s, m1.decorated_key(), m3.partition().difference(s, m1.partition()));
|
||||
ac = get_counter_cell(m);
|
||||
BOOST_REQUIRE(!ac.is_live());
|
||||
|
||||
ac = get_static_counter_cell(m);
|
||||
BOOST_REQUIRE(!ac.is_live());
|
||||
|
||||
// Freeze
|
||||
|
||||
auto fm1 = freeze(m1);
|
||||
@@ -206,18 +255,24 @@ SEASTAR_TEST_CASE(test_counter_update_mutations) {
|
||||
auto pk = partition_key::from_single_value(*s, int32_type->decompose(0));
|
||||
auto ck = clustering_key::from_single_value(*s, int32_type->decompose(0));
|
||||
auto& col = *s->get_column_definition(utf8_type->decompose(sstring("c1")));
|
||||
auto& scol = *s->get_column_definition(utf8_type->decompose(sstring("s1")));
|
||||
|
||||
auto c1 = atomic_cell::make_live_counter_update(api::new_timestamp(), long_type->decompose(int64_t(5)));
|
||||
auto s1 = atomic_cell::make_live_counter_update(api::new_timestamp(), long_type->decompose(int64_t(4)));
|
||||
mutation m1(pk, s);
|
||||
m1.set_clustered_cell(ck, col, c1);
|
||||
m1.set_static_cell(scol, s1);
|
||||
|
||||
auto c2 = atomic_cell::make_live_counter_update(api::new_timestamp(), long_type->decompose(int64_t(9)));
|
||||
auto s2 = atomic_cell::make_live_counter_update(api::new_timestamp(), long_type->decompose(int64_t(8)));
|
||||
mutation m2(pk, s);
|
||||
m2.set_clustered_cell(ck, col, c2);
|
||||
m2.set_static_cell(scol, s2);
|
||||
|
||||
auto c3 = atomic_cell::make_dead(api::new_timestamp() / 2, gc_clock::now());
|
||||
mutation m3(pk, s);
|
||||
m3.set_clustered_cell(ck, col, c3);
|
||||
m3.set_static_cell(scol, c3);
|
||||
|
||||
auto counter_update_value = [&] (atomic_cell_view acv) {
|
||||
return value_cast<int64_t>(long_type->deserialize_value(acv.value()));
|
||||
@@ -230,9 +285,86 @@ SEASTAR_TEST_CASE(test_counter_update_mutations) {
|
||||
BOOST_REQUIRE(ac.is_counter_update());
|
||||
BOOST_REQUIRE_EQUAL(counter_update_value(ac), 14);
|
||||
|
||||
ac = get_static_counter_cell(m12);
|
||||
BOOST_REQUIRE(ac.is_live());
|
||||
BOOST_REQUIRE(ac.is_counter_update());
|
||||
BOOST_REQUIRE_EQUAL(counter_update_value(ac), 12);
|
||||
|
||||
auto m123 = m12;
|
||||
m123.apply(m3);
|
||||
ac = get_counter_cell(m123);
|
||||
BOOST_REQUIRE(!ac.is_live());
|
||||
|
||||
ac = get_static_counter_cell(m123);
|
||||
BOOST_REQUIRE(!ac.is_live());
|
||||
});
|
||||
}
|
||||
|
||||
// Checks transform_counter_updates_to_shards() on both the clustered counter
// column "c1" and the static counter column "s1":
//  - transforming against nullptr and against an empty mutation gives equal results;
//  - the transformed totals equal the update values (5 and 4);
//  - transforming a second update against the first result yields the sums (14 and 12);
//  - dead cells remain dead after the transformation.
// NOTE(review): the second argument is presumably the current counter state
// and the third a clock offset - confirm against the function's declaration.
SEASTAR_TEST_CASE(test_transfer_updates_to_shards) {
|
||||
return seastar::async([] {
|
||||
storage_service_for_tests ssft;
|
||||
|
||||
auto s = get_schema();
|
||||
|
||||
auto pk = partition_key::from_single_value(*s, int32_type->decompose(0));
|
||||
auto ck = clustering_key::from_single_value(*s, int32_type->decompose(0));
|
||||
auto& col = *s->get_column_definition(utf8_type->decompose(sstring("c1")));
|
||||
auto& scol = *s->get_column_definition(utf8_type->decompose(sstring("s1")));
|
||||
|
||||
// m1: counter updates of +5 (clustered) and +4 (static).
auto c1 = atomic_cell::make_live_counter_update(api::new_timestamp(), long_type->decompose(int64_t(5)));
|
||||
auto s1 = atomic_cell::make_live_counter_update(api::new_timestamp(), long_type->decompose(int64_t(4)));
|
||||
mutation m1(pk, s);
|
||||
m1.set_clustered_cell(ck, col, c1);
|
||||
m1.set_static_cell(scol, s1);
|
||||
|
||||
// m2: counter updates of +9 (clustered) and +8 (static).
auto c2 = atomic_cell::make_live_counter_update(api::new_timestamp(), long_type->decompose(int64_t(9)));
|
||||
auto s2 = atomic_cell::make_live_counter_update(api::new_timestamp(), long_type->decompose(int64_t(8)));
|
||||
mutation m2(pk, s);
|
||||
m2.set_clustered_cell(ck, col, c2);
|
||||
m2.set_static_cell(scol, s2);
|
||||
|
||||
// m3: the same dead cell written to both columns.
auto c3 = atomic_cell::make_dead(api::new_timestamp() / 2, gc_clock::now());
|
||||
mutation m3(pk, s);
|
||||
m3.set_clustered_cell(ck, col, c3);
|
||||
m3.set_static_cell(scol, c3);
|
||||
|
||||
// Reference result: m1 transformed with no current state at all.
auto m0 = m1;
|
||||
transform_counter_updates_to_shards(m0, nullptr, 0);
|
||||
|
||||
// An empty current state must give the same result as nullptr.
auto empty = mutation(pk, s);
|
||||
auto m = m1;
|
||||
transform_counter_updates_to_shards(m, &empty, 0);
|
||||
BOOST_REQUIRE_EQUAL(m, m0);
|
||||
|
||||
auto ac = get_counter_cell(m);
|
||||
BOOST_REQUIRE(ac.is_live());
|
||||
auto ccv = counter_cell_view(ac);
|
||||
BOOST_REQUIRE_EQUAL(ccv.total_value(), 5);
|
||||
|
||||
ac = get_static_counter_cell(m);
|
||||
BOOST_REQUIRE(ac.is_live());
|
||||
ccv = counter_cell_view(ac);
|
||||
BOOST_REQUIRE_EQUAL(ccv.total_value(), 4);
|
||||
|
||||
// Apply m2's updates on top of m0: expected totals are 5 + 9 and 4 + 8.
m = m2;
|
||||
transform_counter_updates_to_shards(m, &m0, 0);
|
||||
|
||||
ac = get_counter_cell(m);
|
||||
BOOST_REQUIRE(ac.is_live());
|
||||
ccv = counter_cell_view(ac);
|
||||
BOOST_REQUIRE_EQUAL(ccv.total_value(), 14);
|
||||
|
||||
ac = get_static_counter_cell(m);
|
||||
BOOST_REQUIRE(ac.is_live());
|
||||
ccv = counter_cell_view(ac);
|
||||
BOOST_REQUIRE_EQUAL(ccv.total_value(), 12);
|
||||
|
||||
// Dead cells must stay dead after the transformation.
m = m3;
|
||||
transform_counter_updates_to_shards(m, &m0, 0);
|
||||
ac = get_counter_cell(m);
|
||||
BOOST_REQUIRE(!ac.is_live());
|
||||
ac = get_static_counter_cell(m);
|
||||
BOOST_REQUIRE(!ac.is_live());
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -279,7 +279,7 @@ public:
|
||||
auto stop_ms = defer([&ms] { ms.stop().get(); });
|
||||
|
||||
auto& ss = service::get_storage_service();
|
||||
ss.start(std::ref(*db));
|
||||
ss.start(std::ref(*db)).get();
|
||||
auto stop_storage_service = defer([&ss] { ss.stop().get(); });
|
||||
|
||||
db->start(std::move(*cfg)).get();
|
||||
|
||||
@@ -29,7 +29,9 @@
|
||||
#include <seastar/core/timer.hh>
|
||||
#include <seastar/core/sleep.hh>
|
||||
#include <seastar/tests/test-utils.hh>
|
||||
#include <seastar/util/defer.hh>
|
||||
#include <deque>
|
||||
#include "utils/phased_barrier.hh"
|
||||
|
||||
#include "utils/logalloc.hh"
|
||||
#include "utils/managed_ref.hh"
|
||||
@@ -102,7 +104,7 @@ SEASTAR_TEST_CASE(test_compaction_with_multiple_regions) {
|
||||
std::vector<managed_ref<int>> allocated1;
|
||||
std::vector<managed_ref<int>> allocated2;
|
||||
|
||||
int count = 32 * 1024 * 4;
|
||||
int count = 32 * 1024 * 4 * 2;
|
||||
|
||||
with_allocator(reg1.allocator(), [&] {
|
||||
for (int i = 0; i < count; i++) {
|
||||
@@ -529,11 +531,7 @@ inline void quiesce(FutureType&& fut) {
|
||||
// a request may be broken into many continuations. While we could just yield many times, the
|
||||
// exact amount needed to guarantee execution would be dependent on the internals of the
|
||||
// implementation, we want to avoid that.
|
||||
timer<> tmr;
|
||||
tmr.set_callback([] { BOOST_FAIL("The future we were waiting for took too long to get ready"); });
|
||||
tmr.arm(2s);
|
||||
fut.get();
|
||||
tmr.cancel();
|
||||
with_timeout(lowres_clock::now() + 2s, std::move(fut)).get();
|
||||
}
|
||||
|
||||
// Simple RAII structure that wraps around a region_group
|
||||
@@ -859,15 +857,22 @@ class test_reclaimer: public region_group_reclaimer {
|
||||
region_group _rg;
|
||||
std::vector<size_t> _reclaim_sizes;
|
||||
bool _shutdown = false;
|
||||
shared_promise<> _unleash_reclaimer;
|
||||
seastar::gate _reclaimers_done;
|
||||
public:
|
||||
virtual void start_reclaiming() override {
|
||||
while (this->under_pressure()) {
|
||||
size_t reclaimed = test_async_reclaim_region::from_region(_rg.get_largest_region()).evict();
|
||||
_result_accumulator->_reclaim_sizes.push_back(reclaimed);
|
||||
}
|
||||
virtual void start_reclaiming() noexcept override {
|
||||
with_gate(_reclaimers_done, [this] {
|
||||
return _unleash_reclaimer.get_shared_future().then([this] {
|
||||
while (this->under_pressure()) {
|
||||
size_t reclaimed = test_async_reclaim_region::from_region(_rg.get_largest_region()).evict();
|
||||
_result_accumulator->_reclaim_sizes.push_back(reclaimed);
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
~test_reclaimer() {
|
||||
_reclaimers_done.close().get();
|
||||
_rg.shutdown().get();
|
||||
}
|
||||
|
||||
@@ -881,6 +886,10 @@ public:
|
||||
|
||||
test_reclaimer(size_t threshold) : region_group_reclaimer(threshold), _result_accumulator(this), _rg(*this) {}
|
||||
test_reclaimer(test_reclaimer& parent, size_t threshold) : region_group_reclaimer(threshold), _result_accumulator(&parent), _rg(&parent._rg, *this) {}
|
||||
|
||||
void unleash() {
|
||||
_unleash_reclaimer.set_value();
|
||||
}
|
||||
};
|
||||
|
||||
SEASTAR_TEST_CASE(test_region_groups_basic_throttling_simple_active_reclaim) {
|
||||
@@ -888,6 +897,7 @@ SEASTAR_TEST_CASE(test_region_groups_basic_throttling_simple_active_reclaim) {
|
||||
// allocate a single region to exhaustion, and make sure active reclaim is activated.
|
||||
test_reclaimer simple(logalloc::segment_size);
|
||||
test_async_reclaim_region simple_region(simple.rg(), logalloc::segment_size);
|
||||
simple.unleash();
|
||||
|
||||
// Can't run this function until we have reclaimed something
|
||||
auto fut = simple.rg().run_when_memory_available([] {});
|
||||
@@ -912,6 +922,7 @@ SEASTAR_TEST_CASE(test_region_groups_basic_throttling_active_reclaim_worst_offen
|
||||
test_async_reclaim_region small_region(simple.rg(), logalloc::segment_size);
|
||||
test_async_reclaim_region medium_region(simple.rg(), 2 * logalloc::segment_size);
|
||||
test_async_reclaim_region big_region(simple.rg(), 3 * logalloc::segment_size);
|
||||
simple.unleash();
|
||||
|
||||
// Can't run this function until we have reclaimed
|
||||
auto fut = simple.rg().run_when_memory_available([&simple] {
|
||||
@@ -941,6 +952,9 @@ SEASTAR_TEST_CASE(test_region_groups_basic_throttling_active_reclaim_leaf_offend
|
||||
test_async_reclaim_region small_region(small_leaf.rg(), logalloc::segment_size);
|
||||
test_async_reclaim_region medium_region(root.rg(), 2 * logalloc::segment_size);
|
||||
test_async_reclaim_region big_region(large_leaf.rg(), 3 * logalloc::segment_size);
|
||||
root.unleash();
|
||||
large_leaf.unleash();
|
||||
small_leaf.unleash();
|
||||
|
||||
// Can't run this function until we have reclaimed. Try at the root, and we'll make sure
|
||||
// that the leaves are forced correctly.
|
||||
@@ -967,6 +981,8 @@ SEASTAR_TEST_CASE(test_region_groups_basic_throttling_active_reclaim_ancestor_bl
|
||||
test_reclaimer leaf(root, logalloc::segment_size);
|
||||
|
||||
test_async_reclaim_region root_region(root.rg(), logalloc::segment_size);
|
||||
root.unleash();
|
||||
leaf.unleash();
|
||||
|
||||
// Can't run this function until we have reclaimed. Try at the leaf, and we'll make sure
|
||||
// that the root reclaims
|
||||
@@ -992,6 +1008,8 @@ SEASTAR_TEST_CASE(test_region_groups_basic_throttling_active_reclaim_big_region_
|
||||
test_async_reclaim_region root_region(root.rg(), 4 * logalloc::segment_size);
|
||||
test_async_reclaim_region big_leaf_region(leaf.rg(), 3 * logalloc::segment_size);
|
||||
test_async_reclaim_region small_leaf_region(leaf.rg(), 2 * logalloc::segment_size);
|
||||
root.unleash();
|
||||
leaf.unleash();
|
||||
|
||||
auto fut = root.rg().run_when_memory_available([&root] {
|
||||
BOOST_REQUIRE_EQUAL(root.reclaim_sizes().size(), 3);
|
||||
@@ -1018,6 +1036,8 @@ SEASTAR_TEST_CASE(test_region_groups_basic_throttling_active_reclaim_no_double_r
|
||||
test_reclaimer leaf(root, logalloc::segment_size);
|
||||
|
||||
test_async_reclaim_region leaf_region(leaf.rg(), logalloc::segment_size);
|
||||
root.unleash();
|
||||
leaf.unleash();
|
||||
|
||||
auto fut_root = root.rg().run_when_memory_available([&root] {
|
||||
BOOST_REQUIRE_EQUAL(root.reclaim_sizes().size(), 1);
|
||||
@@ -1037,3 +1057,117 @@ SEASTAR_TEST_CASE(test_region_groups_basic_throttling_active_reclaim_no_double_r
|
||||
BOOST_REQUIRE_EQUAL(root.reclaim_sizes()[0], logalloc::segment_size);
|
||||
});
|
||||
}
|
||||
|
||||
// Reproduces issue #2021
|
||||
SEASTAR_TEST_CASE(test_no_crash_when_a_lot_of_requests_released_which_change_region_group_size) {
|
||||
return seastar::async([] {
|
||||
#ifndef DEFAULT_ALLOCATOR // Because we need memory::stats().free_memory();
|
||||
logging::logger_registry().set_logger_level("lsa", seastar::log_level::debug);
|
||||
|
||||
auto free_space = memory::stats().free_memory();
|
||||
size_t threshold = size_t(0.75 * free_space);
|
||||
region_group_reclaimer recl(threshold, threshold);
|
||||
region_group gr(recl);
|
||||
auto close_gr = defer([&gr] { gr.shutdown().get(); });
|
||||
region r(gr);
|
||||
|
||||
with_allocator(r.allocator(), [&] {
|
||||
std::vector<managed_bytes> objs;
|
||||
|
||||
r.make_evictable([&] {
|
||||
if (objs.empty()) {
|
||||
return memory::reclaiming_result::reclaimed_nothing;
|
||||
}
|
||||
with_allocator(r.allocator(), [&] {
|
||||
objs.pop_back();
|
||||
});
|
||||
return memory::reclaiming_result::reclaimed_something;
|
||||
});
|
||||
|
||||
auto fill_to_pressure = [&] {
|
||||
while (!recl.under_pressure()) {
|
||||
objs.emplace_back(managed_bytes(managed_bytes::initialized_later(), 1024));
|
||||
}
|
||||
};
|
||||
|
||||
utils::phased_barrier request_barrier;
|
||||
auto wait_for_requests = defer([&] { request_barrier.advance_and_await().get(); });
|
||||
|
||||
for (int i = 0; i < 1000000; ++i) {
|
||||
fill_to_pressure();
|
||||
future<> f = gr.run_when_memory_available([&, op = request_barrier.start()] {
|
||||
// Trigger group size change (Refs issue #2021)
|
||||
gr.update(-10);
|
||||
gr.update(+10);
|
||||
});
|
||||
BOOST_REQUIRE(!f.available());
|
||||
}
|
||||
|
||||
// Release
|
||||
while (recl.under_pressure()) {
|
||||
objs.pop_back();
|
||||
}
|
||||
});
|
||||
#endif
|
||||
});
|
||||
}
|
||||
|
||||
// Verifies that a region_group_reclaimer is told to reclaim once the soft
// limit is crossed and is only told to stop once usage drops back below the
// soft limit, regardless of crossing the hard limit in between.
SEASTAR_TEST_CASE(test_reclaiming_runs_as_long_as_there_is_soft_pressure) {
    return seastar::async([] {
        size_t hard_threshold = logalloc::segment_size * 8;
        size_t soft_threshold = hard_threshold / 2;

        // Reclaimer that only records whether reclaiming is currently requested.
        class tracking_reclaimer : public region_group_reclaimer {
            bool _active = false;
        protected:
            void start_reclaiming() noexcept override {
                _active = true;
            }

            void stop_reclaiming() noexcept override {
                _active = false;
            }
        public:
            tracking_reclaimer(size_t hard_threshold, size_t soft_threshold)
                : region_group_reclaimer(hard_threshold, soft_threshold)
            { }
            bool reclaiming() const { return _active; }
        };

        tracking_reclaimer recl(hard_threshold, soft_threshold);
        region_group gr(recl);
        auto close_gr = defer([&gr] { gr.shutdown().get(); });
        region r(gr);

        with_allocator(r.allocator(), [&] {
            std::vector<managed_bytes> segments;

            // Allocates one more segment-sized object.
            auto grow = [&] {
                segments.emplace_back(managed_bytes(managed_bytes::initialized_later(), logalloc::segment_size));
            };

            BOOST_REQUIRE(!recl.reclaiming());

            // Cross the soft limit: reclaiming must start.
            while (!recl.over_soft_limit()) {
                grow();
            }

            BOOST_REQUIRE(recl.reclaiming());

            // Cross the hard limit: reclaiming must still be on.
            while (!recl.under_pressure()) {
                grow();
            }

            BOOST_REQUIRE(recl.reclaiming());

            // Drop below the hard limit while staying above the soft one.
            while (recl.under_pressure()) {
                segments.pop_back();
            }

            BOOST_REQUIRE(recl.over_soft_limit());
            BOOST_REQUIRE(recl.reclaiming());

            // Drop below the soft limit: reclaiming must stop.
            while (recl.over_soft_limit()) {
                segments.pop_back();
            }

            BOOST_REQUIRE(!recl.reclaiming());
        });
    });
}
|
||||
|
||||
@@ -76,13 +76,16 @@ int main(int argc, char** argv) {
|
||||
});
|
||||
|
||||
uint64_t counter = 0;
|
||||
logalloc::allocating_section alloc_sect;
|
||||
alloc_sect.set_lsa_reserve(0);
|
||||
alloc_sect.set_std_reserve(0);
|
||||
|
||||
while (counter < obj_count) {
|
||||
auto obj = managed_bytes(managed_bytes::initialized_later(), obj_size);
|
||||
{
|
||||
alloc_sect(r, [&] {
|
||||
auto obj = managed_bytes(managed_bytes::initialized_later(), obj_size);
|
||||
logalloc::reclaim_lock l(r);
|
||||
refs.push_back(std::move(obj));
|
||||
}
|
||||
});
|
||||
|
||||
++counter;
|
||||
|
||||
|
||||
@@ -191,7 +191,6 @@ static mutation_sets generate_mutation_sets() {
|
||||
.with_column("ck_col_2", bytes_type, column_kind::clustering_key)
|
||||
.with_column("regular_col_1", bytes_type)
|
||||
.with_column("regular_col_2", bytes_type)
|
||||
.with_column("regular_counter_col_1", counter_type)
|
||||
.with_column("static_col_1", bytes_type, column_kind::static_column)
|
||||
.with_column("static_col_2", bytes_type, column_kind::static_column);
|
||||
|
||||
@@ -300,9 +299,20 @@ static mutation_sets generate_mutation_sets() {
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr auto rmg_iterations = 10;
|
||||
|
||||
{
|
||||
random_mutation_generator gen;
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
random_mutation_generator gen(random_mutation_generator::generate_counters::no);
|
||||
for (int i = 0; i < rmg_iterations; ++i) {
|
||||
auto m = gen();
|
||||
result.unequal.emplace_back(mutations{m, gen()}); // collision unlikely
|
||||
result.equal.emplace_back(mutations{m, m});
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
random_mutation_generator gen(random_mutation_generator::generate_counters::yes);
|
||||
for (int i = 0; i < rmg_iterations; ++i) {
|
||||
auto m = gen();
|
||||
result.unequal.emplace_back(mutations{m, gen()}); // collision unlikely
|
||||
result.equal.emplace_back(mutations{m, m});
|
||||
@@ -364,6 +374,7 @@ bytes make_blob(size_t blob_size) {
|
||||
|
||||
class random_mutation_generator::impl {
|
||||
friend class random_mutation_generator;
|
||||
generate_counters _generate_counters;
|
||||
const size_t _external_blob_size = 128; // Should be enough to force use of external bytes storage
|
||||
const column_id column_count = row::max_vector_size * 2;
|
||||
std::mt19937 _gen;
|
||||
@@ -375,30 +386,33 @@ class random_mutation_generator::impl {
|
||||
return gc_clock::time_point() + std::chrono::seconds(dist(gen));
|
||||
}
|
||||
|
||||
public:
|
||||
schema_ptr make_schema() {
|
||||
schema_ptr do_make_schema(data_type type) {
|
||||
auto builder = schema_builder("ks", "cf")
|
||||
.with_column("pk", bytes_type, column_kind::partition_key)
|
||||
.with_column("ck1", bytes_type, column_kind::clustering_key)
|
||||
.with_column("ck2", bytes_type, column_kind::clustering_key)
|
||||
.with_column("c1", counter_type);
|
||||
.with_column("ck2", bytes_type, column_kind::clustering_key);
|
||||
|
||||
// Create enough columns so that row can overflow its vector storage
|
||||
for (column_id i = 0; i < column_count; ++i) {
|
||||
{
|
||||
auto column_name = sprint("v%d", i);
|
||||
builder.with_column(to_bytes(column_name), bytes_type, column_kind::regular_column);
|
||||
builder.with_column(to_bytes(column_name), type, column_kind::regular_column);
|
||||
}
|
||||
{
|
||||
auto column_name = sprint("s%d", i);
|
||||
builder.with_column(to_bytes(column_name), bytes_type, column_kind::static_column);
|
||||
builder.with_column(to_bytes(column_name), type, column_kind::static_column);
|
||||
}
|
||||
}
|
||||
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
impl() {
|
||||
schema_ptr make_schema() {
|
||||
return _generate_counters ? do_make_schema(counter_type)
|
||||
: do_make_schema(bytes_type);
|
||||
}
|
||||
public:
|
||||
explicit impl(generate_counters counters) : _generate_counters(counters) {
|
||||
_schema = make_schema();
|
||||
|
||||
for (int i = 0; i < 1024; ++i) {
|
||||
@@ -424,8 +438,6 @@ public:
|
||||
auto pkey = partition_key::from_single_value(*_schema, _blobs[0]);
|
||||
mutation m(pkey, _schema);
|
||||
|
||||
auto& counter_column = *_schema->get_column_definition(utf8_type->decompose(sstring("c1")));
|
||||
|
||||
std::map<counter_id, std::set<int64_t>> counter_used_clock_values;
|
||||
std::vector<counter_id> counter_ids;
|
||||
std::generate_n(std::back_inserter(counter_ids), 8, counter_id::generate_random);
|
||||
@@ -459,16 +471,16 @@ public:
|
||||
auto columns_to_set = column_count_dist(_gen);
|
||||
for (column_id i = 0; i < columns_to_set; ++i) {
|
||||
auto cid = column_id_dist(_gen);
|
||||
if (kind == column_kind::regular_column && cid == counter_column.id) {
|
||||
auto cell = bool_dist(_gen)
|
||||
? random_counter_cell()
|
||||
: atomic_cell::make_dead(timestamp_dist(_gen), expiry_dist(_gen));
|
||||
r.apply(_schema->column_at(kind, cid), std::move(cell));
|
||||
continue;
|
||||
}
|
||||
auto get_live_cell = [&] {
|
||||
if (_generate_counters) {
|
||||
return random_counter_cell();
|
||||
} else {
|
||||
return atomic_cell::make_live(timestamp_dist(_gen), _blobs[value_blob_index_dist(_gen)]);
|
||||
}
|
||||
};
|
||||
// FIXME: generate expiring cells
|
||||
auto cell = bool_dist(_gen)
|
||||
? atomic_cell::make_live(timestamp_dist(_gen), _blobs[value_blob_index_dist(_gen)])
|
||||
? get_live_cell()
|
||||
: atomic_cell::make_dead(timestamp_dist(_gen), expiry_dist(_gen));
|
||||
r.apply(_schema->column_at(kind, cid), std::move(cell));
|
||||
}
|
||||
@@ -529,8 +541,8 @@ public:
|
||||
|
||||
random_mutation_generator::~random_mutation_generator() {}
|
||||
|
||||
random_mutation_generator::random_mutation_generator()
|
||||
: _impl(std::make_unique<random_mutation_generator::impl>())
|
||||
random_mutation_generator::random_mutation_generator(generate_counters counters)
|
||||
: _impl(std::make_unique<random_mutation_generator::impl>(counters))
|
||||
{ }
|
||||
|
||||
mutation random_mutation_generator::operator()() {
|
||||
|
||||
@@ -37,11 +37,19 @@ void for_each_mutation_pair(std::function<void(const mutation&, const mutation&,
|
||||
// Calls the provided function on mutations. Is supposed to exercise as many differences as possible.
|
||||
void for_each_mutation(std::function<void(const mutation&)>);
|
||||
|
||||
// Returns true if mutations in schema s1 can be upgraded to s2.
|
||||
inline bool can_upgrade_schema(schema_ptr from, schema_ptr to) {
|
||||
return from->is_counter() == to->is_counter();
|
||||
}
|
||||
|
||||
class random_mutation_generator {
|
||||
class impl;
|
||||
std::unique_ptr<impl> _impl;
|
||||
public:
|
||||
random_mutation_generator();
|
||||
struct generate_counters_tag { };
|
||||
using generate_counters = bool_class<generate_counters_tag>;
|
||||
|
||||
explicit random_mutation_generator(generate_counters);
|
||||
~random_mutation_generator();
|
||||
mutation operator()();
|
||||
schema_ptr schema() const;
|
||||
|
||||
@@ -795,8 +795,7 @@ public:
|
||||
};
|
||||
|
||||
SEASTAR_TEST_CASE(test_apply_is_atomic_in_case_of_allocation_failures) {
|
||||
random_mutation_generator gen;
|
||||
|
||||
auto do_test = [] (auto&& gen) {
|
||||
failure_injecting_allocation_strategy alloc(standard_allocator());
|
||||
with_allocator(alloc, [&] {
|
||||
auto target = gen();
|
||||
@@ -857,7 +856,10 @@ SEASTAR_TEST_CASE(test_apply_is_atomic_in_case_of_allocation_failures) {
|
||||
}
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
do_test(random_mutation_generator(random_mutation_generator::generate_counters::no));
|
||||
do_test(random_mutation_generator(random_mutation_generator::generate_counters::yes));
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
|
||||
@@ -22,6 +22,9 @@
|
||||
#define BOOST_TEST_MODULE core
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <boost/algorithm/cxx11/all_of.hpp>
|
||||
#include <boost/range/algorithm/sort.hpp>
|
||||
#include <boost/range/algorithm/adjacent_find.hpp>
|
||||
|
||||
#include "dht/i_partitioner.hh"
|
||||
#include "dht/murmur3_partitioner.hh"
|
||||
@@ -30,12 +33,23 @@
|
||||
#include "schema.hh"
|
||||
#include "types.hh"
|
||||
#include "schema_builder.hh"
|
||||
#include "utils/div_ceil.hh"
|
||||
|
||||
#include "disk-error-handler.hh"
|
||||
|
||||
thread_local disk_error_signal_type commit_error;
|
||||
thread_local disk_error_signal_type general_disk_error;
|
||||
|
||||
|
||||
// Tracing helper for the tests in this file. Output is compiled in but
// switched off; flip `tracing_enabled` to forward the arguments to print().
template <typename... Args>
static
void
debug(Args&&... args) {
    constexpr bool tracing_enabled = false;
    if (!tracing_enabled) {
        return;
    }
    print(std::forward<Args>(args)...);
}
|
||||
|
||||
static dht::token token_from_long(uint64_t value) {
|
||||
auto t = net::hton(value);
|
||||
bytes b(bytes::initialized_later(), 8);
|
||||
@@ -482,10 +496,11 @@ void test_partitioner_sharding(const dht::i_partitioner& part, unsigned shards,
|
||||
BOOST_REQUIRE_EQUAL(part.shard_of(lim), i % shards);
|
||||
if (i != 0) {
|
||||
BOOST_REQUIRE_EQUAL(part.shard_of(prev_token(part, lim)), (i - 1) % shards);
|
||||
BOOST_REQUIRE(part.is_equal(lim, part.token_for_next_shard(prev_token(part, lim))));
|
||||
BOOST_REQUIRE(part.is_equal(lim, part.token_for_next_shard(prev_token(part, lim), i % shards)));
|
||||
}
|
||||
if (i != (shards << ignorebits) - 1) {
|
||||
BOOST_REQUIRE_EQUAL(part.shard_of(part.token_for_next_shard(lim)), (i + 1) % shards);
|
||||
auto next_shard = (i + 1) % shards;
|
||||
BOOST_REQUIRE_EQUAL(part.shard_of(part.token_for_next_shard(lim, next_shard)), next_shard);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -620,3 +635,219 @@ BOOST_AUTO_TEST_CASE(test_byte_ordered_partitioner) {
|
||||
test_partitioner_sharding(bop1s, 1, bop1s_shard_limits, prev_token);
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
dht::partition_range
|
||||
normalize(dht::partition_range pr) {
|
||||
auto start = pr.start();
|
||||
if (start && start->value().token() == dht::minimum_token()) {
|
||||
start = stdx::nullopt;
|
||||
}
|
||||
auto end = pr.end();
|
||||
if (end && end->value().token() == dht::maximum_token()) {
|
||||
end = stdx::nullopt;
|
||||
}
|
||||
return dht::partition_range(start, end);
|
||||
};
|
||||
|
||||
static
|
||||
void
|
||||
test_exponential_sharder(const dht::i_partitioner& part, const schema& s, const dht::partition_range& pr) {
|
||||
|
||||
dht::set_global_partitioner(part.name()); // so we can print tokens, also ring_position_comparator is not global_partitioner() clean
|
||||
|
||||
// Step 1: run the exponential sharder fully, and collect all results
|
||||
|
||||
debug("input range: %s\n", pr);
|
||||
auto results = std::vector<dht::ring_position_exponential_sharder_result>();
|
||||
auto sharder = dht::ring_position_exponential_sharder(part, pr);
|
||||
auto partial_result = sharder.next(s);
|
||||
while (partial_result) {
|
||||
results.push_back(std::move(*partial_result));
|
||||
partial_result = sharder.next(s);
|
||||
}
|
||||
|
||||
// Step 2: "de-exponentialize" the result by fragmenting large ranges
|
||||
|
||||
struct fragmented_sharder_result {
|
||||
bool inorder;
|
||||
struct shard_result {
|
||||
shard_id shard;
|
||||
std::vector<dht::partition_range> ranges;
|
||||
};
|
||||
std::vector<shard_result> shards;
|
||||
};
|
||||
auto fragmented_results = std::vector<fragmented_sharder_result>();
|
||||
for (auto&& partial_result : results) {
|
||||
auto fsr = fragmented_sharder_result();
|
||||
fsr.inorder = partial_result.inorder;
|
||||
debug("looking at partial result\n");
|
||||
for (auto&& per_shard_range : partial_result.per_shard_ranges) {
|
||||
debug("partial_result: looking at %s (shard %d)\n", per_shard_range.ring_range, per_shard_range.shard);
|
||||
auto sr = fragmented_sharder_result::shard_result();
|
||||
sr.shard = per_shard_range.shard;
|
||||
auto sharder = dht::ring_position_range_sharder(part, per_shard_range.ring_range);
|
||||
auto next = sharder.next(s);
|
||||
while (next) {
|
||||
debug("seeing: shard %d frag %s\n", next->shard, next->ring_range);
|
||||
if (next->shard == sr.shard) {
|
||||
debug("fragmented to %d\n", next->ring_range);
|
||||
sr.ranges.push_back(std::move(next->ring_range));
|
||||
}
|
||||
next = sharder.next(s);
|
||||
}
|
||||
fsr.shards.push_back(std::move(sr));
|
||||
}
|
||||
fragmented_results.push_back(std::move(fsr));
|
||||
}
|
||||
|
||||
// Step 3: collect all fragmented ranges
|
||||
|
||||
auto all_fragments = std::vector<dht::partition_range>();
|
||||
for (auto&& fr : fragmented_results) {
|
||||
for (auto&& sr : fr.shards) {
|
||||
for (auto&& f : sr.ranges) {
|
||||
all_fragments.push_back(f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 4: verify no overlaps
|
||||
|
||||
bool no_overlaps = true;
|
||||
if (all_fragments.size() > 1) {
|
||||
for (auto i : boost::irange<size_t>(1, all_fragments.size() - 1)) {
|
||||
for (auto j : boost::irange<size_t>(0, i)) {
|
||||
no_overlaps &= !all_fragments[i].overlaps(all_fragments[j], dht::ring_position_comparator(s));
|
||||
}
|
||||
}
|
||||
}
|
||||
BOOST_REQUIRE(no_overlaps); // We OOM if BOOST_REQUIRE() is run in the inner loop
|
||||
|
||||
// Step 5: verify all fragments are contiguous
|
||||
|
||||
auto rplc = dht::ring_position_less_comparator(s);
|
||||
auto rptc = dht::ring_position_comparator(s);
|
||||
boost::sort(all_fragments, [&] (const dht::partition_range& a, const dht::partition_range b) {
|
||||
if (!a.start() || !b.start()) {
|
||||
return unsigned(bool(a.start())) < unsigned(bool(b.start()));
|
||||
} else {
|
||||
return rplc(a.start()->value(), b.start()->value());
|
||||
}
|
||||
});
|
||||
auto not_adjacent = [&] (const dht::partition_range& a, const dht::partition_range b) {
|
||||
return !a.end() || !b.start() || rptc(a.end()->value(), b.start()->value()) != 0;
|
||||
};
|
||||
BOOST_REQUIRE(boost::adjacent_find(all_fragments, not_adjacent) == all_fragments.end());
|
||||
|
||||
// Step 6: verify inorder is accurate; allow a false negative
|
||||
|
||||
for (auto&& fsr : fragmented_results) {
|
||||
auto has_one_fragment = [] (const fragmented_sharder_result::shard_result& sr) {
|
||||
return sr.ranges.size() <= 1; // the sharder may return a range that does not intersect the shard
|
||||
};
|
||||
BOOST_REQUIRE(!fsr.inorder || boost::algorithm::all_of(fsr.shards, has_one_fragment));
|
||||
}
|
||||
|
||||
// Step 7: verify that the fragmented range matches the input range (since the fragments are
|
||||
// contiguous, we need only test the edges).
|
||||
|
||||
auto reconstructed = normalize(dht::partition_range(all_fragments.front().start(), all_fragments.back().end()));
|
||||
auto original = normalize(pr);
|
||||
debug("original %s reconstructed %s\n", original, reconstructed);
|
||||
BOOST_REQUIRE(original.contains(reconstructed, rptc) && reconstructed.contains(original, rptc));
|
||||
|
||||
// Step 8: verify exponentiality
|
||||
debug("sizes %d %d\n", results.size(), 1 + log2ceil(div_ceil(all_fragments.size(), part.shard_count())) + log2ceil(part.shard_count()));
|
||||
BOOST_REQUIRE(results.size() <= 1 + log2ceil(div_ceil(all_fragments.size(), part.shard_count())) + log2ceil(part.shard_count()));
|
||||
}
|
||||
|
||||
static
|
||||
void
|
||||
test_something_with_some_interesting_ranges_and_partitioners(std::function<void (const dht::i_partitioner&, const schema&, const dht::partition_range&)> func_to_test) {
|
||||
auto s = schema_builder("ks", "cf")
|
||||
.with_column("c1", int32_type, column_kind::partition_key)
|
||||
.with_column("c2", int32_type, column_kind::partition_key)
|
||||
.with_column("v", int32_type)
|
||||
.build();
|
||||
auto some_murmur3_partitioners = {
|
||||
dht::murmur3_partitioner(1, 0),
|
||||
dht::murmur3_partitioner(7, 4),
|
||||
dht::murmur3_partitioner(4, 0),
|
||||
dht::murmur3_partitioner(32, 8), // More, and we OOM since memory isn't configured
|
||||
};
|
||||
auto some_random_partitioners = {
|
||||
dht::random_partitioner(1),
|
||||
dht::random_partitioner(3),
|
||||
};
|
||||
auto some_byte_ordered_partitioners = {
|
||||
dht::byte_ordered_partitioner(1),
|
||||
dht::byte_ordered_partitioner(7),
|
||||
};
|
||||
auto t1 = token_from_long(int64_t(-0x7fff'ffff'ffff'fffe));
|
||||
auto t2 = token_from_long(int64_t(-1));
|
||||
auto t3 = token_from_long(int64_t(1));
|
||||
auto t4 = token_from_long(int64_t(0x7fff'ffff'ffff'fffe));
|
||||
auto make_bound = [] (dht::ring_position rp) {
|
||||
return stdx::make_optional(range_bound<dht::ring_position>(std::move(rp)));
|
||||
};
|
||||
auto some_murmur3_ranges = {
|
||||
dht::partition_range::make_open_ended_both_sides(),
|
||||
dht::partition_range::make_starting_with(dht::ring_position::starting_at(t1)),
|
||||
dht::partition_range::make_starting_with(dht::ring_position::starting_at(t2)),
|
||||
dht::partition_range::make_starting_with(dht::ring_position::ending_at(t3)),
|
||||
dht::partition_range::make_starting_with(dht::ring_position::starting_at(t4)),
|
||||
dht::partition_range::make_ending_with(dht::ring_position::starting_at(t1)),
|
||||
dht::partition_range::make_ending_with(dht::ring_position::starting_at(t2)),
|
||||
dht::partition_range::make_ending_with(dht::ring_position::starting_at(t3)),
|
||||
dht::partition_range::make_ending_with(dht::ring_position::starting_at(t4)),
|
||||
dht::partition_range(make_bound(dht::ring_position::starting_at(t2)), make_bound(dht::ring_position::ending_at(t3))),
|
||||
dht::partition_range(make_bound(dht::ring_position::ending_at(t1)), make_bound(dht::ring_position::starting_at(t4))),
|
||||
};
|
||||
for (auto&& part : some_murmur3_partitioners) {
|
||||
for (auto&& range : some_murmur3_ranges) {
|
||||
func_to_test(part, *s, range);
|
||||
}
|
||||
}
|
||||
for (auto&& part : some_random_partitioners) {
|
||||
func_to_test(part, *s, dht::partition_range::make_open_ended_both_sides());
|
||||
}
|
||||
for (auto&& part : some_byte_ordered_partitioners) {
|
||||
func_to_test(part, *s, dht::partition_range::make_open_ended_both_sides());
|
||||
}
|
||||
}
|
||||
|
||||
// Runs test_exponential_sharder() over the shared set of partitioners and ranges.
BOOST_AUTO_TEST_CASE(test_exponential_sharders) {
    test_something_with_some_interesting_ranges_and_partitioners(test_exponential_sharder);
}
|
||||
|
||||
static
|
||||
void
|
||||
do_test_split_range_to_single_shard(const dht::i_partitioner& part, const schema& s, const dht::partition_range& pr) {
|
||||
dht::set_global_partitioner(part.name()); // so we can print tokens, also ring_position_comparator is not global_partitioner() clean
|
||||
|
||||
for (auto shard : boost::irange(0u, part.shard_count())) {
|
||||
auto ranges = dht::split_range_to_single_shard(part, s, pr, shard);
|
||||
auto sharder = dht::ring_position_range_sharder(part, pr);
|
||||
auto x = sharder.next(s);
|
||||
auto cmp = dht::ring_position_comparator(s);
|
||||
auto reference_ranges = std::vector<dht::partition_range>();
|
||||
while (x) {
|
||||
if (x->shard == shard) {
|
||||
reference_ranges.push_back(std::move(x->ring_range));
|
||||
}
|
||||
x = sharder.next(s);
|
||||
}
|
||||
BOOST_REQUIRE(ranges.size() == reference_ranges.size());
|
||||
for (auto&& rs : boost::combine(ranges, reference_ranges)) {
|
||||
auto&& r1 = normalize(boost::get<0>(rs));
|
||||
auto&& r2 = normalize(boost::get<1>(rs));
|
||||
BOOST_REQUIRE(r1.contains(r2, cmp));
|
||||
BOOST_REQUIRE(r2.contains(r1, cmp));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Runs do_test_split_range_to_single_shard() over the shared set of partitioners and ranges.
BOOST_AUTO_TEST_CASE(test_split_range_single_shard) {
    test_something_with_some_interesting_ranges_and_partitioners(do_test_split_range_to_single_shard);
}
|
||||
|
||||
@@ -394,5 +394,39 @@ BOOST_AUTO_TEST_CASE(test_split_after) {
|
||||
BOOST_REQUIRE_EQUAL(wr6.split_after(6, cmp), stdx::nullopt);
|
||||
BOOST_REQUIRE_EQUAL(wr6.split_after(8, cmp), wr(b(8, false), b(5)));
|
||||
BOOST_REQUIRE_EQUAL(wr6.split_after(9, cmp), wr(b(9, false), b(5)));
|
||||
}
|
||||
|
||||
// Checks nonwrapping_range::intersection() for [5, 10] against ranges that
// lie below, touch, partially overlap, sit inside, and fully cover it, and
// then for a range with an exclusive end and for half-open-ended arguments.
BOOST_AUTO_TEST_CASE(test_intersection) {
    using b = range_bound<unsigned>;
    using nwr = nonwrapping_range<unsigned>;
    auto cmp = unsigned_comparator();

    auto base = nwr(b(5), b(10));

    // Entirely below the base range.
    BOOST_REQUIRE_EQUAL(base.intersection(nwr(b(1), b(4)), cmp), stdx::nullopt);
    // Touching or overlapping the lower edge.
    BOOST_REQUIRE_EQUAL(base.intersection(nwr(b(1), b(5)), cmp), nwr(b(5), b(5)));
    BOOST_REQUIRE_EQUAL(base.intersection(nwr(b(2), b(7)), cmp), nwr(b(5), b(7)));
    // Contained in the base range.
    BOOST_REQUIRE_EQUAL(base.intersection(nwr(b(5), b(8)), cmp), nwr(b(5), b(8)));
    BOOST_REQUIRE_EQUAL(base.intersection(nwr(b(6), b(8)), cmp), nwr(b(6), b(8)));
    BOOST_REQUIRE_EQUAL(base.intersection(nwr(b(7), b(10)), cmp), nwr(b(7), b(10)));
    // Overlapping or touching the upper edge.
    BOOST_REQUIRE_EQUAL(base.intersection(nwr(b(8), b(11)), cmp), nwr(b(8), b(10)));
    BOOST_REQUIRE_EQUAL(base.intersection(nwr(b(10), b(12)), cmp), nwr(b(10), b(10)));
    // Entirely above the base range.
    BOOST_REQUIRE_EQUAL(base.intersection(nwr(b(12), b(20)), cmp), stdx::nullopt);
    // Covering the base range completely.
    BOOST_REQUIRE_EQUAL(base.intersection(nwr(b(1), b(20)), cmp), nwr(b(5), b(10)));

    // [1, 3): exclusive upper bound.
    auto exclusive_end = nwr(b(1), b(3, false));
    BOOST_REQUIRE_EQUAL(exclusive_end.intersection(nwr(b(3, false), b(5)), cmp), stdx::nullopt);
    // NOTE(review): this assertion is identical to the previous one; possibly a
    // different bound (e.g. an inclusive start) was intended - confirm.
    BOOST_REQUIRE_EQUAL(exclusive_end.intersection(nwr(b(3, false), b(5)), cmp), stdx::nullopt);
    // Arguments that are open-ended on one side.
    BOOST_REQUIRE_EQUAL(exclusive_end.intersection(nwr(b(2), { }), cmp), nwr(b(2), b(3, false)));
    BOOST_REQUIRE_EQUAL(exclusive_end.intersection(nwr({ }, b(2)), cmp), nwr(b(1), b(2)));
}
|
||||
|
||||
@@ -764,6 +764,15 @@ SEASTAR_TEST_CASE(test_update_failure) {
|
||||
}
|
||||
}
|
||||
|
||||
auto ev = tracker.region().evictor();
|
||||
tracker.region().make_evictable([ev, evicitons_left = int(10)] () mutable {
|
||||
if (evicitons_left == 0) {
|
||||
return memory::reclaiming_result::reclaimed_nothing;
|
||||
}
|
||||
--evicitons_left;
|
||||
return ev();
|
||||
});
|
||||
|
||||
try {
|
||||
cache.update(*mt, [] (auto&& key) {
|
||||
return partition_presence_checker_result::definitely_doesnt_exist;
|
||||
|
||||
@@ -1754,9 +1754,9 @@ static lw_shared_ptr<sstable> add_sstable_for_overlapping_test(lw_shared_ptr<col
|
||||
column_family_test(cf).add_sstable(sst);
|
||||
return sst;
|
||||
}
|
||||
static lw_shared_ptr<sstable> sstable_for_overlapping_test(const schema_ptr& schema, int64_t gen, sstring first_key, sstring last_key) {
|
||||
static lw_shared_ptr<sstable> sstable_for_overlapping_test(const schema_ptr& schema, int64_t gen, sstring first_key, sstring last_key, uint32_t level = 0) {
|
||||
auto sst = make_lw_shared<sstable>(schema, "", gen, la, big);
|
||||
sstables::test(sst).set_values(std::move(first_key), std::move(last_key), {});
|
||||
sstables::test(sst).set_values_for_leveled_strategy(0, level, 0, std::move(first_key), std::move(last_key));
|
||||
return sst;
|
||||
}
|
||||
|
||||
@@ -2231,6 +2231,13 @@ SEASTAR_TEST_CASE(tombstone_purge_test) {
|
||||
return m;
|
||||
};
|
||||
|
||||
// Builds a mutation for `key` that sets the clustered cell "value" to
// int32_t(1) with a time-to-live of `ttl` gc_clock::duration ticks.
// The cell's write timestamp is taken from gc_clock::now().
//
// `ttl` is an int rather than a bool so that values other than 0/1 reach
// gc_clock::duration unchanged instead of being collapsed to 1.
auto make_expiring = [&] (partition_key key, int ttl) {
    mutation m(key, s);
    m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)),
        gc_clock::now().time_since_epoch().count(), gc_clock::duration(ttl));
    return m;
};
|
||||
|
||||
auto make_delete = [&] (partition_key key) {
|
||||
mutation m(key, s);
|
||||
tombstone tomb(next_timestamp(), gc_clock::now());
|
||||
@@ -2238,6 +2245,25 @@ SEASTAR_TEST_CASE(tombstone_purge_test) {
|
||||
return m;
|
||||
};
|
||||
|
||||
auto assert_that_produces_dead_cell = [&] (auto& sst, partition_key& key) {
|
||||
auto reader = make_lw_shared(sstable_reader(sst, s));
|
||||
(*reader)().then([&key] (auto sm) {
|
||||
return mutation_from_streamed_mutation(std::move(sm));
|
||||
}).then([reader, s, &key] (mutation_opt m) {
|
||||
BOOST_REQUIRE(m);
|
||||
BOOST_REQUIRE(m->key().equal(*s, key));
|
||||
auto& rows = m->partition().clustered_rows();
|
||||
BOOST_REQUIRE_EQUAL(rows.calculate_size(), 1);
|
||||
auto& row = rows.begin()->row();
|
||||
auto& cells = row.cells();
|
||||
BOOST_REQUIRE_EQUAL(cells.size(), 1);
|
||||
BOOST_REQUIRE(!cells.cell_at(s->get_column_definition("value")->id).as_atomic_cell().is_live());
|
||||
return (*reader)();
|
||||
}).then([reader, s] (streamed_mutation_opt m) {
|
||||
BOOST_REQUIRE(!m);
|
||||
}).get();
|
||||
};
|
||||
|
||||
auto alpha = partition_key::from_exploded(*s, {to_bytes("alpha")});
|
||||
auto beta = partition_key::from_exploded(*s, {to_bytes("beta")});
|
||||
|
||||
@@ -2316,6 +2342,52 @@ SEASTAR_TEST_CASE(tombstone_purge_test) {
|
||||
.produces(mut3)
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
|
||||
{
|
||||
// check that expired cell will not be purged if it will resurrect overwritten data.
|
||||
auto mut1 = make_insert(alpha);
|
||||
auto mut2 = make_expiring(alpha, 1);
|
||||
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2});
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(5));
|
||||
|
||||
auto result = compact({sst1, sst2}, {sst2});
|
||||
BOOST_REQUIRE_EQUAL(1, result.size());
|
||||
assert_that_produces_dead_cell(result[0], alpha);
|
||||
|
||||
result = compact({sst1, sst2}, {sst1, sst2});
|
||||
BOOST_REQUIRE_EQUAL(0, result.size());
|
||||
}
|
||||
{
|
||||
auto mut1 = make_insert(alpha);
|
||||
auto mut2 = make_expiring(beta, 1);
|
||||
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2});
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(5));
|
||||
|
||||
auto result = compact({sst1, sst2}, {sst2});
|
||||
BOOST_REQUIRE_EQUAL(0, result.size());
|
||||
}
|
||||
{
|
||||
auto mut1 = make_insert(alpha);
|
||||
auto mut2 = make_expiring(alpha, 1);
|
||||
auto mut3 = make_insert(beta);
|
||||
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3});
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(5));
|
||||
|
||||
auto result = compact({sst1, sst2}, {sst1, sst2});
|
||||
BOOST_REQUIRE_EQUAL(1, result.size());
|
||||
assert_that(sstable_reader(result[0], s))
|
||||
.produces(mut3)
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -3306,29 +3378,52 @@ SEASTAR_TEST_CASE(sstable_set_incremental_selector) {
|
||||
auto cs = sstables::make_compaction_strategy(sstables::compaction_strategy_type::leveled, s->compaction_strategy_options());
|
||||
auto key_and_token_pair = token_generation_for_current_shard(8);
|
||||
|
||||
sstable_set set = cs.make_sstable_set(s);
|
||||
set.insert(sstable_for_overlapping_test(s, 1, key_and_token_pair[0].first, key_and_token_pair[1].first));
|
||||
set.insert(sstable_for_overlapping_test(s, 2, key_and_token_pair[0].first, key_and_token_pair[1].first));
|
||||
set.insert(sstable_for_overlapping_test(s, 3, key_and_token_pair[3].first, key_and_token_pair[4].first));
|
||||
set.insert(sstable_for_overlapping_test(s, 4, key_and_token_pair[4].first, key_and_token_pair[4].first));
|
||||
set.insert(sstable_for_overlapping_test(s, 5, key_and_token_pair[4].first, key_and_token_pair[5].first));
|
||||
|
||||
sstable_set::incremental_selector selector = set.make_incremental_selector();
|
||||
auto check = [&selector] (const dht::token& token, std::unordered_set<int64_t> expected_gens) {
|
||||
auto check = [] (sstable_set::incremental_selector& selector, const dht::token& token, std::unordered_set<int64_t> expected_gens) {
|
||||
auto sstables = selector.select(token);
|
||||
BOOST_REQUIRE(sstables.size() == expected_gens.size());
|
||||
for (auto& sst : sstables) {
|
||||
BOOST_REQUIRE(expected_gens.count(sst->generation()) == 1);
|
||||
}
|
||||
};
|
||||
check(key_and_token_pair[0].second, {1, 2});
|
||||
check(key_and_token_pair[1].second, {1, 2});
|
||||
check(key_and_token_pair[2].second, {});
|
||||
check(key_and_token_pair[3].second, {3});
|
||||
check(key_and_token_pair[4].second, {3, 4, 5});
|
||||
check(key_and_token_pair[5].second, {5});
|
||||
check(key_and_token_pair[6].second, {});
|
||||
check(key_and_token_pair[7].second, {});
|
||||
|
||||
{
|
||||
sstable_set set = cs.make_sstable_set(s);
|
||||
set.insert(sstable_for_overlapping_test(s, 1, key_and_token_pair[0].first, key_and_token_pair[1].first, 1));
|
||||
set.insert(sstable_for_overlapping_test(s, 2, key_and_token_pair[0].first, key_and_token_pair[1].first, 1));
|
||||
set.insert(sstable_for_overlapping_test(s, 3, key_and_token_pair[3].first, key_and_token_pair[4].first, 1));
|
||||
set.insert(sstable_for_overlapping_test(s, 4, key_and_token_pair[4].first, key_and_token_pair[4].first, 1));
|
||||
set.insert(sstable_for_overlapping_test(s, 5, key_and_token_pair[4].first, key_and_token_pair[5].first, 1));
|
||||
|
||||
sstable_set::incremental_selector sel = set.make_incremental_selector();
|
||||
check(sel, key_and_token_pair[0].second, {1, 2});
|
||||
check(sel, key_and_token_pair[1].second, {1, 2});
|
||||
check(sel, key_and_token_pair[2].second, {});
|
||||
check(sel, key_and_token_pair[3].second, {3});
|
||||
check(sel, key_and_token_pair[4].second, {3, 4, 5});
|
||||
check(sel, key_and_token_pair[5].second, {5});
|
||||
check(sel, key_and_token_pair[6].second, {});
|
||||
check(sel, key_and_token_pair[7].second, {});
|
||||
}
|
||||
|
||||
{
|
||||
sstable_set set = cs.make_sstable_set(s);
|
||||
set.insert(sstable_for_overlapping_test(s, 0, key_and_token_pair[0].first, key_and_token_pair[1].first, 0));
|
||||
set.insert(sstable_for_overlapping_test(s, 1, key_and_token_pair[0].first, key_and_token_pair[1].first, 1));
|
||||
set.insert(sstable_for_overlapping_test(s, 2, key_and_token_pair[0].first, key_and_token_pair[1].first, 1));
|
||||
set.insert(sstable_for_overlapping_test(s, 3, key_and_token_pair[3].first, key_and_token_pair[4].first, 1));
|
||||
set.insert(sstable_for_overlapping_test(s, 4, key_and_token_pair[4].first, key_and_token_pair[4].first, 1));
|
||||
set.insert(sstable_for_overlapping_test(s, 5, key_and_token_pair[4].first, key_and_token_pair[5].first, 1));
|
||||
|
||||
sstable_set::incremental_selector sel = set.make_incremental_selector();
|
||||
check(sel, key_and_token_pair[0].second, {0, 1, 2});
|
||||
check(sel, key_and_token_pair[1].second, {0, 1, 2});
|
||||
check(sel, key_and_token_pair[2].second, {0});
|
||||
check(sel, key_and_token_pair[3].second, {0, 3});
|
||||
check(sel, key_and_token_pair[4].second, {0, 3, 4, 5});
|
||||
check(sel, key_and_token_pair[5].second, {0, 5});
|
||||
check(sel, key_and_token_pair[6].second, {0});
|
||||
check(sel, key_and_token_pair[7].second, {0});
|
||||
}
|
||||
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
@@ -149,6 +149,7 @@ public:
|
||||
_sst->_components->summary.first_key.value = bytes(reinterpret_cast<const signed char*>(first_key.c_str()), first_key.size());
|
||||
_sst->_components->summary.last_key.value = bytes(reinterpret_cast<const signed char*>(last_key.c_str()), last_key.size());
|
||||
_sst->set_first_and_last_keys();
|
||||
_sst->_components->statistics.contents[metadata_type::Compaction] = std::make_unique<compaction_metadata>();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -238,7 +238,8 @@ future<> trace_keyspace_helper::start() {
|
||||
std::map<sstring, sstring> opts;
|
||||
opts["replication_factor"] = "2";
|
||||
auto ksm = keyspace_metadata::new_keyspace(KEYSPACE_NAME, "org.apache.cassandra.locator.SimpleStrategy", std::move(opts), true);
|
||||
service::get_local_migration_manager().announce_new_keyspace(ksm, false).get();
|
||||
// We use min_timestamp so that the default keyspace metadata will lose to any manual adjustments. See issue #2129.
|
||||
service::get_local_migration_manager().announce_new_keyspace(ksm, api::min_timestamp, false).get();
|
||||
}
|
||||
|
||||
// Create tables
|
||||
|
||||
@@ -43,8 +43,7 @@ private:
|
||||
protected:
|
||||
prepared(::shared_ptr<cql3::statements::prepared_statement> prepared)
|
||||
: _prepared{prepared}
|
||||
// FIXME: Populate partition key bind indices for prepared_metadata.
|
||||
, _metadata{::make_shared<cql3::prepared_metadata>(prepared->bound_names, std::vector<uint16_t>())}
|
||||
, _metadata{::make_shared<cql3::prepared_metadata>(_prepared->bound_names, _prepared->partition_key_bind_indices)}
|
||||
, _result_metadata{extract_result_metadata(prepared->statement)}
|
||||
{ }
|
||||
public:
|
||||
|
||||
@@ -655,9 +655,9 @@ future<> cql_server::connection::process_request() {
|
||||
auto bv = bytes_view{reinterpret_cast<const int8_t*>(buf.begin()), buf.size()};
|
||||
auto cpu = pick_request_cpu();
|
||||
return smp::submit_to(cpu, [this, bv = std::move(bv), op, stream, client_state = _client_state, tracing_requested] () mutable {
|
||||
return this->process_request_one(bv, op, stream, std::move(client_state), tracing_requested).then([](auto&& response) {
|
||||
return this->process_request_one(bv, op, stream, std::move(client_state), tracing_requested).then([tracing_requested] (auto&& response) {
|
||||
auto& tracing_session_id_ptr = response.second.tracing_session_id_ptr();
|
||||
if (tracing_session_id_ptr) {
|
||||
if (tracing_requested == tracing_request_type::write_on_close && tracing_session_id_ptr) {
|
||||
response.first->set_tracing_id(*tracing_session_id_ptr);
|
||||
}
|
||||
return std::make_pair(make_foreign(response.first), response.second);
|
||||
|
||||
12
types.cc
12
types.cc
@@ -2152,14 +2152,20 @@ bool collection_type_impl::mutation::compact_and_expire(tombstone base_tomb, gc_
|
||||
std::vector<std::pair<bytes, atomic_cell>> survivors;
|
||||
for (auto&& name_and_cell : cells) {
|
||||
atomic_cell& cell = name_and_cell.second;
|
||||
auto cannot_erase_cell = [&] {
|
||||
return cell.deletion_time() >= gc_before || !can_gc(tombstone(cell.timestamp(), cell.deletion_time()));
|
||||
};
|
||||
|
||||
if (cell.is_covered_by(tomb, false)) {
|
||||
continue;
|
||||
}
|
||||
if (cell.has_expired(query_time)) {
|
||||
survivors.emplace_back(std::make_pair(
|
||||
std::move(name_and_cell.first), atomic_cell::make_dead(cell.timestamp(), cell.deletion_time())));
|
||||
if (cannot_erase_cell()) {
|
||||
survivors.emplace_back(std::make_pair(
|
||||
std::move(name_and_cell.first), atomic_cell::make_dead(cell.timestamp(), cell.deletion_time())));
|
||||
}
|
||||
} else if (!cell.is_live()) {
|
||||
if (cell.deletion_time() >= gc_before || !can_gc(tombstone(cell.timestamp(), cell.deletion_time()))) {
|
||||
if (cannot_erase_cell()) {
|
||||
survivors.emplace_back(std::move(name_and_cell));
|
||||
}
|
||||
} else {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user