Compare commits
78 Commits
debug_form
...
scylla-1.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c6c176b1be | ||
|
|
9795edbe04 | ||
|
|
1539c8b136 | ||
|
|
0396a94eaf | ||
|
|
3c40c1be71 | ||
|
|
de969a5d6f | ||
|
|
0ade2894f7 | ||
|
|
6b36315040 | ||
|
|
edc5f8f2f7 | ||
|
|
066149ad46 | ||
|
|
1f07468195 | ||
|
|
0577ae5a61 | ||
|
|
054cf13cd0 | ||
|
|
71446edc97 | ||
|
|
c1d8a62b5b | ||
|
|
a3baef6b45 | ||
|
|
feaba177e2 | ||
|
|
83a289bdcd | ||
|
|
382e7e63b3 | ||
|
|
deeed904f4 | ||
|
|
d927053b3b | ||
|
|
8b8923b5af | ||
|
|
48ec129595 | ||
|
|
a4757a6737 | ||
|
|
223b73849d | ||
|
|
ba4b1eac45 | ||
|
|
9cf5fabfdf | ||
|
|
5723c664ad | ||
|
|
9635a83edd | ||
|
|
24c68e48a5 | ||
|
|
80cb0a28e1 | ||
|
|
95a9f66b75 | ||
|
|
58448d4b05 | ||
|
|
0a4d0e95f2 | ||
|
|
2c73e1c2e8 | ||
|
|
0ebd1ae62a | ||
|
|
14f616de3f | ||
|
|
827c0f68c3 | ||
|
|
e3607a4c16 | ||
|
|
59270c6d00 | ||
|
|
3be5d3a7c9 | ||
|
|
cd6697b506 | ||
|
|
acc9849e2b | ||
|
|
a445f6a7be | ||
|
|
88ed9c53a6 | ||
|
|
50f98ff90a | ||
|
|
30ffb2917f | ||
|
|
6ef8b45bf4 | ||
|
|
144829606a | ||
|
|
2eb54bb068 | ||
|
|
a133e48515 | ||
|
|
5db0049d99 | ||
|
|
ac80445bd9 | ||
|
|
0c3ffba5c8 | ||
|
|
7ca3d22c7d | ||
|
|
9b1d2dad89 | ||
|
|
7e6a7a6cb5 | ||
|
|
ec7f637384 | ||
|
|
eecfb2e4ef | ||
|
|
1f6476351a | ||
|
|
0d95dd310a | ||
|
|
80d2b72068 | ||
|
|
ac95f04ff9 | ||
|
|
08a8a4a1b4 | ||
|
|
b7e9924299 | ||
|
|
19ed269cc7 | ||
|
|
a223450a56 | ||
|
|
8f4800b30e | ||
|
|
7d13d115c6 | ||
|
|
c9c52235a1 | ||
|
|
52eeab089c | ||
|
|
49af399a2e | ||
|
|
d915370e3f | ||
|
|
a6d5e67923 | ||
|
|
f885750f90 | ||
|
|
36f55e409d | ||
|
|
c436fb5892 | ||
|
|
950bcd3e38 |
@@ -1,6 +1,6 @@
|
||||
#!/bin/sh
|
||||
|
||||
VERSION=666.development
|
||||
VERSION=1.0.rc1
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
@@ -54,9 +54,9 @@ class atomic_cell_or_collection;
|
||||
*/
|
||||
class atomic_cell_type final {
|
||||
private:
|
||||
static constexpr int8_t DEAD_FLAGS = 0;
|
||||
static constexpr int8_t LIVE_FLAG = 0x01;
|
||||
static constexpr int8_t EXPIRY_FLAG = 0x02; // When present, expiry field is present. Set only for live cells
|
||||
static constexpr int8_t REVERT_FLAG = 0x04; // transient flag used to efficiently implement ReversiblyMergeable for atomic cells.
|
||||
static constexpr unsigned flags_size = 1;
|
||||
static constexpr unsigned timestamp_offset = flags_size;
|
||||
static constexpr unsigned timestamp_size = 8;
|
||||
@@ -67,14 +67,21 @@ private:
|
||||
static constexpr unsigned ttl_offset = expiry_offset + expiry_size;
|
||||
static constexpr unsigned ttl_size = 4;
|
||||
private:
|
||||
static bool is_revert_set(bytes_view cell) {
|
||||
return cell[0] & REVERT_FLAG;
|
||||
}
|
||||
template<typename BytesContainer>
|
||||
static void set_revert(BytesContainer& cell, bool revert) {
|
||||
cell[0] = (cell[0] & ~REVERT_FLAG) | (revert * REVERT_FLAG);
|
||||
}
|
||||
static bool is_live(const bytes_view& cell) {
|
||||
return cell[0] != DEAD_FLAGS;
|
||||
return cell[0] & LIVE_FLAG;
|
||||
}
|
||||
static bool is_live_and_has_ttl(const bytes_view& cell) {
|
||||
return cell[0] & EXPIRY_FLAG;
|
||||
}
|
||||
static bool is_dead(const bytes_view& cell) {
|
||||
return cell[0] == DEAD_FLAGS;
|
||||
return !is_live(cell);
|
||||
}
|
||||
// Can be called on live and dead cells
|
||||
static api::timestamp_type timestamp(const bytes_view& cell) {
|
||||
@@ -106,7 +113,7 @@ private:
|
||||
}
|
||||
static managed_bytes make_dead(api::timestamp_type timestamp, gc_clock::time_point deletion_time) {
|
||||
managed_bytes b(managed_bytes::initialized_later(), flags_size + timestamp_size + deletion_time_size);
|
||||
b[0] = DEAD_FLAGS;
|
||||
b[0] = 0;
|
||||
set_field(b, timestamp_offset, timestamp);
|
||||
set_field(b, deletion_time_offset, deletion_time.time_since_epoch().count());
|
||||
return b;
|
||||
@@ -140,8 +147,11 @@ protected:
|
||||
ByteContainer _data;
|
||||
protected:
|
||||
atomic_cell_base(ByteContainer&& data) : _data(std::forward<ByteContainer>(data)) { }
|
||||
atomic_cell_base(const ByteContainer& data) : _data(data) { }
|
||||
friend class atomic_cell_or_collection;
|
||||
public:
|
||||
bool is_revert_set() const {
|
||||
return atomic_cell_type::is_revert_set(_data);
|
||||
}
|
||||
bool is_live() const {
|
||||
return atomic_cell_type::is_live(_data);
|
||||
}
|
||||
@@ -187,10 +197,13 @@ public:
|
||||
bytes_view serialize() const {
|
||||
return _data;
|
||||
}
|
||||
void set_revert(bool revert) {
|
||||
atomic_cell_type::set_revert(_data, revert);
|
||||
}
|
||||
};
|
||||
|
||||
class atomic_cell_view final : public atomic_cell_base<bytes_view> {
|
||||
atomic_cell_view(bytes_view data) : atomic_cell_base(data) {}
|
||||
atomic_cell_view(bytes_view data) : atomic_cell_base(std::move(data)) {}
|
||||
public:
|
||||
static atomic_cell_view from_bytes(bytes_view data) { return atomic_cell_view(data); }
|
||||
|
||||
@@ -198,6 +211,11 @@ public:
|
||||
friend std::ostream& operator<<(std::ostream& os, const atomic_cell_view& acv);
|
||||
};
|
||||
|
||||
class atomic_cell_ref final : public atomic_cell_base<managed_bytes&> {
|
||||
public:
|
||||
atomic_cell_ref(managed_bytes& buf) : atomic_cell_base(buf) {}
|
||||
};
|
||||
|
||||
class atomic_cell final : public atomic_cell_base<managed_bytes> {
|
||||
atomic_cell(managed_bytes b) : atomic_cell_base(std::move(b)) {}
|
||||
public:
|
||||
|
||||
@@ -57,3 +57,19 @@ struct appending_hash<atomic_cell_view> {
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct appending_hash<atomic_cell> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, const atomic_cell& cell) const {
|
||||
feed_hash(h, static_cast<atomic_cell_view>(cell));
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct appending_hash<collection_mutation> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, const collection_mutation& cm) const {
|
||||
feed_hash(h, static_cast<collection_mutation_view>(cm));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -27,6 +27,8 @@
|
||||
|
||||
// A variant type that can hold either an atomic_cell, or a serialized collection.
|
||||
// Which type is stored is determined by the schema.
|
||||
// Has an "empty" state.
|
||||
// Objects moved-from are left in an empty state.
|
||||
class atomic_cell_or_collection final {
|
||||
managed_bytes _data;
|
||||
private:
|
||||
@@ -36,6 +38,7 @@ public:
|
||||
atomic_cell_or_collection(atomic_cell ac) : _data(std::move(ac._data)) {}
|
||||
static atomic_cell_or_collection from_atomic_cell(atomic_cell data) { return { std::move(data._data) }; }
|
||||
atomic_cell_view as_atomic_cell() const { return atomic_cell_view::from_bytes(_data); }
|
||||
atomic_cell_ref as_atomic_cell_ref() { return { _data }; }
|
||||
atomic_cell_or_collection(collection_mutation cm) : _data(std::move(cm.data)) {}
|
||||
explicit operator bool() const {
|
||||
return !_data.empty();
|
||||
|
||||
16
configure.py
16
configure.py
@@ -845,8 +845,8 @@ with open(buildfile, 'w') as f:
|
||||
for obj in compiles:
|
||||
src = compiles[obj]
|
||||
gen_headers = list(ragels.keys())
|
||||
gen_headers += ['seastar/build/{}/http/request_parser.hh'.format(mode)]
|
||||
gen_headers += ['seastar/build/{}/http/http_response_parser.hh'.format(mode)]
|
||||
gen_headers += ['seastar/build/{}/gen/http/request_parser.hh'.format(mode)]
|
||||
gen_headers += ['seastar/build/{}/gen/http/http_response_parser.hh'.format(mode)]
|
||||
for th in thrifts:
|
||||
gen_headers += th.headers('$builddir/{}/gen'.format(mode))
|
||||
for g in antlr3_grammars:
|
||||
@@ -878,10 +878,10 @@ with open(buildfile, 'w') as f:
|
||||
for cc in grammar.sources('$builddir/{}/gen'.format(mode)):
|
||||
obj = cc.replace('.cpp', '.o')
|
||||
f.write('build {}: cxx.{} {} || {}\n'.format(obj, mode, cc, ' '.join(serializers)))
|
||||
f.write('build seastar/build/{mode}/libseastar.a seastar/build/{mode}/apps/iotune/iotune: ninja {seastar_deps}\n'
|
||||
f.write('build seastar/build/{mode}/libseastar.a seastar/build/{mode}/apps/iotune/iotune seastar/build/{mode}/gen/http/request_parser.hh seastar/build/{mode}/gen/http/http_response_parser.hh: ninja {seastar_deps}\n'
|
||||
.format(**locals()))
|
||||
f.write(' subdir = seastar\n')
|
||||
f.write(' target = build/{mode}/libseastar.a build/{mode}/apps/iotune/iotune\n'.format(**locals()))
|
||||
f.write(' target = build/{mode}/libseastar.a build/{mode}/apps/iotune/iotune build/{mode}/gen/http/request_parser.hh build/{mode}/gen/http/http_response_parser.hh\n'.format(**locals()))
|
||||
f.write(textwrap.dedent('''\
|
||||
build build/{mode}/iotune: copy seastar/build/{mode}/apps/iotune/iotune
|
||||
''').format(**locals()))
|
||||
@@ -895,14 +895,6 @@ with open(buildfile, 'w') as f:
|
||||
command = find -name '*.[chS]' -o -name "*.cc" -o -name "*.hh" | cscope -bq -i-
|
||||
description = CSCOPE
|
||||
build cscope: cscope
|
||||
rule request_parser_hh
|
||||
command = {ninja} -C seastar build/release/gen/http/request_parser.hh build/debug/gen/http/request_parser.hh
|
||||
description = GEN seastar/http/request_parser.hh
|
||||
build seastar/build/release/http/request_parser.hh seastar/build/debug/http/request_parser.hh: request_parser_hh
|
||||
rule http_response_parser_hh
|
||||
command = {ninja} -C seastar build/release/gen/http/http_response_parser.hh build/debug/gen/http/http_response_parser.hh
|
||||
description = GEN seastar/http/http_response_parser.hh
|
||||
build seastar/build/release/http/http_response_parser.hh seastar/build/debug/http/http_response_parser.hh: http_response_parser_hh
|
||||
rule clean
|
||||
command = rm -rf build
|
||||
description = CLEAN
|
||||
|
||||
@@ -169,26 +169,21 @@ public:
|
||||
}
|
||||
private:
|
||||
future<std::vector<mutation>> get_mutations(distributed<service::storage_proxy>& storage, const query_options& options, bool local, api::timestamp_type now) {
|
||||
struct collector {
|
||||
std::vector<mutation> _result;
|
||||
std::vector<mutation> get() && { return std::move(_result); }
|
||||
void operator()(std::vector<mutation> more) {
|
||||
std::move(more.begin(), more.end(), std::back_inserter(_result));
|
||||
}
|
||||
};
|
||||
auto get_mutations_for_statement = [this, &storage, &options, now, local] (size_t i) {
|
||||
auto&& statement = _statements[i];
|
||||
auto&& statement_options = options.for_statement(i);
|
||||
auto timestamp = _attrs->get_timestamp(now, statement_options);
|
||||
return statement->get_mutations(storage, statement_options, local, timestamp);
|
||||
};
|
||||
// FIXME: origin tries hard to merge mutations to same keyspace, for
|
||||
// some reason.
|
||||
return map_reduce(
|
||||
boost::make_counting_iterator<size_t>(0),
|
||||
boost::make_counting_iterator<size_t>(_statements.size()),
|
||||
get_mutations_for_statement,
|
||||
collector());
|
||||
// Do not process in parallel because operations like list append/prepend depend on execution order.
|
||||
return do_with(std::vector<mutation>(), [this, &storage, &options, now, local] (auto&& result) {
|
||||
return do_for_each(boost::make_counting_iterator<size_t>(0),
|
||||
boost::make_counting_iterator<size_t>(_statements.size()),
|
||||
[this, &storage, &options, now, local, &result] (size_t i) {
|
||||
auto&& statement = _statements[i];
|
||||
auto&& statement_options = options.for_statement(i);
|
||||
auto timestamp = _attrs->get_timestamp(now, statement_options);
|
||||
return statement->get_mutations(storage, statement_options, local, timestamp).then([&result] (auto&& more) {
|
||||
std::move(more.begin(), more.end(), std::back_inserter(result));
|
||||
});
|
||||
}).then([&result] {
|
||||
return std::move(result);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
13
database.cc
13
database.cc
@@ -667,21 +667,26 @@ column_family::stop() {
|
||||
|
||||
|
||||
future<std::vector<sstables::entry_descriptor>>
|
||||
column_family::reshuffle_sstables(int64_t start) {
|
||||
column_family::reshuffle_sstables(std::set<int64_t> all_generations, int64_t start) {
|
||||
struct work {
|
||||
int64_t current_gen;
|
||||
std::set<int64_t> all_generations; // Stores generation of all live sstables in the system.
|
||||
sstable_list sstables;
|
||||
std::unordered_map<int64_t, sstables::entry_descriptor> descriptors;
|
||||
std::vector<sstables::entry_descriptor> reshuffled;
|
||||
work(int64_t start) : current_gen(start ? start : 1) {}
|
||||
work(int64_t start, std::set<int64_t> gens)
|
||||
: current_gen(start ? start : 1)
|
||||
, all_generations(gens) {}
|
||||
};
|
||||
|
||||
return do_with(work(start), [this] (work& work) {
|
||||
return do_with(work(start, std::move(all_generations)), [this] (work& work) {
|
||||
return lister::scan_dir(_config.datadir, { directory_entry_type::regular }, [this, &work] (directory_entry de) {
|
||||
auto comps = sstables::entry_descriptor::make_descriptor(de.name);
|
||||
if (comps.component != sstables::sstable::component_type::TOC) {
|
||||
return make_ready_future<>();
|
||||
} else if (comps.generation < work.current_gen) {
|
||||
}
|
||||
// Skip generations that were already loaded by Scylla at a previous stage.
|
||||
if (work.all_generations.count(comps.generation) != 0) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
auto sst = make_lw_shared<sstables::sstable>(_schema->ks_name(), _schema->cf_name(),
|
||||
|
||||
@@ -298,7 +298,10 @@ public:
|
||||
future<int64_t> disable_sstable_write() {
|
||||
_sstable_writes_disabled_at = std::chrono::steady_clock::now();
|
||||
return _sstables_lock.write_lock().then([this] {
|
||||
return make_ready_future<int64_t>((*_sstables->end()).first);
|
||||
if (_sstables->empty()) {
|
||||
return make_ready_future<int64_t>(1);
|
||||
}
|
||||
return make_ready_future<int64_t>((*_sstables->rbegin()).first);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -321,9 +324,11 @@ public:
|
||||
// very dangerous to do that with live SSTables. This is meant to be used with SSTables
|
||||
// that are not yet managed by the system.
|
||||
//
|
||||
// Parameter all_generations stores the generation of all SSTables in the system, so it
|
||||
// will be easy to determine which SSTable is new.
|
||||
// An example usage would query all shards asking what is the highest SSTable number known
|
||||
// to them, and then pass that + 1 as "start".
|
||||
future<std::vector<sstables::entry_descriptor>> reshuffle_sstables(int64_t start);
|
||||
future<std::vector<sstables::entry_descriptor>> reshuffle_sstables(std::set<int64_t> all_generations, int64_t start);
|
||||
|
||||
// FIXME: this is just an example, should be changed to something more
|
||||
// general. compact_all_sstables() starts a compaction of all sstables.
|
||||
|
||||
89
dist/ami/build_ami.sh
vendored
89
dist/ami/build_ami.sh
vendored
@@ -29,28 +29,73 @@ while [ $# -gt 0 ]; do
|
||||
esac
|
||||
done
|
||||
|
||||
. /etc/os-release
|
||||
case "$ID" in
|
||||
"centos")
|
||||
AMI=ami-f3102499
|
||||
REGION=us-east-1
|
||||
SSH_USERNAME=centos
|
||||
;;
|
||||
"ubuntu")
|
||||
AMI=ami-ff427095
|
||||
REGION=us-east-1
|
||||
SSH_USERNAME=ubuntu
|
||||
;;
|
||||
*)
|
||||
echo "build_ami.sh does not supported this distribution."
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
if [ $LOCALRPM -eq 1 ]; then
|
||||
rm -rf build/*
|
||||
sudo yum -y install git
|
||||
if [ ! -f dist/ami/files/scylla-server.x86_64.rpm ]; then
|
||||
dist/redhat/build_rpm.sh
|
||||
cp build/rpmbuild/RPMS/x86_64/scylla-server-`cat build/SCYLLA-VERSION-FILE`-`cat build/SCYLLA-RELEASE-FILE`.*.x86_64.rpm dist/ami/files/scylla-server.x86_64.rpm
|
||||
fi
|
||||
if [ ! -f dist/ami/files/scylla-jmx.noarch.rpm ]; then
|
||||
cd build
|
||||
git clone --depth 1 https://github.com/scylladb/scylla-jmx.git
|
||||
cd scylla-jmx
|
||||
sh -x -e dist/redhat/build_rpm.sh $*
|
||||
cd ../..
|
||||
cp build/scylla-jmx/build/rpmbuild/RPMS/noarch/scylla-jmx-`cat build/scylla-jmx/build/SCYLLA-VERSION-FILE`-`cat build/scylla-jmx/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-jmx.noarch.rpm
|
||||
fi
|
||||
if [ ! -f dist/ami/files/scylla-tools.noarch.rpm ]; then
|
||||
cd build
|
||||
git clone --depth 1 https://github.com/scylladb/scylla-tools-java.git
|
||||
cd scylla-tools-java
|
||||
sh -x -e dist/redhat/build_rpm.sh
|
||||
cd ../..
|
||||
cp build/scylla-tools-java/build/rpmbuild/RPMS/noarch/scylla-tools-`cat build/scylla-tools-java/build/SCYLLA-VERSION-FILE`-`cat build/scylla-tools-java/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-tools.noarch.rpm
|
||||
if [ "$ID" = "centos" ]; then
|
||||
rm -rf build/*
|
||||
sudo yum -y install git
|
||||
if [ ! -f dist/ami/files/scylla-server.x86_64.rpm ]; then
|
||||
dist/redhat/build_rpm.sh
|
||||
cp build/rpmbuild/RPMS/x86_64/scylla-server-`cat build/SCYLLA-VERSION-FILE`-`cat build/SCYLLA-RELEASE-FILE`.*.x86_64.rpm dist/ami/files/scylla-server.x86_64.rpm
|
||||
fi
|
||||
if [ ! -f dist/ami/files/scylla-jmx.noarch.rpm ]; then
|
||||
cd build
|
||||
git clone --depth 1 https://github.com/scylladb/scylla-jmx.git
|
||||
cd scylla-jmx
|
||||
sh -x -e dist/redhat/build_rpm.sh $*
|
||||
cd ../..
|
||||
cp build/scylla-jmx/build/rpmbuild/RPMS/noarch/scylla-jmx-`cat build/scylla-jmx/build/SCYLLA-VERSION-FILE`-`cat build/scylla-jmx/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-jmx.noarch.rpm
|
||||
fi
|
||||
if [ ! -f dist/ami/files/scylla-tools.noarch.rpm ]; then
|
||||
cd build
|
||||
git clone --depth 1 https://github.com/scylladb/scylla-tools-java.git
|
||||
cd scylla-tools-java
|
||||
sh -x -e dist/redhat/build_rpm.sh
|
||||
cd ../..
|
||||
cp build/scylla-tools-java/build/rpmbuild/RPMS/noarch/scylla-tools-`cat build/scylla-tools-java/build/SCYLLA-VERSION-FILE`-`cat build/scylla-tools-java/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-tools.noarch.rpm
|
||||
fi
|
||||
else
|
||||
sudo apt-get install -y git
|
||||
if [ ! -f dist/ami/files/scylla-server_amd64.deb ]; then
|
||||
if [ ! -f ../scylla-server_`cat version`-ubuntu1_amd64.deb ]; then
|
||||
echo "Build .deb before running build_ami.sh"
|
||||
exit 1
|
||||
fi
|
||||
cp ../scylla-server_`cat version`-ubuntu1_amd64.deb dist/ami/files/scylla-server_amd64.deb
|
||||
fi
|
||||
if [ ! -f dist/ami/files/scylla-jmx_all.deb ]; then
|
||||
cd build
|
||||
git clone --depth 1 https://github.com/scylladb/scylla-jmx.git
|
||||
cd scylla-jmx
|
||||
sh -x -e dist/ubuntu/build_deb.sh $*
|
||||
cd ../..
|
||||
cp build/scylla-jmx_`cat build/scylla-jmx/version`-ubuntu1_all.deb dist/ami/files/scylla-jmx_all.deb
|
||||
fi
|
||||
if [ ! -f dist/ami/files/scylla-tools_all.deb ]; then
|
||||
cd build
|
||||
git clone --depth 1 https://github.com/scylladb/scylla-tools-java.git
|
||||
cd scylla-tools-java
|
||||
sh -x -e dist/ubuntu/build_deb.sh $*
|
||||
cd ../..
|
||||
cp build/scylla-tools_`cat build/scylla-tools-java/version`-ubuntu1_all.deb dist/ami/files/scylla-tools_all.deb
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
@@ -69,4 +114,4 @@ if [ ! -d packer ]; then
|
||||
cd -
|
||||
fi
|
||||
|
||||
packer/packer build -var-file=variables.json -var install_args="$INSTALL_ARGS" scylla.json
|
||||
packer/packer build -var-file=variables.json -var install_args="$INSTALL_ARGS" -var region="$REGION" -var source_ami="$AMI" -var ssh_username="$SSH_USERNAME" scylla.json
|
||||
|
||||
27
dist/ami/files/.bash_profile
vendored
27
dist/ami/files/.bash_profile
vendored
@@ -30,7 +30,21 @@ echo 'More documentation available at: '
|
||||
echo ' http://www.scylladb.com/doc/'
|
||||
echo
|
||||
|
||||
if [ "`systemctl is-active scylla-server`" = "active" ]; then
|
||||
. /etc/os-release
|
||||
if [ "$ID" = "ubuntu" ]; then
|
||||
if [ "`initctl status ssh|grep "running, process"`" != "" ]; then
|
||||
STARTED=1
|
||||
else
|
||||
STARTED=0
|
||||
fi
|
||||
else
|
||||
if [ "`systemctl is-active scylla-server`" = "active" ]; then
|
||||
STARTED=1
|
||||
else
|
||||
STARTED=0
|
||||
fi
|
||||
fi
|
||||
if [ $STARTED -eq 1 ]; then
|
||||
tput setaf 4
|
||||
tput bold
|
||||
echo " ScyllaDB is active."
|
||||
@@ -42,6 +56,13 @@ else
|
||||
echo " ScyllaDB is not started!"
|
||||
tput sgr0
|
||||
echo "Please wait for startup. To see status of ScyllaDB, run "
|
||||
echo " 'systemctl status scylla-server'"
|
||||
echo
|
||||
if [ "$ID" = "ubuntu" ]; then
|
||||
echo " 'initctl status scylla-server'"
|
||||
echo "and"
|
||||
echo " 'cat /var/log/upstart/scylla-server.log'"
|
||||
echo
|
||||
else
|
||||
echo " 'systemctl status scylla-server'"
|
||||
echo
|
||||
fi
|
||||
fi
|
||||
|
||||
2
dist/ami/files/scylla-ami
vendored
2
dist/ami/files/scylla-ami
vendored
Submodule dist/ami/files/scylla-ami updated: 84bcd0df6d...89e74362fd
14
dist/ami/scylla.json
vendored
14
dist/ami/scylla.json
vendored
@@ -8,10 +8,10 @@
|
||||
"security_group_id": "{{user `security_group_id`}}",
|
||||
"region": "{{user `region`}}",
|
||||
"associate_public_ip_address": "{{user `associate_public_ip_address`}}",
|
||||
"source_ami": "ami-f3102499",
|
||||
"source_ami": "{{user `source_ami`}}",
|
||||
"user_data_file": "user_data.txt",
|
||||
"instance_type": "{{user `instance_type`}}",
|
||||
"ssh_username": "centos",
|
||||
"ssh_username": "{{user `ssh_username`}}",
|
||||
"ssh_timeout": "5m",
|
||||
"ami_name": "{{user `ami_prefix`}}scylla_{{isotime | clean_ami_name}}",
|
||||
"enhanced_networking": true,
|
||||
@@ -62,17 +62,17 @@
|
||||
{
|
||||
"type": "file",
|
||||
"source": "files/",
|
||||
"destination": "/home/centos/"
|
||||
"destination": "/home/{{user `ssh_username`}}/"
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"source": "../../scripts/scylla_install_pkg",
|
||||
"destination": "/home/centos/scylla_install_pkg"
|
||||
"destination": "/home/{{user `ssh_username`}}/scylla_install_pkg"
|
||||
},
|
||||
{
|
||||
"type": "shell",
|
||||
"inline": [
|
||||
"sudo /home/centos/scylla-ami/scylla_install_ami {{ user `install_args` }}"
|
||||
"sudo /home/{{user `ssh_username`}}/scylla-ami/scylla_install_ami {{ user `install_args` }}"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -85,6 +85,8 @@
|
||||
"associate_public_ip_address": "",
|
||||
"instance_type": "",
|
||||
"install_args": "",
|
||||
"ami_prefix": ""
|
||||
"ami_prefix": "",
|
||||
"source_ami": "",
|
||||
"ssh_username": ""
|
||||
}
|
||||
}
|
||||
|
||||
31
dist/common/scripts/scylla_dev_mode_setup
vendored
Executable file
31
dist/common/scripts/scylla_dev_mode_setup
vendored
Executable file
@@ -0,0 +1,31 @@
|
||||
#!/bin/sh -e
|
||||
#
|
||||
# Copyright (C) 2015 ScyllaDB
|
||||
|
||||
print_usage() {
|
||||
echo "scylla_developer_mode_setup --developer-mode=[0|1]"
|
||||
echo " --developer-mode enable/disable developer mode"
|
||||
exit 1
|
||||
}
|
||||
|
||||
DEV_MODE=
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
"--developer-mode")
|
||||
DEV_MODE=$2
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
print_usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ "$DEV_MODE" = "" ]; then
|
||||
print_usage
|
||||
fi
|
||||
if [ "$DEV_MODE" != "0" ] && [ "$DEV_MODE" != "1" ]; then
|
||||
print_usage
|
||||
fi
|
||||
|
||||
echo "DEV_MODE=--developer-mode=$DEV_MODE" > /etc/scylla.d/dev-mode.conf
|
||||
73
dist/common/scripts/scylla_io_setup
vendored
73
dist/common/scripts/scylla_io_setup
vendored
@@ -1,31 +1,53 @@
|
||||
#!/bin/sh
|
||||
|
||||
is_ami() {
|
||||
if [ "`dmidecode --string system-version | grep \.amazon`" != "" ] && \
|
||||
[ "`curl http://169.254.169.254/latest/meta-data/ami-id | grep ami-`" != "" ]; then
|
||||
echo 1
|
||||
else
|
||||
echo 0
|
||||
fi
|
||||
print_usage() {
|
||||
echo "scylla_io_setup --ami"
|
||||
echo " --ami setup AMI instance"
|
||||
exit 1
|
||||
}
|
||||
|
||||
is_supported_instance_type() {
|
||||
TYPE=`curl http://169.254.169.254/latest/meta-data/instance-type|cut -d . -f 1`
|
||||
case $TYPE in
|
||||
"m3"|"c3"|"i2") echo 1;;
|
||||
*) echo 0;;
|
||||
AMI=0
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
"--ami")
|
||||
AMI=1
|
||||
shift 1
|
||||
;;
|
||||
*)
|
||||
print_usage
|
||||
;;
|
||||
esac
|
||||
}
|
||||
done
|
||||
|
||||
|
||||
is_developer_mode() {
|
||||
echo $SCYLLA_ARGS|egrep -c "\-\-developer-mode(\s+|=)1"
|
||||
cat /etc/scylla.d/dev-mode.conf|egrep -c "\-\-developer-mode(\s+|=)(1|true)"
|
||||
}
|
||||
|
||||
if [ ! -f /etc/scylla/io_configured ] && [ `is_developer_mode` -eq 0 ]; then
|
||||
if [ `is_ami` -eq 1 ] && [ `is_supported_instance_type` -eq 1 ]; then
|
||||
NR_CPU=`cat /proc/cpuinfo |grep processor|wc -l`
|
||||
NR_DISKS=`curl http://169.254.169.254/latest/meta-data/block-device-mapping/|grep ephemeral|wc -l`
|
||||
output_to_user()
|
||||
{
|
||||
echo "$1"
|
||||
logger -p user.err "$1"
|
||||
}
|
||||
|
||||
. /etc/os-release
|
||||
if [ "$NAME" = "Ubuntu" ]; then
|
||||
. /etc/default/scylla-server
|
||||
else
|
||||
. /etc/sysconfig/scylla-server
|
||||
fi
|
||||
|
||||
if [ `is_developer_mode` -eq 0 ]; then
|
||||
SMP=`echo $SCYLLA_ARGS|grep smp|sed -e "s/^.*smp\(\s\+\|=\)\([0-9]*\).*$/\2/"`
|
||||
CPUSET=`echo $SCYLLA_ARGS|grep cpuset|sed -e "s/^.*\(--cpuset\(\s\+\|=\)[0-9\-]*\).*$/\1/"`
|
||||
if [ $AMI -eq 1 ]; then
|
||||
NR_CPU=`cat /proc/cpuinfo |grep processor|wc -l`
|
||||
NR_DISKS=`lsblk --list --nodeps --noheadings | grep -v xvda | grep xvd | wc -l`
|
||||
TYPE=`curl http://169.254.169.254/latest/meta-data/instance-type|cut -d . -f 1`
|
||||
|
||||
if [ "$SMP" != "" ]; then
|
||||
NR_CPU=$SMP
|
||||
fi
|
||||
NR_SHARDS=$NR_CPU
|
||||
if [ $NR_CPU -ge 8 ] && [ "$SET_NIC" = "no" ]; then
|
||||
NR_SHARDS=$((NR_CPU - 1))
|
||||
@@ -39,17 +61,20 @@ if [ ! -f /etc/scylla/io_configured ] && [ `is_developer_mode` -eq 0 ]; then
|
||||
NR_IO_QUEUES=$(($NR_REQS / 4))
|
||||
fi
|
||||
|
||||
NR_IO_QUEUES=$((NR_IO_QUEUES>NR_SHARDS?NR_SHARDS:NR_IO_QUEUES))
|
||||
NR_REQS=$(($(($NR_REQS / $NR_IO_QUEUES)) * $NR_IO_QUEUES))
|
||||
if [ "$TYPE" = "i2" ]; then
|
||||
NR_REQS=$(($NR_REQS * 2))
|
||||
fi
|
||||
|
||||
echo "SEASTAR_IO=\"--num-io-queues $NR_IO_QUEUES --max-io-requests $NR_REQS\"" > /etc/scylla.d/io.conf
|
||||
else
|
||||
iotune --evaluation-directory /var/lib/scylla --format envfile --options-file /etc/scylla.d/io.conf
|
||||
iotune --evaluation-directory /var/lib/scylla --format envfile --options-file /etc/scylla.d/io.conf $CPUSET
|
||||
if [ $? -ne 0 ]; then
|
||||
logger -p user.err "/var/lib/scylla did not pass validation tests, it may not be on XFS and/or has limited disk space."
|
||||
logger -p user.err "This is a non-supported setup, and performance is expected to be very bad."
|
||||
logger -p user.err "For better performance, placing your data on XFS-formatted directories is required."
|
||||
logger -p user.err " To override this error, see the developer_mode configuration option."
|
||||
output_to_user "/var/lib/scylla did not pass validation tests, it may not be on XFS and/or has limited disk space."
|
||||
output_to_user "This is a non-supported setup, and performance is expected to be very bad."
|
||||
output_to_user "For better performance, placing your data on XFS-formatted directories is required."
|
||||
output_to_user " To override this error, see the developer_mode configuration option."
|
||||
fi
|
||||
fi
|
||||
touch /etc/scylla/io_configured
|
||||
fi
|
||||
|
||||
2
dist/common/scripts/scylla_raid_setup
vendored
2
dist/common/scripts/scylla_raid_setup
vendored
@@ -49,7 +49,7 @@ fi
|
||||
|
||||
. /etc/os-release
|
||||
if [ "$NAME" = "Ubuntu" ]; then
|
||||
apt-get -y install mdadm xfsprogs
|
||||
env DEBIAN_FRONTEND=noninteractive apt-get -y install mdadm xfsprogs
|
||||
else
|
||||
yum -y install mdadm xfsprogs
|
||||
fi
|
||||
|
||||
38
dist/common/scripts/scylla_setup
vendored
38
dist/common/scripts/scylla_setup
vendored
@@ -8,11 +8,12 @@ if [ "`id -u`" -ne 0 ]; then
|
||||
fi
|
||||
|
||||
print_usage() {
|
||||
echo "scylla_setup --disks /dev/hda,/dev/hdb... --nic eth0 --ntp-domain centos --ami --no-enable-service --no-selinux-setup --no-bootparam-setup --no-ntp-setup --no-raid-setup --no-coredump-setup --no-sysconfig-setup"
|
||||
echo "scylla_setup --disks /dev/hda,/dev/hdb... --nic eth0 --ntp-domain centos --ami --developer-mode --no-enable-service --no-selinux-setup --no-bootparam-setup --no-ntp-setup --no-raid-setup --no-coredump-setup --no-sysconfig-setup"
|
||||
echo " --disks specify disks for RAID"
|
||||
echo " --nic specify NIC"
|
||||
echo " --ntp-domain specify NTP domain"
|
||||
echo " --ami setup AMI instance"
|
||||
echo " --developer-mode enable developer mode"
|
||||
echo " --no-enable-service skip enabling service"
|
||||
echo " --no-selinux-setup skip selinux setup"
|
||||
echo " --no-bootparam-setup skip bootparam setup"
|
||||
@@ -20,6 +21,7 @@ print_usage() {
|
||||
echo " --no-raid-setup skip raid setup"
|
||||
echo " --no-coredump-setup skip coredump setup"
|
||||
echo " --no-sysconfig-setup skip sysconfig setup"
|
||||
echo " --no-io-setup skip IO configuration setup"
|
||||
exit 1
|
||||
}
|
||||
|
||||
@@ -40,6 +42,7 @@ interactive_ask_service() {
|
||||
}
|
||||
|
||||
AMI=0
|
||||
DEV_MODE=0
|
||||
ENABLE_SERVICE=1
|
||||
SELINUX_SETUP=1
|
||||
BOOTPARAM_SETUP=1
|
||||
@@ -47,6 +50,7 @@ NTP_SETUP=1
|
||||
RAID_SETUP=1
|
||||
COREDUMP_SETUP=1
|
||||
SYSCONFIG_SETUP=1
|
||||
IO_SETUP=1
|
||||
|
||||
if [ $# -ne 0 ]; then
|
||||
INTERACTIVE=0
|
||||
@@ -72,6 +76,10 @@ while [ $# -gt 0 ]; do
|
||||
AMI=1
|
||||
shift 1
|
||||
;;
|
||||
"--developer-mode")
|
||||
DEV_MODE=1
|
||||
shift 1
|
||||
;;
|
||||
"--no-enable-service")
|
||||
ENABLE_SERVICE=0
|
||||
shift 1
|
||||
@@ -100,6 +108,10 @@ while [ $# -gt 0 ]; do
|
||||
SYSCONFIG_SETUP=0
|
||||
shift 1
|
||||
;;
|
||||
"--no-io-setup")
|
||||
IO_SETUP=0
|
||||
shift 1
|
||||
;;
|
||||
"-h" | "--help")
|
||||
print_usage
|
||||
shift 1
|
||||
@@ -122,9 +134,9 @@ if [ $INTERACTIVE -eq 1 ]; then
|
||||
fi
|
||||
if [ $ENABLE_SERVICE -eq 1 ]; then
|
||||
if [ "$ID" = "fedora" ] || [ "$ID" = "centos" ]; then
|
||||
systemctl enable scylla-io-setup.service
|
||||
systemctl enable scylla-server.service
|
||||
systemctl enable scylla-jmx.service
|
||||
systemctl enable collectd.service
|
||||
fi
|
||||
fi
|
||||
|
||||
@@ -162,21 +174,21 @@ if [ $INTERACTIVE -eq 1 ]; then
|
||||
if [ $RAID_SETUP -eq 1 ]; then
|
||||
echo "Please select disks from following list: "
|
||||
while true; do
|
||||
lsblk -d -i -n -p -r|awk '{print $1}'|sed -e ':loop;N;$!b loop;s/\n/ /g'
|
||||
lsblk -d -i -n -r|awk '{print $1}'|sed -e ':loop;N;$!b loop;s/\n/ /g'
|
||||
echo "type 'done' to finish selection. selected: $DISKS"
|
||||
echo -n "> "
|
||||
read dsk
|
||||
if [ "$dsk" = "done" ]; then
|
||||
break
|
||||
fi
|
||||
if [ -e $dsk ]; then
|
||||
if [ -e /dev/$dsk ]; then
|
||||
if [ "$DISKS" = "" ]; then
|
||||
DISKS=$dsk
|
||||
DISKS=/dev/$dsk
|
||||
else
|
||||
DISKS="$DISKS,$dsk"
|
||||
DISKS="$DISKS,/dev/$dsk"
|
||||
fi
|
||||
else
|
||||
echo "$dsk not found"
|
||||
echo "/dev/$dsk not found"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
@@ -212,6 +224,18 @@ if [ $INTERACTIVE -eq 1 ]; then
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $INTERACTIVE -eq 1 ]; then
|
||||
interactive_ask_service "Do you want to setup IO configuration?" &&:
|
||||
IO_SETUP=$?
|
||||
fi
|
||||
if [ $IO_SETUP -eq 1 ]; then
|
||||
/usr/lib/scylla/scylla_io_setup
|
||||
fi
|
||||
|
||||
if [ $SYSCONFIG_SETUP -eq 1 ]; then
|
||||
/usr/lib/scylla/scylla_sysconfig_setup --nic $NIC
|
||||
fi
|
||||
if [ $DEV_MODE -eq 1 ]; then
|
||||
/usr/lib/scylla/scylla_dev_mode_setup --developer-mode 1
|
||||
fi
|
||||
|
||||
2
dist/common/scripts/scylla_sysconfig_setup
vendored
2
dist/common/scripts/scylla_sysconfig_setup
vendored
@@ -76,7 +76,7 @@ echo Setting parameters on $SYSCONFIG/scylla-server
|
||||
ETHDRV=`/usr/lib/scylla/dpdk_nic_bind.py --status | grep if=$NIC | sed -e "s/^.*drv=//" -e "s/ .*$//"`
|
||||
ETHPCIID=`/usr/lib/scylla/dpdk_nic_bind.py --status | grep if=$NIC | awk '{print $1}'`
|
||||
NR_CPU=`cat /proc/cpuinfo |grep processor|wc -l`
|
||||
if [ $NR_CPU -ge 8 ] && [ "$SET_NIC" = "no" ]; then
|
||||
if [ "$AMI" = "yes" ] && [ $NR_CPU -ge 8 ] && [ "$SET_NIC" = "no" ]; then
|
||||
NR=$((NR_CPU - 1))
|
||||
SET_NIC="yes"
|
||||
SCYLLA_ARGS="$SCYLLA_ARGS --cpuset 1-$NR --smp $NR"
|
||||
|
||||
4
dist/common/scylla.d/dev-mode.conf
vendored
Normal file
4
dist/common/scylla.d/dev-mode.conf
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
# DO NOT EDIT
|
||||
# This file should be automatically configured by scylla_dev_mode_setup
|
||||
#
|
||||
# DEV_MODE=--developer-mode=0
|
||||
2
dist/common/scylla.d/io.conf
vendored
2
dist/common/scylla.d/io.conf
vendored
@@ -1,4 +1,4 @@
|
||||
# DO NOT EDIT
|
||||
# This file should be automatically configured by scylla-io-setup.service
|
||||
# This file should be automatically configured by scylla_io_setup
|
||||
#
|
||||
# SEASTAR_IO="--max-io-requests=1 --num-io-queues=1"
|
||||
|
||||
2
dist/common/sudoers.d/scylla
vendored
2
dist/common/sudoers.d/scylla
vendored
@@ -1 +1 @@
|
||||
scylla ALL=(ALL) NOPASSWD:SETENV: /usr/lib/scylla/scylla_prepare,/usr/lib/scylla/scylla_stop,/usr/lib/scylla/scylla_io_setup
|
||||
scylla ALL=(ALL) NOPASSWD:SETENV: /usr/lib/scylla/scylla_prepare,/usr/lib/scylla/scylla_stop,/usr/lib/scylla/scylla_io_setup,/usr/lib/scylla/scylla-ami/scylla_ami_setup
|
||||
|
||||
4
dist/redhat/scylla-server.spec.in
vendored
4
dist/redhat/scylla-server.spec.in
vendored
@@ -113,11 +113,9 @@ if [ -f /etc/systemd/coredump.conf ];then
|
||||
/usr/lib/scylla/scylla_coredump_setup
|
||||
fi
|
||||
%systemd_post scylla-server.service
|
||||
%systemd_post scylla-io-setup.service
|
||||
|
||||
%preun
|
||||
%systemd_preun scylla-server.service
|
||||
%systemd_preun scylla-io-setup.service
|
||||
|
||||
%postun
|
||||
%systemd_postun
|
||||
@@ -151,7 +149,6 @@ rm -rf $RPM_BUILD_ROOT
|
||||
%{_docdir}/scylla/ORIGIN
|
||||
%{_docdir}/scylla/licenses/
|
||||
%{_unitdir}/scylla-server.service
|
||||
%{_unitdir}/scylla-io-setup.service
|
||||
%{_bindir}/scylla
|
||||
%{_bindir}/iotune
|
||||
%{_bindir}/scyllatop
|
||||
@@ -165,6 +162,7 @@ rm -rf $RPM_BUILD_ROOT
|
||||
%{_prefix}/lib/scylla/scylla_ntp_setup
|
||||
%{_prefix}/lib/scylla/scylla_selinux_setup
|
||||
%{_prefix}/lib/scylla/scylla_io_setup
|
||||
%{_prefix}/lib/scylla/scylla_dev_mode_setup
|
||||
%{_prefix}/lib/scylla/posix_net_conf.sh
|
||||
%{_prefix}/lib/scylla/dpdk_nic_bind.py
|
||||
%{_prefix}/lib/scylla/dpdk_nic_bind.pyc
|
||||
|
||||
10
dist/redhat/systemd/scylla-io-setup.service
vendored
10
dist/redhat/systemd/scylla-io-setup.service
vendored
@@ -1,10 +0,0 @@
|
||||
[Unit]
|
||||
Description=Scylla IO Setup
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
EnvironmentFile=/etc/sysconfig/scylla-server
|
||||
ExecStart=/usr/lib/scylla/scylla_io_setup
|
||||
RemainAfterExit=yes
|
||||
TimeoutStartSec=1800
|
||||
4
dist/redhat/systemd/scylla-server.service
vendored
4
dist/redhat/systemd/scylla-server.service
vendored
@@ -1,7 +1,5 @@
|
||||
[Unit]
|
||||
Description=Scylla Server
|
||||
After=scylla-io-setup.service
|
||||
Requires=scylla-io-setup.service
|
||||
|
||||
[Service]
|
||||
Type=notify
|
||||
@@ -14,7 +12,7 @@ Environment="HOME=/var/lib/scylla"
|
||||
EnvironmentFile=/etc/sysconfig/scylla-server
|
||||
EnvironmentFile=/etc/scylla.d/*.conf
|
||||
ExecStartPre=/usr/bin/sudo -E /usr/lib/scylla/scylla_prepare
|
||||
ExecStart=/usr/bin/scylla $SCYLLA_ARGS $SEASTAR_IO
|
||||
ExecStart=/usr/bin/scylla $SCYLLA_ARGS $SEASTAR_IO $DEV_MODE
|
||||
ExecStopPost=/usr/bin/sudo -E /usr/lib/scylla/scylla_stop
|
||||
TimeoutStartSec=900
|
||||
KillMode=process
|
||||
|
||||
2
dist/ubuntu/build_deb.sh
vendored
2
dist/ubuntu/build_deb.sh
vendored
@@ -32,7 +32,7 @@ if [ `grep -c $RELEASE dist/ubuntu/supported_release` -lt 1 ]; then
|
||||
fi
|
||||
|
||||
VERSION=$(./SCYLLA-VERSION-GEN)
|
||||
SCYLLA_VERSION=$(cat build/SCYLLA-VERSION-FILE)
|
||||
SCYLLA_VERSION=$(cat build/SCYLLA-VERSION-FILE | sed 's/\.rc/~rc/')
|
||||
SCYLLA_RELEASE=$(cat build/SCYLLA-RELEASE-FILE)
|
||||
echo $VERSION > version
|
||||
./scripts/git-archive-all --extra version --force-submodules --prefix scylla-server ../scylla-server_$SCYLLA_VERSION-$SCYLLA_RELEASE.orig.tar.gz
|
||||
|
||||
4
dist/ubuntu/debian/scylla-server.init
vendored
4
dist/ubuntu/debian/scylla-server.init
vendored
@@ -37,8 +37,10 @@ eval "`grep -v -e "^\s*#" -e "^$" /etc/scylla.d/*.conf|sed -e 's/^/export /'`"
|
||||
|
||||
do_start()
|
||||
{
|
||||
if [ "$AMI" = "yes" ]; then
|
||||
/usr/lib/scylla/scylla-ami/scylla_ami_setup
|
||||
fi
|
||||
/usr/lib/scylla/scylla_prepare
|
||||
/usr/lib/scylla/scylla_io_setup
|
||||
# Return
|
||||
# 0 if daemon has been started
|
||||
# 1 if daemon was already running
|
||||
|
||||
21
dist/ubuntu/debian/scylla-server.upstart
vendored
21
dist/ubuntu/debian/scylla-server.upstart
vendored
@@ -26,19 +26,30 @@ env HOME=/var/lib/scylla
|
||||
|
||||
pre-start script
|
||||
eval "`grep -v -e "^\s*#" -e "^$" /etc/default/scylla-server|sed -e 's/^/export /'`"
|
||||
eval "`grep -v -e "^\s*#" -e "^$" /etc/scylla.d/*.conf|sed -e 's/^/export /'`"
|
||||
. /etc/scylla.d/dev-mode.conf
|
||||
. /etc/scylla.d/io.conf
|
||||
export DEV_MODE
|
||||
export SEASTAR_IO
|
||||
if [ "$AMI" = "yes" ]; then
|
||||
sudo /usr/lib/scylla/scylla-ami/scylla_ami_setup
|
||||
fi
|
||||
sudo /usr/lib/scylla/scylla_prepare
|
||||
sudo /usr/lib/scylla/scylla_io_setup
|
||||
end script
|
||||
|
||||
script
|
||||
eval "`grep -v -e "^\s*#" -e "^$" /etc/default/scylla-server|sed -e 's/^/export /'`"
|
||||
eval "`grep -v -e "^\s*#" -e "^$" /etc/scylla.d/*.conf|sed -e 's/^/export /'`"
|
||||
exec /usr/bin/scylla $SCYLLA_ARGS $SEASTAR_IO
|
||||
. /etc/scylla.d/dev-mode.conf
|
||||
. /etc/scylla.d/io.conf
|
||||
export DEV_MODE
|
||||
export SEASTAR_IO
|
||||
exec /usr/bin/scylla $SCYLLA_ARGS $SEASTAR_IO $DEV_MODE
|
||||
end script
|
||||
|
||||
post-stop script
|
||||
eval "`grep -v -e "^\s*#" -e "^$" /etc/default/scylla-server|sed -e 's/^/export /'`"
|
||||
eval "`grep -v -e "^\s*#" -e "^$" /etc/scylla.d/*.conf|sed -e 's/^/export /'`"
|
||||
. /etc/scylla.d/dev-mode.conf
|
||||
. /etc/scylla.d/io.conf
|
||||
export DEV_MODE
|
||||
export SEASTAR_IO
|
||||
sudo /usr/lib/scylla/scylla_stop
|
||||
end script
|
||||
|
||||
5
dist/ubuntu/rules.in
vendored
5
dist/ubuntu/rules.in
vendored
@@ -35,7 +35,7 @@ override_dh_auto_install:
|
||||
cp $(CURDIR)/dist/common/collectd.d/scylla.conf $(COLLECTD)
|
||||
|
||||
mkdir -p $(SCYLLAD) && \
|
||||
cp $(CURDIR)/dist/common/scylla.d/io.conf $(SCYLLAD)
|
||||
cp $(CURDIR)/dist/common/scylla.d/*.conf $(SCYLLAD)
|
||||
|
||||
mkdir -p $(CONF) && \
|
||||
cp $(CURDIR)/conf/scylla.yaml $(CONF)
|
||||
@@ -72,6 +72,9 @@ override_dh_auto_install:
|
||||
mkdir -p $(CURDIR)/debian/scylla-server/var/lib/scylla/commitlog
|
||||
mkdir -p $(CURDIR)/debian/scylla-server/var/lib/scylla/coredump
|
||||
|
||||
override_dh_installinit:
|
||||
dh_installinit --no-start
|
||||
|
||||
override_dh_strip:
|
||||
dh_strip --dbg-package=scylla-server-dbg
|
||||
%:
|
||||
|
||||
@@ -62,7 +62,12 @@ static const std::map<application_state, sstring> application_state_names = {
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const application_state& m) {
|
||||
os << application_state_names.at(m);
|
||||
auto it = application_state_names.find(m);
|
||||
if (it != application_state_names.end()) {
|
||||
os << application_state_names.at(m);
|
||||
} else {
|
||||
os << "UNKNOWN";
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
|
||||
@@ -20,7 +20,8 @@
|
||||
*/
|
||||
|
||||
namespace gms {
|
||||
enum class application_state:int {STATUS = 0,
|
||||
enum class application_state:int {
|
||||
STATUS = 0,
|
||||
LOAD,
|
||||
SCHEMA,
|
||||
DC,
|
||||
@@ -29,6 +30,7 @@ enum class application_state:int {STATUS = 0,
|
||||
REMOVAL_COORDINATOR,
|
||||
INTERNAL_IP,
|
||||
RPC_ADDRESS,
|
||||
X_11_PADDING,
|
||||
SEVERITY,
|
||||
NET_VERSION,
|
||||
HOST_ID,
|
||||
|
||||
24
main.cc
24
main.cc
@@ -293,9 +293,19 @@ int main(int ac, char** av) {
|
||||
sstring broadcast_rpc_address = cfg->broadcast_rpc_address();
|
||||
|
||||
if (!broadcast_address.empty()) {
|
||||
utils::fb_utilities::set_broadcast_address(broadcast_address);
|
||||
try {
|
||||
utils::fb_utilities::set_broadcast_address(broadcast_address);
|
||||
} catch (...) {
|
||||
startlog.error("Bad configuration: invalid 'broadcast_address': {}: {}", broadcast_address, std::current_exception());
|
||||
throw bad_configuration_error();
|
||||
}
|
||||
} else if (!listen_address.empty()) {
|
||||
utils::fb_utilities::set_broadcast_address(listen_address);
|
||||
try {
|
||||
utils::fb_utilities::set_broadcast_address(listen_address);
|
||||
} catch (...) {
|
||||
startlog.error("Bad configuration: invalid 'listen_address': {}: {}", listen_address, std::current_exception());
|
||||
throw bad_configuration_error();
|
||||
}
|
||||
} else {
|
||||
startlog.error("Bad configuration: neither listen_address nor broadcast_address are defined\n");
|
||||
throw bad_configuration_error();
|
||||
@@ -352,7 +362,6 @@ int main(int ac, char** av) {
|
||||
print("Scylla API server listening on %s:%s ...\n", api_address, api_port);
|
||||
supervisor_notify("initializing storage service");
|
||||
init_storage_service(db).get();
|
||||
api::set_server_storage_service(ctx).get();
|
||||
supervisor_notify("starting per-shard database core");
|
||||
// Note: changed from using a move here, because we want the config object intact.
|
||||
db.start(std::ref(*cfg)).get();
|
||||
@@ -422,14 +431,11 @@ int main(int ac, char** av) {
|
||||
, seed_provider
|
||||
, cluster_name
|
||||
, phi).get();
|
||||
api::set_server_gossip(ctx).get();
|
||||
supervisor_notify("starting messaging service");
|
||||
api::set_server_messaging_service(ctx).get();
|
||||
supervisor_notify("starting storage proxy");
|
||||
proxy.start(std::ref(db)).get();
|
||||
// #293 - do not stop anything
|
||||
// engine().at_exit([&proxy] { return proxy.stop(); });
|
||||
api::set_server_storage_proxy(ctx).get();
|
||||
supervisor_notify("starting migration manager");
|
||||
mm.start().get();
|
||||
// #293 - do not stop anything
|
||||
@@ -458,7 +464,6 @@ int main(int ac, char** av) {
|
||||
}
|
||||
return db.load_sstables(proxy);
|
||||
}).get();
|
||||
api::set_server_load_sstable(ctx).get();
|
||||
supervisor_notify("setting up system keyspace");
|
||||
db::system_keyspace::setup(db, qp).get();
|
||||
supervisor_notify("starting commit log");
|
||||
@@ -479,6 +484,11 @@ int main(int ac, char** av) {
|
||||
}
|
||||
}
|
||||
}
|
||||
api::set_server_storage_service(ctx).get();
|
||||
api::set_server_gossip(ctx).get();
|
||||
api::set_server_messaging_service(ctx).get();
|
||||
api::set_server_storage_proxy(ctx).get();
|
||||
api::set_server_load_sstable(ctx).get();
|
||||
supervisor_notify("initializing migration manager RPC verbs");
|
||||
service::get_migration_manager().invoke_on_all([] (auto& mm) {
|
||||
mm.init_messaging_service();
|
||||
|
||||
@@ -360,6 +360,7 @@ void messaging_service::cache_preferred_ip(gms::inet_address ep, gms::inet_addre
|
||||
}
|
||||
|
||||
shared_ptr<messaging_service::rpc_protocol_client_wrapper> messaging_service::get_rpc_client(messaging_verb verb, msg_addr id) {
|
||||
assert(!_stopping);
|
||||
auto idx = get_rpc_client_idx(verb);
|
||||
auto it = _clients[idx].find(id);
|
||||
|
||||
@@ -409,6 +410,13 @@ shared_ptr<messaging_service::rpc_protocol_client_wrapper> messaging_service::ge
|
||||
}
|
||||
|
||||
void messaging_service::remove_rpc_client_one(clients_map& clients, msg_addr id, bool dead_only) {
|
||||
if (_stopping) {
|
||||
// if the messaging service is in the process of being stopped, there is no need to
|
||||
// stop and remove connection here since they are being stopped already
|
||||
// and we'll just interfere
|
||||
return;
|
||||
}
|
||||
|
||||
auto it = clients.find(id);
|
||||
if (it != clients.end() && (!dead_only || it->second.rpc_client->error())) {
|
||||
auto client = std::move(it->second.rpc_client);
|
||||
@@ -442,8 +450,12 @@ std::unique_ptr<messaging_service::rpc_protocol_wrapper>& messaging_service::rpc
|
||||
// Send a message for verb
|
||||
template <typename MsgIn, typename... MsgOut>
|
||||
auto send_message(messaging_service* ms, messaging_verb verb, msg_addr id, MsgOut&&... msg) {
|
||||
auto rpc_client_ptr = ms->get_rpc_client(verb, id);
|
||||
auto rpc_handler = ms->rpc()->make_client<MsgIn(MsgOut...)>(verb);
|
||||
if (ms->is_stopping()) {
|
||||
using futurator = futurize<std::result_of_t<decltype(rpc_handler)(rpc_protocol::client&, MsgOut...)>>;
|
||||
return futurator::make_exception_future(rpc::closed_error());
|
||||
}
|
||||
auto rpc_client_ptr = ms->get_rpc_client(verb, id);
|
||||
auto& rpc_client = *rpc_client_ptr;
|
||||
return rpc_handler(rpc_client, std::forward<MsgOut>(msg)...).then_wrapped([ms = ms->shared_from_this(), id, verb, rpc_client_ptr = std::move(rpc_client_ptr)] (auto&& f) {
|
||||
try {
|
||||
@@ -467,8 +479,12 @@ auto send_message(messaging_service* ms, messaging_verb verb, msg_addr id, MsgOu
|
||||
// TODO: Remove duplicated code in send_message
|
||||
template <typename MsgIn, typename Timeout, typename... MsgOut>
|
||||
auto send_message_timeout(messaging_service* ms, messaging_verb verb, msg_addr id, Timeout timeout, MsgOut&&... msg) {
|
||||
auto rpc_client_ptr = ms->get_rpc_client(verb, id);
|
||||
auto rpc_handler = ms->rpc()->make_client<MsgIn(MsgOut...)>(verb);
|
||||
if (ms->is_stopping()) {
|
||||
using futurator = futurize<std::result_of_t<decltype(rpc_handler)(rpc_protocol::client&, MsgOut...)>>;
|
||||
return futurator::make_exception_future(rpc::closed_error());
|
||||
}
|
||||
auto rpc_client_ptr = ms->get_rpc_client(verb, id);
|
||||
auto& rpc_client = *rpc_client_ptr;
|
||||
return rpc_handler(rpc_client, timeout, std::forward<MsgOut>(msg)...).then_wrapped([ms = ms->shared_from_this(), id, verb, rpc_client_ptr = std::move(rpc_client_ptr)] (auto&& f) {
|
||||
try {
|
||||
@@ -534,7 +550,7 @@ auto send_message_timeout_and_retry(messaging_service* ms, messaging_verb verb,
|
||||
throw;
|
||||
}
|
||||
});
|
||||
}).then([] (MsgInTuple result) {
|
||||
}).then([ms = ms->shared_from_this()] (MsgInTuple result) {
|
||||
return futurize<MsgIn>::from_tuple(std::move(result));
|
||||
});
|
||||
});
|
||||
|
||||
@@ -186,3 +186,7 @@ void mutation::apply(mutation&& m) {
|
||||
void mutation::apply(const mutation& m) {
|
||||
partition().apply(*schema(), m.partition(), *m.schema());
|
||||
}
|
||||
|
||||
mutation& mutation::operator=(const mutation& m) {
|
||||
return *this = mutation(m);
|
||||
}
|
||||
|
||||
@@ -60,9 +60,9 @@ public:
|
||||
mutation(const mutation& m)
|
||||
: _ptr(std::make_unique<data>(schema_ptr(m.schema()), dht::decorated_key(m.decorated_key()), m.partition()))
|
||||
{ }
|
||||
|
||||
mutation(mutation&&) = default;
|
||||
mutation& operator=(mutation&& x) = default;
|
||||
mutation& operator=(const mutation& m);
|
||||
|
||||
void set_static_cell(const column_definition& def, atomic_cell_or_collection&& value);
|
||||
void set_static_cell(const bytes& name, const data_value& value, api::timestamp_type timestamp, ttl_opt ttl = {});
|
||||
|
||||
@@ -20,12 +20,14 @@
|
||||
*/
|
||||
|
||||
#include <boost/range/adaptor/reversed.hpp>
|
||||
#include <seastar/util/defer.hh>
|
||||
#include "mutation_partition.hh"
|
||||
#include "mutation_partition_applier.hh"
|
||||
#include "converting_mutation_partition_applier.hh"
|
||||
#include "partition_builder.hh"
|
||||
#include "query-result-writer.hh"
|
||||
#include "atomic_cell_hash.hh"
|
||||
#include "reversibly_mergeable.hh"
|
||||
|
||||
template<bool reversed>
|
||||
struct reversal_traits;
|
||||
@@ -91,6 +93,109 @@ struct reversal_traits<true> {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
//
|
||||
// apply_reversibly_intrusive_set() and revert_intrusive_set() implement ReversiblyMergeable
|
||||
// for a boost::intrusive_set<> container of ReversiblyMergeable entries.
|
||||
//
|
||||
// See reversibly_mergeable.hh
|
||||
//
|
||||
// Requirements:
|
||||
// - entry has distinct key and value states
|
||||
// - entries are ordered only by key in the container
|
||||
// - entry can have an empty value
|
||||
// - presence of an entry with an empty value doesn't affect equality of the containers
|
||||
// - E::empty() returns true iff the value is empty
|
||||
// - E(e.key()) creates an entry with empty value but the same key as that of e.
|
||||
//
|
||||
// Implementation of ReversiblyMergeable for the entry's value is provided via Apply and Revert functors.
|
||||
//
|
||||
// ReversiblyMergeable is constructed assuming the following properties of the 'apply' operation
|
||||
// on containers:
|
||||
//
|
||||
// apply([{k1, v1}], [{k1, v2}]) = [{k1, apply(v1, v2)}]
|
||||
// apply([{k1, v1}], [{k2, v2}]) = [{k1, v1}, {k2, v2}]
|
||||
//
|
||||
|
||||
// revert for apply_reversibly_intrusive_set()
|
||||
template<typename Container, typename Revert = default_reverter<typename Container::value_type>>
|
||||
void revert_intrusive_set_range(Container& dst, Container& src,
|
||||
typename Container::iterator start,
|
||||
typename Container::iterator end,
|
||||
Revert&& revert = Revert()) noexcept
|
||||
{
|
||||
using value_type = typename Container::value_type;
|
||||
auto deleter = current_deleter<value_type>();
|
||||
while (start != end) {
|
||||
auto& e = *start;
|
||||
// find() can allocate if linearization is required but it should have
|
||||
// been already performed by the lower_bound() invocation in apply_reversibly_intrusive_set() and
|
||||
// stored in the linearization context.
|
||||
auto i = dst.find(e);
|
||||
assert(i != dst.end());
|
||||
value_type& dst_e = *i;
|
||||
|
||||
if (e.empty()) {
|
||||
dst.erase(i);
|
||||
start = src.erase_and_dispose(start, deleter);
|
||||
start = src.insert_before(start, dst_e);
|
||||
} else {
|
||||
revert(dst_e, e);
|
||||
}
|
||||
|
||||
++start;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Container, typename Revert = default_reverter<typename Container::value_type>>
|
||||
void revert_intrusive_set(Container& dst, Container& src, Revert&& revert = Revert()) noexcept {
|
||||
revert_intrusive_set_range(dst, src, src.begin(), src.end(), std::forward<Revert>(revert));
|
||||
}
|
||||
|
||||
// Applies src onto dst. See comment above revert_intrusive_set_range() for more details.
|
||||
//
|
||||
// Returns an object which upon going out of scope, unless cancel() is called on it,
|
||||
// reverts the application by calling revert_intrusive_set(). The references to containers
|
||||
// must be stable as long as the returned object is live.
|
||||
template<typename Container,
|
||||
typename Apply = default_reversible_applier<typename Container::value_type>,
|
||||
typename Revert = default_reverter<typename Container::value_type>>
|
||||
auto apply_reversibly_intrusive_set(Container& dst, Container& src, Apply&& apply = Apply(), Revert&& revert = Revert()) {
|
||||
using value_type = typename Container::value_type;
|
||||
auto src_i = src.begin();
|
||||
try {
|
||||
while (src_i != src.end()) {
|
||||
value_type& src_e = *src_i;
|
||||
|
||||
// neutral entries will be given special meaning for the purpose of revert, so
|
||||
// get rid of empty rows from the input as if they were not there. This doesn't change
|
||||
// the value of src.
|
||||
if (src_e.empty()) {
|
||||
src_i = src.erase_and_dispose(src_i, current_deleter<value_type>());
|
||||
continue;
|
||||
}
|
||||
|
||||
auto i = dst.lower_bound(src_e);
|
||||
if (i == dst.end() || dst.key_comp()(src_e, *i)) {
|
||||
// Construct neutral entry which will represent missing dst entry for revert.
|
||||
value_type* empty_e = current_allocator().construct<value_type>(src_e.key());
|
||||
[&] () noexcept {
|
||||
src_i = src.erase(src_i);
|
||||
src_i = src.insert_before(src_i, *empty_e);
|
||||
dst.insert_before(i, src_e);
|
||||
}();
|
||||
} else {
|
||||
apply(*i, src_e);
|
||||
}
|
||||
++src_i;
|
||||
}
|
||||
return defer([&dst, &src, revert] { revert_intrusive_set(dst, src, revert); });
|
||||
} catch (...) {
|
||||
revert_intrusive_set_range(dst, src, src.begin(), src_i, revert);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
mutation_partition::mutation_partition(const mutation_partition& x)
|
||||
: _tombstone(x._tombstone)
|
||||
, _static_row(x._static_row)
|
||||
@@ -134,29 +239,12 @@ mutation_partition::apply(const schema& s, const mutation_partition& p, const sc
|
||||
if (s.version() != p_schema.version()) {
|
||||
auto p2 = p;
|
||||
p2.upgrade(p_schema, s);
|
||||
apply(s, std::move(p2), s);
|
||||
apply(s, std::move(p2));
|
||||
return;
|
||||
}
|
||||
|
||||
_tombstone.apply(p._tombstone);
|
||||
|
||||
for (auto&& e : p._row_tombstones) {
|
||||
apply_row_tombstone(s, e.prefix(), e.t());
|
||||
}
|
||||
|
||||
_static_row.merge(s, column_kind::static_column, p._static_row);
|
||||
|
||||
for (auto&& entry : p._rows) {
|
||||
auto i = _rows.find(entry);
|
||||
if (i == _rows.end()) {
|
||||
auto e = current_allocator().construct<rows_entry>(entry);
|
||||
_rows.insert(i, *e);
|
||||
} else {
|
||||
i->row().apply(entry.row().deleted_at());
|
||||
i->row().apply(entry.row().marker());
|
||||
i->row().cells().merge(s, column_kind::regular_column, entry.row().cells());
|
||||
}
|
||||
}
|
||||
mutation_partition tmp(p);
|
||||
apply(s, std::move(tmp));
|
||||
}
|
||||
|
||||
void
|
||||
@@ -167,42 +255,42 @@ mutation_partition::apply(const schema& s, mutation_partition&& p, const schema&
|
||||
return;
|
||||
}
|
||||
|
||||
_tombstone.apply(p._tombstone);
|
||||
apply(s, std::move(p));
|
||||
}
|
||||
|
||||
p._row_tombstones.clear_and_dispose([this, &s] (row_tombstones_entry* e) {
|
||||
apply_row_tombstone(s, e);
|
||||
void
|
||||
mutation_partition::apply(const schema& s, mutation_partition&& p) {
|
||||
auto revert_row_tombstones = apply_reversibly_intrusive_set(_row_tombstones, p._row_tombstones);
|
||||
|
||||
_static_row.apply_reversibly(s, column_kind::static_column, p._static_row);
|
||||
auto revert_static_row = defer([&] {
|
||||
_static_row.revert(s, column_kind::static_column, p._static_row);
|
||||
});
|
||||
|
||||
_static_row.merge(s, column_kind::static_column, std::move(p._static_row));
|
||||
auto revert_rows = apply_reversibly_intrusive_set(_rows, p._rows,
|
||||
[&s] (rows_entry& dst, rows_entry& src) { dst.apply_reversibly(s, src); },
|
||||
[&s] (rows_entry& dst, rows_entry& src) noexcept { dst.revert(s, src); });
|
||||
|
||||
auto p_i = p._rows.begin();
|
||||
auto p_end = p._rows.end();
|
||||
while (p_i != p_end) {
|
||||
rows_entry& entry = *p_i;
|
||||
auto i = _rows.find(entry);
|
||||
if (i == _rows.end()) {
|
||||
p_i = p._rows.erase(p_i);
|
||||
_rows.insert(i, entry);
|
||||
} else {
|
||||
i->row().apply(entry.row().deleted_at());
|
||||
i->row().apply(entry.row().marker());
|
||||
i->row().cells().merge(s, column_kind::regular_column, std::move(entry.row().cells()));
|
||||
p_i = p._rows.erase_and_dispose(p_i, current_deleter<rows_entry>());
|
||||
}
|
||||
}
|
||||
_tombstone.apply(p._tombstone); // noexcept
|
||||
|
||||
revert_rows.cancel();
|
||||
revert_row_tombstones.cancel();
|
||||
revert_static_row.cancel();
|
||||
}
|
||||
|
||||
void
|
||||
mutation_partition::apply(const schema& s, mutation_partition_view p, const schema& p_schema) {
|
||||
if (p_schema.version() == s.version()) {
|
||||
mutation_partition_applier applier(s, *this);
|
||||
p.accept(s, applier);
|
||||
mutation_partition p2(*this, copy_comparators_only{});
|
||||
partition_builder b(s, p2);
|
||||
p.accept(s, b);
|
||||
apply(s, std::move(p2));
|
||||
} else {
|
||||
mutation_partition p2(*this, copy_comparators_only{});
|
||||
partition_builder b(p_schema, p2);
|
||||
p.accept(p_schema, b);
|
||||
p2.upgrade(p_schema, s);
|
||||
apply(s, std::move(p2), s);
|
||||
apply(s, std::move(p2));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -667,7 +755,7 @@ operator<<(std::ostream& os, const mutation_partition& mp) {
|
||||
constexpr gc_clock::duration row_marker::no_ttl;
|
||||
constexpr gc_clock::duration row_marker::dead;
|
||||
|
||||
int compare_row_marker_for_merge(const row_marker& left, const row_marker& right) {
|
||||
int compare_row_marker_for_merge(const row_marker& left, const row_marker& right) noexcept {
|
||||
if (left.timestamp() != right.timestamp()) {
|
||||
return left.timestamp() > right.timestamp() ? 1 : -1;
|
||||
}
|
||||
@@ -703,6 +791,18 @@ deletable_row::equal(column_kind kind, const schema& s, const deletable_row& oth
|
||||
return _cells.equal(kind, s, other._cells, other_schema);
|
||||
}
|
||||
|
||||
void deletable_row::apply_reversibly(const schema& s, deletable_row& src) {
|
||||
_cells.apply_reversibly(s, column_kind::regular_column, src._cells);
|
||||
_deleted_at.apply_reversibly(src._deleted_at); // noexcept
|
||||
_marker.apply_reversibly(src._marker); // noexcept
|
||||
}
|
||||
|
||||
void deletable_row::revert(const schema& s, deletable_row& src) {
|
||||
_cells.revert(s, column_kind::regular_column, src._cells);
|
||||
_deleted_at.revert(src._deleted_at);
|
||||
_marker.revert(src._marker);
|
||||
}
|
||||
|
||||
bool
|
||||
rows_entry::equal(const schema& s, const rows_entry& other) const {
|
||||
return equal(s, other, s);
|
||||
@@ -747,42 +847,123 @@ bool mutation_partition::equal(const schema& this_schema, const mutation_partiti
|
||||
}
|
||||
|
||||
void
|
||||
merge_column(const column_definition& def,
|
||||
atomic_cell_or_collection& old,
|
||||
atomic_cell_or_collection&& neww) {
|
||||
apply_reversibly(const column_definition& def, atomic_cell_or_collection& dst, atomic_cell_or_collection& src) {
|
||||
// Must be run via with_linearized_managed_bytes() context, but assume it is
|
||||
// provided via an upper layer
|
||||
if (def.is_atomic()) {
|
||||
if (compare_atomic_cell_for_merge(old.as_atomic_cell(), neww.as_atomic_cell()) < 0) {
|
||||
old = std::move(neww);
|
||||
auto&& src_ac = src.as_atomic_cell_ref();
|
||||
if (compare_atomic_cell_for_merge(dst.as_atomic_cell(), src.as_atomic_cell()) < 0) {
|
||||
std::swap(dst, src);
|
||||
src_ac.set_revert(true);
|
||||
} else {
|
||||
src_ac.set_revert(false);
|
||||
}
|
||||
} else {
|
||||
auto ct = static_pointer_cast<const collection_type_impl>(def.type);
|
||||
old = ct->merge(old.as_collection_mutation(), neww.as_collection_mutation());
|
||||
src = ct->merge(dst.as_collection_mutation(), src.as_collection_mutation());
|
||||
std::swap(dst, src);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
revert(const column_definition& def, atomic_cell_or_collection& dst, atomic_cell_or_collection& src) noexcept {
|
||||
static_assert(std::is_nothrow_move_constructible<atomic_cell_or_collection>::value
|
||||
&& std::is_nothrow_move_assignable<atomic_cell_or_collection>::value,
|
||||
"for std::swap() to be noexcept");
|
||||
if (def.is_atomic()) {
|
||||
auto&& ac = src.as_atomic_cell_ref();
|
||||
if (ac.is_revert_set()) {
|
||||
ac.set_revert(false);
|
||||
std::swap(dst, src);
|
||||
}
|
||||
} else {
|
||||
std::swap(dst, src);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
row::apply(const column_definition& column, const atomic_cell_or_collection& value) {
|
||||
// FIXME: Optimize
|
||||
atomic_cell_or_collection tmp(value);
|
||||
apply(column, std::move(tmp));
|
||||
}
|
||||
|
||||
void
|
||||
row::apply(const column_definition& column, atomic_cell_or_collection&& value) {
|
||||
apply_reversibly(column, value);
|
||||
}
|
||||
|
||||
template<typename Func, typename Rollback>
|
||||
void row::for_each_cell(Func&& func, Rollback&& rollback) {
|
||||
static_assert(noexcept(rollback(std::declval<column_id>(), std::declval<atomic_cell_or_collection&>())),
|
||||
"rollback must be noexcept");
|
||||
|
||||
if (_type == storage_type::vector) {
|
||||
unsigned i = 0;
|
||||
try {
|
||||
for (; i < _storage.vector.v.size(); i++) {
|
||||
if (_storage.vector.present.test(i)) {
|
||||
func(i, _storage.vector.v[i]);
|
||||
}
|
||||
}
|
||||
} catch (...) {
|
||||
while (i) {
|
||||
--i;
|
||||
if (_storage.vector.present.test(i)) {
|
||||
rollback(i, _storage.vector.v[i]);
|
||||
}
|
||||
}
|
||||
throw;
|
||||
}
|
||||
} else {
|
||||
auto i = _storage.set.begin();
|
||||
try {
|
||||
while (i != _storage.set.end()) {
|
||||
func(i->id(), i->cell());
|
||||
++i;
|
||||
}
|
||||
} catch (...) {
|
||||
while (i != _storage.set.begin()) {
|
||||
--i;
|
||||
rollback(i->id(), i->cell());
|
||||
}
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Func>
|
||||
void row::for_each_cell(Func&& func) {
|
||||
if (_type == storage_type::vector) {
|
||||
for (auto i : bitsets::for_each_set(_storage.vector.present)) {
|
||||
func(i, _storage.vector.v[i]);
|
||||
}
|
||||
} else {
|
||||
for (auto& cell : _storage.set) {
|
||||
func(cell.id(), cell.cell());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
row::apply_reversibly(const column_definition& column, atomic_cell_or_collection& value) {
|
||||
static_assert(std::is_nothrow_move_constructible<atomic_cell_or_collection>::value
|
||||
&& std::is_nothrow_move_assignable<atomic_cell_or_collection>::value,
|
||||
"noexcept required for atomicity");
|
||||
|
||||
// our mutations are not yet immutable
|
||||
auto id = column.id;
|
||||
if (_type == storage_type::vector && id < max_vector_size) {
|
||||
if (id >= _storage.vector.size()) {
|
||||
_storage.vector.resize(id);
|
||||
_storage.vector.emplace_back(std::move(value));
|
||||
if (id >= _storage.vector.v.size()) {
|
||||
_storage.vector.v.resize(id);
|
||||
_storage.vector.v.emplace_back(std::move(value));
|
||||
_storage.vector.present.set(id);
|
||||
_size++;
|
||||
} else if (!bool(_storage.vector[id])) {
|
||||
_storage.vector[id] = std::move(value);
|
||||
} else if (!bool(_storage.vector.v[id])) {
|
||||
_storage.vector.v[id] = std::move(value);
|
||||
_storage.vector.present.set(id);
|
||||
_size++;
|
||||
} else {
|
||||
merge_column(column, _storage.vector[id], std::move(value));
|
||||
::apply_reversibly(column, _storage.vector.v[id], value);
|
||||
}
|
||||
} else {
|
||||
if (_type == storage_type::vector) {
|
||||
@@ -790,11 +971,37 @@ row::apply(const column_definition& column, atomic_cell_or_collection&& value) {
|
||||
}
|
||||
auto i = _storage.set.lower_bound(id, cell_entry::compare());
|
||||
if (i == _storage.set.end() || i->id() != id) {
|
||||
auto e = current_allocator().construct<cell_entry>(id, std::move(value));
|
||||
cell_entry* e = current_allocator().construct<cell_entry>(id);
|
||||
std::swap(e->_cell, value);
|
||||
_storage.set.insert(i, *e);
|
||||
_size++;
|
||||
} else {
|
||||
merge_column(column, i->cell(), std::move(value));
|
||||
::apply_reversibly(column, i->cell(), value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
row::revert(const column_definition& column, atomic_cell_or_collection& src) noexcept {
|
||||
auto id = column.id;
|
||||
if (_type == storage_type::vector) {
|
||||
auto& dst = _storage.vector.v[id];
|
||||
if (!src) {
|
||||
std::swap(dst, src);
|
||||
_storage.vector.present.reset(id);
|
||||
--_size;
|
||||
} else {
|
||||
::revert(column, dst, src);
|
||||
}
|
||||
} else {
|
||||
auto i = _storage.set.find(id, cell_entry::compare());
|
||||
auto& dst = i->cell();
|
||||
if (!src) {
|
||||
std::swap(dst, src);
|
||||
_storage.set.erase_and_dispose(i, current_deleter<cell_entry>());
|
||||
--_size;
|
||||
} else {
|
||||
::revert(column, dst, src);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -802,8 +1009,9 @@ row::apply(const column_definition& column, atomic_cell_or_collection&& value) {
|
||||
void
|
||||
row::append_cell(column_id id, atomic_cell_or_collection value) {
|
||||
if (_type == storage_type::vector && id < max_vector_size) {
|
||||
_storage.vector.resize(id);
|
||||
_storage.vector.emplace_back(std::move(value));
|
||||
_storage.vector.v.resize(id);
|
||||
_storage.vector.v.emplace_back(std::move(value));
|
||||
_storage.vector.present.set(id);
|
||||
} else {
|
||||
if (_type == storage_type::vector) {
|
||||
vector_to_set();
|
||||
@@ -817,10 +1025,10 @@ row::append_cell(column_id id, atomic_cell_or_collection value) {
|
||||
const atomic_cell_or_collection*
|
||||
row::find_cell(column_id id) const {
|
||||
if (_type == storage_type::vector) {
|
||||
if (id >= _storage.vector.size() || !bool(_storage.vector[id])) {
|
||||
if (id >= _storage.vector.v.size() || !_storage.vector.present.test(id)) {
|
||||
return nullptr;
|
||||
}
|
||||
return &_storage.vector[id];
|
||||
return &_storage.vector.v[id];
|
||||
} else {
|
||||
auto i = _storage.set.find(id, cell_entry::compare());
|
||||
if (i == _storage.set.end()) {
|
||||
@@ -1034,7 +1242,7 @@ row::row(const row& o)
|
||||
, _size(o._size)
|
||||
{
|
||||
if (_type == storage_type::vector) {
|
||||
new (&_storage.vector) vector_type(o._storage.vector);
|
||||
new (&_storage.vector) vector_storage(o._storage.vector);
|
||||
} else {
|
||||
auto cloner = [] (const auto& x) {
|
||||
return current_allocator().construct<std::remove_const_t<std::remove_reference_t<decltype(x)>>>(x);
|
||||
@@ -1051,14 +1259,14 @@ row::row(const row& o)
|
||||
|
||||
row::~row() {
|
||||
if (_type == storage_type::vector) {
|
||||
_storage.vector.~vector_type();
|
||||
_storage.vector.~vector_storage();
|
||||
} else {
|
||||
_storage.set.clear_and_dispose(current_deleter<cell_entry>());
|
||||
_storage.set.~map_type();
|
||||
}
|
||||
}
|
||||
|
||||
row::cell_entry::cell_entry(const cell_entry& o) noexcept
|
||||
row::cell_entry::cell_entry(const cell_entry& o)
|
||||
: _id(o._id)
|
||||
, _cell(o._cell)
|
||||
{ }
|
||||
@@ -1085,15 +1293,20 @@ void row::vector_to_set()
|
||||
{
|
||||
assert(_type == storage_type::vector);
|
||||
map_type set;
|
||||
for (unsigned i = 0; i < _storage.vector.size(); i++) {
|
||||
auto& c = _storage.vector[i];
|
||||
if (!bool(c)) {
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
for (auto i : bitsets::for_each_set(_storage.vector.present)) {
|
||||
auto& c = _storage.vector.v[i];
|
||||
auto e = current_allocator().construct<cell_entry>(i, std::move(c));
|
||||
set.insert(set.end(), *e);
|
||||
}
|
||||
_storage.vector.~vector_type();
|
||||
} catch (...) {
|
||||
set.clear_and_dispose([this, del = current_deleter<cell_entry>()] (cell_entry* ce) noexcept {
|
||||
_storage.vector.v[ce->id()] = std::move(ce->cell());
|
||||
del(ce);
|
||||
});
|
||||
throw;
|
||||
}
|
||||
_storage.vector.~vector_storage();
|
||||
new (&_storage.set) map_type(std::move(set));
|
||||
_type = storage_type::set;
|
||||
}
|
||||
@@ -1104,7 +1317,7 @@ void row::reserve(column_id last_column)
|
||||
if (last_column >= max_vector_size) {
|
||||
vector_to_set();
|
||||
} else {
|
||||
_storage.vector.reserve(last_column);
|
||||
_storage.vector.v.reserve(last_column);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1157,13 +1370,13 @@ bool row::equal(column_kind kind, const schema& this_schema, const row& other, c
|
||||
}
|
||||
|
||||
row::row() {
|
||||
new (&_storage.vector) vector_type;
|
||||
new (&_storage.vector) vector_storage;
|
||||
}
|
||||
|
||||
row::row(row&& other)
|
||||
: _type(other._type), _size(other._size) {
|
||||
if (_type == storage_type::vector) {
|
||||
new (&_storage.vector) vector_type(std::move(other._storage.vector));
|
||||
new (&_storage.vector) vector_storage(std::move(other._storage.vector));
|
||||
} else {
|
||||
new (&_storage.set) map_type(std::move(other._storage.set));
|
||||
}
|
||||
@@ -1177,27 +1390,25 @@ row& row::operator=(row&& other) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
void row::merge(const schema& s, column_kind kind, const row& other) {
|
||||
void row::apply_reversibly(const schema& s, column_kind kind, row& other) {
|
||||
if (other.empty()) {
|
||||
return;
|
||||
}
|
||||
if (other._type == storage_type::vector) {
|
||||
reserve(other._storage.vector.size() - 1);
|
||||
reserve(other._storage.vector.v.size() - 1);
|
||||
} else {
|
||||
reserve(other._storage.set.rbegin()->id());
|
||||
}
|
||||
other.for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
|
||||
apply(s.column_at(kind, id), cell);
|
||||
other.for_each_cell([&] (column_id id, atomic_cell_or_collection& cell) {
|
||||
apply_reversibly(s.column_at(kind, id), cell);
|
||||
}, [&] (column_id id, atomic_cell_or_collection& cell) noexcept {
|
||||
revert(s.column_at(kind, id), cell);
|
||||
});
|
||||
}
|
||||
|
||||
void row::merge(const schema& s, column_kind kind, row&& other) {
|
||||
if (other._type == storage_type::vector) {
|
||||
reserve(other._storage.vector.size() - 1);
|
||||
} else {
|
||||
reserve(other._storage.set.rbegin()->id());
|
||||
}
|
||||
// FIXME: Optimize when 'other' is a set. We could move whole entries, not only cells.
|
||||
other.for_each_cell_until([&] (column_id id, atomic_cell_or_collection& cell) {
|
||||
apply(s.column_at(kind, id), std::move(cell));
|
||||
return stop_iteration::no;
|
||||
// Row-level revert: visits every cell of `other` and calls the per-cell
// revert(column, cell) on this row, resolving the column definition through
// s.column_at(kind, id). See reversibly_mergeable.hh for the contract.
void row::revert(const schema& s, column_kind kind, row& other) noexcept {
|
||||
other.for_each_cell([&] (column_id id, atomic_cell_or_collection& cell) noexcept {
|
||||
revert(s.column_at(kind, id), cell);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1348,3 +1559,15 @@ mutation_partition::upgrade(const schema& old_schema, const schema& new_schema)
|
||||
accept(old_schema, v);
|
||||
*this = std::move(tmp);
|
||||
}
|
||||
|
||||
// Merges rm into *this in a way that row_marker::revert(rm) can undo.
//
// When compare_row_marker_for_merge(*this, rm) is negative the two markers are
// swapped, so *this takes rm's value and rm keeps the previous value of *this.
// Otherwise *this is kept and copied into rm, so that the swap performed by
// revert() leaves *this with the same value.
void row_marker::apply_reversibly(row_marker& rm) noexcept {
|
||||
if (compare_row_marker_for_merge(*this, rm) < 0) {
|
||||
std::swap(*this, rm);
|
||||
} else {
|
||||
rm = *this;
|
||||
}
|
||||
}
|
||||
|
||||
// Counterpart of apply_reversibly(): swaps *this with rm. After
// apply_reversibly() rm holds either the previous value of *this or a copy of
// it, so the swap puts the pre-apply value back into *this.
void row_marker::revert(row_marker& rm) noexcept {
|
||||
std::swap(*this, rm);
|
||||
}
|
||||
|
||||
@@ -28,6 +28,8 @@
|
||||
#include <boost/range/adaptor/indexed.hpp>
|
||||
#include <boost/range/adaptor/filtered.hpp>
|
||||
|
||||
#include <seastar/core/bitset-iter.hh>
|
||||
|
||||
#include "schema.hh"
|
||||
#include "tombstone.hh"
|
||||
#include "keys.hh"
|
||||
@@ -58,8 +60,11 @@ class row {
|
||||
: _id(id)
|
||||
, _cell(std::move(cell))
|
||||
{ }
|
||||
cell_entry(column_id id)
|
||||
: _id(id)
|
||||
{ }
|
||||
cell_entry(cell_entry&&) noexcept;
|
||||
cell_entry(const cell_entry&) noexcept;
|
||||
cell_entry(const cell_entry&);
|
||||
|
||||
column_id id() const { return _id; }
|
||||
const atomic_cell_or_collection& cell() const { return _cell; }
|
||||
@@ -96,11 +101,16 @@ public:
|
||||
private:
|
||||
using vector_type = managed_vector<atomic_cell_or_collection, internal_count, size_type>;
|
||||
|
||||
// Storage used while the row is in storage_type::vector mode.
struct vector_storage {
|
||||
// One bit per column id below max_vector_size. The accessors in this file
// set the bit when a cell is stored at v[id] and reset it when the cell is
// removed, and test it instead of checking the cell itself.
std::bitset<max_vector_size> present;
|
||||
// Cells indexed by column id.
vector_type v;
|
||||
};
|
||||
|
||||
union storage {
|
||||
storage() { }
|
||||
~storage() { }
|
||||
map_type set;
|
||||
vector_type vector;
|
||||
vector_storage vector;
|
||||
} _storage;
|
||||
public:
|
||||
row();
|
||||
@@ -109,6 +119,7 @@ public:
|
||||
row(row&& other);
|
||||
row& operator=(row&& other);
|
||||
size_t size() const { return _size; }
|
||||
bool empty() const { return _size == 0; }
|
||||
|
||||
void reserve(column_id);
|
||||
|
||||
@@ -120,13 +131,14 @@ private:
|
||||
template<typename Func>
|
||||
void remove_if(Func&& func) {
|
||||
if (_type == storage_type::vector) {
|
||||
for (unsigned i = 0; i < _storage.vector.size(); i++) {
|
||||
auto& c = _storage.vector[i];
|
||||
if (!bool(c)) {
|
||||
for (unsigned i = 0; i < _storage.vector.v.size(); i++) {
|
||||
if (!_storage.vector.present.test(i)) {
|
||||
continue;
|
||||
}
|
||||
auto& c = _storage.vector.v[i];
|
||||
if (func(i, c)) {
|
||||
c = atomic_cell_or_collection();
|
||||
_storage.vector.present.reset(i);
|
||||
_size--;
|
||||
}
|
||||
}
|
||||
@@ -146,11 +158,12 @@ private:
|
||||
|
||||
private:
|
||||
auto get_range_vector() const {
|
||||
auto range = boost::make_iterator_range(_storage.vector.begin(), _storage.vector.end());
|
||||
return range | boost::adaptors::filtered([] (const atomic_cell_or_collection& c) { return bool(c); })
|
||||
| boost::adaptors::transformed([this] (const atomic_cell_or_collection& c) {
|
||||
auto id = &c - _storage.vector.data();
|
||||
return std::pair<column_id, const atomic_cell_or_collection&>(id, std::cref(c));
|
||||
auto id_range = boost::irange<column_id>(0, _storage.vector.v.size());
|
||||
return boost::combine(id_range, _storage.vector.v)
|
||||
| boost::adaptors::filtered([this] (const boost::tuple<const column_id&, const atomic_cell_or_collection&>& t) {
|
||||
return _storage.vector.present.test(t.get<0>());
|
||||
}) | boost::adaptors::transformed([] (const boost::tuple<const column_id&, const atomic_cell_or_collection&>& t) {
|
||||
return std::pair<column_id, const atomic_cell_or_collection&>(t.get<0>(), t.get<1>());
|
||||
});
|
||||
}
|
||||
auto get_range_set() const {
|
||||
@@ -163,7 +176,23 @@ private:
|
||||
auto with_both_ranges(const row& other, Func&& func) const;
|
||||
|
||||
void vector_to_set();
|
||||
|
||||
// Calls Func(column_id, atomic_cell_or_collection&) for each cell in this row.
|
||||
//
|
||||
// Func() is allowed to modify the cell. Emptying a cell makes it still
|
||||
// visible to for_each().
|
||||
//
|
||||
// In case of exception, calls Rollback(column_id, atomic_cell_or_collection&) on
|
||||
// all cells on which Func() was successfully invoked in reverse order.
|
||||
//
|
||||
template<typename Func, typename Rollback>
|
||||
void for_each_cell(Func&&, Rollback&&);
|
||||
public:
|
||||
// Calls Func(column_id, atomic_cell_or_collection&) for each cell in this row.
|
||||
// noexcept if Func doesn't throw.
|
||||
template<typename Func>
|
||||
void for_each_cell(Func&&);
|
||||
|
||||
template<typename Func>
|
||||
void for_each_cell(Func&& func) const {
|
||||
for_each_cell_until([func = std::forward<Func>(func)] (column_id id, const atomic_cell_or_collection& c) {
|
||||
@@ -175,11 +204,8 @@ public:
|
||||
template<typename Func>
|
||||
void for_each_cell_until(Func&& func) const {
|
||||
if (_type == storage_type::vector) {
|
||||
for (unsigned i = 0; i < _storage.vector.size(); i++) {
|
||||
auto& cell = _storage.vector[i];
|
||||
if (!bool(cell)) {
|
||||
continue;
|
||||
}
|
||||
for (auto i : bitsets::for_each_set(_storage.vector.present)) {
|
||||
auto& cell = _storage.vector.v[i];
|
||||
if (func(i, cell) == stop_iteration::yes) {
|
||||
break;
|
||||
}
|
||||
@@ -187,29 +213,7 @@ public:
|
||||
} else {
|
||||
for (auto& cell : _storage.set) {
|
||||
const auto& c = cell.cell();
|
||||
if (c && func(cell.id(), c) == stop_iteration::yes) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Func>
|
||||
void for_each_cell_until(Func&& func) {
|
||||
if (_type == storage_type::vector) {
|
||||
for (unsigned i = 0; i < _storage.vector.size(); i++) {
|
||||
auto& cell = _storage.vector[i];
|
||||
if (!bool(cell)) {
|
||||
continue;
|
||||
}
|
||||
if (func(i, cell) == stop_iteration::yes) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (auto& cell : _storage.set) {
|
||||
auto& c = cell.cell();
|
||||
if (c && func(cell.id(), c) == stop_iteration::yes) {
|
||||
if (func(cell.id(), c) == stop_iteration::yes) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -222,21 +226,26 @@ public:
|
||||
//
|
||||
// Merges cell's value into the row.
|
||||
//
|
||||
// In case of exception the current object and external object (moved-from)
|
||||
// are both left in some valid states, such that they still will commute to
|
||||
// a state the current object would have reached had the exception not occurred.
|
||||
// In case of exception the current object is left with a value equivalent to the original state.
|
||||
//
|
||||
// The external cell is left in a valid state, such that it will commute with
|
||||
// current object to the same value as if the exception had not occurred.
|
||||
//
|
||||
void apply(const column_definition& column, atomic_cell_or_collection&& cell);
|
||||
|
||||
// Equivalent to calling apply_reversibly() with a row containing only given cell.
|
||||
// See reversibly_mergeable.hh
|
||||
void apply_reversibly(const column_definition& column, atomic_cell_or_collection& cell);
|
||||
// See reversibly_mergeable.hh
|
||||
void revert(const column_definition& column, atomic_cell_or_collection& cell) noexcept;
|
||||
|
||||
// Adds cell to the row. The column must not be already set.
|
||||
void append_cell(column_id id, atomic_cell_or_collection cell);
|
||||
|
||||
void merge(const schema& s, column_kind kind, const row& other);
|
||||
|
||||
// In case of exception the current object and external object (moved-from)
|
||||
// are both left in some valid states, such that they still will commute to
|
||||
// a state the current object would have reached had the exception not occurred.
|
||||
void merge(const schema& s, column_kind kind, row&& other);
|
||||
// See reversibly_mergeable.hh
|
||||
void apply_reversibly(const schema&, column_kind, row& src);
|
||||
// See reversibly_mergeable.hh
|
||||
void revert(const schema&, column_kind, row& src) noexcept;
|
||||
|
||||
// Expires cells based on query_time. Expires tombstones based on gc_before
|
||||
// and max_purgeable. Removes cells covered by tomb.
|
||||
@@ -258,7 +267,7 @@ public:
|
||||
std::ostream& operator<<(std::ostream& os, const std::pair<column_id, const atomic_cell_or_collection&>& c);
|
||||
|
||||
class row_marker;
|
||||
int compare_row_marker_for_merge(const row_marker& left, const row_marker& right);
|
||||
int compare_row_marker_for_merge(const row_marker& left, const row_marker& right) noexcept;
|
||||
|
||||
class row_marker {
|
||||
static constexpr gc_clock::duration no_ttl { 0 };
|
||||
@@ -321,6 +330,10 @@ public:
|
||||
*this = rm;
|
||||
}
|
||||
}
|
||||
// See reversibly_mergeable.hh
|
||||
void apply_reversibly(row_marker& rm) noexcept;
|
||||
// See reversibly_mergeable.hh
|
||||
void revert(row_marker& rm) noexcept;
|
||||
// Expires cells and tombstones. Removes items covered by higher level
|
||||
// tombstones.
|
||||
// Returns true if row marker is live.
|
||||
@@ -398,6 +411,11 @@ public:
|
||||
void remove_tombstone() {
|
||||
_deleted_at = tombstone();
|
||||
}
|
||||
|
||||
// See reversibly_mergeable.hh
|
||||
void apply_reversibly(const schema& s, deletable_row& src);
|
||||
// See reversibly_mergeable.hh
|
||||
void revert(const schema& s, deletable_row& src);
|
||||
public:
|
||||
tombstone deleted_at() const { return _deleted_at; }
|
||||
api::timestamp_type created_at() const { return _marker.timestamp(); }
|
||||
@@ -422,6 +440,9 @@ public:
|
||||
: _prefix(std::move(prefix))
|
||||
, _t(std::move(t))
|
||||
{ }
|
||||
row_tombstones_entry(const clustering_key_prefix& prefix)
|
||||
: _prefix(prefix)
|
||||
{ }
|
||||
row_tombstones_entry(row_tombstones_entry&& o) noexcept;
|
||||
row_tombstones_entry(const row_tombstones_entry&) = default;
|
||||
clustering_key_prefix& prefix() {
|
||||
@@ -430,6 +451,9 @@ public:
|
||||
const clustering_key_prefix& prefix() const {
|
||||
return _prefix;
|
||||
}
|
||||
const clustering_key_prefix& key() const {
|
||||
return _prefix;
|
||||
}
|
||||
tombstone& t() {
|
||||
return _t;
|
||||
}
|
||||
@@ -439,6 +463,14 @@ public:
|
||||
void apply(tombstone t) {
|
||||
_t.apply(t);
|
||||
}
|
||||
// See reversibly_mergeable.hh
|
||||
void apply_reversibly(row_tombstones_entry& e) {
|
||||
_t.apply_reversibly(e._t);
|
||||
}
|
||||
// See reversibly_mergeable.hh
|
||||
void revert(row_tombstones_entry& e) noexcept {
|
||||
_t.revert(e._t);
|
||||
}
|
||||
struct compare {
|
||||
clustering_key_prefix::less_compare _c;
|
||||
compare(const schema& s) : _c(s) {}
|
||||
@@ -472,6 +504,9 @@ public:
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const row_tombstones_entry& rte);
|
||||
bool equal(const schema& s, const row_tombstones_entry& other) const;
|
||||
bool empty() const {
|
||||
return !_t;
|
||||
}
|
||||
};
|
||||
|
||||
class rows_entry {
|
||||
@@ -512,6 +547,14 @@ public:
|
||||
void apply(tombstone t) {
|
||||
_row.apply(t);
|
||||
}
|
||||
// See reversibly_mergeable.hh
|
||||
void apply_reversibly(const schema& s, rows_entry& e) {
|
||||
_row.apply_reversibly(s, e._row);
|
||||
}
|
||||
// See reversibly_mergeable.hh
|
||||
void revert(const schema& s, rows_entry& e) noexcept {
|
||||
_row.revert(s, e._row);
|
||||
}
|
||||
bool empty() const {
|
||||
return _row.empty();
|
||||
}
|
||||
@@ -570,8 +613,8 @@ class mutation_partition final {
|
||||
using row_tombstones_type = boost::intrusive::set<row_tombstones_entry,
|
||||
boost::intrusive::member_hook<row_tombstones_entry, boost::intrusive::set_member_hook<>, &row_tombstones_entry::_link>,
|
||||
boost::intrusive::compare<row_tombstones_entry::compare>>;
|
||||
friend rows_entry;
|
||||
friend row_tombstones_entry;
|
||||
friend class rows_entry;
|
||||
friend class row_tombstones_entry;
|
||||
friend class size_calculator;
|
||||
private:
|
||||
tombstone _tombstone;
|
||||
@@ -626,19 +669,21 @@ public:
|
||||
// Commutative when this_schema == p_schema. If schemas differ, data in p which
|
||||
// is not representable in this_schema is dropped, thus apply() loses commutativity.
|
||||
//
|
||||
// Basic exception guarantees. If apply() throws after being called in
|
||||
// some entry state p0, the object is left in some consistent state p1 and
|
||||
// it's possible that p1 != p0 + p. It holds though that p1 + p = p0 + p.
|
||||
//
|
||||
// FIXME: make stronger exception guarantees (p1 = p0).
|
||||
// Strong exception guarantees.
|
||||
void apply(const schema& this_schema, const mutation_partition& p, const schema& p_schema);
|
||||
//
|
||||
// Same guarantees as for apply(const schema&, const mutation_partition&).
|
||||
// Applies p to current object.
|
||||
//
|
||||
// In case of exception the current object and external object (moved-from)
|
||||
// are both left in some valid states, such that they still will commute to
|
||||
// a state the current object would have reached had the exception not occurred.
|
||||
// Commutative when this_schema == p_schema. If schemas differ, data in p which
|
||||
// is not representable in this_schema is dropped, thus apply() loses commutativity.
|
||||
//
|
||||
// If exception is thrown, this object will be left in a state equivalent to the entry state
|
||||
// and p will be left in a state which will commute with current object to the same value
|
||||
// as if the exception had not occurred.
|
||||
void apply(const schema& this_schema, mutation_partition&& p, const schema& p_schema);
|
||||
// Use in case this instance and p share the same schema.
|
||||
// Same guarantees as apply(const schema&, mutation_partition&&, const schema&);
|
||||
void apply(const schema& s, mutation_partition&& p);
|
||||
// Same guarantees and constraints as for apply(const schema&, const mutation_partition&, const schema&).
|
||||
void apply(const schema& this_schema, mutation_partition_view p, const schema& p_schema);
|
||||
|
||||
|
||||
69
reversibly_mergeable.hh
Normal file
69
reversibly_mergeable.hh
Normal file
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (C) 2016 Cloudius Systems, Ltd.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "utils/allocation_strategy.hh"
|
||||
#include <seastar/util/defer.hh>
|
||||
|
||||
//
|
||||
// ~~ Definitions ~~
|
||||
//
|
||||
// Mergeable type is a type which has an associated "apply" binary operation (T x T -> T)
|
||||
// which forms a commutative semigroup with instances of that type.
|
||||
//
|
||||
// ReversiblyMergeable type is a Mergeable type which has two binary operations associated,
|
||||
// "apply_reversibly" and "revert", both working on objects of that type (T x T -> T x T)
|
||||
// with the following properties:
|
||||
//
|
||||
// apply_reversibly(x, y) = (x', y')
|
||||
// revert(x', y') = (x'', y'')
|
||||
//
|
||||
// x' = apply(x, y)
|
||||
// x'' = x
|
||||
// apply(x'', y'') = apply(x, y)
|
||||
//
|
||||
// Note that it is not guaranteed that y'' = y and the state of y' is unspecified.
|
||||
//
|
||||
// ~~ API ~~
|
||||
//
|
||||
// "apply_reversibly" and "revert" are usually implemented as instance methods or functions
|
||||
// mutating both arguments to store the result of the operation in them.
|
||||
//
|
||||
// "revert" is not allowed to throw. If "apply_reversibly" throws the objects on which it operates
|
||||
// are left in valid states, with guarantees the same as if a successful apply_reversibly() was
|
||||
// followed by revert().
|
||||
//
|
||||
|
||||
|
||||
// Function object that forwards to the member form of "apply_reversibly":
// calling it with (dst, src) invokes dst.apply_reversibly(src).
// T must provide a member apply_reversibly(T&).
template<typename T>
|
||||
struct default_reversible_applier {
|
||||
void operator()(T& dst, T& src) const {
|
||||
dst.apply_reversibly(src);
|
||||
}
|
||||
};
|
||||
|
||||
// Function object that forwards to the member form of "revert":
// calling it with (dst, src) invokes dst.revert(src).
// T must provide a member revert(T&); the call operator is noexcept, in line
// with the rule stated in this file that "revert" is not allowed to throw.
template<typename T>
|
||||
struct default_reverter {
|
||||
void operator()(T& dst, T& src) const noexcept {
|
||||
dst.revert(src);
|
||||
}
|
||||
};
|
||||
@@ -36,14 +36,29 @@ done
|
||||
. /etc/os-release
|
||||
|
||||
if [ "$ID" = "ubuntu" ]; then
|
||||
if [ "$LOCAL_PKG" = "" ]; then
|
||||
echo "#!/bin/sh" >> /usr/sbin/policy-rc.d
|
||||
echo "exit 101" >> /usr/sbin/policy-rc.d
|
||||
chmod +x /usr/sbin/policy-rc.d
|
||||
cp /etc/hosts /etc/hosts.orig
|
||||
echo 127.0.0.1 `hostname` >> /etc/hosts
|
||||
if [ $UNSTABLE -eq 0 ]; then
|
||||
echo "deb http://s3.amazonaws.com/downloads.scylladb.com/deb/ubuntu trusty/scylladb multiverse" > /etc/apt/sources.list.d/scylla.list
|
||||
apt-get update
|
||||
else
|
||||
echo "deb https://s3.amazonaws.com/downloads.scylladb.com/deb/unstable/ubuntu/master/latest trusty/scylladb multiverse" > /etc/apt/sources.list.d/scylla.list
|
||||
fi
|
||||
apt-get update
|
||||
if [ "$LOCAL_PKG" = "" ]; then
|
||||
apt-get install -y --force-yes scylla-server scylla-jmx scylla-tools
|
||||
else
|
||||
apt-get install -y --force-yes gdebi-core
|
||||
gdebi $LOCAL_PKG/scylla-server*.deb $LOCAL_PKG/scylla-jmx*.deb $LOCAL_PKG/scylla-tools*.deb
|
||||
if [ ! -f /usr/bin/gdebi ]; then
|
||||
apt-get install -y --force-yes gdebi-core
|
||||
fi
|
||||
echo Y | gdebi $LOCAL_PKG/scylla-server*.deb
|
||||
echo Y | gdebi $LOCAL_PKG/scylla-jmx*.deb
|
||||
echo Y | gdebi $LOCAL_PKG/scylla-tools*.deb
|
||||
fi
|
||||
mv /etc/hosts.orig /etc/hosts
|
||||
rm /usr/sbin/policy-rc.d
|
||||
else
|
||||
if [ "$ID" = "fedora" ]; then
|
||||
if [ $UNSTABLE -eq 0 ]; then
|
||||
|
||||
2
seastar
2
seastar
Submodule seastar updated: 6a207e11b1...9f2b8686a0
@@ -2514,18 +2514,37 @@ future<> storage_service::load_new_sstables(sstring ks_name, sstring cf_name) {
|
||||
auto& cf = db.find_column_family(ks_name, cf_name);
|
||||
return cf.disable_sstable_write();
|
||||
}).then([this, cf_name, ks_name] (int64_t max_seen_sstable) {
|
||||
logger.debug("Loading new sstables with generation numbers larger or equal than {}", max_seen_sstable);
|
||||
// Then, we will reshuffle the tables to make sure that the generation numbers don't go too high.
|
||||
// We will do all of it the same CPU, to make sure that we won't have two parallel shufflers stepping
|
||||
// onto each other.
|
||||
//
|
||||
// Note that this will reshuffle all tables, including existing ones. Figuring out which of the tables
|
||||
// are new would require coordination between all shards, so it is simpler this way. Renaming an existing
|
||||
// SSTable shouldn't be that bad, and we are assuming empty directory for normal operation anyway.
|
||||
auto shard = std::hash<sstring>()(cf_name) % smp::count;
|
||||
return _db.invoke_on(shard, [ks_name, cf_name, max_seen_sstable] (database& db) {
|
||||
|
||||
// Reducer for _db.map_reduce(): accumulates the union of the generation sets
// produced by the individual database instances.
class all_generations {
|
||||
// Union of every set received so far.
std::set<int64_t> _result;
|
||||
public:
|
||||
// Folds one partial result into the accumulated set.
future<> operator()(std::set<int64_t> value) {
|
||||
_result.insert(value.begin(), value.end());
|
||||
return make_ready_future<>();
|
||||
}
|
||||
// Yields the accumulated set. Only callable on an rvalue reducer, so the
// set is moved out; returning the member by name would copy it, because
// data members are never implicitly moved.
std::set<int64_t> get() && {
|
||||
return std::move(_result);
|
||||
}
|
||||
};
|
||||
|
||||
// We provide to reshuffle_sstables() the generation of all existing sstables, such that it will
|
||||
// easily know which sstables are new.
|
||||
return _db.map_reduce(all_generations(), [ks_name, cf_name] (database& db) {
|
||||
auto& cf = db.find_column_family(ks_name, cf_name);
|
||||
return cf.reshuffle_sstables(max_seen_sstable);
|
||||
std::set<int64_t> generations;
|
||||
for (auto& p : *(cf.get_sstables())) {
|
||||
generations.insert(p.second->generation());
|
||||
}
|
||||
return make_ready_future<std::set<int64_t>>(std::move(generations));
|
||||
}).then([this, max_seen_sstable, ks_name, cf_name] (std::set<int64_t> all_generations) {
|
||||
auto shard = std::hash<sstring>()(cf_name) % smp::count;
|
||||
return _db.invoke_on(shard, [ks_name, cf_name, max_seen_sstable, all_generations = std::move(all_generations)] (database& db) {
|
||||
auto& cf = db.find_column_family(ks_name, cf_name);
|
||||
return cf.reshuffle_sstables(std::move(all_generations), max_seen_sstable + 1);
|
||||
});
|
||||
});
|
||||
}).then_wrapped([this, ks_name, cf_name] (future<std::vector<sstables::entry_descriptor>> f) {
|
||||
std::vector<sstables::entry_descriptor> new_tables;
|
||||
|
||||
@@ -229,9 +229,14 @@ public:
|
||||
: _compression_metadata(cm)
|
||||
{
|
||||
_beg_pos = pos;
|
||||
if (pos >= _compression_metadata->data_len) {
|
||||
if (pos > _compression_metadata->data_len) {
|
||||
throw std::runtime_error("attempt to uncompress beyond end");
|
||||
}
|
||||
if (len == 0 || pos == _compression_metadata->data_len) {
|
||||
// Nothing to read
|
||||
_end_pos = _pos = _beg_pos;
|
||||
return;
|
||||
}
|
||||
if (len <= _compression_metadata->data_len - pos) {
|
||||
_end_pos = pos + len;
|
||||
} else {
|
||||
|
||||
@@ -495,52 +495,59 @@ class mutation_reader::impl {
|
||||
private:
|
||||
mp_row_consumer _consumer;
|
||||
std::experimental::optional<data_consume_context> _context;
|
||||
std::experimental::optional<future<data_consume_context>> _context_future;
|
||||
std::function<future<data_consume_context> ()> _get_context;
|
||||
public:
|
||||
impl(sstable& sst, schema_ptr schema, uint64_t start, uint64_t end,
|
||||
const io_priority_class &pc)
|
||||
: _consumer(schema, pc)
|
||||
, _context(sst.data_consume_rows(_consumer, start, end)) { }
|
||||
, _get_context([&sst, this, start, end] {
|
||||
return make_ready_future<data_consume_context>(sst.data_consume_rows(_consumer, start, end));
|
||||
}) { }
|
||||
impl(sstable& sst, schema_ptr schema,
|
||||
const io_priority_class &pc)
|
||||
: _consumer(schema, pc)
|
||||
, _context(sst.data_consume_rows(_consumer)) { }
|
||||
impl(sstable& sst, schema_ptr schema, future<uint64_t> start, future<uint64_t> end, const io_priority_class& pc)
|
||||
, _get_context([this, &sst] {
|
||||
return make_ready_future<data_consume_context>(sst.data_consume_rows(_consumer));
|
||||
}) { }
|
||||
impl(sstable& sst, schema_ptr schema, std::function<future<uint64_t>()> start, std::function<future<uint64_t>()> end, const io_priority_class& pc)
|
||||
: _consumer(schema, pc)
|
||||
, _context_future(start.then([this, &sst, end = std::move(end)] (uint64_t start) mutable {
|
||||
return end.then([this, &sst, start] (uint64_t end) mutable {
|
||||
return sst.data_consume_rows(_consumer, start, end);
|
||||
});
|
||||
})) { }
|
||||
impl() : _consumer() { }
|
||||
, _get_context([this, &sst, start = std::move(start), end = std::move(end)] () {
|
||||
return start().then([this, &sst, end = std::move(end)] (uint64_t start) {
|
||||
return end().then([this, &sst, start] (uint64_t end) {
|
||||
return make_ready_future<data_consume_context>(sst.data_consume_rows(_consumer, start, end));
|
||||
});
|
||||
});
|
||||
}) { }
|
||||
impl() : _consumer(), _get_context() { }
|
||||
|
||||
// Reference to _consumer is passed to data_consume_rows() in the constructor so we must not allow move/copy
|
||||
impl(impl&&) = delete;
|
||||
impl(const impl&) = delete;
|
||||
|
||||
future<mutation_opt> read() {
|
||||
if (_context) {
|
||||
return _context->read().then([this] {
|
||||
// We want after returning a mutation that _consumer.mut()
|
||||
// will be left in unengaged state (so on EOF we return an
|
||||
// unengaged optional). Moving _consumer.mut is *not* enough.
|
||||
auto ret = std::move(_consumer.mut);
|
||||
_consumer.mut = {};
|
||||
return std::move(ret);
|
||||
});
|
||||
} else if (_context_future) {
|
||||
return _context_future->then([this] (auto context) {
|
||||
_context = std::move(context);
|
||||
return _context->read().then([this] {
|
||||
auto ret = std::move(_consumer.mut);
|
||||
_consumer.mut = {};
|
||||
return std::move(ret);
|
||||
});
|
||||
});
|
||||
} else {
|
||||
if (!_get_context) {
|
||||
// empty mutation reader returns EOF immediately
|
||||
return make_ready_future<mutation_opt>();
|
||||
}
|
||||
|
||||
if (_context) {
|
||||
return do_read();
|
||||
}
|
||||
return (_get_context)().then([this] (data_consume_context context) {
|
||||
_context = std::move(context);
|
||||
return do_read();
|
||||
});
|
||||
}
|
||||
private:
|
||||
// Reads from the already-engaged _context and returns whatever the read left
// in _consumer.mut. The optional is unengaged when the read produced no
// mutation (EOF). read() engages _context before calling this.
future<mutation_opt> do_read() {
|
||||
return _context->read().then([this] {
|
||||
// After handing a mutation to the caller, _consumer.mut must be
|
||||
// left unengaged, so that on EOF we return an unengaged optional.
|
||||
// Moving from _consumer.mut is *not* enough, hence the reset below.
|
||||
auto ret = std::move(_consumer.mut);
|
||||
_consumer.mut = {};
|
||||
// Return the local by name: wrapping it in std::move() would block
// copy elision, and the local is moved implicitly anyway.
return ret;
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
@@ -649,17 +656,19 @@ sstable::read_range_rows(schema_ptr schema, const query::partition_range& range,
|
||||
fail(unimplemented::cause::WRAP_AROUND);
|
||||
}
|
||||
|
||||
future<uint64_t> start = range.start()
|
||||
? (range.start()->is_inclusive()
|
||||
auto start = [this, range, schema, &pc] {
|
||||
return range.start() ? (range.start()->is_inclusive()
|
||||
? lower_bound(schema, range.start()->value(), pc)
|
||||
: upper_bound(schema, range.start()->value(), pc))
|
||||
: make_ready_future<uint64_t>(0);
|
||||
};
|
||||
|
||||
future<uint64_t> end = range.end()
|
||||
? (range.end()->is_inclusive()
|
||||
auto end = [this, range, schema, &pc] {
|
||||
return range.end() ? (range.end()->is_inclusive()
|
||||
? upper_bound(schema, range.end()->value(), pc)
|
||||
: lower_bound(schema, range.end()->value(), pc))
|
||||
: make_ready_future<uint64_t>(data_size());
|
||||
};
|
||||
|
||||
return std::make_unique<mutation_reader::impl>(
|
||||
*this, std::move(schema), std::move(start), std::move(end), pc);
|
||||
|
||||
@@ -103,8 +103,6 @@ void stream_session::init_messaging_service_handler() {
|
||||
auto session = get_session(plan_id, from, "PREPARE_MESSAGE");
|
||||
session->init(sr);
|
||||
session->dst_cpu_id = src_cpu_id;
|
||||
sslog.debug("[Stream #{}] GOT PREPARE_MESSAGE from {}: get session peer={}, dst_cpu_id={}",
|
||||
session->plan_id(), from, session->peer, session->dst_cpu_id);
|
||||
return session->prepare(std::move(msg.requests), std::move(msg.summaries));
|
||||
});
|
||||
});
|
||||
@@ -123,13 +121,27 @@ void stream_session::init_messaging_service_handler() {
|
||||
get_local_stream_manager().update_progress(plan_id, from.addr, progress_info::direction::IN, fm_size);
|
||||
return service::get_schema_for_write(fm.schema_version(), from).then([plan_id, from, &fm] (schema_ptr s) {
|
||||
auto cf_id = fm.column_family_id();
|
||||
sslog.debug("[Stream #{}] GOT STREAM_MUTATION from {}: cf_id={}", plan_id, from.addr, cf_id);
|
||||
|
||||
auto& db = service::get_local_storage_proxy().get_db().local();
|
||||
if (!db.column_family_exists(cf_id)) {
|
||||
sslog.debug("[Stream #{}] STREAM_MUTATION from {}: cf_id={} is missing, assume the table is dropped",
|
||||
sslog.warn("[Stream #{}] STREAM_MUTATION from {}: cf_id={} is missing, assume the table is dropped",
|
||||
plan_id, from.addr, cf_id);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return service::get_storage_proxy().local().mutate_locally(std::move(s), fm);
|
||||
return service::get_storage_proxy().local().mutate_locally(std::move(s), fm).then_wrapped([plan_id, cf_id, from] (auto&& f) {
|
||||
try {
|
||||
f.get();
|
||||
return make_ready_future<>();
|
||||
} catch (no_such_column_family) {
|
||||
sslog.warn("[Stream #{}] STREAM_MUTATION from {}: cf_id={} is missing, assume the table is dropped",
|
||||
plan_id, from.addr, cf_id);
|
||||
return make_ready_future<>();
|
||||
} catch (...) {
|
||||
throw;
|
||||
}
|
||||
return make_ready_future<>();
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -140,13 +152,21 @@ void stream_session::init_messaging_service_handler() {
|
||||
session->receive_task_completed(cf_id);
|
||||
return session->get_db().invoke_on_all([ranges = std::move(ranges), plan_id, from, cf_id] (database& db) {
|
||||
if (!db.column_family_exists(cf_id)) {
|
||||
sslog.debug("[Stream #{}] STREAM_MUTATION_DONE from {}: cf_id={} is missing, assume the table is dropped",
|
||||
sslog.warn("[Stream #{}] STREAM_MUTATION_DONE from {}: cf_id={} is missing, assume the table is dropped",
|
||||
plan_id, from, cf_id);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
auto& cf = db.find_column_family(cf_id);
|
||||
for (auto& range : ranges) {
|
||||
cf.get_row_cache().invalidate(query::to_partition_range(range));
|
||||
try {
|
||||
auto& cf = db.find_column_family(cf_id);
|
||||
for (auto& range : ranges) {
|
||||
cf.get_row_cache().invalidate(query::to_partition_range(range));
|
||||
}
|
||||
} catch (no_such_column_family) {
|
||||
sslog.warn("[Stream #{}] STREAM_MUTATION_DONE from {}: cf_id={} is missing, assume the table is dropped",
|
||||
plan_id, from, cf_id);
|
||||
return make_ready_future<>();
|
||||
} catch (...) {
|
||||
throw;
|
||||
}
|
||||
return make_ready_future<>();
|
||||
});
|
||||
|
||||
@@ -333,7 +333,6 @@ public:
|
||||
|
||||
gms::get_gossiper().stop().get();
|
||||
gms::get_failure_detector().stop().get();
|
||||
net::get_messaging_service().stop().get();
|
||||
|
||||
_db->stop().get();
|
||||
|
||||
@@ -343,6 +342,8 @@ public:
|
||||
|
||||
sstables::await_background_jobs_on_all_shards().get();
|
||||
|
||||
net::get_messaging_service().stop().get();
|
||||
|
||||
bool old_active = true;
|
||||
assert(active.compare_exchange_strong(old_active, false));
|
||||
});
|
||||
|
||||
@@ -46,9 +46,9 @@ SEASTAR_TEST_CASE(test_boot_shutdown){
|
||||
gms::get_gossiper().start().get();
|
||||
gms::get_gossiper().stop().get();
|
||||
gms::get_failure_detector().stop().get();
|
||||
net::get_messaging_service().stop().get();
|
||||
db.stop().get();
|
||||
service::get_storage_service().stop().get();
|
||||
net::get_messaging_service().stop().get();
|
||||
locator::i_endpoint_snitch::stop_snitch().get();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -108,7 +108,7 @@ SEASTAR_TEST_CASE(test_compaction_with_multiple_regions) {
|
||||
}
|
||||
});
|
||||
|
||||
size_t quarter = shard_tracker().occupancy().total_space() / 4;
|
||||
size_t quarter = shard_tracker().region_occupancy().total_space() / 4;
|
||||
|
||||
shard_tracker().reclaim_all_free_segments();
|
||||
|
||||
|
||||
@@ -63,6 +63,15 @@ static mutation_partition get_partition(memtable& mt, const partition_key& key)
|
||||
return std::move(mo->partition());
|
||||
}
|
||||
|
||||
// Builds a blob of `blob_size` pseudo-random bytes.
//
// The byte source is a thread-local, default-constructed engine, so it is not
// reseeded between calls: successive calls on the same thread keep drawing from
// one continuing sequence.
bytes make_blob(size_t blob_size) {
    static thread_local std::independent_bits_engine<std::default_random_engine, 8, uint8_t> random_bytes;
    // Storage is allocated uninitialized; every byte is overwritten below.
    bytes big_blob(bytes::initialized_later(), blob_size);
    for (auto&& b : big_blob) {
        b = random_bytes();
    }
    return big_blob;
}
|
||||
|
||||
template <typename Func>
|
||||
future<>
|
||||
with_column_family(schema_ptr s, column_family::config cfg, Func func) {
|
||||
@@ -270,6 +279,7 @@ SEASTAR_TEST_CASE(test_list_mutations) {
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_multiple_memtables_one_partition) {
|
||||
return seastar::async([] {
|
||||
auto s = make_lw_shared(schema({}, some_keyspace, some_column_family,
|
||||
{{"p1", utf8_type}}, {{"c1", int32_type}}, {{"r1", int32_type}}, {}, utf8_type));
|
||||
|
||||
@@ -280,7 +290,7 @@ SEASTAR_TEST_CASE(test_multiple_memtables_one_partition) {
|
||||
cfg.enable_incremental_backups = false;
|
||||
cfg.cf_stats = &*cf_stats;
|
||||
|
||||
return with_column_family(s, cfg, [s] (column_family& cf) {
|
||||
with_column_family(s, cfg, [s] (column_family& cf) {
|
||||
const column_definition& r1_col = *s->get_column_definition("r1");
|
||||
auto key = partition_key::from_exploded(*s, {to_bytes("key1")});
|
||||
|
||||
@@ -291,26 +301,30 @@ SEASTAR_TEST_CASE(test_multiple_memtables_one_partition) {
|
||||
cf.apply(std::move(m));
|
||||
return cf.flush();
|
||||
};
|
||||
return when_all(
|
||||
insert_row(1001, 2001),
|
||||
insert_row(1002, 2002),
|
||||
insert_row(1003, 2003)).discard_result().then([s, &r1_col, &cf, key] {
|
||||
insert_row(1001, 2001).get();
|
||||
insert_row(1002, 2002).get();
|
||||
insert_row(1003, 2003).get();
|
||||
{
|
||||
auto verify_row = [&] (int32_t c1, int32_t r1) {
|
||||
auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(c1)});
|
||||
return cf.find_row(cf.schema(), dht::global_partitioner().decorate_key(*s, key), std::move(c_key)).then([r1, r1_col] (auto r) {
|
||||
auto p_key = dht::global_partitioner().decorate_key(*s, key);
|
||||
auto r = cf.find_row(cf.schema(), p_key, c_key).get0();
|
||||
{
|
||||
BOOST_REQUIRE(r);
|
||||
auto i = r->find_cell(r1_col.id);
|
||||
BOOST_REQUIRE(i);
|
||||
auto cell = i->as_atomic_cell();
|
||||
BOOST_REQUIRE(cell.is_live());
|
||||
BOOST_REQUIRE(int32_type->equal(cell.value(), int32_type->decompose(r1)));
|
||||
});
|
||||
}
|
||||
};
|
||||
verify_row(1001, 2001);
|
||||
verify_row(1002, 2002);
|
||||
verify_row(1003, 2003);
|
||||
});
|
||||
}).then([cf_stats] {});
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}).get();
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_flush_in_the_middle_of_a_scan) {
|
||||
@@ -690,6 +704,275 @@ SEASTAR_TEST_CASE(test_row_counting) {
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_tombstone_apply) {
|
||||
auto s = schema_builder("ks", "cf")
|
||||
.with_column("pk", bytes_type, column_kind::partition_key)
|
||||
.with_column("v", bytes_type, column_kind::regular_column)
|
||||
.build();
|
||||
|
||||
auto pkey = partition_key::from_single_value(*s, "key1");
|
||||
|
||||
mutation m1(pkey, s);
|
||||
|
||||
BOOST_REQUIRE_EQUAL(m1.partition().partition_tombstone(), tombstone());
|
||||
|
||||
mutation m2(pkey, s);
|
||||
auto tomb = tombstone(api::new_timestamp(), gc_clock::now());
|
||||
m2.partition().apply(tomb);
|
||||
BOOST_REQUIRE_EQUAL(m2.partition().partition_tombstone(), tomb);
|
||||
|
||||
m1.apply(m2);
|
||||
|
||||
BOOST_REQUIRE_EQUAL(m1.partition().partition_tombstone(), tomb);
|
||||
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_marker_apply) {
|
||||
auto s = schema_builder("ks", "cf")
|
||||
.with_column("pk", bytes_type, column_kind::partition_key)
|
||||
.with_column("ck", bytes_type, column_kind::clustering_key)
|
||||
.with_column("v", bytes_type, column_kind::regular_column)
|
||||
.build();
|
||||
|
||||
auto pkey = partition_key::from_single_value(*s, "pk1");
|
||||
auto ckey = clustering_key::from_single_value(*s, "ck1");
|
||||
|
||||
auto mutation_with_marker = [&] (row_marker rm) {
|
||||
mutation m(pkey, s);
|
||||
m.partition().clustered_row(ckey).marker() = rm;
|
||||
return m;
|
||||
};
|
||||
|
||||
{
|
||||
mutation m(pkey, s);
|
||||
auto marker = row_marker(api::new_timestamp());
|
||||
auto mm = mutation_with_marker(marker);
|
||||
m.apply(mm);
|
||||
BOOST_REQUIRE_EQUAL(m.partition().clustered_row(ckey).marker(), marker);
|
||||
}
|
||||
|
||||
{
|
||||
mutation m(pkey, s);
|
||||
auto marker = row_marker(api::new_timestamp(), std::chrono::seconds(1), gc_clock::now());
|
||||
m.apply(mutation_with_marker(marker));
|
||||
BOOST_REQUIRE_EQUAL(m.partition().clustered_row(ckey).marker(), marker);
|
||||
}
|
||||
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
class failure_injecting_allocation_strategy : public allocation_strategy {
|
||||
allocation_strategy& _delegate;
|
||||
uint64_t _alloc_count;
|
||||
uint64_t _fail_at = std::numeric_limits<uint64_t>::max();
|
||||
public:
|
||||
failure_injecting_allocation_strategy(allocation_strategy& delegate) : _delegate(delegate) {}
|
||||
|
||||
virtual void* alloc(migrate_fn mf, size_t size, size_t alignment) override {
|
||||
if (_alloc_count >= _fail_at) {
|
||||
stop_failing();
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
++_alloc_count;
|
||||
return _delegate.alloc(mf, size, alignment);
|
||||
}
|
||||
|
||||
virtual void free(void* ptr) override {
|
||||
_delegate.free(ptr);
|
||||
}
|
||||
|
||||
// Counts allocation attempts which are not failed due to fail_at().
|
||||
uint64_t alloc_count() const {
|
||||
return _alloc_count;
|
||||
}
|
||||
|
||||
void fail_after(uint64_t count) {
|
||||
_fail_at = _alloc_count + count;
|
||||
}
|
||||
|
||||
void stop_failing() {
|
||||
_fail_at = std::numeric_limits<uint64_t>::max();
|
||||
}
|
||||
};
|
||||
|
||||
SEASTAR_TEST_CASE(test_apply_is_atomic_in_case_of_allocation_failures) {
|
||||
auto builder = schema_builder("ks", "cf")
|
||||
.with_column("pk", bytes_type, column_kind::partition_key)
|
||||
.with_column("ck1", bytes_type, column_kind::clustering_key)
|
||||
.with_column("ck2", bytes_type, column_kind::clustering_key);
|
||||
|
||||
// Create enough columns so that row can overflow its vector storage
|
||||
std::vector<sstring> regular_column_names;
|
||||
std::vector<sstring> static_column_names;
|
||||
column_id column_count = row::max_vector_size * 2;
|
||||
for (column_id i = 0; i < column_count; ++i) {
|
||||
{
|
||||
auto column_name = sprint("v%d", i);
|
||||
regular_column_names.push_back(column_name);
|
||||
builder.with_column(to_bytes(column_name), bytes_type, column_kind::regular_column);
|
||||
}
|
||||
{
|
||||
auto column_name = sprint("s%d", i);
|
||||
static_column_names.push_back(column_name);
|
||||
builder.with_column(to_bytes(column_name), bytes_type, column_kind::static_column);
|
||||
}
|
||||
}
|
||||
|
||||
auto s = builder.build();
|
||||
|
||||
// Should be enough to force use of external bytes storage
|
||||
constexpr size_t external_blob_size = 128;
|
||||
|
||||
std::vector<bytes> blobs;
|
||||
for (int i = 0; i < 1024; ++i) {
|
||||
blobs.emplace_back(make_blob(external_blob_size));
|
||||
}
|
||||
|
||||
std::random_device rd;
|
||||
// In case of errors, replace the seed with a fixed value to get a deterministic run.
|
||||
auto seed = rd();
|
||||
std::mt19937 gen(seed);
|
||||
BOOST_TEST_MESSAGE(sprint("Random seed: %s", seed));
|
||||
|
||||
auto expiry_dist = [] (auto& gen) {
|
||||
static thread_local std::uniform_int_distribution<int> dist(0, 2);
|
||||
return gc_clock::time_point() + std::chrono::seconds(dist(gen));
|
||||
};
|
||||
|
||||
auto make_random_mutation = [&] {
|
||||
std::uniform_int_distribution<column_id> column_count_dist(1, column_count);
|
||||
std::uniform_int_distribution<column_id> column_id_dist(0, column_count - 1);
|
||||
std::uniform_int_distribution<size_t> value_blob_index_dist(0, 2);
|
||||
std::normal_distribution<> ck_index_dist(blobs.size() / 2, 1.5);
|
||||
std::uniform_int_distribution<int> bool_dist(0, 1);
|
||||
|
||||
std::uniform_int_distribution<api::timestamp_type> timestamp_dist(api::min_timestamp, api::min_timestamp + 2); // 3 values
|
||||
|
||||
auto pkey = partition_key::from_single_value(*s, blobs[0]);
|
||||
mutation m(pkey, s);
|
||||
|
||||
auto set_random_cells = [&] (row& r, column_kind kind) {
|
||||
auto columns_to_set = column_count_dist(gen);
|
||||
for (column_id i = 0; i < columns_to_set; ++i) {
|
||||
// FIXME: generate expiring cells
|
||||
auto cell = bool_dist(gen)
|
||||
? atomic_cell::make_live(timestamp_dist(gen), blobs[value_blob_index_dist(gen)])
|
||||
: atomic_cell::make_dead(timestamp_dist(gen), expiry_dist(gen));
|
||||
r.apply(s->column_at(kind, column_id_dist(gen)), std::move(cell));
|
||||
}
|
||||
};
|
||||
|
||||
auto random_tombstone = [&] {
|
||||
return tombstone(timestamp_dist(gen), expiry_dist(gen));
|
||||
};
|
||||
|
||||
auto random_row_marker = [&] {
|
||||
static thread_local std::uniform_int_distribution<int> dist(0, 3);
|
||||
switch (dist(gen)) {
|
||||
case 0: return row_marker();
|
||||
case 1: return row_marker(random_tombstone());
|
||||
case 2: return row_marker(timestamp_dist(gen));
|
||||
case 3: return row_marker(timestamp_dist(gen), std::chrono::seconds(1), expiry_dist(gen));
|
||||
default: assert(0);
|
||||
}
|
||||
};
|
||||
|
||||
if (bool_dist(gen)) {
|
||||
m.partition().apply(random_tombstone());
|
||||
}
|
||||
|
||||
set_random_cells(m.partition().static_row(), column_kind::static_column);
|
||||
|
||||
auto random_blob = [&] {
|
||||
return blobs[std::min(blobs.size() - 1, static_cast<size_t>(std::max(0.0, ck_index_dist(gen))))];
|
||||
};
|
||||
|
||||
auto row_count_dist = [&] (auto& gen) {
|
||||
static thread_local std::normal_distribution<> dist(32, 1.5);
|
||||
return static_cast<size_t>(std::min(100.0, std::max(0.0, dist(gen))));
|
||||
};
|
||||
|
||||
size_t row_count = row_count_dist(gen);
|
||||
for (size_t i = 0; i < row_count; ++i) {
|
||||
auto ckey = clustering_key::from_exploded(*s, {random_blob(), random_blob()});
|
||||
deletable_row& row = m.partition().clustered_row(ckey);
|
||||
set_random_cells(row.cells(), column_kind::regular_column);
|
||||
row.marker() = random_row_marker();
|
||||
}
|
||||
|
||||
size_t range_tombstone_count = row_count_dist(gen);
|
||||
for (size_t i = 0; i < range_tombstone_count; ++i) {
|
||||
auto key = clustering_key::from_exploded(*s, {random_blob()});
|
||||
m.partition().apply_row_tombstone(*s, key, random_tombstone());
|
||||
}
|
||||
return m;
|
||||
};
|
||||
|
||||
failure_injecting_allocation_strategy alloc(standard_allocator());
|
||||
with_allocator(alloc, [&] {
|
||||
auto target = make_random_mutation();
|
||||
|
||||
BOOST_TEST_MESSAGE(sprint("Target: %s", target));
|
||||
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
auto second = make_random_mutation();
|
||||
|
||||
BOOST_TEST_MESSAGE(sprint("Second: %s", second));
|
||||
|
||||
auto expected_apply_result = target;
|
||||
expected_apply_result.apply(second);
|
||||
|
||||
BOOST_TEST_MESSAGE(sprint("Expected: %s", expected_apply_result));
|
||||
|
||||
// Test the apply(const mutation&) variant
|
||||
{
|
||||
auto m = target;
|
||||
|
||||
// Try to fail at every possible allocation point during apply()
|
||||
size_t fail_offset = 0;
|
||||
while (true) {
|
||||
BOOST_TEST_MESSAGE(sprint("Failing allocation at %d", fail_offset));
|
||||
alloc.fail_after(fail_offset++);
|
||||
try {
|
||||
m.apply(second);
|
||||
alloc.stop_failing();
|
||||
BOOST_TEST_MESSAGE("Checking that apply has expected result");
|
||||
assert_that(m).is_equal_to(expected_apply_result);
|
||||
break; // we exhausted all allocation points
|
||||
} catch (const std::bad_alloc&) {
|
||||
BOOST_TEST_MESSAGE("Checking that apply was reverted");
|
||||
assert_that(m).is_equal_to(target);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test the apply(mutation&&) variant
|
||||
{
|
||||
size_t fail_offset = 0;
|
||||
while (true) {
|
||||
auto copy_of_second = second;
|
||||
auto m = target;
|
||||
alloc.fail_after(fail_offset++);
|
||||
try {
|
||||
m.apply(std::move(copy_of_second));
|
||||
alloc.stop_failing();
|
||||
assert_that(m).is_equal_to(expected_apply_result);
|
||||
break; // we exhausted all allocation points
|
||||
} catch (const std::bad_alloc&) {
|
||||
assert_that(m).is_equal_to(target);
|
||||
// they should still commute
|
||||
m.apply(copy_of_second);
|
||||
assert_that(m).is_equal_to(expected_apply_result);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_mutation_diff) {
|
||||
return seastar::async([] {
|
||||
auto my_set_type = set_type_impl::get_instance(int32_type, true);
|
||||
@@ -805,15 +1088,6 @@ SEASTAR_TEST_CASE(test_large_blobs) {
|
||||
|
||||
auto mt = make_lw_shared<memtable>(s);
|
||||
|
||||
auto make_blob = [] (size_t blob_size) -> bytes {
|
||||
bytes big_blob(bytes::initialized_later(), blob_size);
|
||||
std::independent_bits_engine<std::default_random_engine, 8, uint8_t> random_bytes;
|
||||
for (auto&& b : big_blob) {
|
||||
b = random_bytes();
|
||||
}
|
||||
return big_blob;
|
||||
};
|
||||
|
||||
auto blob1 = make_blob(1234567);
|
||||
auto blob2 = make_blob(2345678);
|
||||
|
||||
|
||||
@@ -50,6 +50,7 @@ struct test_config {
|
||||
unsigned partitions;
|
||||
unsigned concurrency;
|
||||
bool query_single_key;
|
||||
unsigned duration_in_seconds;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const test_config::run_mode& m) {
|
||||
@@ -79,7 +80,7 @@ future<> test_read(cql_test_env& env, test_config& cfg) {
|
||||
return time_parallel([&env, &cfg, id] {
|
||||
bytes key = make_key(cfg.query_single_key ? 0 : std::rand() % cfg.partitions);
|
||||
return env.execute_prepared(id, {{std::move(key)}}).discard_result();
|
||||
}, cfg.concurrency);
|
||||
}, cfg.concurrency, cfg.duration_in_seconds);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -95,7 +96,7 @@ future<> test_write(cql_test_env& env, test_config& cfg) {
|
||||
return time_parallel([&env, &cfg, id] {
|
||||
bytes key = make_key(cfg.query_single_key ? 0 : std::rand() % cfg.partitions);
|
||||
return env.execute_prepared(id, {{std::move(key)}}).discard_result();
|
||||
}, cfg.concurrency);
|
||||
}, cfg.concurrency, cfg.duration_in_seconds);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -125,6 +126,7 @@ int main(int argc, char** argv) {
|
||||
app.add_options()
|
||||
("partitions", bpo::value<unsigned>()->default_value(10000), "number of partitions")
|
||||
("write", "test write path instead of read path")
|
||||
("duration", bpo::value<unsigned>()->default_value(5), "test duration in seconds")
|
||||
("query-single-key", "test write path instead of read path")
|
||||
("concurrency", bpo::value<unsigned>()->default_value(100), "workers per core");
|
||||
|
||||
@@ -132,6 +134,7 @@ int main(int argc, char** argv) {
|
||||
make_env_for_test().then([&app] (auto env) {
|
||||
auto cfg = make_lw_shared<test_config>();
|
||||
cfg->partitions = app.configuration()["partitions"].as<unsigned>();
|
||||
cfg->duration_in_seconds = app.configuration()["duration"].as<unsigned>();
|
||||
cfg->concurrency = app.configuration()["concurrency"].as<unsigned>();
|
||||
cfg->mode = app.configuration().count("write") ? test_config::run_mode::write : test_config::run_mode::read;
|
||||
cfg->query_single_key = app.configuration().count("query-single-key");
|
||||
|
||||
@@ -106,13 +106,17 @@ int main(int argc, char** argv) {
|
||||
keys.push_back(key);
|
||||
}
|
||||
|
||||
auto reclaimable_memory = [] {
|
||||
return memory::stats().free_memory() + logalloc::shard_tracker().occupancy().free_space();
|
||||
};
|
||||
|
||||
std::cout << "memtable occupancy: " << mt->occupancy() << "\n";
|
||||
std::cout << "Cache occupancy: " << tracker.region().occupancy() << "\n";
|
||||
std::cout << "Free memory: " << memory::stats().free_memory() << "\n";
|
||||
std::cout << "Reclaimable memory: " << reclaimable_memory() << "\n";
|
||||
|
||||
// We need to have enough Free memory to copy memtable into cache
|
||||
// When this assertion fails, increase amount of memory
|
||||
assert(mt->occupancy().used_space() < memory::stats().free_memory());
|
||||
assert(mt->occupancy().used_space() < reclaimable_memory());
|
||||
|
||||
auto checker = [](const partition_key& key) {
|
||||
return partition_presence_checker_result::maybe_exists;
|
||||
@@ -146,13 +150,14 @@ int main(int argc, char** argv) {
|
||||
for (auto&& key : keys) {
|
||||
cache.touch(key);
|
||||
}
|
||||
std::cout << "Free memory: " << memory::stats().free_memory() << "\n";
|
||||
std::cout << "Reclaimable memory: " << reclaimable_memory() << "\n";
|
||||
std::cout << "Cache occupancy: " << tracker.region().occupancy() << "\n";
|
||||
};
|
||||
|
||||
std::deque<std::unique_ptr<char[]>> stuffing;
|
||||
auto fragment_free_space = [&] {
|
||||
stuffing.clear();
|
||||
std::cout << "Reclaimable memory: " << reclaimable_memory() << "\n";
|
||||
std::cout << "Free memory: " << memory::stats().free_memory() << "\n";
|
||||
std::cout << "Cache occupancy: " << tracker.region().occupancy() << "\n";
|
||||
|
||||
@@ -165,6 +170,7 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
|
||||
std::cout << "After fragmenting:\n";
|
||||
std::cout << "Reclaimable memory: " << reclaimable_memory() << "\n";
|
||||
std::cout << "Free memory: " << memory::stats().free_memory() << "\n";
|
||||
std::cout << "Cache occupancy: " << tracker.region().occupancy() << "\n";
|
||||
};
|
||||
|
||||
@@ -864,16 +864,17 @@ SEASTAR_TEST_CASE(reshuffle) {
|
||||
auto cf = make_lw_shared<column_family>(uncompressed_schema(), cfg, column_family::no_commitlog(), *cm);
|
||||
cf->start();
|
||||
cf->mark_ready_for_writes();
|
||||
return cf->reshuffle_sstables(3).then([cm, cf] (std::vector<sstables::entry_descriptor> reshuffled) {
|
||||
BOOST_REQUIRE(reshuffled.size() == 2);
|
||||
BOOST_REQUIRE(reshuffled[0].generation == 3);
|
||||
BOOST_REQUIRE(reshuffled[1].generation == 4);
|
||||
std::set<int64_t> existing_sstables = { 1, 5 };
|
||||
return cf->reshuffle_sstables(existing_sstables, 6).then([cm, cf] (std::vector<sstables::entry_descriptor> reshuffled) {
|
||||
BOOST_REQUIRE(reshuffled.size() == 1);
|
||||
BOOST_REQUIRE(reshuffled[0].generation == 6);
|
||||
return when_all(
|
||||
test_sstable_exists("tests/sstables/generation", 1, true),
|
||||
test_sstable_exists("tests/sstables/generation", 2, false),
|
||||
test_sstable_exists("tests/sstables/generation", 3, true),
|
||||
test_sstable_exists("tests/sstables/generation", 4, true),
|
||||
test_sstable_exists("tests/sstables/generation", 5, false),
|
||||
test_sstable_exists("tests/sstables/generation", 3, false),
|
||||
test_sstable_exists("tests/sstables/generation", 4, false),
|
||||
test_sstable_exists("tests/sstables/generation", 5, true),
|
||||
test_sstable_exists("tests/sstables/generation", 6, true),
|
||||
test_sstable_exists("tests/sstables/generation", 10, false)
|
||||
).discard_result().then([cm] {
|
||||
return cm->stop();
|
||||
|
||||
13
tombstone.hh
13
tombstone.hh
@@ -84,12 +84,23 @@ struct tombstone final {
|
||||
return timestamp != api::missing_timestamp;
|
||||
}
|
||||
|
||||
void apply(const tombstone& t) {
|
||||
void apply(const tombstone& t) noexcept {
|
||||
if (*this < t) {
|
||||
*this = t;
|
||||
}
|
||||
}
|
||||
|
||||
// See reversibly_mergeable.hh
|
||||
void apply_reversibly(tombstone& t) noexcept {
|
||||
std::swap(*this, t);
|
||||
apply(t);
|
||||
}
|
||||
|
||||
// See reversibly_mergeable.hh
|
||||
void revert(tombstone& t) noexcept {
|
||||
std::swap(*this, t);
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& out, const tombstone& t) {
|
||||
if (t) {
|
||||
return out << "{tombstone: timestamp=" << t.timestamp << ", deletion_time=" << t.deletion_time.time_since_epoch().count() << "}";
|
||||
|
||||
@@ -87,6 +87,7 @@ public:
|
||||
size_t compact_and_evict(size_t bytes);
|
||||
void full_compaction();
|
||||
void reclaim_all_free_segments();
|
||||
occupancy_stats region_occupancy();
|
||||
occupancy_stats occupancy();
|
||||
};
|
||||
|
||||
@@ -112,6 +113,10 @@ size_t tracker::reclaim(size_t bytes) {
|
||||
return _impl->reclaim(bytes);
|
||||
}
|
||||
|
||||
occupancy_stats tracker::region_occupancy() {
|
||||
return _impl->region_occupancy();
|
||||
}
|
||||
|
||||
occupancy_stats tracker::occupancy() {
|
||||
return _impl->occupancy();
|
||||
}
|
||||
@@ -551,6 +556,7 @@ public:
|
||||
void on_segment_migration() { _stats.segments_migrated++; }
|
||||
void on_segment_compaction() { _stats.segments_compacted++; }
|
||||
size_t free_segments_in_zones() const { return _free_segments_in_zones; }
|
||||
size_t free_segments() const { return _free_segments_in_zones + _emergency_reserve.size(); }
|
||||
};
|
||||
|
||||
size_t segment_pool::reclaim_segments(size_t target) {
|
||||
@@ -861,6 +867,7 @@ public:
|
||||
void on_segment_migration() { _stats.segments_migrated++; }
|
||||
void on_segment_compaction() { _stats.segments_compacted++; }
|
||||
size_t free_segments_in_zones() const { return 0; }
|
||||
size_t free_segments() const { return 0; }
|
||||
public:
|
||||
class reservation_goal;
|
||||
};
|
||||
@@ -1211,6 +1218,7 @@ public:
|
||||
assert(seg->is_empty());
|
||||
free_segment(seg);
|
||||
}
|
||||
_closed_occupancy = {};
|
||||
if (_active) {
|
||||
assert(_active->is_empty());
|
||||
free_segment(_active);
|
||||
@@ -1527,7 +1535,7 @@ std::ostream& operator<<(std::ostream& out, const occupancy_stats& stats) {
|
||||
stats.used_fraction() * 100, stats.used_space(), stats.total_space());
|
||||
}
|
||||
|
||||
occupancy_stats tracker::impl::occupancy() {
|
||||
occupancy_stats tracker::impl::region_occupancy() {
|
||||
reclaiming_lock _(*this);
|
||||
occupancy_stats total{};
|
||||
for (auto&& r: _regions) {
|
||||
@@ -1536,6 +1544,16 @@ occupancy_stats tracker::impl::occupancy() {
|
||||
return total;
|
||||
}
|
||||
|
||||
// Returns the combined occupancy of all regions (as reported by
// region_occupancy()) plus the segments the shard segment pool currently
// holds free. The free segments' byte size is passed as both arguments of
// occupancy_stats -- presumably (free space, total space), i.e. entirely
// unused capacity; confirm against the occupancy_stats constructor.
occupancy_stats tracker::impl::occupancy() {
    reclaiming_lock _(*this);
    auto total = region_occupancy();
    const auto free_bytes = shard_segment_pool.free_segments() * segment::size;
    total += occupancy_stats(free_bytes, free_bytes);
    return total;
}
|
||||
|
||||
void tracker::impl::reclaim_all_free_segments()
|
||||
{
|
||||
logger.debug("Reclaiming all free segments");
|
||||
@@ -1547,7 +1565,7 @@ void tracker::impl::reclaim_all_free_segments()
|
||||
void tracker::impl::full_compaction() {
|
||||
reclaiming_lock _(*this);
|
||||
|
||||
logger.debug("Full compaction on all regions, {}", occupancy());
|
||||
logger.debug("Full compaction on all regions, {}", region_occupancy());
|
||||
|
||||
for (region_impl* r : _regions) {
|
||||
if (r->is_compactible()) {
|
||||
@@ -1555,7 +1573,7 @@ void tracker::impl::full_compaction() {
|
||||
}
|
||||
}
|
||||
|
||||
logger.debug("Compaction done, {}", occupancy());
|
||||
logger.debug("Compaction done, {}", region_occupancy());
|
||||
}
|
||||
|
||||
static void reclaim_from_evictable(region::impl& r, size_t target_mem_in_use) {
|
||||
@@ -1805,19 +1823,19 @@ void tracker::impl::register_collectd_metrics() {
|
||||
_collectd_registrations = scollectd::registrations({
|
||||
scollectd::add_polled_metric(
|
||||
scollectd::type_instance_id("lsa", scollectd::per_cpu_plugin_instance, "bytes", "total_space"),
|
||||
scollectd::make_typed(scollectd::data_type::GAUGE, [this] { return occupancy().total_space(); })
|
||||
scollectd::make_typed(scollectd::data_type::GAUGE, [this] { return region_occupancy().total_space(); })
|
||||
),
|
||||
scollectd::add_polled_metric(
|
||||
scollectd::type_instance_id("lsa", scollectd::per_cpu_plugin_instance, "bytes", "used_space"),
|
||||
scollectd::make_typed(scollectd::data_type::GAUGE, [this] { return occupancy().used_space(); })
|
||||
scollectd::make_typed(scollectd::data_type::GAUGE, [this] { return region_occupancy().used_space(); })
|
||||
),
|
||||
scollectd::add_polled_metric(
|
||||
scollectd::type_instance_id("lsa", scollectd::per_cpu_plugin_instance, "bytes", "small_objects_total_space"),
|
||||
scollectd::make_typed(scollectd::data_type::GAUGE, [this] { return occupancy().total_space() - shard_segment_pool.non_lsa_memory_in_use(); })
|
||||
scollectd::make_typed(scollectd::data_type::GAUGE, [this] { return region_occupancy().total_space() - shard_segment_pool.non_lsa_memory_in_use(); })
|
||||
),
|
||||
scollectd::add_polled_metric(
|
||||
scollectd::type_instance_id("lsa", scollectd::per_cpu_plugin_instance, "bytes", "small_objects_used_space"),
|
||||
scollectd::make_typed(scollectd::data_type::GAUGE, [this] { return occupancy().used_space() - shard_segment_pool.non_lsa_memory_in_use(); })
|
||||
scollectd::make_typed(scollectd::data_type::GAUGE, [this] { return region_occupancy().used_space() - shard_segment_pool.non_lsa_memory_in_use(); })
|
||||
),
|
||||
scollectd::add_polled_metric(
|
||||
scollectd::type_instance_id("lsa", scollectd::per_cpu_plugin_instance, "bytes", "large_objects_total_space"),
|
||||
@@ -1827,7 +1845,7 @@ void tracker::impl::register_collectd_metrics() {
|
||||
scollectd::type_instance_id("lsa", scollectd::per_cpu_plugin_instance, "bytes", "non_lsa_used_space"),
|
||||
scollectd::make_typed(scollectd::data_type::GAUGE, [this] {
|
||||
auto free_space_in_zones = shard_segment_pool.free_segments_in_zones() * segment_size;
|
||||
return memory::stats().allocated_memory() - occupancy().total_space() - free_space_in_zones; })
|
||||
return memory::stats().allocated_memory() - region_occupancy().total_space() - free_space_in_zones; })
|
||||
),
|
||||
scollectd::add_polled_metric(
|
||||
scollectd::type_instance_id("lsa", scollectd::per_cpu_plugin_instance, "bytes", "free_space_in_zones"),
|
||||
@@ -1835,7 +1853,7 @@ void tracker::impl::register_collectd_metrics() {
|
||||
),
|
||||
scollectd::add_polled_metric(
|
||||
scollectd::type_instance_id("lsa", scollectd::per_cpu_plugin_instance, "percent", "occupancy"),
|
||||
scollectd::make_typed(scollectd::data_type::GAUGE, [this] { return occupancy().used_fraction() * 100; })
|
||||
scollectd::make_typed(scollectd::data_type::GAUGE, [this] { return region_occupancy().used_fraction() * 100; })
|
||||
),
|
||||
scollectd::add_polled_metric(
|
||||
scollectd::type_instance_id("lsa", scollectd::per_cpu_plugin_instance, "objects", "zones"),
|
||||
|
||||
@@ -116,6 +116,9 @@ public:
|
||||
void reclaim_all_free_segments();
|
||||
|
||||
// Returns aggregate statistics for all pools.
|
||||
occupancy_stats region_occupancy();
|
||||
|
||||
// Returns statistics for all segments allocated by LSA on this shard.
|
||||
occupancy_stats occupancy();
|
||||
|
||||
impl& get_impl() { return *_impl; }
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
thread_local managed_bytes::linearization_context managed_bytes::_linearization_context;
|
||||
|
||||
void
|
||||
managed_bytes::linearization_context::forget(const blob_storage* p) {
|
||||
managed_bytes::linearization_context::forget(const blob_storage* p) noexcept {
|
||||
_state.erase(p);
|
||||
}
|
||||
|
||||
|
||||
@@ -80,7 +80,7 @@ class managed_bytes {
|
||||
_state.clear();
|
||||
}
|
||||
}
|
||||
void forget(const blob_storage* p);
|
||||
void forget(const blob_storage* p) noexcept;
|
||||
};
|
||||
static thread_local linearization_context _linearization_context;
|
||||
public:
|
||||
@@ -110,7 +110,7 @@ private:
|
||||
size_t max_seg(allocation_strategy& alctr) {
|
||||
return alctr.preferred_max_contiguous_allocation() - sizeof(blob_storage);
|
||||
}
|
||||
void free_chain(blob_storage* p) {
|
||||
void free_chain(blob_storage* p) noexcept {
|
||||
if (p->next && _linearization_context._nesting) {
|
||||
_linearization_context.forget(p);
|
||||
}
|
||||
@@ -130,6 +130,17 @@ private:
|
||||
return do_linearize();
|
||||
}
|
||||
}
|
||||
// Returns a reference to the byte at logical position `index`.
//
// For externally stored data the fragment chain is walked, subtracting each
// fragment's size until the index falls inside the current fragment. No
// bounds checking is done: the loop does not test `next` before following it,
// so `index` must be smaller than the stored size.
bytes_view::value_type& value_at_index(blob_storage::size_type index) {
    if (external()) {
        blob_storage* fragment = _u.ptr;
        for (; index >= fragment->frag_size; fragment = fragment->next) {
            index -= fragment->frag_size;
        }
        return fragment->data[index];
    }
    // Inline (small) storage: the bytes live directly in the object.
    return _u.small.data[index];
}
|
||||
const bytes_view::value_type* do_linearize() const;
|
||||
public:
|
||||
using size_type = blob_storage::size_type;
|
||||
@@ -191,7 +202,7 @@ public:
|
||||
|
||||
managed_bytes(std::initializer_list<bytes::value_type> b) : managed_bytes(b.begin(), b.size()) {}
|
||||
|
||||
~managed_bytes() {
|
||||
~managed_bytes() noexcept {
|
||||
if (external()) {
|
||||
free_chain(_u.ptr);
|
||||
}
|
||||
@@ -244,7 +255,7 @@ public:
|
||||
o._u.small.size = 0;
|
||||
}
|
||||
|
||||
managed_bytes& operator=(managed_bytes&& o) {
|
||||
managed_bytes& operator=(managed_bytes&& o) noexcept {
|
||||
if (this != &o) {
|
||||
this->~managed_bytes();
|
||||
new (this) managed_bytes(std::move(o));
|
||||
@@ -254,9 +265,9 @@ public:
|
||||
|
||||
managed_bytes& operator=(const managed_bytes& o) {
|
||||
if (this != &o) {
|
||||
// FIXME: not exception safe
|
||||
managed_bytes tmp(o);
|
||||
this->~managed_bytes();
|
||||
new (this) managed_bytes(o);
|
||||
new (this) managed_bytes(std::move(tmp));
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
@@ -309,11 +320,12 @@ public:
|
||||
}
|
||||
|
||||
bytes_view::value_type& operator[](size_type index) {
|
||||
return data()[index];
|
||||
return value_at_index(index);
|
||||
}
|
||||
|
||||
const bytes_view::value_type& operator[](size_type index) const {
|
||||
return data()[index];
|
||||
return const_cast<const bytes_view::value_type&>(
|
||||
const_cast<managed_bytes*>(this)->value_at_index(index));
|
||||
}
|
||||
|
||||
size_type size() const {
|
||||
|
||||
Reference in New Issue
Block a user