Compare commits

..

3 Commits

Author SHA1 Message Date
Avi Kivity
b2eb0810a2 build: support for alternative versions of libsystemd pkgconfig
While pkgconfig is supposed to be a distribution and version neutral way
of detecting packages, it doesn't always work this way.  The sd_notify()
manual page documents that sd_notify is available via the libsystemd
package, but on centos 7.0 it is only available via the libsystemd-daemon
package (on centos 7.1+ it works as expected).

Fix by allowing for alternate version of package names, testing each one
until a match is found.

Fixes #879.

Message-Id: <1454858862-5239-1-git-send-email-avi@scylladb.com>
(cherry picked from commit 8b0a26f06d)
2016-02-07 17:38:10 +02:00
Avi Kivity
14d029bf71 Merge "Sstable cleanup fixes" from Tomasz
"  - Added waiting for async cleanup on clean shutdown

  - Crash in the middle of sstable removal doesn't leave system in a non-bootable state"

(cherry picked from commit f3ca597a01)
2016-02-04 16:43:09 +02:00
Pekka Enberg
38470b4d28 release: prepare for 0.17 2016-01-28 14:44:40 +02:00
322 changed files with 6287 additions and 11701 deletions

1
.gitignore vendored
View File

@@ -8,4 +8,3 @@ cscope.*
dist/ami/files/*.rpm
dist/ami/variables.json
dist/ami/scylla_deploy.sh
*.pyc

2
.gitmodules vendored
View File

@@ -1,6 +1,6 @@
[submodule "seastar"]
path = seastar
url = ../scylla-seastar
url = ../seastar
ignore = dirty
[submodule "swagger-ui"]
path = swagger-ui

View File

@@ -1,6 +1,6 @@
#!/bin/sh
VERSION=1.0.4
VERSION=0.17
if test -f version
then

View File

@@ -836,22 +836,6 @@
"type":"string",
"paramType":"query"
},
{
"name":"startToken",
"description":"Token on which to begin repair",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"endToken",
"description":"Token on which to end repair",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"columnFamilies",
"description":"Which column families to repair in the given keyspace. Multiple columns families can be named separated by commas. If this option is missing, all column families in the keyspace are repaired.",

View File

@@ -214,16 +214,16 @@ void set_storage_proxy(http_context& ctx, routes& r) {
});
sp::get_schema_versions.set(r, [](std::unique_ptr<request> req) {
return service::get_local_storage_service().describe_schema_versions().then([] (auto result) {
std::vector<sp::mapper_list> res;
for (auto e : result) {
sp::mapper_list entry;
entry.key = std::move(e.first);
entry.value = std::move(e.second);
res.emplace_back(std::move(entry));
}
return make_ready_future<json::json_return_type>(std::move(res));
});
//TBD
// FIXME
// describe_schema_versions is not implemented yet
// this is a work around
std::vector<sp::mapper_list> res;
sp::mapper_list entry;
entry.key = boost::lexical_cast<std::string>(utils::fb_utilities::get_broadcast_address());
entry.value.push(service::get_local_storage_service().get_schema_version());
res.push_back(entry);
return make_ready_future<json::json_return_type>(res);
});
sp::get_cas_read_timeouts.set(r, [](std::unique_ptr<request> req) {

View File

@@ -280,12 +280,10 @@ void set_storage_service(http_context& ctx, routes& r) {
return ctx.db.invoke_on_all([keyspace, column_families] (database& db) {
std::vector<column_family*> column_families_vec;
auto& cm = db.get_compaction_manager();
for (auto cf : column_families) {
column_families_vec.push_back(&db.find_column_family(keyspace, cf));
for (auto entry : column_families) {
column_family* cf = &db.find_column_family(keyspace, entry);
cm.submit_cleanup_job(cf);
}
return parallel_for_each(column_families_vec, [&cm] (column_family* cf) {
return cm.perform_cleanup(cf);
});
}).then([]{
return make_ready_future<json::json_return_type>(0);
});
@@ -328,8 +326,7 @@ void set_storage_service(http_context& ctx, routes& r) {
ss::repair_async.set(r, [&ctx](std::unique_ptr<request> req) {
static std::vector<sstring> options = {"primaryRange", "parallelism", "incremental",
"jobThreads", "ranges", "columnFamilies", "dataCenters", "hosts", "trace",
"startToken", "endToken" };
"jobThreads", "ranges", "columnFamilies", "dataCenters", "hosts", "trace"};
std::unordered_map<sstring, sstring> options_map;
for (auto o : options) {
auto s = req->get_query_param(o);
@@ -588,8 +585,6 @@ void set_storage_service(http_context& ctx, routes& r) {
auto val_str = req->get_query_param("value");
bool value = (val_str == "True") || (val_str == "true") || (val_str == "1");
return service::get_local_storage_service().db().invoke_on_all([value] (database& db) {
db.set_enable_incremental_backups(value);
// Change both KS and CF, so they are in sync
for (auto& pair: db.get_keyspaces()) {
auto& ks = pair.second;

View File

@@ -32,16 +32,11 @@ namespace hs = httpd::stream_manager_json;
static void set_summaries(const std::vector<streaming::stream_summary>& from,
json::json_list<hs::stream_summary>& to) {
if (!from.empty()) {
for (auto sum : from) {
hs::stream_summary res;
res.cf_id = boost::lexical_cast<std::string>(from.front().cf_id);
// For each stream_session, we pretend we are sending/receiving one
// file, to make it compatible with nodetool.
res.files = 1;
// We can not estimate total number of bytes the stream_session will
// send or recvieve since we don't know the size of the frozen_mutation
// until we read it.
res.total_size = 0;
res.cf_id = boost::lexical_cast<std::string>(sum.cf_id);
res.files = sum.files;
res.total_size = sum.total_size;
to.push(res);
}
}
@@ -90,22 +85,18 @@ static hs::stream_state get_state(
void set_stream_manager(http_context& ctx, routes& r) {
hs::get_current_streams.set(r,
[] (std::unique_ptr<request> req) {
return streaming::get_stream_manager().invoke_on_all([] (auto& sm) {
return sm.update_all_progress_info();
}).then([] {
return streaming::get_stream_manager().map_reduce0([](streaming::stream_manager& stream) {
std::vector<hs::stream_state> res;
for (auto i : stream.get_initiated_streams()) {
res.push_back(get_state(*i.second.get()));
}
for (auto i : stream.get_receiving_streams()) {
res.push_back(get_state(*i.second.get()));
}
return res;
}, std::vector<hs::stream_state>(),concat<hs::stream_state>).
then([](const std::vector<hs::stream_state>& res) {
return make_ready_future<json::json_return_type>(res);
});
return streaming::get_stream_manager().map_reduce0([](streaming::stream_manager& stream) {
std::vector<hs::stream_state> res;
for (auto i : stream.get_initiated_streams()) {
res.push_back(get_state(*i.second.get()));
}
for (auto i : stream.get_receiving_streams()) {
res.push_back(get_state(*i.second.get()));
}
return res;
}, std::vector<hs::stream_state>(),concat<hs::stream_state>).
then([](const std::vector<hs::stream_state>& res) {
return make_ready_future<json::json_return_type>(res);
});
});
@@ -120,9 +111,17 @@ void set_stream_manager(http_context& ctx, routes& r) {
hs::get_total_incoming_bytes.set(r, [](std::unique_ptr<request> req) {
gms::inet_address peer(req->param["peer"]);
return streaming::get_stream_manager().map_reduce0([peer](streaming::stream_manager& sm) {
return sm.get_progress_on_all_shards(peer).then([] (auto sbytes) {
return sbytes.bytes_received;
});
int64_t res = 0;
for (auto sr : sm.get_all_streams()) {
if (sr) {
for (auto session : sr->get_coordinator()->get_all_stream_sessions()) {
if (session->peer == peer) {
res += session->get_bytes_received();
}
}
}
}
return res;
}, 0, std::plus<int64_t>()).then([](int64_t res) {
return make_ready_future<json::json_return_type>(res);
});
@@ -130,9 +129,15 @@ void set_stream_manager(http_context& ctx, routes& r) {
hs::get_all_total_incoming_bytes.set(r, [](std::unique_ptr<request> req) {
return streaming::get_stream_manager().map_reduce0([](streaming::stream_manager& sm) {
return sm.get_progress_on_all_shards().then([] (auto sbytes) {
return sbytes.bytes_received;
});
int64_t res = 0;
for (auto sr : sm.get_all_streams()) {
if (sr) {
for (auto session : sr->get_coordinator()->get_all_stream_sessions()) {
res += session->get_bytes_received();
}
}
}
return res;
}, 0, std::plus<int64_t>()).then([](int64_t res) {
return make_ready_future<json::json_return_type>(res);
});
@@ -140,10 +145,18 @@ void set_stream_manager(http_context& ctx, routes& r) {
hs::get_total_outgoing_bytes.set(r, [](std::unique_ptr<request> req) {
gms::inet_address peer(req->param["peer"]);
return streaming::get_stream_manager().map_reduce0([peer] (streaming::stream_manager& sm) {
return sm.get_progress_on_all_shards(peer).then([] (auto sbytes) {
return sbytes.bytes_sent;
});
return streaming::get_stream_manager().map_reduce0([peer](streaming::stream_manager& sm) {
int64_t res = 0;
for (auto sr : sm.get_all_streams()) {
if (sr) {
for (auto session : sr->get_coordinator()->get_all_stream_sessions()) {
if (session->peer == peer) {
res += session->get_bytes_sent();
}
}
}
}
return res;
}, 0, std::plus<int64_t>()).then([](int64_t res) {
return make_ready_future<json::json_return_type>(res);
});
@@ -151,9 +164,15 @@ void set_stream_manager(http_context& ctx, routes& r) {
hs::get_all_total_outgoing_bytes.set(r, [](std::unique_ptr<request> req) {
return streaming::get_stream_manager().map_reduce0([](streaming::stream_manager& sm) {
return sm.get_progress_on_all_shards().then([] (auto sbytes) {
return sbytes.bytes_sent;
});
int64_t res = 0;
for (auto sr : sm.get_all_streams()) {
if (sr) {
for (auto session : sr->get_coordinator()->get_all_stream_sessions()) {
res += session->get_bytes_sent();
}
}
}
return res;
}, 0, std::plus<int64_t>()).then([](int64_t res) {
return make_ready_future<json::json_return_type>(res);
});

View File

@@ -54,9 +54,9 @@ class atomic_cell_or_collection;
*/
class atomic_cell_type final {
private:
static constexpr int8_t DEAD_FLAGS = 0;
static constexpr int8_t LIVE_FLAG = 0x01;
static constexpr int8_t EXPIRY_FLAG = 0x02; // When present, expiry field is present. Set only for live cells
static constexpr int8_t REVERT_FLAG = 0x04; // transient flag used to efficiently implement ReversiblyMergeable for atomic cells.
static constexpr unsigned flags_size = 1;
static constexpr unsigned timestamp_offset = flags_size;
static constexpr unsigned timestamp_size = 8;
@@ -67,21 +67,14 @@ private:
static constexpr unsigned ttl_offset = expiry_offset + expiry_size;
static constexpr unsigned ttl_size = 4;
private:
static bool is_revert_set(bytes_view cell) {
return cell[0] & REVERT_FLAG;
}
template<typename BytesContainer>
static void set_revert(BytesContainer& cell, bool revert) {
cell[0] = (cell[0] & ~REVERT_FLAG) | (revert * REVERT_FLAG);
}
static bool is_live(const bytes_view& cell) {
return cell[0] & LIVE_FLAG;
return cell[0] != DEAD_FLAGS;
}
static bool is_live_and_has_ttl(const bytes_view& cell) {
return cell[0] & EXPIRY_FLAG;
}
static bool is_dead(const bytes_view& cell) {
return !is_live(cell);
return cell[0] == DEAD_FLAGS;
}
// Can be called on live and dead cells
static api::timestamp_type timestamp(const bytes_view& cell) {
@@ -113,7 +106,7 @@ private:
}
static managed_bytes make_dead(api::timestamp_type timestamp, gc_clock::time_point deletion_time) {
managed_bytes b(managed_bytes::initialized_later(), flags_size + timestamp_size + deletion_time_size);
b[0] = 0;
b[0] = DEAD_FLAGS;
set_field(b, timestamp_offset, timestamp);
set_field(b, deletion_time_offset, deletion_time.time_since_epoch().count());
return b;
@@ -147,11 +140,8 @@ protected:
ByteContainer _data;
protected:
atomic_cell_base(ByteContainer&& data) : _data(std::forward<ByteContainer>(data)) { }
friend class atomic_cell_or_collection;
atomic_cell_base(const ByteContainer& data) : _data(data) { }
public:
bool is_revert_set() const {
return atomic_cell_type::is_revert_set(_data);
}
bool is_live() const {
return atomic_cell_type::is_live(_data);
}
@@ -197,13 +187,10 @@ public:
bytes_view serialize() const {
return _data;
}
void set_revert(bool revert) {
atomic_cell_type::set_revert(_data, revert);
}
};
class atomic_cell_view final : public atomic_cell_base<bytes_view> {
atomic_cell_view(bytes_view data) : atomic_cell_base(std::move(data)) {}
atomic_cell_view(bytes_view data) : atomic_cell_base(data) {}
public:
static atomic_cell_view from_bytes(bytes_view data) { return atomic_cell_view(data); }
@@ -211,11 +198,6 @@ public:
friend std::ostream& operator<<(std::ostream& os, const atomic_cell_view& acv);
};
class atomic_cell_ref final : public atomic_cell_base<managed_bytes&> {
public:
atomic_cell_ref(managed_bytes& buf) : atomic_cell_base(buf) {}
};
class atomic_cell final : public atomic_cell_base<managed_bytes> {
atomic_cell(managed_bytes b) : atomic_cell_base(std::move(b)) {}
public:

View File

@@ -27,18 +27,16 @@
#include "atomic_cell.hh"
#include "hashing.hh"
template<>
struct appending_hash<collection_mutation_view> {
template<typename Hasher>
void operator()(Hasher& h, collection_mutation_view cell) const {
auto m_view = collection_type_impl::deserialize_mutation_form(cell);
::feed_hash(h, m_view.tomb);
for (auto&& key_and_value : m_view.cells) {
::feed_hash(h, key_and_value.first);
::feed_hash(h, key_and_value.second);
}
template<typename Hasher>
void feed_hash(collection_mutation_view cell, Hasher& h, const data_type& type) {
auto&& ctype = static_pointer_cast<const collection_type_impl>(type);
auto m_view = ctype->deserialize_mutation_form(cell);
::feed_hash(h, m_view.tomb);
for (auto&& key_and_value : m_view.cells) {
::feed_hash(h, key_and_value.first);
::feed_hash(h, key_and_value.second);
}
};
}
template<>
struct appending_hash<atomic_cell_view> {
@@ -57,19 +55,3 @@ struct appending_hash<atomic_cell_view> {
}
}
};
template<>
struct appending_hash<atomic_cell> {
template<typename Hasher>
void operator()(Hasher& h, const atomic_cell& cell) const {
feed_hash(h, static_cast<atomic_cell_view>(cell));
}
};
template<>
struct appending_hash<collection_mutation> {
template<typename Hasher>
void operator()(Hasher& h, const collection_mutation& cm) const {
feed_hash(h, static_cast<collection_mutation_view>(cm));
}
};

View File

@@ -27,10 +27,11 @@
// A variant type that can hold either an atomic_cell, or a serialized collection.
// Which type is stored is determined by the schema.
// Has an "empty" state.
// Objects moved-from are left in an empty state.
class atomic_cell_or_collection final {
managed_bytes _data;
template<typename T>
friend class db::serializer;
private:
atomic_cell_or_collection(managed_bytes&& data) : _data(std::move(data)) {}
public:
@@ -38,7 +39,6 @@ public:
atomic_cell_or_collection(atomic_cell ac) : _data(std::move(ac._data)) {}
static atomic_cell_or_collection from_atomic_cell(atomic_cell data) { return { std::move(data._data) }; }
atomic_cell_view as_atomic_cell() const { return atomic_cell_view::from_bytes(_data); }
atomic_cell_ref as_atomic_cell_ref() { return { _data }; }
atomic_cell_or_collection(collection_mutation cm) : _data(std::move(cm.data)) {}
explicit operator bool() const {
return !_data.empty();
@@ -63,5 +63,11 @@ public:
::feed_hash(as_collection_mutation(), h, def.type);
}
}
void linearize() {
_data.linearize();
}
void unlinearize() {
_data.scatter();
}
friend std::ostream& operator<<(std::ostream&, const atomic_cell_or_collection&);
};

View File

@@ -103,41 +103,35 @@ static auth_migration_listener auth_migration;
* Should be abstracted to some sort of global server function
* probably.
*/
struct waiter {
promise<> done;
timer<> tmr;
waiter() : tmr([this] {done.set_value();})
{
tmr.arm(auth::auth::SUPERUSER_SETUP_DELAY);
}
~waiter() {
if (tmr.armed()) {
tmr.cancel();
done.set_exception(std::runtime_error("shutting down"));
}
logger.trace("Deleting scheduled task");
}
void kill() {
}
};
typedef std::unique_ptr<waiter> waiter_ptr;
static std::vector<waiter_ptr> & thread_waiters() {
static thread_local std::vector<waiter_ptr> the_waiters;
return the_waiters;
}
void auth::auth::schedule_when_up(scheduled_func f) {
logger.trace("Adding scheduled task");
struct waiter {
promise<> done;
timer<> tmr;
waiter() : tmr([this] {done.set_value();})
{
tmr.arm(SUPERUSER_SETUP_DELAY);
}
~waiter() {
if (tmr.armed()) {
tmr.cancel();
done.set_exception(std::runtime_error("shutting down"));
}
logger.trace("Deleting scheduled task");
}
void kill() {
}
};
auto & waiters = thread_waiters();
typedef std::unique_ptr<waiter> waiter_ptr;
static thread_local std::vector<waiter_ptr> waiters;
logger.trace("Adding scheduled task");
waiters.emplace_back(std::make_unique<waiter>());
auto* w = waiters.back().get();
w->done.get_future().finally([w] {
auto & waiters = thread_waiters();
auto i = std::find_if(waiters.begin(), waiters.end(), [w](const waiter_ptr& p) {
return p.get() == w;
});
@@ -152,6 +146,7 @@ void auth::auth::schedule_when_up(scheduled_func f) {
});
}
bool auth::auth::is_class_type(const sstring& type, const sstring& classname) {
if (type == classname) {
return true;
@@ -210,15 +205,6 @@ future<> auth::auth::setup() {
});
}
future<> auth::auth::shutdown() {
// just make sure we don't have pending tasks.
// this is mostly relevant for test cases where
// db-env-shutdown != process shutdown
return smp::invoke_on_all([] {
thread_waiters().clear();
});
}
static db::consistency_level consistency_for_user(const sstring& username) {
if (username == auth::auth::DEFAULT_SUPERUSER_NAME) {
return db::consistency_level::QUORUM;

View File

@@ -102,7 +102,6 @@ public:
* Sets up Authenticator and Authorizer.
*/
static future<> setup();
static future<> shutdown();
/**
* Set up table from given CREATE TABLE statement under system_auth keyspace, if not already done so.

View File

@@ -21,12 +21,11 @@
#pragma once
#include <boost/range/iterator_range.hpp>
#include "bytes.hh"
#include "types.hh"
#include "net/byteorder.hh"
#include "core/unaligned.hh"
#include "hashing.hh"
#include "seastar/core/simple-stream.hh"
/**
* Utility for writing data into a buffer when its final size is not known up front.
*
@@ -43,14 +42,6 @@ private:
struct chunk {
// FIXME: group fragment pointers to reduce pointer chasing when packetizing
std::unique_ptr<chunk> next;
~chunk() {
auto p = std::move(next);
while (p) {
// Avoid recursion when freeing chunks
auto p_next = std::move(p->next);
p = std::move(p_next);
}
}
size_type offset; // Also means "size" after chunk is closed
size_type size;
value_type data[0];
@@ -172,12 +163,16 @@ public:
template <typename T>
struct place_holder {
value_type* ptr;
// makes the place_holder looks like a stream
seastar::simple_output_stream get_stream() {
return seastar::simple_output_stream{reinterpret_cast<char*>(ptr)};
}
};
// Writes given values in big-endian format
template <typename T>
inline
std::enable_if_t<std::is_fundamental<T>::value, void>
write(T val) {
*reinterpret_cast<unaligned<T>*>(alloc(sizeof(T))) = net::hton(val);
}
// Returns a place holder for a value to be written later.
template <typename T>
inline
@@ -215,6 +210,19 @@ public:
write(bytes_view(reinterpret_cast<const signed char*>(ptr), size));
}
// Writes given sequence of bytes with a preceding length component encoded in big-endian format
inline void write_blob(bytes_view v) {
assert((size_type)v.size() == v.size());
write<size_type>(v.size());
write(v);
}
// Writes given value into the place holder in big-endian format
template <typename T>
inline void set(place_holder<T> ph, T val) {
*reinterpret_cast<unaligned<T>*>(ph.ptr) = net::hton(val);
}
bool is_linearized() const {
return !_begin || !_begin->next;
}

View File

@@ -24,66 +24,80 @@
#include "mutation_partition_serializer.hh"
#include "converting_mutation_partition_applier.hh"
#include "hashing_partition_visitor.hh"
#include "utils/UUID.hh"
#include "serializer.hh"
#include "idl/uuid.dist.hh"
#include "idl/keys.dist.hh"
#include "idl/mutation.dist.hh"
#include "serializer_impl.hh"
#include "serialization_visitors.hh"
#include "idl/uuid.dist.impl.hh"
#include "idl/keys.dist.impl.hh"
#include "idl/mutation.dist.impl.hh"
template class db::serializer<canonical_mutation>;
//
// Representation layout:
//
// <canonical_mutation> ::= <column_family_id> <table_schema_version> <partition_key> <column-mapping> <partition>
//
// For <partition> see mutation_partition_serializer.cc
// For <column-mapping> see db::serializer<column_mapping>
//
canonical_mutation::canonical_mutation(bytes data)
: _data(std::move(data))
{ }
canonical_mutation::canonical_mutation(const mutation& m)
{
mutation_partition_serializer part_ser(*m.schema(), m.partition());
bytes_ostream out;
ser::writer_of_canonical_mutation wr(out);
std::move(wr).write_table_id(m.schema()->id())
.write_schema_version(m.schema()->version())
.write_key(m.key())
.write_mapping(m.schema()->get_column_mapping())
.partition([&] (auto wr) {
part_ser.write(std::move(wr));
}).end_canonical_mutation();
_data = to_bytes(out.linearize());
}
: _data([&m] {
bytes_ostream out;
db::serializer<utils::UUID>(m.column_family_id()).write(out);
db::serializer<table_schema_version>(m.schema()->version()).write(out);
db::serializer<partition_key_view>(m.key()).write(out);
db::serializer<column_mapping>(m.schema()->get_column_mapping()).write(out);
mutation_partition_serializer ser(*m.schema(), m.partition());
ser.write(out);
return to_bytes(out.linearize());
}())
{ }
utils::UUID canonical_mutation::column_family_id() const {
auto in = ser::as_input_stream(_data);
auto mv = ser::deserialize(in, boost::type<ser::canonical_mutation_view>());
return mv.table_id();
data_input in(_data);
return db::serializer<utils::UUID>::read(in);
}
mutation canonical_mutation::to_mutation(schema_ptr s) const {
auto in = ser::as_input_stream(_data);
auto mv = ser::deserialize(in, boost::type<ser::canonical_mutation_view>());
data_input in(_data);
auto cf_id = mv.table_id();
auto cf_id = db::serializer<utils::UUID>::read(in);
if (s->id() != cf_id) {
throw std::runtime_error(sprint("Attempted to deserialize canonical_mutation of table %s with schema of table %s (%s.%s)",
cf_id, s->id(), s->ks_name(), s->cf_name()));
}
auto version = mv.schema_version();
auto pk = mv.key();
auto version = db::serializer<table_schema_version>::read(in);
auto pk = partition_key(db::serializer<partition_key_view>::read(in));
mutation m(std::move(pk), std::move(s));
if (version == m.schema()->version()) {
auto partition_view = mutation_partition_view::from_view(mv.partition());
db::serializer<column_mapping>::skip(in);
auto partition_view = mutation_partition_serializer::read_as_view(in);
m.partition().apply(*m.schema(), partition_view, *m.schema());
} else {
column_mapping cm = mv.mapping();
column_mapping cm = db::serializer<column_mapping>::read(in);
converting_mutation_partition_applier v(cm, *m.schema(), m.partition());
auto partition_view = mutation_partition_view::from_view(mv.partition());
auto partition_view = mutation_partition_serializer::read_as_view(in);
partition_view.accept(cm, v);
}
return m;
}
template<>
db::serializer<canonical_mutation>::serializer(const canonical_mutation& v)
: _item(v)
, _size(db::serializer<bytes>(v._data).size())
{ }
template<>
void
db::serializer<canonical_mutation>::write(output& out, const canonical_mutation& v) {
db::serializer<bytes>(v._data).write(out);
}
template<>
canonical_mutation db::serializer<canonical_mutation>::read(input& in) {
return canonical_mutation(db::serializer<bytes>::read(in));
}

View File

@@ -24,6 +24,7 @@
#include "bytes.hh"
#include "schema.hh"
#include "database_fwd.hh"
#include "db/serializer.hh"
#include "mutation_partition_visitor.hh"
#include "mutation_partition_serializer.hh"
@@ -32,8 +33,8 @@
// Safe to pass serialized across nodes.
class canonical_mutation {
bytes _data;
canonical_mutation(bytes);
public:
explicit canonical_mutation(bytes);
explicit canonical_mutation(const mutation&);
canonical_mutation(canonical_mutation&&) = default;
@@ -50,6 +51,15 @@ public:
utils::UUID column_family_id() const;
const bytes& representation() const { return _data; }
friend class db::serializer<canonical_mutation>;
};
namespace db {
template<> serializer<canonical_mutation>::serializer(const canonical_mutation&);
template<> void serializer<canonical_mutation>::write(output&, const canonical_mutation&);
template<> canonical_mutation serializer<canonical_mutation>::read(input&);
extern template class serializer<canonical_mutation>;
}

View File

@@ -26,10 +26,29 @@
#include <algorithm>
#include <vector>
#include <boost/range/iterator_range.hpp>
#include <boost/range/adaptor/transformed.hpp>
#include "utils/serialization.hh"
#include "unimplemented.hh"
// value_traits is meant to abstract away whether we are working on 'bytes'
// elements or 'bytes_opt' elements. We don't support optional values, but
// there are some generic layers which use this code which provide us with
// data in that format. In order to avoid allocation and rewriting that data
// into a new vector just to throw it away soon after that, we accept that
// format too.
template <typename T>
struct value_traits {
static const T& unwrap(const T& t) { return t; }
};
template<>
struct value_traits<bytes_opt> {
static const bytes& unwrap(const bytes_opt& t) {
assert(t);
return *t;
}
};
enum class allow_prefixes { no, yes };
template<allow_prefixes AllowPrefixes = allow_prefixes::no>
@@ -43,14 +62,13 @@ public:
static constexpr bool is_prefixable = AllowPrefixes == allow_prefixes::yes;
using prefix_type = compound_type<allow_prefixes::yes>;
using value_type = std::vector<bytes>;
using size_type = uint16_t;
compound_type(std::vector<data_type> types)
: _types(std::move(types))
, _byte_order_equal(std::all_of(_types.begin(), _types.end(), [] (auto t) {
return t->is_byte_order_equal();
}))
, _byte_order_comparable(false)
, _byte_order_comparable(!is_prefixable && _types.size() == 1 && _types[0]->is_byte_order_comparable())
, _is_reversed(_types.size() == 1 && _types[0]->is_reversed())
{ }
@@ -67,54 +85,79 @@ public:
prefix_type as_prefix() {
return prefix_type(_types);
}
private:
/*
* Format:
* <len(value1)><value1><len(value2)><value2>...<len(value_n)><value_n>
* <len(value1)><value1><len(value2)><value2>...<len(value_n-1)><value_n-1>(len(value_n))?<value_n>
*
* For non-prefixable compounds, the value corresponding to the last component of types doesn't
* have its length encoded, its length is deduced from the input range.
*
* serialize_value() and serialize_optionals() for single element rely on the fact that for a single-element
* compounds their serialized form is equal to the serialized form of the component.
*/
template<typename RangeOfSerializedComponents>
static void serialize_value(RangeOfSerializedComponents&& values, bytes::iterator& out) {
for (auto&& val : values) {
assert(val.size() <= std::numeric_limits<size_type>::max());
write<size_type>(out, size_type(val.size()));
template<typename Wrapped>
void serialize_value(const std::vector<Wrapped>& values, bytes::iterator& out) {
if (AllowPrefixes == allow_prefixes::yes) {
assert(values.size() <= _types.size());
} else {
assert(values.size() == _types.size());
}
size_t n_left = _types.size();
for (auto&& wrapped : values) {
auto&& val = value_traits<Wrapped>::unwrap(wrapped);
assert(val.size() <= std::numeric_limits<uint16_t>::max());
if (--n_left || AllowPrefixes == allow_prefixes::yes) {
write<uint16_t>(out, uint16_t(val.size()));
}
out = std::copy(val.begin(), val.end(), out);
}
}
template <typename RangeOfSerializedComponents>
static size_t serialized_size(RangeOfSerializedComponents&& values) {
template <typename Wrapped>
size_t serialized_size(const std::vector<Wrapped>& values) {
size_t len = 0;
for (auto&& val : values) {
len += sizeof(size_type) + val.size();
size_t n_left = _types.size();
for (auto&& wrapped : values) {
auto&& val = value_traits<Wrapped>::unwrap(wrapped);
assert(val.size() <= std::numeric_limits<uint16_t>::max());
if (--n_left || AllowPrefixes == allow_prefixes::yes) {
len += sizeof(uint16_t);
}
len += val.size();
}
return len;
}
public:
bytes serialize_single(bytes&& v) {
return serialize_value({std::move(v)});
}
template<typename RangeOfSerializedComponents>
static bytes serialize_value(RangeOfSerializedComponents&& values) {
auto size = serialized_size(values);
if (size > std::numeric_limits<size_type>::max()) {
throw std::runtime_error(sprint("Key size too large: %d > %d", size, std::numeric_limits<size_type>::max()));
if (AllowPrefixes == allow_prefixes::no) {
assert(_types.size() == 1);
return std::move(v);
} else {
// FIXME: Optimize
std::vector<bytes> vec;
vec.reserve(1);
vec.emplace_back(std::move(v));
return ::serialize_value(*this, vec);
}
bytes b(bytes::initialized_later(), size);
auto i = b.begin();
serialize_value(values, i);
return b;
}
template<typename T>
static bytes serialize_value(std::initializer_list<T> values) {
return serialize_value(boost::make_iterator_range(values.begin(), values.end()));
bytes serialize_value(const std::vector<bytes>& values) {
return ::serialize_value(*this, values);
}
bytes serialize_value(std::vector<bytes>&& values) {
if (AllowPrefixes == allow_prefixes::no && _types.size() == 1 && values.size() == 1) {
return std::move(values[0]);
}
return ::serialize_value(*this, values);
}
bytes serialize_optionals(const std::vector<bytes_opt>& values) {
return serialize_value(values | boost::adaptors::transformed([] (const bytes_opt& bo) -> bytes_view {
if (!bo) {
throw std::logic_error("attempted to create key component from empty optional");
}
return *bo;
}));
return ::serialize_value(*this, values);
}
bytes serialize_optionals(std::vector<bytes_opt>&& values) {
if (AllowPrefixes == allow_prefixes::no && _types.size() == 1 && values.size() == 1) {
assert(values[0]);
return std::move(*values[0]);
}
return ::serialize_value(*this, values);
}
bytes serialize_value_deep(const std::vector<data_value>& values) {
// TODO: Optimize
@@ -128,21 +171,37 @@ public:
return serialize_value(partial);
}
bytes decompose_value(const value_type& values) {
return serialize_value(values);
return ::serialize_value(*this, values);
}
class iterator : public std::iterator<std::input_iterator_tag, bytes_view> {
private:
ssize_t _types_left;
bytes_view _v;
value_type _current;
private:
void read_current() {
size_type len;
{
if (_v.empty()) {
_v = bytes_view(nullptr, 0);
return;
if (_types_left == 0) {
if (!_v.empty()) {
throw marshal_exception();
}
len = read_simple<size_type>(_v);
_v = bytes_view(nullptr, 0);
return;
}
--_types_left;
uint16_t len;
if (_types_left == 0 && AllowPrefixes == allow_prefixes::no) {
len = _v.size();
} else {
if (_v.empty()) {
if (AllowPrefixes == allow_prefixes::yes) {
_types_left = 0;
_v = bytes_view(nullptr, 0);
return;
} else {
throw marshal_exception();
}
}
len = read_simple<uint16_t>(_v);
if (_v.size() < len) {
throw marshal_exception();
}
@@ -152,10 +211,10 @@ public:
}
public:
struct end_iterator_tag {};
iterator(const bytes_view& v) : _v(v) {
iterator(const compound_type& t, const bytes_view& v) : _types_left(t._types.size()), _v(v) {
read_current();
}
iterator(end_iterator_tag, const bytes_view& v) : _v(nullptr, 0) {}
iterator(end_iterator_tag, const bytes_view& v) : _types_left(0), _v(nullptr, 0) {}
iterator& operator++() {
read_current();
return *this;
@@ -167,18 +226,21 @@ public:
}
const value_type& operator*() const { return _current; }
const value_type* operator->() const { return &_current; }
bool operator!=(const iterator& i) const { return _v.begin() != i._v.begin(); }
bool operator==(const iterator& i) const { return _v.begin() == i._v.begin(); }
bool operator!=(const iterator& i) const { return _v.begin() != i._v.begin() || _types_left != i._types_left; }
bool operator==(const iterator& i) const { return _v.begin() == i._v.begin() && _types_left == i._types_left; }
};
static iterator begin(const bytes_view& v) {
return iterator(v);
iterator begin(const bytes_view& v) const {
return iterator(*this, v);
}
static iterator end(const bytes_view& v) {
iterator end(const bytes_view& v) const {
return iterator(typename iterator::end_iterator_tag(), v);
}
static boost::iterator_range<iterator> components(const bytes_view& v) {
boost::iterator_range<iterator> components(const bytes_view& v) const {
return { begin(v), end(v) };
}
auto iter_items(const bytes_view& v) {
return boost::iterator_range<iterator>(begin(v), end(v));
}
value_type deserialize_value(bytes_view v) {
std::vector<bytes> result;
result.reserve(_types.size());
@@ -196,7 +258,7 @@ public:
}
auto t = _types.begin();
size_t h = 0;
for (auto&& value : components(v)) {
for (auto&& value : iter_items(v)) {
h ^= (*t)->hash(value);
++t;
}
@@ -215,6 +277,12 @@ public:
return type->compare(v1, v2);
});
}
bytes from_string(sstring_view s) {
throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
}
sstring to_string(const bytes& b) {
throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
}
// Retruns true iff given prefix has no missing components
bool is_full(bytes_view v) const {
assert(AllowPrefixes == allow_prefixes::yes);

View File

@@ -25,31 +25,6 @@ from distutils.spawn import find_executable
configure_args = str.join(' ', [shlex.quote(x) for x in sys.argv[1:]])
for line in open('/etc/os-release'):
key, _, value = line.partition('=')
value = value.strip().strip('"')
if key == 'ID':
os_ids = [value]
if key == 'ID_LIKE':
os_ids += value.split(' ')
# distribution "internationalization", converting package names.
# Fedora name is key, values is distro -> package name dict.
i18n_xlat = {
'boost-devel': {
'debian': 'libboost-dev',
'ubuntu': 'libboost-dev (libboost1.55-dev on 14.04)',
},
}
def pkgname(name):
if name in i18n_xlat:
dict = i18n_xlat[name]
for id in os_ids:
if id in dict:
return dict[id]
return name
def get_flags():
with open('/proc/cpuinfo') as f:
for line in f:
@@ -162,7 +137,6 @@ modes = {
scylla_tests = [
'tests/mutation_test',
'tests/schema_registry_test',
'tests/canonical_mutation_test',
'tests/range_test',
'tests/types_test',
@@ -193,6 +167,7 @@ scylla_tests = [
'tests/commitlog_test',
'tests/cartesian_product_test',
'tests/hash_test',
'tests/serializer_test',
'tests/map_difference_test',
'tests/message',
'tests/gossip',
@@ -215,7 +190,6 @@ scylla_tests = [
'tests/flush_queue_test',
'tests/dynamic_bitset_test',
'tests/auth_test',
'tests/idl_test',
]
apps = [
@@ -224,11 +198,7 @@ apps = [
tests = scylla_tests
other = [
'iotune',
]
all_artifacts = apps + tests + other
all_artifacts = apps + tests
arg_parser = argparse.ArgumentParser('Configure scylla')
arg_parser.add_argument('--static', dest = 'static', action = 'store_const', default = '',
@@ -265,6 +235,7 @@ add_tristate(arg_parser, name = 'xen', dest = 'xen', help = 'Xen support')
args = arg_parser.parse_args()
defines = []
scylla_libs = '-llz4 -lsnappy -lz -lboost_thread -lcryptopp -lrt -lyaml-cpp -lboost_date_time'
extra_cxxflags = {}
@@ -318,7 +289,6 @@ scylla_core = (['database.cc',
'cql3/statements/cf_statement.cc',
'cql3/statements/create_keyspace_statement.cc',
'cql3/statements/create_table_statement.cc',
'cql3/statements/create_type_statement.cc',
'cql3/statements/drop_keyspace_statement.cc',
'cql3/statements/drop_table_statement.cc',
'cql3/statements/schema_altering_statement.cc',
@@ -373,7 +343,7 @@ scylla_core = (['database.cc',
'db/schema_tables.cc',
'db/commitlog/commitlog.cc',
'db/commitlog/commitlog_replayer.cc',
'db/commitlog/commitlog_entry.cc',
'db/serializer.cc',
'db/config.cc',
'db/index/secondary_index.cc',
'db/marshal/type_parser.cc',
@@ -387,7 +357,6 @@ scylla_core = (['database.cc',
'utils/rate_limiter.cc',
'utils/file_lock.cc',
'utils/dynamic_bitset.cc',
'utils/managed_bytes.cc',
'gms/version_generator.cc',
'gms/versioned_value.cc',
'gms/gossiper.cc',
@@ -409,7 +378,6 @@ scylla_core = (['database.cc',
'locator/simple_strategy.cc',
'locator/local_strategy.cc',
'locator/network_topology_strategy.cc',
'locator/everywhere_replication_strategy.cc',
'locator/token_metadata.cc',
'locator/locator.cc',
'locator/snitch_base.cc',
@@ -423,6 +391,7 @@ scylla_core = (['database.cc',
'service/client_state.cc',
'service/migration_task.cc',
'service/storage_service.cc',
'service/pending_range_calculator_service.cc',
'service/load_broadcaster.cc',
'service/pager/paging_state.cc',
'service/pager/query_pagers.cc',
@@ -502,14 +471,6 @@ idls = ['idl/gossip_digest.idl.hh',
'idl/reconcilable_result.idl.hh',
'idl/streaming.idl.hh',
'idl/paging_state.idl.hh',
'idl/frozen_schema.idl.hh',
'idl/partition_checksum.idl.hh',
'idl/replay_position.idl.hh',
'idl/truncation_record.idl.hh',
'idl/mutation.idl.hh',
'idl/query.idl.hh',
'idl/idl_test.idl.hh',
'idl/commitlog.idl.hh',
]
scylla_tests_dependencies = scylla_core + api + idls + [
@@ -553,7 +514,6 @@ tests_not_using_seastar_test_framework = set([
'tests/perf/perf_sstable',
'tests/managed_vector_test',
'tests/dynamic_bitset_test',
'tests/idl_test',
])
for t in tests_not_using_seastar_test_framework:
@@ -622,19 +582,6 @@ for pkglist in optional_packages:
alternatives = ':'.join(pkglist[1:])
print('Missing optional package {pkglist[0]} (or alteratives {alternatives})'.format(**locals()))
if not try_compile(compiler=args.cxx, source='#include <boost/version.hpp>'):
print('Boost not installed. Please install {}.'.format(pkgname("boost-devel")))
sys.exit(1)
if not try_compile(compiler=args.cxx, source='''\
#include <boost/version.hpp>
#if BOOST_VERSION < 105500
#error Boost version too low
#endif
'''):
print('Installed boost version too old. Please update {}.'.format(pkgname("boost-devel")))
sys.exit(1)
defines = ' '.join(['-D' + d for d in defines])
globals().update(vars(args))
@@ -663,8 +610,6 @@ if args.dpdk:
seastar_flags += ['--enable-dpdk']
elif args.dpdk_target:
seastar_flags += ['--dpdk-target', args.dpdk_target]
if args.staticcxx:
seastar_flags += ['--static-stdc++']
seastar_cflags = args.user_cflags + " -march=nehalem"
seastar_flags += ['--compiler', args.cxx, '--cflags=%s' % (seastar_cflags)]
@@ -698,7 +643,7 @@ for mode in build_modes:
seastar_deps = 'practically_anything_can_change_so_lets_run_it_every_time_and_restat.'
args.user_cflags += " " + pkg_config("--cflags", "jsoncpp")
libs = "-lyaml-cpp -llz4 -lz -lsnappy " + pkg_config("--libs", "jsoncpp") + ' -lboost_filesystem' + ' -lcrypt' + ' -lboost_date_time'
libs = "-lyaml-cpp -llz4 -lz -lsnappy " + pkg_config("--libs", "jsoncpp") + ' -lboost_filesystem' + ' -lcrypt'
for pkg in pkgs:
args.user_cflags += ' ' + pkg_config('--cflags', pkg)
libs += ' ' + pkg_config('--libs', pkg)
@@ -734,15 +679,12 @@ with open(buildfile, 'w') as f:
command = seastar/json/json2code.py -f $in -o $out
description = SWAGGER $out
rule serializer
command = {python} ./idl-compiler.py --ns ser -f $in -o $out
command = ./idl-compiler.py --ns ser -f $in -o $out
description = IDL compiler $out
rule ninja
command = {ninja} -C $subdir $target
restat = 1
description = NINJA $out
rule copy
command = cp $in $out
description = COPY $out
''').format(**globals()))
for mode in build_modes:
modeval = modes[mode]
@@ -779,8 +721,6 @@ with open(buildfile, 'w') as f:
thrifts = set()
antlr3_grammars = set()
for binary in build_artifacts:
if binary in other:
continue
srcs = deps[binary]
objs = ['$builddir/' + mode + '/' + src.replace('.cc', '.o')
for src in srcs
@@ -846,8 +786,7 @@ with open(buildfile, 'w') as f:
for obj in compiles:
src = compiles[obj]
gen_headers = list(ragels.keys())
gen_headers += ['seastar/build/{}/gen/http/request_parser.hh'.format(mode)]
gen_headers += ['seastar/build/{}/gen/http/http_response_parser.hh'.format(mode)]
gen_headers += ['seastar/build/{}/http/request_parser.hh'.format(mode)]
for th in thrifts:
gen_headers += th.headers('$builddir/{}/gen'.format(mode))
for g in antlr3_grammars:
@@ -878,14 +817,10 @@ with open(buildfile, 'w') as f:
grammar.source.rsplit('.', 1)[0]))
for cc in grammar.sources('$builddir/{}/gen'.format(mode)):
obj = cc.replace('.cpp', '.o')
f.write('build {}: cxx.{} {} || {}\n'.format(obj, mode, cc, ' '.join(serializers)))
f.write('build seastar/build/{mode}/libseastar.a seastar/build/{mode}/apps/iotune/iotune seastar/build/{mode}/gen/http/request_parser.hh seastar/build/{mode}/gen/http/http_response_parser.hh: ninja {seastar_deps}\n'
.format(**locals()))
f.write('build {}: cxx.{} {}\n'.format(obj, mode, cc))
f.write('build seastar/build/{}/libseastar.a: ninja {}\n'.format(mode, seastar_deps))
f.write(' subdir = seastar\n')
f.write(' target = build/{mode}/libseastar.a build/{mode}/apps/iotune/iotune build/{mode}/gen/http/request_parser.hh build/{mode}/gen/http/http_response_parser.hh\n'.format(**locals()))
f.write(textwrap.dedent('''\
build build/{mode}/iotune: copy seastar/build/{mode}/apps/iotune/iotune
''').format(**locals()))
f.write(' target = build/{}/libseastar.a\n'.format(mode))
f.write('build {}: phony\n'.format(seastar_deps))
f.write(textwrap.dedent('''\
rule configure
@@ -896,6 +831,10 @@ with open(buildfile, 'w') as f:
command = find -name '*.[chS]' -o -name "*.cc" -o -name "*.hh" | cscope -bq -i-
description = CSCOPE
build cscope: cscope
rule request_parser_hh
command = {ninja} -C seastar build/release/gen/http/request_parser.hh build/debug/gen/http/request_parser.hh
description = GEN seastar/http/request_parser.hh
build seastar/build/release/http/request_parser.hh seastar/build/debug/http/request_parser.hh: request_parser_hh
rule clean
command = rm -rf build
description = CLEAN

View File

@@ -75,7 +75,7 @@ public:
}
virtual void accept_static_cell(column_id id, atomic_cell_view cell) override {
const column_mapping_entry& col = _visited_column_mapping.static_column_at(id);
const column_mapping::column& col = _visited_column_mapping.static_column_at(id);
const column_definition* def = _p_schema.get_column_definition(col.name());
if (def) {
accept_cell(_p._static_row, column_kind::static_column, *def, col.type(), cell);
@@ -83,7 +83,7 @@ public:
}
virtual void accept_static_cell(column_id id, collection_mutation_view collection) override {
const column_mapping_entry& col = _visited_column_mapping.static_column_at(id);
const column_mapping::column& col = _visited_column_mapping.static_column_at(id);
const column_definition* def = _p_schema.get_column_definition(col.name());
if (def) {
accept_cell(_p._static_row, column_kind::static_column, *def, col.type(), collection);
@@ -102,7 +102,7 @@ public:
}
virtual void accept_row_cell(column_id id, atomic_cell_view cell) override {
const column_mapping_entry& col = _visited_column_mapping.regular_column_at(id);
const column_mapping::column& col = _visited_column_mapping.regular_column_at(id);
const column_definition* def = _p_schema.get_column_definition(col.name());
if (def) {
accept_cell(_current_row->cells(), column_kind::regular_column, *def, col.type(), cell);
@@ -110,7 +110,7 @@ public:
}
virtual void accept_row_cell(column_id id, collection_mutation_view collection) override {
const column_mapping_entry& col = _visited_column_mapping.regular_column_at(id);
const column_mapping::column& col = _visited_column_mapping.regular_column_at(id);
const column_definition* def = _p_schema.get_column_definition(col.name());
if (def) {
accept_cell(_current_row->cells(), column_kind::regular_column, *def, col.type(), collection);

View File

@@ -36,7 +36,6 @@ options {
#include "cql3/statements/drop_keyspace_statement.hh"
#include "cql3/statements/create_index_statement.hh"
#include "cql3/statements/create_table_statement.hh"
#include "cql3/statements/create_type_statement.hh"
#include "cql3/statements/property_definitions.hh"
#include "cql3/statements/drop_table_statement.hh"
#include "cql3/statements/truncate_statement.hh"
@@ -284,9 +283,7 @@ cqlStatement returns [shared_ptr<parsed_statement> stmt]
| st22=listUsersStatement { $stmt = st22; }
| st23=createTriggerStatement { $stmt = st23; }
| st24=dropTriggerStatement { $stmt = st24; }
#endif
| st25=createTypeStatement { $stmt = st25; }
#if 0
| st26=alterTypeStatement { $stmt = st26; }
| st27=dropTypeStatement { $stmt = st27; }
| st28=createFunctionStatement { $stmt = st28; }
@@ -698,6 +695,7 @@ cfamOrdering[shared_ptr<cql3::statements::create_table_statement::raw_statement>
;
#if 0
/**
* CREATE TYPE foo (
* <name1> <type1>,
@@ -705,16 +703,17 @@ cfamOrdering[shared_ptr<cql3::statements::create_table_statement::raw_statement>
* ....
* )
*/
createTypeStatement returns [::shared_ptr<create_type_statement> expr]
@init { bool if_not_exists = false; }
: K_CREATE K_TYPE (K_IF K_NOT K_EXISTS { if_not_exists = true; } )?
tn=userTypeName { $expr = ::make_shared<create_type_statement>(tn, if_not_exists); }
createTypeStatement returns [CreateTypeStatement expr]
@init { boolean ifNotExists = false; }
: K_CREATE K_TYPE (K_IF K_NOT K_EXISTS { ifNotExists = true; } )?
tn=userTypeName { $expr = new CreateTypeStatement(tn, ifNotExists); }
'(' typeColumns[expr] ( ',' typeColumns[expr]? )* ')'
;
typeColumns[::shared_ptr<create_type_statement> expr]
: k=ident v=comparatorType { $expr->add_definition(k, v); }
typeColumns[CreateTypeStatement expr]
: k=ident v=comparatorType { $expr.addDefinition(k, v); }
;
#endif
/**

View File

@@ -737,7 +737,7 @@ public:
/** A condition on a collection element. For example: "IF col['key'] = 'foo'" */
static ::shared_ptr<raw> collection_condition(::shared_ptr<term::raw> value, ::shared_ptr<term::raw> collection_element,
const operator_type& op) {
return ::make_shared<raw>(std::move(value), std::vector<::shared_ptr<term::raw>>{}, ::shared_ptr<abstract_marker::in_raw>{}, std::move(collection_element), op);
return ::make_shared<raw>(std::move(value), std::vector<::shared_ptr<term::raw>>{}, ::shared_ptr<abstract_marker::in_raw>{}, std::move(collection_element), operator_type::IN);
}
/** An IN condition on a collection element. For example: "IF col['key'] IN ('foo', 'bar', ...)" */

View File

@@ -121,7 +121,3 @@ column_identifier::new_selector_factory(database& db, schema_ptr schema, std::ve
}
}
bool cql3::column_identifier::text_comparator::operator()(const cql3::column_identifier& c1, const cql3::column_identifier& c2) const {
return c1.text() < c2.text();
}

View File

@@ -61,11 +61,6 @@ public:
private:
sstring _text;
public:
// less comparator sorting by text
struct text_comparator {
bool operator()(const column_identifier& c1, const column_identifier& c2) const;
};
column_identifier(sstring raw_text, bool keep_case);
column_identifier(bytes bytes_, data_type type);

View File

@@ -58,10 +58,10 @@ public:
virtual void reset() override {
_count = 0;
}
virtual opt_bytes compute(cql_serialization_format sf) override {
virtual opt_bytes compute(serialization_format sf) override {
return long_type->decompose(_count);
}
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
virtual void add_input(serialization_format sf, const std::vector<opt_bytes>& values) override {
++_count;
}
};
@@ -83,10 +83,10 @@ public:
virtual void reset() override {
_sum = {};
}
virtual opt_bytes compute(cql_serialization_format sf) override {
virtual opt_bytes compute(serialization_format sf) override {
return data_type_for<Type>()->decompose(_sum);
}
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
virtual void add_input(serialization_format sf, const std::vector<opt_bytes>& values) override {
if (!values[0]) {
return;
}
@@ -120,14 +120,14 @@ public:
_sum = {};
_count = 0;
}
virtual opt_bytes compute(cql_serialization_format sf) override {
virtual opt_bytes compute(serialization_format sf) override {
Type ret = 0;
if (_count) {
ret = _sum / _count;
}
return data_type_for<Type>()->decompose(ret);
}
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
virtual void add_input(serialization_format sf, const std::vector<opt_bytes>& values) override {
if (!values[0]) {
return;
}
@@ -159,13 +159,13 @@ public:
virtual void reset() override {
_max = {};
}
virtual opt_bytes compute(cql_serialization_format sf) override {
virtual opt_bytes compute(serialization_format sf) override {
if (!_max) {
return {};
}
return data_type_for<Type>()->decompose(*_max);
}
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
virtual void add_input(serialization_format sf, const std::vector<opt_bytes>& values) override {
if (!values[0]) {
return;
}
@@ -206,13 +206,13 @@ public:
virtual void reset() override {
_min = {};
}
virtual opt_bytes compute(cql_serialization_format sf) override {
virtual opt_bytes compute(serialization_format sf) override {
if (!_min) {
return {};
}
return data_type_for<Type>()->decompose(*_min);
}
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
virtual void add_input(serialization_format sf, const std::vector<opt_bytes>& values) override {
if (!values[0]) {
return;
}
@@ -255,10 +255,10 @@ public:
virtual void reset() override {
_count = 0;
}
virtual opt_bytes compute(cql_serialization_format sf) override {
virtual opt_bytes compute(serialization_format sf) override {
return long_type->decompose(_count);
}
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
virtual void add_input(serialization_format sf, const std::vector<opt_bytes>& values) override {
if (!values[0]) {
return;
}

View File

@@ -77,7 +77,7 @@ public:
* @param protocol_version native protocol version
* @param values the values to add to the aggregate.
*/
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) = 0;
virtual void add_input(serialization_format sf, const std::vector<opt_bytes>& values) = 0;
/**
* Computes and returns the aggregate current value.
@@ -85,7 +85,7 @@ public:
* @param protocol_version native protocol version
* @return the aggregate current value.
*/
virtual opt_bytes compute(cql_serialization_format sf) = 0;
virtual opt_bytes compute(serialization_format sf) = 0;
/**
* Reset this aggregate.

View File

@@ -58,7 +58,7 @@ shared_ptr<function>
make_to_blob_function(data_type from_type) {
auto name = from_type->as_cql3_type()->to_string() + "asblob";
return make_native_scalar_function<true>(name, bytes_type, { from_type },
[] (cql_serialization_format sf, const std::vector<bytes_opt>& parameters) {
[] (serialization_format sf, const std::vector<bytes_opt>& parameters) {
return parameters[0];
});
}
@@ -68,7 +68,7 @@ shared_ptr<function>
make_from_blob_function(data_type to_type) {
sstring name = sstring("blobas") + to_type->as_cql3_type()->to_string();
return make_native_scalar_function<true>(name, to_type, { bytes_type },
[name, to_type] (cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
[name, to_type] (serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
auto&& val = parameters[0];
if (!val) {
return val;
@@ -89,7 +89,7 @@ inline
shared_ptr<function>
make_varchar_as_blob_fct() {
return make_native_scalar_function<true>("varcharasblob", bytes_type, { utf8_type },
[] (cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
[] (serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
return parameters[0];
});
}
@@ -98,7 +98,7 @@ inline
shared_ptr<function>
make_blob_as_varchar_fct() {
return make_native_scalar_function<true>("blobasvarchar", utf8_type, { bytes_type },
[] (cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
[] (serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
return parameters[0];
});
}

View File

@@ -61,11 +61,11 @@ public:
virtual shared_ptr<terminal> bind(const query_options& options) override;
virtual bytes_view_opt bind_and_get(const query_options& options) override;
private:
static bytes_opt execute_internal(cql_serialization_format sf, scalar_function& fun, std::vector<bytes_opt> params);
static bytes_opt execute_internal(serialization_format sf, scalar_function& fun, std::vector<bytes_opt> params);
public:
virtual bool contains_bind_marker() const override;
private:
static shared_ptr<terminal> make_terminal(shared_ptr<function> fun, bytes_opt result, cql_serialization_format sf);
static shared_ptr<terminal> make_terminal(shared_ptr<function> fun, bytes_opt result, serialization_format sf);
public:
class raw : public term::raw {
function_name _name;

View File

@@ -299,7 +299,7 @@ function_call::collect_marker_specification(shared_ptr<variable_specifications>
shared_ptr<terminal>
function_call::bind(const query_options& options) {
return make_terminal(_fun, to_bytes_opt(bind_and_get(options)), options.get_cql_serialization_format());
return make_terminal(_fun, to_bytes_opt(bind_and_get(options)), options.get_serialization_format());
}
bytes_view_opt
@@ -315,12 +315,12 @@ function_call::bind_and_get(const query_options& options) {
}
buffers.push_back(std::move(to_bytes_opt(val)));
}
auto result = execute_internal(options.get_cql_serialization_format(), *_fun, std::move(buffers));
auto result = execute_internal(options.get_serialization_format(), *_fun, std::move(buffers));
return options.make_temporary(result);
}
bytes_opt
function_call::execute_internal(cql_serialization_format sf, scalar_function& fun, std::vector<bytes_opt> params) {
function_call::execute_internal(serialization_format sf, scalar_function& fun, std::vector<bytes_opt> params) {
bytes_opt result = fun.execute(sf, params);
try {
// Check the method didn't lied on it's declared return type
@@ -347,7 +347,7 @@ function_call::contains_bind_marker() const {
}
shared_ptr<terminal>
function_call::make_terminal(shared_ptr<function> fun, bytes_opt result, cql_serialization_format sf) {
function_call::make_terminal(shared_ptr<function> fun, bytes_opt result, serialization_format sf) {
if (!dynamic_pointer_cast<const collection_type_impl>(fun->return_type())) {
return ::make_shared<constants::value>(std::move(result));
}
@@ -413,7 +413,7 @@ function_call::raw::prepare(database& db, const sstring& keyspace, ::shared_ptr<
// If all parameters are terminal and the function is pure, we can
// evaluate it now, otherwise we'd have to wait execution time
if (all_terminal && scalar_fun->is_pure()) {
return make_terminal(scalar_fun, execute(*scalar_fun, parameters), query_options::DEFAULT.get_cql_serialization_format());
return make_terminal(scalar_fun, execute(*scalar_fun, parameters), query_options::DEFAULT.get_serialization_format());
} else {
return ::make_shared<function_call>(scalar_fun, parameters);
}
@@ -429,7 +429,7 @@ function_call::raw::execute(scalar_function& fun, std::vector<shared_ptr<term>>
buffers.push_back(std::move(param));
}
return execute_internal(cql_serialization_format::internal(), fun, buffers);
return execute_internal(serialization_format::internal(), fun, buffers);
}
assignment_testable::test_result

View File

@@ -74,10 +74,7 @@ public:
: native_scalar_function(std::move(name), std::move(return_type), std::move(arg_types))
, _func(std::forward<Func>(func)) {
}
virtual bool is_pure() override {
return Pure;
}
virtual bytes_opt execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) override {
virtual bytes_opt execute(serialization_format sf, const std::vector<bytes_opt>& parameters) override {
return _func(sf, parameters);
}
};

View File

@@ -58,7 +58,7 @@ public:
* @return the result of applying this function to the parameter
* @throws InvalidRequestException if this function cannot not be applied to the parameter
*/
virtual bytes_opt execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) = 0;
virtual bytes_opt execute(serialization_format sf, const std::vector<bytes_opt>& parameters) = 0;
};

View File

@@ -56,7 +56,7 @@ inline
shared_ptr<function>
make_now_fct() {
return make_native_scalar_function<false>("now", timeuuid_type, {},
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
[] (serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
return {to_bytes(utils::UUID_gen::get_time_UUID())};
});
}
@@ -65,7 +65,7 @@ inline
shared_ptr<function>
make_min_timeuuid_fct() {
return make_native_scalar_function<true>("mintimeuuid", timeuuid_type, { timestamp_type },
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
[] (serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
auto& bb = values[0];
if (!bb) {
return {};
@@ -84,7 +84,7 @@ inline
shared_ptr<function>
make_max_timeuuid_fct() {
return make_native_scalar_function<true>("maxtimeuuid", timeuuid_type, { timestamp_type },
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
[] (serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
// FIXME: should values be a vector<optional<bytes>>?
auto& bb = values[0];
if (!bb) {
@@ -104,7 +104,7 @@ inline
shared_ptr<function>
make_date_of_fct() {
return make_native_scalar_function<true>("dateof", timestamp_type, { timeuuid_type },
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
[] (serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
using namespace utils;
auto& bb = values[0];
if (!bb) {
@@ -119,7 +119,7 @@ inline
shared_ptr<function>
make_unix_timestamp_of_fcf() {
return make_native_scalar_function<true>("unixtimestampof", long_type, { timeuuid_type },
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
[] (serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
using namespace utils;
auto& bb = values[0];
if (!bb) {

View File

@@ -61,9 +61,10 @@ public:
, _schema(s) {
}
bytes_opt execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) override {
auto key = partition_key::from_optional_exploded(*_schema, parameters);
auto tok = dht::global_partitioner().get_token(*_schema, key);
bytes_opt execute(serialization_format sf, const std::vector<bytes_opt>& parameters) override {
auto buf = _schema->partition_key_type()->serialize_optionals(parameters);
auto view = partition_key_view::from_bytes(std::move(buf));
auto tok = dht::global_partitioner().get_token(*_schema, view);
warn(unimplemented::cause::VALIDATION);
return dht::global_partitioner().token_to_bytes(tok);
}

View File

@@ -53,7 +53,7 @@ inline
shared_ptr<function>
make_uuid_fct() {
return make_native_scalar_function<false>("uuid", uuid_type, {},
[] (cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
[] (serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
return {uuid_type->decompose(utils::make_random_uuid())};
});
}

View File

@@ -108,7 +108,7 @@ lists::literal::to_string() const {
}
lists::value
lists::value::from_serialized(bytes_view v, list_type type, cql_serialization_format sf) {
lists::value::from_serialized(bytes_view v, list_type type, serialization_format sf) {
try {
// Collections have this small hack that validate cannot be called on a serialized object,
// but compose does the validation (so we're fine).
@@ -128,11 +128,11 @@ lists::value::from_serialized(bytes_view v, list_type type, cql_serialization_fo
bytes_opt
lists::value::get(const query_options& options) {
return get_with_protocol_version(options.get_cql_serialization_format());
return get_with_protocol_version(options.get_serialization_format());
}
bytes
lists::value::get_with_protocol_version(cql_serialization_format sf) {
lists::value::get_with_protocol_version(serialization_format sf) {
// Can't use boost::indirect_iterator, because optional is not an iterator
auto deref = [] (bytes_opt& x) { return *x; };
return collection_type_impl::pack(
@@ -212,7 +212,7 @@ lists::marker::bind(const query_options& options) {
if (!value) {
return nullptr;
} else {
return make_shared(value::from_serialized(*value, std::move(ltype), options.get_cql_serialization_format()));
return make_shared(value::from_serialized(*value, std::move(ltype), options.get_serialization_format()));
}
}
@@ -259,10 +259,7 @@ lists::setter_by_index::execute(mutation& m, const exploded_clustering_prefix& p
// we should not get here for frozen lists
assert(column.type->is_multi_cell()); // "Attempted to set an individual element on a frozen list";
std::experimental::optional<clustering_key> row_key;
if (!column.is_static()) {
row_key = clustering_key::from_clustering_prefix(*params._schema, prefix);
}
auto row_key = clustering_key::from_clustering_prefix(*params._schema, prefix);
auto index = _idx->bind_and_get(params._options);
auto value = _t->bind_and_get(params._options);
@@ -272,30 +269,32 @@ lists::setter_by_index::execute(mutation& m, const exploded_clustering_prefix& p
}
auto idx = net::ntoh(int32_t(*unaligned_cast<int32_t>(index->begin())));
auto&& existing_list_opt = params.get_prefetched_list(m.key(), std::move(row_key), column);
auto existing_list_opt = params.get_prefetched_list(m.key(), row_key, column);
if (!existing_list_opt) {
throw exceptions::invalid_request_exception("Attempted to set an element on a list which is null");
}
collection_mutation_view existing_list_ser = *existing_list_opt;
auto ltype = dynamic_pointer_cast<const list_type_impl>(column.type);
auto&& existing_list = *existing_list_opt;
collection_type_impl::mutation_view existing_list = ltype->deserialize_mutation_form(existing_list_ser);
// we verified that index is an int32_type
if (idx < 0 || size_t(idx) >= existing_list.size()) {
if (idx < 0 || size_t(idx) >= existing_list.cells.size()) {
throw exceptions::invalid_request_exception(sprint("List index %d out of bound, list has size %d",
idx, existing_list.size()));
idx, existing_list.cells.size()));
}
const bytes& eidx = existing_list[idx].key;
bytes_view eidx = existing_list.cells[idx].first;
list_type_impl::mutation mut;
mut.cells.reserve(1);
if (!value) {
mut.cells.emplace_back(eidx, params.make_dead_cell());
mut.cells.emplace_back(to_bytes(eidx), params.make_dead_cell());
} else {
if (value->size() > std::numeric_limits<uint16_t>::max()) {
throw exceptions::invalid_request_exception(
sprint("List value is too long. List values are limited to %d bytes but %d bytes value provided",
std::numeric_limits<uint16_t>::max(), value->size()));
}
mut.cells.emplace_back(eidx, params.make_cell(*value));
mut.cells.emplace_back(to_bytes(eidx), params.make_cell(*value));
}
auto smut = ltype->serialize_mutation_form(mut);
m.set_cell(prefix, column, atomic_cell_or_collection::from_collection_mutation(std::move(smut)));
@@ -338,8 +337,13 @@ lists::do_append(shared_ptr<term> t,
if (!value) {
m.set_cell(prefix, column, params.make_dead_cell());
} else {
auto newv = list_value->get_with_protocol_version(cql_serialization_format::internal());
m.set_cell(prefix, column, params.make_cell(std::move(newv)));
auto&& to_add = list_value->_elements;
auto deref = [] (const bytes_opt& v) { return *v; };
auto&& newv = collection_mutation{list_type_impl::pack(
boost::make_transform_iterator(to_add.begin(), deref),
boost::make_transform_iterator(to_add.end(), deref),
to_add.size(), serialization_format::internal())};
m.set_cell(prefix, column, atomic_cell_or_collection::from_collection_mutation(std::move(newv)));
}
}
}
@@ -379,13 +383,8 @@ lists::discarder::requires_read() {
void
lists::discarder::execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) {
assert(column.type->is_multi_cell()); // "Attempted to delete from a frozen list";
std::experimental::optional<clustering_key> row_key;
if (!column.is_static()) {
row_key = clustering_key::from_clustering_prefix(*params._schema, prefix);
}
auto&& existing_list = params.get_prefetched_list(m.key(), std::move(row_key), column);
auto&& row_key = clustering_key::from_clustering_prefix(*params._schema, prefix);
auto&& existing_list = params.get_prefetched_list(m.key(), row_key, column);
// We want to call bind before possibly returning to reject queries where the value provided is not a list.
auto&& value = _t->bind(params._options);
@@ -395,9 +394,9 @@ lists::discarder::execute(mutation& m, const exploded_clustering_prefix& prefix,
return;
}
auto&& elist = *existing_list;
auto&& elist = ltype->deserialize_mutation_form(*existing_list);
if (elist.empty()) {
if (elist.cells.empty()) {
return;
}
@@ -414,14 +413,14 @@ lists::discarder::execute(mutation& m, const exploded_clustering_prefix& prefix,
// toDiscard will be small and keeping a list will be more efficient.
auto&& to_discard = lvalue->_elements;
collection_type_impl::mutation mnew;
for (auto&& cell : elist) {
for (auto&& cell : elist.cells) {
auto have_value = [&] (bytes_view value) {
return std::find_if(to_discard.begin(), to_discard.end(),
[ltype, value] (auto&& v) { return ltype->get_elements_type()->equal(*v, value); })
!= to_discard.end();
};
if (have_value(cell.value)) {
mnew.cells.emplace_back(cell.key, params.make_dead_cell());
if (cell.second.is_live() && have_value(cell.second.value())) {
mnew.cells.emplace_back(bytes(cell.first.begin(), cell.first.end()), params.make_dead_cell());
}
}
auto mnew_ser = ltype->serialize_mutation_form(mnew);
@@ -445,21 +444,18 @@ lists::discarder_by_index::execute(mutation& m, const exploded_clustering_prefix
auto cvalue = dynamic_pointer_cast<constants::value>(index);
assert(cvalue);
std::experimental::optional<clustering_key> row_key;
if (!column.is_static()) {
row_key = clustering_key::from_clustering_prefix(*params._schema, prefix);
}
auto&& existing_list_opt = params.get_prefetched_list(m.key(), std::move(row_key), column);
auto row_key = clustering_key::from_clustering_prefix(*params._schema, prefix);
auto&& existing_list = params.get_prefetched_list(m.key(), row_key, column);
int32_t idx = read_simple_exactly<int32_t>(*cvalue->_bytes);
if (!existing_list_opt) {
if (!existing_list) {
throw exceptions::invalid_request_exception("Attempted to delete an element from a list which is null");
}
auto&& existing_list = *existing_list_opt;
if (idx < 0 || size_t(idx) >= existing_list.size()) {
throw exceptions::invalid_request_exception(sprint("List index %d out of bound, list has size %d", idx, existing_list.size()));
auto&& deserialized = ltype->deserialize_mutation_form(*existing_list);
if (idx < 0 || size_t(idx) >= deserialized.cells.size()) {
throw exceptions::invalid_request_exception(sprint("List index %d out of bound, list has size %d", idx, deserialized.cells.size()));
}
collection_type_impl::mutation mut;
mut.cells.emplace_back(existing_list[idx].key, params.make_dead_cell());
mut.cells.emplace_back(to_bytes(deserialized.cells[idx].first), params.make_dead_cell());
m.set_cell(prefix, column, ltype->serialize_mutation_form(mut));
}

View File

@@ -78,9 +78,9 @@ public:
explicit value(std::vector<bytes_opt> elements)
: _elements(std::move(elements)) {
}
static value from_serialized(bytes_view v, list_type type, cql_serialization_format sf);
static value from_serialized(bytes_view v, list_type type, serialization_format sf);
virtual bytes_opt get(const query_options& options) override;
virtual bytes get_with_protocol_version(cql_serialization_format sf) override;
virtual bytes get_with_protocol_version(serialization_format sf) override;
bool equals(shared_ptr<list_type_impl> lt, const value& v);
virtual std::vector<bytes_opt> get_elements() override;
virtual sstring to_string() const;

View File

@@ -152,7 +152,7 @@ maps::literal::to_string() const {
}
maps::value
maps::value::from_serialized(bytes_view value, map_type type, cql_serialization_format sf) {
maps::value::from_serialized(bytes_view value, map_type type, serialization_format sf) {
try {
// Collections have this small hack that validate cannot be called on a serialized object,
// but compose does the validation (so we're fine).
@@ -171,11 +171,11 @@ maps::value::from_serialized(bytes_view value, map_type type, cql_serialization_
bytes_opt
maps::value::get(const query_options& options) {
return get_with_protocol_version(options.get_cql_serialization_format());
return get_with_protocol_version(options.get_serialization_format());
}
bytes
maps::value::get_with_protocol_version(cql_serialization_format sf) {
maps::value::get_with_protocol_version(serialization_format sf) {
//FIXME: share code with serialize_partially_deserialized_form
size_t len = collection_value_len(sf) * map.size() * 2 + collection_size_len(sf);
for (auto&& e : map) {
@@ -257,7 +257,7 @@ maps::marker::bind(const query_options& options) {
maps::value::from_serialized(*val,
static_pointer_cast<const map_type_impl>(
_receiver->type),
options.get_cql_serialization_format())) :
options.get_serialization_format())) :
nullptr;
}
@@ -333,7 +333,7 @@ maps::do_put(mutation& m, const exploded_clustering_prefix& prefix, const update
m.set_cell(prefix, column, params.make_dead_cell());
} else {
auto v = map_type_impl::serialize_partially_deserialized_form({map_value->map.begin(), map_value->map.end()},
cql_serialization_format::internal());
serialization_format::internal());
m.set_cell(prefix, column, params.make_cell(std::move(v)));
}
}

View File

@@ -81,9 +81,9 @@ public:
value(std::map<bytes, bytes, serialized_compare> map)
: map(std::move(map)) {
}
static value from_serialized(bytes_view value, map_type type, cql_serialization_format sf);
static value from_serialized(bytes_view value, map_type type, serialization_format sf);
virtual bytes_opt get(const query_options& options) override;
virtual bytes get_with_protocol_version(cql_serialization_format sf);
virtual bytes get_with_protocol_version(serialization_format sf);
bool equals(map_type mt, const value& v);
virtual sstring to_string() const;
};

View File

@@ -47,7 +47,7 @@ namespace cql3 {
thread_local const query_options::specific_options query_options::specific_options::DEFAULT{-1, {}, {}, api::missing_timestamp};
thread_local query_options query_options::DEFAULT{db::consistency_level::ONE, std::experimental::nullopt,
{}, false, query_options::specific_options::DEFAULT, cql_serialization_format::latest()};
{}, false, query_options::specific_options::DEFAULT, version::native_protocol(), serialization_format::use_32_bit()};
query_options::query_options(db::consistency_level consistency,
std::experimental::optional<std::vector<sstring_view>> names,
@@ -55,14 +55,16 @@ query_options::query_options(db::consistency_level consistency,
std::vector<bytes_view_opt> value_views,
bool skip_metadata,
specific_options options,
cql_serialization_format sf)
int32_t protocol_version,
serialization_format sf)
: _consistency(consistency)
, _names(std::move(names))
, _values(std::move(values))
, _value_views(std::move(value_views))
, _skip_metadata(skip_metadata)
, _options(std::move(options))
, _cql_serialization_format(sf)
, _protocol_version(protocol_version)
, _serialization_format(sf)
{
}
@@ -71,7 +73,8 @@ query_options::query_options(db::consistency_level consistency,
std::vector<bytes_view_opt> value_views,
bool skip_metadata,
specific_options options,
cql_serialization_format sf)
int32_t protocol_version,
serialization_format sf)
: query_options(
consistency,
std::move(names),
@@ -79,6 +82,7 @@ query_options::query_options(db::consistency_level consistency,
std::move(value_views),
skip_metadata,
std::move(options),
protocol_version,
sf
)
{
@@ -90,7 +94,7 @@ query_options::query_options(query_options&& o, std::vector<std::vector<bytes_vi
std::vector<query_options> tmp;
tmp.reserve(value_views.size());
std::transform(value_views.begin(), value_views.end(), std::back_inserter(tmp), [this](auto& vals) {
return query_options(_consistency, {}, vals, _skip_metadata, _options, _cql_serialization_format);
return query_options(_consistency, {}, vals, _skip_metadata, _options, _protocol_version, _serialization_format);
});
_batch_options = std::move(tmp);
}
@@ -103,7 +107,8 @@ query_options::query_options(db::consistency_level cl, std::vector<bytes_opt> va
{},
false,
query_options::specific_options::DEFAULT,
cql_serialization_format::latest()
version::native_protocol(),
serialization_format::use_32_bit()
)
{
for (auto&& value : _values) {
@@ -173,12 +178,12 @@ api::timestamp_type query_options::get_timestamp(service::query_state& state) co
int query_options::get_protocol_version() const
{
return _cql_serialization_format.protocol_version();
return _protocol_version;
}
cql_serialization_format query_options::get_cql_serialization_format() const
serialization_format query_options::get_serialization_format() const
{
return _cql_serialization_format;
return _serialization_format;
}
const query_options::specific_options& query_options::get_specific_options() const

View File

@@ -48,7 +48,7 @@
#include "service/pager/paging_state.hh"
#include "cql3/column_specification.hh"
#include "cql3/column_identifier.hh"
#include "cql_serialization_format.hh"
#include "serialization_format.hh"
namespace cql3 {
@@ -74,7 +74,8 @@ private:
mutable std::vector<std::vector<int8_t>> _temporaries;
const bool _skip_metadata;
const specific_options _options;
cql_serialization_format _cql_serialization_format;
const int32_t _protocol_version; // transient
serialization_format _serialization_format;
std::experimental::optional<std::vector<query_options>> _batch_options;
public:
query_options(query_options&&) = default;
@@ -86,19 +87,22 @@ public:
std::vector<bytes_view_opt> value_views,
bool skip_metadata,
specific_options options,
cql_serialization_format sf);
int32_t protocol_version,
serialization_format sf);
explicit query_options(db::consistency_level consistency,
std::experimental::optional<std::vector<sstring_view>> names,
std::vector<bytes_view_opt> value_views,
bool skip_metadata,
specific_options options,
cql_serialization_format sf);
int32_t protocol_version,
serialization_format sf);
explicit query_options(db::consistency_level consistency,
std::vector<std::vector<bytes_view_opt>> value_views,
bool skip_metadata,
specific_options options,
cql_serialization_format sf);
int32_t protocol_version,
serialization_format sf);
// Batch query_options constructor
explicit query_options(query_options&&, std::vector<std::vector<bytes_view_opt>> value_views);
@@ -127,7 +131,7 @@ public:
* a native protocol request (i.e. it's been allocated locally or by CQL-over-thrift).
*/
int get_protocol_version() const;
cql_serialization_format get_cql_serialization_format() const;
serialization_format get_serialization_format() const;
// Mainly for the sake of BatchQueryOptions
const specific_options& get_specific_options() const;
const query_options& for_statement(size_t i) const;

View File

@@ -423,9 +423,10 @@ void query_processor::migration_subscriber::on_update_keyspace(const sstring& ks
void query_processor::migration_subscriber::on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool columns_changed)
{
// #1255: Ignoring columns_changed deliberately.
log.info("Column definitions for {}.{} changed, invalidating related prepared statements", ks_name, cf_name);
remove_invalid_prepared_statements(ks_name, cf_name);
if (columns_changed) {
log.info("Column definitions for {}.{} changed, invalidating related prepared statements", ks_name, cf_name);
remove_invalid_prepared_statements(ks_name, cf_name);
}
}
void query_processor::migration_subscriber::on_update_user_type(const sstring& ks_name, const sstring& type_name)

View File

@@ -287,13 +287,6 @@ public:
};
inline ::shared_ptr<cql3::metadata> make_empty_metadata()
{
auto result = ::make_shared<cql3::metadata>(std::vector<::shared_ptr<cql3::column_specification>>{});
result->set_skip_metadata();
return result;
}
class result_set {
#if 0
private static final ColumnIdentifier COUNT_COLUMN = new ColumnIdentifier("count", false);

View File

@@ -53,7 +53,7 @@ public:
return true;
}
virtual void add_input(cql_serialization_format sf, result_set_builder& rs) override {
virtual void add_input(serialization_format sf, result_set_builder& rs) override {
// Aggregation of aggregation is not supported
size_t m = _arg_selectors.size();
for (size_t i = 0; i < m; ++i) {
@@ -65,7 +65,7 @@ public:
_aggregate->add_input(sf, _args);
}
virtual bytes_opt get_output(cql_serialization_format sf) override {
virtual bytes_opt get_output(serialization_format sf) override {
return _aggregate->compute(sf);
}

View File

@@ -87,11 +87,11 @@ public:
return false;
}
virtual void add_input(cql_serialization_format sf, result_set_builder& rs) override {
virtual void add_input(serialization_format sf, result_set_builder& rs) override {
_selected->add_input(sf, rs);
}
virtual bytes_opt get_output(cql_serialization_format sf) override {
virtual bytes_opt get_output(serialization_format sf) override {
auto&& value = _selected->get_output(sf);
if (!value) {
return std::experimental::nullopt;

View File

@@ -57,7 +57,7 @@ public:
return _arg_selectors[0]->is_aggregate();
}
virtual void add_input(cql_serialization_format sf, result_set_builder& rs) override {
virtual void add_input(serialization_format sf, result_set_builder& rs) override {
size_t m = _arg_selectors.size();
for (size_t i = 0; i < m; ++i) {
auto&& s = _arg_selectors[i];
@@ -68,7 +68,7 @@ public:
virtual void reset() override {
}
virtual bytes_opt get_output(cql_serialization_format sf) override {
virtual bytes_opt get_output(serialization_format sf) override {
size_t m = _arg_selectors.size();
for (size_t i = 0; i < m; ++i) {
auto&& s = _arg_selectors[i];

View File

@@ -52,11 +52,6 @@ selectable::writetime_or_ttl::new_selector_factory(database& db, schema_ptr s, s
return writetime_or_ttl_selector::new_factory(def->name_as_text(), add_and_get_index(*def, defs), _is_writetime);
}
sstring
selectable::writetime_or_ttl::to_string() const {
return sprint("%s(%s)", _is_writetime ? "writetime" : "ttl", _id->to_string());
}
shared_ptr<selectable>
selectable::writetime_or_ttl::raw::prepare(schema_ptr s) {
return make_shared<writetime_or_ttl>(_id->prepare_column_identifier(s), _is_writetime);
@@ -83,11 +78,6 @@ selectable::with_function::new_selector_factory(database& db, schema_ptr s, std:
return abstract_function_selector::new_factory(std::move(fun), std::move(factories));
}
sstring
selectable::with_function::to_string() const {
return sprint("%s(%s)", _function_name.name, join(", ", _args));
}
shared_ptr<selectable>
selectable::with_function::raw::prepare(schema_ptr s) {
std::vector<shared_ptr<selectable>> prepared_args;
@@ -111,7 +101,7 @@ selectable::with_field_selection::new_selector_factory(database& db, schema_ptr
if (!ut) {
throw exceptions::invalid_request_exception(
sprint("Invalid field selection: %s of type %s is not a user type",
_selected->to_string(), factory->new_instance()->get_type()->as_cql3_type()));
"FIXME: selectable" /* FIXME: _selected */, ut->as_cql3_type()));
}
for (size_t i = 0; i < ut->size(); ++i) {
if (ut->field_name(i) != _field->bytes_) {
@@ -120,12 +110,7 @@ selectable::with_field_selection::new_selector_factory(database& db, schema_ptr
return field_selector::new_factory(std::move(ut), i, std::move(factory));
}
throw exceptions::invalid_request_exception(sprint("%s of type %s has no field %s",
_selected->to_string(), ut->as_cql3_type(), _field));
}
sstring
selectable::with_field_selection::to_string() const {
return sprint("%s.%s", _selected->to_string(), _field->to_string());
"FIXME: selectable" /* FIXME: _selected */, ut->as_cql3_type(), _field));
}
shared_ptr<selectable>
@@ -141,10 +126,6 @@ selectable::with_field_selection::raw::processes_selection() const {
return true;
}
std::ostream & operator<<(std::ostream &os, const selectable& s) {
return os << s.to_string();
}
}
}

View File

@@ -55,7 +55,6 @@ class selectable {
public:
virtual ~selectable() {}
virtual ::shared_ptr<selector::factory> new_selector_factory(database& db, schema_ptr schema, std::vector<const column_definition*>& defs) = 0;
virtual sstring to_string() const = 0;
protected:
static size_t add_and_get_index(const column_definition& def, std::vector<const column_definition*>& defs) {
auto i = std::find(defs.begin(), defs.end(), &def);
@@ -85,8 +84,6 @@ public:
class with_field_selection;
};
std::ostream & operator<<(std::ostream &os, const selectable& s);
class selectable::with_function : public selectable {
functions::function_name _function_name;
std::vector<shared_ptr<selectable>> _args;
@@ -95,7 +92,17 @@ public:
: _function_name(std::move(fname)), _args(std::move(args)) {
}
virtual sstring to_string() const override;
#if 0
@Override
public String toString()
{
return new StrBuilder().append(functionName)
.append("(")
.appendWithSeparators(args, ", ")
.append(")")
.toString();
}
#endif
virtual shared_ptr<selector::factory> new_selector_factory(database& db, schema_ptr s, std::vector<const column_definition*>& defs) override;
class raw : public selectable::raw {

View File

@@ -59,7 +59,13 @@ public:
: _selected(std::move(selected)), _field(std::move(field)) {
}
virtual sstring to_string() const override;
#if 0
@Override
public String toString()
{
return String.format("%s.%s", selected, field);
}
#endif
virtual shared_ptr<selector::factory> new_selector_factory(database& db, schema_ptr s, std::vector<const column_definition*>& defs) override;

View File

@@ -63,8 +63,7 @@ selection::selection(schema_ptr schema,
query::partition_slice::option_set selection::get_query_options() {
query::partition_slice::option_set opts;
opts.set_if<query::partition_slice::option::send_timestamp>(_collect_timestamps);
opts.set_if<query::partition_slice::option::send_expiry>(_collect_TTLs);
opts.set_if<query::partition_slice::option::send_timestamp_and_expiry>(_collect_timestamps || _collect_TTLs);
opts.set_if<query::partition_slice::option::send_partition_key>(
std::any_of(_columns.begin(), _columns.end(),
@@ -113,11 +112,11 @@ protected:
_current.clear();
}
virtual std::vector<bytes_opt> get_output_row(cql_serialization_format sf) override {
virtual std::vector<bytes_opt> get_output_row(serialization_format sf) override {
return std::move(_current);
}
virtual void add_input_row(cql_serialization_format sf, result_set_builder& rs) override {
virtual void add_input_row(serialization_format sf, result_set_builder& rs) override {
_current = std::move(*rs.current);
}
@@ -181,7 +180,7 @@ protected:
return _factories->contains_only_aggregate_functions();
}
virtual std::vector<bytes_opt> get_output_row(cql_serialization_format sf) override {
virtual std::vector<bytes_opt> get_output_row(serialization_format sf) override {
std::vector<bytes_opt> output_row;
output_row.reserve(_selectors.size());
for (auto&& s : _selectors) {
@@ -190,7 +189,7 @@ protected:
return output_row;
}
virtual void add_input_row(cql_serialization_format sf, result_set_builder& rs) {
virtual void add_input_row(serialization_format sf, result_set_builder& rs) {
for (auto&& s : _selectors) {
s->add_input(sf, rs);
}
@@ -253,11 +252,11 @@ selection::collect_metadata(schema_ptr schema, const std::vector<::shared_ptr<ra
return r;
}
result_set_builder::result_set_builder(const selection& s, db_clock::time_point now, cql_serialization_format sf)
result_set_builder::result_set_builder(const selection& s, db_clock::time_point now, serialization_format sf)
: _result_set(std::make_unique<result_set>(::make_shared<metadata>(*(s.get_result_metadata()))))
, _selectors(s.new_selectors())
, _now(now)
, _cql_serialization_format(sf)
, _serialization_format(sf)
{
if (s._collect_timestamps) {
_timestamps.resize(s._columns.size(), 0);
@@ -296,16 +295,17 @@ void result_set_builder::add(const column_definition& def, const query::result_a
}
}
void result_set_builder::add_collection(const column_definition& def, bytes_view c) {
current->emplace_back(to_bytes(c));
void result_set_builder::add(const column_definition& def, collection_mutation_view c) {
auto&& ctype = static_cast<const collection_type_impl*>(def.type.get());
current->emplace_back(ctype->to_value(c, _serialization_format));
// timestamps, ttls meaningless for collections
}
void result_set_builder::new_row() {
if (current) {
_selectors->add_input_row(_cql_serialization_format, *this);
_selectors->add_input_row(_serialization_format, *this);
if (!_selectors->is_aggregate()) {
_result_set->add_row(_selectors->get_output_row(_cql_serialization_format));
_result_set->add_row(_selectors->get_output_row(_serialization_format));
_selectors->reset();
}
current->clear();
@@ -319,13 +319,13 @@ void result_set_builder::new_row() {
std::unique_ptr<result_set> result_set_builder::build() {
if (current) {
_selectors->add_input_row(_cql_serialization_format, *this);
_result_set->add_row(_selectors->get_output_row(_cql_serialization_format));
_selectors->add_input_row(_serialization_format, *this);
_result_set->add_row(_selectors->get_output_row(_serialization_format));
_selectors->reset();
current = std::experimental::nullopt;
}
if (_result_set->empty() && _selectors->is_aggregate()) {
_result_set->add_row(_selectors->get_output_row(_cql_serialization_format));
_result_set->add_row(_selectors->get_output_row(_serialization_format));
}
return std::move(_result_set);
}
@@ -344,7 +344,7 @@ void result_set_builder::visitor::add_value(const column_definition& def,
_builder.add_empty();
return;
}
_builder.add_collection(def, *cell);
_builder.add(def, *cell);
} else {
auto cell = i.next_atomic_cell();
if (!cell) {

View File

@@ -69,9 +69,9 @@ public:
* @param rs the <code>ResultSetBuilder</code>
* @throws InvalidRequestException
*/
virtual void add_input_row(cql_serialization_format sf, result_set_builder& rs) = 0;
virtual void add_input_row(serialization_format sf, result_set_builder& rs) = 0;
virtual std::vector<bytes_opt> get_output_row(cql_serialization_format sf) = 0;
virtual std::vector<bytes_opt> get_output_row(serialization_format sf) = 0;
virtual void reset() = 0;
};
@@ -236,13 +236,13 @@ private:
std::vector<api::timestamp_type> _timestamps;
std::vector<int32_t> _ttls;
const db_clock::time_point _now;
cql_serialization_format _cql_serialization_format;
serialization_format _serialization_format;
public:
result_set_builder(const selection& s, db_clock::time_point now, cql_serialization_format sf);
result_set_builder(const selection& s, db_clock::time_point now, serialization_format sf);
void add_empty();
void add(bytes_opt value);
void add(const column_definition& def, const query::result_atomic_cell_view& c);
void add_collection(const column_definition& def, bytes_view c);
void add(const column_definition& def, collection_mutation_view c);
void new_row();
std::unique_ptr<result_set> build();
api::timestamp_type timestamp_of(size_t idx);

View File

@@ -71,7 +71,7 @@ public:
* @param rs the <code>result_set_builder</code>
* @throws InvalidRequestException if a problem occurs while add the input value
*/
virtual void add_input(cql_serialization_format sf, result_set_builder& rs) = 0;
virtual void add_input(serialization_format sf, result_set_builder& rs) = 0;
/**
* Returns the selector output.
@@ -80,7 +80,7 @@ public:
* @return the selector output
* @throws InvalidRequestException if a problem occurs while computing the output value
*/
virtual bytes_opt get_output(cql_serialization_format sf) = 0;
virtual bytes_opt get_output(serialization_format sf) = 0;
/**
* Returns the <code>selector</code> output type.

View File

@@ -88,12 +88,12 @@ public:
, _type(type)
{ }
virtual void add_input(cql_serialization_format sf, result_set_builder& rs) override {
virtual void add_input(serialization_format sf, result_set_builder& rs) override {
// TODO: can we steal it?
_current = (*rs.current)[_idx];
}
virtual bytes_opt get_output(cql_serialization_format sf) override {
virtual bytes_opt get_output(serialization_format sf) override {
return std::move(_current);
}

View File

@@ -58,7 +58,13 @@ public:
: _id(std::move(id)), _is_writetime(is_writetime) {
}
virtual sstring to_string() const override;
#if 0
@Override
public String toString()
{
return (isWritetime ? "writetime" : "ttl") + "(" + id + ")";
}
#endif
virtual shared_ptr<selector::factory> new_selector_factory(database& db, schema_ptr s, std::vector<const column_definition*>& defs) override;

View File

@@ -86,7 +86,7 @@ public:
return make_shared<wtots_factory>(std::move(column_name), idx, is_writetime);
}
virtual void add_input(cql_serialization_format sf, result_set_builder& rs) override {
virtual void add_input(serialization_format sf, result_set_builder& rs) override {
if (_is_writetime) {
int64_t ts = rs.timestamp_of(_idx);
if (ts != api::missing_timestamp) {
@@ -108,7 +108,7 @@ public:
}
}
virtual bytes_opt get_output(cql_serialization_format sf) override {
virtual bytes_opt get_output(serialization_format sf) override {
return _current;
}

View File

@@ -120,7 +120,7 @@ sets::literal::to_string() const {
}
sets::value
sets::value::from_serialized(bytes_view v, set_type type, cql_serialization_format sf) {
sets::value::from_serialized(bytes_view v, set_type type, serialization_format sf) {
try {
// Collections have this small hack that validate cannot be called on a serialized object,
// but compose does the validation (so we're fine).
@@ -138,11 +138,11 @@ sets::value::from_serialized(bytes_view v, set_type type, cql_serialization_form
bytes_opt
sets::value::get(const query_options& options) {
return get_with_protocol_version(options.get_cql_serialization_format());
return get_with_protocol_version(options.get_serialization_format());
}
bytes
sets::value::get_with_protocol_version(cql_serialization_format sf) {
sets::value::get_with_protocol_version(serialization_format sf) {
return collection_type_impl::pack(_elements.begin(), _elements.end(),
_elements.size(), sf);
}
@@ -215,7 +215,7 @@ sets::marker::bind(const query_options& options) {
return nullptr;
} else {
auto as_set_type = static_pointer_cast<const set_type_impl>(_receiver->type);
return make_shared(value::from_serialized(*value, as_set_type, options.get_cql_serialization_format()));
return make_shared(value::from_serialized(*value, as_set_type, options.get_serialization_format()));
}
}
@@ -258,14 +258,16 @@ sets::adder::do_add(mutation& m, const exploded_clustering_prefix& row_key, cons
auto smut = set_type->serialize_mutation_form(mut);
m.set_cell(row_key, column, std::move(smut));
} else if (set_value != nullptr) {
} else {
// for frozen sets, we're overwriting the whole cell
auto v = set_type->serialize_partially_deserialized_form(
{set_value->_elements.begin(), set_value->_elements.end()},
cql_serialization_format::internal());
m.set_cell(row_key, column, params.make_cell(std::move(v)));
} else {
m.set_cell(row_key, column, params.make_dead_cell());
serialization_format::internal());
if (set_value->_elements.empty()) {
m.set_cell(row_key, column, params.make_dead_cell());
} else {
m.set_cell(row_key, column, params.make_cell(std::move(v)));
}
}
}

View File

@@ -78,9 +78,9 @@ public:
value(std::set<bytes, serialized_compare> elements)
: _elements(std::move(elements)) {
}
static value from_serialized(bytes_view v, set_type type, cql_serialization_format sf);
static value from_serialized(bytes_view v, set_type type, serialization_format sf);
virtual bytes_opt get(const query_options& options) override;
virtual bytes get_with_protocol_version(cql_serialization_format sf) override;
virtual bytes get_with_protocol_version(serialization_format sf) override;
bool equals(set_type st, const value& v);
virtual sstring to_string() const override;
};

View File

@@ -169,21 +169,26 @@ public:
}
private:
future<std::vector<mutation>> get_mutations(distributed<service::storage_proxy>& storage, const query_options& options, bool local, api::timestamp_type now) {
// Do not process in parallel because operations like list append/prepend depend on execution order.
return do_with(std::vector<mutation>(), [this, &storage, &options, now, local] (auto&& result) {
return do_for_each(boost::make_counting_iterator<size_t>(0),
boost::make_counting_iterator<size_t>(_statements.size()),
[this, &storage, &options, now, local, &result] (size_t i) {
auto&& statement = _statements[i];
auto&& statement_options = options.for_statement(i);
auto timestamp = _attrs->get_timestamp(now, statement_options);
return statement->get_mutations(storage, statement_options, local, timestamp).then([&result] (auto&& more) {
std::move(more.begin(), more.end(), std::back_inserter(result));
});
}).then([&result] {
return std::move(result);
});
});
struct collector {
std::vector<mutation> _result;
std::vector<mutation> get() && { return std::move(_result); }
void operator()(std::vector<mutation> more) {
std::move(more.begin(), more.end(), std::back_inserter(_result));
}
};
auto get_mutations_for_statement = [this, &storage, &options, now, local] (size_t i) {
auto&& statement = _statements[i];
auto&& statement_options = options.for_statement(i);
auto timestamp = _attrs->get_timestamp(now, statement_options);
return statement->get_mutations(storage, statement_options, local, timestamp);
};
// FIXME: origin tries hard to merge mutations to same keyspace, for
// some reason.
return map_reduce(
boost::make_counting_iterator<size_t>(0),
boost::make_counting_iterator<size_t>(_statements.size()),
get_mutations_for_statement,
collector());
}
public:

View File

@@ -44,7 +44,6 @@
#include <regex>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/algorithm/adjacent_find.hpp>
#include "cql3/statements/create_table_statement.hh"
@@ -174,12 +173,13 @@ create_table_statement::raw_statement::raw_statement(::shared_ptr<cf_name> name,
throw exceptions::invalid_request_exception(sprint("Table names shouldn't be more than %d characters long (got \"%s\")", schema::NAME_LENGTH, cf_name.c_str()));
}
// Check for duplicate column names
auto i = boost::range::adjacent_find(_defined_names, [] (auto&& e1, auto&& e2) {
return e1->text() == e2->text();
});
if (i != _defined_names.end()) {
throw exceptions::invalid_request_exception(sprint("Multiple definition of identifier %s", (*i)->text()));
for (auto&& entry : _defined_names) {
auto c = std::count_if(_defined_names.begin(), _defined_names.end(), [&entry] (auto e) {
return entry->text() == e->text();
});
if (c > 1) {
throw exceptions::invalid_request_exception(sprint("Multiple definition of identifier %s", entry->text().c_str()));
}
}
properties->validate();

View File

@@ -51,7 +51,6 @@
#include "core/shared_ptr.hh"
#include <seastar/util/indirect.hh>
#include <unordered_map>
#include <utility>
#include <vector>
@@ -140,8 +139,7 @@ private:
create_table_statement::column_set_type _static_columns;
bool _use_compact_storage = false;
std::multiset<::shared_ptr<column_identifier>,
indirect_less<::shared_ptr<column_identifier>, column_identifier::text_comparator>> _defined_names;
std::multiset<::shared_ptr<column_identifier>> _defined_names;
bool _if_not_exists;
public:
raw_statement(::shared_ptr<cf_name> name, bool if_not_exists);

View File

@@ -1,156 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Copyright 2016 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include "cql3/statements/create_type_statement.hh"
namespace cql3 {
namespace statements {
create_type_statement::create_type_statement(const ut_name& name, bool if_not_exists)
: _name{name}
, _if_not_exists{if_not_exists}
{
}
void create_type_statement::prepare_keyspace(const service::client_state& state)
{
if (!_name.has_keyspace()) {
_name.set_keyspace(state.get_keyspace());
}
}
void create_type_statement::add_definition(::shared_ptr<column_identifier> name, ::shared_ptr<cql3_type::raw> type)
{
_column_names.emplace_back(name);
_column_types.emplace_back(type);
}
void create_type_statement::check_access(const service::client_state& state)
{
warn(unimplemented::cause::PERMISSIONS);
#if 0
state.hasKeyspaceAccess(keyspace(), Permission.CREATE);
#endif
}
void create_type_statement::validate(distributed<service::storage_proxy>&, const service::client_state& state)
{
#if 0
KSMetaData ksm = Schema.instance.getKSMetaData(name.getKeyspace());
if (ksm == null)
throw new InvalidRequestException(String.format("Cannot add type in unknown keyspace %s", name.getKeyspace()));
if (ksm.userTypes.getType(name.getUserTypeName()) != null && !ifNotExists)
throw new InvalidRequestException(String.format("A user type of name %s already exists", name));
for (CQL3Type.Raw type : columnTypes)
if (type.isCounter())
throw new InvalidRequestException("A user type cannot contain counters");
#endif
}
#if 0
public static void checkForDuplicateNames(UserType type) throws InvalidRequestException
{
for (int i = 0; i < type.size() - 1; i++)
{
ByteBuffer fieldName = type.fieldName(i);
for (int j = i+1; j < type.size(); j++)
{
if (fieldName.equals(type.fieldName(j)))
throw new InvalidRequestException(String.format("Duplicate field name %s in type %s",
UTF8Type.instance.getString(fieldName),
UTF8Type.instance.getString(type.name)));
}
}
}
#endif
shared_ptr<transport::event::schema_change> create_type_statement::change_event()
{
using namespace transport;
return make_shared<transport::event::schema_change>(event::schema_change::change_type::CREATED,
event::schema_change::target_type::TYPE,
keyspace(),
_name.get_string_type_name());
}
const sstring& create_type_statement::keyspace() const
{
return _name.get_keyspace();
}
#if 0
private UserType createType() throws InvalidRequestException
{
List<ByteBuffer> names = new ArrayList<>(columnNames.size());
for (ColumnIdentifier name : columnNames)
names.add(name.bytes);
List<AbstractType<?>> types = new ArrayList<>(columnTypes.size());
for (CQL3Type.Raw type : columnTypes)
types.add(type.prepare(keyspace()).getType());
return new UserType(name.getKeyspace(), name.getUserTypeName(), names, types);
}
#endif
future<bool> create_type_statement::announce_migration(distributed<service::storage_proxy>& proxy, bool is_local_only)
{
throw std::runtime_error("User-defined types are not supported yet");
#if 0
KSMetaData ksm = Schema.instance.getKSMetaData(name.getKeyspace());
assert ksm != null; // should haven't validate otherwise
// Can happen with ifNotExists
if (ksm.userTypes.getType(name.getUserTypeName()) != null)
return false;
UserType type = createType();
checkForDuplicateNames(type);
MigrationManager.announceNewType(type, isLocalOnly);
return true;
#endif
}
}
}

View File

@@ -1,75 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Copyright 2016 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "cql3/statements/schema_altering_statement.hh"
#include "cql3/cql3_type.hh"
#include "cql3/ut_name.hh"
namespace cql3 {

namespace statements {

// CQL3 CREATE TYPE statement: collects the field definitions of a
// user-defined type and reports the resulting schema change.  Execution is
// not implemented yet (announce_migration throws) -- see
// create_type_statement.cc for details.
class create_type_statement : public schema_altering_statement {
    ut_name _name;                                              // type name, possibly keyspace-qualified
    std::vector<::shared_ptr<column_identifier>> _column_names; // field names, parallel to _column_types
    std::vector<::shared_ptr<cql3_type::raw>> _column_types;    // raw field types, parallel to _column_names
    bool _if_not_exists;                                        // true when IF NOT EXISTS was specified
public:
    create_type_statement(const ut_name& name, bool if_not_exists);

    // Defaults the type name's keyspace to the session's current keyspace.
    virtual void prepare_keyspace(const service::client_state& state) override;
    // Adds one field (name + raw type) to the type definition.
    void add_definition(::shared_ptr<column_identifier> name, ::shared_ptr<cql3_type::raw> type);
    virtual void check_access(const service::client_state& state) override;
    virtual void validate(distributed<service::storage_proxy>&, const service::client_state& state) override;
    // Schema-change event (CREATED/TYPE) reported to clients.
    virtual shared_ptr<transport::event::schema_change> change_event() override;
    virtual const sstring& keyspace() const override;
    virtual future<bool> announce_migration(distributed<service::storage_proxy>& proxy, bool is_local_only) override;
};

}

}

View File

@@ -186,30 +186,11 @@ modification_statement::make_update_parameters(
class prefetch_data_builder {
update_parameters::prefetch_data& _data;
const query::partition_slice& _ps;
schema_ptr _schema;
std::experimental::optional<partition_key> _pkey;
private:
void add_cell(update_parameters::prefetch_data::row& cells, const column_definition& def, const std::experimental::optional<bytes_view>& cell) {
if (cell) {
auto ctype = static_pointer_cast<const collection_type_impl>(def.type);
if (!ctype->is_multi_cell()) {
throw std::logic_error(sprint("cannot prefetch frozen collection: %s", def.name_as_text()));
}
auto map_type = map_type_impl::get_instance(ctype->name_comparator(), ctype->value_comparator(), true);
update_parameters::prefetch_data::cell_list list;
// FIXME: Iterate over a range instead of fully exploded collection
auto dv = map_type->deserialize(*cell);
for (auto&& el : value_cast<map_type_impl::native_type>(dv)) {
list.emplace_back(update_parameters::prefetch_data::cell{el.first.serialize(), el.second.serialize()});
}
cells.emplace(def.id, std::move(list));
}
};
public:
prefetch_data_builder(schema_ptr s, update_parameters::prefetch_data& data, const query::partition_slice& ps)
prefetch_data_builder(update_parameters::prefetch_data& data, const query::partition_slice& ps)
: _data(data)
, _ps(ps)
, _schema(std::move(s))
{ }
void accept_new_partition(const partition_key& key, uint32_t row_count) {
@@ -224,9 +205,20 @@ public:
const query::result_row_view& row) {
update_parameters::prefetch_data::row cells;
auto add_cell = [&cells] (column_id id, std::experimental::optional<collection_mutation_view>&& cell) {
if (cell) {
cells.emplace(id, collection_mutation{to_bytes(cell->data)});
}
};
auto static_row_iterator = static_row.iterator();
for (auto&& id : _ps.static_columns) {
add_cell(id, static_row_iterator.next_collection_cell());
}
auto row_iterator = row.iterator();
for (auto&& id : _ps.regular_columns) {
add_cell(cells, _schema->regular_column_at(id), row_iterator.next_collection_cell());
add_cell(id, row_iterator.next_collection_cell());
}
_data.rows.emplace(std::make_pair(*_pkey, key), std::move(cells));
@@ -236,16 +228,7 @@ public:
assert(0);
}
void accept_partition_end(const query::result_row_view& static_row) {
update_parameters::prefetch_data::row cells;
auto static_row_iterator = static_row.iterator();
for (auto&& id : _ps.static_columns) {
add_cell(cells, _schema->static_column_at(id), static_row_iterator.next_collection_cell());
}
_data.rows.emplace(std::make_pair(*_pkey, std::experimental::nullopt), std::move(cells));
}
void accept_partition_end(const query::result_row_view& static_row) {}
};
future<update_parameters::prefetched_rows_type>
@@ -282,8 +265,7 @@ modification_statement::read_required_rows(
std::move(regular_cols),
query::partition_slice::option_set::of<
query::partition_slice::option::send_partition_key,
query::partition_slice::option::send_clustering_key,
query::partition_slice::option::collections_as_maps>());
query::partition_slice::option::send_clustering_key>());
std::vector<query::partition_range> pr;
for (auto&& pk : *keys) {
pr.emplace_back(dht::global_partitioner().decorate_key(*s, pk));
@@ -296,7 +278,7 @@ modification_statement::read_required_rows(
bytes_ostream buf(result->buf());
query::result_view v(buf.linearize());
auto prefetched_rows = update_parameters::prefetched_rows_type({update_parameters::prefetch_data(s)});
v.consume(ps, prefetch_data_builder(s, prefetched_rows.value(), ps));
v.consume(ps, prefetch_data_builder(prefetched_rows.value(), ps));
return prefetched_rows;
});
}

View File

@@ -117,11 +117,6 @@ select_statement::for_selection(schema_ptr schema, ::shared_ptr<selection::selec
::shared_ptr<term>{});
}
::shared_ptr<cql3::metadata> select_statement::get_result_metadata() const {
// FIXME: COUNT needs special result metadata handling.
return _selection->get_result_metadata();
}
uint32_t select_statement::get_bound_terms() {
return _bound_terms;
}
@@ -175,7 +170,7 @@ select_statement::make_partition_slice(const query_options& options) {
if (_parameters->is_distinct()) {
_opts.set(query::partition_slice::option::distinct);
return query::partition_slice({ query::clustering_range::make_open_ended_both_sides() },
std::move(static_columns), {}, _opts, nullptr, options.get_cql_serialization_format());
std::move(static_columns), {}, _opts);
}
auto bounds = _restrictions->get_clustering_bounds(options);
@@ -184,7 +179,7 @@ select_statement::make_partition_slice(const query_options& options) {
std::reverse(bounds.begin(), bounds.end());
}
return query::partition_slice(std::move(bounds),
std::move(static_columns), std::move(regular_columns), _opts, nullptr, options.get_cql_serialization_format());
std::move(static_columns), std::move(regular_columns), _opts);
}
int32_t select_statement::get_limit(const query_options& options) const {
@@ -251,7 +246,7 @@ select_statement::execute(distributed<service::storage_proxy>& proxy, service::q
if (aggregate) {
return do_with(
cql3::selection::result_set_builder(*_selection, now,
options.get_cql_serialization_format()),
options.get_serialization_format()),
[p, page_size, now](auto& builder) {
return do_until([p] {return p->is_exhausted();},
[p, &builder, page_size, now] {
@@ -343,8 +338,8 @@ shared_ptr<transport::messages::result_message> select_statement::process_result
db_clock::time_point now) {
cql3::selection::result_set_builder builder(*_selection, now,
options.get_cql_serialization_format());
query::result_view::consume(*results, cmd->slice,
options.get_serialization_format());
query::result_view::consume(results->buf(), cmd->slice,
cql3::selection::result_set_builder::visitor(builder, *_schema,
*_selection));
auto rs = builder.build();
@@ -534,12 +529,9 @@ select_statement::raw_statement::get_ordering_comparator(schema_ptr schema,
}
bool select_statement::raw_statement::is_reversed(schema_ptr schema) {
std::experimental::optional<bool> reversed_map[schema->clustering_key_size()];
assert(_parameters->orderings().size() > 0);
parameters::orderings_type::size_type i = 0;
bool is_reversed_ = false;
bool relation_order_unsupported = false;
uint32_t i = 0;
for (auto&& e : _parameters->orderings()) {
::shared_ptr<column_identifier> column = e.first->prepare_column_identifier(schema);
bool reversed = e.second;
@@ -559,23 +551,32 @@ bool select_statement::raw_statement::is_reversed(schema_ptr schema) {
"Order by currently only support the ordering of columns following their declared order in the PRIMARY KEY");
}
bool current_reverse_status = (reversed != def->type->is_reversed());
if (i == 0) {
is_reversed_ = current_reverse_status;
}
if (is_reversed_ != current_reverse_status) {
relation_order_unsupported = true;
}
reversed_map[i] = std::experimental::make_optional(reversed != def->type->is_reversed());
++i;
}
if (relation_order_unsupported) {
throw exceptions::invalid_request_exception("Unsupported order by relation");
// GCC incorrectly complains about "*is_reversed_" below
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
// Check that all bool in reversedMap, if set, agrees
std::experimental::optional<bool> is_reversed_{};
for (auto&& b : reversed_map) {
if (b) {
if (!is_reversed_) {
is_reversed_ = b;
} else {
if ((*is_reversed_) != *b) {
throw exceptions::invalid_request_exception("Unsupported order by relation");
}
}
}
}
return is_reversed_;
assert(is_reversed_);
return *is_reversed_;
#pragma GCC diagnostic pop
}
/** If ALLOW FILTERING was not specified, this verifies that it is not needed */

View File

@@ -121,7 +121,6 @@ public:
static ::shared_ptr<select_statement> for_selection(
schema_ptr schema, ::shared_ptr<selection::selection> selection);
::shared_ptr<cql3::metadata> get_result_metadata() const;
virtual uint32_t get_bound_terms() override;
virtual void check_access(const service::client_state& state) override;
virtual void validate(distributed<service::storage_proxy>&, const service::client_state& state) override;

View File

@@ -78,7 +78,7 @@ void update_statement::add_update_for_key(mutation& m, const exploded_clustering
// If there are static columns, there also must be clustering columns, in which
// case empty prefix can only refer to the static row.
bool is_static_prefix = s->has_static_columns() && !prefix;
if (type == statement_type::INSERT && !is_static_prefix && s->is_cql3_table()) {
if (type == statement_type::INSERT && !is_static_prefix) {
auto& row = m.partition().clustered_row(clustering_key::from_clustering_prefix(*s, prefix));
row.apply(row_marker(params.timestamp(), params.ttl(), params.expiry()));
}
@@ -137,17 +137,19 @@ update_statement::parsed_insert::prepare_internal(database& db, schema_ptr schem
throw exceptions::invalid_request_exception("No columns provided to INSERT");
}
std::unordered_set<bytes> column_ids;
for (size_t i = 0; i < _column_names.size(); i++) {
auto id = _column_names[i]->prepare_column_identifier(schema);
auto def = get_column_definition(schema, *id);
if (!def) {
throw exceptions::invalid_request_exception(sprint("Unknown identifier %s", *id));
}
if (column_ids.count(id->name())) {
throw exceptions::invalid_request_exception(sprint("Multiple definitions found for column %s", *id));
for (size_t j = 0; j < i; j++) {
auto other_id = _column_names[j]->prepare_column_identifier(schema);
if (*id == *other_id) {
throw exceptions::invalid_request_exception(sprint("Multiple definitions found for column %s", *id));
}
}
column_ids.emplace(id->name());
auto&& value = _column_values[i];

View File

@@ -205,7 +205,7 @@ class collection_terminal {
public:
virtual ~collection_terminal() {}
/** Gets the value of the collection when serialized with the given protocol version format */
virtual bytes get_with_protocol_version(cql_serialization_format sf) = 0;
virtual bytes get_with_protocol_version(serialization_format sf) = 0;
};
/**

View File

@@ -202,12 +202,12 @@ public:
buffers[i] = to_bytes_opt(_elements[i]->bind_and_get(options));
// Inside tuples, we must force the serialization of collections to v3 whatever protocol
// version is in use since we're going to store directly that serialized value.
if (options.get_cql_serialization_format() != cql_serialization_format::internal()
if (options.get_serialization_format() != serialization_format::internal()
&& _type->type(i)->is_collection()) {
if (buffers[i]) {
buffers[i] = static_pointer_cast<const collection_type_impl>(_type->type(i))->reserialize(
options.get_cql_serialization_format(),
cql_serialization_format::internal(),
options.get_serialization_format(),
serialization_format::internal(),
bytes_view(*buffers[i]));
}
}
@@ -251,7 +251,7 @@ public:
try {
// Collections have this small hack that validate cannot be called on a serialized object,
// but the deserialization does the validation (so we're fine).
auto l = value_cast<list_type_impl::native_type>(type->deserialize(value, options.get_cql_serialization_format()));
auto l = value_cast<list_type_impl::native_type>(type->deserialize(value, options.get_serialization_format()));
auto ttype = dynamic_pointer_cast<const tuple_type_impl>(type->get_elements_type());
assert(ttype);

View File

@@ -43,17 +43,17 @@
namespace cql3 {
const update_parameters::prefetch_data::cell_list*
std::experimental::optional<collection_mutation_view>
update_parameters::get_prefetched_list(
partition_key pkey,
std::experimental::optional<clustering_key> ckey,
const partition_key& pkey,
const clustering_key& row_key,
const column_definition& column) const
{
if (!_prefetched) {
return {};
}
auto i = _prefetched->rows.find(std::make_pair(std::move(pkey), std::move(ckey)));
auto i = _prefetched->rows.find(std::make_pair(pkey, row_key));
if (i == _prefetched->rows.end()) {
return {};
}
@@ -63,7 +63,7 @@ update_parameters::get_prefetched_list(
if (j == row.end()) {
return {};
}
return &j->second;
return {j->second};
}
update_parameters::prefetch_data::prefetch_data(schema_ptr schema)

View File

@@ -58,9 +58,8 @@ namespace cql3 {
*/
class update_parameters final {
public:
// Holder for data needed by CQL list updates which depend on current state of the list.
struct prefetch_data {
using key = std::pair<partition_key, std::experimental::optional<clustering_key>>;
using key = std::pair<partition_key, clustering_key>;
struct key_hashing {
partition_key::hashing pk_hash;
clustering_key::hashing ck_hash;
@@ -71,7 +70,7 @@ public:
{ }
size_t operator()(const key& k) const {
return pk_hash(k.first) ^ (k.second ? ck_hash(*k.second) : 0);
return pk_hash(k.first) ^ ck_hash(k.second);
}
};
struct key_equality {
@@ -84,16 +83,10 @@ public:
{ }
bool operator()(const key& k1, const key& k2) const {
return pk_eq(k1.first, k2.first)
&& bool(k1.second) == bool(k2.second) && (!k1.second || ck_eq(*k1.second, *k2.second));
return pk_eq(k1.first, k2.first) && ck_eq(k1.second, k2.second);
}
};
struct cell {
bytes key;
bytes value;
};
using cell_list = std::vector<cell>;
using row = std::unordered_map<column_id, cell_list>;
using row = std::unordered_map<column_id, collection_mutation>;
public:
std::unordered_map<key, row, key_hashing, key_equality> rows;
schema_ptr schema;
@@ -190,11 +183,8 @@ public:
return _timestamp;
}
const prefetch_data::cell_list*
get_prefetched_list(
partition_key pkey,
std::experimental::optional<clustering_key> ckey,
const column_definition& column) const;
std::experimental::optional<collection_mutation_view> get_prefetched_list(
const partition_key& pkey, const clustering_key& row_key, const column_definition& column) const;
};
}

View File

@@ -161,15 +161,15 @@ void user_types::delayed_value::collect_marker_specification(shared_ptr<variable
}
std::vector<bytes_opt> user_types::delayed_value::bind_internal(const query_options& options) {
auto sf = options.get_cql_serialization_format();
auto sf = options.get_serialization_format();
std::vector<bytes_opt> buffers;
for (size_t i = 0; i < _type->size(); ++i) {
buffers.push_back(to_bytes_opt(_values[i]->bind_and_get(options)));
// Inside UDT values, we must force the serialization of collections to v3 whatever protocol
// version is in use since we're going to store directly that serialized value.
if (!sf.collection_format_unchanged() && _type->field_type(i)->is_collection() && buffers.back()) {
if (sf != serialization_format::use_32_bit() && _type->field_type(i)->is_collection() && buffers.back()) {
auto&& ctype = static_pointer_cast<const collection_type_impl>(_type->field_type(i));
buffers.back() = ctype->reserialize(sf, cql_serialization_format::latest(), bytes_view(*buffers.back()));
buffers.back() = ctype->reserialize(sf, serialization_format::use_32_bit(), bytes_view(*buffers.back()));
}
}
return buffers;

View File

@@ -56,7 +56,7 @@ void ut_name::set_keyspace(sstring keyspace) {
_ks_name = std::experimental::optional<sstring>{keyspace};
}
const sstring& ut_name::get_keyspace() const {
sstring ut_name::get_keyspace() const {
return _ks_name.value();
}

View File

@@ -58,7 +58,7 @@ public:
void set_keyspace(sstring keyspace);
const sstring& get_keyspace() const;
sstring get_keyspace() const;
bytes get_user_type_name() const;

View File

@@ -1,52 +0,0 @@
/*
* Copyright (C) 2015 Cloudius Systems, Ltd.
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <iostream>
using cql_protocol_version_type = uint8_t;

// Abstraction of the transport-protocol-dependent serialization format.
// CQL wire protocols v1 and v2 encode collection sizes in 16 bits, while
// v3 and above use 32 bits.  Rather than threading the raw protocol
// version through every serialization path (some of which serve internal
// storage and involve no transport at all), code passes this small value
// type around instead.
class cql_serialization_format {
    cql_protocol_version_type _proto;
public:
    static constexpr cql_protocol_version_type latest_version = 3;
    explicit cql_serialization_format(cql_protocol_version_type version) : _proto(version) {}
    // Format of the most recent protocol version we support.
    static cql_serialization_format latest() { return cql_serialization_format{latest_version}; }
    // Format used for internal (non-transport) storage; same as latest().
    static cql_serialization_format internal() { return latest(); }
    // True for protocol v3 and above, which use 32-bit collection sizes.
    bool using_32_bits_for_collections() const { return _proto >= 3; }
    bool operator==(cql_serialization_format x) const { return _proto == x._proto; }
    bool operator!=(cql_serialization_format x) const { return _proto != x._proto; }
    cql_protocol_version_type protocol_version() const { return _proto; }
    // True when this format encodes collections the same way as `other`,
    // i.e. both use 16-bit sizes or both use 32-bit sizes.
    bool collection_format_unchanged(cql_serialization_format other = cql_serialization_format::latest()) const {
        bool mine = using_32_bits_for_collections();
        bool theirs = other.using_32_bits_for_collections();
        return mine == theirs;
    }
    friend std::ostream& operator<<(std::ostream& out, const cql_serialization_format& sf) {
        return out << static_cast<int>(sf._proto);
    }
};

File diff suppressed because it is too large Load Diff

View File

@@ -41,7 +41,6 @@
#include <set>
#include <iostream>
#include <boost/functional/hash.hpp>
#include <boost/range/algorithm/find.hpp>
#include <experimental/optional>
#include <string.h>
#include "types.hh"
@@ -57,6 +56,7 @@
#include "tombstone.hh"
#include "atomic_cell.hh"
#include "query-request.hh"
#include "query-result.hh"
#include "keys.hh"
#include "mutation.hh"
#include "memtable.hh"
@@ -71,7 +71,6 @@
#include "sstables/compaction.hh"
#include "key_reader.hh"
#include <seastar/core/rwlock.hh>
#include <seastar/core/shared_future.hh>
class frozen_mutation;
class reconcilable_result;
@@ -98,132 +97,9 @@ void make(database& db, bool durable, bool volatile_testing_only);
}
}
// Applies back-pressure based on the memory consumption of a region group.
// A request must wait when this group's memory use exceeds its budget, or
// when any ancestor throttle_state says so.  Waiting requests are
// presumably parked in _throttled_requests and released by unthrottle(),
// which is driven by _throttling_timer (throttle()/unthrottle() are
// defined out of line -- confirm details there).
class throttle_state {
    size_t _max_space;                              // memory budget for _region_group, in bytes
    logalloc::region_group& _region_group;
    throttle_state* _parent;                        // optional parent consulted recursively; may be null
    circular_buffer<promise<>> _throttled_requests;
    timer<> _throttling_timer{[this] { unthrottle(); }};

    void unthrottle();

    // Over budget here, or anywhere up the parent chain?
    bool should_throttle() const {
        return _region_group.memory_used() > _max_space
            || (_parent && _parent->should_throttle());
    }
public:
    throttle_state(size_t max_space, logalloc::region_group& region, throttle_state* parent = nullptr)
        : _max_space(max_space)
        , _region_group(region)
        , _parent(parent)
    {}

    // Resolves when the caller may proceed with its write.
    future<> throttle();
};
class replay_position_reordered_exception : public std::exception {};
// We could just add all memtables, regardless of types, to a single list, and
// then filter them out when we read them. Here's why I have chosen not to do
// it:
//
// First, some of the methods in which a memtable is involved (like seal) are
// assume a commitlog, and go through great care of updating the replay
// position, flushing the log, etc. We want to bypass those, and that has to
// be done either by sprinkling the seal code with conditionals, or having a
// separate method for each seal.
//
// Also, if we ever want to put some of the memtables in as separate allocator
// region group to provide for extra QoS, having the classes properly wrapped
// will make that trivial: just pass a version of new_memtable() that puts it
// in a different region, while the list approach would require a lot of
// conditionals as well.
//
// If we are going to have different methods, better have different instances
// of a common class.
// Ordered collection of memtables for one column family.  back() is always
// the active (writable) memtable; earlier entries are older memtables
// (presumably sealed and awaiting flush -- confirm against the flush code).
// The sealing policy is injected via _seal_fn so the same container can
// serve memtables with different seal behaviors -- see the rationale in the
// comment preceding this class.
class memtable_list {
    using shared_memtable = lw_shared_ptr<memtable>;
    std::vector<shared_memtable> _memtables;            // back() is the active memtable
    std::function<future<> ()> _seal_fn;                // seals the active memtable
    std::function<schema_ptr()> _current_schema;        // schema used for newly created memtables
    size_t _max_memtable_size;                          // occupancy threshold that triggers a seal
    logalloc::region_group* _dirty_memory_region_group; // region group new memtables allocate from
public:
    memtable_list(std::function<future<> ()> seal_fn, std::function<schema_ptr()> cs, size_t max_memtable_size, logalloc::region_group* region_group)
        : _memtables({})
        , _seal_fn(seal_fn)
        , _current_schema(cs)
        , _max_memtable_size(max_memtable_size)
        , _dirty_memory_region_group(region_group) {
        // Start with a single empty active memtable.
        add_memtable();
    }

    shared_memtable back() {
        return _memtables.back();
    }

    // The caller has to make sure the element exists before calling this.
    void erase(const shared_memtable& element) {
        _memtables.erase(boost::range::find(_memtables, element));
    }

    void clear() {
        _memtables.clear();
    }

    size_t size() const {
        return _memtables.size();
    }

    // Seals the active memtable using the injected policy.
    future<> seal_active_memtable() {
        return _seal_fn();
    }

    auto begin() noexcept {
        return _memtables.begin();
    }

    auto begin() const noexcept {
        return _memtables.begin();
    }

    auto end() noexcept {
        return _memtables.end();
    }

    auto end() const noexcept {
        return _memtables.end();
    }

    memtable& active_memtable() {
        return *_memtables.back();
    }

    void add_memtable() {
        _memtables.emplace_back(new_memtable());
    }

    // True when the active memtable's occupancy has reached the threshold.
    bool should_flush() {
        return active_memtable().occupancy().total_space() >= _max_memtable_size;
    }

    // Kicks off a seal when the active memtable is over the threshold.
    // Note the returned future is intentionally dropped: the seal proceeds
    // in the background.
    void seal_on_overflow() {
        if (should_flush()) {
            // FIXME: if sparse, do some in-memory compaction first
            // FIXME: maybe merge with other in-memory memtables
            _seal_fn();
        }
    }
private:
    // Builds a fresh memtable with the current schema, charged to the
    // configured dirty-memory region group.
    lw_shared_ptr<memtable> new_memtable() {
        return make_lw_shared<memtable>(_current_schema(), _dirty_memory_region_group);
    }
};
using memtable_list = std::vector<lw_shared_ptr<memtable>>;
using sstable_list = sstables::sstable_list;
// The CF has a "stats" structure. But we don't want all fields here,
@@ -246,9 +122,7 @@ public:
bool enable_commitlog = true;
bool enable_incremental_backups = false;
size_t max_memtable_size = 5'000'000;
size_t max_streaming_memtable_size = 5'000'000;
logalloc::region_group* dirty_memory_region_group = nullptr;
logalloc::region_group* streaming_dirty_memory_region_group = nullptr;
::cf_stats* cf_stats = nullptr;
};
struct no_commitlog {};
@@ -280,43 +154,14 @@ private:
config _config;
stats _stats;
lw_shared_ptr<memtable_list> _memtables;
// In older incarnations, we simply committed the mutations to memtables.
// However, doing that makes it harder for us to provide QoS within the
// disk subsystem. Keeping them in separate memtables allow us to properly
// classify those streams into its own I/O class
//
// We could write those directly to disk, but we still want the mutations
// coming through the wire to go to a memtable staging area. This has two
// major advantages:
//
// first, it will allow us to properly order the partitions. They are
// hopefully sent in order but we can't really guarantee that without
// sacrificing sender-side parallelism.
//
// second, we will be able to coalesce writes from multiple plan_id's and
// even multiple senders, as well as automatically tapping into the dirty
// memory throttling mechanism, guaranteeing we will not overload the
// server.
lw_shared_ptr<memtable_list> _streaming_memtables;
lw_shared_ptr<memtable_list> make_memtable_list();
lw_shared_ptr<memtable_list> make_streaming_memtable_list();
// generation -> sstable. Ordered by key so we can easily get the most recent.
lw_shared_ptr<sstable_list> _sstables;
// sstables that have been compacted (so don't look up in query) but
// have not been deleted yet, so must not GC any tombstones in other sstables
// that may delete data in these sstables:
std::vector<sstables::shared_sstable> _sstables_compacted_but_not_deleted;
// Control background fibers waiting for sstables to be deleted
seastar::gate _sstable_deletion_gate;
// There are situations in which we need to stop writing sstables. Flushers will take
// the read lock, and the ones that wish to stop that process will take the write lock.
rwlock _sstables_lock;
mutable row_cache _cache; // Cache covers only sstables.
std::experimental::optional<int64_t> _sstable_generation = {};
int64_t _sstable_generation = 1;
unsigned _mutation_count = 0;
db::replay_position _highest_flushed_rp;
// Provided by the database that owns this commitlog
db::commitlog* _commitlog;
@@ -327,43 +172,30 @@ private:
int _compaction_disabled = 0;
class memtable_flush_queue;
std::unique_ptr<memtable_flush_queue> _flush_queue;
// Because streaming mutations bypass the commitlog, there is
// no need for the complications of the flush queue. Besides, it
// is easier to just use a common gate than it is to modify the flush_queue
// to work both with and without a replay position.
//
// Last but not least, we seldom need to guarantee any ordering here: as long
// as all data is waited for, we're good.
seastar::gate _streaming_flush_gate;
// Store generation of sstables being compacted at the moment. That's needed to prevent a
// sstable from being compacted twice.
std::unordered_set<unsigned long> _compacting_generations;
private:
void update_stats_for_new_sstable(uint64_t disk_space_used_by_sstable);
void update_stats_for_new_sstable(uint64_t new_sstable_data_size);
void add_sstable(sstables::sstable&& sstable);
void add_sstable(lw_shared_ptr<sstables::sstable> sstable);
lw_shared_ptr<memtable> new_memtable();
lw_shared_ptr<memtable> new_streaming_memtable();
void add_memtable();
future<stop_iteration> try_flush_memtable_to_sstable(lw_shared_ptr<memtable> memt);
future<> update_cache(memtable&, lw_shared_ptr<sstable_list> old_sstables);
struct merge_comparator;
// update the sstable generation, making sure that new sstables don't overwrite this one.
void update_sstables_known_generation(unsigned generation) {
if (!_sstable_generation) {
_sstable_generation = 1;
}
_sstable_generation = std::max<uint64_t>(*_sstable_generation, generation / smp::count + 1);
_sstable_generation = std::max<uint64_t>(_sstable_generation, generation / smp::count + 1);
}
uint64_t calculate_generation_for_new_table() {
assert(_sstable_generation);
// FIXME: better way of ensuring we don't attempt to
// overwrite an existing table.
return (*_sstable_generation)++ * smp::count + engine().cpu_id();
return _sstable_generation++ * smp::count + engine().cpu_id();
}
// Rebuild existing _sstables with new_sstables added to it and sstables_to_remove removed from it.
void rebuild_sstable_list(const std::vector<sstables::shared_sstable>& new_sstables,
const std::vector<sstables::shared_sstable>& sstables_to_remove);
void rebuild_statistics();
private:
// Creates a mutation reader which covers sstables.
// Caller needs to ensure that column_family remains live (FIXME: relax this).
@@ -375,29 +207,7 @@ private:
key_source sstables_as_key_source() const;
partition_presence_checker make_partition_presence_checker(lw_shared_ptr<sstable_list> old_sstables);
std::chrono::steady_clock::time_point _sstable_writes_disabled_at;
void do_trigger_compaction();
public:
// This function should be called when this column family is ready for writes, IOW,
// to produce SSTables. Extensive details about why this is important can be found
// in Scylla's Github Issue #1014
//
// Nothing should be writing to SSTables before we have the chance to populate the
// existing SSTables and calculate what should the next generation number be.
//
// However, if that happens, we want to protect against it in a way that does not
// involve overwriting existing tables. This is one of the ways to do it: every
// column family starts in an unwriteable state, and when it can finally be written
// to, we mark it as writeable.
//
// Note that this *cannot* be a part of add_column_family. That adds a column family
// to a db in memory only, and if anybody is about to write to a CF, that was most
// likely already called. We need to call this explicitly when we are sure we're ready
// to issue disk operations safely.
void mark_ready_for_writes() {
update_sstables_known_generation(0);
}
// Creates a mutation reader which covers all data sources for this column family.
// Caller needs to ensure that column_family remains live (FIXME: relax this).
// Note: for data queries use query() instead.
@@ -417,7 +227,7 @@ public:
// FIXME: in case a query is satisfied from a single memtable, avoid a copy
using const_mutation_partition_ptr = std::unique_ptr<const mutation_partition>;
using const_row_ptr = std::unique_ptr<const row>;
memtable& active_memtable() { return _memtables->active_memtable(); }
memtable& active_memtable() { return *_memtables->back(); }
const row_cache& get_row_cache() const {
return _cache;
}
@@ -442,11 +252,10 @@ public:
// The mutation is always upgraded to current schema.
void apply(const frozen_mutation& m, const schema_ptr& m_schema, const db::replay_position& = db::replay_position());
void apply(const mutation& m, const db::replay_position& = db::replay_position());
void apply_streaming_mutation(schema_ptr, const frozen_mutation&);
// Returns at most "cmd.limit" rows
future<lw_shared_ptr<query::result>> query(schema_ptr,
const query::read_command& cmd, query::result_request request,
const query::read_command& cmd,
const std::vector<query::partition_range>& ranges);
future<> populate(sstring datadir);
@@ -455,7 +264,6 @@ public:
future<> stop();
future<> flush();
future<> flush(const db::replay_position&);
future<> flush_streaming_mutations(std::vector<query::partition_range> ranges = std::vector<query::partition_range>{});
void clear(); // discards memtable(s) without flushing them to disk.
future<db::replay_position> discard_sstables(db_clock::time_point);
@@ -466,19 +274,14 @@ public:
future<int64_t> disable_sstable_write() {
_sstable_writes_disabled_at = std::chrono::steady_clock::now();
return _sstables_lock.write_lock().then([this] {
if (_sstables->empty()) {
return make_ready_future<int64_t>(0);
}
return make_ready_future<int64_t>((*_sstables->rbegin()).first);
return make_ready_future<int64_t>((*_sstables->end()).first);
});
}
// SSTable writes are now allowed again, and generation is updated to new_generation if != -1
// SSTable writes are now allowed again, and generation is updated to new_generation
// returns the amount of microseconds elapsed since we disabled writes.
std::chrono::steady_clock::duration enable_sstable_write(int64_t new_generation) {
if (new_generation != -1) {
update_sstables_known_generation(new_generation);
}
update_sstables_known_generation(new_generation);
_sstables_lock.write_unlock();
return std::chrono::steady_clock::now() - _sstable_writes_disabled_at;
}
@@ -492,11 +295,9 @@ public:
// very dangerous to do that with live SSTables. This is meant to be used with SSTables
// that are not yet managed by the system.
//
// Parameter all_generations stores the generation of all SSTables in the system, so it
// will be easy to determine which SSTable is new.
// An example usage would query all shards asking what is the highest SSTable number known
// to them, and then pass that + 1 as "start".
future<std::vector<sstables::entry_descriptor>> reshuffle_sstables(std::set<int64_t> all_generations, int64_t start);
future<std::vector<sstables::entry_descriptor>> reshuffle_sstables(int64_t start);
// FIXME: this is just an example, should be changed to something more
// general. compact_all_sstables() starts a compaction of all sstables.
@@ -530,7 +331,6 @@ public:
}
lw_shared_ptr<sstable_list> get_sstables();
lw_shared_ptr<sstable_list> get_sstables_including_compacted_undeleted();
size_t sstables_count();
int64_t get_unleveled_sstables() const;
@@ -562,15 +362,15 @@ public:
Result run_with_compaction_disabled(Func && func) {
++_compaction_disabled;
return _compaction_manager.remove(this).then(std::forward<Func>(func)).finally([this] {
// #934. The pending counter is actually a great indicator into whether we
// actually need to trigger a compaction again.
if (--_compaction_disabled == 0 && _stats.pending_compactions > 0) {
// we're turning if on again, use function that does not increment
// the counter further.
do_trigger_compaction();
if (--_compaction_disabled == 0) {
trigger_compaction();
}
});
}
std::unordered_set<unsigned long>& compacting_generations() {
return _compacting_generations;
}
private:
// One does not need to wait on this future if all we are interested in, is
// initiating the write. The writes initiated here will eventually
@@ -580,42 +380,23 @@ private:
// But it is possible to synchronously wait for the seal to complete by
// waiting on this future. This is useful in situations where we want to
// synchronously flush data to disk.
//
// FIXME: A better interface would guarantee that all writes before this
// one are also complete
future<> seal_active_memtable();
// I am assuming here that the repair process will potentially send ranges containing
// few mutations, definitely not enough to fill a memtable. It wants to know whether or
// not each of those ranges individually succeeded or failed, so we need a future for
// each.
//
// One of the ways to fix that, is changing the repair itself to send more mutations at
// a single batch. But relying on that is a bad idea for two reasons:
//
// First, the goals of the SSTable writer and the repair sender are at odds. The SSTable
// writer wants to write as few SSTables as possible, while the repair sender wants to
// break down the range in pieces as small as it can and checksum them individually, so
// it doesn't have to send a lot of mutations for no reason.
//
// Second, even if the repair process wants to process larger ranges at once, some ranges
// themselves may be small. So while most ranges would be large, we would still have
// potentially some fairly small SSTables lying around.
//
// The best course of action in this case is to coalesce the incoming streams write-side.
// repair can now choose whatever strategy - small or big ranges - it wants, resting assure
// that the incoming memtables will be coalesced together.
shared_promise<> _waiting_streaming_flushes;
timer<> _delayed_streaming_flush{[this] { seal_active_streaming_memtable(); }};
future<> seal_active_streaming_memtable();
future<> seal_active_streaming_memtable_delayed();
// filter manifest.json files out
static bool manifest_json_filter(const sstring& fname);
seastar::gate _in_flight_seals;
// Iterate over all partitions. Protocol is the same as std::all_of(),
// so that iteration can be stopped by returning false.
// Func signature: bool (const decorated_key& dk, const mutation_partition& mp)
template <typename Func>
future<bool> for_all_partitions(schema_ptr, Func&& func) const;
future<sstables::entry_descriptor> probe_file(sstring sstdir, sstring fname);
void seal_on_overflow();
void check_valid_rp(const db::replay_position&) const;
public:
// Iterate over all partitions. Protocol is the same as std::all_of(),
@@ -718,9 +499,7 @@ public:
bool enable_cache = true;
bool enable_incremental_backups = false;
size_t max_memtable_size = 5'000'000;
size_t max_streaming_memtable_size = 5'000'000;
logalloc::region_group* dirty_memory_region_group = nullptr;
logalloc::region_group* streaming_dirty_memory_region_group = nullptr;
::cf_stats* cf_stats = nullptr;
};
private:
@@ -782,19 +561,18 @@ public:
class database {
::cf_stats _cf_stats;
logalloc::region_group _dirty_memory_region_group;
logalloc::region_group _streaming_dirty_memory_region_group;
std::unordered_map<sstring, keyspace> _keyspaces;
std::unordered_map<utils::UUID, lw_shared_ptr<column_family>> _column_families;
std::unordered_map<std::pair<sstring, sstring>, utils::UUID, utils::tuple_hash> _ks_cf_to_uuid;
std::unique_ptr<db::commitlog> _commitlog;
std::unique_ptr<db::config> _cfg;
size_t _memtable_total_space = 500 << 20;
size_t _streaming_memtable_total_space = 500 << 20;
utils::UUID _version;
// compaction_manager object is referenced by all column families of a database.
compaction_manager _compaction_manager;
std::vector<scollectd::registration> _collectd;
bool _enable_incremental_backups = false;
timer<> _throttling_timer{[this] { unthrottle(); }};
circular_buffer<promise<>> _throttled_requests;
future<> init_commitlog();
future<> apply_in_memory(const frozen_mutation& m, const schema_ptr& m_schema, const db::replay_position&);
@@ -808,16 +586,12 @@ private:
void create_in_memory_keyspace(const lw_shared_ptr<keyspace_metadata>& ksm);
friend void db::system_keyspace::make(database& db, bool durable, bool volatile_testing_only);
void setup_collectd();
throttle_state _memtables_throttler;
throttle_state _streaming_throttler;
future<> throttle();
future<> do_apply(schema_ptr, const frozen_mutation&);
void unthrottle();
public:
static utils::UUID empty_version;
void set_enable_incremental_backups(bool val) { _enable_incremental_backups = val; }
future<> parse_system_tables(distributed<service::storage_proxy>&);
database();
database(const db::config&);
@@ -844,6 +618,8 @@ public:
void add_column_family(schema_ptr schema, column_family::config cfg);
future<> drop_column_family(db_clock::time_point changed_at, const sstring& ks_name, const sstring& cf_name);
/* throws std::out_of_range if missing */
const utils::UUID& find_uuid(const sstring& ks, const sstring& cf) const throw (std::out_of_range);
const utils::UUID& find_uuid(const schema_ptr&) const throw (std::out_of_range);
@@ -868,7 +644,6 @@ public:
const column_family& find_column_family(const utils::UUID&) const throw (no_such_column_family);
column_family& find_column_family(const schema_ptr&) throw (no_such_column_family);
const column_family& find_column_family(const schema_ptr&) const throw (no_such_column_family);
bool column_family_exists(const utils::UUID& uuid) const;
schema_ptr find_schema(const sstring& ks_name, const sstring& cf_name) const throw (no_such_column_family);
schema_ptr find_schema(const utils::UUID&) const throw (no_such_column_family);
bool has_schema(const sstring& ks_name, const sstring& cf_name) const;
@@ -877,10 +652,9 @@ public:
unsigned shard_of(const dht::token& t);
unsigned shard_of(const mutation& m);
unsigned shard_of(const frozen_mutation& m);
future<lw_shared_ptr<query::result>> query(schema_ptr, const query::read_command& cmd, query::result_request request, const std::vector<query::partition_range>& ranges);
future<lw_shared_ptr<query::result>> query(schema_ptr, const query::read_command& cmd, const std::vector<query::partition_range>& ranges);
future<reconcilable_result> query_mutations(schema_ptr, const query::read_command& cmd, const query::partition_range& range);
future<> apply(schema_ptr, const frozen_mutation&);
future<> apply_streaming_mutation(schema_ptr, const frozen_mutation&);
keyspace::config make_keyspace_config(const keyspace_metadata& ksm);
const sstring& get_snitch_name() const;
future<> clear_snapshot(sstring tag, std::vector<sstring> keyspace_names);
@@ -912,16 +686,9 @@ public:
}
future<> flush_all_memtables();
// See #937. Truncation now requires a callback to get a time stamp
// that must be guaranteed to be the same for all shards.
typedef std::function<future<db_clock::time_point>()> timestamp_func;
/** Truncates the given column family */
future<> truncate(sstring ksname, sstring cfname, timestamp_func);
future<> truncate(const keyspace& ks, column_family& cf, timestamp_func);
future<> drop_column_family(const sstring& ks_name, const sstring& cf_name, timestamp_func);
future<> truncate(db_clock::time_point truncated_at, sstring ksname, sstring cfname);
future<> truncate(db_clock::time_point truncated_at, const keyspace& ks, column_family& cf);
const logalloc::region_group& dirty_memory_region_group() const {
return _dirty_memory_region_group;

View File

@@ -59,12 +59,6 @@
#include "gms/failure_detector.hh"
#include "service/storage_service.hh"
#include "schema_registry.hh"
#include "idl/uuid.dist.hh"
#include "idl/frozen_schema.dist.hh"
#include "serializer_impl.hh"
#include "serialization_visitors.hh"
#include "idl/uuid.dist.impl.hh"
#include "idl/frozen_schema.dist.impl.hh"
static logging::logger logger("batchlog_manager");
@@ -125,11 +119,15 @@ mutation db::batchlog_manager::get_batch_log_mutation_for(const std::vector<muta
auto timestamp = api::new_timestamp();
auto data = [this, &mutations] {
std::vector<canonical_mutation> fm(mutations.begin(), mutations.end());
bytes_ostream out;
const auto size = std::accumulate(fm.begin(), fm.end(), size_t(0), [](size_t s, auto& m) {
return s + serializer<canonical_mutation>{m}.size();
});
bytes buf(bytes::initialized_later(), size);
data_output out(buf);
for (auto& m : fm) {
ser::serialize(out, m);
serializer<canonical_mutation>{m}(out);
}
return to_bytes(out.linearize());
return buf;
}();
mutation m(key, schema);
@@ -157,58 +155,47 @@ future<> db::batchlog_manager::replay_all_failed_batches() {
auto written_at = row.get_as<db_clock::time_point>("written_at");
auto id = row.get_as<utils::UUID>("id");
// enough time for the actual write + batchlog entry mutation delivery (two separate requests).
// enough time for the actual write + batchlog entry mutation delivery (two separate requests).
auto timeout = get_batch_log_timeout();
if (db_clock::now() < written_at + timeout) {
logger.debug("Skipping replay of {}, too fresh", id);
return make_ready_future<>();
}
// check version of serialization format
if (!row.has("version")) {
logger.warn("Skipping logged batch because of unknown version");
return make_ready_future<>();
}
auto version = row.get_as<int32_t>("version");
if (version != net::messaging_service::current_version) {
logger.warn("Skipping logged batch because of incorrect version");
return make_ready_future<>();
}
// not used currently. ever?
//auto version = row.has("version") ? row.get_as<uint32_t>("version") : /*MessagingService.VERSION_12*/6u;
auto data = row.get_blob("data");
logger.debug("Replaying batch {}", id);
auto fms = make_lw_shared<std::deque<canonical_mutation>>();
auto in = ser::as_input_stream(data);
while (in.size()) {
fms->emplace_back(ser::deserialize(in, boost::type<canonical_mutation>()));
data_input in(data);
while (in.has_next()) {
fms->emplace_back(serializer<canonical_mutation>::read(in));
}
auto mutations = make_lw_shared<std::vector<mutation>>();
auto size = data.size();
return map_reduce(*fms, [this, written_at] (canonical_mutation& fm) {
return system_keyspace::get_truncated_at(fm.column_family_id()).then([written_at, &fm] (db_clock::time_point t) ->
std::experimental::optional<std::reference_wrapper<canonical_mutation>> {
if (written_at > t) {
return { std::ref(fm) };
} else {
return {};
}
});
},
std::vector<mutation>(),
[this] (std::vector<mutation> mutations, std::experimental::optional<std::reference_wrapper<canonical_mutation>> fm) {
if (fm) {
schema_ptr s = _qp.db().local().find_schema(fm.value().get().column_family_id());
mutations.emplace_back(fm.value().get().to_mutation(s));
return repeat([this, fms = std::move(fms), written_at, mutations]() mutable {
if (fms->empty()) {
return make_ready_future<stop_iteration>(stop_iteration::yes);
}
return mutations;
}).then([this, id, limiter, written_at, size, fms] (std::vector<mutation> mutations) {
if (mutations.empty()) {
auto& fm = fms->front();
auto mid = fm.column_family_id();
return system_keyspace::get_truncated_at(mid).then([this, mid, &fm, written_at, mutations](db_clock::time_point t) {
schema_ptr s = _qp.db().local().find_schema(mid);
if (written_at > t) {
mutations->emplace_back(fm.to_mutation(s));
}
}).then([fms] {
fms->pop_front();
return make_ready_future<stop_iteration>(stop_iteration::no);
});
}).then([this, id, mutations, limiter, written_at, size] {
if (mutations->empty()) {
return make_ready_future<>();
}
const auto ttl = [this, &mutations, written_at]() -> clock_type {
const auto ttl = [this, mutations, written_at]() -> clock_type {
/*
* Calculate ttl for the mutations' hints (and reduce ttl by the time the mutations spent in the batchlog).
* This ensures that deletes aren't "undone" by an old batch replay.
@@ -230,8 +217,8 @@ future<> db::batchlog_manager::replay_all_failed_batches() {
// Our normal write path does not add much redundancy to the dispatch, and rate is handled after send
// in both cases.
// FIXME: verify that the above is reasonably true.
return limiter->reserve(size).then([this, mutations = std::move(mutations), id] {
return _qp.proxy().local().mutate(mutations, db::consistency_level::ANY);
return limiter->reserve(size).then([this, mutations, id] {
return _qp.proxy().local().mutate(std::move(*mutations), db::consistency_level::ANY);
});
}).then([this, id] {
// delete batch

View File

@@ -67,9 +67,6 @@
#include "commitlog_entry.hh"
#include "service/priority_manager.hh"
#include <boost/range/numeric.hpp>
#include <boost/range/adaptor/transformed.hpp>
static logging::logger logger("commitlog");
class crc32_nbo {
@@ -148,7 +145,7 @@ const std::string db::commitlog::descriptor::FILENAME_PREFIX(
"CommitLog" + SEPARATOR);
const std::string db::commitlog::descriptor::FILENAME_EXTENSION(".log");
class db::commitlog::segment_manager : public ::enable_shared_from_this<segment_manager> {
class db::commitlog::segment_manager {
public:
config cfg;
const uint64_t max_size;
@@ -278,8 +275,6 @@ public:
scollectd::registrations create_counters();
void orphan_all();
void discard_unused_segments();
void discard_completed_segments(const cf_id_type& id,
const replay_position& pos);
@@ -377,7 +372,7 @@ private:
*/
class db::commitlog::segment: public enable_lw_shared_from_this<segment> {
::shared_ptr<segment_manager> _segment_manager;
segment_manager* _segment_manager;
descriptor _desc;
file _file;
@@ -409,7 +404,7 @@ class db::commitlog::segment: public enable_lw_shared_from_this<segment> {
// This is maintaining the semantica of only using the write-lock
// as a gate for flushing, i.e. once we've begun a flush for position X
// we are ok with writes to positions > X
return _segment_manager->begin_flush().then(std::bind(&rwlock::write_lock, &_dwrite)).finally([this] {
return _dwrite.write_lock().then(std::bind(&segment_manager::begin_flush, _segment_manager)).finally([this] {
_dwrite.write_unlock();
});
}
@@ -422,12 +417,12 @@ class db::commitlog::segment: public enable_lw_shared_from_this<segment> {
// This is maintaining the semantica of only using the write-lock
// as a gate for flushing, i.e. once we've begun a flush for position X
// we are ok with writes to positions > X
return _segment_manager->begin_write().then(std::bind(&rwlock::read_lock, &_dwrite));
return _dwrite.read_lock().then(std::bind(&segment_manager::begin_write, _segment_manager));
}
void end_write() {
_dwrite.read_unlock();
_segment_manager->end_write();
_dwrite.read_unlock();
}
public:
@@ -449,8 +444,8 @@ public:
// TODO : tune initial / default size
static constexpr size_t default_size = align_up<size_t>(128 * 1024, alignment);
segment(::shared_ptr<segment_manager> m, const descriptor& d, file && f, bool active)
: _segment_manager(std::move(m)), _desc(std::move(d)), _file(std::move(f)), _sync_time(
segment(segment_manager* m, const descriptor& d, file && f, bool active)
: _segment_manager(m), _desc(std::move(d)), _file(std::move(f)), _sync_time(
clock_type::now()), _queue(0)
{
++_segment_manager->totals.segments_created;
@@ -558,7 +553,7 @@ public:
throw;
}
});
}).finally([this, me] {
}).finally([this] {
end_flush();
});
}
@@ -647,7 +642,7 @@ public:
forget_schema_versions();
// acquire read lock
return begin_write().then([this, size, off, buf = std::move(buf)]() mutable {
return begin_write().then([this, size, off, buf = std::move(buf), me]() mutable {
auto written = make_lw_shared<size_t>(0);
auto p = buf.get();
return repeat([this, size, off, written, p]() mutable {
@@ -1043,12 +1038,10 @@ void db::commitlog::segment_manager::flush_segments(bool force) {
future<db::commitlog::segment_manager::sseg_ptr> db::commitlog::segment_manager::allocate_segment(bool active) {
descriptor d(next_id());
file_open_options opt;
opt.extent_allocation_size_hint = max_size;
return open_file_dma(cfg.commit_log_location + "/" + d.filename(), open_flags::wo | open_flags::create, opt).then([this, d, active](file f) {
return open_file_dma(cfg.commit_log_location + "/" + d.filename(), open_flags::wo | open_flags::create).then([this, d, active](file f) {
// xfs doesn't like files extended betond eof, so enlarge the file
return f.truncate(max_size).then([this, d, active, f] () mutable {
auto s = make_lw_shared<segment>(this->shared_from_this(), d, std::move(f), active);
auto s = make_lw_shared<segment>(this, d, std::move(f), active);
return make_ready_future<sseg_ptr>(s);
});
});
@@ -1162,10 +1155,6 @@ future<> db::commitlog::segment_manager::shutdown() {
return make_ready_future<>();
}
void db::commitlog::segment_manager::orphan_all() {
_segments.clear();
_reserve_segments.clear();
}
/*
* Sync all segments, then clear them out. To ensure all ops are done.
@@ -1179,7 +1168,7 @@ future<> db::commitlog::segment_manager::clear() {
for (auto& s : _segments) {
s->mark_clean();
}
orphan_all();
_segments.clear();
});
}
/**
@@ -1213,15 +1202,7 @@ void db::commitlog::segment_manager::on_timer() {
// take outstanding allocations into regard. This is paranoid,
// but if for some reason the file::open takes longer than timer period,
// we could flood the reserve list with new segments
//
// #482 - _reserve_allocating is decremented in the finally clause below.
// This is needed because if either allocate_segment _or_ emplacing into
// _reserve_segments should throw, we still need the counter reset
// However, because of this, it might be that emplace was done, but not decrement,
// when we get here again. So occasionally we might get a sum of the two that is
// not consistent. It should however always just potentially be _to much_, i.e.
// just an indicator that we don't need to do anything. So lets do that.
auto n = std::min(_reserve_segments.size() + _reserve_allocating, _num_reserve_segments);
auto n = _reserve_segments.size() + _reserve_allocating;
return parallel_for_each(boost::irange(n, _num_reserve_segments), [this, n](auto i) {
++_reserve_allocating;
return this->allocate_segment(false).then([this](sseg_ptr s) {
@@ -1302,9 +1283,8 @@ void db::commitlog::segment_manager::release_buffer(buffer_type&& b) {
logger.trace("Deleting {} buffers", _temp_buffers.size() - max_temp_buffers);
_temp_buffers.erase(_temp_buffers.begin() + max_temp_buffers, _temp_buffers.end());
}
totals.buffer_list_bytes = boost::accumulate(
_temp_buffers | boost::adaptors::transformed(std::mem_fn(&buffer_type::size)),
size_t(0), std::plus<size_t>());
totals.buffer_list_bytes = std::accumulate(_temp_buffers.begin(),
_temp_buffers.end(), size_t(0), std::plus<size_t>());
}
/**
@@ -1354,7 +1334,7 @@ future<db::replay_position> db::commitlog::add_entry(const cf_id_type& id, const
}
db::commitlog::commitlog(config cfg)
: _segment_manager(::make_shared<segment_manager>(std::move(cfg))) {
: _segment_manager(new segment_manager(std::move(cfg))) {
}
db::commitlog::commitlog(commitlog&& v) noexcept
@@ -1362,9 +1342,6 @@ db::commitlog::commitlog(commitlog&& v) noexcept
}
db::commitlog::~commitlog() {
if (_segment_manager != nullptr) {
_segment_manager->orphan_all();
}
}
future<db::commitlog> db::commitlog::create_commitlog(config cfg) {

View File

@@ -98,7 +98,7 @@ public:
class segment;
private:
::shared_ptr<segment_manager> _segment_manager;
std::unique_ptr<segment_manager> _segment_manager;
public:
enum class sync_mode {
PERIODIC, BATCH

View File

@@ -1,86 +0,0 @@
/*
* Copyright 2016 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include "commitlog_entry.hh"
#include "idl/uuid.dist.hh"
#include "idl/keys.dist.hh"
#include "idl/frozen_mutation.dist.hh"
#include "idl/mutation.dist.hh"
#include "idl/commitlog.dist.hh"
#include "serializer_impl.hh"
#include "serialization_visitors.hh"
#include "idl/uuid.dist.impl.hh"
#include "idl/keys.dist.impl.hh"
#include "idl/frozen_mutation.dist.impl.hh"
#include "idl/mutation.dist.impl.hh"
#include "idl/commitlog.dist.impl.hh"
commitlog_entry::commitlog_entry(stdx::optional<column_mapping> mapping, frozen_mutation&& mutation)
: _mapping(std::move(mapping))
, _mutation_storage(std::move(mutation))
, _mutation(*_mutation_storage)
{ }
commitlog_entry::commitlog_entry(stdx::optional<column_mapping> mapping, const frozen_mutation& mutation)
: _mapping(std::move(mapping))
, _mutation(mutation)
{ }
commitlog_entry::commitlog_entry(commitlog_entry&& ce)
: _mapping(std::move(ce._mapping))
, _mutation_storage(std::move(ce._mutation_storage))
, _mutation(_mutation_storage ? *_mutation_storage : ce._mutation)
{
}
commitlog_entry& commitlog_entry::operator=(commitlog_entry&& ce)
{
if (this != &ce) {
this->~commitlog_entry();
new (this) commitlog_entry(std::move(ce));
}
return *this;
}
commitlog_entry commitlog_entry_writer::get_entry() const {
if (_with_schema) {
return commitlog_entry(_schema->get_column_mapping(), _mutation);
} else {
return commitlog_entry({}, _mutation);
}
}
void commitlog_entry_writer::compute_size() {
_size = ser::get_sizeof(get_entry());
}
void commitlog_entry_writer::write(data_output& out) const {
seastar::simple_output_stream str(out.reserve(size()));
ser::serialize(str, get_entry());
}
commitlog_entry_reader::commitlog_entry_reader(const temporary_buffer<char>& buffer)
: _ce([&] {
seastar::simple_input_stream in(buffer.get(), buffer.size());
return ser::deserialize(in, boost::type<commitlog_entry>());
}())
{
}

View File

@@ -25,43 +25,21 @@
#include "frozen_mutation.hh"
#include "schema.hh"
#include "utils/data_output.hh"
namespace stdx = std::experimental;
class commitlog_entry {
stdx::optional<column_mapping> _mapping;
stdx::optional<frozen_mutation> _mutation_storage;
const frozen_mutation& _mutation;
public:
commitlog_entry(stdx::optional<column_mapping> mapping, frozen_mutation&& mutation);
commitlog_entry(stdx::optional<column_mapping> mapping, const frozen_mutation& mutation);
commitlog_entry(commitlog_entry&&);
commitlog_entry(const commitlog_entry&) = delete;
commitlog_entry& operator=(commitlog_entry&&);
commitlog_entry& operator=(const commitlog_entry&) = delete;
const stdx::optional<column_mapping>& mapping() const { return _mapping; }
const frozen_mutation& mutation() const { return _mutation; }
};
class commitlog_entry_writer {
schema_ptr _schema;
db::serializer<column_mapping> _column_mapping_serializer;
const frozen_mutation& _mutation;
bool _with_schema = true;
size_t _size;
private:
void compute_size();
commitlog_entry get_entry() const;
public:
commitlog_entry_writer(schema_ptr s, const frozen_mutation& fm)
: _schema(std::move(s)), _mutation(fm)
{
compute_size();
}
: _schema(std::move(s)), _column_mapping_serializer(_schema->get_column_mapping()), _mutation(fm)
{ }
void set_with_schema(bool value) {
_with_schema = value;
compute_size();
}
bool with_schema() {
return _with_schema;
@@ -71,17 +49,40 @@ public:
}
size_t size() const {
return _size;
size_t size = data_output::serialized_size<bool>();
if (_with_schema) {
size += _column_mapping_serializer.size();
}
size += _mutation.representation().size();
return size;
}
void write(data_output& out) const;
void write(data_output& out) const {
out.write(_with_schema);
if (_with_schema) {
_column_mapping_serializer.write(out);
}
auto bv = _mutation.representation();
out.write(bv.begin(), bv.end());
}
};
class commitlog_entry_reader {
commitlog_entry _ce;
frozen_mutation _mutation;
stdx::optional<column_mapping> _column_mapping;
public:
commitlog_entry_reader(const temporary_buffer<char>& buffer);
commitlog_entry_reader(const temporary_buffer<char>& buffer)
: _mutation(bytes())
{
data_input in(buffer);
bool has_column_mapping = in.read<bool>();
if (has_column_mapping) {
_column_mapping = db::serializer<::column_mapping>::read(in);
}
auto bv = in.read_view(in.avail());
_mutation = frozen_mutation(bytes(bv.begin(), bv.end()));
}
const stdx::optional<column_mapping>& get_column_mapping() const { return _ce.mapping(); }
const frozen_mutation& mutation() const { return _ce.mutation(); }
const stdx::optional<column_mapping>& get_column_mapping() const { return _column_mapping; }
const frozen_mutation& mutation() const { return _mutation; }
};

View File

@@ -53,6 +53,7 @@
#include "database.hh"
#include "sstables/sstables.hh"
#include "db/system_keyspace.hh"
#include "db/serializer.hh"
#include "cql3/query_processor.hh"
#include "log.hh"
#include "converting_mutation_partition_applier.hh"

View File

@@ -487,7 +487,7 @@ public:
val(cas_contention_timeout_in_ms, uint32_t, 5000, Unused, \
"The time that the coordinator continues to retry a CAS (compare and set) operation that contends with other proposals for the same row." \
) \
val(truncate_request_timeout_in_ms, uint32_t, 10000, Used, \
val(truncate_request_timeout_in_ms, uint32_t, 10000, Unused, \
"The time that the coordinator waits for truncates (remove all data from a table) to complete. The long default value allows for a snapshot to be taken before removing the data. If auto_snapshot is disabled (not recommended), you can reduce this time." \
) \
val(write_request_timeout_in_ms, uint32_t, 2000, Used, \
@@ -556,7 +556,7 @@ public:
val(start_rpc, bool, false, Used, \
"Starts the Thrift RPC server" \
) \
val(rpc_keepalive, bool, true, Used, \
val(rpc_keepalive, bool, true, Unused, \
"Enable or disable keepalive on client connections (RPC or native)." \
) \
val(rpc_max_threads, uint32_t, 0, Invalid, \

View File

@@ -241,7 +241,7 @@ is_sufficient_live_nodes(consistency_level cl,
if (rs.get_type() == replication_strategy_type::network_topology) {
for (auto& entry : count_per_dc_endpoints(ks, live_endpoints)) {
if (entry.second.live < local_quorum_for(ks, entry.first)) {
if (entry.second < local_quorum_for(ks, entry.first)) {
return false;
}
}

View File

@@ -88,16 +88,10 @@ filter_for_query(consistency_level cl,
std::vector<gms::inet_address> filter_for_query(consistency_level cl, keyspace& ks, std::vector<gms::inet_address>& live_endpoints);
struct dc_node_count {
size_t live = 0;
size_t pending = 0;
};
template <typename Range, typename PendingRange = std::array<gms::inet_address, 0>>
inline std::unordered_map<sstring, dc_node_count> count_per_dc_endpoints(
template <typename Range>
inline std::unordered_map<sstring, size_t> count_per_dc_endpoints(
keyspace& ks,
Range& live_endpoints,
const PendingRange& pending_endpoints = std::array<gms::inet_address, 0>()) {
Range& live_endpoints) {
using namespace locator;
auto& rs = ks.get_replication_strategy();
@@ -106,9 +100,9 @@ inline std::unordered_map<sstring, dc_node_count> count_per_dc_endpoints(
network_topology_strategy* nrs =
static_cast<network_topology_strategy*>(&rs);
std::unordered_map<sstring, dc_node_count> dc_endpoints;
std::unordered_map<sstring, size_t> dc_endpoints;
for (auto& dc : nrs->get_datacenters()) {
dc_endpoints.emplace(dc, dc_node_count());
dc_endpoints.emplace(dc, 0);
}
//
@@ -117,11 +111,7 @@ inline std::unordered_map<sstring, dc_node_count> count_per_dc_endpoints(
// nrs->get_datacenters().
//
for (auto& endpoint : live_endpoints) {
++(dc_endpoints[snitch_ptr->get_datacenter(endpoint)].live);
}
for (auto& endpoint : pending_endpoints) {
++(dc_endpoints[snitch_ptr->get_datacenter(endpoint)].pending);
++(dc_endpoints[snitch_ptr->get_datacenter(endpoint)]);
}
return dc_endpoints;
@@ -132,23 +122,21 @@ is_sufficient_live_nodes(consistency_level cl,
keyspace& ks,
const std::vector<gms::inet_address>& live_endpoints);
template<typename Range, typename PendingRange>
template<typename Range>
inline bool assure_sufficient_live_nodes_each_quorum(
consistency_level cl,
keyspace& ks,
Range& live_endpoints,
const PendingRange& pending_endpoints) {
Range& live_endpoints) {
using namespace locator;
auto& rs = ks.get_replication_strategy();
if (rs.get_type() == replication_strategy_type::network_topology) {
for (auto& entry : count_per_dc_endpoints(ks, live_endpoints, pending_endpoints)) {
for (auto& entry : count_per_dc_endpoints(ks, live_endpoints)) {
auto dc_block_for = local_quorum_for(ks, entry.first);
auto dc_live = entry.second.live;
auto dc_pending = entry.second.pending;
auto dc_live = entry.second;
if (dc_live < dc_block_for + dc_pending) {
if (dc_live < dc_block_for) {
throw exceptions::unavailable_exception(cl, dc_block_for, dc_live);
}
}
@@ -159,12 +147,11 @@ inline bool assure_sufficient_live_nodes_each_quorum(
return false;
}
template<typename Range, typename PendingRange = std::array<gms::inet_address, 0>>
template<typename Range>
inline void assure_sufficient_live_nodes(
consistency_level cl,
keyspace& ks,
Range& live_endpoints,
const PendingRange& pending_endpoints = std::array<gms::inet_address, 0>()) {
Range& live_endpoints) {
size_t need = block_for(ks, cl);
switch (cl) {
@@ -172,13 +159,13 @@ inline void assure_sufficient_live_nodes(
// local hint is acceptable, and local node is always live
break;
case consistency_level::LOCAL_ONE:
if (count_local_endpoints(live_endpoints) < count_local_endpoints(pending_endpoints) + 1) {
if (count_local_endpoints(live_endpoints) == 0) {
throw exceptions::unavailable_exception(cl, 1, 0);
}
break;
case consistency_level::LOCAL_QUORUM: {
size_t local_live = count_local_endpoints(live_endpoints);
if (local_live < need + count_local_endpoints(pending_endpoints)) {
if (local_live < need) {
#if 0
if (logger.isDebugEnabled())
{
@@ -197,15 +184,14 @@ inline void assure_sufficient_live_nodes(
break;
}
case consistency_level::EACH_QUORUM:
if (assure_sufficient_live_nodes_each_quorum(cl, ks, live_endpoints, pending_endpoints)) {
if (assure_sufficient_live_nodes_each_quorum(cl, ks, live_endpoints)) {
break;
}
// Fallthough on purpose for SimpleStrategy
default:
size_t live = live_endpoints.size();
size_t pending = pending_endpoints.size();
if (live < need + pending) {
cl_logger.debug("Live nodes {} do not satisfy ConsistencyLevel ({} required, {} pending)", live, need, pending);
if (live < need) {
cl_logger.debug("Live nodes {} do not satisfy ConsistencyLevel ({} required)", live, need);
throw exceptions::unavailable_exception(cl, need, live);
}
break;

View File

@@ -65,7 +65,6 @@
#include <boost/range/adaptor/map.hpp>
#include "compaction_strategy.hh"
#include "utils/joinpoint.hh"
using namespace db::system_keyspace;
@@ -416,16 +415,16 @@ future<std::vector<frozen_mutation>> convert_schema_to_mutations(distributed<ser
if (partition_key == system_keyspace::NAME) {
continue;
}
results.emplace_back(std::move(p.mut()));
results.emplace_back(p.mut());
}
return results;
});
};
auto reduce = [] (auto&& result, auto&& mutations) {
std::move(mutations.begin(), mutations.end(), std::back_inserter(result));
std::copy(mutations.begin(), mutations.end(), std::back_inserter(result));
return std::move(result);
};
return map_reduce(ALL.begin(), ALL.end(), map, std::vector<frozen_mutation>{}, reduce);
return map_reduce(ALL.begin(), ALL.end(), map, std::move(std::vector<frozen_mutation>{}), reduce);
}
future<schema_result>
@@ -607,10 +606,10 @@ future<> do_merge_schema(distributed<service::storage_proxy>& proxy, std::vector
#endif
proxy.local().get_db().invoke_on_all([keyspaces_to_drop = std::move(keyspaces_to_drop)] (database& db) {
// it is safe to drop a keyspace only when all nested ColumnFamilies where deleted
return do_for_each(keyspaces_to_drop, [&db] (auto keyspace_to_drop) {
for (auto&& keyspace_to_drop : keyspaces_to_drop) {
db.drop_keyspace(keyspace_to_drop);
return service::get_local_migration_manager().notify_drop_keyspace(keyspace_to_drop);
});
service::get_local_migration_manager().notify_drop_keyspace(keyspace_to_drop);
}
}).get0();
});
}
@@ -650,7 +649,7 @@ future<std::set<sstring>> merge_keyspaces(distributed<service::storage_proxy>& p
return do_for_each(created, [&db](auto&& val) {
auto ksm = create_keyspace_from_schema_partition(val);
return db.create_keyspace(ksm).then([ksm] {
return service::get_local_migration_manager().notify_create_keyspace(ksm);
service::get_local_migration_manager().notify_create_keyspace(ksm);
});
}).then([&altered, &db] () mutable {
for (auto&& name : altered) {
@@ -663,7 +662,7 @@ future<std::set<sstring>> merge_keyspaces(distributed<service::storage_proxy>& p
});
}
static future<> update_column_family(database& db, schema_ptr new_schema) {
static void update_column_family(database& db, schema_ptr new_schema) {
column_family& cfm = db.find_column_family(new_schema->id());
bool columns_changed = !cfm.schema()->equal_columns(*new_schema);
@@ -672,7 +671,7 @@ static future<> update_column_family(database& db, schema_ptr new_schema) {
s->registry_entry()->mark_synced();
cfm.set_schema(std::move(s));
return service::get_local_migration_manager().notify_update_column_family(cfm.schema(), columns_changed);
service::get_local_migration_manager().notify_update_column_family(cfm.schema(), columns_changed);
}
// see the comments for merge_keyspaces()
@@ -680,6 +679,7 @@ static void merge_tables(distributed<service::storage_proxy>& proxy,
std::map<qualified_name, schema_mutations>&& before,
std::map<qualified_name, schema_mutations>&& after)
{
auto changed_at = db_clock::now();
std::vector<global_schema_ptr> created;
std::vector<global_schema_ptr> altered;
std::vector<global_schema_ptr> dropped;
@@ -687,44 +687,34 @@ static void merge_tables(distributed<service::storage_proxy>& proxy,
auto diff = difference(before, after);
for (auto&& key : diff.entries_only_on_left) {
auto&& s = proxy.local().get_db().local().find_schema(key.keyspace_name, key.table_name);
logger.info("Dropping {}.{} id={} version={}", s->ks_name(), s->cf_name(), s->id(), s->version());
dropped.emplace_back(s);
}
for (auto&& key : diff.entries_only_on_right) {
auto s = create_table_from_mutations(after.at(key));
logger.info("Creating {}.{} id={} version={}", s->ks_name(), s->cf_name(), s->id(), s->version());
created.emplace_back(s);
created.emplace_back(create_table_from_mutations(after.at(key)));
}
for (auto&& key : diff.entries_differing) {
auto s = create_table_from_mutations(after.at(key));
logger.info("Altering {}.{} id={} version={}", s->ks_name(), s->cf_name(), s->id(), s->version());
altered.emplace_back(s);
altered.emplace_back(create_table_from_mutations(after.at(key)));
}
do_with(utils::make_joinpoint([] { return db_clock::now();})
, [&created, &dropped, &altered, &proxy](auto& tsf) {
return proxy.local().get_db().invoke_on_all([&created, &dropped, &altered, &tsf] (database& db) {
return seastar::async([&] {
for (auto&& gs : created) {
schema_ptr s = gs.get();
auto& ks = db.find_keyspace(s->ks_name());
auto cfg = ks.make_column_family_config(*s);
db.add_column_family(s, cfg);
auto& cf = db.find_column_family(s);
cf.mark_ready_for_writes();
ks.make_directory_for_column_family(s->cf_name(), s->id()).get();
service::get_local_migration_manager().notify_create_column_family(s).get();
}
for (auto&& gs : altered) {
update_column_family(db, gs.get()).get();
}
parallel_for_each(dropped.begin(), dropped.end(), [&db, &tsf](auto&& gs) {
schema_ptr s = gs.get();
return db.drop_column_family(s->ks_name(), s->cf_name(), [&tsf] { return tsf.value(); }).then([s] {
return service::get_local_migration_manager().notify_drop_column_family(s);
});
}).get();
});
proxy.local().get_db().invoke_on_all([&created, &dropped, &altered, changed_at] (database& db) {
return seastar::async([&] {
for (auto&& gs : created) {
schema_ptr s = gs.get();
auto& ks = db.find_keyspace(s->ks_name());
auto cfg = ks.make_column_family_config(*s);
db.add_column_family(s, cfg);
ks.make_directory_for_column_family(s->cf_name(), s->id()).get();
service::get_local_migration_manager().notify_create_column_family(s);
}
for (auto&& gs : altered) {
update_column_family(db, gs.get());
}
parallel_for_each(dropped.begin(), dropped.end(), [changed_at, &db](auto&& gs) {
schema_ptr s = gs.get();
return db.drop_column_family(changed_at, s->ks_name(), s->cf_name()).then([s] {
service::get_local_migration_manager().notify_drop_column_family(s);
});
}).get();
});
}).get();
}

194
db/serializer.cc Normal file
View File

@@ -0,0 +1,194 @@
/*
* Copyright 2015 Cloudius Systems
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include "serializer.hh"
#include "database.hh"
#include "types.hh"
#include "utils/serialization.hh"
typedef uint32_t count_type; // 32 bits are sufficient for ordinary count purposes.

// A UUID is serialized as two raw 64-bit halves: the most significant
// bits first, then the least significant bits (16 bytes, fixed width).
template<>
db::serializer<utils::UUID>::serializer(const utils::UUID& uuid)
        : _item(uuid), _size(2 * sizeof(uint64_t)) {
}

template<>
void db::serializer<utils::UUID>::write(output& out, const type& uuid) {
    const auto high = uuid.get_most_significant_bits();
    const auto low = uuid.get_least_significant_bits();
    out.write(high);
    out.write(low);
}

template<>
void db::serializer<utils::UUID>::read(utils::UUID& dst, input& in) {
    dst = read(in);
}

template<>
void db::serializer<utils::UUID>::skip(input& in) {
    // Fixed-width encoding: skipping never needs to inspect the data.
    in.skip(2 * sizeof(uint64_t));
}

template<> utils::UUID db::serializer<utils::UUID>::read(input& in) {
    const auto high = in.read<uint64_t>();
    const auto low = in.read<uint64_t>();
    return utils::UUID(high, low);
}
// bytes is serialized in the length-prefixed form defined by data_output;
// the encoding is wire-compatible with the bytes_view serializer.
template<>
db::serializer<bytes>::serializer(const bytes& value)
        : _item(value), _size(output::serialized_size(value)) {
}

template<>
void db::serializer<bytes>::write(output& out, const type& value) {
    out.write(value);
}

template<>
void db::serializer<bytes>::read(bytes& dst, input& in) {
    dst = in.read<bytes>();
}

template<>
void db::serializer<bytes>::skip(input& in) {
    in.read<bytes>(); // FIXME: avoid materializing the value just to skip it
}
// bytes_view shares the bytes wire format but reads back a non-owning
// view into the input buffer (hence no skip(): there is no by-value read
// to avoid). The caller must keep the underlying buffer alive.
template<>
db::serializer<bytes_view>::serializer(const bytes_view& view)
        : _item(view), _size(output::serialized_size(view)) {
}

template<>
void db::serializer<bytes_view>::write(output& out, const type& view) {
    out.write(view);
}

template<>
void db::serializer<bytes_view>::read(bytes_view& dst, input& in) {
    dst = in.read<bytes_view>();
}

template<>
bytes_view db::serializer<bytes_view>::read(input& in) {
    return in.read<bytes_view>();
}
// sstring is serialized in the length-prefixed form defined by data_output.
template<>
db::serializer<sstring>::serializer(const sstring& str)
        : _item(str), _size(output::serialized_size(str)) {
}

template<>
void db::serializer<sstring>::write(output& out, const type& str) {
    out.write(str);
}

template<>
void db::serializer<sstring>::read(sstring& dst, input& in) {
    dst = in.read<sstring>();
}

template<>
void db::serializer<sstring>::skip(input& in) {
    in.read<sstring>(); // FIXME: avoid materializing the value just to skip it
}
// A tombstone is serialized as its deletion timestamp followed by the
// gc_clock deletion time, stored as a raw tick count.
template<>
db::serializer<tombstone>::serializer(const tombstone& t)
        : _item(t), _size(sizeof(t.timestamp) + sizeof(decltype(t.deletion_time.time_since_epoch().count()))) {
}

template<>
void db::serializer<tombstone>::write(output& out, const type& ts) {
    out.write(ts.timestamp);
    out.write(ts.deletion_time.time_since_epoch().count());
}

template<>
void db::serializer<tombstone>::read(tombstone& dst, input& in) {
    dst.timestamp = in.read<decltype(dst.timestamp)>();
    // Reconstruct the time_point from the raw tick count written above.
    auto ticks = in.read<decltype(dst.deletion_time.time_since_epoch().count())>();
    dst.deletion_time = gc_clock::time_point(gc_clock::duration(ticks));
}
// An atomic_cell_view round-trips through its underlying byte
// representation, delegating entirely to the bytes_view serializer.
template<>
db::serializer<atomic_cell_view>::serializer(const atomic_cell_view& cell)
        : _item(cell), _size(bytes_view_serializer(cell.serialize()).size()) {
}

template<>
void db::serializer<atomic_cell_view>::write(output& out, const atomic_cell_view& cell) {
    bytes_view_serializer::write(out, cell.serialize());
}

template<>
void db::serializer<atomic_cell_view>::read(atomic_cell_view& dst, input& in) {
    dst = atomic_cell_view::from_bytes(bytes_view_serializer::read(in));
}

template<>
atomic_cell_view db::serializer<atomic_cell_view>::read(input& in) {
    return atomic_cell_view::from_bytes(bytes_view_serializer::read(in));
}
// A collection_mutation_view round-trips through its underlying byte
// representation, delegating entirely to the bytes_view serializer.
template<>
db::serializer<collection_mutation_view>::serializer(const collection_mutation_view& cm)
        : _item(cm), _size(bytes_view_serializer(cm.serialize()).size()) {
}

template<>
void db::serializer<collection_mutation_view>::write(output& out, const collection_mutation_view& cm) {
    bytes_view_serializer::write(out, cm.serialize());
}

template<>
void db::serializer<collection_mutation_view>::read(collection_mutation_view& dst, input& in) {
    dst = collection_mutation_view::from_bytes(bytes_view_serializer::read(in));
}
// A replay_position is serialized as two raw uint64_t values: id first,
// then pos (16 bytes, fixed width).
template<>
db::serializer<db::replay_position>::serializer(const db::replay_position& rp)
        : _item(rp), _size(sizeof(uint64_t) * 2) {
}

template<>
void db::serializer<db::replay_position>::write(output& out, const db::replay_position& value) {
    out.write<uint64_t>(value.id);
    out.write<uint64_t>(value.pos);
}

template<>
void db::serializer<db::replay_position>::read(db::replay_position& dst, input& in) {
    dst.id = in.read<uint64_t>();
    dst.pos = in.read<uint64_t>();
}
// Explicit instantiation definitions for every type whose member
// specializations are defined above; these provide the single definition
// that the matching "extern template" declarations in db/serializer.hh
// refer to.
template class db::serializer<tombstone> ;
template class db::serializer<bytes> ;
template class db::serializer<bytes_view> ;
template class db::serializer<sstring> ;
template class db::serializer<atomic_cell_view> ;
template class db::serializer<collection_mutation_view> ;
template class db::serializer<utils::UUID> ;
template class db::serializer<db::replay_position> ;

235
db/serializer.hh Normal file
View File

@@ -0,0 +1,235 @@
/*
* Copyright 2015 Cloudius Systems
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef DB_SERIALIZER_HH_
#define DB_SERIALIZER_HH_
#include <experimental/optional>
#include "utils/data_input.hh"
#include "utils/data_output.hh"
#include "bytes_ostream.hh"
#include "bytes.hh"
#include "database_fwd.hh"
#include "db/commitlog/replay_position.hh"
namespace db {
/**
* Serialization objects for various types and using "internal" format. (Not CQL, origin whatnot).
* The design rationale is that a "serializer" can be instantiated for an object, and will contain
* the obj + size, and is usable as a functor.
*
* Serialization can also be done "explicitly" through the static method "write"
* (Not using "serialize", because writing "serializer<apa>::serialize" all the time is tiring and redundant)
 * though care should be taken that the data will fit, of course.
*/
template<typename T>
class serializer {
public:
typedef T type;
typedef data_output output;
typedef data_input input;
typedef serializer<T> _MyType;
serializer(const type&);
// apply to memory, must be at least size() large.
const _MyType& operator()(output& out) const {
write(out, _item);
return *this;
}
static void write(output&, const type&);
static void read(type&, input&);
static type read(input&);
static void skip(input& in);
size_t size() const {
return _size;
}
void write(bytes_ostream& out) const {
auto buf = out.write_place_holder(_size);
data_output data_out((char*)buf, _size);
write(data_out, _item);
}
void write(data_output& out) const {
write(out, _item);
}
bytes to_bytes() const {
bytes b(bytes::initialized_later(), _size);
data_output out(b);
write(out);
return b;
}
static type from_bytes(bytes_view v) {
data_input in(v);
return read(in);
}
private:
const type& _item;
size_t _size;
};
/**
 * Serializer specialization for optional values.
 *
 * Wire format: a bool "engaged" flag, followed by the serialized value
 * iff the flag is true. Note that, unlike the primary template, this
 * specialization stores the optional by value rather than by reference.
 */
template<typename T>
class serializer<std::experimental::optional<T>> {
public:
    typedef std::experimental::optional<T> type;
    typedef data_output output;
    typedef data_input input;
    // Fixed: must name this specialization itself, not serializer<T>.
    // The old typedef made operator()'s "return *this" ill-formed, since
    // serializer<optional<T>> does not convert to serializer<T>.
    typedef serializer<std::experimental::optional<T>> _MyType;

    serializer(const type& t)
        : _item(t)
        , _size(output::serialized_size<bool>() + (t ? serializer<T>(*t).size() : 0))
    {}

    // apply to memory, must be at least size() large.
    const _MyType& operator()(output& out) const {
        write(out, _item);
        return *this;
    }

    static void write(output& out, const type& v) {
        bool en = v; // engaged flag
        out.write<bool>(en);
        if (en) {
            serializer<T>::write(out, *v);
        }
    }
    static void read(type& dst, input& in) {
        auto en = in.read<bool>();
        if (en) {
            dst = serializer<T>::read(in);
        } else {
            dst = {}; // disengage on a false flag
        }
    }
    static type read(input& in) {
        type t;
        read(t, in);
        return t;
    }
    static void skip(input& in) {
        auto en = in.read<bool>();
        if (en) {
            serializer<T>::skip(in);
        }
    }
    // Number of bytes write() will produce for the bound optional.
    size_t size() const {
        return _size;
    }
    void write(bytes_ostream& out) const {
        auto buf = out.write_place_holder(_size);
        data_output data_out((char*)buf, _size);
        write(data_out, _item);
    }
    void write(data_output& out) const {
        write(out, _item);
    }
    bytes to_bytes() const {
        bytes b(bytes::initialized_later(), _size);
        data_output out(b);
        write(out);
        return b;
    }
    static type from_bytes(bytes_view v) {
        data_input in(v);
        return read(in);
    }
private:
    const std::experimental::optional<T> _item; // held by value, unlike the primary template
    size_t _size;
};
// Declarations of the explicit member specializations; the definitions
// live in db/serializer.cc. Types without a skip() declaration (e.g.
// bytes_view) only support by-value/by-reference reads.
template<> serializer<utils::UUID>::serializer(const utils::UUID &);
template<> void serializer<utils::UUID>::write(output&, const type&);
template<> void serializer<utils::UUID>::read(utils::UUID&, input&);
template<> void serializer<utils::UUID>::skip(input&);
template<> utils::UUID serializer<utils::UUID>::read(input&);

template<> serializer<bytes>::serializer(const bytes &);
template<> void serializer<bytes>::write(output&, const type&);
template<> void serializer<bytes>::read(bytes&, input&);
template<> void serializer<bytes>::skip(input&);

template<> serializer<bytes_view>::serializer(const bytes_view&);
template<> void serializer<bytes_view>::write(output&, const type&);
template<> void serializer<bytes_view>::read(bytes_view&, input&);
template<> bytes_view serializer<bytes_view>::read(input&);

template<> serializer<sstring>::serializer(const sstring&);
template<> void serializer<sstring>::write(output&, const type&);
template<> void serializer<sstring>::read(sstring&, input&);
template<> void serializer<sstring>::skip(input&);

template<> serializer<tombstone>::serializer(const tombstone &);
template<> void serializer<tombstone>::write(output&, const type&);
template<> void serializer<tombstone>::read(tombstone&, input&);

template<> serializer<atomic_cell_view>::serializer(const atomic_cell_view &);
template<> void serializer<atomic_cell_view>::write(output&, const type&);
template<> void serializer<atomic_cell_view>::read(atomic_cell_view&, input&);
template<> atomic_cell_view serializer<atomic_cell_view>::read(input&);

template<> serializer<collection_mutation_view>::serializer(const collection_mutation_view &);
template<> void serializer<collection_mutation_view>::write(output&, const type&);
template<> void serializer<collection_mutation_view>::read(collection_mutation_view&, input&);

template<> serializer<db::replay_position>::serializer(const db::replay_position&);
template<> void serializer<db::replay_position>::write(output&, const db::replay_position&);
template<> void serializer<db::replay_position>::read(db::replay_position&, input&);
// Generic by-value read: default-construct a T and fill it in via the
// by-reference overload, which each specialization provides.
template<typename T>
T serializer<T>::read(input& in) {
    type result;
    read(result, in);
    return result;
}
// Suppress implicit instantiation in every translation unit; the single
// explicit instantiation of each of these lives in db/serializer.cc.
extern template class serializer<tombstone>;
extern template class serializer<bytes>;
extern template class serializer<bytes_view>;
extern template class serializer<sstring>;
extern template class serializer<utils::UUID>;
extern template class serializer<db::replay_position>;

// Convenience aliases for the commonly used serializers.
typedef serializer<tombstone> tombstone_serializer;
typedef serializer<bytes> bytes_serializer; // Compatible with bytes_view_serializer
typedef serializer<bytes_view> bytes_view_serializer; // Compatible with bytes_serializer
typedef serializer<sstring> sstring_serializer;
typedef serializer<atomic_cell_view> atomic_cell_view_serializer;
typedef serializer<collection_mutation_view> collection_mutation_view_serializer;
typedef serializer<utils::UUID> uuid_serializer;
typedef serializer<db::replay_position> replay_position_serializer;
}
#endif /* DB_SERIALIZER_HH_ */

View File

@@ -58,16 +58,14 @@
#include "thrift/server.hh"
#include "exceptions/exceptions.hh"
#include "cql3/query_processor.hh"
#include "db/serializer.hh"
#include "query_context.hh"
#include "partition_slice_builder.hh"
#include "db/config.hh"
#include "schema_builder.hh"
#include "md5_hasher.hh"
#include "release.hh"
#include "log.hh"
#include "serializer.hh"
#include <core/enum.hh>
#include "service/storage_proxy.hh"
using days = std::chrono::duration<int, std::ratio<24 * 3600>>;
@@ -77,7 +75,6 @@ std::unique_ptr<query_context> qctx = {};
namespace system_keyspace {
static logging::logger logger("system_keyspace");
static const api::timestamp_type creation_timestamp = api::new_timestamp();
api::timestamp_type schema_creation_timestamp() {
@@ -441,7 +438,7 @@ static future<> setup_version() {
version::release(),
cql3::query_processor::CQL_VERSION,
org::apache::cassandra::thrift_version,
to_sstring(cql_serialization_format::latest_version),
to_sstring(version::native_protocol()),
snitch->get_datacenter(utils::fb_utilities::get_broadcast_address()),
snitch->get_rack(utils::fb_utilities::get_broadcast_address()),
sstring(dht::global_partitioner().name()),
@@ -549,44 +546,31 @@ future<> setup(distributed<database>& db, distributed<cql3::query_processor>& qp
});
}
struct truncation_record {
static constexpr uint32_t current_magic = 0x53435452; // 'S' 'C' 'T' 'R'
uint32_t magic;
std::vector<db::replay_position> positions;
db_clock::time_point time_stamp;
};
}
}
#include "idl/replay_position.dist.hh"
#include "idl/truncation_record.dist.hh"
#include "serializer_impl.hh"
#include "idl/replay_position.dist.impl.hh"
#include "idl/truncation_record.dist.impl.hh"
namespace db {
namespace system_keyspace {
typedef std::pair<replay_positions, db_clock::time_point> truncation_entry;
typedef utils::UUID truncation_key;
typedef std::unordered_map<truncation_key, truncation_record> truncation_map;
typedef std::unordered_map<truncation_key, truncation_entry> truncation_map;
static constexpr uint8_t current_version = 1;
static thread_local std::experimental::optional<truncation_map> truncation_records;
future<> save_truncation_records(const column_family& cf, db_clock::time_point truncated_at, replay_positions positions) {
truncation_record r;
auto size =
sizeof(db_clock::rep)
+ positions.size()
* db::serializer<replay_position>(
db::replay_position()).size();
bytes buf(bytes::initialized_later(), size);
data_output out(buf);
r.magic = truncation_record::current_magic;
r.time_stamp = truncated_at;
r.positions = std::move(positions);
auto buf = ser::serialize_to_buffer<bytes>(r, sizeof(current_version));
buf[0] = current_version;
static_assert(sizeof(current_version) == 1, "using this as mark");
assert(buf.size() & 1); // verify we've created an odd-numbered buffer
// Old version would write a single RP. We write N. Resulting blob size
// will determine how many.
// An external entity reading this blob would get a "correct" RP
// and a garbled time stamp. But an external entity has no business
// reading this data anyway, since it is meaningless outside this
// machine instance.
for (auto& rp : positions) {
db::serializer<replay_position>::write(out, rp);
}
out.write<db_clock::rep>(truncated_at.time_since_epoch().count());
map_type_impl::native_type tmp;
tmp.emplace_back(cf.schema()->id(), data_value(buf));
@@ -610,7 +594,7 @@ future<> remove_truncation_record(utils::UUID id) {
});
}
static future<truncation_record> get_truncation_record(utils::UUID cf_id) {
static future<truncation_entry> get_truncation_record(utils::UUID cf_id) {
if (!truncation_records) {
sstring req = sprint("SELECT truncated_at FROM system.%s WHERE key = '%s'", LOCAL, LOCAL);
return qctx->qp().execute_internal(req).then([cf_id](::shared_ptr<cql3::untyped_result_set> rs) {
@@ -621,56 +605,22 @@ static future<truncation_record> get_truncation_record(utils::UUID cf_id) {
auto uuid = p.first;
auto buf = p.second;
try {
truncation_record e;
truncation_entry e;
if (buf.size() & 1) {
// new record.
if (buf[0] != current_version) {
logger.warn("Found truncation record of unknown version {}. Ignoring.", int(buf[0]));
continue;
}
e = ser::deserialize_from_buffer(buf, boost::type<truncation_record>(), 1);
if (e.magic == truncation_record::current_magic) {
tmp[uuid] = e;
continue;
}
} else {
// old scylla records. (We hope)
// Read 64+64 bit RP:s, even though the
// struct (and official serial size) is 64+32.
data_input in(buf);
data_input in(buf);
logger.debug("Reading old type record");
while (in.avail() > sizeof(db_clock::rep)) {
auto id = in.read<uint64_t>();
auto pos = in.read<uint64_t>();
e.positions.emplace_back(id, position_type(pos));
}
if (in.avail() == sizeof(db_clock::rep)) {
e.time_stamp = db_clock::time_point(db_clock::duration(in.read<db_clock::rep>()));
tmp[uuid] = e;
continue;
}
}
} catch (std::out_of_range &) {
while (in.avail() > sizeof(db_clock::rep)) {
e.first.emplace_back(db::serializer<replay_position>::read(in));
}
// Trying to load an origin table.
// This is useless to us, because the only usage for this
// data is commit log and batch replay, and we cannot replay
// either from origin anyway.
logger.warn("Error reading truncation record for {}. "
"Most likely this is data from a cassandra instance."
"Make sure you have cleared commit and batch logs before upgrading.",
uuid
);
e.second = db_clock::time_point(db_clock::duration(in.read<db_clock::rep>()));
tmp[uuid] = e;
}
}
truncation_records = std::move(tmp);
return get_truncation_record(cf_id);
});
}
return make_ready_future<truncation_record>((*truncation_records)[cf_id]);
return make_ready_future<truncation_entry>((*truncation_records)[cf_id]);
}
future<> save_truncation_record(const column_family& cf, db_clock::time_point truncated_at, db::replay_position rp) {
@@ -678,16 +628,16 @@ future<> save_truncation_record(const column_family& cf, db_clock::time_point tr
// once, for each core (calling us). But right now, redesigning so that calling here (or, rather,
// save_truncation_records), is done from "somewhere higher, once per machine, not shard" is tricky.
// Mainly because drop_tables also uses truncate. And is run per-core as well. Gah.
return get_truncation_record(cf.schema()->id()).then([&cf, truncated_at, rp](truncation_record e) {
auto i = std::find_if(e.positions.begin(), e.positions.end(), [rp](replay_position& p) {
return get_truncated_position(cf.schema()->id()).then([&cf, truncated_at, rp](replay_positions positions) {
auto i = std::find_if(positions.begin(), positions.end(), [rp](auto& p) {
return p.shard_id() == rp.shard_id();
});
if (i == e.positions.end()) {
e.positions.emplace_back(rp);
if (i == positions.end()) {
positions.emplace_back(rp);
} else {
*i = rp;
}
return save_truncation_records(cf, std::max(truncated_at, e.time_stamp), e.positions);
return save_truncation_records(cf, truncated_at, positions);
});
}
@@ -703,14 +653,14 @@ future<db::replay_position> get_truncated_position(utils::UUID cf_id, uint32_t s
}
future<replay_positions> get_truncated_position(utils::UUID cf_id) {
return get_truncation_record(cf_id).then([](truncation_record e) {
return make_ready_future<replay_positions>(e.positions);
return get_truncation_record(cf_id).then([](truncation_entry e) {
return make_ready_future<replay_positions>(e.first);
});
}
future<db_clock::time_point> get_truncated_at(utils::UUID cf_id) {
return get_truncation_record(cf_id).then([](truncation_record e) {
return make_ready_future<db_clock::time_point>(e.time_stamp);
return get_truncation_record(cf_id).then([](truncation_entry e) {
return make_ready_future<db_clock::time_point>(e.second);
});
}
@@ -1146,36 +1096,5 @@ future<std::vector<compaction_history_entry>> get_compaction_history()
});
}
future<int> increment_and_get_generation() {
auto req = sprint("SELECT gossip_generation FROM system.%s WHERE key='%s'", LOCAL, LOCAL);
return qctx->qp().execute_internal(req).then([] (auto rs) {
int generation;
if (rs->empty() || !rs->one().has("gossip_generation")) {
// seconds-since-epoch isn't a foolproof new generation
// (where foolproof is "guaranteed to be larger than the last one seen at this ip address"),
// but it's as close as sanely possible
generation = service::get_generation_number();
} else {
// Other nodes will ignore gossip messages about a node that have a lower generation than previously seen.
int stored_generation = rs->one().template get_as<int>("gossip_generation") + 1;
int now = service::get_generation_number();
if (stored_generation >= now) {
logger.warn("Using stored Gossip Generation {} as it is greater than current system time {}."
"See CASSANDRA-3654 if you experience problems", stored_generation, now);
generation = stored_generation;
} else {
generation = now;
}
}
auto req = sprint("INSERT INTO system.%s (key, gossip_generation) VALUES ('%s', ?)", LOCAL, LOCAL);
return qctx->qp().execute_internal(req, {generation}).then([generation] (auto rs) {
return force_blocking_flush(LOCAL);
}).then([generation] {
return make_ready_future<int>(generation);
});
});
}
} // namespace system_keyspace
} // namespace db

View File

@@ -401,9 +401,127 @@ enum class bootstrap_state {
*/
future<std::unordered_map<gms::inet_address, utils::UUID>> load_host_ids();
future<std::unordered_set<dht::token>> get_saved_tokens();
#if 0
/**
* Get preferred IP for given endpoint if it is known. Otherwise this returns given endpoint itself.
*
* @param ep endpoint address to check
* @return Preferred IP for given endpoint if present, otherwise returns given ep
*/
public static InetAddress getPreferredIP(InetAddress ep)
{
String req = "SELECT preferred_ip FROM system.%s WHERE peer=?";
UntypedResultSet result = executeInternal(String.format(req, PEERS), ep);
if (!result.isEmpty() && result.one().has("preferred_ip"))
return result.one().getInetAddress("preferred_ip");
return ep;
}
/**
* Return a map of IP addresses containing a map of dc and rack info
*/
public static Map<InetAddress, Map<String,String>> loadDcRackInfo()
{
Map<InetAddress, Map<String, String>> result = new HashMap<>();
for (UntypedResultSet.Row row : executeInternal("SELECT peer, data_center, rack from system." + PEERS))
{
InetAddress peer = row.getInetAddress("peer");
if (row.has("data_center") && row.has("rack"))
{
Map<String, String> dcRack = new HashMap<>();
dcRack.put("data_center", row.getString("data_center"));
dcRack.put("rack", row.getString("rack"));
result.put(peer, dcRack);
}
}
return result;
}
/**
* One of three things will happen if you try to read the system keyspace:
* 1. files are present and you can read them: great
* 2. no files are there: great (new node is assumed)
* 3. files are present but you can't read them: bad
* @throws ConfigurationException
*/
public static void checkHealth() throws ConfigurationException
{
Keyspace keyspace;
try
{
keyspace = Keyspace.open(NAME);
}
catch (AssertionError err)
{
// this happens when a user switches from OPP to RP.
ConfigurationException ex = new ConfigurationException("Could not read system keyspace!");
ex.initCause(err);
throw ex;
}
ColumnFamilyStore cfs = keyspace.getColumnFamilyStore(LOCAL);
String req = "SELECT cluster_name FROM system.%s WHERE key='%s'";
UntypedResultSet result = executeInternal(String.format(req, LOCAL, LOCAL));
if (result.isEmpty() || !result.one().has("cluster_name"))
{
// this is a brand new node
if (!cfs.getSSTables().isEmpty())
throw new ConfigurationException("Found system keyspace files, but they couldn't be loaded!");
// no system files. this is a new node.
req = "INSERT INTO system.%s (key, cluster_name) VALUES ('%s', ?)";
executeInternal(String.format(req, LOCAL, LOCAL), DatabaseDescriptor.getClusterName());
return;
}
String savedClusterName = result.one().getString("cluster_name");
if (!DatabaseDescriptor.getClusterName().equals(savedClusterName))
throw new ConfigurationException("Saved cluster name " + savedClusterName + " != configured name " + DatabaseDescriptor.getClusterName());
}
#endif
future<std::unordered_set<dht::token>> get_saved_tokens();
#if 0
public static int incrementAndGetGeneration()
{
String req = "SELECT gossip_generation FROM system.%s WHERE key='%s'";
UntypedResultSet result = executeInternal(String.format(req, LOCAL, LOCAL));
int generation;
if (result.isEmpty() || !result.one().has("gossip_generation"))
{
// seconds-since-epoch isn't a foolproof new generation
// (where foolproof is "guaranteed to be larger than the last one seen at this ip address"),
// but it's as close as sanely possible
generation = (int) (System.currentTimeMillis() / 1000);
}
else
{
// Other nodes will ignore gossip messages about a node that have a lower generation than previously seen.
final int storedGeneration = result.one().getInt("gossip_generation") + 1;
final int now = (int) (System.currentTimeMillis() / 1000);
if (storedGeneration >= now)
{
logger.warn("Using stored Gossip Generation {} as it is greater than current system time {}. See CASSANDRA-3654 if you experience problems",
storedGeneration, now);
generation = storedGeneration;
}
else
{
generation = now;
}
}
req = "INSERT INTO system.%s (key, gossip_generation) VALUES ('%s', ?)";
executeInternal(String.format(req, LOCAL, LOCAL), generation);
forceBlockingFlush(LOCAL);
return generation;
}
#endif
future<int> increment_and_get_generation();
bool bootstrap_complete();
bool bootstrap_in_progress();
bootstrap_state get_bootstrap_state();

View File

@@ -263,6 +263,29 @@ int token_comparator::operator()(const token& t1, const token& t2) const {
return tri_compare(t1, t2);
}
void token::serialize(bytes::iterator& out) const {
uint8_t kind = _kind == dht::token::kind::before_all_keys ? 0 :
_kind == dht::token::kind::key ? 1 : 2;
serialize_int8(out, kind);
serialize_int16(out, _data.size());
out = std::copy(_data.begin(), _data.end(), out);
}
token token::deserialize(bytes_view& in) {
uint8_t kind = read_simple<uint8_t>(in);
size_t size = read_simple<uint16_t>(in);
return token(kind == 0 ? dht::token::kind::before_all_keys :
kind == 1 ? dht::token::kind::key :
dht::token::kind::after_all_keys,
to_bytes(read_simple_bytes(in, size)));
}
size_t token::serialized_size() const {
return serialize_int8_size // token::kind;
+ serialize_int16_size // token size
+ _data.size();
}
bool ring_position::equal(const schema& s, const ring_position& other) const {
return tri_compare(s, other) == 0;
}

View File

@@ -97,6 +97,11 @@ public:
bool is_maximum() const {
return _kind == kind::after_all_keys;
}
void serialize(bytes::iterator& out) const;
static token deserialize(bytes_view& in);
size_t serialized_size() const;
};
token midpoint_unsigned(const token& t1, const token& t2);

102
dist/ami/build_ami.sh vendored
View File

@@ -6,100 +6,22 @@ if [ ! -e dist/ami/build_ami.sh ]; then
fi
print_usage() {
echo "build_ami.sh --localrpm --unstable"
echo " --localrpm deploy locally built rpms"
echo " --unstable use unstable branch"
echo "build_ami.sh -l"
echo " -l deploy locally built rpms"
exit 1
}
LOCALRPM=0
while [ $# -gt 0 ]; do
case "$1" in
"--localrpm")
while getopts lh OPT; do
case "$OPT" in
"l")
LOCALRPM=1
INSTALL_ARGS="$INSTALL_ARGS --localrpm"
shift 1
;;
"--unstable")
INSTALL_ARGS="$INSTALL_ARGS --unstable"
shift 1
;;
*)
"h")
print_usage
;;
esac
done
. /etc/os-release
case "$ID" in
"centos")
AMI=ami-f3102499
REGION=us-east-1
SSH_USERNAME=centos
;;
"ubuntu")
AMI=ami-ff427095
REGION=us-east-1
SSH_USERNAME=ubuntu
;;
*)
echo "build_ami.sh does not supported this distribution."
exit 1
;;
esac
if [ $LOCALRPM -eq 1 ]; then
if [ "$ID" = "centos" ]; then
rm -rf build/*
sudo yum -y install git
if [ ! -f dist/ami/files/scylla-server.x86_64.rpm ]; then
dist/redhat/build_rpm.sh
cp build/rpmbuild/RPMS/x86_64/scylla-server-`cat build/SCYLLA-VERSION-FILE`-`cat build/SCYLLA-RELEASE-FILE`.*.x86_64.rpm dist/ami/files/scylla-server.x86_64.rpm
fi
if [ ! -f dist/ami/files/scylla-jmx.noarch.rpm ]; then
cd build
git clone --depth 1 https://github.com/scylladb/scylla-jmx.git
cd scylla-jmx
sh -x -e dist/redhat/build_rpm.sh $*
cd ../..
cp build/scylla-jmx/build/rpmbuild/RPMS/noarch/scylla-jmx-`cat build/scylla-jmx/build/SCYLLA-VERSION-FILE`-`cat build/scylla-jmx/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-jmx.noarch.rpm
fi
if [ ! -f dist/ami/files/scylla-tools.noarch.rpm ]; then
cd build
git clone --depth 1 https://github.com/scylladb/scylla-tools-java.git
cd scylla-tools-java
sh -x -e dist/redhat/build_rpm.sh
cd ../..
cp build/scylla-tools-java/build/rpmbuild/RPMS/noarch/scylla-tools-`cat build/scylla-tools-java/build/SCYLLA-VERSION-FILE`-`cat build/scylla-tools-java/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-tools.noarch.rpm
fi
else
sudo apt-get install -y git
if [ ! -f dist/ami/files/scylla-server_amd64.deb ]; then
if [ ! -f ../scylla-server_`cat build/SCYLLA-VERSION-FILE | sed 's/\.rc/~rc/'`-`cat build/SCYLLA-RELEASE-FILE`-ubuntu1_amd64.deb ]; then
echo "Build .deb before running build_ami.sh"
exit 1
fi
cp ../scylla-server_`cat build/SCYLLA-VERSION-FILE | sed 's/\.rc/~rc/'`-`cat build/SCYLLA-RELEASE-FILE`-ubuntu1_amd64.deb dist/ami/files/scylla-server_amd64.deb
fi
if [ ! -f dist/ami/files/scylla-jmx_all.deb ]; then
cd build
git clone --depth 1 https://github.com/scylladb/scylla-jmx.git
cd scylla-jmx
sh -x -e dist/ubuntu/build_deb.sh $*
cd ../..
cp build/scylla-jmx_`cat build/scylla-jmx/build/SCYLLA-VERSION-FILE | sed 's/\.rc/~rc/'`-`cat build/scylla-jmx/build/SCYLLA-RELEASE-FILE`-ubuntu1_all.deb dist/ami/files/scylla-jmx_all.deb
fi
if [ ! -f dist/ami/files/scylla-tools_all.deb ]; then
cd build
git clone --depth 1 https://github.com/scylladb/scylla-tools-java.git
cd scylla-tools-java
sh -x -e dist/ubuntu/build_deb.sh $*
cd ../..
cp build/scylla-tools_`cat build/scylla-tools-java/build/SCYLLA-VERSION-FILE | sed 's/\.rc/~rc/'`-`cat build/scylla-tools-java/build/SCYLLA-RELEASE-FILE`-ubuntu1_all.deb dist/ami/files/scylla-tools_all.deb
fi
fi
fi
cd dist/ami
if [ ! -f variables.json ]; then
@@ -108,11 +30,19 @@ if [ ! -f variables.json ]; then
fi
if [ ! -d packer ]; then
wget https://releases.hashicorp.com/packer/0.8.6/packer_0.8.6_linux_amd64.zip
wget https://dl.bintray.com/mitchellh/packer/packer_0.8.6_linux_amd64.zip
mkdir packer
cd packer
unzip -x ../packer_0.8.6_linux_amd64.zip
cd -
fi
packer/packer build -var-file=variables.json -var install_args="$INSTALL_ARGS" -var region="$REGION" -var source_ami="$AMI" -var ssh_username="$SSH_USERNAME" scylla.json
if [ $LOCALRPM = 0 ]; then
echo "sudo yum remove -y abrt; sudo sh -x -e /home/centos/scylla_install_pkg; sudo sh -x -e /usr/lib/scylla/scylla_setup -a" > scylla_deploy.sh
else
echo "sudo yum remove -y abrt; sudo sh -x -e /home/centos/scylla_install_pkg -l /home/centos; sudo sh -x -e /usr/lib/scylla/scylla_setup -a" > scylla_deploy.sh
fi
chmod a+rx scylla_deploy.sh
packer/packer build -var-file=variables.json scylla.json

31
dist/ami/build_ami_local.sh vendored Executable file
View File

@@ -0,0 +1,31 @@
#!/bin/sh -e
if [ ! -e dist/ami/build_ami_local.sh ]; then
echo "run build_ami_local.sh in top of scylla dir"
exit 1
fi
rm -rf build/*
sudo yum -y install git
if [ ! -f dist/ami/files/scylla-server.x86_64.rpm ]; then
dist/redhat/build_rpm.sh
cp build/rpmbuild/RPMS/x86_64/scylla-server-`cat build/SCYLLA-VERSION-FILE`-`cat build/SCYLLA-RELEASE-FILE`.*.x86_64.rpm dist/ami/files/scylla-server.x86_64.rpm
fi
if [ ! -f dist/ami/files/scylla-jmx.noarch.rpm ]; then
cd build
git clone --depth 1 https://github.com/scylladb/scylla-jmx.git
cd scylla-jmx
sh -x -e dist/redhat/build_rpm.sh $*
cd ../..
cp build/scylla-jmx/build/rpmbuild/RPMS/noarch/scylla-jmx-`cat build/scylla-jmx/build/SCYLLA-VERSION-FILE`-`cat build/scylla-jmx/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-jmx.noarch.rpm
fi
if [ ! -f dist/ami/files/scylla-tools.noarch.rpm ]; then
cd build
git clone --depth 1 https://github.com/scylladb/scylla-tools-java.git
cd scylla-tools-java
sh -x -e dist/redhat/build_rpm.sh
cd ../..
cp build/scylla-tools-java/build/rpmbuild/RPMS/noarch/scylla-tools-`cat build/scylla-tools-java/build/SCYLLA-VERSION-FILE`-`cat build/scylla-tools-java/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-tools.noarch.rpm
fi
exec dist/ami/build_ami.sh -l

View File

@@ -30,21 +30,7 @@ echo 'More documentation available at: '
echo ' http://www.scylladb.com/doc/'
echo
. /etc/os-release
if [ "$ID" = "ubuntu" ]; then
if [ "`initctl status ssh|grep "running, process"`" != "" ]; then
STARTED=1
else
STARTED=0
fi
else
if [ "`systemctl is-active scylla-server`" = "active" ]; then
STARTED=1
else
STARTED=0
fi
fi
if [ $STARTED -eq 1 ]; then
if [ "`systemctl is-active scylla-server`" = "active" ]; then
tput setaf 4
tput bold
echo " ScyllaDB is active."
@@ -56,13 +42,6 @@ else
echo " ScyllaDB is not started!"
tput sgr0
echo "Please wait for startup. To see status of ScyllaDB, run "
if [ "$ID" = "ubuntu" ]; then
echo " 'initctl status scylla-server'"
echo "and"
echo " 'cat /var/log/upstart/scylla-server.log'"
echo
else
echo " 'systemctl status scylla-server'"
echo
fi
echo " 'systemctl status scylla-server'"
echo
fi

58
dist/ami/scylla.json vendored
View File

@@ -8,52 +8,16 @@
"security_group_id": "{{user `security_group_id`}}",
"region": "{{user `region`}}",
"associate_public_ip_address": "{{user `associate_public_ip_address`}}",
"source_ami": "{{user `source_ami`}}",
"source_ami": "ami-8ef1d6e4",
"user_data_file": "user_data.txt",
"instance_type": "{{user `instance_type`}}",
"ssh_username": "{{user `ssh_username`}}",
"ssh_username": "centos",
"ssh_timeout": "5m",
"ami_name": "{{user `ami_prefix`}}scylla_{{isotime | clean_ami_name}}",
"enhanced_networking": true,
"ami_name": "scylla_{{isotime | clean_ami_name}}",
"launch_block_device_mappings": [
{
"device_name": "/dev/sda1",
"volume_size": 10,
"delete_on_termination": true
}
],
"ami_block_device_mappings": [
{
"device_name": "/dev/sdb",
"virtual_name": "ephemeral0"
},
{
"device_name": "/dev/sdc",
"virtual_name": "ephemeral1"
},
{
"device_name": "/dev/sdd",
"virtual_name": "ephemeral2"
},
{
"device_name": "/dev/sde",
"virtual_name": "ephemeral3"
},
{
"device_name": "/dev/sdf",
"virtual_name": "ephemeral4"
},
{
"device_name": "/dev/sdg",
"virtual_name": "ephemeral5"
},
{
"device_name": "/dev/sdh",
"virtual_name": "ephemeral6"
},
{
"device_name": "/dev/sdi",
"virtual_name": "ephemeral7"
"volume_size": 10
}
]
}
@@ -62,18 +26,16 @@
{
"type": "file",
"source": "files/",
"destination": "/home/{{user `ssh_username`}}/"
"destination": "/home/centos/"
},
{
"type": "file",
"source": "../../scripts/scylla_install_pkg",
"destination": "/home/{{user `ssh_username`}}/scylla_install_pkg"
"destination": "/home/centos/scylla_install_pkg"
},
{
"type": "shell",
"inline": [
"sudo /home/{{user `ssh_username`}}/scylla-ami/scylla_install_ami {{ user `install_args` }}"
]
"script": "scylla_deploy.sh"
}
],
"variables": {
@@ -83,10 +45,6 @@
"security_group_id": "",
"region": "",
"associate_public_ip_address": "",
"instance_type": "",
"install_args": "",
"ami_prefix": "",
"source_ami": "",
"ssh_username": ""
"instance_type": ""
}
}

View File

@@ -1,5 +0,0 @@
#!/bin/sh -e
#
# Copyright (C) 2016 ScyllaDB
exec python /usr/lib/scylla/scyllatop/scyllatop.py $@

View File

@@ -1,16 +0,0 @@
LoadPlugin network
LoadPlugin unixsock
# dummy write_graphite to silent noisy warning
LoadPlugin network
<Plugin "network">
Server "127.0.0.1 65534"
</Plugin>
<Plugin network>
Listen "127.0.0.1" "25826"
</Plugin>
<Plugin unixsock>
SocketFile "/var/run/collectd-unixsock"
SocketPerms "0666"
</Plugin>

View File

@@ -2,25 +2,6 @@
#
# Copyright (C) 2015 ScyllaDB
print_usage() {
echo "scylla_bootparam_setup --ami"
echo " --ami setup AMI instance"
exit 1
}
AMI_OPT=0
while [ $# -gt 0 ]; do
case "$1" in
"--ami")
AMI_OPT=1
shift 1
;;
*)
print_usage
;;
esac
done
. /etc/os-release
if [ ! -f /etc/default/grub ]; then
@@ -33,11 +14,7 @@ if [ "`grep hugepagesz /etc/default/grub`" != "" ] || [ "`grep hugepages /etc/de
sed -e "s#hugepages=[0-9]* ##" /etc/default/grub > /tmp/grub
mv /tmp/grub /etc/default/grub
fi
if [ $AMI_OPT -eq 1 ]; then
sed -e "s#^GRUB_CMDLINE_LINUX=\"#GRUB_CMDLINE_LINUX=\"clocksource=tsc tsc=reliable hugepagesz=2M hugepages=$NR_HUGEPAGES #" /etc/default/grub > /tmp/grub
else
sed -e "s#^GRUB_CMDLINE_LINUX=\"#GRUB_CMDLINE_LINUX=\"hugepagesz=2M hugepages=$NR_HUGEPAGES #" /etc/default/grub > /tmp/grub
fi
sed -e "s#^GRUB_CMDLINE_LINUX=\"#GRUB_CMDLINE_LINUX=\"hugepagesz=2M hugepages=$NR_HUGEPAGES #" /etc/default/grub > /tmp/grub
mv /tmp/grub /etc/default/grub
if [ "$ID" = "ubuntu" ]; then
grub-mkconfig -o /boot/grub/grub.cfg

View File

@@ -3,19 +3,18 @@
# Copyright (C) 2015 ScyllaDB
print_usage() {
echo "scylla_coredump_setup --dump-to-raiddir"
echo " --dump-to-raiddir store coredump to /var/lib/scylla"
echo "scylla_coredump_setup -s"
echo " -s store coredump to /var/lib/scylla"
exit 1
}
SYMLINK=0
while [ $# -gt 0 ]; do
case "$1" in
"--dump-to-raiddir")
while getopts sh OPT; do
case "$OPT" in
"s")
SYMLINK=1
shift 1
;;
*)
"h")
print_usage
;;
esac

View File

@@ -1,31 +0,0 @@
#!/bin/sh -e
#
# Copyright (C) 2015 ScyllaDB
print_usage() {
echo "scylla_developer_mode_setup --developer-mode=[0|1]"
echo " --developer-mode enable/disable developer mode"
exit 1
}
DEV_MODE=
while [ $# -gt 0 ]; do
case "$1" in
"--developer-mode")
DEV_MODE=$2
shift 2
;;
*)
print_usage
;;
esac
done
if [ "$DEV_MODE" = "" ]; then
print_usage
fi
if [ "$DEV_MODE" != "0" ] && [ "$DEV_MODE" != "1" ]; then
print_usage
fi
echo "DEV_MODE=--developer-mode=$DEV_MODE" > /etc/scylla.d/dev-mode.conf

View File

@@ -1,80 +0,0 @@
#!/bin/sh
print_usage() {
echo "scylla_io_setup --ami"
echo " --ami setup AMI instance"
exit 1
}
AMI_OPT=0
while [ $# -gt 0 ]; do
case "$1" in
"--ami")
AMI_OPT=1
shift 1
;;
*)
print_usage
;;
esac
done
is_developer_mode() {
cat /etc/scylla.d/dev-mode.conf|egrep -c "\-\-developer-mode(\s+|=)(1|true)"
}
output_to_user()
{
echo "$1"
logger -p user.err "$1"
}
. /etc/os-release
if [ "$NAME" = "Ubuntu" ]; then
. /etc/default/scylla-server
else
. /etc/sysconfig/scylla-server
fi
if [ `is_developer_mode` -eq 0 ]; then
SMP=`echo $SCYLLA_ARGS|grep smp|sed -e "s/^.*smp\(\s\+\|=\)\([0-9]*\).*$/\2/"`
CPUSET=`echo $SCYLLA_ARGS|grep cpuset|sed -e "s/^.*\(--cpuset\(\s\+\|=\)[0-9\-]*\).*$/\1/"`
if [ $AMI_OPT -eq 1 ]; then
NR_CPU=`cat /proc/cpuinfo |grep processor|wc -l`
NR_DISKS=`lsblk --list --nodeps --noheadings | grep -v xvda | grep xvd | wc -l`
TYPE=`curl http://169.254.169.254/latest/meta-data/instance-type|cut -d . -f 1`
if [ "$SMP" != "" ]; then
NR_CPU=$SMP
fi
NR_SHARDS=$NR_CPU
if [ $NR_CPU -ge 8 ] && [ "$SET_NIC" = "no" ]; then
NR_SHARDS=$((NR_CPU - 1))
fi
if [ $NR_DISKS -lt 2 ]; then NR_DISKS=2; fi
NR_REQS=$((32 * $NR_DISKS / 2))
NR_IO_QUEUES=$NR_SHARDS
if [ $(($NR_REQS/$NR_IO_QUEUES)) -lt 4 ]; then
NR_IO_QUEUES=$(($NR_REQS / 4))
fi
NR_IO_QUEUES=$((NR_IO_QUEUES>NR_SHARDS?NR_SHARDS:NR_IO_QUEUES))
NR_REQS=$(($(($NR_REQS / $NR_IO_QUEUES)) * $NR_IO_QUEUES))
if [ "$TYPE" = "i2" ]; then
NR_REQS=$(($NR_REQS * 2))
fi
echo "SEASTAR_IO=\"--num-io-queues $NR_IO_QUEUES --max-io-requests $NR_REQS\"" > /etc/scylla.d/io.conf
else
iotune --evaluation-directory /var/lib/scylla --format envfile --options-file /etc/scylla.d/io.conf $CPUSET
if [ $? -ne 0 ]; then
output_to_user "/var/lib/scylla did not pass validation tests, it may not be on XFS and/or has limited disk space."
output_to_user "This is a non-supported setup, and performance is expected to be very bad."
output_to_user "For better performance, placing your data on XFS-formatted directories is required."
output_to_user " To override this error, see the developer_mode configuration option."
fi
fi
fi

Some files were not shown because too many files have changed in this diff Show More