Compare commits
12 Commits
branch-0.1
...
branch-0.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d3a05737f7 | ||
|
|
21c68d3da9 | ||
|
|
88d544ed14 | ||
|
|
638c0c0ea8 | ||
|
|
f3e96e0f0b | ||
|
|
fa15440665 | ||
|
|
c4d66a3beb | ||
|
|
5023f9bbab | ||
|
|
3efc145562 | ||
|
|
1ad638f8bf | ||
|
|
43f4a8031d | ||
|
|
27dbbe1ca4 |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -5,6 +5,3 @@ build
|
||||
build.ninja
|
||||
cscope.*
|
||||
/debian/
|
||||
dist/ami/files/*.rpm
|
||||
dist/ami/variables.json
|
||||
dist/ami/scylla_deploy.sh
|
||||
|
||||
2
.gitmodules
vendored
2
.gitmodules
vendored
@@ -1,6 +1,6 @@
|
||||
[submodule "seastar"]
|
||||
path = seastar
|
||||
url = ../scylla-seastar
|
||||
url = ../seastar
|
||||
ignore = dirty
|
||||
[submodule "swagger-ui"]
|
||||
path = swagger-ui
|
||||
|
||||
103
IDL.md
103
IDL.md
@@ -1,103 +0,0 @@
|
||||
#IDL definition
|
||||
The schema we use similar to c++ schema.
|
||||
Use class or struct similar to the object you need the serializer for.
|
||||
Use namespace when applicable.
|
||||
|
||||
##keywords
|
||||
* class/struct - a class or a struct like C++
|
||||
class/struct can have final or stub marker
|
||||
* namespace - has the same C++ meaning
|
||||
* enum class - has the same C++ meaning
|
||||
* final modifier for class - when a class mark as final it will not contain a size parameter. Note that final class cannot be extended by future version, so use with care
|
||||
* stub class - when a class is mark as stub, it means that no code will be generated for this class and it is only there as a documentation.
|
||||
* version attributes - mark with [[version id ]] mark that a field is available from a specific version
|
||||
* template - A template class definition like C++
|
||||
##Syntax
|
||||
|
||||
###Namespace
|
||||
```
|
||||
namespace ns_name { namespace-body }
|
||||
```
|
||||
* ns_name: either a previously unused identifier, in which case this is original-namespace-definition or the name of a namespace, in which case this is extension-namespace-definition
|
||||
* namespace-body: possibly empty sequence of declarations of any kind (including class and struct definitions as well as nested namespaces)
|
||||
|
||||
###class/struct
|
||||
`
|
||||
class-key class-name final(optional) stub(optional) { member-specification } ;(optional)
|
||||
`
|
||||
* class-key: one of class or struct.
|
||||
* class-name: the name of the class that's being defined. optionally followed by keyword final, optionally followed by keyword stub
|
||||
* final: when a class mark as final, it means it can not be extended and there is no need to serialize its size, use with care.
|
||||
* stub: when a class is mark as stub, it means no code will generate for it and it is added for documentation only.
|
||||
* member-specification: list of access specifiers, and public member accessor see class member below.
|
||||
* to be compatible with C++ a class definition can be followed by a semicolon.
|
||||
###enum
|
||||
`enum-key identifier enum-base { enumerator-list(optional) }`
|
||||
* enum-key: only enum class is supported
|
||||
* identifier: the name of the enumeration that's being declared.
|
||||
* enum-base: colon (:), followed by a type-specifier-seq that names an integral type (see the C++ standard for the full list of all possible integral types).
|
||||
* enumerator-list: comma-separated list of enumerator definitions, each of which is either simply an identifier, which becomes the name of the enumerator, or an identifier with an initializer: identifier = integral value.
|
||||
Note that though C++ allows constexpr as an initialize value, it makes the documentation less readable, hence is not permitted.
|
||||
|
||||
###class member
|
||||
`type member-access attributes(optional) default-value(optional);`
|
||||
* type: Any valid C++ type, following the C++ notation. note that there should be a serializer for the type, but deceleration order is not mandatory
|
||||
* member-access: is the way the member can be access. If the member is public it can be the name itself. if not it could be a getter function that should be followed by braces. Note that getter can (and probably should) be const methods.
|
||||
* attributes: Attributes define by square brackets. Currently are use to mark a version in which a specific member was added [ [ version version-number] ] would mark that the specific member was added in the given version number.
|
||||
|
||||
###template
|
||||
`template < parameter-list > class-declaration`
|
||||
* parameter-list - a non-empty comma-separated list of the template parameters.
|
||||
* class-decleration - (See class section) The class name declared become a template name.
|
||||
|
||||
##IDL example
|
||||
Forward slashes comments are ignored until the end of the line.
|
||||
```
|
||||
namespace utils {
|
||||
// An example of a stub class
|
||||
class UUID stub {
|
||||
int64_t most_sig_bits;
|
||||
int64_t least_sig_bits;
|
||||
}
|
||||
}
|
||||
|
||||
namespace gms {
|
||||
//an enum example
|
||||
enum class application_state:int {STATUS = 0,
|
||||
LOAD,
|
||||
SCHEMA,
|
||||
DC};
|
||||
|
||||
// example of final class
|
||||
class versioned_value final {
|
||||
// getter and setter as public member
|
||||
int version;
|
||||
sstring value;
|
||||
}
|
||||
|
||||
class heart_beat_state {
|
||||
//getter as function
|
||||
int32_t get_generation();
|
||||
//default value example
|
||||
int32_t get_heart_beat_version() = 1;
|
||||
}
|
||||
|
||||
class endpoint_state {
|
||||
heart_beat_state get_heart_beat_state();
|
||||
std::map<application_state, versioned_value> get_application_state_map();
|
||||
}
|
||||
|
||||
class gossip_digest {
|
||||
inet_address get_endpoint();
|
||||
int32_t get_generation();
|
||||
//mark that a field was added on a specific version
|
||||
int32_t get_max_version() [ [version 0.14.2] ];
|
||||
}
|
||||
|
||||
class gossip_digest_ack {
|
||||
std::vector<gossip_digest> digests();
|
||||
std::map<inet_address, gms::endpoint_state> get_endpoint_state_map();
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
@@ -15,13 +15,13 @@ git submodule update --recursive
|
||||
* Installing required packages:
|
||||
|
||||
```
|
||||
sudo yum install yaml-cpp-devel lz4-devel zlib-devel snappy-devel jsoncpp-devel thrift-devel antlr3-tool antlr3-C++-devel libasan libubsan gcc-c++ gnutls-devel ninja-build ragel libaio-devel cryptopp-devel xfsprogs-devel numactl-devel hwloc-devel libpciaccess-devel libxml2-devel python3-pyparsing
|
||||
sudo yum install yaml-cpp-devel lz4-devel zlib-devel snappy-devel jsoncpp-devel thrift-devel antlr3-tool antlr3-C++-devel libasan libubsan
|
||||
```
|
||||
|
||||
* Build Scylla
|
||||
```
|
||||
./configure.py --mode=release --with=scylla --disable-xen
|
||||
ninja-build build/release/scylla -j2 # you can use more cpus if you have tons of RAM
|
||||
ninja build/release/scylla -j2 # you can use more cpus if you have tons of RAM
|
||||
|
||||
```
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/sh
|
||||
|
||||
VERSION=0.18.2
|
||||
VERSION=0.14.1
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
@@ -106,7 +106,7 @@
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
"paramType":"string"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -196,10 +196,6 @@
|
||||
"value": {
|
||||
"type": "string",
|
||||
"description": "The version value"
|
||||
},
|
||||
"version": {
|
||||
"type": "int",
|
||||
"description": "The application state version"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -233,27 +233,46 @@
|
||||
"verb":{
|
||||
"type":"string",
|
||||
"enum":[
|
||||
"CLIENT_ID",
|
||||
"MUTATION",
|
||||
"MUTATION_DONE",
|
||||
"READ_DATA",
|
||||
"READ_MUTATION_DATA",
|
||||
"READ_DIGEST",
|
||||
"GOSSIP_ECHO",
|
||||
"GOSSIP_DIGEST_SYN",
|
||||
"GOSSIP_DIGEST_ACK2",
|
||||
"GOSSIP_SHUTDOWN",
|
||||
"DEFINITIONS_UPDATE",
|
||||
"TRUNCATE",
|
||||
"REPLICATION_FINISHED",
|
||||
"MIGRATION_REQUEST",
|
||||
"PREPARE_MESSAGE",
|
||||
"PREPARE_DONE_MESSAGE",
|
||||
"STREAM_MUTATION",
|
||||
"STREAM_MUTATION_DONE",
|
||||
"COMPLETE_MESSAGE",
|
||||
"REPAIR_CHECKSUM_RANGE",
|
||||
"GET_SCHEMA_VERSION"
|
||||
"MUTATION",
|
||||
"BINARY",
|
||||
"READ_REPAIR",
|
||||
"READ",
|
||||
"REQUEST_RESPONSE",
|
||||
"STREAM_INITIATE",
|
||||
"STREAM_INITIATE_DONE",
|
||||
"STREAM_REPLY",
|
||||
"STREAM_REQUEST",
|
||||
"RANGE_SLICE",
|
||||
"BOOTSTRAP_TOKEN",
|
||||
"TREE_REQUEST",
|
||||
"TREE_RESPONSE",
|
||||
"JOIN",
|
||||
"GOSSIP_DIGEST_SYN",
|
||||
"GOSSIP_DIGEST_ACK",
|
||||
"GOSSIP_DIGEST_ACK2",
|
||||
"DEFINITIONS_ANNOUNCE",
|
||||
"DEFINITIONS_UPDATE",
|
||||
"TRUNCATE",
|
||||
"SCHEMA_CHECK",
|
||||
"INDEX_SCAN",
|
||||
"REPLICATION_FINISHED",
|
||||
"INTERNAL_RESPONSE",
|
||||
"COUNTER_MUTATION",
|
||||
"STREAMING_REPAIR_REQUEST",
|
||||
"STREAMING_REPAIR_RESPONSE",
|
||||
"SNAPSHOT",
|
||||
"MIGRATION_REQUEST",
|
||||
"GOSSIP_SHUTDOWN",
|
||||
"_TRACE",
|
||||
"ECHO",
|
||||
"REPAIR_MESSAGE",
|
||||
"PAXOS_PREPARE",
|
||||
"PAXOS_PROPOSE",
|
||||
"PAXOS_COMMIT",
|
||||
"PAGED_RANGE",
|
||||
"UNUSED_1",
|
||||
"UNUSED_2",
|
||||
"UNUSED_3"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -425,7 +425,7 @@
|
||||
"summary":"load value. Keys are IP addresses",
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"map_string_double"
|
||||
"type":"double_mapper"
|
||||
},
|
||||
"nickname":"get_load_map",
|
||||
"produces":[
|
||||
@@ -2028,8 +2028,8 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"map_string_double":{
|
||||
"id":"map_string_double",
|
||||
"double_mapper":{
|
||||
"id":"double_mapper",
|
||||
"description":"A key value mapping between a string and a double",
|
||||
"properties":{
|
||||
"key":{
|
||||
|
||||
127
api/api.cc
127
api/api.cc
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2015 ScyllaDB
|
||||
* Copyright 2015 Cloudius Systems
|
||||
*/
|
||||
|
||||
/*
|
||||
@@ -52,98 +52,67 @@ static std::unique_ptr<reply> exception_reply(std::exception_ptr eptr) {
|
||||
return std::make_unique<reply>();
|
||||
}
|
||||
|
||||
future<> set_server_init(http_context& ctx) {
|
||||
future<> set_server(http_context& ctx) {
|
||||
auto rb = std::make_shared < api_registry_builder > (ctx.api_doc);
|
||||
|
||||
return ctx.http_server.set_routes([rb, &ctx](routes& r) {
|
||||
r.register_exeption_handler(exception_reply);
|
||||
httpd::directory_handler* dir = new httpd::directory_handler(ctx.api_dir,
|
||||
new content_replace("html"));
|
||||
r.put(GET, "/ui", new httpd::file_handler(ctx.api_dir + "/index.html",
|
||||
new content_replace("html")));
|
||||
r.add(GET, url("/ui").remainder("path"), new httpd::directory_handler(ctx.api_dir,
|
||||
new content_replace("html")));
|
||||
rb->register_function(r, "system",
|
||||
"The system related API");
|
||||
set_system(ctx, r);
|
||||
r.add(GET, url("/ui").remainder("path"), dir);
|
||||
|
||||
rb->set_api_doc(r);
|
||||
});
|
||||
}
|
||||
rb->register_function(r, "storage_service",
|
||||
"The storage service API");
|
||||
set_storage_service(ctx,r);
|
||||
rb->register_function(r, "commitlog",
|
||||
"The commit log API");
|
||||
set_commitlog(ctx,r);
|
||||
rb->register_function(r, "gossiper",
|
||||
"The gossiper API");
|
||||
set_gossiper(ctx,r);
|
||||
rb->register_function(r, "column_family",
|
||||
"The column family API");
|
||||
set_column_family(ctx, r);
|
||||
|
||||
static future<> register_api(http_context& ctx, const sstring& api_name,
|
||||
const sstring api_desc,
|
||||
std::function<void(http_context& ctx, routes& r)> f) {
|
||||
auto rb = std::make_shared < api_registry_builder > (ctx.api_doc);
|
||||
|
||||
return ctx.http_server.set_routes([rb, &ctx, api_name, api_desc, f](routes& r) {
|
||||
rb->register_function(r, api_name, api_desc);
|
||||
f(ctx,r);
|
||||
});
|
||||
}
|
||||
|
||||
future<> set_server_storage_service(http_context& ctx) {
|
||||
return register_api(ctx, "storage_service", "The storage service API", set_storage_service);
|
||||
}
|
||||
|
||||
future<> set_server_gossip(http_context& ctx) {
|
||||
return register_api(ctx, "gossiper",
|
||||
"The gossiper API", set_gossiper);
|
||||
}
|
||||
|
||||
future<> set_server_load_sstable(http_context& ctx) {
|
||||
return register_api(ctx, "column_family",
|
||||
"The column family API", set_column_family);
|
||||
}
|
||||
|
||||
future<> set_server_messaging_service(http_context& ctx) {
|
||||
return register_api(ctx, "messaging_service",
|
||||
"The messaging service API", set_messaging_service);
|
||||
}
|
||||
|
||||
future<> set_server_storage_proxy(http_context& ctx) {
|
||||
return register_api(ctx, "storage_proxy",
|
||||
"The storage proxy API", set_storage_proxy);
|
||||
}
|
||||
|
||||
future<> set_server_stream_manager(http_context& ctx) {
|
||||
return register_api(ctx, "stream_manager",
|
||||
"The stream manager API", set_stream_manager);
|
||||
}
|
||||
|
||||
future<> set_server_gossip_settle(http_context& ctx) {
|
||||
auto rb = std::make_shared < api_registry_builder > (ctx.api_doc);
|
||||
|
||||
return ctx.http_server.set_routes([rb, &ctx](routes& r) {
|
||||
rb->register_function(r, "failure_detector",
|
||||
"The failure detector API");
|
||||
set_failure_detector(ctx,r);
|
||||
rb->register_function(r, "cache_service",
|
||||
"The cache service API");
|
||||
set_cache_service(ctx,r);
|
||||
|
||||
rb->register_function(r, "endpoint_snitch_info",
|
||||
"The endpoint snitch info API");
|
||||
set_endpoint_snitch(ctx, r);
|
||||
});
|
||||
}
|
||||
|
||||
future<> set_server_done(http_context& ctx) {
|
||||
auto rb = std::make_shared < api_registry_builder > (ctx.api_doc);
|
||||
|
||||
return ctx.http_server.set_routes([rb, &ctx](routes& r) {
|
||||
rb->register_function(r, "compaction_manager",
|
||||
"The Compaction manager API");
|
||||
set_compaction_manager(ctx, r);
|
||||
rb->register_function(r, "lsa", "Log-structured allocator API");
|
||||
set_lsa(ctx, r);
|
||||
|
||||
rb->register_function(r, "commitlog",
|
||||
"The commit log API");
|
||||
set_commitlog(ctx,r);
|
||||
rb->register_function(r, "hinted_handoff",
|
||||
"The hinted handoff API");
|
||||
set_hinted_handoff(ctx, r);
|
||||
rb->register_function(r, "failure_detector",
|
||||
"The failure detector API");
|
||||
set_failure_detector(ctx,r);
|
||||
|
||||
rb->register_function(r, "messaging_service",
|
||||
"The messaging service API");
|
||||
set_messaging_service(ctx, r);
|
||||
rb->register_function(r, "storage_proxy",
|
||||
"The storage proxy API");
|
||||
set_storage_proxy(ctx, r);
|
||||
|
||||
rb->register_function(r, "cache_service",
|
||||
"The cache service API");
|
||||
set_cache_service(ctx,r);
|
||||
rb->register_function(r, "collectd",
|
||||
"The collectd API");
|
||||
set_collectd(ctx, r);
|
||||
rb->register_function(r, "endpoint_snitch_info",
|
||||
"The endpoint snitch info API");
|
||||
set_endpoint_snitch(ctx, r);
|
||||
rb->register_function(r, "compaction_manager",
|
||||
"The Compaction manager API");
|
||||
set_compaction_manager(ctx, r);
|
||||
rb->register_function(r, "hinted_handoff",
|
||||
"The hinted handoff API");
|
||||
set_hinted_handoff(ctx, r);
|
||||
rb->register_function(r, "stream_manager",
|
||||
"The stream manager API");
|
||||
set_stream_manager(ctx, r);
|
||||
rb->register_function(r, "system",
|
||||
"The system related API");
|
||||
set_system(ctx, r);
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
18
api/api.hh
18
api/api.hh
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2015 ScyllaDB
|
||||
* Copyright 2015 Cloudius Systems
|
||||
*/
|
||||
|
||||
/*
|
||||
@@ -21,17 +21,31 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "http/httpd.hh"
|
||||
#include "json/json_elements.hh"
|
||||
#include "database.hh"
|
||||
#include "service/storage_proxy.hh"
|
||||
#include <boost/lexical_cast.hpp>
|
||||
#include <boost/algorithm/string/split.hpp>
|
||||
#include <boost/algorithm/string/classification.hpp>
|
||||
#include "api/api-doc/utils.json.hh"
|
||||
#include "utils/histogram.hh"
|
||||
#include "http/exception.hh"
|
||||
#include "api_init.hh"
|
||||
|
||||
namespace api {
|
||||
|
||||
struct http_context {
|
||||
sstring api_dir;
|
||||
sstring api_doc;
|
||||
httpd::http_server_control http_server;
|
||||
distributed<database>& db;
|
||||
distributed<service::storage_proxy>& sp;
|
||||
http_context(distributed<database>& _db, distributed<service::storage_proxy>&
|
||||
_sp) : db(_db), sp(_sp) {}
|
||||
};
|
||||
|
||||
future<> set_server(http_context& ctx);
|
||||
|
||||
template<class T>
|
||||
std::vector<sstring> container_to_vec(const T& container) {
|
||||
std::vector<sstring> res;
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
/*
|
||||
* Copyright 2016 ScylaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#pragma once
|
||||
#include "database.hh"
|
||||
#include "service/storage_proxy.hh"
|
||||
#include "http/httpd.hh"
|
||||
|
||||
namespace api {
|
||||
|
||||
struct http_context {
|
||||
sstring api_dir;
|
||||
sstring api_doc;
|
||||
httpd::http_server_control http_server;
|
||||
distributed<database>& db;
|
||||
distributed<service::storage_proxy>& sp;
|
||||
http_context(distributed<database>& _db,
|
||||
distributed<service::storage_proxy>& _sp)
|
||||
: db(_db), sp(_sp) {
|
||||
}
|
||||
};
|
||||
|
||||
future<> set_server_init(http_context& ctx);
|
||||
future<> set_server_storage_service(http_context& ctx);
|
||||
future<> set_server_gossip(http_context& ctx);
|
||||
future<> set_server_load_sstable(http_context& ctx);
|
||||
future<> set_server_messaging_service(http_context& ctx);
|
||||
future<> set_server_storage_proxy(http_context& ctx);
|
||||
future<> set_server_stream_manager(http_context& ctx);
|
||||
future<> set_server_gossip_settle(http_context& ctx);
|
||||
future<> set_server_done(http_context& ctx);
|
||||
|
||||
|
||||
}
|
||||
@@ -49,7 +49,7 @@ void set_compaction_manager(http_context& ctx, routes& r) {
|
||||
s.ks = c->ks;
|
||||
s.cf = c->cf;
|
||||
s.unit = "keys";
|
||||
s.task_type = sstables::compaction_name(c->type);
|
||||
s.task_type = "compaction";
|
||||
s.completed = c->total_keys_written;
|
||||
s.total = c->total_partitions;
|
||||
summaries.push_back(std::move(s));
|
||||
@@ -67,14 +67,11 @@ void set_compaction_manager(http_context& ctx, routes& r) {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
|
||||
cm::stop_compaction.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
auto type = req->get_query_param("type");
|
||||
return ctx.db.invoke_on_all([type] (database& db) {
|
||||
auto& cm = db.get_compaction_manager();
|
||||
cm.stop_compaction(type);
|
||||
}).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
cm::stop_compaction.set(r, [] (std::unique_ptr<request> req) {
|
||||
//TBD
|
||||
// FIXME
|
||||
warn(unimplemented::cause::API);
|
||||
return make_ready_future<json::json_return_type>("");
|
||||
});
|
||||
|
||||
cm::get_pending_tasks.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
|
||||
@@ -44,7 +44,6 @@ void set_failure_detector(http_context& ctx, routes& r) {
|
||||
// method that the state index are static but the name can be changed.
|
||||
version_val.application_state = static_cast<std::underlying_type<gms::application_state>::type>(a.first);
|
||||
version_val.value = a.second.value;
|
||||
version_val.version = a.second.version;
|
||||
val.application_state.push(version_val);
|
||||
}
|
||||
res.push_back(val);
|
||||
|
||||
@@ -32,9 +32,9 @@ using namespace net;
|
||||
namespace api {
|
||||
|
||||
using shard_info = messaging_service::shard_info;
|
||||
using msg_addr = messaging_service::msg_addr;
|
||||
using shard_id = messaging_service::shard_id;
|
||||
|
||||
static const int32_t num_verb = static_cast<int32_t>(messaging_verb::LAST);
|
||||
static const int32_t num_verb = static_cast<int32_t>(messaging_verb::UNUSED_3) + 1;
|
||||
|
||||
std::vector<message_counter> map_to_message_counters(
|
||||
const std::unordered_map<gms::inet_address, unsigned long>& map) {
|
||||
@@ -58,7 +58,7 @@ future_json_function get_client_getter(std::function<uint64_t(const shard_info&)
|
||||
using map_type = std::unordered_map<gms::inet_address, uint64_t>;
|
||||
auto get_shard_map = [f](messaging_service& ms) {
|
||||
std::unordered_map<gms::inet_address, unsigned long> map;
|
||||
ms.foreach_client([&map, f] (const msg_addr& id, const shard_info& info) {
|
||||
ms.foreach_client([&map, f] (const shard_id& id, const shard_info& info) {
|
||||
map[id.addr] = f(info);
|
||||
});
|
||||
return map;
|
||||
@@ -124,7 +124,7 @@ void set_messaging_service(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
get_dropped_messages_by_ver.set(r, [](std::unique_ptr<request> req) {
|
||||
shared_ptr<std::vector<uint64_t>> map = make_shared<std::vector<uint64_t>>(num_verb);
|
||||
shared_ptr<std::vector<uint64_t>> map = make_shared<std::vector<uint64_t>>(num_verb, 0);
|
||||
|
||||
return net::get_messaging_service().map_reduce([map](const uint64_t* local_map) mutable {
|
||||
for (auto i = 0; i < num_verb; i++) {
|
||||
@@ -137,12 +137,8 @@ void set_messaging_service(http_context& ctx, routes& r) {
|
||||
for (auto i : verb_counter::verb_wrapper::all_items()) {
|
||||
verb_counter c;
|
||||
messaging_verb v = i; // for type safety we use messaging_verb values
|
||||
auto idx = static_cast<uint32_t>(v);
|
||||
if (idx >= map->size()) {
|
||||
throw std::runtime_error(sprint("verb index out of bounds: %lu, map size: %lu", idx, map->size()));
|
||||
}
|
||||
if ((*map)[idx] > 0) {
|
||||
c.count = (*map)[idx];
|
||||
if ((*map)[static_cast<int32_t>(v)] > 0) {
|
||||
c.count = (*map)[static_cast<int32_t>(v)];
|
||||
c.verb = i;
|
||||
res.push_back(c);
|
||||
}
|
||||
|
||||
@@ -30,7 +30,6 @@
|
||||
#include "repair/repair.hh"
|
||||
#include "locator/snitch_base.hh"
|
||||
#include "column_family.hh"
|
||||
#include "log.hh"
|
||||
|
||||
namespace api {
|
||||
|
||||
@@ -170,9 +169,9 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
|
||||
ss::get_load_map.set(r, [] (std::unique_ptr<request> req) {
|
||||
return service::get_local_storage_service().get_load_map().then([] (auto&& load_map) {
|
||||
std::vector<ss::map_string_double> res;
|
||||
std::vector<ss::double_mapper> res;
|
||||
for (auto i : load_map) {
|
||||
ss::map_string_double val;
|
||||
ss::double_mapper val;
|
||||
val.key = i.first;
|
||||
val.value = i.second;
|
||||
res.push_back(val);
|
||||
@@ -272,21 +271,15 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
ss::force_keyspace_cleanup.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
//TBD
|
||||
// FIXME
|
||||
// the nodetool clean up is used in many tests
|
||||
// this workaround willl let it work until
|
||||
// a cleanup is implemented
|
||||
warn(unimplemented::cause::API);
|
||||
auto keyspace = validate_keyspace(ctx, req->param);
|
||||
auto column_families = split_cf(req->get_query_param("cf"));
|
||||
if (column_families.empty()) {
|
||||
column_families = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data());
|
||||
}
|
||||
return ctx.db.invoke_on_all([keyspace, column_families] (database& db) {
|
||||
std::vector<column_family*> column_families_vec;
|
||||
auto& cm = db.get_compaction_manager();
|
||||
for (auto entry : column_families) {
|
||||
column_family* cf = &db.find_column_family(keyspace, entry);
|
||||
cm.submit_cleanup_job(cf);
|
||||
}
|
||||
}).then([]{
|
||||
return make_ready_future<json::json_return_type>(0);
|
||||
});
|
||||
auto column_family = req->get_query_param("cf");
|
||||
return make_ready_future<json::json_return_type>(0);
|
||||
});
|
||||
|
||||
ss::scrub.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
@@ -405,13 +398,9 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
ss::get_logging_levels.set(r, [](std::unique_ptr<request> req) {
|
||||
//TBD
|
||||
unimplemented();
|
||||
std::vector<ss::mapper> res;
|
||||
for (auto i : logging::logger_registry().get_all_logger_names()) {
|
||||
ss::mapper log;
|
||||
log.key = i;
|
||||
log.value = logging::level_name(logging::logger_registry().get_logger_level(i));
|
||||
res.push_back(log);
|
||||
}
|
||||
return make_ready_future<json::json_return_type>(res);
|
||||
});
|
||||
|
||||
@@ -553,9 +542,10 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
return make_ready_future<json::json_return_type>(0);
|
||||
});
|
||||
|
||||
ss::get_compaction_throughput_mb_per_sec.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
int value = ctx.db.local().get_config().compaction_throughput_mb_per_sec();
|
||||
return make_ready_future<json::json_return_type>(value);
|
||||
ss::get_compaction_throughput_mb_per_sec.set(r, [](std::unique_ptr<request> req) {
|
||||
//TBD
|
||||
unimplemented();
|
||||
return make_ready_future<json::json_return_type>(0);
|
||||
});
|
||||
|
||||
ss::set_compaction_throughput_mb_per_sec.set(r, [](std::unique_ptr<request> req) {
|
||||
|
||||
@@ -47,7 +47,7 @@ static hs::progress_info get_progress_info(const streaming::progress_info& info)
|
||||
res.direction = info.dir;
|
||||
res.file_name = info.file_name;
|
||||
res.peer = boost::lexical_cast<std::string>(info.peer);
|
||||
res.session_index = 0;
|
||||
res.session_index = info.session_index;
|
||||
res.total_bytes = info.total_bytes;
|
||||
return res;
|
||||
}
|
||||
@@ -70,14 +70,13 @@ static hs::stream_state get_state(
|
||||
for (auto info : result_future.get_coordinator().get()->get_all_session_info()) {
|
||||
hs::stream_info si;
|
||||
si.peer = boost::lexical_cast<std::string>(info.peer);
|
||||
si.session_index = 0;
|
||||
si.session_index = info.session_index;
|
||||
si.state = info.state;
|
||||
si.connecting = si.peer;
|
||||
si.connecting = boost::lexical_cast<std::string>(info.connecting);
|
||||
set_summaries(info.receiving_summaries, si.receiving_summaries);
|
||||
set_summaries(info.sending_summaries, si.sending_summaries);
|
||||
set_files(info.receiving_files, si.receiving_files);
|
||||
set_files(info.sending_files, si.sending_files);
|
||||
state.sessions.push(si);
|
||||
}
|
||||
return state;
|
||||
}
|
||||
@@ -109,16 +108,14 @@ void set_stream_manager(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
hs::get_total_incoming_bytes.set(r, [](std::unique_ptr<request> req) {
|
||||
gms::inet_address peer(req->param["peer"]);
|
||||
return streaming::get_stream_manager().map_reduce0([peer](streaming::stream_manager& sm) {
|
||||
gms::inet_address ep(req->param["peer"]);
|
||||
utils::UUID plan_id = gms::get_local_gossiper().get_host_id(ep);
|
||||
return streaming::get_stream_manager().map_reduce0([plan_id](streaming::stream_manager& stream) {
|
||||
int64_t res = 0;
|
||||
for (auto sr : sm.get_all_streams()) {
|
||||
if (sr) {
|
||||
for (auto session : sr->get_coordinator()->get_all_stream_sessions()) {
|
||||
if (session->peer == peer) {
|
||||
res += session->get_bytes_received();
|
||||
}
|
||||
}
|
||||
streaming::stream_result_future* s = stream.get_receiving_stream(plan_id).get();
|
||||
if (s != nullptr) {
|
||||
for (auto si: s->get_coordinator()->get_all_session_info()) {
|
||||
res += si.get_total_size_received();
|
||||
}
|
||||
}
|
||||
return res;
|
||||
@@ -128,12 +125,12 @@ void set_stream_manager(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
hs::get_all_total_incoming_bytes.set(r, [](std::unique_ptr<request> req) {
|
||||
return streaming::get_stream_manager().map_reduce0([](streaming::stream_manager& sm) {
|
||||
return streaming::get_stream_manager().map_reduce0([](streaming::stream_manager& stream) {
|
||||
int64_t res = 0;
|
||||
for (auto sr : sm.get_all_streams()) {
|
||||
if (sr) {
|
||||
for (auto session : sr->get_coordinator()->get_all_stream_sessions()) {
|
||||
res += session->get_bytes_received();
|
||||
for (auto s : stream.get_receiving_streams()) {
|
||||
if (s.second.get() != nullptr) {
|
||||
for (auto si: s.second.get()->get_coordinator()->get_all_session_info()) {
|
||||
res += si.get_total_size_received();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -144,16 +141,14 @@ void set_stream_manager(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
hs::get_total_outgoing_bytes.set(r, [](std::unique_ptr<request> req) {
|
||||
gms::inet_address peer(req->param["peer"]);
|
||||
return streaming::get_stream_manager().map_reduce0([peer](streaming::stream_manager& sm) {
|
||||
gms::inet_address ep(req->param["peer"]);
|
||||
utils::UUID plan_id = gms::get_local_gossiper().get_host_id(ep);
|
||||
return streaming::get_stream_manager().map_reduce0([plan_id](streaming::stream_manager& stream) {
|
||||
int64_t res = 0;
|
||||
for (auto sr : sm.get_all_streams()) {
|
||||
if (sr) {
|
||||
for (auto session : sr->get_coordinator()->get_all_stream_sessions()) {
|
||||
if (session->peer == peer) {
|
||||
res += session->get_bytes_sent();
|
||||
}
|
||||
}
|
||||
streaming::stream_result_future* s = stream.get_sending_stream(plan_id).get();
|
||||
if (s != nullptr) {
|
||||
for (auto si: s->get_coordinator()->get_all_session_info()) {
|
||||
res += si.get_total_size_received();
|
||||
}
|
||||
}
|
||||
return res;
|
||||
@@ -163,12 +158,12 @@ void set_stream_manager(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
hs::get_all_total_outgoing_bytes.set(r, [](std::unique_ptr<request> req) {
|
||||
return streaming::get_stream_manager().map_reduce0([](streaming::stream_manager& sm) {
|
||||
return streaming::get_stream_manager().map_reduce0([](streaming::stream_manager& stream) {
|
||||
int64_t res = 0;
|
||||
for (auto sr : sm.get_all_streams()) {
|
||||
if (sr) {
|
||||
for (auto session : sr->get_coordinator()->get_all_stream_sessions()) {
|
||||
res += session->get_bytes_sent();
|
||||
for (auto s : stream.get_initiated_streams()) {
|
||||
if (s.second.get() != nullptr) {
|
||||
for (auto si: s.second.get()->get_coordinator()->get_all_session_info()) {
|
||||
res += si.get_total_size_received();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -272,6 +272,45 @@ template<typename T>
|
||||
class serializer;
|
||||
}
|
||||
|
||||
// A variant type that can hold either an atomic_cell, or a serialized collection.
|
||||
// Which type is stored is determined by the schema.
|
||||
class atomic_cell_or_collection final {
|
||||
managed_bytes _data;
|
||||
|
||||
template<typename T>
|
||||
friend class db::serializer;
|
||||
private:
|
||||
atomic_cell_or_collection(managed_bytes&& data) : _data(std::move(data)) {}
|
||||
public:
|
||||
atomic_cell_or_collection() = default;
|
||||
atomic_cell_or_collection(atomic_cell ac) : _data(std::move(ac._data)) {}
|
||||
static atomic_cell_or_collection from_atomic_cell(atomic_cell data) { return { std::move(data._data) }; }
|
||||
atomic_cell_view as_atomic_cell() const { return atomic_cell_view::from_bytes(_data); }
|
||||
atomic_cell_or_collection(collection_mutation cm) : _data(std::move(cm.data)) {}
|
||||
explicit operator bool() const {
|
||||
return !_data.empty();
|
||||
}
|
||||
static atomic_cell_or_collection from_collection_mutation(collection_mutation data) {
|
||||
return std::move(data.data);
|
||||
}
|
||||
collection_mutation_view as_collection_mutation() const {
|
||||
return collection_mutation_view{_data};
|
||||
}
|
||||
bytes_view serialize() const {
|
||||
return _data;
|
||||
}
|
||||
bool operator==(const atomic_cell_or_collection& other) const {
|
||||
return _data == other._data;
|
||||
}
|
||||
void linearize() {
|
||||
_data.linearize();
|
||||
}
|
||||
void unlinearize() {
|
||||
_data.scatter();
|
||||
}
|
||||
friend std::ostream& operator<<(std::ostream&, const atomic_cell_or_collection&);
|
||||
};
|
||||
|
||||
class column_definition;
|
||||
|
||||
int compare_atomic_cell_for_merge(atomic_cell_view left, atomic_cell_view right);
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2015 Cloudius Systems, Ltd.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
// Not part of atomic_cell.hh to avoid cyclic dependency between types.hh and atomic_cell.hh
|
||||
|
||||
#include "types.hh"
|
||||
#include "atomic_cell.hh"
|
||||
#include "hashing.hh"
|
||||
|
||||
template<typename Hasher>
|
||||
void feed_hash(collection_mutation_view cell, Hasher& h, const data_type& type) {
|
||||
auto&& ctype = static_pointer_cast<const collection_type_impl>(type);
|
||||
auto m_view = ctype->deserialize_mutation_form(cell);
|
||||
::feed_hash(h, m_view.tomb);
|
||||
for (auto&& key_and_value : m_view.cells) {
|
||||
::feed_hash(h, key_and_value.first);
|
||||
::feed_hash(h, key_and_value.second);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
struct appending_hash<atomic_cell_view> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, atomic_cell_view cell) const {
|
||||
feed_hash(h, cell.is_live());
|
||||
feed_hash(h, cell.timestamp());
|
||||
if (cell.is_live()) {
|
||||
if (cell.is_live_and_has_ttl()) {
|
||||
feed_hash(h, cell.expiry());
|
||||
feed_hash(h, cell.ttl());
|
||||
}
|
||||
feed_hash(h, cell.value());
|
||||
} else {
|
||||
feed_hash(h, cell.deletion_time());
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -1,73 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2015 Cloudius Systems, Ltd.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "atomic_cell.hh"
|
||||
#include "schema.hh"
|
||||
#include "hashing.hh"
|
||||
|
||||
// A variant type that can hold either an atomic_cell, or a serialized collection.
|
||||
// Which type is stored is determined by the schema.
|
||||
class atomic_cell_or_collection final {
|
||||
managed_bytes _data;
|
||||
|
||||
template<typename T>
|
||||
friend class db::serializer;
|
||||
private:
|
||||
atomic_cell_or_collection(managed_bytes&& data) : _data(std::move(data)) {}
|
||||
public:
|
||||
atomic_cell_or_collection() = default;
|
||||
atomic_cell_or_collection(atomic_cell ac) : _data(std::move(ac._data)) {}
|
||||
static atomic_cell_or_collection from_atomic_cell(atomic_cell data) { return { std::move(data._data) }; }
|
||||
atomic_cell_view as_atomic_cell() const { return atomic_cell_view::from_bytes(_data); }
|
||||
atomic_cell_or_collection(collection_mutation cm) : _data(std::move(cm.data)) {}
|
||||
explicit operator bool() const {
|
||||
return !_data.empty();
|
||||
}
|
||||
static atomic_cell_or_collection from_collection_mutation(collection_mutation data) {
|
||||
return std::move(data.data);
|
||||
}
|
||||
collection_mutation_view as_collection_mutation() const {
|
||||
return collection_mutation_view{_data};
|
||||
}
|
||||
bytes_view serialize() const {
|
||||
return _data;
|
||||
}
|
||||
bool operator==(const atomic_cell_or_collection& other) const {
|
||||
return _data == other._data;
|
||||
}
|
||||
template<typename Hasher>
|
||||
void feed_hash(Hasher& h, const column_definition& def) const {
|
||||
if (def.is_atomic()) {
|
||||
::feed_hash(h, as_atomic_cell());
|
||||
} else {
|
||||
::feed_hash(as_collection_mutation(), h, def.type);
|
||||
}
|
||||
}
|
||||
void linearize() {
|
||||
_data.linearize();
|
||||
}
|
||||
void unlinearize() {
|
||||
_data.scatter();
|
||||
}
|
||||
friend std::ostream& operator<<(std::ostream&, const atomic_cell_or_collection&);
|
||||
};
|
||||
292
auth/auth.cc
292
auth/auth.cc
@@ -1,292 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2016 Cloudius Systems
|
||||
*
|
||||
* Modified by Cloudius Systems
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <seastar/core/sleep.hh>
|
||||
|
||||
#include "auth.hh"
|
||||
#include "authenticator.hh"
|
||||
#include "database.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "cql3/statements/cf_statement.hh"
|
||||
#include "cql3/statements/create_table_statement.hh"
|
||||
#include "db/config.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
|
||||
const sstring auth::auth::DEFAULT_SUPERUSER_NAME("cassandra");
|
||||
const sstring auth::auth::AUTH_KS("system_auth");
|
||||
const sstring auth::auth::USERS_CF("users");
|
||||
|
||||
static const sstring USER_NAME("name");
|
||||
static const sstring SUPER("super");
|
||||
|
||||
static logging::logger logger("auth");
|
||||
|
||||
// TODO: configurable
|
||||
using namespace std::chrono_literals;
|
||||
const std::chrono::milliseconds auth::auth::SUPERUSER_SETUP_DELAY = 10000ms;
|
||||
|
||||
class auth_migration_listener : public service::migration_listener {
|
||||
void on_create_keyspace(const sstring& ks_name) override {}
|
||||
void on_create_column_family(const sstring& ks_name, const sstring& cf_name) override {}
|
||||
void on_create_user_type(const sstring& ks_name, const sstring& type_name) override {}
|
||||
void on_create_function(const sstring& ks_name, const sstring& function_name) override {}
|
||||
void on_create_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
|
||||
|
||||
void on_update_keyspace(const sstring& ks_name) override {}
|
||||
void on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool) override {}
|
||||
void on_update_user_type(const sstring& ks_name, const sstring& type_name) override {}
|
||||
void on_update_function(const sstring& ks_name, const sstring& function_name) override {}
|
||||
void on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
|
||||
|
||||
void on_drop_keyspace(const sstring& ks_name) override {
|
||||
// TODO:
|
||||
//DatabaseDescriptor.getAuthorizer().revokeAll(DataResource.keyspace(ksName));
|
||||
|
||||
}
|
||||
void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) override {
|
||||
// TODO:
|
||||
//DatabaseDescriptor.getAuthorizer().revokeAll(DataResource.columnFamily(ksName, cfName));
|
||||
}
|
||||
void on_drop_user_type(const sstring& ks_name, const sstring& type_name) override {}
|
||||
void on_drop_function(const sstring& ks_name, const sstring& function_name) override {}
|
||||
void on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
|
||||
};
|
||||
|
||||
static auth_migration_listener auth_migration;
|
||||
|
||||
/**
|
||||
* Poor mans job schedule. For maximum 2 jobs. Sic.
|
||||
* Still does nothing more clever than waiting 10 seconds
|
||||
* like origin, then runs the submitted tasks.
|
||||
*
|
||||
* Only difference compared to sleep (from which this
|
||||
* borrows _heavily_) is that if tasks have not run by the time
|
||||
* we exit (and do static clean up) we delete the promise + cont
|
||||
*
|
||||
* Should be abstracted to some sort of global server function
|
||||
* probably.
|
||||
*/
|
||||
void auth::auth::schedule_when_up(scheduled_func f) {
|
||||
struct waiter {
|
||||
promise<> done;
|
||||
timer<> tmr;
|
||||
waiter() : tmr([this] {done.set_value();})
|
||||
{
|
||||
tmr.arm(SUPERUSER_SETUP_DELAY);
|
||||
}
|
||||
~waiter() {
|
||||
if (tmr.armed()) {
|
||||
tmr.cancel();
|
||||
done.set_exception(std::runtime_error("shutting down"));
|
||||
}
|
||||
logger.trace("Deleting scheduled task");
|
||||
}
|
||||
void kill() {
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::unique_ptr<waiter> waiter_ptr;
|
||||
|
||||
static thread_local std::vector<waiter_ptr> waiters;
|
||||
|
||||
logger.trace("Adding scheduled task");
|
||||
|
||||
waiters.emplace_back(std::make_unique<waiter>());
|
||||
auto* w = waiters.back().get();
|
||||
|
||||
w->done.get_future().finally([w] {
|
||||
auto i = std::find_if(waiters.begin(), waiters.end(), [w](const waiter_ptr& p) {
|
||||
return p.get() == w;
|
||||
});
|
||||
if (i != waiters.end()) {
|
||||
waiters.erase(i);
|
||||
}
|
||||
}).then([f = std::move(f)] {
|
||||
logger.trace("Running scheduled task");
|
||||
return f();
|
||||
}).handle_exception([](auto ep) {
|
||||
return make_ready_future();
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
bool auth::auth::is_class_type(const sstring& type, const sstring& classname) {
|
||||
if (type == classname) {
|
||||
return true;
|
||||
}
|
||||
auto i = classname.find_last_of('.');
|
||||
return classname.compare(i + 1, sstring::npos, type) == 0;
|
||||
}
|
||||
|
||||
future<> auth::auth::setup() {
|
||||
auto& db = cql3::get_local_query_processor().db().local();
|
||||
auto& cfg = db.get_config();
|
||||
auto type = cfg.authenticator();
|
||||
|
||||
if (is_class_type(type, authenticator::ALLOW_ALL_AUTHENTICATOR_NAME)) {
|
||||
return authenticator::setup(type).discard_result(); // just create the object
|
||||
}
|
||||
|
||||
future<> f = make_ready_future();
|
||||
|
||||
if (!db.has_keyspace(AUTH_KS)) {
|
||||
std::map<sstring, sstring> opts;
|
||||
opts["replication_factor"] = "1";
|
||||
auto ksm = keyspace_metadata::new_keyspace(AUTH_KS, "org.apache.cassandra.locator.SimpleStrategy", opts, true);
|
||||
f = service::get_local_migration_manager().announce_new_keyspace(ksm, false);
|
||||
}
|
||||
|
||||
return f.then([] {
|
||||
return setup_table(USERS_CF, sprint("CREATE TABLE %s.%s (%s text, %s boolean, PRIMARY KEY(%s)) WITH gc_grace_seconds=%d",
|
||||
AUTH_KS, USERS_CF, USER_NAME, SUPER, USER_NAME,
|
||||
90 * 24 * 60 * 60)); // 3 months.
|
||||
}).then([type] {
|
||||
return authenticator::setup(type).discard_result();
|
||||
}).then([] {
|
||||
// TODO authorizer
|
||||
}).then([] {
|
||||
service::get_local_migration_manager().register_listener(&auth_migration); // again, only one shard...
|
||||
// instead of once-timer, just schedule this later
|
||||
schedule_when_up([] {
|
||||
// setup default super user
|
||||
return has_existing_users(USERS_CF, DEFAULT_SUPERUSER_NAME, USER_NAME).then([](bool exists) {
|
||||
if (!exists) {
|
||||
auto query = sprint("INSERT INTO %s.%s (%s, %s) VALUES (?, ?) USING TIMESTAMP 0",
|
||||
AUTH_KS, USERS_CF, USER_NAME, SUPER);
|
||||
cql3::get_local_query_processor().process(query, db::consistency_level::ONE, {DEFAULT_SUPERUSER_NAME, true}).then([](auto) {
|
||||
logger.info("Created default superuser '{}'", DEFAULT_SUPERUSER_NAME);
|
||||
}).handle_exception([](auto ep) {
|
||||
try {
|
||||
std::rethrow_exception(ep);
|
||||
} catch (exceptions::request_execution_exception&) {
|
||||
logger.warn("Skipped default superuser setup: some nodes were not ready");
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
static db::consistency_level consistency_for_user(const sstring& username) {
|
||||
if (username == auth::auth::DEFAULT_SUPERUSER_NAME) {
|
||||
return db::consistency_level::QUORUM;
|
||||
}
|
||||
return db::consistency_level::LOCAL_ONE;
|
||||
}
|
||||
|
||||
static future<::shared_ptr<cql3::untyped_result_set>> select_user(const sstring& username) {
|
||||
// Here was a thread local, explicit cache of prepared statement. In normal execution this is
|
||||
// fine, but since we in testing set up and tear down system over and over, we'd start using
|
||||
// obsolete prepared statements pretty quickly.
|
||||
// Rely on query processing caching statements instead, and lets assume
|
||||
// that a map lookup string->statement is not gonna kill us much.
|
||||
return cql3::get_local_query_processor().process(
|
||||
sprint("SELECT * FROM %s.%s WHERE %s = ?",
|
||||
auth::auth::AUTH_KS, auth::auth::USERS_CF,
|
||||
USER_NAME), consistency_for_user(username),
|
||||
{ username }, true);
|
||||
}
|
||||
|
||||
future<bool> auth::auth::is_existing_user(const sstring& username) {
|
||||
return select_user(username).then(
|
||||
[](::shared_ptr<cql3::untyped_result_set> res) {
|
||||
return make_ready_future<bool>(!res->empty());
|
||||
});
|
||||
}
|
||||
|
||||
future<bool> auth::auth::is_super_user(const sstring& username) {
|
||||
return select_user(username).then(
|
||||
[](::shared_ptr<cql3::untyped_result_set> res) {
|
||||
return make_ready_future<bool>(!res->empty() && res->one().get_as<bool>(SUPER));
|
||||
});
|
||||
}
|
||||
|
||||
future<> auth::auth::insert_user(const sstring& username, bool is_super)
|
||||
throw (exceptions::request_execution_exception) {
|
||||
return cql3::get_local_query_processor().process(sprint("INSERT INTO %s.%s (%s, %s) VALUES (?, ?)",
|
||||
AUTH_KS, USERS_CF, USER_NAME, SUPER),
|
||||
consistency_for_user(username), { username, is_super }).discard_result();
|
||||
}
|
||||
|
||||
future<> auth::auth::delete_user(const sstring& username) throw(exceptions::request_execution_exception) {
|
||||
return cql3::get_local_query_processor().process(sprint("DELETE FROM %s.%s WHERE %s = ?",
|
||||
AUTH_KS, USERS_CF, USER_NAME),
|
||||
consistency_for_user(username), { username }).discard_result();
|
||||
}
|
||||
|
||||
future<> auth::auth::setup_table(const sstring& name, const sstring& cql) {
|
||||
auto& qp = cql3::get_local_query_processor();
|
||||
auto& db = qp.db().local();
|
||||
|
||||
if (db.has_schema(AUTH_KS, name)) {
|
||||
return make_ready_future();
|
||||
}
|
||||
|
||||
::shared_ptr<cql3::statements::cf_statement> parsed = static_pointer_cast<
|
||||
cql3::statements::cf_statement>(cql3::query_processor::parse_statement(cql));
|
||||
parsed->prepare_keyspace(AUTH_KS);
|
||||
::shared_ptr<cql3::statements::create_table_statement> statement =
|
||||
static_pointer_cast<cql3::statements::create_table_statement>(
|
||||
parsed->prepare(db)->statement);
|
||||
// Origin sets "Legacy Cf Id" for the new table. We have no need to be
|
||||
// pre-2.1 compatible (afaik), so lets skip a whole lotta hoolaballo
|
||||
return statement->announce_migration(qp.proxy(), false).then([statement](bool) {});
|
||||
}
|
||||
|
||||
future<bool> auth::auth::has_existing_users(const sstring& cfname, const sstring& def_user_name, const sstring& name_column) {
|
||||
auto default_user_query = sprint("SELECT * FROM %s.%s WHERE %s = ?", AUTH_KS, cfname, name_column);
|
||||
auto all_users_query = sprint("SELECT * FROM %s.%s LIMIT 1", AUTH_KS, cfname);
|
||||
|
||||
return cql3::get_local_query_processor().process(default_user_query, db::consistency_level::ONE, { def_user_name }).then([=](::shared_ptr<cql3::untyped_result_set> res) {
|
||||
if (!res->empty()) {
|
||||
return make_ready_future<bool>(true);
|
||||
}
|
||||
return cql3::get_local_query_processor().process(default_user_query, db::consistency_level::QUORUM, { def_user_name }).then([all_users_query](::shared_ptr<cql3::untyped_result_set> res) {
|
||||
if (!res->empty()) {
|
||||
return make_ready_future<bool>(true);
|
||||
}
|
||||
return cql3::get_local_query_processor().process(all_users_query, db::consistency_level::QUORUM).then([](::shared_ptr<cql3::untyped_result_set> res) {
|
||||
return make_ready_future<bool>(!res->empty());
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
120
auth/auth.hh
120
auth/auth.hh
@@ -1,120 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2016 Cloudius Systems
|
||||
*
|
||||
* Modified by Cloudius Systems
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <chrono>
|
||||
#include <seastar/core/sstring.hh>
|
||||
#include <seastar/core/future.hh>
|
||||
|
||||
#include "exceptions/exceptions.hh"
|
||||
|
||||
namespace auth {
|
||||
|
||||
class auth {
|
||||
public:
|
||||
static const sstring DEFAULT_SUPERUSER_NAME;
|
||||
static const sstring AUTH_KS;
|
||||
static const sstring USERS_CF;
|
||||
static const std::chrono::milliseconds SUPERUSER_SETUP_DELAY;
|
||||
|
||||
static bool is_class_type(const sstring& type, const sstring& classname);
|
||||
|
||||
#if 0
|
||||
public static Set<Permission> getPermissions(AuthenticatedUser user, IResource resource)
|
||||
{
|
||||
return permissionsCache.getPermissions(user, resource);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Checks if the username is stored in AUTH_KS.USERS_CF.
|
||||
*
|
||||
* @param username Username to query.
|
||||
* @return whether or not Cassandra knows about the user.
|
||||
*/
|
||||
static future<bool> is_existing_user(const sstring& username);
|
||||
|
||||
/**
|
||||
* Checks if the user is a known superuser.
|
||||
*
|
||||
* @param username Username to query.
|
||||
* @return true is the user is a superuser, false if they aren't or don't exist at all.
|
||||
*/
|
||||
static future<bool> is_super_user(const sstring& username);
|
||||
|
||||
/**
|
||||
* Inserts the user into AUTH_KS.USERS_CF (or overwrites their superuser status as a result of an ALTER USER query).
|
||||
*
|
||||
* @param username Username to insert.
|
||||
* @param isSuper User's new status.
|
||||
* @throws RequestExecutionException
|
||||
*/
|
||||
static future<> insert_user(const sstring& username, bool is_super) throw(exceptions::request_execution_exception);
|
||||
|
||||
/**
|
||||
* Deletes the user from AUTH_KS.USERS_CF.
|
||||
*
|
||||
* @param username Username to delete.
|
||||
* @throws RequestExecutionException
|
||||
*/
|
||||
static future<> delete_user(const sstring& username) throw(exceptions::request_execution_exception);
|
||||
|
||||
/**
|
||||
* Sets up Authenticator and Authorizer.
|
||||
*/
|
||||
static future<> setup();
|
||||
|
||||
/**
|
||||
* Set up table from given CREATE TABLE statement under system_auth keyspace, if not already done so.
|
||||
*
|
||||
* @param name name of the table
|
||||
* @param cql CREATE TABLE statement
|
||||
*/
|
||||
static future<> setup_table(const sstring& name, const sstring& cql);
|
||||
|
||||
static future<bool> has_existing_users(const sstring& cfname, const sstring& def_user_name, const sstring& name_column_name);
|
||||
|
||||
// For internal use. Run function "when system is up".
|
||||
typedef std::function<future<>()> scheduled_func;
|
||||
static void schedule_when_up(scheduled_func);
|
||||
};
|
||||
}
|
||||
@@ -1,110 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2016 Cloudius Systems
|
||||
*
|
||||
* Modified by Cloudius Systems
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "authenticator.hh"
|
||||
#include "authenticated_user.hh"
|
||||
#include "password_authenticator.hh"
|
||||
#include "auth.hh"
|
||||
#include "db/config.hh"
|
||||
|
||||
const sstring auth::authenticator::USERNAME_KEY("username");
|
||||
const sstring auth::authenticator::PASSWORD_KEY("password");
|
||||
const sstring auth::authenticator::ALLOW_ALL_AUTHENTICATOR_NAME("org.apache.cassandra.auth.AllowAllAuthenticator");
|
||||
|
||||
/**
|
||||
* Authenticator is assumed to be a fully state-less immutable object (note all the const).
|
||||
* We thus store a single instance globally, since it should be safe/ok.
|
||||
*/
|
||||
static std::unique_ptr<auth::authenticator> global_authenticator;
|
||||
|
||||
future<>
|
||||
auth::authenticator::setup(const sstring& type) throw (exceptions::configuration_exception) {
|
||||
if (auth::auth::is_class_type(type, ALLOW_ALL_AUTHENTICATOR_NAME)) {
|
||||
class allow_all_authenticator : public authenticator {
|
||||
public:
|
||||
const sstring& class_name() const override {
|
||||
return ALLOW_ALL_AUTHENTICATOR_NAME;
|
||||
}
|
||||
bool require_authentication() const override {
|
||||
return false;
|
||||
}
|
||||
option_set supported_options() const override {
|
||||
return option_set();
|
||||
}
|
||||
option_set alterable_options() const override {
|
||||
return option_set();
|
||||
}
|
||||
future<::shared_ptr<authenticated_user>> authenticate(const credentials_map& credentials) const throw(exceptions::authentication_exception) override {
|
||||
return make_ready_future<::shared_ptr<authenticated_user>>(::make_shared<authenticated_user>());
|
||||
}
|
||||
future<> create(sstring username, const option_map& options) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) override {
|
||||
return make_ready_future();
|
||||
}
|
||||
future<> alter(sstring username, const option_map& options) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) override {
|
||||
return make_ready_future();
|
||||
}
|
||||
future<> drop(sstring username) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) override {
|
||||
return make_ready_future();
|
||||
}
|
||||
resource_ids protected_resources() const override {
|
||||
return resource_ids();
|
||||
}
|
||||
::shared_ptr<sasl_challenge> new_sasl_challenge() const override {
|
||||
throw std::runtime_error("Should not reach");
|
||||
}
|
||||
};
|
||||
global_authenticator = std::make_unique<allow_all_authenticator>();
|
||||
} else if (auth::auth::is_class_type(type, password_authenticator::PASSWORD_AUTHENTICATOR_NAME)) {
|
||||
auto pwa = std::make_unique<password_authenticator>();
|
||||
auto f = pwa->init();
|
||||
return f.then([pwa = std::move(pwa)]() mutable {
|
||||
global_authenticator = std::move(pwa);
|
||||
});
|
||||
} else {
|
||||
throw exceptions::configuration_exception("Invalid authenticator type: " + type);
|
||||
}
|
||||
return make_ready_future();
|
||||
}
|
||||
|
||||
auth::authenticator& auth::authenticator::get() {
|
||||
assert(global_authenticator);
|
||||
return *global_authenticator;
|
||||
}
|
||||
@@ -1,198 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2016 Cloudius Systems
|
||||
*
|
||||
* Modified by Cloudius Systems
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <set>
|
||||
#include <stdexcept>
|
||||
#include <boost/any.hpp>
|
||||
|
||||
#include <seastar/core/sstring.hh>
|
||||
#include <seastar/core/future.hh>
|
||||
#include <seastar/core/shared_ptr.hh>
|
||||
#include <seastar/core/enum.hh>
|
||||
|
||||
#include "bytes.hh"
|
||||
#include "data_resource.hh"
|
||||
#include "enum_set.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
|
||||
namespace db {
|
||||
class config;
|
||||
}
|
||||
|
||||
namespace auth {
|
||||
|
||||
class authenticated_user;
|
||||
|
||||
/**
 * Abstract interface for pluggable client authentication backends.
 *
 * A concrete implementation is selected by name via setup() and then
 * accessed through the process-wide singleton returned by get().
 */
class authenticator {
public:
    // Keys used in credentials_map entries passed to authenticate().
    static const sstring USERNAME_KEY;
    static const sstring PASSWORD_KEY;
    // Implementation name selecting the no-op (allow-everything) authenticator.
    static const sstring ALLOW_ALL_AUTHENTICATOR_NAME;

    /**
     * Supported CREATE USER/ALTER USER options.
     * Currently only PASSWORD is available.
     */
    enum class option {
        PASSWORD
    };

    // Set of option values.
    using option_set = enum_set<super_enum<option, option::PASSWORD>>;
    // option -> value map; values are type-erased (e.g. sstring for PASSWORD).
    using option_map = std::unordered_map<option, boost::any, enum_hash<option>>;
    // Credential key (USERNAME_KEY/PASSWORD_KEY) -> value map.
    using credentials_map = std::unordered_map<sstring, sstring>;

    /**
     * Resource id mappings, i.e. keyspace and/or column families.
     */
    using resource_ids = std::set<data_resource>;

    /**
     * Setup is called once upon system startup to initialize the IAuthenticator.
     *
     * For example, use this method to create any required keyspaces/column families.
     * Note: Only call from main thread.
     */
    static future<> setup(const sstring& type) throw(exceptions::configuration_exception);

    /**
     * Returns the system authenticator. Must have called setup before calling this.
     */
    static authenticator& get();

    virtual ~authenticator()
    {}

    // Fully qualified name of the concrete implementation class.
    virtual const sstring& class_name() const = 0;

    /**
     * Whether or not the authenticator requires explicit login.
     * If false will instantiate user with AuthenticatedUser.ANONYMOUS_USER.
     */
    virtual bool require_authentication() const = 0;

    /**
     * Set of options supported by CREATE USER and ALTER USER queries.
     * Should never return null - always return an empty set instead.
     */
    virtual option_set supported_options() const = 0;

    /**
     * Subset of supportedOptions that users are allowed to alter when performing ALTER USER [themselves].
     * Should never return null - always return an empty set instead.
     */
    virtual option_set alterable_options() const = 0;

    /**
     * Authenticates a user given a Map<String, String> of credentials.
     * Should never return null - always throw AuthenticationException instead.
     * Returning AuthenticatedUser.ANONYMOUS_USER is an option as well if authentication is not required.
     *
     * @throws authentication_exception if credentials don't match any known user.
     */
    virtual future<::shared_ptr<authenticated_user>> authenticate(const credentials_map& credentials) const throw(exceptions::authentication_exception) = 0;

    /**
     * Called during execution of CREATE USER query (also may be called on startup, see seedSuperuserOptions method).
     * If authenticator is static then the body of the method should be left blank, but don't throw an exception.
     * options are guaranteed to be a subset of supportedOptions().
     *
     * @param username Username of the user to create.
     * @param options Options the user will be created with.
     * @throws exceptions::request_validation_exception
     * @throws exceptions::request_execution_exception
     */
    virtual future<> create(sstring username, const option_map& options) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) = 0;

    /**
     * Called during execution of ALTER USER query.
     * options are always guaranteed to be a subset of supportedOptions(). Furthermore, if the user performing the query
     * is not a superuser and is altering himself, then options are guaranteed to be a subset of alterableOptions().
     * Keep the body of the method blank if your implementation doesn't support any options.
     *
     * @param username Username of the user that will be altered.
     * @param options Options to alter.
     * @throws exceptions::request_validation_exception
     * @throws exceptions::request_execution_exception
     */
    virtual future<> alter(sstring username, const option_map& options) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) = 0;

    /**
     * Called during execution of DROP USER query.
     *
     * @param username Username of the user that will be dropped.
     * @throws exceptions::request_validation_exception
     * @throws exceptions::request_execution_exception
     */
    virtual future<> drop(sstring username) throw(exceptions::request_validation_exception, exceptions::request_execution_exception) = 0;

    /**
     * Set of resources that should be made inaccessible to users and only accessible internally.
     *
     * @return Keyspaces, column families that will be unmodifiable by users; other resources.
     * @see resource_ids
     */
    virtual resource_ids protected_resources() const = 0;

    /**
     * One in-flight SASL negotiation; created per connection by
     * new_sasl_challenge() below.
     */
    class sasl_challenge {
    public:
        virtual ~sasl_challenge() {}
        // Consume one client token; returns the (possibly empty) server token.
        virtual bytes evaluate_response(bytes_view client_response) throw(exceptions::authentication_exception) = 0;
        // True once enough tokens were exchanged to attempt authentication.
        virtual bool is_complete() const = 0;
        // Only meaningful after is_complete() returns true.
        virtual future<::shared_ptr<authenticated_user>> get_authenticated_user() const throw(exceptions::authentication_exception) = 0;
    };

    /**
     * Provide a sasl_challenge to be used by the CQL binary protocol server. If
     * the configured authenticator requires authentication but does not implement this
     * interface we refuse to start the binary protocol server as it will have no way
     * of authenticating clients.
     * @return sasl_challenge implementation
     */
    virtual ::shared_ptr<sasl_challenge> new_sasl_challenge() const = 0;
};
|
||||
|
||||
}
|
||||
|
||||
@@ -1,175 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2016 Cloudius Systems
|
||||
*
|
||||
* Modified by Cloudius Systems
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "data_resource.hh"
|
||||
|
||||
#include <regex>
|
||||
#include "service/storage_proxy.hh"
|
||||
|
||||
// First component of every data resource name, e.g. "data/ks/cf" (see name()).
const sstring auth::data_resource::ROOT_NAME("data");
|
||||
|
||||
/**
 * Internal constructor: builds a resource from explicit components and
 * verifies that the caller's stated level matches what the components imply.
 */
auth::data_resource::data_resource(level l, const sstring& ks, const sstring& cf)
    : _ks(ks)
    , _cf(cf)
{
    // The actual level is derived from which of ks/cf are non-empty;
    // reject construction if it disagrees with the requested one.
    if (get_level() != l) {
        throw std::invalid_argument("level/keyspace/column mismatch");
    }
}
|
||||
|
||||
// Root-level resource ("data").
auth::data_resource::data_resource()
    : data_resource(level::ROOT)
{}

// Keyspace-level resource ("data/<ks>").
auth::data_resource::data_resource(const sstring& ks)
    : data_resource(level::KEYSPACE, ks)
{}

// Column-family-level resource ("data/<ks>/<cf>").
auth::data_resource::data_resource(const sstring& ks, const sstring& cf)
    : data_resource(level::COLUMN_FAMILY, ks, cf)
{}
|
||||
|
||||
/**
 * Derives the resource level from which name components are set.
 * No explicit level member is stored; _ks/_cf fully determine it.
 */
auth::data_resource::level auth::data_resource::get_level() const {
    if (_cf.empty()) {
        return _ks.empty() ? level::ROOT : level::KEYSPACE;
    }
    // A column family without a keyspace is an invalid state.
    assert(!_ks.empty());
    return level::COLUMN_FAMILY;
}
|
||||
|
||||
// Parses "data", "data/<ks>" or "data/<ks>/<cf>" into a data_resource.
// Throws std::invalid_argument for anything else.
auth::data_resource auth::data_resource::from_name(
                const sstring& s) {

    static std::regex slash_regex("/");

    // Token iterator over the '/'-separated components (submatch -1 selects
    // the text between separators). regex_token_iterator is a forward
    // iterator, so std::distance below does not invalidate 'i'.
    auto i = std::regex_token_iterator<sstring::const_iterator>(s.begin(),
                    s.end(), slash_regex, -1);
    auto e = std::regex_token_iterator<sstring::const_iterator>();
    auto n = std::distance(i, e);

    // At most three components, and the first must be "data".
    // Note the side effect: *i++ consumes the root component.
    if (n > 3 || ROOT_NAME != sstring(*i++)) {
        throw std::invalid_argument(sprint("%s is not a valid data resource name", s));
    }

    if (n == 1) {
        return data_resource();
    }
    auto ks = *i++;
    if (n == 2) {
        return data_resource(ks.str());
    }
    auto cf = *i++;
    return data_resource(ks.str(), cf.str());
}
|
||||
|
||||
/**
 * Printable name of the resource: "data", "data/<ks>" or "data/<ks>/<cf>".
 * Inverse of from_name().
 */
sstring auth::data_resource::name() const {
    const auto l = get_level();
    if (l == level::ROOT) {
        return ROOT_NAME;
    }
    if (l == level::KEYSPACE) {
        return sprint("%s/%s", ROOT_NAME, _ks);
    }
    // level::COLUMN_FAMILY
    return sprint("%s/%s/%s", ROOT_NAME, _ks, _cf);
}
|
||||
|
||||
/**
 * Parent in the resource hierarchy: cf -> its keyspace, keyspace -> root.
 * Throws std::invalid_argument for the root-level resource.
 */
auth::data_resource auth::data_resource::get_parent() const {
    if (is_column_family_level()) {
        return data_resource(_ks);
    }
    if (is_keyspace_level()) {
        return data_resource();
    }
    throw std::invalid_argument("Root-level resource can't have a parent");
}
|
||||
|
||||
/**
 * Keyspace component of the resource.
 * @throws std::invalid_argument for the root-level resource.
 */
const sstring& auth::data_resource::keyspace() const
                throw (std::invalid_argument) {
    if (get_level() == level::ROOT) {
        throw std::invalid_argument("ROOT data resource has no keyspace");
    }
    return _ks;
}
|
||||
|
||||
/**
 * Column family component of the resource.
 * @throws std::invalid_argument unless this is a cf-level resource.
 */
const sstring& auth::data_resource::column_family() const
                throw (std::invalid_argument) {
    if (is_column_family_level()) {
        return _cf;
    }
    throw std::invalid_argument(sprint("%s data resource has no column family", name()));
}
|
||||
|
||||
bool auth::data_resource::has_parent() const {
|
||||
return !is_root_level();
|
||||
}
|
||||
|
||||
bool auth::data_resource::exists() const {
|
||||
switch (get_level()) {
|
||||
case level::ROOT:
|
||||
return true;
|
||||
case level::KEYSPACE:
|
||||
return service::get_local_storage_proxy().get_db().local().has_keyspace(_ks);
|
||||
case level::COLUMN_FAMILY:
|
||||
default:
|
||||
return service::get_local_storage_proxy().get_db().local().has_schema(_ks, _cf);
|
||||
}
|
||||
}
|
||||
|
||||
// String representation; identical to name().
sstring auth::data_resource::to_string() const {
    return this->name();
}
|
||||
|
||||
// Equal iff both components match; the level is derived from them.
bool auth::data_resource::operator==(const data_resource& v) const {
    if (_ks != v._ks) {
        return false;
    }
    return _cf == v._cf;
}
|
||||
|
||||
// Lexicographic ordering on (keyspace, column family), using only operator<
// of the component type (strict weak ordering for std::set<data_resource>).
bool auth::data_resource::operator<(const data_resource& v) const {
    if (_ks < v._ks) {
        return true;
    }
    if (v._ks < _ks) {
        return false;
    }
    return _cf < v._cf;
}
|
||||
|
||||
std::ostream& auth::operator<<(std::ostream& os, const data_resource& r) {
|
||||
return os << r.name();
|
||||
}
|
||||
|
||||
@@ -1,146 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2016 Cloudius Systems
|
||||
*
|
||||
* Modified by Cloudius Systems
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iosfwd>
|
||||
#include <seastar/core/sstring.hh>
|
||||
|
||||
namespace auth {
|
||||
|
||||
/**
 * The unit of access control: the whole data hierarchy ("data"), a keyspace
 * ("data/<ks>") or a column family ("data/<ks>/<cf>"). Immutable value type,
 * ordered so it can be used in std::set (see resource_ids).
 */
class data_resource {
private:
    // Position in the resource hierarchy; derived from _ks/_cf, not stored.
    enum class level {
        ROOT, KEYSPACE, COLUMN_FAMILY
    };

    // First component of every resource name ("data").
    static const sstring ROOT_NAME;

    // Empty string means "not set"; (_ks, _cf) together determine the level.
    sstring _ks;
    sstring _cf;

    data_resource(level, const sstring& ks = {}, const sstring& cf = {});

    level get_level() const;
public:
    /**
     * Creates a DataResource representing the root-level resource.
     * @return the root-level resource.
     */
    data_resource();
    /**
     * Creates a DataResource representing a keyspace.
     *
     * @param keyspace Name of the keyspace.
     */
    data_resource(const sstring& ks);
    /**
     * Creates a DataResource instance representing a column family.
     *
     * @param keyspace Name of the keyspace.
     * @param columnFamily Name of the column family.
     */
    data_resource(const sstring& ks, const sstring& cf);

    /**
     * Parses a data resource name into a DataResource instance.
     *
     * @param name Name of the data resource.
     * @return DataResource instance matching the name.
     */
    static data_resource from_name(const sstring&);

    /**
     * @return Printable name of the resource.
     */
    sstring name() const;

    /**
     * @return Parent of the resource, if any. Throws std::invalid_argument if it's the root-level resource.
     */
    data_resource get_parent() const;

    bool is_root_level() const {
        return get_level() == level::ROOT;
    }

    bool is_keyspace_level() const {
        return get_level() == level::KEYSPACE;
    }

    bool is_column_family_level() const {
        return get_level() == level::COLUMN_FAMILY;
    }

    /**
     * @return keyspace of the resource.
     * @throws std::invalid_argument if it's the root-level resource.
     */
    const sstring& keyspace() const throw(std::invalid_argument);

    /**
     * @return column family of the resource.
     * @throws std::invalid_argument if it's not a cf-level resource.
     */
    const sstring& column_family() const throw(std::invalid_argument);

    /**
     * @return Whether or not the resource has a parent in the hierarchy.
     */
    bool has_parent() const;

    /**
     * @return Whether or not the resource exists in scylla.
     */
    bool exists() const;

    sstring to_string() const;

    bool operator==(const data_resource&) const;
    bool operator<(const data_resource&) const;
};
|
||||
|
||||
std::ostream& operator<<(std::ostream&, const data_resource&);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,357 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2016 Cloudius Systems
|
||||
*
|
||||
* Modified by Cloudius Systems
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#include <crypt.h>
|
||||
#include <random>
|
||||
#include <chrono>
|
||||
|
||||
#include <seastar/core/reactor.hh>
|
||||
|
||||
#include "auth.hh"
|
||||
#include "password_authenticator.hh"
|
||||
#include "authenticated_user.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "log.hh"
|
||||
|
||||
// Matches Origin's fully qualified class name so existing configuration
// files selecting the password authenticator keep working.
const sstring auth::password_authenticator::PASSWORD_AUTHENTICATOR_NAME("org.apache.cassandra.auth.PasswordAuthenticator");

// name of the hash column.
static const sstring SALTED_HASH = "salted_hash";
// Name of the partition key column of the credentials table.
static const sstring USER_NAME = "username";
// The default superuser's name is also used as its initial password.
static const sstring DEFAULT_USER_NAME = auth::auth::DEFAULT_SUPERUSER_NAME;
static const sstring DEFAULT_USER_PASSWORD = auth::auth::DEFAULT_SUPERUSER_NAME;
// Column family (in the auth keyspace) holding the credentials.
static const sstring CREDENTIALS_CF = "credentials";

static logging::logger logger("password_authenticator");
|
||||
|
||||
// Construction/destruction do no work; table creation and superuser
// seeding happen in init().
auth::password_authenticator::~password_authenticator()
{}

auth::password_authenticator::password_authenticator()
{}
|
||||
|
||||
// TODO: blowfish
|
||||
// Origin uses Java bcrypt library, i.e. blowfish salt
|
||||
// generation and hashing, which is arguably a "better"
|
||||
// password hash than sha/md5 versions usually available in
|
||||
// crypt_r. Otoh, glibc 2.7+ uses a modified sha512 algo
|
||||
// which should be the same order of safe, so the only
|
||||
// real issue should be salted hash compatibility with
|
||||
// origin if importing system tables from there.
|
||||
//
|
||||
// Since bcrypt/blowfish is _not_ (afaict) not available
|
||||
// as a dev package/lib on most linux distros, we'd have to
|
||||
// copy and compile for example OWL crypto
|
||||
// (http://cvsweb.openwall.com/cgi/cvsweb.cgi/Owl/packages/glibc/crypt_blowfish/)
|
||||
// to be fully bit-compatible.
|
||||
//
|
||||
// Until we decide this is needed, let's just use crypt_r,
|
||||
// and some old-fashioned random salt generation.
|
||||
|
||||
static constexpr size_t rand_bytes = 16;
|
||||
|
||||
/**
 * Hashes @p pass with the given crypt(3)-style @p salt (or a full salted
 * hash, in which case crypt_r uses its embedded salt).
 *
 * @throws std::system_error if crypt_r fails.
 */
static sstring hashpw(const sstring& pass, const sstring& salt) {
    // crypt_data is huge (>100KB), so allocate it once per thread rather
    // than on every call. crypt_r only requires "initialized" to be zeroed
    // before the buffer's first use; it may be reused for later calls.
    static thread_local std::unique_ptr<crypt_data> tmp = [] {
        auto d = std::make_unique<crypt_data>();
        d->initialized = 0;
        return d;
    }();
    auto res = crypt_r(pass.c_str(), salt.c_str(), tmp.get());
    if (res == nullptr) {
        throw std::system_error(errno, std::system_category());
    }
    return res;
}
|
||||
|
||||
static bool checkpw(const sstring& pass, const sstring& salted_hash) {
|
||||
auto tmp = hashpw(pass, salted_hash);
|
||||
return tmp == salted_hash;
|
||||
}
|
||||
|
||||
/**
 * Generates a random crypt(3) salt, prefixed with the strongest algorithm
 * id supported by this system's crypt_r (probed once and cached).
 *
 * @throws std::runtime_error if no supported algorithm is found.
 */
static sstring gensalt() {
    // Algorithm prefix ("$6$" etc.) determined on the first call.
    static sstring prefix;

    std::random_device rd;
    std::default_random_engine e1(rd());
    std::uniform_int_distribution<char> dist;

    // Characters crypt(3) accepts in a salt.
    sstring valid_salt = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./";
    sstring input(rand_bytes, 0);

    for (char& c : input) {
        c = valid_salt[dist(e1) % valid_salt.size()];
    }

    if (!prefix.empty()) {
        // Bug fix: this used to return "prefix + salt" with 'salt' still an
        // empty string, so every salt generated after the first call carried
        // no random characters at all. Return the random input instead.
        return prefix + input;
    }

    auto tmp = std::make_unique<crypt_data>();
    tmp->initialized = 0;

    // Try in order:
    // blowfish 2011 fix, blowfish, sha512, sha256, md5
    for (sstring pfx : { "$2y$", "$2a$", "$6$", "$5$", "$1$" }) {
        sstring salt = pfx + input;
        if (crypt_r("fisk", salt.c_str(), tmp.get())) {
            prefix = pfx;
            return salt;
        }
    }
    throw std::runtime_error("Could not initialize hashing algorithm");
}
|
||||
|
||||
// Hashes @p pass with a freshly generated random salt.
static sstring hashpw(const sstring& pass) {
    auto salt = gensalt();
    return hashpw(pass, salt);
}
|
||||
|
||||
/**
 * Creates the credentials table (if needed) and, once the cluster is up,
 * seeds the default superuser row when no users exist yet.
 */
future<> auth::password_authenticator::init() {
    gensalt(); // do this once to determine usable hashing

    sstring create_table = sprint(
                    "CREATE TABLE %s.%s ("
                                    "%s text,"
                                    "%s text," // salt + hash + number of rounds
                                    "options map<text,text>,"// for future extensions
                                    "PRIMARY KEY(%s)"
                    ") WITH gc_grace_seconds=%d",
                    auth::auth::AUTH_KS,
                    CREDENTIALS_CF, USER_NAME, SALTED_HASH, USER_NAME,
                    90 * 24 * 60 * 60); // 3 months.

    return auth::setup_table(CREDENTIALS_CF, create_table).then([this] {
        // instead of once-timer, just schedule this later
        auth::schedule_when_up([] {
            return auth::has_existing_users(CREDENTIALS_CF, DEFAULT_USER_NAME, USER_NAME).then([](bool exists) {
                if (!exists) {
                    // TIMESTAMP 0 so any later user-issued update wins over
                    // this seed row.
                    // NOTE(review): the future returned by process() is not
                    // chained into the returned future — presumably
                    // intentional fire-and-forget; confirm.
                    cql3::get_local_query_processor().process(sprint("INSERT INTO %s.%s (%s, %s) VALUES (?, ?) USING TIMESTAMP 0",
                                    auth::AUTH_KS,
                                    CREDENTIALS_CF,
                                    USER_NAME, SALTED_HASH
                                    ),
                                    db::consistency_level::ONE, {DEFAULT_USER_NAME, hashpw(DEFAULT_USER_PASSWORD)}).then([](auto) {
                        logger.info("Created default user '{}'", DEFAULT_USER_NAME);
                    });
                }
            });
        });
    });
}
|
||||
|
||||
/**
 * Consistency level for credential reads/writes. The default superuser is
 * handled at QUORUM; everyone else at LOCAL_ONE.
 */
db::consistency_level auth::password_authenticator::consistency_for_user(const sstring& username) {
    return username == DEFAULT_USER_NAME
                    ? db::consistency_level::QUORUM
                    : db::consistency_level::LOCAL_ONE;
}
|
||||
|
||||
// Reported implementation name (the Origin-compatible class name).
const sstring& auth::password_authenticator::class_name() const {
    return PASSWORD_AUTHENTICATOR_NAME;
}

// Password authentication always requires an explicit login.
bool auth::password_authenticator::require_authentication() const {
    return true;
}

// PASSWORD is the only option CREATE USER/ALTER USER can set here...
auth::authenticator::option_set auth::password_authenticator::supported_options() const {
    return option_set::of<option::PASSWORD>();
}

// ...and the only one users may alter for themselves.
auth::authenticator::option_set auth::password_authenticator::alterable_options() const {
    return option_set::of<option::PASSWORD>();
}
|
||||
|
||||
/**
 * Looks up the stored salted hash for the supplied username and verifies
 * the supplied password against it.
 *
 * @param credentials must contain USERNAME_KEY and PASSWORD_KEY.
 * @return the authenticated user on success.
 * @throws exceptions::authentication_exception on missing keys, unknown
 *         user, wrong password, or lookup failure.
 */
future<::shared_ptr<auth::authenticated_user> > auth::password_authenticator::authenticate(
                const credentials_map& credentials) const
                throw (exceptions::authentication_exception) {
    if (!credentials.count(USERNAME_KEY)) {
        throw exceptions::authentication_exception(sprint("Required key '%s' is missing", USERNAME_KEY));
    }
    if (!credentials.count(PASSWORD_KEY)) {
        throw exceptions::authentication_exception(sprint("Required key '%s' is missing", PASSWORD_KEY));
    }

    auto& username = credentials.at(USERNAME_KEY);
    auto& password = credentials.at(PASSWORD_KEY);

    // Here was a thread local, explicit cache of prepared statement. In normal execution this is
    // fine, but since we in testing set up and tear down system over and over, we'd start using
    // obsolete prepared statements pretty quickly.
    // Rely on query processing caching statements instead, and lets assume
    // that a map lookup string->statement is not gonna kill us much.
    auto& qp = cql3::get_local_query_processor();
    return qp.process(
                    sprint("SELECT %s FROM %s.%s WHERE %s = ?", SALTED_HASH,
                                    auth::AUTH_KS, CREDENTIALS_CF, USER_NAME),
                    consistency_for_user(username), { username }, true).then_wrapped(
                    [=](future<::shared_ptr<cql3::untyped_result_set>> f) {
                        // then_wrapped: exceptions from the query surface via
                        // f.get0() and are translated below.
                        try {
                            auto res = f.get0();
                            // Same error for "no such user" and "bad password"
                            // to avoid leaking which usernames exist.
                            if (res->empty() || !checkpw(password, res->one().get_as<sstring>(SALTED_HASH))) {
                                throw exceptions::authentication_exception("Username and/or password are incorrect");
                            }
                            return make_ready_future<::shared_ptr<authenticated_user>>(::make_shared<authenticated_user>(username));
                        } catch (std::system_error &) {
                            // crypt_r failure inside checkpw/hashpw.
                            std::throw_with_nested(exceptions::authentication_exception("Could not verify password"));
                        } catch (exceptions::request_execution_exception& e) {
                            // Query failure (e.g. unavailable replicas).
                            std::throw_with_nested(exceptions::authentication_exception(e.what()));
                        }
                    });
}
|
||||
|
||||
/**
 * CREATE USER: inserts a (username, salted hash) row into the credentials
 * table. Requires the PASSWORD option.
 *
 * @throws exceptions::invalid_request_exception if PASSWORD is missing or
 *         not a string.
 */
future<> auth::password_authenticator::create(sstring username,
                const option_map& options)
                throw (exceptions::request_validation_exception,
                                exceptions::request_execution_exception) {
    try {
        auto password = boost::any_cast<sstring>(options.at(option::PASSWORD));
        auto query = sprint("INSERT INTO %s.%s (%s, %s) VALUES (?, ?)",
                        auth::AUTH_KS, CREDENTIALS_CF, USER_NAME, SALTED_HASH);
        auto& qp = cql3::get_local_query_processor();
        return qp.process(query, consistency_for_user(username), { username, hashpw(password) }).discard_result();
    } catch (std::out_of_range&) {
        // option::PASSWORD not present in the map.
        throw exceptions::invalid_request_exception("PasswordAuthenticator requires PASSWORD option");
    } catch (boost::bad_any_cast&) {
        // option::PASSWORD present but its value is not a string; previously
        // this escaped untranslated to the caller.
        throw exceptions::invalid_request_exception("PasswordAuthenticator requires PASSWORD option");
    }
}
|
||||
|
||||
/**
 * ALTER USER: replaces the stored salted hash for an existing username.
 * Requires the PASSWORD option.
 *
 * @throws exceptions::invalid_request_exception if PASSWORD is missing or
 *         not a string.
 */
future<> auth::password_authenticator::alter(sstring username,
                const option_map& options)
                throw (exceptions::request_validation_exception,
                                exceptions::request_execution_exception) {
    try {
        auto password = boost::any_cast<sstring>(options.at(option::PASSWORD));
        auto query = sprint("UPDATE %s.%s SET %s = ? WHERE %s = ?",
                        auth::AUTH_KS, CREDENTIALS_CF, SALTED_HASH, USER_NAME);
        auto& qp = cql3::get_local_query_processor();
        return qp.process(query, consistency_for_user(username), { hashpw(password), username }).discard_result();
    } catch (std::out_of_range&) {
        // option::PASSWORD not present in the map.
        throw exceptions::invalid_request_exception("PasswordAuthenticator requires PASSWORD option");
    } catch (boost::bad_any_cast&) {
        // option::PASSWORD present but its value is not a string; previously
        // this escaped untranslated to the caller.
        throw exceptions::invalid_request_exception("PasswordAuthenticator requires PASSWORD option");
    }
}
|
||||
|
||||
/**
 * DROP USER: deletes the credentials row for the given username.
 */
future<> auth::password_authenticator::drop(sstring username)
                throw (exceptions::request_validation_exception,
                                exceptions::request_execution_exception) {
    // The copy-pasted catch(std::out_of_range) -> "requires PASSWORD option"
    // that used to wrap this body was dead code: DROP takes no options and
    // nothing here throws std::out_of_range, and its message was misleading.
    auto query = sprint("DELETE FROM %s.%s WHERE %s = ?",
                    auth::AUTH_KS, CREDENTIALS_CF, USER_NAME);
    auto& qp = cql3::get_local_query_processor();
    return qp.process(query, consistency_for_user(username), { username }).discard_result();
}
|
||||
|
||||
// The credentials table must not be modifiable by users directly.
auth::authenticator::resource_ids auth::password_authenticator::protected_resources() const {
    resource_ids ids;
    ids.emplace(auth::AUTH_KS, CREDENTIALS_CF);
    return ids;
}
|
||||
|
||||
/**
 * Builds a SASL PLAIN challenge object for the CQL binary protocol server.
 * The challenge decodes one client token into username/password credentials
 * and defers actual verification to authenticate().
 */
::shared_ptr<auth::authenticator::sasl_challenge> auth::password_authenticator::new_sasl_challenge() const {
    class plain_text_password_challenge: public sasl_challenge {
    public:
        plain_text_password_challenge(const password_authenticator& a)
                        : _authenticator(a)
        {}

        /**
         * SASL PLAIN mechanism specifies that credentials are encoded in a
         * sequence of UTF-8 bytes, delimited by 0 (US-ASCII NUL).
         * The form is : {code}authzId<NUL>authnId<NUL>password<NUL>{code}
         * authzId is optional, and in fact we don't care about it here as we'll
         * set the authzId to match the authnId (that is, there is no concept of
         * a user being authorized to act on behalf of another).
         *
         * @param bytes encoded credentials string sent by the client
         * @return map containing the username/password pairs in the form an IAuthenticator
         * would expect
         * @throws javax.security.sasl.SaslException
         */
        bytes evaluate_response(bytes_view client_response)
                        throw (exceptions::authentication_exception) override {
            logger.debug("Decoding credentials from client token");

            sstring username, password;

            // Scan the token back-to-front with reverse iterators; each NUL
            // closes a segment [i.base(), b.base()), so the segments are
            // seen in reverse order: password first, then authnId (username),
            // then the ignored authzId.
            auto b = client_response.crbegin();
            auto e = client_response.crend();
            auto i = b;

            while (i != e) {
                if (*i == 0) {
                    sstring tmp(i.base(), b.base());
                    // NOTE(review): if the password segment is empty, the
                    // next segment would land in the password slot instead;
                    // empty passwords are rejected below either way.
                    if (password.empty()) {
                        password = std::move(tmp);
                    } else if (username.empty()) {
                        username = std::move(tmp);
                    }
                    b = ++i;
                    continue;
                }
                ++i;
            }

            if (username.empty()) {
                throw exceptions::authentication_exception("Authentication ID must not be null");
            }
            if (password.empty()) {
                throw exceptions::authentication_exception("Password must not be null");
            }

            _credentials[USERNAME_KEY] = std::move(username);
            _credentials[PASSWORD_KEY] = std::move(password);
            _complete = true;
            // PLAIN completes in one round trip; no server token to send.
            return {};
        }
        bool is_complete() const override {
            return _complete;
        }
        future<::shared_ptr<authenticated_user>> get_authenticated_user() const
                        throw (exceptions::authentication_exception) override {
            return _authenticator.authenticate(_credentials);
        }
    private:
        const password_authenticator& _authenticator;
        credentials_map _credentials;
        bool _complete = false;
    };
    return ::make_shared<plain_text_password_challenge>(*this);
}
|
||||
@@ -1,49 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2016 Cloudius Systems
|
||||
*
|
||||
* Modified by Cloudius Systems
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "permission.hh"
|
||||
|
||||
// Every permission applicable to data resources.
const auth::permission_set auth::ALL_DATA = auth::permission_set::of
                < auth::permission::CREATE, auth::permission::ALTER,
                                auth::permission::DROP, auth::permission::SELECT,
                                auth::permission::MODIFY, auth::permission::AUTHORIZE>();
// Currently data permissions are the only ones defined, so ALL == ALL_DATA.
const auth::permission_set auth::ALL = auth::ALL_DATA;
// The empty permission set.
const auth::permission_set auth::NONE;
|
||||
@@ -1,81 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2016 Cloudius Systems
|
||||
*
|
||||
* Modified by Cloudius Systems
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "enum_set.hh"
|
||||
|
||||
namespace auth {
|
||||
|
||||
enum class permission {
|
||||
//Deprecated
|
||||
READ,
|
||||
//Deprecated
|
||||
WRITE,
|
||||
|
||||
// schema management
|
||||
CREATE, // required for CREATE KEYSPACE and CREATE TABLE.
|
||||
ALTER, // required for ALTER KEYSPACE, ALTER TABLE, CREATE INDEX, DROP INDEX.
|
||||
DROP, // required for DROP KEYSPACE and DROP TABLE.
|
||||
|
||||
// data access
|
||||
SELECT, // required for SELECT.
|
||||
MODIFY, // required for INSERT, UPDATE, DELETE, TRUNCATE.
|
||||
|
||||
// permission management
|
||||
AUTHORIZE, // required for GRANT and REVOKE.
|
||||
};
|
||||
|
||||
typedef enum_set<super_enum<permission,
|
||||
permission::READ,
|
||||
permission::WRITE,
|
||||
permission::CREATE,
|
||||
permission::ALTER,
|
||||
permission::DROP,
|
||||
permission::SELECT,
|
||||
permission::MODIFY,
|
||||
permission::AUTHORIZE>> permission_set;
|
||||
|
||||
extern const permission_set ALL_DATA;
|
||||
extern const permission_set ALL;
|
||||
extern const permission_set NONE;
|
||||
|
||||
}
|
||||
18
bytes.hh
18
bytes.hh
@@ -22,7 +22,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "core/sstring.hh"
|
||||
#include "hashing.hh"
|
||||
#include <experimental/optional>
|
||||
#include <iosfwd>
|
||||
#include <functional>
|
||||
@@ -58,20 +57,3 @@ std::ostream& operator<<(std::ostream& os, const bytes_view& b);
|
||||
|
||||
}
|
||||
|
||||
template<>
|
||||
struct appending_hash<bytes> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, const bytes& v) const {
|
||||
feed_hash(h, v.size());
|
||||
h.update(reinterpret_cast<const char*>(v.cbegin()), v.size() * sizeof(bytes::value_type));
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct appending_hash<bytes_view> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, bytes_view v) const {
|
||||
feed_hash(h, v.size());
|
||||
h.update(reinterpret_cast<const char*>(v.begin()), v.size() * sizeof(bytes_view::value_type));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -24,7 +24,6 @@
|
||||
#include "types.hh"
|
||||
#include "net/byteorder.hh"
|
||||
#include "core/unaligned.hh"
|
||||
#include "hashing.hh"
|
||||
|
||||
/**
|
||||
* Utility for writing data into a buffer when its final size is not known up front.
|
||||
@@ -42,14 +41,6 @@ private:
|
||||
struct chunk {
|
||||
// FIXME: group fragment pointers to reduce pointer chasing when packetizing
|
||||
std::unique_ptr<chunk> next;
|
||||
~chunk() {
|
||||
auto p = std::move(next);
|
||||
while (p) {
|
||||
// Avoid recursion when freeing chunks
|
||||
auto p_next = std::move(p->next);
|
||||
p = std::move(p_next);
|
||||
}
|
||||
}
|
||||
size_type offset; // Also means "size" after chunk is closed
|
||||
size_type size;
|
||||
value_type data[0];
|
||||
@@ -214,10 +205,6 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void write(const char* ptr, size_t size) {
|
||||
write(bytes_view(reinterpret_cast<const signed char*>(ptr), size));
|
||||
}
|
||||
|
||||
// Writes given sequence of bytes with a preceding length component encoded in big-endian format
|
||||
inline void write_blob(bytes_view v) {
|
||||
assert((size_type)v.size() == v.size());
|
||||
@@ -345,13 +332,3 @@ public:
|
||||
_current->offset = pos._offset;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct appending_hash<bytes_ostream> {
|
||||
template<typename Hasher>
|
||||
void operator()(Hasher& h, const bytes_ostream& b) const {
|
||||
for (auto&& frag : b.fragments()) {
|
||||
feed_hash(h, frag);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1,103 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2015 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "canonical_mutation.hh"
|
||||
#include "mutation.hh"
|
||||
#include "mutation_partition_serializer.hh"
|
||||
#include "converting_mutation_partition_applier.hh"
|
||||
#include "hashing_partition_visitor.hh"
|
||||
|
||||
template class db::serializer<canonical_mutation>;
|
||||
|
||||
//
|
||||
// Representation layout:
|
||||
//
|
||||
// <canonical_mutation> ::= <column_family_id> <table_schema_version> <partition_key> <column-mapping> <partition>
|
||||
//
|
||||
// For <partition> see mutation_partition_serializer.cc
|
||||
// For <column-mapping> see db::serializer<column_mapping>
|
||||
//
|
||||
|
||||
canonical_mutation::canonical_mutation(bytes data)
|
||||
: _data(std::move(data))
|
||||
{ }
|
||||
|
||||
canonical_mutation::canonical_mutation(const mutation& m)
|
||||
: _data([&m] {
|
||||
bytes_ostream out;
|
||||
db::serializer<utils::UUID>(m.column_family_id()).write(out);
|
||||
db::serializer<table_schema_version>(m.schema()->version()).write(out);
|
||||
db::serializer<partition_key_view>(m.key()).write(out);
|
||||
db::serializer<column_mapping>(m.schema()->get_column_mapping()).write(out);
|
||||
mutation_partition_serializer ser(*m.schema(), m.partition());
|
||||
ser.write(out);
|
||||
return to_bytes(out.linearize());
|
||||
}())
|
||||
{ }
|
||||
|
||||
utils::UUID canonical_mutation::column_family_id() const {
|
||||
data_input in(_data);
|
||||
return db::serializer<utils::UUID>::read(in);
|
||||
}
|
||||
|
||||
mutation canonical_mutation::to_mutation(schema_ptr s) const {
|
||||
data_input in(_data);
|
||||
|
||||
auto cf_id = db::serializer<utils::UUID>::read(in);
|
||||
if (s->id() != cf_id) {
|
||||
throw std::runtime_error(sprint("Attempted to deserialize canonical_mutation of table %s with schema of table %s (%s.%s)",
|
||||
cf_id, s->id(), s->ks_name(), s->cf_name()));
|
||||
}
|
||||
|
||||
auto version = db::serializer<table_schema_version>::read(in);
|
||||
auto pk = partition_key(db::serializer<partition_key_view>::read(in));
|
||||
|
||||
mutation m(std::move(pk), std::move(s));
|
||||
|
||||
if (version == m.schema()->version()) {
|
||||
db::serializer<column_mapping>::skip(in);
|
||||
auto partition_view = mutation_partition_serializer::read_as_view(in);
|
||||
m.partition().apply(*m.schema(), partition_view, *m.schema());
|
||||
} else {
|
||||
column_mapping cm = db::serializer<column_mapping>::read(in);
|
||||
converting_mutation_partition_applier v(cm, *m.schema(), m.partition());
|
||||
auto partition_view = mutation_partition_serializer::read_as_view(in);
|
||||
partition_view.accept(cm, v);
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
template<>
|
||||
db::serializer<canonical_mutation>::serializer(const canonical_mutation& v)
|
||||
: _item(v)
|
||||
, _size(db::serializer<bytes>(v._data).size())
|
||||
{ }
|
||||
|
||||
template<>
|
||||
void
|
||||
db::serializer<canonical_mutation>::write(output& out, const canonical_mutation& v) {
|
||||
db::serializer<bytes>(v._data).write(out);
|
||||
}
|
||||
|
||||
template<>
|
||||
canonical_mutation db::serializer<canonical_mutation>::read(input& in) {
|
||||
return canonical_mutation(db::serializer<bytes>::read(in));
|
||||
}
|
||||
@@ -1,65 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2015 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bytes.hh"
|
||||
#include "schema.hh"
|
||||
#include "database_fwd.hh"
|
||||
#include "db/serializer.hh"
|
||||
#include "mutation_partition_visitor.hh"
|
||||
#include "mutation_partition_serializer.hh"
|
||||
|
||||
// Immutable mutation form which can be read using any schema version of the same table.
|
||||
// Safe to access from other shards via const&.
|
||||
// Safe to pass serialized across nodes.
|
||||
class canonical_mutation {
|
||||
bytes _data;
|
||||
canonical_mutation(bytes);
|
||||
public:
|
||||
explicit canonical_mutation(const mutation&);
|
||||
|
||||
canonical_mutation(canonical_mutation&&) = default;
|
||||
canonical_mutation(const canonical_mutation&) = default;
|
||||
canonical_mutation& operator=(const canonical_mutation&) = default;
|
||||
canonical_mutation& operator=(canonical_mutation&&) = default;
|
||||
|
||||
// Create a mutation object interpreting this canonical mutation using
|
||||
// given schema.
|
||||
//
|
||||
// Data which is not representable in the target schema is dropped. If this
|
||||
// is not intended, user should sync the schema first.
|
||||
mutation to_mutation(schema_ptr) const;
|
||||
|
||||
utils::UUID column_family_id() const;
|
||||
|
||||
friend class db::serializer<canonical_mutation>;
|
||||
};
|
||||
|
||||
namespace db {
|
||||
|
||||
template<> serializer<canonical_mutation>::serializer(const canonical_mutation&);
|
||||
template<> void serializer<canonical_mutation>::write(output&, const canonical_mutation&);
|
||||
template<> canonical_mutation serializer<canonical_mutation>::read(input&);
|
||||
|
||||
extern template class serializer<canonical_mutation>;
|
||||
|
||||
}
|
||||
@@ -34,8 +34,6 @@ enum class compaction_strategy_type {
|
||||
};
|
||||
|
||||
class compaction_strategy_impl;
|
||||
class sstable;
|
||||
struct compaction_descriptor;
|
||||
|
||||
class compaction_strategy {
|
||||
::shared_ptr<compaction_strategy_impl> _compaction_strategy_impl;
|
||||
@@ -48,9 +46,7 @@ public:
|
||||
compaction_strategy(compaction_strategy&&);
|
||||
compaction_strategy& operator=(compaction_strategy&&);
|
||||
|
||||
// Return a list of sstables to be compacted after applying the strategy.
|
||||
compaction_descriptor get_sstables_for_compaction(column_family& cfs, std::vector<lw_shared_ptr<sstable>> candidates);
|
||||
|
||||
future<> compact(column_family& cfs);
|
||||
static sstring name(compaction_strategy_type type) {
|
||||
switch (type) {
|
||||
case compaction_strategy_type::null:
|
||||
|
||||
153
compound.hh
153
compound.hh
@@ -26,10 +26,29 @@
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <boost/range/iterator_range.hpp>
|
||||
#include <boost/range/adaptor/transformed.hpp>
|
||||
#include "utils/serialization.hh"
|
||||
#include "unimplemented.hh"
|
||||
|
||||
// value_traits is meant to abstract away whether we are working on 'bytes'
|
||||
// elements or 'bytes_opt' elements. We don't support optional values, but
|
||||
// there are some generic layers which use this code which provide us with
|
||||
// data in that format. In order to avoid allocation and rewriting that data
|
||||
// into a new vector just to throw it away soon after that, we accept that
|
||||
// format too.
|
||||
|
||||
template <typename T>
|
||||
struct value_traits {
|
||||
static const T& unwrap(const T& t) { return t; }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct value_traits<bytes_opt> {
|
||||
static const bytes& unwrap(const bytes_opt& t) {
|
||||
assert(t);
|
||||
return *t;
|
||||
}
|
||||
};
|
||||
|
||||
enum class allow_prefixes { no, yes };
|
||||
|
||||
template<allow_prefixes AllowPrefixes = allow_prefixes::no>
|
||||
@@ -49,7 +68,7 @@ public:
|
||||
, _byte_order_equal(std::all_of(_types.begin(), _types.end(), [] (auto t) {
|
||||
return t->is_byte_order_equal();
|
||||
}))
|
||||
, _byte_order_comparable(false)
|
||||
, _byte_order_comparable(!is_prefixable && _types.size() == 1 && _types[0]->is_byte_order_comparable())
|
||||
, _is_reversed(_types.size() == 1 && _types[0]->is_reversed())
|
||||
{ }
|
||||
|
||||
@@ -69,47 +88,76 @@ public:
|
||||
|
||||
/*
|
||||
* Format:
|
||||
* <len(value1)><value1><len(value2)><value2>...<len(value_n)><value_n>
|
||||
* <len(value1)><value1><len(value2)><value2>...<len(value_n-1)><value_n-1>(len(value_n))?<value_n>
|
||||
*
|
||||
* For non-prefixable compounds, the value corresponding to the last component of types doesn't
|
||||
* have its length encoded, its length is deduced from the input range.
|
||||
*
|
||||
* serialize_value() and serialize_optionals() for single element rely on the fact that for a single-element
|
||||
* compounds their serialized form is equal to the serialized form of the component.
|
||||
*/
|
||||
template<typename RangeOfSerializedComponents>
|
||||
static void serialize_value(RangeOfSerializedComponents&& values, bytes::iterator& out) {
|
||||
for (auto&& val : values) {
|
||||
template<typename Wrapped>
|
||||
void serialize_value(const std::vector<Wrapped>& values, bytes::iterator& out) {
|
||||
if (AllowPrefixes == allow_prefixes::yes) {
|
||||
assert(values.size() <= _types.size());
|
||||
} else {
|
||||
assert(values.size() == _types.size());
|
||||
}
|
||||
|
||||
size_t n_left = _types.size();
|
||||
for (auto&& wrapped : values) {
|
||||
auto&& val = value_traits<Wrapped>::unwrap(wrapped);
|
||||
assert(val.size() <= std::numeric_limits<uint16_t>::max());
|
||||
write<uint16_t>(out, uint16_t(val.size()));
|
||||
if (--n_left || AllowPrefixes == allow_prefixes::yes) {
|
||||
write<uint16_t>(out, uint16_t(val.size()));
|
||||
}
|
||||
out = std::copy(val.begin(), val.end(), out);
|
||||
}
|
||||
}
|
||||
template <typename RangeOfSerializedComponents>
|
||||
static size_t serialized_size(RangeOfSerializedComponents&& values) {
|
||||
template <typename Wrapped>
|
||||
size_t serialized_size(const std::vector<Wrapped>& values) {
|
||||
size_t len = 0;
|
||||
for (auto&& val : values) {
|
||||
size_t n_left = _types.size();
|
||||
for (auto&& wrapped : values) {
|
||||
auto&& val = value_traits<Wrapped>::unwrap(wrapped);
|
||||
assert(val.size() <= std::numeric_limits<uint16_t>::max());
|
||||
len += sizeof(uint16_t) + val.size();
|
||||
if (--n_left || AllowPrefixes == allow_prefixes::yes) {
|
||||
len += sizeof(uint16_t);
|
||||
}
|
||||
len += val.size();
|
||||
}
|
||||
return len;
|
||||
}
|
||||
bytes serialize_single(bytes&& v) {
|
||||
return serialize_value({std::move(v)});
|
||||
if (AllowPrefixes == allow_prefixes::no) {
|
||||
assert(_types.size() == 1);
|
||||
return std::move(v);
|
||||
} else {
|
||||
// FIXME: Optimize
|
||||
std::vector<bytes> vec;
|
||||
vec.reserve(1);
|
||||
vec.emplace_back(std::move(v));
|
||||
return ::serialize_value(*this, vec);
|
||||
}
|
||||
}
|
||||
template<typename RangeOfSerializedComponents>
|
||||
static bytes serialize_value(RangeOfSerializedComponents&& values) {
|
||||
bytes b(bytes::initialized_later(), serialized_size(values));
|
||||
auto i = b.begin();
|
||||
serialize_value(values, i);
|
||||
return b;
|
||||
bytes serialize_value(const std::vector<bytes>& values) {
|
||||
return ::serialize_value(*this, values);
|
||||
}
|
||||
template<typename T>
|
||||
static bytes serialize_value(std::initializer_list<T> values) {
|
||||
return serialize_value(boost::make_iterator_range(values.begin(), values.end()));
|
||||
bytes serialize_value(std::vector<bytes>&& values) {
|
||||
if (AllowPrefixes == allow_prefixes::no && _types.size() == 1 && values.size() == 1) {
|
||||
return std::move(values[0]);
|
||||
}
|
||||
return ::serialize_value(*this, values);
|
||||
}
|
||||
bytes serialize_optionals(const std::vector<bytes_opt>& values) {
|
||||
return serialize_value(values | boost::adaptors::transformed([] (const bytes_opt& bo) -> bytes_view {
|
||||
if (!bo) {
|
||||
throw std::logic_error("attempted to create key component from empty optional");
|
||||
}
|
||||
return *bo;
|
||||
}));
|
||||
return ::serialize_value(*this, values);
|
||||
}
|
||||
bytes serialize_optionals(std::vector<bytes_opt>&& values) {
|
||||
if (AllowPrefixes == allow_prefixes::no && _types.size() == 1 && values.size() == 1) {
|
||||
assert(values[0]);
|
||||
return std::move(*values[0]);
|
||||
}
|
||||
return ::serialize_value(*this, values);
|
||||
}
|
||||
bytes serialize_value_deep(const std::vector<data_value>& values) {
|
||||
// TODO: Optimize
|
||||
@@ -123,19 +171,35 @@ public:
|
||||
return serialize_value(partial);
|
||||
}
|
||||
bytes decompose_value(const value_type& values) {
|
||||
return serialize_value(values);
|
||||
return ::serialize_value(*this, values);
|
||||
}
|
||||
class iterator : public std::iterator<std::input_iterator_tag, bytes_view> {
|
||||
private:
|
||||
ssize_t _types_left;
|
||||
bytes_view _v;
|
||||
value_type _current;
|
||||
private:
|
||||
void read_current() {
|
||||
if (_types_left == 0) {
|
||||
if (!_v.empty()) {
|
||||
throw marshal_exception();
|
||||
}
|
||||
_v = bytes_view(nullptr, 0);
|
||||
return;
|
||||
}
|
||||
--_types_left;
|
||||
uint16_t len;
|
||||
{
|
||||
if (_types_left == 0 && AllowPrefixes == allow_prefixes::no) {
|
||||
len = _v.size();
|
||||
} else {
|
||||
if (_v.empty()) {
|
||||
_v = bytes_view(nullptr, 0);
|
||||
return;
|
||||
if (AllowPrefixes == allow_prefixes::yes) {
|
||||
_types_left = 0;
|
||||
_v = bytes_view(nullptr, 0);
|
||||
return;
|
||||
} else {
|
||||
throw marshal_exception();
|
||||
}
|
||||
}
|
||||
len = read_simple<uint16_t>(_v);
|
||||
if (_v.size() < len) {
|
||||
@@ -147,10 +211,10 @@ public:
|
||||
}
|
||||
public:
|
||||
struct end_iterator_tag {};
|
||||
iterator(const bytes_view& v) : _v(v) {
|
||||
iterator(const compound_type& t, const bytes_view& v) : _types_left(t._types.size()), _v(v) {
|
||||
read_current();
|
||||
}
|
||||
iterator(end_iterator_tag, const bytes_view& v) : _v(nullptr, 0) {}
|
||||
iterator(end_iterator_tag, const bytes_view& v) : _types_left(0), _v(nullptr, 0) {}
|
||||
iterator& operator++() {
|
||||
read_current();
|
||||
return *this;
|
||||
@@ -162,18 +226,21 @@ public:
|
||||
}
|
||||
const value_type& operator*() const { return _current; }
|
||||
const value_type* operator->() const { return &_current; }
|
||||
bool operator!=(const iterator& i) const { return _v.begin() != i._v.begin(); }
|
||||
bool operator==(const iterator& i) const { return _v.begin() == i._v.begin(); }
|
||||
bool operator!=(const iterator& i) const { return _v.begin() != i._v.begin() || _types_left != i._types_left; }
|
||||
bool operator==(const iterator& i) const { return _v.begin() == i._v.begin() && _types_left == i._types_left; }
|
||||
};
|
||||
static iterator begin(const bytes_view& v) {
|
||||
return iterator(v);
|
||||
iterator begin(const bytes_view& v) const {
|
||||
return iterator(*this, v);
|
||||
}
|
||||
static iterator end(const bytes_view& v) {
|
||||
iterator end(const bytes_view& v) const {
|
||||
return iterator(typename iterator::end_iterator_tag(), v);
|
||||
}
|
||||
static boost::iterator_range<iterator> components(const bytes_view& v) {
|
||||
boost::iterator_range<iterator> components(const bytes_view& v) const {
|
||||
return { begin(v), end(v) };
|
||||
}
|
||||
auto iter_items(const bytes_view& v) {
|
||||
return boost::iterator_range<iterator>(begin(v), end(v));
|
||||
}
|
||||
value_type deserialize_value(bytes_view v) {
|
||||
std::vector<bytes> result;
|
||||
result.reserve(_types.size());
|
||||
@@ -191,7 +258,7 @@ public:
|
||||
}
|
||||
auto t = _types.begin();
|
||||
size_t h = 0;
|
||||
for (auto&& value : components(v)) {
|
||||
for (auto&& value : iter_items(v)) {
|
||||
h ^= (*t)->hash(value);
|
||||
++t;
|
||||
}
|
||||
@@ -210,6 +277,12 @@ public:
|
||||
return type->compare(v1, v2);
|
||||
});
|
||||
}
|
||||
bytes from_string(sstring_view s) {
|
||||
throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
|
||||
}
|
||||
sstring to_string(const bytes& b) {
|
||||
throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
|
||||
}
|
||||
// Retruns true iff given prefix has no missing components
|
||||
bool is_full(bytes_view v) const {
|
||||
assert(AllowPrefixes == allow_prefixes::yes);
|
||||
|
||||
@@ -169,17 +169,6 @@ rpc_address: localhost
|
||||
# port for Thrift to listen for clients on
|
||||
rpc_port: 9160
|
||||
|
||||
# port for REST API server
|
||||
api_port: 10000
|
||||
|
||||
# IP for the REST API server
|
||||
api_address: 127.0.0.1
|
||||
|
||||
# Log WARN on any batch size exceeding this value. 5kb per batch by default.
|
||||
# Caution should be taken on increasing the size of this threshold as it can lead to node instability.
|
||||
batch_size_warn_threshold_in_kb: 5
|
||||
|
||||
|
||||
###################################################
|
||||
## Not currently supported, reserved for future use
|
||||
###################################################
|
||||
@@ -216,7 +205,7 @@ batch_size_warn_threshold_in_kb: 5
|
||||
# reduced proportionally to the number of nodes in the cluster.
|
||||
# batchlog_replay_throttle_in_kb: 1024
|
||||
|
||||
# Authentication backend, identifying users
|
||||
# Authentication backend, implementing IAuthenticator; used to identify users
|
||||
# Out of the box, Scylla provides org.apache.cassandra.auth.{AllowAllAuthenticator,
|
||||
# PasswordAuthenticator}.
|
||||
#
|
||||
@@ -610,6 +599,10 @@ commitlog_total_space_in_mb: -1
|
||||
# column_index_size_in_kb: 64
|
||||
|
||||
|
||||
# Log WARN on any batch size exceeding this value. 5kb per batch by default.
|
||||
# Caution should be taken on increasing the size of this threshold as it can lead to node instability.
|
||||
# batch_size_warn_threshold_in_kb: 5
|
||||
|
||||
# Number of simultaneous compactions to allow, NOT including
|
||||
# validation "compactions" for anti-entropy repair. Simultaneous
|
||||
# compactions can help preserve read performance in a mixed read/write
|
||||
|
||||
138
configure.py
138
configure.py
@@ -25,31 +25,6 @@ from distutils.spawn import find_executable
|
||||
|
||||
configure_args = str.join(' ', [shlex.quote(x) for x in sys.argv[1:]])
|
||||
|
||||
for line in open('/etc/os-release'):
|
||||
key, _, value = line.partition('=')
|
||||
value = value.strip().strip('"')
|
||||
if key == 'ID':
|
||||
os_ids = [value]
|
||||
if key == 'ID_LIKE':
|
||||
os_ids += value.split(' ')
|
||||
|
||||
# distribution "internationalization", converting package names.
|
||||
# Fedora name is key, values is distro -> package name dict.
|
||||
i18n_xlat = {
|
||||
'boost-devel': {
|
||||
'debian': 'libboost-dev',
|
||||
'ubuntu': 'libboost-dev (libboost1.55-dev on 14.04)',
|
||||
},
|
||||
}
|
||||
|
||||
def pkgname(name):
|
||||
if name in i18n_xlat:
|
||||
dict = i18n_xlat[name]
|
||||
for id in os_ids:
|
||||
if id in dict:
|
||||
return dict[id]
|
||||
return name
|
||||
|
||||
def get_flags():
|
||||
with open('/proc/cpuinfo') as f:
|
||||
for line in f:
|
||||
@@ -75,9 +50,6 @@ def apply_tristate(var, test, note, missing):
|
||||
return False
|
||||
return False
|
||||
|
||||
def have_pkg(package):
|
||||
return subprocess.call(['pkg-config', package]) == 0
|
||||
|
||||
def pkg_config(option, package):
|
||||
output = subprocess.check_output(['pkg-config', option, package])
|
||||
return output.decode('utf-8').strip()
|
||||
@@ -162,7 +134,6 @@ modes = {
|
||||
|
||||
scylla_tests = [
|
||||
'tests/mutation_test',
|
||||
'tests/canonical_mutation_test',
|
||||
'tests/range_test',
|
||||
'tests/types_test',
|
||||
'tests/keys_test',
|
||||
@@ -180,7 +151,6 @@ scylla_tests = [
|
||||
'tests/perf/perf_sstable',
|
||||
'tests/cql_query_test',
|
||||
'tests/storage_proxy_test',
|
||||
'tests/schema_change_test',
|
||||
'tests/mutation_reader_test',
|
||||
'tests/key_reader_test',
|
||||
'tests/mutation_query_test',
|
||||
@@ -214,7 +184,6 @@ scylla_tests = [
|
||||
'tests/crc_test',
|
||||
'tests/flush_queue_test',
|
||||
'tests/dynamic_bitset_test',
|
||||
'tests/auth_test',
|
||||
]
|
||||
|
||||
apps = [
|
||||
@@ -253,8 +222,6 @@ arg_parser.add_argument('--static-stdc++', dest = 'staticcxx', action = 'store_t
|
||||
help = 'Link libgcc and libstdc++ statically')
|
||||
arg_parser.add_argument('--tests-debuginfo', action = 'store', dest = 'tests_debuginfo', type = int, default = 0,
|
||||
help = 'Enable(1)/disable(0)compiler debug information generation for tests')
|
||||
arg_parser.add_argument('--python', action = 'store', dest = 'python', default = 'python3',
|
||||
help = 'Python3 path')
|
||||
add_tristate(arg_parser, name = 'hwloc', dest = 'hwloc', help = 'hwloc support')
|
||||
add_tristate(arg_parser, name = 'xen', dest = 'xen', help = 'Xen support')
|
||||
args = arg_parser.parse_args()
|
||||
@@ -268,15 +235,11 @@ cassandra_interface = Thrift(source = 'interface/cassandra.thrift', service = 'C
|
||||
|
||||
scylla_core = (['database.cc',
|
||||
'schema.cc',
|
||||
'frozen_schema.cc',
|
||||
'schema_registry.cc',
|
||||
'bytes.cc',
|
||||
'mutation.cc',
|
||||
'row_cache.cc',
|
||||
'canonical_mutation.cc',
|
||||
'frozen_mutation.cc',
|
||||
'memtable.cc',
|
||||
'schema_mutations.cc',
|
||||
'release.cc',
|
||||
'utils/logalloc.cc',
|
||||
'utils/large_bitset.cc',
|
||||
@@ -294,7 +257,6 @@ scylla_core = (['database.cc',
|
||||
'sstables/partition.cc',
|
||||
'sstables/filter.cc',
|
||||
'sstables/compaction.cc',
|
||||
'sstables/compaction_manager.cc',
|
||||
'log.cc',
|
||||
'transport/event.cc',
|
||||
'transport/event_notifier.cc',
|
||||
@@ -330,7 +292,6 @@ scylla_core = (['database.cc',
|
||||
'cql3/statements/index_target.cc',
|
||||
'cql3/statements/create_index_statement.cc',
|
||||
'cql3/statements/truncate_statement.cc',
|
||||
'cql3/statements/alter_table_statement.cc',
|
||||
'cql3/update_parameters.cc',
|
||||
'cql3/ut_name.cc',
|
||||
'thrift/handler.cc',
|
||||
@@ -342,7 +303,6 @@ scylla_core = (['database.cc',
|
||||
'utils/big_decimal.cc',
|
||||
'types.cc',
|
||||
'validation.cc',
|
||||
'service/priority_manager.cc',
|
||||
'service/migration_manager.cc',
|
||||
'service/storage_proxy.cc',
|
||||
'cql3/operator.cc',
|
||||
@@ -380,6 +340,7 @@ scylla_core = (['database.cc',
|
||||
'utils/bloom_filter.cc',
|
||||
'utils/bloom_calculations.cc',
|
||||
'utils/rate_limiter.cc',
|
||||
'utils/compaction_manager.cc',
|
||||
'utils/file_lock.cc',
|
||||
'utils/dynamic_bitset.cc',
|
||||
'gms/version_generator.cc',
|
||||
@@ -413,12 +374,13 @@ scylla_core = (['database.cc',
|
||||
'locator/ec2_snitch.cc',
|
||||
'locator/ec2_multi_region_snitch.cc',
|
||||
'message/messaging_service.cc',
|
||||
'service/client_state.cc',
|
||||
'service/migration_task.cc',
|
||||
'service/storage_service.cc',
|
||||
'service/pending_range_calculator_service.cc',
|
||||
'service/load_broadcaster.cc',
|
||||
'service/pager/paging_state.cc',
|
||||
'service/pager/query_pagers.cc',
|
||||
'streaming/streaming.cc',
|
||||
'streaming/stream_task.cc',
|
||||
'streaming/stream_session.cc',
|
||||
'streaming/stream_request.cc',
|
||||
@@ -431,6 +393,13 @@ scylla_core = (['database.cc',
|
||||
'streaming/stream_coordinator.cc',
|
||||
'streaming/stream_manager.cc',
|
||||
'streaming/stream_result_future.cc',
|
||||
'streaming/messages/stream_init_message.cc',
|
||||
'streaming/messages/retry_message.cc',
|
||||
'streaming/messages/received_message.cc',
|
||||
'streaming/messages/prepare_message.cc',
|
||||
'streaming/messages/file_message_header.cc',
|
||||
'streaming/messages/outgoing_file_message.cc',
|
||||
'streaming/messages/incoming_file_message.cc',
|
||||
'streaming/stream_session_state.cc',
|
||||
'gc_clock.cc',
|
||||
'partition_slice_builder.cc',
|
||||
@@ -438,12 +407,6 @@ scylla_core = (['database.cc',
|
||||
'repair/repair.cc',
|
||||
'exceptions/exceptions.cc',
|
||||
'dns.cc',
|
||||
'auth/auth.cc',
|
||||
'auth/authenticated_user.cc',
|
||||
'auth/authenticator.cc',
|
||||
'auth/data_resource.cc',
|
||||
'auth/password_authenticator.cc',
|
||||
'auth/permission.cc',
|
||||
]
|
||||
+ [Antlr3Grammar('cql3/Cql.g')]
|
||||
+ [Thrift('interface/cassandra.thrift', 'Cassandra')]
|
||||
@@ -483,23 +446,7 @@ api = ['api/api.cc',
|
||||
'api/system.cc'
|
||||
]
|
||||
|
||||
idls = ['idl/gossip_digest.idl.hh',
|
||||
'idl/uuid.idl.hh',
|
||||
'idl/range.idl.hh',
|
||||
'idl/keys.idl.hh',
|
||||
'idl/read_command.idl.hh',
|
||||
'idl/token.idl.hh',
|
||||
'idl/ring_position.idl.hh',
|
||||
'idl/result.idl.hh',
|
||||
'idl/frozen_mutation.idl.hh',
|
||||
'idl/reconcilable_result.idl.hh',
|
||||
'idl/streaming.idl.hh',
|
||||
'idl/paging_state.idl.hh',
|
||||
'idl/frozen_schema.idl.hh',
|
||||
'idl/partition_checksum.idl.hh',
|
||||
]
|
||||
|
||||
scylla_tests_dependencies = scylla_core + api + idls + [
|
||||
scylla_tests_dependencies = scylla_core + [
|
||||
'tests/cql_test_env.cc',
|
||||
'tests/cql_assertions.cc',
|
||||
'tests/result_set_assertions.cc',
|
||||
@@ -512,15 +459,15 @@ scylla_tests_seastar_deps = [
|
||||
]
|
||||
|
||||
deps = {
|
||||
'scylla': idls + ['main.cc'] + scylla_core + api,
|
||||
'scylla': ['main.cc'] + scylla_core + api,
|
||||
}
|
||||
|
||||
tests_not_using_seastar_test_framework = set([
|
||||
'tests/types_test',
|
||||
'tests/keys_test',
|
||||
'tests/partitioner_test',
|
||||
'tests/map_difference_test',
|
||||
'tests/frozen_mutation_test',
|
||||
'tests/canonical_mutation_test',
|
||||
'tests/perf/perf_mutation',
|
||||
'tests/lsa_async_eviction_test',
|
||||
'tests/lsa_sync_eviction_test',
|
||||
@@ -582,45 +529,6 @@ else:
|
||||
args.pie = ''
|
||||
args.fpie = ''
|
||||
|
||||
# a list element means a list of alternative packages to consider
|
||||
# the first element becomes the HAVE_pkg define
|
||||
# a string element is a package name with no alternatives
|
||||
optional_packages = [['libsystemd', 'libsystemd-daemon']]
|
||||
pkgs = []
|
||||
|
||||
def setup_first_pkg_of_list(pkglist):
|
||||
# The HAVE_pkg symbol is taken from the first alternative
|
||||
upkg = pkglist[0].upper().replace('-', '_')
|
||||
for pkg in pkglist:
|
||||
if have_pkg(pkg):
|
||||
pkgs.append(pkg)
|
||||
defines.append('HAVE_{}=1'.format(upkg))
|
||||
return True
|
||||
return False
|
||||
|
||||
for pkglist in optional_packages:
|
||||
if isinstance(pkglist, str):
|
||||
pkglist = [pkglist]
|
||||
if not setup_first_pkg_of_list(pkglist):
|
||||
if len(pkglist) == 1:
|
||||
print('Missing optional package {pkglist[0]}'.format(**locals()))
|
||||
else:
|
||||
alternatives = ':'.join(pkglist[1:])
|
||||
print('Missing optional package {pkglist[0]} (or alteratives {alternatives})'.format(**locals()))
|
||||
|
||||
if not try_compile(compiler=args.cxx, source='#include <boost/version.hpp>'):
|
||||
print('Boost not installed. Please install {}.'.format(pkgname("boost-devel")))
|
||||
sys.exit(1)
|
||||
|
||||
if not try_compile(compiler=args.cxx, source='''\
|
||||
#include <boost/version.hpp>
|
||||
#if BOOST_VERSION < 105500
|
||||
#error Boost version too low
|
||||
#endif
|
||||
'''):
|
||||
print('Installed boost version too old. Please update {}.'.format(pkgname("boost-devel")))
|
||||
sys.exit(1)
|
||||
|
||||
defines = ' '.join(['-D' + d for d in defines])
|
||||
|
||||
globals().update(vars(args))
|
||||
@@ -653,7 +561,7 @@ elif args.dpdk_target:
|
||||
seastar_cflags = args.user_cflags + " -march=nehalem"
|
||||
seastar_flags += ['--compiler', args.cxx, '--cflags=%s' % (seastar_cflags)]
|
||||
|
||||
status = subprocess.call([python, './configure.py'] + seastar_flags, cwd = 'seastar')
|
||||
status = subprocess.call(['./configure.py'] + seastar_flags, cwd = 'seastar')
|
||||
|
||||
if status != 0:
|
||||
print('Seastar configuration failed')
|
||||
@@ -682,10 +590,7 @@ for mode in build_modes:
|
||||
seastar_deps = 'practically_anything_can_change_so_lets_run_it_every_time_and_restat.'
|
||||
|
||||
args.user_cflags += " " + pkg_config("--cflags", "jsoncpp")
|
||||
libs = "-lyaml-cpp -llz4 -lz -lsnappy " + pkg_config("--libs", "jsoncpp") + ' -lboost_filesystem' + ' -lcrypt'
|
||||
for pkg in pkgs:
|
||||
args.user_cflags += ' ' + pkg_config('--cflags', pkg)
|
||||
libs += ' ' + pkg_config('--libs', pkg)
|
||||
libs = "-lyaml-cpp -llz4 -lz -lsnappy " + pkg_config("--libs", "jsoncpp") + ' -lboost_filesystem'
|
||||
user_cflags = args.user_cflags
|
||||
user_ldflags = args.user_ldflags
|
||||
if args.staticcxx:
|
||||
@@ -717,9 +622,6 @@ with open(buildfile, 'w') as f:
|
||||
rule swagger
|
||||
command = seastar/json/json2code.py -f $in -o $out
|
||||
description = SWAGGER $out
|
||||
rule serializer
|
||||
command = ./idl-compiler.py --ns ser -f $in -o $out
|
||||
description = IDL compiler $out
|
||||
rule ninja
|
||||
command = {ninja} -C $subdir $target
|
||||
restat = 1
|
||||
@@ -756,7 +658,6 @@ with open(buildfile, 'w') as f:
|
||||
compiles = {}
|
||||
ragels = {}
|
||||
swaggers = {}
|
||||
serializers = {}
|
||||
thrifts = set()
|
||||
antlr3_grammars = set()
|
||||
for binary in build_artifacts:
|
||||
@@ -810,9 +711,6 @@ with open(buildfile, 'w') as f:
|
||||
elif src.endswith('.rl'):
|
||||
hh = '$builddir/' + mode + '/gen/' + src.replace('.rl', '.hh')
|
||||
ragels[hh] = src
|
||||
elif src.endswith('.idl.hh'):
|
||||
hh = '$builddir/' + mode + '/gen/' + src.replace('.idl.hh', '.dist.hh')
|
||||
serializers[hh] = src
|
||||
elif src.endswith('.json'):
|
||||
hh = '$builddir/' + mode + '/gen/' + src + '.hh'
|
||||
swaggers[hh] = src
|
||||
@@ -831,7 +729,6 @@ with open(buildfile, 'w') as f:
|
||||
for g in antlr3_grammars:
|
||||
gen_headers += g.headers('$builddir/{}/gen'.format(mode))
|
||||
gen_headers += list(swaggers.keys())
|
||||
gen_headers += list(serializers.keys())
|
||||
f.write('build {}: cxx.{} {} || {} \n'.format(obj, mode, src, ' '.join(gen_headers)))
|
||||
if src in extra_cxxflags:
|
||||
f.write(' cxxflags = {seastar_cflags} $cxxflags $cxxflags_{mode} {extra_cxxflags}\n'.format(mode = mode, extra_cxxflags = extra_cxxflags[src], **modeval))
|
||||
@@ -841,9 +738,6 @@ with open(buildfile, 'w') as f:
|
||||
for hh in swaggers:
|
||||
src = swaggers[hh]
|
||||
f.write('build {}: swagger {}\n'.format(hh,src))
|
||||
for hh in serializers:
|
||||
src = serializers[hh]
|
||||
f.write('build {}: serializer {} | idl-compiler.py\n'.format(hh,src))
|
||||
for thrift in thrifts:
|
||||
outs = ' '.join(thrift.generated('$builddir/{}/gen'.format(mode)))
|
||||
f.write('build {}: thrift.{} {}\n'.format(outs, mode, thrift.source))
|
||||
@@ -863,7 +757,7 @@ with open(buildfile, 'w') as f:
|
||||
f.write('build {}: phony\n'.format(seastar_deps))
|
||||
f.write(textwrap.dedent('''\
|
||||
rule configure
|
||||
command = {python} configure.py $configure_args
|
||||
command = python3 configure.py $configure_args
|
||||
generator = 1
|
||||
build build.ninja: configure | configure.py
|
||||
rule cscope
|
||||
|
||||
@@ -1,119 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2015 Cloudius Systems, Ltd.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "mutation_partition_view.hh"
|
||||
#include "schema.hh"
|
||||
|
||||
// Mutation partition visitor which applies visited data into
|
||||
// existing mutation_partition. The visited data may be of a different schema.
|
||||
// Data which is not representable in the new schema is dropped.
|
||||
// Weak exception guarantees.
|
||||
class converting_mutation_partition_applier : public mutation_partition_visitor {
|
||||
const schema& _p_schema;
|
||||
mutation_partition& _p;
|
||||
const column_mapping& _visited_column_mapping;
|
||||
deletable_row* _current_row;
|
||||
private:
|
||||
static bool is_compatible(const column_definition& new_def, const data_type& old_type, column_kind kind) {
|
||||
return new_def.kind == kind && new_def.type->is_value_compatible_with(*old_type);
|
||||
}
|
||||
void accept_cell(row& dst, column_kind kind, const column_definition& new_def, const data_type& old_type, atomic_cell_view cell) {
|
||||
if (is_compatible(new_def, old_type, kind) && cell.timestamp() > new_def.dropped_at()) {
|
||||
dst.apply(new_def, atomic_cell_or_collection(cell));
|
||||
}
|
||||
}
|
||||
void accept_cell(row& dst, column_kind kind, const column_definition& new_def, const data_type& old_type, collection_mutation_view cell) {
|
||||
if (!is_compatible(new_def, old_type, kind)) {
|
||||
return;
|
||||
}
|
||||
auto&& ctype = static_pointer_cast<const collection_type_impl>(old_type);
|
||||
auto old_view = ctype->deserialize_mutation_form(cell);
|
||||
|
||||
collection_type_impl::mutation_view new_view;
|
||||
if (old_view.tomb.timestamp > new_def.dropped_at()) {
|
||||
new_view.tomb = old_view.tomb;
|
||||
}
|
||||
for (auto& c : old_view.cells) {
|
||||
if (c.second.timestamp() > new_def.dropped_at()) {
|
||||
new_view.cells.emplace_back(std::move(c));
|
||||
}
|
||||
}
|
||||
dst.apply(new_def, ctype->serialize_mutation_form(std::move(new_view)));
|
||||
}
|
||||
public:
|
||||
converting_mutation_partition_applier(
|
||||
const column_mapping& visited_column_mapping,
|
||||
const schema& target_schema,
|
||||
mutation_partition& target)
|
||||
: _p_schema(target_schema)
|
||||
, _p(target)
|
||||
, _visited_column_mapping(visited_column_mapping)
|
||||
{ }
|
||||
|
||||
virtual void accept_partition_tombstone(tombstone t) override {
|
||||
_p.apply(t);
|
||||
}
|
||||
|
||||
virtual void accept_static_cell(column_id id, atomic_cell_view cell) override {
|
||||
const column_mapping::column& col = _visited_column_mapping.static_column_at(id);
|
||||
const column_definition* def = _p_schema.get_column_definition(col.name());
|
||||
if (def) {
|
||||
accept_cell(_p._static_row, column_kind::static_column, *def, col.type(), cell);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void accept_static_cell(column_id id, collection_mutation_view collection) override {
|
||||
const column_mapping::column& col = _visited_column_mapping.static_column_at(id);
|
||||
const column_definition* def = _p_schema.get_column_definition(col.name());
|
||||
if (def) {
|
||||
accept_cell(_p._static_row, column_kind::static_column, *def, col.type(), collection);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void accept_row_tombstone(clustering_key_prefix_view prefix, tombstone t) override {
|
||||
_p.apply_row_tombstone(_p_schema, prefix, t);
|
||||
}
|
||||
|
||||
virtual void accept_row(clustering_key_view key, tombstone deleted_at, const row_marker& rm) override {
|
||||
deletable_row& r = _p.clustered_row(_p_schema, key);
|
||||
r.apply(rm);
|
||||
r.apply(deleted_at);
|
||||
_current_row = &r;
|
||||
}
|
||||
|
||||
virtual void accept_row_cell(column_id id, atomic_cell_view cell) override {
|
||||
const column_mapping::column& col = _visited_column_mapping.regular_column_at(id);
|
||||
const column_definition* def = _p_schema.get_column_definition(col.name());
|
||||
if (def) {
|
||||
accept_cell(_current_row->cells(), column_kind::regular_column, *def, col.type(), cell);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void accept_row_cell(column_id id, collection_mutation_view collection) override {
|
||||
const column_mapping::column& col = _visited_column_mapping.regular_column_at(id);
|
||||
const column_definition* def = _p_schema.get_column_definition(col.name());
|
||||
if (def) {
|
||||
accept_cell(_current_row->cells(), column_kind::regular_column, *def, col.type(), collection);
|
||||
}
|
||||
}
|
||||
};
|
||||
42
cql3/Cql.g
42
cql3/Cql.g
@@ -31,7 +31,6 @@ options {
|
||||
|
||||
@parser::includes {
|
||||
#include "cql3/selection/writetime_or_ttl.hh"
|
||||
#include "cql3/statements/alter_table_statement.hh"
|
||||
#include "cql3/statements/create_keyspace_statement.hh"
|
||||
#include "cql3/statements/drop_keyspace_statement.hh"
|
||||
#include "cql3/statements/create_index_statement.hh"
|
||||
@@ -270,9 +269,7 @@ cqlStatement returns [shared_ptr<parsed_statement> stmt]
|
||||
| st12=dropTableStatement { $stmt = st12; }
|
||||
#if 0
|
||||
| st13=dropIndexStatement { $stmt = st13; }
|
||||
#endif
|
||||
| st14=alterTableStatement { $stmt = st14; }
|
||||
#if 0
|
||||
| st15=alterKeyspaceStatement { $stmt = st15; }
|
||||
| st16=grantStatement { $stmt = st16; }
|
||||
| st17=revokeStatement { $stmt = st17; }
|
||||
@@ -771,7 +768,7 @@ alterKeyspaceStatement returns [AlterKeyspaceStatement expr]
|
||||
: K_ALTER K_KEYSPACE ks=keyspaceName
|
||||
K_WITH properties[attrs] { $expr = new AlterKeyspaceStatement(ks, attrs); }
|
||||
;
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* ALTER COLUMN FAMILY <CF> ALTER <column> TYPE <newtype>;
|
||||
@@ -780,29 +777,27 @@ alterKeyspaceStatement returns [AlterKeyspaceStatement expr]
|
||||
* ALTER COLUMN FAMILY <CF> WITH <property> = <value>;
|
||||
* ALTER COLUMN FAMILY <CF> RENAME <column> TO <column>;
|
||||
*/
|
||||
alterTableStatement returns [shared_ptr<alter_table_statement> expr]
|
||||
alterTableStatement returns [AlterTableStatement expr]
|
||||
@init {
|
||||
alter_table_statement::type type;
|
||||
auto props = make_shared<cql3::statements::cf_prop_defs>();;
|
||||
std::vector<std::pair<shared_ptr<cql3::column_identifier::raw>, shared_ptr<cql3::column_identifier::raw>>> renames;
|
||||
bool is_static = false;
|
||||
AlterTableStatement.Type type = null;
|
||||
CFPropDefs props = new CFPropDefs();
|
||||
Map<ColumnIdentifier.Raw, ColumnIdentifier.Raw> renames = new HashMap<ColumnIdentifier.Raw, ColumnIdentifier.Raw>();
|
||||
boolean isStatic = false;
|
||||
}
|
||||
: K_ALTER K_COLUMNFAMILY cf=columnFamilyName
|
||||
( K_ALTER id=cident K_TYPE v=comparatorType { type = alter_table_statement::type::alter; }
|
||||
| K_ADD id=cident v=comparatorType ({ is_static=true; } K_STATIC)? { type = alter_table_statement::type::add; }
|
||||
| K_DROP id=cident { type = alter_table_statement::type::drop; }
|
||||
| K_WITH properties[props] { type = alter_table_statement::type::opts; }
|
||||
| K_RENAME { type = alter_table_statement::type::rename; }
|
||||
id1=cident K_TO toId1=cident { renames.emplace_back(id1, toId1); }
|
||||
( K_AND idn=cident K_TO toIdn=cident { renames.emplace_back(idn, toIdn); } )*
|
||||
( K_ALTER id=cident K_TYPE v=comparatorType { type = AlterTableStatement.Type.ALTER; }
|
||||
| K_ADD id=cident v=comparatorType ({ isStatic=true; } K_STATIC)? { type = AlterTableStatement.Type.ADD; }
|
||||
| K_DROP id=cident { type = AlterTableStatement.Type.DROP; }
|
||||
| K_WITH properties[props] { type = AlterTableStatement.Type.OPTS; }
|
||||
| K_RENAME { type = AlterTableStatement.Type.RENAME; }
|
||||
id1=cident K_TO toId1=cident { renames.put(id1, toId1); }
|
||||
( K_AND idn=cident K_TO toIdn=cident { renames.put(idn, toIdn); } )*
|
||||
)
|
||||
{
|
||||
$expr = ::make_shared<alter_table_statement>(std::move(cf), type, std::move(id),
|
||||
std::move(v), std::move(props), std::move(renames), is_static);
|
||||
$expr = new AlterTableStatement(cf, type, id, v, props, renames, isStatic);
|
||||
}
|
||||
;
|
||||
|
||||
#if 0
|
||||
/**
|
||||
* ALTER TYPE <name> ALTER <field> TYPE <newtype>;
|
||||
* ALTER TYPE <name> ADD <field> <newtype>;
|
||||
@@ -1248,7 +1243,6 @@ relationType returns [const cql3::operator_type* op = nullptr]
|
||||
;
|
||||
|
||||
relation[std::vector<cql3::relation_ptr>& clauses]
|
||||
@init{ const cql3::operator_type* rt = nullptr; }
|
||||
: name=cident type=relationType t=term { $clauses.emplace_back(::make_shared<cql3::single_column_relation>(std::move(name), *type, std::move(t))); }
|
||||
|
||||
| K_TOKEN l=tupleOfIdentifiers type=relationType t=term
|
||||
@@ -1258,9 +1252,11 @@ relation[std::vector<cql3::relation_ptr>& clauses]
|
||||
{ $clauses.emplace_back(make_shared<cql3::single_column_relation>(std::move(name), cql3::operator_type::IN, std::move(marker))); }
|
||||
| name=cident K_IN in_values=singleColumnInValues
|
||||
{ $clauses.emplace_back(cql3::single_column_relation::create_in_relation(std::move(name), std::move(in_values))); }
|
||||
| name=cident K_CONTAINS { rt = &cql3::operator_type::CONTAINS; } (K_KEY { rt = &cql3::operator_type::CONTAINS_KEY; })?
|
||||
t=term { $clauses.emplace_back(make_shared<cql3::single_column_relation>(std::move(name), *rt, std::move(t))); }
|
||||
| name=cident '[' key=term ']' type=relationType t=term { $clauses.emplace_back(make_shared<cql3::single_column_relation>(std::move(name), std::move(key), *type, std::move(t))); }
|
||||
#if 0
|
||||
| name=cident K_CONTAINS { Operator rt = Operator.CONTAINS; } (K_KEY { rt = Operator.CONTAINS_KEY; })?
|
||||
t=term { $clauses.add(new SingleColumnRelation(name, rt, t)); }
|
||||
| name=cident '[' key=term ']' type=relationType t=term { $clauses.add(new SingleColumnRelation(name, key, type, t)); }
|
||||
#endif
|
||||
| ids=tupleOfIdentifiers
|
||||
( K_IN
|
||||
( '(' ')'
|
||||
|
||||
@@ -259,10 +259,7 @@ lists::setter_by_index::execute(mutation& m, const exploded_clustering_prefix& p
|
||||
// we should not get here for frozen lists
|
||||
assert(column.type->is_multi_cell()); // "Attempted to set an individual element on a frozen list";
|
||||
|
||||
std::experimental::optional<clustering_key> row_key;
|
||||
if (!column.is_static()) {
|
||||
row_key = clustering_key::from_clustering_prefix(*params._schema, prefix);
|
||||
}
|
||||
auto row_key = clustering_key::from_clustering_prefix(*params._schema, prefix);
|
||||
|
||||
auto index = _idx->bind_and_get(params._options);
|
||||
auto value = _t->bind_and_get(params._options);
|
||||
@@ -272,7 +269,8 @@ lists::setter_by_index::execute(mutation& m, const exploded_clustering_prefix& p
|
||||
}
|
||||
|
||||
auto idx = net::ntoh(int32_t(*unaligned_cast<int32_t>(index->begin())));
|
||||
auto&& existing_list_opt = params.get_prefetched_list(m.key(), std::move(row_key), column);
|
||||
|
||||
auto existing_list_opt = params.get_prefetched_list(m.key(), row_key, column);
|
||||
if (!existing_list_opt) {
|
||||
throw exceptions::invalid_request_exception("Attempted to set an element on a list which is null");
|
||||
}
|
||||
@@ -385,13 +383,8 @@ lists::discarder::requires_read() {
|
||||
void
|
||||
lists::discarder::execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) {
|
||||
assert(column.type->is_multi_cell()); // "Attempted to delete from a frozen list";
|
||||
|
||||
std::experimental::optional<clustering_key> row_key;
|
||||
if (!column.is_static()) {
|
||||
row_key = clustering_key::from_clustering_prefix(*params._schema, prefix);
|
||||
}
|
||||
|
||||
auto&& existing_list = params.get_prefetched_list(m.key(), std::move(row_key), column);
|
||||
auto&& row_key = clustering_key::from_clustering_prefix(*params._schema, prefix);
|
||||
auto&& existing_list = params.get_prefetched_list(m.key(), row_key, column);
|
||||
// We want to call bind before possibly returning to reject queries where the value provided is not a list.
|
||||
auto&& value = _t->bind(params._options);
|
||||
|
||||
@@ -451,11 +444,8 @@ lists::discarder_by_index::execute(mutation& m, const exploded_clustering_prefix
|
||||
auto cvalue = dynamic_pointer_cast<constants::value>(index);
|
||||
assert(cvalue);
|
||||
|
||||
std::experimental::optional<clustering_key> row_key;
|
||||
if (!column.is_static()) {
|
||||
row_key = clustering_key::from_clustering_prefix(*params._schema, prefix);
|
||||
}
|
||||
auto&& existing_list = params.get_prefetched_list(m.key(), std::move(row_key), column);
|
||||
auto row_key = clustering_key::from_clustering_prefix(*params._schema, prefix);
|
||||
auto&& existing_list = params.get_prefetched_list(m.key(), row_key, column);
|
||||
int32_t idx = read_simple_exactly<int32_t>(*cvalue->_bytes);
|
||||
if (!existing_list) {
|
||||
throw exceptions::invalid_request_exception("Attempted to delete an element from a list which is null");
|
||||
|
||||
@@ -99,9 +99,9 @@ query_options::query_options(query_options&& o, std::vector<std::vector<bytes_vi
|
||||
_batch_options = std::move(tmp);
|
||||
}
|
||||
|
||||
query_options::query_options(db::consistency_level cl, std::vector<bytes_opt> values)
|
||||
query_options::query_options(std::vector<bytes_opt> values)
|
||||
: query_options(
|
||||
cl,
|
||||
db::consistency_level::ONE,
|
||||
{},
|
||||
std::move(values),
|
||||
{},
|
||||
@@ -120,11 +120,6 @@ query_options::query_options(db::consistency_level cl, std::vector<bytes_opt> va
|
||||
}
|
||||
}
|
||||
|
||||
query_options::query_options(std::vector<bytes_opt> values)
|
||||
: query_options(
|
||||
db::consistency_level::ONE, std::move(values))
|
||||
{}
|
||||
|
||||
db::consistency_level query_options::get_consistency() const
|
||||
{
|
||||
return _consistency;
|
||||
|
||||
@@ -112,7 +112,6 @@ public:
|
||||
|
||||
// forInternalUse
|
||||
explicit query_options(std::vector<bytes_opt> values);
|
||||
explicit query_options(db::consistency_level, std::vector<bytes_opt> values);
|
||||
|
||||
db::consistency_level get_consistency() const;
|
||||
bytes_view_opt get_value_at(size_t idx) const;
|
||||
|
||||
@@ -109,7 +109,6 @@ future<> query_processor::stop()
|
||||
future<::shared_ptr<result_message>>
|
||||
query_processor::process(const sstring_view& query_string, service::query_state& query_state, query_options& options)
|
||||
{
|
||||
log.trace("process: \"{}\"", query_string);
|
||||
auto p = get_statement(query_string, query_state.get_client_state());
|
||||
options.prepare(p->bound_names);
|
||||
auto cql_statement = p->statement;
|
||||
@@ -300,9 +299,8 @@ query_processor::parse_statement(const sstring_view& query)
|
||||
}
|
||||
|
||||
query_options query_processor::make_internal_options(
|
||||
::shared_ptr<statements::parsed_statement::prepared> p,
|
||||
const std::initializer_list<data_value>& values,
|
||||
db::consistency_level cl) {
|
||||
::shared_ptr<statements::parsed_statement::prepared> p,
|
||||
const std::initializer_list<data_value>& values) {
|
||||
if (p->bound_names.size() != values.size()) {
|
||||
throw std::invalid_argument(sprint("Invalid number of values. Expecting %d but got %d", p->bound_names.size(), values.size()));
|
||||
}
|
||||
@@ -318,12 +316,13 @@ query_options query_processor::make_internal_options(
|
||||
bound_values.push_back({n->type->decompose(v)});
|
||||
}
|
||||
}
|
||||
return query_options(cl, bound_values);
|
||||
return query_options(bound_values);
|
||||
}
|
||||
|
||||
::shared_ptr<statements::parsed_statement::prepared> query_processor::prepare_internal(
|
||||
const sstring& query_string) {
|
||||
auto& p = _internal_statements[query_string];
|
||||
const std::experimental::string_view& query_string) {
|
||||
|
||||
auto& p = _internal_statements[sstring(query_string.begin(), query_string.end())];
|
||||
if (p == nullptr) {
|
||||
auto np = parse_statement(query_string)->prepare(_db.local());
|
||||
np->statement->validate(_proxy, *_internal_state);
|
||||
@@ -333,54 +332,22 @@ query_options query_processor::make_internal_options(
|
||||
}
|
||||
|
||||
future<::shared_ptr<untyped_result_set>> query_processor::execute_internal(
|
||||
const sstring& query_string,
|
||||
const std::experimental::string_view& query_string,
|
||||
const std::initializer_list<data_value>& values) {
|
||||
if (log.is_enabled(logging::log_level::trace)) {
|
||||
log.trace("execute_internal: \"{}\" ({})", query_string, ::join(", ", values));
|
||||
}
|
||||
auto p = prepare_internal(query_string);
|
||||
return execute_internal(p, values);
|
||||
}
|
||||
|
||||
future<::shared_ptr<untyped_result_set>> query_processor::execute_internal(
|
||||
::shared_ptr<statements::parsed_statement::prepared> p,
|
||||
const std::initializer_list<data_value>& values) {
|
||||
auto opts = make_internal_options(p, values);
|
||||
return do_with(std::move(opts),
|
||||
[this, p = std::move(p)](query_options & opts) {
|
||||
return p->statement->execute_internal(_proxy, *_internal_state, opts).then(
|
||||
[p](::shared_ptr<transport::messages::result_message> msg) {
|
||||
[](::shared_ptr<transport::messages::result_message> msg) {
|
||||
return make_ready_future<::shared_ptr<untyped_result_set>>(::make_shared<untyped_result_set>(msg));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
future<::shared_ptr<untyped_result_set>> query_processor::process(
|
||||
const sstring& query_string,
|
||||
db::consistency_level cl, const std::initializer_list<data_value>& values, bool cache)
|
||||
{
|
||||
auto p = cache ? prepare_internal(query_string) : parse_statement(query_string)->prepare(_db.local());
|
||||
if (!cache) {
|
||||
p->statement->validate(_proxy, *_internal_state);
|
||||
}
|
||||
return process(p, cl, values);
|
||||
}
|
||||
|
||||
future<::shared_ptr<untyped_result_set>> query_processor::process(
|
||||
::shared_ptr<statements::parsed_statement::prepared> p,
|
||||
db::consistency_level cl, const std::initializer_list<data_value>& values)
|
||||
{
|
||||
auto opts = make_internal_options(p, values, cl);
|
||||
return do_with(std::move(opts),
|
||||
[this, p = std::move(p)](query_options & opts) {
|
||||
return p->statement->execute(_proxy, *_internal_state, opts).then(
|
||||
[p](::shared_ptr<transport::messages::result_message> msg) {
|
||||
return make_ready_future<::shared_ptr<untyped_result_set>>(::make_shared<untyped_result_set>(msg));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
future<::shared_ptr<transport::messages::result_message>>
|
||||
query_processor::process_batch(::shared_ptr<statements::batch_statement> batch, service::query_state& query_state, query_options& options) {
|
||||
auto& client_state = query_state.get_client_state();
|
||||
@@ -421,12 +388,8 @@ void query_processor::migration_subscriber::on_update_keyspace(const sstring& ks
|
||||
{
|
||||
}
|
||||
|
||||
void query_processor::migration_subscriber::on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool columns_changed)
|
||||
void query_processor::migration_subscriber::on_update_column_family(const sstring& ks_name, const sstring& cf_name)
|
||||
{
|
||||
if (columns_changed) {
|
||||
log.info("Column definitions for {}.{} changed, invalidating related prepared statements", ks_name, cf_name);
|
||||
remove_invalid_prepared_statements(ks_name, cf_name);
|
||||
}
|
||||
}
|
||||
|
||||
void query_processor::migration_subscriber::on_update_user_type(const sstring& ks_name, const sstring& type_name)
|
||||
@@ -476,7 +439,9 @@ void query_processor::migration_subscriber::remove_invalid_prepared_statements(s
|
||||
}
|
||||
}
|
||||
for (auto& id : invalid) {
|
||||
_qp->invalidate_prepared_statement(id);
|
||||
get_query_processor().invoke_on_all([id] (auto& qp) {
|
||||
qp.invalidate_prepared_statement(id);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -322,25 +322,14 @@ public:
|
||||
}
|
||||
#endif
|
||||
private:
|
||||
query_options make_internal_options(::shared_ptr<statements::parsed_statement::prepared>, const std::initializer_list<data_value>&, db::consistency_level = db::consistency_level::ONE);
|
||||
::shared_ptr<statements::parsed_statement::prepared> prepare_internal(const std::experimental::string_view& query);
|
||||
query_options make_internal_options(::shared_ptr<statements::parsed_statement::prepared>, const std::initializer_list<data_value>&);
|
||||
|
||||
public:
|
||||
future<::shared_ptr<untyped_result_set>> execute_internal(
|
||||
const sstring& query_string,
|
||||
const std::experimental::string_view& query_string,
|
||||
const std::initializer_list<data_value>& = { });
|
||||
|
||||
::shared_ptr<statements::parsed_statement::prepared> prepare_internal(const sstring& query);
|
||||
|
||||
future<::shared_ptr<untyped_result_set>> execute_internal(
|
||||
::shared_ptr<statements::parsed_statement::prepared>,
|
||||
const std::initializer_list<data_value>& = { });
|
||||
|
||||
future<::shared_ptr<untyped_result_set>> process(
|
||||
const sstring& query_string,
|
||||
db::consistency_level, const std::initializer_list<data_value>& = { }, bool cache = false);
|
||||
future<::shared_ptr<untyped_result_set>> process(
|
||||
::shared_ptr<statements::parsed_statement::prepared>,
|
||||
db::consistency_level, const std::initializer_list<data_value>& = { });
|
||||
|
||||
/*
|
||||
* This function provides a timestamp that is guaranteed to be higher than any timestamp
|
||||
* previously used in internal queries.
|
||||
@@ -497,7 +486,7 @@ public:
|
||||
virtual void on_create_aggregate(const sstring& ks_name, const sstring& aggregate_name) override;
|
||||
|
||||
virtual void on_update_keyspace(const sstring& ks_name) override;
|
||||
virtual void on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool columns_changed) override;
|
||||
virtual void on_update_column_family(const sstring& ks_name, const sstring& cf_name) override;
|
||||
virtual void on_update_user_type(const sstring& ks_name, const sstring& type_name) override;
|
||||
virtual void on_update_function(const sstring& ks_name, const sstring& function_name) override;
|
||||
virtual void on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) override;
|
||||
|
||||
@@ -134,7 +134,7 @@ public:
|
||||
* @return <code>true</code> if this selection contains a collection, <code>false</code> otherwise.
|
||||
*/
|
||||
bool contains_a_collection() const {
|
||||
if (!_schema->has_multi_cell_collections()) {
|
||||
if (!_schema->has_collections()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
10
cql3/sets.cc
10
cql3/sets.cc
@@ -258,14 +258,16 @@ sets::adder::do_add(mutation& m, const exploded_clustering_prefix& row_key, cons
|
||||
auto smut = set_type->serialize_mutation_form(mut);
|
||||
|
||||
m.set_cell(row_key, column, std::move(smut));
|
||||
} else if (set_value != nullptr) {
|
||||
} else {
|
||||
// for frozen sets, we're overwriting the whole cell
|
||||
auto v = set_type->serialize_partially_deserialized_form(
|
||||
{set_value->_elements.begin(), set_value->_elements.end()},
|
||||
serialization_format::internal());
|
||||
m.set_cell(row_key, column, params.make_cell(std::move(v)));
|
||||
} else {
|
||||
m.set_cell(row_key, column, params.make_dead_cell());
|
||||
if (set_value->_elements.empty()) {
|
||||
m.set_cell(row_key, column, params.make_dead_cell());
|
||||
} else {
|
||||
m.set_cell(row_key, column, params.make_cell(std::move(v)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,284 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2015 ScyllaDB
|
||||
*
|
||||
* Modified by ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "cql3/statements/alter_table_statement.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "validation.hh"
|
||||
#include "db/config.hh"
|
||||
|
||||
namespace cql3 {
|
||||
|
||||
namespace statements {
|
||||
|
||||
alter_table_statement::alter_table_statement(shared_ptr<cf_name> name,
|
||||
type t,
|
||||
shared_ptr<column_identifier::raw> column_name,
|
||||
shared_ptr<cql3_type::raw> validator,
|
||||
shared_ptr<cf_prop_defs> properties,
|
||||
renames_type renames,
|
||||
bool is_static)
|
||||
: schema_altering_statement(std::move(name))
|
||||
, _type(t)
|
||||
, _raw_column_name(std::move(column_name))
|
||||
, _validator(std::move(validator))
|
||||
, _properties(std::move(properties))
|
||||
, _renames(std::move(renames))
|
||||
, _is_static(is_static)
|
||||
{
|
||||
}
|
||||
|
||||
void alter_table_statement::check_access(const service::client_state& state)
|
||||
{
|
||||
warn(unimplemented::cause::PERMISSIONS);
|
||||
#if 0
|
||||
state.hasColumnFamilyAccess(keyspace(), columnFamily(), Permission.ALTER);
|
||||
#endif
|
||||
}
|
||||
|
||||
void alter_table_statement::validate(distributed<service::storage_proxy>& proxy, const service::client_state& state)
|
||||
{
|
||||
// validated in announce_migration()
|
||||
}
|
||||
|
||||
static const sstring ALTER_TABLE_FEATURE = "ALTER TABLE";
|
||||
|
||||
future<bool> alter_table_statement::announce_migration(distributed<service::storage_proxy>& proxy, bool is_local_only)
|
||||
{
|
||||
auto& db = proxy.local().get_db().local();
|
||||
db.get_config().check_experimental(ALTER_TABLE_FEATURE);
|
||||
|
||||
auto schema = validation::validate_column_family(db, keyspace(), column_family());
|
||||
auto cfm = schema_builder(schema);
|
||||
|
||||
shared_ptr<cql3_type> validator;
|
||||
if (_validator) {
|
||||
validator = _validator->prepare(db, keyspace());
|
||||
}
|
||||
shared_ptr<column_identifier> column_name;
|
||||
const column_definition* def = nullptr;
|
||||
if (_raw_column_name) {
|
||||
column_name = _raw_column_name->prepare_column_identifier(schema);
|
||||
def = get_column_definition(schema, *column_name);
|
||||
}
|
||||
|
||||
switch (_type) {
|
||||
case alter_table_statement::type::add:
|
||||
{
|
||||
assert(column_name);
|
||||
if (schema->is_dense()) {
|
||||
throw exceptions::invalid_request_exception("Cannot add new column to a COMPACT STORAGE table");
|
||||
}
|
||||
|
||||
if (_is_static) {
|
||||
if (!schema->is_compound()) {
|
||||
throw exceptions::invalid_request_exception("Static columns are not allowed in COMPACT STORAGE tables");
|
||||
}
|
||||
if (!schema->clustering_key_size()) {
|
||||
throw exceptions::invalid_request_exception("Static columns are only useful (and thus allowed) if the table has at least one clustering column");
|
||||
}
|
||||
}
|
||||
|
||||
if (def) {
|
||||
if (def->is_partition_key()) {
|
||||
throw exceptions::invalid_request_exception(sprint("Invalid column name %s because it conflicts with a PRIMARY KEY part", column_name));
|
||||
} else {
|
||||
throw exceptions::invalid_request_exception(sprint("Invalid column name %s because it conflicts with an existing column", column_name));
|
||||
}
|
||||
}
|
||||
|
||||
// Cannot re-add a dropped counter column. See #7831.
|
||||
if (schema->is_counter() && schema->dropped_columns().count(column_name->text())) {
|
||||
throw exceptions::invalid_request_exception(sprint("Cannot re-add previously dropped counter column %s", column_name));
|
||||
}
|
||||
|
||||
auto type = validator->get_type();
|
||||
if (type->is_collection() && type->is_multi_cell()) {
|
||||
if (!schema->is_compound()) {
|
||||
throw exceptions::invalid_request_exception("Cannot use non-frozen collections with a non-composite PRIMARY KEY");
|
||||
}
|
||||
if (schema->is_super()) {
|
||||
throw exceptions::invalid_request_exception("Cannot use non-frozen collections with super column families");
|
||||
}
|
||||
|
||||
auto it = schema->collections().find(column_name->name());
|
||||
if (it != schema->collections().end() && !type->is_compatible_with(*it->second)) {
|
||||
throw exceptions::invalid_request_exception(sprint("Cannot add a collection with the name %s "
|
||||
"because a collection with the same name and a different type has already been used in the past", column_name));
|
||||
}
|
||||
}
|
||||
|
||||
cfm.with_column(column_name->name(), type, _is_static ? column_kind::static_column : column_kind::regular_column);
|
||||
break;
|
||||
}
|
||||
case alter_table_statement::type::alter:
|
||||
{
|
||||
assert(column_name);
|
||||
if (!def) {
|
||||
throw exceptions::invalid_request_exception(sprint("Column %s was not found in table %s", column_name, column_family()));
|
||||
}
|
||||
|
||||
auto type = validator->get_type();
|
||||
switch (def->kind) {
|
||||
case column_kind::partition_key:
|
||||
if (type->is_counter()) {
|
||||
throw exceptions::invalid_request_exception(sprint("counter type is not supported for PRIMARY KEY part %s", column_name));
|
||||
}
|
||||
|
||||
if (!type->is_value_compatible_with(*def->type)) {
|
||||
throw exceptions::configuration_exception(sprint("Cannot change %s from type %s to type %s: types are incompatible.",
|
||||
column_name,
|
||||
def->type->as_cql3_type(),
|
||||
validator));
|
||||
}
|
||||
break;
|
||||
|
||||
case column_kind::clustering_key:
|
||||
if (!schema->is_cql3_table()) {
|
||||
throw exceptions::invalid_request_exception(sprint("Cannot alter clustering column %s in a non-CQL3 table", column_name));
|
||||
}
|
||||
|
||||
// Note that CFMetaData.validateCompatibility already validate the change we're about to do. However, the error message it
|
||||
// sends is a bit cryptic for a CQL3 user, so validating here for a sake of returning a better error message
|
||||
// Do note that we need isCompatibleWith here, not just isValueCompatibleWith.
|
||||
if (!type->is_compatible_with(*def->type)) {
|
||||
throw exceptions::configuration_exception(sprint("Cannot change %s from type %s to type %s: types are not order-compatible.",
|
||||
column_name,
|
||||
def->type->as_cql3_type(),
|
||||
validator));
|
||||
}
|
||||
break;
|
||||
|
||||
case column_kind::compact_column:
|
||||
case column_kind::regular_column:
|
||||
case column_kind::static_column:
|
||||
// Thrift allows to change a column validator so CFMetaData.validateCompatibility will let it slide
|
||||
// if we change to an incompatible type (contrarily to the comparator case). But we don't want to
|
||||
// allow it for CQL3 (see #5882) so validating it explicitly here. We only care about value compatibility
|
||||
// though since we won't compare values (except when there is an index, but that is validated by
|
||||
// ColumnDefinition already).
|
||||
if (!type->is_value_compatible_with(*def->type)) {
|
||||
throw exceptions::configuration_exception(sprint("Cannot change %s from type %s to type %s: types are incompatible.",
|
||||
column_name,
|
||||
def->type->as_cql3_type(),
|
||||
validator));
|
||||
}
|
||||
break;
|
||||
}
|
||||
// In any case, we update the column definition
|
||||
cfm.with_altered_column_type(column_name->name(), type);
|
||||
break;
|
||||
}
|
||||
case alter_table_statement::type::drop:
|
||||
assert(column_name);
|
||||
if (!schema->is_cql3_table()) {
|
||||
throw exceptions::invalid_request_exception("Cannot drop columns from a non-CQL3 table");
|
||||
}
|
||||
if (!def) {
|
||||
throw exceptions::invalid_request_exception(sprint("Column %s was not found in table %s", column_name, column_family()));
|
||||
}
|
||||
|
||||
if (def->is_primary_key()) {
|
||||
throw exceptions::invalid_request_exception(sprint("Cannot drop PRIMARY KEY part %s", column_name));
|
||||
} else {
|
||||
for (auto&& column_def : boost::range::join(schema->static_columns(), schema->regular_columns())) { // find
|
||||
if (column_def.name() == column_name->name()) {
|
||||
cfm.without_column(column_name->name());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case alter_table_statement::type::opts:
|
||||
if (!_properties) {
|
||||
throw exceptions::invalid_request_exception("ALTER COLUMNFAMILY WITH invoked, but no parameters found");
|
||||
}
|
||||
|
||||
_properties->validate();
|
||||
|
||||
if (schema->is_counter() && _properties->get_default_time_to_live() > 0) {
|
||||
throw exceptions::invalid_request_exception("Cannot set default_time_to_live on a table with counters");
|
||||
}
|
||||
|
||||
_properties->apply_to_builder(cfm);
|
||||
break;
|
||||
|
||||
case alter_table_statement::type::rename:
|
||||
for (auto&& entry : _renames) {
|
||||
auto from = entry.first->prepare_column_identifier(schema);
|
||||
auto to = entry.second->prepare_column_identifier(schema);
|
||||
|
||||
auto def = schema->get_column_definition(from->name());
|
||||
if (!def) {
|
||||
throw exceptions::invalid_request_exception(sprint("Cannot rename unknown column %s in table %s", from, column_family()));
|
||||
}
|
||||
|
||||
if (schema->get_column_definition(to->name())) {
|
||||
throw exceptions::invalid_request_exception(sprint("Cannot rename column %s to %s in table %s; another column of that name already exist", from, to, column_family()));
|
||||
}
|
||||
|
||||
if (def->is_part_of_cell_name()) {
|
||||
throw exceptions::invalid_request_exception(sprint("Cannot rename non PRIMARY KEY part %s", from));
|
||||
}
|
||||
|
||||
if (def->is_indexed()) {
|
||||
throw exceptions::invalid_request_exception(sprint("Cannot rename column %s because it is secondary indexed", from));
|
||||
}
|
||||
|
||||
cfm.with_column_rename(from->name(), to->name());
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return service::get_local_migration_manager().announce_column_family_update(cfm.build(), false, is_local_only).then([] {
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
||||
shared_ptr<transport::event::schema_change> alter_table_statement::change_event()
|
||||
{
|
||||
return make_shared<transport::event::schema_change>(transport::event::schema_change::change_type::UPDATED,
|
||||
transport::event::schema_change::target_type::TABLE, keyspace(), column_family());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,87 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2015 ScyllaDB
|
||||
*
|
||||
* Modified by ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cql3/statements/schema_altering_statement.hh"
|
||||
#include "cql3/statements/cf_prop_defs.hh"
|
||||
#include "cql3/cql3_type.hh"
|
||||
|
||||
namespace cql3 {
|
||||
|
||||
namespace statements {
|
||||
|
||||
class alter_table_statement : public schema_altering_statement {
|
||||
public:
|
||||
enum class type {
|
||||
add,
|
||||
alter,
|
||||
drop,
|
||||
opts,
|
||||
rename,
|
||||
};
|
||||
using renames_type = std::vector<std::pair<shared_ptr<column_identifier::raw>,
|
||||
shared_ptr<column_identifier::raw>>>;
|
||||
private:
|
||||
const type _type;
|
||||
const shared_ptr<column_identifier::raw> _raw_column_name;
|
||||
const shared_ptr<cql3_type::raw> _validator;
|
||||
const shared_ptr<cf_prop_defs> _properties;
|
||||
const renames_type _renames;
|
||||
const bool _is_static;
|
||||
public:
|
||||
alter_table_statement(shared_ptr<cf_name> name,
|
||||
type t,
|
||||
shared_ptr<column_identifier::raw> column_name,
|
||||
shared_ptr<cql3_type::raw> validator,
|
||||
shared_ptr<cf_prop_defs> properties,
|
||||
renames_type renames,
|
||||
bool is_static);
|
||||
|
||||
virtual void check_access(const service::client_state& state) override;
|
||||
virtual void validate(distributed<service::storage_proxy>& proxy, const service::client_state& state) override;
|
||||
virtual future<bool> announce_migration(distributed<service::storage_proxy>& proxy, bool is_local_only) override;
|
||||
virtual shared_ptr<transport::event::schema_change> change_event() override;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -38,7 +38,6 @@
|
||||
*/
|
||||
|
||||
#include "batch_statement.hh"
|
||||
#include "db/config.hh"
|
||||
|
||||
namespace cql3 {
|
||||
|
||||
@@ -56,50 +55,6 @@ bool batch_statement::depends_on_column_family(const sstring& cf_name) const
|
||||
return false;
|
||||
}
|
||||
|
||||
void batch_statement::verify_batch_size(const std::vector<mutation>& mutations) {
|
||||
size_t warn_threshold = service::get_local_storage_proxy().get_db().local().get_config().batch_size_warn_threshold_in_kb();
|
||||
|
||||
class my_partition_visitor : public mutation_partition_visitor {
|
||||
public:
|
||||
void accept_partition_tombstone(tombstone) override {}
|
||||
void accept_static_cell(column_id, atomic_cell_view v) override {
|
||||
size += v.value().size();
|
||||
}
|
||||
void accept_static_cell(column_id, collection_mutation_view v) override {
|
||||
size += v.data.size();
|
||||
}
|
||||
void accept_row_tombstone(clustering_key_prefix_view, tombstone) override {}
|
||||
void accept_row(clustering_key_view, tombstone, const row_marker&) override {}
|
||||
void accept_row_cell(column_id, atomic_cell_view v) override {
|
||||
size += v.value().size();
|
||||
}
|
||||
void accept_row_cell(column_id id, collection_mutation_view v) override {
|
||||
size += v.data.size();
|
||||
}
|
||||
|
||||
size_t size = 0;
|
||||
};
|
||||
|
||||
my_partition_visitor v;
|
||||
|
||||
for (auto&m : mutations) {
|
||||
m.partition().accept(*m.schema(), v);
|
||||
}
|
||||
|
||||
auto size = v.size / 1024;
|
||||
|
||||
if (size > warn_threshold) {
|
||||
std::unordered_set<sstring> ks_cf_pairs;
|
||||
for (auto&& m : mutations) {
|
||||
ks_cf_pairs.insert(m.schema()->ks_name() + "." + m.schema()->cf_name());
|
||||
}
|
||||
_logger.warn(
|
||||
"Batch of prepared statements for {} is of size {}, exceeding specified threshold of {} by {}.{}",
|
||||
join(", ", ks_cf_pairs), size, warn_threshold,
|
||||
size - warn_threshold, "");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -196,8 +196,27 @@ public:
|
||||
* Checks batch size to ensure threshold is met. If not, a warning is logged.
|
||||
* @param cfs ColumnFamilies that will store the batch's mutations.
|
||||
*/
|
||||
static void verify_batch_size(const std::vector<mutation>& mutations);
|
||||
static void verify_batch_size(const std::vector<mutation>& mutations) {
|
||||
size_t warn_threshold = 1000; // FIXME: database_descriptor::get_batch_size_warn_threshold();
|
||||
size_t fail_threshold = 2000; // FIXME: database_descriptor::get_batch_size_fail_threshold();
|
||||
|
||||
size_t size = mutations.size();
|
||||
|
||||
if (size > warn_threshold) {
|
||||
std::unordered_set<sstring> ks_cf_pairs;
|
||||
for (auto&& m : mutations) {
|
||||
ks_cf_pairs.insert(m.schema()->ks_name() + "." + m.schema()->cf_name());
|
||||
}
|
||||
const char* format = "Batch of prepared statements for {} is of size {}, exceeding specified threshold of {} by {}.{}";
|
||||
if (size > fail_threshold) {
|
||||
// FIXME: Tracing.trace(format, new Object[] {ksCfPairs, size, failThreshold, size - failThreshold, " (see batch_size_fail_threshold_in_kb)"});
|
||||
_logger.error(format, join(", ", ks_cf_pairs), size, fail_threshold, size - fail_threshold, " (see batch_size_fail_threshold_in_kb)");
|
||||
throw exceptions::invalid_request_exception("Batch too large");
|
||||
} else {
|
||||
_logger.warn(format, join(", ", ks_cf_pairs), size, warn_threshold, size - warn_threshold, "");
|
||||
}
|
||||
}
|
||||
}
|
||||
virtual future<shared_ptr<transport::messages::result_message>> execute(
|
||||
distributed<service::storage_proxy>& storage, service::query_state& state, const query_options& options) override {
|
||||
return execute(storage, state, options, false, options.get_timestamp(state));
|
||||
|
||||
@@ -139,11 +139,6 @@ std::map<sstring, sstring> cf_prop_defs::get_compression_options() const {
|
||||
return std::map<sstring, sstring>{};
|
||||
}
|
||||
|
||||
int32_t cf_prop_defs::get_default_time_to_live() const
|
||||
{
|
||||
return get_int(KW_DEFAULT_TIME_TO_LIVE, 0);
|
||||
}
|
||||
|
||||
void cf_prop_defs::apply_to_builder(schema_builder& builder) {
|
||||
if (has_property(KW_COMMENT)) {
|
||||
builder.set_comment(get_string(KW_COMMENT, ""));
|
||||
|
||||
@@ -100,8 +100,6 @@ public:
|
||||
return options;
|
||||
}
|
||||
#endif
|
||||
int32_t get_default_time_to_live() const;
|
||||
|
||||
void apply_to_builder(schema_builder& builder);
|
||||
void validate_minimum_int(const sstring& field, int32_t minimum_value, int32_t default_value) const;
|
||||
};
|
||||
|
||||
@@ -81,7 +81,7 @@ cql3::statements::create_index_statement::validate(distributed<service::storage_
|
||||
auto cd = schema->get_column_definition(target->column->name());
|
||||
|
||||
if (cd == nullptr) {
|
||||
throw exceptions::invalid_request_exception(sprint("No column definition found for column %s", *target->column));
|
||||
throw exceptions::invalid_request_exception(sprint("No column definition found for column %s", target->column->name()));
|
||||
}
|
||||
|
||||
bool is_map = dynamic_cast<const collection_type_impl *>(cd->type.get()) != nullptr
|
||||
@@ -93,7 +93,7 @@ cql3::statements::create_index_statement::validate(distributed<service::storage_
|
||||
throw exceptions::invalid_request_exception(
|
||||
sprint("Cannot create index on %s of frozen<map> column %s",
|
||||
index_target::index_option(target->type),
|
||||
*target->column));
|
||||
target->column->name()));
|
||||
}
|
||||
} else {
|
||||
// validateNotFullIndex
|
||||
@@ -107,7 +107,7 @@ cql3::statements::create_index_statement::validate(distributed<service::storage_
|
||||
sprint(
|
||||
"Cannot create index on %s of column %s; only non-frozen collections support %s indexes",
|
||||
index_target::index_option(target->type),
|
||||
*target->column,
|
||||
target->column->name(),
|
||||
index_target::index_option(target->type)));
|
||||
}
|
||||
// validateTargetColumnIsMapIfIndexInvolvesKeys
|
||||
@@ -118,7 +118,7 @@ cql3::statements::create_index_statement::validate(distributed<service::storage_
|
||||
sprint(
|
||||
"Cannot create index on %s of column %s with non-map type",
|
||||
index_target::index_option(target->type),
|
||||
*target->column));
|
||||
target->column->name()));
|
||||
|
||||
}
|
||||
}
|
||||
@@ -132,9 +132,9 @@ cql3::statements::create_index_statement::validate(distributed<service::storage_
|
||||
"Cannot create index on %s(%s): an index on %s(%s) already exists and indexing "
|
||||
"a map on more than one dimension at the same time is not currently supported",
|
||||
index_target::index_option(target->type),
|
||||
*target->column,
|
||||
target->column->name(),
|
||||
index_target::index_option(prev_type),
|
||||
*target->column));
|
||||
target->column->name()));
|
||||
}
|
||||
if (_if_not_exists) {
|
||||
return;
|
||||
@@ -164,13 +164,12 @@ cql3::statements::create_index_statement::validate(distributed<service::storage_
|
||||
throw exceptions::invalid_request_exception(
|
||||
sprint(
|
||||
"Cannot create secondary index on partition key column %s",
|
||||
*target->column));
|
||||
target->column->name()));
|
||||
}
|
||||
}
|
||||
|
||||
future<bool>
|
||||
cql3::statements::create_index_statement::announce_migration(distributed<service::storage_proxy>& proxy, bool is_local_only) {
|
||||
throw std::runtime_error("Indexes are not supported yet");
|
||||
auto schema = proxy.local().get_db().local().find_schema(keyspace(), column_family());
|
||||
auto target = _raw_target->prepare(schema);
|
||||
|
||||
|
||||
@@ -186,23 +186,11 @@ modification_statement::make_update_parameters(
|
||||
class prefetch_data_builder {
|
||||
update_parameters::prefetch_data& _data;
|
||||
const query::partition_slice& _ps;
|
||||
schema_ptr _schema;
|
||||
std::experimental::optional<partition_key> _pkey;
|
||||
private:
|
||||
void add_cell(update_parameters::prefetch_data::row& cells, const column_definition& def, const std::experimental::optional<collection_mutation_view>& cell) {
|
||||
if (cell) {
|
||||
auto ctype = static_pointer_cast<const collection_type_impl>(def.type);
|
||||
if (!ctype->is_multi_cell()) {
|
||||
throw std::logic_error(sprint("cannot prefetch frozen collection: %s", def.name_as_text()));
|
||||
}
|
||||
cells.emplace(def.id, collection_mutation{*cell});
|
||||
}
|
||||
};
|
||||
public:
|
||||
prefetch_data_builder(schema_ptr s, update_parameters::prefetch_data& data, const query::partition_slice& ps)
|
||||
prefetch_data_builder(update_parameters::prefetch_data& data, const query::partition_slice& ps)
|
||||
: _data(data)
|
||||
, _ps(ps)
|
||||
, _schema(std::move(s))
|
||||
{ }
|
||||
|
||||
void accept_new_partition(const partition_key& key, uint32_t row_count) {
|
||||
@@ -217,9 +205,20 @@ public:
|
||||
const query::result_row_view& row) {
|
||||
update_parameters::prefetch_data::row cells;
|
||||
|
||||
auto add_cell = [&cells] (column_id id, std::experimental::optional<collection_mutation_view>&& cell) {
|
||||
if (cell) {
|
||||
cells.emplace(id, collection_mutation{to_bytes(cell->data)});
|
||||
}
|
||||
};
|
||||
|
||||
auto static_row_iterator = static_row.iterator();
|
||||
for (auto&& id : _ps.static_columns) {
|
||||
add_cell(id, static_row_iterator.next_collection_cell());
|
||||
}
|
||||
|
||||
auto row_iterator = row.iterator();
|
||||
for (auto&& id : _ps.regular_columns) {
|
||||
add_cell(cells, _schema->regular_column_at(id), row_iterator.next_collection_cell());
|
||||
add_cell(id, row_iterator.next_collection_cell());
|
||||
}
|
||||
|
||||
_data.rows.emplace(std::make_pair(*_pkey, key), std::move(cells));
|
||||
@@ -229,16 +228,7 @@ public:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
void accept_partition_end(const query::result_row_view& static_row) {
|
||||
update_parameters::prefetch_data::row cells;
|
||||
|
||||
auto static_row_iterator = static_row.iterator();
|
||||
for (auto&& id : _ps.static_columns) {
|
||||
add_cell(cells, _schema->static_column_at(id), static_row_iterator.next_collection_cell());
|
||||
}
|
||||
|
||||
_data.rows.emplace(std::make_pair(*_pkey, std::experimental::nullopt), std::move(cells));
|
||||
}
|
||||
void accept_partition_end(const query::result_row_view& static_row) {}
|
||||
};
|
||||
|
||||
future<update_parameters::prefetched_rows_type>
|
||||
@@ -280,7 +270,7 @@ modification_statement::read_required_rows(
|
||||
for (auto&& pk : *keys) {
|
||||
pr.emplace_back(dht::global_partitioner().decorate_key(*s, pk));
|
||||
}
|
||||
query::read_command cmd(s->id(), s->version(), ps, std::numeric_limits<uint32_t>::max());
|
||||
query::read_command cmd(s->id(), ps, std::numeric_limits<uint32_t>::max());
|
||||
// FIXME: ignoring "local"
|
||||
return proxy.local().query(s, make_lw_shared(std::move(cmd)), std::move(pr), cl).then([this, ps] (auto result) {
|
||||
// FIXME: copying
|
||||
@@ -288,7 +278,7 @@ modification_statement::read_required_rows(
|
||||
bytes_ostream buf(result->buf());
|
||||
query::result_view v(buf.linearize());
|
||||
auto prefetched_rows = update_parameters::prefetched_rows_type({update_parameters::prefetch_data(s)});
|
||||
v.consume(ps, prefetch_data_builder(s, prefetched_rows.value(), ps));
|
||||
v.consume(ps, prefetch_data_builder(prefetched_rows.value(), ps));
|
||||
return prefetched_rows;
|
||||
});
|
||||
}
|
||||
|
||||
@@ -218,24 +218,22 @@ select_statement::execute(distributed<service::storage_proxy>& proxy, service::q
|
||||
int32_t limit = get_limit(options);
|
||||
auto now = db_clock::now();
|
||||
|
||||
auto command = ::make_lw_shared<query::read_command>(_schema->id(), _schema->version(),
|
||||
make_partition_slice(options), limit, to_gc_clock(now));
|
||||
auto command = ::make_lw_shared<query::read_command>(_schema->id(), make_partition_slice(options), limit, to_gc_clock(now));
|
||||
|
||||
int32_t page_size = options.get_page_size();
|
||||
|
||||
// An aggregation query will never be paged for the user, but we always page it internally to avoid OOM.
|
||||
// If we user provided a page_size we'll use that to page internally (because why not), otherwise we use our default
|
||||
// Note that if there are some nodes in the cluster with a version less than 2.0, we can't use paging (CASSANDRA-6707).
|
||||
auto aggregate = _selection->is_aggregate();
|
||||
if (aggregate && page_size <= 0) {
|
||||
if (_selection->is_aggregate() && page_size <= 0) {
|
||||
page_size = DEFAULT_COUNT_PAGE_SIZE;
|
||||
}
|
||||
|
||||
auto key_ranges = _restrictions->get_partition_key_ranges(options);
|
||||
|
||||
if (!aggregate && (page_size <= 0
|
||||
if (page_size <= 0
|
||||
|| !service::pager::query_pagers::may_need_paging(page_size,
|
||||
*command, key_ranges))) {
|
||||
*command, key_ranges)) {
|
||||
return execute(proxy, command, std::move(key_ranges), state, options,
|
||||
now);
|
||||
}
|
||||
@@ -243,7 +241,7 @@ select_statement::execute(distributed<service::storage_proxy>& proxy, service::q
|
||||
auto p = service::pager::query_pagers::pager(_schema, _selection,
|
||||
state, options, command, std::move(key_ranges));
|
||||
|
||||
if (aggregate) {
|
||||
if (_selection->is_aggregate()) {
|
||||
return do_with(
|
||||
cql3::selection::result_set_builder(*_selection, now,
|
||||
options.get_serialization_format()),
|
||||
@@ -310,8 +308,7 @@ future<::shared_ptr<transport::messages::result_message>>
|
||||
select_statement::execute_internal(distributed<service::storage_proxy>& proxy, service::query_state& state, const query_options& options) {
|
||||
int32_t limit = get_limit(options);
|
||||
auto now = db_clock::now();
|
||||
auto command = ::make_lw_shared<query::read_command>(_schema->id(), _schema->version(),
|
||||
make_partition_slice(options), limit);
|
||||
auto command = ::make_lw_shared<query::read_command>(_schema->id(), make_partition_slice(options), limit);
|
||||
auto partition_ranges = _restrictions->get_partition_key_ranges(options);
|
||||
|
||||
if (needs_post_query_ordering() && _limit) {
|
||||
@@ -529,12 +526,9 @@ select_statement::raw_statement::get_ordering_comparator(schema_ptr schema,
|
||||
}
|
||||
|
||||
bool select_statement::raw_statement::is_reversed(schema_ptr schema) {
|
||||
std::experimental::optional<bool> reversed_map[schema->clustering_key_size()];
|
||||
|
||||
assert(_parameters->orderings().size() > 0);
|
||||
parameters::orderings_type::size_type i = 0;
|
||||
bool is_reversed_ = false;
|
||||
bool relation_order_unsupported = false;
|
||||
|
||||
uint32_t i = 0;
|
||||
for (auto&& e : _parameters->orderings()) {
|
||||
::shared_ptr<column_identifier> column = e.first->prepare_column_identifier(schema);
|
||||
bool reversed = e.second;
|
||||
@@ -554,23 +548,32 @@ bool select_statement::raw_statement::is_reversed(schema_ptr schema) {
|
||||
"Order by currently only support the ordering of columns following their declared order in the PRIMARY KEY");
|
||||
}
|
||||
|
||||
bool current_reverse_status = (reversed != def->type->is_reversed());
|
||||
|
||||
if (i == 0) {
|
||||
is_reversed_ = current_reverse_status;
|
||||
}
|
||||
|
||||
if (is_reversed_ != current_reverse_status) {
|
||||
relation_order_unsupported = true;
|
||||
}
|
||||
reversed_map[i] = std::experimental::make_optional(reversed != def->type->is_reversed());
|
||||
++i;
|
||||
}
|
||||
|
||||
if (relation_order_unsupported) {
|
||||
throw exceptions::invalid_request_exception("Unsupported order by relation");
|
||||
// GCC incorrenctly complains about "*is_reversed_" below
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
|
||||
// Check that all bool in reversedMap, if set, agrees
|
||||
std::experimental::optional<bool> is_reversed_{};
|
||||
for (auto&& b : reversed_map) {
|
||||
if (b) {
|
||||
if (!is_reversed_) {
|
||||
is_reversed_ = b;
|
||||
} else {
|
||||
if ((*is_reversed_) != *b) {
|
||||
throw exceptions::invalid_request_exception("Unsupported order by relation");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return is_reversed_;
|
||||
assert(is_reversed_);
|
||||
return *is_reversed_;
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
}
|
||||
|
||||
/** If ALLOW FILTERING was not specified, this verifies that it is not needed */
|
||||
|
||||
@@ -59,7 +59,7 @@ bool update_statement::require_full_clustering_key() const {
|
||||
void update_statement::add_update_for_key(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) {
|
||||
if (s->is_dense()) {
|
||||
if (!prefix || (prefix.size() == 1 && prefix.components().front().empty())) {
|
||||
throw exceptions::invalid_request_exception(sprint("Missing PRIMARY KEY part %s", s->clustering_key_columns().begin()->name_as_text()));
|
||||
throw exceptions::invalid_request_exception(sprint("Missing PRIMARY KEY part %s", *s->clustering_key_columns().begin()));
|
||||
}
|
||||
|
||||
// An empty name for the compact value is what we use to recognize the case where there is not column
|
||||
|
||||
@@ -45,15 +45,15 @@ namespace cql3 {
|
||||
|
||||
std::experimental::optional<collection_mutation_view>
|
||||
update_parameters::get_prefetched_list(
|
||||
partition_key pkey,
|
||||
std::experimental::optional<clustering_key> ckey,
|
||||
const partition_key& pkey,
|
||||
const clustering_key& row_key,
|
||||
const column_definition& column) const
|
||||
{
|
||||
if (!_prefetched) {
|
||||
return {};
|
||||
}
|
||||
|
||||
auto i = _prefetched->rows.find(std::make_pair(std::move(pkey), std::move(ckey)));
|
||||
auto i = _prefetched->rows.find(std::make_pair(pkey, row_key));
|
||||
if (i == _prefetched->rows.end()) {
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -58,9 +58,8 @@ namespace cql3 {
|
||||
*/
|
||||
class update_parameters final {
|
||||
public:
|
||||
// Holder for data needed by CQL list updates which depend on current state of the list.
|
||||
struct prefetch_data {
|
||||
using key = std::pair<partition_key, std::experimental::optional<clustering_key>>;
|
||||
using key = std::pair<partition_key, clustering_key>;
|
||||
struct key_hashing {
|
||||
partition_key::hashing pk_hash;
|
||||
clustering_key::hashing ck_hash;
|
||||
@@ -71,7 +70,7 @@ public:
|
||||
{ }
|
||||
|
||||
size_t operator()(const key& k) const {
|
||||
return pk_hash(k.first) ^ (k.second ? ck_hash(*k.second) : 0);
|
||||
return pk_hash(k.first) ^ ck_hash(k.second);
|
||||
}
|
||||
};
|
||||
struct key_equality {
|
||||
@@ -84,8 +83,7 @@ public:
|
||||
{ }
|
||||
|
||||
bool operator()(const key& k1, const key& k2) const {
|
||||
return pk_eq(k1.first, k2.first)
|
||||
&& bool(k1.second) == bool(k2.second) && (!k1.second || ck_eq(*k1.second, *k2.second));
|
||||
return pk_eq(k1.first, k2.first) && ck_eq(k1.second, k2.second);
|
||||
}
|
||||
};
|
||||
using row = std::unordered_map<column_id, collection_mutation>;
|
||||
@@ -185,11 +183,8 @@ public:
|
||||
return _timestamp;
|
||||
}
|
||||
|
||||
std::experimental::optional<collection_mutation_view>
|
||||
get_prefetched_list(
|
||||
partition_key pkey,
|
||||
std::experimental::optional<clustering_key> ckey,
|
||||
const column_definition& column) const;
|
||||
std::experimental::optional<collection_mutation_view> get_prefetched_list(
|
||||
const partition_key& pkey, const clustering_key& row_key, const column_definition& column) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
530
database.cc
530
database.cc
@@ -23,7 +23,6 @@
|
||||
#include "database.hh"
|
||||
#include "unimplemented.hh"
|
||||
#include "core/future-util.hh"
|
||||
#include "db/commitlog/commitlog_entry.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "db/consistency_level.hh"
|
||||
#include "db/serializer.hh"
|
||||
@@ -58,8 +57,6 @@
|
||||
#include <seastar/core/enum.hh>
|
||||
#include "utils/latency.hh"
|
||||
#include "utils/flush_queue.hh"
|
||||
#include "schema_registry.hh"
|
||||
#include "service/priority_manager.hh"
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
@@ -129,9 +126,9 @@ column_family::make_partition_presence_checker(lw_shared_ptr<sstable_list> old_s
|
||||
|
||||
mutation_source
|
||||
column_family::sstables_as_mutation_source() {
|
||||
return mutation_source([this] (schema_ptr s, const query::partition_range& r, const io_priority_class& pc) {
|
||||
return make_sstable_reader(std::move(s), r, pc);
|
||||
});
|
||||
return [this] (const query::partition_range& r) {
|
||||
return make_sstable_reader(r);
|
||||
};
|
||||
}
|
||||
|
||||
// define in .cc, since sstable is forward-declared in .hh
|
||||
@@ -156,14 +153,10 @@ class range_sstable_reader final : public mutation_reader::impl {
|
||||
const query::partition_range& _pr;
|
||||
lw_shared_ptr<sstable_list> _sstables;
|
||||
mutation_reader _reader;
|
||||
// Use a pointer instead of copying, so we don't need to regenerate the reader if
|
||||
// the priority changes.
|
||||
const io_priority_class* _pc;
|
||||
public:
|
||||
range_sstable_reader(schema_ptr s, lw_shared_ptr<sstable_list> sstables, const query::partition_range& pr, const io_priority_class& pc)
|
||||
range_sstable_reader(schema_ptr s, lw_shared_ptr<sstable_list> sstables, const query::partition_range& pr)
|
||||
: _pr(pr)
|
||||
, _sstables(std::move(sstables))
|
||||
, _pc(&pc)
|
||||
{
|
||||
std::vector<mutation_reader> readers;
|
||||
for (const lw_shared_ptr<sstables::sstable>& sst : *_sstables | boost::adaptors::map_values) {
|
||||
@@ -190,15 +183,11 @@ class single_key_sstable_reader final : public mutation_reader::impl {
|
||||
mutation_opt _m;
|
||||
bool _done = false;
|
||||
lw_shared_ptr<sstable_list> _sstables;
|
||||
// Use a pointer instead of copying, so we don't need to regenerate the reader if
|
||||
// the priority changes.
|
||||
const io_priority_class* _pc;
|
||||
public:
|
||||
single_key_sstable_reader(schema_ptr schema, lw_shared_ptr<sstable_list> sstables, const partition_key& key, const io_priority_class& pc)
|
||||
single_key_sstable_reader(schema_ptr schema, lw_shared_ptr<sstable_list> sstables, const partition_key& key)
|
||||
: _schema(std::move(schema))
|
||||
, _key(sstables::key::from_partition_key(*_schema, key))
|
||||
, _sstables(std::move(sstables))
|
||||
, _pc(&pc)
|
||||
{ }
|
||||
|
||||
virtual future<mutation_opt> operator()() override {
|
||||
@@ -217,26 +206,26 @@ public:
|
||||
};
|
||||
|
||||
mutation_reader
|
||||
column_family::make_sstable_reader(schema_ptr s, const query::partition_range& pr, const io_priority_class& pc) const {
|
||||
column_family::make_sstable_reader(const query::partition_range& pr) const {
|
||||
if (pr.is_singular() && pr.start()->value().has_key()) {
|
||||
const dht::ring_position& pos = pr.start()->value();
|
||||
if (dht::shard_of(pos.token()) != engine().cpu_id()) {
|
||||
return make_empty_reader(); // range doesn't belong to this shard
|
||||
}
|
||||
return make_mutation_reader<single_key_sstable_reader>(std::move(s), _sstables, *pos.key(), pc);
|
||||
return make_mutation_reader<single_key_sstable_reader>(_schema, _sstables, *pos.key());
|
||||
} else {
|
||||
// range_sstable_reader is not movable so we need to wrap it
|
||||
return make_mutation_reader<range_sstable_reader>(std::move(s), _sstables, pr, pc);
|
||||
return make_mutation_reader<range_sstable_reader>(_schema, _sstables, pr);
|
||||
}
|
||||
}
|
||||
|
||||
key_source column_family::sstables_as_key_source() const {
|
||||
return key_source([this] (const query::partition_range& range, const io_priority_class& pc) {
|
||||
return [this] (const query::partition_range& range) {
|
||||
std::vector<key_reader> readers;
|
||||
readers.reserve(_sstables->size());
|
||||
std::transform(_sstables->begin(), _sstables->end(), std::back_inserter(readers), [&] (auto&& entry) {
|
||||
auto& sst = entry.second;
|
||||
auto rd = sstables::make_key_reader(_schema, sst, range, pc);
|
||||
auto rd = sstables::make_key_reader(_schema, sst, range);
|
||||
if (sst->is_shared()) {
|
||||
rd = make_filtering_reader(std::move(rd), [] (const dht::decorated_key& dk) {
|
||||
return dht::shard_of(dk.token()) == engine().cpu_id();
|
||||
@@ -245,14 +234,14 @@ key_source column_family::sstables_as_key_source() const {
|
||||
return rd;
|
||||
});
|
||||
return make_combined_reader(_schema, std::move(readers));
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
// Exposed for testing, not performance critical.
|
||||
future<column_family::const_mutation_partition_ptr>
|
||||
column_family::find_partition(schema_ptr s, const dht::decorated_key& key) const {
|
||||
return do_with(query::partition_range::make_singular(key), [s = std::move(s), this] (auto& range) {
|
||||
return do_with(this->make_reader(s, range), [] (mutation_reader& reader) {
|
||||
column_family::find_partition(const dht::decorated_key& key) const {
|
||||
return do_with(query::partition_range::make_singular(key), [this] (auto& range) {
|
||||
return do_with(this->make_reader(range), [] (mutation_reader& reader) {
|
||||
return reader().then([] (mutation_opt&& mo) -> std::unique_ptr<const mutation_partition> {
|
||||
if (!mo) {
|
||||
return {};
|
||||
@@ -264,13 +253,13 @@ column_family::find_partition(schema_ptr s, const dht::decorated_key& key) const
|
||||
}
|
||||
|
||||
future<column_family::const_mutation_partition_ptr>
|
||||
column_family::find_partition_slow(schema_ptr s, const partition_key& key) const {
|
||||
return find_partition(s, dht::global_partitioner().decorate_key(*s, key));
|
||||
column_family::find_partition_slow(const partition_key& key) const {
|
||||
return find_partition(dht::global_partitioner().decorate_key(*_schema, key));
|
||||
}
|
||||
|
||||
future<column_family::const_row_ptr>
|
||||
column_family::find_row(schema_ptr s, const dht::decorated_key& partition_key, clustering_key clustering_key) const {
|
||||
return find_partition(std::move(s), partition_key).then([clustering_key = std::move(clustering_key)] (const_mutation_partition_ptr p) {
|
||||
column_family::find_row(const dht::decorated_key& partition_key, clustering_key clustering_key) const {
|
||||
return find_partition(partition_key).then([clustering_key = std::move(clustering_key)] (const_mutation_partition_ptr p) {
|
||||
if (!p) {
|
||||
return make_ready_future<const_row_ptr>();
|
||||
}
|
||||
@@ -285,8 +274,8 @@ column_family::find_row(schema_ptr s, const dht::decorated_key& partition_key, c
|
||||
}
|
||||
|
||||
mutation_reader
|
||||
column_family::make_reader(schema_ptr s, const query::partition_range& range, const io_priority_class& pc) const {
|
||||
if (query::is_wrap_around(range, *s)) {
|
||||
column_family::make_reader(const query::partition_range& range) const {
|
||||
if (query::is_wrap_around(range, *_schema)) {
|
||||
// make_combined_reader() can't handle streams that wrap around yet.
|
||||
fail(unimplemented::cause::WRAP_AROUND);
|
||||
}
|
||||
@@ -315,22 +304,21 @@ column_family::make_reader(schema_ptr s, const query::partition_range& range, co
|
||||
// https://github.com/scylladb/scylla/issues/185
|
||||
|
||||
for (auto&& mt : *_memtables) {
|
||||
readers.emplace_back(mt->make_reader(s, range));
|
||||
readers.emplace_back(mt->make_reader(range));
|
||||
}
|
||||
|
||||
if (_config.enable_cache) {
|
||||
readers.emplace_back(_cache.make_reader(s, range, pc));
|
||||
readers.emplace_back(_cache.make_reader(range));
|
||||
} else {
|
||||
readers.emplace_back(make_sstable_reader(s, range, pc));
|
||||
readers.emplace_back(make_sstable_reader(range));
|
||||
}
|
||||
|
||||
return make_combined_reader(std::move(readers));
|
||||
}
|
||||
|
||||
// Not performance critical. Currently used for testing only.
|
||||
template <typename Func>
|
||||
future<bool>
|
||||
column_family::for_all_partitions(schema_ptr s, Func&& func) const {
|
||||
column_family::for_all_partitions(Func&& func) const {
|
||||
static_assert(std::is_same<bool, std::result_of_t<Func(const dht::decorated_key&, const mutation_partition&)>>::value,
|
||||
"bad Func signature");
|
||||
|
||||
@@ -341,13 +329,13 @@ column_family::for_all_partitions(schema_ptr s, Func&& func) const {
|
||||
bool empty = false;
|
||||
public:
|
||||
bool done() const { return !ok || empty; }
|
||||
iteration_state(schema_ptr s, const column_family& cf, Func&& func)
|
||||
: reader(cf.make_reader(std::move(s)))
|
||||
iteration_state(const column_family& cf, Func&& func)
|
||||
: reader(cf.make_reader())
|
||||
, func(std::move(func))
|
||||
{ }
|
||||
};
|
||||
|
||||
return do_with(iteration_state(std::move(s), *this, std::move(func)), [] (iteration_state& is) {
|
||||
return do_with(iteration_state(*this, std::move(func)), [] (iteration_state& is) {
|
||||
return do_until([&is] { return is.done(); }, [&is] {
|
||||
return is.reader().then([&is](mutation_opt&& mo) {
|
||||
if (!mo) {
|
||||
@@ -363,39 +351,30 @@ column_family::for_all_partitions(schema_ptr s, Func&& func) const {
|
||||
}
|
||||
|
||||
future<bool>
|
||||
column_family::for_all_partitions_slow(schema_ptr s, std::function<bool (const dht::decorated_key&, const mutation_partition&)> func) const {
|
||||
return for_all_partitions(std::move(s), std::move(func));
|
||||
column_family::for_all_partitions_slow(std::function<bool (const dht::decorated_key&, const mutation_partition&)> func) const {
|
||||
return for_all_partitions(std::move(func));
|
||||
}
|
||||
|
||||
class lister {
|
||||
public:
|
||||
using dir_entry_types = std::unordered_set<directory_entry_type, enum_hash<directory_entry_type>>;
|
||||
using walker_type = std::function<future<> (directory_entry)>;
|
||||
using filter_type = std::function<bool (const sstring&)>;
|
||||
private:
|
||||
file _f;
|
||||
walker_type _walker;
|
||||
filter_type _filter;
|
||||
std::function<future<> (directory_entry de)> _walker;
|
||||
dir_entry_types _expected_type;
|
||||
subscription<directory_entry> _listing;
|
||||
sstring _dirname;
|
||||
|
||||
public:
|
||||
lister(file f, dir_entry_types type, walker_type walker, sstring dirname)
|
||||
lister(file f, dir_entry_types type, std::function<future<> (directory_entry)> walker, sstring dirname)
|
||||
: _f(std::move(f))
|
||||
, _walker(std::move(walker))
|
||||
, _filter([] (const sstring& fname) { return true; })
|
||||
, _expected_type(type)
|
||||
, _listing(_f.list_directory([this] (directory_entry de) { return _visit(de); }))
|
||||
, _dirname(dirname) {
|
||||
}
|
||||
|
||||
lister(file f, dir_entry_types type, walker_type walker, filter_type filter, sstring dirname)
|
||||
: lister(std::move(f), type, std::move(walker), dirname) {
|
||||
_filter = std::move(filter);
|
||||
}
|
||||
|
||||
static future<> scan_dir(sstring name, dir_entry_types type, walker_type walker, filter_type filter = [] (const sstring& fname) { return true; });
|
||||
static future<> scan_dir(sstring name, dir_entry_types type, std::function<future<> (directory_entry)> walker);
|
||||
protected:
|
||||
future<> _visit(directory_entry de) {
|
||||
|
||||
@@ -404,12 +383,6 @@ protected:
|
||||
if ((!_expected_type.count(*(de.type))) || (de.name[0] == '.')) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
// apply a filter
|
||||
if (!_filter(_dirname + "/" + de.name)) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
return _walker(de);
|
||||
});
|
||||
|
||||
@@ -430,9 +403,9 @@ private:
|
||||
};
|
||||
|
||||
|
||||
future<> lister::scan_dir(sstring name, lister::dir_entry_types type, walker_type walker, filter_type filter) {
|
||||
return engine().open_directory(name).then([type, walker = std::move(walker), filter = std::move(filter), name] (file f) {
|
||||
auto l = make_lw_shared<lister>(std::move(f), type, walker, filter, name);
|
||||
future<> lister::scan_dir(sstring name, lister::dir_entry_types type, std::function<future<> (directory_entry)> walker) {
|
||||
return engine().open_directory(name).then([type, walker = std::move(walker), name] (file f) {
|
||||
auto l = make_lw_shared<lister>(std::move(f), type, walker, name);
|
||||
return l->done().then([l] { });
|
||||
});
|
||||
}
|
||||
@@ -474,23 +447,12 @@ future<sstables::entry_descriptor> column_family::probe_file(sstring sstdir, sst
|
||||
}
|
||||
|
||||
update_sstables_known_generation(comps.generation);
|
||||
|
||||
{
|
||||
auto i = _sstables->find(comps.generation);
|
||||
if (i != _sstables->end()) {
|
||||
auto new_toc = sstdir + "/" + fname;
|
||||
throw std::runtime_error(sprint("Attempted to add sstable generation %d twice: new=%s existing=%s",
|
||||
comps.generation, new_toc, i->second->toc_filename()));
|
||||
}
|
||||
}
|
||||
assert(_sstables->count(comps.generation) == 0);
|
||||
|
||||
auto fut = sstable::get_sstable_key_range(*_schema, _schema->ks_name(), _schema->cf_name(), sstdir, comps.generation, comps.version, comps.format);
|
||||
return std::move(fut).then([this, sstdir = std::move(sstdir), comps] (range<partition_key> r) {
|
||||
// Checks whether or not sstable belongs to current shard.
|
||||
if (!belongs_to_current_shard(*_schema, std::move(r))) {
|
||||
dblog.debug("sstable {} not relevant for this shard, ignoring",
|
||||
sstables::sstable::filename(sstdir, _schema->ks_name(), _schema->cf_name(), comps.version, comps.generation, comps.format,
|
||||
sstables::sstable::component_type::Data));
|
||||
sstable::mark_sstable_for_deletion(_schema->ks_name(), _schema->cf_name(), sstdir, comps.generation, comps.version, comps.format);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
@@ -589,7 +551,9 @@ column_family::seal_active_memtable() {
|
||||
|
||||
future<stop_iteration>
|
||||
column_family::try_flush_memtable_to_sstable(lw_shared_ptr<memtable> old) {
|
||||
auto gen = calculate_generation_for_new_table();
|
||||
// FIXME: better way of ensuring we don't attempt to
|
||||
// overwrite an existing table.
|
||||
auto gen = _sstable_generation++ * smp::count + engine().cpu_id();
|
||||
|
||||
auto newtab = make_lw_shared<sstables::sstable>(_schema->ks_name(), _schema->cf_name(),
|
||||
_config.datadir, gen,
|
||||
@@ -602,20 +566,27 @@ column_family::try_flush_memtable_to_sstable(lw_shared_ptr<memtable> old) {
|
||||
_config.cf_stats->pending_memtables_flushes_bytes += memtable_size;
|
||||
newtab->set_unshared();
|
||||
dblog.debug("Flushing to {}", newtab->get_filename());
|
||||
// Note that due to our sharded architecture, it is possible that
|
||||
// in the face of a value change some shards will backup sstables
|
||||
// while others won't.
|
||||
//
|
||||
// This is, in theory, possible to mitigate through a rwlock.
|
||||
// However, this doesn't differ from the situation where all tables
|
||||
// are coming from a single shard and the toggle happens in the
|
||||
// middle of them.
|
||||
//
|
||||
// The code as is guarantees that we'll never partially backup a
|
||||
// single sstable, so that is enough of a guarantee.
|
||||
auto&& priority = service::get_local_memtable_flush_priority();
|
||||
return newtab->write_components(*old, incremental_backups_enabled(), priority).then([this, newtab, old] {
|
||||
return newtab->open_data();
|
||||
return newtab->write_components(*old).then([this, newtab, old] {
|
||||
return newtab->open_data().then([this, newtab] {
|
||||
// Note that due to our sharded architecture, it is possible that
|
||||
// in the face of a value change some shards will backup sstables
|
||||
// while others won't.
|
||||
//
|
||||
// This is, in theory, possible to mitigate through a rwlock.
|
||||
// However, this doesn't differ from the situation where all tables
|
||||
// are coming from a single shard and the toggle happens in the
|
||||
// middle of them.
|
||||
//
|
||||
// The code as is guarantees that we'll never partially backup a
|
||||
// single sstable, so that is enough of a guarantee.
|
||||
if (!incremental_backups_enabled()) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
auto dir = newtab->get_dir() + "/backups/";
|
||||
return touch_directory(dir).then([dir, newtab] {
|
||||
return newtab->create_links(dir);
|
||||
});
|
||||
});
|
||||
}).then_wrapped([this, old, newtab, memtable_size] (future<> ret) {
|
||||
_config.cf_stats->pending_memtables_flushes_count--;
|
||||
_config.cf_stats->pending_memtables_flushes_bytes -= memtable_size;
|
||||
@@ -701,7 +672,7 @@ column_family::reshuffle_sstables(int64_t start) {
|
||||
// Those SSTables are not known by anyone in the system. So we don't have any kind of
|
||||
// object describing them. There isn't too much of a choice.
|
||||
return work.sstables[comps.generation]->read_toc();
|
||||
}, &manifest_json_filter).then([&work] {
|
||||
}).then([&work] {
|
||||
// Note: cannot be parallel because we will be shuffling things around at this stage. Can't race.
|
||||
return do_for_each(work.sstables, [&work] (auto& pair) {
|
||||
auto&& comps = std::move(work.descriptors.at(pair.first));
|
||||
@@ -720,104 +691,68 @@ column_family::reshuffle_sstables(int64_t start) {
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
column_family::rebuild_sstable_list(const std::vector<sstables::shared_sstable>& new_sstables,
|
||||
const std::vector<sstables::shared_sstable>& sstables_to_remove) {
|
||||
// Build a new list of _sstables: We remove from the existing list the
|
||||
// tables we compacted (by now, there might be more sstables flushed
|
||||
// later), and we add the new tables generated by the compaction.
|
||||
// We create a new list rather than modifying it in-place, so that
|
||||
// on-going reads can continue to use the old list.
|
||||
auto current_sstables = _sstables;
|
||||
_sstables = make_lw_shared<sstable_list>();
|
||||
|
||||
// zeroing live_disk_space_used and live_sstable_count because the
|
||||
// sstable list is re-created below.
|
||||
_stats.live_disk_space_used = 0;
|
||||
_stats.live_sstable_count = 0;
|
||||
|
||||
std::unordered_set<sstables::shared_sstable> s(
|
||||
sstables_to_remove.begin(), sstables_to_remove.end());
|
||||
|
||||
for (const auto& oldtab : *current_sstables) {
|
||||
// Checks if oldtab is a sstable not being compacted.
|
||||
if (!s.count(oldtab.second)) {
|
||||
update_stats_for_new_sstable(oldtab.second->data_size());
|
||||
_sstables->emplace(oldtab.first, oldtab.second);
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& newtab : new_sstables) {
|
||||
// FIXME: rename the new sstable(s). Verify a rename doesn't cause
|
||||
// problems for the sstable object.
|
||||
update_stats_for_new_sstable(newtab->data_size());
|
||||
_sstables->emplace(newtab->generation(), newtab);
|
||||
}
|
||||
|
||||
for (const auto& oldtab : sstables_to_remove) {
|
||||
oldtab->mark_for_deletion();
|
||||
}
|
||||
}
|
||||
|
||||
future<>
|
||||
column_family::compact_sstables(sstables::compaction_descriptor descriptor, bool cleanup) {
|
||||
column_family::compact_sstables(sstables::compaction_descriptor descriptor) {
|
||||
if (!descriptor.sstables.size()) {
|
||||
// if there is nothing to compact, just return.
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
return with_lock(_sstables_lock.for_read(), [this, descriptor = std::move(descriptor), cleanup] {
|
||||
return with_lock(_sstables_lock.for_read(), [this, descriptor = std::move(descriptor)] {
|
||||
auto sstables_to_compact = make_lw_shared<std::vector<sstables::shared_sstable>>(std::move(descriptor.sstables));
|
||||
|
||||
auto create_sstable = [this] {
|
||||
auto gen = this->calculate_generation_for_new_table();
|
||||
auto new_tables = make_lw_shared<std::vector<
|
||||
std::pair<unsigned, sstables::shared_sstable>>>();
|
||||
auto create_sstable = [this, new_tables] {
|
||||
// FIXME: this generation calculation should be in a function.
|
||||
auto gen = _sstable_generation++ * smp::count + engine().cpu_id();
|
||||
// FIXME: use "tmp" marker in names of incomplete sstable
|
||||
auto sst = make_lw_shared<sstables::sstable>(_schema->ks_name(), _schema->cf_name(), _config.datadir, gen,
|
||||
sstables::sstable::version_types::ka,
|
||||
sstables::sstable::format_types::big);
|
||||
sst->set_unshared();
|
||||
new_tables->emplace_back(gen, sst);
|
||||
return sst;
|
||||
};
|
||||
return sstables::compact_sstables(*sstables_to_compact, *this, create_sstable, descriptor.max_sstable_bytes, descriptor.level,
|
||||
cleanup).then([this, sstables_to_compact] (auto new_sstables) {
|
||||
this->rebuild_sstable_list(new_sstables, *sstables_to_compact);
|
||||
return sstables::compact_sstables(*sstables_to_compact, *this,
|
||||
create_sstable, descriptor.max_sstable_bytes, descriptor.level).then([this, new_tables, sstables_to_compact] {
|
||||
// Build a new list of _sstables: We remove from the existing list the
|
||||
// tables we compacted (by now, there might be more sstables flushed
|
||||
// later), and we add the new tables generated by the compaction.
|
||||
// We create a new list rather than modifying it in-place, so that
|
||||
// on-going reads can continue to use the old list.
|
||||
auto current_sstables = _sstables;
|
||||
_sstables = make_lw_shared<sstable_list>();
|
||||
|
||||
// zeroing live_disk_space_used and live_sstable_count because the
|
||||
// sstable list is re-created below.
|
||||
_stats.live_disk_space_used = 0;
|
||||
_stats.live_sstable_count = 0;
|
||||
|
||||
std::unordered_set<sstables::shared_sstable> s(
|
||||
sstables_to_compact->begin(), sstables_to_compact->end());
|
||||
for (const auto& oldtab : *current_sstables) {
|
||||
// Checks if oldtab is a sstable not being compacted.
|
||||
if (!s.count(oldtab.second)) {
|
||||
update_stats_for_new_sstable(oldtab.second->data_size());
|
||||
_sstables->emplace(oldtab.first, oldtab.second);
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& newtab : *new_tables) {
|
||||
// FIXME: rename the new sstable(s). Verify a rename doesn't cause
|
||||
// problems for the sstable object.
|
||||
update_stats_for_new_sstable(newtab.second->data_size());
|
||||
_sstables->emplace(newtab.first, newtab.second);
|
||||
}
|
||||
|
||||
for (const auto& oldtab : *sstables_to_compact) {
|
||||
oldtab->mark_for_deletion();
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
static bool needs_cleanup(const lw_shared_ptr<sstables::sstable>& sst,
|
||||
const lw_shared_ptr<std::vector<range<dht::token>>>& owned_ranges,
|
||||
schema_ptr s) {
|
||||
auto first = sst->get_first_partition_key(*s);
|
||||
auto last = sst->get_last_partition_key(*s);
|
||||
auto first_token = dht::global_partitioner().get_token(*s, first);
|
||||
auto last_token = dht::global_partitioner().get_token(*s, last);
|
||||
range<dht::token> sst_token_range = range<dht::token>::make(first_token, last_token);
|
||||
|
||||
// return true iff sst partition range isn't fully contained in any of the owned ranges.
|
||||
for (auto& r : *owned_ranges) {
|
||||
if (r.contains(sst_token_range, dht::token_comparator())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
future<> column_family::cleanup_sstables(sstables::compaction_descriptor descriptor) {
|
||||
std::vector<range<dht::token>> r = service::get_local_storage_service().get_local_ranges(_schema->ks_name());
|
||||
auto owned_ranges = make_lw_shared<std::vector<range<dht::token>>>(std::move(r));
|
||||
auto sstables_to_cleanup = make_lw_shared<std::vector<sstables::shared_sstable>>(std::move(descriptor.sstables));
|
||||
|
||||
return parallel_for_each(*sstables_to_cleanup, [this, owned_ranges = std::move(owned_ranges), sstables_to_cleanup] (auto& sst) {
|
||||
if (!owned_ranges->empty() && !needs_cleanup(sst, owned_ranges, _schema)) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
std::vector<sstables::shared_sstable> sstable_to_compact({ sst });
|
||||
return this->compact_sstables(sstables::compaction_descriptor(std::move(sstable_to_compact)), true);
|
||||
});
|
||||
}
|
||||
|
||||
future<>
|
||||
column_family::load_new_sstables(std::vector<sstables::entry_descriptor> new_tables) {
|
||||
return parallel_for_each(new_tables, [this] (auto comps) {
|
||||
@@ -857,26 +792,18 @@ void column_family::start_compaction() {
|
||||
|
||||
void column_family::trigger_compaction() {
|
||||
// Submitting compaction job to compaction manager.
|
||||
// #934 - always inc the pending counter, to help
|
||||
// indicate the want for compaction.
|
||||
_stats.pending_compactions++;
|
||||
do_trigger_compaction(); // see below
|
||||
}
|
||||
|
||||
void column_family::do_trigger_compaction() {
|
||||
// But only submit if we're not locked out
|
||||
if (!_compaction_disabled) {
|
||||
_stats.pending_compactions++;
|
||||
_compaction_manager.submit(this);
|
||||
}
|
||||
}
|
||||
|
||||
future<> column_family::run_compaction(sstables::compaction_descriptor descriptor) {
|
||||
assert(_stats.pending_compactions > 0);
|
||||
return compact_sstables(std::move(descriptor)).then([this] {
|
||||
// only do this on success. (no exceptions)
|
||||
// in that case, we rely on it being still set
|
||||
// for reqeueuing
|
||||
_stats.pending_compactions--;
|
||||
future<> column_family::run_compaction() {
|
||||
sstables::compaction_strategy strategy = _compaction_strategy;
|
||||
return do_with(std::move(strategy), [this] (sstables::compaction_strategy& cs) {
|
||||
return cs.compact(*this).then([this] {
|
||||
_stats.pending_compactions--;
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -911,17 +838,6 @@ lw_shared_ptr<sstable_list> column_family::get_sstables() {
|
||||
return _sstables;
|
||||
}
|
||||
|
||||
inline bool column_family::manifest_json_filter(const sstring& fname) {
|
||||
using namespace boost::filesystem;
|
||||
|
||||
path entry_path(fname);
|
||||
if (!is_directory(status(entry_path)) && entry_path.filename() == path("manifest.json")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
future<> column_family::populate(sstring sstdir) {
|
||||
// We can catch most errors when we try to load an sstable. But if the TOC
|
||||
// file is the one missing, we won't try to load the sstable at all. This
|
||||
@@ -983,7 +899,7 @@ future<> column_family::populate(sstring sstdir) {
|
||||
futures.push_back(std::move(f));
|
||||
|
||||
return make_ready_future<>();
|
||||
}, &manifest_json_filter).then([&futures] {
|
||||
}).then([&futures] {
|
||||
return when_all(futures.begin(), futures.end()).then([] (std::vector<future<>> ret) {
|
||||
try {
|
||||
for (auto& f : ret) {
|
||||
@@ -1003,7 +919,7 @@ future<> column_family::populate(sstring sstdir) {
|
||||
sstables::sstable::format_types format = descriptor->format.value();
|
||||
|
||||
if (engine().cpu_id() != 0) {
|
||||
dblog.debug("At directory: {}, partial SSTable with generation {} not relevant for this shard, ignoring", sstdir, v.first);
|
||||
dblog.info("At directory: {}, partial SSTable with generation {} not relevant for this shard, ignoring", sstdir, v.first);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
// shard 0 is the responsible for removing a partial sstable.
|
||||
@@ -1014,9 +930,6 @@ future<> column_family::populate(sstring sstdir) {
|
||||
return make_ready_future<>();
|
||||
});
|
||||
});
|
||||
}).then([this] {
|
||||
// Make sure this is called even if CF is empty
|
||||
mark_ready_for_writes();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1033,6 +946,8 @@ database::database(const db::config& cfg)
|
||||
if (!_memtable_total_space) {
|
||||
_memtable_total_space = memory::stats().total_memory() / 2;
|
||||
}
|
||||
bool durable = cfg.data_file_directories().size() > 0;
|
||||
db::system_keyspace::make(*this, durable, _cfg->volatile_system_keyspace_for_testing());
|
||||
// Start compaction manager with two tasks for handling compaction jobs.
|
||||
_compaction_manager.start(2);
|
||||
setup_collectd();
|
||||
@@ -1089,37 +1004,19 @@ future<> database::populate_keyspace(sstring datadir, sstring ks_name) {
|
||||
dblog.error("Keyspace {}: Skipping malformed CF {} ", ksdir, de.name);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
sstring cfname = comps[0];
|
||||
sstring uuidst = comps[1];
|
||||
|
||||
auto sstdir = ksdir + "/" + de.name;
|
||||
|
||||
try {
|
||||
auto&& uuid = [&] {
|
||||
try {
|
||||
return find_uuid(ks_name, cfname);
|
||||
} catch (const std::out_of_range& e) {
|
||||
std::throw_with_nested(no_such_column_family(ks_name, cfname));
|
||||
}
|
||||
}();
|
||||
auto& cf = find_column_family(uuid);
|
||||
|
||||
// #870: Check that the directory name matches
|
||||
// the current, expected UUID of the CF.
|
||||
if (utils::UUID(uuidst) == uuid) {
|
||||
// FIXME: Increase parallelism.
|
||||
auto sstdir = ksdir + "/" + de.name;
|
||||
dblog.info("Keyspace {}: Reading CF {} ", ksdir, cfname);
|
||||
return cf.populate(sstdir);
|
||||
}
|
||||
// Nope. Warn and ignore.
|
||||
dblog.info("Keyspace {}: Skipping obsolete version of CF {} ({})", ksdir, cfname, uuidst);
|
||||
} catch (marshal_exception&) {
|
||||
// Bogus UUID part of directory name
|
||||
dblog.warn("{}, CF {}: malformed UUID: {}. Ignoring", ksdir, comps[0], uuidst);
|
||||
auto& cf = find_column_family(ks_name, cfname);
|
||||
dblog.info("Keyspace {}: Reading CF {} ", ksdir, cfname);
|
||||
// FIXME: Increase parallelism.
|
||||
return cf.populate(sstdir);
|
||||
} catch (no_such_column_family&) {
|
||||
dblog.warn("{}, CF {}: schema not loaded!", ksdir, comps[0]);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return make_ready_future<>();
|
||||
});
|
||||
}
|
||||
return make_ready_future<>();
|
||||
@@ -1193,22 +1090,11 @@ future<> database::parse_system_tables(distributed<service::storage_proxy>& prox
|
||||
|
||||
future<>
|
||||
database::init_system_keyspace() {
|
||||
bool durable = _cfg->data_file_directories().size() > 0;
|
||||
db::system_keyspace::make(*this, durable, _cfg->volatile_system_keyspace_for_testing());
|
||||
|
||||
// FIXME support multiple directories
|
||||
return touch_directory(_cfg->data_file_directories()[0] + "/" + db::system_keyspace::NAME).then([this] {
|
||||
return populate_keyspace(_cfg->data_file_directories()[0], db::system_keyspace::NAME).then([this]() {
|
||||
return init_commitlog();
|
||||
});
|
||||
}).then([this] {
|
||||
auto& ks = find_keyspace(db::system_keyspace::NAME);
|
||||
return parallel_for_each(ks.metadata()->cf_meta_data(), [this] (auto& pair) {
|
||||
auto cfm = pair.second;
|
||||
auto& cf = this->find_column_family(cfm);
|
||||
cf.mark_ready_for_writes();
|
||||
return make_ready_future<>();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1269,8 +1155,6 @@ void database::drop_keyspace(const sstring& name) {
|
||||
}
|
||||
|
||||
void database::add_column_family(schema_ptr schema, column_family::config cfg) {
|
||||
schema = local_schema_registry().learn(schema);
|
||||
schema->registry_entry()->mark_synced();
|
||||
auto uuid = schema->id();
|
||||
lw_shared_ptr<column_family> cf;
|
||||
if (cfg.enable_commitlog && _commitlog) {
|
||||
@@ -1296,6 +1180,17 @@ void database::add_column_family(schema_ptr schema, column_family::config cfg) {
|
||||
_ks_cf_to_uuid.emplace(std::move(kscf), uuid);
|
||||
}
|
||||
|
||||
future<> database::update_column_family(const sstring& ks_name, const sstring& cf_name) {
|
||||
auto& proxy = service::get_storage_proxy();
|
||||
auto old_cfm = find_schema(ks_name, cf_name);
|
||||
return db::schema_tables::create_table_from_name(proxy, ks_name, cf_name).then([old_cfm] (auto&& new_cfm) {
|
||||
if (old_cfm->id() != new_cfm->id()) {
|
||||
return make_exception_future<>(exceptions::configuration_exception(sprint("Column family ID mismatch (found %s; expected %s)", new_cfm->id(), old_cfm->id())));
|
||||
}
|
||||
return make_exception_future<>(std::runtime_error("update column family not implemented"));
|
||||
});
|
||||
}
|
||||
|
||||
future<> database::drop_column_family(db_clock::time_point dropped_at, const sstring& ks_name, const sstring& cf_name) {
|
||||
auto uuid = find_uuid(ks_name, cf_name);
|
||||
auto& ks = find_keyspace(ks_name);
|
||||
@@ -1559,17 +1454,13 @@ compare_atomic_cell_for_merge(atomic_cell_view left, atomic_cell_view right) {
|
||||
}
|
||||
|
||||
struct query_state {
|
||||
explicit query_state(schema_ptr s,
|
||||
const query::read_command& cmd,
|
||||
const std::vector<query::partition_range>& ranges)
|
||||
: schema(std::move(s))
|
||||
, cmd(cmd)
|
||||
explicit query_state(const query::read_command& cmd, const std::vector<query::partition_range>& ranges)
|
||||
: cmd(cmd)
|
||||
, builder(cmd.slice)
|
||||
, limit(cmd.row_limit)
|
||||
, current_partition_range(ranges.begin())
|
||||
, range_end(ranges.end()){
|
||||
}
|
||||
schema_ptr schema;
|
||||
const query::read_command& cmd;
|
||||
query::result::builder builder;
|
||||
uint32_t limit;
|
||||
@@ -1583,21 +1474,21 @@ struct query_state {
|
||||
};
|
||||
|
||||
future<lw_shared_ptr<query::result>>
|
||||
column_family::query(schema_ptr s, const query::read_command& cmd, const std::vector<query::partition_range>& partition_ranges) {
|
||||
column_family::query(const query::read_command& cmd, const std::vector<query::partition_range>& partition_ranges) {
|
||||
utils::latency_counter lc;
|
||||
_stats.reads.set_latency(lc);
|
||||
return do_with(query_state(std::move(s), cmd, partition_ranges), [this] (query_state& qs) {
|
||||
return do_with(query_state(cmd, partition_ranges), [this] (query_state& qs) {
|
||||
return do_until(std::bind(&query_state::done, &qs), [this, &qs] {
|
||||
auto&& range = *qs.current_partition_range++;
|
||||
qs.reader = make_reader(qs.schema, range, service::get_local_sstable_query_read_priority());
|
||||
qs.reader = make_reader(range);
|
||||
qs.range_empty = false;
|
||||
return do_until([&qs] { return !qs.limit || qs.range_empty; }, [&qs] {
|
||||
return qs.reader().then([&qs](mutation_opt mo) {
|
||||
return do_until([&qs] { return !qs.limit || qs.range_empty; }, [this, &qs] {
|
||||
return qs.reader().then([this, &qs](mutation_opt mo) {
|
||||
if (mo) {
|
||||
auto p_builder = qs.builder.add_partition(*mo->schema(), mo->key());
|
||||
auto is_distinct = qs.cmd.slice.options.contains(query::partition_slice::option::distinct);
|
||||
auto limit = !is_distinct ? qs.limit : 1;
|
||||
mo->partition().query(p_builder, *qs.schema, qs.cmd.timestamp, limit);
|
||||
mo->partition().query(p_builder, *_schema, qs.cmd.timestamp, limit);
|
||||
qs.limit -= p_builder.row_count();
|
||||
} else {
|
||||
qs.range_empty = true;
|
||||
@@ -1618,21 +1509,21 @@ column_family::query(schema_ptr s, const query::read_command& cmd, const std::ve
|
||||
|
||||
mutation_source
|
||||
column_family::as_mutation_source() const {
|
||||
return mutation_source([this] (schema_ptr s, const query::partition_range& range, const io_priority_class& pc) {
|
||||
return this->make_reader(std::move(s), range, pc);
|
||||
});
|
||||
return [this] (const query::partition_range& range) {
|
||||
return this->make_reader(range);
|
||||
};
|
||||
}
|
||||
|
||||
future<lw_shared_ptr<query::result>>
|
||||
database::query(schema_ptr s, const query::read_command& cmd, const std::vector<query::partition_range>& ranges) {
|
||||
database::query(const query::read_command& cmd, const std::vector<query::partition_range>& ranges) {
|
||||
column_family& cf = find_column_family(cmd.cf_id);
|
||||
return cf.query(std::move(s), cmd, ranges);
|
||||
return cf.query(cmd, ranges);
|
||||
}
|
||||
|
||||
future<reconcilable_result>
|
||||
database::query_mutations(schema_ptr s, const query::read_command& cmd, const query::partition_range& range) {
|
||||
database::query_mutations(const query::read_command& cmd, const query::partition_range& range) {
|
||||
column_family& cf = find_column_family(cmd.cf_id);
|
||||
return mutation_query(std::move(s), cf.as_mutation_source(), range, cmd.slice, cmd.row_limit, cmd.timestamp);
|
||||
return mutation_query(cf.as_mutation_source(), range, cmd.slice, cmd.row_limit, cmd.timestamp);
|
||||
}
|
||||
|
||||
std::unordered_set<sstring> database::get_initial_tokens() {
|
||||
@@ -1677,8 +1568,7 @@ std::ostream& operator<<(std::ostream& out, const atomic_cell_or_collection& c)
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const mutation& m) {
|
||||
const ::schema& s = *m.schema();
|
||||
fprint(os, "{%s.%s key %s data ", s.ks_name(), s.cf_name(), m.decorated_key());
|
||||
fprint(os, "{mutation: schema %p key %s data ", m.schema().get(), m.decorated_key());
|
||||
os << m.partition() << "}";
|
||||
return os;
|
||||
}
|
||||
@@ -1697,72 +1587,28 @@ std::ostream& operator<<(std::ostream& out, const database& db) {
|
||||
return out;
|
||||
}
|
||||
|
||||
void
|
||||
column_family::apply(const mutation& m, const db::replay_position& rp) {
|
||||
utils::latency_counter lc;
|
||||
_stats.writes.set_latency(lc);
|
||||
active_memtable().apply(m, rp);
|
||||
seal_on_overflow();
|
||||
_stats.writes.mark(lc);
|
||||
if (lc.is_start()) {
|
||||
_stats.estimated_write.add(lc.latency(), _stats.writes.count);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
column_family::apply(const frozen_mutation& m, const schema_ptr& m_schema, const db::replay_position& rp) {
|
||||
utils::latency_counter lc;
|
||||
_stats.writes.set_latency(lc);
|
||||
check_valid_rp(rp);
|
||||
active_memtable().apply(m, m_schema, rp);
|
||||
seal_on_overflow();
|
||||
_stats.writes.mark(lc);
|
||||
if (lc.is_start()) {
|
||||
_stats.estimated_write.add(lc.latency(), _stats.writes.count);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
column_family::seal_on_overflow() {
|
||||
if (active_memtable().occupancy().total_space() >= _config.max_memtable_size) {
|
||||
// FIXME: if sparse, do some in-memory compaction first
|
||||
// FIXME: maybe merge with other in-memory memtables
|
||||
seal_active_memtable();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
column_family::check_valid_rp(const db::replay_position& rp) const {
|
||||
if (rp < _highest_flushed_rp) {
|
||||
throw replay_position_reordered_exception();
|
||||
}
|
||||
}
|
||||
|
||||
future<> database::apply_in_memory(const frozen_mutation& m, const schema_ptr& m_schema, const db::replay_position& rp) {
|
||||
future<> database::apply_in_memory(const frozen_mutation& m, const db::replay_position& rp) {
|
||||
try {
|
||||
auto& cf = find_column_family(m.column_family_id());
|
||||
cf.apply(m, m_schema, rp);
|
||||
cf.apply(m, rp);
|
||||
} catch (no_such_column_family&) {
|
||||
dblog.error("Attempting to mutate non-existent table {}", m.column_family_id());
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
future<> database::do_apply(schema_ptr s, const frozen_mutation& m) {
|
||||
future<> database::do_apply(const frozen_mutation& m) {
|
||||
// I'm doing a nullcheck here since the init code path for db etc
|
||||
// is a little in flux and commitlog is created only when db is
|
||||
// initied from datadir.
|
||||
auto uuid = m.column_family_id();
|
||||
auto& cf = find_column_family(uuid);
|
||||
if (!s->is_synced()) {
|
||||
throw std::runtime_error(sprint("attempted to mutate using not synced schema of %s.%s, version=%s",
|
||||
s->ks_name(), s->cf_name(), s->version()));
|
||||
}
|
||||
auto& cf = find_column_family(m.column_family_id());
|
||||
if (cf.commitlog() != nullptr) {
|
||||
commitlog_entry_writer cew(s, m);
|
||||
return cf.commitlog()->add_entry(uuid, cew).then([&m, this, s](auto rp) {
|
||||
auto uuid = m.column_family_id();
|
||||
bytes_view repr = m.representation();
|
||||
auto write_repr = [repr] (data_output& out) { out.write(repr.begin(), repr.end()); };
|
||||
return cf.commitlog()->add_mutation(uuid, repr.size(), write_repr).then([&m, this](auto rp) {
|
||||
try {
|
||||
return this->apply_in_memory(m, s, rp);
|
||||
return this->apply_in_memory(m, rp);
|
||||
} catch (replay_position_reordered_exception&) {
|
||||
// expensive, but we're assuming this is super rare.
|
||||
// if we failed to apply the mutation due to future re-ordering
|
||||
@@ -1770,11 +1616,11 @@ future<> database::do_apply(schema_ptr s, const frozen_mutation& m) {
|
||||
// let's just try again, add the mutation to the CL once more,
|
||||
// and assume success in inevitable eventually.
|
||||
dblog.debug("replay_position reordering detected");
|
||||
return this->apply(s, m);
|
||||
return this->apply(m);
|
||||
}
|
||||
});
|
||||
}
|
||||
return apply_in_memory(m, s, db::replay_position());
|
||||
return apply_in_memory(m, db::replay_position());
|
||||
}
|
||||
|
||||
future<> database::throttle() {
|
||||
@@ -1808,12 +1654,9 @@ void database::unthrottle() {
|
||||
}
|
||||
}
|
||||
|
||||
future<> database::apply(schema_ptr s, const frozen_mutation& m) {
|
||||
if (dblog.is_enabled(logging::log_level::trace)) {
|
||||
dblog.trace("apply {}", m.pretty_printer(s));
|
||||
}
|
||||
return throttle().then([this, &m, s = std::move(s)] {
|
||||
return do_apply(std::move(s), m);
|
||||
future<> database::apply(const frozen_mutation& m) {
|
||||
return throttle().then([this, &m] {
|
||||
return do_apply(m);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1955,36 +1798,6 @@ const sstring& database::get_snitch_name() const {
|
||||
return _cfg->endpoint_snitch();
|
||||
}
|
||||
|
||||
// For the filesystem operations, this code will assume that all keyspaces are visible in all shards
|
||||
// (as we have been doing for a lot of the other operations, like the snapshot itself).
|
||||
future<> database::clear_snapshot(sstring tag, std::vector<sstring> keyspace_names) {
|
||||
std::vector<std::reference_wrapper<keyspace>> keyspaces;
|
||||
|
||||
if (keyspace_names.empty()) {
|
||||
// if keyspace names are not given - apply to all existing local keyspaces
|
||||
for (auto& ks: _keyspaces) {
|
||||
keyspaces.push_back(std::reference_wrapper<keyspace>(ks.second));
|
||||
}
|
||||
} else {
|
||||
for (auto& ksname: keyspace_names) {
|
||||
try {
|
||||
keyspaces.push_back(std::reference_wrapper<keyspace>(find_keyspace(ksname)));
|
||||
} catch (no_such_keyspace& e) {
|
||||
return make_exception_future(std::current_exception());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return parallel_for_each(keyspaces, [this, tag] (auto& ks) {
|
||||
return parallel_for_each(ks.get().metadata()->cf_meta_data(), [this, tag] (auto& pair) {
|
||||
auto& cf = this->find_column_family(pair.second);
|
||||
return cf.clear_snapshot(tag);
|
||||
}).then_wrapped([] (future<> f) {
|
||||
dblog.debug("Cleared out snapshot directories");
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
future<> update_schema_version_and_announce(distributed<service::storage_proxy>& proxy)
|
||||
{
|
||||
return db::schema_tables::calculate_schema_digest(proxy).then([&proxy] (utils::UUID uuid) {
|
||||
@@ -2049,7 +1862,7 @@ seal_snapshot(sstring jsondir) {
|
||||
dblog.debug("Storing manifest {}", jsonfile);
|
||||
|
||||
return recursive_touch_directory(jsondir).then([jsonfile, json = std::move(json)] {
|
||||
return open_file_dma(jsonfile, open_flags::wo | open_flags::create | open_flags::truncate).then([json](file f) {
|
||||
return engine().open_file_dma(jsonfile, open_flags::wo | open_flags::create | open_flags::truncate).then([json](file f) {
|
||||
return do_with(make_file_output_stream(std::move(f)), [json] (output_stream<char>& out) {
|
||||
return out.write(json.c_str(), json.size()).then([&out] {
|
||||
return out.flush();
|
||||
@@ -2296,8 +2109,7 @@ void column_family::clear() {
|
||||
// NOTE: does not need to be futurized, but might eventually, depending on
|
||||
// if we implement notifications, whatnot.
|
||||
future<db::replay_position> column_family::discard_sstables(db_clock::time_point truncated_at) {
|
||||
assert(_compaction_disabled > 0);
|
||||
assert(!compaction_manager_queued());
|
||||
assert(_stats.pending_compactions == 0);
|
||||
|
||||
return with_lock(_sstables_lock.for_read(), [this, truncated_at] {
|
||||
db::replay_position rp;
|
||||
@@ -2355,15 +2167,3 @@ std::ostream& operator<<(std::ostream& os, const keyspace_metadata& m) {
|
||||
os << "}";
|
||||
return os;
|
||||
}
|
||||
|
||||
void column_family::set_schema(schema_ptr s) {
|
||||
dblog.debug("Changing schema version of {}.{} ({}) from {} to {}",
|
||||
_schema->ks_name(), _schema->cf_name(), _schema->id(), _schema->version(), s->version());
|
||||
|
||||
for (auto& m : *_memtables) {
|
||||
m->set_schema(s);
|
||||
}
|
||||
|
||||
_cache.set_schema(s);
|
||||
_schema = std::move(s);
|
||||
}
|
||||
|
||||
168
database.hh
168
database.hh
@@ -56,6 +56,7 @@
|
||||
#include "tombstone.hh"
|
||||
#include "atomic_cell.hh"
|
||||
#include "query-request.hh"
|
||||
#include "query-result.hh"
|
||||
#include "keys.hh"
|
||||
#include "mutation.hh"
|
||||
#include "memtable.hh"
|
||||
@@ -63,7 +64,7 @@
|
||||
#include "mutation_reader.hh"
|
||||
#include "row_cache.hh"
|
||||
#include "compaction_strategy.hh"
|
||||
#include "sstables/compaction_manager.hh"
|
||||
#include "utils/compaction_manager.hh"
|
||||
#include "utils/exponential_backoff_retry.hh"
|
||||
#include "utils/histogram.hh"
|
||||
#include "sstables/estimated_histogram.hh"
|
||||
@@ -159,8 +160,8 @@ private:
|
||||
// the read lock, and the ones that wish to stop that process will take the write lock.
|
||||
rwlock _sstables_lock;
|
||||
mutable row_cache _cache; // Cache covers only sstables.
|
||||
std::experimental::optional<int64_t> _sstable_generation = {};
|
||||
|
||||
int64_t _sstable_generation = 1;
|
||||
unsigned _mutation_count = 0;
|
||||
db::replay_position _highest_flushed_rp;
|
||||
// Provided by the database that owns this commitlog
|
||||
db::commitlog* _commitlog;
|
||||
@@ -171,9 +172,6 @@ private:
|
||||
int _compaction_disabled = 0;
|
||||
class memtable_flush_queue;
|
||||
std::unique_ptr<memtable_flush_queue> _flush_queue;
|
||||
// Store generation of sstables being compacted at the moment. That's needed to prevent a
|
||||
// sstable from being compacted twice.
|
||||
std::unordered_set<unsigned long> _compacting_generations;
|
||||
private:
|
||||
void update_stats_for_new_sstable(uint64_t new_sstable_data_size);
|
||||
void add_sstable(sstables::sstable&& sstable);
|
||||
@@ -185,66 +183,24 @@ private:
|
||||
|
||||
// update the sstable generation, making sure that new new sstables don't overwrite this one.
|
||||
void update_sstables_known_generation(unsigned generation) {
|
||||
if (!_sstable_generation) {
|
||||
_sstable_generation = 1;
|
||||
}
|
||||
_sstable_generation = std::max<uint64_t>(*_sstable_generation, generation / smp::count + 1);
|
||||
_sstable_generation = std::max<uint64_t>(_sstable_generation, generation / smp::count + 1);
|
||||
}
|
||||
|
||||
uint64_t calculate_generation_for_new_table() {
|
||||
assert(_sstable_generation);
|
||||
// FIXME: better way of ensuring we don't attempt to
|
||||
// overwrite an existing table.
|
||||
return (*_sstable_generation)++ * smp::count + engine().cpu_id();
|
||||
}
|
||||
|
||||
// Rebuild existing _sstables with new_sstables added to it and sstables_to_remove removed from it.
|
||||
void rebuild_sstable_list(const std::vector<sstables::shared_sstable>& new_sstables,
|
||||
const std::vector<sstables::shared_sstable>& sstables_to_remove);
|
||||
private:
|
||||
// Creates a mutation reader which covers sstables.
|
||||
// Caller needs to ensure that column_family remains live (FIXME: relax this).
|
||||
// The 'range' parameter must be live as long as the reader is used.
|
||||
// Mutations returned by the reader will all have given schema.
|
||||
mutation_reader make_sstable_reader(schema_ptr schema, const query::partition_range& range, const io_priority_class& pc) const;
|
||||
mutation_reader make_sstable_reader(const query::partition_range& range) const;
|
||||
|
||||
mutation_source sstables_as_mutation_source();
|
||||
key_source sstables_as_key_source() const;
|
||||
partition_presence_checker make_partition_presence_checker(lw_shared_ptr<sstable_list> old_sstables);
|
||||
std::chrono::steady_clock::time_point _sstable_writes_disabled_at;
|
||||
void do_trigger_compaction();
|
||||
public:
|
||||
|
||||
// This function should be called when this column family is ready for writes, IOW,
|
||||
// to produce SSTables. Extensive details about why this is important can be found
|
||||
// in Scylla's Github Issue #1014
|
||||
//
|
||||
// Nothing should be writing to SSTables before we have the chance to populate the
|
||||
// existing SSTables and calculate what should the next generation number be.
|
||||
//
|
||||
// However, if that happens, we want to protect against it in a way that does not
|
||||
// involve overwriting existing tables. This is one of the ways to do it: every
|
||||
// column family starts in an unwriteable state, and when it can finally be written
|
||||
// to, we mark it as writeable.
|
||||
//
|
||||
// Note that this *cannot* be a part of add_column_family. That adds a column family
|
||||
// to a db in memory only, and if anybody is about to write to a CF, that was most
|
||||
// likely already called. We need to call this explicitly when we are sure we're ready
|
||||
// to issue disk operations safely.
|
||||
void mark_ready_for_writes() {
|
||||
update_sstables_known_generation(0);
|
||||
}
|
||||
|
||||
// Creates a mutation reader which covers all data sources for this column family.
|
||||
// Caller needs to ensure that column_family remains live (FIXME: relax this).
|
||||
// Note: for data queries use query() instead.
|
||||
// The 'range' parameter must be live as long as the reader is used.
|
||||
// Mutations returned by the reader will all have given schema.
|
||||
// If I/O needs to be issued to read anything in the specified range, the operations
|
||||
// will be scheduled under the priority class given by pc.
|
||||
mutation_reader make_reader(schema_ptr schema,
|
||||
const query::partition_range& range = query::full_partition_range,
|
||||
const io_priority_class& pc = default_priority_class()) const;
|
||||
mutation_reader make_reader(const query::partition_range& range = query::full_partition_range) const;
|
||||
|
||||
mutation_source as_mutation_source() const;
|
||||
|
||||
@@ -269,21 +225,16 @@ public:
|
||||
column_family(schema_ptr schema, config cfg, no_commitlog, compaction_manager&);
|
||||
column_family(column_family&&) = delete; // 'this' is being captured during construction
|
||||
~column_family();
|
||||
const schema_ptr& schema() const { return _schema; }
|
||||
void set_schema(schema_ptr);
|
||||
schema_ptr schema() const { return _schema; }
|
||||
db::commitlog* commitlog() { return _commitlog; }
|
||||
future<const_mutation_partition_ptr> find_partition(schema_ptr, const dht::decorated_key& key) const;
|
||||
future<const_mutation_partition_ptr> find_partition_slow(schema_ptr, const partition_key& key) const;
|
||||
future<const_row_ptr> find_row(schema_ptr, const dht::decorated_key& partition_key, clustering_key clustering_key) const;
|
||||
// Applies given mutation to this column family
|
||||
// The mutation is always upgraded to current schema.
|
||||
void apply(const frozen_mutation& m, const schema_ptr& m_schema, const db::replay_position& = db::replay_position());
|
||||
future<const_mutation_partition_ptr> find_partition(const dht::decorated_key& key) const;
|
||||
future<const_mutation_partition_ptr> find_partition_slow(const partition_key& key) const;
|
||||
future<const_row_ptr> find_row(const dht::decorated_key& partition_key, clustering_key clustering_key) const;
|
||||
void apply(const frozen_mutation& m, const db::replay_position& = db::replay_position());
|
||||
void apply(const mutation& m, const db::replay_position& = db::replay_position());
|
||||
|
||||
// Returns at most "cmd.limit" rows
|
||||
future<lw_shared_ptr<query::result>> query(schema_ptr,
|
||||
const query::read_command& cmd,
|
||||
const std::vector<query::partition_range>& ranges);
|
||||
future<lw_shared_ptr<query::result>> query(const query::read_command& cmd, const std::vector<query::partition_range>& ranges);
|
||||
|
||||
future<> populate(sstring datadir);
|
||||
|
||||
@@ -332,15 +283,7 @@ public:
|
||||
// not a real compaction policy.
|
||||
future<> compact_all_sstables();
|
||||
// Compact all sstables provided in the vector.
|
||||
// If cleanup is set to true, compaction_sstables will run on behalf of a cleanup job,
|
||||
// meaning that irrelevant keys will be discarded.
|
||||
future<> compact_sstables(sstables::compaction_descriptor descriptor, bool cleanup = false);
|
||||
// Performs a cleanup on each sstable of this column family, excluding
|
||||
// those ones that are irrelevant to this node or being compacted.
|
||||
// Cleanup is about discarding keys that are no longer relevant for a
|
||||
// given sstable, e.g. after node loses part of its token range because
|
||||
// of a newly added node.
|
||||
future<> cleanup_sstables(sstables::compaction_descriptor descriptor);
|
||||
future<> compact_sstables(sstables::compaction_descriptor descriptor);
|
||||
|
||||
future<bool> snapshot_exists(sstring name);
|
||||
|
||||
@@ -363,7 +306,7 @@ public:
|
||||
|
||||
void start_compaction();
|
||||
void trigger_compaction();
|
||||
future<> run_compaction(sstables::compaction_descriptor descriptor);
|
||||
future<> run_compaction();
|
||||
void set_compaction_strategy(sstables::compaction_strategy_type strategy);
|
||||
const sstables::compaction_strategy& get_compaction_strategy() const {
|
||||
return _compaction_strategy;
|
||||
@@ -389,19 +332,11 @@ public:
|
||||
Result run_with_compaction_disabled(Func && func) {
|
||||
++_compaction_disabled;
|
||||
return _compaction_manager.remove(this).then(std::forward<Func>(func)).finally([this] {
|
||||
// #934. The pending counter is actually a great indicator into whether we
|
||||
// actually need to trigger a compaction again.
|
||||
if (--_compaction_disabled == 0 && _stats.pending_compactions > 0) {
|
||||
// we're turning if on again, use function that does not increment
|
||||
// the counter further.
|
||||
do_trigger_compaction();
|
||||
if (--_compaction_disabled == 0) {
|
||||
trigger_compaction();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
std::unordered_set<unsigned long>& compacting_generations() {
|
||||
return _compacting_generations;
|
||||
}
|
||||
private:
|
||||
// One does not need to wait on this future if all we are interested in, is
|
||||
// initiating the write. The writes initiated here will eventually
|
||||
@@ -411,23 +346,25 @@ private:
|
||||
// But it is possible to synchronously wait for the seal to complete by
|
||||
// waiting on this future. This is useful in situations where we want to
|
||||
// synchronously flush data to disk.
|
||||
//
|
||||
// FIXME: A better interface would guarantee that all writes before this
|
||||
// one are also complete
|
||||
future<> seal_active_memtable();
|
||||
|
||||
// filter manifest.json files out
|
||||
static bool manifest_json_filter(const sstring& fname);
|
||||
seastar::gate _in_flight_seals;
|
||||
|
||||
// Iterate over all partitions. Protocol is the same as std::all_of(),
|
||||
// so that iteration can be stopped by returning false.
|
||||
// Func signature: bool (const decorated_key& dk, const mutation_partition& mp)
|
||||
template <typename Func>
|
||||
future<bool> for_all_partitions(schema_ptr, Func&& func) const;
|
||||
future<bool> for_all_partitions(Func&& func) const;
|
||||
future<sstables::entry_descriptor> probe_file(sstring sstdir, sstring fname);
|
||||
void seal_on_overflow();
|
||||
void check_valid_rp(const db::replay_position&) const;
|
||||
public:
|
||||
// Iterate over all partitions. Protocol is the same as std::all_of(),
|
||||
// so that iteration can be stopped by returning false.
|
||||
future<bool> for_all_partitions_slow(schema_ptr, std::function<bool (const dht::decorated_key&, const mutation_partition&)> func) const;
|
||||
future<bool> for_all_partitions_slow(std::function<bool (const dht::decorated_key&, const mutation_partition&)> func) const;
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& out, const column_family& cf);
|
||||
// Testing purposes.
|
||||
@@ -601,7 +538,7 @@ class database {
|
||||
circular_buffer<promise<>> _throttled_requests;
|
||||
|
||||
future<> init_commitlog();
|
||||
future<> apply_in_memory(const frozen_mutation& m, const schema_ptr& m_schema, const db::replay_position&);
|
||||
future<> apply_in_memory(const frozen_mutation&, const db::replay_position&);
|
||||
future<> populate(sstring datadir);
|
||||
future<> populate_keyspace(sstring datadir, sstring ks_name);
|
||||
|
||||
@@ -613,7 +550,7 @@ private:
|
||||
friend void db::system_keyspace::make(database& db, bool durable, bool volatile_testing_only);
|
||||
void setup_collectd();
|
||||
future<> throttle();
|
||||
future<> do_apply(schema_ptr, const frozen_mutation&);
|
||||
future<> do_apply(const frozen_mutation&);
|
||||
void unthrottle();
|
||||
public:
|
||||
static utils::UUID empty_version;
|
||||
@@ -644,6 +581,7 @@ public:
|
||||
|
||||
void add_column_family(schema_ptr schema, column_family::config cfg);
|
||||
|
||||
future<> update_column_family(const sstring& ks_name, const sstring& cf_name);
|
||||
future<> drop_column_family(db_clock::time_point changed_at, const sstring& ks_name, const sstring& cf_name);
|
||||
|
||||
/* throws std::out_of_range if missing */
|
||||
@@ -678,12 +616,11 @@ public:
|
||||
unsigned shard_of(const dht::token& t);
|
||||
unsigned shard_of(const mutation& m);
|
||||
unsigned shard_of(const frozen_mutation& m);
|
||||
future<lw_shared_ptr<query::result>> query(schema_ptr, const query::read_command& cmd, const std::vector<query::partition_range>& ranges);
|
||||
future<reconcilable_result> query_mutations(schema_ptr, const query::read_command& cmd, const query::partition_range& range);
|
||||
future<> apply(schema_ptr, const frozen_mutation&);
|
||||
future<lw_shared_ptr<query::result>> query(const query::read_command& cmd, const std::vector<query::partition_range>& ranges);
|
||||
future<reconcilable_result> query_mutations(const query::read_command& cmd, const query::partition_range& range);
|
||||
future<> apply(const frozen_mutation&);
|
||||
keyspace::config make_keyspace_config(const keyspace_metadata& ksm);
|
||||
const sstring& get_snitch_name() const;
|
||||
future<> clear_snapshot(sstring tag, std::vector<sstring> keyspace_names);
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& out, const database& db);
|
||||
const std::unordered_map<sstring, keyspace>& get_keyspaces() const {
|
||||
@@ -728,6 +665,53 @@ public:
|
||||
// FIXME: stub
|
||||
class secondary_index_manager {};
|
||||
|
||||
inline
|
||||
void
|
||||
column_family::apply(const mutation& m, const db::replay_position& rp) {
|
||||
utils::latency_counter lc;
|
||||
_stats.writes.set_latency(lc);
|
||||
active_memtable().apply(m, rp);
|
||||
seal_on_overflow();
|
||||
_stats.writes.mark(lc);
|
||||
if (lc.is_start()) {
|
||||
_stats.estimated_write.add(lc.latency(), _stats.writes.count);
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
void
|
||||
column_family::seal_on_overflow() {
|
||||
++_mutation_count;
|
||||
if (active_memtable().occupancy().total_space() >= _config.max_memtable_size) {
|
||||
// FIXME: if sparse, do some in-memory compaction first
|
||||
// FIXME: maybe merge with other in-memory memtables
|
||||
_mutation_count = 0;
|
||||
seal_active_memtable();
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
void
|
||||
column_family::check_valid_rp(const db::replay_position& rp) const {
|
||||
if (rp < _highest_flushed_rp) {
|
||||
throw replay_position_reordered_exception();
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
void
|
||||
column_family::apply(const frozen_mutation& m, const db::replay_position& rp) {
|
||||
utils::latency_counter lc;
|
||||
_stats.writes.set_latency(lc);
|
||||
check_valid_rp(rp);
|
||||
active_memtable().apply(m, rp);
|
||||
seal_on_overflow();
|
||||
_stats.writes.mark(lc);
|
||||
if (lc.is_start()) {
|
||||
_stats.estimated_write.add(lc.latency(), _stats.writes.count);
|
||||
}
|
||||
}
|
||||
|
||||
future<> update_schema_version_and_announce(distributed<service::storage_proxy>& proxy);
|
||||
|
||||
#endif /* DATABASE_HH_ */
|
||||
|
||||
@@ -31,19 +31,12 @@ class mutation_partition;
|
||||
// schema.hh
|
||||
class schema;
|
||||
class column_definition;
|
||||
class column_mapping;
|
||||
|
||||
// schema_mutations.hh
|
||||
class schema_mutations;
|
||||
|
||||
// keys.hh
|
||||
class exploded_clustering_prefix;
|
||||
class partition_key;
|
||||
class partition_key_view;
|
||||
class clustering_key_prefix;
|
||||
class clustering_key_prefix_view;
|
||||
using clustering_key = clustering_key_prefix;
|
||||
using clustering_key_view = clustering_key_prefix_view;
|
||||
|
||||
// memtable.hh
|
||||
class memtable;
|
||||
|
||||
@@ -45,7 +45,6 @@
|
||||
#include <boost/range/adaptor/sliced.hpp>
|
||||
|
||||
#include "batchlog_manager.hh"
|
||||
#include "canonical_mutation.hh"
|
||||
#include "service/storage_service.hh"
|
||||
#include "service/storage_proxy.hh"
|
||||
#include "system_keyspace.hh"
|
||||
@@ -58,7 +57,6 @@
|
||||
#include "db/config.hh"
|
||||
#include "gms/failure_detector.hh"
|
||||
#include "service/storage_service.hh"
|
||||
#include "schema_registry.hh"
|
||||
|
||||
static logging::logger logger("batchlog_manager");
|
||||
|
||||
@@ -118,14 +116,14 @@ mutation db::batchlog_manager::get_batch_log_mutation_for(const std::vector<muta
|
||||
auto key = partition_key::from_singular(*schema, id);
|
||||
auto timestamp = api::new_timestamp();
|
||||
auto data = [this, &mutations] {
|
||||
std::vector<canonical_mutation> fm(mutations.begin(), mutations.end());
|
||||
std::vector<frozen_mutation> fm(mutations.begin(), mutations.end());
|
||||
const auto size = std::accumulate(fm.begin(), fm.end(), size_t(0), [](size_t s, auto& m) {
|
||||
return s + serializer<canonical_mutation>{m}.size();
|
||||
return s + serializer<frozen_mutation>{m}.size();
|
||||
});
|
||||
bytes buf(bytes::initialized_later(), size);
|
||||
data_output out(buf);
|
||||
for (auto& m : fm) {
|
||||
serializer<canonical_mutation>{m}(out);
|
||||
serializer<frozen_mutation>{m}(out);
|
||||
}
|
||||
return buf;
|
||||
}();
|
||||
@@ -153,60 +151,49 @@ future<> db::batchlog_manager::replay_all_failed_batches() {
|
||||
|
||||
auto batch = [this, limiter](const cql3::untyped_result_set::row& row) {
|
||||
auto written_at = row.get_as<db_clock::time_point>("written_at");
|
||||
auto id = row.get_as<utils::UUID>("id");
|
||||
// enough time for the actual write + batchlog entry mutation delivery (two separate requests).
|
||||
// enough time for the actual write + batchlog entry mutation delivery (two separate requests).
|
||||
auto timeout = get_batch_log_timeout();
|
||||
if (db_clock::now() < written_at + timeout) {
|
||||
logger.debug("Skipping replay of {}, too fresh", id);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
// check version of serialization format
|
||||
if (!row.has("version")) {
|
||||
logger.warn("Skipping logged batch because of unknown version");
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
auto version = row.get_as<int32_t>("version");
|
||||
if (version != net::messaging_service::current_version) {
|
||||
logger.warn("Skipping logged batch because of incorrect version");
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
// not used currently. ever?
|
||||
//auto version = row.has("version") ? row.get_as<uint32_t>("version") : /*MessagingService.VERSION_12*/6u;
|
||||
auto id = row.get_as<utils::UUID>("id");
|
||||
auto data = row.get_blob("data");
|
||||
|
||||
logger.debug("Replaying batch {}", id);
|
||||
|
||||
auto fms = make_lw_shared<std::deque<canonical_mutation>>();
|
||||
auto fms = make_lw_shared<std::deque<frozen_mutation>>();
|
||||
data_input in(data);
|
||||
while (in.has_next()) {
|
||||
fms->emplace_back(serializer<canonical_mutation>::read(in));
|
||||
fms->emplace_back(serializer<frozen_mutation>::read(in));
|
||||
}
|
||||
|
||||
auto mutations = make_lw_shared<std::vector<mutation>>();
|
||||
auto size = data.size();
|
||||
|
||||
return map_reduce(*fms, [this, written_at] (canonical_mutation& fm) {
|
||||
return system_keyspace::get_truncated_at(fm.column_family_id()).then([written_at, &fm] (db_clock::time_point t) ->
|
||||
std::experimental::optional<std::reference_wrapper<canonical_mutation>> {
|
||||
if (written_at > t) {
|
||||
return { std::ref(fm) };
|
||||
} else {
|
||||
return {};
|
||||
}
|
||||
});
|
||||
},
|
||||
std::vector<mutation>(),
|
||||
[this] (std::vector<mutation> mutations, std::experimental::optional<std::reference_wrapper<canonical_mutation>> fm) {
|
||||
if (fm) {
|
||||
schema_ptr s = _qp.db().local().find_schema(fm.value().get().column_family_id());
|
||||
mutations.emplace_back(fm.value().get().to_mutation(s));
|
||||
return repeat([this, fms = std::move(fms), written_at, mutations]() mutable {
|
||||
if (fms->empty()) {
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||
}
|
||||
return mutations;
|
||||
}).then([this, id, limiter, written_at, size, fms] (std::vector<mutation> mutations) {
|
||||
if (mutations.empty()) {
|
||||
auto& fm = fms->front();
|
||||
auto mid = fm.column_family_id();
|
||||
return system_keyspace::get_truncated_at(mid).then([this, &fm, written_at, mutations](db_clock::time_point t) {
|
||||
auto schema = _qp.db().local().find_schema(fm.column_family_id());
|
||||
if (written_at > t) {
|
||||
auto schema = _qp.db().local().find_schema(fm.column_family_id());
|
||||
mutations->emplace_back(fm.unfreeze(schema));
|
||||
}
|
||||
}).then([fms] {
|
||||
fms->pop_front();
|
||||
return make_ready_future<stop_iteration>(stop_iteration::no);
|
||||
});
|
||||
}).then([this, id, mutations, limiter, written_at, size] {
|
||||
if (mutations->empty()) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
const auto ttl = [this, &mutations, written_at]() -> clock_type {
|
||||
const auto ttl = [this, mutations, written_at]() -> clock_type {
|
||||
/*
|
||||
* Calculate ttl for the mutations' hints (and reduce ttl by the time the mutations spent in the batchlog).
|
||||
* This ensures that deletes aren't "undone" by an old batch replay.
|
||||
@@ -228,8 +215,8 @@ future<> db::batchlog_manager::replay_all_failed_batches() {
|
||||
// Our normal write path does not add much redundancy to the dispatch, and rate is handled after send
|
||||
// in both cases.
|
||||
// FIXME: verify that the above is reasonably true.
|
||||
return limiter->reserve(size).then([this, mutations = std::move(mutations), id] {
|
||||
return _qp.proxy().local().mutate(mutations, db::consistency_level::ANY);
|
||||
return limiter->reserve(size).then([this, mutations, id] {
|
||||
return _qp.proxy().local().mutate(std::move(*mutations), db::consistency_level::ANY);
|
||||
});
|
||||
}).then([this, id] {
|
||||
// delete batch
|
||||
|
||||
@@ -64,8 +64,6 @@
|
||||
#include "utils/crc.hh"
|
||||
#include "utils/runtime.hh"
|
||||
#include "log.hh"
|
||||
#include "commitlog_entry.hh"
|
||||
#include "service/priority_manager.hh"
|
||||
|
||||
static logging::logger logger("commitlog");
|
||||
|
||||
@@ -157,9 +155,6 @@ public:
|
||||
bool _shutdown = false;
|
||||
|
||||
semaphore _new_segment_semaphore;
|
||||
semaphore _write_semaphore;
|
||||
semaphore _flush_semaphore;
|
||||
|
||||
scollectd::registrations _regs;
|
||||
|
||||
// TODO: verify that we're ok with not-so-great granularity
|
||||
@@ -175,11 +170,7 @@ public:
|
||||
uint64_t bytes_slack = 0;
|
||||
uint64_t segments_created = 0;
|
||||
uint64_t segments_destroyed = 0;
|
||||
uint64_t pending_writes = 0;
|
||||
uint64_t pending_flushes = 0;
|
||||
uint64_t pending_allocations = 0;
|
||||
uint64_t write_limit_exceeded = 0;
|
||||
uint64_t flush_limit_exceeded = 0;
|
||||
uint64_t pending_operations = 0;
|
||||
uint64_t total_size = 0;
|
||||
uint64_t buffer_list_bytes = 0;
|
||||
uint64_t total_size_on_disk = 0;
|
||||
@@ -187,73 +178,33 @@ public:
|
||||
|
||||
stats totals;
|
||||
|
||||
future<> begin_write() {
|
||||
void begin_op() {
|
||||
_gate.enter();
|
||||
++totals.pending_writes; // redundant, given semaphore. but easier to read
|
||||
if (totals.pending_writes >= cfg.max_active_writes) {
|
||||
++totals.write_limit_exceeded;
|
||||
logger.trace("Write ops overflow: {}. Will block.", totals.pending_writes);
|
||||
}
|
||||
return _write_semaphore.wait();
|
||||
++totals.pending_operations;
|
||||
}
|
||||
void end_write() {
|
||||
_write_semaphore.signal();
|
||||
--totals.pending_writes;
|
||||
void end_op() {
|
||||
--totals.pending_operations;
|
||||
_gate.leave();
|
||||
}
|
||||
|
||||
future<> begin_flush() {
|
||||
_gate.enter();
|
||||
++totals.pending_flushes;
|
||||
if (totals.pending_flushes >= cfg.max_active_flushes) {
|
||||
++totals.flush_limit_exceeded;
|
||||
logger.trace("Flush ops overflow: {}. Will block.", totals.pending_flushes);
|
||||
}
|
||||
return _flush_semaphore.wait();
|
||||
}
|
||||
void end_flush() {
|
||||
_flush_semaphore.signal();
|
||||
--totals.pending_flushes;
|
||||
_gate.leave();
|
||||
}
|
||||
|
||||
bool should_wait_for_write() const {
|
||||
return _write_semaphore.waiters() > 0 || _flush_semaphore.waiters() > 0;
|
||||
}
|
||||
|
||||
segment_manager(config c)
|
||||
: cfg([&c] {
|
||||
config cfg(c);
|
||||
|
||||
if (cfg.commit_log_location.empty()) {
|
||||
cfg.commit_log_location = "/var/lib/scylla/commitlog";
|
||||
}
|
||||
|
||||
if (cfg.max_active_writes == 0) {
|
||||
cfg.max_active_writes = // TODO: call someone to get an idea...
|
||||
25 * smp::count;
|
||||
}
|
||||
cfg.max_active_writes = std::max(uint64_t(1), cfg.max_active_writes / smp::count);
|
||||
if (cfg.max_active_flushes == 0) {
|
||||
cfg.max_active_flushes = // TODO: call someone to get an idea...
|
||||
5 * smp::count;
|
||||
}
|
||||
cfg.max_active_flushes = std::max(uint64_t(1), cfg.max_active_flushes / smp::count);
|
||||
|
||||
return cfg;
|
||||
}())
|
||||
, max_size(std::min<size_t>(std::numeric_limits<position_type>::max(), std::max<size_t>(cfg.commitlog_segment_size_in_mb, 1) * 1024 * 1024))
|
||||
, max_mutation_size(max_size >> 1)
|
||||
, max_disk_size(size_t(std::ceil(cfg.commitlog_total_space_in_mb / double(smp::count))) * 1024 * 1024)
|
||||
, _write_semaphore(cfg.max_active_writes)
|
||||
, _flush_semaphore(cfg.max_active_flushes)
|
||||
: cfg(c), max_size(
|
||||
std::min<size_t>(std::numeric_limits<position_type>::max(),
|
||||
std::max<size_t>(cfg.commitlog_segment_size_in_mb,
|
||||
1) * 1024 * 1024)), max_mutation_size(
|
||||
max_size >> 1), max_disk_size(
|
||||
size_t(
|
||||
std::ceil(
|
||||
cfg.commitlog_total_space_in_mb
|
||||
/ double(smp::count))) * 1024 * 1024)
|
||||
{
|
||||
assert(max_size > 0);
|
||||
|
||||
if (cfg.commit_log_location.empty()) {
|
||||
cfg.commit_log_location = "/var/lib/scylla/commitlog";
|
||||
}
|
||||
logger.trace("Commitlog {} maximum disk size: {} MB / cpu ({} cpus)",
|
||||
cfg.commit_log_location, max_disk_size / (1024 * 1024),
|
||||
smp::count);
|
||||
|
||||
_regs = create_counters();
|
||||
}
|
||||
~segment_manager() {
|
||||
@@ -287,8 +238,6 @@ public:
|
||||
}
|
||||
|
||||
std::vector<sstring> get_active_names() const;
|
||||
uint64_t get_num_dirty_segments() const;
|
||||
uint64_t get_num_active_segments() const;
|
||||
|
||||
using buffer_type = temporary_buffer<char>;
|
||||
|
||||
@@ -392,39 +341,9 @@ class db::commitlog::segment: public enable_lw_shared_from_this<segment> {
|
||||
std::unordered_map<cf_id_type, position_type> _cf_dirty;
|
||||
time_point _sync_time;
|
||||
seastar::gate _gate;
|
||||
uint64_t _write_waiters = 0;
|
||||
semaphore _queue;
|
||||
|
||||
std::unordered_set<table_schema_version> _known_schema_versions;
|
||||
|
||||
friend std::ostream& operator<<(std::ostream&, const segment&);
|
||||
friend class segment_manager;
|
||||
|
||||
future<> begin_flush() {
|
||||
// This is maintaining the semantica of only using the write-lock
|
||||
// as a gate for flushing, i.e. once we've begun a flush for position X
|
||||
// we are ok with writes to positions > X
|
||||
return _dwrite.write_lock().then(std::bind(&segment_manager::begin_flush, _segment_manager)).finally([this] {
|
||||
_dwrite.write_unlock();
|
||||
});
|
||||
}
|
||||
|
||||
void end_flush() {
|
||||
_segment_manager->end_flush();
|
||||
}
|
||||
|
||||
future<> begin_write() {
|
||||
// This is maintaining the semantica of only using the write-lock
|
||||
// as a gate for flushing, i.e. once we've begun a flush for position X
|
||||
// we are ok with writes to positions > X
|
||||
return _dwrite.read_lock().then(std::bind(&segment_manager::begin_write, _segment_manager));
|
||||
}
|
||||
|
||||
void end_write() {
|
||||
_segment_manager->end_write();
|
||||
_dwrite.read_unlock();
|
||||
}
|
||||
|
||||
public:
|
||||
struct cf_mark {
|
||||
const segment& s;
|
||||
@@ -446,7 +365,7 @@ public:
|
||||
|
||||
segment(segment_manager* m, const descriptor& d, file && f, bool active)
|
||||
: _segment_manager(m), _desc(std::move(d)), _file(std::move(f)), _sync_time(
|
||||
clock_type::now()), _queue(0)
|
||||
clock_type::now())
|
||||
{
|
||||
++_segment_manager->totals.segments_created;
|
||||
logger.debug("Created new {} segment {}", active ? "active" : "reserve", *this);
|
||||
@@ -464,19 +383,9 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
bool is_schema_version_known(schema_ptr s) {
|
||||
return _known_schema_versions.count(s->version());
|
||||
}
|
||||
void add_schema_version(schema_ptr s) {
|
||||
_known_schema_versions.emplace(s->version());
|
||||
}
|
||||
void forget_schema_versions() {
|
||||
_known_schema_versions.clear();
|
||||
}
|
||||
|
||||
bool must_sync() {
|
||||
if (_segment_manager->cfg.mode == sync_mode::BATCH) {
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
auto now = clock_type::now();
|
||||
auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
@@ -492,9 +401,8 @@ public:
|
||||
*/
|
||||
future<sseg_ptr> finish_and_get_new() {
|
||||
_closed = true;
|
||||
return maybe_wait_for_write(sync()).then([](sseg_ptr s) {
|
||||
return s->_segment_manager->active_segment();
|
||||
});
|
||||
sync();
|
||||
return _segment_manager->active_segment();
|
||||
}
|
||||
void reset_sync_time() {
|
||||
_sync_time = clock_type::now();
|
||||
@@ -509,7 +417,7 @@ public:
|
||||
logger.trace("Sync not needed {}: ({} / {})", *this, position(), _flush_pos);
|
||||
return make_ready_future<sseg_ptr>(shared_from_this());
|
||||
}
|
||||
return cycle().then([](sseg_ptr seg) {
|
||||
return cycle().then([](auto seg) {
|
||||
return seg->flush();
|
||||
});
|
||||
}
|
||||
@@ -532,14 +440,16 @@ public:
|
||||
// This is not 100% neccesary, we really only need the ones below our flush pos,
|
||||
// but since we pretty much assume that task ordering will make this the case anyway...
|
||||
|
||||
return begin_flush().then(
|
||||
return _dwrite.write_lock().then(
|
||||
[this, me, pos]() mutable {
|
||||
_dwrite.write_unlock(); // release it already.
|
||||
pos = std::max(pos, _file_pos);
|
||||
if (pos <= _flush_pos) {
|
||||
logger.trace("{} already synced! ({} < {})", *this, pos, _flush_pos);
|
||||
return make_ready_future<sseg_ptr>(std::move(me));
|
||||
}
|
||||
return _file.flush().then_wrapped([this, pos, me](future<> f) {
|
||||
_segment_manager->begin_op();
|
||||
return _file.flush().then_wrapped([this, pos, me](auto f) {
|
||||
try {
|
||||
f.get();
|
||||
// TODO: retry/ignore/fail/stop - optional behaviour in origin.
|
||||
@@ -552,50 +462,16 @@ public:
|
||||
logger.error("Failed to flush commits to disk: {}", std::current_exception());
|
||||
throw;
|
||||
}
|
||||
}).finally([this, me] {
|
||||
_segment_manager->end_op();
|
||||
});
|
||||
}).finally([this] {
|
||||
end_flush();
|
||||
});
|
||||
});
|
||||
}
|
||||
/**
|
||||
* Allocate a new buffer
|
||||
*/
|
||||
void new_buffer(size_t s) {
|
||||
assert(_buffer.empty());
|
||||
|
||||
auto overhead = segment_overhead_size;
|
||||
if (_file_pos == 0) {
|
||||
overhead += descriptor_header_size;
|
||||
}
|
||||
|
||||
auto a = align_up(s + overhead, alignment);
|
||||
auto k = std::max(a, default_size);
|
||||
|
||||
for (;;) {
|
||||
try {
|
||||
_buffer = _segment_manager->acquire_buffer(k);
|
||||
break;
|
||||
} catch (std::bad_alloc&) {
|
||||
logger.warn("Could not allocate {} k bytes output buffer ({} k required)", k / 1024, a / 1024);
|
||||
if (k > a) {
|
||||
k = std::max(a, k / 2);
|
||||
logger.debug("Trying reduced size: {} k", k / 1024);
|
||||
continue;
|
||||
}
|
||||
throw;
|
||||
}
|
||||
}
|
||||
_buf_pos = overhead;
|
||||
auto * p = reinterpret_cast<uint32_t *>(_buffer.get_write());
|
||||
std::fill(p, p + overhead, 0);
|
||||
_segment_manager->totals.total_size += k;
|
||||
}
|
||||
|
||||
/**
|
||||
* Send any buffer contents to disk and get a new tmp buffer
|
||||
*/
|
||||
// See class comment for info
|
||||
future<sseg_ptr> cycle() {
|
||||
future<sseg_ptr> cycle(size_t s = 0) {
|
||||
auto size = clear_buffer_slack();
|
||||
auto buf = std::move(_buffer);
|
||||
auto off = _file_pos;
|
||||
@@ -603,6 +479,36 @@ public:
|
||||
_file_pos += size;
|
||||
_buf_pos = 0;
|
||||
|
||||
// if we need new buffer, get one.
|
||||
// TODO: keep a queue of available buffers?
|
||||
if (s > 0) {
|
||||
auto overhead = segment_overhead_size;
|
||||
if (_file_pos == 0) {
|
||||
overhead += descriptor_header_size;
|
||||
}
|
||||
|
||||
auto a = align_up(s + overhead, alignment);
|
||||
auto k = std::max(a, default_size);
|
||||
|
||||
for (;;) {
|
||||
try {
|
||||
_buffer = _segment_manager->acquire_buffer(k);
|
||||
break;
|
||||
} catch (std::bad_alloc&) {
|
||||
logger.warn("Could not allocate {} k bytes output buffer ({} k required)", k / 1024, a / 1024);
|
||||
if (k > a) {
|
||||
k = std::max(a, k / 2);
|
||||
logger.debug("Trying reduced size: {} k", k / 1024);
|
||||
continue;
|
||||
}
|
||||
throw;
|
||||
}
|
||||
}
|
||||
_buf_pos = overhead;
|
||||
auto * p = reinterpret_cast<uint32_t *>(_buffer.get_write());
|
||||
std::fill(p, p + overhead, 0);
|
||||
_segment_manager->totals.total_size += k;
|
||||
}
|
||||
auto me = shared_from_this();
|
||||
assert(!me.owned());
|
||||
|
||||
@@ -639,15 +545,13 @@ public:
|
||||
out.write(uint32_t(_file_pos));
|
||||
out.write(crc.checksum());
|
||||
|
||||
forget_schema_versions();
|
||||
|
||||
// acquire read lock
|
||||
return begin_write().then([this, size, off, buf = std::move(buf), me]() mutable {
|
||||
return _dwrite.read_lock().then([this, size, off, buf = std::move(buf), me]() mutable {
|
||||
auto written = make_lw_shared<size_t>(0);
|
||||
auto p = buf.get();
|
||||
_segment_manager->begin_op();
|
||||
return repeat([this, size, off, written, p]() mutable {
|
||||
auto&& priority_class = service::get_local_commitlog_priority();
|
||||
return _file.dma_write(off + *written, p + *written, size - *written, priority_class).then_wrapped([this, size, written](future<size_t>&& f) {
|
||||
return _file.dma_write(off + *written, p + *written, size - *written).then_wrapped([this, size, written](auto&& f) {
|
||||
try {
|
||||
auto bytes = std::get<0>(f.get());
|
||||
*written += bytes;
|
||||
@@ -671,59 +575,20 @@ public:
|
||||
});
|
||||
}).finally([this, buf = std::move(buf)]() mutable {
|
||||
_segment_manager->release_buffer(std::move(buf));
|
||||
_segment_manager->end_op();
|
||||
});
|
||||
}).then([me] {
|
||||
return make_ready_future<sseg_ptr>(std::move(me));
|
||||
}).finally([me, this]() {
|
||||
end_write(); // release
|
||||
});
|
||||
}
|
||||
|
||||
future<sseg_ptr> maybe_wait_for_write(future<sseg_ptr> f) {
|
||||
if (_segment_manager->should_wait_for_write()) {
|
||||
++_write_waiters;
|
||||
logger.trace("Too many pending writes. Must wait.");
|
||||
return f.finally([this] {
|
||||
if (--_write_waiters == 0) {
|
||||
_queue.signal(_queue.waiters());
|
||||
}
|
||||
});
|
||||
}
|
||||
return make_ready_future<sseg_ptr>(shared_from_this());
|
||||
}
|
||||
|
||||
/**
|
||||
* If an allocation causes a write, and the write causes a block,
|
||||
* any allocations post that need to wait for this to finish,
|
||||
* other wise we will just continue building up more write queue
|
||||
* eventually (+ loose more ordering)
|
||||
*
|
||||
* Some caution here, since maybe_wait_for_write actually
|
||||
* releases _all_ queued up ops when finishing, we could get
|
||||
* "bursts" of alloc->write, causing build-ups anyway.
|
||||
* This should be measured properly. For now I am hoping this
|
||||
* will work out as these should "block as a group". However,
|
||||
* buffer memory usage might grow...
|
||||
*/
|
||||
bool must_wait_for_alloc() {
|
||||
return _write_waiters > 0;
|
||||
}
|
||||
|
||||
future<sseg_ptr> wait_for_alloc() {
|
||||
auto me = shared_from_this();
|
||||
++_segment_manager->totals.pending_allocations;
|
||||
logger.trace("Previous allocation is blocking. Must wait.");
|
||||
return _queue.wait().then([me] { // TODO: do we need a finally?
|
||||
--me->_segment_manager->totals.pending_allocations;
|
||||
return make_ready_future<sseg_ptr>(me);
|
||||
_dwrite.read_unlock(); // release
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a "mutation" to the segment.
|
||||
*/
|
||||
future<replay_position> allocate(const cf_id_type& id, shared_ptr<entry_writer> writer) {
|
||||
const auto size = writer->size(*this);
|
||||
future<replay_position> allocate(const cf_id_type& id, size_t size,
|
||||
serializer_func func) {
|
||||
const auto s = size + entry_overhead_size; // total size
|
||||
if (s > _segment_manager->max_mutation_size) {
|
||||
return make_exception_future<replay_position>(
|
||||
@@ -732,26 +597,23 @@ public:
|
||||
+ " bytes is too large for the maxiumum size of "
|
||||
+ std::to_string(_segment_manager->max_mutation_size)));
|
||||
}
|
||||
|
||||
std::experimental::optional<future<sseg_ptr>> op;
|
||||
|
||||
if (must_sync()) {
|
||||
op = sync();
|
||||
} else if (must_wait_for_alloc()) {
|
||||
op = wait_for_alloc();
|
||||
} else if (!is_still_allocating() || position() + s > _segment_manager->max_size) { // would we make the file too big?
|
||||
// do this in next segment instead.
|
||||
op = finish_and_get_new();
|
||||
} else if (_buffer.empty()) {
|
||||
new_buffer(s);
|
||||
} else if (s > (_buffer.size() - _buf_pos)) { // enough data?
|
||||
op = maybe_wait_for_write(cycle());
|
||||
}
|
||||
|
||||
if (op) {
|
||||
return op->then([id, writer = std::move(writer)] (sseg_ptr new_seg) mutable {
|
||||
return new_seg->allocate(id, std::move(writer));
|
||||
});
|
||||
// would we make the file too big?
|
||||
for (;;) {
|
||||
if (position() + s > _segment_manager->max_size) {
|
||||
// do this in next segment instead.
|
||||
return finish_and_get_new().then(
|
||||
[id, size, func = std::move(func)](auto new_seg) {
|
||||
return new_seg->allocate(id, size, func);
|
||||
});
|
||||
}
|
||||
// enough data?
|
||||
if (s > (_buffer.size() - _buf_pos)) {
|
||||
// TODO: iff we have to many writes running, maybe we should
|
||||
// wait for this?
|
||||
cycle(s);
|
||||
continue; // re-check file size overflow
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
_gate.enter(); // this might throw. I guess we accept this?
|
||||
@@ -772,7 +634,7 @@ public:
|
||||
out.write(crc.checksum());
|
||||
|
||||
// actual data
|
||||
writer->write(*this, out);
|
||||
func(out);
|
||||
|
||||
crc.process_bytes(p + 2 * sizeof(uint32_t), size);
|
||||
|
||||
@@ -783,8 +645,9 @@ public:
|
||||
|
||||
_gate.leave();
|
||||
|
||||
if (_segment_manager->cfg.mode == sync_mode::BATCH) {
|
||||
return sync().then([rp](sseg_ptr) {
|
||||
// finally, check if we're required to sync.
|
||||
if (must_sync()) {
|
||||
return sync().then([rp](auto seg) {
|
||||
return make_ready_future<replay_position>(rp);
|
||||
});
|
||||
}
|
||||
@@ -873,7 +736,7 @@ db::commitlog::segment_manager::list_descriptors(sstring dirname) {
|
||||
}
|
||||
return make_ready_future<std::experimental::optional<directory_entry_type>>(de.type);
|
||||
};
|
||||
return entry_type(de).then([this, de](std::experimental::optional<directory_entry_type> type) {
|
||||
return entry_type(de).then([this, de](auto type) {
|
||||
if (type == directory_entry_type::regular && de.name[0] != '.') {
|
||||
try {
|
||||
_result.emplace_back(de.name);
|
||||
@@ -890,7 +753,7 @@ db::commitlog::segment_manager::list_descriptors(sstring dirname) {
|
||||
}
|
||||
};
|
||||
|
||||
return engine().open_directory(dirname).then([this, dirname](file dir) {
|
||||
return engine().open_directory(dirname).then([this, dirname](auto dir) {
|
||||
auto h = make_lw_shared<helper>(std::move(dirname), std::move(dir));
|
||||
return h->done().then([h]() {
|
||||
return make_ready_future<std::vector<db::commitlog::descriptor>>(std::move(h->_result));
|
||||
@@ -899,7 +762,7 @@ db::commitlog::segment_manager::list_descriptors(sstring dirname) {
|
||||
}
|
||||
|
||||
future<> db::commitlog::segment_manager::init() {
|
||||
return list_descriptors(cfg.commit_log_location).then([this](std::vector<descriptor> descs) {
|
||||
return list_descriptors(cfg.commit_log_location).then([this](auto descs) {
|
||||
segment_id_type id = std::chrono::duration_cast<std::chrono::milliseconds>(runtime::get_boot_time().time_since_epoch()).count() + 1;
|
||||
for (auto& d : descs) {
|
||||
id = std::max(id, replay_position(d.id).base_id());
|
||||
@@ -969,23 +832,9 @@ scollectd::registrations db::commitlog::segment_manager::create_counters() {
|
||||
),
|
||||
|
||||
add_polled_metric(type_instance_id("commitlog"
|
||||
, per_cpu_plugin_instance, "queue_length", "pending_writes")
|
||||
, make_typed(data_type::GAUGE, totals.pending_writes)
|
||||
, per_cpu_plugin_instance, "queue_length", "pending_operations")
|
||||
, make_typed(data_type::GAUGE, totals.pending_operations)
|
||||
),
|
||||
add_polled_metric(type_instance_id("commitlog"
|
||||
, per_cpu_plugin_instance, "queue_length", "pending_flushes")
|
||||
, make_typed(data_type::GAUGE, totals.pending_flushes)
|
||||
),
|
||||
|
||||
add_polled_metric(type_instance_id("commitlog"
|
||||
, per_cpu_plugin_instance, "total_operations", "write_limit_exceeded")
|
||||
, make_typed(data_type::DERIVE, totals.write_limit_exceeded)
|
||||
),
|
||||
add_polled_metric(type_instance_id("commitlog"
|
||||
, per_cpu_plugin_instance, "total_operations", "flush_limit_exceeded")
|
||||
, make_typed(data_type::DERIVE, totals.flush_limit_exceeded)
|
||||
),
|
||||
|
||||
add_polled_metric(type_instance_id("commitlog"
|
||||
, per_cpu_plugin_instance, "memory", "total_size")
|
||||
, make_typed(data_type::GAUGE, totals.total_size)
|
||||
@@ -1038,7 +887,7 @@ void db::commitlog::segment_manager::flush_segments(bool force) {
|
||||
|
||||
future<db::commitlog::segment_manager::sseg_ptr> db::commitlog::segment_manager::allocate_segment(bool active) {
|
||||
descriptor d(next_id());
|
||||
return open_file_dma(cfg.commit_log_location + "/" + d.filename(), open_flags::wo | open_flags::create).then([this, d, active](file f) {
|
||||
return engine().open_file_dma(cfg.commit_log_location + "/" + d.filename(), open_flags::wo | open_flags::create).then([this, d, active](file f) {
|
||||
// xfs doesn't like files extended betond eof, so enlarge the file
|
||||
return f.truncate(max_size).then([this, d, active, f] () mutable {
|
||||
auto s = make_lw_shared<segment>(this, d, std::move(f), active);
|
||||
@@ -1114,7 +963,7 @@ std::ostream& db::operator<<(std::ostream& out, const db::replay_position& p) {
|
||||
void db::commitlog::segment_manager::discard_unused_segments() {
|
||||
logger.trace("Checking for unused segments ({} active)", _segments.size());
|
||||
|
||||
auto i = std::remove_if(_segments.begin(), _segments.end(), [=](sseg_ptr s) {
|
||||
auto i = std::remove_if(_segments.begin(), _segments.end(), [=](auto s) {
|
||||
if (s->can_delete()) {
|
||||
logger.debug("Segment {} is unused", *s);
|
||||
return true;
|
||||
@@ -1208,7 +1057,7 @@ void db::commitlog::segment_manager::on_timer() {
|
||||
return this->allocate_segment(false).then([this](sseg_ptr s) {
|
||||
if (!_shutdown) {
|
||||
// insertion sort.
|
||||
auto i = std::upper_bound(_reserve_segments.begin(), _reserve_segments.end(), s, [](sseg_ptr s1, sseg_ptr s2) {
|
||||
auto i = std::upper_bound(_reserve_segments.begin(), _reserve_segments.end(), s, [](auto s1, auto s2) {
|
||||
const descriptor& d1 = s1->_desc;
|
||||
const descriptor& d2 = s2->_desc;
|
||||
return d1.id < d2.id;
|
||||
@@ -1220,7 +1069,7 @@ void db::commitlog::segment_manager::on_timer() {
|
||||
--_reserve_allocating;
|
||||
});
|
||||
});
|
||||
}).handle_exception([](std::exception_ptr ep) {
|
||||
}).handle_exception([](auto ep) {
|
||||
logger.warn("Exception in segment reservation: {}", ep);
|
||||
});
|
||||
arm();
|
||||
@@ -1237,19 +1086,6 @@ std::vector<sstring> db::commitlog::segment_manager::get_active_names() const {
|
||||
return res;
|
||||
}
|
||||
|
||||
uint64_t db::commitlog::segment_manager::get_num_dirty_segments() const {
|
||||
return std::count_if(_segments.begin(), _segments.end(), [](sseg_ptr s) {
|
||||
return !s->is_still_allocating() && !s->is_clean();
|
||||
});
|
||||
}
|
||||
|
||||
uint64_t db::commitlog::segment_manager::get_num_active_segments() const {
|
||||
return std::count_if(_segments.begin(), _segments.end(), [](sseg_ptr s) {
|
||||
return s->is_still_allocating();
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
db::commitlog::segment_manager::buffer_type db::commitlog::segment_manager::acquire_buffer(size_t s) {
|
||||
auto i = _temp_buffers.begin();
|
||||
auto e = _temp_buffers.end();
|
||||
@@ -1292,44 +1128,8 @@ void db::commitlog::segment_manager::release_buffer(buffer_type&& b) {
|
||||
*/
|
||||
future<db::replay_position> db::commitlog::add(const cf_id_type& id,
|
||||
size_t size, serializer_func func) {
|
||||
class serializer_func_entry_writer final : public entry_writer {
|
||||
serializer_func _func;
|
||||
size_t _size;
|
||||
public:
|
||||
serializer_func_entry_writer(size_t sz, serializer_func func)
|
||||
: _func(std::move(func)), _size(sz)
|
||||
{ }
|
||||
virtual size_t size(segment&) override { return _size; }
|
||||
virtual void write(segment&, output& out) override {
|
||||
_func(out);
|
||||
}
|
||||
};
|
||||
auto writer = ::make_shared<serializer_func_entry_writer>(size, std::move(func));
|
||||
return _segment_manager->active_segment().then([id, writer] (auto s) {
|
||||
return s->allocate(id, writer);
|
||||
});
|
||||
}
|
||||
|
||||
future<db::replay_position> db::commitlog::add_entry(const cf_id_type& id, const commitlog_entry_writer& cew)
|
||||
{
|
||||
class cl_entry_writer final : public entry_writer {
|
||||
commitlog_entry_writer _writer;
|
||||
public:
|
||||
cl_entry_writer(const commitlog_entry_writer& wr) : _writer(wr) { }
|
||||
virtual size_t size(segment& seg) override {
|
||||
_writer.set_with_schema(!seg.is_schema_version_known(_writer.schema()));
|
||||
return _writer.size();
|
||||
}
|
||||
virtual void write(segment& seg, output& out) override {
|
||||
if (_writer.with_schema()) {
|
||||
seg.add_schema_version(_writer.schema());
|
||||
}
|
||||
_writer.write(out);
|
||||
}
|
||||
};
|
||||
auto writer = ::make_shared<cl_entry_writer>(cew);
|
||||
return _segment_manager->active_segment().then([id, writer] (auto s) {
|
||||
return s->allocate(id, writer);
|
||||
return _segment_manager->active_segment().then([=](auto s) {
|
||||
return s->allocate(id, size, std::move(func));
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1400,18 +1200,11 @@ future<> db::commitlog::shutdown() {
|
||||
return _segment_manager->shutdown();
|
||||
}
|
||||
|
||||
|
||||
size_t db::commitlog::max_record_size() const {
|
||||
return _segment_manager->max_mutation_size - segment::entry_overhead_size;
|
||||
}
|
||||
|
||||
uint64_t db::commitlog::max_active_writes() const {
|
||||
return _segment_manager->cfg.max_active_writes;
|
||||
}
|
||||
|
||||
uint64_t db::commitlog::max_active_flushes() const {
|
||||
return _segment_manager->cfg.max_active_flushes;
|
||||
}
|
||||
|
||||
future<> db::commitlog::clear() {
|
||||
return _segment_manager->clear();
|
||||
}
|
||||
@@ -1422,7 +1215,7 @@ const db::commitlog::config& db::commitlog::active_config() const {
|
||||
|
||||
future<std::unique_ptr<subscription<temporary_buffer<char>, db::replay_position>>>
|
||||
db::commitlog::read_log_file(const sstring& filename, commit_load_reader_func next, position_type off) {
|
||||
return open_file_dma(filename, open_flags::ro).then([next = std::move(next), off](file f) {
|
||||
return engine().open_file_dma(filename, open_flags::ro).then([next = std::move(next), off](file f) {
|
||||
return std::make_unique<subscription<temporary_buffer<char>, replay_position>>(
|
||||
read_log_file(std::move(f), std::move(next), off));
|
||||
});
|
||||
@@ -1557,17 +1350,6 @@ db::commitlog::read_log_file(file f, commit_load_reader_func next, position_type
|
||||
}
|
||||
future<> read_entry() {
|
||||
static constexpr size_t entry_header_size = segment::entry_overhead_size - sizeof(uint32_t);
|
||||
|
||||
/**
|
||||
* #598 - Must check that data left in chunk is enough to even read an entry.
|
||||
* If not, this is small slack space in the chunk end, and we should just go
|
||||
* to the next.
|
||||
*/
|
||||
assert(pos <= next);
|
||||
if ((pos + entry_header_size) >= next) {
|
||||
return skip(next - pos);
|
||||
}
|
||||
|
||||
return fin.read_exactly(entry_header_size).then([this](temporary_buffer<char> buf) {
|
||||
replay_position rp(id, position_type(pos));
|
||||
|
||||
@@ -1593,6 +1375,10 @@ db::commitlog::read_log_file(file f, commit_load_reader_func next, position_type
|
||||
return skip(slack);
|
||||
}
|
||||
|
||||
if (start_off > pos) {
|
||||
return skip(size - entry_header_size);
|
||||
}
|
||||
|
||||
return fin.read_exactly(size - entry_header_size).then([this, size, crc = std::move(crc), rp](temporary_buffer<char> buf) mutable {
|
||||
advance(buf);
|
||||
|
||||
@@ -1662,28 +1448,7 @@ uint64_t db::commitlog::get_flush_count() const {
|
||||
}
|
||||
|
||||
uint64_t db::commitlog::get_pending_tasks() const {
|
||||
return _segment_manager->totals.pending_writes
|
||||
+ _segment_manager->totals.pending_flushes;
|
||||
}
|
||||
|
||||
uint64_t db::commitlog::get_pending_writes() const {
|
||||
return _segment_manager->totals.pending_writes;
|
||||
}
|
||||
|
||||
uint64_t db::commitlog::get_pending_flushes() const {
|
||||
return _segment_manager->totals.pending_flushes;
|
||||
}
|
||||
|
||||
uint64_t db::commitlog::get_pending_allocations() const {
|
||||
return _segment_manager->totals.pending_allocations;
|
||||
}
|
||||
|
||||
uint64_t db::commitlog::get_write_limit_exceeded_count() const {
|
||||
return _segment_manager->totals.write_limit_exceeded;
|
||||
}
|
||||
|
||||
uint64_t db::commitlog::get_flush_limit_exceeded_count() const {
|
||||
return _segment_manager->totals.flush_limit_exceeded;
|
||||
return _segment_manager->totals.pending_operations;
|
||||
}
|
||||
|
||||
uint64_t db::commitlog::get_num_segments_created() const {
|
||||
@@ -1694,14 +1459,6 @@ uint64_t db::commitlog::get_num_segments_destroyed() const {
|
||||
return _segment_manager->totals.segments_destroyed;
|
||||
}
|
||||
|
||||
uint64_t db::commitlog::get_num_dirty_segments() const {
|
||||
return _segment_manager->get_num_dirty_segments();
|
||||
}
|
||||
|
||||
uint64_t db::commitlog::get_num_active_segments() const {
|
||||
return _segment_manager->get_num_active_segments();
|
||||
}
|
||||
|
||||
future<std::vector<db::commitlog::descriptor>> db::commitlog::list_existing_descriptors() const {
|
||||
return list_existing_descriptors(active_config().commit_log_location);
|
||||
}
|
||||
|
||||
@@ -48,7 +48,6 @@
|
||||
#include "core/stream.hh"
|
||||
#include "utils/UUID.hh"
|
||||
#include "replay_position.hh"
|
||||
#include "commitlog_entry.hh"
|
||||
|
||||
class file;
|
||||
|
||||
@@ -115,10 +114,6 @@ public:
|
||||
// Max number of segments to keep in pre-alloc reserve.
|
||||
// Not (yet) configurable from scylla.conf.
|
||||
uint64_t max_reserve_segments = 12;
|
||||
// Max active writes/flushes. Default value
|
||||
// zero means try to figure it out ourselves
|
||||
uint64_t max_active_writes = 0;
|
||||
uint64_t max_active_flushes = 0;
|
||||
|
||||
sync_mode mode = sync_mode::PERIODIC;
|
||||
};
|
||||
@@ -186,13 +181,6 @@ public:
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Add an entry to the commit log.
|
||||
*
|
||||
* @param entry_writer a writer responsible for writing the entry
|
||||
*/
|
||||
future<replay_position> add_entry(const cf_id_type& id, const commitlog_entry_writer& entry_writer);
|
||||
|
||||
/**
|
||||
* Modifies the per-CF dirty cursors of any commit log segments for the column family according to the position
|
||||
* given. Discards any commit log segments that are no longer used.
|
||||
@@ -245,37 +233,14 @@ public:
|
||||
uint64_t get_completed_tasks() const;
|
||||
uint64_t get_flush_count() const;
|
||||
uint64_t get_pending_tasks() const;
|
||||
uint64_t get_pending_writes() const;
|
||||
uint64_t get_pending_flushes() const;
|
||||
uint64_t get_pending_allocations() const;
|
||||
uint64_t get_write_limit_exceeded_count() const;
|
||||
uint64_t get_flush_limit_exceeded_count() const;
|
||||
uint64_t get_num_segments_created() const;
|
||||
uint64_t get_num_segments_destroyed() const;
|
||||
/**
|
||||
* Get number of inactive (finished), segments lingering
|
||||
* due to still being dirty
|
||||
*/
|
||||
uint64_t get_num_dirty_segments() const;
|
||||
/**
|
||||
* Get number of active segments, i.e. still being allocated to
|
||||
*/
|
||||
uint64_t get_num_active_segments() const;
|
||||
|
||||
/**
|
||||
* Returns the largest amount of data that can be written in a single "mutation".
|
||||
*/
|
||||
size_t max_record_size() const;
|
||||
|
||||
/**
|
||||
* Return max allowed pending writes (per this shard)
|
||||
*/
|
||||
uint64_t max_active_writes() const;
|
||||
/**
|
||||
* Return max allowed pending flushes (per this shard)
|
||||
*/
|
||||
uint64_t max_active_flushes() const;
|
||||
|
||||
future<> clear();
|
||||
|
||||
const config& active_config() const;
|
||||
@@ -318,11 +283,6 @@ public:
|
||||
const sstring&, commit_load_reader_func, position_type = 0);
|
||||
private:
|
||||
commitlog(config);
|
||||
|
||||
struct entry_writer {
|
||||
virtual size_t size(segment&) = 0;
|
||||
virtual void write(segment&, output&) = 0;
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -1,88 +0,0 @@
|
||||
/*
|
||||
* Copyright 2016 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <experimental/optional>
|
||||
|
||||
#include "frozen_mutation.hh"
|
||||
#include "schema.hh"
|
||||
|
||||
namespace stdx = std::experimental;
|
||||
|
||||
class commitlog_entry_writer {
|
||||
schema_ptr _schema;
|
||||
db::serializer<column_mapping> _column_mapping_serializer;
|
||||
const frozen_mutation& _mutation;
|
||||
bool _with_schema = true;
|
||||
public:
|
||||
commitlog_entry_writer(schema_ptr s, const frozen_mutation& fm)
|
||||
: _schema(std::move(s)), _column_mapping_serializer(_schema->get_column_mapping()), _mutation(fm)
|
||||
{ }
|
||||
|
||||
void set_with_schema(bool value) {
|
||||
_with_schema = value;
|
||||
}
|
||||
bool with_schema() {
|
||||
return _with_schema;
|
||||
}
|
||||
schema_ptr schema() const {
|
||||
return _schema;
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
size_t size = data_output::serialized_size<bool>();
|
||||
if (_with_schema) {
|
||||
size += _column_mapping_serializer.size();
|
||||
}
|
||||
size += _mutation.representation().size();
|
||||
return size;
|
||||
}
|
||||
|
||||
void write(data_output& out) const {
|
||||
out.write(_with_schema);
|
||||
if (_with_schema) {
|
||||
_column_mapping_serializer.write(out);
|
||||
}
|
||||
auto bv = _mutation.representation();
|
||||
out.write(bv.begin(), bv.end());
|
||||
}
|
||||
};
|
||||
|
||||
class commitlog_entry_reader {
|
||||
frozen_mutation _mutation;
|
||||
stdx::optional<column_mapping> _column_mapping;
|
||||
public:
|
||||
commitlog_entry_reader(const temporary_buffer<char>& buffer)
|
||||
: _mutation(bytes())
|
||||
{
|
||||
data_input in(buffer);
|
||||
bool has_column_mapping = in.read<bool>();
|
||||
if (has_column_mapping) {
|
||||
_column_mapping = db::serializer<::column_mapping>::read(in);
|
||||
}
|
||||
auto bv = in.read_view(in.avail());
|
||||
_mutation = frozen_mutation(bytes(bv.begin(), bv.end()));
|
||||
}
|
||||
|
||||
const stdx::optional<column_mapping>& get_column_mapping() const { return _column_mapping; }
|
||||
const frozen_mutation& mutation() const { return _mutation; }
|
||||
};
|
||||
@@ -56,14 +56,10 @@
|
||||
#include "db/serializer.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "log.hh"
|
||||
#include "converting_mutation_partition_applier.hh"
|
||||
#include "schema_registry.hh"
|
||||
#include "commitlog_entry.hh"
|
||||
|
||||
static logging::logger logger("commitlog_replayer");
|
||||
|
||||
class db::commitlog_replayer::impl {
|
||||
std::unordered_map<table_schema_version, column_mapping> _column_mappings;
|
||||
public:
|
||||
impl(seastar::sharded<cql3::query_processor>& db);
|
||||
|
||||
@@ -74,19 +70,6 @@ public:
|
||||
uint64_t skipped_mutations = 0;
|
||||
uint64_t applied_mutations = 0;
|
||||
uint64_t corrupt_bytes = 0;
|
||||
|
||||
stats& operator+=(const stats& s) {
|
||||
invalid_mutations += s.invalid_mutations;
|
||||
skipped_mutations += s.skipped_mutations;
|
||||
applied_mutations += s.applied_mutations;
|
||||
corrupt_bytes += s.corrupt_bytes;
|
||||
return *this;
|
||||
}
|
||||
stats operator+(const stats& s) const {
|
||||
stats tmp = *this;
|
||||
tmp += s;
|
||||
return tmp;
|
||||
}
|
||||
};
|
||||
|
||||
future<> process(stats*, temporary_buffer<char> buf, replay_position rp);
|
||||
@@ -165,6 +148,8 @@ future<> db::commitlog_replayer::impl::init() {
|
||||
|
||||
future<db::commitlog_replayer::impl::stats>
|
||||
db::commitlog_replayer::impl::recover(sstring file) {
|
||||
logger.info("Replaying {}", file);
|
||||
|
||||
replay_position rp{commitlog::descriptor(file)};
|
||||
auto gp = _min_pos[rp.shard_id()];
|
||||
|
||||
@@ -197,29 +182,19 @@ db::commitlog_replayer::impl::recover(sstring file) {
|
||||
}
|
||||
|
||||
future<> db::commitlog_replayer::impl::process(stats* s, temporary_buffer<char> buf, replay_position rp) {
|
||||
auto shard = rp.shard_id();
|
||||
if (rp < _min_pos[shard]) {
|
||||
logger.trace("entry {} is less than global min position. skipping", rp);
|
||||
s->skipped_mutations++;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
try {
|
||||
|
||||
commitlog_entry_reader cer(buf);
|
||||
auto& fm = cer.mutation();
|
||||
|
||||
auto cm_it = _column_mappings.find(fm.schema_version());
|
||||
if (cm_it == _column_mappings.end()) {
|
||||
if (!cer.get_column_mapping()) {
|
||||
throw std::runtime_error(sprint("unknown schema version {}", fm.schema_version()));
|
||||
}
|
||||
logger.debug("new schema version {} in entry {}", fm.schema_version(), rp);
|
||||
cm_it = _column_mappings.emplace(fm.schema_version(), *cer.get_column_mapping()).first;
|
||||
}
|
||||
|
||||
auto shard_id = rp.shard_id();
|
||||
if (rp < _min_pos[shard_id]) {
|
||||
logger.trace("entry {} is less than global min position. skipping", rp);
|
||||
s->skipped_mutations++;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
frozen_mutation fm(bytes(reinterpret_cast<const int8_t *>(buf.get()), buf.size()));
|
||||
|
||||
auto uuid = fm.column_family_id();
|
||||
auto& map = _rpm[shard_id];
|
||||
auto& map = _rpm[shard];
|
||||
auto i = map.find(uuid);
|
||||
if (i != map.end() && rp <= i->second) {
|
||||
logger.trace("entry {} at {} is younger than recorded replay position {}. skipping", fm.column_family_id(), rp, i->second);
|
||||
@@ -228,15 +203,14 @@ future<> db::commitlog_replayer::impl::process(stats* s, temporary_buffer<char>
|
||||
}
|
||||
|
||||
auto shard = _qp.local().db().local().shard_of(fm);
|
||||
return _qp.local().db().invoke_on(shard, [this, cer = std::move(cer), cm_it, rp, shard, s] (database& db) -> future<> {
|
||||
auto& fm = cer.mutation();
|
||||
return _qp.local().db().invoke_on(shard, [fm = std::move(fm), rp, shard, s] (database& db) -> future<> {
|
||||
// TODO: might need better verification that the deserialized mutation
|
||||
// is schema compatible. My guess is that just applying the mutation
|
||||
// will not do this.
|
||||
auto& cf = db.find_column_family(fm.column_family_id());
|
||||
|
||||
if (logger.is_enabled(logging::log_level::debug)) {
|
||||
logger.debug("replaying at {} v={} {}:{} at {}", fm.column_family_id(), fm.schema_version(),
|
||||
logger.debug("replaying at {} {}:{} at {}", fm.column_family_id(),
|
||||
cf.schema()->ks_name(), cf.schema()->cf_name(), rp);
|
||||
}
|
||||
// Removed forwarding "new" RP. Instead give none/empty.
|
||||
@@ -244,15 +218,7 @@ future<> db::commitlog_replayer::impl::process(stats* s, temporary_buffer<char>
|
||||
// The end result should be that once sstables are flushed out
|
||||
// their "replay_position" attribute will be empty, which is
|
||||
// lower than anything the new session will produce.
|
||||
if (cf.schema()->version() != fm.schema_version()) {
|
||||
const column_mapping& cm = cm_it->second;
|
||||
mutation m(fm.decorated_key(*cf.schema()), cf.schema());
|
||||
converting_mutation_partition_applier v(cm, *cf.schema(), m.partition());
|
||||
fm.partition().accept(cm, v);
|
||||
cf.apply(std::move(m));
|
||||
} else {
|
||||
cf.apply(fm, cf.schema());
|
||||
}
|
||||
cf.apply(fm);
|
||||
s->applied_mutations++;
|
||||
return make_ready_future<>();
|
||||
}).handle_exception([s](auto ep) {
|
||||
@@ -292,41 +258,32 @@ future<db::commitlog_replayer> db::commitlog_replayer::create_replayer(seastar::
|
||||
}
|
||||
|
||||
future<> db::commitlog_replayer::recover(std::vector<sstring> files) {
|
||||
logger.info("Replaying {}", join(", ", files));
|
||||
return map_reduce(files, [this](auto f) {
|
||||
logger.debug("Replaying {}", f);
|
||||
return _impl->recover(f).then([f](impl::stats stats) {
|
||||
if (stats.corrupt_bytes != 0) {
|
||||
logger.warn("Corrupted file: {}. {} bytes skipped.", f, stats.corrupt_bytes);
|
||||
}
|
||||
logger.debug("Log replay of {} complete, {} replayed mutations ({} invalid, {} skipped)"
|
||||
, f
|
||||
, stats.applied_mutations
|
||||
, stats.invalid_mutations
|
||||
, stats.skipped_mutations
|
||||
);
|
||||
return make_ready_future<impl::stats>(stats);
|
||||
}).handle_exception([f](auto ep) -> future<impl::stats> {
|
||||
logger.error("Error recovering {}: {}", f, ep);
|
||||
try {
|
||||
std::rethrow_exception(ep);
|
||||
} catch (std::invalid_argument&) {
|
||||
logger.error("Scylla cannot process {}. Make sure to fully flush all Cassandra commit log files to sstable before migrating.", f);
|
||||
throw;
|
||||
} catch (...) {
|
||||
throw;
|
||||
}
|
||||
});
|
||||
}, impl::stats(), std::plus<impl::stats>()).then([](impl::stats totals) {
|
||||
logger.info("Log replay complete, {} replayed mutations ({} invalid, {} skipped)"
|
||||
, totals.applied_mutations
|
||||
, totals.invalid_mutations
|
||||
, totals.skipped_mutations
|
||||
);
|
||||
return parallel_for_each(files, [this](auto f) {
|
||||
return this->recover(f);
|
||||
});
|
||||
}
|
||||
|
||||
future<> db::commitlog_replayer::recover(sstring f) {
|
||||
return recover(std::vector<sstring>{ f });
|
||||
return _impl->recover(f).then([f](impl::stats stats) {
|
||||
if (stats.corrupt_bytes != 0) {
|
||||
logger.warn("Corrupted file: {}. {} bytes skipped.", f, stats.corrupt_bytes);
|
||||
}
|
||||
logger.info("Log replay of {} complete, {} replayed mutations ({} invalid, {} skipped)"
|
||||
, f
|
||||
, stats.applied_mutations
|
||||
, stats.invalid_mutations
|
||||
, stats.skipped_mutations
|
||||
);
|
||||
}).handle_exception([f](auto ep) {
|
||||
logger.error("Error recovering {}: {}", f, ep);
|
||||
try {
|
||||
std::rethrow_exception(ep);
|
||||
} catch (std::invalid_argument&) {
|
||||
logger.error("Scylla cannot process {}. Make sure to fully flush all Cassandra commit log files to sstable before migrating.");
|
||||
throw;
|
||||
} catch (...) {
|
||||
throw;
|
||||
}
|
||||
});;
|
||||
}
|
||||
|
||||
|
||||
@@ -30,7 +30,6 @@
|
||||
#include "core/shared_ptr.hh"
|
||||
#include "core/fstream.hh"
|
||||
#include "core/do_with.hh"
|
||||
#include "core/print.hh"
|
||||
#include "log.hh"
|
||||
#include <boost/any.hpp>
|
||||
|
||||
@@ -411,7 +410,7 @@ future<> db::config::read_from_file(file f) {
|
||||
}
|
||||
|
||||
future<> db::config::read_from_file(const sstring& filename) {
|
||||
return open_file_dma(filename, open_flags::ro).then([this](file f) {
|
||||
return engine().open_file_dma(filename, open_flags::ro).then([this](file f) {
|
||||
return read_from_file(std::move(f));
|
||||
});
|
||||
}
|
||||
@@ -433,9 +432,3 @@ boost::filesystem::path db::config::get_conf_dir() {
|
||||
|
||||
return confdir;
|
||||
}
|
||||
|
||||
void db::config::check_experimental(const sstring& what) const {
|
||||
if (!experimental()) {
|
||||
throw std::runtime_error(sprint("%s is currently disabled. Start Scylla with --experimental=on to enable.", what));
|
||||
}
|
||||
}
|
||||
|
||||
37
db/config.hh
37
db/config.hh
@@ -102,9 +102,6 @@ public:
|
||||
|
||||
config();
|
||||
|
||||
// Throws exception if experimental feature is disabled.
|
||||
void check_experimental(const sstring& what) const;
|
||||
|
||||
boost::program_options::options_description
|
||||
get_options_description();
|
||||
|
||||
@@ -268,7 +265,7 @@ public:
|
||||
"Counter writes read the current values before incrementing and writing them back. The recommended value is (16 × number_of_drives) ." \
|
||||
) \
|
||||
/* Common automatic backup settings */ \
|
||||
val(incremental_backups, bool, false, Used, \
|
||||
val(incremental_backups, bool, false, Unused, \
|
||||
"Backs up data updated since the last snapshot was taken. When enabled, Cassandra creates a hard link to each SSTable flushed or streamed locally in a backups/ subdirectory of the keyspace data. Removing these links is the operator's responsibility.\n" \
|
||||
"Related information: Enabling incremental backups" \
|
||||
) \
|
||||
@@ -386,7 +383,7 @@ public:
|
||||
"This setting has been removed from default configuration. It makes new (non-seed) nodes automatically migrate the right data to themselves. When initializing a fresh cluster with no data, add auto_bootstrap: false.\n" \
|
||||
"Related information: Initializing a multiple node cluster (single data center) and Initializing a multiple node cluster (multiple data centers)." \
|
||||
) \
|
||||
val(batch_size_warn_threshold_in_kb, uint32_t, 5, Used, \
|
||||
val(batch_size_warn_threshold_in_kb, uint32_t, 5, Unused, \
|
||||
"Log WARN on any batch size exceeding this value in kilobytes. Caution should be taken on increasing the size of this threshold as it can lead to node instability." \
|
||||
) \
|
||||
val(broadcast_address, sstring, /* listen_address */, Used, \
|
||||
@@ -641,8 +638,8 @@ public:
|
||||
) \
|
||||
/* Security properties */ \
|
||||
/* Server and client security settings. */ \
|
||||
val(authenticator, sstring, "org.apache.cassandra.auth.AllowAllAuthenticator", Used, \
|
||||
"The authentication backend, used to identify users. The available authenticators are:\n" \
|
||||
val(authenticator, sstring, "org.apache.cassandra.auth.AllowAllAuthenticator", Unused, \
|
||||
"The authentication backend. It implements IAuthenticator, which is used to identify users. The available authenticators are:\n" \
|
||||
"\n" \
|
||||
"\torg.apache.cassandra.auth.AllowAllAuthenticator : Disables authentication; no checks are performed.\n" \
|
||||
"\torg.apache.cassandra.auth.PasswordAuthenticator : Authenticates users with user names and hashed passwords stored in the system_auth.credentials table. If you use the default, 1, and the node with the lone replica goes down, you will not be able to log into the cluster because the system_auth keyspace was not replicated.\n" \
|
||||
@@ -682,18 +679,28 @@ public:
|
||||
"truststore : (Default: <system truststore> ) Location of the truststore containing the trusted certificate for authenticating remote servers.\n" \
|
||||
"Related information: Node-to-node encryption" \
|
||||
) \
|
||||
val(client_encryption_options, string_map, /*none*/, Used, \
|
||||
"Enable or disable client-to-node encryption. You must also generate keys and provide the appropriate key and certificate. No custom encryption options are currently enabled. The available options are:\n" \
|
||||
val(client_encryption_options, string_map, /*none*/, Unused, \
|
||||
"Enable or disable client-to-node encryption. You must also generate keys and provide the appropriate key and trust store locations and passwords. No custom encryption options are currently enabled. The available options are:\n" \
|
||||
"\n" \
|
||||
"\tenabled : (Default: false ) To enable, set to true.\n" \
|
||||
"\tcertificate: (Default: conf/scylla.crt) The location of a PEM-encoded x509 certificate used to identify and encrypt the client/server communication.\n" \
|
||||
"\tkeyfile: (Default: conf/scylla.key) PEM Key file associated with certificate.\n" \
|
||||
"\tkeystore : (Default: conf/.keystore ) The location of a Java keystore (JKS) suitable for use with Java Secure Socket Extension (JSSE), which is the Java version of the Secure Sockets Layer (SSL), and Transport Layer Security (TLS) protocols. The keystore contains the private key used to encrypt outgoing messages.\n" \
|
||||
"\tkeystore_password : (Default: cassandra ) Password for the keystore. This must match the password used when generating the keystore and truststore.\n" \
|
||||
"\trequire_client_auth : (Default: false ) Enables or disables certificate authentication. (Available starting with Cassandra 1.2.3.)\n" \
|
||||
"\ttruststore : (Default: conf/.truststore ) Set if require_client_auth is true.\n" \
|
||||
"\ttruststore_password : <truststore_password> Set if require_client_auth is true.\n" \
|
||||
"\n" \
|
||||
"The advanced settings are:\n" \
|
||||
"\n" \
|
||||
"\tprotocol : (Default: TLS )\n" \
|
||||
"\talgorithm : (Default: SunX509 )\n" \
|
||||
"\tstore_type : (Default: JKS )\n" \
|
||||
"\tcipher_suites : (Default: TLS_RSA_WITH_AES_128_CBC_SHA , TLS_RSA_WITH_AES_256_CBC_SHA )\n" \
|
||||
"Related information: Client-to-node encryption" \
|
||||
) \
|
||||
val(ssl_storage_port, uint32_t, 7001, Used, \
|
||||
val(ssl_storage_port, uint32_t, 7001, Unused, \
|
||||
"The SSL port for encrypted communication. Unused unless enabled in encryption_options." \
|
||||
) \
|
||||
val(default_log_level, sstring, "info", Used, \
|
||||
val(default_log_level, sstring, "warn", Used, \
|
||||
"Default log level for log messages. Valid values are trace, debug, info, warn, error.") \
|
||||
val(logger_log_level, string_map, /* none */, Used,\
|
||||
"map of logger name to log level. Valid values are trace, debug, info, warn, error. " \
|
||||
@@ -718,10 +725,7 @@ public:
|
||||
val(replace_address_first_boot, sstring, "", Used, "Like replace_address option, but if the node has been bootstrapped sucessfully it will be ignored. Same as -Dcassandra.replace_address_first_boot.") \
|
||||
val(override_decommission, bool, false, Used, "Set true to force a decommissioned node to join the cluster") \
|
||||
val(ring_delay_ms, uint32_t, 30 * 1000, Used, "Time a node waits to hear from other nodes before joining the ring in milliseconds. Same as -Dcassandra.ring_delay_ms in cassandra.") \
|
||||
val(shutdown_announce_in_ms, uint32_t, 2 * 1000, Used, "Time a node waits after sending gossip shutdown message in milliseconds. Same as -Dcassandra.shutdown_announce_in_ms in cassandra.") \
|
||||
val(developer_mode, bool, false, Used, "Relax environement checks. Setting to true can reduce performance and reliability significantly.") \
|
||||
val(skip_wait_for_gossip_to_settle, int32_t, -1, Used, "An integer to configure the wait for gossip to settle. -1: wait normally, 0: do not wait at all, n: wait for at most n polls. Same as -Dcassandra.skip_wait_for_gossip_to_settle in cassandra.") \
|
||||
val(experimental, bool, false, Used, "Set to true to unlock experimental features.") \
|
||||
/* done! */
|
||||
|
||||
#define _make_value_member(name, type, deflt, status, desc, ...) \
|
||||
@@ -738,4 +742,5 @@ private:
|
||||
int _dummy;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -50,20 +50,16 @@ namespace db {
|
||||
|
||||
namespace marshal {
|
||||
|
||||
type_parser::type_parser(sstring_view str, size_t idx)
|
||||
: _str{str.begin(), str.end()}
|
||||
type_parser::type_parser(const sstring& str, size_t idx)
|
||||
: _str{str}
|
||||
, _idx{idx}
|
||||
{ }
|
||||
|
||||
type_parser::type_parser(sstring_view str)
|
||||
type_parser::type_parser(const sstring& str)
|
||||
: type_parser{str, 0}
|
||||
{ }
|
||||
|
||||
data_type type_parser::parse(const sstring& str) {
|
||||
return type_parser(sstring_view(str)).parse();
|
||||
}
|
||||
|
||||
data_type type_parser::parse(sstring_view str) {
|
||||
return type_parser(str).parse();
|
||||
}
|
||||
|
||||
|
||||
@@ -62,15 +62,14 @@ class type_parser {
|
||||
|
||||
public static final TypeParser EMPTY_PARSER = new TypeParser("", 0);
|
||||
#endif
|
||||
type_parser(sstring_view str, size_t idx);
|
||||
type_parser(const sstring& str, size_t idx);
|
||||
public:
|
||||
explicit type_parser(sstring_view str);
|
||||
explicit type_parser(const sstring& str);
|
||||
|
||||
/**
|
||||
* Parse a string containing an type definition.
|
||||
*/
|
||||
static data_type parse(const sstring& str);
|
||||
static data_type parse(sstring_view str);
|
||||
|
||||
#if 0
|
||||
public static AbstractType<?> parse(CharSequence compareWith) throws SyntaxException, ConfigurationException
|
||||
|
||||
@@ -46,7 +46,6 @@
|
||||
#include "system_keyspace.hh"
|
||||
#include "query_context.hh"
|
||||
#include "query-result-set.hh"
|
||||
#include "query-result-writer.hh"
|
||||
#include "schema_builder.hh"
|
||||
#include "map_difference.hh"
|
||||
#include "utils/UUID_gen.hh"
|
||||
@@ -54,12 +53,9 @@
|
||||
#include "core/thread.hh"
|
||||
#include "json.hh"
|
||||
#include "log.hh"
|
||||
#include "frozen_schema.hh"
|
||||
#include "schema_registry.hh"
|
||||
|
||||
#include "db/marshal/type_parser.hh"
|
||||
#include "db/config.hh"
|
||||
#include "md5_hasher.hh"
|
||||
|
||||
#include <boost/range/algorithm/copy.hpp>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
@@ -74,36 +70,6 @@ namespace schema_tables {
|
||||
|
||||
logging::logger logger("schema_tables");
|
||||
|
||||
struct qualified_name {
|
||||
sstring keyspace_name;
|
||||
sstring table_name;
|
||||
|
||||
qualified_name(sstring keyspace_name, sstring table_name)
|
||||
: keyspace_name(std::move(keyspace_name))
|
||||
, table_name(std::move(table_name))
|
||||
{ }
|
||||
|
||||
qualified_name(const schema_ptr& s)
|
||||
: keyspace_name(s->ks_name())
|
||||
, table_name(s->cf_name())
|
||||
{ }
|
||||
|
||||
bool operator<(const qualified_name& o) const {
|
||||
return keyspace_name < o.keyspace_name
|
||||
|| (keyspace_name == o.keyspace_name && table_name < o.table_name);
|
||||
}
|
||||
|
||||
bool operator==(const qualified_name& o) const {
|
||||
return keyspace_name == o.keyspace_name && table_name == o.table_name;
|
||||
}
|
||||
};
|
||||
|
||||
static future<schema_mutations> read_table_mutations(distributed<service::storage_proxy>& proxy, const qualified_name& table);
|
||||
|
||||
static void merge_tables(distributed<service::storage_proxy>& proxy,
|
||||
std::map<qualified_name, schema_mutations>&& before,
|
||||
std::map<qualified_name, schema_mutations>&& after);
|
||||
|
||||
std::vector<const char*> ALL { KEYSPACES, COLUMNFAMILIES, COLUMNS, TRIGGERS, USERTYPES, /* not present in 2.1.8: FUNCTIONS, AGGREGATES */ };
|
||||
|
||||
using days = std::chrono::duration<int, std::ratio<24 * 3600>>;
|
||||
@@ -129,9 +95,7 @@ using days = std::chrono::duration<int, std::ratio<24 * 3600>>;
|
||||
"keyspace definitions"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(std::chrono::duration_cast<std::chrono::seconds>(days(7)).count());
|
||||
builder.with(schema_builder::compact_storage::yes);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build();
|
||||
return builder.build(schema_builder::compact_storage::yes);
|
||||
}();
|
||||
return keyspaces;
|
||||
}
|
||||
@@ -183,9 +147,7 @@ using days = std::chrono::duration<int, std::ratio<24 * 3600>>;
|
||||
"table definitions"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(std::chrono::duration_cast<std::chrono::seconds>(days(7)).count());
|
||||
builder.with(schema_builder::compact_storage::no);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build();
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
return columnfamilies;
|
||||
}
|
||||
@@ -214,9 +176,7 @@ using days = std::chrono::duration<int, std::ratio<24 * 3600>>;
|
||||
"column definitions"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(std::chrono::duration_cast<std::chrono::seconds>(days(7)).count());
|
||||
builder.with(schema_builder::compact_storage::no);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build();
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
return columns;
|
||||
}
|
||||
@@ -240,9 +200,7 @@ using days = std::chrono::duration<int, std::ratio<24 * 3600>>;
|
||||
"trigger definitions"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(std::chrono::duration_cast<std::chrono::seconds>(days(7)).count());
|
||||
builder.with(schema_builder::compact_storage::no);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build();
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
return triggers;
|
||||
}
|
||||
@@ -267,9 +225,7 @@ using days = std::chrono::duration<int, std::ratio<24 * 3600>>;
|
||||
"user defined type definitions"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(std::chrono::duration_cast<std::chrono::seconds>(days(7)).count());
|
||||
builder.with(schema_builder::compact_storage::no);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build();
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
return usertypes;
|
||||
}
|
||||
@@ -298,9 +254,7 @@ using days = std::chrono::duration<int, std::ratio<24 * 3600>>;
|
||||
"user defined type definitions"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(std::chrono::duration_cast<std::chrono::seconds>(days(7)).count());
|
||||
builder.with(schema_builder::compact_storage::no);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build();
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
return functions;
|
||||
}
|
||||
@@ -329,9 +283,7 @@ using days = std::chrono::duration<int, std::ratio<24 * 3600>>;
|
||||
"user defined aggregate definitions"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(std::chrono::duration_cast<std::chrono::seconds>(days(7)).count());
|
||||
builder.with(schema_builder::compact_storage::no);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build();
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
return aggregates;
|
||||
}
|
||||
@@ -343,11 +295,10 @@ future<> save_system_keyspace_schema() {
|
||||
|
||||
// delete old, possibly obsolete entries in schema tables
|
||||
return parallel_for_each(ALL, [ksm] (sstring cf) {
|
||||
auto deletion_timestamp = schema_creation_timestamp() - 1;
|
||||
return db::execute_cql(sprint("DELETE FROM system.%%s USING TIMESTAMP %s WHERE keyspace_name = ?",
|
||||
deletion_timestamp), cf, ksm->name()).discard_result();
|
||||
return db::execute_cql("DELETE FROM system.%s WHERE keyspace_name = ?", cf, ksm->name()).discard_result();
|
||||
}).then([ksm] {
|
||||
auto mvec = make_create_keyspace_mutations(ksm, schema_creation_timestamp(), true);
|
||||
// (+1 to timestamp to make sure we don't get shadowed by the tombstones we just added)
|
||||
auto mvec = make_create_keyspace_mutations(ksm, qctx->next_timestamp(), true);
|
||||
return qctx->proxy().mutate_locally(std::move(mvec));
|
||||
});
|
||||
}
|
||||
@@ -375,30 +326,36 @@ future<utils::UUID> calculate_schema_digest(distributed<service::storage_proxy>&
|
||||
auto map = [&proxy] (sstring table) {
|
||||
return db::system_keyspace::query_mutations(proxy, table).then([&proxy, table] (auto rs) {
|
||||
auto s = proxy.local().get_db().local().find_schema(system_keyspace::NAME, table);
|
||||
std::vector<mutation> mutations;
|
||||
std::vector<query::result> results;
|
||||
for (auto&& p : rs->partitions()) {
|
||||
auto mut = p.mut().unfreeze(s);
|
||||
auto partition_key = value_cast<sstring>(utf8_type->deserialize(mut.key().get_component(*s, 0)));
|
||||
if (partition_key == system_keyspace::NAME) {
|
||||
continue;
|
||||
}
|
||||
mutations.emplace_back(std::move(mut));
|
||||
auto slice = partition_slice_builder(*s).build();
|
||||
results.emplace_back(mut.query(slice));
|
||||
}
|
||||
return mutations;
|
||||
return results;
|
||||
});
|
||||
};
|
||||
auto reduce = [] (auto& hash, auto&& mutations) {
|
||||
for (const mutation& m : mutations) {
|
||||
feed_hash_for_schema_digest(hash, m);
|
||||
auto reduce = [] (auto& hash, auto&& results) {
|
||||
for (auto&& rs : results) {
|
||||
for (auto&& f : rs.buf().fragments()) {
|
||||
hash.Update(reinterpret_cast<const unsigned char*>(f.begin()), f.size());
|
||||
}
|
||||
}
|
||||
return make_ready_future<>();
|
||||
};
|
||||
return do_with(md5_hasher(), [map, reduce] (auto& hash) {
|
||||
return do_with(CryptoPP::Weak::MD5{}, [map, reduce] (auto& hash) {
|
||||
return do_for_each(ALL.begin(), ALL.end(), [&hash, map, reduce] (auto& table) {
|
||||
return map(table).then([&hash, reduce] (auto&& mutations) {
|
||||
reduce(hash, mutations);
|
||||
return map(table).then([&hash, reduce] (auto&& results) {
|
||||
return reduce(hash, results);
|
||||
});
|
||||
}).then([&hash] {
|
||||
return make_ready_future<utils::UUID>(utils::UUID_gen::get_name_UUID(hash.finalize()));
|
||||
bytes digest{bytes::initialized_later(), CryptoPP::Weak::MD5::DIGESTSIZE};
|
||||
hash.Final(reinterpret_cast<unsigned char*>(digest.begin()));
|
||||
return make_ready_future<utils::UUID>(utils::UUID_gen::get_name_UUID(digest));
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -415,16 +372,16 @@ future<std::vector<frozen_mutation>> convert_schema_to_mutations(distributed<ser
|
||||
if (partition_key == system_keyspace::NAME) {
|
||||
continue;
|
||||
}
|
||||
results.emplace_back(std::move(p.mut()));
|
||||
results.emplace_back(p.mut());
|
||||
}
|
||||
return results;
|
||||
});
|
||||
};
|
||||
auto reduce = [] (auto&& result, auto&& mutations) {
|
||||
std::move(mutations.begin(), mutations.end(), std::back_inserter(result));
|
||||
std::copy(mutations.begin(), mutations.end(), std::back_inserter(result));
|
||||
return std::move(result);
|
||||
};
|
||||
return map_reduce(ALL.begin(), ALL.end(), map, std::vector<frozen_mutation>{}, reduce);
|
||||
return map_reduce(ALL.begin(), ALL.end(), map, std::move(std::vector<frozen_mutation>{}), reduce);
|
||||
}
|
||||
|
||||
future<schema_result>
|
||||
@@ -441,28 +398,6 @@ read_schema_for_keyspaces(distributed<service::storage_proxy>& proxy, const sstr
|
||||
return map_reduce(keyspace_names.begin(), keyspace_names.end(), map, schema_result{}, insert);
|
||||
}
|
||||
|
||||
static
|
||||
future<mutation> query_partition_mutation(service::storage_proxy& proxy,
|
||||
schema_ptr s,
|
||||
lw_shared_ptr<query::read_command> cmd,
|
||||
partition_key pkey)
|
||||
{
|
||||
auto dk = dht::global_partitioner().decorate_key(*s, pkey);
|
||||
return do_with(query::partition_range::make_singular(dk), [&proxy, dk, s = std::move(s), cmd = std::move(cmd)] (auto& range) {
|
||||
return proxy.query_mutations_locally(s, std::move(cmd), range)
|
||||
.then([dk = std::move(dk), s](foreign_ptr<lw_shared_ptr<reconcilable_result>> res) {
|
||||
auto&& partitions = res->partitions();
|
||||
if (partitions.size() == 0) {
|
||||
return mutation(std::move(dk), s);
|
||||
} else if (partitions.size() == 1) {
|
||||
return partitions[0].mut().unfreeze(s);
|
||||
} else {
|
||||
assert(false && "Results must have at most one partition");
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
future<schema_result_value_type>
|
||||
read_schema_partition_for_keyspace(distributed<service::storage_proxy>& proxy, const sstring& schema_table_name, const sstring& keyspace_name)
|
||||
{
|
||||
@@ -474,18 +409,16 @@ read_schema_partition_for_keyspace(distributed<service::storage_proxy>& proxy, c
|
||||
});
|
||||
}
|
||||
|
||||
future<mutation>
|
||||
future<schema_result_value_type>
|
||||
read_schema_partition_for_table(distributed<service::storage_proxy>& proxy, const sstring& schema_table_name, const sstring& keyspace_name, const sstring& table_name)
|
||||
{
|
||||
auto schema = proxy.local().get_db().local().find_schema(system_keyspace::NAME, schema_table_name);
|
||||
auto keyspace_key = partition_key::from_singular(*schema, keyspace_name);
|
||||
auto clustering_range = query::clustering_range(clustering_key_prefix::from_clustering_prefix(
|
||||
*schema, exploded_clustering_prefix({utf8_type->decompose(table_name)})));
|
||||
auto slice = partition_slice_builder(*schema)
|
||||
.with_range(std::move(clustering_range))
|
||||
.build();
|
||||
auto cmd = make_lw_shared<query::read_command>(schema->id(), schema->version(), std::move(slice), query::max_rows);
|
||||
return query_partition_mutation(proxy.local(), std::move(schema), std::move(cmd), std::move(keyspace_key));
|
||||
auto keyspace_key = dht::global_partitioner().decorate_key(*schema,
|
||||
partition_key::from_singular(*schema, keyspace_name));
|
||||
auto clustering_range = query::clustering_range(clustering_key_prefix::from_clustering_prefix(*schema, exploded_clustering_prefix({utf8_type->decompose(table_name)})));
|
||||
return db::system_keyspace::query(proxy, schema_table_name, keyspace_key, clustering_range).then([keyspace_name] (auto&& rs) {
|
||||
return schema_result_value_type{keyspace_name, std::move(rs)};
|
||||
});
|
||||
}
|
||||
|
||||
static semaphore the_merge_lock;
|
||||
@@ -519,7 +452,7 @@ future<> merge_schema(distributed<service::storage_proxy>& proxy, std::vector<mu
|
||||
}
|
||||
|
||||
future<> merge_schema(distributed<service::storage_proxy>& proxy, std::vector<mutation> mutations, bool do_flush)
|
||||
{
|
||||
{
|
||||
return merge_lock().then([&proxy, mutations = std::move(mutations), do_flush] () mutable {
|
||||
return do_merge_schema(proxy, std::move(mutations), do_flush);
|
||||
}).finally([] {
|
||||
@@ -527,35 +460,6 @@ future<> merge_schema(distributed<service::storage_proxy>& proxy, std::vector<mu
|
||||
});
|
||||
}
|
||||
|
||||
// Returns names of live table definitions of given keyspace
|
||||
future<std::vector<sstring>>
|
||||
static read_table_names_of_keyspace(distributed<service::storage_proxy>& proxy, const sstring& keyspace_name) {
|
||||
auto s = columnfamilies();
|
||||
auto pkey = dht::global_partitioner().decorate_key(*s, partition_key::from_singular(*s, keyspace_name));
|
||||
return db::system_keyspace::query(proxy, COLUMNFAMILIES, pkey).then([] (auto&& rs) {
|
||||
std::vector<sstring> result;
|
||||
for (const query::result_set_row& row : rs->rows()) {
|
||||
result.emplace_back(row.get_nonnull<sstring>("columnfamily_name"));
|
||||
}
|
||||
return result;
|
||||
});
|
||||
}
|
||||
|
||||
// Call inside a seastar thread
|
||||
static
|
||||
std::map<qualified_name, schema_mutations>
|
||||
read_tables_for_keyspaces(distributed<service::storage_proxy>& proxy, const std::set<sstring>& keyspace_names)
|
||||
{
|
||||
std::map<qualified_name, schema_mutations> result;
|
||||
for (auto&& keyspace_name : keyspace_names) {
|
||||
for (auto&& table_name : read_table_names_of_keyspace(proxy, keyspace_name).get0()) {
|
||||
auto qn = qualified_name(keyspace_name, table_name);
|
||||
result.emplace(qn, read_table_mutations(proxy, qn).get0());
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
future<> do_merge_schema(distributed<service::storage_proxy>& proxy, std::vector<mutation> mutations, bool do_flush)
|
||||
{
|
||||
return seastar::async([&proxy, mutations = std::move(mutations), do_flush] () mutable {
|
||||
@@ -570,7 +474,7 @@ future<> do_merge_schema(distributed<service::storage_proxy>& proxy, std::vector
|
||||
|
||||
// current state of the schema
|
||||
auto&& old_keyspaces = read_schema_for_keyspaces(proxy, KEYSPACES, keyspaces).get0();
|
||||
auto&& old_column_families = read_tables_for_keyspaces(proxy, keyspaces);
|
||||
auto&& old_column_families = read_schema_for_keyspaces(proxy, COLUMNFAMILIES, keyspaces).get0();
|
||||
/*auto& old_types = */read_schema_for_keyspaces(proxy, USERTYPES, keyspaces).get0();
|
||||
#if 0 // not in 2.1.8
|
||||
/*auto& old_functions = */read_schema_for_keyspaces(proxy, FUNCTIONS, keyspaces).get0();
|
||||
@@ -590,7 +494,7 @@ future<> do_merge_schema(distributed<service::storage_proxy>& proxy, std::vector
|
||||
|
||||
// with new data applied
|
||||
auto&& new_keyspaces = read_schema_for_keyspaces(proxy, KEYSPACES, keyspaces).get0();
|
||||
auto&& new_column_families = read_tables_for_keyspaces(proxy, keyspaces);
|
||||
auto&& new_column_families = read_schema_for_keyspaces(proxy, COLUMNFAMILIES, keyspaces).get0();
|
||||
/*auto& new_types = */read_schema_for_keyspaces(proxy, USERTYPES, keyspaces).get0();
|
||||
#if 0 // not in 2.1.8
|
||||
/*auto& new_functions = */read_schema_for_keyspaces(proxy, FUNCTIONS, keyspaces).get0();
|
||||
@@ -598,7 +502,7 @@ future<> do_merge_schema(distributed<service::storage_proxy>& proxy, std::vector
|
||||
#endif
|
||||
|
||||
std::set<sstring> keyspaces_to_drop = merge_keyspaces(proxy, std::move(old_keyspaces), std::move(new_keyspaces)).get0();
|
||||
merge_tables(proxy, std::move(old_column_families), std::move(new_column_families));
|
||||
merge_tables(proxy, std::move(old_column_families), std::move(new_column_families)).get0();
|
||||
#if 0
|
||||
mergeTypes(oldTypes, newTypes);
|
||||
mergeFunctions(oldFunctions, newFunctions);
|
||||
@@ -608,7 +512,15 @@ future<> do_merge_schema(distributed<service::storage_proxy>& proxy, std::vector
|
||||
// it is safe to drop a keyspace only when all nested ColumnFamilies where deleted
|
||||
for (auto&& keyspace_to_drop : keyspaces_to_drop) {
|
||||
db.drop_keyspace(keyspace_to_drop);
|
||||
service::get_local_migration_manager().notify_drop_keyspace(keyspace_to_drop);
|
||||
}
|
||||
// FIXME: clean this up by reorganizing the code
|
||||
// Send CQL events only once, not once per shard.
|
||||
if (engine().cpu_id() == 0) {
|
||||
return do_for_each(keyspaces_to_drop, [] (auto& ks_name) {
|
||||
return service::migration_manager::notify_drop_keyspace(ks_name);
|
||||
});
|
||||
} else {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
}).get0();
|
||||
});
|
||||
@@ -639,86 +551,138 @@ future<std::set<sstring>> merge_keyspaces(distributed<service::storage_proxy>& p
|
||||
}
|
||||
for (auto&& key : diff.entries_only_on_right) {
|
||||
auto&& value = after[key];
|
||||
created.emplace_back(schema_result_value_type{key, std::move(value)});
|
||||
if (!value->empty()) {
|
||||
created.emplace_back(schema_result_value_type{key, std::move(value)});
|
||||
}
|
||||
}
|
||||
for (auto&& key : diff.entries_differing) {
|
||||
altered.emplace_back(key);
|
||||
sstring keyspace_name = key;
|
||||
|
||||
auto&& pre = before[key];
|
||||
auto&& post = after[key];
|
||||
|
||||
if (!pre->empty() && !post->empty()) {
|
||||
altered.emplace_back(keyspace_name);
|
||||
} else if (!pre->empty()) {
|
||||
dropped.emplace(keyspace_name);
|
||||
} else if (!post->empty()) { // a (re)created keyspace
|
||||
created.emplace_back(schema_result_value_type{key, std::move(post)});
|
||||
}
|
||||
}
|
||||
return do_with(std::move(created), [&proxy, altered = std::move(altered)] (auto& created) {
|
||||
return proxy.local().get_db().invoke_on_all([&created, altered = std::move(altered)] (database& db) {
|
||||
return do_for_each(created, [&db](auto&& val) {
|
||||
return do_for_each(created, [&db] (auto&& val) {
|
||||
auto ksm = create_keyspace_from_schema_partition(val);
|
||||
return db.create_keyspace(ksm).then([ksm] {
|
||||
service::get_local_migration_manager().notify_create_keyspace(ksm);
|
||||
});
|
||||
return db.create_keyspace(std::move(ksm));
|
||||
}).then([&altered, &db] () mutable {
|
||||
for (auto&& name : altered) {
|
||||
db.update_keyspace(name);
|
||||
}
|
||||
|
||||
return make_ready_future<>();
|
||||
});
|
||||
}).then([&created] {
|
||||
// FIXME: clean this up by reorganizing the code
|
||||
// Send CQL events only once, not once per shard.
|
||||
if (engine().cpu_id() == 0) {
|
||||
return do_for_each(created, [] (auto&& partition) {
|
||||
auto ksm = create_keyspace_from_schema_partition(partition);
|
||||
return service::migration_manager::notify_create_keyspace(ksm);
|
||||
});
|
||||
} else {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
});
|
||||
}).then([dropped = std::move(dropped)] () {
|
||||
return make_ready_future<std::set<sstring>>(dropped);
|
||||
});
|
||||
}
|
||||
|
||||
static void update_column_family(database& db, schema_ptr new_schema) {
|
||||
column_family& cfm = db.find_column_family(new_schema->id());
|
||||
|
||||
bool columns_changed = !cfm.schema()->equal_columns(*new_schema);
|
||||
|
||||
auto s = local_schema_registry().learn(new_schema);
|
||||
s->registry_entry()->mark_synced();
|
||||
cfm.set_schema(std::move(s));
|
||||
|
||||
service::get_local_migration_manager().notify_update_column_family(cfm.schema(), columns_changed);
|
||||
}
|
||||
|
||||
// see the comments for merge_keyspaces()
|
||||
static void merge_tables(distributed<service::storage_proxy>& proxy,
|
||||
std::map<qualified_name, schema_mutations>&& before,
|
||||
std::map<qualified_name, schema_mutations>&& after)
|
||||
future<> merge_tables(distributed<service::storage_proxy>& proxy, schema_result&& before, schema_result&& after)
|
||||
{
|
||||
auto changed_at = db_clock::now();
|
||||
std::vector<global_schema_ptr> created;
|
||||
std::vector<global_schema_ptr> altered;
|
||||
std::vector<global_schema_ptr> dropped;
|
||||
|
||||
auto diff = difference(before, after);
|
||||
for (auto&& key : diff.entries_only_on_left) {
|
||||
auto&& s = proxy.local().get_db().local().find_schema(key.keyspace_name, key.table_name);
|
||||
dropped.emplace_back(s);
|
||||
}
|
||||
for (auto&& key : diff.entries_only_on_right) {
|
||||
created.emplace_back(create_table_from_mutations(after.at(key)));
|
||||
}
|
||||
for (auto&& key : diff.entries_differing) {
|
||||
altered.emplace_back(create_table_from_mutations(after.at(key)));
|
||||
}
|
||||
|
||||
proxy.local().get_db().invoke_on_all([&created, &dropped, &altered, changed_at] (database& db) {
|
||||
return seastar::async([&] {
|
||||
for (auto&& gs : created) {
|
||||
schema_ptr s = gs.get();
|
||||
auto& ks = db.find_keyspace(s->ks_name());
|
||||
auto cfg = ks.make_column_family_config(*s);
|
||||
db.add_column_family(s, cfg);
|
||||
auto& cf = db.find_column_family(s);
|
||||
cf.mark_ready_for_writes();
|
||||
ks.make_directory_for_column_family(s->cf_name(), s->id()).get();
|
||||
service::get_local_migration_manager().notify_create_column_family(s);
|
||||
}
|
||||
for (auto&& gs : altered) {
|
||||
update_column_family(db, gs.get());
|
||||
}
|
||||
parallel_for_each(dropped.begin(), dropped.end(), [changed_at, &db](auto&& gs) {
|
||||
schema_ptr s = gs.get();
|
||||
return db.drop_column_family(changed_at, s->ks_name(), s->cf_name()).then([s] {
|
||||
service::get_local_migration_manager().notify_drop_column_family(s);
|
||||
return do_with(std::make_pair(std::move(after), std::move(before)), [&proxy] (auto& pair) {
|
||||
auto& after = pair.first;
|
||||
auto& before = pair.second;
|
||||
auto changed_at = db_clock::now();
|
||||
return proxy.local().get_db().invoke_on_all([changed_at, &proxy, &before, &after] (database& db) {
|
||||
return seastar::async([changed_at, &proxy, &db, &before, &after] {
|
||||
std::vector<schema_ptr> created;
|
||||
std::vector<schema_ptr> altered;
|
||||
std::vector<schema_ptr> dropped;
|
||||
auto diff = difference(before, after, [](const auto& x, const auto& y) -> bool {
|
||||
return *x == *y;
|
||||
});
|
||||
}).get();
|
||||
for (auto&& key : diff.entries_only_on_left) {
|
||||
auto&& rs = before[key];
|
||||
for (const query::result_set_row& row : rs->rows()) {
|
||||
auto ks_name = row.get_nonnull<sstring>("keyspace_name");
|
||||
auto cf_name = row.get_nonnull<sstring>("columnfamily_name");
|
||||
dropped.emplace_back(db.find_schema(ks_name, cf_name));
|
||||
}
|
||||
}
|
||||
for (auto&& key : diff.entries_only_on_right) {
|
||||
auto&& value = after[key];
|
||||
if (!value->empty()) {
|
||||
auto&& tables = create_tables_from_tables_partition(proxy, value).get0();
|
||||
boost::copy(tables | boost::adaptors::map_values, std::back_inserter(created));
|
||||
}
|
||||
}
|
||||
for (auto&& key : diff.entries_differing) {
|
||||
sstring keyspace_name = key;
|
||||
|
||||
auto&& pre = before[key];
|
||||
auto&& post = after[key];
|
||||
|
||||
if (!pre->empty() && !post->empty()) {
|
||||
auto before = db.find_keyspace(keyspace_name).metadata()->cf_meta_data();
|
||||
auto after = create_tables_from_tables_partition(proxy, post).get0();
|
||||
auto delta = difference(std::map<sstring, schema_ptr>{before.begin(), before.end()}, after, [](const schema_ptr& x, const schema_ptr& y) -> bool {
|
||||
return *x == *y;
|
||||
});
|
||||
for (auto&& key : delta.entries_only_on_left) {
|
||||
dropped.emplace_back(before[key]);
|
||||
}
|
||||
for (auto&& key : delta.entries_only_on_right) {
|
||||
created.emplace_back(after[key]);
|
||||
}
|
||||
for (auto&& key : delta.entries_differing) {
|
||||
altered.emplace_back(after[key]);
|
||||
}
|
||||
} else if (!pre->empty()) {
|
||||
auto before = db.find_keyspace(keyspace_name).metadata()->cf_meta_data();
|
||||
boost::copy(before | boost::adaptors::map_values, std::back_inserter(dropped));
|
||||
} else if (!post->empty()) {
|
||||
auto tables = create_tables_from_tables_partition(proxy, post).get0();
|
||||
boost::copy(tables | boost::adaptors::map_values, std::back_inserter(created));
|
||||
}
|
||||
}
|
||||
for (auto&& cfm : created) {
|
||||
auto& ks = db.find_keyspace(cfm->ks_name());
|
||||
auto cfg = ks.make_column_family_config(*cfm);
|
||||
db.add_column_family(cfm, cfg);
|
||||
}
|
||||
parallel_for_each(altered.begin(), altered.end(), [&db] (auto&& cfm) {
|
||||
return db.update_column_family(cfm->ks_name(), cfm->cf_name());
|
||||
}).get();
|
||||
parallel_for_each(dropped.begin(), dropped.end(), [changed_at, &db] (auto&& cfm) {
|
||||
return db.drop_column_family(changed_at, cfm->ks_name(), cfm->cf_name());
|
||||
}).get();
|
||||
// FIXME: clean this up by reorganizing the code
|
||||
// Send CQL events only once, not once per shard.
|
||||
if (engine().cpu_id() == 0) {
|
||||
for (auto&& cfm : created) {
|
||||
service::migration_manager::notify_create_column_family(cfm).get0();
|
||||
auto& ks = db.find_keyspace(cfm->ks_name());
|
||||
ks.make_directory_for_column_family(cfm->cf_name(), cfm->id());
|
||||
}
|
||||
for (auto&& cfm : dropped) {
|
||||
service::migration_manager::notify_drop_column_family(cfm).get0();
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
}).get();
|
||||
});
|
||||
}
|
||||
|
||||
#if 0
|
||||
@@ -907,7 +871,7 @@ std::vector<mutation> make_create_keyspace_mutations(lw_shared_ptr<keyspace_meta
|
||||
addTypeToSchemaMutation(type, timestamp, mutation);
|
||||
#endif
|
||||
for (auto&& kv : keyspace->cf_meta_data()) {
|
||||
add_table_to_schema_mutation(kv.second, timestamp, true, mutations);
|
||||
add_table_to_schema_mutation(kv.second, timestamp, true, pkey, mutations);
|
||||
}
|
||||
}
|
||||
return mutations;
|
||||
@@ -1033,19 +997,17 @@ std::vector<mutation> make_create_table_mutations(lw_shared_ptr<keyspace_metadat
|
||||
{
|
||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||
auto mutations = make_create_keyspace_mutations(keyspace, timestamp, false);
|
||||
add_table_to_schema_mutation(table, timestamp, true, mutations);
|
||||
schema_ptr s = keyspaces();
|
||||
auto pkey = partition_key::from_singular(*s, keyspace->name());
|
||||
add_table_to_schema_mutation(table, timestamp, true, pkey, mutations);
|
||||
return mutations;
|
||||
}
|
||||
|
||||
schema_mutations make_table_mutations(schema_ptr table, api::timestamp_type timestamp, bool with_columns_and_triggers)
|
||||
void add_table_to_schema_mutation(schema_ptr table, api::timestamp_type timestamp, bool with_columns_and_triggers, const partition_key& pkey, std::vector<mutation>& mutations)
|
||||
{
|
||||
// When adding new schema properties, don't set cells for default values so that
|
||||
// both old and new nodes will see the same version during rolling upgrades.
|
||||
|
||||
// For property that can be null (and can be changed), we insert tombstones, to make sure
|
||||
// we don't keep a property the user has removed
|
||||
schema_ptr s = columnfamilies();
|
||||
auto pkey = partition_key::from_singular(*s, table->ks_name());
|
||||
mutation m{pkey, s};
|
||||
auto ckey = clustering_key::from_singular(*s, table->cf_name());
|
||||
m.set_clustered_cell(ckey, "cf_id", table->id(), timestamp);
|
||||
@@ -1104,24 +1066,16 @@ schema_mutations make_table_mutations(schema_ptr table, api::timestamp_type time
|
||||
if (table->compact_columns_count() == 1) {
|
||||
m.set_clustered_cell(ckey, "value_alias", table->compact_column().name_as_text(), timestamp);
|
||||
} // null if none
|
||||
|
||||
map_type_impl::mutation dropped_columns;
|
||||
auto dropped_columns_column = s->get_column_definition("dropped_columns");
|
||||
assert(dropped_columns_column);
|
||||
auto dropped_columns_type = static_pointer_cast<const map_type_impl>(dropped_columns_column->type);
|
||||
for (auto&& entry : table->dropped_columns()) {
|
||||
dropped_columns.cells.emplace_back(dropped_columns_type->get_keys_type()->decompose(data_value(entry.first)),
|
||||
atomic_cell::make_live(timestamp, dropped_columns_type->get_values_type()->decompose(entry.second)));
|
||||
}
|
||||
m.set_clustered_cell(ckey, *dropped_columns_column,
|
||||
atomic_cell_or_collection::from_collection_mutation(dropped_columns_type->serialize_mutation_form(std::move(dropped_columns))));
|
||||
#if 0
|
||||
for (Map.Entry<ColumnIdentifier, Long> entry : table.getDroppedColumns().entrySet())
|
||||
adder.addMapEntry("dropped_columns", entry.getKey().toString(), entry.getValue());
|
||||
#endif
|
||||
|
||||
m.set_clustered_cell(ckey, "is_dense", table->is_dense(), timestamp);
|
||||
|
||||
mutation columns_mutation(pkey, columns());
|
||||
if (with_columns_and_triggers) {
|
||||
for (auto&& column : table->all_columns_in_select_order()) {
|
||||
add_column_to_schema_mutation(table, column, timestamp, columns_mutation);
|
||||
add_column_to_schema_mutation(table, column, timestamp, pkey, mutations);
|
||||
}
|
||||
|
||||
#if 0
|
||||
@@ -1129,51 +1083,42 @@ schema_mutations make_table_mutations(schema_ptr table, api::timestamp_type time
|
||||
addTriggerToSchemaMutation(table, trigger, timestamp, mutation);
|
||||
#endif
|
||||
}
|
||||
return schema_mutations{std::move(m), std::move(columns_mutation)};
|
||||
mutations.emplace_back(std::move(m));
|
||||
}
|
||||
|
||||
void add_table_to_schema_mutation(schema_ptr table, api::timestamp_type timestamp, bool with_columns_and_triggers, std::vector<mutation>& mutations)
|
||||
{
|
||||
make_table_mutations(table, timestamp, with_columns_and_triggers).copy_to(mutations);
|
||||
}
|
||||
#if 0
|
||||
public static Mutation makeUpdateTableMutation(KSMetaData keyspace,
|
||||
CFMetaData oldTable,
|
||||
CFMetaData newTable,
|
||||
long timestamp,
|
||||
boolean fromThrift)
|
||||
{
|
||||
Mutation mutation = makeCreateKeyspaceMutation(keyspace, timestamp, false);
|
||||
|
||||
std::vector<mutation> make_update_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace,
|
||||
schema_ptr old_table,
|
||||
schema_ptr new_table,
|
||||
api::timestamp_type timestamp,
|
||||
bool from_thrift)
|
||||
{
|
||||
// Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
|
||||
auto mutations = make_create_keyspace_mutations(keyspace, timestamp, false);
|
||||
addTableToSchemaMutation(newTable, timestamp, false, mutation);
|
||||
|
||||
add_table_to_schema_mutation(new_table, timestamp, false, mutations);
|
||||
MapDifference<ByteBuffer, ColumnDefinition> columnDiff = Maps.difference(oldTable.getColumnMetadata(),
|
||||
newTable.getColumnMetadata());
|
||||
|
||||
mutation columns_mutation(partition_key::from_singular(*columns(), old_table->ks_name()), columns());
|
||||
// columns that are no longer needed
|
||||
for (ColumnDefinition column : columnDiff.entriesOnlyOnLeft().values())
|
||||
{
|
||||
// Thrift only knows about the REGULAR ColumnDefinition type, so don't consider other type
|
||||
// are being deleted just because they are not here.
|
||||
if (fromThrift && column.kind != ColumnDefinition.Kind.REGULAR)
|
||||
continue;
|
||||
|
||||
auto diff = difference(old_table->all_columns(), new_table->all_columns());
|
||||
|
||||
// columns that are no longer needed
|
||||
for (auto&& name : diff.entries_only_on_left) {
|
||||
// Thrift only knows about the REGULAR ColumnDefinition type, so don't consider other type
|
||||
// are being deleted just because they are not here.
|
||||
const column_definition& column = *old_table->all_columns().at(name);
|
||||
if (from_thrift && !column.is_regular()) {
|
||||
continue;
|
||||
dropColumnFromSchemaMutation(oldTable, column, timestamp, mutation);
|
||||
}
|
||||
|
||||
drop_column_from_schema_mutation(old_table, column, timestamp, mutations);
|
||||
}
|
||||
// newly added columns
|
||||
for (ColumnDefinition column : columnDiff.entriesOnlyOnRight().values())
|
||||
addColumnToSchemaMutation(newTable, column, timestamp, mutation);
|
||||
|
||||
// newly added columns and old columns with updated attributes
|
||||
for (auto&& name : boost::range::join(diff.entries_differing, diff.entries_only_on_right)) {
|
||||
const column_definition& column = *new_table->all_columns().at(name);
|
||||
add_column_to_schema_mutation(new_table, column, timestamp, columns_mutation);
|
||||
}
|
||||
// old columns with updated attributes
|
||||
for (ByteBuffer name : columnDiff.entriesDiffering().keySet())
|
||||
addColumnToSchemaMutation(newTable, newTable.getColumnDefinition(name), timestamp, mutation);
|
||||
|
||||
mutations.emplace_back(std::move(columns_mutation));
|
||||
|
||||
warn(unimplemented::cause::TRIGGERS);
|
||||
#if 0
|
||||
MapDifference<String, TriggerDefinition> triggerDiff = Maps.difference(oldTable.getTriggers(), newTable.getTriggers());
|
||||
|
||||
// dropped triggers
|
||||
@@ -1184,9 +1129,9 @@ std::vector<mutation> make_update_table_mutations(lw_shared_ptr<keyspace_metadat
|
||||
for (TriggerDefinition trigger : triggerDiff.entriesOnlyOnRight().values())
|
||||
addTriggerToSchemaMutation(newTable, trigger, timestamp, mutation);
|
||||
|
||||
return mutation;
|
||||
}
|
||||
#endif
|
||||
return mutations;
|
||||
}
|
||||
|
||||
std::vector<mutation> make_drop_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp)
|
||||
{
|
||||
@@ -1214,39 +1159,13 @@ std::vector<mutation> make_drop_table_mutations(lw_shared_ptr<keyspace_metadata>
|
||||
return mutations;
|
||||
}
|
||||
|
||||
static future<schema_mutations> read_table_mutations(distributed<service::storage_proxy>& proxy, const qualified_name& table)
|
||||
{
|
||||
return read_schema_partition_for_table(proxy, COLUMNFAMILIES, table.keyspace_name, table.table_name)
|
||||
.then([&proxy, table] (mutation cf_m) {
|
||||
return read_schema_partition_for_table(proxy, COLUMNS, table.keyspace_name, table.table_name)
|
||||
.then([cf_m = std::move(cf_m)] (mutation col_m) {
|
||||
return schema_mutations{std::move(cf_m), std::move(col_m)};
|
||||
});
|
||||
#if 0
|
||||
// FIXME:
|
||||
Row serializedTriggers = readSchemaPartitionForTable(TRIGGERS, ksName, cfName);
|
||||
try
|
||||
{
|
||||
for (TriggerDefinition trigger : createTriggersFromTriggersPartition(serializedTriggers))
|
||||
cfm.addTriggerDefinition(trigger);
|
||||
}
|
||||
catch (InvalidRequestException e)
|
||||
{
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
#endif
|
||||
});
|
||||
}
|
||||
|
||||
future<schema_ptr> create_table_from_name(distributed<service::storage_proxy>& proxy, const sstring& keyspace, const sstring& table)
|
||||
{
|
||||
return do_with(qualified_name(keyspace, table), [&proxy] (auto&& qn) {
|
||||
return read_table_mutations(proxy, qn).then([qn] (schema_mutations sm) {
|
||||
if (!sm.live()) {
|
||||
throw std::runtime_error(sprint("%s:%s not found in the schema definitions keyspace.", qn.keyspace_name, qn.table_name));
|
||||
}
|
||||
return create_table_from_mutations(std::move(sm));
|
||||
});
|
||||
return read_schema_partition_for_table(proxy, COLUMNFAMILIES, keyspace, table).then([&proxy, keyspace, table] (auto partition) {
|
||||
if (partition.second->empty()) {
|
||||
throw std::runtime_error(sprint("%s:%s not found in the schema definitions keyspace.", keyspace, table));
|
||||
}
|
||||
return create_table_from_table_partition(proxy, std::move(partition.second));
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1275,6 +1194,18 @@ future<std::map<sstring, schema_ptr>> create_tables_from_tables_partition(distri
|
||||
}
|
||||
#endif
|
||||
|
||||
void create_table_from_table_row_and_columns_partition(schema_builder& builder, const query::result_set_row& table_row, const schema_result::value_type& serialized_columns)
|
||||
{
|
||||
create_table_from_table_row_and_column_rows(builder, table_row, serialized_columns.second);
|
||||
}
|
||||
|
||||
future<schema_ptr> create_table_from_table_partition(distributed<service::storage_proxy>& proxy, lw_shared_ptr<query::result_set>&& partition)
|
||||
{
|
||||
return do_with(std::move(partition), [&proxy] (auto& partition) {
|
||||
return create_table_from_table_row(proxy, partition->row(0));
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Deserialize table metadata from low-level representation
|
||||
*
|
||||
@@ -1284,18 +1215,31 @@ future<schema_ptr> create_table_from_table_row(distributed<service::storage_prox
|
||||
{
|
||||
auto ks_name = row.get_nonnull<sstring>("keyspace_name");
|
||||
auto cf_name = row.get_nonnull<sstring>("columnfamily_name");
|
||||
return create_table_from_name(proxy, ks_name, cf_name);
|
||||
auto id = row.get_nonnull<utils::UUID>("cf_id");
|
||||
return read_schema_partition_for_table(proxy, COLUMNS, ks_name, cf_name).then([&row, ks_name, cf_name, id] (auto serialized_columns) {
|
||||
schema_builder builder{ks_name, cf_name, id};
|
||||
create_table_from_table_row_and_columns_partition(builder, row, serialized_columns);
|
||||
return builder.build();
|
||||
});
|
||||
#if 0
|
||||
// FIXME:
|
||||
Row serializedTriggers = readSchemaPartitionForTable(TRIGGERS, ksName, cfName);
|
||||
try
|
||||
{
|
||||
for (TriggerDefinition trigger : createTriggersFromTriggersPartition(serializedTriggers))
|
||||
cfm.addTriggerDefinition(trigger);
|
||||
}
|
||||
catch (InvalidRequestException e)
|
||||
{
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
schema_ptr create_table_from_mutations(schema_mutations sm, std::experimental::optional<table_schema_version> version)
|
||||
void create_table_from_table_row_and_column_rows(schema_builder& builder, const query::result_set_row& table_row, const schema_result::mapped_type& serialized_column_definitions)
|
||||
{
|
||||
auto table_rs = query::result_set(sm.columnfamilies_mutation());
|
||||
query::result_set_row table_row = table_rs.row(0);
|
||||
|
||||
auto ks_name = table_row.get_nonnull<sstring>("keyspace_name");
|
||||
auto cf_name = table_row.get_nonnull<sstring>("columnfamily_name");
|
||||
auto id = table_row.get_nonnull<utils::UUID>("cf_id");
|
||||
schema_builder builder{ks_name, cf_name, id};
|
||||
|
||||
#if 0
|
||||
AbstractType<?> rawComparator = TypeParser.parse(result.getString("comparator"));
|
||||
@@ -1313,12 +1257,11 @@ schema_ptr create_table_from_mutations(schema_mutations sm, std::experimental::o
|
||||
AbstractType<?> fullRawComparator = CFMetaData.makeRawAbstractType(rawComparator, subComparator);
|
||||
#endif
|
||||
|
||||
std::vector<column_definition> column_defs = create_columns_from_column_rows(
|
||||
query::result_set(sm.columns_mutation()),
|
||||
ks_name,
|
||||
cf_name,/*,
|
||||
fullRawComparator, */
|
||||
cf == cf_type::super);
|
||||
std::vector<column_definition> column_defs = create_columns_from_column_rows(serialized_column_definitions,
|
||||
ks_name,
|
||||
cf_name,/*,
|
||||
fullRawComparator, */
|
||||
cf == cf_type::super);
|
||||
|
||||
bool is_dense;
|
||||
if (table_row.has("is_dense")) {
|
||||
@@ -1329,10 +1272,8 @@ schema_ptr create_table_from_mutations(schema_mutations sm, std::experimental::o
|
||||
throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
|
||||
}
|
||||
|
||||
auto comparator = table_row.get_nonnull<sstring>("comparator");
|
||||
bool is_compound = cell_comparator::check_compound(comparator);
|
||||
bool is_compound = cell_comparator::check_compound(table_row.get_nonnull<sstring>("comparator"));
|
||||
builder.set_is_compound(is_compound);
|
||||
cell_comparator::read_collections(builder, comparator);
|
||||
#if 0
|
||||
CellNameType comparator = CellNames.fromAbstractType(fullRawComparator, isDense);
|
||||
|
||||
@@ -1424,22 +1365,13 @@ schema_ptr create_table_from_mutations(schema_mutations sm, std::experimental::o
|
||||
builder.set_bloom_filter_fp_chance(builder.get_bloom_filter_fp_chance());
|
||||
}
|
||||
|
||||
if (table_row.has("dropped_columns")) {
|
||||
auto map = table_row.get_nonnull<map_type_impl::native_type>("dropped_columns");
|
||||
for (auto&& entry : map) {
|
||||
builder.without_column(value_cast<sstring>(entry.first), value_cast<api::timestamp_type>(entry.second));
|
||||
};
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (result.has("dropped_columns"))
|
||||
cfm.droppedColumns(convertDroppedColumns(result.getMap("dropped_columns", UTF8Type.instance, LongType.instance)));
|
||||
#endif
|
||||
for (auto&& cdef : column_defs) {
|
||||
builder.with_column(cdef);
|
||||
}
|
||||
if (version) {
|
||||
builder.with_version(*version);
|
||||
} else {
|
||||
builder.with_version(sm.digest());
|
||||
}
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
#if 0
|
||||
@@ -1459,9 +1391,12 @@ schema_ptr create_table_from_mutations(schema_mutations sm, std::experimental::o
|
||||
void add_column_to_schema_mutation(schema_ptr table,
|
||||
const column_definition& column,
|
||||
api::timestamp_type timestamp,
|
||||
mutation& m)
|
||||
const partition_key& pkey,
|
||||
std::vector<mutation>& mutations)
|
||||
{
|
||||
auto ckey = clustering_key::from_exploded(*m.schema(), {utf8_type->decompose(table->cf_name()), column.name()});
|
||||
schema_ptr s = columns();
|
||||
mutation m{pkey, s};
|
||||
auto ckey = clustering_key::from_exploded(*s, {utf8_type->decompose(table->cf_name()), column.name()});
|
||||
m.set_clustered_cell(ckey, "validator", column.type->name(), timestamp);
|
||||
m.set_clustered_cell(ckey, "type", serialize_kind(column.kind), timestamp);
|
||||
if (!column.is_on_all_components()) {
|
||||
@@ -1472,6 +1407,7 @@ void add_column_to_schema_mutation(schema_ptr table,
|
||||
adder.add("index_type", column.getIndexType() == null ? null : column.getIndexType().toString());
|
||||
adder.add("index_options", json(column.getIndexOptions()));
|
||||
#endif
|
||||
mutations.emplace_back(std::move(m));
|
||||
}
|
||||
|
||||
sstring serialize_kind(column_kind kind)
|
||||
@@ -1512,14 +1448,14 @@ void drop_column_from_schema_mutation(schema_ptr table, const column_definition&
|
||||
mutations.emplace_back(m);
|
||||
}
|
||||
|
||||
std::vector<column_definition> create_columns_from_column_rows(const query::result_set& rows,
|
||||
std::vector<column_definition> create_columns_from_column_rows(const schema_result::mapped_type& rows,
|
||||
const sstring& keyspace,
|
||||
const sstring& table, /*,
|
||||
AbstractType<?> rawComparator, */
|
||||
bool is_super)
|
||||
{
|
||||
std::vector<column_definition> columns;
|
||||
for (auto&& row : rows.rows()) {
|
||||
for (auto&& row : rows->rows()) {
|
||||
columns.emplace_back(std::move(create_column_from_column_row(row, keyspace, table, /*, rawComparator, */ is_super)));
|
||||
}
|
||||
return columns;
|
||||
|
||||
@@ -43,8 +43,6 @@
|
||||
#include "service/storage_proxy.hh"
|
||||
#include "mutation.hh"
|
||||
#include "schema.hh"
|
||||
#include "hashing.hh"
|
||||
#include "schema_mutations.hh"
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
@@ -94,24 +92,17 @@ std::vector<mutation> make_drop_keyspace_mutations(lw_shared_ptr<keyspace_metada
|
||||
|
||||
lw_shared_ptr<keyspace_metadata> create_keyspace_from_schema_partition(const schema_result_value_type& partition);
|
||||
|
||||
future<> merge_tables(distributed<service::storage_proxy>& proxy, schema_result&& before, schema_result&& after);
|
||||
|
||||
lw_shared_ptr<keyspace_metadata> create_keyspace_from_schema_partition(const schema_result_value_type& partition);
|
||||
|
||||
mutation make_create_keyspace_mutation(lw_shared_ptr<keyspace_metadata> keyspace, api::timestamp_type timestamp, bool with_tables_and_types_and_functions = true);
|
||||
|
||||
std::vector<mutation> make_create_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp);
|
||||
|
||||
std::vector<mutation> make_update_table_mutations(
|
||||
lw_shared_ptr<keyspace_metadata> keyspace,
|
||||
schema_ptr old_table,
|
||||
schema_ptr new_table,
|
||||
api::timestamp_type timestamp,
|
||||
bool from_thrift);
|
||||
|
||||
schema_mutations make_table_mutations(schema_ptr table, api::timestamp_type timestamp, bool with_columns_and_triggers = true);
|
||||
|
||||
future<std::map<sstring, schema_ptr>> create_tables_from_tables_partition(distributed<service::storage_proxy>& proxy, const schema_result::mapped_type& result);
|
||||
|
||||
void add_table_to_schema_mutation(schema_ptr table, api::timestamp_type timestamp, bool with_columns_and_triggers, std::vector<mutation>& mutations);
|
||||
void add_table_to_schema_mutation(schema_ptr table, api::timestamp_type timestamp, bool with_columns_and_triggers, const partition_key& pkey, std::vector<mutation>& mutations);
|
||||
|
||||
std::vector<mutation> make_drop_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp);
|
||||
|
||||
@@ -119,11 +110,13 @@ future<schema_ptr> create_table_from_name(distributed<service::storage_proxy>& p
|
||||
|
||||
future<schema_ptr> create_table_from_table_row(distributed<service::storage_proxy>& proxy, const query::result_set_row& row);
|
||||
|
||||
schema_ptr create_table_from_mutations(schema_mutations, std::experimental::optional<table_schema_version> version = {});
|
||||
void create_table_from_table_row_and_column_rows(schema_builder& builder, const query::result_set_row& table_row, const schema_result::mapped_type& serialized_columns);
|
||||
|
||||
future<schema_ptr> create_table_from_table_partition(distributed<service::storage_proxy>& proxy, lw_shared_ptr<query::result_set>&& partition);
|
||||
|
||||
void drop_column_from_schema_mutation(schema_ptr table, const column_definition& column, long timestamp, std::vector<mutation>& mutations);
|
||||
|
||||
std::vector<column_definition> create_columns_from_column_rows(const query::result_set& rows,
|
||||
std::vector<column_definition> create_columns_from_column_rows(const schema_result::mapped_type& rows,
|
||||
const sstring& keyspace,
|
||||
const sstring& table,/*,
|
||||
AbstractType<?> rawComparator, */
|
||||
@@ -136,25 +129,11 @@ column_definition create_column_from_column_row(const query::result_set_row& row
|
||||
bool is_super);
|
||||
|
||||
|
||||
void add_column_to_schema_mutation(schema_ptr table, const column_definition& column, api::timestamp_type timestamp, mutation& mutation);
|
||||
void add_column_to_schema_mutation(schema_ptr table, const column_definition& column, api::timestamp_type timestamp, const partition_key& pkey, std::vector<mutation>& mutations);
|
||||
|
||||
sstring serialize_kind(column_kind kind);
|
||||
column_kind deserialize_kind(sstring kind);
|
||||
data_type parse_type(sstring str);
|
||||
|
||||
schema_ptr columns();
|
||||
schema_ptr columnfamilies();
|
||||
|
||||
template<typename Hasher>
|
||||
void feed_hash_for_schema_digest(Hasher& h, const mutation& m) {
|
||||
// Cassandra is skipping tombstones from digest calculation
|
||||
// to avoid disagreements due to tombstone GC.
|
||||
// See https://issues.apache.org/jira/browse/CASSANDRA-6862.
|
||||
// We achieve similar effect with compact_for_compaction().
|
||||
mutation m_compacted(m);
|
||||
m_compacted.partition().compact_for_compaction(*m.schema(), api::max_timestamp, gc_clock::time_point::max());
|
||||
feed_hash(h, m_compacted);
|
||||
}
|
||||
|
||||
} // namespace schema_tables
|
||||
} // namespace db
|
||||
|
||||
@@ -69,11 +69,6 @@ void db::serializer<bytes>::read(bytes& b, input& in) {
|
||||
b = in.read<bytes>();
|
||||
}
|
||||
|
||||
template<>
|
||||
void db::serializer<bytes>::skip(input& in) {
|
||||
in.read<bytes>(); // FIXME: Avoid reading
|
||||
}
|
||||
|
||||
template<>
|
||||
db::serializer<bytes_view>::serializer(const bytes_view& v)
|
||||
: _item(v), _size(output::serialized_size(v)) {
|
||||
@@ -109,11 +104,6 @@ void db::serializer<sstring>::read(sstring& s, input& in) {
|
||||
s = in.read<sstring>();
|
||||
}
|
||||
|
||||
template<>
|
||||
void db::serializer<sstring>::skip(input& in) {
|
||||
in.read<sstring>(); // FIXME: avoid reading
|
||||
}
|
||||
|
||||
template<>
|
||||
db::serializer<tombstone>::serializer(const tombstone& t)
|
||||
: _item(t), _size(sizeof(t.timestamp) + sizeof(decltype(t.deletion_time.time_since_epoch().count()))) {
|
||||
@@ -167,6 +157,81 @@ void db::serializer<collection_mutation_view>::read(collection_mutation_view& c,
|
||||
c = collection_mutation_view::from_bytes(bytes_view_serializer::read(in));
|
||||
}
|
||||
|
||||
template<>
|
||||
db::serializer<partition_key_view>::serializer(const partition_key_view& key)
|
||||
: _item(key), _size(sizeof(uint16_t) /* size */ + key.representation().size()) {
|
||||
}
|
||||
|
||||
template<>
|
||||
void db::serializer<partition_key_view>::write(output& out, const partition_key_view& key) {
|
||||
bytes_view v = key.representation();
|
||||
out.write<uint16_t>(v.size());
|
||||
out.write(v.begin(), v.end());
|
||||
}
|
||||
|
||||
template<>
|
||||
void db::serializer<partition_key_view>::read(partition_key_view& b, input& in) {
|
||||
auto len = in.read<uint16_t>();
|
||||
b = partition_key_view::from_bytes(in.read_view(len));
|
||||
}
|
||||
|
||||
template<>
|
||||
partition_key_view db::serializer<partition_key_view>::read(input& in) {
|
||||
auto len = in.read<uint16_t>();
|
||||
return partition_key_view::from_bytes(in.read_view(len));
|
||||
}
|
||||
|
||||
template<>
|
||||
void db::serializer<partition_key_view>::skip(input& in) {
|
||||
auto len = in.read<uint16_t>();
|
||||
in.skip(len);
|
||||
}
|
||||
|
||||
template<>
|
||||
db::serializer<clustering_key_prefix_view>::serializer(const clustering_key_prefix_view& key)
|
||||
: _item(key), _size(sizeof(uint16_t) /* size */ + key.representation().size()) {
|
||||
}
|
||||
|
||||
template<>
|
||||
void db::serializer<clustering_key_prefix_view>::write(output& out, const clustering_key_prefix_view& key) {
|
||||
bytes_view v = key.representation();
|
||||
out.write<uint16_t>(v.size());
|
||||
out.write(v.begin(), v.end());
|
||||
}
|
||||
|
||||
template<>
|
||||
void db::serializer<clustering_key_prefix_view>::read(clustering_key_prefix_view& b, input& in) {
|
||||
auto len = in.read<uint16_t>();
|
||||
b = clustering_key_prefix_view::from_bytes(in.read_view(len));
|
||||
}
|
||||
|
||||
template<>
|
||||
clustering_key_prefix_view db::serializer<clustering_key_prefix_view>::read(input& in) {
|
||||
auto len = in.read<uint16_t>();
|
||||
return clustering_key_prefix_view::from_bytes(in.read_view(len));
|
||||
}
|
||||
|
||||
template<>
|
||||
db::serializer<frozen_mutation>::serializer(const frozen_mutation& mutation)
|
||||
: _item(mutation), _size(sizeof(uint32_t) /* size */ + mutation.representation().size()) {
|
||||
}
|
||||
|
||||
template<>
|
||||
void db::serializer<frozen_mutation>::write(output& out, const frozen_mutation& mutation) {
|
||||
bytes_view v = mutation.representation();
|
||||
out.write(v);
|
||||
}
|
||||
|
||||
template<>
|
||||
void db::serializer<frozen_mutation>::read(frozen_mutation& m, input& in) {
|
||||
m = read(in);
|
||||
}
|
||||
|
||||
template<>
|
||||
frozen_mutation db::serializer<frozen_mutation>::read(input& in) {
|
||||
return frozen_mutation(bytes_serializer::read(in));
|
||||
}
|
||||
|
||||
template<>
|
||||
db::serializer<db::replay_position>::serializer(const db::replay_position& rp)
|
||||
: _item(rp), _size(sizeof(uint64_t) * 2) {
|
||||
@@ -191,4 +256,7 @@ template class db::serializer<sstring> ;
|
||||
template class db::serializer<atomic_cell_view> ;
|
||||
template class db::serializer<collection_mutation_view> ;
|
||||
template class db::serializer<utils::UUID> ;
|
||||
template class db::serializer<partition_key_view> ;
|
||||
template class db::serializer<clustering_key_prefix_view> ;
|
||||
template class db::serializer<frozen_mutation> ;
|
||||
template class db::serializer<db::replay_position> ;
|
||||
|
||||
@@ -28,7 +28,9 @@
|
||||
#include "utils/data_output.hh"
|
||||
#include "bytes_ostream.hh"
|
||||
#include "bytes.hh"
|
||||
#include "keys.hh"
|
||||
#include "database_fwd.hh"
|
||||
#include "frozen_mutation.hh"
|
||||
#include "db/commitlog/replay_position.hh"
|
||||
|
||||
namespace db {
|
||||
@@ -178,7 +180,6 @@ template<> utils::UUID serializer<utils::UUID>::read(input&);
|
||||
template<> serializer<bytes>::serializer(const bytes &);
|
||||
template<> void serializer<bytes>::write(output&, const type&);
|
||||
template<> void serializer<bytes>::read(bytes&, input&);
|
||||
template<> void serializer<bytes>::skip(input&);
|
||||
|
||||
template<> serializer<bytes_view>::serializer(const bytes_view&);
|
||||
template<> void serializer<bytes_view>::write(output&, const type&);
|
||||
@@ -188,7 +189,6 @@ template<> bytes_view serializer<bytes_view>::read(input&);
|
||||
template<> serializer<sstring>::serializer(const sstring&);
|
||||
template<> void serializer<sstring>::write(output&, const type&);
|
||||
template<> void serializer<sstring>::read(sstring&, input&);
|
||||
template<> void serializer<sstring>::skip(input&);
|
||||
|
||||
template<> serializer<tombstone>::serializer(const tombstone &);
|
||||
template<> void serializer<tombstone>::write(output&, const type&);
|
||||
@@ -203,6 +203,22 @@ template<> serializer<collection_mutation_view>::serializer(const collection_mut
|
||||
template<> void serializer<collection_mutation_view>::write(output&, const type&);
|
||||
template<> void serializer<collection_mutation_view>::read(collection_mutation_view&, input&);
|
||||
|
||||
template<> serializer<frozen_mutation>::serializer(const frozen_mutation &);
|
||||
template<> void serializer<frozen_mutation>::write(output&, const type&);
|
||||
template<> void serializer<frozen_mutation>::read(frozen_mutation&, input&);
|
||||
template<> frozen_mutation serializer<frozen_mutation>::read(input&);
|
||||
|
||||
template<> serializer<partition_key_view>::serializer(const partition_key_view &);
|
||||
template<> void serializer<partition_key_view>::write(output&, const partition_key_view&);
|
||||
template<> void serializer<partition_key_view>::read(partition_key_view&, input&);
|
||||
template<> partition_key_view serializer<partition_key_view>::read(input&);
|
||||
template<> void serializer<partition_key_view>::skip(input&);
|
||||
|
||||
template<> serializer<clustering_key_prefix_view>::serializer(const clustering_key_prefix_view &);
|
||||
template<> void serializer<clustering_key_prefix_view>::write(output&, const clustering_key_prefix_view&);
|
||||
template<> void serializer<clustering_key_prefix_view>::read(clustering_key_prefix_view&, input&);
|
||||
template<> clustering_key_prefix_view serializer<clustering_key_prefix_view>::read(input&);
|
||||
|
||||
template<> serializer<db::replay_position>::serializer(const db::replay_position&);
|
||||
template<> void serializer<db::replay_position>::write(output&, const db::replay_position&);
|
||||
template<> void serializer<db::replay_position>::read(db::replay_position&, input&);
|
||||
@@ -219,6 +235,9 @@ extern template class serializer<bytes>;
|
||||
extern template class serializer<bytes_view>;
|
||||
extern template class serializer<sstring>;
|
||||
extern template class serializer<utils::UUID>;
|
||||
extern template class serializer<partition_key_view>;
|
||||
extern template class serializer<clustering_key_view>;
|
||||
extern template class serializer<clustering_key_prefix_view>;
|
||||
extern template class serializer<db::replay_position>;
|
||||
|
||||
typedef serializer<tombstone> tombstone_serializer;
|
||||
@@ -228,6 +247,10 @@ typedef serializer<sstring> sstring_serializer;
|
||||
typedef serializer<atomic_cell_view> atomic_cell_view_serializer;
|
||||
typedef serializer<collection_mutation_view> collection_mutation_view_serializer;
|
||||
typedef serializer<utils::UUID> uuid_serializer;
|
||||
typedef serializer<partition_key_view> partition_key_view_serializer;
|
||||
typedef serializer<clustering_key_view> clustering_key_view_serializer;
|
||||
typedef serializer<clustering_key_prefix_view> clustering_key_prefix_view_serializer;
|
||||
typedef serializer<frozen_mutation> frozen_mutation_serializer;
|
||||
typedef serializer<db::replay_position> replay_position_serializer;
|
||||
|
||||
}
|
||||
|
||||
@@ -63,8 +63,6 @@
|
||||
#include "partition_slice_builder.hh"
|
||||
#include "db/config.hh"
|
||||
#include "schema_builder.hh"
|
||||
#include "md5_hasher.hh"
|
||||
#include "release.hh"
|
||||
#include <core/enum.hh>
|
||||
|
||||
using days = std::chrono::duration<int, std::ratio<24 * 3600>>;
|
||||
@@ -75,23 +73,6 @@ std::unique_ptr<query_context> qctx = {};
|
||||
|
||||
namespace system_keyspace {
|
||||
|
||||
static const api::timestamp_type creation_timestamp = api::new_timestamp();
|
||||
|
||||
api::timestamp_type schema_creation_timestamp() {
|
||||
return creation_timestamp;
|
||||
}
|
||||
|
||||
// Increase whenever changing schema of any system table.
|
||||
// FIXME: Make automatic by calculating from schema structure.
|
||||
static const uint16_t version_sequence_number = 1;
|
||||
|
||||
table_schema_version generate_schema_version(utils::UUID table_id) {
|
||||
md5_hasher h;
|
||||
feed_hash(h, table_id);
|
||||
feed_hash(h, version_sequence_number);
|
||||
return utils::UUID_gen::get_name_UUID(h.finalize());
|
||||
}
|
||||
|
||||
// Currently, the type variables (uuid_type, etc.) are thread-local reference-
|
||||
// counted shared pointers. This forces us to also make the built in schemas
|
||||
// below thread-local as well.
|
||||
@@ -120,7 +101,6 @@ schema_ptr hints() {
|
||||
)));
|
||||
builder.set_gc_grace_seconds(0);
|
||||
builder.set_compaction_strategy_options({{ "enabled", "false" }});
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::yes);
|
||||
}();
|
||||
return hints;
|
||||
@@ -146,7 +126,6 @@ schema_ptr batchlog() {
|
||||
// .compactionStrategyOptions(Collections.singletonMap("min_threshold", "2"))
|
||||
)));
|
||||
builder.set_gc_grace_seconds(0);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
return batchlog;
|
||||
@@ -171,7 +150,6 @@ schema_ptr batchlog() {
|
||||
// operations on resulting CFMetaData:
|
||||
// .compactionStrategyClass(LeveledCompactionStrategy.class);
|
||||
)));
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
return paxos;
|
||||
@@ -193,7 +171,6 @@ schema_ptr built_indexes() {
|
||||
// comment
|
||||
"built column indexes"
|
||||
)));
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::yes);
|
||||
}();
|
||||
return built_indexes;
|
||||
@@ -235,7 +212,6 @@ schema_ptr built_indexes() {
|
||||
// comment
|
||||
"information about the local node"
|
||||
)));
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
return local;
|
||||
@@ -266,7 +242,6 @@ schema_ptr built_indexes() {
|
||||
// comment
|
||||
"information about known peers in the cluster"
|
||||
)));
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
return peers;
|
||||
@@ -290,7 +265,6 @@ schema_ptr built_indexes() {
|
||||
// comment
|
||||
"events related to peers"
|
||||
)));
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
return peer_events;
|
||||
@@ -312,7 +286,6 @@ schema_ptr built_indexes() {
|
||||
// comment
|
||||
"ranges requested for transfer"
|
||||
)));
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
return range_xfers;
|
||||
@@ -338,7 +311,6 @@ schema_ptr built_indexes() {
|
||||
// comment
|
||||
"unfinished compactions"
|
||||
)));
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
return compactions_in_progress;
|
||||
@@ -368,7 +340,6 @@ schema_ptr built_indexes() {
|
||||
"week-long compaction history"
|
||||
)));
|
||||
builder.set_default_time_to_live(std::chrono::duration_cast<std::chrono::seconds>(days(7)));
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
return compaction_history;
|
||||
@@ -397,7 +368,6 @@ schema_ptr built_indexes() {
|
||||
// comment
|
||||
"historic sstable read rates"
|
||||
)));
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
return sstable_activity;
|
||||
@@ -423,7 +393,6 @@ schema_ptr size_estimates() {
|
||||
"per-table primary range size estimates"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(0);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
return size_estimates;
|
||||
@@ -517,10 +486,6 @@ future<> init_local_cache() {
|
||||
});
|
||||
}
|
||||
|
||||
future<> deinit_local_cache() {
|
||||
return _local_cache.stop();
|
||||
}
|
||||
|
||||
void minimal_setup(distributed<database>& db, distributed<cql3::query_processor>& qp) {
|
||||
qctx = std::make_unique<query_context>(db, qp);
|
||||
}
|
||||
@@ -544,6 +509,7 @@ future<> setup(distributed<database>& db, distributed<cql3::query_processor>& qp
|
||||
return ms.init_local_preferred_ip_cache();
|
||||
});
|
||||
});
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
typedef std::pair<replay_positions, db_clock::time_point> truncation_entry;
|
||||
@@ -1015,9 +981,8 @@ query_mutations(distributed<service::storage_proxy>& proxy, const sstring& cf_na
|
||||
database& db = proxy.local().get_db().local();
|
||||
schema_ptr schema = db.find_schema(db::system_keyspace::NAME, cf_name);
|
||||
auto slice = partition_slice_builder(*schema).build();
|
||||
auto cmd = make_lw_shared<query::read_command>(schema->id(), schema->version(),
|
||||
std::move(slice), std::numeric_limits<uint32_t>::max());
|
||||
return proxy.local().query_mutations_locally(std::move(schema), std::move(cmd), query::full_partition_range);
|
||||
auto cmd = make_lw_shared<query::read_command>(schema->id(), std::move(slice), std::numeric_limits<uint32_t>::max());
|
||||
return proxy.local().query_mutations_locally(cmd, query::full_partition_range);
|
||||
}
|
||||
|
||||
future<lw_shared_ptr<query::result_set>>
|
||||
@@ -1025,8 +990,7 @@ query(distributed<service::storage_proxy>& proxy, const sstring& cf_name) {
|
||||
database& db = proxy.local().get_db().local();
|
||||
schema_ptr schema = db.find_schema(db::system_keyspace::NAME, cf_name);
|
||||
auto slice = partition_slice_builder(*schema).build();
|
||||
auto cmd = make_lw_shared<query::read_command>(schema->id(), schema->version(),
|
||||
std::move(slice), std::numeric_limits<uint32_t>::max());
|
||||
auto cmd = make_lw_shared<query::read_command>(schema->id(), std::move(slice), std::numeric_limits<uint32_t>::max());
|
||||
return proxy.local().query(schema, cmd, {query::full_partition_range}, db::consistency_level::ONE).then([schema, cmd] (auto&& result) {
|
||||
return make_lw_shared(query::result_set::from_raw_result(schema, cmd->slice, *result));
|
||||
});
|
||||
@@ -1040,7 +1004,7 @@ query(distributed<service::storage_proxy>& proxy, const sstring& cf_name, const
|
||||
auto slice = partition_slice_builder(*schema)
|
||||
.with_range(std::move(row_range))
|
||||
.build();
|
||||
auto cmd = make_lw_shared<query::read_command>(schema->id(), schema->version(), std::move(slice), query::max_rows);
|
||||
auto cmd = make_lw_shared<query::read_command>(schema->id(), std::move(slice), query::max_rows);
|
||||
return proxy.local().query(schema, cmd, {query::partition_range::make_singular(key)}, db::consistency_level::ONE).then([schema, cmd] (auto&& result) {
|
||||
return make_lw_shared(query::result_set::from_raw_result(schema, cmd->slice, *result));
|
||||
});
|
||||
|
||||
@@ -84,13 +84,10 @@ extern schema_ptr hints();
|
||||
extern schema_ptr batchlog();
|
||||
extern schema_ptr built_indexes(); // TODO (from Cassandra): make private
|
||||
|
||||
table_schema_version generate_schema_version(utils::UUID table_id);
|
||||
|
||||
// Only for testing.
|
||||
void minimal_setup(distributed<database>& db, distributed<cql3::query_processor>& qp);
|
||||
|
||||
future<> init_local_cache();
|
||||
future<> deinit_local_cache();
|
||||
future<> setup(distributed<database>& db, distributed<cql3::query_processor>& qp);
|
||||
future<> update_schema_version(utils::UUID version);
|
||||
future<> update_tokens(std::unordered_set<dht::token> tokens);
|
||||
@@ -675,7 +672,5 @@ future<> set_bootstrap_state(bootstrap_state state);
|
||||
executeInternal(String.format(cql, SSTABLE_ACTIVITY), keyspace, table, generation);
|
||||
}
|
||||
#endif
|
||||
|
||||
api::timestamp_type schema_creation_timestamp();
|
||||
} // namespace system_keyspace
|
||||
} // namespace db
|
||||
|
||||
@@ -251,6 +251,39 @@ std::ostream& operator<<(std::ostream& out, const ring_position& pos) {
|
||||
return out << "}";
|
||||
}
|
||||
|
||||
size_t ring_position::serialized_size() const {
|
||||
size_t size = serialize_int32_size; /* _key length */
|
||||
if (_key) {
|
||||
size += _key.value().representation().size();
|
||||
} else {
|
||||
size += sizeof(int8_t); /* _token_bund */
|
||||
}
|
||||
return size + _token.serialized_size();
|
||||
}
|
||||
|
||||
void ring_position::serialize(bytes::iterator& out) const {
|
||||
_token.serialize(out);
|
||||
if (_key) {
|
||||
auto v = _key.value().representation();
|
||||
serialize_int32(out, v.size());
|
||||
out = std::copy(v.begin(), v.end(), out);
|
||||
} else {
|
||||
serialize_int32(out, 0);
|
||||
serialize_int8(out, static_cast<int8_t>(_token_bound));
|
||||
}
|
||||
}
|
||||
|
||||
ring_position ring_position::deserialize(bytes_view& in) {
|
||||
auto token = token::deserialize(in);
|
||||
auto size = read_simple<uint32_t>(in);
|
||||
if (size == 0) {
|
||||
auto bound = dht::ring_position::token_bound(read_simple<int8_t>(in));
|
||||
return ring_position(std::move(token), bound);
|
||||
} else {
|
||||
return ring_position(std::move(token), partition_key::from_bytes(to_bytes(read_simple_bytes(in, size))));
|
||||
}
|
||||
}
|
||||
|
||||
unsigned shard_of(const token& t) {
|
||||
return global_partitioner().shard_of(t);
|
||||
}
|
||||
@@ -263,6 +296,29 @@ int token_comparator::operator()(const token& t1, const token& t2) const {
|
||||
return tri_compare(t1, t2);
|
||||
}
|
||||
|
||||
void token::serialize(bytes::iterator& out) const {
|
||||
uint8_t kind = _kind == dht::token::kind::before_all_keys ? 0 :
|
||||
_kind == dht::token::kind::key ? 1 : 2;
|
||||
serialize_int8(out, kind);
|
||||
serialize_int16(out, _data.size());
|
||||
out = std::copy(_data.begin(), _data.end(), out);
|
||||
}
|
||||
|
||||
token token::deserialize(bytes_view& in) {
|
||||
uint8_t kind = read_simple<uint8_t>(in);
|
||||
size_t size = read_simple<uint16_t>(in);
|
||||
return token(kind == 0 ? dht::token::kind::before_all_keys :
|
||||
kind == 1 ? dht::token::kind::key :
|
||||
dht::token::kind::after_all_keys,
|
||||
to_bytes(read_simple_bytes(in, size)));
|
||||
}
|
||||
|
||||
size_t token::serialized_size() const {
|
||||
return serialize_int8_size // token::kind;
|
||||
+ serialize_int16_size // token size
|
||||
+ _data.size();
|
||||
}
|
||||
|
||||
bool ring_position::equal(const schema& s, const ring_position& other) const {
|
||||
return tri_compare(s, other) == 0;
|
||||
}
|
||||
|
||||
@@ -97,6 +97,11 @@ public:
|
||||
bool is_maximum() const {
|
||||
return _kind == kind::after_all_keys;
|
||||
}
|
||||
|
||||
|
||||
void serialize(bytes::iterator& out) const;
|
||||
static token deserialize(bytes_view& in);
|
||||
size_t serialized_size() const;
|
||||
};
|
||||
|
||||
token midpoint_unsigned(const token& t1, const token& t2);
|
||||
@@ -333,12 +338,6 @@ public:
|
||||
, _key(std::experimental::make_optional(std::move(key)))
|
||||
{ }
|
||||
|
||||
ring_position(dht::token token, token_bound bound, std::experimental::optional<partition_key> key)
|
||||
: _token(std::move(token))
|
||||
, _token_bound(bound)
|
||||
, _key(std::move(key))
|
||||
{ }
|
||||
|
||||
ring_position(const dht::decorated_key& dk)
|
||||
: _token(dk._token)
|
||||
, _key(std::experimental::make_optional(dk._key))
|
||||
@@ -380,6 +379,10 @@ public:
|
||||
// "less" comparator corresponding to tri_compare()
|
||||
bool less_compare(const schema&, const ring_position&) const;
|
||||
|
||||
size_t serialized_size() const;
|
||||
void serialize(bytes::iterator& out) const;
|
||||
static ring_position deserialize(bytes_view& in);
|
||||
|
||||
friend std::ostream& operator<<(std::ostream&, const ring_position&);
|
||||
};
|
||||
|
||||
|
||||
@@ -241,11 +241,12 @@ future<streaming::stream_state> range_streamer::fetch_async() {
|
||||
for (auto& x : fetch.second) {
|
||||
auto& source = x.first;
|
||||
auto& ranges = x.second;
|
||||
auto preferred = net::get_local_messaging_service().get_preferred_ip(source);
|
||||
/* Send messages to respective folks to stream data over to me */
|
||||
if (logger.is_enabled(logging::log_level::debug)) {
|
||||
logger.debug("{}ing from {} ranges {}", _description, source, ranges);
|
||||
}
|
||||
_stream_plan.request_ranges(source, keyspace, ranges);
|
||||
_stream_plan.request_ranges(source, preferred, keyspace, ranges);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -107,7 +107,7 @@ public:
|
||||
, _tokens(std::move(tokens))
|
||||
, _address(address)
|
||||
, _description(std::move(description))
|
||||
, _stream_plan(_description) {
|
||||
, _stream_plan(_description, true) {
|
||||
}
|
||||
|
||||
range_streamer(distributed<database>& db, token_metadata& tm, inet_address address, sstring description)
|
||||
|
||||
36
dist/ami/build_ami.sh
vendored
36
dist/ami/build_ami.sh
vendored
@@ -5,22 +5,15 @@ if [ ! -e dist/ami/build_ami.sh ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
print_usage() {
|
||||
echo "build_ami.sh -l"
|
||||
echo " -l deploy locally built rpms"
|
||||
TARGET_JSON=scylla.json
|
||||
if [ "$1" != "" ]; then
|
||||
TARGET_JSON=$1
|
||||
fi
|
||||
|
||||
if [ ! -f dist/ami/$TARGET_JSON ]; then
|
||||
echo "dist/ami/$TARGET_JSON does not found"
|
||||
exit 1
|
||||
}
|
||||
LOCALRPM=0
|
||||
while getopts lh OPT; do
|
||||
case "$OPT" in
|
||||
"l")
|
||||
LOCALRPM=1
|
||||
;;
|
||||
"h")
|
||||
print_usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
fi
|
||||
|
||||
cd dist/ami
|
||||
|
||||
@@ -30,20 +23,11 @@ if [ ! -f variables.json ]; then
|
||||
fi
|
||||
|
||||
if [ ! -d packer ]; then
|
||||
wget https://releases.hashicorp.com/packer/0.8.6/packer_0.8.6_linux_amd64.zip
|
||||
wget https://dl.bintray.com/mitchellh/packer/packer_0.8.6_linux_amd64.zip
|
||||
mkdir packer
|
||||
cd packer
|
||||
unzip -x ../packer_0.8.6_linux_amd64.zip
|
||||
cd -
|
||||
fi
|
||||
|
||||
echo "sudo yum remove -y abrt" > scylla_deploy.sh
|
||||
if [ $LOCALRPM = 0 ]; then
|
||||
echo "sudo sh -x -e /home/centos/scylla_install_pkg" >> scylla_deploy.sh
|
||||
else
|
||||
echo "sudo sh -x -e /home/centos/scylla_install_pkg -l /home/centos" >> scylla_deploy.sh
|
||||
fi
|
||||
echo "sudo sh -x -e /usr/lib/scylla/scylla_setup -a" >> scylla_deploy.sh
|
||||
|
||||
chmod a+rx scylla_deploy.sh
|
||||
packer/packer build -var-file=variables.json scylla.json
|
||||
packer/packer build -var-file=variables.json $TARGET_JSON
|
||||
|
||||
17
dist/ami/build_ami_local.sh
vendored
17
dist/ami/build_ami_local.sh
vendored
@@ -5,27 +5,26 @@ if [ ! -e dist/ami/build_ami_local.sh ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
rm -rf build/*
|
||||
sudo yum -y install git
|
||||
if [ ! -f dist/ami/files/scylla-server.x86_64.rpm ]; then
|
||||
if [ ! -f dist/ami/scylla-server.x86_64.rpm ]; then
|
||||
dist/redhat/build_rpm.sh
|
||||
cp build/rpmbuild/RPMS/x86_64/scylla-server-`cat build/SCYLLA-VERSION-FILE`-`cat build/SCYLLA-RELEASE-FILE`.*.x86_64.rpm dist/ami/files/scylla-server.x86_64.rpm
|
||||
cp build/rpms/scylla-server-`cat build/SCYLLA-VERSION-FILE`-`cat build/SCYLLA-RELEASE-FILE`.*.x86_64.rpm dist/ami/scylla-server.x86_64.rpm
|
||||
fi
|
||||
if [ ! -f dist/ami/files/scylla-jmx.noarch.rpm ]; then
|
||||
if [ ! -f dist/ami/scylla-jmx.noarch.rpm ]; then
|
||||
cd build
|
||||
git clone --depth 1 https://github.com/scylladb/scylla-jmx.git
|
||||
cd scylla-jmx
|
||||
sh -x -e dist/redhat/build_rpm.sh $*
|
||||
sh -x -e dist/redhat/build_rpm.sh
|
||||
cd ../..
|
||||
cp build/scylla-jmx/build/rpmbuild/RPMS/noarch/scylla-jmx-`cat build/scylla-jmx/build/SCYLLA-VERSION-FILE`-`cat build/scylla-jmx/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-jmx.noarch.rpm
|
||||
cp build/scylla-jmx/build/rpms/scylla-jmx-`cat build/scylla-jmx/build/SCYLLA-VERSION-FILE`-`cat build/scylla-jmx/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/scylla-jmx.noarch.rpm
|
||||
fi
|
||||
if [ ! -f dist/ami/files/scylla-tools.noarch.rpm ]; then
|
||||
if [ ! -f dist/ami/scylla-tools.noarch.rpm ]; then
|
||||
cd build
|
||||
git clone --depth 1 https://github.com/scylladb/scylla-tools-java.git
|
||||
cd scylla-tools-java
|
||||
sh -x -e dist/redhat/build_rpm.sh
|
||||
cd ../..
|
||||
cp build/scylla-tools-java/build/rpmbuild/RPMS/noarch/scylla-tools-`cat build/scylla-tools-java/build/SCYLLA-VERSION-FILE`-`cat build/scylla-tools-java/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-tools.noarch.rpm
|
||||
cp build/scylla-tools-java/build/rpms/scylla-tools-`cat build/scylla-tools-java/build/SCYLLA-VERSION-FILE`-`cat build/scylla-tools-java/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/scylla-tools.noarch.rpm
|
||||
fi
|
||||
|
||||
exec dist/ami/build_ami.sh -l
|
||||
exec dist/ami/build_ami.sh scylla_local.json
|
||||
|
||||
4
dist/ami/files/.bash_profile
vendored
4
dist/ami/files/.bash_profile
vendored
@@ -23,7 +23,7 @@ echo ' |___/ '
|
||||
echo ''
|
||||
echo ''
|
||||
echo 'Nodetool:'
|
||||
echo ' nodetool help'
|
||||
echo ' nodetool --help'
|
||||
echo 'CQL Shell:'
|
||||
echo ' cqlsh'
|
||||
echo 'More documentation available at: '
|
||||
@@ -35,7 +35,6 @@ if [ "`systemctl is-active scylla-server`" = "active" ]; then
|
||||
tput bold
|
||||
echo " ScyllaDB is active."
|
||||
tput sgr0
|
||||
echo
|
||||
else
|
||||
tput setaf 1
|
||||
tput bold
|
||||
@@ -43,5 +42,4 @@ else
|
||||
tput sgr0
|
||||
echo "Please wait for startup. To see status of ScyllaDB, run "
|
||||
echo " 'systemctl status scylla-server'"
|
||||
echo
|
||||
fi
|
||||
|
||||
2
dist/ami/files/scylla-ami
vendored
2
dist/ami/files/scylla-ami
vendored
Submodule dist/ami/files/scylla-ami updated: b3b85befbb...07b71180d3
66
dist/ami/scylla.json
vendored
66
dist/ami/scylla.json
vendored
@@ -8,70 +8,34 @@
|
||||
"security_group_id": "{{user `security_group_id`}}",
|
||||
"region": "{{user `region`}}",
|
||||
"associate_public_ip_address": "{{user `associate_public_ip_address`}}",
|
||||
"source_ami": "ami-f3102499",
|
||||
"user_data_file": "user_data.txt",
|
||||
"source_ami": "ami-a51564c0",
|
||||
"instance_type": "{{user `instance_type`}}",
|
||||
"ssh_username": "centos",
|
||||
"ssh_username": "fedora",
|
||||
"ssh_timeout": "5m",
|
||||
"ami_name": "scylla_{{isotime | clean_ami_name}}",
|
||||
"enhanced_networking": true,
|
||||
"launch_block_device_mappings": [
|
||||
{
|
||||
"device_name": "/dev/sda1",
|
||||
"volume_size": 10,
|
||||
"delete_on_termination": true
|
||||
}
|
||||
],
|
||||
"ami_block_device_mappings": [
|
||||
{
|
||||
"device_name": "/dev/sdb",
|
||||
"virtual_name": "ephemeral0"
|
||||
},
|
||||
{
|
||||
"device_name": "/dev/sdc",
|
||||
"virtual_name": "ephemeral1"
|
||||
},
|
||||
{
|
||||
"device_name": "/dev/sdd",
|
||||
"virtual_name": "ephemeral2"
|
||||
},
|
||||
{
|
||||
"device_name": "/dev/sde",
|
||||
"virtual_name": "ephemeral3"
|
||||
},
|
||||
{
|
||||
"device_name": "/dev/sdf",
|
||||
"virtual_name": "ephemeral4"
|
||||
},
|
||||
{
|
||||
"device_name": "/dev/sdg",
|
||||
"virtual_name": "ephemeral5"
|
||||
},
|
||||
{
|
||||
"device_name": "/dev/sdh",
|
||||
"virtual_name": "ephemeral6"
|
||||
},
|
||||
{
|
||||
"device_name": "/dev/sdi",
|
||||
"virtual_name": "ephemeral7"
|
||||
}
|
||||
]
|
||||
"ami_name": "scylla_{{isotime | clean_ami_name}}"
|
||||
}
|
||||
],
|
||||
"provisioners": [
|
||||
{
|
||||
"type": "file",
|
||||
"source": "files/",
|
||||
"destination": "/home/centos/"
|
||||
"source": "files/scylla-ami",
|
||||
"destination": "/home/fedora/scylla-ami"
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"source": "../../scripts/scylla_install_pkg",
|
||||
"destination": "/home/centos/scylla_install_pkg"
|
||||
"source": "files/.bash_profile",
|
||||
"destination": "/home/fedora/.bash_profile"
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"source": "../../scripts/scylla_install",
|
||||
"destination": "/home/fedora/scylla_install"
|
||||
},
|
||||
{
|
||||
"type": "shell",
|
||||
"script": "scylla_deploy.sh"
|
||||
"inline": [
|
||||
"sudo sh -x -e /home/fedora/scylla_install -a"
|
||||
]
|
||||
}
|
||||
],
|
||||
"variables": {
|
||||
|
||||
67
dist/ami/scylla_local.json
vendored
Normal file
67
dist/ami/scylla_local.json
vendored
Normal file
@@ -0,0 +1,67 @@
|
||||
{
|
||||
"builders": [
|
||||
{
|
||||
"type": "amazon-ebs",
|
||||
"access_key": "{{user `access_key`}}",
|
||||
"secret_key": "{{user `secret_key`}}",
|
||||
"subnet_id": "{{user `subnet_id`}}",
|
||||
"security_group_id": "{{user `security_group_id`}}",
|
||||
"region": "{{user `region`}}",
|
||||
"associate_public_ip_address": "{{user `associate_public_ip_address`}}",
|
||||
"source_ami": "ami-a51564c0",
|
||||
"instance_type": "{{user `instance_type`}}",
|
||||
"ssh_username": "fedora",
|
||||
"ssh_timeout": "5m",
|
||||
"ami_name": "scylla_{{isotime | clean_ami_name}}"
|
||||
}
|
||||
],
|
||||
"provisioners": [
|
||||
{
|
||||
"type": "file",
|
||||
"source": "files/scylla-ami",
|
||||
"destination": "/home/fedora/scylla-ami"
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"source": "files/.bash_profile",
|
||||
"destination": "/home/fedora/.bash_profile"
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"source": "../../scripts/scylla_install",
|
||||
"destination": "/home/fedora/scylla_install"
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"source": "scylla-server.x86_64.rpm",
|
||||
"destination": "/home/fedora/scylla-server.x86_64.rpm"
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"source": "scylla-jmx.noarch.rpm",
|
||||
"destination": "/home/fedora/scylla-jmx.noarch.rpm"
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"source": "scylla-tools.noarch.rpm",
|
||||
"destination": "/home/fedora/scylla-tools.noarch.rpm"
|
||||
},
|
||||
{
|
||||
"type": "shell",
|
||||
"inline": [
|
||||
"sudo yum install -y /home/fedora/scylla-server.x86_64.rpm /home/fedora/scylla-jmx.noarch.rpm /home/fedora/scylla-tools.noarch.rpm",
|
||||
"sudo mv /home/fedora/scylla-ami /usr/lib/scylla/scylla-ami",
|
||||
"sudo sh -x -e /home/fedora/scylla_install -a -l /home/fedora"
|
||||
]
|
||||
}
|
||||
],
|
||||
"variables": {
|
||||
"access_key": "",
|
||||
"secret_key": "",
|
||||
"subnet_id": "",
|
||||
"security_group_id": "",
|
||||
"region": "",
|
||||
"associate_public_ip_address": "",
|
||||
"instance_type": ""
|
||||
}
|
||||
}
|
||||
2
dist/ami/user_data.txt
vendored
2
dist/ami/user_data.txt
vendored
@@ -1,2 +0,0 @@
|
||||
#!/bin/sh
|
||||
sed -i 's/Defaults requiretty/#Defaults requiretty/g' /etc/sudoers
|
||||
55
dist/common/scripts/scylla_bootparam_setup
vendored
55
dist/common/scripts/scylla_bootparam_setup
vendored
@@ -2,22 +2,47 @@
|
||||
#
|
||||
# Copyright (C) 2015 ScyllaDB
|
||||
|
||||
print_usage() {
|
||||
echo "scylla_bootparam_setup -a"
|
||||
echo " -a AMI instance mode"
|
||||
exit 1
|
||||
}
|
||||
|
||||
AMI=0
|
||||
while getopts a OPT; do
|
||||
case "$OPT" in
|
||||
"a")
|
||||
AMI=1
|
||||
;;
|
||||
"h")
|
||||
print_usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
. /etc/os-release
|
||||
|
||||
if [ ! -f /etc/default/grub ]; then
|
||||
echo "Unsupported bootloader"
|
||||
exit 1
|
||||
fi
|
||||
if [ "`grep hugepagesz /etc/default/grub`" != "" ] || [ "`grep hugepages /etc/default/grub`" != "" ]; then
|
||||
sed -e "s#hugepagesz=2M ##" /etc/default/grub > /tmp/grub
|
||||
mv /tmp/grub /etc/default/grub
|
||||
sed -e "s#hugepages=[0-9]* ##" /etc/default/grub > /tmp/grub
|
||||
mv /tmp/grub /etc/default/grub
|
||||
fi
|
||||
sed -e "s#^GRUB_CMDLINE_LINUX=\"#GRUB_CMDLINE_LINUX=\"hugepagesz=2M hugepages=$NR_HUGEPAGES #" /etc/default/grub > /tmp/grub
|
||||
mv /tmp/grub /etc/default/grub
|
||||
if [ "$ID" = "ubuntu" ]; then
|
||||
grub-mkconfig -o /boot/grub/grub.cfg
|
||||
if [ $AMI -eq 1 ]; then
|
||||
. /etc/sysconfig/scylla-server
|
||||
sed -e "s#append #append clocksource=tsc tsc=reliable hugepagesz=2M hugepages=$NR_HUGEPAGES #" /boot/extlinux/extlinux.conf > /tmp/extlinux.conf
|
||||
mv /tmp/extlinux.conf /boot/extlinux/extlinux.conf
|
||||
else
|
||||
grub2-mkconfig -o /boot/grub2/grub.cfg
|
||||
. /etc/sysconfig/scylla-server
|
||||
if [ ! -f /etc/default/grub ]; then
|
||||
echo "Unsupported bootloader"
|
||||
exit 1
|
||||
fi
|
||||
if [ "`grep hugepagesz /etc/default/grub`" != "" ] || [ "`grep hugepages /etc/default/grub`" != "" ]; then
|
||||
sed -e "s#hugepagesz=2M ##" /etc/default/grub > /tmp/grub
|
||||
mv /tmp/grub /etc/default/grub
|
||||
sed -e "s#hugepages=[0-9]* ##" /etc/default/grub > /tmp/grub
|
||||
mv /tmp/grub /etc/default/grub
|
||||
fi
|
||||
sed -e "s#^GRUB_CMDLINE_LINUX=\"#GRUB_CMDLINE_LINUX=\"hugepagesz=2M hugepages=$NR_HUGEPAGES #" /etc/default/grub > /tmp/grub
|
||||
mv /tmp/grub /etc/default/grub
|
||||
if [ "$ID" = "ubuntu" ]; then
|
||||
grub2-mkconfig -o /boot/grub/grub.cfg
|
||||
else
|
||||
grub2-mkconfig -o /boot/grub2/grub.cfg
|
||||
fi
|
||||
fi
|
||||
|
||||
38
dist/common/scripts/scylla_coredump_setup
vendored
38
dist/common/scripts/scylla_coredump_setup
vendored
@@ -2,43 +2,15 @@
|
||||
#
|
||||
# Copyright (C) 2015 ScyllaDB
|
||||
|
||||
print_usage() {
|
||||
echo "scylla_coredump_setup -s"
|
||||
echo " -s store coredump to /var/lib/scylla"
|
||||
exit 1
|
||||
}
|
||||
|
||||
SYMLINK=0
|
||||
while getopts sh OPT; do
|
||||
case "$OPT" in
|
||||
"s")
|
||||
SYMLINK=1
|
||||
;;
|
||||
"h")
|
||||
print_usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
|
||||
. /etc/os-release
|
||||
|
||||
if [ "$ID" = "ubuntu" ]; then
|
||||
apt-get remove -y apport-noui
|
||||
sysctl -p /etc/sysctl.d/99-scylla.conf
|
||||
else
|
||||
cat << EOS > /etc/systemd/coredump.conf
|
||||
[Coredump]
|
||||
Storage=external
|
||||
Compress=yes
|
||||
ProcessSizeMax=1024G
|
||||
ExternalSizeMax=1024G
|
||||
EOS
|
||||
if [ $SYMLINK = 1 ]; then
|
||||
rm -rf /var/lib/systemd/coredump
|
||||
ln -sf /var/lib/scylla/coredump /var/lib/systemd/coredump
|
||||
if [ -f /etc/systemd/coredump.conf ]; then
|
||||
mv /etc/systemd/coredump.conf /etc/systemd/coredump.conf.save
|
||||
systemctl daemon-reload
|
||||
fi
|
||||
systemctl daemon-reload
|
||||
echo "kernel.core_pattern=|/usr/lib/systemd/systemd-coredump %p %u %g %s %t %e" > /etc/sysctl.d/99-coredump.conf
|
||||
sysctl -p /etc/sysctl.d/99-coredump.conf
|
||||
fi
|
||||
|
||||
sysctl -p /etc/sysctl.d/99-scylla.conf
|
||||
|
||||
7
dist/common/scripts/scylla_ntp_setup
vendored
7
dist/common/scripts/scylla_ntp_setup
vendored
@@ -29,13 +29,10 @@ if [ "$NAME" = "Ubuntu" ]; then
|
||||
else
|
||||
yum install -y ntp ntpdate || true
|
||||
if [ $AMI -eq 1 ]; then
|
||||
sed -e s#centos.pool.ntp.org#amazon.pool.ntp.org# /etc/ntp.conf > /tmp/ntp.conf
|
||||
sed -e s#fedora.pool.ntp.org#amazon.pool.ntp.org# /etc/ntp.conf > /tmp/ntp.conf
|
||||
mv /tmp/ntp.conf /etc/ntp.conf
|
||||
fi
|
||||
if [ "`systemctl is-active ntpd`" = "active" ]; then
|
||||
systemctl stop ntpd.service
|
||||
fi
|
||||
ntpdate `cat /etc/ntp.conf |grep "^server"|head -n1|awk '{print $2}'`
|
||||
systemctl enable ntpd.service
|
||||
ntpdate `cat /etc/ntp.conf |grep "^server"|head -n1|awk '{print $2}'`
|
||||
systemctl start ntpd.service
|
||||
fi
|
||||
|
||||
39
dist/common/scripts/scylla_prepare
vendored
39
dist/common/scripts/scylla_prepare
vendored
@@ -1,8 +1,41 @@
|
||||
#!/bin/sh -e
|
||||
|
||||
if [ "$AMI" = "yes" ] && [ -f /etc/scylla/ami_disabled ]; then
|
||||
rm /etc/scylla/ami_disabled
|
||||
exit 1
|
||||
if [ "$AMI" = "yes" ]; then
|
||||
RAIDCNT=`grep xvdb /proc/mdstat | wc -l`
|
||||
RAIDDEV=`grep xvdb /proc/mdstat | awk '{print $1}'`
|
||||
|
||||
if [ $RAIDCNT -ge 1 ]; then
|
||||
echo "RAID already constructed."
|
||||
if [ "`mount|grep /var/lib/scylla`" = "" ]; then
|
||||
mount -o noatime /dev/$RAIDDEV /var/lib/scylla
|
||||
fi
|
||||
else
|
||||
echo "RAID does not constructed, going to initialize..."
|
||||
|
||||
if [ "$AMI_KEEP_VERSION" != "yes" ]; then
|
||||
yum update -y
|
||||
fi
|
||||
|
||||
DISKS=""
|
||||
for i in /dev/xvd{b..z}; do
|
||||
if [ -b $i ];then
|
||||
echo "Found disk $i"
|
||||
if [ "$DISKS" = "" ]; then
|
||||
DISKS=$i
|
||||
else
|
||||
DISKS="$DISKS,$i"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$DISKS" != "" ]; then
|
||||
/usr/lib/scylla/scylla_raid_setup -d $DISKS
|
||||
else
|
||||
echo "WARN: Scylla is not using XFS to store data. Perforamnce will suffer." > /home/fedora/WARN_PLEASE_READ.TXT
|
||||
fi
|
||||
|
||||
/usr/lib/scylla/scylla-ami/ds2_configure.py
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "$NETWORK_MODE" = "virtio" ]; then
|
||||
|
||||
7
dist/common/scripts/scylla_raid_setup
vendored
7
dist/common/scripts/scylla_raid_setup
vendored
@@ -43,13 +43,6 @@ if [ "`mount|grep /var/lib/scylla`" != "" ]; then
|
||||
echo "/var/lib/scylla is already mounted"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
. /etc/os-release
|
||||
if [ "$NAME" = "Ubuntu" ]; then
|
||||
apt-get -y install mdadm xfsprogs
|
||||
else
|
||||
yum -y install mdadm xfsprogs
|
||||
fi
|
||||
mdadm --create --verbose --force --run $RAID --level=0 -c256 --raid-devices=$NR_DISK $DISKS
|
||||
blockdev --setra 65536 $RAID
|
||||
mkfs.xfs $RAID -f
|
||||
|
||||
@@ -7,4 +7,4 @@ TIME=`date --date @$2 +%F-%T`
|
||||
PID=$3
|
||||
|
||||
mkdir -p /var/lib/scylla/coredump
|
||||
cat - > /var/lib/scylla/coredump/core.$FILE-$TIME-$PID
|
||||
/usr/bin/gzip -c > /var/lib/scylla/coredump/core.$FILE-$TIME-$PID.gz
|
||||
72
dist/common/scripts/scylla_setup
vendored
72
dist/common/scripts/scylla_setup
vendored
@@ -1,72 +0,0 @@
|
||||
#!/bin/sh -e
|
||||
#
|
||||
# Copyright (C) 2015 ScyllaDB
|
||||
|
||||
if [ "`id -u`" -ne 0 ]; then
|
||||
echo "Requires root permission."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
print_usage() {
|
||||
echo "scylla_setup -d /dev/hda,/dev/hdb... -n eth0 -a"
|
||||
echo " -d specify disks for RAID"
|
||||
echo " -n specify NIC"
|
||||
echo " -a setup AMI instance"
|
||||
exit 1
|
||||
}
|
||||
|
||||
NIC=eth0
|
||||
RAID=/dev/md0
|
||||
AMI=0
|
||||
while getopts d:n:al:h OPT; do
|
||||
case "$OPT" in
|
||||
"n")
|
||||
NIC=$OPTARG
|
||||
;;
|
||||
"d")
|
||||
DISKS=$OPTARG
|
||||
;;
|
||||
"a")
|
||||
AMI=1
|
||||
;;
|
||||
"h")
|
||||
print_usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
. /etc/os-release
|
||||
|
||||
if [ "$ID" != "ubuntu" ]; then
|
||||
if [ "`sestatus | awk '{print $3}'`" != "disabled" ]; then
|
||||
setenforce 0
|
||||
sed -e "s/enforcing/disabled/" /etc/sysconfig/selinux > /tmp/selinux
|
||||
mv /tmp/selinux /etc/sysconfig/
|
||||
fi
|
||||
if [ $AMI -eq 1 ]; then
|
||||
grep -v ' - mounts' /etc/cloud/cloud.cfg > /tmp/cloud.cfg
|
||||
mv /tmp/cloud.cfg /etc/cloud/cloud.cfg
|
||||
mv /home/centos/scylla-ami/scylla-ami-setup.service /usr/lib/systemd/system/
|
||||
mv /home/centos/scylla-ami /usr/lib/scylla/scylla-ami
|
||||
chmod a+rx /usr/lib/scylla/scylla-ami/ds2_configure.py
|
||||
systemctl daemon-reload
|
||||
systemctl enable scylla-ami-setup.service
|
||||
fi
|
||||
systemctl enable scylla-server.service
|
||||
systemctl enable scylla-jmx.service
|
||||
fi
|
||||
if [ $AMI -eq 0 ]; then
|
||||
/usr/lib/scylla/scylla_ntp_setup
|
||||
/usr/lib/scylla/scylla_bootparam_setup
|
||||
if [ $DISKS != "" ]; then
|
||||
/usr/lib/scylla/scylla_raid_setup -d $DISKS -u
|
||||
/usr/lib/scylla/scylla_coredump_setup -s
|
||||
else
|
||||
/usr/lib/scylla/scylla_coredump_setup
|
||||
fi
|
||||
else
|
||||
/usr/lib/scylla/scylla_coredump_setup -s
|
||||
/usr/lib/scylla/scylla_ntp_setup -a
|
||||
/usr/lib/scylla/scylla_bootparam_setup -a
|
||||
/usr/lib/scylla/scylla_sysconfig_setup -n $NIC
|
||||
fi
|
||||
56
dist/common/scripts/scylla_sysconfig_setup
vendored
56
dist/common/scripts/scylla_sysconfig_setup
vendored
@@ -3,17 +3,17 @@
|
||||
# Copyright (C) 2015 ScyllaDB
|
||||
|
||||
print_usage() {
|
||||
echo "scylla-sysconfig-setup -n eth0 -m posix -p 64 -u scylla -g scylla -r /var/lib/scylla -c /etc/scylla -N -a -k"
|
||||
echo "scylla-sysconfig-setup -n eth0 -m posix -p 64 -u scylla -g scylla -d /var/lib/scylla -c /etc/scylla -N -a -k"
|
||||
echo " -n specify NIC"
|
||||
echo " -m network mode (posix, dpdk)"
|
||||
echo " -p number of hugepages"
|
||||
echo " -u user (dpdk requires root)"
|
||||
echo " -g group (dpdk requires root)"
|
||||
echo " -r scylla home directory"
|
||||
echo " -d scylla home directory"
|
||||
echo " -c scylla config directory"
|
||||
echo " -N setup NIC's interrupts, RPS, XPS"
|
||||
echo " -a AMI instance mode"
|
||||
echo " -d disk count"
|
||||
echo " -k keep package version on AMI"
|
||||
exit 1
|
||||
}
|
||||
|
||||
@@ -23,9 +23,19 @@ if [ "$ID" = "ubuntu" ]; then
|
||||
else
|
||||
SYSCONFIG=/etc/sysconfig
|
||||
fi
|
||||
. $SYSCONFIG/scylla-server
|
||||
|
||||
DISK_COUNT=0
|
||||
NIC=eth0
|
||||
NETWORK_MODE=posix
|
||||
NR_HUGEPAGES=64
|
||||
USER=scylla
|
||||
GROUP=scylla
|
||||
SCYLLA_HOME=/var/lib/scylla
|
||||
SCYLLA_CONF=/etc/scylla
|
||||
SETUP_NIC=0
|
||||
SET_NIC="no"
|
||||
AMI=no
|
||||
AMI_KEEP_VERSION=no
|
||||
SCYLLA_ARGS=
|
||||
while getopts n:m:p:u:g:d:c:Nakh OPT; do
|
||||
case "$OPT" in
|
||||
"n")
|
||||
@@ -43,7 +53,7 @@ while getopts n:m:p:u:g:d:c:Nakh OPT; do
|
||||
"g")
|
||||
GROUP=$OPTARG
|
||||
;;
|
||||
"r")
|
||||
"d")
|
||||
SCYLLA_HOME=$OPTARG
|
||||
;;
|
||||
"c")
|
||||
@@ -55,8 +65,8 @@ while getopts n:m:p:u:g:d:c:Nakh OPT; do
|
||||
"a")
|
||||
AMI=yes
|
||||
;;
|
||||
"d")
|
||||
DISK_COUNT=$OPTARG
|
||||
"k")
|
||||
AMI_KEEP_VERSION=yes
|
||||
;;
|
||||
"h")
|
||||
print_usage
|
||||
@@ -69,29 +79,11 @@ echo Setting parameters on $SYSCONFIG/scylla-server
|
||||
ETHDRV=`/usr/lib/scylla/dpdk_nic_bind.py --status | grep if=$NIC | sed -e "s/^.*drv=//" -e "s/ .*$//"`
|
||||
ETHPCIID=`/usr/lib/scylla/dpdk_nic_bind.py --status | grep if=$NIC | awk '{print $1}'`
|
||||
NR_CPU=`cat /proc/cpuinfo |grep processor|wc -l`
|
||||
NR_SHARDS=$NR_CPU
|
||||
if [ $NR_CPU -ge 8 ] && [ "$SET_NIC" = "no" ]; then
|
||||
NR_SHARDS=$((NR_CPU - 1))
|
||||
if [ $NR_CPU -ge 8 ]; then
|
||||
NR=$((NR_CPU - 1))
|
||||
SET_NIC="yes"
|
||||
SCYLLA_ARGS="$SCYLLA_ARGS --cpuset 1-$NR_SHARDS --smp $NR_SHARDS"
|
||||
SCYLLA_ARGS="--cpuset 1-$NR --smp $NR"
|
||||
fi
|
||||
|
||||
if [ "$AMI" = "yes" ] && [ $DISK_COUNT -gt 0 ]; then
|
||||
NR_DISKS=$DISK_COUNT
|
||||
if [ $NR_DISKS -lt 2 ]; then NR_DISKS=2; fi
|
||||
|
||||
NR_REQS=$((32 * $NR_DISKS / 2))
|
||||
|
||||
NR_IO_QUEUES=$NR_SHARDS
|
||||
if [ $(($NR_REQS/$NR_IO_QUEUES)) -lt 4 ]; then
|
||||
NR_IO_QUEUES=$(($NR_REQS / 4))
|
||||
fi
|
||||
|
||||
NR_REQS=$(($(($NR_REQS / $NR_IO_QUEUES)) * $NR_IO_QUEUES))
|
||||
|
||||
SCYLLA_IO="$SCYLLA_IO --num-io-queues $NR_IO_QUEUES --max-io-requests $NR_REQS"
|
||||
fi
|
||||
|
||||
sed -e s#^NETWORK_MODE=.*#NETWORK_MODE=$NETWORK_MODE# \
|
||||
-e s#^ETHDRV=.*#ETHDRV=$ETHDRV# \
|
||||
-e s#^ETHPCIID=.*#ETHPCIID=$ETHPCIID# \
|
||||
@@ -101,8 +93,8 @@ sed -e s#^NETWORK_MODE=.*#NETWORK_MODE=$NETWORK_MODE# \
|
||||
-e s#^SCYLLA_HOME=.*#SCYLLA_HOME=$SCYLLA_HOME# \
|
||||
-e s#^SCYLLA_CONF=.*#SCYLLA_CONF=$SCYLLA_CONF# \
|
||||
-e s#^SET_NIC=.*#SET_NIC=$SET_NIC# \
|
||||
-e "s#^SCYLLA_ARGS=.*#SCYLLA_ARGS=\"$SCYLLA_ARGS\"#" \
|
||||
-e "s#^SCYLLA_IO=.*#SCYLLA_IO=\"$SCYLLA_IO\"#" \
|
||||
-e s#^AMI=.*#AMI=$AMI# \
|
||||
-e s#^SCYLLA_ARGS=.*#SCYLLA_ARGS="$SCYLLA_ARGS"# \
|
||||
-e s#^AMI=.*#AMI="$AMI"# \
|
||||
-e s#^AMI_KEEP_VERSION=.*#AMI_KEEP_VERSION="$AMI_KEEP_VERSION"# \
|
||||
$SYSCONFIG/scylla-server > /tmp/scylla-server
|
||||
mv /tmp/scylla-server $SYSCONFIG/scylla-server
|
||||
|
||||
1
dist/common/sudoers.d/scylla
vendored
1
dist/common/sudoers.d/scylla
vendored
@@ -1 +0,0 @@
|
||||
scylla ALL=(ALL) NOPASSWD:SETENV: /usr/lib/scylla/scylla_prepare,/usr/lib/scylla/scylla_stop
|
||||
13
dist/common/sysconfig/scylla-server
vendored
13
dist/common/sysconfig/scylla-server
vendored
@@ -34,14 +34,11 @@ SCYLLA_HOME=/var/lib/scylla
|
||||
# scylla config dir
|
||||
SCYLLA_CONF=/etc/scylla
|
||||
|
||||
# scylla arguments (for posix mode)
|
||||
SCYLLA_ARGS="--log-to-syslog 1 --log-to-stdout 0 --default-log-level info --collectd-address=127.0.0.1:25826 --collectd=1 --collectd-poll-period 3000 --network-stack posix"
|
||||
|
||||
## scylla arguments (for dpdk mode)
|
||||
#SCYLLA_ARGS="--log-to-syslog 1 --log-to-stdout 0 --default-log-level info --collectd-address=127.0.0.1:25826 --collectd=1 --collectd-poll-period 3000 --network-stack native --dpdk-pmd"
|
||||
|
||||
# scylla io
|
||||
SCYLLA_IO=
|
||||
# additional arguments
|
||||
SCYLLA_ARGS=""
|
||||
|
||||
# setup as AMI instance
|
||||
AMI=no
|
||||
|
||||
# do not upgrade Scylla packages on AMI startup
|
||||
AMI_KEEP_VERSION=no
|
||||
|
||||
10
dist/docker/Dockerfile
vendored
10
dist/docker/Dockerfile
vendored
@@ -1,13 +1,11 @@
|
||||
FROM centos:7
|
||||
FROM fedora:22
|
||||
|
||||
MAINTAINER Avi Kivity <avi@cloudius-systems.com>
|
||||
|
||||
RUN yum -y install epel-release
|
||||
ADD scylla.repo /etc/yum.repos.d/
|
||||
RUN yum -y update
|
||||
RUN yum -y remove boost-thread boost-system
|
||||
RUN yum -y install scylla-server hostname
|
||||
RUN yum clean all
|
||||
RUN dnf -y update
|
||||
RUN dnf -y install scylla-server hostname
|
||||
RUN dnf clean all
|
||||
ADD start-scylla /start-scylla
|
||||
RUN chown scylla /start-scylla
|
||||
|
||||
|
||||
20
dist/docker/scylla.repo
vendored
20
dist/docker/scylla.repo
vendored
@@ -1,23 +1,11 @@
|
||||
[scylla]
|
||||
name=Scylla for Centos $releasever - $basearch
|
||||
baseurl=https://s3.amazonaws.com/downloads.scylladb.com/rpm/centos/$releasever/$basearch/
|
||||
name=Scylla for Fedora $releasever - $basearch
|
||||
baseurl=https://s3.amazonaws.com/downloads.scylladb.com/rpm/fedora/$releasever/$basearch/
|
||||
enabled=1
|
||||
gpgcheck=0
|
||||
|
||||
[scylla-generic]
|
||||
name=Scylla for centos $releasever
|
||||
baseurl=https://s3.amazonaws.com/downloads.scylladb.com/rpm/centos/$releasever/noarch/
|
||||
enabled=1
|
||||
gpgcheck=0
|
||||
|
||||
[scylla-3rdparty]
|
||||
name=Scylla 3rdParty for Centos $releasever - $basearch
|
||||
baseurl=https://s3.amazonaws.com/downloads.scylladb.com/rpm/3rdparty/centos/$releasever/$basearch/
|
||||
enabled=1
|
||||
gpgcheck=0
|
||||
|
||||
[scylla-3rdparty-generic]
|
||||
name=Scylla 3rdParty for Centos $releasever
|
||||
baseurl=https://s3.amazonaws.com/downloads.scylladb.com/rpm/3rdparty/centos/$releasever/noarch/
|
||||
name=Scylla for Fedora $releasever
|
||||
baseurl=https://s3.amazonaws.com/downloads.scylladb.com/rpm/fedora/$releasever/noarch/
|
||||
enabled=1
|
||||
gpgcheck=0
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user