Files
scylladb/db/system_keyspace.cc

831 lines
31 KiB
C++

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Modified by Cloudius Systems
* Copyright 2015 Cloudius Systems
*/
#include <boost/range/algorithm_ext/push_back.hpp>
#include <boost/range/adaptor/transformed.hpp>
#include "system_keyspace.hh"
#include "types.hh"
#include "service/storage_service.hh"
#include "service/storage_proxy.hh"
#include "service/client_state.hh"
#include "service/query_state.hh"
#include "cql3/query_options.hh"
#include "cql3/query_processor.hh"
#include "utils/fb_utilities.hh"
#include "dht/i_partitioner.hh"
#include "version.hh"
#include "thrift/server.hh"
#include "exceptions/exceptions.hh"
#include "cql3/query_processor.hh"
#include "db/serializer.hh"
#include "query_context.hh"
#include "partition_slice_builder.hh"
#include "db/config.hh"
#include "schema_builder.hh"
using days = std::chrono::duration<int, std::ratio<24 * 3600>>;
namespace db {
std::unique_ptr<query_context> qctx = {};
namespace system_keyspace {
// Currently, the type variables (uuid_type, etc.) are thread-local reference-
// counted shared pointers. This forces us to also make the built in schemas
// below thread-local as well.
// We return schema_ptr, not schema&, because that's the "tradition" in our
// other code.
// We hide the thread_local variable inside a function, because if we later
// we remove the thread_local, we'll start having initialization order
// problems (we need the type variables to be constructed first), and using
// functions will solve this problem. So we use functions right now.
schema_ptr hints() {
static thread_local auto hints = [] {
schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, HINTS), NAME, HINTS,
// partition key
{{"target_id", uuid_type}},
// clustering key
{{"hint_id", timeuuid_type}, {"message_version", int32_type}},
// regular columns
{{"mutation", bytes_type}},
// static columns
{},
// regular column name type
utf8_type,
// comment
"hints awaiting delivery"
)));
builder.set_gc_grace_seconds(0);
builder.set_compaction_strategy_options({{ "enabled", "false" }});
return builder.build(schema_builder::compact_storage::yes);
}();
return hints;
}
schema_ptr batchlog() {
static thread_local auto batchlog = [] {
schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, BATCHLOG), NAME, BATCHLOG,
// partition key
{{"id", uuid_type}},
// clustering key
{},
// regular columns
{{"data", bytes_type}, {"version", int32_type}, {"written_at", timestamp_type}},
// static columns
{},
// regular column name type
utf8_type,
// comment
"batches awaiting replay"
// FIXME: the original Java code also had:
// operations on resulting CFMetaData:
// .compactionStrategyOptions(Collections.singletonMap("min_threshold", "2"))
)));
builder.set_gc_grace_seconds(0);
return builder.build(schema_builder::compact_storage::no);
}();
return batchlog;
}
/*static*/ schema_ptr paxos() {
static thread_local auto paxos = [] {
schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, PAXOS), NAME, PAXOS,
// partition key
{{"row_key", bytes_type}},
// clustering key
{{"cf_id", uuid_type}},
// regular columns
{{"in_progress_ballot", timeuuid_type}, {"most_recent_commit", bytes_type}, {"most_recent_commit_at", timeuuid_type}, {"proposal", bytes_type}, {"proposal_ballot", timeuuid_type}},
// static columns
{},
// regular column name type
utf8_type,
// comment
"in-progress paxos proposals"
// FIXME: the original Java code also had:
// operations on resulting CFMetaData:
// .compactionStrategyClass(LeveledCompactionStrategy.class);
)));
return builder.build(schema_builder::compact_storage::no);
}();
return paxos;
}
schema_ptr built_indexes() {
static thread_local auto built_indexes = [] {
schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, BUILT_INDEXES), NAME, BUILT_INDEXES,
// partition key
{{"table_name", utf8_type}},
// clustering key
{{"index_name", utf8_type}},
// regular columns
{},
// static columns
{},
// regular column name type
utf8_type,
// comment
"built column indexes"
)));
return builder.build(schema_builder::compact_storage::yes);
}();
return built_indexes;
}
/*static*/ schema_ptr local() {
static thread_local auto local = [] {
schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, LOCAL), NAME, LOCAL,
// partition key
{{"key", utf8_type}},
// clustering key
{},
// regular columns
{
{"bootstrapped", utf8_type},
{"cluster_name", utf8_type},
{"cql_version", utf8_type},
{"data_center", utf8_type},
{"gossip_generation", int32_type},
{"host_id", uuid_type},
{"native_protocol_version", utf8_type},
{"partitioner", utf8_type},
{"rack", utf8_type},
{"release_version", utf8_type},
{"schema_version", uuid_type},
{"thrift_version", utf8_type},
{"tokens", set_type_impl::get_instance(utf8_type, true)},
{"truncated_at", map_type_impl::get_instance(uuid_type, bytes_type, true)},
// The following 3 columns are only present up until 2.1.8 tables
{"rpc_address", inet_addr_type},
{"broadcast_address", inet_addr_type},
{"listen_address", inet_addr_type},
},
// static columns
{},
// regular column name type
utf8_type,
// comment
"information about the local node"
)));
return builder.build(schema_builder::compact_storage::no);
}();
return local;
}
/*static*/ schema_ptr peers() {
static thread_local auto peers = [] {
schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, PEERS), NAME, PEERS,
// partition key
{{"peer", inet_addr_type}},
// clustering key
{},
// regular columns
{
{"data_center", utf8_type},
{"host_id", uuid_type},
{"preferred_ip", inet_addr_type},
{"rack", utf8_type},
{"release_version", utf8_type},
{"rpc_address", inet_addr_type},
{"schema_version", uuid_type},
{"tokens", set_type_impl::get_instance(utf8_type, true)},
},
// static columns
{},
// regular column name type
utf8_type,
// comment
"information about known peers in the cluster"
)));
return builder.build(schema_builder::compact_storage::no);
}();
return peers;
}
/*static*/ schema_ptr peer_events() {
static thread_local auto peer_events = [] {
schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, PEER_EVENTS), NAME, PEER_EVENTS,
// partition key
{{"peer", inet_addr_type}},
// clustering key
{},
// regular columns
{
{"hints_dropped", map_type_impl::get_instance(uuid_type, int32_type, true)},
},
// static columns
{},
// regular column name type
utf8_type,
// comment
"events related to peers"
)));
return builder.build(schema_builder::compact_storage::no);
}();
return peer_events;
}
/*static*/ schema_ptr range_xfers() {
static thread_local auto range_xfers = [] {
schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, RANGE_XFERS), NAME, RANGE_XFERS,
// partition key
{{"token_bytes", bytes_type}},
// clustering key
{},
// regular columns
{{"requested_at", timestamp_type}},
// static columns
{},
// regular column name type
utf8_type,
// comment
"ranges requested for transfer"
)));
return builder.build(schema_builder::compact_storage::no);
}();
return range_xfers;
}
/*static*/ schema_ptr compactions_in_progress() {
static thread_local auto compactions_in_progress = [] {
schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, COMPACTIONS_IN_PROGRESS), NAME, COMPACTIONS_IN_PROGRESS,
// partition key
{{"id", uuid_type}},
// clustering key
{},
// regular columns
{
{"columnfamily_name", utf8_type},
{"inputs", set_type_impl::get_instance(int32_type, true)},
{"keyspace_name", utf8_type},
},
// static columns
{},
// regular column name type
utf8_type,
// comment
"unfinished compactions"
)));
return builder.build(schema_builder::compact_storage::no);
}();
return compactions_in_progress;
}
/*static*/ schema_ptr compaction_history() {
static thread_local auto compaction_history = [] {
schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, COMPACTION_HISTORY), NAME, COMPACTION_HISTORY,
// partition key
{{"id", uuid_type}},
// clustering key
{},
// regular columns
{
{"bytes_in", long_type},
{"bytes_out", long_type},
{"columnfamily_name", utf8_type},
{"compacted_at", timestamp_type},
{"keyspace_name", utf8_type},
{"rows_merged", map_type_impl::get_instance(int32_type, long_type, true)},
},
// static columns
{},
// regular column name type
utf8_type,
// comment
"week-long compaction history"
)));
builder.set_default_time_to_live(std::chrono::duration_cast<std::chrono::seconds>(days(7)));
return builder.build(schema_builder::compact_storage::no);
}();
return compaction_history;
}
/*static*/ schema_ptr sstable_activity() {
static thread_local auto sstable_activity = [] {
schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, SSTABLE_ACTIVITY), NAME, SSTABLE_ACTIVITY,
// partition key
{
{"keyspace_name", utf8_type},
{"columnfamily_name", utf8_type},
{"generation", int32_type},
},
// clustering key
{},
// regular columns
{
{"rate_120m", double_type},
{"rate_15m", double_type},
},
// static columns
{},
// regular column name type
utf8_type,
// comment
"historic sstable read rates"
)));
return builder.build(schema_builder::compact_storage::no);
}();
return sstable_activity;
}
schema_ptr size_estimates() {
static thread_local auto size_estimates = [] {
schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, SIZE_ESTIMATES), NAME, SIZE_ESTIMATES,
// partition key
{{"keyspace_name", utf8_type}},
// clustering key
{{"table_name", utf8_type}, {"range_start", utf8_type}, {"range_end", utf8_type}},
// regular columns
{
{"mean_partition_size", long_type},
{"partitions_count", long_type},
},
// static columns
{},
// regular column name type
utf8_type,
// comment
"per-table primary range size estimates"
)));
builder.set_gc_grace_seconds(0);
return builder.build(schema_builder::compact_storage::no);
}();
return size_estimates;
}
static future<> setup_version() {
sstring req = "INSERT INTO system.%s (key, release_version, cql_version, thrift_version, native_protocol_version, data_center, rack, partitioner, rpc_address, broadcast_address, listen_address) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";
auto& snitch = locator::i_endpoint_snitch::get_local_snitch_ptr();
return execute_cql(req, db::system_keyspace::LOCAL,
sstring(db::system_keyspace::LOCAL),
version::release(),
cql3::query_processor::CQL_VERSION,
org::apache::cassandra::thrift_version,
to_sstring(version::native_protocol()),
snitch->get_datacenter(utils::fb_utilities::get_broadcast_address()),
snitch->get_rack(utils::fb_utilities::get_broadcast_address()),
sstring(dht::global_partitioner().name()),
gms::inet_address(qctx->db().get_config().rpc_address()).addr(),
utils::fb_utilities::get_broadcast_address().addr(),
net::get_local_messaging_service().listen_address().addr()
).discard_result();
}
future<> check_health();
future<> force_blocking_flush(sstring cfname);
// Changing the real load_dc_rack_info into a future would trigger a tidal wave of futurization that would spread
// even into simple string operations like get_rack() / get_dc(). We will cache those at startup, and then change
// our view of it every time we do updates on those values.
//
// The cache must be distributed, because the values themselves may not update atomically, so a shard reading that
// is different than the one that wrote, may see a corrupted value. invoke_on_all will be used to guarantee that all
// updates are propagated correctly.
struct local_cache {
std::unordered_map<gms::inet_address, locator::endpoint_dc_rack> _cached_dc_rack_info;
future<> stop() {
return make_ready_future<>();
}
};
static distributed<local_cache> _local_cache;
static future<> build_dc_rack_info() {
return _local_cache.start().then([] {
engine().at_exit([] {
return _local_cache.stop();
});
return execute_cql("SELECT peer, data_center, rack from system.%s", PEERS).then([] (::shared_ptr<cql3::untyped_result_set> msg) {
return do_for_each(*msg, [] (auto& row) {
// Not ideal to assume ipv4 here, but currently this is what the cql types wraps.
net::ipv4_address peer = row.template get_as<net::ipv4_address>("peer");
if (!row.has("data_center") || !row.has("rack")) {
return make_ready_future<>();
}
gms::inet_address gms_addr(std::move(peer));
sstring dc = row.template get_as<sstring>("data_center");
sstring rack = row.template get_as<sstring>("rack");
locator::endpoint_dc_rack element = { dc, rack };
return _local_cache.invoke_on_all([gms_addr = std::move(gms_addr), element = std::move(element)] (local_cache& lc) {
lc._cached_dc_rack_info.emplace(gms_addr, element);
});
});
});
});
}
future<> setup(distributed<database>& db, distributed<cql3::query_processor>& qp) {
auto new_ctx = std::make_unique<query_context>(db, qp);
qctx.swap(new_ctx);
assert(!new_ctx);
return setup_version().then([&db] {
return update_schema_version(db.local().get_version());
}).then([] {
return build_dc_rack_info();
}).then([] {
return check_health();
}).then([] {
return db::schema_tables::save_system_keyspace_schema();
});
}
typedef std::pair<db::replay_position, db_clock::time_point> truncation_entry;
typedef std::unordered_map<utils::UUID, truncation_entry> truncation_map;
static thread_local std::experimental::optional<truncation_map> truncation_records;
future<> save_truncation_record(cql3::query_processor& qp, const column_family& cf, db_clock::time_point truncated_at, const db::replay_position& rp) {
db::serializer<replay_position> rps(rp);
bytes buf(bytes::initialized_later(), sizeof(db_clock::rep) + rps.size());
data_output out(buf);
rps(out);
out.write<db_clock::rep>(truncated_at.time_since_epoch().count());
map_type_impl::native_type tmp;
tmp.emplace_back(boost::any{ cf.schema()->id() }, boost::any{ buf });
sstring req = sprint("UPDATE system.%s SET truncated_at = truncated_at + ? WHERE key = '%s'", LOCAL, LOCAL);
return qp.execute_internal(req, {tmp}).then([&qp](auto rs) {
truncation_records = {};
return force_blocking_flush(LOCAL);
});
}
/**
* This method is used to remove information about truncation time for specified column family
*/
future<> remove_truncation_record(cql3::query_processor& qp, utils::UUID id) {
sstring req = sprint("DELETE truncated_at[?] from system.%s WHERE key = '%s'", LOCAL, LOCAL);
return qp.execute_internal(req, {id}).then([&qp](auto rs) {
truncation_records = {};
return force_blocking_flush(LOCAL);
});
}
static future<truncation_entry> get_truncation_record(cql3::query_processor& qp, utils::UUID cf_id) {
if (!truncation_records) {
sstring req = sprint("SELECT truncated_at FROM system.%s WHERE key = '%s'", LOCAL, LOCAL);
return qp.execute_internal(req).then([&qp, cf_id](::shared_ptr<cql3::untyped_result_set> rs) {
truncation_map tmp;
if (!rs->empty() && rs->one().has("truncated_set")) {
auto map = rs->one().get_map<utils::UUID, bytes>("truncated_at");
for (auto& p : map) {
truncation_entry e;
data_input in(p.second);
e.first = db::serializer<replay_position>::read(in);
e.second = db_clock::time_point(db_clock::duration(in.read<db_clock::rep>()));
tmp[p.first] = e;
}
}
truncation_records = std::move(tmp);
return get_truncation_record(qp, cf_id);
});
}
return make_ready_future<truncation_entry>((*truncation_records)[cf_id]);
}
future<db::replay_position> get_truncated_position(cql3::query_processor& qp, utils::UUID cf_id) {
return get_truncation_record(qp, cf_id).then([](truncation_entry e) {
return make_ready_future<db::replay_position>(e.first);
});
}
future<db_clock::time_point> get_truncated_at(cql3::query_processor& qp, utils::UUID cf_id) {
return get_truncation_record(qp, cf_id).then([](truncation_entry e) {
return make_ready_future<db_clock::time_point>(e.second);
});
}
set_type_impl::native_type prepare_tokens(std::unordered_set<dht::token>& tokens) {
set_type_impl::native_type tset;
for (auto& t: tokens) {
tset.push_back(boost::any(dht::global_partitioner().to_sstring(t)));
}
return tset;
}
/**
* Record tokens being used by another node
*/
future<> update_tokens(gms::inet_address ep, std::unordered_set<dht::token> tokens)
{
if (ep == utils::fb_utilities::get_broadcast_address()) {
return remove_endpoint(ep);
}
sstring req = "INSERT INTO system.%s (peer, tokens) VALUES (?, ?)";
return execute_cql(req, PEERS, ep.addr(), prepare_tokens(tokens)).discard_result().then([] {
return force_blocking_flush(PEERS);
});
}
future<std::unordered_set<dht::token>> update_local_tokens(
const std::unordered_set<dht::token>& add_tokens,
const std::unordered_set<dht::token>& rm_tokens) {
auto tokens = get_saved_tokens();
for (auto& x : rm_tokens) {
tokens.erase(x);
}
for (auto& x : add_tokens) {
tokens.insert(x);
}
return update_tokens(tokens).then([tokens] {
return tokens;
});
}
future<> update_preferred_ip(gms::inet_address ep, gms::inet_address preferred_ip) {
sstring req = "INSERT INTO system.%s (peer, preferred_ip) VALUES (?, ?)";
return execute_cql(req, PEERS, ep.addr(), preferred_ip).discard_result().then([] {
return force_blocking_flush(PEERS);
});
}
template <typename Value>
static future<> update_cached_values(gms::inet_address ep, sstring column_name, Value value) {
return make_ready_future<>();
}
template <>
future<> update_cached_values(gms::inet_address ep, sstring column_name, sstring value) {
return _local_cache.invoke_on_all([ep = std::move(ep),
column_name = std::move(column_name),
value = std::move(value)] (local_cache& lc) {
if (column_name == "data_center") {
lc._cached_dc_rack_info[ep].dc = value;
} else if (column_name == "rack") {
lc._cached_dc_rack_info[ep].rack = value;
}
return make_ready_future<>();
});
}
template <typename Value>
future<> update_peer_info(gms::inet_address ep, sstring column_name, Value value) {
if (ep == utils::fb_utilities::get_broadcast_address()) {
return make_ready_future<>();
}
return update_cached_values(ep, column_name, value).then([ep, column_name, value] {
sstring clause = sprint("(peer, %s) VALUES (?, ?)", column_name);
sstring req = "INSERT INTO system.%s " + clause;
return execute_cql(req, PEERS, ep.addr(), value).discard_result();
});
}
// sets are not needed, since tokens are updated by another method
template future<> update_peer_info<sstring>(gms::inet_address ep, sstring column_name, sstring);
template future<> update_peer_info<utils::UUID>(gms::inet_address ep, sstring column_name, utils::UUID);
template future<> update_peer_info<net::ipv4_address>(gms::inet_address ep, sstring column_name, net::ipv4_address);
future<> update_hints_dropped(gms::inet_address ep, utils::UUID time_period, int value) {
// with 30 day TTL
sstring req = "UPDATE system.%s USING TTL 2592000 SET hints_dropped[ ? ] = ? WHERE peer = ?";
return execute_cql(req, PEER_EVENTS, time_period, value, ep.addr()).discard_result();
}
future<> update_schema_version(utils::UUID version) {
sstring req = "INSERT INTO system.%s (key, schema_version) VALUES (?, ?)";
return execute_cql(req, LOCAL, sstring(LOCAL), version).discard_result();
}
/**
* Remove stored tokens being used by another node
*/
future<> remove_endpoint(gms::inet_address ep) {
return _local_cache.invoke_on_all([ep] (local_cache& lc) {
lc._cached_dc_rack_info.erase(ep);
}).then([ep] {
sstring req = "DELETE FROM system.%s WHERE peer = ?";
return execute_cql(req, PEERS, ep.addr()).discard_result();
});
}
/**
* This method is used to update the System Keyspace with the new tokens for this node
*/
future<> update_tokens(std::unordered_set<dht::token> tokens) {
if (tokens.empty()) {
throw std::invalid_argument("remove_endpoint should be used instead");
}
sstring req = "INSERT INTO system.%s (key, tokens) VALUES (?, ?)";
return execute_cql(req, LOCAL, sstring(LOCAL), prepare_tokens(tokens)).discard_result().then([] {
return force_blocking_flush(LOCAL);
});
}
future<> force_blocking_flush(sstring cfname) {
if (!qctx) {
return make_ready_future<>();
}
return qctx->_db.invoke_on_all([cfname = std::move(cfname)](database& db) {
// if (!Boolean.getBoolean("cassandra.unsafesystem"))
column_family& cf = db.find_column_family(NAME, cfname);
return cf.flush();
});
}
/**
* One of three things will happen if you try to read the system keyspace:
* 1. files are present and you can read them: great
* 2. no files are there: great (new node is assumed)
* 3. files are present but you can't read them: bad
*/
future<> check_health() {
using namespace transport::messages;
sstring req = "SELECT cluster_name FROM system.%s WHERE key=?";
return execute_cql(req, LOCAL, sstring(LOCAL)).then([] (::shared_ptr<cql3::untyped_result_set> msg) {
if (msg->empty() || !msg->one().has("cluster_name")) {
// this is a brand new node
sstring ins_req = "INSERT INTO system.%s (key, cluster_name) VALUES (?, ?)";
return execute_cql(ins_req, LOCAL, sstring(LOCAL), qctx->db().get_config().cluster_name()).discard_result();
} else {
auto saved_cluster_name = msg->one().get_as<sstring>("cluster_name");
auto cluster_name = qctx->db().get_config().cluster_name();
if (cluster_name != saved_cluster_name) {
throw exceptions::configuration_exception("Saved cluster name " + saved_cluster_name + " != configured name " + cluster_name);
}
return make_ready_future<>();
}
});
}
std::unordered_set<dht::token> get_saved_tokens() {
#if 0
String req = "SELECT tokens FROM system.%s WHERE key='%s'";
UntypedResultSet result = executeInternal(String.format(req, LOCAL, LOCAL));
return result.isEmpty() || !result.one().has("tokens")
? Collections.<Token>emptyList()
: deserializeTokens(result.one().getSet("tokens", UTF8Type.instance));
#endif
return std::unordered_set<dht::token>();
}
bool bootstrap_complete() {
return get_bootstrap_state() == bootstrap_state::COMPLETED;
}
bool bootstrap_in_progress() {
return get_bootstrap_state() == bootstrap_state::IN_PROGRESS;
}
bootstrap_state get_bootstrap_state() {
#if 0
String req = "SELECT bootstrapped FROM system.%s WHERE key='%s'";
UntypedResultSet result = executeInternal(String.format(req, LOCAL, LOCAL));
if (result.isEmpty() || !result.one().has("bootstrapped"))
return BootstrapState.NEEDS_BOOTSTRAP;
return BootstrapState.valueOf(result.one().getString("bootstrapped"));
#endif
return bootstrap_state::NEEDS_BOOTSTRAP;
}
future<> set_bootstrap_state(bootstrap_state state) {
#if 0
sstring req = "INSERT INTO system.%s (key, bootstrapped) VALUES ('%s', '%s')";
return execute_cql(req, LOCAL, LOCAL, state.name()).discard_result().then([] {
return force_blocking_flush(LOCAL);
});
#endif
return make_ready_future<>();
}
std::vector<schema_ptr> all_tables() {
std::vector<schema_ptr> r;
auto legacy_tables = db::schema_tables::all_tables();
std::copy(legacy_tables.begin(), legacy_tables.end(), std::back_inserter(r));
r.push_back(built_indexes());
r.push_back(hints());
r.push_back(batchlog());
r.push_back(paxos());
r.push_back(local());
r.push_back(peers());
r.push_back(peer_events());
r.push_back(range_xfers());
r.push_back(compactions_in_progress());
r.push_back(compaction_history());
r.push_back(sstable_activity());
r.push_back(size_estimates());
return r;
}
void make(database& db, bool durable) {
auto ksm = make_lw_shared<keyspace_metadata>(NAME,
"org.apache.cassandra.locator.LocalStrategy",
std::map<sstring, sstring>{},
durable
);
auto kscfg = db.make_keyspace_config(*ksm);
kscfg.enable_disk_reads = true;
kscfg.enable_disk_writes = true;
kscfg.enable_commitlog = true;
kscfg.enable_cache = true;
keyspace _ks{ksm, std::move(kscfg)};
auto rs(locator::abstract_replication_strategy::create_replication_strategy(NAME, "LocalStrategy", service::get_local_storage_service().get_token_metadata(), ksm->strategy_options()));
_ks.set_replication_strategy(std::move(rs));
db.add_keyspace(NAME, std::move(_ks));
auto& ks = db.find_keyspace(NAME);
for (auto&& table : all_tables()) {
db.add_column_family(table, ks.make_column_family_config(*table));
}
}
future<utils::UUID> get_local_host_id() {
using namespace transport::messages;
sstring req = "SELECT host_id FROM system.%s WHERE key=?";
return execute_cql(req, LOCAL, sstring(LOCAL)).then([] (::shared_ptr<cql3::untyped_result_set> msg) {
auto new_id = [] {
auto host_id = utils::make_random_uuid();
return make_ready_future<utils::UUID>(host_id);
};
if (msg->empty() || !msg->one().has("host_id")) {
return new_id();
}
auto host_id = msg->one().get_as<utils::UUID>("host_id");
return make_ready_future<utils::UUID>(host_id);
});
}
future<utils::UUID> set_local_host_id(const utils::UUID& host_id) {
sstring req = "INSERT INTO system.%s (key, host_id) VALUES (?, ?)";
return execute_cql(req, LOCAL, sstring(LOCAL), host_id).then([] (auto msg) {
return force_blocking_flush(LOCAL);
}).then([host_id] {
return host_id;
});
}
std::unordered_map<gms::inet_address, locator::endpoint_dc_rack>
load_dc_rack_info() {
return _local_cache.local()._cached_dc_rack_info;
}
future<foreign_ptr<lw_shared_ptr<reconcilable_result>>>
query_mutations(service::storage_proxy& proxy, const sstring& cf_name) {
database& db = proxy.get_db().local();
schema_ptr schema = db.find_schema(db::system_keyspace::NAME, cf_name);
auto slice = partition_slice_builder(*schema).build();
auto cmd = make_lw_shared<query::read_command>(schema->id(), std::move(slice), std::numeric_limits<uint32_t>::max());
return proxy.query_mutations_locally(cmd, query::full_partition_range);
}
future<lw_shared_ptr<query::result_set>>
query(service::storage_proxy& proxy, const sstring& cf_name) {
database& db = proxy.get_db().local();
schema_ptr schema = db.find_schema(db::system_keyspace::NAME, cf_name);
auto slice = partition_slice_builder(*schema).build();
auto cmd = make_lw_shared<query::read_command>(schema->id(), std::move(slice), std::numeric_limits<uint32_t>::max());
return proxy.query(schema, cmd, {query::full_partition_range}, db::consistency_level::ONE).then([schema, cmd] (auto&& result) {
return make_lw_shared(query::result_set::from_raw_result(schema, cmd->slice, *result));
});
}
future<lw_shared_ptr<query::result_set>>
query(service::storage_proxy& proxy, const sstring& cf_name, const dht::decorated_key& key, query::clustering_range row_range)
{
auto&& db = proxy.get_db().local();
auto schema = db.find_schema(db::system_keyspace::NAME, cf_name);
auto slice = partition_slice_builder(*schema)
.with_range(std::move(row_range))
.build();
auto cmd = make_lw_shared<query::read_command>(schema->id(), std::move(slice), query::max_rows);
return proxy.query(schema, cmd, {query::partition_range::make_singular(key)}, db::consistency_level::ONE).then([schema, cmd] (auto&& result) {
return make_lw_shared(query::result_set::from_raw_result(schema, cmd->slice, *result));
});
}
} // namespace system_keyspace
} // namespace db